LinkGrep.cs source code in C# .NET

Source code for the .NET framework in C#

                        

Code:

/ 4.0 / 4.0 / untmp / DEVDIV_TFS / Dev10 / Releases / RTMRel / ndp / fx / src / Services / Web / System / Web / Services / Discovery / LinkGrep.cs / 1305376 / LinkGrep.cs

                            //------------------------------------------------------------------------------ 
// 
//     Copyright (c) Microsoft Corporation.  All rights reserved.
// 
//----------------------------------------------------------------------------- 

namespace System.Web.Services.Discovery { 
 
    using System;
    using System.IO; 
    using System.Text.RegularExpressions;
    using System.Web.Services.Protocols;
    using System.Globalization;
 
    internal class LinkGrep {
        // Private constructor: prevents code outside this class from creating instances.
        // The members visible in this file are all static.
        private LinkGrep() {}
 
        /// <summary>
        /// Reads every remaining character from <paramref name="input"/> and returns
        /// the result as a single string.
        /// </summary>
        /// <param name="input">Reader to drain. It is read to its end but not closed here.</param>
        /// <returns>
        /// All characters read from the reader; an empty string when the reader has
        /// nothing left to return.
        /// </returns>
        private static string ReadEntireStream(TextReader input) {
            // ReadToEnd keeps reading until the reader reports no more characters,
            // so no manual buffer growth or position tracking is needed.
            return input.ReadToEnd();
        }
 
        /// <summary>
        /// Scans the markup read from <paramref name="stream"/> for a <c>link</c> tag that has
        /// <c>rel="alternate"</c>, a <c>type</c> accepted by
        /// <c>ContentType.MatchesBase(value, ContentType.TextXml)</c>, and an <c>href</c>
        /// attribute, and returns the value of that <c>href</c>.
        /// </summary>
        /// <param name="stream">
        /// Stream holding the document text. It is wrapped in a <see cref="StreamReader"/> and
        /// read to the end. The reader is intentionally not disposed, because disposing it
        /// would also close the caller's stream.
        /// </param>
        /// <returns>
        /// The <c>href</c> of the first qualifying <c>link</c> tag, or <c>null</c> when the scan
        /// stops without finding one (end of text, a <c>body</c> tag, or content that none of
        /// the regular expressions recognizes).
        /// </returns>
        internal static string SearchForLink(Stream stream) {
            string text = ReadEntireStream(new StreamReader(stream));

            int textpos = 0;
            Match match;

            // Skip whatever doctypeDirectiveRegex matches at the very start of the text.
            if ((match = doctypeDirectiveRegex.Match(text, textpos)).Success) {
                textpos += match.Length;
            }

            bool oneMatch;
            for (;;) {

                // Tracks whether anything at all was recognized during this pass.
                oneMatch = false;

                // 1: scan for text up to the next tag.

                // First case: check for whitespace going all the way to the next tag
                if ((match = whitespaceRegex.Match(text, textpos)).Success) {
                    oneMatch = true;
                }

                // Second case: there may be some nonwhitespace; scan it
                else if ((match = textRegex.Match(text, textpos)).Success) {
                    oneMatch = true;
                }

                // An unsuccessful Match has Length 0, so this leaves textpos unchanged
                // when neither expression matched.
                textpos += match.Length;
                if (textpos == text.Length) {
                    break;
                }

                // 2: handle constructs that start with <

                // First, check to see if it's a tag
                if ((match = tagRegex.Match(text, textpos)).Success) {
                    oneMatch = true;
                    string tag = match.Groups["tagname"].Value;

                    if (String.Compare(tag, "link", StringComparison.OrdinalIgnoreCase) == 0) {
                        // Attribute names and values are read pairwise by index.
                        CaptureCollection attrnames = match.Groups["attrname"].Captures;
                        CaptureCollection attrvalues = match.Groups["attrval"].Captures;

                        int count = attrnames.Count;
                        bool rightType = false;
                        bool rightRel = false;
                        string href = null;
                        for (int i = 0; i < count; i++) {
                            string attrName = attrnames[i].ToString();
                            string attrValue = attrvalues[i].ToString();
                            if (String.Compare(attrName, "type", StringComparison.OrdinalIgnoreCase) == 0 &&
                                ContentType.MatchesBase(attrValue, ContentType.TextXml)) {
                                rightType = true;
                            }
                            else if (String.Compare(attrName, "rel", StringComparison.OrdinalIgnoreCase) == 0 &&
                                String.Compare(attrValue, "alternate", StringComparison.OrdinalIgnoreCase) == 0) {
                                rightRel = true;
                            }
                            else if (String.Compare(attrName, "href", StringComparison.OrdinalIgnoreCase) == 0) {
                                href = attrValue;
                            }

                            // The three attributes may appear in any order; return as soon
                            // as all of them have been seen.
                            if (rightType && rightRel && href != null) {
                                // Got a link to a disco file!
                                return href;
                            }
                        }
                    }
                    else if (String.Compare(tag, "body", StringComparison.OrdinalIgnoreCase) == 0) {
                        // If body begins, get out, since link tags should only be defined in the head.
                        // Compared case-insensitively, like "link" above, so that <BODY> also stops the scan.
                        break;
                    }
                }

                // check to see if it's an end tag
                else if ((match = endtagRegex.Match(text, textpos)).Success) {
                    oneMatch = true;
                }

                // check to see if it's a comment
                else if ((match = commentRegex.Match(text, textpos)).Success) {
                    oneMatch = true;
                }

                // Again a no-op when none of the three expressions matched.
                textpos += match.Length;

                if (textpos == text.Length) {
                    break;
                }

                // If we couldn't get one single match, it means that it's probably not HTML, so bail
                if (!oneMatch) {
                    break;
                }
            }

            return null;
        }
 
        // Matches one start tag beginning exactly at the search position (\G).
        // Named groups: prefix, tagname, attrprefix, attrname, attrval, empty.
        // SearchForLink reads "tagname" plus the "attrname"/"attrval" captures, which
        // are recorded once per attribute in document order. All three attrval
        // alternatives share one group name so the captures line up with attrname.
        private readonly static Regex tagRegex = new Regex
        (
            "\\G<" +                                    // leading <
            "(?<prefix>[\\w:.-]+(?=:)|):?" +            // optional prefix:
            "(?<tagname>[\\w.-]+)" +                    // tagname

            "(?:\\s+" +                                 // zero or more attributes
                "(?<attrprefix>[\\w:.-]+(?=:)|):?" +    //     optional attrprefix:
                "(?<attrname>[\\w.-]+)" +               //     attrname
                "\\s*=\\s*" +                           //     required equals
                "(?:" +                                 //     quoted value
                    "\"(?<attrval>[^\"]*)\"" +          //          double quoted attrval
                    "|'(?<attrval>[^\']*)'" +           //          single quoted attrval
                    "|(?<attrval>[a-zA-Z0-9\\-._:]+)" + //          attrval with no quotes (SGML-approved chars)
                ")" +                                   //     end quoted value
            ")*" +                                      // end attribute

            "\\s*(?<empty>/)?>"                         // optional trailing /, and trailing >
        );
 
        // Matches a <!DOCTYPE ...> directive beginning exactly at the search position (\G),
        // in any letter case (RegexOptions.IgnoreCase). The directive body is a run of
        // characters other than '>' and '"', interleaved with double-quoted strings, so a
        // '>' inside a quoted identifier does not end the match early. The two alternatives
        // cannot match the same character, which keeps backtracking bounded on malformed input.
        private readonly static Regex doctypeDirectiveRegex = new Regex
        (
            @"\G<!doctype\b(?:[^>""]|""[^""]*"")*>",
            RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace
        );
 
        // Matches one end tag (</prefix:tagname>) beginning exactly at the search
        // position (\G). Named groups: prefix (may be empty) and tagname.
        private readonly static Regex endtagRegex = new Regex
        (
            "\\G</" +                                   // leading </
            "(?<prefix>[\\w:-]+(?=:)|):?" +             // optional prefix:
            "(?<tagname>[\\w-]+)\\s*>"                  // tagname
        );
 
        private readonly static Regex commentRegex = new Regex
        ( 
            "\\G