TokenizerHelper.cs source code in C# .NET

                        Code:
                         / 4.0 / 4.0 / DEVDIV_TFS / Dev10 / Releases / RTMRel / wpf / src / Shared / MS / Internal / TokenizerHelper.cs / 1305600 / TokenizerHelper.cs
                        
                        
                            //---------------------------------------------------------------------------- 
//
// Copyright (C) Microsoft Corporation.  All rights reserved.
//
// File: TokenizerHelper.cs 
//
// Description: This file contains the implementation of TokenizerHelper. 
//              This class should be used by most - if not all - MIL parsers. 
//
// History: 
//  05/19/2003 : [....] - Created it
//  05/20/2003 : [....] - Moved to Shared
//
//--------------------------------------------------------------------------- 

using System; 
using System.Collections; 
using System.Diagnostics;
using System.ComponentModel; 
using System.Globalization;

#if !PBTCOMPILER
using System.Windows; 
#endif
 
#if WINDOWS_BASE 
    using MS.Internal.WindowsBase;
#elif PRESENTATION_CORE 
    using MS.Internal.PresentationCore;
#elif PRESENTATIONFRAMEWORK
    using MS.Internal.PresentationFramework;
#elif PBTCOMPILER 
    using MS.Utility ;
    using MS.Internal.Markup; 
#elif DRT 
    using MS.Internal.Drt;
#else 
#error Attempt to use FriendAccessAllowedAttribute from an unknown assembly.
using MS.Internal.YourAssemblyName;
#endif
 
#if PBTCOMPILER
namespace MS.Internal.Markup 
#else 
namespace MS.Internal
#endif 
{
#if !PBTCOMPILER
    [FriendAccessAllowed]
#endif 
    internal class TokenizerHelper
    { 
        /// <summary>
        /// Creates a tokenizer over <paramref name="str"/> whose list separator is
        /// derived from <paramref name="formatProvider"/> via GetNumericListSeparator.
        /// The separator is ',' unless the provider's decimal separator starts with ',';
        /// in that case "23,5" would be ambiguous (one number or two), so ';' is used.
        /// The quote character is the single quote (').
        /// </summary>
        /// <param name="str"> The string which will be tokenized. </param>
        /// <param name="formatProvider"> The IFormatProvider which controls this tokenization. </param>
        internal TokenizerHelper(string str, IFormatProvider formatProvider)
        {
            Initialize(str, '\'', GetNumericListSeparator(formatProvider));
        }
 
        /// <summary>
        /// Creates a tokenizer over <paramref name="str"/> using an explicitly supplied
        /// quote character and list separator.
        /// </summary>
        /// <param name="str"> The string to tokenize. </param>
        /// <param name="quoteChar"> The quote char. </param>
        /// <param name="separator"> The list separator. </param>
        internal TokenizerHelper(string str, char quoteChar, char separator)
        {
            Initialize(str, quoteChar, separator);
        }
 
        /// <summary>
        /// Shared constructor logic: records the string, its length (0 for a null
        /// string), the quote character and the list separator, marks that there is
        /// no current token, and skips any leading whitespace.
        /// </summary>
        /// <param name="str"> The string to tokenize. </param>
        /// <param name="quoteChar"> The quote char. </param>
        /// <param name="separator"> The list separator. </param>
        private void Initialize(string str, char quoteChar, char separator)
        {
            _quoteChar = quoteChar;
            _argSeparator = separator;

            _str = str;
            _strLen = (str != null) ? str.Length : 0;

            // -1 means "no current token"; GetCurrentToken() returns null in that state.
            _currentTokenIndex = -1;

            // Step over leading whitespace right away so that NextToken() always
            // begins on the first character of the next token.
            while ((_charIndex < _strLen) && Char.IsWhiteSpace(_str, _charIndex))
            {
                ++_charIndex;
            }
        }
 
        /// <summary>
        /// Returns the text of the current token.
        /// </summary>
        /// <returns>
        /// The substring of the source string described by _currentTokenIndex and
        /// _currentTokenLength, or null when there is no current token
        /// (_currentTokenIndex is negative).
        /// </returns>
        internal string GetCurrentToken()
        {
            bool hasToken = _currentTokenIndex >= 0;

            return hasToken
                ? _str.Substring(_currentTokenIndex, _currentTokenLength)
                : null;
        }
 
        /// <summary>
        /// Throws an exception if the scan position has not reached the end of the
        /// string, i.e. there is still un-parsed input after the last token.
        /// </summary>
        /// <exception cref="System.InvalidOperationException">
        /// The current character index differs from the string length.
        /// </exception>
        internal void LastTokenRequired()
        {
            bool fullyConsumed = (_charIndex == _strLen);

            if (fullyConsumed)
            {
                return;
            }

            throw new System.InvalidOperationException(SR.Get(SRID.TokenizerHelperExtraDataEncountered, _charIndex, _str));
        }

        /// <summary>
        /// Advances to the next token, without quoted-token support and using the
        /// separator supplied at construction.
        /// </summary>
        /// <returns> true if next token was found, false if at end of string </returns>
        internal bool NextToken()
        {
            // Quoted tokens are not allowed by this overload.
            return NextToken(false, _argSeparator);
        }

        /// <summary>
        /// Advances to the next token (quoted tokens not allowed), throwing an
        /// exception if no token is present.
        /// </summary>
        /// <returns> The next token found </returns>
        /// <exception cref="System.InvalidOperationException">
        /// The end of the string was reached before another token was found.
        /// </exception>
        internal string NextTokenRequired()
        {
            // Same logic and error message as the bool overload, with quoting disabled.
            return NextTokenRequired(false);
        }
 
        /// <summary>
        /// Advances to the next token, throwing an exception if no token is present.
        /// </summary>
        /// <param name="allowQuotedToken">
        /// Passed through to NextToken; when true a token may be enclosed in the quote character.
        /// </param>
        /// <returns> The next token found </returns>
        /// <exception cref="System.InvalidOperationException">
        /// The end of the string was reached before another token was found.
        /// </exception>
        internal string NextTokenRequired(bool allowQuotedToken)
        {
            if (NextToken(allowQuotedToken))
            {
                return GetCurrentToken();
            }

            throw new System.InvalidOperationException(SR.Get(SRID.TokenizerHelperPrematureStringTermination, _str));
        }
 
        /// <summary>
        /// Advances to the next token using the separator supplied at construction.
        /// </summary>
        /// <param name="allowQuotedToken">
        /// When true, a token that starts with the quote character runs to the matching quote.
        /// </param>
        /// <returns> true if next token was found, false if at end of string </returns>
        internal bool NextToken(bool allowQuotedToken)
        {
            // No override requested: fall back to the separator stored by Initialize.
            char configuredSeparator = _argSeparator;

            return NextToken(allowQuotedToken, configuredSeparator);
        }

        /// <summary>
        /// Advances to the next token.  A separator character can be specified
        /// which overrides the one previously set.
        /// </summary>
        /// <param name="allowQuotedToken">
        /// When true, a token that starts with the quote character extends to the
        /// next quote character; the quotes themselves are not part of the token.
        /// </param>
        /// <param name="separator"> The separator character to use for this call. </param>
        /// <returns> true if next token was found, false if at end of string </returns>
        /// <exception cref="System.InvalidOperationException">
        /// A quoted token has no closing quote, the token is empty, or
        /// ScanToNextToken rejects the text that follows the token.
        /// </exception>
        internal bool NextToken(bool allowQuotedToken, char separator)
        {
            // Forget the previous token and separator state before scanning.
            _currentTokenIndex = -1;
            _foundSeparator = false;

            // Nothing left to read.
            if (_charIndex >= _strLen)
            {
                return false;
            }

            char ch = _str[_charIndex];

            Debug.Assert(!Char.IsWhiteSpace(ch),"Token started on Whitespace");

            // A quoted token is only recognized when the caller allows it and the
            // token's very first character is the quote character.
            bool insideQuotes = allowQuotedToken && (ch == _quoteChar);

            if (insideQuotes)
            {
                ++_charIndex; // the opening quote is not part of the token
            }

            int tokenStart = _charIndex;
            int tokenLength = 0;

            // Consume characters until the token ends.  The increments only run for
            // characters that belong to the token; every exit below uses 'break'.
            for (; _charIndex < _strLen; ++_charIndex, ++tokenLength)
            {
                ch = _str[_charIndex];

                if (insideQuotes)
                {
                    // Inside quotes everything except the quote char is token text.
                    if (ch == _quoteChar)
                    {
                        insideQuotes = false;
                        ++_charIndex; // move past the closing quote
                        break;
                    }
                }
                else if (ch == separator)
                {
                    _foundSeparator = true;
                    break;
                }
                else if (Char.IsWhiteSpace(ch))
                {
                    break;
                }
            }

            // Still inside quotes means the string ended before the closing quote.
            if (insideQuotes)
            {
                throw new System.InvalidOperationException(SR.Get(SRID.TokenizerHelperMissingEndQuote, _str));
            }

            // Position _charIndex at the start of the following token for the next call.
            ScanToNextToken(separator);

            // Publish the token that was just scanned.
            _currentTokenIndex = tokenStart;
            _currentTokenLength = tokenLength;

            if (_currentTokenLength < 1)
            {
                throw new System.InvalidOperationException(SR.Get(SRID.TokenizerHelperEmptyToken, _charIndex, _str));
            }

            return true;
        }
 
        // Helper that moves _charIndex to the start of the next token, or to the
        // end of the string.  It skips whitespace and at most one separator; it
        // throws InvalidOperationException when the current character is neither
        // whitespace nor the separator, when a second separator is met, or when a
        // separator is followed only by the end of the string.
        void ScanToNextToken(char separator)
        {
            // Already at the end of the string: nothing to skip.
            if (_charIndex >= _strLen)
            {
                return;
            }

            char ch = _str[_charIndex];

            // The character right after a token must be whitespace or the separator.
            // Anything else is an error; this can happen after a quoted token,
            // e.g. when text follows the closing quote directly.
            if ((ch != separator) && !Char.IsWhiteSpace(ch))
            {
                throw new System.InvalidOperationException(SR.Get(SRID.TokenizerHelperExtraDataEncountered, _charIndex, _str));
            }

            int separatorsSeen = 0;

            // Skip whitespace and separators until some other character appears.
            while (_charIndex < _strLen)
            {
                ch = _str[_charIndex];

                bool isSeparator = (ch == separator);

                if (!isSeparator && !Char.IsWhiteSpace(ch))
                {
                    break;
                }

                ++_charIndex;

                if (isSeparator)
                {
                    _foundSeparator = true;
                    ++separatorsSeen;

                    // Two separators with no token between them.
                    if (separatorsSeen > 1)
                    {
                        throw new System.InvalidOperationException(SR.Get(SRID.TokenizerHelperEmptyToken, _charIndex, _str));
                    }
                }
            }

            // A separator was consumed but the string ended, so the argument
            // that should follow the separator is missing.
            if ((separatorsSeen > 0) && (_charIndex >= _strLen))
            {
                throw new System.InvalidOperationException(SR.Get(SRID.TokenizerHelperEmptyToken, _charIndex, _str));
            }
        }
 
        // Helper to get the numeric list separator for a given IFormatProvider.
        // Returns a semicolon [;] when the provider's decimal separator starts with
        // a comma, and a comma [,] in every other case.
        static internal char GetNumericListSeparator(IFormatProvider provider)
        {
            // Get the NumberFormatInfo out of the provider, if possible.
            // If the IFormatProvider does not contain a NumberFormatInfo, then
            // GetInstance returns the current culture's NumberFormatInfo.
            NumberFormatInfo numberFormat = NumberFormatInfo.GetInstance(provider);

            Debug.Assert(null != numberFormat);

            string decimalSeparator = numberFormat.NumberDecimalSeparator;

            // A comma cannot serve as both decimal separator and list separator.
            bool commaIsDecimalSeparator =
                (decimalSeparator.Length > 0) && (decimalSeparator[0] == ',');

            return commaIsDecimalSeparator ? ';' : ',';
        }

        /// <summary>
        /// Gets whether a separator character was encountered by the most recent
        /// NextToken call; the flag is cleared at the start of every NextToken call.
        /// </summary>
        internal bool FoundSeparator
        {
            get { return _foundSeparator; }
        }

        // Character that opens and closes a quoted token.
        char _quoteChar;
        // Separator used by the NextToken overloads that take no explicit separator.
        char _argSeparator;
        // The string being tokenized (may be null, in which case _strLen is 0).
        string _str;
        // Length of _str, cached by Initialize.
        int _strLen;
        // Index of the next character to be examined by the tokenizer.
        int _charIndex;
        // Start index of the current token within _str, or -1 when there is no current token.
        internal int _currentTokenIndex;
        // Length of the current token; only meaningful when _currentTokenIndex >= 0.
        internal int _currentTokenLength;
        // Set when a separator is seen during NextToken/ScanToNextToken; reset by each NextToken call.
        bool _foundSeparator;
    }
}

// File provided for Reference Use Only by Microsoft Corporation (c) 2007.
// Copyright (c) Microsoft Corporation. All rights reserved.
//---------------------------------------------------------------------------- 
//
// Copyright (C) Microsoft Corporation.  All rights reserved.
//
// File: TokenizerHelper.cs 
//
// Description: This file contains the implementation of TokenizerHelper. 
//              This class should be used by most - if not all - MIL parsers. 
//
// History: 
//  05/19/2003 : [....] - Created it
//  05/20/2003 : [....] - Moved to Shared
//
//--------------------------------------------------------------------------- 

using System; 
using System.Collections; 
using System.Diagnostics;
using System.ComponentModel; 
using System.Globalization;

#if !PBTCOMPILER
using System.Windows; 
#endif
 
#if WINDOWS_BASE 
    using MS.Internal.WindowsBase;
#elif PRESENTATION_CORE 
    using MS.Internal.PresentationCore;
#elif PRESENTATIONFRAMEWORK
    using MS.Internal.PresentationFramework;
#elif PBTCOMPILER 
    using MS.Utility ;
    using MS.Internal.Markup; 
#elif DRT 
    using MS.Internal.Drt;
#else 
#error Attempt to use FriendAccessAllowedAttribute from an unknown assembly.
using MS.Internal.YourAssemblyName;
#endif
 
#if PBTCOMPILER
namespace MS.Internal.Markup 
#else 
namespace MS.Internal
#endif 
{
#if !PBTCOMPILER
    [FriendAccessAllowed]
#endif 
    internal class TokenizerHelper
    { 
        /// <summary>
        /// Creates a tokenizer over <paramref name="str"/> whose list separator is
        /// derived from <paramref name="formatProvider"/> via GetNumericListSeparator.
        /// The separator is ',' unless the provider's decimal separator starts with ',';
        /// in that case "23,5" would be ambiguous (one number or two), so ';' is used.
        /// The quote character is the single quote (').
        /// </summary>
        /// <param name="str"> The string which will be tokenized. </param>
        /// <param name="formatProvider"> The IFormatProvider which controls this tokenization. </param>
        internal TokenizerHelper(string str, IFormatProvider formatProvider)
        {
            Initialize(str, '\'', GetNumericListSeparator(formatProvider));
        }
 
        /// <summary>
        /// Creates a tokenizer over <paramref name="str"/> using an explicitly supplied
        /// quote character and list separator.
        /// </summary>
        /// <param name="str"> The string to tokenize. </param>
        /// <param name="quoteChar"> The quote char. </param>
        /// <param name="separator"> The list separator. </param>
        internal TokenizerHelper(string str, char quoteChar, char separator)
        {
            Initialize(str, quoteChar, separator);
        }
 
        /// <summary>
        /// Shared constructor logic: records the string, its length (0 for a null
        /// string), the quote character and the list separator, marks that there is
        /// no current token, and skips any leading whitespace.
        /// </summary>
        /// <param name="str"> The string to tokenize. </param>
        /// <param name="quoteChar"> The quote char. </param>
        /// <param name="separator"> The list separator. </param>
        private void Initialize(string str, char quoteChar, char separator)
        {
            _quoteChar = quoteChar;
            _argSeparator = separator;

            _str = str;
            _strLen = (str != null) ? str.Length : 0;

            // -1 means "no current token"; GetCurrentToken() returns null in that state.
            _currentTokenIndex = -1;

            // Step over leading whitespace right away so that NextToken() always
            // begins on the first character of the next token.
            while ((_charIndex < _strLen) && Char.IsWhiteSpace(_str, _charIndex))
            {
                ++_charIndex;
            }
        }
 
        /// <summary>
        /// Returns the text of the current token.
        /// </summary>
        /// <returns>
        /// The substring of the source string described by _currentTokenIndex and
        /// _currentTokenLength, or null when there is no current token
        /// (_currentTokenIndex is negative).
        /// </returns>
        internal string GetCurrentToken()
        {
            bool hasToken = _currentTokenIndex >= 0;

            return hasToken
                ? _str.Substring(_currentTokenIndex, _currentTokenLength)
                : null;
        }
 
        /// <summary>
        /// Throws an exception if the scan position has not reached the end of the
        /// string, i.e. there is still un-parsed input after the last token.
        /// </summary>
        /// <exception cref="System.InvalidOperationException">
        /// The current character index differs from the string length.
        /// </exception>
        internal void LastTokenRequired()
        {
            bool fullyConsumed = (_charIndex == _strLen);

            if (fullyConsumed)
            {
                return;
            }

            throw new System.InvalidOperationException(SR.Get(SRID.TokenizerHelperExtraDataEncountered, _charIndex, _str));
        }

        /// <summary>
        /// Advances to the next token, without quoted-token support and using the
        /// separator supplied at construction.
        /// </summary>
        /// <returns> true if next token was found, false if at end of string </returns>
        internal bool NextToken()
        {
            // Quoted tokens are not allowed by this overload.
            return NextToken(false, _argSeparator);
        }

        /// <summary>
        /// Advances to the next token (quoted tokens not allowed), throwing an
        /// exception if no token is present.
        /// </summary>
        /// <returns> The next token found </returns>
        /// <exception cref="System.InvalidOperationException">
        /// The end of the string was reached before another token was found.
        /// </exception>
        internal string NextTokenRequired()
        {
            // Same logic and error message as the bool overload, with quoting disabled.
            return NextTokenRequired(false);
        }
 
        /// <summary>
        /// Advances to the next token, throwing an exception if no token is present.
        /// </summary>
        /// <param name="allowQuotedToken">
        /// Passed through to NextToken; when true a token may be enclosed in the quote character.
        /// </param>
        /// <returns> The next token found </returns>
        /// <exception cref="System.InvalidOperationException">
        /// The end of the string was reached before another token was found.
        /// </exception>
        internal string NextTokenRequired(bool allowQuotedToken)
        {
            if (NextToken(allowQuotedToken))
            {
                return GetCurrentToken();
            }

            throw new System.InvalidOperationException(SR.Get(SRID.TokenizerHelperPrematureStringTermination, _str));
        }
 
        /// <summary>
        /// Advances to the next token using the separator supplied at construction.
        /// </summary>
        /// <param name="allowQuotedToken">
        /// When true, a token that starts with the quote character runs to the matching quote.
        /// </param>
        /// <returns> true if next token was found, false if at end of string </returns>
        internal bool NextToken(bool allowQuotedToken)
        {
            // No override requested: fall back to the separator stored by Initialize.
            char configuredSeparator = _argSeparator;

            return NextToken(allowQuotedToken, configuredSeparator);
        }

        /// <summary>
        /// Advances to the next token.  A separator character can be specified
        /// which overrides the one previously set.
        /// </summary>
        /// <param name="allowQuotedToken">
        /// When true, a token that starts with the quote character extends to the
        /// next quote character; the quotes themselves are not part of the token.
        /// </param>
        /// <param name="separator"> The separator character to use for this call. </param>
        /// <returns> true if next token was found, false if at end of string </returns>
        /// <exception cref="System.InvalidOperationException">
        /// A quoted token has no closing quote, the token is empty, or
        /// ScanToNextToken rejects the text that follows the token.
        /// </exception>
        internal bool NextToken(bool allowQuotedToken, char separator)
        {
            // Forget the previous token and separator state before scanning.
            _currentTokenIndex = -1;
            _foundSeparator = false;

            // Nothing left to read.
            if (_charIndex >= _strLen)
            {
                return false;
            }

            char ch = _str[_charIndex];

            Debug.Assert(!Char.IsWhiteSpace(ch),"Token started on Whitespace");

            // A quoted token is only recognized when the caller allows it and the
            // token's very first character is the quote character.
            bool insideQuotes = allowQuotedToken && (ch == _quoteChar);

            if (insideQuotes)
            {
                ++_charIndex; // the opening quote is not part of the token
            }

            int tokenStart = _charIndex;
            int tokenLength = 0;

            // Consume characters until the token ends.  The increments only run for
            // characters that belong to the token; every exit below uses 'break'.
            for (; _charIndex < _strLen; ++_charIndex, ++tokenLength)
            {
                ch = _str[_charIndex];

                if (insideQuotes)
                {
                    // Inside quotes everything except the quote char is token text.
                    if (ch == _quoteChar)
                    {
                        insideQuotes = false;
                        ++_charIndex; // move past the closing quote
                        break;
                    }
                }
                else if (ch == separator)
                {
                    _foundSeparator = true;
                    break;
                }
                else if (Char.IsWhiteSpace(ch))
                {
                    break;
                }
            }

            // Still inside quotes means the string ended before the closing quote.
            if (insideQuotes)
            {
                throw new System.InvalidOperationException(SR.Get(SRID.TokenizerHelperMissingEndQuote, _str));
            }

            // Position _charIndex at the start of the following token for the next call.
            ScanToNextToken(separator);

            // Publish the token that was just scanned.
            _currentTokenIndex = tokenStart;
            _currentTokenLength = tokenLength;

            if (_currentTokenLength < 1)
            {
                throw new System.InvalidOperationException(SR.Get(SRID.TokenizerHelperEmptyToken, _charIndex, _str));
            }

            return true;
        }
 
        // Helper that moves _charIndex to the start of the next token, or to the
        // end of the string.  It skips whitespace and at most one separator; it
        // throws InvalidOperationException when the current character is neither
        // whitespace nor the separator, when a second separator is met, or when a
        // separator is followed only by the end of the string.
        void ScanToNextToken(char separator)
        {
            // Already at the end of the string: nothing to skip.
            if (_charIndex >= _strLen)
            {
                return;
            }

            char ch = _str[_charIndex];

            // The character right after a token must be whitespace or the separator.
            // Anything else is an error; this can happen after a quoted token,
            // e.g. when text follows the closing quote directly.
            if ((ch != separator) && !Char.IsWhiteSpace(ch))
            {
                throw new System.InvalidOperationException(SR.Get(SRID.TokenizerHelperExtraDataEncountered, _charIndex, _str));
            }

            int separatorsSeen = 0;

            // Skip whitespace and separators until some other character appears.
            while (_charIndex < _strLen)
            {
                ch = _str[_charIndex];

                bool isSeparator = (ch == separator);

                if (!isSeparator && !Char.IsWhiteSpace(ch))
                {
                    break;
                }

                ++_charIndex;

                if (isSeparator)
                {
                    _foundSeparator = true;
                    ++separatorsSeen;

                    // Two separators with no token between them.
                    if (separatorsSeen > 1)
                    {
                        throw new System.InvalidOperationException(SR.Get(SRID.TokenizerHelperEmptyToken, _charIndex, _str));
                    }
                }
            }

            // A separator was consumed but the string ended, so the argument
            // that should follow the separator is missing.
            if ((separatorsSeen > 0) && (_charIndex >= _strLen))
            {
                throw new System.InvalidOperationException(SR.Get(SRID.TokenizerHelperEmptyToken, _charIndex, _str));
            }
        }
 
        // Helper to get the numeric list separator for a given IFormatProvider.
        // Returns a semicolon [;] when the provider's decimal separator starts with
        // a comma, and a comma [,] in every other case.
        static internal char GetNumericListSeparator(IFormatProvider provider)
        {
            // Get the NumberFormatInfo out of the provider, if possible.
            // If the IFormatProvider does not contain a NumberFormatInfo, then
            // GetInstance returns the current culture's NumberFormatInfo.
            NumberFormatInfo numberFormat = NumberFormatInfo.GetInstance(provider);

            Debug.Assert(null != numberFormat);

            string decimalSeparator = numberFormat.NumberDecimalSeparator;

            // A comma cannot serve as both decimal separator and list separator.
            bool commaIsDecimalSeparator =
                (decimalSeparator.Length > 0) && (decimalSeparator[0] == ',');

            return commaIsDecimalSeparator ? ';' : ',';
        }

        /// <summary>
        /// Gets whether a separator character was encountered by the most recent
        /// NextToken call; the flag is cleared at the start of every NextToken call.
        /// </summary>
        internal bool FoundSeparator
        {
            get { return _foundSeparator; }
        }

        // Character that opens and closes a quoted token.
        char _quoteChar;
        // Separator used by the NextToken overloads that take no explicit separator.
        char _argSeparator;
        // The string being tokenized (may be null, in which case _strLen is 0).
        string _str;
        // Length of _str, cached by Initialize.
        int _strLen;
        // Index of the next character to be examined by the tokenizer.
        int _charIndex;
        // Start index of the current token within _str, or -1 when there is no current token.
        internal int _currentTokenIndex;
        // Length of the current token; only meaningful when _currentTokenIndex >= 0.
        internal int _currentTokenLength;
        // Set when a separator is seen during NextToken/ScanToNextToken; reset by each NextToken call.
        bool _foundSeparator;
    }
}

// File provided for Reference Use Only by Microsoft Corporation (c) 2007.
// Copyright (c) Microsoft Corporation. All rights reserved.
Link Menu

Network programming in C#, Network Programming in VB.NET, Network Programming in .NET
This book is available now!
Buy at Amazon US or
Buy at Amazon UK
TokenizerHelper.cs source code in C# .NET

Source code for the .NET framework in C#

Code:

Link Menu