Normalization.cs source code in C# .NET

Source code for the .NET framework in C#



/ Net / Net / 3.5.50727.3053 / DEVDIV / depot / DevDiv / releases / whidbey / netfxsp / ndp / clr / src / BCL / System / Text / Normalization.cs / 1 / Normalization.cs

                            // ==++== 
//   Copyright (c) Microsoft Corporation.  All rights reserved.
// ==--== 
namespace System.Text
    using System; 
    using System.Globalization;
    using System.Text; 
    using System.Runtime.CompilerServices;

    // This is the enumeration for Normalization Forms.
    // The member names line up with the cached NFC / NFD / NFKC / NFKD instances kept by
    // the Normalization class. The numeric values are also the first four values of
    // ExtendedNormalizationForms, which GetNormalization() casts to when dispatching.
    public enum NormalizationForm
    {
        FormC    = 1,
        FormD    = 2,
        FormKC   = 5,
        FormKD   = 6
    }

    // Internal superset of NormalizationForm: the four public values, an IDNA form (0xd),
    // and a "DisallowUnassigned" variant of each whose value is the base value with
    // 0x100 added (0x101, 0x102, 0x105, 0x106, 0x10d).
    internal enum ExtendedNormalizationForms
    {
        FormC    = 1,
        FormD    = 2,
        FormKC   = 5,
        FormKD   = 6,
        FormIdna = 0xd,
        FormCDisallowUnassigned     = 0x101,
        FormDDisallowUnassigned     = 0x102,
        FormKCDisallowUnassigned    = 0x105,
        FormKDDisallowUnassigned    = 0x106,
        FormIdnaDisallowUnassigned  = 0x10d
    }

    // This internal class wraps up our normalization behavior 

    internal class Normalization
        // Cached instances, one per supported form. Each is left null until the matching
        // GetForm*() method constructs it on first request.
        private static Normalization NFC; 
        private static Normalization NFD;
        private static Normalization NFKC; 
        private static Normalization NFKD; 
        private static Normalization IDNA;
        private static Normalization NFCDisallowUnassigned; 
        private static Normalization NFDDisallowUnassigned;
        private static Normalization NFKCDisallowUnassigned;
        private static Normalization NFKDDisallowUnassigned;
        private static Normalization IDNADisallowUnassigned; 

        // The form this instance was constructed for; passed as the first argument to the
        // native normalize / is-normalized calls.
        private NormalizationForm normalizationForm; 
        // These are error codes we get back from the Normalization DLL
        // NOTE(review): names and values look like the Win32 system error codes — confirm.
        private const int ERROR_SUCCESS = 0; 
        private const int ERROR_NOT_ENOUGH_MEMORY = 8;
        private const int ERROR_INVALID_PARAMETER = 87;
        private const int ERROR_INSUFFICIENT_BUFFER = 122;
        private const int ERROR_NO_UNICODE_TRANSLATION = 1113; 

        // Builds the wrapper for one normalization form. Steps, in order:
        //   1. remember the form,
        //   2. ask the native side to load the normalization DLL,
        //   3. fetch a pointer to the table data named strDataFile from this assembly's
        //      globalization resources,
        //   4. pass the form and table pointer to nativeNormalizationInitNormalization.
        // A failure at step 2 or 3 throws ArgumentException; a null result at step 4 throws
        // OutOfMemoryException.
        // NOTE(review): in this copy of the file the braces are missing and every
        // `throw new ...(` stops at the open parenthesis; the arguments must be restored
        // from the original source before this can compile.
        internal unsafe Normalization(NormalizationForm form, String strDataFile) 
            // Remember which form we are
            this.normalizationForm = form; 
            // Load the DLL
            if (!nativeLoadNormalizationDLL())
                // Unable to load the normalization DLL! 
                throw new ArgumentException(

            // Tell the DLL where to find our data 
            byte* pTables = GlobalizationAssembly.GetGlobalizationResourceBytePtr(
                typeof(Normalization).Assembly, strDataFile);
            if (pTables == null)
                // Unable to load the specified normalizationForm,
                // tables not loaded from file 
                throw new ArgumentException( 

            // All we have to do is let the .dll know how to load it, then
            // we can ignore the returned pointer.
            byte* objNorm = nativeNormalizationInitNormalization(form, pTables); 
            if (objNorm == null)
                // Unable to load the specified normalizationForm 
                // native library class not initialized correctly
                throw new OutOfMemoryException( 
        // Maps a form value to its cached Normalization instance by dispatching to the
        // matching GetForm*() method. The argument is cast to ExtendedNormalizationForms so
        // that the internal-only values (IDNA and the DisallowUnassigned variants) can be
        // requested through a NormalizationForm-typed parameter.
        // Any value not listed in the switch reaches the ArgumentException at the end.
        // NOTE(review): the switch braces and the arguments of the final
        // `throw new ArgumentException(` are missing from this copy of the file.
        static internal Normalization GetNormalization(NormalizationForm form)
            switch ((ExtendedNormalizationForms)form) 
                case ExtendedNormalizationForms.FormC: 
                    return GetFormC();
                case ExtendedNormalizationForms.FormD:
                    return GetFormD();
                case ExtendedNormalizationForms.FormKC: 
                    return GetFormKC();
                case ExtendedNormalizationForms.FormKD: 
                    return GetFormKD(); 
                case ExtendedNormalizationForms.FormIdna:
                    return GetFormIDNA(); 
                case ExtendedNormalizationForms.FormCDisallowUnassigned:
                    return GetFormCDisallowUnassigned();
                case ExtendedNormalizationForms.FormDDisallowUnassigned:
                    return GetFormDDisallowUnassigned(); 
                case ExtendedNormalizationForms.FormKCDisallowUnassigned:
                    return GetFormKCDisallowUnassigned(); 
                case ExtendedNormalizationForms.FormKDDisallowUnassigned: 
                    return GetFormKDDisallowUnassigned();
                case ExtendedNormalizationForms.FormIdnaDisallowUnassigned: 
                    return GetFormIDNADisallowUnassigned();

            // They were supposed to have a form that we know about! 
            throw new ArgumentException(

        // Returns the cached FormC instance, constructing it from "normnfc.nlp" on first use.
        // No lock is taken: concurrent first calls may each construct an instance, and the
        // last assignment to the static field wins.
        static internal Normalization GetFormC()
        {
            if (NFC != null)
                return NFC;

            NFC = new Normalization(NormalizationForm.FormC, "normnfc.nlp");
            return NFC;
        }

        // Returns the cached FormD instance, constructing it from "normnfd.nlp" on first use.
        // No lock is taken: concurrent first calls may each construct an instance, and the
        // last assignment to the static field wins.
        static internal Normalization GetFormD()
        {
            if (NFD != null)
                return NFD;

            NFD = new Normalization(NormalizationForm.FormD, "normnfd.nlp");
            return NFD;
        }

        // Returns the cached FormKC instance, constructing it from "normnfkc.nlp" on first use.
        // No lock is taken: concurrent first calls may each construct an instance, and the
        // last assignment to the static field wins.
        static internal Normalization GetFormKC()
        {
            if (NFKC != null)
                return NFKC;

            NFKC = new Normalization(NormalizationForm.FormKC, "normnfkc.nlp");
            return NFKC;
        }

        // Returns the cached FormKD instance, constructing it from "normnfkd.nlp" on first use.
        // No lock is taken: concurrent first calls may each construct an instance, and the
        // last assignment to the static field wins.
        static internal Normalization GetFormKD()
        {
            if (NFKD != null)
                return NFKD;

            NFKD = new Normalization(NormalizationForm.FormKD, "normnfkd.nlp");
            return NFKD;
        }

        // Returns the cached IDNA instance, constructing it from "normidna.nlp" on first use.
        // FormIdna exists only in ExtendedNormalizationForms, so it is cast to
        // NormalizationForm to satisfy the constructor's parameter type.
        // No lock is taken: concurrent first calls may each construct an instance, and the
        // last assignment to the static field wins.
        static internal Normalization GetFormIDNA()
        {
            if (IDNA != null)
                return IDNA;

            IDNA = new Normalization((NormalizationForm)ExtendedNormalizationForms.FormIdna, "normidna.nlp");
            return IDNA;
        }

        // Returns the cached FormCDisallowUnassigned instance, constructing it on first use.
        // It is built from the same data file as FormC ("normnfc.nlp"); only the form value
        // handed to the constructor differs.
        // No lock is taken: concurrent first calls may each construct an instance.
        static internal Normalization GetFormCDisallowUnassigned()
        {
            if (NFCDisallowUnassigned != null)
                return NFCDisallowUnassigned;

            NFCDisallowUnassigned = new Normalization(
                (NormalizationForm)ExtendedNormalizationForms.FormCDisallowUnassigned, "normnfc.nlp");
            return NFCDisallowUnassigned;
        }
        // Returns the cached FormDDisallowUnassigned instance, constructing it on first use.
        // It is built from the same data file as FormD ("normnfd.nlp"); only the form value
        // handed to the constructor differs.
        // No lock is taken: concurrent first calls may each construct an instance.
        static internal Normalization GetFormDDisallowUnassigned()
        {
            if (NFDDisallowUnassigned != null)
                return NFDDisallowUnassigned;

            NFDDisallowUnassigned = new Normalization(
                (NormalizationForm)ExtendedNormalizationForms.FormDDisallowUnassigned, "normnfd.nlp");
            return NFDDisallowUnassigned;
        }

        // Returns the cached FormKCDisallowUnassigned instance, constructing it on first use.
        // It is built from the same data file as FormKC ("normnfkc.nlp"); only the form value
        // handed to the constructor differs.
        // No lock is taken: concurrent first calls may each construct an instance.
        static internal Normalization GetFormKCDisallowUnassigned()
        {
            if (NFKCDisallowUnassigned != null)
                return NFKCDisallowUnassigned;

            NFKCDisallowUnassigned = new Normalization(
                (NormalizationForm)ExtendedNormalizationForms.FormKCDisallowUnassigned, "normnfkc.nlp");
            return NFKCDisallowUnassigned;
        }

        // Returns the cached FormKDDisallowUnassigned instance, constructing it on first use.
        // It is built from the same data file as FormKD ("normnfkd.nlp"); only the form value
        // handed to the constructor differs.
        // No lock is taken: concurrent first calls may each construct an instance.
        static internal Normalization GetFormKDDisallowUnassigned()
        {
            if (NFKDDisallowUnassigned != null)
                return NFKDDisallowUnassigned;

            NFKDDisallowUnassigned = new Normalization(
                (NormalizationForm)ExtendedNormalizationForms.FormKDDisallowUnassigned, "normnfkd.nlp");
            return NFKDDisallowUnassigned;
        }

        // Returns the cached FormIdnaDisallowUnassigned instance, constructing it on first use.
        // It is built from the same data file as the IDNA form ("normidna.nlp"); only the
        // form value handed to the constructor differs.
        // No lock is taken: concurrent first calls may each construct an instance.
        static internal Normalization GetFormIDNADisallowUnassigned()
        {
            if (IDNADisallowUnassigned != null)
                return IDNADisallowUnassigned;

            IDNADisallowUnassigned = new Normalization(
                (NormalizationForm)ExtendedNormalizationForms.FormIdnaDisallowUnassigned, "normidna.nlp");
            return IDNADisallowUnassigned;
        }
        // Static entry point: looks up the Normalization instance for normForm and asks it
        // whether strInput is already normalized. Exceptions come from GetNormalization()
        // (unrecognized form) and from the instance IsNormalized() (null or invalid input).
        internal static bool IsNormalized(String strInput, NormalizationForm normForm)
        {
            return GetNormalization(normForm).IsNormalized(strInput);
        }
        // Asks the native library whether strInput is already in this instance's form.
        // Throws ArgumentNullException for null input. The native call reports failure
        // through iError, which is mapped to ArgumentException (ERROR_NO_UNICODE_TRANSLATION),
        // OutOfMemoryException (ERROR_NOT_ENOUGH_MEMORY) or InvalidOperationException.
        // NOTE(review): the `switch (...)` header, its braces, the statement ending the
        // ERROR_SUCCESS case, the label before the InvalidOperationException, and the
        // tails of two throw statements are missing from this copy of the file.
        private bool IsNormalized(String strInput)
            if (strInput == null)
                // NOTE(review): ArgumentNullException(String, String) takes (paramName, message);
                // here the resource message is passed first and "strInput" second, the
                // reverse of the order used in Normalize() and GuessLength() — confirm and swap.
                throw new ArgumentNullException( 
                    Environment.GetResourceString("ArgumentNull_String"), "strInput");
            int iError = ERROR_SUCCESS; 
            int iTest = nativeNormalizationIsNormalizedString(
                normalizationForm, ref iError, strInput, strInput.Length); 

                // Success doesn't need to do anything 
                case ERROR_SUCCESS:
                // Do appropriate stuff for the individual errors:
                // Only possible value here is ERROR_NO_UNICODE_TRANSLATION 
                case ERROR_NO_UNICODE_TRANSLATION:
                    throw new ArgumentException(
                        Environment.GetResourceString("Argument_InvalidCharSequenceNoIndex" ),
                case ERROR_NOT_ENOUGH_MEMORY:
                    throw new OutOfMemoryException( 
                    throw new InvalidOperationException( 
                        Environment.GetResourceString("UnknownError_Num", iError));

            // Only the lowest bit of the native return value carries the answer:
            // 1 means normalized, 0 means not.
            return ((iTest & 1) == 1);
        // Static entry point: looks up the Normalization instance for normForm and returns
        // strInput normalized to that form. Exceptions come from GetNormalization()
        // (unrecognized form) and from the instance Normalize() (null or invalid input).
        internal static String Normalize(String strInput, NormalizationForm normForm)
        {
            return GetNormalization(normForm).Normalize(strInput);
        }

        // Normalizes strInput to this instance's form and returns the result as a new string.
        // Flow: GuessLength() supplies an initial buffer size; the loop then allocates a
        // buffer of that size and calls the native normalizer, repeating with the length the
        // native call returned for as long as it reports ERROR_INSUFFICIENT_BUFFER.
        // Other error codes are turned into ArgumentException, OutOfMemoryException or
        // InvalidOperationException.
        // NOTE(review): the loop/if braces, the `switch (...)` header, the statements that
        // end each case, and the tails of several throw statements are missing from this
        // copy of the file.
        internal String Normalize(String strInput) 
            if (strInput == null) 
                throw new ArgumentNullException( "strInput", 
            // Guess our buffer size first
            int iLength = GuessLength(strInput);

            // Don't break for empty strings (only possible for D & KD and not really possible at that) 
            if (iLength == 0) return String.Empty;
            // Someplace to stick our buffer 
            char[] cBuffer = null;
            // Seeded with ERROR_INSUFFICIENT_BUFFER so the loop body runs at least once.
            int iError = ERROR_INSUFFICIENT_BUFFER;
            while (iError == ERROR_INSUFFICIENT_BUFFER)
                // (re)allocation buffer and normalize string 
                cBuffer = new char[iLength];
                iLength = nativeNormalizationNormalizeString( 
                    normalizationForm, ref iError, 
                    strInput, strInput.Length, cBuffer, cBuffer.Length);
                // Could have an error (actually it'd be quite hard to have an error here)
                if (iError != ERROR_SUCCESS)
                        // Do appropriate stuff for the individual errors: 
                        case ERROR_INSUFFICIENT_BUFFER: 
                            BCLDebug.Assert(iLength > cBuffer.Length, "Buffer overflow should have iLength > cBuffer.Length");
                        case ERROR_NO_UNICODE_TRANSLATION:
                            // Illegal code point or order found.  Ie: FFFE or D800 D800, etc.
                            // iLength is formatted into the message here — presumably the
                            // index of the offending character; confirm against the native side.
                            throw new ArgumentException(
                                Environment.GetResourceString("Argument_InvalidCharSequence", iLength ), 
                        case ERROR_NOT_ENOUGH_MEMORY: 
                            throw new OutOfMemoryException( 
                        case ERROR_INVALID_PARAMETER: 
                            // Shouldn't have invalid parameters here unless we have a bug, drop through...
                            // We shouldn't get here...
                            throw new InvalidOperationException( 
                                Environment.GetResourceString("UnknownError_Num", iError));
            // Copy our buffer into our new string, which will be the appropriate size
            String strReturn = new String(cBuffer, 0, iLength);

            // Return our output string 
            return strReturn;
        // Returns the native library's length estimate for the normalized form of strInput.
        // It calls the same native normalize function as Normalize(), but with a null
        // destination buffer and a destination length of 0, and returns the call's result.
        // Throws ArgumentNullException for null input, OutOfMemoryException for
        // ERROR_NOT_ENOUGH_MEMORY, and InvalidOperationException for any other error code.
        // NOTE(review): the braces and the tails of the ArgumentNullException and
        // OutOfMemoryException statements are missing from this copy of the file.
        internal int GuessLength(String strInput)
            if (strInput == null)
                throw new ArgumentNullException( "strInput",
            // Get our guess
            int iError = 0; 
            int iGuess = nativeNormalizationNormalizeString( 
                normalizationForm, ref iError, strInput, strInput.Length, null, 0);
            // Could have an error (actually it'd be quite hard to have an error here)
            BCLDebug.Assert(iError == ERROR_SUCCESS, "GuessLength() shouldn't return errors.");
            if (iError != ERROR_SUCCESS)
                // We shouldn't really be able to get here..., guessing length is
                // a trivial math function... 
                // Can't really be Out of Memory, but just in case: 
                if (iError == ERROR_NOT_ENOUGH_MEMORY)
                    throw new OutOfMemoryException( 

                // Who knows what happened?  Not us!
                throw new InvalidOperationException( 
                    Environment.GetResourceString("UnknownError_Num", iError));
            // Well, we guessed it
            return iGuess; 

        // Native entry points, declared extern with no managed body.
        // NOTE(review): no interop attribute ([DllImport] / [MethodImpl]) is visible on any
        // of these declarations in this copy; the file imports
        // System.Runtime.CompilerServices, so an attribute was presumably lost — confirm
        // against the original source.

        // Loads the native normalization DLL; the constructor treats a false return as failure.
        unsafe private static extern bool nativeLoadNormalizationDLL(); 

        // Normalizes lpSrcString into lpDstString and returns a length; the error code comes
        // back through iError. GuessLength() calls it with a null destination and length 0
        // to obtain a size estimate.
        unsafe private static extern int nativeNormalizationNormalizeString( 
            NormalizationForm NormForm, ref int iError,
            String lpSrcString, int cwSrcLength, 
            char[] lpDstString, int cwDstLength);

        // Tests whether lpString is already normalized; IsNormalized() reads the answer from
        // the lowest bit of the return value and the error code from iError.
        unsafe private static extern int nativeNormalizationIsNormalizedString( 
            NormalizationForm NormForm, ref int iError,
            String lpString, int cwLength); 
        // Hands the table data for a form to the native library; the constructor treats a
        // null return as an initialization failure and otherwise ignores the pointer.
        unsafe private static extern byte* nativeNormalizationInitNormalization( 
            NormalizationForm NormForm, byte* pTableData);


// File provided for Reference Use Only by Microsoft Corporation (c) 2007.
// ==++== 
//   Copyright (c) Microsoft Corporation.  All rights reserved.
// ==--== 
namespace System.Text
    using System; 
    using System.Globalization;
    using System.Text; 
    using System.Runtime.CompilerServices;

    // This is the enumeration for Normalization Forms.
    // The member names line up with the cached NFC / NFD / NFKC / NFKD instances kept by
    // the Normalization class. The numeric values are also the first four values of
    // ExtendedNormalizationForms, which GetNormalization() casts to when dispatching.
    public enum NormalizationForm
    {
        FormC    = 1,
        FormD    = 2,
        FormKC   = 5,
        FormKD   = 6
    }

    // Internal superset of NormalizationForm: the four public values, an IDNA form (0xd),
    // and a "DisallowUnassigned" variant of each whose value is the base value with
    // 0x100 added (0x101, 0x102, 0x105, 0x106, 0x10d).
    internal enum ExtendedNormalizationForms
    {
        FormC    = 1,
        FormD    = 2,
        FormKC   = 5,
        FormKD   = 6,
        FormIdna = 0xd,
        FormCDisallowUnassigned     = 0x101,
        FormDDisallowUnassigned     = 0x102,
        FormKCDisallowUnassigned    = 0x105,
        FormKDDisallowUnassigned    = 0x106,
        FormIdnaDisallowUnassigned  = 0x10d
    }

    // This internal class wraps up our normalization behavior 

    internal class Normalization
        // Cached instances, one per supported form. Each is left null until the matching
        // GetForm*() method constructs it on first request.
        private static Normalization NFC; 
        private static Normalization NFD;
        private static Normalization NFKC; 
        private static Normalization NFKD; 
        private static Normalization IDNA;
        private static Normalization NFCDisallowUnassigned; 
        private static Normalization NFDDisallowUnassigned;
        private static Normalization NFKCDisallowUnassigned;
        private static Normalization NFKDDisallowUnassigned;
        private static Normalization IDNADisallowUnassigned; 

        // The form this instance was constructed for; passed as the first argument to the
        // native normalize / is-normalized calls.
        private NormalizationForm normalizationForm; 
        // These are error codes we get back from the Normalization DLL
        // NOTE(review): names and values look like the Win32 system error codes — confirm.
        private const int ERROR_SUCCESS = 0; 
        private const int ERROR_NOT_ENOUGH_MEMORY = 8;
        private const int ERROR_INVALID_PARAMETER = 87;
        private const int ERROR_INSUFFICIENT_BUFFER = 122;
        private const int ERROR_NO_UNICODE_TRANSLATION = 1113; 

        // Builds the wrapper for one normalization form. Steps, in order:
        //   1. remember the form,
        //   2. ask the native side to load the normalization DLL,
        //   3. fetch a pointer to the table data named strDataFile from this assembly's
        //      globalization resources,
        //   4. pass the form and table pointer to nativeNormalizationInitNormalization.
        // A failure at step 2 or 3 throws ArgumentException; a null result at step 4 throws
        // OutOfMemoryException.
        // NOTE(review): in this copy of the file the braces are missing and every
        // `throw new ...(` stops at the open parenthesis; the arguments must be restored
        // from the original source before this can compile.
        internal unsafe Normalization(NormalizationForm form, String strDataFile) 
            // Remember which form we are
            this.normalizationForm = form; 
            // Load the DLL
            if (!nativeLoadNormalizationDLL())
                // Unable to load the normalization DLL! 
                throw new ArgumentException(

            // Tell the DLL where to find our data 
            byte* pTables = GlobalizationAssembly.GetGlobalizationResourceBytePtr(
                typeof(Normalization).Assembly, strDataFile);
            if (pTables == null)
                // Unable to load the specified normalizationForm,
                // tables not loaded from file 
                throw new ArgumentException( 

            // All we have to do is let the .dll know how to load it, then
            // we can ignore the returned pointer.
            byte* objNorm = nativeNormalizationInitNormalization(form, pTables); 
            if (objNorm == null)
                // Unable to load the specified normalizationForm 
                // native library class not initialized correctly
                throw new OutOfMemoryException( 
        // Maps a form value to its cached Normalization instance by dispatching to the
        // matching GetForm*() method. The argument is cast to ExtendedNormalizationForms so
        // that the internal-only values (IDNA and the DisallowUnassigned variants) can be
        // requested through a NormalizationForm-typed parameter.
        // Any value not listed in the switch reaches the ArgumentException at the end.
        // NOTE(review): the switch braces and the arguments of the final
        // `throw new ArgumentException(` are missing from this copy of the file.
        static internal Normalization GetNormalization(NormalizationForm form)
            switch ((ExtendedNormalizationForms)form) 
                case ExtendedNormalizationForms.FormC: 
                    return GetFormC();
                case ExtendedNormalizationForms.FormD:
                    return GetFormD();
                case ExtendedNormalizationForms.FormKC: 
                    return GetFormKC();
                case ExtendedNormalizationForms.FormKD: 
                    return GetFormKD(); 
                case ExtendedNormalizationForms.FormIdna:
                    return GetFormIDNA(); 
                case ExtendedNormalizationForms.FormCDisallowUnassigned:
                    return GetFormCDisallowUnassigned();
                case ExtendedNormalizationForms.FormDDisallowUnassigned:
                    return GetFormDDisallowUnassigned(); 
                case ExtendedNormalizationForms.FormKCDisallowUnassigned:
                    return GetFormKCDisallowUnassigned(); 
                case ExtendedNormalizationForms.FormKDDisallowUnassigned: 
                    return GetFormKDDisallowUnassigned();
                case ExtendedNormalizationForms.FormIdnaDisallowUnassigned: 
                    return GetFormIDNADisallowUnassigned();

            // They were supposed to have a form that we know about! 
            throw new ArgumentException(

        // Returns the cached FormC instance, constructing it from "normnfc.nlp" on first use.
        // No lock is taken: concurrent first calls may each construct an instance, and the
        // last assignment to the static field wins.
        static internal Normalization GetFormC()
        {
            if (NFC != null)
                return NFC;

            NFC = new Normalization(NormalizationForm.FormC, "normnfc.nlp");
            return NFC;
        }

        // Returns the cached FormD instance, constructing it from "normnfd.nlp" on first use.
        // No lock is taken: concurrent first calls may each construct an instance, and the
        // last assignment to the static field wins.
        static internal Normalization GetFormD()
        {
            if (NFD != null)
                return NFD;

            NFD = new Normalization(NormalizationForm.FormD, "normnfd.nlp");
            return NFD;
        }

        // Returns the cached FormKC instance, constructing it from "normnfkc.nlp" on first use.
        // No lock is taken: concurrent first calls may each construct an instance, and the
        // last assignment to the static field wins.
        static internal Normalization GetFormKC()
        {
            if (NFKC != null)
                return NFKC;

            NFKC = new Normalization(NormalizationForm.FormKC, "normnfkc.nlp");
            return NFKC;
        }

        // Returns the cached FormKD instance, constructing it from "normnfkd.nlp" on first use.
        // No lock is taken: concurrent first calls may each construct an instance, and the
        // last assignment to the static field wins.
        static internal Normalization GetFormKD()
        {
            if (NFKD != null)
                return NFKD;

            NFKD = new Normalization(NormalizationForm.FormKD, "normnfkd.nlp");
            return NFKD;
        }

        // Returns the cached IDNA instance, constructing it from "normidna.nlp" on first use.
        // FormIdna exists only in ExtendedNormalizationForms, so it is cast to
        // NormalizationForm to satisfy the constructor's parameter type.
        // No lock is taken: concurrent first calls may each construct an instance, and the
        // last assignment to the static field wins.
        static internal Normalization GetFormIDNA()
        {
            if (IDNA != null)
                return IDNA;

            IDNA = new Normalization((NormalizationForm)ExtendedNormalizationForms.FormIdna, "normidna.nlp");
            return IDNA;
        }

        // Returns the cached FormCDisallowUnassigned instance, constructing it on first use.
        // It is built from the same data file as FormC ("normnfc.nlp"); only the form value
        // handed to the constructor differs.
        // No lock is taken: concurrent first calls may each construct an instance.
        static internal Normalization GetFormCDisallowUnassigned()
        {
            if (NFCDisallowUnassigned != null)
                return NFCDisallowUnassigned;

            NFCDisallowUnassigned = new Normalization(
                (NormalizationForm)ExtendedNormalizationForms.FormCDisallowUnassigned, "normnfc.nlp");
            return NFCDisallowUnassigned;
        }
        // Returns the cached FormDDisallowUnassigned instance, constructing it on first use.
        // It is built from the same data file as FormD ("normnfd.nlp"); only the form value
        // handed to the constructor differs.
        // No lock is taken: concurrent first calls may each construct an instance.
        static internal Normalization GetFormDDisallowUnassigned()
        {
            if (NFDDisallowUnassigned != null)
                return NFDDisallowUnassigned;

            NFDDisallowUnassigned = new Normalization(
                (NormalizationForm)ExtendedNormalizationForms.FormDDisallowUnassigned, "normnfd.nlp");
            return NFDDisallowUnassigned;
        }

        // Returns the cached FormKCDisallowUnassigned instance, constructing it on first use.
        // It is built from the same data file as FormKC ("normnfkc.nlp"); only the form value
        // handed to the constructor differs.
        // No lock is taken: concurrent first calls may each construct an instance.
        static internal Normalization GetFormKCDisallowUnassigned()
        {
            if (NFKCDisallowUnassigned != null)
                return NFKCDisallowUnassigned;

            NFKCDisallowUnassigned = new Normalization(
                (NormalizationForm)ExtendedNormalizationForms.FormKCDisallowUnassigned, "normnfkc.nlp");
            return NFKCDisallowUnassigned;
        }

        // Returns the cached FormKDDisallowUnassigned instance, constructing it on first use.
        // It is built from the same data file as FormKD ("normnfkd.nlp"); only the form value
        // handed to the constructor differs.
        // No lock is taken: concurrent first calls may each construct an instance.
        static internal Normalization GetFormKDDisallowUnassigned()
        {
            if (NFKDDisallowUnassigned != null)
                return NFKDDisallowUnassigned;

            NFKDDisallowUnassigned = new Normalization(
                (NormalizationForm)ExtendedNormalizationForms.FormKDDisallowUnassigned, "normnfkd.nlp");
            return NFKDDisallowUnassigned;
        }

        // Returns the cached FormIdnaDisallowUnassigned instance, constructing it on first use.
        // It is built from the same data file as the IDNA form ("normidna.nlp"); only the
        // form value handed to the constructor differs.
        // No lock is taken: concurrent first calls may each construct an instance.
        static internal Normalization GetFormIDNADisallowUnassigned()
        {
            if (IDNADisallowUnassigned != null)
                return IDNADisallowUnassigned;

            IDNADisallowUnassigned = new Normalization(
                (NormalizationForm)ExtendedNormalizationForms.FormIdnaDisallowUnassigned, "normidna.nlp");
            return IDNADisallowUnassigned;
        }
        // Static entry point: looks up the Normalization instance for normForm and asks it
        // whether strInput is already normalized. Exceptions come from GetNormalization()
        // (unrecognized form) and from the instance IsNormalized() (null or invalid input).
        internal static bool IsNormalized(String strInput, NormalizationForm normForm)
        {
            return GetNormalization(normForm).IsNormalized(strInput);
        }
        // Asks the native library whether strInput is already in this instance's form.
        // Throws ArgumentNullException for null input. The native call reports failure
        // through iError, which is mapped to ArgumentException (ERROR_NO_UNICODE_TRANSLATION),
        // OutOfMemoryException (ERROR_NOT_ENOUGH_MEMORY) or InvalidOperationException.
        // NOTE(review): the `switch (...)` header, its braces, the statement ending the
        // ERROR_SUCCESS case, the label before the InvalidOperationException, and the
        // tails of two throw statements are missing from this copy of the file.
        private bool IsNormalized(String strInput)
            if (strInput == null)
                // NOTE(review): ArgumentNullException(String, String) takes (paramName, message);
                // here the resource message is passed first and "strInput" second, the
                // reverse of the order used in Normalize() and GuessLength() — confirm and swap.
                throw new ArgumentNullException( 
                    Environment.GetResourceString("ArgumentNull_String"), "strInput");
            int iError = ERROR_SUCCESS; 
            int iTest = nativeNormalizationIsNormalizedString(
                normalizationForm, ref iError, strInput, strInput.Length); 

                // Success doesn't need to do anything 
                case ERROR_SUCCESS:
                // Do appropriate stuff for the individual errors:
                // Only possible value here is ERROR_NO_UNICODE_TRANSLATION 
                case ERROR_NO_UNICODE_TRANSLATION:
                    throw new ArgumentException(
                        Environment.GetResourceString("Argument_InvalidCharSequenceNoIndex" ),
                case ERROR_NOT_ENOUGH_MEMORY:
                    throw new OutOfMemoryException( 
                    throw new InvalidOperationException( 
                        Environment.GetResourceString("UnknownError_Num", iError));

            // Only the lowest bit of the native return value carries the answer:
            // 1 means normalized, 0 means not.
            return ((iTest & 1) == 1);
        // Static entry point: looks up the Normalization instance for normForm and returns
        // strInput normalized to that form. Exceptions come from GetNormalization()
        // (unrecognized form) and from the instance Normalize() (null or invalid input).
        internal static String Normalize(String strInput, NormalizationForm normForm)
        {
            return GetNormalization(normForm).Normalize(strInput);
        }

        // Normalizes strInput to this instance's form and returns the result as a new string.
        // Flow: GuessLength() supplies an initial buffer size; the loop then allocates a
        // buffer of that size and calls the native normalizer, repeating with the length the
        // native call returned for as long as it reports ERROR_INSUFFICIENT_BUFFER.
        // Other error codes are turned into ArgumentException, OutOfMemoryException or
        // InvalidOperationException.
        // NOTE(review): the loop/if braces, the `switch (...)` header, the statements that
        // end each case, and the tails of several throw statements are missing from this
        // copy of the file.
        internal String Normalize(String strInput) 
            if (strInput == null) 
                throw new ArgumentNullException( "strInput", 
            // Guess our buffer size first
            int iLength = GuessLength(strInput);

            // Don't break for empty strings (only possible for D & KD and not really possible at that) 
            if (iLength == 0) return String.Empty;
            // Someplace to stick our buffer 
            char[] cBuffer = null;
            // Seeded with ERROR_INSUFFICIENT_BUFFER so the loop body runs at least once.
            int iError = ERROR_INSUFFICIENT_BUFFER;
            while (iError == ERROR_INSUFFICIENT_BUFFER)
                // (re)allocation buffer and normalize string 
                cBuffer = new char[iLength];
                iLength = nativeNormalizationNormalizeString( 
                    normalizationForm, ref iError, 
                    strInput, strInput.Length, cBuffer, cBuffer.Length);
                // Could have an error (actually it'd be quite hard to have an error here)
                if (iError != ERROR_SUCCESS)
                        // Do appropriate stuff for the individual errors: 
                        case ERROR_INSUFFICIENT_BUFFER: 
                            BCLDebug.Assert(iLength > cBuffer.Length, "Buffer overflow should have iLength > cBuffer.Length");
                        case ERROR_NO_UNICODE_TRANSLATION:
                            // Illegal code point or order found.  Ie: FFFE or D800 D800, etc.
                            // iLength is formatted into the message here — presumably the
                            // index of the offending character; confirm against the native side.
                            throw new ArgumentException(
                                Environment.GetResourceString("Argument_InvalidCharSequence", iLength ), 
                        case ERROR_NOT_ENOUGH_MEMORY: 
                            throw new OutOfMemoryException( 
                        case ERROR_INVALID_PARAMETER: 
                            // Shouldn't have invalid parameters here unless we have a bug, drop through...
                            // We shouldn't get here...
                            throw new InvalidOperationException( 
                                Environment.GetResourceString("UnknownError_Num", iError));
            // Copy our buffer into our new string, which will be the appropriate size
            String strReturn = new String(cBuffer, 0, iLength);

            // Return our output string 
            return strReturn;
        // Returns the native library's length estimate for the normalized form of strInput.
        // It calls the same native normalize function as Normalize(), but with a null
        // destination buffer and a destination length of 0, and returns the call's result.
        // Throws ArgumentNullException for null input, OutOfMemoryException for
        // ERROR_NOT_ENOUGH_MEMORY, and InvalidOperationException for any other error code.
        // NOTE(review): the braces and the tails of the ArgumentNullException and
        // OutOfMemoryException statements are missing from this copy of the file.
        internal int GuessLength(String strInput)
            if (strInput == null)
                throw new ArgumentNullException( "strInput",
            // Get our guess
            int iError = 0; 
            int iGuess = nativeNormalizationNormalizeString( 
                normalizationForm, ref iError, strInput, strInput.Length, null, 0);
            // Could have an error (actually it'd be quite hard to have an error here)
            BCLDebug.Assert(iError == ERROR_SUCCESS, "GuessLength() shouldn't return errors.");
            if (iError != ERROR_SUCCESS)
                // We shouldn't really be able to get here..., guessing length is
                // a trivial math function... 
                // Can't really be Out of Memory, but just in case: 
                if (iError == ERROR_NOT_ENOUGH_MEMORY)
                    throw new OutOfMemoryException( 

                // Who knows what happened?  Not us!
                throw new InvalidOperationException( 
                    Environment.GetResourceString("UnknownError_Num", iError));
            // Well, we guessed it
            return iGuess; 

        // Native entry points, declared extern with no managed body.
        // NOTE(review): no interop attribute ([DllImport] / [MethodImpl]) is visible on any
        // of these declarations in this copy; the file imports
        // System.Runtime.CompilerServices, so an attribute was presumably lost — confirm
        // against the original source.

        // Loads the native normalization DLL; the constructor treats a false return as failure.
        unsafe private static extern bool nativeLoadNormalizationDLL(); 

        // Normalizes lpSrcString into lpDstString and returns a length; the error code comes
        // back through iError. GuessLength() calls it with a null destination and length 0
        // to obtain a size estimate.
        unsafe private static extern int nativeNormalizationNormalizeString( 
            NormalizationForm NormForm, ref int iError,
            String lpSrcString, int cwSrcLength, 
            char[] lpDstString, int cwDstLength);

        // Tests whether lpString is already normalized; IsNormalized() reads the answer from
        // the lowest bit of the return value and the error code from iError.
        unsafe private static extern int nativeNormalizationIsNormalizedString( 
            NormalizationForm NormForm, ref int iError,
            String lpString, int cwLength); 
        // Hands the table data for a form to the native library; the constructor treats a
        // null return as an initialization failure and otherwise ignores the pointer.
        unsafe private static extern byte* nativeNormalizationInitNormalization( 
            NormalizationForm NormForm, byte* pTableData);


// File provided for Reference Use Only by Microsoft Corporation (c) 2007.


Link Menu

Network programming in C#, Network Programming in VB.NET, Network Programming in .NET
This book is available now!
Buy at Amazon US or
Buy at Amazon UK