Classification.cs source code in C# .NET

Source code for the .NET framework in C#

                        

Code:

/ DotNET / DotNET / 8.0 / untmp / WIN_WINDOWS / lh_tools_devdiv_wpf / Windows / wcp / Core / MS / Internal / Classification.cs / 1 / Classification.cs

                            //------------------------------------------------------------------------ 
//
//  Microsoft Windows Client Platform
//  Copyright (c) Microsoft Corporation.  All rights reserved.
// 
//  File:      Classification.cs
// 
//  Contents:  Unicode classification entry point 
//
//  Created:   7-14-2002 [....] ([....]) 
//
//-----------------------------------------------------------------------

using System; 
using System.Diagnostics;
using MS.Internal; 
using System.Windows; 
using System.Security;
using System.Collections; 
using System.Runtime.InteropServices;
using System.Windows.Media.TextFormatting;

namespace MS.Internal 
{
    /// <summary>
    /// Holds the Unicode classification table pointers obtained from the native
    /// side and exposes lookup helpers on top of them.
    /// </summary>
    internal static class Classification
    {
        /// <summary>
        /// Combining-mark classification data, returned as part of
        /// <see cref="RawClassificationTables"/>.
        /// </summary>
        /// <remarks>
        /// This structure has a clone on the unmanaged side. Any change made to this
        /// structure must be mirrored there, because the layout is sequential and the
        /// fields are filled in by <see cref="MILGetClassificationTables"/>.
        /// </remarks>
        [StructLayout(LayoutKind.Sequential)]
        internal unsafe struct CombiningMarksClassificationData
        {
            internal IntPtr CombiningCharsIndexes; // Two-dimensional array of base char classes
            internal int    CombiningCharsIndexesTableLength;
            internal int    CombiningCharsIndexesTableSegmentLength;

            // Combining mark classes array.
            // NOTE(review): length is presumably CombiningMarkIndexesTableLength - confirm against native side.
            internal IntPtr CombiningMarkIndexes;
            internal int    CombiningMarkIndexesTableLength;

            internal IntPtr CombinationChars; // Two-dimensional array of combined characters
            internal int    CombinationCharsBaseCount;
            internal int    CombinationCharsMarkCount;
        }

        /// <summary>
        /// Raw table pointers handed back by <see cref="MILGetClassificationTables"/>.
        /// </summary>
        /// <remarks>
        /// This structure has a clone on the unmanaged side. Any change made to this
        /// structure must be mirrored there.
        /// </remarks>
        [StructLayout(LayoutKind.Sequential)]
        internal unsafe struct RawClassificationTables
        {
            internal IntPtr UnicodeClasses;
            internal IntPtr CharacterAttributes;
            internal IntPtr Mirroring;
            internal CombiningMarksClassificationData CombiningMarksClassification;
        }

        /// <summary>
        /// Native entry point that fills in the classification table pointers.
        /// </summary>
        /// <param name="ct">Receives the raw table pointers.</param>
        /// <remarks>
        /// Critical - as this code performs an elevation.
        /// Safe - This is an entry point that grabs several RO shared pages for text layout
        /// purpose. Reading it directly won't yield any readable content to the reader, nor
        /// will it yield any useful information to outside world as these are just loads of
        /// Unicode classification hex data that only Text engine knows how to use - hence,
        /// not an Information Disclosure threat.
        /// </remarks>
        [SecurityCritical, SecurityTreatAsSafe]
        [SuppressUnmanagedCodeSecurity]
        [DllImport(Microsoft.Internal.DllImport.PresentationNative, EntryPoint="MILGetClassificationTables")]
        internal static extern void MILGetClassificationTables(out RawClassificationTables ct);

        /// <summary>
        /// Fetches the classification tables from native code once and caches the
        /// returned pointers for the lifetime of the type.
        /// </summary>
        /// <remarks>
        /// Critical: This retrieves pointers that it stores locally. The pointers retrieved
        /// are not validated for correctness and they are later dereferenced.
        /// TreatAsSafe: The constructor is safe since it simply stores these pointers. The
        /// pointers are not settable from outside, so they cannot be spoofed.
        /// </remarks>
        [SecurityCritical, SecurityTreatAsSafe]
        static Classification()
        {
            // An out argument needs no prior initialization.
            RawClassificationTables ct;
            MILGetClassificationTables(out ct);

            _unicodeClassTable  = new SecurityCriticalData<IntPtr>(ct.UnicodeClasses);
            _charAttributeTable = new SecurityCriticalData<IntPtr>(ct.CharacterAttributes);
            _mirroredCharTable  = new SecurityCriticalData<IntPtr>(ct.Mirroring);

            _combiningMarksClassification =
                new SecurityCriticalData<CombiningMarksClassificationData>(ct.CombiningMarksClassification);
        }

        /// <summary>
        /// Looks up the Unicode character class for a UTF16 code unit.
        /// </summary>
        /// <param name="codepoint">UTF16 code unit to classify.</param>
        /// <returns>The character class stored in the class table for the code unit.</returns>
        /// <remarks>
        /// Critical: This accesses unsafe code and dereferences a location in
        /// a prepopulated array. The risk is you might dereference a bogus memory
        /// location.
        /// TreatAsSafe: This code is ok since it reduces codepoint to one of 256 possible
        /// values and will always succeed. Also this information is ok to expose.
        /// </remarks>
        [SecurityCritical, SecurityTreatAsSafe]
        public static short GetUnicodeClassUTF16(char codepoint)
        {
            unsafe
            {
                // Plane 0 must be a real page table, not an inlined class value.
                short** plane0 = UnicodeClassTable[0];
                Invariant.Assert((long)plane0 >= (long)UnicodeClass.Max);

                // A page entry numerically below UnicodeClass.Max is not a pointer:
                // it is the class value itself and is returned as such.
                short* pcc = plane0[codepoint >> 8];
                if ((long)pcc < (long)UnicodeClass.Max)
                {
                    return (short)pcc;
                }

                return pcc[codepoint & 0xFF];
            }
        }


        /// <summary>
        /// Looks up the Unicode character class for a Unicode scalar value.
        /// </summary>
        /// <param name="unicodeScalar">Scalar value in the range 0..0x10FFFF (asserted).</param>
        /// <returns>The character class stored in the class table for the scalar value.</returns>
        /// <remarks>
        /// Critical: This accesses unsafe code and dereferences a pointer retrieved from unmanaged code.
        /// TreatAsSafe: There is bounds checking in place and this dereferences a valid structure which
        /// is guaranteed to be populated.
        /// </remarks>
        [SecurityCritical, SecurityTreatAsSafe]
        public static short GetUnicodeClass(int unicodeScalar)
        {
            unsafe
            {
                Invariant.Assert(unicodeScalar >= 0 && unicodeScalar <= 0x10FFFF);

                // The assert already limits the plane number to 0..16; the mask and
                // modulo additionally keep the index inside that range.
                short** ppcc = UnicodeClassTable[((unicodeScalar >> 16) & 0xFF) % 17];

                // At both levels, an entry numerically below UnicodeClass.Max is the
                // class value itself rather than a pointer to the next level.
                if ((long)ppcc < (long)UnicodeClass.Max)
                {
                    return (short)ppcc;
                }

                short* pcc = ppcc[(unicodeScalar & 0xFFFF) >> 8];

                if ((long)pcc < (long)UnicodeClass.Max)
                {
                    return (short)pcc;
                }

                return pcc[unicodeScalar & 0xFF];
            }
        }


        /// <summary>
        /// Computes the Unicode scalar value at the start of a UTF16 character range.
        /// </summary>
        /// <param name="unicodeString">Non-empty character range to read from (asserted).</param>
        /// <param name="sizeofChar">
        /// Receives the number of UTF16 code units consumed: 2 when the range starts
        /// with a high surrogate followed by a low surrogate, otherwise 1.
        /// </param>
        /// <returns>
        /// The combined scalar value for a well-formed surrogate pair; otherwise the
        /// first code unit unchanged (including an unpaired surrogate).
        /// </returns>
        internal static int UnicodeScalar(
            CharacterBufferRange unicodeString,
            out int              sizeofChar
            )
        {
            Invariant.Assert(unicodeString.CharacterBuffer != null && unicodeString.Length > 0);

            int ch = unicodeString[0];
            sizeofChar = 1;

            // 0xD800..0xDBFF is the high-surrogate range; only then look at the next unit.
            if (unicodeString.Length >= 2 && (ch & 0xFC00) == 0xD800)
            {
                int low = unicodeString[1];

                // 0xDC00..0xDFFF is the low-surrogate range.
                if ((low & 0xFC00) == 0xDC00)
                {
                    ch = (((ch & 0x03FF) << 10) | (low & 0x3FF)) + 0x10000;
                    sizeofChar++;
                }
            }

            return ch;
        }


        /// <summary>
        /// Checks whether the character is a combining mark.
        /// </summary>
        /// <param name="unicodeScalar">Unicode scalar value to test.</param>
        /// <returns>
        /// true when the character's item class is <c>SimpleMarkClass</c> or <c>ComplexMarkClass</c>.
        /// </returns>
        /// <remarks>
        /// Critical: This code reads through the pointer returned by CharAttributeTable.
        /// TreatAsSafe: This information is safe to expose, and the unicodeScalar passed in
        /// is validated for bounds by GetUnicodeClass.
        /// </remarks>
        [SecurityCritical, SecurityTreatAsSafe]
        public static bool IsCombining(int unicodeScalar)
        {
            byte itemClass = GetItemClass(unicodeScalar);

            return itemClass == (byte)ItemClass.SimpleMarkClass
                || itemClass == (byte)ItemClass.ComplexMarkClass;
        }

        /// <summary>
        /// Checks whether the character is a joiner character.
        /// </summary>
        /// <param name="unicodeScalar">Unicode scalar value to test.</param>
        /// <returns>true when the character's item class is <c>JoinerClass</c>.</returns>
        /// <remarks>
        /// Critical: This code reads through the pointer returned by CharAttributeTable.
        /// TreatAsSafe: This information is safe to expose, and the unicodeScalar passed in
        /// is validated for bounds by GetUnicodeClass.
        /// </remarks>
        [SecurityCritical, SecurityTreatAsSafe]
        public static bool IsJoiner(int unicodeScalar)
        {
            return GetItemClass(unicodeScalar) == (byte)ItemClass.JoinerClass;
        }

        /// <summary>
        /// Scans a UTF16 character string until a character with any of the specified
        /// attribute flags is found.
        /// </summary>
        /// <param name="charBuffer">Buffer holding the characters to scan.</param>
        /// <param name="offsetToFirstChar">Index in <paramref name="charBuffer"/> of the first character to examine.</param>
        /// <param name="stringLength">Maximum number of characters to examine.</param>
        /// <param name="mask">Attribute flags that stop the scan.</param>
        /// <param name="charFlags">
        /// Receives the union of the flags of every character that was skipped; the
        /// character that stopped the scan is not included.
        /// </param>
        /// <returns>
        /// Index, relative to <paramref name="offsetToFirstChar"/>, of the first character matching
        /// the mask, or <paramref name="stringLength"/> when none matches.
        /// </returns>
        /// <remarks>
        /// Critical: This code reads through the pointer returned by CharAttributeTable and
        /// indexes the buffer using the arguments passed in.
        /// TreatAsSafe: This information is safe to expose, as in the worst case it tells you
        /// where the next matching UTF16 character is. Also the character buffer can be built
        /// from a string, a char array or an unmanaged char*. The third case is critical and
        /// tightly controlled, so the risk of a bogus length is significantly mitigated.
        /// </remarks>
        [SecurityCritical, SecurityTreatAsSafe]
        public static int AdvanceUntilUTF16(
            CharacterBuffer     charBuffer,
            int                 offsetToFirstChar,
            int                 stringLength,
            ushort              mask,
            out ushort          charFlags
            )
        {
            int i = offsetToFirstChar;
            int limit = offsetToFirstChar + stringLength;
            charFlags = 0;

            while (i < limit)
            {
                ushort flags = GetCharFlagsUTF16(charBuffer[i]);

                if ((flags & mask) != 0)
                {
                    break;
                }

                charFlags |= flags;
                i++;
            }

            return i - offsetToFirstChar;
        }

        /// <summary>
        /// Scans a UTF16 character string until a character without any of the specified
        /// attribute flags is found.
        /// </summary>
        /// <param name="charBuffer">Buffer holding the characters to scan.</param>
        /// <param name="offsetToFirstChar">Index in <paramref name="charBuffer"/> of the first character to examine.</param>
        /// <param name="stringLength">Maximum number of characters to examine.</param>
        /// <param name="mask">Attribute flags a character must carry for the scan to continue.</param>
        /// <param name="charFlags">
        /// Receives the union of the flags of every character that was skipped; the
        /// character that stopped the scan is not included.
        /// </param>
        /// <returns>
        /// Index, relative to <paramref name="offsetToFirstChar"/>, of the first character without
        /// the attribute, or <paramref name="stringLength"/> when every character has it.
        /// </returns>
        /// <remarks>
        /// Critical: This code reads through the pointer returned by CharAttributeTable and
        /// accesses elements in an array with no type checking.
        /// TreatAsSafe: This code exposes the index of the next character without the
        /// attribute in a run, which is ok to expose.
        /// </remarks>
        [SecurityCritical, SecurityTreatAsSafe]
        public static int AdvanceWhileUTF16(
            CharacterBuffer charBuffer,
            int offsetToFirstChar,
            int stringLength,
            ushort mask,
            out ushort charFlags
            )
        {
            int i = offsetToFirstChar;
            int limit = offsetToFirstChar + stringLength;
            charFlags = 0;

            while (i < limit)
            {
                ushort flags = GetCharFlagsUTF16(charBuffer[i]);

                if ((flags & mask) == 0)
                {
                    break;
                }

                charFlags |= flags;
                i++;
            }

            return i - offsetToFirstChar;
        }

        /// <summary>
        /// Scans a character string until a character that is not of the specified
        /// item class is found. Surrogate pairs are consumed as one character.
        /// </summary>
        /// <param name="unicodeString">Character range to scan.</param>
        /// <param name="itemClass">Item class that every skipped character must have.</param>
        /// <returns>
        /// Index (in UTF16 code units) of the first character that is not of the specified
        /// item class, or the range length when every character is.
        /// </returns>
        /// <remarks>
        /// Critical: This code reads through the pointer returned by CharAttributeTable and
        /// accesses elements in an array with no type checking.
        /// TreatAsSafe: This code exposes the index of the next character of a different
        /// item class in a run, which is ok to expose.
        /// </remarks>
        [SecurityCritical, SecurityTreatAsSafe]
        public static int AdvanceWhile(
            CharacterBufferRange unicodeString,
            ItemClass            itemClass
            )
        {
            int i     = 0;
            int limit = unicodeString.Length;

            while (i < limit)
            {
                int sizeofChar;
                int ch = UnicodeScalar(
                    new CharacterBufferRange(unicodeString, i, limit - i),
                    out sizeofChar
                    );

                if (GetItemClass(ch) != (byte)itemClass)
                {
                    break;
                }

                i += sizeofChar;
            }

            return i;
        }

        /// <summary>
        /// Looks up the mirrored counterpart of the specified UTF16 character.
        /// </summary>
        /// <param name="codepoint">UTF16 code unit to mirror.</param>
        /// <returns>
        /// The mirrored character from the mirroring table, or <paramref name="codepoint"/>
        /// itself when the table has no page or a zero entry for it.
        /// </returns>
        /// <remarks>
        /// Critical: This code dereferences a pointer and indexes into it.
        /// TreatAsSafe: It accesses the structure to return a valid entry from one of 256 entries.
        /// </remarks>
        [SecurityCritical, SecurityTreatAsSafe]
        public static int GetMirroredCharacterUTF16(char codepoint)
        {
            unsafe
            {
                // A null page means no character in this 256-entry block has a mirror.
                char* pcc = MirroredCharTable[codepoint >> 8];
                if (pcc == null)
                {
                    return codepoint;
                }

                // A zero entry means this particular character has no mirror.
                int mirroredCodepoint = pcc[codepoint & 0xFF];
                return mirroredCodepoint == 0 ? codepoint : mirroredCodepoint;
            }
        }

        /// <summary>
        /// Unicode class table, indexed by plane, then by page, then by low byte.
        /// </summary>
        /// <remarks>
        /// Critical: This accesses unsafe code and returns a pointer.
        /// </remarks>
        private static unsafe short*** UnicodeClassTable
        {
            [SecurityCritical]
            get { return (short***)_unicodeClassTable.Value; }
        }

        /// <summary>
        /// Character attribute table, indexed by Unicode class.
        /// </summary>
        /// <remarks>
        /// Critical: This accesses unsafe code and returns a pointer.
        /// </remarks>
        private static unsafe CharacterAttribute* CharAttributeTable
        {
            [SecurityCritical]
            get { return (CharacterAttribute*)_charAttributeTable.Value; }
        }

        /// <summary>
        /// Returns the character attributes recorded for a Unicode class.
        /// </summary>
        /// <param name="charClass">Unicode class; must be in [0, UnicodeClass.Max) (asserted).</param>
        /// <returns>A copy of the attribute entry for <paramref name="charClass"/>.</returns>
        /// <remarks>
        /// Critical: This accesses unsafe code and indexes into an array.
        /// Safe: This method does bound check on the input char class.
        /// </remarks>
        [SecurityCritical, SecurityTreatAsSafe]
        internal static CharacterAttribute CharAttributeOf(int charClass)
        {
            unsafe
            {
                Invariant.Assert(charClass >= 0 && charClass < (int)UnicodeClass.Max);
                return CharAttributeTable[charClass];
            }
        }

        /// <summary>
        /// Mirroring table, indexed by high byte and then by low byte of a UTF16 code unit.
        /// </summary>
        /// <remarks>
        /// Critical: This accesses unsafe code and returns a pointer.
        /// </remarks>
        private static unsafe char** MirroredCharTable
        {
            [SecurityCritical]
            get { return (char**)_mirroredCharTable.Value; }
        }

        /// <summary>
        /// Returns the combining-mark classification data captured at type initialization.
        /// </summary>
        /// <param name="combiningMarksClassification">Receives a copy of the cached data.</param>
        /// <remarks>
        /// Critical: This returns a variable retrieved under an elevation.
        /// TreatAsSafe: This data is safe to return; all this structure contains is IntPtr
        /// and int values. Doing unsafe operations with the IntPtr requires unsafe code.
        /// </remarks>
        [SecurityCritical, SecurityTreatAsSafe]
        internal static void GetCombiningMarksClassificationData(
                                    out CombiningMarksClassificationData combiningMarksClassification)
        {
            combiningMarksClassification = _combiningMarksClassification.Value;
        }

        /// <summary>
        /// Item class byte of the attribute entry for a Unicode scalar value.
        /// </summary>
        /// <remarks>
        /// Critical: This reads through the CharAttributeTable pointer. The index comes
        /// from GetUnicodeClass, which asserts the scalar value range.
        /// </remarks>
        [SecurityCritical]
        private static unsafe byte GetItemClass(int unicodeScalar)
        {
            return CharAttributeTable[GetUnicodeClass(unicodeScalar)].ItemClass;
        }

        /// <summary>
        /// Attribute flags of the attribute entry for a UTF16 code unit.
        /// </summary>
        /// <remarks>
        /// Critical: This reads through the CharAttributeTable pointer.
        /// </remarks>
        [SecurityCritical]
        private static unsafe ushort GetCharFlagsUTF16(char codepoint)
        {
            return (ushort)CharAttributeTable[(int)GetUnicodeClassUTF16(codepoint)].Flags;
        }


        // Table pointers are IntPtr wrappers (their Value is cast to pointer types above);
        // the combining-mark data is stored by value.
        private static readonly SecurityCriticalData<IntPtr> _unicodeClassTable;
        private static readonly SecurityCriticalData<IntPtr> _charAttributeTable;
        private static readonly SecurityCriticalData<IntPtr> _mirroredCharTable;
        private static readonly SecurityCriticalData<CombiningMarksClassificationData> _combiningMarksClassification;
    }
} 

// File provided for Reference Use Only by Microsoft Corporation (c) 2007.
// Copyright (c) Microsoft Corporation. All rights reserved.


                        

Link Menu

Network programming in C#, Network Programming in VB.NET, Network Programming in .NET
This book is available now!
Buy at Amazon US or
Buy at Amazon UK