Code:
/ Dotnetfx_Vista_SP2 / Dotnetfx_Vista_SP2 / 8.0.50727.4016 / DEVDIV / depot / DevDiv / releases / whidbey / NetFxQFE / ndp / clr / src / BCL / System / Globalization / CharUnicodeInfo.cs / 1 / CharUnicodeInfo.cs
// ==++==
//
//   Copyright (c) Microsoft Corporation.  All rights reserved.
//
// ==--==
////////////////////////////////////////////////////////////////////////////
//
//  Class:    CharUnicodeInfo
//
//  Purpose:  This class implements a set of methods for retrieving
//            character type information.  Character type information is
//            independent of culture and region.
//
//  Date:     August 12, 1998
//
////////////////////////////////////////////////////////////////////////////

namespace System.Globalization {

    //This class has only static members and therefore doesn't need to be serialized.

    using System;
    using System.Threading;
    using System.Runtime.InteropServices;
    using System.Runtime.CompilerServices;
    using System.Reflection;

    // Only statics, does not need to be marked with the serializable attribute
    public sealed class CharUnicodeInfo : System.Object {
        //--------------------------------------------------------------------//
        //                        Internal Information                        //
        //--------------------------------------------------------------------//

        //
        // Surrogate ranges and the selectors for the two bytes stored per
        // entry in the category value table (see InternalGetCategoryValue).
        //
        internal const char HIGH_SURROGATE_START = '\ud800';
        internal const char HIGH_SURROGATE_END   = '\udbff';
        internal const char LOW_SURROGATE_START  = '\udc00';
        internal const char LOW_SURROGATE_END    = '\udfff';

        internal const int UNICODE_CATEGORY_OFFSET = 0;
        internal const int BIDI_CATEGORY_OFFSET    = 1;

        // The base pointer of the data table loaded from UNICODE_INFO_FILE_NAME.
        unsafe static byte* m_pDataTable;

        // The native pointer to the multi-level index table of the Unicode category data.
        unsafe static ushort* m_pCategoryLevel1Index;
        // Category value table: two bytes per entry (Unicode category, bidi category).
        unsafe static byte* m_pCategoriesValue;

        // The native pointer to the multi-level index table of the Unicode numeric data.
        // The value found through this index table is an index into m_pNumericValues
        // and m_pDigitValues.
        unsafe static ushort* m_pNumericLevel1Index;

        // The numeric value table, which is indexed through m_pNumericLevel1Index.
        // Every item is a double.  It is kept as a byte* (not a double*) to get around
        // the IA64 alignment issue: the data is laid out on an 8-byte boundary, but the
        // loader loads the embedded table starting at a 4-byte boundary.
        unsafe static byte* m_pNumericValues;

        // The digit value table, which is indexed through m_pNumericLevel1Index.  It shares
        // the same indices as m_pNumericValues.
        // Every item contains the decimal digit value and the digit value.
        unsafe static DigitValues* m_pDigitValues;

        internal const String UNICODE_INFO_FILE_NAME = "charinfo.nlp";
        // The starting codepoint for Unicode plane 1.  Plane 1 contains 0x010000 ~ 0x01ffff.
        internal const int UNICODE_PLANE01_START = 0x10000;

        //
        // This is the header for the native data table that we load from UNICODE_INFO_FILE_NAME.
        //
        // Explicit layout is used here since a syntax like char[16] can not be used in sequential layout.
        [StructLayout(LayoutKind.Explicit)]
        internal unsafe struct UnicodeDataHeader {
            [FieldOffset(0)]
            internal char TableName;                // WCHAR[16]
            [FieldOffset(0x20)]
            internal ushort version;                // WORD[4]
            [FieldOffset(0x28)]
            internal uint OffsetToCategoriesIndex;  // DWORD
            [FieldOffset(0x2c)]
            internal uint OffsetToCategoriesValue;  // DWORD
            [FieldOffset(0x30)]
            internal uint OffsetToNumbericIndex;    // DWORD
            [FieldOffset(0x34)]
            internal uint OffsetToDigitValue;       // DWORD
            [FieldOffset(0x38)]
            internal uint OffsetToNumbericValue;    // DWORD
        }

        // NOTE: It's important to specify pack size here, since the size of the structure is 2 bytes.  Otherwise,
        // the default pack size will be 4.
        [StructLayout(LayoutKind.Sequential, Pack=2)]
        internal struct DigitValues {
            internal sbyte decimalDigit;
            internal sbyte digit;
        }

        //
        // Class initializer: locates the embedded data table once and caches pointers to
        // each of its sections (the section offsets come from the table header), then
        // hands the base pointer to the native side.
        //
        unsafe static CharUnicodeInfo() {
            m_pDataTable = GlobalizationAssembly.GetGlobalizationResourceBytePtr(typeof(CharUnicodeInfo).Assembly, UNICODE_INFO_FILE_NAME);
            UnicodeDataHeader* mainHeader = (UnicodeDataHeader*)m_pDataTable;

            // Set up the native pointers to the different parts of the table.
            m_pCategoryLevel1Index = (ushort*) (m_pDataTable + mainHeader->OffsetToCategoriesIndex);
            m_pCategoriesValue = (byte*) (m_pDataTable + mainHeader->OffsetToCategoriesValue);
            m_pNumericLevel1Index = (ushort*) (m_pDataTable + mainHeader->OffsetToNumbericIndex);
            m_pNumericValues = (byte*) (m_pDataTable + mainHeader->OffsetToNumbericValue);
            m_pDigitValues = (DigitValues*) (m_pDataTable + mainHeader->OffsetToDigitValue);

            // Go to native side to make sure the native CharacterInfoTable pointer in the native side is initialized.
            nativeInitTable(m_pDataTable);
        }

        ////////////////////////////////////////////////////////////////////////
        //
        // Define a private ctor so that compiler won't generate a default public ctor for us.
        //
        ////////////////////////////////////////////////////////////////////////
        private CharUnicodeInfo() {
        }

        ////////////////////////////////////////////////////////////////////////
        //
        // Actions:
        // Convert the BMP character or surrogate pair pointed to by index to a UTF32 value.
        // This is similar to Char.ConvertToUTF32, but the difference is that
        // it does not throw exceptions when invalid surrogate characters are passed in.
        //
        // WARNING: since it doesn't throw an exception it CAN return a value
        //          in the surrogate range D800-DFFF, which are not legal unicode values.
        //
        ////////////////////////////////////////////////////////////////////////
        internal static int InternalConvertToUtf32(String s, int index) {
            BCLDebug.Assert(s != null, "s != null");
            BCLDebug.Assert(index >= 0 && index < s.Length, "index < s.Length");

            // Same decoding as the overload below; the consumed length is simply not needed here.
            int charLength;
            return (InternalConvertToUtf32(s, index, out charLength));
        }

        ////////////////////////////////////////////////////////////////////////
        //
        // Convert a character or a surrogate pair starting at index of string s
        // to UTF32 value.
        //
        //  Parameters:
        //      s           The string
        //      index       The starting index.  It can point to a BMP character or
        //                  a surrogate pair.
        //      charLength  [out] If the index points to a BMP char (or to a surrogate
        //                  that is not part of a well-formed pair), charLength
        //                  will be 1.  If the index points to a surrogate pair,
        //                  charLength will be 2.
        //
        // WARNING: since it doesn't throw an exception it CAN return a value
        //          in the surrogate range D800-DFFF, which are not legal unicode values.
        //
        //  Returns:
        //      The UTF32 value
        //
        ////////////////////////////////////////////////////////////////////////
        internal static int InternalConvertToUtf32(String s, int index, out int charLength) {
            BCLDebug.Assert(s != null, "s != null");
            BCLDebug.Assert(s.Length > 0, "s.Length > 0");
            BCLDebug.Assert(index >= 0 && index < s.Length, "index >= 0 && index < s.Length");
            charLength = 1;
            // A pair needs a following char, so the last char of the string is never decoded as one.
            if (index < s.Length - 1) {
                int temp1 = (int)s[index] - HIGH_SURROGATE_START;
                if (temp1 >= 0 && temp1 <= 0x3ff) {
                    int temp2 = (int)s[index+1] - LOW_SURROGATE_START;
                    if (temp2 >= 0 && temp2 <= 0x3ff) {
                        // Convert the surrogate to UTF32 and get the result.
                        charLength++;
                        return ((temp1 * 0x400) + temp2 + UNICODE_PLANE01_START);
                    }
                }
            }
            return ((int)s[index]);
        }

        ////////////////////////////////////////////////////////////////////////
        //
        //  IsWhiteSpace
        //
        //  Determines if the given character is a white space character, i.e. its
        //  Unicode category is SpaceSeparator, LineSeparator or ParagraphSeparator.
        //
        ////////////////////////////////////////////////////////////////////////
        internal static bool IsWhiteSpace(String s, int index) {
            BCLDebug.Assert(s != null, "s!=null");
            BCLDebug.Assert(index >= 0 && index < s.Length, "index >= 0 && index < s.Length");

            return (IsWhiteSpaceCategory(GetUnicodeCategory(s, index)));
        }

        internal static bool IsWhiteSpace(char c) {
            return (IsWhiteSpaceCategory(GetUnicodeCategory(c)));
        }

        // Shared by both IsWhiteSpace overloads so the category list lives in one place.
        private static bool IsWhiteSpaceCategory(UnicodeCategory uc) {
            // In Unicode 3.0, U+2028 is the only character which is under the category "LineSeparator".
            // And U+2029 is the only character which is under the category "ParagraphSeparator".
            switch (uc) {
                case (UnicodeCategory.SpaceSeparator):
                case (UnicodeCategory.LineSeparator):
                case (UnicodeCategory.ParagraphSeparator):
                    return (true);
            }
            return (false);
        }

        //
        // Walks the numeric index table for ch and returns the index that is shared by
        // m_pNumericValues and m_pDigitValues.  Used by InternalGetNumericValue and
        // InternalGetDigitValues so that the pointer arithmetic exists only once.
        //
        private unsafe static byte GetNumericValueIndex(int ch) {
            BCLDebug.Assert(ch >= 0 && ch <= 0x10ffff, "ch is not in valid Unicode range.");
            // Level 1: indexed by the bits of ch above the low 8.
            ushort index = m_pNumericLevel1Index[ch >> 8];
            // Level 2: indexed by bits 4 - 7 of ch, relative to the level 1 result.
            // Note that & has the lower precedence than addition, so don't forget the parentheses.
            index = m_pNumericLevel1Index[index + ((ch >> 4) & 0x000f)];
            // Level 3: the level 2 entry is an offset in ushort units from the table base;
            // it is read as bytes and indexed by bits 0 - 3 of ch.
            byte* pBytePtr = (byte*)&(m_pNumericLevel1Index[index]);
            return (pBytePtr[(ch & 0x000f)]);
        }

        //
        // This is called by the public char and string, index versions
        //
        // Note that for ch in the range D800-DFFF we just treat it as any other non-numeric character
        //
        internal unsafe static double InternalGetNumericValue(int ch) {
            byte valueIndex = GetNumericValueIndex(ch);
#if WIN64
            // To get around the IA64 alignment issue.  Our double data is aligned in 8-byte boundary, but loader loads the embedded table starting
            // at 4-byte boundary.  This causes an alignment issue since double is 8-byte.
            byte* pSourcePtr = &(m_pNumericValues[valueIndex * sizeof(double)]);
            if (((long)pSourcePtr % 8) != 0) {
                // We are not aligned in 8-byte boundary. Do a copy.
                double ret;
                byte* retPtr = (byte*)&ret;
                Buffer.memcpyimpl(pSourcePtr, retPtr, sizeof(double));
                return (ret);
            }
#endif
            return (((double*)m_pNumericValues)[valueIndex]);
        }

        //
        // This is called by the public char and string, index versions
        //
        // Note that for ch in the range D800-DFFF we just treat it as any other non-numeric character
        //
        internal unsafe static DigitValues* InternalGetDigitValues(int ch) {
            return &(m_pDigitValues[GetNumericValueIndex(ch)]);
        }

        internal unsafe static sbyte InternalGetDecimalDigitValue(int ch) {
            return (InternalGetDigitValues(ch)->decimalDigit);
        }

        internal unsafe static sbyte InternalGetDigitValue(int ch) {
            return (InternalGetDigitValues(ch)->digit);
        }

        ////////////////////////////////////////////////////////////////////////
        //
        //Returns the numeric value associated with the character c. If the character is a fraction,
        // the return value will not be an integer. If the character does not have a numeric value, the return value is -1.
        //
        //Returns:
        //  the numeric value for the specified Unicode character.  If the character does not have a numeric value, the return value is -1.
        //Arguments:
        //      ch  a Unicode character
        //Exceptions (String, int overload only):
        //      ArgumentNullException
        //      ArgumentOutOfRangeException
        //
        ////////////////////////////////////////////////////////////////////////
        public static double GetNumericValue(char ch) {
            return (InternalGetNumericValue(ch));
        }

        public static double GetNumericValue(String s, int index) {
            if (s == null) {
                throw new ArgumentNullException("s");
            }
            if (index < 0 || index >= s.Length) {
                throw new ArgumentOutOfRangeException("index", Environment.GetResourceString("ArgumentOutOfRange_Index"));
            }
            return (InternalGetNumericValue(InternalConvertToUtf32(s, index)));
        }

        ////////////////////////////////////////////////////////////////////////
        //
        //Returns the decimal digit value associated with the character c.
        //
        // The value should be from 0 ~ 9.
        // If the character does not have a numeric value, the return value is -1.
        // From Unicode.org: Decimal Digits. Digits that can be used to form decimal-radix numbers.
        //Returns:
        //  the decimal digit value for the specified Unicode character.  If the character does not have a decimal digit value, the return value is -1.
        //Arguments:
        //      ch  a Unicode character
        //Exceptions (String, int overload only):
        //      ArgumentNullException
        //      ArgumentOutOfRangeException
        //
        ////////////////////////////////////////////////////////////////////////
        public static int GetDecimalDigitValue(char ch) {
            return (InternalGetDecimalDigitValue(ch));
        }

        public static int GetDecimalDigitValue(String s, int index) {
            if (s == null) {
                throw new ArgumentNullException("s");
            }
            if (index < 0 || index >= s.Length) {
                throw new ArgumentOutOfRangeException("index", Environment.GetResourceString("ArgumentOutOfRange_Index"));
            }
            return (InternalGetDecimalDigitValue(InternalConvertToUtf32(s, index)));
        }

        ////////////////////////////////////////////////////////////////////////
        //
        //Action: Returns the digit value associated with the character c.
        // If the character does not have a numeric value, the return value is -1.
        // From Unicode.org: If the character represents a digit, not necessarily a decimal digit,
        // the value is here. This covers digits which do not form decimal radix forms, such as the compatibility superscript digits.
        //
        // An example is: U+2460 CIRCLED DIGIT ONE. This character has digit value 1, but does not have associated decimal digit value.
        //
        //Returns:
        //  the digit value for the specified Unicode character.  If the character does not have a digit value, the return value is -1.
        //Arguments:
        //      ch  a Unicode character
        //Exceptions (String, int overload only):
        //      ArgumentNullException
        //      ArgumentOutOfRangeException
        //
        ////////////////////////////////////////////////////////////////////////
        public static int GetDigitValue(char ch) {
            return (InternalGetDigitValue(ch));
        }

        public static int GetDigitValue(String s, int index) {
            if (s == null) {
                throw new ArgumentNullException("s");
            }
            if (index < 0 || index >= s.Length) {
                throw new ArgumentOutOfRangeException("index", Environment.GetResourceString("ArgumentOutOfRange_Index"));
            }
            return (InternalGetDigitValue(InternalConvertToUtf32(s, index)));
        }

        ////////////////////////////////////////////////////////////////////////
        //
        //Action: Returns the Unicode Category property for the character ch, or for
        //        the character (BMP char or surrogate pair) starting at s[index].
        //Returns:
        //  a value in UnicodeCategory enum
        //Exceptions (String, int overload only):
        //      ArgumentNullException
        //      ArgumentOutOfRangeException
        //
        ////////////////////////////////////////////////////////////////////////
        public static UnicodeCategory GetUnicodeCategory(char ch) {
            return (InternalGetUnicodeCategory(ch));
        }

        public static UnicodeCategory GetUnicodeCategory(String s, int index) {
            if (s==null)
                throw new ArgumentNullException("s");
            // The unsigned compare rejects negative indices and index >= Length in one test.
            if (((uint)index)>=((uint)s.Length)) {
                throw new ArgumentOutOfRangeException("index");
            }
            return InternalGetUnicodeCategory(s, index);
        }

        internal unsafe static UnicodeCategory InternalGetUnicodeCategory(int ch) {
            return ((UnicodeCategory)InternalGetCategoryValue(ch, UNICODE_CATEGORY_OFFSET));
        }

        ////////////////////////////////////////////////////////////////////////
        //
        //Action: Looks up one of the per-character category bytes for ch.
        //Returns:
        //  the raw byte from the category value table
        //Arguments:
        //      ch      a UTF32 code point (0 - 0x10ffff)
        //      offset  which of the two bytes of the entry to return:
        //              UNICODE_CATEGORY_OFFSET or BIDI_CATEGORY_OFFSET
        //Exceptions:
        //      None
        //
        //Note that this API will return values for D800-DFFF surrogate halves.
        //
        ////////////////////////////////////////////////////////////////////////
        internal unsafe static byte InternalGetCategoryValue(int ch, int offset) {
            BCLDebug.Assert(ch >= 0 && ch <= 0x10ffff, "ch is not in valid Unicode range.");
            // Level 1: indexed by the bits of ch above the low 8.
            ushort index = m_pCategoryLevel1Index[ch >> 8];
            // Level 2: indexed by bits 4 - 7 of ch, relative to the level 1 result.
            // Note that & has the lower precedence than addition, so don't forget the parentheses.
            index = m_pCategoryLevel1Index[index + ((ch >> 4) & 0x000f)];
            // Level 3: read as bytes and indexed by bits 0 - 3 of ch.
            byte* pBytePtr = (byte*)&(m_pCategoryLevel1Index[index]);
            byte valueIndex = pBytePtr[(ch & 0x000f)];
            // Each entry of the value table is two bytes wide; offset picks one of them.
            byte uc = m_pCategoriesValue[valueIndex * 2 + offset];
            //
            // Make sure that OtherNotAssigned is the last category in UnicodeCategory.
            // If that changes, change the following assertion as well.
            //
            //BCLDebug.Assert(uc >= 0 && uc <= UnicodeCategory.OtherNotAssigned, "Table returns incorrect Unicode category");
            return (uc);
        }

//      internal static BidiCategory GetBidiCategory(char ch) {
//          return ((BidiCategory)InternalGetCategoryValue(c, BIDI_CATEGORY_OFFSET));
//      }

        internal static BidiCategory GetBidiCategory(String s, int index) {
            if (s==null)
                throw new ArgumentNullException("s");
            if (((uint)index)>=((uint)s.Length)) {
                throw new ArgumentOutOfRangeException("index");
            }
            return ((BidiCategory)InternalGetCategoryValue(InternalConvertToUtf32(s, index), BIDI_CATEGORY_OFFSET));
        }

        ////////////////////////////////////////////////////////////////////////
        //
        //Action: Returns the Unicode Category property for the character starting at value[index].
        //Returns:
        //  a value in UnicodeCategory enum
        //Arguments:
        //      value  a Unicode String
        //      index  Index for the specified string.
        //Exceptions:
        //      None (arguments are only checked by debug assertions)
        //
        ////////////////////////////////////////////////////////////////////////
        internal static UnicodeCategory InternalGetUnicodeCategory(String value, int index) {
            BCLDebug.Assert(value != null, "value can not be null");
            BCLDebug.Assert(index >= 0 && index < value.Length, "index >= 0 && index < value.Length");

            return (InternalGetUnicodeCategory(InternalConvertToUtf32(value, index)));
        }

        ////////////////////////////////////////////////////////////////////////
        //
        // Get the Unicode category of the character starting at index.  If the character is in BMP, charLength will return 1.
        // If the character is a valid surrogate pair, charLength will return 2.
        //
        ////////////////////////////////////////////////////////////////////////
        internal static UnicodeCategory InternalGetUnicodeCategory(String str, int index, out int charLength) {
            BCLDebug.Assert(str != null, "str can not be null");
            BCLDebug.Assert(str.Length > 0, "str.Length > 0");
            BCLDebug.Assert(index >= 0 && index < str.Length, "index >= 0 && index < str.Length");

            return (InternalGetUnicodeCategory(InternalConvertToUtf32(str, index, out charLength)));
        }

        // True for the three mark categories: NonSpacingMark, SpacingCombiningMark, EnclosingMark.
        internal static bool IsCombiningCategory(UnicodeCategory uc) {
            BCLDebug.Assert(uc >= 0, "uc >= 0");
            return (
                uc == UnicodeCategory.NonSpacingMark ||
                uc == UnicodeCategory.SpacingCombiningMark ||
                uc == UnicodeCategory.EnclosingMark
            );
        }

        // Implemented inside the runtime; receives the base pointer of the data table.
        [MethodImplAttribute(MethodImplOptions.InternalCall)]
        private unsafe static extern void nativeInitTable(byte* bytePtr);
    }
}

// File provided for Reference Use Only by Microsoft Corporation (c) 2007. // ==++== // // Copyright (c) Microsoft Corporation. All rights reserved. // // ==--== //////////////////////////////////////////////////////////////////////////// // // Class: CharacterInfo // // Purpose: This class implements a set of methods for retrieving // character type information. Character type information is // independent of culture and region. // // Date: August 12, 1998 // //////////////////////////////////////////////////////////////////////////// namespace System.Globalization { //This class has only static members and therefore doesn't need to be serialized.
using System; using System.Threading; using System.Runtime.InteropServices; using System.Runtime.CompilerServices; using System.Reflection; // Only statics, does not need to be marked with the serializable attribute public sealed class CharUnicodeInfo : System.Object { //--------------------------------------------------------------------// // Internal Information // //-------------------------------------------------------------------// // // Native methods to access the Unicode category data tables in charinfo.nlp. // internal const char HIGH_SURROGATE_START = '\ud800'; internal const char HIGH_SURROGATE_END = '\udbff'; internal const char LOW_SURROGATE_START = '\udc00'; internal const char LOW_SURROGATE_END = '\udfff'; internal const int UNICODE_CATEGORY_OFFSET = 0; internal const int BIDI_CATEGORY_OFFSET = 1; // The base pointer of the data table unsafe static byte* m_pDataTable; // The native pointer to the 12:4:4 index table of the Unicode cateogry data. unsafe static ushort* m_pCategoryLevel1Index; unsafe static byte* m_pCategoriesValue; // The native pointer to the 12:4:4 index table of the Unicode numeric data. // The value of this index table is an index into the real value table stored in m_pNumericValues. unsafe static ushort* m_pNumericLevel1Index; // The numeric value table, which is indexed by m_pNumericLevel1Index. // Every item contains the value for numeric value. // unsafe static double* m_pNumericValues; // To get around the IA64 alignment issue. Our double data is aligned in 8-byte boundary, but loader loads the embeded table starting // at 4-byte boundary. This cause a alignment issue since double is 8-byte. unsafe static byte* m_pNumericValues; // The digit value table, which is indexed by m_pNumericLevel1Index. It shares the same indice as m_pNumericValues. // Every item contains the value for decimal digit/digit value. 
unsafe static DigitValues* m_pDigitValues; internal const String UNICODE_INFO_FILE_NAME = "charinfo.nlp"; // The starting codepoint for Unicode plane 1. Plane 1 contains 0x010000 ~ 0x01ffff. internal const int UNICODE_PLANE01_START = 0x10000; // // This is the header for the native data table that we load from UNICODE_INFO_FILE_NAME. // // Excplicit layout is used here since a syntax like char[16] can not be used in sequential layout. [StructLayout(LayoutKind.Explicit)] internal unsafe struct UnicodeDataHeader { [FieldOffset(0)] internal char TableName; // WCHAR[16] [FieldOffset(0x20)] internal ushort version; // WORD[4] [FieldOffset(0x28)] internal uint OffsetToCategoriesIndex; // DWORD [FieldOffset(0x2c)] internal uint OffsetToCategoriesValue; // DWORD [FieldOffset(0x30)] internal uint OffsetToNumbericIndex; // DWORD [FieldOffset(0x34)] internal uint OffsetToDigitValue; // DWORD [FieldOffset(0x38)] internal uint OffsetToNumbericValue; // DWORD } // NOTE: It's important to specify pack size here, since the size of the structure is 2 bytes. Otherwise, // the default pack size will be 4. [StructLayout(LayoutKind.Sequential, Pack=2)] internal struct DigitValues { internal sbyte decimalDigit; internal sbyte digit; } //We need to allocate the underlying table that provides us with the information that we //use. We allocate this once in the class initializer and then we don't need to worry //about it again. // unsafe static CharUnicodeInfo() { m_pDataTable = GlobalizationAssembly.GetGlobalizationResourceBytePtr(typeof(CharUnicodeInfo).Assembly, UNICODE_INFO_FILE_NAME); UnicodeDataHeader* mainHeader = (UnicodeDataHeader*)m_pDataTable; // Set up the native pointer to different part of the tables. 
m_pCategoryLevel1Index = (ushort*) (m_pDataTable + mainHeader->OffsetToCategoriesIndex); m_pCategoriesValue = (byte*) (m_pDataTable + mainHeader->OffsetToCategoriesValue); m_pNumericLevel1Index = (ushort*) (m_pDataTable + mainHeader->OffsetToNumbericIndex); m_pNumericValues = (byte*) (m_pDataTable + mainHeader->OffsetToNumbericValue); m_pDigitValues = (DigitValues*) (m_pDataTable + mainHeader->OffsetToDigitValue); // Go to native side to make sure the native CharacterInfoTable pointer in the native side is initialized. nativeInitTable(m_pDataTable); } //////////////////////////////////////////////////////////////////////// // // Define a private ctor so that compiler won't generate a default public ctor for us. // //////////////////////////////////////////////////////////////////////// private CharUnicodeInfo() { } //////////////////////////////////////////////////////////////////////// // // Actions: // Convert the BMP character or surrogate pointed by index to a UTF32 value. // This is similar to Char.ConvertToUTF32, but the difference is that // it does not throw exceptions when invalid surrogate characters are passed in. // // WARNING: since it doesn't throw an exception it CAN return a value // in the surrogate range D800-DFFF, which are not legal unicode values. // //////////////////////////////////////////////////////////////////////// internal static int InternalConvertToUtf32(String s, int index) { BCLDebug.Assert(s != null, "s != null"); BCLDebug.Assert(index >= 0 && index < s.Length, "index < s.Length"); if (index < s.Length - 1) { int temp1 = (int)s[index] - HIGH_SURROGATE_START; if (temp1 >= 0 && temp1 <= 0x3ff) { int temp2 = (int)s[index+1] - LOW_SURROGATE_START; if (temp2 >= 0 && temp2 <= 0x3ff) { // Convert the surrogate to UTF32 and get the result. 
return ((temp1 * 0x400) + temp2 + UNICODE_PLANE01_START); } } } return ((int)s[index]); } //////////////////////////////////////////////////////////////////////// // // Convert a character or a surrogate pair starting at index of string s // to UTF32 value. // // Parameters: // s The string // index The starting index. It can point to a BMP character or // a surrogate pair. // len The length of the string. // charLength [out] If the index points to a BMP char, charLength // will be 1. If the index points to a surrogate pair, // charLength will be 2. // // WARNING: since it doesn't throw an exception it CAN return a value // in the surrogate range D800-DFFF, which are not legal unicode values. // // Returns: // The UTF32 value // //////////////////////////////////////////////////////////////////////// internal static int InternalConvertToUtf32(String s, int index, out int charLength) { BCLDebug.Assert(s != null, "s != null"); BCLDebug.Assert(s.Length > 0, "s.Length > 0"); BCLDebug.Assert(index >= 0 && index < s.Length, "index >= 0 && index < s.Length"); charLength = 1; if (index < s.Length - 1) { int temp1 = (int)s[index] - HIGH_SURROGATE_START; if (temp1 >= 0 && temp1 <= 0x3ff) { int temp2 = (int)s[index+1] - LOW_SURROGATE_START; if (temp2 >= 0 && temp2 <= 0x3ff) { // Convert the surrogate to UTF32 and get the result. charLength++; return ((temp1 * 0x400) + temp2 + UNICODE_PLANE01_START); } } } return ((int)s[index]); } //////////////////////////////////////////////////////////////////////// // // IsWhiteSpace // // Determines if the given character is a white space character. 
// //////////////////////////////////////////////////////////////////////// internal static bool IsWhiteSpace(String s, int index) { BCLDebug.Assert(s != null, "s!=null"); BCLDebug.Assert(index >= 0 && index < s.Length, "index >= 0 && index < s.Length"); UnicodeCategory uc = GetUnicodeCategory(s, index); // In Unicode 3.0, U+2028 is the only character which is under the category "LineSeparator". // And U+2029 is th eonly character which is under the category "ParagraphSeparator". switch (uc) { case (UnicodeCategory.SpaceSeparator): case (UnicodeCategory.LineSeparator): case (UnicodeCategory.ParagraphSeparator): return (true); } return (false); } internal static bool IsWhiteSpace(char c) { UnicodeCategory uc = GetUnicodeCategory(c); // In Unicode 3.0, U+2028 is the only character which is under the category "LineSeparator". // And U+2029 is th eonly character which is under the category "ParagraphSeparator". switch (uc) { case (UnicodeCategory.SpaceSeparator): case (UnicodeCategory.LineSeparator): case (UnicodeCategory.ParagraphSeparator): return (true); } return (false); } // // This is called by the public char and string, index versions // // Note that for ch in the range D800-DFFF we just treat it as any other non-numeric character // internal unsafe static double InternalGetNumericValue(int ch) { BCLDebug.Assert(ch >= 0 && ch <= 0x10ffff, "ch is not in valid Unicode range."); // Get the level 2 item from the highest 12 bit (8 - 19) of ch. ushort index = m_pNumericLevel1Index[ch >> 8]; // Get the level 2 WORD offset from the 4 - 7 bit of ch. This provides the base offset of the level 3 table. // The offset is referred to an float item in m_pNumericFloatData. // Note that & has the lower precedence than addition, so don't forget the parathesis. index = m_pNumericLevel1Index[index + ((ch >> 4) & 0x000f)]; byte* pBytePtr = (byte*)&(m_pNumericLevel1Index[index]); // Get the result from the 0 -3 bit of ch. #if WIN64 // To get around the IA64 alignment issue. 
Our double data is aligned in 8-byte boundary, but loader loads the embeded table starting // at 4-byte boundary. This cause a alignment issue since double is 8-byte. byte* pSourcePtr = &(m_pNumericValues[pBytePtr[(ch & 0x000f)] * sizeof(double)]); if (((long)pSourcePtr % 8) != 0) { // We are not aligned in 8-byte boundary. Do a copy. double ret; byte* retPtr = (byte*)&ret; Buffer.memcpyimpl(pSourcePtr, retPtr, sizeof(double)); return (ret); } return (((double*)m_pNumericValues)[pBytePtr[(ch & 0x000f)]]); #else return (((double*)m_pNumericValues)[pBytePtr[(ch & 0x000f)]]); #endif } // // This is called by the public char and string, index versions // // Note that for ch in the range D800-DFFF we just treat it as any other non-numeric character // internal unsafe static DigitValues* InternalGetDigitValues(int ch) { BCLDebug.Assert(ch >= 0 && ch <= 0x10ffff, "ch is not in valid Unicode range."); // Get the level 2 item from the highest 12 bit (8 - 19) of ch. ushort index = m_pNumericLevel1Index[ch >> 8]; // Get the level 2 WORD offset from the 4 - 7 bit of ch. This provides the base offset of the level 3 table. // The offset is referred to an float item in m_pNumericFloatData. // Note that & has the lower precedence than addition, so don't forget the parathesis. index = m_pNumericLevel1Index[index + ((ch >> 4) & 0x000f)]; byte* pBytePtr = (byte*)&(m_pNumericLevel1Index[index]); // Get the result from the 0 -3 bit of ch. return &(m_pDigitValues[pBytePtr[(ch & 0x000f)]]); } internal unsafe static sbyte InternalGetDecimalDigitValue(int ch) { return (InternalGetDigitValues(ch)->decimalDigit); } internal unsafe static sbyte InternalGetDigitValue(int ch) { return (InternalGetDigitValues(ch)->digit); } //////////////////////////////////////////////////////////////////////// // //Returns the numeric value associated with the character c. If the character is a fraction, // the return value will not be an integer. 
If the character does not have a numeric value, the return value is -1. // //Returns: // the numeric value for the specified Unicode character. If the character does not have a numeric value, the return value is -1. //Arguments: // ch a Unicode character //Exceptions: // ArgumentNullException // ArgumentOutOfRangeException // //////////////////////////////////////////////////////////////////////// public static double GetNumericValue(char ch) { return (InternalGetNumericValue(ch)); } public static double GetNumericValue(String s, int index) { if (s == null) { throw new ArgumentNullException("s"); } if (index < 0 || index >= s.Length) { throw new ArgumentOutOfRangeException("index", Environment.GetResourceString("ArgumentOutOfRange_Index")); } return (InternalGetNumericValue(InternalConvertToUtf32(s, index))); } //////////////////////////////////////////////////////////////////////// // //Returns the decimal digit value associated with the character c. // // The value should be from 0 ~ 9. // If the character does not have a numeric value, the return value is -1. // From Unicode.org: Decimal Digits. Digits that can be used to form decimal-radix numbers. //Returns: // the decimal digit value for the specified Unicode character. If the character does not have a decimal digit value, the return value is -1. 
//Arguments:
//      ch  a Unicode character
//Exceptions:
//      ArgumentNullException
//      ArgumentOutOfRangeException
//
////////////////////////////////////////////////////////////////////////

public static int GetDecimalDigitValue(char ch)
{
    int decimalDigit = InternalGetDecimalDigitValue(ch);
    return decimalDigit;
}

//
// String/index overload: validates the arguments, converts the text element
// at index with InternalConvertToUtf32, and looks up its decimal digit value.
//
public static int GetDecimalDigitValue(String s, int index)
{
    if (s == null)
    {
        throw new ArgumentNullException("s");
    }

    bool indexInRange = (index >= 0) && (index < s.Length);
    if (!indexInRange)
    {
        throw new ArgumentOutOfRangeException("index", Environment.GetResourceString("ArgumentOutOfRange_Index"));
    }

    int codePoint = InternalConvertToUtf32(s, index);
    return InternalGetDecimalDigitValue(codePoint);
}

////////////////////////////////////////////////////////////////////////
//
//Action: Returns the digit value associated with the character c.
// If the character does not have a numeric value, the return value is -1.
// From Unicode.org: If the character represents a digit, not necessarily a decimal digit,
// the value is here. This covers digits which do not form decimal radix forms, such as the compatibility superscript digits.
//
// An example is: U+2460 CIRCLED DIGIT ONE. This character has digit value 1, but does not have associated decimal digit value.
//
//Returns:
//  the digit value for the specified Unicode character.  If the character does not have a digit value, the return value is -1.
//Arguments:
//      ch  a Unicode character
//Exceptions:
//      ArgumentNullException
//      ArgumentOutOfRangeException
//
////////////////////////////////////////////////////////////////////////

public static int GetDigitValue(char ch) {
    return (InternalGetDigitValue(ch));
}

//
// String/index overload: validates the arguments, converts the text element
// at index with InternalConvertToUtf32, and looks up its digit value.
//
public static int GetDigitValue(String s, int index) {
    if (s == null) {
        throw new ArgumentNullException("s");
    }
    if (index < 0 || index >= s.Length) {
        throw new ArgumentOutOfRangeException("index", Environment.GetResourceString("ArgumentOutOfRange_Index"));
    }
    return (InternalGetDigitValue(InternalConvertToUtf32(s, index)));
}

////////////////////////////////////////////////////////////////////////
//
//Action: Returns the Unicode Category property for the character ch.
//Returns:
//  a value in UnicodeCategory enum
//Arguments:
//      ch  a Unicode character
//Exceptions:
//      None
//
////////////////////////////////////////////////////////////////////////

public static UnicodeCategory GetUnicodeCategory(char ch) {
    return (InternalGetUnicodeCategory(ch));
}

////////////////////////////////////////////////////////////////////////
//
//Action: Returns the Unicode Category property for the character at
//  the given index of s.
//Returns:
//  a value in UnicodeCategory enum
//Arguments:
//      s       a Unicode String
//      index   Index into the specified string.
//Exceptions:
//      ArgumentNullException        s is null
//      ArgumentOutOfRangeException  index is negative or not less than s.Length
//
////////////////////////////////////////////////////////////////////////

public static UnicodeCategory GetUnicodeCategory(String s, int index) {
    if (s == null) {
        throw new ArgumentNullException("s");
    }
    // The unsigned comparison rejects negative indexes and indexes >= s.Length in one test.
    if (((uint)index) >= ((uint)s.Length)) {
        // Carry the same message as the other string/index overloads of this class.
        throw new ArgumentOutOfRangeException("index", Environment.GetResourceString("ArgumentOutOfRange_Index"));
    }
    return InternalGetUnicodeCategory(s, index);
}

//
// Returns the Unicode category for the code point ch: the byte stored at
// UNICODE_CATEGORY_OFFSET of its category table entry, cast to UnicodeCategory.
//
internal unsafe static UnicodeCategory InternalGetUnicodeCategory(int ch) {
    return ((UnicodeCategory)InternalGetCategoryValue(ch, UNICODE_CATEGORY_OFFSET));
}

////////////////////////////////////////////////////////////////////////
//
//Action: Returns one byte of category data for the code point ch.
//Returns:
//  the byte at position (valueIndex * 2 + offset) of m_pCategoriesValue,
//  where valueIndex is found through the m_pCategoryLevel1Index table.
//Arguments:
//      ch      a Unicode code point in the range 0 - 0x10ffff
//      offset  UNICODE_CATEGORY_OFFSET or BIDI_CATEGORY_OFFSET
//Exceptions:
//      None
//
//Note that this API will return values for D800-DFFF surrogate halves.
//
////////////////////////////////////////////////////////////////////////

internal unsafe static byte InternalGetCategoryValue(int ch, int offset) {
    BCLDebug.Assert(ch >= 0 && ch <= 0x10ffff, "ch is not in valid Unicode range.");
    // Each entry of m_pCategoriesValue is two bytes wide, so any other offset would read a neighbouring entry.
    BCLDebug.Assert(offset == UNICODE_CATEGORY_OFFSET || offset == BIDI_CATEGORY_OFFSET, "offset is not a valid category offset.");
    // Get the level 2 item from the highest 12 bit (8 - 19) of ch.
    ushort index = m_pCategoryLevel1Index[ch >> 8];
    // Get the level 2 WORD offset from the 4 - 7 bit of ch.  This provides the base offset of the level 3 table.
    // Note that & has the lower precedence than addition, so don't forget the parentheses.
    index = m_pCategoryLevel1Index[index + ((ch >> 4) & 0x000f)];
    // The level 3 table is addressed byte by byte, so view the ushort table through a byte pointer.
    byte* pBytePtr = (byte*)&(m_pCategoryLevel1Index[index]);
    // Get the result from the 0 - 3 bit of ch.
    byte valueIndex = pBytePtr[(ch & 0x000f)];
    byte uc = m_pCategoriesValue[valueIndex * 2 + offset];
    //
    // Make sure that OtherNotAssigned is the last category in UnicodeCategory.
    // If that changes, change the following assertion as well.
    //
    //BCLDebug.Assert(uc >= 0 && uc <= UnicodeCategory.OtherNotAssigned, "Table returns incorrect Unicode category");
    return (uc);
}

// Disabled char overload, kept for reference:
// internal static BidiCategory GetBidiCategory(char ch) {
//     return ((BidiCategory)InternalGetCategoryValue(ch, BIDI_CATEGORY_OFFSET));
// }

//
// Returns the bidi category for the character at the given index of s: the byte
// stored at BIDI_CATEGORY_OFFSET of its category table entry, cast to BidiCategory.
// Throws ArgumentNullException when s is null and ArgumentOutOfRangeException when
// index is negative or not less than s.Length.
//
internal static BidiCategory GetBidiCategory(String s, int index) {
    if (s == null) {
        throw new ArgumentNullException("s");
    }
    // The unsigned comparison rejects negative indexes and indexes >= s.Length in one test.
    if (((uint)index) >= ((uint)s.Length)) {
        // Carry the same message as the other string/index overloads of this class.
        throw new ArgumentOutOfRangeException("index", Environment.GetResourceString("ArgumentOutOfRange_Index"));
    }
    return ((BidiCategory)InternalGetCategoryValue(InternalConvertToUtf32(s, index), BIDI_CATEGORY_OFFSET));
}

////////////////////////////////////////////////////////////////////////
//
//Action: Returns the Unicode Category property for the character at
//  the given index of value.  Arguments are only checked by debug asserts.
//Returns:
//  a value in UnicodeCategory enum
//Arguments:
//      value  a Unicode String
//      index  Index for the specified string.
//Exceptions:
//      None
//
////////////////////////////////////////////////////////////////////////

internal static UnicodeCategory InternalGetUnicodeCategory(String value, int index) {
    BCLDebug.Assert(value != null, "value can not be null");
    // Check both bounds, as the overload that also reports charLength does.
    BCLDebug.Assert(index >= 0 && index < value.Length, "index >= 0 && index < value.Length");

    return (InternalGetUnicodeCategory(InternalConvertToUtf32(value, index)));
}

////////////////////////////////////////////////////////////////////////
//
// Get the Unicode category of the character starting at index.  If the character is in BMP, charLength will return 1.
// If the character is a valid surrogate pair, charLength will return 2.
//
////////////////////////////////////////////////////////////////////////

internal static UnicodeCategory InternalGetUnicodeCategory(String str, int index, out int charLength)
{
    BCLDebug.Assert(str != null, "str can not be null");
    BCLDebug.Assert(str.Length > 0, "str.Length > 0");
    BCLDebug.Assert(index >= 0 && index < str.Length, "index >= 0 && index < str.Length");

    // InternalConvertToUtf32 sets charLength while producing the code point.
    int codePoint = InternalConvertToUtf32(str, index, out charLength);
    return InternalGetUnicodeCategory(codePoint);
}

//
// Returns true when uc is one of the three mark categories
// (NonSpacingMark, SpacingCombiningMark, EnclosingMark), false otherwise.
//
internal static bool IsCombiningCategory(UnicodeCategory uc)
{
    BCLDebug.Assert(uc >= 0, "uc >= 0");

    switch (uc)
    {
        case UnicodeCategory.NonSpacingMark:
        case UnicodeCategory.SpacingCombiningMark:
        case UnicodeCategory.EnclosingMark:
            return true;

        default:
            return false;
    }
}

// Implemented by the runtime (InternalCall); there is no managed body.
[MethodImplAttribute(MethodImplOptions.InternalCall)]
private unsafe static extern void nativeInitTable(byte* bytePtr);
}
}

// File provided for Reference Use Only by Microsoft Corporation (c) 2007.
Link Menu
This book is available now!
Buy at Amazon US or
Buy at Amazon UK
- WebPartMinimizeVerb.cs
- ContainerSelectorBehavior.cs
- RowVisual.cs
- InternalBufferOverflowException.cs
- WebSysDefaultValueAttribute.cs
- DbConnectionPoolCounters.cs
- WebPartAddingEventArgs.cs
- SoapHeaders.cs
- Invariant.cs
- MsmqIntegrationProcessProtocolHandler.cs
- SqlClientMetaDataCollectionNames.cs
- Transform3DGroup.cs
- SHA512.cs
- CodeEventReferenceExpression.cs
- CurrencyWrapper.cs
- CssStyleCollection.cs
- DetailsViewCommandEventArgs.cs
- Wildcard.cs
- GenericAuthenticationEventArgs.cs
- LineInfo.cs
- HtmlWindowCollection.cs
- ExtractCollection.cs
- Stopwatch.cs
- PeerMessageDispatcher.cs
- FormViewInsertedEventArgs.cs
- SqlUtils.cs
- MethodResolver.cs
- CodeExporter.cs
- SignatureResourcePool.cs
- HandoffBehavior.cs
- XmlTextWriter.cs
- Point3DValueSerializer.cs
- FontFaceLayoutInfo.cs
- AttributeQuery.cs
- PolyQuadraticBezierSegment.cs
- BreakSafeBase.cs
- RequestCachePolicy.cs
- TextModifierScope.cs
- SQLByte.cs
- ProcessStartInfo.cs
- GridViewRow.cs
- RunInstallerAttribute.cs
- GeneralTransform3DGroup.cs
- FileDialogCustomPlace.cs
- TabletDevice.cs
- SmiContextFactory.cs
- NullableConverter.cs
- CapabilitiesPattern.cs
- BinaryFormatter.cs
- FormatException.cs
- ResolveRequestResponseAsyncResult.cs
- CqlErrorHelper.cs
- IntegerValidatorAttribute.cs
- CopyAction.cs
- TextFormattingConverter.cs
- ScriptingProfileServiceSection.cs
- TableRowGroup.cs
- StylusPoint.cs
- QueryCacheManager.cs
- CompositeFontFamily.cs
- UIElementPropertyUndoUnit.cs
- ServiceElement.cs
- HoistedLocals.cs
- WebResponse.cs
- DataGridViewBindingCompleteEventArgs.cs
- Rule.cs
- HwndSubclass.cs
- DbCommandTree.cs
- WrappingXamlSchemaContext.cs
- DataColumnChangeEvent.cs
- MetadataAssemblyHelper.cs
- FileLevelControlBuilderAttribute.cs
- HitTestParameters3D.cs
- Timeline.cs
- TransformedBitmap.cs
- Point.cs
- WebPartRestoreVerb.cs
- RadioButton.cs
- MulticastIPAddressInformationCollection.cs
- OutOfMemoryException.cs
- StylusPointDescription.cs
- SurrogateEncoder.cs
- DataGridViewColumnEventArgs.cs
- BuildResult.cs
- DispatchWrapper.cs
- XmlRawWriter.cs
- LiteralSubsegment.cs
- SecurityContextKeyIdentifierClause.cs
- AnnotationResource.cs
- TemplateControlCodeDomTreeGenerator.cs
- XPathAncestorQuery.cs
- Win32PrintDialog.cs
- EventlogProvider.cs
- PopupRoot.cs
- ListBoxDesigner.cs
- MonthCalendar.cs
- AssemblyResourceLoader.cs
- __Filters.cs
- CqlParser.cs
- XmlElement.cs