Code:
/ 4.0 / 4.0 / untmp / DEVDIV_TFS / Dev10 / Releases / RTMRel / wpf / src / Core / CSharp / MS / Internal / Classification.cs / 1599983 / Classification.cs
//------------------------------------------------------------------------
//
//  Microsoft Windows Client Platform
//  Copyright (c) Microsoft Corporation. All rights reserved.
//
//  File:      Classification.cs
//
//  Contents:  Unicode classification entry point
//
//  Created:   7-14-2002 Tarek Mahmoud Sayed ([....])
//
//-----------------------------------------------------------------------

using System;
using System.Diagnostics;
using MS.Internal;
using System.Windows;
using System.Security;
using System.Collections;
using System.Runtime.InteropServices;
using System.Windows.Media.TextFormatting;

namespace MS.Internal
{
    /// <summary>
    /// This class is used as a level of indirection for classes in managed C++ to be able to
    /// utilize methods from the static class Classification.
    /// We cannot make MC++ reference PresentationCore.dll since this would result in a
    /// circular reference.
    /// </summary>
    internal class ClassificationUtility : MS.Internal.Text.TextInterface.IClassification
    {
        // We have restored this list from WPF 3.x.
        // The original list can be found under
        // $/Dev10/pu/WPF/wpf/src/Core/CSharp/MS/Internal/Shaping/Script.cs
        //
        // The table is indexed by the script value of a character attribute
        // (see GetCharAttribute); each entry becomes that script's needsCaretInfo answer.
        internal static readonly bool[] ScriptCaretInfo = new bool[]
        {
            /* Default                          */ false,
            /* Arabic                           */ false,
            /* Armenian                         */ false,
            /* Bengali                          */ true,
            /* Bopomofo                         */ false,
            /* Braille                          */ false,
            /* Buginese                         */ true,
            /* Buhid                            */ false,
            /* CanadianSyllabics                */ false,
            /* Cherokee                         */ false,
            /* CJKIdeographic                   */ false,
            /* Coptic                           */ false,
            /* CypriotSyllabary                 */ false,
            /* Cyrillic                         */ false,
            /* Deseret                          */ false,
            /* Devanagari                       */ true,
            /* Ethiopic                         */ false,
            /* Georgian                         */ false,
            /* Glagolitic                       */ false,
            /* Gothic                           */ false,
            /* Greek                            */ false,
            /* Gujarati                         */ true,
            /* Gurmukhi                         */ true,
            /* Hangul                           */ true,
            /* Hanunoo                          */ false,
            /* Hebrew                           */ true,
            /* Kannada                          */ true,
            /* Kana                             */ false,
            /* Kharoshthi                       */ true,
            /* Khmer                            */ true,
            /* Lao                              */ true,
            /* Latin                            */ false,
            /* Limbu                            */ true,
            /* LinearB                          */ false,
            /* Malayalam                        */ true,
            /* MathematicalAlphanumericSymbols  */ false,
            /* Mongolian                        */ true,
            /* MusicalSymbols                   */ false,
            /* Myanmar                          */ true,
            /* NewTaiLue                        */ true,
            /* Ogham                            */ false,
            /* OldItalic                        */ false,
            /* OldPersianCuneiform              */ false,
            /* Oriya                            */ true,
            /* Osmanya                          */ false,
            /* Runic                            */ false,
            /* Shavian                          */ false,
            /* Sinhala                          */ true,
            /* SylotiNagri                      */ true,
            /* Syriac                           */ false,
            /* Tagalog                          */ false,
            /* Tagbanwa                         */ false,
            /* TaiLe                            */ false,
            /* Tamil                            */ true,
            /* Telugu                           */ true,
            /* Thaana                           */ true,
            /* Thai                             */ true,
            /* Tibetan                          */ true,
            /* Tifinagh                         */ false,
            /* UgariticCuneiform                */ false,
            /* Yi                               */ false,
            /* Digit                            */ false,
            /* Control                          */ false,
            /* Mirror                           */ false,
        };

        private static readonly ClassificationUtility _classificationUtilityInstance = new ClassificationUtility();

        /// <summary>
        /// The shared instance of this utility.
        /// </summary>
        internal static ClassificationUtility Instance
        {
            get { return _classificationUtilityInstance; }
        }

        /// <summary>
        /// Looks up the character attribute of a Unicode scalar value and reports a set of
        /// boolean properties derived from its item class and script.
        /// </summary>
        /// <param name="unicodeScalar">Unicode scalar value to classify.</param>
        /// <param name="isCombining">True when the item class is a simple or complex mark.</param>
        /// <param name="needsCaretInfo">The ScriptCaretInfo entry for the character's script.</param>
        /// <param name="isIndic">True when the script is one of the scripts accepted by IsScriptIndic.</param>
        /// <param name="isDigit">True when the script is ScriptID.Digit.</param>
        /// <param name="isLatin">True when the script is ScriptID.Latin.</param>
        /// <param name="isStrong">True when the item class is ItemClass.StrongClass.</param>
        public void GetCharAttribute(
            int unicodeScalar,
            out bool isCombining,
            out bool needsCaretInfo,
            out bool isIndic,
            out bool isDigit,
            out bool isLatin,
            out bool isStrong
            )
        {
            CharacterAttribute charAttribute =
                Classification.CharAttributeOf((int)Classification.GetUnicodeClass(unicodeScalar));

            byte itemClass = charAttribute.ItemClass;
            isCombining = (itemClass == (byte)ItemClass.SimpleMarkClass
                        || itemClass == (byte)ItemClass.ComplexMarkClass);
            isStrong = (itemClass == (byte)ItemClass.StrongClass);

            int script = charAttribute.Script;
            needsCaretInfo = ScriptCaretInfo[script];

            ScriptID scriptId = (ScriptID)script;
            isDigit = (scriptId == ScriptID.Digit);
            isLatin = (scriptId == ScriptID.Latin);

            // Latin is never Indic, so the script comparison chain is skipped for it.
            isIndic = !isLatin && IsScriptIndic(scriptId);
        }

        /// <summary>
        /// Returns true if specified script is Indic.
        /// </summary>
        private static bool IsScriptIndic(ScriptID scriptId)
        {
            return scriptId == ScriptID.Bengali
                || scriptId == ScriptID.Devanagari
                || scriptId == ScriptID.Gurmukhi
                || scriptId == ScriptID.Gujarati
                || scriptId == ScriptID.Kannada
                || scriptId == ScriptID.Malayalam
                || scriptId == ScriptID.Oriya
                || scriptId == ScriptID.Tamil
                || scriptId == ScriptID.Telugu;
        }
    }

    /// <summary>
    /// Hold the classification table pointers.
    /// </summary>
    internal static class Classification
    {
        /// <summary>
        /// This structure has a cloned one in the unmanaged side. Doing any change in this
        /// structure should have the same change on unmanaged side too.
        /// </summary>
        [StructLayout(LayoutKind.Sequential)]
        internal unsafe struct CombiningMarksClassificationData
        {
            internal IntPtr CombiningCharsIndexes;   // Two dimensional array of base char classes,
            internal int    CombiningCharsIndexesTableLength;
            internal int    CombiningCharsIndexesTableSegmentLength;

            internal IntPtr CombiningMarkIndexes;    // Combining mark classes array, with length = length
            internal int    CombiningMarkIndexesTableLength;

            internal IntPtr CombinationChars;        // Two dimensional array of combined characters
            internal int    CombinationCharsBaseCount;
            internal int    CombinationCharsMarkCount;
        }

        /// <summary>
        /// This structure has a cloned one in the unmanaged side. Doing any change in this
        /// structure should have the same change in the unmanaged side too.
        /// </summary>
        [StructLayout(LayoutKind.Sequential)]
        internal unsafe struct RawClassificationTables
        {
            internal IntPtr UnicodeClasses;
            internal IntPtr CharacterAttributes;
            internal IntPtr Mirroring;
            internal CombiningMarksClassificationData CombiningMarksClassification;
        }

        /// <SecurityNote>
        /// Critical - as this code performs an elevation.
        /// Safe - This is an entry point that grabs several RO shared pages for text layout
        ///        purpose. Reading it directly won't yield any readable content to the reader, nor
        ///        will it yield any useful information to outside world as these are just loads of
        ///        Unicode classification hex data that only Text engine knows how to use - hence,
        ///        not an Information Disclosure threat.
        /// </SecurityNote>
        [SecurityCritical, SecurityTreatAsSafe]
        [SuppressUnmanagedCodeSecurity]
        [DllImport(Microsoft.Internal.DllImport.PresentationNative, EntryPoint="MILGetClassificationTables")]
        internal static extern void MILGetClassificationTables(out RawClassificationTables ct);

        /// <SecurityNote>
        /// Critical: This accesses unsafe code and retrieves pointers that it stores locally.
        ///           The pointers retrieved are not validated for correctness and they are later dereferenced.
        /// TreatAsSafe: The constructor is safe since it simply stores these pointers. The risk here
        ///           in the future is not of these pointers being spoofed since they are not settable from outside.
        /// </SecurityNote>
        [SecurityCritical, SecurityTreatAsSafe]
        static Classification()
        {
            unsafe
            {
                RawClassificationTables ct;
                MILGetClassificationTables(out ct);

                // The three table pointers are kept as opaque IntPtr values and are only
                // reinterpreted as typed pointers inside the critical accessors below.
                _unicodeClassTable = new SecurityCriticalData<IntPtr>(ct.UnicodeClasses);
                _charAttributeTable = new SecurityCriticalData<IntPtr>(ct.CharacterAttributes);
                _mirroredCharTable = new SecurityCriticalData<IntPtr>(ct.Mirroring);

                _combiningMarksClassification =
                    new SecurityCriticalData<CombiningMarksClassificationData>(ct.CombiningMarksClassification);
            }
        }

        /// <summary>
        /// Lookup Unicode character class for a Unicode UTF16 value
        /// </summary>
        /// <SecurityNote>
        /// Critical: This accesses unsafe code and dereferences a location in
        ///           a prepopulated Array. The risk is you might dereference a bogus memory
        ///           location.
        /// TreatAsSafe: This code is ok since it reduces codepoint to one of 256 possible
        ///           values and will always succeed. Also this information is ok to expose.
        /// </SecurityNote>
        [SecurityCritical, SecurityTreatAsSafe]
        public static short GetUnicodeClassUTF16(char codepoint)
        {
            unsafe
            {
                short** plane0 = UnicodeClassTable[0];
                Invariant.Assert((long)plane0 >= (long)UnicodeClass.Max);

                // A page entry whose numeric value is below UnicodeClass.Max is not a pointer:
                // it is the class shared by every character of that 256-character page.
                short* pcc = plane0[codepoint >> 8];
                return ((long)pcc < (long)UnicodeClass.Max
                    ? (short)pcc
                    : pcc[codepoint & 0xFF]);
            }
        }

        /// <summary>
        /// Lookup Unicode character class for a Unicode scalar value
        /// </summary>
        /// <SecurityNote>
        /// Critical: This accesses unsafe code and dereferences a pointer retrieved from unmanaged code
        /// TreatAsSafe: There is bounds checking in place and this dereferences a valid structure which
        ///           is guaranteed to be populated
        /// </SecurityNote>
        [SecurityCritical, SecurityTreatAsSafe]
        public static short GetUnicodeClass(int unicodeScalar)
        {
            unsafe
            {
                Invariant.Assert(unicodeScalar >= 0 && unicodeScalar <= 0x10FFFF);

                // At each level, a value below UnicodeClass.Max is returned as the class itself
                // instead of being followed as a pointer to the next level.
                short** ppcc = UnicodeClassTable[((unicodeScalar >> 16) & 0xFF) % 17];
                if ((long)ppcc < (long)UnicodeClass.Max)
                {
                    return (short)ppcc;
                }

                short* pcc = ppcc[(unicodeScalar & 0xFFFF) >> 8];
                if ((long)pcc < (long)UnicodeClass.Max)
                {
                    return (short)pcc;
                }

                return pcc[unicodeScalar & 0xFF];
            }
        }

        /// <summary>
        /// Compute Unicode scalar value from unicode codepoint stream
        /// </summary>
        /// <param name="unicodeString">Non-empty range of UTF16 code units to read from.</param>
        /// <param name="sizeofChar">
        /// Number of code units consumed: 2 when the range starts with a high surrogate
        /// followed by a low surrogate, otherwise 1.
        /// </param>
        /// <returns>
        /// The combined scalar value for a surrogate pair; otherwise the first code unit as is.
        /// </returns>
        internal static int UnicodeScalar(
            CharacterBufferRange unicodeString,
            out int sizeofChar
            )
        {
            Invariant.Assert(unicodeString.CharacterBuffer != null && unicodeString.Length > 0);

            int ch = unicodeString[0];
            sizeofChar = 1;

            if (unicodeString.Length >= 2
                && (ch & 0xFC00) == 0xD800
                && (unicodeString[1] & 0xFC00) == 0xDC00
                )
            {
                ch = (((ch & 0x03FF) << 10) | (unicodeString[1] & 0x3FF)) + 0x10000;
                sizeofChar++;
            }

            return ch;
        }

        /// <summary>
        /// Check whether the character is combining mark
        /// </summary>
        /// <SecurityNote>
        /// Critical: This code accesses a function call that returns a pointer (get_CharAttributeTable).
        ///           It trusts the value passed in to dereference the table with no implicit bounds or validity checks.
        /// TreatAsSafe: This information is safe to expose at the same time the unicodeScalar passed in
        ///           is validated for bounds
        /// </SecurityNote>
        [SecurityCritical, SecurityTreatAsSafe]
        public static bool IsCombining(int unicodeScalar)
        {
            unsafe
            {
                byte itemClass = Classification.CharAttributeTable[GetUnicodeClass(unicodeScalar)].ItemClass;

                return itemClass == (byte)ItemClass.SimpleMarkClass
                    || itemClass == (byte)ItemClass.ComplexMarkClass;
            }
        }

        /// <summary>
        /// Check whether the character is a joiner character
        /// </summary>
        /// <SecurityNote>
        /// Critical: This code accesses a function call that returns a pointer (get_CharAttributeTable).
        ///           It trusts the value passed in to dereference the table with no implicit bounds or validity checks.
        /// TreatAsSafe: This information is safe to expose at the same time the unicodeScalar passed in
        ///           is validated for bounds
        /// </SecurityNote>
        [SecurityCritical, SecurityTreatAsSafe]
        public static bool IsJoiner(int unicodeScalar)
        {
            unsafe
            {
                byte itemClass = Classification.CharAttributeTable[GetUnicodeClass(unicodeScalar)].ItemClass;

                return itemClass == (byte)ItemClass.JoinerClass;
            }
        }

        /// <summary>
        /// Scan UTF16 character string until a character with specified attributes is found
        /// </summary>
        /// <param name="charBuffer">Buffer holding the UTF16 code units.</param>
        /// <param name="offsetToFirstChar">Index in charBuffer where scanning starts.</param>
        /// <param name="stringLength">Maximum number of code units to scan.</param>
        /// <param name="mask">Flag bits that stop the scan when any of them is set on a character.</param>
        /// <param name="charFlags">
        /// Bitwise OR of the flags of every character scanned before the stop; the flags of the
        /// character that matched the mask are not included.
        /// </param>
        /// <returns>
        /// Character index, relative to offsetToFirstChar, of the first character matching the
        /// attribute, or stringLength when no character matches.
        /// </returns>
        /// <SecurityNote>
        /// Critical: This code accesses a function call that returns a pointer (get_CharAttributeTable).
        ///           It keeps accessing a buffer with no validation in terms of the variables passed in.
        /// TreatAsSafe: This information is safe to expose, as in the worst case it tells you information
        ///           of where the next UTF16 character is. Also the constructor for characterbuffer can be one of three:
        ///           a string, a char array or an unmanaged char*. The third case is critical and tightly controlled
        ///           so the risk of bogus length is significantly mitigated.
        /// </SecurityNote>
        [SecurityCritical, SecurityTreatAsSafe]
        public static int AdvanceUntilUTF16(
            CharacterBuffer charBuffer,
            int offsetToFirstChar,
            int stringLength,
            ushort mask,
            out ushort charFlags
            )
        {
            int i = offsetToFirstChar;
            int limit = offsetToFirstChar + stringLength;
            charFlags = 0;

            while (i < limit)
            {
                unsafe
                {
                    ushort flags = (ushort)Classification.CharAttributeTable[(int)GetUnicodeClassUTF16(charBuffer[i])].Flags;

                    if ((flags & mask) != 0)
                    {
                        break;
                    }

                    charFlags |= flags;
                }
                i++;
            }

            return i - offsetToFirstChar;
        }

        /// <summary>
        /// Scan character string until a character that is not the specified ItemClass is found
        /// </summary>
        /// <param name="unicodeString">Range of UTF16 code units to scan.</param>
        /// <param name="itemClass">Item class that the leading characters must have.</param>
        /// <returns>
        /// Character index of first character that is not the specified ItemClass, or the
        /// length of the range when every character has it.
        /// </returns>
        /// <SecurityNote>
        /// Critical: This code accesses a function call that returns a pointer (get_CharAttributeTable). It accesses
        ///           elements in an array with no type checking.
        /// TreatAsSafe: This code exposes the index of the next non UTF16 character in a run. This is ok to expose.
        ///           Also the calls to CharBuffer and CharAttribute do the requisite bounds checking.
        /// </SecurityNote>
        [SecurityCritical, SecurityTreatAsSafe]
        public static int AdvanceWhile(
            CharacterBufferRange unicodeString,
            ItemClass itemClass
            )
        {
            int i = 0;
            int limit = unicodeString.Length;
            int sizeofChar = 0;

            while (i < limit)
            {
                // Decode one scalar at a time so that a surrogate pair advances by two code units.
                int ch = Classification.UnicodeScalar(
                    new CharacterBufferRange(unicodeString, i, limit - i),
                    out sizeofChar
                    );

                unsafe
                {
                    byte currentClass = (byte)Classification.CharAttributeTable[(int)GetUnicodeClass(ch)].ItemClass;

                    if (currentClass != (byte)itemClass)
                    {
                        break;
                    }
                }

                i += sizeofChar;
            }

            return i;
        }

        /// <SecurityNote>
        /// Critical: This accesses unsafe code and returns a pointer
        /// </SecurityNote>
        private static unsafe short*** UnicodeClassTable
        {
            [SecurityCritical]
            get { return (short***)_unicodeClassTable.Value; }
        }

        /// <SecurityNote>
        /// Critical: This accesses unsafe code and returns a pointer
        /// </SecurityNote>
        private static unsafe CharacterAttribute* CharAttributeTable
        {
            [SecurityCritical]
            get { return (CharacterAttribute*)_charAttributeTable.Value; }
        }

        /// <summary>
        /// Returns the character attribute entry for the given character class.
        /// </summary>
        /// <SecurityNote>
        /// Critical: This accesses unsafe code and indexes into an array
        /// Safe    : This method does bound check on the input char class.
        /// </SecurityNote>
        [SecurityCritical, SecurityTreatAsSafe]
        internal static CharacterAttribute CharAttributeOf(int charClass)
        {
            unsafe
            {
                Invariant.Assert(charClass >= 0 && charClass < (int)UnicodeClass.Max);
                return CharAttributeTable[charClass];
            }
        }

        // Table pointers obtained once from MILGetClassificationTables in the static constructor.
        private static readonly SecurityCriticalData<IntPtr> _unicodeClassTable;
        private static readonly SecurityCriticalData<IntPtr> _charAttributeTable;
        private static readonly SecurityCriticalData<IntPtr> _mirroredCharTable;

        private static readonly SecurityCriticalData<CombiningMarksClassificationData> _combiningMarksClassification;
    }
}

// File provided for Reference Use Only by Microsoft Corporation (c) 2007.
Link Menu

This book is available now!
Buy at Amazon US or
Buy at Amazon UK
- StoreContentChangedEventArgs.cs
- PopupRootAutomationPeer.cs
- ViewLoader.cs
- WebConfigurationManager.cs
- TransactionState.cs
- RegularExpressionValidator.cs
- SqlBooleanizer.cs
- CacheModeConverter.cs
- base64Transforms.cs
- ToolStripPanel.cs
- StoryFragments.cs
- recordstatescratchpad.cs
- ValidationErrorInfo.cs
- UntypedNullExpression.cs
- CroppedBitmap.cs
- ConfigurationException.cs
- SHA1.cs
- DataControlField.cs
- WebPartMinimizeVerb.cs
- PasswordRecoveryDesigner.cs
- LoadGrammarCompletedEventArgs.cs
- TransformDescriptor.cs
- CngKeyBlobFormat.cs
- UpDownEvent.cs
- WebPartEditorOkVerb.cs
- EmptyStringExpandableObjectConverter.cs
- EventManager.cs
- WebProxyScriptElement.cs
- Timer.cs
- ObservableCollection.cs
- DynamicMethod.cs
- XmlProcessingInstruction.cs
- SqlBulkCopyColumnMapping.cs
- MailMessageEventArgs.cs
- Int32Collection.cs
- ValidationEventArgs.cs
- MethodAccessException.cs
- ServiceHttpHandlerFactory.cs
- XPathChildIterator.cs
- DataGridViewColumn.cs
- DescendantBaseQuery.cs
- RegexCaptureCollection.cs
- BrushValueSerializer.cs
- Math.cs
- BaseProcessor.cs
- SymbolEqualComparer.cs
- ExtendedProperty.cs
- Helpers.cs
- AsymmetricKeyExchangeFormatter.cs
- SignatureHelper.cs
- ContractType.cs
- ToolboxDataAttribute.cs
- MenuCommand.cs
- SendReply.cs
- ToolBarButtonClickEvent.cs
- BulletedListEventArgs.cs
- InfoCardRSAPKCS1SignatureFormatter.cs
- TransactionFlowOption.cs
- wmiprovider.cs
- CodeParameterDeclarationExpressionCollection.cs
- SQLDecimalStorage.cs
- ClientProtocol.cs
- loginstatus.cs
- HasCopySemanticsAttribute.cs
- SystemIcmpV4Statistics.cs
- EntityDataSourceStatementEditorForm.cs
- Point3D.cs
- RC2.cs
- NotConverter.cs
- ILGenerator.cs
- RenderData.cs
- PersonalizationState.cs
- BaseCAMarshaler.cs
- DataGridHeaderBorder.cs
- XPathNavigatorReader.cs
- Registry.cs
- Double.cs
- MultidimensionalArrayItemReference.cs
- WebPartAddingEventArgs.cs
- SubMenuStyleCollection.cs
- MarshalByValueComponent.cs
- BindableAttribute.cs
- AutoResizedEvent.cs
- InternalsVisibleToAttribute.cs
- DataBoundControlAdapter.cs
- ProjectionPathBuilder.cs
- SessionStateUtil.cs
- MulticastNotSupportedException.cs
- AnimationTimeline.cs
- Parallel.cs
- SiteMapNodeItemEventArgs.cs
- MessageProtectionOrder.cs
- NumberFormatInfo.cs
- SBCSCodePageEncoding.cs
- Exception.cs
- FlowLayout.cs
- TypeSemantics.cs
- RoutingConfiguration.cs
- GlyphShapingProperties.cs
- NegationPusher.cs