// thaishape.cs source code in C# .NET
//
// Code:
// / DotNET / DotNET / 8.0 / untmp / WIN_WINDOWS / lh_tools_devdiv_wpf / Windows / wcp / Core / MS / Internal / Shaping / thaishape.cs / 2 / thaishape.cs
                        
                        
                            //---------------------------------------------------------------------- 
//
//  Microsoft Windows Client Platform
//  Copyright (C) Microsoft Corporation, 2003
// 
//  File:      ThaiShape.cs
// 
//  Contents:  Implementation of Thai shaping engine and its factory 
//
//  Created:   08-05-2003 [....] ([....]) 
//
//-----------------------------------------------------------------------

// #define VALIDATE_CLUSTER_PARAMETERS 

using System; 
using System.Security; 
using System.Security.Permissions;
using System.Diagnostics; 
using System.Collections;
using System.Globalization;
using System.Windows;
using System.Windows.Media; 
using System.Windows.Media.TextFormatting;
using MS.Internal.FontCache; 
using MS.Internal.FontFace; 
using MS.Internal.PresentationCore;
 

namespace MS.Internal.Shaping
{
 
    /// <summary>
    /// ThaiCharClass - Thai/Lao character classes, listed by ordinal position.
    /// </summary>
    /// <remarks>
    /// The numeric values are used as indices into the lookup tables declared
    /// in ThaiClusterCop (clusterStateMap, CharClassToFlag and
    /// BaseDiacriticValidationTable), so neither the order nor the values may
    /// change without updating those tables.
    /// </remarks>
    internal enum ThaiCharClass : byte
    {
        // --- characters that are not diacritics ---
        UnknownCharClass        = 0,    // Unclassified character
        AcceptsNoMark           = 1,    // Cannot take diacritics
        LaoAmChar               = 2,    // The Lao Am character
        ThaiAmChar              = 3,    // The Thai Am character
        BaseAcceptsLower        = 4,    // Base: below marks only, no upper diacritic
        BaseAcceptsMarks        = 5,    // Base: may carry diacritics
        SpaceChar               = 6,    // Space: may be the base of one mark

        // range aliases for the base-like classes
        FirstBaseChar           = BaseAcceptsLower,
        LastBaseChar            = SpaceChar,

        // --- diacritics ---
        AboveMarkClosest        = 7,    // Above mark closest to base
        AboveMarkClose          = 8,    // Second level above mark
        AboveMarkFurther        = 9,    // Third level above mark
        AboveMarkFurthest       = 10,   // Fourth level above mark
        BelowMarkClosest        = 11,   // Below mark closest to base
        BelowMarkFurthest       = 12,   // Second level below mark
        LastDiacritic           = BelowMarkFurthest,

        // --- everything else ---
        UnicodeLayoutControl    = 13,   // ZWJ / ZWNJ style control characters
        NumberOfThaiCharClasses = 14,   // count of distinct classes (not a class itself)
    }

 
    /// <summary>
    /// The Thai Shaping Engine - shapes Thai and Lao text.
    /// </summary>
    /// <remarks>
    /// Derives from BaseShape and overrides the pieces that are specific to
    /// the Thai and Lao scripts.  Two helper types in this file do most of
    /// the work:
    /// 1.) ThaiCharClassifier - supplies the character classification
    /// 2.) ThaiClusterCop - validates and reorders the diacritics of a cluster
    /// </remarks>
    internal sealed class ThaiShape : BaseShape
    {
        // The scripts this engine reports through SupportedScripts.
        private static readonly ScriptTags[] _supportedScripts =
            new ScriptTags[] { ScriptTags.Thai, ScriptTags.Lao };


        //--------------------------------------
        //
        //  Constructors
        //
        //--------------------------------------

#region Constructors

        /// <summary>
        /// Constructor for the Thai Open Type Shaping Engine.
        /// </summary>
        internal ThaiShape()
        {
            // Zero-width forcing stays off: at least one Thai font
            // (Angsana UPC) classifies sara aa as a mark even though
            // it is not a zero width mark.
            _forceDiacriticsToZeroWidth = false;
        }

#endregion

        //--------------------------------------
        //
        //  Internal Methods
        //
        //--------------------------------------

#region Internal methods

        /// <summary>
        /// ThaiShape.GetCharClassifier - supplies the Thai/Lao classifier
        /// </summary>
        /// <param name="scriptTag">script being shaped</param>
        /// <param name="fontFace">font face being shaped with</param>
        /// <returns>a new ThaiCharClassifier for the script and font</returns>
        protected override ShaperCharacterClassifier GetCharClassifier(ScriptTags scriptTag, GlyphTypeface fontFace)
        {
            ThaiCharClassifier classifier = new ThaiCharClassifier(scriptTag, fontFace);
            return classifier;
        }


        /// <summary>
        ///     ThaiShape.GetGlyphs - Thai override of the GetGlyphs() helper function.
        /// </summary>
        /// <remarks>
        /// Characters flagged IsStartOfCluster get their glyph properties
        /// straight from the shape info.  Every other character is handed
        /// to the ThaiClusterCop, which consumes the rest of the cluster and
        /// reports whether its glyphs have to be rewritten afterwards.
        /// </remarks>
        /// <param name="currentRun">shaping currentRun</param>
        /// <param name="item">Text item</param>
        /// <returns>number of glyphs</returns>
        /// <SecurityNote>
        /// Critical - calls critical code
        /// </SecurityNote>
        [SecurityCritical]
        unsafe protected override int GetGlyphs ( ref ShapingWorkspace currentRun, Item item )
        {
            // Scratch buffer for diacritic reordering; stack allocated so it
            // is fast to create and needs no cleanup.
            ushort* reorderBuffer = stackalloc ushort[ ThaiClusterCop.ReorderingArraySize ];
            ThaiClusterCop clusterCop = new ThaiClusterCop( reorderBuffer );

            // true while the most recent cluster still needs its glyphs rewritten
            bool reorderPending = false;

            CharShapeInfo shape;
            while ( currentRun.GetNextShape(out shape) )
            {
                bool startsCluster = (shape & CharShapeInfo.IsStartOfCluster) != 0;

                if (!startsCluster)
                {
                    // diacritic (or other non-base): let the cop build the cluster
                    reorderPending = clusterCop.AddCluster( ref currentRun, shape );
                    continue;
                }

                // A new cluster begins here, so the previous one is complete;
                // rewrite its glyphs now if that was requested.
                if (reorderPending)
                {
                    clusterCop.GetReorderedGlyphs( ref currentRun );
                    reorderPending = false;
                }

                currentRun.SetGlyphPropertiesUsingShapeInfo(shape);
            }

            // the run may have ended in the middle of a pending cluster
            if (reorderPending)
            {
                clusterCop.GetReorderedGlyphs( ref currentRun );
            }

            return currentRun.GlyphsCount;
        }

        /// <summary>
        /// ThaiShape.SupportedScripts -
        ///  IShapingEngine member override
        /// </summary>
        /// <returns>Our supported scripts (Thai, Lao).</returns>
        public override ScriptTags[] SupportedScripts
        {
            get
            {
                return _supportedScripts;
            }
        }

#endregion

    }

    /// <summary>
    /// Class ThaiClusterCop:
    ///  Manages the diacritic ordering requirement
    /// </summary>
    /// <remarks>
    /// This class is used to provide diacritic ordering and invalid
    /// diacritic enforcement for the diacritics and other marks
    /// found in the unicode stream.
    /// Diacritic ordering is done by maintaining an array whose extent
    /// is ThaiClusterCop.ReorderingArraySize.
    /// See the ThaiCharClass enum for comments about the
    /// char classes.
    /// For each base character in the unicode run, the diacritics and
    /// other marks are added to the array at the index given by the
    /// ThaiClusterState that clusterStateMap assigns to the mark's
    /// ThaiCharClass value.  Only one mark per ThaiClusterState
    /// value is allowed per base character.  Once the extent of the valid
    /// diacritics and marks has been determined (ie, when an
    /// illegal mark is found, when a new base is found, or at the end of
    /// the unicode run) the array is used to reorder the glyphs collected in
    /// the GlyphList object if reordering is needed (only lower marks are
    /// reordered - out-of-order upper marks result in a dotted circle base
    /// being inserted).
    /// The diacritics reordering done here is not consistent with Unicode
    /// canonical ordering, but is instead consistent with the needs of
    /// current Thai OpenType fonts.
    /// </remarks>
    internal struct ThaiClusterCop
    {
        // Class of the character the current cluster is attached to; taken
        // from the previous character's shape info in StartCluster.
        private ThaiCharClass     _baseCharClass;     // base char's class info 

        /// <SecurityNote>
        /// Critical - unsafe pointers 
        /// </SecurityNote>
        [SecurityCritical] 
        unsafe private ushort*     _clusterGlyphs; // our temporary array for reordering (caller-owned, ReorderingArraySize entries, 0 == empty slot)

        private bool               _clusterIsInvalid; // set when an illegal base/mark pair or a second mark of the same state is seen
 
        private byte               _clusterSize;   // number of diacritics currently in cluster
        private ThaiClusterState   _clusterState;  // current reordering state 
        private bool               _clusterRequiresReordering; // a mark arrived that sorts below one already in the cluster
        private bool               _clusterHasSaraAm;          // an Am character was accepted into the current cluster
 
        private ushort             _nextReorderedGlyphIx;  // for returning reordered glyphs (0 if no reordering)
        private ushort             _lastReorderedGlyphIx;  // for returning reordered glyphs (0 if no reordering)
        private bool               _reorderingIsSuppressed; // once set, later out-of-order marks no longer request reordering
        private bool               _textHasUnicodeControlChars; // true if control chars are found to exist in text 

        private ushort             _nikhahitGlyph; // cached glyph for the nikhahit half of a decomposed Am (0 until looked up)
        private ushort             _aaGlyph;       // cached glyph for the sara aa half of a decomposed Am (0 until looked up)
        private ushort             _firstCharIx;   // currentRun.CurrentCharIx captured when the current cluster was started
 
        /// <summary>
        /// ThaiClusterCop constructor
        /// </summary>
        /// <param name="clusterGlyphs">
        /// Caller-owned scratch buffer holding at least
        /// ThaiClusterCop.ReorderingArraySize entries.
        /// </param>
        /// <SecurityNote>
        /// Critical - This method writes into unsafe buffer
        /// </SecurityNote>
        [SecurityCritical]
        unsafe public ThaiClusterCop ( ushort *clusterGlyphs )
        {
            // Clear the reordering buffer; an entry of zero means "slot unused".
            for (int slot = 0; slot < ThaiClusterCop.ReorderingArraySize; ++slot)
            {
                clusterGlyphs[slot] = 0;
            }
            _clusterGlyphs = clusterGlyphs;

            // no cluster has been seen yet
            _baseCharClass = ThaiCharClass.UnknownCharClass;
            _clusterState = ThaiClusterState.BaseChar;
            _clusterSize = 0;
            _firstCharIx = 0;

            // reordering bookkeeping
            _nextReorderedGlyphIx = 0;
            _lastReorderedGlyphIx = 0;

            // per-cluster flags
            _clusterIsInvalid = false;
            _reorderingIsSuppressed = false;
            _clusterHasSaraAm = false;
            _clusterRequiresReordering = false;
            _textHasUnicodeControlChars = false;

            // glyphs for a decomposed Am are looked up on first use
            _nikhahitGlyph = 0;
            _aaGlyph = 0;
        }
 
        /// <summary>
        /// ThaiClusterCop.AddCluster - adds as many characters as possible to the
        ///                               current cluster 
        /// </summary>
        /// <remarks>
        /// This routine is used to create a diacritics cluster on the current
        /// base char.  It continues to add characters till it notes a non-legal 
        /// character.  Enforces member count restrictions for the various diacritic
        /// classes and keeps track of reordering needs. 
        /// Each accepted mark's glyph is stored in the reordering array at the
        /// slot given by clusterStateMap; every processed character also has its
        /// glyph properties written to currentRun.
        /// </remarks>
        /// <param name="currentRun">shaping workspace positioned on the first non-base character</param>
        /// <param name="currShape">shape info of that character</param>
        /// <returns>
        ///     true if the cluster needs its glyphs rewritten by GetReorderedGlyphs,
        ///     i.e. marks arrived out of order or the cluster contains an Am character
        /// </returns>
        /// <SecurityNote>
        /// Critical - calls critical code, uses unsafe accessors
        /// </SecurityNote>
        [SecurityCritical]
        unsafe internal bool AddCluster ( ref ShapingWorkspace currentRun, 
                                             CharShapeInfo currShape ) 
        {
            // StartCluster resets the per-cluster state and validates the
            // base/first-mark pair; it may set _clusterIsInvalid.
            bool isClusterInProgress = StartCluster( ref currentRun, currShape ); 
            char currChar = currentRun.CurrentChar;
            ushort currGlyph = currentRun.CharConverter.ToGlyph(currChar);

            do { 
                ThaiCharClass currClass = (ThaiCharClass)(currShape & CharShapeInfo.ShaperClassMask);
                // add this character's glyph to the cluster 
                if ( _clusterIsInvalid) 
                {
                    // The cluster was already rejected: this character gets an
                    // inserted base and the loop ends after it
                    // (isClusterInProgress is cleared).
                    _reorderingIsSuppressed = true; 
                    currShape |= CharShapeInfo.RequiresInsertedBase;
                    isClusterInProgress = false;

                    if ( (currShape & CharShapeInfo.RequiresSpecialHandling) != 0 && 
                          IsSaraAmDecomposable(ref currentRun, currClass) )
                    { 
                        // decompose this into 2 glyphs (after inserting 
                        // the dotted circle glyph).
                        currentRun.AddGlyphs( 1 ); 
                        currentRun.SetGlyphPropertiesUsingGlyph( currShape, _nikhahitGlyph );

                        // the second half (sara aa) does not need the inserted
                        // base; it is flagged as the start of a cluster instead
                        currShape ^= CharShapeInfo.RequiresInsertedBase;
                        currShape |= CharShapeInfo.IsStartOfCluster; 
                        currentRun.CurrentShape = currShape;
                        currentRun.SetGlyphProperties(_aaGlyph); 
                        // jumps to the loop condition, which fails because
                        // isClusterInProgress is false
                        continue; 
                    }
                } 
                else
                {

                    // step 1. get the reordering position 
                    ThaiClusterState clusterIx = ThaiClusterCop.clusterStateMap[ (int)currClass ];
 
 
                    // step 2. validate this character
                    switch (clusterIx) 
                    {
                        case ThaiClusterState.BaseChar:
                             // This marks the end of the cluster.  We'll have to re-process this character
                            currentRun.Reset(); 
                            return (_clusterRequiresReordering || _clusterHasSaraAm);      // leave now.
 
                        case ThaiClusterState.UnicodeLayoutControlPresent: 
                            // control characters are passed through but are never
                            // stored in the reordering array
                            _clusterRequiresReordering = false;
                            _textHasUnicodeControlChars = true; 
                            _reorderingIsSuppressed = true;      // we don't want to confuse things by reordering
                            currShape |= CharShapeInfo.IsUnicodeLayoutControl;

                            break; 

                        default: 
                            // - don't allow two characters of the same 
                            // class in a cluster
                            // (also rejected: anything after an Am, and marks
                            // the base class does not accept)
                            if ( _clusterHasSaraAm || 
                                (_clusterGlyphs[ (int) clusterIx] != 0) ||
                                 IsIllegalPair(_baseCharClass,currClass)  )
                            {
                                _clusterIsInvalid = _reorderingIsSuppressed = true;      // for consistency with unmanaged Uniscribe 

                                // add a dotted circle (except if the preceding character or this character is a ZWJ) 
                                if (!_textHasUnicodeControlChars || 
                                   (currentRun.PreviousChar != UnicodeCharacter.ZWJ))
                                { 
                                    // no valid base char precedes this char, then we must
                                    // insert a dotted circle in the shaping info
                                    currShape |= CharShapeInfo.RequiresInsertedBase;
 
                                    if ( (currShape & (CharShapeInfo)ThaiCharClassifier.IsAmChar) != 0 &&
                                          IsSaraAmDecomposable(ref currentRun, currClass) ) 
                                    { 
                                        // decompose this into 2 glyphs
                                        currentRun.AddGlyphs( 1 ); 
                                        currentRun.SetGlyphPropertiesUsingGlyph( currShape, _nikhahitGlyph );

                                        // the sara aa half is written by the common
                                        // SetGlyphPropertiesUsingGlyph call below
                                        currShape = CharShapeInfo.NoFlagsSet;
                                        currGlyph =_aaGlyph; 
                                    }
                                } 
 
                                isClusterInProgress = false;
                            } 

                            else
                            {
                                // if this is a sara am, decompose it if possible 
                                if ((currShape & CharShapeInfo.RequiresSpecialHandling) != 0)
                                { 
                                    _clusterHasSaraAm = true; 

                                    if (IsSaraAmDecomposable(ref currentRun, currClass)) 
                                    {
                                        // the nikhahit half goes into the reordering array
                                        currGlyph = _nikhahitGlyph;
                                    }
                                } 

                                // All's well! 
                                // step 3.  As long as reordering isn't suppressed, save the current glyph in the 
                                // reordering array.  (We may not need to reorder, but we might)
 
                                if (currGlyph == 0)
                                {
                                    // this isn't expected, but the reordering array uses 0
                                    // to mean "empty slot", so a zero glyph cannot be stored
                                    _reorderingIsSuppressed = true;
                                } 
                                else
                                { 
                                    _clusterGlyphs[ (ushort)clusterIx ] = currGlyph; 
                                    _clusterSize++;
 
                                    // step 4. update the reordering state
                                    if ( _lastReorderedGlyphIx < (ushort)clusterIx )
                                    {
                                        // Keep _lastReorderedGlyphIx at the "highest" member of the 
                                        // reordering array
                                        _lastReorderedGlyphIx = (ushort)clusterIx; 
                                    } 

                                    if (clusterIx >= _clusterState) 
                                    {
                                        // no reordering needed thus far...
                                        // keep track of the "highest" cluster member
                                        _clusterState = clusterIx; 

                                    } 
                                    else if ( !_reorderingIsSuppressed ) 
                                    {
                                        // this current char is "lower" than a previous character 
                                        // in the cluster so we're gonna need to reorder this cluster
                                        _clusterRequiresReordering = true;
                                    }
 
                                    // for a decomposed Am the glyph list receives the
                                    // sara aa half (the array holds the nikhahit half)
                                    if (_clusterHasSaraAm && IsSaraAmDecomposable(ref currentRun, currClass))
                                    { 
                                        currGlyph = _aaGlyph; 
                                    }
                                } 
                            }

                            break;
                    } 
                }
 
                // Add this glyph and shape to the glyphlist 
                currentRun.SetGlyphPropertiesUsingGlyph(currShape,currGlyph);
 

            } while ( isClusterInProgress &&
                      currentRun.GetNextCharProperties (out currChar, out currGlyph, out currShape) );
 
                // tells the caller whether GetReorderedGlyphs must run for this cluster
                return (_clusterRequiresReordering || _clusterHasSaraAm);
        } 
 
        /// <summary>
        ///  ThaiClusterCop.IsSaraAmDecomposable - can the Am char be split into
        ///  nikhahit + sara aa with the current font?
        /// </summary>
        /// <remarks>
        /// The two component glyphs are looked up through the run's char
        /// converter and cached in _nikhahitGlyph / _aaGlyph.  Once both are
        /// non-zero the cached values are reused without another lookup,
        /// whatever class is passed in.
        /// </remarks>
        /// <param name="currentRun">shaping workspace supplying the char converter</param>
        /// <param name="currClass">ThaiAmChar selects the Thai code points; anything else selects the Lao ones</param>
        /// <returns>true if sara am is decomposed</returns>
        /// <SecurityNote>
        /// Critical - calls critical code, uses pointers
        /// </SecurityNote>
        [SecurityCritical]
        unsafe private bool IsSaraAmDecomposable(ref ShapingWorkspace currentRun,
                                                ThaiCharClass currClass)
        {
            // fast path: both component glyphs were found earlier
            if (_nikhahitGlyph != 0 && _aaGlyph != 0)
            {
                return true;
            }

            IScriptCharConverter converter = currentRun.CharConverter;

            // pick the component code points for the script
            bool isThaiAm = (currClass == ThaiCharClass.ThaiAmChar);
            char nikhahitChar = isThaiAm ? '\u0e4d' : '\u0ecd';
            char aaChar       = isThaiAm ? '\u0e32' : '\u0eb2';

            _nikhahitGlyph = converter.ToGlyph(nikhahitChar);
            _aaGlyph = converter.ToGlyph(aaChar);

            // decomposition needs both glyphs to exist in the font
            return _nikhahitGlyph != 0 && _aaGlyph != 0;
        }

        /// <summary>
        ///  ThaiClusterCop.GetNextGlyph - get the next reordered glyph
        /// </summary>
        /// <remarks>
        /// This function is called to fetch each member of a reordered cluster.
        /// It scans the reordering array upwards from the slot after the one
        /// returned last time, skipping empty (zero) slots, and decrements
        /// _clusterSize for each glyph handed out.  The array itself is not
        /// modified here.
        /// When nothing is left to return the function yields 0 and forces
        /// _clusterSize to 0.
        /// </remarks>
        /// <returns>next reordered glyph, or 0 if there is none</returns>
        /// <SecurityNote>
        /// Critical - calls critical code, uses pointers
        /// </SecurityNote>
        [SecurityCritical]
        unsafe private ushort GetNextGlyph()
        {
            bool hasPendingGlyphs = _lastReorderedGlyphIx != 0 &&
                                    _clusterSize != 0 &&
                                    _lastReorderedGlyphIx < (ushort) ReorderingArraySize;

            if (!hasPendingGlyphs)
            {
                _clusterSize = 0;
                return 0;
            }

            ushort highestSlot = _lastReorderedGlyphIx;
            ushort slot = _nextReorderedGlyphIx;
            ushort glyph;

            do
            {
                if (++slot > highestSlot)
                {
                    // ran off the end of the occupied range
                    _clusterSize = 0;
                    return 0;
                }

                glyph = _clusterGlyphs[ slot ];
            }
            while (glyph == 0);

            // remember where to resume on the next call
            _nextReorderedGlyphIx = slot;
            --_clusterSize;

            return glyph;
        }
 
        /// <summary>
        /// ThaiClusterCop.GetReorderedGlyphs - writes the cluster's glyphs back
        /// into the run in reordering-array order.
        /// </summary>
        /// <param name="currentRun">shaping workspace whose glyphs are rewritten</param>
        /// <returns>
        /// the value of ReorderedCount on entry (0 when nothing had to be rewritten)
        /// </returns>
        /// <SecurityNote>
        /// Critical - calls critical code, uses pointers
        /// </SecurityNote>
        [SecurityCritical]
        unsafe internal int GetReorderedGlyphs( ref ShapingWorkspace currentRun )
        {
            ushort reorderedCount = ReorderedCount;
            if (reorderedCount == 0)
            {
                return reorderedCount;
            }

            Debug.Assert (_firstCharIx + _clusterSize - (currentRun.IsFinished?1:0) <= currentRun.CurrentCharIx);
            ushort targetGlyphIx = currentRun.GetGlyphIx( _firstCharIx);

            if (_clusterHasSaraAm)
            {
                // A decomposed sara am contributes two glyphs: the glyph list
                // currently holds the sara aa half while the reordering array
                // holds the nikhahit half, and "reorderedCount" does not count
                // the extra glyph.  Make room for it just before the sara aa
                // glyph (normally the last glyph added so far); the loop below
                // then writes all the diacritics in their right order.
                ushort insertAt = (ushort)(targetGlyphIx + reorderedCount - 1);
                currentRun.InsertGlyphs( insertAt, (ushort)1 );
            }

            // replace the reordered glyphs; GetNextGlyph counts _clusterSize down
            while ( _clusterSize > 0 )
            {
                ushort glyph = GetNextGlyph();
                currentRun.SetGlyph(targetGlyphIx, glyph);
                targetGlyphIx++;
            }

            _clusterRequiresReordering = false;

            return reorderedCount;
        }
 

        /// <summary>
        /// ThaiClusterCop.IsIllegalPair - Validates a base/mark combo
        /// </summary>
        /// <remarks>
        /// Uniscribe does not verify that a given mark is legal on
        /// a given base. Thus, our behavior is different than legacy
        /// implementation.  A space base is reported as illegal here
        /// (its flag is not part of AllLegalBaseChars); callers that
        /// want to allow a mark on a space must check for that themselves.
        /// </remarks>
        /// <param name="baseClass">current base char</param>
        /// <param name="markClass">class of diacritic</param>
        /// <returns>
        /// true if the diacritic can't be attached to the base: either the
        /// base is not a legal base class, or the base's validation mask does
        /// not include the mark's flag
        /// </returns>
        private static bool IsIllegalPair (ThaiCharClass baseClass, ThaiCharClass markClass)
        {
            // Only "accepts lower" / "accepts marks" bases may carry a mark.
            ThaiCharClassFlags baseFlag = CharClassToFlag[(int)baseClass];
            if ((baseFlag & ThaiCharClassFlags.AllLegalBaseChars) == 0)
            {
                return true;
            }

            // The base is legal; does it accept this particular mark?
            ThaiCharClassFlags markFlag = CharClassToFlag[(int)markClass];
            ThaiCharClassFlags acceptedMarks = BaseDiacriticValidationTable[(int)baseClass];
            return (markFlag & acceptedMarks) == 0;
        }
 
        /// <summary>
        /// ThaiClusterCop.ReorderedCount - get size of cluster
        /// </summary>
        /// <remarks>
        /// Yields the number of glyphs held in the reordering array when the
        /// cluster has to be rewritten (out-of-order marks or an Am character),
        /// otherwise 0.  This number doesn't include the base character.
        /// </remarks>
        internal ushort ReorderedCount
        {
            get
            {
                if (_clusterRequiresReordering || _clusterHasSaraAm)
                {
                    return (ushort)_clusterSize;
                }

                return (ushort)0;
            }
        }
 

        /// <summary>
        /// ThaiClusterCop.ResetClusterCop - return to the "no cluster" state
        /// </summary>
        /// <remarks>
        /// Clears the whole reordering array and the per-cluster flags.
        /// _baseCharClass, _textHasUnicodeControlChars, _firstCharIx and the
        /// cached Am component glyphs are left untouched.
        /// </remarks>
        /// <SecurityNote>
        /// Critical - This method writes into unsafe buffer
        /// </SecurityNote>
        [SecurityCritical]
        unsafe private void ResetClusterCop ()
        {
            // The array is cleared unconditionally: a cluster that never
            // needed reordering leaves its glyphs behind in the array.
            for (int slot = 0; slot < ThaiClusterCop.ReorderingArraySize; ++slot)
            {
                _clusterGlyphs[slot] = 0;
            }
            _clusterSize = 0;

            _clusterState = ThaiClusterState.BaseChar;

            _clusterIsInvalid = false;
            _reorderingIsSuppressed = false;
            _clusterHasSaraAm = false;
            _clusterRequiresReordering = false;

            _nextReorderedGlyphIx = 0;
            _lastReorderedGlyphIx = 0;
        }


        /// <summary>
        /// ThaiClusterCop.StartCluster - start new cluster (potentially)
        /// </summary>
        /// <remarks>
        /// This routine is called with the first non-base member of a new
        /// cluster.  It resets the ClusterCop, records the character index
        /// of the cluster start and decides whether the preceding character
        /// plus this one can begin a cluster.  It does not emit any glyph
        /// itself.
        /// </remarks>
        /// <param name="currentRun">shaping workspace positioned on the first non-base character</param>
        /// <param name="currShape">shape info of that character</param>
        /// <returns>true if more characters can be added to this cluster</returns>
        /// <SecurityNote>
        /// Critical - calls critical code, uses unsafe accessors
        /// </SecurityNote>
        [SecurityCritical]
        unsafe private bool StartCluster( ref ShapingWorkspace currentRun,
                                            CharShapeInfo currShape)
        {
            ResetClusterCop();

            // the character in front of this one is the presumed base
            CharShapeInfo baseShape = currentRun.PreviousShape;

            // Keep track of our current character index for checking
            // reordering later (when next base is detected)
            _firstCharIx = currentRun.CurrentCharIx;

            // note any ZWNJ/ZWJ chars on either side of the pair
            CharShapeInfo pairShape = currShape | baseShape;
            _textHasUnicodeControlChars =
                  (pairShape & CharShapeInfo.IsUnicodeLayoutControl) != 0;

            if ( _textHasUnicodeControlChars )
            {
                // if this is a ZW joiner just allow whatever follows
                _reorderingIsSuppressed = true;
                return !_clusterIsInvalid;
            }

            _baseCharClass = (ThaiCharClass)(baseShape & CharShapeInfo.ShaperClassMask);
            ThaiCharClass markClass = (ThaiCharClass)(currShape & CharShapeInfo.ShaperClassMask);
            _clusterIsInvalid = IsIllegalPair(_baseCharClass, markClass);

            if (_clusterIsInvalid)
            {
                if (currentRun.PreviousChar == UnicodeCharacter.NoBreakSpace)
                {
                    // if the base is a NBSP, this diacritic is acceptable (and we're done).
                    _clusterIsInvalid = false;
                    return false;
                }

                // NOTE(review): currShape is passed by value, so this flag never
                // reaches the caller - confirm whether it was meant to.
                currShape |= CharShapeInfo.IsStartOfCluster;
            }

            // a valid pair means the cluster can keep growing
            return !_clusterIsInvalid;
        }
 

        /// <summary>
        /// ThaiClusterCop.ThaiClusterState: state definitions
        /// </summary>
        /// <remarks>
        /// The values double as slot indices into the reordering array, and
        /// NumberOfClusterStates fixes that array's size.
        /// </remarks>
        private enum ThaiClusterState : byte
        {
            BaseChar                    = 0,
            AboveMarkClosestPresent     = 1,    // Above mark closest to base
            AboveMarkClosePresent       = 2,    // Second level above mark
            AboveMarkFurtherPresent     = 3,    // Third level above mark
            AboveMarkFurthestPresent    = 4,    // Fourth level above mark
            BelowMarkClosestPresent     = 5,    // Below mark closest to base
            BelowMarkFurthestPresent    = 6,    // Second level below mark
            UnicodeLayoutControlPresent = 7,
            SaraAmPresent               = 8,
            NumberOfClusterStates       = 9
        }
 
        // Number of entries the caller must provide in the reordering buffer
        // handed to the constructor: one slot per ThaiClusterState value.
        public static readonly int ReorderingArraySize = (int)ThaiClusterState.NumberOfClusterStates;

        // this table is used to determine if reordering will be required.
        // Indexed by ThaiCharClass; the result is the slot the character's
        // glyph takes in the reordering array.  BaseChar means "not a mark":
        // such a character ends the cluster.
        private static readonly ThaiClusterState[] clusterStateMap =
        {
            // This maps all the Thai char classes to the corresponding cluster
            // state.
            ThaiClusterState.BaseChar, // Unknown class
            ThaiClusterState.BaseChar, // AcceptsNoMark -- cannot take diacritics
            ThaiClusterState.AboveMarkClosestPresent, // The Lao Am character
            ThaiClusterState.AboveMarkClosestPresent, // The Thai Am character
            ThaiClusterState.BaseChar, // No upper diacritic is allowed. below is okay
            ThaiClusterState.BaseChar, // Base glyph can have diacritics
            ThaiClusterState.BaseChar, // space may be base of mark (only one)
            ThaiClusterState.AboveMarkClosestPresent, // Above mark closest to base
            ThaiClusterState.AboveMarkClosePresent, // Second level above mark
            ThaiClusterState.AboveMarkFurtherPresent, // Third level above mark
            ThaiClusterState.AboveMarkFurthestPresent, // Fourth level above mark
            ThaiClusterState.BelowMarkClosestPresent, // Below mark closest to base
            ThaiClusterState.BelowMarkFurthestPresent, // Second level below mark
            ThaiClusterState.UnicodeLayoutControlPresent,
            // NumberOfThaiCharClasses
        };


        // BaseDiacriticValidationTable - table used for validating a given base/mark pair
        // Used to quickly determine whether a given diacritic/mark are
        // legal to add to a particular base type.
        // Indexed by the base's ThaiCharClass.  It only has entries up to
        // SpaceChar; IsIllegalPair consults it only after confirming that the
        // base is one of the two legal base classes, so larger indices are
        // never used.
        // The field is readonly (like clusterStateMap) so the table reference
        // cannot be swapped after type initialization.
        private static readonly ThaiCharClassFlags[] BaseDiacriticValidationTable =
        {
           0,            // unknown chars accept no marks
           0,            // punctuation marks accept no marks
           0,            // Lao am accepts no marks
           0,            // Thai am accepts no marks
           ThaiCharClassFlags.AllLowerMarks, // base accepts lower marks only
           ThaiCharClassFlags.AllLowerMarks | ThaiCharClassFlags.AllUpperMarks | ThaiCharClassFlags.AmChars,
           0,             // space char
        };

        // Maps each ThaiCharClass (by ordinal) to its single-bit
        // ThaiCharClassFlags value so classes can be tested against masks.
        // Left non-readonly because it is internal and code outside this file
        // may depend on the current declaration.
        internal static ThaiCharClassFlags[] CharClassToFlag =
        {
          ThaiCharClassFlags.UnknownCharClass,
          ThaiCharClassFlags.AcceptsNoMark,
          ThaiCharClassFlags.LaoAmChar,        //  The AM character
          ThaiCharClassFlags.ThaiAmChar,       //  The AM character
          ThaiCharClassFlags.AcceptsLower,     // No upper diacritic is allowed. below is okay
          ThaiCharClassFlags.AcceptsMarks,     // Base glyph can have diacritics
          ThaiCharClassFlags.SpaceChar,        // Space char may take one mark
          ThaiCharClassFlags.AboveMarkClosest, // Above mark closest to base
          ThaiCharClassFlags.AboveMarkClose,   // Second level above base
          ThaiCharClassFlags.AboveMarkFurther, // Third level above base
          ThaiCharClassFlags.AboveMarkFurthest,// Fourth level above base
          ThaiCharClassFlags.BelowMarkClosest, // Below mark closest to base
          ThaiCharClassFlags.BelowMarkFurthest,// Below mark furthest from base
          ThaiCharClassFlags.UnicodeLayoutControl,// Unicode control chars (ZWJ, ZWNJ)
        };
 

          /// <summary>
          /// ThaiCharClassFlags - enumeration of Thai character classification flags
          /// </summary>
          /// <remarks>
          /// This enum is linked to the list of char classes in the ThaiCharClass
          /// enum; keep them in sync.  Each class has one bit; the remaining
          /// members are masks built from those bits:
          ///     the "All..." masks are used to test the char class of a unicode char
          /// </remarks>
          [Flags()]
          internal enum ThaiCharClassFlags: uint
          {
              // --- one bit per character class ---
              UnknownCharClass     = 0x0000,
              AcceptsNoMark        = 0x0001,
              LaoAmChar            = 0x0002, //  The AM character
              ThaiAmChar           = 0x0004, //  The AM character
              AmChars              = LaoAmChar | ThaiAmChar, // mask for am characters (0x0006)
              AcceptsLower         = 0x0008, // No upper diacritic is allowed. below is okay
              AcceptsMarks         = 0x0010, // Base glyph can have diacritics
              SpaceChar            = 0x0020, // Space char may take one mark
              AllLegalBaseChars    = AcceptsLower | AcceptsMarks, // bases that may carry a mark, not spaces (0x0018)
              AllBaseChars         = AcceptsNoMark | AcceptsLower | AcceptsMarks, // all bases, not spaces (0x0019)
              AboveMarkClosest     = 0x0040, // Above mark closest to base
              AboveMarkClose       = 0x0080, // Second level above base
              AboveMarkFurther     = 0x0100, // Third level above base
              AboveMarkFurthest    = 0x0200, // Fourth level above base
              BelowMarkClosest     = 0x0400, // Below mark closest to base
              BelowMarkFurthest    = 0x0800, // Below mark furthest from base
              UnicodeLayoutControl = 0x1000,

              // --- diacritic masks ---
              AllDiacritics        = AboveMarkClosest | AboveMarkClose | AboveMarkFurther | AboveMarkFurthest |
                                     BelowMarkClosest | BelowMarkFurthest, // mask for all diacritics (0x0fc0)
              AllLowerMarks        = BelowMarkClosest | BelowMarkFurthest, // mask for lower marks (0x0c00)
              AllUpperMarks        = AboveMarkClosest | AboveMarkClose |
                                     AboveMarkFurther | AboveMarkFurthest, // mask for upper marks (0x03c0)
          }
    } 
 

  ///  
  /// ThaiCharClassifier - The char converter for Thai/Lao.
  /// 
  internal class ThaiCharClassifier : ShaperCharacterClassifier
  { 

 
    /// <summary>
    /// Sets up the classifier for the Thai or the Lao Unicode block.
    /// </summary>
    /// <param name="scriptTag">script to classify; Thai and Lao are handled here</param>
    /// <param name="fontFace">font face, passed through to the base class</param>
    public ThaiCharClassifier(ScriptTags scriptTag, GlyphTypeface fontFace) : base(scriptTag, fontFace) 
    {
        // classes reported for characters that are not in the script table
        _unknownClass =    UnknownCharClass; 
        _spaceClass =      (byte)ThaiCharClass.SpaceChar;
        _zwControlClass =  (byte)ThaiCharClass.UnicodeLayoutControl;
        _zwjClass =        (byte)ThaiCharClass.UnicodeLayoutControl;
        _zwnjClass =       (byte)ThaiCharClass.UnicodeLayoutControl; 
        _shyClass =        UnknownCharClass;
 
        // For any other script tag the range fields and the table keep
        // whatever values the base constructor left in them.
        if (scriptTag == ScriptTags.Thai) 
        {
            _firstChar     = '\u0E00';     // this is the first Thai Unicode char 
            _lastChar      = '\u0E7F';     // this is the last Thai Unicode char
            _xorMask       = 0xE00;     // this mask is used in GetCharShapeInfo (not visible in this file - TODO confirm)
            _xorRange      = 0x080;     // this is used in GetCharShapeInfo (not visible in this file - TODO confirm)
 
            _charClassTable = _thaiCharClasses;
        } 
        else if (scriptTag == ScriptTags.Lao) 
        {
            _firstChar     = '\u0E80';     // this is the first Lao Unicode char
            _lastChar      = '\u0EFF';     // this is the last Lao Unicode char
            _xorMask       = 0xE80;
            _xorRange      = 0x080;
            _charClassTable = _laoCharClasses; 
        }
 
    } 

    /// <summary>
    /// ThaiCharClassifier.ToShapeInfo - returns CharShapeInfo for the Unicode character
    /// </summary>
    /// <remarks>
    /// Thai/Lao add nothing of their own here; the base class result is
    /// returned unchanged.
    /// </remarks>
    /// <param name="unicodeChar">character to classify</param>
    /// <returns>the shape info computed by the base classifier</returns>
    public override CharShapeInfo ToShapeInfo(char unicodeChar )
    {
        return base.ToShapeInfo(unicodeChar);
    }
 

#region Classification Tables

        // these consts are so the tables below will be more readable 
        public const ThaiCharClass IsAmChar = (ThaiCharClass)CharShapeInfo.RequiresSpecialHandling;
        private const ThaiCharClass IsStartOfCluster = (ThaiCharClass)CharShapeInfo.IsStartOfCluster; 
        private const byte UnknownCharClass = (byte)(ThaiCharClass.UnknownCharClass | IsStartOfCluster); 
        private const byte AcceptsNoMark = (byte)(ThaiCharClass.AcceptsNoMark | IsStartOfCluster);
        private const byte BaseAcceptsLower = (byte)(ThaiCharClass.BaseAcceptsLower | IsStartOfCluster); 
        private const byte BaseAcceptsMarks = (byte)(ThaiCharClass.BaseAcceptsMarks | IsStartOfCluster);
        private const byte AboveMarkClosest = (byte)ThaiCharClass.AboveMarkClosest;
        private const byte AboveMarkClose = (byte)ThaiCharClass.AboveMarkClose;
        private const byte AboveMarkFurther = (byte)ThaiCharClass.AboveMarkFurther; 
        private const byte AboveMarkFurthest = (byte)ThaiCharClass.AboveMarkFurthest;
        private const byte BelowMarkClosest = (byte)ThaiCharClass.BelowMarkClosest; 
        private const byte BelowMarkFurthest = (byte)ThaiCharClass.BelowMarkFurthest; 
        private const byte ThaiAmChar  = (byte)(ThaiCharClass.ThaiAmChar | IsAmChar);
        private const byte LaoAmChar  = (byte)(ThaiCharClass.LaoAmChar | IsAmChar); 

        // All the Thai Unicode chars (U+E00 - U+E7F) classified
         private static readonly byte[] _thaiCharClasses  = //new ThaiLaoCharClass[]
         { 
                        // U+E00 (Thai Unicode)
            // 0, 4, 8, C     1, 5, 9, D        2, 6, A, E        3, 7, B, F 
            UnknownCharClass, BaseAcceptsMarks, BaseAcceptsMarks, BaseAcceptsMarks, 
            BaseAcceptsMarks, BaseAcceptsMarks, BaseAcceptsMarks, BaseAcceptsMarks,
            BaseAcceptsMarks, BaseAcceptsMarks, BaseAcceptsMarks, BaseAcceptsMarks, 
            BaseAcceptsMarks, BaseAcceptsMarks, BaseAcceptsMarks, BaseAcceptsMarks,

            // U+E10 (Thai Unicode)
            // 0, 4, 8, C     1, 5, 9, D        2, 6, A, E        3, 7, B, F 
            BaseAcceptsMarks, BaseAcceptsMarks, BaseAcceptsMarks, BaseAcceptsMarks,
            BaseAcceptsMarks, BaseAcceptsMarks, BaseAcceptsMarks, BaseAcceptsMarks, 
            BaseAcceptsMarks, BaseAcceptsMarks, BaseAcceptsMarks, BaseAcceptsMarks, 
            BaseAcceptsMarks, BaseAcceptsMarks, BaseAcceptsMarks, BaseAcceptsMarks,
 
            // U+E20 (Thai Unicode)
            // 0, 4, 8, C     1, 5, 9, D        2, 6, A, E        3, 7, B, F
            BaseAcceptsMarks, BaseAcceptsMarks, BaseAcceptsMarks, BaseAcceptsMarks,
            BaseAcceptsMarks, BaseAcceptsMarks, BaseAcceptsMarks, BaseAcceptsMarks, 
            BaseAcceptsMarks, BaseAcceptsMarks, BaseAcceptsMarks, BaseAcceptsMarks,
            BaseAcceptsMarks, BaseAcceptsMarks, BaseAcceptsMarks, AcceptsNoMark, 
 
            // U+E30 (Thai Unicode)
            // 0, 4, 8, C     1, 5, 9, D        2, 6, A, E        3, 7, B, F 
            BaseAcceptsLower, AboveMarkClosest, BaseAcceptsLower, ThaiAmChar,
            AboveMarkClosest, AboveMarkClosest, AboveMarkClosest, AboveMarkClosest,
            BelowMarkClosest, BelowMarkClosest, BelowMarkFurthest,UnknownCharClass,
            UnknownCharClass, UnknownCharClass, UnknownCharClass, AcceptsNoMark, 

            // U+E40 (Thai Unicode) 
            // 0, 4, 8, C     1, 5, 9, D        2, 6, A, E        3, 7, B, F 
            BaseAcceptsLower, BaseAcceptsLower, BaseAcceptsLower, BaseAcceptsLower,
            BaseAcceptsLower, BaseAcceptsLower, BaseAcceptsLower, AboveMarkClose, 
            AboveMarkFurther, AboveMarkFurther, AboveMarkFurther, AboveMarkFurther,
            AboveMarkFurthest,AboveMarkClose,   AboveMarkFurthest,AcceptsNoMark,

            // U+E50 (Thai Unicode) 
            // 0, 4, 8, C     1, 5, 9, D        2, 6, A, E        3, 7, B, F
            AcceptsNoMark,    AcceptsNoMark,    AcceptsNoMark,    AcceptsNoMark, 
            AcceptsNoMark,    AcceptsNoMark,    AcceptsNoMark,    AcceptsNoMark, 
            AcceptsNoMark,    AcceptsNoMark,    AcceptsNoMark,    AcceptsNoMark,
            UnknownCharClass, UnknownCharClass, UnknownCharClass, UnknownCharClass, 

            // U+E60 (Thai Unicode)
            // 0, 4, 8, C     1, 5, 9, D        2, 6, A, E        3, 7, B, F
            UnknownCharClass, UnknownCharClass, UnknownCharClass, UnknownCharClass, 
            UnknownCharClass, UnknownCharClass, UnknownCharClass, UnknownCharClass,
            UnknownCharClass, UnknownCharClass, UnknownCharClass, UnknownCharClass, 
            UnknownCharClass, UnknownCharClass, UnknownCharClass, UnknownCharClass, 

            // U+E70 (Thai Unicode) 
            // 0, 4, 8, C     1, 5, 9, D        2, 6, A, E        3, 7, B, F
            UnknownCharClass, UnknownCharClass, UnknownCharClass, UnknownCharClass,
            UnknownCharClass, UnknownCharClass, UnknownCharClass, UnknownCharClass,
            UnknownCharClass, UnknownCharClass, UnknownCharClass, UnknownCharClass, 
            UnknownCharClass, UnknownCharClass, UnknownCharClass, UnknownCharClass,
        }; 
 
        // All the Lao Unicode chars (U+E00 - U+E7F) classified
        private static readonly byte[] _laoCharClasses  = //new ThaiLaoCharClass[] 
        {
            // All the Lao Unicode chars (U+E80 - U+8FF) classifications
            // U+E80 (Lao Unicode)
            // 0, 4, 8, C     1, 5, 9, D        2, 6, A, E        3, 7, B, F 
            UnknownCharClass, BaseAcceptsMarks, BaseAcceptsMarks, UnknownCharClass,
            BaseAcceptsMarks, UnknownCharClass, UnknownCharClass, BaseAcceptsMarks, 
            BaseAcceptsMarks, UnknownCharClass, BaseAcceptsMarks, UnknownCharClass, 
            UnknownCharClass, BaseAcceptsMarks, UnknownCharClass, UnknownCharClass,
 
            // U+E90 (Lao Unicode)
            // 0, 4, 8, C     1, 5, 9, D        2, 6, A, E        3, 7, B, F
            UnknownCharClass, UnknownCharClass, UnknownCharClass, UnknownCharClass,
            BaseAcceptsMarks, BaseAcceptsMarks, BaseAcceptsMarks, BaseAcceptsMarks, 
            UnknownCharClass, BaseAcceptsMarks, BaseAcceptsMarks, BaseAcceptsMarks,
            BaseAcceptsMarks, BaseAcceptsMarks, BaseAcceptsMarks, BaseAcceptsMarks, 
 
            // U+EA0 (Lao Unicode)
            // 0, 4, 8, C     1, 5, 9, D        2, 6, A, E        3, 7, B, F 
            UnknownCharClass, BaseAcceptsMarks, BaseAcceptsMarks, BaseAcceptsMarks,
            UnknownCharClass, BaseAcceptsMarks, UnknownCharClass, BaseAcceptsMarks,
            UnknownCharClass, UnknownCharClass, BaseAcceptsMarks, BaseAcceptsMarks,
            UnknownCharClass, BaseAcceptsMarks, BaseAcceptsMarks, AcceptsNoMark, 

            // U+EB0 (Lao Unicode) 
            // 0, 4, 8, C     1, 5, 9, D        2, 6, A, E        3, 7, B, F 
            AcceptsNoMark,    AboveMarkClosest, AcceptsNoMark,    LaoAmChar,
            AboveMarkClosest, AboveMarkClosest, AboveMarkClosest, AboveMarkClosest, 
            BelowMarkFurthest,BelowMarkFurthest,UnknownCharClass, AboveMarkClosest,
            BelowMarkClosest, AcceptsNoMark,    UnknownCharClass, UnknownCharClass,

            // U+EC0 (Lao Unicode) 
            // 0, 4, 8, C     1, 5, 9, D        2, 6, A, E        3, 7, B, F
            AcceptsNoMark,    AcceptsNoMark,    AcceptsNoMark,    AcceptsNoMark, 
            AcceptsNoMark,    UnknownCharClass, AcceptsNoMark,    UnknownCharClass,   //0xEC0 
            AboveMarkClose,   AboveMarkClose,   AboveMarkClose,   AboveMarkClose,
            AboveMarkClose,   AboveMarkClosest, UnknownCharClass, UnknownCharClass, 

            // U+ED0 (Lao Unicode)
            // 0, 4, 8, C     1, 5, 9, D        2, 6, A, E        3, 7, B, F
            AcceptsNoMark,    AcceptsNoMark,    AcceptsNoMark,    AcceptsNoMark, 
            AcceptsNoMark,    AcceptsNoMark,    AcceptsNoMark,    AcceptsNoMark,
            AcceptsNoMark,    AcceptsNoMark,    UnknownCharClass, UnknownCharClass, 
            BaseAcceptsMarks, BaseAcceptsMarks, UnknownCharClass, UnknownCharClass, 

            // U+EE0 (Lao Unicode) 
            // 0, 4, 8, C     1, 5, 9, D        2, 6, A, E        3, 7, B, F
            UnknownCharClass, UnknownCharClass, UnknownCharClass, UnknownCharClass,
            UnknownCharClass, UnknownCharClass, UnknownCharClass, UnknownCharClass,
            UnknownCharClass, UnknownCharClass, UnknownCharClass, UnknownCharClass, 
            UnknownCharClass, UnknownCharClass, UnknownCharClass, UnknownCharClass,
 
            // U+EF0 (Lao Unicode) 
            // 0, 4, 8, C     1, 5, 9, D        2, 6, A, E        3, 7, B, F
            UnknownCharClass, UnknownCharClass, UnknownCharClass, UnknownCharClass, 
            UnknownCharClass, UnknownCharClass, UnknownCharClass, UnknownCharClass,
            UnknownCharClass, UnknownCharClass, UnknownCharClass, UnknownCharClass,
            UnknownCharClass, UnknownCharClass, UnknownCharClass, UnknownCharClass,
        }; 

#endregion  // end of Classification Tables 
    } 

} 

// File provided for Reference Use Only by Microsoft Corporation (c) 2007.
// Copyright (c) Microsoft Corporation. All rights reserved.
Link Menu

Network programming in C#, Network Programming in VB.NET, Network Programming in .NET
This book is available now!
Buy at Amazon US or
Buy at Amazon UK
thaishape.cs source code in C# .NET

Source code for the .NET framework in C#

Code:

Link Menu