ISO2022Encoding.cs source code in C# .NET

                        Code:
                         / Net / Net / 3.5.50727.3053 / DEVDIV / depot / DevDiv / releases / whidbey / netfxsp / ndp / clr / src / BCL / System / Text / ISO2022Encoding.cs / 1 / ISO2022Encoding.cs
                        
                        
                            // ==++== 
//
//   Copyright (c) 1991-2003, Microsoft Corporation.  All rights reserved.
//
// ==--== 
//  ISO2022Encoding.cs
// 
//  Ported to managed code from c_is2022.c and related iso 2022 dll files from mlang 
//
//  Abstract: 
//
//      Managed implimentation of ISO 2022 code pages, ported from the implimentation in c_is2022.dll
//      This code should be kept in [....] with the other implimentations
//      This encoding wraps the basic encodings in code that adds the shift in/out wrapper methods 
//
//  Notes: 
// 
// IsAlwaysNormalized ???
// Regarding Normalization for ISO-2022-JP (50220, 50221, 50222), its the same rules as EUCJP 
//  Forms KC & KD are precluded because of things like halfwidth Katakana that has compatibility mappings
//  Form D is precluded because of 0x00a8, which changes to space + dierises.
//
// Note: I think that IsAlwaysNormalized should probably return true for form C for Japanese 20932 based CPs. 
//
// For ISO-2022-KR 
//  Never normalized, C & D (& therefore KC & KD) are precluded because of Hangul syllables and combined characters. 
//
// IsAlwaysNormalized ??? 
// Regarding Normalization for ISO-2022-CN (50227, 50229) & HZ-GB2312 (52936) I think is similar to the Japanese case.
//  Forms KC & KD are precluded because of things like halfwidth Katakana that has compatibility mappings
//  Form D is precluded because of 0x00a8, which changes to space + dierises.
// 
// Note: I think that IsAlwaysNormalized should probably return true for form C for Chinese 20936 based CPs.
// 
namespace System.Text 
{
    using System.Globalization; 
    using System.Text;
    using System.Runtime.InteropServices;
    using System;
    using System.Security; 
    using System.Runtime.CompilerServices;
    using System.Runtime.Serialization; 
 

    /*=================================ISO2022Encoding============================ 
    **
    ** This is used to support ISO 2022 encodings that use shift/escape sequences.
    **
    ==============================================================================*/ 

    [Serializable()] 
    internal class ISO2022Encoding : DBCSCodePageEncoding 
    {
        const byte SHIFT_OUT            = (byte)0x0E; 
        const byte SHIFT_IN             = (byte)0x0F;
        const byte ESCAPE               = 0x1B;
        const byte LEADBYTE_HALFWIDTH   = 0x10;
 
        // We have to load the 936 code page tables, so impersonate 936 as our base
        // This pretends to be other code pages as far as memory sections are concerned. 
        internal ISO2022Encoding(int codePage) : base(codePage, tableBaseCodePages[codePage % 10]) 
        {
            this.m_bUseMlangTypeForSerialization = true; 
        }

        // Constructor called by serialization.
        // Note:  We use the base GetObjectData however 
        internal ISO2022Encoding(SerializationInfo info, StreamingContext context) : base(info, context)
        { 
            // Actually this can't ever get called, CodePageEncoding is our proxy 
            BCLDebug.Assert(false, "Didn't expect to make it to DBCSCodePageEncoding serialization constructor");
            throw new ArgumentException(Environment.GetResourceString("Arg_ExecutionEngineException")); 
        }

        static int[] tableBaseCodePages =
        { 
            932,    // 50220  ISO-2022-JP, No halfwidth Katakana, convert to full width
            932,    // 50221  ISO-2022-JP, Use escape sequence for half width Katakana 
            932,    // 50222  ISO-2022-JP, Use shift-in/shift-out for half width Katakana 
            0,
            0, 
            949,    // 50225  ISO-2022-KR, Korean
            936,    // 52936  HZ-GB2312, 936 might be better source
            0, //20936,    // 50227  ISO-2022-CN, Note: This is just the same as CP 936 in Everett.
            0, 
            // 50229 is currently unsupported, CP 20000 is currently not built in .nlp file
            0, //20000,    // 50229  ISO-2022-CN, ModeCNS11643_1 
            0, //20000,    // 50229  ISO-2022-CN, ModeCNS11643_2 
            0         //                     ModeASCII
        }; 

        internal enum ISO2022Modes
        {
            ModeHalfwidthKatakana   = 0, 
            ModeJIS0208             = 1,
            ModeKR                  = 5, 
            ModeHZ                  = 6, 
            ModeGB2312              = 7,
            ModeCNS11643_1          = 9, 
            ModeCNS11643_2          = 10,
            ModeASCII               = 11,

            ModeIncompleteEscape    = -1, 
            ModeInvalidEscape       = -2,
            ModeNOOP                = -3 
        } 

        protected unsafe override String GetMemorySectionName() 
        {
            int iUseCodePage = this.bFlagDataTable ? dataTableCodePage : CodePage;

            String strFormat; 

            switch (this.CodePage) 
            { 
                case 50220:
                case 50221: 
                case 50222:
                    strFormat = "CodePage_{0}_{1}_{2}_{3}_{4}_ISO2022JP";
                    break;
                case 50225: 
                    strFormat = "CodePage_{0}_{1}_{2}_{3}_{4}_ISO2022KR";
                    break; 
                case 52936: 
                    strFormat = "CodePage_{0}_{1}_{2}_{3}_{4}_HZ";
                    break; 
                default:
                    BCLDebug.Assert(false, "[ISO2022Encoding.GetMemorySectionName] Don't expect to get here for code page " + this.CodePage);
                    strFormat = "CodePage_{0}_{1}_{2}_{3}_{4}";
                    break; 
            }
 
            String strName = String.Format(CultureInfo.InvariantCulture, strFormat, 
                iUseCodePage, this.pCodePage->VersionMajor, this.pCodePage->VersionMinor,
                this.pCodePage->VersionRevision, this.pCodePage->VersionBuild); 

            return strName;
        }
 
        // Clean up characters for ISO2022 code pages, etc.
        // ISO2022 (50220, 50221, 50222) 
        // GB-HZ (52936) 
        protected override bool CleanUpBytes(ref int bytes)
        { 
            switch (this.CodePage)
            {
                // 932 based code pages
                case 50220: 
                case 50221:
                case 50222: 
                { 
                    if (bytes >= 0x100)
                    { 
                        // map extended char (0xfa40-0xfc4b) to a special range
                        // (ported from mlang)
                        if (bytes >= 0xfa40 && bytes <= 0xfc4b)
                        { 
                            if ( bytes >= 0xfa40 && bytes <= 0xfa5b )
                            { 
                                if ( bytes <= 0xfa49 ) 
                                    bytes = bytes - 0x0b51 ;
                                else if ( bytes >= 0xfa4a && bytes <= 0xfa53 ) 
                                    bytes = bytes - 0x072f6 ;
                                else if ( bytes >= 0xfa54 && bytes <= 0xfa57 )
                                    bytes = bytes - 0x0b5b ;
                                else if ( bytes == 0xfa58 ) 
                                    bytes = 0x878a ;
                                else if ( bytes == 0xfa59 ) 
                                    bytes = 0x8782 ; 
                                else if ( bytes == 0xfa5a )
                                    bytes = 0x8784 ; 
                                else if ( bytes == 0xfa5b )
                                    bytes = 0x879a ;
                            }
                            else if ( bytes >= 0xfa5c && bytes <= 0xfc4b ) 
                            {
                                byte tc = unchecked((byte)bytes); 
                                if ( tc < 0x5c ) 
                                    bytes = bytes - 0x0d5f;
                                else if ( tc >= 0x80 && tc <= 0x9B ) 
                                    bytes = bytes - 0x0d1d;
                                else
                                    bytes = bytes - 0x0d1c;
                            } 
                        }
 
                        // Convert 932 code page to 20932 like code page range 
                        // (also ported from mlang)
                        byte bLead = unchecked((byte)(bytes >> 8)); 
                        byte bTrail = unchecked((byte)bytes);

                        bLead -= ((bLead > (byte)0x9f) ? (byte)0xb1 : (byte)0x71);
                        bLead = (byte)((bLead << 1) + 1); 
                        if (bTrail > (byte)0x9e)
                        { 
                            bTrail -= (byte)0x7e; 
                            bLead++;
                        } 
                        else
                        {
                            if (bTrail > (byte)0x7e)
                                bTrail--; 
                            bTrail -= (byte)0x1f;
                        } 
 
                        bytes = ((int)bLead) << 8 | (int)bTrail;
 
                        // Don't step out of our allocated lead byte area.
                        // All DBCS lead and trail bytes should be >= 0x21 and <= 0x7e
                        // This is commented out because Everett/Mlang had illegal PUA
                        // mappings to ISO2022 code pages that we're maintaining. 
//                        if ((bytes & 0xFF00) < 0x2100 || (bytes & 0xFF00) > 0x7e00 ||
  //                          (bytes & 0xFF) < 0x21 || (bytes & 0xFF) > 0x7e) 
    //                        return false; 
                    }
                    else 
                    {
                        // Adjust 1/2 Katakana
                        if (bytes >= 0xa1 && bytes <= 0xdf)
                            bytes += (LEADBYTE_HALFWIDTH << 8) - 0x80; 

                        // 0x81-0x9f and 0xe0-0xfc CP 932 
                        // 0x8e and 0xa1-0xfe      CP 20932 (we don't use 8e though) 
                        // b0-df is 1/2 Katakana
                        if (bytes >= 0x81 && 
                            (bytes <= 0x9f ||
                             (bytes >= 0xe0 && bytes <= 0xfc)))
                        {
                            // Don't do lead bytes, we use escape sequences instead. 
                            return false;
                        } 
                    } 
                    break;
                } 
                case 50225:
                {
                    // For 50225 since we don't rely on lead byte marks, return false and don't add them,
                    // esp. since we're only a 7 bit code page. 
                    if (bytes >= 0x80 && bytes <= 0xff)
                        return false; 
 
                    // Ignore characters out of range (a1-7f)
                    if (bytes >= 0x100 && 
                        ((bytes & 0xff) < 0xa1 || (bytes & 0xff) == 0xff ||
                         (bytes & 0xff00) < 0xa100 || (bytes & 0xff00) == 0xff00))
                         return false;
 
                    // May as well get them into our 7 bit range
                    bytes &= 0x7f7f; 
 
                    break;
                } 
                case 52936:
                {
                    // Since we don't rely on lead byte marks for 52936, get rid of them so we
                    // don't end up with extra wierd fffe mappings. 
                    if (bytes >= 0x81 && bytes <= 0xfe)
                        return false; 
 
                    break;
                } 
            }

            return true;
        } 

        // GetByteCount 
        internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS baseEncoder) 
        {
            // Just need to ASSERT, this is called by something else internal that checked parameters already 
            BCLDebug.Assert(count >= 0, "[ISO2022Encoding.GetByteCount]count is negative");
            BCLDebug.Assert(chars != null, "[ISO2022Encoding.GetByteCount]chars is null");

            // Just call GetBytes with null byte* to get count 
            return GetBytes(chars, count, null, 0, baseEncoder);
        } 
 
        internal override unsafe int GetBytes(char* chars, int charCount,
                                                byte* bytes, int byteCount, EncoderNLS baseEncoder) 
        {
            // Just need to ASSERT, this is called by something else internal that checked parameters already
            BCLDebug.Assert(chars != null, "[ISO2022Encoding.GetBytes]chars is null");
            BCLDebug.Assert(byteCount >= 0, "[ISO2022Encoding.GetBytes]byteCount is negative"); 
            BCLDebug.Assert(charCount >= 0, "[ISO2022Encoding.GetBytes]charCount is negative");
 
            // Assert because we shouldn't be able to have a null encoder. 
            BCLDebug.Assert(encoderFallback != null, "[ISO2022Encoding.GetBytes]Attempting to use null encoder fallback");
 
            // Fix our encoder
            ISO2022Encoder encoder = (ISO2022Encoder)baseEncoder;

            // Our return value 
            int iCount = 0;
 
            switch(CodePage) 
            {
                case 50220: 
                case 50221:
                case 50222:
                    iCount = GetBytesCP5022xJP( chars, charCount, bytes, byteCount, encoder );
                    break; 
                case 50225:
                    iCount = GetBytesCP50225KR( chars, charCount, bytes, byteCount, encoder ); 
                    break; 
// Everett had 50227 the same as 936
/*              case 50227: 
                    iCount = GetBytesCP50227CN( chars, charCount, bytes, byteCount, encoder );
                    break;
*/
                case 52936: 
                    iCount = GetBytesCP52936( chars, charCount, bytes, byteCount, encoder );
                    break; 
            } 

            return iCount; 
        }

        // This is internal and called by something else,
        internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder) 
        {
            // Just assert, we're called internally so these should be safe, checked already 
            BCLDebug.Assert(bytes != null, "[ISO2022Encoding.GetCharCount]bytes is null"); 
            BCLDebug.Assert(count >= 0, "[ISO2022Encoding.GetCharCount]byteCount is negative");
 
            // Just call getChars with null char* to get count
            return GetChars(bytes, count, null, 0, baseDecoder);
        }
 
        internal override unsafe int GetChars(byte* bytes, int byteCount,
                                                char* chars, int charCount, DecoderNLS baseDecoder) 
        { 
            // Just need to ASSERT, this is called by something else internal that checked parameters already
            BCLDebug.Assert(bytes != null, "[ISO2022Encoding.GetChars]bytes is null"); 
            BCLDebug.Assert(byteCount >= 0, "[ISO2022Encoding.GetChars]byteCount is negative");
            BCLDebug.Assert(charCount >= 0, "[ISO2022Encoding.GetChars]charCount is negative");

            // Fix our decoder 
            ISO2022Decoder decoder = (ISO2022Decoder)baseDecoder;
            int iCount = 0; 
 
            switch (CodePage)
            { 
                case 50220:
                case 50221:
                case 50222:
                    iCount = GetCharsCP5022xJP( bytes, byteCount, chars, charCount, decoder); 
                    break;
                case 50225: 
                    iCount = GetCharsCP50225KR( bytes, byteCount, chars, charCount, decoder); 
                    break;
                    // Currently 50227 is the same as 936 
//                case 50227:
  //                  iCount = GetCharsCP50227CN( bytes, byteCount, chars, charCount, decoder);
    //                break;
                case 52936: 
                    iCount = GetCharsCP52936( bytes, byteCount, chars, charCount, decoder);
                    break; 
                default: 
                    BCLDebug.Assert(false, "[ISO2022Encoding.GetChars] had unexpected code page");
                    break; 
            }

            return iCount;
        } 

        // ISO 2022 Code pages for JP. 
        //  50220 - No halfwidth Katakana, convert to full width 
        //  50221 - Use escape sequence for half width Katakana
        //  50222 - Use shift-in/shift-out for half width Katakana 
        //
        // These are the JIS code pages, superset of ISO-2022 / ISO-2022-JP-1
        //  0E          Shift Out (following bytes are Katakana)
        //  0F          Shift In  (back to "normal" behavior) 
        //  21-7E       Byte ranges (1 or 2 bytes)
        //   $ @   To Double Byte 0208 Mode (actually older code page, but subset of 0208) 
        //   $ B   To Double Byte 0208 Mode (duplicate) 
        //   $ ( D To Double Byte 0212 Mode (previously we misinterpreted this)
        //   $ I   To half width Katakana 
        //   ( J   To JIS-Roman
        //   ( H   To JIS-Roman (swedish character set)
        //   ( B   To ASCII
        //   & @   Alternate lead in to  $ B so just ignore it. 
        //
        // So in Katakana mode we add 0x8e as a lead byte and use CP 20932 to convert it 
        // In ASCII mode we just spit out the single byte. 
        // In Roman mode we should change 0x5c (\) -> Yen sign and 0x7e (~) to Overline, however
        //      we didn't in mLang, otherwise roman is like ASCII. 
        // In 0208 double byte mode we have to |= with 0x8080 and use CP 20932 to convert it.
        // In 0212 double byte mode we have to |= with 0x8000 and use CP 20932 to convert it.
        //
        // Note that JIS Shift In/Shift Out is different than the other ISO2022 encodings.  For JIS 
        // Shift out always shifts to half-width Katakana.  Chinese encodings use designator sequences
        // instead of escape sequences and shift out to the designated sequence or back in to ASCII. 
        // 
        // When decoding JIS 0208, MLang used a '*' (0x2a) character in JIS 0208 mode to map the trailing byte
        // to halfwidth katakana.  I found no description of that behavior, however that block of 0208 is 
        // undefined, so we maintain that behavior when decoding.  We will never generate characters using
        // that technique, but the decoder will process them.
        //
        private unsafe int GetBytesCP5022xJP(char* chars, int charCount, 
                                                  byte* bytes, int byteCount, ISO2022Encoder encoder)
        { 
            // prepare our helpers 
            Encoding.EncodingByteBuffer buffer = new Encoding.EncodingByteBuffer(
                this, encoder, bytes, byteCount, chars, charCount); 

            // Get our mode
            ISO2022Modes currentMode = ISO2022Modes.ModeASCII;      // Mode
            ISO2022Modes shiftInMode = ISO2022Modes.ModeASCII;      // Mode that shift in will go back to (only used by CP 50222) 

            // Check our encoder 
            if (encoder != null) 
            {
                char charLeftOver = encoder.charLeftOver; 

                currentMode = encoder.currentMode;
                shiftInMode = encoder.shiftInOutMode;
 
                // We may have a left over character from last time, try and process it.
                if (charLeftOver > 0) 
                { 
                    BCLDebug.Assert(Char.IsHighSurrogate(charLeftOver), "[ISO2022Encoding.GetBytesCP5022xJP]leftover character should be high surrogate");
 
                    // It has to be a high surrogate, which we don't support, so it has to be a fallback
                    buffer.Fallback(charLeftOver);
                }
            } 

            while (buffer.MoreData) 
            { 
                // Get our char
                char ch = buffer.GetNextChar(); 

                // Get our bytes
                ushort iBytes = mapUnicodeToBytes[ch];
 
                StartConvert:
                // Check for halfwidth bytes 
                byte bLeadByte = (byte)(iBytes >> 8); 
                byte bTrailByte = (byte)(iBytes & 0xff);
 
                if (bLeadByte == LEADBYTE_HALFWIDTH)
                {
                    // Its Halfwidth Katakana
                    if (CodePage == 50220) 
                    {
                        // CodePage 50220 doesn't use halfwidth Katakana, convert to fullwidth 
                        // See if its out of range, fallback if so, throws if recursive fallback 
                        if (bTrailByte < 0x21 || bTrailByte >= 0x21 + HalfToFullWidthKanaTable.Length)
                        { 
                            buffer.Fallback(ch);
                            continue;
                        }
 
                        // Get the full width katakana char to use.
                        iBytes = unchecked((ushort)(HalfToFullWidthKanaTable[bTrailByte - 0x21] & 0x7F7F)); 
 
                        // May have to do all sorts of fun stuff for mode, go back to start convert
                        goto StartConvert; 
                    }

                    // Can use halfwidth Katakana, make sure we're in right mode
 
                    // Make sure we're in right mode
                    if (currentMode != ISO2022Modes.ModeHalfwidthKatakana) 
                    { 
                        // 50222 or 50221, either shift in/out or escape to get to Katakana mode
                        if (CodePage == 50222) 
                        {
                            // Shift Out
                            if (!buffer.AddByte(SHIFT_OUT))
                                break;  // convert out of space, stop 

                            // Don't change modes until after AddByte in case it fails for convert 
                            // We get to shift out to Katakana, make sure we'll go back to the right mode 
                            // (This ends up always being ASCII)
                            shiftInMode = currentMode; 
                            currentMode = ISO2022Modes.ModeHalfwidthKatakana;
                        }
                        else
                        { 
                            // 50221 does halfwidth katakana by escape sequence
                            BCLDebug.Assert(CodePage == 50221, "[ISO2022Encoding.GetBytesCP5022xJP]Expected Code Page 50221"); 
 
                            // Add our escape sequence
                            if (!buffer.AddByte(ESCAPE, unchecked((byte)'('), unchecked((byte)'I'))) 
                                break;  // convert out of space, stop

                            currentMode = ISO2022Modes.ModeHalfwidthKatakana;
                        } 
                    }
 
                    // We know we're in Katakana mode now, so add it. 
                    // Go ahead and add the Katakana byte.  Our table tail bytes are 0x80 too big.
                    if (!buffer.AddByte(unchecked((byte)(bTrailByte & 0x7F)))) 
                        break;  // convert out of space, stop

                    // Done with this one
                    continue; 
                }
                else if (bLeadByte != 0) 
                { 
                    //
                    //  It's a double byte character. 
                    //

                    // If we're CP 50222 we may have to shift in from Katakana mode first
                    if (CodePage == 50222 && currentMode == ISO2022Modes.ModeHalfwidthKatakana) 
                    {
                        // Shift In 
                        if (!buffer.AddByte(SHIFT_IN)) 
                            break;    // convert out of space, stop
 
                        // Need to shift in from katakana.  (Still might not be right, but won't be shifted out anyway)
                        currentMode = shiftInMode;
                    }
 
                    // Make sure we're in the right mode (JIS 0208 or JIS 0212)
                    // Note: Right now we don't use JIS 0212.  Also this table'd be wrong 
 
                    // Its JIS extension 0208
                    if (currentMode != ISO2022Modes.ModeJIS0208) 
                    {
                        // Escape sequence, we can fail after this, mode will be correct for convert
                        if (!buffer.AddByte(ESCAPE, unchecked((byte)'$'), unchecked((byte)'B')))
                            break;  // Convert out of space, stop 

                        currentMode = ISO2022Modes.ModeJIS0208; 
                    } 

                    // Add our double bytes 
                    if (!buffer.AddByte(unchecked((byte)(bLeadByte)), unchecked((byte)(bTrailByte))))
                        break; // Convert out of space, stop
                    continue;
                } 
                else if (iBytes != 0 || ch == 0)
                { 
                    // Single byte Char 
                    // If we're CP 50222 we may have to shift in from Katakana mode first
                    if (CodePage == 50222 && currentMode == ISO2022Modes.ModeHalfwidthKatakana) 
                    {
                        // Shift IN
                        if (!buffer.AddByte(SHIFT_IN))
                            break; // convert ran out of room 

                        // Need to shift in from katakana.  (Still might not be right, but won't be shifted out anyway) 
                        currentMode = shiftInMode; 
                    }
 
                    // Its a single byte character, switch to ASCII if we have to
                    if (currentMode != ISO2022Modes.ModeASCII)
                    {
                        if (!buffer.AddByte(ESCAPE,unchecked((byte)'('), unchecked((byte)'B'))) 
                            break; // convert ran out of room
 
                        currentMode = ISO2022Modes.ModeASCII; 
                    }
 
                    // Add the ASCII char
                    if (!buffer.AddByte(bTrailByte))
                        break; // convert had no room left
                    continue; 
                }
 
                // Its unknown, do fallback, throws if recursive (knows because we called InternalGetNextChar) 
                buffer.Fallback(ch);
            } 

            // Switch back to ASCII if MustFlush or no encoder
            if (currentMode != ISO2022Modes.ModeASCII &&
                (encoder == null || encoder.MustFlush)) 
            {
                // If we're CP 50222 we may have to shift in from Katakana mode first 
                if (CodePage == 50222 && currentMode == ISO2022Modes.ModeHalfwidthKatakana) 
                {
                    // Shift IN, only shift mode if necessary. 
                    if (buffer.AddByte(SHIFT_IN))
                        // Need to shift in from katakana.  (Still might not be right, but won't be shifted out anyway)
                        currentMode = shiftInMode;
                    else 
                        // If not successful, convert will maintain state for next time, also
                        // AddByte will have decremented our char count, however we need it to remain the same 
                        buffer.GetNextChar(); 
                }
 
                // switch back to ASCII to finish neatly
                if (currentMode != ISO2022Modes.ModeASCII &&
                    (CodePage != 50222 || currentMode != ISO2022Modes.ModeHalfwidthKatakana))
                { 
                    // only shift if it was successful
                    if (buffer.AddByte(ESCAPE, unchecked((byte)'('), unchecked((byte)'B'))) 
                        currentMode = ISO2022Modes.ModeASCII; 
                    else
                        // If not successful, convert will maintain state for next time, also 
                        // AddByte will have decremented our char count, however we need it to remain the same
                        buffer.GetNextChar();
                }
            } 

            // Remember our encoder state 
            if (bytes != null && encoder != null) 
            {
                // This is ASCII if we had to flush 
                encoder.currentMode = currentMode;
                encoder.shiftInOutMode = shiftInMode;

                if (!buffer.fallbackBuffer.bUsedEncoder) 
                {
                    encoder.charLeftOver = (char)0; 
                } 

                encoder.m_charsUsed = buffer.CharsUsed; 
            }

            // Return our length
            return buffer.Count; 
        }
 
        // ISO 2022 Code pages for Korean - CP 50225 
        //
        // CP 50225 has Shift In/Shift Out codes, and a single designator sequence that is supposed 
        // to appear once in the file, at the beginning of a line, before any multibyte code points.
        // So we stick the designator at the beginning of the output.
        //
        // These are the KR code page codes for ISO-2022-KR 
        //  0E          Shift Out (following bytes are double byte)
        //  0F          Shift In  (back to ASCII behavior) 
        //  21-7E       Byte ranges (1 or 2 bytes) 
        //   $)C   Double byte ISO-2022-KR designator
        // 
        // Note that this encoding is a little different than other encodings.  The $)C sequence
        // should only appear once per file.  (Actually I saw another spec/rfc that said at the beginning
        // of each line, but it shouldn't really matter.)
        // 
        // During decoding Mlang accepted ' ', '\t, and '\n' as their respective characters, even if
        // it was in double byte mode.  We maintain that behavior, although I couldn't find a reference or 
        // reason for that behavior.  We never generate data using that shortcut. 
        //
        // Also Mlang always assumed KR mode, even if the designator wasn't found yet, so we do that as 
        // well.  So basically we just ignore $)C when decoding.
        //
        private unsafe int GetBytesCP50225KR(char* chars, int charCount,
                                                    byte* bytes, int byteCount, ISO2022Encoder encoder) 
        {
            // prepare our helpers 
            Encoding.EncodingByteBuffer buffer = new Encoding.EncodingByteBuffer( 
                this, encoder, bytes, byteCount, chars, charCount);
 
            // Get our mode
            ISO2022Modes currentMode = ISO2022Modes.ModeASCII;      // Mode
            ISO2022Modes shiftOutMode = ISO2022Modes.ModeASCII;     // ModeKR if already stamped lead bytes
 
            // Check our encoder
            if (encoder != null) 
            { 
                // May have leftover stuff
                char charLeftOver = encoder.charLeftOver; 
                currentMode = encoder.currentMode;
                shiftOutMode = encoder.shiftInOutMode;

                // We may have a l left over character from last time, try and process it. 
                if (charLeftOver > 0)
                { 
                    BCLDebug.Assert(Char.IsHighSurrogate(charLeftOver), "[ISO2022Encoding.GetBytesCP50225KR]leftover character should be high surrogate"); 

                    // It has to be a high surrogate, which we don't support, so it has to be a fallback 
                    buffer.Fallback(charLeftOver);
                }
            }
 
            while (buffer.MoreData)
            { 
                // Get our data 
                char ch = buffer.GetNextChar();
 
                // Get our bytes
                ushort iBytes = mapUnicodeToBytes[ch];

                // Check for double byte bytes 
                byte bLeadByte = (byte)(iBytes >> 8);
                byte bTrailByte = (byte)(iBytes & 0xff); 
 
                if (bLeadByte != 0)
                { 
                    //
                    //  It's a double byte character.
                    //
 
                    // If we haven't done our Korean designator, then do so, if we have any input
                    if (shiftOutMode != ISO2022Modes.ModeKR) 
                    { 
                        // Add our code page designator sequence
                        if (!buffer.AddByte(ESCAPE, unchecked((byte)'$'), unchecked((byte)')'), unchecked((byte)'C'))) 
                            break; // No room during convert.

                        shiftOutMode = ISO2022Modes.ModeKR;
                    } 

                    // May have to switch to ModeKR first 
                    if (currentMode != ISO2022Modes.ModeKR) 
                    {
                        if (!buffer.AddByte(SHIFT_OUT)) 
                            break; // No convert room

                        currentMode = ISO2022Modes.ModeKR;
                    } 

                    // Add the bytes 
                    if (!buffer.AddByte(bLeadByte, bTrailByte)) 
                        break; // no convert room
                    continue; 
                }
                else if (iBytes != 0 || ch == 0)
                {
                    // Its a single byte character, switch to ASCII if we have to 
                    if (currentMode != ISO2022Modes.ModeASCII)
                    { 
                        if (!buffer.AddByte(SHIFT_IN)) 
                            break;
 
                        currentMode = ISO2022Modes.ModeASCII;
                    }

                    // Add the ASCII char 
                    if (!buffer.AddByte(bTrailByte))
                        break; 
                    continue; 
                }
 
                // Its unknown, do fallback, throws if recursive (knows because we called InternalGetNextChar)
                buffer.Fallback(ch);
            }
 
            // Switch back to ASCII if MustFlush or no encoder
            if (currentMode != ISO2022Modes.ModeASCII && 
                (encoder == null || encoder.MustFlush)) 
            {
                // Get back to ASCII to be safe.  Only do it if it success. 
                if (buffer.AddByte(SHIFT_IN))
                    currentMode = ISO2022Modes.ModeASCII;
                else
                    // If not successful, convert will maintain state for next time, also 
                    // AddByte will have decremented our char count, however we need it to remain the same
                    buffer.GetNextChar(); 
            } 

            // Remember our encoder state 
            if (bytes != null && encoder != null)
            {
                // If we didn't use the encoder, then there's no chars left over
                if (!buffer.fallbackBuffer.bUsedEncoder) 
                {
                    encoder.charLeftOver = (char)0; 
                } 

                // This is ASCII if we had to flush 
                encoder.currentMode = currentMode;

                // We don't use shift out mode, but if we've flushed we need to reset it so it doesn't
                // get output again. 
                if (!encoder.MustFlush || encoder.charLeftOver != (char)0)
                { 
                    // We should be not flushing or converting 
                    BCLDebug.Assert(!encoder.MustFlush || !encoder.m_throwOnOverflow,
                        "[ISO2022Encoding.GetBytesCP50225KR]Expected no left over data or not flushing or not converting"); 
                    encoder.shiftInOutMode = shiftOutMode;
                }
                else
                    encoder.shiftInOutMode = ISO2022Modes.ModeASCII; 

                encoder.m_charsUsed = buffer.CharsUsed; 
            } 

            // Return our length 
            return buffer.Count;
        }

        // CP52936 is HZ Encoding 
        // HZ Encoding has 4 shift sequences:
        // ~~       '~' (\u7e) 
        // ~}       shift into 1 byte mode, 
        // ~{       shift into 2 byte GB 2312-80
        // ~    Maintain 2 byte mode across new lines (ignore both ~ and  characters) 
        //          (This is for mailers that restrict to 70 or 80 or whatever character lines)
        //
        // According to comment in mlang, lead & trail byte ranges are described in RFC 1843
        // RFC 1843 => valid HZ code range: leading byte 0x21 - 0x77, 2nd byte 0x21 - 0x7e 
        // Our 936 code points are or'd with 0x8080, so lead byte 0xa1 - 0xf7, trail byte 0xa1 - 0xfe
        // 
        // This encoding is designed for transmission by e-mail and news.  No bytes should have high bit set. 
        // (all bytes <= 0x7f)
        private unsafe int GetBytesCP52936(char* chars, int charCount, 
                                           byte* bytes, int byteCount, ISO2022Encoder encoder)
        {
            // prepare our helpers
            Encoding.EncodingByteBuffer buffer = new Encoding.EncodingByteBuffer( 
                this, encoder, bytes, byteCount, chars, charCount);
 
            // Mode 
            ISO2022Modes currentMode = ISO2022Modes.ModeASCII;
 
            // Check our encoder
            if (encoder != null)
            {
                char charLeftOver = encoder.charLeftOver; 
                currentMode = encoder.currentMode;
 
                // We may have a left over character from last time, try and process it. 
                if (charLeftOver > 0)
                { 
                    BCLDebug.Assert(Char.IsHighSurrogate(charLeftOver), "[ISO2022Encoding.GetBytesCP52936]leftover character should be high surrogate");

                    // It has to be a high surrogate, which we don't support, so it has to be a fallback
                    buffer.Fallback(charLeftOver); 
                }
            } 
 
            while (buffer.MoreData)
            { 
                // Get our char
                char ch = buffer.GetNextChar();

                // Get our bytes 
                ushort sChar = mapUnicodeToBytes[ch];
                if (sChar == 0 && ch != 0) 
                { 
                    // Wasn't a legal byte sequence, its a surrogate or fallback
                    // Throws if recursive (knows because we called InternalGetNextChar) 
                    buffer.Fallback(ch);

                    // Done with our char, now process fallback
                    continue; 
                }
 
                // Check for halfwidth bytes 
                byte bLeadByte = (byte)(sChar >> 8);
                byte bTrailByte = (byte)(sChar & 0xff); 

                // If its a double byte, it has to fit in the lead byte 0xa1 - 0xf7, trail byte 0xa1 - 0xfe range
                // (including the 0x8080 that our codepage or's to the value)
                if ((bLeadByte != 0 && 
                     (bLeadByte < 0xa1 || bLeadByte > 0xf7 || bTrailByte < 0xa1 || bTrailByte > 0xfe)) ||
                    (bLeadByte == 0 && bTrailByte > 0x80 && bTrailByte != 0xff)) 
                { 
                    // Illegal character, in 936 code page, but not in HZ subset, get fallback for it
                    buffer.Fallback(ch); 
                    continue;
                }

                // sChar is now either ASCII or has an 0x8080 mask 
                if (bLeadByte != 0)
                { 
                    // Its a double byte mode 
                    if (currentMode != ISO2022Modes.ModeHZ)
                    { 
                        // Need to add the double byte mode marker
                        if (!buffer.AddByte((byte)'~', (byte)'{', 2))
                            break;                                      // Stop if no buffer space in convert
 
                        currentMode = ISO2022Modes.ModeHZ;
                    } 
 
                    // Go ahead and add the 2 bytes
                    if (!buffer.AddByte(unchecked((byte)(bLeadByte & 0x7f)), unchecked((byte)(bTrailByte & 0x7f)))) 
                        break;                                      // Stop if no buffer space in convert
                }
                else
                { 
                    // Its supposed to be ASCII
                    if (currentMode != ISO2022Modes.ModeASCII) 
                    { 
                        // Need to add the ASCII mode marker
                        // Will have 1 more byte (or 2 if ~) 
                        if (!buffer.AddByte((byte)'~', (byte)'}', bTrailByte == '~' ? 2:1))
                            break;

                        currentMode = ISO2022Modes.ModeASCII; 
                    }
 
                    // If its a '~' we'll need an extra one 
                    if (bTrailByte == '~')
                    { 
                        // Need to add the extra ~
                        if (!buffer.AddByte((byte)'~', 1))
                            break;
                    } 

                    // Need to add the character 
                    if (!buffer.AddByte(bTrailByte)) 
                        break;
                } 
            }

            // Add ASCII shift out if we're at end of decoder
            if (currentMode != ISO2022Modes.ModeASCII && 
                (encoder == null || encoder.MustFlush))
            { 
                // Need to add the ASCII mode marker 
                // Only turn off other mode if this works
                if (buffer.AddByte((byte)'~',(byte)'}')) 
                    currentMode = ISO2022Modes.ModeASCII;
                else
                    // If not successful, convert will maintain state for next time, also
                    // AddByte will have decremented our char count, however we need it to remain the same 
                    buffer.GetNextChar();
            } 
 
            // Need to remember our mode
            if (encoder != null && bytes != null) 
            {
                // This is ASCII if we had to flush
                encoder.currentMode = currentMode;
 
                if (!buffer.fallbackBuffer.bUsedEncoder)
                { 
                    encoder.charLeftOver = (char)0; 
                }
 
                encoder.m_charsUsed = buffer.CharsUsed;
            }

            // Return our length 
            return buffer.Count;
        } 
 
        private unsafe int GetCharsCP5022xJP(byte* bytes, int byteCount,
                                                  char* chars, int charCount, ISO2022Decoder decoder) 
        {
            // Get our info.
            Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
                this, decoder, chars, charCount, bytes, byteCount); 

            // No mode information yet 
            ISO2022Modes currentMode = ISO2022Modes.ModeASCII;      // Our current Mode 
            ISO2022Modes shiftInMode = ISO2022Modes.ModeASCII;      // Mode that we'll shift in to
            byte[] escapeBytes = new byte[4]; 
            int escapeCount = 0;

            if (decoder != null)
            { 
                currentMode = decoder.currentMode;
                shiftInMode = decoder.shiftInOutMode; 
 
                // See if we have leftover decoder buffer to use
                // Load our bytesLeftOver 
                escapeCount = decoder.bytesLeftOverCount;

                // Don't want to mess up decoder if we're counting or throw an exception
                for (int i = 0; i < escapeCount; i++) 
                    escapeBytes[i] = decoder.bytesLeftOver[i];
            } 
 
            // Do this until the end
            while (buffer.MoreData || escapeCount > 0) 
            {
                byte ch;

                if (escapeCount > 0) 
                {
                    // Get more escape sequences if necessary 
                    if (escapeBytes[0] == ESCAPE) 
                    {
                        // Stop if no more input 
                        if (!buffer.MoreData)
                        {
                            if (decoder != null && !decoder.MustFlush)
                                break; 
                        }
                        else 
                        { 
                            // Add it to the sequence we can check
                            escapeBytes[escapeCount++] = buffer.GetNextByte(); 

                            // We have an escape sequence
                            ISO2022Modes modeReturn =
                                CheckEscapeSequenceJP(escapeBytes, escapeCount); 

                            if (modeReturn != ISO2022Modes.ModeInvalidEscape) 
                            { 
                                if (modeReturn != ISO2022Modes.ModeIncompleteEscape)
                                { 
                                    // Processed escape correctly
                                    escapeCount = 0;

                                    // We're now this mode 
                                    currentMode = shiftInMode = modeReturn;
                                } 
 
                                // Either way, continue to get next escape or real byte
                                continue; 
                            }
                        }

                        // If ModeInvalidEscape, or no input & must flush, then fall through to add escape. 
                    }
 
                    // Read next escape byte and move them down one. 
                    ch = DecrementEscapeBytes(ref escapeBytes, ref escapeCount);
                } 
                else
                {
                    // Get our next byte
                    ch = buffer.GetNextByte(); 

                    if (ch == ESCAPE) 
                    { 
                        // We'll have an escape sequence, use it if we don't have one buffered already
                        if (escapeCount == 0) 
                        {
                            // Start this new escape sequence
                            escapeBytes[0] = ch;
                            escapeCount = 1; 
                            continue;
                        } 
 
                        // Flush the previous escape sequence, then reuse this escape byte
                        buffer.AdjustBytes(-1); 
                    }
                }

                if (ch == SHIFT_OUT) 
                {
                   shiftInMode = currentMode; 
                   currentMode = ISO2022Modes.ModeHalfwidthKatakana; 
                   continue;
                } 
                else if (ch == SHIFT_IN)
                {
                   currentMode = shiftInMode;
                   continue; 
                }
 
                // Get our full character 
                ushort iBytes = ch;
                bool b2Bytes = false; 

                if (currentMode == ISO2022Modes.ModeJIS0208)
                {
                    // 
                    //  To handle errors, we need to check:
                    //    1. if trailbyte is there 
                    //    2. if code is valid 
                    //
                    if (escapeCount > 0) 
                    {
                        // Let another escape fall through
                        if (escapeBytes[0] != ESCAPE)
                        { 
                            // Move them down one & get the next data
                            iBytes <<= 8; 
                            iBytes |= DecrementEscapeBytes(ref escapeBytes, ref escapeCount); 
                            b2Bytes = true;
                        } 
                    }
                    else if (buffer.MoreData)
                    {
                        iBytes <<= 8; 
                        iBytes |= buffer.GetNextByte();
                        b2Bytes = true; 
                    } 
                    else
                    { 
                        // Not enough input, use decoder if possible
                        if (decoder == null || decoder.MustFlush)
                        {
                            // No decoder, do fallback for this byte 
                            buffer.Fallback(ch);
                            break; 
                        } 

                        // Stick it in the decoder if we're not counting 
                        if (chars != null)
                        {
                            escapeBytes[0] = ch;
                            escapeCount = 1; 
                        }
                        break; 
                    } 

                    // MLang treated JIS 0208 '*' lead byte like a single halfwidth katakana 
                    // escape, so use 0x8e00 as katakana lead byte and keep same trail byte.
                    // 0x2a lead byte range is normally unused in JIS 0208, so shouldn't have
                    // any wierd compatibility issues.
                    if ((b2Bytes == true) && ((iBytes & 0xff00) == 0x2a00)) 
                    {
                        iBytes = (ushort)(iBytes & 0xff); 
                        iBytes |= (LEADBYTE_HALFWIDTH << 8);   // Put us in the halfwidth katakana range 
                    }
                } 
                else if (iBytes >= 0xA1 && iBytes <= 0xDF)
                {
                    // Everett accidentally mapped Katakana like shift-jis (932),
                    // even though this is a 7 bit code page.  We keep that mapping 
                    iBytes |= (LEADBYTE_HALFWIDTH << 8);    // Map to halfwidth katakana range
                    iBytes &= 0xff7f;                       // remove extra 0x80 
                } 
                else if (currentMode == ISO2022Modes.ModeHalfwidthKatakana )
                { 
                    // Add 0x10 lead byte that our encoding expects for Katakana:
                    iBytes |= (LEADBYTE_HALFWIDTH << 8);
                }
 
                // We have an iBytes to try to convert.
                char c = mapBytesToUnicode[iBytes]; 
 
                // See if it was unknown
                if (c == UNKNOWN_CHAR_FLAG && iBytes != 0) 
                {
                    // Have to do fallback
                    if (b2Bytes)
                    { 
                        if (!buffer.Fallback((byte)(iBytes >> 8), (byte)iBytes))
                            break; 
                    } 
                    else
                    { 
                        if (!buffer.Fallback(ch))
                            break;
                    }
                } 
                else
                { 
                    // If we were JIS 0208, then we consumed an extra byte 
                    if (!buffer.AddChar(c, b2Bytes ? 2:1))
                        break; 
                }
            }

            // Make sure our decoder state matches our mode, if not counting 
            if (chars != null && decoder != null)
            { 
                // Remember it if we don't flush 
                if (!decoder.MustFlush || escapeCount != 0)
                { 
                    // Either not flushing or had state (from convert)
                    BCLDebug.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow,
                        "[ISO2022Encoding.GetCharsCP5022xJP]Expected no state or not converting or not flushing");
 
                    decoder.currentMode = currentMode;
                    decoder.shiftInOutMode = shiftInMode; 
 
                    // Remember escape buffer
                    decoder.bytesLeftOverCount = escapeCount; 
                    decoder.bytesLeftOver = escapeBytes;
                }
                else
                { 
                    // We flush, clear buffer
                    decoder.currentMode = ISO2022Modes.ModeASCII; 
                    decoder.shiftInOutMode = ISO2022Modes.ModeASCII; 
                    decoder.bytesLeftOverCount = 0;
                    // Slightly different if counting/not counting 
                }

                decoder.m_bytesUsed = buffer.BytesUsed;
            } 

            // Return # of characters we found 
            return buffer.Count; 
        }
 
        // We know we have an escape sequence, so check it starting with the byte after the escape
        private ISO2022Modes CheckEscapeSequenceJP( byte[] bytes, int escapeCount )
        {
            // Have an escape sequence 
            if (bytes[0] != ESCAPE)
                return ISO2022Modes.ModeInvalidEscape; 
 
            if (escapeCount < 3)
                return ISO2022Modes.ModeIncompleteEscape; 

            if (bytes[1] == '(')
            {
                if (bytes[2] == 'B')       // (B 
                {
                    return ISO2022Modes.ModeASCII; 
                } 
                else if (bytes[2] == 'H')  // (H
                { 
                    // Actually this is supposed to be Swedish
                    // We treat it like ASCII though.
                    return ISO2022Modes.ModeASCII;
                } 
                else if (bytes[2] == 'J')  // (J
                { 
                    // Actually this is supposed to be Roman 
                    // 2 characters are different, but historically we treat it as ascii
                    return ISO2022Modes.ModeASCII; 
                }
                else if (bytes[2] == 'I')  // (I
                {
                    return ISO2022Modes.ModeHalfwidthKatakana; 
                }
            } 
            else if (bytes[1] == '$') 
            {
                if (bytes[2] == '@' ||   // $@ 
                    bytes[2] == 'B')     // $B
                {
                    return ISO2022Modes.ModeJIS0208;
                } 
                else
                { 
                    // Looking for $(D 
                    if (escapeCount < 4)
                        return ISO2022Modes.ModeIncompleteEscape; 

                    if (bytes[2] == '(' && bytes[3] == 'D') // $(D
                    {
                        // Mlang treated 0208 like 0212 even though that's wrong 
                        return ISO2022Modes.ModeJIS0208;
                    } 
                } 
            }
            else if (bytes[1] == '&') 
            {
                if (bytes[2] == '@')            // &@
                {
                    // Ignore ESC & @ (prefix to $B) 
                    return ISO2022Modes.ModeNOOP;
                } 
            } 

            // If we get here we fell through and have an invalid/unknown escape sequence 
            return ISO2022Modes.ModeInvalidEscape;
        }

        private byte DecrementEscapeBytes(ref byte[] bytes, ref int count) 
        {
            BCLDebug.Assert(count > 0, "[ISO2022Encoding.DecrementEscapeBytes]count > 0"); 
 
            // Decrement our count
            count--; 

            // Remember the first one
            byte returnValue = bytes[0];
 
            // Move them down one.
            for (int i = 0; i < count; i++) 
            { 
                bytes[i] = bytes[i+1];
            } 

            // Clear out the last byte
            bytes[count] = 0;
 
            // Return the old 1st byte
            return returnValue; 
        } 

        // Note that in DBCS mode mlang passed through ' ', '\t' and '\n' as SBCS characters 
        // probably to allow mailer formatting without too much extra work.
        private unsafe int GetCharsCP50225KR(byte* bytes, int byteCount,
                                                   char* chars, int charCount, ISO2022Decoder decoder)
        { 
            // Get our info.
            Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer( 
                this, decoder, chars, charCount, bytes, byteCount); 

            // No mode information yet 
            ISO2022Modes currentMode = ISO2022Modes.ModeASCII;      // Our current Mode

            byte[] escapeBytes = new byte[4];
            int escapeCount = 0; 

            if (decoder != null) 
            { 
                currentMode = decoder.currentMode;
 
                // See if we have leftover decoder buffer to use
                // Load our bytesLeftOver
                escapeCount = decoder.bytesLeftOverCount;
 
                // Don't want to mess up decoder if we're counting or throw an exception
                for (int i = 0; i < escapeCount; i++) 
                    escapeBytes[i] = decoder.bytesLeftOver[i]; 
            }
 
            // Do this until the end, just do '?' replacement because we don't have fallbacks for decodings.
            while (buffer.MoreData || escapeCount > 0)
            {
                byte ch; 

                if (escapeCount > 0) 
                { 
                    // Get more escape sequences if necessary
                    if (escapeBytes[0] == ESCAPE) 
                    {
                        // Stop if no more input
                        if (!buffer.MoreData)
                        { 
                            if (decoder != null && !decoder.MustFlush)
                                break; 
                        } 
                        else
                        { 
                            // Add it to the sequence we can check
                            escapeBytes[escapeCount++] = buffer.GetNextByte();

                            // We have an escape sequence 
                            ISO2022Modes modeReturn =
                                CheckEscapeSequenceKR(escapeBytes, escapeCount); 
 
                            if (modeReturn != ISO2022Modes.ModeInvalidEscape)
                            { 
                                if (modeReturn != ISO2022Modes.ModeIncompleteEscape)
                                {
                                    // Processed escape correctly, no effect (we know about KR mode)
                                    escapeCount = 0; 
                                }
 
                                // Either way, continue to get next escape or real byte 
                                continue;
                            } 
                        }

                        // If ModeInvalidEscape, or no input & must flush, then fall through to add escape.
                    } 

                    // Still have something left over in escape buffer 
                    // Get it and move them down one 
                    ch = DecrementEscapeBytes(ref escapeBytes, ref escapeCount);
                } 
                else
                {
                    // Get our next byte
                    ch = buffer.GetNextByte(); 

                    if (ch == ESCAPE) 
                    { 
                        // We'll have an escape sequence, use it if we don't have one buffered already
                        if (escapeCount == 0) 
                        {
                            // Start this new escape sequence
                            escapeBytes[0] = ch;
                            escapeCount = 1; 
                            continue;
                        } 
 
                        // Flush previous escape sequence, then reuse this escape byte
                        buffer.AdjustBytes(-1); 
                    }
                }

                if (ch == SHIFT_OUT) 
                {
                   currentMode = ISO2022Modes.ModeKR; 
                   continue; 
                }
                else if (ch == SHIFT_IN) 
                {
                   currentMode = ISO2022Modes.ModeASCII;
                   continue;
                } 

                // Get our full character 
                ushort iBytes = ch; 
                bool b2Bytes = false;
 
                // MLANG was passing through ' ', '\t' and '\n', so we do so as well, but I don't see that in the RFC.
                if (currentMode == ISO2022Modes.ModeKR && ch != ' ' && ch != '\t' && ch != '\n')
                {
                    // 
                    //  To handle errors, we need to check:
                    //    1. if trailbyte is there 
                    //    2. if code is valid 
                    //
                    if (escapeCount > 0) 
                    {
                        // Let another escape fall through
                        if (escapeBytes[0] != ESCAPE)
                        { 
                            // Move them down one & get the next data
                            iBytes <<= 8; 
                            iBytes |= DecrementEscapeBytes(ref escapeBytes, ref escapeCount); 
                            b2Bytes = true;
                        } 
                    }
                    else if (buffer.MoreData)
                    {
                        iBytes <<= 8; 
                        iBytes |= buffer.GetNextByte();
                        b2Bytes = true; 
                    } 
                    else
                    { 
                        // Not enough input, use decoder if possible
                        if (decoder == null || decoder.MustFlush)
                        {
                            // No decoder, do fallback for lonely 1st byte 
                            buffer.Fallback(ch);
                            break; 
                        } 

                        // Stick it in the decoder if we're not counting 
                        if (chars != null)
                        {
                            escapeBytes[0] = ch;
                            escapeCount = 1; 
                        }
                        break; 
                    } 
                }
 
                // We have a iBytes to try to convert.
                char c = mapBytesToUnicode[iBytes];

                // See if it was unknown 
                if (c == UNKNOWN_CHAR_FLAG && iBytes != 0)
                { 
                    // Have to do fallback 
                    if (b2Bytes)
                    { 
                        if (!buffer.Fallback((byte)(iBytes >> 8), (byte)iBytes))
                            break;
                    }
                    else 
                    {
                        if (!buffer.Fallback(ch)) 
                            break; 
                    }
                } 
                else
                {
                    if (!buffer.AddChar(c, b2Bytes ? 2:1))
                        break; 
                }
            } 
 
            // Make sure our decoder state matches our mode, if not counting
            if (chars != null && decoder != null) 
            {
                // Remember it if we don't flush
                if (!decoder.MustFlush || escapeCount != 0)
                { 
                    // Either not flushing or had state (from convert)
                    BCLDebug.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow, 
                        "[ISO2022Encoding.GetCharsCP50225KR]Expected no state or not converting or not flushing"); 

                    decoder.currentMode = currentMode; 

                    // Remember escape buffer
                    decoder.bytesLeftOverCount = escapeCount;
                    decoder.bytesLeftOver = escapeBytes; 
                }
                else 
                { 
                    // We flush, clear buffer
                    decoder.currentMode = ISO2022Modes.ModeASCII; 
                    decoder.shiftInOutMode = ISO2022Modes.ModeASCII;
                    decoder.bytesLeftOverCount = 0;
                }
 
                decoder.m_bytesUsed = buffer.BytesUsed;
            } 
 
            // Return # of characters we found
            return buffer.Count; 
        }

        // We know we have an escape sequence, so check it starting with the byte after the escape
        private ISO2022Modes CheckEscapeSequenceKR( byte[] bytes, int escapeCount ) 
        {
            // Have an escape sequence 
            if (bytes[0] != ESCAPE) 
                return ISO2022Modes.ModeInvalidEscape;
 
            if (escapeCount < 4)
                return ISO2022Modes.ModeIncompleteEscape;

            if (bytes[1] == '$' && bytes[2] == ')' && bytes[3] == 'C') // $)C 
                return ISO2022Modes.ModeKR;
 
            // If we get here we fell through and have an invalid/unknown escape sequence 
            return ISO2022Modes.ModeInvalidEscape;
        } 

        // CP52936 is HZ Encoding
        // HZ Encoding has 4 shift sequences:
        // ~~       '~' (\u7e) 
        // ~}       shift into 1 byte mode,
        // ~{       shift into 2 byte GB 2312-80 
        // ~    Maintain 2 byte mode across new lines (ignore both ~ and  characters) 
        //          (This is for mailers that restrict to 70 or 80 or whatever character lines)
        // 
        // According to comment in mlang, lead & trail byte ranges are described in RFC 1843
        // RFC 1843 => valid HZ code range: leading byte 0x21 - 0x77, 2nd byte 0x21 - 0x7e
        // Our 936 code points are or'd with 0x8080, so lead byte 0xa1 - 0xf7, trail byte 0xa1 - 0xfe
        // 
        // This encoding is designed for transmission by e-mail and news.  No bytes should have high bit set.
        // (all bytes <= 0x7f) 
        private unsafe int GetCharsCP52936(byte* bytes, int byteCount, 
                                                char* chars, int charCount, ISO2022Decoder decoder)
        { 
            BCLDebug.Assert(byteCount >=0, "[ISO2022Encoding.GetCharsCP52936]count >=0");
            BCLDebug.Assert(bytes!=null, "[ISO2022Encoding.GetCharsCP52936]bytes!=null");

            // Get our info. 
            Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
                this, decoder, chars, charCount, bytes, byteCount); 
 
            // No mode information yet
            ISO2022Modes currentMode = ISO2022Modes.ModeASCII; 
            int byteLeftOver = -1;
            bool bUsedDecoder = false;

            if (decoder != null) 
            {
                currentMode = decoder.currentMode; 
                // See if we have leftover decoder buffer to use 
                // Don't want to mess up decoder if we're counting or throw an exception
                if (decoder.bytesLeftOverCount != 0 ) 
                {
                    // Load our bytesLeftOver
                    byteLeftOver = decoder.bytesLeftOver[0];
                } 
            }
 
            // Do this until the end, just do '?' replacement because we don't have fallbacks for decodings. 
            while (buffer.MoreData || byteLeftOver >= 0)
            { 
                byte ch;

                // May have a left over byte
                if (byteLeftOver >= 0) 
                {
                    ch = (byte)byteLeftOver; 
                    byteLeftOver = -1; 
                }
                else 
                {
                    ch = buffer.GetNextByte();
                }
 
                // We're in escape mode
                if (ch == '~') 
                { 
                    // Next char is type of switch
                    if (!buffer.MoreData) 
                    {
                        // We don't have anything left, it'll be in decoder or a ?
                        // don't fail if we are allowing overflows
                        if (decoder == null || decoder.MustFlush) 
                        {
                            // We'll be a '?' 
                            buffer.Fallback(ch); 
                            // break if we fail & break if we don't (because !MoreData)
                            // Add succeeded, continue 
                            break;
                        }

                        // Stick it in decoder 
                        if (decoder != null)
                            decoder.ClearMustFlush(); 
 
                        if (chars != null)
                        { 
                            decoder.bytesLeftOverCount = 1;
                            decoder.bytesLeftOver[0] = (byte)'~';
                            bUsedDecoder = true;
                        } 
                        break;
                    } 
 
                    // What type is it?, get 2nd byte
                    ch = buffer.GetNextByte(); 

                    if (ch == '~' && currentMode == ISO2022Modes.ModeASCII)
                    {
                        // Its just a ~~ replacement for ~, add it 
                        if (!buffer.AddChar((char)ch, 2))
                            // Add failed, break for converting 
                            break; 

                        // Add succeeded, continue 
                        continue;
                    }
                    else if (ch == '{')
                    { 
                        // Switching to Double Byte mode
                        currentMode = ISO2022Modes.ModeHZ; 
                        continue; 
                    }
                    else if (ch == '}') 
                    {
                        // Switching to ASCII mode
                        currentMode = ISO2022Modes.ModeASCII;
                        continue; 
                    }
                    else if (ch == '\n') 
                    { 
                        // Ignore ~\n sequence
                        continue; 
                    }
                    else
                    {
                        // Unknown escape, back up and try the '~' as a "normal" byte or lead byte 
                        buffer.AdjustBytes(-1);
                        ch = (byte)'~'; 
                    } 
                }
 
                // go ahead and add our data
                if (currentMode != ISO2022Modes.ModeASCII)
                {
                    // Should be ModeHZ 
                    BCLDebug.Assert(currentMode == ISO2022Modes.ModeHZ, "[ISO2022Encoding.GetCharsCP52936]Expected ModeHZ");
                    char cm; 
 
                    // Everett allowed characters < 0x20 to be passed as if they were ASCII
                    if (ch < 0x20) 
                    {
                        // Emit it as ASCII
                        goto STOREASCII;
                    } 

                    // Its multibyte, should have another byte 
                    if (!buffer.MoreData) 
                    {
                        // No bytes left 
                        // don't fail if we are allowing overflows
                        if (decoder == null || decoder.MustFlush)
                        {
                            // Not enough bytes, fallback lead byte 
                            buffer.Fallback(ch);
 
                            // Break if we fail & break because !MoreData 
                            break;
                        } 

                        if (decoder != null)
                            decoder.ClearMustFlush();
 
                        // Stick it in decoder
                        if (chars != null) 
                        { 
                            decoder.bytesLeftOverCount = 1;
                            decoder.bytesLeftOver[0] = ch; 
                            bUsedDecoder = true;
                        }
                        break;
                    } 

                    // Everett uses space as an escape character for single SBCS bytes 
                    byte ch2 = buffer.GetNextByte(); 
                    ushort iBytes = (ushort)(ch << 8 | ch2);
 
                    if (ch == ' ' && ch2 != 0)
                    {
                        // Get next char and treat it like ASCII (Everett treated space like an escape
                        // allowing the next char to be just ascii) 
                        cm = (char)ch2;
                        goto STOREMULTIBYTE; 
                    } 

                    // Bytes should be in range: lead byte 0x21-0x77, trail byte: 0x21 - 0x7e 
                    if ((ch < 0x21 || ch > 0x77 || ch2 < 0x21 || ch2 > 0x7e) &&
                    // Everett allowed high bit mappings for same characters (but only if both bits set)
                        (ch < 0xa1 || ch > 0xf7 || ch2 < 0xa1 || ch2 > 0xfe))
                    { 
                        // For some reason Everett allowed XX20 to become unicode 3000... (ideo sp)
                        if (ch2 == 0x20 && 0x21 <= ch && ch <= 0x7d) 
                        { 
                            iBytes = 0x2121;
                            goto MULTIBYTE; 
                        }

                        // Illegal char, use fallback.  If lead byte is 0 have to do it special and do it first
                        if (!buffer.Fallback((byte)(iBytes>>8), (byte)(iBytes))) 
                            break;
                        continue; 
                    } 

                    MULTIBYTE: 
                    iBytes |= 0x8080;
                    // Look up the multibyte char to stick it in our data

                    // We have a iBytes to try to convert. 
                    cm = mapBytesToUnicode[iBytes];
 
                    STOREMULTIBYTE: 

                    // See if it was unknown 
                    if (cm == UNKNOWN_CHAR_FLAG && iBytes != 0)
                    {
                        // Fall back the unknown stuff
                        if (!buffer.Fallback((byte)(iBytes>>8), (byte)(iBytes))) 
                            break;
                        continue; 
                    } 

                    if (!buffer.AddChar(cm, 2)) 
                        break;              // convert ran out of buffer, stop
                    continue;
                }
 
                // Just ASCII
                // We allow some chars > 7f because everett did, so we have to look them up. 
                STOREASCII: 
                char c = mapBytesToUnicode[ch];
 
                // Check if it was unknown
                if ((c == UNKNOWN_CHAR_FLAG || c == 0) && (ch != 0))
                {
                    // fallback the unkown bytes 
                    if (!buffer.Fallback((byte)ch))
                        break; 
                    continue; 
                }
 
                // Go ahead and add our ASCII character
                if (!buffer.AddChar(c))
                    break;                  // convert ran out of buffer, stop
            } 

            // Need to remember our state, IF we're not counting 
            if (chars != null && decoder != null) 
            {
                if (!bUsedDecoder) 
                {
                    // If we didn't use it, clear the byte left over
                    decoder.bytesLeftOverCount = 0;
                } 

                if (decoder.MustFlush && decoder.bytesLeftOverCount == 0) 
                { 
                    decoder.currentMode = ISO2022Modes.ModeASCII;
                } 
                else
                {
                    // Either not flushing or had state (from convert)
                    BCLDebug.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow, 
                        "[ISO2022Encoding.GetCharsCP52936]Expected no state or not converting or not flushing");
 
                    decoder.currentMode = currentMode; 
                }
                decoder.m_bytesUsed = buffer.BytesUsed; 
            }

            // Return # of characters we found
            return buffer.Count; 
        }
 
        // Note: These all end up with 1/2 bytes of average byte count, so unless we're 1 we're always 
        // charCount/2 bytes too big.
        public override int GetMaxByteCount(int charCount) 
        {
            if (charCount < 0)
               throw new ArgumentOutOfRangeException("charCount",
                    Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); 

            // Characters would be # of characters + 1 in case high surrogate is ? * max fallback 
            long byteCount = (long)charCount + 1; 

            if (EncoderFallback.MaxCharCount > 1) 
                byteCount *= EncoderFallback.MaxCharCount;

            // Start with just generic DBCS values (sort of).
            int perChar = 2; 
            int extraStart = 0;
            int extraEnd = 0; 
 
            switch (CodePage)
            { 
                case 50220:
                case 50221:
                    // 2 bytes per char + 3 bytes switch to JIS 0208 or 1 byte + 3 bytes switch to 1 byte CP
                    perChar = 5;        // 5 max (4.5 average) 
                    extraEnd = 3;       // 3 bytes to shift back to ASCII
                    break; 
                case 50222: 
                    // 2 bytes per char + 3 bytes switch to JIS 0208 or 1 byte + 3 bytes switch to 1 byte CP
                    perChar = 5;        // 5 max (4.5 average) 
                    extraEnd = 4;       // 1 byte to shift from Katakana -> DBCS, 3 bytes to shift back to ASCII from DBCS
                    break;
                case 50225:
                    // 2 bytes per char + 1 byte SO, or 1 byte per char + 1 byte SI. 
                    perChar = 3;        // 3 max, (2.5 average)
                    extraStart = 4;     // EUC-KR marker appears at beginning of file. 
                    extraEnd = 1;       // 1 byte to shift back to ascii if necessary. 
                    break;
                case 52936: 
                    // 2 bytes per char + 2 byte shift, or 1 byte + 1 byte shift
                    // Worst case: left over surrogate with no low surrogate is extra ?, could have to switch to ASCII, then could have HZ and flush to ASCII mode
                    perChar = 4;        // 4 max, (3.5 average if every other char is HZ/ASCII)
                    extraEnd = 2;       // 2 if we have to shift back to ASCII 
                    break;
            } 
 
            // Return our surrogate and End plus perChar for each char.
            byteCount *= perChar; 
            byteCount += extraStart + extraEnd;

            if (byteCount > 0x7fffffff)
                throw new ArgumentOutOfRangeException("charCount", Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow")); 

            return (int)byteCount; 
        } 

        public override int GetMaxCharCount(int byteCount) 
        {
            if (byteCount < 0)
               throw new ArgumentOutOfRangeException("byteCount",
                    Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); 

            int perChar = 1; 
            int extraDecoder = 1; 

            switch (CodePage) 
            {
                case 50220:
                case 50221:
                case 50222: 
                case 50225:
                    perChar = 1;        // Worst case all ASCII 
                    extraDecoder = 3;   // Could have left over 3 chars of 4 char escape sequence, that all become ? 
                    break;
                case 52936: 
                    perChar = 1;        // Worst case all ASCII
                    extraDecoder = 1;   // sequences are 2 chars, so if next one is illegal, then previous 1 could be ?
                    break;
            } 

            // Figure out our length, perchar * char + whatever extra our decoder could do to us. 
            long charCount = ((long)byteCount * perChar) + extraDecoder; 

            // Just in case we have to fall back unknown ones. 
            if (DecoderFallback.MaxCharCount > 1)
                charCount *= DecoderFallback.MaxCharCount;

            if (charCount > 0x7fffffff) 
                throw new ArgumentOutOfRangeException("byteCount", Environment.GetResourceString("ArgumentOutOfRange_GetCharCountOverflow"));
 
            return (int)charCount; 
        }
 
        public override Encoder GetEncoder()
        {
            return new ISO2022Encoder(this);
        } 

        public override Decoder GetDecoder() 
        { 
            return new ISO2022Decoder(this);
        } 

        [Serializable]
        internal class ISO2022Encoder : System.Text.EncoderNLS
        { 
            internal ISO2022Modes currentMode;
            internal ISO2022Modes shiftInOutMode; 
 
            internal ISO2022Encoder(EncodingNLS encoding) : base(encoding)
            { 
                // base calls reset
            }

            public override void Reset() 
            {
                // Reset 
                currentMode = ISO2022Modes.ModeASCII; 
                shiftInOutMode = ISO2022Modes.ModeASCII;
                charLeftOver = (char)0; 
                if (m_fallbackBuffer != null)
                    m_fallbackBuffer.Reset();
            }
 
            // Anything left in our encoder?
            internal override bool HasState 
            { 
                get
                { 
                    // Don't check shift-out mode, it may be ascii (JP) or not (KR)
                    return (this.charLeftOver != (char)0 ||
                            currentMode != ISO2022Modes.ModeASCII);
                } 
            }
        } 
 
        [Serializable]
        internal class ISO2022Decoder : System.Text.DecoderNLS 
        {
            internal byte[] bytesLeftOver;
            internal int bytesLeftOverCount;
            internal ISO2022Modes currentMode; 
            internal ISO2022Modes shiftInOutMode;
 
            internal ISO2022Decoder(EncodingNLS encoding) : base(encoding) 
            {
                // base calls reset 
            }

            public override void Reset()
            { 
                // Reset
                bytesLeftOverCount = 0; 
                bytesLeftOver = new byte[4]; 
                currentMode = ISO2022Modes.ModeASCII;
                shiftInOutMode = ISO2022Modes.ModeASCII; 
                if (m_fallbackBuffer != null)
                    m_fallbackBuffer.Reset();
            }
 
            // Anything left in our decoder?
            internal override bool HasState 
            { 
                get
                { 
                    // If have bytes left over or not shifted back to ASCII then have problem
                    return (this.bytesLeftOverCount != 0 ||
                            currentMode != ISO2022Modes.ModeASCII);
                } 
            }
        } 
 
        static ushort[] HalfToFullWidthKanaTable =
        { 
            0xa1a3, // 0x8ea1 : Halfwidth Ideographic Period
            0xa1d6, // 0x8ea2 : Halfwidth Opening Corner Bracket
            0xa1d7, // 0x8ea3 : Halfwidth Closing Corner Bracket
            0xa1a2, // 0x8ea4 : Halfwidth Ideographic Comma 
            0xa1a6, // 0x8ea5 : Halfwidth Katakana Middle Dot
            0xa5f2, // 0x8ea6 : Halfwidth Katakana Wo 
            0xa5a1, // 0x8ea7 : Halfwidth Katakana Small A 
            0xa5a3, // 0x8ea8 : Halfwidth Katakana Small I
            0xa5a5, // 0x8ea9 : Halfwidth Katakana Small U 
            0xa5a7, // 0x8eaa : Halfwidth Katakana Small E
            0xa5a9, // 0x8eab : Halfwidth Katakana Small O
            0xa5e3, // 0x8eac : Halfwidth Katakana Small Ya
            0xa5e5, // 0x8ead : Halfwidth Katakana Small Yu 
            0xa5e7, // 0x8eae : Halfwidth Katakana Small Yo
            0xa5c3, // 0x8eaf : Halfwidth Katakana Small Tu 
            0xa1bc, // 0x8eb0 : Halfwidth Katakana-Hiragana Prolonged Sound Mark 
            0xa5a2, // 0x8eb1 : Halfwidth Katakana A
            0xa5a4, // 0x8eb2 : Halfwidth Katakana I 
            0xa5a6, // 0x8eb3 : Halfwidth Katakana U
            0xa5a8, // 0x8eb4 : Halfwidth Katakana E
            0xa5aa, // 0x8eb5 : Halfwidth Katakana O
            0xa5ab, // 0x8eb6 : Halfwidth Katakana Ka 
            0xa5ad, // 0x8eb7 : Halfwidth Katakana Ki
            0xa5af, // 0x8eb8 : Halfwidth Katakana Ku 
            0xa5b1, // 0x8eb9 : Halfwidth Katakana Ke 
            0xa5b3, // 0x8eba : Halfwidth Katakana Ko
            0xa5b5, // 0x8ebb : Halfwidth Katakana Sa 
            0xa5b7, // 0x8ebc : Halfwidth Katakana Si
            0xa5b9, // 0x8ebd : Halfwidth Katakana Su
            0xa5bb, // 0x8ebe : Halfwidth Katakana Se
            0xa5bd, // 0x8ebf : Halfwidth Katakana So 
            0xa5bf, // 0x8ec0 : Halfwidth Katakana Ta
            0xa5c1, // 0x8ec1 : Halfwidth Katakana Ti 
            0xa5c4, // 0x8ec2 : Halfwidth Katakana Tu 
            0xa5c6, // 0x8ec3 : Halfwidth Katakana Te
            0xa5c8, // 0x8ec4 : Halfwidth Katakana To 
            0xa5ca, // 0x8ec5 : Halfwidth Katakana Na
            0xa5cb, // 0x8ec6 : Halfwidth Katakana Ni
            0xa5cc, // 0x8ec7 : Halfwidth Katakana Nu
            0xa5cd, // 0x8ec8 : Halfwidth Katakana Ne 
            0xa5ce, // 0x8ec9 : Halfwidth Katakana No
            0xa5cf, // 0x8eca : Halfwidth Katakana Ha 
            0xa5d2, // 0x8ecb : Halfwidth Katakana Hi 
            0xa5d5, // 0x8ecc : Halfwidth Katakana Hu
            0xa5d8, // 0x8ecd : Halfwidth Katakana He 
            0xa5db, // 0x8ece : Halfwidth Katakana Ho
            0xa5de, // 0x8ecf : Halfwidth Katakana Ma
            0xa5df, // 0x8ed0 : Halfwidth Katakana Mi
            0xa5e0, // 0x8ed1 : Halfwidth Katakana Mu 
            0xa5e1, // 0x8ed2 : Halfwidth Katakana Me
            0xa5e2, // 0x8ed3 : Halfwidth Katakana Mo 
            0xa5e4, // 0x8ed4 : Halfwidth Katakana Ya 
            0xa5e6, // 0x8ed5 : Halfwidth Katakana Yu
            0xa5e8, // 0x8ed6 : Halfwidth Katakana Yo 
            0xa5e9, // 0x8ed7 : Halfwidth Katakana Ra
            0xa5ea, // 0x8ed8 : Halfwidth Katakana Ri
            0xa5eb, // 0x8ed9 : Halfwidth Katakana Ru
            0xa5ec, // 0x8eda : Halfwidth Katakana Re 
            0xa5ed, // 0x8edb : Halfwidth Katakana Ro
            0xa5ef, // 0x8edc : Halfwidth Katakana Wa 
            0xa5f3, // 0x8edd : Halfwidth Katakana N 
            0xa1ab, // 0x8ede : Halfwidth Katakana Voiced Sound Mark
            0xa1ac  // 0x8edf : Halfwidth Katakana Semi-Voiced Sound Mark 
        };
    }
}
 

// File provided for Reference Use Only by Microsoft Corporation (c) 2007.
// Copyright (c) Microsoft Corporation. All rights reserved.
// ==++== 
//
//   Copyright (c) 1991-2003, Microsoft Corporation.  All rights reserved.
//
// ==--== 
//  ISO2022Encoding.cs
// 
//  Ported to managed code from c_is2022.c and related iso 2022 dll files from mlang 
//
//  Abstract: 
//
//      Managed implimentation of ISO 2022 code pages, ported from the implimentation in c_is2022.dll
//      This code should be kept in [....] with the other implimentations
//      This encoding wraps the basic encodings in code that adds the shift in/out wrapper methods 
//
//  Notes: 
// 
// IsAlwaysNormalized ???
// Regarding Normalization for ISO-2022-JP (50220, 50221, 50222), its the same rules as EUCJP 
//  Forms KC & KD are precluded because of things like halfwidth Katakana that has compatibility mappings
//  Form D is precluded because of 0x00a8, which changes to space + dierises.
//
// Note: I think that IsAlwaysNormalized should probably return true for form C for Japanese 20932 based CPs. 
//
// For ISO-2022-KR 
//  Never normalized, C & D (& therefore KC & KD) are precluded because of Hangul syllables and combined characters. 
//
// IsAlwaysNormalized ??? 
// Regarding Normalization for ISO-2022-CN (50227, 50229) & HZ-GB2312 (52936) I think is similar to the Japanese case.
//  Forms KC & KD are precluded because of things like halfwidth Katakana that has compatibility mappings
//  Form D is precluded because of 0x00a8, which changes to space + dierises.
// 
// Note: I think that IsAlwaysNormalized should probably return true for form C for Chinese 20936 based CPs.
// 
namespace System.Text 
{
    using System.Globalization; 
    using System.Text;
    using System.Runtime.InteropServices;
    using System;
    using System.Security; 
    using System.Runtime.CompilerServices;
    using System.Runtime.Serialization; 
 

    /*=================================ISO2022Encoding============================ 
    **
    ** This is used to support ISO 2022 encodings that use shift/escape sequences.
    **
    ==============================================================================*/ 

    [Serializable()] 
    internal class ISO2022Encoding : DBCSCodePageEncoding 
    {
        const byte SHIFT_OUT            = (byte)0x0E; 
        const byte SHIFT_IN             = (byte)0x0F;
        const byte ESCAPE               = 0x1B;
        const byte LEADBYTE_HALFWIDTH   = 0x10;
 
        // We have to load the 936 code page tables, so impersonate 936 as our base
        // This pretends to be other code pages as far as memory sections are concerned. 
        internal ISO2022Encoding(int codePage) : base(codePage, tableBaseCodePages[codePage % 10]) 
        {
            this.m_bUseMlangTypeForSerialization = true; 
        }

        // Constructor called by serialization.
        // Note:  We use the base GetObjectData however 
        internal ISO2022Encoding(SerializationInfo info, StreamingContext context) : base(info, context)
        { 
            // Actually this can't ever get called, CodePageEncoding is our proxy 
            BCLDebug.Assert(false, "Didn't expect to make it to DBCSCodePageEncoding serialization constructor");
            throw new ArgumentException(Environment.GetResourceString("Arg_ExecutionEngineException")); 
        }

        static int[] tableBaseCodePages =
        { 
            932,    // 50220  ISO-2022-JP, No halfwidth Katakana, convert to full width
            932,    // 50221  ISO-2022-JP, Use escape sequence for half width Katakana 
            932,    // 50222  ISO-2022-JP, Use shift-in/shift-out for half width Katakana 
            0,
            0, 
            949,    // 50225  ISO-2022-KR, Korean
            936,    // 52936  HZ-GB2312, 936 might be better source
            0, //20936,    // 50227  ISO-2022-CN, Note: This is just the same as CP 936 in Everett.
            0, 
            // 50229 is currently unsupported, CP 20000 is currently not built in .nlp file
            0, //20000,    // 50229  ISO-2022-CN, ModeCNS11643_1 
            0, //20000,    // 50229  ISO-2022-CN, ModeCNS11643_2 
            0         //                     ModeASCII
        }; 

        internal enum ISO2022Modes
        {
            ModeHalfwidthKatakana   = 0, 
            ModeJIS0208             = 1,
            ModeKR                  = 5, 
            ModeHZ                  = 6, 
            ModeGB2312              = 7,
            ModeCNS11643_1          = 9, 
            ModeCNS11643_2          = 10,
            ModeASCII               = 11,

            ModeIncompleteEscape    = -1, 
            ModeInvalidEscape       = -2,
            ModeNOOP                = -3 
        } 

        protected unsafe override String GetMemorySectionName() 
        {
            int iUseCodePage = this.bFlagDataTable ? dataTableCodePage : CodePage;

            String strFormat; 

            switch (this.CodePage) 
            { 
                case 50220:
                case 50221: 
                case 50222:
                    strFormat = "CodePage_{0}_{1}_{2}_{3}_{4}_ISO2022JP";
                    break;
                case 50225: 
                    strFormat = "CodePage_{0}_{1}_{2}_{3}_{4}_ISO2022KR";
                    break; 
                case 52936: 
                    strFormat = "CodePage_{0}_{1}_{2}_{3}_{4}_HZ";
                    break; 
                default:
                    BCLDebug.Assert(false, "[ISO2022Encoding.GetMemorySectionName] Don't expect to get here for code page " + this.CodePage);
                    strFormat = "CodePage_{0}_{1}_{2}_{3}_{4}";
                    break; 
            }
 
            String strName = String.Format(CultureInfo.InvariantCulture, strFormat, 
                iUseCodePage, this.pCodePage->VersionMajor, this.pCodePage->VersionMinor,
                this.pCodePage->VersionRevision, this.pCodePage->VersionBuild); 

            return strName;
        }
 
        // Clean up characters for ISO2022 code pages, etc.
        // ISO2022 (50220, 50221, 50222) 
        // GB-HZ (52936) 
        protected override bool CleanUpBytes(ref int bytes)
        { 
            switch (this.CodePage)
            {
                // 932 based code pages
                case 50220: 
                case 50221:
                case 50222: 
                { 
                    if (bytes >= 0x100)
                    { 
                        // map extended char (0xfa40-0xfc4b) to a special range
                        // (ported from mlang)
                        if (bytes >= 0xfa40 && bytes <= 0xfc4b)
                        { 
                            if ( bytes >= 0xfa40 && bytes <= 0xfa5b )
                            { 
                                if ( bytes <= 0xfa49 ) 
                                    bytes = bytes - 0x0b51 ;
                                else if ( bytes >= 0xfa4a && bytes <= 0xfa53 ) 
                                    bytes = bytes - 0x072f6 ;
                                else if ( bytes >= 0xfa54 && bytes <= 0xfa57 )
                                    bytes = bytes - 0x0b5b ;
                                else if ( bytes == 0xfa58 ) 
                                    bytes = 0x878a ;
                                else if ( bytes == 0xfa59 ) 
                                    bytes = 0x8782 ; 
                                else if ( bytes == 0xfa5a )
                                    bytes = 0x8784 ; 
                                else if ( bytes == 0xfa5b )
                                    bytes = 0x879a ;
                            }
                            else if ( bytes >= 0xfa5c && bytes <= 0xfc4b ) 
                            {
                                byte tc = unchecked((byte)bytes); 
                                if ( tc < 0x5c ) 
                                    bytes = bytes - 0x0d5f;
                                else if ( tc >= 0x80 && tc <= 0x9B ) 
                                    bytes = bytes - 0x0d1d;
                                else
                                    bytes = bytes - 0x0d1c;
                            } 
                        }
 
                        // Convert 932 code page to 20932 like code page range 
                        // (also ported from mlang)
                        byte bLead = unchecked((byte)(bytes >> 8)); 
                        byte bTrail = unchecked((byte)bytes);

                        bLead -= ((bLead > (byte)0x9f) ? (byte)0xb1 : (byte)0x71);
                        bLead = (byte)((bLead << 1) + 1); 
                        if (bTrail > (byte)0x9e)
                        { 
                            bTrail -= (byte)0x7e; 
                            bLead++;
                        } 
                        else
                        {
                            if (bTrail > (byte)0x7e)
                                bTrail--; 
                            bTrail -= (byte)0x1f;
                        } 
 
                        bytes = ((int)bLead) << 8 | (int)bTrail;
 
                        // Don't step out of our allocated lead byte area.
                        // All DBCS lead and trail bytes should be >= 0x21 and <= 0x7e
                        // This is commented out because Everett/Mlang had illegal PUA
                        // mappings to ISO2022 code pages that we're maintaining. 
//                        if ((bytes & 0xFF00) < 0x2100 || (bytes & 0xFF00) > 0x7e00 ||
  //                          (bytes & 0xFF) < 0x21 || (bytes & 0xFF) > 0x7e) 
    //                        return false; 
                    }
                    else 
                    {
                        // Adjust 1/2 Katakana
                        if (bytes >= 0xa1 && bytes <= 0xdf)
                            bytes += (LEADBYTE_HALFWIDTH << 8) - 0x80; 

                        // 0x81-0x9f and 0xe0-0xfc CP 932 
                        // 0x8e and 0xa1-0xfe      CP 20932 (we don't use 8e though) 
                        // b0-df is 1/2 Katakana
                        if (bytes >= 0x81 && 
                            (bytes <= 0x9f ||
                             (bytes >= 0xe0 && bytes <= 0xfc)))
                        {
                            // Don't do lead bytes, we use escape sequences instead. 
                            return false;
                        } 
                    } 
                    break;
                } 
                case 50225:
                {
                    // For 50225 since we don't rely on lead byte marks, return false and don't add them,
                    // esp. since we're only a 7 bit code page. 
                    if (bytes >= 0x80 && bytes <= 0xff)
                        return false; 
 
                    // Ignore characters out of range (a1-7f)
                    if (bytes >= 0x100 && 
                        ((bytes & 0xff) < 0xa1 || (bytes & 0xff) == 0xff ||
                         (bytes & 0xff00) < 0xa100 || (bytes & 0xff00) == 0xff00))
                         return false;
 
                    // May as well get them into our 7 bit range
                    bytes &= 0x7f7f; 
 
                    break;
                } 
                case 52936:
                {
                    // Since we don't rely on lead byte marks for 52936, get rid of them so we
                    // don't end up with extra wierd fffe mappings. 
                    if (bytes >= 0x81 && bytes <= 0xfe)
                        return false; 
 
                    break;
                } 
            }

            return true;
        } 

        // GetByteCount 
        internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS baseEncoder) 
        {
            // Just need to ASSERT, this is called by something else internal that checked parameters already 
            BCLDebug.Assert(count >= 0, "[ISO2022Encoding.GetByteCount]count is negative");
            BCLDebug.Assert(chars != null, "[ISO2022Encoding.GetByteCount]chars is null");

            // Just call GetBytes with null byte* to get count 
            return GetBytes(chars, count, null, 0, baseEncoder);
        } 
 
        internal override unsafe int GetBytes(char* chars, int charCount,
                                                byte* bytes, int byteCount, EncoderNLS baseEncoder) 
        {
            // Just need to ASSERT, this is called by something else internal that checked parameters already
            BCLDebug.Assert(chars != null, "[ISO2022Encoding.GetBytes]chars is null");
            BCLDebug.Assert(byteCount >= 0, "[ISO2022Encoding.GetBytes]byteCount is negative"); 
            BCLDebug.Assert(charCount >= 0, "[ISO2022Encoding.GetBytes]charCount is negative");
 
            // Assert because we shouldn't be able to have a null encoder. 
            BCLDebug.Assert(encoderFallback != null, "[ISO2022Encoding.GetBytes]Attempting to use null encoder fallback");
 
            // Fix our encoder
            ISO2022Encoder encoder = (ISO2022Encoder)baseEncoder;

            // Our return value 
            int iCount = 0;
 
            switch(CodePage) 
            {
                case 50220: 
                case 50221:
                case 50222:
                    iCount = GetBytesCP5022xJP( chars, charCount, bytes, byteCount, encoder );
                    break; 
                case 50225:
                    iCount = GetBytesCP50225KR( chars, charCount, bytes, byteCount, encoder ); 
                    break; 
// Everett had 50227 the same as 936
/*              case 50227: 
                    iCount = GetBytesCP50227CN( chars, charCount, bytes, byteCount, encoder );
                    break;
*/
                case 52936: 
                    iCount = GetBytesCP52936( chars, charCount, bytes, byteCount, encoder );
                    break; 
            } 

            return iCount; 
        }

        // This is internal and called by something else,
        internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder) 
        {
            // Just assert, we're called internally so these should be safe, checked already 
            BCLDebug.Assert(bytes != null, "[ISO2022Encoding.GetCharCount]bytes is null"); 
            BCLDebug.Assert(count >= 0, "[ISO2022Encoding.GetCharCount]byteCount is negative");
 
            // Just call getChars with null char* to get count
            return GetChars(bytes, count, null, 0, baseDecoder);
        }
 
        internal override unsafe int GetChars(byte* bytes, int byteCount,
                                                char* chars, int charCount, DecoderNLS baseDecoder) 
        { 
            // Just need to ASSERT, this is called by something else internal that checked parameters already
            BCLDebug.Assert(bytes != null, "[ISO2022Encoding.GetChars]bytes is null"); 
            BCLDebug.Assert(byteCount >= 0, "[ISO2022Encoding.GetChars]byteCount is negative");
            BCLDebug.Assert(charCount >= 0, "[ISO2022Encoding.GetChars]charCount is negative");

            // Fix our decoder 
            ISO2022Decoder decoder = (ISO2022Decoder)baseDecoder;
            int iCount = 0; 
 
            switch (CodePage)
            { 
                case 50220:
                case 50221:
                case 50222:
                    iCount = GetCharsCP5022xJP( bytes, byteCount, chars, charCount, decoder); 
                    break;
                case 50225: 
                    iCount = GetCharsCP50225KR( bytes, byteCount, chars, charCount, decoder); 
                    break;
                    // Currently 50227 is the same as 936 
//                case 50227:
  //                  iCount = GetCharsCP50227CN( bytes, byteCount, chars, charCount, decoder);
    //                break;
                case 52936: 
                    iCount = GetCharsCP52936( bytes, byteCount, chars, charCount, decoder);
                    break; 
                default: 
                    BCLDebug.Assert(false, "[ISO2022Encoding.GetChars] had unexpected code page");
                    break; 
            }

            return iCount;
        } 

        // ISO 2022 Code pages for JP. 
        //  50220 - No halfwidth Katakana, convert to full width 
        //  50221 - Use escape sequence for half width Katakana
        //  50222 - Use shift-in/shift-out for half width Katakana 
        //
        // These are the JIS code pages, superset of ISO-2022 / ISO-2022-JP-1
        //  0E          Shift Out (following bytes are Katakana)
        //  0F          Shift In  (back to "normal" behavior) 
        //  21-7E       Byte ranges (1 or 2 bytes)
        //   $ @   To Double Byte 0208 Mode (actually older code page, but subset of 0208) 
        //   $ B   To Double Byte 0208 Mode (duplicate) 
        //   $ ( D To Double Byte 0212 Mode (previously we misinterpreted this)
        //   $ I   To half width Katakana 
        //   ( J   To JIS-Roman
        //   ( H   To JIS-Roman (swedish character set)
        //   ( B   To ASCII
        //   & @   Alternate lead in to  $ B so just ignore it. 
        //
        // So in Katakana mode we add 0x8e as a lead byte and use CP 20932 to convert it 
        // In ASCII mode we just spit out the single byte. 
        // In Roman mode we should change 0x5c (\) -> Yen sign and 0x7e (~) to Overline, however
        //      we didn't in mLang, otherwise roman is like ASCII. 
        // In 0208 double byte mode we have to |= with 0x8080 and use CP 20932 to convert it.
        // In 0212 double byte mode we have to |= with 0x8000 and use CP 20932 to convert it.
        //
        // Note that JIS Shift In/Shift Out is different than the other ISO2022 encodings.  For JIS 
        // Shift out always shifts to half-width Katakana.  Chinese encodings use designator sequences
        // instead of escape sequences and shift out to the designated sequence or back in to ASCII. 
        // 
        // When decoding JIS 0208, MLang used a '*' (0x2a) character in JIS 0208 mode to map the trailing byte
        // to halfwidth katakana.  I found no description of that behavior, however that block of 0208 is 
        // undefined, so we maintain that behavior when decoding.  We will never generate characters using
        // that technique, but the decoder will process them.
        //
        private unsafe int GetBytesCP5022xJP(char* chars, int charCount, 
                                                  byte* bytes, int byteCount, ISO2022Encoder encoder)
        { 
            // prepare our helpers 
            Encoding.EncodingByteBuffer buffer = new Encoding.EncodingByteBuffer(
                this, encoder, bytes, byteCount, chars, charCount); 

            // Get our mode
            ISO2022Modes currentMode = ISO2022Modes.ModeASCII;      // Mode
            ISO2022Modes shiftInMode = ISO2022Modes.ModeASCII;      // Mode that shift in will go back to (only used by CP 50222) 

            // Check our encoder 
            if (encoder != null) 
            {
                char charLeftOver = encoder.charLeftOver; 

                currentMode = encoder.currentMode;
                shiftInMode = encoder.shiftInOutMode;
 
                // We may have a left over character from last time, try and process it.
                if (charLeftOver > 0) 
                { 
                    BCLDebug.Assert(Char.IsHighSurrogate(charLeftOver), "[ISO2022Encoding.GetBytesCP5022xJP]leftover character should be high surrogate");
 
                    // It has to be a high surrogate, which we don't support, so it has to be a fallback
                    buffer.Fallback(charLeftOver);
                }
            } 

            while (buffer.MoreData) 
            { 
                // Get our char
                char ch = buffer.GetNextChar(); 

                // Get our bytes
                ushort iBytes = mapUnicodeToBytes[ch];
 
                StartConvert:
                // Check for halfwidth bytes 
                byte bLeadByte = (byte)(iBytes >> 8); 
                byte bTrailByte = (byte)(iBytes & 0xff);
 
                if (bLeadByte == LEADBYTE_HALFWIDTH)
                {
                    // Its Halfwidth Katakana
                    if (CodePage == 50220) 
                    {
                        // CodePage 50220 doesn't use halfwidth Katakana, convert to fullwidth 
                        // See if its out of range, fallback if so, throws if recursive fallback 
                        if (bTrailByte < 0x21 || bTrailByte >= 0x21 + HalfToFullWidthKanaTable.Length)
                        { 
                            buffer.Fallback(ch);
                            continue;
                        }
 
                        // Get the full width katakana char to use.
                        iBytes = unchecked((ushort)(HalfToFullWidthKanaTable[bTrailByte - 0x21] & 0x7F7F)); 
 
                        // May have to do all sorts of fun stuff for mode, go back to start convert
                        goto StartConvert; 
                    }

                    // Can use halfwidth Katakana, make sure we're in right mode
 
                    // Make sure we're in right mode
                    if (currentMode != ISO2022Modes.ModeHalfwidthKatakana) 
                    { 
                        // 50222 or 50221, either shift in/out or escape to get to Katakana mode
                        if (CodePage == 50222) 
                        {
                            // Shift Out
                            if (!buffer.AddByte(SHIFT_OUT))
                                break;  // convert out of space, stop 

                            // Don't change modes until after AddByte in case it fails for convert 
                            // We get to shift out to Katakana, make sure we'll go back to the right mode 
                            // (This ends up always being ASCII)
                            shiftInMode = currentMode; 
                            currentMode = ISO2022Modes.ModeHalfwidthKatakana;
                        }
                        else
                        { 
                            // 50221 does halfwidth katakana by escape sequence
                            BCLDebug.Assert(CodePage == 50221, "[ISO2022Encoding.GetBytesCP5022xJP]Expected Code Page 50221"); 
 
                            // Add our escape sequence
                            if (!buffer.AddByte(ESCAPE, unchecked((byte)'('), unchecked((byte)'I'))) 
                                break;  // convert out of space, stop

                            currentMode = ISO2022Modes.ModeHalfwidthKatakana;
                        } 
                    }
 
                    // We know we're in Katakana mode now, so add it. 
                    // Go ahead and add the Katakana byte.  Our table tail bytes are 0x80 too big.
                    if (!buffer.AddByte(unchecked((byte)(bTrailByte & 0x7F)))) 
                        break;  // convert out of space, stop

                    // Done with this one
                    continue; 
                }
                else if (bLeadByte != 0) 
                { 
                    //
                    //  It's a double byte character. 
                    //

                    // If we're CP 50222 we may have to shift in from Katakana mode first
                    if (CodePage == 50222 && currentMode == ISO2022Modes.ModeHalfwidthKatakana) 
                    {
                        // Shift In 
                        if (!buffer.AddByte(SHIFT_IN)) 
                            break;    // convert out of space, stop
 
                        // Need to shift in from katakana.  (Still might not be right, but won't be shifted out anyway)
                        currentMode = shiftInMode;
                    }
 
                    // Make sure we're in the right mode (JIS 0208 or JIS 0212)
                    // Note: Right now we don't use JIS 0212.  Also this table'd be wrong 
 
                    // Its JIS extension 0208
                    if (currentMode != ISO2022Modes.ModeJIS0208) 
                    {
                        // Escape sequence, we can fail after this, mode will be correct for convert
                        if (!buffer.AddByte(ESCAPE, unchecked((byte)'$'), unchecked((byte)'B')))
                            break;  // Convert out of space, stop 

                        currentMode = ISO2022Modes.ModeJIS0208; 
                    } 

                    // Add our double bytes 
                    if (!buffer.AddByte(unchecked((byte)(bLeadByte)), unchecked((byte)(bTrailByte))))
                        break; // Convert out of space, stop
                    continue;
                } 
                else if (iBytes != 0 || ch == 0)
                { 
                    // Single byte Char 
                    // If we're CP 50222 we may have to shift in from Katakana mode first
                    if (CodePage == 50222 && currentMode == ISO2022Modes.ModeHalfwidthKatakana) 
                    {
                        // Shift IN
                        if (!buffer.AddByte(SHIFT_IN))
                            break; // convert ran out of room 

                        // Need to shift in from katakana.  (Still might not be right, but won't be shifted out anyway) 
                        currentMode = shiftInMode; 
                    }
 
                    // Its a single byte character, switch to ASCII if we have to
                    if (currentMode != ISO2022Modes.ModeASCII)
                    {
                        if (!buffer.AddByte(ESCAPE,unchecked((byte)'('), unchecked((byte)'B'))) 
                            break; // convert ran out of room
 
                        currentMode = ISO2022Modes.ModeASCII; 
                    }
 
                    // Add the ASCII char
                    if (!buffer.AddByte(bTrailByte))
                        break; // convert had no room left
                    continue; 
                }
 
                // Its unknown, do fallback, throws if recursive (knows because we called InternalGetNextChar) 
                buffer.Fallback(ch);
            } 

            // Switch back to ASCII if MustFlush or no encoder
            if (currentMode != ISO2022Modes.ModeASCII &&
                (encoder == null || encoder.MustFlush)) 
            {
                // If we're CP 50222 we may have to shift in from Katakana mode first 
                if (CodePage == 50222 && currentMode == ISO2022Modes.ModeHalfwidthKatakana) 
                {
                    // Shift IN, only shift mode if necessary. 
                    if (buffer.AddByte(SHIFT_IN))
                        // Need to shift in from katakana.  (Still might not be right, but won't be shifted out anyway)
                        currentMode = shiftInMode;
                    else 
                        // If not successful, convert will maintain state for next time, also
                        // AddByte will have decremented our char count, however we need it to remain the same 
                        buffer.GetNextChar(); 
                }
 
                // switch back to ASCII to finish neatly
                if (currentMode != ISO2022Modes.ModeASCII &&
                    (CodePage != 50222 || currentMode != ISO2022Modes.ModeHalfwidthKatakana))
                { 
                    // only shift if it was successful
                    if (buffer.AddByte(ESCAPE, unchecked((byte)'('), unchecked((byte)'B'))) 
                        currentMode = ISO2022Modes.ModeASCII; 
                    else
                        // If not successful, convert will maintain state for next time, also 
                        // AddByte will have decremented our char count, however we need it to remain the same
                        buffer.GetNextChar();
                }
            } 

            // Remember our encoder state 
            if (bytes != null && encoder != null) 
            {
                // This is ASCII if we had to flush 
                encoder.currentMode = currentMode;
                encoder.shiftInOutMode = shiftInMode;

                if (!buffer.fallbackBuffer.bUsedEncoder) 
                {
                    encoder.charLeftOver = (char)0; 
                } 

                encoder.m_charsUsed = buffer.CharsUsed; 
            }

            // Return our length
            return buffer.Count; 
        }
 
        // ISO 2022 Code pages for Korean - CP 50225 
        //
        // CP 50225 has Shift In/Shift Out codes, and a single designator sequence that is supposed 
        // to appear once in the file, at the beginning of a line, before any multibyte code points.
        // So we stick the designator at the beginning of the output.
        //
        // These are the KR code page codes for ISO-2022-KR 
        //  0E          Shift Out (following bytes are double byte)
        //  0F          Shift In  (back to ASCII behavior) 
        //  21-7E       Byte ranges (1 or 2 bytes) 
        //   $)C   Double byte ISO-2022-KR designator
        // 
        // Note that this encoding is a little different than other encodings.  The $)C sequence
        // should only appear once per file.  (Actually I saw another spec/rfc that said at the beginning
        // of each line, but it shouldn't really matter.)
        // 
        // During decoding Mlang accepted ' ', '\t, and '\n' as their respective characters, even if
        // it was in double byte mode.  We maintain that behavior, although I couldn't find a reference or 
        // reason for that behavior.  We never generate data using that shortcut. 
        //
        // Also Mlang always assumed KR mode, even if the designator wasn't found yet, so we do that as 
        // well.  So basically we just ignore $)C when decoding.
        //
        private unsafe int GetBytesCP50225KR(char* chars, int charCount,
                                                    byte* bytes, int byteCount, ISO2022Encoder encoder) 
        {
            // prepare our helpers 
            Encoding.EncodingByteBuffer buffer = new Encoding.EncodingByteBuffer( 
                this, encoder, bytes, byteCount, chars, charCount);
 
            // Get our mode
            ISO2022Modes currentMode = ISO2022Modes.ModeASCII;      // Mode
            ISO2022Modes shiftOutMode = ISO2022Modes.ModeASCII;     // ModeKR if already stamped lead bytes
 
            // Check our encoder
            if (encoder != null) 
            { 
                // May have leftover stuff
                char charLeftOver = encoder.charLeftOver; 
                currentMode = encoder.currentMode;
                shiftOutMode = encoder.shiftInOutMode;

                // We may have a l left over character from last time, try and process it. 
                if (charLeftOver > 0)
                { 
                    BCLDebug.Assert(Char.IsHighSurrogate(charLeftOver), "[ISO2022Encoding.GetBytesCP50225KR]leftover character should be high surrogate"); 

                    // It has to be a high surrogate, which we don't support, so it has to be a fallback 
                    buffer.Fallback(charLeftOver);
                }
            }
 
            while (buffer.MoreData)
            { 
                // Get our data 
                char ch = buffer.GetNextChar();
 
                // Get our bytes
                ushort iBytes = mapUnicodeToBytes[ch];

                // Check for double byte bytes 
                byte bLeadByte = (byte)(iBytes >> 8);
                byte bTrailByte = (byte)(iBytes & 0xff); 
 
                if (bLeadByte != 0)
                { 
                    //
                    //  It's a double byte character.
                    //
 
                    // If we haven't done our Korean designator, then do so, if we have any input
                    if (shiftOutMode != ISO2022Modes.ModeKR) 
                    { 
                        // Add our code page designator sequence
                        if (!buffer.AddByte(ESCAPE, unchecked((byte)'$'), unchecked((byte)')'), unchecked((byte)'C'))) 
                            break; // No room during convert.

                        shiftOutMode = ISO2022Modes.ModeKR;
                    } 

                    // May have to switch to ModeKR first 
                    if (currentMode != ISO2022Modes.ModeKR) 
                    {
                        if (!buffer.AddByte(SHIFT_OUT)) 
                            break; // No convert room

                        currentMode = ISO2022Modes.ModeKR;
                    } 

                    // Add the bytes 
                    if (!buffer.AddByte(bLeadByte, bTrailByte)) 
                        break; // no convert room
                    continue; 
                }
                else if (iBytes != 0 || ch == 0)
                {
                    // Its a single byte character, switch to ASCII if we have to 
                    if (currentMode != ISO2022Modes.ModeASCII)
                    { 
                        if (!buffer.AddByte(SHIFT_IN)) 
                            break;
 
                        currentMode = ISO2022Modes.ModeASCII;
                    }

                    // Add the ASCII char 
                    if (!buffer.AddByte(bTrailByte))
                        break; 
                    continue; 
                }
 
                // Its unknown, do fallback, throws if recursive (knows because we called InternalGetNextChar)
                buffer.Fallback(ch);
            }
 
            // Switch back to ASCII if MustFlush or no encoder
            if (currentMode != ISO2022Modes.ModeASCII && 
                (encoder == null || encoder.MustFlush)) 
            {
                // Get back to ASCII to be safe.  Only do it if it success. 
                if (buffer.AddByte(SHIFT_IN))
                    currentMode = ISO2022Modes.ModeASCII;
                else
                    // If not successful, convert will maintain state for next time, also 
                    // AddByte will have decremented our char count, however we need it to remain the same
                    buffer.GetNextChar(); 
            } 

            // Remember our encoder state 
            if (bytes != null && encoder != null)
            {
                // If we didn't use the encoder, then there's no chars left over
                if (!buffer.fallbackBuffer.bUsedEncoder) 
                {
                    encoder.charLeftOver = (char)0; 
                } 

                // This is ASCII if we had to flush 
                encoder.currentMode = currentMode;

                // We don't use shift out mode, but if we've flushed we need to reset it so it doesn't
                // get output again. 
                if (!encoder.MustFlush || encoder.charLeftOver != (char)0)
                { 
                    // We should be not flushing or converting 
                    BCLDebug.Assert(!encoder.MustFlush || !encoder.m_throwOnOverflow,
                        "[ISO2022Encoding.GetBytesCP50225KR]Expected no left over data or not flushing or not converting"); 
                    encoder.shiftInOutMode = shiftOutMode;
                }
                else
                    encoder.shiftInOutMode = ISO2022Modes.ModeASCII; 

                encoder.m_charsUsed = buffer.CharsUsed; 
            } 

            // Return our length 
            return buffer.Count;
        }

        // CP52936 is HZ Encoding 
        // HZ Encoding has 4 shift sequences:
        // ~~       '~' (\u7e) 
        // ~}       shift into 1 byte mode, 
        // ~{       shift into 2 byte GB 2312-80
        // ~    Maintain 2 byte mode across new lines (ignore both ~ and  characters) 
        //          (This is for mailers that restrict to 70 or 80 or whatever character lines)
        //
        // According to comment in mlang, lead & trail byte ranges are described in RFC 1843
        // RFC 1843 => valid HZ code range: leading byte 0x21 - 0x77, 2nd byte 0x21 - 0x7e 
        // Our 936 code points are or'd with 0x8080, so lead byte 0xa1 - 0xf7, trail byte 0xa1 - 0xfe
        // 
        // This encoding is designed for transmission by e-mail and news.  No bytes should have high bit set. 
        // (all bytes <= 0x7f)
        private unsafe int GetBytesCP52936(char* chars, int charCount, 
                                           byte* bytes, int byteCount, ISO2022Encoder encoder)
        {
            // prepare our helpers
            Encoding.EncodingByteBuffer buffer = new Encoding.EncodingByteBuffer( 
                this, encoder, bytes, byteCount, chars, charCount);
 
            // Mode 
            ISO2022Modes currentMode = ISO2022Modes.ModeASCII;
 
            // Check our encoder
            if (encoder != null)
            {
                char charLeftOver = encoder.charLeftOver; 
                currentMode = encoder.currentMode;
 
                // We may have a left over character from last time, try and process it. 
                if (charLeftOver > 0)
                { 
                    BCLDebug.Assert(Char.IsHighSurrogate(charLeftOver), "[ISO2022Encoding.GetBytesCP52936]leftover character should be high surrogate");

                    // It has to be a high surrogate, which we don't support, so it has to be a fallback
                    buffer.Fallback(charLeftOver); 
                }
            } 
 
            while (buffer.MoreData)
            { 
                // Get our char
                char ch = buffer.GetNextChar();

                // Get our bytes 
                ushort sChar = mapUnicodeToBytes[ch];
                if (sChar == 0 && ch != 0) 
                { 
                    // Wasn't a legal byte sequence, its a surrogate or fallback
                    // Throws if recursive (knows because we called InternalGetNextChar) 
                    buffer.Fallback(ch);

                    // Done with our char, now process fallback
                    continue; 
                }
 
                // Check for halfwidth bytes 
                byte bLeadByte = (byte)(sChar >> 8);
                byte bTrailByte = (byte)(sChar & 0xff); 

                // If its a double byte, it has to fit in the lead byte 0xa1 - 0xf7, trail byte 0xa1 - 0xfe range
                // (including the 0x8080 that our codepage or's to the value)
                if ((bLeadByte != 0 && 
                     (bLeadByte < 0xa1 || bLeadByte > 0xf7 || bTrailByte < 0xa1 || bTrailByte > 0xfe)) ||
                    (bLeadByte == 0 && bTrailByte > 0x80 && bTrailByte != 0xff)) 
                { 
                    // Illegal character, in 936 code page, but not in HZ subset, get fallback for it
                    buffer.Fallback(ch); 
                    continue;
                }

                // sChar is now either ASCII or has an 0x8080 mask 
                if (bLeadByte != 0)
                { 
                    // Its a double byte mode 
                    if (currentMode != ISO2022Modes.ModeHZ)
                    { 
                        // Need to add the double byte mode marker
                        if (!buffer.AddByte((byte)'~', (byte)'{', 2))
                            break;                                      // Stop if no buffer space in convert
 
                        currentMode = ISO2022Modes.ModeHZ;
                    } 
 
                    // Go ahead and add the 2 bytes
                    if (!buffer.AddByte(unchecked((byte)(bLeadByte & 0x7f)), unchecked((byte)(bTrailByte & 0x7f)))) 
                        break;                                      // Stop if no buffer space in convert
                }
                else
                { 
                    // Its supposed to be ASCII
                    if (currentMode != ISO2022Modes.ModeASCII) 
                    { 
                        // Need to add the ASCII mode marker
                        // Will have 1 more byte (or 2 if ~) 
                        if (!buffer.AddByte((byte)'~', (byte)'}', bTrailByte == '~' ? 2:1))
                            break;

                        currentMode = ISO2022Modes.ModeASCII; 
                    }
 
                    // If its a '~' we'll need an extra one 
                    if (bTrailByte == '~')
                    { 
                        // Need to add the extra ~
                        if (!buffer.AddByte((byte)'~', 1))
                            break;
                    } 

                    // Need to add the character 
                    if (!buffer.AddByte(bTrailByte)) 
                        break;
                } 
            }

            // Add ASCII shift out if we're at end of decoder
            if (currentMode != ISO2022Modes.ModeASCII && 
                (encoder == null || encoder.MustFlush))
            { 
                // Need to add the ASCII mode marker 
                // Only turn off other mode if this works
                if (buffer.AddByte((byte)'~',(byte)'}')) 
                    currentMode = ISO2022Modes.ModeASCII;
                else
                    // If not successful, convert will maintain state for next time, also
                    // AddByte will have decremented our char count, however we need it to remain the same 
                    buffer.GetNextChar();
            } 
 
            // Need to remember our mode
            if (encoder != null && bytes != null) 
            {
                // This is ASCII if we had to flush
                encoder.currentMode = currentMode;
 
                if (!buffer.fallbackBuffer.bUsedEncoder)
                { 
                    encoder.charLeftOver = (char)0; 
                }
 
                encoder.m_charsUsed = buffer.CharsUsed;
            }

            // Return our length 
            return buffer.Count;
        } 
 
        private unsafe int GetCharsCP5022xJP(byte* bytes, int byteCount,
                                                  char* chars, int charCount, ISO2022Decoder decoder) 
        {
            // Get our info.
            Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
                this, decoder, chars, charCount, bytes, byteCount); 

            // No mode information yet 
            ISO2022Modes currentMode = ISO2022Modes.ModeASCII;      // Our current Mode 
            ISO2022Modes shiftInMode = ISO2022Modes.ModeASCII;      // Mode that we'll shift in to
            byte[] escapeBytes = new byte[4]; 
            int escapeCount = 0;

            if (decoder != null)
            { 
                currentMode = decoder.currentMode;
                shiftInMode = decoder.shiftInOutMode; 
 
                // See if we have leftover decoder buffer to use
                // Load our bytesLeftOver 
                escapeCount = decoder.bytesLeftOverCount;

                // Don't want to mess up decoder if we're counting or throw an exception
                for (int i = 0; i < escapeCount; i++) 
                    escapeBytes[i] = decoder.bytesLeftOver[i];
            } 
 
            // Do this until the end
            while (buffer.MoreData || escapeCount > 0) 
            {
                byte ch;

                if (escapeCount > 0) 
                {
                    // Get more escape sequences if necessary 
                    if (escapeBytes[0] == ESCAPE) 
                    {
                        // Stop if no more input 
                        if (!buffer.MoreData)
                        {
                            if (decoder != null && !decoder.MustFlush)
                                break; 
                        }
                        else 
                        { 
                            // Add it to the sequence we can check
                            escapeBytes[escapeCount++] = buffer.GetNextByte(); 

                            // We have an escape sequence
                            ISO2022Modes modeReturn =
                                CheckEscapeSequenceJP(escapeBytes, escapeCount); 

                            if (modeReturn != ISO2022Modes.ModeInvalidEscape) 
                            { 
                                if (modeReturn != ISO2022Modes.ModeIncompleteEscape)
                                { 
                                    // Processed escape correctly
                                    escapeCount = 0;

                                    // We're now this mode 
                                    currentMode = shiftInMode = modeReturn;
                                } 
 
                                // Either way, continue to get next escape or real byte
                                continue; 
                            }
                        }

                        // If ModeInvalidEscape, or no input & must flush, then fall through to add escape. 
                    }
 
                    // Read next escape byte and move them down one. 
                    ch = DecrementEscapeBytes(ref escapeBytes, ref escapeCount);
                } 
                else
                {
                    // Get our next byte
                    ch = buffer.GetNextByte(); 

                    if (ch == ESCAPE) 
                    { 
                        // We'll have an escape sequence, use it if we don't have one buffered already
                        if (escapeCount == 0) 
                        {
                            // Start this new escape sequence
                            escapeBytes[0] = ch;
                            escapeCount = 1; 
                            continue;
                        } 
 
                        // Flush the previous escape sequence, then reuse this escape byte
                        buffer.AdjustBytes(-1); 
                    }
                }

                if (ch == SHIFT_OUT) 
                {
                   shiftInMode = currentMode; 
                   currentMode = ISO2022Modes.ModeHalfwidthKatakana; 
                   continue;
                } 
                else if (ch == SHIFT_IN)
                {
                   currentMode = shiftInMode;
                   continue; 
                }
 
                // Get our full character 
                ushort iBytes = ch;
                bool b2Bytes = false; 

                if (currentMode == ISO2022Modes.ModeJIS0208)
                {
                    // 
                    //  To handle errors, we need to check:
                    //    1. if trailbyte is there 
                    //    2. if code is valid 
                    //
                    if (escapeCount > 0) 
                    {
                        // Let another escape fall through
                        if (escapeBytes[0] != ESCAPE)
                        { 
                            // Move them down one & get the next data
                            iBytes <<= 8; 
                            iBytes |= DecrementEscapeBytes(ref escapeBytes, ref escapeCount); 
                            b2Bytes = true;
                        } 
                    }
                    else if (buffer.MoreData)
                    {
                        iBytes <<= 8; 
                        iBytes |= buffer.GetNextByte();
                        b2Bytes = true; 
                    } 
                    else
                    { 
                        // Not enough input, use decoder if possible
                        if (decoder == null || decoder.MustFlush)
                        {
                            // No decoder, do fallback for this byte 
                            buffer.Fallback(ch);
                            break; 
                        } 

                        // Stick it in the decoder if we're not counting 
                        if (chars != null)
                        {
                            escapeBytes[0] = ch;
                            escapeCount = 1; 
                        }
                        break; 
                    } 

                    // MLang treated JIS 0208 '*' lead byte like a single halfwidth katakana 
                    // escape, so use 0x8e00 as katakana lead byte and keep same trail byte.
                    // 0x2a lead byte range is normally unused in JIS 0208, so shouldn't have
                    // any wierd compatibility issues.
                    if ((b2Bytes == true) && ((iBytes & 0xff00) == 0x2a00)) 
                    {
                        iBytes = (ushort)(iBytes & 0xff); 
                        iBytes |= (LEADBYTE_HALFWIDTH << 8);   // Put us in the halfwidth katakana range 
                    }
                } 
                else if (iBytes >= 0xA1 && iBytes <= 0xDF)
                {
                    // Everett accidentally mapped Katakana like shift-jis (932),
                    // even though this is a 7 bit code page.  We keep that mapping 
                    iBytes |= (LEADBYTE_HALFWIDTH << 8);    // Map to halfwidth katakana range
                    iBytes &= 0xff7f;                       // remove extra 0x80 
                } 
                else if (currentMode == ISO2022Modes.ModeHalfwidthKatakana )
                { 
                    // Add 0x10 lead byte that our encoding expects for Katakana:
                    iBytes |= (LEADBYTE_HALFWIDTH << 8);
                }
 
                // We have an iBytes to try to convert.
                char c = mapBytesToUnicode[iBytes]; 
 
                // See if it was unknown
                if (c == UNKNOWN_CHAR_FLAG && iBytes != 0) 
                {
                    // Have to do fallback
                    if (b2Bytes)
                    { 
                        if (!buffer.Fallback((byte)(iBytes >> 8), (byte)iBytes))
                            break; 
                    } 
                    else
                    { 
                        if (!buffer.Fallback(ch))
                            break;
                    }
                } 
                else
                { 
                    // If we were JIS 0208, then we consumed an extra byte 
                    if (!buffer.AddChar(c, b2Bytes ? 2:1))
                        break; 
                }
            }

            // Make sure our decoder state matches our mode, if not counting 
            if (chars != null && decoder != null)
            { 
                // Remember it if we don't flush 
                if (!decoder.MustFlush || escapeCount != 0)
                { 
                    // Either not flushing or had state (from convert)
                    BCLDebug.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow,
                        "[ISO2022Encoding.GetCharsCP5022xJP]Expected no state or not converting or not flushing");
 
                    decoder.currentMode = currentMode;
                    decoder.shiftInOutMode = shiftInMode; 
 
                    // Remember escape buffer
                    decoder.bytesLeftOverCount = escapeCount; 
                    decoder.bytesLeftOver = escapeBytes;
                }
                else
                { 
                    // We flush, clear buffer
                    decoder.currentMode = ISO2022Modes.ModeASCII; 
                    decoder.shiftInOutMode = ISO2022Modes.ModeASCII; 
                    decoder.bytesLeftOverCount = 0;
                    // Slightly different if counting/not counting 
                }

                decoder.m_bytesUsed = buffer.BytesUsed;
            } 

            // Return # of characters we found 
            return buffer.Count; 
        }
 
        // We know we have an escape sequence, so check it starting with the byte after the escape
        private ISO2022Modes CheckEscapeSequenceJP( byte[] bytes, int escapeCount )
        {
            // Have an escape sequence 
            if (bytes[0] != ESCAPE)
                return ISO2022Modes.ModeInvalidEscape; 
 
            if (escapeCount < 3)
                return ISO2022Modes.ModeIncompleteEscape; 

            if (bytes[1] == '(')
            {
                if (bytes[2] == 'B')       // (B 
                {
                    return ISO2022Modes.ModeASCII; 
                } 
                else if (bytes[2] == 'H')  // (H
                { 
                    // Actually this is supposed to be Swedish
                    // We treat it like ASCII though.
                    return ISO2022Modes.ModeASCII;
                } 
                else if (bytes[2] == 'J')  // (J
                { 
                    // Actually this is supposed to be Roman 
                    // 2 characters are different, but historically we treat it as ascii
                    return ISO2022Modes.ModeASCII; 
                }
                else if (bytes[2] == 'I')  // (I
                {
                    return ISO2022Modes.ModeHalfwidthKatakana; 
                }
            } 
            else if (bytes[1] == '$') 
            {
                if (bytes[2] == '@' ||   // $@ 
                    bytes[2] == 'B')     // $B
                {
                    return ISO2022Modes.ModeJIS0208;
                } 
                else
                { 
                    // Looking for $(D 
                    if (escapeCount < 4)
                        return ISO2022Modes.ModeIncompleteEscape; 

                    if (bytes[2] == '(' && bytes[3] == 'D') // $(D
                    {
                        // Mlang treated 0208 like 0212 even though that's wrong 
                        return ISO2022Modes.ModeJIS0208;
                    } 
                } 
            }
            else if (bytes[1] == '&') 
            {
                if (bytes[2] == '@')            // &@
                {
                    // Ignore ESC & @ (prefix to $B) 
                    return ISO2022Modes.ModeNOOP;
                } 
            } 

            // If we get here we fell through and have an invalid/unknown escape sequence 
            return ISO2022Modes.ModeInvalidEscape;
        }

        private byte DecrementEscapeBytes(ref byte[] bytes, ref int count) 
        {
            BCLDebug.Assert(count > 0, "[ISO2022Encoding.DecrementEscapeBytes]count > 0"); 
 
            // Decrement our count
            count--; 

            // Remember the first one
            byte returnValue = bytes[0];
 
            // Move them down one.
            for (int i = 0; i < count; i++) 
            { 
                bytes[i] = bytes[i+1];
            } 

            // Clear out the last byte
            bytes[count] = 0;
 
            // Return the old 1st byte
            return returnValue; 
        } 

        // Note that in DBCS mode mlang passed through ' ', '\t' and '\n' as SBCS characters 
        // probably to allow mailer formatting without too much extra work.
        private unsafe int GetCharsCP50225KR(byte* bytes, int byteCount,
                                                   char* chars, int charCount, ISO2022Decoder decoder)
        { 
            // Get our info.
            Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer( 
                this, decoder, chars, charCount, bytes, byteCount); 

            // No mode information yet 
            ISO2022Modes currentMode = ISO2022Modes.ModeASCII;      // Our current Mode

            byte[] escapeBytes = new byte[4];
            int escapeCount = 0; 

            if (decoder != null) 
            { 
                currentMode = decoder.currentMode;
 
                // See if we have leftover decoder buffer to use
                // Load our bytesLeftOver
                escapeCount = decoder.bytesLeftOverCount;
 
                // Don't want to mess up decoder if we're counting or throw an exception
                for (int i = 0; i < escapeCount; i++) 
                    escapeBytes[i] = decoder.bytesLeftOver[i]; 
            }
 
            // Do this until the end, just do '?' replacement because we don't have fallbacks for decodings.
            while (buffer.MoreData || escapeCount > 0)
            {
                byte ch; 

                if (escapeCount > 0) 
                { 
                    // Get more escape sequences if necessary
                    if (escapeBytes[0] == ESCAPE) 
                    {
                        // Stop if no more input
                        if (!buffer.MoreData)
                        { 
                            if (decoder != null && !decoder.MustFlush)
                                break; 
                        } 
                        else
                        { 
                            // Add it to the sequence we can check
                            escapeBytes[escapeCount++] = buffer.GetNextByte();

                            // We have an escape sequence 
                            ISO2022Modes modeReturn =
                                CheckEscapeSequenceKR(escapeBytes, escapeCount); 
 
                            if (modeReturn != ISO2022Modes.ModeInvalidEscape)
                            { 
                                if (modeReturn != ISO2022Modes.ModeIncompleteEscape)
                                {
                                    // Processed escape correctly, no effect (we know about KR mode)
                                    escapeCount = 0; 
                                }
 
                                // Either way, continue to get next escape or real byte 
                                continue;
                            } 
                        }

                        // If ModeInvalidEscape, or no input & must flush, then fall through to add escape.
                    } 

                    // Still have something left over in escape buffer 
                    // Get it and move them down one 
                    ch = DecrementEscapeBytes(ref escapeBytes, ref escapeCount);
                } 
                else
                {
                    // Get our next byte
                    ch = buffer.GetNextByte(); 

                    if (ch == ESCAPE) 
                    { 
                        // We'll have an escape sequence, use it if we don't have one buffered already
                        if (escapeCount == 0) 
                        {
                            // Start this new escape sequence
                            escapeBytes[0] = ch;
                            escapeCount = 1; 
                            continue;
                        } 
 
                        // Flush previous escape sequence, then reuse this escape byte
                        buffer.AdjustBytes(-1); 
                    }
                }

                if (ch == SHIFT_OUT) 
                {
                   currentMode = ISO2022Modes.ModeKR; 
                   continue; 
                }
                else if (ch == SHIFT_IN) 
                {
                   currentMode = ISO2022Modes.ModeASCII;
                   continue;
                } 

                // Get our full character 
                ushort iBytes = ch; 
                bool b2Bytes = false;
 
                // MLANG was passing through ' ', '\t' and '\n', so we do so as well, but I don't see that in the RFC.
                if (currentMode == ISO2022Modes.ModeKR && ch != ' ' && ch != '\t' && ch != '\n')
                {
                    // 
                    //  To handle errors, we need to check:
                    //    1. if trailbyte is there 
                    //    2. if code is valid 
                    //
                    if (escapeCount > 0) 
                    {
                        // Let another escape fall through
                        if (escapeBytes[0] != ESCAPE)
                        { 
                            // Move them down one & get the next data
                            iBytes <<= 8; 
                            iBytes |= DecrementEscapeBytes(ref escapeBytes, ref escapeCount); 
                            b2Bytes = true;
                        } 
                    }
                    else if (buffer.MoreData)
                    {
                        iBytes <<= 8; 
                        iBytes |= buffer.GetNextByte();
                        b2Bytes = true; 
                    } 
                    else
                    { 
                        // Not enough input, use decoder if possible
                        if (decoder == null || decoder.MustFlush)
                        {
                            // No decoder, do fallback for lonely 1st byte 
                            buffer.Fallback(ch);
                            break; 
                        } 

                        // Stick it in the decoder if we're not counting 
                        if (chars != null)
                        {
                            escapeBytes[0] = ch;
                            escapeCount = 1; 
                        }
                        break; 
                    } 
                }
 
                // We have a iBytes to try to convert.
                char c = mapBytesToUnicode[iBytes];

                // See if it was unknown 
                if (c == UNKNOWN_CHAR_FLAG && iBytes != 0)
                { 
                    // Have to do fallback 
                    if (b2Bytes)
                    { 
                        if (!buffer.Fallback((byte)(iBytes >> 8), (byte)iBytes))
                            break;
                    }
                    else 
                    {
                        if (!buffer.Fallback(ch)) 
                            break; 
                    }
                } 
                else
                {
                    if (!buffer.AddChar(c, b2Bytes ? 2:1))
                        break; 
                }
            } 
 
            // Make sure our decoder state matches our mode, if not counting
            if (chars != null && decoder != null) 
            {
                // Remember it if we don't flush
                if (!decoder.MustFlush || escapeCount != 0)
                { 
                    // Either not flushing or had state (from convert)
                    BCLDebug.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow, 
                        "[ISO2022Encoding.GetCharsCP50225KR]Expected no state or not converting or not flushing"); 

                    decoder.currentMode = currentMode; 

                    // Remember escape buffer
                    decoder.bytesLeftOverCount = escapeCount;
                    decoder.bytesLeftOver = escapeBytes; 
                }
                else 
                { 
                    // We flush, clear buffer
                    decoder.currentMode = ISO2022Modes.ModeASCII; 
                    decoder.shiftInOutMode = ISO2022Modes.ModeASCII;
                    decoder.bytesLeftOverCount = 0;
                }
 
                decoder.m_bytesUsed = buffer.BytesUsed;
            } 
 
            // Return # of characters we found
            return buffer.Count; 
        }

        // We know we have an escape sequence, so check it starting with the byte after the escape
        private ISO2022Modes CheckEscapeSequenceKR( byte[] bytes, int escapeCount ) 
        {
            // Have an escape sequence 
            if (bytes[0] != ESCAPE) 
                return ISO2022Modes.ModeInvalidEscape;
 
            if (escapeCount < 4)
                return ISO2022Modes.ModeIncompleteEscape;

            if (bytes[1] == '$' && bytes[2] == ')' && bytes[3] == 'C') // $)C 
                return ISO2022Modes.ModeKR;
 
            // If we get here we fell through and have an invalid/unknown escape sequence 
            return ISO2022Modes.ModeInvalidEscape;
        } 

        // CP52936 is HZ Encoding
        // HZ Encoding has 4 shift sequences:
        // ~~       '~' (\u7e) 
        // ~}       shift into 1 byte mode,
        // ~{       shift into 2 byte GB 2312-80 
        // ~    Maintain 2 byte mode across new lines (ignore both ~ and  characters) 
        //          (This is for mailers that restrict to 70 or 80 or whatever character lines)
        // 
        // According to comment in mlang, lead & trail byte ranges are described in RFC 1843
        // RFC 1843 => valid HZ code range: leading byte 0x21 - 0x77, 2nd byte 0x21 - 0x7e
        // Our 936 code points are or'd with 0x8080, so lead byte 0xa1 - 0xf7, trail byte 0xa1 - 0xfe
        // 
        // This encoding is designed for transmission by e-mail and news.  No bytes should have high bit set.
        // (all bytes <= 0x7f) 
        private unsafe int GetCharsCP52936(byte* bytes, int byteCount, 
                                                char* chars, int charCount, ISO2022Decoder decoder)
        { 
            BCLDebug.Assert(byteCount >=0, "[ISO2022Encoding.GetCharsCP52936]count >=0");
            BCLDebug.Assert(bytes!=null, "[ISO2022Encoding.GetCharsCP52936]bytes!=null");

            // Get our info. 
            Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
                this, decoder, chars, charCount, bytes, byteCount); 
 
            // No mode information yet
            ISO2022Modes currentMode = ISO2022Modes.ModeASCII; 
            int byteLeftOver = -1;
            bool bUsedDecoder = false;

            if (decoder != null) 
            {
                currentMode = decoder.currentMode; 
                // See if we have leftover decoder buffer to use 
                // Don't want to mess up decoder if we're counting or throw an exception
                if (decoder.bytesLeftOverCount != 0 ) 
                {
                    // Load our bytesLeftOver
                    byteLeftOver = decoder.bytesLeftOver[0];
                } 
            }
 
            // Do this until the end, just do '?' replacement because we don't have fallbacks for decodings. 
            while (buffer.MoreData || byteLeftOver >= 0)
            { 
                byte ch;

                // May have a left over byte
                if (byteLeftOver >= 0) 
                {
                    ch = (byte)byteLeftOver; 
                    byteLeftOver = -1; 
                }
                else 
                {
                    ch = buffer.GetNextByte();
                }
 
                // We're in escape mode
                if (ch == '~') 
                { 
                    // Next char is type of switch
                    if (!buffer.MoreData) 
                    {
                        // We don't have anything left, it'll be in decoder or a ?
                        // don't fail if we are allowing overflows
                        if (decoder == null || decoder.MustFlush) 
                        {
                            // We'll be a '?' 
                            buffer.Fallback(ch); 
                            // break if we fail & break if we don't (because !MoreData)
                            // Add succeeded, continue 
                            break;
                        }

                        // Stick it in decoder 
                        if (decoder != null)
                            decoder.ClearMustFlush(); 
 
                        if (chars != null)
                        { 
                            decoder.bytesLeftOverCount = 1;
                            decoder.bytesLeftOver[0] = (byte)'~';
                            bUsedDecoder = true;
                        } 
                        break;
                    } 
 
                    // What type is it?, get 2nd byte
                    ch = buffer.GetNextByte(); 

                    if (ch == '~' && currentMode == ISO2022Modes.ModeASCII)
                    {
                        // Its just a ~~ replacement for ~, add it 
                        if (!buffer.AddChar((char)ch, 2))
                            // Add failed, break for converting 
                            break; 

                        // Add succeeded, continue 
                        continue;
                    }
                    else if (ch == '{')
                    { 
                        // Switching to Double Byte mode
                        currentMode = ISO2022Modes.ModeHZ; 
                        continue; 
                    }
                    else if (ch == '}') 
                    {
                        // Switching to ASCII mode
                        currentMode = ISO2022Modes.ModeASCII;
                        continue; 
                    }
                    else if (ch == '\n') 
                    { 
                        // Ignore ~\n sequence
                        continue; 
                    }
                    else
                    {
                        // Unknown escape, back up and try the '~' as a "normal" byte or lead byte 
                        buffer.AdjustBytes(-1);
                        ch = (byte)'~'; 
                    } 
                }
 
                // go ahead and add our data
                if (currentMode != ISO2022Modes.ModeASCII)
                {
                    // Should be ModeHZ 
                    BCLDebug.Assert(currentMode == ISO2022Modes.ModeHZ, "[ISO2022Encoding.GetCharsCP52936]Expected ModeHZ");
                    char cm; 
 
                    // Everett allowed characters < 0x20 to be passed as if they were ASCII
                    if (ch < 0x20) 
                    {
                        // Emit it as ASCII
                        goto STOREASCII;
                    } 

                    // Its multibyte, should have another byte 
                    if (!buffer.MoreData) 
                    {
                        // No bytes left 
                        // don't fail if we are allowing overflows
                        if (decoder == null || decoder.MustFlush)
                        {
                            // Not enough bytes, fallback lead byte 
                            buffer.Fallback(ch);
 
                            // Break if we fail & break because !MoreData 
                            break;
                        } 

                        if (decoder != null)
                            decoder.ClearMustFlush();
 
                        // Stick it in decoder
                        if (chars != null) 
                        { 
                            decoder.bytesLeftOverCount = 1;
                            decoder.bytesLeftOver[0] = ch; 
                            bUsedDecoder = true;
                        }
                        break;
                    } 

                    // Everett uses space as an escape character for single SBCS bytes 
                    byte ch2 = buffer.GetNextByte(); 
                    ushort iBytes = (ushort)(ch << 8 | ch2);
 
                    if (ch == ' ' && ch2 != 0)
                    {
                        // Get next char and treat it like ASCII (Everett treated space like an escape
                        // allowing the next char to be just ascii) 
                        cm = (char)ch2;
                        goto STOREMULTIBYTE; 
                    } 

                    // Bytes should be in range: lead byte 0x21-0x77, trail byte: 0x21 - 0x7e 
                    if ((ch < 0x21 || ch > 0x77 || ch2 < 0x21 || ch2 > 0x7e) &&
                    // Everett allowed high bit mappings for same characters (but only if both bits set)
                        (ch < 0xa1 || ch > 0xf7 || ch2 < 0xa1 || ch2 > 0xfe))
                    { 
                        // For some reason Everett allowed XX20 to become unicode 3000... (ideo sp)
                        if (ch2 == 0x20 && 0x21 <= ch && ch <= 0x7d) 
                        { 
                            iBytes = 0x2121;
                            goto MULTIBYTE; 
                        }

                        // Illegal char, use fallback.  If lead byte is 0 have to do it special and do it first
                        if (!buffer.Fallback((byte)(iBytes>>8), (byte)(iBytes))) 
                            break;
                        continue; 
                    } 

                    MULTIBYTE: 
                    iBytes |= 0x8080;
                    // Look up the multibyte char to stick it in our data

                    // We have a iBytes to try to convert. 
                    cm = mapBytesToUnicode[iBytes];
 
                    STOREMULTIBYTE: 

                    // See if it was unknown 
                    if (cm == UNKNOWN_CHAR_FLAG && iBytes != 0)
                    {
                        // Fall back the unknown stuff
                        if (!buffer.Fallback((byte)(iBytes>>8), (byte)(iBytes))) 
                            break;
                        continue; 
                    } 

                    if (!buffer.AddChar(cm, 2)) 
                        break;              // convert ran out of buffer, stop
                    continue;
                }
 
                // Just ASCII
                // We allow some chars > 7f because everett did, so we have to look them up. 
                STOREASCII: 
                char c = mapBytesToUnicode[ch];
 
                // Check if it was unknown
                if ((c == UNKNOWN_CHAR_FLAG || c == 0) && (ch != 0))
                {
                    // fallback the unkown bytes 
                    if (!buffer.Fallback((byte)ch))
                        break; 
                    continue; 
                }
 
                // Go ahead and add our ASCII character
                if (!buffer.AddChar(c))
                    break;                  // convert ran out of buffer, stop
            } 

            // Need to remember our state, IF we're not counting 
            if (chars != null && decoder != null) 
            {
                if (!bUsedDecoder) 
                {
                    // If we didn't use it, clear the byte left over
                    decoder.bytesLeftOverCount = 0;
                } 

                if (decoder.MustFlush && decoder.bytesLeftOverCount == 0) 
                { 
                    decoder.currentMode = ISO2022Modes.ModeASCII;
                } 
                else
                {
                    // Either not flushing or had state (from convert)
                    BCLDebug.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow, 
                        "[ISO2022Encoding.GetCharsCP52936]Expected no state or not converting or not flushing");
 
                    decoder.currentMode = currentMode; 
                }
                decoder.m_bytesUsed = buffer.BytesUsed; 
            }

            // Return # of characters we found
            return buffer.Count; 
        }
 
        // Note: These all end up with 1/2 bytes of average byte count, so unless we're 1 we're always 
        // charCount/2 bytes too big.
        public override int GetMaxByteCount(int charCount) 
        {
            if (charCount < 0)
               throw new ArgumentOutOfRangeException("charCount",
                    Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); 

            // Characters would be # of characters + 1 in case high surrogate is ? * max fallback 
            long byteCount = (long)charCount + 1; 

            if (EncoderFallback.MaxCharCount > 1) 
                byteCount *= EncoderFallback.MaxCharCount;

            // Start with just generic DBCS values (sort of).
            int perChar = 2; 
            int extraStart = 0;
            int extraEnd = 0; 
 
            switch (CodePage)
            { 
                case 50220:
                case 50221:
                    // 2 bytes per char + 3 bytes switch to JIS 0208 or 1 byte + 3 bytes switch to 1 byte CP
                    perChar = 5;        // 5 max (4.5 average) 
                    extraEnd = 3;       // 3 bytes to shift back to ASCII
                    break; 
                case 50222: 
                    // 2 bytes per char + 3 bytes switch to JIS 0208 or 1 byte + 3 bytes switch to 1 byte CP
                    perChar = 5;        // 5 max (4.5 average) 
                    extraEnd = 4;       // 1 byte to shift from Katakana -> DBCS, 3 bytes to shift back to ASCII from DBCS
                    break;
                case 50225:
                    // 2 bytes per char + 1 byte SO, or 1 byte per char + 1 byte SI. 
                    perChar = 3;        // 3 max, (2.5 average)
                    extraStart = 4;     // EUC-KR marker appears at beginning of file. 
                    extraEnd = 1;       // 1 byte to shift back to ascii if necessary. 
                    break;
                case 52936: 
                    // 2 bytes per char + 2 byte shift, or 1 byte + 1 byte shift
                    // Worst case: left over surrogate with no low surrogate is extra ?, could have to switch to ASCII, then could have HZ and flush to ASCII mode
                    perChar = 4;        // 4 max, (3.5 average if every other char is HZ/ASCII)
                    extraEnd = 2;       // 2 if we have to shift back to ASCII 
                    break;
            } 
 
            // Return our surrogate and End plus perChar for each char.
            byteCount *= perChar; 
            byteCount += extraStart + extraEnd;

            if (byteCount > 0x7fffffff)
                throw new ArgumentOutOfRangeException("charCount", Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow")); 

            return (int)byteCount; 
        } 

        public override int GetMaxCharCount(int byteCount) 
        {
            if (byteCount < 0)
               throw new ArgumentOutOfRangeException("byteCount",
                    Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); 

            int perChar = 1; 
            int extraDecoder = 1; 

            switch (CodePage) 
            {
                case 50220:
                case 50221:
                case 50222: 
                case 50225:
                    perChar = 1;        // Worst case all ASCII 
                    extraDecoder = 3;   // Could have left over 3 chars of 4 char escape sequence, that all become ? 
                    break;
                case 52936: 
                    perChar = 1;        // Worst case all ASCII
                    extraDecoder = 1;   // sequences are 2 chars, so if next one is illegal, then previous 1 could be ?
                    break;
            } 

            // Figure out our length, perchar * char + whatever extra our decoder could do to us. 
            long charCount = ((long)byteCount * perChar) + extraDecoder; 

            // Just in case we have to fall back unknown ones. 
            if (DecoderFallback.MaxCharCount > 1)
                charCount *= DecoderFallback.MaxCharCount;

            if (charCount > 0x7fffffff) 
                throw new ArgumentOutOfRangeException("byteCount", Environment.GetResourceString("ArgumentOutOfRange_GetCharCountOverflow"));
 
            return (int)charCount; 
        }
 
        public override Encoder GetEncoder()
        {
            return new ISO2022Encoder(this);
        } 

        public override Decoder GetDecoder() 
        { 
            return new ISO2022Decoder(this);
        } 

        [Serializable]
        internal class ISO2022Encoder : System.Text.EncoderNLS
        { 
            internal ISO2022Modes currentMode;
            internal ISO2022Modes shiftInOutMode; 
 
            internal ISO2022Encoder(EncodingNLS encoding) : base(encoding)
            { 
                // base calls reset
            }

            public override void Reset() 
            {
                // Reset 
                currentMode = ISO2022Modes.ModeASCII; 
                shiftInOutMode = ISO2022Modes.ModeASCII;
                charLeftOver = (char)0; 
                if (m_fallbackBuffer != null)
                    m_fallbackBuffer.Reset();
            }
 
            // Anything left in our encoder?
            internal override bool HasState 
            { 
                get
                { 
                    // Don't check shift-out mode, it may be ascii (JP) or not (KR)
                    return (this.charLeftOver != (char)0 ||
                            currentMode != ISO2022Modes.ModeASCII);
                } 
            }
        } 
 
        [Serializable]
        internal class ISO2022Decoder : System.Text.DecoderNLS 
        {
            internal byte[] bytesLeftOver;
            internal int bytesLeftOverCount;
            internal ISO2022Modes currentMode; 
            internal ISO2022Modes shiftInOutMode;
 
            internal ISO2022Decoder(EncodingNLS encoding) : base(encoding) 
            {
                // base calls reset 
            }

            public override void Reset()
            { 
                // Reset
                bytesLeftOverCount = 0; 
                bytesLeftOver = new byte[4]; 
                currentMode = ISO2022Modes.ModeASCII;
                shiftInOutMode = ISO2022Modes.ModeASCII; 
                if (m_fallbackBuffer != null)
                    m_fallbackBuffer.Reset();
            }
 
            // Anything left in our decoder?
            internal override bool HasState 
            { 
                get
                { 
                    // If have bytes left over or not shifted back to ASCII then have problem
                    return (this.bytesLeftOverCount != 0 ||
                            currentMode != ISO2022Modes.ModeASCII);
                } 
            }
        } 
 
        static ushort[] HalfToFullWidthKanaTable =
        { 
            0xa1a3, // 0x8ea1 : Halfwidth Ideographic Period
            0xa1d6, // 0x8ea2 : Halfwidth Opening Corner Bracket
            0xa1d7, // 0x8ea3 : Halfwidth Closing Corner Bracket
            0xa1a2, // 0x8ea4 : Halfwidth Ideographic Comma 
            0xa1a6, // 0x8ea5 : Halfwidth Katakana Middle Dot
            0xa5f2, // 0x8ea6 : Halfwidth Katakana Wo 
            0xa5a1, // 0x8ea7 : Halfwidth Katakana Small A 
            0xa5a3, // 0x8ea8 : Halfwidth Katakana Small I
            0xa5a5, // 0x8ea9 : Halfwidth Katakana Small U 
            0xa5a7, // 0x8eaa : Halfwidth Katakana Small E
            0xa5a9, // 0x8eab : Halfwidth Katakana Small O
            0xa5e3, // 0x8eac : Halfwidth Katakana Small Ya
            0xa5e5, // 0x8ead : Halfwidth Katakana Small Yu 
            0xa5e7, // 0x8eae : Halfwidth Katakana Small Yo
            0xa5c3, // 0x8eaf : Halfwidth Katakana Small Tu 
            0xa1bc, // 0x8eb0 : Halfwidth Katakana-Hiragana Prolonged Sound Mark 
            0xa5a2, // 0x8eb1 : Halfwidth Katakana A
            0xa5a4, // 0x8eb2 : Halfwidth Katakana I 
            0xa5a6, // 0x8eb3 : Halfwidth Katakana U
            0xa5a8, // 0x8eb4 : Halfwidth Katakana E
            0xa5aa, // 0x8eb5 : Halfwidth Katakana O
            0xa5ab, // 0x8eb6 : Halfwidth Katakana Ka 
            0xa5ad, // 0x8eb7 : Halfwidth Katakana Ki
            0xa5af, // 0x8eb8 : Halfwidth Katakana Ku 
            0xa5b1, // 0x8eb9 : Halfwidth Katakana Ke 
            0xa5b3, // 0x8eba : Halfwidth Katakana Ko
            0xa5b5, // 0x8ebb : Halfwidth Katakana Sa 
            0xa5b7, // 0x8ebc : Halfwidth Katakana Si
            0xa5b9, // 0x8ebd : Halfwidth Katakana Su
            0xa5bb, // 0x8ebe : Halfwidth Katakana Se
            0xa5bd, // 0x8ebf : Halfwidth Katakana So 
            0xa5bf, // 0x8ec0 : Halfwidth Katakana Ta
            0xa5c1, // 0x8ec1 : Halfwidth Katakana Ti 
            0xa5c4, // 0x8ec2 : Halfwidth Katakana Tu 
            0xa5c6, // 0x8ec3 : Halfwidth Katakana Te
            0xa5c8, // 0x8ec4 : Halfwidth Katakana To 
            0xa5ca, // 0x8ec5 : Halfwidth Katakana Na
            0xa5cb, // 0x8ec6 : Halfwidth Katakana Ni
            0xa5cc, // 0x8ec7 : Halfwidth Katakana Nu
            0xa5cd, // 0x8ec8 : Halfwidth Katakana Ne 
            0xa5ce, // 0x8ec9 : Halfwidth Katakana No
            0xa5cf, // 0x8eca : Halfwidth Katakana Ha 
            0xa5d2, // 0x8ecb : Halfwidth Katakana Hi 
            0xa5d5, // 0x8ecc : Halfwidth Katakana Hu
            0xa5d8, // 0x8ecd : Halfwidth Katakana He 
            0xa5db, // 0x8ece : Halfwidth Katakana Ho
            0xa5de, // 0x8ecf : Halfwidth Katakana Ma
            0xa5df, // 0x8ed0 : Halfwidth Katakana Mi
            0xa5e0, // 0x8ed1 : Halfwidth Katakana Mu 
            0xa5e1, // 0x8ed2 : Halfwidth Katakana Me
            0xa5e2, // 0x8ed3 : Halfwidth Katakana Mo 
            0xa5e4, // 0x8ed4 : Halfwidth Katakana Ya 
            0xa5e6, // 0x8ed5 : Halfwidth Katakana Yu
            0xa5e8, // 0x8ed6 : Halfwidth Katakana Yo 
            0xa5e9, // 0x8ed7 : Halfwidth Katakana Ra
            0xa5ea, // 0x8ed8 : Halfwidth Katakana Ri
            0xa5eb, // 0x8ed9 : Halfwidth Katakana Ru
            0xa5ec, // 0x8eda : Halfwidth Katakana Re 
            0xa5ed, // 0x8edb : Halfwidth Katakana Ro
            0xa5ef, // 0x8edc : Halfwidth Katakana Wa 
            0xa5f3, // 0x8edd : Halfwidth Katakana N 
            0xa1ab, // 0x8ede : Halfwidth Katakana Voiced Sound Mark
            0xa1ac  // 0x8edf : Halfwidth Katakana Semi-Voiced Sound Mark 
        };
    }
}
 

// File provided for Reference Use Only by Microsoft Corporation (c) 2007.
// Copyright (c) Microsoft Corporation. All rights reserved.
Link Menu

Network programming in C#, Network Programming in VB.NET, Network Programming in .NET
This book is available now!
Buy at Amazon US or
Buy at Amazon UK
ISO2022Encoding.cs source code in C# .NET

Source code for the .NET framework in C#

Code:

Link Menu