Code:
/ Dotnetfx_Vista_SP2 / Dotnetfx_Vista_SP2 / 8.0.50727.4016 / WIN_WINDOWS / lh_tools_devdiv_wpf / Windows / wcp / Speech / Src / Internal / SrgsCompiler / backend.cs / 1 / backend.cs
//------------------------------------------------------------------
// Copyright (c) Microsoft Corporation.  All rights reserved.
//
//
// Description:
//      CFG Grammar backend
//
// History:
//      5/1/2004    jeanfp      Created from the Sapi Managed code
//-----------------------------------------------------------------

using System;
using System.Collections;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Diagnostics;
using System.Globalization;
using System.Runtime.InteropServices;
using System.Text;
using System.IO;
using System.Speech.Internal.SrgsParser;

namespace System.Speech.Internal.SrgsCompiler
{
    /// <summary>
    /// In-memory representation of a CFG grammar (words, symbols, rules, states, arcs and
    /// tags) together with the code that serializes it to the binary CFG format.
    /// </summary>
    internal sealed partial class Backend
    {
        //*******************************************************************
        //
        // Constructors
        //
        //*******************************************************************

        #region Constructors

        /// <summary>
        /// Creates an empty backend with fresh word and symbol string blobs.
        /// </summary>
        internal Backend ()
        {
            _words = new StringBlob ();
            _symbols = new StringBlob ();
        }

        /// <summary>
        /// Creates a backend whose content is loaded from a binary grammar.
        /// </summary>
        /// <param name="streamHelper">Marshaler positioned on the binary grammar to read.</param>
        internal Backend (StreamMarshaler streamHelper)
        {
            InitFromBinaryGrammar (streamHelper);
        }

        #endregion

        //********************************************************************
        //
        // Internal Methods
        //
        //*******************************************************************

        #region Internal Methods

        /// <summary>
        /// Optimizes the grammar network by removing the epsilon states and merging
        /// duplicate transitions. See GrammarOptimization.doc for details.
        /// </summary>
        internal void Optimize ()
        {
            //DumpGrammarStatistics ("GrammarOptimization: Pre optimize");
            _states.Optimize ();
            //DumpGrammarStatistics ("GrammarOptimization: Post optimize");

            // Most likely, there will be an arc with a weight != 1. So we need a weight table.
            _fNeedWeightTable = true;
        }

        /// <summary>
        /// Performs consistency checks of the grammar structure and writes the serialized
        /// grammar to <paramref name="streamBuffer"/>: header, word blob, symbol blob, rules,
        /// optional base path, arcs, optional weight table, tags and (unless NO_STG is defined)
        /// script references, IL and PDB bytes.
        /// The state collection is released (set to null) as part of this call, so the states
        /// cannot be edited through this instance afterwards.
        /// </summary>
        /// <param name="streamBuffer">Marshaler wrapping the destination stream.</param>
        /// <exception cref="OverflowException">
        /// The offset of the global semantic interpretation tags does not fit in 16 bits.
        /// </exception>
        internal void Commit (StreamMarshaler streamBuffer)
        {
            // Remember where the grammar starts in the stream. The debug assertions below check
            // that every section lands at the offset recorded for it in the header.
            long startStreamPosition = streamBuffer.Stream.Position;

            // Put all the states into a list sorted by rule parent index and serialized index
            List<State> sortedStates = new List<State> (_states);

            // Release the memory for the original list of states
            _states = null;

            sortedStates.Sort ();

            // Validate the grammar
            ValidateAndTagRules ();
            CheckLeftRecursion (sortedStates);

            // Include null terminator
            int cBasePath = _basePath != null ? _basePath.Length + 1 : 0;
            float [] pWeights;
            int cArcs;

            // Add the top level semantic interpretation tag.
            // It is added to the symbol blob before the script method names because its
            // offset must fit in a 16 bits value.
            int semanticInterpretationGlobals = 0;
            if (_globalTags.Count > 0)
            {
                StringBuilder sb = new StringBuilder ();
                foreach (string s in _globalTags)
                {
                    sb.Append (s);
                }
                _symbols.Add (sb.ToString (), out semanticInterpretationGlobals);
                semanticInterpretationGlobals = _symbols.OffsetFromId (semanticInterpretationGlobals);
                if (semanticInterpretationGlobals > UInt16.MaxValue)
                {
                    throw new OverflowException (SR.Get (SRID.TooManyRulesWithSemanticsGlobals));
                }
            }

#if !NO_STG
            // Write the method names as symbols
            foreach (ScriptRef script in _scriptRefs)
            {
                _symbols.Add (script._sMethod, out script._idSymbol);
            }
#endif
            // Get the header
            CfgGrammar.CfgSerializedHeader header = BuildHeader (sortedStates, cBasePath, unchecked ((UInt16) semanticInterpretationGlobals), out cArcs, out pWeights);
            streamBuffer.WriteStream (header);

            //
            // For the string blobs, we must explicitly report I/O error since the blobs don't
            // use the error log facility.
            //
            System.Diagnostics.Debug.Assert (streamBuffer.Stream.Position - startStreamPosition == header.pszWords);
            streamBuffer.WriteArrayChar (_words.SerializeData (), _words.SerializeSize ());

            System.Diagnostics.Debug.Assert (streamBuffer.Stream.Position - startStreamPosition == header.pszSymbols);
            streamBuffer.WriteArrayChar (_symbols.SerializeData (), _symbols.SerializeSize ());

            System.Diagnostics.Debug.Assert (streamBuffer.Stream.Position - startStreamPosition == header.pRules);
            foreach (Rule rule in _rules)
            {
                rule.Serialize (streamBuffer);
            }

            if (cBasePath > 0)
            {
                streamBuffer.WriteArrayChar (_basePath.ToCharArray (), _basePath.Length);

                // Add a zero to be compatible with SAPI 5
                System.Diagnostics.Debug.Assert (_basePath.Length + 1 == cBasePath);
                streamBuffer.WriteArrayChar (_achZero, 1);

                // Zero-pad to align following structures
                streamBuffer.WriteArray (_abZero3, (cBasePath * Helpers._sizeOfChar) & 3);
            }

            //
            // Write a dummy 0 index state entry
            //
            CfgArc dummyArc = new CfgArc ();

            System.Diagnostics.Debug.Assert (streamBuffer.Stream.Position - startStreamPosition == header.pArcs);
            streamBuffer.WriteStream (dummyArc);

            int ulWeightOffset = 1;
            uint arcOffset = 1;

            bool semanticInterpretation = (GrammarOptions & GrammarOptions.MssV1) == GrammarOptions.MssV1;
            foreach (State state in sortedStates)
            {
                state.SerializeStateEntries (streamBuffer, semanticInterpretation, pWeights, ref arcOffset, ref ulWeightOffset);
            }

            System.Diagnostics.Debug.Assert (streamBuffer.Stream.Position - startStreamPosition == header.pWeights);
            if (_fNeedWeightTable)
            {
                streamBuffer.WriteArray (pWeights, cArcs);
            }

            System.Diagnostics.Debug.Assert (streamBuffer.Stream.Position - startStreamPosition == header.tags);
            if (!semanticInterpretation)
            {
                foreach (State state in sortedStates)
                {
                    state.SetEndArcIndexForTags ();
                }
            }

            // Remove the orphaned tags.
            // NOTE(review): the original comment naming the case that produces them was truncated.
            // Iterate backwards so that RemoveAt does not shift the entries still to be visited.
            for (int i = _tags.Count - 1; i >= 0; i--)
            {
                // When tags are created the arc index is set to zero. This value changes during
                // serialization if an arc references the tag.
                if (_tags [i]._cfgTag.ArcIndex == 0)
                {
                    _tags.RemoveAt (i);
                }
            }

            // Sort the _tags array by ArcIndex
            _tags.Sort ();

            // Write the _tags array
            foreach (Tag tag in _tags)
            {
                tag.Serialize (streamBuffer);
            }

#if !NO_STG
            // Write the script references and the IL right after the tags so getting them
            // for the grammar does not require a seek to the end of the file
            System.Diagnostics.Debug.Assert (header.pScripts == 0 || streamBuffer.Stream.Position - startStreamPosition == header.pScripts);
            foreach (ScriptRef script in _scriptRefs)
            {
                script.Serialize (_symbols, streamBuffer);
            }

            // Write the assembly bits
            System.Diagnostics.Debug.Assert (header.pIL == 0 || streamBuffer.Stream.Position - startStreamPosition == header.pIL);
            if (_il != null && _il.Length > 0)
            {
                streamBuffer.Stream.Write (_il, 0, _il.Length);
            }

            System.Diagnostics.Debug.Assert (header.pPDB == 0 || streamBuffer.Stream.Position - startStreamPosition == header.pPDB);
            if (_pdb != null && _pdb.Length > 0)
            {
                streamBuffer.Stream.Write (_pdb, 0, _pdb.Length);
            }
#endif
        }

        ///
/// Description:
///     Combines the data of a grammar with the data coming from another CFG.
///     A new backend is created, flagged as loaded from binary, and receives the weight-table
///     flag, grammar mode and grammar options of <paramref name="org"/>. Every rule of
///     <paramref name="org"/> whose name equals <paramref name="ruleName"/> is then cloned,
///     with its sub graph, into the new backend.
///
/// <param name="ruleName">Name of the rule(s) of <paramref name="org"/> to clone.</param>
/// <param name="org">Grammar providing the settings and the rule to clone.</param>
/// <param name="extra">Second grammar, handed to CloneSubGraph together with <paramref name="org"/>.</param>
/// <returns>The newly created backend.</returns>
internal static Backend CombineGrammar (string ruleName, Backend org, Backend extra)
{
    Backend be = new Backend ();
    be._fLoadedFromBinary = true;
    be._fNeedWeightTable = org._fNeedWeightTable;
    be._grammarMode = org._grammarMode;
    be._grammarOptions = org._grammarOptions;

    // Hash source state to destination state
    Dictionary<State, State> srcToDestHash = new Dictionary<State, State> ();

    // Find the rule. The scan deliberately visits every rule: all rules carrying the
    // requested name are cloned, exactly as before.
    foreach (Rule orgRule in org._rules)
    {
        if (orgRule.Name == ruleName)
        {
            be.CloneSubGraph (orgRule, org, extra, srcToDestHash, true);
        }
    }
    return be;
}

/// Creates a new state owned by <paramref name="rule"/>; forwards to the state collection.
internal State CreateNewState (Rule rule)
{
    return _states.CreateNewState (rule);
}

/// Removes <paramref name="state"/>; forwards to the state collection.
internal void DeleteState (State state)
{
    _states.DeleteState (state);
}

/// Forwards to the state collection method of the same name.
internal void MoveInputTransitionsAndDeleteState (State from, State to)
{
    _states.MoveInputTransitionsAndDeleteState (from, to);
}

/// Forwards to the state collection method of the same name.
internal void MoveOutputTransitionsAndDeleteState (State from, State to)
{
    _states.MoveOutputTransitionsAndDeleteState (from, to);
}

///
/// Tries to find the rule's initial state handle. If both a name and an id
/// are provided, then both have to match in order for this call to succeed.
/// If the rule doesn't already exist then we define it if fCreateIfNotExists,
/// otherwise we return an error ().
///
/// As implemented here the method always creates a new rule:
///   - the attribute flags are validated (unknown flags, or Import together with Export,
///     are rejected),
///   - TopLevel/Active/Root are cleared on imported rules,
///   - when the grammar was loaded from binary, a non dynamic rule with the same name is
///     reported as a duplicate,
///   - the rule is registered as the root rule when SPRAF_Root is set, indexed by its name
///     offset when that offset is not zero, and added to the rule list, which is re-sorted.
///
/// <param name="name">Name of the rule to create.</param>
/// <param name="attributes">Combination of SPCFGRULEATTRIBUTES flags for the rule.</param>
/// <returns>The newly created rule.</returns>
/// <exception cref="ArgumentException">The attribute flags are invalid.</exception>
internal Rule CreateRule (string name, SPCFGRULEATTRIBUTES attributes)
{
    //CfgGrammar.TraceInformation ("BackEnd::CreateRule");

    SPCFGRULEATTRIBUTES allFlags = SPCFGRULEATTRIBUTES.SPRAF_TopLevel | SPCFGRULEATTRIBUTES.SPRAF_Active | SPCFGRULEATTRIBUTES.SPRAF_Export | SPCFGRULEATTRIBUTES.SPRAF_Import | SPCFGRULEATTRIBUTES.SPRAF_Interpreter | SPCFGRULEATTRIBUTES.SPRAF_Dynamic | SPCFGRULEATTRIBUTES.SPRAF_Root;

    if (attributes != 0 && ((attributes & ~allFlags) != 0 || ((attributes & SPCFGRULEATTRIBUTES.SPRAF_Import) != 0 && (attributes & SPCFGRULEATTRIBUTES.SPRAF_Export) != 0)))
    {
        throw new ArgumentException ("attributes");
    }

    // PS: 52277 - SAPI does not properly handle a rule marked as Import and TopLevel/Active/Root.
    //  - To maintain maximal backwards compatibility, if a rule is marked as Import, we will unmark TopLevel/Active/Root.
    //  - This changes the behavior when application tries to activate this rule. However, given that it is already
    //    broken/fragile, we believe it is better to change the behavior.
    if ((attributes & SPCFGRULEATTRIBUTES.SPRAF_Import) != 0 && ((attributes & SPCFGRULEATTRIBUTES.SPRAF_TopLevel) != 0 || (attributes & SPCFGRULEATTRIBUTES.SPRAF_Active) != 0 || (attributes & SPCFGRULEATTRIBUTES.SPRAF_Root) != 0))
    {
        //CfgGrammar.TraceInformation ("Warning: A rule cannot be marked as both Import and TopLevel/Active/Root.\n" + " TopLevel/Active/Root have been disabled.");
        attributes &= ~(SPCFGRULEATTRIBUTES.SPRAF_TopLevel | SPCFGRULEATTRIBUTES.SPRAF_Active | SPCFGRULEATTRIBUTES.SPRAF_Root);
    }

    // An imported rule needs a name. The emptiness test must not index into an empty (or
    // null) string: name [0] on such a value throws instead of reporting the invalid import.
    if ((attributes & SPCFGRULEATTRIBUTES.SPRAF_Import) != 0 && (string.IsNullOrEmpty (name) || name [0] == '\0'))
    {
        LogError (name, SRID.InvalidImport);
    }

    if (_fLoadedFromBinary)
    {
        // Scan all non-dynamic names and prevent a duplicate...
        foreach (Rule r in _rules)
        {
            string wpszName = _symbols [r._cfgRule._nameOffset];
            if (!r._cfgRule.Dynamic && name == wpszName)
            {
                LogError (name, SRID.DuplicatedRuleName);
            }
        }
    }

    int idString;           // required by StringBlob.Add, not used afterwards
    int cImportedRule = 0;
    Rule rule = new Rule (this, name, _symbols.Add (name, out idString), attributes, _ruleIndex, 0, _grammarOptions & GrammarOptions.TagFormat, ref cImportedRule);

    rule._iSerialize2 = _ruleIndex++;

    if ((attributes & SPCFGRULEATTRIBUTES.SPRAF_Root) != 0)
    {
        if (_rootRule != null)
        {
            // We already have a root rule, report the error.
            LogError (name, SRID.RootRuleAlreadyDefined);
        }
        else
        {
            _rootRule = rule;
        }
    }

    // Index the rule by the offset of its name in the symbol blob.
    if (rule._cfgRule._nameOffset != 0)
    {
        _nameOffsetRules.Add (rule._cfgRule._nameOffset, rule);
    }

    //
    // It is important to insert this at the tail for dynamic rules to
    // retain their slot number.
    //
    _rules.Add (rule);
    _rules.Sort ();

    return rule;
}

/// <summary>
/// Looks up a rule by name: the name is searched in the symbol blob and the resulting
/// symbol offset is used as key in the name-offset rule table.
/// </summary>
/// <param name="sRule">Name of the rule to find.</param>
/// <returns>The matching rule, or null when there is none.</returns>
internal Rule FindRule (string sRule)
{
    //CfgGrammar.TraceInformation ("BackEnd::FindRule");

    Rule rule = null;

    if (_nameOffsetRules.Count > 0)
    {
        // Find rule corresponding to name symbol offset corresponding to the RuleName
        int iWord = _symbols.Find (sRule);

        if (iWord > 0)
        {
            int dwSymbolOffset = _symbols.OffsetFromId (iWord);

            System.Diagnostics.Debug.Assert (dwSymbolOffset == 0 || _symbols [iWord] == sRule);

            rule = dwSymbolOffset > 0 && _nameOffsetRules.ContainsKey (dwSymbolOffset) ? _nameOffsetRules [dwSymbolOffset] : null;
        }
    }

    if (rule != null)
    {
        string sRuleFound = rule.Name;

        // A rule was found through the symbol table: when a name was requested, the name
        // carried by the rule must be that very name.
        if (!string.IsNullOrEmpty (sRule) && sRuleFound != sRule)
        {
            LogError (sRule, SRID.RuleNameIdConflict);
        }
    }

    return rule;
}

/// <summary>
/// Creates a word transition; forwards to CreateTransition.
/// </summary>
/// <param name="sWord">Text of the transition.</param>
/// <param name="flWeight">Weight of the transition.</param>
/// <param name="requiredConfidence">Confidence value handed to CreateTransition.</param>
/// <returns>The arc returned by CreateTransition.</returns>
internal Arc WordTransition (string sWord, float flWeight, int requiredConfidence)
{
    return CreateTransition (sWord, flWeight, requiredConfidence);
}

/// <summary>
/// Creates an arc for a subset match on <paramref name="text"/>, after normalizing the
/// white spaces of the text. The arc uses weight 1.0 and the normal confidence level.
/// </summary>
/// <param name="text">Text to match; must not be null or empty.</param>
/// <param name="matchMode">Match mode of the new arc.</param>
/// <returns>The newly created arc.</returns>
internal Arc SubsetTransition (string text, MatchMode matchMode)
{
    // Performs white space normalization
    text = NormalizeTokenWhiteSpace (text);

    return new Arc (text, null, _words, 1.0f, CfgGrammar.SP_NORMAL_CONFIDENCE, null, matchMode, ref _fNeedWeightTable);
}

/// <summary>
/// Adds a rule (reference) transition from hFromState to hToState.
/// The rule may also be one of the special transition markers:
///     SPRULETRANS_WILDCARD    : "WILDCARD" transition
///     SPRULETRANS_DICTATION   : single word from dictation
///     SPRULETRANS_TEXTBUFFER  : "TEXTBUFFER" transition
/// A regular rule is stored as the rule reference of the new arc and, when a parent rule
/// is supplied, the parent is inserted at the head of the referenced rule's rule list.
///
/// <param name="rule">Rule to transition to, or one of the special markers listed above.</param>
/// <param name="parentRule">Rule calling the ruleref; may be null.</param>
/// <param name="flWeight">Weight of the transition; a negative value is rejected.</param>
/// <returns>The arc created for the transition.</returns>
internal Arc RuleTransition (Rule rule, Rule parentRule, float flWeight)
{
    //CfgGrammar.TraceInformation ("BackEnd::AddRuleTransition");

    // Negative weights are not supported
    if (flWeight < 0.0f)
    {
        XmlParser.ThrowSrgsException (SRID.UnsupportedFormat);
    }

    // Tell the special transitions apart from the references to a real rule
    bool isSpecialTransition = rule == CfgGrammar.SPRULETRANS_WILDCARD
        || rule == CfgGrammar.SPRULETRANS_DICTATION
        || rule == CfgGrammar.SPRULETRANS_TEXTBUFFER;

    Rule specialTarget = isSpecialTransition ? rule : null;
    Rule referencedRule = isSpecialTransition ? null : rule;

    // The weight table flag reported by the Arc constructor is received in a local and
    // is not propagated to the backend.
    bool weightTableNeeded = false;
    Arc ruleArc = new Arc (null, referencedRule, _words, flWeight, '\0', specialTarget, MatchMode.AllWords, ref weightTableNeeded);
    AddArc (ruleArc);

    // Keep track of the rules referencing the target rule
    if (referencedRule != null && parentRule != null)
    {
        referencedRule._listRules.Insert (0, parentRule);
    }

    return ruleArc;
}

/// <summary>
/// Adds a word transition from hFromState to hToState. If hToState == null
/// then the arc will be to the (implicit) terminal state. If psz == null then
/// we add an epsilon transition. Properties are pushed back to the
/// first un-ambiguous arc in case we can share a common initial state path.
/// The weight will be placed on the first arc (if there exists an arc with
/// the same word but different weight we will create a new arc).
///
/// This overload creates the epsilon transition: CreateTransition is called with a null
/// word and the normal confidence level.
/// <param name="flWeight">Weight of the transition.</param>
/// <returns>The arc returned by CreateTransition.</returns>
internal Arc EpsilonTransition (float flWeight)
{
    return CreateTransition (null, flWeight, CfgGrammar.SP_NORMAL_CONFIDENCE);
}

/// <summary>
/// Creates a tag for <paramref name="propertyInfo"/>, records it in the tag list and
/// attaches it to <paramref name="arc"/> as both start tag and end tag.
/// </summary>
internal void AddSemanticInterpretationTag (Arc arc, CfgGrammar.CfgProperty propertyInfo)
{
    //CfgGrammar.TraceInformation ("BackEnd::AddSemanticTag");

    Tag tag = new Tag (this, propertyInfo);
    _tags.Add (tag);

    arc.AddStartTag (tag);
    arc.AddEndTag (tag);
}

/// <summary>
/// Creates a tag for <paramref name="propertyInfo"/>, records it in the tag list and
/// attaches it as start tag of <paramref name="start"/> and end tag of <paramref name="end"/>.
/// </summary>
internal void AddPropertyTag (Arc start, Arc end, CfgGrammar.CfgProperty propertyInfo)
{
    //CfgGrammar.TraceInformation ("BackEnd::AddSemanticTag");

    Tag tag = new Tag (this, propertyInfo);
    _tags.Add (tag);

    start.AddStartTag (tag);
    end.AddEndTag (tag);
}

/// <summary>
/// Traverses the graph starting from <paramref name="srcFromState"/>, cloning each state
/// and each transition along the way, and returns the cloned state corresponding to
/// <paramref name="srcEndState"/>.
/// NOTE(review): the historical description said that transitions originating from the end
/// state are not cloned; the loop below has no such special case.
/// </summary>
/// <param name="srcFromState">State the traversal starts from.</param>
/// <param name="srcEndState">State whose clone is returned; it must be reachable from the start state.</param>
/// <param name="destFromState">Existing state acting as the clone of <paramref name="srcFromState"/>.</param>
/// <returns>The clone of <paramref name="srcEndState"/>.</returns>
internal State CloneSubGraph (State srcFromState, State srcEndState, State destFromState)
{
    // NOTE(review): the generic type arguments below were reconstructed from the way the
    // collections are used; confirm the tag dictionary against Arc.CloneTags.
    Dictionary<State, State> srcToDestHash = new Dictionary<State, State> ();  // Hash source state to destination state
    Stack<State> cloneStack = new Stack<State> ();     // States to process
    Dictionary<Tag, Tag> tags = new Dictionary<Tag, Tag> ();

    // Add initial state to cloneStack and srcToDestHash.
    srcToDestHash.Add (srcFromState, destFromState);
    cloneStack.Push (srcFromState);

    // While there are still states on the cloneStack (ToDo collection)
    while (cloneStack.Count > 0)
    {
        srcFromState = cloneStack.Pop ();
        destFromState = srcToDestHash [srcFromState];
        System.Diagnostics.Debug.Assert (destFromState != null);

        // For each transition from srcFromState
        foreach (Arc arc in srcFromState.OutArcs)
        {
            // - Lookup the DestToState corresponding to SrcToState
            State srcToState = arc.End;
            State destToState = null;

            if (srcToState != null && !srcToDestHash.TryGetValue (srcToState, out destToState))
            {
                // - Not found: clone a new DestToState, remember the mapping and schedule
                //   the source state for processing.
                destToState = CreateNewState (srcToState.Rule);
                srcToDestHash.Add (srcToState, destToState);
                cloneStack.Push (srcToState);
            }

            // - Clone the transition from SrcFromState.SrcToState at DestFromState.DestToState
            // -- Clone Arc
            Arc newArc = new Arc (arc, destFromState, destToState);
            AddArc (newArc);

            // -- Clone SemanticTag
            newArc.CloneTags (arc, _tags, tags, null);

            // -- Add Arc
            newArc.ConnectStates ();
        }
    }
    System.Diagnostics.Debug.Assert (tags.Count == 0);
    return srcToDestHash [srcEndState];
}

/// <summary>
/// Clones into this backend the sub graph of <paramref name="rule"/>. Rule references met
/// along the way are resolved by name: names starting with "URL:DYNAMIC#" are looked up in
/// <paramref name="extra"/>, names starting with "URL:STATIC#" in <paramref name="org"/>,
/// and the referenced rules are cloned recursively when this backend does not hold them yet.
/// </summary>
/// <param name="rule">Rule whose states and arcs are cloned.</param>
/// <param name="org">Original grammar.</param>
/// <param name="extra">Grammar holding the dynamic rules.</param>
/// <param name="srcToDestHash">Map from source states to cloned states, shared by all the recursive calls.</param>
/// <param name="fromOrg">True when <paramref name="rule"/> belongs to <paramref name="org"/>, false when it belongs to <paramref name="extra"/>.</param>
internal void CloneSubGraph (Rule rule, Backend org, Backend extra, Dictionary<State, State> srcToDestHash, bool fromOrg)
{
    const string dynamicPrefix = "URL:DYNAMIC#";
    const string staticPrefix = "URL:STATIC#";

    Backend beSrc = fromOrg ? org : extra;

    // NOTE(review): generic type arguments reconstructed from usage; confirm against
    // CloneState and Arc.CloneTags.
    List<State> CloneStack = new List<State> ();     // States to process, used as a FIFO
    Dictionary<Tag, Tag> tags = new Dictionary<Tag, Tag> ();

    // Push all the state for the top level rule
    CloneState (rule._firstState, CloneStack, srcToDestHash);

    // While there are still states on the CloneStack (ToDo collection)
    while (CloneStack.Count > 0)
    {
        State srcFromState = CloneStack [0];
        CloneStack.RemoveAt (0);
        State destFromState = srcToDestHash [srcFromState];

        // For each transition from srcFromState
        foreach (Arc arc in srcFromState.OutArcs)
        {
            // - Lookup the DestToState corresponding to SrcToState
            State srcToState = arc.End;
            State destToState = null;

            if (srcToState != null)
            {
                if (!srcToDestHash.ContainsKey (srcToState))
                {
                    // - If not found, then it is a new state, just clone it.
                    CloneState (srcToState, CloneStack, srcToDestHash);
                }
                destToState = srcToDestHash [srcToState];
            }

            // - Clone the transition from SrcFromState.SrcToState at DestFromState.DestToState
            // -- Clone Arc; the word is re-added to this backend's word blob
            int newWordId = arc.WordId;
            if (beSrc != null && arc.WordId > 0)
            {
                _words.Add (beSrc.Words [arc.WordId], out newWordId);
            }
            Arc newArc = new Arc (arc, destFromState, destToState, newWordId);

            // -- Clone SemanticTag
            newArc.CloneTags (arc, _tags, tags, this);

            // For rule ref push the first state of the ruleref
            if (arc.RuleRef != null)
            {
                string ruleName;

                // Check for DYNAMIC grammars
                if (arc.RuleRef.Name.StartsWith (dynamicPrefix, StringComparison.Ordinal))
                {
                    ruleName = arc.RuleRef.Name.Substring (dynamicPrefix.Length);
                    if (fromOrg == true && FindInRules (ruleName) == null)
                    {
                        Rule ruleExtra = extra.FindInRules (ruleName);
                        if (ruleExtra == null)
                        {
                            XmlParser.ThrowSrgsException (SRID.DynamicRuleNotFound, ruleName);
                        }
                        CloneSubGraph (ruleExtra, org, extra, srcToDestHash, false);
                    }
                }
                else if (arc.RuleRef.Name.StartsWith (staticPrefix, StringComparison.Ordinal))
                {
                    ruleName = arc.RuleRef.Name.Substring (staticPrefix.Length);
                    if (fromOrg == false && FindInRules (ruleName) == null)
                    {
                        Rule ruleOrg = org.FindInRules (ruleName);
                        if (ruleOrg == null)
                        {
                            XmlParser.ThrowSrgsException (SRID.DynamicRuleNotFound, ruleName);
                        }
                        CloneSubGraph (ruleOrg, org, extra, srcToDestHash, true);
                    }
                }
                else
                {
                    ruleName = arc.RuleRef.Name;
                    // NOTE(review): the result of this lookup is not used; the call is kept
                    // as is because FindInRules is defined outside of this view.
                    Rule ruleExtra = org.FindInRules (ruleName);
                    if (fromOrg == false)
                    {
                        CloneSubGraph (arc.RuleRef, org, extra, srcToDestHash, true);
                    }
                }

                // Point the cloned arc to the rule held by this backend, cloning the
                // first state of the referenced rule when the rule is still unknown.
                Rule refRule = FindInRules (ruleName);
                if (refRule == null)
                {
                    refRule = CloneState (arc.RuleRef._firstState, CloneStack, srcToDestHash);
                }
                newArc.RuleRef = refRule;
            }

            // -- Add Arc
            newArc.ConnectStates ();
        }
    }
    System.Diagnostics.Debug.Assert (tags.Count == 0);
}

/// <summary>
/// Delete disconnected subgraph starting at hState.
/// Traverse the graph starting from SrcStartState, and delete each state as we go along.
///
/// <param name="state">State the deletion starts from.</param>
internal void DeleteSubGraph (State state)
{
    // NOTE(review): the generic type arguments were reconstructed from the way the
    // collections are used below.
    Stack<State> stateToProcess = new Stack<State> ();   // States still to visit
    Collection<Arc> arcsToDelete = new Collection<Arc> ();
    Collection<State> statesToDelete = new Collection<State> ();

    // Add initial state to the stack of states to process.
    stateToProcess.Push (state);

    // While there are still states to process (ToDo collection)
    while (stateToProcess.Count > 0)
    {
        state = stateToProcess.Pop ();
        statesToDelete.Add (state);
        arcsToDelete.Clear ();

        // Accumulate the arcs to delete and add new states to the stack of states to process
        foreach (Arc arc in state.OutArcs)
        {
            State endState = arc.End;

            // Schedule the end state, unless it is already scheduled or already visited
            if (endState != null && !stateToProcess.Contains (endState) && !statesToDelete.Contains (endState))
            {
                stateToProcess.Push (endState);
            }
            arcsToDelete.Add (arc);
        }

        // Disconnect the arcs. This is done on the accumulated copy, after the
        // enumeration of state.OutArcs has completed.
        foreach (Arc arc in arcsToDelete)
        {
            arc.Start = arc.End = null;
        }
    }

    foreach (State stateToDelete in statesToDelete)
    {
        // Every visited state must be fully disconnected by now
        System.Diagnostics.Debug.Assert (stateToDelete != null);
        System.Diagnostics.Debug.Assert (stateToDelete.InArcs.IsEmpty);
        System.Diagnostics.Debug.Assert (stateToDelete.OutArcs.IsEmpty);
        DeleteState (stateToDelete);
    }
}

/// <summary>
/// Modify the placeholder rule attributes after it has been created.
/// This is only safe to use in the context of SRGSGrammarCompiler.
/// </summary>
/// <param name="rule">Rule to update.</param>
/// <param name="dwAttributes">
/// New attribute flags. TopLevel, Active, Interpreter, Export, Dynamic and Import are each
/// copied to the rule; SPRAF_Root registers the rule as the root rule of the grammar and
/// raises an SRGS exception when a root rule is already defined.
/// </param>
internal void SetRuleAttributes (Rule rule, SPCFGRULEATTRIBUTES dwAttributes)
{
    // Check if this is the Root rule
    if ((dwAttributes & SPCFGRULEATTRIBUTES.SPRAF_Root) != 0)
    {
        if (_rootRule != null)
        {
            // We already have a root rule, report the error.
            XmlParser.ThrowSrgsException (SRID.RootRuleAlreadyDefined);
        }
        else
        {
            _rootRule = rule;
        }
    }

    rule._cfgRule.TopLevel = ((dwAttributes & SPCFGRULEATTRIBUTES.SPRAF_TopLevel) != 0);
    rule._cfgRule.DefaultActive = ((dwAttributes & SPCFGRULEATTRIBUTES.SPRAF_Active) != 0);
    rule._cfgRule.PropRule = ((dwAttributes & SPCFGRULEATTRIBUTES.SPRAF_Interpreter) != 0);
    rule._cfgRule.Export = ((dwAttributes & SPCFGRULEATTRIBUTES.SPRAF_Export) != 0);
    rule._cfgRule.Dynamic = ((dwAttributes & SPCFGRULEATTRIBUTES.SPRAF_Dynamic) != 0);
    rule._cfgRule.Import = ((dwAttributes & SPCFGRULEATTRIBUTES.SPRAF_Import) != 0);
}

/// <summary>
/// Set the path from which relative grammar imports are calculated. As specified by xml:base / meta base
/// Null or empty string will clear any existing base path.
/// </summary>
/// <param name="sBasePath">Relative or absolute URI of the base path, or null/empty to clear it.</param>
internal void SetBasePath (string sBasePath)
{
    if (!string.IsNullOrEmpty (sBasePath))
    {
        // Validate base path: the Uri constructor rejects malformed values.
        Uri uri = new Uri (sBasePath, UriKind.RelativeOrAbsolute);

        // Store the string form produced by the Uri
        _basePath = uri.ToString ();
    }
    else
    {
        _basePath = null;
    }
}

/// <summary>
/// Perform white space normalization in place.
///  - Trim leading/trailing white spaces.
///  - Collapse white space sequences to a single ' '.
///
/// <param name="sToken">Token to normalize; must not be null or empty.</param>
/// <returns>
/// The token trimmed with Helpers._achTrimChars, with every run of ' ' characters replaced
/// by a single ' '.
/// </returns>
internal static string NormalizeTokenWhiteSpace (string sToken)
{
    System.Diagnostics.Debug.Assert (!string.IsNullOrEmpty (sToken));

    // Trim leading and ending white spaces
    sToken = sToken.Trim (Helpers._achTrimChars);

    // Easy out if there are no consecutive double white spaces: the token is already
    // normalized and can be returned without being copied.
    if (sToken.IndexOf ("  ", StringComparison.Ordinal) == -1)
    {
        return sToken;
    }

    // Normalize internal spaces, compacting the characters in place
    char [] achSrc = sToken.ToCharArray ();
    int iDest = 0;

    for (int i = 0; i < achSrc.Length; )
    {
        // Collapse multiple white spaces into ' '
        if (achSrc [i] == ' ')
        {
            // The bound check keeps the scan inside the array should the token still end
            // with a space after trimming.
            do
            {
                i++;
            } while (i < achSrc.Length && achSrc [i] == ' ');

            achSrc [iDest++] = ' ';
            continue;
        }

        // Copy the non-white space character
        achSrc [iDest++] = achSrc [i++];
    }

    return new string (achSrc, 0, iDest);
}

#endregion

//********************************************************************
//
// Internal Property
//
//********************************************************************

#region Internal Property

/// Word string blob of the grammar.
internal StringBlob Words
{
    get
    {
        return this._words;
    }
}

/// Symbol string blob of the grammar.
internal StringBlob Symbols
{
    get
    {
        return this._symbols;
    }
}

#endregion

//*******************************************************************
//
// Private Methods
//
//********************************************************************

#region Private Methods

/// <summary>
/// Description:
///     Load compiled grammar data. This overwrites any existing data in the grammar
///     We end up with containers of words, symbols, rules, arcs, states and state handles, etc.
/// /// #if true internal void InitFromBinaryGrammar (StreamMarshaler streamHelper) { //CfgGrammar.TraceInformation ("BackEnd::InitFromBinaryGrammar"); CfgGrammar.CfgHeader header = CfgGrammar.ConvertCfgHeader (streamHelper); _words = header.pszWords; _symbols = header.pszSymbols; _grammarOptions = header.GrammarOptions; // // Build up the internal representation // State [] apStateTable = new State [header.arcs.Length]; SortedDictionaryruleFirstArcs = new SortedDictionary (); // // Initialize the rules // int previousCfgLastRules = _rules.Count; BuildRulesFromBinaryGrammar (header, apStateTable, ruleFirstArcs, previousCfgLastRules); // // Initialize the arcs // Arc [] apArcTable = new Arc [header.arcs.Length]; bool fLastArcNull = true; CfgArc pLastArc = new CfgArc (); State currentState = null; IEnumerator > ieFirstArcs = ruleFirstArcs.GetEnumerator (); // If no rules, then we have no arcs if (ieFirstArcs.MoveNext ()) { KeyValuePair kvFirstArc = ieFirstArcs.Current; Rule ruleCur = kvFirstArc.Value; // We repersist the static AND dynamic parts for now. This allows the grammar to be queried // with the automation interfaces for (int k = 1; k < header.arcs.Length; k++) { CfgArc arc = header.arcs [k]; // Reset the Transition index based on the compbined stringblobs if (arc.RuleRef) { // for a ruleref offset the rule index ruleCur._listRules.Add (_rules [(int) arc.TransitionIndex]); } if (kvFirstArc.Key == k) { // we are entering a new rule now ruleCur = kvFirstArc.Value; // Reset to zero once we have read the last rule. if (ieFirstArcs.MoveNext ()) { kvFirstArc = ieFirstArcs.Current; } } // new currentState? 
if (fLastArcNull || pLastArc.LastArc) { if (apStateTable [k] == null) { uint hNewState = CfgGrammar.NextHandle; apStateTable [k] = new State (ruleCur, hNewState, k); AddState (apStateTable [k]); } currentState = apStateTable [k]; } // // now get the arc // int iNextArc = (int) (arc.NextStartArcIndex); Arc newArc; State targetState = null; if (currentState != null && iNextArc != 0) { if (apStateTable [iNextArc] == null) { uint hNewState = CfgGrammar.NextHandle; apStateTable [iNextArc] = new State (ruleCur, hNewState, iNextArc); AddState (apStateTable [iNextArc]); } targetState = apStateTable [iNextArc]; } float flWeight = header.weights != null ? header.weights [k] : CfgGrammar.DEFAULT_WEIGHT; // determine properties of the arc now ... if (arc.RuleRef) { Rule ruleToTransitionTo = _rules [(int) arc.TransitionIndex]; newArc = new Arc (null, ruleToTransitionTo, _words, flWeight, CfgGrammar.SP_NORMAL_CONFIDENCE, null, MatchMode.AllWords, ref _fNeedWeightTable); } else { int transitionIndex = (int) arc.TransitionIndex; int ulSpecialTransitionIndex = (int) ((transitionIndex == CfgGrammar.SPWILDCARDTRANSITION || transitionIndex == CfgGrammar.SPDICTATIONTRANSITION || transitionIndex == CfgGrammar.SPTEXTBUFFERTRANSITION) ? transitionIndex : 0); newArc = new Arc ((ulSpecialTransitionIndex != 0) ? 0 : (int) arc.TransitionIndex, flWeight, arc.LowConfRequired ? CfgGrammar.SP_LOW_CONFIDENCE : arc.HighConfRequired ? 
CfgGrammar.SP_HIGH_CONFIDENCE : CfgGrammar.SP_NORMAL_CONFIDENCE, ulSpecialTransitionIndex, MatchMode.AllWords, ref _fNeedWeightTable); } newArc.Start = currentState; newArc.End = targetState; AddArc (newArc); apArcTable [k] = newArc; fLastArcNull = false; pLastArc = arc; } } // Initialize the Semantics tags for (int k = 1, iCurTag = 0; k < header.arcs.Length; k++) { CfgArc arc = header.arcs [k]; if (arc.HasSemanticTag) { System.Diagnostics.Debug.Assert (header.tags [iCurTag].StartArcIndex == k); while (iCurTag < header.tags.Length && header.tags [iCurTag].StartArcIndex == k) { // we should already point to the tag CfgSemanticTag semTag = header.tags [iCurTag]; Tag tag = new Tag (this, semTag); _tags.Add (tag); apArcTable [tag._cfgTag.StartArcIndex].AddStartTag (tag); apArcTable [tag._cfgTag.EndArcIndex].AddEndTag (tag); // If we have ms-properties than _nameOffset != overwise it is w3c tags. if (semTag._nameOffset > 0) { tag._cfgTag._nameOffset = _symbols.OffsetFromId (_symbols.Find (_symbols.FromOffset (semTag._nameOffset))); } else { // The offset of the JScrip expression is stored in the value field. tag._cfgTag._valueOffset = _symbols.OffsetFromId (_symbols.Find (_symbols.FromOffset (semTag._valueOffset))); } iCurTag++; } } } // _fNeedWeightTable = true; if (header.BasePath != null) { SetBasePath (header.BasePath); } _guid = header.GrammarGUID; _langId = header.langId; _grammarMode = header.GrammarMode; _fLoadedFromBinary = true; // Save Last ArcIndex #if VSCOMPILE && DEBUG DumpGrammarStatistics ("InitFromBinaryGrammar"); #endif } private Arc CreateTransition (string sWord, float flWeight, int requiredConfidence) { // epsilon transition for empty words return AddSingleWordTransition (!string.IsNullOrEmpty (sWord) ? 
sWord : null, flWeight, requiredConfidence); } /// /// /// /// /// /// /// /// ///private CfgGrammar.CfgSerializedHeader BuildHeader (List sortedStates, int cBasePath, UInt16 iSemanticGlobals, out int cArcs, out float [] pWeights) { cArcs = 1; // Start with offset one! (0 indicates dead state). pWeights = null; int cSemanticTags = 0; int cLargest = 0; foreach (State state in sortedStates) { // For new states SerializeId is INFINITE so we set it correctly here. // For existing states we preserve the index from loading, // unless new states have been added in. state.SerializeId = cArcs; int thisState = state.NumArcs; #if DEBUG if (thisState == 0 && state.InArcs.IsEmpty && state.Rule._cStates > 1) { XmlParser.ThrowSrgsException (SRID.StateWithNoArcs); } #endif cArcs += thisState; if (cLargest < thisState) { cLargest = thisState; } cSemanticTags += state.NumSemanticTags; } CfgGrammar.CfgSerializedHeader header = new CfgGrammar.CfgSerializedHeader (); uint ulOffset = (uint) Marshal.SizeOf (typeof (CfgGrammar.CfgSerializedHeader)); header.FormatId = CfgGrammar._SPGDF_ContextFree; _guid = Guid.NewGuid (); header.GrammarGUID = _guid; header.LangID = (UInt16) _langId; header.pszSemanticInterpretationGlobals = iSemanticGlobals; header.cArcsInLargestState = cLargest; header.cchWords = _words.StringSize (); header.cWords = _words.Count; // For compat with SAPI 5.x add one to cWords if there's more than one word. // The CFGEngine code assumes cWords includes the initial empty-string word. // See PS 11491 and 61982. if (header.cWords > 0) { header.cWords++; } header.pszWords = ulOffset; ulOffset += (uint) _words.SerializeSize () * Helpers._sizeOfChar; header.cchSymbols = _symbols.StringSize (); header.pszSymbols = ulOffset; ulOffset += (uint) _symbols.SerializeSize () * Helpers._sizeOfChar; header.cRules = _rules.Count; header.pRules = ulOffset; ulOffset += (uint) (_rules.Count * Marshal.SizeOf (typeof (CfgRule))); header.cBasePath = cBasePath > 0 ? 
ulOffset : 0; //If there is no base path offset is set to zero ulOffset += (uint) (((int) cBasePath * Helpers._sizeOfChar + 3) & ~3); header.cArcs = cArcs; header.pArcs = ulOffset; ulOffset += (uint) (cArcs * Marshal.SizeOf (typeof (CfgArc))); if (_fNeedWeightTable) { header.pWeights = ulOffset; ulOffset += (uint) (cArcs * Marshal.SizeOf (typeof (float))); pWeights = new float [cArcs]; pWeights [0] = 0.0f; } else { header.pWeights = 0; ulOffset += 0; } if (_rootRule != null) { //We have a root rule header.ulRootRuleIndex = (uint) _rootRule._iSerialize; } else { //-1 means there is no root rule header.ulRootRuleIndex = 0xFFFFFFFF; } header.GrammarOptions = _grammarOptions | ((_alphabet == AlphabetType.Sapi) ? 0 : GrammarOptions.IpaPhoneme); #if !NO_STG header.GrammarOptions |= _scriptRefs.Count > 0 ? GrammarOptions.STG | GrammarOptions.KeyValuePairSrgs : 0; #endif header.GrammarMode = (uint) _grammarMode; header.cTags = cSemanticTags; header.tags = ulOffset; ulOffset += (uint) (cSemanticTags * Marshal.SizeOf (typeof (CfgSemanticTag))); #if !NO_STG header.cScripts = _scriptRefs.Count; header.pScripts = header.cScripts > 0 ? ulOffset : 0; ulOffset += (uint) (_scriptRefs.Count * Marshal.SizeOf (typeof (CfgScriptRef))); header.cIL = _il != null ? _il.Length : 0; header.pIL = header.cIL > 0 ? ulOffset : 0; ulOffset += (uint) (header.cIL * Marshal.SizeOf (typeof (byte))); header.cPDB = _pdb != null ? _pdb.Length : 0; header.pPDB = header.cPDB > 0 ? 
ulOffset : 0; ulOffset += (uint) (header.cPDB * Marshal.SizeOf (typeof (byte))); #endif header.ulTotalSerializedSize = ulOffset; return header; } private CfgGrammar.CfgHeader BuildRulesFromBinaryGrammar (CfgGrammar.CfgHeader header, State [] apStateTable, SortedDictionary ruleFirstArcs, int previousCfgLastRules) { for (int i = 0; i < header.rules.Length; i++) { // Check if the rule does not exist already CfgRule cfgRule = header.rules [i]; int firstArc = (int) cfgRule.FirstArcIndex; cfgRule._nameOffset = _symbols.OffsetFromId (_symbols.Find (header.pszSymbols.FromOffset (cfgRule._nameOffset))); Rule rule = new Rule (this, _symbols.FromOffset (cfgRule._nameOffset), cfgRule, i + previousCfgLastRules, _grammarOptions & GrammarOptions.TagFormat, ref _cImportedRules); rule._firstState = _states.CreateNewState (rule); _rules.Add (rule); // Add the rule to the list of firstArc/rule if (firstArc > 0) { ruleFirstArcs.Add ((int) cfgRule.FirstArcIndex, rule); } rule._fStaticRule = (cfgRule.Dynamic) ? false : true; rule._cfgRule.DirtyRule = false; // by default loaded static rules have an exist rule._fHasExitPath = (rule._fStaticRule) ? true : false; // or they wouldn't be there in the first place if (firstArc != 0) { System.Diagnostics.Debug.Assert (apStateTable [firstArc] == null); rule._firstState.SerializeId = (int) cfgRule.FirstArcIndex; apStateTable [firstArc] = rule._firstState; } if (rule._cfgRule.HasResources) { throw new NotImplementedException (); } if (header.ulRootRuleIndex == i) { _rootRule = rule; } // Add rule to RuleListByName and RuleListByID hash tables. 
if (rule._cfgRule._nameOffset != 0) { // Look for the rule in the original CFG and map it in the combined string blobs _nameOffsetRules.Add (rule._cfgRule._nameOffset, rule); } } return header; } #endif private Rule CloneState (State srcToState, List CloneStack, Dictionary srcToDestHash) { bool newRule = false; int posDynamic = srcToState.Rule.Name.IndexOf ("URL:DYNAMIC#", StringComparison.Ordinal); string ruleName = posDynamic != 0 ? srcToState.Rule.Name : srcToState.Rule.Name.Substring (12); Rule dstRule = FindInRules (ruleName); // Clone this rule into this GrammarBuilder if it does not exist yet if (dstRule == null) { dstRule = srcToState.Rule.Clone (_symbols, ruleName); _rules.Add (dstRule); newRule = true; } // Should not exist yet System.Diagnostics.Debug.Assert (!srcToDestHash.ContainsKey (srcToState)); // push all the states for that rule State newState = CreateNewState (dstRule); srcToDestHash.Add (srcToState, newState); CloneStack.Add (srcToState); if (newRule) { dstRule._firstState = newState; } return dstRule; } private Rule FindInRules (string ruleName) { foreach (Rule rule in _rules) { if (rule.Name == ruleName) { return rule; } } return null; } private static void LogError (string rule, SRID srid, params object [] args) { string sError = SR.Get (srid, args); throw new FormatException (string.Format (CultureInfo.InvariantCulture, "Rule=\"{0}\" - ", rule) + sError); } /// /// Connect arc to the state graph. /// /// #if DEBUG private #else private static #endif void AddArc (Arc arc) { #if DEBUG arc.Backend = this; #endif } ////// /// ///private void ValidateAndTagRules () { //CfgGrammar.TraceInformation ("BackEnd::ValidateAndTagRules"); // bool fAtLeastOneRule = false; int ulIndex = 0; foreach (Rule rule in _rules) { // set _fHasExitPath = true for empty dynamic grammars and imported rules // Clear this for the next loop through the rules.... rule._fHasExitPath |= (rule._cfgRule.Dynamic | rule._cfgRule.Import) ? 
true : false; rule._iSerialize = ulIndex++; fAtLeastOneRule |= (rule._cfgRule.Dynamic || rule._cfgRule.TopLevel || rule._cfgRule.Export); rule.Validate (); } #if DEBUG // // Now make sure that all rules have an exit path. // foreach (Rule rule in _rules) { _ulRecursiveDepth = 0; //The following function will use recursive function that might change _ulRecursiveDepth rule.CheckForExitPath (ref _ulRecursiveDepth); } #endif // // Check each exported rule if it has a dynamic rule in its "scope" // foreach (Rule rule in _rules) { if (rule._cfgRule.Dynamic) { rule._cfgRule.HasDynamicRef = true; _ulRecursiveDepth = 0; rule.PopulateDynamicRef (ref _ulRecursiveDepth); } } } private void CheckLeftRecursion (List states) { bool fReachedEndState; foreach (State state in states) { state.CheckLeftRecursion (out fReachedEndState); } } /// /// /// /// /// /// private Arc AddSingleWordTransition (string s, float flWeight, int requiredConfidence) { //CfgGrammar.TraceInformation ("BackEnd::CGramComp::AddSingleWordTransition"); Arc arc = new Arc (s, null, _words, flWeight, requiredConfidence, null, MatchMode.AllWords, ref _fNeedWeightTable); AddArc (arc); return arc; } internal void AddState (State state) { _states.Add (state); } #if VSCOMPILE && DEBUG /// internal void DumpStateTable (string sTitle) { Liststates = new List (_states); DumpStateTable (sTitle, states.ToArray ()); } internal void DumpStateTable (string sTitle, State [] states) { CfgGrammar.TraceInformation2 (sTitle); int iHandle2 = 0; for (int iState = 0; iState < states.Length; iState++) { State state = states [iState]; if (state == null) { continue; } //string s = state.OutArcs.Count > 0 ? state.OutArcs.First.WordId > 0 ? 
_words [state.OutArcs.First.WordId] : string.Empty : string.Empty; string s = _symbols.FromOffset (state.Rule._cfgRule._nameOffset); CfgGrammar.TraceInformation3 (string.Format (CultureInfo.InvariantCulture, "{0}: {1} {2} {3} '", iHandle2, state.Id, state.SerializeId, s)); foreach (Arc arc2 in state.OutArcs) { int iWord = arc2.WordId; string sWord = arc2.RuleRef != null ? _symbols.FromOffset (arc2.RuleRef._cfgRule._nameOffset) : iWord > 0 ? _words [iWord] : string.Empty; //CfgGrammar.TraceInformation3 (string.Format (CultureInfo.InvariantCulture, "{0}{1} [{2}]", (fFirst ? string.Empty : ", "), sWord, arc2.Id)); //if (arc2._tag != null) //{ // CfgGrammar.TraceInformation3 (string.Format (CultureInfo.InvariantCulture, "({0}, {1}, {2})", _symbols.FromOffset (arc2._tag._cfgTag._valueOffset), arc2._tag._startArc.Id, arc2._tag._endArc.Id)); //} } CfgGrammar.TraceInformation2 ("'"); } } internal void DumpStateMachine (TextWriter tw) { tw.WriteLine ("Word Blobs"); for (int i = 1; i < _words.Count; i++) { tw.WriteLine (" \"" + _words [i] + "\""); } tw.WriteLine ("\nSymbols Blobs"); for (int i = 1; i < _symbols.Count; i++) { tw.WriteLine (" \"" + _symbols [i] + "\""); } tw.WriteLine ("\nRules"); foreach (Rule rule in _rules) { tw.WriteLine (string.Format (CultureInfo.InvariantCulture, " \"{0}\" id: {1} flags: {2}", rule.Name, rule._cfgRule._id, rule._cfgRule._flag.ToString ("x", CultureInfo.InvariantCulture))); tw.WriteLine (string.Format (CultureInfo.InvariantCulture, " States: {0}", rule._cStates)); } tw.WriteLine (string.Format (CultureInfo.InvariantCulture, "\nStates:\n Count: {0}", _states.Count)); foreach (State state in _states) { tw.WriteLine (string.Format (CultureInfo.InvariantCulture, " \"{0}\"", state.Rule.Name)); } } //GrammarOptimization /// /// Dump grammar statistics to debug window. 
(Debug only) /// /// void DumpGrammarStatistics (string title) { int iNumStates = 0; int iNumArcs = 0; int iNumEpsilonArcs = 0; int iMaxBranch = 0; int iNumProperties = 0; foreach (State state in _states) { iNumStates++; int cArcs = 0; foreach (Arc arc in state.OutArcs) { cArcs++; if (arc.IsEpsilonTransition) { iNumEpsilonArcs++; } //if (arc._tag != null) //{ // iNumProperties++; //} } if (iMaxBranch < cArcs) iMaxBranch = cArcs; iNumArcs += cArcs; } Console.WriteLine (title); Console.WriteLine (string.Format (CultureInfo.InvariantCulture, "{0}\n NumStates = {0}\n NumArcs = {1}\n NumEpsilons = {2}\n", iNumStates, iNumArcs, iNumEpsilonArcs)); Console.WriteLine (string.Format (CultureInfo.InvariantCulture, " MaxBranch = {0}\n" + " NumProperties = {1}\n", iMaxBranch, iNumProperties)); } #endif #endregion //******************************************************************* // // Internal Properties // //******************************************************************* #region Internal Properties internal int LangId { get { return _langId; } set { _langId = value; } } internal GrammarOptions GrammarOptions { get { return _grammarOptions; } set { _grammarOptions = value; } } internal GrammarType GrammarMode { set { _grammarMode = value; } } internal AlphabetType Alphabet { set { _alphabet = value; } get { return _alphabet; } } internal CollectionGlobalTags { get { return _globalTags; } set { _globalTags = value; } } #if !NO_STG internal Collection ScriptRefs { set { _scriptRefs = value; } } internal byte [] IL { set { _il = value; } } internal byte [] PDB { set { _pdb = value; } } #endif #endregion //******************************************************************* // // Private Fields // //******************************************************************** #region Private Fields private int _langId = CultureInfo.CurrentUICulture.LCID; private StringBlob _words; private StringBlob _symbols; //private int _cResources; private Guid _guid; private bool 
_fNeedWeightTable; private Graph _states = new Graph (); private List _rules = new List (); private int _ruleIndex; private Dictionary _nameOffsetRules = new Dictionary (); private Rule _rootRule; #if !SPEECHSERVER private GrammarOptions _grammarOptions = GrammarOptions.KeyValuePairs; #else private GrammarOptions _grammarOptions = GrammarOptions.W3cV1; #endif // It is used sequentially. So there is no thread issue private int _ulRecursiveDepth; // Path from which relative grammar imports are calculated. As specified by xml:base private string _basePath; // Collection of all SemanticTags in the grammar (sorted by StartArc) private List _tags = new List (); // Voice or DTMF private GrammarType _grammarMode = GrammarType.VoiceGrammar; // Pron information is either IPA or SAPI private AlphabetType _alphabet = AlphabetType.Sapi; // Global value for the semantic interpretation tags private Collection _globalTags = new Collection (); // private static byte [] _abZero3 = new byte [] { 0, 0, 0 }; private static char [] _achZero = new char [] { '\0' }; // private const uint SPGF_RESET_DIRTY_FLAG = 0x80000000; private int _cImportedRules; #if !NO_STG // List of cd /reference Rule->rule 'on'method-> .Net method private Collection _scriptRefs = new Collection (); // Grammar code assembly private byte [] _il; // Grammar debug symbols private byte [] _pdb; #endif private bool _fLoadedFromBinary; #endregion } } // File provided for Reference Use Only by Microsoft Corporation (c) 2007. //------------------------------------------------------------------ // // Copyright (c) Microsoft Corporation. All rights reserved. 
//
//
// Description:
//      CFG Grammar backend
//
// History:
//      5/1/2004    jeanfp      Created from the Sapi Managed code
//-----------------------------------------------------------------

using System;
using System.Collections;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Diagnostics;
using System.Globalization;
using System.Runtime.InteropServices;
using System.Text;
using System.IO;
using System.Speech.Internal.SrgsParser;

namespace System.Speech.Internal.SrgsCompiler
{
    /// <summary>
    /// Back end of the SRGS compiler. Holds the word and symbol string blobs, the rules,
    /// the state graph and the semantic tags of a grammar, and serializes them to the
    /// binary CFG format (see Commit).
    /// </summary>
    internal sealed partial class Backend
    {
        //*******************************************************************
        //
        // Constructors
        //
        //*******************************************************************

        #region Constructors

        /// <summary>
        /// Creates an empty back end with fresh word and symbol blobs.
        /// </summary>
        internal Backend ()
        {
            _words = new StringBlob ();
            _symbols = new StringBlob ();
        }

        /// <summary>
        /// Creates a back end whose content is loaded from a compiled binary grammar.
        /// </summary>
        /// <param name="streamHelper">Marshaler positioned on the binary grammar; passed straight to InitFromBinaryGrammar.</param>
        internal Backend (StreamMarshaler streamHelper)
        {
            InitFromBinaryGrammar (streamHelper);
        }

        #endregion

        //********************************************************************
        //
        // Internal Methods
        //
        //*******************************************************************

        #region Internal Methods

        /// <summary>
        /// Optimizes the grammar network by removing the epsilon states and merging
        /// duplicate transitions. See GrammarOptimization.doc for details.
        /// Also forces a weight table to be emitted on the next serialization.
        /// </summary>
        internal void Optimize ()
        {
            //DumpGrammarStatistics ("GrammarOptimization: Pre optimize");
            _states.Optimize ();
            //DumpGrammarStatistics ("GrammarOptimization: Post optimize");

            // Most likely, there will be an arc with a weight != 1. So we need a weight table.
            _fNeedWeightTable = true;
        }

        ///
        /// Performs consistency checks of the grammar structure, creates the
        /// serialized format and either saves it to the stream provided by SetSaveOptions,
        /// or reloads it into the CFG engine.
///
/// Parameter streamBuffer: marshaler wrapping the stream that receives the serialized grammar.
/// All offsets stored in the header are relative to the stream position on entry.
///
/// Sections are written in this order: header, words blob, symbols blob, rules, optional base
/// path (zero terminated and padded), arcs, optional weight table, semantic tags and, unless
/// NO_STG is defined, script references, IL and PDB bytes.
///
/// Side effects visible in this method: _states is set to null, global tags and script method
/// names are added to the symbol blob, and tags no arc refers to are removed from _tags.
///
internal void Commit (StreamMarshaler streamBuffer)
{
    // For debugging purpose, assert if the position is not it is assumed it should be
    // Keep the start position in the stream
    long startStreamPostion = streamBuffer.Stream.Position;

    // put all states State into a sorted array by rule parent index and serialized index
    List<State> sortedStates = new List<State> (_states);

    // Release the memory for the original list of states
    _states = null;

    sortedStates.Sort ();

    // Validate the grammar
    ValidateAndTagRules ();
    CheckLeftRecursion (sortedStates);

    // Include null terminator
    int cBasePath = _basePath != null ? _basePath.Length + 1 : 0;

    float [] pWeights;
    int cArcs;

    // Add the top level semantic interpretation tag
    // This should be set as the first symbol in the symbol string blob since it must hold on a 16 bits value.
    int semanticInterpretationGlobals = 0;
    if (_globalTags.Count > 0)
    {
        StringBuilder sb = new StringBuilder ();
        foreach (string s in _globalTags)
        {
            sb.Append (s);
        }
        _symbols.Add (sb.ToString (), out semanticInterpretationGlobals);
        semanticInterpretationGlobals = _symbols.OffsetFromId (semanticInterpretationGlobals);
        if (semanticInterpretationGlobals > UInt16.MaxValue)
        {
            throw new OverflowException (SR.Get (SRID.TooManyRulesWithSemanticsGlobals));
        }
    }

#if !NO_STG
    // Write the method names as symbols
    foreach (ScriptRef script in _scriptRefs)
    {
        _symbols.Add (script._sMethod, out script._idSymbol);
    }
#endif
    // get the header (the range check above makes the narrowing to UInt16 lossless)
    CfgGrammar.CfgSerializedHeader header = BuildHeader (sortedStates, cBasePath, unchecked ((UInt16) semanticInterpretationGlobals), out cArcs, out pWeights);
    streamBuffer.WriteStream (header);

    //
    // For the string blobs, we must explicitly report I/O error since the blobs don't
    // use the error log facility.
    //
    System.Diagnostics.Debug.Assert (streamBuffer.Stream.Position - startStreamPostion == header.pszWords);
    streamBuffer.WriteArrayChar (_words.SerializeData (), _words.SerializeSize ());

    System.Diagnostics.Debug.Assert (streamBuffer.Stream.Position - startStreamPostion == header.pszSymbols);
    streamBuffer.WriteArrayChar (_symbols.SerializeData (), _symbols.SerializeSize ());

    System.Diagnostics.Debug.Assert (streamBuffer.Stream.Position - startStreamPostion == header.pRules);
    foreach (Rule rule in _rules)
    {
        rule.Serialize (streamBuffer);
    }

    if (cBasePath > 0)
    {
        streamBuffer.WriteArrayChar (_basePath.ToCharArray (), _basePath.Length);

        // Add a zero to be compatible with SAPI 5
        System.Diagnostics.Debug.Assert (_basePath.Length + 1 == cBasePath);
        streamBuffer.WriteArrayChar (_achZero, 1);

        // Zero-pad to align following structures.
        // NOTE(review): writes (byte count & 3) padding bytes, which matches the 4-byte rounding
        // done in BuildHeader only while the byte count is even - presumably
        // Helpers._sizeOfChar is 2; confirm.
        streamBuffer.WriteArray (_abZero3, cBasePath * Helpers._sizeOfChar & 3);
    }

    //
    // Write a dummy 0 index state entry
    //
    CfgArc dummyArc = new CfgArc ();

    System.Diagnostics.Debug.Assert (streamBuffer.Stream.Position - startStreamPostion == header.pArcs);
    streamBuffer.WriteStream (dummyArc);

    int ulWeightOffset = 1;
    uint arcOffset = 1;

    bool semanticInterpretation = (GrammarOptions & GrammarOptions.MssV1) == GrammarOptions.MssV1;
    foreach (State state in sortedStates)
    {
        state.SerializeStateEntries (streamBuffer, semanticInterpretation, pWeights, ref arcOffset, ref ulWeightOffset);
    }

    // BuildHeader stores 0 in pWeights when no weight table is emitted, so the position is
    // only checked when a table is present (same form as the pScripts/pIL/pPDB checks below).
    System.Diagnostics.Debug.Assert (header.pWeights == 0 || streamBuffer.Stream.Position - startStreamPostion == header.pWeights);
    if (_fNeedWeightTable)
    {
        streamBuffer.WriteArray (pWeights, cArcs);
    }

    System.Diagnostics.Debug.Assert (streamBuffer.Stream.Position - startStreamPostion == header.tags);
    if (!semanticInterpretation)
    {
        foreach (State state in sortedStates)
        {
            state.SetEndArcIndexForTags ();
        }
    }

    // Remove the orphaned tags, i.e. the tags that no serialized arc refers to.
    // Iterate backwards so that RemoveAt does not shift entries still to be visited.
    for (int i = _tags.Count - 1; i >= 0; i--)
    {
        // When arc are created the index is set to zero. This value changes during serialization
        // if an arc references it
        if (_tags [i]._cfgTag.ArcIndex == 0)
        {
            _tags.RemoveAt (i);
        }
    }

    // Sort the _tags array by ArcIndex
    _tags.Sort ();

    // Write the _tags array
    foreach (Tag tag in _tags)
    {
        tag.Serialize (streamBuffer);
    }

#if !NO_STG
    // Write the script references and the IL write after the header so getting it for the grammar
    // Does not require a seek to the end of the file
    System.Diagnostics.Debug.Assert (header.pScripts == 0 || streamBuffer.Stream.Position - startStreamPostion == header.pScripts);
    foreach (ScriptRef script in _scriptRefs)
    {
        script.Serialize (_symbols, streamBuffer);
    }

    // Write the assembly bits
    System.Diagnostics.Debug.Assert (header.pIL == 0 || streamBuffer.Stream.Position - startStreamPostion == header.pIL);
    if (_il != null && _il.Length > 0)
    {
        streamBuffer.Stream.Write (_il, 0, _il.Length);
    }

    System.Diagnostics.Debug.Assert (header.pPDB == 0 || streamBuffer.Stream.Position - startStreamPostion == header.pPDB);
    if (_pdb != null && _pdb.Length > 0)
    {
        streamBuffer.Stream.Write (_pdb, 0, _pdb.Length);
    }
#endif
}

///
/// Description:
///     Combine the current data in a grammar with one coming from a CFG
///
/// Parameter ruleName: name of the rule in org whose sub-graph is cloned into the new back end.
/// Parameter org: back end searched for ruleName; its weight-table flag, grammar mode and
///     grammar options are copied to the result.
/// Parameter extra: second back end, handed to CloneSubGraph together with org.
/// Returns: a new Backend flagged as loaded from binary. It contains no cloned rule when no
///     rule of org is named ruleName.
///
internal static Backend CombineGrammar (string ruleName, Backend org, Backend extra)
{
    Backend be = new Backend ();
    be._fLoadedFromBinary = true;
    be._fNeedWeightTable = org._fNeedWeightTable;
    be._grammarMode = org._grammarMode;
    be._grammarOptions = org._grammarOptions;

    // Hash source state to destination state
    Dictionary<State, State> srcToDestHash = new Dictionary<State, State> ();

    // Find the rule; every rule of org carrying that name is cloned
    foreach (Rule orgRule in org._rules)
    {
        if (orgRule.Name == ruleName)
        {
            be.CloneSubGraph (orgRule, org, extra, srcToDestHash, true);
        }
    }
    return be;
}

/// <summary>
/// Creates a new state for the given rule in this back end's state graph.
/// </summary>
internal State CreateNewState (Rule rule)
{
    return _states.CreateNewState (rule);
}

/// <summary>
/// Removes the given state from this back end's state graph.
/// </summary>
internal void DeleteState (State state)
{
    _states.DeleteState (state);
}

/// <summary>
/// Forwards to the state graph's MoveInputTransitionsAndDeleteState for the two states.
/// </summary>
internal void MoveInputTransitionsAndDeleteState (State from, State to)
{
    _states.MoveInputTransitionsAndDeleteState (from, to);
}

/// <summary>
/// Forwards to the state graph's MoveOutputTransitionsAndDeleteState for the two states.
/// </summary>
internal void MoveOutputTransitionsAndDeleteState (State from, State to)
{
    _states.MoveOutputTransitionsAndDeleteState (from, to);
}

///
/// Tries to find the rule's initial state handle. If both a name and an id
/// are provided, then both have to match in order for this call to succeed.
/// If the rule doesn't already exist then we define it if fCreateIfNotExists,
/// otherwise we return an error ().
///
/// NOTE(review): the description above mentions an id and fCreateIfNotExists, which this
/// signature does not have; as written the method always creates and registers a new rule.
///
///  - name         name of the rule to create
///  - attributes   SPCFGRULEATTRIBUTES flags for the rule. TopLevel/Active/Root are
///                 silently cleared when Import is set.
///
/// throws:
///  - ArgumentException  attributes contains unknown bits, or both Import and Export
///  - FormatException    (raised by LogError) for an imported rule with an empty name, a
///                       duplicated non-dynamic rule name in a grammar loaded from binary,
///                       or a second root rule
///
/// Returns the newly created rule.
///
internal Rule CreateRule (string name, SPCFGRULEATTRIBUTES attributes)
{
    //CfgGrammar.TraceInformation ("BackEnd::CreateRule");

    SPCFGRULEATTRIBUTES allFlags = SPCFGRULEATTRIBUTES.SPRAF_TopLevel | SPCFGRULEATTRIBUTES.SPRAF_Active | SPCFGRULEATTRIBUTES.SPRAF_Export | SPCFGRULEATTRIBUTES.SPRAF_Import | SPCFGRULEATTRIBUTES.SPRAF_Interpreter | SPCFGRULEATTRIBUTES.SPRAF_Dynamic | SPCFGRULEATTRIBUTES.SPRAF_Root;

    if (attributes != 0 && ((attributes & ~allFlags) != 0 || ((attributes & SPCFGRULEATTRIBUTES.SPRAF_Import) != 0 && (attributes & SPCFGRULEATTRIBUTES.SPRAF_Export) != 0)))
    {
        throw new ArgumentException ("attributes");
    }

    // PS: 52277 - SAPI does not properly handle a rule marked as Import and TopLevel/Active/Root.
    // - To maintain maximal backwards compatibility, if a rule is marked as Import, we will unmark TopLevel/Active/Root.
    // - This changes the behavior when application tries to activate this rule. However, given that it is already
    //   broken/fragile, we believe it is better to change the behavior.
    if ((attributes & SPCFGRULEATTRIBUTES.SPRAF_Import) != 0 && ((attributes & SPCFGRULEATTRIBUTES.SPRAF_TopLevel) != 0 || (attributes & SPCFGRULEATTRIBUTES.SPRAF_Active) != 0 || (attributes & SPCFGRULEATTRIBUTES.SPRAF_Root) != 0))
    {
        //CfgGrammar.TraceInformation ("Warning: A rule cannot be marked as both Import and TopLevel/Active/Root.\n" + " TopLevel/Active/Root have been disabled.");
        attributes &= ~(SPCFGRULEATTRIBUTES.SPRAF_TopLevel | SPCFGRULEATTRIBUTES.SPRAF_Active | SPCFGRULEATTRIBUTES.SPRAF_Root);
    }

    // An imported rule needs a name. Test for null/empty before looking at the first
    // character: indexing an empty string throws instead of reporting InvalidImport.
    if ((attributes & SPCFGRULEATTRIBUTES.SPRAF_Import) != 0 && (string.IsNullOrEmpty (name) || name [0] == '\0'))
    {
        LogError (name, SRID.InvalidImport);
    }

    if (_fLoadedFromBinary)
    {
        // Scan all non-dynamic names and prevent a duplicate...
        foreach (Rule r in _rules)
        {
            // _nameOffset is an offset into the symbol blob, so resolve it with FromOffset
            // (the blob indexer is used with symbol ids elsewhere in this class).
            string wpszName = _symbols.FromOffset (r._cfgRule._nameOffset);
            if (!r._cfgRule.Dynamic && name == wpszName)
            {
                LogError (name, SRID.DuplicatedRuleName);
            }
        }
    }

    int idString;
    int cImportedRule = 0;
    Rule rule = new Rule (this, name, _symbols.Add (name, out idString), attributes, _ruleIndex, 0, _grammarOptions & GrammarOptions.TagFormat, ref cImportedRule);
    rule._iSerialize2 = _ruleIndex++;

    if ((attributes & SPCFGRULEATTRIBUTES.SPRAF_Root) != 0)
    {
        if (_rootRule != null)
        {
            //We already have a root rule, return error code.
            LogError (name, SRID.RootRuleAlreadyDefined);
        }
        else
        {
            _rootRule = rule;
        }
    }

    // Add rule to RuleListByName and RuleListByID hash tables.
    if (rule._cfgRule._nameOffset != 0)
    {
        _nameOffsetRules.Add (rule._cfgRule._nameOffset, rule);
    }

    //
    // It is important to insert this at the tail for dynamic rules to
    // retain their slot number.
    //
    _rules.Add (rule);
    _rules.Sort ();

    return rule;
}

/// <summary>
/// Internal method for finding rule in rule list
/// </summary>
/// <param name="sRule">Name of the rule to look up in the symbol blob.</param>
/// <returns>The rule registered under that name, or null when there is none.</returns>
internal Rule FindRule (string sRule)
{
    //CfgGrammar.TraceInformation ("BackEnd::FindRule");

    Rule rule = null;

    if (_nameOffsetRules.Count > 0)
    {
        // Find rule corresponding to name symbol offset corresponding to the RuleName
        int iWord = _symbols.Find (sRule);

        if (iWord > 0)
        {
            int dwSymbolOffset = _symbols.OffsetFromId (iWord);

            System.Diagnostics.Debug.Assert (dwSymbolOffset == 0 || _symbols [iWord] == sRule);

            if (dwSymbolOffset > 0)
            {
                // Single lookup; leaves rule null when the offset is not registered
                _nameOffsetRules.TryGetValue (dwSymbolOffset, out rule);
            }
        }
    }

    if (rule != null)
    {
        string sRuleFound = rule.Name;

        // A non-empty requested name has to match the name of the rule that was found
        if (!string.IsNullOrEmpty (sRule) && sRuleFound != sRule)
        {
            LogError (sRule, SRID.RuleNameIdConflict);
        }
    }

    return rule;
}

/// <summary>
/// Adds a word transition from hFromState to hToState. If hToState == null
/// then the arc will be to the (implicit) terminal state. If psz == null then
/// we add an epsilon transition. Properties are pushed back to the
/// first un-ambiguous arc in case we can share a common initial state path.
/// The weight will be placed on the first arc (if there exists an arc with
/// the same word but different weight we will create a new arc).
/// NOTE(review): the state names above do not appear in this signature; this method
/// only creates and returns the arc through CreateTransition.
/// </summary>
internal Arc WordTransition (string sWord, float flWeight, int requiredConfidence)
{
    return CreateTransition (sWord, flWeight, requiredConfidence);
}

/// <summary>
/// Creates an arc for a subset match on the given text, using weight 1.0 and normal confidence.
/// </summary>
/// <param name="text">Text to match; its white space is normalized first.</param>
/// <param name="matchMode">Match mode stored on the arc.</param>
internal Arc SubsetTransition (string text, MatchMode matchMode)
{
    // Performs white space normalization in place
    text = NormalizeTokenWhiteSpace (text);

    return new Arc (text, null, _words, 1.0f, CfgGrammar.SP_NORMAL_CONFIDENCE, null, matchMode, ref _fNeedWeightTable);
}

///
/// Adds a rule (reference) transition from hFromState to hToState.
/// hRule can also be one of these special transition handles:
///     SPRULETRANS_WILDCARD   :    "WILDCARD" transition
///     SPRULETRANS_DICTATION  :    single word from dictation
///     SPRULETRANS_TEXTBUFFER :    "TEXTBUFFER" transition
///
/// Parameter rule: rule to reference, or one of the special transition handles listed above.
/// Parameter parentRule: rule calling the ruleref; when both it and a regular target rule are
///     given, it is inserted at the head of the target's _listRules.
/// Parameter flWeight: weight of the arc; a negative value is reported as UnsupportedFormat.
/// Returns: the new arc.
///
internal Arc RuleTransition (Rule rule, Rule parentRule, float flWeight)
{
    //CfgGrammar.TraceInformation ("BackEnd::AddRuleTransition");

    if (flWeight < 0.0f)
    {
        XmlParser.ThrowSrgsException (SRID.UnsupportedFormat);
    }

    // A special handle travels in the "special transition" slot of the arc, any other
    // rule in the "rule reference" slot; exactly one of the two is non-null.
    bool isSpecialHandle = rule == CfgGrammar.SPRULETRANS_WILDCARD
        || rule == CfgGrammar.SPRULETRANS_DICTATION
        || rule == CfgGrammar.SPRULETRANS_TEXTBUFFER;
    Rule targetRule = isSpecialHandle ? null : rule;
    Rule specialHandle = isSpecialHandle ? rule : null;

    // The weight-table flag reported by the Arc constructor is deliberately kept local here
    bool localNeedWeightTable = false;
    Arc ruleArc = new Arc (null, targetRule, _words, flWeight, '\0', specialHandle, MatchMode.AllWords, ref localNeedWeightTable);
    AddArc (ruleArc);

    if (targetRule != null && parentRule != null)
    {
        targetRule._listRules.Insert (0, parentRule);
    }

    return ruleArc;
}

///
/// Adds a word transition from hFromState to hToState. If hToState == null
/// then the arc will be to the (implicit) terminal state. If psz == null then
/// we add an epsilon transition. Properties are pushed back to the
/// first un-ambiguous arc in case we can share a common initial state path.
/// The weight will be placed on the first arc (if there exists an arc with
/// the same word but different weight we will create a new arc).
///
/// <param name="flWeight">Weight given to the epsilon arc.</param>
/// <returns>The newly created epsilon arc (created with a null word and normal confidence).</returns>
internal Arc EpsilonTransition (float flWeight)
{
    return CreateTransition (null, flWeight, CfgGrammar.SP_NORMAL_CONFIDENCE);
}

/// <summary>
/// Creates a tag from the property info, records it in the tag list and
/// attaches it to a single arc as both its start tag and its end tag.
/// </summary>
/// <param name="arc">Arc that both opens and closes the tag.</param>
/// <param name="propertyInfo">Property data used to build the tag.</param>
internal void AddSemanticInterpretationTag (Arc arc, CfgGrammar.CfgProperty propertyInfo)
{
    //CfgGrammar.TraceInformation ("BackEnd::AddSemanticTag");

    Tag tag = new Tag (this, propertyInfo);
    _tags.Add (tag);

    arc.AddStartTag (tag);
    arc.AddEndTag (tag);
}

/// <summary>
/// Creates a tag from the property info, records it in the tag list and
/// attaches it as the start tag of one arc and the end tag of another.
/// </summary>
/// <param name="start">Arc on which the tag starts.</param>
/// <param name="end">Arc on which the tag ends.</param>
/// <param name="propertyInfo">Property data used to build the tag.</param>
internal void AddPropertyTag (Arc start, Arc end, CfgGrammar.CfgProperty propertyInfo)
{
    //CfgGrammar.TraceInformation ("BackEnd::AddSemanticTag");

    Tag tag = new Tag (this, propertyInfo);
    _tags.Add (tag);

    start.AddStartTag (tag);
    end.AddEndTag (tag);
}

/// <summary>
/// Traverse the graph starting from srcFromState, cloning each state reached
/// through an outgoing arc and cloning each arc, and return the cloned state
/// corresponding to srcEndState.
/// </summary>
/// <param name="srcFromState">State the traversal starts from.</param>
/// <param name="srcEndState">Source state whose clone is returned; it must be reached by the traversal,
/// otherwise the final dictionary lookup throws.</param>
/// <param name="destFromState">Existing state that plays the role of the clone of srcFromState.</param>
/// <returns>The clone of srcEndState.</returns>
internal State CloneSubGraph (State srcFromState, State srcEndState, State destFromState)
{
    Dictionary<State, State> SrcToDestHash = new Dictionary<State, State> ();  // Hash source state to destination state
    Stack<State> CloneStack = new Stack<State> ();                             // States to process
    // NOTE(review): type arguments assumed to be <Tag, Tag>; confirm against Arc.CloneTags.
    Dictionary<Tag, Tag> tags = new Dictionary<Tag, Tag> ();

    // Add initial state to CloneStack and SrcToDestHash.
    SrcToDestHash.Add (srcFromState, destFromState);
    CloneStack.Push (srcFromState);

    // While there are still states on the CloneStack (ToDo collection)
    while (CloneStack.Count > 0)
    {
        srcFromState = CloneStack.Pop ();
        destFromState = SrcToDestHash [srcFromState];
        System.Diagnostics.Debug.Assert (destFromState != null);

        // For each transition from srcFromState
        foreach (Arc arc in srcFromState.OutArcs)
        {
            // - Lookup the DestToState corresponding to SrcToState
            State srcToState = arc.End;
            State destToState = null;

            if (srcToState != null)
            {
                // - If not found, clone a new DestToState, add SrcToState.DestToState to SrcToDestHash, and add SrcToState to CloneStack.
                if (!SrcToDestHash.TryGetValue (srcToState, out destToState))
                {
                    destToState = CreateNewState (srcToState.Rule);
                    SrcToDestHash.Add (srcToState, destToState);
                    CloneStack.Push (srcToState);
                }
            }

            // - Clone the transition from SrcFromState.SrcToState at DestFromState.DestToState
            // -- Clone Arc
            Arc newArc = new Arc (arc, destFromState, destToState);
            AddArc (newArc);

            // -- Clone SemanticTag
            newArc.CloneTags (arc, _tags, tags, null);

            // -- Add Arc
            newArc.ConnectStates ();
        }
    }

    System.Diagnostics.Debug.Assert (tags.Count == 0);
    return SrcToDestHash [srcEndState];
}

/// <summary>
/// Clones the state graph of a rule into this backend, following rule references.
/// References named "URL:DYNAMIC#name" are resolved in <paramref name="extra"/>,
/// references named "URL:STATIC#name" in <paramref name="org"/>; the prefix is
/// dropped from the name of the cloned reference.
/// </summary>
/// <param name="rule">Rule whose first state is the root of the traversal.</param>
/// <param name="org">Backend holding the original rules.</param>
/// <param name="extra">Backend holding the extra (dynamic) rules.</param>
/// <param name="srcToDestHash">Map from source states to their clones, shared across recursive calls.</param>
/// <param name="fromOrg">True when <paramref name="rule"/> comes from <paramref name="org"/>,
/// false when it comes from <paramref name="extra"/>; selects the word blob used to remap word ids.</param>
internal void CloneSubGraph (Rule rule, Backend org, Backend extra, Dictionary<State, State> srcToDestHash, bool fromOrg)
{
    Backend beSrc = fromOrg ? org : extra;

    // Processed first-in first-out: states are appended by CloneState and taken from index 0.
    List<State> CloneStack = new List<State> ();  // States to process
    // NOTE(review): type arguments assumed to be <Tag, Tag>; confirm against Arc.CloneTags.
    Dictionary<Tag, Tag> tags = new Dictionary<Tag, Tag> ();

    // Push all the state for the top level rule
    CloneState (rule._firstState, CloneStack, srcToDestHash);

    // While there are still states on the CloneStack (ToDo collection)
    while (CloneStack.Count > 0)
    {
        State srcFromState = CloneStack [0];
        CloneStack.RemoveAt (0);
        State destFromState = srcToDestHash [srcFromState];

        // For each transition from srcFromState
        foreach (Arc arc in srcFromState.OutArcs)
        {
            // - Lookup the DestToState corresponding to SrcToState
            State srcToState = arc.End;
            State destToState = null;

            if (srcToState != null)
            {
                if (!srcToDestHash.ContainsKey (srcToState))
                {
                    // - If not found, then it is a new rule, just clone it.
                    CloneState (srcToState, CloneStack, srcToDestHash);
                }
                destToState = srcToDestHash [srcToState];
            }

            // - Clone the transition from SrcFromState.SrcToState at DestFromState.DestToState
            // -- Clone Arc
            // Word ids are local to a word blob, so re-add the word to this backend's blob.
            int newWordId = arc.WordId;
            if (beSrc != null && arc.WordId > 0)
            {
                _words.Add (beSrc.Words [arc.WordId], out newWordId);
            }
            Arc newArc = new Arc (arc, destFromState, destToState, newWordId);

            // -- Clone SemanticTag
            newArc.CloneTags (arc, _tags, tags, this);

            // For rule ref push the first state of the ruleref
            if (arc.RuleRef != null)
            {
                string ruleName;

                // Check for DYNAMIC grammars
                if (arc.RuleRef.Name.IndexOf ("URL:DYNAMIC#", StringComparison.Ordinal) == 0)
                {
                    ruleName = arc.RuleRef.Name.Substring (12);
                    if (fromOrg && FindInRules (ruleName) == null)
                    {
                        Rule ruleExtra = extra.FindInRules (ruleName);
                        if (ruleExtra == null)
                        {
                            XmlParser.ThrowSrgsException (SRID.DynamicRuleNotFound, ruleName);
                        }
                        CloneSubGraph (ruleExtra, org, extra, srcToDestHash, false);
                    }
                }
                else if (arc.RuleRef.Name.IndexOf ("URL:STATIC#", StringComparison.Ordinal) == 0)
                {
                    ruleName = arc.RuleRef.Name.Substring (11);
                    if (!fromOrg && FindInRules (ruleName) == null)
                    {
                        Rule ruleOrg = org.FindInRules (ruleName);
                        if (ruleOrg == null)
                        {
                            XmlParser.ThrowSrgsException (SRID.DynamicRuleNotFound, ruleName);
                        }
                        CloneSubGraph (ruleOrg, org, extra, srcToDestHash, true);
                    }
                }
                else
                {
                    ruleName = arc.RuleRef.Name;
                    if (!fromOrg)
                    {
                        CloneSubGraph (arc.RuleRef, org, extra, srcToDestHash, true);
                    }
                }

                // Point the cloned arc at this backend's copy of the referenced rule.
                Rule refRule = FindInRules (ruleName);
                if (refRule == null)
                {
                    refRule = CloneState (arc.RuleRef._firstState, CloneStack, srcToDestHash);
                }
                newArc.RuleRef = refRule;
            }

            // -- Add Arc
            newArc.ConnectStates ();
        }
    }

    System.Diagnostics.Debug.Assert (tags.Count == 0);
}

///
/// Delete disconnected subgraph starting at hState.
/// Traverse the graph starting from SrcStartState, and delete each state as we go along.
///
/// <param name="state">State at which the traversal and deletion start.</param>
internal void DeleteSubGraph (State state)
{
    // Add initial state to DeleteStack.
    Stack<State> stateToProcess = new Stack<State> ();  // States to delete
    Collection<Arc> arcsToDelete = new Collection<Arc> ();
    Collection<State> statesToDelete = new Collection<State> ();
    stateToProcess.Push (state);

    // While there are still states on the listDelete (ToDo collection)
    while (stateToProcess.Count > 0)
    {
        // For each transition from state,
        state = stateToProcess.Pop ();
        statesToDelete.Add (state);
        arcsToDelete.Clear ();

        // Accumulate the arcs to delete and add new states to the stack of states to process
        foreach (Arc arc in state.OutArcs)
        {
            // Add EndState to listDelete, if unique
            State endState = arc.End;

            // Add this state to the list of states to delete
            if (endState != null && !stateToProcess.Contains (endState) && !statesToDelete.Contains (endState))
            {
                stateToProcess.Push (endState);
            }
            arcsToDelete.Add (arc);
        }

        // Clear up the arcs
        // (done from the accumulated copy, after the enumeration of OutArcs has finished)
        foreach (Arc arc in arcsToDelete)
        {
            arc.Start = arc.End = null;
        }
    }

    foreach (State stateToDelete in statesToDelete)
    {
        // Delete state and remove from listDelete
        System.Diagnostics.Debug.Assert (stateToDelete != null);
        System.Diagnostics.Debug.Assert (stateToDelete.InArcs.IsEmpty);
        System.Diagnostics.Debug.Assert (stateToDelete.OutArcs.IsEmpty);
        DeleteState (stateToDelete);
    }
}

/// <summary>
/// Modify the placeholder rule attributes after it has been created.
/// This is only safe to use in the context of SRGSGrammarCompiler.
/// </summary>
/// <param name="rule">Rule whose flags are overwritten.</param>
/// <param name="dwAttributes">Attribute flags; each flag is copied to the matching boolean on the rule.
/// When the root flag is set the rule also becomes the root rule, and an SRGS exception is
/// raised if a root rule was already defined.</param>
internal void SetRuleAttributes (Rule rule, SPCFGRULEATTRIBUTES dwAttributes)
{
    // Check if this is the Root rule
    if ((dwAttributes & SPCFGRULEATTRIBUTES.SPRAF_Root) != 0)
    {
        if (_rootRule != null)
        {
            //We already have a root rule, return error code.
            XmlParser.ThrowSrgsException (SRID.RootRuleAlreadyDefined);
        }
        else
        {
            _rootRule = rule;
        }
    }

    rule._cfgRule.TopLevel = ((dwAttributes & SPCFGRULEATTRIBUTES.SPRAF_TopLevel) != 0);
    rule._cfgRule.DefaultActive = ((dwAttributes & SPCFGRULEATTRIBUTES.SPRAF_Active) != 0);
    rule._cfgRule.PropRule = ((dwAttributes & SPCFGRULEATTRIBUTES.SPRAF_Interpreter) != 0);
    rule._cfgRule.Export = ((dwAttributes & SPCFGRULEATTRIBUTES.SPRAF_Export) != 0);
    rule._cfgRule.Dynamic = ((dwAttributes & SPCFGRULEATTRIBUTES.SPRAF_Dynamic) != 0);
    rule._cfgRule.Import = ((dwAttributes & SPCFGRULEATTRIBUTES.SPRAF_Import) != 0);
}

/// <summary>
/// Set the path from which relative grammar imports are calculated. As specified by xml:base / meta base
/// Null or empty string will clear any existing base path.
/// </summary>
/// <param name="sBasePath">Relative or absolute URI; stored in the form returned by Uri.ToString.</param>
internal void SetBasePath (string sBasePath)
{
    if (!string.IsNullOrEmpty (sBasePath))
    {
        // Validate base path.
        Uri uri = new Uri (sBasePath, UriKind.RelativeOrAbsolute);

        //Url Canonicalized
        _basePath = uri.ToString ();
    }
    else
    {
        _basePath = null;
    }
}

///
/// Perform white space normalization in place.
///  - Trim leading/trailing white spaces.
///  - Collapse white space sequences to a single ' '.
///
/// <param name="sToken">Token text; asserted to be non-null and non-empty in debug builds.</param>
/// <returns>The token with its ends trimmed and every run of ' ' characters reduced to one ' '.</returns>
internal static string NormalizeTokenWhiteSpace (string sToken)
{
    System.Diagnostics.Debug.Assert (!string.IsNullOrEmpty (sToken));

    // Trim leading and ending white spaces
    sToken = sToken.Trim (Helpers._achTrimChars);

    // Easy out if there are no consecutive double white spaces
    if (sToken.IndexOf ("  ", StringComparison.Ordinal) == -1)
    {
        return sToken;
    }

    // Normalize internal spaces
    // The array is compacted in place: iDest never runs ahead of i.
    char [] achSrc = sToken.ToCharArray ();
    int iDest = 0;

    for (int i = 0; i < achSrc.Length; )
    {
        // Collapsed multiple white spaces into ' '
        if (achSrc [i] == ' ')
        {
            // The bounds check keeps a trailing run of spaces from reading past the
            // end of the array, independently of what the trim above removed.
            do
            {
                i++;
            }
            while (i < achSrc.Length && achSrc [i] == ' ');

            achSrc [iDest++] = ' ';
            continue;
        }

        // Copy the non-white space character
        achSrc [iDest++] = achSrc [i++];
    }

    return new string (achSrc, 0, iDest);
}

#endregion

//********************************************************************
//
// Internal Property
//
//********************************************************************

#region Internal Property

/// <summary>String blob holding the words of the grammar.</summary>
internal StringBlob Words
{
    get
    {
        return this._words;
    }
}

/// <summary>String blob holding the symbols of the grammar.</summary>
internal StringBlob Symbols
{
    get
    {
        return this._symbols;
    }
}

#endregion

//*******************************************************************
//
// Private Methods
//
//********************************************************************

#region Private Methods

///
/// Description:
///     Load compiled grammar data. This overwrites any existing data in the grammar
///     We end up with containers of words, symbols, rules, arcs, states and state handles, etc.
///
/// <param name="streamHelper">Stream marshaler positioned on the binary grammar to load.</param>
#if true
internal void InitFromBinaryGrammar (StreamMarshaler streamHelper)
{
    //CfgGrammar.TraceInformation ("BackEnd::InitFromBinaryGrammar");

    CfgGrammar.CfgHeader header = CfgGrammar.ConvertCfgHeader (streamHelper);

    _words = header.pszWords;
    _symbols = header.pszSymbols;
    _grammarOptions = header.GrammarOptions;

    //
    // Build up the internal representation
    //
    // apStateTable is indexed by arc index: slot k holds the state whose first arc is arc k.
    State [] apStateTable = new State [header.arcs.Length];
    // First arc index -> rule, enumerated below in increasing arc index.
    SortedDictionary<int, Rule> ruleFirstArcs = new SortedDictionary<int, Rule> ();

    //
    // Initialize the rules
    //
    int previousCfgLastRules = _rules.Count;

    BuildRulesFromBinaryGrammar (header, apStateTable, ruleFirstArcs, previousCfgLastRules);

    //
    // Initialize the arcs
    //
    Arc [] apArcTable = new Arc [header.arcs.Length];
    bool fLastArcNull = true;
    CfgArc pLastArc = new CfgArc ();
    State currentState = null;
    IEnumerator<KeyValuePair<int, Rule>> ieFirstArcs = ruleFirstArcs.GetEnumerator ();

    // If no rules, then we have no arcs
    if (ieFirstArcs.MoveNext ())
    {
        KeyValuePair<int, Rule> kvFirstArc = ieFirstArcs.Current;
        Rule ruleCur = kvFirstArc.Value;

        // We repersist the static AND dynamic parts for now. This allows the grammar to be queried
        // with the automation interfaces
        // Arc index 0 is skipped; the loops over arcs start at 1.
        for (int k = 1; k < header.arcs.Length; k++)
        {
            CfgArc arc = header.arcs [k];

            // Reset the Transition index based on the combined stringblobs
            if (arc.RuleRef)
            {
                // for a ruleref offset the rule index
                ruleCur._listRules.Add (_rules [(int) arc.TransitionIndex]);
            }

            if (kvFirstArc.Key == k)
            {
                // we are entering a new rule now
                ruleCur = kvFirstArc.Value;

                // Reset to zero once we have read the last rule.
                if (ieFirstArcs.MoveNext ())
                {
                    kvFirstArc = ieFirstArcs.Current;
                }
            }

            // new currentState?
            // A state starts at the first arc and after every arc flagged as the last arc of its state.
            if (fLastArcNull || pLastArc.LastArc)
            {
                if (apStateTable [k] == null)
                {
                    uint hNewState = CfgGrammar.NextHandle;
                    apStateTable [k] = new State (ruleCur, hNewState, k);
                    AddState (apStateTable [k]);
                }
                currentState = apStateTable [k];
            }

            //
            // now get the arc
            //
            int iNextArc = (int) (arc.NextStartArcIndex);
            Arc newArc;
            State targetState = null;

            // A next-arc index of 0 leaves the target state null.
            if (currentState != null && iNextArc != 0)
            {
                if (apStateTable [iNextArc] == null)
                {
                    uint hNewState = CfgGrammar.NextHandle;
                    apStateTable [iNextArc] = new State (ruleCur, hNewState, iNextArc);
                    AddState (apStateTable [iNextArc]);
                }
                targetState = apStateTable [iNextArc];
            }

            float flWeight = header.weights != null ? header.weights [k] : CfgGrammar.DEFAULT_WEIGHT;

            // determine properties of the arc now ...
            if (arc.RuleRef)
            {
                Rule ruleToTransitionTo = _rules [(int) arc.TransitionIndex];

                newArc = new Arc (null, ruleToTransitionTo, _words, flWeight, CfgGrammar.SP_NORMAL_CONFIDENCE, null, MatchMode.AllWords, ref _fNeedWeightTable);
            }
            else
            {
                int transitionIndex = (int) arc.TransitionIndex;
                int ulSpecialTransitionIndex = (int) ((transitionIndex == CfgGrammar.SPWILDCARDTRANSITION || transitionIndex == CfgGrammar.SPDICTATIONTRANSITION || transitionIndex == CfgGrammar.SPTEXTBUFFERTRANSITION) ? transitionIndex : 0);
                newArc = new Arc ((ulSpecialTransitionIndex != 0) ? 0 : (int) arc.TransitionIndex, flWeight, arc.LowConfRequired ? CfgGrammar.SP_LOW_CONFIDENCE : arc.HighConfRequired ? CfgGrammar.SP_HIGH_CONFIDENCE : CfgGrammar.SP_NORMAL_CONFIDENCE, ulSpecialTransitionIndex, MatchMode.AllWords, ref _fNeedWeightTable);
            }
            newArc.Start = currentState;
            newArc.End = targetState;

            AddArc (newArc);
            apArcTable [k] = newArc;
            fLastArcNull = false;
            pLastArc = arc;
        }
    }

    // Initialize the Semantics tags
    for (int k = 1, iCurTag = 0; k < header.arcs.Length; k++)
    {
        CfgArc arc = header.arcs [k];

        if (arc.HasSemanticTag)
        {
            System.Diagnostics.Debug.Assert (header.tags [iCurTag].StartArcIndex == k);

            // Consume every tag that starts on arc k.
            while (iCurTag < header.tags.Length && header.tags [iCurTag].StartArcIndex == k)
            {
                // we should already point to the tag
                CfgSemanticTag semTag = header.tags [iCurTag];

                Tag tag = new Tag (this, semTag);

                _tags.Add (tag);
                apArcTable [tag._cfgTag.StartArcIndex].AddStartTag (tag);
                apArcTable [tag._cfgTag.EndArcIndex].AddEndTag (tag);

                // If we have ms-properties then _nameOffset != 0, otherwise it is w3c tags.
                if (semTag._nameOffset > 0)
                {
                    tag._cfgTag._nameOffset = _symbols.OffsetFromId (_symbols.Find (_symbols.FromOffset (semTag._nameOffset)));
                }
                else
                {
                    // The offset of the JScript expression is stored in the value field.
                    tag._cfgTag._valueOffset = _symbols.OffsetFromId (_symbols.Find (_symbols.FromOffset (semTag._valueOffset)));
                }
                iCurTag++;
            }
        }
    }
    //
    _fNeedWeightTable = true;
    if (header.BasePath != null)
    {
        SetBasePath (header.BasePath);
    }
    _guid = header.GrammarGUID;
    _langId = header.langId;
    _grammarMode = header.GrammarMode;

    _fLoadedFromBinary = true;
    // Save Last ArcIndex
#if VSCOMPILE && DEBUG
    DumpGrammarStatistics ("InitFromBinaryGrammar");
#endif
}

/// <summary>
/// Creates a single word transition, turning a null or empty word into an epsilon transition.
/// </summary>
/// <param name="sWord">Word for the arc; null or empty produces an epsilon arc.</param>
/// <param name="flWeight">Weight given to the arc.</param>
/// <param name="requiredConfidence">Confidence level required for the arc.</param>
/// <returns>The newly created arc.</returns>
private Arc CreateTransition (string sWord, float flWeight, int requiredConfidence)
{
    // epsilon transition for empty words
    return AddSingleWordTransition (!string.IsNullOrEmpty (sWord) ? sWord : null, flWeight, requiredConfidence);
}

/// <summary>
/// Assigns serialization ids to the states and computes the serialized header:
/// element counts and the offset of each section, laid out one after the other
/// starting right after the header structure. Also assigns a new grammar GUID.
/// </summary>
/// <param name="sortedStates">States in serialization order; each gets SerializeId set to the index of its first arc.</param>
/// <param name="cBasePath">Length of the base path in characters, 0 if there is none.</param>
/// <param name="iSemanticGlobals">Value stored in the header's semantic interpretation globals field.</param>
/// <param name="cArcs">Receives the arc count: one (index 0 is reserved) plus the arcs of every state.</param>
/// <param name="pWeights">Receives a weight array of cArcs entries when a weight table is needed, otherwise null.</param>
/// <returns>The populated serialized header.</returns>
private CfgGrammar.CfgSerializedHeader BuildHeader (List<State> sortedStates, int cBasePath, UInt16 iSemanticGlobals, out int cArcs, out float [] pWeights)
{
    cArcs = 1; // Start with offset one! (0 indicates dead state).
    pWeights = null;

    int cSemanticTags = 0;
    int cLargest = 0;

    foreach (State state in sortedStates)
    {
        // For new states SerializeId is INFINITE so we set it correctly here.
        // For existing states we preserve the index from loading,
        //  unless new states have been added in.
        state.SerializeId = cArcs;

        int thisState = state.NumArcs;

#if DEBUG
        if (thisState == 0 && state.InArcs.IsEmpty && state.Rule._cStates > 1)
        {
            XmlParser.ThrowSrgsException (SRID.StateWithNoArcs);
        }
#endif
        cArcs += thisState;
        if (cLargest < thisState)
        {
            cLargest = thisState;
        }
        cSemanticTags += state.NumSemanticTags;
    }

    CfgGrammar.CfgSerializedHeader header = new CfgGrammar.CfgSerializedHeader ();
    uint ulOffset = (uint) Marshal.SizeOf (typeof (CfgGrammar.CfgSerializedHeader));

    header.FormatId = CfgGrammar._SPGDF_ContextFree;
    _guid = Guid.NewGuid ();
    header.GrammarGUID = _guid;
    header.LangID = (UInt16) _langId;
    header.pszSemanticInterpretationGlobals = iSemanticGlobals;
    header.cArcsInLargestState = cLargest;

    header.cchWords = _words.StringSize ();
    header.cWords = _words.Count;

    // For compat with SAPI 5.x add one to cWords if there is at least one word.
    // The CFGEngine code assumes cWords includes the initial empty-string word.
    // See PS 11491 and 61982.
    if (header.cWords > 0)
    {
        header.cWords++;
    }

    header.pszWords = ulOffset;
    ulOffset += (uint) _words.SerializeSize () * Helpers._sizeOfChar;

    header.cchSymbols = _symbols.StringSize ();
    header.pszSymbols = ulOffset;
    ulOffset += (uint) _symbols.SerializeSize () * Helpers._sizeOfChar;

    header.cRules = _rules.Count;
    header.pRules = ulOffset;
    ulOffset += (uint) (_rules.Count * Marshal.SizeOf (typeof (CfgRule)));

    header.cBasePath = cBasePath > 0 ? ulOffset : 0; //If there is no base path offset is set to zero
    // Base path size in bytes, rounded up to a multiple of 4.
    ulOffset += (uint) (((int) cBasePath * Helpers._sizeOfChar + 3) & ~3);

    header.cArcs = cArcs;
    header.pArcs = ulOffset;
    ulOffset += (uint) (cArcs * Marshal.SizeOf (typeof (CfgArc)));

    if (_fNeedWeightTable)
    {
        header.pWeights = ulOffset;
        ulOffset += (uint) (cArcs * Marshal.SizeOf (typeof (float)));
        pWeights = new float [cArcs];
        pWeights [0] = 0.0f;
    }
    else
    {
        header.pWeights = 0;
        ulOffset += 0;
    }

    if (_rootRule != null)
    {
        //We have a root rule
        header.ulRootRuleIndex = (uint) _rootRule._iSerialize;
    }
    else
    {
        //-1 means there is no root rule
        header.ulRootRuleIndex = 0xFFFFFFFF;
    }

    header.GrammarOptions = _grammarOptions | ((_alphabet == AlphabetType.Sapi) ? 0 : GrammarOptions.IpaPhoneme);
#if !NO_STG
    header.GrammarOptions |= _scriptRefs.Count > 0 ? GrammarOptions.STG | GrammarOptions.KeyValuePairSrgs : 0;
#endif
    header.GrammarMode = (uint) _grammarMode;
    header.cTags = cSemanticTags;
    header.tags = ulOffset;
    ulOffset += (uint) (cSemanticTags * Marshal.SizeOf (typeof (CfgSemanticTag)));

#if !NO_STG
    header.cScripts = _scriptRefs.Count;
    header.pScripts = header.cScripts > 0 ? ulOffset : 0;
    ulOffset += (uint) (_scriptRefs.Count * Marshal.SizeOf (typeof (CfgScriptRef)));

    header.cIL = _il != null ? _il.Length : 0;
    header.pIL = header.cIL > 0 ? ulOffset : 0;
    ulOffset += (uint) (header.cIL * Marshal.SizeOf (typeof (byte)));

    header.cPDB = _pdb != null ? _pdb.Length : 0;
    header.pPDB = header.cPDB > 0 ? ulOffset : 0;
    ulOffset += (uint) (header.cPDB * Marshal.SizeOf (typeof (byte)));
#endif
    header.ulTotalSerializedSize = ulOffset;
    return header;
}

/// <summary>
/// Creates a Rule for every rule record of a loaded binary grammar, registers it in
/// the rule list and the name-offset table, and seeds the state table and the
/// first-arc map used to rebuild the arcs.
/// </summary>
/// <param name="header">Header of the loaded binary grammar.</param>
/// <param name="apStateTable">State table indexed by arc index; receives the first state of each rule that has a first arc.</param>
/// <param name="ruleFirstArcs">Receives first arc index -> rule for each rule whose first arc index is positive.</param>
/// <param name="previousCfgLastRules">Number of rules present before the load; added to the rule index passed to the Rule constructor.</param>
/// <returns>The header that was passed in.</returns>
/// <exception cref="NotImplementedException">A rule is flagged as having resources.</exception>
private CfgGrammar.CfgHeader BuildRulesFromBinaryGrammar (CfgGrammar.CfgHeader header, State [] apStateTable, SortedDictionary<int, Rule> ruleFirstArcs, int previousCfgLastRules)
{
    for (int i = 0; i < header.rules.Length; i++)
    {
        // Check if the rule does not exist already
        CfgRule cfgRule = header.rules [i];
        int firstArc = (int) cfgRule.FirstArcIndex;

        // Re-map the name offset from the header's symbol blob to this backend's symbol blob.
        cfgRule._nameOffset = _symbols.OffsetFromId (_symbols.Find (header.pszSymbols.FromOffset (cfgRule._nameOffset)));

        Rule rule = new Rule (this, _symbols.FromOffset (cfgRule._nameOffset), cfgRule, i + previousCfgLastRules, _grammarOptions & GrammarOptions.TagFormat, ref _cImportedRules);

        rule._firstState = _states.CreateNewState (rule);
        _rules.Add (rule);

        // Add the rule to the list of firstArc/rule
        if (firstArc > 0)
        {
            ruleFirstArcs.Add ((int) cfgRule.FirstArcIndex, rule);
        }

        rule._fStaticRule = (cfgRule.Dynamic) ? false : true;
        rule._cfgRule.DirtyRule = false;

        // by default loaded static rules have an exit
        rule._fHasExitPath = (rule._fStaticRule) ? true : false;

        // or they wouldn't be there in the first place
        if (firstArc != 0)
        {
            System.Diagnostics.Debug.Assert (apStateTable [firstArc] == null);
            rule._firstState.SerializeId = (int) cfgRule.FirstArcIndex;
            apStateTable [firstArc] = rule._firstState;
        }

        if (rule._cfgRule.HasResources)
        {
            throw new NotImplementedException ();
        }

        if (header.ulRootRuleIndex == i)
        {
            _rootRule = rule;
        }

        // Add rule to RuleListByName and RuleListByID hash tables.
        if (rule._cfgRule._nameOffset != 0)
        {
            // Look for the rule in the original CFG and map it in the combined string blobs
            _nameOffsetRules.Add (rule._cfgRule._nameOffset, rule);
        }
    }
    return header;
}
#endif

/// <summary>
/// Clones one source state into this backend: finds (or clones and registers) the
/// destination rule, creates a new state for it, records the source/clone pair and
/// queues the source state for arc processing.
/// </summary>
/// <param name="srcToState">Source state to clone; must not be in <paramref name="srcToDestHash"/> yet.</param>
/// <param name="CloneStack">Work list the source state is appended to.</param>
/// <param name="srcToDestHash">Map from source states to their clones.</param>
/// <returns>The destination rule that owns the new state.</returns>
private Rule CloneState (State srcToState, List<State> CloneStack, Dictionary<State, State> srcToDestHash)
{
    bool newRule = false;

    // A "URL:DYNAMIC#" prefix at the start of the rule name is dropped from the destination name.
    int posDynamic = srcToState.Rule.Name.IndexOf ("URL:DYNAMIC#", StringComparison.Ordinal);
    string ruleName = posDynamic != 0 ? srcToState.Rule.Name : srcToState.Rule.Name.Substring (12);

    Rule dstRule = FindInRules (ruleName);

    // Clone this rule into this GrammarBuilder if it does not exist yet
    if (dstRule == null)
    {
        dstRule = srcToState.Rule.Clone (_symbols, ruleName);
        _rules.Add (dstRule);
        newRule = true;
    }

    // Should not exist yet
    System.Diagnostics.Debug.Assert (!srcToDestHash.ContainsKey (srcToState));

    // push all the states for that rule
    State newState = CreateNewState (dstRule);
    srcToDestHash.Add (srcToState, newState);
    CloneStack.Add (srcToState);

    if (newRule)
    {
        dstRule._firstState = newState;
    }
    return dstRule;
}

/// <summary>
/// Linear search of the rule list for a rule with the given name.
/// </summary>
/// <param name="ruleName">Name to match exactly.</param>
/// <returns>The first rule with that name, or null.</returns>
private Rule FindInRules (string ruleName)
{
    foreach (Rule rule in _rules)
    {
        if (rule.Name == ruleName)
        {
            return rule;
        }
    }
    return null;
}

/// <summary>
/// Reports a grammar error by throwing a FormatException whose message is the
/// rule name followed by the formatted resource string.
/// </summary>
/// <param name="rule">Rule name placed at the start of the message.</param>
/// <param name="srid">Resource id of the error text.</param>
/// <param name="args">Arguments for the resource string.</param>
/// <exception cref="FormatException">Always thrown.</exception>
private static void LogError (string rule, SRID srid, params object [] args)
{
    string sError = SR.Get (srid, args);
    throw new FormatException (string.Format (CultureInfo.InvariantCulture, "Rule=\"{0}\" - ", rule) + sError);
}

/// <summary>
/// Connect arc to the state graph.
/// In DEBUG builds this records the owning backend on the arc; otherwise it does nothing.
/// </summary>
/// <param name="arc">Arc being added.</param>
#if DEBUG
private
#else
private static
#endif
void AddArc (Arc arc)
{
#if DEBUG
    arc.Backend = this;
#endif
}

/// <summary>
/// Numbers the rules in list order, validates each of them and propagates the
/// dynamic-reference flag from every dynamic rule.
/// </summary>
private void ValidateAndTagRules ()
{
    //CfgGrammar.TraceInformation ("BackEnd::ValidateAndTagRules");

    //
    bool fAtLeastOneRule = false;
    int ulIndex = 0;

    foreach (Rule rule in _rules)
    {
        // set _fHasExitPath = true for empty dynamic grammars and imported rules
        // Clear this for the next loop through the rules....
        rule._fHasExitPath |= (rule._cfgRule.Dynamic | rule._cfgRule.Import) ? true : false;
        rule._iSerialize = ulIndex++;
        fAtLeastOneRule |= (rule._cfgRule.Dynamic || rule._cfgRule.TopLevel || rule._cfgRule.Export);
        rule.Validate ();
    }

#if DEBUG
    //
    // Now make sure that all rules have an exit path.
    //
    foreach (Rule rule in _rules)
    {
        _ulRecursiveDepth = 0;

        //The following function will use recursive function that might change _ulRecursiveDepth
        rule.CheckForExitPath (ref _ulRecursiveDepth);
    }
#endif

    //
    // Check each exported rule if it has a dynamic rule in its "scope"
    //
    foreach (Rule rule in _rules)
    {
        if (rule._cfgRule.Dynamic)
        {
            rule._cfgRule.HasDynamicRef = true;
            _ulRecursiveDepth = 0;
            rule.PopulateDynamicRef (ref _ulRecursiveDepth);
        }
    }
}

/// <summary>
/// Runs the left recursion check on every state of the list.
/// </summary>
/// <param name="states">States to check.</param>
private void CheckLeftRecursion (List<State> states)
{
    bool fReachedEndState;
    foreach (State state in states)
    {
        state.CheckLeftRecursion (out fReachedEndState);
    }
}

/// <summary>
/// Creates an arc for a single word (or an epsilon arc when the word is null)
/// and passes it to AddArc.
/// </summary>
/// <param name="s">Word carried by the arc, or null.</param>
/// <param name="flWeight">Weight given to the arc.</param>
/// <param name="requiredConfidence">Confidence level required for the arc.</param>
/// <returns>The newly created arc.</returns>
private Arc AddSingleWordTransition (string s, float flWeight, int requiredConfidence)
{
    //CfgGrammar.TraceInformation ("BackEnd::CGramComp::AddSingleWordTransition");
    Arc arc = new Arc (s, null, _words, flWeight, requiredConfidence, null, MatchMode.AllWords, ref _fNeedWeightTable);
    AddArc (arc);
    return arc;
}

/// <summary>
/// Adds a state to the state graph.
/// </summary>
/// <param name="state">State to add.</param>
internal void AddState (State state)
{
    _states.Add (state);
}

#if VSCOMPILE && DEBUG
/// <summary>
/// Traces every state of the graph under the given title.
/// </summary>
/// <param name="sTitle">Title traced before the table.</param>
internal void DumpStateTable (string sTitle)
{
    List<State> states = new List<State> (_states);
    DumpStateTable (sTitle, states.ToArray ());
}

/// <summary>
/// Traces one line per non-null state: id, serialization id and owning rule name.
/// </summary>
/// <param name="sTitle">Title traced before the table.</param>
/// <param name="states">States to trace; null entries are skipped.</param>
internal void DumpStateTable (string sTitle, State [] states)
{
    CfgGrammar.TraceInformation2 (sTitle);
    int iHandle2 = 0;
    for (int iState = 0; iState < states.Length; iState++)
    {
        State state = states [iState];
        if (state == null)
        {
            continue;
        }

        //string s = state.OutArcs.Count > 0 ? state.OutArcs.First.WordId > 0 ? _words [state.OutArcs.First.WordId] : string.Empty : string.Empty;
        string s = _symbols.FromOffset (state.Rule._cfgRule._nameOffset);
        CfgGrammar.TraceInformation3 (string.Format (CultureInfo.InvariantCulture, "{0}: {1} {2} {3} '", iHandle2, state.Id, state.SerializeId, s));
        foreach (Arc arc2 in state.OutArcs)
        {
            int iWord = arc2.WordId;
            string sWord = arc2.RuleRef != null ? _symbols.FromOffset (arc2.RuleRef._cfgRule._nameOffset) : iWord > 0 ? _words [iWord] : string.Empty;

            //CfgGrammar.TraceInformation3 (string.Format (CultureInfo.InvariantCulture, "{0}{1} [{2}]", (fFirst ? string.Empty : ", "), sWord, arc2.Id));
            //if (arc2._tag != null)
            //{
            //    CfgGrammar.TraceInformation3 (string.Format (CultureInfo.InvariantCulture, "({0}, {1}, {2})", _symbols.FromOffset (arc2._tag._cfgTag._valueOffset), arc2._tag._startArc.Id, arc2._tag._endArc.Id));
            //}
        }
        CfgGrammar.TraceInformation2 ("'");
    }
}

/// <summary>
/// Writes the word blob, the symbol blob, the rules and the states to a text writer.
/// </summary>
/// <param name="tw">Writer receiving the dump.</param>
internal void DumpStateMachine (TextWriter tw)
{
    tw.WriteLine ("Word Blobs");
    for (int i = 1; i < _words.Count; i++)
    {
        tw.WriteLine (" \"" + _words [i] + "\"");
    }

    tw.WriteLine ("\nSymbols Blobs");
    for (int i = 1; i < _symbols.Count; i++)
    {
        tw.WriteLine (" \"" + _symbols [i] + "\"");
    }

    tw.WriteLine ("\nRules");
    foreach (Rule rule in _rules)
    {
        tw.WriteLine (string.Format (CultureInfo.InvariantCulture, " \"{0}\" id: {1} flags: {2}", rule.Name, rule._cfgRule._id, rule._cfgRule._flag.ToString ("x", CultureInfo.InvariantCulture)));
        tw.WriteLine (string.Format (CultureInfo.InvariantCulture, " States: {0}", rule._cStates));
    }

    tw.WriteLine (string.Format (CultureInfo.InvariantCulture, "\nStates:\n Count: {0}", _states.Count));
    foreach (State state in _states)
    {
        tw.WriteLine (string.Format (CultureInfo.InvariantCulture, " \"{0}\"", state.Rule.Name));
    }
}

//GrammarOptimization
///
/// Dump grammar statistics to debug window.
/// (Debug only)
///
/// <param name="title">Heading written before the statistics.</param>
void DumpGrammarStatistics (string title)
{
    int iNumStates = 0;
    int iNumArcs = 0;
    int iNumEpsilonArcs = 0;
    int iMaxBranch = 0;
    int iNumProperties = 0;  // stays 0: the counting code below is commented out

    foreach (State state in _states)
    {
        iNumStates++;

        int cArcs = 0;
        foreach (Arc arc in state.OutArcs)
        {
            cArcs++;
            if (arc.IsEpsilonTransition)
            {
                iNumEpsilonArcs++;
            }
            //if (arc._tag != null)
            //{
            //    iNumProperties++;
            //}
        }

        if (iMaxBranch < cArcs)
        {
            iMaxBranch = cArcs;
        }
        iNumArcs += cArcs;
    }

    Console.WriteLine (title);
    // The title is written above, so the statistics start directly with the labelled values.
    Console.WriteLine (string.Format (CultureInfo.InvariantCulture, " NumStates = {0}\n NumArcs = {1}\n NumEpsilons = {2}\n", iNumStates, iNumArcs, iNumEpsilonArcs));
    Console.WriteLine (string.Format (CultureInfo.InvariantCulture, " MaxBranch = {0}\n" + " NumProperties = {1}\n", iMaxBranch, iNumProperties));
}
#endif

#endregion

//*******************************************************************
//
// Internal Properties
//
//*******************************************************************

#region Internal Properties

/// <summary>Language id of the grammar.</summary>
internal int LangId
{
    get
    {
        return _langId;
    }
    set
    {
        _langId = value;
    }
}

/// <summary>Grammar option flags.</summary>
internal GrammarOptions GrammarOptions
{
    get
    {
        return _grammarOptions;
    }
    set
    {
        _grammarOptions = value;
    }
}

/// <summary>Grammar mode (write-only).</summary>
internal GrammarType GrammarMode
{
    set
    {
        _grammarMode = value;
    }
}

/// <summary>Alphabet used for pronunciations.</summary>
internal AlphabetType Alphabet
{
    set
    {
        _alphabet = value;
    }
    get
    {
        return _alphabet;
    }
}

/// <summary>Global values for the semantic interpretation tags.</summary>
internal Collection<string> GlobalTags
{
    get
    {
        return _globalTags;
    }
    set
    {
        _globalTags = value;
    }
}

#if !NO_STG
/// <summary>Script references of the grammar (write-only).</summary>
internal Collection<ScriptRef> ScriptRefs
{
    set
    {
        _scriptRefs = value;
    }
}

/// <summary>Grammar code assembly bytes (write-only).</summary>
internal byte [] IL
{
    set
    {
        _il = value;
    }
}

/// <summary>Grammar debug symbol bytes (write-only).</summary>
internal byte [] PDB
{
    set
    {
        _pdb = value;
    }
}
#endif

#endregion

//*******************************************************************
//
// Private Fields
//
//********************************************************************

#region Private Fields

private int _langId = CultureInfo.CurrentUICulture.LCID;

private StringBlob _words;

private StringBlob _symbols;

//private int _cResources;

private Guid _guid;

private bool _fNeedWeightTable;

private Graph _states = new Graph ();

private List<Rule> _rules = new List<Rule> ();

private int _ruleIndex;

// Rules keyed by the symbol offset of their name.
private Dictionary<int, Rule> _nameOffsetRules = new Dictionary<int, Rule> ();

private Rule _rootRule;

#if !SPEECHSERVER
private GrammarOptions _grammarOptions = GrammarOptions.KeyValuePairs;
#else
private GrammarOptions _grammarOptions = GrammarOptions.W3cV1;
#endif

// It is used sequentially. So there is no thread issue
private int _ulRecursiveDepth;

// Path from which relative grammar imports are calculated. As specified by xml:base
private string _basePath;

// Collection of all SemanticTags in the grammar (sorted by StartArc)
private List<Tag> _tags = new List<Tag> ();

// Voice or DTMF
private GrammarType _grammarMode = GrammarType.VoiceGrammar;

// Pron information is either IPA or SAPI
private AlphabetType _alphabet = AlphabetType.Sapi;

// Global value for the semantic interpretation tags
private Collection<string> _globalTags = new Collection<string> ();

//
private static byte [] _abZero3 = new byte [] { 0, 0, 0 };

private static char [] _achZero = new char [] { '\0' };

//
private const uint SPGF_RESET_DIRTY_FLAG = 0x80000000;

private int _cImportedRules;

#if !NO_STG

// List of cd /reference Rule->rule 'on'method-> .Net method
private Collection<ScriptRef> _scriptRefs = new Collection<ScriptRef> ();

// Grammar code assembly
private byte [] _il;

// Grammar debug symbols
private byte [] _pdb;

#endif

private bool _fLoadedFromBinary;

#endregion
}
}
// File provided for Reference Use Only by Microsoft Corporation (c) 2007.
Link Menu

This book is available now!
Buy at Amazon US or
Buy at Amazon UK
- GeometryModel3D.cs
- AttachmentService.cs
- XmlSchemaSearchPattern.cs
- MsmqReceiveHelper.cs
- DecimalConverter.cs
- BuilderPropertyEntry.cs
- CodeDomConfigurationHandler.cs
- SoapSchemaImporter.cs
- DocumentEventArgs.cs
- WindowsFormsSectionHandler.cs
- DoubleConverter.cs
- CommandField.cs
- InternalControlCollection.cs
- SystemResources.cs
- Site.cs
- ArithmeticException.cs
- ActivityExecutorOperation.cs
- Trace.cs
- SplitContainer.cs
- ControllableStoryboardAction.cs
- DataGridViewColumnDesignTimeVisibleAttribute.cs
- XmlReader.cs
- StylusPointDescription.cs
- XmlQueryContext.cs
- TextWriterTraceListener.cs
- Accessible.cs
- FormsAuthenticationCredentials.cs
- WindowShowOrOpenTracker.cs
- QilIterator.cs
- Line.cs
- XmlObjectSerializer.cs
- UDPClient.cs
- StorageTypeMapping.cs
- CaseInsensitiveHashCodeProvider.cs
- PagesSection.cs
- SafeFileMappingHandle.cs
- ObjectSpanRewriter.cs
- JoinCqlBlock.cs
- DataGridBoundColumn.cs
- TableLayoutColumnStyleCollection.cs
- DataSourceIDConverter.cs
- SQLSingleStorage.cs
- CryptoKeySecurity.cs
- IntPtr.cs
- LocationSectionRecord.cs
- SchemaTableOptionalColumn.cs
- BoolExpression.cs
- MenuItem.cs
- DropShadowBitmapEffect.cs
- DbConnectionOptions.cs
- GeometryDrawing.cs
- FakeModelItemImpl.cs
- Deflater.cs
- HyperLinkDataBindingHandler.cs
- UnsafeNativeMethods.cs
- TransactionContextValidator.cs
- MulticastDelegate.cs
- SafeRightsManagementSessionHandle.cs
- ToolStripItemClickedEventArgs.cs
- WorkflowEventArgs.cs
- HeaderUtility.cs
- BinaryParser.cs
- EditorPartCollection.cs
- MdImport.cs
- WebPartDisplayModeCollection.cs
- XmlSchemaRedefine.cs
- GraphicsState.cs
- EdmEntityTypeAttribute.cs
- URLMembershipCondition.cs
- CacheModeValueSerializer.cs
- SizeValueSerializer.cs
- BamlTreeNode.cs
- ITextView.cs
- WorkItem.cs
- DependsOnAttribute.cs
- CheckBoxDesigner.cs
- Viewport3DAutomationPeer.cs
- ProgressBarRenderer.cs
- metadatamappinghashervisitor.cs
- Point3DAnimation.cs
- PageFunction.cs
- SBCSCodePageEncoding.cs
- DataGridViewLinkColumn.cs
- CodeEntryPointMethod.cs
- FamilyCollection.cs
- PasswordTextContainer.cs
- TableStyle.cs
- EdmItemCollection.OcAssemblyCache.cs
- SimpleTextLine.cs
- ParameterToken.cs
- TextComposition.cs
- EncodingDataItem.cs
- CodeDirectiveCollection.cs
- NumericExpr.cs
- StyleCollection.cs
- DesignerView.Commands.cs
- DataRowChangeEvent.cs
- RijndaelCryptoServiceProvider.cs
- DataGridViewLayoutData.cs
- PropertyMapper.cs