Code:
/ 4.0 / 4.0 / untmp / DEVDIV_TFS / Dev10 / Releases / RTMRel / ndp / fx / src / Regex / System / Text / RegularExpressions / RegexRunner.cs / 1305376 / RegexRunner.cs
//------------------------------------------------------------------------------
// Copyright (c) Microsoft Corporation.  All rights reserved.
//------------------------------------------------------------------------------

// This RegexRunner class is a base class for compiled regex code.

// Implementation notes:
//
// RegexRunner provides a common calling convention and a common
// runtime environment for the interpreter and the compiled code.
//
// It provides the driver code that calls the subclass's Go()
// method for either scanning or direct execution.
//
// It also maintains memory allocation for the backtracking stack,
// the grouping stack and the longjump crawlstack, and provides
// methods to push new subpattern match results into (or remove
// backtracked results from) the Match instance.

namespace System.Text.RegularExpressions {

    using System.Collections;
    using System.Diagnostics;
    using System.ComponentModel;
    using System.Globalization;

    /// <summary>
    /// Base class for regex execution engines. Subclasses supply Go(),
    /// FindFirstChar() and InitTrackCount(); this class supplies the scan
    /// driver, the three working stacks and the capture bookkeeping helpers.
    /// </summary>
#if !SILVERLIGHT
    [EditorBrowsable(EditorBrowsableState.Never)]
    public abstract class RegexRunner {
#else
    internal abstract class RegexRunner {
#endif
        protected internal int runtextbeg;         // beginning of text to search
        protected internal int runtextend;         // end of text to search
        protected internal int runtextstart;       // starting point for search

        protected internal String runtext;         // text to search
        protected internal int runtextpos;         // current position in text

        // The backtracking stack. Opcodes use this to store data regarding
        // what they have matched and where to backtrack to. Each "frame" on
        // the stack takes the form of [CodePosition Data1 Data2...], where
        // CodePosition is the position of the current opcode and
        // the data values are all optional. The CodePosition can be negative, and
        // these values (also called "back2") are used by the BranchMark family of opcodes
        // to indicate whether they are backtracking after a successful or failed
        // match.
        // When we backtrack, we pop the CodePosition off the stack, set the current
        // instruction pointer to that code position, and mark the opcode
        // with a backtracking flag ("Back"). Each opcode then knows how to
        // handle its own data.
        protected internal int[] runtrack;
        protected internal int runtrackpos;

        // This stack is used to track text positions across different opcodes.
        // For example, in /(a*b)+/, the parentheses result in a SetMark/CaptureMark
        // pair. SetMark records the text position before we match a*b. Then
        // CaptureMark uses that position to figure out where the capture starts.
        // Opcodes which push onto this stack are always paired with other opcodes
        // which will pop the value from it later. A successful match should mean
        // that this stack is empty.
        protected internal int[] runstack;
        protected internal int runstackpos;

        // The crawl stack is used to keep track of captures. Every time a group
        // has a capture, we push its group number onto the runcrawl stack. In
        // the case of a balanced match, we push BOTH groups onto the stack.
        // The stack is filled from the high end of the array downwards: an empty
        // stack has runcrawlpos == runcrawl.Length (see Crawl/Popcrawl/Crawlpos).
        protected internal int[] runcrawl;
        protected internal int runcrawlpos;

        protected internal int runtrackcount;      // count of states that may do backtracking

        protected internal Match runmatch;         // result object
        protected internal Regex runregex;         // regex object

        protected internal RegexRunner() {}

        /*
         * Scans the string to find the first match. Uses the Match object
         * both to feed text in and as a place to store matches that come out.
         *
         * All the action is in the abstract Go() method defined by subclasses. Our
         * responsibility is to load up the class members (as done here) before
         * calling Go.
         *
         * regex      - the regex being run; its RightToLeft flag selects the scan direction
         * text       - the text to search
         * textbeg    - beginning of the search range
         * textend    - end of the search range
         * textstart  - position at which the search starts
         * prevlen    - length of the previous match; 0 forces a one-position bump
         *              before matching so an empty/failed previous match is not retried
         * quick      - when true, a successful match returns null and the Match
         *              object stays in runmatch for reuse
         *
         * Returns Match.Empty when no match is found; otherwise the result of
         * TidyMatch(quick).
         */
        protected internal Match Scan(Regex regex, String text, int textbeg, int textend, int textstart, int prevlen, bool quick) {
            int bump;
            int stoppos;
            bool initted = false;

            runregex      = regex;
            runtext       = text;
            runtextbeg    = textbeg;
            runtextend    = textend;
            runtextstart  = textstart;

            // Right-to-left scans walk towards runtextbeg, left-to-right towards runtextend.
            bump    = runregex.RightToLeft ? -1 : 1;
            stoppos = runregex.RightToLeft ? runtextbeg : runtextend;

            runtextpos = textstart;

            // If previous match was empty or failed, advance by one before matching
            if (prevlen == 0) {
                if (runtextpos == stoppos)
                    return Match.Empty;

                runtextpos += bump;
            }

            for (;;) {
#if DBG
                if (runregex.Debug) {
                    Debug.WriteLine("");
                    Debug.WriteLine("Search range: from " + runtextbeg.ToString(CultureInfo.InvariantCulture) + " to " + runtextend.ToString(CultureInfo.InvariantCulture));
                    Debug.WriteLine("Firstchar search starting at " + runtextpos.ToString(CultureInfo.InvariantCulture) + " stopping at " + stoppos.ToString(CultureInfo.InvariantCulture));
                }
#endif
                if (FindFirstChar()) {
                    // Working storage is set up lazily, only once a candidate position exists.
                    if (!initted) {
                        InitMatch();
                        initted = true;
                    }
#if DBG
                    if (runregex.Debug) {
                        Debug.WriteLine("Executing engine starting at " + runtextpos.ToString(CultureInfo.InvariantCulture));
                        Debug.WriteLine("");
                    }
#endif
                    Go();

                    // Group 0 is the overall match; a positive count means Go() succeeded.
                    if (runmatch._matchcount[0] > 0) {
                        return TidyMatch(quick);
                    }

                    // reset state for another go
                    runtrackpos = runtrack.Length;
                    runstackpos = runstack.Length;
                    runcrawlpos = runcrawl.Length;
                }

                // failure!
                if (runtextpos == stoppos) {
                    // NOTE(review): TidyMatch(true) only returns null and touches no
                    // state, so this call has no visible effect here; kept as-is.
                    TidyMatch(true);
                    return Match.Empty;
                }

                // Bump by one and start again
                runtextpos += bump;
            }
        }

        /*
         * The responsibility of Go() is to run the regular expression at
         * runtextpos and call Capture() on all the captured subexpressions,
         * then to leave runtextpos at the ending position. It should leave
         * runtextpos where it started if there was no match.
         */
        protected abstract void Go();

        /*
         * The responsibility of FindFirstChar() is to advance runtextpos
         * until it is at the next position which is a candidate for the
         * beginning of a successful match.
         */
        protected abstract bool FindFirstChar();

        /*
         * InitTrackCount must initialize the runtrackcount field; this is
         * used to know how large the initial runtrack and runstack arrays
         * must be.
         */
        protected abstract void InitTrackCount();

        /*
         * Initializes all the data members that are used by Go(): the Match
         * object (created on first use, Reset afterwards) and the three stacks
         * (allocated on first use, rewound to empty afterwards).
         */
        private void InitMatch() {
            // Use a hashtable'ed Match object if the capture numbers are sparse

            if (runmatch == null) {
                if (runregex.caps != null)
                    runmatch = new MatchSparse(runregex, runregex.caps, runregex.capsize, runtext, runtextbeg, runtextend - runtextbeg, runtextstart);
                else
                    runmatch = new Match(runregex, runregex.capsize, runtext, runtextbeg, runtextend - runtextbeg, runtextstart);
            }
            else {
                runmatch.Reset(runregex, runtext, runtextbeg, runtextend, runtextstart);
            }

            // note we test runcrawl, because it is the last one to be allocated
            // If there is an alloc failure in the middle of the three allocations,
            // we may still return to reuse this instance, and we want to behave
            // as if the allocations didn't occur. (we used to test _trackcount != 0)

            if (runcrawl != null) {
                runtrackpos = runtrack.Length;
                runstackpos = runstack.Length;
                runcrawlpos = runcrawl.Length;
                return;
            }

            InitTrackCount();

            // Both stacks start at 8 slots per backtracking state, with a floor
            // of 32 (track) and 16 (stack) entries.
            int tracksize = runtrackcount * 8;
            int stacksize = runtrackcount * 8;

            if (tracksize < 32)
                tracksize = 32;
            if (stacksize < 16)
                stacksize = 16;

            runtrack = new int[tracksize];
            runtrackpos = tracksize;

            runstack = new int[stacksize];
            runstackpos = stacksize;

            runcrawl = new int[32];
            runcrawlpos = 32;
        }

        /*
         * Put match in its canonical form before returning it.
         *
         * When quick is false, ownership of the Match is handed to the caller:
         * runmatch is cleared, the match is tidied at runtextpos and returned.
         * When quick is true, nothing is touched and null is returned.
         */
        private Match TidyMatch(bool quick) {
            if (!quick) {
                Match match = runmatch;

                runmatch = null;

                match.Tidy(runtextpos);
                return match;
            }
            else {
                // in quick mode, a successful match returns null, and
                // the allocated match object is left in the cache

                return null;
            }
        }

        /*
         * Called by the implementation of Go() to increase the size of storage.
         * Each stack is doubled once fewer than runtrackcount * 4 free slots remain.
         */
        protected void EnsureStorage() {
            if (runstackpos < runtrackcount * 4)
                DoubleStack();
            if (runtrackpos < runtrackcount * 4)
                DoubleTrack();
        }

        /*
         * Called by the implementation of Go() to decide whether the pos
         * at the specified index is a boundary or not. It's just not worth
         * emitting inline code for this logic.
         *
         * Returns true when exactly one of (character before index, character
         * at index) is a word character; positions outside startpos..endpos
         * count as non-word.
         */
        protected bool IsBoundary(int index, int startpos, int endpos) {
            return (index > startpos && RegexCharClass.IsWordChar(runtext[index - 1])) !=
                   (index < endpos   && RegexCharClass.IsWordChar(runtext[index]));
        }

        /*
         * Same test as IsBoundary, but using RegexCharClass.IsECMAWordChar
         * to classify characters.
         */
        protected bool IsECMABoundary(int index, int startpos, int endpos) {
            return (index > startpos && RegexCharClass.IsECMAWordChar(runtext[index - 1])) !=
                   (index < endpos   && RegexCharClass.IsECMAWordChar(runtext[index]));
        }

        /*
         * Tests ch against an old-style set/category string pair by first
         * converting the pair with RegexCharClass.ConvertOldStringsToClass.
         */
        protected static bool CharInSet(char ch, String set, String category) {
            string charClass = RegexCharClass.ConvertOldStringsToClass(set, category);
            return RegexCharClass.CharInClass(ch, charClass);
        }

        /*
         * Tests ch against a character class string via RegexCharClass.CharInClass.
         */
        protected static bool CharInClass(char ch, String charClass) {
            return RegexCharClass.CharInClass(ch, charClass);
        }

        /*
         * Called by the implementation of Go() to increase the size of the
         * backtracking stack. The existing contents are copied into the upper
         * half of the new array and runtrackpos is shifted by the old length,
         * so it keeps pointing at the same entry.
         */
        protected void DoubleTrack() {
            int[] newtrack;

            newtrack = new int[runtrack.Length * 2];

            System.Array.Copy(runtrack, 0, newtrack, runtrack.Length, runtrack.Length);
            runtrackpos += runtrack.Length;
            runtrack = newtrack;
        }

        /*
         * Called by the implementation of Go() to increase the size of the
         * grouping stack. Same copy-to-upper-half scheme as DoubleTrack.
         */
        protected void DoubleStack() {
            int[] newstack;

            newstack = new int[runstack.Length * 2];

            System.Array.Copy(runstack, 0, newstack, runstack.Length, runstack.Length);
            runstackpos += runstack.Length;
            runstack = newstack;
        }

        /*
         * Increases the size of the longjump unrolling stack.
         * Same copy-to-upper-half scheme as DoubleTrack.
         */
        protected void DoubleCrawl() {
            int[] newcrawl;

            newcrawl = new int[runcrawl.Length * 2];

            System.Array.Copy(runcrawl, 0, newcrawl, runcrawl.Length, runcrawl.Length);
            runcrawlpos += runcrawl.Length;
            runcrawl = newcrawl;
        }

        /*
         * Save a number on the longjump unrolling stack,
         * growing the stack first when it is full.
         */
        protected void Crawl(int i) {
            if (runcrawlpos == 0)
                DoubleCrawl();

            runcrawl[--runcrawlpos] = i;
        }

        /*
         * Remove a number from the longjump unrolling stack.
         * No emptiness check is made; callers must pair this with a prior Crawl.
         */
        protected int Popcrawl() {
            return runcrawl[runcrawlpos++];
        }

        /*
         * Get the height of the stack
         */
        protected int Crawlpos() {
            return runcrawl.Length - runcrawlpos;
        }

        /*
         * Called by Go() to capture a subexpression. Note that the
         * capnum used here has already been mapped to a non-sparse
         * index (by the code generator RegexWriter).
         *
         * start and end may be given in either order; they are swapped
         * if needed so the recorded length is non-negative.
         */
        protected void Capture(int capnum, int start, int end) {
            if (end < start) {
                int T;

                T = end;
                end = start;
                start = T;
            }

            Crawl(capnum);
            runmatch.AddMatch(capnum, start, end - start);
        }

        /*
         * Called by Go() to balance a capture: the current capture of
         * uncapnum is cancelled (BalanceMatch) and, unless capnum is -1,
         * a new capture for capnum is recorded whose interval is derived
         * from the given start/end and uncapnum's current interval.
         * Both group numbers are pushed on the crawl stack so that each
         * step can be undone by Uncapture().
         */
        protected void TransferCapture(int capnum, int uncapnum, int start, int end) {
            int start2;
            int end2;

            // these are the two intervals that are cancelling each other

            if (end < start) {
                int T;

                T = end;
                end = start;
                start = T;
            }

            start2 = MatchIndex(uncapnum);
            end2 = start2 + MatchLength(uncapnum);

            // The new capture gets the innermost defined interval

            if (start >= end2) {
                // Given interval lies entirely after uncapnum's: take the gap between them.
                end = start;
                start = end2;
            }
            else if (end <= start2) {
                // NOTE(review): here start becomes start2 while end <= start2, so the
                // length passed to AddMatch (end - start) is zero or negative.
                // Preserved exactly as in the original; confirm this is intended.
                start = start2;
            }
            else {
                // Overlapping intervals: clamp to their intersection.
                if (end > end2)
                    end = end2;
                if (start2 > start)
                    start = start2;
            }

            Crawl(uncapnum);
            runmatch.BalanceMatch(uncapnum);

            if (capnum != -1) {
                Crawl(capnum);
                runmatch.AddMatch(capnum, start, end - start);
            }
        }

        /*
         * Called by Go() to revert the last capture
         */
        protected void Uncapture() {
            int capnum = Popcrawl();
            runmatch.RemoveMatch(capnum);
        }

        /*
         * Call out to runmatch to get around visibility issues
         */
        protected bool IsMatched(int cap) {
            return runmatch.IsMatched(cap);
        }

        /*
         * Call out to runmatch to get around visibility issues
         */
        protected int MatchIndex(int cap) {
            return runmatch.MatchIndex(cap);
        }

        /*
         * Call out to runmatch to get around visibility issues
         */
        protected int MatchLength(int cap) {
            return runmatch.MatchLength(cap);
        }

#if DBG
        /*
         * Dump the current state
         */
        internal virtual void DumpState() {
            Debug.WriteLine("Text: " + TextposDescription());
            Debug.WriteLine("Track: " + StackDescription(runtrack, runtrackpos));
            Debug.WriteLine("Stack: " + StackDescription(runstack, runstackpos));
        }

        /*
         * Formats a downward-growing stack as "used/capacity" (padded to
         * 8 columns) followed by the live entries from Index to the end of A.
         */
        internal static String StackDescription(int[] A, int Index) {
            StringBuilder Sb = new StringBuilder();

            Sb.Append(A.Length - Index);
            Sb.Append('/');
            Sb.Append(A.Length);

            if (Sb.Length < 8)
                Sb.Append(' ',8 - Sb.Length);

            Sb.Append("(");

            for (int i = Index; i < A.Length; i++) {
                if (i > Index)
                    Sb.Append(' ');
                Sb.Append(A[i]);
            }

            Sb.Append(')');

            return Sb.ToString();
        }

        /*
         * Formats the current text position: the position number (padded to
         * 8 columns), the preceding character (or '^' at the beginning), a '>'
         * marker, then the remaining text, truncated with "..." at 64 columns
         * or terminated with '$'.
         */
        internal virtual String TextposDescription() {
            StringBuilder Sb = new StringBuilder();

            Sb.Append(runtextpos);

            if (Sb.Length < 8)
                Sb.Append(' ',8 - Sb.Length);

            if (runtextpos > runtextbeg)
                Sb.Append(RegexCharClass.CharDescription(runtext[runtextpos - 1]));
            else
                Sb.Append('^');

            Sb.Append('>');

            for (int i = runtextpos; i < runtextend; i++) {
                Sb.Append(RegexCharClass.CharDescription(runtext[i]));
            }

            if (Sb.Length >= 64) {
                Sb.Length = 61;
                Sb.Append("...");
            }
            else {
                Sb.Append('$');
            }

            return Sb.ToString();
        }
#endif
    }
}

// File provided for Reference Use Only by Microsoft Corporation (c) 2007.
Link Menu
This book is available now!
Buy at Amazon US or
Buy at Amazon UK
- SmiContextFactory.cs
- URI.cs
- IPAddressCollection.cs
- RectAnimationBase.cs
- CapiHashAlgorithm.cs
- Vars.cs
- DispatcherExceptionEventArgs.cs
- ListViewItemSelectionChangedEvent.cs
- SQLInt32.cs
- SchemaTableOptionalColumn.cs
- EntityStoreSchemaFilterEntry.cs
- FacetValueContainer.cs
- MachineKeySection.cs
- PinnedBufferMemoryStream.cs
- Win32.cs
- ListenerElementsCollection.cs
- EndpointConfigContainer.cs
- _UriTypeConverter.cs
- designeractionlistschangedeventargs.cs
- WindowCollection.cs
- DbMetaDataColumnNames.cs
- SystemBrushes.cs
- FramingEncoders.cs
- Tile.cs
- EntityCommandExecutionException.cs
- RadioButtonAutomationPeer.cs
- ClientData.cs
- TransformGroup.cs
- MessageBox.cs
- ToolZone.cs
- URLString.cs
- SqlCacheDependencySection.cs
- OuterGlowBitmapEffect.cs
- StaticContext.cs
- FunctionOverloadResolver.cs
- APCustomTypeDescriptor.cs
- Timer.cs
- LayoutDump.cs
- ColumnHeader.cs
- EmulateRecognizeCompletedEventArgs.cs
- DataGridViewTopRowAccessibleObject.cs
- ToolStripDropTargetManager.cs
- SqlResolver.cs
- ToolBarTray.cs
- PriorityQueue.cs
- SqlVersion.cs
- TypeBinaryExpression.cs
- SmiEventStream.cs
- SafeNativeMethods.cs
- TableSectionStyle.cs
- AddInProcess.cs
- ElementProxy.cs
- SchemaTableOptionalColumn.cs
- StickyNote.cs
- SystemWebExtensionsSectionGroup.cs
- XpsSerializationException.cs
- EpmContentDeSerializerBase.cs
- DBSchemaTable.cs
- InputLanguageEventArgs.cs
- ConnectionManagementElementCollection.cs
- IndentedWriter.cs
- TiffBitmapEncoder.cs
- messageonlyhwndwrapper.cs
- GroupQuery.cs
- PropertySegmentSerializationProvider.cs
- TraceContextEventArgs.cs
- StatusBar.cs
- IndexerNameAttribute.cs
- BypassElementCollection.cs
- SQLInt32Storage.cs
- NullableDecimalSumAggregationOperator.cs
- UiaCoreTypesApi.cs
- ProcessInputEventArgs.cs
- SizeKeyFrameCollection.cs
- RecordsAffectedEventArgs.cs
- RequestCacheManager.cs
- MaskInputRejectedEventArgs.cs
- UpdateManifestForBrowserApplication.cs
- PropertyDescriptorCollection.cs
- CompleteWizardStep.cs
- EditableTreeList.cs
- OleAutBinder.cs
- OracleConnection.cs
- EncodingTable.cs
- BackgroundWorker.cs
- HealthMonitoringSectionHelper.cs
- PreservationFileReader.cs
- PngBitmapDecoder.cs
- BrowserCapabilitiesFactory.cs
- Constraint.cs
- SuppressMergeCheckAttribute.cs
- MissingManifestResourceException.cs
- PeerContact.cs
- OdbcStatementHandle.cs
- XmlSchemaFacet.cs
- SliderAutomationPeer.cs
- CodeTypeReference.cs
- CellConstantDomain.cs
- XmlSerializationReader.cs
- ObjectTypeMapping.cs