Code:
/ FX-1434 / FX-1434 / 1.0 / untmp / whidbey / REDBITS / ndp / fx / src / Services / IO / System / IO / PatternMatcher.cs / 1 / PatternMatcher.cs
//------------------------------------------------------------------------------ //// Copyright (c) Microsoft Corporation. All rights reserved. // //----------------------------------------------------------------------------- /* */ namespace System.IO { using System.Text; using System.Diagnostics; using System; using Microsoft.Win32; using System.Globalization; internal static class PatternMatcher { ////// Private constants (directly from C header files) /// private const int MATCHES_ARRAY_SIZE = 16; private const char ANSI_DOS_STAR = '>'; private const char ANSI_DOS_QM = '<'; private const char DOS_DOT = '"'; ////// Tells whether a given name matches the expression given with a strict (i.e. UNIX like) /// semantics. This code is a port of unmanaged code. Original code comment follows: /// /// Routine Description: /// /// This routine compares a Dbcs name and an expression and tells the caller /// if the name is in the language defined by the expression. The input name /// cannot contain wildcards, while the expression may contain wildcards. /// /// Expression wild cards are evaluated as shown in the nondeterministic /// finite automatons below. Note that ~* and ~? are DOS_STAR and DOS_QM. /// /// /// ~* is DOS_STAR, ~? is DOS_QM, and ~. is DOS_DOT /// /// /// S /// <-----< /// X | | e Y /// X * Y == (0)----->-(1)->-----(2)-----(3) /// /// /// S-. /// <-----< /// X | | e Y /// X ~* Y == (0)----->-(1)->-----(2)-----(3) /// /// /// /// X S S Y /// X ?? Y == (0)---(1)---(2)---(3)---(4) /// /// /// /// X . . Y /// X ~.~. Y == (0)---(1)----(2)------(3)---(4) /// | |________| /// | ^ | /// |_______________| /// ^EOF or .^ /// /// /// X S-. S-. Y /// X ~?~? Y == (0)---(1)-----(2)-----(3)---(4) /// | |________| /// | ^ | /// |_______________| /// ^EOF or .^ /// /// /// /// where S is any single character /// /// S-. is any single character except the final . /// /// e is a null character transition /// /// EOF is the end of the name string /// /// In words: /// /// * matches 0 or more characters. /// /// ? matches exactly 1 character. /// /// DOS_STAR matches 0 or more characters until encountering and matching /// the final . in the name. /// /// DOS_QM matches any single character, or upon encountering a period or /// end of name string, advances the expression to the end of the /// set of contiguous DOS_QMs. /// /// DOS_DOT matches either a . or zero characters beyond name string. /// /// Arguments: /// /// Expression - Supplies the input expression to check against /// /// Name - Supplies the input name to check for. /// /// Return Value: /// /// BOOLEAN - TRUE if Name is an element in the set of strings denoted /// by the input Expression and FALSE otherwise. /// /// public static bool StrictMatchPattern(string expression, string name) { int nameOffset; int exprOffset; int length; int srcCount; int destCount; int previousDestCount; int matchesCount; char nameChar = '\0'; char exprChar = '\0'; int[] previousMatches = new int[MATCHES_ARRAY_SIZE]; int[] currentMatches = new int[MATCHES_ARRAY_SIZE]; int maxState; int currentState; bool nameFinished = false; // // The idea behind the algorithm is pretty simple. We keep track of // all possible locations in the regular expression that are matching // the name. If when the name has been exhausted one of the locations // in the expression is also just exhausted, the name is in the language // defined by the regular expression. // if (name == null || name.Length == 0 || expression == null || expression.Length == 0) { return false; } // // Special case by far the most common wild card search of * or *.* // if (expression.Equals("*") || expression.Equals("*.*")) { return true; } // If this class is ever exposed for generic use, // we need to make sure that name doesn't contain wildcards. Currently // the only component that calls this method is FileSystemWatcher and // it will never pass a name that contains a wildcard. // // Also special case expressions of the form *X. With this and the prior // case we have covered virtually all normal queries. // if (expression[0] == '*' && expression.IndexOf('*', 1) == -1) { int rightLength = expression.Length - 1; // if name is shorter that the stuff to the right of * in expression, we don't // need to do the string compare, otherwise we compare rightlength characters // and the end of both strings. if (name.Length >= rightLength && String.Compare(expression, 1, name, name.Length - rightLength, rightLength, StringComparison.OrdinalIgnoreCase) == 0) { return true; } } // // Walk through the name string, picking off characters. We go one // character beyond the end because some wild cards are able to match // zero characters beyond the end of the string. // // With each new name character we determine a new set of states that // match the name so far. We use two arrays that we swap back and forth // for this purpose. One array lists the possible expression states for // all name characters up to but not including the current one, and other // array is used to build up the list of states considering the current // name character as well. The arrays are then switched and the process // repeated. // // There is not a one-to-one correspondence between state number and // offset into the expression. This is evident from the NFAs in the // initial comment to this function. State numbering is not continuous. // This allows a simple conversion between state number and expression // offset. Each character in the expression can represent one or two // states. * and DOS_STAR generate two states: ExprOffset*2 and // ExprOffset*2 + 1. All other expreesion characters can produce only // a single state. Thus ExprOffset = State/2. // // // Here is a short description of the variables involved: // // NameOffset - The offset of the current name char being processed. // // ExprOffset - The offset of the current expression char being processed. // // SrcCount - Prior match being investigated with current name char // // DestCount - Next location to put a matching assuming current name char // // NameFinished - Allows one more itteration through the Matches array // after the name is exhusted (to come *s for example) // // PreviousDestCount - This is used to prevent entry duplication, see coment // // PreviousMatches - Holds the previous set of matches (the Src array) // // CurrentMatches - Holds the current set of matches (the Dest array) // // AuxBuffer, LocalBuffer - the storage for the Matches arrays // // // Set up the initial variables // previousMatches[0] = 0; matchesCount = 1; nameOffset = 0; maxState = expression.Length * 2; while (! nameFinished) { if (nameOffset < name.Length) { nameChar = name[nameOffset]; length = 1; nameOffset++; } else { nameFinished = true; // // if we have already exhasted the expression, C#. Don't // continue. // if (previousMatches[matchesCount - 1] == maxState) { break; } } // // Now, for each of the previous stored expression matches, see what // we can do with this name character. // srcCount = 0; destCount = 0; previousDestCount = 0; while (srcCount < matchesCount) { // // We have to carry on our expression analysis as far as possible // for each character of name, so we loop here until the // expression stops matching. A clue here is that expression // cases that can match zero or more characters end with a // continue, while those that can accept only a single character // end with a break. // exprOffset = ((previousMatches[srcCount++] + 1) / 2); length = 0; while (true) { if ( exprOffset == expression.Length ) { break; } // // The first time through the loop we don't want // to increment ExprOffset. // exprOffset += length; currentState = exprOffset * 2; if (exprOffset == expression.Length) { currentMatches[destCount++] = maxState; break; } exprChar = expression[exprOffset]; length = 1; // // Before we get started, we have to check for something // really gross. We may be about to exhaust the local // space for ExpressionMatches[][], so we have to allocate // some pool if this is the case. Yuk! // if (destCount >= MATCHES_ARRAY_SIZE - 2) { int newSize = currentMatches.Length * 2; int [] tmp = new int[newSize]; Array.Copy(currentMatches, tmp, currentMatches.Length); currentMatches = tmp; tmp = new int[newSize]; Array.Copy(previousMatches, tmp, previousMatches.Length); previousMatches = tmp; } // // * matches any character zero or more times. // if (exprChar == '*') { currentMatches[destCount++] = currentState; currentMatches[destCount++] = (currentState + 1); continue; } // // DOS_STAR matches any character except . zero or more times. // if (exprChar == ANSI_DOS_STAR) { bool iCanEatADot = false; // // If we are at a period, determine if we are allowed to // consume it, ie. make sure it is not the last one. // if (!nameFinished && (nameChar == '.') ) { char tmpChar; int offset; int nameLength = name.Length; for (offset = nameOffset; offset < nameLength; offset ++ ) { tmpChar = name[offset]; length = 1; if (tmpChar == '.') { iCanEatADot = true; break; } } } if (nameFinished || (nameChar != '.') || iCanEatADot) { currentMatches[destCount++] = currentState; currentMatches[destCount++] = (currentState + 1); continue; } else { // // We are at a period. We can only match zero // characters (ie. the epsilon transition). // currentMatches[destCount++] = (currentState + 1); continue; } } // // The following expreesion characters all match by consuming // a character, thus force the expression, and thus state // forward. // currentState += length * 2; // // DOS_QM is the most complicated. If the name is finished, // we can match zero characters. If this name is a '.', we // don't match, but look at the next expression. Otherwise // we match a single character. // if (exprChar == ANSI_DOS_QM) { if (nameFinished || (nameChar == '.') ) { continue; } currentMatches[destCount++] = currentState; break; } // // A DOS_DOT can match either a period, or zero characters // beyond the end of name. // if (exprChar == DOS_DOT) { if (nameFinished) { continue; } if (nameChar == '.') { currentMatches[destCount++] = currentState; break; } } // // From this point on a name character is required to even // continue, let alone make a match. // if (nameFinished) { break; } // // If this expression was a '?' we can match it once. // if (exprChar == '?') { currentMatches[destCount++] = currentState; break; } // // Finally, check if the expression char matches the name char // if (exprChar == nameChar) { currentMatches[destCount++] = currentState; break; } // // The expression didn't match so go look at the next // previous match. // break; } // // Prevent duplication in the destination array. // // Each of the arrays is montonically increasing and non- // duplicating, thus we skip over any source element in the src // array if we just added the same element to the destination // array. This guarentees non-duplication in the dest. array. // if ((srcCount < matchesCount) && (previousDestCount < destCount) ) { while (previousDestCount < destCount) { int previousLength = previousMatches.Length; while ((srcCount < previousLength) && (previousMatches[srcCount] < currentMatches[previousDestCount])) { srcCount += 1; } previousDestCount += 1; } } } // // If we found no matches in the just finished itteration, it's time // to bail. // if ( destCount == 0 ) { return false; } // // Swap the meaning the two arrays // { int[] tmp; tmp = previousMatches; previousMatches = currentMatches; currentMatches = tmp; } matchesCount = destCount; } currentState = previousMatches[matchesCount - 1]; return currentState == maxState; } } }
Link Menu
This book is available now!
Buy at Amazon US or
Buy at Amazon UK
- HTMLTextWriter.cs
- DataSourceControl.cs
- SafeProcessHandle.cs
- UpDownBase.cs
- MediaContextNotificationWindow.cs
- ToolboxItemWrapper.cs
- SoapParser.cs
- CriticalExceptions.cs
- _ListenerAsyncResult.cs
- ConfigurationElementCollection.cs
- HtmlContainerControl.cs
- StackOverflowException.cs
- DataServiceBuildProvider.cs
- UrlMapping.cs
- RedirectionProxy.cs
- TypeHelpers.cs
- HttpSysSettings.cs
- VarRemapper.cs
- WebPartHeaderCloseVerb.cs
- CommonXSendMessage.cs
- Convert.cs
- OdbcConnectionFactory.cs
- RunInstallerAttribute.cs
- UIElementHelper.cs
- IdentityModelDictionary.cs
- TreeNodeStyle.cs
- XhtmlTextWriter.cs
- HttpFileCollection.cs
- QuaternionRotation3D.cs
- ApplicationFileParser.cs
- ContextQuery.cs
- StyleXamlParser.cs
- PenThreadPool.cs
- SmiConnection.cs
- ResourcePool.cs
- QilTargetType.cs
- BinaryFormatterWriter.cs
- ResXDataNode.cs
- ActivityDesignerHelper.cs
- CharAnimationUsingKeyFrames.cs
- RuntimeEnvironment.cs
- InputLanguageSource.cs
- XNodeNavigator.cs
- HttpStreamMessageEncoderFactory.cs
- RSAPKCS1SignatureDeformatter.cs
- CompiledQuery.cs
- AuthenticationManager.cs
- ConfigXmlElement.cs
- GenericIdentity.cs
- HtmlTable.cs
- FormClosedEvent.cs
- followingsibling.cs
- XmlDocument.cs
- XmlReflectionMember.cs
- DragDeltaEventArgs.cs
- Win32SafeHandles.cs
- AlignmentXValidation.cs
- KeyValuePair.cs
- SafeWaitHandle.cs
- QuaternionRotation3D.cs
- TypeLibConverter.cs
- WebGetAttribute.cs
- PartialToken.cs
- SamlAssertion.cs
- CodeTypeDeclaration.cs
- Timeline.cs
- IDictionary.cs
- PerfService.cs
- Positioning.cs
- _NTAuthentication.cs
- MLangCodePageEncoding.cs
- HttpContextWrapper.cs
- Config.cs
- PasswordBox.cs
- SessionStateItemCollection.cs
- basecomparevalidator.cs
- UpdateDelegates.Generated.cs
- LicenseManager.cs
- SqlException.cs
- WsiProfilesElement.cs
- sqlnorm.cs
- Peer.cs
- XpsSerializationException.cs
- SortDescription.cs
- WebBrowserNavigatedEventHandler.cs
- WebCategoryAttribute.cs
- ClientFormsIdentity.cs
- DiscoveryInnerClientManaged11.cs
- TransformedBitmap.cs
- ObjectSet.cs
- ContractsBCL.cs
- GlobalId.cs
- AnnouncementService.cs
- COM2Properties.cs
- OracleRowUpdatingEventArgs.cs
- Grid.cs
- FullTextLine.cs
- _NTAuthentication.cs
- PrimarySelectionGlyph.cs
- DataServiceHostFactory.cs