Code:
/ Dotnetfx_Win7_3.5.1 / Dotnetfx_Win7_3.5.1 / 3.5.1 / DEVDIV / depot / DevDiv / releases / Orcas / NetFXw7 / wpf / src / Framework / MS / Internal / IO / Packaging / XamlFilter.cs / 1 / XamlFilter.cs
//----------------------------------------------------------------------------
//
// Copyright (C) Microsoft Corporation. All rights reserved.
//
//
// Description:
//              Implements an indexing filter for XAML streams.
//              Invoked by the PackageFilter.
//
// History:
//  02/02/2004: BruceMac: Stubs
//  02/16/2004: JohnLarc: Initial implementation
//  08/26/2004: JohnLarc: Removed access to indexing filters from managed code.
//  07/18/2005: ArindamB: Moved from XamlFilterImpl to XamlFilter, which
//                        implements IManagedFilter instead of ManagedFilterBase.
//---------------------------------------------------------------------------

#if DEBUG
// #define TRACE
#endif

using System;
using System.IO;
using System.Text;                              // For StringBuilder
using System.Xml;
using MS.Win32;                                 // For SafeNativeMethods
using System.Globalization;                     // For CultureInfo
using System.Diagnostics;                       // For Assert
using System.Collections;                       // For Stack and Hashtable
using System.Collections.Generic;               // For List<>
using System.Runtime.InteropServices;           // For COMException
using System.Runtime.InteropServices.ComTypes;  // For IStream, etc.
using System.Windows;                           // for ExceptionStringTable
using MS.Internal.PresentationFramework;        // for SecurityHelper
using MS.Internal.Interop;                      // for CHUNK_BREAKTYPE (and other IFilter-related definitions)
using MS.Internal;                              // for Invariant

namespace MS.Internal.IO.Packaging
{
    #region XamlFilter
    /// <summary>
    /// The class that supports content extraction from XAML files for indexing purposes.
    /// Note: It would be nice to have fixed page content extractor look for flow elements in a fixed page.
    /// This however, is not really doable: FixedPageContentExtractor is XSLT-based, not reader-based.
    /// It cannot do anything more efficiently than what XamlFilter is currently doing.
    /// The "flow pass" on a DOM reader for a fixed page does not entail any redundant IO or DOM building.
/// </summary>
    internal partial class XamlFilter : IManagedFilter
    {
        #region Nested Types

        /// <summary>
        /// The following enumeration makes it easier to keep track of the filter's multi-modal behavior.
        ///
        /// Each state implements a distinct method for collecting the next content unit, as follows:
        ///
        ///     Uninitialized           Return appropriate errors from GetChunk and GetText.
        ///     FindNextUnit            Standard mode. Return content as it is discovered in markup.
        ///     UseContentExtractor     Retrieve content from a FixedPageContentExtractor object (expected to
        ///                             perform adjacency analysis).
        ///     FindNextFlowUnit        Look for content in markup ignoring fixed-format markup (second pass over a
        ///                             fixed page).
        ///     EndOfStream             Return appropriate errors from GetChunk and GetText.
        /// </summary>
        /// <remarks>
        /// Transitions between these states are handled as follows:
        ///
        ///     state               | transition       | action                                | next state
        ///     --------            | ------------     | --------                              | ------------
        ///     Uninitialized       | constructor      | create an XML reader                  | FindNextUnit
        ///                         |                  |                                       |
        ///     FindNextUnit        | end of reader    | clean up                              | EndOfStream
        ///                         |                  |                                       |
        ///     FindNextUnit        | FixedPage tag    | create FixedPageContentExtractor,     | UseContentExtractor
        ///                         |                  | save a DOM of the FixedPage           |
        ///                         |                  |                                       |
        ///     UseContentExtractor | end of extractor | create sub-reader from FixedPage DOM, | FindNextFlowUnit
        ///                         |                  | save top-level reader                 |
        ///                         |                  |                                       |
        ///     FindNextFlowUnit    | end of reader    | restore top-level reader              | FindNextUnit
        ///                         |                  |                                       |
        ///
        /// </remarks>
        internal enum FilterState
        {
            // Numbering starts at 1, so the zero default of an unassigned field is not a named state.
            Uninitialized       = 1,
            FindNextUnit        = 2,
            FindNextFlowUnit    = 3,
            UseContentExtractor = 4,
            EndOfStream         = 5
        }

        /// <summary>
        /// A single reader position on an element start may correspond to 3 distinct states depending on
        /// whether the title and/or content property in the start tag has already been processed.
        /// </summary>
        [Flags]
        internal enum AttributesToIgnore
        {
            None    = 0,
            Title   = 1,
            Content = 2
        }

        #endregion Nested Types

        #region Internal Constructors

        /// <summary>
        /// The class constructor initializes trace and event logging.
/// </summary>
        static XamlFilter()
        {
#if TRACE
            // Route trace output to the Application event log under the "XAML filter" source.
            EventLog xamlFilterEventLog = new EventLog();
            xamlFilterEventLog.Log = "Application";
            xamlFilterEventLog.Source = "XAML filter";
            Trace.Listeners.Add(new EventLogTraceListener(xamlFilterEventLog));
#endif
        }

        /// <summary>
        /// Constructor. Does initialization.
        /// </summary>
        /// <param name="stream">xaml stream to filter</param>
        internal XamlFilter(Stream stream)
        {
#if TRACE
            System.Diagnostics.Trace.TraceInformation("New Xaml filter created.");
#endif
            // Cache of xml:lang string -> LCID. The generic arguments are <string, uint>:
            // GetCurrentLcid looks entries up by language string and returns the stored uint LCID.
            _lcidDictionary = new Dictionary<string, uint>(StringComparer.OrdinalIgnoreCase);

            // The context stack must exist before InitializeDeclaredFields, which clears it.
            _contextStack = new Stack(32);

            InitializeDeclaredFields();

            _xamlStream = stream;

            // Create a XAML reader (field _xamlReader) on the stream.
            CreateXmlReader();

            // Reflect load in filter's state.
            _filterState = FilterState.FindNextUnit;
        }

        /// <summary>
        /// This function is called from the constructor. It makes the object re-initializable,
        /// which would come in handy if the XamlFilter is ever made visible to unmanaged code
        /// and Load is allowed to be called multiple times.
        /// </summary>
        private void InitializeDeclaredFields()
        {
            // Initialize context variables.
            ClearStack();
            _filterState = FilterState.Uninitialized;

            // Initialize current ID.
            _currentChunkID = 0;

            // Initialize the content model dictionary.
            // Note: Hashtable is not IDisposable.
            LoadContentDescriptorDictionary();

            // Misc. initializations.
            _countOfCharactersReturned = 0;
            _currentContent = null;
            _indexingContentUnit = null;
            _expectingBlockStart = true;    // If text data occurred at top level, it would be a block start.
            _topLevelReader = null;
            _fixedPageContentExtractor = null;
            _fixedPageDomTree = null;
        }

        #endregion Internal Constructors

        #region Managed IFilter API

        /// <summary>
        /// Managed counterpart of IFilter.Init.
        /// </summary>
        /// <param name="grfFlags">Usage flags. Only IFILTER_INIT_CANON_PARAGRAPHS can be meaningfully
        /// honored by the XAML filter.</param>
        /// <param name="aAttributes">array of Managed FULLPROPSPEC structs to restrict responses</param>
        /// <returns>IFILTER_FLAGS_NONE, meaning the caller should not try to retrieve OLE property using
        /// IPropertyStorage on the Xaml part.</returns>
/// <remarks>Input parameters are ignored because this filter never returns any property value.</remarks>
        public IFILTER_FLAGS Init(
            IFILTER_INIT grfFlags,                  // IFILTER_INIT value
            ManagedFullPropSpec[] aAttributes)      // restrict responses to the specified attributes
        {
            //
            // Content is filtered either if no attributes are specified,
            // or if there are attributes specified, the attribute with PSGUID_STORAGE
            // property set and PID_STG_CONTENTS property id is present.
            //
            _filterContents = true;

            if (aAttributes != null && aAttributes.Length > 0)
            {
                _filterContents = false;

                for (int i = 0; i < aAttributes.Length; i++)
                {
                    if (aAttributes[i].Guid == IndexingFilterMarshaler.PSGUID_STORAGE
                        && aAttributes[i].Property.PropType == PropSpecType.Id
                        && aAttributes[i].Property.PropId == (uint)MS.Internal.Interop.PID_STG.CONTENTS)
                    {
                        _filterContents = true;
                        break;
                    }
                }
            }

            // The only flag in grfFlags that makes sense to honor is IFILTER_INIT_CANON_PARAGRAPHS
            _returnCanonicalParagraphBreaks = ((grfFlags & IFILTER_INIT.IFILTER_INIT_CANON_PARAGRAPHS) != 0);

            // Return zero value to indicate that the client code should not take any special steps
            // to retrieve OLE properties. This might have to change if filtering loose Xaml is supported.
            return IFILTER_FLAGS.IFILTER_FLAGS_NONE;
        }

        /// <summary>
        /// Managed counterpart of IFilter.GetChunk.
        /// </summary>
        /// <returns>
        /// Chunk descriptor.
        /// </returns>
        /// <remarks>
        /// On end of stream, this function will return null.
        /// Once the markup has been reported as ill-formed (FILTER_E_UNKNOWNFORMAT), the filter is
        /// treated as being at end of stream: further calls return null.
        /// </remarks>
        public ManagedChunk GetChunk()
        {
            if (!_filterContents)
            {
                // Contents not being filtered, no chunks to return in that case.
                _currentContent = null;

                // End of chunks.
                return null;
            }

            // If client code forgot to load the stream, throw appropriate exception.
            if (_xamlReader == null)
            {
                throw new COMException(SR.Get(SRID.FilterGetChunkNoStream), (int)FilterErrorCode.FILTER_E_ACCESS);
            }

            // If at end of chunks, report the condition.
            if (_filterState == FilterState.EndOfStream)
            {
                // Ensure _xamlReader has been closed
                EnsureXmlReaderIsClosed();

                // End of chunks.
                return null;
            }

            IndexingContentUnit contentUnit;
            try
            {
                contentUnit = NextContentUnit();
            }
            catch (XmlException xmlException)
            {
                // Ensure _xamlReader has been closed
                EnsureXmlReaderIsClosed();

                // The reader is closed now, so the filter must not scan it again: mark the stream
                // as finished and drop the text of the previous chunk. Without this, a later call
                // would re-enter NextContentUnit with a reader that never reports EOF.
                _currentContent = null;
                _filterState = FilterState.EndOfStream;

                // Return FILTER_E_UNKNOWNFORMAT for ill-formed documents.
                throw new COMException(xmlException.Message, (int)FilterErrorCode.FILTER_E_UNKNOWNFORMAT);
            }

            if (contentUnit == null)
            {
                // Update text information.
                _currentContent = null;

                // Ensure _xamlReader has been closed
                EnsureXmlReaderIsClosed();

                // Report end of stream by indicating end of chunks.
                return null;
            }

            // Store the text for returning in GetText.
            _currentContent = contentUnit.Text;

            // Record the fact that GetText hasn't been called on this chunk.
            _countOfCharactersReturned = 0;

            return contentUnit;
        }

        /// <summary>
        /// Return a maximum of bufferCharacterCount characters (*not* bytes) from the current content unit.
        /// </summary>
        /// <param name="bufferCharacterCount">Maximum number of characters to return; expected to be non-negative.</param>
        /// <returns>The next slice of the current chunk's text.</returns>
        public String GetText(int bufferCharacterCount)
        {
            // BufferCharacterCount should be non-negative
            Debug.Assert(bufferCharacterCount >= 0);

            // No current chunk text: raise FILTER_E_NO_TEXT.
            if (_currentContent == null)
            {
                SecurityHelper.ThrowExceptionForHR((int)FilterErrorCode.FILTER_E_NO_TEXT);
            }

            // The whole chunk has already been handed out: raise FILTER_E_NO_MORE_TEXT.
            int numCharactersToReturn = _currentContent.Length - _countOfCharactersReturned;
            if (numCharactersToReturn <= 0)
            {
                SecurityHelper.ThrowExceptionForHR((int)FilterErrorCode.FILTER_E_NO_MORE_TEXT);
            }

            // Return at most bufferCharacterCount characters. The marshaler makes sure it can add a terminating
            // NULL beyond the end of the string that is returned.
            if (numCharactersToReturn > bufferCharacterCount)
            {
                numCharactersToReturn = bufferCharacterCount;
            }

            String result = _currentContent.Substring(_countOfCharactersReturned, numCharactersToReturn);
            _countOfCharactersReturned += numCharactersToReturn;

            return result;
        }

        /// <summary>
        /// The XAML indexing filter never returns property values.
/// </summary>
        public Object GetValue()
        {
            // Raises FILTER_E_NO_VALUES; the return statement only satisfies the compiler.
            SecurityHelper.ThrowExceptionForHR((int)FilterErrorCode.FILTER_E_NO_VALUES);
            return null;
        }

        #endregion Managed IFilter API

        #region Internal Methods

#if DEBUG
        /// <summary>
        /// Debugging aid: render the element-to-descriptor table as text, one
        /// "index: [key -> value]" entry per line.
        /// </summary>
        internal string DumpElementTable()
        {
            ICollection keys = _xamlElementContentDescriptorDictionary.Keys;
            ICollection values = _xamlElementContentDescriptorDictionary.Values;

            int length = keys.Count;
            ElementTableKey[] keyList = new ElementTableKey[length];
            ContentDescriptor[] valueList = new ContentDescriptor[length];
            keys.CopyTo(keyList, 0);
            values.CopyTo(valueList, 0);

            // A StringBuilder avoids re-copying the whole result on every appended entry.
            StringBuilder result = new StringBuilder();
            for (int i = 0; i < length; ++i)
            {
                result.AppendFormat("{0}: [{1} -> {2}]\n", i, keyList[i], valueList[i]);
            }
            return result.ToString();
        }
#endif

        /// <summary>Return the next text chunk, or null at end of stream.</summary>
        internal IndexingContentUnit NextContentUnit()
        {
            // Loop until we are able to return some content or encounter an end of file.
            IndexingContentUnit nextContentUnit = null;
            while (nextContentUnit == null)
            {
                // If we have a content extractor delivering content units for us, use it.
                if (_filterState == FilterState.UseContentExtractor)
                {
                    Debug.Assert(_fixedPageContentExtractor != null);

                    // If we've consumed all the glyph run info, switch to a mode in which only the flow content
                    // of the fixed page just scanned will be returned.
                    if (_fixedPageContentExtractor.AtEndOfPage)
                    {
                        // Discard extractor.
                        _fixedPageContentExtractor = null;

                        // Set up reader.
                        _topLevelReader = _xamlReader;
                        _xamlReader = new XmlNodeReader(_fixedPageDomTree.DocumentElement);

                        // Transition to flow-only mode.
                        _filterState = FilterState.FindNextFlowUnit;
                    }
                    else
                    {
                        bool chunkIsInline;
                        uint lcid;
                        string chunk = _fixedPageContentExtractor.NextGlyphContent(out chunkIsInline, out lcid);
                        _expectingBlockStart = !chunkIsInline;
                        return BuildIndexingContentUnit(chunk, lcid);
                    }
                }

                if (_xamlReader.EOF)
                {
                    switch (_filterState)
                    {
                        // If in standard mode, return a null chunk to signal the end of all chunks.
                        case FilterState.FindNextUnit:
                            // A non-empty stack at this point could only be attributable to an internal error,
                            // for an early EOF would have been reported as an XML exception by the XML reader.
                            Debug.Assert(_contextStack.Count == 0);

                            _filterState = FilterState.EndOfStream;
                            return null;

                        // If processing a fixed page, revert to top-level XML reader.
                        case FilterState.FindNextFlowUnit:
                            Debug.Assert(_topLevelReader != null);

                            _xamlReader.Close();
                            _xamlReader = _topLevelReader;
                            _filterState = FilterState.FindNextUnit;

                            // The sub-reader and the page DOM are finished: drop the references so the
                            // DOM can be collected and no stale "parent reader" stays around.
                            _topLevelReader = null;
                            _fixedPageDomTree = null;
                            break;

                        default:
                            Debug.Assert(false);
                            break;
                    }
                }

                switch (_xamlReader.NodeType)
                {
                    // If current token is a text element,
                    //   if it can be part of its parent's content, return a chunk;
                    //   else, skip.
                    case XmlNodeType.Text:
                    case XmlNodeType.SignificantWhitespace:
                    case XmlNodeType.CDATA:
                        nextContentUnit = HandleTextData();
                        continue;

                    // If current token is an element start, then,
                    //   if appropriate, extract chunk text from an attribute
                    //   else, record content information and recurse.
                    case XmlNodeType.Element:
                        nextContentUnit = HandleElementStart();
                        continue;

                    // On end of element, restore context data (pop, etc.) and look further.
                    case XmlNodeType.EndElement:
                        nextContentUnit = HandleElementEnd();
                        continue;

                    // Default action is to ignore current token and look further.
                    // Note that non-significant whitespace is handled here.
                    default:
                        _xamlReader.Read();     // Consume current token.
                        continue;
                }
            }
            return nextContentUnit;
        }

        /// <summary>
        /// Load a hash table to map qualified element names to content descriptors.
        /// </summary>
        private void LoadContentDescriptorDictionary()
        {
            // Invoke init function that is generated at build time.
            InitElementDictionary();
        }

        #endregion Internal Methods

        #region Private Methods

        /// <summary>Ancillary function of NextContentUnit(). Create new chunk, taking _contextStack into
        /// account, and updating it if needed.</summary>
/// <param name="text">Chunk text; the paragraph separator is prepended when a canonical paragraph break is due.</param>
        /// <param name="lcid">Language ID stored in the chunk.</param>
        /// <returns>The filter's single IndexingContentUnit instance, (re)initialized for this chunk.</returns>
        private IndexingContentUnit BuildIndexingContentUnit(string text, uint lcid)
        {
            CHUNK_BREAKTYPE breakType = CHUNK_BREAKTYPE.CHUNK_NO_BREAK;

            // If a paragraph break is expected, reflect this in the new chunk.
            if (_expectingBlockStart)
            {
                breakType = CHUNK_BREAKTYPE.CHUNK_EOP;
                if (_returnCanonicalParagraphBreaks)
                {
                    text = _paragraphSeparator + text;
                }
            }

            if (_indexingContentUnit == null)
            {
                _indexingContentUnit = new IndexingContentUnit(text, AllocateChunkID(), breakType, _propSpec, lcid);
            }
            else
            {
                // Optimization: reuse indexing content unit.
                _indexingContentUnit.InitIndexingContentUnit(text, AllocateChunkID(), breakType, _propSpec, lcid);
            }

            // Until proven separated (by the occurrence of a block tag), right neighbors are contiguous.
            _expectingBlockStart = false;

            return _indexingContentUnit;
        }

        /// <summary>Obtain a content descriptor for a custom element not found in the dictionary.</summary>
        /// <remarks>
        /// There is currently no general way of extracting information about custom elements,
        /// so the default descriptor is systematically returned.
        /// </remarks>
        private ContentDescriptor GetContentInformationAboutCustomElement(ElementTableKey customElement)
        {
            return _defaultContentDescriptor;
        }

        /// <summary>
        /// If current token is a text element,
        ///   assume it can be part of its parent's content and return a chunk.
        /// </summary>
        /// <remarks>
        /// Ancillary function of NextContentUnit.
        /// </remarks>
        private IndexingContentUnit HandleTextData()
        {
            ContentDescriptor topOfStack = TopOfStack();

            if (topOfStack == null)
            {
                // Bad Xaml (no top-level element). The Xaml filter should at some point raise an exception.
                // Just to be safe, ignore all content when in this state.
                _xamlReader.Read();     // Skip data.
                return null;
            }

            // The descendants of elements with HasIndexableContent set to false get skipped.
            Debug.Assert(topOfStack.HasIndexableContent);

            // Return a chunk with appropriate block-break information.
            IndexingContentUnit result = BuildIndexingContentUnit(_xamlReader.Value, GetCurrentLcid());
            _xamlReader.Read();         // Move past data just processed.
            return result;
        }

        /// <summary>
        /// If current token is an element start, then,
        ///   if appropriate, extract chunk text from an attribute
        ///   else, record content information and recurse.
        /// </summary>
        /// <remarks>
        /// Ancillary function of NextContentUnit.
        /// A start tag may be visited up to three times: once to return its title attribute, once to
        /// return its content attribute, and once to descend into it; _attributesToIgnore records
        /// which of the attributes have already been returned.
        /// </remarks>
        private IndexingContentUnit HandleElementStart()
        {
            ElementTableKey elementFullName = new ElementTableKey(_xamlReader.NamespaceURI, _xamlReader.LocalName);

            string propertyName;

            // Handle the case of a complex property (e.g. Button.Content).
            if (IsPrefixedPropertyName(elementFullName.BaseName, out propertyName))
            {
                ContentDescriptor topOfStack = TopOfStack();

                // Handle the semantically incorrect case of a compound property occurring at the root
                // by ignoring it totally.
                if (topOfStack == null)
                {
                    SkipCurrentElement();
                    return null;
                }

                // Index the text children of property elements only if they are content or title properties.
                bool elementIsIndexable =
                    elementFullName.XmlNamespace.Equals(ElementTableKey.XamlNamespace, StringComparison.Ordinal)
                    && (propertyName == topOfStack.ContentProp || propertyName == topOfStack.TitleProp);
                if (!elementIsIndexable)
                {
                    // Skip element together with all its descendants.
                    SkipCurrentElement();
                    return null;
                }

                // Push descriptor, advance reader, and have caller look further.
                // The property element inherits the inline/block nature of its owner element.
                Push(
                    new ContentDescriptor(
                        true,                   // indexable: checked just above
                        topOfStack.IsInline,
                        String.Empty,           // has potential text content, but no content property
                        null));                 // no title property
                _xamlReader.Read();
                return null;
            }

            // Handle fixed-format markup in a special way (because assumptions for building
            // content descriptors don't work for these and they require actions beyond what
            // is stated in content descriptors).
            // Note: The elementFullyHandled boolean is required as the nextUnit returned can
            // be null in both cases - when element is fully handled and when its not.
            bool elementFullyHandled;
            IndexingContentUnit nextUnit = HandleFixedFormatTag(elementFullName, out elementFullyHandled);
            if (elementFullyHandled)
            {
                return nextUnit;
            }

            // When HandleFixedFormatTag declines to handle a tag because it is not fixed-format, it
            // will return null.
            Invariant.Assert(nextUnit == null);

            // Obtain a content descriptor for the current element, caching descriptors for
            // elements that are not in the table yet.
            ContentDescriptor elementDescriptor
                = (ContentDescriptor)_xamlElementContentDescriptorDictionary[elementFullName];
            if (elementDescriptor == null)
            {
                if (elementFullName.XmlNamespace.Equals(ElementTableKey.XamlNamespace, StringComparison.Ordinal))
                {
                    elementDescriptor = _defaultContentDescriptor;
                }
                else if (elementFullName.XmlNamespace.Equals(_inDocumentCodeURI, StringComparison.Ordinal))
                {
                    elementDescriptor = _nonIndexableElementDescriptor;
                }
                else
                {
                    elementDescriptor = GetContentInformationAboutCustomElement(elementFullName);
                }
                _xamlElementContentDescriptorDictionary.Add(elementFullName, elementDescriptor);
            }

            // If the element has no indexable content, skip all its descendants.
            if (!elementDescriptor.HasIndexableContent)
            {
                SkipCurrentElement();
                return null;
            }

            // If appropriate, retrieve title from an attribute.
            if (elementDescriptor.TitleProp != null
                && (_attributesToIgnore & AttributesToIgnore.Title) == 0)
            {
                string title = GetPropertyAsAttribute(elementDescriptor.TitleProp);
                if (!String.IsNullOrEmpty(title))
                {
                    // Leave the reader in its present state, but return the title as a block chunk,
                    // and mark this attribute as processed.
                    _attributesToIgnore |= AttributesToIgnore.Title;
                    _expectingBlockStart = true;
                    IndexingContentUnit titleContent = BuildIndexingContentUnit(title, GetCurrentLcid());
                    _expectingBlockStart = true;    // Simulate a stack pop for a block element.
                    return titleContent;
                }
            }

            // If appropriate, retrieve content from an attribute.
            if (elementDescriptor.ContentProp != null
                && (_attributesToIgnore & AttributesToIgnore.Content) == 0)
            {
                string content = GetPropertyAsAttribute(elementDescriptor.ContentProp);
                if (!String.IsNullOrEmpty(content))
                {
                    // Leave the reader in its present state, but mark the content attribute
                    // as processed.
                    _attributesToIgnore |= AttributesToIgnore.Content;

                    // Create a new chunk with appropriate break data.
                    if (!elementDescriptor.IsInline)
                    {
                        _expectingBlockStart = true;
                    }
                    IndexingContentUnit result = BuildIndexingContentUnit(content, GetCurrentLcid());

                    // Emulate a stack pop for the content attribute (which never gets pushed on the stack).
                    _expectingBlockStart = !elementDescriptor.IsInline;
                    return result;
                }
            }

            // Reset the attribute flag, since we are going to change the reader's state.
            _attributesToIgnore = AttributesToIgnore.None;

            // Handle the special case of an empty element: no descendants, but a possible paragraph break.
            if (_xamlReader.IsEmptyElement)
            {
                if (!elementDescriptor.IsInline)
                {
                    _expectingBlockStart = true;
                }
                // Have caller search for content past the tag.
                _xamlReader.Read();
                return null;
            }

            // Have caller look for content in descendants.
            Push(elementDescriptor);
            _xamlReader.Read();     // skip start-tag
            return null;
        }

        /// <summary>
        /// On end of element, restore context data (pop, etc.) and look further.
        /// </summary>
        /// <remarks>
        /// Ancillary function of NextContentUnit.
        /// </remarks>
        private IndexingContentUnit HandleElementEnd()
        {
            // Pop current descriptor; Pop itself records a pending block break when needed.
            Pop();

            // Consume end-tag.
            _xamlReader.Read();
            return null;
        }

        /// <summary>
        /// If the current tag is one of Glyphs, FixedPage or PageContent, process it adequately
        /// and return the next content unit or null (if not supposed to return content from fixed format).
        /// Otherwise, set 'handled' to false to tell the caller we didn't do anything useful.
/// </summary>
        /// <param name="elementFullName">Namespace and base name of the element the reader is positioned on.</param>
        /// <param name="handled">True when the tag was dealt with here; false when the caller must handle it.</param>
        private IndexingContentUnit HandleFixedFormatTag(ElementTableKey elementFullName, out bool handled)
        {
            // Tags outside the fixed-markup namespace are left to the caller.
            bool isFixedMarkup = elementFullName.XmlNamespace.Equals(
                ElementTableKey.FixedMarkupNamespace, StringComparison.Ordinal);
            if (!isFixedMarkup)
            {
                handled = false;
                return null;
            }

            string tagName = elementFullName.BaseName;
            bool inFlowPass = (_filterState == FilterState.FindNextFlowUnit);

            // From here on, every recognized tag counts as handled.
            handled = true;

            if (string.Equals(tagName, _glyphRunName, StringComparison.Ordinal))
            {
                if (!inFlowPass)
                {
                    return ProcessGlyphRun();
                }

                // Ignore glyph runs during flow pass over a FixedPage.
                SkipCurrentElement();
                return null;
            }

            if (string.Equals(tagName, _fixedPageName, StringComparison.Ordinal))
            {
                if (!inFlowPass)
                {
                    return ProcessFixedPage();
                }

                // Ignore FixedPage element (i.e. root element) during flow pass over a fixed page:
                // just step into it.
                Push(_defaultContentDescriptor);
                _xamlReader.Read();
                return null;
            }

            if (string.Equals(tagName, _pageContentName, StringComparison.Ordinal))
            {
                string sourceUri = _xamlReader.GetAttribute(_pageContentSourceAttribute);
                if (sourceUri == null)
                {
                    // Have NextContentUnit() look for content in descendants.
                    Push(_defaultContentDescriptor);
                    _xamlReader.Read();
                }
                else
                {
                    // If the element has a Source attribute, any inlined content should be ignored.
                    SkipCurrentElement();
                }
                return null;
            }

            // No useful work was done. Report 'unhandled'.
            handled = false;
            return null;
        }

        /// <summary>
        /// Handle the presence of a glyph run in the middle of flow markup by extracting
        /// its UnicodeString attribute and considering it a separate paragraph.
        /// </summary>
        /// <remarks>
        /// The handling of glyph runs inside fixed pages is performed in ProcessFixedPage.
/// </remarks>
        private IndexingContentUnit ProcessGlyphRun()
        {
            Debug.Assert(_xamlReader != null);

            string textContent = _xamlReader.GetAttribute(_unicodeStringAttribute);
            if (textContent == null || textContent.Length == 0)
            {
                // Nothing to index in this glyph run.
                SkipCurrentElement();
                return null;
            }

            // A glyph run found in flow markup always starts a new paragraph.
            _expectingBlockStart = true;

            // Read Lcid at current position and advance reader to next element before returning.
            uint lcid = GetCurrentLcid();
            SkipCurrentElement();
            return BuildIndexingContentUnit(textContent, lcid);
        }

        /// <summary>
        /// Load FixedPage element into a DOM tree to initialize a FixedPageContentExtractor.
        /// The content extractor will then be used to incrementally return the content of the fixed page.
        /// </summary>
        /// <returns>Always null; NextContentUnit fetches the first unit from the extractor.</returns>
        private IndexingContentUnit ProcessFixedPage()
        {
            // Reader is positioned on the start-tag for a FixedPage element.
            Debug.Assert(String.CompareOrdinal(_xamlReader.LocalName, _fixedPageName) == 0);

            // A FixedPage nested in a FixedPage is invalid.
            // XmlException gets handled inside this class (see GetChunk).
            if (_filterState == FilterState.FindNextFlowUnit)
            {
                throw new XmlException(SR.Get(SRID.XamlFilterNestedFixedPage));
            }

            // Capture the language in scope while the reader is still on the FixedPage start tag.
            // ReadOuterXml moves the reader past the element, after which XmlLang describes the
            // following node and could differ from (and overwrite) the page's own xml:lang.
            string fixedPageLanguage = _xamlReader.XmlLang;

            // Create a DOM for the current FixedPage.
            string fixedPageMarkup = _xamlReader.ReadOuterXml();
            XmlDocument fixedPageTree = new XmlDocument();
            fixedPageTree.LoadXml(fixedPageMarkup);

            // Preserve the current language ID
            if (fixedPageLanguage.Length > 0)
            {
                fixedPageTree.DocumentElement.SetAttribute(_xmlLangAttribute, fixedPageLanguage);
            }

            // Initialize a content extractor with this DOM tree.
            _fixedPageContentExtractor = new FixedPageContentExtractor(fixedPageTree.DocumentElement);

            // Save the DOM (to search for flow elements in it once the extractor is done)
            // and switch to extractor mode.
            _fixedPageDomTree = fixedPageTree;
            _filterState = FilterState.UseContentExtractor;

            // Have NextContentUnit look for the appropriate unit in the new mode.
            return null;
        }

        /// <summary>
        /// Create an XmlTextReader on _xamlStream with the appropriate settings.
/// </summary>
        private void CreateXmlReader()
        {
            if (_xamlReader != null)
            {
                _xamlReader.Close();
            }

            XmlTextReader textReader = new XmlTextReader(_xamlStream);

            // Do not return pretty-printing spacing between tags as data.
            textReader.WhitespaceHandling = WhitespaceHandling.Significant;

            // The stream is untrusted document content: do not let a DOCTYPE or entity
            // declaration make the reader fetch external resources.
            textReader.XmlResolver = null;

            _xamlReader = textReader;

            // Initialize reader state.
            _attributesToIgnore = AttributesToIgnore.None;  // not in the middle of processing a start-tag
        }

        /// <summary>
        /// Close the current reader and, when a sub-reader over a fixed page is active,
        /// the saved top-level reader as well.
        /// </summary>
        private void EnsureXmlReaderIsClosed()
        {
            if (_xamlReader != null)
            {
                _xamlReader.Close();
            }

            // While _xamlReader is a sub-reader, the reader created on the stream is parked
            // in _topLevelReader; close it too so it is not left open.
            if (_topLevelReader != null && !Object.ReferenceEquals(_topLevelReader, _xamlReader))
            {
                _topLevelReader.Close();
            }
        }

        /// <summary>
        /// Return the LCID in scope for the current node or, if there is none,
        /// the invariant culture's LCID. A language string that does not name a
        /// culture also maps to the invariant culture's LCID.
        /// Note: XmlGlyphRunInfo.LanguageID is an internal property that also has
        /// similar logic and will default to CultureInfo.InvariantCulture.LCID
        /// CultureInfo.InvariantCulture will never be null
        /// </summary>
        private uint GetCurrentLcid()
        {
            string languageString = GetLanguageString();
            uint invariantLcid = (uint)CultureInfo.InvariantCulture.LCID;

            if (languageString.Length == 0)
            {
                return invariantLcid;
            }

            // Single lookup in the cache of already-resolved language strings.
            uint lcid;
            if (_lcidDictionary.TryGetValue(languageString, out lcid))
            {
                return lcid;
            }

            try
            {
                lcid = (uint)new CultureInfo(languageString).LCID;
            }
            catch (ArgumentException)
            {
                // The document's language tag is not a culture name CultureInfo accepts.
                // Index the text as language-neutral instead of failing the whole document.
                lcid = invariantLcid;
            }

            _lcidDictionary.Add(languageString, lcid);
            return lcid;
        }

        /// <summary>
        /// Return the xml:lang value in scope on the current reader, falling back to the
        /// top-level reader when the current reader reports none. Empty if neither has one.
        /// </summary>
        private string GetLanguageString()
        {
            string languageString = _xamlReader.XmlLang;

            // Check whether there is a parent XAML reader.
            if (languageString.Length == 0 && _topLevelReader != null)
            {
                languageString = _topLevelReader.XmlLang;
            }

            return languageString;
        }

        /// <summary>Move the reader past the current element and all of its descendants.</summary>
        private void SkipCurrentElement()
        {
            _xamlReader.Skip();
        }

        /// <summary>
        /// Tell whether name has the form "Owner.Property" and, if so, return the part after the first dot.
        /// </summary>
        /// <param name="name">Local name of an element or attribute.</param>
        /// <param name="propertyName">Text following the first '.', or null when name contains no '.'.</param>
        /// <returns>True if name contains a '.'.</returns>
        private bool IsPrefixedPropertyName(string name, out string propertyName)
        {
            int suffixStart = name.IndexOf('.');
            if (suffixStart == -1)
            {
                propertyName = null;
                return false;
            }

            propertyName = name.Substring(suffixStart + 1);
            return true;
        }

        /// <summary>
        /// 0 is an illegal value, so this function never returns 0.
/// After the counter reaches UInt32.MaxValue we assert, since such a
        /// high number for chunks is most likely an indicator of some other
        /// problem in the system/code.
        /// </summary>
        private uint AllocateChunkID()
        {
            // Strictly less than: incrementing from UInt32.MaxValue would wrap around to the
            // illegal ID 0. (A "<=" comparison against MaxValue is always true for a uint.)
            Invariant.Assert(_currentChunkID < UInt32.MaxValue);

            ++_currentChunkID;

            return _currentChunkID;
        }

        /// <summary>
        /// Find an attribute named propertyName or X.propertyName.
        /// </summary>
        /// <param name="propertyName">Property name to look for on the current element.</param>
        /// <returns>The attribute value, or null if no matching attribute exists.</returns>
        private string GetPropertyAsAttribute(string propertyName)
        {
            // Plain attribute first.
            string value = _xamlReader.GetAttribute(propertyName);
            if (value != null)
            {
                return value;
            }

            // Otherwise scan the attributes for one of the form X.propertyName.
            bool attributeFound = _xamlReader.MoveToFirstAttribute();
            while (attributeFound)
            {
                string attributePropertyName;
                if (IsPrefixedPropertyName(_xamlReader.LocalName, out attributePropertyName)
                    && attributePropertyName.Equals(propertyName, StringComparison.Ordinal))
                {
                    value = _xamlReader.Value;
                    break;
                }

                // Advance reader.
                attributeFound = _xamlReader.MoveToNextAttribute();
            }

            // Reposition reader on owner element.
            _xamlReader.MoveToElement();

            return value;
        }

        #region Context Stack Accessors

        /// <summary>
        /// Return the descriptor of the innermost open element, or null when no element is open.
        /// </summary>
        private ContentDescriptor TopOfStack()
        {
            // Stack.Peek throws on an empty stack, whereas callers test the result for null
            // to detect content that has no enclosing element.
            if (_contextStack.Count == 0)
            {
                return null;
            }

            return (ContentDescriptor)_contextStack.Peek();
        }

        /// <summary>Push a descriptor; entering a block element means the next chunk starts a block.</summary>
        private void Push(ContentDescriptor contentDescriptor)
        {
            if (!contentDescriptor.IsInline)
            {
                _expectingBlockStart = true;
            }
            _contextStack.Push(contentDescriptor);
        }

        /// <summary>Pop and return the innermost descriptor.</summary>
        private ContentDescriptor Pop()
        {
            ContentDescriptor topOfStack = (ContentDescriptor)_contextStack.Pop();

            // If we reach an end of block, we expect the next item to
            // start with a block separator.
            if (!topOfStack.IsInline)
            {
                _expectingBlockStart = true;
            }
            return topOfStack;
        }

        /// <summary>Remove all descriptors from the context stack.</summary>
        private void ClearStack()
        {
            _contextStack.Clear();
        }

        #endregion Context Stack Accessors

        #endregion Private Methods

        #region Private Constants

        /// <summary>XML namespace URI for in-document code.</summary>
        private const string _inDocumentCodeURI = "http://schemas.microsoft.com/winfx/2006/xaml";

        // Tag and attribute names.
// Fixed-format element and attribute names, and the separator used for canonical paragraph breaks.
        private const string _pageContentName            = "PageContent";
        private const string _glyphRunName               = "Glyphs";
        private const string _pageContentSourceAttribute = "Source";
        private const string _fixedPageName              = "FixedPage";
        private const string _xmlLangAttribute           = "xml:lang";
        private const string _paragraphSeparator         = "\u2029";
        private const string _unicodeStringAttribute     = "UnicodeString";

        /// <summary>
        /// The default content descriptor has content in child nodes, no title, and block-type content.
        /// </summary>
        private readonly ContentDescriptor _defaultContentDescriptor =
            new ContentDescriptor(true /*hasIndexableContent*/, false /*isInline*/, null, null);

        // Descriptor for elements whose content is skipped entirely.
        private readonly ContentDescriptor _nonIndexableElementDescriptor = new ContentDescriptor(false);

        // Static fields.
        private static readonly ManagedFullPropSpec _propSpec =
            new ManagedFullPropSpec(IndexingFilterMarshaler.PSGUID_STORAGE, (uint)MS.Internal.Interop.PID_STG.CONTENTS);

        #endregion Private Constants

        #region Private Fields

        // Variables initialized in constructor and InitializeDeclaredFields.
        private Stack                       _contextStack;
        private FilterState                 _filterState;
        private string                      _currentContent;
        private uint                        _currentChunkID;
        private int                         _countOfCharactersReturned;
        private IndexingContentUnit         _indexingContentUnit;
        private bool                        _expectingBlockStart;
        private XmlReader                   _topLevelReader;
        private FixedPageContentExtractor   _fixedPageContentExtractor;
        private XmlDocument                 _fixedPageDomTree;

        // Variables initialized in constructor and (potentially, if implemented some day) in IPersistFile.Load.
        private Stream                      _xamlStream;

        // Variables initialized in Init.
        private bool                        _filterContents;                    //defaults to false
        private bool                        _returnCanonicalParagraphBreaks;    //defaults to false

        // Reader state variables (initialized in CreateXmlReader).
        private XmlReader                   _xamlReader;
        private AttributesToIgnore          _attributesToIgnore;

        /// <summary>Map from fully qualified element name to content location information.</summary>
        private Hashtable                   _xamlElementContentDescriptorDictionary;

        // Dictionary of Language strings and the corresponding LCID.
        // Keyed by the xml:lang string; the values are the uint LCIDs returned by GetCurrentLcid.
        private Dictionary<string, uint>    _lcidDictionary;

        #endregion Private Fields
    } // class XamlFilter
    #endregion XamlFilter
} // namespace MS.Internal.IO.Packaging

// NOTE(review): the single comment line below is kept verbatim from the original text. It appears to be
// the reference-source footer followed by the start of a second, truncated copy of this file.
// File provided for Reference Use Only by Microsoft Corporation (c) 2007. // Copyright (c) Microsoft Corporation. All rights reserved. //---------------------------------------------------------------------------- // // // Copyright (C) Microsoft Corporation. All rights reserved. // // // Description: // Implements an indexing filter for XAML streams. // Invoked by the PackageFilter. // // History: // 02/02/2004: BruceMac: Stubs // 02/16/2004: JohnLarc: Initial implementation // 08/26/2004: JohnLarc: Removed access to indexing filters from managed code. // 07/18/2005: ArindamB: Moved from XamlFilterImpl to XamlFilter, which // implements IManagedFilter instead of ManagedFilterBase. //--------------------------------------------------------------------------- #if DEBUG // #define TRACE #endif using System; using System.IO; using System.Xml; using MS.Win32; // For SafeNativeMethods using System.Globalization; // For CultureInfo using System.Diagnostics; // For Assert using System.Collections; // For Stack and Hashtable using System.Collections.Generic; // For List<> using System.Runtime.InteropServices; // For COMException using System.Runtime.InteropServices.ComTypes; // For IStream, etc. using System.Windows; // for ExceptionStringTable using MS.Internal.PresentationFramework; // for SecurityHelper using MS.Internal.Interop; // for CHUNK_BREAKTYPE (and other IFilter-related definitions) using MS.Internal; // for Invariant namespace MS.Internal.IO.Packaging { #region XamlFilter ////// The class that supports content extraction from XAML files for indexing purposes. /// Note: It would be nice to have fixed page content extractor look for flow elements in a fixed page.
/// This however, is not really doable: FixedPageContentExtractor is XSLT-based, not reader-based.
/// It cannot do anything more efficiently than what XamlFilter is currently doing.
/// The "flow pass" on a DOM reader for a fixed page does not entail any redundant IO or DOM building.
///
internal partial class XamlFilter : IManagedFilter
{
#region Nested Types

/// <summary>
/// The following enumeration makes it easier to keep track of the filter's multi-modal behavior.
///
/// Each state implements a distinct method for collecting the next content unit, as follows:
///
///   Uninitialized        Return appropriate errors from GetChunk and GetText.
///   FindNextUnit         Standard mode. Return content as it is discovered in markup.
///   UseContentExtractor  Retrieve content from a FixedPageContentExtractor object (expected to
///                        perform adjacency analysis).
///   FindNextFlowUnit     Look for content in markup ignoring fixed-format markup (second pass over a
///                        fixed page).
///   EndOfStream          Return appropriate errors from GetChunk and GetText.
///
/// Transitions between these states are handled as follows:
///
///   state               | transition       | action                                | next state
///   ------------------- | ---------------- | ------------------------------------- | -------------------
///   Uninitialized       | constructor      | create an XML reader                  | FindNextUnit
///   FindNextUnit        | end of reader    | clean up                              | EndOfStream
///   FindNextUnit        | FixedPage tag    | create FixedPageContentExtractor,     | UseContentExtractor
///                       |                  | save a DOM of the FixedPage           |
///   UseContentExtractor | end of extractor | create sub-reader from FixedPage DOM, | FindNextFlowUnit
///                       |                  | save top-level reader                 |
///   FindNextFlowUnit    | end of reader    | restore top-level reader              | FindNextUnit
/// </summary>
internal enum FilterState
{
    Uninitialized = 1,
    FindNextUnit,
    FindNextFlowUnit,
    UseContentExtractor,
    EndOfStream
};

/// <summary>
/// A single reader position on an element start may correspond to 3 distinct states depending on
/// whether the title and/or content property in the start tag has already been processed.
/// </summary>
[Flags]
internal enum AttributesToIgnore
{
    None = 0,
    Title = 1,
    Content = 2
};

#endregion Nested Types

#region Internal Constructors

/// <summary>
/// The class constructor initializes trace and event logging.
/// </summary>
static XamlFilter()
{
#if TRACE
    EventLog xamlFilterEventLog = new EventLog();
    xamlFilterEventLog.Log = "Application";
    xamlFilterEventLog.Source = "XAML filter";
    Trace.Listeners.Add(new EventLogTraceListener(xamlFilterEventLog));
#endif
}

/// <summary>
/// Constructor. Does initialization.
/// </summary>
/// <param name="stream">xaml stream to filter</param>
internal XamlFilter(Stream stream)
{
#if TRACE
    System.Diagnostics.Trace.TraceInformation("New Xaml filter created.");
#endif
    // Language string -> LCID cache; xml:lang values are matched case-insensitively.
    _lcidDictionary = new Dictionary<string, uint>(StringComparer.OrdinalIgnoreCase);

    _contextStack = new Stack(32);
    InitializeDeclaredFields();

    _xamlStream = stream;

    // Create a XAML reader (field _xamlReader) on the stream.
    CreateXmlReader();

    // Reflect load in filter's state.
    _filterState = FilterState.FindNextUnit;
}

/// <summary>
/// This function is called from the constructor. It makes the object re-initializable,
/// which would come in handy if the XamlFilter is ever made visible to unmanaged code
/// and Load is allowed to be called multiple times.
/// </summary>
private void InitializeDeclaredFields()
{
    // Initialize context variables.
    ClearStack();
    _filterState = FilterState.Uninitialized;

    // Initialize current ID.
    _currentChunkID = 0;

    // Initialize the content model dictionary.
    // Note: Hashtable is not IDisposable.
    LoadContentDescriptorDictionary();

    // Misc. initializations.
    _countOfCharactersReturned = 0;
    _currentContent = null;
    _indexingContentUnit = null;
    _expectingBlockStart = true; // If text data occurred at top level, it would be a block start.
    _topLevelReader = null;
    _fixedPageContentExtractor = null;
    _fixedPageDomTree = null;
}

#endregion Internal Constructors

#region Managed IFilter API

/// <summary>
/// Managed counterpart of IFilter.Init.
/// </summary>
/// <param name="grfFlags">Usage flags. Only IFILTER_INIT_CANON_PARAGRAPHS can be meaningfully
/// honored by the XAML filter.</param>
/// <param name="aAttributes">array of Managed FULLPROPSPEC structs to restrict responses</param>
/// <returns>IFILTER_FLAGS_NONE, meaning the caller should not try to retrieve OLE property using
/// IPropertyStorage on the Xaml part.</returns>
/// <remarks>
/// The attribute list only decides whether contents get filtered at all; this filter never
/// returns any property value.
/// </remarks>
public IFILTER_FLAGS Init(
    IFILTER_INIT grfFlags,              // IFILTER_INIT value
    ManagedFullPropSpec[] aAttributes)  // restrict responses to the specified attributes
{
    //
    // Content is filtered either if no attributes are specified,
    // or if there are attributes specified, the attribute with PSGUID_STORAGE
    // property set and PID_STG_CONTENTS property id is present.
    //
    _filterContents = true;
    if (aAttributes != null && aAttributes.Length > 0)
    {
        _filterContents = false;
        for (int i = 0; i < aAttributes.Length; i++)
        {
            if (aAttributes[i].Guid == IndexingFilterMarshaler.PSGUID_STORAGE
                && aAttributes[i].Property.PropType == PropSpecType.Id
                && aAttributes[i].Property.PropId == (uint)MS.Internal.Interop.PID_STG.CONTENTS)
            {
                _filterContents = true;
                break;
            }
        }
    }

    // The only flag in grfFlags that makes sense to honor is IFILTER_INIT_CANON_PARAGRAPHS
    _returnCanonicalParagraphBreaks = ((grfFlags & IFILTER_INIT.IFILTER_INIT_CANON_PARAGRAPHS) != 0);

    // Return zero value to indicate that the client code should not take any special steps
    // to retrieve OLE properties. This might have to change if filtering loose Xaml is supported.
    return IFILTER_FLAGS.IFILTER_FLAGS_NONE;
}

/// <summary>
/// Managed counterpart of IFilter.GetChunk.
/// </summary>
/// <returns>
/// Chunk descriptor.
/// </returns>
/// <remarks>
/// On end of stream, this function will return null.
/// Ill-formed markup is reported as a COMException carrying FILTER_E_UNKNOWNFORMAT; after such
/// a failure the filter behaves as if the end of the stream had been reached.
/// </remarks>
public ManagedChunk GetChunk()
{
    if (!_filterContents)
    {
        // Contents not being filtered, no chunks to return in that case.
        _currentContent = null;

        // End of chunks.
        return null;
    }

    IndexingContentUnit contentUnit;

    // If client code forgot to load the stream, throw appropriate exception.
    if (_xamlReader == null)
    {
        throw new COMException(SR.Get(SRID.FilterGetChunkNoStream), (int)FilterErrorCode.FILTER_E_ACCESS);
    }

    // If at end of chunks, report the condition.
    if (_filterState == FilterState.EndOfStream)
    {
        //Ensure _xamlReader has been closed
        EnsureXmlReaderIsClosed();

        // End of chunks.
        return null;
    }

    try
    {
        contentUnit = NextContentUnit();
    }
    catch (XmlException xmlException)
    {
        //Ensure _xamlReader has been closed
        EnsureXmlReaderIsClosed();

        // The reader is closed now: make sure a later GetChunk does not try to pull nodes
        // from it, and that GetText cannot hand out the text of the previous chunk.
        _filterState = FilterState.EndOfStream;
        _currentContent = null;

        // Return FILTER_E_UNKNOWNFORMAT for ill-formed documents.
        throw new COMException(xmlException.Message, (int)FilterErrorCode.FILTER_E_UNKNOWNFORMAT);
    }

    if (contentUnit == null)
    {
        // Update text information.
        _currentContent = null;

        //Ensure _xamlReader has been closed
        EnsureXmlReaderIsClosed();

        // Report end of stream by indicating end of chunks.
        return null;
    }

    // Store the text for returning in GetText.
    _currentContent = contentUnit.Text;

    // Record the fact that GetText hasn't been called on this chunk.
    _countOfCharactersReturned = 0;

    return contentUnit;
}

/// <summary>
/// Return a maximum of bufferCharacterCount characters (*not* bytes) from the current content unit.
/// </summary>
/// <param name="bufferCharacterCount">Maximum number of characters to return; expected to be
/// non-negative (checked by a debug assertion only).</param>
/// <returns>The next slice of the current chunk's text.</returns>
public String GetText(int bufferCharacterCount)
{
    //BufferCharacterCount should be non-negative
    Debug.Assert(bufferCharacterCount >= 0);

    if (_currentContent == null)
    {
        SecurityHelper.ThrowExceptionForHR((int)FilterErrorCode.FILTER_E_NO_TEXT);
    }

    int numCharactersToReturn = _currentContent.Length - _countOfCharactersReturned;
    if (numCharactersToReturn <= 0)
    {
        SecurityHelper.ThrowExceptionForHR((int)FilterErrorCode.FILTER_E_NO_MORE_TEXT);
    }

    // Return at most bufferCharacterCount characters. The marshaler makes sure it can add a terminating
    // NULL beyond the end of the string that is returned.
    if (numCharactersToReturn > bufferCharacterCount)
    {
        numCharactersToReturn = bufferCharacterCount;
    }

    String result = _currentContent.Substring(_countOfCharactersReturned, numCharactersToReturn);
    _countOfCharactersReturned += numCharactersToReturn;

    return result;
}

///
/// The XAML indexing filter never returns property values.
///
public Object GetValue()
{
    // Always raises FILTER_E_NO_VALUES; the return statement only satisfies the compiler.
    SecurityHelper.ThrowExceptionForHR((int)FilterErrorCode.FILTER_E_NO_VALUES);
    return null;
}

#endregion Managed IFilter API

#region Internal Methods

#if DEBUG
/// <summary>
/// Debugging aid: render the element-to-descriptor table as one "index: [key -> value]" line per entry.
/// </summary>
internal string DumpElementTable()
{
    int entryCount = _xamlElementContentDescriptorDictionary.Keys.Count;

    ElementTableKey[] elementKeys = new ElementTableKey[entryCount];
    ContentDescriptor[] descriptors = new ContentDescriptor[entryCount];
    _xamlElementContentDescriptorDictionary.Keys.CopyTo(elementKeys, 0);
    _xamlElementContentDescriptorDictionary.Values.CopyTo(descriptors, 0);

    string dump = "";
    int index = 0;
    while (index < entryCount)
    {
        dump += string.Format("{0}: [{1} -> {2}]\n", index, elementKeys[index], descriptors[index]);
        ++index;
    }
    return dump;
}
#endif

/// <summary>
/// Return the next text chunk, or null at end of stream.
/// </summary>
internal IndexingContentUnit NextContentUnit()
{
    // Keep consuming nodes until one of them yields content or the top-level reader runs dry.
    for (;;)
    {
        // While a content extractor is active, it is the only source of content units.
        if (_filterState == FilterState.UseContentExtractor)
        {
            Debug.Assert(_fixedPageContentExtractor != null);

            if (!_fixedPageContentExtractor.AtEndOfPage)
            {
                bool glyphsAreInline;
                uint glyphLcid;
                string glyphText = _fixedPageContentExtractor.NextGlyphContent(out glyphsAreInline, out glyphLcid);
                _expectingBlockStart = !glyphsAreInline;
                return BuildIndexingContentUnit(glyphText, glyphLcid);
            }

            // All glyph run info has been consumed: drop the extractor and start a second pass
            // over the saved DOM in which only the flow content of the fixed page is returned.
            _fixedPageContentExtractor = null;
            _topLevelReader = _xamlReader;
            _xamlReader = new XmlNodeReader(_fixedPageDomTree.DocumentElement);
            _filterState = FilterState.FindNextFlowUnit;
        }

        if (_xamlReader.EOF)
        {
            if (_filterState == FilterState.FindNextUnit)
            {
                // Standard mode: a null chunk signals the end of all chunks.
                // A non-empty stack at this point could only be attributable to an internal error,
                // for an early EOF would have been reported as an XML exception by the XML reader.
                Debug.Assert(_contextStack.Count == 0);
                _filterState = FilterState.EndOfStream;
                return null;
            }
            else if (_filterState == FilterState.FindNextFlowUnit)
            {
                // Done with the fixed page: revert to the top-level XML reader.
                Debug.Assert(_topLevelReader != null);
                _xamlReader.Close();
                _xamlReader = _topLevelReader;
                _filterState = FilterState.FindNextUnit;
            }
            else
            {
                Debug.Assert(false);
            }
        }

        IndexingContentUnit found;
        switch (_xamlReader.NodeType)
        {
            // Text data: becomes a chunk if it can be part of its parent's content, else skipped.
            case XmlNodeType.Text:
            case XmlNodeType.SignificantWhitespace:
            case XmlNodeType.CDATA:
                found = HandleTextData();
                break;

            // Element start: either extract chunk text from an attribute,
            // or record content information and descend.
            case XmlNodeType.Element:
                found = HandleElementStart();
                break;

            // Element end: restore context data (pop, etc.) and look further.
            case XmlNodeType.EndElement:
                found = HandleElementEnd();
                break;

            // Anything else (including non-significant whitespace) is consumed and ignored.
            default:
                _xamlReader.Read();
                found = null;
                break;
        }

        if (found != null)
        {
            return found;
        }
    }
}

/// <summary>
/// Load a hash table to map qualified element names to content descriptors.
/// </summary>
private void LoadContentDescriptorDictionary()
{
    // Invoke init function that is generated at build time.
    InitElementDictionary();
}

#endregion Internal Methods

#region Private Methods

///Ancillary function of NextContentUnit(). Create new chunk, taking _contextStack into
/// account, and updating it if needed.
private IndexingContentUnit BuildIndexingContentUnit(string text, uint lcid)
{
    CHUNK_BREAKTYPE breakType = CHUNK_BREAKTYPE.CHUNK_NO_BREAK;

    // If a paragraph break is expected, reflect this in the new chunk.
    if (_expectingBlockStart)
    {
        breakType = CHUNK_BREAKTYPE.CHUNK_EOP;
        if (_returnCanonicalParagraphBreaks)
        {
            text = _paragraphSeparator + text;
        }
    }

    if (_indexingContentUnit == null)
    {
        _indexingContentUnit = new IndexingContentUnit(text, AllocateChunkID(), breakType, _propSpec, lcid);
    }
    else
    {
        // Optimization: reuse indexing content unit.
        _indexingContentUnit.InitIndexingContentUnit(text, AllocateChunkID(), breakType, _propSpec, lcid);
    }

    // Until proven separated (by the occurrence of a block tag), right neighbors are contiguous.
    _expectingBlockStart = false;

    return _indexingContentUnit;
}

/// <summary>
/// Obtain a content descriptor for a custom element not found in the dictionary.
/// </summary>
/// <remarks>
/// There is currently no general way of extracting information about custom elements,
/// so the default descriptor is systematically returned.
/// </remarks>
private ContentDescriptor GetContentInformationAboutCustomElement(ElementTableKey customElement)
{
    return _defaultContentDescriptor;
}

/// <summary>
/// If current token is a text element,
/// assume it can be part of its parent's content and return a chunk.
/// </summary>
/// <remarks>
/// Ancillary function of NextContentUnit.
/// </remarks>
private IndexingContentUnit HandleTextData()
{
    ContentDescriptor topOfStack = TopOfStack();

    if (topOfStack != null)
    {
        // The descendants of elements with HasIndexableContent set to false get skipped.
        Debug.Assert(topOfStack.HasIndexableContent);

        // Return a chunk with appropriate block-break information.
        IndexingContentUnit result = BuildIndexingContentUnit(_xamlReader.Value, GetCurrentLcid());
        _xamlReader.Read(); // Move past data just processed.
        return result;
    }
    else
    {
        // Bad Xaml (no top-level element). The Xaml filter should at some point raise an exception.
        // Just to be safe, ignore all content when in this state.
        _xamlReader.Read(); // Skip data.
        return null;
    }
}

/// <summary>
/// If current token is an element start, then,
/// if appropriate, extract chunk text from an attribute
/// else, record content information and recurse.
/// </summary>
/// <remarks>
/// Ancillary function of NextContentUnit. When a title or content attribute is returned, the
/// reader is left on the start tag and _attributesToIgnore records what was already returned,
/// so the same tag is visited again on the next call.
/// </remarks>
private IndexingContentUnit HandleElementStart()
{
    ElementTableKey elementFullName = new ElementTableKey(_xamlReader.NamespaceURI, _xamlReader.LocalName);

    string propertyName;

    // Handle the case of a complex property (e.g. Button.Content).
    if (IsPrefixedPropertyName(elementFullName.BaseName, out propertyName))
    {
        ContentDescriptor topOfStack = TopOfStack();

        // Handle the semantically incorrect case of a compound property occurring at the root
        // by ignoring it totally.
        if (topOfStack == null)
        {
            SkipCurrentElement();
            return null;
        }

        // Index the text children of property elements only if they are content or title properties.
        bool elementIsIndexable =
            elementFullName.XmlNamespace.Equals(ElementTableKey.XamlNamespace, StringComparison.Ordinal)
            && (propertyName == topOfStack.ContentProp || propertyName == topOfStack.TitleProp);

        if (!elementIsIndexable)
        {
            // Skip element together with all its descendants.
            SkipCurrentElement();
            return null;
        }

        // An empty property element (<X.Content/>) has no end tag, so pushing a descriptor for
        // it would never be balanced by a pop. Only reproduce the block-break effect of Push.
        if (_xamlReader.IsEmptyElement)
        {
            if (!topOfStack.IsInline)
            {
                _expectingBlockStart = true;
            }
            _xamlReader.Read();
            return null;
        }

        // Push descriptor, advance reader, and have caller look further.
        Push(
            new ContentDescriptor(
                elementIsIndexable,
                topOfStack.IsInline,
                String.Empty,   // has potential text content, but no content property
                null));         // no title property
        _xamlReader.Read();
        return null;
    }

    // Handle fixed-format markup in a special way (because assumptions for building
    // content descriptors don't work for these and they require actions beyond what
    // is stated in content descriptors).
    // Note: The elementFullyHandled boolean is required as the nextUnit returned can
    // be null in both cases - when element is fully handled and when its not.
    bool elementFullyHandled;
    IndexingContentUnit nextUnit = HandleFixedFormatTag(elementFullName, out elementFullyHandled);
    if (elementFullyHandled)
    {
        return nextUnit;
    }

    // When HandleFixedFormatTag declines to handle a tag because it is not fixed-format, it
    // will return null.
    Invariant.Assert(nextUnit == null);

    // Obtain a content descriptor for the current element.
    ContentDescriptor elementDescriptor =
        (ContentDescriptor)_xamlElementContentDescriptorDictionary[elementFullName];

    if (elementDescriptor == null)
    {
        if (elementFullName.XmlNamespace.Equals(ElementTableKey.XamlNamespace, StringComparison.Ordinal))
        {
            elementDescriptor = _defaultContentDescriptor;
        }
        else if (elementFullName.XmlNamespace.Equals(_inDocumentCodeURI, StringComparison.Ordinal))
        {
            elementDescriptor = _nonIndexableElementDescriptor;
        }
        else
        {
            elementDescriptor = GetContentInformationAboutCustomElement(elementFullName);
        }

        // Remember the decision so the next occurrence of this element is a plain lookup.
        _xamlElementContentDescriptorDictionary.Add(elementFullName, elementDescriptor);
    }

    // If the element has no indexable content, skip all its descendants.
    if (!elementDescriptor.HasIndexableContent)
    {
        SkipCurrentElement();
        return null;
    }

    // If appropriate, retrieve title from an attribute.
    if (elementDescriptor.TitleProp != null
        && (_attributesToIgnore & AttributesToIgnore.Title) == 0)
    {
        string title = GetPropertyAsAttribute(elementDescriptor.TitleProp);
        if (title != null && title.Length > 0)
        {
            // Leave the reader in its present state, but return the title as a block chunk,
            // and mark this attribute as processed.
            _attributesToIgnore |= AttributesToIgnore.Title;
            _expectingBlockStart = true;
            IndexingContentUnit titleContent = BuildIndexingContentUnit(title, GetCurrentLcid());
            _expectingBlockStart = true; // Simulate a stack pop for a block element.
            return titleContent;
        }
    }

    // If appropriate, retrieve content from an attribute.
    if (elementDescriptor.ContentProp != null
        && (_attributesToIgnore & AttributesToIgnore.Content) == 0)
    {
        string content = GetPropertyAsAttribute(elementDescriptor.ContentProp);
        if (content != null && content.Length > 0)
        {
            // Leave the reader in its present state, but mark the content attribute
            // as processed.
            _attributesToIgnore |= AttributesToIgnore.Content;

            // Create a new chunk with appropriate break data.
            if (!elementDescriptor.IsInline)
            {
                _expectingBlockStart = true;
            }
            IndexingContentUnit result = BuildIndexingContentUnit(content, GetCurrentLcid());

            // Emulate a stack pop for the content attribute (which never gets pushed on the stack).
            _expectingBlockStart = !elementDescriptor.IsInline;
            return result;
        }
    }

    // Reset the attribute flag, since we are going to change the reader's state.
    _attributesToIgnore = AttributesToIgnore.None;

    // Handle the special case of an empty element: no descendants, but a possible paragraph break.
    if (_xamlReader.IsEmptyElement)
    {
        if (!elementDescriptor.IsInline)
        {
            _expectingBlockStart = true;
        }

        // Have caller search for content past the tag.
        _xamlReader.Read();
        return null;
    }

    // Have caller look for content in descendants.
    Push(elementDescriptor);
    _xamlReader.Read(); // skip start-tag
    return null;
}

/// <summary>
/// On end of element, restore context data (pop, etc.) and look further.
/// </summary>
/// <remarks>
/// Ancillary function of NextContentUnit.
/// </remarks>
private IndexingContentUnit HandleElementEnd()
{
    // Pop current descriptor.
    Pop();

    // Consume end-tag.
    _xamlReader.Read();

    return null;
}

///
/// If the current tag is one of Glyphs, FixedPage or PageContent, process it adequately
/// and return the next content unit or null (if not supposed to return content from fixed format).
/// Otherwise, set 'handled' to false to tell the caller we didn't do anything useful.
///
private IndexingContentUnit HandleFixedFormatTag(ElementTableKey elementFullName, out bool handled)
{
    handled = true; // Not true until we return, but this is the most convenient default.

    if (!elementFullName.XmlNamespace.Equals(ElementTableKey.FixedMarkupNamespace, StringComparison.Ordinal))
    {
        handled = false; // Let caller handle that tag.
        return null;
    }

    if (String.CompareOrdinal(elementFullName.BaseName, _glyphRunName) == 0)
    {
        // Ignore glyph runs during flow pass over a FixedPage.
        if (_filterState == FilterState.FindNextFlowUnit)
        {
            SkipCurrentElement();
            return null;
        }
        return ProcessGlyphRun();
    }

    if (String.CompareOrdinal(elementFullName.BaseName, _fixedPageName) == 0)
    {
        // Ignore FixedPage element (i.e. root element) during flow pass over a fixed page:
        // only its descendants are of interest.
        if (_filterState == FilterState.FindNextFlowUnit)
        {
            EnterFixedFormatContainer();
            return null;
        }
        return ProcessFixedPage();
    }

    if (String.CompareOrdinal(elementFullName.BaseName, _pageContentName) == 0)
    {
        // If the element has a Source attribute, any inlined content should be ignored.
        string sourceUri = _xamlReader.GetAttribute(_pageContentSourceAttribute);
        if (sourceUri != null)
        {
            SkipCurrentElement();
            return null;
        }

        // Have NextContentUnit() look for content in descendants.
        EnterFixedFormatContainer();
        return null;
    }

    // No useful work was done. Report 'unhandled'.
    handled = false;
    return null;
}

/// <summary>
/// Step into a FixedPage or PageContent start tag so that the caller scans its descendants.
/// An empty element produces no end tag, so nothing is pushed for it (the push could never be
/// balanced by a pop); it still counts as a block boundary, like the default descriptor it
/// would otherwise have pushed.
/// </summary>
private void EnterFixedFormatContainer()
{
    if (_xamlReader.IsEmptyElement)
    {
        _expectingBlockStart = true;
    }
    else
    {
        Push(_defaultContentDescriptor);
    }
    _xamlReader.Read();
}

///
/// Handle the presence of a glyph run in the middle of flow markup by extracting
/// its UnicodeString attribute and considering it a separate paragraph.
///
/// The handling of glyph runs inside fixed pages is performed in ProcessFixedPage.
///
private IndexingContentUnit ProcessGlyphRun()
{
    Debug.Assert(_xamlReader != null);

    string textContent = _xamlReader.GetAttribute(_unicodeStringAttribute);
    if (String.IsNullOrEmpty(textContent))
    {
        // Nothing to index in this glyph run.
        SkipCurrentElement();
        return null;
    }

    // A glyph run found in flow markup is treated as a paragraph of its own.
    _expectingBlockStart = true;

    // Read Lcid at current position and advance reader to next element before returning.
    uint lcid = GetCurrentLcid();
    SkipCurrentElement();
    return BuildIndexingContentUnit(textContent, lcid);
}

/// <summary>
/// Load FixedPage element into a DOM tree to initialize a FixedPageContentExtractor.
/// The content extractor will then be used to incrementally return the content of the fixed page.
/// </summary>
/// <returns>Always null: the caller picks up the first unit from the extractor.</returns>
private IndexingContentUnit ProcessFixedPage()
{
    // Reader is positioned on the start-tag for a FixedPage element.
    Debug.Assert(String.CompareOrdinal(_xamlReader.LocalName, _fixedPageName) == 0);

    // A FixedPage nested in a FixedPage is invalid.
    // XmlException gets handled inside this class (see GetChunk).
    if (_filterState == FilterState.FindNextFlowUnit)
    {
        throw new XmlException(SR.Get(SRID.XamlFilterNestedFixedPage));
    }

    // Capture the language in scope while the reader is still on the FixedPage start tag.
    // ReadOuterXml moves the reader past the element, after which XmlLang would describe
    // the following node rather than the page.
    string pageLanguage = _xamlReader.XmlLang;

    // Create a DOM for the current FixedPage.
    string fixedPageMarkup = _xamlReader.ReadOuterXml();
    XmlDocument fixedPageTree = new XmlDocument();
    fixedPageTree.LoadXml(fixedPageMarkup);

    // Preserve the current language ID (the detached DOM no longer inherits it from ancestors).
    if (pageLanguage.Length > 0)
    {
        fixedPageTree.DocumentElement.SetAttribute(_xmlLangAttribute, pageLanguage);
    }

    // Initialize a content extractor with this DOM tree.
    _fixedPageContentExtractor = new FixedPageContentExtractor(fixedPageTree.DocumentElement);

    // Save the DOM (to search for flow elements in it once the extractor is done)
    // and switch to extractor mode.
    _fixedPageDomTree = fixedPageTree;
    _filterState = FilterState.UseContentExtractor;

    // Have NextContentUnit look for the appropriate unit in the new mode.
    return null;
}

///
/// Create an XmlTextReader on _xamlStream with the appropriate settings.
///
private void CreateXmlReader()
{
    if (_xamlReader != null)
    {
        _xamlReader.Close();
    }

    // NOTE(review): the reader is created with XmlTextReader's default DTD/resolver settings;
    // confirm that is acceptable for the documents this filter is run on.
    XmlTextReader textReader = new XmlTextReader(_xamlStream);

    // Do not return pretty-printing spacing between tags as data.
    textReader.WhitespaceHandling = WhitespaceHandling.Significant;
    _xamlReader = textReader;

    // Initialize reader state.
    _attributesToIgnore = AttributesToIgnore.None; // not in the middle of processing a start-tag
}

/// <summary>
/// Close the current reader, if there is one. The field itself is left set.
/// </summary>
private void EnsureXmlReaderIsClosed()
{
    if (_xamlReader != null)
    {
        _xamlReader.Close();
    }
}

/// <summary>
/// Return the LCID in scope for the current node or, if there is none (or the language
/// string does not name a culture), the invariant culture's LCID.
/// Note: XmlGlyphRunInfo.LanguageID is an internal property that also has
/// similar logic and will default to CultureInfo.InvariantCulture.LCID
/// CultureInfo.InvariantCulture will never be null
/// </summary>
private uint GetCurrentLcid()
{
    string languageString = GetLanguageString();

    if (languageString.Length == 0)
    {
        return (uint)CultureInfo.InvariantCulture.LCID;
    }

    uint lcid;
    if (_lcidDictionary.TryGetValue(languageString, out lcid))
    {
        return lcid;
    }

    try
    {
        lcid = (uint)new CultureInfo(languageString).LCID;
    }
    catch (ArgumentException)
    {
        // xml:lang comes straight from the document and may not name a culture known to the
        // system. Treat it like a missing language instead of aborting the whole filtering run.
        lcid = (uint)CultureInfo.InvariantCulture.LCID;
    }

    // Cache the outcome (including the fallback) so each language string is resolved only once.
    _lcidDictionary.Add(languageString, lcid);
    return lcid;
}

/// <summary>
/// Return the xml:lang value in scope on the current reader; if it is empty and a top-level
/// reader has been saved, use the top-level reader's value instead.
/// </summary>
private string GetLanguageString()
{
    string languageString = _xamlReader.XmlLang;

    if (languageString.Length == 0)
    {
        // Check whether there is a parent XAML reader.
        if (_topLevelReader != null)
        {
            languageString = _topLevelReader.XmlLang;
        }
    }

    return languageString;
}

/// <summary>
/// Move the reader past the current element and all of its descendants.
/// </summary>
private void SkipCurrentElement()
{
    _xamlReader.Skip();
}

/// <summary>
/// Tell whether name has the form "X.property".
/// </summary>
/// <param name="name">Local name of an element or attribute.</param>
/// <param name="propertyName">On success, the text following the first '.'; null otherwise.</param>
/// <returns>true if name contains a '.'.</returns>
private bool IsPrefixedPropertyName(string name, out string propertyName)
{
    int suffixStart = name.IndexOf('.');
    if (suffixStart == -1)
    {
        propertyName = null;
        return false;
    }
    propertyName = name.Substring(suffixStart + 1);
    return true;
}

///
/// 0 is an illegal value, so this function never returns 0.
/// After the counter reaches UInt32.MaxValue we assert, since such a
/// high number for chunks is most likely an indicator of some other
/// problem in the system/code.
///
private uint AllocateChunkID()
{
    // Strictly less than: at UInt32.MaxValue the increment below would wrap around to the
    // illegal ID 0. (A "<=" comparison against MaxValue can never fail for a uint.)
    Invariant.Assert(_currentChunkID < UInt32.MaxValue);

    ++_currentChunkID;

    return _currentChunkID;
}

/// <summary>
/// Find an attribute named propertyName or X.propertyName.
/// </summary>
/// <param name="propertyName">Unqualified property name to look for.</param>
/// <returns>The attribute value, or null if no such attribute exists on the current element.</returns>
private string GetPropertyAsAttribute(string propertyName)
{
    // Fast path: plain attribute with that exact name.
    string value = _xamlReader.GetAttribute(propertyName);
    if (value != null)
    {
        return value;
    }

    // Otherwise scan the attributes for one of the form X.propertyName.
    bool attributeFound = _xamlReader.MoveToFirstAttribute();
    while (attributeFound)
    {
        string attributePropertyName;
        if (IsPrefixedPropertyName(_xamlReader.LocalName, out attributePropertyName)
            && attributePropertyName.Equals(propertyName, StringComparison.Ordinal))
        {
            value = _xamlReader.Value;
            break;
        }

        // Advance reader.
        attributeFound = _xamlReader.MoveToNextAttribute();
    }

    // Reposition reader on owner element.
    _xamlReader.MoveToElement();

    return value;
}

#region Context Stack Accessors

/// <summary>
/// Return the descriptor of the innermost open element, or null when no element is open.
/// </summary>
private ContentDescriptor TopOfStack()
{
    // Stack.Peek throws on an empty stack, whereas callers rely on null to detect
    // content that occurs outside of any element.
    if (_contextStack.Count == 0)
    {
        return null;
    }
    return (ContentDescriptor)_contextStack.Peek();
}

/// <summary>
/// Record entry into an element. Entering a block (non-inline) element means the next
/// chunk starts a new block.
/// </summary>
private void Push(ContentDescriptor contentDescriptor)
{
    if (!contentDescriptor.IsInline)
    {
        _expectingBlockStart = true;
    }
    _contextStack.Push(contentDescriptor);
}

/// <summary>
/// Record exit from the innermost open element and return its descriptor.
/// </summary>
private ContentDescriptor Pop()
{
    ContentDescriptor topOfStack = (ContentDescriptor)_contextStack.Pop();

    // If we reach an end of block, we expect the next item to
    // start with a block separator.
    if (!topOfStack.IsInline)
    {
        _expectingBlockStart = true;
    }
    return topOfStack;
}

/// <summary>
/// Forget all open elements.
/// </summary>
private void ClearStack()
{
    _contextStack.Clear();
}

#endregion Context Stack Accessors

#endregion Private Methods

#region Private Constants

///XML namespace URI for in-document code.
private const string _inDocumentCodeURI = "http://schemas.microsoft.com/winfx/2006/xaml";

// Tag and attribute names.
private const string _pageContentName = "PageContent";
private const string _glyphRunName = "Glyphs";
private const string _pageContentSourceAttribute = "Source";
private const string _fixedPageName = "FixedPage";
private const string _xmlLangAttribute = "xml:lang";
private const string _paragraphSeparator = "\u2029";
private const string _unicodeStringAttribute = "UnicodeString";

/// <summary>
/// The default content descriptor has content in child nodes, no title, and block-type content.
/// </summary>
private readonly ContentDescriptor _defaultContentDescriptor =
    new ContentDescriptor(true /*hasIndexableContent*/, false /*isInline*/, null, null);

// Descriptor used for elements whose content must not be indexed.
private readonly ContentDescriptor _nonIndexableElementDescriptor = new ContentDescriptor(false);

// Static fields.
// Property spec attached to every chunk: storage property set, CONTENTS property id.
private static readonly ManagedFullPropSpec _propSpec =
    new ManagedFullPropSpec(IndexingFilterMarshaler.PSGUID_STORAGE, (uint)MS.Internal.Interop.PID_STG.CONTENTS);

#endregion Private Constants

#region Private Fields

// Variables initialized in constructor and InitializeDeclaredFields.
private Stack _contextStack;
private FilterState _filterState;
private string _currentContent;
private uint _currentChunkID;
private int _countOfCharactersReturned;
private IndexingContentUnit _indexingContentUnit;
private bool _expectingBlockStart;
private XmlReader _topLevelReader;
private FixedPageContentExtractor _fixedPageContentExtractor;
private XmlDocument _fixedPageDomTree;

// Variables initialized in constructor and (potentially, if implemented some day) in IPersistFile.Load.
private Stream _xamlStream;

// Variables initialized in Init.
private bool _filterContents; //defaults to false
private bool _returnCanonicalParagraphBreaks; //defaults to false

// Reader state variables (initialized in CreateXmlReader).
private XmlReader _xamlReader;
private AttributesToIgnore _attributesToIgnore;

///Map from fully qualified element name to content location information.
private Hashtable _xamlElementContentDescriptorDictionary;

//Dictionary of Language strings and the corresponding LCID.
private Dictionary<string, uint> _lcidDictionary;

#endregion Private Fields
} // class XamlFilter
#endregion XamlFilter
} // namespace MS.Internal.IO.Packaging

// File provided for Reference Use Only by Microsoft Corporation (c) 2007.
// Copyright (c) Microsoft Corporation. All rights reserved.
Link Menu
This book is available now!
Buy at Amazon US or
Buy at Amazon UK
- ToolboxBitmapAttribute.cs
- ShutDownListener.cs
- NetNamedPipeSecurityMode.cs
- AlternateView.cs
- InstalledFontCollection.cs
- figurelength.cs
- SwitchElementsCollection.cs
- FusionWrap.cs
- SessionPageStateSection.cs
- SimpleRecyclingCache.cs
- KeyValuePairs.cs
- AsyncDataRequest.cs
- TextServicesHost.cs
- URLMembershipCondition.cs
- XmlTypeAttribute.cs
- TypeElementCollection.cs
- StaticTextPointer.cs
- ObjectDataSourceStatusEventArgs.cs
- RoleGroupCollectionEditor.cs
- UInt16.cs
- HandleExceptionArgs.cs
- RequestQueue.cs
- contentDescriptor.cs
- NullRuntimeConfig.cs
- AbstractDataSvcMapFileLoader.cs
- WebPartPersonalization.cs
- OdbcStatementHandle.cs
- CodeVariableDeclarationStatement.cs
- COM2PropertyDescriptor.cs
- WizardStepBase.cs
- OleDbConnectionFactory.cs
- UrlMappingsSection.cs
- AttributeSetAction.cs
- ApplicationActivator.cs
- SQLResource.cs
- DesignerForm.cs
- WebServiceReceiveDesigner.cs
- QilTargetType.cs
- MessageContractImporter.cs
- SystemUdpStatistics.cs
- SocketElement.cs
- ResourcesGenerator.cs
- ObjectQuery_EntitySqlExtensions.cs
- SubpageParagraph.cs
- FtpRequestCacheValidator.cs
- NavigationPropertySingletonExpression.cs
- ThreadStaticAttribute.cs
- ExpressionSelection.cs
- PauseStoryboard.cs
- LinkUtilities.cs
- EmbeddedMailObjectsCollection.cs
- ComponentChangedEvent.cs
- PostBackTrigger.cs
- base64Transforms.cs
- AutomationTextAttribute.cs
- DetailsViewAutoFormat.cs
- XmlSchemaException.cs
- ObjectReaderCompiler.cs
- CodeAttributeArgumentCollection.cs
- WeakReferenceKey.cs
- SubstitutionResponseElement.cs
- TreeViewItemAutomationPeer.cs
- RelationalExpressions.cs
- ConfigXmlComment.cs
- EventData.cs
- Bitmap.cs
- InstalledVoice.cs
- InputManager.cs
- WebBrowser.cs
- XamlBrushSerializer.cs
- SafeTimerHandle.cs
- MetafileHeader.cs
- StaticSiteMapProvider.cs
- CommandConverter.cs
- DataGridViewCellMouseEventArgs.cs
- Point3D.cs
- MobileCapabilities.cs
- HitTestParameters3D.cs
- DesignTimeTemplateParser.cs
- XmlILStorageConverter.cs
- Visual3DCollection.cs
- ObjectDataProvider.cs
- ParserOptions.cs
- SQLMembershipProvider.cs
- TypedRowGenerator.cs
- Formatter.cs
- HtmlShimManager.cs
- PropertyValueChangedEvent.cs
- WebPartAddingEventArgs.cs
- GradientStop.cs
- PathStreamGeometryContext.cs
- RuntimeEnvironment.cs
- BitmapSource.cs
- Inline.cs
- HostingEnvironment.cs
- NonSerializedAttribute.cs
- TraversalRequest.cs
- MultipleViewPatternIdentifiers.cs
- Icon.cs
- SvcMapFileLoader.cs