diff --git a/engine/core/html/parser/HTMLEntityParser.cpp b/engine/core/html/parser/HTMLEntityParser.cpp index dc30c83715f4f72ba2b92a598ad4d987b51370c8..6d90746bbe62d43be202489f887aa0f010c27cc7 100644 --- a/engine/core/html/parser/HTMLEntityParser.cpp +++ b/engine/core/html/parser/HTMLEntityParser.cpp @@ -1,305 +1,171 @@ -/* - * Copyright (C) 2008 Apple Inc. All Rights Reserved. - * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ - * Copyright (C) 2010 Google, Inc. All Rights Reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. #include "config.h" #include "core/html/parser/HTMLEntityParser.h" -#include "core/html/parser/HTMLEntitySearch.h" -#include "core/html/parser/HTMLEntityTable.h" -#include "wtf/text/StringBuilder.h" +#include "wtf/unicode/CharacterNames.h" using namespace WTF; namespace blink { -static const UChar windowsLatin1ExtensionArray[32] = { - 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87 - 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F - 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97 - 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178, // 98-9F -}; - -static bool isAlphaNumeric(UChar cc) -{ - return (cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'z') || (cc >= 'A' && cc <= 'Z'); -} +static const UChar32 kInvalidUnicode = -1; -static UChar adjustEntity(UChar32 value) +static UChar asHexDigit(UChar cc) { - if ((value & ~0x1F) != 0x0080) - return value; - return windowsLatin1ExtensionArray[value - 0x80]; + if (cc >= '0' && cc <= '9') + return cc - '0'; + if (cc >= 'a' && cc <= 'f') + return 10 + cc - 'a'; + if (cc >= 'A' && cc <= 'F') + return 10 + cc - 'A'; + ASSERT_NOT_REACHED(); + return 0; } -static void appendLegalEntityFor(UChar32 c, DecodedHTMLEntity& decodedEntity) +static bool isAlphaNumeric(UChar cc) { - // FIXME: A number of specific entity values generate parse errors. - if (c <= 0 || c > 0x10FFFF || (c >= 0xD800 && c <= 0xDFFF)) { - decodedEntity.append(0xFFFD); - return; - } - if (U_IS_BMP(c)) { - decodedEntity.append(adjustEntity(c)); - return; - } - decodedEntity.append(c); + return (cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'z') || (cc >= 'A' && cc <= 'Z'); } -static const UChar32 kInvalidUnicode = -1; - static bool isHexDigit(UChar cc) { return (cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'f') || (cc >= 'A' && cc <= 'F'); } -static UChar asHexDigit(UChar cc) +static UChar decodeEntity(HTMLEntityParser::OutputBuffer buffer) { - if (cc >= '0' && cc <= '9') - return cc - '0'; - if (cc >= 'a' && cc <= 'z') - return 10 + cc - 'a'; - if (cc >= 'A' && cc <= 'Z') - return 10 + cc - 'A'; - ASSERT_NOT_REACHED(); - return 0; + if (equalIgnoringNullity(buffer, "&")) + return '&'; + if (equalIgnoringNullity(buffer, "&apos")) + return '\''; + if (equalIgnoringNullity(buffer, ">")) + return '>'; + if (equalIgnoringNullity(buffer, "<")) + return '<'; + if (equalIgnoringNullity(buffer, """)) + return '"'; + return replacementCharacter; } -typedef Vector ConsumedCharacterBuffer; - -static void unconsumeCharacters(SegmentedString& source, ConsumedCharacterBuffer& consumedCharacters) +HTMLEntityParser::HTMLEntityParser() { - if (consumedCharacters.size() == 1) - source.push(consumedCharacters[0]); - else if (consumedCharacters.size() == 2) { - source.push(consumedCharacters[0]); - source.push(consumedCharacters[1]); - } else - source.prepend(SegmentedString(String(consumedCharacters))); } -static bool consumeNamedEntity(SegmentedString& source, DecodedHTMLEntity& decodedEntity, bool& notEnoughCharacters, UChar additionalAllowedCharacter, UChar& cc) +HTMLEntityParser::~HTMLEntityParser() { - ConsumedCharacterBuffer consumedCharacters; - HTMLEntitySearch entitySearch; - while (!source.isEmpty()) { - cc = source.currentChar(); - entitySearch.advance(cc); - if (!entitySearch.isEntityPrefix()) - break; - consumedCharacters.append(cc); - source.advanceAndASSERT(cc); - } - notEnoughCharacters = source.isEmpty(); - if (notEnoughCharacters) { - // We can't decide on an entity because there might be a longer entity - // that we could match if we had more data. - unconsumeCharacters(source, consumedCharacters); - return false; - } - if (!entitySearch.mostRecentMatch()) { - unconsumeCharacters(source, consumedCharacters); - return false; - } - if (entitySearch.mostRecentMatch()->length != entitySearch.currentLength()) { - // We've consumed too many characters. We need to walk the - // source back to the point at which we had consumed an - // actual entity. - unconsumeCharacters(source, consumedCharacters); - consumedCharacters.clear(); - const HTMLEntityTableEntry* mostRecent = entitySearch.mostRecentMatch(); - const int length = mostRecent->length; - const LChar* reference = HTMLEntityTable::entityString(*mostRecent); - for (int i = 0; i < length; ++i) { - cc = source.currentChar(); - ASSERT_UNUSED(reference, cc == static_cast(*reference++)); - consumedCharacters.append(cc); - source.advanceAndASSERT(cc); - ASSERT(!source.isEmpty()); - } - cc = source.currentChar(); - } - if (entitySearch.mostRecentMatch()->lastCharacter() == ';' - || !additionalAllowedCharacter - || !(isAlphaNumeric(cc) || cc == '=')) { - decodedEntity.append(entitySearch.mostRecentMatch()->firstValue); - if (UChar32 second = entitySearch.mostRecentMatch()->secondValue) - decodedEntity.append(second); - return true; - } - unconsumeCharacters(source, consumedCharacters); - return false; } -bool consumeHTMLEntity(SegmentedString& source, DecodedHTMLEntity& decodedEntity, bool& notEnoughCharacters, UChar additionalAllowedCharacter) +void HTMLEntityParser::reset() { - ASSERT(!additionalAllowedCharacter || additionalAllowedCharacter == '"' || additionalAllowedCharacter == '\'' || additionalAllowedCharacter == '>'); - ASSERT(!notEnoughCharacters); - ASSERT(decodedEntity.isEmpty()); - - enum EntityState { - Initial, - Number, - MaybeHexLowerCaseX, - MaybeHexUpperCaseX, - Hex, - Decimal, - Named - }; - EntityState entityState = Initial; - UChar32 result = 0; - ConsumedCharacterBuffer consumedCharacters; + m_state = Initial; + m_result = '\0'; + m_buffer.clear(); + m_buffer.append('&'); +} +bool HTMLEntityParser::parse(SegmentedString& source) +{ while (!source.isEmpty()) { UChar cc = source.currentChar(); - switch (entityState) { + switch (m_state) { case Initial: { - if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ' || cc == '<' || cc == '&') - return false; - if (additionalAllowedCharacter && cc == additionalAllowedCharacter) - return false; if (cc == '#') { - entityState = Number; + m_state = Numeric; break; } - if ((cc >= 'a' && cc <= 'z') || (cc >= 'A' && cc <= 'Z')) { - entityState = Named; + if (isAlphaNumeric(cc)) { + m_state = Named; continue; } - return false; + return true; } - case Number: { - if (cc == 'x') { - entityState = MaybeHexLowerCaseX; - break; - } - if (cc == 'X') { - entityState = MaybeHexUpperCaseX; + case Numeric: { + if (cc == 'x' || cc == 'X') { + m_state = PossiblyHex; break; } if (cc >= '0' && cc <= '9') { - entityState = Decimal; + m_state = Decimal; continue; } - source.push('#'); - return false; + return true; } - case MaybeHexLowerCaseX: { + case PossiblyHex: { if (isHexDigit(cc)) { - entityState = Hex; + m_state = Hex; continue; } - source.push('#'); - source.push('x'); - return false; - } - case MaybeHexUpperCaseX: { - if (isHexDigit(cc)) { - entityState = Hex; - continue; - } - source.push('#'); - source.push('X'); - return false; + return true; } case Hex: { if (isHexDigit(cc)) { - if (result != kInvalidUnicode) - result = result * 16 + asHexDigit(cc); - } else if (cc == ';') { + if (m_result != kInvalidUnicode) + m_result = m_result * 16 + asHexDigit(cc); + break; + } + if (cc == ';') { source.advanceAndASSERT(cc); - appendLegalEntityFor(result, decodedEntity); - return true; - } else { - appendLegalEntityFor(result, decodedEntity); + finalizeNumericEntity(); return true; } - break; + return true; } case Decimal: { if (cc >= '0' && cc <= '9') { - if (result != kInvalidUnicode) - result = result * 10 + cc - '0'; - } else if (cc == ';') { + if (m_result != kInvalidUnicode) + m_result = m_result * 10 + cc - '0'; + break; + } + if (cc == ';') { source.advanceAndASSERT(cc); - appendLegalEntityFor(result, decodedEntity); - return true; - } else { - appendLegalEntityFor(result, decodedEntity); + finalizeNumericEntity(); return true; } - break; + return true; } case Named: { - return consumeNamedEntity(source, decodedEntity, notEnoughCharacters, additionalAllowedCharacter, cc); + if (isAlphaNumeric(cc)) + break; + if (cc == ';') { + source.advanceAndASSERT(cc); + finalizeNamedEntity(); + return true; + } + return true; } } - if (result > UCHAR_MAX_VALUE) - result = kInvalidUnicode; + if (m_result > UCHAR_MAX_VALUE) + m_result = kInvalidUnicode; - consumedCharacters.append(cc); + m_buffer.append(cc); source.advanceAndASSERT(cc); } ASSERT(source.isEmpty()); - notEnoughCharacters = true; - unconsumeCharacters(source, consumedCharacters); return false; } -static size_t appendUChar32ToUCharArray(UChar32 value, UChar* result) +void HTMLEntityParser::finalizeNumericEntity() { - if (U_IS_BMP(value)) { - UChar character = static_cast(value); - ASSERT(character == value); - result[0] = character; - return 1; + m_buffer.clear(); + if (m_result <= 0 || m_result > 0x10FFFF || (m_result >= 0xD800 && m_result <= 0xDFFF)) { + m_buffer.append(replacementCharacter); + } else if (U_IS_BMP(m_result)) { + m_buffer.append(m_result); + } else { + m_buffer.append(U16_LEAD(m_result)); + m_buffer.append(U16_TRAIL(m_result)); } - - result[0] = U16_LEAD(value); - result[1] = U16_TRAIL(value); - return 2; } -size_t decodeNamedEntityToUCharArray(const char* name, UChar result[4]) +void HTMLEntityParser::finalizeNamedEntity() { - HTMLEntitySearch search; - while (*name) { - search.advance(*name++); - if (!search.isEntityPrefix()) - return 0; - } - search.advance(';'); - if (!search.isEntityPrefix()) - return 0; - - size_t numberOfCodePoints = appendUChar32ToUCharArray(search.mostRecentMatch()->firstValue, result); - if (!search.mostRecentMatch()->secondValue) - return numberOfCodePoints; - return numberOfCodePoints + appendUChar32ToUCharArray(search.mostRecentMatch()->secondValue, result + numberOfCodePoints); + UChar decodedEntity = decodeEntity(m_buffer); + m_buffer.clear(); + m_buffer.append(decodedEntity); } } // namespace blink diff --git a/engine/core/html/parser/HTMLEntityParser.h b/engine/core/html/parser/HTMLEntityParser.h index 42df842aa564d2d9afc6bc43552e77dc6aa7b30d..e146c4ec6ff1e60f94cedc844a5db865f90df61e 100644 --- a/engine/core/html/parser/HTMLEntityParser.h +++ b/engine/core/html/parser/HTMLEntityParser.h @@ -31,40 +31,35 @@ namespace blink { -class DecodedHTMLEntity { -private: - // HTML entities contain at most four UTF-16 code units. - static const unsigned kMaxLength = 4; - +class HTMLEntityParser { public: - DecodedHTMLEntity() : length(0) { } + typedef Vector OutputBuffer; - bool isEmpty() const { return !length; } + HTMLEntityParser(); + ~HTMLEntityParser(); - void append(UChar c) - { - RELEASE_ASSERT(length < kMaxLength); - data[length++] = c; - } + void reset(); + bool parse(SegmentedString&); - void append(UChar32 c) - { - if (U_IS_BMP(c)) { - append(static_cast(c)); - return; - } - append(U16_LEAD(c)); - append(U16_TRAIL(c)); - } + const OutputBuffer& result() const { return m_buffer; } - unsigned length; - UChar data[kMaxLength]; -}; +private: + enum EntityState { + Initial, + Numeric, + PossiblyHex, + Hex, + Decimal, + Named + }; -bool consumeHTMLEntity(SegmentedString&, DecodedHTMLEntity& decodedEntity, bool& notEnoughCharacters, UChar additionalAllowedCharacter = '\0'); + void finalizeNumericEntity(); + void finalizeNamedEntity(); -// Used by the XML parser. Not suitable for use in HTML parsing. Use consumeHTMLEntity instead. -size_t decodeNamedEntityToUCharArray(const char*, UChar result[4]); + EntityState m_state; + UChar32 m_result; + OutputBuffer m_buffer; +}; } diff --git a/engine/core/html/parser/HTMLTokenizer.cpp b/engine/core/html/parser/HTMLTokenizer.cpp index 7e765b277ea71084fe31a34ad8f7308930a44c34..ca454ad3cd314b958735a106c6b87d0ed7cb5b85 100644 --- a/engine/core/html/parser/HTMLTokenizer.cpp +++ b/engine/core/html/parser/HTMLTokenizer.cpp @@ -111,24 +111,6 @@ void HTMLTokenizer::reset() { m_state = HTMLTokenizer::DataState; m_token = 0; - m_additionalAllowedCharacter = '\0'; -} - -inline bool HTMLTokenizer::processEntity(SegmentedString& source) -{ - bool notEnoughCharacters = false; - DecodedHTMLEntity decodedEntity; - bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters); - if (notEnoughCharacters) - return false; - if (!success) { - ASSERT(decodedEntity.isEmpty()); - bufferCharacter('&'); - } else { - for (unsigned i = 0; i < decodedEntity.length; ++i) - bufferCharacter(decodedEntity.data[i]); - } - return true; } bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source) @@ -146,7 +128,7 @@ bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source) #define FLUSH_AND_ADVANCE_TO(stateName) \ do { \ - m_state = HTMLTokenizer::stateName; \ + m_state = HTMLTokenizer::stateName; \ if (flushBufferedEndTag(source)) \ return true; \ if (source.isEmpty() \ @@ -190,9 +172,11 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) // Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0 switch (m_state) { HTML_BEGIN_STATE(DataState) { - if (cc == '&') + if (cc == '&') { + m_returnState = DataState; + m_entityParser.reset(); HTML_ADVANCE_TO(CharacterReferenceInDataState); - else if (cc == '<') { + } else if (cc == '<') { if (m_token->type() == HTMLToken::Character) { // We have a bunch of character tokens queued up that we // are emitting lazily here. @@ -209,12 +193,34 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) END_STATE() HTML_BEGIN_STATE(CharacterReferenceInDataState) { - if (!processEntity(source)) + if (!m_entityParser.parse(source)) return haveBufferedCharacterToken(); + for (const UChar& entityCharacter : m_entityParser.result()) + bufferCharacter(entityCharacter); + cc = m_inputStreamPreprocessor.nextInputCharacter(); + ASSERT(m_returnState == m_returnState); HTML_SWITCH_TO(DataState); } END_STATE() + HTML_BEGIN_STATE(CharacterReferenceInAttributeValueState) { + if (!m_entityParser.parse(source)) + return haveBufferedCharacterToken(); + for (const UChar& entityCharacter : m_entityParser.result()) + m_token->appendToAttributeValue(entityCharacter); + cc = m_inputStreamPreprocessor.nextInputCharacter(); + + if (m_returnState == AttributeValueDoubleQuotedState) + HTML_SWITCH_TO(AttributeValueDoubleQuotedState); + else if (m_returnState == AttributeValueSingleQuotedState) + HTML_SWITCH_TO(AttributeValueSingleQuotedState); + else if (m_returnState == AttributeValueUnquotedState) + HTML_SWITCH_TO(AttributeValueUnquotedState); + else + ASSERT_NOT_REACHED(); + } + END_STATE() + HTML_BEGIN_STATE(RAWTEXTState) { if (cc == '<') HTML_ADVANCE_TO(RAWTEXTLessThanSignState); @@ -477,7 +483,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) m_token->endAttributeValue(source.numberOfCharactersConsumed()); HTML_ADVANCE_TO(AfterAttributeValueQuotedState); } else if (cc == '&') { - m_additionalAllowedCharacter = '"'; + m_returnState = AttributeValueDoubleQuotedState; + m_entityParser.reset(); HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState); } else if (cc == kEndOfFileMarker) { parseError(); @@ -495,7 +502,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) m_token->endAttributeValue(source.numberOfCharactersConsumed()); HTML_ADVANCE_TO(AfterAttributeValueQuotedState); } else if (cc == '&') { - m_additionalAllowedCharacter = '\''; + m_returnState = AttributeValueSingleQuotedState; + m_entityParser.reset(); HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState); } else if (cc == kEndOfFileMarker) { parseError(); @@ -513,7 +521,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) m_token->endAttributeValue(source.numberOfCharactersConsumed()); HTML_ADVANCE_TO(BeforeAttributeNameState); } else if (cc == '&') { - m_additionalAllowedCharacter = '>'; + m_returnState = AttributeValueUnquotedState; + m_entityParser.reset(); HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState); } else if (cc == '>') { m_token->endAttributeValue(source.numberOfCharactersConsumed()); @@ -531,34 +540,6 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) } END_STATE() - HTML_BEGIN_STATE(CharacterReferenceInAttributeValueState) { - bool notEnoughCharacters = false; - DecodedHTMLEntity decodedEntity; - bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters, m_additionalAllowedCharacter); - if (notEnoughCharacters) - return haveBufferedCharacterToken(); - if (!success) { - ASSERT(decodedEntity.isEmpty()); - m_token->appendToAttributeValue('&'); - } else { - for (unsigned i = 0; i < decodedEntity.length; ++i) - m_token->appendToAttributeValue(decodedEntity.data[i]); - } - // We're supposed to switch back to the attribute value state that - // we were in when we were switched into this state. Rather than - // keeping track of this explictly, we observe that the previous - // state can be determined by m_additionalAllowedCharacter. - if (m_additionalAllowedCharacter == '"') - HTML_SWITCH_TO(AttributeValueDoubleQuotedState); - else if (m_additionalAllowedCharacter == '\'') - HTML_SWITCH_TO(AttributeValueSingleQuotedState); - else if (m_additionalAllowedCharacter == '>') - HTML_SWITCH_TO(AttributeValueUnquotedState); - else - ASSERT_NOT_REACHED(); - } - END_STATE() - HTML_BEGIN_STATE(AfterAttributeValueQuotedState) { if (isTokenizerWhitespace(cc)) HTML_ADVANCE_TO(BeforeAttributeNameState); diff --git a/engine/core/html/parser/HTMLTokenizer.h b/engine/core/html/parser/HTMLTokenizer.h index 302b6482e1c2b58500599708fd350a23f3fb2b14..a22cf50df65ecf530b9d1f0b6ea2b889945ebcdc 100644 --- a/engine/core/html/parser/HTMLTokenizer.h +++ b/engine/core/html/parser/HTMLTokenizer.h @@ -27,6 +27,7 @@ #ifndef HTMLTokenizer_h #define HTMLTokenizer_h +#include "core/html/parser/HTMLEntityParser.h" #include "core/html/parser/HTMLToken.h" #include "core/html/parser/InputStreamPreprocessor.h" #include "platform/text/SegmentedString.h" @@ -45,6 +46,7 @@ public: enum State { DataState, CharacterReferenceInDataState, + CharacterReferenceInAttributeValueState, RAWTEXTState, TagOpenState, EndTagOpenState, @@ -59,7 +61,6 @@ public: AttributeValueDoubleQuotedState, AttributeValueSingleQuotedState, AttributeValueUnquotedState, - CharacterReferenceInAttributeValueState, AfterAttributeValueQuotedState, SelfClosingStartTagState, BogusCommentState, @@ -87,8 +88,6 @@ public: private: HTMLTokenizer(); - inline bool processEntity(SegmentedString&); - inline void parseError(); inline void bufferCharacter(UChar character) @@ -156,11 +155,11 @@ private: // this member might be pointing to unallocated memory. HTMLToken* m_token; - // http://www.whatwg.org/specs/web-apps/current-work/#additional-allowed-character - UChar m_additionalAllowedCharacter; + State m_returnState; // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream InputStreamPreprocessor m_inputStreamPreprocessor; + HTMLEntityParser m_entityParser; Vector m_appropriateEndTagName; diff --git a/tests/parser/entity-expected.txt b/tests/parser/entity-expected.txt new file mode 100644 index 0000000000000000000000000000000000000000..b13a39b09cc8599df8184b7bf421fb47e9306452 --- /dev/null +++ b/tests/parser/entity-expected.txt @@ -0,0 +1,19 @@ + + + +
aaa'bbb
+
<
+
aaa�bbb
+
A
+
&
+
&#
+
&#x
+
&#x41
+
&;
+
&#;
+
&#x;
+
A
+
+ + + diff --git a/tests/parser/entity.html b/tests/parser/entity.html new file mode 100644 index 0000000000000000000000000000000000000000..0abc6a965c9d87f1c4a7d12f9bddb084d545519e --- /dev/null +++ b/tests/parser/entity.html @@ -0,0 +1,18 @@ + + + +
aaa'bbb
+
<
+
aaa&xxx;bbb
+
A
+
&
+
&#
+
&#x
+
A
+
&;
+
&#;
+
&#x;
+
A
+
+ + diff --git a/tests/resources/dump-as-markup.html b/tests/resources/dump-as-markup.html new file mode 100644 index 0000000000000000000000000000000000000000..81e849faf859a051d12a3109ec9bd336fa9924cc --- /dev/null +++ b/tests/resources/dump-as-markup.html @@ -0,0 +1,5 @@ +