diff --git a/tinyxml2.cpp b/tinyxml2.cpp index 4e95fb8c..a71445ea 100755 --- a/tinyxml2.cpp +++ b/tinyxml2.cpp @@ -715,7 +715,7 @@ bool XMLUtil::ToUnsigned64(const char* str, uint64_t* value) { } -char* XMLDocument::Identify( char* p, XMLNode** node ) +char* XMLDocument::Identify( char* p, XMLNode** node, bool first ) { TIXMLASSERT( node ); TIXMLASSERT( p ); @@ -767,9 +767,19 @@ char* XMLDocument::Identify( char* p, XMLNode** node ) p += dtdHeaderLen; } else if ( XMLUtil::StringEqual( p, elementHeader, elementHeaderLen ) ) { - returnNode = CreateUnlinkedNode( _elementPool ); - returnNode->_parseLineNum = _parseCurLineNum; - p += elementHeaderLen; + + // Preserve whitespace pedantically before closing tag, when it's immediately after opening tag + if (WhitespaceMode() == PEDANTIC_WHITESPACE && first && p != start && *(p + elementHeaderLen) == '/') { + returnNode = CreateUnlinkedNode(_textPool); + returnNode->_parseLineNum = startLine; + p = start; // Back it up, all the text counts. + _parseCurLineNum = startLine; + } + else { + returnNode = CreateUnlinkedNode(_elementPool); + returnNode->_parseLineNum = _parseCurLineNum; + p += elementHeaderLen; + } } else { returnNode = CreateUnlinkedNode( _textPool ); @@ -1098,14 +1108,16 @@ char* XMLNode::ParseDeep( char* p, StrPair* parentEndTag, int* curLineNumPtr ) if (_document->Error()) return 0; + bool first = true; while( p && *p ) { XMLNode* node = 0; - p = _document->Identify( p, &node ); + p = _document->Identify( p, &node, first ); TIXMLASSERT( p ); if ( node == 0 ) { break; } + first = false; const int initialLineNum = node->_parseLineNum; diff --git a/tinyxml2.h b/tinyxml2.h index da9a5a77..bab582c3 100755 --- a/tinyxml2.h +++ b/tinyxml2.h @@ -1710,7 +1710,8 @@ class TINYXML2_LIB XMLElement : public XMLNode enum Whitespace { PRESERVE_WHITESPACE, - COLLAPSE_WHITESPACE + COLLAPSE_WHITESPACE, + PEDANTIC_WHITESPACE }; @@ -1921,7 +1922,7 @@ class TINYXML2_LIB XMLDocument : public XMLNode void DeepCopy(XMLDocument* target) const; // internal - char* Identify( char* p, XMLNode** node ); + char* Identify( char* p, XMLNode** node, bool first ); // internal void MarkInUse(const XMLNode* const); diff --git a/xmltest.cpp b/xmltest.cpp index c3ce079e..ae976042 100755 --- a/xmltest.cpp +++ b/xmltest.cpp @@ -1869,6 +1869,178 @@ int main( int argc, const char ** argv ) XMLTest( "Whitespace all space", true, 0 == doc.FirstChildElement()->FirstChild() ); } + // ----------- Preserve Whitespace ------------ + { + const char* xml = "This is ' \n\n text '"; + XMLDocument doc(true, PRESERVE_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with whitespace preserved", false, doc.Error()); + XMLTest("Whitespace preserved", "This is ' \n\n text '", doc.FirstChildElement()->GetText()); + } + + { + const char* xml = " This \nis ' text ' "; + XMLDocument doc(true, PRESERVE_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with whitespace preserved", false, doc.Error()); + XMLTest("Whitespace preserved", " This \nis ' text ' ", doc.FirstChildElement()->GetText()); + } + + { + const char* xml = " \n This is ' text ' \n"; + XMLDocument doc(true, PRESERVE_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with whitespace preserved", false, doc.Error()); + XMLTest("Whitespace preserved", " \n This is ' text ' \n", doc.FirstChildElement()->GetText()); + } + + // Following cases are for text that is all whitespace which are not preserved intentionally + { + const char* xml = " "; + XMLDocument doc(true, PRESERVE_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with whitespace preserved", false, doc.Error()); + XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText()); + } + + { + const char* xml = " "; + XMLDocument doc(true, PRESERVE_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with whitespace preserved", false, doc.Error()); + XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText()); + } + + { + const char* xml = "\n\n"; + XMLDocument doc(true, PRESERVE_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with whitespace preserved", false, doc.Error()); + XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText()); + } + + { + const char* xml = " \n"; + XMLDocument doc(true, PRESERVE_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with whitespace preserved", false, doc.Error()); + XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText()); + } + + { + const char* xml = " \n \n "; + XMLDocument doc(true, PRESERVE_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with whitespace preserved", false, doc.Error()); + XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText()); + } + + // ----------- Pedantic Whitespace ------------ + { + const char* xml = "This is ' \n\n text '"; + XMLDocument doc(true, PEDANTIC_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with pedantic whitespace", false, doc.Error()); + XMLTest("Pedantic whitespace", "This is ' \n\n text '", doc.FirstChildElement()->GetText()); + } + + { + const char* xml = " This \nis ' text ' "; + XMLDocument doc(true, PEDANTIC_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with pedantic whitespace", false, doc.Error()); + XMLTest("Pedantic whitespace", " This \nis ' text ' ", doc.FirstChildElement()->GetText()); + } + + { + const char* xml = " \n This is ' text ' \n"; + XMLDocument doc(true, PEDANTIC_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with pedantic whitespace", false, doc.Error()); + XMLTest("Pedantic whitespace", " \n This is ' text ' \n", doc.FirstChildElement()->GetText()); + } + + // Following cases are for text that is all whitespace which is preserved with pedantic mode + { + const char* xml = " "; + XMLDocument doc(true, PEDANTIC_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with pedantic whitespace", false, doc.Error()); + XMLTest("Pedantic whitespace", " ", doc.FirstChildElement()->GetText()); + } + + { + const char* xml = " "; + XMLDocument doc(true, PEDANTIC_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with pedantic whitespace", false, doc.Error()); + XMLTest("Pedantic whitespace", " ", doc.FirstChildElement()->GetText()); + } + + { + const char* xml = "\n\n\n"; + XMLDocument doc(true, PEDANTIC_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with pedantic whitespace", false, doc.Error()); + XMLTest("Pedantic whitespace", "\n\n", doc.FirstChildElement()->GetText()); + } + + { + const char* xml = " \n \n "; + XMLDocument doc(true, PEDANTIC_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with pedantic whitespace", false, doc.Error()); + XMLTest("Pedantic whitespace", " \n", doc.FirstChildElement()->GetText()); + } + + { + const char* xml = " \n \n "; + XMLDocument doc(true, PEDANTIC_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with pedantic whitespace", false, doc.Error()); + XMLTest("Pedantic whitespace", " \n \n ", doc.FirstChildElement()->GetText()); + } + + // Following cases are for checking nested elements are still parsed with pedantic whitespace + { + const char* xml = "\n\t This is nested text \n "; + XMLDocument doc(true, PEDANTIC_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse nested elements with pedantic whitespace", false, doc.Error()); + XMLTest("Pedantic whitespace", " This is nested text ", doc.RootElement()->FirstChildElement()->GetText()); + } + + { + const char* xml = " \n"; + XMLDocument doc(true, PEDANTIC_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse nested elements with pedantic whitespace", false, doc.Error()); + XMLTest("Pedantic whitespace", " ", doc.RootElement()->FirstChildElement()->GetText()); + } + + { + const char* xml = " \n "; + XMLDocument doc(true, PEDANTIC_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse nested elements with pedantic whitespace", false, doc.Error()); + XMLTest("Pedantic whitespace", true, 0 == doc.RootElement()->FirstChildElement()->GetText()); + } + + // Check sample xml can be parsed with pedantic mode + { + XMLDocument doc(true, PEDANTIC_WHITESPACE); + doc.LoadFile("resources/dream.xml"); + XMLTest("Load dream.xml with pedantic whitespace mode", false, doc.Error()); + + XMLTest("Dream", "xml version=\"1.0\"", + doc.FirstChild()->ToDeclaration()->Value()); + XMLTest("Dream", true, doc.FirstChild()->NextSibling()->ToUnknown() != 0); + XMLTest("Dream", "DOCTYPE PLAY SYSTEM \"play.dtd\"", + doc.FirstChild()->NextSibling()->ToUnknown()->Value()); + XMLTest("Dream", "And Robin shall restore amends.", + doc.LastChild()->LastChild()->LastChild()->LastChild()->LastChildElement()->GetText()); + } + { // An assert should not fire. const char* xml = "";