From ff48ea1baeafa82835bc211c261e0b7979080248 Mon Sep 17 00:00:00 2001 From: Lee Thomason Date: Mon, 9 Dec 2024 21:17:33 -0800 Subject: [PATCH] improve fix --- tinyxml2.cpp | 129 +++++++++++++++++++++++---------------------------- 1 file changed, 57 insertions(+), 72 deletions(-) diff --git a/tinyxml2.cpp b/tinyxml2.cpp index ad4f62ca..b5e85aad 100644 --- a/tinyxml2.cpp +++ b/tinyxml2.cpp @@ -467,104 +467,89 @@ void XMLUtil::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length } -const char* XMLUtil::GetCharacterRef( const char* p, char* value, int* length ) +const char* XMLUtil::GetCharacterRef(const char* p, char* value, int* length) { - // Presume an entity, and pull it out. + // Assume an entity, and pull it out. *length = 0; - static const uint32_t MAX_CODE_POINT = 0x10FFFF; + static const uint32_t MAX_CODE_POINT = 0x10FFFF; - if ( *(p+1) == '#' && *(p+2) ) { + if (*(p + 1) == '#' && *(p + 2)) { uint32_t ucs = 0; ptrdiff_t delta = 0; uint32_t mult = 1; static const char SEMICOLON = ';'; - if ( *(p+2) == 'x' ) { + bool hex = false; + uint32_t radix = 10; + const char* q = 0; + char terminator = '#'; + + if (*(p + 2) == 'x') { // Hexadecimal. - const char* q = p+3; - if ( !(*q) ) { - return 0; - } + hex = true; + radix = 16; + terminator = 'x'; - q = strchr( q, SEMICOLON ); - if ( !q ) { - return 0; - } - TIXMLASSERT( *q == SEMICOLON ); + q = p + 3; + } + else { + // Decimal. + q = p + 2; + } + if (!(*q)) { + return 0; + } - delta = q-p; - --q; + q = strchr(q, SEMICOLON); + if (!q) { + return 0; + } + TIXMLASSERT(*q == SEMICOLON); - while ( *q != 'x' ) { - uint32_t digit = 0; + delta = q - p; + --q; - if ( *q >= '0' && *q <= '9' ) { - digit = *q - '0'; - } - else if ( *q >= 'a' && *q <= 'f' ) { - digit = *q - 'a' + 10; - } - else if ( *q >= 'A' && *q <= 'F' ) { - digit = *q - 'A' + 10; - } - else { - return 0; - } - TIXMLASSERT( digit < 16 ); - const unsigned int digitScaled = mult * digit; - ucs += digitScaled; - if (ucs > MAX_CODE_POINT) { - return 0; - } + while (*q != terminator) { + uint32_t digit = 0; - mult *= 16; - --q; + if (*q >= '0' && *q <= '9') { + digit = *q - '0'; } - } - else { - // Decimal. - const char* q = p+2; - if ( !(*q) ) { - return 0; + else if (hex && (*q >= 'a' && *q <= 'f')) { + digit = *q - 'a' + 10; } - - q = strchr( q, SEMICOLON ); - - if ( !q ) { + else if (hex && (*q >= 'A' && *q <= 'F')) { + digit = *q - 'A' + 10; + } + else { return 0; } - TIXMLASSERT( *q == SEMICOLON ); - - delta = q-p; - --q; - - while ( *q != '#' ) { - if ( *q >= '0' && *q <= '9' ) { - const uint32_t digit = *q - '0'; - TIXMLASSERT( digit < 10 ); - const uint32_t digitScaled = mult * digit; - ucs += digitScaled; - if (ucs > MAX_CODE_POINT) { - return 0; - } - } - else { - return 0; - } - mult *= 10; - --q; + TIXMLASSERT(digit < radix); + + const unsigned int digitScaled = mult * digit; + ucs += digitScaled; + mult *= radix; + + // Security check: could a value exist that is out of range? + // Easily; limit to the MAX_CODE_POINT, which also allows for a + // bunch of leading zeroes. + if (mult > MAX_CODE_POINT) { + mult = MAX_CODE_POINT; } + --q; + } + // Out of range: + if (ucs > MAX_CODE_POINT) { + return 0; } // convert the UCS to UTF-8 - TIXMLASSERT(ucs <= MAX_CODE_POINT); - ConvertUTF32ToUTF8( ucs, value, length ); + ConvertUTF32ToUTF8(ucs, value, length); return p + delta + 1; } - return p+1; + return p + 1; } - void XMLUtil::ToStr( int v, char* buffer, int bufferSize ) { TIXML_SNPRINTF( buffer, bufferSize, "%d", v );