Skip to content

Commit

Permalink
Merge pull request #1009 from leethomason/leethomason/charref
Browse files Browse the repository at this point in the history
Fix the potential overflow in char refs
  • Loading branch information
leethomason authored Dec 10, 2024
2 parents d418ac2 + ff48ea1 commit 4cbb251
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 74 deletions.
133 changes: 60 additions & 73 deletions tinyxml2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -467,102 +467,89 @@ void XMLUtil::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length
}


const char* XMLUtil::GetCharacterRef( const char* p, char* value, int* length )
const char* XMLUtil::GetCharacterRef(const char* p, char* value, int* length)
{
// Presume an entity, and pull it out.
// Assume an entity, and pull it out.
*length = 0;

if ( *(p+1) == '#' && *(p+2) ) {
unsigned long ucs = 0;
TIXMLASSERT( sizeof( ucs ) >= 4 );
static const uint32_t MAX_CODE_POINT = 0x10FFFF;

if (*(p + 1) == '#' && *(p + 2)) {
uint32_t ucs = 0;
ptrdiff_t delta = 0;
unsigned mult = 1;
uint32_t mult = 1;
static const char SEMICOLON = ';';

if ( *(p+2) == 'x' ) {
bool hex = false;
uint32_t radix = 10;
const char* q = 0;
char terminator = '#';

if (*(p + 2) == 'x') {
// Hexadecimal.
const char* q = p+3;
if ( !(*q) ) {
return 0;
}
hex = true;
radix = 16;
terminator = 'x';

q = strchr( q, SEMICOLON );
q = p + 3;
}
else {
// Decimal.
q = p + 2;
}
if (!(*q)) {
return 0;
}

if ( !q ) {
return 0;
}
TIXMLASSERT( *q == SEMICOLON );
q = strchr(q, SEMICOLON);
if (!q) {
return 0;
}
TIXMLASSERT(*q == SEMICOLON);

delta = q-p;
--q;
delta = q - p;
--q;

while ( *q != 'x' ) {
unsigned int digit = 0;
while (*q != terminator) {
uint32_t digit = 0;

if ( *q >= '0' && *q <= '9' ) {
digit = *q - '0';
}
else if ( *q >= 'a' && *q <= 'f' ) {
digit = *q - 'a' + 10;
}
else if ( *q >= 'A' && *q <= 'F' ) {
digit = *q - 'A' + 10;
}
else {
return 0;
}
TIXMLASSERT( digit < 16 );
TIXMLASSERT( digit == 0 || mult <= UINT_MAX / digit );
const unsigned int digitScaled = mult * digit;
TIXMLASSERT( ucs <= ULONG_MAX - digitScaled );
ucs += digitScaled;
TIXMLASSERT( mult <= UINT_MAX / 16 );
mult *= 16;
--q;
if (*q >= '0' && *q <= '9') {
digit = *q - '0';
}
}
else {
// Decimal.
const char* q = p+2;
if ( !(*q) ) {
return 0;
else if (hex && (*q >= 'a' && *q <= 'f')) {
digit = *q - 'a' + 10;
}

q = strchr( q, SEMICOLON );

if ( !q ) {
else if (hex && (*q >= 'A' && *q <= 'F')) {
digit = *q - 'A' + 10;
}
else {
return 0;
}
TIXMLASSERT( *q == SEMICOLON );

delta = q-p;
--q;

while ( *q != '#' ) {
if ( *q >= '0' && *q <= '9' ) {
const unsigned int digit = *q - '0';
TIXMLASSERT( digit < 10 );
TIXMLASSERT( digit == 0 || mult <= UINT_MAX / digit );
const unsigned int digitScaled = mult * digit;
TIXMLASSERT( ucs <= ULONG_MAX - digitScaled );
ucs += digitScaled;
}
else {
return 0;
}
TIXMLASSERT( mult <= UINT_MAX / 10 );
mult *= 10;
--q;
TIXMLASSERT(digit < radix);

const unsigned int digitScaled = mult * digit;
ucs += digitScaled;
mult *= radix;

// Security check: could a value exist that is out of range?
// Easily; limit to the MAX_CODE_POINT, which also allows for a
// bunch of leading zeroes.
if (mult > MAX_CODE_POINT) {
mult = MAX_CODE_POINT;
}
--q;
}
// Out of range:
if (ucs > MAX_CODE_POINT) {
return 0;
}
// convert the UCS to UTF-8
ConvertUTF32ToUTF8( ucs, value, length );
ConvertUTF32ToUTF8(ucs, value, length);
return p + delta + 1;
}
return p+1;
return p + 1;
}


void XMLUtil::ToStr( int v, char* buffer, int bufferSize )
{
TIXML_SNPRINTF( buffer, bufferSize, "%d", v );
Expand Down
31 changes: 30 additions & 1 deletion xmltest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1642,7 +1642,7 @@ int main( int argc, const char ** argv )

static const char* result = "\xef\xbb\xbf<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
XMLTest( "BOM and default declaration", result, printer.CStr(), false );
XMLTest( "CStrSize", 42, printer.CStrSize(), false );
XMLTest( "CStrSize", true, printer.CStrSize() == 42, false );
}
{
const char* xml = "<ipxml ws='1'><info bla=' /></ipxml>";
Expand Down Expand Up @@ -2666,6 +2666,35 @@ int main( int argc, const char ** argv )
doc.PrintError();
}

// ---------- CVE-2024-50615 -----------
{
const char* xml = "<Hello value='12&#65;34' value2='56&#x42;78'>Text</Hello>";
XMLDocument doc;
doc.Parse(xml);
const char* value = doc.FirstChildElement()->Attribute("value");
const char* value2 = doc.FirstChildElement()->Attribute("value2");
XMLTest("Test attribute encode", false, doc.Error());
XMLTest("Test decimal value", value, "12A34");
XMLTest("Test hex encode", value2, "56B78");
}

{
const char* xml = "<Hello value='&#ABC9000000065;' value2='&#xffffffff;' value3='&#5000000000;' value4='&#x00000045;' value5='&#x000000000000000021;'>Text</Hello>";
XMLDocument doc;
doc.Parse(xml);
const char* value = doc.FirstChildElement()->Attribute("value");
const char* value2 = doc.FirstChildElement()->Attribute("value2");
const char* value3 = doc.FirstChildElement()->Attribute("value3");
const char* value4 = doc.FirstChildElement()->Attribute("value4");
const char* value5 = doc.FirstChildElement()->Attribute("value5");
XMLTest("Test attribute encode", false, doc.Error());
XMLTest("Test attribute encode too long value", value, "&#ABC9000000065;"); // test long value
XMLTest("Test attribute encode out of unicode range", value2, "&#xffffffff;"); // out of unicode range
XMLTest("Test attribute encode out of int max value", value3, "&#5000000000;"); // out of int max value
XMLTest("Test attribute encode with a Hex value", value4, "E"); // hex value in unicode value
XMLTest("Test attribute encode with a Hex value", value5, "!"); // hex value in unicode value
}

// ----------- Performance tracking --------------
{
#if defined( _MSC_VER )
Expand Down

0 comments on commit 4cbb251

Please sign in to comment.