há 3 dias atrás · a737ecb2cb
--- a/tinyxml2.cpp
+++ b/tinyxml2.cpp
@@ -552,12 +552,15 @@ const char* XMLUtil::GetCharacterRef(const char* p, char* value, int* length)
 
				             TIXMLASSERT(digit < radix);

			
 
				 

			
 
				             const unsigned int digitScaled = mult * digit;

			
 
				+            // Reject before adding: if digitScaled alone exceeds MAX_CODE_POINT,

			
 
				+            // or if adding it to ucs would exceed it (checked without overflow by

			
 
				+            // testing ucs > MAX_CODE_POINT - digitScaled, safe since digitScaled

			
 
				+            // <= MAX_CODE_POINT at this point).

			
 
				+            if (digitScaled > MAX_CODE_POINT || ucs > MAX_CODE_POINT - digitScaled) {

			
 
				+                return 0;

			
 
				+            }

			
 
				             ucs += digitScaled;

			
 
				-            mult *= radix;       

			
 
				-            

			
 
				-            // Security check: could a value exist that is out of range?

			
 
				-            // Easily; limit to the MAX_CODE_POINT, which also allows for a

			
 
				-            // bunch of leading zeroes.

			
 
				+            mult *= radix;

			
 
				             if (mult > MAX_CODE_POINT) {

			
 
				                 mult = MAX_CODE_POINT;

			
 
				             }

			
@@ -569,11 +572,11 @@ const char* XMLUtil::GetCharacterRef(const char* p, char* value, int* length)
 
				         }

			
 
				         // convert the UCS to UTF-8

			
 
				         ConvertUTF32ToUTF8(ucs, value, length);

			
 
				-		if (length == 0) {

			
 
				-            // If length is 0, there was an error. (Security? Bad input?)

			
 
				+        if (*length == 0) {

			
 
				+            // If *length is 0, ConvertUTF32ToUTF8 rejected the code point.

			
 
				             // Fail safely.

			
 
				-			return 0;

			
 
				-		}

			
 
				+            return 0;

			
 
				+        }

			
 
				         return p + delta + 1;

			
 
				     }

			
 
				     return p + 1;

			
--- a/xmltest.cpp
+++ b/xmltest.cpp
@@ -2701,6 +2701,56 @@ int main( int argc, const char ** argv )
 
				 		XMLTest("Test attribute encode with a Hex value", value5, "!"); // hex value in unicode value

			
 
				 	}

			
 
				 

			
 
				+	// ---------- Security: numeric character reference bounds ----------

			
 
				+	{

			
 
				+		// Regression: U+10FFFF is the last valid Unicode code point and must

			
 
				+		// parse correctly. The in-loop overflow guard must not reject it.

			
 
				+		XMLDocument doc;

			
 
				+		doc.Parse( "<t v='&#x10FFFF;'/>" );

			
 
				+		XMLTest( "Numeric ref U+10FFFF: no error", false, doc.Error() );

			
 
				+		const char* v = doc.FirstChildElement()->Attribute( "v" );

			
 
				+		// U+10FFFF encodes to the 4-byte UTF-8 sequence F4 8F BF BF.

			
 
				+		const char expected[] = {

			
 
				+			static_cast<char>(0xF4), static_cast<char>(0x8F),

			
 
				+			static_cast<char>(0xBF), static_cast<char>(0xBF), 0

			
 
				+		};

			
 
				+		XMLTest( "Numeric ref U+10FFFF: correct UTF-8 output", expected, v );

			
 
				+	}

			
 
				+	{

			
 
				+		// Boundary check: U+110000 is one above the maximum code point.

			
 
				+		// The in-loop overflow guard must catch this before ucs is written,

			
 
				+		// leaving the entity as a literal (starting with '&').

			
 
				+		XMLDocument doc;

			
 
				+		doc.Parse( "<t v='&#x110000;'/>" );

			
 
				+		XMLTest( "Numeric ref U+110000: no parse error", false, doc.Error() );

			
 
				+		const char* v = doc.FirstChildElement()->Attribute( "v" );

			
 
				+		XMLTest( "Numeric ref U+110000: not resolved (left as literal)", true,

			
 
				+		         v != nullptr && v[0] == '&' );

			
 
				+	}

			
 
				+	{

			
 
				+		// A hex entity with enough digits to overflow uint32_t must

			
 
				+		// be rejected by the in-loop guard before the accumulator wraps.

			
 
				+		// Before the fix, ucs could wrap around and pass the post-loop range

			
 
				+		// check, producing an attacker-chosen character in the parsed output.

			
 
				+		// Build "&#x" + 300 'F' digits + ";" -- far beyond what fits in uint32_t.

			
 
				+		const char prefix[] = "<t v='&#x";

			
 
				+		const char suffix[] = ";'/>";

			
 
				+		static const int NDIGITS = 300;

			
 
				+		char xml[sizeof(prefix) + NDIGITS + sizeof(suffix)];

			
 
				+		strcpy( xml, prefix );

			
 
				+		memset( xml + strlen(prefix), 'F', NDIGITS );

			
 
				+		strcpy( xml + strlen(prefix) + NDIGITS, suffix );

			
 
				+

			
 
				+		XMLDocument doc;

			
 
				+		doc.Parse( xml );

			
 
				+		XMLTest( "Overflow hex entity: no parse error", false, doc.Error() );

			
 
				+		const char* v = doc.FirstChildElement()->Attribute( "v" );

			
 
				+		// GetCharacterRef returns 0 for rejected refs; the caller then copies

			
 
				+		// the literal '&', so the attribute must start with '&', not a char.

			
 
				+		XMLTest( "Overflow hex entity: not resolved to a character", true,

			
 
				+		         v != nullptr && v[0] == '&' );

			
 
				+	}

			
 
				+

			
 
				 	// ---------- XMLPrinter Apos Escaping ------

			
 
				 	{

			
 
				 		const char* testText = "text containing a ' character";