Merge pull request #21526 from spokodev/w33/pdfjs-encodexml-surrogate

Do not drop the character after U+FFFE or U+FFFF in encodeToXmlString
This commit is contained in:
Tim van der Meij 2026-07-04 13:38:22 +02:00 committed by GitHub
commit 9c46f48f88
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 6 additions and 1 deletion

View File

@@ -533,7 +533,7 @@ function encodeToXmlString(str) {
buffer.push(str.substring(start, i));
}
buffer.push(`&#x${char.toString(16).toUpperCase()};`);
-if (char > 0xd7ff && (char < 0xe000 || char > 0xfffd)) {
+if (char > 0xffff) {
// char is represented by two u16
i++;
}

View File

@@ -317,6 +317,11 @@ describe("core_utils", function () {
const str = "hello world";
expect(encodeToXmlString(str)).toEqual(str);
});
it("should keep the character after U+FFFE or U+FFFF", function () {
expect(encodeToXmlString("￿A")).toEqual("&#xFFFF;A");
expect(encodeToXmlString("￾B")).toEqual("&#xFFFE;B");
});
});
describe("validateCSSFont", function () {