mirror of
https://github.com/mozilla/pdf.js.git
synced 2026-07-04 22:25:50 +02:00
Do not drop the character after U+FFFE or U+FFFF in encodeToXmlString
encodeToXmlString skips surrogate pairs with the guard `char > 0xd7ff && (char < 0xe000 || char > 0xfffd)` and then does `i++` to step over the low surrogate. That predicate is also true for U+FFFE and U+FFFF, which are single UTF-16 code units, not surrogate pairs. The `i++` then skips the character that follows them, so it is silently dropped. For example, encodeToXmlString of U+FFFF followed by "A" returned "&#xFFFF;" instead of "&#xFFFF;A". The function serializes XML text nodes and attribute values in xml_parser.js and xfa_object.js, so this corrupts round-tripped XML and XFA content. The correct test for a surrogate pair is `char > 0xffff`, since codePointAt returns a value at or above 0x10000 only for a real pair. This preserves the existing behavior for emoji, the U+FFFD boundary, and lone surrogates, and stops dropping the character after U+FFFE and U+FFFF.
This commit is contained in:
parent
d66bd324fa
commit
0aee1d5382
@ -533,7 +533,7 @@ function encodeToXmlString(str) {
|
||||
buffer.push(str.substring(start, i));
|
||||
}
|
||||
buffer.push(`&#x${char.toString(16).toUpperCase()};`);
|
||||
if (char > 0xd7ff && (char < 0xe000 || char > 0xfffd)) {
|
||||
if (char > 0xffff) {
|
||||
// char is represented by two u16
|
||||
i++;
|
||||
}
|
||||
|
||||
@ -312,6 +312,11 @@ describe("core_utils", function () {
|
||||
const str = "hello world";
|
||||
expect(encodeToXmlString(str)).toEqual(str);
|
||||
});
|
||||
|
||||
it("should keep the character after U+FFFE or U+FFFF", function () {
|
||||
expect(encodeToXmlString("\uffffA")).toEqual("&#xFFFF;A");
|
||||
expect(encodeToXmlString("\ufffeB")).toEqual("&#xFFFE;B");
|
||||
});
|
||||
});
|
||||
|
||||
describe("validateCSSFont", function () {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user