Merge pull request #21526 from spokodev/w33/pdfjs-encodexml-surrogate

Do not drop the character after U+FFFE or U+FFFF in encodeToXmlString
2026-07-05 06:35:48 +02:00 · 2026-07-04 13:38:22 +02:00 · 2026-07-04 13:38:22 +02:00 · 9c46f48f88
commit 9c46f48f88
parent 5b100c4509 0aee1d5382
2 changed files with 6 additions and 1 deletion
--- a/src/core/core_utils.js
+++ b/src/core/core_utils.js
@@ -533,7 +533,7 @@ function encodeToXmlString(str) {
        buffer.push(str.substring(start, i));
      }
      buffer.push(`&#x${char.toString(16).toUpperCase()};`);
-      if (char > 0xd7ff && (char < 0xe000 || char > 0xfffd)) {
+      if (char > 0xffff) {
        // char is represented by two u16
        i++;
      }
--- a/test/unit/core_utils_spec.js
+++ b/test/unit/core_utils_spec.js
@@ -317,6 +317,11 @@ describe("core_utils", function () {
      const str = "hello world";
      expect(encodeToXmlString(str)).toEqual(str);
    });
+
+    it("should keep the character after U+FFFE or U+FFFF", function () {
+      expect(encodeToXmlString("\uFFFFA")).toEqual("&#xFFFF;A");
+      expect(encodeToXmlString("\uFFFEB")).toEqual("&#xFFFE;B");
+    });
  });

  describe("validateCSSFont", function () {