mirror of
https://github.com/mozilla/pdf.js.git
synced 2026-05-31 07:11:00 +02:00
Merge pull request #21279 from Snuffleupagus/mv-stringToPDFString
Move the `stringToPDFString` helper function to the worker-thread
This commit is contained in:
commit
69efba1ca2
@ -33,7 +33,6 @@ import {
|
||||
OPS,
|
||||
RenderingIntentFlag,
|
||||
shadow,
|
||||
stringToPDFString,
|
||||
unreachable,
|
||||
Util,
|
||||
warn,
|
||||
@ -53,8 +52,6 @@ import {
|
||||
numberToString,
|
||||
RESOURCES_KEYS_OPERATOR_LIST,
|
||||
RESOURCES_KEYS_TEXT_CONTENT,
|
||||
stringToAsciiOrUTF16BE,
|
||||
stringToUTF16String,
|
||||
} from "./core_utils.js";
|
||||
import {
|
||||
createDefaultAppearance,
|
||||
@ -66,6 +63,11 @@ import {
|
||||
import { DateFormats, TimeFormats } from "../shared/scripting_utils.js";
|
||||
import { Dict, isName, isRefsEqual, Name, Ref, RefSet } from "./primitives.js";
|
||||
import { Stream, StringStream } from "./stream.js";
|
||||
import {
|
||||
stringToAsciiOrUTF16BE,
|
||||
stringToPDFString,
|
||||
stringToUTF16String,
|
||||
} from "./string_utils.js";
|
||||
import { BaseStream } from "./base_stream.js";
|
||||
import { bidi } from "./bidi.js";
|
||||
import { Catalog } from "./catalog.js";
|
||||
|
||||
@ -22,7 +22,6 @@ import {
|
||||
objectSize,
|
||||
PermissionFlag,
|
||||
shadow,
|
||||
stringToPDFString,
|
||||
stringToUTF8String,
|
||||
warn,
|
||||
} from "../shared/util.js";
|
||||
@ -53,6 +52,7 @@ import { clearGlobalCaches } from "./cleanup_helper.js";
|
||||
import { ColorSpaceUtils } from "./colorspace_utils.js";
|
||||
import { FileSpec } from "./file_spec.js";
|
||||
import { MetadataParser } from "./metadata_parser.js";
|
||||
import { stringToPDFString } from "./string_utils.js";
|
||||
import { StructTreeRoot } from "./struct_tree.js";
|
||||
|
||||
const isRef = v => v instanceof Ref;
|
||||
|
||||
@ -19,12 +19,12 @@ import {
|
||||
BaseException,
|
||||
makeArr,
|
||||
objectSize,
|
||||
stringToPDFString,
|
||||
Util,
|
||||
warn,
|
||||
} from "../shared/util.js";
|
||||
import { Dict, isName, isRefsEqual, Name, Ref, RefSet } from "./primitives.js";
|
||||
import { BaseStream } from "./base_stream.js";
|
||||
import { stringToPDFString } from "./string_utils.js";
|
||||
|
||||
const PDF_VERSION_REGEXP = /^[1-9]\.\d$/;
|
||||
const MAX_INT_32 = 2 ** 31 - 1;
|
||||
@ -684,45 +684,6 @@ function getNewAnnotationsMap(annotationStorage) {
|
||||
return newAnnotationsByPage.size > 0 ? newAnnotationsByPage : null;
|
||||
}
|
||||
|
||||
/**
 * Returns the string unchanged when it is pure ASCII, and otherwise its
 * UTF-16 big-endian form (including the byte-order mark).
 *
 * @param {string|null|undefined} str
 * @returns {string|null|undefined} `null` and `undefined` are passed through
 *   untouched.
 */
function stringToAsciiOrUTF16BE(str) {
  // Nothing to encode: hand back the nullish value as is.
  if (str === undefined || str === null) {
    return str;
  }
  if (isAscii(str)) {
    return str;
  }
  return stringToUTF16String(str, /* bigEndian = */ true);
}
|
||||
|
||||
/**
 * Tells whether the value is a string that only contains characters in the
 * range 0x00-0x7F. The empty string is considered ASCII.
 *
 * @param {*} str
 * @returns {boolean} Always `false` for non-string values.
 */
function isAscii(str) {
  return (
    typeof str === "string" && (str === "" || /^[\x00-\x7F]*$/.test(str))
  );
}
|
||||
|
||||
/**
 * Encodes every UTF-16 code unit of the string as two entries of
 * `Util.hexNums`: first the high byte, then the low byte.
 *
 * @param {string} str
 * @returns {string} The concatenated `Util.hexNums` entries.
 */
function stringToUTF16HexString(str) {
  const hexPairs = [];
  const count = str.length;
  let pos = 0;
  while (pos < count) {
    const codeUnit = str.charCodeAt(pos++);
    const highByte = (codeUnit >> 8) & 0xff;
    const lowByte = codeUnit & 0xff;
    hexPairs.push(Util.hexNums[highByte], Util.hexNums[lowByte]);
  }
  return hexPairs.join("");
}
|
||||
|
||||
/**
 * Splits every UTF-16 code unit of the string into two characters: the high
 * byte followed by the low byte.
 *
 * @param {string} str
 * @param {boolean} [bigEndian] - When `true`, the result is prefixed with
 *   "\xFE\xFF". The default value is `false`.
 * @returns {string} A string in which every character code is below 0x100.
 */
function stringToUTF16String(str, bigEndian = false) {
  const pieces = bigEndian ? ["\xFE\xFF"] : [];
  const count = str.length;
  let pos = 0;
  while (pos < count) {
    const codeUnit = str.charCodeAt(pos++);
    // High byte first, then low byte.
    pieces.push(String.fromCharCode((codeUnit >> 8) & 0xff, codeUnit & 0xff));
  }
  return pieces.join("");
}
|
||||
|
||||
function getModificationDate(date = new Date()) {
|
||||
if (!(date instanceof Date)) {
|
||||
date = new Date(date);
|
||||
@ -782,7 +743,6 @@ export {
|
||||
getRotationMatrix,
|
||||
getSizeInBytes,
|
||||
IDENTITY_MATRIX,
|
||||
isAscii,
|
||||
isBooleanArray,
|
||||
isNumberArray,
|
||||
isWhiteSpace,
|
||||
@ -798,9 +758,6 @@ export {
|
||||
recoverJsURL,
|
||||
RESOURCES_KEYS_OPERATOR_LIST,
|
||||
RESOURCES_KEYS_TEXT_CONTENT,
|
||||
stringToAsciiOrUTF16BE,
|
||||
stringToUTF16HexString,
|
||||
stringToUTF16String,
|
||||
toRomanNumerals,
|
||||
validateCSSFont,
|
||||
validateFontName,
|
||||
|
||||
@ -18,7 +18,6 @@ import {
|
||||
escapePDFName,
|
||||
getRotationMatrix,
|
||||
numberToString,
|
||||
stringToUTF16HexString,
|
||||
} from "./core_utils.js";
|
||||
import { Dict, Name } from "./primitives.js";
|
||||
import {
|
||||
@ -33,6 +32,7 @@ import { EvaluatorPreprocessor } from "./evaluator.js";
|
||||
import { LocalColorSpaceCache } from "./image_utils.js";
|
||||
import { PDFFunctionFactory } from "./function.js";
|
||||
import { StringStream } from "./stream.js";
|
||||
import { stringToUTF16HexString } from "./string_utils.js";
|
||||
|
||||
class DefaultAppearanceEvaluator extends EvaluatorPreprocessor {
|
||||
constructor(str) {
|
||||
|
||||
@ -26,7 +26,6 @@ import {
|
||||
RenderingIntentFlag,
|
||||
shadow,
|
||||
stringToBytes,
|
||||
stringToPDFString,
|
||||
stringToUTF8String,
|
||||
unreachable,
|
||||
Util,
|
||||
@ -76,6 +75,7 @@ import { OperatorList } from "./operator_list.js";
|
||||
import { PartialEvaluator } from "./evaluator.js";
|
||||
import { PDFImage } from "./image.js";
|
||||
import { StreamsSequenceStream } from "./decode_stream.js";
|
||||
import { stringToPDFString } from "./string_utils.js";
|
||||
import { StructTreePage } from "./struct_tree.js";
|
||||
import { XFAFactory } from "./xfa/factory.js";
|
||||
import { XRef } from "./xref.js";
|
||||
|
||||
@ -25,15 +25,15 @@ import {
|
||||
getInheritableProperty,
|
||||
getModificationDate,
|
||||
getNewAnnotationsMap,
|
||||
stringToAsciiOrUTF16BE,
|
||||
} from "../core_utils.js";
|
||||
import { Dict, isName, Name, Ref, RefSet, RefSetCache } from "../primitives.js";
|
||||
import { incrementalUpdate, writeValue } from "../writer.js";
|
||||
import { NameTree, NumberTree } from "../name_number_tree.js";
|
||||
import { stringToBytes, stringToPDFString } from "../../shared/util.js";
|
||||
import { stringToAsciiOrUTF16BE, stringToPDFString } from "../string_utils.js";
|
||||
import { AnnotationFactory } from "../annotation.js";
|
||||
import { BaseStream } from "../base_stream.js";
|
||||
import { StringStream } from "../stream.js";
|
||||
import { stringToBytes } from "../../shared/util.js";
|
||||
|
||||
const MAX_LEAVES_PER_PAGES_NODE = 16;
|
||||
const MAX_IN_NAME_TREE_NODE = 64;
|
||||
|
||||
@ -26,7 +26,6 @@ import {
|
||||
normalizeUnicode,
|
||||
OPS,
|
||||
shadow,
|
||||
stringToPDFString,
|
||||
TextRenderingMode,
|
||||
Util,
|
||||
warn,
|
||||
@ -90,6 +89,7 @@ import { getUnicodeForGlyph } from "./unicode.js";
|
||||
import { MurmurHash3_64 } from "../shared/murmurhash3.js";
|
||||
import { PDFImage } from "./image.js";
|
||||
import { Stream } from "./stream.js";
|
||||
import { stringToPDFString } from "./string_utils.js";
|
||||
|
||||
const DefaultPartialEvaluatorOptions = Object.freeze({
|
||||
maxImageSize: -1,
|
||||
|
||||
@ -13,9 +13,10 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { stringToPDFString, stripPath, warn } from "../shared/util.js";
|
||||
import { stripPath, warn } from "../shared/util.js";
|
||||
import { BaseStream } from "./base_stream.js";
|
||||
import { Dict } from "./primitives.js";
|
||||
import { stringToPDFString } from "./string_utils.js";
|
||||
|
||||
function pickPlatformItem(dict) {
|
||||
if (dict instanceof Dict) {
|
||||
|
||||
121
src/core/string_utils.js
Normal file
121
src/core/string_utils.js
Normal file
@ -0,0 +1,121 @@
|
||||
/* Copyright 2019 Mozilla Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { stringToBytes, Util, warn } from "../shared/util.js";
|
||||
|
||||
/**
 * Tells whether the value is a string that only contains characters in the
 * range 0x00-0x7F. The empty string is considered ASCII.
 *
 * @param {*} str
 * @returns {boolean} Always `false` for non-string values.
 */
function isAscii(str) {
  if (typeof str !== "string") {
    return false;
  }
  // The empty string is trivially ASCII, no need to run the regular expression.
  if (str.length === 0) {
    return true;
  }
  return /^[\x00-\x7F]*$/.test(str);
}
|
||||
|
||||
/**
 * Returns the string unchanged when it is pure ASCII, and otherwise its
 * UTF-16 big-endian form (including the byte-order mark).
 *
 * @param {string|null|undefined} str
 * @returns {string|null|undefined} `null` and `undefined` are passed through
 *   untouched.
 */
function stringToAsciiOrUTF16BE(str) {
  // Nothing to encode: hand back the nullish value as is.
  if (str === undefined || str === null) {
    return str;
  }
  if (isAscii(str)) {
    return str;
  }
  return stringToUTF16String(str, /* bigEndian = */ true);
}
|
||||
|
||||
/**
 * Encodes every UTF-16 code unit of the string as two entries of
 * `Util.hexNums`: first the high byte, then the low byte.
 *
 * @param {string} str
 * @returns {string} The concatenated `Util.hexNums` entries.
 */
function stringToUTF16HexString(str) {
  const hexPairs = [];
  const count = str.length;
  let pos = 0;
  while (pos < count) {
    const codeUnit = str.charCodeAt(pos++);
    const highByte = (codeUnit >> 8) & 0xff;
    const lowByte = codeUnit & 0xff;
    hexPairs.push(Util.hexNums[highByte], Util.hexNums[lowByte]);
  }
  return hexPairs.join("");
}
|
||||
|
||||
/**
 * Splits every UTF-16 code unit of the string into two characters: the high
 * byte followed by the low byte.
 *
 * @param {string} str
 * @param {boolean} [bigEndian] - When `true`, the result is prefixed with
 *   "\xFE\xFF". The default value is `false`.
 * @returns {string} A string in which every character code is below 0x100.
 */
function stringToUTF16String(str, bigEndian = false) {
  const pieces = bigEndian ? ["\xFE\xFF"] : [];
  const count = str.length;
  let pos = 0;
  while (pos < count) {
    const codeUnit = str.charCodeAt(pos++);
    // High byte first, then low byte.
    pieces.push(String.fromCharCode((codeUnit >> 8) & 0xff, codeUnit & 0xff));
  }
  return pieces.join("");
}
|
||||
|
||||
const PDFStringTranslateTable = [
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2d8,
|
||||
0x2c7, 0x2c6, 0x2d9, 0x2dd, 0x2db, 0x2da, 0x2dc, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x192,
|
||||
0x2044, 0x2039, 0x203a, 0x2212, 0x2030, 0x201e, 0x201c, 0x201d, 0x2018,
|
||||
0x2019, 0x201a, 0x2122, 0xfb01, 0xfb02, 0x141, 0x152, 0x160, 0x178, 0x17d,
|
||||
0x131, 0x142, 0x153, 0x161, 0x17e, 0, 0x20ac,
|
||||
];
|
||||
|
||||
function stringToPDFString(str, keepEscapeSequence = false) {
|
||||
// See section 7.9.2.2 Text String Type.
|
||||
// The string can contain some language codes bracketed with 0x1b,
|
||||
// so we must remove them.
|
||||
if (str[0] >= "\xEF") {
|
||||
let encoding;
|
||||
if (str[0] === "\xFE" && str[1] === "\xFF") {
|
||||
encoding = "utf-16be";
|
||||
if (str.length % 2 === 1) {
|
||||
str = str.slice(0, -1);
|
||||
}
|
||||
} else if (str[0] === "\xFF" && str[1] === "\xFE") {
|
||||
encoding = "utf-16le";
|
||||
if (str.length % 2 === 1) {
|
||||
str = str.slice(0, -1);
|
||||
}
|
||||
} else if (str[0] === "\xEF" && str[1] === "\xBB" && str[2] === "\xBF") {
|
||||
encoding = "utf-8";
|
||||
}
|
||||
|
||||
if (encoding) {
|
||||
try {
|
||||
const decoder = new TextDecoder(encoding, { fatal: true });
|
||||
const buffer = stringToBytes(str);
|
||||
const decoded = decoder.decode(buffer);
|
||||
if (keepEscapeSequence || !decoded.includes("\x1b")) {
|
||||
return decoded;
|
||||
}
|
||||
return decoded.replaceAll(/\x1b[^\x1b]*(?:\x1b|$)/g, "");
|
||||
} catch (ex) {
|
||||
warn(`stringToPDFString: "${ex}".`);
|
||||
}
|
||||
}
|
||||
}
|
||||
// ISO Latin 1
|
||||
const strBuf = [];
|
||||
for (let i = 0, ii = str.length; i < ii; i++) {
|
||||
const charCode = str.charCodeAt(i);
|
||||
if (!keepEscapeSequence && charCode === 0x1b) {
|
||||
// eslint-disable-next-line no-empty
|
||||
while (++i < ii && str.charCodeAt(i) !== 0x1b) {}
|
||||
continue;
|
||||
}
|
||||
const code = PDFStringTranslateTable[charCode];
|
||||
strBuf.push(code ? String.fromCharCode(code) : str.charAt(i));
|
||||
}
|
||||
return strBuf.join("");
|
||||
}
|
||||
|
||||
export {
|
||||
isAscii,
|
||||
stringToAsciiOrUTF16BE,
|
||||
stringToPDFString,
|
||||
stringToUTF16HexString,
|
||||
stringToUTF16String,
|
||||
};
|
||||
@ -16,13 +16,13 @@
|
||||
import {
|
||||
AnnotationPrefix,
|
||||
makeArr,
|
||||
stringToPDFString,
|
||||
stringToUTF8String,
|
||||
warn,
|
||||
} from "../shared/util.js";
|
||||
import { Dict, isName, Name, Ref, RefSetCache } from "./primitives.js";
|
||||
import { lookupNormalRect, stringToAsciiOrUTF16BE } from "./core_utils.js";
|
||||
import { stringToAsciiOrUTF16BE, stringToPDFString } from "./string_utils.js";
|
||||
import { BaseStream } from "./base_stream.js";
|
||||
import { lookupNormalRect } from "./core_utils.js";
|
||||
import { NumberTree } from "./name_number_tree.js";
|
||||
|
||||
const MAX_DEPTH = 40;
|
||||
|
||||
@ -21,7 +21,6 @@ import {
|
||||
isNodeJS,
|
||||
PasswordException,
|
||||
setVerbosityLevel,
|
||||
stringToPDFString,
|
||||
VerbosityLevel,
|
||||
warn,
|
||||
} from "../shared/util.js";
|
||||
@ -38,6 +37,7 @@ import { clearGlobalCaches } from "./cleanup_helper.js";
|
||||
import { incrementalUpdate } from "./writer.js";
|
||||
import { PDFEditor } from "./editor/pdf_editor.js";
|
||||
import { PDFWorkerStream } from "./worker_stream.js";
|
||||
import { stringToPDFString } from "./string_utils.js";
|
||||
import { StructTreeRoot } from "./struct_tree.js";
|
||||
|
||||
class WorkerTask {
|
||||
|
||||
@ -1061,67 +1061,6 @@ class Util {
|
||||
}
|
||||
}
|
||||
|
||||
const PDFStringTranslateTable = [
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2d8,
|
||||
0x2c7, 0x2c6, 0x2d9, 0x2dd, 0x2db, 0x2da, 0x2dc, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x192,
|
||||
0x2044, 0x2039, 0x203a, 0x2212, 0x2030, 0x201e, 0x201c, 0x201d, 0x2018,
|
||||
0x2019, 0x201a, 0x2122, 0xfb01, 0xfb02, 0x141, 0x152, 0x160, 0x178, 0x17d,
|
||||
0x131, 0x142, 0x153, 0x161, 0x17e, 0, 0x20ac,
|
||||
];
|
||||
|
||||
function stringToPDFString(str, keepEscapeSequence = false) {
|
||||
// See section 7.9.2.2 Text String Type.
|
||||
// The string can contain some language codes bracketed with 0x1b,
|
||||
// so we must remove them.
|
||||
if (str[0] >= "\xEF") {
|
||||
let encoding;
|
||||
if (str[0] === "\xFE" && str[1] === "\xFF") {
|
||||
encoding = "utf-16be";
|
||||
if (str.length % 2 === 1) {
|
||||
str = str.slice(0, -1);
|
||||
}
|
||||
} else if (str[0] === "\xFF" && str[1] === "\xFE") {
|
||||
encoding = "utf-16le";
|
||||
if (str.length % 2 === 1) {
|
||||
str = str.slice(0, -1);
|
||||
}
|
||||
} else if (str[0] === "\xEF" && str[1] === "\xBB" && str[2] === "\xBF") {
|
||||
encoding = "utf-8";
|
||||
}
|
||||
|
||||
if (encoding) {
|
||||
try {
|
||||
const decoder = new TextDecoder(encoding, { fatal: true });
|
||||
const buffer = stringToBytes(str);
|
||||
const decoded = decoder.decode(buffer);
|
||||
if (keepEscapeSequence || !decoded.includes("\x1b")) {
|
||||
return decoded;
|
||||
}
|
||||
return decoded.replaceAll(/\x1b[^\x1b]*(?:\x1b|$)/g, "");
|
||||
} catch (ex) {
|
||||
warn(`stringToPDFString: "${ex}".`);
|
||||
}
|
||||
}
|
||||
}
|
||||
// ISO Latin 1
|
||||
const strBuf = [];
|
||||
for (let i = 0, ii = str.length; i < ii; i++) {
|
||||
const charCode = str.charCodeAt(i);
|
||||
if (!keepEscapeSequence && charCode === 0x1b) {
|
||||
// eslint-disable-next-line no-empty
|
||||
while (++i < ii && str.charCodeAt(i) !== 0x1b) {}
|
||||
continue;
|
||||
}
|
||||
const code = PDFStringTranslateTable[charCode];
|
||||
strBuf.push(code ? String.fromCharCode(code) : str.charAt(i));
|
||||
}
|
||||
return strBuf.join("");
|
||||
}
|
||||
|
||||
/**
 * Runs the string through `escape` and then `decodeURIComponent`.
 *
 * @param {string} str
 * @returns {string} The result of `decodeURIComponent`.
 */
function stringToUTF8String(str) {
  const percentEncoded = escape(str);
  return decodeURIComponent(percentEncoded);
}
|
||||
@ -1300,7 +1239,6 @@ export {
|
||||
setVerbosityLevel,
|
||||
shadow,
|
||||
stringToBytes,
|
||||
stringToPDFString,
|
||||
stringToUTF8String,
|
||||
stripPath,
|
||||
TextRenderingMode,
|
||||
|
||||
@ -49,6 +49,7 @@
|
||||
"postscript_spec.js",
|
||||
"primitives_spec.js",
|
||||
"stream_spec.js",
|
||||
"string_utils_spec.js",
|
||||
"struct_tree_spec.js",
|
||||
"svg_factory_spec.js",
|
||||
"text_layer_spec.js",
|
||||
|
||||
@ -22,13 +22,10 @@ import {
|
||||
getInheritableProperty,
|
||||
getModificationDate,
|
||||
getSizeInBytes,
|
||||
isAscii,
|
||||
isWhiteSpace,
|
||||
numberToString,
|
||||
parseXFAPath,
|
||||
recoverJsURL,
|
||||
stringToUTF16HexString,
|
||||
stringToUTF16String,
|
||||
toRomanNumerals,
|
||||
validateCSSFont,
|
||||
} from "../../src/core/core_utils.js";
|
||||
@ -416,56 +413,6 @@ describe("core_utils", function () {
|
||||
});
|
||||
});
|
||||
|
||||
describe("isAscii", () => {
  it("handles ascii/non-ascii strings", () => {
    // Pairs of [input, expected result], checked in order.
    const expectations = [
      ["hello world", true],
      ["こんにちは世界の", false],
      ["hello world in Japanese is こんにちは世界の", false],
      ["", true],
      [123, false],
      [null, false],
      [undefined, false],
    ];
    for (const [input, expected] of expectations) {
      expect(isAscii(input)).toEqual(expected);
    }
  });
});
|
||||
|
||||
describe("stringToUTF16HexString", () => {
  it("should encode a string in UTF16 hexadecimal format", () => {
    // Pairs of [input, expected hexadecimal output], checked in order.
    const expectations = [
      ["hello world", "00680065006c006c006f00200077006f0072006c0064"],
      ["こんにちは世界の", "30533093306b3061306f4e16754c306e"],
    ];
    for (const [input, expected] of expectations) {
      expect(stringToUTF16HexString(input)).toEqual(expected);
    }
  });
});
|
||||
|
||||
describe("stringToUTF16String", () => {
  it("should encode a string in UTF16", () => {
    const ascii = stringToUTF16String("hello world");
    expect(ascii).toEqual("\0h\0e\0l\0l\0o\0 \0w\0o\0r\0l\0d");

    const japanese = stringToUTF16String("こんにちは世界の");
    expect(japanese).toEqual(
      "\x30\x53\x30\x93\x30\x6b\x30\x61\x30\x6f\x4e\x16\x75\x4c\x30\x6e"
    );
  });

  it("should encode a string in UTF16BE with a BOM", () => {
    const ascii = stringToUTF16String("hello world", /* bigEndian = */ true);
    expect(ascii).toEqual("\xfe\xff\0h\0e\0l\0l\0o\0 \0w\0o\0r\0l\0d");

    const japanese = stringToUTF16String(
      "こんにちは世界の",
      /* bigEndian = */ true
    );
    expect(japanese).toEqual(
      "\xfe\xff\x30\x53\x30\x93\x30\x6b\x30\x61\x30\x6f\x4e\x16\x75\x4c\x30\x6e"
    );
  });
});
|
||||
|
||||
describe("deepCompare", function () {
|
||||
it("should return true for the same reference", function () {
|
||||
const dict = new Dict();
|
||||
|
||||
@ -96,6 +96,7 @@ async function initializePDFJS(callback) {
|
||||
"pdfjs-test/unit/primitives_spec.js",
|
||||
"pdfjs-test/unit/scripting_spec.js",
|
||||
"pdfjs-test/unit/stream_spec.js",
|
||||
"pdfjs-test/unit/string_utils_spec.js",
|
||||
"pdfjs-test/unit/struct_tree_spec.js",
|
||||
"pdfjs-test/unit/svg_factory_spec.js",
|
||||
"pdfjs-test/unit/text_layer_spec.js",
|
||||
|
||||
147
test/unit/string_utils_spec.js
Normal file
147
test/unit/string_utils_spec.js
Normal file
@ -0,0 +1,147 @@
|
||||
/* Copyright 2019 Mozilla Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import {
|
||||
isAscii,
|
||||
stringToPDFString,
|
||||
stringToUTF16HexString,
|
||||
stringToUTF16String,
|
||||
} from "../../src/core/string_utils.js";
|
||||
|
||||
describe("string_utils", () => {
  describe("isAscii", () => {
    it("handles ascii/non-ascii strings", () => {
      // Pairs of [input, expected result], checked in order.
      const expectations = [
        ["hello world", true],
        ["こんにちは世界の", false],
        ["hello world in Japanese is こんにちは世界の", false],
        ["", true],
        [123, false],
        [null, false],
        [undefined, false],
      ];
      for (const [input, expected] of expectations) {
        expect(isAscii(input)).toEqual(expected);
      }
    });
  });

  describe("stringToUTF16HexString", () => {
    it("should encode a string in UTF16 hexadecimal format", () => {
      // Pairs of [input, expected hexadecimal output], checked in order.
      const expectations = [
        ["hello world", "00680065006c006c006f00200077006f0072006c0064"],
        ["こんにちは世界の", "30533093306b3061306f4e16754c306e"],
      ];
      for (const [input, expected] of expectations) {
        expect(stringToUTF16HexString(input)).toEqual(expected);
      }
    });
  });

  describe("stringToUTF16String", () => {
    it("should encode a string in UTF16", () => {
      const ascii = stringToUTF16String("hello world");
      expect(ascii).toEqual("\0h\0e\0l\0l\0o\0 \0w\0o\0r\0l\0d");

      const japanese = stringToUTF16String("こんにちは世界の");
      expect(japanese).toEqual(
        "\x30\x53\x30\x93\x30\x6b\x30\x61\x30\x6f\x4e\x16\x75\x4c\x30\x6e"
      );
    });

    it("should encode a string in UTF16BE with a BOM", () => {
      const ascii = stringToUTF16String("hello world", /* bigEndian = */ true);
      expect(ascii).toEqual("\xfe\xff\0h\0e\0l\0l\0o\0 \0w\0o\0r\0l\0d");

      const japanese = stringToUTF16String(
        "こんにちは世界の",
        /* bigEndian = */ true
      );
      expect(japanese).toEqual(
        "\xfe\xff\x30\x53\x30\x93\x30\x6b\x30\x61\x30\x6f\x4e\x16\x75\x4c\x30\x6e"
      );
    });
  });

  describe("stringToPDFString", () => {
    it("handles ISO Latin 1 strings", () => {
      expect(stringToPDFString("\x8Dstring\x8E")).toEqual(
        "\u201Cstring\u201D"
      );
    });

    it("handles UTF-16 big-endian strings", () => {
      const converted = stringToPDFString(
        "\xFE\xFF\x00\x73\x00\x74\x00\x72\x00\x69\x00\x6E\x00\x67"
      );
      expect(converted).toEqual("string");
    });

    it("handles incomplete UTF-16 big-endian strings", () => {
      const converted = stringToPDFString(
        "\xFE\xFF\x00\x73\x00\x74\x00\x72\x00\x69\x00\x6E\x00"
      );
      expect(converted).toEqual("strin");
    });

    it("handles UTF-16 little-endian strings", () => {
      const converted = stringToPDFString(
        "\xFF\xFE\x73\x00\x74\x00\x72\x00\x69\x00\x6E\x00\x67\x00"
      );
      expect(converted).toEqual("string");
    });

    it("handles incomplete UTF-16 little-endian strings", () => {
      const converted = stringToPDFString(
        "\xFF\xFE\x73\x00\x74\x00\x72\x00\x69\x00\x6E\x00\x67"
      );
      expect(converted).toEqual("strin");
    });

    it("handles UTF-8 strings", () => {
      const simple = stringToPDFString("\xEF\xBB\xBF\x73\x74\x72\x69\x6E\x67");
      expect(simple).toEqual("string");

      const complex = stringToPDFString(
        "\xEF\xBB\xBF\xE8\xA1\xA8\xE3\x83\x9D\xE3\x81\x82\x41\xE9\xB7\x97" +
          "\xC5\x92\xC3\xA9\xEF\xBC\xA2\xE9\x80\x8D\xC3\x9C\xC3\x9F\xC2\xAA" +
          "\xC4\x85\xC3\xB1\xE4\xB8\x82\xE3\x90\x80\xF0\xA0\x80\x80"
      );
      expect(complex).toEqual("表ポあA鷗ŒéB逍Üߪąñ丂㐀𠀀");
    });

    it("handles empty strings", () => {
      // In order: ISO Latin 1, UTF-16BE, UTF-16LE and UTF-8.
      const emptyStrings = ["", "\xFE\xFF", "\xFF\xFE", "\xEF\xBB\xBF"];
      for (const empty of emptyStrings) {
        expect(stringToPDFString(empty)).toEqual("");
      }
    });

    it("handles strings with language code", () => {
      // In order: ISO Latin 1, UTF-16BE and UTF-16LE.
      const withLanguageCode = [
        "hello \x1benUS\x1bworld",
        "\xFE\xFF\x00h\x00e\x00l\x00l\x00o\x00 \x00\x1b\x00e\x00n\x00U\x00S\x00\x1b\x00w\x00o\x00r\x00l\x00d",
        "\xFF\xFEh\x00e\x00l\x00l\x00o\x00 \x00\x1b\x00e\x00n\x00U\x00S\x00\x1b\x00w\x00o\x00r\x00l\x00d\x00",
      ];
      for (const str of withLanguageCode) {
        expect(stringToPDFString(str)).toEqual("hello world");
      }
    });
  });
});
|
||||
@ -19,7 +19,6 @@ import {
|
||||
createValidAbsoluteUrl,
|
||||
getUuid,
|
||||
stringToBytes,
|
||||
stringToPDFString,
|
||||
} from "../../src/shared/util.js";
|
||||
|
||||
describe("util", function () {
|
||||
@ -83,80 +82,6 @@ describe("util", function () {
|
||||
});
|
||||
});
|
||||
|
||||
describe("stringToPDFString", () => {
  it("handles ISO Latin 1 strings", () => {
    expect(stringToPDFString("\x8Dstring\x8E")).toEqual("\u201Cstring\u201D");
  });

  it("handles UTF-16 big-endian strings", () => {
    const converted = stringToPDFString(
      "\xFE\xFF\x00\x73\x00\x74\x00\x72\x00\x69\x00\x6E\x00\x67"
    );
    expect(converted).toEqual("string");
  });

  it("handles incomplete UTF-16 big-endian strings", () => {
    const converted = stringToPDFString(
      "\xFE\xFF\x00\x73\x00\x74\x00\x72\x00\x69\x00\x6E\x00"
    );
    expect(converted).toEqual("strin");
  });

  it("handles UTF-16 little-endian strings", () => {
    const converted = stringToPDFString(
      "\xFF\xFE\x73\x00\x74\x00\x72\x00\x69\x00\x6E\x00\x67\x00"
    );
    expect(converted).toEqual("string");
  });

  it("handles incomplete UTF-16 little-endian strings", () => {
    const converted = stringToPDFString(
      "\xFF\xFE\x73\x00\x74\x00\x72\x00\x69\x00\x6E\x00\x67"
    );
    expect(converted).toEqual("strin");
  });

  it("handles UTF-8 strings", () => {
    const simple = stringToPDFString("\xEF\xBB\xBF\x73\x74\x72\x69\x6E\x67");
    expect(simple).toEqual("string");

    const complex = stringToPDFString(
      "\xEF\xBB\xBF\xE8\xA1\xA8\xE3\x83\x9D\xE3\x81\x82\x41\xE9\xB7\x97" +
        "\xC5\x92\xC3\xA9\xEF\xBC\xA2\xE9\x80\x8D\xC3\x9C\xC3\x9F\xC2\xAA" +
        "\xC4\x85\xC3\xB1\xE4\xB8\x82\xE3\x90\x80\xF0\xA0\x80\x80"
    );
    expect(complex).toEqual("表ポあA鷗ŒéB逍Üߪąñ丂㐀𠀀");
  });

  it("handles empty strings", () => {
    // In order: ISO Latin 1, UTF-16BE, UTF-16LE and UTF-8.
    const emptyStrings = ["", "\xFE\xFF", "\xFF\xFE", "\xEF\xBB\xBF"];
    for (const empty of emptyStrings) {
      expect(stringToPDFString(empty)).toEqual("");
    }
  });

  it("handles strings with language code", () => {
    // In order: ISO Latin 1, UTF-16BE and UTF-16LE.
    const withLanguageCode = [
      "hello \x1benUS\x1bworld",
      "\xFE\xFF\x00h\x00e\x00l\x00l\x00o\x00 \x00\x1b\x00e\x00n\x00U\x00S\x00\x1b\x00w\x00o\x00r\x00l\x00d",
      "\xFF\xFEh\x00e\x00l\x00l\x00o\x00 \x00\x1b\x00e\x00n\x00U\x00S\x00\x1b\x00w\x00o\x00r\x00l\x00d\x00",
    ];
    for (const str of withLanguageCode) {
      expect(stringToPDFString(str)).toEqual("hello world");
    }
  });
});
|
||||
|
||||
describe("createValidAbsoluteUrl", function () {
|
||||
it("handles invalid URLs", function () {
|
||||
expect(createValidAbsoluteUrl(undefined, undefined)).toEqual(null);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user