pdf.js.mirror/src/core/string_utils.js
Calixte Denizet 7bda0fc97c Enable 'eslint-plugin-regexp' and fix existing findings
Enable the recommended preset and fix or per-line-disable the 78
findings it surfaces. Most are equivalent rewrites, intentional
patterns (control chars, the whatwg email regex, autolinker URL regex)
keep their behavior via targeted disables.
2026-05-25 14:31:55 +02:00

127 lines
4.0 KiB
JavaScript

/* Copyright 2019 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { stringToBytes, Util, warn } from "../shared/util.js";
function isAscii(str) {
return (
typeof str === "string" &&
// eslint-disable-next-line no-control-regex
(!str || /^[\x00-\x7F]*$/.test(str))
);
}
// If the string is null or undefined then it is returned as is.
function stringToAsciiOrUTF16BE(str) {
if (str === null || str === undefined) {
return str;
}
return isAscii(str) ? str : stringToUTF16String(str, /* bigEndian = */ true);
}
function stringToUTF16HexString(str) {
const buf = [];
for (let i = 0, ii = str.length; i < ii; i++) {
const char = str.charCodeAt(i);
buf.push(Util.hexNums[(char >> 8) & 0xff], Util.hexNums[char & 0xff]);
}
return buf.join("");
}
function stringToUTF16String(str, bigEndian = false) {
const buf = [];
if (bigEndian) {
buf.push("\xFE\xFF");
}
for (let i = 0, ii = str.length; i < ii; i++) {
const char = str.charCodeAt(i);
buf.push(
String.fromCharCode((char >> 8) & 0xff),
String.fromCharCode(char & 0xff)
);
}
return buf.join("");
}
const PDFStringTranslateTable = [
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2d8,
0x2c7, 0x2c6, 0x2d9, 0x2dd, 0x2db, 0x2da, 0x2dc, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x192,
0x2044, 0x2039, 0x203a, 0x2212, 0x2030, 0x201e, 0x201c, 0x201d, 0x2018,
0x2019, 0x201a, 0x2122, 0xfb01, 0xfb02, 0x141, 0x152, 0x160, 0x178, 0x17d,
0x131, 0x142, 0x153, 0x161, 0x17e, 0, 0x20ac,
];
function stringToPDFString(str, keepEscapeSequence = false) {
// See section 7.9.2.2 Text String Type.
// The string can contain some language codes bracketed with 0x1b,
// so we must remove them.
if (str[0] >= "\xEF") {
let encoding;
if (str[0] === "\xFE" && str[1] === "\xFF") {
encoding = "utf-16be";
if (str.length % 2 === 1) {
str = str.slice(0, -1);
}
} else if (str[0] === "\xFF" && str[1] === "\xFE") {
encoding = "utf-16le";
if (str.length % 2 === 1) {
str = str.slice(0, -1);
}
} else if (str[0] === "\xEF" && str[1] === "\xBB" && str[2] === "\xBF") {
encoding = "utf-8";
}
if (encoding) {
try {
const decoder = new TextDecoder(encoding, { fatal: true });
const buffer = stringToBytes(str);
const decoded = decoder.decode(buffer);
if (keepEscapeSequence || !decoded.includes("\x1b")) {
return decoded;
}
// eslint-disable-next-line no-control-regex
return decoded.replaceAll(/\x1b[^\x1b]*(?:\x1b|$)/g, "");
} catch (ex) {
warn(`stringToPDFString: "${ex}".`);
}
}
}
// ISO Latin 1
const strBuf = [];
for (let i = 0, ii = str.length; i < ii; i++) {
const charCode = str.charCodeAt(i);
if (!keepEscapeSequence && charCode === 0x1b) {
// eslint-disable-next-line no-empty
while (++i < ii && str.charCodeAt(i) !== 0x1b) {}
continue;
}
const code = PDFStringTranslateTable[charCode];
strBuf.push(code ? String.fromCharCode(code) : str.charAt(i));
}
return strBuf.join("");
}
export {
isAscii,
stringToAsciiOrUTF16BE,
stringToPDFString,
stringToUTF16HexString,
stringToUTF16String,
};