mirror of
https://github.com/mozilla/pdf.js.git
synced 2026-02-08 00:21:11 +01:00
Add support for Brotli decompression
`BrotliDecode` hasn't been specified yet, but it should be in the near future. So, whenever possible, we use the native `DecompressionStream` API with "brotli" as argument. If that fails, or if we have to decompress in a sync context, we fall back to `BrotliStream`, which is a pure JS implementation (see the README in external/brotli).
This commit is contained in:
parent
471adfd023
commit
43273fde27
@ -31,6 +31,7 @@ export default [
|
||||
"**/docs/",
|
||||
"**/node_modules/",
|
||||
"external/bcmaps/",
|
||||
"external/brotli/",
|
||||
"external/builder/fixtures/",
|
||||
"external/builder/fixtures_babel/",
|
||||
"external/openjpeg/",
|
||||
|
||||
19
external/brotli/LICENSE_BROTLI
vendored
Normal file
19
external/brotli/LICENSE_BROTLI
vendored
Normal file
@ -0,0 +1,19 @@
|
||||
Copyright (c) 2009, 2010, 2013-2016 by the Brotli Authors.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
8
external/brotli/README.md
vendored
Normal file
8
external/brotli/README.md
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
## Release
|
||||
|
||||
In order to get the file `decode.js`:
|
||||
* `gulp release-brotli --hash` followed by the git hash of the revision.
|
||||
|
||||
## Licensing
|
||||
|
||||
[brotli](https://github.com/google/brotli/) is under [MIT License](https://github.com/google/brotli/blob/master/LICENSE)
|
||||
2466
external/brotli/decode.js
vendored
Normal file
2466
external/brotli/decode.js
vendored
Normal file
File diff suppressed because one or more lines are too long
24
gulpfile.mjs
24
gulpfile.mjs
@ -21,6 +21,7 @@ import { exec, execSync, spawn, spawnSync } from "child_process";
|
||||
import autoprefixer from "autoprefixer";
|
||||
import babel from "@babel/core";
|
||||
import crypto from "crypto";
|
||||
import { finished } from "stream/promises";
|
||||
import fs from "fs";
|
||||
import gulp from "gulp";
|
||||
import hljs from "highlight.js";
|
||||
@ -874,6 +875,28 @@ gulp.task("default", function (done) {
|
||||
done();
|
||||
});
|
||||
|
||||
// Downloads the Brotli JS decoder (`js/decode.js`) from the google/brotli
// repository, at the revision given by `--hash <commit-hash>`, and saves it as
// `external/brotli/decode.js`.
//
// The task is an async function, hence its completion (or failure) is signalled
// to gulp through the returned promise; no `done` callback is needed.
gulp.task("release-brotli", async function () {
  const hashIndex = process.argv.indexOf("--hash");
  if (hashIndex === -1 || hashIndex + 1 >= process.argv.length) {
    throw new Error('Missing "--hash <commit-hash>" argument.');
  }
  console.log();
  console.log("### Getting Brotli js file for release");

  const OUTPUT_DIR = "./external/brotli/";
  const hash = process.argv[hashIndex + 1];
  const url = `https://raw.githubusercontent.com/google/brotli/${hash}/js/decode.js`;
  const outputPath = OUTPUT_DIR + "decode.js";

  const res = await fetch(url);
  // Check the HTTP status *before* opening the output file: `fetch` doesn't
  // reject on e.g. a 404 (unknown hash), and we don't want to overwrite the
  // vendored decoder with the body of an error response.
  if (!res.ok) {
    throw new Error(
      `Unable to download "${url}": ${res.status} ${res.statusText}`
    );
  }

  const fileStream = fs.createWriteStream(outputPath, { flags: "w" });
  // `pipe` ends `fileStream` once the response body has been fully read, so
  // waiting for it to be finished is enough (no explicit `end()` call).
  await finished(stream.Readable.fromWeb(res.body).pipe(fileStream));

  console.log(`Brotli js file saved to: ${outputPath}`);
});
|
||||
|
||||
function createBuildNumber(done) {
|
||||
console.log();
|
||||
console.log("### Getting extension build number");
|
||||
@ -1692,6 +1715,7 @@ function buildLib(defines, dir) {
|
||||
gulp.src("external/openjpeg/*.js", { base: "openjpeg/", encoding: false }),
|
||||
gulp.src("external/qcms/*.js", { base: "qcms/", encoding: false }),
|
||||
gulp.src("external/jbig2/*.js", { base: "jbig2/", encoding: false }),
|
||||
gulp.src("external/brotli/*.js", { base: "brotli/", encoding: false }),
|
||||
]);
|
||||
|
||||
return buildLibHelper(bundleDefines, inputStream, dir);
|
||||
|
||||
86
src/core/brotli_stream.js
Normal file
86
src/core/brotli_stream.js
Normal file
@ -0,0 +1,86 @@
|
||||
/* Copyright 2026 Mozilla Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { BrotliDecode } from "../../external/brotli/decode.js";
|
||||
import { DecodeStream } from "./decode_stream.js";
|
||||
import { Stream } from "./stream.js";
|
||||
|
||||
/**
 * Decode stream for Brotli-compressed data.
 *
 * The asynchronous path tries the native `DecompressionStream("brotli")`;
 * the synchronous path (`readBlock`) uses the JavaScript `BrotliDecode`
 * function from `external/brotli/decode.js`.
 */
class BrotliStream extends DecodeStream {
  // Becomes `false` once the native decompression has failed.
  #isAsync = true;

  /**
   * @param {Object} stream - The stream holding the compressed data.
   * @param {number} maybeLength - Forwarded as-is to `DecodeStream`.
   */
  constructor(stream, maybeLength) {
    super(maybeLength);

    this.stream = stream;
    this.dict = stream.dict;
  }

  get isAsync() {
    return this.#isAsync;
  }

  /**
   * Synchronous decoding: all the bytes of the underlying stream are read
   * and decoded in one go, hence the stream is at `eof` right after.
   */
  readBlock() {
    // TODO: add some telemetry to measure how often we fallback here.
    const { buffer, byteOffset, length } = this.stream.getBytes();
    // The decoder is given an `Int8Array` view on the very same memory.
    const decoded = BrotliDecode(new Int8Array(buffer, byteOffset, length));

    // Expose the decoded data as unsigned bytes, without copying them.
    this.buffer = new Uint8Array(
      decoded.buffer,
      decoded.byteOffset,
      decoded.length
    );
    this.bufferLength = this.buffer.length;
    this.eof = true;
  }

  /**
   * @param {number} length - The maximum number of bytes to return.
   * @param {Object} _decoderOptions - Unused.
   * @returns {Promise<Uint8Array>} At most `length` decompressed bytes.
   */
  async getImageData(length, _decoderOptions) {
    const decoded = await this.asyncGetBytes();
    if (!decoded) {
      // The async decompression failed: use the sync code path.
      return this.getBytes(length);
    }
    return decoded.length > length ? decoded.subarray(0, length) : decoded;
  }

  /**
   * @returns {Promise<Uint8Array|null>} The decompressed bytes, or `null`
   *   when `DecompressionStream` failed; in that case the stream has been
   *   switched to its synchronous decoder.
   */
  async asyncGetBytes() {
    const result = await this.asyncGetBytesFromDecompressionStream("brotli");
    if (result.decompressed) {
      return result.decompressed;
    }

    // DecompressionStream failed (for example because there are some extra
    // bytes after the end of the compressed data), so we fall back to our
    // decoder.
    // The bytes of the underlying stream have already been read, so we just
    // wrap them in a new `Stream` instead of getting them again.
    const rawBytes = result.compressed;
    const { dict } = this.stream;

    this.#isAsync = false;
    this.stream = new Stream(rawBytes, 0, rawBytes.length, dict);
    this.reset();
    return null;
  }
}
|
||||
|
||||
export { BrotliStream };
|
||||
@ -110,6 +110,46 @@ class DecodeStream extends BaseStream {
|
||||
return this.decodeImage(data, decoderOptions);
|
||||
}
|
||||
|
||||
async asyncGetBytesFromDecompressionStream(name) {
|
||||
this.stream.reset();
|
||||
const bytes = this.stream.isAsync
|
||||
? await this.stream.asyncGetBytes()
|
||||
: this.stream.getBytes();
|
||||
|
||||
try {
|
||||
const { readable, writable } = new DecompressionStream(name);
|
||||
const writer = writable.getWriter();
|
||||
await writer.ready;
|
||||
|
||||
// We can't await writer.write() because it'll block until the reader
|
||||
// starts which happens few lines below.
|
||||
writer
|
||||
.write(bytes)
|
||||
.then(async () => {
|
||||
await writer.ready;
|
||||
await writer.close();
|
||||
})
|
||||
.catch(() => {});
|
||||
|
||||
const chunks = [];
|
||||
let totalLength = 0;
|
||||
|
||||
for await (const chunk of readable) {
|
||||
chunks.push(chunk);
|
||||
totalLength += chunk.byteLength;
|
||||
}
|
||||
const data = new Uint8Array(totalLength);
|
||||
let offset = 0;
|
||||
for (const chunk of chunks) {
|
||||
data.set(chunk, offset);
|
||||
offset += chunk.byteLength;
|
||||
}
|
||||
return { decompressed: data, compressed: bytes };
|
||||
} catch {
|
||||
return { decompressed: null, compressed: bytes };
|
||||
}
|
||||
}
|
||||
|
||||
reset() {
|
||||
this.pos = 0;
|
||||
}
|
||||
|
||||
@ -163,57 +163,26 @@ class FlateStream extends DecodeStream {
|
||||
}
|
||||
|
||||
async asyncGetBytes() {
|
||||
this.stream.reset();
|
||||
const bytes = this.stream.isAsync
|
||||
? await this.stream.asyncGetBytes()
|
||||
: this.stream.getBytes();
|
||||
|
||||
try {
|
||||
const { readable, writable } = new DecompressionStream("deflate");
|
||||
const writer = writable.getWriter();
|
||||
await writer.ready;
|
||||
|
||||
// We can't await writer.write() because it'll block until the reader
|
||||
// starts which happens few lines below.
|
||||
writer
|
||||
.write(bytes)
|
||||
.then(async () => {
|
||||
await writer.ready;
|
||||
await writer.close();
|
||||
})
|
||||
.catch(() => {});
|
||||
|
||||
const chunks = [];
|
||||
let totalLength = 0;
|
||||
|
||||
for await (const chunk of readable) {
|
||||
chunks.push(chunk);
|
||||
totalLength += chunk.byteLength;
|
||||
}
|
||||
const data = new Uint8Array(totalLength);
|
||||
let offset = 0;
|
||||
for (const chunk of chunks) {
|
||||
data.set(chunk, offset);
|
||||
offset += chunk.byteLength;
|
||||
}
|
||||
|
||||
return data;
|
||||
} catch {
|
||||
// DecompressionStream failed (for example because there are some extra
|
||||
// bytes after the end of the compressed data), so we fallback to our
|
||||
// decoder.
|
||||
// We already get the bytes from the underlying stream, so we just reuse
|
||||
// them to avoid get them again.
|
||||
this.#isAsync = false;
|
||||
this.stream = new Stream(
|
||||
bytes,
|
||||
2 /* = header size (see ctor) */,
|
||||
bytes.length,
|
||||
this.stream.dict
|
||||
);
|
||||
this.reset();
|
||||
return null;
|
||||
const { decompressed, compressed } =
|
||||
await this.asyncGetBytesFromDecompressionStream("deflate");
|
||||
if (decompressed) {
|
||||
return decompressed;
|
||||
}
|
||||
// DecompressionStream failed (for example because there are some extra
|
||||
// bytes after the end of the compressed data), so we fallback to our
|
||||
// decoder.
|
||||
// We already get the bytes from the underlying stream, so we just reuse
|
||||
// them to avoid get them again.
|
||||
|
||||
this.#isAsync = false;
|
||||
this.stream = new Stream(
|
||||
compressed,
|
||||
2 /* = header size (see ctor) */,
|
||||
compressed.length,
|
||||
this.stream.dict
|
||||
);
|
||||
this.reset();
|
||||
return null;
|
||||
}
|
||||
|
||||
get isAsync() {
|
||||
|
||||
@ -29,6 +29,7 @@ import {
|
||||
import { NullStream, Stream } from "./stream.js";
|
||||
import { Ascii85Stream } from "./ascii_85_stream.js";
|
||||
import { AsciiHexStream } from "./ascii_hex_stream.js";
|
||||
import { BrotliStream } from "./brotli_stream.js";
|
||||
import { CCITTFaxStream } from "./ccitt_stream.js";
|
||||
import { FlateStream } from "./flate_stream.js";
|
||||
import { Jbig2Stream } from "./jbig2_stream.js";
|
||||
@ -822,6 +823,8 @@ class Parser {
|
||||
return new RunLengthStream(stream, maybeLength);
|
||||
case "JBIG2Decode":
|
||||
return new Jbig2Stream(stream, maybeLength, params);
|
||||
case "BrotliDecode":
|
||||
return new BrotliStream(stream, maybeLength);
|
||||
}
|
||||
warn(`Filter "${name}" is not supported.`);
|
||||
return stream;
|
||||
|
||||
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
@ -870,3 +870,4 @@
|
||||
!bug2009627.pdf
|
||||
!page_with_number.pdf
|
||||
!page_with_number_and_link.pdf
|
||||
!Brotli-Prototype-FileA.pdf
|
||||
|
||||
BIN
test/pdfs/Brotli-Prototype-FileA.pdf
Normal file
BIN
test/pdfs/Brotli-Prototype-FileA.pdf
Normal file
Binary file not shown.
@ -13929,5 +13929,12 @@
|
||||
"md5": "e515a9abb11ab74332e57e371bfae61e",
|
||||
"rounds": 1,
|
||||
"type": "eq"
|
||||
},
|
||||
{
|
||||
"id": "Brotli-Prototype-FileA",
|
||||
"file": "pdfs/Brotli-Prototype-FileA.pdf",
|
||||
"md5": "9113370932798776ba91c807ce95082e",
|
||||
"rounds": 1,
|
||||
"type": "eq"
|
||||
}
|
||||
]
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user