mirror of
https://github.com/mozilla/pdf.js.git
synced 2026-02-08 00:21:11 +01:00
Note that compared to other structures, such as e.g. Images and ColorSpaces, `Function`s are not referred to by name, which however does bring the advantage of being able to share the cache for an *entire* page.
Furthermore, similar to ColorSpaces, the parsing of individual `Function`s is generally fast enough to not really warrant trying to cache them in any "smarter" way than by reference. (Hence trying to do caching similar to e.g. Fonts would most likely be a losing proposition, given the amount of data lookup/parsing that'd be required.)
Originally I tried implementing this similar to e.g. the recently added ColorSpace caching (and in a couple of different ways), however it unfortunately turned out to be quite ugly/unwieldy given the sheer number of functions/methods where you'd thus need to pass in a `LocalFunctionCache` instance. (Also, the affected functions/methods didn't exactly have short signatures as-is.)
After going back and forth on this for a while it seemed to me that the simplest, or least "invasive" if you will, solution would be if each `PartialEvaluator` instance had its *own* `PDFFunctionFactory` instance (since the latter is already passed to all of the required code). This way each `PDFFunctionFactory` instance could have a local `Function` cache, without it being necessary to provide a `LocalFunctionCache` instance manually at every `PDFFunctionFactory.{create, createFromArray}` call-site.
Obviously, with this patch, there's now (potentially) more `PDFFunctionFactory` instances than before when the entire document shared just one. However, each such instance is really quite small and it's also tied to a `PartialEvaluator` instance and those are *not* kept alive and/or cached. To reduce the impact of these changes, I've tried to make as many of these structures as possible *lazily initialized*, specifically:
- The `PDFFunctionFactory`, on `PartialEvaluator` instances, since not all kinds of general parsing actually require it. For example: `getTextContent` calls won't cause any `Function` to be parsed, and even some `getOperatorList` calls won't trigger `Function` parsing (if a page contains e.g. no Patterns or "complex" ColorSpaces).
- The `LocalFunctionCache`, on `PDFFunctionFactory` instances, since only certain parsing requires it. Generally speaking, only e.g. Patterns, "complex" ColorSpaces, and/or (some) SoftMasks will trigger any `Function` parsing.
To put these changes into perspective, when loading/rendering all (14) pages of the default `tracemonkey.pdf` file there's now a total of 6 `PDFFunctionFactory` and 1 `LocalFunctionCache` instances created thanks to the lazy initialization.
(If you instead would keep the document-"global" `PDFFunctionFactory` instance and pass around `LocalFunctionCache` instances everywhere, the numbers for the `tracemonkey.pdf` file would instead be something like 1 `PDFFunctionFactory` and 6 `LocalFunctionCache` instances.)
All-in-all, I thus don't think that the `PDFFunctionFactory` changes should be generally problematic.
With these changes, we can also modify (some) call-sites to pass in a `Reference` rather than the actual `Function` data. This is nice since `Function`s can also be `Streams`, which are not cached on the `XRef` instance (given their potential size), and this way we can avoid unnecessary lookups and thus save some additional time/resources.
Obviously I had intended to include (standard) benchmark results with these changes, but for reasons I don't really understand the test run-time (even with `master`) of the document in issue 2541 is quite a bit slower than in the development viewer.
However, logging the time it takes for the relevant `PDFFunctionFactory`/`PDFFunction` parsing shows that it takes *approximately* `0.5 ms` for the `Function` in question. Looking up a cached `Function`, on the other hand, is *one order of magnitude faster* which does add up when the same `Function` is invoked close to 2000 times.
209 lines
5.0 KiB
JavaScript
209 lines
5.0 KiB
JavaScript
/* Copyright 2019 Mozilla Foundation
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
/* eslint no-var: error */
|
|
|
|
import { assert, info, shadow, unreachable } from "../shared/util.js";
|
|
import { RefSetCache } from "./primitives.js";
|
|
|
|
/**
 * Abstract base class shared by the local caches in this file.
 *
 * It owns three stores:
 *  - `_nameRefMap`: a `Map` from a name to a reference,
 *  - `_imageMap`: a `Map` from a name to cached data,
 *  - `_imageCache`: a `RefSetCache` from a reference to cached data.
 *
 * Subclasses are expected to provide their own `set` implementation.
 */
class BaseLocalCache {
  constructor() {
    // Only subclasses may be instantiated.
    const isDirectInstantiation = this.constructor === BaseLocalCache;
    if (isDirectInstantiation) {
      unreachable("Cannot initialize BaseLocalCache.");
    }
    this._nameRefMap = new Map();
    this._imageMap = new Map();
    this._imageCache = new RefSetCache();
  }

  /**
   * Look up cached data by name.
   *
   * A name that is mapped to a reference is resolved through `getByRef`;
   * otherwise the name-keyed map is consulted directly.
   *
   * @param {*} name - The key to look up.
   * @returns {*} The cached data, or `null` when the name-keyed lookup
   *   yields nothing (or a falsy value).
   */
  getByName(name) {
    const mappedRef = this._nameRefMap.get(name);
    return mappedRef
      ? this.getByRef(mappedRef)
      : (this._imageMap.get(name) || null);
  }

  /**
   * Look up cached data by reference.
   *
   * @param {*} ref - The reference to look up in the `RefSetCache`.
   * @returns {*} The cached data, or `null` when the lookup yields nothing
   *   (or a falsy value).
   */
  getByRef(ref) {
    const entry = this._imageCache.get(ref);
    return entry || null;
  }

  /**
   * Store data in the cache; must be overridden by every subclass.
   *
   * @param {*} name
   * @param {*} ref
   * @param {*} data
   */
  set(name, ref, data) {
    unreachable("Abstract method `set` called.");
  }
}
|
|
|
|
/**
 * Local cache for images, keyed by name and (optionally) by reference.
 */
class LocalImageCache extends BaseLocalCache {
  /**
   * Cache `data` unless an entry already exists for the given key.
   *
   * With a `ref`, the data is stored in the reference cache and `name` is
   * mapped to that reference. Without a `ref`, the data is stored in the
   * name-keyed map. Existing entries are never overwritten.
   *
   * @param {*} name - Required key for the image.
   * @param {*} [ref] - Optional reference for the image.
   * @param {*} data - The data to cache.
   * @throws {Error} When `name` is falsy.
   */
  set(name, ref = null, data) {
    if (!name) {
      throw new Error('LocalImageCache.set - expected "name" argument.');
    }

    if (!ref) {
      // No reference available: fall back to caching purely by name.
      if (!this._imageMap.has(name)) {
        this._imageMap.set(name, data);
      }
      return;
    }

    // First writer wins; an already cached reference is left untouched.
    if (!this._imageCache.has(ref)) {
      this._nameRefMap.set(name, ref);
      this._imageCache.put(ref, data);
    }
  }
}
|
|
|
|
/**
 * Local cache for colour spaces, keyed by name and/or by reference.
 */
class LocalColorSpaceCache extends BaseLocalCache {
  /**
   * Cache `data` unless an entry already exists for the given key.
   *
   * With a `ref`, the data is stored in the reference cache and, when a
   * `name` is also given, the name is mapped to that reference. Without a
   * `ref`, the data is stored in the name-keyed map. Existing entries are
   * never overwritten.
   *
   * @param {*} [name] - Optional key; required when `ref` is missing.
   * @param {*} [ref] - Optional reference; required when `name` is missing.
   * @param {*} data - The data to cache.
   * @throws {Error} When both `name` and `ref` are falsy.
   */
  set(name = null, ref = null, data) {
    if (!(name || ref)) {
      throw new Error(
        'LocalColorSpaceCache.set - expected "name" and/or "ref" argument.'
      );
    }

    if (!ref) {
      // No reference available: fall back to caching purely by name.
      if (!this._imageMap.has(name)) {
        this._imageMap.set(name, data);
      }
      return;
    }

    // First writer wins; an already cached reference is left untouched.
    if (this._imageCache.has(ref)) {
      return;
    }
    // The name mapping is optional whenever a reference is available.
    if (name) {
      this._nameRefMap.set(name, ref);
    }
    this._imageCache.put(ref, data);
  }
}
|
|
|
|
/**
 * Local cache for `Function`s, which are only ever cached by reference.
 */
class LocalFunctionCache extends BaseLocalCache {
  /**
   * Name-based lookups are not supported by this cache.
   *
   * @param {*} name - Unused.
   */
  getByName(name) {
    unreachable("Should not call `getByName` method.");
  }

  /**
   * Cache `data` for `ref` unless that reference is already cached.
   *
   * @param {*} [name] - Unused; kept so the signature matches the other
   *   local caches.
   * @param {*} ref - Required reference to cache the data under.
   * @param {*} data - The data to cache.
   * @throws {Error} When `ref` is falsy.
   */
  set(name = null, ref, data) {
    if (!ref) {
      throw new Error('LocalFunctionCache.set - expected "ref" argument.');
    }
    // First writer wins; an already cached reference is left untouched.
    const alreadyCached = this._imageCache.has(ref);
    if (!alreadyCached) {
      this._imageCache.put(ref, data);
    }
  }
}
|
|
|
|
/**
 * Cache for image data that is referenced from several pages.
 *
 * `_refCache` maps an image reference to the `Set` of page indices that
 * have been recorded for it; `_imageCache` maps an image reference to the
 * cached data. Data is only handed out once a reference has been recorded
 * on at least `NUM_PAGES_THRESHOLD` pages, and at most
 * `MAX_IMAGES_TO_CACHE` entries are stored.
 */
class GlobalImageCache {
  /** Minimum number of distinct pages before an image is cached/served. */
  static get NUM_PAGES_THRESHOLD() {
    return shadow(this, "NUM_PAGES_THRESHOLD", 2);
  }

  /** Upper bound on the number of entries kept in `_imageCache`. */
  static get MAX_IMAGES_TO_CACHE() {
    return shadow(this, "MAX_IMAGES_TO_CACHE", 10);
  }

  constructor() {
    // NOTE(review): the `PDFJSDev` condition is kept exactly as written; it
    // looks like a build-time guard that limits the sanity check to
    // non-production/testing builds -- confirm before reshaping it.
    if (
      typeof PDFJSDev === "undefined" ||
      PDFJSDev.test("!PRODUCTION || TESTING")
    ) {
      assert(
        GlobalImageCache.NUM_PAGES_THRESHOLD > 1,
        "GlobalImageCache - invalid NUM_PAGES_THRESHOLD constant."
      );
    }
    this._refCache = new RefSetCache();
    this._imageCache = new RefSetCache();
  }

  /**
   * Decide whether the image at `ref` should be cached when it is
   * encountered on page `pageIndex`.
   *
   * @param {*} ref - The image reference.
   * @param {number} pageIndex - The page on which the image is seen.
   * @returns {boolean} `false` when the image would be recorded on fewer
   *   than `NUM_PAGES_THRESHOLD` pages (counting `pageIndex`), or when it
   *   is not cached yet and the cache is already full; `true` otherwise.
   */
  shouldCache(ref, pageIndex) {
    const knownPages = this._refCache.get(ref);

    // Count the pages as if `pageIndex` had already been recorded.
    let pageCount = 1;
    if (knownPages) {
      pageCount = knownPages.size;
      if (!knownPages.has(pageIndex)) {
        pageCount += 1;
      }
    }
    if (pageCount < GlobalImageCache.NUM_PAGES_THRESHOLD) {
      return false;
    }

    const wouldExceedLimit =
      !this._imageCache.has(ref) &&
      this._imageCache.size >= GlobalImageCache.MAX_IMAGES_TO_CACHE;
    return !wouldExceedLimit;
  }

  /**
   * Record that the image at `ref` occurs on page `pageIndex`.
   *
   * @param {*} ref - The image reference.
   * @param {number} pageIndex - The page on which the image is seen.
   */
  addPageIndex(ref, pageIndex) {
    const knownPages = this._refCache.get(ref);
    if (knownPages) {
      knownPages.add(pageIndex);
      return;
    }
    // First sighting of this reference: start a new page set for it.
    const freshPages = new Set();
    this._refCache.put(ref, freshPages);
    freshPages.add(pageIndex);
  }

  /**
   * Fetch the cached data for `ref`, recording `pageIndex` on a hit.
   *
   * @param {*} ref - The image reference.
   * @param {number} pageIndex - The page requesting the image.
   * @returns {*} The cached data, or `null` when the reference is unknown,
   *   recorded on fewer than `NUM_PAGES_THRESHOLD` pages, or has no data.
   */
  getData(ref, pageIndex) {
    const knownPages = this._refCache.get(ref);
    if (
      !knownPages ||
      knownPages.size < GlobalImageCache.NUM_PAGES_THRESHOLD ||
      !this._imageCache.has(ref)
    ) {
      return null;
    }
    // Ensure that we keep track of all pages containing the image reference.
    knownPages.add(pageIndex);

    return this._imageCache.get(ref);
  }

  /**
   * Store `data` for `ref`, without overwriting an existing entry.
   *
   * When the cache already holds `MAX_IMAGES_TO_CACHE` entries the data is
   * dropped and a message is passed to `info` instead.
   *
   * @param {*} ref - The image reference.
   * @param {*} data - The data to cache.
   * @throws {Error} When no page index has been recorded for `ref`.
   */
  setData(ref, data) {
    if (!this._refCache.has(ref)) {
      throw new Error(
        'GlobalImageCache.setData - expected "addPageIndex" to have been called.'
      );
    }

    const images = this._imageCache;
    if (images.has(ref)) {
      return;
    }
    if (images.size >= GlobalImageCache.MAX_IMAGES_TO_CACHE) {
      info(
        "GlobalImageCache.setData - ignoring image above MAX_IMAGES_TO_CACHE."
      );
    } else {
      images.put(ref, data);
    }
  }

  /**
   * Empty the cache.
   *
   * @param {boolean} [onlyData] - When `true`, only the cached data is
   *   dropped and the recorded page indices are kept. Defaults to `false`.
   */
  clear(onlyData = false) {
    const targets = onlyData
      ? [this._imageCache]
      : [this._refCache, this._imageCache];
    for (const target of targets) {
      target.clear();
    }
  }
}
|
|
|
|
export {
|
|
LocalImageCache,
|
|
LocalColorSpaceCache,
|
|
LocalFunctionCache,
|
|
GlobalImageCache,
|
|
};
|