Avoid downloading test PDFs multiple times

It's somewhat common for multiple test-cases to use the same PDF, for example when both an `eq` test and a `text` test are run against one PDF.
Currently, the logic in `downloadManifestFiles` doesn't handle duplicate test PDFs, which wastes time and resources by downloading the same PDF more than once. Naturally, the effect of this is especially bad when downloading all linked PDFs.

Total number of test PDFs downloaded:
 - 507, with `master`.
 - 447, with this patch.
This commit is contained in:
Jonas Jenwald 2026-03-24 22:36:51 +01:00
parent ae70a5d123
commit 9c857166cc

View File

@@ -40,14 +40,27 @@ async function downloadFile(file, url) {
}
async function downloadManifestFiles(manifest) {
const links = manifest
.filter(item => item.link && !fs.existsSync(item.file))
.map(item => {
const url = fs.readFileSync(`${item.file}.link`).toString().trimEnd();
return { file: item.file, url };
});
// Keep track of file identifiers to remove any duplicates,
// since multiple test-cases may use the same PDF.
const seenFiles = new Set();
for (const { file, url } of links) {
const links = new Map(
manifest
.filter(({ link, file }) => {
if (!link || seenFiles.has(file)) {
return false;
}
seenFiles.add(file);
return !fs.existsSync(file);
})
.map(({ file }) => {
const url = fs.readFileSync(`${file}.link`).toString().trimEnd();
return [file, url];
})
);
seenFiles.clear();
for (const [file, url] of links) {
console.log(`Downloading ${url} to ${file}...`);
try {
await downloadFile(file, url);