From 9c857166cca051312ca6a47797b2d8a5732b6cc1 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Tue, 24 Mar 2026 22:36:51 +0100 Subject: [PATCH] Avoid downloading test PDFs multiple times It's somewhat common for multiple test-cases to use the same PDF, for example tests for both `eq` and `text` with one PDF. Currently the logic in `downloadManifestFiles` doesn't handle duplicate test PDFs, which leads to wasted time/resources by downloading the same PDF more than once. Naturally the effect of this is especially bad when downloading all linked PDFs. Total number of test PDFs downloaded: - 507, with `master`. - 447, with this patch. --- test/downloadutils.mjs | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/test/downloadutils.mjs b/test/downloadutils.mjs index 63cb81473..9fdbe4780 100644 --- a/test/downloadutils.mjs +++ b/test/downloadutils.mjs @@ -40,14 +40,27 @@ async function downloadFile(file, url) { } async function downloadManifestFiles(manifest) { - const links = manifest - .filter(item => item.link && !fs.existsSync(item.file)) - .map(item => { - const url = fs.readFileSync(`${item.file}.link`).toString().trimEnd(); - return { file: item.file, url }; - }); + // Keep track of file identifiers to remove any duplicates, + // since multiple test-cases may use the same PDF. + const seenFiles = new Set(); - for (const { file, url } of links) { + const links = new Map( + manifest + .filter(({ link, file }) => { + if (!link || seenFiles.has(file)) { + return false; + } + seenFiles.add(file); + return !fs.existsSync(file); + }) + .map(({ file }) => { + const url = fs.readFileSync(`${file}.link`).toString().trimEnd(); + return [file, url]; + }) + ); + seenFiles.clear(); + + for (const [file, url] of links) { console.log(`Downloading ${url} to ${file}...`); try { await downloadFile(file, url);