From 7810cee0050b9aab695cf87b26afcd4f14155f07 Mon Sep 17 00:00:00 2001 From: Calixte Denizet Date: Fri, 17 Apr 2026 19:50:24 +0200 Subject: [PATCH] Improve reftest runner memory usage and load balancing Replace manifest slicing with a dynamic task queue: drivers request tasks on demand via WebSocket so parallel sessions self-balance naturally. Start reading reference PNGs from disk as soon as a task is dispatched (prefetchRefPngs) to overlap I/O with rendering. Release snapshot buffers and task entries immediately after comparison, and copy WS frame slices via Buffer.from() so the original frame buffer can be GC'd. Bump --max-old-space-size to 8192 MB as a workaround for a Puppeteer/BiDi memory leak where data: URL strings accumulate in BrowsingContext.#requests indefinitely: https://github.com/puppeteer/puppeteer/issues/14876 --- gulpfile.mjs | 4 +- test/driver.js | 110 ++++++++++++++++++------------- test/test.mjs | 174 +++++++++++++++++++++++++++++++------------------ 3 files changed, 177 insertions(+), 111 deletions(-) diff --git a/gulpfile.mjs b/gulpfile.mjs index ee64e5984..09ff243d1 100644 --- a/gulpfile.mjs +++ b/gulpfile.mjs @@ -168,7 +168,9 @@ function startNode(args, options) { // (such as `issue6360.pdf`), so we need to restore this value. Note that // this argument needs to be before all other arguments as it needs to be // passed to the Node.js process itself and not to the script that it runs. - args.unshift("--max-http-header-size=80000"); + // Increase the max heap size to avoid OOM caused by a Puppeteer/BiDi memory + // leak: https://github.com/puppeteer/puppeteer/issues/14876 + args.unshift("--max-http-header-size=80000", "--max-old-space-size=8192"); return spawn("node", args, options); } diff --git a/test/driver.js b/test/driver.js index f50245c7a..7ab52dc52 100644 --- a/test/driver.js +++ b/test/driver.js @@ -549,7 +549,6 @@ class Driver { }; this._info("User agent: " + navigator.userAgent); this._log(`Harness thinks this browser is ${this.browser}\n`); - this._log('Fetching manifest "' + this.manifestFile + '"... '); if (this.delay > 0) { this._log("\nDelaying for " + this.delay + " ms...\n"); @@ -562,32 +561,39 @@ class Driver { this.ws.addEventListener("open", resolve, { once: true }); }); } - const response = await fetch(this.manifestFile); - if (!response.ok) { - throw new Error(response.statusText); - } - this._log("done\n"); - this.manifest = await response.json(); - if (this.testFilter?.length) { - this.manifest = this.manifest.filter(item => { - if (this.testFilter.includes(item.id)) { - return true; + // Dynamic task queue: server sends tasks on demand. + this.taskQueue = []; + this.serverDone = false; + this.pendingTaskResolve = null; + this.currentTask = null; + this.tasksDone = 0; + + this.ws.addEventListener("message", event => { + if (typeof event.data !== "string") { + return; + } + const msg = JSON.parse(event.data); + if (msg.type === "task") { + if (this.pendingTaskResolve) { + this.pendingTaskResolve(msg.task); + this.pendingTaskResolve = null; + } else { + this.taskQueue.push(msg.task); + // Prefetch PDF for this task if it's now first in queue. + if (this.taskQueue.length === 1) { + this._prefetchNextTask(); + } } - return false; - }); - } - if (this.sessionCount > 1) { - const { sessionIndex, sessionCount } = this; - const start = Math.floor( - (this.manifest.length * sessionIndex) / sessionCount - ); - const end = Math.floor( - (this.manifest.length * (sessionIndex + 1)) / sessionCount - ); - this.manifest = this.manifest.slice(start, end); - } - this.currentTask = 0; + } else if (msg.type === "done") { + this.serverDone = true; + if (this.pendingTaskResolve) { + this.pendingTaskResolve(null); + this.pendingTaskResolve = null; + } + } + }); + this._nextTask(); }, this.delay); } @@ -602,23 +608,38 @@ class Driver { */ log(msg) { let id = this.browser; - const task = this.manifest[this.currentTask]; - if (task) { - id += `-${task.id}`; + if (this.currentTask) { + id += `-${this.currentTask.id}`; } - this._info(`${id}: ${msg}`); } + _waitForNextTask() { + if (this.taskQueue.length > 0) { + return Promise.resolve(this.taskQueue.shift()); + } + if (this.serverDone) { + return Promise.resolve(null); + } + this.ws.send( + JSON.stringify({ type: "requestTask", browser: this.browser }) + ); + return new Promise(resolve => { + this.pendingTaskResolve = resolve; + }); + } + _nextTask() { let failure = ""; - this._cleanup().then(() => { - if (this.currentTask === this.manifest.length) { + this._cleanup().then(async () => { + const task = await this._waitForNextTask(); + if (!task) { this._done(); return; } - const task = this.manifest[this.currentTask]; + this.currentTask = task; + task.round = 0; task.pageNum = task.firstPage || 1; task.stats = { times: [] }; @@ -658,13 +679,10 @@ class Driver { md5FileMap.set(task.md5, task.file); } - this._log( - `[${this.currentTask + 1}/${this.manifest.length}] ${task.id}:\n` - ); + this._log(`[${++this.tasksDone}] ${task.id}:\n`); if (task.type === "skip-because-failing") { this._log(` Skipping file "${task.file} because it's failing"\n`); - this.currentTask++; this._nextTask(); return; } @@ -678,7 +696,6 @@ class Driver { this._nextPage(task, 'Expected "other" test-case to be linked.'); return; } - this.currentTask++; this._nextTask(); return; } @@ -885,11 +902,10 @@ class Driver { } _prefetchNextTask() { - const nextIdx = this.currentTask + 1; - if (nextIdx >= this.manifest.length) { + const task = this.taskQueue[0]; + if (!task) { return; } - const task = this.manifest[nextIdx]; // Skip tasks that do not load a PDF or that need DOM setup (XFA style // element injection) to happen synchronously before getDocument. if ( @@ -899,7 +915,9 @@ class Driver { ) { return; } - task._prefetchedLoadingTask = getDocument(this._getDocumentOptions(task)); + if (!task._prefetchedLoadingTask) { + task._prefetchedLoadingTask = getDocument(this._getDocumentOptions(task)); + } } _cleanup() { @@ -918,10 +936,10 @@ class Driver { const destroyedPromises = []; // Wipe out the link to the pdfdoc so it can be GC'ed. - for (let i = 0; i < this.manifest.length; i++) { - if (this.manifest[i].pdfDoc) { - destroyedPromises.push(this.manifest[i].pdfDoc.destroy()); - delete this.manifest[i].pdfDoc; + for (const task of [this.currentTask, ...this.taskQueue]) { + if (task?.pdfDoc) { + destroyedPromises.push(task.pdfDoc.destroy()); + delete task.pdfDoc; } } return Promise.all(destroyedPromises); @@ -955,7 +973,6 @@ class Driver { .then(blob => this._sendResult(blob, task, failure)) .then(() => { this._log(`done${failure ? ` (failed !: ${failure})` : ""}\n`); - this.currentTask++; this._nextTask(); }); return; @@ -966,7 +983,6 @@ class Driver { this._log(` Round ${1 + task.round}\n`); task.pageNum = task.firstPage || 1; } else { - this.currentTask++; this._nextTask(); return; } diff --git a/test/test.mjs b/test/test.mjs index bd5f05451..6a4f5bd6e 100644 --- a/test/test.mjs +++ b/test/test.mjs @@ -290,6 +290,9 @@ async function startRefTest(masterMode, showRefImages) { startTime = Date.now(); startServer(); server.hooks.POST.push(refTestPostHandler); + taskQueue = new Map(); + refPngCache = new Map(); + server.hooks.WS.push(ws => { let pendingOps = 0; let pendingQuit = null; @@ -304,7 +307,29 @@ async function startRefTest(masterMode, showRefImages) { }); } else { const msg = JSON.parse(data.toString()); - if (msg.type === "quit") { + if (msg.type === "requestTask") { + const session = getSession(msg.browser); + session.taskResults ??= {}; + session.tasks ??= {}; + session.remaining ??= 0; + const browserType = session.browserType ?? session.name; + if (!taskQueue.has(browserType)) { + taskQueue.set(browserType, [...manifest]); + } + const task = taskQueue.get(browserType).shift(); + if (task) { + const rounds = task.rounds || 1; + const roundsResults = []; + roundsResults.length = rounds; + session.taskResults[task.id] = roundsResults; + session.tasks[task.id] = task; + session.remaining++; + ws.send(JSON.stringify({ type: "task", task })); + prefetchRefPngs(browserType, task); + } else { + ws.send(JSON.stringify({ type: "done" })); + } + } else if (msg.type === "quit") { const session = getSession(msg.browser); monitorBrowserTimeout(session, null); const doQuit = () => closeSession(session.name); @@ -324,21 +349,9 @@ async function startRefTest(masterMode, showRefImages) { numSessions: options.jobs, initializeSession: session => { session.masterMode = masterMode; - session.taskResults = {}; - session.tasks = {}; - const sessionManifest = getSessionManifest( - manifest, - session.sessionIndex, - session.sessionCount - ); - session.remaining = sessionManifest.length; - sessionManifest.forEach(function (item) { - var rounds = item.rounds || 1; - var roundsResults = []; - roundsResults.length = rounds; - session.taskResults[item.id] = roundsResults; - session.tasks[item.id] = item; - }); + session.taskResults ??= {}; + session.tasks ??= {}; + session.remaining ??= 0; session.numRuns = 0; session.numErrors = 0; session.numFBFFailures = 0; @@ -379,6 +392,7 @@ async function startRefTest(masterMode, showRefImages) { if (!manifest) { return; } + if (!options.noDownload) { await ensurePDFsDownloaded(); } @@ -418,13 +432,32 @@ function getTestManifest() { return manifest; } -function getSessionManifest(manifest, sessionIndex, sessionCount) { - if (sessionCount <= 1) { - return manifest; +function prefetchRefPngs(browserType, task) { + if ( + task.type !== "eq" && + task.type !== "partial" && + task.type !== "text" && + task.type !== "highlight" && + task.type !== "extract" + ) { + return; } - const start = Math.floor((manifest.length * sessionIndex) / sessionCount); - const end = Math.floor((manifest.length * (sessionIndex + 1)) / sessionCount); - return manifest.slice(start, end); + const refSnapshotDir = path.join( + refsDir, + os.platform(), + browserType, + task.id + ); + const firstPage = task.firstPage || 1; + const lastPage = task.lastPage; + // 0-indexed so pages[p-1] = promise for `${p}.png`, matching checkEq's loop. + const pages = []; + for (let p = firstPage; p <= lastPage; p++) { + pages[p - 1] = fs.promises + .readFile(path.join(refSnapshotDir, `${p}.png`)) + .catch(err => (err.code === "ENOENT" ? null : Promise.reject(err))); + } + refPngCache.set(`${browserType}/${task.id}`, pages); } async function checkEq(task, results, session, masterMode) { @@ -446,20 +479,28 @@ async function checkEq(task, results, session, masterMode) { let numEqNoSnapshot = 0; let numEqFailures = 0; - // Read all reference PNGs in parallel, skipping pages with no valid snapshot. + const cacheKey = `${browserType}/${taskId}`; + const cachedPages = refPngCache.get(cacheKey); + refPngCache.delete(cacheKey); + + // Consume pre-started ref PNG reads (started when the task was dispatched), + // falling back to a fresh read if the cache entry is missing. const refSnapshots = await Promise.all( pageResults.map((pageResult, page) => { if (!pageResult || !(pageResult.snapshot instanceof Buffer)) { return null; } - return fs.promises - .readFile(path.join(refSnapshotDir, `${page + 1}.png`)) - .catch(err => { - if (err.code === "ENOENT") { - return null; - } - throw err; - }); + return ( + cachedPages?.[page] ?? + fs.promises + .readFile(path.join(refSnapshotDir, `${page + 1}.png`)) + .catch(err => { + if (err.code === "ENOENT") { + return null; + } + throw err; + }) + ); }) ); @@ -653,32 +694,39 @@ async function checkRefTestResults(browser, id, results) { } }); }); + const browserType = session.browserType ?? session.name; if (failed) { - return; + refPngCache.delete(`${browserType}/${id}`); + } else { + switch (task.type) { + case "eq": + case "partial": + case "text": + case "highlight": + case "extract": + await checkEq(task, results, session, session.masterMode); + break; + case "fbf": + checkFBF(task, results, session, session.masterMode); + break; + case "load": + checkLoad(task, results, session.name); + break; + default: + throw new Error("Unknown test type"); + } } - switch (task.type) { - case "eq": - case "partial": - case "text": - case "highlight": - case "extract": - await checkEq(task, results, session, session.masterMode); - break; - case "fbf": - checkFBF(task, results, session, session.masterMode); - break; - case "load": - checkLoad(task, results, session.name); - break; - default: - throw new Error("Unknown test type"); - } - // clear memory - results.forEach(function (roundResults, round) { - roundResults.forEach(function (pageResult, page) { - pageResult.snapshot = null; + // Clear snapshot buffers and drop the task entry from the session. + results.forEach(function (roundResults) { + roundResults.forEach(function (pageResult) { + if (pageResult) { + pageResult.snapshot = null; + pageResult.baselineSnapshot = null; + } }); }); + delete session.taskResults[id]; + delete session.tasks[id]; } async function handleWsBinaryResult(data) { @@ -689,10 +737,13 @@ async function handleWsBinaryResult(data) { const meta = JSON.parse(data.subarray(4, 4 + metaLen).toString("utf8")); const snapshotLen = data.readUInt32BE(4 + metaLen); const snapshotOffset = 8 + metaLen; - const snapshot = data.subarray(snapshotOffset, snapshotOffset + snapshotLen); + // Copy slices so the original WS frame buffer can be GC'd immediately. + const snapshot = Buffer.from( + data.subarray(snapshotOffset, snapshotOffset + snapshotLen) + ); const baseline = data.length > snapshotOffset + snapshotLen - ? data.subarray(snapshotOffset + snapshotLen) + ? Buffer.from(data.subarray(snapshotOffset + snapshotLen)) : null; const { browser, id, round, page, failure, lastPageNum, numberOfTasks } = @@ -724,8 +775,8 @@ async function handleWsBinaryResult(data) { const lastTaskResults = taskResults.at(-1); const isDone = - lastTaskResults?.[lastPageNum - 1] || - lastTaskResults?.filter(result => !!result).length === numberOfTasks; + !!lastTaskResults?.[lastPageNum - 1] || + lastTaskResults?.filter(Boolean).length === numberOfTasks; if (isDone) { await checkRefTestResults(browser, id, taskResults); session.remaining--; @@ -1003,14 +1054,9 @@ async function startBrowsers({ baseUrl, initializeSession, numSessions = 1 }) { if (baseUrl) { startUrl = `${baseUrl}?browser=${encodeURIComponent(sessionName)}` + - `&manifestFile=${encodeURIComponent(`/test/${options.manifestFile}`)}` + `&testFilter=${JSON.stringify(options.testfilter)}` + - `&delay=${options.statsDelay}&masterMode=${options.masterMode}` + - (numSessions > 1 - ? `&sessionIndex=${i}&sessionCount=${numSessions}` - : ""); + `&delay=${options.statsDelay}&masterMode=${options.masterMode}`; } - await startBrowser({ browserName, startUrl }) .then(async function (browser) { session.browser = browser; @@ -1189,6 +1235,8 @@ var host = "127.0.0.1"; var options = parseOptions(); var stats; var tempDir = null; +var taskQueue = new Map(); +var refPngCache = new Map(); main();