In tagged PDFs, TH can be either a column header or a row header (bug 2014080)

This commit is contained in:
Calixte Denizet 2026-02-03 21:54:45 +01:00
parent 58ac273f1f
commit 280a02150e
No known key found for this signature in database
GPG Key ID: 0C5442631EE0691F
4 changed files with 82 additions and 5 deletions

View File

@ -544,4 +544,71 @@ describe("accessibility", () => {
);
});
});
describe("A TH in a TR itself in a TBody is rowheader", () => {
  // Each entry pairs a selector with the ARIA roles expected on the direct
  // children of the first element matching that selector. The entries are
  // checked in order, from the table itself down to the first body row.
  const EXPECTED_CHILD_ROLES = [
    // THead and TBody must both be exposed as rowgroup.
    [".structTree [role='table']", ["rowgroup", "rowgroup"]],
    // The row of the first rowgroup (the header) has 3 columnheader.
    [
      ".structTree [role='table'] > [role='rowgroup'] > [role='row']",
      ["columnheader", "columnheader", "columnheader"],
    ],
    // The second rowgroup (the body) has 5 rows.
    [
      ".structTree [role='table'] > [role='rowgroup']:nth-child(2)",
      ["row", "row", "row", "row", "row"],
    ],
    // The first body row starts with a rowheader followed by 2 cells.
    [
      ".structTree [role='table'] > [role='rowgroup']:nth-child(2) > [role='row']:first-child",
      ["rowheader", "cell", "cell"],
    ],
  ];

  /**
   * Read, inside the page, the `role` attribute of every direct child of the
   * first element matching the given selector.
   *
   * @param {Object} page - The page object provided by `loadAndWait`.
   * @param {string} selector - CSS selector of the parent element.
   * @returns {Promise<Array<string|null>>} The children roles, in DOM order.
   */
  const readChildRoles = (page, selector) =>
    page.evaluate(
      sel =>
        Array.from(document.querySelector(sel).children, kid =>
          kid.getAttribute("role")
        ),
      selector
    );

  let openedPages;

  beforeEach(async () => {
    openedPages = await loadAndWait("bug2014080.pdf", ".textLayer");
  });

  afterEach(async () => {
    await closePages(openedPages);
  });

  it("must check that the table has the right structure", async () => {
    const checkPage = async ([browserName, page]) => {
      // The levels are inspected one after the other, top-down.
      for (const [selector, expectedRoles] of EXPECTED_CHILD_ROLES) {
        const actualRoles = await readChildRoles(page, selector);
        expect(actualRoles)
          .withContext(`In ${browserName}`)
          .toEqual(expectedRoles);
      }
    };

    await Promise.all(openedPages.map(checkPage));
  });
});
});

View File

@ -872,3 +872,4 @@
!page_with_number_and_link.pdf
!Brotli-Prototype-FileA.pdf
!bug2013793.pdf
!bug2014080.pdf

BIN
test/pdfs/bug2014080.pdf Executable file

Binary file not shown.

View File

@ -61,8 +61,8 @@ const PDF_ROLE_TO_HTML_ROLE = {
TR: "row",
TH: "columnheader",
TD: "cell",
THead: "columnheader",
TBody: null,
THead: "rowgroup",
TBody: "rowgroup",
TFoot: null,
// Standard structure type Caption
Caption: null,
@ -353,7 +353,7 @@ class StructTreeLayerBuilder {
}
}
#walk(node) {
#walk(node, parentNodes = []) {
if (!node) {
return null;
}
@ -378,7 +378,14 @@ class StructTreeLayerBuilder {
element.setAttribute("role", "heading");
element.setAttribute("aria-level", match[1]);
} else if (PDF_ROLE_TO_HTML_ROLE[role]) {
element.setAttribute("role", PDF_ROLE_TO_HTML_ROLE[role]);
element.setAttribute(
"role",
role === "TH" &&
parentNodes.at(-1)?.role === "TR" &&
parentNodes.at(-2)?.role === "TBody"
? "rowheader" // TH inside TR itself in TBody is a rowheader.
: PDF_ROLE_TO_HTML_ROLE[role]
);
}
if (role === "Figure" && this.#addImageInTextLayer(node, element)) {
return element;
@ -423,9 +430,11 @@ class StructTreeLayerBuilder {
// parent node to avoid creating an extra span.
this.#setAttributes(node.children[0], element);
} else {
parentNodes.push(node);
for (const kid of node.children) {
element.append(this.#walk(kid));
element.append(this.#walk(kid, parentNodes));
}
parentNodes.pop();
}
}
return element;