Merge pull request #20623 from calixteman/bug2014080

In tagged PDFs, a TH can be either a column header or a row header (bug 2014080)
This commit is contained in:
calixteman 2026-02-06 16:42:22 +01:00 committed by GitHub
commit c00591c1b6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 82 additions and 5 deletions

View File

@ -544,4 +544,71 @@ describe("accessibility", () => {
); );
}); });
}); });
describe("A TH in a TR itself in a TBody is rowheader", () => {
  let pages;

  beforeEach(async () => {
    pages = await loadAndWait("bug2014080.pdf", ".textLayer");
  });

  afterEach(async () => {
    await closePages(pages);
  });

  it("must check that the table has the right structure", async () => {
    // Each entry pairs a CSS selector with the ARIA roles expected on the
    // children of the first element matching that selector. The entries are
    // verified in order, one after the other, for every browser.
    const checks = [
      // The table's two direct children (header group and body group) must
      // both be exposed as rowgroup.
      [".structTree [role='table']", ["rowgroup", "rowgroup"]],
      // The first row found inside a rowgroup (the header row) has three
      // column headers.
      [
        ".structTree [role='table'] > [role='rowgroup'] > [role='row']",
        ["columnheader", "columnheader", "columnheader"],
      ],
      // The second rowgroup (the body) contains five rows.
      [
        ".structTree [role='table'] > [role='rowgroup']:nth-child(2)",
        ["row", "row", "row", "row", "row"],
      ],
      // The first body row starts with a row header followed by two cells.
      [
        ".structTree [role='table'] > [role='rowgroup']:nth-child(2) > [role='row']:first-child",
        ["rowheader", "cell", "cell"],
      ],
    ];

    await Promise.all(
      pages.map(async ([browserName, page]) => {
        for (const [selector, expectedRoles] of checks) {
          // Collect the `role` attribute of every child of the matched
          // element, in document order.
          const childRoles = await page.evaluate(
            sel =>
              Array.from(document.querySelector(sel).children, child =>
                child.getAttribute("role")
              ),
            selector
          );
          expect(childRoles)
            .withContext(`In ${browserName}`)
            .toEqual(expectedRoles);
        }
      })
    );
  });
});
}); });

View File

@ -872,3 +872,4 @@
!page_with_number_and_link.pdf !page_with_number_and_link.pdf
!Brotli-Prototype-FileA.pdf !Brotli-Prototype-FileA.pdf
!bug2013793.pdf !bug2013793.pdf
!bug2014080.pdf

BIN
test/pdfs/bug2014080.pdf Executable file

Binary file not shown.

View File

@ -61,8 +61,8 @@ const PDF_ROLE_TO_HTML_ROLE = {
TR: "row", TR: "row",
TH: "columnheader", TH: "columnheader",
TD: "cell", TD: "cell",
THead: "columnheader", THead: "rowgroup",
TBody: null, TBody: "rowgroup",
TFoot: null, TFoot: null,
// Standard structure type Caption // Standard structure type Caption
Caption: null, Caption: null,
@ -353,7 +353,7 @@ class StructTreeLayerBuilder {
} }
} }
#walk(node) { #walk(node, parentNodes = []) {
if (!node) { if (!node) {
return null; return null;
} }
@ -378,7 +378,14 @@ class StructTreeLayerBuilder {
element.setAttribute("role", "heading"); element.setAttribute("role", "heading");
element.setAttribute("aria-level", match[1]); element.setAttribute("aria-level", match[1]);
} else if (PDF_ROLE_TO_HTML_ROLE[role]) { } else if (PDF_ROLE_TO_HTML_ROLE[role]) {
element.setAttribute("role", PDF_ROLE_TO_HTML_ROLE[role]); element.setAttribute(
"role",
role === "TH" &&
parentNodes.at(-1)?.role === "TR" &&
parentNodes.at(-2)?.role === "TBody"
? "rowheader" // TH inside TR itself in TBody is a rowheader.
: PDF_ROLE_TO_HTML_ROLE[role]
);
} }
if (role === "Figure" && this.#addImageInTextLayer(node, element)) { if (role === "Figure" && this.#addImageInTextLayer(node, element)) {
return element; return element;
@ -423,9 +430,11 @@ class StructTreeLayerBuilder {
// parent node to avoid creating an extra span. // parent node to avoid creating an extra span.
this.#setAttributes(node.children[0], element); this.#setAttributes(node.children[0], element);
} else { } else {
parentNodes.push(node);
for (const kid of node.children) { for (const kid of node.children) {
element.append(this.#walk(kid)); element.append(this.#walk(kid, parentNodes));
} }
parentNodes.pop();
} }
} }
return element; return element;