mirror of
https://github.com/mozilla/pdf.js.git
synced 2026-06-09 03:31:01 +02:00
Merge pull request #21331 from calixteman/fix_cjk_file
Load the predefined CMap for composite fonts that omit the FontDescriptor
This commit is contained in:
commit
f82382e010
@ -4436,6 +4436,13 @@ class PartialEvaluator {
|
||||
// FontDescriptor is only required for Type3 fonts when the document
|
||||
// is a tagged pdf.
|
||||
descriptor = Dict.empty;
|
||||
} else if (composite) {
|
||||
// Some PDFs omit the FontDescriptor on the descendant CIDFont when
|
||||
// referencing one of the standard Acrobat CJK fonts via a predefined
|
||||
// CMap (e.g. /Encoding /90ms-RKSJ-H with /BaseFont /HeiseiMin-W3).
|
||||
// Fall through so the CMap is loaded by the composite-font path
|
||||
// below; otherwise multi-byte codes would be decoded byte-by-byte.
|
||||
descriptor = Dict.empty;
|
||||
} else {
|
||||
// Before PDF 1.5 if the font was one of the base 14 fonts, having a
|
||||
// FontDescriptor was not required.
|
||||
|
||||
@ -21,6 +21,10 @@ const NORMAL = {
|
||||
style: "normal",
|
||||
weight: "normal",
|
||||
};
|
||||
// Style descriptor for medium-weight faces: normal (upright) style with a
// numeric weight of "500".
const MEDIUM = { style: "normal", weight: "500" };
|
||||
const BOLD = {
|
||||
style: "normal",
|
||||
weight: "bold",
|
||||
@ -364,6 +368,194 @@ const substitutionMap = new Map([
|
||||
alias: "\xCB\xCE\xCC\xE5",
|
||||
},
|
||||
],
|
||||
// Standard Acrobat CJK fonts. These BaseFont names appear in PDFs that
|
||||
// don't embed a CJK font and rely on the reader having Acrobat's bundled
|
||||
// CJK fonts installed.
|
||||
// Adobe-Japan1 - Mincho (serif).
|
||||
[
|
||||
"HeiseiMin-W3",
|
||||
{
|
||||
local: [
|
||||
"Hiragino Mincho ProN",
|
||||
"Hiragino Mincho Pro",
|
||||
"Yu Mincho",
|
||||
"YuMincho",
|
||||
"Source Han Serif JP",
|
||||
"Noto Serif JP",
|
||||
"Noto Serif CJK JP",
|
||||
"IPAexMincho",
|
||||
"IPAMincho",
|
||||
"Takao Mincho",
|
||||
"MS Mincho",
|
||||
"MS PMincho",
|
||||
],
|
||||
style: NORMAL,
|
||||
ultimate: "serif",
|
||||
},
|
||||
],
|
||||
// Adobe-Japan1 - Gothic (sans-serif).
|
||||
[
|
||||
"HeiseiKakuGo-W5",
|
||||
{
|
||||
local: [
|
||||
"Hiragino Kaku Gothic ProN",
|
||||
"Hiragino Kaku Gothic Pro",
|
||||
"Hiragino Sans",
|
||||
"Yu Gothic",
|
||||
"YuGothic",
|
||||
"Source Han Sans JP",
|
||||
"Noto Sans JP",
|
||||
"Noto Sans CJK JP",
|
||||
"IPAexGothic",
|
||||
"IPAGothic",
|
||||
"Takao Gothic",
|
||||
"Meiryo",
|
||||
"MS Gothic",
|
||||
"MS PGothic",
|
||||
],
|
||||
style: MEDIUM,
|
||||
ultimate: "sans-serif",
|
||||
},
|
||||
],
|
||||
// Common Adobe-Japan1 variants and Kozuka names.
|
||||
["HeiseiMin-W3-Acro", { alias: "HeiseiMin-W3" }],
|
||||
["HeiseiKakuGo-W5-Acro", { alias: "HeiseiKakuGo-W5" }],
|
||||
["KozMinPro-Regular", { alias: "HeiseiMin-W3" }],
|
||||
["KozMinProVI-Regular", { alias: "HeiseiMin-W3" }],
|
||||
["KozMinPr6N-Regular", { alias: "HeiseiMin-W3" }],
|
||||
["KozGoPro-Regular", { alias: "HeiseiKakuGo-W5" }],
|
||||
["KozGoProVI-Regular", { alias: "HeiseiKakuGo-W5" }],
|
||||
["KozGoPr6N-Regular", { alias: "HeiseiKakuGo-W5" }],
|
||||
|
||||
// Adobe-GB1 - Song (Simplified Chinese serif).
|
||||
[
|
||||
"STSong-Light",
|
||||
{
|
||||
local: [
|
||||
"STSong",
|
||||
"Songti SC",
|
||||
"Source Han Serif SC",
|
||||
"Source Han Serif CN",
|
||||
"Noto Serif SC",
|
||||
"Noto Serif CJK SC",
|
||||
"AR PL UMing CN",
|
||||
"SimSun",
|
||||
"NSimSun",
|
||||
],
|
||||
style: NORMAL,
|
||||
ultimate: "serif",
|
||||
},
|
||||
],
|
||||
// Adobe-GB1 - Hei (Simplified Chinese sans-serif).
|
||||
[
|
||||
"STHeiti-Regular",
|
||||
{
|
||||
local: [
|
||||
"STHeiti",
|
||||
"Heiti SC",
|
||||
"PingFang SC",
|
||||
"Source Han Sans SC",
|
||||
"Source Han Sans CN",
|
||||
"Noto Sans SC",
|
||||
"Noto Sans CJK SC",
|
||||
"Microsoft YaHei",
|
||||
"SimHei",
|
||||
"WenQuanYi Zen Hei",
|
||||
],
|
||||
style: NORMAL,
|
||||
ultimate: "sans-serif",
|
||||
},
|
||||
],
|
||||
["STSongStd-Light", { alias: "STSong-Light" }],
|
||||
["AdobeSongStd-Light", { alias: "STSong-Light" }],
|
||||
["AdobeHeitiStd-Regular", { alias: "STHeiti-Regular" }],
|
||||
// KaiTi (regular script) and FangSong (imitation Song) are different
|
||||
// typographic styles; route to the existing GB2312-keyed entries above.
|
||||
["AdobeKaitiStd-Regular", { alias: "\xBF\xAC\xCC\xE5" }],
|
||||
["AdobeFangsongStd-Regular", { alias: "\xB7\xC2\xCB\xCE" }],
|
||||
|
||||
// Adobe-CNS1 - Sung (Traditional Chinese serif).
|
||||
[
|
||||
"MSung-Light",
|
||||
{
|
||||
local: [
|
||||
"Songti TC",
|
||||
"LiSong Pro",
|
||||
"Source Han Serif TC",
|
||||
"Source Han Serif TW",
|
||||
"Noto Serif TC",
|
||||
"Noto Serif CJK TC",
|
||||
"AR PL UMing TW",
|
||||
"PMingLiU",
|
||||
"MingLiU",
|
||||
"MingLiU_HKSCS",
|
||||
],
|
||||
style: NORMAL,
|
||||
ultimate: "serif",
|
||||
},
|
||||
],
|
||||
// Adobe-CNS1 - Hei (Traditional Chinese sans-serif).
|
||||
[
|
||||
"MHei-Medium",
|
||||
{
|
||||
local: [
|
||||
"Heiti TC",
|
||||
"STHeiti",
|
||||
"Source Han Sans TC",
|
||||
"Source Han Sans TW",
|
||||
"Noto Sans TC",
|
||||
"Noto Sans CJK TC",
|
||||
"PingFang TC",
|
||||
"Microsoft JhengHei",
|
||||
],
|
||||
style: MEDIUM,
|
||||
ultimate: "sans-serif",
|
||||
},
|
||||
],
|
||||
["MSungStd-Light", { alias: "MSung-Light" }],
|
||||
["AdobeMingStd-Light", { alias: "MSung-Light" }],
|
||||
|
||||
// Adobe-Korea1 - Myeongjo (Korean serif).
|
||||
[
|
||||
"HYSMyeongJo-Medium",
|
||||
{
|
||||
local: [
|
||||
"AppleMyungjo",
|
||||
"Source Han Serif KR",
|
||||
"Noto Serif KR",
|
||||
"Noto Serif CJK KR",
|
||||
"Nanum Myeongjo",
|
||||
"Batang",
|
||||
],
|
||||
style: MEDIUM,
|
||||
ultimate: "serif",
|
||||
},
|
||||
],
|
||||
// Adobe-Korea1 - Gothic (Korean sans-serif).
|
||||
[
|
||||
"HYGoThic-Medium",
|
||||
{
|
||||
local: [
|
||||
"Apple SD Gothic Neo",
|
||||
"AppleGothic",
|
||||
"Source Han Sans KR",
|
||||
"Noto Sans KR",
|
||||
"Noto Sans CJK KR",
|
||||
"Nanum Gothic",
|
||||
"Malgun Gothic",
|
||||
"Dotum",
|
||||
"Gulim",
|
||||
],
|
||||
style: MEDIUM,
|
||||
ultimate: "sans-serif",
|
||||
},
|
||||
],
|
||||
["HYSMyeongJoStd-Medium", { alias: "HYSMyeongJo-Medium" }],
|
||||
["AdobeMyungjoStd-Medium", { alias: "HYSMyeongJo-Medium" }],
|
||||
// Bold variants reuse the same fallback list with a bold style override
|
||||
// so the @font-face declaration requests a bold local() match.
|
||||
["HYGoThic-Bold", { alias: "HYGoThic-Medium", style: BOLD }],
|
||||
["AdobeGothicStd-Bold", { alias: "HYGoThic-Medium", style: BOLD }],
|
||||
]);
|
||||
|
||||
// Font-name aliases, keyed by the name as written and mapping to the name
// used instead. NOTE(review): the lookup code is outside this view; confirm
// how the mapping is consumed before extending it.
const fontAliases = new Map();
fontAliases.set("Arial-Black", "ArialBlack");
|
||||
|
||||
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
@ -924,3 +924,4 @@
|
||||
!Embedded_font.pdf
|
||||
!issue18548_reduced.pdf
|
||||
!issue_cff_unsigned_bbox.pdf
|
||||
!90ms_rksj_h_sample.pdf
|
||||
|
||||
67
test/pdfs/90ms_rksj_h_sample.pdf
Normal file
67
test/pdfs/90ms_rksj_h_sample.pdf
Normal file
@ -0,0 +1,67 @@
|
||||
%PDF-1.4
|
||||
%âãÏÓ
|
||||
|
||||
1 0 obj
|
||||
<< /Type /Catalog /Pages 2 0 R >>
|
||||
endobj
|
||||
|
||||
2 0 obj
|
||||
<< /Type /Pages /Kids [3 0 R] /Count 1 >>
|
||||
endobj
|
||||
|
||||
3 0 obj
|
||||
<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792]
|
||||
/Contents 4 0 R
|
||||
/Resources << /Font << /F0 5 0 R /F1 7 0 R >> >> >>
|
||||
endobj
|
||||
|
||||
4 0 obj
|
||||
<< /Length 92 >>
|
||||
stream
|
||||
BT
|
||||
/F0 14 Tf
|
||||
72 720 Td
|
||||
(Hello ASCII) Tj
|
||||
0 -28 Td
|
||||
/F1 14 Tf
|
||||
<93FA967B8CEA836583588367> Tj
|
||||
ET
|
||||
|
||||
endstream
|
||||
endobj
|
||||
|
||||
5 0 obj
|
||||
<< /Type /Font /Subtype /Type1
|
||||
/BaseFont /Helvetica
|
||||
/Encoding /WinAnsiEncoding >>
|
||||
endobj
|
||||
|
||||
6 0 obj
|
||||
<< /Type /Font /Subtype /CIDFontType2
|
||||
/BaseFont /HeiseiMin-W3
|
||||
/CIDSystemInfo << /Registry (Adobe) /Ordering (Japan1) /Supplement 2 >>
|
||||
/DW 1000 >>
|
||||
endobj
|
||||
|
||||
7 0 obj
|
||||
<< /Type /Font /Subtype /Type0
|
||||
/BaseFont /HeiseiMin-W3
|
||||
/Encoding /90ms-RKSJ-H
|
||||
/DescendantFonts [6 0 R] >>
|
||||
endobj
|
||||
|
||||
xref
|
||||
0 8
|
||||
0000000000 65535 f
|
||||
0000000016 00000 n
|
||||
0000000066 00000 n
|
||||
0000000124 00000 n
|
||||
0000000267 00000 n
|
||||
0000000410 00000 n
|
||||
0000000514 00000 n
|
||||
0000000685 00000 n
|
||||
trailer
|
||||
<< /Size 8 /Root 1 0 R >>
|
||||
startxref
|
||||
816
|
||||
%%EOF
|
||||
@ -4009,6 +4009,25 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
// Regression test: the fixture's Type0 font uses the predefined
// /90ms-RKSJ-H CMap and its descendant CIDFont carries no FontDescriptor.
// The extracted text must contain the decoded Japanese string on the second
// line, after the plain ASCII line drawn with Helvetica.
it("gets text content from a Type0 composite font with no FontDescriptor, using a predefined CMap", async function () {
  const params = buildGetDocumentParams("90ms_rksj_h_sample.pdf", {
    cMapUrl: CMAP_URL,
    useWorkerFetch: false,
  });
  const task = getDocument(params);
  const doc = await task.promise;
  const firstPage = await doc.getPage(1);
  const textContent = await firstPage.getTextContent({
    disableNormalization: true,
  });

  expect(mergeText(textContent.items)).toEqual("Hello ASCII\n日本語テスト");

  await task.destroy();
});
|
||||
|
||||
it("gets text content with a rised text", async function () {
|
||||
const loadingTask = getDocument(buildGetDocumentParams("issue16221.pdf"));
|
||||
const pdfDoc = await loadingTask.promise;
|
||||
|
||||
@ -546,4 +546,96 @@ describe("getFontSubstitution", function () {
|
||||
/^"ArialBlack",g_d(\d+)_sf(\d+),sans-serif$/
|
||||
);
|
||||
});
|
||||
|
||||
// HeiseiMin-W3 has its own substitution entry: the expected `src` is the
// entry's local font list, in order, each wrapped in local(...), and the
// style is the normal/normal descriptor.
it("should substitute HeiseiMin-W3", () => {
  const expectedLocals = [
    "Hiragino Mincho ProN",
    "Hiragino Mincho Pro",
    "Yu Mincho",
    "YuMincho",
    "Source Han Serif JP",
    "Noto Serif JP",
    "Noto Serif CJK JP",
    "IPAexMincho",
    "IPAMincho",
    "Takao Mincho",
    "MS Mincho",
    "MS PMincho",
  ];
  const expectedSrc = expectedLocals.map(name => `local(${name})`).join(",");

  const substitution = getFontSubstitution(
    new Map(),
    idFactory,
    localFontPath,
    "HeiseiMin-W3",
    undefined,
    "CIDFontType2"
  );

  const expectedShape = jasmine.objectContaining({
    guessFallback: false,
    baseFontName: "HeiseiMin-W3",
    src: expectedSrc,
    style: { style: "normal", weight: "normal" },
  });
  expect(substitution).toEqual(expectedShape);
  expect(substitution.css).toMatch(/^"HeiseiMin W3",g_d(\d+)_sf(\d+),serif$/);
});
|
||||
|
||||
// KozMinPr6N-Regular is declared as an alias of HeiseiMin-W3 in the
// substitution map, so the expected local() list and style are those of
// HeiseiMin-W3 while `baseFontName` keeps the requested name.
it("should substitute a Kozuka Mincho alias", () => {
  const expectedLocals = [
    "Hiragino Mincho ProN",
    "Hiragino Mincho Pro",
    "Yu Mincho",
    "YuMincho",
    "Source Han Serif JP",
    "Noto Serif JP",
    "Noto Serif CJK JP",
    "IPAexMincho",
    "IPAMincho",
    "Takao Mincho",
    "MS Mincho",
    "MS PMincho",
  ];
  const expectedSrc = expectedLocals.map(name => `local(${name})`).join(",");

  const substitution = getFontSubstitution(
    new Map(),
    idFactory,
    localFontPath,
    "KozMinPr6N-Regular",
    undefined,
    "CIDFontType0"
  );

  const expectedShape = jasmine.objectContaining({
    guessFallback: false,
    baseFontName: "KozMinPr6N-Regular",
    src: expectedSrc,
    style: { style: "normal", weight: "normal" },
  });
  expect(substitution).toEqual(expectedShape);
  expect(substitution.css).toMatch(/^"KozMinPr6N",g_d(\d+)_sf(\d+),serif$/);
});
|
||||
|
||||
// HYGoThic-Medium has its own substitution entry using the medium style
// (weight "500"); the expected `src` is the entry's local font list, in
// order, each wrapped in local(...).
it("should substitute HYGoThic-Medium", () => {
  const expectedLocals = [
    "Apple SD Gothic Neo",
    "AppleGothic",
    "Source Han Sans KR",
    "Noto Sans KR",
    "Noto Sans CJK KR",
    "Nanum Gothic",
    "Malgun Gothic",
    "Dotum",
    "Gulim",
  ];
  const expectedSrc = expectedLocals.map(name => `local(${name})`).join(",");

  const substitution = getFontSubstitution(
    new Map(),
    idFactory,
    localFontPath,
    "HYGoThic-Medium",
    undefined,
    "CIDFontType2"
  );

  const expectedShape = jasmine.objectContaining({
    guessFallback: false,
    baseFontName: "HYGoThic-Medium",
    src: expectedSrc,
    style: { style: "normal", weight: "500" },
  });
  expect(substitution).toEqual(expectedShape);
  expect(substitution.css).toMatch(/^"HYGoThic",g_d(\d+)_sf(\d+),sans-serif$/);
});
|
||||
});
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user