initial-commit
commit 30a0f1beb7
.gitignore (vendored), new file, 14 lines
@@ -0,0 +1,14 @@
.DS_Store
node_modules
/build
/.svelte-kit
/package
.env
.env.*
!.env.example
vite.config.js.timestamp-*
vite.config.ts.timestamp-*
.idea
.vscode
**/target/**
dist
package.json, new file, 43 lines
@@ -0,0 +1,43 @@
{
  "name": "pdf-forge",
  "version": "0.1.0",
  "description": "",
  "type": "module",
  "scripts": {
    "dev": "vite dev",
    "build": "vite build",
    "preview": "vite preview",
    "check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
    "check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch",
    "tauri": "tauri"
  },
  "license": "MIT",
  "dependencies": {
    "@geoffcox/svelte-splitter": "^1.0.1",
    "@tauri-apps/api": "^2",
    "@tauri-apps/plugin-dialog": "~2",
    "@tauri-apps/plugin-fs": "~2",
    "@tauri-apps/plugin-opener": "^2",
    "flowbite-svelte": "^0.47.4",
    "flowbite-svelte-icons": "^2.0.2",
    "paths": "^0.1.1",
    "svelte-split-pane": "^0.1.2",
    "svelte-splitpanes": "^8.0.9"
  },
  "devDependencies": {
    "@sveltejs/adapter-static": "^3.0.6",
    "@sveltejs/kit": "^2.9.0",
    "@sveltejs/vite-plugin-svelte": "^5.0.0",
    "@tailwindcss/typography": "^0.5.14",
    "@tauri-apps/cli": "^2",
    "autoprefixer": "^10.4.20",
    "postcss": "^8.5.1",
    "sass-embedded": "^1.83.4",
    "svelte": "^5.0.0",
    "svelte-check": "^4.0.0",
    "svelte-preprocess": "^6.0.3",
    "tailwindcss": "^3.4.17",
    "typescript": "~5.6.2",
    "vite": "^6.0.3"
  }
}
postcss.config.js, new file, 6 lines
@@ -0,0 +1,6 @@
export default {
  plugins: {
    tailwindcss: {},
    autoprefixer: {}
  }
};
src-pdfrs/Cargo.lock (generated), new file, 1731 lines
File diff suppressed because it is too large.
src-pdfrs/Cargo.toml, new file, 6 lines
@@ -0,0 +1,6 @@
[workspace]
members = [
    "pdf",
    "pdf_derive",
    "examples",
]
src-pdfrs/LICENSE, new file, 7 lines
@@ -0,0 +1,7 @@
Copyright © 2020 The pdf-rs contributors.

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
src-pdfrs/README.md, new file, 25 lines
@@ -0,0 +1,25 @@
# pdf-rs [![test](https://github.com/pdf-rs/pdf/actions/workflows/test.yml/badge.svg)](https://github.com/pdf-rs/pdf/actions/workflows/test.yml)
Read, alter and write PDF files.

Modifying and writing PDFs is still experimental.

One easy way you can contribute is to add different PDF files to `tests/files` and see if they pass the tests (`cargo test`).

Feel free to contribute with ideas, issues or code! Please join [us on Zulip](https://type.zulipchat.com/#narrow/stream/209232-pdf) if you have any questions or problems.

# Workspace
This repository uses a Cargo workspace with default members, which means that by default only the `pdf` library is built.
To build additional parts, pass `--package=read` to build the subcrate you are interested in (here the `read` example), as shown below.
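For example, building only that subcrate:

```
cargo build --package=read
```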

# Examples
Examples are located in `pdf/examples/` and can be executed using:

```
cargo run --example {content,metadata,names,read,text} -- <files/{choose a pdf}>
```

# Renderer and Viewer
A library for rendering PDFs via [Pathfinder](https://github.com/servo/pathfinder) and a minimal viewer can be found [here](https://github.com/pdf-rs/pdf_render).

# Inspect
There is a tool for visualizing a PDF file as an interactive hierarchy of primitives at [inspect-prim](https://github.com/pdf-rs/inspect-prim). Just clone and `cargo run`.
src-pdfrs/examples/Cargo.toml, new file, 16 lines
@@ -0,0 +1,16 @@
[package]
name = "pdf-examples"
version = "0.1.0"
edition = "2021"
publish = false

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
pdf = { path = "../pdf" }
datasize = "0.2.13"
clap = { version = "*", features = ["derive"] }
image = "*"

[[bin]]
name = "extract_page"
src-pdfrs/examples/src/bin/add_image.rs, new file, 125 lines
@@ -0,0 +1,125 @@
use std::{path::PathBuf, error::Error};

use pdf::{
    file::FileOptions,
    object::*,
    primitive::Name, enc::{StreamFilter, DCTDecodeParams}, content::{Op, Matrix, Content},
};

use clap::Parser;
use std::io::Cursor;
use image::io::Reader as ImageReader;

#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// Input PDF file
    #[arg(short, long)]
    input: PathBuf,

    /// Input image file
    #[arg(long)]
    image: PathBuf,

    /// Page number to add the image to
    #[arg(short, long, default_value_t = 0)]
    page: u32,

    /// Output file
    #[arg(short, long)]
    output: PathBuf,
}

struct Point {
    x: f32,
    y: f32
}
struct Align {
    page_rel: f32,
    page_abs: f32,
    img_rel: f32,
}

fn main() -> Result<(), Box<dyn Error>> {
    let args = Args::parse();

    let img_data = std::fs::read(&args.image)?;
    let img = ImageReader::with_format(Cursor::new(&img_data), image::ImageFormat::Jpeg).decode()?;
    let image_dict = ImageDict {
        width: img.width(),
        height: img.height(),
        color_space: Some(ColorSpace::DeviceRGB),
        bits_per_component: Some(8),
        .. Default::default()
    };
    let image = Stream::new_with_filters(image_dict, img_data, vec![StreamFilter::DCTDecode(DCTDecodeParams { color_transform: None })]);

    let mut file = FileOptions::cached().open(&args.input).unwrap();
    let page = file.get_page(args.page).expect("no such page");

    let resources = page.resources()?;
    let mut resources2: Resources = (**resources).clone();

    let image_obj = XObject::Image(ImageXObject { inner: image });
    let image_ref = file.create(image_obj)?;

    // assume that the name did not exist
    let image_name = Name::from("MyImage");
    resources2.xobjects.insert(image_name.clone(), image_ref.get_ref());

    let mut ops = page.contents.as_ref().unwrap().operations(&file.resolver())?;

    let mm = 72.0 / 25.4; // one millimeter
    // bottom right corner of the page, with a 5 mm margin
    let h_align = Align {
        img_rel: -1.0, // move left by image width
        page_rel: 1.0, // move right by page width
        page_abs: -5.0 * mm, // 5 mm from the right edge
    };
    let v_align = Align {
        img_rel: 0.0,
        page_rel: 0.0,
        page_abs: 5.0 * mm
    };
    let dpi = 300.;

    let px_scale = 72. / dpi;
    let media_box = page.media_box.unwrap();
    let scale = Point { x: img.width() as f32 * px_scale, y: img.height() as f32 * px_scale };
    let skew = Point { x: 0.0, y: 0.0 };
    let page_size = Point {
        x: media_box.right - media_box.left,
        y: media_box.top - media_box.bottom
    };
    let page_origin = Point {
        x: media_box.left,
        y: media_box.bottom
    };

    let position = Point {
        x: page_origin.x + h_align.page_abs + h_align.img_rel * scale.x + h_align.page_rel * page_size.x,
        y: page_origin.y + v_align.page_abs + v_align.img_rel * scale.y + v_align.page_rel * page_size.y
    };

    ops.append(&mut vec![
        Op::Save, // ADD IMAGE START
        Op::Transform { matrix: Matrix { // IMAGE MANIPULATION
            a: scale.x, d: scale.y,
            b: skew.x, c: skew.y,
            e: position.x, f: position.y,
        } },
        Op::XObject { name: image_name }, // IMAGE
        Op::Restore, // ADD IMAGE STOP
    ]);

    let mut page2: Page = (*page).clone();
    page2.contents = Some(Content::from_ops(ops));
    page2.resources = Some(file.create(resources2)?.into());

    file.update(page.get_ref().get_inner(), page2)?;

    file.save_to(&args.output)?;

    Ok(())
}
src-pdfrs/examples/src/bin/extract_page.rs, new file, 84 lines
@@ -0,0 +1,84 @@
use std::path::PathBuf;

use pdf::{
    error::PdfError,
    file::FileOptions,
    object::*,
    build::*,
    primitive::{PdfString, Name}, content::{Op, Color, Cmyk, Matrix}, font::{Font, TFont, FontData},
};

use clap::Parser;

#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// Input file
    #[arg(short, long)]
    input: PathBuf,

    /// Page number
    #[arg(short, long, default_value_t = 0)]
    page: u32,

    /// Output file
    #[arg(short, long)]
    output: PathBuf,
}

fn main() -> Result<(), PdfError> {
    let args = Args::parse();

    let old_file = FileOptions::cached().open(&args.input).unwrap();
    let old_page = old_file.get_page(args.page).expect("no such page");

    let mut builder = PdfBuilder::new(FileOptions::cached());

    let mut importer = Importer::new(old_file.resolver(), &mut builder.storage);
    let mut pages = Vec::new();

    let mut new_page = PageBuilder::clone_page(&old_page, &mut importer)?;
    importer.finish().verify(&builder.storage.resolver())?;

    let font = Font {
        data: FontData::TrueType(TFont {
            base_font: Some(Name::from("Helvetica")),
            first_char: None,
            font_descriptor: None,
            last_char: None,
            widths: None,
        }),
        encoding: Some(pdf::encoding::Encoding::standard()),
        name: None,
        subtype: pdf::font::FontType::TrueType,
        to_unicode: None,
        _other: Default::default()
    };
    let font_name = Name::from("F42");
    new_page.resources.fonts.insert(font_name.clone(), builder.storage.create(font)?.into());

    new_page.ops.push(Op::BeginText);
    let label = format!("{} page {}", args.input.file_name().unwrap().to_string_lossy(), args.page).into_bytes();
    let mut text_ops = vec![
        Op::FillColor { color: Color::Cmyk(Cmyk { cyan: 0.0, magenta: 0.0, key: 1.0, yellow: 0.0 }) },
        Op::BeginText,
        Op::SetTextMatrix { matrix: Matrix { a: 1.0, b: 0.0, c: 0.0, d: 1., e: 10., f: 10. } },
        Op::TextFont { name: font_name.clone(), size: 20. },
        Op::TextDraw { text: PdfString::new(label.into()) },
        Op::EndText
    ];
    new_page.ops.append(&mut text_ops);

    pages.push(new_page);

    let catalog = CatalogBuilder::from_pages(pages);

    let mut info = InfoDict::default();
    info.title = Some(PdfString::from("test"));

    let data = builder.info(info).build(catalog)?;

    std::fs::write(&args.output, data)?;

    Ok(())
}
src-pdfrs/examples/src/bin/form.rs, new file, 125 lines
@@ -0,0 +1,125 @@
extern crate pdf;

use std::collections::HashMap;
use std::env::args;

use pdf::content::{FormXObject, Op, serialize_ops};
use pdf::error::PdfError;
use pdf::file::{FileOptions, Log};
use pdf::font::{Font, FontData, TFont};
use pdf::object::*;
use pdf::primitive::{PdfString, Primitive, Name};

fn run() -> Result<(), PdfError> {
    let path = args().nth(1).expect("no file given");
    println!("read: {}", path);

    let mut file = FileOptions::cached().open(&path)?;
    let mut to_update_field: Option<_> = None;

    let font = Font {
        data: FontData::TrueType(TFont {
            base_font: Some(Name::from("Helvetica")),
            first_char: None,
            font_descriptor: None,
            last_char: None,
            widths: None,
        }),
        encoding: Some(pdf::encoding::Encoding::standard()),
        name: None,
        subtype: pdf::font::FontType::TrueType,
        to_unicode: None,
        _other: Default::default()
    };
    let font_name = Name::from("Helvetica");
    let font = file.create(font)?;
    let mut fonts = HashMap::new();
    fonts.insert("Helvetica".into(), font.into());
    let resources = Resources {
        fonts,
        .. Default::default()
    };
    let resources = file.create(resources)?;

    let page0 = file.get_page(0).unwrap();
    let annots = page0.annotations.load(&file.resolver()).expect("can't load annotations");
    for annot in &*annots {
        if let Some(ref a) = annot.appearance_streams {
            let normal = file.resolver().get(a.normal);
            if let Ok(normal) = normal {
                match *normal {
                    AppearanceStreamEntry::Single(ref s) => {
                        //dbg!(&s.stream.resources);

                        let form_dict = FormDict {
                            resources: Some(resources.clone().into()),
                            .. (**s.stream).clone()
                        };

                        let ops = vec![
                            Op::Save,
                            Op::TextFont { name: font_name.clone(), size: 14.0 },
                            Op::TextDraw { text: PdfString::from("Hello World!") },
                            Op::EndText,
                            Op::Restore
                        ];
                        let stream = Stream::new(form_dict, serialize_ops(&ops)?);

                        let normal2 = AppearanceStreamEntry::Single(FormXObject { stream });

                        file.update(a.normal.get_inner(), normal2)?;
                    }
                    _ => {}
                }
            }
        }
    }

    if let Some(ref forms) = file.get_root().forms {
        println!("Forms:");
        for field in forms.fields.iter().take(1) {
            print!("  {:?} = ", field.name);
            match field.value {
                Primitive::String(ref s) => println!("{}", s.to_string_lossy()),
                Primitive::Integer(i) => println!("{}", i),
                Primitive::Name(ref s) => println!("{}", s),
                ref p => println!("{:?}", p),
            }

            if to_update_field.is_none() {
                to_update_field = Some(field.clone());
            }
        }
    }

    if let Some(to_update_field) = to_update_field {
        println!("\nUpdating field:");
        println!("{:?}\n", to_update_field);

        let text = "Hello World!";
        let new_value: PdfString = PdfString::new(text.into());
        let mut updated_field = (*to_update_field).clone();
        updated_field.value = Primitive::String(new_value);

        //dbg!(&updated_field);

        let reference = file.update(
            to_update_field.get_ref().get_inner(),
            updated_field,
        )?;

        file.save_to("output/out.pdf")?;

        println!("\nUpdated field:");
        //println!("{:?}\n", reference);
    }

    Ok(())
}

fn main() {
    if let Err(e) = run() {
        println!("{e}");
    }
}
src-pdfrs/files/encrypted_aes_128.pdf, new file, 46 lines
@@ -0,0 +1,46 @@
%PDF-1.5
1 0 obj
<</Type /Catalog/Pages 2 0 R>>
endobj
2 0 obj
<</Count 1/Kids [ 6 0 R ]/Type /Pages>>
endobj
3 0 obj
<</Length 160>>stream
DM<EFBFBD>àœeXÐãpJ©DO9‹»ùÆI ç[³r(çûÏ×l/ÉÚp„¬
Ù`½`r룆 +j Ö<C2A0>W$TL˜cºq`×Ö…¹&<26>͵
8åO$ÓJ5…[âY )BwW[“?Äâ©°eOÎ\™,5|ÏßNÉWexÈp'}ä3p$l°ã†(÷KMÊ^N^§DÏêaŸÅâÔ<¶B W
endstream
endobj
4 0 obj
132
endobj
5 0 obj
<</Type /Font/Subtype /Type1/BaseFont /Times-Roman/Encoding /WinAnsiEncoding>>
endobj
6 0 obj
<</Type /Page/Parent 2 0 R/Resources <</Font <</F0 5 0 R>>>>/MediaBox [ 0 0 180 240 ]/Contents 3 0 R>>
endobj
7 0 obj
<</Filter /Standard/V 4/R 4/Length 128/P -1/EncryptMetadata true/CF <</StdCF <</AuthEvent /DocOpen/CFM /AESV2/Length 16>>>>/StrF /StdCF/StmF /StdCF/O (6EÓ<>u;|’,\(æfZ¤ó5?°4‹Sh“ã±Û\\W›)/U (ç3ÁýŽ ]–ù…i9µÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ)>>
endobj
xref
0 8
0000000000 65535 f
0000000010 00000 n
0000000059 00000 n
0000000117 00000 n
0000000330 00000 n
0000000352 00000 n
0000000449 00000 n
0000000570 00000 n
trailer
<<
/Size 8
/Root 1 0 R
/ID [ <E6BD677BF08513BD60C4834FE38C16C2> <E6BD677BF08513BD60C4834FE38C16C2> ]
/Encrypt 7 0 R
>>
startxref
814
%%EOF
src-pdfrs/files/encrypted_aes_256.pdf, new binary file (not shown)
src-pdfrs/files/encrypted_aes_256_hardened.pdf, new binary file (not shown)
src-pdfrs/files/encrypted_rc4_rev2.pdf, new binary file (not shown)
src-pdfrs/files/encrypted_rc4_rev3.pdf, new file, 44 lines
@@ -0,0 +1,44 @@
%PDF-1.4
1 0 obj
<</Type /Catalog/Pages 2 0 R>>
endobj
2 0 obj
<</Count 1/Kids [ 6 0 R ]/Type /Pages>>
endobj
3 0 obj
<</Length 131>>stream
þ*BÏ,Ø)뢽Ÿñ0?éêH1¡.Ž%R,m–™=Ü«é{×xXöDˆ¤ÜÖ^¼‹Ç’ ÄýÊ‘î¥V^̃v))Þ¨S«·Ýß,x¸ÇZnT³„jÀÝ ‚ªp[еŽ~`·nÛTGÕVS“®ÿ£ÓnKŠW•K
endstream
endobj
4 0 obj
132
endobj
5 0 obj
<</Type /Font/Subtype /Type1/BaseFont /Times-Roman/Encoding /WinAnsiEncoding>>
endobj
6 0 obj
<</Type /Page/Parent 2 0 R/Resources <</Font <</F0 5 0 R>>>>/MediaBox [ 0 0 180 240 ]/Contents 3 0 R>>
endobj
7 0 obj
<</Filter /Standard/V 2/R 3/Length 64/P -1/O (þâÆ6j¤ªR‹ØúñB‰:â´.å™ü®NeÌ6ì¢þ)/U (‘<>ÏíA&9¢…ÃÌnsŠRÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ)>>
endobj
xref
0 8
0000000000 65535 f
0000000010 00000 n
0000000059 00000 n
0000000117 00000 n
0000000301 00000 n
0000000323 00000 n
0000000420 00000 n
0000000541 00000 n
trailer
<<
/Size 8
/Root 1 0 R
/ID [ <B41EF7ADFC7D6C55D5BC0442D166A888> <B41EF7ADFC7D6C55D5BC0442D166A888> ]
/Encrypt 7 0 R
>>
startxref
678
%%EOF
src-pdfrs/files/ep.pdf, new binary file (not shown)
src-pdfrs/files/ep2.pdf, new binary file (not shown)
src-pdfrs/files/example.pdf, new file, 57 lines
@@ -0,0 +1,57 @@
%PDF-1.7
%µí®û
3 0 obj
<< /Length 4 0 R >>
stream
/DeviceRGB cs /DeviceRGB CS
0 0 0.972549 SC
21.68 194 136.64 26 re
10 10 m 20 20 l S
BT
/F0 24 Tf
25.68 200 Td
(Hello World!) Tj
ET
endstream
endobj
4 0 obj
132
endobj
5 0 obj
<< /Type /Font /Subtype /Type1 /BaseFont /Times-Roman /Encoding /WinAnsiEncoding >>
endobj
6 0 obj
<< /Type /Page
/Parent 2 0 R
/Resources << /Font << /F0 5 0 R >> >>
/MediaBox [ 0 0 180 240 ]
/Contents 3 0 R
>>
endobj
2 0 obj
<< /Type /Pages
/Count 1
/Kids [ 6 0 R ]
>>
endobj
1 0 obj
<< /Type /Catalog
/Pages 2 0 R
>>
endobj
xref
0 7
0000000000 65535 f
0000000522 00000 n
0000000457 00000 n
0000000015 00000 n
0000000199 00000 n
0000000218 00000 n
0000000317 00000 n
trailer
<< /Size 7
/Root 1 0 R
>>
startxref
574
%%EOF
src-pdfrs/files/example_annotation.pdf, new binary file (not shown)
src-pdfrs/files/formxobject.pdf, new file, 70 lines
@@ -0,0 +1,70 @@
%PDF-1.7
%âãÏÓ
1 0 obj
<< /Type /Catalog
/Pages 2 0 R
>>
endobj
2 0 obj
<< /Kids [3 0 R]
/Type /Pages
/Count 1
>>
endobj
3 0 obj
<< /Contents 4 0 R
/Type /Page
/Resources << /XObject << /Im0 5 0 R >> >>
/Parent 2 0 R
/MediaBox [0 0 180 240]
>>
endobj
4 0 obj
<< /Length 93 >>
stream
/DeviceRGB cs /DeviceRGB CS
0 0 0.972549 SC
21.68 194 136.64 26 re
10 10 m 20 20 l S
/Im0 Do
endstream
endobj
5 0 obj
<< /Subtype /Form
/Type /XObject
/FormType 1
/Resources << /Font << /F0 6 0 R >> >>
/BBox [0 0 180 240]
/Length 47
>>
stream
BT
/F0 24 Tf
25.68 200 Td
(Hello World!) Tj
ET
endstream
endobj
6 0 obj
<< /Subtype /Type1
/Type /Font
/BaseFont /Times-Roman
/Encoding /WinAnsiEncoding
>>
endobj
xref
0 7
0000000000 65535 f
0000000015 00000 n
0000000067 00000 n
0000000130 00000 n
0000000272 00000 n
0000000414 00000 n
0000000626 00000 n
trailer
<< /Root 1 0 R
/Size 7
>>
startxref
734
%%EOF
src-pdfrs/files/invalid/crash-121-1.pdf, new binary file (not shown)
src-pdfrs/files/invalid/crash-121-2.pdf, new binary file (not shown)
src-pdfrs/files/invalid/crash-121-3.pdf, new binary file (not shown)
src-pdfrs/files/invalid/crash-121-4.pdf, new binary file (not shown)
src-pdfrs/files/invalid/crash-122.pdf, new binary file (not shown)
src-pdfrs/files/invalid/crash-123.pdf, new binary file (not shown)
src-pdfrs/files/invalid/crash-124.pdf, new binary file (not shown)
src-pdfrs/files/invalid/crash-assertion-failure.pdf, new binary file (not shown)
src-pdfrs/files/invalid/infinite-loop-103.pdf, new file, 1 line
@@ -0,0 +1 @@
startxref%PDF-
src-pdfrs/files/jpeg.pdf, new binary file (not shown)
src-pdfrs/files/libreoffice.pdf, new binary file (not shown)
src-pdfrs/files/lossless.pdf, new binary file (not shown)
src-pdfrs/files/offset.pdf, new file, 75 lines (18 blank lines precede the header)
@@ -0,0 +1,75 @@


















%PDF-1.7
%µí®û
3 0 obj
<< /Length 4 0 R >>
stream
/DeviceRGB cs /DeviceRGB CS
0 0 0.972549 SC
21.68 194 136.64 26 re
10 10 m 20 20 l S
BT
/F0 24 Tf
25.68 200 Td
(Hello World!) Tj
ET
endstream
endobj
4 0 obj
132
endobj
5 0 obj
<< /Type /Font /Subtype /Type1 /BaseFont /Times-Roman /Encoding /WinAnsiEncoding >>
endobj
6 0 obj
<< /Type /Page
/Parent 2 0 R
/Resources << /Font << /F0 5 0 R >> >>
/MediaBox [ 0 0 180 240 ]
/Contents 3 0 R
>>
endobj
2 0 obj
<< /Type /Pages
/Count 1
/Kids [ 6 0 R ]
>>
endobj
1 0 obj
<< /Type /Catalog
/Pages 2 0 R
>>
endobj
xref
0 7
0000000000 65535 f
0000000522 00000 n
0000000457 00000 n
0000000015 00000 n
0000000199 00000 n
0000000218 00000 n
0000000317 00000 n
trailer
<< /Size 7
/Root 1 0 R
>>
startxref
574
%%EOF
src-pdfrs/files/password_protected/passwords_aes_128.pdf, new binary file (not shown)
src-pdfrs/files/password_protected/passwords_aes_256.pdf, new binary file (not shown)
Binary file not shown.
src-pdfrs/files/password_protected/passwords_rc4_rev2.pdf, new binary file (not shown)
src-pdfrs/files/password_protected/passwords_rc4_rev3.pdf, new file, 44 lines
@@ -0,0 +1,44 @@
%PDF-1.4
1 0 obj
<</Type /Catalog/Pages 2 0 R>>
endobj
2 0 obj
<</Count 1/Kids [ 6 0 R ]/Type /Pages>>
endobj
3 0 obj
<</Length 131>>stream
+Zì(Õ†I0hoêo<C3AA>‚'ÊÊçË÷¼¿‹^^*›)u—tÿç‡8&à[O7s¶jtS-ñhF„Y҅Бr…c0yø®¦Í³¦ßkó„9‹±—§Nü0±ì lhõ†–F¶gœõ:òSÐ<C390>Á–Áý¸Ð^ý2µEéfD9ÀÅŸ
endstream
endobj
4 0 obj
132
endobj
5 0 obj
<</Type /Font/Subtype /Type1/BaseFont /Times-Roman/Encoding /WinAnsiEncoding>>
endobj
6 0 obj
<</Type /Page/Parent 2 0 R/Resources <</Font <</F0 5 0 R>>>>/MediaBox [ 0 0 180 240 ]/Contents 3 0 R>>
endobj
7 0 obj
<</Filter /Standard/V 2/R 3/Length 64/P -1/O (é812!ÚO1éñ\)†ÞæÞuÿœz<qÅ=`ÞH7\)ƒ)/U (hknyļ¡c¶÷ÞxÛ|ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ)>>
endobj
xref
0 8
0000000000 65535 f
0000000010 00000 n
0000000059 00000 n
0000000117 00000 n
0000000301 00000 n
0000000323 00000 n
0000000420 00000 n
0000000541 00000 n
trailer
<<
/Size 8
/Root 1 0 R
/ID [ <DC9E3E7E38006A5B77E717B8F02A9F09> <DC9E3E7E38006A5B77E717B8F02A9F09> ]
/Encrypt 7 0 R
>>
startxref
680
%%EOF
src-pdfrs/files/pdf-sample.pdf, new binary file (not shown)
src-pdfrs/files/xelatex-drawboard.pdf, new binary file (not shown)
src-pdfrs/files/xelatex.pdf, new binary file (not shown)
src-pdfrs/pdf/Cargo.toml, new file, 65 lines
@@ -0,0 +1,65 @@
[package]
name = "pdf"
version = "0.9.1"
authors = ["Erlend Langseth <3rlendhl@gmail.com>", "Sebastian Köln <s3bk@protonmail.com>"]
repository = "https://github.com/pdf-rs/pdf"
readme = "../README.md"
keywords = ["pdf"]
license = "MIT"
documentation = "https://docs.rs/pdf"
edition = "2018"
description = "PDF reader"

[features]
mmap = ["memmap2"]
dump = ["tempfile"]
threads = ["jpeg-decoder/default"]
sync = []
cache = ["globalcache"]
default = ["sync", "cache"]

[dependencies]
pdf_derive = { version = "0.2.0", path = "../pdf_derive" }
snafu = "0.8.3"
libflate = "2.0.0"
deflate = "1.0.0"
itertools = "0.13.0"
memmap2 = { version = "0.9.4", optional = true }
weezl = "0.1.4"
once_cell = "1.5.2"
log = "0.4.14"
tempfile = { version = "3.2.0", optional = true }
md5 = "0.7"
jpeg-decoder = { version = "0.3.0", default-features = false }
aes = "0.8.2"
cbc = "0.1"
stringprep = "0.1.2"
sha2 = "0.10.2"
fax = "0.2.0"
euclid = { version = "0.22.7", optional = true }
bitflags = "2.5"
istring = { version = "0.3.3", features = ["std", "size"] }
datasize = "0.2.13"
globalcache = { version = "0.2.3", features = ["sync"], optional = true }
indexmap = "2.1.0"

[dev-dependencies]
glob = "0.3.0"

[lib]
doctest = false

[[example]]
name = "content"

[[example]]
name = "metadata"

[[example]]
name = "names"

[[example]]
name = "read"

[[example]]
name = "other_page_content"
src-pdfrs/pdf/examples/content.rs, new file, 67 lines
@@ -0,0 +1,67 @@
use std::env;
use std::path::PathBuf;

use pdf::error::PdfError;
use pdf::content::*;
use pdf::file::FileOptions;

use pdf::object::*;
use pdf::build::*;

use pdf::primitive::PdfString;

#[cfg(feature="cache")]
fn main() -> Result<(), PdfError> {
    let path = PathBuf::from(env::args_os().nth(1).expect("no file given"));

    let mut builder = PdfBuilder::new(FileOptions::cached());

    let mut pages = Vec::new();

    let content = Content::from_ops(vec![
        Op::MoveTo { p: Point { x: 100., y: 100. } },
        Op::LineTo { p: Point { x: 100., y: 200. } },
        Op::LineTo { p: Point { x: 200., y: 200. } },
        Op::LineTo { p: Point { x: 200., y: 100. } },
        Op::Close,
        Op::Stroke,
    ]);
    let mut new_page = PageBuilder::from_content(content, &NoResolve)?;
    new_page.media_box = Some(pdf::object::Rectangle {
        left: 0.0,
        top: 0.0,
        bottom: 400.0,
        right: 400.0
    });
    let resources = Resources::default();

    /*
    let font = Font {
        name: Some("Test".into()),
        subtype: pdf::font::FontType::TrueType,
        data: FontData::TrueType(TFont {
            base_font: None,

        })
    }
    resources.fonts.insert("f1", font);
    */

    new_page.resources = resources;
    pages.push(new_page);

    let catalog = CatalogBuilder::from_pages(pages);

    let mut info = InfoDict::default();
    info.title = Some(PdfString::from("test"));

    let data = builder.info(info).build(catalog)?;

    std::fs::write(path, data)?;

    Ok(())
}
src-pdfrs/pdf/examples/metadata.rs, new file, 38 lines
@@ -0,0 +1,38 @@
use std::env::args;

use pdf::error::PdfError;
use pdf::file::FileOptions;
use pdf::object::{FieldDictionary, FieldType, Resolve};

/// extract and print a PDF's metadata
#[cfg(feature="cache")]
fn main() -> Result<(), PdfError> {
    let path = args()
        .nth(1)
        .expect("Please provide a file path to the PDF you want to explore.");

    let file = FileOptions::cached().open(&path).unwrap();
    let resolver = file.resolver();

    if let Some(ref info) = file.trailer.info_dict {
        dbg!(info);
    }

    if let Some(ref forms) = file.get_root().forms {
        for field in forms.fields.iter() {
            print_field(field, &resolver);
        }
    }

    Ok(())
}

fn print_field(field: &FieldDictionary, resolve: &impl Resolve) {
    if field.typ == Some(FieldType::Signature) {
        println!("{:?}", field);
    }
    for &kid in field.kids.iter() {
        let child = resolve.get(kid).unwrap();
        print_field(&child, resolve);
    }
}
src-pdfrs/pdf/examples/names.rs, new file, 129 lines
@@ -0,0 +1,129 @@
extern crate pdf;

use std::env::args;
use std::fmt;
use std::collections::HashMap;
use pdf::file::FileOptions;
use pdf::object::*;
use pdf::primitive::{Primitive, PdfString};

struct Indent(usize);
impl fmt::Display for Indent {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        for _ in 0 .. self.0 {
            write!(f, " ")?;
        }
        Ok(())
    }
}

fn walk_outline(r: &impl Resolve, mut node: RcRef<OutlineItem>, name_map: &impl Fn(&str) -> usize, page_map: &impl Fn(PlainRef) -> usize, depth: usize) {
    let indent = Indent(depth);
    loop {
        if let Some(ref title) = node.title {
            println!("{}title: {:?}", indent, title.to_string_lossy());
        }
        if let Some(ref dest) = node.dest {
            match dest {
                Primitive::String(ref s) => {
                    let name = s.to_string_lossy();
                    let page_nr = name_map(&name);
                    println!("{}dest: {:?} -> page nr. {:?}", indent, name, page_nr);
                }
                Primitive::Array(ref a) => match a[0] {
                    Primitive::Reference(r) => {
                        let page_nr = page_map(r);
                        println!("{}dest: {:?} -> page nr. {:?}", indent, a, page_nr);
                    }
                    _ => unimplemented!("invalid reference in array"),
                }
                _ => unimplemented!("invalid dest"),
            }
        }
        if let Some(Action::Goto(MaybeNamedDest::Direct(Dest { page: Some(page), ..}))) = node.action {
            let page_nr = page_map(page.get_inner());
            println!("{}action -> page nr. {:?}", indent, page_nr);
        }
        if let Some(ref a) = node.se {
            println!("{} -> {:?}", indent, a);
        }
        if let Some(entry_ref) = node.first {
            let entry = r.get(entry_ref).unwrap();
            walk_outline(r, entry, name_map, page_map, depth + 1);
        }
        if let Some(entry_ref) = node.next {
            node = r.get(entry_ref).unwrap();
            continue;
        }

        break;
    }
}

#[cfg(feature="cache")]
fn main() {
    let path = args().nth(1).expect("no file given");
    println!("read: {}", path);

    let file = FileOptions::cached().open(&path).unwrap();
    let resolver = file.resolver();
    let catalog = file.get_root();

    let mut pages_map: HashMap<String, PlainRef> = HashMap::new();

    let mut count = 0;
    let mut dests_cb = |key: &PdfString, val: &Option<Dest>| {
        //println!("{:?} {:?}", key, val);
        if let Some(Dest { page: Some(page), ..}) = val {
            pages_map.insert(key.to_string_lossy(), page.get_inner());
        }

        count += 1;
    };

    if let Some(ref names) = catalog.names {
        if let Some(ref dests) = names.dests {
            dests.walk(&resolver, &mut dests_cb).unwrap();
        }
    }

    let mut pages = HashMap::new();
    fn add_tree(r: &impl Resolve, pages: &mut HashMap<PlainRef, usize>, tree: &PageTree, current_page: &mut usize) {
        for &node_ref in &tree.kids {
            let node = r.get(node_ref).unwrap();
            match *node {
                PagesNode::Tree(ref tree) => {
                    add_tree(r, pages, tree, current_page);
                }
                PagesNode::Leaf(ref _page) => {
                    pages.insert(node_ref.get_inner(), *current_page);
                    *current_page += 1;
                }
            }
        }
    }
    add_tree(&resolver, &mut pages, &catalog.pages, &mut 0);

    let get_page_nr = |name: &str| -> usize {
        let page = pages_map[name];
        pages[&page]
    };
    let page_nr = |r: PlainRef| -> usize {
        pages[&r]
    };

    if let Some(ref outlines) = catalog.outlines {
        if let Some(entry_ref) = outlines.first {
            let entry = resolver.get(entry_ref).unwrap();
            walk_outline(&resolver, entry, &get_page_nr, &page_nr, 0);
        }
    }

    println!("{} items", count);

    if let Some(ref labels) = catalog.page_labels {
        labels.walk(&resolver, &mut |page: i32, label| {
            println!("{page} -> {:?}", label);
        });
    }
}
src-pdfrs/pdf/examples/other_page_content.rs, new file, 110 lines
@@ -0,0 +1,110 @@
use pdf::content::ViewRect;
use pdf::error::PdfError;
use pdf::file::FileOptions;
use pdf::object::Resolve;
use pdf::primitive::{Dictionary, Primitive};
use std::env::args;

/// Extract data from a page entry that is under "other".
/// This example looks for strikethroughs in the annotations entry
/// and returns a Vec<Rect> for the bounds of the struck-through text.
#[cfg(feature="cache")]
fn main() -> Result<(), PdfError> {
    let path = args()
        .nth(1)
        .expect("Please provide a file path to the PDF you want to explore.");

    let file = FileOptions::cached().open(&path).unwrap();
    let resolver = file.resolver();

    for (i, page) in file.pages().enumerate() {
        let page = page.unwrap();
        let strikethroughs = annotation_strikethrough(&page.other, &resolver)?;
        println!(
            "Found {} strikethrough annotations on page {}.",
            strikethroughs.len(),
            i + 1
        );
        for strikethrough in strikethroughs {
            println!();
            println!("Struck text:");
            println!("{:#?}", strikethrough.0);
            println!();
            println!("Text spans {} lines", strikethrough.1.len());
            println!();
            println!("Strikethrough bounding boxes:");
            for rect in strikethrough.1 {
                println!("{:#?}", rect);
                println!();
            }
            println!();
            println!();
        }
    }

    Ok(())
}

fn annotation_strikethrough(
    other_dict: &Dictionary,
    resolver: &impl Resolve,
) -> Result<Vec<(String, Vec<pdf::content::ViewRect>)>, PdfError> {
    let mut strikethroughs: Vec<(String, Vec<pdf::content::ViewRect>)> = Vec::new();

    if !other_dict.is_empty() {
        let annotations = other_dict.get("Annots".into());
        if let Some(annotations) = annotations {
            let annotations_resolved = annotations.clone().resolve(resolver)?;
            let annotations_array = annotations_resolved.into_array()?;
            for annotation in annotations_array.iter() {
                let mut paths: Vec<pdf::content::ViewRect> = Vec::new();
                let annotation_resolved = annotation.clone().resolve(resolver)?;
                let annotation_dict = annotation_resolved.into_dictionary()?;

                // If you have a multiline strikethrough, "Rect" will be the bounding
                // box around all the strikethrough lines.
                // "QuadPoints" gives 8 points for each line that is struck through,
                // so if a single annotation involves text on two lines, QuadPoints
                // should have 16 values in it. It starts with the bottom left and
                // runs counter-clockwise.
                let subtype = annotation_dict.get("Subtype".into());
                if let Some(subtype) = subtype {
                    let subtype = subtype.clone().into_name()?;
                    if subtype.as_str() == "StrikeOut" {
                        let rects = annotation_dict.get("QuadPoints".into());
                        let text = annotation_dict.get("Contents".into());
                        if let (Some(rects), Some(text)) = (rects, text) {
                            let text = text.to_string()?;

                            // Check multiples of 8.
                            let rects_array = rects.clone().into_array()?;
                            if rects_array.len() % 8 == 0 {
                                let rects: Vec<Vec<Primitive>> =
                                    rects_array.chunks(8).map(|chunk| chunk.to_vec()).collect();

                                for rect in rects {
                                    let mut quad_points: Vec<f32> = Vec::new();
                                    for num in rect {
                                        let number = num.as_number()?;
                                        quad_points.push(number);
                                    }
                                    if quad_points.len() == 8 {
                                        paths.push(ViewRect {
                                            x: quad_points[0],
                                            y: quad_points[1],
                                            width: quad_points[2] - quad_points[0],
                                            height: quad_points[7] - quad_points[1],
                                        });
                                    }
                                }
                                strikethroughs.push((text, paths))
                            }
                        }
                    }
                }
            }
        }
    }

    Ok(strikethroughs)
}
src-pdfrs/pdf/examples/read.rs, new file, 128 lines
@@ -0,0 +1,128 @@
extern crate pdf;

use std::collections::HashMap;
use std::env::args;
use std::fs;
use std::time::SystemTime;

use pdf::enc::StreamFilter;
use pdf::error::PdfError;
use pdf::file::{FileOptions, Log};
use pdf::object::*;
use pdf::primitive::Primitive;

struct VerboseLog;
impl Log for VerboseLog {
    fn load_object(&self, r: PlainRef) {
        println!("load {r:?}");
    }
    fn log_get(&self, r: PlainRef) {
        println!("get {r:?}");
    }
}

#[cfg(feature = "cache")]
fn main() -> Result<(), PdfError> {
    let path = args().nth(1).expect("no file given");
    println!("read: {}", path);
    let now = SystemTime::now();

    let file = FileOptions::cached().log(VerboseLog).open(&path).unwrap();
    let resolver = file.resolver();

    if let Some(ref info) = file.trailer.info_dict {
        let title = info.title.as_ref().map(|p| p.to_string_lossy());
        let author = info.author.as_ref().map(|p| p.to_string_lossy());

        let descr = match (title, author) {
            (Some(title), None) => title,
            (None, Some(author)) => format!("[no title] – {}", author),
            (Some(title), Some(author)) => format!("{} – {}", title, author),
            _ => "PDF".into(),
        };
        println!("{}", descr);
    }

    let mut images: Vec<_> = vec![];
    let mut fonts = HashMap::new();

    for page in file.pages() {
        let page = page.unwrap();
        let resources = page.resources().unwrap();
        for (i, font) in resources
            .fonts
            .values()
            .map(|lazy_font| lazy_font.load(&resolver))
            .filter_map(|f| f.ok())
            .enumerate()
        {
            let name = match &font.name {
                Some(name) => name.as_str().into(),
                None => i.to_string(),
            };
            fonts.insert(name, font.clone());
        }
        images.extend(
            resources
                .xobjects
                .iter()
                .map(|(_name, &r)| resolver.get(r).unwrap())
                .filter(|o| matches!(**o, XObject::Image(_))),
        );
    }

    for (i, o) in images.iter().enumerate() {
        let img = match **o {
            XObject::Image(ref im) => im,
            _ => continue,
        };
        let (mut data, filter) = img.raw_image_data(&resolver)?;
        let ext = match filter {
            Some(StreamFilter::DCTDecode(_)) => "jpeg",
            Some(StreamFilter::JBIG2Decode(_)) => "jbig2",
            Some(StreamFilter::JPXDecode) => "jp2k",
            Some(StreamFilter::FlateDecode(_)) => "png",
            Some(StreamFilter::CCITTFaxDecode(_)) => {
                data = fax::tiff::wrap(&data, img.width, img.height).into();
                "tiff"
            }
            _ => continue,
        };

        let fname = format!("extracted_image_{}.{}", i, ext);

        fs::write(fname.as_str(), data).unwrap();
        println!("Wrote file {}", fname);
    }
    println!("Found {} image(s).", images.len());

    for (name, font) in fonts.iter() {
        let fname = format!("font_{}", name);
        if let Some(Ok(data)) = font.embedded_data(&resolver) {
            fs::write(fname.as_str(), data).unwrap();
            println!("Wrote file {}", fname);
        }
    }
    println!("Found {} font(s).", fonts.len());

    if let Some(ref forms) = file.get_root().forms {
        println!("Forms:");
        for field in forms.fields.iter() {
            print!("  {:?} = ", field.name);
            match field.value {
                Primitive::String(ref s) => println!("{}", s.to_string_lossy()),
                Primitive::Integer(i) => println!("{}", i),
                Primitive::Name(ref s) => println!("{}", s),
                ref p => println!("{:?}", p),
            }
        }
    }

    if let Ok(elapsed) = now.elapsed() {
        println!(
            "Time: {}s",
            elapsed.as_secs() as f64 + elapsed.subsec_nanos() as f64 * 1e-9
        );
    }
    Ok(())
}
src-pdfrs/pdf/fuzz/.gitignore (vendored), new file, 4 lines
@@ -0,0 +1,4 @@

target
corpus
artifacts
src-pdfrs/pdf/fuzz/Cargo.lock (generated), new file, 735 lines
@@ -0,0 +1,735 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4

[[package]]
name = "adler32"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234"

[[package]]
name = "aes"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0"
dependencies = [
 "cfg-if",
 "cipher",
 "cpufeatures",
]

[[package]]
name = "ahash"
version = "0.8.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
dependencies = [
 "cfg-if",
 "once_cell",
 "version_check",
 "zerocopy",
]

[[package]]
name = "allocator-api2"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"

[[package]]
name = "arbitrary"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dde20b3d026af13f561bdd0f15edf01fc734f0dafcedbaf42bba506a9517f223"

[[package]]
name = "async-trait"
version = "0.1.85"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f934833b4b7233644e5848f235df3f57ed8c80f1528a26c3dfa13d2147fa056"
dependencies = [
 "proc-macro2",
 "quote",
 "syn 2.0.96",
]

[[package]]
name = "autocfg"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"

[[package]]
name = "bitflags"
version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36"

[[package]]
name = "block-buffer"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
dependencies = [
 "generic-array",
]

[[package]]
name = "block-padding"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8894febbff9f758034a5b8e12d87918f56dfc64a8e1fe757d65e29041538d93"
dependencies = [
 "generic-array",
]

[[package]]
name = "bumpalo"
version = "3.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf"

[[package]]
name = "cbc"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26b52a9543ae338f279b96b0b9fed9c8093744685043739079ce85cd58f289a6"
dependencies = [
 "cipher",
]

[[package]]
name = "cc"
version = "1.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13208fcbb66eaeffe09b99fffbe1af420f00a7b35aa99ad683dfc1aa76145229"
dependencies = [
 "jobserver",
 "libc",
 "shlex",
]

[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"

[[package]]
name = "cipher"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad"
dependencies = [
 "crypto-common",
 "inout",
]

[[package]]
name = "core2"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505"
dependencies = [
 "memchr",
]

[[package]]
name = "cpufeatures"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280"
dependencies = [
 "libc",
]

[[package]]
name = "crc32fast"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3"
dependencies = [
 "cfg-if",
]

[[package]]
name = "crypto-common"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
dependencies = [
 "generic-array",
 "typenum",
]

[[package]]
name = "dary_heap"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04d2cd9c18b9f454ed67da600630b021a8a80bf33f8c95896ab33aaf1c26b728"

[[package]]
name = "datasize"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e65c07d59e45d77a8bda53458c24a828893a99ac6cdd9c84111e09176ab739a2"
dependencies = [
 "datasize_derive",
]

[[package]]
name = "datasize_derive"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "613e4ee15899913285b7612004bbd490abd605be7b11d35afada5902fb6b91d5"
dependencies = [
 "proc-macro2",
 "quote",
 "syn 1.0.109",
]

[[package]]
name = "deflate"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c86f7e25f518f4b81808a2cf1c50996a61f5c2eb394b2393bd87f2a4780a432f"
dependencies = [
 "adler32",
]

[[package]]
name = "digest"
version = "0.10.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [
 "block-buffer",
 "crypto-common",
]

[[package]]
name = "either"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"

[[package]]
name = "equivalent"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"

[[package]]
name = "fax"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b03e33ad0e71af414ef9d2b0a94d23ff59115bb068e6a6a06c0952f2c22ffd77"
dependencies = [
 "fax_derive",
]

[[package]]
name = "fax_derive"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c1d7ffc9f2dc8316348c75281a99c8fdc60c1ddf4f82a366d117bf1b74d5a39"
dependencies = [
 "proc-macro2",
 "quote",
 "syn 1.0.109",
]

[[package]]
name = "generic-array"
version = "0.14.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
dependencies = [
 "typenum",
 "version_check",
]

[[package]]
name = "globalcache"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240a3059d86f2ba6859ac79f95ff94e65606abc775c1bc0ecf9b6590fb35dc04"
dependencies = [
 "async-trait",
 "tuple",
 "web-time",
]

[[package]]
name = "hashbrown"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
dependencies = [
 "ahash",
 "allocator-api2",
]

[[package]]
name = "hashbrown"
version = "0.15.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289"

[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"

[[package]]
name = "indexmap"
version = "2.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652"
dependencies = [
 "equivalent",
 "hashbrown 0.15.2",
]

[[package]]
name = "inout"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5"
dependencies = [
 "block-padding",
 "generic-array",
]

[[package]]
name = "istring"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "875cc6fb9aecbc1a9bd736f2d18b12e0756b4c80c5e35e28262154abcb077a39"
dependencies = [
 "datasize",
]

[[package]]
name = "itertools"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
dependencies = [
 "either",
]

[[package]]
name = "jobserver"
version = "0.1.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0"
dependencies = [
 "libc",
]

[[package]]
name = "jpeg-decoder"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f5d4a7da358eff58addd2877a45865158f0d78c911d43a5784ceb7bbf52833b0"

[[package]]
name = "js-sys"
version = "0.3.77"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f"
dependencies = [
 "once_cell",
 "wasm-bindgen",
]

[[package]]
name = "libc"
version = "0.2.169"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"

[[package]]
name = "libflate"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "45d9dfdc14ea4ef0900c1cddbc8dcd553fbaacd8a4a282cf4018ae9dd04fb21e"
dependencies = [
 "adler32",
 "core2",
 "crc32fast",
 "dary_heap",
 "libflate_lz77",
]

[[package]]
name = "libflate_lz77"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6e0d73b369f386f1c44abd9c570d5318f55ccde816ff4b562fa452e5182863d"
dependencies = [
 "core2",
 "hashbrown 0.14.5",
 "rle-decode-fast",
]

[[package]]
name = "libfuzzer-sys"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf78f52d400cf2d84a3a973a78a592b4adc535739e0a5597a0da6f0c357adc75"
dependencies = [
 "arbitrary",
 "cc",
]

[[package]]
name = "log"
version = "0.4.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f"

[[package]]
name = "md5"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"

[[package]]
name = "memchr"
version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"

[[package]]
name = "num-traits"
version = "0.2.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
dependencies = [
 "autocfg",
]

[[package]]
name = "once_cell"
version = "1.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775"

[[package]]
name = "pdf"
version = "0.9.1"
dependencies = [
 "aes",
 "bitflags",
 "cbc",
 "datasize",
 "deflate",
 "fax",
 "globalcache",
 "indexmap",
 "istring",
 "itertools",
 "jpeg-decoder",
 "libflate",
 "log",
 "md5",
 "once_cell",
 "pdf_derive",
 "sha2",
 "snafu",
 "stringprep",
|
||||
"weezl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pdf-fuzz"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"libfuzzer-sys",
|
||||
"pdf",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pdf_derive"
|
||||
version = "0.2.0"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.96",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.93"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.38"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rle-decode-fast"
|
||||
version = "1.0.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422"
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.217"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.217"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.96",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sha2"
|
||||
version = "0.10.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"cpufeatures",
|
||||
"digest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "shlex"
|
||||
version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
||||
|
||||
[[package]]
|
||||
name = "snafu"
|
||||
version = "0.8.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "223891c85e2a29c3fe8fb900c1fae5e69c2e42415e3177752e8718475efa5019"
|
||||
dependencies = [
|
||||
"snafu-derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "snafu-derive"
|
||||
version = "0.8.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.96",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "stringprep"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1"
|
||||
dependencies = [
|
||||
"unicode-bidi",
|
||||
"unicode-normalization",
|
||||
"unicode-properties",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.109"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.96"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d5d0adab1ae378d7f53bdebc67a39f1f151407ef230f0ce2883572f5d8985c80"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tinyvec"
|
||||
version = "1.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "022db8904dfa342efe721985167e9fcd16c29b226db4397ed752a761cfce81e8"
|
||||
dependencies = [
|
||||
"tinyvec_macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tinyvec_macros"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
|
||||
|
||||
[[package]]
|
||||
name = "tuple"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9bb9f6bd73479481158ba8ee3edf17aca93354623d13f02e96a2014fdbc1c37e"
|
||||
dependencies = [
|
||||
"num-traits",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "typenum"
|
||||
version = "1.17.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-bidi"
|
||||
version = "0.3.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-normalization"
|
||||
version = "0.1.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956"
|
||||
dependencies = [
|
||||
"tinyvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-properties"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e70f2a8b45122e719eb623c01822704c4e0907e7e426a05927e1a1cfff5b75d0"
|
||||
|
||||
[[package]]
|
||||
name = "version_check"
|
||||
version = "0.9.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen"
|
||||
version = "0.2.100"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"once_cell",
|
||||
"wasm-bindgen-macro",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-backend"
|
||||
version = "0.2.100"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6"
|
||||
dependencies = [
|
||||
"bumpalo",
|
||||
"log",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.96",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-macro"
|
||||
version = "0.2.100"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407"
|
||||
dependencies = [
|
||||
"quote",
|
||||
"wasm-bindgen-macro-support",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-macro-support"
|
||||
version = "0.2.100"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.96",
|
||||
"wasm-bindgen-backend",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-shared"
|
||||
version = "0.2.100"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "web-time"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb"
|
||||
dependencies = [
|
||||
"js-sys",
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "weezl"
|
||||
version = "0.1.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "53a85b86a771b1c87058196170769dd264f66c0782acf1ae6cc51bfd64b39082"
|
||||
|
||||
[[package]]
|
||||
name = "zerocopy"
|
||||
version = "0.7.35"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
|
||||
dependencies = [
|
||||
"zerocopy-derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerocopy-derive"
|
||||
version = "0.7.35"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.96",
|
||||
]
|
||||
26
src-pdfrs/pdf/fuzz/Cargo.toml
Normal file
@ -0,0 +1,26 @@
[package]
name = "pdf-fuzz"
version = "0.0.0"
authors = ["Automatically generated"]
publish = false
edition = "2018"

[package.metadata]
cargo-fuzz = true

[dependencies]
libfuzzer-sys = "0.4"

[dependencies.pdf]
path = ".."

# Prevent this from interfering with workspaces
[workspace]
members = ["."]

[[bin]]
name = "parse"
path = "fuzz_targets/parse.rs"
test = false
doc = false
14
src-pdfrs/pdf/fuzz/fuzz_targets/parse.rs
Normal file
@ -0,0 +1,14 @@
#![no_main]
use libfuzzer_sys::fuzz_target;

fn harness(data: &[u8]) {
    if let Ok(file) = pdf::file::FileOptions::cached().load(data) {
        for idx in 0..file.num_pages() {
            let _ = file.get_page(idx);
        }
    }
}

fuzz_target!(|data: &[u8]| {
    let _ = harness(data);
});
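The `parse` target above only checks that loading a PDF and touching every page never panics. As a hedged illustration (not part of this commit), the same harness logic can also be replayed over a saved corpus directory as an ordinary unit test; the corpus path below is a hypothetical example.

// Sketch only: replay saved fuzz inputs without libFuzzer.
#[cfg(test)]
mod replay {
    #[test]
    fn replay_corpus() {
        // Hypothetical corpus location; adjust to wherever inputs are kept.
        let dir = std::path::Path::new("fuzz/corpus/parse");
        if let Ok(entries) = std::fs::read_dir(dir) {
            for entry in entries.flatten() {
                if let Ok(data) = std::fs::read(entry.path()) {
                    // Same logic as the `parse` target: load and touch every page.
                    if let Ok(file) = pdf::file::FileOptions::cached().load(data) {
                        for idx in 0..file.num_pages() {
                            let _ = file.get_page(idx);
                        }
                    }
                }
            }
        }
    }
}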
121
src-pdfrs/pdf/src/any.rs
Normal file
@ -0,0 +1,121 @@
use std::any::TypeId;
use std::rc::Rc;
use std::sync::Arc;
use datasize::DataSize;
use crate::object::{Object};
use crate::error::{Result, PdfError};

pub trait AnyObject {
    fn type_name(&self) -> &'static str;
    fn type_id(&self) -> TypeId;
    fn size(&self) -> usize;
}

#[repr(transparent)]
pub struct NoSize<T>(T);
impl<T: 'static> AnyObject for NoSize<T> {
    fn size(&self) -> usize {
        0
    }
    fn type_id(&self) -> TypeId {
        TypeId::of::<T>()
    }
    fn type_name(&self) -> &'static str {
        std::any::type_name::<T>()
    }
}

#[repr(transparent)]
pub struct WithSize<T>(T);
impl<T: DataSize + 'static> AnyObject for WithSize<T> {
    fn size(&self) -> usize {
        datasize::data_size(&self.0)
    }
    fn type_id(&self) -> TypeId {
        TypeId::of::<T>()
    }
    fn type_name(&self) -> &'static str {
        std::any::type_name::<T>()
    }
}

#[derive(DataSize)]
pub struct Any(Rc<dyn AnyObject>);

impl Any {
    pub fn downcast<T>(self) -> Result<Rc<T>>
        where T: AnyObject + 'static
    {
        if TypeId::of::<T>() == self.0.type_id() {
            unsafe {
                let raw: *const dyn AnyObject = Rc::into_raw(self.0);
                Ok(Rc::from_raw(raw as *const T))
            }
        } else {
            Err(type_mismatch::<T>(self.0.type_name()))
        }
    }
    pub fn new<T>(rc: Rc<T>) -> Any
        where WithSize<T>: AnyObject, T: 'static
    {
        Any(unsafe {
            std::mem::transmute::<Rc<T>, Rc<WithSize<T>>>(rc)
        } as _)
    }
    pub fn new_without_size<T>(rc: Rc<T>) -> Any
        where NoSize<T>: AnyObject, T: 'static
    {
        Any(unsafe {
            std::mem::transmute::<Rc<T>, Rc<NoSize<T>>>(rc)
        } as _)
    }
    pub fn type_name(&self) -> &'static str {
        self.0.type_name()
    }
}

#[derive(Clone, DataSize)]
pub struct AnySync(Arc<dyn AnyObject+Sync+Send>);

#[cfg(feature="cache")]
impl globalcache::ValueSize for AnySync {
    #[inline]
    fn size(&self) -> usize {
        self.0.size()
    }
}

impl AnySync {
    pub fn downcast<T>(self) -> Result<Arc<T>>
        where T: 'static
    {
        if TypeId::of::<T>() == self.0.type_id() {
            unsafe {
                let raw: *const (dyn AnyObject+Sync+Send) = Arc::into_raw(self.0);
                Ok(Arc::from_raw(raw as *const T))
            }
        } else {
            Err(type_mismatch::<T>(self.0.type_name()))
        }
    }
    pub fn new<T>(arc: Arc<T>) -> AnySync
        where WithSize<T>: AnyObject, T: Sync + Send + 'static
    {
        AnySync(unsafe {
            std::mem::transmute::<Arc<T>, Arc<WithSize<T>>>(arc)
        } as _)
    }
    pub fn new_without_size<T>(arc: Arc<T>) -> AnySync
        where NoSize<T>: AnyObject, T: Sync + Send + 'static
    {
        AnySync(unsafe {
            std::mem::transmute::<Arc<T>, Arc<NoSize<T>>>(arc)
        } as _)
    }
    pub fn type_name(&self) -> &'static str {
        self.0.type_name()
    }
}
fn type_mismatch<T>(name: &str) -> PdfError {
    PdfError::Other { msg: format!("expected {}, found {}", std::any::type_name::<T>(), name) }
}
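The `AnyObject` trait stores a `TypeId` alongside each erased value, so `downcast` can verify the requested type before the raw-pointer cast; the `#[repr(transparent)]` wrappers guarantee the transmute between `Arc<T>` and `Arc<WithSize<T>>` is layout-safe. A minimal usage sketch (assuming it runs inside this crate, and that `u32` implements `datasize::DataSize`, which the datasize crate provides for primitives):

fn anysync_roundtrip() -> crate::error::Result<()> {
    use std::sync::Arc;
    // Erase the concrete type behind the AnySync wrapper.
    let any = AnySync::new(Arc::new(42u32));
    assert_eq!(any.type_name(), "u32");
    // Recover it; the TypeId comparison guards the unsafe cast.
    let back: Arc<u32> = any.downcast()?;
    assert_eq!(*back, 42);
    Ok(())
}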
179
src-pdfrs/pdf/src/backend.rs
Normal file
@ -0,0 +1,179 @@
use crate::error::*;
use crate::parser::Lexer;
use crate::parser::read_xref_and_trailer_at;
use crate::xref::XRefTable;
use crate::primitive::Dictionary;
use crate::object::*;
use std::ops::Deref;

use std::ops::{
    RangeFull,
    RangeFrom,
    RangeTo,
    Range,
};

pub const MAX_ID: u32 = 1_000_000;

pub trait Backend: Sized {
    fn read<T: IndexRange>(&self, range: T) -> Result<&[u8]>;
    //fn write<T: IndexRange>(&mut self, range: T) -> Result<&mut [u8]>;
    fn len(&self) -> usize;
    fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns the offset of the beginning of the file, i.e., where the `%PDF-1.5` header is.
    /// (currently only used internally!)
    fn locate_start_offset(&self) -> Result<usize> {
        // Read from the beginning of the file, and look for the header.
        // Implementation note 13 in version 1.7 of the PDF reference says that Acrobat viewers
        // expect the header to be within the first 1KB of the file, so we do the same here.
        const HEADER: &[u8] = b"%PDF-";
        let buf = t!(self.read(..std::cmp::min(1024, self.len())));
        buf
            .windows(HEADER.len())
            .position(|window| window == HEADER)
            .ok_or_else(|| PdfError::Other{ msg: "file header is missing".to_string() })
    }

    /// Returns the value of startxref (currently only used internally!)
    fn locate_xref_offset(&self) -> Result<usize> {
        // locate the xref offset at the end of the file
        // `\nPOS\n%%EOF` where POS is the position encoded as base 10 integer.
        // u64::MAX has 20 digits + \n\n(2) + %%EOF(5) = 27 bytes max.

        let mut lexer = Lexer::new(t!(self.read(..)));
        lexer.set_pos_from_end(0);
        t!(lexer.seek_substr_back(b"startxref"));
        t!(lexer.next()).to::<usize>()
    }

    /// Used internally by File, but could also be useful for applications that want to look at the raw PDF objects.
    fn read_xref_table_and_trailer(&self, start_offset: usize, resolve: &impl Resolve) -> Result<(XRefTable, Dictionary)> {
        let xref_offset = t!(self.locate_xref_offset());
        let pos = t!(start_offset.checked_add(xref_offset).ok_or(PdfError::Invalid));
        if pos >= self.len() {
            bail!("XRef offset outside file bounds");
        }

        let mut lexer = Lexer::with_offset(t!(self.read(pos ..)), pos);

        let (xref_sections, trailer) = t!(read_xref_and_trailer_at(&mut lexer, resolve));

        let highest_id = t!(trailer.get("Size")
            .ok_or_else(|| PdfError::MissingEntry {field: "Size".into(), typ: "XRefTable"})?
            .as_u32());

        if highest_id > MAX_ID {
            bail!("too many objects");
        }
        let mut refs = XRefTable::new(highest_id as ObjNr);
        for section in xref_sections {
            refs.add_entries_from(section)?;
        }

        let mut prev_trailer = {
            match trailer.get("Prev") {
                Some(p) => Some(t!(p.as_usize())),
                None => None
            }
        };
        trace!("READ XREF AND TABLE");
        let mut seen = vec![];
        while let Some(prev_xref_offset) = prev_trailer {
            if seen.contains(&prev_xref_offset) {
                bail!("xref offsets loop");
            }
            seen.push(prev_xref_offset);

            let pos = t!(start_offset.checked_add(prev_xref_offset).ok_or(PdfError::Invalid));
            let mut lexer = Lexer::with_offset(t!(self.read(pos..)), pos);
            let (xref_sections, trailer) = t!(read_xref_and_trailer_at(&mut lexer, resolve));

            for section in xref_sections {
                refs.add_entries_from(section)?;
            }

            prev_trailer = {
                match trailer.get("Prev") {
                    Some(p) => {
                        let prev = t!(p.as_usize());
                        Some(prev)
                    }
                    None => None
                }
            };
        }
        Ok((refs, trailer))
    }
}


impl<T> Backend for T where T: Deref<Target=[u8]> { //+ DerefMut<Target=[u8]> {
    fn read<R: IndexRange>(&self, range: R) -> Result<&[u8]> {
        let r = t!(range.to_range(self.len()));
        Ok(&self[r])
    }
    /*
    fn write<R: IndexRange>(&mut self, range: R) -> Result<&mut [u8]> {
        let r = range.to_range(self.len())?;
        Ok(&mut self[r])
    }
    */
    fn len(&self) -> usize {
        (**self).len()
    }
}

/// `IndexRange` is implemented by Rust's built-in range types, produced
/// by range syntax like `..`, `a..`, `..b` or `c..d`.
pub trait IndexRange
{
    /// Start index (inclusive)
    fn start(&self) -> Option<usize>;

    /// End index (exclusive)
    fn end(&self) -> Option<usize>;

    /// `len`: the size of whatever container that is being indexed
    fn to_range(&self, len: usize) -> Result<Range<usize>> {
        match (self.start(), self.end()) {
            (None, None) => Ok(0 .. len),
            (Some(start), None) if start <= len => Ok(start .. len),
            (None, Some(end)) if end <= len => Ok(0 .. end),
            (Some(start), Some(end)) if start <= end && end <= len => Ok(start .. end),
            _ => Err(PdfError::ContentReadPastBoundary)
        }
    }
}


impl IndexRange for RangeFull {
    #[inline]
    fn start(&self) -> Option<usize> { None }
    #[inline]
    fn end(&self) -> Option<usize> { None }
}

impl IndexRange for RangeFrom<usize> {
    #[inline]
    fn start(&self) -> Option<usize> { Some(self.start) }
    #[inline]
    fn end(&self) -> Option<usize> { None }
}

impl IndexRange for RangeTo<usize> {
    #[inline]
    fn start(&self) -> Option<usize> { None }
    #[inline]
    fn end(&self) -> Option<usize> { Some(self.end) }
}

impl IndexRange for Range<usize> {
    #[inline]
    fn start(&self) -> Option<usize> { Some(self.start) }
    #[inline]
    fn end(&self) -> Option<usize> { Some(self.end) }
}
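Because `Backend` is blanket-implemented for anything that derefs to `[u8]`, a plain `Vec<u8>` already satisfies it, and every `read` goes through the bounds-checked `IndexRange::to_range`. A minimal sketch (assuming it runs inside this crate so the `Backend` trait is in scope):

fn backend_demo() -> crate::error::Result<()> {
    let buf: Vec<u8> = b"%PDF-1.5 ...".to_vec();
    // The header search finds "%PDF-" at offset 0 here.
    assert_eq!(buf.locate_start_offset()?, 0);
    // Bounds-checked slicing via IndexRange::to_range.
    let head = buf.read(..4)?;
    assert_eq!(head, b"%PDF");
    // Out-of-bounds ranges surface as ContentReadPastBoundary instead of panicking.
    assert!(buf.read(..buf.len() + 1).is_err());
    Ok(())
}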
406
src-pdfrs/pdf/src/build.rs
Normal file
@ -0,0 +1,406 @@
use std::collections::HashMap;
use std::collections::HashSet;
use std::ops::Range;
use std::sync::Arc;

use datasize::DataSize;

use crate::PdfError;
use crate::any::AnySync;
use crate::enc::StreamFilter;
use crate::file::Cache;
use crate::file::FileOptions;
use crate::file::Log;
use crate::file::Storage;
use crate::file::Trailer;
use crate::object::*;
use crate::content::*;
use crate::error::Result;
use crate::parser::ParseFlags;
use crate::primitive::Dictionary;
use crate::primitive::Primitive;

#[derive(Default)]
pub struct PageBuilder {
    pub ops: Vec<Op>,
    pub media_box: Option<Rectangle>,
    pub crop_box: Option<Rectangle>,
    pub trim_box: Option<Rectangle>,
    pub resources: Resources,
    pub rotate: i32,
    pub metadata: Option<Primitive>,
    pub lgi: Option<Primitive>,
    pub vp: Option<Primitive>,
    pub other: Dictionary,
}
impl PageBuilder {
    pub fn from_content(content: Content, resolve: &impl Resolve) -> Result<PageBuilder> {
        Ok(PageBuilder {
            ops: content.operations(resolve)?,
            .. PageBuilder::default()
        })
    }
    pub fn from_page(page: &Page, resolve: &impl Resolve) -> Result<PageBuilder> {
        Ok(PageBuilder {
            ops: page.contents.as_ref().map(|c| c.operations(resolve)).transpose()?.unwrap_or_default(),
            media_box: Some(page.media_box()?),
            crop_box: Some(page.crop_box()?),
            trim_box: page.trim_box,
            resources: (**page.resources()?.data()).clone(),
            rotate: page.rotate,
            metadata: page.metadata.clone(),
            lgi: page.lgi.clone(),
            vp: page.vp.clone(),
            other: page.other.clone(),
        })
    }
    pub fn clone_page(page: &Page, cloner: &mut impl Cloner) -> Result<PageBuilder> {
        let old_resources = &**page.resources()?.data();

        let mut resources = Resources::default();
        let ops = page.contents.as_ref()
            .map(|content| content.operations(cloner)).transpose()?
            .map(|ops| {
                ops.into_iter().map(|op| -> Result<Op, PdfError> {
                    deep_clone_op(&op, cloner, old_resources, &mut resources)
                }).collect()
            })
            .transpose()?
            .unwrap_or_default();

        Ok(PageBuilder {
            ops,
            media_box: Some(page.media_box()?),
            crop_box: Some(page.crop_box()?),
            trim_box: page.trim_box,
            resources,
            rotate: page.rotate,
            metadata: page.metadata.deep_clone(cloner)?,
            lgi: page.lgi.deep_clone(cloner)?,
            vp: page.vp.deep_clone(cloner)?,
            other: page.other.deep_clone(cloner)?,
        })
    }
    pub fn size(&mut self, width: f32, height: f32) {
        self.media_box = Some(Rectangle {
            top: 0.,
            left: 0.,
            bottom: height,
            right: width,
        });
    }
}

pub struct CatalogBuilder {
    pages: Vec<PageBuilder>
}
impl CatalogBuilder {
    pub fn from_pages(pages: Vec<PageBuilder>) -> CatalogBuilder {
        CatalogBuilder {
            pages
        }
    }
    pub fn build(self, update: &mut impl Updater) -> Result<Catalog> {
        let kids_promise: Vec<_> = self.pages.iter()
            .map(|_page| update.promise::<PagesNode>())
            .collect();
        let kids: Vec<_> = kids_promise.iter()
            .map(|p| Ref::new(p.get_inner()))
            .collect();

        let tree = PagesRc::create(PageTree {
            parent: None,
            count: kids.len() as _,
            kids,
            resources: None,
            media_box: None,
            crop_box: None
        }, update)?;

        for (page, promise) in self.pages.into_iter().zip(kids_promise) {
            let content = Content::from_ops(page.ops);
            let resources = update.create(page.resources)?.into();
            let page = Page {
                parent: tree.clone(),
                contents: Some(content),
                media_box: page.media_box,
                crop_box: page.crop_box,
                trim_box: page.trim_box,
                resources: Some(resources),
                rotate: page.rotate,
                metadata: page.metadata,
                lgi: page.lgi,
                vp: page.vp,
                other: page.other,
                annotations: Default::default(),
            };
            update.fulfill(promise, PagesNode::Leaf(page))?;
        }

        Ok(Catalog {
            version: Some("1.7".into()),
            pages: tree,
            names: None,
            dests: None,
            metadata: None,
            outlines: None,
            struct_tree_root: None,
            forms: None,
            page_labels: None,
        })
    }
}

pub struct PdfBuilder<SC, OC, L> {
    pub storage: Storage<Vec<u8>, SC, OC, L>,
    pub info: Option<InfoDict>,
    pub id: Option<[String; 2]>,
}
impl<SC, OC, L> PdfBuilder<SC, OC, L>
where
    SC: Cache<Result<AnySync, Arc<PdfError>>>,
    OC: Cache<Result<Arc<[u8]>, Arc<PdfError>>>,
    L: Log,
{
    pub fn new(fileoptions: FileOptions<'_, SC, OC, L>) -> Self {
        let storage = fileoptions.storage();
        PdfBuilder {
            storage,
            info: None,
            id: None
        }
    }
    pub fn info(mut self, info: InfoDict) -> Self {
        self.info = Some(info);
        self
    }
    pub fn id(mut self, a: String, b: String) -> Self {
        self.id = Some([a, b]);
        self
    }
    pub fn build(mut self, catalog: CatalogBuilder) -> Result<Vec<u8>> {
        let catalog = catalog.build(&mut self.storage)?;
        let info = self.info.take();
        let mut trailer = Trailer {
            root: self.storage.create(catalog)?,
            encrypt_dict: None,
            size: 0,
            id: vec!["foo".into(), "bar".into()],
            info_dict: info.map(|info| self.storage.create(info)).transpose()?,
            prev_trailer_pos: None,
            other: Dictionary::new(),
        };
        self.storage.save(&mut trailer)?;
        Ok(self.storage.into_inner())
    }
}
pub struct Importer<'a, R, U> {
    resolver: R,
    map: HashMap<PlainRef, PlainRef>,
    updater: &'a mut U,
    rcrefs: HashMap<PlainRef, AnySync>,
    // ptr of old -> (old, new)
    shared: HashMap<usize, (AnySync, AnySync)>,
}

pub struct ImporterMap<R> {
    resolver: R,
    map: HashMap<PlainRef, PlainRef>,
}

impl<'a, R, U> Importer<'a, R, U> {
    pub fn new(resolver: R, updater: &'a mut U) -> Self {
        Importer {
            resolver,
            updater,
            map: Default::default(),
            rcrefs: Default::default(),
            shared: Default::default(),
        }
    }
}
impl<'a, R: Resolve, U> Importer<'a, R, U> {
    pub fn finish(self) -> ImporterMap<R> {
        ImporterMap { resolver: self.resolver, map: self.map }
    }
}
impl<R: Resolve> ImporterMap<R> {
    fn compare_dict(&self, a_dict: &Dictionary, b_dict: &Dictionary, new_resolve: &impl Resolve) -> Result<bool> {
        let mut same = true;
        let mut b_unvisited: HashSet<_> = b_dict.keys().collect();
        for (a_key, a_val) in a_dict.iter() {
            if let Some(b_val) = b_dict.get(a_key) {
                if !self.compare_prim(a_val, b_val, new_resolve)? {
                    println!("value for key {a_key} mismatch.");
                    same = false;
                }
                b_unvisited.remove(a_key);
            } else {
                println!("missing key {a_key} in b.");
                same = false;
            }
        }
        for b_key in b_unvisited.iter() {
            println!("missing key {b_key} in a.");
        }
        // The dictionaries only match if every key in b was also visited in a.
        Ok(same && b_unvisited.is_empty())
    }
    fn compare_prim(&self, a: &Primitive, b: &Primitive, new_resolve: &impl Resolve) -> Result<bool> {
        match (a, b) {
            (Primitive::Array(a_parts), Primitive::Array(b_parts)) => {
                if a_parts.len() != b_parts.len() {
                    dbg!(a_parts, b_parts);
                    println!("different length {} vs. {}", a_parts.len(), b_parts.len());
                    println!("a = {a_parts:?}");
                    println!("b = {b_parts:?}");
                    return Ok(false);
                }
                for (a, b) in a_parts.iter().zip(b_parts.iter()) {
                    if !self.compare_prim(a, b, new_resolve)? {
                        return Ok(false);
                    }
                }
                Ok(true)
            }
            (Primitive::Dictionary(a_dict), Primitive::Dictionary(b_dict)) => {
                self.compare_dict(a_dict, b_dict, new_resolve)
            }
            (Primitive::Reference(r1), Primitive::Reference(r2)) => {
                match self.map.get(&r1) {
                    Some(r) if r == r2 => Ok(true),
                    _ => Ok(false)
                }
            }
            (Primitive::Stream(a_s), Primitive::Stream(b_s)) => {
                if !self.compare_dict(&a_s.info, &b_s.info, new_resolve)? {
                    println!("stream dicts differ");
                    return Ok(false)
                }
                let a_data = a_s.raw_data(&self.resolver)?;
                let b_data = b_s.raw_data(new_resolve)?;
                if a_data != b_data {
                    println!("data differs.");
                    return Ok(false)
                }
                Ok(true)
            }
            (Primitive::Integer(a), Primitive::Number(b)) => Ok(*a as f32 == *b),
            (Primitive::Number(a), Primitive::Integer(b)) => Ok(*a == *b as f32),
            (Primitive::Reference(a_ref), b) => {
                let a = self.resolver.resolve(*a_ref)?;
                self.compare_prim(&a, b, new_resolve)
            }
            (a, Primitive::Reference(b_ref)) => {
                let b = new_resolve.resolve(*b_ref)?;
                self.compare_prim(a, &b, new_resolve)
            }
            (ref a, ref b) => {
                if a == b {
                    Ok(true)
                } else {
                    println!("{a:?} != {b:?}");
                    Ok(false)
                }
            }
        }
    }
    pub fn verify(&self, new_resolve: &impl Resolve) -> Result<bool> {
        let mut same = true;
        for (&old_ref, &new_ref) in self.map.iter() {
            let old = self.resolver.resolve(old_ref)?;
            let new = new_resolve.resolve(new_ref)?;

            if !self.compare_prim(&old, &new, new_resolve)? {
                same = false;
            }
        }
        Ok(same)
    }
}

impl<'a, R: Resolve, U> Resolve for Importer<'a, R, U> {
    fn get<T: Object+datasize::DataSize>(&self, r: Ref<T>) -> Result<RcRef<T>> {
        self.resolver.get(r)
    }
    fn get_data_or_decode(&self, id: PlainRef, range: Range<usize>, filters: &[StreamFilter]) -> Result<Arc<[u8]>> {
        self.resolver.get_data_or_decode(id, range, filters)
    }
    fn options(&self) -> &ParseOptions {
        self.resolver.options()
    }
    fn resolve(&self, r: PlainRef) -> Result<Primitive> {
        self.resolver.resolve(r)
    }
    fn resolve_flags(&self, r: PlainRef, flags: ParseFlags, depth: usize) -> Result<Primitive> {
        self.resolver.resolve_flags(r, flags, depth)
    }
    fn stream_data(&self, id: PlainRef, range: Range<usize>) -> Result<Arc<[u8]>> {
        self.resolver.stream_data(id, range)
    }
}
impl<'a, R, U: Updater> Updater for Importer<'a, R, U> {
    fn create<T: ObjectWrite>(&mut self, obj: T) -> Result<RcRef<T>> {
        self.updater.create(obj)
    }
    fn fulfill<T: ObjectWrite>(&mut self, promise: PromisedRef<T>, obj: T) -> Result<RcRef<T>> {
        self.updater.fulfill(promise, obj)
    }
    fn promise<T: Object>(&mut self) -> PromisedRef<T> {
        self.updater.promise()
    }
    fn update<T: ObjectWrite>(&mut self, old: PlainRef, obj: T) -> Result<RcRef<T>> {
        self.updater.update(old, obj)
    }
}
impl<'a, R: Resolve, U: Updater> Cloner for Importer<'a, R, U> {
    fn clone_ref<T: DeepClone + Object + DataSize + ObjectWrite>(&mut self, old: Ref<T>) -> Result<Ref<T>> {
        if let Some(&new_ref) = self.map.get(&old.get_inner()) {
            return Ok(Ref::new(new_ref));
        }
        let obj = self.resolver.get(old)?;
        let clone = obj.deep_clone(self)?;

        let r = self.updater.create(clone)?;
        self.map.insert(old.get_inner(), r.get_ref().get_inner());

        Ok(r.get_ref())
    }
    fn clone_plainref(&mut self, old: PlainRef) -> Result<PlainRef> {
        if let Some(&new_ref) = self.map.get(&old) {
            return Ok(new_ref);
        }
        let obj = self.resolver.resolve(old)?;
        let clone = obj.deep_clone(self)?;

        let new = self.updater.create(clone)?
            .get_ref().get_inner();

        self.map.insert(old, new);

        Ok(new)
    }
    fn clone_rcref<T: DeepClone + ObjectWrite + DataSize>(&mut self, old: &RcRef<T>) -> Result<RcRef<T>> {
        let old_ref = old.get_ref().get_inner();
        if let Some(&new_ref) = self.map.get(&old_ref) {
            let arc = self.rcrefs.get(&new_ref).unwrap().clone().downcast()?;
            return Ok(RcRef::new(new_ref, arc));
        }

        let new = old.data().deep_clone(self)?;
        let new = self.updater.create::<T>(new)?;
        self.rcrefs.insert(new.get_ref().get_inner(), AnySync::new(new.data().clone()));
        self.map.insert(old_ref, new.get_ref().get_inner());

        Ok(new)
    }
    fn clone_shared<T: DeepClone>(&mut self, old: &Shared<T>) -> Result<Shared<T>> {
        let key = &**old as *const T as usize;
        if let Some((old, new)) = self.shared.get(&key) {
            return new.clone().downcast();
        }
        let new = Shared::new(old.as_ref().deep_clone(self)?);
        self.shared.insert(key, (AnySync::new_without_size(old.clone()), AnySync::new_without_size(new.clone())));
        Ok(new)
    }
}
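The builder API above composes in three steps: collect `Op`s into a `PageBuilder`, group pages under a `CatalogBuilder`, then let `PdfBuilder` serialize everything through `Storage`. A minimal end-to-end sketch (assuming it runs inside this crate and that the caches produced by `FileOptions::cached()` satisfy the `Cache`/`Log` bounds; the page size is an arbitrary example):

fn build_blank_pdf() -> crate::error::Result<Vec<u8>> {
    use crate::file::FileOptions;
    // One empty page with only a media box set.
    let mut page = PageBuilder::default();
    page.size(595.0, 842.0); // roughly A4 in PDF units
    let catalog = CatalogBuilder::from_pages(vec![page]);
    // Serialize the whole document to an in-memory byte vector.
    PdfBuilder::new(FileOptions::cached()).build(catalog)
}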
1339
src-pdfrs/pdf/src/content.rs
Normal file
File diff suppressed because it is too large
695
src-pdfrs/pdf/src/crypt.rs
Normal file
@ -0,0 +1,695 @@
/// PDF "cryptography" – This is why you don't write your own crypto.

use crate as pdf;
use aes::cipher::generic_array::{sequence::Split, GenericArray};
use aes::cipher::{BlockDecryptMut, BlockEncryptMut, KeyIvInit};
use aes::cipher::block_padding::{NoPadding, Pkcs7};
use sha2::{Digest, Sha256, Sha384, Sha512};
use std::fmt;
use std::collections::HashMap;
use datasize::DataSize;
use crate::object::PlainRef;
use crate::primitive::{Dictionary, PdfString, Name};
use crate::error::{PdfError, Result};

type Aes128CbcEnc = cbc::Encryptor<aes::Aes128>;
type Aes128CbcDec = cbc::Decryptor<aes::Aes128>;
type Aes256CbcDec = cbc::Decryptor<aes::Aes256>;

const PADDING: [u8; 32] = [
    0x28, 0xBF, 0x4E, 0x5E, 0x4E, 0x75, 0x8A, 0x41,
    0x64, 0x00, 0x4E, 0x56, 0xFF, 0xFA, 0x01, 0x08,
    0x2E, 0x2E, 0x00, 0xB6, 0xD0, 0x68, 0x3E, 0x80,
    0x2F, 0x0C, 0xA9, 0xFE, 0x64, 0x53, 0x69, 0x7A
];

#[derive(Copy)]
pub struct Rc4 {
    i: u8,
    j: u8,
    state: [u8; 256]
}

impl Clone for Rc4 { fn clone(&self) -> Rc4 { *self } }

impl Rc4 {
    pub fn new(key: &[u8]) -> Rc4 {
        assert!(!key.is_empty() && key.len() <= 256);
        let mut rc4 = Rc4 { i: 0, j: 0, state: [0; 256] };
        for (i, x) in rc4.state.iter_mut().enumerate() {
            *x = i as u8;
        }
        let mut j: u8 = 0;
        for i in 0..256 {
            j = j.wrapping_add(rc4.state[i]).wrapping_add(key[i % key.len()]);
            rc4.state.swap(i, j as usize);
        }
        rc4
    }
    fn next(&mut self) -> u8 {
        self.i = self.i.wrapping_add(1);
        self.j = self.j.wrapping_add(self.state[self.i as usize]);
        self.state.swap(self.i as usize, self.j as usize);
        self.state[(self.state[self.i as usize].wrapping_add(self.state[self.j as usize])) as usize]
    }
    pub fn encrypt(key: &[u8], data: &mut [u8]) {
        let mut rc4 = Rc4::new(key);
        for b in data.iter_mut() {
            *b ^= rc4.next();
        }
    }
}

/// 7.6.1 Table 20 + 7.6.3.2 Table 21
#[derive(Object, Debug, Clone, DataSize)]
pub struct CryptDict {
    #[pdf(key="O")]
    o: PdfString,

    #[pdf(key="U")]
    u: PdfString,

    #[pdf(key="R")]
    r: u32,

    #[pdf(key="P")]
    p: i32,

    #[pdf(key="V")]
    v: i32,

    #[pdf(key="Length", default="40")]
    bits: u32,

    #[pdf(key="CF")]
    crypt_filters: HashMap<Name, CryptFilter>,

    #[pdf(key="StmF")]
    default_crypt_filter: Option<Name>,

    #[pdf(key="EncryptMetadata", default="true")]
    encrypt_metadata: bool,

    #[pdf(key = "OE")]
    oe: Option<PdfString>,

    #[pdf(key = "UE")]
    ue: Option<PdfString>,

    #[pdf(other)]
    _other: Dictionary
}

#[derive(Object, Debug, Clone, Copy, DataSize)]
pub enum CryptMethod {
    None,
    V2,
    AESV2,
    AESV3,
}

#[derive(Object, Debug, Clone, Copy, DataSize)]
pub enum AuthEvent {
    DocOpen,
    EFOpen
}

#[derive(Object, Debug, Clone, DataSize)]
#[pdf(Type="CryptFilter?")]
pub struct CryptFilter {
    #[pdf(key="CFM", default="CryptMethod::None")]
    pub method: CryptMethod,

    #[pdf(key="AuthEvent", default="AuthEvent::DocOpen")]
    pub auth_event: AuthEvent,

    #[pdf(key="Length")]
    pub length: Option<u32>,

    #[pdf(other)]
    _other: Dictionary
}

pub struct Decoder {
    key_size: usize,
    key: Vec<u8>, // maximum length
    method: CryptMethod,
    /// A reference to the /Encrypt dictionary, if it is in an indirect
    /// object. The strings in this dictionary are not encrypted, so
    /// decryption must be skipped when accessing them.
    pub(crate) encrypt_indirect_object: Option<PlainRef>,
    /// A reference to the /Metadata dictionary, if it is an indirect
    /// object. If /EncryptMetadata is set to false in the /Encrypt dictionary,
    /// then the strings in the /Metadata dictionary are not encrypted, so
    /// decryption must be skipped when accessing them.
    pub(crate) metadata_indirect_object: Option<PlainRef>,
    /// Whether the metadata is encrypted, as indicated by /EncryptMetadata
    /// in the /Encrypt dictionary.
    encrypt_metadata: bool,
}
impl Decoder {
    pub fn default(dict: &CryptDict, id: &[u8]) -> Result<Decoder> {
        Decoder::from_password(dict, id, b"")
    }

    fn key(&self) -> &[u8] {
        &self.key[.. std::cmp::min(self.key_size, 16)]
    }

    pub fn new(key: Vec<u8>, key_size: usize, method: CryptMethod, encrypt_metadata: bool) -> Decoder {
        Decoder {
            key_size,
            key,
            method,
            encrypt_indirect_object: None,
            metadata_indirect_object: None,
            encrypt_metadata,
        }
    }

    pub fn from_password(dict: &CryptDict, id: &[u8], pass: &[u8]) -> Result<Decoder> {
        fn compute_u_rev_2(key: &[u8]) -> Vec<u8> {
            // algorithm 4
            let mut data = PADDING.to_vec();
            Rc4::encrypt(key, &mut data);
            data
        }

        fn check_password_rev_2(document_u: &[u8], key: &[u8]) -> bool {
            compute_u_rev_2(key) == document_u
        }

        fn compute_u_rev_3_4(id: &[u8], key: &[u8]) -> [u8; 16] {
            // algorithm 5
            // a) we derived the key already.

            // b)
            let mut hash = md5::Context::new();
            hash.consume(PADDING);

            // c)
            hash.consume(id);

            // d)
            let mut data = *hash.compute();
            Rc4::encrypt(key, &mut data);

            // e)
            for i in 1u8..=19 {
                let mut key = key.to_owned();
                for b in &mut key {
                    *b ^= i;
                }
                Rc4::encrypt(&key, &mut data);
            }

            // f)
            data
        }

        fn check_password_rev_3_4(document_u: &[u8], id: &[u8], key: &[u8]) -> bool {
            document_u.starts_with(&compute_u_rev_3_4(id, key))
        }

        fn check_password_rc4(revision: u32, document_u: &[u8], id: &[u8], key: &[u8]) -> bool {
            if revision == 2 {
                check_password_rev_2(document_u, key)
            } else {
                check_password_rev_3_4(document_u, id, key)
            }
        }

        fn key_derivation_user_password_rc4(
            revision: u32,
            key_size: usize,
            dict: &CryptDict,
            id: &[u8],
            pass: &[u8],
        ) -> Vec<u8> {
            let o = dict.o.as_bytes();
            let p = dict.p;
            // 7.6.3.3 - Algorithm 2
            // a) and b)
            let mut hash = md5::Context::new();
            if pass.len() < 32 {
                hash.consume(pass);
                hash.consume(&PADDING[..32 - pass.len()]);
            } else {
                hash.consume(&pass[..32]);
            }

            // c)
            hash.consume(o);

            // d)
            hash.consume(p.to_le_bytes());

            // e)
            hash.consume(id);

            // f)
            if revision >= 4 && !dict.encrypt_metadata {
                hash.consume([0xff, 0xff, 0xff, 0xff]);
            }

            // g)
            let mut data = *hash.compute();

            // h)
            if revision >= 3 {
                for _ in 0..50 {
                    data = *md5::compute(&data[..std::cmp::min(key_size, 16)]);
                }
            }

            let mut key = vec![0u8; key_size.max(16)];
            key[..16].copy_from_slice(&data);
            key
        }

        fn key_derivation_owner_password_rc4(
            revision: u32,
            key_size: usize,
            pass: &[u8],
        ) -> Result<Vec<u8>> {
            if key_size > 16 {
                bail!("key size > 16");
            }

            let mut hash = md5::Context::new();
            if pass.len() < 32 {
                hash.consume(pass);
                hash.consume(&PADDING[..32 - pass.len()]);
            } else {
                hash.consume(&pass[..32]);
            }

            if revision >= 3 {
                for _ in 0..50 {
                    let digest = *std::mem::replace(&mut hash, md5::Context::new()).compute();
                    hash.consume(digest);
                }
            }

            let digest = &hash.compute()[..key_size];
            Ok(digest.to_vec())
        }

        let (key_bits, method) = match dict.v {
            1 => (40, CryptMethod::V2),
            2 => {
                if dict.bits % 8 != 0 {
                    err!(other!("invalid key length {}", dict.bits))
                } else {
                    (dict.bits, CryptMethod::V2)
                }
            },
            4 ..= 6 => {
                let default = dict
                    .crypt_filters
                    .get(try_opt!(dict.default_crypt_filter.as_ref()).as_str())
                    .ok_or_else(|| other!("missing crypt filter entry {:?}", dict.default_crypt_filter.as_ref()))?;

                match default.method {
                    CryptMethod::V2 | CryptMethod::AESV2 => (
                        default.length.map(|n| 8 * n).unwrap_or(dict.bits),
                        default.method,
                    ),
                    CryptMethod::AESV3 if dict.v == 5 => (
                        default.length.map(|n| 8 * n).unwrap_or(dict.bits),
                        default.method,
                    ),
                    m => err!(other!("unimplemented crypt method {:?}", m)),
                }
            }
            v => err!(other!("unsupported V value {}", v)),
        };
        let level = dict.r;
        if !(2..=6).contains(&level) {
            err!(other!("unsupported standard security handler revision {}", level))
        };
        if level <= 4 {
            let key_size = key_bits as usize / 8;
            let key = key_derivation_user_password_rc4(level, key_size, dict, id, pass);

            if check_password_rc4(level, dict.u.as_bytes(), id, &key[..std::cmp::min(key_size, 16)]) {
                let decoder = Decoder::new(key, key_size, method, dict.encrypt_metadata);
                Ok(decoder)
            } else {
                let password_wrap_key = key_derivation_owner_password_rc4(level, key_size, pass)?;
                let mut data = dict.o.as_bytes().to_vec();
                let rounds = if level == 2 { 1u8 } else { 20u8 };
                for round in 0..rounds {
                    let mut round_key = password_wrap_key.clone();
                    for byte in round_key.iter_mut() {
                        *byte ^= round;
                    }
                    Rc4::encrypt(&round_key, &mut data);
                }
                let unwrapped_user_password = data;

                let key = key_derivation_user_password_rc4(
                    level,
                    key_size,
                    dict,
                    id,
                    &unwrapped_user_password,
                );

                if check_password_rc4(level, dict.u.as_bytes(), id, &key[..key_size]) {
                    let decoder = Decoder::new(key, key_size, method, dict.encrypt_metadata);
                    Ok(decoder)
                } else {
                    Err(PdfError::InvalidPassword)
                }
            }
        } else if level == 5 || level == 6 {
            let u = dict.u.as_bytes();
            if u.len() != 48 {
                err!(format!(
                    "U in Encrypt dictionary should have a length of 48 bytes, not {}",
                    u.len(),
                )
                .into());
            }
            let user_hash = &u[0..32];
            let user_validation_salt = &u[32..40];
            let user_key_salt = &u[40..48];

            let o = dict.o.as_bytes();
            if o.len() != 48 {
                err!(format!(
                    "O in Encrypt dictionary should have a length of 48 bytes, not {}",
                    o.len(),
                )
                .into());
            }
            let owner_hash = &o[0..32];
            let owner_validation_salt = &o[32..40];
            let owner_key_salt = &o[40..48];

            let password_unicode =
                t!(String::from_utf8(pass.to_vec()).map_err(|_| PdfError::InvalidPassword));
            let password_prepped =
                t!(stringprep::saslprep(&password_unicode).map_err(|_| PdfError::InvalidPassword));
            let mut password_encoded = password_prepped.as_bytes();

            if password_encoded.len() > 127 {
                password_encoded = &password_encoded[..127];
            }

            let ue = t!(dict.ue.as_ref().ok_or_else(|| PdfError::MissingEntry {
                typ: "Encrypt",
                field: "UE".into(),
            }))
            .as_bytes()
            .to_vec();
            let oe = t!(dict.oe.as_ref().ok_or_else(|| PdfError::MissingEntry {
                typ: "Encrypt",
                field: "OE".into(),
            }))
            .as_bytes()
            .to_vec();

            let (intermediate_key, mut wrapped_key) = if level == 6 {
                let user_hash_computed =
                    Self::revision_6_kdf(password_encoded, user_validation_salt, b"");
                if user_hash_computed == user_hash {
                    (
                        Self::revision_6_kdf(password_encoded, user_key_salt, b"").into(),
                        ue,
                    )
                } else {
                    let owner_hash_computed =
                        Self::revision_6_kdf(password_encoded, owner_validation_salt, u);
                    if owner_hash_computed == owner_hash {
                        (
                            Self::revision_6_kdf(password_encoded, owner_key_salt, u).into(),
                            oe,
                        )
                    } else {
                        err!(PdfError::InvalidPassword);
                    }
                }
            } else {
                // level == 5

                let mut user_check_hash = Sha256::new();
                user_check_hash.update(password_encoded);
                user_check_hash.update(user_validation_salt);
                let user_hash_computed = user_check_hash.finalize();
                #[allow(clippy::branches_sharing_code)]
                if user_hash_computed.as_slice() == user_hash {
                    let mut intermediate_kdf_hash = Sha256::new();
                    intermediate_kdf_hash.update(password_encoded);
                    intermediate_kdf_hash.update(user_key_salt);
                    (intermediate_kdf_hash.finalize(), ue)
                } else {
                    let mut owner_check_hash = Sha256::new();
                    owner_check_hash.update(password_encoded);
                    owner_check_hash.update(owner_validation_salt);
                    owner_check_hash.update(u);
                    let owner_hash_computed = owner_check_hash.finalize();
                    if owner_hash_computed.as_slice() == owner_hash {
                        let mut intermediate_kdf_hash = Sha256::new();
                        intermediate_kdf_hash.update(password_encoded);
                        intermediate_kdf_hash.update(owner_key_salt);
                        intermediate_kdf_hash.update(u);
                        (intermediate_kdf_hash.finalize(), oe)
                    } else {
                        err!(PdfError::InvalidPassword);
                    }
                }
            };

            let zero_iv = GenericArray::from_slice(&[0u8; 16]);
            let key_slice = t!(Aes256CbcDec::new(&intermediate_key, zero_iv)
                .decrypt_padded_mut::<NoPadding>(&mut wrapped_key)
                .map_err(|_| PdfError::InvalidPassword));

            let decoder = Decoder::new(key_slice.into(), 32, method, dict.encrypt_metadata);
            Ok(decoder)
        } else {
            err!(format!("unsupported standard security handler revision {}", level).into())
|
||||
}
|
||||
}
|
||||
|
||||
fn revision_6_kdf(password: &[u8], salt: &[u8], u: &[u8]) -> [u8; 32] {
|
||||
let mut data = [0u8; (128 + 64 + 48) * 64];
|
||||
let mut data_total_len = 0;
|
||||
|
||||
let mut sha256 = Sha256::new();
|
||||
let mut sha384 = Sha384::new();
|
||||
let mut sha512 = Sha512::new();
|
||||
|
||||
let mut input_sha256 = Sha256::new();
|
||||
input_sha256.update(password);
|
||||
input_sha256.update(salt);
|
||||
input_sha256.update(u);
|
||||
let input = input_sha256.finalize();
|
||||
let (mut key, mut iv) = input.split();
|
||||
|
||||
let mut block = [0u8; 64];
|
||||
let mut block_size = 32;
|
||||
(block[..block_size]).copy_from_slice(&input[..block_size]);
|
||||
|
||||
let mut i = 0;
|
||||
while i < 64 || i < data[data_total_len - 1] as usize + 32 {
|
||||
let aes = Aes128CbcEnc::new(&key, &iv);
|
||||
let data_repeat_len = password.len() + block_size + u.len();
|
||||
data[..password.len()].copy_from_slice(password);
|
||||
data[password.len()..password.len() + block_size].copy_from_slice(&block[..block_size]);
|
||||
data[password.len() + block_size..data_repeat_len].copy_from_slice(u);
|
||||
for j in 1..64 {
|
||||
data.copy_within(..data_repeat_len, j * data_repeat_len);
|
||||
}
|
||||
data_total_len = data_repeat_len * 64;
|
||||
|
||||
// The plaintext length will always be a multiple of the block size, unwrap is okay
|
||||
let encrypted = aes
|
||||
.encrypt_padded_mut::<NoPadding>(&mut data[..data_total_len], data_total_len)
|
||||
.unwrap();
|
||||
|
||||
let sum: usize = encrypted[..16].iter().map(|byte| *byte as usize).sum();
|
||||
block_size = sum % 3 * 16 + 32;
|
||||
match block_size {
|
||||
32 => {
|
||||
sha256.update(encrypted);
|
||||
(block[..block_size]).copy_from_slice(&sha256.finalize_reset());
|
||||
}
|
||||
48 => {
|
||||
sha384.update(encrypted);
|
||||
(block[..block_size]).copy_from_slice(&sha384.finalize_reset());
|
||||
}
|
||||
64 => {
|
||||
sha512.update(encrypted);
|
||||
(block[..block_size]).copy_from_slice(&sha512.finalize_reset());
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
|
||||
key.copy_from_slice(&block[..16]);
|
||||
iv.copy_from_slice(&block[16..32]);
|
||||
|
||||
i += 1;
|
||||
}
|
||||
let mut hash = [0u8; 32];
|
||||
hash.copy_from_slice(&block[..32]);
|
||||
hash
|
||||
}
|
||||
|
||||
pub fn decrypt<'buf>(&self, id: PlainRef, data: &'buf mut [u8]) -> Result<&'buf [u8]> {
|
||||
if self.encrypt_indirect_object == Some(id) {
|
||||
// Strings inside the /Encrypt dictionary are not encrypted
|
||||
return Ok(data);
|
||||
}
|
||||
|
||||
if !self.encrypt_metadata && self.metadata_indirect_object == Some(id) {
|
||||
// Strings inside the /Metadata dictionary are not encrypted when /EncryptMetadata is
|
||||
// false
|
||||
return Ok(data);
|
||||
}
|
||||
|
||||
if data.is_empty() {
|
||||
return Ok(data);
|
||||
}
|
||||
|
||||
// Algorithm 1
|
||||
// a) we have those already
|
||||
|
||||
match self.method {
|
||||
CryptMethod::None => unreachable!(),
|
||||
CryptMethod::V2 => {
|
||||
// b)
|
||||
let mut key = [0; 16 + 5];
|
||||
let n = self.key().len();
|
||||
key[..n].copy_from_slice(self.key());
|
||||
key[n..n + 3].copy_from_slice(&id.id.to_le_bytes()[..3]);
|
||||
key[n + 3..n + 5].copy_from_slice(&id.gen.to_le_bytes()[..2]);
|
||||
|
||||
// c)
|
||||
let key = *md5::compute(&key[..n + 5]);
|
||||
|
||||
// d)
|
||||
Rc4::encrypt(&key[..(n + 5).min(16)], data);
|
||||
Ok(data)
|
||||
}
|
||||
CryptMethod::AESV2 => {
|
||||
// b)
|
||||
let mut key = [0; 32 + 5 + 4];
|
||||
let n = std::cmp::min(self.key_size, 16);
|
||||
key[..n].copy_from_slice(self.key());
|
||||
key[n..n + 3].copy_from_slice(&id.id.to_le_bytes()[..3]);
|
||||
key[n + 3..n + 5].copy_from_slice(&id.gen.to_le_bytes()[..2]);
|
||||
key[n + 5..n + 9].copy_from_slice(b"sAlT");
|
||||
|
||||
// c)
|
||||
let key = *md5::compute(&key[..n + 9]);
|
||||
|
||||
// d)
|
||||
let key = &key[..(n + 5).min(16)];
|
||||
if data.len() < 16 {
|
||||
return Err(PdfError::DecryptionFailure);
|
||||
}
|
||||
let (iv, ciphertext) = data.split_at_mut(16);
|
||||
let cipher =
|
||||
t!(Aes128CbcDec::new_from_slices(key, iv).map_err(|_| PdfError::DecryptionFailure));
|
||||
Ok(t!(cipher
|
||||
.decrypt_padded_mut::<Pkcs7>(ciphertext)
|
||||
.map_err(|_| PdfError::DecryptionFailure)))
|
||||
}
|
||||
            CryptMethod::AESV3 => {
                if data.len() < 16 {
                    return Err(PdfError::DecryptionFailure);
                }
                let (iv, ciphertext) = data.split_at_mut(16);
                let cipher =
                    t!(Aes256CbcDec::new_from_slices(self.key(), iv).map_err(|_| PdfError::DecryptionFailure));
                Ok(t!(cipher
                    .decrypt_padded_mut::<Pkcs7>(ciphertext)
                    .map_err(|_| PdfError::DecryptionFailure)))
            }
        }
    }
}
impl fmt::Debug for Decoder {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.debug_struct("Decoder")
            .field("key", &self.key())
            .field("method", &self.method)
            .finish()
    }
}

#[cfg(test)]
mod tests {
    #[test]
    fn unencrypted_strings() {
        let data_prefix = b"%PDF-1.5\n\
            1 0 obj\n\
            << /Type /Catalog /Pages 2 0 R >>\n\
            endobj\n\
            2 0 obj\n\
            << /Type /Pages /Kids [3 0 R] /Count 1 >>\n\
            endobj\n\
            3 0 obj\n\
            << /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R >>\n\
            endobj\n\
            4 0 obj\n\
            << /Length 0 >>\n\
            stream\n\
            endstream\n\
            endobj\n\
            5 0 obj\n\
            <<\n\
            /V 4\n\
            /CF <<\n\
            /StdCF << /Type /CryptFilter /CFM /V2 >>\n\
            >>\n\
            /StmF /StdCF\n\
            /StrF /StdCF\n\
            /R 4\n\
            /O (owner pwd hash!!)\n\
            /U <E721D9D63EC4E7BD4DA6C9F0E30C8290>\n\
            /P -4\n\
            >>\n\
            endobj\n\
            xref\n\
            1 5\n";
        let mut data = data_prefix.to_vec();
        for obj_nr in 1..=5 {
            let needle = format!("\n{} 0 obj\n", obj_nr).into_bytes();
            let offset = data_prefix
                .windows(needle.len())
                .position(|w| w == needle)
                .unwrap()
                + 1;
            let mut line = format!("{:010} {:05} n\r\n", offset, 0).into_bytes();
            assert_eq!(line.len(), 20);
            data.append(&mut line);
        }
        let trailer_snippet = b"trailer\n\
            <<\n\
            /Size 6\n\
            /Root 1 0 R\n\
            /Encrypt 5 0 R\n\
            /ID [<DEADBEEF> <DEADBEEF>]\n\
            >>\n\
            startxref\n";
        data.extend_from_slice(trailer_snippet);
        let xref_offset = data_prefix
            .windows("xref".len())
            .rposition(|w| w == b"xref")
            .unwrap();
        data.append(&mut format!("{}\n%%EOF", xref_offset).into_bytes());

        let file = crate::file::FileOptions::uncached().load(data).unwrap();

        // PDF reference says strings in the encryption dictionary are "not
        // encrypted by the usual methods."
        assert_eq!(
            file.trailer.encrypt_dict.unwrap().o.as_ref(),
            b"owner pwd hash!!",
        );
    }
}
12
src-pdfrs/pdf/src/data/t01_lzw+base85.txt
Normal file
@ -0,0 +1,12 @@
J..)6T`?p&<!J9%_[umg"B7/Z7KNXbN'S+,*Q/&"OLT'F
LIDK#!n`$"<Atdi`\Vn%b%)&'cA*VnK\CJY(sF>c!Jnl@
RM]WM;jjH6Gnc75idkL5]+cPZKEBPWdR>FF(kj1_R%W_d
&/jS!;iuad7h?[L-F$+]]0A3Ck*$I0KZ?;<)CJtqi65Xb
Vc3\n5ua:Q/=0$W<#N3U;H,MQKqfg1?:lUpR;6oN[C2E4
ZNr8Udn.'p+?#X+1>0Kuk$bCDF/(3fL5]Oq)^kJZ!C2H1
'TO]Rl?Q:&'<5&iP!$Rq;BXRecDN[IJB`,)o8XJOSJ9sD
S]hQ;Rj@!ND)bD_q&C\g:inYC%)&u#:u,M6Bm%IY!Kb1+
":aAa'S`ViJglLb8<W9k6Yl\\0McJQkDeLWdPN?9A'jX*
al>iG1p&i;eVoK&juJHs9%;Xomop"5KatWRT"JQ#qYuL,
JD?M$0QP)lKn06l1apKDC@\qJ4B!!(5m+j.7F790m(Vj8
8l8Q:_CZ(Gm1%X\N1&u!FKHMB~>
0
src-pdfrs/pdf/src/data/t01_plain.txt
Normal file
656
src-pdfrs/pdf/src/enc.rs
Normal file
@ -0,0 +1,656 @@
#![allow(clippy::many_single_char_names)]
#![allow(dead_code)] // TODO

use itertools::Itertools;

use crate as pdf;
use crate::error::*;
use crate::object::{Object, Resolve, Stream};
use crate::primitive::{Primitive, Dictionary};
use std::convert::{TryFrom, TryInto};
use std::io::{Read, Write};
use once_cell::sync::OnceCell;
use datasize::DataSize;


#[derive(Object, ObjectWrite, Debug, Clone, DataSize, DeepClone)]
pub struct LZWFlateParams {
    #[pdf(key="Predictor", default="1")]
    pub predictor: i32,
    #[pdf(key="Colors", default="1")]
    pub n_components: i32,
    #[pdf(key="BitsPerComponent", default="8")]
    pub bits_per_component: i32,
    #[pdf(key="Columns", default="1")]
    pub columns: i32,
    #[pdf(key="EarlyChange", default="1")]
    pub early_change: i32,
}
impl Default for LZWFlateParams {
    fn default() -> LZWFlateParams {
        LZWFlateParams {
            predictor: 1,
            n_components: 1,
            bits_per_component: 8,
            columns: 1,
            early_change: 1
        }
    }
}

#[derive(Object, ObjectWrite, Debug, Clone, DataSize, DeepClone)]
pub struct DCTDecodeParams {
    // TODO The default value of ColorTransform is 1 if the image has three components and 0 otherwise.
    // 0: No transformation.
    // 1: If the image has three color components, transform RGB values to YUV before encoding and from YUV to RGB after decoding.
    //    If the image has four components, transform CMYK values to YUVK before encoding and from YUVK to CMYK after decoding.
    //    This option is ignored if the image has one or two color components.
    #[pdf(key="ColorTransform")]
    pub color_transform: Option<i32>,
}

#[derive(Object, ObjectWrite, Debug, Clone, DataSize, DeepClone)]
pub struct CCITTFaxDecodeParams {
    #[pdf(key="K", default="0")]
    pub k: i32,

    #[pdf(key="EndOfLine", default="false")]
    pub end_of_line: bool,

    #[pdf(key="EncodedByteAlign", default="false")]
    pub encoded_byte_align: bool,

    #[pdf(key="Columns", default="1728")]
    pub columns: u32,

    #[pdf(key="Rows", default="0")]
    pub rows: u32,

    #[pdf(key="EndOfBlock", default="true")]
    pub end_of_block: bool,

    #[pdf(key="BlackIs1", default="false")]
    pub black_is_1: bool,

    #[pdf(key="DamagedRowsBeforeError", default="0")]
    pub damaged_rows_before_error: u32,
}

#[derive(Object, ObjectWrite, Debug, Clone, DataSize, DeepClone)]
pub struct JBIG2DecodeParams {
    #[pdf(key="JBIG2Globals")]
    pub globals: Option<Stream<()>>
}
#[derive(Debug, Clone, DataSize, DeepClone)]
pub enum StreamFilter {
    ASCIIHexDecode,
    ASCII85Decode,
    LZWDecode (LZWFlateParams),
    FlateDecode (LZWFlateParams),
    JPXDecode, //Jpeg2k
    DCTDecode (DCTDecodeParams),
    CCITTFaxDecode (CCITTFaxDecodeParams),
    JBIG2Decode(JBIG2DecodeParams),
    Crypt,
    RunLengthDecode
}
impl StreamFilter {
    pub fn from_kind_and_params(kind: &str, params: Dictionary, r: &impl Resolve) -> Result<StreamFilter> {
        let params = Primitive::Dictionary(params);
        Ok(
            match kind {
                "ASCIIHexDecode" => StreamFilter::ASCIIHexDecode,
                "ASCII85Decode" => StreamFilter::ASCII85Decode,
                "LZWDecode" => StreamFilter::LZWDecode(LZWFlateParams::from_primitive(params, r)?),
                "FlateDecode" => StreamFilter::FlateDecode(LZWFlateParams::from_primitive(params, r)?),
                "JPXDecode" => StreamFilter::JPXDecode,
                "DCTDecode" => StreamFilter::DCTDecode(DCTDecodeParams::from_primitive(params, r)?),
                "CCITTFaxDecode" => StreamFilter::CCITTFaxDecode(CCITTFaxDecodeParams::from_primitive(params, r)?),
                "JBIG2Decode" => StreamFilter::JBIG2Decode(JBIG2DecodeParams::from_primitive(params, r)?),
                "Crypt" => StreamFilter::Crypt,
                "RunLengthDecode" => StreamFilter::RunLengthDecode,
                ty => bail!("Unrecognized filter type {:?}", ty),
            }
        )
    }
}

#[inline]
pub fn decode_nibble(c: u8) -> Option<u8> {
    match c {
        n @ b'0' ..= b'9' => Some(n - b'0'),
        // hex digits end at 'f'/'F'; anything beyond is not a valid nibble
        a @ b'a' ..= b'f' => Some(a - b'a' + 0xa),
        a @ b'A' ..= b'F' => Some(a - b'A' + 0xA),
        _ => None
    }
}

#[inline]
fn encode_nibble(c: u8) -> u8 {
    match c {
        0 ..= 9 => b'0' + c,
        10 ..= 15 => b'a' - 10 + c,
        _ => unreachable!()
    }
}


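// ASCIIHexDecode: whitespace is skipped and '>' marks end-of-data. Note that
// `tuples()` silently drops a trailing lone nibble, which the spec says should
// be treated as if it were followed by '0'.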
pub fn decode_hex(data: &[u8]) -> Result<Vec<u8>> {
    let mut out = Vec::with_capacity(data.len() / 2);
    let pairs = data.iter().cloned()
        .take_while(|&b| b != b'>')
        .filter(|&b| !matches!(b, 0 | 9 | 10 | 12 | 13 | 32))
        .tuples();
    for (i, (high, low)) in pairs.enumerate() {
        if let (Some(low), Some(high)) = (decode_nibble(low), decode_nibble(high)) {
            out.push(high << 4 | low);
        } else {
            return Err(PdfError::HexDecode {pos: i * 2, bytes: [high, low]})
        }
    }
    Ok(out)
}
pub fn encode_hex(data: &[u8]) -> Vec<u8> {
    let mut buf = Vec::with_capacity(data.len() * 2);
    for &b in data {
        buf.push(encode_nibble(b >> 4));
        buf.push(encode_nibble(b & 0xf));
    }
    buf
}

#[inline]
fn sym_85(byte: u8) -> Option<u8> {
    match byte {
        b @ 0x21 ..= 0x75 => Some(b - 0x21),
        _ => None
    }
}

fn word_85([a, b, c, d, e]: [u8; 5]) -> Option<[u8; 4]> {
    fn s(b: u8) -> Option<u64> { sym_85(b).map(|n| n as u64) }
    let (a, b, c, d, e) = (s(a)?, s(b)?, s(c)?, s(d)?, s(e)?);
    let q = (((a * 85 + b) * 85 + c) * 85 + d) * 85 + e;
    // 85^5 > 256^4, so the result might not fit in a u32.
    let r = u32::try_from(q).ok()?;
    Some(r.to_be_bytes())
}

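// ASCII85Decode: each group of 5 symbols ('!'..'u') encodes 4 bytes base-85;
// 'z' abbreviates four zero bytes and "~>" terminates the stream. A partial
// final group of n symbols is padded with 'u' and yields n-1 bytes.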
pub fn decode_85(data: &[u8]) -> Result<Vec<u8>> {
    let mut out = Vec::with_capacity((data.len() + 4) / 5 * 4);

    let mut stream = data.iter().cloned()
        .filter(|&b| !matches!(b, b' ' | b'\n' | b'\r' | b'\t'));

    let mut symbols = stream.by_ref()
        .take_while(|&b| b != b'~');

    let (tail_len, tail) = loop {
        match symbols.next() {
            Some(b'z') => out.extend_from_slice(&[0; 4]),
            Some(a) => {
                let (b, c, d, e) = match (symbols.next(), symbols.next(), symbols.next(), symbols.next()) {
                    (Some(b), Some(c), Some(d), Some(e)) => (b, c, d, e),
                    (None, _, _, _) => break (1, [a, b'u', b'u', b'u', b'u']),
                    (Some(b), None, _, _) => break (2, [a, b, b'u', b'u', b'u']),
                    (Some(b), Some(c), None, _) => break (3, [a, b, c, b'u', b'u']),
                    (Some(b), Some(c), Some(d), None) => break (4, [a, b, c, d, b'u']),
                };
                out.extend_from_slice(&word_85([a, b, c, d, e]).ok_or(PdfError::Ascii85TailError)?);
            }
            None => break (0, [b'u'; 5])
        }
    };

    if tail_len > 0 {
        let last = word_85(tail).ok_or(PdfError::Ascii85TailError)?;
        out.extend_from_slice(&last[.. tail_len-1]);
    }

    match (stream.next(), stream.next()) {
        (Some(b'>'), None) => Ok(out),
        _ => Err(PdfError::Ascii85TailError)
    }
}

#[inline]
fn divmod(n: u32, m: u32) -> (u32, u32) {
    (n / m, n % m)
}

#[inline]
fn a85(n: u32) -> u8 {
    n as u8 + 0x21
}

#[inline]
fn base85_chunk(c: [u8; 4]) -> [u8; 5] {
    let n = u32::from_be_bytes(c);
    let (n, e) = divmod(n, 85);
    let (n, d) = divmod(n, 85);
    let (n, c) = divmod(n, 85);
    let (a, b) = divmod(n, 85);

    [a85(a), a85(b), a85(c), a85(d), a85(e)]
}

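// Encoding walks 4-byte chunks; the partial tail is zero-padded and only the
// first r+1 symbols are emitted, mirroring the decoder's tail handling.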
fn encode_85(data: &[u8]) -> Vec<u8> {
    let mut buf = Vec::with_capacity((data.len() / 4) * 5 + 10);
    let mut chunks = data.chunks_exact(4);
    for chunk in chunks.by_ref() {
        let c: [u8; 4] = chunk.try_into().unwrap();
        if c == [0; 4] {
            buf.push(b'z');
        } else {
            buf.extend_from_slice(&base85_chunk(c));
        }
    }

    let r = chunks.remainder();
    if r.len() > 0 {
        let mut c = [0; 4];
        c[.. r.len()].copy_from_slice(r);
        let out = base85_chunk(c);
        buf.extend_from_slice(&out[.. r.len() + 1]);
    }
    buf.extend_from_slice(b"~>");
    buf
}

fn inflate_bytes_zlib(data: &[u8]) -> Result<Vec<u8>> {
    use libflate::zlib::Decoder;
    let mut decoder = Decoder::new(data)?;
    let mut decoded = Vec::new();
    decoder.read_to_end(&mut decoded)?;
    Ok(decoded)
}

fn inflate_bytes(data: &[u8]) -> Result<Vec<u8>> {
    use libflate::deflate::Decoder;
    let mut decoder = Decoder::new(data);
    let mut decoded = Vec::new();
    decoder.read_to_end(&mut decoded)?;
    Ok(decoded)
}

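// FlateDecode: inflate first (zlib framing, falling back to raw deflate), then,
// for /Predictor > 10, undo the PNG row filters. Each predicted row is prefixed
// with one filter-type byte; the stride assumes 8 bits per component.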
pub fn flate_decode(data: &[u8], params: &LZWFlateParams) -> Result<Vec<u8>> {
    let predictor = params.predictor as usize;
    let n_components = params.n_components as usize;
    let columns = params.columns as usize;
    let stride = columns * n_components;

    // First flate decode
    let decoded = {
        if let Ok(data) = inflate_bytes_zlib(data) {
            data
        } else if let Ok(data) = inflate_bytes(data) {
            data
        } else {
            dump_data(data);
            bail!("can't inflate");
        }
    };
    // Then unfilter (PNG)
    // For this, take the old out as input, and write output to out

    if predictor > 10 {
        let inp = decoded; // input buffer
        let rows = inp.len() / (stride+1);

        // output buffer
        let mut out = vec![0; rows * stride];

        // Apply inverse predictor
        let null_vec = vec![0; stride];

        let mut in_off = 0; // offset into input buffer

        let mut out_off = 0; // offset into output buffer
        let mut last_out_off = 0; // last offset to output buffer

        while in_off + stride < inp.len() {
            let predictor = PredictorType::from_u8(inp[in_off])?;
            in_off += 1; // +1 because the first byte on each row is predictor

            let row_in = &inp[in_off .. in_off + stride];
            let (prev_row, row_out) = if out_off == 0 {
                (&null_vec[..], &mut out[out_off .. out_off+stride])
            } else {
                let (prev, curr) = out.split_at_mut(out_off);
                (&prev[last_out_off ..], &mut curr[.. stride])
            };
            unfilter(predictor, n_components, prev_row, row_in, row_out);

            last_out_off = out_off;

            in_off += stride;
            out_off += stride;
        }
        Ok(out)
    } else {
        Ok(decoded)
    }
}
fn flate_encode(data: &[u8]) -> Vec<u8> {
    use libflate::deflate::Encoder;
    let mut encoded = Vec::new();
    let mut encoder = Encoder::new(&mut encoded);
    encoder.write_all(data).unwrap();
    // libflate requires an explicit finish() to flush the final block.
    encoder.finish().into_result().unwrap();
    encoded
}

pub fn dct_decode(data: &[u8], _params: &DCTDecodeParams) -> Result<Vec<u8>> {
    use jpeg_decoder::Decoder;
    let mut decoder = Decoder::new(data);
    let pixels = decoder.decode()?;
    Ok(pixels)
}

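// LZWDecode: PDF's default EarlyChange=1 bumps the code width one code early,
// the same convention TIFF uses; weezl models that with `with_tiff_size_switch`.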
pub fn lzw_decode(data: &[u8], params: &LZWFlateParams) -> Result<Vec<u8>> {
    use weezl::{BitOrder, decode::Decoder};
    let mut out = vec![];

    let mut decoder = if params.early_change != 0 {
        Decoder::with_tiff_size_switch(BitOrder::Msb, 9)
    } else {
        Decoder::new(BitOrder::Msb, 9)
    };

    decoder
        .into_stream(&mut out)
        .decode_all(data).status?;
    Ok(out)
}
fn lzw_encode(data: &[u8], params: &LZWFlateParams) -> Result<Vec<u8>> {
    use weezl::{BitOrder, encode::Encoder};
    if params.early_change != 0 {
        bail!("encoding early_change != 0 is not supported");
    }
    let mut compressed = vec![];
    Encoder::new(BitOrder::Msb, 9)
        .into_stream(&mut compressed)
        .encode_all(data).status?;
    Ok(compressed)
}

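// CCITTFaxDecode: only K < 0 (pure two-dimensional Group 4 / T.6 data) is
// handled here; K = 0 and K > 0 (Group 3) fall through to unimplemented!().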
pub fn fax_decode(data: &[u8], params: &CCITTFaxDecodeParams) -> Result<Vec<u8>> {
    use fax::{Color, decoder::{pels, decode_g4}};

    if params.k < 0 {
        let columns = params.columns as usize;
        let rows = params.rows as usize;

        let height = if params.rows == 0 { None } else { Some(params.rows as u16) };
        let mut buf = Vec::with_capacity(columns * rows);
        decode_g4(data.iter().cloned(), columns as u16, height, |line| {
            buf.extend(pels(line, columns as u16).map(|c| match c {
                Color::Black => 0,
                Color::White => 255
            }));
            assert_eq!(buf.len() % columns, 0, "len={}, columns={}", buf.len(), columns);
        }).ok_or(PdfError::Other { msg: "faxdecode failed".into() })?;
        assert_eq!(buf.len() % columns, 0, "len={}, columns={}", buf.len(), columns);

        if rows != 0 && buf.len() != columns * rows {
            bail!("decoded length does not match (expected {rows}∙{columns}, got {})", buf.len());
        }
        Ok(buf)
    } else {
        unimplemented!()
    }
}

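// RunLengthDecode: a length byte n < 128 copies the next n+1 bytes literally,
// n > 128 repeats the next byte 257-n times, and n == 128 marks end-of-data.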
pub fn run_length_decode(data: &[u8]) -> Result<Vec<u8>> {
    // Used <http://benno.id.au/refs/PDFReference15_v5.pdf> as specification
    let mut buf = Vec::new();
    let d = data;
    let mut c = 0;

    while c < data.len() {
        let length = d[c]; // length is first byte
        if length < 128 {
            let start = c + 1;
            let end = start + length as usize + 1;
            // copy _following_ length + 1 bytes literally
            buf.extend_from_slice(&d[start..end]);
            c = end; // move cursor to next run
        } else if length >= 129 {
            let copy = 257 - length as usize; // copy 2 - 128 times
            let b = d[c + 1]; // copied byte
            buf.extend(std::iter::repeat(b).take(copy));
            c += 2; // move cursor to next run
        } else {
            break; // EOD
        }
    }

    Ok(buf)
}

pub type DecodeFn = dyn Fn(&[u8]) -> Result<Vec<u8>> + Sync + Send + 'static;
static JPX_DECODER: OnceCell<Box<DecodeFn>> = OnceCell::new();
static JBIG2_DECODER: OnceCell<Box<DecodeFn>> = OnceCell::new();

pub fn set_jpx_decoder(f: Box<DecodeFn>) {
    let _ = JPX_DECODER.set(f);
}
pub fn set_jbig2_decoder(f: Box<DecodeFn>) {
    let _ = JBIG2_DECODER.set(f);
}

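// The pluggable decoders below are looked up from the OnceCells registered via
// set_jpx_decoder/set_jbig2_decoder. jbig2_decode additionally wraps the raw
// embedded segments (plus any /JBIG2Globals) with end-of-page and end-of-stream
// segments so a generic JBIG2 decoder sees a complete stream.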
pub fn jpx_decode(data: &[u8]) -> Result<Vec<u8>> {
    JPX_DECODER.get().ok_or_else(|| PdfError::Other { msg: "jp2k decoder not set".into() })?(data)
}
pub fn jbig2_decode(data: &[u8], globals: &[u8]) -> Result<Vec<u8>> {
    let data = [
        // file header
        // &[0x97, 0x4A, 0x42, 0x32, 0x0D, 0x0A, 0x1A, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x01],

        globals,
        data,

        // end of page
        &[0x00, 0x00, 0x00, 0x03, 0x31, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00],

        // end of stream
        &[0x00, 0x00, 0x00, 0x04, 0x33, 0x01, 0x00, 0x00, 0x00, 0x00],
    ].concat();
    JBIG2_DECODER.get().ok_or_else(|| PdfError::Other { msg: "jbig2 decoder not set".into() })?(&data)
}

pub fn decode(data: &[u8], filter: &StreamFilter) -> Result<Vec<u8>> {
    match *filter {
        StreamFilter::ASCIIHexDecode => decode_hex(data),
        StreamFilter::ASCII85Decode => decode_85(data),
        StreamFilter::LZWDecode(ref params) => lzw_decode(data, params),
        StreamFilter::FlateDecode(ref params) => flate_decode(data, params),
        StreamFilter::RunLengthDecode => run_length_decode(data),
        StreamFilter::DCTDecode(ref params) => dct_decode(data, params),

        _ => bail!("unimplemented {filter:?}"),
    }
}

pub fn encode(data: &[u8], filter: &StreamFilter) -> Result<Vec<u8>> {
    match *filter {
        StreamFilter::ASCIIHexDecode => Ok(encode_hex(data)),
        StreamFilter::ASCII85Decode => Ok(encode_85(data)),
        StreamFilter::LZWDecode(ref params) => lzw_encode(data, params),
        StreamFilter::FlateDecode(ref _params) => Ok(flate_encode(data)),
        _ => unimplemented!(),
    }
}

/*
 * Predictor - copied and adapted from PNG crate..
 */

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
#[allow(dead_code)]
pub enum PredictorType {
    NoFilter = 0,
    Sub = 1,
    Up = 2,
    Avg = 3,
    Paeth = 4
}

impl PredictorType {
    /// u8 -> Self. Temporary solution until Rust provides a canonical one.
    pub fn from_u8(n: u8) -> Result<PredictorType> {
        match n {
            0 => Ok(PredictorType::NoFilter),
            1 => Ok(PredictorType::Sub),
            2 => Ok(PredictorType::Up),
            3 => Ok(PredictorType::Avg),
            4 => Ok(PredictorType::Paeth),
            n => Err(PdfError::IncorrectPredictorType {n})
        }
    }
}

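// Paeth predictor (from the PNG spec): pick whichever of left (a), above (b),
// or upper-left (c) is closest to the linear estimate p = a + b - c.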
fn filter_paeth(a: u8, b: u8, c: u8) -> u8 {
    let ia = a as i16;
    let ib = b as i16;
    let ic = c as i16;

    let p = ia + ib - ic;

    let pa = (p - ia).abs();
    let pb = (p - ib).abs();
    let pc = (p - ic).abs();

    if pa <= pb && pa <= pc {
        a
    } else if pb <= pc {
        b
    } else {
        c
    }
}

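// `bpp` is the distance in bytes between corresponding bytes of adjacent pixels
// (here the number of color components, since 8 bits per component is assumed);
// `prev` is the already-unfiltered previous scanline.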
pub fn unfilter(filter: PredictorType, bpp: usize, prev: &[u8], inp: &[u8], out: &mut [u8]) {
    use self::PredictorType::*;
    let len = inp.len();
    assert_eq!(len, out.len());
    assert_eq!(len, prev.len());
    if bpp > len {
        return;
    }

    match filter {
        NoFilter => {
            out[..len].copy_from_slice(&inp[..len]);
        }
        Sub => {
            out[..bpp].copy_from_slice(&inp[..bpp]);

            for i in bpp..len {
                out[i] = inp[i].wrapping_add(out[i - bpp]);
            }
        }
        Up => {
            for i in 0..len {
                out[i] = inp[i].wrapping_add(prev[i]);
            }
        }
        Avg => {
            for i in 0..bpp {
                out[i] = inp[i].wrapping_add(prev[i] / 2);
            }

            for i in bpp..len {
                out[i] = inp[i].wrapping_add(
                    ((out[i - bpp] as i16 + prev[i] as i16) / 2) as u8
                );
            }
        }
        Paeth => {
            for i in 0..bpp {
                out[i] = inp[i].wrapping_add(
                    filter_paeth(0, prev[i], 0)
                );
            }

            for i in bpp..len {
                out[i] = inp[i].wrapping_add(
                    filter_paeth(out[i - bpp], prev[i], prev[i - bpp])
                );
            }
        }
    }
}

#[allow(unused)]
pub fn filter(method: PredictorType, bpp: usize, previous: &[u8], current: &mut [u8]) {
    use self::PredictorType::*;
    let len = current.len();

    match method {
        NoFilter => (),
        Sub => {
            for i in (bpp..len).rev() {
                current[i] = current[i].wrapping_sub(current[i - bpp]);
            }
        }
        Up => {
            for i in 0..len {
                current[i] = current[i].wrapping_sub(previous[i]);
            }
        }
        Avg => {
            for i in (bpp..len).rev() {
                current[i] = current[i].wrapping_sub(current[i - bpp].wrapping_add(previous[i]) / 2);
            }

            for i in 0..bpp {
                current[i] = current[i].wrapping_sub(previous[i] / 2);
            }
        }
        Paeth => {
            for i in (bpp..len).rev() {
                current[i] = current[i].wrapping_sub(filter_paeth(current[i - bpp], previous[i], previous[i - bpp]));
            }

            for i in 0..bpp {
                current[i] = current[i].wrapping_sub(filter_paeth(0, previous[i], 0));
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn base_85() {
        fn s(b: &[u8]) -> &str { std::str::from_utf8(b).unwrap() }

        let case = &b"hello world!"[..];
        let encoded = encode_85(case);
        assert_eq!(s(&encoded), "BOu!rD]j7BEbo80~>");
        let decoded = decode_85(&encoded).unwrap();
        assert_eq!(case, &*decoded);
        /*
        assert_eq!(
            s(&decode_85(
                &lzw_decode(
                    &decode_85(&include_bytes!("data/t01_lzw+base85.txt")[..]).unwrap(),
                    &LZWFlateParams::default()
                ).unwrap()
            ).unwrap()),
            include_str!("data/t01_plain.txt")
        );
        */
    }

    #[test]
    fn run_length_decode_test() {
        let x = run_length_decode(&[254, b'a', 255, b'b', 2, b'c', b'b', b'c', 254, b'a', 128]).unwrap();
        assert_eq!(b"aaabbcbcaaa", x.as_slice());
    }
}
108
src-pdfrs/pdf/src/encoding.rs
Normal file
@ -0,0 +1,108 @@
use std::collections::HashMap;
use istring::SmallString;
use crate as pdf;
use crate::object::{Object, Resolve, ObjectWrite, DeepClone};
use crate::primitive::{Primitive, Dictionary};
use crate::error::Result;
use datasize::DataSize;

#[derive(Debug, Clone, DataSize)]
pub struct Encoding {
    pub base: BaseEncoding,
    pub differences: HashMap<u32, SmallString>,
}

#[derive(Object, ObjectWrite, Debug, Clone, Eq, PartialEq, DataSize)]
pub enum BaseEncoding {
    StandardEncoding,
    SymbolEncoding,
    MacRomanEncoding,
    WinAnsiEncoding,
    MacExpertEncoding,
    #[pdf(name = "Identity-H")]
    IdentityH,
    None,

    #[pdf(other)]
    Other(String),
}
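// An /Encoding is either a bare name (the base encoding) or a dictionary whose
// /Differences array interleaves codes and names: an integer sets the next
// character code, and each following name claims that code and increments it.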
impl Object for Encoding {
    fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result<Self> {
        match p {
            name @ Primitive::Name(_) => {
                Ok(Encoding {
                    base: BaseEncoding::from_primitive(name, resolve)?,
                    differences: HashMap::new(),
                })
            }
            Primitive::Dictionary(mut dict) => {
                let base = match dict.remove("BaseEncoding") {
                    Some(p) => BaseEncoding::from_primitive(p, resolve)?,
                    None => BaseEncoding::None
                };
                let mut gid = 0;
                let mut differences = HashMap::new();
                if let Some(p) = dict.remove("Differences") {
                    for part in p.resolve(resolve)?.into_array()? {
                        match part {
                            Primitive::Integer(code) => {
                                gid = code as u32;
                            }
                            Primitive::Name(name) => {
                                differences.insert(gid, name);
                                gid += 1;
                            }
                            _ => bail!("Unknown part primitive in dictionary: {:?}", part),
                        }
                    }
                }
                Ok(Encoding { base, differences })
            }
            Primitive::Reference(r) => Self::from_primitive(resolve.resolve(r)?, resolve),
            Primitive::Stream(s) => Self::from_primitive(Primitive::Dictionary(s.info), resolve),
            _ => bail!("Unknown element: {:?}", p),
        }
    }
}
impl ObjectWrite for Encoding {
    fn to_primitive(&self, update: &mut impl pdf::object::Updater) -> Result<Primitive> {
        let base = self.base.to_primitive(update)?;
        if self.differences.len() == 0 {
            Ok(base)
        } else {
            let mut list = vec![];

            let mut diff_list: Vec<_> = self.differences.iter().collect();
            diff_list.sort();
            let mut last = None;

            for &(&gid, name) in diff_list.iter() {
                // only emit a code when the run of consecutive codes is broken
                if !last.map(|n| n + 1 == gid).unwrap_or(false) {
                    list.push(Primitive::Integer(gid as i32));
                }

                list.push(Primitive::Name(name.clone()));

                last = Some(gid);
            }

            let mut dict = Dictionary::new();
            dict.insert("BaseEncoding", base);
            dict.insert("Differences", Primitive::Array(list));
            Ok(Primitive::Dictionary(dict))
        }
    }
}
impl Encoding {
    pub fn standard() -> Encoding {
        Encoding {
            base: BaseEncoding::StandardEncoding,
            differences: HashMap::new()
        }
    }
}
impl DeepClone for Encoding {
    fn deep_clone(&self, _cloner: &mut impl pdf::object::Cloner) -> Result<Self> {
        Ok(self.clone())
    }
}
347
src-pdfrs/pdf/src/error.rs
Normal file
@ -0,0 +1,347 @@
use crate::object::ObjNr;
use std::io;
use std::error::Error;
use crate::parser::ParseFlags;
use std::sync::Arc;
use datasize::{DataSize, data_size};
// `Snafu` is required by the derive below.
use snafu::{Snafu, ErrorCompat};

#[derive(Debug, Snafu)]
pub enum PdfError {
    // Syntax / parsing
    #[snafu(display("Unexpected end of file"))]
    EOF,

    #[snafu(display("Shared, caused by\n {}", source))]
    Shared {
        #[snafu(source)]
        source: Arc<PdfError>
    },

    #[snafu(display("Not enough Operator arguments"))]
    NoOpArg,

    #[snafu(display("Error parsing from string, caused by\n {}", source))]
    Parse {
        #[snafu(source)]
        source: Box<dyn Error + Send + Sync>
    },

    #[snafu(display("Invalid encoding, caused by\n {}", source))]
    Encoding {
        #[snafu(source)]
        source: Box<dyn Error + Send + Sync>
    },

    #[snafu(display("Out of bounds: index {}, but len is {}", index, len))]
    Bounds { index: usize, len: usize },

    #[snafu(display("Unexpected token '{}' at {} - expected '{}'", lexeme, pos, expected))]
    UnexpectedLexeme {pos: usize, lexeme: String, expected: &'static str},

    #[snafu(display("Expecting an object, encountered {} at pos {}. Rest:\n{}\n\n((end rest))", first_lexeme, pos, rest))]
    UnknownType {pos: usize, first_lexeme: String, rest: String},

    #[snafu(display("Unknown variant '{}' for enum {}", name, id))]
    UnknownVariant { id: &'static str, name: String },

    #[snafu(display("'{}' not found.", word))]
    NotFound { word: String },

    #[snafu(display("Cannot follow reference during parsing - no resolve fn given (most likely /Length of Stream)."))]
    Reference, // TODO: which one?

    #[snafu(display("Erroneous 'type' field in xref stream - expected 0, 1 or 2, found {}", found))]
    XRefStreamType { found: u64 },

    #[snafu(display("Parsing read past boundary of Contents."))]
    ContentReadPastBoundary,

    #[snafu(display("Primitive not allowed"))]
    PrimitiveNotAllowed { allowed: ParseFlags, found: ParseFlags },

    //////////////////
    // Encode/decode
    #[snafu(display("Hex decode error. Position {}, bytes {:?}", pos, bytes))]
    HexDecode {pos: usize, bytes: [u8; 2]},

    #[snafu(display("Ascii85 tail error"))]
    Ascii85TailError,

    #[snafu(display("Failed to convert '{}' into PredictorType", n))]
    IncorrectPredictorType {n: u8},

    //////////////////
    // Dictionary
    #[snafu(display("Can't parse field {} of struct {}, caused by\n {}", field, typ, source))]
    FromPrimitive {
        typ: &'static str,
        field: &'static str,
        #[snafu(source)]
        source: Box<PdfError>
    },

    #[snafu(display("Field /{} is missing in dictionary for type {}.", field, typ))]
    MissingEntry {
        typ: &'static str,
        field: String
    },

    #[snafu(display("Expected to find value {} for key {}. Found {} instead.", value, key, found))]
    KeyValueMismatch {
        key: String,
        value: String,
        found: String,
    },

    #[snafu(display("Expected dictionary /Type = {}. Found /Type = {}.", expected, found))]
    WrongDictionaryType {
        expected: String,
        found: String
    },

    //////////////////
    // Misc
    #[snafu(display("Tried to dereference free object nr {}.", obj_nr))]
    FreeObject {obj_nr: u64},

    #[snafu(display("Tried to dereference non-existing object nr {}.", obj_nr))]
    NullRef {obj_nr: u64},

    #[snafu(display("Expected primitive {}, found primitive {} instead.", expected, found))]
    UnexpectedPrimitive {expected: &'static str, found: &'static str},
    /*
    WrongObjectType {expected: &'static str, found: &'static str} {
        description("Function called on object of wrong type.")
        display("Expected {}, found {}.", expected, found)
    }
    */
    #[snafu(display("Object stream index out of bounds ({}/{}).", index, max))]
    ObjStmOutOfBounds {index: usize, max: usize},

    #[snafu(display("Page out of bounds ({}/{}).", page_nr, max))]
    PageOutOfBounds {page_nr: u32, max: u32},

    #[snafu(display("Page {} could not be found in the page tree.", page_nr))]
    PageNotFound {page_nr: u32},

    #[snafu(display("Entry {} in xref table unspecified", id))]
    UnspecifiedXRefEntry {id: ObjNr},

    #[snafu(display("Invalid password"))]
    InvalidPassword,

    #[snafu(display("Decryption failure"))]
    DecryptionFailure,

    #[snafu(display("JPEG Error, caused by\n {}", source))]
    Jpeg {
        #[snafu(source)]
        source: jpeg_decoder::Error
    },

    #[snafu(display("IO Error, caused by\n {}", source))]
    Io {
        #[snafu(source)]
        source: io::Error
    },

    #[snafu(display("{}", msg))]
    Other { msg: String },

    #[snafu(display("NoneError at {}:{}:{}:{}", file, line, column, context))]
    NoneError { file: &'static str, line: u32, column: u32, context: Context },

    #[snafu(display("Try at {}:{}:{}:{}, caused by\n {}", file, line, column, context, source))]
    Try {
        file: &'static str,
        line: u32,
        column: u32,
        context: Context,
        #[snafu(source)]
        source: Box<PdfError>
    },

    #[snafu(display("PostScriptParseError"))]
    PostScriptParse,

    #[snafu(display("PostScriptExecError"))]
    PostScriptExec,

    #[snafu(display("UTF16 decode error"))]
    Utf16Decode,

    #[snafu(display("UTF8 decode error"))]
    Utf8Decode,

    #[snafu(display("CID decode error"))]
    CidDecode,

    #[snafu(display("Max nesting depth reached"))]
    MaxDepth,

    #[snafu(display("Invalid"))]
    Invalid,
}
impl PdfError {
    pub fn is_eof(&self) -> bool {
        match self {
            PdfError::EOF => true,
            PdfError::Try { ref source, .. } => source.is_eof(),
            _ => false
        }
    }
}
datasize::non_dynamic_const_heap_size!(PdfError, 0);

#[cfg(feature="cache")]
impl globalcache::ValueSize for PdfError {
    #[inline]
    fn size(&self) -> usize {
        data_size(self)
    }
}

#[derive(Debug)]
pub struct Context(pub Vec<(&'static str, String)>);
impl std::fmt::Display for Context {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        for (i, &(key, ref val)) in self.0.iter().enumerate() {
            if i == 0 {
                writeln!(f)?;
            }
            writeln!(f, " {} = {}", key, val)?;
        }
        Ok(())
    }
}

pub type Result<T, E=PdfError> = std::result::Result<T, E>;

impl From<io::Error> for PdfError {
    fn from(source: io::Error) -> PdfError {
        PdfError::Io { source }
    }
}
impl From<String> for PdfError {
    fn from(msg: String) -> PdfError {
        PdfError::Other { msg }
    }
}
impl From<Arc<PdfError>> for PdfError {
    fn from(source: Arc<PdfError>) -> PdfError {
        PdfError::Shared { source }
    }
}

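// `try_opt!` and `t!` play the role of `?` for Options and Results, but wrap
// the failure in NoneError/Try with the file, line, column, and an optional
// list of context expressions captured via `stringify!`.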
#[macro_export]
macro_rules! try_opt {
    ($e:expr $(,$c:expr)*) => (
        match $e {
            Some(v) => v,
            None => {
                let context = $crate::error::Context(vec![ $( (stringify!($c), format!("{:?}", $c) ) ),* ]);
                return Err($crate::PdfError::NoneError {
                    file: file!(),
                    line: line!(),
                    column: column!(),
                    context,
                });
            }
        }
    );
}

#[macro_export]
macro_rules! t {
    ($e:expr $(,$c:expr)*) => {
        match $e {
            Ok(v) => v,
            Err(e) => {
                let context = $crate::error::Context(vec![ $( (stringify!($c), format!("{:?}", $c) ) ),* ]);
                return Err($crate::PdfError::Try { file: file!(), line: line!(), column: column!(), context, source: e.into() })
            }
        }
    };
}

#[macro_export]
macro_rules! ctx {
    ($e:expr, $($c:expr),*) => {
        match $e {
            Ok(v) => Ok(v),
            Err(e) => {
                let context = $crate::error::Context(vec![ $( (stringify!($c), format!("{:?}", $c) ) ),* ]);
                // `Try` is the only context-carrying variant defined above.
                Err($crate::PdfError::Try { file: file!(), line: line!(), column: column!(), context, source: e.into() })
            }
        }
    };
}

macro_rules! err_from {
    ($($st:ty),* => $variant:ident) => (
        $(
            impl From<$st> for PdfError {
                fn from(e: $st) -> PdfError {
                    PdfError::$variant { source: e.into() }
                }
            }
        )*
    )
}
err_from!(std::str::Utf8Error, std::string::FromUtf8Error, std::string::FromUtf16Error,
    istring::FromUtf8Error<istring::IBytes>, istring::FromUtf8Error<istring::SmallBytes> => Encoding);
err_from!(std::num::ParseIntError, std::string::ParseError => Parse);
err_from!(jpeg_decoder::Error => Jpeg);

macro_rules! other {
    ($($t:tt)*) => ($crate::PdfError::Other { msg: format!($($t)*) })
}

macro_rules! err {
    ($e: expr) => ({
        return Err($e);
    })
}
macro_rules! bail {
    ($($t:tt)*) => {
        err!($crate::PdfError::Other { msg: format!($($t)*) })
    }
}
macro_rules! unimplemented {
    () => (bail!("Unimplemented @ {}:{}", file!(), line!()))
}

#[cfg(not(feature = "dump"))]
pub fn dump_data(_data: &[u8]) {}

#[cfg(feature = "dump")]
pub fn dump_data(data: &[u8]) {
    use std::io::Write;
    if let Some(path) = ::std::env::var_os("PDF_OUT") {
        let (mut file, path) = tempfile::Builder::new()
            .prefix("")
            .tempfile_in(path).unwrap()
            .keep().unwrap();
        file.write_all(&data).unwrap();
        info!("data written to {:?}", path);
    } else {
        info!("set PDF_OUT to an existing directory to dump stream data");
    }
}

#[cfg(test)]
mod tests {
    use super::PdfError;

    fn assert_send<T: Send>() {}

    fn assert_sync<T: Sync>() {}

    #[test]
    fn error_is_send_and_sync() {
        // note that these checks happen at compile time, not when the test is run
        assert_send::<PdfError>();
        assert_sync::<PdfError>();
    }
}
708
src-pdfrs/pdf/src/file.rs
Normal file
@ -0,0 +1,708 @@
//! This is kind of the entry-point of the type-safe PDF functionality.
use std::marker::PhantomData;
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use std::path::Path;
use std::io::Write;

use crate as pdf;
use crate::error::*;
use crate::object::*;
use crate::primitive::{Primitive, Dictionary, PdfString};
use crate::backend::Backend;
use crate::any::*;
use crate::parser::{Lexer, parse_with_lexer};
use crate::parser::{parse_indirect_object, parse, ParseFlags};
use crate::xref::{XRef, XRefTable, XRefInfo};
use crate::crypt::Decoder;
use crate::crypt::CryptDict;
use crate::enc::{StreamFilter, decode};
use std::ops::Range;
use datasize::DataSize;

#[cfg(feature="cache")]
pub use globalcache::{ValueSize, sync::SyncCache};

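// A PromisedRef is a reference reserved up front (via `Updater::promise`) so an
// object can refer to its own not-yet-written id; `fulfill` later supplies the
// actual object. `#[must_use]` guards against reserving an id and dropping it.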
#[must_use]
pub struct PromisedRef<T> {
    inner: PlainRef,
    _marker: PhantomData<T>
}
impl<T> PromisedRef<T> {
    pub fn get_inner(&self) -> PlainRef {
        self.inner
    }
    pub fn get_ref(&self) -> Ref<T> {
        Ref::new(self.inner)
    }
}

pub trait Cache<T: Clone> {
    fn get_or_compute(&self, key: PlainRef, compute: impl FnOnce() -> T) -> T;
    fn clear(&self);
}
pub struct NoCache;
impl<T: Clone> Cache<T> for NoCache {
    fn get_or_compute(&self, _key: PlainRef, compute: impl FnOnce() -> T) -> T {
        compute()
    }
    fn clear(&self) {}
}

#[cfg(feature="cache")]
impl<T: Clone + ValueSize + Send + 'static> Cache<T> for Arc<SyncCache<PlainRef, T>> {
    fn get_or_compute(&self, key: PlainRef, compute: impl FnOnce() -> T) -> T {
        self.get(key, compute)
    }
    fn clear(&self) {
        (**self).clear()
    }
}

pub trait Log {
    fn load_object(&self, _r: PlainRef) {}
    fn log_get(&self, _r: PlainRef) {}
}
pub struct NoLog;
impl Log for NoLog {}

pub struct Storage<B, OC, SC, L> {
    // objects identical to those in the backend
    cache: OC,
    stream_cache: SC,

    // objects that differ from the backend
    changes: HashMap<ObjNr, (Primitive, GenNr)>,

    refs: XRefTable,

    decoder: Option<Decoder>,
    options: ParseOptions,

    backend: B,

    // Position of the PDF header in the file.
    start_offset: usize,

    log: L
}

impl<OC, SC, L> Storage<Vec<u8>, OC, SC, L>
where
    OC: Cache<Result<AnySync, Arc<PdfError>>>,
    SC: Cache<Result<Arc<[u8]>, Arc<PdfError>>>,
    L: Log,
{
    pub fn empty(object_cache: OC, stream_cache: SC, log: L) -> Self {
        Storage {
            cache: object_cache,
            stream_cache,
            changes: HashMap::new(),
            refs: XRefTable::new(0),
            decoder: None,
            options: ParseOptions::strict(),
            backend: Vec::from(&b"%PDF-1.7\n"[..]),
            start_offset: 0,
            log
        }
    }
}

impl<B, OC, SC, L> Storage<B, OC, SC, L>
where
    B: Backend,
    OC: Cache<Result<AnySync, Arc<PdfError>>>,
    SC: Cache<Result<Arc<[u8]>, Arc<PdfError>>>,
    L: Log,
{
    pub fn into_inner(self) -> B {
        self.backend
    }
    pub fn resolver(&self) -> impl Resolve + '_ {
        StorageResolver::new(self)
    }
    pub fn with_cache(backend: B, options: ParseOptions, object_cache: OC, stream_cache: SC, log: L) -> Result<Self> {
        Ok(Storage {
            start_offset: backend.locate_start_offset()?,
            backend,
            refs: XRefTable::new(0),
            cache: object_cache,
            stream_cache,
            changes: HashMap::new(),
            decoder: None,
            options,
            log
        })
    }
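    // Stream payloads are decrypted first (encryption applies to the raw bytes)
    // and only then run through the /Filter chain, in order.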
    fn decode(&self, id: PlainRef, range: Range<usize>, filters: &[StreamFilter]) -> Result<Arc<[u8]>> {
        let data = self.backend.read(range)?;

        let mut data = Vec::from(data);
        if let Some(ref decoder) = self.decoder {
            data = Vec::from(t!(decoder.decrypt(id, &mut data)));
        }
        for filter in filters {
            data = t!(decode(&data, filter), filter);
        }
        Ok(data.into())
    }

    pub fn load_storage_and_trailer(&mut self) -> Result<Dictionary> {
        self.load_storage_and_trailer_password(b"")
    }

    pub fn load_storage_and_trailer_password(&mut self, password: &[u8]) -> Result<Dictionary> {
        let resolver = StorageResolver::new(self);
        let (refs, trailer) = t!(self.backend.read_xref_table_and_trailer(self.start_offset, &resolver));
        self.refs = refs;

        if let Some(crypt) = trailer.get("Encrypt") {
            let key = trailer
                .get("ID")
                .ok_or(PdfError::MissingEntry {
                    typ: "Trailer",
                    field: "ID".into(),
                })?
                .as_array()?
                .get(0)
                .ok_or(PdfError::MissingEntry {
                    typ: "Trailer",
                    field: "ID[0]".into()
                })?
                .as_string()?
                .as_bytes();

            let resolver = StorageResolver::new(self);
            let dict = CryptDict::from_primitive(crypt.clone(), &resolver)?;

            self.decoder = Some(t!(Decoder::from_password(&dict, key, password)));
            if let Primitive::Reference(reference) = crypt {
                self.decoder.as_mut().unwrap().encrypt_indirect_object = Some(*reference);
            }
            if let Some(Primitive::Reference(catalog_ref)) = trailer.get("Root") {
                let resolver = StorageResolver::new(self);
                let catalog = t!(t!(resolver.resolve(*catalog_ref)).resolve(&resolver)?.into_dictionary());
                if let Some(Primitive::Reference(metadata_ref)) = catalog.get("Metadata") {
                    self.decoder.as_mut().unwrap().metadata_indirect_object = Some(*metadata_ref);
                }
            }
        }
        Ok(trailer)
    }
    pub fn scan(&self) -> impl Iterator<Item = Result<ScanItem>> + '_ {
        let xref_offset = self.backend.locate_xref_offset().unwrap();
        let slice = self.backend.read(self.start_offset .. xref_offset).unwrap();
        let mut lexer = Lexer::with_offset(slice, 0);

        fn skip_xref(lexer: &mut Lexer) -> Result<()> {
            while lexer.next()? != "trailer" {
            }
            Ok(())
        }

        let resolver = StorageResolver::new(self);
        std::iter::from_fn(move || {
            loop {
                let pos = lexer.get_pos();
                match parse_indirect_object(&mut lexer, &resolver, self.decoder.as_ref(), ParseFlags::all()) {
                    Ok((r, p)) => return Some(Ok(ScanItem::Object(r, p))),
                    Err(e) if e.is_eof() => return None,
                    Err(e) => {
                        lexer.set_pos(pos);
                        if let Ok(s) = lexer.next() {
                            debug!("next: {:?}", String::from_utf8_lossy(s.as_slice()));
                            match &*s {
                                b"xref" => {
                                    if let Err(e) = skip_xref(&mut lexer) {
                                        return Some(Err(e));
                                    }
                                    if let Ok(trailer) = parse_with_lexer(&mut lexer, &NoResolve, ParseFlags::DICT).and_then(|p| p.into_dictionary()) {
                                        return Some(Ok(ScanItem::Trailer(trailer)));
                                    }
                                }
                                b"startxref" if lexer.next().is_ok() => {
                                    continue;
                                }
                                _ => {}
                            }
                        }
                        return Some(Err(e));
                    }
                }
            }
        })
    }
    fn resolve_ref(&self, r: PlainRef, flags: ParseFlags, resolve: &impl Resolve) -> Result<Primitive> {
        match self.changes.get(&r.id) {
            Some((p, _)) => Ok((*p).clone()),
            None => match t!(self.refs.get(r.id)) {
                XRef::Raw {pos, ..} => {
                    let mut lexer = Lexer::with_offset(t!(self.backend.read(self.start_offset + pos ..)), self.start_offset + pos);
                    let p = t!(parse_indirect_object(&mut lexer, resolve, self.decoder.as_ref(), flags)).1;
                    Ok(p)
                }
                XRef::Stream {stream_id, index} => {
                    if !flags.contains(ParseFlags::STREAM) {
                        return Err(PdfError::PrimitiveNotAllowed { found: ParseFlags::STREAM, allowed: flags });
                    }
                    // use get to cache the object stream
                    let obj_stream = resolve.get::<ObjectStream>(Ref::from_id(stream_id))?;

                    let (data, range) = t!(obj_stream.get_object_slice(index, resolve));
                    let slice = data.get(range.clone()).ok_or_else(|| other!("invalid range {:?}, but only have {} bytes", range, data.len()))?;
                    parse(slice, resolve, flags)
                }
                XRef::Free {..} => err!(PdfError::FreeObject {obj_nr: r.id}),
                XRef::Promised => unimplemented!(),
                XRef::Invalid => err!(PdfError::NullRef {obj_nr: r.id}),
            }
        }
    }
}

pub enum ScanItem {
    Object(PlainRef, Primitive),
    Trailer(Dictionary)
}

struct StorageResolver<'a, B, OC, SC, L> {
    storage: &'a Storage<B, OC, SC, L>,
    chain: Mutex<Vec<PlainRef>>,
}
impl<'a, B, OC, SC, L> StorageResolver<'a, B, OC, SC, L> {
    pub fn new(storage: &'a Storage<B, OC, SC, L>) -> Self {
        StorageResolver {
            storage,
            chain: Mutex::new(vec![])
        }
    }
}

struct Defer<F: FnMut()>(F);
impl<F: FnMut()> Drop for Defer<F> {
    fn drop(&mut self) {
        (self.0)();
    }
}

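// `chain` tracks the references currently being resolved on this resolver so a
// self-referential object graph errors out ("Recursive reference") instead of
// recursing forever; the Defer guard pops the entry on every exit path.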
impl<'a, B, OC, SC, L> Resolve for StorageResolver<'a, B, OC, SC, L>
where
    B: Backend,
    OC: Cache<Result<AnySync, Arc<PdfError>>>,
    SC: Cache<Result<Arc<[u8]>, Arc<PdfError>>>,
    L: Log
{
    fn resolve_flags(&self, r: PlainRef, flags: ParseFlags, _depth: usize) -> Result<Primitive> {
        let storage = self.storage;
        storage.log.load_object(r);

        storage.resolve_ref(r, flags, self)
    }

    fn get<T: Object+DataSize>(&self, r: Ref<T>) -> Result<RcRef<T>> {
        let key = r.get_inner();
        self.storage.log.log_get(key);

        {
            debug!("get {key:?} as {}", std::any::type_name::<T>());
            let mut chain = self.chain.lock().unwrap();
            if chain.contains(&key) {
                bail!("Recursive reference");
            }
            chain.push(key);
        }
        let _defer = Defer(|| {
            let mut chain = self.chain.lock().unwrap();
            assert_eq!(chain.pop(), Some(key));
        });

        let res = self.storage.cache.get_or_compute(key, || {
            match self.resolve(key).and_then(|p| T::from_primitive(p, self)) {
                Ok(obj) => Ok(AnySync::new(Shared::new(obj))),
                Err(e) => {
                    let p = self.resolve(key);
                    warn!("failed to decode {p:?} as {}", std::any::type_name::<T>());
                    Err(Arc::new(e))
                }
            }
        });
        match res {
            Ok(any) => {
                match any.downcast() {
                    Ok(val) => Ok(RcRef::new(key, val)),
                    Err(_) => {
                        let p = self.resolve(key)?;
                        Ok(RcRef::new(key, T::from_primitive(p, self)?.into()))
                    }
                }
            }
            Err(e) => Err(PdfError::Shared { source: e.clone() }),
        }
    }
    fn options(&self) -> &ParseOptions {
        &self.storage.options
    }
    fn stream_data(&self, id: PlainRef, range: Range<usize>) -> Result<Arc<[u8]>> {
        self.storage.decode(id, range, &[])
    }

    fn get_data_or_decode(&self, id: PlainRef, range: Range<usize>, filters: &[StreamFilter]) -> Result<Arc<[u8]>> {
        self.storage.stream_cache.get_or_compute(id, || self.storage.decode(id, range, filters).map_err(Arc::new))
            .map_err(|e| e.into())
    }
}

impl<B, OC, SC, L> Updater for Storage<B, OC, SC, L>
where
    B: Backend,
    OC: Cache<Result<AnySync, Arc<PdfError>>>,
    SC: Cache<Result<Arc<[u8]>, Arc<PdfError>>>,
    L: Log,
{
    fn create<T: ObjectWrite>(&mut self, obj: T) -> Result<RcRef<T>> {
        let id = self.refs.len() as u64;
        self.refs.push(XRef::Promised);
        let primitive = obj.to_primitive(self)?;
        self.changes.insert(id, (primitive, 0));
        let rc = Shared::new(obj);
        let r = PlainRef { id, gen: 0 };

        Ok(RcRef::new(r, rc))
    }
    fn update<T: ObjectWrite>(&mut self, old: PlainRef, obj: T) -> Result<RcRef<T>> {
        use std::collections::hash_map::Entry;

        let r = match self.refs.get(old.id)? {
            XRef::Free { .. } => panic!(),
            XRef::Raw { gen_nr, .. } => PlainRef { id: old.id, gen: gen_nr },
            XRef::Stream { .. } => return self.create(obj),
            XRef::Promised => PlainRef { id: old.id, gen: 0 },
            XRef::Invalid => panic!()
        };
        let primitive = obj.to_primitive(self)?;
        match self.changes.entry(old.id) {
            Entry::Vacant(e) => {
                e.insert((primitive, r.gen));
            }
            Entry::Occupied(mut e) => match (e.get_mut(), primitive) {
                ((Primitive::Dictionary(ref mut dict), _), Primitive::Dictionary(new)) => {
                    dict.append(new);
                }
                (old, new) => {
                    *old = (new, r.gen);
                }
            }
        }
        let rc = Shared::new(obj);

        Ok(RcRef::new(r, rc))
    }

    fn promise<T: Object>(&mut self) -> PromisedRef<T> {
        let id = self.refs.len() as u64;

        self.refs.push(XRef::Promised);

        PromisedRef {
            inner: PlainRef {
                id,
                gen: 0
            },
            _marker: PhantomData
        }
    }

    fn fulfill<T: ObjectWrite>(&mut self, promise: PromisedRef<T>, obj: T) -> Result<RcRef<T>> {
        self.update(promise.inner, obj)
    }
}

impl<OC, SC, L> Storage<Vec<u8>, OC, SC, L>
where
    OC: Cache<Result<AnySync, Arc<PdfError>>>,
    SC: Cache<Result<Arc<[u8]>, Arc<PdfError>>>,
    L: Log
{
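    // Saving appends changed objects to the existing backend (an incremental
    // update): each change is written as "<id> <gen> obj ... endobj", then a
    // cross-reference *stream* carrying the trailer fields, then startxref.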
    pub fn save(&mut self, trailer: &mut Trailer) -> Result<&[u8]> {
        // writing the trailer generates another id for the info dictionary
        trailer.size = (self.refs.len() + 2) as _;
        let trailer_dict = trailer.to_dict(self)?;

        let xref_promise = self.promise::<Stream<XRefInfo>>();

        let mut changes: Vec<_> = self.changes.iter().collect();
        changes.sort_unstable_by_key(|&(id, _)| id);

        for &(&id, &(ref primitive, gen)) in changes.iter() {
            let pos = self.backend.len();
            self.refs.set(id, XRef::Raw { pos: pos as _, gen_nr: gen });
            writeln!(self.backend, "{} {} obj", id, gen)?;
            primitive.serialize(&mut self.backend)?;
            writeln!(self.backend, "endobj")?;
        }

        let xref_pos = self.backend.len();
        self.refs.set(xref_promise.get_inner().id, XRef::Raw { pos: xref_pos, gen_nr: 0 });
        // only write up to the xref stream obj id
        let stream = self.refs.write_stream(xref_promise.get_inner().id as usize + 1)?;

        writeln!(self.backend, "{} {} obj", xref_promise.get_inner().id, 0)?;
        let mut xref_and_trailer = stream.to_pdf_stream(&mut NoUpdate)?;
        for (k, v) in trailer_dict.iter() {
            xref_and_trailer.info.insert(k.clone(), v.clone());
        }

        xref_and_trailer.serialize(&mut self.backend)?;
        writeln!(self.backend, "endobj")?;

        let _ = self.fulfill(xref_promise, stream)?;

        write!(self.backend, "\nstartxref\n{}\n%%EOF", xref_pos).unwrap();

        // update the trailer, which may have changed by now
        self.cache.clear();
        *trailer = Trailer::from_dict(trailer_dict, &self.resolver())?;

        Ok(&self.backend)
    }
}

#[cfg(feature="cache")]
|
||||
pub type ObjectCache = Arc<SyncCache<PlainRef, Result<AnySync, Arc<PdfError>>>>;
|
||||
#[cfg(feature="cache")]
|
||||
pub type StreamCache = Arc<SyncCache<PlainRef, Result<Arc<[u8]>, Arc<PdfError>>>>;
|
||||
#[cfg(feature="cache")]
|
||||
pub type CachedFile<B> = File<B, ObjectCache, StreamCache, NoLog>;
|
||||
|
||||
pub struct File<B, OC, SC, L> {
|
||||
storage: Storage<B, OC, SC, L>,
|
||||
pub trailer: Trailer,
|
||||
}
|
||||
impl<B, OC, SC, L> Updater for File<B, OC, SC, L>
where
    B: Backend,
    OC: Cache<Result<AnySync, Arc<PdfError>>>,
    SC: Cache<Result<Arc<[u8]>, Arc<PdfError>>>,
    L: Log,
{
    fn create<T: ObjectWrite>(&mut self, obj: T) -> Result<RcRef<T>> {
        self.storage.create(obj)
    }
    fn update<T: ObjectWrite>(&mut self, old: PlainRef, obj: T) -> Result<RcRef<T>> {
        self.storage.update(old, obj)
    }
    fn promise<T: Object>(&mut self) -> PromisedRef<T> {
        self.storage.promise()
    }
    fn fulfill<T: ObjectWrite>(&mut self, promise: PromisedRef<T>, obj: T) -> Result<RcRef<T>> {
        self.storage.fulfill(promise, obj)
    }
}

impl<OC, SC, L> File<Vec<u8>, OC, SC, L>
where
    OC: Cache<Result<AnySync, Arc<PdfError>>>,
    SC: Cache<Result<Arc<[u8]>, Arc<PdfError>>>,
    L: Log,
{
    pub fn save_to(&mut self, path: impl AsRef<Path>) -> Result<()> {
        std::fs::write(path, self.storage.save(&mut self.trailer)?)?;
        Ok(())
    }
}


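// Builder-style entry point. Typical use (a sketch; `FileOptions::cached()`
// requires the "cache" feature):
//     let file = FileOptions::cached().password(b"secret").open("doc.pdf")?;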
pub struct FileOptions<'a, OC, SC, L> {
    oc: OC,
    sc: SC,
    log: L,
    password: &'a [u8],
    parse_options: ParseOptions,
}
impl FileOptions<'static, NoCache, NoCache, NoLog> {
    pub fn uncached() -> Self {
        FileOptions {
            oc: NoCache,
            sc: NoCache,
            password: b"",
            parse_options: ParseOptions::strict(),
            log: NoLog,
        }
    }
}

#[cfg(feature="cache")]
|
||||
impl FileOptions<'static, ObjectCache, StreamCache, NoLog> {
|
||||
pub fn cached() -> Self {
|
||||
FileOptions {
|
||||
oc: SyncCache::new(),
|
||||
sc: SyncCache::new(),
|
||||
password: b"",
|
||||
parse_options: ParseOptions::strict(),
|
||||
log: NoLog
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'a, OC, SC, L> FileOptions<'a, OC, SC, L>
|
||||
where
|
||||
OC: Cache<Result<AnySync, Arc<PdfError>>>,
|
||||
SC: Cache<Result<Arc<[u8]>, Arc<PdfError>>>,
|
||||
L: Log,
|
||||
{
|
||||
pub fn password(self, password: &'a [u8]) -> FileOptions<'a, OC, SC, L> {
|
||||
FileOptions {
|
||||
password,
|
||||
.. self
|
||||
}
|
||||
}
|
||||
pub fn cache<O, S>(self, oc: O, sc: S) -> FileOptions<'a, O, S, L> {
|
||||
let FileOptions { oc: _, sc: _, password, parse_options, log } = self;
|
||||
FileOptions {
|
||||
oc,
|
||||
sc,
|
||||
password,
|
||||
parse_options,
|
||||
log,
|
||||
}
|
||||
}
|
||||
pub fn log<Log>(self, log: Log) -> FileOptions<'a, OC, SC, Log> {
|
||||
let FileOptions { oc, sc, password, parse_options, .. } = self;
|
||||
FileOptions {
|
||||
oc,
|
||||
sc,
|
||||
password,
|
||||
parse_options,
|
||||
log,
|
||||
}
|
||||
}
|
||||
pub fn parse_options(self, parse_options: ParseOptions) -> Self {
|
||||
FileOptions { parse_options, .. self }
|
||||
}
|
||||
|
||||
/// open a file
|
||||
pub fn open(self, path: impl AsRef<Path>) -> Result<File<Vec<u8>, OC, SC, L>> {
|
||||
let data = std::fs::read(path)?;
|
||||
self.load(data)
|
||||
}
|
||||
pub fn storage(self) -> Storage<Vec<u8>, OC, SC, L> {
|
||||
let FileOptions { oc, sc, log, .. } = self;
|
||||
Storage::empty(oc, sc, log)
|
||||
}
|
||||
|
||||
/// load data from the given backend
|
||||
pub fn load<B: Backend>(self, backend: B) -> Result<File<B, OC, SC, L>> {
|
||||
let FileOptions { oc, sc, password, parse_options, log } = self;
|
||||
File::load_data(backend, password, parse_options, oc, sc, log)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl<B, OC, SC, L> File<B, OC, SC, L>
|
||||
where
|
||||
B: Backend,
|
||||
OC: Cache<Result<AnySync, Arc<PdfError>>>,
|
||||
SC: Cache<Result<Arc<[u8]>, Arc<PdfError>>>,
|
||||
L: Log,
|
||||
{
|
||||
fn load_data(backend: B, password: &[u8], options: ParseOptions, object_cache: OC, stream_cache: SC, log: L) -> Result<Self> {
|
||||
let mut storage = Storage::with_cache(backend, options, object_cache, stream_cache, log)?;
|
||||
let trailer = storage.load_storage_and_trailer_password(password)?;
|
||||
|
||||
let resolver = StorageResolver::new(&storage);
|
||||
let trailer = t!(Trailer::from_primitive(
|
||||
Primitive::Dictionary(trailer),
|
||||
&resolver,
|
||||
));
|
||||
Ok(File { storage, trailer })
|
||||
}
|
||||
pub fn new(storage: Storage<B, OC, SC, L>, trailer: Trailer) -> Self {
|
||||
File { storage, trailer }
|
||||
}
|
||||
pub fn resolver(&self) -> impl Resolve + '_ {
|
||||
StorageResolver::new(&self.storage)
|
||||
}
|
||||
|
||||
pub fn get_root(&self) -> &Catalog {
|
||||
&self.trailer.root
|
||||
}
|
||||
|
||||
pub fn pages(&self) -> impl Iterator<Item=Result<PageRc>> + '_ {
|
||||
(0 .. self.num_pages()).map(move |n| self.get_page(n))
|
||||
}
|
||||
pub fn num_pages(&self) -> u32 {
|
||||
self.trailer.root.pages.count
|
||||
}
|
||||
|
||||
pub fn get_page(&self, n: u32) -> Result<PageRc> {
|
||||
let resolver = StorageResolver::new(&self.storage);
|
||||
self.trailer.root.pages.page(&resolver, n)
|
||||
}
|
||||
|
||||
pub fn get_xref(&self) -> &XRefTable {
|
||||
&self.storage.refs
|
||||
}
|
||||
|
||||
pub fn update_catalog(&mut self, catalog: Catalog) -> Result<()> {
|
||||
self.trailer.root = self.create(catalog)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn set_options(&mut self, options: ParseOptions) {
|
||||
self.storage.options = options;
|
||||
}
|
||||
|
||||
pub fn scan(&self) -> impl Iterator<Item = Result<ScanItem>> + '_ {
|
||||
self.storage.scan()
|
||||
}
|
||||
|
||||
pub fn log(&self) -> &L {
|
||||
&self.storage.log
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Object, ObjectWrite, DataSize)]
|
||||
pub struct Trailer {
|
||||
#[pdf(key = "Size")]
|
||||
pub size: i32,
|
||||
|
||||
#[pdf(key = "Prev")]
|
||||
pub prev_trailer_pos: Option<i32>,
|
||||
|
||||
#[pdf(key = "Root")]
|
||||
pub root: RcRef<Catalog>,
|
||||
|
||||
#[pdf(key = "Encrypt")]
|
||||
pub encrypt_dict: Option<RcRef<CryptDict>>,
|
||||
|
||||
#[pdf(key = "Info", indirect)]
|
||||
pub info_dict: Option<RcRef<InfoDict>>,
|
||||
|
||||
#[pdf(key = "ID")]
|
||||
pub id: Vec<PdfString>,
|
||||
|
||||
#[pdf(other)]
|
||||
pub other: Dictionary,
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_read_x_ref() {
|
||||
let file = FileOptions::cached().open("/").unwrap();
|
||||
let resolver = file.resolver();
|
||||
let xref_table = file.get_xref();
|
||||
let entries: Vec<Primitive> = xref_table.iter().enumerate()
|
||||
.map(|(i, x)| PlainRef {id: i as u64, gen: x as u64})
|
||||
.map(|plain_ref| resolver.resolve(plain_ref))
|
||||
.filter(|r| r.is_ok())
|
||||
.map(|r| r.unwrap())
|
||||
.collect();
|
||||
for (i, entry) in entries.iter().enumerate() {
|
||||
match entry {
|
||||
Primitive::Stream(stream) => println!("{}: Stream {}", i, stream.info),
|
||||
Primitive::Dictionary(_) => println!("{}: Dictionary {}", i, entry),
|
||||
Primitive::Array(_) => println!("{}: Array: {}", i, entry),
|
||||
_ => println!("{}: {}", i, entry)
|
||||
}
|
||||
}
|
||||
}
|
||||
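A minimal usage sketch of the File/FileOptions API above (illustrative; assumes the "cache" feature is enabled, the crate is consumed as `pdf`, and the input path is hypothetical):

use pdf::file::FileOptions;
use pdf::error::Result;

fn dump_page_count() -> Result<()> {
    // hypothetical input path
    let file = FileOptions::cached().open("example.pdf")?;
    println!("{} pages", file.num_pages());
    for page in file.pages() {
        let _page = page?; // PageRc, resolved through the storage
    }
    Ok(())
}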
739
src-pdfrs/pdf/src/font.rs
Normal file
@ -0,0 +1,739 @@
use crate as pdf;
use crate::object::*;
use crate::primitive::*;
use crate::error::*;
use crate::encoding::Encoding;
use std::collections::HashMap;
use std::fmt::Write;
use crate::parser::{Lexer, parse_with_lexer, ParseFlags};
use std::convert::TryInto;
use std::sync::Arc;
use istring::SmallString;
use datasize::DataSize;
use itertools::Itertools;

#[allow(non_upper_case_globals, dead_code)]
mod flags {
    pub const FixedPitch: u32 = 1 << 0;
    pub const Serif: u32 = 1 << 1;
    pub const Symbolic: u32 = 1 << 2;
    pub const Script: u32 = 1 << 3;
    pub const Nonsymbolic: u32 = 1 << 5;
    pub const Italic: u32 = 1 << 6;
    pub const AllCap: u32 = 1 << 16;
    pub const SmallCap: u32 = 1 << 17;
    pub const ForceBold: u32 = 1 << 18;
}

#[derive(Object, ObjectWrite, Debug, Copy, Clone, DataSize, DeepClone)]
pub enum FontType {
    Type0,
    Type1,
    MMType1,
    Type3,
    TrueType,
    CIDFontType0, // Type1
    CIDFontType2, // TrueType
}

#[derive(Debug, DataSize, DeepClone)]
pub struct Font {
    pub subtype: FontType,
    pub name: Option<Name>,
    pub data: FontData,

    pub encoding: Option<Encoding>,

    // FIXME: Should use RcRef<Stream>
    pub to_unicode: Option<RcRef<Stream<()>>>,

    /// Other keys not mapped elsewhere. May change over time without notice,
    /// and adding entries will probably break things, so don't expect this
    /// to be part of the stable API.
    pub _other: Dictionary
}

#[derive(Debug, DataSize, DeepClone)]
pub enum FontData {
    Type1(TFont),
    Type0(Type0Font),
    TrueType(TFont),
    CIDFontType0(CIDFont),
    CIDFontType2(CIDFont),
    Other(Dictionary),
}

#[derive(Debug, DataSize, DeepClone)]
pub enum CidToGidMap {
    Identity,
    Table(Vec<u16>)
}
impl Object for CidToGidMap {
    fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result<Self> {
        match p {
            Primitive::Name(name) if name == "Identity" => {
                Ok(CidToGidMap::Identity)
            }
            p @ Primitive::Stream(_) | p @ Primitive::Reference(_) => {
                let stream: Stream<()> = Stream::from_primitive(p, resolve)?;
                let data = stream.data(resolve)?;
                Ok(CidToGidMap::Table(data.chunks_exact(2).map(|c| (c[0] as u16) << 8 | c[1] as u16).collect()))
            },
            p => Err(PdfError::UnexpectedPrimitive {
                expected: "/Identity or Stream",
                found: p.get_debug_name()
            })
        }
    }
}
impl ObjectWrite for CidToGidMap {
    fn to_primitive(&self, update: &mut impl Updater) -> Result<Primitive> {
        match self {
            CidToGidMap::Identity => Ok(Name::from("Identity").into()),
            CidToGidMap::Table(ref table) => {
                let mut data = Vec::with_capacity(table.len() * 2);
                data.extend(table.iter().flat_map(|&v| <[u8; 2]>::into_iter(v.to_be_bytes())));
                Stream::new((), data).to_primitive(update)
            }
        }
    }
}

impl Object for Font {
    fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result<Self> {
        let mut dict = p.resolve(resolve)?.into_dictionary()?;

        let subtype = t!(FontType::from_primitive(dict.require("Font", "Subtype")?, resolve));

        // BaseFont is required for all FontTypes except Type3
        dict.expect("Font", "Type", "Font", true)?;
        let base_font_primitive = dict.get("BaseFont");
        let base_font = match (base_font_primitive, subtype) {
            (Some(name), _) => Some(t!(t!(name.clone().resolve(resolve)).into_name(), name)),
            (None, FontType::Type3) => None,
            (_, _) => return Err(PdfError::MissingEntry {
                typ: "Font",
                field: "BaseFont".to_string()
            })
        };

        let encoding = dict.remove("Encoding").map(|p| Object::from_primitive(p, resolve)).transpose()?;

        let to_unicode = match dict.remove("ToUnicode") {
            Some(p) => Some(Object::from_primitive(p, resolve)?),
            None => None
        };
        let _other = dict.clone();
        let data = match subtype {
            FontType::Type0 => FontData::Type0(Type0Font::from_dict(dict, resolve)?),
            FontType::Type1 => FontData::Type1(TFont::from_dict(dict, resolve)?),
            FontType::TrueType => FontData::TrueType(TFont::from_dict(dict, resolve)?),
            FontType::CIDFontType0 => FontData::CIDFontType0(CIDFont::from_dict(dict, resolve)?),
            FontType::CIDFontType2 => FontData::CIDFontType2(CIDFont::from_dict(dict, resolve)?),
            _ => FontData::Other(dict)
        };

        Ok(Font {
            subtype,
            name: base_font,
            data,
            encoding,
            to_unicode,
            _other
        })
    }
}
impl ObjectWrite for Font {
    fn to_primitive(&self, update: &mut impl Updater) -> Result<Primitive> {
        let mut dict = match self.data {
            FontData::CIDFontType0(ref d) | FontData::CIDFontType2(ref d) => d.to_dict(update)?,
            FontData::TrueType(ref d) | FontData::Type1(ref d) => d.to_dict(update)?,
            FontData::Type0(ref d) => d.to_dict(update)?,
            FontData::Other(ref dict) => dict.clone(),
        };

        if let Some(ref to_unicode) = self.to_unicode {
            dict.insert("ToUnicode", to_unicode.to_primitive(update)?);
        }
        if let Some(ref encoding) = self.encoding {
            dict.insert("Encoding", encoding.to_primitive(update)?);
        }
        if let Some(ref name) = self.name {
            dict.insert("BaseFont", name.to_primitive(update)?);
        }

        let subtype = match self.data {
            FontData::Type0(_) => FontType::Type0,
            FontData::Type1(_) => FontType::Type1,
            FontData::TrueType(_) => FontType::TrueType,
            FontData::CIDFontType0(_) => FontType::CIDFontType0,
            FontData::CIDFontType2(_) => FontType::CIDFontType2,
            FontData::Other(_) => bail!("unimplemented")
        };
        dict.insert("Subtype", subtype.to_primitive(update)?);
        dict.insert("Type", Name::from("Font"));

        Ok(Primitive::Dictionary(dict))
    }
}


#[derive(Debug)]
pub struct Widths {
    values: Vec<f32>,
    default: f32,
    first_char: usize
}
impl Widths {
    pub fn get(&self, cid: usize) -> f32 {
        if cid < self.first_char {
            self.default
        } else {
            self.values.get(cid - self.first_char).cloned().unwrap_or(self.default)
        }
    }
    fn new(default: f32) -> Widths {
        Widths {
            default,
            values: Vec::new(),
            first_char: 0
        }
    }
    fn ensure_cid(&mut self, cid: usize) {
        if let Some(offset) = cid.checked_sub(self.first_char) { // cid may be < first_char
            // reserve the difference between the offset and the current capacity;
            // if the capacity already covers the offset, this saturates to zero and reserve does nothing
            self.values.reserve(offset.saturating_sub(self.values.capacity()));
        }
    }
    #[allow(clippy::float_cmp)] // TODO
    fn set(&mut self, cid: usize, width: f32) {
        self._set(cid, width);
        debug_assert_eq!(self.get(cid), width);
    }
    fn _set(&mut self, cid: usize, width: f32) {
        use std::iter::repeat;

        if self.values.is_empty() {
            self.first_char = cid;
            self.values.push(width);
            return;
        }

        if cid == self.first_char + self.values.len() {
            self.values.push(width);
            return;
        }

        if cid < self.first_char {
            self.values.splice(0 .. 0, repeat(self.default).take(self.first_char - cid));
            self.first_char = cid;
            self.values[0] = width;
            return;
        }

        if cid > self.values.len() + self.first_char {
            self.ensure_cid(cid);
            self.values.extend(repeat(self.default).take(cid - self.first_char - self.values.len()));
            self.values.push(width);
            return;
        }

        self.values[cid - self.first_char] = width;
    }
}
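For orientation, a hedged sketch of the CIDFont /W array semantics that `Font::widths` below feeds into this struct, using plain Rust data instead of `Primitive`s; `WEntry` and `expand` are illustrative names, not crate API:

// Illustrative only: the two forms a /W array entry can take.
//   c [w1 w2 w3]  => CIDs c, c+1, c+2 get widths w1, w2, w3
//   c1 c2 w       => every CID in c1..=c2 gets width w
enum WEntry { Run(usize, Vec<f32>), Range(usize, usize, f32) }

fn expand(entries: &[WEntry], default_width: f32, max_cid: usize) -> Vec<f32> {
    // assumes all entries fit within 0..=max_cid
    let mut w = vec![default_width; max_cid + 1];
    for e in entries {
        match *e {
            WEntry::Run(start, ref ws) => {
                for (i, &width) in ws.iter().enumerate() {
                    w[start + i] = width;
                }
            }
            WEntry::Range(c1, c2, width) => {
                for c in c1..=c2 {
                    w[c] = width;
                }
            }
        }
    }
    w
}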
impl Font {
    pub fn embedded_data(&self, resolve: &impl Resolve) -> Option<Result<Arc<[u8]>>> {
        match self.data {
            FontData::Type0(ref t) => t.descendant_fonts.get(0).and_then(|f| f.embedded_data(resolve)),
            FontData::CIDFontType0(ref c) | FontData::CIDFontType2(ref c) => c.font_descriptor.data(resolve),
            FontData::Type1(ref t) | FontData::TrueType(ref t) => t.font_descriptor.as_ref().and_then(|d| d.data(resolve)),
            _ => None
        }
    }
    pub fn is_cid(&self) -> bool {
        matches!(self.data, FontData::Type0(_) | FontData::CIDFontType0(_) | FontData::CIDFontType2(_))
    }
    pub fn cid_to_gid_map(&self) -> Option<&CidToGidMap> {
        match self.data {
            FontData::Type0(ref inner) => inner.descendant_fonts.get(0).and_then(|f| f.cid_to_gid_map()),
            FontData::CIDFontType0(ref f) | FontData::CIDFontType2(ref f) => f.cid_to_gid_map.as_ref(),
            _ => None
        }
    }
    pub fn encoding(&self) -> Option<&Encoding> {
        self.encoding.as_ref()
    }
    pub fn info(&self) -> Option<&TFont> {
        match self.data {
            FontData::Type1(ref info) => Some(info),
            FontData::TrueType(ref info) => Some(info),
            _ => None
        }
    }
    pub fn widths(&self, resolve: &impl Resolve) -> Result<Option<Widths>> {
        match self.data {
            FontData::Type0(ref t0) => t0.descendant_fonts[0].widths(resolve),
            FontData::Type1(ref info) | FontData::TrueType(ref info) => {
                match *info {
                    TFont { first_char: Some(first), ref widths, .. } => Ok(Some(Widths {
                        default: 0.0,
                        first_char: first as usize,
                        values: widths.as_ref().cloned().unwrap_or_default()
                    })),
                    _ => Ok(None)
                }
            },
            FontData::CIDFontType0(ref cid) | FontData::CIDFontType2(ref cid) => {
                let mut widths = Widths::new(cid.default_width);
                let mut iter = cid.widths.iter();
                while let Some(p) = iter.next() {
                    let c1 = p.as_usize()?;
                    match iter.next() {
                        Some(Primitive::Array(array)) => {
                            widths.ensure_cid(c1 + array.len() - 1);
                            for (i, w) in array.iter().enumerate() {
                                widths.set(c1 + i, w.as_number()?);
                            }
                        },
                        Some(&Primitive::Reference(r)) => {
                            match resolve.resolve(r)? {
                                Primitive::Array(array) => {
                                    widths.ensure_cid(c1 + array.len() - 1);
                                    for (i, w) in array.iter().enumerate() {
                                        widths.set(c1 + i, w.as_number()?);
                                    }
                                }
                                p => return Err(PdfError::Other { msg: format!("unexpected primitive in W array: {:?}", p) })
                            }
                        }
                        Some(&Primitive::Integer(c2)) => {
                            let w = try_opt!(iter.next()).as_number()?;
                            for c in c1 ..= (c2 as usize) {
                                widths.set(c, w);
                            }
                        },
                        p => return Err(PdfError::Other { msg: format!("unexpected primitive in W array: {:?}", p) })
                    }
                }
                Ok(Some(widths))
            },
            _ => Ok(None)
        }
    }
    pub fn to_unicode(&self, resolve: &impl Resolve) -> Option<Result<ToUnicodeMap>> {
        self.to_unicode.as_ref().map(|s| (**s).data(resolve).and_then(|d| parse_cmap(&d)))
    }
}
#[derive(Object, ObjectWrite, Debug, DataSize, DeepClone)]
pub struct TFont {
    #[pdf(key="BaseFont")]
    pub base_font: Option<Name>,

    /// Required per spec, but some files lack it.
    #[pdf(key="FirstChar")]
    pub first_char: Option<i32>,

    /// Same as `first_char`: required, but may be missing.
    #[pdf(key="LastChar")]
    pub last_char: Option<i32>,

    #[pdf(key="Widths")]
    pub widths: Option<Vec<f32>>,

    #[pdf(key="FontDescriptor")]
    pub font_descriptor: Option<FontDescriptor>
}

#[derive(Object, ObjectWrite, Debug, DataSize, DeepClone)]
pub struct Type0Font {
    #[pdf(key="DescendantFonts")]
    pub descendant_fonts: Vec<MaybeRef<Font>>,

    #[pdf(key="ToUnicode")]
    pub to_unicode: Option<RcRef<Stream<()>>>,
}

#[derive(Object, ObjectWrite, Debug, DataSize, DeepClone)]
pub struct CIDFont {
    #[pdf(key="CIDSystemInfo")]
    pub system_info: Dictionary,

    #[pdf(key="FontDescriptor")]
    pub font_descriptor: FontDescriptor,

    #[pdf(key="DW", default="1000.")]
    pub default_width: f32,

    #[pdf(key="W")]
    pub widths: Vec<Primitive>,

    #[pdf(key="CIDToGIDMap")]
    pub cid_to_gid_map: Option<CidToGidMap>,

    #[pdf(other)]
    pub _other: Dictionary
}


#[derive(Object, ObjectWrite, Debug, DataSize, DeepClone)]
pub struct FontDescriptor {
    #[pdf(key="FontName")]
    pub font_name: Name,

    #[pdf(key="FontFamily")]
    pub font_family: Option<PdfString>,

    #[pdf(key="FontStretch")]
    pub font_stretch: Option<FontStretch>,

    #[pdf(key="FontWeight")]
    pub font_weight: Option<f32>,

    #[pdf(key="Flags")]
    pub flags: u32,

    #[pdf(key="FontBBox")]
    pub font_bbox: Rectangle,

    #[pdf(key="ItalicAngle")]
    pub italic_angle: f32,

    // required per spec, but still missing in some files
    #[pdf(key="Ascent")]
    pub ascent: Option<f32>,

    #[pdf(key="Descent")]
    pub descent: Option<f32>,

    #[pdf(key="Leading", default="0.")]
    pub leading: f32,

    #[pdf(key="CapHeight")]
    pub cap_height: Option<f32>,

    #[pdf(key="XHeight", default="0.")]
    pub xheight: f32,

    #[pdf(key="StemV", default="0.")]
    pub stem_v: f32,

    #[pdf(key="StemH", default="0.")]
    pub stem_h: f32,

    #[pdf(key="AvgWidth", default="0.")]
    pub avg_width: f32,

    #[pdf(key="MaxWidth", default="0.")]
    pub max_width: f32,

    #[pdf(key="MissingWidth", default="0.")]
    pub missing_width: f32,

    #[pdf(key="FontFile")]
    pub font_file: Option<RcRef<Stream<()>>>,

    #[pdf(key="FontFile2")]
    pub font_file2: Option<RcRef<Stream<()>>>,

    #[pdf(key="FontFile3")]
    pub font_file3: Option<RcRef<Stream<FontStream3>>>,

    #[pdf(key="CharSet")]
    pub char_set: Option<PdfString>
}
impl FontDescriptor {
    pub fn data(&self, resolve: &impl Resolve) -> Option<Result<Arc<[u8]>>> {
        if let Some(ref s) = self.font_file {
            Some((**s).data(resolve))
        } else if let Some(ref s) = self.font_file2 {
            Some((**s).data(resolve))
        } else if let Some(ref s) = self.font_file3 {
            Some((**s).data(resolve))
        } else {
            None
        }
    }
}

#[derive(Object, ObjectWrite, Debug, Clone, DataSize, DeepClone)]
#[pdf(key="Subtype")]
pub enum FontTypeExt {
    Type1C,
    CIDFontType0C,
    OpenType
}
#[derive(Object, ObjectWrite, Debug, Clone, DataSize, DeepClone)]
pub struct FontStream3 {
    #[pdf(key="Subtype")]
    pub subtype: FontTypeExt
}

#[derive(Object, ObjectWrite, Debug, PartialEq, Eq, PartialOrd, Ord, Clone, DataSize, DeepClone)]
pub enum FontStretch {
    UltraCondensed,
    ExtraCondensed,
    Condensed,
    SemiCondensed,
    Normal,
    SemiExpanded,
    Expanded,
    ExtraExpanded,
    UltraExpanded
}

#[derive(Clone, Debug, Default)]
pub struct ToUnicodeMap {
    // todo: reduce allocations
    inner: HashMap<u16, SmallString>
}
impl ToUnicodeMap {
    pub fn new() -> Self {
        Self::default()
    }
    /// Create a new ToUnicodeMap from key/value pairs.
    ///
    /// Subject to change.
    pub fn create(iter: impl Iterator<Item=(u16, SmallString)>) -> Self {
        ToUnicodeMap { inner: iter.collect() }
    }
    pub fn get(&self, gid: u16) -> Option<&str> {
        self.inner.get(&gid).map(|s| s.as_str())
    }
    pub fn insert(&mut self, gid: u16, unicode: SmallString) {
        self.inner.insert(gid, unicode);
    }
    pub fn iter(&self) -> impl Iterator<Item=(u16, &str)> {
        self.inner.iter().map(|(&gid, unicode)| (gid, unicode.as_str()))
    }
    pub fn len(&self) -> usize {
        self.inner.len()
    }
    pub fn is_empty(&self) -> bool {
        self.inner.is_empty()
    }
}

/// Helper to decode UTF-16-BE data: takes a slice of u8 and returns an
/// iterator over chars, or a decoding error per item.
pub fn utf16be_to_char(
    data: &[u8],
) -> impl Iterator<Item = std::result::Result<char, std::char::DecodeUtf16Error>> + '_ {
    char::decode_utf16(data.chunks_exact(2).map(|w| u16::from_be_bytes([w[0], w[1]])))
}
/// Converts UTF-16-BE to a string, replacing illegal/unknown characters.
pub fn utf16be_to_string_lossy(data: &[u8]) -> String {
    utf16be_to_char(data)
        .map(|r| r.unwrap_or(std::char::REPLACEMENT_CHARACTER))
        .collect()
}
/// Converts UTF-16-BE to a string, erroring out on illegal/unknown characters.
pub fn utf16be_to_string(data: &[u8]) -> pdf::error::Result<SmallString> {
    utf16be_to_char(data)
        .map(|r| r.map_err(|_| PdfError::Utf16Decode))
        .collect()
}
fn parse_cid(s: &PdfString) -> Result<u16> {
    let b = s.as_bytes();
    match b.len() {
        2 => Ok(u16::from_be_bytes(b.try_into().unwrap())),
        1 => Ok(b[0] as u16),
        _ => Err(PdfError::CidDecode),
    }
}
fn parse_cmap(data: &[u8]) -> Result<ToUnicodeMap> {
    let mut lexer = Lexer::new(data);
    let mut map = ToUnicodeMap::new();
    while let Ok(substr) = lexer.next() {
        match substr.as_slice() {
            b"beginbfchar" => loop {
                let a = parse_with_lexer(&mut lexer, &NoResolve, ParseFlags::STRING);
                if a.is_err() {
                    break;
                }
                let b = parse_with_lexer(&mut lexer, &NoResolve, ParseFlags::STRING);
                match (a, b) {
                    (Ok(Primitive::String(cid_data)), Ok(Primitive::String(unicode_data))) => {
                        let cid = parse_cid(&cid_data)?;
                        let bytes = unicode_data.as_bytes();
                        match utf16be_to_string(bytes) {
                            Ok(unicode) => map.insert(cid, unicode),
                            Err(_) => warn!("invalid unicode for cid {cid} {bytes:?}"),
                        }
                    }
                    _ => break,
                }
            },
            b"beginbfrange" => loop {
                let a = parse_with_lexer(&mut lexer, &NoResolve, ParseFlags::STRING);
                if a.is_err() {
                    break;
                }
                let b = parse_with_lexer(&mut lexer, &NoResolve, ParseFlags::STRING);
                let c = parse_with_lexer(&mut lexer, &NoResolve, ParseFlags::STRING | ParseFlags::ARRAY);
                match (a, b, c) {
                    (
                        Ok(Primitive::String(cid_start_data)),
                        Ok(Primitive::String(cid_end_data)),
                        Ok(Primitive::String(unicode_data)),
                    ) if unicode_data.data.len() > 0 => {
                        let cid_start = parse_cid(&cid_start_data)?;
                        let cid_end = parse_cid(&cid_end_data)?;
                        let mut unicode_data = unicode_data.into_bytes();

                        for cid in cid_start..=cid_end {
                            match utf16be_to_string(&unicode_data) {
                                Ok(unicode) => map.insert(cid, unicode),
                                Err(_) => warn!("invalid unicode for cid {cid} {unicode_data:?}"),
                            }
                            let last = unicode_data.last_mut().unwrap();
                            if *last < 255 {
                                *last += 1;
                            } else {
                                break;
                            }
                        }
                    }
                    (
                        Ok(Primitive::String(cid_start_data)),
                        Ok(Primitive::String(cid_end_data)),
                        Ok(Primitive::Array(unicode_data_arr)),
                    ) => {
                        let cid_start = parse_cid(&cid_start_data)?;
                        let cid_end = parse_cid(&cid_end_data)?;

                        for (cid, unicode_data) in (cid_start..=cid_end).zip(unicode_data_arr) {
                            let bytes = unicode_data.as_string()?.as_bytes();
                            match utf16be_to_string(bytes) {
                                Ok(unicode) => map.insert(cid, unicode),
                                Err(_) => warn!("invalid unicode for cid {cid} {bytes:?}"),
                            }
                        }
                    }
                    _ => break,
                }
            },
            b"endcmap" => break,
            _ => {}
        }
    }

    Ok(map)
}

fn write_cid(w: &mut String, cid: u16) {
    write!(w, "<{:04X}>", cid).unwrap();
}
fn write_unicode(out: &mut String, unicode: &str) {
    let mut buf = [0; 2];
    write!(out, "<").unwrap();
    for c in unicode.chars() {
        let slice = c.encode_utf16(&mut buf);
        for &word in slice.iter() {
            write!(out, "{:04X}", word).unwrap();
        }
    }
    write!(out, ">").unwrap();
}
pub fn write_cmap(map: &ToUnicodeMap) -> String {
    let mut buf = String::new();
    let mut list: Vec<(u16, &str)> = map.inner.iter().map(|(&cid, s)| (cid, s.as_str())).collect();
    list.sort();

    let mut remaining = &list[..];
    let blocks = std::iter::from_fn(move || {
        if remaining.len() == 0 {
            return None;
        }
        let first_cid = remaining[0].0;
        let seq_len = remaining.iter().enumerate().take_while(|&(i, &(cid, _))| cid == first_cid + i as u16).count();

        let (block, tail) = remaining.split_at(seq_len);
        remaining = tail;
        Some(block)
    });

    for (single, group) in &blocks.group_by(|b| b.len() == 1) {
        if single {
            writeln!(buf, "beginbfchar").unwrap();
            for block in group {
                for &(cid, uni) in block {
                    write_cid(&mut buf, cid);
                    write!(buf, " ").unwrap();
                    write_unicode(&mut buf, uni);
                    writeln!(buf).unwrap();
                }
            }
            writeln!(buf, "endbfchar").unwrap();
        } else {
            writeln!(buf, "beginbfrange").unwrap();
            for block in group {
                write_cid(&mut buf, block[0].0);
                write!(buf, " ").unwrap();
                write_cid(&mut buf, block.last().unwrap().0);
                write!(buf, " [").unwrap();
                for (i, &(_cid, u)) in block.iter().enumerate() {
                    if i > 0 {
                        write!(buf, ", ").unwrap();
                    }
                    write_unicode(&mut buf, u);
                }
                writeln!(buf, "]").unwrap();
            }
            writeln!(buf, "endbfrange").unwrap();
        }
    }

    buf
}

#[cfg(test)]
mod tests {
    use crate::font::{utf16be_to_string, utf16be_to_char, utf16be_to_string_lossy};
    #[test]
    fn utf16be_to_string_quick() {
        let v = vec![0x20, 0x09];
        let s = utf16be_to_string(&v);
        assert_eq!(s.unwrap(), "\u{2009}");
        assert!(!v.is_empty());
    }

    #[test]
    fn test_to_char() {
        // 𝄞mus<invalid>ic<invalid>
        let v = [
            0xD8, 0x34, 0xDD, 0x1E, 0x00, 0x6d, 0x00, 0x75, 0x00, 0x73, 0xDD, 0x1E, 0x00, 0x69, 0x00,
            0x63, 0xD8, 0x34,
        ];

        assert_eq!(
            utf16be_to_char(&v)
                .map(|r| r.map_err(|e| e.unpaired_surrogate()))
                .collect::<Vec<_>>(),
            vec![
                Ok('𝄞'),
                Ok('m'),
                Ok('u'),
                Ok('s'),
                Err(0xDD1E),
                Ok('i'),
                Ok('c'),
                Err(0xD834)
            ]
        );

        let mut lossy = String::from("𝄞mus");
        lossy.push(std::char::REPLACEMENT_CHARACTER);
        lossy.push('i');
        lossy.push('c');
        lossy.push(std::char::REPLACEMENT_CHARACTER);

        let r = utf16be_to_string(&v);
        if let Err(r) = r {
            // FIXME: compare against PdfError::Utf16Decode variant
            assert_eq!(r.to_string(), "UTF16 decode error");
        }
        assert_eq!(utf16be_to_string(&v[..8]).unwrap(), String::from("𝄞mu"));
        assert_eq!(utf16be_to_string_lossy(&v), lossy);
    }
}
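A quick sketch of the UTF-16-BE helpers in font.rs, reusing values from the unit tests above (assumes the crate is consumed as `pdf`):

use pdf::font::{utf16be_to_string, utf16be_to_string_lossy};

fn demo() {
    // UTF-16-BE bytes for U+2009 (thin space)
    let ok = [0x20, 0x09];
    assert_eq!(utf16be_to_string(&ok).unwrap(), "\u{2009}");

    // an unpaired surrogate decodes lossily to U+FFFD
    let broken = [0xD8, 0x34];
    assert_eq!(utf16be_to_string_lossy(&broken), "\u{FFFD}");
}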
28
src-pdfrs/pdf/src/lib.rs
Normal file
@ -0,0 +1,28 @@
#![allow(non_camel_case_types)] /* TODO: temporary because of pdf_derive */
#![allow(unused_doc_comments)] /* TODO: temporary because of err.rs */
#![allow(clippy::len_zero, clippy::should_implement_trait, clippy::manual_map, clippy::from_over_into)]

#[macro_use] extern crate pdf_derive;
#[macro_use] extern crate snafu;
#[macro_use] extern crate log;

#[macro_use]
pub mod error;
pub mod object;
pub mod xref;
pub mod primitive;
pub mod file;
pub mod backend;
pub mod content;
pub mod parser;
pub mod font;
pub mod any;
pub mod encoding;
pub mod build;

// mod content;
pub mod enc;
pub mod crypt;

// pub use content::*;
pub use crate::error::PdfError;
49
src-pdfrs/pdf/src/macros.rs
Normal file
@ -0,0 +1,49 @@
macro_rules! write_entry {
    ($out:expr, $key:tt, $val:expr) => {
        {
            $out.write(b" ")?;
            $key.serialize($out)?;
            $out.write(b" ")?;
            $val.serialize($out)?;
            $out.write(b"\n")?;
        }
    }
}
macro_rules! write_entrys {
    ($out:expr, $key:tt << $val:expr $(,)*) => {
        write_entry!($out, $key, $val);
    };
    ($out:expr, $key:tt << $val:expr, $($rest:tt)*) => {
        {
            write_entry!($out, $key, $val);
            write_entrys!($out, $($rest)*);
        }
    };
    ($out:expr, $key:tt ? << $val:expr $(,)*) => {
        match &$val {
            &Some(ref v) => write_entry!($out, $key, v),
            &None => {}
        }
    };
    ($out:expr, $key:tt ? << $val:expr, $($rest:tt)*) => {
        {
            match &$val {
                &Some(ref v) => write_entry!($out, $key, v),
                &None => {}
            }
            write_entrys!($out, $($rest)*);
        }
    }
}

macro_rules! write_dict {
    ($out:expr, $($rest:tt)*) => {
        {
            write!($out, "<<\n")?;
            write_entrys!($out, $($rest)*);
            write!($out, ">>")?;
        }
    };
}
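Since these macros are internal, a rough sketch of what one call expands to may help; the `serialize` calls come from the crate's internal serialization, and the key/value names here are illustrative:

// write_dict!(out, "Filter" << filter, "Length" ? << length)
// expands (roughly) to:
//
//   write!(out, "<<\n")?;
//   // "Filter" << filter: always written
//   out.write(b" ")?;
//   "Filter".serialize(out)?;
//   out.write(b" ")?;
//   filter.serialize(out)?;
//   out.write(b"\n")?;
//   // "Length" ? << length: written only when the Option is Some
//   match &length {
//       &Some(ref v) => { /* same five statements, with v as the value */ }
//       &None => {}
//   }
//   write!(out, ">>")?;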
180
src-pdfrs/pdf/src/object/color.rs
Normal file
@ -0,0 +1,180 @@
use datasize::DataSize;
use crate as pdf;
use crate::object::*;
use crate::error::*;
use std::sync::Arc;

#[derive(Object, Debug, DataSize, DeepClone, ObjectWrite)]
pub struct IccInfo {
    #[pdf(key="N")]
    pub components: u32,

    #[pdf(key="Alternate")]
    pub alternate: Option<Box<ColorSpace>>,

    #[pdf(key="Range")]
    pub range: Option<Vec<f32>>,

    #[pdf(key="Metadata")]
    pub metadata: Option<Stream<()>>,
}

#[derive(Debug, Clone, DeepClone)]
pub enum ColorSpace {
    DeviceGray,
    DeviceRGB,
    DeviceCMYK,
    DeviceN { names: Vec<Name>, alt: Box<ColorSpace>, tint: Function, attr: Option<Dictionary> },
    CalGray(Dictionary),
    CalRGB(Dictionary),
    CalCMYK(Dictionary),
    Indexed(Box<ColorSpace>, u8, Arc<[u8]>),
    Separation(Name, Box<ColorSpace>, Function),
    Icc(RcRef<Stream<IccInfo>>),
    Pattern,
    Named(Name),
    Other(Vec<Primitive>)
}
impl DataSize for ColorSpace {
    const IS_DYNAMIC: bool = true;
    const STATIC_HEAP_SIZE: usize = 0;

    #[inline]
    fn estimate_heap_size(&self) -> usize {
        match *self {
            ColorSpace::DeviceGray | ColorSpace::DeviceRGB | ColorSpace::DeviceCMYK => 0,
            ColorSpace::DeviceN { ref names, ref alt, ref tint, ref attr } => {
                names.estimate_heap_size() +
                alt.estimate_heap_size() +
                tint.estimate_heap_size() +
                attr.estimate_heap_size()
            }
            ColorSpace::CalGray(ref d) | ColorSpace::CalRGB(ref d) | ColorSpace::CalCMYK(ref d) => {
                d.estimate_heap_size()
            }
            ColorSpace::Indexed(ref cs, _, ref data) => {
                cs.estimate_heap_size() + data.estimate_heap_size()
            }
            ColorSpace::Separation(ref name, ref cs, ref f) => {
                name.estimate_heap_size() + cs.estimate_heap_size() + f.estimate_heap_size()
            }
            ColorSpace::Icc(ref s) => s.estimate_heap_size(),
            ColorSpace::Pattern => 0,
            ColorSpace::Other(ref v) => v.estimate_heap_size(),
            ColorSpace::Named(ref n) => n.estimate_heap_size()
        }
    }
}

fn get_index(arr: &[Primitive], idx: usize) -> Result<&Primitive> {
    arr.get(idx).ok_or(PdfError::Bounds { index: idx, len: arr.len() })
}

impl Object for ColorSpace {
    fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result<ColorSpace> {
        ColorSpace::from_primitive_depth(p, resolve, 5)
    }
}
impl ColorSpace {
    fn from_primitive_depth(p: Primitive, resolve: &impl Resolve, depth: usize) -> Result<ColorSpace> {
        let p = p.resolve(resolve)?;

        if let Ok(name) = p.as_name() {
            let cs = match name {
                "DeviceGray" => ColorSpace::DeviceGray,
                "DeviceRGB" => ColorSpace::DeviceRGB,
                "DeviceCMYK" => ColorSpace::DeviceCMYK,
                "Pattern" => ColorSpace::Pattern,
                name => ColorSpace::Named(name.into()),
            };
            return Ok(cs);
        }
        let arr = t!(p.into_array());
        let typ_p = t!(get_index(&arr, 0)).clone().resolve(resolve)?;
        let typ = t!(typ_p.as_name());

        if depth == 0 {
            bail!("ColorSpace base recursion");
        }
        match typ {
            "Indexed" => {
                let base = Box::new(t!(ColorSpace::from_primitive_depth(t!(get_index(&arr, 1)).clone(), resolve, depth-1)));
                let hival = t!(t!(get_index(&arr, 2)).as_u8());
                let lookup = match t!(get_index(&arr, 3)) {
                    &Primitive::Reference(r) => resolve.resolve(r)?,
                    p => p.clone()
                };
                let lookup = match lookup {
                    Primitive::String(string) => {
                        let data: Vec<u8> = string.into_bytes().into();
                        data.into()
                    }
                    Primitive::Stream(stream) => {
                        let s: Stream<()> = Stream::from_stream(stream, resolve)?;
                        t!(s.data(resolve))
                    },
                    p => return Err(PdfError::UnexpectedPrimitive {
                        expected: "String or Stream",
                        found: p.get_debug_name()
                    })
                };
                Ok(ColorSpace::Indexed(base, hival, lookup))
            }
            "Separation" => {
                let name = t!(t!(get_index(&arr, 1)).clone().into_name());
                let alternate = Box::new(t!(ColorSpace::from_primitive_depth(t!(get_index(&arr, 2)).clone(), resolve, depth-1)));
                let tint = t!(Function::from_primitive(t!(get_index(&arr, 3)).clone(), resolve));
                Ok(ColorSpace::Separation(name, alternate, tint))
            }
            "ICCBased" => {
                let s = t!(RcRef::from_primitive(t!(get_index(&arr, 1)).clone(), resolve));
                Ok(ColorSpace::Icc(s))
            }
            "DeviceN" => {
                let names = t!(Object::from_primitive(t!(get_index(&arr, 1)).clone(), resolve));
                let alt = t!(Object::from_primitive(t!(get_index(&arr, 2)).clone(), resolve));
                let tint = t!(Function::from_primitive(t!(get_index(&arr, 3)).clone(), resolve));
                let attr = arr.get(4).map(|p| Dictionary::from_primitive(p.clone(), resolve)).transpose()?;

                Ok(ColorSpace::DeviceN { names, alt, tint, attr })
            }
            "CalGray" => {
                let dict = Dictionary::from_primitive(t!(get_index(&arr, 1)).clone(), resolve)?;
                Ok(ColorSpace::CalGray(dict))
            }
            "CalRGB" => {
                let dict = Dictionary::from_primitive(t!(get_index(&arr, 1)).clone(), resolve)?;
                Ok(ColorSpace::CalRGB(dict))
            }
            "CalCMYK" => {
                let dict = Dictionary::from_primitive(t!(get_index(&arr, 1)).clone(), resolve)?;
                Ok(ColorSpace::CalCMYK(dict))
            }
            "Pattern" => {
                Ok(ColorSpace::Pattern)
            }
            _ => Ok(ColorSpace::Other(arr))
        }
    }
}
impl ObjectWrite for ColorSpace {
    fn to_primitive(&self, update: &mut impl Updater) -> Result<Primitive> {
        match *self {
            ColorSpace::DeviceCMYK => Ok(Primitive::name("DeviceCMYK")),
            ColorSpace::DeviceRGB => Ok(Primitive::name("DeviceRGB")),
            ColorSpace::Indexed(ref base, hival, ref lookup) => {
                let base = base.to_primitive(update)?;
                let hival = Primitive::Integer(hival.into());
                let lookup = if lookup.len() < 100 {
                    PdfString::new((**lookup).into()).into()
                } else {
                    Stream::new((), lookup.clone()).to_primitive(update)?
                };
                Ok(Primitive::Array(vec![Primitive::name("Indexed"), base, hival, lookup]))
            }
            ref p => {
                dbg!(p);
                unimplemented!()
            }
        }
    }
}
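A hedged sketch of how an /Indexed lookup table (as parsed above) is meant to be consumed, assuming a DeviceRGB base space; `indexed_to_rgb` is an illustrative helper, not crate API:

// For ColorSpace::Indexed(base, hival, lookup) with base = DeviceRGB,
// palette index i (0..=hival) selects lookup[3*i .. 3*i+3] as R, G, B bytes.
fn indexed_to_rgb(lookup: &[u8], hival: u8, index: u8) -> Option<[f32; 3]> {
    if index > hival {
        return None; // outside the palette
    }
    let i = index as usize * 3;
    let c = lookup.get(i..i + 3)?;
    Some([c[0] as f32 / 255.0, c[1] as f32 / 255.0, c[2] as f32 / 255.0])
}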
477
src-pdfrs/pdf/src/object/function.rs
Normal file
@ -0,0 +1,477 @@
use crate as pdf;
use crate::object::*;
use crate::error::*;
use itertools::izip;
use datasize::DataSize;
use std::sync::Arc;

#[derive(Object, Debug, Clone, ObjectWrite)]
struct RawFunction {
    #[pdf(key="FunctionType")]
    function_type: u32,

    #[pdf(key="Domain")]
    domain: Vec<f32>,

    #[pdf(key="Range")]
    range: Option<Vec<f32>>,

    #[pdf(key="Size")]
    size: Option<Vec<u32>>,

    #[pdf(key="BitsPerSample")]
    _bits_per_sample: Option<u32>,

    #[pdf(key="Order", default="1")]
    order: u32,

    #[pdf(key="Encode")]
    encode: Option<Vec<f32>>,

    #[pdf(key="Decode")]
    decode: Option<Vec<f32>>,

    #[pdf(other)]
    other: Dictionary
}

#[derive(Object, Debug, Clone)]
struct Function2 {
    #[pdf(key="C0")]
    c0: Option<Vec<f32>>,

    #[pdf(key="C1")]
    c1: Option<Vec<f32>>,

    #[pdf(key="N")]
    exponent: f32,
}

#[derive(Debug, Clone, DataSize)]
pub enum Function {
    Sampled(SampledFunction),
    Interpolated(Vec<InterpolatedFunctionDim>),
    Stiching,
    Calculator,
    PostScript { func: PsFunc, domain: Vec<f32>, range: Vec<f32> },
}
impl Function {
    pub fn apply(&self, x: &[f32], out: &mut [f32]) -> Result<()> {
        match *self {
            Function::Sampled(ref func) => {
                func.apply(x, out)
            }
            Function::Interpolated(ref parts) => {
                if parts.len() != out.len() {
                    bail!("incorrect output length: expected {}, found {}.", parts.len(), out.len())
                }
                for (f, y) in parts.iter().zip(out) {
                    *y = f.apply(x[0]);
                }
                Ok(())
            }
            Function::PostScript { ref func, .. } => func.exec(x, out),
            _ => bail!("unimplemented function {:?}", self)
        }
    }
    pub fn input_dim(&self) -> usize {
        match *self {
            Function::PostScript { ref domain, .. } => domain.len() / 2,
            Function::Sampled(ref f) => f.input.len(),
            _ => panic!()
        }
    }
    pub fn output_dim(&self) -> usize {
        match *self {
            Function::PostScript { ref range, .. } => range.len() / 2,
            Function::Sampled(ref f) => f.output.len(),
            _ => panic!()
        }
    }
}
impl FromDict for Function {
    fn from_dict(dict: Dictionary, resolve: &impl Resolve) -> Result<Self> {
        use std::f32::INFINITY;
        let raw = RawFunction::from_dict(dict, resolve)?;
        match raw.function_type {
            2 => {
                let f2 = Function2::from_dict(raw.other, resolve)?;

                let n_dim = match (raw.range.as_ref(), f2.c0.as_ref(), f2.c1.as_ref()) {
                    (Some(range), _, _) => range.len() / 2,
                    (_, Some(c0), _) => c0.len(),
                    (_, _, Some(c1)) => c1.len(),
                    _ => bail!("unknown dimensions")
                };
                let mut parts = Vec::with_capacity(n_dim);
                let input_range = (raw.domain[0], raw.domain[1]);
                for dim in 0 .. n_dim {
                    let output_range = (
                        raw.range.as_ref().and_then(|r| r.get(2*dim).cloned()).unwrap_or(-INFINITY),
                        raw.range.as_ref().and_then(|r| r.get(2*dim+1).cloned()).unwrap_or(INFINITY)
                    );
                    let c0 = f2.c0.as_ref().and_then(|c0| c0.get(dim).cloned()).unwrap_or(0.0);
                    let c1 = f2.c1.as_ref().and_then(|c1| c1.get(dim).cloned()).unwrap_or(1.0);
                    let exponent = f2.exponent;
                    parts.push(InterpolatedFunctionDim {
                        input_range, output_range, c0, c1, exponent
                    });
                }
                Ok(Function::Interpolated(parts))
            },
            i => {
                dbg!(raw);
                bail!("unsupported function type {}", i)
            }
        }
    }
}
impl Object for Function {
    fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result<Self> {
        match p {
            Primitive::Dictionary(dict) => Self::from_dict(dict, resolve),
            Primitive::Stream(s) => {
                let stream = Stream::<RawFunction>::from_stream(s, resolve)?;
                let data = stream.data(resolve)?;
                match stream.info.function_type {
                    4 => {
                        let s = std::str::from_utf8(&data)?;
                        let func = PsFunc::parse(s)?;
                        let info = stream.info.info;
                        Ok(Function::PostScript { func, domain: info.domain, range: info.range.unwrap() })
                    },
                    0 => {
                        let info = stream.info.info;
                        let order = match info.order {
                            1 => Interpolation::Linear,
                            3 => Interpolation::Cubic,
                            n => bail!("Invalid interpolation order {}", n),
                        };

                        let size = try_opt!(info.size);
                        let range = try_opt!(info.range);
                        let encode = info.encode.unwrap_or_else(|| size.iter().flat_map(|&n| [0.0, (n-1) as f32]).collect());
                        let decode = info.decode.unwrap_or_else(|| range.clone());

                        Ok(Function::Sampled(SampledFunction {
                            input: izip!(info.domain.chunks_exact(2), encode.chunks_exact(2), size.iter()).map(|(c, e, &s)| {
                                SampledFunctionInput {
                                    domain: (c[0], c[1]),
                                    encode_offset: e[0],
                                    encode_scale: e[1],
                                    size: s as usize,
                                }
                            }).collect(),
                            output: decode.chunks_exact(2).map(|c| SampledFunctionOutput {
                                offset: c[0],
                                scale: (c[1] - c[0]) / 255.,
                            }).collect(),
                            data,
                            order,
                            range,
                        }))
                    }
                    ref p => bail!("found a function stream with type {:?}", p)
                }
            },
            Primitive::Reference(r) => Self::from_primitive(resolve.resolve(r)?, resolve),
            _ => bail!("double indirection")
        }
    }
}
impl ObjectWrite for Function {
    fn to_primitive(&self, _update: &mut impl Updater) -> Result<Primitive> {
        unimplemented!()
        /*
        let dict = match self {
            Function::Interpolated(parts) => {
                let first: &InterpolatedFunctionDim = try_opt!(parts.get(0));
                let f2 = Function2 {
                    c0: parts.iter().map(|p| p.c0).collect(),
                    c1: parts.iter().map(|p| p.c0).collect(),
                    exponent: first.exponent
                };
                let f = RawFunction {
                    function_type: 2,
                    domain: vec![first.input_range.0, first.input_range.1],
                    range: parts.iter().flat_map(|p| [p.output_range.0, p.output_range.1]).collect(),
                    decode: None,
                    encode: None,
                    order
                };

            }
        }
        */
    }
}
impl DeepClone for Function {
    fn deep_clone(&self, _cloner: &mut impl Cloner) -> Result<Self> {
        Ok(self.clone())
    }
}

#[derive(Debug, Clone, DataSize)]
struct SampledFunctionInput {
    domain: (f32, f32),
    encode_offset: f32,
    encode_scale: f32,
    size: usize,
}
impl SampledFunctionInput {
    fn map(&self, x: f32) -> (usize, usize, f32) {
        let x = x.clamp(self.domain.0, self.domain.1);
        let y = x.mul_add(self.encode_scale, self.encode_offset);
        (y.floor() as usize, self.size, y.fract())
    }
}

#[derive(Debug, Clone, DataSize)]
struct SampledFunctionOutput {
    offset: f32,
    scale: f32
}
impl SampledFunctionOutput {
    fn map(&self, x: f32) -> f32 {
        x.mul_add(self.scale, self.offset)
    }
}

#[derive(Debug, Clone, DataSize)]
enum Interpolation {
    Linear,
    #[allow(dead_code)] // TODO
    Cubic,
}

#[derive(Debug, Clone, DataSize)]
pub struct SampledFunction {
    input: Vec<SampledFunctionInput>,
    output: Vec<SampledFunctionOutput>,
    data: Arc<[u8]>,
    order: Interpolation,
    range: Vec<f32>,
}
impl SampledFunction {
    fn apply(&self, x: &[f32], out: &mut [f32]) -> Result<()> {
        if x.len() != self.input.len() {
            bail!("input dimension mismatch {} != {}", x.len(), self.input.len());
        }
        let n_out = out.len();
        if out.len() * 2 != self.range.len() {
            bail!("output dimension mismatch 2 * {} != {}", out.len(), self.range.len())
        }
        match x.len() {
            1 => {
                match self.order {
                    Interpolation::Linear => {
                        let (i, _, s) = self.input[0].map(x[0]);
                        let idx = i * n_out;

                        for (o, &a) in out.iter_mut().zip(&self.data[idx..]) {
                            *o = a as f32 * (1. - s);
                        }
                        for (o, &b) in out.iter_mut().zip(&self.data[idx + n_out..]) {
                            *o += b as f32 * s;
                        }
                    }
                    _ => unimplemented!()
                }
            }
            2 => match self.order {
                Interpolation::Linear => {
                    let (i0, s0, f0) = self.input[0].map(x[0]);
                    let (i1, _, f1) = self.input[1].map(x[1]);
                    let (j0, j1) = (i0+1, i1+1);
                    let (g0, g1) = (1. - f0, 1. - f1);

                    out.fill(0.0);
                    let mut add = |i0, i1, f| {
                        let idx = (i0 + s0 * i1) * n_out;

                        if let Some(part) = self.data.get(idx .. idx+n_out) {
                            for (o, &b) in out.iter_mut().zip(part) {
                                *o += f * b as f32;
                            }
                        }
                    };

                    add(i0, i1, g0 * g1);
                    add(j0, i1, f0 * g1);
                    add(i0, j1, g0 * f1);
                    add(j0, j1, f0 * f1);
                }
                _ => unimplemented!()
            }
            3 => match self.order {
                Interpolation::Linear => {
                    let (i0, s0, f0) = self.input[0].map(x[0]);
                    let (i1, s1, f1) = self.input[1].map(x[1]);
                    let (i2, _, f2) = self.input[2].map(x[2]);
                    let (j0, j1, j2) = (i0+1, i1+1, i2+1);
                    let (g0, g1, g2) = (1. - f0, 1. - f1, 1. - f2);

                    out.fill(0.0);
                    let mut add = |i0, i1, i2, f| {
                        let idx = (i0 + s0 * (i1 + s1 * i2)) * n_out;

                        if let Some(part) = self.data.get(idx .. idx+n_out) {
                            for (o, &b) in out.iter_mut().zip(part) {
                                *o += f * b as f32;
                            }
                        }
                    };

                    add(i0, i1, i2, g0 * g1 * g2);
                    add(j0, i1, i2, f0 * g1 * g2);
                    add(i0, j1, i2, g0 * f1 * g2);
                    add(j0, j1, i2, f0 * f1 * g2);

                    add(i0, i1, j2, g0 * g1 * f2);
                    add(j0, i1, j2, f0 * g1 * f2);
                    add(i0, j1, j2, g0 * f1 * f2);
                    add(j0, j1, j2, f0 * f1 * f2);
                }
                _ => unimplemented!()
            }
            n => bail!("unsupported input dimension {}", n)
        }
        for (o, y) in self.output.iter().zip(out.iter_mut()) {
            *y = o.map(*y);
        }
        Ok(())
    }
}


#[derive(Debug, Clone, DataSize)]
pub struct InterpolatedFunctionDim {
    pub input_range: (f32, f32),
    pub output_range: (f32, f32),
    pub c0: f32,
    pub c1: f32,
    pub exponent: f32,
}
impl InterpolatedFunctionDim {
    pub fn apply(&self, x: f32) -> f32 {
        let y = self.c0 + x.powf(self.exponent) * (self.c1 - self.c0);
        let (y0, y1) = self.output_range;
        y.min(y1).max(y0)
    }
}
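A worked example of the Type 2 (exponential) formula implemented above, y = C0 + x^N * (C1 - C0), clamped to the output range; the struct is built directly here for illustration:

use pdf::object::InterpolatedFunctionDim;

fn demo() {
    let dim = InterpolatedFunctionDim {
        input_range: (0.0, 1.0),
        output_range: (0.0, 1.0),
        c0: 0.0,
        c1: 1.0,
        exponent: 2.0,
    };
    // 0 + 0.5^2 * (1 - 0) = 0.25, already inside the output range
    assert!((dim.apply(0.5) - 0.25).abs() < 1e-6);
}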
#[derive(Debug)]
pub enum PostScriptError {
    StackUnderflow,
    IncorrectStackSize
}
#[derive(Debug, Clone, DataSize)]
pub struct PsFunc {
    pub ops: Vec<PsOp>
}

macro_rules! op {
    ($stack:ident; $($v:ident),* => $($e:expr),*) => ( {
        $(let $v = $stack.pop().ok_or(PostScriptError::StackUnderflow)?;)*
        $($stack.push($e);)*
    } )
}

impl PsFunc {
    fn exec_inner(&self, stack: &mut Vec<f32>) -> Result<(), PostScriptError> {
        for &op in &self.ops {
            match op {
                PsOp::Int(i) => stack.push(i as f32),
                PsOp::Value(v) => stack.push(v),
                PsOp::Dup => op!(stack; v => v, v),
                PsOp::Exch => op!(stack; b, a => b, a),
                PsOp::Add => op!(stack; b, a => a + b),
                PsOp::Sub => op!(stack; b, a => a - b),
                PsOp::Mul => op!(stack; b, a => a * b),
                PsOp::Abs => op!(stack; a => a.abs()),
                PsOp::Roll => {
                    let j = stack.pop().ok_or(PostScriptError::StackUnderflow)? as isize;
                    let n = stack.pop().ok_or(PostScriptError::StackUnderflow)? as usize;
                    let start = stack.len() - n;
                    let slice = &mut stack[start..];
                    if j > 0 {
                        slice.rotate_right(j as usize);
                    } else {
                        slice.rotate_left(-j as usize);
                    }
                }
                PsOp::Index => {
                    let n = stack.pop().ok_or(PostScriptError::StackUnderflow)? as usize;
                    if n >= stack.len() { return Err(PostScriptError::StackUnderflow); }
                    let val = stack[stack.len() - n - 1];
                    stack.push(val);
                }
                PsOp::Cvr => {}
                PsOp::Pop => {
                    stack.pop().ok_or(PostScriptError::StackUnderflow)?;
                }
            }
        }
        Ok(())
    }
    pub fn exec(&self, input: &[f32], output: &mut [f32]) -> Result<()> {
        let mut stack = Vec::with_capacity(10);
        stack.extend_from_slice(input);
        match self.exec_inner(&mut stack) {
            Ok(()) => {},
            Err(_) => return Err(PdfError::PostScriptExec)
        }
        if output.len() != stack.len() {
            bail!("incorrect output length: expected {}, found {}.", stack.len(), output.len())
        }
        output.copy_from_slice(&stack);
        Ok(())
    }
    pub fn parse(s: &str) -> Result<Self, PdfError> {
        let start = s.find('{').ok_or(PdfError::PostScriptParse)?;
        let end = s.rfind('}').ok_or(PdfError::PostScriptParse)?;

        let ops: Result<Vec<_>, _> = s[start + 1 .. end].split_ascii_whitespace().map(PsOp::parse).collect();
        Ok(PsFunc { ops: ops? })
    }
}

#[derive(Copy, Clone, Debug, DataSize)]
pub enum PsOp {
    Int(i32),
    Value(f32),
    Add,
    Sub,
    Abs,
    Mul,
    Dup,
    Exch,
    Roll,
    Index,
    Cvr,
    Pop,
}
impl PsOp {
    pub fn parse(s: &str) -> Result<Self> {
        if let Ok(i) = s.parse::<i32>() {
            Ok(PsOp::Int(i))
        } else if let Ok(f) = s.parse::<f32>() {
            Ok(PsOp::Value(f))
        } else {
            Ok(match s {
                "add" => PsOp::Add,
                "sub" => PsOp::Sub,
                "abs" => PsOp::Abs,
                "mul" => PsOp::Mul,
                "dup" => PsOp::Dup,
                "exch" => PsOp::Exch,
                "roll" => PsOp::Roll,
                "index" => PsOp::Index,
                "cvr" => PsOp::Cvr,
                "pop" => PsOp::Pop,
                _ => {
                    bail!("unimplemented op {}", s);
                }
            })
        }
    }
}
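A short sketch of driving the PostScript calculator above: parse the program body, then exec with matching input/output arity (assumes the crate's `Result` alias in the error module):

use pdf::object::PsFunc;

fn demo() -> pdf::error::Result<()> {
    // doubles both inputs and swaps them: [3, 5] -> [10, 6]
    let func = PsFunc::parse("{ 2 mul exch 2 mul }")?;
    let mut out = [0.0f32; 2];
    func.exec(&[3.0, 5.0], &mut out)?;
    assert_eq!(out, [10.0, 6.0]);
    Ok(())
}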
854
src-pdfrs/pdf/src/object/mod.rs
Normal file
@ -0,0 +1,854 @@
//! `Object` trait, along with some implementations. References.
//!
//! Some of the structs are incomplete (missing fields that are in the PDF references).

mod types;
mod stream;
mod color;
mod function;

pub use self::types::*;
pub use self::stream::*;
pub use self::color::*;
pub use self::function::*;
pub use crate::file::PromisedRef;
use crate::parser::ParseFlags;

use crate::primitive::*;
use crate::error::*;
use crate::enc::*;

use std::fmt;
use std::marker::PhantomData;
use std::collections::HashMap;
use std::sync::Arc;
use std::ops::{Deref, Range};
use std::hash::{Hash, Hasher};
use std::convert::TryInto;
use datasize::DataSize;
use itertools::Itertools;
use once_cell::sync::OnceCell;

pub type ObjNr = u64;
pub type GenNr = u64;

pub struct ParseOptions {
    pub allow_error_in_option: bool,
    pub allow_xref_error: bool,
    pub allow_invalid_ops: bool,
    pub allow_missing_endobj: bool,
}
impl ParseOptions {
    pub const fn tolerant() -> Self {
        ParseOptions {
            allow_error_in_option: true,
            allow_xref_error: true,
            allow_invalid_ops: true,
            allow_missing_endobj: true,
        }
    }
    pub const fn strict() -> Self {
        ParseOptions {
            allow_error_in_option: false,
            allow_xref_error: false,
            allow_invalid_ops: true,
            allow_missing_endobj: false,
        }
    }
}
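Sketch of wiring these options into the loader (hedged; uses the FileOptions builder from file.rs, the "cache" feature, and a hypothetical path):

use pdf::file::FileOptions;
use pdf::object::ParseOptions;

fn open_tolerant() -> pdf::error::Result<()> {
    // strict() is the builder default; tolerant() accepts damaged files
    let _file = FileOptions::cached()
        .parse_options(ParseOptions::tolerant())
        .open("damaged.pdf")?;
    Ok(())
}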
pub trait Resolve: {
|
||||
fn resolve_flags(&self, r: PlainRef, flags: ParseFlags, depth: usize) -> Result<Primitive>;
|
||||
fn resolve(&self, r: PlainRef) -> Result<Primitive> {
|
||||
self.resolve_flags(r, ParseFlags::ANY, 16)
|
||||
}
|
||||
fn get<T: Object+DataSize>(&self, r: Ref<T>) -> Result<RcRef<T>>;
|
||||
fn options(&self) -> &ParseOptions;
|
||||
fn stream_data(&self, id: PlainRef, range: Range<usize>) -> Result<Arc<[u8]>>;
|
||||
fn get_data_or_decode(&self, id: PlainRef, range: Range<usize>, filters: &[StreamFilter]) -> Result<Arc<[u8]>>;
|
||||
}
|
||||
|
||||
pub struct NoResolve;
|
||||
impl Resolve for NoResolve {
|
||||
fn resolve_flags(&self, _: PlainRef, _: ParseFlags, _: usize) -> Result<Primitive> {
|
||||
Err(PdfError::Reference)
|
||||
}
|
||||
fn get<T: Object+DataSize>(&self, _r: Ref<T>) -> Result<RcRef<T>> {
|
||||
Err(PdfError::Reference)
|
||||
}
|
||||
fn options(&self) -> &ParseOptions {
|
||||
static STRICT: ParseOptions = ParseOptions::strict();
|
||||
&STRICT
|
||||
}
|
||||
fn get_data_or_decode(&self, _: PlainRef, _: Range<usize>, _: &[StreamFilter]) -> Result<Arc<[u8]>> {
|
||||
Err(PdfError::Reference)
|
||||
}
|
||||
fn stream_data(&self, id: PlainRef, range: Range<usize>) -> Result<Arc<[u8]>> {
|
||||
Err(PdfError::Reference)
|
||||
}
|
||||
|
||||
}

/// A PDF Object
pub trait Object: Sized + Sync + Send + 'static {
    /// Convert primitive to Self
    fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result<Self>;
}

pub trait Cloner: Updater + Resolve {
    fn clone_plainref(&mut self, old: PlainRef) -> Result<PlainRef>;
    fn clone_ref<T: DeepClone + Object + DataSize + ObjectWrite>(&mut self, old: Ref<T>) -> Result<Ref<T>>;
    fn clone_rcref<T: DeepClone + ObjectWrite + DataSize>(&mut self, old: &RcRef<T>) -> Result<RcRef<T>>;
    fn clone_shared<T: DeepClone>(&mut self, old: &Shared<T>) -> Result<Shared<T>>;
}

pub trait DeepClone: Sized + Sync + Send + 'static {
    fn deep_clone(&self, cloner: &mut impl Cloner) -> Result<Self>;
}

pub trait Updater {
    fn create<T: ObjectWrite>(&mut self, obj: T) -> Result<RcRef<T>>;
    fn update<T: ObjectWrite>(&mut self, old: PlainRef, obj: T) -> Result<RcRef<T>>;
    fn promise<T: Object>(&mut self) -> PromisedRef<T>;
    fn fulfill<T: ObjectWrite>(&mut self, promise: PromisedRef<T>, obj: T) -> Result<RcRef<T>>;
}

pub struct NoUpdate;
impl Updater for NoUpdate {
    fn create<T: ObjectWrite>(&mut self, _obj: T) -> Result<RcRef<T>> { panic!() }
    fn update<T: ObjectWrite>(&mut self, _old: PlainRef, _obj: T) -> Result<RcRef<T>> { panic!() }
    fn promise<T: Object>(&mut self) -> PromisedRef<T> { panic!() }
    fn fulfill<T: ObjectWrite>(&mut self, _promise: PromisedRef<T>, _obj: T) -> Result<RcRef<T>> { panic!() }
}

pub trait ObjectWrite {
    fn to_primitive(&self, update: &mut impl Updater) -> Result<Primitive>;
}

pub trait FromDict: Sized {
    fn from_dict(dict: Dictionary, resolve: &impl Resolve) -> Result<Self>;
}
pub trait ToDict: ObjectWrite {
    fn to_dict(&self, update: &mut impl Updater) -> Result<Dictionary>;
}

pub trait SubType<T> {}

pub trait Trace {
    fn trace(&self, _cb: &mut impl FnMut(PlainRef)) {}
}

///////
// Refs
///////

// TODO move to primitive.rs
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, DataSize)]
pub struct PlainRef {
    pub id: ObjNr,
    pub gen: GenNr,
}
impl Object for PlainRef {
    fn from_primitive(p: Primitive, _: &impl Resolve) -> Result<Self> {
        p.into_reference()
    }
}
impl ObjectWrite for PlainRef {
    fn to_primitive(&self, _: &mut impl Updater) -> Result<Primitive> {
        Ok(Primitive::Reference(*self))
    }
}
impl DeepClone for PlainRef {
    fn deep_clone(&self, cloner: &mut impl Cloner) -> Result<Self> {
        cloner.clone_plainref(*self)
    }
}

// NOTE: Copy & Clone implemented manually ( https://github.com/rust-lang/rust/issues/26925 )

#[derive(DataSize)]
pub struct Ref<T> {
    inner: PlainRef,
    _marker: PhantomData<T>,
}
impl<T> Clone for Ref<T> {
    fn clone(&self) -> Ref<T> {
        *self
    }
}
impl<T> Copy for Ref<T> {}

impl<T> Ref<T> {
    pub fn new(inner: PlainRef) -> Ref<T> {
        Ref {
            inner,
            _marker: PhantomData,
        }
    }
    pub fn from_id(id: ObjNr) -> Ref<T> {
        Ref {
            inner: PlainRef { id, gen: 0 },
            _marker: PhantomData,
        }
    }
    pub fn get_inner(&self) -> PlainRef {
        self.inner
    }
    pub fn upcast<U>(self) -> Ref<U> where T: SubType<U> {
        Ref::new(self.inner)
    }
}
impl<T: Object> Object for Ref<T> {
    fn from_primitive(p: Primitive, _: &impl Resolve) -> Result<Self> {
        Ok(Ref::new(p.into_reference()?))
    }
}
impl<T> ObjectWrite for Ref<T> {
    fn to_primitive(&self, update: &mut impl Updater) -> Result<Primitive> {
        self.inner.to_primitive(update)
    }
}
impl<T: DeepClone + Object + DataSize + ObjectWrite> DeepClone for Ref<T> {
    fn deep_clone(&self, cloner: &mut impl Cloner) -> Result<Self> {
        cloner.clone_ref(*self)
    }
}
impl<T> Trace for Ref<T> {
    fn trace(&self, cb: &mut impl FnMut(PlainRef)) {
        cb(self.inner);
    }
}
impl<T> fmt::Debug for Ref<T> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "Ref({})", self.inner.id)
    }
}
impl<T> Hash for Ref<T> {
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.inner.hash(state)
    }
}
impl<T> PartialEq for Ref<T> {
    fn eq(&self, rhs: &Self) -> bool {
        self.inner.eq(&rhs.inner)
    }
}
impl<T> Eq for Ref<T> {}

pub type Shared<T> = Arc<T>;

#[derive(Debug, DataSize)]
pub struct RcRef<T> {
    inner: PlainRef,
    data: Shared<T>,
}
impl<T> From<RcRef<T>> for Primitive {
    fn from(value: RcRef<T>) -> Self {
        Primitive::Reference(value.inner)
    }
}
impl<T> From<RcRef<T>> for Ref<T> {
    fn from(value: RcRef<T>) -> Self {
        value.get_ref()
    }
}

impl<T> RcRef<T> {
    pub fn new(inner: PlainRef, data: Shared<T>) -> RcRef<T> {
        RcRef { inner, data }
    }
    pub fn get_ref(&self) -> Ref<T> {
        Ref::new(self.inner)
    }
    pub fn data(&self) -> &Shared<T> {
        &self.data
    }
}
impl<T: Object + std::fmt::Debug + DataSize> Object for RcRef<T> {
    fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result<Self> {
        match p {
            Primitive::Reference(r) => resolve.get(Ref::new(r)),
            p => Err(PdfError::UnexpectedPrimitive { expected: "Reference", found: p.get_debug_name() }),
        }
    }
}
impl<T> ObjectWrite for RcRef<T> {
    fn to_primitive(&self, update: &mut impl Updater) -> Result<Primitive> {
        self.inner.to_primitive(update)
    }
}
impl<T: DeepClone + std::fmt::Debug + DataSize + Object + ObjectWrite> DeepClone for RcRef<T> {
    fn deep_clone(&self, cloner: &mut impl Cloner) -> Result<Self> {
        cloner.clone_rcref(self)
    }
}

impl<T> Deref for RcRef<T> {
    type Target = T;
    fn deref(&self) -> &T {
        &self.data
    }
}
impl<T> Clone for RcRef<T> {
    fn clone(&self) -> RcRef<T> {
        RcRef {
            inner: self.inner,
            data: self.data.clone(),
        }
    }
}
impl<T> Trace for RcRef<T> {
    fn trace(&self, cb: &mut impl FnMut(PlainRef)) {
        cb(self.inner);
    }
}
impl<'a, T> From<&'a RcRef<T>> for Ref<T> {
    fn from(r: &'a RcRef<T>) -> Ref<T> {
        Ref::new(r.inner)
    }
}
impl<T> Hash for RcRef<T> {
    fn hash<H: Hasher>(&self, state: &mut H) {
        std::ptr::hash(&**self, state)
    }
}
impl<T> PartialEq for RcRef<T> {
    fn eq(&self, rhs: &Self) -> bool {
        std::ptr::eq(&**self, &**rhs)
    }
}
impl<T> Eq for RcRef<T> {}

#[derive(Debug, DataSize)]
pub enum MaybeRef<T> {
    Direct(Shared<T>),
    Indirect(RcRef<T>),
}
impl<T> MaybeRef<T> {
    pub fn as_ref(&self) -> Option<Ref<T>> {
        match *self {
            MaybeRef::Indirect(ref r) => Some(r.get_ref()),
            _ => None,
        }
    }
    pub fn data(&self) -> &Shared<T> {
        match *self {
            MaybeRef::Direct(ref t) => t,
            MaybeRef::Indirect(ref r) => &r.data,
        }
    }
}
impl<T: Object + DataSize> Object for MaybeRef<T> {
    fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result<Self> {
        Ok(match p {
            Primitive::Reference(r) => MaybeRef::Indirect(resolve.get(Ref::new(r))?),
            p => MaybeRef::Direct(Shared::new(T::from_primitive(p, resolve)?)),
        })
    }
}
impl<T: ObjectWrite> ObjectWrite for MaybeRef<T> {
    fn to_primitive(&self, update: &mut impl Updater) -> Result<Primitive> {
        match self {
            MaybeRef::Direct(ref inner) => inner.to_primitive(update),
            MaybeRef::Indirect(r) => r.to_primitive(update),
        }
    }
}
impl<T: DeepClone + std::fmt::Debug + DataSize + Object + ObjectWrite> DeepClone for MaybeRef<T> {
    fn deep_clone(&self, cloner: &mut impl Cloner) -> Result<Self> {
        match *self {
            MaybeRef::Direct(ref old) => cloner.clone_shared(old).map(MaybeRef::Direct),
            MaybeRef::Indirect(ref old) => cloner.clone_rcref(old).map(MaybeRef::Indirect),
        }
    }
}
impl<T> Deref for MaybeRef<T> {
    type Target = T;
    fn deref(&self) -> &T {
        match *self {
            MaybeRef::Direct(ref t) => t,
            MaybeRef::Indirect(ref r) => r,
        }
    }
}
impl<T> Clone for MaybeRef<T> {
    fn clone(&self) -> Self {
        match *self {
            MaybeRef::Direct(ref rc) => MaybeRef::Direct(rc.clone()),
            MaybeRef::Indirect(ref r) => MaybeRef::Indirect(r.clone()),
        }
    }
}
impl<T> Trace for MaybeRef<T> {
    fn trace(&self, cb: &mut impl FnMut(PlainRef)) {
        match *self {
            MaybeRef::Indirect(ref rc) => rc.trace(cb),
            MaybeRef::Direct(_) => (),
        }
    }
}
impl<T> From<Shared<T>> for MaybeRef<T> {
    fn from(r: Shared<T>) -> MaybeRef<T> {
        MaybeRef::Direct(r)
    }
}
impl<T> From<T> for MaybeRef<T> {
    fn from(t: T) -> MaybeRef<T> {
        MaybeRef::Direct(t.into())
    }
}
impl<T> From<MaybeRef<T>> for Shared<T> {
    fn from(r: MaybeRef<T>) -> Shared<T> {
        match r {
            MaybeRef::Direct(rc) => rc,
            MaybeRef::Indirect(r) => r.data,
        }
    }
}
impl<'a, T> From<&'a MaybeRef<T>> for Shared<T> {
    fn from(r: &'a MaybeRef<T>) -> Shared<T> {
        match r {
            MaybeRef::Direct(ref rc) => rc.clone(),
            MaybeRef::Indirect(ref r) => r.data.clone(),
        }
    }
}
impl<T> From<RcRef<T>> for MaybeRef<T> {
    fn from(r: RcRef<T>) -> MaybeRef<T> {
        MaybeRef::Indirect(r)
    }
}
impl<T> Hash for MaybeRef<T> {
    fn hash<H: Hasher>(&self, state: &mut H) {
        std::ptr::hash(&**self, state)
    }
}
impl<T> PartialEq for MaybeRef<T> {
    fn eq(&self, rhs: &Self) -> bool {
        std::ptr::eq(&**self, &**rhs)
    }
}
impl<T> Eq for MaybeRef<T> {}
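
// Note (added sketch): equality and hashing for `RcRef` and `MaybeRef` go by
// pointer identity of the shared allocation, not by value, e.g.:
//
//     let a: MaybeRef<i32> = 1.into();
//     let b: MaybeRef<i32> = 1.into();
//     assert_ne!(a, b);          // two distinct allocations holding 1
//     assert_eq!(a.clone(), a);  // a clone shares the same allocation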

#[derive(Debug)]
pub struct Lazy<T> {
    primitive: Primitive,
    cache: OnceCell<MaybeRef<T>>,
    _marker: PhantomData<T>,
}
impl<T: DataSize> DataSize for Lazy<T> {
    const IS_DYNAMIC: bool = true;
    const STATIC_HEAP_SIZE: usize = size_of::<Self>();
    fn estimate_heap_size(&self) -> usize {
        self.cache.get().map(|value| value.estimate_heap_size()).unwrap_or(0) + size_of::<Self>()
    }
}
impl<T> Clone for Lazy<T> {
    fn clone(&self) -> Self {
        Lazy {
            primitive: self.primitive.clone(),
            cache: self.cache.clone(),
            _marker: PhantomData,
        }
    }
}
impl<T: Object> DeepClone for Lazy<T> {
    fn deep_clone(&self, cloner: &mut impl Cloner) -> Result<Self> {
        Ok(Lazy {
            primitive: self.primitive.deep_clone(cloner)?,
            cache: OnceCell::new(),
            _marker: PhantomData,
        })
    }
}
impl<T: Object + DataSize> Lazy<T> {
    pub fn load(&self, resolve: &impl Resolve) -> Result<MaybeRef<T>> {
        self.cache.get_or_try_init(|| {
            match self.primitive {
                Primitive::Reference(r) => resolve.get(Ref::new(r)).map(MaybeRef::Indirect),
                ref p => T::from_primitive(p.clone(), resolve).map(|o| MaybeRef::Direct(Arc::new(o))),
            }
        }).cloned()
    }
}
impl<T: Object> Object for Lazy<T> {
    fn from_primitive(p: Primitive, _: &impl Resolve) -> Result<Self> {
        Ok(Self {
            primitive: p,
            cache: OnceCell::new(),
            _marker: PhantomData,
        })
    }
}
impl<T: ObjectWrite> ObjectWrite for Lazy<T> {
    fn to_primitive(&self, _update: &mut impl Updater) -> Result<Primitive> {
        Ok(self.primitive.clone())
    }
}
impl<T> Default for Lazy<T> {
    fn default() -> Self {
        Lazy {
            primitive: Primitive::Null,
            cache: OnceCell::new(),
            _marker: PhantomData,
        }
    }
}
impl<T: Object> From<RcRef<T>> for Lazy<T> {
    fn from(value: RcRef<T>) -> Self {
        Lazy {
            primitive: Primitive::Reference(value.inner),
            cache: OnceCell::with_value(MaybeRef::Direct(value.data)),
            _marker: PhantomData,
        }
    }
}
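
// Behavior sketch (illustrative; `MyInfo`, `prim` and `resolve` are
// placeholders): `Lazy<T>` keeps the raw primitive and defers decoding to the
// first `load()`, memoizing the result in the `OnceCell`:
//
//     let lazy = Lazy::<MyInfo>::from_primitive(prim, &NoResolve)?;
//     let first = lazy.load(&resolve)?; // parses or resolves the reference
//     let again = lazy.load(&resolve)?; // served from the cache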

//////////////////////////////////////
// Object for Primitives & other types
//////////////////////////////////////

impl Object for i32 {
    fn from_primitive(p: Primitive, r: &impl Resolve) -> Result<Self> {
        match p {
            Primitive::Reference(id) => r.resolve(id)?.as_integer(),
            p => p.as_integer(),
        }
    }
}
impl ObjectWrite for i32 {
    fn to_primitive(&self, _: &mut impl Updater) -> Result<Primitive> {
        Ok(Primitive::Integer(*self))
    }
}

impl Object for u32 {
    fn from_primitive(p: Primitive, r: &impl Resolve) -> Result<Self> {
        match p {
            Primitive::Reference(id) => r.resolve(id)?.as_u32(),
            p => p.as_u32(),
        }
    }
}
impl ObjectWrite for u32 {
    fn to_primitive(&self, _: &mut impl Updater) -> Result<Primitive> {
        Ok(Primitive::Integer(*self as _))
    }
}

impl Object for usize {
    fn from_primitive(p: Primitive, r: &impl Resolve) -> Result<Self> {
        match p {
            Primitive::Reference(id) => Ok(r.resolve(id)?.as_u32()? as usize),
            p => Ok(p.as_u32()? as usize),
        }
    }
}
impl ObjectWrite for usize {
    fn to_primitive(&self, _: &mut impl Updater) -> Result<Primitive> {
        Ok(Primitive::Integer(*self as _))
    }
}

impl Object for f32 {
    fn from_primitive(p: Primitive, r: &impl Resolve) -> Result<Self> {
        match p {
            Primitive::Reference(id) => r.resolve(id)?.as_number(),
            p => p.as_number(),
        }
    }
}
impl ObjectWrite for f32 {
    fn to_primitive(&self, _: &mut impl Updater) -> Result<Primitive> {
        Ok(Primitive::Number(*self))
    }
}

impl Object for bool {
    fn from_primitive(p: Primitive, r: &impl Resolve) -> Result<Self> {
        match p {
            Primitive::Reference(id) => r.resolve(id)?.as_bool(),
            p => p.as_bool(),
        }
    }
}
impl ObjectWrite for bool {
    fn to_primitive(&self, _: &mut impl Updater) -> Result<Primitive> {
        Ok(Primitive::Boolean(*self))
    }
}
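
// Round-trip sketch (added for illustration; not an upstream test): the scalar
// impls above write themselves as `Primitive` values and read back through any
// `Resolve`, here the trivial `NoResolve`/`NoUpdate` pair.
#[cfg(test)]
mod scalar_roundtrip_sketch {
    use super::*;

    #[test]
    fn scalars_roundtrip() -> Result<()> {
        assert_eq!(i32::from_primitive(42i32.to_primitive(&mut NoUpdate)?, &NoResolve)?, 42);
        assert_eq!(bool::from_primitive(true.to_primitive(&mut NoUpdate)?, &NoResolve)?, true);
        Ok(())
    }
}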

impl Object for Dictionary {
    fn from_primitive(p: Primitive, r: &impl Resolve) -> Result<Self> {
        match p {
            Primitive::Dictionary(dict) => Ok(dict),
            Primitive::Reference(id) => Dictionary::from_primitive(r.resolve(id)?, r),
            _ => Err(PdfError::UnexpectedPrimitive { expected: "Dictionary", found: p.get_debug_name() }),
        }
    }
}

impl Object for Name {
    fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result<Self> {
        p.resolve(resolve)?.into_name()
    }
}
impl ObjectWrite for Name {
    fn to_primitive(&self, _: &mut impl Updater) -> Result<Primitive> {
        Ok(Primitive::Name(self.0.clone()))
    }
}

impl<T: Object> Object for Vec<T> {
    /// Converts an array element by element; `Null` becomes an empty vector,
    /// and any other single primitive is parsed as a `T` and wrapped in a
    /// one-element vector.
    fn from_primitive(p: Primitive, r: &impl Resolve) -> Result<Self> {
        Ok(
            match p {
                Primitive::Array(_) => {
                    p.resolve(r)?.into_array()?
                        .into_iter()
                        .map(|p| T::from_primitive(p, r))
                        .collect::<Result<Vec<T>>>()?
                },
                Primitive::Null => Vec::new(),
                Primitive::Reference(id) => Self::from_primitive(r.resolve(id)?, r)?,
                _ => vec![T::from_primitive(p, r)?],
            }
        )
    }
}
impl<T: ObjectWrite> ObjectWrite for Vec<T> {
    fn to_primitive(&self, update: &mut impl Updater) -> Result<Primitive> {
        Primitive::array::<T, _, _, _>(self.iter(), update)
    }
}
impl<T: DeepClone> DeepClone for Vec<T> {
    fn deep_clone(&self, cloner: &mut impl Cloner) -> Result<Self> {
        self.iter().map(|t| t.deep_clone(cloner)).collect()
    }
}
impl<T: Trace> Trace for Vec<T> {
    fn trace(&self, cb: &mut impl FnMut(PlainRef)) {
        for i in self.iter() {
            i.trace(cb);
        }
    }
}
/*
pub struct Data(pub Vec<u8>);
impl Object for Data {
    fn serialize<W: io::Write>(&self, out: &mut W) -> Result<()> {
        unimplemented!()
    }
    fn from_primitive(p: Primitive, r: &impl Resolve) -> Result<Self> {
        match p {
            Primitive::Array(_) => {
                p.into_array(r)?
                    .into_iter()
                    .map(|p| u8::from_primitive(p, r))
                    .collect::<Result<Vec<T>>>()?
            },
            Primitive::Null => {
                Vec::new()
            }
            Primitive::Reference(id) => Self::from_primitive(r.resolve(id)?, r)?,
            _ =>
        }
    }
}*/

impl Object for Primitive {
    fn from_primitive(p: Primitive, _: &impl Resolve) -> Result<Self> {
        Ok(p)
    }
}
impl ObjectWrite for Primitive {
    fn to_primitive(&self, _: &mut impl Updater) -> Result<Primitive> {
        Ok(self.clone())
    }
}
impl DeepClone for Primitive {
    fn deep_clone(&self, cloner: &mut impl Cloner) -> Result<Self> {
        match *self {
            Primitive::Array(ref parts) => Ok(Primitive::Array(parts.iter().map(|p| p.deep_clone(cloner)).try_collect()?)),
            Primitive::Boolean(b) => Ok(Primitive::Boolean(b)),
            Primitive::Dictionary(ref dict) => Ok(Primitive::Dictionary(dict.deep_clone(cloner)?)),
            Primitive::Integer(i) => Ok(Primitive::Integer(i)),
            Primitive::Name(ref name) => Ok(Primitive::Name(name.clone())),
            Primitive::Null => Ok(Primitive::Null),
            Primitive::Number(n) => Ok(Primitive::Number(n)),
            Primitive::Reference(r) => Ok(Primitive::Reference(r.deep_clone(cloner)?)),
            Primitive::Stream(ref s) => Ok(Primitive::Stream(s.deep_clone(cloner)?)),
            Primitive::String(ref s) => Ok(Primitive::String(s.clone())),
        }
    }
}

impl Trace for Primitive {
    fn trace(&self, cb: &mut impl FnMut(PlainRef)) {
        match *self {
            Primitive::Reference(r) => cb(r),
            Primitive::Array(ref parts) => parts.iter().for_each(|p| p.trace(cb)),
            Primitive::Dictionary(ref dict) => dict.values().for_each(|p| p.trace(cb)),
            _ => (),
        }
    }
}

impl<V: Object> Object for HashMap<Name, V> {
    fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result<Self> {
        match p {
            Primitive::Null => Ok(HashMap::new()),
            Primitive::Dictionary(dict) => {
                let mut new = Self::new();
                for (key, val) in dict.iter() {
                    new.insert(key.clone(), V::from_primitive(val.clone(), resolve)?);
                }
                Ok(new)
            }
            Primitive::Reference(id) => HashMap::from_primitive(resolve.resolve(id)?, resolve),
            p => Err(PdfError::UnexpectedPrimitive { expected: "Dictionary", found: p.get_debug_name() }),
        }
    }
}
impl<V: ObjectWrite> ObjectWrite for HashMap<Name, V> {
    fn to_primitive(&self, update: &mut impl Updater) -> Result<Primitive> {
        if self.is_empty() {
            Ok(Primitive::Null)
        } else {
            let mut dict = Dictionary::new();
            for (k, v) in self.iter() {
                dict.insert(k.clone(), v.to_primitive(update)?);
            }
            Ok(Primitive::Dictionary(dict))
        }
    }
}
impl<V: DeepClone> DeepClone for HashMap<Name, V> {
    fn deep_clone(&self, cloner: &mut impl Cloner) -> Result<Self> {
        self.iter().map(|(k, v)| Ok((k.clone(), v.deep_clone(cloner)?))).collect()
    }
}

impl<T: Object> Object for Option<T> {
    fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result<Self> {
        match p {
            Primitive::Null => Ok(None),
            p => match T::from_primitive(p, resolve) {
                Ok(p) => Ok(Some(p)),
                // References to non-existing objects ought not to be an error
                Err(PdfError::NullRef { .. }) => Ok(None),
                Err(PdfError::FreeObject { .. }) => Ok(None),
                Err(e) if resolve.options().allow_error_in_option => {
                    warn!("ignoring {:?}", e);
                    Ok(None)
                }
                Err(e) => Err(e),
            }
        }
    }
}
impl<T: ObjectWrite> ObjectWrite for Option<T> {
    fn to_primitive(&self, update: &mut impl Updater) -> Result<Primitive> {
        match self {
            None => Ok(Primitive::Null),
            Some(t) => t.to_primitive(update),
        }
    }
}
impl<T: DeepClone> DeepClone for Option<T> {
    fn deep_clone(&self, cloner: &mut impl Cloner) -> Result<Self> {
        match self {
            None => Ok(None),
            Some(t) => t.deep_clone(cloner).map(Some),
        }
    }
}

impl<T: Trace> Trace for Option<T> {
    fn trace(&self, cb: &mut impl FnMut(PlainRef)) {
        if let Some(ref t) = *self {
            t.trace(cb)
        }
    }
}

impl<T: Object> Object for Box<T> {
    fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result<Self> {
        T::from_primitive(p, resolve).map(Box::new)
    }
}
impl<T: ObjectWrite> ObjectWrite for Box<T> {
    fn to_primitive(&self, update: &mut impl Updater) -> Result<Primitive> {
        (**self).to_primitive(update)
    }
}
impl<T: Trace> Trace for Box<T> {
    fn trace(&self, cb: &mut impl FnMut(PlainRef)) {
        (**self).trace(cb)
    }
}

impl Object for () {
    fn from_primitive(_p: Primitive, _resolve: &impl Resolve) -> Result<Self> {
        Ok(())
    }
}
impl ObjectWrite for () {
    fn to_primitive(&self, _: &mut impl Updater) -> Result<Primitive> {
        Ok(Primitive::Null)
    }
}
impl Trace for () {}

impl<T, U> Object for (T, U) where T: Object, U: Object {
    fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result<Self> {
        let arr = p.resolve(resolve)?.into_array()?;
        if arr.len() != 2 {
            bail!("expected array of length 2 (found {})", arr.len());
        }
        let [a, b]: [Primitive; 2] = arr.try_into().unwrap();
        Ok((T::from_primitive(a, resolve)?, U::from_primitive(b, resolve)?))
    }
}

impl<T, U> ObjectWrite for (T, U) where T: ObjectWrite, U: ObjectWrite {
    fn to_primitive(&self, update: &mut impl Updater) -> Result<Primitive> {
        Ok(Primitive::Array(vec![self.0.to_primitive(update)?, self.1.to_primitive(update)?]))
    }
}

impl<T: Trace, U: Trace> Trace for (T, U) {
    fn trace(&self, cb: &mut impl FnMut(PlainRef)) {
        self.0.trace(cb);
        self.1.trace(cb);
    }
}

impl<T: DeepClone> DeepClone for Box<T> {
    fn deep_clone(&self, cloner: &mut impl Cloner) -> Result<Self> {
        Ok(Box::new((&**self).deep_clone(cloner)?))
    }
}
macro_rules! deep_clone_simple {
    ($($t:ty),*) => (
        $(
            impl DeepClone for $t {
                fn deep_clone(&self, _cloner: &mut impl Cloner) -> Result<Self> {
                    Ok(self.clone())
                }
            }
        )*
    )
}
deep_clone_simple!(f32, i32, u32, bool, Name, (), Date, PdfString, Rectangle, u8, Arc<[u8]>, Vec<u16>);
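
// Expansion sketch (illustrative): for each listed type the macro above emits
// a trivial `DeepClone` that just clones the value, e.g. for `f32`:
//
//     impl DeepClone for f32 {
//         fn deep_clone(&self, _cloner: &mut impl Cloner) -> Result<Self> {
//             Ok(self.clone())
//         }
//     }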

impl<A: DeepClone, B: DeepClone> DeepClone for (A, B) {
    fn deep_clone(&self, cloner: &mut impl Cloner) -> Result<Self> {
        Ok((self.0.deep_clone(cloner)?, self.1.deep_clone(cloner)?))
    }
}
391
src-pdfrs/pdf/src/object/stream.rs
Normal file
@ -0,0 +1,391 @@

use datasize::DataSize;

use crate as pdf;
use crate::object::*;
use crate::primitive::*;
use crate::error::*;
use crate::parser::Lexer;
use crate::enc::{StreamFilter, decode};

use std::ops::{Deref, Range};
use std::fmt;

#[derive(Clone)]
pub(crate) enum StreamData {
    Generated(Arc<[u8]>),
    Original(Range<usize>, PlainRef),
}
datasize::non_dynamic_const_heap_size!(StreamData, std::mem::size_of::<StreamData>());

/// Simple Stream object with only some additional entries from the stream dict (I).
#[derive(Clone, DataSize)]
pub struct Stream<I> {
    pub info: StreamInfo<I>,
    pub(crate) inner_data: StreamData,
}
impl<I: Object> Stream<I> {
    pub fn from_stream(s: PdfStream, resolve: &impl Resolve) -> Result<Self> {
        let PdfStream { info, inner } = s;
        let info = StreamInfo::<I>::from_primitive(Primitive::Dictionary(info), resolve)?;
        let inner_data = match inner {
            StreamInner::InFile { id, file_range } => StreamData::Original(file_range, id),
            StreamInner::Pending { data } => StreamData::Generated(data),
        };
        Ok(Stream { info, inner_data })
    }

    /// The data is not compressed; the specified filters are to be applied
    /// when compressing the data.
    pub fn new_with_filters(i: I, data: impl Into<Arc<[u8]>>, filters: Vec<StreamFilter>) -> Stream<I> {
        Stream {
            info: StreamInfo {
                filters,
                file: None,
                file_filters: Vec::new(),
                info: i,
            },
            inner_data: StreamData::Generated(data.into()),
        }
    }
    pub fn new(i: I, data: impl Into<Arc<[u8]>>) -> Stream<I> {
        Stream {
            info: StreamInfo {
                filters: Vec::new(),
                file: None,
                file_filters: Vec::new(),
                info: i,
            },
            inner_data: StreamData::Generated(data.into()),
        }
    }
    /// The data is already compressed with the specified filters.
    pub fn from_compressed(i: I, data: impl Into<Arc<[u8]>>, filters: Vec<StreamFilter>) -> Stream<I> {
        Stream {
            info: StreamInfo {
                filters,
                file: None,
                file_filters: Vec::new(),
                info: i,
            },
            inner_data: StreamData::Generated(data.into()),
        }
    }

    pub fn data(&self, resolve: &impl Resolve) -> Result<Arc<[u8]>> {
        match self.inner_data {
            StreamData::Generated(ref data) => {
                let filters = &self.info.filters;
                if filters.is_empty() {
                    Ok(data.clone())
                } else {
                    use std::borrow::Cow;
                    let mut data: Cow<[u8]> = (&**data).into();
                    for filter in filters {
                        data = t!(decode(&data, filter), filter).into();
                    }
                    Ok(data.into())
                }
            }
            StreamData::Original(ref file_range, id) => {
                resolve.get_data_or_decode(id, file_range.clone(), &self.info.filters)
            }
        }
    }

    pub fn len(&self) -> usize {
        match self.inner_data {
            StreamData::Generated(ref data) => data.len(),
            StreamData::Original(ref range, _) => range.len(),
        }
    }
}
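
// Usage sketch (illustrative, not upstream code): a stream built in memory
// without filters reads back unchanged, since `data()` only decodes when
// `info.filters` is non-empty.
//
//     let s: Stream<()> = Stream::new((), b"hello".to_vec());
//     assert_eq!(&*s.data(&NoResolve)?, b"hello");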

impl<I: Object + fmt::Debug> fmt::Debug for Stream<I> {
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        write!(f, "Stream info={:?}, len={}", self.info.info, self.len())
    }
}

impl<I: Object> Object for Stream<I> {
    /// Convert primitive to Self
    fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result<Self> {
        let s = PdfStream::from_primitive(p, resolve)?;
        Stream::from_stream(s, resolve)
    }
}
impl<I: ObjectWrite> Stream<I> {
    pub fn to_pdf_stream(&self, update: &mut impl Updater) -> Result<PdfStream> {
        let mut info = match self.info.info.to_primitive(update)? {
            Primitive::Dictionary(dict) => dict,
            Primitive::Null => Dictionary::new(),
            p => bail!("stream info has to be a dictionary (found {:?})", p),
        };
        let mut params = None;
        if !self.info.filters.is_empty() {
            for f in self.info.filters.iter() {
                if let Some(para) = match f {
                    StreamFilter::LZWDecode(ref p) => Some(p.to_primitive(update)?),
                    StreamFilter::FlateDecode(ref p) => Some(p.to_primitive(update)?),
                    StreamFilter::DCTDecode(ref p) => Some(p.to_primitive(update)?),
                    StreamFilter::CCITTFaxDecode(ref p) => Some(p.to_primitive(update)?),
                    StreamFilter::JBIG2Decode(ref p) => Some(p.to_primitive(update)?),
                    _ => None,
                } {
                    assert!(params.is_none());
                    params = Some(para);
                }
            }
            let mut filters = self.info.filters.iter().map(|filter| match filter {
                StreamFilter::ASCIIHexDecode => "ASCIIHexDecode",
                StreamFilter::ASCII85Decode => "ASCII85Decode",
                StreamFilter::LZWDecode(ref _p) => "LZWDecode",
                StreamFilter::FlateDecode(ref _p) => "FlateDecode",
                StreamFilter::JPXDecode => "JPXDecode",
                StreamFilter::DCTDecode(ref _p) => "DCTDecode",
                StreamFilter::CCITTFaxDecode(ref _p) => "CCITTFaxDecode",
                StreamFilter::JBIG2Decode(ref _p) => "JBIG2Decode",
                StreamFilter::Crypt => "Crypt",
                StreamFilter::RunLengthDecode => "RunLengthDecode",
            })
            .map(|s| Primitive::Name(s.into()));
            match self.info.filters.len() {
                0 => {},
                1 => {
                    info.insert("Filter", filters.next().unwrap().to_primitive(update)?);
                }
                _ => {
                    info.insert("Filter", Primitive::array::<Primitive, _, _, _>(filters, update)?);
                }
            }
        }
        if let Some(para) = params {
            info.insert("DecodeParms", para);
        }

        let inner = match self.inner_data {
            StreamData::Generated(ref data) => {
                info.insert("Length", Primitive::Integer(data.len() as _));
                StreamInner::Pending { data: data.clone() }
            },
            StreamData::Original(ref file_range, id) => {
                info.insert("Length", Primitive::Integer(file_range.len() as _));
                StreamInner::InFile { id, file_range: file_range.clone() }
            }
        };

        Ok(PdfStream { info, inner })
    }
}
impl<I: ObjectWrite> ObjectWrite for Stream<I> {
    fn to_primitive(&self, update: &mut impl Updater) -> Result<Primitive> {
        self.to_pdf_stream(update).map(Primitive::Stream)
    }
}
impl<I: DeepClone> DeepClone for Stream<I> {
    fn deep_clone(&self, cloner: &mut impl Cloner) -> Result<Self> {
        let data = match self.inner_data {
            StreamData::Generated(ref data) => data.clone(),
            StreamData::Original(ref range, id) => cloner.stream_data(id, range.clone())?,
        };
        Ok(Stream {
            info: self.info.deep_clone(cloner)?,
            inner_data: StreamData::Generated(data),
        })
    }
}
impl<I: Object> Deref for Stream<I> {
    type Target = StreamInfo<I>;
    fn deref(&self) -> &StreamInfo<I> {
        &self.info
    }
}

/// General stream type. `I` is the additional information to be read from the stream dict.
#[derive(Debug, Clone, DataSize, DeepClone)]
pub struct StreamInfo<I> {
    // General dictionary entries

    /// Filters that the `data` is currently encoded with (corresponds to both `/Filter` and
    /// `/DecodeParms` in the PDF specs), constructed in `from_primitive()`.
    pub filters: Vec<StreamFilter>,

    /// Eventual file containing the stream contents.
    pub file: Option<FileSpec>,
    /// Filters to apply to external file specified in `file`.
    pub file_filters: Vec<StreamFilter>,

    // TODO:
    /*
    /// Filters to apply to external file specified in `file`.
    #[pdf(key="FFilter")]
    file_filters: Vec<StreamFilter>,
    #[pdf(key="FDecodeParms")]
    file_decode_parms: Vec<DecodeParms>,
    /// Number of bytes in the decoded stream
    #[pdf(key="DL")]
    dl: Option<usize>,
    */
    // Specialized dictionary entries
    pub info: I,
}

impl<I> Deref for StreamInfo<I> {
    type Target = I;
    fn deref(&self) -> &I {
        &self.info
    }
}

impl<I: Default> Default for StreamInfo<I> {
    fn default() -> StreamInfo<I> {
        StreamInfo {
            filters: Vec::new(),
            file: None,
            file_filters: Vec::new(),
            info: I::default(),
        }
    }
}
impl<T> StreamInfo<T> {
    /*
    /// If the stream is not encoded, this is a no-op. `decode()` should be called whenever it's uncertain
    /// whether the stream is encoded.
    pub fn encode(&mut self, _filter: StreamFilter) {
        // TODO this should add the filter to `self.filters` and encode the data with the given
        // filter
        unimplemented!();
    }*/
    pub fn get_filters(&self) -> &[StreamFilter] {
        &self.filters
    }
}
impl<T: Object> Object for StreamInfo<T> {
    fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result<Self> {
        let mut dict = Dictionary::from_primitive(p, resolve)?;

        let _length = usize::from_primitive(
            dict.remove("Length").ok_or(PdfError::MissingEntry { typ: "StreamInfo", field: "Length".into() })?,
            resolve)?;

        let filters = Vec::<Name>::from_primitive(
            dict.remove("Filter").unwrap_or(Primitive::Null),
            resolve)?;

        let decode_params = Vec::<Option<Dictionary>>::from_primitive(
            dict.remove("DecodeParms").unwrap_or(Primitive::Null),
            resolve)?;

        let file = Option::<FileSpec>::from_primitive(
            dict.remove("F").unwrap_or(Primitive::Null),
            resolve)?;

        let file_filters = Vec::<Name>::from_primitive(
            dict.remove("FFilter").unwrap_or(Primitive::Null),
            resolve)?;

        let file_decode_params = Vec::<Dictionary>::from_primitive(
            dict.remove("FDecodeParms").unwrap_or(Primitive::Null),
            resolve)?;

        let mut new_filters = Vec::new();
        let mut new_file_filters = Vec::new();

        for (i, filter) in filters.iter().enumerate() {
            let params = match decode_params.get(i) {
                Some(Some(params)) => params.clone(),
                _ => Dictionary::default(),
            };
            new_filters.push(StreamFilter::from_kind_and_params(filter, params, resolve)?);
        }
        for (i, filter) in file_filters.iter().enumerate() {
            let params = match file_decode_params.get(i) {
                Some(params) => params.clone(),
                None => Dictionary::default(),
            };
            new_file_filters.push(StreamFilter::from_kind_and_params(filter, params, resolve)?);
        }

        Ok(StreamInfo {
            // General
            filters: new_filters,
            file,
            file_filters: new_file_filters,
            // Special
            info: T::from_primitive(Primitive::Dictionary(dict), resolve)?,
        })
    }
}

#[derive(Object, Default, Debug, DataSize)]
#[pdf(Type = "ObjStm")]
pub struct ObjStmInfo {
    #[pdf(key = "N")]
    /// Number of compressed objects in the stream.
    pub num_objects: usize,

    #[pdf(key = "First")]
    /// The byte offset in the decoded stream, of the first compressed object.
    pub first: usize,

    #[pdf(key = "Extends")]
    /// A reference to an eventual ObjectStream which this ObjectStream extends.
    pub extends: Option<Ref<Stream<()>>>,
}

#[derive(DataSize)]
pub struct ObjectStream {
    /// Byte offset of each object. Index is the object number.
    offsets: Vec<usize>,
    /// The object number of this object.
    _id: ObjNr,

    inner: Stream<ObjStmInfo>,
}

impl Object for ObjectStream {
    fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result<ObjectStream> {
        let stream: Stream<ObjStmInfo> = Stream::from_primitive(p, resolve)?;

        let mut offsets = Vec::new();
        {
            debug!("parsing stream");
            let data = stream.data(resolve)?;
            let mut lexer = Lexer::new(&data);
            for _ in 0..(stream.info.num_objects as ObjNr) {
                let _obj_nr = lexer.next()?.to::<ObjNr>()?;
                let offset = lexer.next()?.to::<usize>()?;
                offsets.push(offset);
            }
        }

        Ok(ObjectStream {
            offsets,
            _id: 0, // TODO
            inner: stream,
        })
    }
}

impl ObjectStream {
    pub fn get_object_slice(&self, index: usize, resolve: &impl Resolve) -> Result<(Arc<[u8]>, Range<usize>)> {
        if index >= self.offsets.len() {
            err!(PdfError::ObjStmOutOfBounds { index, max: self.offsets.len() });
        }
        let start = self.inner.info.first + self.offsets[index];
        let data = self.inner.data(resolve)?;
        let end = if index == self.offsets.len() - 1 {
            data.len()
        } else {
            self.inner.info.first + self.offsets[index + 1]
        };

        Ok((data, start..end))
    }
    /// Returns the number of contained objects
    pub fn n_objects(&self) -> usize {
        self.offsets.len()
    }
    pub fn _data(&self, resolve: &impl Resolve) -> Result<Arc<[u8]>> {
        self.inner.data(resolve)
    }
}
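
// Layout sketch (illustrative): the decoded object stream begins with
// `num_objects` pairs of "obj_nr offset" integers; `first` is where the object
// bodies start, so object `i` occupies
// `first + offsets[i] .. first + offsets[i + 1]` (or runs to the end of the
// data for the last object), which is exactly what `get_object_slice` returns.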
1844
src-pdfrs/pdf/src/object/types.rs
Normal file
File diff suppressed because it is too large
575
src-pdfrs/pdf/src/parser/lexer/mod.rs
Normal file
@ -0,0 +1,575 @@

//! Lexing an input file, in the sense of breaking it up into substrings based on delimiters and
//! whitespace.

use std::str::FromStr;
use std::ops::{Range, Deref, RangeFrom};
use std::borrow::Cow;

use crate::error::*;
use crate::primitive::Name;

mod str;
pub use self::str::{StringLexer, HexStringLexer};

/// `Lexer` has functionality to jump around and traverse the PDF lexemes of a string in any direction.
#[derive(Copy, Clone)]
#[allow(dead_code)]
pub struct Lexer<'a> {
    pos: usize,
    buf: &'a [u8],
    file_offset: usize,
}

// find the position where condition(data[pos-1]) == false and condition(data[pos]) == true
#[inline]
fn boundary_rev(data: &[u8], pos: usize, condition: impl Fn(u8) -> bool) -> usize {
    match data[..pos].iter().rposition(|&b| !condition(b)) {
        Some(start) => start + 1,
        None => 0,
    }
}

// find the position where condition(data[pos-1]) == true and condition(data[pos]) == false
#[inline]
fn boundary(data: &[u8], pos: usize, condition: impl Fn(u8) -> bool) -> usize {
    match data[pos..].iter().position(|&b| !condition(b)) {
        Some(start) => pos + start,
        None => data.len(),
    }
}

#[inline]
fn is_whitespace(b: u8) -> bool {
    matches!(b, 0 | b' ' | b'\r' | b'\n' | b'\t')
}
#[inline]
fn not<T>(f: impl Fn(T) -> bool) -> impl Fn(T) -> bool {
    move |t| !f(t)
}
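
// Worked example (added sketch): with `data = b"ab  cd"` and `pos = 2`,
// `boundary(data, 2, is_whitespace)` skips the two spaces and returns 4,
// while `boundary_rev(data, 2, not(is_whitespace))` backs up over "ab" to 0.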
impl<'a> Lexer<'a> {
    pub fn new(buf: &'a [u8]) -> Lexer<'a> {
        Lexer {
            pos: 0,
            buf,
            file_offset: 0,
        }
    }
    pub fn with_offset(buf: &'a [u8], file_offset: usize) -> Lexer<'a> {
        Lexer {
            pos: 0,
            buf,
            file_offset,
        }
    }

    /// Returns next lexeme. Lexer moves to the next byte after the lexeme. (needs to be tested)
    #[allow(clippy::should_implement_trait)]
    pub fn next(&mut self) -> Result<Substr<'a>> {
        let (lexeme, pos) = self.next_word()?;
        self.pos = pos;
        Ok(lexeme)
    }

    /// Consume the 'stream' keyword and the end-of-line marker that follows it.
    pub fn next_stream(&mut self) -> Result<()> {
        let pos = self.skip_whitespace(self.pos)?;
        if !self.buf[pos..].starts_with(b"stream") {
            // bail!("next token isn't 'stream'");
        }

        let &b0 = self.buf.get(pos + 6).ok_or(PdfError::EOF)?;
        if b0 == b'\n' {
            self.pos = pos + 7;
        } else if b0 == b'\r' {
            let &b1 = self.buf.get(pos + 7).ok_or(PdfError::EOF)?;
            if b1 != b'\n' {
                bail!("invalid whitespace following 'stream'");
            }
            self.pos = pos + 8;
        } else {
            bail!("invalid whitespace");
        }
        Ok(())
    }
    /// Gives previous lexeme. Lexer moves to the first byte of this lexeme. (needs to be tested)
    pub fn back(&mut self) -> Result<Substr<'a>> {
        // first reverse until we find non-whitespace
        let end_pos = boundary_rev(self.buf, self.pos, is_whitespace);
        let start_pos = boundary_rev(self.buf, end_pos, not(is_whitespace));
        self.pos = start_pos;

        Ok(self.new_substr(start_pos..end_pos))
    }

    /// Look at the next lexeme. Will return empty substr if the next character is EOF.
    pub fn peek(&self) -> Result<Substr<'a>> {
        match self.next_word() {
            Ok((substr, _)) => Ok(substr),
            Err(PdfError::EOF) => Ok(self.new_substr(self.pos..self.pos)),
            Err(e) => Err(e),
        }
    }

    /// Returns `Ok` if the next lexeme matches `expected` - else `Err`.
    pub fn next_expect(&mut self, expected: &'static str) -> Result<()> {
        let word = self.next()?;
        if word.equals(expected.as_bytes()) {
            Ok(())
        } else {
            Err(PdfError::UnexpectedLexeme {
                pos: self.pos,
                lexeme: word.to_string(),
                expected,
            })
        }
    }

    /// skip whitespaces and return the position of the first non-whitespace character
    #[inline]
    fn skip_whitespace(&self, pos: usize) -> Result<usize> {
        // Move away from eventual whitespace
        let pos = boundary(self.buf, pos, is_whitespace);
        if pos >= self.buf.len() {
            Err(PdfError::EOF)
        } else {
            Ok(pos)
        }
    }

    /// Used by next, peek and back - returns substring and new position.
    /// If forward, places pointer at the next non-whitespace character.
    /// If backward, places pointer at the start of the current word.
    // TODO ^ backward case is actually not tested or.. thought about that well.
    fn next_word(&self) -> Result<(Substr<'a>, usize)> {
        if self.pos == self.buf.len() {
            return Err(PdfError::EOF);
        }
        let mut pos = self.skip_whitespace(self.pos)?;
        while self.buf.get(pos) == Some(&b'%') {
            pos += 1;
            if let Some(off) = self.buf[pos..].iter().position(|&b| b == b'\n') {
                pos += off + 1;
            }

            // Move away from eventual whitespace
            pos = self.skip_whitespace(pos)?;
        }

        let start_pos = pos;

        // If first character is delimiter, this lexeme only contains that character.
        //  - except << and >> which go together, and / which marks the start of a
        //    name token.
        if self.is_delimiter(pos) {
            if self.buf[pos] == b'/' {
                pos = self.advance_pos(pos)?;
                while !self.is_whitespace(pos) && !self.is_delimiter(pos) {
                    match self.advance_pos(pos) {
                        Ok(p) => pos = p,
                        Err(_) => break,
                    }
                }
                return Ok((self.new_substr(start_pos..pos), pos));
            }

            if let Some(slice) = self.buf.get(pos..=pos + 1) {
                if slice == b"<<" || slice == b">>" {
                    pos = self.advance_pos(pos)?;
                }
            }

            pos = self.advance_pos(pos)?;
            return Ok((self.new_substr(start_pos..pos), pos));
        }

        // Read to past the end of lexeme
        while !self.is_whitespace(pos) && !self.is_delimiter(pos) {
            match self.advance_pos(pos) {
                Ok(p) => pos = p,
                Err(_) => break,
            }
        }
        let result = self.new_substr(start_pos..pos);

        Ok((result, pos))
    }
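
    // Tokenization sketch (illustrative): on the input `<</Type /Page>>` the
    // logic above yields "<<", "/Type", "/Page", ">>": "<<" and ">>" are
    // paired delimiters, and "/" starts a name token that runs until the next
    // delimiter or whitespace.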

    /// Just a helper for next_word.
    #[inline]
    fn advance_pos(&self, pos: usize) -> Result<usize> {
        if pos < self.buf.len() {
            Ok(pos + 1)
        } else {
            Err(PdfError::EOF)
        }
    }

    #[inline]
    pub fn next_as<T>(&mut self) -> Result<T>
        where T: FromStr, T::Err: std::error::Error + Send + Sync + 'static
    {
        self.next().and_then(|word| word.to::<T>())
    }

    #[inline]
    pub fn get_pos(&self) -> usize {
        self.pos
    }

    #[inline]
    pub fn new_substr(&self, mut range: Range<usize>) -> Substr<'a> {
        // if the range is backward, fix it
        // start is inclusive, end is exclusive. keep that in mind
        if range.start > range.end {
            let new_end = range.start + 1;
            range.start = range.end + 1;
            range.end = new_end;
        }

        Substr {
            file_offset: self.file_offset + range.start,
            slice: &self.buf[range],
        }
    }

    /// Just a helper function for set_pos, set_pos_from_end and offset_pos.
    #[inline]
    pub fn set_pos(&mut self, wanted_pos: usize) -> Substr<'a> {
        let new_pos = wanted_pos.min(self.buf.len());
        let range = if self.pos < new_pos {
            self.pos..new_pos
        } else {
            new_pos..self.pos
        };
        self.pos = new_pos;
        self.new_substr(range)
    }

    /// Returns the substr between the old and new positions
    #[inline]
    pub fn set_pos_from_end(&mut self, new_pos: usize) -> Substr<'a> {
        self.set_pos(self.buf.len().saturating_sub(new_pos).saturating_sub(1))
    }
    /// Returns the substr between the old and new positions
    #[inline]
    pub fn offset_pos(&mut self, offset: usize) -> Substr<'a> {
        self.set_pos(self.pos.wrapping_add(offset))
    }

    /// Moves pos to start of next line. Returns the skipped-over substring.
    #[allow(dead_code)]
    pub fn seek_newline(&mut self) -> Substr {
        let start = self.pos;
        while self.buf[self.pos] != b'\n'
            && self.incr_pos() { }
        self.incr_pos();

        self.new_substr(start..self.pos)
    }

    // TODO: seek_substr and seek_substr_back should use next() or back()?
    /// Moves pos to after the found `substr`. Returns Substr with traversed text if `substr` is found.
    #[allow(dead_code)]
    pub fn seek_substr(&mut self, substr: impl AsRef<[u8]>) -> Option<Substr<'a>> {
        let substr = substr.as_ref();
        let start = self.pos;
        let mut matched = 0;
        loop {
            if self.pos >= self.buf.len() {
                return None;
            }
            if self.buf[self.pos] == substr[matched] {
                matched += 1;
            } else {
                matched = 0;
            }
            if matched == substr.len() {
                break;
            }
            self.pos += 1;
        }
        self.pos += 1;
        Some(self.new_substr(start..(self.pos - substr.len())))
    }

    // TODO perhaps seek_substr_back should, like back(), move to the first letter of the substr.
    /// Searches for string backward. Moves to after the found `substr`, returns the traversed
    /// Substr if found.
    pub fn seek_substr_back(&mut self, substr: &[u8]) -> Result<Substr<'a>> {
        let end = self.pos;
        match self.buf[..end].windows(substr.len()).rposition(|w| w == substr) {
            Some(start) => {
                self.pos = start + substr.len();
                Ok(self.new_substr(self.pos..end))
            }
            None => Err(PdfError::NotFound { word: String::from_utf8_lossy(substr).into() }),
        }
    }

    /// Read and return slice of at most n bytes.
    #[allow(dead_code)]
    pub fn read_n(&mut self, n: usize) -> Substr<'a> {
        let start_pos = self.pos;
        self.pos += n;
        if self.pos >= self.buf.len() {
            self.pos = self.buf.len() - 1;
        }
        if start_pos < self.buf.len() {
            self.new_substr(start_pos..self.pos)
        } else {
            self.new_substr(0..0)
        }
    }

    /// Returns slice from current position to end.
    #[inline]
    pub fn get_remaining_slice(&self) -> &'a [u8] {
        &self.buf[self.pos..]
    }

    /// for debugging
    pub fn ctx(&self) -> Cow<str> {
        String::from_utf8_lossy(&self.buf[self.pos.saturating_sub(40)..self.buf.len().min(self.pos + 40)])
    }

    #[inline]
    fn incr_pos(&mut self) -> bool {
        if self.pos >= self.buf.len() - 1 {
            false
        } else {
            self.pos += 1;
            true
        }
    }
    #[inline]
    fn is_whitespace(&self, pos: usize) -> bool {
        self.buf.get(pos).map(|&b| is_whitespace(b)).unwrap_or(false)
    }

    #[inline]
    fn is_delimiter(&self, pos: usize) -> bool {
        self.buf.get(pos).map(|b| b"()<>[]{}/%".contains(b)).unwrap_or(false)
    }
}
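
// Smoke test sketch (added for illustration; not an upstream test).
#[cfg(test)]
mod lexer_sketch {
    use super::*;

    #[test]
    fn tokenizes_an_indirect_object_header() -> Result<()> {
        let mut lx = Lexer::new(b"12 0 obj");
        assert_eq!(lx.next()?.to::<u64>()?, 12);
        assert_eq!(lx.next()?.to::<u64>()?, 0);
        lx.next_expect("obj")?;
        Ok(())
    }
}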

/// A slice from some original string - a lexeme.
#[derive(Copy, Clone, Debug)]
pub struct Substr<'a> {
    slice: &'a [u8],
    file_offset: usize,
}
impl<'a> Substr<'a> {
    pub fn new<T: AsRef<[u8]> + ?Sized>(data: &'a T, file_offset: usize) -> Self {
        Substr { slice: data.as_ref(), file_offset }
    }
    // to:   &S -> U.  Possibly expensive conversion.
    // as:   &S -> &U. Cheap borrow conversion.
    // into: S -> U.   Cheap ownership transfer conversion.

    #[allow(clippy::inherent_to_string)]
    pub fn to_string(&self) -> String {
        String::from_utf8_lossy(self.as_slice()).into()
    }
    pub fn to_name(&self) -> Result<Name> {
        Ok(Name(std::str::from_utf8(self.as_slice())?.into()))
    }
    pub fn to_vec(&self) -> Vec<u8> {
        self.slice.to_vec()
    }
    pub fn to<T>(&self) -> Result<T>
        where T: FromStr, T::Err: std::error::Error + Send + Sync + 'static
    {
        std::str::from_utf8(self.slice)?.parse::<T>().map_err(|e| PdfError::Parse { source: e.into() })
    }
    pub fn is_integer(&self) -> bool {
        if self.slice.is_empty() {
            return false;
        }
        let mut slice = self.slice;
        if slice[0] == b'-' {
            if slice.len() < 2 {
                return false;
            }
            slice = &slice[1..];
        }
        is_int(slice)
    }
    pub fn is_real_number(&self) -> bool {
        self.real_number().is_some()
    }
    pub fn real_number(&self) -> Option<Self> {
        if self.slice.is_empty() {
            return None;
        }
        let mut slice = self.slice;
        if slice[0] == b'-' {
            if slice.len() < 2 {
                return None;
            }
            slice = &slice[1..];
        }
        if let Some(i) = slice.iter().position(|&b| b == b'.') {
            if !is_int(&slice[..i]) {
                return None;
            }
            slice = &slice[i + 1..];
        }
        if let Some(len) = slice.iter().position(|&b| !b.is_ascii_digit()) {
            if len == 0 {
                return None;
            }
            let end = self.slice.len() - slice.len() + len;
            Some(Substr {
                file_offset: self.file_offset,
                slice: &self.slice[..end],
            })
        } else {
            Some(*self)
        }
    }

    pub fn as_slice(&self) -> &'a [u8] {
        self.slice
    }
    pub fn as_str(&self) -> Result<&str> {
        std::str::from_utf8(self.slice).map_err(|e| PdfError::Parse { source: e.into() })
    }

    pub fn equals(&self, other: impl AsRef<[u8]>) -> bool {
        self.slice == other.as_ref()
    }

    pub fn reslice(&self, range: RangeFrom<usize>) -> Substr<'a> {
        Substr {
            file_offset: self.file_offset + range.start,
            slice: &self.slice[range],
        }
    }

    pub fn file_range(&self) -> Range<usize> {
        self.file_offset..self.file_offset + self.slice.len()
    }
}
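
// Behavior sketch (illustrative): `real_number()` keeps the numeric prefix and
// trims trailing garbage, so `Substr::new("12.5)", 0).real_number()` yields the
// substring "12.5", while `is_integer()` accepts only an optional '-' followed
// by ASCII digits.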

#[inline]
fn is_int(b: &[u8]) -> bool {
    b.iter().all(|&b| b.is_ascii_digit())
}
impl<'a> Deref for Substr<'a> {
    type Target = [u8];
    fn deref(&self) -> &[u8] {
        self.as_slice()
    }
}
impl<'a> PartialEq<&[u8]> for Substr<'a> {
    fn eq(&self, rhs: &&[u8]) -> bool {
        self.equals(rhs)
    }
}

impl<'a> PartialEq<&str> for Substr<'a> {
    fn eq(&self, rhs: &&str) -> bool {
        self.equals(rhs.as_bytes())
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::fs::File;
    use std::io::{BufWriter, Write};

    #[test]
    fn test_boundary_rev() {
        assert_eq!(boundary_rev(b" hello", 3, not(is_whitespace)), 1);
        assert_eq!(boundary_rev(b" hello", 3, is_whitespace), 3);
    }

    #[test]
    fn test_boundary() {
        assert_eq!(boundary(b" hello ", 3, not(is_whitespace)), 6);
        assert_eq!(boundary(b" hello ", 3, is_whitespace), 3);
        assert_eq!(boundary(b"01234 7orld", 5, is_whitespace), 7);
        assert_eq!(boundary(b"01234 7orld", 7, is_whitespace), 7);
        assert_eq!(boundary(b"q\n", 1, is_whitespace), 2);
    }

    #[test]
    fn test_substr() {
        assert!(Substr::new("123", 0).is_real_number());
        assert!(Substr::new("123.", 0).is_real_number());
        assert!(Substr::new("123.45", 0).is_real_number());
        assert!(Substr::new(".45", 0).is_real_number());
        assert!(Substr::new("-.45", 0).is_real_number());
        assert!(!Substr::new("123.45", 0).is_integer());
        assert!(Substr::new("123", 0).is_integer());
    }

    #[test]
    fn test_lexed() {
        let file_data = fs::read("/home/kschuettler/Dokumente/TestFiles/18 - EVIDIS - Corrosao Irritacao ocular aguda.pdf").expect("File not found!");
        println!("{}", file_data.len());
        let mut lexer = Lexer::new(&*file_data);
        let file = File::create("/tmp/pdf.txt").unwrap();

        let mut writer = BufWriter::new(file);
        let mut depth = false;
        let mut stream = false;
        let mut dict = 0;
        while let Ok(s) = lexer.next() {
            if stream && s.to_string().as_str() == "endstream" {
                stream = false;
                writer
                    .write("endstream\n".as_ref())
                    .expect("Could not write to buffer");
                continue;
            } else if stream {
                continue;
            }

            match s.to_string().as_str() {
                "obj" => depth = true,
                "endobj" => depth = false,
                "stream" => {
                    stream = true;
                    writer
                        .write("stream ... ".as_ref())
                        .expect("Could not write to buffer");
                    continue;
                }
                "<<" => dict += 1,
                ">>" => dict -= 1,
                _ => (),
            }

            writer.write(s.as_ref()).expect("Could not write to buffer");
            if dict == 0 {
                writer
                    .write("\n".as_ref())
                    .expect("Could not write to buffer");
            } else {
                writer
                    .write(" ".as_ref())
                    .expect("Could not write to buffer");
            }
        }
        writer.flush().expect("Could not flush buffer");
    }
}
368
src-pdfrs/pdf/src/parser/lexer/str.rs
Normal file
@ -0,0 +1,368 @@

use std::iter::Iterator;
use crate::error::*;

/// A lexer for PDF strings. Breaks the string up into single characters (`u8`).
/// It's also possible to get the number of bytes of the original array that were traversed by the
/// iterator.
///
/// ```ignore
/// let mut string: Vec<u8> = Vec::new();
/// let bytes_traversed = {
///     let mut string_lexer = StringLexer::new(lexer.get_remaining_slice());
///     for character in string_lexer.iter() {
///         let character = character?;
///         string.push(character);
///     }
///     string_lexer.get_offset() as i64
/// };
/// // bytes_traversed now holds the number of bytes in the original array traversed.
/// ```
#[derive(Clone)]
pub struct StringLexer<'a> {
    pos: usize,   // points to next byte
    nested: i32,  // How far in () we are nested
    buf: &'a [u8],
}

impl<'a> StringLexer<'a> {
    /// `buf` should start right after the `(` delimiter, and may span all the way to EOF. StringLexer
    /// will determine the end of the string.
    pub fn new(buf: &'a [u8]) -> StringLexer<'a> {
        StringLexer {
            pos: 0,
            nested: 0,
            buf,
        }
    }
    pub fn iter<'b>(&'b mut self) -> StringLexerIter<'a, 'b> {
        StringLexerIter { lexer: self }
    }
    /// Get offset/pos from start of string
    pub fn get_offset(&self) -> usize {
        self.pos
    }

    /// (mostly just used by Iterator, but might be useful)
    pub fn next_lexeme(&mut self) -> Result<Option<u8>> {
        let c = self.next_byte()?;
        match c {
            b'\\' => {
                let c = self.next_byte()?;
                Ok(
                    match c {
                        b'n' => Some(b'\n'),
                        b'r' => Some(b'\r'),
                        b't' => Some(b'\t'),
                        b'b' => Some(b'\x08'),
                        b'f' => Some(b'\x0c'),
                        b'(' => Some(b'('),
                        b')' => Some(b')'),
                        b'\n' => {
                            // ignore end-of-line marker
                            if let Ok(b'\r') = self.peek_byte() {
                                let _ = self.next_byte();
                            }
                            self.next_lexeme()?
                        }
                        b'\r' => {
                            // ignore end-of-line marker
                            if let Ok(b'\n') = self.peek_byte() {
                                let _ = self.next_byte();
                            }
                            self.next_lexeme()?
                        }
                        b'\\' => Some(b'\\'),

                        _ => {
                            self.back()?;
                            let _start = self.get_offset();
                            let mut char_code: u16 = 0;

                            // A character code must follow. 1-3 numbers.
                            for _ in 0..3 {
                                let c = self.peek_byte()?;
                                if (b'0'..=b'7').contains(&c) {
                                    self.next_byte()?;
                                    char_code = char_code * 8 + (c - b'0') as u16;
                                } else {
                                    break;
                                }
                            }
                            Some(char_code as u8)
                        }
                    }
                )
            },

            b'(' => {
                self.nested += 1;
                Ok(Some(b'('))
            },
            b')' => {
                self.nested -= 1;
                if self.nested < 0 {
                    Ok(None)
                } else {
                    Ok(Some(b')'))
                }
            },

            c => Ok(Some(c)),
        }
    }

    fn next_byte(&mut self) -> Result<u8> {
        if self.pos < self.buf.len() {
            self.pos += 1;
            Ok(self.buf[self.pos - 1])
        } else {
            Err(PdfError::EOF)
        }
    }
    fn back(&mut self) -> Result<()> {
        if self.pos > 0 {
            self.pos -= 1;
            Ok(())
        } else {
            Err(PdfError::EOF)
        }
    }
    fn peek_byte(&mut self) -> Result<u8> {
        if self.pos < self.buf.len() {
            Ok(self.buf[self.pos])
        } else {
            Err(PdfError::EOF)
        }
    }
}
|
||||
|
||||
// "'a is valid for at least 'b"
|
||||
pub struct StringLexerIter<'a: 'b, 'b> {
|
||||
lexer: &'b mut StringLexer<'a>,
|
||||
}
|
||||
|
||||
impl<'a, 'b> Iterator for StringLexerIter<'a, 'b> {
|
||||
type Item = Result<u8>;
|
||||
fn next(&mut self) -> Option<Result<u8>> {
|
||||
match self.lexer.next_lexeme() {
|
||||
Err(e) => Some(Err(e)),
|
||||
Ok(Some(s)) => Some(Ok(s)),
|
||||
Ok(None) => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct HexStringLexer<'a> {
|
||||
pos: usize, // points to next byte
|
||||
buf: &'a [u8],
|
||||
}
|
||||
|
||||
impl<'a> HexStringLexer<'a> {
|
||||
/// `buf` should start right after the `<` delimiter, and may span all the way to EOF.
|
||||
/// HexStringLexer will determine the end of the string.
|
||||
pub fn new(buf: &'a [u8]) -> HexStringLexer<'a> {
|
||||
HexStringLexer { pos: 0, buf }
|
||||
}
|
||||
|
||||
pub fn iter<'b>(&'b mut self) -> HexStringLexerIter<'a, 'b> {
|
||||
HexStringLexerIter { lexer: self }
|
||||
}
|
||||
|
||||
/// Get offset/position from start of string
|
||||
pub fn get_offset(&self) -> usize {
|
||||
self.pos
|
||||
}
|
||||
|
||||
fn next_non_whitespace_char(&mut self) -> Result<u8> {
|
||||
let mut byte = self.read_byte()?;
|
||||
while byte == b' ' || byte == b'\t' || byte == b'\n' || byte == b'\r' || byte == b'\x0c' {
|
||||
byte = self.read_byte()?;
|
||||
}
|
||||
Ok(byte)
|
||||
}
|
||||
|
||||
pub fn next_hex_byte(&mut self) -> Result<Option<u8>> {
|
||||
let c1 = self.next_non_whitespace_char()?;
|
||||
let high_nibble: u8 = match c1 {
|
||||
b'0' ..= b'9' => c1 - b'0',
|
||||
b'A' ..= b'F' => c1 - b'A' + 0xA,
|
||||
b'a' ..= b'f' => c1 - b'a' + 0xA,
|
||||
b'>' => return Ok(None),
|
||||
_ => return Err(PdfError::HexDecode {
|
||||
pos: self.pos,
|
||||
bytes: [c1, self.peek_byte().unwrap_or(0)]
|
||||
}),
|
||||
};
|
||||
let c2 = self.next_non_whitespace_char()?;
|
||||
let low_nibble: u8 = match c2 {
|
||||
b'0' ..= b'9' => c2 - b'0',
|
||||
b'A' ..= b'F' => c2 - b'A' + 0xA,
|
||||
b'a' ..= b'f' => c2 - b'a' + 0xA,
|
||||
b'>' => {
|
||||
self.back()?;
|
||||
0
|
||||
}
|
||||
_ => return Err(PdfError::HexDecode {
|
||||
pos: self.pos,
|
||||
bytes: [c1, c2]
|
||||
}),
|
||||
};
|
||||
Ok(Some((high_nibble << 4) | low_nibble))
|
||||
}
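    // Example (mirrors `hex_test` at the bottom of this file): "901FA>" yields
    // 0x90, 0x1f, then 0xa0; a trailing odd digit is padded with a zero low nibble.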

    fn read_byte(&mut self) -> Result<u8> {
        if self.pos < self.buf.len() {
            self.pos += 1;
            Ok(self.buf[self.pos - 1])
        } else {
            Err(PdfError::EOF)
        }
    }

    fn back(&mut self) -> Result<()> {
        if self.pos > 0 {
            self.pos -= 1;
            Ok(())
        } else {
            Err(PdfError::EOF)
        }
    }

    fn peek_byte(&mut self) -> Result<u8> {
        if self.pos < self.buf.len() {
            Ok(self.buf[self.pos])
        } else {
            Err(PdfError::EOF)
        }
    }
}

pub struct HexStringLexerIter<'a: 'b, 'b> {
    lexer: &'b mut HexStringLexer<'a>,
}

impl<'a, 'b> Iterator for HexStringLexerIter<'a, 'b> {
    type Item = Result<u8>;

    fn next(&mut self) -> Option<Result<u8>> {
        match self.lexer.next_hex_byte() {
            Err(e) => Some(Err(e)),
            Ok(Some(s)) => Some(Ok(s)),
            Ok(None) => None,
        }
    }
}

#[cfg(test)]
mod tests {
    use crate::error::Result;
    use crate::parser::lexer::{HexStringLexer, StringLexer};

    #[test]
    fn escape_sequences() {
        let vec = b"a\\nb\\rc\\td\\(f/)\\\\hei)";
        let mut lexer = StringLexer::new(vec);
        let lexemes: Vec<u8> = lexer.iter().map(Result::unwrap).collect();
        assert_eq!(lexemes, b"a\nb\rc\td(f/");
    }

    #[test]
    fn string_split_lines() {
        {
            let data = b"These \\\ntwo strings \\\nare the same.)";
            let mut lexer = StringLexer::new(data);
            let result: Vec<u8> = lexer.iter().map(Result::unwrap).collect();
            assert_eq!(result, b"These two strings are the same.");
        }
        {
            let data = b"These \\\rtwo strings \\\rare the same.)";
            let mut lexer = StringLexer::new(data);
            let result: Vec<u8> = lexer.iter().map(Result::unwrap).collect();
            assert_eq!(result, b"These two strings are the same.");
        }
        {
            let data = b"These \\\r\ntwo strings \\\r\nare the same.)";
            let mut lexer = StringLexer::new(data);
            let result: Vec<u8> = lexer.iter().map(Result::unwrap).collect();
            assert_eq!(result, b"These two strings are the same.");
        }
    }

    #[test]
    fn octal_escape() {
        {
            let data = b"This string contains\\245two octal characters\\307.)";
            let mut lexer = StringLexer::new(data);
            let result: Vec<u8> = lexer.iter().map(Result::unwrap).collect();
            assert_eq!(result, &b"This string contains\xa5two octal characters\xc7."[..]);
        }
        {
            let data = b"\\0053)";
            let mut lexer = StringLexer::new(data);
            let result: Vec<u8> = lexer.iter().map(Result::unwrap).collect();
            assert_eq!(result, b"\x053");
        }
        {
            let data = b"\\053)";
            let mut lexer = StringLexer::new(data);
            let result: Vec<u8> = lexer.iter().map(Result::unwrap).collect();
            assert_eq!(result, b"+");
        }
        {
            let data = b"\\53)";
            let mut lexer = StringLexer::new(data);
            let result: Vec<u8> = lexer.iter().map(Result::unwrap).collect();
            assert_eq!(result, b"+");
        }
        {
            // overflow is ignored
            let data = b"\\541)";
            let mut lexer = StringLexer::new(data);
            let result: Vec<u8> = lexer.iter().map(Result::unwrap).collect();
            assert_eq!(result, b"a");
        }
    }

    #[test]
    fn hex_test() {
        let input = b"901FA3>";
        let mut lexer = HexStringLexer::new(input);
        let result: Vec<u8> = lexer.iter().map(Result::unwrap).collect();
        assert_eq!(result, vec![b'\x90', b'\x1f', b'\xa3']);

        let input = b"901FA>";
        let mut lexer = HexStringLexer::new(input);
        let result: Vec<u8> = lexer.iter().map(Result::unwrap).collect();
        assert_eq!(result, vec![b'\x90', b'\x1f', b'\xa0']);

        let input = b"1 9F\t5\r\n4\x0c62a>";
        let mut lexer = HexStringLexer::new(input);
        let result: Vec<u8> = lexer.iter().map(Result::unwrap).collect();
        assert_eq!(result, vec![b'\x19', b'\xf5', b'\x46', b'\x2a']);
    }
}
405
src-pdfrs/pdf/src/parser/mod.rs
Normal file
405
src-pdfrs/pdf/src/parser/mod.rs
Normal file
@ -0,0 +1,405 @@
//! Basic functionality for parsing a PDF file.

mod lexer;
mod parse_object;
mod parse_xref;

pub use self::lexer::*;
pub use self::parse_object::*;
pub use self::parse_xref::*;

use crate::error::*;
use crate::primitive::StreamInner;
use crate::primitive::{Primitive, Dictionary, PdfStream, PdfString};
use crate::object::{ObjNr, GenNr, PlainRef, Resolve};
use crate::crypt::Decoder;
use bitflags::bitflags;
use istring::{SmallBytes, SmallString, IBytes};

const MAX_DEPTH: usize = 20;

bitflags! {
    #[repr(transparent)]
    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
    pub struct ParseFlags: u16 {
        const INTEGER = 1 << 0;
        const STREAM = 1 << 1;
        const DICT = 1 << 2;
        const NUMBER = 1 << 3;
        const NAME = 1 << 4;
        const ARRAY = 1 << 5;
        const STRING = 1 << 6;
        const BOOL = 1 << 7;
        const NULL = 1 << 8;
        const REF = 1 << 9;
        const ANY = (1 << 10) - 1;
    }
}
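
// A minimal usage sketch (not part of the original commit): callers combine
// flags to state which primitive kinds are acceptable; `check` in this module
// rejects anything else with `PdfError::PrimitiveNotAllowed`. `NoResolve` is
// the no-op resolver from crate::object.
//
//     use crate::object::NoResolve;
//     let flags = ParseFlags::INTEGER | ParseFlags::REF;
//     let prim = parse(b"42", &NoResolve, flags)?;
//     assert_eq!(prim.as_integer()?, 42);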

pub struct Context<'a> {
    pub decoder: Option<&'a Decoder>,
    pub id: PlainRef,
}
impl<'a> Context<'a> {
    pub fn decrypt<'buf>(&self, data: &'buf mut [u8]) -> Result<&'buf [u8]> {
        if let Some(decoder) = self.decoder {
            decoder.decrypt(self.id, data)
        } else {
            Ok(data)
        }
    }
    #[cfg(test)]
    fn fake() -> Self {
        Context {
            decoder: None,
            id: PlainRef { id: 0, gen: 0 }
        }
    }
}

/// Can parse a stream, but only if its dictionary does not contain indirect references.
/// Use `parse_stream` if this is not sufficient.
pub fn parse(data: &[u8], r: &impl Resolve, flags: ParseFlags) -> Result<Primitive> {
    parse_with_lexer(&mut Lexer::new(data), r, flags)
}

/// Recursive. Can parse a stream, but only if its dictionary does not contain indirect references.
/// Use `parse_stream` if this is not sufficient.
pub fn parse_with_lexer(lexer: &mut Lexer, r: &impl Resolve, flags: ParseFlags) -> Result<Primitive> {
    parse_with_lexer_ctx(lexer, r, None, flags, MAX_DEPTH)
}

fn parse_dictionary_object(lexer: &mut Lexer, r: &impl Resolve, ctx: Option<&Context>, max_depth: usize) -> Result<Dictionary> {
    let mut dict = Dictionary::default();
    loop {
        // Expect a Name (and Object) or the '>>' delimiter
        let token = t!(lexer.next());
        if token.starts_with(b"/") {
            let key = token.reslice(1..).to_name()?;
            let obj = t!(parse_with_lexer_ctx(lexer, r, ctx, ParseFlags::ANY, max_depth));
            dict.insert(key, obj);
        } else if token.equals(b">>") {
            break;
        } else {
            err!(PdfError::UnexpectedLexeme { pos: lexer.get_pos(), lexeme: token.to_string(), expected: "/ or >>" });
        }
    }
    Ok(dict)
}

fn parse_stream_object(dict: Dictionary, lexer: &mut Lexer, r: &impl Resolve, ctx: &Context) -> Result<PdfStream> {
    t!(lexer.next_stream());

    let length = match dict.get("Length") {
        Some(&Primitive::Integer(n)) if n >= 0 => n as usize,
        Some(&Primitive::Reference(reference)) => t!(t!(r.resolve_flags(reference, ParseFlags::INTEGER, 1)).as_usize()),
        Some(other) => err!(PdfError::UnexpectedPrimitive { expected: "unsigned Integer or Reference", found: other.get_debug_name() }),
        None => err!(PdfError::MissingEntry { typ: "<Stream>", field: "Length".into() }),
    };

    let stream_substr = lexer.read_n(length);

    if stream_substr.len() != length {
        err!(PdfError::EOF)
    }

    // Finish
    t!(lexer.next_expect("endstream"));

    Ok(PdfStream {
        inner: StreamInner::InFile {
            id: ctx.id,
            file_range: stream_substr.file_range(),
        },
        info: dict,
    })
}
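
// The byte layout consumed above, for reference (a sketch, not from this commit):
//
//     << /Length 6 >>
//     stream
//     ABCDEF
//     endstream
//
// `next_stream` positions the lexer just past the `stream` keyword and its
// end-of-line marker; `Length` counts the raw bytes in between and may itself
// be an indirect reference, resolved through `r`.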

#[inline]
fn check(flags: ParseFlags, allowed: ParseFlags) -> Result<(), PdfError> {
    if !flags.intersects(allowed) {
        return Err(PdfError::PrimitiveNotAllowed { allowed, found: flags });
    }
    Ok(())
}

/// Recursive. Can parse a stream, but only if its dictionary does not contain indirect references.
/// Use `parse_stream` if this is not sufficient.
pub fn parse_with_lexer_ctx(lexer: &mut Lexer, r: &impl Resolve, ctx: Option<&Context>, flags: ParseFlags, max_depth: usize) -> Result<Primitive> {
    let pos = lexer.get_pos();
    match _parse_with_lexer_ctx(lexer, r, ctx, flags, max_depth) {
        Ok(r) => Ok(r),
        Err(e) => {
            // Roll the lexer back on error so the caller can retry.
            lexer.set_pos(pos);
            Err(e)
        }
    }
}
fn _parse_with_lexer_ctx(lexer: &mut Lexer, r: &impl Resolve, ctx: Option<&Context>, flags: ParseFlags, max_depth: usize) -> Result<Primitive> {
    let input = lexer.get_remaining_slice();
    let first_lexeme = t!(lexer.next(), std::str::from_utf8(input));

    let obj = if first_lexeme.equals(b"<<") {
        check(flags, ParseFlags::DICT)?;

        if max_depth == 0 {
            return Err(PdfError::MaxDepth);
        }
        let dict = t!(parse_dictionary_object(lexer, r, ctx, max_depth - 1));
        // It might just be the dictionary in front of a stream.
        if t!(lexer.peek()).equals(b"stream") {
            let ctx = ctx.ok_or(PdfError::PrimitiveNotAllowed { allowed: ParseFlags::STREAM, found: flags })?;
            Primitive::Stream(t!(parse_stream_object(dict, lexer, r, ctx)))
        } else {
            Primitive::Dictionary(dict)
        }
    } else if first_lexeme.is_integer() {
        // May be Integer or Reference
        check(flags, ParseFlags::INTEGER | ParseFlags::REF)?;

        // First back up the position
        let pos_bk = lexer.get_pos();

        let second_lexeme = t!(lexer.next());
        if second_lexeme.is_integer() {
            let third_lexeme = t!(lexer.next());
            if third_lexeme.equals(b"R") {
                // It is indeed a reference to an indirect object
                check(flags, ParseFlags::REF)?;
                Primitive::Reference(PlainRef {
                    id: t!(first_lexeme.to::<ObjNr>()),
                    gen: t!(second_lexeme.to::<GenNr>()),
                })
            } else {
                check(flags, ParseFlags::INTEGER)?;
                // We are probably in an array of numbers - it's not a reference anyway
                lexer.set_pos(pos_bk); // (roll back the lexer first)
                Primitive::Integer(t!(first_lexeme.to::<i32>()))
            }
        } else {
            check(flags, ParseFlags::INTEGER)?;
            // It is but a number
            lexer.set_pos(pos_bk); // (roll back the lexer first)
            Primitive::Integer(t!(first_lexeme.to::<i32>()))
        }
    } else if let Some(s) = first_lexeme.real_number() {
        check(flags, ParseFlags::NUMBER)?;
        // Real Number
        Primitive::Number(t!(s.to::<f32>(), s.to_string()))
    } else if first_lexeme.starts_with(b"/") {
        check(flags, ParseFlags::NAME)?;
        // Name, with `#xx` hex escapes decoded
        let mut rest: &[u8] = &first_lexeme.reslice(1..);
        let s = if rest.contains(&b'#') {
            let mut s = IBytes::new();
            while let Some(idx) = rest.iter().position(|&b| b == b'#') {
                use crate::enc::decode_nibble;
                use std::convert::TryInto;
                let [hi, lo]: [u8; 2] = rest.get(idx + 1..idx + 3).ok_or(PdfError::EOF)?.try_into().unwrap();
                let byte = match (decode_nibble(lo), decode_nibble(hi)) {
                    (Some(low), Some(high)) => low | high << 4,
                    _ => return Err(PdfError::HexDecode { pos: idx, bytes: [hi, lo] }),
                };
                s.extend_from_slice(&rest[..idx]);
                s.push(byte);
                rest = &rest[idx + 3..];
            }
            s.extend_from_slice(rest);
            SmallBytes::from(s.as_slice())
        } else {
            SmallBytes::from(rest)
        };

        Primitive::Name(SmallString::from_utf8(s)?)
    } else if first_lexeme.equals(b"[") {
        check(flags, ParseFlags::ARRAY)?;
        if max_depth == 0 {
            return Err(PdfError::MaxDepth);
        }
        let mut array = Vec::new();
        // Array
        loop {
            // Exit if closing delimiter
            if lexer.peek()?.equals(b"]") {
                break;
            }

            let element = t!(parse_with_lexer_ctx(lexer, r, ctx, ParseFlags::ANY, max_depth - 1));
            array.push(element);
        }
        t!(lexer.next()); // Move beyond closing delimiter

        Primitive::Array(array)
    } else if first_lexeme.equals(b"(") {
        check(flags, ParseFlags::STRING)?;
        let mut string = IBytes::new();

        let bytes_traversed = {
            let mut string_lexer = StringLexer::new(lexer.get_remaining_slice());
            for character in string_lexer.iter() {
                string.push(t!(character));
            }
            string_lexer.get_offset()
        };
        // Advance to end of string
        lexer.offset_pos(bytes_traversed);
        // decrypt it
        if let Some(ctx) = ctx {
            string = t!(ctx.decrypt(&mut string)).into();
        }
        Primitive::String(PdfString::new(string))
    } else if first_lexeme.equals(b"<") {
        check(flags, ParseFlags::STRING)?;
        let mut string = IBytes::new();

        let bytes_traversed = {
            let mut hex_string_lexer = HexStringLexer::new(lexer.get_remaining_slice());
            for byte in hex_string_lexer.iter() {
                string.push(t!(byte));
            }
            hex_string_lexer.get_offset()
        };
        // Advance to end of string
        lexer.offset_pos(bytes_traversed);

        // decrypt it
        if let Some(ctx) = ctx {
            string = t!(ctx.decrypt(&mut string)).into();
        }
        Primitive::String(PdfString::new(string))
    } else if first_lexeme.equals(b"true") {
        check(flags, ParseFlags::BOOL)?;
        Primitive::Boolean(true)
    } else if first_lexeme.equals(b"false") {
        check(flags, ParseFlags::BOOL)?;
        Primitive::Boolean(false)
    } else if first_lexeme.equals(b"null") {
        check(flags, ParseFlags::NULL)?;
        Primitive::Null
    } else {
        err!(PdfError::UnknownType { pos: lexer.get_pos(), first_lexeme: first_lexeme.to_string(), rest: lexer.read_n(50).to_string() });
    };

    // trace!("Read object"; "Obj" => format!("{}", obj));

    Ok(obj)
}


pub fn parse_stream(data: &[u8], resolve: &impl Resolve, ctx: &Context) -> Result<PdfStream> {
    parse_stream_with_lexer(&mut Lexer::new(data), resolve, ctx)
}


fn parse_stream_with_lexer(lexer: &mut Lexer, r: &impl Resolve, ctx: &Context) -> Result<PdfStream> {
    let first_lexeme = t!(lexer.next());

    let obj = if first_lexeme.equals(b"<<") {
        let dict = t!(parse_dictionary_object(lexer, r, None, MAX_DEPTH));
        // It might just be the dictionary in front of a stream.
        if t!(lexer.peek()).equals(b"stream") {
            let ctx = Context {
                decoder: None,
                id: ctx.id
            };
            t!(parse_stream_object(dict, lexer, r, &ctx))
        } else {
            err!(PdfError::UnexpectedPrimitive { expected: "Stream", found: "Dictionary" });
        }
    } else {
        err!(PdfError::UnexpectedPrimitive { expected: "Stream", found: "something else" });
    };

    Ok(obj)
}

#[cfg(test)]
mod tests {
    #[test]
    fn dict_with_empty_name_as_value() {
        use crate::object::NoResolve;
        use super::{ParseFlags, Context};
        {
            let data = b"<</App<</Name/>>>>";
            let primitive = super::parse(data, &NoResolve, ParseFlags::DICT).unwrap();
            let dict = primitive.into_dictionary().unwrap();

            assert_eq!(dict.len(), 1);
            let app_dict = dict.get("App").unwrap().clone().into_dictionary().unwrap();
            assert_eq!(app_dict.len(), 1);
            let name = app_dict.get("Name").unwrap().as_name().unwrap();
            assert_eq!(name, "");
        }

        {
            let data = b"<</Length 0/App<</Name/>>>>stream\nendstream\n";
            let stream = super::parse_stream(data, &NoResolve, &Context::fake()).unwrap();
            let dict = stream.info;

            assert_eq!(dict.len(), 2);
            let app_dict = dict.get("App").unwrap().clone().into_dictionary().unwrap();
            assert_eq!(app_dict.len(), 1);
            let name = app_dict.get("Name").unwrap().as_name().unwrap();
            assert_eq!(name, "");
        }
    }

    #[test]
    fn dict_with_empty_name_as_key() {
        use crate::object::NoResolve;
        use super::{ParseFlags, Context};

        {
            let data = b"<</ true>>";
            let primitive = super::parse(data, &NoResolve, ParseFlags::DICT).unwrap();
            let dict = primitive.into_dictionary().unwrap();

            assert_eq!(dict.len(), 1);
            assert!(dict.get("").unwrap().as_bool().unwrap());
        }

        {
            let data = b"<</Length 0/ true>>stream\nendstream\n";
            let stream = super::parse_stream(data, &NoResolve, &Context::fake()).unwrap();
            let dict = stream.info;

            assert_eq!(dict.len(), 2);
            assert!(dict.get("").unwrap().as_bool().unwrap());
        }
    }

    #[test]
    fn empty_array() {
        use crate::object::NoResolve;
        use super::ParseFlags;

        let data = b"[]";
        let primitive = super::parse(data, &NoResolve, ParseFlags::ARRAY).unwrap();
        let array = primitive.into_array().unwrap();
        assert!(array.is_empty());
    }

    #[test]
    fn compact_array() {
        use crate::object::NoResolve;
        use crate::primitive::{Primitive, PdfString};
        use super::lexer::Lexer;
        use super::*;
        let mut lx = Lexer::new(b"[(Complete L)20(egend for Physical and P)20(olitical Maps)]TJ");
        assert_eq!(parse_with_lexer(&mut lx, &NoResolve, ParseFlags::ANY).unwrap(),
            Primitive::Array(vec![
                Primitive::String(PdfString::new("Complete L".into())),
                Primitive::Integer(20),
                Primitive::String(PdfString::new("egend for Physical and P".into())),
                Primitive::Integer(20),
                Primitive::String(PdfString::new("olitical Maps".into()))
            ])
        );
        assert_eq!(lx.next().unwrap().as_str().unwrap(), "TJ");
        assert!(lx.next().unwrap_err().is_eof());
    }
}
56
src-pdfrs/pdf/src/parser/parse_object.rs
Normal file
56
src-pdfrs/pdf/src/parser/parse_object.rs
Normal file
@ -0,0 +1,56 @@
// Considering whether to impl Object and IndirectObject here.

use crate::parser::{lexer::*, MAX_DEPTH};
use crate::error::*;
use crate::primitive::{Primitive, PdfStream};
use crate::parser::{parse_with_lexer_ctx, parse_stream_with_lexer, Context, ParseFlags};
use crate::object::*;
use crate::crypt::Decoder;

/// Parses an Object starting at the current position of `lexer`. Almost the same as
/// `Reader::parse_object`, but this function does not take a `Reader`, at the expense
/// that it cannot dereference indirect references.
pub fn parse_indirect_object(lexer: &mut Lexer, r: &impl Resolve, decoder: Option<&Decoder>, flags: ParseFlags) -> Result<(PlainRef, Primitive)> {
    let id = PlainRef {
        id: t!(lexer.next()).to::<ObjNr>()?,
        gen: t!(lexer.next()).to::<GenNr>()?,
    };
    lexer.next_expect("obj")?;

    let ctx = Context {
        decoder,
        id,
    };
    let obj = t!(parse_with_lexer_ctx(lexer, r, Some(&ctx), flags, MAX_DEPTH));

    if r.options().allow_missing_endobj {
        let pos = lexer.get_pos();
        if let Err(e) = lexer.next_expect("endobj") {
            warn!("error parsing obj {} {}: {:?}", id.id, id.gen, e);
            lexer.set_pos(pos);
        }
    } else {
        t!(lexer.next_expect("endobj"));
    }

    Ok((id, obj))
}
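
// For reference, the input form consumed above (a sketch):
//
//     12 0 obj
//     << /Type /Example >>
//     endobj
//
// yields (PlainRef { id: 12, gen: 0 }, Primitive::Dictionary(..)). With
// `allow_missing_endobj` set, a missing `endobj` only logs a warning.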

pub fn parse_indirect_stream(lexer: &mut Lexer, r: &impl Resolve, decoder: Option<&Decoder>) -> Result<(PlainRef, PdfStream)> {
    let id = PlainRef {
        id: t!(lexer.next()).to::<ObjNr>()?,
        gen: t!(lexer.next()).to::<GenNr>()?,
    };
    lexer.next_expect("obj")?;

    let ctx = Context {
        decoder,
        id,
    };
    let stm = t!(parse_stream_with_lexer(lexer, r, &ctx));

    t!(lexer.next_expect("endobj"));

    Ok((id, stm))
}
144
src-pdfrs/pdf/src/parser/parse_xref.rs
Normal file
144
src-pdfrs/pdf/src/parser/parse_xref.rs
Normal file
@ -0,0 +1,144 @@
use crate::error::*;
use crate::parser::lexer::Lexer;
use crate::xref::{XRef, XRefSection, XRefInfo};
use crate::primitive::{Primitive, Dictionary};
use crate::object::*;
use crate::parser::{parse_with_lexer, ParseFlags};
use crate::parser::parse_object::parse_indirect_stream;
use std::convert::TryInto;

// Just the part of the parser that reads xref sections from an xref stream.
/// Takes `&mut &[u8]` so that it can "consume" data as it reads
fn parse_xref_section_from_stream(first_id: u32, mut num_entries: usize, width: &[usize], data: &mut &[u8], resolve: &impl Resolve) -> Result<XRefSection> {
    let mut entries = Vec::new();
    let [w0, w1, w2]: [usize; 3] = width.try_into().map_err(|_| other!("invalid xref length array"))?;
    if num_entries * (w0 + w1 + w2) > data.len() {
        if resolve.options().allow_xref_error {
            warn!("not enough xref data. truncating.");
            num_entries = data.len() / (w0 + w1 + w2);
        } else {
            bail!("not enough xref data");
        }
    }
    for _ in 0..num_entries {
        // A type-field width of 0 means the default type, 1 (in use).
        // TODO: widths of 0 for the other fields should also fall back to the
        // defaults given in the PDF reference.
        let _type = if w0 == 0 {
            1
        } else {
            read_u64_from_stream(w0, data)?
        };
        let field1 = read_u64_from_stream(w1, data)?;
        let field2 = read_u64_from_stream(w2, data)?;

        let entry =
            match _type {
                0 => XRef::Free { next_obj_nr: field1 as ObjNr, gen_nr: field2 as GenNr },
                1 => XRef::Raw { pos: field1 as usize, gen_nr: field2 as GenNr },
                2 => XRef::Stream { stream_id: field1 as ObjNr, index: field2 as usize },
                _ => return Err(PdfError::XRefStreamType { found: _type }), // TODO: Should actually just be seen as a reference to the null object
            };
        entries.push(entry);
    }
    Ok(XRefSection {
        first_id,
        entries,
    })
}
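
// Worked example (follows the loop above): with W = [1, 2, 1], the four bytes
// [0x01, 0x00, 0x1F, 0x05] decode to _type = 1, field1 = 0x001F and field2 = 5,
// i.e. XRef::Raw { pos: 0x1F, gen_nr: 5 }.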

/// Helper to read an integer with a certain number of bytes `width` from the stream.
fn read_u64_from_stream(width: usize, data: &mut &[u8]) -> Result<u64> {
    if width > std::mem::size_of::<u64>() {
        return Err(PdfError::Other { msg: format!("xref stream entry has invalid width {}", width) });
    }
    if width > data.len() {
        return Err(PdfError::Other { msg: format!("xref stream entry has width {} but only {} bytes left to read", width, data.len()) });
    }
    let mut result = 0;
    for i in (0..width).rev() {
        let base = 8 * i; // shift decreases from 8*(width-1) down to 0: big-endian
        let c: u8 = data[0];
        *data = &data[1..]; // Consume byte
        result += u64::from(c) << base;
    }
    Ok(result)
}
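
// A sketch test (not part of the original commit) pinning down the big-endian
// behaviour of the helper above.
#[test]
fn read_u64_from_stream_is_big_endian() {
    let buf = [0x01u8, 0x02, 0x03, 0xff];
    let mut data = &buf[..];
    assert_eq!(read_u64_from_stream(3, &mut data).unwrap(), 0x010203);
    // The three bytes read have been consumed from the front of the slice.
    assert_eq!(data, &[0xff]);
}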

/// Reads xref sections (from an xref stream) and the trailer, starting at the position of the Lexer.
pub fn parse_xref_stream_and_trailer(lexer: &mut Lexer, resolve: &impl Resolve) -> Result<(Vec<XRefSection>, Dictionary)> {
    let xref_stream = t!(parse_indirect_stream(lexer, resolve, None)).1;
    let trailer = if t!(lexer.next()) == "trailer" {
        let trailer = t!(parse_with_lexer(lexer, resolve, ParseFlags::DICT));
        t!(trailer.into_dictionary())
    } else {
        xref_stream.info.clone()
    };

    let xref_stream = t!(Stream::<XRefInfo>::from_primitive(Primitive::Stream(xref_stream), resolve));
    let mut data_left = &*t!(xref_stream.data(resolve));

    let width = &xref_stream.w;

    let index = &xref_stream.index;

    if index.len() % 2 != 0 {
        return Err(PdfError::Other { msg: format!("xref stream index has {} elements, which is not an even number", index.len()) });
    }

    let mut sections = Vec::new();
    for (first_id, num_objects) in index.chunks_exact(2).map(|c| (c[0], c[1])) {
        let section = t!(parse_xref_section_from_stream(first_id, num_objects as usize, width, &mut data_left, resolve));
        sections.push(section);
    }

    Ok((sections, trailer))
}


/// Reads xref sections (from a classic table) and the trailer, starting at the position of the Lexer.
pub fn parse_xref_table_and_trailer(lexer: &mut Lexer, resolve: &impl Resolve) -> Result<(Vec<XRefSection>, Dictionary)> {
    let mut sections = Vec::new();

    // Keep reading subsections until we hit `trailer`
    while lexer.peek()? != "trailer" {
        let start_id = t!(lexer.next_as::<u32>());
        let num_ids = t!(lexer.next_as::<u32>());

        let mut section = XRefSection::new(start_id);

        for i in 0..num_ids {
            let w1 = t!(lexer.next());
            if w1 == "trailer" {
                return Err(PdfError::Other { msg: format!("xref table declares {} entries, but only {} follow.", num_ids, i) });
            }
            let w2 = t!(lexer.next());
            let w3 = t!(lexer.next());
            if w3 == "f" {
                section.add_free_entry(t!(w1.to::<ObjNr>()), t!(w2.to::<GenNr>()));
            } else if w3 == "n" {
                section.add_inuse_entry(t!(w1.to::<usize>()), t!(w2.to::<GenNr>()));
            } else {
                return Err(PdfError::UnexpectedLexeme { pos: lexer.get_pos(), lexeme: w3.to_string(), expected: "f or n" });
            }
        }
        sections.push(section);
    }

    t!(lexer.next_expect("trailer"));
    let trailer = t!(parse_with_lexer(lexer, resolve, ParseFlags::DICT));
    let trailer = t!(trailer.into_dictionary());

    Ok((sections, trailer))
}
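
// The classic table form consumed above looks like this (sketch):
//
//     xref
//     0 3
//     0000000000 65535 f
//     0000000017 00000 n
//     0000000081 00000 n
//     trailer
//     << /Size 3 /Root 1 0 R >>
//
// Each subsection header is `first_id count`; each entry is `offset gen f|n`.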

pub fn read_xref_and_trailer_at(lexer: &mut Lexer, resolve: &impl Resolve) -> Result<(Vec<XRefSection>, Dictionary)> {
    let next_word = t!(lexer.next());
    if next_word == "xref" {
        // Read classic xref table
        parse_xref_table_and_trailer(lexer, resolve)
    } else {
        // Read xref stream
        lexer.back()?;
        parse_xref_stream_and_trailer(lexer, resolve)
    }
}
82
src-pdfrs/pdf/src/path.rs
Normal file
82
src-pdfrs/pdf/src/path.rs
Normal file
@ -0,0 +1,82 @@
use std::io::{self, Write};
use mint::Point2;
type Point = Point2<f32>;

pub enum FillMode {
    NonZero,
    EvenOdd
}

struct PathBuilder<W: Write> {
    out: W,
    current: Point
}
impl<W: Write> PathBuilder<W> {
    pub fn new<P>(writer: W, start: P) -> PathBuilder<W>
        where P: Into<Point>
    {
        PathBuilder {
            out: writer,
            current: start.into()
        }
    }

    /// Begin a new subpath by moving the current point to `p`,
    /// omitting any connecting line segment. If
    /// the previous path construction operator in the current path
    /// was also m, the new m overrides it; no vestige of the
    /// previous m operation remains in the path.
    /// (Named `move_to` because `move` is a Rust keyword.)
    pub fn move_to<P: Into<Point>>(&mut self, p: P) -> io::Result<()> {
        let p = p.into();
        writeln!(self.out, "{} {} m", p.x, p.y)?;
        self.current = p;
        Ok(())
    }
    /// Append a straight line segment from the current point to the
    /// point `p`. The new current point shall be `p`.
    pub fn line<P: Into<Point>>(&mut self, p: P) -> io::Result<()> {
        let p = p.into();
        writeln!(self.out, "{} {} l", p.x, p.y)?;
        self.current = p;
        Ok(())
    }

    /// Append a quadratic Bézier curve to the current path.
    /// The curve shall extend from the current point to the point `p`,
    /// using `c` as the Bézier control point.
    /// The new current point shall be `p`.
    ///
    /// NOTE: The quadratic Bézier curve is translated into a cubic Bézier curve,
    /// since PDF does not allow the former. Degree elevation places each cubic
    /// control point 2/3 of the way from the matching endpoint towards `c`.
    pub fn quadratic<P: Into<Point>>(&mut self, c: P, p: P) -> io::Result<()> {
        let (c, p) = (c.into(), p.into());
        let c1 = Point {
            x: (2. / 3.) * c.x + (1. / 3.) * self.current.x,
            y: (2. / 3.) * c.y + (1. / 3.) * self.current.y,
        };
        let c2 = Point {
            x: (2. / 3.) * c.x + (1. / 3.) * p.x,
            y: (2. / 3.) * c.y + (1. / 3.) * p.y,
        };
        writeln!(self.out, "{} {} {} {} {} {} c", c1.x, c1.y, c2.x, c2.y, p.x, p.y)?;
        self.current = p;
        Ok(())
    }

    /// Append a cubic Bézier curve to the current path.
    /// The curve shall extend from the current point to the point `p`,
    /// using `c1` and `c2` as the Bézier control points.
    /// The new current point shall be `p`.
    pub fn cubic<P: Into<Point>>(&mut self, c1: P, c2: P, p: P) -> io::Result<()> {
        let (c1, c2, p) = (c1.into(), c2.into(), p.into());
        // PDF has shorthand operators when a control point coincides with
        // an endpoint: `v` when c1 is the current point, `y` when c2 is `p`.
        if c1 == self.current {
            writeln!(self.out, "{} {} {} {} v", c2.x, c2.y, p.x, p.y)?;
        } else if c2 == p {
            writeln!(self.out, "{} {} {} {} y", c1.x, c1.y, p.x, p.y)?;
        } else {
            writeln!(self.out, "{} {} {} {} {} {} c", c1.x, c1.y, c2.x, c2.y, p.x, p.y)?;
        }
        self.current = p;
        Ok(())
    }

    pub fn close(&mut self) -> io::Result<()> {
        writeln!(self.out, "h")
    }

    pub fn fill(&mut self, mode: FillMode) -> io::Result<()> {
        match mode {
            FillMode::NonZero => writeln!(self.out, "f"),
            FillMode::EvenOdd => writeln!(self.out, "f*")
        }
    }
}
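
// Usage sketch (hypothetical; `Vec<u8>` implements `Write`):
//
//     let mut buf = Vec::new();
//     let mut pb = PathBuilder::new(&mut buf, Point { x: 0.0, y: 0.0 });
//     pb.move_to(Point { x: 0.0, y: 0.0 })?;
//     pb.line(Point { x: 10.0, y: 0.0 })?;
//     pb.close()?;
//     pb.fill(FillMode::NonZero)?;
//     // buf now holds the content-stream operators "0 0 m\n10 0 l\nh\nf\n".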
897
src-pdfrs/pdf/src/primitive.rs
Normal file
897
src-pdfrs/pdf/src/primitive.rs
Normal file
@ -0,0 +1,897 @@
use crate::error::*;
use crate::object::{PlainRef, Resolve, Object, NoResolve, ObjectWrite, Updater, DeepClone, Cloner};

use std::sync::Arc;
use std::{str, fmt, io};
use std::ops::{Index, Range};
use std::ops::Deref;
use std::convert::TryInto;
use std::borrow::{Borrow, Cow};
use indexmap::IndexMap;
use itertools::Itertools;
use istring::{SmallString, IBytes};
use datasize::DataSize;

#[derive(Clone, Debug, PartialEq)]
pub enum Primitive {
    Null,
    Integer (i32),
    Number (f32),
    Boolean (bool),
    String (PdfString),
    Stream (PdfStream),
    Dictionary (Dictionary),
    Array (Vec<Primitive>),
    Reference (PlainRef),
    Name (SmallString),
}
impl DataSize for Primitive {
    const IS_DYNAMIC: bool = true;
    const STATIC_HEAP_SIZE: usize = std::mem::size_of::<Self>();

    fn estimate_heap_size(&self) -> usize {
        match self {
            Primitive::String(ref s) => s.estimate_heap_size(),
            Primitive::Stream(ref s) => s.estimate_heap_size(),
            Primitive::Dictionary(ref d) => d.estimate_heap_size(),
            Primitive::Array(ref arr) => arr.estimate_heap_size(),
            Primitive::Name(ref s) => s.estimate_heap_size(),
            _ => 0
        }
    }
}

impl fmt::Display for Primitive {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Primitive::Null => write!(f, "null"),
            Primitive::Integer(i) => i.fmt(f),
            Primitive::Number(n) => n.fmt(f),
            Primitive::Boolean(b) => b.fmt(f),
            Primitive::String(ref s) => write!(f, "{:?}", s),
            Primitive::Stream(_) => write!(f, "stream"),
            Primitive::Dictionary(ref d) => d.fmt(f),
            Primitive::Array(ref arr) => write!(f, "[{}]", arr.iter().format(", ")),
            Primitive::Reference(r) => write!(f, "@{}", r.id),
            Primitive::Name(ref s) => write!(f, "/{}", s)
        }
    }
}
impl Primitive {
    pub fn serialize(&self, out: &mut impl io::Write) -> Result<()> {
        match self {
            Primitive::Null => write!(out, "null")?,
            Primitive::Integer(i) => write!(out, "{}", i)?,
            Primitive::Number(n) => write!(out, "{}", n)?,
            Primitive::Boolean(b) => write!(out, "{}", b)?,
            Primitive::String(ref s) => s.serialize(out)?,
            Primitive::Stream(ref s) => s.serialize(out)?,
            Primitive::Dictionary(ref d) => d.serialize(out)?,
            Primitive::Array(ref arr) => serialize_list(arr, out)?,
            Primitive::Reference(r) => write!(out, "{} {} R", r.id, r.gen)?,
            Primitive::Name(ref s) => serialize_name(s, out)?,
        }
        Ok(())
    }
    pub fn array<O, T, I, U>(i: I, update: &mut U) -> Result<Primitive>
        where O: ObjectWrite, I: Iterator<Item=T>,
              T: Borrow<O>, U: Updater
    {
        i.map(|t| t.borrow().to_primitive(update)).collect::<Result<_>>().map(Primitive::Array)
    }
    pub fn name(name: impl Into<SmallString>) -> Primitive {
        Primitive::Name(name.into())
    }
}

fn serialize_list(arr: &[Primitive], out: &mut impl io::Write) -> Result<()> {
    let mut parts = arr.iter();
    write!(out, "[")?;
    if let Some(first) = parts.next() {
        first.serialize(out)?;
    }
    for p in parts {
        write!(out, " ")?;
        p.serialize(out)?;
    }
    write!(out, "]")?;
    Ok(())
}

pub fn serialize_name(s: &str, out: &mut impl io::Write) -> Result<()> {
    write!(out, "/")?;
    for b in s.chars() {
        match b {
            '\\' | '(' | ')' => write!(out, r"\")?,
            c if c > '~' => panic!("only ASCII"),
            _ => ()
        }
        write!(out, "{}", b)?;
    }
    Ok(())
}
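
// Example of the escaping above (a sketch): serialize_name("A(B)", &mut out)
// writes `/A\(B\)`. Any character above '~' panics, since only ASCII names are
// supported here; PDF's `#xx` escape form is handled on the parsing side.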

/// Primitive Dictionary type.
#[derive(Default, Clone, PartialEq)]
pub struct Dictionary {
    dict: IndexMap<Name, Primitive>
}
impl Dictionary {
    pub fn new() -> Dictionary {
        Dictionary { dict: IndexMap::new() }
    }
    pub fn len(&self) -> usize {
        self.dict.len()
    }
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
    pub fn get(&self, key: &str) -> Option<&Primitive> {
        self.dict.get(key)
    }
    pub fn insert(&mut self, key: impl Into<Name>, val: impl Into<Primitive>) -> Option<Primitive> {
        self.dict.insert(key.into(), val.into())
    }
    pub fn iter(&self) -> impl Iterator<Item=(&Name, &Primitive)> {
        self.dict.iter()
    }
    pub fn remove(&mut self, key: &str) -> Option<Primitive> {
        self.dict.remove(key)
    }
    /// Like `remove`, but takes the name of the calling type and returns `PdfError::MissingEntry` if the entry is not found.
    pub fn require(&mut self, typ: &'static str, key: &str) -> Result<Primitive> {
        self.remove(key).ok_or(
            PdfError::MissingEntry {
                typ,
                field: key.into()
            }
        )
    }
    /// Assert that the given key/value pair is in the dictionary (`required=true`),
    /// or that the key is not present at all (`required=false`).
    pub fn expect(&self, typ: &'static str, key: &str, value: &str, required: bool) -> Result<()> {
        match self.dict.get(key) {
            Some(ty) => {
                let ty = ty.as_name()?;
                if ty != value {
                    Err(PdfError::KeyValueMismatch {
                        key: key.into(),
                        value: value.into(),
                        found: ty.into()
                    })
                } else {
                    Ok(())
                }
            },
            None if required => Err(PdfError::MissingEntry { typ, field: key.into() }),
            None => Ok(())
        }
    }
    pub fn append(&mut self, other: Dictionary) {
        self.dict.extend(other.dict);
    }
}
impl DataSize for Dictionary {
    const IS_DYNAMIC: bool = true;
    const STATIC_HEAP_SIZE: usize = std::mem::size_of::<Self>();
    fn estimate_heap_size(&self) -> usize {
        self.iter().map(|(k, v)| 16 + k.estimate_heap_size() + v.estimate_heap_size()).sum()
    }
}
impl ObjectWrite for Dictionary {
    fn to_primitive(&self, _update: &mut impl Updater) -> Result<Primitive> {
        Ok(Primitive::Dictionary(self.clone()))
    }
}
impl DeepClone for Dictionary {
    fn deep_clone(&self, cloner: &mut impl Cloner) -> Result<Self> {
        Ok(Dictionary {
            dict: self.dict.iter()
                .map(|(key, value)| Ok((key.clone(), value.deep_clone(cloner)?)))
                .try_collect::<_, _, PdfError>()?
        })
    }
}
impl Deref for Dictionary {
    type Target = IndexMap<Name, Primitive>;
    fn deref(&self) -> &IndexMap<Name, Primitive> {
        &self.dict
    }
}
impl Dictionary {
    fn serialize(&self, out: &mut impl io::Write) -> Result<()> {
        writeln!(out, "<<")?;
        for (key, val) in self.iter() {
            write!(out, "{} ", key)?;
            val.serialize(out)?;
            writeln!(out)?;
        }
        writeln!(out, ">>")?;
        Ok(())
    }
}
impl fmt::Debug for Dictionary {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        writeln!(f, "{{")?;
        for (k, v) in self {
            writeln!(f, "{:>15}: {}", k, v)?;
        }
        write!(f, "}}")
    }
}
impl fmt::Display for Dictionary {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "<{}>", self.iter().format_with(", ", |(k, v), f| f(&format_args!("{}={}", k, v))))
    }
}
impl<'a> Index<&'a str> for Dictionary {
    type Output = Primitive;
    fn index(&self, idx: &'a str) -> &Primitive {
        self.dict.index(idx)
    }
}
impl IntoIterator for Dictionary {
    type Item = (Name, Primitive);
    type IntoIter = indexmap::map::IntoIter<Name, Primitive>;
    fn into_iter(self) -> Self::IntoIter {
        self.dict.into_iter()
    }
}
impl<'a> IntoIterator for &'a Dictionary {
    type Item = (&'a Name, &'a Primitive);
    type IntoIter = indexmap::map::Iter<'a, Name, Primitive>;
    fn into_iter(self) -> Self::IntoIter {
        self.dict.iter()
    }
}

/// Primitive Stream (as opposed to the higher-level `Stream`)
#[derive(Clone, Debug, PartialEq, DataSize)]
pub struct PdfStream {
    pub info: Dictionary,
    pub (crate) inner: StreamInner,
}

#[derive(Clone, Debug, PartialEq, DataSize)]
pub enum StreamInner {
    InFile { id: PlainRef, file_range: Range<usize> },
    Pending { data: Arc<[u8]> },
}
impl Object for PdfStream {
    fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result<Self> {
        match p {
            Primitive::Stream (stream) => Ok(stream),
            Primitive::Reference (r) => PdfStream::from_primitive(resolve.resolve(r)?, resolve),
            p => Err(PdfError::UnexpectedPrimitive { expected: "Stream", found: p.get_debug_name() })
        }
    }
}
impl ObjectWrite for PdfStream {
    fn to_primitive(&self, _update: &mut impl Updater) -> Result<Primitive> {
        Ok(self.clone().into())
    }
}
impl PdfStream {
    pub fn serialize(&self, out: &mut impl io::Write) -> Result<()> {
        self.info.serialize(out)?;

        writeln!(out, "stream")?;
        match self.inner {
            StreamInner::InFile { .. } => {
                // Serializing a stream that still lives in the source file would
                // require access to that file; not supported yet.
                unimplemented!()
            }
            StreamInner::Pending { ref data } => {
                out.write_all(data)?;
            }
        }
        writeln!(out, "\nendstream")?;
        Ok(())
    }
    pub fn raw_data(&self, resolve: &impl Resolve) -> Result<Arc<[u8]>> {
        match self.inner {
            StreamInner::InFile { id, ref file_range } => resolve.stream_data(id, file_range.clone()),
            StreamInner::Pending { ref data } => Ok(data.clone())
        }
    }
}
impl DeepClone for PdfStream {
    fn deep_clone(&self, cloner: &mut impl Cloner) -> Result<Self> {
        let data = match self.inner {
            StreamInner::InFile { id, ref file_range } => cloner.stream_data(id, file_range.clone())?,
            StreamInner::Pending { ref data } => data.clone()
        };
        Ok(PdfStream {
            info: self.info.deep_clone(cloner)?, inner: StreamInner::Pending { data }
        })
    }
}


macro_rules! unexpected_primitive {
    ($expected:ident, $found:expr) => (
        Err(PdfError::UnexpectedPrimitive {
            expected: stringify!($expected),
            found: $found
        })
    )
}

#[derive(Clone, PartialEq, Eq, Hash, Debug, Ord, PartialOrd, DataSize)]
pub struct Name(pub SmallString);
impl Name {
    #[inline]
    pub fn as_str(&self) -> &str {
        &self.0
    }
}
impl Deref for Name {
    type Target = str;
    #[inline]
    fn deref(&self) -> &str {
        &self.0
    }
}
impl From<String> for Name {
    #[inline]
    fn from(s: String) -> Name {
        Name(s.into())
    }
}
impl From<SmallString> for Name {
    #[inline]
    fn from(s: SmallString) -> Name {
        Name(s)
    }
}
impl<'a> From<&'a str> for Name {
    #[inline]
    fn from(s: &'a str) -> Name {
        Name(s.into())
    }
}
impl PartialEq<str> for Name {
    #[inline]
    fn eq(&self, rhs: &str) -> bool {
        self.as_str() == rhs
    }
}
impl fmt::Display for Name {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "/{}", self.0)
    }
}
impl std::borrow::Borrow<str> for Name {
    #[inline]
    fn borrow(&self) -> &str {
        self.0.as_str()
    }
}
#[test]
fn test_name() {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let s = "Hello World!";
    let hasher = DefaultHasher::new();

    fn hash(hasher: &DefaultHasher, value: impl Hash) -> u64 {
        let mut hasher = hasher.clone();
        value.hash(&mut hasher);
        hasher.finish()
    }
    // Name must hash like the str it borrows as, or map lookups by &str break.
    assert_eq!(hash(&hasher, Name(s.into())), hash(&hasher, s));
}

/// Primitive String type.
#[derive(Clone, PartialEq, Eq, Hash, DataSize)]
pub struct PdfString {
    pub data: IBytes,
}
impl fmt::Debug for PdfString {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "\"")?;
        for &b in self.data.as_slice() {
            match b {
                b'"' => write!(f, "\\\"")?,
                b' ' ..= b'~' => write!(f, "{}", b as char)?,
                o @ 0 ..= 7 => write!(f, "\\{}", o)?,
                x => write!(f, "\\x{:02x}", x)?
            }
        }
        write!(f, "\"")
    }
}
impl Object for PdfString {
    fn from_primitive(p: Primitive, r: &impl Resolve) -> Result<Self> {
        match p {
            Primitive::String (string) => Ok(string),
            Primitive::Reference(id) => PdfString::from_primitive(r.resolve(id)?, &NoResolve),
            _ => unexpected_primitive!(String, p.get_debug_name()),
        }
    }
}
impl ObjectWrite for PdfString {
    fn to_primitive(&self, _update: &mut impl Updater) -> Result<Primitive> {
        Ok(Primitive::String(self.clone()))
    }
}

impl PdfString {
    pub fn serialize(&self, out: &mut impl io::Write) -> Result<()> {
        // Data with bytes outside the ASCII range is written in hex form,
        // everything else as a literal string with the delimiters escaped.
        if self.data.iter().any(|&b| b >= 0x80) {
            write!(out, "<")?;
            for &b in self.data.as_slice() {
                write!(out, "{:02x}", b)?;
            }
            write!(out, ">")?;
        } else {
            write!(out, r"(")?;
            for &b in self.data.as_slice() {
                match b {
                    b'\\' | b'(' | b')' => write!(out, r"\")?,
                    _ => ()
                }
                out.write_all(&[b])?;
            }
            write!(out, r")")?;
        }
        Ok(())
    }
}
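
// Example of the two output forms above (a sketch): b"Hi" serializes as the
// literal string `(Hi)`, while data containing any byte >= 0x80, such as
// [0x90, 0x1f], falls back to the hex form `<901f>`.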

impl AsRef<[u8]> for PdfString {
    fn as_ref(&self) -> &[u8] {
        self.as_bytes()
    }
}

impl PdfString {
    pub fn new(data: IBytes) -> PdfString {
        PdfString {
            data
        }
    }
    pub fn as_bytes(&self) -> &[u8] {
        &self.data
    }
    pub fn into_bytes(self) -> IBytes {
        self.data
    }
    /// Without encoding information, the PdfString cannot be decoded into a String reliably;
    /// therefore only lossy decoding is possible here, replacing unknown characters.
    /// For correct decoding, see pdf_tools/src/lib.rs.
    pub fn to_string_lossy(&self) -> String {
        if self.data.starts_with(&[0xfe, 0xff]) {
            crate::font::utf16be_to_string_lossy(&self.data[2..])
        }
        else {
            String::from_utf8_lossy(&self.data).into()
        }
    }
    /// Without encoding information, the PdfString cannot always be sensibly decoded into a String.
    /// This converts to a Rust String, but only works for valid UTF-8, UTF-16BE and ASCII data;
    /// if invalid bytes are found, an Error is returned.
    pub fn to_string(&self) -> Result<String> {
        if self.data.starts_with(&[0xfe, 0xff]) {
            Ok(String::from(std::str::from_utf8(crate::font::utf16be_to_string(&self.data[2..])?.as_bytes())
                .map_err(|_| PdfError::Utf8Decode)?))
        }
        else {
            Ok(String::from(std::str::from_utf8(&self.data)
                .map_err(|_| PdfError::Utf8Decode)?))
        }
    }
}
impl<'a> From<&'a str> for PdfString {
    fn from(value: &'a str) -> Self {
        PdfString { data: value.into() }
    }
}

// TODO:
// Noticed some inconsistency here.. I think to_* and as_* should not take Resolve, and not accept
// Reference. Only from_primitive() for the respective type resolves References.
impl Primitive {
    /// For debugging / error messages: get the name of the variant
    pub fn get_debug_name(&self) -> &'static str {
        match *self {
            Primitive::Null => "Null",
            Primitive::Integer (..) => "Integer",
            Primitive::Number (..) => "Number",
            Primitive::Boolean (..) => "Boolean",
            Primitive::String (..) => "String",
            Primitive::Stream (..) => "Stream",
            Primitive::Dictionary (..) => "Dictionary",
            Primitive::Array (..) => "Array",
            Primitive::Reference (..) => "Reference",
            Primitive::Name (..) => "Name",
        }
    }
    /// Resolve the primitive if it is a reference, otherwise do nothing.
    pub fn resolve(self, r: &impl Resolve) -> Result<Primitive> {
        match self {
            Primitive::Reference(id) => r.resolve(id),
            _ => Ok(self)
        }
    }
    pub fn as_integer(&self) -> Result<i32> {
        match *self {
            Primitive::Integer(n) => Ok(n),
            ref p => unexpected_primitive!(Integer, p.get_debug_name())
        }
    }
    pub fn as_u8(&self) -> Result<u8> {
        match *self {
            Primitive::Integer(n) if (0..256).contains(&n) => Ok(n as u8),
            Primitive::Integer(_) => bail!("invalid integer"),
            ref p => unexpected_primitive!(Integer, p.get_debug_name())
        }
    }
    pub fn as_u32(&self) -> Result<u32> {
        match *self {
            Primitive::Integer(n) if n >= 0 => Ok(n as u32),
            Primitive::Integer(_) => bail!("negative integer"),
            ref p => unexpected_primitive!(Integer, p.get_debug_name())
        }
    }
    pub fn as_usize(&self) -> Result<usize> {
        match *self {
            Primitive::Integer(n) if n >= 0 => Ok(n as usize),
            Primitive::Integer(_) => bail!("negative integer"),
            ref p => unexpected_primitive!(Integer, p.get_debug_name())
        }
    }
    pub fn as_number(&self) -> Result<f32> {
        match *self {
            Primitive::Integer(n) => Ok(n as f32),
            Primitive::Number(f) => Ok(f),
            ref p => unexpected_primitive!(Number, p.get_debug_name())
        }
    }
    pub fn as_bool(&self) -> Result<bool> {
        match *self {
            Primitive::Boolean (b) => Ok(b),
            ref p => unexpected_primitive!(Boolean, p.get_debug_name())
        }
    }
    pub fn as_name(&self) -> Result<&str> {
        match self {
            Primitive::Name(ref name) => Ok(name.as_str()),
            p => unexpected_primitive!(Name, p.get_debug_name())
        }
    }
    pub fn as_string(&self) -> Result<&PdfString> {
        match self {
            Primitive::String(ref data) => Ok(data),
            p => unexpected_primitive!(String, p.get_debug_name())
        }
    }
    pub fn as_array(&self) -> Result<&[Primitive]> {
        match self {
            Primitive::Array(ref v) => Ok(v),
            p => unexpected_primitive!(Array, p.get_debug_name())
        }
    }
    pub fn into_reference(self) -> Result<PlainRef> {
        match self {
            Primitive::Reference(id) => Ok(id),
            p => unexpected_primitive!(Reference, p.get_debug_name())
        }
    }
    pub fn into_array(self) -> Result<Vec<Primitive>> {
        match self {
            Primitive::Array(v) => Ok(v),
            p => unexpected_primitive!(Array, p.get_debug_name())
        }
    }
    pub fn into_dictionary(self) -> Result<Dictionary> {
        match self {
            Primitive::Dictionary(dict) => Ok(dict),
            p => unexpected_primitive!(Dictionary, p.get_debug_name())
        }
    }
    pub fn into_name(self) -> Result<Name> {
        match self {
            Primitive::Name(name) => Ok(Name(name)),
            p => unexpected_primitive!(Name, p.get_debug_name())
        }
    }
    pub fn into_string(self) -> Result<PdfString> {
        match self {
            Primitive::String(data) => Ok(data),
            p => unexpected_primitive!(String, p.get_debug_name())
        }
    }
    pub fn to_string_lossy(&self) -> Result<String> {
        let s = self.as_string()?;
        Ok(s.to_string_lossy())
    }
    pub fn to_string(&self) -> Result<String> {
        let s = self.as_string()?;
        s.to_string()
    }
    pub fn into_stream(self, _r: &impl Resolve) -> Result<PdfStream> {
        match self {
            Primitive::Stream (s) => Ok(s),
            p => unexpected_primitive!(Stream, p.get_debug_name())
        }
    }
}

impl From<i32> for Primitive {
    fn from(x: i32) -> Primitive {
        Primitive::Integer(x)
    }
}
impl From<f32> for Primitive {
    fn from(x: f32) -> Primitive {
        Primitive::Number(x)
    }
}
impl From<bool> for Primitive {
    fn from(x: bool) -> Primitive {
        Primitive::Boolean(x)
    }
}
impl From<Name> for Primitive {
    fn from(Name(s): Name) -> Primitive {
        Primitive::Name(s)
    }
}
impl From<PdfString> for Primitive {
    fn from(x: PdfString) -> Primitive {
        Primitive::String (x)
    }
}
impl From<PdfStream> for Primitive {
    fn from(x: PdfStream) -> Primitive {
        Primitive::Stream (x)
    }
}
impl From<Dictionary> for Primitive {
    fn from(x: Dictionary) -> Primitive {
        Primitive::Dictionary (x)
    }
}
impl From<Vec<Primitive>> for Primitive {
    fn from(x: Vec<Primitive>) -> Primitive {
        Primitive::Array (x)
    }
}

impl From<PlainRef> for Primitive {
    fn from(x: PlainRef) -> Primitive {
        Primitive::Reference (x)
    }
}
impl<'a> TryInto<f32> for &'a Primitive {
    type Error = PdfError;
    fn try_into(self) -> Result<f32> {
        self.as_number()
    }
}
impl<'a> TryInto<i32> for &'a Primitive {
    type Error = PdfError;
    fn try_into(self) -> Result<i32> {
        self.as_integer()
    }
}
impl<'a> TryInto<Name> for &'a Primitive {
    type Error = PdfError;
    fn try_into(self) -> Result<Name> {
        match self {
            Primitive::Name(s) => Ok(Name(s.clone())),
            p => Err(PdfError::UnexpectedPrimitive {
                expected: "Name",
                found: p.get_debug_name()
            })
        }
    }
}
impl<'a> TryInto<&'a [Primitive]> for &'a Primitive {
    type Error = PdfError;
    fn try_into(self) -> Result<&'a [Primitive]> {
        self.as_array()
    }
}
impl<'a> TryInto<&'a [u8]> for &'a Primitive {
    type Error = PdfError;
    fn try_into(self) -> Result<&'a [u8]> {
        match *self {
            Primitive::Name(ref s) => Ok(s.as_bytes()),
            Primitive::String(ref s) => Ok(s.as_bytes()),
            ref p => Err(PdfError::UnexpectedPrimitive {
                expected: "Name or String",
                found: p.get_debug_name()
            })
        }
    }
}
impl<'a> TryInto<Cow<'a, str>> for &'a Primitive {
    type Error = PdfError;
    fn try_into(self) -> Result<Cow<'a, str>> {
        match *self {
            Primitive::Name(ref s) => Ok(Cow::Borrowed(s)),
            Primitive::String(ref s) => Ok(Cow::Owned(s.to_string_lossy())),
            ref p => Err(PdfError::UnexpectedPrimitive {
                expected: "Name or String",
                found: p.get_debug_name()
            })
        }
    }
}
impl<'a> TryInto<String> for &'a Primitive {
    type Error = PdfError;
    fn try_into(self) -> Result<String> {
        match *self {
            Primitive::Name(ref s) => Ok(s.as_str().into()),
            Primitive::String(ref s) => Ok(s.to_string_lossy()),
            ref p => Err(PdfError::UnexpectedPrimitive {
                expected: "Name or String",
                found: p.get_debug_name()
            })
        }
    }
}

fn parse_or<T: str::FromStr + Clone>(buffer: &str, range: Range<usize>, default: T) -> T {
    buffer.get(range)
        .map(|s| str::parse::<T>(s).unwrap_or_else(|_| default.clone()))
        .unwrap_or(default)
}
|
||||
|
||||
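// Added note: PDF date strings have the form D:YYYYMMDDHHmmSSOHH'mm', where
// every field after the year is optional and O is one of '+', '-' or 'Z'
// (PDF 32000-1, section 7.9.4). `from_primitive` below fills in any missing
// fields with defaults via `parse_or`.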
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Date {
    pub year: u16,
    pub month: u8,
    pub day: u8,
    pub hour: u8,
    pub minute: u8,
    pub second: u8,
    pub rel: TimeRel,
    pub tz_hour: u8,
    pub tz_minute: u8,
}

#[derive(Clone, Debug, Copy, PartialEq, Eq)]
pub enum TimeRel {
    Earlier,
    Later,
    Universal
}
datasize::non_dynamic_const_heap_size!(Date, std::mem::size_of::<Date>());

impl Object for Date {
    fn from_primitive(p: Primitive, r: &impl Resolve) -> Result<Self> {
        match p.resolve(r)? {
            Primitive::String(PdfString { data }) => {
                let s = str::from_utf8(&data)?;
                if s.starts_with("D:") {
                    let year = match s.get(2..6) {
                        Some(year) => str::parse::<u16>(year)?,
                        None => bail!("Missing obligatory year in date")
                    };

                    let (time, rel, zone) = match s.find(['+', '-', 'Z']) {
                        Some(p) => {
                            let rel = match &s[p..p+1] {
                                "-" => TimeRel::Earlier,
                                "+" => TimeRel::Later,
                                "Z" => TimeRel::Universal,
                                _ => unreachable!()
                            };
                            (&s[..p], rel, &s[p+1..])
                        }
                        None => (s, TimeRel::Universal, "")
                    };

                    let month = parse_or(time, 6..8, 1);
                    let day = parse_or(time, 8..10, 1);
                    let hour = parse_or(time, 10..12, 0);
                    let minute = parse_or(time, 12..14, 0);
                    let second = parse_or(time, 14..16, 0);
                    let tz_hour = parse_or(zone, 0..2, 0);
                    let tz_minute = parse_or(zone, 3..5, 0);

                    Ok(Date {
                        year, month, day,
                        hour, minute, second,
                        tz_hour, tz_minute,
                        rel
                    })
                } else {
                    bail!("Failed parsing date");
                }
            }
            p => unexpected_primitive!(String, p.get_debug_name()),
        }
    }
}

impl ObjectWrite for Date {
    fn to_primitive(&self, _update: &mut impl Updater) -> Result<Primitive> {
        let Date {
            year, month, day,
            hour, minute, second,
            tz_hour, tz_minute, rel,
        } = *self;
        if year > 9999 || day > 99 || hour > 23 || minute >= 60 || second >= 60 || tz_hour >= 24 || tz_minute >= 60 {
            bail!("not a valid date");
        }
        let o = match rel {
            TimeRel::Earlier => "-",
            TimeRel::Later => "+",
            TimeRel::Universal => "Z"
        };

        let s = format!("D:{year:04}{month:02}{day:02}{hour:02}{minute:02}{second:02}{o}{tz_hour:02}'{tz_minute:02}");
        Ok(Primitive::String(PdfString { data: s.into() }))
    }
}

#[cfg(test)]
mod tests {
    use crate::{primitive::{PdfString, TimeRel}, object::{NoResolve, Object}};

    use super::Date;
    #[test]
    fn utf16be_string() {
        let s = PdfString::new([0xfe, 0xff, 0x20, 0x09].as_slice().into());
        assert_eq!(s.to_string_lossy(), "\u{2009}");
    }

    #[test]
    fn utf16be_invalid_string() {
        let s = PdfString::new([0xfe, 0xff, 0xd8, 0x34].as_slice().into());
        let repl_ch = String::from(std::char::REPLACEMENT_CHARACTER);
        assert_eq!(s.to_string_lossy(), repl_ch);
    }

    #[test]
    fn utf16be_invalid_bytelen() {
        let s = PdfString::new([0xfe, 0xff, 0xd8, 0x34, 0x20].as_slice().into());
        let repl_ch = String::from(std::char::REPLACEMENT_CHARACTER);
        assert_eq!(s.to_string_lossy(), repl_ch);
    }

    #[test]
    fn pdfstring_lossy_vs_ascii() {
        // verify UTF-16-BE decoding fails on invalid input
        let s = PdfString::new([0xfe, 0xff, 0xd8, 0x34].as_slice().into());
        assert!(s.to_string().is_err()); // FIXME verify it is a PdfError::Utf16Decode
        // verify UTF-16-BE supports umlauts
        let s = PdfString::new([0xfe, 0xff, 0x00, 0xe4 /*ä*/].as_slice().into());
        assert_eq!(s.to_string_lossy(), "ä");
        assert_eq!(s.to_string().unwrap(), "ä");
        // verify a valid UTF-8 byte stream with an umlaut works
        let s = PdfString::new([b'm', b'i', b't', 0xc3, 0xa4 /*ä*/].as_slice().into());
        assert_eq!(s.to_string_lossy(), "mitä");
        assert_eq!(s.to_string().unwrap(), "mitä");
        // verify a valid ISO-8859-1 byte stream with an umlaut fails
        let s = PdfString::new([b'm', b'i', b't', 0xe4 /*ä in latin1*/].as_slice().into());
        let repl_ch = ['m', 'i', 't', std::char::REPLACEMENT_CHARACTER].iter().collect::<String>();
        assert_eq!(s.to_string_lossy(), repl_ch);
        assert!(s.to_string().is_err()); // FIXME verify it is a PdfError::Utf16Decode
    }

    #[test]
    fn date() {
        let p = PdfString::from("D:199812231952-08'00");
        let d = Date::from_primitive(p.into(), &NoResolve);

        let d2 = Date {
            year: 1998,
            month: 12,
            day: 23,
            hour: 19,
            minute: 52,
            second: 00,
            rel: TimeRel::Earlier,
            tz_hour: 8,
            tz_minute: 0
        };
        assert_eq!(d.unwrap(), d2);
    }
}
42
src-pdfrs/pdf/src/repair.rs
Normal file
42
src-pdfrs/pdf/src/repair.rs
Normal file
@ -0,0 +1,42 @@
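// Added note: this file is an unfinished repair sketch -- `backend`, the
// original parse error `e`, and the enclosing error-handling context are
// assumed to come from surrounding code that is not part of this commit,
// so the functions below do not compile on their own.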

fn build_xref_table() {
    warn!("can't read xref table: {:?}", e);
    let start_offset = t!(backend.locate_start_offset());
    let mut lexer = Lexer::new(t!(backend.read(..)));
    let mut objects = Vec::new();

    (|| -> Result<()> { loop {
        let offset = lexer.get_pos();
        let w1 = t!(lexer.next());
        let w2 = t!(lexer.next());
        let w3 = t!(lexer.next_expect("obj"));
        try_opt!(lexer.seek_substr("endobj"));

        objects.push((t!(w1.to::<ObjNr>()), t!(w2.to::<GenNr>()), offset));
    }})();

    objects.sort_unstable();
    let mut first_id = objects.first().map(|&(n, _, _)| n).unwrap_or(0);
    let mut last_id = objects.last().map(|&(n, _, _)| n).unwrap_or(0);
    let mut xref = XRefTable::new(1 + last_id - first_id);
    for &(obj_nr, gen_nr, offset) in objects.iter() {
        for n in first_id + 1 .. obj_nr {
            xref.push(XRef::Free { next_obj_nr: obj_nr, gen_nr: 0 });
        }
        if obj_nr == last_id {
            warn!("duplicate obj_nr {}", obj_nr);
            continue;
        }
        xref.push(XRef::Raw {
            pos: offset - start_offset,
            gen_nr
        });
        last_id = obj_nr;
    }

    return t!(Err(e));
}

fn build_catalog() {

}
237
src-pdfrs/pdf/src/xref.rs
Normal file
237
src-pdfrs/pdf/src/xref.rs
Normal file
@ -0,0 +1,237 @@
use std::fmt::{Debug, Formatter};
use crate::error::*;
use crate::object::*;
use crate as pdf;
use datasize::DataSize;

///////////////////////////
// Cross-reference table //
///////////////////////////

#[derive(Copy, Clone, Debug)]
pub enum XRef {
    /// Not currently used.
    Free {
        next_obj_nr: ObjNr,
        gen_nr: GenNr
    },

    /// In use.
    Raw {
        pos: usize,
        gen_nr: GenNr
    },
    /// In use and compressed inside an Object Stream.
    Stream {
        stream_id: ObjNr,
        index: usize,
    },

    Promised,

    Invalid
}

impl XRef {
    pub fn get_gen_nr(&self) -> GenNr {
        match *self {
            XRef::Free { gen_nr, .. }
            | XRef::Raw { gen_nr, .. } => gen_nr,
            XRef::Stream { .. } => 0, // TODO I think these always have gen nr 0?
            _ => panic!()
        }
    }
}

/// Runtime lookup table of all objects
#[derive(Clone)]
pub struct XRefTable {
    // None means that it's not specified, and should result in an error if used
    // Thought: None could also mean Free?
    entries: Vec<XRef>
}

impl XRefTable {
    pub fn new(num_objects: ObjNr) -> XRefTable {
        let mut entries = Vec::new();
        entries.resize(num_objects as usize, XRef::Invalid);
        entries.push(XRef::Free { next_obj_nr: 0, gen_nr: 0xffff });
        XRefTable {
            entries,
        }
    }

    pub fn iter(&self) -> impl Iterator<Item = u32> + '_ {
        self.entries.iter().enumerate()
            .filter(|(_, xref)| matches!(xref, XRef::Raw { .. } | XRef::Stream { .. }))
            .map(|(i, _)| i as u32)
    }
    pub fn iter_real(&self) -> impl Iterator<Item = &XRef> + '_ {
        self.entries.iter()
    }

    pub fn get(&self, id: ObjNr) -> Result<XRef> {
        match self.entries.get(id as usize) {
            Some(&entry) => Ok(entry),
            None => Err(PdfError::UnspecifiedXRefEntry { id }),
        }
    }
    pub fn set(&mut self, id: ObjNr, r: XRef) {
        self.entries[id as usize] = r;
    }
    pub fn len(&self) -> usize {
        self.entries.len()
    }
    pub fn is_empty(&self) -> bool {
        self.entries.is_empty()
    }
    pub fn push(&mut self, new_entry: XRef) {
        self.entries.push(new_entry);
    }
    pub fn num_entries(&self) -> usize {
        self.entries.len()
    }
    pub fn max_field_widths(&self) -> (u64, u64) {
        let mut max_a = 0;
        let mut max_b = 0;
        for &e in &self.entries {
            let (a, b) = match e {
                XRef::Raw { pos, gen_nr } => (pos as u64, gen_nr),
                XRef::Free { next_obj_nr, gen_nr } => (next_obj_nr, gen_nr),
                XRef::Stream { stream_id, index } => (stream_id, index as u64),
                _ => continue
            };
            max_a = max_a.max(a);
            max_b = max_b.max(b);
        }
        (max_a, max_b)
    }

    pub fn add_entries_from(&mut self, section: XRefSection) -> Result<()> {
        for (i, &entry) in section.entries() {
            if let Some(dst) = self.entries.get_mut(i) {
                // Only overwrite an existing entry if the new one has a strictly
                // larger generation number
                let should_be_updated = match *dst {
                    XRef::Raw { gen_nr: gen, .. } | XRef::Free { gen_nr: gen, .. }
                        => entry.get_gen_nr() > gen,
                    XRef::Stream { .. } | XRef::Invalid
                        => true,
                    x => bail!("found {:?}", x)
                };
                if should_be_updated {
                    *dst = entry;
                }
            }
        }
        Ok(())
    }
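    // Added note: write_stream below emits the PDF 1.5+ cross-reference stream
    // layout -- one type byte per entry followed by two big-endian fields, with
    // the chosen byte widths recorded in the /W array of the stream dictionary.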
    pub fn write_stream(&self, size: usize) -> Result<Stream<XRefInfo>> {
        let (max_a, max_b) = self.max_field_widths();
        let a_w = byte_len(max_a);
        let b_w = byte_len(max_b);

        let mut data = Vec::with_capacity((1 + a_w + b_w) * size);
        for &x in self.entries.iter().take(size) {
            let (t, a, b) = match x {
                XRef::Free { next_obj_nr, gen_nr } => (0, next_obj_nr, gen_nr),
                XRef::Raw { pos, gen_nr } => (1, pos as u64, gen_nr),
                XRef::Stream { stream_id, index } => (2, stream_id, index as u64),
                x => bail!("invalid xref entry: {:?}", x)
            };
            data.push(t);
            data.extend_from_slice(&a.to_be_bytes()[8 - a_w ..]);
            data.extend_from_slice(&b.to_be_bytes()[8 - b_w ..]);
        }
        let info = XRefInfo {
            size: size as u32,
            index: vec![0, size as u32],
            prev: None,
            w: vec![1, a_w, b_w],
        };

        Ok(Stream::new(info, data))
    }
}

fn byte_len(n: u64) -> usize {
    (64 + 8 - 1 - n.leading_zeros()) as usize / 8 + (n == 0) as usize
}

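// Quick sanity check for byte_len (illustrative sketch, not part of the
// original commit): it returns the minimal big-endian byte width of a value,
// which write_stream uses to size the /W fields.
#[cfg(test)]
mod byte_len_tests {
    use super::byte_len;

    #[test]
    fn widths() {
        assert_eq!(byte_len(0), 1); // zero still occupies one byte
        assert_eq!(byte_len(0xff), 1);
        assert_eq!(byte_len(0x100), 2);
        assert_eq!(byte_len(u64::MAX), 8);
    }
}
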
impl Debug for XRefTable {
    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
        for (i, entry) in self.entries.iter().enumerate() {
            match *entry {
                XRef::Free { next_obj_nr, gen_nr } => {
                    writeln!(f, "{:4}: {:010} {:05} f", i, next_obj_nr, gen_nr)?
                },
                XRef::Raw { pos, gen_nr } => {
                    writeln!(f, "{:4}: {:010} {:05} n", i, pos, gen_nr)?
                },
                XRef::Stream { stream_id, index } => {
                    writeln!(f, "{:4}: in stream {}, index {}", i, stream_id, index)?
                },
                XRef::Promised => {
                    writeln!(f, "{:4}: Promised?", i)?
                },
                XRef::Invalid => {
                    writeln!(f, "{:4}: Invalid!", i)?
                }
            }
        }
        Ok(())
    }
}

/// As found in PDF files
#[derive(Debug)]
pub struct XRefSection {
    pub first_id: u32,
    pub entries: Vec<XRef>,
}

impl XRefSection {
    pub fn new(first_id: u32) -> XRefSection {
        XRefSection {
            first_id,
            entries: Vec::new(),
        }
    }
    pub fn add_free_entry(&mut self, next_obj_nr: ObjNr, gen_nr: GenNr) {
        self.entries.push(XRef::Free { next_obj_nr, gen_nr });
    }
    pub fn add_inuse_entry(&mut self, pos: usize, gen_nr: GenNr) {
        self.entries.push(XRef::Raw { pos, gen_nr });
    }
    pub fn entries(&self) -> impl Iterator<Item = (usize, &XRef)> {
        self.entries.iter().enumerate().map(move |(i, e)| (i + self.first_id as usize, e))
    }
}

#[derive(Object, ObjectWrite, Debug, DataSize)]
#[pdf(Type = "XRef")]
pub struct XRefInfo {
    // XRefStream fields
    #[pdf(key = "Size")]
    pub size: u32,

    /// Array of pairs of integers, one per subsection: (first object number, number of entries).
    /// Default value (assumed when None): `(0, self.size)`.
    #[pdf(key = "Index", default = "vec![0, size]")]
    pub index: Vec<u32>,

    #[pdf(key = "Prev")]
    prev: Option<i32>,

    #[pdf(key = "W")]
    pub w: Vec<usize>,
}

// read_xref_table
// read_xref_stream
// read_xref_and_trailer_at
126
src-pdfrs/pdf/tests/integration.rs
Normal file
126
src-pdfrs/pdf/tests/integration.rs
Normal file
@ -0,0 +1,126 @@
use std::str;
use std::path::{Path, PathBuf};
use pdf::file::FileOptions;
use pdf::object::*;
use pdf::parser::{parse, ParseFlags};
use glob::glob;

macro_rules! run {
    ($e:expr) => (
        match $e {
            Ok(v) => v,
            Err(e) => {
                panic!("{}", e);
            }
        }
    )
}

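// Added note: `run!` unwraps a Result but panics with the error's Display
// output instead of the Debug form that `unwrap()` would print, which keeps
// the failure messages of these integration tests readable.
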
fn files() -> PathBuf {
    Path::new(env!("CARGO_MANIFEST_DIR")).parent().unwrap().join("files")
}
fn file_path(s: &str) -> PathBuf {
    files().join(s)
}
fn dir_pdfs(path: PathBuf) -> impl Iterator<Item = PathBuf> {
    path.read_dir().unwrap()
        .filter_map(|r| r.ok())
        .map(|e| e.path())
        .filter(|p| p.extension().map(|e| e == "pdf").unwrap_or(false))
}

#[test]
fn open_file() {
    let _ = run!(FileOptions::uncached().open(file_path("example.pdf")));
    #[cfg(all(feature = "mmap", feature = "cache"))]
    let _ = run!({
        use memmap2::Mmap;
        let file = std::fs::File::open(file_path("example.pdf")).expect("can't open file");
        let mmap = unsafe { Mmap::map(&file).expect("can't mmap file") };
        FileOptions::cached().load(mmap)
    });
}

#[cfg(feature = "cache")]
#[test]
fn read_pages() {
    for path in dir_pdfs(files()) {
        println!("\n == Now testing `{}` ==", path.to_str().unwrap());

        let path = path.to_str().unwrap();
        let file = run!(FileOptions::cached().open(path));
        for i in 0 .. file.num_pages() {
            println!("Read page {}", i);
            let _ = file.get_page(i);
        }
    }
}

#[test]
fn user_password() {
    for path in dir_pdfs(file_path("password_protected")) {
        println!("\n\n == Now testing `{}` ==\n", path.to_str().unwrap());

        let path = path.to_str().unwrap();
        let file = run!(FileOptions::uncached().password(b"userpassword").open(path));
        for i in 0 .. file.num_pages() {
            println!("\nRead page {}", i);
            let _ = file.get_page(i);
        }
    }
}

#[test]
fn owner_password() {
    for path in dir_pdfs(file_path("password_protected")) {
        println!("\n\n == Now testing `{}` ==\n", path.to_str().unwrap());

        let path = path.to_str().unwrap();
        let file = run!(FileOptions::uncached().password(b"ownerpassword").open(path));
        for i in 0 .. file.num_pages() {
            println!("\nRead page {}", i);
            let _ = file.get_page(i);
        }
    }
}

// Test for invalid PDFs found by fuzzing.
// We don't care if they give an Err or Ok, as long as they don't panic.
#[cfg(feature = "cache")]
#[test]
fn invalid_pdfs() {
    for path in dir_pdfs(file_path("invalid")) {
        let path = path.to_str().unwrap();
        println!("\n\n == Now testing `{}` ==\n", path);

        match FileOptions::cached().open(path) {
            Ok(file) => {
                for i in 0 .. file.num_pages() {
                    let _ = file.get_page(i);
                }
            }
            Err(_) => {
                continue;
            }
        }
    }
}

#[cfg(feature = "cache")]
#[test]
fn parse_objects_from_stream() {
    use pdf::object::NoResolve;
    let file = run!(FileOptions::cached().open(file_path("xelatex.pdf")));
    let resolver = file.resolver();

    // .. we know that object 13 of that file is an ObjectStream
    let obj_stream: RcRef<ObjectStream> = run!(resolver.get(Ref::new(PlainRef { id: 13, gen: 0 })));
    for i in 0..obj_stream.n_objects() {
        let (data, range) = run!(obj_stream.get_object_slice(i, &resolver));
        let slice = &data[range];
        println!("Object slice #{}: {}\n", i, str::from_utf8(slice).unwrap());
        run!(parse(slice, &NoResolve, ParseFlags::ANY));
    }
}

// TODO test decoding
35
src-pdfrs/pdf/tests/write.rs
Normal file
35
src-pdfrs/pdf/tests/write.rs
Normal file
@ -0,0 +1,35 @@
// TODO: commented out to make it compile
/*
extern crate pdf;

use pdf::file::File;
use pdf::types::*;
use pdf::stream::ObjectStream;

fn main() {
    let mut file = File::new(Vec::new());

    let page_tree_promise = file.promise();
    let mut page_tree = PageTree::root();
    let mut page = Page::new((&page_tree_promise).into());
    page.media_box = Some(Rect {
        left: 0.,
        right: 100.,
        top: 0.,
        bottom: 200.
    });

    // create the content stream
    let content = ObjectStream::new(&mut file);

    // add stream to file
    let content_ref = file.add(content);

    page_tree.add(file.add(PagesNode::Page(page)).unwrap());

    let catalog = Catalog::new(file.fulfill(page_tree_promise, page_tree).unwrap());

    let catalog_ref = file.add(catalog).unwrap();
    file.finish(catalog_ref);
}
*/
12
src-pdfrs/pdf/tests/xref.rs
Normal file
12
src-pdfrs/pdf/tests/xref.rs
Normal file
@ -0,0 +1,12 @@
use pdf::file::FileOptions;

#[test]
fn infinite_loop_invalid_file() {
    assert!(FileOptions::uncached().load(b"startxref%PDF-".as_ref()).is_err());
}

#[test]
fn ending_angle_bracket() {
    assert!(FileOptions::uncached().load(b"%PDF-startxref>".as_ref()).is_err());
    assert!(FileOptions::uncached().load(b"%PDF-startxref<".as_ref()).is_err());
}
18
src-pdfrs/pdf_derive/Cargo.toml
Normal file
18
src-pdfrs/pdf_derive/Cargo.toml
Normal file
@ -0,0 +1,18 @@
[package]
name = "pdf_derive"
version = "0.2.0"
authors = ["Erlend Langseth <3rlendhl@gmail.com>", "Sebastian Köln <s3bk@protonmail.com>"]
homepage = "https://github.com/pdf-rs"
repository = "https://github.com/pdf-rs/pdf_derive"
description = "helper for pdf-rs."
license = "MIT"
edition = "2018"
publish = false

[dependencies]
syn = { version = "2", features = ["full", "extra-traits"] }
proc-macro2 = "1.0.24"
quote = "1"

[lib]
proc-macro = true
845
src-pdfrs/pdf_derive/src/lib.rs
Normal file
845
src-pdfrs/pdf_derive/src/lib.rs
Normal file
@ -0,0 +1,845 @@
//! `pdf_derive` provides a proc macro to derive the Object trait from the `pdf` crate.
//! # Usage
//! There are several ways to derive Object on a struct or enum:
//! ## 1. Struct from PDF Dictionary
//!
//! A lot of dictionary types defined in the PDF 1.7 reference have a finite set of possible
//! fields. Each of these is usually either required or optional. The latter is achieved by using
//! an `Option<T>` or `Vec<T>` as the type of a field.
//!
//! Usually, dictionary types
//! require that the entry `/Type` is some specific string. By default, `pdf_derive` assumes that
//! this should equal the name of the input struct. This can be overridden by setting the `Type`
//! attribute equal to either the expected value of the `/Type` entry, or to `false` in order to
//! omit the type check completely.
//!
//! Checks similar to that of `/Type` can also be specified in the same manner (but the `Type`
//! attribute is special because it accepts a bool).
//!
//! Examples:
//!
//! ```ignore
//! #[derive(Object)]
//! #[pdf(Type="XObject", Subtype="Image")]
//! /// A variant of XObject
//! pub struct ImageDictionary {
//!     #[pdf(key="Width")]
//!     width: i32,
//!     #[pdf(key="Height")]
//!     height: i32,
//!     // [...]
//! }
//! ```
//!
//! This enforces that the dictionary's `/Type` entry is present and equals `/XObject`, and that the
//! `/Subtype` entry is present and equals `/Image`.
//!
//! Each field in the struct needs to implement `Object`. Implementations are provided already for
//! common types like i32, f32, usize, bool, String (from Primitive::Name), Option<T> and Vec<T>.
//! The two latter are initialized to default if the entry isn't found in the input dictionary.
//! Option<T> is therefore frequently used for fields that are optional according to the PDF
//! reference. Vec<T> can also be used for optional fields that can also be arrays (there are quite
//! a few of those in the PDF specs - one or many). However, as stated, it accepts absence of the
//! entry, so **required** fields of type array aren't yet facilitated for.
//!
//! Lastly, for each field, it's possible to define a default value by setting the `default`
//! attribute to a string that can parse as Rust code.
//!
//! Example:
//!
//! ```ignore
//! #[derive(Object)]
//! #[pdf(Type = "XRef")]
//! pub struct XRefInfo {
//!     #[pdf(key = "Filter")]
//!     filter: Vec<StreamFilter>,
//!     #[pdf(key = "Size")]
//!     pub size: i32,
//!     #[pdf(key = "Index", default = "vec![0, size]")]
//!     pub index: Vec<i32>,
//!     // [...]
//! }
//! ```
//!
//!
//! ## 2. Struct from PDF Stream
//! PDF streams consist of a stream dictionary along with the stream data itself. It is assumed that all
//! structs that want to derive Object, where the primitive it converts from is a stream,
//! have a field `info: T`, where `T: Object`, and a field `data: Vec<u8>`.
//!
//! When deriving an Object that converts from Primitive::Stream, the flag `is_stream` is required in
//! the proc macro attributes.
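//!
//! Illustrative sketch (the struct and info type are made up for the example):
//!
//! ```ignore
//! #[derive(Object)]
//! #[pdf(is_stream)]
//! pub struct ExampleStream {
//!     info: ExampleStreamInfo, // any `T: Object`, decoded from the stream dictionary
//!     data: Vec<u8>,           // the raw stream bytes
//! }
//! ```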
//!
//! ## 3. Enum from PDF Name
//! Example:
//!
//! ```ignore
//! #[derive(Object, Debug)]
//! pub enum StreamFilter {
//!     ASCIIHexDecode,
//!     ASCII85Decode,
//!     LZWDecode,
//!     FlateDecode,
//!     JPXDecode,
//!     DCTDecode,
//! }
//! ```
//!
//! In this case, `StreamFilter::from_primitive(primitive)` will return Ok(_) only if the primitive
//! is `Primitive::Name` and matches one of the enum variants.
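//!
//! Variants may also carry explicit integer discriminants (either all of them
//! or none), in which case the conversion goes through `Primitive::Integer`
//! instead of `Primitive::Name`. Illustrative sketch (the enum is made up):
//!
//! ```ignore
//! #[derive(Object, ObjectWrite, Debug)]
//! pub enum ExampleLevel {
//!     Low = 1,
//!     High = 2,
//! }
//! ```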
#![recursion_limit="128"]

extern crate proc_macro;
extern crate syn;
#[macro_use]
extern crate quote;

use proc_macro::TokenStream;
use proc_macro2::{TokenStream as TokenStream2, Span};
use syn::*;
type SynStream = TokenStream2;

// Debugging:
/*
use std::fs::{OpenOptions};
use std::io::Write;
*/


#[proc_macro_derive(Object, attributes(pdf))]
pub fn object(input: TokenStream) -> TokenStream {
    let ast = parse_macro_input!(input as DeriveInput);

    // Build the impl
    impl_object(&ast)
}

#[proc_macro_derive(ObjectWrite, attributes(pdf))]
pub fn objectwrite(input: TokenStream) -> TokenStream {
    let ast = parse_macro_input!(input as DeriveInput);

    // Build the impl
    impl_objectwrite(&ast)
}

#[proc_macro_derive(DeepClone, attributes(pdf))]
pub fn deepclone(input: TokenStream) -> TokenStream {
    let ast = parse_macro_input!(input as DeriveInput);

    // Build the impl
    impl_deepclone(&ast)
}


#[derive(Default)]
struct FieldAttrs {
    key: Option<LitStr>,
    default: Option<LitStr>,
    name: Option<LitStr>,
    skip: bool,
    other: bool,
    indirect: bool,
}
impl FieldAttrs {
    fn new() -> FieldAttrs {
        FieldAttrs {
            key: None,
            default: None,
            name: None,
            skip: false,
            other: false,
            indirect: false,
        }
    }
    fn key(&self) -> &LitStr {
        self.key.as_ref().expect("no 'key' in field attributes")
    }
    fn default(&self) -> Option<Expr> {
        self.default.as_ref().map(|s| parse_str(&s.value()).expect("can't parse `default` as EXPR"))
    }
    fn parse(list: &[Attribute]) -> FieldAttrs {
        let mut attrs = FieldAttrs::new();
        for attr in list.iter().filter(|attr| attr.path().is_ident("pdf")) {
            attr.parse_nested_meta(|meta| {
                if meta.path.is_ident("key") {
                    let value = meta.value()?;
                    attrs.key = Some(value.parse()?);
                    return Ok(());
                }

                if meta.path.is_ident("default") {
                    let value = meta.value()?;
                    attrs.default = Some(value.parse()?);
                    return Ok(());
                }

                if meta.path.is_ident("name") {
                    let value = meta.value()?;
                    attrs.name = Some(value.parse()?);
                    return Ok(());
                }

                if meta.path.is_ident("skip") {
                    attrs.skip = true;
                    return Ok(());
                }

                if meta.path.is_ident("other") {
                    attrs.other = true;
                    return Ok(());
                }

                if meta.path.is_ident("indirect") {
                    attrs.indirect = true;
                    return Ok(());
                }

                Err(meta.error("unsupported key"))
            }).expect("parse error");
        }
        attrs
    }
}


/// Just the attributes for the whole struct
#[derive(Default, Debug)]
struct GlobalAttrs {
    /// List of checks to do in the dictionary (LHS is the key, RHS is the expected value)
    checks: Vec<(String, String)>,
    type_name: Option<String>,
    type_required: bool,
    is_stream: bool
}
impl GlobalAttrs {
    /// The PDF type may be explicitly specified as an attribute with type "Type". Else, it is the name
    /// of the struct.
    fn from_ast(ast: &DeriveInput) -> GlobalAttrs {
        let mut attrs = GlobalAttrs::default();

        for attr in ast.attrs.iter().filter(|attr| attr.path().is_ident("pdf")) {
            attr.parse_nested_meta(|meta| {
                if meta.path.is_ident("Type") {
                    let value = meta.value()?;
                    let lit = value.parse()?;
                    match lit {
                        Lit::Str(ref value) => {
                            let mut value = value.value();
                            attrs.type_required = if value.ends_with('?') {
                                value.pop(); // remove '?'
                                false
                            } else {
                                true
                            };
                            attrs.type_name = Some(value);
                        },
                        _ => panic!("Value of 'Type' attribute must be a String."),
                    };
                    return Ok(())
                }

                if meta.path.is_ident("is_stream") {
                    attrs.is_stream = true;
                    return Ok(())
                }

                if let Ok(value) = meta.value() {
                    let path = &meta.path;
                    let lit = value.parse()?;
                    match lit {
                        Lit::Str(ref value) => {
                            let segments = path.segments
                                .iter()
                                .map(|s| s.ident.to_string())
                                .collect::<Vec<String>>()
                                .join("::");
                            attrs.checks.push((segments, value.value()));
                        }
                        _ => panic!("Other checks must have RHS String."),
                    };
                    return Ok(())
                }

                Ok(())
            }).expect("error with global attrs parsing");
        }

        attrs
    }
}

fn impl_object(ast: &DeriveInput) -> TokenStream {
    let attrs = GlobalAttrs::from_ast(ast);
    match (attrs.is_stream, &ast.data) {
        (true, Data::Struct(ref data)) => impl_object_for_stream(ast, &data.fields).into(),
        (false, Data::Struct(ref data)) => impl_object_for_struct(ast, &data.fields).into(),
        (true, Data::Enum(ref variants)) => impl_enum_from_stream(ast, variants, &attrs).into(),
        (false, Data::Enum(ref variants)) => impl_object_for_enum(ast, variants).into(),
        (_, _) => unimplemented!()
    }
}
fn impl_objectwrite(ast: &DeriveInput) -> TokenStream {
    let attrs = GlobalAttrs::from_ast(ast);
    match (attrs.is_stream, &ast.data) {
        (false, Data::Struct(ref data)) => impl_objectwrite_for_struct(ast, &data.fields).into(),
        (false, Data::Enum(ref variants)) => impl_objectwrite_for_enum(ast, variants).into(),
        (_, _) => unimplemented!()
    }
}
fn impl_deepclone(ast: &DeriveInput) -> TokenStream {
    let attrs = GlobalAttrs::from_ast(ast);
    match &ast.data {
        Data::Struct(ref data) => impl_deepclone_for_struct(ast, &data.fields).into(),
        Data::Enum(ref variants) => impl_deepclone_for_enum(ast, variants).into(),
        _ => unimplemented!()
    }
}

fn enum_pairs(ast: &DeriveInput, data: &DataEnum) -> (Vec<(String, TokenStream2)>, Option<TokenStream2>) {
    let id = &ast.ident;

    let mut pairs = Vec::with_capacity(data.variants.len());
    let mut other = None;

    for var in data.variants.iter() {
        let attrs = FieldAttrs::parse(&var.attrs);
        let var_ident = &var.ident;
        let name = attrs
            .name
            .map(|lit| lit.value())
            .unwrap_or_else(|| var_ident.to_string());
        if attrs.other {
            assert!(other.is_none(), "only one 'other' variant is allowed in a name enum");

            match &var.fields {
                Fields::Unnamed(fields) if fields.unnamed.len() == 1 => {}
                _ => {
                    panic!("the 'other' variant in a name enum should have exactly one unnamed field");
                }
            }
            other = Some(quote! { #id::#var_ident });
        } else {
            pairs.push((name, quote! { #id::#var_ident }));
        }
    }

    (pairs, other)
}


/// Accepts Name to construct enum
fn impl_object_for_enum(ast: &DeriveInput, data: &DataEnum) -> SynStream {
    let id = &ast.ident;
    let (impl_generics, ty_generics, where_clause) = ast.generics.split_for_impl();

    let int_count = data.variants.iter().filter(|var| var.discriminant.is_some()).count();
    if int_count > 0 {
        assert_eq!(int_count, data.variants.len(), "either none or all variants can have a discriminant");

        let parts = data.variants.iter().map(|var| {
            if let Some((_, Expr::Lit(ref lit_expr))) = var.discriminant {
                let var_ident = &var.ident;
                let pat = Pat::from(lit_expr.clone());
                quote! {
                    #pat => Ok(#id::#var_ident)
                }
            } else {
                panic!()
            }
        });

        quote! {
            impl #impl_generics pdf::object::Object for #id #ty_generics #where_clause {
                fn from_primitive(p: pdf::primitive::Primitive, _resolve: &impl pdf::object::Resolve) -> pdf::error::Result<Self> {
                    match p {
                        pdf::primitive::Primitive::Integer(i) => {
                            match i {
                                #( #parts, )*
                                _ => Err(pdf::error::PdfError::UnknownVariant { id: stringify!(#id), name: i.to_string() })
                            }
                        }
                        _ => Err(pdf::error::PdfError::UnexpectedPrimitive { expected: "Integer", found: p.get_debug_name() }),
                    }
                }
            }
        }
    } else {
        let (pairs, other) = enum_pairs(ast, data);

        let mut parts: Vec<_> = pairs
            .iter()
            .map(|(name, var)| {
                quote! {
                    #name => Ok(#var)
                }
            })
            .collect();

        if let Some(other_tokens) = other {
            parts.push(quote! {
                s => Ok(#other_tokens(s.to_string()))
            });
        } else {
            parts.push(quote! {
                s => Err(pdf::error::PdfError::UnknownVariant { id: stringify!(#id), name: s.to_string() })
            });
        }

        quote! {
            impl #impl_generics pdf::object::Object for #id #ty_generics #where_clause {
                fn from_primitive(p: pdf::primitive::Primitive, _resolve: &impl pdf::object::Resolve) -> pdf::error::Result<Self> {
                    match p {
                        pdf::primitive::Primitive::Name(name) => {
                            match name.as_str() {
                                #( #parts, )*
                            }
                        }
                        _ => Err(pdf::error::PdfError::UnexpectedPrimitive { expected: "Name", found: p.get_debug_name() }),
                    }
                }
            }
        }
    }
}
/// Writes the enum as a Name (or an Integer, when the variants have discriminants)
fn impl_objectwrite_for_enum(ast: &DeriveInput, data: &DataEnum) -> SynStream {
    let id = &ast.ident;
    let (impl_generics, ty_generics, where_clause) = ast.generics.split_for_impl();

    let int_count = data.variants.iter().filter(|var| var.discriminant.is_some()).count();
    if int_count > 0 {
        assert_eq!(int_count, data.variants.len(), "either none or all variants can have a discriminant");

        let parts = data.variants.iter().map(|var| {
            if let Some((_, ref expr)) = var.discriminant {
                let var_ident = &var.ident;
                quote! {
                    #id::#var_ident => Ok(Primitive::Integer(#expr))
                }
            } else {
                panic!()
            }
        });

        quote! {
            impl #impl_generics pdf::object::ObjectWrite for #id #ty_generics #where_clause {
                fn to_primitive(&self, update: &mut impl pdf::object::Updater) -> Result<Primitive> {
                    match *self {
                        #( #parts, )*
                    }
                }
            }
        }
    } else {
        let (pairs, other) = enum_pairs(ast, data);

        let mut ser_code: Vec<_> = pairs
            .iter()
            .map(|(name, var)| {
                quote! {
                    #var => #name
                }
            })
            .collect();

        if let Some(other_tokens) = other {
            ser_code.push(quote! {
                #other_tokens(ref name) => name.as_str()
            });
        }

        quote! {
            impl #impl_generics pdf::object::ObjectWrite for #id #ty_generics #where_clause {
                fn to_primitive(&self, update: &mut impl pdf::object::Updater) -> Result<Primitive> {
                    let name = match *self {
                        #( #ser_code, )*
                    };

                    Ok(Primitive::Name(name.into()))
                }
            }
        }
    }
}
fn impl_deepclone_for_enum(ast: &DeriveInput, data: &DataEnum) -> SynStream {
    let id = &ast.ident;
    let (impl_generics, ty_generics, where_clause) = ast.generics.split_for_impl();

    let parts = data.variants.iter().map(|var| {
        let var_ident = &var.ident;
        match var.fields {
            Fields::Unnamed(ref fields) => {
                let labels: Vec<Ident> = fields.unnamed.iter().enumerate().map(|(i, _f)| {
                    Ident::new(&format!("f_{i}"), Span::mixed_site())
                }).collect();
                quote! {
                    #id::#var_ident( #( ref #labels, )* ) => Ok(#id::#var_ident( #( #labels.deep_clone(cloner)? ),* ))
                }
            }
            Fields::Named(ref fields) => {
                let names: Vec<_> = fields.named.iter().map(|f| f.ident.as_ref().unwrap()).collect();
                quote! {
                    #id::#var_ident { #( ref #names ),* } => Ok(#id::#var_ident { #( #names: #names.deep_clone(cloner)? ),* })
                }
            }
            Fields::Unit => {
                quote! {
                    #id::#var_ident => Ok(#id::#var_ident)
                }
            }
        }
    });

    quote! {
        impl #impl_generics pdf::object::DeepClone for #id #ty_generics #where_clause {
            fn deep_clone(&self, cloner: &mut impl pdf::object::Cloner) -> Result<Self> {
                match *self {
                    #( #parts, )*
                }
            }
        }
    }
}

fn impl_enum_from_stream(ast: &DeriveInput, data: &DataEnum, attrs: &GlobalAttrs) -> SynStream {
    let id = &ast.ident;
    let (impl_generics, ty_generics, where_clause) = ast.generics.split_for_impl();

    let ty_check = match (&attrs.type_name, attrs.type_required) {
        (Some(ref ty), required) => quote! {
            stream.info.expect(stringify!(#id), "Type", #ty, #required)?;
        },
        (None, _) => quote! {}
    };

    let variants_code: Vec<_> = data.variants.iter().map(|var| {
        let attrs = FieldAttrs::parse(&var.attrs);
        let inner_ty = match var.fields {
            Fields::Unnamed(ref fields) => {
                assert_eq!(fields.unnamed.len(), 1, "all variants in a stream enum have to have exactly one unnamed field");
                fields.unnamed.first().unwrap().ty.clone()
            },
            _ => panic!("all variants in a stream enum have to have exactly one unnamed field")
        };
        let name = attrs.name.map(|lit| lit.value()).unwrap_or_else(|| var.ident.to_string());
        let variant_ident = &var.ident;
        quote! {
            #name => Ok(#id::#variant_ident ( #inner_ty::from_primitive(pdf::primitive::Primitive::Stream(stream), resolve)?))
        }
    }).collect();

    quote! {
        impl #impl_generics pdf::object::Object for #id #ty_generics #where_clause {
            fn from_primitive(p: pdf::primitive::Primitive, resolve: &impl pdf::object::Resolve) -> pdf::error::Result<Self> {
                let mut stream = PdfStream::from_primitive(p, resolve)?;
                #ty_check

                let subty = stream.info.get("Subtype")
                    .ok_or(pdf::error::PdfError::MissingEntry { typ: stringify!(#id), field: "Subtype".into() })?
                    .as_name()?;

                match subty {
                    #( #variants_code, )*
                    s => Err(pdf::error::PdfError::UnknownVariant { id: stringify!(#id), name: s.into() })
                }
            }
        }
    }
}


fn is_option(f: &Field) -> Option<Type> {
    match f.ty {
        Type::Path(ref p) => {
            let first = p.path.segments.first().unwrap();
            match first {
                PathSegment { ident, arguments: PathArguments::AngleBracketed(args) } if ident == "Option" => {
                    match args.args.first().unwrap() {
                        GenericArgument::Type(t) => Some(t.clone()),
                        _ => panic!()
                    }
                }
                _ => None
            }
        }
        _ => None
    }
}

/// Accepts Dictionary to construct a struct
fn impl_object_for_struct(ast: &DeriveInput, fields: &Fields) -> SynStream {
    let id = &ast.ident;
    let mut generics = ast.generics.clone();
    for g in generics.params.iter_mut() {
        if let GenericParam::Type(p) = g {
            p.bounds.push(
                parse_quote!(pdf::object::Object)
            );
        }
    }
    let (impl_generics, ty_generics, where_clause) = generics.split_for_impl();
    let attrs = GlobalAttrs::from_ast(ast);

    ///////////////////////
    let typ = id.to_string();
    let let_parts = fields.iter().map(|field| {
        let name = &field.ident;
        let attrs = FieldAttrs::parse(&field.attrs);
        if attrs.skip {
            return quote! {}
        }
        if attrs.other {
            return quote! {
                let #name = dict;
            };
        }

        let key = attrs.key();

        let ty = field.ty.clone();
        if let Some(ref default) = attrs.default() {
            quote! {
                let #name = {
                    let primitive: Option<pdf::primitive::Primitive>
                        = dict.remove(#key);
                    let x: #ty = match primitive {
                        Some(primitive) => <#ty as pdf::object::Object>::from_primitive(primitive, resolve).map_err(|e|
                            pdf::error::PdfError::FromPrimitive {
                                typ: #typ,
                                field: stringify!(#name),
                                source: Box::new(e)
                            })?,
                        None => #default,
                    };
                    x
                };
            }
        } else {
            quote! {
                let #name = {
                    match dict.remove(#key) {
                        Some(primitive) =>
                            match <#ty as pdf::object::Object>::from_primitive(primitive, resolve) {
                                Ok(obj) => obj,
                                Err(e) => return Err(pdf::error::PdfError::FromPrimitive {
                                    typ: stringify!(#ty),
                                    field: stringify!(#name),
                                    source: Box::new(e)
                                })
                            }
                        None => // Try to construct T from Primitive::Null
                            match <#ty as pdf::object::Object>::from_primitive(pdf::primitive::Primitive::Null, resolve) {
                                Ok(obj) => obj,
                                Err(_) => return Err(pdf::error::PdfError::MissingEntry {
                                    typ: #typ,
                                    field: String::from(stringify!(#name)),
                                })
                            },
                    }
                    // ^ By using Primitive::Null when we don't find the key, we allow 'optional'
                    // types like Option and Vec to be constructed from non-existing values
                };
            }
        }
    });

    let field_parts = fields.iter().map(|field| {
        let name = &field.ident;
        quote! { #name: #name, }
    });

    let checks: Vec<_> = attrs.checks.iter().map(|(key, val)|
        quote! {
            dict.expect(#typ, #key, #val, true)?;
        }
    ).collect();

    let ty_check = match (&attrs.type_name, attrs.type_required) {
        (Some(ref ty), required) => quote! {
            dict.expect(#typ, "Type", #ty, #required)?;
        },
        (None, _) => quote! {}
    };

    quote! {
        impl #impl_generics pdf::object::FromDict for #id #ty_generics #where_clause {
            fn from_dict(mut dict: pdf::primitive::Dictionary, resolve: &impl pdf::object::Resolve) -> pdf::error::Result<Self> {
                #ty_check
                #( #checks )*
                #( #let_parts )*
                Ok(#id {
                    #( #field_parts )*
                })
            }
        }
        impl #impl_generics pdf::object::Object for #id #ty_generics #where_clause {
            fn from_primitive(p: pdf::primitive::Primitive, resolve: &impl pdf::object::Resolve) -> pdf::error::Result<Self> {
                let dict = pdf::primitive::Dictionary::from_primitive(p, resolve)?;
                <Self as pdf::object::FromDict>::from_dict(dict, resolve)
            }
        }
    }
}

fn impl_objectwrite_for_struct(ast: &DeriveInput, fields: &Fields) -> SynStream {
    let id = &ast.ident;
    let mut generics = ast.generics.clone();
    for g in generics.params.iter_mut() {
        if let GenericParam::Type(p) = g {
            p.bounds.push(
                parse_quote!(pdf::object::ObjectWrite)
            );
        }
    }
    let (impl_generics, ty_generics, where_clause) = generics.split_for_impl();
    let attrs = GlobalAttrs::from_ast(ast);

    let parts: Vec<_> = fields.iter()
        .map(|field| {
            (field.ident.clone(), FieldAttrs::parse(&field.attrs), is_option(field))
        }).collect();

    let fields_ser = parts.iter()
        .map(|(field, attrs, _opt)|
            if attrs.skip | attrs.other {
                quote!()
            } else {
                let key = attrs.key();
                let tr = if attrs.indirect {
                    quote! {
                        match val {
                            pdf::primitive::Primitive::Reference(r) => val,
                            p => updater.create(p)?.into(),
                        }
                    }
                } else {
                    quote! { val }
                };

                quote! {
                    let val = pdf::object::ObjectWrite::to_primitive(&self.#field, updater)?;
                    if !matches!(val, pdf::primitive::Primitive::Null) {
                        let val2 = #tr;
                        dict.insert(#key, val2);
                    }
                }
            }
        );
    let checks_code = attrs.checks.iter().map(|(key, val)|
        quote! {
            dict.insert(#key, pdf::primitive::Primitive::Name(#val.into()));
        }
    );
    let pdf_type = match attrs.type_name {
        Some(ref name) => quote! {
            dict.insert("Type", pdf::primitive::Primitive::Name(#name.into()));
        },
        None => quote! {}
    };

    let other = parts.iter().filter(|(_field, attrs, _)| attrs.other).flat_map(|(field, _, _)| field).next();
    let init_dict = if let Some(other) = other {
        quote! {
            let mut dict = self.#other.clone();
        }
    } else {
        quote! {
            let mut dict = pdf::primitive::Dictionary::new();
        }
    };

    quote! {
        impl #impl_generics pdf::object::ObjectWrite for #id #ty_generics #where_clause {
            fn to_primitive(&self, update: &mut impl pdf::object::Updater) -> Result<pdf::primitive::Primitive> {
                pdf::object::ToDict::to_dict(self, update).map(pdf::primitive::Primitive::Dictionary)
            }
        }
        impl #impl_generics pdf::object::ToDict for #id #ty_generics #where_clause {
            fn to_dict(&self, updater: &mut impl pdf::object::Updater) -> Result<pdf::primitive::Dictionary> {
                #init_dict
                #pdf_type
                #( #checks_code )*
                #( #fields_ser )*
                Ok(dict)
            }
        }
    }
}
fn impl_deepclone_for_struct(ast: &DeriveInput, fields: &Fields) -> SynStream {
    let id = &ast.ident;
    let mut generics = ast.generics.clone();
    for g in generics.params.iter_mut() {
        if let GenericParam::Type(p) = g {
            p.bounds.push(
                parse_quote!(pdf::object::DeepClone)
            );
        }
    }
    let (impl_generics, ty_generics, where_clause) = generics.split_for_impl();

    let parts: Vec<_> = fields.iter()
        .map(|field| {
            (field.ident.clone(), is_option(field))
        }).collect();

    let field_parts = parts.iter()
        .map(|(field, _opt)| {
            quote! {
                #field: self.#field.deep_clone(cloner)?,
            }
        });

    quote! {
        impl #impl_generics pdf::object::DeepClone for #id #ty_generics #where_clause {
            fn deep_clone(&self, cloner: &mut impl pdf::object::Cloner) -> Result<Self> {
                Ok(#id {
                    #( #field_parts )*
                })
            }
        }
    }
}

/// Note: the struct must have `info` and `data` fields (TODO explain in docs)
fn impl_object_for_stream(ast: &DeriveInput, fields: &Fields) -> SynStream {
    let id = &ast.ident;
    let (impl_generics, ty_generics, where_clause) = ast.generics.split_for_impl();

    let info_ty = fields.iter()
        .filter_map(|field| {
            if let Some(ident) = field.ident.as_ref() {
                if ident == "info" {
                    Some(field.ty.clone())
                } else {
                    None
                }
            } else {
                None
            }
        }).next().unwrap();

    quote! {
        impl #impl_generics pdf::object::Object for #id #ty_generics #where_clause {
            fn from_primitive(p: pdf::primitive::Primitive, resolve: &impl pdf::object::Resolve) -> pdf::error::Result<Self> {
                let pdf::primitive::PdfStream { info, data }
                    = p.to_stream(resolve)?;

                Ok(#id {
                    info: <#info_ty as pdf::object::Object>::from_primitive(pdf::primitive::Primitive::Dictionary(info), resolve)?,
                    data: data,
                })
            }
        }
    }
}
7
src-tauri/.gitignore
vendored
Normal file
7
src-tauri/.gitignore
vendored
Normal file
@ -0,0 +1,7 @@
# Generated by Cargo
# will have compiled files and executables
/target/

# Generated by Tauri
# will have schema files for capabilities auto-completion
/gen/schemas
5691
src-tauri/Cargo.lock
generated
Normal file
5691
src-tauri/Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
29
src-tauri/Cargo.toml
Normal file
29
src-tauri/Cargo.toml
Normal file
@ -0,0 +1,29 @@
[package]
name = "pdf-forge"
version = "0.1.0"
description = "A Tauri App"
authors = ["you"]
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[lib]
# The `_lib` suffix may seem redundant, but it is necessary
# to make the lib name unique so it doesn't conflict with the bin name.
# This seems to be an issue only on Windows; see https://github.com/rust-lang/cargo/issues/8519
name = "pdf_forge_lib"
crate-type = ["staticlib", "cdylib", "rlib"]

[build-dependencies]
tauri-build = { version = "2", features = [] }

[dependencies]
tauri = { version = "2", features = [] }
tauri-plugin-opener = "2"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
pdf = { path = "../src-pdfrs/pdf", features = ["cache"] }
tauri-plugin-fs = "2"
tauri-plugin-dialog = "2"
uuid = { version = "1.12.0", features = ["v4"] }
3
src-tauri/build.rs
Normal file
3
src-tauri/build.rs
Normal file
@ -0,0 +1,3 @@
fn main() {
    tauri_build::build()
}
20
src-tauri/capabilities/default.json
Normal file
20
src-tauri/capabilities/default.json
Normal file
@ -0,0 +1,20 @@
{
  "$schema": "../gen/schemas/desktop-schema.json",
  "identifier": "default",
  "description": "Capability for the main window",
  "windows": [
    "main"
  ],
  "permissions": [
    "core:default",
    "opener:default",
    "fs:default",
    "dialog:default",
    "core:window:default",
    "core:window:allow-start-dragging",
    "core:window:allow-close",
    "core:window:allow-minimize",
    "core:window:allow-toggle-maximize",
    "core:window:allow-internal-toggle-maximize"
  ]
}
684
src-tauri/src/lib.rs
Normal file
684
src-tauri/src/lib.rs
Normal file
@ -0,0 +1,684 @@
extern crate pdf;

use crate::pdf::object::Resolve;

use pdf::content::Op;
use pdf::file::{File, FileOptions, NoLog, ObjectCache, StreamCache};
use pdf::object::{InfoDict, Object, ObjectWrite, PlainRef};
use pdf::primitive::Primitive;
use pdf::xref::XRef;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, VecDeque};
use std::fmt::format;
use std::ops::DerefMut;
use std::path::Path;
use std::sync::{Mutex, MutexGuard};
use tauri::{Manager, State};
use uuid::Uuid;

type CosFile = File<Vec<u8>, ObjectCache, StreamCache, NoLog>;

macro_rules! t {
    ($result:expr) => {{
        match $result {
            Ok(f) => f,
            Err(e) => return Err(e.to_string()),
        }
    }};
}
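// Added note: `t!` mirrors the `?` operator for Tauri commands, which report
// errors as `String` -- e.g. `let file = t!(FileOptions::cached().open(path));`
// returns early with the error's Display text on failure.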
#[derive(Serialize, Debug, Clone)]
|
||||
pub struct XRefTableModel {
|
||||
pub size: usize,
|
||||
pub entries: Vec<XRefEntryModel>,
|
||||
}
|
||||
#[derive(Serialize, Debug, Clone)]
|
||||
pub struct XRefEntryModel {
|
||||
pub obj_num: u64,
|
||||
pub gen_num: u64,
|
||||
pub obj_type: String,
|
||||
pub offset: u64,
|
||||
}
|
||||
#[derive(Serialize, Debug, Clone)]
|
||||
pub struct PdfFile {
|
||||
pub id: String,
|
||||
pub name: String,
|
||||
pub path: String,
|
||||
pub page_count: u32,
|
||||
pub xref_entries: usize,
|
||||
pub pages: Vec<PageModel>,
|
||||
}
|
||||
|
||||
|
||||
#[derive(Serialize, Debug, Clone)]
|
||||
pub struct PrimitiveModel {
|
||||
pub key: String,
|
||||
pub ptype: String,
|
||||
pub sub_type: String,
|
||||
pub value: String,
|
||||
pub children: Vec<PrimitiveModel>,
|
||||
pub detail_path: Vec<DetailPathStep>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Debug, Clone)]
|
||||
pub struct DetailPathStep {
|
||||
pub key: String,
|
||||
pub last_jump: String,
|
||||
}
|
||||
impl DetailPathStep {
|
||||
fn new(key: String, last_jump: String) -> DetailPathStep {
|
||||
DetailPathStep { key, last_jump }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[derive(Serialize, Debug, Clone)]
|
||||
pub struct PageModel {
|
||||
key: String,
|
||||
id: u64,
|
||||
}
|
||||
#[derive(Deserialize, Serialize, Debug, Clone)]
|
||||
pub struct TreeViewNode {
|
||||
key: String,
|
||||
children: Vec<TreeViewNode>,
|
||||
}
|
||||
|
||||
impl TreeViewNode {
|
||||
fn step(&self) -> Step {
|
||||
Step::parse_step(&self.key)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Serialize, Debug, Clone)]
|
||||
pub struct ContentsModel {
|
||||
parts: Vec<Vec<String>>
|
||||
}
|
||||
|
||||
#[tauri::command]
|
||||
fn get_all_files(session: State<Mutex<Session>>) -> Vec<PdfFile> {
|
||||
let files = &session.lock().unwrap().files;
|
||||
files
|
||||
.values()
|
||||
.map(|sf| sf.pdf_file.clone())
|
||||
.collect::<Vec<PdfFile>>()
|
||||
}
|
||||
|
||||
#[tauri::command]
|
||||
fn get_all_file_ids(session: State<Mutex<Session>>) -> Vec<String> {
|
||||
let files = &session.lock().unwrap().files;
|
||||
files
|
||||
.values()
|
||||
.map(|sf| sf.pdf_file.id.clone())
|
||||
.collect::<Vec<String>>()
|
||||
}
|
||||
|
||||
#[tauri::command]
|
||||
fn close_file(id: &str, session: State<Mutex<Session>>) {
|
||||
session.lock().unwrap().deref_mut().handle_close(&id);
|
||||
}
|
||||
|
||||
#[tauri::command]
|
||||
fn get_file_by_id(id: &str, session: State<Mutex<Session>>) -> Result<PdfFile, String> {
|
||||
let session_guard = session
|
||||
.lock()
|
||||
.map_err(|_| "Failed to lock the session mutex.".to_string())?;
|
||||
let file = &get_file_from_state(id, &session_guard)?;
|
||||
Ok(file.pdf_file.clone())
|
||||
}
|
||||
|
||||
#[tauri::command]
|
||||
fn upload(path: &str, session: State<Mutex<Session>>) -> Result<String, String> {
|
||||
let file = t!(FileOptions::cached().open(path));
|
||||
|
||||
let pdf_file = to_pdf_file(path, &file)?;
|
||||
|
||||
session
|
||||
.lock()
|
||||
.unwrap()
|
||||
.deref_mut()
|
||||
.handle_upload(&pdf_file, file);
|
||||
|
||||
Ok(pdf_file.id.to_string())
|
||||
}
|
||||
|
||||
fn to_pdf_file(path: &str, file: &CosFile) -> Result<PdfFile, String> {
|
||||
|
||||
fn parse_title_from_path(path: &str) -> Option<String> {
|
||||
Path::new(path).file_name()
|
||||
.and_then(|f| f.to_str().map(|s| s.to_string()))
|
||||
}
|
||||
|
||||
let file_name = if let Some(ref info) = file.trailer.info_dict {
|
||||
info.title.as_ref().map(|p| p.to_string_lossy())
|
||||
.unwrap_or( parse_title_from_path(path)
|
||||
.unwrap_or_else(|| "Not found".to_string()))
|
||||
} else {
|
||||
"Not found".to_string()
|
||||
};
|
||||
|
||||
|
||||
let pages = file.pages().enumerate().map(|(i, page_ref)| PageModel { key: format!("Page {}", i + 1), id: page_ref.unwrap().get_ref().get_inner().id }).collect();
|
||||
|
||||
let pdf_file = PdfFile {
|
||||
id: Uuid::new_v4().to_string(),
|
||||
name: file_name.to_string().into(),
|
||||
path: path.to_string().into(),
|
||||
page_count: file.num_pages(),
|
||||
xref_entries: file.get_xref().len(),
|
||||
pages: pages,
|
||||
};
|
||||
Ok(pdf_file)
|
||||
}
|
||||
|
||||
#[tauri::command]
fn get_contents(id: &str, path: &str, session: State<Mutex<Session>>) -> Result<ContentsModel, String> {
    let session_guard = session
        .lock()
        .map_err(|_| "Failed to lock the session mutex.".to_string())?;
    let file = get_file_from_state(id, &session_guard)?;

    let (_, page_prim, _) = get_prim_by_path_with_file(path, &file.cos_file)?;
    let resolver = file.cos_file.resolver();

    let page = t!(pdf::object::Page::from_primitive(page_prim, &resolver));
    if let Some(contents) = page.contents {
        let mut parts = vec![];
        for part in contents.parts {
            let data = t!(part.data(&resolver));
            let ops = t!(pdf::content::parse_ops(&data, &resolver));
            let part = t!(pdf::content::display_ops(&ops));
            parts.push(part);
        }
        return Ok(ContentsModel { parts });
    }
    Err(format!("Page at {} has no contents!", path))
}

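// Shape note: each entry in `parts` corresponds to one content stream part of the
// page, rendered by pdf::content::display_ops into human-readable operator lines
// for the frontend's ContentsView.
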
#[tauri::command]
fn get_prim_by_path(
    id: &str,
    path: &str,
    session: State<Mutex<Session>>,
) -> Result<PrimitiveModel, String> {
    let session_guard = session
        .lock()
        .map_err(|_| "Failed to lock the session mutex.".to_string())?;
    let file = get_file_from_state(id, &session_guard)?;

    get_prim_model_by_path_with_file(path, &file.cos_file)
}

fn get_prim_model_by_path_with_file(path: &str, file: &CosFile) -> Result<PrimitiveModel, String> {
    let (key, prim, detail_path) = get_prim_by_path_with_file(path, file)?;

    Ok(PrimitiveModel::from_primitive_with_children(key, &prim, detail_path))
}

fn get_prim_by_path_with_file(path: &str, file: &CosFile) -> Result<(String, Primitive, Vec<DetailPathStep>), String> {
    let mut steps = Step::parse(path);
    if steps.is_empty() {
        return Err(format!("{} is not a valid path!", path));
    }
    let mut step = steps.pop_front().unwrap();
    let mut parent = match step {
        Step::Number(obj_num) => resolve_xref(obj_num, file)?,
        Step::Trailer => retrieve_trailer(file),
        _ => return Err(format!("{} is not a valid path!", path)),
    };

    let mut detail_path = vec![DetailPathStep::new(step.get_key(), step.get_key())];
    let mut last_jump = step.get_key();

    let mut current_prim = &parent;
    while !steps.is_empty() {
        step = steps.pop_front().unwrap();

        current_prim = resolve_step(current_prim, &step)?;
        if let Primitive::Reference(xref) = current_prim {
            last_jump = xref.id.to_string();
            parent = resolve_xref(xref.id, file)?;
            current_prim = &parent;
        }
        detail_path.push(DetailPathStep::new(step.get_key(), last_jump.clone()));
    }
    Ok((step.get_key(), current_prim.clone(), detail_path))
}

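// Worked example (illustrative): resolving "/Root/Pages" starts at the trailer,
// follows Root (a reference, so last_jump becomes the catalog's object number),
// then Pages, producing detail_path entries like
//   ("/", "/"), ("Root", "<catalog obj nr>"), ("Pages", "<pages obj nr>").
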
#[tauri::command]
fn get_prim_tree_by_path(
    id: &str,
    path: TreeViewNode,
    session: State<Mutex<Session>>,
) -> Result<PrimitiveModel, String> {
    let session_guard = session
        .lock()
        .map_err(|_| "Failed to lock the session mutex.".to_string())?;
    let file = get_file_from_state(id, &session_guard)?;

    get_prim_tree_by_path_with_file(path, &file.cos_file)
}

fn get_prim_tree_by_path_with_file(
    node: TreeViewNode,
    file: &CosFile,
) -> Result<PrimitiveModel, String> {
    let step = node.step();
    let parent = match step {
        Step::Number(obj_num) => resolve_xref(obj_num, file)?,
        Step::Trailer => retrieve_trailer(file),
        _ => return Err(format!("{:?} is not a valid path!", node)),
    };
    let path = vec![DetailPathStep::new(step.get_key(), step.get_key())];

    let mut parent_model = PrimitiveModel::from_primitive_with_children(step.get_key(), &parent, path);
    for child in node.children.iter() {
        expand(child, &mut parent_model, &parent, file)?;
    }

    Ok(parent_model)
}

fn expand(
    node: &TreeViewNode,
    parent_model: &mut PrimitiveModel,
    parent: &Primitive,
    file: &CosFile,
) -> Result<(), String> {
    let step = node.step();
    let prim = resolve_step(parent, &step)?;
    if let Primitive::Reference(x_ref) = prim {
        let jump = resolve_xref(x_ref.id, file)?;
        let to_expand = parent_model
            .get_child(step.get_key())
            .ok_or_else(|| format!("No child {} to expand!", step.get_key()))?;
        to_expand.add_children(
            &jump,
            append_path_with_jump(step.get_key(), x_ref.id.to_string(), &to_expand.detail_path),
        );
        expand_children(node, file, &jump, to_expand)?;
    } else {
        let to_expand = parent_model
            .get_child(step.get_key())
            .ok_or_else(|| format!("No child {} to expand!", step.get_key()))?;
        to_expand.add_children(prim, append_path(step.get_key(), &to_expand.detail_path));
        expand_children(node, file, prim, to_expand)?;
    }
    Ok(())
}

fn expand_children(
    node: &TreeViewNode,
    file: &CosFile,
    prim: &Primitive,
    expanded: &mut PrimitiveModel,
) -> Result<(), String> {
    for child in node.children.iter() {
        expand(child, expanded, prim, file)?;
    }

    Ok(())
}

fn resolve_step<'a>(current_prim: &'a Primitive, step: &Step) -> Result<&'a Primitive, String> {
    Ok(match step {
        Step::Number(index) => match current_prim {
            Primitive::Array(prim_array) => {
                let i = *index as usize;
                if prim_array.len() <= i {
                    return Err(format!("{} index out of bounds!", step.get_key()));
                }
                &prim_array[i]
            }
            p => return Err(format!("{} is not indexed with numbers!", p.get_debug_name())),
        },
        Step::String(key) => match current_prim {
            Primitive::Dictionary(dict) => match dict.get(key) {
                Some(prim) => prim,
                None => return Err(format!("Key {} does not exist in Dictionary!", key)),
            },
            Primitive::Stream(stream) => match stream.info.get(key) {
                Some(prim) => prim,
                None => return Err(format!("Key {} does not exist in Info Dictionary!", key)),
            },
            p => return Err(format!("{} has no String paths!", p.get_debug_name())),
        },
        Step::Data => return Err("Not implemented!".to_string()),
        _ => return Err(format!("Invalid Step: {}", step.get_key())),
    })
}

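// Example (illustrative): resolving Step::String("Kids") against a page-tree
// dictionary yields its Kids array; resolving Step::Number(0) against that array
// yields the first kid, typically a Reference the caller then follows.
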
// The trailer is not an object in the xref table, so it is serialized into a
// Primitive through a scratch storage before it can be displayed like one.
fn retrieve_trailer(file: &CosFile) -> Primitive {
    let mut updater = FileOptions::uncached().storage();
    file.trailer.to_primitive(&mut updater).unwrap()
}

#[derive(Debug)]
pub enum Step {
    String(String),
    Number(u64),
    Trailer,
    Data,
}

impl Step {
    fn parse_step(path: &str) -> Step {
        match path.parse::<u64>().ok() {
            Some(i) => Step::Number(i),
            None => match path {
                "Data" => Step::Data,
                "/" => Step::Trailer,
                _ => Step::String(path.to_string()),
            },
        }
    }

    fn parse(path: &str) -> VecDeque<Step> {
        let mut steps = VecDeque::new();

        if path.starts_with('/') {
            steps.push_back(Step::Trailer);
        }
        for path_component in path.split('/') {
            if path_component.is_empty() {
                continue;
            }
            let step = match path_component.parse::<u64>().ok() {
                Some(i) => Step::Number(i),
                None => match path_component {
                    "Data" => Step::Data,
                    _ => Step::String(path_component.to_string()),
                },
            };
            steps.push_back(step);
        }
        steps
    }

    fn get_key(&self) -> String {
        match self {
            Step::String(s) => s.clone(),
            Step::Number(i) => i.to_string(),
            Step::Trailer => "/".to_string(),
            Step::Data => "Data".into(),
        }
    }
}

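// Worked examples (illustrative):
//   Step::parse("/Root/Pages/Kids/0")
//     => [Trailer, String("Root"), String("Pages"), String("Kids"), Number(0)]
//   Step::parse("12/Data")
//     => [Number(12), Data]
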
// Paths only carry the object number, so generation 0 is assumed here; objects
// with a non-zero generation number are not addressable through this scheme.
fn resolve_xref(id: u64, file: &CosFile) -> Result<Primitive, String> {
    let plain_ref = PlainRef { id, gen: 0 };
    file.resolver()
        .resolve(plain_ref)
        .map_err(|e| e.to_string())
}

fn get_file_from_state<'a>(
    id: &str,
    session_guard: &'a MutexGuard<Session>,
) -> Result<&'a SessionFile, String> {
    session_guard
        .files
        .get(id)
        .ok_or_else(|| format!("File with id {} does not exist!", id))
}

fn append_path_with_jump(key: String, last_jump: String, path: &[DetailPathStep]) -> Vec<DetailPathStep> {
    let mut new_path = path.to_vec();
    new_path.push(DetailPathStep::new(key, last_jump));
    new_path
}

fn append_path(key: String, path: &[DetailPathStep]) -> Vec<DetailPathStep> {
    let mut new_path = path.to_vec();
    // The detail path always starts with a root step, so last() is non-empty here.
    let last_jump = new_path.last().unwrap().last_jump.clone();
    new_path.push(DetailPathStep::new(key, last_jump));
    new_path
}

impl PrimitiveModel {
    fn from_primitive(key: String, primitive: &Primitive, path: Vec<DetailPathStep>) -> PrimitiveModel {
        let value: String = match primitive {
            Primitive::Null => "Null".to_string(),
            Primitive::Integer(i) => i.to_string(),
            Primitive::Number(f) => f.to_string(),
            Primitive::Boolean(b) => b.to_string(),
            Primitive::String(s) => s.to_string().unwrap_or_default(),
            Primitive::Stream(_) => "-".to_string(),
            Primitive::Dictionary(_) => "-".to_string(),
            Primitive::Array(arr) => PrimitiveModel::format_arr_content(arr),
            Primitive::Reference(pref) => {
                format!("Obj Number: {} Gen Number: {}", pref.id, pref.gen)
            }
            Primitive::Name(name) => name.as_str().to_string(),
        };
        let sub_type: String = match primitive {
            Primitive::Dictionary(d) => d
                .get("Type")
                .and_then(|value| match value {
                    Primitive::Name(name) => Some(name.as_str().to_string()),
                    _ => None,
                })
                .unwrap_or(String::from("-")),
            _ => String::from("-"),
        };
        PrimitiveModel {
            key,
            ptype: primitive.get_debug_name().into(),
            sub_type,
            value,
            children: Vec::new(),
            detail_path: path,
        }
    }

    fn format_arr_content(arr: &[Primitive]) -> String {
        if arr.is_empty() {
            return "[]".to_string();
        }
        // Preview at most the first four elements.
        let contents = &arr[..arr.len().min(4)];
        let mut result = String::from("[");
        for (i, prim) in contents.iter().enumerate() {
            result.push_str(&match prim {
                Primitive::Integer(int) => int.to_string(),
                Primitive::Number(n) => n.to_string(),
                Primitive::Boolean(b) => b.to_string(),
                Primitive::String(s) => s.to_string().unwrap_or(String::from("-")),
                Primitive::Name(n) => n.as_str().to_string(),
                _ => prim.get_debug_name().to_string(),
            });
            if i != contents.len() - 1 {
                result.push_str(", ");
            }
        }

        if arr.len() > 4 {
            result.push_str(", ...");
        }
        result.push(']');
        result
    }

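    // Example (illustrative): a six-element array [1 2 3 4 5 6] is previewed as
    // "[1, 2, 3, 4, ...]"; an empty array renders as "[]".
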
    fn from_primitive_with_children(key: String, primitive: &Primitive, path: Vec<DetailPathStep>) -> PrimitiveModel {
        let mut model = PrimitiveModel::from_primitive(key, primitive, path.clone());
        model.add_children(primitive, path);
        model
    }

    fn add_children(&mut self, primitive: &Primitive, path: Vec<DetailPathStep>) {
        match primitive {
            Primitive::Dictionary(dict) => dict.iter().for_each(|(name, value)| {
                let key = name.as_str().to_string();
                self.add_child(key.clone(), value, append_path(key, &path));
            }),
            Primitive::Array(arr) => arr.iter().enumerate().for_each(|(i, obj)| {
                self.add_child(i.to_string(), obj, append_path(i.to_string(), &path));
            }),
            Primitive::Stream(stream) => {
                // Streams get a synthetic "Data" child alongside their info dictionary.
                self.children.push(PrimitiveModel {
                    key: "Data".to_string(),
                    ptype: "Stream Data".to_string(),
                    sub_type: "-".to_string(),
                    value: "".to_string(),
                    children: vec![],
                    detail_path: append_path("Data".to_string(), &path),
                });
                stream.info.iter().for_each(|(name, value)| {
                    let key = name.as_str().to_string();
                    self.add_child(key.clone(), value, append_path(key, &path));
                })
            }
            _ => (),
        };
    }

    fn add_child(&mut self, key: String, child: &Primitive, path: Vec<DetailPathStep>) -> &PrimitiveModel {
        let child_model = Self::from_primitive(key, child, path);
        self.children.push(child_model);
        self.children.last().unwrap()
    }

    fn get_child(&mut self, key: String) -> Option<&mut PrimitiveModel> {
        self.children.iter_mut().find(|child| child.key == key)
    }
}

#[tauri::command]
fn get_xref_table(id: &str, session: State<Mutex<Session>>) -> Result<XRefTableModel, String> {
    let session_guard = session
        .lock()
        .map_err(|_| "Failed to lock the session mutex.".to_string())?;
    let file = get_file_from_state(id, &session_guard)?;
    get_xref_table_model_with_file(&file.cos_file)
}

fn get_xref_table_model_with_file(file: &CosFile) -> Result<XRefTableModel, String> {
    let resolver = file.resolver();
    let x_ref_table = file.get_xref();
    let mut models: Vec<XRefEntryModel> = Vec::new();

    for (i, x_ref) in x_ref_table.iter_real().enumerate() {
        models.push(match x_ref {
            XRef::Raw { pos, gen_nr } => {
                // Raw entries are resolved once so the table can label each entry
                // with its object type; on large files this resolution dominates
                // the cost of building the table (see the timing in test_read_x_ref).
                let prim: Primitive = resolver
                    .resolve(PlainRef {
                        id: i as u64,
                        gen: *gen_nr,
                    })
                    .map_err(|e| e.to_string())?;
                XRefEntryModel {
                    obj_num: i as u64,
                    gen_num: *gen_nr,
                    obj_type: prim.get_debug_name().to_string().into(),
                    offset: *pos as u64,
                }
            }
            XRef::Stream { stream_id, index } => XRefEntryModel {
                obj_num: i as u64,
                gen_num: *stream_id as u64,
                obj_type: "Stream".into(),
                offset: *index as u64,
            },
            XRef::Free { next_obj_nr, gen_nr } => XRefEntryModel {
                obj_num: i as u64,
                gen_num: *gen_nr as u64,
                obj_type: "Free".into(),
                offset: *next_obj_nr as u64,
            },
            XRef::Promised => XRefEntryModel {
                obj_num: i as u64,
                gen_num: 0,
                obj_type: "Promised".into(),
                offset: 0,
            },
            XRef::Invalid => XRefEntryModel {
                obj_num: i as u64,
                gen_num: 0,
                obj_type: "Invalid".into(),
                offset: 0,
            },
        });
    }
    Ok(XRefTableModel {
        size: x_ref_table.len(),
        entries: models,
    })
}

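// Illustrative row (field names as in XRefEntryModel; exact JSON shape assumed):
// an in-use dictionary object at byte offset 12345 would serialize roughly as
//   { "obj_num": 7, "gen_num": 0, "obj_type": "Dictionary", "offset": 12345 }
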
struct Session {
    files: HashMap<String, SessionFile>,
}

struct SessionFile {
    pdf_file: PdfFile,
    cos_file: CosFile,
}

// SAFETY (assumed, not compiler-verified): a SessionFile is only ever accessed
// behind the session Mutex, so sharing it across threads is treated as sound
// even though CosFile is not automatically Send/Sync.
unsafe impl Send for SessionFile {}
unsafe impl Sync for SessionFile {}

impl Session {
    fn load() -> Session {
        Session {
            files: HashMap::new(),
        }
    }

    fn handle_upload(&mut self, pdf_file: &PdfFile, cos_file: CosFile) {
        self.files.insert(
            pdf_file.id.clone(),
            SessionFile {
                pdf_file: pdf_file.clone(),
                cos_file,
            },
        );
    }

    fn handle_close(&mut self, id: &str) {
        self.files.remove(id);
    }
}

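// State wiring, for orientation: run() below registers the session as
// app.manage(Mutex::new(Session::load())); every #[tauri::command] then receives
// it as State<Mutex<Session>> and locks it per call, so all commands serialize
// on this one lock.
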
#[cfg_attr(mobile, tauri::mobile_entry_point)]
pub fn run() {
    tauri::Builder::default()
        .plugin(tauri_plugin_dialog::init())
        .plugin(tauri_plugin_fs::init())
        .plugin(tauri_plugin_opener::init())
        .setup(|app| {
            app.manage(Mutex::new(Session::load()));
            Ok(())
        })
        .invoke_handler(tauri::generate_handler![
            upload,
            get_all_files,
            get_all_file_ids,
            get_file_by_id,
            close_file,
            get_prim_by_path,
            get_prim_tree_by_path,
            get_xref_table,
            get_contents
        ])
        .run(tauri::generate_context!())
        .expect("error while running tauri application");
}

8
src-tauri/src/main.rs
Normal file
@ -0,0 +1,8 @@
// Prevents additional console window on Windows in release, DO NOT REMOVE!!
#![cfg_attr(not(debug_assertions), windows_subsystem = "windows")]

use pdf_forge_lib::run;

fn main() {
    run();
}

162
src-tauri/src/tests.rs
Normal file
@ -0,0 +1,162 @@
extern crate pdf;

#[cfg(test)]
mod tests {
    // Import the items under test from the parent module.
    use crate::{
        get_prim_by_path_with_file, get_prim_model_by_path_with_file,
        get_prim_tree_by_path_with_file, get_xref_table_model_with_file, to_pdf_file,
        DetailPathStep, PrimitiveModel, TreeViewNode,
    };

    use pdf::content::{display_ops, serialize_ops, Op};
    use pdf::file::FileOptions;
    use pdf::object::{Object, ObjectWrite, Page, PlainRef, Resolve};
    use pdf::primitive::Primitive;
    use std::time::Instant;

    macro_rules! timed {
        ($func_call:expr, $label:expr) => {{
            let start = std::time::Instant::now();
            let result = $func_call;
            let duration = std::time::Instant::now().duration_since(start);
            println!("{} took {:?}", $label, duration);
            result
        }};
    }

    // Machine-specific fixture; point this at any sufficiently large local PDF.
    const FILE_PATH: &str =
        "/home/kschuettler/Dokumente/Scientific Papers/PDF Specification/ISO_32000-2_2020(en).pdf";

    #[test]
    fn test_read_x_ref() {
        let file = timed!(
            FileOptions::cached().open(FILE_PATH).unwrap(),
            "Loading file"
        );
        let resolver = file.resolver();
        let start = Instant::now();
        let refs = get_xref_table_model_with_file(&file).unwrap().entries;
        let time = Instant::now().duration_since(start);
        println!("retrieving {} primitives took {:?}", refs.len(), time);
        let start = Instant::now();
        let ex = refs
            .get(19368)
            .expect("fixture should have at least 19369 xref entries");
        let prim: Primitive = resolver
            .resolve(PlainRef {
                id: ex.obj_num,
                gen: ex.gen_num,
            })
            .unwrap();
        let time = Instant::now().duration_since(start);
        println!("{:?}", prim);
        println!("retrieving one primitive took {:?}", time);
        let start = Instant::now();
        let refs = get_xref_table_model_with_file(&file).unwrap().entries;
        let time = Instant::now().duration_since(start);
        println!("retrieving {} primitives again took {:?}", refs.len(), time);
    }

    #[test]
    fn test_read_tree() {
        let file = timed!(
            FileOptions::cached().open(FILE_PATH).unwrap(),
            "Loading file"
        );
        let mut path = Vec::new();
        path.push(TreeViewNode {
            key: "Index".to_string(),
            children: vec![TreeViewNode {
                key: "1".to_string(),
                children: vec![],
            }],
        });
        path.push(TreeViewNode {
            key: "Info".to_string(),
            children: vec![],
        });
        path.push(TreeViewNode {
            key: "Root".to_string(),
            children: vec![TreeViewNode {
                key: "Pages".to_string(),
                children: vec![],
            }],
        });
        let root = TreeViewNode {
            key: "/".to_string(),
            children: path,
        };

        let message = format!("Retrieval of {:?}", root);
        let prim = timed!(get_prim_tree_by_path_with_file(root, &file), message);
        print_node(prim.unwrap(), 0);
    }

    #[test]
    fn test_read_by_path() {
        let file = timed!(
            FileOptions::cached().open(FILE_PATH).unwrap(),
            "Loading file"
        );
        let path = "/Root/Pages";

        let message = format!("Retrieval of {:?}", path);
        let prim = timed!(get_prim_model_by_path_with_file(path, &file), message);
        print_node(prim.unwrap(), 0);
    }

    fn print_node(node: PrimitiveModel, depth: usize) {
        let spaces = " ".repeat(depth);
        println!("{:?}", node.detail_path);
        println!("{}{} | {} | {}", spaces, node.key, node.ptype, node.value);
        for child in node.children {
            print_node(child, depth + 1);
        }
    }

    #[test]
    fn test_read_trailer() {
        let file = timed!(
            FileOptions::cached().open(FILE_PATH).unwrap(),
            "Loading file"
        );
        let mut storage = timed!(FileOptions::uncached().storage(), "Loading storage");

        let trail = timed!(
            file.trailer.to_primitive(&mut storage).unwrap(),
            "writing trailer"
        );
        let trail_model = PrimitiveModel::from_primitive_with_children(
            "Trailer".to_string(),
            &trail,
            vec![DetailPathStep::new("/".to_string(), "/".to_string())],
        );
        print_node(trail_model, 5);
        println!("{:?}", file.trailer.info_dict);
    }

    #[test]
    fn test_read_pdf_file() {
        let file = timed!(
            FileOptions::cached().open(FILE_PATH).unwrap(),
            "Loading file"
        );

        let _pdf_file = timed!(to_pdf_file(FILE_PATH, &file), "to_pdf_file (1st run)");
        let pdf_file = timed!(to_pdf_file(FILE_PATH, &file), "to_pdf_file (2nd run)");
        println!("{:?}", pdf_file);
    }

    #[test]
    fn test_read_contents() {
        let file = timed!(
            FileOptions::cached().open(FILE_PATH).unwrap(),
            "Loading file"
        );

        let (_, page2_prim, _) = get_prim_by_path_with_file("1", &file).unwrap();
        let resolver = file.resolver();
        let page2 = Page::from_primitive(page2_prim, &resolver).unwrap();
        let mut ops: Vec<Op> = timed!(
            page2.contents.unwrap().operations(&resolver).unwrap(),
            "parse ops"
        );
        let serialized = timed!(serialize_ops(&mut ops).unwrap(), "serializing");
        let display = timed!(display_ops(&ops).unwrap(), "displaying");
        println!("Serialized -----------------------------------------------------------------");
        println!("{}", String::from_utf8(serialized).unwrap());
        println!("Displayed -----------------------------------------------------------------");
        for (line, s) in display.iter().enumerate() {
            println!("{}: {}", line, s);
        }
    }
}

36
src-tauri/tauri.conf.json
Normal file
@ -0,0 +1,36 @@
{
  "$schema": "https://schema.tauri.app/config/2",
  "productName": "pdf-forge",
  "version": "0.1.0",
  "identifier": "pdf-forge",
  "build": {
    "beforeDevCommand": "yarn dev",
    "devUrl": "http://localhost:1420",
    "beforeBuildCommand": "yarn build",
    "frontendDist": "../build"
  },
  "app": {
    "windows": [
      {
        "decorations": false,
        "title": "PDF Forge",
        "width": 1920,
        "height": 1080
      }
    ],
    "security": {
      "csp": null
    }
  },
  "bundle": {
    "active": true,
    "targets": "all",
    "icon": [
      "icons/32x32.png",
      "icons/128x128.png",
      "icons/128x128@2x.png",
      "icons/icon.icns",
      "icons/icon.ico"
    ]
  }
}

32
src/app.css
Normal file
@ -0,0 +1,32 @@
@import "tailwindcss/base";
@import "tailwindcss/components";
@import "tailwindcss/utilities";

:root {
    /* Colors */
    --background-color: rgb(43, 45, 48);
    --boundary-color: rgba(0, 0, 0, 0.29);
    --secondary-color: rgba(103, 101, 101, 0.6);
    --accent-color: rgb(44, 97, 97);
    --font-color: #c0cacd;
    --secondary-font-color: #6c6c6c;
}

body {
    margin: 0;
    font-family: 'Arial', sans-serif;
    background-color: var(--background-color);
    color: var(--font-color);
    border-color: var(--secondary-color);
}

::before, ::after {
    border-color: var(--secondary-color);
}

.full-container {
    height: 100%;
    width: 100%;
}

13
src/app.html
Normal file
@ -0,0 +1,13 @@
<!doctype html>
<html lang="en">
<head>
    <meta charset="utf-8" />
    <link rel="icon" href="%sveltekit.assets%/pdf-forge-logo-bg.png" />
    <meta name="viewport" content="width=device-width, initial-scale=1" />
    <title>PDF Forge</title>
    %sveltekit.head%
</head>
<body data-sveltekit-preload-data="hover">
<div style="display: contents">%sveltekit.body%</div>
</body>
</html>

287
src/components/App.svelte
Normal file
@ -0,0 +1,287 @@
<script lang="ts">
    import WelcomeScreen from "./WelcomeScreen.svelte";
    import {Pane, Splitpanes} from 'svelte-splitpanes';
    import {open} from "@tauri-apps/plugin-dialog";
    import {invoke} from "@tauri-apps/api/core";
    import type PdfFile from "../models/PdfFile";
    import ToolbarLeft from "./ToolbarLeft.svelte";
    import FileView from "./FileView.svelte";
    import TabBar from "./TabBar.svelte";
    import FileViewState from "../models/FileViewState.svelte";
    import TitleBar from "./TitleBar.svelte";
    import ToolbarRight from "./ToolbarRight.svelte";
    import Footer from "./Footer.svelte";

    const footerHeight: number = 30;
    const titleBarHeight: number = 30;
    let files: PdfFile[] = $state([]);
    let innerHeight: number = $state(1060);
    let errorMessage: string = $state("");
    let xrefTableWidth: number = $state(0);
    let xrefTableShowing: boolean = $state(false);
    let treeShowing: boolean = $state(true);
    let pagesShowing: boolean = $state(false);
    let fileViewHeight: number = $derived(Math.max(innerHeight - footerHeight - titleBarHeight, 0));

    let fStates: Map<string, FileViewState> = new Map<string, FileViewState>();
    let fState: FileViewState | undefined = $state();
    let selected_file = $derived(fState ? fState.file : undefined);
    initialLoadAllFiles()

    function initialLoadAllFiles() {
        invoke<PdfFile[]>("get_all_files").then(result_list => {
            files = result_list;
            createFileStates(files);
            if (files.length > 0) {
                selectFile(files[files.length - 1])
            }
        }).catch(error => {
            errorMessage = error
            console.error("File retrieval failed: " + error)
        });
    }

    function createFileStates(files: PdfFile[]) {
        for (let file of files) {
            if (fStates.has(file.id)) {
                continue;
            }
            let fState = new FileViewState(file)
            fStates.set(file.id, fState);
        }
    }

    async function upload() {
        let file_path = await open({
            multiple: false,
            directory: false,
            filters: [{name: 'pdf', extensions: ['pdf']}]
        })
        if (file_path === null || Array.isArray(file_path)) {
            return;
        }

        invoke<string>("upload", {path: file_path}).then(result => {
            invoke<PdfFile[]>("get_all_files").then(result_list => {
                files = result_list;
                createFileStates(files);
                let file = files.find(file => file.id === result);
                if (file) {
                    selectFile(file);
                }
            }).catch(error => {
                errorMessage = error
                console.error("Fetch all files failed with: " + error)
            });
        }).catch(error => {
            errorMessage = error
            console.error("File upload failed with: " + error)
        });
    }

    function selectFile(file: PdfFile) {
        fState = fStates.get(file.id);
    }

    function closeFile(file: PdfFile) {
        invoke("close_file", {id: file.id}).then(_ => {
            files = files.filter(f => f.id != file.id)
            if (file === selected_file) {
                fState = undefined;
            }
        }).catch(err => console.error(err));
    }
</script>
<svelte:window bind:innerHeight/>
<main style="height: {innerHeight}px">
    <div style="height: {titleBarHeight}px">
        <TitleBar></TitleBar>
    </div>
    <div style="height: {fileViewHeight}px">
        <Splitpanes theme="forge-movable" dblClickSplitter={false}>
            <Pane size={2.5} minSize={1.5} maxSize={4}>
                <ToolbarLeft bind:tree={treeShowing} bind:pages={pagesShowing}></ToolbarLeft>
            </Pane>
            <Pane>
                <TabBar {files} {selected_file} closeTab={closeFile} openTab={upload} selectTab={selectFile}></TabBar>
                {#if (fState)}
                    <FileView {treeShowing} {xrefTableShowing} {pagesShowing} {fState} height={fileViewHeight}></FileView>
                {:else}
                    <WelcomeScreen {upload}></WelcomeScreen>
                {/if}
            </Pane>
            <Pane size={2.5} minSize={1.5} maxSize={4}>
                <ToolbarRight bind:xref={xrefTableShowing}></ToolbarRight>
            </Pane>
        </Splitpanes>
    </div>
    <Footer {fState} {footerHeight}></Footer>
</main>

<style global>
    /* Resizable layout */
    :global(.splitpanes.forge-movable) :global(.splitpanes__pane) {
        background-color: var(--background-color);
    }

    :global(.splitpanes.forge-movable) :global(.splitpanes__splitter) {
        background-color: var(--boundary-color);
        box-sizing: border-box;
        position: relative;
        flex-shrink: 0;
    }

    :global(.splitpanes.forge-movable) :global(.splitpanes__splitter:before), :global(.splitpanes.forge-movable) :global(.splitpanes__splitter:after) {
        content: "";
        position: absolute;
        top: 50%;
        left: 50%;
        display: none;
        background-color: var(--accent-color);
        transition: background-color 0.3s;
    }

    :global(.splitpanes.forge-movable) :global(.splitpanes__splitter:hover:before), :global(.splitpanes.forge-movable) :global(.splitpanes__splitter:hover:after) {
        background-color: var(--boundary-color);
    }

    :global(.splitpanes.forge-movable) :global(.splitpanes__splitter:first-child) {
        cursor: auto;
    }

    :global(.forge-movable.splitpanes) :global(.splitpanes) :global(.splitpanes__splitter) {
        z-index: 1;
    }

    :global(.forge-movable.splitpanes--vertical) > :global(.splitpanes__splitter),
    :global(.forge-movable) :global(.splitpanes--vertical) > :global(.splitpanes__splitter) {
        width: 2px;
        border-left: 1px solid var(--boundary-color);
        cursor: col-resize;
    }

    :global(.forge-movable.splitpanes--vertical) > :global(.splitpanes__splitter:before), :global(.forge-movable.splitpanes--vertical) > :global(.splitpanes__splitter:after), :global(.forge-movable) :global(.splitpanes--vertical) > :global(.splitpanes__splitter:before), :global(.forge-movable) :global(.splitpanes--vertical) > :global(.splitpanes__splitter:after) {
        transform: translateY(-50%);
        width: 1px;
        height: 40px;
    }

    :global(.forge-movable.splitpanes--vertical) > :global(.splitpanes__splitter:before),
    :global(.forge-movable) :global(.splitpanes--vertical) > :global(.splitpanes__splitter:before) {
        margin-left: -2px;
    }

    :global(.forge-movable.splitpanes--vertical) > :global(.splitpanes__splitter:after),
    :global(.forge-movable) :global(.splitpanes--vertical) > :global(.splitpanes__splitter:after) {
        margin-left: 1px;
    }

    :global(.forge-movable.splitpanes--horizontal) > :global(.splitpanes__splitter),
    :global(.forge-movable) :global(.splitpanes--horizontal) > :global(.splitpanes__splitter) {
        height: 2px;
        border-top: 1px solid var(--boundary-color);
        cursor: row-resize;
    }

    :global(.forge-movable.splitpanes--horizontal) > :global(.splitpanes__splitter:before), :global(.forge-movable.splitpanes--horizontal) > :global(.splitpanes__splitter:after), :global(.forge-movable) :global(.splitpanes--horizontal) > :global(.splitpanes__splitter:before), :global(.forge-movable) :global(.splitpanes--horizontal) > :global(.splitpanes__splitter:after) {
        transform: translateX(-50%);
        width: 40px;
        height: 3px;
    }

    :global(.forge-movable.splitpanes--horizontal) > :global(.splitpanes__splitter:before),
    :global(.forge-movable) :global(.splitpanes--horizontal) > :global(.splitpanes__splitter:before) {
        margin-top: -2px;
    }

    :global(.forge-movable.splitpanes--horizontal) > :global(.splitpanes__splitter:after),
    :global(.forge-movable) :global(.splitpanes--horizontal) > :global(.splitpanes__splitter:after) {
        margin-top: 1px;
    }

    /* Fixed layout */
    :global(.splitpanes.forge-fixed) :global(.splitpanes__pane) {
        background-color: var(--background-color);
    }

    :global(.splitpanes.forge-fixed) :global(.splitpanes__splitter) {
        background-color: var(--boundary-color);
        position: relative;
    }

    :global(div.splitpanes--horizontal.splitpanes--dragging) {
        cursor: row-resize;
    }

    :global(div.splitpanes--vertical.splitpanes--dragging) {
        cursor: col-resize;
    }

    :global(.splitpanes) {
        display: flex;
        width: 100%;
        height: 100%;
    }

    :global(.splitpanes--vertical) {
        flex-direction: row;
    }

    :global(.splitpanes--horizontal) {
        flex-direction: column;
    }

    :global(.splitpanes--dragging) :global(*) {
        user-select: none;
    }

    :global(.splitpanes__pane) {
        width: 100%;
        height: 100%;
        overflow: hidden;
        /* Keep both the descendant and the direct-child pane selectors below: the
           direct-child variant handles the specificity of nested splitpanes
           transitions (vertical inside horizontal), while the older descendant
           form is kept for (potential) old-browser compatibility. */
    }

    :global(.splitpanes--vertical) :global(.splitpanes__pane) {
        transition: width 0.2s ease-out;
    }

    :global(.splitpanes--horizontal) :global(.splitpanes__pane) {
        transition: height 0.2s ease-out;
    }

    :global(.splitpanes--vertical) > :global(.splitpanes__pane) {
        transition: width 0.2s ease-out;
    }

    :global(.splitpanes--horizontal) > :global(.splitpanes__pane) {
        transition: height 0.2s ease-out;
    }

    :global(.splitpanes--dragging) :global(.splitpanes__pane) {
        transition: none;
        pointer-events: none;
    }

    :global(.splitpanes--freeze) :global(.splitpanes__pane) {
        transition: none;
    }

    :global(.splitpanes__splitter) {
        touch-action: none;
    }

    :global(.splitpanes--vertical) > :global(.splitpanes__splitter) {
        min-width: 1px;
    }

    :global(.splitpanes--horizontal) > :global(.splitpanes__splitter) {
        min-height: 1px;
    }
</style>

36
src/components/ContentsView.svelte
Normal file
@ -0,0 +1,36 @@
<script lang="ts">
    import { invoke } from "@tauri-apps/api/core";
    import type ContentModel from "../models/ContentModel.svelte";

    let {id, path, h}: {id: string, path: string, h: number} = $props();
    let contents: ContentModel | undefined = $state(undefined);
    $effect(() => {
        loadContents(id, path)
    })

    function loadContents(id: string, path: string) {
        invoke<ContentModel>("get_contents", {id: id, path: path})
            .then(result => contents = result)
            .catch(err => console.error(err));
    }
</script>
{#if contents}
    <div class="overflow-auto">
        <div class="whitespace-nowrap" style="height: {h}px">
            {#each contents.parts as part}
                <div class="part">
                    {#each part as line}
                        <div class="line">{line}</div>
                    {/each}
                </div>
            {/each}
        </div>
    </div>
{:else}
    {"Loading id: " + id + " Path: " + path}
{/if}

46
src/components/DocumentView.svelte
Normal file
@ -0,0 +1,46 @@
<script lang="ts">
    import type PdfFile from "../models/PdfFile";

    // prim and selectHandler are accepted but not rendered here yet; prim is typed
    // as unknown since this component imports no Primitive model type.
    let {file, path, selectHandler, prim}: { file: PdfFile, path: string, selectHandler: any, prim: unknown } = $props();
</script>

<div class="document-view text-forge-text">
    <div class="grid grid-cols-6">
        <div class="header"><h1>{file.name}</h1></div>
        <div><small class="small">COS Path: </small></div>
        <div class="big"><p>{path}</p></div>
        <div><small class="small">Location: </small></div>
        <div class="big"><p>{file.path}</p></div>
        <div><small class="small">Pages: </small></div>
        <div class="big"><p>{file.page_count}</p></div>
        <div><small class="small">Last modified: </small></div>
        <div class="big"><p>{file.last_modified}</p></div>
    </div>
</div>
<style lang="postcss">
    .document-view {
        padding: 2rem;
        text-align: left;
    }

    .small {
        @apply text-forge-text_sec whitespace-nowrap;
        text-align: left;
    }

    .big {
        @apply whitespace-nowrap col-span-5;
        text-align: left;
    }

    .header {
        @apply whitespace-nowrap col-span-6;
        text-align: center;
    }
</style>

68
src/components/FileView.svelte
Normal file
@ -0,0 +1,68 @@
<script lang="ts">
    import {Pane, Splitpanes} from "svelte-splitpanes";
    import XRefTable from "./XRefTable.svelte";
    import PrimitiveView from "./PrimitiveView.svelte";
    import TreeView from "./TreeView.svelte";
    import type FileViewState from "../models/FileViewState.svelte";
    import {onMount} from 'svelte';
    import PageList from "./PageList.svelte";

    let {treeShowing, xrefTableShowing, pagesShowing, fState, height}: {
        treeShowing: boolean,
        xrefTableShowing: boolean,
        pagesShowing: boolean,
        fState: FileViewState,
        height: number
    } = $props()

    function handleKeydown(event: KeyboardEvent) {
        // Check for "Alt + Left Arrow"
        if (event.altKey && event.key === 'ArrowLeft') {
            fState.popPath();
        }
    }

    function handleMouseButton(event: MouseEvent) {
        // Check for the mouse "back" button (button index 3)
        if (event.button === 3) {
            fState.popPath();
        }
    }

    onMount(() => {
        window.addEventListener('keydown', handleKeydown);
        window.addEventListener('mousedown', handleMouseButton);

        return () => {
            window.removeEventListener('keydown', handleKeydown);
            window.removeEventListener('mousedown', handleMouseButton);
        };
    });
</script>

<Splitpanes theme="forge-movable">
    <Pane size={treeShowing || pagesShowing ? 15 : 0} minSize={treeShowing || pagesShowing ? 1 : 0} maxSize={treeShowing || pagesShowing ? 100 : 0}>
        <Splitpanes theme="forge-movable" horizontal>
            <Pane size={treeShowing ? pagesShowing ? 50 : 100 : 0} minSize={treeShowing ? 2 : 0} maxSize={treeShowing ? 100 : 0}>
                <TreeView {fState}></TreeView>
            </Pane>
            <Pane size={pagesShowing ? treeShowing ? 50 : 100 : 0} minSize={pagesShowing ? 2 : 0} maxSize={pagesShowing ? 100 : 0}>
                <PageList {fState}></PageList>
            </Pane>
        </Splitpanes>
    </Pane>

    <Pane minSize={1}>
        <div>
            <PrimitiveView {fState} {height}></PrimitiveView>
        </div>
    </Pane>

    <Pane size={xrefTableShowing ? 16 : 0} minSize={xrefTableShowing ? 1 : 0} maxSize={xrefTableShowing ? 100 : 0}>
        <XRefTable {fState}></XRefTable>
    </Pane>
</Splitpanes>

Some files were not shown because too many files have changed in this diff