PDF-Forge/src-pdfrs/pdf/examples/other_page_content.rs
Kilian Schuettler 30a0f1beb7 initial-commit
2025-01-31 01:44:54 +01:00

111 lines
4.6 KiB
Rust

use pdf::content::ViewRect;
use pdf::error::PdfError;
use pdf::file::FileOptions;
use pdf::object::Resolve;
use pdf::primitive::{Dictionary, Primitive};
use std::env::args;
/// Extract data from a page entry that is under "other".
///
/// This example looks for strikethroughs in the annotations entry of every
/// page of the PDF given as the first command-line argument, and prints the
/// text attached to each annotation together with the bounding boxes
/// (`ViewRect`s) of the struck-through lines.
///
/// # Errors
///
/// Returns the `PdfError` raised while opening the file, loading a page, or
/// decoding its annotations.
///
/// # Panics
///
/// Panics if no file path is given on the command line.
#[cfg(feature="cache")]
fn main() -> Result<(), PdfError> {
    let path = args()
        .nth(1)
        .expect("Please provide a file path to the PDF you want to explore.");

    // `main` already returns `Result<(), PdfError>`, so open/parse failures are
    // propagated with `?` instead of panicking through `unwrap()`.
    let file = FileOptions::cached().open(&path)?;
    let resolver = file.resolver();

    for (i, page) in file.pages().enumerate() {
        let page = page?;
        let strikethroughs = annotation_strikethrough(&page.other, &resolver)?;
        println!(
            "Found {} strikethrough annotations on page {}.",
            strikethroughs.len(),
            i + 1
        );

        for (text, rects) in strikethroughs {
            println!();
            println!("Struck text:");
            println!("{:#?}", text);
            println!();
            // One rectangle is produced per struck-through line.
            println!("Text spans {} lines", rects.len());
            println!();
            println!("Strikethrough bounding boxes:");
            for rect in rects {
                println!("{:#?}", rect);
                println!();
            }
            println!();
            println!();
        }
    }

    Ok(())
}
fn annotation_strikethrough(
other_dict: &Dictionary,
resolver: &impl Resolve,
) -> Result<Vec<(String, Vec<pdf::content::ViewRect>)>, PdfError> {
let mut strikethroughs: Vec<(String, Vec<pdf::content::ViewRect>)> = Vec::new();
if !other_dict.is_empty() {
let annotations = other_dict.get("Annots".into());
if let Some(annotations) = annotations {
let annotations_resolved = annotations.clone().resolve(resolver)?;
let annotations_array = annotations_resolved.into_array()?;
for annotation in annotations_array.iter() {
let mut paths: Vec<pdf::content::ViewRect> = Vec::new();
let annotation_resolved = annotation.clone().resolve(resolver)?;
let annotation_dict = annotation_resolved.into_dictionary()?;
// If you have multiline strikethrough "Rect" will be the bounding
// box around all the strikethrough lines.
// "QuadPoints" gives 8 points for each line that is struckthrough,
// so if a single annotation involves text on two lines, QuadPoints
// should have 16 values in it. It starts with bottom left and
// runs counter-clockwise.
let subtype = annotation_dict.get("Subtype".into());
if let Some(subtype) = subtype {
let subtype = subtype.clone().into_name()?;
if subtype.as_str() == "StrikeOut" {
let rects = annotation_dict.get("QuadPoints".into());
let text = annotation_dict.get("Contents".into());
if let (Some(rects), Some(text)) = (rects, text) {
let text = text.to_string()?;
// Check multiples of 8.
let rects_array = rects.clone().into_array()?;
if rects_array.len() % 8 == 0 {
let rects: Vec<Vec<Primitive>> =
rects_array.chunks(8).map(|chunk| chunk.to_vec()).collect();
for rect in rects {
let mut quad_points: Vec<f32> = Vec::new();
for num in rect {
let number = num.as_number()?;
quad_points.push(number);
}
if quad_points.len() == 8 {
paths.push(ViewRect {
x: quad_points[0],
y: quad_points[1],
width: quad_points[2] - quad_points[0],
height: quad_points[7] - quad_points[1],
});
}
}
strikethroughs.push((text, paths))
}
}
}
}
}
}
}
Ok(strikethroughs)
}