2025-02-02 00:06:06 +01:00

844 lines
26 KiB
Rust

#[cfg(test)]
mod tests;
extern crate pdf;
use crate::pdf::object::Resolve;
use lazy_static::lazy_static;
use pdf::file::{File, FileOptions, NoLog, ObjectCache, StreamCache};
use pdf::object::{Object, ObjectWrite, PlainRef, Stream, Trace};
use pdf::primitive::Primitive;
use pdf::xref::XRef;
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, VecDeque};
use std::ops::DerefMut;
use std::path::Path;
use std::sync::{Mutex, MutexGuard};
use tauri::{Manager, State};
use uuid::Uuid;
/// The concrete `pdf::file::File` instantiation used throughout this module:
/// `Vec<u8>` as backend, `ObjectCache` / `StreamCache` as caches and `NoLog`
/// as logger.
type CosFile = File<Vec<u8>, ObjectCache, StreamCache, NoLog>;
/// Unwraps a `Result`, or returns early from the enclosing function with the
/// error rendered through `to_string()`.
///
/// Intended for functions whose error type is `String`.
macro_rules! t {
    ($result:expr) => {
        $result.map_err(|e| e.to_string())?
    };
}
/// Serializable view of a file's cross-reference table.
#[derive(Serialize, Debug, Clone)]
pub struct XRefTableModel {
/// Length reported by the file's xref table.
pub size: usize,
/// One model per entry produced while iterating the xref table.
pub entries: Vec<XRefEntryModel>,
}
/// Serializable view of a single cross-reference entry.
#[derive(Serialize, Debug, Clone)]
pub struct XRefEntryModel {
/// Position of the entry in the iterated xref table, used as object number.
pub obj_num: u64,
/// Generation number of the entry (0 where the entry kind carries none).
pub gen_num: u64,
/// Debug name of the resolved primitive for raw entries, otherwise one of
/// "Stream", "Free", "Promised" or "Invalid".
pub obj_type: String,
/// Kind-dependent: byte position for raw entries, index for stream entries,
/// next free object number for free entries, 0 otherwise.
pub offset: u64,
}
/// Summary of an opened PDF as sent to the frontend.
#[derive(Serialize, Debug, Clone)]
pub struct PdfFile {
/// Session-unique identifier (a UUID v4 string) assigned when the file is opened.
pub id: String,
/// Display name of the document.
pub name: String,
/// Path the file was opened from.
pub path: String,
/// Number of pages reported by the file.
pub page_count: u32,
/// Length of the file's xref table.
pub xref_entries: usize,
/// One entry per page, in document order.
pub pages: Vec<PageModel>,
}
/// Tree-shaped, serializable description of a PDF primitive.
#[derive(Serialize, Debug, Clone)]
pub struct PrimitiveModel {
/// Key under which the primitive is reachable from its parent
/// (dictionary name, array index or root step key).
pub key: String,
/// Debug name of the primitive kind, or "Stream Data" for the synthetic
/// data child of a stream.
pub ptype: String,
/// Value of a dictionary's `Type` entry when it is a name, otherwise "-".
pub sub_type: String,
/// Short textual rendering of the primitive's value.
pub value: String,
/// Child models; empty until children have been added.
pub children: Vec<PrimitiveModel>,
/// Path from the root step down to this primitive.
pub trace: Vec<PathTrace>,
/// Set to `true` once children have been added to this model.
pub expanded: bool,
}
/// One row of the flattened primitive tree sent to the frontend.
#[derive(Serialize, Debug, Clone)]
pub struct PrimitiveTreeView {
/// Nesting depth of the row; the root row has depth 0.
pub depth: usize,
/// Key of the primitive within its parent.
pub key: String,
/// Primitive kind, copied from the source `PrimitiveModel`.
pub ptype: String,
/// Sub type, copied from the source `PrimitiveModel`.
pub sub_type: String,
/// Display value, copied from the source `PrimitiveModel`.
pub value: String,
/// `true` for dictionaries, arrays, streams and references.
pub container: bool,
/// Whether the source model had its children added.
pub expanded: bool,
/// Trace of the source model.
pub path: Vec<PathTrace>,
/// Always set to `true` when a row is built in this file.
pub active: bool,
}
/// One segment of a path through the primitive tree.
#[derive(Serialize, Debug, Clone)]
pub struct PathTrace {
/// Key of this path segment.
pub key: String,
/// Textual identifier of the most recent object that was jumped to while
/// walking the path (the root key for object-number and trailer roots).
pub last_jump: String,
}
impl PathTrace {
fn new(key: String, last_jump: String) -> PathTrace {
PathTrace { key, last_jump }
}
}
/// Serializable description of a single page of an opened file.
#[derive(Serialize, Debug, Clone)]
pub struct PageModel {
// Display label of the form "Page N".
key: String,
// Object number of the page object.
obj_num: u64,
// 1-based page number.
page_num: u64,
}
/// Frontend request describing which nodes of a primitive tree to expand.
#[derive(Deserialize, Serialize, Debug, Clone)]
pub struct TreeViewRequest {
// Key of the node; parsed into a `Step` via `Step::parse_step`.
key: String,
// Requests for the children of this node.
children: Vec<TreeViewRequest>,
// Whether this node should be expanded.
expand: bool,
}
impl TreeViewRequest {
    /// Parses this node's key into a single path [`Step`].
    fn step(&self) -> Result<Step, String> {
        let key = self.key.as_str();
        Step::parse_step(key)
    }
}
/// Textual rendering of a page's content streams.
#[derive(Deserialize, Serialize, Debug, Clone)]
pub struct ContentsModel {
// One inner vector per content stream part, holding the displayed operations.
parts: Vec<Vec<String>>,
}
/// Returns a copy of the summary of every file currently held in the session.
///
/// Panics if the session mutex is poisoned.
#[tauri::command]
fn get_all_files(session: State<Mutex<Session>>) -> Vec<PdfFile> {
    let guard = session.lock().unwrap();
    let mut summaries = Vec::with_capacity(guard.files.len());
    for entry in guard.files.values() {
        summaries.push(entry.pdf_file.clone());
    }
    summaries
}
/// Returns the ids of all files currently held in the session.
///
/// Panics if the session mutex is poisoned.
#[tauri::command]
fn get_all_file_ids(session: State<Mutex<Session>>) -> Vec<String> {
    let guard = session.lock().unwrap();
    let mut ids = Vec::with_capacity(guard.files.len());
    for entry in guard.files.values() {
        ids.push(entry.pdf_file.id.clone());
    }
    ids
}
/// Removes the file with the given id from the session, if present.
///
/// Panics if the session mutex is poisoned.
#[tauri::command]
fn close_file(id: &str, session: State<Mutex<Session>>) {
    let mut guard = session.lock().unwrap();
    guard.handle_close(id);
}
/// Returns a copy of the summary of the file with the given id.
///
/// Fails when the session mutex cannot be locked or the id is unknown.
#[tauri::command]
fn get_file_by_id(id: &str, session: State<Mutex<Session>>) -> Result<PdfFile, String> {
    let guard = match session.lock() {
        Ok(guard) => guard,
        Err(_) => return Err("Failed to lock the session mutex.".to_string()),
    };
    let entry = get_file_from_state(id, &guard)?;
    let summary = entry.pdf_file.clone();
    Ok(summary)
}
/// Opens the PDF at `path`, registers it in the session and returns its new id.
///
/// Fails when the file cannot be opened or summarised, or when the session
/// mutex cannot be locked (reported as an error like the other commands
/// instead of panicking).
#[tauri::command]
fn upload(path: &str, session: State<Mutex<Session>>) -> Result<String, String> {
    let file = t!(FileOptions::cached().open(path));
    let pdf_file = to_pdf_file(path, &file)?;
    let mut session_guard = session
        .lock()
        .map_err(|_| "Failed to lock the session mutex.".to_string())?;
    session_guard.deref_mut().handle_upload(&pdf_file, file);
    Ok(pdf_file.id)
}
fn to_pdf_file(path: &str, file: &CosFile) -> Result<PdfFile, String> {
fn parse_title_from_path(path: &str) -> Option<String> {
Path::new(path)
.file_name()
.and_then(|f| f.to_str().map(|s| s.to_string()))
}
let file_name = if let Some(ref info) = file.trailer.info_dict {
info.title
.as_ref()
.map(|p| p.to_string_lossy())
.unwrap_or(parse_title_from_path(path).unwrap_or_else(|| "Not found".to_string()))
} else {
"Not found".to_string()
};
let pages = file
.pages()
.enumerate()
.map(|(i, page_ref)| PageModel {
key: format!("Page {}", i + 1),
obj_num: page_ref.unwrap().get_ref().get_inner().id,
page_num: (i + 1) as u64,
})
.collect();
let pdf_file = PdfFile {
id: Uuid::new_v4().to_string(),
name: file_name.to_string().into(),
path: path.to_string().into(),
page_count: file.num_pages(),
xref_entries: file.get_xref().len(),
pages: pages,
};
Ok(pdf_file)
}
/// Renders the content streams of the page addressed by `path` as text.
///
/// Each part of the page's contents is parsed into operations and displayed;
/// one list of strings is returned per part. Fails when the lock, the file
/// lookup, the path resolution or any parsing step fails, or when the page
/// has no contents.
#[tauri::command]
fn get_contents(
    id: &str,
    path: &str,
    session: State<Mutex<Session>>,
) -> Result<ContentsModel, String> {
    let guard = match session.lock() {
        Ok(guard) => guard,
        Err(_) => return Err("Failed to lock the session mutex.".to_string()),
    };
    let session_file = get_file_from_state(id, &guard)?;
    let cos_file = &session_file.cos_file;
    let (_, page_prim, _) = get_prim_by_path_with_file(path, cos_file)?;
    let resolver = cos_file.resolver();
    let page = t!(pdf::object::Page::from_primitive(page_prim, &resolver));
    let Some(contents) = page.contents else {
        return Err(String::from("Error occurred"));
    };
    let mut parts = Vec::new();
    for stream in contents.parts {
        let bytes = t!(stream.data(&resolver));
        let ops = t!(pdf::content::parse_ops(&bytes, &resolver));
        parts.push(t!(pdf::content::display_ops(&ops)));
    }
    Ok(ContentsModel { parts })
}
/// Returns the data of the stream addressed by `path` (which must end in
/// `Data`) for the file with the given id, as lossily decoded UTF-8.
#[tauri::command]
fn get_stream_data(id: &str, path: &str, session: State<Mutex<Session>>) -> Result<String, String> {
    let guard = match session.lock() {
        Ok(guard) => guard,
        Err(_) => return Err("Failed to lock the session mutex.".to_string()),
    };
    let session_file = get_file_from_state(id, &guard)?;
    let text = get_stream_data_by_path_with_file(path, &session_file.cos_file)?;
    Ok(text)
}
/// Resolves `path` inside `file` and returns the addressed stream's data as
/// lossily decoded UTF-8.
///
/// The last step of the path must be `Data`; the remaining steps must lead
/// to a stream primitive.
fn get_stream_data_by_path_with_file(path: &str, file: &CosFile) -> Result<String, String> {
    let mut steps = Step::parse(path);
    match steps.pop_back() {
        Some(Step::Data) => {}
        _ => return Err(format!("Path {} does not end with Data", path)),
    }
    let stream = match get_prim_by_steps_with_file(steps, file)? {
        (_, Primitive::Stream(stream), _) => stream,
        _ => return Err(format!("Path {} does not point to a stream", path)),
    };
    let resolver = file.resolver();
    let typed_stream = t!(Stream::<Primitive>::from_stream(stream, &resolver));
    let bytes = t!(typed_stream.data(&resolver));
    Ok(String::from_utf8_lossy(&bytes).into_owned())
}
/// Returns the primitive addressed by `path` in the file with the given id,
/// together with its direct children.
#[tauri::command]
fn get_prim_by_path(
    id: &str,
    path: &str,
    session: State<Mutex<Session>>,
) -> Result<PrimitiveModel, String> {
    let guard = match session.lock() {
        Ok(guard) => guard,
        Err(_) => return Err("Failed to lock the session mutex.".to_string()),
    };
    let session_file = get_file_from_state(id, &guard)?;
    let model = get_prim_model_by_path_with_file(path, &session_file.cos_file)?;
    Ok(model)
}
/// Resolves `path` inside `file` and returns the addressed primitive as a
/// model with its direct children attached.
fn get_prim_model_by_path_with_file(path: &str, file: &CosFile) -> Result<PrimitiveModel, String> {
    // The root step is not needed here; only the primitive and its trace are.
    let (_, prim, trace) = get_prim_by_path_with_file(path, file)?;
    Ok(PrimitiveModel::from_primitive_with_children(&prim, trace))
}
/// Parses `path` into steps and resolves them inside `file`.
///
/// Returns the root step, the addressed primitive and the trace leading to it.
fn get_prim_by_path_with_file(
    path: &str,
    file: &CosFile,
) -> Result<(Step, Primitive, Vec<PathTrace>), String> {
    let steps = Step::parse(path);
    get_prim_by_steps_with_file(steps, file)
}
/// Walks `steps` through `file`, starting at the root named by the first step.
///
/// Whenever a step lands on a reference, the reference is resolved and the
/// walk continues from the referenced object; the id of the most recent such
/// object is recorded as `last_jump` in every subsequent trace entry.
///
/// Returns the first (root) step, a copy of the final primitive and the
/// trace. Fails on an empty step list or when any step cannot be resolved.
fn get_prim_by_steps_with_file(
    mut steps: VecDeque<Step>,
    file: &CosFile,
) -> Result<(Step, Primitive, Vec<PathTrace>), String> {
    let Some(root_step) = steps.pop_front() else {
        return Err(format!("{:?} is not a valid path!", steps));
    };
    let (mut parent, root_trace) = resolve_parent(root_step.clone(), file)?;
    let mut last_jump = root_trace.last_jump.clone();
    let mut trace = vec![root_trace];
    let mut current_prim = &parent;
    while let Some(step) = steps.pop_front() {
        current_prim = resolve_step(current_prim, &step)?;
        if let Primitive::Reference(target) = current_prim {
            last_jump = target.id.to_string();
            // Replace the owned parent with the referenced object and keep walking from it.
            parent = resolve_p_ref(target.clone(), file)?;
            current_prim = &parent;
        }
        trace.push(PathTrace::new(step.get_key(), last_jump.clone()));
    }
    Ok((root_step, current_prim.clone(), trace))
}
/// Resolves the root of a path: a page, an object number or the trailer.
///
/// Any other step kind is rejected. For object-number and trailer roots the
/// returned trace entry uses the step key as both key and last jump.
fn resolve_parent(step: Step, file: &CosFile) -> Result<(Primitive, PathTrace), String> {
    let root = match step {
        Step::Page(page_num) => return retrieve_page(page_num, file),
        Step::Trailer => retrieve_trailer(file),
        Step::Number(obj_num) => resolve_xref(obj_num, file)?,
        Step::String(_) | Step::Data => {
            return Err(format!("{:?} is not a valid path!", step));
        }
    };
    let key = step.get_key();
    Ok((root, PathTrace::new(key.clone(), key)))
}
/// Builds the flattened tree rows for every requested root in `paths` and
/// concatenates them in request order.
///
/// Fails on the first request that cannot be resolved.
#[tauri::command]
fn get_prim_tree_by_path(
    id: &str,
    paths: Vec<TreeViewRequest>,
    session: State<Mutex<Session>>,
) -> Result<Vec<PrimitiveTreeView>, String> {
    let guard = match session.lock() {
        Ok(guard) => guard,
        Err(_) => return Err("Failed to lock the session mutex.".to_string()),
    };
    let session_file = get_file_from_state(id, &guard)?;
    let mut rows = Vec::new();
    for request in paths {
        let tree = get_prim_tree_by_path_with_file(request, &session_file.cos_file)?;
        rows.extend(tree);
    }
    Ok(rows)
}
/// Builds the flattened tree for a single root request.
///
/// A collapsed root yields just its own row; an expanded root gets its
/// direct children plus whatever the nested requests ask to expand.
fn get_prim_tree_by_path_with_file(
    node: TreeViewRequest,
    file: &CosFile,
) -> Result<Vec<PrimitiveTreeView>, String> {
    let root_step = node.step()?;
    let (root, root_trace) = resolve_parent(root_step.clone(), file)?;
    let trace = vec![root_trace];
    if !node.expand {
        let collapsed = PrimitiveModel::from_primitive(root_step.get_key(), &root, trace);
        return Ok(PrimitiveTreeView::flatten(0, collapsed));
    }
    let mut root_model = PrimitiveModel::from_primitive_with_children(&root, trace);
    for child in &node.children {
        expand(child, &mut root_model, &root, file)?;
    }
    Ok(PrimitiveTreeView::flatten(0, root_model))
}
fn expand(
node: &TreeViewRequest,
parent_model: &mut PrimitiveModel,
parent: &Primitive,
file: &CosFile,
) -> Result<(), String> {
if !node.expand {
return Ok(());
}
let step = node.step()?;
let prim = resolve_step(parent, &step)?;
if let Primitive::Reference(x_ref) = prim {
let jump = resolve_xref(x_ref.id, file)?;
let mut jump_trace = parent_model.trace.clone();
jump_trace.push(PathTrace::new(step.get_key(), x_ref.id.to_string()));
let mut to_expand = parent_model.get_child(step.get_key()).unwrap();
to_expand.add_children(&jump, &jump_trace);
expand_children(node, file, &jump, &mut to_expand)?;
} else {
let mut to_expand = parent_model.get_child(step.get_key()).unwrap();
to_expand.add_children(prim, &to_expand.trace.clone());
expand_children(node, file, prim, &mut to_expand)?;
}
Ok(())
}
/// Applies [`expand`] to every child request of `node`, using `prim` as the
/// parent primitive and `expanded` as the parent model. Stops at the first
/// failure.
fn expand_children(
    node: &TreeViewRequest,
    file: &CosFile,
    prim: &Primitive,
    expanded: &mut PrimitiveModel,
) -> Result<(), String> {
    node.children
        .iter()
        .try_for_each(|child| expand(child, expanded, prim, file))
}
/// Applies a single path step to `current_prim` and returns the addressed
/// child primitive.
///
/// Number steps index into arrays; string steps look up keys in dictionaries
/// or in a stream's info dictionary. Every other combination is an error.
fn resolve_step<'a>(current_prim: &'a Primitive, step: &Step) -> Result<&'a Primitive, String> {
    match (step, current_prim) {
        (Step::Number(index), Primitive::Array(items)) => items
            .get(*index as usize)
            .ok_or_else(|| format!("{} index out of bounds!", step.get_key())),
        (Step::Number(_), other) => Err(format!(
            "{} is not indexed with numbers!",
            other.get_debug_name()
        )),
        (Step::String(key), Primitive::Dictionary(dict)) => dict
            .get(key)
            .ok_or_else(|| format!("Key {} does not exist in Dictionary!", key)),
        (Step::String(key), Primitive::Stream(stream)) => stream
            .info
            .get(key)
            .ok_or_else(|| format!("Key {} does not exist in Info Dictionary!", key)),
        (Step::String(_), other) => Err(format!(
            "{} has no String paths!",
            other.get_debug_name()
        )),
        _ => Err(format!("Invalid Step: {}", step.get_key())),
    }
}
/// Converts the file's trailer into a `Primitive`, using a throwaway storage
/// obtained from `FileOptions::uncached()` as the updater.
///
/// Panics if the conversion fails.
fn retrieve_trailer(file: &CosFile) -> Primitive {
    let mut scratch_storage = FileOptions::uncached().storage();
    let trailer_prim = file.trailer.to_primitive(&mut scratch_storage);
    trailer_prim.unwrap()
}
/// Loads the page with the given 1-based number and returns its primitive
/// together with the root trace entry (`PageN`, jumping to the page object).
///
/// Fails for page number 0, for pages the file cannot provide, and when the
/// page object cannot be resolved.
fn retrieve_page(page_num: u32, file: &CosFile) -> Result<(Primitive, PathTrace), String> {
    // `checked_sub` rejects 0 and yields the 0-based index in one step.
    let Some(page_index) = page_num.checked_sub(1) else {
        return Err("Page 0 does not exist, use 1-based index!".to_string());
    };
    let page_rc = t!(file.get_page(page_index));
    let p_ref = page_rc.get_ref().get_inner();
    Ok((
        resolve_p_ref(p_ref, file)?,
        PathTrace::new(format!("Page{}", page_num), p_ref.id.to_string()),
    ))
}
/// A single segment of a path into a PDF's object graph.
#[derive(Debug, PartialEq, Clone)]
pub enum Step {
/// A textual key, used to look up entries of dictionaries and stream info dictionaries.
String(String),
/// A 1-based page number (written `PageN` in paths).
Page(u32),
/// A number: an object number when used as root, an array index otherwise.
Number(u64),
/// The file trailer (written `Trailer` or `/`).
Trailer,
/// The data of a stream (written `Data`).
Data,
}
impl Step {
    /// Parses one path segment.
    ///
    /// Segments that parse as `u64` become [`Step::Number`]; `Data`,
    /// `Trailer` and `/` map to their dedicated variants; `Page<digits>`
    /// becomes [`Step::Page`]; everything else is a [`Step::String`].
    ///
    /// Fails for an empty segment and for a page number that does not fit
    /// into `u32`.
    fn parse_step(path: &str) -> Result<Step, String> {
        lazy_static! {
            static ref PAGE_RE: Regex = Regex::new(r"^Page(\d+)$").unwrap();
        }
        if path.is_empty() {
            return Err(String::from("Path is empty"));
        }
        if let Ok(number) = path.parse::<u64>() {
            return Ok(Step::Number(number));
        }
        let step = match path {
            "Data" => Step::Data,
            "Trailer" | "/" => Step::Trailer,
            _ => match PAGE_RE.captures(path) {
                Some(caps) => Step::Page(
                    caps[1]
                        .parse::<u32>()
                        .map_err(|_| format!("Invalid page number in {}", path))?,
                ),
                None => Step::String(path.to_string()),
            },
        };
        Ok(step)
    }

    /// Splits `path` on `/` and parses every segment, silently skipping
    /// segments that fail to parse (such as the empty segments produced by
    /// leading, trailing or doubled slashes).
    ///
    /// A path with a leading `/` is rooted at the trailer: unless its first
    /// segment already names a root (object number, page or trailer), a
    /// [`Step::Trailer`] is put in front. Paths whose first segment names a
    /// root keep their previous meaning.
    fn parse(path: &str) -> VecDeque<Step> {
        let mut steps: VecDeque<Step> = path
            .split('/')
            .filter_map(|segment| Step::parse_step(segment).ok())
            .collect();
        let starts_at_root = matches!(
            steps.front(),
            Some(Step::Number(_) | Step::Page(_) | Step::Trailer)
        );
        if path.starts_with('/') && !starts_at_root {
            steps.push_front(Step::Trailer);
        }
        steps
    }

    /// Returns the textual form of this step as used in paths and traces.
    fn get_key(&self) -> String {
        match self {
            Step::String(s) => s.clone(),
            Step::Number(i) => i.to_string(),
            Step::Trailer => "Trailer".to_string(),
            Step::Page(n) => format!("Page{}", n),
            Step::Data => "Data".into(),
        }
    }
}
/// Resolves the object with number `id`, using generation 0.
fn resolve_xref(id: u64, file: &CosFile) -> Result<Primitive, String> {
    resolve_p_ref(PlainRef { id, gen: 0 }, file)
}
/// Resolves `plain_ref` through the file's resolver, turning a failure into
/// its string representation.
fn resolve_p_ref(plain_ref: PlainRef, file: &CosFile) -> Result<Primitive, String> {
    let resolver = file.resolver();
    match resolver.resolve(plain_ref) {
        Ok(primitive) => Ok(primitive),
        Err(error) => Err(error.to_string()),
    }
}
/// Looks up the session file with the given id in an already locked session.
///
/// Fails with a descriptive message when no such file is registered.
fn get_file_from_state<'a>(
    id: &str,
    session_guard: &'a MutexGuard<Session>,
) -> Result<&'a SessionFile, String> {
    match session_guard.files.get(id) {
        Some(session_file) => Ok(session_file),
        None => Err(format!("File with id {} does not exist!", id)),
    }
}
/// Returns a copy of `path` extended by one entry for `key`; the new entry
/// inherits the `last_jump` of the current last entry.
///
/// Panics if `path` is empty.
fn append_path(key: String, path: &Vec<PathTrace>) -> Vec<PathTrace> {
    let inherited_jump = path.last().unwrap().last_jump.clone();
    let mut extended = Vec::with_capacity(path.len() + 1);
    extended.extend(path.iter().cloned());
    extended.push(PathTrace::new(key, inherited_jump));
    extended
}
impl PrimitiveModel {
    /// Builds a childless, collapsed model for `primitive` under `key`.
    ///
    /// `value` is a short rendering of the primitive ("-" for dictionaries
    /// and streams, a preview for arrays); `sub_type` is the dictionary's
    /// `Type` name when present, otherwise "-".
    fn from_primitive(key: String, primitive: &Primitive, path: Vec<PathTrace>) -> PrimitiveModel {
        let value = match primitive {
            Primitive::Null => String::from("Null"),
            Primitive::Integer(int) => int.to_string(),
            Primitive::Number(num) => num.to_string(),
            Primitive::Boolean(flag) => flag.to_string(),
            Primitive::String(text) => text.to_string_lossy(),
            Primitive::Stream(_) | Primitive::Dictionary(_) => String::from("-"),
            Primitive::Array(items) => PrimitiveModel::format_arr_content(items),
            Primitive::Reference(target) => {
                format!("Obj Nr: {} Gen Nr: {}", target.id, target.gen)
            }
            Primitive::Name(name) => name.as_str().to_string(),
        };
        let sub_type = match primitive {
            Primitive::Dictionary(dict) => match dict.get("Type") {
                Some(Primitive::Name(name)) => name.as_str().to_string(),
                _ => String::from("-"),
            },
            _ => String::from("-"),
        };
        PrimitiveModel {
            key,
            ptype: primitive.get_debug_name().into(),
            sub_type,
            value,
            children: Vec::new(),
            trace: path,
            expanded: false,
        }
    }

    /// Renders a preview of an array: up to four elements, comma separated,
    /// followed by ",..." when the array is longer. Elements that are not
    /// scalars, strings or names are shown by their kind name.
    fn format_arr_content(arr: &Vec<Primitive>) -> String {
        const PREVIEW_LEN: usize = 4;
        if arr.is_empty() {
            return "[]".to_string();
        }
        let preview = arr
            .iter()
            .take(PREVIEW_LEN)
            .map(|item| match item {
                Primitive::Integer(int) => int.to_string(),
                Primitive::Number(num) => num.to_string(),
                Primitive::Boolean(flag) => flag.to_string(),
                Primitive::String(text) => text.to_string().unwrap_or(String::from("-")),
                Primitive::Name(name) => name.as_str().to_string(),
                other => other.get_debug_name().to_string(),
            })
            .collect::<Vec<String>>()
            .join(", ");
        let ellipsis = if arr.len() > PREVIEW_LEN { ",..." } else { "" };
        format!("[{}{}]", preview, ellipsis)
    }

    /// Builds a model for `primitive` keyed by the last entry of `trace` and
    /// attaches its direct children.
    ///
    /// Panics if `trace` is empty.
    fn from_primitive_with_children(
        primitive: &Primitive,
        trace: Vec<PathTrace>,
    ) -> PrimitiveModel {
        let key = trace.last().unwrap().key.clone();
        let mut model = PrimitiveModel::from_primitive(key, primitive, trace.clone());
        model.add_children(primitive, &trace);
        model
    }

    /// Marks this model as expanded and appends one child per entry of
    /// `primitive`: dictionary entries, array elements, or a stream's info
    /// entries followed by a synthetic "Data" child. Other primitives add
    /// no children.
    fn add_children(&mut self, primitive: &Primitive, path: &Vec<PathTrace>) {
        self.expanded = true;
        match primitive {
            Primitive::Dictionary(dict) => {
                for (name, value) in dict.iter() {
                    let key = name.as_str().to_string();
                    self.add_child(key.clone(), value, append_path(key, path));
                }
            }
            Primitive::Array(items) => {
                for (index, item) in items.iter().enumerate() {
                    let key = index.to_string();
                    self.add_child(key.clone(), item, append_path(key, path));
                }
            }
            Primitive::Stream(stream) => {
                for (name, value) in stream.info.iter() {
                    let key = name.as_str().to_string();
                    self.add_child(key.clone(), value, append_path(key, path));
                }
                // The stream's bytes are exposed as a separate leaf below the info entries.
                let data_child = PrimitiveModel {
                    key: "Data".to_string(),
                    ptype: "Stream Data".to_string(),
                    sub_type: "-".to_string(),
                    value: "".to_string(),
                    children: vec![],
                    trace: append_path("Data".to_string(), path),
                    expanded: false,
                };
                self.children.push(data_child);
            }
            _ => {}
        }
    }

    /// Appends a collapsed child model for `child` and returns a reference to it.
    fn add_child(
        &mut self,
        key: String,
        child: &Primitive,
        path: Vec<PathTrace>,
    ) -> &PrimitiveModel {
        self.children.push(Self::from_primitive(key, child, path));
        let newest = self.children.len() - 1;
        &self.children[newest]
    }

    /// Returns the first child whose key equals `key`, if any.
    fn get_child(&mut self, key: String) -> Option<&mut PrimitiveModel> {
        self.children
            .iter_mut()
            .find(|candidate| candidate.key == key)
    }

    /// Whether this model's kind can have children in the tree view.
    fn is_container(&self) -> bool {
        matches!(
            self.ptype.as_str(),
            "Dictionary" | "Array" | "Stream" | "Reference"
        )
    }

    /// Removes and returns all children, leaving this model childless.
    fn drain_children(&mut self) -> Vec<PrimitiveModel> {
        std::mem::take(&mut self.children)
    }
}
impl PrimitiveTreeView {
    /// Converts a model into a single tree row at the given depth.
    ///
    /// The model's children are not looked at; rows are always marked active.
    fn from_primitive(depth: usize, primitive: PrimitiveModel) -> PrimitiveTreeView {
        let container = primitive.is_container();
        PrimitiveTreeView {
            depth,
            key: primitive.key,
            ptype: primitive.ptype,
            sub_type: primitive.sub_type,
            value: primitive.value,
            container,
            expanded: primitive.expanded,
            path: primitive.trace,
            active: true,
        }
    }

    /// Flattens a model tree into rows in pre-order: the node itself first,
    /// then each child subtree one level deeper.
    fn flatten(depth: usize, mut primitive: PrimitiveModel) -> Vec<PrimitiveTreeView> {
        let children = primitive.drain_children();
        let mut views = vec![PrimitiveTreeView::from_primitive(depth, primitive)];
        for child in children {
            // Children are owned here, so each subtree is moved into the recursion
            // rather than deep-cloned at every level.
            views.extend(PrimitiveTreeView::flatten(depth + 1, child));
        }
        views
    }
}
/// Returns the cross-reference table model of the file with the given id.
#[tauri::command]
fn get_xref_table(id: &str, session: State<Mutex<Session>>) -> Result<XRefTableModel, String> {
    let guard = match session.lock() {
        Ok(guard) => guard,
        Err(_) => return Err("Failed to lock the session mutex.".to_string()),
    };
    let session_file = get_file_from_state(id, &guard)?;
    let table = get_xref_table_model_with_file(&session_file.cos_file)?;
    Ok(table)
}
/// Builds the serializable model of `file`'s cross-reference table.
///
/// Entries are numbered by their position in the iterated table. Raw entries
/// are resolved to report the kind of object they point to; the other entry
/// kinds are labelled "Stream", "Free", "Promised" or "Invalid".
///
/// Fails when a raw entry's object cannot be resolved, instead of panicking.
fn get_xref_table_model_with_file(file: &CosFile) -> Result<XRefTableModel, String> {
    let resolver = file.resolver();
    let x_ref_table = file.get_xref();
    let mut models: Vec<XRefEntryModel> = Vec::new();
    for (i, x_ref) in x_ref_table.iter_real().enumerate() {
        let obj_num = i as u64;
        let model = match x_ref {
            XRef::Raw { pos, gen_nr } => {
                let prim: Primitive = resolver
                    .resolve(PlainRef {
                        id: obj_num,
                        gen: *gen_nr,
                    })
                    .map_err(|e| format!("Failed to resolve object {}: {}", obj_num, e))?;
                XRefEntryModel {
                    obj_num,
                    gen_num: *gen_nr,
                    obj_type: prim.get_debug_name().to_string(),
                    offset: *pos as u64,
                }
            }
            XRef::Stream { index, .. } => XRefEntryModel {
                obj_num,
                gen_num: 0,
                obj_type: "Stream".into(),
                offset: *index as u64,
            },
            XRef::Free {
                next_obj_nr,
                gen_nr,
            } => XRefEntryModel {
                obj_num,
                gen_num: *gen_nr as u64,
                obj_type: "Free".into(),
                offset: *next_obj_nr as u64,
            },
            XRef::Promised => XRefEntryModel {
                obj_num,
                gen_num: 0,
                obj_type: "Promised".into(),
                offset: 0,
            },
            XRef::Invalid => XRefEntryModel {
                obj_num,
                gen_num: 0,
                obj_type: "Invalid".into(),
                offset: 0,
            },
        };
        models.push(model);
    }
    Ok(XRefTableModel {
        size: x_ref_table.len(),
        entries: models,
    })
}
/// Application state: all currently opened files.
struct Session {
// Opened files keyed by their `PdfFile::id`.
files: HashMap<String, SessionFile>,
}
/// An opened file: the frontend summary plus the parsed PDF it was built from.
struct SessionFile {
// Summary sent to the frontend.
pdf_file: PdfFile,
// Parsed PDF used to resolve objects on demand.
cos_file: CosFile,
}
// NOTE(review): no SAFETY justification is visible in this file. These impls
// assert that `SessionFile` (and therefore `CosFile` with its caches) may be
// moved to and shared between threads; presumably they exist so that
// `Mutex<Session>` can be registered as Tauri managed state. Confirm that the
// `pdf` types involved hold no thread-unsafe interior state, or that every
// access really goes through the session mutex — TODO verify.
unsafe impl Send for SessionFile {}
unsafe impl Sync for SessionFile {}
impl Session {
    /// Creates a session without any opened files.
    fn load() -> Session {
        let files = HashMap::new();
        Session { files }
    }

    /// Registers an opened file under the id of its summary, replacing any
    /// entry already stored under that id.
    fn handle_upload(&mut self, pdf_file: &PdfFile, cos_file: CosFile) {
        let entry = SessionFile {
            pdf_file: pdf_file.clone(),
            cos_file,
        };
        self.files.insert(pdf_file.id.clone(), entry);
    }

    /// Drops the file stored under `id`, if there is one.
    fn handle_close(&mut self, id: &str) {
        self.files.remove(id);
    }
}
/// Application entry point: configures the Tauri builder with the dialog,
/// fs and opener plugins, registers an empty session as managed state,
/// exposes the command handlers and runs the application.
///
/// Panics if the application fails to run.
#[cfg_attr(mobile, tauri::mobile_entry_point)]
pub fn run() {
    let builder = tauri::Builder::default()
        .plugin(tauri_plugin_dialog::init())
        .plugin(tauri_plugin_fs::init())
        .plugin(tauri_plugin_opener::init());
    let builder = builder.setup(|app| {
        let session = Mutex::new(Session::load());
        app.manage(session);
        Ok(())
    });
    let builder = builder.invoke_handler(tauri::generate_handler![
        upload,
        get_all_files,
        get_all_file_ids,
        get_file_by_id,
        close_file,
        get_prim_by_path,
        get_prim_tree_by_path,
        get_xref_table,
        get_contents,
        get_stream_data
    ]);
    builder
        .run(tauri::generate_context!())
        .expect("error while running tauri application");
}