diff --git a/.dvc/.gitignore b/.dvc/.gitignore
new file mode 100644
index 0000000..528f30c
--- /dev/null
+++ b/.dvc/.gitignore
@@ -0,0 +1,3 @@
+/config.local
+/tmp
+/cache
diff --git a/.dvc/config b/.dvc/config
new file mode 100644
index 0000000..cb89cc4
--- /dev/null
+++ b/.dvc/config
@@ -0,0 +1,7 @@
+[core]
+    remote = vector
+    autostage = true
+['remote "vector"']
+    url = ssh://vector.iqser.com/research/nonml_cv_doc_parsing/
+    port = 22
+
diff --git a/.dvc/plots/confusion.json b/.dvc/plots/confusion.json
new file mode 100644
index 0000000..af1b48d
--- /dev/null
+++ b/.dvc/plots/confusion.json
@@ -0,0 +1,107 @@
+{
+    "$schema": "https://vega.github.io/schema/vega-lite/v4.json",
+    "data": {
+        "values": "<DVC_METRIC_DATA>"
+    },
+    "title": "<DVC_METRIC_TITLE>",
+    "facet": {
+        "field": "rev",
+        "type": "nominal"
+    },
+    "spec": {
+        "transform": [
+            {
+                "aggregate": [
+                    {
+                        "op": "count",
+                        "as": "xy_count"
+                    }
+                ],
+                "groupby": [
+                    "<DVC_METRIC_Y>",
+                    "<DVC_METRIC_X>"
+                ]
+            },
+            {
+                "impute": "xy_count",
+                "groupby": [
+                    "rev",
+                    "<DVC_METRIC_Y>"
+                ],
+                "key": "<DVC_METRIC_X>",
+                "value": 0
+            },
+            {
+                "impute": "xy_count",
+                "groupby": [
+                    "rev",
+                    "<DVC_METRIC_X>"
+                ],
+                "key": "<DVC_METRIC_Y>",
+                "value": 0
+            },
+            {
+                "joinaggregate": [
+                    {
+                        "op": "max",
+                        "field": "xy_count",
+                        "as": "max_count"
+                    }
+                ],
+                "groupby": []
+            },
+            {
+                "calculate": "datum.xy_count / datum.max_count",
+                "as": "percent_of_max"
+            }
+        ],
+        "encoding": {
+            "x": {
+                "field": "<DVC_METRIC_X>",
+                "type": "nominal",
+                "sort": "ascending",
+                "title": "<DVC_METRIC_X_LABEL>"
+            },
+            "y": {
+                "field": "<DVC_METRIC_Y>",
+                "type": "nominal",
+                "sort": "ascending",
+                "title": "<DVC_METRIC_Y_LABEL>"
+            }
+        },
+        "layer": [
+            {
+                "mark": "rect",
+                "width": 300,
+                "height": 300,
+                "encoding": {
+                    "color": {
+                        "field": "xy_count",
+                        "type": "quantitative",
+                        "title": "",
+                        "scale": {
+                            "domainMin": 0,
+                            "nice": true
+                        }
+                    }
+                }
+            },
+            {
+                "mark": "text",
+                "encoding": {
+                    "text": {
+                        "field": "xy_count",
+                        "type": "quantitative"
+                    },
+                    "color": {
+                        "condition": {
+                            "test": "datum.percent_of_max > 0.5",
+                            "value": "white"
+                        },
+                        "value": "black"
+                    }
+                }
+            }
+        ]
+    }
+}
diff --git a/.dvc/plots/confusion_normalized.json b/.dvc/plots/confusion_normalized.json
new file mode 100644
index 0000000..1d38849
--- /dev/null
+++ b/.dvc/plots/confusion_normalized.json
@@ -0,0 +1,112 @@
+{
+    "$schema": "https://vega.github.io/schema/vega-lite/v4.json",
+    "data": {
+        "values": "<DVC_METRIC_DATA>"
+    },
+    "title": "<DVC_METRIC_TITLE>",
+    "facet": {
+        "field": "rev",
+        "type": "nominal"
+    },
+    "spec": {
+        "transform": [
+            {
+                "aggregate": [
+                    {
+                        "op": "count",
+                        "as": "xy_count"
+                    }
+                ],
+                "groupby": [
+                    "<DVC_METRIC_Y>",
+                    "<DVC_METRIC_X>"
+                ]
+            },
+            {
+                "impute": "xy_count",
+                "groupby": [
+                    "rev",
+                    "<DVC_METRIC_Y>"
+                ],
+                "key": "<DVC_METRIC_X>",
+                "value": 0
+            },
+            {
+                "impute": "xy_count",
+                "groupby": [
+                    "rev",
+                    "<DVC_METRIC_X>"
+                ],
+                "key": "<DVC_METRIC_Y>",
+                "value": 0
+            },
+            {
+                "joinaggregate": [
+                    {
+                        "op": "sum",
+                        "field": "xy_count",
+                        "as": "sum_y"
+                    }
+                ],
+                "groupby": [
+                    "<DVC_METRIC_Y>"
+                ]
+            },
+            {
+                "calculate": "datum.xy_count / datum.sum_y",
+                "as": "percent_of_y"
+            }
+        ],
+        "encoding": {
+            "x": {
+                "field": "<DVC_METRIC_X>",
+                "type": "nominal",
+                "sort": "ascending",
+                "title": "<DVC_METRIC_X_LABEL>"
+            },
+            "y": {
+                "field": "<DVC_METRIC_Y>",
+                "type": "nominal",
+                "sort": "ascending",
+                "title": "<DVC_METRIC_Y_LABEL>"
+            }
+        },
+        "layer": [
+            {
+                "mark": "rect",
+                "width": 300,
+                "height": 300,
+                "encoding": {
+                    "color": {
+                        "field": "percent_of_y",
+                        "type": "quantitative",
+                        "title": "",
+                        "scale": {
+                            "domain": [
+                                0,
+                                1
+                            ]
+                        }
+                    }
+                }
+            },
+            {
+                "mark": "text",
+                "encoding": {
+                    "text": {
+                        "field": "percent_of_y",
+                        "type": "quantitative",
+                        "format": ".2f"
+                    },
+                    "color": {
+                        "condition": {
+                            "test": "datum.percent_of_y > 0.5",
+                            "value": "white"
+                        },
+                        "value": "black"
+                    }
+                }
+            }
+        ]
+    }
+}
diff --git a/.dvc/plots/linear.json b/.dvc/plots/linear.json
new file mode 100644
index 0000000..65549f9
--- /dev/null
+++ b/.dvc/plots/linear.json
@@ -0,0 +1,116 @@
+{
+    "$schema": "https://vega.github.io/schema/vega-lite/v4.json",
+    "data": {
+        "values": "<DVC_METRIC_DATA>"
+    },
+    "title": "<DVC_METRIC_TITLE>",
+    "width": 300,
+    "height": 300,
+    "layer": [
+        {
+            "encoding": {
+                "x": {
+                    "field": "<DVC_METRIC_X>",
+                    "type": "quantitative",
+                    "title": "<DVC_METRIC_X_LABEL>"
+                },
+                "y": {
+                    "field": "<DVC_METRIC_Y>",
+                    "type": "quantitative",
+                    "title": "<DVC_METRIC_Y_LABEL>",
+                    "scale": {
+                        "zero": false
+                    }
+                },
+                "color": {
+                    "field": "rev",
+                    "type": "nominal"
+                }
+            },
+            "layer": [
+                {
+                    "mark": "line"
+                },
+                {
+                    "selection": {
+                        "label": {
+                            "type": "single",
+                            "nearest": true,
+                            "on": "mouseover",
+                            "encodings": [
+                                "x"
+                            ],
+                            "empty": "none",
+                            "clear": "mouseout"
+                        }
+                    },
+                    "mark": "point",
+                    "encoding": {
+                        "opacity": {
+                            "condition": {
+                                "selection": "label",
+                                "value": 1
+                            },
+                            "value": 0
+                        }
+                    }
+                }
+            ]
+        },
+        {
+            "transform": [
+                {
+                    "filter": {
+                        "selection": "label"
+                    }
+                }
+            ],
+            "layer": [
+                {
+                    "mark": {
+                        "type": "rule",
+                        "color": "gray"
+                    },
+                    "encoding": {
+                        "x": {
+                            "field": "<DVC_METRIC_X>",
+                            "type": "quantitative"
+                        }
+                    }
+                },
+                {
+                    "encoding": {
+                        "text": {
+                            "type": "quantitative",
+                            "field": "<DVC_METRIC_Y>"
+                        },
+                        "x": {
+                            "field": "<DVC_METRIC_X>",
+                            "type": "quantitative"
+                        },
+                        "y": {
+                            "field": "<DVC_METRIC_Y>",
+                            "type": "quantitative"
+                        }
+                    },
+                    "layer": [
+                        {
+                            "mark": {
+                                "type": "text",
+                                "align": "left",
+                                "dx": 5,
+                                "dy": -5
+                            },
+                            "encoding": {
+                                "color": {
+                                    "type": "nominal",
+                                    "field": "rev"
+                                }
+                            }
+                        }
+                    ]
+                }
+            ]
+        }
+    ]
+}
diff --git a/.dvc/plots/scatter.json b/.dvc/plots/scatter.json
new file mode 100644
index 0000000..9af9304
--- /dev/null
+++ b/.dvc/plots/scatter.json
@@ -0,0 +1,104 @@
+{
+    "$schema": "https://vega.github.io/schema/vega-lite/v4.json",
+    "data": {
+        "values": "<DVC_METRIC_DATA>"
+    },
+    "title": "<DVC_METRIC_TITLE>",
+    "width": 300,
+    "height": 300,
+    "layer": [
+        {
+            "encoding": {
+                "x": {
+                    "field": "<DVC_METRIC_X>",
+                    "type": "quantitative",
+                    "title": "<DVC_METRIC_X_LABEL>"
+                },
+                "y": {
+                    "field": "<DVC_METRIC_Y>",
+                    "type": "quantitative",
+                    "title": "<DVC_METRIC_Y_LABEL>",
+                    "scale": {
+                        "zero": false
+                    }
+                },
+                "color": {
+                    "field": "rev",
+                    "type": "nominal"
+                }
+            },
+            "layer": [
+                {
+                    "mark": "point"
+                },
+                {
+                    "selection": {
+                        "label": {
+                            "type": "single",
+                            "nearest": true,
+                            "on": "mouseover",
+                            "encodings": [
+                                "x"
+                            ],
+                            "empty": "none",
+                            "clear": "mouseout"
+                        }
+                    },
+                    "mark": "point",
+                    "encoding": {
+                        "opacity": {
+                            "condition": {
+                                "selection": "label",
+                                "value": 1
+                            },
+                            "value": 0
+                        }
+                    }
+                }
+            ]
+        },
+        {
+            "transform": [
+                {
+                    "filter": {
+                        "selection": "label"
+                    }
+                }
+            ],
+            "layer": [
+                {
+                    "encoding": {
+                        "text": {
+                            "type": "quantitative",
+                            "field": "<DVC_METRIC_Y>"
+                        },
+                        "x": {
+                            "field": "<DVC_METRIC_X>",
+                            "type": "quantitative"
+                        },
+                        "y": {
+                            "field": "<DVC_METRIC_Y>",
+                            "type": "quantitative"
+                        }
+                    },
+                    "layer": [
+                        {
+                            "mark": {
+                                "type": "text",
+                                "align": "left",
+                                "dx": 5,
+                                "dy": -5
+                            },
+                            "encoding": {
+                                "color": {
+                                    "type": "nominal",
+                                    "field": "rev"
+                                }
+                            }
+                        }
+                    ]
+                }
+            ]
+        }
+    ]
+}
diff --git a/.dvc/plots/simple.json b/.dvc/plots/simple.json
new file mode 100644
index 0000000..9cf71ce
--- /dev/null
+++ b/.dvc/plots/simple.json
@@ -0,0 +1,31 @@
+{
+    "$schema": "https://vega.github.io/schema/vega-lite/v4.json",
+    "data": {
+        "values": "<DVC_METRIC_DATA>"
+    },
+    "title": "<DVC_METRIC_TITLE>",
+    "width": 300,
+    "height": 300,
+    "mark": {
+        "type": "line"
+    },
+    "encoding": {
+        "x": {
+            "field": "<DVC_METRIC_X>",
+            "type": "quantitative",
+            "title": "<DVC_METRIC_X_LABEL>"
+        },
+        "y": {
+            "field": "<DVC_METRIC_Y>",
+            "type": "quantitative",
+            "title": "<DVC_METRIC_Y_LABEL>",
+            "scale": {
+                "zero": false
+            }
+        },
+        "color": {
+            "field": "rev",
+            "type": "nominal"
+        }
+    }
+}
diff --git a/.dvc/plots/smooth.json b/.dvc/plots/smooth.json
new file mode 100644
index 0000000..d497ce7
--- /dev/null
+++ b/.dvc/plots/smooth.json
@@ -0,0 +1,39 @@
+{
+    "$schema": "https://vega.github.io/schema/vega-lite/v4.json",
+    "data": {
+        "values": "<DVC_METRIC_DATA>"
+    },
+    "title": "<DVC_METRIC_TITLE>",
+    "mark": {
+        "type": "line"
+    },
+    "encoding": {
+        "x": {
+            "field": "<DVC_METRIC_X>",
+            "type": "quantitative",
+            "title": "<DVC_METRIC_X_LABEL>"
+        },
+        "y": {
+            "field": "<DVC_METRIC_Y>",
+            "type": "quantitative",
+            "title": "<DVC_METRIC_Y_LABEL>",
+            "scale": {
+                "zero": false
+            }
+        },
+        "color": {
+            "field": "rev",
+            "type": "nominal"
+        }
+    },
+    "transform": [
+        {
+            "loess": "<DVC_METRIC_Y>",
+            "on": "<DVC_METRIC_X>",
+            "groupby": [
+                "rev"
+            ],
+            "bandwidth": 0.3
+        }
+    ]
+}
diff --git a/.dvcignore b/.dvcignore
new file mode 100644
index 0000000..5197305
--- /dev/null
+++ b/.dvcignore
@@ -0,0 +1,3 @@
+# Add patterns of files dvc should ignore, which could improve
+# the performance. Learn more at
+# https://dvc.org/doc/user-guide/dvcignore
diff --git a/README.md b/README.md
index c66a861..1c0b947 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,13 @@
-# Table Parsing
+# Vidocp
 
 This repository implements computer vision based approaches for detecting and parsing visual features such as tables or
-previous redactions in PDFs.
+previous redactions in documents.
 
 ## Installation
 
 ```bash
-git clone ssh://git@git.iqser.com:2222/rr/table_parsing.git
-cd table_parsing
+git clone ssh://git@git.iqser.com:2222/rr/vidocp.git
+cd vidocp
 
 python -m venv env
 source env/bin/activate
@@ -18,10 +18,48 @@ pip install -r requirements.txt
 
 ## Usage
 
-```bash
-# Parse tables on second page of a PDF
-python scripts/annotate.py <path to pdf> 1 --type table
+### As an API
 
-# Detect redactions (black filled rectangles) on first page of a PDF
+The module provides functions for the individual tasks, each of which returns some kind of collection of points,
+depending on the specific task. Example for finding the outlines of previous redactions:
+
+```python
+
+from vidocp.redaction_detection import find_redactions
+import pdf2image 
+import numpy as np
+
+pdf_path = ...
+page_index = ...
+
+
+page = pdf2image.convert_from_path(pdf_path, first_page=page_index, last_page=page_index)[0]
+page = np.array(page)
+
+redaction_contours = find_redactions(page)
+```
+
+
+
+
+### Example outputs from demo script:
+
+
+#### Table parsing
+
+The table parsing utility detects tables and segments them into individual cells.
+```bash
+python scripts/annotate.py <path to pdf> 1 --type table
+```
+
+
+#### Detect redactions
+
+The redaction detection utility detects previous redactions in PDFs (black filled rectangles).
+```bash
 python scripts/annotate.py <path to pdf> 0 --type redaction
 ```
+
+The image below shows the detected redactions with green outlines.
+
+![](data/redaction_detection.png)
diff --git a/data/.gitignore b/data/.gitignore
new file mode 100644
index 0000000..09d8485
--- /dev/null
+++ b/data/.gitignore
@@ -0,0 +1 @@
+/test_pdf.pdf
diff --git a/data/redaction_detection.png b/data/redaction_detection.png
new file mode 100644
index 0000000..6df3f30
Binary files /dev/null and b/data/redaction_detection.png differ
diff --git a/data/test_pdf.pdf.dvc b/data/test_pdf.pdf.dvc
new file mode 100644
index 0000000..4eff9a4
--- /dev/null
+++ b/data/test_pdf.pdf.dvc
@@ -0,0 +1,4 @@
+outs:
+- md5: 60840305e4ddb084aea21976b8b7c49e
+  size: 6916053
+  path: test_pdf.pdf
diff --git a/requirements.txt b/requirements.txt
index a3f596f..913a63f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,6 @@ pdf2image~=1.16.0
 matplotlib~=3.5.1
 imutils==0.5.4
 iteration-utilities==0.11.0
+dvc==2.9.3
+dvc[ssh]
+
diff --git a/scripts/annotate.py b/scripts/annotate.py
index 9a8b048..4c6d7b8 100644
--- a/scripts/annotate.py
+++ b/scripts/annotate.py
@@ -1,8 +1,8 @@
 import argparse
 
-from table_parsing.table_parsig import annotate_tables_in_pdf
-from box_detection.redaction_detection import annotate_boxes_in_pdf
-from layout_detection.layout_detection import annotate_layout_in_pdf
+from vidocp.table_parsig import annotate_tables_in_pdf
+from vidocp.redaction_detection import annotate_boxes_in_pdf
+from vidocp.layout_detection import annotate_layout_in_pdf
 
 
 def parse_args():
diff --git a/setup.py b/setup.py
index 0cd4f7f..9fc73a8 100644
--- a/setup.py
+++ b/setup.py
@@ -3,11 +3,11 @@
 from distutils.core import setup
 
 setup(
-    name="table_parsing",
+    name="vidocp",
     version="0.0.1",
     description="",
     author="",
     author_email="",
     url="",
-    packages=["table_parsing"],
+    packages=["vidocp"],
 )
diff --git a/table_parsing/__init__.py b/table_parsing/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/box_detection/__init__.py b/vidocp/__init__.py
similarity index 100%
rename from box_detection/__init__.py
rename to vidocp/__init__.py
diff --git a/layout_detection/layout_detection.py b/vidocp/layout_detection.py
similarity index 100%
rename from layout_detection/layout_detection.py
rename to vidocp/layout_detection.py
diff --git a/box_detection/redaction_detection.py b/vidocp/redaction_detection.py
similarity index 93%
rename from box_detection/redaction_detection.py
rename to vidocp/redaction_detection.py
index 6b1c390..b071c93 100644
--- a/box_detection/redaction_detection.py
+++ b/vidocp/redaction_detection.py
@@ -42,9 +42,9 @@ def find_redactions(image: np.array, min_normalized_area=200000):
     return contours
 
 
-def annotate_poly(image, conts):
-    for cont in conts:
-        cv2.drawContours(image, cont, -1, (0, 255, 0), 2)
+def annotate_poly(image, contours):
+    for cont in contours:
+        cv2.drawContours(image, cont, -1, (0, 255, 0), 4)
 
     return image
 
diff --git a/table_parsing/table_parsig.py b/vidocp/table_parsig.py
similarity index 100%
rename from table_parsing/table_parsig.py
rename to vidocp/table_parsig.py