From 8b9621e798ed60d380eac5d54b8fe56b39324904 Mon Sep 17 00:00:00 2001
From: Isaac Riley <Isaac.Riley@iqser.com>
Date: Tue, 8 Mar 2022 10:01:25 +0100
Subject: [PATCH] first fully working containerization; still needs environment
 variables; review request data format

---
 Dockerfile                      |  1 +
 Dockerfile_base                 |  7 ++--
 requirements.txt                |  2 +-
 scripts/client_mock.py          | 35 +++++++++++++++++++
 src/run_service.py              | 60 +++++++++++++++++++++------------
 vidocp/config.py                |  5 ++-
 vidocp/figure_detection.py      |  7 ++--
 vidocp/layout_detection.py      |  9 ++---
 vidocp/layout_parsing.py        | 22 +++++++-----
 vidocp/redaction_detection.py   | 12 +++++--
 vidocp/table_parsig.py          | 16 +++++----
 vidocp/table_parsing.py         | 27 +++++++++------
 vidocp/table_parsing_2.py       |  5 +--
 vidocp/utils/deskew.py          | 10 +++---
 vidocp/utils/detection.py       |  7 ++--
 vidocp/utils/post_processing.py |  2 +-
 vidocp/utils/preprocessing.py   | 23 +++++++++++++
 vidocp/utils/text.py            |  7 ++--
 vidocp/utils/utils.py           |  6 ++++
 19 files changed, 191 insertions(+), 72 deletions(-)
 create mode 100644 vidocp/utils/preprocessing.py

diff --git a/Dockerfile b/Dockerfile
index 076315f..98abde6 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -8,6 +8,7 @@ WORKDIR /app/service
 COPY ./src ./src
 COPY vidocp ./vidocp
 
+RUN python3 -m pip install --upgrade pip
 RUN python3 -m pip install -e .
 
 WORKDIR /app/service
diff --git a/Dockerfile_base b/Dockerfile_base
index 18a255b..b564b73 100644
--- a/Dockerfile_base
+++ b/Dockerfile_base
@@ -12,7 +12,6 @@ WORKDIR /app/service
 COPY . ./
 
 # Install dependencies.
-RUN apt-get update && apt-get install -y python3-opencv
 RUN python3 -m pip install -r requirements.txt
 
 # Make a new container and copy all relevant files over to filter out temporary files
@@ -23,4 +22,8 @@ WORKDIR /app/
 COPY --from=builder1  /app .
 ENV PATH="/app/venv/bin:$PATH"
 
-WORKDIR /app/service
\ No newline at end of file
+WORKDIR /app/service
+
+# poppler-utils provides the binaries pdf2image shells out to.
+RUN apt-get update && apt-get install --yes poppler-utils \
+    && rm -rf /var/lib/apt/lists/*
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index bf6d0be..2d95184 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-opencv-python~=4.5.5.62
+opencv-python-headless~=4.5.5.62
 numpy~=1.22.1
 pdf2image~=1.16.0
 matplotlib~=3.5.1
diff --git a/scripts/client_mock.py b/scripts/client_mock.py
index e69de29..d1e258d 100644
--- a/scripts/client_mock.py
+++ b/scripts/client_mock.py
@@ -0,0 +1,35 @@
+import argparse
+import json
+import requests
+
+from vidocp.utils.preprocessing import open_pdf
+
+
+def parse_args():
+    """Parse the mock client's command-line options."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--pdf_path", required=True, help="path to PDF file")
+    parser.add_argument("--first_page", type=int, required=True, help="first page to process")
+    parser.add_argument("--last_page", type=int, required=False, default=None, help="last page to process (optional)")
+    # NOTE: --first_page/--last_page are parsed but not yet sent to the service.
+    return parser.parse_args()
+
+
+def main(args):
+    """Post the raw PDF bytes to the local service and print its JSON reply.
+
+    Only ``args.pdf_path`` is used; the page-range options are not sent yet.
+    """
+    # TODO: forward first_page/last_page once the request format is settled.
+    # Use a context manager so the PDF file handle is closed after the upload.
+    with open(args.pdf_path, "rb") as pdf_file:
+        response = requests.post("http://127.0.0.1:5000", data=pdf_file)
+    response.raise_for_status()
+    predictions = response.json()
+
+    print(json.dumps(predictions, indent=2))
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
\ No newline at end of file
diff --git a/src/run_service.py b/src/run_service.py
index 5d0d9be..0cf606f 100644
--- a/src/run_service.py
+++ b/src/run_service.py
@@ -4,12 +4,14 @@ import logging
 from flask import Flask, request, jsonify
 from waitress import serve
 
-from vidocp.utils import preprocess #TODO
+from vidocp.utils import npconvert
+from vidocp.utils.preprocessing import preprocess_pdf_image #TODO
 from vidocp.table_parsing import parse_table#, detect_tables_in_pdf
 from vidocp.redaction_detection import find_redactions#, detect_redactions_in_pdf
 from vidocp.layout_parsing import parse_layout#, detect_layout_in_pdf #TODO
 from vidocp.figure_detection import detect_figures#, detect_figures_in_pdf #TODO
 from vidocp.utils.logging import logger
+from vidocp.utils.preprocessing import open_pdf
 from vidocp.config import CONFIG
 
 
@@ -18,18 +20,18 @@ def suppress_user_warnings():
     warnings.filterwarnings("ignore")
 
 
-def parse_args():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--warnings", action="store_true", default=False)
-    args = parser.parse_args()
+# def parse_args():
+#     parser = argparse.ArgumentParser()
+#     parser.add_argument("--warnings", action="store_true", default=False)
+#     args = parser.parse_args()
 
-    return args
+#     return args
 
 
-def main(args):
+def main():
 
-    if not args.warnings:
-        suppress_user_warnings()
+    #if not args.warnings:
+    #    suppress_user_warnings()
 
     run_server()
 
@@ -41,9 +43,16 @@ def run_server():
     def predict_request():
         def inner():
             data = request.data
+            #print(type(request))
+            #print(dir(request))
+            params = request.json
+            #print("params:", params)
             logger.info(f"<3 Received data.")
+            print("data type:", type(data))
+            #print("json type:", type(params))
             logger.info(f"Processing data. <3")
-            predictions = make_predictions(data)
+            pdf_data = open_pdf(data)
+            predictions = make_predictions(pdf_data)
             return jsonify({"result": predictions})
         try:
             return inner()
@@ -60,22 +69,31 @@ def run_server():
         return jsonify(response)
 
     #predictor = initialize_predictor()
-    #logger.info("<3 Predictor ready.")
+    logger.info("<3 Annotator ready.")
 
     mode = CONFIG.webserver.mode
     if mode == "development":
         app.run(host=CONFIG.webserver.host, port=CONFIG.webserver.port, debug=True)
     elif mode == "production":
         serve(app, host=CONFIG.webserver.host, port=CONFIG.webserver.port)
+        logging.info("Production.")
 
 
-def make_predictions(pdf_data, page_index):
-    pdf = preprocess(pdf_data[page_index])
-    tables = parse_table(pdf)
-    redactions = find_redactions(pdf)
-    layout = parse_layout(pdf)
-    figure = detect_figures(pdf)
-    return jsonify({"tables": tables,
-                    "redactions": redactions,
-                    "layout": layout,
-                    "figure": figure})
+def make_predictions(pdf_data):
+    """Run every detector on each page; return {page index: JSON-encoded results}."""
+    def encode(found):
+        # list() only for consistency; npconvert handles numpy scalars
+        return json.dumps(list(found), default=npconvert)
+
+    predictions = {}
+    for page_index, raw_page in enumerate(open_pdf(pdf_data)):
+        page = preprocess_pdf_image(raw_page)
+        predictions[page_index] = {"tables": encode(parse_table(page)),
+                                   "redactions": encode(find_redactions(page)),
+                                   "layout": encode(parse_layout(page)),
+                                   "figure": encode(detect_figures(page))}
+    return predictions
+
+
+if __name__ == "__main__":
+    main()
diff --git a/vidocp/config.py b/vidocp/config.py
index eaf36ce..b2b7b84 100644
--- a/vidocp/config.py
+++ b/vidocp/config.py
@@ -33,4 +33,7 @@ class Config:
             return _get_item_and_maybe_make_dotindexable(self.__config, item)
 
     def __getitem__(self, item):
-        return self.__getattr__(item)
\ No newline at end of file
+        return self.__getattr__(item)
+
+
+CONFIG = Config(CONFIG_FILE)
\ No newline at end of file
diff --git a/vidocp/figure_detection.py b/vidocp/figure_detection.py
index 27a8eb2..313ddef 100644
--- a/vidocp/figure_detection.py
+++ b/vidocp/figure_detection.py
@@ -28,7 +28,7 @@ def detect_figures(image: np.array):
     return rects
 
 
-def detect_figures_in_pdf(pdf_path, page_index=1):
+def detect_figures_in_pdf(pdf_path, page_index=1, show=True):
 
     page = pdf2image.convert_from_path(pdf_path, first_page=page_index + 1, last_page=page_index + 1)[0]
     page = np.array(page)
@@ -36,4 +36,7 @@ def detect_figures_in_pdf(pdf_path, page_index=1):
     redaction_contours = detect_figures(page)
     page = draw_rectangles(page, redaction_contours)
 
-    show_mpl(page)
+    if show:
+        show_mpl(page)
+    else:
+        return page
diff --git a/vidocp/layout_detection.py b/vidocp/layout_detection.py
index 1d49684..2014f90 100644
--- a/vidocp/layout_detection.py
+++ b/vidocp/layout_detection.py
@@ -7,10 +7,11 @@ from matplotlib import pyplot as plt
 
 def find_layout_boxes(image: np.array):
 
-    gray_scale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-    blurred = cv2.GaussianBlur(gray_scale, (5, 5), 1)
-    thresh = cv2.threshold(blurred, 253, 255, cv2.THRESH_BINARY)[1]
-    img_bin = ~thresh
+    if len(image.shape) > 2:
+        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    image = cv2.GaussianBlur(image, (5, 5), 1)
+    image = cv2.threshold(image, 253, 255, cv2.THRESH_BINARY)[1]
+    img_bin = ~image
 
     line_min_width = 10
     kernel_h = np.ones((10, line_min_width), np.uint8)
diff --git a/vidocp/layout_parsing.py b/vidocp/layout_parsing.py
index b5f1c51..b0691c2 100644
--- a/vidocp/layout_parsing.py
+++ b/vidocp/layout_parsing.py
@@ -31,10 +31,12 @@ def find_segments(image):
 def parse_layout(image: np.array):
 
     image = image.copy()
-
-    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-    blur = cv2.GaussianBlur(gray, (7, 7), 0)
-    thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
+    image_ = image.copy()
+    
+    if len(image_.shape) > 2:
+        image_ = cv2.cvtColor(image_, cv2.COLOR_BGR2GRAY)
+    image_ = cv2.GaussianBlur(image_, (7, 7), 0)
+    thresh = cv2.threshold(image_, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
 
     kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
     dilate = cv2.dilate(thresh, kernel, iterations=4)
@@ -50,7 +52,8 @@ def parse_layout(image: np.array):
     _, image = cv2.threshold(image, 254, 255, cv2.THRESH_BINARY)
     image = ~image
 
-    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    if len(image.shape) > 2:
+        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
     rects = find_segments(image)
     # <- End of meta detection
 
@@ -60,12 +63,15 @@ def parse_layout(image: np.array):
     return rects
 
 
-def annotate_layout_in_pdf(pdf_path, page_index=1):
+def annotate_layout_in_pdf(pdf_path, page_index=1, show=False):
 
     page = pdf2image.convert_from_path(pdf_path, first_page=page_index + 1, last_page=page_index + 1)[0]
     page = np.array(page)
 
     rects = parse_layout(page)
     page = draw_rectangles(page, rects)
-
-    show_mpl(page)
+    
+    if show:
+        show_mpl(page)
+    else:
+        return page
diff --git a/vidocp/redaction_detection.py b/vidocp/redaction_detection.py
index 3362dc6..588be2b 100644
--- a/vidocp/redaction_detection.py
+++ b/vidocp/redaction_detection.py
@@ -18,7 +18,10 @@ def find_redactions(image: np.array, min_normalized_area=200000):
 
     min_normalized_area /= 200  # Assumes 200 DPI PDF -> image conversion resolution
 
-    gray = ~cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    if len(image.shape) > 2:
+        gray = ~cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    else:
+        gray = ~image
     blurred = cv2.GaussianBlur(gray, (5, 5), 1)
     thresh = cv2.threshold(blurred, 252, 255, cv2.THRESH_BINARY)[1]
 
@@ -30,7 +33,7 @@ def find_redactions(image: np.array, min_normalized_area=200000):
     return contours
 
 
-def annotate_redactions_in_pdf(pdf_path, page_index=1):
+def annotate_redactions_in_pdf(pdf_path, page_index=1, show=True):
 
     page = pdf2image.convert_from_path(pdf_path, first_page=page_index + 1, last_page=page_index + 1)[0]
     page = np.array(page)
@@ -38,4 +41,7 @@ def annotate_redactions_in_pdf(pdf_path, page_index=1):
     redaction_contours = find_redactions(page)
     page = draw_contours(page, redaction_contours)
 
-    show_mpl(page)
+    if show:
+        show_mpl(page)
+    else:
+        return page
diff --git a/vidocp/table_parsig.py b/vidocp/table_parsig.py
index 099830e..2fe7c35 100644
--- a/vidocp/table_parsig.py
+++ b/vidocp/table_parsig.py
@@ -8,12 +8,13 @@ from matplotlib import pyplot as plt
 
 
 def parse(image: np.array):
-    gray_scale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    if len(image.shape) > 2:
+        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
     #plt.imshow(gray_scale)
-    blurred = cv2.GaussianBlur(gray_scale, (7, 7), 2)  #5 5 1
-    thresh = cv2.threshold(blurred, 251, 255, cv2.THRESH_BINARY)[1]
+    image = cv2.GaussianBlur(image, (7, 7), 2)  #5 5 1
+    image = cv2.threshold(image, 251, 255, cv2.THRESH_BINARY)[1]
     #plt.imshow(thresh)
-    img_bin = ~thresh
+    img_bin = ~image
 
     line_min_width = 7
     kernel_h = np.ones((10, line_min_width), np.uint8)
@@ -37,9 +38,10 @@ def parse_tables(image: np.array, rects: list):
     for rect in rects:
         (x,y,w,h) = rect
         region_of_interest = image[x:x+w, y:y+h]
-        gray = cv2.cvtColor(region_of_interest, cv2.COLOR_BGR2GRAY)
-        thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)[1]
-        img_bin = ~thresh
+        if len(region_of_interest.shape) > 2:
+            region_of_interest = cv2.cvtColor(region_of_interest, cv2.COLOR_BGR2GRAY)
+        region_of_interest = cv2.threshold(region_of_interest, 200, 255, cv2.THRESH_BINARY)[1]
+        img_bin = ~region_of_interest
 
         line_min_width = 5
         kernel_h = np.ones((1, line_min_width), np.uint8)
diff --git a/vidocp/table_parsing.py b/vidocp/table_parsing.py
index f6801ca..97df384 100644
--- a/vidocp/table_parsing.py
+++ b/vidocp/table_parsing.py
@@ -26,14 +26,15 @@ def add_external_contours(image, img):
 
 
 
-def isolate_vertical_and_horizontal_components(img_bin, bounding_rects):
+def isolate_vertical_and_horizontal_components(img_bin, bounding_rects, show=False):
     line_min_width = 48
     kernel_h = np.ones((1, line_min_width), np.uint8)
     kernel_v = np.ones((line_min_width, 1), np.uint8)
 
     img_bin_h = cv2.morphologyEx(img_bin, cv2.MORPH_OPEN, kernel_h)
     img_bin_v = cv2.morphologyEx(img_bin, cv2.MORPH_OPEN, kernel_v)
-    show_mpl(img_bin_h | img_bin_v)
+    if show:
+        show_mpl(img_bin_h | img_bin_v)
 
     kernel_h = np.ones((1, 30), np.uint8)
     kernel_v = np.ones((30, 1), np.uint8)
@@ -46,7 +47,8 @@ def isolate_vertical_and_horizontal_components(img_bin, bounding_rects):
     img_bin_v = apply_motion_blur(img_bin_v, 80, 90)
 
     img_bin_final = img_bin_h | img_bin_v
-    show_mpl(img_bin_final)
+    if show:
+        show_mpl(img_bin_final)
     # changed threshold from 110 to 120 to minimize cell splitting
     th1, img_bin_final = cv2.threshold(img_bin_final, 120, 255, cv2.THRESH_BINARY)
     img_bin_final = cv2.dilate(img_bin_final, np.ones((1, 1), np.uint8), iterations=1)
@@ -118,15 +120,17 @@ def find_table_layout_boxes(image: np.array):
     return table_boxes
 
 
-def parse_table(image: np.array):
+def parse_table(image: np.array, show=False):
     def is_large_enough(stat):
         x1, y1, w, h, area = stat
         return area > 2000 and w > 35 and h > 25
 
-    gray_scale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) > 2 else image
-    th1, img_bin = cv2.threshold(gray_scale, 195, 255, cv2.THRESH_BINARY)
+    if len(image.shape) > 2:
+        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) > 2 else image
+    th1, img_bin = cv2.threshold(image, 195, 255, cv2.THRESH_BINARY)
     img_bin = ~img_bin
-    show_mpl(img_bin)
+    if show:
+        show_mpl(img_bin)
 
     table_layout_boxes = find_table_layout_boxes(image)
     img_bin = isolate_vertical_and_horizontal_components(img_bin, table_layout_boxes)
@@ -143,7 +147,7 @@ def parse_table(image: np.array):
     return rects
 
 
-def annotate_tables_in_pdf(pdf_path, page_index=0, deskew=False):
+def annotate_tables_in_pdf(pdf_path, page_index=0, deskew=False, show=True):
     page = pdf2image.convert_from_path(pdf_path, first_page=page_index + 1, last_page=page_index + 1)[0]
     page = np.array(page)
     if deskew:
@@ -153,5 +157,8 @@ def annotate_tables_in_pdf(pdf_path, page_index=0, deskew=False):
     page = draw_rectangles(page, stats, annotate=True)
     # if stats:
     #     page = draw_rectangles(page, stats, annotate=True)
-
-    show_mpl(page)
+    
+    if show:
+        show_mpl(page)
+    else:
+        return page
diff --git a/vidocp/table_parsing_2.py b/vidocp/table_parsing_2.py
index 8b035bf..d8f58c8 100644
--- a/vidocp/table_parsing_2.py
+++ b/vidocp/table_parsing_2.py
@@ -48,8 +48,9 @@ def annotate_image(image, stats):
 
 def parse_table(image: np.array):
 
-    gray_scale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-    th1, img_bin = cv2.threshold(gray_scale, 150, 255, cv2.THRESH_BINARY)
+    if len(image.shape) > 2:
+        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    th1, img_bin = cv2.threshold(image, 150, 255, cv2.THRESH_BINARY)
     img_bin = ~img_bin
 
     img_bin = isolate_vertical_and_horizontal_components(img_bin)
diff --git a/vidocp/utils/deskew.py b/vidocp/utils/deskew.py
index 4770bd9..727ccec 100644
--- a/vidocp/utils/deskew.py
+++ b/vidocp/utils/deskew.py
@@ -8,9 +8,10 @@ def detect_angle_from_lines(im: np.array, max_skew_deg=10, min_skew_deg=0.1, min
     min_skew_rad = np.deg2rad(min_skew_deg)
     width = im.shape[1]
 
-    im_gs = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
-    im_gs = cv2.fastNlMeansDenoising(im_gs, h=3)
-    im_bw = cv2.threshold(im_gs, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
+    if len(im.shape) > 2:
+        im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
+    im = cv2.fastNlMeansDenoising(im, h=3)
+    im_bw = cv2.threshold(im, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
 
     lines = cv2.HoughLinesP(im_bw, 1, np.pi / 180, 200, minLineLength=width / 12, maxLineGap=width / 150)
 
@@ -54,7 +55,8 @@ def deskew_linebased(image: np.array, verbose=False) -> np.array:
 
 def deskew_histbased(page: np.array, preprocess=True, max_abs_angle=1.5, delta=0.15, mode="nearest", verbose=False):
     if preprocess:
-        page = cv2.cvtColor(page, cv2.COLOR_BGR2GRAY)
+        if len(page.shape) > 2:
+            page = cv2.cvtColor(page, cv2.COLOR_BGR2GRAY)
         page = cv2.fastNlMeansDenoising(page, h=3)
     w, h = page.shape
 
diff --git a/vidocp/utils/detection.py b/vidocp/utils/detection.py
index e5d8266..2df75a2 100644
--- a/vidocp/utils/detection.py
+++ b/vidocp/utils/detection.py
@@ -8,12 +8,13 @@ def detect_large_coherent_structures(image: np.array):
     References:
          https://stackoverflow.com/questions/60259169/how-to-group-nearby-contours-in-opencv-python-zebra-crossing-detection
     """
-    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    if len(image.shape) > 2:
+        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
 
-    thresh = cv2.threshold(gray, 253, 255, cv2.THRESH_BINARY)[1]
+    image = cv2.threshold(image, 253, 255, cv2.THRESH_BINARY)[1]
 
     dilate_kernel = cv2.getStructuringElement(cv2.MORPH_OPEN, (5, 5))
-    dilate = cv2.dilate(~thresh, dilate_kernel, iterations=4)
+    dilate = cv2.dilate(~image, dilate_kernel, iterations=4)
 
     close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 20))
     close = cv2.morphologyEx(dilate, cv2.MORPH_CLOSE, close_kernel, iterations=1)
diff --git a/vidocp/utils/post_processing.py b/vidocp/utils/post_processing.py
index 86bd37f..a85b4dc 100644
--- a/vidocp/utils/post_processing.py
+++ b/vidocp/utils/post_processing.py
@@ -130,7 +130,7 @@ def xywh_to_vecs(rect):
     x1, y1, w, h = rect
     x2 = x1 + w
     y2 = y1 + h
-    return Rectangle(x1, y1, x2, y2)
+    return (x1, y1), (x2, y2)
 
 
 def vec_rect_to_xywh(rect):
diff --git a/vidocp/utils/preprocessing.py b/vidocp/utils/preprocessing.py
new file mode 100644
index 0000000..2aee52d
--- /dev/null
+++ b/vidocp/utils/preprocessing.py
@@ -0,0 +1,23 @@
+from numpy import array
+import pdf2image
+import cv2
+
+
+def open_pdf(pdf, first_page=0, last_page=None):
+    """Render a PDF (path str or raw bytes) to a list of page arrays; lists pass through."""
+    if isinstance(pdf, list):
+        return pdf
+    # Shift both bounds by one before handing them to pdf2image.
+    page_range = {"first_page": first_page + 1, "last_page": None if last_page is None else last_page + 1}
+    if isinstance(pdf, str):
+        return [array(p) for p in pdf2image.convert_from_path(pdf, **page_range)]
+    if isinstance(pdf, bytes):
+        return [array(p) for p in pdf2image.convert_from_bytes(pdf, **page_range)]
+    raise TypeError(f"open_pdf expects a path, bytes or a list of pages, got {type(pdf).__name__}")
+
+
+def preprocess_pdf_image(page):
+    """Convert a page to grayscale if it has a channel axis, then denoise it."""
+    is_multichannel = len(page.shape) > 2
+    gray = cv2.cvtColor(page, cv2.COLOR_BGR2GRAY) if is_multichannel else page
+    return cv2.fastNlMeansDenoising(gray, h=3)
\ No newline at end of file
diff --git a/vidocp/utils/text.py b/vidocp/utils/text.py
index 4189005..acfaa48 100644
--- a/vidocp/utils/text.py
+++ b/vidocp/utils/text.py
@@ -40,12 +40,13 @@ def find_primary_text_regions(image):
 
     image = image.copy()
 
-    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    if len(image.shape) > 2:
+        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
 
-    thresh = cv2.threshold(gray, 253, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
+    image = cv2.threshold(image, 253, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
 
     close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 3))
-    close = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, close_kernel, iterations=1)
+    close = cv2.morphologyEx(image, cv2.MORPH_CLOSE, close_kernel, iterations=1)
 
     dilate_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 3))
     dilate = cv2.dilate(close, dilate_kernel, iterations=1)
diff --git a/vidocp/utils/utils.py b/vidocp/utils/utils.py
index 18c8eb2..72dd99f 100644
--- a/vidocp/utils/utils.py
+++ b/vidocp/utils/utils.py
@@ -1,3 +1,4 @@
+from numpy import generic
 import cv2
 
 
@@ -10,3 +11,8 @@ def copy_and_normalize_channels(image):
         pass
 
     return image
+
+
+def npconvert(ob):
+    if not isinstance(ob, generic): raise TypeError(f"Object of type {type(ob).__name__} is not JSON serializable")
+    return ob.item()  # json.dumps `default` hook: numpy scalar -> native Python value
\ No newline at end of file