diff --git a/Dockerfile b/Dockerfile
index eb9fa90dd..a13fa7b3f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -5,7 +5,7 @@ MAINTAINER Pit Kleyersburg <pitkley@googlemail.com>
 RUN apt-get update \
     && apt-get install -y --no-install-recommends \
         sudo \
-        tesseract-ocr tesseract-ocr-eng imagemagick ghostscript \
+        tesseract-ocr tesseract-ocr-eng imagemagick ghostscript unpaper \
     && rm -rf /var/lib/apt/lists/*
 
 # Install python dependencies
diff --git a/docs/requirements.rst b/docs/requirements.rst
index ee287d835..36bc234c0 100644
--- a/docs/requirements.rst
+++ b/docs/requirements.rst
@@ -10,11 +10,13 @@ should work) that has the following software installed on it:
 * `GNU Privacy Guard`_
 * `Tesseract`_
 * `Imagemagick`_
+* `unpaper`_
 
 .. _Python3: https://python.org/
 .. _GNU Privacy Guard: https://gnupg.org
 .. _Tesseract: https://github.com/tesseract-ocr
 .. _Imagemagick: http://imagemagick.org/
+.. _unpaper: https://www.flameeyes.eu/projects/unpaper
 
 Notably, you should confirm how you access your Python3 installation.  Many
 Linux distributions will install Python3 in parallel to Python2, using the names
diff --git a/scripts/vagrant-provision b/scripts/vagrant-provision
index 0a09058e4..940bf476c 100644
--- a/scripts/vagrant-provision
+++ b/scripts/vagrant-provision
@@ -5,7 +5,7 @@ apt-get update
 apt-get build-dep -y python-imaging
 apt-get install -y libjpeg8 libjpeg62-dev libfreetype6 libfreetype6-dev
 apt-get install -y build-essential python3-dev python3-pip sqlite3 libsqlite3-dev git
-apt-get install -y tesseract-ocr tesseract-ocr-eng imagemagick
+apt-get install -y tesseract-ocr tesseract-ocr-eng imagemagick unpaper
 
 # Python dependencies
 pip3 install -r /opt/paperless/requirements.txt
diff --git a/src/documents/consumer.py b/src/documents/consumer.py
index 5cfc20852..fbdbbc276 100644
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -39,8 +39,8 @@ class ConsumerError(Exception):
 class Consumer(object):
     """
     Loop over every file found in CONSUMPTION_DIR and:
-      1. Convert it to a greyscale png
-      2. Use tesseract on the png
+      1. Convert it to greyscale pnm images and run unpaper on them
+      2. Use tesseract on the pnm
       3. Encrypt and store the document in the MEDIA_ROOT
       4. Store the OCR'd text in the database
       5. Delete the document and image(s)
@@ -48,6 +48,7 @@ class Consumer(object):
 
     SCRATCH = settings.SCRATCH_DIR
     CONVERT = settings.CONVERT_BINARY
+    UNPAPER = settings.UNPAPER_BINARY
     CONSUME = settings.CONSUMPTION_DIR
     THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None
 
@@ -118,11 +119,11 @@ class Consumer(object):
             self.log("info", "Consuming {}".format(doc))
 
             tempdir = tempfile.mkdtemp(prefix="paperless", dir=self.SCRATCH)
-            pngs = self._get_greyscale(tempdir, doc)
+            imgs = self._get_greyscale(tempdir, doc)
             thumbnail = self._get_thumbnail(tempdir, doc)
 
             try:
-                text = self._get_ocr(pngs)
+                text = self._get_ocr(imgs)
                 self._store(text, doc, thumbnail)
             except OCRError as e:
                 self._ignore.append(doc)
@@ -140,19 +141,30 @@ class Consumer(object):
 
         self.log("info", "Generating greyscale image from {}".format(doc))
 
-        png = os.path.join(tempdir, "convert-%04d.jpg")
-
+        # Convert PDF to multiple PNMs
+        pnm = os.path.join(tempdir, "convert-%04d.pnm")
         subprocess.Popen((
             self.CONVERT, "-density", "300", "-depth", "8",
-            "-type", "grayscale", doc, png
+            "-type", "grayscale", doc, pnm
         )).wait()
 
-        pngs = []
+        # Get a list of converted images
+        pnms = []
         for f in os.listdir(tempdir):
-            if f.startswith("convert"):
-                pngs.append(os.path.join(tempdir, f))
+            if f.endswith(".pnm"):
+                pnms.append(os.path.join(tempdir, f))
 
-        return sorted(filter(lambda __: os.path.isfile(__), pngs))
+        # Run unpaper in parallel on converted images
+        with Pool(processes=self.THREADS) as pool:
+            pool.map(run_unpaper, itertools.product([self.UNPAPER], pnms))
+
+        # Return list of converted images, processed with unpaper
+        pnms = []
+        for f in os.listdir(tempdir):
+            if f.endswith(".unpaper.pnm"):
+                pnms.append(os.path.join(tempdir, f))
+
+        return sorted(filter(lambda __: os.path.isfile(__), pnms))
 
     def _get_thumbnail(self, tempdir, doc):
         """
@@ -179,21 +191,21 @@ class Consumer(object):
         except Exception as e:
             self.log("warning", "Language detection error: {}".format(e))
 
-    def _get_ocr(self, pngs):
+    def _get_ocr(self, imgs):
         """
         Attempts to do the best job possible OCR'ing the document based on
         simple language detection trial & error.
         """
 
-        if not pngs:
+        if not imgs:
             raise OCRError("No images found")
 
         self.log("info", "OCRing the document")
 
         # Since the division gets rounded down by int, this calculation works
         # for every edge-case, i.e. 1
-        middle = int(len(pngs) / 2)
-        raw_text = self._ocr([pngs[middle]], self.DEFAULT_OCR_LANGUAGE)
+        middle = int(len(imgs) / 2)
+        raw_text = self._ocr([imgs[middle]], self.DEFAULT_OCR_LANGUAGE)
 
         guessed_language = self._guess_language(raw_text)
 
@@ -205,16 +217,16 @@ class Consumer(object):
                     "As FORGIVING_OCR is enabled, we're going to make the "
                     "best with what we have."
                 )
-                raw_text = self._assemble_ocr_sections(pngs, middle, raw_text)
+                raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
                 return raw_text
             raise OCRError("Language detection failed")
 
         if ISO639[guessed_language] == self.DEFAULT_OCR_LANGUAGE:
-            raw_text = self._assemble_ocr_sections(pngs, middle, raw_text)
+            raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
             return raw_text
 
         try:
-            return self._ocr(pngs, ISO639[guessed_language])
+            return self._ocr(imgs, ISO639[guessed_language])
         except pyocr.pyocr.tesseract.TesseractError:
             if settings.FORGIVING_OCR:
                 self.log(
@@ -224,34 +236,34 @@ class Consumer(object):
                         guessed_language
                     )
                 )
-                raw_text = self._assemble_ocr_sections(pngs, middle, raw_text)
+                raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
                 return raw_text
             raise OCRError(
                 "The guessed language is not available in this instance of "
                 "Tesseract."
             )
 
-    def _assemble_ocr_sections(self, pngs, middle, text):
+    def _assemble_ocr_sections(self, imgs, middle, text):
         """
         Given a `middle` value and the text that middle page represents, we OCR
         the remainder of the document and return the whole thing.
         """
-        text = self._ocr(pngs[:middle], self.DEFAULT_OCR_LANGUAGE) + text
-        text += self._ocr(pngs[middle+1:], self.DEFAULT_OCR_LANGUAGE)
+        text = self._ocr(imgs[:middle], self.DEFAULT_OCR_LANGUAGE) + text
+        text += self._ocr(imgs[middle + 1:], self.DEFAULT_OCR_LANGUAGE)
         return text
 
-    def _ocr(self, pngs, lang):
+    def _ocr(self, imgs, lang):
         """
         Performs a single OCR attempt.
         """
 
-        if not pngs:
+        if not imgs:
             return ""
 
         self.log("info", "Parsing for {}".format(lang))
 
         with Pool(processes=self.THREADS) as pool:
-            r = pool.map(image_to_string, itertools.product(pngs, [lang]))
+            r = pool.map(image_to_string, itertools.product(imgs, [lang]))
             r = " ".join(r)
 
         # Strip out excess white space to allow matching to go smoother
@@ -374,16 +386,9 @@ class Consumer(object):
 
 
 def image_to_string(args):
-    """
-    I have no idea why, but if this function were a method of Consumer, it
-    would explode with:
-
-      `TypeError: cannot serialize '_io.TextIOWrapper' object`.
-    """
-
-    png, lang = args
+    img, lang = args
     ocr = pyocr.get_available_tools()[0]
-    with Image.open(os.path.join(Consumer.SCRATCH, png)) as f:
+    with Image.open(os.path.join(Consumer.SCRATCH, img)) as f:
         if ocr.can_detect_orientation():
             try:
                 orientation = ocr.detect_orientation(f, lang=lang)
@@ -391,3 +396,10 @@ def image_to_string(args):
             except TesseractError:
                 pass
         return ocr.image_to_string(f, lang=lang)
+
+
+def run_unpaper(args):
+    """Run unpaper on one (binary, pnm) pair, writing `<name>.unpaper.pnm`."""
+    unpaper, pnm = args
+    cleaned = os.path.splitext(pnm)[0] + ".unpaper.pnm"  # swap suffix only
+    subprocess.Popen((unpaper, pnm, cleaned)).wait()
diff --git a/src/paperless/settings.py b/src/paperless/settings.py
index f2fb41941..b7daecaf8 100644
--- a/src/paperless/settings.py
+++ b/src/paperless/settings.py
@@ -189,6 +189,9 @@ GNUPG_HOME = os.getenv("HOME", "/tmp")
 # Convert is part of the ImageMagick package
 CONVERT_BINARY = os.getenv("PAPERLESS_CONVERT_BINARY")
 
+# Unpaper is run on each converted page image before OCR
+UNPAPER_BINARY = os.getenv("PAPERLESS_UNPAPER_BINARY", "unpaper")
+
 # This will be created if it doesn't exist
 SCRATCH_DIR = os.getenv("PAPERLESS_SCRATCH_DIR", "/tmp/paperless")