From 52c5aafb3fe3f9debc8d9f1c4f9f8318c820e38a Mon Sep 17 00:00:00 2001
From: Brian Martin <bmartin5692@gmail.com>
Date: Fri, 13 May 2016 22:47:40 -0400
Subject: [PATCH 1/3] Convert Density

Add settings variable for the convert density setting.
If no variable is set, default to 300.
---
 src/documents/consumer.py | 3 ++-
 src/paperless/settings.py | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/documents/consumer.py b/src/documents/consumer.py
index bac93647e..df1a73cb3 100644
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -53,6 +53,7 @@ class Consumer(object):
     UNPAPER = settings.UNPAPER_BINARY
     CONSUME = settings.CONSUMPTION_DIR
     THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None
+    DENSITY = int(settings.CONVERT_DENSITY) if settings.CONVERT_DENSITY else 300
 
     DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE
 
@@ -158,7 +159,7 @@ class Consumer(object):
         pnm = os.path.join(tempdir, "convert-%04d.pnm")
         run_convert(
             self.CONVERT,
-            "-density", "300",
+            "-density", self.DENSITY,
             "-depth", "8",
             "-type", "grayscale",
             doc, pnm,
diff --git a/src/paperless/settings.py b/src/paperless/settings.py
index db74d9cea..ad6084711 100644
--- a/src/paperless/settings.py
+++ b/src/paperless/settings.py
@@ -191,6 +191,7 @@ GNUPG_HOME = os.getenv("HOME", "/tmp")
 CONVERT_BINARY = os.getenv("PAPERLESS_CONVERT_BINARY")
 CONVERT_TMPDIR = os.getenv("PAPERLESS_CONVERT_TMPDIR")
 CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT")
+CONVERT_DENSITY = os.getenv("PAPERLESS_CONVERT_DENSITY")
 
 # Unpaper
 UNPAPER_BINARY = os.getenv("PAPERLESS_UNPAPER_BINARY", "unpaper")

From b6ae129ad181f325e2fb0253d554f2ca91939b8c Mon Sep 17 00:00:00 2001
From: Brian Martin <bmartin5692@gmail.com>
Date: Fri, 13 May 2016 23:23:58 -0400
Subject: [PATCH 2/3] Sample Config and Bug Fix

Update sample config to reflect new setting variable.
Change consumer to handle density setting as str instead of int.
---
 paperless.conf.example    | 11 +++++++++++
 src/documents/consumer.py |  4 ++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/paperless.conf.example b/paperless.conf.example
index c6f6ae191..26ca8a8fb 100644
--- a/paperless.conf.example
+++ b/paperless.conf.example
@@ -61,6 +61,17 @@ PAPERLESS_SHARED_SECRET=""
 # the web for "MAGICK_MEMORY_LIMIT".
 #PAPERLESS_CONVERT_MEMORY_LIMIT=0
 
+# By default the conversion density setting for documents is 300 DPI. In some
+# cases it has proven useful to configure a lower value.
+# This setting has a high impact on the physical size of tmp page files,
+# the speed of document conversion, and can affect the accuracy of OCR
+# results. Individual results can vary and this setting should be tested
+# thoroughly against the documents you are importing to see if it has any
+# impacts either negative or positive. Testing on limited document sets has
+# shown a setting of 200 can cut the size of tmp files by 1/3, and speed up
+# conversion by up to 4x with little impact to OCR accuracy.
+#PAPERLESS_CONVERT_DENSITY=200
+
 # Similar to the memory limit, if you've got a small system and your OS mounts
 # /tmp as tmpfs, you should set this to a path that's on a physical disk, like
 # /home/your_user/tmp or something.  ImageMagick will use this as scratch space
diff --git a/src/documents/consumer.py b/src/documents/consumer.py
index df1a73cb3..3d66d581d 100644
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -53,7 +53,7 @@ class Consumer(object):
     UNPAPER = settings.UNPAPER_BINARY
     CONSUME = settings.CONSUMPTION_DIR
     THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None
-    DENSITY = int(settings.CONVERT_DENSITY) if settings.CONVERT_DENSITY else 300
+    DENSITY = settings.CONVERT_DENSITY if settings.CONVERT_DENSITY else 300
 
     DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE
 
@@ -159,7 +159,7 @@ class Consumer(object):
         pnm = os.path.join(tempdir, "convert-%04d.pnm")
         run_convert(
             self.CONVERT,
-            "-density", self.DENSITY,
+            "-density", str(self.DENSITY),
             "-depth", "8",
             "-type", "grayscale",
             doc, pnm,

From 9c062206e42e39638c961be3e16dc2f40fb07b74 Mon Sep 17 00:00:00 2001
From: Brian Martin <bmartin5692@gmail.com>
Date: Sun, 15 May 2016 21:56:41 -0400
Subject: [PATCH 3/3] Sample Config Update

Update Sample Config to show 300 density as default.
---
 paperless.conf.example | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paperless.conf.example b/paperless.conf.example
index 26ca8a8fb..85709698f 100644
--- a/paperless.conf.example
+++ b/paperless.conf.example
@@ -70,7 +70,7 @@ PAPERLESS_SHARED_SECRET=""
 # impacts either negative or positive. Testing on limited document sets has
 # shown a setting of 200 can cut the size of tmp files by 1/3, and speed up
 # conversion by up to 4x with little impact to OCR accuracy.
-#PAPERLESS_CONVERT_DENSITY=200
+#PAPERLESS_CONVERT_DENSITY=300
 
 # Similar to the memory limit, if you've got a small system and your OS mounts
 # /tmp as tmpfs, you should set this to a path that's on a physical disk, like