diff --git a/paperless.conf.example b/paperless.conf.example index d254b7320..9ef9a1b42 100644 --- a/paperless.conf.example +++ b/paperless.conf.example @@ -32,7 +32,32 @@ PAPERLESS_PASSPHRASE="secret" # have a shared secret here. PAPERLESS_SHARED_SECRET="" +# +# The following values use sensible defaults for modern systems, but if you're +# running Paperless on a low-resource machine (like a Raspberry Pi), modifying +# some of these values may be necessary. +# + # By default, Paperless will attempt to use all available CPU cores to process # a document, but if you would like to limit that, you can set this value to # an integer: #PAPERLESS_OCR_THREADS=1 + +# On smaller systems, or even in the case of Very Large Documents, the consumer +# may explode, complaining about how it's "unable to extend pixel cache". In +# such cases, try setting this to a reasonably low value, like 32000000. The +# default is to use whatever is necessary to do everything without writing to +# disk, and units are in bytes. +# +# For more information on how to use this value, you should probably search +# the web for "MAGICK_MEMORY_LIMIT". +#PAPERLESS_CONVERT_MEMORY_LIMIT=0 + +# Similar to the memory limit, if you've got a small system and your OS mounts +# /tmp as tmpfs, you should set this to a path that's on a physical disk, like +# /home/your_user/tmp or something. ImageMagick will use this as scratch space +# when crunching through very large documents. +# +# For more information on how to use this value, you should probably search +# the web for "MAGICK_TMPDIR". 
+#PAPERLESS_CONVERT_TMPDIR=/var/tmp/paperless diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 45239696b..f564e96e3 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -129,10 +129,13 @@ class Consumer(object): # Convert PDF to multiple PNMs pnm = os.path.join(tempdir, "convert-%04d.pnm") - subprocess.Popen(( - self.CONVERT, "-density", "300", "-depth", "8", - "-type", "grayscale", doc, pnm - )).wait() + run_convert( + self.CONVERT, + "-density", "300", + "-depth", "8", + "-type", "grayscale", + doc, pnm, + ) # Get a list of converted images pnms = [] @@ -159,13 +162,14 @@ class Consumer(object): self.log("info", "Generating the thumbnail") - subprocess.Popen(( + run_convert( self.CONVERT, "-scale", "500x5000", "-alpha", "remove", + "-limit", "memory", "20MiB", doc, os.path.join(tempdir, "convert-%04d.png") - )).wait() + ) return os.path.join(tempdir, "convert-0000.png") @@ -334,6 +338,16 @@ def image_to_string(args): def run_unpaper(args): unpaper, pnm = args - subprocess.Popen(( - unpaper, pnm, pnm.replace(".pnm", ".unpaper.pnm") - )).wait() + subprocess.Popen( + (unpaper, pnm, pnm.replace(".pnm", ".unpaper.pnm"))).wait() + + +def run_convert(*args): + """Run convert with the inherited env plus any MAGICK_* overrides.""" + environment = os.environ.copy() + if settings.CONVERT_MEMORY_LIMIT: + environment["MAGICK_MEMORY_LIMIT"] = settings.CONVERT_MEMORY_LIMIT + if settings.CONVERT_TMPDIR: + environment["MAGICK_TMPDIR"] = settings.CONVERT_TMPDIR + + subprocess.Popen(args, env=environment).wait() diff --git a/src/paperless/settings.py b/src/paperless/settings.py index bb1ba363b..7d9d03cd0 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -189,6 +189,8 @@ GNUPG_HOME = os.getenv("HOME", "/tmp") # Convert is part of the ImageMagick package CONVERT_BINARY = os.getenv("PAPERLESS_CONVERT_BINARY") +CONVERT_TMPDIR = os.getenv("PAPERLESS_CONVERT_TMPDIR") +CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT") # Unpaper UNPAPER_BINARY = os.getenv("PAPERLESS_UNPAPER_BINARY", 
"unpaper") @@ -226,7 +228,7 @@ PASSPHRASE = os.getenv("PAPERLESS_PASSPHRASE") SHARED_SECRET = os.getenv("PAPERLESS_SHARED_SECRET", "") # -# TODO: Remove after 1.2 +# TODO: Remove after 0.2 # # This logic is here to address issue #44, wherein we were using inconsistent # constant names vs. environment variables. If you're using Paperless for the