# Sample paperless.conf # Copy this file to /etc/paperless.conf and modify it to suit your needs. # As this file contains passwords it should only be readable by the user # running paperless. ############################################################################### #### Message Broker #### ############################################################################### # This is required for processing scheduled tasks such as email fetching, index # optimization and for training the automatic document matcher. # Defaults to localhost:6379. #PAPERLESS_REDIS=redis://localhost:6379 ############################################################################### #### Database Settings #### ############################################################################### # By default, sqlite is used as the database backend. This can be changed here. # The docker-compose service definition uses a postgresql server. The # configuration for this is already done inside the docker-compose.env file. #Set PAPERLESS_DBHOST and postgresql will be used instead of sqlite. #PAPERLESS_DBHOST=localhost #Adjust port if necessary #PAPERLESS_DBPORT= #name, user and pass all default to "paperless" #PAPERLESS_DBNAME=paperless #PAPERLESS_DBUSER=paperless #PAPERLESS_DBPASS=paperless ############################################################################### #### Paths & Folders #### ############################################################################### # This is where your documents should go to be consumed. Make sure that it exists # and that the user running the paperless service can read/write its contents # before you start Paperless. PAPERLESS_CONSUMPTION_DIR=../consume # This is where paperless stores all its data (search index, sqlite database, # classification model, etc). #PAPERLESS_DATA_DIR=../data # This is where your documents and thumbnails are stored. #PAPERLESS_MEDIA_ROOT=../media # Override the default STATIC_ROOT here. 
This is where all static files # created using the "collectstatic" management command are stored. #PAPERLESS_STATICDIR=../static # Override the STATIC_URL here. Unless you're hosting Paperless off a # subdomain like /paperless/, you probably don't need to change this. #PAPERLESS_STATIC_URL=/static/ # Specify a filename format for the document (directories are supported) # Use the following placeholders: # * {correspondent} # * {title} # * {created} # * {added} # * {tags[KEY]} If your tags conform to key_value or key-value # * {tags[INDEX]} If your tags are strings, select the tag by index # Uniqueness of filenames is ensured, as an incrementing counter is attached # to each filename. #PAPERLESS_FILENAME_FORMAT= ############################################################################### #### Security #### ############################################################################### # Controls whether django's debug mode is enabled. Disable this on production # systems. Debug mode is disabled by default. #PAPERLESS_DEBUG=false # GnuPG encryption is deprecated and will be removed in future versions. # # Don't use it. It does not provide any security at all. # # Paperless can be instructed to attempt to encrypt your PDF files with GPG # using the PAPERLESS_PASSPHRASE specified below. If however you're not # concerned about encrypting these files (for example if you have disk # encryption locally) then you don't need this and can safely leave this value # un-set. # # One final note about the passphrase. Once you've consumed a document with # one passphrase, DON'T CHANGE IT. Paperless assumes this to be a constant and # can't properly export documents that were encrypted with an old passphrase if # you've since changed it to a new one. # # The default is to not use encryption at all. #PAPERLESS_PASSPHRASE=secret # The secret key has a default that should be fine so long as you're hosting # Paperless on a closed network. 
However, if you're putting this anywhere # public, you should change the key to something unique and verbose. #PAPERLESS_SECRET_KEY=change-me # If you're planning on putting Paperless on the open internet, then you # really should set this value to the domain name you're using. Failing to do # so leaves you open to HTTP host header attacks: # https://docs.djangoproject.com/en/1.10/topics/security/#host-headers-virtual-hosting # # Just remember that this is a comma-separated list, so "example.com" is fine, # as is "example.com,www.example.com", but NOT " example.com" or "example.com," #PAPERLESS_ALLOWED_HOSTS=example.com,www.example.com # If you decide to use the Paperless API in an ajax call, you need to add your # servers to the list of allowed hosts that can do CORS calls. By default # Paperless allows calls from localhost:8080, but if you'd like to change that, # you can set this value to a comma-separated list. #PAPERLESS_CORS_ALLOWED_HOSTS=localhost:8080,example.com,localhost:8000 # To host paperless under a subpath url like example.com/paperless you set # this value to /paperless. No trailing slash! # # https://docs.djangoproject.com/en/1.11/ref/settings/#force-script-name #PAPERLESS_FORCE_SCRIPT_NAME= ############################################################################### #### Software Tweaks #### ############################################################################### # Paperless does multiple things in the background: Maintain the search index, # maintain the automatic matching algorithm, check emails, consume documents, # etc. This variable specifies how many things it will do in parallel. #PAPERLESS_TASK_WORKERS=1 # Furthermore, paperless uses multiple threads when consuming documents to # speed up OCR. This variable specifies how many pages paperless will process # in parallel on a single document. 
#PAPERLESS_THREADS_PER_WORKER=1 # Ensure that the product # PAPERLESS_TASK_WORKERS * PAPERLESS_THREADS_PER_WORKER # does not exceed your CPU core count or else paperless will be extremely slow. # If you want paperless to process many documents in parallel, choose a high # worker count. If you want paperless to process very large documents faster, # use a higher thread per worker count. # The default is a balance between the two, according to your CPU core count, # with a slight favor towards threads per worker, and using as many cores as # possible. # If you only specify PAPERLESS_TASK_WORKERS, paperless will adjust # PAPERLESS_THREADS_PER_WORKER automatically. # If paperless won't find documents added to your consume folder, it might # not be able to automatically detect filesystem changes. In that case, # specify a polling interval in seconds below, which will then cause paperless # to periodically check your consumption directory for changes. #PAPERLESS_CONSUMER_POLLING=10 # When the consumer detects a duplicate document, it will not touch the # original document. This default behavior can be changed here. #PAPERLESS_CONSUMER_DELETE_DUPLICATES=false # Use optipng to optimize thumbnails. This usually reduces the size of # thumbnails by about 20%, but uses considerable compute time during # consumption. #PAPERLESS_OPTIMIZE_THUMBNAILS=true # After a document is consumed, Paperless can trigger an arbitrary script if # you like. This script will be passed a number of arguments for you to work # with. The default is blank, which means nothing will be executed. For more # information, take a look at the docs: # http://paperless.readthedocs.org/en/latest/consumption.html#hooking-into-the-consumption-process #PAPERLESS_POST_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh # By default, paperless will check the document text for document date information. # Uncomment the line below to enable checking the document filename for date # information. 
The date order can be set to any option as specified in # https://dateparser.readthedocs.io/en/latest/#settings. The filename will be # checked first, and if nothing is found, the document text will be checked # as normal. #PAPERLESS_FILENAME_DATE_ORDER=YMD # Sometimes devices won't create filenames which can be parsed properly # by the filename parser (see # https://paperless.readthedocs.io/en/latest/guesswork.html). # # This setting allows you to specify a list of transformations # in regular expression syntax, which are passed in order to re.sub. # Transformation stops after the first match, so at most one transformation # is applied. # # Syntax is a JSON array of dictionaries containing "pattern" and "repl" # as keys. # # The example below transforms filenames created by a Brother ADS-2400N # document scanner in its standard configuration `Name_Date_Count', so that # count is used as title, name as tag and date can be parsed by paperless. #PAPERLESS_FILENAME_PARSE_TRANSFORMS=[{"pattern":"^([a-z]+)_(\\d{8})_(\\d{6})_([0-9]+)\\.", "repl":"\\2\\3Z - \\4 - \\1."}] # # The following values use sensible defaults for modern systems, but if you're # running Paperless on a low-resource device (like a Raspberry Pi), modifying # some of these values may be necessary. # # Customize the default language that tesseract will attempt to use when # parsing documents. The default language is used whenever # - No language could be detected on a document # - No tesseract data files are available for the detected language # It should be a 3-letter language code consistent with ISO # 639: https://www.loc.gov/standards/iso639-2/php/code_list.php #PAPERLESS_OCR_LANGUAGE=eng # On smaller systems, or even in the case of Very Large Documents, the consumer # may explode, complaining about how it's "unable to extend pixel cache". In # such cases, try setting this to a reasonably low value, like 32000000. 
The # default is to use whatever is necessary to do everything without writing to # disk, and units are in megabytes. # # For more information on how to use this value, you should probably search # the web for "MAGICK_MEMORY_LIMIT". #PAPERLESS_CONVERT_MEMORY_LIMIT=0 # Similar to the memory limit, if you've got a small system and your OS mounts # /tmp as tmpfs, you should set this to a path that's on a physical disk, like # /home/your_user/tmp or something. ImageMagick will use this as scratch space # when crunching through very large documents. # # For more information on how to use this value, you should probably search # the web for "MAGICK_TMPDIR". #PAPERLESS_CONVERT_TMPDIR=/var/tmp/paperless # By default the conversion density setting for documents is 300DPI, in some # cases it has proven useful to configure a lesser value. # This setting has a high impact on the physical size of tmp page files, # the speed of document conversion, and can affect the accuracy of OCR # results. Individual results can vary and this setting should be tested # thoroughly against the documents you are importing to see if it has any # impacts either negative or positive. # Testing on limited document sets has shown a setting of 200 can cut the # size of tmp files by 1/3, and speed up conversion by up to 4x # with little impact to OCR accuracy. #PAPERLESS_CONVERT_DENSITY=300 # By default Paperless does not OCR a document if the text can be retrieved from # the document directly. Set to true to always OCR documents. #PAPERLESS_OCR_ALWAYS=false ############################################################################### #### Interface #### ############################################################################### # Override the default UTC time zone here. # See https://docs.djangoproject.com/en/1.10/ref/settings/#std:setting-TIME_ZONE # for details on how to set it. 
#PAPERLESS_TIME_ZONE=UTC ############################################################################### #### Third-Party Binaries #### ############################################################################### # There are a few external software packages that Paperless expects to find on # your system when it starts up. Unless you've done something creative with # their installation, you probably won't need to edit any of these. However, # if you've installed these programs somewhere where simply typing the name of # the program doesn't automatically execute it (i.e. the program isn't in your # $PATH), then you'll need to specify the literal path for that program here. # Convert (part of the ImageMagick suite) #PAPERLESS_CONVERT_BINARY=/usr/bin/convert # Ghostscript #PAPERLESS_GS_BINARY=/usr/bin/gs # Unpaper #PAPERLESS_UNPAPER_BINARY=/usr/bin/unpaper # Optipng (for optimising thumbnail sizes) #PAPERLESS_OPTIPNG_BINARY=/usr/bin/optipng