From 202b88632cef943c417033617d7222971bce38d6 Mon Sep 17 00:00:00 2001 From: jonaswinkler Date: Fri, 27 Nov 2020 12:02:36 +0100 Subject: [PATCH] updated docs --- docs/configuration.rst | 40 ++++++++++++++-------------------------- 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index 75d0a0b4c..d4e7752ec 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -193,17 +193,6 @@ PAPERLESS_TIME_ZONE= Defaults to UTC. - -PAPERLESS_OCR_PAGES= - Tells paperless to use only the specified amount of pages for OCR. Documents - with less than the specified amount of pages get OCR'ed completely. - - Specifying 1 here will only use the first page. - - Defaults to 0, which disables this feature and always uses all pages. - - - PAPERLESS_OCR_LANGUAGE= Customize the default language that tesseract will attempt to use when parsing documents. The default language is used whenever @@ -250,6 +239,20 @@ PAPERLESS_OCR_OUTPUT_TYPE= If not specified, ``pdfa`` is used. Remember that paperless also keeps the original input file as well as the archived version. + +PAPERLESS_OCR_PAGES= + Tells paperless to use only the specified number of pages for OCR. Documents + with fewer than the specified number of pages get OCR'ed completely. + + Specifying 1 here will only use the first page. + + When combined with ``PAPERLESS_OCR_MODE=redo`` or ``PAPERLESS_OCR_MODE=force``, + paperless will not modify any text it finds on excluded pages and will copy it + verbatim. + + Defaults to 0, which disables this feature and always uses all pages. + + PAPERLESS_OCR_IMAGE_DPI= Paperless will OCR any images you put into the system and convert them into PDF documents. This is useful if your scanner produces images. @@ -300,18 +303,6 @@ PAPERLESS_CONVERT_TMPDIR= Default is none, which disables the temporary directory. 
-PAPERLESS_CONVERT_DENSITY= - This setting has a high impact on the physical size of tmp page files, - the speed of document conversion, and can affect the accuracy of OCR - results. Individual results can vary and this setting should be tested - thoroughly against the documents you are importing to see if it has any - impacts either negative or positive. - Testing on limited document sets has shown a setting of 200 can cut the - size of tmp files by 1/3, and speed up conversion by up to 4x - with little impact to OCR accuracy. - - Default is 300. - PAPERLESS_OPTIMIZE_THUMBNAILS= Use optipng to optimize thumbnails. This usually reduces the size of thumbnails by about 20%, but uses considerable compute time during @@ -358,8 +349,5 @@ PAPERLESS_CONVERT_BINARY= PAPERLESS_GS_BINARY= Defaults to "/usr/bin/gs". -PAPERLESS_UNPAPER_BINARY= - Defaults to "/usr/bin/unpaper". - PAPERLESS_OPTIPNG_BINARY= Defaults to "/usr/bin/optipng".