From 840626e5719300d833e3e76205c7c4506972da7e Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Mon, 21 Mar 2016 13:54:32 +0000 Subject: [PATCH 1/4] Made the created field editable --- src/documents/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/documents/models.py b/src/documents/models.py index 0d79dba0a..20bc5004c 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -152,7 +152,7 @@ class Document(models.Model): ) tags = models.ManyToManyField( Tag, related_name="documents", blank=True) - created = models.DateTimeField(default=timezone.now, editable=False) + created = models.DateTimeField(default=timezone.now) modified = models.DateTimeField(auto_now=True, editable=False) class Meta(object): From 8115cf8905af3e4bc69a8d0b9f6a48b924dcee83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20G=C3=B3mez?= Date: Mon, 21 Mar 2016 21:57:36 +0100 Subject: [PATCH 2/4] Improves the docs: OCRing files in languages other than English + fixes typos --- README.rst | 2 +- docs/consumption.rst | 2 +- docs/index.rst | 1 + docs/requirements.rst | 2 +- docs/troubleshooting.rst | 18 ++++++++++++++++++ 5 files changed, 22 insertions(+), 3 deletions(-) create mode 100644 docs/troubleshooting.rst diff --git a/README.rst b/README.rst index 71b59750b..6eec07d72 100644 --- a/README.rst +++ b/README.rst @@ -59,7 +59,7 @@ powerful tools. * `ImageMagick`_ converts the images between colour and greyscale. * `Tesseract`_ does the character recognition. -* `Unpaper`_ despeckles and and deskews the scanned image. +* `Unpaper`_ despeckles and deskews the scanned image. * `GNU Privacy Guard`_ is used as the encryption backend. * `Python 3`_ is the language of the project. diff --git a/docs/consumption.rst b/docs/consumption.rst index eadf12823..6e5bd8574 100644 --- a/docs/consumption.rst +++ b/docs/consumption.rst @@ -128,7 +128,7 @@ following name/value pairs: don't start uploading stuff to your server. 
The means of generating this signature is defined below. -Specify ``enctype="multipart/form-data"``, and then POST your file with::: +Specify ``enctype="multipart/form-data"``, and then POST your file with:: Content-Disposition: form-data; name="document"; filename="whatever.pdf" diff --git a/docs/index.rst b/docs/index.rst index 47710d376..43f77b15a 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -33,4 +33,5 @@ Contents api utilities migrating + troubleshooting changelog diff --git a/docs/requirements.rst b/docs/requirements.rst index 5d4fddaf0..d31096257 100644 --- a/docs/requirements.rst +++ b/docs/requirements.rst @@ -8,7 +8,7 @@ should work) that has the following software installed on it: * `Python3`_ (with development libraries, pip and virtualenv) * `GNU Privacy Guard`_ -* `Tesseract`_ +* `Tesseract`_, plus it's language files matching your document base. * `Imagemagick`_ * `unpaper`_ diff --git a/docs/troubleshooting.rst b/docs/troubleshooting.rst new file mode 100644 index 000000000..bf2890304 --- /dev/null +++ b/docs/troubleshooting.rst @@ -0,0 +1,18 @@ +.. _troubleshooting: + +Troubleshooting +=============== + +.. _troubleshooting_ocr_language_files_missing: + +Consumer warns ``OCR for XX failed`` +------------------------------------ + +If you find the OCR accuracy to be too low, and/or the document consumer warns that ``OCR for +XX failed, but we're going to stick with what we've got since FORGIVING_OCR is enabled``, then you +might need to install the `Tesseract language files +`_ matching your documents' languages. 
+ +As an example, if your documents are written in Spanish you may need to run:: + + apt-get install -y tesseract-ocr-spa From a366542ed995415696b031a15b1c4eed9c5434c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20G=C3=B3mez?= Date: Wed, 23 Mar 2016 02:41:29 +0100 Subject: [PATCH 3/4] Docs: fixes typos + clarifies troubleshooting section --- docs/requirements.rst | 2 +- docs/troubleshooting.rst | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/requirements.rst b/docs/requirements.rst index d31096257..a1567361a 100644 --- a/docs/requirements.rst +++ b/docs/requirements.rst @@ -8,7 +8,7 @@ should work) that has the following software installed on it: * `Python3`_ (with development libraries, pip and virtualenv) * `GNU Privacy Guard`_ -* `Tesseract`_, plus it's language files matching your document base. +* `Tesseract`_, plus its language files matching your document base. * `Imagemagick`_ * `unpaper`_ diff --git a/docs/troubleshooting.rst b/docs/troubleshooting.rst index bf2890304..aab2d7759 100644 --- a/docs/troubleshooting.rst +++ b/docs/troubleshooting.rst @@ -13,6 +13,7 @@ XX failed, but we're going to stick with what we've got since FORGIVING_OCR is e might need to install the `Tesseract language files `_ matching your documents' languages. 
-As an example, if your documents are written in Spanish you may need to run:: +As an example, if you are runing Paperless from the Vagrant setup provided (or from any Ubuntu or Debian +box), and your documents are written in Spanish you may need to run:: apt-get install -y tesseract-ocr-spa From 37191f038366e571806d59503e79c774f9bf9bca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20G=C3=B3mez=20Obreg=C3=B3n?= Date: Wed, 23 Mar 2016 02:48:04 +0100 Subject: [PATCH 4/4] Fixes typo --- docs/troubleshooting.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/troubleshooting.rst b/docs/troubleshooting.rst index aab2d7759..0fa7c1a29 100644 --- a/docs/troubleshooting.rst +++ b/docs/troubleshooting.rst @@ -13,7 +13,7 @@ XX failed, but we're going to stick with what we've got since FORGIVING_OCR is e might need to install the `Tesseract language files `_ matching your documents' languages. -As an example, if you are runing Paperless from the Vagrant setup provided (or from any Ubuntu or Debian +As an example, if you are running Paperless from the Vagrant setup provided (or from any Ubuntu or Debian box), and your documents are written in Spanish you may need to run:: apt-get install -y tesseract-ocr-spa