mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-11 10:00:48 -05:00
Fix: ghostscript rendering error doesn't trigger frontend failure message (#4092)
* Raise ParseError from gs rendering error
* Catch all parser errors as generic exception
* Differentiate generic vs parse errors during consumption
This commit is contained in:
parent
407a119b9a
commit
e14f4c94c2
@ -450,11 +450,18 @@ class Consumer(LoggingMixin):
|
|||||||
archive_path = document_parser.get_archive_path()
|
archive_path = document_parser.get_archive_path()
|
||||||
|
|
||||||
except ParseError as e:
|
except ParseError as e:
|
||||||
|
self._fail(
|
||||||
|
str(e),
|
||||||
|
f"Error occurred while consuming document {self.filename}: {e}",
|
||||||
|
exc_info=True,
|
||||||
|
exception=e,
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
document_parser.cleanup()
|
document_parser.cleanup()
|
||||||
tempdir.cleanup()
|
tempdir.cleanup()
|
||||||
self._fail(
|
self._fail(
|
||||||
str(e),
|
str(e),
|
||||||
f"Error while consuming document {self.filename}: {e}",
|
f"Unexpected error while consuming document {self.filename}: {e}",
|
||||||
exc_info=True,
|
exc_info=True,
|
||||||
exception=e,
|
exception=e,
|
||||||
)
|
)
|
||||||
@ -544,8 +551,8 @@ class Consumer(LoggingMixin):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
self._fail(
|
self._fail(
|
||||||
str(e),
|
str(e),
|
||||||
f"The following error occurred while consuming "
|
f"The following error occurred while storing document "
|
||||||
f"{self.filename}: {e}",
|
f"{self.filename} after consuming: {e}",
|
||||||
exc_info=True,
|
exc_info=True,
|
||||||
exception=e,
|
exception=e,
|
||||||
)
|
)
|
||||||
|
@ -211,6 +211,18 @@ class FaultyParser(DocumentParser):
|
|||||||
raise ParseError("Does not compute.")
|
raise ParseError("Does not compute.")
|
||||||
|
|
||||||
|
|
||||||
|
class FaultyGenericExceptionParser(DocumentParser):
|
||||||
|
def __init__(self, logging_group, scratch_dir):
|
||||||
|
super().__init__(logging_group)
|
||||||
|
_, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=scratch_dir)
|
||||||
|
|
||||||
|
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
||||||
|
return self.fake_thumb
|
||||||
|
|
||||||
|
def parse(self, document_path, mime_type, file_name=None):
|
||||||
|
raise Exception("Generic exception.")
|
||||||
|
|
||||||
|
|
||||||
def fake_magic_from_file(file, mime=False):
|
def fake_magic_from_file(file, mime=False):
|
||||||
if mime:
|
if mime:
|
||||||
if os.path.splitext(file)[1] == ".pdf":
|
if os.path.splitext(file)[1] == ".pdf":
|
||||||
@ -260,6 +272,13 @@ class TestConsumer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
def make_faulty_parser(self, logging_group, progress_callback=None):
|
def make_faulty_parser(self, logging_group, progress_callback=None):
|
||||||
return FaultyParser(logging_group, self.dirs.scratch_dir)
|
return FaultyParser(logging_group, self.dirs.scratch_dir)
|
||||||
|
|
||||||
|
def make_faulty_generic_exception_parser(
|
||||||
|
self,
|
||||||
|
logging_group,
|
||||||
|
progress_callback=None,
|
||||||
|
):
|
||||||
|
return FaultyGenericExceptionParser(logging_group, self.dirs.scratch_dir)
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
super().setUp()
|
super().setUp()
|
||||||
|
|
||||||
@ -496,7 +515,29 @@ class TestConsumer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
|
|
||||||
self.assertRaisesMessage(
|
self.assertRaisesMessage(
|
||||||
ConsumerError,
|
ConsumerError,
|
||||||
"sample.pdf: Error while consuming document sample.pdf: Does not compute.",
|
"sample.pdf: Error occurred while consuming document sample.pdf: Does not compute.",
|
||||||
|
self.consumer.try_consume_file,
|
||||||
|
self.get_test_file(),
|
||||||
|
)
|
||||||
|
|
||||||
|
self._assert_first_last_send_progress(last_status="FAILED")
|
||||||
|
|
||||||
|
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
||||||
|
def testGenericParserException(self, m):
|
||||||
|
m.return_value = [
|
||||||
|
(
|
||||||
|
None,
|
||||||
|
{
|
||||||
|
"parser": self.make_faulty_generic_exception_parser,
|
||||||
|
"mime_types": {"application/pdf": ".pdf"},
|
||||||
|
"weight": 0,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
|
self.assertRaisesMessage(
|
||||||
|
ConsumerError,
|
||||||
|
"sample.pdf: Unexpected error while consuming document sample.pdf: Generic exception.",
|
||||||
self.consumer.try_consume_file,
|
self.consumer.try_consume_file,
|
||||||
self.get_test_file(),
|
self.get_test_file(),
|
||||||
)
|
)
|
||||||
@ -510,7 +551,7 @@ class TestConsumer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
|
|
||||||
self.assertRaisesMessage(
|
self.assertRaisesMessage(
|
||||||
ConsumerError,
|
ConsumerError,
|
||||||
"sample.pdf: The following error occurred while consuming sample.pdf: NO.",
|
"sample.pdf: The following error occurred while storing document sample.pdf after consuming: NO.",
|
||||||
self.consumer.try_consume_file,
|
self.consumer.try_consume_file,
|
||||||
filename,
|
filename,
|
||||||
)
|
)
|
||||||
|
@ -340,7 +340,10 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
"Ghostscript PDF/A rendering failed, consider setting "
|
"Ghostscript PDF/A rendering failed, consider setting "
|
||||||
"PAPERLESS_OCR_USER_ARGS: '{\"continue_on_soft_render_error\": true}'", # noqa: E501
|
"PAPERLESS_OCR_USER_ARGS: '{\"continue_on_soft_render_error\": true}'", # noqa: E501
|
||||||
)
|
)
|
||||||
raise e
|
|
||||||
|
raise ParseError(
|
||||||
|
f"SubprocessOutputError: {e!s}. See logs for more information.",
|
||||||
|
) from e
|
||||||
except (NoTextFoundException, InputFileError) as e:
|
except (NoTextFoundException, InputFileError) as e:
|
||||||
self.log.warning(
|
self.log.warning(
|
||||||
f"Encountered an error while running OCR: {e!s}. "
|
f"Encountered an error while running OCR: {e!s}. "
|
||||||
|
@ -8,6 +8,7 @@ from unittest import mock
|
|||||||
|
|
||||||
from django.test import TestCase
|
from django.test import TestCase
|
||||||
from django.test import override_settings
|
from django.test import override_settings
|
||||||
|
from ocrmypdf import SubprocessOutputError
|
||||||
|
|
||||||
from documents.parsers import ParseError
|
from documents.parsers import ParseError
|
||||||
from documents.parsers import run_convert
|
from documents.parsers import run_convert
|
||||||
@ -827,6 +828,18 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
# Copied from the PDF to here. Don't even look at it
|
# Copied from the PDF to here. Don't even look at it
|
||||||
self.assertIn("ةﯾﻠﺧﺎدﻻ ةرازو", parser.get_text())
|
self.assertIn("ةﯾﻠﺧﺎدﻻ ةرازو", parser.get_text())
|
||||||
|
|
||||||
|
@mock.patch("ocrmypdf.ocr")
|
||||||
|
def test_gs_rendering_error(self, m):
|
||||||
|
m.side_effect = SubprocessOutputError("Ghostscript PDF/A rendering failed")
|
||||||
|
parser = RasterisedDocumentParser(None)
|
||||||
|
|
||||||
|
self.assertRaises(
|
||||||
|
ParseError,
|
||||||
|
parser.parse,
|
||||||
|
os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"),
|
||||||
|
"application/pdf",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestParserFileTypes(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
class TestParserFileTypes(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||||
SAMPLE_FILES = os.path.join(os.path.dirname(__file__), "samples")
|
SAMPLE_FILES = os.path.join(os.path.dirname(__file__), "samples")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user