From 01706dd3911496a5ac0227b793873c0eca7c942e Mon Sep 17 00:00:00 2001 From: shamoon <4887959+shamoon@users.noreply.github.com> Date: Fri, 20 Dec 2024 15:54:42 -0800 Subject: [PATCH] Just messing around --- .pre-commit-config.yaml | 2 +- Pipfile | 1 + Pipfile.lock | 18 +- src/paperless/settings.py | 4 + src/paperless_einvoice/__init__.py | 0 src/paperless_einvoice/apps.py | 15 ++ src/paperless_einvoice/parsers.py | 86 +++++++++ src/paperless_einvoice/signals.py | 15 ++ src/paperless_einvoice/templates/invoice.css | 138 ++++++++++++++ .../templates/invoice.j2.html | 80 ++++++++ src/paperless_einvoice/tests/__init__.py | 0 src/paperless_einvoice/tests/conftest.py | 25 +++ .../samples/zugferd_2p1_BASIC-WL_Einfach.xml | 176 ++++++++++++++++++ .../tests/test_einvoice_parser.py | 19 ++ 14 files changed, 577 insertions(+), 2 deletions(-) create mode 100644 src/paperless_einvoice/__init__.py create mode 100644 src/paperless_einvoice/apps.py create mode 100644 src/paperless_einvoice/parsers.py create mode 100644 src/paperless_einvoice/signals.py create mode 100644 src/paperless_einvoice/templates/invoice.css create mode 100644 src/paperless_einvoice/templates/invoice.j2.html create mode 100644 src/paperless_einvoice/tests/__init__.py create mode 100644 src/paperless_einvoice/tests/conftest.py create mode 100644 src/paperless_einvoice/tests/samples/zugferd_2p1_BASIC-WL_Einfach.xml create mode 100644 src/paperless_einvoice/tests/test_einvoice_parser.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 45fd09fea..935ed6f36 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -32,7 +32,7 @@ repos: rev: v2.3.0 hooks: - id: codespell - exclude: "(^src-ui/src/locale/)|(^src-ui/e2e/)|(^src/paperless_mail/tests/samples/)" + exclude: "(^src-ui/src/locale/)|(^src-ui/e2e/)|(^src/paperless_mail/tests/samples/)|(^src/paperless_einvoice/tests/samples/)|(^src/paperless_einvoice/templates/)" exclude_types: - pofile - json diff --git a/Pipfile b/Pipfile 
index c32f512fe..23c976303 100644 --- a/Pipfile +++ b/Pipfile @@ -20,6 +20,7 @@ django-multiselectfield = "*" django-soft-delete = "*" djangorestframework = "~=3.15.2" djangorestframework-guardian = "*" +drafthorse = "*" drf-writable-nested = "*" bleach = "*" celery = {extras = ["redis"], version = "*"} diff --git a/Pipfile.lock b/Pipfile.lock index 81be484ce..1b5d15bc7 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "148cd379b8ceeb857ea817bea1432821d8ea20ffe8d0bfc04c89871a87b87b0f" + "sha256": "bf51fa211450fbf310e070739f225429fd7947eb72ba2a48b6bc92a1cd4cb51e" }, "pipfile-spec": 6, "requires": {}, @@ -629,6 +629,14 @@ "index": "pypi", "version": "==0.3.0" }, + "drafthorse": { + "hashes": [ + "sha256:4d16a6dd60708676465e63ac7ff1b5f140e8c1c58be7d0eda66bc309814fc5c5", + "sha256:e67d9f21bbada2282e5f63257c01cc56f8cb2667000f67e68bbddf6956484375" + ], + "index": "pypi", + "version": "==2.4.0" + }, "drf-writable-nested": { "hashes": [ "sha256:d8ddc606dc349e56373810842965712a5789e6a5ca7704729d15429b95f8f2ee" @@ -1668,6 +1676,14 @@ "markers": "python_version >= '3.8'", "version": "==2.9.0" }, + "pypdf": { + "hashes": [ + "sha256:3bd4f503f4ebc58bae40d81e81a9176c400cbbac2ba2d877367595fb524dfdfc", + "sha256:425a129abb1614183fd1aca6982f650b47f8026867c0ce7c4b9f281c443d2740" + ], + "markers": "python_version >= '3.8'", + "version": "==5.1.0" + }, "python-dateutil": { "hashes": [ "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", diff --git a/src/paperless/settings.py b/src/paperless/settings.py index a32c78ef5..6bb8d9bc2 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -1089,6 +1089,10 @@ TIKA_GOTENBERG_ENDPOINT = os.getenv( if TIKA_ENABLED: INSTALLED_APPS.append("paperless_tika.apps.PaperlessTikaConfig") +EINVOICE_PARSER_ENABLED = __get_boolean("PAPERLESS_EINVOICE_PARSER_ENABLED", "NO") +if EINVOICE_PARSER_ENABLED and TIKA_ENABLED: + 
INSTALLED_APPS.append("paperless_einvoice.apps.PaperlessEInvoiceConfig")
+
 AUDIT_LOG_ENABLED = __get_boolean("PAPERLESS_AUDIT_LOG_ENABLED", "true")
 if AUDIT_LOG_ENABLED:
     INSTALLED_APPS.append("auditlog")
diff --git a/src/paperless_einvoice/__init__.py b/src/paperless_einvoice/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/paperless_einvoice/apps.py b/src/paperless_einvoice/apps.py
new file mode 100644
index 000000000..0ff574d50
--- /dev/null
+++ b/src/paperless_einvoice/apps.py
@@ -0,0 +1,15 @@
+from django.apps import AppConfig
+from django.conf import settings
+
+from paperless_einvoice.signals import einvoice_consumer_declaration
+
+
+class PaperlessEInvoiceConfig(AppConfig):
+    name = "paperless_einvoice"
+
+    def ready(self):
+        from documents.signals import document_consumer_declaration
+
+        if settings.TIKA_ENABLED and settings.EINVOICE_PARSER_ENABLED:
+            document_consumer_declaration.connect(einvoice_consumer_declaration)
+        AppConfig.ready(self)
diff --git a/src/paperless_einvoice/parsers.py b/src/paperless_einvoice/parsers.py
new file mode 100644
index 000000000..b3c6fe57c
--- /dev/null
+++ b/src/paperless_einvoice/parsers.py
@@ -0,0 +1,106 @@
+from pathlib import Path
+
+from django.conf import settings
+from drafthorse.models.document import Document
+from gotenberg_client import GotenbergClient
+from gotenberg_client.options import MarginType
+from gotenberg_client.options import MarginUnitType
+from gotenberg_client.options import PageMarginsType
+from gotenberg_client.options import PageSize
+from gotenberg_client.options import PdfAFormat
+from jinja2 import Template
+
+from documents.parsers import ParseError
+from paperless.models import OutputTypeChoices
+from paperless_tika.parsers import TikaDocumentParser
+
+
+class EInvoiceDocumentParser(TikaDocumentParser):
+    """
+    This parser parses e-invoices using Tika and Gotenberg: the invoice XML
+    is rendered to HTML with a Jinja template and converted to PDF
+    """
+
+    logging_name = "paperless.parsing.einvoice"
+
+    def convert_to_pdf(self, document_path: Path, file_name) -> Path:
+        """
+        Convert the e-invoice XML at document_path to a PDF in self.tempdir
+        and return the path of that PDF.
+
+        file_name is not used by this implementation.
+
+        Raises ParseError if the invoice cannot be rendered to HTML or the
+        Gotenberg conversion fails.
+        """
+        templates_dir = Path(__file__).parent / "templates"
+        pdf_path = Path(self.tempdir) / "convert.pdf"
+        self.log.info(f"Converting {document_path} to PDF as {pdf_path}")
+
+        try:
+            # Pass the raw bytes on: decoding with the locale default and
+            # re-encoding as UTF-8 can corrupt the document
+            invoice = Document.parse(document_path.read_bytes())
+            context = {
+                # The template prints "id" as the invoice number
+                "id": invoice.header.id,
+            }
+            # Template() expects the template source, not a path to it.
+            # Invoice values are untrusted, so HTML-escape them on output.
+            template = Template(
+                (templates_dir / "invoice.j2.html").read_text(encoding="utf-8"),
+                autoescape=True,
+            )
+            html_file = Path(self.tempdir) / "invoice_as_html.html"
+            html_file.write_text(template.render(context), encoding="utf-8")
+        except Exception as err:
+            raise ParseError(
+                f"Error while rendering e-invoice to HTML: {err}",
+            ) from err
+
+        with (
+            GotenbergClient(
+                host=settings.TIKA_GOTENBERG_ENDPOINT,
+                timeout=settings.CELERY_TASK_TIME_LIMIT,
+            ) as client,
+            client.chromium.html_to_pdf() as route,
+        ):
+            # Set the output format of the resulting PDF
+            if settings.OCR_OUTPUT_TYPE in {
+                OutputTypeChoices.PDF_A,
+                OutputTypeChoices.PDF_A2,
+            }:
+                route.pdf_format(PdfAFormat.A2b)
+            elif settings.OCR_OUTPUT_TYPE == OutputTypeChoices.PDF_A1:
+                self.log.warning(
+                    "Gotenberg does not support PDF/A-1a, choosing PDF/A-2b instead",
+                )
+                route.pdf_format(PdfAFormat.A2b)
+            elif settings.OCR_OUTPUT_TYPE == OutputTypeChoices.PDF_A3:
+                route.pdf_format(PdfAFormat.A3b)
+
+            try:
+                response = (
+                    route.index(html_file)
+                    .resource(templates_dir / "invoice.css")
+                    .margins(
+                        PageMarginsType(
+                            top=MarginType(0.1, MarginUnitType.Inches),
+                            bottom=MarginType(0.1, MarginUnitType.Inches),
+                            left=MarginType(0.1, MarginUnitType.Inches),
+                            right=MarginType(0.1, MarginUnitType.Inches),
+                        ),
+                    )
+                    .size(PageSize(height=11.7, width=8.27))
+                    .scale(1.0)
+                    .run()
+                )
+
+                pdf_path.write_bytes(response.content)
+
+                return pdf_path
+
+            except Exception as err:
+                raise ParseError(
+                    f"Error while converting document to PDF: {err}",
+                ) from err
diff --git a/src/paperless_einvoice/signals.py b/src/paperless_einvoice/signals.py
new file mode 100644
index 000000000..6539056fe
--- /dev/null
+++ b/src/paperless_einvoice/signals.py
@@ -0,0 +1,15 @@
+def get_parser(*args, **kwargs):
+    from paperless_einvoice.parsers import EInvoiceDocumentParser
+
+    return EInvoiceDocumentParser(*args, **kwargs)
+
+
+def einvoice_consumer_declaration(sender, **kwargs):
+    return {
+        "parser": get_parser,
+        "weight": 10,
+        "mime_types": {
+            "text/xml": ".xml",
+            "application/xml": ".xml",
+        },
+    }
diff --git a/src/paperless_einvoice/templates/invoice.css b/src/paperless_einvoice/templates/invoice.css
new file mode 100644
index 000000000..baac7c83f
--- /dev/null
+++ b/src/paperless_einvoice/templates/invoice.css
@@ -0,0 +1,138 @@
+@charset "UTF-8";
+
+@page {
+  margin: 3cm;
+
+  @bottom-left {
+    color: #1ee494;
+    font-family: Pacifico;
+  }
+
+  @bottom-center {
+    content: string(title);
+    color: #a9a;
+    font-family: Pacifico;
+    font-size: 9pt;
+  }
+}
+
+footer {
+  width: 0;
+  height: 0;
+  visibility: hidden;
+  string-set: title content();
+}
+
+html {
+  color: #14213d;
+  font-family: Source Sans Pro;
+  font-size: 11pt;
+  line-height: 1.6;
+}
+
+html body {
+  margin: 0;
+}
+
+html h1 {
+  color: #1ee494;
+  font-family: Pacifico;
+  font-size: 40pt;
+  margin: 0;
+}
+
+html aside {
+  display: flex;
+  margin: 2em 0 4em;
+}
+
+html aside address {
+  font-style: normal;
+  white-space: pre-line;
+}
+
+html aside address#from {
+  color: #a9a;
+  flex: 1;
+}
+
+html aside address#to {
+  text-align: right;
+}
+
+html dl {
+  text-align: right;
+  position: absolute;
+  right: 0;
+  top: 0;
+}
+
+html dl dt,
+html dl dd {
+  display: inline;
+  margin: 0;
+}
+
+html dl dt {
+  color: #a9a;
+}
+
+html dl dt::before {
+  content: '';
+  display: block;
+}
+
+html dl dt::after {
+  content: ':';
+}
+
+html table {
+  border-collapse: collapse;
+  width: 100%;
+}
+
+html table th {
+  border-bottom: .2mm solid #a9a;
+  color: #a9a;
+  font-size: 10pt;
+  font-weight: 400;
+  padding-bottom: .25cm;
+  text-transform: uppercase;
+}
+
+html table td {
+  padding-top: 7mm;
+}
+
+html table td:last-of-type {
+  color: #1ee494;
+  font-weight: bold;
+  text-align: right;
+}
+
+html table th,
+html table td {
+  text-align: center;
+}
+
+html table th:first-of-type,
+html table
td:first-of-type { + text-align: left; +} + +html table th:last-of-type, +html table td:last-of-type { + text-align: right; +} + +html table#total { + background: #f6f6f6; + border-color: #f6f6f6; + border-style: solid; + border-width: 2cm 3cm; + bottom: 0; + font-size: 20pt; + margin: 0 -3cm; + position: absolute; + width: 18cm; +} diff --git a/src/paperless_einvoice/templates/invoice.j2.html b/src/paperless_einvoice/templates/invoice.j2.html new file mode 100644 index 000000000..23589b203 --- /dev/null +++ b/src/paperless_einvoice/templates/invoice.j2.html @@ -0,0 +1,80 @@ + + + + + Rechnung {{ id }} + + + + +

Rechnung

+ +

+ westnetz w.V., Karl-Heine-Str. 93, 04229 Leipzig +

+

+ {{ address | join("
") }} +

+ + + + + + + + + + + + + +
RechnungsnummerLeistungszeitraumRechnungsdatum
{{ id }}{{ period }}{{ date }}
+ + +

Rechnungspositionen

+ + + + + + + + + {% for item in items %} + + + + + + + + {% endfor %} +
Pos.BeschreibungEinzelpreisAnzahlGesamtpreis
{{ item.item }}{{ item.description }}{{ item.price }}{{ item.quantity }}{{ item.subtotal }}
+ + + + + + + + + + + + +
NettobetragMehrwertsteuer (19%)Rechnungsbetrag
{{ total_net }}{{ total_vat }}{{ total_gross }}
+ + +

Hinweise

+

+ Alle Preise verstehen sich in Euro und sind innerhalb von 14 Tagen auf das nebenstehend angegebene Konto zu überweisen. Um eine möglichst fehlerfreie Verbuchung Ihrer Einzahlungen vornehmen zu können, bitten wir um die Angabe der Rechnungsnummer {{ id }} im Verwendungszweck. Bei Daueraufträgen benötigen wir zumindest Ihre Kundennummer {{ cid }}. +

+ +

+ Diese Rechnung wurde maschinell erstellt und ist auch ohne Unterschrift gültig. +

+ + + + +
diff --git a/src/paperless_einvoice/tests/__init__.py b/src/paperless_einvoice/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/paperless_einvoice/tests/conftest.py b/src/paperless_einvoice/tests/conftest.py
new file mode 100644
index 000000000..0ecacb7ff
--- /dev/null
+++ b/src/paperless_einvoice/tests/conftest.py
@@ -0,0 +1,36 @@
+from collections.abc import Generator
+from pathlib import Path
+
+import pytest
+
+from paperless_einvoice.parsers import EInvoiceDocumentParser
+
+
+@pytest.fixture()
+def einvoice_parser() -> Generator[EInvoiceDocumentParser, None, None]:
+    """
+    Yield an e-invoice parser and clean it up once the test is done.
+    """
+    # Build the parser before entering the try block: if construction fails
+    # there is nothing to clean up, and cleanup must not mask that error
+    parser = EInvoiceDocumentParser(logging_group=None)
+    try:
+        yield parser
+    finally:
+        parser.cleanup()
+
+
+@pytest.fixture(scope="session")
+def sample_dir() -> Path:
+    """
+    Return the absolute path of the directory holding the sample files.
+    """
+    return (Path(__file__).parent / Path("samples")).resolve()
+
+
+@pytest.fixture(scope="session")
+def sample_xml_file(sample_dir: Path) -> Path:
+    """
+    Return the path of the ZUGFeRD 2.1 BASIC-WL sample invoice.
+    """
+    return sample_dir / "zugferd_2p1_BASIC-WL_Einfach.xml"
diff --git a/src/paperless_einvoice/tests/samples/zugferd_2p1_BASIC-WL_Einfach.xml b/src/paperless_einvoice/tests/samples/zugferd_2p1_BASIC-WL_Einfach.xml
new file mode 100644
index 000000000..63b36f142
--- /dev/null
+++ b/src/paperless_einvoice/tests/samples/zugferd_2p1_BASIC-WL_Einfach.xml
@@ -0,0 +1,176 @@
+ + + + + + + + + + urn:factur-x.eu:1p0:basicwl + + + + TX-471102 + 380 + + 20191030 + + + Rechnung gemäß Taxifahrt vom 29.10.2019 + + + Taxiunternehmen TX GmbH +Lieferantenstraße 20 +10369 Berlin +Deutschland +Geschäftsführer: Hans Mustermann +Handelsregisternummer: H A 123 + + + + Unsere GLN: 4000001123452 +Ihre GLN: 4000001987658 +Ihre Kundennummer: GE2020211 + + + + + + + Taxiunternehmen TX GmbH + + 10369 + Lieferantenstraße 20 + Berlin + DE + + + DE123456789 + + + + Taxi-Gast AG Mitte + + 13351 + Hans Mustermann + Kundenstraße 15 + Berlin + DE + + + + + + + 20191029 + + + + + EUR + + 1.18 + VAT + 16.90 + S + 7 + + + + 20191129 + + + + 16.90 + 0.00 + 0.00 + 16.90 + 1.18 + 18.08 + 
18.08 + + + +
diff --git a/src/paperless_einvoice/tests/test_einvoice_parser.py b/src/paperless_einvoice/tests/test_einvoice_parser.py
new file mode 100644
index 000000000..5298238c6
--- /dev/null
+++ b/src/paperless_einvoice/tests/test_einvoice_parser.py
@@ -0,0 +1,25 @@
+from pathlib import Path
+
+import pytest
+from pytest_django.fixtures import SettingsWrapper
+from pytest_httpx import HTTPXMock
+
+from paperless_einvoice.parsers import EInvoiceDocumentParser
+
+
+@pytest.mark.django_db()
+class TestEInvoiceParser:
+    """
+    Tests for the e-invoice parser.
+    """
+
+    def test_parse(
+        self,
+        httpx_mock: HTTPXMock,
+        settings: SettingsWrapper,
+        einvoice_parser: EInvoiceDocumentParser,
+        sample_xml_file: Path,
+    ):
+        """
+        Placeholder: requests the fixtures but makes no assertions yet.
+        """