Replace the bundled Mustang CLI (Java) with an optional external Rechnungless service.

* Dockerfile: drop default-jre and the mustang-cli.jar download.
* docker/compose: add docker-compose.rechnungless.yml, an override file that is
  layered on top of the regular docker-compose.yml.
* docs, paperless.conf.example, settings.py: add PAPERLESS_RECHNUNGLESS_ENABLED
  and PAPERLESS_RECHNUNGLESS_ENDPOINT.
* install-paperless-ngx.sh: ask whether to enable the service and use the
  override file when pulling and starting the stack.
* paperless_xml: only register the parser when the feature is enabled and
  convert XML invoices by POSTing them to <endpoint>/convert.

diff --git a/Dockerfile b/Dockerfile
index 550f86690..6e6bf6977 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -118,9 +118,7 @@ ARG RUNTIME_PACKAGES="\
   zlib1g \
   # Barcode splitter
   libzbar0 \
-  poppler-utils \
-  # XRechnung
-  default-jre"
+  poppler-utils"
 
 # Install basic runtime packages.
 # These change very infrequently
@@ -162,8 +160,6 @@ RUN set -eux \
   && echo "Installing supervisor" \
     && python3 -m pip install --default-timeout=1000 --upgrade --no-cache-dir supervisor==4.2.5
 
-RUN curl -o /usr/local/bin/mustang-cli.jar https://github.com/ZUGFeRD/mustangproject/releases/download/core-2.15.1/Mustang-CLI-2.15.1.jar && chmod +x /usr/local/bin/mustang-cli.jar
-
 # Copy gunicorn config
 # Changes very infrequently
 WORKDIR /usr/src/paperless/
diff --git a/docker/compose/docker-compose.rechnungless.yml b/docker/compose/docker-compose.rechnungless.yml
new file mode 100644
index 000000000..9a87d165c
--- /dev/null
+++ b/docker/compose/docker-compose.rechnungless.yml
@@ -0,0 +1,10 @@
+services:
+  rechnungless:
+    image: marcel2508/rechnungless:latest
+    restart: unless-stopped
+    environment:
+      # REQUIRED FOR APPLE M4 CHIP / MACOS 15.2
+      JAVA_OPTS: -XX:UseSVE=0
+  webserver:
+    environment:
+      PAPERLESS_RECHNUNGLESS_ENABLED: 1
diff --git a/docs/configuration.md b/docs/configuration.md
index a329bf844..b04908611 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -198,6 +198,18 @@ Docker, this may be the `environment` key of the webserver or a
 containing the configuration parameters. Be sure to use the correct format
 and watch out for indentation if editing the YAML file.
 
+#### [`PAPERLESS_RECHNUNGLESS_ENABLED=`](#PAPERLESS_RECHNUNGLESS_ENABLED) {#PAPERLESS_RECHNUNGLESS_ENABLED}
+
+: Enable (or disable) the Rechnungless xml to pdf converter and validator.
+
+    Defaults to false.
+
+#### [`PAPERLESS_RECHNUNGLESS_ENDPOINT=`](#PAPERLESS_RECHNUNGLESS_ENDPOINT) {#PAPERLESS_RECHNUNGLESS_ENDPOINT}
+
+: Set the endpoint URL where Paperless can reach your Rechnungless api server.
+
+    Defaults to "http://rechnungless:8080/rechnungless".
+
 ## Paths and folders
 
 #### [`PAPERLESS_CONSUMPTION_DIR=`](#PAPERLESS_CONSUMPTION_DIR) {#PAPERLESS_CONSUMPTION_DIR}
diff --git a/install-paperless-ngx.sh b/install-paperless-ngx.sh
index 3977cece9..f842df6c6 100755
--- a/install-paperless-ngx.sh
+++ b/install-paperless-ngx.sh
@@ -142,6 +142,14 @@ echo ""
 ask "Enable Apache Tika?" "no" "yes no"
 TIKA_ENABLED=$ask_result
 
+echo ""
+echo "Paperless is able to use Mustang Library to support XML files in XInvoice schema"
+echo "This feature requires more resources due to the required services."
+echo ""
+
+ask "Enable Rechnungless XInvoice service?" "no" "yes no"
+RECHNUNGLESS_ENABLED=$ask_result
+
 echo ""
 echo "Specify the default language that most of your documents are written in."
 echo "Use ISO 639-2, (T) variant language codes: "
@@ -322,6 +330,10 @@ if [[ $TIKA_ENABLED == "yes" ]] ; then
 	DOCKER_COMPOSE_VERSION="$DOCKER_COMPOSE_VERSION-tika"
 fi
 
+if [[ $RECHNUNGLESS_ENABLED == "yes" ]] ; then
+	wget "https://raw.githubusercontent.com/paperless-ngx/paperless-ngx/main/docker/compose/docker-compose.rechnungless.yml" -O docker-compose.rechnungless.yml
+fi
+
 wget "https://raw.githubusercontent.com/paperless-ngx/paperless-ngx/main/docker/compose/docker-compose.$DOCKER_COMPOSE_VERSION.yml" -O docker-compose.yml
 wget "https://raw.githubusercontent.com/paperless-ngx/paperless-ngx/main/docker/compose/.env" -O .env
 
@@ -391,6 +403,9 @@ if [ "$l1" -eq "$l2" ] ; then
 	sed -i "/^volumes:/d" docker-compose.yml
 fi
 
+if [[ $RECHNUNGLESS_ENABLED == "yes" ]] ; then
+	docker compose -f docker-compose.yml -f docker-compose.rechnungless.yml pull
+fi
 
 docker compose pull
 
@@ -404,4 +419,8 @@ fi
 
 docker compose run --rm -e DJANGO_SUPERUSER_PASSWORD="$PASSWORD" webserver createsuperuser --noinput --username "$USERNAME" --email "$EMAIL"
 
-docker compose up --detach
+if [[ $RECHNUNGLESS_ENABLED == "yes" ]] ; then
+	docker compose -f docker-compose.yml -f docker-compose.rechnungless.yml up --detach
+else
+	docker compose up --detach
+fi
diff --git a/paperless.conf.example b/paperless.conf.example
index 63ee7be22..2edc356c7 100644
--- a/paperless.conf.example
+++ b/paperless.conf.example
@@ -88,6 +88,11 @@
 #PAPERLESS_TIKA_ENDPOINT=http://localhost:9998
 #PAPERLESS_TIKA_GOTENBERG_ENDPOINT=http://localhost:3000
 
+# Rechnungless settings
+
+#PAPERLESS_RECHNUNGLESS_ENABLED=false
+#PAPERLESS_RECHNUNGLESS_ENDPOINT=http://rechnungless:8080/rechnungless
+
 # Binaries
 
 #PAPERLESS_CONVERT_BINARY=/usr/bin/convert
diff --git a/src/paperless/settings.py b/src/paperless/settings.py
index 26b46e8fc..5aa51231b 100644
--- a/src/paperless/settings.py
+++ b/src/paperless/settings.py
@@ -318,7 +318,6 @@ INSTALLED_APPS = [
     "paperless_tesseract.apps.PaperlessTesseractConfig",
     "paperless_text.apps.PaperlessTextConfig",
     "paperless_mail.apps.PaperlessMailConfig",
-    "paperless_xml.apps.PaperlessXMLConfig",
     "django.contrib.admin",
     "rest_framework",
     "rest_framework.authtoken",
@@ -1090,6 +1089,16 @@ TIKA_GOTENBERG_ENDPOINT = os.getenv(
 if TIKA_ENABLED:
     INSTALLED_APPS.append("paperless_tika.apps.PaperlessTikaConfig")
 
+# XML / RECHNUNGLESS settings
+RECHNUNGLESS_ENABLED = __get_boolean("PAPERLESS_RECHNUNGLESS_ENABLED", "NO")
+RECHNUNGLESS_ENDPOINT = os.getenv(
+    "PAPERLESS_RECHNUNGLESS_ENDPOINT",
+    "http://rechnungless:8080/rechnungless",
+)
+
+if RECHNUNGLESS_ENABLED:
+    INSTALLED_APPS.append("paperless_xml.apps.PaperlessXMLConfig")
+
 AUDIT_LOG_ENABLED = __get_boolean("PAPERLESS_AUDIT_LOG_ENABLED", "true")
 if AUDIT_LOG_ENABLED:
     INSTALLED_APPS.append("auditlog")
diff --git a/src/paperless_xml/apps.py b/src/paperless_xml/apps.py
index fc7e6ea06..5680af6f1 100644
--- a/src/paperless_xml/apps.py
+++ b/src/paperless_xml/apps.py
@@ -1,4 +1,5 @@
 from django.apps import AppConfig
+from django.conf import settings
 
 from paperless_xml.signals import xml_consumer_declaration
 
@@ -9,6 +10,7 @@ class PaperlessXMLConfig(AppConfig):
     def ready(self):
         from documents.signals import document_consumer_declaration
 
-        document_consumer_declaration.connect(xml_consumer_declaration)
+        if settings.RECHNUNGLESS_ENABLED:
+            document_consumer_declaration.connect(xml_consumer_declaration)
 
         AppConfig.ready(self)
diff --git a/src/paperless_xml/parsers.py b/src/paperless_xml/parsers.py
index d8f5a1f58..15248efe6 100644
--- a/src/paperless_xml/parsers.py
+++ b/src/paperless_xml/parsers.py
@@ -1,6 +1,13 @@
-import subprocess
+import base64
+import json
+from datetime import datetime
 from pathlib import Path
 
+import httpx
+from django.conf import settings
+from django.utils.timezone import is_naive
+from django.utils.timezone import make_aware
+
 from documents.parsers import ParseError
 from documents.parsers import make_thumbnail_from_pdf
 from paperless_text.parsers import TextDocumentParser
@@ -25,83 +32,58 @@ class XMLDocumentParser(TextDocumentParser):
         else:
             return super().get_thumbnail(document_path, mime_type, file_name)
 
-    def xml_to_pdf_mustang(
-        self,
-        document_path: Path,
-        mime_type,
-        file_name=None,
-    ) -> Path:
-        outpdf = Path(self.tempdir, "out.pdf")
-        res = subprocess.run(
-            [
-                "mustang-cli.jar",
-                "--action",
-                "pdf",
-                "--source",
-                document_path,
-                "--out",
-                outpdf,
-            ],
-            timeout=20,
-        )
-        if res.returncode != 0:
-            raise ParseError("Mustang CLI exited with code: " + str(res.returncode))
-        else:
-            return outpdf
-
-    def attach_xml_pdf_mustang(self, pdf_path, xml_path) -> Path:
-        outpdf = Path(self.tempdir, "combined.pdf")
-        res = subprocess.run(
-            [
-                "mustang-cli.jar",
-                "--action",
-                "combine",
-                "--source",
-                pdf_path,
-                "--source-xml",
-                xml_path,
-                "--format",
-                "zf",
-                "--version",
-                "2",
-                "--profile",
-                "X",
-                "--no-additional-attachments",
-                "--out",
-                outpdf,
-            ],
-            timeout=20,
-        )
-        if res.returncode != 0:
-            raise ParseError("Mustang CLI exited with code: " + str(res.returncode))
-        else:
-            return outpdf
-
-    def is_xrechnung_mustang(
-        self,
-        document_path: Path,
-        mime_type,
-        file_name=None,
-    ) -> bool:
-        res = subprocess.run(
-            [
-                "mustang-cli.jar",
-                "--action",
-                "validate",
-                "--source",
-                document_path,
-                "--no-notices",
-            ],
-            timeout=20,
-        )
-        return res.returncode == 0
-
     def parse(self, document_path, mime_type, file_name=None):
         super().parse(document_path, mime_type, file_name)
-        if self.is_xrechnung_mustang(document_path, mime_type, file_name):
-            self.is_invoice = True
-            pdfOnly = self.xml_to_pdf_mustang(document_path, mime_type, file_name)
-            pdfWith = self.attach_xml_pdf_mustang(pdfOnly, document_path)
-            self.archive_path = pdfWith
-        else:
+        self.is_invoice = False
+
+        header = {"Content-Type": "application/xml"}
+        url = settings.RECHNUNGLESS_ENDPOINT
+        httpResponse = httpx.post(
+            url + "/convert",
+            headers=header,
+            content=self.text,
+            timeout=60.0,
+        )
+        if httpResponse.status_code == httpx.codes.INTERNAL_SERVER_ERROR:
+            raise ParseError("Server Error: " + httpResponse.text)
+        if httpResponse.status_code not in (
+            httpx.codes.OK,
+            httpx.codes.UNPROCESSABLE_ENTITY,
+        ):
+            raise ParseError(
+                "Unknown Error: HTTP "
+                + str(httpResponse.status_code)
+                + " "
+                + httpResponse.text,
+            )
+        response = json.loads(httpResponse.content)
+
+        if response["result"] == "failed":
+            message = "Conversion failed:"
+            for msg in response["messages"]:
+                message += "\n" + msg
+            self.log.info(f"Invalid schema: {message}")
             self.is_invoice = False
+            return
+        if httpResponse.status_code == httpx.codes.UNPROCESSABLE_ENTITY:
+            message = "The XML file is not valid:"
+            for msg in response["messages"]:
+                message += "\n" + msg
+            self.log.info(f"Invalid schema: {message}")
+            self.is_invoice = False
+            return
+
+        if response["result"] == "invalid":
+            contStr = httpResponse.text
+            self.log.warning(f"The file received is technically invalid: {contStr}")
+
+        self.archive_path = Path(self.tempdir, "invoice.pdf")
+        self.is_invoice = True
+
+        with self.archive_path.open("wb") as archiveFile:
+            archiveFile.write(base64.b64decode(response["archive_pdf"]))
+
+        if "issue_date" in response:
+            self.date = datetime.strptime(response["issue_date"], "%Y%m%d")
+            if is_naive(self.date):
+                self.date = make_aware(self.date)