mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Detect and reset invalid ASNs to 0 during indexing with a loud error to the user
This commit is contained in:
parent
a203b006e7
commit
0f536a9b9a
@ -146,11 +146,16 @@ class Consumer(LoggingMixin):
|
|||||||
return
|
return
|
||||||
# Validate the ASN is within the inclusive range [0, uint32_t max]
|
# Validate the ASN is within the inclusive range [0, uint32_t max]
|
||||||
# otherwise, Whoosh can't handle it in the index
|
# otherwise, Whoosh can't handle it in the index
|
||||||
if self.override_asn < 0 or self.override_asn > 0xFF_FF_FF_FF:
|
if (
|
||||||
|
self.override_asn < Document.ARCHIVE_SERIAL_NUMBER_MIN
|
||||||
|
or self.override_asn > Document.ARCHIVE_SERIAL_NUMBER_MAX
|
||||||
|
):
|
||||||
self._fail(
|
self._fail(
|
||||||
MESSAGE_ASN_RANGE,
|
MESSAGE_ASN_RANGE,
|
||||||
f"Not consuming {self.filename}: "
|
f"Not consuming {self.filename}: "
|
||||||
f"Given ASN {self.override_asn} is out of range [0, 4,294,967,295]",
|
f"Given ASN {self.override_asn} is out of range "
|
||||||
|
f"[{Document.ARCHIVE_SERIAL_NUMBER_MIN:,}, "
|
||||||
|
f"{Document.ARCHIVE_SERIAL_NUMBER_MAX:,}]",
|
||||||
)
|
)
|
||||||
if Document.objects.filter(archive_serial_number=self.override_asn).exists():
|
if Document.objects.filter(archive_serial_number=self.override_asn).exists():
|
||||||
self._fail(
|
self._fail(
|
||||||
|
@ -90,10 +90,22 @@ def open_index_searcher():
|
|||||||
searcher.close()
|
searcher.close()
|
||||||
|
|
||||||
|
|
||||||
def update_document(writer, doc):
|
def update_document(writer: AsyncWriter, doc: Document):
|
||||||
tags = ",".join([t.name for t in doc.tags.all()])
|
tags = ",".join([t.name for t in doc.tags.all()])
|
||||||
tags_ids = ",".join([str(t.id) for t in doc.tags.all()])
|
tags_ids = ",".join([str(t.id) for t in doc.tags.all()])
|
||||||
comments = ",".join([str(c.comment) for c in Comment.objects.filter(document=doc)])
|
comments = ",".join([str(c.comment) for c in Comment.objects.filter(document=doc)])
|
||||||
|
asn = doc.archive_serial_number
|
||||||
|
if asn is not None and (
|
||||||
|
asn < Document.ARCHIVE_SERIAL_NUMBER_MIN
|
||||||
|
or asn > Document.ARCHIVE_SERIAL_NUMBER_MAX
|
||||||
|
):
|
||||||
|
logger.error(
|
||||||
|
f"Not indexing Archive Serial Number {asn} of document {doc.pk}. "
|
||||||
|
f"ASN is out of range "
|
||||||
|
f"[{Document.ARCHIVE_SERIAL_NUMBER_MIN:,}, "
|
||||||
|
f"{Document.ARCHIVE_SERIAL_NUMBER_MAX:,}].",
|
||||||
|
)
|
||||||
|
asn = 0
|
||||||
writer.update_document(
|
writer.update_document(
|
||||||
id=doc.pk,
|
id=doc.pk,
|
||||||
title=doc.title,
|
title=doc.title,
|
||||||
@ -109,7 +121,7 @@ def update_document(writer, doc):
|
|||||||
has_type=doc.document_type is not None,
|
has_type=doc.document_type is not None,
|
||||||
created=doc.created,
|
created=doc.created,
|
||||||
added=doc.added,
|
added=doc.added,
|
||||||
asn=doc.archive_serial_number,
|
asn=asn,
|
||||||
modified=doc.modified,
|
modified=doc.modified,
|
||||||
path=doc.storage_path.name if doc.storage_path else None,
|
path=doc.storage_path.name if doc.storage_path else None,
|
||||||
path_id=doc.storage_path.id if doc.storage_path else None,
|
path_id=doc.storage_path.id if doc.storage_path else None,
|
||||||
|
@ -3,6 +3,7 @@ import logging
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
|
from typing import Final
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
import dateutil.parser
|
import dateutil.parser
|
||||||
@ -229,6 +230,9 @@ class Document(models.Model):
|
|||||||
help_text=_("The original name of the file when it was uploaded"),
|
help_text=_("The original name of the file when it was uploaded"),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
ARCHIVE_SERIAL_NUMBER_MIN: Final[int] = 0
|
||||||
|
ARCHIVE_SERIAL_NUMBER_MAX: Final[int] = 0xFF_FF_FF_FF
|
||||||
|
|
||||||
archive_serial_number = models.PositiveIntegerField(
|
archive_serial_number = models.PositiveIntegerField(
|
||||||
_("archive serial number"),
|
_("archive serial number"),
|
||||||
blank=True,
|
blank=True,
|
||||||
@ -236,8 +240,8 @@ class Document(models.Model):
|
|||||||
unique=True,
|
unique=True,
|
||||||
db_index=True,
|
db_index=True,
|
||||||
validators=[
|
validators=[
|
||||||
MaxValueValidator(0xFF_FF_FF_FF),
|
MaxValueValidator(ARCHIVE_SERIAL_NUMBER_MAX),
|
||||||
MinValueValidator(0),
|
MinValueValidator(ARCHIVE_SERIAL_NUMBER_MIN),
|
||||||
],
|
],
|
||||||
help_text=_(
|
help_text=_(
|
||||||
"The position of this document in your physical document " "archive.",
|
"The position of this document in your physical document " "archive.",
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
from unittest import mock
|
||||||
|
|
||||||
from django.test import TestCase
|
from django.test import TestCase
|
||||||
from documents import index
|
from documents import index
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
@ -31,3 +33,60 @@ class TestAutoComplete(DirectoriesMixin, TestCase):
|
|||||||
)
|
)
|
||||||
self.assertListEqual(index.autocomplete(ix, "tes", limit=1), [b"test3"])
|
self.assertListEqual(index.autocomplete(ix, "tes", limit=1), [b"test3"])
|
||||||
self.assertListEqual(index.autocomplete(ix, "tes", limit=0), [])
|
self.assertListEqual(index.autocomplete(ix, "tes", limit=0), [])
|
||||||
|
|
||||||
|
def test_archive_serial_number_ranging(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- Document with an archive serial number above schema allowed size
|
||||||
|
WHEN:
|
||||||
|
- Document is provided to the index
|
||||||
|
THEN:
|
||||||
|
- Error is logged
|
||||||
|
- Document ASN is reset to 0 for the index
|
||||||
|
"""
|
||||||
|
doc1 = Document.objects.create(
|
||||||
|
title="doc1",
|
||||||
|
checksum="A",
|
||||||
|
content="test test2 test3",
|
||||||
|
# yes, this is allowed, unless full_clean is run
|
||||||
|
# DRF does call the validators, this test won't
|
||||||
|
archive_serial_number=Document.ARCHIVE_SERIAL_NUMBER_MAX + 1,
|
||||||
|
)
|
||||||
|
with self.assertLogs("paperless.index", level="ERROR") as cm:
|
||||||
|
with mock.patch(
|
||||||
|
"documents.index.AsyncWriter.update_document",
|
||||||
|
) as mocked_update_doc:
|
||||||
|
index.add_or_update_document(doc1)
|
||||||
|
|
||||||
|
mocked_update_doc.assert_called_once()
|
||||||
|
_, kwargs = mocked_update_doc.call_args
|
||||||
|
|
||||||
|
self.assertEqual(kwargs["asn"], 0)
|
||||||
|
|
||||||
|
error_str = cm.output[0]
|
||||||
|
expected_str = "ERROR:paperless.index:Not indexing Archive Serial Number 4294967296 of document 1"
|
||||||
|
self.assertIn(expected_str, error_str)
|
||||||
|
|
||||||
|
def test_archive_serial_number_is_none(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- Document with no archive serial number
|
||||||
|
WHEN:
|
||||||
|
- Document is provided to the index
|
||||||
|
THEN:
|
||||||
|
- ASN isn't touched
|
||||||
|
"""
|
||||||
|
doc1 = Document.objects.create(
|
||||||
|
title="doc1",
|
||||||
|
checksum="A",
|
||||||
|
content="test test2 test3",
|
||||||
|
)
|
||||||
|
with mock.patch(
|
||||||
|
"documents.index.AsyncWriter.update_document",
|
||||||
|
) as mocked_update_doc:
|
||||||
|
index.add_or_update_document(doc1)
|
||||||
|
|
||||||
|
mocked_update_doc.assert_called_once()
|
||||||
|
_, kwargs = mocked_update_doc.call_args
|
||||||
|
|
||||||
|
self.assertIsNone(kwargs["asn"])
|
||||||
|
Loading…
x
Reference in New Issue
Block a user