mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Detect and reset invalid ASNs to 0 during indexing with a loud error to the user
This commit is contained in:
parent
a203b006e7
commit
0f536a9b9a
@ -146,11 +146,16 @@ class Consumer(LoggingMixin):
|
||||
return
|
||||
# Validate the range is above zero and less than uint32_t max
|
||||
# otherwise, Whoosh can't handle it in the index
|
||||
if self.override_asn < 0 or self.override_asn > 0xFF_FF_FF_FF:
|
||||
if (
|
||||
self.override_asn < Document.ARCHIVE_SERIAL_NUMBER_MIN
|
||||
or self.override_asn > Document.ARCHIVE_SERIAL_NUMBER_MAX
|
||||
):
|
||||
self._fail(
|
||||
MESSAGE_ASN_RANGE,
|
||||
f"Not consuming {self.filename}: "
|
||||
f"Given ASN {self.override_asn} is out of range [0, 4,294,967,295]",
|
||||
f"Given ASN {self.override_asn} is out of range "
|
||||
f"[{Document.ARCHIVE_SERIAL_NUMBER_MIN:,}, "
|
||||
f"{Document.ARCHIVE_SERIAL_NUMBER_MAX:,}]",
|
||||
)
|
||||
if Document.objects.filter(archive_serial_number=self.override_asn).exists():
|
||||
self._fail(
|
||||
|
@ -90,10 +90,22 @@ def open_index_searcher():
|
||||
searcher.close()
|
||||
|
||||
|
||||
def update_document(writer, doc):
|
||||
def update_document(writer: AsyncWriter, doc: Document):
|
||||
tags = ",".join([t.name for t in doc.tags.all()])
|
||||
tags_ids = ",".join([str(t.id) for t in doc.tags.all()])
|
||||
comments = ",".join([str(c.comment) for c in Comment.objects.filter(document=doc)])
|
||||
asn = doc.archive_serial_number
|
||||
if asn is not None and (
|
||||
asn < Document.ARCHIVE_SERIAL_NUMBER_MIN
|
||||
or asn > Document.ARCHIVE_SERIAL_NUMBER_MAX
|
||||
):
|
||||
logger.error(
|
||||
f"Not indexing Archive Serial Number {asn} of document {doc.pk}. "
|
||||
f"ASN is out of range "
|
||||
f"[{Document.ARCHIVE_SERIAL_NUMBER_MIN:,}, "
|
||||
f"{Document.ARCHIVE_SERIAL_NUMBER_MAX:,}.",
|
||||
)
|
||||
asn = 0
|
||||
writer.update_document(
|
||||
id=doc.pk,
|
||||
title=doc.title,
|
||||
@ -109,7 +121,7 @@ def update_document(writer, doc):
|
||||
has_type=doc.document_type is not None,
|
||||
created=doc.created,
|
||||
added=doc.added,
|
||||
asn=doc.archive_serial_number,
|
||||
asn=asn,
|
||||
modified=doc.modified,
|
||||
path=doc.storage_path.name if doc.storage_path else None,
|
||||
path_id=doc.storage_path.id if doc.storage_path else None,
|
||||
|
@ -3,6 +3,7 @@ import logging
|
||||
import os
|
||||
import re
|
||||
from collections import OrderedDict
|
||||
from typing import Final
|
||||
from typing import Optional
|
||||
|
||||
import dateutil.parser
|
||||
@ -229,6 +230,9 @@ class Document(models.Model):
|
||||
help_text=_("The original name of the file when it was uploaded"),
|
||||
)
|
||||
|
||||
ARCHIVE_SERIAL_NUMBER_MIN: Final[int] = 0
|
||||
ARCHIVE_SERIAL_NUMBER_MAX: Final[int] = 0xFF_FF_FF_FF
|
||||
|
||||
archive_serial_number = models.PositiveIntegerField(
|
||||
_("archive serial number"),
|
||||
blank=True,
|
||||
@ -236,8 +240,8 @@ class Document(models.Model):
|
||||
unique=True,
|
||||
db_index=True,
|
||||
validators=[
|
||||
MaxValueValidator(0xFF_FF_FF_FF),
|
||||
MinValueValidator(0),
|
||||
MaxValueValidator(ARCHIVE_SERIAL_NUMBER_MAX),
|
||||
MinValueValidator(ARCHIVE_SERIAL_NUMBER_MIN),
|
||||
],
|
||||
help_text=_(
|
||||
"The position of this document in your physical document " "archive.",
|
||||
|
@ -1,3 +1,5 @@
|
||||
from unittest import mock
|
||||
|
||||
from django.test import TestCase
|
||||
from documents import index
|
||||
from documents.models import Document
|
||||
@ -31,3 +33,60 @@ class TestAutoComplete(DirectoriesMixin, TestCase):
|
||||
)
|
||||
self.assertListEqual(index.autocomplete(ix, "tes", limit=1), [b"test3"])
|
||||
self.assertListEqual(index.autocomplete(ix, "tes", limit=0), [])
|
||||
|
||||
def test_archive_serial_number_ranging(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Document with an archive serial number above schema allowed size
|
||||
WHEN:
|
||||
- Document is provided to the index
|
||||
THEN:
|
||||
- Error is logged
|
||||
- Document ASN is reset to 0 for the index
|
||||
"""
|
||||
doc1 = Document.objects.create(
|
||||
title="doc1",
|
||||
checksum="A",
|
||||
content="test test2 test3",
|
||||
# yes, this is allowed, unless full_clean is run
|
||||
# DRF does call the validators, this test won't
|
||||
archive_serial_number=Document.ARCHIVE_SERIAL_NUMBER_MAX + 1,
|
||||
)
|
||||
with self.assertLogs("paperless.index", level="ERROR") as cm:
|
||||
with mock.patch(
|
||||
"documents.index.AsyncWriter.update_document",
|
||||
) as mocked_update_doc:
|
||||
index.add_or_update_document(doc1)
|
||||
|
||||
mocked_update_doc.assert_called_once()
|
||||
_, kwargs = mocked_update_doc.call_args
|
||||
|
||||
self.assertEqual(kwargs["asn"], 0)
|
||||
|
||||
error_str = cm.output[0]
|
||||
expected_str = "ERROR:paperless.index:Not indexing Archive Serial Number 4294967296 of document 1"
|
||||
self.assertIn(expected_str, error_str)
|
||||
|
||||
def test_archive_serial_number_is_none(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Document with no archive serial number
|
||||
WHEN:
|
||||
- Document is provided to the index
|
||||
THEN:
|
||||
- ASN isn't touched
|
||||
"""
|
||||
doc1 = Document.objects.create(
|
||||
title="doc1",
|
||||
checksum="A",
|
||||
content="test test2 test3",
|
||||
)
|
||||
with mock.patch(
|
||||
"documents.index.AsyncWriter.update_document",
|
||||
) as mocked_update_doc:
|
||||
index.add_or_update_document(doc1)
|
||||
|
||||
mocked_update_doc.assert_called_once()
|
||||
_, kwargs = mocked_update_doc.call_args
|
||||
|
||||
self.assertIsNone(kwargs["asn"])
|
||||
|
Loading…
x
Reference in New Issue
Block a user