mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Added test for duplicates
This commit is contained in:
parent
2853545b9d
commit
64b72d4337
@ -1,4 +1,5 @@
|
||||
import datetime
|
||||
import hashlib
|
||||
import logging
|
||||
import tempfile
|
||||
import uuid
|
||||
@ -101,6 +102,14 @@ class Consumer(object):
|
||||
if self._is_ready(doc):
|
||||
continue
|
||||
|
||||
if self._is_duplicate(doc):
|
||||
self.log(
|
||||
"info",
|
||||
"Skipping {} as it appears to be a duplicate".format(doc)
|
||||
)
|
||||
self._ignore.append(doc)
|
||||
continue
|
||||
|
||||
self.logging_group = uuid.uuid4()
|
||||
|
||||
self.log("info", "Consuming {}".format(doc))
|
||||
@ -340,6 +349,12 @@ class Consumer(object):
|
||||
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def _is_duplicate(doc):
|
||||
with open(doc, "rb") as f:
|
||||
checksum = hashlib.md5(f.read()).hexdigest()
|
||||
return Document.objects.filter(checksum=checksum).exists()
|
||||
|
||||
|
||||
def image_to_string(args):
|
||||
img, lang = args
|
||||
|
Loading…
x
Reference in New Issue
Block a user