mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-09 09:58:20 -05:00
Added test for duplicates
This commit is contained in:
parent
2853545b9d
commit
64b72d4337
@ -1,4 +1,5 @@
|
|||||||
import datetime
|
import datetime
|
||||||
|
import hashlib
|
||||||
import logging
|
import logging
|
||||||
import tempfile
|
import tempfile
|
||||||
import uuid
|
import uuid
|
||||||
@ -101,6 +102,14 @@ class Consumer(object):
|
|||||||
if self._is_ready(doc):
|
if self._is_ready(doc):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
if self._is_duplicate(doc):
|
||||||
|
self.log(
|
||||||
|
"info",
|
||||||
|
"Skipping {} as it appears to be a duplicate".format(doc)
|
||||||
|
)
|
||||||
|
self._ignore.append(doc)
|
||||||
|
continue
|
||||||
|
|
||||||
self.logging_group = uuid.uuid4()
|
self.logging_group = uuid.uuid4()
|
||||||
|
|
||||||
self.log("info", "Consuming {}".format(doc))
|
self.log("info", "Consuming {}".format(doc))
|
||||||
@ -340,6 +349,12 @@ class Consumer(object):
|
|||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@staticmethod
def _is_duplicate(doc):
    """
    Report whether a document with the same content is already stored.

    The file at path ``doc`` is read in binary mode and hashed with MD5;
    the hex digest is then looked up in the ``checksum`` field of
    ``Document``.

    Returns True if at least one matching ``Document`` exists, False
    otherwise.  Errors from ``open`` (e.g. a missing file) propagate to
    the caller.
    """
    digest = hashlib.md5()
    with open(doc, "rb") as f:
        # Feed the hash in fixed-size chunks so a large file is never held
        # in memory in full; the resulting digest equals hashing the whole
        # content in one call.
        for chunk in iter(lambda: f.read(65536), b""):
            digest.update(chunk)
    # NOTE(review): MD5 serves only as a content fingerprint here and
    # presumably has to match how stored checksums were computed -- confirm
    # before switching algorithms.
    return Document.objects.filter(checksum=digest.hexdigest()).exists()
|
||||||
|
|
||||||
|
|
||||||
def image_to_string(args):
|
def image_to_string(args):
|
||||||
img, lang = args
|
img, lang = args
|
||||||
|
Loading…
x
Reference in New Issue
Block a user