Mirror of https://github.com/paperless-ngx/paperless-ngx.git, synced 2025-04-29 11:09:27 -05:00

Just log the info always for the moment

parent 3580c4c899
commit a5822c212d
@@ -169,7 +169,8 @@ class DocumentClassifier:
                 tags__is_inbox_tag=True,
             )
             .select_related("document_type", "correspondent", "storage_path")
-            .prefetch_related("tags").order_by("pk")
+            .prefetch_related("tags")
+            .order_by("pk")
         )
 
         # No documents exit to train against
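Note: this hunk only splits the chained call across lines; the query itself is unchanged. A minimal standalone sketch, assuming the Document model import used elsewhere in paperless-ngx (the helper name is hypothetical):

# Sketch only: assumes the paperless-ngx Document model; behaviour matches the hunk above.
from documents.models import Document


def training_documents():
    """Non-inbox documents with related objects fetched for the training loop."""
    return (
        Document.objects.exclude(
            tags__is_inbox_tag=True,
        )
        .select_related("document_type", "correspondent", "storage_path")
        .prefetch_related("tags")
        .order_by("pk")
    )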
@@ -199,7 +200,11 @@ class DocumentClassifier:
             hasher.update(y.to_bytes(4, "little", signed=True))
             labels_correspondent.append(y)
 
-            tags: list[int] = list(doc.tags.filter(matching_algorithm=MatchingModel.MATCH_AUTO).order_by("pk").values_list("pk", flat=True))
+            tags: list[int] = list(
+                doc.tags.filter(matching_algorithm=MatchingModel.MATCH_AUTO)
+                .order_by("pk")
+                .values_list("pk", flat=True),
+            )
             for tag in tags:
                 hasher.update(tag.to_bytes(4, "little", signed=True))
             labels_tags.append(tags)
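Note: the serialization used in this loop can be run on its own. A minimal sketch with made-up tag pks; the concrete digest (sha256 below) is an assumption, since the hasher is created earlier in the method and not shown in this hunk:

import hashlib

# Hypothetical tag primary keys standing in for
# doc.tags.filter(...).values_list("pk", flat=True) from the hunk above.
tags = [3, 7, 42]

hasher = hashlib.sha256()  # assumption: the real hasher is constructed earlier in train()
for tag in tags:
    # Same encoding as the hunk: 4-byte little-endian signed integers.
    hasher.update(tag.to_bytes(4, "little", signed=True))

# Any added, removed, or re-ordered auto tag changes this value, which is what
# the "Current hash" in the debug log below reports.
print(hasher.hexdigest())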
@@ -214,11 +219,22 @@ class DocumentClassifier:
         labels_tags_unique = {tag for tags in labels_tags for tag in tags}
 
         num_tags = len(labels_tags_unique)
+        latest_doc_change = docs_queryset.latest("modified").modified
 
+        logger.debug(
+            f"""Found:
+            Last training: {self.last_doc_change_time}
+            Latest doc change: {latest_doc_change}
+            Last auto type hash: {self.last_auto_type_hash.hex() if self.last_auto_type_hash else None}
+            Current hash: {hasher.digest().hex()}
+            Result: Retraining required
+            """,
+        )
+
         # Check if retraining is actually required.
         # A document has been updated since the classifier was trained
         # New auto tags, types, correspondent, storage paths exist
-        latest_doc_change = docs_queryset.latest("modified").modified
         if (
             self.last_doc_change_time is not None
             and self.last_doc_change_time >= latest_doc_change
@@ -234,15 +250,6 @@ class DocumentClassifier:
             cache.set(CLASSIFIER_HASH_KEY, hasher.hexdigest(), CACHE_50_MINUTES)
             cache.set(CLASSIFIER_VERSION_KEY, self.FORMAT_VERSION, CACHE_50_MINUTES)
             return False
-        logger.debug(
-            f"""Found:
-            Last training: {self.last_doc_change_time}
-            Latest doc change: {latest_doc_change}
-            Last auto type hash: {self.last_auto_type_hash.hex() if self.last_auto_type_hash else None}
-            Current hash: {hasher.digest().hex()}
-            Result: Retraining required
-            """,
-        )
 
         # subtract 1 since -1 (null) is also part of the classes.
 
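Note: taken together, the two hunks above move the debug summary ahead of the skip-retraining check so it is always emitted. The decision itself can be sketched as a small pure function; the final hash comparison is not visible in these hunks and is only assumed from the logged "Last auto type hash" / "Current hash" values:

from datetime import datetime


def retraining_required(
    last_doc_change_time: datetime | None,
    latest_doc_change: datetime,
    last_auto_type_hash: bytes | None,
    current_hash: bytes,
) -> bool:
    """Hypothetical helper mirroring the skip-retraining check in train().

    Retraining is skipped only when no document was modified since the last
    run and the hash over the auto-matching metadata is unchanged.
    """
    if (
        last_doc_change_time is not None
        and last_doc_change_time >= latest_doc_change
        and last_auto_type_hash == current_hash  # assumed comparison, not shown in the hunks
    ):
        return False
    return True


# Example: a document changed after the last training run, so retraining is needed.
print(
    retraining_required(
        last_doc_change_time=datetime(2024, 1, 1),
        latest_doc_change=datetime(2024, 2, 1),
        last_auto_type_hash=b"\x01",
        current_hash=b"\x01",
    ),
)  # -> True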