# Mirror of https://github.com/paperless-ngx/paperless-ngx.git
# (synced 2025-04-02 13:45:10 -05:00) -- 41 lines, 947 B, Python
import ocrmypdf
|
|
from ocrmypdf import ocr, hookimpl
|
|
from ocrmypdf._concurrent import NullProgressBar
|
|
|
|
|
|
def get_unified_progress(self, desc, current, total):
    """Map progress within one named step onto a single overall fraction.

    The overall job is modelled as three equally weighted steps, identified
    by the hard-coded ``desc`` labels below (presumably the descriptions
    ocrmypdf hands to its progress bars -- verify against the caller).

    Args:
        self: Unused; kept so the signature stays unchanged.
        desc: Label of the step currently being reported.
        current: Amount of work completed within that step.
        total: Total amount of work in that step; may be 0 or None.

    Returns:
        ``index / 3 + (current / total) / 3`` for a known step, where
        ``index`` is the step's position in the list. Returns 0 when
        ``desc`` is not one of the known steps.
    """
    steps = ("Scanning contents", "OCR", "PDF/A conversion")
    if desc not in steps:
        return 0

    index = steps.index(desc)
    # A step with an unknown or empty total (0 / None) would otherwise raise
    # ZeroDivisionError / TypeError; count it as "nothing done yet" instead.
    step_fraction = current / total if total else 0
    return (index / len(steps)) + step_fraction / len(steps)
|
|
|
|
|
|
class MyProgressBar:
    """Minimal progress-bar stand-in whose ``update`` does nothing."""

    # The context-manager methods (__enter__, __exit__) and other members
    # were removed for simplicity.

    def update(self, *args, **kwargs):
        """Accept any progress update and ignore it.

        Reporting progress from here would require calling
        ``MyOcrClass.progress()``.
        """
        return None
|
|
|
|
|
|
@hookimpl
def get_progressbar_class(*args, **kwargs):
    """Hook implementation: return the ``MyProgressBar`` class itself.

    The class is returned, not an instance; any arguments are ignored.
    """
    progressbar_class = MyProgressBar
    return progressbar_class
|
|
|
|
|
|
class MyOcrClass:
    """Example driver that runs ocrmypdf and is meant to report progress."""

    def progress(self, current_p, max_p):
        """Placeholder for progress reporting; currently does nothing.

        Intended to send progress over web sockets, which *requires* the
        ``self`` reference.
        """
        return None

    def run(self):
        """Run ``ocrmypdf.ocr`` on ``test.pdf``, writing ``test_out.pdf``."""
        ocr_options = {"skip_text": True, "jobs": 1, "plugins": "test"}
        ocrmypdf.ocr("test.pdf", "test_out.pdf", **ocr_options)
|
|
|
|
|
|
if __name__ == '__main__':
    # Run the example only when this file is executed as a script.
    ocr_job = MyOcrClass()
    ocr_job.run()
|