diff --git a/.docker-hub-test b/.docker-hub-test deleted file mode 100644 index 6c2dff5ab..000000000 --- a/.docker-hub-test +++ /dev/null @@ -1 +0,0 @@ -Docker Hub test 2 diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index 8863fa346..000000000 --- a/.gitattributes +++ /dev/null @@ -1 +0,0 @@ -THANKS.md merge=union diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml deleted file mode 100644 index 111e5bf91..000000000 --- a/.github/workflows/codeql-analysis.yml +++ /dev/null @@ -1,68 +0,0 @@ -# For most projects, this workflow file will not need changing; you simply need -# to commit it to your repository. -# -# You may wish to alter this file to override the set of languages analyzed, -# or to provide custom queries or build logic. -# -# ******** NOTE ******** -# We have attempted to detect the languages in your repository. Please check -# the `language` matrix defined below to confirm you have the correct set of -# supported CodeQL languages. -# ******** NOTE ******** - -name: "CodeQL" - -on: - push: - branches: [ master ] - pull_request: - # The branches below must be a subset of the branches above - branches: [ master ] - schedule: - - cron: '42 3 * * 1' - -jobs: - analyze: - name: Analyze - runs-on: ubuntu-latest - - strategy: - fail-fast: false - matrix: - language: [ 'javascript', 'python' ] - # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] - # Learn more... - # https://docs.github.com/en/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#overriding-automatic-language-detection - - steps: - - name: Checkout repository - uses: actions/checkout@v2 - - # Initializes the CodeQL tools for scanning. - - name: Initialize CodeQL - uses: github/codeql-action/init@v1 - with: - languages: ${{ matrix.language }} - # If you wish to specify custom queries, you can do so here or in a config file. 
- # By default, queries listed here will override any specified in a config file. - # Prefix the list here with "+" to use these queries and those in the config file. - # queries: ./path/to/local/query, your-org/your-repo/queries@main - - # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). - # If this step fails, then you should remove it and run the build manually (see below) - - name: Autobuild - uses: github/codeql-action/autobuild@v1 - - # ℹ️ Command-line programs to run using the OS shell. - # 📚 https://git.io/JvXDl - - # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines - # and modify them (or add more) to build your code if your project - # uses a compiled language - - #- run: | - # make bootstrap - # make release - - - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v1 diff --git a/.gitignore b/.gitignore index f19c0578b..871a7bd08 100644 --- a/.gitignore +++ b/.gitignore @@ -84,4 +84,6 @@ scripts/nuke /data/index /paperless.conf -/consumption/ +/consume +/export +/src-ui/.vscode diff --git a/Dockerfile b/Dockerfile index 2a689c98e..85eadc21e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -24,7 +24,8 @@ COPY Pipfile* ./ #Dependencies RUN apt-get update \ - && apt-get -y --no-install-recommends install \ + && apt-get -y --no-install-recommends install \ + anacron \ build-essential \ curl \ ghostscript \ @@ -43,30 +44,38 @@ RUN apt-get update \ tesseract-ocr-spa \ tzdata \ unpaper \ - && pip install --upgrade pipenv \ + && pip install --upgrade pipenv supervisor \ && pipenv install --system --deploy \ && pipenv --clear \ && apt-get -y purge build-essential \ && apt-get -y autoremove --purge \ - && rm -rf /var/lib/apt/lists/* + && rm -rf /var/lib/apt/lists/* \ + && mkdir /var/log/supervisord /var/run/supervisord -# # Copy application +# copy scripts +# this fixes issues with imagemagick and PDF +COPY scripts/imagemagick-policy.xml /etc/ImageMagick-6/policy.xml COPY scripts/gunicorn.conf.py 
./ +COPY scripts/supervisord.conf /etc/supervisord.conf +COPY scripts/paperless-cron /etc/cron.daily/ +COPY scripts/docker-entrypoint.sh /sbin/docker-entrypoint.sh + +# copy app COPY src/ ./src/ COPY --from=frontend /usr/src/paperless/src-ui/dist/paperless-ui/ ./src/documents/static/ -RUN addgroup --gid 1000 paperless && \ - useradd --uid 1000 --gid paperless --home-dir /usr/src/paperless paperless && \ - chown -R paperless:paperless . +# add users, setup scripts +RUN addgroup --gid 1000 paperless \ + && useradd --uid 1000 --gid paperless --home-dir /usr/src/paperless paperless \ + && chown -R paperless:paperless . \ + && chmod 755 /sbin/docker-entrypoint.sh \ + && chmod +x /etc/cron.daily/paperless-cron \ + && rm /etc/cron.daily/apt-compat /etc/cron.daily/dpkg WORKDIR /usr/src/paperless/src/ RUN sudo -HEu paperless python3 manage.py collectstatic --clear --no-input VOLUME ["/usr/src/paperless/data", "/usr/src/paperless/consume", "/usr/src/paperless/export"] - -COPY scripts/docker-entrypoint.sh /sbin/docker-entrypoint.sh -RUN chmod 755 /sbin/docker-entrypoint.sh ENTRYPOINT ["/sbin/docker-entrypoint.sh"] - -CMD ["--help"] +CMD ["python3", "manage.py", "--help"] diff --git a/Pipfile b/Pipfile index f9c23beb9..e8f862578 100644 --- a/Pipfile +++ b/Pipfile @@ -4,29 +4,28 @@ verify_ssl = true name = "pypi" [packages] -django = "*" +django = "~=3.1" pillow = "*" -dateparser = "*" +dateparser = "~=0.7" django-cors-headers = "*" -djangorestframework = "*" -inotify-simple = "*" +djangorestframework = "~=3.12" python-gnupg = "*" python-dotenv = "*" filemagic = "*" -pyocr = "*" +pyocr = "~=0.7" langdetect = "*" pdftotext = "*" -django-filter = "*" +django-filter = "~=2.4" python-dateutil = "*" psycopg2-binary = "*" -scikit-learn="*" -whoosh="*" +scikit-learn="~=0.23" +whoosh="~=2.7" gunicorn = "*" whitenoise = "*" fuzzywuzzy = "*" python-Levenshtein = "*" - -django-extensions = "*" +django-extensions = "" +watchdog = "*" [dev-packages] coveralls = "*" diff --git a/Pipfile.lock 
b/Pipfile.lock index b66f886e7..8b3bf705a 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "48343a032c1becd5f1a3ae46c2ade70c14c251591c5f9cb49dd2cab26b0e0bea" + "sha256": "2c1558fe7df0aee1ee20b095c2102f802470bf4a4ae09a7749ac487f8bfab8b6" }, "pipfile-spec": 6, "requires": {}, @@ -52,6 +52,7 @@ "sha256:dc663652ac9460fd06580a973576820430c6d428720e874ae46b041fa63e0efa" ], "index": "pypi", + "markers": "python_version >= '3.5'", "version": "==3.0.9" }, "django-filter": { @@ -93,13 +94,6 @@ "index": "pypi", "version": "==20.0.4" }, - "inotify-simple": { - "hashes": [ - "sha256:8440ffe49c4ae81a8df57c1ae1eb4b6bfa7acb830099bfb3e305b383005cc128" - ], - "index": "pypi", - "version": "==1.3.5" - }, "joblib": { "hashes": [ "sha256:698c311779f347cf6b7e6b8a39bb682277b8ee4aba8cf9507bc0cf4cd4737b72", @@ -118,35 +112,49 @@ }, "numpy": { "hashes": [ - "sha256:04c7d4ebc5ff93d9822075ddb1751ff392a4375e5885299445fcebf877f179d5", - "sha256:0bfd85053d1e9f60234f28f63d4a5147ada7f432943c113a11afcf3e65d9d4c8", - "sha256:0c66da1d202c52051625e55a249da35b31f65a81cb56e4c69af0dfb8fb0125bf", - "sha256:0d310730e1e793527065ad7dde736197b705d0e4c9999775f212b03c44a8484c", - "sha256:1669ec8e42f169ff715a904c9b2105b6640f3f2a4c4c2cb4920ae8b2785dac65", - "sha256:2117536e968abb7357d34d754e3733b0d7113d4c9f1d921f21a3d96dec5ff716", - "sha256:3733640466733441295b0d6d3dcbf8e1ffa7e897d4d82903169529fd3386919a", - "sha256:4339741994c775396e1a274dba3609c69ab0f16056c1077f18979bec2a2c2e6e", - "sha256:51ee93e1fac3fe08ef54ff1c7f329db64d8a9c5557e6c8e908be9497ac76374b", - "sha256:54045b198aebf41bf6bf4088012777c1d11703bf74461d70cd350c0af2182e45", - "sha256:58d66a6b3b55178a1f8a5fe98df26ace76260a70de694d99577ddeab7eaa9a9d", - "sha256:59f3d687faea7a4f7f93bd9665e5b102f32f3fa28514f15b126f099b7997203d", - "sha256:62139af94728d22350a571b7c82795b9d59be77fc162414ada6c8b6a10ef5d02", - "sha256:7118f0a9f2f617f921ec7d278d981244ba83c85eea197be7c5a4f84af80a9c3c", - 
"sha256:7c6646314291d8f5ea900a7ea9c4261f834b5b62159ba2abe3836f4fa6705526", - "sha256:967c92435f0b3ba37a4257c48b8715b76741410467e2bdb1097e8391fccfae15", - "sha256:9a3001248b9231ed73894c773142658bab914645261275f675d86c290c37f66d", - "sha256:aba1d5daf1144b956bc87ffb87966791f5e9f3e1f6fab3d7f581db1f5b598f7a", - "sha256:addaa551b298052c16885fc70408d3848d4e2e7352de4e7a1e13e691abc734c1", - "sha256:b594f76771bc7fc8a044c5ba303427ee67c17a09b36e1fa32bde82f5c419d17a", - "sha256:c35a01777f81e7333bcf276b605f39c872e28295441c265cd0c860f4b40148c1", - "sha256:cebd4f4e64cfe87f2039e4725781f6326a61f095bc77b3716502bed812b385a9", - "sha256:d526fa58ae4aead839161535d59ea9565863bb0b0bdb3cc63214613fb16aced4", - "sha256:d7ac33585e1f09e7345aa902c281bd777fdb792432d27fca857f39b70e5dd31c", - "sha256:e6ddbdc5113628f15de7e4911c02aed74a4ccff531842c583e5032f6e5a179bd", - "sha256:eb25c381d168daf351147713f49c626030dcff7a393d5caa62515d415a6071d8" + "sha256:0ee77786eebbfa37f2141fd106b549d37c89207a0d01d8852fde1c82e9bfc0e7", + "sha256:199bebc296bd8a5fc31c16f256ac873dd4d5b4928dfd50e6c4995570fc71a8f3", + "sha256:1a307bdd3dd444b1d0daa356b5f4c7de2e24d63bdc33ea13ff718b8ec4c6a268", + "sha256:1ea7e859f16e72ab81ef20aae69216cfea870676347510da9244805ff9670170", + "sha256:271139653e8b7a046d11a78c0d33bafbddd5c443a5b9119618d0652a4eb3a09f", + "sha256:35bf5316af8dc7c7db1ad45bec603e5fb28671beb98ebd1d65e8059efcfd3b72", + "sha256:463792a249a81b9eb2b63676347f996d3f0082c2666fd0604f4180d2e5445996", + "sha256:50d3513469acf5b2c0406e822d3f314d7ac5788c2b438c24e5dd54d5a81ef522", + "sha256:50f68ebc439821b826823a8da6caa79cd080dee2a6d5ab9f1163465a060495ed", + "sha256:51e8d2ae7c7e985c7bebf218e56f72fa93c900ad0c8a7d9fbbbf362f45710f69", + "sha256:522053b731e11329dd52d258ddf7de5288cae7418b55e4b7d32f0b7e31787e9d", + "sha256:5ea4401ada0d3988c263df85feb33818dc995abc85b8125f6ccb762009e7bc68", + "sha256:604d2e5a31482a3ad2c88206efd43d6fcf666ada1f3188fd779b4917e49b7a98", + "sha256:6ff88bcf1872b79002569c63fe26cd2cda614e573c553c4d5b814fb5eb3d2822", 
+ "sha256:7197ee0a25629ed782c7bd01871ee40702ffeef35bc48004bc2fdcc71e29ba9d", + "sha256:741d95eb2b505bb7a99fbf4be05fa69f466e240c2b4f2d3ddead4f1b5f82a5a5", + "sha256:83af653bb92d1e248ccf5fdb05ccc934c14b936bcfe9b917dc180d3f00250ac6", + "sha256:8802d23e4895e0c65e418abe67cdf518aa5cbb976d97f42fd591f921d6dffad0", + "sha256:8edc4d687a74d0a5f8b9b26532e860f4f85f56c400b3a98899fc44acb5e27add", + "sha256:942d2cdcb362739908c26ce8dd88db6e139d3fa829dd7452dd9ff02cba6b58b2", + "sha256:9a0669787ba8c9d3bb5de5d9429208882fb47764aa79123af25c5edc4f5966b9", + "sha256:9d08d84bb4128abb9fbd9f073e5c69f70e5dab991a9c42e5b4081ea5b01b5db0", + "sha256:9f7f56b5e85b08774939622b7d45a5d00ff511466522c44fc0756ac7692c00f2", + "sha256:a2daea1cba83210c620e359de2861316f49cc7aea8e9a6979d6cb2ddab6dda8c", + "sha256:b9074d062d30c2779d8af587924f178a539edde5285d961d2dfbecbac9c4c931", + "sha256:c4aa79993f5d856765819a3651117520e41ac3f89c3fc1cb6dee11aa562df6da", + "sha256:d78294f1c20f366cde8a75167f822538a7252b6e8b9d6dbfb3bdab34e7c1929e", + "sha256:dfdc8b53aa9838b9d44ed785431ca47aa3efaa51d0d5dd9c412ab5247151a7c4", + "sha256:dffed17848e8b968d8d3692604e61881aa6ef1f8074c99e81647ac84f6038535", + "sha256:e080087148fd70469aade2abfeadee194357defd759f9b59b349c6192aba994c", + "sha256:e983cbabe10a8989333684c98fdc5dd2f28b236216981e0c26ed359aaa676772", + "sha256:ea6171d2d8d648dee717457d0f75db49ad8c2f13100680e284d7becf3dc311a6", + "sha256:eefc13863bf01583a85e8c1121a901cc7cb8f059b960c4eba30901e2e6aba95f", + "sha256:efd656893171bbf1331beca4ec9f2e74358fc732a2084f664fd149cc4b3441d2" ], "markers": "python_version >= '3.6'", - "version": "==1.19.2" + "version": "==1.19.3" + }, + "pathtools": { + "hashes": [ + "sha256:7c35c5421a39bb82e58018febd90e3b6e5db34c5443aaaf742b3f33d4655f1c0" + ], + "version": "==0.1.2" }, "pdftotext": { "hashes": [ @@ -245,11 +253,11 @@ }, "python-dotenv": { "hashes": [ - "sha256:8c10c99a1b25d9a68058a1ad6f90381a62ba68230ca93966882a4dbc3bc9c33d", - 
"sha256:c10863aee750ad720f4f43436565e4c1698798d763b63234fb5021b6c616e423" + "sha256:0c8d1b80d1a1e91717ea7d526178e3882732420b03f08afea0406db6402e220e", + "sha256:587825ed60b1711daea4832cf37524dfd404325b7db5e25ebe88c495c9f807a0" ], "index": "pypi", - "version": "==0.14.0" + "version": "==0.15.0" }, "python-gnupg": { "hashes": [ @@ -275,35 +283,35 @@ }, "regex": { "hashes": [ - "sha256:0cb23ed0e327c18fb7eac61ebbb3180ebafed5b9b86ca2e15438201e5903b5dd", - "sha256:1a065e7a6a1b4aa851a0efa1a2579eabc765246b8b3a5fd74000aaa3134b8b4e", - "sha256:1a511470db3aa97432ac8c1bf014fcc6c9fbfd0f4b1313024d342549cf86bcd6", - "sha256:1c447b0d108cddc69036b1b3910fac159f2b51fdeec7f13872e059b7bc932be1", - "sha256:2278453c6a76280b38855a263198961938108ea2333ee145c5168c36b8e2b376", - "sha256:240509721a663836b611fa13ca1843079fc52d0b91ef3f92d9bba8da12e768a0", - "sha256:4e21340c07090ddc8c16deebfd82eb9c9e1ec5e62f57bb86194a2595fd7b46e0", - "sha256:570e916a44a361d4e85f355aacd90e9113319c78ce3c2d098d2ddf9631b34505", - "sha256:59d5c6302d22c16d59611a9fd53556554010db1d47e9df5df37be05007bebe75", - "sha256:6a46eba253cedcbe8a6469f881f014f0a98819d99d341461630885139850e281", - "sha256:6f567df0601e9c7434958143aebea47a9c4b45434ea0ae0286a4ec19e9877169", - "sha256:781906e45ef1d10a0ed9ec8ab83a09b5e0d742de70e627b20d61ccb1b1d3964d", - "sha256:8469377a437dbc31e480993399fd1fd15fe26f382dc04c51c9cb73e42965cc06", - "sha256:8cd0d587aaac74194ad3e68029124c06245acaeddaae14cb45844e5c9bebeea4", - "sha256:97a023f97cddf00831ba04886d1596ef10f59b93df7f855856f037190936e868", - "sha256:a973d5a7a324e2a5230ad7c43f5e1383cac51ef4903bf274936a5634b724b531", - "sha256:af360e62a9790e0a96bc9ac845d87bfa0e4ee0ee68547ae8b5a9c1030517dbef", - "sha256:b706c70070eea03411b1761fff3a2675da28d042a1ab7d0863b3efe1faa125c9", - "sha256:bfd7a9fddd11d116a58b62ee6c502fd24cfe22a4792261f258f886aa41c2a899", - "sha256:c30d8766a055c22e39dd7e1a4f98f6266169f2de05db737efe509c2fb9c8a3c8", - "sha256:c53dc8ee3bb7b7e28ee9feb996a0c999137be6c1d3b02cb6b3c4cba4f9e5ed09", - 
"sha256:c95d514093b80e5309bdca5dd99e51bcf82c44043b57c34594d9d7556bd04d05", - "sha256:d43cf21df524283daa80ecad551c306b7f52881c8d0fe4e3e76a96b626b6d8d8", - "sha256:d62205f00f461fe8b24ade07499454a3b7adf3def1225e258b994e2215fd15c5", - "sha256:e289a857dca3b35d3615c3a6a438622e20d1bf0abcb82c57d866c8d0be3f44c4", - "sha256:e5f6aa56dda92472e9d6f7b1e6331f4e2d51a67caafff4d4c5121cadac03941e", - "sha256:f4b1c65ee86bfbf7d0c3dfd90592a9e3d6e9ecd36c367c884094c050d4c35d04" + "sha256:03855ee22980c3e4863dc84c42d6d2901133362db5daf4c36b710dd895d78f0a", + "sha256:06b52815d4ad38d6524666e0d50fe9173533c9cc145a5779b89733284e6f688f", + "sha256:11116d424734fe356d8777f89d625f0df783251ada95d6261b4c36ad27a394bb", + "sha256:119e0355dbdd4cf593b17f2fc5dbd4aec2b8899d0057e4957ba92f941f704bf5", + "sha256:1ec66700a10e3c75f1f92cbde36cca0d3aaee4c73dfa26699495a3a30b09093c", + "sha256:2dc522e25e57e88b4980d2bdd334825dbf6fa55f28a922fc3bfa60cc09e5ef53", + "sha256:3a5f08039eee9ea195a89e180c5762bfb55258bfb9abb61a20d3abee3b37fd12", + "sha256:49461446b783945597c4076aea3f49aee4b4ce922bd241e4fcf62a3e7c61794c", + "sha256:4afa350f162551cf402bfa3cd8302165c8e03e689c897d185f16a167328cc6dd", + "sha256:4b5a9bcb56cc146c3932c648603b24514447eafa6ce9295234767bf92f69b504", + "sha256:625116aca6c4b57c56ea3d70369cacc4d62fead4930f8329d242e4fe7a58ce4b", + "sha256:654c1635f2313d0843028487db2191530bca45af61ca85d0b16555c399625b0e", + "sha256:8092a5a06ad9a7a247f2a76ace121183dc4e1a84c259cf9c2ce3bbb69fac3582", + "sha256:832339223b9ce56b7b15168e691ae654d345ac1635eeb367ade9ecfe0e66bee0", + "sha256:8ca9dca965bd86ea3631b975d63b0693566d3cc347e55786d5514988b6f5b84c", + "sha256:a62162be05edf64f819925ea88d09d18b09bebf20971b363ce0c24e8b4aa14c0", + "sha256:b88fa3b8a3469f22b4f13d045d9bd3eda797aa4e406fde0a2644bc92bbdd4bdd", + "sha256:c13d311a4c4a8d671f5860317eb5f09591fbe8259676b86a85769423b544451e", + "sha256:c2c6c56ee97485a127555c9595c069201b5161de9d05495fbe2132b5ac104786", + "sha256:c3466a84fce42c2016113101018a9981804097bacbab029c2d5b4fcb224b89de", 
+ "sha256:c8a2b7ccff330ae4c460aff36626f911f918555660cc28163417cb84ffb25789", + "sha256:cb905f3d2e290a8b8f1579d3984f2cfa7c3a29cc7cba608540ceeed18513f520", + "sha256:cfcf28ed4ce9ced47b9b9670a4f0d3d3c0e4d4779ad4dadb1ad468b097f808aa", + "sha256:dd3e6547ecf842a29cf25123fbf8d2461c53c8d37aa20d87ecee130c89b7079b", + "sha256:ea37320877d56a7f0a1e6a625d892cf963aa7f570013499f5b8d5ab8402b5625", + "sha256:f1fce1e4929157b2afeb4bb7069204d4370bab9f4fc03ca1fbec8bd601f8c87d", + "sha256:f43109822df2d3faac7aad79613f5f02e4eab0fc8ad7932d2e70e2a83bd49c26" ], - "version": "==2020.10.23" + "version": "==2020.10.28" }, "scikit-learn": { "hashes": [ @@ -383,6 +391,13 @@ ], "version": "==2.1" }, + "watchdog": { + "hashes": [ + "sha256:4214e1379d128b0588021880ccaf40317ee156d4603ac388b9adcf29165e0c04" + ], + "index": "pypi", + "version": "==0.10.3" + }, "whitenoise": { "hashes": [ "sha256:05ce0be39ad85740a78750c86a93485c40f08ad8c62a6006de0233765996e5c7", @@ -674,11 +689,11 @@ }, "pytest": { "hashes": [ - "sha256:7a8190790c17d79a11f847fba0b004ee9a8122582ebff4729a082c109e81a4c9", - "sha256:8f593023c1a0f916110285b6efd7f99db07d59546e3d8c36fc60e2ab05d3be92" + "sha256:4288fed0d9153d9646bfcdf0c0428197dba1ecb27a33bb6e031d002fa88653fe", + "sha256:c0a7e94a8cdbc5422a51ccdad8e6f1024795939cc89159a0ae7f0b316ad3823e" ], "index": "pypi", - "version": "==6.1.1" + "version": "==6.1.2" }, "pytest-cov": { "hashes": [ @@ -835,10 +850,11 @@ }, "toml": { "hashes": [ - "sha256:926b612be1e5ce0634a2ca03470f95169cf16f939018233a670519cb4ac58b0f", - "sha256:bda89d5935c2eac546d648028b9901107a595863cb36bae0c73ac804a9b4ce88" + "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", + "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f" ], - "version": "==0.10.1" + "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==0.10.2" }, "tox": { "hashes": [ diff --git a/README.md b/README.md index b5755d80f..6003090fa 100644 --- a/README.md +++ 
b/README.md @@ -25,9 +25,11 @@ Here's what you get: This is a list of changes that have been made to the original project. ## Added -- **A new single page UI** built with bootstrap and Angular. Its much more responsive than the django admin pages. -- **Document uploading on the web page.** This is very crude right now, but gets the job done. It simply uploads the documents and stores them in the configured consumer directory. The API for that has always been in the project, there simply was no form on the UI to support it. -- **Full text search** with a proper document indexer: The search feature sorts documents by relevance to the search query, highlights query terms in the found documents and provides autocomplete while typing the query. This is still very basic but will see extensions in the future. +- **A new single page UI** built with bootstrap and Angular. It's much more responsive than the django admin pages. It features the following improvements over the old django admin interface: + - *Document uploading on the web page.* This is very crude right now, but gets the job done. It simply uploads the documents and stores them in the configured consumer directory. The API for that has always been in the project, there simply was no form on the UI to support it. + - *Full text search* with a proper document indexer: The search feature sorts documents by relevance to the search query, highlights query terms in the found documents and provides autocomplete while typing the query. This is still very basic but will see extensions in the future. + - *Saveable filters.* Save filter and sorting presets and optionally display a couple documents of saved filters (i.e., your inbox sorted descending by added date, or tagged TODO, oldest to newest) on the dashboard. + - *Statistics.* Provides basic statistics about your document collection. - **Document types.** Similar to correspondents, each document may have a type (i.e., invoice, letter, receipt, bank statement, ...). 
I've initially intented to use this for some individual processing of differently typed documents, however, no such features exists yet. - **Inbox tags.** These tags are automatically assigned to every newly scanned document. They are intented to be removed once you have manually edited the meta data of a document. - **Automatic matching** for document types, correspondents, and tags. A new matching algorithm has been implemented (Auto), which is based on a classification model (simple feed forward neural nets are used). This classifier is trained on your document collection and learns to assign metadata to new documents based on their similiarity to existing documents. @@ -36,7 +38,11 @@ This is a list of changes that have been made to the original project. - **Archive serial numbers.** These are there to support the recommended workflow for storing physical copies of very important documents. The idea is that if a document has to be kept in physical form, you write a running number on the document before scanning (the archive serial number) and keep these documents sorted by number in a binder. If you need to access a specific physical document at some point in time, search for the document in paperless, identify the ASN and grab the document. ## Modified -- **(BREAKING) REST API changes.** In order to support the new UI, changes had to be made to the API. Some filters are not available anymore, other filters were added. Furthermore, foreign key relationships are not expressed with URLs anymore, but with their respective ids. Also, the old urls for fetching documents and thumbnails are not valid anymore. These resources are now served through the api. +- **(BREAKING) REST API changes.** In order to support the new UI, changes had to be made to the API. Some filters are not available anymore, other filters were added. Furthermore, foreign key relationships are not expressed with URLs anymore, but with their respective ids. 
Also, the urls for fetching documents and thumbnails have changed. Redirects are in place to support the old urls. + +## Internal changes +- Many improvements to the code. More concise logging of the consumer, better multithreading of the tesseract parser for large documents, less hacks overall. +- Updated docker image. This image runs everything in a single container. (Except the optional database, of course) ## Removed @@ -50,8 +56,7 @@ These features were removed each due to two reasons. First, I did not feel these These features will make it into the application at some point, sorted by priority. -- **Saveable filters.** Save filter and sorting presets and optionally display a couple documents of saved filters (i.e., your inbox sorted descending by added date, or tagged TODO, oldest to newest) on the dash board. -- **Better tag editor.** The tag editor on the document detail page is not very convenient. This was put in there to get the project working but will be replaced with something nicer. +- **Better tag editor.** The tag editor on the document detail page is not very convenient. This was put in there to get the project working but will be replaced with something nicer eventually. - **More search.** The search backend is incredibly versatile and customizable. Searching is the most important feature of this project and thus, I want to implement things like: - Group and limit search results by correspondent, show “more from this” links in the results. - Ability to search for “Similar documents” in the search results diff --git a/docker-compose.env.example b/docker-compose.env.example index d5339db1f..cc2a1d3ec 100644 --- a/docker-compose.env.example +++ b/docker-compose.env.example @@ -1,35 +1,43 @@ -PAPERLESS_DBENGINE="django.db.backends.postgresql_psycopg2" +# Database settings for paperless +# If you want to use sqlite instead, remove this setting. 
PAPERLESS_DBHOST="db" -PAPERLESS_DBNAME="paperless" -PAPERLESS_DBUSER="paperless" -PAPERLESS_DBPASS="paperless" - -PAPERLESS_CONSUMPTION_DIR="../consume" - -# Environment variables to set for Paperless -# Commented out variables will be replaced with a default within Paperless. -# -# In addition to what you see here, you can also define any values you find in -# paperless.conf.example here. Values like: -# -# * PAPERLESS_PASSPHRASE -# * PAPERLESS_CONSUME_MAIL_HOST -# -# ...are all explained in that file but can be defined here, since the Docker -# installation doesn't make use of paperless.conf. - -# Use this variable to set a timezone for the Paperless Docker containers. If not specified, defaults to UTC. -#TZ=America/Los_Angeles - -# Additional languages to install for text recognition. Note that this is -# different from PAPERLESS_OCR_LANGUAGE (default=eng), which defines the -# default language used when guessing the language from the OCR output. -# The container installs English, German, Italian, Spanish and French by -# default. -#PAPERLESS_OCR_LANGUAGES=deu ita spa fra # The UID and GID of the user used to run paperless in the container. Set this # to your UID and GID on the host so that you have write access to the # consumption directory. #USERMAP_UID=1000 #USERMAP_GID=1000 + +# Additional languages to install for text recognition, separated by a +# whitespace. Note that this is +# different from PAPERLESS_OCR_LANGUAGE (default=eng), which defines the +# default language used when guessing the language from the OCR output. +# The container installs English, German, Italian, Spanish and French by +# default. +# See https://packages.debian.org/search?keywords=tesseract-ocr-&searchon=names&suite=buster +# for available languages. 
+#PAPERLESS_OCR_LANGUAGES=tur ces + +############################################################################### +# Paperless-specific settings # +############################################################################### + +# All settings defined in the paperless.conf.example can be used here. The +# Docker setup does not use the configuration file. +# A few commonly adjusted settings are provided below. + +# Adjust this key if you plan to make paperless available publicly. It should +# be a very long sequence of random characters. You don't need to remember it. +#PAPERLESS_SECRET_KEY="change-me" + +# Use this variable to set a timezone for the Paperless Docker containers. If not specified, defaults to UTC. +#PAPERLESS_TIME_ZONE=America/Los_Angeles + +# The default language to use for OCR. Set this to the language most of your +# documents are written in. +#PAPERLESS_OCR_LANGUAGE="eng" + +# By default Paperless does not OCR a document if the text can be retrieved from +# the document directly. Set to true to always OCR documents. 
(i.e., if you +# know that some of your documents have faulty/bad OCR data) +#PAPERLESS_OCR_ALWAYS="true" diff --git a/docker-compose.yml.example b/docker-compose.yml.example index 5c5123fd8..f6312ff50 100644 --- a/docker-compose.yml.example +++ b/docker-compose.yml.example @@ -1,4 +1,4 @@ -version: "3.8" +version: "3.4" services: db: image: postgres:13 @@ -27,23 +27,10 @@ services: - data:/usr/src/paperless/data - media:/usr/src/paperless/media - ./export:/usr/src/paperless/export - env_file: docker-compose.env - environment: - - PAPERLESS_OCR_LANGUAGES= - command: ["gunicorn", "-b", "0.0.0.0:8000"] - - consumer: - image: paperless_app - depends_on: - - webserver - - db - restart: on-failure:5 - volumes: - - data:/usr/src/paperless/data - - media:/usr/src/paperless/media - ./consume:/usr/src/paperless/consume env_file: docker-compose.env - command: ["document_consumer"] + command: ["supervisord", "-c", "/etc/supervisord.conf"] + volumes: data: diff --git a/docs/conf.py b/docs/conf.py index 7cf8c9fe1..eb6720dbb 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -12,9 +12,6 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys -import os - __version__ = None exec(open("../src/paperless/version.py").read()) diff --git a/paperless.conf.example b/paperless.conf.example index 2bb24cee4..41ac778e7 100644 --- a/paperless.conf.example +++ b/paperless.conf.example @@ -10,7 +10,14 @@ # By default, sqlite is used as the database backend. This can be changed here. # The docker-compose service definition uses a postgresql server. The # configuration for this is already done inside the docker-compose.env file. -#PAPERLESS_DBENGINE="django.db.backends.postgresql_psycopg2" + +#Set PAPERLESS_DBHOST and postgresql will be used instead of sqlite. 
+#PAPERLESS_DBHOST="localhost" + +#Adjust port if necessary +#PAPERLESS_DBPORT= + +#name, user and pass all default to "paperless" #PAPERLESS_DBNAME="paperless" #PAPERLESS_DBUSER="paperless" #PAPERLESS_DBPASS="paperless" @@ -23,7 +30,7 @@ # This where your documents should go to be consumed. Make sure that it exists # and that the user running the paperless service can read/write its contents # before you start Paperless. -#PAPERLESS_CONSUMPTION_DIR="" +PAPERLESS_CONSUMPTION_DIR="../consume" # This is where paperless stores all its data (search index, sqlite database, # classification model, etc). @@ -165,7 +172,10 @@ # Customize the default language that tesseract will attempt to use when -# parsing documents. It should be a 3-letter language code consistent with ISO +# parsing documents. The default language is used whenever +# - No language could be detected on a document +# - No tesseract data files are available for the detected language +# It should be a 3-letter language code consistent with ISO # 639: https://www.loc.gov/standards/iso639-2/php/code_list.php #PAPERLESS_OCR_LANGUAGE=eng @@ -203,21 +213,6 @@ # with little impact to OCR accuracy. #PAPERLESS_CONVERT_DENSITY=300 - -# (This setting is ignored on Linux where inotify is used instead of a -# polling loop.) -# The number of seconds that Paperless will wait between checking -# PAPERLESS_CONSUMPTION_DIR. If you tend to write documents to this directory -# rarely, you may want to use a higher value than the default (10). -#PAPERLESS_CONSUMER_LOOP_TIME=10 - - -# By default Paperless stops consuming a document if no language can be -# detected. Set to true to consume documents even if the language detection -# fails. -#PAPERLESS_FORGIVING_OCR="false" - - # By default Paperless does not OCR a document if the text can be retrieved from # the document directly. Set to true to always OCR documents. 
#PAPERLESS_OCR_ALWAYS="false" diff --git a/scripts/docker-entrypoint.sh b/scripts/docker-entrypoint.sh index 9d4a429f4..20cabc8ea 100644 --- a/scripts/docker-entrypoint.sh +++ b/scripts/docker-entrypoint.sh @@ -79,22 +79,11 @@ install_languages() { done } -if [[ "$1" != "/"* ]]; then - initialize - - # Install additional languages if specified - if [[ ! -z "$PAPERLESS_OCR_LANGUAGES" ]]; then - install_languages "$PAPERLESS_OCR_LANGUAGES" - fi - - if [[ "$1" = "gunicorn" ]]; then - shift - cd /usr/src/paperless/src/ && \ - exec sudo -HEu paperless gunicorn -c /usr/src/paperless/gunicorn.conf.py "$@" paperless.wsgi - fi - - exec sudo -HEu paperless python3 manage.py "$@" +initialize +# Install additional languages if specified +if [[ ! -z "$PAPERLESS_OCR_LANGUAGES" ]]; then + install_languages "$PAPERLESS_OCR_LANGUAGES" fi exec "$@" diff --git a/scripts/imagemagick-policy.xml b/scripts/imagemagick-policy.xml new file mode 100644 index 000000000..095355706 --- /dev/null +++ b/scripts/imagemagick-policy.xml @@ -0,0 +1,96 @@ + + + + + +]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/scripts/paperless-cron b/scripts/paperless-cron new file mode 100644 index 000000000..238857227 --- /dev/null +++ b/scripts/paperless-cron @@ -0,0 +1,5 @@ +#!/bin/sh + +cd /usr/src/paperless/src + +sudo -HEu paperless python3 manage.py document_create_classifier diff --git a/scripts/supervisord.conf b/scripts/supervisord.conf new file mode 100644 index 000000000..d3ff288de --- /dev/null +++ b/scripts/supervisord.conf @@ -0,0 +1,33 @@ +[supervisord] +nodaemon=true ; start in foreground if true; default false +logfile=/var/log/supervisord/supervisord.log ; main log file; default $CWD/supervisord.log +pidfile=/var/log/supervisord/supervisord.pid ; supervisord pidfile; default supervisord.pid +logfile_maxbytes=50MB ; max main logfile bytes b4 rotation; default 50MB +logfile_backups=10 ; # of main logfile backups; 0 means none, default 10 
+loglevel=info ; log level; default info; others: debug,warn,trace + +[program:gunicorn] +command=gunicorn -c /usr/src/paperless/gunicorn.conf.py -b 0.0.0.0:8000 paperless.wsgi +user=paperless + +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 + +[program:consumer] +command=python3 manage.py document_consumer +user=paperless + +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 + +[program:anacron] +command=anacron -d + +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 diff --git a/src-ui/package-lock.json b/src-ui/package-lock.json index 993d5c5cf..1d73a856f 100644 --- a/src-ui/package-lock.json +++ b/src-ui/package-lock.json @@ -2140,6 +2140,11 @@ } } }, + "@scarf/scarf": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@scarf/scarf/-/scarf-1.1.0.tgz", + "integrity": "sha512-b2iE8kjjzzUo2WZ0xuE2N77kfnTds7ClrDxcz3Atz7h2XrNVoAPUoT75i7CY0st5x++70V91Y+c6RpBX9MX7Jg==" + }, "@schematics/angular": { "version": "10.1.5", "resolved": "https://registry.npmjs.org/@schematics/angular/-/angular-10.1.5.tgz", @@ -8263,6 +8268,15 @@ "tslib": "^2.0.0" } }, + "ngx-infinite-scroll": { + "version": "9.1.0", + "resolved": "https://registry.npmjs.org/ngx-infinite-scroll/-/ngx-infinite-scroll-9.1.0.tgz", + "integrity": "sha512-ZulbahgFsoPmP8cz7qPGDeFX9nKiSm74aav8vXNSI1ZoPiGYY5FQd8AK+yXqygY7tyCJRyt8Wp3DIg7zgP5dPA==", + "requires": { + "@scarf/scarf": "^1.1.0", + "opencollective-postinstall": "^2.0.2" + } + }, "nice-try": { "version": "1.0.5", "resolved": "https://registry.npmjs.org/nice-try/-/nice-try-1.0.5.tgz", @@ -8731,6 +8745,11 @@ "is-wsl": "^2.1.1" } }, + "opencollective-postinstall": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/opencollective-postinstall/-/opencollective-postinstall-2.0.3.tgz", + "integrity": 
"sha512-8AV/sCtuzUeTo8gQK5qDZzARrulB3egtLzFgteqB2tcT4Mw7B8Kt7JcDHmltjz6FOAHsvTevk70gZEbhM4ZS9Q==" + }, "opn": { "version": "5.5.0", "resolved": "https://registry.npmjs.org/opn/-/opn-5.5.0.tgz", diff --git a/src-ui/package.json b/src-ui/package.json index 08fe6d33b..b2da7eabe 100644 --- a/src-ui/package.json +++ b/src-ui/package.json @@ -24,6 +24,7 @@ "bootstrap": "^4.5.0", "ng-bootstrap": "^1.6.3", "ngx-file-drop": "^10.0.0", + "ngx-infinite-scroll": "^9.1.0", "rxjs": "~6.6.0", "tslib": "^2.0.0", "uuid": "^8.3.1", diff --git a/src-ui/src/app/app-routing.module.ts b/src-ui/src/app/app-routing.module.ts index f1398e8f1..fde8fd31f 100644 --- a/src-ui/src/app/app-routing.module.ts +++ b/src-ui/src/app/app-routing.module.ts @@ -19,7 +19,7 @@ const routes: Routes = [ {path: '', component: AppFrameComponent, children: [ {path: 'dashboard', component: DashboardComponent, canActivate: [AuthGuardService] }, {path: 'documents', component: DocumentListComponent, canActivate: [AuthGuardService] }, - {path: 'view/:name', component: DocumentListComponent, canActivate: [AuthGuardService] }, + {path: 'view/:id', component: DocumentListComponent, canActivate: [AuthGuardService] }, {path: 'search', component: SearchComponent, canActivate: [AuthGuardService] }, {path: 'documents/:id', component: DocumentDetailComponent, canActivate: [AuthGuardService] }, diff --git a/src-ui/src/app/app.module.ts b/src-ui/src/app/app.module.ts index 73c3244e3..dce6a9225 100644 --- a/src-ui/src/app/app.module.ts +++ b/src-ui/src/app/app.module.ts @@ -36,6 +36,8 @@ import { NgxFileDropModule } from 'ngx-file-drop'; import { TextComponent } from './components/common/input/text/text.component'; import { SelectComponent } from './components/common/input/select/select.component'; import { CheckComponent } from './components/common/input/check/check.component'; +import { SaveViewConfigDialogComponent } from './components/document-list/save-view-config-dialog/save-view-config-dialog.component'; +import { 
InfiniteScrollModule } from 'ngx-infinite-scroll'; @NgModule({ declarations: [ @@ -66,7 +68,8 @@ import { CheckComponent } from './components/common/input/check/check.component' DocumentCardSmallComponent, TextComponent, SelectComponent, - CheckComponent + CheckComponent, + SaveViewConfigDialogComponent ], imports: [ BrowserModule, @@ -75,7 +78,8 @@ import { CheckComponent } from './components/common/input/check/check.component' HttpClientModule, FormsModule, ReactiveFormsModule, - NgxFileDropModule + NgxFileDropModule, + InfiniteScrollModule ], providers: [ DatePipe, diff --git a/src-ui/src/app/components/app-frame/app-frame.component.html b/src-ui/src/app/components/app-frame/app-frame.component.html index c4158bf9c..a0350eb78 100644 --- a/src-ui/src/app/components/app-frame/app-frame.component.html +++ b/src-ui/src/app/components/app-frame/app-frame.component.html @@ -43,6 +43,20 @@ + + + diff --git a/src-ui/src/app/components/app-frame/app-frame.component.ts b/src-ui/src/app/components/app-frame/app-frame.component.ts index 33a13b384..595da5b1d 100644 --- a/src-ui/src/app/components/app-frame/app-frame.component.ts +++ b/src-ui/src/app/components/app-frame/app-frame.component.ts @@ -7,6 +7,7 @@ import { PaperlessDocument } from 'src/app/data/paperless-document'; import { AuthService } from 'src/app/services/auth.service'; import { OpenDocumentsService } from 'src/app/services/open-documents.service'; import { SearchService } from 'src/app/services/rest/search.service'; +import { SavedViewConfigService } from 'src/app/services/saved-view-config.service'; @Component({ selector: 'app-app-frame', @@ -15,7 +16,13 @@ import { SearchService } from 'src/app/services/rest/search.service'; }) export class AppFrameComponent implements OnInit, OnDestroy { - constructor (public router: Router, private openDocumentsService: OpenDocumentsService, private authService: AuthService, private searchService: SearchService) { + constructor ( + public router: Router, + private 
openDocumentsService: OpenDocumentsService, + private authService: AuthService, + private searchService: SearchService, + public viewConfigService: SavedViewConfigService + ) { } searchField = new FormControl('') diff --git a/src-ui/src/app/components/common/edit-dialog/edit-dialog.component.ts b/src-ui/src/app/components/common/edit-dialog/edit-dialog.component.ts index 153f588a3..ba0d90847 100644 --- a/src-ui/src/app/components/common/edit-dialog/edit-dialog.component.ts +++ b/src-ui/src/app/components/common/edit-dialog/edit-dialog.component.ts @@ -1,8 +1,8 @@ import { Directive, EventEmitter, Input, OnInit, Output } from '@angular/core'; -import { Form, FormGroup } from '@angular/forms'; +import { FormGroup } from '@angular/forms'; import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'; import { Observable } from 'rxjs'; -import { MatchingModel } from 'src/app/data/matching-model'; +import { MATCHING_ALGORITHMS } from 'src/app/data/matching-model'; import { ObjectWithId } from 'src/app/data/object-with-id'; import { AbstractPaperlessService } from 'src/app/services/rest/abstract-paperless-service'; import { Toast, ToastService } from 'src/app/services/toast.service'; @@ -47,7 +47,7 @@ export abstract class EditDialogComponent implements OnI } getMatchingAlgorithms() { - return MatchingModel.MATCHING_ALGORITHMS + return MATCHING_ALGORITHMS } save() { diff --git a/src-ui/src/app/components/common/input/select/select.component.ts b/src-ui/src/app/components/common/input/select/select.component.ts index a53566dab..c8e213722 100644 --- a/src-ui/src/app/components/common/input/select/select.component.ts +++ b/src-ui/src/app/components/common/input/select/select.component.ts @@ -1,6 +1,5 @@ import { Component, EventEmitter, forwardRef, Input, OnInit, Output } from '@angular/core'; -import { ControlValueAccessor, NG_VALUE_ACCESSOR } from '@angular/forms'; -import { v4 as uuidv4 } from 'uuid'; +import { NG_VALUE_ACCESSOR } from '@angular/forms'; import { 
AbstractInputComponent } from '../abstract-input'; @Component({ diff --git a/src-ui/src/app/components/common/tag/tag.component.ts b/src-ui/src/app/components/common/tag/tag.component.ts index bb4c2a15c..a7f81fa0a 100644 --- a/src-ui/src/app/components/common/tag/tag.component.ts +++ b/src-ui/src/app/components/common/tag/tag.component.ts @@ -1,5 +1,5 @@ import { Component, EventEmitter, Input, OnInit, Output } from '@angular/core'; -import { PaperlessTag } from 'src/app/data/paperless-tag'; +import { TAG_COLOURS, PaperlessTag } from 'src/app/data/paperless-tag'; @Component({ selector: 'app-tag', @@ -23,7 +23,7 @@ export class TagComponent implements OnInit { } getColour() { - return PaperlessTag.COLOURS.find(c => c.id == this.tag.colour) + return TAG_COLOURS.find(c => c.id == this.tag.colour) } } diff --git a/src-ui/src/app/components/dashboard/dashboard.component.html b/src-ui/src/app/components/dashboard/dashboard.component.html index fc7d4067e..1894b3e0b 100644 --- a/src-ui/src/app/components/dashboard/dashboard.component.html +++ b/src-ui/src/app/components/dashboard/dashboard.component.html @@ -2,15 +2,39 @@ -

... This space for rent

-

This page will provide more information in the future, such as access to recently scanned documents, etc.

+

Welcome to paperless!

-

Statistics

-

None yet.

+ +

{{v.viewConfig.title}}

+ + + + + + + + + + + + + +
Date createdDocument
{{doc.created | date}}{{doc.title}} +
+ +
+ +

Saved views

+

This space is reserved to display your saved views. Go to your documents and save a view to have it displayed here!

+
+
+

Statistics

+

Documents in inbox: {{statistics.documents_inbox}}

+

Total documents: {{statistics.documents_total}}

Upload new Document

+
Document consumer status
+

This is what it might look like in the future.

+
+
+

Filename.pdf: Running tesseract on page 4/8...

+

+
+
+
+
+

Filename2.pdf: Completed.

+

+
+
diff --git a/src-ui/src/app/components/dashboard/dashboard.component.ts b/src-ui/src/app/components/dashboard/dashboard.component.ts index 68173cefc..f8d5fb0ae 100644 --- a/src-ui/src/app/components/dashboard/dashboard.component.ts +++ b/src-ui/src/app/components/dashboard/dashboard.component.ts @@ -1,7 +1,16 @@ +import { HttpClient } from '@angular/common/http'; import { Component, OnInit } from '@angular/core'; -import { FileSystemDirectoryEntry, FileSystemFileEntry, NgxFileDropEntry } from 'ngx-file-drop'; +import { FileSystemFileEntry, NgxFileDropEntry } from 'ngx-file-drop'; +import { Observable } from 'rxjs'; import { DocumentService } from 'src/app/services/rest/document.service'; +import { SavedViewConfigService } from 'src/app/services/saved-view-config.service'; import { Toast, ToastService } from 'src/app/services/toast.service'; +import { environment } from 'src/environments/environment'; + +export interface Statistics { + documents_total?: number + documents_inbox?: number +} @Component({ selector: 'app-dashboard', @@ -10,11 +19,29 @@ import { Toast, ToastService } from 'src/app/services/toast.service'; }) export class DashboardComponent implements OnInit { - constructor(private documentService: DocumentService, private toastService: ToastService) { } + constructor(private documentService: DocumentService, private toastService: ToastService, + public savedViewConfigService: SavedViewConfigService, private http: HttpClient) { } + + + savedDashboardViews = [] + statistics: Statistics = {} ngOnInit(): void { + this.savedViewConfigService.getDashboardConfigs().forEach(config => { + this.documentService.list(1,10,config.sortField,config.sortDirection,config.filterRules).subscribe(result => { + this.savedDashboardViews.push({viewConfig: config, documents: result.results}) + }) + }) + this.getStatistics().subscribe(statistics => { + this.statistics = statistics + }) } + getStatistics(): Observable { + return 
this.http.get(`${environment.apiBaseUrl}statistics/`) + } + + public fileOver(event){ console.log(event); } diff --git a/src-ui/src/app/components/document-detail/document-detail.component.html b/src-ui/src/app/components/document-detail/document-detail.component.html index a32418fe7..f6bb4cebb 100644 --- a/src-ui/src/app/components/document-detail/document-detail.component.html +++ b/src-ui/src/app/components/document-detail/document-detail.component.html @@ -69,6 +69,8 @@ + Hold CTRL to (de)select multiple tags. +     diff --git a/src-ui/src/app/components/document-detail/document-detail.component.ts b/src-ui/src/app/components/document-detail/document-detail.component.ts index 8ae46b9c8..308e499a4 100644 --- a/src-ui/src/app/components/document-detail/document-detail.component.ts +++ b/src-ui/src/app/components/document-detail/document-detail.component.ts @@ -6,7 +6,7 @@ import { NgbModal } from '@ng-bootstrap/ng-bootstrap'; import { PaperlessCorrespondent } from 'src/app/data/paperless-correspondent'; import { PaperlessDocument } from 'src/app/data/paperless-document'; import { PaperlessDocumentType } from 'src/app/data/paperless-document-type'; -import { PaperlessTag } from 'src/app/data/paperless-tag'; +import { TAG_COLOURS, PaperlessTag } from 'src/app/data/paperless-tag'; import { DocumentListViewService } from 'src/app/services/document-list-view.service'; import { OpenDocumentsService } from 'src/app/services/open-documents.service'; import { CorrespondentService } from 'src/app/services/rest/correspondent.service'; @@ -17,6 +17,7 @@ import { DeleteDialogComponent } from '../common/delete-dialog/delete-dialog.com import { CorrespondentEditDialogComponent } from '../manage/correspondent-list/correspondent-edit-dialog/correspondent-edit-dialog.component'; import { DocumentTypeEditDialogComponent } from '../manage/document-type-list/document-type-edit-dialog/document-type-edit-dialog.component'; import { TagEditDialogComponent } from 
'../manage/tag-list/tag-edit-dialog/tag-edit-dialog.component'; + @Component({ selector: 'app-document-detail', templateUrl: './document-detail.component.html', @@ -116,7 +117,7 @@ export class DocumentDetailComponent implements OnInit { } getColour(id: number) { - return PaperlessTag.COLOURS.find(c => c.id == this.getTag(id).colour) + return TAG_COLOURS.find(c => c.id == this.getTag(id).colour) } addTag(id: number) { @@ -166,7 +167,11 @@ export class DocumentDetailComponent implements OnInit { close() { this.openDocumentService.closeDocument(this.document) - this.router.navigate(['documents']) + if (this.documentListViewService.viewConfig) { + this.router.navigate(['view', this.documentListViewService.viewConfig.id]) + } else { + this.router.navigate(['documents']) + } } delete() { diff --git a/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.html b/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.html index 167cf0247..2ad12ddc2 100644 --- a/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.html +++ b/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.html @@ -6,7 +6,10 @@
-
{{document.correspondent ? document.correspondent.name + ': ' : ''}}{{document.title}}
+
+
{{document.correspondent ? document.correspondent.name + ': ' : ''}}{{document.title}}
+
#{{document.archive_serial_number}}
+

{{getDetailsAsString()}} @@ -29,7 +32,7 @@ Download

- {{document.created | date}} + Created: {{document.created | date}}
diff --git a/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.ts b/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.ts index 0f3aa69ea..c05b1f039 100644 --- a/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.ts +++ b/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.ts @@ -1,9 +1,7 @@ import { Component, Input, OnInit } from '@angular/core'; import { DomSanitizer } from '@angular/platform-browser'; import { PaperlessDocument } from 'src/app/data/paperless-document'; -import { PaperlessTag } from 'src/app/data/paperless-tag'; import { DocumentService } from 'src/app/services/rest/document.service'; -import { SearchResultHighlightedText } from 'src/app/services/rest/search.service'; @Component({ selector: 'app-document-card-large', diff --git a/src-ui/src/app/components/document-list/document-list.component.html b/src-ui/src/app/components/document-list/document-list.component.html index f7558be49..b66cfbfa0 100644 --- a/src-ui/src/app/components/document-list/document-list.component.html +++ b/src-ui/src/app/components/document-list/document-list.component.html @@ -1,74 +1,83 @@ - + -
+
-
+
- +
- +
+ + + +
+ + +
+ + +
Filter
- +
- +
- +
diff --git a/src-ui/src/app/components/document-list/document-list.component.ts b/src-ui/src/app/components/document-list/document-list.component.ts index 927b9aa43..be83bf0bf 100644 --- a/src-ui/src/app/components/document-list/document-list.component.ts +++ b/src-ui/src/app/components/document-list/document-list.component.ts @@ -1,6 +1,12 @@ import { Component, OnInit } from '@angular/core'; +import { ActivatedRoute, Router } from '@angular/router'; +import { NgbModal } from '@ng-bootstrap/ng-bootstrap'; +import { cloneFilterRules, FilterRule } from 'src/app/data/filter-rule'; +import { SavedViewConfig } from 'src/app/data/saved-view-config'; import { DocumentListViewService } from 'src/app/services/document-list-view.service'; -import { FilterRuleSet } from '../filter-editor/filter-editor.component'; +import { DOCUMENT_SORT_FIELDS } from 'src/app/services/rest/document.service'; +import { SavedViewConfigService } from 'src/app/services/saved-view-config.service'; +import { SaveViewConfigDialogComponent } from './save-view-config-dialog/save-view-config-dialog.component'; @Component({ selector: 'app-document-list', @@ -10,15 +16,18 @@ import { FilterRuleSet } from '../filter-editor/filter-editor.component'; export class DocumentListComponent implements OnInit { constructor( - public docs: DocumentListViewService) { } + public docs: DocumentListViewService, + public savedViewConfigService: SavedViewConfigService, + public route: ActivatedRoute, + public modalService: NgbModal) { } displayMode = 'smallCards' // largeCards, smallCards, details - filter = new FilterRuleSet() + filterRules: FilterRule[] = [] showFilter = false getSortFields() { - return DocumentListViewService.SORT_FIELDS + return DOCUMENT_SORT_FIELDS } setSort(field: string) { @@ -34,18 +43,47 @@ export class DocumentListComponent implements OnInit { if (localStorage.getItem('document-list:displayMode') != null) { this.displayMode = localStorage.getItem('document-list:displayMode') } - this.filter = 
this.docs.currentFilter.clone() - this.showFilter = this.filter.rules.length > 0 - this.reload() + this.route.paramMap.subscribe(params => { + if (params.has('id')) { + this.docs.viewConfig = this.savedViewConfigService.getConfig(params.get('id')) + } else { + this.filterRules = cloneFilterRules(this.docs.currentFilterRules) + this.showFilter = this.filterRules.length > 0 + this.docs.viewConfig = null + } + this.reload() + }) } reload() { this.docs.reload() } - applyFilter() { - this.docs.setFilter(this.filter.clone()) + applyFilterRules() { + this.docs.setFilterRules(this.filterRules) this.reload() } + loadViewConfig(config: SavedViewConfig) { + this.filterRules = cloneFilterRules(config.filterRules) + this.docs.setFilterRules(config.filterRules) + this.docs.currentSortField = config.sortField + this.docs.currentSortDirection = config.sortDirection + this.reload() + } + + saveViewConfig() { + let modal = this.modalService.open(SaveViewConfigDialogComponent, {backdrop: 'static'}) + modal.componentInstance.saveClicked.subscribe(formValue => { + this.savedViewConfigService.saveConfig({ + filterRules: cloneFilterRules(this.filterRules), + title: formValue.title, + showInDashboard: formValue.showInDashboard, + showInSideBar: formValue.showInSideBar, + sortDirection: this.docs.currentSortDirection, + sortField: this.docs.currentSortField + }) + modal.close() + }) + } } diff --git a/src-ui/src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.css b/src-ui/src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.css new file mode 100644 index 000000000..e69de29bb diff --git a/src-ui/src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.html b/src-ui/src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.html new file mode 100644 index 000000000..870431096 --- /dev/null +++ 
b/src-ui/src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.html @@ -0,0 +1,17 @@ +
+ + + +
diff --git a/src-ui/src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.spec.ts b/src-ui/src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.spec.ts new file mode 100644 index 000000000..11ac77c0b --- /dev/null +++ b/src-ui/src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.spec.ts @@ -0,0 +1,25 @@ +import { ComponentFixture, TestBed } from '@angular/core/testing'; + +import { SaveViewConfigDialogComponent } from './save-view-config-dialog.component'; + +describe('SaveViewConfigDialogComponent', () => { + let component: SaveViewConfigDialogComponent; + let fixture: ComponentFixture; + + beforeEach(async () => { + await TestBed.configureTestingModule({ + declarations: [ SaveViewConfigDialogComponent ] + }) + .compileComponents(); + }); + + beforeEach(() => { + fixture = TestBed.createComponent(SaveViewConfigDialogComponent); + component = fixture.componentInstance; + fixture.detectChanges(); + }); + + it('should create', () => { + expect(component).toBeTruthy(); + }); +}); diff --git a/src-ui/src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.ts b/src-ui/src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.ts new file mode 100644 index 000000000..6fcdbd2c8 --- /dev/null +++ b/src-ui/src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.ts @@ -0,0 +1,33 @@ +import { Component, EventEmitter, OnInit, Output } from '@angular/core'; +import { FormControl, FormGroup } from '@angular/forms'; +import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'; + +@Component({ + selector: 'app-save-view-config-dialog', + templateUrl: './save-view-config-dialog.component.html', + styleUrls: ['./save-view-config-dialog.component.css'] +}) +export class SaveViewConfigDialogComponent implements OnInit { + + constructor(private modal: NgbActiveModal) { } + + 
@Output() + public saveClicked = new EventEmitter() + + saveViewConfigForm = new FormGroup({ + title: new FormControl(''), + showInSideBar: new FormControl(false), + showInDashboard: new FormControl(false), + }) + + ngOnInit(): void { + } + + save() { + this.saveClicked.emit(this.saveViewConfigForm.value) + } + + cancel() { + this.modal.close() + } +} diff --git a/src-ui/src/app/components/filter-editor/filter-editor.component.html b/src-ui/src/app/components/filter-editor/filter-editor.component.html index de65b1150..1cca0fd7f 100644 --- a/src-ui/src/app/components/filter-editor/filter-editor.component.html +++ b/src-ui/src/app/components/filter-editor/filter-editor.component.html @@ -1,8 +1,9 @@ -
-
- @@ -13,7 +14,7 @@ - diff --git a/src-ui/src/app/components/filter-editor/filter-editor.component.ts b/src-ui/src/app/components/filter-editor/filter-editor.component.ts index a5d73a3a3..8c47ceafb 100644 --- a/src-ui/src/app/components/filter-editor/filter-editor.component.ts +++ b/src-ui/src/app/components/filter-editor/filter-editor.component.ts @@ -1,4 +1,6 @@ import { Component, EventEmitter, Input, OnInit, Output } from '@angular/core'; +import { FilterRule } from 'src/app/data/filter-rule'; +import { FilterRuleType, FILTER_RULE_TYPES } from 'src/app/data/filter-rule-type'; import { PaperlessCorrespondent } from 'src/app/data/paperless-correspondent'; import { PaperlessDocumentType } from 'src/app/data/paperless-document-type'; import { PaperlessTag } from 'src/app/data/paperless-tag'; @@ -6,66 +8,6 @@ import { CorrespondentService } from 'src/app/services/rest/correspondent.servic import { DocumentTypeService } from 'src/app/services/rest/document-type.service'; import { TagService } from 'src/app/services/rest/tag.service'; -export interface FilterRuleType { - name: string - filtervar: string - datatype: string //number, string, boolean, date -} - -export interface FilterRule { - type: FilterRuleType - value: any -} - -export class FilterRuleSet { - - static RULE_TYPES: FilterRuleType[] = [ - {name: "Title contains", filtervar: "title__icontains", datatype: "string"}, - {name: "Content contains", filtervar: "content__icontains", datatype: "string"}, - - {name: "ASN is", filtervar: "archive_serial_number", datatype: "number"}, - - {name: "Correspondent is", filtervar: "correspondent__id", datatype: "correspondent"}, - {name: "Document type is", filtervar: "document_type__id", datatype: "document_type"}, - {name: "Has tag", filtervar: "tags__id", datatype: "tag"}, - - {name: "Has any tag", filtervar: "is_tagged", datatype: "boolean"}, - - {name: "Date created before", filtervar: "created__date__lt", datatype: "date"}, - {name: "Date created after", filtervar: 
"created__date__gt", datatype: "date"}, - - {name: "Year created is", filtervar: "created__year", datatype: "number"}, - {name: "Month created is", filtervar: "created__month", datatype: "number"}, - {name: "Day created is", filtervar: "created__day", datatype: "number"}, - - {name: "Date added before", filtervar: "added__date__lt", datatype: "date"}, - {name: "Date added after", filtervar: "added__date__gt", datatype: "date"}, - - {name: "Date modified before", filtervar: "modified__date__lt", datatype: "date"}, - {name: "Date modified after", filtervar: "modified__date__gt", datatype: "date"}, - ] - - rules: FilterRule[] = [] - - toQueryParams() { - let params = {} - for (let rule of this.rules) { - params[rule.type.filtervar] = rule.value - } - return params - } - - clone(): FilterRuleSet { - let newRuleSet = new FilterRuleSet() - for (let rule of this.rules) { - newRuleSet.rules.push({type: rule.type, value: rule.value}) - } - return newRuleSet - } - - constructor() { } - -} @Component({ selector: 'app-filter-editor', @@ -77,28 +19,26 @@ export class FilterEditorComponent implements OnInit { constructor(private documentTypeService: DocumentTypeService, private tagService: TagService, private correspondentService: CorrespondentService) { } @Input() - ruleSet = new FilterRuleSet() - - @Output() - ruleSetChange = new EventEmitter() + filterRules: FilterRule[] = [] @Output() apply = new EventEmitter() - selectedRuleType: FilterRuleType = FilterRuleSet.RULE_TYPES[0] + selectedRuleType: FilterRuleType = FILTER_RULE_TYPES[0] correspondents: PaperlessCorrespondent[] = [] tags: PaperlessTag[] = [] documentTypes: PaperlessDocumentType[] = [] newRuleClicked() { - this.ruleSet.rules.push({type: this.selectedRuleType, value: null}) + this.filterRules.push({type: this.selectedRuleType, value: null}) + this.selectedRuleType = this.getRuleTypes().length > 0 ? 
this.getRuleTypes()[0] : null } removeRuleClicked(rule) { - let index = this.ruleSet.rules.findIndex(r => r == rule) + let index = this.filterRules.findIndex(r => r == rule) if (index > -1) { - this.ruleSet.rules.splice(index, 1) + this.filterRules.splice(index, 1) } } @@ -107,7 +47,7 @@ export class FilterEditorComponent implements OnInit { } clearClicked() { - this.ruleSet.rules.splice(0,this.ruleSet.rules.length) + this.filterRules.splice(0,this.filterRules.length) this.apply.next() } @@ -118,6 +58,7 @@ export class FilterEditorComponent implements OnInit { } getRuleTypes() { - return FilterRuleSet.RULE_TYPES + return FILTER_RULE_TYPES.filter(rt => rt.multi || !this.filterRules.find(r => r.type == rt)) } + } diff --git a/src-ui/src/app/components/manage/generic-list/generic-list.component.ts b/src-ui/src/app/components/manage/generic-list/generic-list.component.ts index 6b0a0f33e..12cf08ea9 100644 --- a/src-ui/src/app/components/manage/generic-list/generic-list.component.ts +++ b/src-ui/src/app/components/manage/generic-list/generic-list.component.ts @@ -1,6 +1,6 @@ import { Directive, OnInit } from '@angular/core'; import { NgbModal } from '@ng-bootstrap/ng-bootstrap'; -import { MatchingModel } from 'src/app/data/matching-model'; +import { MatchingModel, MATCHING_ALGORITHMS, MATCH_AUTO } from 'src/app/data/matching-model'; import { ObjectWithId } from 'src/app/data/object-with-id'; import { AbstractPaperlessService } from 'src/app/services/rest/abstract-paperless-service'; import { DeleteDialogComponent } from '../../common/delete-dialog/delete-dialog.component'; @@ -21,10 +21,10 @@ export abstract class GenericListComponent implements On public collectionSize = 0 getMatching(o: MatchingModel) { - if (o.matching_algorithm == MatchingModel.MATCH_AUTO) { + if (o.matching_algorithm == MATCH_AUTO) { return "Automatic" } else if (o.match && o.match.length > 0) { - return `${o.match} (${MatchingModel.MATCHING_ALGORITHMS.find(a => a.id == o.matching_algorithm).name})` 
+ return `${o.match} (${MATCHING_ALGORITHMS.find(a => a.id == o.matching_algorithm).name})` } else { return "-" } diff --git a/src-ui/src/app/components/manage/logs/logs.component.css b/src-ui/src/app/components/manage/logs/logs.component.css index e69de29bb..1f0112fbc 100644 --- a/src-ui/src/app/components/manage/logs/logs.component.css +++ b/src-ui/src/app/components/manage/logs/logs.component.css @@ -0,0 +1,12 @@ +.log-entry-30 { + color: yellow !important; +} + +.log-entry-40 { + color: red !important; +} + +.log-entry-50 { + color: lightcoral !important; + font-weight: bold; +} \ No newline at end of file diff --git a/src-ui/src/app/components/manage/logs/logs.component.html b/src-ui/src/app/components/manage/logs/logs.component.html index 8b290fc0d..f6738d373 100644 --- a/src-ui/src/app/components/manage/logs/logs.component.html +++ b/src-ui/src/app/components/manage/logs/logs.component.html @@ -1,2 +1,26 @@ - \ No newline at end of file + +
+ +
+ +
+
+ + + +
+

+ {{log.created | date:'short'}} + {{getLevelText(log.level)}} + {{log.message}} +

+
\ No newline at end of file diff --git a/src-ui/src/app/components/manage/logs/logs.component.ts b/src-ui/src/app/components/manage/logs/logs.component.ts index c546b8253..0550e8151 100644 --- a/src-ui/src/app/components/manage/logs/logs.component.ts +++ b/src-ui/src/app/components/manage/logs/logs.component.ts @@ -1,4 +1,7 @@ import { Component, OnInit } from '@angular/core'; +import { kMaxLength } from 'buffer'; +import { LOG_LEVELS, LOG_LEVEL_INFO, PaperlessLog } from 'src/app/data/paperless-log'; +import { LogService } from 'src/app/services/rest/log.service'; @Component({ selector: 'app-logs', @@ -7,9 +10,40 @@ import { Component, OnInit } from '@angular/core'; }) export class LogsComponent implements OnInit { - constructor() { } + constructor(private logService: LogService) { } + + logs: PaperlessLog[] = [] + level: number = LOG_LEVEL_INFO ngOnInit(): void { + this.reload() + } + + reload() { + this.logService.list(1, 50, null, {'level__gte': this.level}).subscribe(result => this.logs = result.results) + } + + getLevelText(level: number) { + return LOG_LEVELS.find(l => l.id == level)?.name + } + + onScroll() { + let lastCreated = null + if (this.logs.length > 0) { + lastCreated = this.logs[this.logs.length-1].created + } + this.logService.list(1, 25, null, {'created__lt': lastCreated, 'level__gte': this.level}).subscribe(result => { + this.logs.push(...result.results) + }) + } + + getLevels() { + return LOG_LEVELS + } + + setLevel(id) { + this.level = id + this.reload() } } diff --git a/src-ui/src/app/components/manage/settings/settings.component.html b/src-ui/src/app/components/manage/settings/settings.component.html index 67d9ad6f5..e8931ccdf 100644 --- a/src-ui/src/app/components/manage/settings/settings.component.html +++ b/src-ui/src/app/components/manage/settings/settings.component.html @@ -2,5 +2,38 @@ -

items per page, documents per view type

+ + + +
\ No newline at end of file diff --git a/src-ui/src/app/components/manage/settings/settings.component.ts b/src-ui/src/app/components/manage/settings/settings.component.ts index 19862fb02..c56cd6a78 100644 --- a/src-ui/src/app/components/manage/settings/settings.component.ts +++ b/src-ui/src/app/components/manage/settings/settings.component.ts @@ -1,4 +1,6 @@ import { Component, OnInit } from '@angular/core'; +import { SavedViewConfig } from 'src/app/data/saved-view-config'; +import { SavedViewConfigService } from 'src/app/services/saved-view-config.service'; @Component({ selector: 'app-settings', @@ -7,9 +9,17 @@ import { Component, OnInit } from '@angular/core'; }) export class SettingsComponent implements OnInit { - constructor() { } + constructor( + private savedViewConfigService: SavedViewConfigService + ) { } + + active ngOnInit(): void { } + deleteViewConfig(config: SavedViewConfig) { + this.savedViewConfigService.deleteConfig(config) + } + } diff --git a/src-ui/src/app/components/manage/tag-list/tag-edit-dialog/tag-edit-dialog.component.ts b/src-ui/src/app/components/manage/tag-list/tag-edit-dialog/tag-edit-dialog.component.ts index d8f8b2510..7aee39e77 100644 --- a/src-ui/src/app/components/manage/tag-list/tag-edit-dialog/tag-edit-dialog.component.ts +++ b/src-ui/src/app/components/manage/tag-list/tag-edit-dialog/tag-edit-dialog.component.ts @@ -2,7 +2,7 @@ import { Component } from '@angular/core'; import { FormControl, FormGroup } from '@angular/forms'; import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'; import { EditDialogComponent } from 'src/app/components/common/edit-dialog/edit-dialog.component'; -import { PaperlessTag } from 'src/app/data/paperless-tag'; +import { TAG_COLOURS, PaperlessTag } from 'src/app/data/paperless-tag'; import { TagService } from 'src/app/services/rest/tag.service'; import { ToastService } from 'src/app/services/toast.service'; @@ -29,11 +29,11 @@ export class TagEditDialogComponent extends EditDialogComponent { } 
getColours() { - return PaperlessTag.COLOURS + return TAG_COLOURS } getColor(id: number) { - return PaperlessTag.COLOURS.find(c => c.id == id) + return TAG_COLOURS.find(c => c.id == id) } } diff --git a/src-ui/src/app/components/manage/tag-list/tag-list.component.ts b/src-ui/src/app/components/manage/tag-list/tag-list.component.ts index 2b8f916b9..88fc03a59 100644 --- a/src-ui/src/app/components/manage/tag-list/tag-list.component.ts +++ b/src-ui/src/app/components/manage/tag-list/tag-list.component.ts @@ -1,6 +1,6 @@ import { Component } from '@angular/core'; import { NgbModal } from '@ng-bootstrap/ng-bootstrap'; -import { PaperlessTag } from 'src/app/data/paperless-tag'; +import { TAG_COLOURS, PaperlessTag } from 'src/app/data/paperless-tag'; import { TagService } from 'src/app/services/rest/tag.service'; import { CorrespondentEditDialogComponent } from '../correspondent-list/correspondent-edit-dialog/correspondent-edit-dialog.component'; import { GenericListComponent } from '../generic-list/generic-list.component'; @@ -18,7 +18,7 @@ export class TagListComponent extends GenericListComponent { } getColor(id) { - return PaperlessTag.COLOURS.find(c => c.id == id) + return TAG_COLOURS.find(c => c.id == id) } getObjectName(object: PaperlessTag) { diff --git a/src-ui/src/app/components/search/result-hightlight/result-hightlight.component.ts b/src-ui/src/app/components/search/result-hightlight/result-hightlight.component.ts index 5ce5ef607..0f20c93cc 100644 --- a/src-ui/src/app/components/search/result-hightlight/result-hightlight.component.ts +++ b/src-ui/src/app/components/search/result-hightlight/result-hightlight.component.ts @@ -1,5 +1,5 @@ import { Component, Input, OnInit } from '@angular/core'; -import { SearchResultHighlightedText } from 'src/app/services/rest/search.service'; +import { SearchHitHighlight } from 'src/app/data/search-result'; @Component({ selector: 'app-result-hightlight', @@ -11,7 +11,7 @@ export class ResultHightlightComponent implements 
OnInit { constructor() { } @Input() - highlights: SearchResultHighlightedText[][] + highlights: SearchHitHighlight[][] ngOnInit(): void { } diff --git a/src-ui/src/app/components/search/search.component.css b/src-ui/src/app/components/search/search.component.css index 5539c1eea..5be54746a 100644 --- a/src-ui/src/app/components/search/search.component.css +++ b/src-ui/src/app/components/search/search.component.css @@ -8,4 +8,8 @@ height: 100%; position: absolute; +} + +.result-content-searching { + opacity: 0.2; } \ No newline at end of file diff --git a/src-ui/src/app/components/search/search.component.html b/src-ui/src/app/components/search/search.component.html index 2f178097e..59c24fa04 100644 --- a/src-ui/src/app/components/search/search.component.html +++ b/src-ui/src/app/components/search/search.component.html @@ -3,10 +3,11 @@

Search string: {{query}}

- +
+

{{resultCount}} result(s)

+ - -

No results

+
\ No newline at end of file diff --git a/src-ui/src/app/components/search/search.component.ts b/src-ui/src/app/components/search/search.component.ts index 7b6630454..bd15611c6 100644 --- a/src-ui/src/app/components/search/search.component.ts +++ b/src-ui/src/app/components/search/search.component.ts @@ -1,6 +1,7 @@ import { Component, OnInit } from '@angular/core'; import { ActivatedRoute } from '@angular/router'; -import { SearchResult, SearchService } from 'src/app/services/rest/search.service'; +import { SearchHit } from 'src/app/data/search-result'; +import { SearchService } from 'src/app/services/rest/search.service'; @Component({ selector: 'app-search', @@ -9,20 +10,50 @@ import { SearchResult, SearchService } from 'src/app/services/rest/search.servic }) export class SearchComponent implements OnInit { - results: SearchResult[] = [] + results: SearchHit[] = [] query: string = "" + searching = false + + currentPage = 1 + + pageCount = 1 + + resultCount + constructor(private searchService: SearchService, private route: ActivatedRoute) { } ngOnInit(): void { this.route.queryParamMap.subscribe(paramMap => { this.query = paramMap.get('query') - this.searchService.search(this.query).subscribe(result => { - this.results = result - }) + this.searching = true + this.currentPage = 1 + this.loadPage() }) } + loadPage(append: boolean = false) { + this.searchService.search(this.query, this.currentPage).subscribe(result => { + if (append) { + this.results.push(...result.results) + } else { + this.results = result.results + } + this.pageCount = result.page_count + this.searching = false + this.resultCount = result.count + }) + } + + onScroll() { + console.log(this.currentPage) + console.log(this.pageCount) + if (this.currentPage < this.pageCount) { + this.currentPage += 1 + this.loadPage(true) + } + } + } diff --git a/src-ui/src/app/data/filter-rule-type.ts b/src-ui/src/app/data/filter-rule-type.ts new file mode 100644 index 000000000..e5de30271 --- /dev/null +++ 
b/src-ui/src/app/data/filter-rule-type.ts @@ -0,0 +1,33 @@ +export const FILTER_RULE_TYPES: FilterRuleType[] = [ + {name: "Title contains", filtervar: "title__icontains", datatype: "string", multi: false}, + {name: "Content contains", filtervar: "content__icontains", datatype: "string", multi: false}, + + {name: "ASN is", filtervar: "archive_serial_number", datatype: "number", multi: false}, + + {name: "Correspondent is", filtervar: "correspondent__id", datatype: "correspondent", multi: false}, + {name: "Document type is", filtervar: "document_type__id", datatype: "document_type", multi: false}, + + {name: "Is in Inbox", filtervar: "is_in_inbox", datatype: "boolean", multi: false}, + {name: "Has tag", filtervar: "tags__id__all", datatype: "tag", multi: true}, + {name: "Has any tag", filtervar: "is_tagged", datatype: "boolean", multi: false}, + + {name: "Created before", filtervar: "created__date__lt", datatype: "date", multi: false}, + {name: "Created after", filtervar: "created__date__gt", datatype: "date", multi: false}, + + {name: "Year created is", filtervar: "created__year", datatype: "number", multi: false}, + {name: "Month created is", filtervar: "created__month", datatype: "number", multi: false}, + {name: "Day created is", filtervar: "created__day", datatype: "number", multi: false}, + + {name: "Added before", filtervar: "added__date__lt", datatype: "date", multi: false}, + {name: "Added after", filtervar: "added__date__gt", datatype: "date", multi: false}, + + {name: "Modified before", filtervar: "modified__date__lt", datatype: "date", multi: false}, + {name: "Modified after", filtervar: "modified__date__gt", datatype: "date", multi: false}, +] + +export interface FilterRuleType { + name: string + filtervar: string + datatype: string //number, string, boolean, date + multi: boolean +} \ No newline at end of file diff --git a/src-ui/src/app/data/filter-rule.ts b/src-ui/src/app/data/filter-rule.ts new file mode 100644 index 000000000..2dc632d9c --- 
/dev/null +++ b/src-ui/src/app/data/filter-rule.ts @@ -0,0 +1,18 @@ +import { FilterRuleType } from './filter-rule-type'; + +export function cloneFilterRules(filterRules: FilterRule[]): FilterRule[] { + if (filterRules) { + let newRules: FilterRule[] = [] + for (let rule of filterRules) { + newRules.push({type: rule.type, value: rule.value}) + } + return newRules + } else { + return null + } +} + +export interface FilterRule { + type: FilterRuleType + value: any +} \ No newline at end of file diff --git a/src-ui/src/app/data/matching-model.spec.ts b/src-ui/src/app/data/matching-model.spec.ts deleted file mode 100644 index 08999993a..000000000 --- a/src-ui/src/app/data/matching-model.spec.ts +++ /dev/null @@ -1,7 +0,0 @@ -import { MatchingModel } from './matching-model'; - -describe('MatchingModel', () => { - it('should create an instance', () => { - expect(new MatchingModel()).toBeTruthy(); - }); -}); diff --git a/src-ui/src/app/data/matching-model.ts b/src-ui/src/app/data/matching-model.ts index 8a15fc1f5..698c32da5 100644 --- a/src-ui/src/app/data/matching-model.ts +++ b/src-ui/src/app/data/matching-model.ts @@ -1,22 +1,23 @@ import { ObjectWithId } from './object-with-id'; -export class MatchingModel extends ObjectWithId { - static MATCH_ANY = 1 - static MATCH_ALL = 2 - static MATCH_LITERAL = 3 - static MATCH_REGEX = 4 - static MATCH_FUZZY = 5 - static MATCH_AUTO = 6 +export const MATCH_ANY = 1 +export const MATCH_ALL = 2 +export const MATCH_LITERAL = 3 +export const MATCH_REGEX = 4 +export const MATCH_FUZZY = 5 +export const MATCH_AUTO = 6 - static MATCHING_ALGORITHMS = [ - {id: MatchingModel.MATCH_ANY, name: "Any"}, - {id: MatchingModel.MATCH_ALL, name: "All"}, - {id: MatchingModel.MATCH_LITERAL, name: "Literal"}, - {id: MatchingModel.MATCH_REGEX, name: "Regular Expression"}, - {id: MatchingModel.MATCH_FUZZY, name: "Fuzzy Match"}, - {id: MatchingModel.MATCH_AUTO, name: "Auto"}, - ] +export const MATCHING_ALGORITHMS = [ + {id: MATCH_ANY, name: "Any"}, + {id: 
MATCH_ALL, name: "All"}, + {id: MATCH_LITERAL, name: "Literal"}, + {id: MATCH_REGEX, name: "Regular Expression"}, + {id: MATCH_FUZZY, name: "Fuzzy Match"}, + {id: MATCH_AUTO, name: "Auto"}, +] + +export interface MatchingModel extends ObjectWithId { name?: string diff --git a/src-ui/src/app/data/object-with-id.spec.ts b/src-ui/src/app/data/object-with-id.spec.ts deleted file mode 100644 index d9a0ea024..000000000 --- a/src-ui/src/app/data/object-with-id.spec.ts +++ /dev/null @@ -1,7 +0,0 @@ -import { ObjectWithId } from './object-with-id'; - -describe('ObjectWithId', () => { - it('should create an instance', () => { - expect(new ObjectWithId()).toBeTruthy(); - }); -}); diff --git a/src-ui/src/app/data/object-with-id.ts b/src-ui/src/app/data/object-with-id.ts index b9bab4fdf..e81548f4e 100644 --- a/src-ui/src/app/data/object-with-id.ts +++ b/src-ui/src/app/data/object-with-id.ts @@ -1,4 +1,4 @@ -export class ObjectWithId { +export interface ObjectWithId { id?: number diff --git a/src-ui/src/app/data/paperless-correspondent.spec.ts b/src-ui/src/app/data/paperless-correspondent.spec.ts deleted file mode 100644 index 27fd210dc..000000000 --- a/src-ui/src/app/data/paperless-correspondent.spec.ts +++ /dev/null @@ -1,7 +0,0 @@ -import { PaperlessCorrespondent } from './paperless-correspondent'; - -describe('PaperlessCorrespondent', () => { - it('should create an instance', () => { - expect(new PaperlessCorrespondent()).toBeTruthy(); - }); -}); diff --git a/src-ui/src/app/data/paperless-correspondent.ts b/src-ui/src/app/data/paperless-correspondent.ts index 4d149b280..217e62529 100644 --- a/src-ui/src/app/data/paperless-correspondent.ts +++ b/src-ui/src/app/data/paperless-correspondent.ts @@ -1,6 +1,6 @@ import { MatchingModel } from './matching-model'; -export class PaperlessCorrespondent extends MatchingModel { +export interface PaperlessCorrespondent extends MatchingModel { document_count?: number diff --git a/src-ui/src/app/data/paperless-document-type.spec.ts 
b/src-ui/src/app/data/paperless-document-type.spec.ts deleted file mode 100644 index fa4e1dbc5..000000000 --- a/src-ui/src/app/data/paperless-document-type.spec.ts +++ /dev/null @@ -1,7 +0,0 @@ -import { PaperlessDocumentType } from './paperless-document-type'; - -describe('PaperlessDocumentType', () => { - it('should create an instance', () => { - expect(new PaperlessDocumentType()).toBeTruthy(); - }); -}); diff --git a/src-ui/src/app/data/paperless-document-type.ts b/src-ui/src/app/data/paperless-document-type.ts index fb2a3f0a3..d099bec47 100644 --- a/src-ui/src/app/data/paperless-document-type.ts +++ b/src-ui/src/app/data/paperless-document-type.ts @@ -1,6 +1,6 @@ import { MatchingModel } from './matching-model'; -export class PaperlessDocumentType extends MatchingModel { +export interface PaperlessDocumentType extends MatchingModel { document_count?: number diff --git a/src-ui/src/app/data/paperless-document.spec.ts b/src-ui/src/app/data/paperless-document.spec.ts deleted file mode 100644 index 8aedf447c..000000000 --- a/src-ui/src/app/data/paperless-document.spec.ts +++ /dev/null @@ -1,7 +0,0 @@ -import { PaperlessDocument } from './paperless-document'; - -describe('PaperlessDocument', () => { - it('should create an instance', () => { - expect(new PaperlessDocument()).toBeTruthy(); - }); -}); diff --git a/src-ui/src/app/data/paperless-document.ts b/src-ui/src/app/data/paperless-document.ts index 3da54ce2a..31a24bcad 100644 --- a/src-ui/src/app/data/paperless-document.ts +++ b/src-ui/src/app/data/paperless-document.ts @@ -3,7 +3,7 @@ import { ObjectWithId } from './object-with-id' import { PaperlessTag } from './paperless-tag' import { PaperlessDocumentType } from './paperless-document-type' -export class PaperlessDocument extends ObjectWithId { +export interface PaperlessDocument extends ObjectWithId { correspondent?: PaperlessCorrespondent diff --git a/src-ui/src/app/data/paperless-log.spec.ts b/src-ui/src/app/data/paperless-log.spec.ts deleted file mode 
100644 index 0470e34ce..000000000 --- a/src-ui/src/app/data/paperless-log.spec.ts +++ /dev/null @@ -1,7 +0,0 @@ -import { PaperlessLog } from './paperless-log'; - -describe('PaperlessLog', () => { - it('should create an instance', () => { - expect(new PaperlessLog()).toBeTruthy(); - }); -}); diff --git a/src-ui/src/app/data/paperless-log.ts b/src-ui/src/app/data/paperless-log.ts index 57416ec5a..61a6fce99 100644 --- a/src-ui/src/app/data/paperless-log.ts +++ b/src-ui/src/app/data/paperless-log.ts @@ -1,2 +1,27 @@ -export class PaperlessLog { +export const LOG_LEVEL_DEBUG = 10 +export const LOG_LEVEL_INFO = 20 +export const LOG_LEVEL_WARNING = 30 +export const LOG_LEVEL_ERROR = 40 +export const LOG_LEVEL_CRITICAL = 50 + +export const LOG_LEVELS = [ + {id: LOG_LEVEL_DEBUG, name: "DEBUG"}, + {id: LOG_LEVEL_INFO, name: "INFO"}, + {id: LOG_LEVEL_WARNING, name: "WARNING"}, + {id: LOG_LEVEL_ERROR, name: "ERROR"}, + {id: LOG_LEVEL_CRITICAL, name: "CRITICAL"} +] + +export interface PaperlessLog { + + id?: number + + group?: string + + message?: string + + created?: Date + + level?: number + } diff --git a/src-ui/src/app/data/paperless-tag.spec.ts b/src-ui/src/app/data/paperless-tag.spec.ts deleted file mode 100644 index bda0a46c8..000000000 --- a/src-ui/src/app/data/paperless-tag.spec.ts +++ /dev/null @@ -1,7 +0,0 @@ -import { PaperlessTag } from './paperless-tag'; - -describe('PaperlessTag', () => { - it('should create an instance', () => { - expect(new PaperlessTag()).toBeTruthy(); - }); -}); diff --git a/src-ui/src/app/data/paperless-tag.ts b/src-ui/src/app/data/paperless-tag.ts index 9931947ec..551c6e03a 100644 --- a/src-ui/src/app/data/paperless-tag.ts +++ b/src-ui/src/app/data/paperless-tag.ts @@ -1,23 +1,24 @@ import { MatchingModel } from './matching-model'; import { ObjectWithId } from './object-with-id'; -export class PaperlessTag extends MatchingModel { - static COLOURS = [ - {id: 1, value: "#a6cee3", name: "Light Blue", textColor: "#000000"}, - {id: 2, value: 
"#1f78b4", name: "Blue", textColor: "#ffffff"}, - {id: 3, value: "#b2df8a", name: "Light Green", textColor: "#000000"}, - {id: 4, value: "#33a02c", name: "Green", textColor: "#000000"}, - {id: 5, value: "#fb9a99", name: "Light Red", textColor: "#000000"}, - {id: 6, value: "#e31a1c", name: "Red ", textColor: "#ffffff"}, - {id: 7, value: "#fdbf6f", name: "Light Orange", textColor: "#000000"}, - {id: 8, value: "#ff7f00", name: "Orange", textColor: "#000000"}, - {id: 9, value: "#cab2d6", name: "Light Violet", textColor: "#000000"}, - {id: 10, value: "#6a3d9a", name: "Violet", textColor: "#ffffff"}, - {id: 11, value: "#b15928", name: "Brown", textColor: "#000000"}, - {id: 12, value: "#000000", name: "Black", textColor: "#ffffff"}, - {id: 13, value: "#cccccc", name: "Light Grey", textColor: "#000000"} - ] +export const TAG_COLOURS = [ + {id: 1, value: "#a6cee3", name: "Light Blue", textColor: "#000000"}, + {id: 2, value: "#1f78b4", name: "Blue", textColor: "#ffffff"}, + {id: 3, value: "#b2df8a", name: "Light Green", textColor: "#000000"}, + {id: 4, value: "#33a02c", name: "Green", textColor: "#000000"}, + {id: 5, value: "#fb9a99", name: "Light Red", textColor: "#000000"}, + {id: 6, value: "#e31a1c", name: "Red ", textColor: "#ffffff"}, + {id: 7, value: "#fdbf6f", name: "Light Orange", textColor: "#000000"}, + {id: 8, value: "#ff7f00", name: "Orange", textColor: "#000000"}, + {id: 9, value: "#cab2d6", name: "Light Violet", textColor: "#000000"}, + {id: 10, value: "#6a3d9a", name: "Violet", textColor: "#ffffff"}, + {id: 11, value: "#b15928", name: "Brown", textColor: "#000000"}, + {id: 12, value: "#000000", name: "Black", textColor: "#ffffff"}, + {id: 13, value: "#cccccc", name: "Light Grey", textColor: "#000000"} +] + +export interface PaperlessTag extends MatchingModel { colour?: number diff --git a/src-ui/src/app/data/results.spec.ts b/src-ui/src/app/data/results.spec.ts deleted file mode 100644 index f21684300..000000000 --- a/src-ui/src/app/data/results.spec.ts +++ 
/dev/null @@ -1,7 +0,0 @@ -import { Results } from './results'; - -describe('Results', () => { - it('should create an instance', () => { - expect(new Results()).toBeTruthy(); - }); -}); diff --git a/src-ui/src/app/data/results.ts b/src-ui/src/app/data/results.ts index 557c8e806..83e9c583c 100644 --- a/src-ui/src/app/data/results.ts +++ b/src-ui/src/app/data/results.ts @@ -1,4 +1,4 @@ -export class Results { +export interface Results { count: number diff --git a/src-ui/src/app/data/saved-view-config.ts b/src-ui/src/app/data/saved-view-config.ts new file mode 100644 index 000000000..29d881510 --- /dev/null +++ b/src-ui/src/app/data/saved-view-config.ts @@ -0,0 +1,19 @@ +import { FilterRule } from './filter-rule'; + +export interface SavedViewConfig { + + id?: string + + filterRules: FilterRule[] + + sortField: string + + sortDirection: string + + title: string + + showInSideBar: boolean + + showInDashboard: boolean + +} \ No newline at end of file diff --git a/src-ui/src/app/data/search-result.ts b/src-ui/src/app/data/search-result.ts new file mode 100644 index 000000000..b22dc64af --- /dev/null +++ b/src-ui/src/app/data/search-result.ts @@ -0,0 +1,27 @@ +import { PaperlessDocument } from './paperless-document' + +export class SearchHitHighlight { + text?: string + term?: number +} + +export interface SearchHit { + id?: number + title?: string + score?: number + rank?: number + + highlights?: SearchHitHighlight[][] + document?: PaperlessDocument +} + +export interface SearchResult { + + count?: number + page?: number + page_count?: number + + results?: SearchHit[] + + +} \ No newline at end of file diff --git a/src-ui/src/app/services/auth.service.ts b/src-ui/src/app/services/auth.service.ts index 1f2f1bdf3..dc31a9bbd 100644 --- a/src-ui/src/app/services/auth.service.ts +++ b/src-ui/src/app/services/auth.service.ts @@ -54,13 +54,9 @@ export class AuthService { map(tokenResponse => { this.currentUsername = username this.token = tokenResponse.token - if (rememberMe) { 
- localStorage.setItem('auth-service:token', this.token) - localStorage.setItem('auth-service:currentUsername', this.currentUsername) - } else { - sessionStorage.setItem('auth-service:token', this.token) - sessionStorage.setItem('auth-service:currentUsername', this.currentUsername) - } + let storage = rememberMe ? localStorage : sessionStorage + storage.setItem('auth-service:token', this.token) + storage.setItem('auth-service:currentUsername', this.currentUsername) return true }) ) diff --git a/src-ui/src/app/services/document-list-view.service.ts b/src-ui/src/app/services/document-list-view.service.ts index 292e349dd..0ea509863 100644 --- a/src-ui/src/app/services/document-list-view.service.ts +++ b/src-ui/src/app/services/document-list-view.service.ts @@ -1,8 +1,10 @@ import { Injectable } from '@angular/core'; import { Observable } from 'rxjs'; -import { FilterRuleSet } from '../components/filter-editor/filter-editor.component'; +import { cloneFilterRules, FilterRule } from '../data/filter-rule'; import { PaperlessDocument } from '../data/paperless-document'; -import { DocumentService } from './rest/document.service'; +import { SavedViewConfig } from '../data/saved-view-config'; +import { DocumentService, SORT_DIRECTION_DESCENDING } from './rest/document.service'; + @Injectable({ providedIn: 'root' @@ -11,30 +13,36 @@ export class DocumentListViewService { static DEFAULT_SORT_FIELD = 'created' - static SORT_FIELDS = [ - {field: "correspondent__name", name: "Correspondent"}, - {field: 'title', name: 'Title'}, - {field: 'archive_serial_number', name: 'ASN'}, - {field: 'created', name: 'Created'}, - {field: 'added', name: 'Added'}, - {field: 'modified', name: 'Modified'} - ] - documents: PaperlessDocument[] = [] currentPage = 1 collectionSize: number - currentFilter = new FilterRuleSet() - - currentSortDirection = 'des' + currentFilterRules: FilterRule[] = [] + currentSortDirection = SORT_DIRECTION_DESCENDING currentSortField = 
DocumentListViewService.DEFAULT_SORT_FIELD + + viewConfig: SavedViewConfig reload(onFinish?) { + let sortField: string + let sortDirection: string + let filterRules: FilterRule[] + if (this.viewConfig) { + sortField = this.viewConfig.sortField + sortDirection = this.viewConfig.sortDirection + filterRules = this.viewConfig.filterRules + } else { + sortField = this.currentSortField + sortDirection = this.currentSortDirection + filterRules = this.currentFilterRules + } + this.documentService.list( this.currentPage, null, - this.getOrderingQueryParam(), - this.currentFilter.toQueryParams()).subscribe( + sortField, + sortDirection, + filterRules).subscribe( result => { this.collectionSize = result.count this.documents = result.results @@ -50,16 +58,9 @@ export class DocumentListViewService { }) } - getOrderingQueryParam() { - if (DocumentListViewService.SORT_FIELDS.find(f => f.field == this.currentSortField)) { - return (this.currentSortDirection == 'des' ? '-' : '') + this.currentSortField - } else { - return DocumentListViewService.DEFAULT_SORT_FIELD - } - } - setFilter(filter: FilterRuleSet) { - this.currentFilter = filter + setFilterRules(filterRules: FilterRule[]) { + this.currentFilterRules = cloneFilterRules(filterRules) } getLastPage(): number { diff --git a/src-ui/src/app/services/rest/abstract-paperless-service.ts b/src-ui/src/app/services/rest/abstract-paperless-service.ts index 9ee07d31a..c8459f080 100644 --- a/src-ui/src/app/services/rest/abstract-paperless-service.ts +++ b/src-ui/src/app/services/rest/abstract-paperless-service.ts @@ -33,7 +33,7 @@ export abstract class AbstractPaperlessService { httpParams = httpParams.set('ordering', ordering) } for (let extraParamKey in extraParams) { - if (extraParams[extraParamKey]) { + if (extraParams[extraParamKey] != null) { httpParams = httpParams.set(extraParamKey, extraParams[extraParamKey]) } } diff --git a/src-ui/src/app/services/rest/document.service.ts b/src-ui/src/app/services/rest/document.service.ts index 
863758234..7328b380e 100644 --- a/src-ui/src/app/services/rest/document.service.ts +++ b/src-ui/src/app/services/rest/document.service.ts @@ -2,8 +2,24 @@ import { Injectable } from '@angular/core'; import { PaperlessDocument } from 'src/app/data/paperless-document'; import { AbstractPaperlessService } from './abstract-paperless-service'; import { HttpClient } from '@angular/common/http'; -import { Observable } from 'rxjs'; import { AuthService } from '../auth.service'; +import { Observable } from 'rxjs'; +import { Results } from 'src/app/data/results'; +import { FilterRule } from 'src/app/data/filter-rule'; + + +export const DOCUMENT_SORT_FIELDS = [ + { field: "correspondent__name", name: "Correspondent" }, + { field: 'title', name: 'Title' }, + { field: 'archive_serial_number', name: 'ASN' }, + { field: 'created', name: 'Created' }, + { field: 'added', name: 'Added' }, + { field: 'modified', name: 'Modified' } +] + +export const SORT_DIRECTION_ASCENDING = "asc" +export const SORT_DIRECTION_DESCENDING = "des" + @Injectable({ providedIn: 'root' @@ -14,6 +30,34 @@ export class DocumentService extends AbstractPaperlessService super(http, 'documents') } + private filterRulesToQueryParams(filterRules: FilterRule[]) { + if (filterRules) { + let params = {} + for (let rule of filterRules) { + if (rule.type.multi) { + params[rule.type.filtervar] = params[rule.type.filtervar] ? params[rule.type.filtervar] + "," + rule.value : rule.value + } else { + params[rule.type.filtervar] = rule.value + } + } + return params + } else { + return null + } + } + + private getOrderingQueryParam(sortField: string, sortDirection: string) { + if (DOCUMENT_SORT_FIELDS.find(f => f.field == sortField)) { + return (sortDirection == SORT_DIRECTION_DESCENDING ? 
'-' : '') + sortField + } else { + return null + } + } + + list(page?: number, pageSize?: number, sortField?: string, sortDirection?: string, filterRules?: FilterRule[]): Observable> { + return super.list(page, pageSize, this.getOrderingQueryParam(sortField, sortDirection), this.filterRulesToQueryParams(filterRules)) + } + getPreviewUrl(id: number): string { return this.getResourceUrl(id, 'preview') + `?auth_token=${this.auth.getToken()}` } diff --git a/src-ui/src/app/services/rest/log.service.spec.ts b/src-ui/src/app/services/rest/log.service.spec.ts new file mode 100644 index 000000000..4a99f7727 --- /dev/null +++ b/src-ui/src/app/services/rest/log.service.spec.ts @@ -0,0 +1,16 @@ +import { TestBed } from '@angular/core/testing'; + +import { LogService } from './log.service'; + +describe('LogService', () => { + let service: LogService; + + beforeEach(() => { + TestBed.configureTestingModule({}); + service = TestBed.inject(LogService); + }); + + it('should be created', () => { + expect(service).toBeTruthy(); + }); +}); diff --git a/src-ui/src/app/services/rest/log.service.ts b/src-ui/src/app/services/rest/log.service.ts new file mode 100644 index 000000000..797d9b6b9 --- /dev/null +++ b/src-ui/src/app/services/rest/log.service.ts @@ -0,0 +1,14 @@ +import { HttpClient } from '@angular/common/http'; +import { Injectable } from '@angular/core'; +import { PaperlessLog } from 'src/app/data/paperless-log'; +import { AbstractPaperlessService } from './abstract-paperless-service'; + +@Injectable({ + providedIn: 'root' +}) +export class LogService extends AbstractPaperlessService { + + constructor(http: HttpClient) { + super(http, 'logs') + } +} diff --git a/src-ui/src/app/services/rest/search.service.ts b/src-ui/src/app/services/rest/search.service.ts index a9065bc7c..2da5f9a08 100644 --- a/src-ui/src/app/services/rest/search.service.ts +++ b/src-ui/src/app/services/rest/search.service.ts @@ -2,27 +2,9 @@ import { HttpClient, HttpParams } from '@angular/common/http'; 
import { Injectable } from '@angular/core'; import { Observable } from 'rxjs'; import { PaperlessDocument } from 'src/app/data/paperless-document'; +import { SearchResult } from 'src/app/data/search-result'; import { environment } from 'src/environments/environment'; -export class SearchResultHighlightedText { - text?: string - term?: number - - toString(): string { - return this.text - } -} - -export class SearchResult { - id?: number - title?: string - content?: string - - score?: number - highlights?: SearchResultHighlightedText[][] - - document?: PaperlessDocument -} @Injectable({ providedIn: 'root' @@ -31,8 +13,12 @@ export class SearchService { constructor(private http: HttpClient) { } - search(query: string): Observable { - return this.http.get(`${environment.apiBaseUrl}search/`, {params: new HttpParams().set('query', query)}) + search(query: string, page?: number): Observable { + let httpParams = new HttpParams().set('query', query) + if (page) { + httpParams = httpParams.set('page', page.toString()) + } + return this.http.get(`${environment.apiBaseUrl}search/`, {params: httpParams}) } autocomplete(term: string): Observable { diff --git a/src-ui/src/app/services/saved-view-config.service.spec.ts b/src-ui/src/app/services/saved-view-config.service.spec.ts new file mode 100644 index 000000000..c67affead --- /dev/null +++ b/src-ui/src/app/services/saved-view-config.service.spec.ts @@ -0,0 +1,16 @@ +import { TestBed } from '@angular/core/testing'; + +import { SavedViewConfigService } from './saved-view-config.service'; + +describe('SavedViewConfigService', () => { + let service: SavedViewConfigService; + + beforeEach(() => { + TestBed.configureTestingModule({}); + service = TestBed.inject(SavedViewConfigService); + }); + + it('should be created', () => { + expect(service).toBeTruthy(); + }); +}); diff --git a/src-ui/src/app/services/saved-view-config.service.ts b/src-ui/src/app/services/saved-view-config.service.ts new file mode 100644 index 
000000000..a6b538b0d --- /dev/null +++ b/src-ui/src/app/services/saved-view-config.service.ts @@ -0,0 +1,54 @@ +import { Injectable } from '@angular/core'; +import { v4 as uuidv4 } from 'uuid'; +import { SavedViewConfig } from '../data/saved-view-config'; + +@Injectable({ + providedIn: 'root' +}) +export class SavedViewConfigService { + + constructor() { + let savedConfigs = localStorage.getItem('saved-view-config-service:savedConfigs') + if (savedConfigs) { + this.configs = JSON.parse(savedConfigs) + } + } + + private configs: SavedViewConfig[] = [] + + getConfigs(): SavedViewConfig[] { + return this.configs + } + + getDashboardConfigs(): SavedViewConfig[] { + return this.configs.filter(sf => sf.showInDashboard) + } + + getSideBarConfigs(): SavedViewConfig[] { + return this.configs.filter(sf => sf.showInSideBar) + } + + getConfig(id: string): SavedViewConfig { + return this.configs.find(sf => sf.id == id) + } + + saveConfig(config: SavedViewConfig) { + config.id = uuidv4() + this.configs.push(config) + + this.save() + } + + private save() { + localStorage.setItem('saved-view-config-service:savedConfigs', JSON.stringify(this.configs)) + } + + deleteConfig(config: SavedViewConfig) { + let index = this.configs.findIndex(vc => vc.id == config.id) + if (index != -1) { + this.configs.splice(index, 1) + this.save() + } + + } +} diff --git a/src/documents/admin.py b/src/documents/admin.py index b9d2b5543..74a152c68 100755 --- a/src/documents/admin.py +++ b/src/documents/admin.py @@ -75,7 +75,6 @@ class DocumentAdmin(admin.ModelAdmin): def tags_(self, obj): r = "" for tag in obj.tags.all(): - colour = tag.get_colour_display() r += self._html_tag( "span", tag.slug + ", " diff --git a/src/documents/apps.py b/src/documents/apps.py index 48807adf1..ca278e2e3 100644 --- a/src/documents/apps.py +++ b/src/documents/apps.py @@ -16,7 +16,6 @@ class DocumentsConfig(AppConfig): run_post_consume_script, cleanup_document_deletion, set_log_entry, - index_document, set_correspondent, 
set_document_type, set_tags @@ -25,7 +24,6 @@ class DocumentsConfig(AppConfig): document_consumption_started.connect(run_pre_consume_script) - document_consumption_finished.connect(index_document) document_consumption_finished.connect(add_inbox_tags) document_consumption_finished.connect(set_correspondent) document_consumption_finished.connect(set_document_type) diff --git a/src/documents/classifier.py b/src/documents/classifier.py index 52c508655..851a75899 100755 --- a/src/documents/classifier.py +++ b/src/documents/classifier.py @@ -75,16 +75,16 @@ class DocumentClassifier(object): y = -1 if doc.document_type: if doc.document_type.matching_algorithm == MatchingModel.MATCH_AUTO: - y = doc.document_type.id + y = doc.document_type.pk labels_document_type.append(y) y = -1 if doc.correspondent: if doc.correspondent.matching_algorithm == MatchingModel.MATCH_AUTO: - y = doc.correspondent.id + y = doc.correspondent.pk labels_correspondent.append(y) - tags = [tag.id for tag in doc.tags.filter( + tags = [tag.pk for tag in doc.tags.filter( matching_algorithm=MatchingModel.MATCH_AUTO )] labels_tags.append(tags) diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 401ef0ff0..3920f2942 100755 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -1,22 +1,19 @@ -from django.db import transaction import datetime import hashlib import logging import os import re -import time import uuid -from operator import itemgetter from django.conf import settings +from django.db import transaction from django.utils import timezone + from paperless.db import GnuPG from .classifier import DocumentClassifier - -from .models import Document, FileInfo, Tag -from .parsers import ParseError +from .models import Document, FileInfo +from .parsers import ParseError, get_parser_class from .signals import ( - document_consumer_declaration, document_consumption_finished, document_consumption_started ) @@ -36,17 +33,12 @@ class Consumer: 5. 
Delete the document and image(s) """ - # Files are considered ready for consumption if they have been unmodified - # for this duration - FILES_MIN_UNMODIFIED_DURATION = 0.5 - def __init__(self, consume=settings.CONSUMPTION_DIR, scratch=settings.SCRATCH_DIR): self.logger = logging.getLogger(__name__) self.logging_group = None - self._ignore = [] self.consume = consume self.scratch = scratch @@ -68,64 +60,20 @@ class Consumer: raise ConsumerError( "Consumption directory {} does not exist".format(self.consume)) - self.parsers = [] - for response in document_consumer_declaration.send(self): - self.parsers.append(response[1]) - - if not self.parsers: - raise ConsumerError( - "No parsers could be found, not even the default. " - "This is a problem." - ) def log(self, level, message): getattr(self.logger, level)(message, extra={ "group": self.logging_group }) - def consume_new_files(self): - """ - Find non-ignored files in consumption dir and consume them if they have - been unmodified for FILES_MIN_UNMODIFIED_DURATION. - """ - ignored_files = [] - files = [] - for entry in os.scandir(self.consume): - if entry.is_file(): - file = (entry.path, entry.stat().st_mtime) - if file in self._ignore: - ignored_files.append(file) - else: - files.append(file) - else: - self.logger.warning( - "Skipping %s as it is not a file", - entry.path - ) - - if not files: - return - - # Set _ignore to only include files that still exist. - # This keeps it from growing indefinitely. 
- self._ignore[:] = ignored_files - - files_old_to_new = sorted(files, key=itemgetter(1)) - - time.sleep(self.FILES_MIN_UNMODIFIED_DURATION) - - for file, mtime in files_old_to_new: - if mtime == os.path.getmtime(file): - # File has not been modified and can be consumed - if not self.try_consume_file(file): - self._ignore.append((file, mtime)) - @transaction.atomic def try_consume_file(self, file): """ Return True if file was consumed """ + self.logging_group = uuid.uuid4() + if not re.match(FileInfo.REGEXES["title"], file): return False @@ -133,20 +81,21 @@ class Consumer: if self._is_duplicate(doc): self.log( - "info", + "warning", "Skipping {} as it appears to be a duplicate".format(doc) ) return False - parser_class = self._get_parser_class(doc) + self.log("info", "Consuming {}".format(doc)) + + parser_class = get_parser_class(doc) if not parser_class: self.log( "error", "No parsers could be found for {}".format(doc)) return False + else: + self.log("info", "Parser: {}".format(parser_class.__name__)) - self.logging_group = uuid.uuid4() - - self.log("info", "Consuming {}".format(doc)) document_consumption_started.send( sender=self.__class__, @@ -154,23 +103,24 @@ class Consumer: logging_group=self.logging_group ) - parsed_document = parser_class(doc) + document_parser = parser_class(doc, self.logging_group) try: - thumbnail = parsed_document.get_optimised_thumbnail() - date = parsed_document.get_date() + self.log("info", "Generating thumbnail for {}...".format(doc)) + thumbnail = document_parser.get_optimised_thumbnail() + date = document_parser.get_date() document = self._store( - parsed_document.get_text(), + document_parser.get_text(), doc, thumbnail, date ) except ParseError as e: - self.log("error", "PARSE FAILURE for {}: {}".format(doc, e)) - parsed_document.cleanup() + self.log("fatal", "PARSE FAILURE for {}: {}".format(doc, e)) + document_parser.cleanup() return False else: - parsed_document.cleanup() + document_parser.cleanup() self._cleanup_doc(doc) 
self.log( @@ -184,9 +134,10 @@ class Consumer: self.classifier.reload() classifier = self.classifier except FileNotFoundError: - logging.getLogger(__name__).warning("Cannot classify documents, " - "classifier model file was not " - "found.") + self.log("warning", "Cannot classify documents, classifier " + "model file was not found. Consider " + "running python manage.py " + "document_create_classifier.") document_consumption_finished.send( sender=self.__class__, @@ -196,31 +147,6 @@ class Consumer: ) return True - def _get_parser_class(self, doc): - """ - Determine the appropriate parser class based on the file - """ - - options = [] - for parser in self.parsers: - result = parser(doc) - if result: - options.append(result) - - self.log( - "info", - "Parsers available: {}".format( - ", ".join([str(o["parser"].__name__) for o in options]) - ) - ) - - if not options: - return None - - # Return the parser with the highest weight. - return sorted( - options, key=lambda _: _["weight"], reverse=True)[0]["parser"] - def _store(self, text, doc, thumbnail, date): file_info = FileInfo.from_path(doc) @@ -253,10 +179,9 @@ class Consumer: self._write(document, doc, document.source_path) self._write(document, thumbnail, document.thumbnail_path) + #TODO: why do we need to save the document again? 
document.save() - self.log("info", "Completed") - return document def _write(self, document, source, target): diff --git a/src/documents/filters.py b/src/documents/filters.py index 2c0c71dc2..770e0e5af 100755 --- a/src/documents/filters.py +++ b/src/documents/filters.py @@ -1,11 +1,10 @@ -from django_filters.rest_framework import BooleanFilter, FilterSet - -from .models import Correspondent, Document, Tag, DocumentType +from django_filters.rest_framework import BooleanFilter, FilterSet, Filter +from .models import Correspondent, Document, Tag, DocumentType, Log CHAR_KWARGS = ["istartswith", "iendswith", "icontains", "iexact"] ID_KWARGS = ["in", "exact"] -INT_KWARGS = ["exact"] +INT_KWARGS = ["exact", "gt", "gte", "lt", "lte"] DATE_KWARGS = ["year", "month", "day", "date__gt", "gt", "date__lt", "lt"] @@ -36,6 +35,34 @@ class DocumentTypeFilterSet(FilterSet): } +class TagsFilter(Filter): + + def filter(self, qs, value): + if not value: + return qs + + try: + tag_ids = [int(x) for x in value.split(',')] + except ValueError: + return qs + + for tag_id in tag_ids: + qs = qs.filter(tags__id=tag_id) + + return qs + + +class InboxFilter(Filter): + + def filter(self, qs, value): + if value == 'true': + return qs.filter(tags__is_inbox_tag=True) + elif value == 'false': + return qs.exclude(tags__is_inbox_tag=True) + else: + return qs + + class DocumentFilterSet(FilterSet): is_tagged = BooleanFilter( @@ -45,6 +72,10 @@ class DocumentFilterSet(FilterSet): exclude=True ) + tags__id__all = TagsFilter() + + is_in_inbox = InboxFilter() + class Meta: model = Document fields = { @@ -68,3 +99,16 @@ class DocumentFilterSet(FilterSet): "document_type__name": CHAR_KWARGS, } + + +class LogFilterSet(FilterSet): + + class Meta: + model = Log + fields = { + + "level": INT_KWARGS, + "created": DATE_KWARGS, + "group": ID_KWARGS + + } diff --git a/src/documents/index.py b/src/documents/index.py index 62d3b822a..eb1a9c739 100644 --- a/src/documents/index.py +++ b/src/documents/index.py @@ -1,12 
+1,10 @@ -from collections import Iterable +import logging from django.db import models from django.dispatch import receiver -from whoosh.fields import Schema, TEXT, NUMERIC, DATETIME, KEYWORD +from whoosh.fields import Schema, TEXT, NUMERIC from whoosh.highlight import Formatter, get_text from whoosh.index import create_in, exists_in, open_dir -from whoosh.qparser import QueryParser -from whoosh.query import terms from whoosh.writing import AsyncWriter from documents.models import Document @@ -57,7 +55,7 @@ def get_schema(): return Schema( id=NUMERIC(stored=True, unique=True, numtype=int), title=TEXT(stored=True), - content=TEXT(stored=True) + content=TEXT() ) @@ -69,8 +67,9 @@ def open_index(recreate=False): def update_document(writer, doc): + logging.getLogger(__name__).debug("Updating index with document{}".format(str(doc))) writer.update_document( - id=doc.id, + id=doc.pk, title=doc.title, content=doc.content ) @@ -85,24 +84,10 @@ def add_document_to_index(sender, instance, **kwargs): @receiver(models.signals.post_delete, sender=Document) def remove_document_from_index(sender, instance, **kwargs): + logging.getLogger(__name__).debug("Removing document {} from index".format(str(instance))) ix = open_index() with AsyncWriter(ix) as writer: - writer.delete_by_term('id', instance.id) - - -def query_index(ix, querystr): - with ix.searcher() as searcher: - query = QueryParser("content", ix.schema, termclass=terms.FuzzyTerm).parse(querystr) - results = searcher.search(query) - results.formatter = JsonFormatter() - results.fragmenter.surround = 50 - - return [ - {'id': r['id'], - 'highlights': r.highlights("content"), - 'score': r.score, - 'title': r['title'] - } for r in results] + writer.delete_by_term('id', instance.pk) def autocomplete(ix, term, limit=10): diff --git a/src/documents/loggers.py b/src/documents/loggers.py index a35841299..d9c90ab16 100644 --- a/src/documents/loggers.py +++ b/src/documents/loggers.py @@ -1,16 +1,8 @@ import logging -class 
PaperlessLogger(logging.StreamHandler): - """ - A logger smart enough to know to log some kinds of messages to the database - for later retrieval in a pretty interface. - """ - +class PaperlessHandler(logging.Handler): def emit(self, record): - - logging.StreamHandler.emit(self, record) - # We have to do the import here or Django will barf when it tries to # load this because the apps aren't loaded at that point from .models import Log diff --git a/src/documents/management/commands/document_consumer.py b/src/documents/management/commands/document_consumer.py index 4a3d24bf5..93ad6947c 100644 --- a/src/documents/management/commands/document_consumer.py +++ b/src/documents/management/commands/document_consumer.py @@ -1,12 +1,13 @@ import logging import os -import time from django.conf import settings -from django.core.management.base import BaseCommand, CommandError +from django.core.management.base import BaseCommand -from ...consumer import Consumer, ConsumerError -from ...mail import MailFetcher, MailFetcherError +from watchdog.observers import Observer +from watchdog.events import FileSystemEventHandler + +from documents.consumer import Consumer try: from inotify_simple import INotify, flags @@ -14,6 +15,15 @@ except ImportError: INotify = flags = None +class Handler(FileSystemEventHandler): + + def __init__(self, consumer): + self.consumer = consumer + + def on_created(self, event): + self.consumer.try_consume_file(event.src_path) + + class Command(BaseCommand): """ On every iteration of an infinite loop, consume what we can from the @@ -29,6 +39,8 @@ class Command(BaseCommand): self.mail_fetcher = None self.first_iteration = True + self.consumer = Consumer() + BaseCommand.__init__(self, *args, **kwargs) def add_arguments(self, parser): @@ -38,111 +50,34 @@ class Command(BaseCommand): nargs="?", help="The consumption directory." 
) - parser.add_argument( - "--loop-time", - default=settings.CONSUMER_LOOP_TIME, - type=int, - help="Wait time between each loop (in seconds)." - ) - parser.add_argument( - "--mail-delta", - default=10, - type=int, - help="Wait time between each mail fetch (in minutes)." - ) - parser.add_argument( - "--oneshot", - action="store_true", - help="Run only once." - ) - parser.add_argument( - "--no-inotify", - action="store_true", - help="Don't use inotify, even if it's available.", - default=False - ) def handle(self, *args, **options): self.verbosity = options["verbosity"] directory = options["directory"] - loop_time = options["loop_time"] - mail_delta = options["mail_delta"] * 60 - use_inotify = INotify is not None and options["no_inotify"] is False - - try: - self.file_consumer = Consumer(consume=directory) - self.mail_fetcher = MailFetcher(consume=directory) - except (ConsumerError, MailFetcherError) as e: - raise CommandError(e) for d in (settings.ORIGINALS_DIR, settings.THUMBNAIL_DIR): os.makedirs(d, exist_ok=True) logging.getLogger(__name__).info( - "Starting document consumer at {}{}".format( - directory, - " with inotify" if use_inotify else "" + "Starting document consumer at {}".format( + directory ) ) - if options["oneshot"]: - self.loop_step(mail_delta) - else: - try: - if use_inotify: - self.loop_inotify(mail_delta) - else: - self.loop(loop_time, mail_delta) - except KeyboardInterrupt: - print("Exiting") + # Consume all files as this is not done initially by the watchdog + for entry in os.scandir(directory): + if entry.is_file(): + self.consumer.try_consume_file(entry.path) - def loop(self, loop_time, mail_delta): - while True: - start_time = time.time() - if self.verbosity > 1: - print(".", int(start_time)) - self.loop_step(mail_delta, start_time) - # Sleep until the start of the next loop step - time.sleep(max(0, start_time + loop_time - time.time())) - - def loop_step(self, mail_delta, time_now=None): - - # Occasionally fetch mail and store it to be 
consumed on the next loop - # We fetch email when we first start up so that it is not necessary to - # wait for 10 minutes after making changes to the config file. - next_mail_time = self.mail_fetcher.last_checked + mail_delta - if self.first_iteration or time_now > next_mail_time: - self.first_iteration = False - self.mail_fetcher.pull() - - self.file_consumer.consume_new_files() - - def loop_inotify(self, mail_delta): - directory = self.file_consumer.consume - inotify = INotify() - inotify.add_watch(directory, flags.CLOSE_WRITE | flags.MOVED_TO) - - # Run initial mail fetch and consume all currently existing documents - self.loop_step(mail_delta) - next_mail_time = self.mail_fetcher.last_checked + mail_delta - - while True: - # Consume documents until next_mail_time - while True: - delta = next_mail_time - time.time() - if delta > 0: - for event in inotify.read(timeout=delta): - file = os.path.join(directory, event.name) - if os.path.isfile(file): - self.file_consumer.try_consume_file(file) - else: - self.logger.warning( - "Skipping %s as it is not a file", - file - ) - else: - break - - self.mail_fetcher.pull() - next_mail_time = self.mail_fetcher.last_checked + mail_delta + # Start the watchdog. Woof! 
+ observer = Observer() + event_handler = Handler(self.consumer) + observer.schedule(event_handler, directory, recursive=True) + observer.start() + try: + while observer.is_alive(): + observer.join(1) + except KeyboardInterrupt: + observer.stop() + observer.join() diff --git a/src/documents/management/commands/document_exporter.py b/src/documents/management/commands/document_exporter.py index 502bec0c1..43582a619 100644 --- a/src/documents/management/commands/document_exporter.py +++ b/src/documents/management/commands/document_exporter.py @@ -64,12 +64,14 @@ class Command(Renderable, BaseCommand): document = document_map[document_dict["pk"]] - file_target = os.path.join(self.target, document.file_name) + unique_filename = "{:07}_{}".format(document.pk, document.file_name) - thumbnail_name = document.file_name + "-thumbnail.png" + file_target = os.path.join(self.target, unique_filename) + + thumbnail_name = unique_filename + "-thumbnail.png" thumbnail_target = os.path.join(self.target, thumbnail_name) - document_dict[EXPORTER_FILE_NAME] = document.file_name + document_dict[EXPORTER_FILE_NAME] = unique_filename document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name print("Exporting: {}".format(file_target)) diff --git a/src/documents/management/commands/document_renamer.py b/src/documents/management/commands/document_renamer.py deleted file mode 100644 index d7d77a111..000000000 --- a/src/documents/management/commands/document_renamer.py +++ /dev/null @@ -1,24 +0,0 @@ -from django.core.management.base import BaseCommand - -from documents.models import Document, Tag - -from ...mixins import Renderable - - -class Command(Renderable, BaseCommand): - - help = """ - This will rename all documents to match the latest filename format. 
- """.replace(" ", "") - - def __init__(self, *args, **kwargs): - self.verbosity = 0 - BaseCommand.__init__(self, *args, **kwargs) - - def handle(self, *args, **options): - - self.verbosity = options["verbosity"] - - for document in Document.objects.all(): - # Saving the document again will generate a new filename and rename - document.save() diff --git a/src/documents/management/commands/document_rerun_ocr.py b/src/documents/management/commands/document_rerun_ocr.py new file mode 100644 index 000000000..794357420 --- /dev/null +++ b/src/documents/management/commands/document_rerun_ocr.py @@ -0,0 +1,60 @@ +import argparse +import threading +from multiprocessing import Pool +from multiprocessing.pool import ThreadPool + +from django.core.management.base import BaseCommand + +from documents.consumer import Consumer +from documents.models import Log, Document +from documents.parsers import get_parser_class + + +def process_document(doc): + parser_class = get_parser_class(doc.file_name) + if not parser_class: + print("no parser available") + else: + print("Parser: {}".format(parser_class.__name__)) + parser = parser_class(doc.source_path, None) + try: + text = parser.get_text() + doc.content = text + doc.save() + finally: + parser.cleanup() + + +def document_index(value): + ivalue = int(value) + if not (1 <= ivalue <= Document.objects.count()): + raise argparse.ArgumentTypeError( + "{} is not a valid document index (out of range)".format(value)) + + return ivalue + + +class Command(BaseCommand): + + help = "Performs OCR on all documents again!" 
+ + + def add_arguments(self, parser): + parser.add_argument( + "-s", "--start_index", + default=None, + type=document_index + ) + + def handle(self, *args, **options): + + docs = Document.objects.all().order_by("added") + + indices = range(options['start_index']-1, len(docs)) if options['start_index'] else range(len(docs)) + + for i in indices: + doc = docs[i] + print("==================================") + print("{} out of {}: {}".format(i+1, len(docs), doc.file_name)) + print("==================================") + process_document(doc) diff --git a/src/documents/management/commands/document_retagger.py b/src/documents/management/commands/document_retagger.py index 007286935..9238bea71 100755 --- a/src/documents/management/commands/document_retagger.py +++ b/src/documents/management/commands/document_retagger.py @@ -3,8 +3,7 @@ import logging from django.core.management.base import BaseCommand from documents.classifier import DocumentClassifier -from documents.models import Document, Tag - +from documents.models import Document from ...mixins import Renderable from ...signals.handlers import set_correspondent, set_document_type, set_tags diff --git a/src/documents/managers.py b/src/documents/managers.py deleted file mode 100644 index f324137ef..000000000 --- a/src/documents/managers.py +++ /dev/null @@ -1,70 +0,0 @@ -from django.conf import settings - -from django.db import models -from django.db.models.aggregates import Max - - -class GroupConcat(models.Aggregate): - """ - Theoretically, this should work in Sqlite, PostgreSQL, and MySQL, but I've - only ever tested it in Sqlite. 
- """ - - ENGINE_SQLITE = 1 - ENGINE_POSTGRESQL = 2 - ENGINE_MYSQL = 3 - ENGINES = { - "django.db.backends.sqlite3": ENGINE_SQLITE, - "django.db.backends.postgresql_psycopg2": ENGINE_POSTGRESQL, - "django.db.backends.postgresql": ENGINE_POSTGRESQL, - "django.db.backends.mysql": ENGINE_MYSQL - } - - def __init__(self, expression, separator="\n", **extra): - - self.engine = self._get_engine() - self.function = self._get_function() - self.template = self._get_template(separator) - - models.Aggregate.__init__( - self, - expression, - output_field=models.CharField(), - **extra - ) - - def _get_engine(self): - engine = settings.DATABASES["default"]["ENGINE"] - try: - return self.ENGINES[engine] - except KeyError: - raise NotImplementedError( - "There's currently no support for {} when it comes to group " - "concatenation in Paperless".format(engine) - ) - - def _get_function(self): - if self.engine == self.ENGINE_POSTGRESQL: - return "STRING_AGG" - return "GROUP_CONCAT" - - def _get_template(self, separator): - if self.engine == self.ENGINE_MYSQL: - return "%(function)s(%(expressions)s SEPARATOR '{}')".format( - separator) - return "%(function)s(%(expressions)s, '{}')".format(separator) - - -class LogQuerySet(models.query.QuerySet): - - def by_group(self): - return self.values("group").annotate( - time=Max("modified"), - messages=GroupConcat("message"), - ).order_by("-time") - - -class LogManager(models.Manager): - - def get_queryset(self): - return LogQuerySet(self.model, using=self._db) diff --git a/src/documents/matching.py b/src/documents/matching.py index a52f06a06..045e2863a 100644 --- a/src/documents/matching.py +++ b/src/documents/matching.py @@ -9,7 +9,7 @@ def match_correspondents(document_content, classifier): correspondents = Correspondent.objects.all() predicted_correspondent_id = classifier.predict_correspondent(document_content) if classifier else None - matched_correspondents = [o for o in correspondents if matches(o, document_content) or o.id == 
predicted_correspondent_id] + matched_correspondents = [o for o in correspondents if matches(o, document_content) or o.pk == predicted_correspondent_id] return matched_correspondents @@ -17,7 +17,7 @@ def match_document_types(document_content, classifier): document_types = DocumentType.objects.all() predicted_document_type_id = classifier.predict_document_type(document_content) if classifier else None - matched_document_types = [o for o in document_types if matches(o, document_content) or o.id == predicted_document_type_id] + matched_document_types = [o for o in document_types if matches(o, document_content) or o.pk == predicted_document_type_id] return matched_document_types @@ -25,7 +25,7 @@ def match_tags(document_content, classifier): objects = Tag.objects.all() predicted_tag_ids = classifier.predict_tags(document_content) if classifier else [] - matched_tags = [o for o in objects if matches(o, document_content) or o.id in predicted_tag_ids] + matched_tags = [o for o in objects if matches(o, document_content) or o.pk in predicted_tag_ids] return matched_tags diff --git a/src/documents/migrations/0023_document_current_filename.py b/src/documents/migrations/0023_document_current_filename.py new file mode 100644 index 000000000..be78ea863 --- /dev/null +++ b/src/documents/migrations/0023_document_current_filename.py @@ -0,0 +1,37 @@ +# Generated by Django 2.0.10 on 2019-04-26 18:57 + +from django.db import migrations, models + + +def set_filename(apps, schema_editor): + Document = apps.get_model("documents", "Document") + for doc in Document.objects.all(): + file_name = "{:07}.{}".format(doc.pk, doc.file_type) + if doc.storage_type == "gpg": + file_name += ".gpg" + + # Set filename + doc.filename = file_name + + # Save document + doc.save() + + +class Migration(migrations.Migration): + + dependencies = [ + ('documents', '0022_auto_20181007_1420'), + ] + + operations = [ + migrations.AddField( + model_name='document', + name='filename', + 
field=models.FilePathField(default=None, + null=True, + editable=False, + help_text='Current filename in storage', + max_length=256), + ), + migrations.RunPython(set_filename) + ] diff --git a/src/documents/migrations/1000_update_paperless.py b/src/documents/migrations/1000_update_paperless.py new file mode 100644 index 000000000..900510c72 --- /dev/null +++ b/src/documents/migrations/1000_update_paperless.py @@ -0,0 +1,73 @@ +# Generated by Django 3.1.2 on 2020-10-29 14:29 +import os + +from django.db import migrations + +from django.conf import settings + + +def make_index(apps, schema_editor): + Document = apps.get_model("documents", "Document") + documents = Document.objects.all() + print() + try: + print(" --> Creating document index...") + from whoosh.writing import AsyncWriter + from documents import index + ix = index.open_index(recreate=True) + with AsyncWriter(ix) as writer: + for document in documents: + index.update_document(writer, document) + except ImportError: + # index may not be relevant anymore + print(" --> Cannot create document index.") + + +def restore_filenames(apps, schema_editor): + Document = apps.get_model("documents", "Document") + for doc in Document.objects.all(): + file_name = "{:07}.{}".format(doc.pk, doc.file_type) + if doc.storage_type == "gpg": + file_name += ".gpg" + + if not doc.filename == file_name: + try: + print("file was renamed, restoring {} to {}".format(doc.filename, file_name)) + os.rename(os.path.join(settings.ORIGINALS_DIR, doc.filename), + os.path.join(settings.ORIGINALS_DIR, file_name)) + except PermissionError: + pass + except FileNotFoundError: + pass + + +def initialize_document_classifier(apps, schema_editor): + try: + print("Initalizing document classifier...") + from documents.classifier import DocumentClassifier + classifier = DocumentClassifier() + try: + classifier.train() + classifier.save_classifier() + except Exception as e: + print("Classifier error: {}".format(e)) + except ImportError: + 
print("Document classifier not found, skipping") + + + +class Migration(migrations.Migration): + + dependencies = [ + ('documents', '0023_document_current_filename'), + ] + + operations = [ + migrations.RunPython(make_index, migrations.RunPython.noop), + migrations.RunPython(restore_filenames), + migrations.RunPython(initialize_document_classifier, migrations.RunPython.noop), + migrations.RemoveField( + model_name='document', + name='filename', + ), + ] diff --git a/src/documents/migrations/1001_workflow_improvements.py b/src/documents/migrations/1001_workflow_improvements.py index d0e5158db..94ad8135d 100755 --- a/src/documents/migrations/1001_workflow_improvements.py +++ b/src/documents/migrations/1001_workflow_improvements.py @@ -6,7 +6,7 @@ from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ - ('documents', '0022_auto_20181007_1420'), + ('documents', '1000_update_paperless'), ] operations = [ diff --git a/src/documents/migrations/1005_auto_20201102_0007.py b/src/documents/migrations/1005_auto_20201102_0007.py new file mode 100644 index 000000000..146cc0b5a --- /dev/null +++ b/src/documents/migrations/1005_auto_20201102_0007.py @@ -0,0 +1,26 @@ +# Generated by Django 3.1.2 on 2020-11-02 00:07 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('documents', '1004_auto_20201029_1331'), + ] + + operations = [ + migrations.AlterModelOptions( + name='log', + options={'ordering': ('-created',)}, + ), + migrations.RemoveField( + model_name='log', + name='modified', + ), + migrations.AlterField( + model_name='log', + name='group', + field=models.UUIDField(blank=True, null=True), + ), + ] diff --git a/src/documents/models.py b/src/documents/models.py index 0eb984f8b..436f5163a 100755 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -3,7 +3,6 @@ import logging import os import re -import uuid from collections import OrderedDict import dateutil.parser @@ 
-13,12 +12,6 @@ from django.template.defaultfilters import slugify from django.utils import timezone from django.utils.text import slugify -from .managers import LogManager - -try: - from django.core.urlresolvers import reverse -except ImportError: - from django.urls import reverse class MatchingModel(models.Model): @@ -263,33 +256,17 @@ class Log(models.Model): (logging.CRITICAL, "Critical"), ) - group = models.UUIDField(blank=True) + group = models.UUIDField(blank=True, null=True) message = models.TextField() level = models.PositiveIntegerField(choices=LEVELS, default=logging.INFO) created = models.DateTimeField(auto_now_add=True) - modified = models.DateTimeField(auto_now=True) - - objects = LogManager() class Meta: - ordering = ("-modified",) + ordering = ("-created",) def __str__(self): return self.message - def save(self, *args, **kwargs): - """ - To allow for the case where we don't want to group the message, we - shouldn't force the caller to specify a one-time group value. However, - allowing group=None means that the manager can't differentiate the - different un-grouped messages, so instead we set a random one here. - """ - - if not self.group: - self.group = uuid.uuid4() - - models.Model.save(self, *args, **kwargs) - class FileInfo: diff --git a/src/documents/parsers.py b/src/documents/parsers.py index c0a80a55d..0cbd13987 100644 --- a/src/documents/parsers.py +++ b/src/documents/parsers.py @@ -20,6 +20,8 @@ from django.utils import timezone # - XX. 
MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits # - MONTH ZZZZ, with ZZZZ being 4 digits # - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits +from documents.signals import document_consumer_declaration + DATE_REGEX = re.compile( r'(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|' + # NOQA: E501 r'(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|' + # NOQA: E501 @@ -29,6 +31,71 @@ DATE_REGEX = re.compile( ) +logger = logging.getLogger(__name__) + + +def get_parser_class(doc): + """ + Determine the appropriate parser class based on the file + """ + + parsers = [] + for response in document_consumer_declaration.send(None): + parsers.append(response[1]) + + #TODO: add a check that checks parser availability. + + options = [] + for parser in parsers: + result = parser(doc) + if result: + options.append(result) + + if not options: + return None + + # Return the parser with the highest weight. 
+ return sorted( + options, key=lambda _: _["weight"], reverse=True)[0]["parser"] + + +def run_convert(input, output, density=None, scale=None, alpha=None, strip=False, trim=False, type=None, depth=None, extra=None, logging_group=None): + environment = os.environ.copy() + if settings.CONVERT_MEMORY_LIMIT: + environment["MAGICK_MEMORY_LIMIT"] = settings.CONVERT_MEMORY_LIMIT + if settings.CONVERT_TMPDIR: + environment["MAGICK_TMPDIR"] = settings.CONVERT_TMPDIR + + args = [settings.CONVERT_BINARY] + args += ['-density', str(density)] if density else [] + args += ['-scale', str(scale)] if scale else [] + args += ['-alpha', str(alpha)] if alpha else [] + args += ['-strip'] if strip else [] + args += ['-trim'] if trim else [] + args += ['-type', str(type)] if type else [] + args += ['-depth', str(depth)] if depth else [] + args += [input, output] + + logger.debug("Execute: " + " ".join(args), extra={'group': logging_group}) + + if not subprocess.Popen(args, env=environment).wait() == 0: + raise ParseError("Convert failed at {}".format(args)) + + +def run_unpaper(pnm, logging_group=None): + pnm_out = pnm.replace(".pnm", ".unpaper.pnm") + + command_args = (settings.UNPAPER_BINARY, "--overwrite", "--quiet", pnm, + pnm_out) + + logger.debug("Execute: " + " ".join(command_args), extra={'group': logging_group}) + + if not subprocess.Popen(command_args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL).wait() == 0: + raise ParseError("Unpaper failed at {}".format(command_args)) + + return pnm_out + + class ParseError(Exception): pass @@ -39,16 +106,11 @@ class DocumentParser: `paperless_tesseract.parsers` for inspiration. 
""" - SCRATCH = settings.SCRATCH_DIR - DATE_ORDER = settings.DATE_ORDER - FILENAME_DATE_ORDER = settings.FILENAME_DATE_ORDER - OPTIPNG = settings.OPTIPNG_BINARY - - def __init__(self, path): + def __init__(self, path, logging_group): self.document_path = path - self.tempdir = tempfile.mkdtemp(prefix="paperless-", dir=self.SCRATCH) + self.tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR) self.logger = logging.getLogger(__name__) - self.logging_group = None + self.logging_group = logging_group def get_thumbnail(self): """ @@ -60,7 +122,10 @@ class DocumentParser: out_path = os.path.join(self.tempdir, "optipng.png") - args = (self.OPTIPNG, "-o5", in_path, "-out", out_path) + args = (settings.OPTIPNG_BINARY, "-silent", "-o5", in_path, "-out", out_path) + + self.log('debug', 'Execute: ' + " ".join(args)) + if not subprocess.Popen(args).wait() == 0: raise ParseError("Optipng failed at {}".format(args)) @@ -101,13 +166,13 @@ class DocumentParser: title = os.path.basename(self.document_path) # if filename date parsing is enabled, search there first: - if self.FILENAME_DATE_ORDER: + if settings.FILENAME_DATE_ORDER: self.log("info", "Checking document title for date") for m in re.finditer(DATE_REGEX, title): date_string = m.group(0) try: - date = __parser(date_string, self.FILENAME_DATE_ORDER) + date = __parser(date_string, settings.FILENAME_DATE_ORDER) except (TypeError, ValueError): # Skip all matches that do not parse to a proper date continue @@ -133,7 +198,7 @@ class DocumentParser: date_string = m.group(0) try: - date = __parser(date_string, self.DATE_ORDER) + date = __parser(date_string, settings.DATE_ORDER) except (TypeError, ValueError): # Skip all matches that do not parse to a proper date continue diff --git a/src/documents/serialisers.py b/src/documents/serialisers.py index e1abf9685..60cd7b293 100644 --- a/src/documents/serialisers.py +++ b/src/documents/serialisers.py @@ -105,12 +105,13 @@ class 
DocumentSerializer(serializers.ModelSerializer): class LogSerializer(serializers.ModelSerializer): - time = serializers.DateTimeField() - messages = serializers.CharField() class Meta: model = Log fields = ( - "time", - "messages" + "id", + "created", + "message", + "group", + "level" ) diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py index 5adf9df68..231a39e0d 100755 --- a/src/documents/signals/handlers.py +++ b/src/documents/signals/handlers.py @@ -8,7 +8,6 @@ from django.contrib.auth.models import User from django.contrib.contenttypes.models import ContentType from django.utils import timezone -from documents.classifier import DocumentClassifier from .. import index, matching from ..models import Document, Tag @@ -17,10 +16,6 @@ def logger(message, group): logging.getLogger(__name__).debug(message, extra={"group": group}) -def index_document(sender, document=None, logging_group=None, **kwargs): - index.add_document_to_index(sender, instance=document) - - def add_inbox_tags(sender, document=None, logging_group=None, **kwargs): inbox_tags = Tag.objects.filter(is_inbox_tag=True) document.tags.add(*inbox_tags) @@ -52,13 +47,14 @@ def set_correspondent(sender, document=None, logging_group=None, classifier=None ) return - logger( - 'Assigning correspondent "{}" to "{}" '.format(selected, document), - logging_group - ) + if selected or replace: + logger( + 'Assigning correspondent "{}" to "{}" '.format(selected, document), + logging_group + ) - document.correspondent = selected - document.save(update_fields=("correspondent",)) + document.correspondent = selected + document.save(update_fields=("correspondent",)) def set_document_type(sender, document=None, logging_group=None, classifier=None, replace=False, use_first=True, **kwargs): @@ -88,13 +84,14 @@ def set_document_type(sender, document=None, logging_group=None, classifier=None ) return - logger( - 'Assigning document type "{}" to "{}" '.format(selected, document), - logging_group 
- ) + if selected or replace: + logger( + 'Assigning document type "{}" to "{}" '.format(selected, document), + logging_group + ) - document.document_type = selected - document.save(update_fields=("document_type",)) + document.document_type = selected + document.save(update_fields=("document_type",)) def set_tags(sender, document=None, logging_group=None, classifier=None, replace=False, **kwargs): @@ -133,7 +130,7 @@ def run_post_consume_script(sender, document, **kwargs): Popen(( settings.POST_CONSUME_SCRIPT, - str(document.id), + str(document.pk), document.file_name, document.source_path, document.thumbnail_path, @@ -165,7 +162,7 @@ def set_log_entry(sender, document=None, logging_group=None, **kwargs): action_flag=ADDITION, action_time=timezone.now(), content_type=ct, - object_id=document.id, + object_id=document.pk, user=user, object_repr=document.__str__(), ) diff --git a/src/documents/tests/test_logger.py b/src/documents/tests/test_logger.py index 9b14a3902..51a4fad83 100644 --- a/src/documents/tests/test_logger.py +++ b/src/documents/tests/test_logger.py @@ -25,20 +25,20 @@ class TestPaperlessLog(TestCase): # Debug messages are ignored by default self.logger.debug("This is a debugging message", extra=kw) - self.assertEqual(Log.objects.all().count(), 0) - - self.logger.info("This is an informational message", extra=kw) self.assertEqual(Log.objects.all().count(), 1) - self.logger.warning("This is an warning message", extra=kw) + self.logger.info("This is an informational message", extra=kw) self.assertEqual(Log.objects.all().count(), 2) - self.logger.error("This is an error message", extra=kw) + self.logger.warning("This is an warning message", extra=kw) self.assertEqual(Log.objects.all().count(), 3) - self.logger.critical("This is a critical message", extra=kw) + self.logger.error("This is an error message", extra=kw) self.assertEqual(Log.objects.all().count(), 4) + self.logger.critical("This is a critical message", extra=kw) + 
self.assertEqual(Log.objects.all().count(), 5) + def test_groups(self): kw1 = {"group": uuid.uuid4()} @@ -48,10 +48,6 @@ class TestPaperlessLog(TestCase): with mock.patch("logging.StreamHandler.emit") as __: - # Debug messages are ignored by default - self.logger.debug("This is a debugging message", extra=kw1) - self.assertEqual(Log.objects.all().count(), 0) - self.logger.info("This is an informational message", extra=kw2) self.assertEqual(Log.objects.all().count(), 1) self.assertEqual(Log.objects.filter(group=kw2["group"]).count(), 1) @@ -67,18 +63,3 @@ class TestPaperlessLog(TestCase): self.logger.critical("This is a critical message", extra=kw1) self.assertEqual(Log.objects.all().count(), 4) self.assertEqual(Log.objects.filter(group=kw1["group"]).count(), 2) - - def test_groupped_query(self): - - kw = {"group": uuid.uuid4()} - with mock.patch("logging.StreamHandler.emit") as __: - self.logger.info("Message 0", extra=kw) - self.logger.info("Message 1", extra=kw) - self.logger.info("Message 2", extra=kw) - self.logger.info("Message 3", extra=kw) - - self.assertEqual(Log.objects.all().by_group().count(), 1) - self.assertEqual( - Log.objects.all().by_group()[0]["messages"], - "Message 0\nMessage 1\nMessage 2\nMessage 3" - ) diff --git a/src/documents/views.py b/src/documents/views.py index 9bec12555..f8050a459 100755 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -6,6 +6,9 @@ from django_filters.rest_framework import DjangoFilterBackend from rest_framework.decorators import action from rest_framework.response import Response from rest_framework.views import APIView +from whoosh import highlight +from whoosh.qparser import QueryParser +from whoosh.query import terms from paperless.db import GnuPG from paperless.views import StandardPagination @@ -27,7 +30,8 @@ from .filters import ( CorrespondentFilterSet, DocumentFilterSet, TagFilterSet, - DocumentTypeFilterSet + DocumentTypeFilterSet, + LogFilterSet ) import documents.index as index @@ -147,12 +151,14 
@@ class DocumentViewSet(RetrieveModelMixin, class LogViewSet(ReadOnlyModelViewSet): model = Log - queryset = Log.objects.all().by_group() + + queryset = Log.objects.all() serializer_class = LogSerializer pagination_class = StandardPagination permission_classes = (IsAuthenticated,) filter_backends = (DjangoFilterBackend, OrderingFilter) - ordering_fields = ("time",) + filter_class = LogFilterSet + ordering_fields = ("created",) class SearchView(APIView): @@ -161,16 +167,45 @@ class SearchView(APIView): ix = index.open_index() + def add_infos_to_hit(self, r): + doc = Document.objects.get(id=r['id']) + return {'id': r['id'], + 'highlights': r.highlights("content", text=doc.content), + 'score': r.score, + 'rank': r.rank, + 'document': DocumentSerializer(doc).data, + 'title': r['title'] + } + def get(self, request, format=None): if 'query' in request.query_params: query = request.query_params['query'] - query_results = index.query_index(self.ix, query) - for r in query_results: - r['document'] = DocumentSerializer(Document.objects.get(id=r['id'])).data + try: + page = int(request.query_params.get('page', 1)) + except (ValueError, TypeError): + page = 1 + + with self.ix.searcher() as searcher: + query_parser = QueryParser("content", self.ix.schema, + termclass=terms.FuzzyTerm).parse(query) + result_page = searcher.search_page(query_parser, page) + result_page.results.fragmenter = highlight.ContextFragmenter( + surround=50) + result_page.results.fragmenter = highlight.PinpointFragmenter() + result_page.results.formatter = index.JsonFormatter() + + return Response( + {'count': len(result_page), + 'page': result_page.pagenum, + 'page_count': result_page.pagecount, + 'results': list(map(self.add_infos_to_hit, result_page))}) - return Response(query_results) else: - return Response([]) + return Response({ + 'count': 0, + 'page': 0, + 'page_count': 0, + 'results': []}) class SearchAutoCompleteView(APIView): @@ -194,3 +229,14 @@ class SearchAutoCompleteView(APIView): return 
Response(index.autocomplete(self.ix, term, limit)) else: return Response([]) + + +class StatisticsView(APIView): + + permission_classes = (IsAuthenticated,) + + def get(self, request, format=None): + return Response({ + 'documents_total': Document.objects.all().count(), + 'documents_inbox': Document.objects.filter(tags__is_inbox_tag=True).distinct().count() + }) diff --git a/src/paperless/mixins.py b/src/paperless/mixins.py deleted file mode 100644 index f4f1fcdec..000000000 --- a/src/paperless/mixins.py +++ /dev/null @@ -1,46 +0,0 @@ -from django.contrib.auth.mixins import AccessMixin -from django.contrib.auth import authenticate, login -import base64 - - -class SessionOrBasicAuthMixin(AccessMixin): - """ - Session or Basic Authentication mixin for Django. - It determines if the requester is already logged in or if they have - provided proper http-authorization and returning the view if all goes - well, otherwise responding with a 401. - - Base for mixin found here: https://djangosnippets.org/snippets/3073/ - """ - - def dispatch(self, request, *args, **kwargs): - - # check if user is authenticated via the session - if request.user.is_authenticated: - - # Already logged in, just return the view. - return super(SessionOrBasicAuthMixin, self).dispatch( - request, *args, **kwargs - ) - - # apparently not authenticated via session, maybe via HTTP Basic? 
- if 'HTTP_AUTHORIZATION' in request.META: - auth = request.META['HTTP_AUTHORIZATION'].split() - if len(auth) == 2: - # NOTE: Support for only basic authentication - if auth[0].lower() == "basic": - authString = base64.b64decode(auth[1]).decode('utf-8') - uname, passwd = authString.split(':') - user = authenticate(username=uname, password=passwd) - if user is not None: - if user.is_active: - login(request, user) - request.user = user - return super( - SessionOrBasicAuthMixin, self - ).dispatch( - request, *args, **kwargs - ) - - # nope, really not authenticated - return self.handle_no_permission() diff --git a/src/paperless/settings.py b/src/paperless/settings.py index 9c5f2bd0f..e6aa86217 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -1,22 +1,10 @@ -""" -Django settings for paperless project. - -Generated by 'django-admin startproject' using Django 1.9. - -For more information on this file, see -https://docs.djangoproject.com/en/1.10/topics/settings/ - -For the full list of settings and their values, see -https://docs.djangoproject.com/en/1.10/ref/settings/ -""" - import json +import multiprocessing import os import re from dotenv import load_dotenv - # Tap paperless.conf if it's available if os.path.exists("../paperless.conf"): load_dotenv("../paperless.conf") @@ -33,45 +21,30 @@ def __get_boolean(key, default="NO"): """ return bool(os.getenv(key, default).lower() in ("yes", "y", "1", "t", "true")) +############################################################################### +# Directories # +############################################################################### -# Build paths inside the project like this: os.path.join(BASE_DIR, ...) 
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -DATA_DIR = os.getenv('PAPERLESS_DATA_DIR', os.path.join(BASE_DIR, "..", "data")) +STATIC_ROOT = os.getenv("PAPERLESS_STATICDIR", os.path.join(BASE_DIR, "..", "static")) MEDIA_ROOT = os.getenv('PAPERLESS_MEDIA_ROOT', os.path.join(BASE_DIR, "..", "media")) - -INDEX_DIR = os.path.join(DATA_DIR, "index") ORIGINALS_DIR = os.path.join(MEDIA_ROOT, "documents", "originals") THUMBNAIL_DIR = os.path.join(MEDIA_ROOT, "documents", "thumbnails") + +DATA_DIR = os.getenv('PAPERLESS_DATA_DIR', os.path.join(BASE_DIR, "..", "data")) +INDEX_DIR = os.path.join(DATA_DIR, "index") MODEL_FILE = os.path.join(DATA_DIR, "classification_model.pickle") -# Quick-start development settings - unsuitable for production -# See https://docs.djangoproject.com/en/1.10/howto/deployment/checklist/ +CONSUMPTION_DIR = os.getenv("PAPERLESS_CONSUMPTION_DIR", os.path.join(BASE_DIR, "..", "consume")) -# The secret key has a default that should be fine so long as you're hosting -# Paperless on a closed network. However, if you're putting this anywhere -# public, you should change the key to something unique and verbose. -SECRET_KEY = os.getenv( - "PAPERLESS_SECRET_KEY", - "e11fl1oa-*ytql8p)(06fbj4ukrlo+n7k&q5+$1md7i+mge=ee" -) +# This will be created if it doesn't exist +SCRATCH_DIR = os.getenv("PAPERLESS_SCRATCH_DIR", "/tmp/paperless") - -# SECURITY WARNING: don't run with debug turned on in production! 
-DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO") - -LOGIN_URL = "admin:login" - -_allowed_hosts = os.getenv("PAPERLESS_ALLOWED_HOSTS") -if _allowed_hosts: - ALLOWED_HOSTS = _allowed_hosts.split(",") -else: - ALLOWED_HOSTS = ["*"] - -FORCE_SCRIPT_NAME = os.getenv("PAPERLESS_FORCE_SCRIPT_NAME") - -# Application definition +############################################################################### +# Application Definition # +############################################################################### INSTALLED_APPS = [ "whitenoise.runserver_nostatic", @@ -103,9 +76,6 @@ REST_FRAMEWORK = { 'rest_framework.authentication.BasicAuthentication', 'rest_framework.authentication.TokenAuthentication', 'paperless.auth.QueryTokenAuthentication' - ], - 'DEFAULT_PERMISSION_CLASSES': [ - 'rest_framework.permissions.IsAuthenticated', ] } @@ -121,18 +91,17 @@ MIDDLEWARE = [ 'django.middleware.clickjacking.XFrameOptionsMiddleware', ] -X_FRAME_OPTIONS = 'SAMEORIGIN' - -# We allow CORS from localhost:8080 -CORS_ORIGIN_WHITELIST = tuple(os.getenv("PAPERLESS_CORS_ALLOWED_HOSTS", "http://localhost:8080,https://localhost:8080,http://localhost:4200").split(",")) - -# If auth is disabled, we just use our "bypass" authentication middleware -if bool(os.getenv("PAPERLESS_DISABLE_LOGIN", "false").lower() in ("yes", "y", "1", "t", "true")): - _index = MIDDLEWARE.index("django.contrib.auth.middleware.AuthenticationMiddleware") - MIDDLEWARE[_index] = "paperless.middleware.Middleware" - ROOT_URLCONF = 'paperless.urls' +LOGIN_URL = "admin:login" + +FORCE_SCRIPT_NAME = os.getenv("PAPERLESS_FORCE_SCRIPT_NAME") + +WSGI_APPLICATION = 'paperless.wsgi.application' + +STATIC_URL = os.getenv("PAPERLESS_STATIC_URL", "/static/") + +# what is this used for? 
TEMPLATES = [ { 'BACKEND': 'django.template.backends.django.DjangoTemplates', @@ -149,38 +118,40 @@ TEMPLATES = [ }, ] -WSGI_APPLICATION = 'paperless.wsgi.application' +############################################################################### +# Security # +############################################################################### +# NEVER RUN WITH DEBUG IN PRODUCTION. +DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO") -# Database -# https://docs.djangoproject.com/en/1.10/ref/settings/#databases +X_FRAME_OPTIONS = 'SAMEORIGIN' -DATABASES = { - "default": { - "ENGINE": "django.db.backends.sqlite3", - "NAME": os.path.join( - DATA_DIR, - "db.sqlite3" - ) - } -} +# We allow CORS from localhost:8080 +CORS_ORIGIN_WHITELIST = tuple(os.getenv("PAPERLESS_CORS_ALLOWED_HOSTS", "http://localhost:8080,https://localhost:8080").split(",")) -if os.getenv("PAPERLESS_DBENGINE"): - DATABASES["default"] = { - "ENGINE": os.getenv("PAPERLESS_DBENGINE"), - "NAME": os.getenv("PAPERLESS_DBNAME", "paperless"), - "USER": os.getenv("PAPERLESS_DBUSER"), - } - if os.getenv("PAPERLESS_DBPASS"): - DATABASES["default"]["PASSWORD"] = os.getenv("PAPERLESS_DBPASS") - if os.getenv("PAPERLESS_DBHOST"): - DATABASES["default"]["HOST"] = os.getenv("PAPERLESS_DBHOST") - if os.getenv("PAPERLESS_DBPORT"): - DATABASES["default"]["PORT"] = os.getenv("PAPERLESS_DBPORT") +if DEBUG: + # Allow access from the angular development server during debugging + CORS_ORIGIN_WHITELIST += ('http://localhost:4200',) +# If auth is disabled, we just use our "bypass" authentication middleware +if bool(os.getenv("PAPERLESS_DISABLE_LOGIN", "false").lower() in ("yes", "y", "1", "t", "true")): + _index = MIDDLEWARE.index("django.contrib.auth.middleware.AuthenticationMiddleware") + MIDDLEWARE[_index] = "paperless.middleware.Middleware" -# Password validation -# https://docs.djangoproject.com/en/1.10/ref/settings/#auth-password-validators +# The secret key has a default that should be fine so long as you're hosting +# 
Paperless on a closed network. However, if you're putting this anywhere +# public, you should change the key to something unique and verbose. +SECRET_KEY = os.getenv( + "PAPERLESS_SECRET_KEY", + "e11fl1oa-*ytql8p)(06fbj4ukrlo+n7k&q5+$1md7i+mge=ee" +) + +_allowed_hosts = os.getenv("PAPERLESS_ALLOWED_HOSTS") +if _allowed_hosts: + ALLOWED_HOSTS = _allowed_hosts.split(",") +else: + ALLOWED_HOSTS = ["*"] AUTH_PASSWORD_VALIDATORS = [ { @@ -197,9 +168,45 @@ AUTH_PASSWORD_VALIDATORS = [ }, ] +# Disable Django's artificial limit on the number of form fields to submit at +# once. This is a protection against overloading the server, but since this is +# a self-hosted sort of gig, the benefits of being able to mass-delete a tonne +# of log entries outweight the benefits of such a safeguard. -# Internationalization -# https://docs.djangoproject.com/en/1.10/topics/i18n/ +DATA_UPLOAD_MAX_NUMBER_FIELDS = None + +############################################################################### +# Database # +############################################################################### + +DATABASES = { + "default": { + "ENGINE": "django.db.backends.sqlite3", + "NAME": os.path.join( + DATA_DIR, + "db.sqlite3" + ) + } +} + +# Always have sqlite available as a second option for management commands +# This is important when migrating to/from sqlite +DATABASES['sqlite'] = DATABASES['default'].copy() + +if os.getenv("PAPERLESS_DBHOST"): + DATABASES["default"] = { + "ENGINE": "django.db.backends.postgresql_psycopg2", + "HOST": os.getenv("PAPERLESS_DBHOST"), + "NAME": os.getenv("PAPERLESS_DBNAME", "paperless"), + "USER": os.getenv("PAPERLESS_DBUSER", "paperless"), + "PASSWORD": os.getenv("PAPERLESS_DBPASS", "paperless"), + } + if os.getenv("PAPERLESS_DBPORT"): + DATABASES["default"]["PORT"] = os.getenv("PAPERLESS_DBPORT") + +############################################################################### +# Internationalization # 
+############################################################################### LANGUAGE_CODE = 'en-us' @@ -211,64 +218,42 @@ USE_L10N = True USE_TZ = True - -# Static files (CSS, JavaScript, Images) -# https://docs.djangoproject.com/en/1.10/howto/static-files/ - -STATIC_ROOT = os.getenv( - "PAPERLESS_STATICDIR", os.path.join(BASE_DIR, "..", "static")) - -STATIC_URL = os.getenv("PAPERLESS_STATIC_URL", "/static/") - - -# Other - -# Disable Django's artificial limit on the number of form fields to submit at -# once. This is a protection against overloading the server, but since this is -# a self-hosted sort of gig, the benefits of being able to mass-delete a tonne -# of log entries outweight the benefits of such a safeguard. - -DATA_UPLOAD_MAX_NUMBER_FIELDS = None - - -# Paperless-specific stuff -# You shouldn't have to edit any of these values. Rather, you can set these -# values in /etc/paperless.conf instead. -# ---------------------------------------------------------------------------- - -# Logging +############################################################################### +# Logging # +############################################################################### LOGGING = { "version": 1, "disable_existing_loggers": False, "handlers": { - "consumer": { - "class": "documents.loggers.PaperlessLogger", + "dbhandler": { + "class": "documents.loggers.PaperlessHandler", + }, + "streamhandler": { + "class": "logging.StreamHandler" } }, "loggers": { "documents": { - "handlers": ["consumer"], - "level": os.getenv("PAPERLESS_CONSUMER_LOG_LEVEL", "INFO"), + "handlers": ["dbhandler", "streamhandler"], + "level": "DEBUG" }, }, } +############################################################################### +# Paperless Specific Settings # +############################################################################### # The default language that tesseract will attempt to use when parsing # documents. It should be a 3-letter language code consistent with ISO 639. 
OCR_LANGUAGE = os.getenv("PAPERLESS_OCR_LANGUAGE", "eng") # The amount of threads to use for OCR -OCR_THREADS = os.getenv("PAPERLESS_OCR_THREADS") +OCR_THREADS = int(os.getenv("PAPERLESS_OCR_THREADS", multiprocessing.cpu_count())) # OCR all documents? -OCR_ALWAYS = __get_boolean("PAPERLESS_OCR_ALWAYS") - -# If this is true, any failed attempts to OCR a PDF will result in the PDF -# being indexed anyway, with whatever we could get. If it's False, the file -# will simply be left in the CONSUMPTION_DIR. -FORGIVING_OCR = __get_boolean("PAPERLESS_FORGIVING_OCR") +OCR_ALWAYS = __get_boolean("PAPERLESS_OCR_ALWAYS", "false") # GNUPG needs a home directory for some reason GNUPG_HOME = os.getenv("HOME", "/tmp") @@ -277,30 +262,12 @@ GNUPG_HOME = os.getenv("HOME", "/tmp") CONVERT_BINARY = os.getenv("PAPERLESS_CONVERT_BINARY", "convert") CONVERT_TMPDIR = os.getenv("PAPERLESS_CONVERT_TMPDIR") CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT") -CONVERT_DENSITY = os.getenv("PAPERLESS_CONVERT_DENSITY") +CONVERT_DENSITY = int(os.getenv("PAPERLESS_CONVERT_DENSITY", 300)) -# Ghostscript GS_BINARY = os.getenv("PAPERLESS_GS_BINARY", "gs") - -# OptiPNG OPTIPNG_BINARY = os.getenv("PAPERLESS_OPTIPNG_BINARY", "optipng") - -# Unpaper UNPAPER_BINARY = os.getenv("PAPERLESS_UNPAPER_BINARY", "unpaper") -# This will be created if it doesn't exist -SCRATCH_DIR = os.getenv("PAPERLESS_SCRATCH_DIR", "/tmp/paperless") - -# This is where Paperless will look for PDFs to index -CONSUMPTION_DIR = os.getenv("PAPERLESS_CONSUMPTION_DIR") - - -# (This setting is ignored on Linux where inotify is used instead of a -# polling loop.) -# The number of seconds that Paperless will wait between checking -# CONSUMPTION_DIR. If you tend to write documents to this directory very -# slowly, you may want to use a higher value than the default. 
-CONSUMER_LOOP_TIME = int(os.getenv("PAPERLESS_CONSUMER_LOOP_TIME", 10)) # Pre-2.x versions of Paperless stored your documents locally with GPG # encryption, but that is no longer the default. This behaviour is still diff --git a/src/paperless/urls.py b/src/paperless/urls.py index b78cdc1ff..43ba5eb49 100755 --- a/src/paperless/urls.py +++ b/src/paperless/urls.py @@ -1,6 +1,7 @@ from django.conf.urls import include, url from django.contrib import admin from django.urls import path +from django.views.decorators.csrf import csrf_exempt from django.views.generic import RedirectView from rest_framework.authtoken import views from rest_framework.routers import DefaultRouter @@ -14,7 +15,8 @@ from documents.views import ( DocumentTypeViewSet, SearchView, IndexView, - SearchAutoCompleteView + SearchAutoCompleteView, + StatisticsView ) api_router = DefaultRouter() @@ -31,6 +33,7 @@ urlpatterns = [ url(r"^api/auth/",include(('rest_framework.urls', 'rest_framework'), namespace="rest_framework")), url(r"^api/search/autocomplete/", SearchAutoCompleteView.as_view(), name="autocomplete"), url(r"^api/search/", SearchView.as_view(), name="search"), + url(r"^api/statistics/", StatisticsView.as_view(), name="statistics"), url(r"^api/token/", views.obtain_auth_token), url(r"^api/", include((api_router.urls, 'drf'), namespace="drf")), # Favicon @@ -39,6 +42,21 @@ urlpatterns = [ # The Django admin url(r"admin/", admin.site.urls), + # These redirects are here to support clients that use the old FetchView. + url( + r"^fetch/doc/(?P\d+)$", + RedirectView.as_view(url='/api/documents/%(pk)s/download/'), + ), + url( + r"^fetch/thumb/(?P\d+)$", + RedirectView.as_view(url='/api/documents/%(pk)s/thumb/'), + ), + url( + r"^fetch/preview/(?P\d+)$", + RedirectView.as_view(url='/api/documents/%(pk)s/preview/'), + ), + url(r"^push$", csrf_exempt(RedirectView.as_view(url='/api/documents/post_document/'))), + # Frontend assets TODO: this is pretty bad. 
path('assets/', RedirectView.as_view(url='/static/assets/%(path)s')), diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py index eeca540b1..befc9bcd7 100644 --- a/src/paperless_tesseract/parsers.py +++ b/src/paperless_tesseract/parsers.py @@ -8,12 +8,11 @@ import langdetect import pyocr from django.conf import settings from PIL import Image -from pyocr.libtesseract.tesseract_raw import \ - TesseractError as OtherTesseractError -from pyocr.tesseract import TesseractError +from pyocr import PyocrException import pdftotext -from documents.parsers import DocumentParser, ParseError +from documents.parsers import DocumentParser, ParseError, run_unpaper, \ + run_convert from .languages import ISO639 @@ -28,16 +27,8 @@ class RasterisedDocumentParser(DocumentParser): image, whether it's a PDF, or other graphical format (JPEG, TIFF, etc.) """ - CONVERT = settings.CONVERT_BINARY - GHOSTSCRIPT = settings.GS_BINARY - DENSITY = settings.CONVERT_DENSITY if settings.CONVERT_DENSITY else 300 - THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None - UNPAPER = settings.UNPAPER_BINARY - DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE - OCR_ALWAYS = settings.OCR_ALWAYS - - def __init__(self, path): - super().__init__(path) + def __init__(self, path, logging_group): + super().__init__(path, logging_group) self._text = None def get_thumbnail(self): @@ -49,25 +40,20 @@ class RasterisedDocumentParser(DocumentParser): # Run convert to get a decent thumbnail try: - run_convert( - self.CONVERT, - "-density", "300", - "-scale", "500x5000>", - "-alpha", "remove", - "-strip", "-trim", - "{}[0]".format(self.document_path), - out_path - ) + run_convert(density=300, + scale="500x5000>", + alpha="remove", + strip=True, + trim=True, + input="{}[0]".format(self.document_path), + output=out_path, + logging_group=self.logging_group) except ParseError: # if convert fails, fall back to extracting # the first PDF page as a PNG using Ghostscript - self.log( - 
"warning", - "Thumbnail generation with ImageMagick failed, " - "falling back to Ghostscript." - ) + self.log('warning', 'Thumbnail generation with ImageMagick failed, falling back to ghostscript. Check your /etc/ImageMagick-x/policy.xml!') gs_out_path = os.path.join(self.tempdir, "gs_out.png") - cmd = [self.GHOSTSCRIPT, + cmd = [settings.GS_BINARY, "-q", "-sDEVICE=pngalpha", "-o", gs_out_path, @@ -75,15 +61,14 @@ class RasterisedDocumentParser(DocumentParser): if not subprocess.Popen(cmd).wait() == 0: raise ParseError("Thumbnail (gs) failed at {}".format(cmd)) # then run convert on the output from gs - run_convert( - self.CONVERT, - "-density", "300", - "-scale", "500x5000>", - "-alpha", "remove", - "-strip", "-trim", - gs_out_path, - out_path - ) + run_convert(density=300, + scale="500x5000>", + alpha="remove", + strip=True, + trim=True, + input=gs_out_path, + output=out_path, + logging_group=self.logging_group) return out_path @@ -101,16 +86,43 @@ class RasterisedDocumentParser(DocumentParser): if self._text is not None: return self._text - if not self.OCR_ALWAYS and self._is_ocred(): + if not settings.OCR_ALWAYS and self._is_ocred(): self.log("info", "Skipping OCR, using Text from PDF") self._text = get_text_from_pdf(self.document_path) return self._text images = self._get_greyscale() + if not images: + raise ParseError("Empty document, nothing to do.") + try: - self._text = self._get_ocr(images) + + sample_page_index = int(len(images) / 2) + self.log("info", "Attempting language detection on page {} of {}...".format(sample_page_index+1, len(images))) + sample_page_text = self._ocr([images[sample_page_index]], settings.OCR_LANGUAGE)[0] + guessed_language = self._guess_language(sample_page_text) + + if not guessed_language or guessed_language not in ISO639: + self.log("warning", "Language detection failed.") + ocr_pages = self._complete_ocr_default_language(images, sample_page_index, sample_page_text) + + elif ISO639[guessed_language] == settings.OCR_LANGUAGE: + 
self.log("info", "Detected language: {} (default language)".format(guessed_language)) + ocr_pages = self._complete_ocr_default_language(images, sample_page_index, sample_page_text) + + elif not ISO639[guessed_language] in pyocr.get_available_tools()[0].get_available_languages(): + self.log("warning", "Detected language {} is not available on this system.".format(guessed_language)) + ocr_pages = self._complete_ocr_default_language(images, sample_page_index, sample_page_text) + + else: + self.log("info", "Detected language: {}".format(guessed_language)) + ocr_pages = self._ocr(images, ISO639[guessed_language]) + + self.log("info", "OCR completed.") + self._text = strip_excess_whitespace(" ".join(ocr_pages)) return self._text + except OCRError as e: raise ParseError(e) @@ -119,15 +131,17 @@ class RasterisedDocumentParser(DocumentParser): Greyscale images are easier for Tesseract to OCR """ + self.log("info", "Converting document {} into greyscale images...".format(self.document_path)) + # Convert PDF to multiple PNMs pnm = os.path.join(self.tempdir, "convert-%04d.pnm") - run_convert( - self.CONVERT, - "-density", str(self.DENSITY), - "-depth", "8", - "-type", "grayscale", - self.document_path, pnm, - ) + + run_convert(density=settings.CONVERT_DENSITY, + depth="8", + type="grayscale", + input=self.document_path, + output=pnm, + logging_group=self.logging_group) # Get a list of converted images pnms = [] @@ -135,127 +149,46 @@ class RasterisedDocumentParser(DocumentParser): if f.endswith(".pnm"): pnms.append(os.path.join(self.tempdir, f)) - # Run unpaper in parallel on converted images - with Pool(processes=self.THREADS) as pool: - pool.map(run_unpaper, itertools.product([self.UNPAPER], pnms)) + self.log("info", "Running unpaper on {} pages...".format(len(pnms))) - # Return list of converted images, processed with unpaper - pnms = [] - for f in os.listdir(self.tempdir): - if f.endswith(".unpaper.pnm"): - pnms.append(os.path.join(self.tempdir, f)) + # Run unpaper in 
parallel on converted images + with Pool(processes=settings.OCR_THREADS) as pool: + pnms = pool.map(run_unpaper, pnms) return sorted(filter(lambda __: os.path.isfile(__), pnms)) def _guess_language(self, text): try: guess = langdetect.detect(text) - self.log("debug", "Language detected: {}".format(guess)) return guess except Exception as e: - self.log("warning", "Language detection error: {}".format(e)) - - def _get_ocr(self, imgs): - """ - Attempts to do the best job possible OCR'ing the document based on - simple language detection trial & error. - """ - - if not imgs: - raise OCRError("No images found") - - self.log("info", "OCRing the document") - - # Since the division gets rounded down by int, this calculation works - # for every edge-case, i.e. 1 - middle = int(len(imgs) / 2) - raw_text = self._ocr([imgs[middle]], self.DEFAULT_OCR_LANGUAGE) - - guessed_language = self._guess_language(raw_text) - - if not guessed_language or guessed_language not in ISO639: - self.log("warning", "Language detection failed!") - if settings.FORGIVING_OCR: - self.log( - "warning", - "As FORGIVING_OCR is enabled, we're going to make the " - "best with what we have." - ) - raw_text = self._assemble_ocr_sections(imgs, middle, raw_text) - return raw_text - error_msg = ("Language detection failed. 
Set " - "PAPERLESS_FORGIVING_OCR in config file to continue " - "anyway.") - raise OCRError(error_msg) - - if ISO639[guessed_language] == self.DEFAULT_OCR_LANGUAGE: - raw_text = self._assemble_ocr_sections(imgs, middle, raw_text) - return raw_text - - try: - return self._ocr(imgs, ISO639[guessed_language]) - except pyocr.pyocr.tesseract.TesseractError: - if settings.FORGIVING_OCR: - self.log( - "warning", - "OCR for {} failed, but we're going to stick with what " - "we've got since FORGIVING_OCR is enabled.".format( - guessed_language - ) - ) - raw_text = self._assemble_ocr_sections(imgs, middle, raw_text) - return raw_text - raise OCRError( - "The guessed language ({}) is not available in this instance " - "of Tesseract.".format(guessed_language) - ) + self.log('debug', "Language detection failed with: {}".format(e)) + return None def _ocr(self, imgs, lang): - """ - Performs a single OCR attempt. - """ - - if not imgs: - return "" - - self.log("info", "Parsing for {}".format(lang)) - - with Pool(processes=self.THREADS) as pool: + self.log("info", "Performing OCR on {} page(s) with language {}".format(len(imgs), lang)) + with Pool(processes=settings.OCR_THREADS) as pool: r = pool.map(image_to_string, itertools.product(imgs, [lang])) - r = " ".join(r) + return r - # Strip out excess white space to allow matching to go smoother - return strip_excess_whitespace(r) - - def _assemble_ocr_sections(self, imgs, middle, text): + def _complete_ocr_default_language(self, images, sample_page_index, sample_page): """ Given a `middle` value and the text that middle page represents, we OCR the remainder of the document and return the whole thing. 
""" - text = self._ocr(imgs[:middle], self.DEFAULT_OCR_LANGUAGE) + text - text += self._ocr(imgs[middle + 1:], self.DEFAULT_OCR_LANGUAGE) - return text + # text = self._ocr(imgs[:middle], settings.OCR_LANGUAGE) + text + # text += self._ocr(imgs[middle + 1:], settings.OCR_LANGUAGE) + images_copy = list(images) + del images_copy[sample_page_index] + if images_copy: + self.log('info', 'Continuing ocr with default language.') + ocr_pages = self._ocr(images_copy, settings.OCR_LANGUAGE) + ocr_pages.insert(sample_page_index, sample_page) + return ocr_pages + else: + return [sample_page] -def run_convert(*args): - - environment = os.environ.copy() - if settings.CONVERT_MEMORY_LIMIT: - environment["MAGICK_MEMORY_LIMIT"] = settings.CONVERT_MEMORY_LIMIT - if settings.CONVERT_TMPDIR: - environment["MAGICK_TMPDIR"] = settings.CONVERT_TMPDIR - - if not subprocess.Popen(args, env=environment).wait() == 0: - raise ParseError("Convert failed at {}".format(args)) - - -def run_unpaper(args): - unpaper, pnm = args - command_args = (unpaper, "--overwrite", pnm, - pnm.replace(".pnm", ".unpaper.pnm")) - if not subprocess.Popen(command_args).wait() == 0: - raise ParseError("Unpaper failed at {}".format(command_args)) - def strip_excess_whitespace(text): collapsed_spaces = re.sub(r"([^\S\r\n]+)", " ", text) @@ -269,14 +202,18 @@ def strip_excess_whitespace(text): def image_to_string(args): img, lang = args ocr = pyocr.get_available_tools()[0] - with Image.open(os.path.join(RasterisedDocumentParser.SCRATCH, img)) as f: + with Image.open(img) as f: if ocr.can_detect_orientation(): try: orientation = ocr.detect_orientation(f, lang=lang) f = f.rotate(orientation["angle"], expand=1) - except (TesseractError, OtherTesseractError, AttributeError): + except Exception: + # Rotation not possible, ignore pass - return ocr.image_to_string(f, lang=lang) + try: + return ocr.image_to_string(f, lang=lang) + except PyocrException as e: + raise OCRError(e) def get_text_from_pdf(pdf_file): diff --git 
a/src/paperless_tesseract/tests/test_date.py b/src/paperless_tesseract/tests/test_date.py index 9e9d48b90..51317362f 100644 --- a/src/paperless_tesseract/tests/test_date.py +++ b/src/paperless_tesseract/tests/test_date.py @@ -5,7 +5,7 @@ from unittest import mock from uuid import uuid4 from dateutil import tz -from django.test import TestCase +from django.test import TestCase, override_settings from ..parsers import RasterisedDocumentParser from django.conf import settings @@ -16,39 +16,37 @@ class TestDate(TestCase): SAMPLE_FILES = os.path.join(os.path.dirname(__file__), "samples") SCRATCH = "/tmp/paperless-tests-{}".format(str(uuid4())[:8]) - MOCK_SCRATCH = "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH" # NOQA: E501 - def setUp(self): os.makedirs(self.SCRATCH, exist_ok=True) def tearDown(self): shutil.rmtree(self.SCRATCH) - @mock.patch(MOCK_SCRATCH, SCRATCH) + @override_settings(SCRATCH_DIR=SCRATCH) def test_date_format_1(self): input_file = os.path.join(self.SAMPLE_FILES, "") - document = RasterisedDocumentParser(input_file) + document = RasterisedDocumentParser(input_file, None) document._text = "lorem ipsum 130218 lorem ipsum" self.assertEqual(document.get_date(), None) - @mock.patch(MOCK_SCRATCH, SCRATCH) + @override_settings(SCRATCH_DIR=SCRATCH) def test_date_format_2(self): input_file = os.path.join(self.SAMPLE_FILES, "") - document = RasterisedDocumentParser(input_file) + document = RasterisedDocumentParser(input_file, None) document._text = "lorem ipsum 2018 lorem ipsum" self.assertEqual(document.get_date(), None) - @mock.patch(MOCK_SCRATCH, SCRATCH) + @override_settings(SCRATCH_DIR=SCRATCH) def test_date_format_3(self): input_file = os.path.join(self.SAMPLE_FILES, "") - document = RasterisedDocumentParser(input_file) + document = RasterisedDocumentParser(input_file, None) document._text = "lorem ipsum 20180213 lorem ipsum" self.assertEqual(document.get_date(), None) - @mock.patch(MOCK_SCRATCH, SCRATCH) + 
@override_settings(SCRATCH_DIR=SCRATCH) def test_date_format_4(self): input_file = os.path.join(self.SAMPLE_FILES, "") - document = RasterisedDocumentParser(input_file) + document = RasterisedDocumentParser(input_file, None) document._text = "lorem ipsum 13.02.2018 lorem ipsum" date = document.get_date() self.assertEqual( @@ -59,10 +57,10 @@ class TestDate(TestCase): ) ) - @mock.patch(MOCK_SCRATCH, SCRATCH) + @override_settings(SCRATCH_DIR=SCRATCH) def test_date_format_5(self): input_file = os.path.join(self.SAMPLE_FILES, "") - document = RasterisedDocumentParser(input_file) + document = RasterisedDocumentParser(input_file, None) document._text = ( "lorem ipsum 130218, 2018, 20180213 and lorem 13.02.2018 lorem " "ipsum" @@ -76,10 +74,10 @@ class TestDate(TestCase): ) ) - @mock.patch(MOCK_SCRATCH, SCRATCH) + @override_settings(SCRATCH_DIR=SCRATCH) def test_date_format_6(self): input_file = os.path.join(self.SAMPLE_FILES, "") - document = RasterisedDocumentParser(input_file) + document = RasterisedDocumentParser(input_file, None) document._text = ( "lorem ipsum\n" "Wohnort\n" @@ -93,10 +91,10 @@ class TestDate(TestCase): ) self.assertEqual(document.get_date(), None) - @mock.patch(MOCK_SCRATCH, SCRATCH) + @override_settings(SCRATCH_DIR=SCRATCH) def test_date_format_7(self): input_file = os.path.join(self.SAMPLE_FILES, "") - document = RasterisedDocumentParser(input_file) + document = RasterisedDocumentParser(input_file, None) document._text = ( "lorem ipsum\n" "März 2019\n" @@ -111,10 +109,10 @@ class TestDate(TestCase): ) ) - @mock.patch(MOCK_SCRATCH, SCRATCH) + @override_settings(SCRATCH_DIR=SCRATCH) def test_date_format_8(self): input_file = os.path.join(self.SAMPLE_FILES, "") - document = RasterisedDocumentParser(input_file) + document = RasterisedDocumentParser(input_file, None) document._text = ( "lorem ipsum\n" "Wohnort\n" @@ -135,10 +133,10 @@ class TestDate(TestCase): ) ) - @mock.patch(MOCK_SCRATCH, SCRATCH) + @override_settings(SCRATCH_DIR=SCRATCH) def 
test_date_format_9(self): input_file = os.path.join(self.SAMPLE_FILES, "") - document = RasterisedDocumentParser(input_file) + document = RasterisedDocumentParser(input_file, None) document._text = ( "lorem ipsum\n" "27. Nullmonth 2020\n" @@ -157,9 +155,9 @@ class TestDate(TestCase): "paperless_tesseract.parsers.RasterisedDocumentParser.get_text", return_value="01-07-0590 00:00:00" ) - @mock.patch(MOCK_SCRATCH, SCRATCH) + @override_settings(SCRATCH_DIR=SCRATCH) def test_crazy_date_past(self, *args): - document = RasterisedDocumentParser("/dev/null") + document = RasterisedDocumentParser("/dev/null", None) document.get_text() self.assertIsNone(document.get_date()) @@ -167,9 +165,9 @@ class TestDate(TestCase): "paperless_tesseract.parsers.RasterisedDocumentParser.get_text", return_value="01-07-2350 00:00:00" ) - @mock.patch(MOCK_SCRATCH, SCRATCH) + @override_settings(SCRATCH_DIR=SCRATCH) def test_crazy_date_future(self, *args): - document = RasterisedDocumentParser("/dev/null") + document = RasterisedDocumentParser("/dev/null", None) document.get_text() self.assertIsNone(document.get_date()) @@ -177,9 +175,9 @@ class TestDate(TestCase): "paperless_tesseract.parsers.RasterisedDocumentParser.get_text", return_value="20 408000l 2475" ) - @mock.patch(MOCK_SCRATCH, SCRATCH) + @override_settings(SCRATCH_DIR=SCRATCH) def test_crazy_date_with_spaces(self, *args): - document = RasterisedDocumentParser("/dev/null") + document = RasterisedDocumentParser("/dev/null", None) document.get_text() self.assertIsNone(document.get_date()) @@ -187,14 +185,9 @@ class TestDate(TestCase): "paperless_tesseract.parsers.RasterisedDocumentParser.get_text", return_value="No date in here" ) - @mock.patch( - "paperless_tesseract.parsers.RasterisedDocumentParser." 
- "FILENAME_DATE_ORDER", - new_callable=mock.PropertyMock, - return_value="YMD" - ) - @mock.patch(MOCK_SCRATCH, SCRATCH) + @override_settings(FILENAME_DATE_ORDER="YMD") + @override_settings(SCRATCH_DIR=SCRATCH) def test_filename_date_parse_invalid(self, *args): - document = RasterisedDocumentParser("/tmp/20 408000l 2475 - test.pdf") + document = RasterisedDocumentParser("/tmp/20 408000l 2475 - test.pdf", None) document.get_text() self.assertIsNone(document.get_date()) diff --git a/src/paperless_tesseract/tests/test_ocr.py b/src/paperless_tesseract/tests/test_ocr.py index 68ab64707..e0d5726ba 100644 --- a/src/paperless_tesseract/tests/test_ocr.py +++ b/src/paperless_tesseract/tests/test_ocr.py @@ -62,10 +62,6 @@ class TestOCR(TestCase): ) @skipIf(not TESSERACT_INSTALLED, "Tesseract not installed. Skipping") - @mock.patch( - "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH", - SAMPLE_FILES - ) @mock.patch("paperless_tesseract.parsers.pyocr", FakePyOcr) def test_image_to_string_with_text_free_page(self): """ @@ -77,4 +73,4 @@ class TestOCR(TestCase): text-free pages are now handled correctly so long as we work around this weird exception. 
""" - image_to_string(["no-text.png", "en"]) + image_to_string([os.path.join(self.SAMPLE_FILES, "no-text.png"), "en"]) diff --git a/src/paperless_text/parsers.py b/src/paperless_text/parsers.py index 3ccb78404..0db1e230b 100644 --- a/src/paperless_text/parsers.py +++ b/src/paperless_text/parsers.py @@ -11,14 +11,8 @@ class TextDocumentParser(DocumentParser): This parser directly parses a text document (.txt, .md, or .csv) """ - CONVERT = settings.CONVERT_BINARY - THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None - UNPAPER = settings.UNPAPER_BINARY - DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE - OCR_ALWAYS = settings.OCR_ALWAYS - - def __init__(self, path): - super().__init__(path) + def __init__(self, path, logging_group): + super().__init__(path, logging_group) self._text = None def get_thumbnail(self): @@ -44,7 +38,7 @@ class TextDocumentParser(DocumentParser): r = str(round(psize[0] / 10)) rounded = ",".join([r, r]) run_command( - self.CONVERT, + settings.CONVERT_BINARY, "-size ", picsize, ' xc:none -draw ', '"fill ', bg_color, ' roundrectangle 0,0,', work_size, ",", rounded, '" ', # NOQA: E501 @@ -59,7 +53,7 @@ class TextDocumentParser(DocumentParser): def create_txlayer(): run_command( - self.CONVERT, + settings.CONVERT_BINARY, "-background none", "-fill", text_color, @@ -73,7 +67,7 @@ class TextDocumentParser(DocumentParser): create_txlayer() create_bg() run_command( - self.CONVERT, + settings.CONVERT_BINARY, temp_bg, temp_txlayer, "-background None -layers merge ",