mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Add aws textract, remove chatgpt
This commit is contained in:
parent
6e7e40e7a2
commit
a0c6d25d9a
2
Pipfile
2
Pipfile
@ -22,6 +22,7 @@ djangorestframework = "~=3.14"
|
||||
djangorestframework-guardian = "*"
|
||||
drf-writable-nested = "*"
|
||||
bleach = "*"
|
||||
boto3 = "*"
|
||||
celery = {extras = ["redis"], version = "*"}
|
||||
channels = "~=4.0"
|
||||
channels-redis = "*"
|
||||
@ -37,7 +38,6 @@ langdetect = "*"
|
||||
mysqlclient = "*"
|
||||
nltk = "*"
|
||||
ocrmypdf = "~=15.4"
|
||||
openai = "*"
|
||||
pathvalidate = "*"
|
||||
pdf2image = "*"
|
||||
psycopg2 = "*"
|
||||
|
245
Pipfile.lock
generated
245
Pipfile.lock
generated
@ -1,7 +1,7 @@
|
||||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "56dcb96a9bc99b9902bfd3891d3b04f83715cfb9ae54f9d193442c90613e0ef9"
|
||||
"sha256": "afea58891f3b1e0860daa8bc56b33b56fbe7c95c6b30d3fdc8cf1a25560e2d1a"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {},
|
||||
@ -87,6 +87,23 @@
|
||||
"markers": "python_version >= '3.8'",
|
||||
"version": "==6.1.0"
|
||||
},
|
||||
"boto3": {
|
||||
"hashes": [
|
||||
"sha256:66303b5f26d92afb72656ff490b22ea72dfff8bf1a29e4a0c5d5f11ec56245dd",
|
||||
"sha256:898ad2123b18cae8efd85adc56ac2d1925be54592aebc237020d4f16e9a9e7a9"
|
||||
],
|
||||
"index": "pypi",
|
||||
"markers": "python_version >= '3.8'",
|
||||
"version": "==1.34.52"
|
||||
},
|
||||
"botocore": {
|
||||
"hashes": [
|
||||
"sha256:05567d8aba344826060481ea309555432c96f0febe22bee7cf5a3b6d3a03cec8",
|
||||
"sha256:187da93aec3f2e87d8a31eced16fa2cb9c71fe2d69b0a797f9f7a9220f5bf7ae"
|
||||
],
|
||||
"markers": "python_version >= '3.8'",
|
||||
"version": "==1.34.52"
|
||||
},
|
||||
"brotli": {
|
||||
"hashes": [
|
||||
"sha256:03d20af184290887bdea3f0f78c4f737d126c74dc2f3ccadf07e54ceca3bf208",
|
||||
@ -1106,6 +1123,14 @@
|
||||
],
|
||||
"version": "==0.6.1"
|
||||
},
|
||||
"jmespath": {
|
||||
"hashes": [
|
||||
"sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980",
|
||||
"sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"
|
||||
],
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==1.0.1"
|
||||
},
|
||||
"joblib": {
|
||||
"hashes": [
|
||||
"sha256:92f865e621e17784e7955080b6d042489e3b8e294949cc44c6eac304f59772b1",
|
||||
@ -1132,101 +1157,87 @@
|
||||
},
|
||||
"lxml": {
|
||||
"hashes": [
|
||||
"sha256:00e91573183ad273e242db5585b52670eddf92bacad095ce25c1e682da14ed91",
|
||||
"sha256:01bf1df1db327e748dcb152d17389cf6d0a8c5d533ef9bab781e9d5037619229",
|
||||
"sha256:056a17eaaf3da87a05523472ae84246f87ac2f29a53306466c22e60282e54ff8",
|
||||
"sha256:0a08c89b23117049ba171bf51d2f9c5f3abf507d65d016d6e0fa2f37e18c0fc5",
|
||||
"sha256:1343df4e2e6e51182aad12162b23b0a4b3fd77f17527a78c53f0f23573663545",
|
||||
"sha256:1449f9451cd53e0fd0a7ec2ff5ede4686add13ac7a7bfa6988ff6d75cff3ebe2",
|
||||
"sha256:16b9ec51cc2feab009e800f2c6327338d6ee4e752c76e95a35c4465e80390ccd",
|
||||
"sha256:1f10f250430a4caf84115b1e0f23f3615566ca2369d1962f82bef40dd99cd81a",
|
||||
"sha256:231142459d32779b209aa4b4d460b175cadd604fed856f25c1571a9d78114771",
|
||||
"sha256:232fd30903d3123be4c435fb5159938c6225ee8607b635a4d3fca847003134ba",
|
||||
"sha256:23d891e5bdc12e2e506e7d225d6aa929e0a0368c9916c1fddefab88166e98b20",
|
||||
"sha256:266f655d1baff9c47b52f529b5f6bec33f66042f65f7c56adde3fcf2ed62ae8b",
|
||||
"sha256:273473d34462ae6e97c0f4e517bd1bf9588aa67a1d47d93f760a1282640e24ac",
|
||||
"sha256:2bd9ac6e44f2db368ef8986f3989a4cad3de4cd55dbdda536e253000c801bcc7",
|
||||
"sha256:33714fcf5af4ff7e70a49731a7cc8fd9ce910b9ac194f66eaa18c3cc0a4c02be",
|
||||
"sha256:359a8b09d712df27849e0bcb62c6a3404e780b274b0b7e4c39a88826d1926c28",
|
||||
"sha256:365005e8b0718ea6d64b374423e870648ab47c3a905356ab6e5a5ff03962b9a9",
|
||||
"sha256:389d2b2e543b27962990ab529ac6720c3dded588cc6d0f6557eec153305a3622",
|
||||
"sha256:3b505f2bbff50d261176e67be24e8909e54b5d9d08b12d4946344066d66b3e43",
|
||||
"sha256:3d74d4a3c4b8f7a1f676cedf8e84bcc57705a6d7925e6daef7a1e54ae543a197",
|
||||
"sha256:3f3f00a9061605725df1816f5713d10cd94636347ed651abdbc75828df302b20",
|
||||
"sha256:43498ea734ccdfb92e1886dfedaebeb81178a241d39a79d5351ba2b671bff2b2",
|
||||
"sha256:4855161013dfb2b762e02b3f4d4a21cc7c6aec13c69e3bffbf5022b3e708dd97",
|
||||
"sha256:4d973729ce04784906a19108054e1fd476bc85279a403ea1a72fdb051c76fa48",
|
||||
"sha256:4ece9cca4cd1c8ba889bfa67eae7f21d0d1a2e715b4d5045395113361e8c533d",
|
||||
"sha256:506becdf2ecaebaf7f7995f776394fcc8bd8a78022772de66677c84fb02dd33d",
|
||||
"sha256:520486f27f1d4ce9654154b4494cf9307b495527f3a2908ad4cb48e4f7ed7ef7",
|
||||
"sha256:5557461f83bb7cc718bc9ee1f7156d50e31747e5b38d79cf40f79ab1447afd2d",
|
||||
"sha256:562778586949be7e0d7435fcb24aca4810913771f845d99145a6cee64d5b67ca",
|
||||
"sha256:59bb5979f9941c61e907ee571732219fa4774d5a18f3fa5ff2df963f5dfaa6bc",
|
||||
"sha256:606d445feeb0856c2b424405236a01c71af7c97e5fe42fbc778634faef2b47e4",
|
||||
"sha256:6197c3f3c0b960ad033b9b7d611db11285bb461fc6b802c1dd50d04ad715c225",
|
||||
"sha256:647459b23594f370c1c01768edaa0ba0959afc39caeeb793b43158bb9bb6a663",
|
||||
"sha256:647bfe88b1997d7ae8d45dabc7c868d8cb0c8412a6e730a7651050b8c7289cf2",
|
||||
"sha256:6bee9c2e501d835f91460b2c904bc359f8433e96799f5c2ff20feebd9bb1e590",
|
||||
"sha256:6dbdacf5752fbd78ccdb434698230c4f0f95df7dd956d5f205b5ed6911a1367c",
|
||||
"sha256:701847a7aaefef121c5c0d855b2affa5f9bd45196ef00266724a80e439220e46",
|
||||
"sha256:786d6b57026e7e04d184313c1359ac3d68002c33e4b1042ca58c362f1d09ff58",
|
||||
"sha256:7b378847a09d6bd46047f5f3599cdc64fcb4cc5a5a2dd0a2af610361fbe77b16",
|
||||
"sha256:7d1d6c9e74c70ddf524e3c09d9dc0522aba9370708c2cb58680ea40174800013",
|
||||
"sha256:857d6565f9aa3464764c2cb6a2e3c2e75e1970e877c188f4aeae45954a314e0c",
|
||||
"sha256:8671622256a0859f5089cbe0ce4693c2af407bc053dcc99aadff7f5310b4aa02",
|
||||
"sha256:88f7c383071981c74ec1998ba9b437659e4fd02a3c4a4d3efc16774eb108d0ec",
|
||||
"sha256:8aecb5a7f6f7f8fe9cac0bcadd39efaca8bbf8d1bf242e9f175cbe4c925116c3",
|
||||
"sha256:91bbf398ac8bb7d65a5a52127407c05f75a18d7015a270fdd94bbcb04e65d573",
|
||||
"sha256:936e8880cc00f839aa4173f94466a8406a96ddce814651075f95837316369899",
|
||||
"sha256:953dd5481bd6252bd480d6ec431f61d7d87fdcbbb71b0d2bdcfc6ae00bb6fb10",
|
||||
"sha256:95ae6c5a196e2f239150aa4a479967351df7f44800c93e5a975ec726fef005e2",
|
||||
"sha256:9a2b5915c333e4364367140443b59f09feae42184459b913f0f41b9fed55794a",
|
||||
"sha256:9ae6c3363261021144121427b1552b29e7b59de9d6a75bf51e03bc072efb3c37",
|
||||
"sha256:9b556596c49fa1232b0fff4b0e69b9d4083a502e60e404b44341e2f8fb7187f5",
|
||||
"sha256:9c131447768ed7bc05a02553d939e7f0e807e533441901dd504e217b76307745",
|
||||
"sha256:9d9d5726474cbbef279fd709008f91a49c4f758bec9c062dfbba88eab00e3ff9",
|
||||
"sha256:a1bdcbebd4e13446a14de4dd1825f1e778e099f17f79718b4aeaf2403624b0f7",
|
||||
"sha256:a602ed9bd2c7d85bd58592c28e101bd9ff9c718fbde06545a70945ffd5d11868",
|
||||
"sha256:a8edae5253efa75c2fc79a90068fe540b197d1c7ab5803b800fccfe240eed33c",
|
||||
"sha256:a905affe76f1802edcac554e3ccf68188bea16546071d7583fb1b693f9cf756b",
|
||||
"sha256:a9e7c6d89c77bb2770c9491d988f26a4b161d05c8ca58f63fb1f1b6b9a74be45",
|
||||
"sha256:aa9b5abd07f71b081a33115d9758ef6077924082055005808f68feccb27616bd",
|
||||
"sha256:aaa5c173a26960fe67daa69aa93d6d6a1cd714a6eb13802d4e4bd1d24a530644",
|
||||
"sha256:ac7674d1638df129d9cb4503d20ffc3922bd463c865ef3cb412f2c926108e9a4",
|
||||
"sha256:b1541e50b78e15fa06a2670157a1962ef06591d4c998b998047fff5e3236880e",
|
||||
"sha256:b1980dbcaad634fe78e710c8587383e6e3f61dbe146bcbfd13a9c8ab2d7b1192",
|
||||
"sha256:bafa65e3acae612a7799ada439bd202403414ebe23f52e5b17f6ffc2eb98c2be",
|
||||
"sha256:bb5bd6212eb0edfd1e8f254585290ea1dadc3687dd8fd5e2fd9a87c31915cdab",
|
||||
"sha256:bbdd69e20fe2943b51e2841fc1e6a3c1de460d630f65bde12452d8c97209464d",
|
||||
"sha256:bc354b1393dce46026ab13075f77b30e40b61b1a53e852e99d3cc5dd1af4bc85",
|
||||
"sha256:bcee502c649fa6351b44bb014b98c09cb00982a475a1912a9881ca28ab4f9cd9",
|
||||
"sha256:bdd9abccd0927673cffe601d2c6cdad1c9321bf3437a2f507d6b037ef91ea307",
|
||||
"sha256:c42ae7e010d7d6bc51875d768110c10e8a59494855c3d4c348b068f5fb81fdcd",
|
||||
"sha256:c71b5b860c5215fdbaa56f715bc218e45a98477f816b46cfde4a84d25b13274e",
|
||||
"sha256:c7721a3ef41591341388bb2265395ce522aba52f969d33dacd822da8f018aff8",
|
||||
"sha256:ca8e44b5ba3edb682ea4e6185b49661fc22b230cf811b9c13963c9f982d1d964",
|
||||
"sha256:cb53669442895763e61df5c995f0e8361b61662f26c1b04ee82899c2789c8f69",
|
||||
"sha256:cc02c06e9e320869d7d1bd323df6dd4281e78ac2e7f8526835d3d48c69060683",
|
||||
"sha256:d3caa09e613ece43ac292fbed513a4bce170681a447d25ffcbc1b647d45a39c5",
|
||||
"sha256:d82411dbf4d3127b6cde7da0f9373e37ad3a43e89ef374965465928f01c2b979",
|
||||
"sha256:dbcb2dc07308453db428a95a4d03259bd8caea97d7f0776842299f2d00c72fc8",
|
||||
"sha256:dd4fda67f5faaef4f9ee5383435048ee3e11ad996901225ad7615bc92245bc8e",
|
||||
"sha256:ddd92e18b783aeb86ad2132d84a4b795fc5ec612e3545c1b687e7747e66e2b53",
|
||||
"sha256:de362ac8bc962408ad8fae28f3967ce1a262b5d63ab8cefb42662566737f1dc7",
|
||||
"sha256:e214025e23db238805a600f1f37bf9f9a15413c7bf5f9d6ae194f84980c78722",
|
||||
"sha256:e8f9f93a23634cfafbad6e46ad7d09e0f4a25a2400e4a64b1b7b7c0fbaa06d9d",
|
||||
"sha256:e96a1788f24d03e8d61679f9881a883ecdf9c445a38f9ae3f3f193ab6c591c66",
|
||||
"sha256:ec53a09aee61d45e7dbe7e91252ff0491b6b5fee3d85b2d45b173d8ab453efc1",
|
||||
"sha256:f10250bb190fb0742e3e1958dd5c100524c2cc5096c67c8da51233f7448dc137",
|
||||
"sha256:f1faee2a831fe249e1bae9cbc68d3cd8a30f7e37851deee4d7962b17c410dd56",
|
||||
"sha256:f610d980e3fccf4394ab3806de6065682982f3d27c12d4ce3ee46a8183d64a6a",
|
||||
"sha256:f6c35b2f87c004270fa2e703b872fcc984d714d430b305145c39d53074e1ffe0",
|
||||
"sha256:f836f39678cb47c9541f04d8ed4545719dc31ad850bf1832d6b4171e30d65d23",
|
||||
"sha256:f99768232f036b4776ce419d3244a04fe83784bce871b16d2c2e984c7fcea847",
|
||||
"sha256:fd814847901df6e8de13ce69b84c31fc9b3fb591224d6762d0b256d510cbf382",
|
||||
"sha256:fdb325b7fba1e2c40b9b1db407f85642e32404131c08480dd652110fc908561b"
|
||||
"sha256:13521a321a25c641b9ea127ef478b580b5ec82aa2e9fc076c86169d161798b01",
|
||||
"sha256:14deca1460b4b0f6b01f1ddc9557704e8b365f55c63070463f6c18619ebf964f",
|
||||
"sha256:16018f7099245157564d7148165132c70adb272fb5a17c048ba70d9cc542a1a1",
|
||||
"sha256:16dd953fb719f0ffc5bc067428fc9e88f599e15723a85618c45847c96f11f431",
|
||||
"sha256:19a1bc898ae9f06bccb7c3e1dfd73897ecbbd2c96afe9095a6026016e5ca97b8",
|
||||
"sha256:1ad17c20e3666c035db502c78b86e58ff6b5991906e55bdbef94977700c72623",
|
||||
"sha256:22b7ee4c35f374e2c20337a95502057964d7e35b996b1c667b5c65c567d2252a",
|
||||
"sha256:24ef5a4631c0b6cceaf2dbca21687e29725b7c4e171f33a8f8ce23c12558ded1",
|
||||
"sha256:25663d6e99659544ee8fe1b89b1a8c0aaa5e34b103fab124b17fa958c4a324a6",
|
||||
"sha256:262bc5f512a66b527d026518507e78c2f9c2bd9eb5c8aeeb9f0eb43fcb69dc67",
|
||||
"sha256:280f3edf15c2a967d923bcfb1f8f15337ad36f93525828b40a0f9d6c2ad24890",
|
||||
"sha256:2ad3a8ce9e8a767131061a22cd28fdffa3cd2dc193f399ff7b81777f3520e372",
|
||||
"sha256:2befa20a13f1a75c751f47e00929fb3433d67eb9923c2c0b364de449121f447c",
|
||||
"sha256:2f37c6d7106a9d6f0708d4e164b707037b7380fcd0b04c5bd9cae1fb46a856fb",
|
||||
"sha256:304128394c9c22b6569eba2a6d98392b56fbdfbad58f83ea702530be80d0f9df",
|
||||
"sha256:342e95bddec3a698ac24378d61996b3ee5ba9acfeb253986002ac53c9a5f6f84",
|
||||
"sha256:3aeca824b38ca78d9ee2ab82bd9883083d0492d9d17df065ba3b94e88e4d7ee6",
|
||||
"sha256:3d184e0d5c918cff04cdde9dbdf9600e960161d773666958c9d7b565ccc60c45",
|
||||
"sha256:3e3898ae2b58eeafedfe99e542a17859017d72d7f6a63de0f04f99c2cb125936",
|
||||
"sha256:3eea6ed6e6c918e468e693c41ef07f3c3acc310b70ddd9cc72d9ef84bc9564ca",
|
||||
"sha256:3f14a4fb1c1c402a22e6a341a24c1341b4a3def81b41cd354386dcb795f83897",
|
||||
"sha256:436a943c2900bb98123b06437cdd30580a61340fbdb7b28aaf345a459c19046a",
|
||||
"sha256:4946e7f59b7b6a9e27bef34422f645e9a368cb2be11bf1ef3cafc39a1f6ba68d",
|
||||
"sha256:49a9b4af45e8b925e1cd6f3b15bbba2c81e7dba6dce170c677c9cda547411e14",
|
||||
"sha256:4f8b0c78e7aac24979ef09b7f50da871c2de2def043d468c4b41f512d831e912",
|
||||
"sha256:52427a7eadc98f9e62cb1368a5079ae826f94f05755d2d567d93ee1bc3ceb354",
|
||||
"sha256:5e53d7e6a98b64fe54775d23a7c669763451340c3d44ad5e3a3b48a1efbdc96f",
|
||||
"sha256:5fcfbebdb0c5d8d18b84118842f31965d59ee3e66996ac842e21f957eb76138c",
|
||||
"sha256:601f4a75797d7a770daed8b42b97cd1bb1ba18bd51a9382077a6a247a12aa38d",
|
||||
"sha256:61c5a7edbd7c695e54fca029ceb351fc45cd8860119a0f83e48be44e1c464862",
|
||||
"sha256:6a2a2c724d97c1eb8cf966b16ca2915566a4904b9aad2ed9a09c748ffe14f969",
|
||||
"sha256:6d48fc57e7c1e3df57be5ae8614bab6d4e7b60f65c5457915c26892c41afc59e",
|
||||
"sha256:6f11b77ec0979f7e4dc5ae081325a2946f1fe424148d3945f943ceaede98adb8",
|
||||
"sha256:704f5572ff473a5f897745abebc6df40f22d4133c1e0a1f124e4f2bd3330ff7e",
|
||||
"sha256:725e171e0b99a66ec8605ac77fa12239dbe061482ac854d25720e2294652eeaa",
|
||||
"sha256:7cfced4a069003d8913408e10ca8ed092c49a7f6cefee9bb74b6b3e860683b45",
|
||||
"sha256:7ec465e6549ed97e9f1e5ed51c657c9ede767bc1c11552f7f4d022c4df4a977a",
|
||||
"sha256:82bddf0e72cb2af3cbba7cec1d2fd11fda0de6be8f4492223d4a268713ef2147",
|
||||
"sha256:82cd34f1081ae4ea2ede3d52f71b7be313756e99b4b5f829f89b12da552d3aa3",
|
||||
"sha256:843b9c835580d52828d8f69ea4302537337a21e6b4f1ec711a52241ba4a824f3",
|
||||
"sha256:877efb968c3d7eb2dad540b6cabf2f1d3c0fbf4b2d309a3c141f79c7e0061324",
|
||||
"sha256:8b9f19df998761babaa7f09e6bc169294eefafd6149aaa272081cbddc7ba4ca3",
|
||||
"sha256:8cf5877f7ed384dabfdcc37922c3191bf27e55b498fecece9fd5c2c7aaa34c33",
|
||||
"sha256:8d2900b7f5318bc7ad8631d3d40190b95ef2aa8cc59473b73b294e4a55e9f30f",
|
||||
"sha256:8d7b4beebb178e9183138f552238f7e6613162a42164233e2bda00cb3afac58f",
|
||||
"sha256:8f52fe6859b9db71ee609b0c0a70fea5f1e71c3462ecf144ca800d3f434f0764",
|
||||
"sha256:98f3f020a2b736566c707c8e034945c02aa94e124c24f77ca097c446f81b01f1",
|
||||
"sha256:9aa543980ab1fbf1720969af1d99095a548ea42e00361e727c58a40832439114",
|
||||
"sha256:9b99f564659cfa704a2dd82d0684207b1aadf7d02d33e54845f9fc78e06b7581",
|
||||
"sha256:9bcf86dfc8ff3e992fed847c077bd875d9e0ba2fa25d859c3a0f0f76f07f0c8d",
|
||||
"sha256:9bd0ae7cc2b85320abd5e0abad5ccee5564ed5f0cc90245d2f9a8ef330a8deae",
|
||||
"sha256:9d3c0f8567ffe7502d969c2c1b809892dc793b5d0665f602aad19895f8d508da",
|
||||
"sha256:9e5ac3437746189a9b4121db2a7b86056ac8786b12e88838696899328fc44bb2",
|
||||
"sha256:a36c506e5f8aeb40680491d39ed94670487ce6614b9d27cabe45d94cd5d63e1e",
|
||||
"sha256:a5ab722ae5a873d8dcee1f5f45ddd93c34210aed44ff2dc643b5025981908cda",
|
||||
"sha256:a96f02ba1bcd330807fc060ed91d1f7a20853da6dd449e5da4b09bfcc08fdcf5",
|
||||
"sha256:acb6b2f96f60f70e7f34efe0c3ea34ca63f19ca63ce90019c6cbca6b676e81fa",
|
||||
"sha256:ae15347a88cf8af0949a9872b57a320d2605ae069bcdf047677318bc0bba45b1",
|
||||
"sha256:af8920ce4a55ff41167ddbc20077f5698c2e710ad3353d32a07d3264f3a2021e",
|
||||
"sha256:afd825e30f8d1f521713a5669b63657bcfe5980a916c95855060048b88e1adb7",
|
||||
"sha256:b21b4031b53d25b0858d4e124f2f9131ffc1530431c6d1321805c90da78388d1",
|
||||
"sha256:b4b68c961b5cc402cbd99cca5eb2547e46ce77260eb705f4d117fd9c3f932b95",
|
||||
"sha256:b66aa6357b265670bb574f050ffceefb98549c721cf28351b748be1ef9577d93",
|
||||
"sha256:b9e240ae0ba96477682aa87899d94ddec1cc7926f9df29b1dd57b39e797d5ab5",
|
||||
"sha256:bc64d1b1dab08f679fb89c368f4c05693f58a9faf744c4d390d7ed1d8223869b",
|
||||
"sha256:bf8443781533b8d37b295016a4b53c1494fa9a03573c09ca5104550c138d5c05",
|
||||
"sha256:c26aab6ea9c54d3bed716b8851c8bfc40cb249b8e9880e250d1eddde9f709bf5",
|
||||
"sha256:c3cd1fc1dc7c376c54440aeaaa0dcc803d2126732ff5c6b68ccd619f2e64be4f",
|
||||
"sha256:c7257171bb8d4432fe9d6fdde4d55fdbe663a63636a17f7f9aaba9bcb3153ad7",
|
||||
"sha256:d42e3a3fc18acc88b838efded0e6ec3edf3e328a58c68fbd36a7263a874906c8",
|
||||
"sha256:d74fcaf87132ffc0447b3c685a9f862ffb5b43e70ea6beec2fb8057d5d2a1fea",
|
||||
"sha256:d8c1d679df4361408b628f42b26a5d62bd3e9ba7f0c0e7969f925021554755aa",
|
||||
"sha256:e856c1c7255c739434489ec9c8aa9cdf5179785d10ff20add308b5d673bed5cd",
|
||||
"sha256:eac68f96539b32fce2c9b47eb7c25bb2582bdaf1bbb360d25f564ee9e04c542b",
|
||||
"sha256:ed7326563024b6e91fef6b6c7a1a2ff0a71b97793ac33dbbcf38f6005e51ff6e",
|
||||
"sha256:ed8c3d2cd329bf779b7ed38db176738f3f8be637bb395ce9629fc76f78afe3d4",
|
||||
"sha256:f4c9bda132ad108b387c33fabfea47866af87f4ea6ffb79418004f0521e63204",
|
||||
"sha256:f643ffd2669ffd4b5a3e9b41c909b72b2a1d5e4915da90a77e119b8d48ce867a"
|
||||
],
|
||||
"version": "==4.9.4"
|
||||
"markers": "python_version >= '3.10'",
|
||||
"version": "==5.1.0"
|
||||
},
|
||||
"markdown-it-py": {
|
||||
"hashes": [
|
||||
@ -1399,15 +1410,6 @@
|
||||
"markers": "python_version >= '3.9'",
|
||||
"version": "==15.4.4"
|
||||
},
|
||||
"openai": {
|
||||
"hashes": [
|
||||
"sha256:99c5d257d09ea6533d689d1cc77caa0ac679fa21efef8893d8b0832a86877f1b",
|
||||
"sha256:a54002c814e05222e413664f651b5916714e4700d041d5cf5724d3ae1a3e3481"
|
||||
],
|
||||
"index": "pypi",
|
||||
"markers": "python_full_version >= '3.7.1'",
|
||||
"version": "==1.12.0"
|
||||
},
|
||||
"packaging": {
|
||||
"hashes": [
|
||||
"sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5",
|
||||
@ -2045,6 +2047,14 @@
|
||||
"markers": "python_version >= '3.6' and python_version < '4'",
|
||||
"version": "==4.9"
|
||||
},
|
||||
"s3transfer": {
|
||||
"hashes": [
|
||||
"sha256:3cdb40f5cfa6966e812209d0994f2a4709b561c88e90cf00c2696d2df4e56b2e",
|
||||
"sha256:d0c8bbf672d5eebbe4e57945e23b972d963f07d82f661cabf678a5c88831595b"
|
||||
],
|
||||
"markers": "python_version >= '3.8'",
|
||||
"version": "==0.10.0"
|
||||
},
|
||||
"scikit-learn": {
|
||||
"hashes": [
|
||||
"sha256:0df87de9ce1c0140f2818beef310fb2e2afdc1e66fc9ad587965577f17733649",
|
||||
@ -2292,11 +2302,11 @@
|
||||
},
|
||||
"urllib3": {
|
||||
"hashes": [
|
||||
"sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d",
|
||||
"sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"
|
||||
"sha256:c97dfde1f7bd43a71c8d2a58e369e9b2bf692d1334ea9f9cae55add7d0dd0f84",
|
||||
"sha256:fdb6d215c776278489906c2f8916e6e7d4f5a9b602ccbcfdf7f016fc8da0596e"
|
||||
],
|
||||
"markers": "python_version >= '3.8'",
|
||||
"version": "==2.2.1"
|
||||
"markers": "python_version >= '3.10'",
|
||||
"version": "==2.0.7"
|
||||
},
|
||||
"uvicorn": {
|
||||
"extras": [
|
||||
@ -3231,14 +3241,6 @@
|
||||
"index": "pypi",
|
||||
"version": "==4.3.1"
|
||||
},
|
||||
"importlib-metadata": {
|
||||
"hashes": [
|
||||
"sha256:4805911c3a4ec7c3966410053e9ec6a1fecd629117df5adee56dfc9432a1081e",
|
||||
"sha256:f238736bb06590ae52ac1fab06a3a9ef1d8dce2b7a35b5ab329371d6c8f5d2cc"
|
||||
],
|
||||
"markers": "python_version < '3.10'",
|
||||
"version": "==7.0.1"
|
||||
},
|
||||
"incremental": {
|
||||
"hashes": [
|
||||
"sha256:912feeb5e0f7e0188e6f42241d2f450002e11bbc0937c65865045854c24c0bd0",
|
||||
@ -4065,14 +4067,6 @@
|
||||
"markers": "python_version >= '3.8'",
|
||||
"version": "==4.0.0"
|
||||
},
|
||||
"zipp": {
|
||||
"hashes": [
|
||||
"sha256:0e923e726174922dce09c53c59ad483ff7bbb8e572e00c7f7c46b88556409f31",
|
||||
"sha256:84e64a1c28cf7e91ed2078bb8cc8c259cb19b76942096c8d7b84947690cabaf0"
|
||||
],
|
||||
"markers": "python_version >= '3.8'",
|
||||
"version": "==3.17.0"
|
||||
},
|
||||
"zope-interface": {
|
||||
"hashes": [
|
||||
"sha256:02adbab560683c4eca3789cc0ac487dcc5f5a81cc48695ec247f00803cafe2fe",
|
||||
@ -4510,7 +4504,6 @@
|
||||
"sha256:9acd36fef264d9ed5a96345c45f7d80f0d967059e92213998b3046fbb64f67fc",
|
||||
"sha256:d6861d9d68e8268a5346d8a43d14727e6c636ebc6d49f2b8fc034c25996d35dd"
|
||||
],
|
||||
"index": "pypi",
|
||||
"markers": "python_version >= '3.8'",
|
||||
"version": "==3.5.0.20240129"
|
||||
},
|
||||
@ -4519,7 +4512,6 @@
|
||||
"sha256:062c5a0f20301a30f2df4db583f15b3c2a1283a12518d1f9d81396154e12c1af",
|
||||
"sha256:4800b61bf7eabdae2f1b17ade0d080709ed33e9f26a2e900e470e8b56ebe2387"
|
||||
],
|
||||
"index": "pypi",
|
||||
"markers": "python_version >= '3.8'",
|
||||
"version": "==10.2.0.20240213"
|
||||
},
|
||||
@ -4537,7 +4529,6 @@
|
||||
"sha256:8052c574b0ab8f2dc94bdc4a31b9d48e8aa5a0f12398ef40cadadbe551da949b",
|
||||
"sha256:92e62ac37793e567cd2b0f64f1456c24fccce4041d9c5f869697a6739fde4fce"
|
||||
],
|
||||
"index": "pypi",
|
||||
"markers": "python_version >= '3.8'",
|
||||
"version": "==2.17.0.20240106"
|
||||
},
|
||||
|
@ -1158,3 +1158,5 @@ if DEBUG: # pragma: no cover
|
||||
REMOTE_PARSER_ENGINE = os.getenv("PAPERLESS_REMOTE_PARSER_ENGINE")
|
||||
REMOTE_PARSER_API_KEY = os.getenv("PAPERLESS_REMOTE_PARSER_API_KEY")
|
||||
REMOTE_PARSER_ENDPOINT = os.getenv("PAPERLESS_REMOTE_PARSER_ENDPOINT")
|
||||
REMOTE_PARSER_API_KEY_ID = os.getenv("PAPERLESS_REMOTE_PARSER_API_KEY_ID")
|
||||
REMOTE_PARSER_REGION = os.getenv("PAPERLESS_REMOTE_PARSER_REGION")
|
||||
|
@ -22,4 +22,13 @@ def check_remote_parser_configured(app_configs, **kwargs):
|
||||
),
|
||||
]
|
||||
|
||||
if settings.REMOTE_PARSER_ENGINE == "awstextract" and (
|
||||
not settings.REMOTE_PARSER_API_KEY_ID or not settings.REMOTE_PARSER_REGION
|
||||
):
|
||||
return [
|
||||
Error(
|
||||
"AWS Textract remote parser requires access key ID and region to be configured.",
|
||||
),
|
||||
]
|
||||
|
||||
return []
|
||||
|
@ -8,15 +8,29 @@ from paperless_tesseract.parsers import RasterisedDocumentParser
|
||||
|
||||
|
||||
class RemoteEngineConfig:
|
||||
def __init__(self, engine: str, api_key: str, endpoint: Optional[str] = None):
|
||||
def __init__(
|
||||
self,
|
||||
engine: str,
|
||||
api_key: str,
|
||||
endpoint: Optional[str] = None,
|
||||
api_key_id: Optional[str] = None,
|
||||
region: Optional[str] = None,
|
||||
):
|
||||
self.engine = engine
|
||||
self.api_key = api_key
|
||||
self.endpoint = endpoint
|
||||
self.api_key_id = api_key_id
|
||||
self.region = region
|
||||
|
||||
def engine_is_valid(self):
|
||||
valid = self.engine in ["chatgpt", "azureaivision"] and self.api_key is not None
|
||||
valid = (
|
||||
self.engine in ["azureaivision", "awstextract", "googlecloudvision"]
|
||||
and self.api_key is not None
|
||||
)
|
||||
if self.engine == "azureaivision":
|
||||
valid = valid and self.endpoint is not None
|
||||
if self.engine == "awstextract":
|
||||
valid = valid and self.region is not None and self.api_key_id is not None
|
||||
return valid
|
||||
|
||||
|
||||
@ -35,6 +49,8 @@ class RemoteDocumentParser(RasterisedDocumentParser):
|
||||
engine=settings.REMOTE_PARSER_ENGINE,
|
||||
api_key=settings.REMOTE_PARSER_API_KEY,
|
||||
endpoint=settings.REMOTE_PARSER_ENDPOINT,
|
||||
api_key_id=settings.REMOTE_PARSER_API_KEY_ID,
|
||||
region=settings.REMOTE_PARSER_REGION,
|
||||
)
|
||||
|
||||
def supported_mime_types(self):
|
||||
@ -57,47 +73,36 @@ class RemoteDocumentParser(RasterisedDocumentParser):
|
||||
else:
|
||||
return []
|
||||
|
||||
def chatgpt_parse(
|
||||
def aws_textract_parse(
|
||||
self,
|
||||
file: Path,
|
||||
) -> Optional[str]:
|
||||
# does not work
|
||||
from openai import OpenAI
|
||||
import boto3
|
||||
|
||||
client = OpenAI(
|
||||
api_key=self.settings.api_key,
|
||||
client = boto3.client(
|
||||
"textract",
|
||||
region_name=self.settings.region,
|
||||
aws_access_key_id=self.settings.api_key_id,
|
||||
aws_secret_access_key=self.settings.api_key,
|
||||
)
|
||||
assistants = client.beta.assistants.list()
|
||||
for assistant in assistants.data:
|
||||
if assistant.name == "Paperless-ngx Document Parser":
|
||||
assistant = assistant
|
||||
break
|
||||
if not assistant:
|
||||
assistant = client.beta.assistants.create(
|
||||
model="gpt-3.5-turbo",
|
||||
tools=[{"type": "code_interpreter"}],
|
||||
name="Paperless-ngx Document Parser",
|
||||
)
|
||||
|
||||
self.log.info("Uploading document to OpenAI...")
|
||||
gpt_file = client.files.create(file=file, purpose="assistants")
|
||||
client.files.wait_for_processing(gpt_file.id)
|
||||
client.beta.assistants.update(assistant_id=assistant.id, files=[gpt_file.id])
|
||||
thread = client.beta.threads.create()
|
||||
client.beta.threads.messages.create(
|
||||
thread_id=thread.id,
|
||||
role="user",
|
||||
content="Output the text of the file",
|
||||
lines = []
|
||||
with open(file, "rb") as f:
|
||||
file_bytes = f.read()
|
||||
file_bytearray = bytearray(file_bytes)
|
||||
|
||||
self.log.info("Analyzing document with AWS Textract...")
|
||||
response = client.analyze_document(
|
||||
Document={"Bytes": file_bytearray},
|
||||
FeatureTypes=["TABLES"],
|
||||
)
|
||||
client.beta.threads.runs.create(
|
||||
thread_id=thread,
|
||||
assistant_id=assistant.id,
|
||||
)
|
||||
response = client.beta.threads.messages.list(
|
||||
thread_id=thread.id,
|
||||
)
|
||||
self.text = response.data[0].content[0].text.value
|
||||
client.files.delete(gpt_file.id)
|
||||
|
||||
blocks = response["Blocks"]
|
||||
for block in blocks:
|
||||
if block["BlockType"] == "LINE":
|
||||
lines.append(block["Text"])
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
def azure_ai_vision_parse(
|
||||
self,
|
||||
@ -197,15 +202,9 @@ class RemoteDocumentParser(RasterisedDocumentParser):
|
||||
)
|
||||
self.text = ""
|
||||
return
|
||||
elif self.settings.engine == "chatgpt":
|
||||
self.text = self.chatgpt_parse(document_path)
|
||||
elif self.settings.engine == "azureaivision":
|
||||
self.text = self.azure_ai_vision_parse(document_path)
|
||||
elif self.settings.engine == "awstextract":
|
||||
self.text = self.aws_textract_parse(document_path)
|
||||
elif self.settings.engine == "googlecloudvision":
|
||||
self.text = self.google_cloud_vision_parse(document_path, mime_type)
|
||||
else:
|
||||
self.log.warning(
|
||||
"No valid remote parser engine is configured, content will be empty.",
|
||||
)
|
||||
self.text = ""
|
||||
return
|
||||
|
@ -33,6 +33,19 @@ class TestChecks(TestCase):
|
||||
),
|
||||
)
|
||||
|
||||
@override_settings(REMOTE_PARSER_ENGINE="awstextract")
|
||||
@override_settings(REMOTE_PARSER_API_KEY="somekey")
|
||||
@override_settings(REMOTE_PARSER_API_KEY_ID=None)
|
||||
@override_settings(REMOTE_PARSER_REGION=None)
|
||||
def test_aws_no_id_or_region(self):
|
||||
msgs = check_remote_parser_configured(None)
|
||||
self.assertEqual(len(msgs), 1)
|
||||
self.assertTrue(
|
||||
msgs[0].msg.startswith(
|
||||
"AWS Textract remote parser requires access key ID and region to be configured.",
|
||||
),
|
||||
)
|
||||
|
||||
@override_settings(REMOTE_PARSER_ENGINE="something")
|
||||
@override_settings(REMOTE_PARSER_API_KEY="somekey")
|
||||
def test_valid_configuration(self):
|
||||
|
@ -1,9 +1,13 @@
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
from django.test import TestCase
|
||||
from django.test import override_settings
|
||||
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
from documents.tests.utils import FileSystemAssertsMixin
|
||||
from paperless_remote.parsers import RemoteDocumentParser
|
||||
|
||||
|
||||
class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
@ -19,27 +23,55 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
self.fail(f"'{s}' is not in '{content}'")
|
||||
self.assertListEqual(indices, sorted(indices))
|
||||
|
||||
# Currently test is not working on 3.11 on CI but works locally. Dont know why.
|
||||
# @mock.patch("azure.ai.formrecognizer.DocumentAnalysisClient.begin_analyze_document")
|
||||
# def test_get_text_with_azure(self, mock_begin_analyze_document):
|
||||
# result = mock.Mock()
|
||||
# result.content = "This is a test document."
|
||||
# mock_begin_analyze_document.return_value.result.return_value = result
|
||||
@mock.patch("azure.ai.formrecognizer.DocumentAnalysisClient")
|
||||
def test_get_text_with_azure(self, mock_azure_client):
|
||||
result = mock.Mock()
|
||||
result.content = "This is a test document."
|
||||
|
||||
# with override_settings(
|
||||
# REMOTE_PARSER_ENGINE="azureaivision",
|
||||
# REMOTE_PARSER_API_KEY="somekey",
|
||||
# REMOTE_PARSER_ENDPOINT="https://endpoint.cognitiveservices.azure.com/",
|
||||
# ):
|
||||
# parser = RemoteDocumentParser(uuid.uuid4())
|
||||
# parser.parse(
|
||||
# self.SAMPLE_FILES / "simple-digital.pdf",
|
||||
# "application/pdf",
|
||||
# )
|
||||
mock_azure_client.return_value.begin_analyze_document.return_value.result.return_value = (
|
||||
result
|
||||
)
|
||||
|
||||
# mock_begin_analyze_document.assert_called_once()
|
||||
with override_settings(
|
||||
REMOTE_PARSER_ENGINE="azureaivision",
|
||||
REMOTE_PARSER_API_KEY="somekey",
|
||||
REMOTE_PARSER_ENDPOINT="https://endpoint.cognitiveservices.azure.com/",
|
||||
):
|
||||
parser = RemoteDocumentParser(uuid.uuid4())
|
||||
parser.parse(
|
||||
self.SAMPLE_FILES / "simple-digital.pdf",
|
||||
"application/pdf",
|
||||
)
|
||||
|
||||
# self.assertContainsStrings(
|
||||
# parser.text.strip(),
|
||||
# ["This is a test document."],
|
||||
# )
|
||||
self.assertContainsStrings(
|
||||
parser.text.strip(),
|
||||
["This is a test document."],
|
||||
)
|
||||
|
||||
@mock.patch("boto3.client")
|
||||
def test_get_text_with_awstextract(self, mock_aws_client):
|
||||
mock_aws_client.return_value.analyze_document.return_value = {
|
||||
"Blocks": [
|
||||
{
|
||||
"BlockType": "LINE",
|
||||
"Text": "This is a test document.",
|
||||
},
|
||||
],
|
||||
}
|
||||
|
||||
with override_settings(
|
||||
REMOTE_PARSER_ENGINE="awstextract",
|
||||
REMOTE_PARSER_API_KEY="somekey",
|
||||
REMOTE_PARSER_API_KEY_ID="somekeyid",
|
||||
REMOTE_PARSER_REGION="us-west-2",
|
||||
):
|
||||
parser = RemoteDocumentParser(uuid.uuid4())
|
||||
parser.parse(
|
||||
self.SAMPLE_FILES / "simple-digital.pdf",
|
||||
"application/pdf",
|
||||
)
|
||||
|
||||
self.assertContainsStrings(
|
||||
parser.text.strip(),
|
||||
["This is a test document."],
|
||||
)
|
||||
|
Loading…
x
Reference in New Issue
Block a user