mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	directly use rapidfuzz
This commit is contained in:
		
							
								
								
									
										2
									
								
								Pipfile
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								Pipfile
									
									
									
									
									
								
							| @@ -16,7 +16,6 @@ django-extensions = "*" | |||||||
| django-filter = "~=22.1" | django-filter = "~=22.1" | ||||||
| djangorestframework = "~=3.13" | djangorestframework = "~=3.13" | ||||||
| filelock = "*" | filelock = "*" | ||||||
| fuzzywuzzy = {extras = ["speedup"], version = "*"} |  | ||||||
| gunicorn = "*" | gunicorn = "*" | ||||||
| imap-tools = "*" | imap-tools = "*" | ||||||
| langdetect = "*" | langdetect = "*" | ||||||
| @@ -28,6 +27,7 @@ python-dotenv = "*" | |||||||
| python-dateutil = "*" | python-dateutil = "*" | ||||||
| python-magic = "*" | python-magic = "*" | ||||||
| psycopg2 = "*" | psycopg2 = "*" | ||||||
|  | rapidfuzz = "*" | ||||||
| redis = {extras = ["hiredis"], version = "*"} | redis = {extras = ["hiredis"], version = "*"} | ||||||
| scikit-learn = "~=1.1" | scikit-learn = "~=1.1" | ||||||
| # Pin this until piwheels is building 1.9 (see https://www.piwheels.org/project/scipy/) | # Pin this until piwheels is building 1.9 (see https://www.piwheels.org/project/scipy/) | ||||||
|   | |||||||
							
								
								
									
										126
									
								
								Pipfile.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										126
									
								
								Pipfile.lock
									
									
									
										generated
									
									
									
								
							| @@ -401,17 +401,6 @@ | |||||||
|             "index": "pypi", |             "index": "pypi", | ||||||
|             "version": "==1.2.0" |             "version": "==1.2.0" | ||||||
|         }, |         }, | ||||||
|         "fuzzywuzzy": { |  | ||||||
|             "extras": [ |  | ||||||
|                 "speedup" |  | ||||||
|             ], |  | ||||||
|             "hashes": [ |  | ||||||
|                 "sha256:45016e92264780e58972dca1b3d939ac864b78437422beecebb3095f8efd00e8", |  | ||||||
|                 "sha256:928244b28db720d1e0ee7587acf660ea49d7e4c632569cad4f1cd7e68a5f0993" |  | ||||||
|             ], |  | ||||||
|             "index": "pypi", |  | ||||||
|             "version": "==0.18.0" |  | ||||||
|         }, |  | ||||||
|         "gunicorn": { |         "gunicorn": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|                 "sha256:9dcc4547dbb1cb284accfb15ab5667a0e5d1881cc443e0677b4882a4067a807e", |                 "sha256:9dcc4547dbb1cb284accfb15ab5667a0e5d1881cc443e0677b4882a4067a807e", | ||||||
| @@ -622,114 +611,6 @@ | |||||||
|             "index": "pypi", |             "index": "pypi", | ||||||
|             "version": "==1.0.9" |             "version": "==1.0.9" | ||||||
|         }, |         }, | ||||||
|         "levenshtein": { |  | ||||||
|             "hashes": [ |  | ||||||
|                 "sha256:019ae21de930d6077efa1eac746de4df5234e7c6c11ab10080c0935fc5abbecf", |  | ||||||
|                 "sha256:02688fff6d256afdd57da5359144ddab8e054b2ba98ddcf147fe191bdf996e88", |  | ||||||
|                 "sha256:0274b87df89d1dda8dce77cf05a9dfab7bd30045a09e0d9435ec8be622e374e6", |  | ||||||
|                 "sha256:0323e8dbeec4d63c27111796baa7e8a89b391c32d90e67d78f9404d0c8edeab4", |  | ||||||
|                 "sha256:053edbb52fe8b8a1a6698c4fee39590c9e44a602ace807291eb87e3b17f85f48", |  | ||||||
|                 "sha256:059027f5dd2aafb916301f46a619c7fe03ff5761cdb2d091cf80bf6dbc24bc29", |  | ||||||
|                 "sha256:05f11a4be4f668974238cff21208fbd9f629cab8a68b444b7d4a4cfd8081b1d6", |  | ||||||
|                 "sha256:0ab71cc5ea86f6685a7b2235edad65f1f2a4b6341109af259d758973d96eece5", |  | ||||||
|                 "sha256:0b439f4fb0b615bc0443cc83eaf5835bd480f680c69ed1be963bdb401b8159f8", |  | ||||||
|                 "sha256:0ec50d24a12e50857e94ac9035d3c06fd0827bb477b9ebcd83a2a49dd89e5e23", |  | ||||||
|                 "sha256:131fc50d52a52acc367ea8bccb028447b734243d00ba1cfc7d9ff8d0dc37fa38", |  | ||||||
|                 "sha256:17b5f1d1a4a5ac536283298c98cafc5632ae3897c8601fb2ec8babc6f47a1be9", |  | ||||||
|                 "sha256:183b8da9b870ad171a11a629c43e0587a228aea9d595a969231d59bf530b6c77", |  | ||||||
|                 "sha256:18888d50813b9df9b8dc8c1506ec40c783db25f130a6101eb89896b27076f751", |  | ||||||
|                 "sha256:25b88277832eb558305c3bb986ad61f19b5cb5a87aced289bce4a1701a92aa31", |  | ||||||
|                 "sha256:266cdab48e2242b6c010beb8b7af4164aa87f4ad8d6fbd9f4f531214f8ddb234", |  | ||||||
|                 "sha256:281bffb09b2e1620db4e99a9df96e38d939c341c7c43cd5191326fbdb4d42275", |  | ||||||
|                 "sha256:28cd002cf5a499e6e9bd69d992ffd501b8473948f3e97d6e075b774df1901e8e", |  | ||||||
|                 "sha256:2972c6c6a806e0c788f6ec39510abdb61b3a648fd141a5fa77becd2cc05ff551", |  | ||||||
|                 "sha256:2b4027b370cc46c4802ba32a979729209c0407d548723e809f19a50a9df27405", |  | ||||||
|                 "sha256:318c924e218be754427ce6bb4c630d9dcb5478eb00a8a3f8a0972086adc763b1", |  | ||||||
|                 "sha256:380accae56f8c9df99f34bc7e79d286fee37c3dd06b362c394b08ea96371b7c5", |  | ||||||
|                 "sha256:3c7784f9936292c9d3f92fc772d874edc071a16cd883ea0d997e5c4318f6362c", |  | ||||||
|                 "sha256:3ebd85fd6253abe89f852fc008294d490eb7a5f66913703148b8d263b048cc90", |  | ||||||
|                 "sha256:4126c8fe9d817ac3ab223ee5db41a09d0fa82dbd6bb59d207b6f7313d733f19b", |  | ||||||
|                 "sha256:4155f0ab246b6892110960f25989ab91073cd708b974f4732dca4d219a8be3e1", |  | ||||||
|                 "sha256:41f16267d8e6d916e06a6a1a0e151f643a6bab1277945a4bd494f359d4185dd2", |  | ||||||
|                 "sha256:4522f5d662d3ee55a072fad18e2af5dae480658d4e23b04b455c4b7542ce4327", |  | ||||||
|                 "sha256:46c900c807b0614c454ba89271ec6f59212403c54dc68ea493ab1ece2c510618", |  | ||||||
|                 "sha256:48291b25a904243f37c9aabbfed3eaba466c9a993f5f5946fe647163b7face07", |  | ||||||
|                 "sha256:5038a5e9e106087c117f0a7d6fd9d8a382b228da24bbd085b9f2b5d54ab11c3a", |  | ||||||
|                 "sha256:594a26bcf0cb720c16ac6db3fd4b3f411be756f9da7682f2f629089ff15aef18", |  | ||||||
|                 "sha256:59706135d3107939effe9f9263bd78c507f4abd7bfb96acc5a7f4176aa0a90d2", |  | ||||||
|                 "sha256:5a327d7581696c7a392a8f85cce7e54fa1303f5b79b3b2983abaab309b56cfd6", |  | ||||||
|                 "sha256:5eca8a45d38c916783c44e5da06a367b77234efa51d84dda8804654b99efecc9", |  | ||||||
|                 "sha256:5fa85f6789178ede5333568cbee5bac5fa9718d5f02406b65545e83368fa8fe9", |  | ||||||
|                 "sha256:65097e45ef7a942a9b92999b81d2e91fe80cbd0616215e625af39d2166692018", |  | ||||||
|                 "sha256:65cc9938cb9bd8862fc220e0719fd7f9c291d788f0a62bb8840820c46fa5a4d0", |  | ||||||
|                 "sha256:6a4c3607e2a0e66337d8ddf95ca7efe9b30ebf944119a4fb86503ea66f777263", |  | ||||||
|                 "sha256:72f11a136f148eb1218e7d1492749b8b5594302010db0cebd47423c4ac8c79ee", |  | ||||||
|                 "sha256:78b5a71de59e30c697a64c69fc48b032bb99c43b7437091b808a9ba20bb0235c", |  | ||||||
|                 "sha256:7b212edc9bf9d0c25cc3117483289b9e1a49a1ed134a02635baa987e9f0d89db", |  | ||||||
|                 "sha256:7e0f7045c420abdea249a28384baa846b87bad5c9f42af1957dc50c6e337fa1a", |  | ||||||
|                 "sha256:7e83cfec424f546dc3f0cc71896f8cc384a711f4116bc1abb0598302a9af3240", |  | ||||||
|                 "sha256:80c55bcc31d21bd07f7d1589e11f2ac1faf3359cf9f93026a1944ee76a40f954", |  | ||||||
|                 "sha256:863740d7f45adfd29b95658a680b16113721eaa89857c67e7e9573c61e87bbd8", |  | ||||||
|                 "sha256:88484b8c3f71dc9205d0d36da541e2cdcf4bc74474a2ee8d99c2e6411b659b89", |  | ||||||
|                 "sha256:8a08810e0bcc606d10cf1c5389c96fc92362244c0cf761358c495c2eb29df3dc", |  | ||||||
|                 "sha256:8c0637ae4fcb54d5c7fc9af24d348003b6f9dbaf7a06bf13f769d7b85903af39", |  | ||||||
|                 "sha256:8e9e3409338a42e3d4c30c224fdb678364542c77994f089fd6cc8131969eff48", |  | ||||||
|                 "sha256:902ea10ba85e014dc5d23a7bbb3ab70722349561e73783dd71571359e8867244", |  | ||||||
|                 "sha256:9533db74a2685169380db3db3ab59643453e7c486fffa9bf3ab60b73c4e174be", |  | ||||||
|                 "sha256:97f02ff49d1fa21308207a7743bec4fdd7aa90e8dd091539da660fc51e624c4d", |  | ||||||
|                 "sha256:9ea9a2a154dc7d8658930fa87cda0e6094235b5e130f037d9894eaf8722119a5", |  | ||||||
|                 "sha256:a0440d847b2c9986e4d27e8a59164714e5198530c69a5f9fb2e4620f9136d653", |  | ||||||
|                 "sha256:a6d39a27b542a781d691827b955d685d496fb6cccfc6eecc336a78b399032062", |  | ||||||
|                 "sha256:a7f4d3c478b1fcf412bf6c82914b02fed33ab359120df9172dda7bc855227461", |  | ||||||
|                 "sha256:ad297807bbdffce61b04e5e0c22f3c5d9e1905c1ee186f1f6d029f83bf0f18b8", |  | ||||||
|                 "sha256:add6778bb51efb80174937543754d2dfa0f4e504e7302d97896006a642c14f95", |  | ||||||
|                 "sha256:ae075ebf7bb5f48b3bd2fc9cd53346e4ff43e2515a4f822914bbc62a3cbd6e7e", |  | ||||||
|                 "sha256:b26fb439a7fbb522af63bbd781fbf51ec0c0659134a93f5bc8e9e68641df811e", |  | ||||||
|                 "sha256:b2bac59721d246939b21274229b9923aeae3db97b6118da739c658c17e110dd6", |  | ||||||
|                 "sha256:b314ad1f0667715e8d1b6197d5336ab579b13e801172721d62331bd40034a30c", |  | ||||||
|                 "sha256:b7317035875bd7c4705e2566848b2043b78e18f2f5675ea651f9f7805b5589eb", |  | ||||||
|                 "sha256:b8e936e620e5f336a207e08c0da9dace5d4dbcc8e64743ab1acaa77a64bbf060", |  | ||||||
|                 "sha256:b906da4e9a7ba4ec33ed2f7238343866932c1a6f84944c804252b2922708d0ee", |  | ||||||
|                 "sha256:ba690e4e33c360fcf0b8411ca90f8b9cc595e8deddd6a25a9a75a725b698cd6a", |  | ||||||
|                 "sha256:bb14da3d63da994c34cfa47cde469df8013ddf5f575455a22530c8c4a0ed8616", |  | ||||||
|                 "sha256:bbc2e1632f4a61fa171ddab3bc8368fb8475e7ce68733ca92fec862fdd8e0f60", |  | ||||||
|                 "sha256:bbdd3c896db09993b7879cd35e56da6ed8918d161d6e80f9d9c40d78d34e4784", |  | ||||||
|                 "sha256:bcaaa8e542cb7e1962d0a58ce6a25f6b4b6ca2e5ce743155fc1f6eb2fea52574", |  | ||||||
|                 "sha256:bee682ab1005aff597946234e47c95fcf0f44d2b1f38075f0aba26bbc4e7545a", |  | ||||||
|                 "sha256:bfec6543d60c57e7543d9cbccdd5dfcf562f2c05cd6b814df68108a20794e254", |  | ||||||
|                 "sha256:c2e50baf7be8831524a87beec6c1873539519a1948f907dc3d4b9be27ebacb80", |  | ||||||
|                 "sha256:c6c79a6138be017d85f3bab1df735669b669a38f9b3ff646a1f179afbacb7b63", |  | ||||||
|                 "sha256:c702fb7c8bfd87c9ce9c8bddfc9a5796a492bab35a52b1693adee413721e32f2", |  | ||||||
|                 "sha256:c9ba1725826f6571a6e4c1561bb1613711f0058b91927a147dc42c637ba087d9", |  | ||||||
|                 "sha256:cf205ac52cb6b45745c0a4891cdb6e709c10ad5b034aa736aff561fc4ce9828c", |  | ||||||
|                 "sha256:d0d03fc67499ee90feedfa2add4aaa1c091a7bf333535d847b10fffe390e58fe", |  | ||||||
|                 "sha256:d118d63f08fd6ac285cb8166e96c992a6ed0e7a1644e8790c39070b18779e688", |  | ||||||
|                 "sha256:d24c09f397c3ce55f20e0250da7ba5b0e5249cb5d21465e71ec15154a3a7e8e0", |  | ||||||
|                 "sha256:d41735c7a646dae8612e0552dfc53f45807eeb54364dfb1f0a65ac274bc56b3a", |  | ||||||
|                 "sha256:dd1696d91f2a37cece9bd22e507e7be7c37c59ecc61fd15f0d0f31e3b6888957", |  | ||||||
|                 "sha256:dfcad9c63a893c95ba1149481b9680ce68dd71211f08df0073ee62700790bc97", |  | ||||||
|                 "sha256:e384782608837d9aaf123e413679883091744664a2cd76f0ad0e0a1f12facc57", |  | ||||||
|                 "sha256:e5ea0abea338c617b753082f36f64c70ade853d88e91ab5732b301ae8ed16e3f", |  | ||||||
|                 "sha256:e6ff81c570413bcc35f1c16850eb66e2493a3259e68efe8672376533d2c82d38", |  | ||||||
|                 "sha256:e88951ad2831880405f3f055ab12a6aa72696c20a2815128eeccdc3bf914cd78", |  | ||||||
|                 "sha256:e98e16b6ce531b12100c01daac922e8ec5b991832a5f58003f13b7d45ea82dc0", |  | ||||||
|                 "sha256:eb0fd32e8e433797499571447d9f975b4744be79c0a3339413868d79517231ed", |  | ||||||
|                 "sha256:ee74a73e1f9e16b71f67329e99bb58aa4af9a2c3c4b3a5db9f26e92e7c39e161", |  | ||||||
|                 "sha256:f15ec5f825c283a5aa427d78759ab8f84e7b5441d15cfff476b548bce3764666", |  | ||||||
|                 "sha256:f296c7fe928ce0e29e313f85c43a5ab80542e096e1163c2605b8cc18aa2aff2b", |  | ||||||
|                 "sha256:f32df1b19f773bb41382e8b215955d248c9766e3d6ff5a1dd89709e7d96e4685", |  | ||||||
|                 "sha256:f3ed67279a4b317a808ac743d3a915f74187530c5f3d9c859e5d04d475b8c174", |  | ||||||
|                 "sha256:f5b972ca514898fb7131671c425a62ca38fdae2a8d6296e4b605ec8202349f8c", |  | ||||||
|                 "sha256:f961086c0dbba6c00cbd5c5b5646247efd0d0a4044444bfaa9efc7a6ba5e96a5", |  | ||||||
|                 "sha256:f9bd7d7a449667d6f17edd9045ec82a4ed2767afb91743d3d0b18c376a56dfe2", |  | ||||||
|                 "sha256:fbac4c8ffadb685189efa92fafdb2f5392e9cbd262eae3818bcdb1bd19acaaf2", |  | ||||||
|                 "sha256:fc43c8276d0a7c7b76f31d4f3f80f9eb820673628f1411770a70029c1d5f6a75", |  | ||||||
|                 "sha256:fcfded324f0710632e22050a2fd7b56b1cbcb2d21001630bcc26d536f54bffec", |  | ||||||
|                 "sha256:ff435abdcbfdf4a070f488830cd53aef77cf8649d0fd8ed76bf27d9566e80e78" |  | ||||||
|             ], |  | ||||||
|             "markers": "python_version >= '3.6'", |  | ||||||
|             "version": "==0.20.7" |  | ||||||
|         }, |  | ||||||
|         "lxml": { |         "lxml": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|                 "sha256:04da965dfebb5dac2619cb90fcf93efdb35b3c6994fea58a157a834f2f94b318", |                 "sha256:04da965dfebb5dac2619cb90fcf93efdb35b3c6994fea58a157a834f2f94b318", | ||||||
| @@ -1215,13 +1096,6 @@ | |||||||
|             "index": "pypi", |             "index": "pypi", | ||||||
|             "version": "==0.5.0" |             "version": "==0.5.0" | ||||||
|         }, |         }, | ||||||
|         "python-levenshtein": { |  | ||||||
|             "hashes": [ |  | ||||||
|                 "sha256:88a58b95e3340a918489dac0c78f731323c0a4d8f5564f839ffea80155574e77", |  | ||||||
|                 "sha256:9228af5523f797f0798f045dc4a95ed1f46df72bc2186e52b530a33998a51b37" |  | ||||||
|             ], |  | ||||||
|             "version": "==0.20.7" |  | ||||||
|         }, |  | ||||||
|         "python-magic": { |         "python-magic": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|                 "sha256:c1ba14b08e4a5f5c31a302b7721239695b2f0f058d125bd5ce1ee36b9d9d3c3b", |                 "sha256:c1ba14b08e4a5f5c31a302b7721239695b2f0f058d125bd5ce1ee36b9d9d3c3b", | ||||||
|   | |||||||
| @@ -142,14 +142,14 @@ def matches(matching_model, document): | |||||||
|         return bool(match) |         return bool(match) | ||||||
|  |  | ||||||
|     elif matching_model.matching_algorithm == MatchingModel.MATCH_FUZZY: |     elif matching_model.matching_algorithm == MatchingModel.MATCH_FUZZY: | ||||||
|         from fuzzywuzzy import fuzz |         from rapidfuzz import fuzz | ||||||
|  |  | ||||||
|         match = re.sub(r"[^\w\s]", "", matching_model.match) |         match = re.sub(r"[^\w\s]", "", matching_model.match) | ||||||
|         text = re.sub(r"[^\w\s]", "", document_content) |         text = re.sub(r"[^\w\s]", "", document_content) | ||||||
|         if matching_model.is_insensitive: |         if matching_model.is_insensitive: | ||||||
|             match = match.lower() |             match = match.lower() | ||||||
|             text = text.lower() |             text = text.lower() | ||||||
|         if fuzz.partial_ratio(match, text) >= 90: |         if fuzz.partial_ratio(match, text, score_cutoff=90): | ||||||
|             # TODO: make this better |             # TODO: make this better | ||||||
|             log_reason( |             log_reason( | ||||||
|                 matching_model, |                 matching_model, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Max Bachmann
					Max Bachmann