Use rapidfuzz directly instead of fuzzywuzzy

This commit is contained in:
Max Bachmann 2022-10-31 20:42:44 +01:00 committed by Trenton H
parent 9214b41255
commit e97c04c03d
3 changed files with 3 additions and 129 deletions

View File

@@ -16,7 +16,6 @@ django-extensions = "*"
django-filter = "~=22.1"
djangorestframework = "~=3.13"
filelock = "*"
fuzzywuzzy = {extras = ["speedup"], version = "*"}
gunicorn = "*"
imap-tools = "*"
langdetect = "*"
@@ -28,6 +27,7 @@ python-dotenv = "*"
python-dateutil = "*"
python-magic = "*"
psycopg2 = "*"
rapidfuzz = "*"
redis = {extras = ["hiredis"], version = "*"}
scikit-learn = "~=1.1"
# Pin this until piwheels is building 1.9 (see https://www.piwheels.org/project/scipy/)

126
Pipfile.lock generated
View File

@@ -401,17 +401,6 @@
"index": "pypi",
"version": "==1.2.0"
},
"fuzzywuzzy": {
"extras": [
"speedup"
],
"hashes": [
"sha256:45016e92264780e58972dca1b3d939ac864b78437422beecebb3095f8efd00e8",
"sha256:928244b28db720d1e0ee7587acf660ea49d7e4c632569cad4f1cd7e68a5f0993"
],
"index": "pypi",
"version": "==0.18.0"
},
"gunicorn": {
"hashes": [
"sha256:9dcc4547dbb1cb284accfb15ab5667a0e5d1881cc443e0677b4882a4067a807e",
@@ -622,114 +611,6 @@
"index": "pypi",
"version": "==1.0.9"
},
"levenshtein": {
"hashes": [
"sha256:019ae21de930d6077efa1eac746de4df5234e7c6c11ab10080c0935fc5abbecf",
"sha256:02688fff6d256afdd57da5359144ddab8e054b2ba98ddcf147fe191bdf996e88",
"sha256:0274b87df89d1dda8dce77cf05a9dfab7bd30045a09e0d9435ec8be622e374e6",
"sha256:0323e8dbeec4d63c27111796baa7e8a89b391c32d90e67d78f9404d0c8edeab4",
"sha256:053edbb52fe8b8a1a6698c4fee39590c9e44a602ace807291eb87e3b17f85f48",
"sha256:059027f5dd2aafb916301f46a619c7fe03ff5761cdb2d091cf80bf6dbc24bc29",
"sha256:05f11a4be4f668974238cff21208fbd9f629cab8a68b444b7d4a4cfd8081b1d6",
"sha256:0ab71cc5ea86f6685a7b2235edad65f1f2a4b6341109af259d758973d96eece5",
"sha256:0b439f4fb0b615bc0443cc83eaf5835bd480f680c69ed1be963bdb401b8159f8",
"sha256:0ec50d24a12e50857e94ac9035d3c06fd0827bb477b9ebcd83a2a49dd89e5e23",
"sha256:131fc50d52a52acc367ea8bccb028447b734243d00ba1cfc7d9ff8d0dc37fa38",
"sha256:17b5f1d1a4a5ac536283298c98cafc5632ae3897c8601fb2ec8babc6f47a1be9",
"sha256:183b8da9b870ad171a11a629c43e0587a228aea9d595a969231d59bf530b6c77",
"sha256:18888d50813b9df9b8dc8c1506ec40c783db25f130a6101eb89896b27076f751",
"sha256:25b88277832eb558305c3bb986ad61f19b5cb5a87aced289bce4a1701a92aa31",
"sha256:266cdab48e2242b6c010beb8b7af4164aa87f4ad8d6fbd9f4f531214f8ddb234",
"sha256:281bffb09b2e1620db4e99a9df96e38d939c341c7c43cd5191326fbdb4d42275",
"sha256:28cd002cf5a499e6e9bd69d992ffd501b8473948f3e97d6e075b774df1901e8e",
"sha256:2972c6c6a806e0c788f6ec39510abdb61b3a648fd141a5fa77becd2cc05ff551",
"sha256:2b4027b370cc46c4802ba32a979729209c0407d548723e809f19a50a9df27405",
"sha256:318c924e218be754427ce6bb4c630d9dcb5478eb00a8a3f8a0972086adc763b1",
"sha256:380accae56f8c9df99f34bc7e79d286fee37c3dd06b362c394b08ea96371b7c5",
"sha256:3c7784f9936292c9d3f92fc772d874edc071a16cd883ea0d997e5c4318f6362c",
"sha256:3ebd85fd6253abe89f852fc008294d490eb7a5f66913703148b8d263b048cc90",
"sha256:4126c8fe9d817ac3ab223ee5db41a09d0fa82dbd6bb59d207b6f7313d733f19b",
"sha256:4155f0ab246b6892110960f25989ab91073cd708b974f4732dca4d219a8be3e1",
"sha256:41f16267d8e6d916e06a6a1a0e151f643a6bab1277945a4bd494f359d4185dd2",
"sha256:4522f5d662d3ee55a072fad18e2af5dae480658d4e23b04b455c4b7542ce4327",
"sha256:46c900c807b0614c454ba89271ec6f59212403c54dc68ea493ab1ece2c510618",
"sha256:48291b25a904243f37c9aabbfed3eaba466c9a993f5f5946fe647163b7face07",
"sha256:5038a5e9e106087c117f0a7d6fd9d8a382b228da24bbd085b9f2b5d54ab11c3a",
"sha256:594a26bcf0cb720c16ac6db3fd4b3f411be756f9da7682f2f629089ff15aef18",
"sha256:59706135d3107939effe9f9263bd78c507f4abd7bfb96acc5a7f4176aa0a90d2",
"sha256:5a327d7581696c7a392a8f85cce7e54fa1303f5b79b3b2983abaab309b56cfd6",
"sha256:5eca8a45d38c916783c44e5da06a367b77234efa51d84dda8804654b99efecc9",
"sha256:5fa85f6789178ede5333568cbee5bac5fa9718d5f02406b65545e83368fa8fe9",
"sha256:65097e45ef7a942a9b92999b81d2e91fe80cbd0616215e625af39d2166692018",
"sha256:65cc9938cb9bd8862fc220e0719fd7f9c291d788f0a62bb8840820c46fa5a4d0",
"sha256:6a4c3607e2a0e66337d8ddf95ca7efe9b30ebf944119a4fb86503ea66f777263",
"sha256:72f11a136f148eb1218e7d1492749b8b5594302010db0cebd47423c4ac8c79ee",
"sha256:78b5a71de59e30c697a64c69fc48b032bb99c43b7437091b808a9ba20bb0235c",
"sha256:7b212edc9bf9d0c25cc3117483289b9e1a49a1ed134a02635baa987e9f0d89db",
"sha256:7e0f7045c420abdea249a28384baa846b87bad5c9f42af1957dc50c6e337fa1a",
"sha256:7e83cfec424f546dc3f0cc71896f8cc384a711f4116bc1abb0598302a9af3240",
"sha256:80c55bcc31d21bd07f7d1589e11f2ac1faf3359cf9f93026a1944ee76a40f954",
"sha256:863740d7f45adfd29b95658a680b16113721eaa89857c67e7e9573c61e87bbd8",
"sha256:88484b8c3f71dc9205d0d36da541e2cdcf4bc74474a2ee8d99c2e6411b659b89",
"sha256:8a08810e0bcc606d10cf1c5389c96fc92362244c0cf761358c495c2eb29df3dc",
"sha256:8c0637ae4fcb54d5c7fc9af24d348003b6f9dbaf7a06bf13f769d7b85903af39",
"sha256:8e9e3409338a42e3d4c30c224fdb678364542c77994f089fd6cc8131969eff48",
"sha256:902ea10ba85e014dc5d23a7bbb3ab70722349561e73783dd71571359e8867244",
"sha256:9533db74a2685169380db3db3ab59643453e7c486fffa9bf3ab60b73c4e174be",
"sha256:97f02ff49d1fa21308207a7743bec4fdd7aa90e8dd091539da660fc51e624c4d",
"sha256:9ea9a2a154dc7d8658930fa87cda0e6094235b5e130f037d9894eaf8722119a5",
"sha256:a0440d847b2c9986e4d27e8a59164714e5198530c69a5f9fb2e4620f9136d653",
"sha256:a6d39a27b542a781d691827b955d685d496fb6cccfc6eecc336a78b399032062",
"sha256:a7f4d3c478b1fcf412bf6c82914b02fed33ab359120df9172dda7bc855227461",
"sha256:ad297807bbdffce61b04e5e0c22f3c5d9e1905c1ee186f1f6d029f83bf0f18b8",
"sha256:add6778bb51efb80174937543754d2dfa0f4e504e7302d97896006a642c14f95",
"sha256:ae075ebf7bb5f48b3bd2fc9cd53346e4ff43e2515a4f822914bbc62a3cbd6e7e",
"sha256:b26fb439a7fbb522af63bbd781fbf51ec0c0659134a93f5bc8e9e68641df811e",
"sha256:b2bac59721d246939b21274229b9923aeae3db97b6118da739c658c17e110dd6",
"sha256:b314ad1f0667715e8d1b6197d5336ab579b13e801172721d62331bd40034a30c",
"sha256:b7317035875bd7c4705e2566848b2043b78e18f2f5675ea651f9f7805b5589eb",
"sha256:b8e936e620e5f336a207e08c0da9dace5d4dbcc8e64743ab1acaa77a64bbf060",
"sha256:b906da4e9a7ba4ec33ed2f7238343866932c1a6f84944c804252b2922708d0ee",
"sha256:ba690e4e33c360fcf0b8411ca90f8b9cc595e8deddd6a25a9a75a725b698cd6a",
"sha256:bb14da3d63da994c34cfa47cde469df8013ddf5f575455a22530c8c4a0ed8616",
"sha256:bbc2e1632f4a61fa171ddab3bc8368fb8475e7ce68733ca92fec862fdd8e0f60",
"sha256:bbdd3c896db09993b7879cd35e56da6ed8918d161d6e80f9d9c40d78d34e4784",
"sha256:bcaaa8e542cb7e1962d0a58ce6a25f6b4b6ca2e5ce743155fc1f6eb2fea52574",
"sha256:bee682ab1005aff597946234e47c95fcf0f44d2b1f38075f0aba26bbc4e7545a",
"sha256:bfec6543d60c57e7543d9cbccdd5dfcf562f2c05cd6b814df68108a20794e254",
"sha256:c2e50baf7be8831524a87beec6c1873539519a1948f907dc3d4b9be27ebacb80",
"sha256:c6c79a6138be017d85f3bab1df735669b669a38f9b3ff646a1f179afbacb7b63",
"sha256:c702fb7c8bfd87c9ce9c8bddfc9a5796a492bab35a52b1693adee413721e32f2",
"sha256:c9ba1725826f6571a6e4c1561bb1613711f0058b91927a147dc42c637ba087d9",
"sha256:cf205ac52cb6b45745c0a4891cdb6e709c10ad5b034aa736aff561fc4ce9828c",
"sha256:d0d03fc67499ee90feedfa2add4aaa1c091a7bf333535d847b10fffe390e58fe",
"sha256:d118d63f08fd6ac285cb8166e96c992a6ed0e7a1644e8790c39070b18779e688",
"sha256:d24c09f397c3ce55f20e0250da7ba5b0e5249cb5d21465e71ec15154a3a7e8e0",
"sha256:d41735c7a646dae8612e0552dfc53f45807eeb54364dfb1f0a65ac274bc56b3a",
"sha256:dd1696d91f2a37cece9bd22e507e7be7c37c59ecc61fd15f0d0f31e3b6888957",
"sha256:dfcad9c63a893c95ba1149481b9680ce68dd71211f08df0073ee62700790bc97",
"sha256:e384782608837d9aaf123e413679883091744664a2cd76f0ad0e0a1f12facc57",
"sha256:e5ea0abea338c617b753082f36f64c70ade853d88e91ab5732b301ae8ed16e3f",
"sha256:e6ff81c570413bcc35f1c16850eb66e2493a3259e68efe8672376533d2c82d38",
"sha256:e88951ad2831880405f3f055ab12a6aa72696c20a2815128eeccdc3bf914cd78",
"sha256:e98e16b6ce531b12100c01daac922e8ec5b991832a5f58003f13b7d45ea82dc0",
"sha256:eb0fd32e8e433797499571447d9f975b4744be79c0a3339413868d79517231ed",
"sha256:ee74a73e1f9e16b71f67329e99bb58aa4af9a2c3c4b3a5db9f26e92e7c39e161",
"sha256:f15ec5f825c283a5aa427d78759ab8f84e7b5441d15cfff476b548bce3764666",
"sha256:f296c7fe928ce0e29e313f85c43a5ab80542e096e1163c2605b8cc18aa2aff2b",
"sha256:f32df1b19f773bb41382e8b215955d248c9766e3d6ff5a1dd89709e7d96e4685",
"sha256:f3ed67279a4b317a808ac743d3a915f74187530c5f3d9c859e5d04d475b8c174",
"sha256:f5b972ca514898fb7131671c425a62ca38fdae2a8d6296e4b605ec8202349f8c",
"sha256:f961086c0dbba6c00cbd5c5b5646247efd0d0a4044444bfaa9efc7a6ba5e96a5",
"sha256:f9bd7d7a449667d6f17edd9045ec82a4ed2767afb91743d3d0b18c376a56dfe2",
"sha256:fbac4c8ffadb685189efa92fafdb2f5392e9cbd262eae3818bcdb1bd19acaaf2",
"sha256:fc43c8276d0a7c7b76f31d4f3f80f9eb820673628f1411770a70029c1d5f6a75",
"sha256:fcfded324f0710632e22050a2fd7b56b1cbcb2d21001630bcc26d536f54bffec",
"sha256:ff435abdcbfdf4a070f488830cd53aef77cf8649d0fd8ed76bf27d9566e80e78"
],
"markers": "python_version >= '3.6'",
"version": "==0.20.7"
},
"lxml": {
"hashes": [
"sha256:04da965dfebb5dac2619cb90fcf93efdb35b3c6994fea58a157a834f2f94b318",
@@ -1215,13 +1096,6 @@
"index": "pypi",
"version": "==0.5.0"
},
"python-levenshtein": {
"hashes": [
"sha256:88a58b95e3340a918489dac0c78f731323c0a4d8f5564f839ffea80155574e77",
"sha256:9228af5523f797f0798f045dc4a95ed1f46df72bc2186e52b530a33998a51b37"
],
"version": "==0.20.7"
},
"python-magic": {
"hashes": [
"sha256:c1ba14b08e4a5f5c31a302b7721239695b2f0f058d125bd5ce1ee36b9d9d3c3b",

View File

@@ -142,14 +142,14 @@ def matches(matching_model, document):
return bool(match)
elif matching_model.matching_algorithm == MatchingModel.MATCH_FUZZY:
-from fuzzywuzzy import fuzz
+from rapidfuzz import fuzz
match = re.sub(r"[^\w\s]", "", matching_model.match)
text = re.sub(r"[^\w\s]", "", document_content)
if matching_model.is_insensitive:
match = match.lower()
text = text.lower()
-if fuzz.partial_ratio(match, text) >= 90:
+if fuzz.partial_ratio(match, text, score_cutoff=90):
# TODO: make this better
log_reason(
matching_model,