From f7cd6974c5cf0202328b175d45289ebdee0bbb33 Mon Sep 17 00:00:00 2001 From: Trenton Holmes Date: Thu, 15 Sep 2022 19:40:21 -0700 Subject: [PATCH] Mock out the nltk portions so the data doesn't need to be downloaded --- .github/workflows/ci.yml | 3 --- src/documents/tests/test_classifier.py | 14 +++++++++++++- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 49efc7ef4..ae2e30d6e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -125,9 +125,6 @@ jobs: name: Install Python dependencies run: | pipenv sync --dev - pipenv run python3 -m nltk.downloader snowball_data - pipenv run python3 -m nltk.downloader stopwords - pipenv run python3 -m nltk.downloader punkt - name: List installed Python dependencies run: | diff --git a/src/documents/tests/test_classifier.py b/src/documents/tests/test_classifier.py index cfa662c02..8daaafc07 100644 --- a/src/documents/tests/test_classifier.py +++ b/src/documents/tests/test_classifier.py @@ -1,9 +1,9 @@ import os +import re import tempfile from pathlib import Path from unittest import mock -import documents import pytest from django.conf import settings from django.test import override_settings @@ -20,10 +20,19 @@ from documents.models import Tag from documents.tests.utils import DirectoriesMixin +def dummy_preprocess(content: str): + content = content.lower().strip() + content = re.sub(r"\s+", " ", content) + return content + + class TestClassifier(DirectoriesMixin, TestCase): def setUp(self): super().setUp() self.classifier = DocumentClassifier() + self.classifier.preprocess_content = mock.MagicMock( + side_effect=dummy_preprocess, + ) def generate_test_data(self): self.c1 = Correspondent.objects.create( @@ -192,6 +201,8 @@ class TestClassifier(DirectoriesMixin, TestCase): new_classifier = DocumentClassifier() new_classifier.load() + new_classifier.preprocess_content = mock.MagicMock(side_effect=dummy_preprocess) + self.assertFalse(new_classifier.train()) # @override_settings( @@ -215,6 +226,7 @@ class TestClassifier(DirectoriesMixin, TestCase): new_classifier = DocumentClassifier() new_classifier.load() + new_classifier.preprocess_content = mock.MagicMock(side_effect=dummy_preprocess) self.assertCountEqual(new_classifier.predict_tags(self.doc2.content), [45, 12])