From 5a1ef27224bcb5f14319527d33ed81bee4445be0 Mon Sep 17 00:00:00 2001 From: shamoon <4887959+shamoon@users.noreply.github.com> Date: Thu, 22 Aug 2024 20:32:02 -0700 Subject: [PATCH] Fix: fix nltk tokenizer breaking change (#7522) --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 57c584327..2a9d7b306 100644 --- a/Dockerfile +++ b/Dockerfile @@ -246,7 +246,7 @@ RUN --mount=type=cache,target=/root/.cache/pip/,id=pip-cache \ && echo "Installing NLTK data" \ && python3 -W ignore::RuntimeWarning -m nltk.downloader -d "/usr/share/nltk_data" snowball_data \ && python3 -W ignore::RuntimeWarning -m nltk.downloader -d "/usr/share/nltk_data" stopwords \ - && python3 -W ignore::RuntimeWarning -m nltk.downloader -d "/usr/share/nltk_data" punkt \ + && python3 -W ignore::RuntimeWarning -m nltk.downloader -d "/usr/share/nltk_data" punkt_tab \ && echo "Cleaning up image" \ && apt-get --yes purge ${BUILD_PACKAGES} \ && apt-get --yes autoremove --purge \