Fix: fix nltk tokenizer breaking change (#7522)

This commit is contained in:
shamoon 2024-08-22 20:32:02 -07:00 committed by GitHub
parent 6f79ee9877
commit 5a1ef27224
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -246,7 +246,7 @@ RUN --mount=type=cache,target=/root/.cache/pip/,id=pip-cache \
&& echo "Installing NLTK data" \
&& python3 -W ignore::RuntimeWarning -m nltk.downloader -d "/usr/share/nltk_data" snowball_data \
&& python3 -W ignore::RuntimeWarning -m nltk.downloader -d "/usr/share/nltk_data" stopwords \
&& python3 -W ignore::RuntimeWarning -m nltk.downloader -d "/usr/share/nltk_data" punkt \
&& python3 -W ignore::RuntimeWarning -m nltk.downloader -d "/usr/share/nltk_data" punkt_tab \
&& echo "Cleaning up image" \
&& apt-get --yes purge ${BUILD_PACKAGES} \
&& apt-get --yes autoremove --purge \