more tests of the new parser

This commit is contained in:
Jonas Winkler
2020-11-26 00:08:23 +01:00
parent 39fa02dcb1
commit e87575240d
10 changed files with 146 additions and 10 deletions

View File

@@ -160,7 +160,9 @@ def strip_excess_whitespace(text):
r"([\n\r]+)([^\S\n\r]+)", '\\1', collapsed_spaces)
no_trailing_whitespace = re.sub(
r"([^\S\n\r]+)$", '', no_leading_whitespace)
return no_trailing_whitespace
# TODO: this needs a rework
return no_trailing_whitespace.strip()
def get_text_from_pdf(pdf_file):