Add more tests for the new parser

This commit is contained in:
Jonas Winkler
2020-11-26 00:08:23 +01:00
parent bd0db57604
commit f901def797
10 changed files with 146 additions and 10 deletions

View File

@@ -160,7 +160,9 @@ def strip_excess_whitespace(text):
r"([\n\r]+)([^\S\n\r]+)", '\\1', collapsed_spaces)
no_trailing_whitespace = re.sub(
r"([^\S\n\r]+)$", '', no_leading_whitespace)
return no_trailing_whitespace
# TODO: the whitespace stripping in this function needs a rework
return no_trailing_whitespace.strip()
def get_text_from_pdf(pdf_file):