From 6d2d7e3a41c7da1ccbea4c9d4c05b4f041cc65ef Mon Sep 17 00:00:00 2001 From: Dan Howe Date: Wed, 11 Apr 2018 14:13:35 +1000 Subject: [PATCH] Replace newlines in pdf text with spaces for improved legibility --- pdfsearch/pdfsearch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pdfsearch/pdfsearch.py b/pdfsearch/pdfsearch.py index 14b41f7..35bfd4e 100644 --- a/pdfsearch/pdfsearch.py +++ b/pdfsearch/pdfsearch.py @@ -79,7 +79,7 @@ def search_pdf(pdf_name, search_patterns, context_length): page_num = i + 1 page = reader.getPage(i) try: - page_text = page.extractText().replace('\n', '') + page_text = page.extractText().replace('\n', ' ') # Skip page if text cannot be read except (KeyError, ZLibError): continue