From 01cba802c857b89ddb0c9d7e4c27d2b8ab10f19f Mon Sep 17 00:00:00 2001 From: Dan Howe Date: Wed, 11 Apr 2018 10:27:49 +1000 Subject: [PATCH] Update docstring to explain case sensitivity --- pdfsearch/pdfsearch.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/pdfsearch/pdfsearch.py b/pdfsearch/pdfsearch.py index 1c7d403..6ae045a 100644 --- a/pdfsearch/pdfsearch.py +++ b/pdfsearch/pdfsearch.py @@ -43,6 +43,16 @@ def search_string(pattern, string, match_object, context_length): def search_pdf(pdf_name, search_patterns, context_length): """Search for text strings inside a pdf. + Search patterns should be lowercase, unless case-sensitivity is important. + + 'ph' will match: + - photograph + - PHANTOM + - pH + + 'pH' will match: + - pH + Args: pdf_name: path to pdf file search_patterns: list of re search patterns @@ -80,7 +90,7 @@ def search_pdf(pdf_name, search_patterns, context_length): matches = re.finditer(pattern, page_text.lower()) else: matches = re.finditer(pattern, page_text) - + for match in matches: whole_word, context = search_string( pattern, page_text, match, context_length)