diff --git a/ocr/README.md b/ocr/README.md index 9e6c1f6..89d397c 100644 --- a/ocr/README.md +++ b/ocr/README.md @@ -1,32 +1,12 @@ -"""Get image timestamps from Swift camera""" +# ocr -import os -from glob import glob -from datetime import datetime -from tqdm import tqdm -from PIL import Image -import pandas as pd -import pytesseract +Use `pytesseract` (a Python wrapper for Google's `tesseract` OCR engine) to extract timestamps from images captured with a Swift camera, which do not contain EXIF metadata. -input_dir = 'jpg' +## installation -jpg_names = glob(os.path.join(input_dir, '*.jpg')) -dates = [] +`tesseract` must be installed and on your `PATH`. + https://github.com/tesseract-ocr/tesseract/wiki -for jpg_name in tqdm(jpg_names): - im = Image.open(jpg_name) - - # Crop image - w, h = im.size - im = im.crop((w - 550, h - 40, w, h)) - - # Perform OCR with tesseract - text = pytesseract.image_to_string(im, lang='eng') - - # Convert to datetime object - dates.append(datetime.strptime(text, '%d/%m/%Y %H:%M:%S')) - -# Save as csv -df = pd.DataFrame(data=dates, index=jpg_names, columns=['date']) -df.index.name = 'file' -df.to_csv('image-dates.csv') +``` +pip install pytesseract +```