Update readme

master
Dan Howe 5 years ago
parent 60de9ebb5c
commit 86623007bd

@@ -1,32 +1,12 @@
"""Get image timestamps from Swift camera""" # ocr
# Use `pytesseract` (python wrapper for Google's `tesseract` OCR engine) to
# extract timestamps from images captured with Swift camera, which do not
# contain exif metadata.
#
# ## installation
#
# `tesseract` must be installed, and in your path.
# https://github.com/tesseract-ocr/tesseract/wiki
#
#     pip install pytesseract
import os
from glob import glob
from datetime import datetime

from tqdm import tqdm
from PIL import Image
import pandas as pd
import pytesseract

# Folder that is searched (non-recursively) for '*.jpg' files
input_dir = 'jpg'

# glob returns files in arbitrary order; sort so the csv rows are reproducible
jpg_names = sorted(glob(os.path.join(input_dir, '*.jpg')))

dates = []
for jpg_name in tqdm(jpg_names):
    # The context manager releases the file handle after each image, so a
    # large folder does not exhaust open-file limits
    with Image.open(jpg_name) as im:
        # Crop image to the 550 x 40 px box in the bottom-right corner
        # (the region the script expects the timestamp to be printed in)
        w, h = im.size
        stamp = im.crop((w - 550, h - 40, w, h))

        # Perform OCR with tesseract (inside the `with`, while the image
        # data is still guaranteed to be readable)
        text = pytesseract.image_to_string(stamp, lang='eng')

    # OCR output may carry surrounding whitespace (e.g. a trailing newline or
    # form feed), which strptime would reject as unconverted data
    text = text.strip()

    # Convert to datetime object
    try:
        dates.append(datetime.strptime(text, '%d/%m/%Y %H:%M:%S'))
    except ValueError as err:
        # Same exception type as before, but say which file was unreadable
        raise ValueError(
            '{}: could not parse timestamp from OCR text {!r}'.format(
                jpg_name, text)) from err

# Save as csv: one row per image, indexed by file path
df = pd.DataFrame(data=dates, index=jpg_names, columns=['date'])
df.index.name = 'file'
df.to_csv('image-dates.csv')

Loading…
Cancel
Save