Update readme

master
Dan Howe 5 years ago
parent 60de9ebb5c
commit 86623007bd

@ -1,32 +1,12 @@
"""Get image timestamps from Swift camera"""
# ocr
import os
from glob import glob
from datetime import datetime
from tqdm import tqdm
from PIL import Image
import pandas as pd
import pytesseract
Use `pytesseract` (a Python wrapper for Google's `tesseract` OCR engine) to extract timestamps from images captured with a Swift camera, which do not contain EXIF metadata.
# Directory that is searched for the .jpg images to process
input_dir = 'jpg'
## installation
# Paths of all .jpg files directly inside input_dir
jpg_names = glob(os.path.join(input_dir, '*.jpg'))
# Parsed timestamps, appended in the same order as jpg_names
dates = []
`tesseract` must be installed and available on your `PATH`.
https://github.com/tesseract-ocr/tesseract/wiki
# Run OCR on the bottom-right corner of every image and collect the
# parsed timestamps in `dates` (one entry per path in `jpg_names`).
for jpg_name in tqdm(jpg_names):
    # Open via a context manager so the file handle is released promptly
    with Image.open(jpg_name) as im:
        # Crop to the 550 x 40 px strip at the bottom-right corner,
        # which is the region passed to the OCR engine
        w, h = im.size
        stamp = im.crop((w - 550, h - 40, w, h))

        # Perform OCR with tesseract
        text = pytesseract.image_to_string(stamp, lang='eng')

    # OCR output may carry surrounding whitespace (e.g. a trailing newline
    # or form feed), which strptime rejects as unconverted data, so strip
    # it before parsing. A ValueError is still raised if the recognised
    # text does not match the expected 'dd/mm/YYYY HH:MM:SS' layout.
    dates.append(datetime.strptime(text.strip(), '%d/%m/%Y %H:%M:%S'))

# Save as csv: one row per image, indexed by file path
df = pd.DataFrame(data=dates, index=jpg_names, columns=['date'])
df.index.name = 'file'
df.to_csv('image-dates.csv')
```
pip install pytesseract
```

Loading…
Cancel
Save