python-snippets/ocr/swift_ocr.py

33 lines
734 B
Python

"""Get image timestamps from Swift camera"""
import os
from glob import glob
from datetime import datetime
from tqdm import tqdm
from PIL import Image
import pandas as pd
import pytesseract
# Directory that is searched (non-recursively) for '*.jpg' frames.
input_dir = 'jpg'
# Size, in pixels, of the bottom-right corner region that is cropped out and
# handed to the OCR engine.
stamp_width, stamp_height = 550, 40
# Expected layout of the OCR'd text, e.g. "31/12/2019 23:59:59".
stamp_format = '%d/%m/%Y %H:%M:%S'

# glob() yields files in arbitrary, filesystem-dependent order; sort so the
# rows of the resulting CSV are reproducible from run to run.
jpg_names = sorted(glob(os.path.join(input_dir, '*.jpg')))

dates = []
for jpg_name in tqdm(jpg_names):
    # The context manager closes the underlying file once OCR is done,
    # instead of leaking one open handle per image.
    with Image.open(jpg_name) as im:
        # Crop the bottom-right corner of the frame
        w, h = im.size
        stamp = im.crop((w - stamp_width, h - stamp_height, w, h))
        # Perform OCR with tesseract. The raw output usually carries trailing
        # whitespace (newline / form feed), which strptime would reject as
        # "unconverted data", so strip it before parsing.
        text = pytesseract.image_to_string(stamp, lang='eng').strip()

    # Convert to datetime object. Still fails with ValueError on unreadable
    # text, but now says which file and which OCR output caused it.
    try:
        date = datetime.strptime(text, stamp_format)
    except ValueError as err:
        raise ValueError(
            '{}: OCR text {!r} does not match format {!r}'.format(
                jpg_name, text, stamp_format)
        ) from err
    dates.append(date)

# Save as csv: one row per image, indexed by file path, single 'date' column
df = pd.DataFrame(data=dates, index=jpg_names, columns=['date'])
df.index.name = 'file'
df.to_csv('image-dates.csv')