Update readme
parent
60de9ebb5c
commit
86623007bd
@ -1,32 +1,12 @@
|
||||
"""Get image timestamps from Swift camera"""
|
||||
# ocr
|
||||
|
||||
import os
|
||||
from glob import glob
|
||||
from datetime import datetime
|
||||
from tqdm import tqdm
|
||||
from PIL import Image
|
||||
import pandas as pd
|
||||
import pytesseract
|
||||
Use `pytesseract` (a Python wrapper for Google's `tesseract` OCR engine) to extract timestamps from images captured with a Swift camera, which do not contain EXIF metadata.
|
||||
|
||||
input_dir = 'jpg'
|
||||
## installation
|
||||
|
||||
# Collect every .jpg directly inside input_dir. glob() yields paths in
# arbitrary order, so sort them to make the row order of the output
# reproducible between runs.
jpg_names = sorted(glob(os.path.join(input_dir, '*.jpg')))

# Parsed timestamps, appended in the same order as jpg_names.
dates = []
|
||||
`tesseract` must be installed and available on your `PATH`.
|
||||
https://github.com/tesseract-ocr/tesseract/wiki
|
||||
|
||||
# For each image: crop the strip that is OCR'd, read its text with
# tesseract, and parse that text as a timestamp appended to `dates`.
for jpg_name in tqdm(jpg_names):
    # Open through a context manager so the file handle is released on every
    # iteration instead of lingering until garbage collection.
    with Image.open(jpg_name) as im:
        # Crop image: keep only the bottom-right 550x40 px strip.
        w, h = im.size
        stamp = im.crop((w - 550, h - 40, w, h))

        # Perform OCR with tesseract (done while the source file is still
        # open, in case the crop has not fully loaded its pixel data yet).
        text = pytesseract.image_to_string(stamp, lang='eng')

    # Convert to datetime object. Depending on the pytesseract version the
    # OCR output may end with a newline / form feed, which strptime rejects
    # as "unconverted data remains" -- strip surrounding whitespace first.
    # A genuinely unreadable timestamp still raises ValueError, so `dates`
    # never falls out of step with `jpg_names`.
    dates.append(datetime.strptime(text.strip(), '%d/%m/%Y %H:%M:%S'))
|
||||
|
||||
# Save as csv: one row per image, with the file path as the index column
# (labelled 'file') and the parsed timestamp in the 'date' column.
df = pd.DataFrame(data=dates, index=jpg_names, columns=['date']).rename_axis('file')
df.to_csv('image-dates.csv')
|
||||
```
|
||||
pip install pytesseract
|
||||
```
|
||||
|
Loading…
Reference in New Issue