Update readme
parent
60de9ebb5c
commit
86623007bd
@ -1,32 +1,12 @@
|
|||||||
"""Get image timestamps from Swift camera"""
|
# ocr
|
||||||
|
|
||||||
import os
|
Use `pytesseract` (a Python wrapper for Google's `tesseract` OCR engine) to extract timestamps from images captured with a Swift camera, which do not contain EXIF metadata.
|
||||||
from glob import glob
|
|
||||||
from datetime import datetime
|
|
||||||
from tqdm import tqdm
|
|
||||||
from PIL import Image
|
|
||||||
import pandas as pd
|
|
||||||
import pytesseract
|
|
||||||
|
|
||||||
input_dir = 'jpg'
|
## installation
|
||||||
|
|
||||||
jpg_names = glob(os.path.join(input_dir, '*.jpg'))
|
`tesseract` must be installed and available on your `PATH`.
|
||||||
dates = []
|
https://github.com/tesseract-ocr/tesseract/wiki
|
||||||
|
|
||||||
for jpg_name in tqdm(jpg_names):
|
```
|
||||||
im = Image.open(jpg_name)
|
pip install pytesseract
|
||||||
|
```
|
||||||
# Crop image
|
|
||||||
w, h = im.size
|
|
||||||
im = im.crop((w - 550, h - 40, w, h))
|
|
||||||
|
|
||||||
# Perform OCR with tesseract
|
|
||||||
text = pytesseract.image_to_string(im, lang='eng')
|
|
||||||
|
|
||||||
# Convert to datetime object
|
|
||||||
# Tesseract output normally ends with a newline/form feed; strptime rejects
# any trailing characters, so strip surrounding whitespace before parsing.
dates.append(datetime.strptime(text.strip(), '%d/%m/%Y %H:%M:%S'))
|
|
||||||
|
|
||||||
# Save as csv
|
|
||||||
df = pd.DataFrame(data=dates, index=jpg_names, columns=['date'])
|
|
||||||
df.index.name = 'file'
|
|
||||||
df.to_csv('image-dates.csv')
|
|
||||||
|
Loading…
Reference in New Issue