Add ocr
parent
65119499ae
commit
aeb5135746
@ -0,0 +1,32 @@
|
||||
"""Get image timestamps from Swift camera.

Each JPEG in ``input_dir`` is cropped to a strip in its bottom-right corner,
the strip is run through Tesseract OCR, and the recognised text is parsed as
a ``dd/mm/YYYY HH:MM:SS`` timestamp.  The results are written to
``image-dates.csv`` with one row per image.
"""

import os
from glob import glob
from datetime import datetime

from tqdm import tqdm
from PIL import Image
import pandas as pd
import pytesseract

# Directory searched for '*.jpg' files (name kept from the original script).
input_dir = 'jpg'
OUTPUT_CSV = 'image-dates.csv'
DATE_FORMAT = '%d/%m/%Y %H:%M:%S'

# Size in pixels of the bottom-right strip handed to OCR.
# NOTE(review): presumably this is where the camera overlays its timestamp
# -- confirm against a sample image.
CROP_WIDTH = 550
CROP_HEIGHT = 40


def read_timestamp(jpg_name):
    """Return the timestamp OCR'd from the bottom-right corner of an image.

    Args:
        jpg_name: Path of the image file to read.

    Returns:
        A naive ``datetime`` parsed from the OCR text with ``DATE_FORMAT``.

    Raises:
        ValueError: If the OCR text does not match ``DATE_FORMAT``.  The
            message names the file and shows the text that was recognised.
    """
    # The context manager closes the file handle even if OCR fails; OCR runs
    # inside it so the pixel data is guaranteed to still be available.
    with Image.open(jpg_name) as im:
        w, h = im.size
        strip = im.crop((w - CROP_WIDTH, h - CROP_HEIGHT, w, h))
        text = pytesseract.image_to_string(strip, lang='eng')

    # Tesseract output can carry trailing newline / form-feed characters,
    # which strptime rejects as "unconverted data".
    text = text.strip()
    try:
        return datetime.strptime(text, DATE_FORMAT)
    except ValueError as err:
        raise ValueError(
            'Could not parse timestamp from {!r}: OCR returned {!r}'.format(
                jpg_name, text
            )
        ) from err


def main():
    """OCR every JPEG in ``input_dir`` and save the timestamps as CSV."""
    # glob() returns names in arbitrary order; sort so the CSV rows are
    # reproducible between runs.
    jpg_names = sorted(glob(os.path.join(input_dir, '*.jpg')))
    dates = [read_timestamp(jpg_name) for jpg_name in tqdm(jpg_names)]

    # Save as csv
    df = pd.DataFrame(data=dates, index=jpg_names, columns=['date'])
    df.index.name = 'file'
    df.to_csv(OUTPUT_CSV)


if __name__ == '__main__':
    main()
|
@ -0,0 +1,13 @@
|
||||
file,date
|
||||
jpg\0000-SYCR0314.jpg,2019-04-25 07:00:02
|
||||
jpg\0000-SYCR0315.jpg,2019-04-25 08:00:02
|
||||
jpg\0000-SYCR0316.jpg,2019-04-25 09:00:02
|
||||
jpg\0000-SYCR0317.jpg,2019-04-25 10:00:02
|
||||
jpg\0000-SYCR0318.jpg,2019-04-25 11:00:02
|
||||
jpg\0000-SYCR0319.jpg,2019-04-25 12:00:01
|
||||
jpg\0000-SYCR0320.jpg,2019-04-25 13:00:01
|
||||
jpg\0000-SYCR0321.jpg,2019-04-25 14:00:01
|
||||
jpg\0000-SYCR0322.jpg,2019-04-25 15:00:01
|
||||
jpg\0000-SYCR0323.jpg,2019-04-25 16:00:01
|
||||
jpg\0000-SYCR0324.jpg,2019-04-25 17:00:01
|
||||
jpg\0000-SYCR0325.jpg,2019-04-26 07:00:02
|
|
Binary file not shown.
After Width: | Height: | Size: 701 KiB |
Binary file not shown.
After Width: | Height: | Size: 947 KiB |
@ -0,0 +1,32 @@
|
||||
"""Get image timestamps from Swift camera.

Each JPEG in ``input_dir`` is cropped to a strip in its bottom-right corner,
the strip is run through Tesseract OCR, and the recognised text is parsed as
a ``dd/mm/YYYY HH:MM:SS`` timestamp.  The results are written to
``image-dates.csv`` with one row per image.
"""

import os
from glob import glob
from datetime import datetime

from tqdm import tqdm
from PIL import Image
import pandas as pd
import pytesseract

# Directory searched for '*.jpg' files (name kept from the original script).
input_dir = 'jpg'
OUTPUT_CSV = 'image-dates.csv'
DATE_FORMAT = '%d/%m/%Y %H:%M:%S'

# Size in pixels of the bottom-right strip handed to OCR.
# NOTE(review): presumably this is where the camera overlays its timestamp
# -- confirm against a sample image.
CROP_WIDTH = 550
CROP_HEIGHT = 40


def read_timestamp(jpg_name):
    """Return the timestamp OCR'd from the bottom-right corner of an image.

    Args:
        jpg_name: Path of the image file to read.

    Returns:
        A naive ``datetime`` parsed from the OCR text with ``DATE_FORMAT``.

    Raises:
        ValueError: If the OCR text does not match ``DATE_FORMAT``.  The
            message names the file and shows the text that was recognised.
    """
    # The context manager closes the file handle even if OCR fails; OCR runs
    # inside it so the pixel data is guaranteed to still be available.
    with Image.open(jpg_name) as im:
        w, h = im.size
        strip = im.crop((w - CROP_WIDTH, h - CROP_HEIGHT, w, h))
        text = pytesseract.image_to_string(strip, lang='eng')

    # Tesseract output can carry trailing newline / form-feed characters,
    # which strptime rejects as "unconverted data".
    text = text.strip()
    try:
        return datetime.strptime(text, DATE_FORMAT)
    except ValueError as err:
        raise ValueError(
            'Could not parse timestamp from {!r}: OCR returned {!r}'.format(
                jpg_name, text
            )
        ) from err


def main():
    """OCR every JPEG in ``input_dir`` and save the timestamps as CSV."""
    # glob() returns names in arbitrary order; sort so the CSV rows are
    # reproducible between runs.
    jpg_names = sorted(glob(os.path.join(input_dir, '*.jpg')))
    dates = [read_timestamp(jpg_name) for jpg_name in tqdm(jpg_names)]

    # Save as csv
    df = pd.DataFrame(data=dates, index=jpg_names, columns=['date'])
    df.index.name = 'file'
    df.to_csv(OUTPUT_CSV)


if __name__ == '__main__':
    main()
|
Loading…
Reference in New Issue