Add ocr
parent
65119499ae
commit
aeb5135746
@ -0,0 +1,32 @@
|
|||||||
|
"""Get image timestamps from Swift camera"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from glob import glob
|
||||||
|
from datetime import datetime
|
||||||
|
from tqdm import tqdm
|
||||||
|
from PIL import Image
|
||||||
|
import pandas as pd
|
||||||
|
import pytesseract
|
||||||
|
|
||||||
|
input_dir = 'jpg'

# Size in pixels of the bottom-right region that is cropped out of each image
# and handed to OCR.
crop_width = 550
crop_height = 40

# Layout of the timestamp text expected back from OCR.
date_format = '%d/%m/%Y %H:%M:%S'

# Sort the paths so the CSV rows come out in a reproducible order; glob()
# makes no guarantee about the order of its results.
jpg_names = sorted(glob(os.path.join(input_dir, '*.jpg')))
dates = []

for jpg_name in tqdm(jpg_names):
    # Open through a context manager so the file handle is released on every
    # iteration instead of accumulating over the whole directory.
    with Image.open(jpg_name) as im:
        # Crop image
        w, h = im.size
        stamp = im.crop((w - crop_width, h - crop_height, w, h))

    # Perform OCR with tesseract. The result may carry trailing whitespace
    # (newline / form feed), which strptime would reject, so strip it.
    text = pytesseract.image_to_string(stamp, lang='eng').strip()

    # Convert to datetime object
    try:
        dates.append(datetime.strptime(text, date_format))
    except ValueError as err:
        # Re-raise with the offending file and raw OCR text so a bad frame
        # can be identified without re-running the whole batch.
        raise ValueError(
            'Could not parse timestamp {!r} read from {}'.format(text, jpg_name)
        ) from err

# Save as csv
df = pd.DataFrame(data=dates, index=jpg_names, columns=['date'])
df.index.name = 'file'
df.to_csv('image-dates.csv')
|
@ -0,0 +1,13 @@
|
|||||||
|
file,date
|
||||||
|
jpg\0000-SYCR0314.jpg,2019-04-25 07:00:02
|
||||||
|
jpg\0000-SYCR0315.jpg,2019-04-25 08:00:02
|
||||||
|
jpg\0000-SYCR0316.jpg,2019-04-25 09:00:02
|
||||||
|
jpg\0000-SYCR0317.jpg,2019-04-25 10:00:02
|
||||||
|
jpg\0000-SYCR0318.jpg,2019-04-25 11:00:02
|
||||||
|
jpg\0000-SYCR0319.jpg,2019-04-25 12:00:01
|
||||||
|
jpg\0000-SYCR0320.jpg,2019-04-25 13:00:01
|
||||||
|
jpg\0000-SYCR0321.jpg,2019-04-25 14:00:01
|
||||||
|
jpg\0000-SYCR0322.jpg,2019-04-25 15:00:01
|
||||||
|
jpg\0000-SYCR0323.jpg,2019-04-25 16:00:01
|
||||||
|
jpg\0000-SYCR0324.jpg,2019-04-25 17:00:01
|
||||||
|
jpg\0000-SYCR0325.jpg,2019-04-26 07:00:02
|
|
Binary file not shown.
After Width: | Height: | Size: 701 KiB |
Binary file not shown.
After Width: | Height: | Size: 947 KiB |
@ -0,0 +1,32 @@
|
|||||||
|
"""Get image timestamps from Swift camera"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from glob import glob
|
||||||
|
from datetime import datetime
|
||||||
|
from tqdm import tqdm
|
||||||
|
from PIL import Image
|
||||||
|
import pandas as pd
|
||||||
|
import pytesseract
|
||||||
|
|
||||||
|
input_dir = 'jpg'

# Size in pixels of the bottom-right region that is cropped out of each image
# and handed to OCR.
crop_width = 550
crop_height = 40

# Layout of the timestamp text expected back from OCR.
date_format = '%d/%m/%Y %H:%M:%S'

# Sort the paths so the CSV rows come out in a reproducible order; glob()
# makes no guarantee about the order of its results.
jpg_names = sorted(glob(os.path.join(input_dir, '*.jpg')))
dates = []

for jpg_name in tqdm(jpg_names):
    # Open through a context manager so the file handle is released on every
    # iteration instead of accumulating over the whole directory.
    with Image.open(jpg_name) as im:
        # Crop image
        w, h = im.size
        stamp = im.crop((w - crop_width, h - crop_height, w, h))

    # Perform OCR with tesseract. The result may carry trailing whitespace
    # (newline / form feed), which strptime would reject, so strip it.
    text = pytesseract.image_to_string(stamp, lang='eng').strip()

    # Convert to datetime object
    try:
        dates.append(datetime.strptime(text, date_format))
    except ValueError as err:
        # Re-raise with the offending file and raw OCR text so a bad frame
        # can be identified without re-running the whole batch.
        raise ValueError(
            'Could not parse timestamp {!r} read from {}'.format(text, jpg_name)
        ) from err

# Save as csv
df = pd.DataFrame(data=dates, index=jpg_names, columns=['date'])
df.index.name = 'file'
df.to_csv('image-dates.csv')
|
Loading…
Reference in New Issue