diff --git a/ocr/README.md b/ocr/README.md new file mode 100644 index 0000000..9e6c1f6 --- /dev/null +++ b/ocr/README.md @@ -0,0 +1,32 @@ +"""Get image timestamps from Swift camera""" + +import os +from glob import glob +from datetime import datetime +from tqdm import tqdm +from PIL import Image +import pandas as pd +import pytesseract + +input_dir = 'jpg' + +jpg_names = glob(os.path.join(input_dir, '*.jpg')) +dates = [] + +for jpg_name in tqdm(jpg_names): + im = Image.open(jpg_name) + + # Crop image to its bottom-right 550x40 px strip + w, h = im.size + im = im.crop((w - 550, h - 40, w, h)) + + # Perform OCR with tesseract on the cropped strip (lang='eng') + text = pytesseract.image_to_string(im, lang='eng') + + # Convert to datetime object; NOTE(review): strptime needs an exact '%d/%m/%Y %H:%M:%S' match, so stray whitespace in the OCR text would raise ValueError - confirm the OCR output is trimmed + dates.append(datetime.strptime(text, '%d/%m/%Y %H:%M:%S')) + +# Save as csv: a single 'date' column indexed by the globbed jpg paths, index named 'file' +df = pd.DataFrame(data=dates, index=jpg_names, columns=['date']) +df.index.name = 'file' +df.to_csv('image-dates.csv') diff --git a/ocr/image-dates.csv b/ocr/image-dates.csv new file mode 100644 index 0000000..09f95b2 --- /dev/null +++ b/ocr/image-dates.csv @@ -0,0 +1,13 @@ +file,date +jpg\0000-SYCR0314.jpg,2019-04-25 07:00:02 +jpg\0000-SYCR0315.jpg,2019-04-25 08:00:02 +jpg\0000-SYCR0316.jpg,2019-04-25 09:00:02 +jpg\0000-SYCR0317.jpg,2019-04-25 10:00:02 +jpg\0000-SYCR0318.jpg,2019-04-25 11:00:02 +jpg\0000-SYCR0319.jpg,2019-04-25 12:00:01 +jpg\0000-SYCR0320.jpg,2019-04-25 13:00:01 +jpg\0000-SYCR0321.jpg,2019-04-25 14:00:01 +jpg\0000-SYCR0322.jpg,2019-04-25 15:00:01 +jpg\0000-SYCR0323.jpg,2019-04-25 16:00:01 +jpg\0000-SYCR0324.jpg,2019-04-25 17:00:01 +jpg\0000-SYCR0325.jpg,2019-04-26 07:00:02 diff --git a/ocr/jpg/0000-SYCR0314.jpg b/ocr/jpg/0000-SYCR0314.jpg new file mode 100644 index 0000000..8061a4d Binary files /dev/null and b/ocr/jpg/0000-SYCR0314.jpg differ diff --git a/ocr/jpg/0000-SYCR0315.jpg b/ocr/jpg/0000-SYCR0315.jpg new file mode 100644 index 0000000..8a4559b Binary files /dev/null and b/ocr/jpg/0000-SYCR0315.jpg differ diff --git a/ocr/swift_ocr.py b/ocr/swift_ocr.py new file mode 100644 index 
"""Get image timestamps from Swift camera.

Every ``*.jpg`` file in ``input_dir`` is cropped to the bottom-right strip
that the timestamp is read from, the strip is passed to Tesseract, and the
parsed timestamps are written to ``image-dates.csv`` indexed by file name.
"""

import os
from glob import glob
from datetime import datetime

import pandas as pd

input_dir = 'jpg'

OUTPUT_CSV = 'image-dates.csv'

# Size, in pixels, of the bottom-right strip handed to the OCR engine.
STAMP_WIDTH = 550
STAMP_HEIGHT = 40

# Layout the OCR text has to match, e.g. '25/04/2019 07:00:02'.
TIMESTAMP_FORMAT = '%d/%m/%Y %H:%M:%S'


def timestamp_box(width, height):
    """Return the crop box of the timestamp strip for an image of this size.

    The box is ``(left, upper, right, lower)`` and covers the bottom-right
    ``STAMP_WIDTH`` x ``STAMP_HEIGHT`` pixels of the image.
    """
    return (width - STAMP_WIDTH, height - STAMP_HEIGHT, width, height)


def parse_timestamp(text):
    """Parse raw OCR output into a ``datetime``.

    Surrounding whitespace is removed first: ``strptime`` rejects any
    unconverted trailing data, so a trailing newline or form feed in the
    OCR output would otherwise raise.

    Raises:
        ValueError: if the trimmed text does not match ``TIMESTAMP_FORMAT``.
    """
    return datetime.strptime(text.strip(), TIMESTAMP_FORMAT)


def read_timestamp(jpg_name):
    """Return the timestamp recognised in the image file ``jpg_name``.

    Raises:
        ValueError: if the OCR text is not a valid timestamp; the message
            names the offending file and quotes the recognised text.
    """
    # The OCR stack is imported here rather than at module level so that the
    # pure helpers (timestamp_box, parse_timestamp) stay importable on
    # machines without Pillow/pytesseract.
    from PIL import Image
    import pytesseract

    # The context manager closes the file handle once the strip is read.
    with Image.open(jpg_name) as im:
        w, h = im.size
        stamp = im.crop(timestamp_box(w, h))
        text = pytesseract.image_to_string(stamp, lang='eng')

    try:
        return parse_timestamp(text)
    except ValueError as err:
        # Re-raise with the file name so a bad frame can be located.
        raise ValueError(
            '{}: cannot parse OCR text {!r} as a timestamp'.format(
                jpg_name, text)
        ) from err


def main():
    """OCR every JPEG in ``input_dir`` and save the timestamps as CSV."""
    # Imported here for the same reason as the OCR stack in read_timestamp.
    from tqdm import tqdm

    # glob() returns paths in arbitrary order; sort so the CSV is stable.
    jpg_names = sorted(glob(os.path.join(input_dir, '*.jpg')))
    dates = [read_timestamp(jpg_name) for jpg_name in tqdm(jpg_names)]

    # Save as csv
    df = pd.DataFrame(data=dates, index=jpg_names, columns=['date'])
    df.index.name = 'file'
    df.to_csv(OUTPUT_CSV)


if __name__ == '__main__':
    main()