diff --git a/README.md b/README.md index a94b01b..43e5a7a 100644 --- a/README.md +++ b/README.md @@ -72,4 +72,17 @@ Script Logic: For every site in oneDrive CoastSnap directory, iterate through th * Starts tagging from the most recent image and stops for the site when an image has already been tagged. This way, the user can manually remove bad registered/tagged images, and they will not be automatically replaced. * Retrieves tide data for the site from the .mat file specified in Database/CoastSnapDB.xlsx +### Statistics +Run `generate_statistics_csv.bat` + +Generates `statistics.csv`, which contains information about the Images directory. Columns include: + +site | # processed | # photoshop | # registered | stability | most recently deleted + +* stability = # registered / # processed. This formula assumes that someone manually removes poorly registered images from `Images/Registered`, so stability represents the percentage of processed images that registered well. + +## Future Improvements + +### Image metadata +As of 22/6/22, images downloaded from Spotteron do not appear to retain their metadata. This is based on inspecting the image properties in Windows File Explorer and on using the `exif` Python package. Note: the metadata shown in File Explorer is IPTC data. There is a Python package for interacting with IPTC data, but I had issues with it. 
# diff --git a/coastsnap/folder_stats.py b/coastsnap/folder_stats.py (new file mode 100644)
"""Generate ``statistics.csv`` summarising the CoastSnap ``Images`` directory.

For every site folder under ``<parent_directory>/Images`` that contains
``Processed``, ``Photoshop`` and ``Registered`` sub-folders, count the entries
found in their year sub-folders and report:

* ``# processed``, ``# photoshop``, ``# registered`` -- entry counts,
* ``stability`` -- ``# registered / # processed`` formatted as a percentage,
* ``most recently deleted`` -- date (``DD-MM-YYYY``) of the newest Photoshop
  ``.jpg`` that has no ``<name>_registered.jpg`` counterpart, or ``0`` when
  every Photoshop image still has one.

``parent_directory`` is read from the first row of ``coastsnap_sites.csv``,
which is looked up in the parent of the current working directory; the
resulting ``statistics.csv`` is written next to it.
"""

import os
from pathlib import Path
from time import strptime

import pandas as pd

COLUMNS = ['site', '# processed', '# photoshop', '# registered',
           'stability', 'most recently deleted']


def _list_year_dirs(parent_path):
    """Return the 4-digit year sub-directories of *parent_path*, newest first.

    Raises:
        OSError: if *parent_path* does not exist or is not a directory.
    """
    years = [
        name for name in os.listdir(parent_path)
        if len(name) == 4 and name.isdigit()
        and os.path.isdir(os.path.join(parent_path, name))
    ]
    # os.listdir() returns entries in arbitrary order, so sort explicitly
    # instead of relying on a bare reverse().
    years.sort(reverse=True)
    return years


def _count_entries(parent_path, years):
    """Return the total number of directory entries across the year folders."""
    return sum(len(os.listdir(os.path.join(parent_path, year)))
               for year in years)


def _parse_image_date(image_filename):
    """Return ``DD-MM-YYYY`` extracted from *image_filename*, or ``None``.

    The name is split on ``.``; field 2 is read as an abbreviated month name,
    the part of field 3 before the first ``_`` as the day, and field 5 as the
    year. Names that do not follow this layout yield ``None`` instead of
    aborting the whole run.
    """
    fields = image_filename.split('.')
    try:
        day = fields[3].split('_')[0]
        month = strptime(fields[2], '%b').tm_mon
        year = fields[5]
    except (IndexError, ValueError):
        return None
    return day + '-' + '{:02d}'.format(month) + '-' + year


def _find_most_recently_deleted(photoshop_path, registered_path, years):
    """Locate the newest Photoshop image without a registered counterpart.

    Returns:
        ``(image_filename, image_date)`` for the first match, or ``None`` when
        every ``.jpg`` has a ``<name>_registered.jpg`` in the matching
        ``Registered/<year>`` folder.
    """
    for year in years:
        photoshop_year_path = os.path.join(photoshop_path, year)
        # Reverse-sorted names are taken as newest first.
        # NOTE(review): assumes filenames sort chronologically (they appear to
        # start with a timestamp) -- confirm against real image names.
        for image_filename in sorted(os.listdir(photoshop_year_path),
                                     reverse=True):
            if not image_filename.endswith('.jpg'):
                continue
            # Build the path from the Registered root rather than by string
            # replacement, which would also rewrite "Photoshop" occurring
            # elsewhere in the path.
            registered_image_path = os.path.join(
                registered_path, year,
                image_filename[:-4] + '_registered.jpg')
            if os.path.isfile(registered_image_path):
                continue
            image_date = _parse_image_date(image_filename)
            if image_date is not None:
                return image_filename, image_date
    return None


def _format_stability(n_registered, n_processed):
    """Return ``n_registered / n_processed`` as a whole percentage string.

    A site with no processed images has no defined ratio; ``'n/a'`` is
    returned rather than dividing by zero.
    """
    if not n_processed:
        return 'n/a'
    return "{0:.0%}".format(n_registered / n_processed)


def _site_statistics(images_dir, site):
    """Return the statistics row for *site*, or ``None`` if it is not a site.

    An entry is skipped (``None``) unless its ``Processed``, ``Photoshop`` and
    ``Registered`` folders can all be listed.
    """
    site_path = os.path.join(images_dir, site)
    processed_path = os.path.join(site_path, 'Processed')
    photoshop_path = os.path.join(site_path, 'Photoshop')
    registered_path = os.path.join(site_path, 'Registered')

    try:
        processed_years = _list_year_dirs(processed_path)
        photoshop_years = _list_year_dirs(photoshop_path)
        registered_years = _list_year_dirs(registered_path)
    except OSError:
        # Missing folder or stray file in the Images directory.
        return None

    n_processed = _count_entries(processed_path, processed_years)
    n_photoshop = _count_entries(photoshop_path, photoshop_years)
    n_registered = _count_entries(registered_path, registered_years)

    image_date = 0  # placeholder kept when nothing has been deleted
    deleted = _find_most_recently_deleted(
        photoshop_path, registered_path, photoshop_years)
    if deleted is not None:
        image_filename, image_date = deleted
        print(site)
        print(image_filename)

    return [site, n_processed, n_photoshop, n_registered,
            _format_stability(n_registered, n_processed), image_date]


def build_statistics(images_dir):
    """Return a DataFrame of per-site statistics, indexed by site name.

    Args:
        images_dir: path of the ``Images`` directory holding one folder per
            site.

    Returns:
        DataFrame with the columns of ``COLUMNS`` (``site`` as index), one row
        per site folder that has all three image sub-folders, in sorted site
        order.
    """
    rows = []
    for site in sorted(os.listdir(images_dir)):
        row = _site_statistics(images_dir, site)
        if row is not None:
            rows.append(row)
    return pd.DataFrame(rows, columns=COLUMNS).set_index('site')


def main():
    """Read the site configuration, gather statistics and write the CSV."""
    # The script is expected to be launched from inside the ``coastsnap``
    # folder, so the repository root is the parent of the working directory.
    code_dir = str(Path(os.getcwd()).parent)
    sites_csv_path = os.path.join(code_dir, "coastsnap_sites.csv")
    coastsnap_sites_csv = pd.read_csv(sites_csv_path)
    images_parent_dir = coastsnap_sites_csv.parent_directory[0]
    images_dir = os.path.join(images_parent_dir, "Images")

    stats_csv = build_statistics(images_dir)

    output_file_path = os.path.join(code_dir, 'statistics.csv')
    stats_csv.to_csv(output_file_path)


if __name__ == "__main__":
    main()
# diff --git a/coastsnap/generate_statistics_csv.bat b/coastsnap/generate_statistics_csv.bat
new file mode 100644 index 0000000..9316b29 --- /dev/null +++ b/coastsnap/generate_statistics_csv.bat @@ -0,0 +1,3 @@ +call activate coastsnap +python "%~dp0folder_stats.py" +call conda deactivate \ No newline at end of file diff --git a/coastsnap/spotteron_batch_download.py b/coastsnap/spotteron_batch_download.py index 928fbcf..85ed5e8 100644 --- a/coastsnap/spotteron_batch_download.py +++ b/coastsnap/spotteron_batch_download.py @@ -6,7 +6,7 @@ import datetime import re from pathlib import Path from urllib.parse import urljoin -from os import path, makedirs +from os import path, makedirs, getcwd import attr import pytz @@ -17,10 +17,9 @@ from loguru import logger from timezonefinder import TimezoneFinder from werkzeug.utils import secure_filename -#app = typer.Typer() -coastsnap_sites = pd.read_csv("C:/Users/z5079346/OneDrive - UNSW/Projects/Coastsnap_test/CoastSnap_Sites.csv") - - +code_images_dir = str(Path(getcwd()).parent) +coastsnap_sites_path = path.join(code_images_dir, "coastsnap_sites.csv") +coastsnap_sites = pd.read_csv(coastsnap_sites_path) @attr.s() class SpotteronImage: diff --git a/statistics.csv b/statistics.csv new file mode 100644 index 0000000..9628e84 --- /dev/null +++ b/statistics.csv @@ -0,0 +1,35 @@ +site,# processed,# photoshop,# registered,stability,most recently deleted +alex,75,74,73,97%,25-08-2021 +birubi,65,64,0,0%,12-06-2022 +blacksmiths,1581,1577,1381,87%,30-04-2022 +broulee,190,191,93,49%,14-06-2022 +buddina,117,114,85,73%,15-06-2022 +burleigh,245,242,0,0%,17-06-2022 +byron,1255,1329,676,54%,13-06-2022 +cathieillaroo,97,103,66,68%,16-06-2022 +cathielagoon,74,75,73,99%,10-04-2022 +coolum,80,36,22,28%,13-06-2022 +cooya,55,46,0,0%,15-11-2021 +cowbay,34,34,0,0%,16-06-2022 +era,56,56,45,80%,14-04-2022 +fourmile,244,247,115,47%,01-06-2022 +frankston,191,189,157,82%,16-06-2022 +garie,48,48,0,0%,13-06-2022 +hungry,128,128,92,72%,20-05-2022 +macsnth,73,72,0,0%,13-06-2022 +macssth,60,59,0,0%,13-06-2022 +manly,1167,1262,1122,96%,14-06-2022 
+moffat,114,131,215,189%,09-03-2022 +newell,23,34,0,0%,27-04-2022 +nthnarra,2315,2444,1097,47%,16-06-2022 +queenscliff,85,79,0,0%,14-06-2022 +rainbow,50,50,0,0%,24-04-2022 +seaford,69,57,0,0%,11-06-2022 +shortpoint,222,222,0,0%,06-06-2022 +stockton1,262,261,33,13%,24-05-2022 +stockton2,214,213,62,29%,11-05-2022 +stockton3,257,256,69,27%,15-05-2022 +tomakin,211,211,142,67%,13-06-2022 +tugun,288,289,132,46%,01-06-2022 +wamberal,372,366,0,0%,15-06-2022 +wonga,46,46,0,0%,30-05-2022 diff --git a/workflow.pptx b/workflow.pptx index 2a49ae1..cadca3c 100644 Binary files a/workflow.pptx and b/workflow.pptx differ