diff --git a/coastsnap/folder_stats.py b/coastsnap/folder_stats.py new file mode 100644 index 0000000..254e493 --- /dev/null +++ b/coastsnap/folder_stats.py @@ -0,0 +1,115 @@ +import os +import pandas as pd +from pathlib import Path +from time import strptime + +code_dir = str(Path(os.getcwd()).parent) +sites_csv_path = os.path.join(code_dir, "coastsnap_sites.csv") +coastsnap_sites_csv = pd.read_csv(sites_csv_path) +images_parent_dir = coastsnap_sites_csv.parent_directory[0] +images_dir = os.path.join(images_parent_dir, "Images") + +stats_csv = pd.DataFrame(columns = ['site','# processed', '# photoshop', '# registered', 'stability', 'most recently deleted'] ) + +print("Retrieving snapshot for:") +for site in os.listdir(images_dir): # Loop through SITES + i=0 + print(site) + to_append = [site, 0, 0, 0, 0, 'None deleted'] + processed = False + photoshop = False + registered = False + latest_deleted_image_found = False + latest_registered_image_found = False + site_path = os.path.join(images_dir, site) + processed_path = os.path.join(site_path,'Processed') + photoshop_path = os.path.join(site_path,'Photoshop') + registered_path = os.path.join(site_path, 'Registered') + try: # Check if site contains 'Processed' directory + processed_years_list = os.listdir(processed_path) + processed_years_list = [x for x in processed_years_list if len(x) == 4] # remove files that aren't years + processed_years_list.reverse() + processed = True + except: + continue + + try: # Check if site contains 'Processed' directory + photoshop_years_list = os.listdir(photoshop_path) + photoshop_years_list = [x for x in photoshop_years_list if len(x) == 4] # remove files that aren't years + photoshop_years_list.reverse() + photoshop = True + except: + continue + + try: # Check if site contains 'Processed' directory + registered_years_list = os.listdir(registered_path) + registered_years_list = [x for x in registered_years_list if len(x) == 4] # remove files that aren't years + registered_years_list.reverse() + registered = True + except: + continue + + if processed: + i=0 + for year in processed_years_list: # Loop through YEARS + processed_year_path = os.path.join(processed_path, year) + processed_image_list = os.listdir(processed_year_path) + processed_image_list.reverse() + for image_filename in processed_image_list: # Loop through IMAGES + i += 1 + to_append[1] = i + + if photoshop: + i=0 + for year in photoshop_years_list: # Loop through YEARS + year_path = os.path.join(photoshop_path, year) + image_list = os.listdir(year_path) + image_list.reverse() + for image_filename in image_list: # Loop through IMAGES + + year_path = year_path.replace('Photoshop', 'Registered') + registered_image_path = year_path + '/' + image_filename[:-4] + '_registered.jpg' + + # Finding the Latest Deleted Image Logic: + # Iterate through 'Images/Processed' + if os.path.isfile(registered_image_path): # Find the latest registered image. + latest_registered_image_found = True # This is so the latest deleted image + # isn't just the most recent image + + if (latest_registered_image_found and # Check if latest registered image has been found + not latest_deleted_image_found and # Check if latest deleted image has already been found + not os.path.isfile(registered_image_path) and # Check if photoshop registered image is also in 'Images/Registered' + image_filename.endswith('.jpg')): # Sanity check: Make sure the file is an image + + latest_deleted_image_found = True + filename_list = image_filename.split(".") + date = filename_list[3].split("_") + image_date = date[0] + '-' + '{:02d}'.format(strptime(filename_list[2],'%b').tm_mon) +'-'+ filename_list[5] + to_append[5] = image_date + + i += 1 + to_append[2] = i + + if registered: + i=0 + for year in registered_years_list: # Loop through YEARS + registered_year_path = os.path.join(registered_path, year) + registered_image_list = os.listdir(registered_year_path) + registered_image_list.reverse() + for image_filename in registered_image_list: # Loop through IMAGES + i += 1 + to_append[3] = i + + stats_csv_length = len(stats_csv) + stats_csv.loc[stats_csv_length] = to_append + + +# Add site stability data (# registered / # processed) +for i, row in stats_csv.iterrows(): + stability = "{0:.0%}".format(stats_csv.at[i, '# registered'] / stats_csv.at[i, '# processed']) + stats_csv.at[i,'stability'] = stability + +stats_csv.set_index('site', inplace = True) + +output_file_path = os.path.join(code_dir, 'statistics.csv') +stats_csv.to_csv(output_file_path)