Added folder_stats.py

master
Jonathan Chan 2 years ago
parent 96f9780bb8
commit cd6e7b963c

@ -72,4 +72,17 @@ Script Logic: For every site in oneDrive CoastSnap directory, iterate through th
* Starts tagging from the most recent image and stops for the site when an image has already been tagged. This way, the user can manually remove bad registered/tagged images, and they will not be automatically replaced.
* Retrieves tide data for the site from the .mat file specified in Database/CoastSnapDB.xlsx
### Statistics
Run `generate_statistics_csv.bat`
Generates `statistics.csv` which contains information about the Images directory. Columns include:
site | # processed | # photoshop | # registered | stability | most recently deleted
* stability = # registered / # processed. This formula is based on the assumption that someone will manually remove poorly registered images in `Images/Registered`. Thus stability represents the percentage of images that had good registration.
## Future Improvements
### Image metadata
As of 22/6/22, images downloaded from Spotteron do not appear to retain their metadata. This is based on inspecting the images in Windows File Explorer (image -> Properties) and on reading them with the `exif` Python package. Note: the metadata shown in File Explorer is IPTC data. There is a Python package for working with IPTC data, but I had issues with it.

@ -0,0 +1,104 @@
import os
import pandas as pd
from pathlib import Path
from time import strptime
# Column order of statistics.csv; each site row is built in this order.
STATS_COLUMNS = ['site', '# processed', '# photoshop', '# registered', 'stability', 'most recently deleted']

# Per-site sub-directories whose images are counted, in column order.
STAGE_DIRS = ('Processed', 'Photoshop', 'Registered')


def list_year_dirs(stage_path):
    """Return the year sub-directory names of *stage_path*, newest first.

    Only four-digit names are kept so stray files are ignored.  The result is
    sorted explicitly because ``os.listdir`` returns entries in arbitrary
    order.

    Raises:
        OSError: if *stage_path* does not exist or is not a directory.
    """
    years = [name for name in os.listdir(stage_path) if len(name) == 4 and name.isdigit()]
    return sorted(years, reverse=True)


def count_images(stage_path, years):
    """Return the total number of entries in the given year directories."""
    return sum(len(os.listdir(os.path.join(stage_path, year))) for year in years)


def image_date_from_filename(image_filename):
    """Return the ``DD-MM-YYYY`` date encoded in a dot-separated image filename.

    The filename is split on ``.``: field 2 is the abbreviated month name,
    field 3 starts with the day of month (before the first ``_``) and field 5
    is the year.  Returns ``None`` when the filename does not follow that
    layout.
    """
    fields = image_filename.split(".")
    try:
        day = fields[3].split("_")[0]
        month = strptime(fields[2], '%b').tm_mon
        year = fields[5]
    except (IndexError, ValueError):
        return None
    return day + '-' + '{:02d}'.format(month) + '-' + year


def format_stability(registered_count, processed_count):
    """Return ``# registered / # processed`` as a whole-number percentage.

    Returns ``"n/a"`` when there are no processed images, because the ratio
    is undefined (the original script divided by zero in that case).
    """
    if not processed_count:
        return "n/a"
    return "{0:.0%}".format(registered_count / processed_count)


def find_latest_unregistered(photoshop_path, registered_path, years):
    """Find the newest Photoshop ``.jpg`` that has no registered counterpart.

    *years* must be ordered newest first.  Filenames are scanned in
    descending lexical order within each year.  The counterpart of
    ``<name>.jpg`` is ``<registered_path>/<year>/<name>_registered.jpg``.

    Returns:
        ``(image_filename, image_date)`` for the first match whose date can
        be parsed, or ``None`` if every image has a registered counterpart.
    """
    for year in years:
        image_list = sorted(os.listdir(os.path.join(photoshop_path, year)), reverse=True)
        for image_filename in image_list:
            if not image_filename.endswith('.jpg'):
                continue
            stem = os.path.splitext(image_filename)[0]
            registered_image_path = os.path.join(registered_path, year, stem + '_registered.jpg')
            if os.path.isfile(registered_image_path):
                continue
            image_date = image_date_from_filename(image_filename)
            if image_date is not None:
                return image_filename, image_date
    return None


def collect_site_stats(images_dir, site):
    """Build the statistics row for one site, in ``STATS_COLUMNS`` order.

    Returns ``None`` when the site lacks any of the Processed, Photoshop or
    Registered directories; such entries are skipped, as in the original
    script.  The last column stays ``0`` when no unregistered image is found.
    """
    site_path = os.path.join(images_dir, site)
    stage_paths = [os.path.join(site_path, stage) for stage in STAGE_DIRS]
    try:
        stage_years = [list_year_dirs(stage_path) for stage_path in stage_paths]
    except OSError:
        # Missing directory, or the entry is a plain file rather than a site.
        return None

    processed_count, photoshop_count, registered_count = (
        count_images(stage_path, years)
        for stage_path, years in zip(stage_paths, stage_years)
    )

    most_recently_deleted = 0
    latest = find_latest_unregistered(stage_paths[1], stage_paths[2], stage_years[1])
    if latest is not None:
        image_filename, most_recently_deleted = latest
        print(site)
        print(image_filename)

    return [
        site,
        processed_count,
        photoshop_count,
        registered_count,
        format_stability(registered_count, processed_count),
        most_recently_deleted,
    ]


def main():
    """Write statistics.csv summarising every site in the Images directory.

    Paths are resolved relative to the parent of the current working
    directory: ``coastsnap_sites.csv`` is read from there and
    ``statistics.csv`` is written there.  The Images directory is taken from
    the first ``parent_directory`` entry of ``coastsnap_sites.csv``.
    """
    code_dir = str(Path(os.getcwd()).parent)
    sites_csv_path = os.path.join(code_dir, "coastsnap_sites.csv")
    coastsnap_sites_csv = pd.read_csv(sites_csv_path)
    images_parent_dir = coastsnap_sites_csv.parent_directory[0]
    images_dir = os.path.join(images_parent_dir, "Images")

    rows = []
    for site in sorted(os.listdir(images_dir)):  # Loop through SITES
        row = collect_site_stats(images_dir, site)
        if row is not None:
            rows.append(row)

    stats_csv = pd.DataFrame(rows, columns=STATS_COLUMNS)
    stats_csv.set_index('site', inplace=True)
    output_file_path = os.path.join(code_dir, 'statistics.csv')
    stats_csv.to_csv(output_file_path)


if __name__ == "__main__":
    main()

@ -0,0 +1,3 @@
rem Generate statistics.csv by running folder_stats.py.
rem Activate the "coastsnap" environment (presumably a conda environment, given the deactivate call below).
call activate coastsnap
rem %~dp0 expands to the drive and directory of this batch file, so the script is resolved relative to it.
python "%~dp0folder_stats.py"
rem Leave the environment again once the script has finished.
call conda deactivate

@ -6,7 +6,7 @@ import datetime
import re
from pathlib import Path
from urllib.parse import urljoin
from os import path, makedirs
from os import path, makedirs, getcwd
import attr
import pytz
@ -17,10 +17,9 @@ from loguru import logger
from timezonefinder import TimezoneFinder
from werkzeug.utils import secure_filename
#app = typer.Typer()
coastsnap_sites = pd.read_csv("C:/Users/z5079346/OneDrive - UNSW/Projects/Coastsnap_test/CoastSnap_Sites.csv")
code_images_dir = str(Path(getcwd()).parent)
coastsnap_sites_path = path.join(code_images_dir, "coastsnap_sites.csv")
coastsnap_sites = pd.read_csv(coastsnap_sites_path)
@attr.s()
class SpotteronImage:

@ -0,0 +1,35 @@
site,# processed,# photoshop,# registered,stability,most recently deleted
alex,75,74,73,97%,25-08-2021
birubi,65,64,0,0%,12-06-2022
blacksmiths,1581,1577,1381,87%,30-04-2022
broulee,190,191,93,49%,14-06-2022
buddina,117,114,85,73%,15-06-2022
burleigh,245,242,0,0%,17-06-2022
byron,1255,1329,676,54%,13-06-2022
cathieillaroo,97,103,66,68%,16-06-2022
cathielagoon,74,75,73,99%,10-04-2022
coolum,80,36,22,28%,13-06-2022
cooya,55,46,0,0%,15-11-2021
cowbay,34,34,0,0%,16-06-2022
era,56,56,45,80%,14-04-2022
fourmile,244,247,115,47%,01-06-2022
frankston,191,189,157,82%,16-06-2022
garie,48,48,0,0%,13-06-2022
hungry,128,128,92,72%,20-05-2022
macsnth,73,72,0,0%,13-06-2022
macssth,60,59,0,0%,13-06-2022
manly,1167,1262,1122,96%,14-06-2022
moffat,114,131,215,189%,09-03-2022
newell,23,34,0,0%,27-04-2022
nthnarra,2315,2444,1097,47%,16-06-2022
queenscliff,85,79,0,0%,14-06-2022
rainbow,50,50,0,0%,24-04-2022
seaford,69,57,0,0%,11-06-2022
shortpoint,222,222,0,0%,06-06-2022
stockton1,262,261,33,13%,24-05-2022
stockton2,214,213,62,29%,11-05-2022
stockton3,257,256,69,27%,15-05-2022
tomakin,211,211,142,67%,13-06-2022
tugun,288,289,132,46%,01-06-2022
wamberal,372,366,0,0%,15-06-2022
wonga,46,46,0,0%,30-05-2022
1 site # processed # photoshop # registered stability most recently deleted
2 alex 75 74 73 97% 25-08-2021
3 birubi 65 64 0 0% 12-06-2022
4 blacksmiths 1581 1577 1381 87% 30-04-2022
5 broulee 190 191 93 49% 14-06-2022
6 buddina 117 114 85 73% 15-06-2022
7 burleigh 245 242 0 0% 17-06-2022
8 byron 1255 1329 676 54% 13-06-2022
9 cathieillaroo 97 103 66 68% 16-06-2022
10 cathielagoon 74 75 73 99% 10-04-2022
11 coolum 80 36 22 28% 13-06-2022
12 cooya 55 46 0 0% 15-11-2021
13 cowbay 34 34 0 0% 16-06-2022
14 era 56 56 45 80% 14-04-2022
15 fourmile 244 247 115 47% 01-06-2022
16 frankston 191 189 157 82% 16-06-2022
17 garie 48 48 0 0% 13-06-2022
18 hungry 128 128 92 72% 20-05-2022
19 macsnth 73 72 0 0% 13-06-2022
20 macssth 60 59 0 0% 13-06-2022
21 manly 1167 1262 1122 96% 14-06-2022
22 moffat 114 131 215 189% 09-03-2022
23 newell 23 34 0 0% 27-04-2022
24 nthnarra 2315 2444 1097 47% 16-06-2022
25 queenscliff 85 79 0 0% 14-06-2022
26 rainbow 50 50 0 0% 24-04-2022
27 seaford 69 57 0 0% 11-06-2022
28 shortpoint 222 222 0 0% 06-06-2022
29 stockton1 262 261 33 13% 24-05-2022
30 stockton2 214 213 62 29% 11-05-2022
31 stockton3 257 256 69 27% 15-05-2022
32 tomakin 211 211 142 67% 13-06-2022
33 tugun 288 289 132 46% 01-06-2022
34 wamberal 372 366 0 0% 15-06-2022
35 wonga 46 46 0 0% 30-05-2022

Binary file not shown.
Loading…
Cancel
Save