Added folder_stats.py

master
Jonathan Chan 2 years ago
parent 96f9780bb8
commit cd6e7b963c

@ -72,4 +72,17 @@ Script Logic: For every site in oneDrive CoastSnap directory, iterate through th
* Starts tagging from the most recent image and stops for the site when an image has already been tagged. This way, the user can manually remove bad registered/tagged images, and they will not be automatically replaced. * Starts tagging from the most recent image and stops for the site when an image has already been tagged. This way, the user can manually remove bad registered/tagged images, and they will not be automatically replaced.
* Retrieves tide data for the site from the .mat file specified in Database/CoastSnapDB.xlsx * Retrieves tide data for the site from the .mat file specified in Database/CoastSnapDB.xlsx
### Statistics
Run `generate_statistics_csv.bat`
Generates `statistics.csv` which contains information about the Images directory. Columns include:
site | # processed | # photoshop | # registered | stability | most recently deleted
* stability = # registered / # processed. This formula is based on the assumption that someone will manually remove poorly registered images in `Images/Registered`. Thus stability represents the percentage of images that had good registration.
## Future Improvements
### Image metadata
Currently (22/6/22) it appears that images downloaded from Spotteron do not retain the images' metadata. This is based on looking in Windows File Explorer (image -> Properties), as well as using the exif Python package. Note: the metadata presented in File Explorer is IPTC data. There is a Python package to interact with this data, but I had issues with it.

@ -0,0 +1,104 @@
import os
import pandas as pd
from pathlib import Path
from time import strptime
# Column order of the generated statistics table / statistics.csv.
STATS_COLUMNS = ['site', '# processed', '# photoshop', '# registered',
                 'stability', 'most recently deleted']


def _list_year_dirs(stage_path):
    """Return the year sub-directories of *stage_path*, newest first.

    An entry counts as a year when its name is four characters long and it
    is a directory. os.listdir() returns entries in arbitrary order, so the
    names are sorted explicitly instead of relying on the listing order.

    Raises:
        OSError: if *stage_path* does not exist or is not a directory.
    """
    return sorted(
        (entry for entry in os.listdir(stage_path)
         if len(entry) == 4
         and os.path.isdir(os.path.join(stage_path, entry))),
        reverse=True,
    )


def _count_images(stage_path, years):
    """Return the total number of entries in the given year directories."""
    return sum(len(os.listdir(os.path.join(stage_path, year)))
               for year in years)


def _most_recently_deleted(photoshop_path, registered_path, years):
    """Find the newest Photoshop image that has no registered counterpart.

    Years and file names are walked in descending name order. This assumes
    that file names sort chronologically -- TODO confirm against the naming
    scheme used by the download script.

    Returns:
        ``(image_filename, 'DD-MM-YYYY')`` for the first ``.jpg`` whose
        ``<name>_registered.jpg`` is missing from the matching year folder
        of *registered_path*, or ``None`` when every image is registered.

    Raises:
        IndexError, ValueError: if the file name does not have the
            dot-separated layout the date is read from.
    """
    for year in years:
        image_list = sorted(os.listdir(os.path.join(photoshop_path, year)),
                            reverse=True)
        for image_filename in image_list:
            if not image_filename.endswith('.jpg'):
                continue
            # Build the path from registered_path rather than string-replacing
            # 'Photoshop' in the whole path, which would also rewrite any
            # parent folder that happens to contain that word.
            registered_image_path = os.path.join(
                registered_path, year, image_filename[:-4] + '_registered.jpg')
            if os.path.isfile(registered_image_path):
                continue
            # Dot-separated fields: [2] is the abbreviated month name, [3]
            # starts with the day of month, [5] is the year.
            filename_list = image_filename.split(".")
            day = filename_list[3].split("_")[0]
            month = strptime(filename_list[2], '%b').tm_mon
            image_date = day + '-' + '{:02d}'.format(month) + '-' + filename_list[5]
            return image_filename, image_date
    return None


def build_statistics(images_dir):
    """Collect per-site image counts for every site folder in *images_dir*.

    A site is only reported when it has all three of the 'Processed',
    'Photoshop' and 'Registered' sub-directories; other entries are skipped.

    Returns:
        pandas.DataFrame with the columns in ``STATS_COLUMNS``. 'stability'
        is ``# registered / # processed`` as a whole percentage, or 'n/a'
        when nothing has been processed. 'most recently deleted' is a
        'DD-MM-YYYY' string, or 0 when no image is missing.
    """
    rows = []
    for site in sorted(os.listdir(images_dir)):  # Loop through SITES
        site_path = os.path.join(images_dir, site)
        processed_path = os.path.join(site_path, 'Processed')
        photoshop_path = os.path.join(site_path, 'Photoshop')
        registered_path = os.path.join(site_path, 'Registered')

        try:
            processed_years = _list_year_dirs(processed_path)
            photoshop_years = _list_year_dirs(photoshop_path)
            registered_years = _list_year_dirs(registered_path)
        except OSError:
            # A stage directory is missing (or the entry is not a site
            # folder at all): leave this site out of the statistics.
            continue

        processed_count = _count_images(processed_path, processed_years)
        photoshop_count = _count_images(photoshop_path, photoshop_years)
        registered_count = _count_images(registered_path, registered_years)

        most_recently_deleted = 0
        missing = _most_recently_deleted(photoshop_path, registered_path,
                                         photoshop_years)
        if missing is not None:
            image_filename, most_recently_deleted = missing
            print(site)
            print(image_filename)

        # Guard against sites with nothing processed instead of dividing by 0.
        if processed_count:
            stability = "{0:.0%}".format(registered_count / processed_count)
        else:
            stability = 'n/a'

        rows.append([site, processed_count, photoshop_count, registered_count,
                     stability, most_recently_deleted])

    return pd.DataFrame(rows, columns=STATS_COLUMNS)


def main():
    """Read the image root from coastsnap_sites.csv and write statistics.csv.

    Both files live in the parent of the current working directory.
    """
    code_dir = str(Path(os.getcwd()).parent)
    sites_csv_path = os.path.join(code_dir, "coastsnap_sites.csv")
    coastsnap_sites_csv = pd.read_csv(sites_csv_path)
    images_parent_dir = coastsnap_sites_csv.parent_directory[0]
    images_dir = os.path.join(images_parent_dir, "Images")

    stats_csv = build_statistics(images_dir)
    stats_csv.set_index('site', inplace=True)
    output_file_path = os.path.join(code_dir, 'statistics.csv')
    stats_csv.to_csv(output_file_path)


if __name__ == "__main__":
    main()

@ -0,0 +1,3 @@
REM Activate the "coastsnap" environment (presumably a conda environment, given the "conda deactivate" below).
call activate coastsnap
REM %~dp0 expands to the drive and directory of this batch file, so folder_stats.py is looked up next to it.
python "%~dp0folder_stats.py"
REM Leave the environment again once the script has finished.
call conda deactivate

@ -6,7 +6,7 @@ import datetime
import re import re
from pathlib import Path from pathlib import Path
from urllib.parse import urljoin from urllib.parse import urljoin
from os import path, makedirs from os import path, makedirs, getcwd
import attr import attr
import pytz import pytz
@ -17,10 +17,9 @@ from loguru import logger
from timezonefinder import TimezoneFinder from timezonefinder import TimezoneFinder
from werkzeug.utils import secure_filename from werkzeug.utils import secure_filename
#app = typer.Typer() code_images_dir = str(Path(getcwd()).parent)
coastsnap_sites = pd.read_csv("C:/Users/z5079346/OneDrive - UNSW/Projects/Coastsnap_test/CoastSnap_Sites.csv") coastsnap_sites_path = path.join(code_images_dir, "coastsnap_sites.csv")
coastsnap_sites = pd.read_csv(coastsnap_sites_path)
@attr.s() @attr.s()
class SpotteronImage: class SpotteronImage:

@ -0,0 +1,35 @@
site,# processed,# photoshop,# registered,stability,most recently deleted
alex,75,74,73,97%,25-08-2021
birubi,65,64,0,0%,12-06-2022
blacksmiths,1581,1577,1381,87%,30-04-2022
broulee,190,191,93,49%,14-06-2022
buddina,117,114,85,73%,15-06-2022
burleigh,245,242,0,0%,17-06-2022
byron,1255,1329,676,54%,13-06-2022
cathieillaroo,97,103,66,68%,16-06-2022
cathielagoon,74,75,73,99%,10-04-2022
coolum,80,36,22,28%,13-06-2022
cooya,55,46,0,0%,15-11-2021
cowbay,34,34,0,0%,16-06-2022
era,56,56,45,80%,14-04-2022
fourmile,244,247,115,47%,01-06-2022
frankston,191,189,157,82%,16-06-2022
garie,48,48,0,0%,13-06-2022
hungry,128,128,92,72%,20-05-2022
macsnth,73,72,0,0%,13-06-2022
macssth,60,59,0,0%,13-06-2022
manly,1167,1262,1122,96%,14-06-2022
moffat,114,131,215,189%,09-03-2022
newell,23,34,0,0%,27-04-2022
nthnarra,2315,2444,1097,47%,16-06-2022
queenscliff,85,79,0,0%,14-06-2022
rainbow,50,50,0,0%,24-04-2022
seaford,69,57,0,0%,11-06-2022
shortpoint,222,222,0,0%,06-06-2022
stockton1,262,261,33,13%,24-05-2022
stockton2,214,213,62,29%,11-05-2022
stockton3,257,256,69,27%,15-05-2022
tomakin,211,211,142,67%,13-06-2022
tugun,288,289,132,46%,01-06-2022
wamberal,372,366,0,0%,15-06-2022
wonga,46,46,0,0%,30-05-2022
1 site # processed # photoshop # registered stability most recently deleted
2 alex 75 74 73 97% 25-08-2021
3 birubi 65 64 0 0% 12-06-2022
4 blacksmiths 1581 1577 1381 87% 30-04-2022
5 broulee 190 191 93 49% 14-06-2022
6 buddina 117 114 85 73% 15-06-2022
7 burleigh 245 242 0 0% 17-06-2022
8 byron 1255 1329 676 54% 13-06-2022
9 cathieillaroo 97 103 66 68% 16-06-2022
10 cathielagoon 74 75 73 99% 10-04-2022
11 coolum 80 36 22 28% 13-06-2022
12 cooya 55 46 0 0% 15-11-2021
13 cowbay 34 34 0 0% 16-06-2022
14 era 56 56 45 80% 14-04-2022
15 fourmile 244 247 115 47% 01-06-2022
16 frankston 191 189 157 82% 16-06-2022
17 garie 48 48 0 0% 13-06-2022
18 hungry 128 128 92 72% 20-05-2022
19 macsnth 73 72 0 0% 13-06-2022
20 macssth 60 59 0 0% 13-06-2022
21 manly 1167 1262 1122 96% 14-06-2022
22 moffat 114 131 215 189% 09-03-2022
23 newell 23 34 0 0% 27-04-2022
24 nthnarra 2315 2444 1097 47% 16-06-2022
25 queenscliff 85 79 0 0% 14-06-2022
26 rainbow 50 50 0 0% 24-04-2022
27 seaford 69 57 0 0% 11-06-2022
28 shortpoint 222 222 0 0% 06-06-2022
29 stockton1 262 261 33 13% 24-05-2022
30 stockton2 214 213 62 29% 11-05-2022
31 stockton3 257 256 69 27% 15-05-2022
32 tomakin 211 211 142 67% 13-06-2022
33 tugun 288 289 132 46% 01-06-2022
34 wamberal 372 366 0 0% 15-06-2022
35 wonga 46 46 0 0% 30-05-2022

Binary file not shown.
Loading…
Cancel
Save