# -*- coding: utf-8 -*-
"""
Created on Wed Jun 28 09:48:06 2023

@author: z5079346

Walk the CoastSnap image tree under ``base_dir`` and write ``image_data.csv``,
one row per (site, date, creator) with a flag for each image type seen.
"""

import os
import re
import pandas as pd
from datetime import datetime

base_dir = "C:/Users/z5079346/OneDrive - UNSW/CoastSnap/Images"

# Columns of the output table, in CSV order.
COLUMNS = ["Site", "Date", "Creator", "Processed", "Photoshop",
           "Registered", "Rectified", "FilePath"]

# Image-type folder names that get a True/False flag when a row is created.
IMAGE_TYPES = ("Processed", "Photoshop", "Registered", "Rectified")

# Substrings that identify trail camera images, which are ignored.
TRAILCAM_MARKERS = ('4Gtrailcam', '3gtrailcam', 'ReconSC950',
                    'swiftenduro', 'SwiftEnduro4G')

# Compiled once because they are applied to every directory / file.
YEAR_DIR_RE = re.compile(r'^\d{4}$')
SUFFIX_RE = re.compile(r'(_tagged|_registered)$')
TRAILING_DIGITS_RE = re.compile(r'(\d+)$')


def extract_epoch(name_part):
    """Return the run of digits that ends *name_part*, or None if there is none.

    For a value such as '_0009_1633055855' this returns '1633055855'.  The
    pattern deliberately has no optional '_<digits>' prefix group: a greedy
    prefix group consumes the timestamp itself and leaves only its last digit.
    """
    found = TRAILING_DIGITS_RE.search(name_part)
    return found.group(1) if found else None


def parse_image_filename(file):
    """Extract ``(date, creator)`` from an image file name.

    The extension and a trailing '_tagged' / '_registered' suffix are removed,
    and the remainder is split on '.'.  The date comes from the digits ending
    the first field, read as epoch seconds and formatted in local time as
    'YYYY-MM-DD HH:MM:SS'.  When that conversion is not possible the raw text
    is used as the date.  The creator is the second-to-last field.

    Returns None when the name has fewer than two '.'-separated fields, since
    no creator can be read from it.
    """
    raw_filename = os.path.splitext(file)[0]
    clean_filename = SUFFIX_RE.sub('', raw_filename)
    fields = clean_filename.split('.')

    epoch_time = extract_epoch(fields[0])
    if epoch_time is None:
        print(f"Unexpected filename format: {file}.")
        epoch_time = fields[0]  # fall back to the first field as it is

    try:
        date = datetime.fromtimestamp(int(epoch_time)).strftime('%Y-%m-%d %H:%M:%S')
    except (ValueError, OverflowError, OSError):
        # ValueError: not an integer (or out of range on some platforms);
        # OverflowError / OSError: timestamp outside what the platform supports.
        print(f"Unable to convert epoch time to date for file: {file}. "
              "Using raw epoch time as date.")
        date = epoch_time

    if len(fields) < 2:
        print(f"Skipping {file}: no creator field in filename.")
        return None
    creator = fields[-2]

    return date, creator


def build_image_index(image_root):
    """Scan *image_root* and return a DataFrame indexing the images found.

    Only '.jpg' files whose parent directory name is four digits are
    considered, and trail camera images are ignored.  The first two path
    components below *image_root* are taken as the site and the image type.

    One row is produced per (site, date, creator).  The row is created by the
    first matching file, whose path is stored in 'FilePath'.  Later files with
    the same key set the column named after their image type to True.  If
    that type is not one of the known columns, a new column is appended.
    """
    records = {}             # (site, date, creator) -> row dict
    columns = list(COLUMNS)  # grows if an unknown image type needs a column
    last_site = None         # so each site name is announced only once

    for root, _dirs, files in os.walk(image_root):
        if not YEAR_DIR_RE.match(os.path.basename(root)):
            continue

        # Work relative to the base directory so the result does not depend
        # on how many separators the base path itself contains.
        rel_parts = os.path.relpath(root, image_root).split(os.sep)
        if len(rel_parts) < 3:
            print(f"Skipping {root}: expected <site>/<type>/<year> below the base directory.")
            continue
        site, image_type = rel_parts[0], rel_parts[1]

        for file in files:
            if not file.endswith(".jpg"):
                continue
            # IGNORE TRAILCAM IMAGES
            if any(marker in file for marker in TRAILCAM_MARKERS):
                continue

            if site != last_site:
                print(f"Processing site: {site}")
                last_site = site

            parsed = parse_image_filename(file)
            if parsed is None:
                continue
            date, creator = parsed

            key = (site, date, creator)
            record = records.get(key)
            if record is None:
                record = {"Site": site, "Date": date, "Creator": creator}
                for known_type in IMAGE_TYPES:
                    record[known_type] = (image_type == known_type)
                record["FilePath"] = os.path.join(root, file)
                records[key] = record
            else:
                if image_type not in columns:
                    columns.append(image_type)
                record[image_type] = True

    # Build the frame once instead of concatenating a one-row frame per file.
    return pd.DataFrame(list(records.values()), columns=columns)


df = build_image_index(base_dir)

# Save the DataFrame to a CSV file
df.to_csv("image_data.csv", index=False)