You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

95 lines
3.6 KiB
Python

# -*- coding: utf-8 -*-
"""
Created on Wed Jun 28 09:48:06 2023
@author: z5079346
"""
import os
import re
import pandas as pd
from datetime import datetime
base_dir = "C:/Users/z5079346/OneDrive - UNSW/CoastSnap/Images"

# Columns of the output table, in the order they are written to the CSV.
COLUMNS = ["Site", "Date", "Creator", "Processed", "Photoshop", "Registered", "Rectified", "FilePath"]
# Folder names (second level below the scan root) that map onto a boolean flag column.
STAGE_COLUMNS = ("Processed", "Photoshop", "Registered", "Rectified")
# Substrings marking trail-camera images, which are left out of the inventory.
# Matching is case-sensitive, hence both spellings of the Swift Enduro tag.
TRAILCAM_TAGS = ("4Gtrailcam", "3gtrailcam", "ReconSC950", "swiftenduro", "SwiftEnduro4G")

YEAR_DIR_RE = re.compile(r'^\d{4}$')
SUFFIX_RE = re.compile(r'(_tagged|_registered)$')
TRAILING_DIGITS_RE = re.compile(r'(\d+)$')


def extract_epoch(name_head: str):
    """Return the run of digits that ends *name_head*, or None if there is none.

    Handles both a plain epoch ('1633055855') and a prefixed one
    ('_0009_1633055855'). The previous pattern, ``(_\\d+)?(\\d+)$``, let the
    optional group swallow all but the final digit whenever the epoch was
    preceded by an underscore, so '_0009_1633055855' yielded '5'.
    """
    found = TRAILING_DIGITS_RE.search(name_head)
    return found.group(1) if found else None


def parse_image_name(filename: str):
    """Derive ``(date, creator)`` from an image file name.

    The extension and a trailing '_tagged' / '_registered' suffix are removed
    and the remainder is split on '.'. The first component supplies the epoch
    time and the second-to-last component is taken as the creator.

    ``date`` is formatted as '%Y-%m-%d %H:%M:%S' in the local timezone. If the
    epoch cannot be converted, the raw text is returned as the date instead.
    """
    stem = os.path.splitext(filename)[0]
    parts = SUFFIX_RE.sub('', stem).split('.')

    epoch_time = extract_epoch(parts[0])
    if epoch_time is None:
        print(f"Unexpected filename format: {filename}.")
        epoch_time = parts[0]  # fall back to the first component as-is

    try:
        date = datetime.fromtimestamp(int(epoch_time)).strftime('%Y-%m-%d %H:%M:%S')
    except (ValueError, OverflowError, OSError):
        # fromtimestamp raises OverflowError/OSError (not ValueError) for
        # values outside the platform's supported range.
        print(f"Unable to convert epoch time to date for file: {filename}. Using raw epoch time as date.")
        date = epoch_time

    if len(parts) >= 2:
        # NOTE(review): this keeps the original choice of the second-to-last
        # component; confirm against the real naming scheme that this is the
        # creator and not a fixed token such as 'snap'.
        creator = parts[-2]
    else:
        # A name without any '.' separator used to raise IndexError and abort
        # the whole scan.
        print(f"No creator found in filename: {filename}. Using 'Unknown'.")
        creator = "Unknown"
    return date, creator


def build_image_index(image_root: str) -> "pd.DataFrame":
    """Walk *image_root* and return one row per (site, date, creator).

    Only '.jpg' files whose parent directory name is four digits are
    considered, and trail-camera images are skipped. The site and processing
    stage are the first and second path components relative to *image_root*.
    'FilePath' holds the first file encountered for a row; later files for the
    same row only switch on the flag of the stage folder they were found in.
    """
    records = {}          # (site, date, creator) -> row dict, in insertion order
    extra_columns = []    # stage names outside COLUMNS that were flagged on a row
    last_site = None

    for root, _dirs, files in os.walk(image_root):
        if not YEAR_DIR_RE.match(os.path.basename(root)):
            continue
        for file in files:
            if not file.endswith(".jpg"):
                continue
            if any(tag in file for tag in TRAILCAM_TAGS):
                continue

            full_path = os.path.join(root, file)
            # Split the path *relative to the scan root* so the result does not
            # depend on which separators the root itself was written with.
            rel_parts = os.path.relpath(full_path, image_root).split(os.sep)
            if len(rel_parts) < 3:
                # Shallower than <site>/<stage>/<file>; previously an IndexError.
                print(f"Unexpected directory layout: {full_path}.")
                continue
            site, stage = rel_parts[0], rel_parts[1]

            if site != last_site:
                print(f"Processing site: {site}")
                last_site = site

            date, creator = parse_image_name(file)
            key = (site, date, creator)
            record = records.get(key)
            if record is None:
                records[key] = {
                    "Site": site,
                    "Date": date,
                    "Creator": creator,
                    **{name: stage == name for name in STAGE_COLUMNS},
                    "FilePath": full_path,
                }
            else:
                record[stage] = True
                # As before, a stage folder that is not one of the known
                # columns gains its own column once it is flagged on an
                # existing row (blank for every other row).
                if stage not in COLUMNS and stage not in extra_columns:
                    extra_columns.append(stage)

    # Build the frame once instead of concatenating a one-row frame per file.
    return pd.DataFrame(list(records.values()), columns=COLUMNS + extra_columns)


df = build_image_index(base_dir)
# Save the DataFrame to a CSV file
df.to_csv("image_data.csv", index=False)