Put image record into 1 db
parent
7f770c045f
commit
635b697510
@ -0,0 +1,95 @@
|
|||||||
|
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 28 09:48:06 2023

@author: z5079346
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import pandas as pd
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
base_dir = "C:/Users/z5079346/OneDrive - UNSW/CoastSnap/Images"

# Columns written to the CSV, in output order.
COLUMNS = ["Site", "Date", "Creator", "Processed", "Photoshop",
           "Registered", "Rectified", "FilePath"]

# Boolean columns; each is True when the image was found in the folder of
# the same name.
FLAG_COLUMNS = ("Processed", "Photoshop", "Registered", "Rectified")

# Substrings that mark a file as a trail-camera image (these are ignored).
TRAILCAM_MARKERS = ('4Gtrailcam', '3gtrailcam', 'ReconSC950',
                    'swiftenduro', 'SwiftEnduro4G')

YEAR_DIR_RE = re.compile(r'^\d{4}$')
SUFFIX_RE = re.compile(r'(_tagged|_registered)$')
TRAILING_DIGITS_RE = re.compile(r'(\d+)$')


def is_trailcam_image(file_name):
    """Return True if *file_name* contains any of the trail-camera markers."""
    return any(marker in file_name for marker in TRAILCAM_MARKERS)


def extract_epoch(first_field):
    """Return the run of digits that ends *first_field*, or None.

    Handles both a bare epoch ('1633055855') and a prefixed one
    ('_0009_1633055855'). The previous pattern, r'(_\\d+)?(\\d+)$', let the
    optional prefix group swallow all but the last digit of the epoch when
    an underscore preceded it, so only that last digit was returned.
    """
    match = TRAILING_DIGITS_RE.search(first_field)
    return match.group(1) if match else None


def parse_image_name(file_name):
    """Parse an image file name into a ``(date, creator)`` pair of strings.

    The extension and a trailing '_tagged' / '_registered' suffix are
    removed, then the name is split on '.'.

    * date: the trailing digits of the first field, read as an epoch and
      formatted as 'YYYY-MM-DD HH:MM:SS'. ``datetime.fromtimestamp`` is
      called without a timezone, so the result is in the local time of the
      machine running the script. If the conversion fails, the raw text is
      returned instead.
    * creator: the second-to-last dot-separated field, or '' when the name
      has only one field.
      NOTE(review): confirm that the second-to-last field really is the
      creator in the naming convention used for these images.
    """
    raw_filename = os.path.splitext(file_name)[0]
    clean_filename = SUFFIX_RE.sub('', raw_filename)
    fields = clean_filename.split('.')

    epoch_time = extract_epoch(fields[0])
    if epoch_time is None:
        print(f"Unexpected filename format: {file_name}.")
        epoch_time = fields[0]  # fall back to the first field as it is

    try:
        date = datetime.fromtimestamp(int(epoch_time)).strftime('%Y-%m-%d %H:%M:%S')
    except (ValueError, OverflowError, OSError):
        # int() raises ValueError for non-numeric text; fromtimestamp raises
        # ValueError, OverflowError or OSError for out-of-range values
        # depending on the platform. None of these should abort the scan.
        print(f"Unable to convert epoch time to date for file: {file_name}. Using raw epoch time as date.")
        date = epoch_time

    if len(fields) >= 2:
        creator = fields[-2]
    else:
        # A single-field name used to raise IndexError here.
        print(f"No creator field in filename: {file_name}.")
        creator = ""

    return date, creator


def iter_images(images_dir):
    """Yield ``(site, image_type, date, creator, full_path)`` per image.

    Walks *images_dir* recursively and considers only '.jpg' files whose
    parent directory name is exactly four digits, skipping trail-camera
    images. ``site`` and ``image_type`` are the first and second path
    components below *images_dir*. Prints each site name once, when it is
    first encountered after a different site.
    """
    last_site = None
    for root, _dirs, files in os.walk(images_dir):
        if not YEAR_DIR_RE.match(os.path.basename(root)):
            continue

        # Work relative to images_dir so the result does not depend on
        # which separators images_dir itself was written with.
        parts = os.path.relpath(root, images_dir).split(os.sep)
        if len(parts) < 2:
            print(f"Skipping {root}: expected <site>/<type>/<year> below the base directory.")
            continue
        site, image_type = parts[0], parts[1]

        for file in files:
            if not file.endswith(".jpg") or is_trailcam_image(file):
                continue

            if site != last_site:
                print(f"Processing site: {site}")
                last_site = site

            date, creator = parse_image_name(file)
            yield site, image_type, date, creator, os.path.join(root, file)


def build_table(entries):
    """Merge image entries into one DataFrame row per (site, date, creator).

    *entries* is an iterable of ``(site, image_type, date, creator,
    full_path)`` tuples. The first entry for a key creates the row, stores
    its path in 'FilePath' and sets the flag column matching its
    ``image_type`` (if any); later entries for the same key set the column
    named by their ``image_type`` to True.

    Rows are collected in a dict and the DataFrame is built once, instead
    of scanning and re-concatenating the whole frame for every file.
    """
    columns = list(COLUMNS)
    rows = {}
    for site, image_type, date, creator, full_path in entries:
        key = (site, date, creator)
        row = rows.get(key)
        if row is None:
            row = {"Site": site, "Date": date, "Creator": creator}
            for flag in FLAG_COLUMNS:
                row[flag] = image_type == flag
            row["FilePath"] = full_path
            rows[key] = row
        else:
            # NOTE(review): a folder name outside FLAG_COLUMNS adds a new
            # column here, but only on this update path (never when the row
            # is first created). Kept as-is to preserve the CSV schema;
            # confirm whether that is intended.
            if image_type not in columns:
                columns.append(image_type)
            row[image_type] = True

    return pd.DataFrame(list(rows.values()), columns=columns)


# Scan the image tree and collect one record per image
df = build_table(iter_images(base_dir))

# Save the DataFrame to a CSV file
df.to_csv("image_data.csv", index=False)
|
Loading…
Reference in New Issue