CoastsnapAuto/coastsnap/images_db.py

# -*- coding: utf-8 -*-
"""
Created on Wed Jun 28 09:48:06 2023

@author: z5079346
"""

import os
import re
import pandas as pd
from datetime import datetime

# Root folder of the image archive.  The folders below it are read as
# <site>/<stage>/<year>/<image>.jpg: the first level is the site name and the
# second level is the processing stage.
base_dir = "C:/Users/z5079346/OneDrive - UNSW/CoastSnap/Images"

# One boolean column per recognised processing-stage folder name.
STAGE_COLUMNS = ("Processed", "Photoshop", "Registered", "Rectified")
COLUMNS = ["Site", "Date", "Creator", *STAGE_COLUMNS, "FilePath"]

# File-name substrings that identify trail-camera images; those are ignored.
TRAILCAM_TAGS = (
    "4Gtrailcam",
    "3gtrailcam",
    "ReconSC950",
    "swiftenduro",
    "SwiftEnduro4G",
)

# Patterns are compiled once instead of on every file.
YEAR_DIR_RE = re.compile(r'^\d{4}$')
SUFFIX_RE = re.compile(r'(_tagged|_registered)$')
TRAILING_DIGITS_RE = re.compile(r'\d+$')


def extract_epoch(name_head):
    """Return the trailing run of digits in *name_head*, or None if absent.

    Handles both a bare epoch ('1633055855') and a prefixed one
    ('_0009_1633055855').  The previous pattern r'(_\\d+)?(\\d+)$' let the
    optional group swallow all but the final digit of a prefixed name, so the
    epoch came back as a single digit.
    """
    found = TRAILING_DIGITS_RE.search(name_head)
    return found.group(0) if found else None


def parse_image_name(file):
    """Parse an image file name into a ``(date, creator)`` tuple.

    Parameters
    ----------
    file : str
        File name including its extension.

    Returns
    -------
    tuple of (str, str) or None
        ``date`` is the epoch formatted as 'YYYY-MM-DD HH:MM:SS' in the local
        timezone of the machine running the script, or the raw text when it
        cannot be converted.  ``creator`` is the second-to-last dot-separated
        field of the stem.  None is returned when the stem has fewer than two
        dot-separated fields.
    """
    stem = os.path.splitext(file)[0]
    # Drop a trailing '_tagged' / '_registered' marker so that every variant
    # of one image produces the same key.
    fields = SUFFIX_RE.sub('', stem).split('.')
    if len(fields) < 2:
        # Previously this case raised IndexError and aborted the whole scan.
        print(f"Skipping file with too few name fields: {file}.")
        return None

    epoch_time = extract_epoch(fields[0])
    if epoch_time is None:
        print(f"Unexpected filename format: {file}.")
        epoch_time = fields[0]  # fall back to the first field unchanged

    try:
        date = datetime.fromtimestamp(int(epoch_time)).strftime('%Y-%m-%d %H:%M:%S')
    except (ValueError, OverflowError, OSError):
        # fromtimestamp raises OverflowError/OSError (not only ValueError) for
        # values outside the platform's supported range.
        print(f"Unable to convert epoch time to date for file: {file}. Using raw epoch time as date.")
        date = epoch_time

    # NOTE(review): the second-to-last field is taken as the creator, as in
    # the original script.  If the stem ends in '...snap.<name>' this yields
    # 'snap' -- confirm against the real file naming scheme.
    creator = fields[-2]
    return date, creator


def build_image_table(image_root):
    """Scan *image_root* and return one row per (site, date, creator) image.

    Only ``.jpg`` files whose parent folder name is a four-digit year are
    considered, and trail-camera images are ignored.  The stage flag matching
    the folder an image was found in is set to True; an image found under
    several stage folders gets all of those flags on a single row.
    ``FilePath`` is the path of the first file seen for that image.  Stage
    folder names outside ``STAGE_COLUMNS`` still produce a row but never add
    a column.

    Rows are collected in a dict and turned into a DataFrame once, instead of
    concatenating and re-scanning a growing DataFrame for every file.

    Parameters
    ----------
    image_root : str
        Root folder of the <site>/<stage>/<year> tree.

    Returns
    -------
    pandas.DataFrame
        Columns are ``COLUMNS``; rows are in first-seen order.
    """
    records = {}
    last_site = None

    for root, _dirs, files in os.walk(image_root):
        if not YEAR_DIR_RE.match(os.path.basename(root)):
            continue

        # Work relative to the root so the result does not depend on which
        # separators the configured root path happens to contain.
        parts = os.path.relpath(root, image_root).split(os.sep)
        if len(parts) < 3:
            print(f"Skipping folder outside the <site>/<stage>/<year> layout: {root}")
            continue
        site, stage = parts[0], parts[1]

        for file in files:
            if not file.endswith(".jpg"):
                continue
            if any(tag in file for tag in TRAILCAM_TAGS):
                continue

            # Announce each site once, when its first usable image is seen.
            if site != last_site:
                print(f"Processing site: {site}")
                last_site = site

            parsed = parse_image_name(file)
            if parsed is None:
                continue
            date, creator = parsed

            key = (site, date, creator)
            record = records.get(key)
            if record is None:
                record = {"Site": site, "Date": date, "Creator": creator}
                record.update(dict.fromkeys(STAGE_COLUMNS, False))
                record["FilePath"] = os.path.join(root, file)
                records[key] = record
            if stage in STAGE_COLUMNS:
                record[stage] = True

    return pd.DataFrame(list(records.values()), columns=COLUMNS)


# Build the table and save it to a CSV file
df = build_image_table(base_dir)
df.to_csv("image_data.csv", index=False)
Put image record into 1 db (1 year ago)
			`# -*- coding: utf-8 -*-`
			`"""`
			`Created on Wed Jun 28 09:48:06 2023`

			`@author: z5079346`
			`"""`

			`import os`
			`import re`
			`import pandas as pd`
			`from datetime import datetime`

			`base_dir = "C:/Users/z5079346/OneDrive - UNSW/CoastSnap/Images"`

			`# Empty dataframe`
			`df = pd.DataFrame(columns=["Site", "Date", "Creator", "Processed", "Photoshop", "Registered", "Rectified", "FilePath"])`

			`last_site = None # initialize last_site variable`

			`# Recursive directory walk`
			`for root, dirs, files in os.walk(base_dir):`
			`for file in files:`
			`# Check if the file is a .jpg and its parent directory is a year`
			`if file.endswith(".jpg") and re.match(r'^\d{4}$', os.path.basename(root)):`

			`# IGNORE TRAILCAM IMAGES`
			`if ('4Gtrailcam' in file) or \`
			`('3gtrailcam' in file) or \`
			`('ReconSC950' in file) or \`
			`('swiftenduro' in file) or \`
			`('SwiftEnduro4G' in file):`
			`continue`


			`# Get full file path`
			`full_path = os.path.join(root, file)`
			`split_path = full_path.split(os.sep)`
			`# Extract data from path`
			`site = split_path[1]`
			`# Add this code to print the site name once`
			`if site != last_site:`
			`print(f"Processing site: {site}")`
			`last_site = site`
			`type = split_path[2]`
			`year = split_path[3]`

			`# Extract data from filename`
			`raw_filename = os.path.splitext(file)[0]`
			`# Clean up filename to ignore '_tagged' or '_registered' suffixes`
			`clean_filename = re.sub(r'(_tagged|_registered)$', '', raw_filename)`
			`date_and_creator = clean_filename.split('.')`

			`# Check if the first string is in the format '_0009_1633055855'`
			`epoch_time_match = re.search(r'(_\d+)?(\d+)$', date_and_creator[0])`
			`if epoch_time_match:`
			`epoch_time = epoch_time_match.group(2) # group 2 will have the digits at the end`
			`else:`
			`print(f"Unexpected filename format: {file}.")`
			`epoch_time = date_and_creator[0] # fallback to using the first part as epoch time as it is`

			`# Convert epoch time to readable date`
			`try:`
			`date = datetime.fromtimestamp(int(epoch_time)).strftime('%Y-%m-%d %H:%M:%S')`
			`except ValueError:`
			`print(f"Unable to convert epoch time to date for file: {file}. Using raw epoch time as date.")`
			`date = epoch_time # if conversion fails, use raw epoch time as date`

			`creator = date_and_creator[-2] # The creator's name is the second last part before 'snap'`

			`# Check if the row already exists in the dataframe`
			`mask = (df['Site'] == site) & (df['Date'] == date) & (df['Creator'] == creator)`
			`df_exists = df.loc[mask]`

			`if df_exists.empty:`
			`# Create new row if it doesn't exist`
			`new_df = pd.DataFrame({`
			`"Site": [site],`
			`"Date": [date],`
			`"Creator": [creator],`
			`"Processed": [type == "Processed"],`
			`"Photoshop": [type == "Photoshop"],`
			`"Registered": [type == "Registered"],`
			`"Rectified": [type == "Rectified"],`
			`"FilePath": [full_path]`
			`})`
			`df = pd.concat([df, new_df], ignore_index=True)`
			`else:`
			`# Update existing row if it exists`
			`df.loc[mask, type] = True`




			`# Save the DataFrame to a CSV file`
			`df.to_csv("image_data.csv", index=False)`