diff --git a/coastsnap/images_db.py b/coastsnap/images_db.py
new file mode 100644
index 0000000..aed9c20
--- /dev/null
+++ b/coastsnap/images_db.py
@@ -0,0 +1,95 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Jun 28 09:48:06 2023
+
+@author: z5079346
+"""
+
+import os
+import re
+import pandas as pd
+from datetime import datetime
+
+base_dir = "C:/Users/z5079346/OneDrive - UNSW/CoastSnap/Images"
+
+# Scan <base_dir>/<site>/<type>/<year>/*.jpg and write one CSV row per
+# (site, date, creator), flagging which processing stages have an image.
+FLAG_COLUMNS = ("Processed", "Photoshop", "Registered", "Rectified")
+COLUMNS = ["Site", "Date", "Creator", *FLAG_COLUMNS, "FilePath"]
+# Filename fragments identifying trail-camera images, which are ignored
+TRAILCAM_TAGS = ("4Gtrailcam", "3gtrailcam", "ReconSC950", "swiftenduro", "SwiftEnduro4G")
+
+YEAR_DIR_RE = re.compile(r'^\d{4}$')
+STAGE_SUFFIX_RE = re.compile(r'(_tagged|_registered)$')
+# Trailing digits only: on '_0009_1633055855' the old '(_\d+)?(\d+)$' pattern
+# let the optional group take '_163305585', leaving just '5' as the epoch.
+EPOCH_RE = re.compile(r'(\d+)$')
+
+# Rows keyed by (site, date, creator); a dict lookup replaces the per-file
+# DataFrame scan + concat, which was quadratic in the number of images.
+records = {}
+
+last_site = None  # last site announced on stdout
+
+# Recursive directory walk
+for root, dirs, files in os.walk(base_dir):
+    # Only folders named like a year hold images of interest
+    if not YEAR_DIR_RE.match(os.path.basename(root)):
+        continue
+    # Path relative to base_dir, so this works whatever separators base_dir uses
+    rel_parts = os.path.relpath(root, base_dir).split(os.sep)
+    if len(rel_parts) < 2:
+        continue  # no <site>/<type> levels above the year folder
+    site, image_type = rel_parts[0], rel_parts[1]
+
+    for file in files:
+        if not file.endswith(".jpg"):
+            continue
+        # IGNORE TRAILCAM IMAGES
+        if any(tag in file for tag in TRAILCAM_TAGS):
+            continue
+
+        if site != last_site:
+            print(f"Processing site: {site}")
+            last_site = site
+
+        full_path = os.path.join(root, file)
+        # Drop a '_tagged' / '_registered' suffix, then split the dot-separated name
+        name_parts = STAGE_SUFFIX_RE.sub('', os.path.splitext(file)[0]).split('.')
+        if len(name_parts) < 2:
+            # Previously an IndexError on name_parts[-2] aborted the whole scan
+            print(f"Unexpected filename format: {file}. Skipping.")
+            continue
+
+        # Epoch seconds are the trailing digits of the first part,
+        # e.g. '1633055855' or '_0009_1633055855'
+        epoch_match = EPOCH_RE.search(name_parts[0])
+        if epoch_match:
+            epoch_time = epoch_match.group(1)
+        else:
+            print(f"Unexpected filename format: {file}.")
+            epoch_time = name_parts[0]  # fall back to the raw first part
+
+        # Convert epoch time to a readable date (local timezone)
+        try:
+            date = datetime.fromtimestamp(int(epoch_time)).strftime('%Y-%m-%d %H:%M:%S')
+        except (ValueError, OverflowError, OSError):
+            # fromtimestamp raises OverflowError/OSError for out-of-range values
+            print(f"Unable to convert epoch time to date for file: {file}. Using raw epoch time as date.")
+            date = epoch_time
+
+        creator = name_parts[-2]  # second-to-last dot-separated part
+
+        record = records.get((site, date, creator))
+        if record is None:
+            record = {"Site": site, "Date": date, "Creator": creator,
+                      **dict.fromkeys(FLAG_COLUMNS, False), "FilePath": full_path}
+            records[(site, date, creator)] = record
+        # Unknown type folders set no flag (they used to add a stray column)
+        if image_type in FLAG_COLUMNS:
+            record[image_type] = True
+
+df = pd.DataFrame(list(records.values()), columns=COLUMNS)
+
+# Save the DataFrame to a CSV file
+df.to_csv("image_data.csv", index=False)
\ No newline at end of file