# CoastsnapAuto/coastsnap/user_statistics.py

import os
import csv
import pandas as pd
import numpy as np
from datetime import datetime
from pathlib import Path
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as ticker

import difflib


# Provide the path to the CSV file in the parent directory

code_dir = str(Path(os.getcwd()).parent)
csv_path = os.path.join(code_dir, "coastsnap_sites.csv")

coastsnap_sites_csv = pd.read_csv(csv_path)
parent_directories = coastsnap_sites_csv.parent_directory[0] 

# Extract site names and parent directories
site_names = coastsnap_sites_csv['site_name']
root_id = coastsnap_sites_csv['root_id']

dfoverall = pd.DataFrame(columns=['Site','Root_ID','Total count'])


# Iterate over site names and parent directories
for index, row in coastsnap_sites_csv.iterrows():
    site_name = row['site_name']
    root_id = row['root_id']
    
    print(site_name)
    # Create a dictionary to store photo counts
    day_of_week_counts = {}
    month_counts = {}
    hour_counts = {}
    year_counts = {}
    username_counts = {}    
    
    # Construct the full path to the photo directory
    photo_directory = os.path.join(parent_directories, "Images", site_name, "Processed")

    df = pd.DataFrame(columns=['Site','Total count','Datetime','Year','Month','Date','Hour','Minute','Second','Day','Month name','User'])
    total = -1

    # Iterate over the years in the photo directory
    for year_directory in os.listdir(photo_directory):
        
        # Check if the folder name is in the 4 number year format
        if not year_directory.isdigit() or len(year_directory) != 4:
            continue  # Skip this folder
        
        # Construct the full path to the year directory
        year_path = os.path.join(photo_directory, year_directory)
        

        # Iterate over the files in the year directory
        for filename in os.listdir(year_path):
            if filename.endswith(".jpg"):
                
                # Check that it isn't the first photo uploaded
                total = total + 1
                if total == 0:
                    continue
                
                
                # Extract information from the filename
                filename = filename.replace("_",".")
                file_parts = filename.split(".")
                username = file_parts[-2]
                timestamp = ".".join([file_parts[1],file_parts[2],file_parts[3],file_parts[4],file_parts[5],file_parts[6],file_parts[8]])

                # Parse the timestamp
                date_format = "%a.%b.%d.%H.%M.%S.%Y"
                timestamp_datetime = datetime.strptime(timestamp, date_format)

                # Extract relevant information from the timestamp
                day_of_week = timestamp_datetime.strftime("%A")
                month = timestamp_datetime.strftime("%B")
                hour = timestamp_datetime.hour
                year = timestamp_datetime.year

                # Update photo counts
                day_of_week_counts[day_of_week] = day_of_week_counts.get(day_of_week, 0) + 1
                month_counts[month] = month_counts.get(month, 0) + 1
                hour_counts[hour] = hour_counts.get(hour, 0) + 1
                year_counts[year] = year_counts.get(year, 0) + 1

                # Update username counts
                username_counts[username] = username_counts.get(username, 0) + 1
                
                # Month string to number
                monthnum = datetime.strptime(file_parts[2], '%b').month
                
                
                column_map = {
                    'Site': file_parts[9],
                    'Datetime': timestamp_datetime,
                    'Year': int(file_parts[8]),
                    'Month': monthnum,
                    'Date': int(file_parts[3]),
                    'Hour': int(file_parts[4]),
                    'Minute': int(file_parts[5]),
                    'Second': int(file_parts[6]),
                    'Month name': file_parts[2],
                    'Day': file_parts[1],
                    'User': file_parts[11],
                    'Total count': total
                    }

                new_row = pd.DataFrame([column_map])

                # Add the new row to the existing DataFrame
                df = pd.concat([df, new_row], ignore_index=True)
    
    if total < 1:
        continue            
    
    # Determine top three usernames
    top_usernames = sorted(username_counts.items(), key=lambda x: x[1], reverse=True)[:3]
    dftopuser = pd.DataFrame(top_usernames, columns=['User', 'Count']).sort_values(by=['Count'], ascending=True).reset_index(drop=True)

    ###
    # Save statistics to an excel spreadsheet
    ###

    # Creat Statistics folder in Images directory if it doesn't exist
    statistics_path = os.path.join(parent_directories, "Images", site_name, "Statistics")
    
    if not os.path.exists(statistics_path):
        os.makedirs(statistics_path)

    # Create an Excel writer object to save the statistics
    writer = pd.ExcelWriter(statistics_path+'\statistics.xlsx')
    
    # Create a summary sheet which includes all photo data from the site
    df.to_excel(writer, sheet_name='Summary', index=False)
    
    catday = [ 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    catmonth = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']
    
    # Create a sheet summarising the hours of the day counts
    dfhours = pd.DataFrame(list(hour_counts.items()), columns=['Hour', 'Count']).sort_values(by=['Hour'])
    dfhours.to_excel(writer, sheet_name='Hours', index=False)    
    
    #create a sheet summarising the days of the week counts
    dfdays = pd.DataFrame(list(day_of_week_counts.items()), columns=['Day', 'Count'])
    dfdays = dfdays.groupby(['Day']).sum().reindex(catday) 
    dfdays.to_excel(writer, sheet_name='Days', index=True)

    #Create a sheet summarising the months of the year counts
    dfmonths = pd.DataFrame(list(month_counts.items()), columns=['Month', 'Count'])
    dfmonths = dfmonths.groupby(['Month']).sum().reindex(catmonth)
    dfmonths.to_excel(writer, sheet_name='Months', index=True)
    
    # Create a sheet summarising counts by year
    dfyears = pd.DataFrame(list(year_counts.items()), columns=['Year', 'Count']).sort_values(by=['Year'])
    dfyears.to_excel(writer, sheet_name='Years', index=False)
    
    # Create a sheet summarising the counts by user
    dfuser = pd.DataFrame(list(username_counts.items()), columns=['User', 'Count']).sort_values(by=['Count'], ascending=False)
    
    ### Combines similar username entries to avoid repeated top users
    # Initialize a dictionary to store combined entries and their corresponding values
    combined_entries = {}

    # Create a copy of the column with the original capitalization
    dfuser['User' + '_original'] = dfuser['User']

    # Convert the column to lowercase for case-insensitive comparison
    dfuser['User'] = dfuser['User'].str.lower()

    # Iterate over each entry in the column
    for entry, original_entry, value in zip(dfuser['User'], dfuser['User' + '_original'], dfuser["Count"]):
        # Check if a similar entry already exists in the combined_entries dictionary
        similar_entry = next((key for key in combined_entries if difflib.SequenceMatcher(None, entry, key).ratio() >= 0.9), None)

        if similar_entry:
            # If a similar entry exists, add the value to the existing entry
            combined_entries[similar_entry][1] += value
            print(f"Combined entry: {original_entry} -> {combined_entries[similar_entry][0]}")
        else:
            # If no similar entry exists, create a new entry in the dictionary
            combined_entries[entry] = [original_entry, value]
    
    # Restore the original capitalization in the combined_entries dictionary
    for key, value in combined_entries.items():
        value[0] = dfuser.loc[dfuser['User'] == key, 'User' + '_original'].values[0]
    
    # Create a new dataframe with the combined entries and their summed values
    dfuser = pd.DataFrame(list(combined_entries.values()), columns=['User', "Count"])
    
    dfuser.to_excel(writer, sheet_name='Users', index=False)

    writer.close()
    ###


    # Create overall statistics sheet

    column_map_ovr = {
        'Site': site_name,
        'Root_ID': root_id,
        'Total count': total}
    
    new_row_ov = pd.DataFrame([column_map_ovr])

    # Add the new row to the existing DataFrame
    dfoverall = pd.concat([dfoverall, new_row_ov], ignore_index=True)


    # Create the plot for total counts
    fig, ax = plt.subplots(figsize=(7,4))
    fig.subplots_adjust(right=0.8)
    ax.plot(df['Datetime'], df['Total count'])

    
    time_range = df['Datetime'].max() - df['Datetime'].min()
    
    if time_range == pd.Timedelta(days=0):
        continue
    if time_range < pd.Timedelta(days=365):  # Less than 1 years
        minor_locator = mdates.MonthLocator()
        major_locator = mdates.MonthLocator(bymonth=[1, 3, 5, 7, 9, 11])
        major_formatter = mdates.DateFormatter('%b\n%Y')
    if time_range < pd.Timedelta(days=365*2) and time_range >= pd.Timedelta(days=365):  # Between 1 and 2 years
        minor_locator = mdates.MonthLocator()
        minor_formatter = mdates.DateFormatter('%b')
        major_locator = mdates.MonthLocator(bymonth=[1, 4, 7, 10])
        major_formatter = mdates.DateFormatter('%b\n%Y')
    if time_range >= pd.Timedelta(days=365*2):  # Longer than 2 years
        minor_locator = mdates.MonthLocator()
        major_locator = mdates.MonthLocator(bymonth=[1, 7])
        major_formatter = mdates.DateFormatter('%b\n%Y')
    
    # Set the x-axis tick locators and formatters
    ax.xaxis.set_minor_locator(minor_locator)
    ax.xaxis.set_major_locator(major_locator)
    ax.xaxis.set_major_formatter(major_formatter)
    
    # Add labels and title to the plot
    plt.xlabel('Month')
    plt.ylabel('Cumulative count')
    plt.title('Cumulative submissions since installation')
    
    # Rotate x-axis labels if needed
    #plt.xticks(rotation=45)
    
    
    # Add a table to the plot
    table_data = [
    ["Total number\nof images", total],
    ["Submissions\nper week", round(total * 7 / time_range.days,1)]]
    table = ax.table(cellText=table_data, colWidths = [0.2,0.1],cellLoc='left',loc='right', bbox=[1.05, 0.35, 0.4, 0.4])


    # Adjust table properties
    table.auto_set_font_size(False)
    table.set_fontsize(10)
    table.scale(1, 3)

        
    cells = table.properties()["celld"]
    for i in range(0,1):
        cells[i, 1]._loc = 'center'

    
    # Adjust the plot layout to accommodate the table
    plt.subplots_adjust(bottom=0.2)
    
    # Save the plot to a temporary file
    plt.savefig(statistics_path + "/total_count_plot.png", bbox_inches = 'tight')#, pad_inches = 0.5)
    plt.close()


    # Create 3 subplots for hours, days and months submissions
    
    # Set up the figure and axes
    fig, axs = plt.subplots(1, 3, figsize=(12, 4))  # Three subplots side by side
    
    # First subplot: Hourly Counts
    axs[0].bar(dfhours["Hour"], dfhours["Count"])
    axs[0].set_xlabel("Time of Day")
    axs[0].set_ylabel("Counts")
    axs[0].set_title("Submissions by\ntime of the day")
    axs[0].set_xticks(np.arange(0, 24, 3))  # Major ticks every hour
    axs[0].set_xticks(np.arange(0, 24, 1), minor=True)  # Minor ticks every 3 hours
    axs[0].set_xticklabels([f"{h:02d}" for h in np.arange(0, 24, 3)])
    
    # Second subplot: Daily Counts
    axs[1].bar(dfdays.index, dfdays["Count"])
    axs[1].set_xlabel("Days of the Week")
    axs[1].set_ylabel("Counts")
    axs[1].set_title("Submissions by\nday of the week")
    axs[1].set_xticks(np.arange(7))
    axs[1].set_xticklabels(["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"])
    
    # Third subplot: Monthly Counts
    axs[2].bar(dfmonths.index, dfmonths["Count"])
    axs[2].set_xlabel("Months of the Year")
    axs[2].set_ylabel("Counts")
    axs[2].set_title("Submissions by\nmonth of the year")
    axs[2].set_xticks(np.arange(12))
    axs[2].set_xticklabels(["J", "F", "M", "A", "M", "J", "J", "A", "S", "O", "N", "D"])
    
    # Adjust the spacing between the subplots
    plt.tight_layout()
    
    # Save the plot
    plt.savefig(statistics_path + "/hour_day_month_plot.png")
    plt.close()


    # Create plot showing top users

    # Extract data from the DataFrame
    users = dftopuser['User']
    counts = dftopuser['Count']
    
    # Set the size of the plot
    fig, ax = plt.subplots(figsize=(7, 2))
    
    # Create the bar plot
    colors = ['#AD8A56', '#D7D7D7', '#AF9500'] # Define a list of pale pastel colors
    ax.barh(users, counts, color=colors)
    
    # Add labels to the bars
    
    label_offset = 0.02 * max(counts) # Offset for the text labels from the end of the bars 
    for i, user in enumerate(users):
        ax.text(counts[i] - label_offset, i, str(user), ha='right', va='center')
    
    # Set plot title and axis labels
    ax.set_title('Top Users')
    ax.set_xlabel('Count')
    # Remove y-axis ticks
    ax.yaxis.set_ticks([])

    # Adjust subplot parameters to avoid x-axis label cutoff
    plt.subplots_adjust(bottom=0.3)
    
    # Save the plot
    plt.savefig(statistics_path + "/top_user_plot.png")
    plt.close()

    
# Save overall counts in the Images directory
writer = pd.ExcelWriter(os.path.join(parent_directories, "Images")+'\\total_counts.xlsx')

dfoverall.to_excel(writer, index=False)
writer.close()
Added by Fred 2 years ago			`import os`
			`import csv`
			`import pandas as pd`
			`import numpy as np`
			`from datetime import datetime`
			`from pathlib import Path`
			`import matplotlib.pyplot as plt`
			`import matplotlib.dates as mdates`
			`import matplotlib.ticker as ticker`

			`import difflib`



			`# Provide the path to the CSV file in the parent directory`

			`code_dir = str(Path(os.getcwd()).parent)`
			`csv_path = os.path.join(code_dir, "coastsnap_sites.csv")`

			`coastsnap_sites_csv = pd.read_csv(csv_path)`
			`parent_directories = coastsnap_sites_csv.parent_directory[0]`

			`# Extract site names and parent directories`
			`site_names = coastsnap_sites_csv['site_name']`
			`root_id = coastsnap_sites_csv['root_id']`

			`dfoverall = pd.DataFrame(columns=['Site','Root_ID','Total count'])`


			`# Iterate over site names and parent directories`
			`for index, row in coastsnap_sites_csv.iterrows():`
			`site_name = row['site_name']`
			`root_id = row['root_id']`

			`print(site_name)`
			`# Create a dictionary to store photo counts`
			`day_of_week_counts = {}`
			`month_counts = {}`
			`hour_counts = {}`
			`year_counts = {}`
			`username_counts = {}`

			`# Construct the full path to the photo directory`
			`photo_directory = os.path.join(parent_directories, "Images", site_name, "Processed")`

			`df = pd.DataFrame(columns=['Site','Total count','Datetime','Year','Month','Date','Hour','Minute','Second','Day','Month name','User'])`
			`total = -1`

			`# Iterate over the years in the photo directory`
			`for year_directory in os.listdir(photo_directory):`

			`# Check if the folder name is in the 4 number year format`
			`if not year_directory.isdigit() or len(year_directory) != 4:`
			`continue # Skip this folder`

			`# Construct the full path to the year directory`
			`year_path = os.path.join(photo_directory, year_directory)`


			`# Iterate over the files in the year directory`
			`for filename in os.listdir(year_path):`
			`if filename.endswith(".jpg"):`

			`# Check that it isn't the first photo uploaded`
			`total = total + 1`
			`if total == 0:`
			`continue`


			`# Extract information from the filename`
			`filename = filename.replace("_",".")`
			`file_parts = filename.split(".")`
			`username = file_parts[-2]`
			`timestamp = ".".join([file_parts[1],file_parts[2],file_parts[3],file_parts[4],file_parts[5],file_parts[6],file_parts[8]])`

			`# Parse the timestamp`
			`date_format = "%a.%b.%d.%H.%M.%S.%Y"`
			`timestamp_datetime = datetime.strptime(timestamp, date_format)`

			`# Extract relevant information from the timestamp`
			`day_of_week = timestamp_datetime.strftime("%A")`
			`month = timestamp_datetime.strftime("%B")`
			`hour = timestamp_datetime.hour`
			`year = timestamp_datetime.year`

			`# Update photo counts`
			`day_of_week_counts[day_of_week] = day_of_week_counts.get(day_of_week, 0) + 1`
			`month_counts[month] = month_counts.get(month, 0) + 1`
			`hour_counts[hour] = hour_counts.get(hour, 0) + 1`
			`year_counts[year] = year_counts.get(year, 0) + 1`

			`# Update username counts`
			`username_counts[username] = username_counts.get(username, 0) + 1`

			`# Month string to number`
			`monthnum = datetime.strptime(file_parts[2], '%b').month`



			`column_map = {`
			`'Site': file_parts[9],`
			`'Datetime': timestamp_datetime,`
			`'Year': int(file_parts[8]),`
			`'Month': monthnum,`
			`'Date': int(file_parts[3]),`
			`'Hour': int(file_parts[4]),`
			`'Minute': int(file_parts[5]),`
			`'Second': int(file_parts[6]),`
			`'Month name': file_parts[2],`
			`'Day': file_parts[1],`
			`'User': file_parts[11],`
			`'Total count': total`
			`}`

			`new_row = pd.DataFrame([column_map])`

			`# Add the new row to the existing DataFrame`
			`df = pd.concat([df, new_row], ignore_index=True)`

			`if total < 1:`
			`continue`

			`# Determine top three usernames`
			`top_usernames = sorted(username_counts.items(), key=lambda x: x[1], reverse=True)[:3]`
			`dftopuser = pd.DataFrame(top_usernames, columns=['User', 'Count']).sort_values(by=['Count'], ascending=True).reset_index(drop=True)`

			`###`
			`# Save statistics to an excel spreadsheet`
			`###`

			`# Creat Statistics folder in Images directory if it doesn't exist`
			`statistics_path = os.path.join(parent_directories, "Images", site_name, "Statistics")`

			`if not os.path.exists(statistics_path):`
			`os.makedirs(statistics_path)`

			`# Create an Excel writer object to save the statistics`
			`writer = pd.ExcelWriter(statistics_path+'\statistics.xlsx')`

			`# Create a summary sheet which includes all photo data from the site`
			`df.to_excel(writer, sheet_name='Summary', index=False)`

			`catday = [ 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']`
			`catmonth = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']`

			`# Create a sheet summarising the hours of the day counts`
			`dfhours = pd.DataFrame(list(hour_counts.items()), columns=['Hour', 'Count']).sort_values(by=['Hour'])`
			`dfhours.to_excel(writer, sheet_name='Hours', index=False)`

			`#create a sheet summarising the days of the week counts`
			`dfdays = pd.DataFrame(list(day_of_week_counts.items()), columns=['Day', 'Count'])`
			`dfdays = dfdays.groupby(['Day']).sum().reindex(catday)`
			`dfdays.to_excel(writer, sheet_name='Days', index=True)`

			`#Create a sheet summarising the months of the year counts`
			`dfmonths = pd.DataFrame(list(month_counts.items()), columns=['Month', 'Count'])`
			`dfmonths = dfmonths.groupby(['Month']).sum().reindex(catmonth)`
			`dfmonths.to_excel(writer, sheet_name='Months', index=True)`

			`# Create a sheet summarising counts by year`
			`dfyears = pd.DataFrame(list(year_counts.items()), columns=['Year', 'Count']).sort_values(by=['Year'])`
			`dfyears.to_excel(writer, sheet_name='Years', index=False)`

			`# Create a sheet summarising the counts by user`
			`dfuser = pd.DataFrame(list(username_counts.items()), columns=['User', 'Count']).sort_values(by=['Count'], ascending=False)`

			`### Combines similar username entries to avoid repeated top users`
			`# Initialize a dictionary to store combined entries and their corresponding values`
			`combined_entries = {}`

			`# Create a copy of the column with the original capitalization`
			`dfuser['User' + '_original'] = dfuser['User']`

			`# Convert the column to lowercase for case-insensitive comparison`
			`dfuser['User'] = dfuser['User'].str.lower()`

			`# Iterate over each entry in the column`
			`for entry, original_entry, value in zip(dfuser['User'], dfuser['User' + '_original'], dfuser["Count"]):`
			`# Check if a similar entry already exists in the combined_entries dictionary`
			`similar_entry = next((key for key in combined_entries if difflib.SequenceMatcher(None, entry, key).ratio() >= 0.9), None)`

			`if similar_entry:`
			`# If a similar entry exists, add the value to the existing entry`
			`combined_entries[similar_entry][1] += value`
			`print(f"Combined entry: {original_entry} -> {combined_entries[similar_entry][0]}")`
			`else:`
			`# If no similar entry exists, create a new entry in the dictionary`
			`combined_entries[entry] = [original_entry, value]`

			`# Restore the original capitalization in the combined_entries dictionary`
			`for key, value in combined_entries.items():`
			`value[0] = dfuser.loc[dfuser['User'] == key, 'User' + '_original'].values[0]`

			`# Create a new dataframe with the combined entries and their summed values`
			`dfuser = pd.DataFrame(list(combined_entries.values()), columns=['User', "Count"])`

			`dfuser.to_excel(writer, sheet_name='Users', index=False)`

			`writer.close()`
			`###`


			`# Create overall statistics sheet`

			`column_map_ovr = {`
			`'Site': site_name,`
			`'Root_ID': root_id,`
			`'Total count': total}`

			`new_row_ov = pd.DataFrame([column_map_ovr])`

			`# Add the new row to the existing DataFrame`
			`dfoverall = pd.concat([dfoverall, new_row_ov], ignore_index=True)`



			`# Create the plot for total counts`
			`fig, ax = plt.subplots(figsize=(7,4))`
			`fig.subplots_adjust(right=0.8)`
			`ax.plot(df['Datetime'], df['Total count'])`


			`time_range = df['Datetime'].max() - df['Datetime'].min()`

			`if time_range == pd.Timedelta(days=0):`
			`continue`
			`if time_range < pd.Timedelta(days=365): # Less than 1 years`
			`minor_locator = mdates.MonthLocator()`
			`major_locator = mdates.MonthLocator(bymonth=[1, 3, 5, 7, 9, 11])`
			`major_formatter = mdates.DateFormatter('%b\n%Y')`
			`if time_range < pd.Timedelta(days=365*2) and time_range >= pd.Timedelta(days=365): # Between 1 and 2 years`
			`minor_locator = mdates.MonthLocator()`
			`minor_formatter = mdates.DateFormatter('%b')`
			`major_locator = mdates.MonthLocator(bymonth=[1, 4, 7, 10])`
			`major_formatter = mdates.DateFormatter('%b\n%Y')`
			`if time_range >= pd.Timedelta(days=365*2): # Longer than 2 years`
			`minor_locator = mdates.MonthLocator()`
			`major_locator = mdates.MonthLocator(bymonth=[1, 7])`
			`major_formatter = mdates.DateFormatter('%b\n%Y')`

			`# Set the x-axis tick locators and formatters`
			`ax.xaxis.set_minor_locator(minor_locator)`
			`ax.xaxis.set_major_locator(major_locator)`
			`ax.xaxis.set_major_formatter(major_formatter)`

			`# Add labels and title to the plot`
			`plt.xlabel('Month')`
			`plt.ylabel('Cumulative count')`
			`plt.title('Cumulative submissions since installation')`

			`# Rotate x-axis labels if needed`
			`#plt.xticks(rotation=45)`


			`# Add a table to the plot`
			`table_data = [`
			`["Total number\nof images", total],`
			`["Submissions\nper week", round(total * 7 / time_range.days,1)]]`
			`table = ax.table(cellText=table_data, colWidths = [0.2,0.1],cellLoc='left',loc='right', bbox=[1.05, 0.35, 0.4, 0.4])`


			`# Adjust table properties`
			`table.auto_set_font_size(False)`
			`table.set_fontsize(10)`
			`table.scale(1, 3)`


			`cells = table.properties()["celld"]`
			`for i in range(0,1):`
			`cells[i, 1]._loc = 'center'`


			`# Adjust the plot layout to accommodate the table`
			`plt.subplots_adjust(bottom=0.2)`

			`# Save the plot to a temporary file`
			`plt.savefig(statistics_path + "/total_count_plot.png", bbox_inches = 'tight')#, pad_inches = 0.5)`
			`plt.close()`


			`# Create 3 subplots for hours, days and months submissions`

			`# Set up the figure and axes`
			`fig, axs = plt.subplots(1, 3, figsize=(12, 4)) # Three subplots side by side`

			`# First subplot: Hourly Counts`
			`axs[0].bar(dfhours["Hour"], dfhours["Count"])`
			`axs[0].set_xlabel("Time of Day")`
			`axs[0].set_ylabel("Counts")`
			`axs[0].set_title("Submissions by\ntime of the day")`
			`axs[0].set_xticks(np.arange(0, 24, 3)) # Major ticks every hour`
			`axs[0].set_xticks(np.arange(0, 24, 1), minor=True) # Minor ticks every 3 hours`
			`axs[0].set_xticklabels([f"{h:02d}" for h in np.arange(0, 24, 3)])`

			`# Second subplot: Daily Counts`
			`axs[1].bar(dfdays.index, dfdays["Count"])`
			`axs[1].set_xlabel("Days of the Week")`
			`axs[1].set_ylabel("Counts")`
			`axs[1].set_title("Submissions by\nday of the week")`
			`axs[1].set_xticks(np.arange(7))`
			`axs[1].set_xticklabels(["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"])`

			`# Third subplot: Monthly Counts`
			`axs[2].bar(dfmonths.index, dfmonths["Count"])`
			`axs[2].set_xlabel("Months of the Year")`
			`axs[2].set_ylabel("Counts")`
			`axs[2].set_title("Submissions by\nmonth of the year")`
			`axs[2].set_xticks(np.arange(12))`
			`axs[2].set_xticklabels(["J", "F", "M", "A", "M", "J", "J", "A", "S", "O", "N", "D"])`

			`# Adjust the spacing between the subplots`
			`plt.tight_layout()`

			`# Save the plot`
			`plt.savefig(statistics_path + "/hour_day_month_plot.png")`
			`plt.close()`


			`# Create plot showing top users`

			`# Extract data from the DataFrame`
			`users = dftopuser['User']`
			`counts = dftopuser['Count']`

			`# Set the size of the plot`
			`fig, ax = plt.subplots(figsize=(7, 2))`

			`# Create the bar plot`
			`colors = ['#AD8A56', '#D7D7D7', '#AF9500'] # Define a list of pale pastel colors`
			`ax.barh(users, counts, color=colors)`

			`# Add labels to the bars`

			`label_offset = 0.02 * max(counts) # Offset for the text labels from the end of the bars`
			`for i, user in enumerate(users):`
			`ax.text(counts[i] - label_offset, i, str(user), ha='right', va='center')`

			`# Set plot title and axis labels`
			`ax.set_title('Top Users')`
			`ax.set_xlabel('Count')`
			`# Remove y-axis ticks`
			`ax.yaxis.set_ticks([])`

			`# Adjust subplot parameters to avoid x-axis label cutoff`
			`plt.subplots_adjust(bottom=0.3)`

			`# Save the plot`
			`plt.savefig(statistics_path + "/top_user_plot.png")`
			`plt.close()`


			`# Save overall counts in the Images directory`
			`writer = pd.ExcelWriter(os.path.join(parent_directories, "Images")+'\\total_counts.xlsx')`

			`dfoverall.to_excel(writer, index=False)`
			`writer.close()`