|
|
|
@ -86,15 +86,16 @@ def main(xml_name):
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
df = pd.DataFrame(revisions)
|
|
|
|
|
df['date'] = pd.to_datetime(df['date'])
|
|
|
|
|
df = df.set_index('date').sort_index()
|
|
|
|
|
df.index = pd.to_datetime(df['date'])
|
|
|
|
|
|
|
|
|
|
# Convert to local time, and round to nearest day
|
|
|
|
|
df.index = df.index.tz_convert('Australia/Sydney')
|
|
|
|
|
df.index = df.index.round('d')
|
|
|
|
|
df = df.tz_convert('Australia/Sydney')
|
|
|
|
|
df = df.sort_index()
|
|
|
|
|
df['date'] = df.index.date
|
|
|
|
|
|
|
|
|
|
# Drop duplicate entries (ignore edits for same user/same page/same day)
|
|
|
|
|
df = df.drop_duplicates()
|
|
|
|
|
df = df.drop(columns=['date'])
|
|
|
|
|
|
|
|
|
|
# Only include non-academic users
|
|
|
|
|
df = df[df['user'].isin(PROJECTS_TEAM)]
|
|
|
|
|