Optimise count_prs_over_time function to result in 14.2X improvement in speed which will hopefully remove lag on page requests
This commit is contained in:
61
utils.py
61
utils.py
@@ -344,47 +344,52 @@ def get_workout_counts(workouts, period='week'):
|
|||||||
return workout_counts
|
return workout_counts
|
||||||
|
|
||||||
def count_prs_over_time(workouts, period='week'):
|
def count_prs_over_time(workouts, period='week'):
|
||||||
# Convert to DataFrame
|
|
||||||
df = pd.DataFrame(workouts)
|
df = pd.DataFrame(workouts)
|
||||||
|
|
||||||
# Convert 'StartDate' to datetime
|
# Convert 'StartDate' to datetime
|
||||||
df['StartDate'] = pd.to_datetime(df['StartDate'])
|
df['StartDate'] = pd.to_datetime(df['StartDate'])
|
||||||
|
|
||||||
# Determine the range of periods to cover
|
# Set period as week or month
|
||||||
min_date = df['StartDate'].min()
|
df['Period'] = df['StartDate'].dt.to_period('W' if period == 'week' else 'M')
|
||||||
max_date = pd.Timestamp(datetime.now())
|
|
||||||
|
|
||||||
# Generate a complete range of periods
|
# Group by Person, Exercise, and Period to find max Estimated1RM in each period
|
||||||
period_range = pd.date_range(start=min_date, end=max_date, freq='W-MON' if period == 'week' else 'MS')
|
period_max = df.groupby(['PersonId', 'ExerciseId', 'Period'])['Estimated1RM'].max().reset_index()
|
||||||
|
|
||||||
# Initialize a dictionary to store PR counts and names
|
# Determine all-time max Estimated1RM up to the start of each period
|
||||||
pr_counts = {
|
period_max['AllTimeMax'] = period_max.groupby(['PersonId', 'ExerciseId'])['Estimated1RM'].cummax().shift(1)
|
||||||
person_id: {
|
|
||||||
"PersonName": person_name,
|
|
||||||
"PRCounts": {p: 0 for p in period_range}
|
|
||||||
} for person_id, person_name in df[['PersonId', 'PersonName']].drop_duplicates().values
|
|
||||||
}
|
|
||||||
|
|
||||||
# Process the workouts
|
# Identify PRs as entries where the period's max Estimated1RM exceeds the all-time max
|
||||||
for person_id, person_data in pr_counts.items():
|
period_max['IsPR'] = period_max['Estimated1RM'] > period_max['AllTimeMax']
|
||||||
person_df = df[df['PersonId'] == person_id]
|
|
||||||
|
|
||||||
for period_start in person_data["PRCounts"]:
|
# Count PRs in each period for each person
|
||||||
period_end = period_start + pd.DateOffset(weeks=1) if period == 'week' else period_start + pd.DateOffset(months=1)
|
pr_counts = period_max.groupby(['PersonId', 'Period'])['IsPR'].sum().reset_index()
|
||||||
period_workouts = person_df[(person_df['StartDate'] >= period_start) & (person_df['StartDate'] < period_end)]
|
|
||||||
|
|
||||||
for exercise_id in period_workouts['ExerciseId'].unique():
|
# Convert 'Period' to timestamp using the start date of the period
|
||||||
exercise_max = period_workouts[period_workouts['ExerciseId'] == exercise_id]['Estimated1RM'].max()
|
pr_counts['Period'] = pr_counts['Period'].apply(lambda x: x.start_time)
|
||||||
|
|
||||||
# Check if this is a PR
|
# Pivot table to get the desired output format
|
||||||
previous_max = person_df[(person_df['StartDate'] < period_start) &
|
output = pr_counts.pivot(index='PersonId', columns='Period', values='IsPR').fillna(0)
|
||||||
(person_df['ExerciseId'] == exercise_id)]['Estimated1RM'].max()
|
|
||||||
|
|
||||||
if pd.isna(previous_max) or exercise_max > previous_max:
|
|
||||||
person_data["PRCounts"][period_start] += 1
|
|
||||||
|
|
||||||
return pr_counts
|
# Convert only the PR count columns to integers
|
||||||
|
for col in output.columns:
|
||||||
|
output[col] = output[col].astype(int)
|
||||||
|
|
||||||
|
# Merge with names and convert to desired format
|
||||||
|
names = df[['PersonId', 'PersonName']].drop_duplicates().set_index('PersonId')
|
||||||
|
output = names.join(output, how='left').fillna(0)
|
||||||
|
|
||||||
|
# Reset the index to bring 'PersonId' back as a column
|
||||||
|
output.reset_index(inplace=True)
|
||||||
|
|
||||||
|
# Convert to the final dictionary format with PRCounts nested
|
||||||
|
result = {}
|
||||||
|
for index, row in output.iterrows():
|
||||||
|
person_id = row['PersonId']
|
||||||
|
person_name = row['PersonName']
|
||||||
|
pr_counts = row.drop(['PersonId', 'PersonName']).to_dict()
|
||||||
|
result[person_id] = {"PersonName": person_name, "PRCounts": pr_counts}
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
def get_weekly_pr_graph_model(title, weekly_pr_data):
|
def get_weekly_pr_graph_model(title, weekly_pr_data):
|
||||||
# Assuming weekly_pr_data is in the format {1: {"PersonName": "Alice", "PRCounts": {Timestamp('2022-01-01', freq='W-MON'): 0, ...}}, 2: {...}, ...}
|
# Assuming weekly_pr_data is in the format {1: {"PersonName": "Alice", "PRCounts": {Timestamp('2022-01-01', freq='W-MON'): 0, ...}}, 2: {...}, ...}
|
||||||
|
|||||||
Reference in New Issue
Block a user