def count_prs_over_time(workouts, period='week'):
    """Count personal records (PRs) per person for each week or month.

    A (person, exercise) pair scores a PR in a period when the best
    ``Estimated1RM`` logged in that period is strictly greater than the best
    logged in every earlier period for the same pair. The first period in
    which a person logs an exercise always counts as a PR, because there is
    no earlier value to beat.

    Args:
        workouts: Records accepted by ``pd.DataFrame`` that provide the
            columns ``PersonId``, ``PersonName``, ``ExerciseId``,
            ``StartDate`` and ``Estimated1RM``.
        period: ``'week'`` buckets by calendar week; any other value buckets
            by calendar month.

    Returns:
        ``{person_id: {"PersonName": name, "PRCounts": {period_start: n}}}``
        where ``period_start`` is the ``pd.Timestamp`` at which the period
        begins and ``n`` is an ``int``. Only periods that contain at least
        one workout (from any person) appear as keys; people without
        activity in such a period get ``0``. Empty input yields ``{}``.
    """
    df = pd.DataFrame(workouts)
    if df.empty:
        # Nothing to aggregate; the column lookups below would raise KeyError.
        return {}

    df['StartDate'] = pd.to_datetime(df['StartDate'])
    df['Period'] = df['StartDate'].dt.to_period('W' if period == 'week' else 'M')

    # Best Estimated1RM per person / exercise / period. groupby sorts its
    # keys, so periods are in chronological order within each pair.
    lift_keys = ['PersonId', 'ExerciseId']
    period_max = (
        df.groupby(lift_keys + ['Period'])['Estimated1RM'].max().reset_index()
    )

    # Best value seen *before* each period. The shift must be done per group:
    # shifting the whole column would leak the previous pair's running best
    # into the first period of the next pair.
    period_max['RunningBest'] = (
        period_max.groupby(lift_keys)['Estimated1RM'].cummax()
    )
    previous_best = period_max.groupby(lift_keys)['RunningBest'].shift(1)

    # No earlier value (NaN) means this is the first attempt, which is a PR.
    period_max['IsPR'] = previous_best.isna() | (
        period_max['Estimated1RM'] > previous_best
    )

    pr_counts = (
        period_max.groupby(['PersonId', 'Period'])['IsPR'].sum().reset_index()
    )
    # Key the output by the timestamp at which each period starts.
    pr_counts['Period'] = pr_counts['Period'].dt.start_time

    # PersonId -> PersonName, in order of first appearance.
    names = dict(
        df[['PersonId', 'PersonName']]
        .drop_duplicates()
        .itertuples(index=False, name=None)
    )

    # One row per person, one column per period; absent combinations are 0.
    table = (
        pr_counts.pivot(index='PersonId', columns='Period', values='IsPR')
        .fillna(0)
        .astype(int)
        .reindex(list(names), fill_value=0)
    )

    return {
        person_id: {
            "PersonName": names[person_id],
            "PRCounts": {start: int(n) for start, n in counts.items()},
        }
        for person_id, counts in table.iterrows()
    }
Assuming weekly_pr_data is in the format {1: {"PersonName": "Alice", "PRCounts": {Timestamp('2022-01-01', freq='W-MON'): 0, ...}}, 2: {...}, ...}