refactor(sql_explorer): Replace Plotly with SVG rendering for plots

Replaces the Plotly-based graph generation in the SQL Explorer with direct SVG rendering within an HTML template, similar to the exercise progress sparklines.

- Modifies `routes/sql_explorer.py` endpoints (`plot_query`, `plot_unsaved_query`) to fetch raw data instead of using pandas/Plotly.
- Adds `utils.prepare_svg_plot_data` to process raw SQL results, determine plot type (scatter, line, bar, table), normalize data, and prepare it for SVG.
- Creates `templates/partials/sql_explorer/svg_plot.html` to render the SVG plot with axes, ticks, labels, and basic tooltips.
- Removes the `generate_plot` function's usage for SQL Explorer and the direct dependency on Plotly for this feature.
2025-04-15 19:34:26 +10:00
parent 51ec18c461
commit e947feb3e3
6 changed files with 636 additions and 152 deletions
--- a/features/people_graphs.py
+++ b/features/people_graphs.py
@@ -1,39 +1,36 @@
 import pandas as pd
-from utils import get_distinct_colors
-
+from utils import get_distinct_colors, calculate_estimated_1rm

 class PeopleGraphs:
    def __init__(self, db_connection_method):
        self.execute = db_connection_method

    def get(self, selected_people_ids=None, min_date=None, max_date=None, selected_exercise_ids=None):
-        # Base query
+        """
+        Fetch workout topsets, calculate Estimated1RM in Python,
+        then generate weekly workout & PR graphs.
+        """
+        # Build query (no in-SQL 1RM calculation).
        query = """
        SELECT
-            P.person_id AS "PersonId",
-            P.name AS "PersonName",
-            W.workout_id AS "WorkoutId",
-            W.start_date AS "StartDate",
-            T.topset_id AS "TopSetId",
+            P.person_id   AS "PersonId",
+            P.name        AS "PersonName",
+            W.workout_id  AS "WorkoutId",
+            W.start_date  AS "StartDate",
+            T.topset_id   AS "TopSetId",
            E.exercise_id AS "ExerciseId",
-            E.name AS "ExerciseName",
+            E.name        AS "ExerciseName",
            T.repetitions AS "Repetitions",
-            T.weight AS "Weight",
-            round((100 * T.Weight::numeric::integer)/(101.3-2.67123 * T.Repetitions),0)::numeric::integer AS "Estimated1RM"
+            T.weight      AS "Weight"
        FROM Person P
-            LEFT JOIN Workout W ON P.person_id = W.person_id
-            LEFT JOIN TopSet T ON W.workout_id = T.workout_id
-            LEFT JOIN Exercise E ON T.exercise_id = E.exercise_id
+        LEFT JOIN Workout W ON P.person_id = W.person_id
+        LEFT JOIN TopSet T ON W.workout_id = T.workout_id
+        LEFT JOIN Exercise E ON T.exercise_id = E.exercise_id
        WHERE TRUE
        """
-
-        # Parameters for the query
        params = []
-
-        # Add optional filters
        if selected_people_ids:
-            placeholders = ", ".join(["%s"] * len(selected_people_ids))
-            query += f" AND P.person_id IN ({placeholders})"
+            query += f" AND P.person_id IN ({', '.join(['%s'] * len(selected_people_ids))})"
            params.extend(selected_people_ids)
        if min_date:
            query += " AND W.start_date >= %s"
@@ -42,143 +39,233 @@ class PeopleGraphs:
            query += " AND W.start_date <= %s"
            params.append(max_date)
        if selected_exercise_ids:
-            placeholders = ", ".join(["%s"] * len(selected_exercise_ids))
-            query += f" AND E.exercise_id IN ({placeholders})"
+            query += f" AND E.exercise_id IN ({', '.join(['%s'] * len(selected_exercise_ids))})"
            params.extend(selected_exercise_ids)

-            # Execute the query
-        topsets = self.execute(query, params)
+        # Execute and convert to DataFrame
+        raw_data = self.execute(query, params)
+        if not raw_data:
+            # Return empty graphs if no data at all
+            return [
+                self.get_graph_model("Workouts per week", {}),
+                self.get_graph_model("PRs per week", {})
+            ]

-        # Generate graphs
-        weekly_counts = self.get_workout_counts(topsets, 'week')
-        weekly_pr_counts = self.count_prs_over_time(topsets, 'week')
+        df = pd.DataFrame(raw_data)

-        graphs = [self.get_weekly_pr_graph_model('Workouts per week', weekly_counts), self.get_weekly_pr_graph_model('PRs per week', weekly_pr_counts)]
-        return graphs
+        # Calculate Estimated1RM in Python
+        df['Estimated1RM'] = df.apply(
+            lambda row: calculate_estimated_1rm(row["Weight"], row["Repetitions"]), axis=1
+        )

-    def get_weekly_pr_graph_model(self, title, weekly_pr_data):
-        # Assuming weekly_pr_data is in the format {1: {"PersonName": "Alice", "PRCounts": {Timestamp('2022-01-01', freq='W-MON'): 0, ...}}, 2: {...}, ...}
+        # Build the weekly data models
+        weekly_counts   = self.get_workout_counts(df, period='week')
+        weekly_pr_counts = self.count_prs_over_time(df, period='week')

-        # Find the overall date range for all users
-        all_dates = [date for user_data in weekly_pr_data.values() for date in user_data["PRCounts"].keys()]
-        min_date, max_date = min(all_dates), max(all_dates)
-        total_span = (max_date - min_date).days or 1
-        relative_positions = [(date - min_date).days / total_span for date in all_dates]
+        return [
+            self.get_graph_model("Workouts per week", weekly_counts),
+            self.get_graph_model("PRs per week", weekly_pr_counts)
+        ]

-        # Calculate viewBox dimensions
-        max_value = max(max(user_data["PRCounts"].values()) for user_data in weekly_pr_data.values()) or 1
-        min_value = 0
-        value_range = max_value - min_value
-        vb_width = 200
-        vb_height= 75
+    def _prepare_period_column(self, df, period='week'):
+        """
+        Convert StartDate to datetime in place (unparseable values become NaT)
+        and add a Period column: weekly for 'week', monthly for anything else.
+        """
+        df['StartDate'] = pd.to_datetime(df['StartDate'], errors='coerce')
+        freq = 'W' if period == 'week' else 'M'
+        df['Period'] = df['StartDate'].dt.to_period(freq)
+        return df

-        plots = []
-        colors = get_distinct_colors(len(weekly_pr_data.items()))
-        for count, (user_id, user_data) in enumerate(weekly_pr_data.items()):
-            pr_counts = user_data["PRCounts"]
-            person_name = user_data["PersonName"]
+    def get_workout_counts(self, df, period='week'):
+        """
+        Returns a dictionary:
+          {
+            person_id: {
+              'PersonName': 'Alice',
+              'PRCounts': {
+                Timestamp('2023-01-02'): 2,
+                ...
+              }
+            },
+            ...
+          }
+        representing how many workouts each person performed per time period.
+        """
+        # Make a copy and prepare Period column
+        df = self._prepare_period_column(df.copy(), period)

-            values = pr_counts.values()
+        # Count unique workouts per (PersonId, PersonName, Period)
+        grp = (
+            df.groupby(['PersonId', 'PersonName', 'Period'], as_index=False)['WorkoutId']
+              .nunique()
+              .rename(columns={'WorkoutId': 'Count'})
+        )
+        # Convert each Period to its start time
+        grp['Period'] = grp['Period'].apply(lambda p: p.start_time)

-            values_scaled = [((value - min_value) / value_range) * vb_height for value in values]
-            plot_points = list(zip(values_scaled, relative_positions))
-            messages = [f'{value} for {person_name} at {date.strftime("%d %b %y")}' for value, date in zip(values, pr_counts.keys())]
-            plot_labels = zip(values_scaled, relative_positions, messages)
+        return self._pivot_to_graph_dict(
+            grp,
+            index_col='PersonId',
+            name_col='PersonName',
+            period_col='Period',
+            value_col='Count'
+        )

-            # Create a plot for each user
-            plot = {
-                'label': person_name,  # Use PersonName instead of User ID
-                'color': colors[count],
-                'points': plot_points,
-                'plot_labels': plot_labels
+    def count_prs_over_time(self, df, period='week'):
+        """
+        Count, per person and period, how many exercises set a new best
+        Estimated1RM. A period's maximum is a PR only when it beats that
+        person's running maximum for the same exercise in earlier periods,
+        so the first period recorded for an exercise is never counted.
+
+        Works on a copy of `df`; returns a dictionary:
+          {
+            person_id: {
+              'PersonName': 'Alice',
+              'PRCounts': {Timestamp('2023-01-02'): 1, ...}
+            }, ...
+          }
+        """
+        # Make a copy and prepare Period column
+        df = self._prepare_period_column(df.copy(), period)
+
+        # Max 1RM per (Person, Exercise, Period)
+        grouped = (
+            df.groupby(['PersonId', 'PersonName', 'ExerciseId', 'Period'], as_index=False)['Estimated1RM']
+              .max()
+              .rename(columns={'Estimated1RM': 'PeriodMax'})
+        )
+
+        # Sort so we can track "all-time max" up to that row
+        grouped.sort_values(by=['PersonId', 'ExerciseId', 'Period'], inplace=True)
+
+        # Previous all-time max, shifted *within* each person/exercise group so it never leaks across groups
+        grouped['AllTimeMax'] = grouped.groupby(['PersonId', 'ExerciseId'])['PeriodMax'].transform(lambda s: s.cummax().shift(1))
+        grouped['IsPR'] = (grouped['PeriodMax'] > grouped['AllTimeMax']).astype(int)
+
+        # Sum PRs across exercises for (Person, Period)
+        pr_counts = (
+            grouped.groupby(['PersonId', 'PersonName', 'Period'], as_index=False)['IsPR']
+                   .sum()
+                   .rename(columns={'IsPR': 'Count'})
+        )
+        pr_counts['Period'] = pr_counts['Period'].apply(lambda p: p.start_time)
+
+        return self._pivot_to_graph_dict(
+            pr_counts,
+            index_col='PersonId',
+            name_col='PersonName',
+            period_col='Period',
+            value_col='Count'
+        )
+
+    def _pivot_to_graph_dict(self, df, index_col, name_col, period_col, value_col):
+        """
+        Convert [index_col, name_col, period_col, value_col]
+        into a nested dictionary for plotting:
+           {
+             person_id: {
+               'PersonName': <...>,
+               'PRCounts': {
+                 <timestamp>: <value>,
+                 ...
+               }
+             },
+             ...
+           }
+        """
+        if df.empty:
+            return {}
+
+        pivoted = df.pivot(
+            index=[index_col, name_col],
+            columns=period_col,
+            values=value_col
+        ).fillna(0)
+
+        pivoted.reset_index(inplace=True)
+
+        result = {}
+        for _, row in pivoted.iterrows():
+            pid = row[index_col]
+            pname = row[name_col]
+            # Remaining columns = date -> count
+            period_counts = row.drop([index_col, name_col]).to_dict()
+            result[pid] = {
+                'PersonName': pname,
+                'PRCounts': period_counts
            }
-            plots.append(plot)

-        # Return workout data with SVG dimensions and data points
+        return result
+
+    def get_graph_model(self, title, data_dict):
+        """
+        Builds a line-graph model from a dictionary of the form:
+           {
+             person_id: {
+               'PersonName': 'Alice',
+               'PRCounts': {
+                 Timestamp('2023-01-02'): 2,
+                 Timestamp('2023-01-09'): 1,
+                 ...
+               }
+             },
+             ...
+           }
+        """
+        if not data_dict:
+            return {
+                'title': title,
+                'vb_width': 200,
+                'vb_height': 75,
+                'plots': []
+            }
+
+        # Gather all dates & values
+        all_dates  = []
+        all_values = []
+        for user_data in data_dict.values():
+            all_dates.extend(user_data['PRCounts'].keys())
+            all_values.extend(user_data['PRCounts'].values())
+
+        min_date = min(all_dates)
+        max_date = max(all_dates)
+        date_span = max((max_date - min_date).days, 1)
+
+        max_val = max(all_values)
+        min_val = 0
+        val_range = max_val - min_val if max_val != min_val else 1
+
+        vb_width, vb_height = 200, 75
+        colors = get_distinct_colors(len(data_dict))
+        plots = []
+
+        for i, (pid, user_data) in enumerate(data_dict.items()):
+            name = user_data['PersonName']
+            pr_counts = user_data['PRCounts']
+            # Sort by date so points are in chronological order
+            sorted_pr = sorted(pr_counts.items(), key=lambda x: x[0])
+
+            points = []
+            labels = []
+            for d, val in sorted_pr:
+                # Scale x,y to fit [0..1], then we multiply y by vb_height
+                x = (d - min_date).days / date_span
+                y = (val - min_val) / val_range * vb_height
+                points.append((y, x))
+                labels.append((y, x, f'{val} for {name} at {d.strftime("%d %b %y")}'))
+
+            plots.append({
+                'label': name,
+                'color': colors[i],
+                'points': points,
+                'plot_labels': labels
+            })
+
        return {
            'title': title,
            'vb_width': vb_width,
            'vb_height': vb_height,
            'plots': plots
        }
-    
-    def get_workout_counts(self, workouts, period='week'):
-        df = pd.DataFrame(workouts)
-        
-        # Convert 'StartDate' to datetime and set period
-        df['StartDate'] = pd.to_datetime(df['StartDate'])
-        df['Period'] = df['StartDate'].dt.to_period('W' if period == 'week' else 'M')
-
-        # Group by PersonId, Period and count unique workouts
-        workout_counts = df.groupby(['PersonId', 'Period'])['WorkoutId'].nunique().reset_index()
-
-        # Convert 'Period' to timestamp using the start date of the period
-        workout_counts['Period'] = workout_counts['Period'].apply(lambda x: x.start_time)
-
-        # Pivot the result to get periods as columns
-        workout_counts_pivot = workout_counts.pivot(index='PersonId', columns='Period', values='WorkoutId').fillna(0)
-
-        # Include person names
-        names = df[['PersonId', 'PersonName']].drop_duplicates().set_index('PersonId')
-        workout_counts_final = names.join(workout_counts_pivot, how='left').fillna(0)
-
-        # Convert DataFrame to dictionary
-        result = workout_counts_final.reset_index().to_dict('records')
-
-        # Reformat the dictionary to desired structure
-        formatted_result = {}
-        for record in result:
-            person_id = record.pop('PersonId')
-            person_name = record.pop('PersonName')
-            pr_counts = {k: v for k, v in record.items()}
-            formatted_result[person_id] = {'PersonName': person_name, 'PRCounts': pr_counts}
-
-        return formatted_result
-
-    def count_prs_over_time(self, workouts, period='week'):
-        df = pd.DataFrame(workouts)
-        
-        # Convert 'StartDate' to datetime
-        df['StartDate'] = pd.to_datetime(df['StartDate'])
-
-        # Set period as week or month
-        df['Period'] = df['StartDate'].dt.to_period('W' if period == 'week' else 'M')
-
-        # Group by Person, Exercise, and Period to find max Estimated1RM in each period
-        period_max = df.groupby(['PersonId', 'ExerciseId', 'Period'])['Estimated1RM'].max().reset_index()
-
-        # Determine all-time max Estimated1RM up to the start of each period
-        period_max['AllTimeMax'] = period_max.groupby(['PersonId', 'ExerciseId'])['Estimated1RM'].cummax().shift(1)
-
-        # Identify PRs as entries where the period's max Estimated1RM exceeds the all-time max
-        period_max['IsPR'] = period_max['Estimated1RM'] > period_max['AllTimeMax']
-
-        # Count PRs in each period for each person
-        pr_counts = period_max.groupby(['PersonId', 'Period'])['IsPR'].sum().reset_index()
-
-        # Convert 'Period' to timestamp using the start date of the period
-        pr_counts['Period'] = pr_counts['Period'].apply(lambda x: x.start_time)
-
-        # Pivot table to get the desired output format
-        output = pr_counts.pivot(index='PersonId', columns='Period', values='IsPR').fillna(0)
-
-        # Convert only the PR count columns to integers
-        for col in output.columns:
-            output[col] = output[col].astype(int)
-
-        # Merge with names and convert to desired format
-        names = df[['PersonId', 'PersonName']].drop_duplicates().set_index('PersonId')
-        output = names.join(output, how='left').fillna(0)
-
-        # Reset the index to bring 'PersonId' back as a column
-        output.reset_index(inplace=True)
-
-        # Convert to the final dictionary format with PRCounts nested
-        result = {}
-        for index, row in output.iterrows():
-            person_id = row['PersonId']
-            person_name = row['PersonName']
-            pr_counts = row.drop(['PersonId', 'PersonName']).to_dict()
-            result[person_id] = {"PersonName": person_name, "PRCounts": pr_counts}
-
-        return result