From e947feb3e3f313a7e27316d13f3a567a0c01404a Mon Sep 17 00:00:00 2001 From: Peter Stockings Date: Tue, 15 Apr 2025 19:34:26 +1000 Subject: [PATCH] refactor(sql_explorer): Replace Plotly with SVG rendering for plots Replaces the Plotly-based graph generation in the SQL Explorer with direct SVG rendering within an HTML template, similar to the exercise progress sparklines. - Modifies `routes/sql_explorer.py` endpoints (`plot_query`, `plot_unsaved_query`) to fetch raw data instead of using pandas/Plotly. - Adds `utils.prepare_svg_plot_data` to process raw SQL results, determine plot type (scatter, line, bar, table), normalize data, and prepare it for SVG. - Creates `templates/partials/sql_explorer/svg_plot.html` to render the SVG plot with axes, ticks, labels, and basic tooltips. - Removes the `generate_plot` function's usage for SQL Explorer and the direct dependency on Plotly for this feature. --- features/people_graphs.py | 369 +++++++++++------- routes/sql_explorer.py | 46 ++- templates/base.html | 1 - templates/changelog/changelog.html | 17 + templates/partials/sql_explorer/svg_plot.html | 125 ++++++ utils.py | 230 ++++++++++- 6 files changed, 636 insertions(+), 152 deletions(-) create mode 100644 templates/partials/sql_explorer/svg_plot.html diff --git a/features/people_graphs.py b/features/people_graphs.py index 12e32f0..054df55 100644 --- a/features/people_graphs.py +++ b/features/people_graphs.py @@ -1,39 +1,36 @@ import pandas as pd -from utils import get_distinct_colors - +from utils import get_distinct_colors, calculate_estimated_1rm class PeopleGraphs: def __init__(self, db_connection_method): self.execute = db_connection_method def get(self, selected_people_ids=None, min_date=None, max_date=None, selected_exercise_ids=None): - # Base query + """ + Fetch workout topsets, calculate Estimated1RM in Python, + then generate weekly workout & PR graphs. + """ + # Build query (no in-SQL 1RM calculation). query = """ SELECT - P.person_id AS "PersonId", - P.name AS "PersonName", - W.workout_id AS "WorkoutId", - W.start_date AS "StartDate", - T.topset_id AS "TopSetId", + P.person_id AS "PersonId", + P.name AS "PersonName", + W.workout_id AS "WorkoutId", + W.start_date AS "StartDate", + T.topset_id AS "TopSetId", E.exercise_id AS "ExerciseId", - E.name AS "ExerciseName", + E.name AS "ExerciseName", T.repetitions AS "Repetitions", - T.weight AS "Weight", - round((100 * T.Weight::numeric::integer)/(101.3-2.67123 * T.Repetitions),0)::numeric::integer AS "Estimated1RM" + T.weight AS "Weight" FROM Person P - LEFT JOIN Workout W ON P.person_id = W.person_id - LEFT JOIN TopSet T ON W.workout_id = T.workout_id - LEFT JOIN Exercise E ON T.exercise_id = E.exercise_id + LEFT JOIN Workout W ON P.person_id = W.person_id + LEFT JOIN TopSet T ON W.workout_id = T.workout_id + LEFT JOIN Exercise E ON T.exercise_id = E.exercise_id WHERE TRUE """ - - # Parameters for the query params = [] - - # Add optional filters if selected_people_ids: - placeholders = ", ".join(["%s"] * len(selected_people_ids)) - query += f" AND P.person_id IN ({placeholders})" + query += f" AND P.person_id IN ({', '.join(['%s'] * len(selected_people_ids))})" params.extend(selected_people_ids) if min_date: query += " AND W.start_date >= %s" @@ -42,143 +39,233 @@ class PeopleGraphs: query += " AND W.start_date <= %s" params.append(max_date) if selected_exercise_ids: - placeholders = ", ".join(["%s"] * len(selected_exercise_ids)) - query += f" AND E.exercise_id IN ({placeholders})" + query += f" AND E.exercise_id IN ({', '.join(['%s'] * len(selected_exercise_ids))})" params.extend(selected_exercise_ids) - # Execute the query - topsets = self.execute(query, params) + # Execute and convert to DataFrame + raw_data = self.execute(query, params) + if not raw_data: + # Return empty graphs if no data at all + return [ + self.get_graph_model("Workouts per week", {}), + self.get_graph_model("PRs per week", {}) + ] - # Generate graphs - weekly_counts = self.get_workout_counts(topsets, 'week') - weekly_pr_counts = self.count_prs_over_time(topsets, 'week') + df = pd.DataFrame(raw_data) - graphs = [self.get_weekly_pr_graph_model('Workouts per week', weekly_counts), self.get_weekly_pr_graph_model('PRs per week', weekly_pr_counts)] - return graphs + # Calculate Estimated1RM in Python + df['Estimated1RM'] = df.apply( + lambda row: calculate_estimated_1rm(row["Weight"], row["Repetitions"]), axis=1 + ) - def get_weekly_pr_graph_model(self, title, weekly_pr_data): - # Assuming weekly_pr_data is in the format {1: {"PersonName": "Alice", "PRCounts": {Timestamp('2022-01-01', freq='W-MON'): 0, ...}}, 2: {...}, ...} + # Build the weekly data models + weekly_counts = self.get_workout_counts(df, period='week') + weekly_pr_counts = self.count_prs_over_time(df, period='week') - # Find the overall date range for all users - all_dates = [date for user_data in weekly_pr_data.values() for date in user_data["PRCounts"].keys()] - min_date, max_date = min(all_dates), max(all_dates) - total_span = (max_date - min_date).days or 1 - relative_positions = [(date - min_date).days / total_span for date in all_dates] + return [ + self.get_graph_model("Workouts per week", weekly_counts), + self.get_graph_model("PRs per week", weekly_pr_counts) + ] - # Calculate viewBox dimensions - max_value = max(max(user_data["PRCounts"].values()) for user_data in weekly_pr_data.values()) or 1 - min_value = 0 - value_range = max_value - min_value - vb_width = 200 - vb_height= 75 + def _prepare_period_column(self, df, period='week'): + """ + Convert StartDate to datetime and add a Period column + based on 'week' or 'month' as needed. + """ + df['StartDate'] = pd.to_datetime(df['StartDate'], errors='coerce') + freq = 'W' if period == 'week' else 'M' + df['Period'] = df['StartDate'].dt.to_period(freq) + return df - plots = [] - colors = get_distinct_colors(len(weekly_pr_data.items())) - for count, (user_id, user_data) in enumerate(weekly_pr_data.items()): - pr_counts = user_data["PRCounts"] - person_name = user_data["PersonName"] + def get_workout_counts(self, df, period='week'): + """ + Returns a dictionary: + { + person_id: { + 'PersonName': 'Alice', + 'PRCounts': { + Timestamp('2023-01-02'): 2, + ... + } + }, + ... + } + representing how many workouts each person performed per time period. + """ + # Make a copy and prepare Period column + df = self._prepare_period_column(df.copy(), period) - values = pr_counts.values() + # Count unique workouts per (PersonId, PersonName, Period) + grp = ( + df.groupby(['PersonId', 'PersonName', 'Period'], as_index=False)['WorkoutId'] + .nunique() + .rename(columns={'WorkoutId': 'Count'}) + ) + # Convert each Period to its start time + grp['Period'] = grp['Period'].apply(lambda p: p.start_time) - values_scaled = [((value - min_value) / value_range) * vb_height for value in values] - plot_points = list(zip(values_scaled, relative_positions)) - messages = [f'{value} for {person_name} at {date.strftime("%d %b %y")}' for value, date in zip(values, pr_counts.keys())] - plot_labels = zip(values_scaled, relative_positions, messages) + return self._pivot_to_graph_dict( + grp, + index_col='PersonId', + name_col='PersonName', + period_col='Period', + value_col='Count' + ) - # Create a plot for each user - plot = { - 'label': person_name, # Use PersonName instead of User ID - 'color': colors[count], - 'points': plot_points, - 'plot_labels': plot_labels + def count_prs_over_time(self, df, period='week'): + """ + Returns a dictionary: + { + person_id: { + 'PersonName': 'Alice', + 'PRCounts': { + Timestamp('2023-01-02'): 1, + ... + } + }, + ... + } + representing how many PRs each person hit per time period. + """ + # Make a copy and prepare Period column + df = self._prepare_period_column(df.copy(), period) + + # Max 1RM per (Person, Exercise, Period) + grouped = ( + df.groupby(['PersonId', 'PersonName', 'ExerciseId', 'Period'], as_index=False)['Estimated1RM'] + .max() + .rename(columns={'Estimated1RM': 'PeriodMax'}) + ) + + # Sort so we can track "all-time max" up to that row + grouped.sort_values(by=['PersonId', 'ExerciseId', 'Period'], inplace=True) + + # For each person & exercise, track the cumulative max (shifted by 1) + grouped['AllTimeMax'] = grouped.groupby(['PersonId', 'ExerciseId'])['PeriodMax'].cummax().shift(1) + grouped['IsPR'] = (grouped['PeriodMax'] > grouped['AllTimeMax']).astype(int) + + # Sum PRs across exercises for (Person, Period) + pr_counts = ( + grouped.groupby(['PersonId', 'PersonName', 'Period'], as_index=False)['IsPR'] + .sum() + .rename(columns={'IsPR': 'Count'}) + ) + pr_counts['Period'] = pr_counts['Period'].apply(lambda p: p.start_time) + + return self._pivot_to_graph_dict( + pr_counts, + index_col='PersonId', + name_col='PersonName', + period_col='Period', + value_col='Count' + ) + + def _pivot_to_graph_dict(self, df, index_col, name_col, period_col, value_col): + """ + Convert [index_col, name_col, period_col, value_col] + into a nested dictionary for plotting: + { + person_id: { + 'PersonName': <...>, + 'PRCounts': { + : , + ... + } + }, + ... + } + """ + if df.empty: + return {} + + pivoted = df.pivot( + index=[index_col, name_col], + columns=period_col, + values=value_col + ).fillna(0) + + pivoted.reset_index(inplace=True) + + result = {} + for _, row in pivoted.iterrows(): + pid = row[index_col] + pname = row[name_col] + # Remaining columns = date -> count + period_counts = row.drop([index_col, name_col]).to_dict() + result[pid] = { + 'PersonName': pname, + 'PRCounts': period_counts } - plots.append(plot) - # Return workout data with SVG dimensions and data points + return result + + def get_graph_model(self, title, data_dict): + """ + Builds a line-graph model from a dictionary of the form: + { + person_id: { + 'PersonName': 'Alice', + 'PRCounts': { + Timestamp('2023-01-02'): 2, + Timestamp('2023-01-09'): 1, + ... + } + }, + ... + } + """ + if not data_dict: + return { + 'title': title, + 'vb_width': 200, + 'vb_height': 75, + 'plots': [] + } + + # Gather all dates & values + all_dates = [] + all_values = [] + for user_data in data_dict.values(): + all_dates.extend(user_data['PRCounts'].keys()) + all_values.extend(user_data['PRCounts'].values()) + + min_date = min(all_dates) + max_date = max(all_dates) + date_span = max((max_date - min_date).days, 1) + + max_val = max(all_values) + min_val = 0 + val_range = max_val - min_val if max_val != min_val else 1 + + vb_width, vb_height = 200, 75 + colors = get_distinct_colors(len(data_dict)) + plots = [] + + for i, (pid, user_data) in enumerate(data_dict.items()): + name = user_data['PersonName'] + pr_counts = user_data['PRCounts'] + # Sort by date so points are in chronological order + sorted_pr = sorted(pr_counts.items(), key=lambda x: x[0]) + + points = [] + labels = [] + for d, val in sorted_pr: + # Scale x,y to fit [0..1], then we multiply y by vb_height + x = (d - min_date).days / date_span + y = (val - min_val) / val_range * vb_height + points.append((y, x)) + labels.append((y, x, f'{val} for {name} at {d.strftime("%d %b %y")}')) + + plots.append({ + 'label': name, + 'color': colors[i], + 'points': points, + 'plot_labels': labels + }) + return { 'title': title, 'vb_width': vb_width, 'vb_height': vb_height, 'plots': plots } - - def get_workout_counts(self, workouts, period='week'): - df = pd.DataFrame(workouts) - - # Convert 'StartDate' to datetime and set period - df['StartDate'] = pd.to_datetime(df['StartDate']) - df['Period'] = df['StartDate'].dt.to_period('W' if period == 'week' else 'M') - - # Group by PersonId, Period and count unique workouts - workout_counts = df.groupby(['PersonId', 'Period'])['WorkoutId'].nunique().reset_index() - - # Convert 'Period' to timestamp using the start date of the period - workout_counts['Period'] = workout_counts['Period'].apply(lambda x: x.start_time) - - # Pivot the result to get periods as columns - workout_counts_pivot = workout_counts.pivot(index='PersonId', columns='Period', values='WorkoutId').fillna(0) - - # Include person names - names = df[['PersonId', 'PersonName']].drop_duplicates().set_index('PersonId') - workout_counts_final = names.join(workout_counts_pivot, how='left').fillna(0) - - # Convert DataFrame to dictionary - result = workout_counts_final.reset_index().to_dict('records') - - # Reformat the dictionary to desired structure - formatted_result = {} - for record in result: - person_id = record.pop('PersonId') - person_name = record.pop('PersonName') - pr_counts = {k: v for k, v in record.items()} - formatted_result[person_id] = {'PersonName': person_name, 'PRCounts': pr_counts} - - return formatted_result - - def count_prs_over_time(self, workouts, period='week'): - df = pd.DataFrame(workouts) - - # Convert 'StartDate' to datetime - df['StartDate'] = pd.to_datetime(df['StartDate']) - - # Set period as week or month - df['Period'] = df['StartDate'].dt.to_period('W' if period == 'week' else 'M') - - # Group by Person, Exercise, and Period to find max Estimated1RM in each period - period_max = df.groupby(['PersonId', 'ExerciseId', 'Period'])['Estimated1RM'].max().reset_index() - - # Determine all-time max Estimated1RM up to the start of each period - period_max['AllTimeMax'] = period_max.groupby(['PersonId', 'ExerciseId'])['Estimated1RM'].cummax().shift(1) - - # Identify PRs as entries where the period's max Estimated1RM exceeds the all-time max - period_max['IsPR'] = period_max['Estimated1RM'] > period_max['AllTimeMax'] - - # Count PRs in each period for each person - pr_counts = period_max.groupby(['PersonId', 'Period'])['IsPR'].sum().reset_index() - - # Convert 'Period' to timestamp using the start date of the period - pr_counts['Period'] = pr_counts['Period'].apply(lambda x: x.start_time) - - # Pivot table to get the desired output format - output = pr_counts.pivot(index='PersonId', columns='Period', values='IsPR').fillna(0) - - # Convert only the PR count columns to integers - for col in output.columns: - output[col] = output[col].astype(int) - - # Merge with names and convert to desired format - names = df[['PersonId', 'PersonName']].drop_duplicates().set_index('PersonId') - output = names.join(output, how='left').fillna(0) - - # Reset the index to bring 'PersonId' back as a column - output.reset_index(inplace=True) - - # Convert to the final dictionary format with PRCounts nested - result = {} - for index, row in output.iterrows(): - person_id = row['PersonId'] - person_name = row['PersonName'] - pr_counts = row.drop(['PersonId', 'PersonName']).to_dict() - result[person_id] = {"PersonName": person_name, "PRCounts": pr_counts} - - return result \ No newline at end of file diff --git a/routes/sql_explorer.py b/routes/sql_explorer.py index a82e813..4653df5 100644 --- a/routes/sql_explorer.py +++ b/routes/sql_explorer.py @@ -5,7 +5,7 @@ from flask import Blueprint, render_template, request, current_app, jsonify from jinja2_fragments import render_block from flask_htmx import HTMX from extensions import db -from utils import generate_plot +from utils import prepare_svg_plot_data # Will be created for SVG data prep sql_explorer_bp = Blueprint('sql_explorer', __name__, url_prefix='/sql') htmx = HTMX() @@ -281,17 +281,47 @@ def sql_schema(): def plot_query(query_id): (title, query) = _get_saved_query(query_id) if not query: return "Query not found", 404 - results_df = db.read_sql_as_df(query) - plot_div = generate_plot(results_df, title) - return plot_div + # Fetch raw results instead of DataFrame + (results, columns, error) = _execute_sql(query) + if error: + # Return an HTML snippet indicating the error + return f'<div class="p-4 text-red-700 bg-red-100 border border-red-400 rounded">Error executing query: {error}</div>', 400 + if not results: + # Return an HTML snippet indicating no data + return '<div class="p-4 text-yellow-700 bg-yellow-100 border border-yellow-400 rounded">No data returned by query.</div>' + + try: + # Prepare data for SVG plotting (function to be created in utils.py) + plot_data = prepare_svg_plot_data(results, columns, title) + # Render the new SVG template + return render_template('partials/sql_explorer/svg_plot.html', **plot_data) + except Exception as e: + current_app.logger.error(f"Error preparing SVG plot data: {e}") + # Return an HTML snippet indicating a processing error + return f'<div class="p-4 text-red-700 bg-red-100 border border-red-400 rounded">Error preparing plot data: {e}</div>', 500 @sql_explorer_bp.route("/plot/show", methods=['POST']) def plot_unsaved_query(): query = request.form.get('query') - title = request.form.get('title') - results_df = db.read_sql_as_df(query) - plot_div = generate_plot(results_df, title) - return plot_div + title = request.form.get('title', 'SQL Query Plot') # Add default title + # Fetch raw results instead of DataFrame + (results, columns, error) = _execute_sql(query) + if error: + # Return an HTML snippet indicating the error + return f'<div class="p-4 text-red-700 bg-red-100 border border-red-400 rounded">Error executing query: {error}</div>', 400 + if not results: + # Return an HTML snippet indicating no data + return '<div class="p-4 text-yellow-700 bg-yellow-100 border border-yellow-400 rounded">No data returned by query.</div>' + + try: + # Prepare data for SVG plotting (function to be created in utils.py) + plot_data = prepare_svg_plot_data(results, columns, title) + # Render the new SVG template + return render_template('partials/sql_explorer/svg_plot.html', **plot_data) + except Exception as e: + current_app.logger.error(f"Error preparing SVG plot data: {e}") + # Return an HTML snippet indicating a processing error + return f'<div class="p-4 text-red-700 bg-red-100 border border-red-400 rounded">Error preparing plot data: {e}</div>', 500 @sql_explorer_bp.route("/generate_sql", methods=['POST']) def generate_sql(): diff --git a/templates/base.html b/templates/base.html index 5278d0c..6909ce3 100644 --- a/templates/base.html +++ b/templates/base.html @@ -20,7 +20,6 @@ -