refactor(sql_explorer): Replace Plotly with SVG rendering for plots

Replaces the Plotly-based graph generation in the SQL Explorer with direct SVG rendering within an HTML template, similar to the exercise progress sparklines.

- Modifies `routes/sql_explorer.py` endpoints (`plot_query`, `plot_unsaved_query`) to fetch raw data instead of using pandas/Plotly.
- Adds `utils.prepare_svg_plot_data` to process raw SQL results, determine plot type (scatter, line, bar, table), normalize data, and prepare it for SVG.
- Creates `templates/partials/sql_explorer/svg_plot.html` to render the SVG plot with axes, ticks, labels, and basic tooltips.
- Removes the `generate_plot` function's usage for SQL Explorer and the direct dependency on Plotly for this feature.
This commit is contained in:
Peter Stockings
2025-04-15 19:34:26 +10:00
parent 51ec18c461
commit e947feb3e3
6 changed files with 636 additions and 152 deletions

230
utils.py
View File

@@ -3,7 +3,9 @@ from datetime import datetime, date, timedelta
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.io as pio
import plotly.io as pio # Keep for now, might remove later if generate_plot is fully replaced
import math
from decimal import Decimal
def convert_str_to_date(date_str, format='%Y-%m-%d'):
try:
@@ -141,4 +143,228 @@ def calculate_estimated_1rm(weight, repetitions):
if repetitions == 0: # Avoid division by zero
return 0
estimated_1rm = round((100 * int(weight)) / (101.3 - 2.67123 * repetitions), 0)
return int(estimated_1rm)
return int(estimated_1rm)
def _is_numeric(val):
"""Check if a value is numeric (int, float, Decimal)."""
return isinstance(val, (int, float, Decimal))
def _is_datetime(val):
"""Check if a value is a date or datetime object."""
return isinstance(val, (date, datetime))
def _get_column_type(results, column_name):
"""Determine the effective type of a column (numeric, datetime, categorical)."""
numeric_count = 0
datetime_count = 0
total_count = 0
for row in results:
val = row.get(column_name)
if val is not None:
total_count += 1
if _is_numeric(val):
numeric_count += 1
elif _is_datetime(val):
datetime_count += 1
if total_count == 0: return 'categorical' # Default if all null or empty
if numeric_count / total_count > 0.8: return 'numeric' # Allow some non-numeric noise
if datetime_count / total_count > 0.8: return 'datetime'
return 'categorical'
def _normalize_value(value, min_val, range_val, target_max):
"""Normalize a value to a target range (e.g., SVG coordinate)."""
if range_val == 0: return target_max / 2 # Avoid division by zero, place in middle
return ((value - min_val) / range_val) * target_max
def prepare_svg_plot_data(results, columns, title):
"""
Prepares data from raw SQL results for SVG plotting.
Determines plot type and scales data.
"""
if not results:
raise ValueError("No data provided for plotting.")
num_columns = len(columns)
plot_type = 'table' # Default if no suitable plot found
plot_data = {}
x_col, y_col = None, None
x_type, y_type = None, None
# --- Determine Plot Type and Columns ---
if num_columns == 1:
x_col = columns[0]
x_type = _get_column_type(results, x_col)
if x_type == 'numeric':
plot_type = 'histogram'
else:
plot_type = 'bar_count' # Bar chart of value counts
elif num_columns >= 2:
# Prioritize common patterns
x_col, y_col = columns[0], columns[1]
x_type = _get_column_type(results, x_col)
y_type = _get_column_type(results, y_col)
if x_type == 'numeric' and y_type == 'numeric':
plot_type = 'scatter'
elif x_type == 'datetime' and y_type == 'numeric':
plot_type = 'line' # Treat datetime as numeric for position
elif x_type == 'categorical' and y_type == 'numeric':
plot_type = 'bar'
elif x_type == 'numeric' and y_type == 'categorical':
# Could do horizontal bar, but let's stick to vertical for now
plot_type = 'bar' # Treat numeric as category label, categorical as value (count?) - less common
# Or maybe swap? Let's assume categorical X, numeric Y is more likely intended
x_col, y_col = columns[1], columns[0] # Try swapping
x_type, y_type = y_type, x_type
if not (x_type == 'categorical' and y_type == 'numeric'):
plot_type = 'table' # Revert if swap didn't help
else: # Other combinations (datetime/cat, cat/cat, etc.) default to table
plot_type = 'table'
# --- Basic SVG Setup ---
vb_width = 500
vb_height = 300
margin = {'top': 20, 'right': 20, 'bottom': 50, 'left': 60} # Increased bottom/left for labels/axes
draw_width = vb_width - margin['left'] - margin['right']
draw_height = vb_height - margin['top'] - margin['bottom']
plot_data = {
'title': title,
'plot_type': plot_type,
'vb_width': vb_width,
'vb_height': vb_height,
'margin': margin,
'draw_width': draw_width,
'draw_height': draw_height,
'x_axis_label': x_col or '',
'y_axis_label': y_col or '',
'plots': [],
'x_ticks': [],
'y_ticks': [],
'original_results': results, # Keep original for table fallback
'original_columns': columns
}
if plot_type == 'table':
return plot_data # No further processing needed for table fallback
# --- Data Extraction and Scaling (Specific to Plot Type) ---
points = []
x_values_raw = []
y_values_raw = []
# Extract relevant data, handling potential type issues
for row in results:
x_val_raw = row.get(x_col)
y_val_raw = row.get(y_col)
# Convert datetimes to numeric representation (e.g., days since min date)
if x_type == 'datetime':
x_values_raw.append(x_val_raw) # Keep original dates for range calculation
elif _is_numeric(x_val_raw):
x_values_raw.append(float(x_val_raw)) # Convert Decimal to float
# Add handling for categorical X if needed (e.g., bar chart)
if y_type == 'numeric':
if _is_numeric(y_val_raw):
y_values_raw.append(float(y_val_raw))
else:
y_values_raw.append(None) # Mark non-numeric Y as None
# Add handling for categorical Y if needed
if not x_values_raw or not y_values_raw:
plot_data['plot_type'] = 'table' # Fallback if essential data is missing
return plot_data
# Calculate ranges (handle datetime separately)
if x_type == 'datetime':
valid_dates = [d for d in x_values_raw if d is not None]
if not valid_dates:
plot_data['plot_type'] = 'table'; return plot_data
min_x_dt, max_x_dt = min(valid_dates), max(valid_dates)
# Convert dates to days since min_date for numerical scaling
total_days = (max_x_dt - min_x_dt).days
x_values_numeric = [(d - min_x_dt).days if d is not None else None for d in x_values_raw]
min_x, max_x = 0, total_days
else: # Numeric or Categorical (treat categorical index as numeric for now)
valid_x = [x for x in x_values_raw if x is not None]
if not valid_x:
plot_data['plot_type'] = 'table'; return plot_data
min_x, max_x = min(valid_x), max(valid_x)
x_values_numeric = x_values_raw # Already numeric (or will be treated as such)
valid_y = [y for y in y_values_raw if y is not None]
if not valid_y:
plot_data['plot_type'] = 'table'; return plot_data
min_y, max_y = min(valid_y), max(valid_y)
range_x = max_x - min_x
range_y = max_y - min_y
# Scale points
for i, row in enumerate(results):
x_num = x_values_numeric[i]
y_num = y_values_raw[i] # Use original list which might have None
if x_num is None or y_num is None: continue # Skip points with missing essential data
# Scale X to drawing width, Y to drawing height (inverted Y for SVG)
scaled_x = margin['left'] + _normalize_value(x_num, min_x, range_x, draw_width)
scaled_y = margin['top'] + draw_height - _normalize_value(y_num, min_y, range_y, draw_height)
points.append({
'x': scaled_x,
'y': scaled_y,
'original': row # Store original row data for tooltips
})
# --- Generate Ticks ---
num_ticks = 5 # Desired number of ticks
# X Ticks
x_ticks = []
if range_x >= 0:
step_x = (max_x - min_x) / (num_ticks -1) if num_ticks > 1 and range_x > 0 else 0
for i in range(num_ticks):
tick_val_raw = min_x + i * step_x
tick_pos = margin['left'] + _normalize_value(tick_val_raw, min_x, range_x, draw_width)
label = ""
if x_type == 'datetime':
tick_date = min_x_dt + timedelta(days=tick_val_raw)
label = tick_date.strftime('%Y-%m-%d') # Format date label
else: # Numeric
label = f"{tick_val_raw:.1f}" if isinstance(tick_val_raw, float) else str(tick_val_raw)
x_ticks.append({'value': tick_val_raw, 'label': label, 'position': tick_pos})
# Y Ticks
y_ticks = []
if range_y >= 0:
step_y = (max_y - min_y) / (num_ticks - 1) if num_ticks > 1 and range_y > 0 else 0
for i in range(num_ticks):
tick_val = min_y + i * step_y
tick_pos = margin['top'] + draw_height - _normalize_value(tick_val, min_y, range_y, draw_height)
label = f"{tick_val:.1f}" if isinstance(tick_val, float) else str(tick_val)
y_ticks.append({'value': tick_val, 'label': label, 'position': tick_pos})
# --- Finalize Plot Data ---
# For now, put all points into one series
plot_data['plots'].append({
'label': f'{y_col} vs {x_col}',
'color': '#388fed', # Default color
'points': points
})
plot_data['x_ticks'] = x_ticks
plot_data['y_ticks'] = y_ticks
# Add specific adjustments for plot types if needed (e.g., bar width)
if plot_type == 'bar':
# Calculate bar width based on number of bars/categories
# This needs more refinement based on how categorical X is handled
plot_data['bar_width'] = draw_width / len(points) * 0.8 if points else 10
return plot_data