# workout/utils.py

import colorsys
from datetime import datetime, date, timedelta
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.io as pio

def get_workouts(topsets):
    """Group flat top-set rows into workouts, most recent workout first.

    Rows whose ``WorkoutId`` or ``TopSetId`` is ``None`` are ignored. Within
    a workout the top sets are ordered by ``TopSetId``; the workout's
    ``StartDate`` is taken from its first top set in that order.

    Returns a list of dicts with keys ``WorkoutId``, ``StartDate`` and
    ``TopSets``, sorted by ``StartDate`` descending.
    """
    topset_fields = ('TopSetId', 'ExerciseId', 'ExerciseName',
                     'Weight', 'Repetitions', 'Estimated1RM')

    usable_rows = [
        row for row in topsets
        if not (row['WorkoutId'] is None or row['TopSetId'] is None)
    ]
    usable_rows.sort(key=lambda row: (row['WorkoutId'], row['TopSetId']))

    by_workout = {}
    for row in usable_rows:
        key = row['WorkoutId']
        entry = by_workout.get(key)
        if entry is None:
            # First row seen for this workout defines its header fields.
            entry = {'WorkoutId': key, 'StartDate': row['StartDate'], 'TopSets': []}
            by_workout[key] = entry
        entry['TopSets'].append({field: row[field] for field in topset_fields})

    newest_first = list(by_workout.values())
    newest_first.sort(key=lambda workout: workout['StartDate'], reverse=True)
    return newest_first


def get_all_exercises_from_topsets(topsets):
    """Return the distinct exercises referenced by ``topsets``.

    Each exercise appears once, in first-seen order, as a dict with
    ``ExerciseId`` and ``ExerciseName`` (``'Unknown'`` when the row has no
    name key). Rows with a missing or falsy ``ExerciseId`` are skipped.
    """
    seen = {}
    for row in topsets:
        key = row.get('ExerciseId')
        if not key or key in seen:
            continue
        seen[key] = {
            'ExerciseId': key,
            'ExerciseName': row.get('ExerciseName', 'Unknown'),
        }
    return [*seen.values()]

def get_topsets_for_person(person_topsets):
    """Build per-exercise top-set lists and progress graphs for one person.

    The rows are grouped by ``ExerciseId``; each group is sorted by
    ``StartDate`` (newest first) and handed to ``get_exercise_graph_model``
    with the epoch fixed to ``'All'``. Groups whose exercise name is falsy
    are left out.

    Returns a list of dicts with keys ``ExerciseId``, ``ExerciseName``,
    ``Topsets`` and ``ExerciseProgressGraph``.
    """
    by_exercise = {}
    for entry in person_topsets:
        by_exercise.setdefault(entry['ExerciseId'], []).append(entry)

    results = []
    for exercise_id, group in by_exercise.items():
        newest_first = sorted(group, key=lambda entry: entry['StartDate'], reverse=True)

        # Parallel series, one value per top set, in newest-first order.
        estimated_1rm = [entry['Estimated1RM'] for entry in newest_first]
        repetitions = [entry['Repetitions'] for entry in newest_first]
        weight = [entry['Weight'] for entry in newest_first]
        start_dates = [entry['StartDate'] for entry in newest_first]
        messages = [f'{t["Repetitions"]} x {t["Weight"]}kg ({t["Estimated1RM"]}kg E1RM) on {t["StartDate"].strftime("%d %b %y")}' for t in newest_first]

        latest = newest_first[0]
        person_id = latest['PersonId']
        exercise_name = latest['ExerciseName']

        required = (exercise_name, estimated_1rm, repetitions, weight, start_dates, messages)
        if not all(required):
            continue

        graph = get_exercise_graph_model(
            exercise_name, estimated_1rm, repetitions, weight,
            start_dates, messages, 'All', person_id, exercise_id)
        results.append({
            'ExerciseId': exercise_id,
            'ExerciseName': exercise_name,
            'Topsets': newest_first,
            'ExerciseProgressGraph': graph
        })

    return results

def get_people_and_exercise_rep_maxes(topsets, selected_person_ids, selected_exercise_ids, min_date, max_date):
    """Assemble the per-person exercise view plus overall statistics.

    Only rows whose person and exercise are selected and whose ``StartDate``
    lies within ``[min_date, max_date]`` contribute to ``People``. ``Stats``
    is computed by ``get_stats_from_topsets`` from the full, unfiltered
    ``topsets`` argument.

    Returns ``{"People": [...], "Stats": [...]}``.
    """
    topsets_by_person = {}
    for row in topsets:
        if row['PersonId'] not in selected_person_ids:
            continue
        if row['ExerciseId'] not in selected_exercise_ids:
            continue
        if not (min_date <= row['StartDate'] <= max_date):
            continue
        topsets_by_person.setdefault(row['PersonId'], []).append(row)

    people = [
        {
            'PersonId': person_id,
            'PersonName': rows[0]['PersonName'],
            # Distinct truthy workout ids among this person's filtered rows.
            'NumberOfWorkouts': len({r['WorkoutId'] for r in rows if r['WorkoutId']}),
            'Exercises': get_topsets_for_person(rows)
        }
        for person_id, rows in topsets_by_person.items()
    ]

    return {"People": people, "Stats": get_stats_from_topsets(topsets)}


def get_stats_from_topsets(topsets):
    """Summarise a collection of top-set rows as a list of display stats.

    Each stat is a ``{"Text": label, "Value": number}`` dict. The totals
    (workouts, sets, exercises) are always present. "People tracked" is added
    when more than one person appears. The date-based stats are added only
    when at least one workout *and* at least one non-``None`` ``StartDate``
    exist, so rows without dates no longer make ``min()``/``max()`` raise on
    an empty list. "Days Since Last Workout" and the averages are only
    reported once there are two or more workouts.

    ``StartDate`` values are subtracted from ``date.today()``, so they are
    expected to be ``datetime.date`` objects.
    """
    workout_count = len({t['WorkoutId'] for t in topsets if t['WorkoutId'] is not None})
    people_count = len({t['PersonId'] for t in topsets if t['PersonId'] is not None})
    exercise_count = len({t['ExerciseId'] for t in topsets if t['ExerciseId'] is not None})
    workout_start_dates = [t['StartDate'] for t in topsets if t['StartDate'] is not None]

    stats = [{"Text": "Total Workouts", "Value": workout_count},
             {"Text": "Total Sets", "Value": len(topsets)},
             {"Text": "Total Exercises", "Value": exercise_count}]
    if people_count > 1:
        stats.append({"Text": "People tracked", "Value": people_count})

    # Without any known start date there is nothing to measure against.
    if workout_count == 0 or not workout_start_dates:
        return stats

    today = date.today()
    first_workout_date = min(workout_start_dates)
    last_workout_date = max(workout_start_dates)

    stats.append({"Text": "Days Since First Workout",
                  "Value": (today - first_workout_date).days})
    if workout_count >= 2:
        stats.append({"Text": "Days Since Last Workout",
                      "Value": (today - last_workout_date).days})
        stats.append({"Text": "Average sets per workout",
                      "Value": round(len(topsets) / workout_count, 1)})

        training_duration = last_workout_date - first_workout_date
        # A zero-length span would divide by zero, so the rate is skipped.
        if training_duration > timedelta(days=0):
            stats.append({"Text": "Average Workouts Per Week",
                          "Value": round(workout_count / (training_duration.days / 7), 1)})

    return stats


def convert_str_to_date(date_str, format='%Y-%m-%d'):
    """Parse ``date_str`` with ``format`` and return a ``datetime.date``.

    Returns ``None`` when the text does not match the format (``ValueError``)
    or the input is not a string (``TypeError``).
    """
    try:
        parsed = datetime.strptime(date_str, format)
    except (ValueError, TypeError):
        return None
    return parsed.date()


def get_earliest_and_latest_workout_date(person):
    """Return ``(earliest, latest)`` workout ``StartDate`` for ``person``.

    ``person`` is a mapping with an optional ``'Workouts'`` list. When that
    list is missing or empty, today's date is returned for both positions.
    """
    workouts = person.get('Workouts', [])
    if not workouts:
        today = datetime.now().date()
        return (today, today)

    start_dates = [workout['StartDate'] for workout in workouts]
    return (min(start_dates), max(start_dates))


def filter_workout_topsets(workout, selected_exercise_ids):
    """Keep only the top sets of ``workout`` whose exercise is selected.

    The ``'TopSets'`` entry of ``workout`` is replaced in place and the same
    ``workout`` object is returned.
    """
    kept = []
    for top_set in workout['TopSets']:
        if top_set['ExerciseId'] in selected_exercise_ids:
            kept.append(top_set)
    workout['TopSets'] = kept
    return workout


def flatten_list(list_of_lists):
    """Concatenate the items of every sub-iterable into one list (one level deep)."""
    merged = []
    for sublist in list_of_lists:
        merged.extend(sublist)
    return merged


def first_and_last_visible_days_in_month(first_day_of_month, last_day_of_month):
    """Return the first and last dates shown on a Sunday-to-Saturday month grid.

    The start is moved back to the Sunday on or before ``first_day_of_month``
    and the end is moved forward to the Saturday on or after
    ``last_day_of_month``.
    """
    # date.weekday(): Monday == 0 ... Sunday == 6.
    days_since_sunday = (first_day_of_month.weekday() + 1) % 7
    days_until_saturday = (5 - last_day_of_month.weekday()) % 7

    grid_start = first_day_of_month - timedelta(days=days_since_sunday)
    grid_end = last_day_of_month + timedelta(days=days_until_saturday)
    return (grid_start, grid_end)


def flatten(lst):
    """
    Recursively flatten arbitrarily nested lists into a single flat list.

    Only ``list`` instances are expanded; tuples and other iterables are
    kept as single items.
    """
    flat = []
    for element in lst:
        if not isinstance(element, list):
            flat.append(element)
            continue
        flat += flatten(element)
    return flat

def get_exercise_graph_model(title, estimated_1rm, repetitions, weight, start_dates, messages, epoch, person_id, exercise_id, min_date=None, max_date=None):
    """Build the view model for an exercise progress graph (SVG sparkline).

    ``estimated_1rm``, ``repetitions``, ``weight``, ``start_dates`` and
    ``messages`` are parallel sequences with one entry per top set. Every
    series is normalised into a 200 x 75 viewBox: x is the date's relative
    position in ``[0, 1]`` between the earliest and latest date, y is the
    value scaled to ``[0, vb_height]`` within its own min/max.

    ``best_fit_formula`` reports the slope of the least-squares line through
    the E1RM points in kg per week / per month. It is negative when E1RM is
    trending down and ``0.0`` when no trend can be fitted (fewer than two
    points, or all points on the same day). Previously this was
    ``(max - min) / span``, which could never be negative and reported
    1 kg/day for flat or single-point data.

    The ``min_date`` / ``max_date`` arguments are accepted for interface
    compatibility but are replaced by the range of ``start_dates``; the
    returned ``min_date`` / ``max_date`` always describe the data.

    Raises ``ValueError`` if any of the value sequences is empty.
    """
    min_date, max_date = min(start_dates), max(start_dates)
    span_days = (max_date - min_date).days
    total_span = span_days or 1  # avoid dividing by zero for same-day data

    min_e1rm, max_e1rm = min(estimated_1rm), max(estimated_1rm)
    min_reps, max_reps = min(repetitions), max(repetitions)
    min_weight, max_weight = min(weight), max(weight)

    # A flat series has zero range; fall back to 1 so scaling stays defined.
    e1rm_range = max_e1rm - min_e1rm or 1
    reps_range = max_reps - min_reps or 1
    weight_range = max_weight - min_weight or 1

    # Fixed viewBox size (the old code multiplied and divided by the same
    # value to arrive at these numbers).
    vb_width, vb_height = 200, 75

    relative_positions = np.array([(day - min_date).days / total_span for day in start_dates])
    estimated_1rm_scaled = ((np.array(estimated_1rm) - min_e1rm) / e1rm_range) * vb_height
    repetitions_scaled = ((np.array(repetitions) - min_reps) / reps_range) * vb_height
    weight_scaled = ((np.array(weight) - min_weight) / weight_range) * vb_height

    # Line of best fit through the scaled E1RM points. A fit needs at least
    # two points spread over more than one day; otherwise the x column is
    # constant and the regression is degenerate.
    best_fit_points = []
    slope_kg_per_day = 0.0
    if len(relative_positions) > 1 and span_days > 0:
        try:
            m, b = np.polyfit(relative_positions, estimated_1rm_scaled, 1)
        except (np.linalg.LinAlgError, ValueError):
            # Fit failed: draw no trend line and report a zero slope.
            best_fit_points = []
        else:
            y_best_fit = m * relative_positions + b
            best_fit_points = list(zip(y_best_fit.tolist(), relative_positions.tolist()))
            # m is in "viewBox units per whole date span"; convert to kg/day.
            slope_kg_per_day = float(m) * e1rm_range / vb_height / total_span

    best_fit_formula = {
        'kg_per_week': round(slope_kg_per_day * 7, 1),
        'kg_per_month': round(slope_kg_per_day * 30, 1)
    }

    x_positions = relative_positions.tolist()
    repetitions_data = {
        'label': 'Reps',
        'color': '#388fed',
        'points': list(zip(repetitions_scaled.tolist(), x_positions))
    }
    weight_data = {
        'label': 'Weight',
        'color': '#bd3178',
        'points': list(zip(weight_scaled.tolist(), x_positions))
    }
    estimated_1rm_data = {
        'label': 'E1RM',
        'color': '#2ca02c',
        'points': list(zip(estimated_1rm_scaled.tolist(), x_positions))
    }

    # One (x position, tooltip text) pair per top set.
    plot_labels = list(zip(x_positions, messages))

    return {
        'title': title,
        'vb_width': vb_width,
        'vb_height': vb_height,
        'plots': [repetitions_data, weight_data, estimated_1rm_data],
        'best_fit_points': best_fit_points,
        'best_fit_formula': best_fit_formula,
        'plot_labels': plot_labels,
        'epochs': ['Custom', '1M', '3M', '6M', 'All'],
        'selected_epoch': epoch,
        'person_id': person_id,
        'exercise_id': exercise_id,
        'min_date': min_date,
        'max_date': max_date
    }


def get_workout_counts(workouts, period='week'):
    """Count distinct workouts per person per week (or month).

    ``workouts`` is a sequence of row dicts with at least ``PersonId``,
    ``PersonName``, ``WorkoutId`` and ``StartDate``. Any ``period`` other
    than ``'week'`` is treated as monthly.

    Returns ``{person_id: {'PersonName': name, 'PRCounts': {period_start:
    count}}}`` where ``period_start`` is the timestamp at which the period
    begins. Periods in which a person has no workout are reported as 0.
    """
    frame = pd.DataFrame(workouts)

    frame['StartDate'] = pd.to_datetime(frame['StartDate'])
    frequency = 'W' if period == 'week' else 'M'
    frame['Period'] = frame['StartDate'].dt.to_period(frequency)

    # Distinct workouts for every (person, period) pair.
    counts = (
        frame.groupby(['PersonId', 'Period'])['WorkoutId']
        .nunique()
        .reset_index()
    )
    counts['Period'] = counts['Period'].apply(lambda span: span.start_time)

    # One row per person, one column per period start.
    per_person = counts.pivot(index='PersonId', columns='Period', values='WorkoutId').fillna(0)

    person_names = frame[['PersonId', 'PersonName']].drop_duplicates().set_index('PersonId')
    combined = person_names.join(per_person, how='left').fillna(0)

    formatted = {}
    for entry in combined.reset_index().to_dict('records'):
        person_id = entry.pop('PersonId')
        person_name = entry.pop('PersonName')
        # Whatever is left in the record is the per-period counts.
        formatted[person_id] = {'PersonName': person_name, 'PRCounts': dict(entry)}

    return formatted

def count_prs_over_time(workouts, period='week'):
    """Count personal records (PRs) per person per week (or month).

    ``workouts`` is a sequence of row dicts with at least ``PersonId``,
    ``PersonName``, ``ExerciseId``, ``Estimated1RM`` and ``StartDate``. Any
    ``period`` other than ``'week'`` is treated as monthly.

    For every (person, exercise) the best ``Estimated1RM`` of each period is
    compared with the best of all *earlier* periods of that same person and
    exercise; a strictly higher value counts as one PR. The first period in
    which a lift appears has no earlier value and is therefore not a PR.

    Returns ``{person_id: {"PersonName": name, "PRCounts": {period_start:
    count}}}``; an empty input yields ``{}``.
    """
    df = pd.DataFrame(workouts)
    if df.empty:
        return {}

    df['StartDate'] = pd.to_datetime(df['StartDate'])
    df['Period'] = df['StartDate'].dt.to_period('W' if period == 'week' else 'M')

    lift_keys = ['PersonId', 'ExerciseId']

    # Best Estimated1RM per lift and period; groupby leaves the rows sorted
    # by person, exercise and period.
    period_max = df.groupby(lift_keys + ['Period'])['Estimated1RM'].max().reset_index()

    # Best value before each period. The shift must happen within each lift:
    # a plain Series.shift would carry the previous lift's running max into
    # the first period of the next lift and create false PRs.
    period_max['RunningMax'] = period_max.groupby(lift_keys)['Estimated1RM'].cummax()
    period_max['AllTimeMax'] = period_max.groupby(lift_keys)['RunningMax'].shift(1)

    # Comparison against NaN (no earlier period) is False.
    period_max['IsPR'] = period_max['Estimated1RM'] > period_max['AllTimeMax']

    pr_counts = period_max.groupby(['PersonId', 'Period'])['IsPR'].sum().reset_index()
    pr_counts['Period'] = pr_counts['Period'].apply(lambda span: span.start_time)

    # One row per person, one integer column per period start.
    output = pr_counts.pivot(index='PersonId', columns='Period', values='IsPR').fillna(0).astype(int)

    names = df[['PersonId', 'PersonName']].drop_duplicates().set_index('PersonId')
    output = names.join(output, how='left').fillna(0)
    output.reset_index(inplace=True)

    result = {}
    for _, row in output.iterrows():
        person_id = row['PersonId']
        person_name = row['PersonName']
        pr_counts_for_person = row.drop(['PersonId', 'PersonName']).to_dict()
        result[person_id] = {"PersonName": person_name, "PRCounts": pr_counts_for_person}

    return result

def get_weekly_pr_graph_model(title, weekly_pr_data):
    """Build the view model for a multi-person per-period line graph.

    ``weekly_pr_data`` maps a person id to ``{"PersonName": str, "PRCounts":
    {period_start: count}}``. Every person becomes one plot in a 200 x 75
    viewBox: x is the period's relative position between the earliest and
    latest period over all people, y is the count scaled against the largest
    count over all people.

    Each plot's ``points`` and ``plot_labels`` are lists, so they can be
    iterated more than once (``plot_labels`` used to be a single-pass ``zip``
    iterator). X positions are computed from each person's own periods rather
    than from a list concatenated across people. When there is no data at
    all, a model with an empty ``plots`` list is returned instead of raising.
    """
    vb_width = 200
    vb_height = 75

    all_dates = [
        period_start
        for user_data in weekly_pr_data.values()
        for period_start in user_data["PRCounts"]
    ]
    if not all_dates:
        return {
            'title': title,
            'vb_width': vb_width,
            'vb_height': vb_height,
            'plots': []
        }

    min_date, max_date = min(all_dates), max(all_dates)
    total_span = (max_date - min_date).days or 1  # single period: avoid /0

    # The y axis always starts at 0; fall back to 1 when every count is 0.
    min_value = 0
    max_value = max(
        max(user_data["PRCounts"].values(), default=0)
        for user_data in weekly_pr_data.values()
    ) or 1
    value_range = max_value - min_value

    colors = get_distinct_colors(len(weekly_pr_data))
    plots = []
    for color, user_data in zip(colors, weekly_pr_data.values()):
        pr_counts = user_data["PRCounts"]
        person_name = user_data["PersonName"]

        period_starts = list(pr_counts.keys())
        values = list(pr_counts.values())

        positions = [(start - min_date).days / total_span for start in period_starts]
        values_scaled = [((value - min_value) / value_range) * vb_height for value in values]
        messages = [
            f'{value} for {person_name} at {start.strftime("%d %b %y")}'
            for value, start in zip(values, period_starts)
        ]

        plots.append({
            'label': person_name,
            'color': color,
            'points': list(zip(values_scaled, positions)),
            'plot_labels': list(zip(values_scaled, positions, messages))
        })

    return {
        'title': title,
        'vb_width': vb_width,
        'vb_height': vb_height,
        'plots': plots
    }

def get_distinct_colors(n):
    """Return ``n`` hex colour strings with hues spread evenly round the colour wheel.

    All colours share lightness 0.6 and saturation 0.4 (arguments are in
    ``colorsys`` HLS order).
    """
    palette = []
    for step in range(n):
        red, green, blue = colorsys.hls_to_rgb(step / n, 0.6, 0.4)
        channels = (int(red * 255), int(green * 255), int(blue * 255))
        palette.append('#{:02x}{:02x}{:02x}'.format(*channels))
    return palette

def generate_plot(df, title):
    """
    Pick a Plotly chart for ``df`` based on its shape and return it as an HTML div.

    * empty frame: a short "no data" paragraph is returned instead of a chart
    * one column: histogram if numeric, otherwise a bar chart
    * two columns: scatter plot if both are numeric, otherwise a bar chart
    * three or more columns: heatmap of the correlation matrix

    The correlation matrix is computed over numeric and boolean columns only.
    Since pandas 2.0 ``DataFrame.corr()`` no longer drops non-numeric columns
    silently and raises when it meets text, so those columns are filtered out
    first.
    """
    if df.empty:
        return "<p>No data available to plot.</p>"

    num_columns = len(df.columns)

    if num_columns == 1:
        column = df.columns[0]
        if pd.api.types.is_numeric_dtype(df[column]):
            fig = px.histogram(df, x=column, title=title)
        else:
            fig = px.bar(df, x=column, title=title)
    elif num_columns == 2:
        col1, col2 = df.columns
        both_numeric = (pd.api.types.is_numeric_dtype(df[col1])
                        and pd.api.types.is_numeric_dtype(df[col2]))
        if both_numeric:
            fig = px.scatter(df, x=col1, y=col2, title=title)
        else:
            fig = px.bar(df, x=col1, y=col2, title=title)
    else:
        numeric_df = df.select_dtypes(include=["number", "bool"])
        fig = px.imshow(numeric_df.corr(), text_auto=True, title=title)

    # full_html=False yields an embeddable fragment rather than a whole page.
    return pio.to_html(fig, full_html=False)

def calculate_estimated_1rm(weight, repetitions):
    """Estimate a one-rep max from a set of ``repetitions`` at ``weight``.

    Computes ``100 * weight / (101.3 - 2.67123 * repetitions)`` rounded to
    the nearest whole number. ``weight`` is truncated with ``int()`` before
    use. Zero repetitions yields ``0``.
    """
    if repetitions == 0:
        return 0
    numerator = 100 * int(weight)
    denominator = 101.3 - 2.67123 * repetitions
    return int(round(numerator / denominator, 0))