# Time-Series-Analysis/app.py

from flask import Flask, request, render_template, send_file, session
import pandas as pd
import io
import os
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import pmdarima as pm
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
from werkzeug.utils import secure_filename
import matplotlib

matplotlib.use('Agg')  # Use non-interactive backend
import matplotlib.pyplot as plt
import io
import base64
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error

app = Flask(__name__)
# Folder (relative to the working directory) that receives uploads and the
# processed CSV files written by process_time_series().
app.config['UPLOAD_FOLDER'] = 'Uploads'
# File extensions accepted by allowed_file().
app.config['ALLOWED_EXTENSIONS'] = {'csv', 'xls', 'xlsx'}
# The secret key signs the session cookie, and the session stores the path of
# the uploaded file that later requests read from disk. A hard-coded key would
# let anyone forge that cookie, so take the key from the SECRET_KEY environment
# variable. Without it a random key is generated per process, which means
# sessions do not survive a restart (set SECRET_KEY for stable sessions or
# when running several workers).
app.secret_key = os.environ.get('SECRET_KEY') or os.urandom(32)

# Ensure upload folder exists
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)


def allowed_file(filename):
    """Tell whether *filename* carries one of the configured upload extensions.

    The extension is the text after the last dot, compared in lower case
    against ``app.config['ALLOWED_EXTENSIONS']``. Names without a dot are
    rejected.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1].lower()
    return extension in app.config['ALLOWED_EXTENSIONS']


def create_acf_pacf_plots(data):
    """Render ACF and PACF plots for *data* and return them as an HTML fragment.

    The two plots are drawn with statsmodels on a matplotlib figure, saved as
    a PNG, base64-encoded and embedded as a background image in an otherwise
    empty Plotly figure.

    Args:
        data: The observations to analyse; must support ``len()``. The caller
            in this file passes a pandas Series.

    Returns:
        str: The HTML produced by ``plotly.io.to_html(..., full_html=False)``.

    Raises:
        ValueError: If *data* has fewer than 4 observations, which leaves no
            usable lag.
    """
    # plot_pacf refuses lag counts that reach about half the sample size, so a
    # fixed 40 lags fails on short series. Stay strictly below that limit.
    max_lags = min(40, len(data) // 2 - 1)
    if max_lags < 1:
        raise ValueError('At least 4 observations are required for ACF/PACF plots')

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))
    try:
        plot_acf(data, ax=ax1, lags=max_lags)
        ax1.set_title('Autocorrelation Function')

        plot_pacf(data, ax=ax2, lags=max_lags)
        ax2.set_title('Partial Autocorrelation Function')

        # Save this specific figure (not whatever pyplot considers "current").
        buf = io.BytesIO()
        fig.savefig(buf, format='png')
    finally:
        # Always release the figure, even when plotting raised; pyplot keeps
        # every open figure alive until it is closed.
        plt.close(fig)

    img_str = base64.b64encode(buf.getvalue()).decode('utf-8')

    # Create Plotly figure with the PNG stretched over the whole plot area
    fig_plotly = go.Figure()
    fig_plotly.add_layout_image(
        dict(
            source=f'data:image/png;base64,{img_str}',
            x=0,
            y=1,
            xref="paper",
            yref="paper",
            sizex=1,
            sizey=1,
            sizing="stretch",
            opacity=1,
            layer="below"
        )
    )
    fig_plotly.update_layout(
        height=600,
        showlegend=False,
        xaxis=dict(visible=False),
        yaxis=dict(visible=False)
    )
    return pio.to_html(fig_plotly, full_html=False)


def _future_dates(index, periods):
    """Return *periods* timestamps that continue *index* past its last entry."""
    freq = index.inferred_freq
    if freq is None:
        # No regular frequency could be inferred (irregular or very short
        # index): fall back to the median gap between observations so that
        # pd.date_range still receives a step.
        freq = index.to_series().diff().median()
    return pd.date_range(start=index[-1], periods=periods + 1, freq=freq)[1:]


def process_time_series(filepath, do_decomposition, do_forecasting, do_acf_pacf, train_percent, forecast_periods):
    """Load a time series from *filepath* and run the requested analyses.

    The first column of the file is parsed as dates and becomes the index;
    the second column is the analysed value. A copy of the data (plus the
    decomposition components, when requested) is written as CSV to the upload
    folder.

    Args:
        filepath: Path of a CSV file (name ending in ``.csv``, any case) or of
            a file readable by ``pandas.read_excel``.
        do_decomposition: Run an additive seasonal decomposition (period 12).
        do_forecasting: Fit a seasonal auto-ARIMA (m=12) and forecast.
        do_acf_pacf: Produce the ACF/PACF plots.
        train_percent: Fraction (0-1) of the rows used for training.
        forecast_periods: Number of periods to forecast past the last
            observed date.

    Returns:
        dict: On success, the keys ``plot_html``, ``forecast_html``,
        ``acf_pacf_html``, ``summary``, ``filename``, ``arima_params``,
        ``seasonal_params``, ``train_size``, ``test_size``, ``metrics``,
        ``forecast_dates`` and ``forecast_values``; entries belonging to an
        analysis that was not requested are ``None`` (or an empty list for the
        two forecast lists). On any failure, ``{'error': <message>}``.
    """
    try:
        # Read file (extension check is case-insensitive so "DATA.CSV" works)
        if filepath.lower().endswith('.csv'):
            df = pd.read_csv(filepath)
        else:
            df = pd.read_excel(filepath)

        if df.shape[1] < 2:
            raise ValueError('The file must contain a date column followed by a value column')

        # Ensure datetime column exists
        date_col = df.columns[0]  # Assume first column is date
        value_col = df.columns[1]  # Assume second column is value
        df[date_col] = pd.to_datetime(df[date_col])
        df.set_index(date_col, inplace=True)
        series = df[value_col]

        # Initialize variables
        plot_html = None
        forecast_html = None
        acf_pacf_html = None
        summary = series.describe().to_dict()
        arima_params = None
        seasonal_params = None
        train_size = None
        test_size = None
        metrics = None
        forecast_dates = []
        forecast_values = []

        # Save processed data
        processed_df = df.copy()

        # Time series decomposition
        if do_decomposition:
            decomposition = seasonal_decompose(series, model='additive', period=12)
            fig = make_subplots(rows=4, cols=1,
                                subplot_titles=('Original Series', 'Trend', 'Seasonality', 'Residuals'))

            fig.add_trace(go.Scatter(x=df.index, y=series, name='Original'), row=1, col=1)
            fig.add_trace(go.Scatter(x=df.index, y=decomposition.trend, name='Trend'), row=2, col=1)
            fig.add_trace(go.Scatter(x=df.index, y=decomposition.seasonal, name='Seasonality'), row=3, col=1)
            fig.add_trace(go.Scatter(x=df.index, y=decomposition.resid, name='Residuals'), row=4, col=1)

            fig.update_layout(height=800, showlegend=True)
            plot_html = pio.to_html(fig, full_html=False)

            processed_df['Trend'] = decomposition.trend
            processed_df['Seasonality'] = decomposition.seasonal
            processed_df['Residuals'] = decomposition.resid

        # Forecasting
        if do_forecasting:
            if forecast_periods < 1:
                raise ValueError('Forecast periods must be at least 1')

            # Split data into train and test
            train_size = int(len(df) * train_percent)
            test_size = len(df) - train_size
            train_data = series.iloc[:train_size]
            test_data = series.iloc[train_size:]

            # Auto ARIMA for best parameters; the returned model is already
            # fitted on train_data, so no second fit is needed.
            model = pm.auto_arima(train_data,
                                  seasonal=True,
                                  m=12,
                                  start_p=0, start_q=0,
                                  max_p=3, max_q=3,
                                  start_P=0, start_Q=0,
                                  max_P=2, max_Q=2,
                                  d=1, D=1,
                                  trace=False,
                                  error_action='ignore',
                                  suppress_warnings=True,
                                  stepwise=True)

            # Get ARIMA parameters
            arima_params = model.order
            seasonal_params = model.seasonal_order

            # The model only knows the training window, so its first
            # `test_size` predictions cover the held-out rows and the real
            # forecast is what follows them. Predicting both in one call keeps
            # the forecast values aligned with forecast_dates, which start
            # after the last observed date.
            predictions = np.asarray(model.predict(n_periods=test_size + forecast_periods))
            test_predictions = predictions[:test_size]
            forecast = predictions[test_size:]

            # Calculate metrics on test data if available
            if test_size > 0:
                mae = mean_absolute_error(test_data, test_predictions)
                mse = mean_squared_error(test_data, test_predictions)
                rmse = np.sqrt(mse)
                metrics = {'MAE': float(mae), 'MSE': float(mse), 'RMSE': float(rmse)}

            # Forecast plot
            future_index = _future_dates(df.index, forecast_periods)
            forecast_fig = go.Figure()
            forecast_fig.add_trace(go.Scatter(x=df.index, y=series, name='Historical'))
            if test_size > 0:
                forecast_fig.add_trace(
                    go.Scatter(x=df.index[train_size:], y=test_data, name='Test Data', line=dict(color='green')))
            forecast_fig.add_trace(go.Scatter(x=future_index, y=forecast,
                                              name=f'Forecast (ARIMA{arima_params}, Seasonal{seasonal_params})',
                                              line=dict(dash='dash')))
            forecast_fig.update_layout(title='Forecast', height=400)
            forecast_html = pio.to_html(forecast_fig, full_html=False)

            forecast_dates = future_index.tolist()
            forecast_values = forecast.tolist()

        # ACF/PACF plots
        if do_acf_pacf:
            acf_pacf_html = create_acf_pacf_plots(series)

        # Save processed data. The output is always CSV, so give it a .csv
        # name even when the upload was an Excel workbook.
        source_stem = os.path.splitext(os.path.basename(filepath))[0]
        processed_name = 'processed_' + source_stem + '.csv'
        processed_df.to_csv(os.path.join(app.config['UPLOAD_FOLDER'], processed_name))

        return {
            'plot_html': plot_html,
            'forecast_html': forecast_html,
            'acf_pacf_html': acf_pacf_html,
            'summary': summary,
            'filename': processed_name,
            'arima_params': arima_params,
            'seasonal_params': seasonal_params,
            'train_size': train_size,
            'test_size': test_size,
            'metrics': metrics,
            'forecast_dates': forecast_dates,
            'forecast_values': forecast_values
        }

    except Exception as e:
        # Callers expect an {'error': ...} dict rather than an exception, but
        # keep the traceback in the log so failures remain diagnosable.
        app.logger.exception('Time series processing failed for %s', filepath)
        return {'error': str(e)}


@app.route('/')
def index():
    """Handle ``/`` by rendering the ``index.html`` template."""
    landing_page = render_template('index.html')
    return landing_page


@app.route('/upload', methods=['POST'])
def upload_file():
    """Accept an uploaded data file, run the selected analyses and show the results.

    Form fields read: ``file`` (the upload), the checkboxes ``decomposition``,
    ``forecasting`` and ``acf_pacf``, and the numeric fields ``train_percent``,
    ``test_percent`` (both in percent) and ``forecast_periods``.

    Every validation failure re-renders ``index.html`` with an ``error``
    message. The request is validated before the file is written or the
    session is touched. On success the file path, the chosen options and a
    fresh forecast history are stored in the session and ``results.html`` is
    rendered.
    """
    if 'file' not in request.files:
        return render_template('index.html', error='No file part')

    file = request.files['file']
    if file.filename == '':
        return render_template('index.html', error='No selected file')

    # Without this guard a disallowed extension made the view return None,
    # which Flask reports as an internal server error.
    if not allowed_file(file.filename):
        return render_template('index.html',
                               error='Unsupported file type. Please upload a CSV, XLS or XLSX file.')

    # Get user selections
    do_decomposition = 'decomposition' in request.form
    do_forecasting = 'forecasting' in request.form
    do_acf_pacf = 'acf_pacf' in request.form
    try:
        train_percent = float(request.form.get('train_percent', 80)) / 100
        test_percent = float(request.form.get('test_percent', 20)) / 100
        forecast_periods = int(request.form.get('forecast_periods', 12))
    except ValueError:
        return render_template('index.html',
                               error='Train/test percentages and forecast periods must be numbers')

    # Validate train/test percentages
    if abs(train_percent + test_percent - 1.0) > 0.01:  # Allow small float precision errors
        return render_template('index.html', error='Train and test percentages must sum to 100%')
    if not 0 < train_percent <= 1:
        return render_template('index.html',
                               error='Train percentage must be greater than 0 and at most 100')
    if forecast_periods < 1:
        return render_template('index.html', error='Forecast periods must be at least 1')

    # secure_filename drops characters it considers unsafe (including all
    # non-ASCII ones), which can remove the dot and leave e.g. just "csv".
    # Fall back to a generic name that keeps the validated extension so the
    # file type can still be recognised from the stored path.
    filename = secure_filename(file.filename)
    if not allowed_file(filename):
        extension = file.filename.rsplit('.', 1)[1].lower()
        filename = f'upload.{extension}'

    filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    file.save(filepath)

    session['filepath'] = filepath  # Store filepath in session
    session['do_decomposition'] = do_decomposition
    session['do_forecasting'] = do_forecasting
    session['do_acf_pacf'] = do_acf_pacf
    session['train_percent'] = train_percent
    session['test_percent'] = test_percent
    session['forecast_periods'] = forecast_periods
    # A new upload starts a new history; it is filled in below on success.
    session['forecast_history'] = []
    session['selected_indices'] = []

    result = process_time_series(filepath, do_decomposition, do_forecasting, do_acf_pacf, train_percent,
                                 forecast_periods)

    if 'error' in result:
        return render_template('index.html', error=result['error'])

    # Record this run as the first (and selected) history entry. Metrics only
    # exist when forecasting ran with a non-empty test split.
    forecast_history = []
    selected_indices = []
    metrics = result['metrics']
    if do_forecasting and metrics:
        forecast_history.append({
            'train_percent': train_percent * 100,
            'test_percent': test_percent * 100,
            'forecast_periods': forecast_periods,
            # Plain floats keep the session payload serialisable.
            'mae': float(metrics['MAE']),
            'mse': float(metrics['MSE']),
            'rmse': float(metrics['RMSE'])
        })
        selected_indices = [0]  # Select latest forecast
    session['forecast_history'] = forecast_history
    session['selected_indices'] = selected_indices
    session.modified = True

    return render_template('results.html',
                           do_decomposition=do_decomposition,
                           do_forecasting=do_forecasting,
                           do_acf_pacf=do_acf_pacf,
                           train_percent=train_percent * 100,
                           test_percent=test_percent * 100,
                           forecast_periods=forecast_periods,
                           forecast_history=forecast_history,
                           selected_indices=selected_indices,
                           **result)


@app.route('/reforecast', methods=['POST'])
def reforecast():
    """Re-run the forecast on the previously uploaded file with new parameters.

    Form fields read: ``train_percent``, ``test_percent`` (both in percent),
    ``forecast_periods`` and the checkbox ``add_to_existing``. The file path,
    analysis options, forecast history and current selection come from the
    session.

    A run that produced metrics is recorded in the forecast history unless an
    entry with the same train/test split and horizon already exists; in that
    case the existing entry is reused. The run is then either added to the
    current selection (``add_to_existing``) or becomes the only selected run.
    When more than one run is selected, the forecast plot is replaced by a
    comparison plot.

    Every failure re-renders ``index.html`` with an ``error`` message.
    """
    filepath = session.get('filepath')
    if not filepath or not os.path.exists(filepath):
        return render_template('index.html', error='Session expired or file not found. Please upload the file again.')

    # Get user selections from reforecast form; malformed numbers are
    # reported to the user instead of surfacing as a server error.
    try:
        train_percent = float(request.form.get('train_percent', 80)) / 100
        test_percent = float(request.form.get('test_percent', 20)) / 100
        forecast_periods = int(request.form.get('forecast_periods', 12))
    except ValueError:
        return render_template('index.html',
                               error='Train/test percentages and forecast periods must be numbers')
    add_to_existing = 'add_to_existing' in request.form

    # Validate train/test percentages
    if abs(train_percent + test_percent - 1.0) > 0.01:  # Allow small float precision errors
        return render_template('index.html', error='Train and test percentages must sum to 100%')
    if not 0 < train_percent <= 1:
        return render_template('index.html',
                               error='Train percentage must be greater than 0 and at most 100')
    if forecast_periods < 1:
        return render_template('index.html', error='Forecast periods must be at least 1')

    # Get original selections from session or defaults
    do_decomposition = session.get('do_decomposition', False)
    do_forecasting = True  # Since this is a reforecast
    do_acf_pacf = session.get('do_acf_pacf', False)

    result = process_time_series(filepath, do_decomposition, do_forecasting, do_acf_pacf, train_percent,
                                 forecast_periods)

    if 'error' in result:
        return render_template('index.html', error=result['error'])

    forecast_history = session.get('forecast_history', [])
    selected_indices = session.get('selected_indices', [])
    metrics = result['metrics']
    if metrics:
        new_entry = {
            'train_percent': train_percent * 100,
            'test_percent': test_percent * 100,
            'forecast_periods': forecast_periods,
            # Plain floats keep the session payload serialisable.
            'mae': float(metrics['MAE']),
            'mse': float(metrics['MSE']),
            'rmse': float(metrics['RMSE'])
        }
        identity_keys = ('train_percent', 'test_percent', 'forecast_periods')
        # Reuse the history entry of an identical configuration, if any, so
        # the selection below always points at the run that was just made.
        run_index = next(
            (position for position, entry in enumerate(forecast_history)
             if all(entry[key] == new_entry[key] for key in identity_keys)),
            None)
        if run_index is None:
            forecast_history.append(new_entry)
            run_index = len(forecast_history) - 1

        if add_to_existing:
            if run_index not in selected_indices:
                selected_indices.append(run_index)
        else:
            selected_indices = [run_index]

        session['forecast_history'] = forecast_history
        session['selected_indices'] = selected_indices
        session.modified = True

    # Update session with current parameters
    session['train_percent'] = train_percent
    session['test_percent'] = test_percent
    session['forecast_periods'] = forecast_periods

    # Generate comparison plot if multiple forecasts are selected
    if len(selected_indices) > 1:
        result['forecast_html'] = create_comparison_plot(filepath, forecast_history, selected_indices)

    return render_template('results.html',
                           do_decomposition=do_decomposition,
                           do_forecasting=do_forecasting,
                           do_acf_pacf=do_acf_pacf,
                           train_percent=train_percent * 100,
                           test_percent=test_percent * 100,
                           forecast_periods=forecast_periods,
                           forecast_history=forecast_history,
                           selected_indices=selected_indices,
                           scroll_to_forecast=True,
                           **result)


def create_comparison_plot(filepath, forecast_history, selected_indices):
    """Plot the forecasts of several recorded runs on top of the historical data.

    For every selected run the seasonal auto-ARIMA model is fitted again on
    that run's training split and its forecast is drawn as a dashed line.

    Args:
        filepath: Path of the uploaded data file (CSV when the name ends in
            ``.csv``, any case; otherwise read with ``pandas.read_excel``).
        forecast_history: List of run dicts holding at least
            ``train_percent`` and ``test_percent`` (both in percent) and
            ``forecast_periods``.
        selected_indices: Positions in *forecast_history* to plot. Positions
            outside the history are skipped.

    Returns:
        str: The figure as an HTML fragment (``full_html=False``).
    """
    # Read data (extension check is case-insensitive so "DATA.CSV" works)
    if filepath.lower().endswith('.csv'):
        df = pd.read_csv(filepath)
    else:
        df = pd.read_excel(filepath)

    date_col = df.columns[0]
    value_col = df.columns[1]
    df[date_col] = pd.to_datetime(df[date_col])
    df.set_index(date_col, inplace=True)
    series = df[value_col]
    total_rows = len(df)

    # Step used to extend the index into the future. It does not depend on the
    # run, so work it out once. When no regular frequency can be inferred,
    # fall back to the median gap so pd.date_range still receives a step.
    step = df.index.inferred_freq
    if step is None:
        step = df.index.to_series().diff().median()

    # Create Plotly figure
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df.index, y=series, name='Historical', line=dict(color='black')))

    # Use Plotly qualitative colors
    colors = px.colors.qualitative.Plotly

    # Ignore positions that do not refer to a recorded run (negative values
    # would otherwise silently index from the end of the list).
    valid_runs = [run_idx for run_idx in selected_indices if 0 <= run_idx < len(forecast_history)]

    # Generate forecasts for selected indices
    for idx, run_idx in enumerate(valid_runs):
        entry = forecast_history[run_idx]
        train_percent = entry['train_percent'] / 100
        forecast_periods = entry['forecast_periods']

        # Split data
        train_size = int(total_rows * train_percent)
        test_size = total_rows - train_size
        train_data = series.iloc[:train_size]
        test_data = series.iloc[train_size:]

        # Run ARIMA; auto_arima returns a model already fitted on train_data.
        model = pm.auto_arima(train_data,
                              seasonal=True,
                              m=12,
                              start_p=0, start_q=0,
                              max_p=3, max_q=3,
                              start_P=0, start_Q=0,
                              max_P=2, max_Q=2,
                              d=1, D=1,
                              trace=False,
                              error_action='ignore',
                              suppress_warnings=True,
                              stepwise=True)

        # Predictions start right after the training window, so the first
        # `test_size` of them overlap the held-out rows. Skip those so the
        # plotted values line up with forecast_dates, which begin after the
        # last observed date.
        predictions = np.asarray(model.predict(n_periods=test_size + forecast_periods))
        forecast = predictions[test_size:]
        forecast_dates = pd.date_range(start=df.index[-1], periods=forecast_periods + 1, freq=step)[1:]

        # Add test data if available (only once to avoid clutter)
        if test_size > 0 and idx == 0:
            fig.add_trace(go.Scatter(x=df.index[train_size:], y=test_data, name='Test Data', line=dict(color='green')))

        # Add forecast
        label = f"Forecast Run {run_idx + 1}: {entry['train_percent']:.0f}/{entry['test_percent']:.0f}, {forecast_periods} periods"
        fig.add_trace(go.Scatter(x=forecast_dates, y=forecast, name=label,
                                 line=dict(dash='dash', color=colors[idx % len(colors)])))

    fig.update_layout(title='Forecast Comparison', height=400, showlegend=True)
    return pio.to_html(fig, full_html=False)


@app.route('/compare_forecasts', methods=['POST'])
def compare_forecasts():
    """Show a comparison plot for the forecast runs ticked in the form.

    Reads the ``selected_forecasts`` form values (positions in the session's
    forecast history). Values that are not integers are rejected; positions
    that do not exist in the history and repeated positions are dropped. The
    remaining selection is stored in the session, the current analysis is
    re-run to rebuild the other results, and its forecast plot is replaced by
    the comparison plot.

    Every failure re-renders ``index.html`` with an ``error`` message.
    """
    filepath = session.get('filepath')
    if not filepath or not os.path.exists(filepath):
        return render_template('index.html', error='Session expired or file not found. Please upload the file again.')

    forecast_history = session.get('forecast_history', [])

    # Get selected forecast indices. They come straight from the request, so
    # validate them before using them as list positions.
    try:
        requested = [int(idx) for idx in request.form.getlist('selected_forecasts')]
    except ValueError:
        return render_template('index.html', error='Invalid forecast selection')

    selected_indices = []
    for idx in requested:
        if 0 <= idx < len(forecast_history) and idx not in selected_indices:
            selected_indices.append(idx)
    if not selected_indices:
        return render_template('index.html', error='No forecasts selected for comparison')

    # Update session with selected indices
    session['selected_indices'] = selected_indices
    session.modified = True

    # Get current parameters and settings
    do_decomposition = session.get('do_decomposition', False)
    do_forecasting = session.get('do_forecasting', True)
    do_acf_pacf = session.get('do_acf_pacf', False)
    train_percent = session.get('train_percent', 0.8)
    test_percent = session.get('test_percent', 0.2)
    forecast_periods = session.get('forecast_periods', 12)

    # Re-run the current forecast to maintain other results
    result = process_time_series(filepath, do_decomposition, do_forecasting, do_acf_pacf, train_percent,
                                 forecast_periods)

    if 'error' in result:
        return render_template('index.html', error=result['error'])

    # Generate comparison plot. Model fitting can fail for many reasons; at
    # this request boundary report the problem the same way as other failures
    # instead of letting it become a server error.
    try:
        result['forecast_html'] = create_comparison_plot(filepath, forecast_history, selected_indices)
    except Exception as e:
        app.logger.exception('Forecast comparison failed for %s', filepath)
        return render_template('index.html', error=str(e))

    return render_template('results.html',
                           do_decomposition=do_decomposition,
                           do_forecasting=do_forecasting,
                           do_acf_pacf=do_acf_pacf,
                           train_percent=train_percent * 100,
                           test_percent=test_percent * 100,
                           forecast_periods=forecast_periods,
                           forecast_history=forecast_history,
                           selected_indices=selected_indices,
                           scroll_to_forecast=True,
                           **result)


@app.route('/download_forecast_history')
def download_forecast_history():
    """Send the session's forecast history to the client as an Excel workbook.

    When the session holds no history, ``index.html`` is rendered with an
    error message instead. Otherwise the history entries become rows with
    human-readable column titles, preceded by a 1-based ``Run`` column, and
    the workbook is built in memory and returned as ``forecast_history.xlsx``.
    """
    history = session.get('forecast_history', [])
    if not history:
        return render_template('index.html', error='No forecast history available')

    # Map the internal entry keys to the column titles shown in the workbook
    column_titles = {
        'train_percent': 'Train Percent (%)',
        'test_percent': 'Test Percent (%)',
        'forecast_periods': 'Forecast Periods',
        'mae': 'MAE',
        'mse': 'MSE',
        'rmse': 'RMSE'
    }
    table = pd.DataFrame(history).rename(columns=column_titles)
    table.insert(0, 'Run', range(1, len(table) + 1))

    # Write the workbook into memory and rewind so send_file reads from the start
    workbook = io.BytesIO()
    table.to_excel(workbook, index=False)
    workbook.seek(0)

    xlsx_mimetype = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
    return send_file(workbook,
                     mimetype=xlsx_mimetype,
                     as_attachment=True,
                     download_name='forecast_history.xlsx')


@app.route('/download/<filename>')
def download_file(filename):
    """Send a file from the upload folder to the client as an attachment.

    Args:
        filename: Name of the file inside the upload folder, taken from the
            URL.

    Returns:
        The file response. ``send_from_directory`` answers with a 404 when the
        name does not resolve to a file inside the upload folder.
    """
    # Imported here so this block stays self-contained; flask itself is
    # already a dependency of this module.
    from flask import send_from_directory

    # The name comes from the URL, so never join it into a path by hand:
    # send_from_directory joins it safely and refuses names that would escape
    # the folder. The folder is made absolute against the working directory,
    # which is also what the relative paths used when saving files resolve
    # against.
    upload_dir = os.path.abspath(app.config['UPLOAD_FOLDER'])
    return send_from_directory(upload_dir, filename, as_attachment=True)


if __name__ == '__main__':
    # Debug mode enables the interactive Werkzeug debugger, which lets anyone
    # who can reach the server execute code. Keep it off unless it is
    # explicitly requested through the FLASK_DEBUG environment variable.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug_enabled)