Add selectable forecast model type; move app.py helpers into models/ and utils/ modules

2025-07-30 18:53:50 +03:00
parent 8019bd3b7c
commit 079804a0fc
2118 changed files with 297840 additions and 502 deletions
--- a/app.py
+++ b/app.py
@ -1,23 +1,10 @@
-from flask import Flask, request, render_template, send_file, session
-import pandas as pd
-import io
-import os
-from statsmodels.tsa.seasonal import seasonal_decompose
-from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
-import pmdarima as pm
-import plotly.express as px
-import plotly.graph_objects as go
-from plotly.subplots import make_subplots
-import plotly.io as pio
+from flask import Flask, request, render_template, session
 from werkzeug.utils import secure_filename
-import matplotlib
-
-matplotlib.use('Agg')  # Use non-interactive backend
-import matplotlib.pyplot as plt
-import io
-import base64
-import numpy as np
-from sklearn.metrics import mean_absolute_error, mean_squared_error
+from models.time_series import process_time_series
+from models.plotting import create_comparison_plot
+from utils.file_handling import allowed_file, read_file, save_processed_file
+from utils.forecast_history import update_forecast_history, download_forecast_history
+import os

 app = Flask(__name__)
 app.config['UPLOAD_FOLDER'] = 'Uploads'
@ -28,176 +15,6 @@ app.secret_key = 'your-secret-key'  # Required for session management
 os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)


-def allowed_file(filename):
-    return '.' in filename and filename.rsplit('.', 1)[1].lower() in app.config['ALLOWED_EXTENSIONS']
-
-
-def create_acf_pacf_plots(data):
-    # Create ACF and PACF plots using matplotlib
-    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))
-
-    plot_acf(data, ax=ax1, lags=40)
-    ax1.set_title('Autocorrelation Function')
-
-    plot_pacf(data, ax=ax2, lags=40)
-    ax2.set_title('Partial Autocorrelation Function')
-
-    # Convert matplotlib plot to Plotly
-    buf = io.BytesIO()
-    plt.savefig(buf, format='png')
-    plt.close(fig)
-    buf.seek(0)
-    img_str = base64.b64encode(buf.getvalue()).decode('utf-8')
-
-    # Create Plotly figure with image
-    fig_plotly = go.Figure()
-    fig_plotly.add_layout_image(
-        dict(
-            source=f'data:image/png;base64,{img_str}',
-            x=0,
-            y=1,
-            xref="paper",
-            yref="paper",
-            sizex=1,
-            sizey=1,
-            sizing="stretch",
-            opacity=1,
-            layer="below"
-        )
-    )
-    fig_plotly.update_layout(
-        height=600,
-        showlegend=False,
-        xaxis=dict(visible=False),
-        yaxis=dict(visible=False)
-    )
-    return pio.to_html(fig_plotly, full_html=False)
-
-
-def process_time_series(filepath, do_decomposition, do_forecasting, do_acf_pacf, train_percent, forecast_periods):
-    try:
-        # Read file
-        if filepath.endswith('.csv'):
-            df = pd.read_csv(filepath)
-        else:
-            df = pd.read_excel(filepath)
-
-        # Ensure datetime column exists
-        date_col = df.columns[0]  # Assume first column is date
-        value_col = df.columns[1]  # Assume second column is value
-        df[date_col] = pd.to_datetime(df[date_col])
-        df.set_index(date_col, inplace=True)
-
-        # Initialize variables
-        plot_html = None
-        forecast_html = None
-        acf_pacf_html = None
-        summary = df[value_col].describe().to_dict()
-        arima_params = None
-        seasonal_params = None
-        train_size = None
-        test_size = None
-        metrics = None
-
-        # Save processed data
-        processed_df = df.copy()
-
-        # Time series decomposition
-        if do_decomposition:
-            decomposition = seasonal_decompose(df[value_col], model='additive', period=12)
-            fig = make_subplots(rows=4, cols=1,
-                                subplot_titles=('Original Series', 'Trend', 'Seasonality', 'Residuals'))
-
-            fig.add_trace(go.Scatter(x=df.index, y=df[value_col], name='Original'), row=1, col=1)
-            fig.add_trace(go.Scatter(x=df.index, y=decomposition.trend, name='Trend'), row=2, col=1)
-            fig.add_trace(go.Scatter(x=df.index, y=decomposition.seasonal, name='Seasonality'), row=3, col=1)
-            fig.add_trace(go.Scatter(x=df.index, y=decomposition.resid, name='Residuals'), row=4, col=1)
-
-            fig.update_layout(height=800, showlegend=True)
-            plot_html = pio.to_html(fig, full_html=False)
-
-            processed_df['Trend'] = decomposition.trend
-            processed_df['Seasonality'] = decomposition.seasonal
-            processed_df['Residuals'] = decomposition.resid
-
-        # Forecasting
-        if do_forecasting:
-            # Split data into train and test
-            train_size = int(len(df) * train_percent)
-            test_size = len(df) - train_size
-            train_data = df[value_col].iloc[:train_size]
-            test_data = df[value_col].iloc[train_size:] if test_size > 0 else pd.Series()
-
-            # Auto ARIMA for best parameters
-            model = pm.auto_arima(train_data,
-                                  seasonal=True,
-                                  m=12,
-                                  start_p=0, start_q=0,
-                                  max_p=3, max_q=3,
-                                  start_P=0, start_Q=0,
-                                  max_P=2, max_Q=2,
-                                  d=1, D=1,
-                                  trace=False,
-                                  error_action='ignore',
-                                  suppress_warnings=True,
-                                  stepwise=True)
-
-            # Fit ARIMA with best parameters
-            model_fit = model.fit(train_data)
-            forecast = model_fit.predict(n_periods=forecast_periods)
-
-            # Get ARIMA parameters
-            arima_params = model.order
-            seasonal_params = model.seasonal_order
-
-            # Calculate metrics on test data if available
-            if test_size > 0:
-                test_predictions = model_fit.predict(n_periods=test_size)
-                mae = mean_absolute_error(test_data, test_predictions)
-                mse = mean_squared_error(test_data, test_predictions)
-                rmse = np.sqrt(mse)
-                metrics = {'MAE': mae, 'MSE': mse, 'RMSE': rmse}
-
-            # Forecast plot
-            forecast_dates = pd.date_range(start=df.index[-1], periods=forecast_periods + 1,
-                                           freq=df.index.inferred_freq)[1:]
-            forecast_fig = go.Figure()
-            forecast_fig.add_trace(go.Scatter(x=df.index, y=df[value_col], name='Historical'))
-            if test_size > 0:
-                forecast_fig.add_trace(
-                    go.Scatter(x=df.index[train_size:], y=test_data, name='Test Data', line=dict(color='green')))
-            forecast_fig.add_trace(go.Scatter(x=forecast_dates, y=forecast,
-                                              name=f'Forecast (ARIMA{arima_params}, Seasonal{seasonal_params})',
-                                              line=dict(dash='dash')))
-            forecast_fig.update_layout(title='Forecast', height=400)
-            forecast_html = pio.to_html(forecast_fig, full_html=False)
-
-        # ACF/PACF plots
-        if do_acf_pacf:
-            acf_pacf_html = create_acf_pacf_plots(df[value_col])
-
-        # Save processed data
-        processed_df.to_csv(os.path.join(app.config['UPLOAD_FOLDER'], 'processed_' + os.path.basename(filepath)))
-
-        return {
-            'plot_html': plot_html,
-            'forecast_html': forecast_html,
-            'acf_pacf_html': acf_pacf_html,
-            'summary': summary,
-            'filename': 'processed_' + os.path.basename(filepath),
-            'arima_params': arima_params,
-            'seasonal_params': seasonal_params,
-            'train_size': train_size,
-            'test_size': test_size,
-            'metrics': metrics,
-            'forecast_dates': forecast_dates.tolist() if do_forecasting else [],
-            'forecast_values': forecast.tolist() if do_forecasting else []
-        }
-
-    except Exception as e:
-        return {'error': str(e)}
-
-
@app.route('/')
 def index():
    return render_template('index.html')
@ -227,6 +44,7 @@ def upload_file():
        train_percent = float(request.form.get('train_percent', 80)) / 100
        test_percent = float(request.form.get('test_percent', 20)) / 100
        forecast_periods = int(request.form.get('forecast_periods', 12))
+        model_type = request.form.get('model_type', 'ARIMA')

        # Validate train/test percentages
        if abs(train_percent + test_percent - 1.0) > 0.01:  # Allow small float precision errors
@ -238,33 +56,18 @@ def upload_file():
        session['train_percent'] = train_percent
        session['test_percent'] = test_percent
        session['forecast_periods'] = forecast_periods
+        session['model_type'] = model_type

        result = process_time_series(filepath, do_decomposition, do_forecasting, do_acf_pacf, train_percent,
-                                     forecast_periods)
+                                     forecast_periods, model_type)

        if 'error' in result:
            return render_template('index.html', error=result['error'])

        # Update forecast history if unique
        if do_forecasting and result['metrics']:
-            new_entry = {
-                'train_percent': train_percent * 100,
-                'test_percent': test_percent * 100,
-                'forecast_periods': forecast_periods,
-                'mae': result['metrics']['MAE'] if result['metrics'] else None,
-                'mse': result['metrics']['MSE'] if result['metrics'] else None,
-                'rmse': result['metrics']['RMSE'] if result['metrics'] else None
-            }
-            # Check for duplicates
-            forecast_history = session.get('forecast_history', [])
-            if not any(entry['train_percent'] == new_entry['train_percent'] and
-                       entry['test_percent'] == new_entry['test_percent'] and
-                       entry['forecast_periods'] == new_entry['forecast_periods']
-                       for entry in forecast_history):
-                forecast_history.append(new_entry)
-                session['forecast_history'] = forecast_history
-                session['selected_indices'] = [len(forecast_history) - 1]  # Select latest forecast
-                session.modified = True
+            update_forecast_history(session, train_percent, test_percent, forecast_periods, model_type,
+                                    result['metrics'])

        return render_template('results.html',
                               do_decomposition=do_decomposition,
@ -288,6 +91,7 @@ def reforecast():
    train_percent = float(request.form.get('train_percent', 80)) / 100
    test_percent = float(request.form.get('test_percent', 20)) / 100
    forecast_periods = int(request.form.get('forecast_periods', 12))
+    model_type = request.form.get('model_type', 'ARIMA')
    add_to_existing = 'add_to_existing' in request.form

    # Validate train/test percentages
@ -300,45 +104,26 @@ def reforecast():
    do_acf_pacf = session.get('do_acf_pacf', False)

    result = process_time_series(filepath, do_decomposition, do_forecasting, do_acf_pacf, train_percent,
-                                 forecast_periods)
+                                 forecast_periods, model_type)

    if 'error' in result:
        return render_template('index.html', error=result['error'])

    # Update forecast history if unique
-    forecast_history = session.get('forecast_history', [])
-    selected_indices = session.get('selected_indices', [])
    if do_forecasting and result['metrics']:
-        new_entry = {
-            'train_percent': train_percent * 100,
-            'test_percent': test_percent * 100,
-            'forecast_periods': forecast_periods,
-            'mae': result['metrics']['MAE'] if result['metrics'] else None,
-            'mse': result['metrics']['MSE'] if result['metrics'] else None,
-            'rmse': result['metrics']['RMSE'] if result['metrics'] else None
-        }
-        # Check for duplicates
-        if not any(entry['train_percent'] == new_entry['train_percent'] and
-                   entry['test_percent'] == new_entry['test_percent'] and
-                   entry['forecast_periods'] == new_entry['forecast_periods']
-                   for entry in forecast_history):
-            forecast_history.append(new_entry)
-            session['forecast_history'] = forecast_history
-            if add_to_existing:
-                selected_indices.append(len(forecast_history) - 1)
-            else:
-                selected_indices = [len(forecast_history) - 1]
-            session['selected_indices'] = selected_indices
-            session.modified = True
+        update_forecast_history(session, train_percent, test_percent, forecast_periods, model_type, result['metrics'],
+                                add_to_existing)

    # Update session with current parameters
    session['train_percent'] = train_percent
    session['test_percent'] = test_percent
    session['forecast_periods'] = forecast_periods
+    session['model_type'] = model_type

    # Generate comparison plot if multiple forecasts are selected
-    if len(selected_indices) > 1:
-        result['forecast_html'] = create_comparison_plot(filepath, forecast_history, selected_indices)
+    if len(session.get('selected_indices', [])) > 1:
+        result['forecast_html'] = create_comparison_plot(filepath, session['forecast_history'],
+                                                         session['selected_indices'])

    return render_template('results.html',
                           do_decomposition=do_decomposition,
@ -347,75 +132,12 @@ def reforecast():
                           train_percent=train_percent * 100,
                           test_percent=test_percent * 100,
                           forecast_periods=forecast_periods,
-                           forecast_history=forecast_history,
-                           selected_indices=selected_indices,
+                           forecast_history=session['forecast_history'],
+                           selected_indices=session['selected_indices'],
                           scroll_to_forecast=True,
                           **result)


-def create_comparison_plot(filepath, forecast_history, selected_indices):
-    # Read data
-    if filepath.endswith('.csv'):
-        df = pd.read_csv(filepath)
-    else:
-        df = pd.read_excel(filepath)
-
-    date_col = df.columns[0]
-    value_col = df.columns[1]
-    df[date_col] = pd.to_datetime(df[date_col])
-    df.set_index(date_col, inplace=True)
-
-    # Create Plotly figure
-    fig = go.Figure()
-    fig.add_trace(go.Scatter(x=df.index, y=df[value_col], name='Historical', line=dict(color='black')))
-
-    # Use Plotly qualitative colors
-    colors = px.colors.qualitative.Plotly
-
-    # Generate forecasts for selected indices
-    for idx, run_idx in enumerate(selected_indices):
-        entry = forecast_history[run_idx]
-        train_percent = entry['train_percent'] / 100
-        forecast_periods = entry['forecast_periods']
-
-        # Split data
-        train_size = int(len(df) * train_percent)
-        test_size = len(df) - train_size
-        train_data = df[value_col].iloc[:train_size]
-        test_data = df[value_col].iloc[train_size:] if test_size > 0 else pd.Series()
-
-        # Run ARIMA
-        model = pm.auto_arima(train_data,
-                              seasonal=True,
-                              m=12,
-                              start_p=0, start_q=0,
-                              max_p=3, max_q=3,
-                              start_P=0, start_Q=0,
-                              max_P=2, max_Q=2,
-                              d=1, D=1,
-                              trace=False,
-                              error_action='ignore',
-                              suppress_warnings=True,
-                              stepwise=True)
-
-        model_fit = model.fit(train_data)
-        forecast = model_fit.predict(n_periods=forecast_periods)
-        forecast_dates = pd.date_range(start=df.index[-1], periods=forecast_periods + 1, freq=df.index.inferred_freq)[
-                         1:]
-
-        # Add test data if available (only once to avoid clutter)
-        if test_size > 0 and idx == 0:
-            fig.add_trace(go.Scatter(x=df.index[train_size:], y=test_data, name='Test Data', line=dict(color='green')))
-
-        # Add forecast
-        label = f"Forecast Run {run_idx + 1}: {entry['train_percent']:.0f}/{entry['test_percent']:.0f}, {forecast_periods} periods"
-        fig.add_trace(go.Scatter(x=forecast_dates, y=forecast, name=label,
-                                 line=dict(dash='dash', color=colors[idx % len(colors)])))
-
-    fig.update_layout(title='Forecast Comparison', height=400, showlegend=True)
-    return pio.to_html(fig, full_html=False)
-
-
@app.route('/compare_forecasts', methods=['POST'])
 def compare_forecasts():
    filepath = session.get('filepath')
@ -438,14 +160,14 @@ def compare_forecasts():
    train_percent = session.get('train_percent', 0.8)
    test_percent = session.get('test_percent', 0.2)
    forecast_periods = session.get('forecast_periods', 12)
-    forecast_history = session.get('forecast_history', [])
+    model_type = session.get('model_type', 'ARIMA')

    # Generate comparison plot
-    forecast_html = create_comparison_plot(filepath, forecast_history, selected_indices)
+    forecast_html = create_comparison_plot(filepath, session['forecast_history'], selected_indices)

    # Re-run the current forecast to maintain other results
    result = process_time_series(filepath, do_decomposition, do_forecasting, do_acf_pacf, train_percent,
-                                 forecast_periods)
+                                 forecast_periods, model_type)

    if 'error' in result:
        return render_template('index.html', error=result['error'])
@ -459,7 +181,7 @@ def compare_forecasts():
                           train_percent=train_percent * 100,
                           test_percent=test_percent * 100,
                           forecast_periods=forecast_periods,
-                           forecast_history=forecast_history,
+                           forecast_history=session['forecast_history'],
                           selected_indices=selected_indices,
                           scroll_to_forecast=True,
                           **result)
@ -467,31 +189,17 @@ def compare_forecasts():

@app.route('/download_forecast_history')
 def download_forecast_history():
-    forecast_history = session.get('forecast_history', [])
-    if not forecast_history:
-        return render_template('index.html', error='No forecast history available')
-
-    # Create DataFrame for forecast history
-    df = pd.DataFrame(forecast_history)
-    df = df.rename(columns={
-        'train_percent': 'Train Percent (%)',
-        'test_percent': 'Test Percent (%)',
-        'forecast_periods': 'Forecast Periods',
-        'mae': 'MAE',
-        'mse': 'MSE',
-        'rmse': 'RMSE'
-    })
-    df.insert(0, 'Run', range(1, len(df) + 1))
-
-    # Save to Excel
-    output = io.BytesIO()
-    df.to_excel(output, index=False)
-    output.seek(0)
-
-    return send_file(output,
-                     mimetype='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
-                     as_attachment=True,
-                     download_name='forecast_history.xlsx')
+    """Return the helper's response for the current session's forecast history.
+
+    Delegates to ``utils.forecast_history.download_forecast_history``.
+    """
+    # The ``def`` above rebinds the module-level name that the top-of-file
+    # import gave to the helper, so calling ``download_forecast_history(session)``
+    # here would re-enter this zero-argument view and raise TypeError.
+    # Import the helper under an alias to reach it.
+    from utils.forecast_history import download_forecast_history as export_forecast_history
+
+    return export_forecast_history(session)


@app.route('/download/<filename>')