some new features

2025-07-30 17:09:11 +03:00
parent db5d46760a
commit 8019bd3b7c
20616 changed files with 4375466 additions and 8 deletions
--- a/app.py
+++ b/app.py
@ -17,6 +17,7 @@ import matplotlib.pyplot as plt
 import io
 import base64
 import numpy as np
+from sklearn.metrics import mean_absolute_error, mean_squared_error

 app = Flask(__name__)
 app.config['UPLOAD_FOLDER'] = 'Uploads'
@ -96,6 +97,7 @@ def process_time_series(filepath, do_decomposition, do_forecasting, do_acf_pacf,
        seasonal_params = None
        train_size = None
        test_size = None
+        metrics = None

        # Save processed data
        processed_df = df.copy()
@ -148,6 +150,14 @@ def process_time_series(filepath, do_decomposition, do_forecasting, do_acf_pacf,
            arima_params = model.order
            seasonal_params = model.seasonal_order

+            # Calculate metrics on test data if available
+            if test_size > 0:
+                test_predictions = model_fit.predict(n_periods=test_size)
+                mae = mean_absolute_error(test_data, test_predictions)
+                mse = mean_squared_error(test_data, test_predictions)
+                rmse = np.sqrt(mse)
+                metrics = {'MAE': mae, 'MSE': mse, 'RMSE': rmse}
+
            # Forecast plot
            forecast_dates = pd.date_range(start=df.index[-1], periods=forecast_periods + 1,
                                           freq=df.index.inferred_freq)[1:]
@ -156,8 +166,10 @@ def process_time_series(filepath, do_decomposition, do_forecasting, do_acf_pacf,
            if test_size > 0:
                forecast_fig.add_trace(
                    go.Scatter(x=df.index[train_size:], y=test_data, name='Test Data', line=dict(color='green')))
-            forecast_fig.add_trace(go.Scatter(x=forecast_dates, y=forecast, name='Forecast', line=dict(dash='dash')))
-            forecast_fig.update_layout(title=f'Forecast (ARIMA{arima_params}, Seasonal{seasonal_params})', height=400)
+            forecast_fig.add_trace(go.Scatter(x=forecast_dates, y=forecast,
+                                              name=f'Forecast (ARIMA{arima_params}, Seasonal{seasonal_params})',
+                                              line=dict(dash='dash')))
+            forecast_fig.update_layout(title='Forecast', height=400)
            forecast_html = pio.to_html(forecast_fig, full_html=False)

        # ACF/PACF plots
@ -176,7 +188,10 @@ def process_time_series(filepath, do_decomposition, do_forecasting, do_acf_pacf,
            'arima_params': arima_params,
            'seasonal_params': seasonal_params,
            'train_size': train_size,
-            'test_size': test_size
+            'test_size': test_size,
+            'metrics': metrics,
+            'forecast_dates': forecast_dates.tolist() if do_forecasting else [],
+            'forecast_values': forecast.tolist() if do_forecasting else []
        }

    except Exception as e:
@ -202,6 +217,8 @@ def upload_file():
        filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
        file.save(filepath)
        session['filepath'] = filepath  # Store filepath in session
+        session['forecast_history'] = []  # Initialize forecast history
+        session['selected_indices'] = []  # Initialize selected indices

        # Get user selections
        do_decomposition = 'decomposition' in request.form
@ -209,6 +226,7 @@ def upload_file():
        do_acf_pacf = 'acf_pacf' in request.form
        train_percent = float(request.form.get('train_percent', 80)) / 100
        test_percent = float(request.form.get('test_percent', 20)) / 100
+        forecast_periods = int(request.form.get('forecast_periods', 12))

        # Validate train/test percentages
        if abs(train_percent + test_percent - 1.0) > 0.01:  # Allow small float precision errors
@ -217,20 +235,46 @@ def upload_file():
        session['do_decomposition'] = do_decomposition
        session['do_forecasting'] = do_forecasting
        session['do_acf_pacf'] = do_acf_pacf
+        session['train_percent'] = train_percent
+        session['test_percent'] = test_percent
+        session['forecast_periods'] = forecast_periods

        result = process_time_series(filepath, do_decomposition, do_forecasting, do_acf_pacf, train_percent,
-                                     forecast_periods=int(request.form.get('forecast_periods', 12)))
+                                     forecast_periods)

        if 'error' in result:
            return render_template('index.html', error=result['error'])

+        # Update forecast history if unique
+        if do_forecasting and result['metrics']:
+            new_entry = {
+                'train_percent': train_percent * 100,
+                'test_percent': test_percent * 100,
+                'forecast_periods': forecast_periods,
+                'mae': result['metrics']['MAE'] if result['metrics'] else None,
+                'mse': result['metrics']['MSE'] if result['metrics'] else None,
+                'rmse': result['metrics']['RMSE'] if result['metrics'] else None
+            }
+            # Check for duplicates
+            forecast_history = session.get('forecast_history', [])
+            if not any(entry['train_percent'] == new_entry['train_percent'] and
+                       entry['test_percent'] == new_entry['test_percent'] and
+                       entry['forecast_periods'] == new_entry['forecast_periods']
+                       for entry in forecast_history):
+                forecast_history.append(new_entry)
+                session['forecast_history'] = forecast_history
+                session['selected_indices'] = [len(forecast_history) - 1]  # Select latest forecast
+                session.modified = True
+
        return render_template('results.html',
                               do_decomposition=do_decomposition,
                               do_forecasting=do_forecasting,
                               do_acf_pacf=do_acf_pacf,
                               train_percent=train_percent * 100,
                               test_percent=test_percent * 100,
-                               forecast_periods=int(request.form.get('forecast_periods', 12)),
+                               forecast_periods=forecast_periods,
+                               forecast_history=session['forecast_history'],
+                               selected_indices=session['selected_indices'],
                               **result)


@ -244,6 +288,7 @@ def reforecast():
    train_percent = float(request.form.get('train_percent', 80)) / 100
    test_percent = float(request.form.get('test_percent', 20)) / 100
    forecast_periods = int(request.form.get('forecast_periods', 12))
+    add_to_existing = 'add_to_existing' in request.form

    # Validate train/test percentages
    if abs(train_percent + test_percent - 1.0) > 0.01:  # Allow small float precision errors
@ -260,11 +305,41 @@ def reforecast():
    if 'error' in result:
        return render_template('index.html', error=result['error'])

-    # Update session with new parameters
+    # Update forecast history if unique
+    forecast_history = session.get('forecast_history', [])
+    selected_indices = session.get('selected_indices', [])
+    if do_forecasting and result['metrics']:
+        new_entry = {
+            'train_percent': train_percent * 100,
+            'test_percent': test_percent * 100,
+            'forecast_periods': forecast_periods,
+            'mae': result['metrics']['MAE'] if result['metrics'] else None,
+            'mse': result['metrics']['MSE'] if result['metrics'] else None,
+            'rmse': result['metrics']['RMSE'] if result['metrics'] else None
+        }
+        # Check for duplicates
+        if not any(entry['train_percent'] == new_entry['train_percent'] and
+                   entry['test_percent'] == new_entry['test_percent'] and
+                   entry['forecast_periods'] == new_entry['forecast_periods']
+                   for entry in forecast_history):
+            forecast_history.append(new_entry)
+            session['forecast_history'] = forecast_history
+            if add_to_existing:
+                selected_indices.append(len(forecast_history) - 1)
+            else:
+                selected_indices = [len(forecast_history) - 1]
+            session['selected_indices'] = selected_indices
+            session.modified = True
+
+    # Update session with current parameters
    session['train_percent'] = train_percent
    session['test_percent'] = test_percent
    session['forecast_periods'] = forecast_periods

+    # Generate comparison plot if multiple forecasts are selected
+    if len(selected_indices) > 1:
+        result['forecast_html'] = create_comparison_plot(filepath, forecast_history, selected_indices)
+
    return render_template('results.html',
                           do_decomposition=do_decomposition,
                           do_forecasting=do_forecasting,
@ -272,9 +347,153 @@ def reforecast():
                           train_percent=train_percent * 100,
                           test_percent=test_percent * 100,
                           forecast_periods=forecast_periods,
+                           forecast_history=forecast_history,
+                           selected_indices=selected_indices,
+                           scroll_to_forecast=True,
                           **result)


+def create_comparison_plot(filepath, forecast_history, selected_indices):
+    # Read data
+    if filepath.endswith('.csv'):
+        df = pd.read_csv(filepath)
+    else:
+        df = pd.read_excel(filepath)
+
+    date_col = df.columns[0]
+    value_col = df.columns[1]
+    df[date_col] = pd.to_datetime(df[date_col])
+    df.set_index(date_col, inplace=True)
+
+    # Create Plotly figure
+    fig = go.Figure()
+    fig.add_trace(go.Scatter(x=df.index, y=df[value_col], name='Historical', line=dict(color='black')))
+
+    # Use Plotly qualitative colors
+    colors = px.colors.qualitative.Plotly
+
+    # Generate forecasts for selected indices
+    for idx, run_idx in enumerate(selected_indices):
+        entry = forecast_history[run_idx]
+        train_percent = entry['train_percent'] / 100
+        forecast_periods = entry['forecast_periods']
+
+        # Split data
+        train_size = int(len(df) * train_percent)
+        test_size = len(df) - train_size
+        train_data = df[value_col].iloc[:train_size]
+        test_data = df[value_col].iloc[train_size:] if test_size > 0 else pd.Series()
+
+        # Run ARIMA
+        model = pm.auto_arima(train_data,
+                              seasonal=True,
+                              m=12,
+                              start_p=0, start_q=0,
+                              max_p=3, max_q=3,
+                              start_P=0, start_Q=0,
+                              max_P=2, max_Q=2,
+                              d=1, D=1,
+                              trace=False,
+                              error_action='ignore',
+                              suppress_warnings=True,
+                              stepwise=True)
+
+        model_fit = model.fit(train_data)
+        forecast = model_fit.predict(n_periods=forecast_periods)
+        forecast_dates = pd.date_range(start=df.index[-1], periods=forecast_periods + 1, freq=df.index.inferred_freq)[
+                         1:]
+
+        # Add test data if available (only once to avoid clutter)
+        if test_size > 0 and idx == 0:
+            fig.add_trace(go.Scatter(x=df.index[train_size:], y=test_data, name='Test Data', line=dict(color='green')))
+
+        # Add forecast
+        label = f"Forecast Run {run_idx + 1}: {entry['train_percent']:.0f}/{entry['test_percent']:.0f}, {forecast_periods} periods"
+        fig.add_trace(go.Scatter(x=forecast_dates, y=forecast, name=label,
+                                 line=dict(dash='dash', color=colors[idx % len(colors)])))
+
+    fig.update_layout(title='Forecast Comparison', height=400, showlegend=True)
+    return pio.to_html(fig, full_html=False)
+
+
+@app.route('/compare_forecasts', methods=['POST'])
+def compare_forecasts():
+    filepath = session.get('filepath')
+    if not filepath or not os.path.exists(filepath):
+        return render_template('index.html', error='Session expired or file not found. Please upload the file again.')
+
+    # Get selected forecast indices
+    selected_indices = [int(idx) for idx in request.form.getlist('selected_forecasts')]
+    if not selected_indices:
+        return render_template('index.html', error='No forecasts selected for comparison')
+
+    # Update session with selected indices
+    session['selected_indices'] = selected_indices
+    session.modified = True
+
+    # Get current parameters and settings
+    do_decomposition = session.get('do_decomposition', False)
+    do_forecasting = session.get('do_forecasting', True)
+    do_acf_pacf = session.get('do_acf_pacf', False)
+    train_percent = session.get('train_percent', 0.8)
+    test_percent = session.get('test_percent', 0.2)
+    forecast_periods = session.get('forecast_periods', 12)
+    forecast_history = session.get('forecast_history', [])
+
+    # Generate comparison plot
+    forecast_html = create_comparison_plot(filepath, forecast_history, selected_indices)
+
+    # Re-run the current forecast to maintain other results
+    result = process_time_series(filepath, do_decomposition, do_forecasting, do_acf_pacf, train_percent,
+                                 forecast_periods)
+
+    if 'error' in result:
+        return render_template('index.html', error=result['error'])
+
+    result['forecast_html'] = forecast_html
+
+    return render_template('results.html',
+                           do_decomposition=do_decomposition,
+                           do_forecasting=do_forecasting,
+                           do_acf_pacf=do_acf_pacf,
+                           train_percent=train_percent * 100,
+                           test_percent=test_percent * 100,
+                           forecast_periods=forecast_periods,
+                           forecast_history=forecast_history,
+                           selected_indices=selected_indices,
+                           scroll_to_forecast=True,
+                           **result)
+
+
+@app.route('/download_forecast_history')
+def download_forecast_history():
+    forecast_history = session.get('forecast_history', [])
+    if not forecast_history:
+        return render_template('index.html', error='No forecast history available')
+
+    # Create DataFrame for forecast history
+    df = pd.DataFrame(forecast_history)
+    df = df.rename(columns={
+        'train_percent': 'Train Percent (%)',
+        'test_percent': 'Test Percent (%)',
+        'forecast_periods': 'Forecast Periods',
+        'mae': 'MAE',
+        'mse': 'MSE',
+        'rmse': 'RMSE'
+    })
+    df.insert(0, 'Run', range(1, len(df) + 1))
+
+    # Save to Excel
+    output = io.BytesIO()
+    df.to_excel(output, index=False)
+    output.seek(0)
+
+    return send_file(output,
+                     mimetype='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+                     as_attachment=True,
+                     download_name='forecast_history.xlsx')
+
+
@app.route('/download/<filename>')
 def download_file(filename):
    filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)