"""Flask app: upload a time series, then decompose, plot ACF/PACF and forecast it."""
import base64
import io
import os

import matplotlib

matplotlib.use('Agg')  # Use non-interactive backend; must run before pyplot is imported

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import pmdarima as pm
from flask import Flask, request, render_template, send_file, session
from plotly.subplots import make_subplots
from sklearn.metrics import mean_absolute_error, mean_squared_error
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.seasonal import seasonal_decompose
from werkzeug.utils import secure_filename

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = 'Uploads'
app.config['ALLOWED_EXTENSIONS'] = {'csv', 'xls', 'xlsx'}
# Required for session management. A SECRET_KEY environment variable takes
# precedence; the literal fallback keeps the previous behavior when it is unset.
app.secret_key = os.environ.get('SECRET_KEY', 'your-secret-key')

# Ensure upload folder exists
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)

# Seasonal period used for both the decomposition and the seasonal ARIMA search.
SEASONAL_PERIOD = 12
# Upper bound on the number of lags drawn in the ACF/PACF plots.
MAX_CORRELATION_LAGS = 40

UNSUPPORTED_FILE_ERROR = 'Unsupported file type. Please upload a CSV or Excel file.'
INVALID_SELECTION_ERROR = 'Invalid forecast selection'


def allowed_file(filename):
    """Return True when *filename* has an extension listed in ALLOWED_EXTENSIONS."""
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in app.config['ALLOWED_EXTENSIONS']


def create_acf_pacf_plots(data):
    """Render ACF and PACF plots for *data* and return them as an HTML fragment.

    The plots are drawn with matplotlib, encoded as a base64 PNG and embedded
    as the background image of an otherwise empty Plotly figure.
    """
    # plot_pacf rejects lag counts of half the sample size or more, so cap the
    # default for short series instead of failing.
    lags = max(1, min(MAX_CORRELATION_LAGS, len(data) // 2 - 1))

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))
    try:
        plot_acf(data, ax=ax1, lags=lags)
        ax1.set_title('Autocorrelation Function')
        plot_pacf(data, ax=ax2, lags=lags)
        ax2.set_title('Partial Autocorrelation Function')

        # Save this specific figure (not whichever one pyplot considers current).
        buf = io.BytesIO()
        fig.savefig(buf, format='png')
    finally:
        # Always release the figure, even when plotting raises.
        plt.close(fig)
    img_str = base64.b64encode(buf.getvalue()).decode('utf-8')

    # Create Plotly figure with image
    fig_plotly = go.Figure()
    fig_plotly.add_layout_image(
        dict(
            source=f'data:image/png;base64,{img_str}',
            x=0,
            y=1,
            xref="paper",
            yref="paper",
            sizex=1,
            sizey=1,
            sizing="stretch",
            opacity=1,
            layer="below"
        )
    )
    fig_plotly.update_layout(
        height=600,
        showlegend=False,
        xaxis=dict(visible=False),
        yaxis=dict(visible=False)
    )
    return pio.to_html(fig_plotly, full_html=False)


def _load_series(filepath):
    """Read *filepath* (CSV, otherwise Excel) and return ``(df, value_col)``.

    The first column is parsed as dates and becomes the index; the second
    column is treated as the value column.

    Raises:
        ValueError: if the file has fewer than two columns.
    """
    if filepath.endswith('.csv'):
        df = pd.read_csv(filepath)
    else:
        df = pd.read_excel(filepath)
    if df.shape[1] < 2:
        raise ValueError('The file must contain a date column followed by a value column')
    date_col = df.columns[0]  # Assume first column is date
    value_col = df.columns[1]  # Assume second column is value
    df[date_col] = pd.to_datetime(df[date_col])
    df.set_index(date_col, inplace=True)
    return df, value_col


def _future_dates(index, periods):
    """Return *periods* dates that continue *index* at its inferred frequency.

    Raises:
        ValueError: if no regular frequency can be inferred from *index*.
    """
    freq = index.inferred_freq
    if freq is None:
        raise ValueError('Could not infer a regular frequency from the date column')
    # Generate one extra point starting at the last observation, then drop it.
    return pd.date_range(start=index[-1], periods=periods + 1, freq=freq)[1:]


def _fit_and_forecast(series, train_percent, forecast_periods):
    """Fit a seasonal ARIMA on the leading part of *series* and forecast ahead.

    Returns a dict with keys ``model``, ``train_size``, ``test_size``,
    ``test_data``, ``test_predictions`` (None when there is no test split) and
    ``forecast`` (the *forecast_periods* values following the end of *series*).
    """
    train_size = int(len(series) * train_percent)
    test_size = len(series) - train_size
    train_data = series.iloc[:train_size]
    test_data = series.iloc[train_size:]

    # auto_arima returns an estimator already fitted on train_data, so no
    # separate fit call is needed.
    model = pm.auto_arima(train_data, seasonal=True, m=SEASONAL_PERIOD,
                          start_p=0, start_q=0, max_p=3, max_q=3,
                          start_P=0, start_Q=0, max_P=2, max_Q=2,
                          d=1, D=1, trace=False,
                          error_action='ignore', suppress_warnings=True,
                          stepwise=True)

    test_predictions = None
    if test_size > 0:
        # Score the held-out split while the model has only seen the train data.
        test_predictions = model.predict(n_periods=test_size)
        # The forecast is plotted after the last observation of the full
        # series, so feed the held-out observations to the model before
        # predicting; otherwise the values would describe the test period.
        model.update(test_data)
    forecast = model.predict(n_periods=forecast_periods)

    return {
        'model': model,
        'train_size': train_size,
        'test_size': test_size,
        'test_data': test_data,
        'test_predictions': test_predictions,
        'forecast': forecast,
    }


def process_time_series(filepath, do_decomposition, do_forecasting, do_acf_pacf, train_percent,
                        forecast_periods):
    """Run the requested analyses on the uploaded file.

    Args:
        filepath: path of the uploaded CSV/Excel file.
        do_decomposition: add an additive seasonal decomposition plot.
        do_forecasting: fit a seasonal ARIMA and add a forecast plot.
        do_acf_pacf: add ACF/PACF plots.
        train_percent: fraction (0-1) of rows used for training.
        forecast_periods: number of future periods to forecast.

    Returns:
        A dict of template values (plots as HTML fragments, summary statistics,
        model parameters, metrics, forecast dates/values and the name of the
        processed CSV written to the upload folder), or ``{'error': message}``
        when any step raises.
    """
    try:
        df, value_col = _load_series(filepath)
        series = df[value_col]

        # Initialize variables
        plot_html = None
        forecast_html = None
        acf_pacf_html = None
        summary = series.describe().to_dict()
        arima_params = None
        seasonal_params = None
        train_size = None
        test_size = None
        metrics = None
        forecast_dates = []
        forecast_values = []

        # Copy that accumulates the decomposition columns for download
        processed_df = df.copy()

        # Time series decomposition
        if do_decomposition:
            decomposition = seasonal_decompose(series, model='additive', period=SEASONAL_PERIOD)
            fig = make_subplots(rows=4, cols=1,
                                subplot_titles=('Original Series', 'Trend', 'Seasonality', 'Residuals'))
            fig.add_trace(go.Scatter(x=df.index, y=series, name='Original'), row=1, col=1)
            fig.add_trace(go.Scatter(x=df.index, y=decomposition.trend, name='Trend'), row=2, col=1)
            fig.add_trace(go.Scatter(x=df.index, y=decomposition.seasonal, name='Seasonality'), row=3, col=1)
            fig.add_trace(go.Scatter(x=df.index, y=decomposition.resid, name='Residuals'), row=4, col=1)
            fig.update_layout(height=800, showlegend=True)
            plot_html = pio.to_html(fig, full_html=False)

            processed_df['Trend'] = decomposition.trend
            processed_df['Seasonality'] = decomposition.seasonal
            processed_df['Residuals'] = decomposition.resid

        # Forecasting
        if do_forecasting:
            run = _fit_and_forecast(series, train_percent, forecast_periods)
            train_size = run['train_size']
            test_size = run['test_size']
            test_data = run['test_data']
            forecast = run['forecast']

            # Get ARIMA parameters
            arima_params = run['model'].order
            seasonal_params = run['model'].seasonal_order

            # Calculate metrics on test data if available
            if test_size > 0:
                mae = mean_absolute_error(test_data, run['test_predictions'])
                mse = mean_squared_error(test_data, run['test_predictions'])
                rmse = np.sqrt(mse)
                metrics = {'MAE': mae, 'MSE': mse, 'RMSE': rmse}

            # Forecast plot
            future_index = _future_dates(df.index, forecast_periods)
            forecast_fig = go.Figure()
            forecast_fig.add_trace(go.Scatter(x=df.index, y=series, name='Historical'))
            if test_size > 0:
                forecast_fig.add_trace(
                    go.Scatter(x=df.index[train_size:], y=test_data, name='Test Data',
                               line=dict(color='green')))
            forecast_fig.add_trace(
                go.Scatter(x=future_index, y=forecast,
                           name=f'Forecast (ARIMA{arima_params}, Seasonal{seasonal_params})',
                           line=dict(dash='dash')))
            forecast_fig.update_layout(title='Forecast', height=400)
            forecast_html = pio.to_html(forecast_fig, full_html=False)

            forecast_dates = future_index.tolist()
            forecast_values = forecast.tolist()

        # ACF/PACF plots
        if do_acf_pacf:
            acf_pacf_html = create_acf_pacf_plots(series)

        # Save processed data. The output is always CSV, so give it a .csv
        # extension even when the upload was an Excel workbook.
        stem = os.path.splitext(os.path.basename(filepath))[0]
        processed_name = f'processed_{stem}.csv'
        processed_df.to_csv(os.path.join(app.config['UPLOAD_FOLDER'], processed_name))

        return {
            'plot_html': plot_html,
            'forecast_html': forecast_html,
            'acf_pacf_html': acf_pacf_html,
            'summary': summary,
            'filename': processed_name,
            'arima_params': arima_params,
            'seasonal_params': seasonal_params,
            'train_size': train_size,
            'test_size': test_size,
            'metrics': metrics,
            'forecast_dates': forecast_dates,
            'forecast_values': forecast_values
        }
    except Exception as e:
        # Boundary handler: callers show the message to the user, so keep the
        # traceback in the application log.
        app.logger.exception('Time series processing failed for %s', filepath)
        return {'error': str(e)}


def _parse_forecast_form(form):
    """Read and validate the train/test/forecast settings from a submitted form.

    Returns:
        ``(train_percent, test_percent, forecast_periods)`` with the two
        percentages expressed as fractions.

    Raises:
        ValueError: with a user-facing message when a value is not numeric or
            is out of range.
    """
    try:
        train_percent = float(form.get('train_percent', 80)) / 100
        test_percent = float(form.get('test_percent', 20)) / 100
        forecast_periods = int(form.get('forecast_periods', 12))
    except (TypeError, ValueError):
        raise ValueError('Train/test percentages and forecast periods must be numbers') from None

    if abs(train_percent + test_percent - 1.0) > 0.01:  # Allow small float precision errors
        raise ValueError('Train and test percentages must sum to 100%')
    # Also rejects NaN and pairs such as 150/-50 that pass the sum check.
    if not 0 < train_percent <= 1:
        raise ValueError('Train percentage must be greater than 0 and at most 100')
    if forecast_periods < 1:
        raise ValueError('Forecast periods must be at least 1')
    return train_percent, test_percent, forecast_periods


def _record_forecast(forecast_history, train_percent, test_percent, forecast_periods, metrics):
    """Append a run to *forecast_history* unless its parameters are already present.

    Returns True when a new entry was appended, False for a duplicate.
    """
    new_entry = {
        'train_percent': train_percent * 100,
        'test_percent': test_percent * 100,
        'forecast_periods': forecast_periods,
        # Plain floats keep the session payload independent of numpy types.
        'mae': float(metrics['MAE']),
        'mse': float(metrics['MSE']),
        'rmse': float(metrics['RMSE'])
    }
    identity_keys = ('train_percent', 'test_percent', 'forecast_periods')
    if any(all(entry[key] == new_entry[key] for key in identity_keys) for entry in forecast_history):
        return False
    forecast_history.append(new_entry)
    return True


@app.route('/')
def index():
    """Show the upload form."""
    return render_template('index.html')


@app.route('/upload', methods=['POST'])
def upload_file():
    """Store the uploaded file, run the selected analyses and render the results."""
    if 'file' not in request.files:
        return render_template('index.html', error='No file part')
    file = request.files['file']
    if file.filename == '':
        return render_template('index.html', error='No selected file')

    # Check the extension both before and after sanitising: secure_filename can
    # strip characters and leave a name without a usable extension.
    filename = secure_filename(file.filename)
    if not allowed_file(file.filename) or not allowed_file(filename):
        return render_template('index.html', error=UNSUPPORTED_FILE_ERROR)

    filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    file.save(filepath)
    session['filepath'] = filepath  # Store filepath in session
    session['forecast_history'] = []  # Initialize forecast history
    session['selected_indices'] = []  # Initialize selected indices

    # Get user selections
    do_decomposition = 'decomposition' in request.form
    do_forecasting = 'forecasting' in request.form
    do_acf_pacf = 'acf_pacf' in request.form
    try:
        train_percent, test_percent, forecast_periods = _parse_forecast_form(request.form)
    except ValueError as exc:
        return render_template('index.html', error=str(exc))

    session['do_decomposition'] = do_decomposition
    session['do_forecasting'] = do_forecasting
    session['do_acf_pacf'] = do_acf_pacf
    session['train_percent'] = train_percent
    session['test_percent'] = test_percent
    session['forecast_periods'] = forecast_periods

    result = process_time_series(filepath, do_decomposition, do_forecasting, do_acf_pacf,
                                 train_percent, forecast_periods)
    if 'error' in result:
        return render_template('index.html', error=result['error'])

    # Update forecast history if unique
    if do_forecasting and result['metrics']:
        forecast_history = session.get('forecast_history', [])
        if _record_forecast(forecast_history, train_percent, test_percent,
                            forecast_periods, result['metrics']):
            session['forecast_history'] = forecast_history
            session['selected_indices'] = [len(forecast_history) - 1]  # Select latest forecast
            session.modified = True

    return render_template('results.html',
                           do_decomposition=do_decomposition,
                           do_forecasting=do_forecasting,
                           do_acf_pacf=do_acf_pacf,
                           train_percent=train_percent * 100,
                           test_percent=test_percent * 100,
                           forecast_periods=forecast_periods,
                           forecast_history=session['forecast_history'],
                           selected_indices=session['selected_indices'],
                           **result)


@app.route('/reforecast', methods=['POST'])
def reforecast():
    """Re-run the forecast on the session's file with new split/horizon settings."""
    filepath = session.get('filepath')
    if not filepath or not os.path.exists(filepath):
        return render_template('index.html',
                               error='Session expired or file not found. Please upload the file again.')

    # Get user selections from reforecast form
    try:
        train_percent, test_percent, forecast_periods = _parse_forecast_form(request.form)
    except ValueError as exc:
        return render_template('index.html', error=str(exc))
    add_to_existing = 'add_to_existing' in request.form

    # Get original selections from session or defaults
    do_decomposition = session.get('do_decomposition', False)
    do_forecasting = True  # Since this is a reforecast
    do_acf_pacf = session.get('do_acf_pacf', False)

    result = process_time_series(filepath, do_decomposition, do_forecasting, do_acf_pacf,
                                 train_percent, forecast_periods)
    if 'error' in result:
        return render_template('index.html', error=result['error'])

    # Update forecast history if unique
    forecast_history = session.get('forecast_history', [])
    selected_indices = session.get('selected_indices', [])
    if result['metrics']:
        # The selection only changes when a new (non-duplicate) run is recorded.
        if _record_forecast(forecast_history, train_percent, test_percent,
                            forecast_periods, result['metrics']):
            session['forecast_history'] = forecast_history
            latest = len(forecast_history) - 1
            if add_to_existing:
                selected_indices.append(latest)
            else:
                selected_indices = [latest]
            session['selected_indices'] = selected_indices
            session.modified = True

    # Update session with current parameters
    session['train_percent'] = train_percent
    session['test_percent'] = test_percent
    session['forecast_periods'] = forecast_periods

    # Generate comparison plot if multiple forecasts are selected
    if len(selected_indices) > 1:
        try:
            result['forecast_html'] = create_comparison_plot(filepath, forecast_history, selected_indices)
        except Exception as exc:
            # Same user-facing error style as process_time_series.
            app.logger.exception('Forecast comparison failed for %s', filepath)
            return render_template('index.html', error=str(exc))

    return render_template('results.html',
                           do_decomposition=do_decomposition,
                           do_forecasting=do_forecasting,
                           do_acf_pacf=do_acf_pacf,
                           train_percent=train_percent * 100,
                           test_percent=test_percent * 100,
                           forecast_periods=forecast_periods,
                           forecast_history=forecast_history,
                           selected_indices=selected_indices,
                           scroll_to_forecast=True,
                           **result)


def create_comparison_plot(filepath, forecast_history, selected_indices):
    """Plot the forecasts of the selected history entries on one figure.

    Each selected entry is re-fitted from its stored train percentage and
    forecast horizon. Returns the figure as an HTML fragment.
    """
    df, value_col = _load_series(filepath)
    series = df[value_col]

    # Create Plotly figure
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df.index, y=series, name='Historical', line=dict(color='black')))

    # Use Plotly qualitative colors
    colors = px.colors.qualitative.Plotly

    # Generate forecasts for selected indices
    for idx, run_idx in enumerate(selected_indices):
        entry = forecast_history[run_idx]
        forecast_periods = entry['forecast_periods']
        run = _fit_and_forecast(series, entry['train_percent'] / 100, forecast_periods)
        forecast_dates = _future_dates(df.index, forecast_periods)

        # Add test data if available (only once to avoid clutter)
        if run['test_size'] > 0 and idx == 0:
            fig.add_trace(go.Scatter(x=df.index[run['train_size']:], y=run['test_data'],
                                     name='Test Data', line=dict(color='green')))

        # Add forecast
        label = f"Forecast Run {run_idx + 1}: {entry['train_percent']:.0f}/{entry['test_percent']:.0f}, {forecast_periods} periods"
        fig.add_trace(go.Scatter(x=forecast_dates, y=run['forecast'], name=label,
                                 line=dict(dash='dash', color=colors[idx % len(colors)])))

    fig.update_layout(title='Forecast Comparison', height=400, showlegend=True)
    return pio.to_html(fig, full_html=False)


@app.route('/compare_forecasts', methods=['POST'])
def compare_forecasts():
    """Render the results page with a comparison plot of the selected runs."""
    filepath = session.get('filepath')
    if not filepath or not os.path.exists(filepath):
        return render_template('index.html',
                               error='Session expired or file not found. Please upload the file again.')

    # Get selected forecast indices
    try:
        selected_indices = [int(idx) for idx in request.form.getlist('selected_forecasts')]
    except ValueError:
        return render_template('index.html', error=INVALID_SELECTION_ERROR)
    if not selected_indices:
        return render_template('index.html', error='No forecasts selected for comparison')

    forecast_history = session.get('forecast_history', [])
    # Reject indices outside the history; negative values would otherwise
    # silently select entries from the end of the list.
    if any(idx < 0 or idx >= len(forecast_history) for idx in selected_indices):
        return render_template('index.html', error=INVALID_SELECTION_ERROR)

    # Update session with selected indices
    session['selected_indices'] = selected_indices
    session.modified = True

    # Get current parameters and settings
    do_decomposition = session.get('do_decomposition', False)
    do_forecasting = session.get('do_forecasting', True)
    do_acf_pacf = session.get('do_acf_pacf', False)
    train_percent = session.get('train_percent', 0.8)
    test_percent = session.get('test_percent', 0.2)
    forecast_periods = session.get('forecast_periods', 12)

    # Generate comparison plot
    try:
        forecast_html = create_comparison_plot(filepath, forecast_history, selected_indices)
    except Exception as exc:
        # Same user-facing error style as process_time_series.
        app.logger.exception('Forecast comparison failed for %s', filepath)
        return render_template('index.html', error=str(exc))

    # Re-run the current forecast to maintain other results
    result = process_time_series(filepath, do_decomposition, do_forecasting, do_acf_pacf,
                                 train_percent, forecast_periods)
    if 'error' in result:
        return render_template('index.html', error=result['error'])

    result['forecast_html'] = forecast_html

    return render_template('results.html',
                           do_decomposition=do_decomposition,
                           do_forecasting=do_forecasting,
                           do_acf_pacf=do_acf_pacf,
                           train_percent=train_percent * 100,
                           test_percent=test_percent * 100,
                           forecast_periods=forecast_periods,
                           forecast_history=forecast_history,
                           selected_indices=selected_indices,
                           scroll_to_forecast=True,
                           **result)


@app.route('/download_forecast_history')
def download_forecast_history():
    """Send the session's forecast history as an Excel workbook."""
    forecast_history = session.get('forecast_history', [])
    if not forecast_history:
        return render_template('index.html', error='No forecast history available')

    # Create DataFrame for forecast history
    df = pd.DataFrame(forecast_history)
    df = df.rename(columns={
        'train_percent': 'Train Percent (%)',
        'test_percent': 'Test Percent (%)',
        'forecast_periods': 'Forecast Periods',
        'mae': 'MAE',
        'mse': 'MSE',
        'rmse': 'RMSE'
    })
    df.insert(0, 'Run', range(1, len(df) + 1))

    # Save to Excel
    output = io.BytesIO()
    df.to_excel(output, index=False)
    output.seek(0)

    return send_file(output,
                     mimetype='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
                     as_attachment=True,
                     download_name='forecast_history.xlsx')


# The URL variable supplies the view's ``filename`` argument.
@app.route('/download/<filename>')
def download_file(filename):
    """Send a file from the upload folder as an attachment."""
    # The name comes from the URL, so sanitise it before touching the filesystem.
    safe_name = secure_filename(filename)
    # Resolve against the working directory, which is where the upload and
    # processing steps wrote their relative paths.
    filepath = os.path.abspath(os.path.join(app.config['UPLOAD_FOLDER'], safe_name))
    if not safe_name or not os.path.isfile(filepath):
        return render_template('index.html', error='File not found')
    return send_file(filepath, as_attachment=True)


if __name__ == '__main__':
    app.run(debug=True)