some new features
This commit is contained in:
231
app.py
231
app.py
@ -17,6 +17,7 @@ import matplotlib.pyplot as plt
|
||||
import io
|
||||
import base64
|
||||
import numpy as np
|
||||
from sklearn.metrics import mean_absolute_error, mean_squared_error
|
||||
|
||||
app = Flask(__name__)
|
||||
app.config['UPLOAD_FOLDER'] = 'Uploads'
|
||||
@ -96,6 +97,7 @@ def process_time_series(filepath, do_decomposition, do_forecasting, do_acf_pacf,
|
||||
seasonal_params = None
|
||||
train_size = None
|
||||
test_size = None
|
||||
metrics = None
|
||||
|
||||
# Save processed data
|
||||
processed_df = df.copy()
|
||||
@ -148,6 +150,14 @@ def process_time_series(filepath, do_decomposition, do_forecasting, do_acf_pacf,
|
||||
arima_params = model.order
|
||||
seasonal_params = model.seasonal_order
|
||||
|
||||
# Calculate metrics on test data if available
|
||||
if test_size > 0:
|
||||
test_predictions = model_fit.predict(n_periods=test_size)
|
||||
mae = mean_absolute_error(test_data, test_predictions)
|
||||
mse = mean_squared_error(test_data, test_predictions)
|
||||
rmse = np.sqrt(mse)
|
||||
metrics = {'MAE': mae, 'MSE': mse, 'RMSE': rmse}
|
||||
|
||||
# Forecast plot
|
||||
forecast_dates = pd.date_range(start=df.index[-1], periods=forecast_periods + 1,
|
||||
freq=df.index.inferred_freq)[1:]
|
||||
@ -156,8 +166,10 @@ def process_time_series(filepath, do_decomposition, do_forecasting, do_acf_pacf,
|
||||
if test_size > 0:
|
||||
forecast_fig.add_trace(
|
||||
go.Scatter(x=df.index[train_size:], y=test_data, name='Test Data', line=dict(color='green')))
|
||||
forecast_fig.add_trace(go.Scatter(x=forecast_dates, y=forecast, name='Forecast', line=dict(dash='dash')))
|
||||
forecast_fig.update_layout(title=f'Forecast (ARIMA{arima_params}, Seasonal{seasonal_params})', height=400)
|
||||
forecast_fig.add_trace(go.Scatter(x=forecast_dates, y=forecast,
|
||||
name=f'Forecast (ARIMA{arima_params}, Seasonal{seasonal_params})',
|
||||
line=dict(dash='dash')))
|
||||
forecast_fig.update_layout(title='Forecast', height=400)
|
||||
forecast_html = pio.to_html(forecast_fig, full_html=False)
|
||||
|
||||
# ACF/PACF plots
|
||||
@ -176,7 +188,10 @@ def process_time_series(filepath, do_decomposition, do_forecasting, do_acf_pacf,
|
||||
'arima_params': arima_params,
|
||||
'seasonal_params': seasonal_params,
|
||||
'train_size': train_size,
|
||||
'test_size': test_size
|
||||
'test_size': test_size,
|
||||
'metrics': metrics,
|
||||
'forecast_dates': forecast_dates.tolist() if do_forecasting else [],
|
||||
'forecast_values': forecast.tolist() if do_forecasting else []
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
@ -202,6 +217,8 @@ def upload_file():
|
||||
filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
|
||||
file.save(filepath)
|
||||
session['filepath'] = filepath # Store filepath in session
|
||||
session['forecast_history'] = [] # Initialize forecast history
|
||||
session['selected_indices'] = [] # Initialize selected indices
|
||||
|
||||
# Get user selections
|
||||
do_decomposition = 'decomposition' in request.form
|
||||
@ -209,6 +226,7 @@ def upload_file():
|
||||
do_acf_pacf = 'acf_pacf' in request.form
|
||||
train_percent = float(request.form.get('train_percent', 80)) / 100
|
||||
test_percent = float(request.form.get('test_percent', 20)) / 100
|
||||
forecast_periods = int(request.form.get('forecast_periods', 12))
|
||||
|
||||
# Validate train/test percentages
|
||||
if abs(train_percent + test_percent - 1.0) > 0.01: # Allow small float precision errors
|
||||
@ -217,20 +235,46 @@ def upload_file():
|
||||
session['do_decomposition'] = do_decomposition
|
||||
session['do_forecasting'] = do_forecasting
|
||||
session['do_acf_pacf'] = do_acf_pacf
|
||||
session['train_percent'] = train_percent
|
||||
session['test_percent'] = test_percent
|
||||
session['forecast_periods'] = forecast_periods
|
||||
|
||||
result = process_time_series(filepath, do_decomposition, do_forecasting, do_acf_pacf, train_percent,
|
||||
forecast_periods=int(request.form.get('forecast_periods', 12)))
|
||||
forecast_periods)
|
||||
|
||||
if 'error' in result:
|
||||
return render_template('index.html', error=result['error'])
|
||||
|
||||
# Update forecast history if unique
|
||||
if do_forecasting and result['metrics']:
|
||||
new_entry = {
|
||||
'train_percent': train_percent * 100,
|
||||
'test_percent': test_percent * 100,
|
||||
'forecast_periods': forecast_periods,
|
||||
'mae': result['metrics']['MAE'] if result['metrics'] else None,
|
||||
'mse': result['metrics']['MSE'] if result['metrics'] else None,
|
||||
'rmse': result['metrics']['RMSE'] if result['metrics'] else None
|
||||
}
|
||||
# Check for duplicates
|
||||
forecast_history = session.get('forecast_history', [])
|
||||
if not any(entry['train_percent'] == new_entry['train_percent'] and
|
||||
entry['test_percent'] == new_entry['test_percent'] and
|
||||
entry['forecast_periods'] == new_entry['forecast_periods']
|
||||
for entry in forecast_history):
|
||||
forecast_history.append(new_entry)
|
||||
session['forecast_history'] = forecast_history
|
||||
session['selected_indices'] = [len(forecast_history) - 1] # Select latest forecast
|
||||
session.modified = True
|
||||
|
||||
return render_template('results.html',
|
||||
do_decomposition=do_decomposition,
|
||||
do_forecasting=do_forecasting,
|
||||
do_acf_pacf=do_acf_pacf,
|
||||
train_percent=train_percent * 100,
|
||||
test_percent=test_percent * 100,
|
||||
forecast_periods=int(request.form.get('forecast_periods', 12)),
|
||||
forecast_periods=forecast_periods,
|
||||
forecast_history=session['forecast_history'],
|
||||
selected_indices=session['selected_indices'],
|
||||
**result)
|
||||
|
||||
|
||||
@ -244,6 +288,7 @@ def reforecast():
|
||||
train_percent = float(request.form.get('train_percent', 80)) / 100
|
||||
test_percent = float(request.form.get('test_percent', 20)) / 100
|
||||
forecast_periods = int(request.form.get('forecast_periods', 12))
|
||||
add_to_existing = 'add_to_existing' in request.form
|
||||
|
||||
# Validate train/test percentages
|
||||
if abs(train_percent + test_percent - 1.0) > 0.01: # Allow small float precision errors
|
||||
@ -260,11 +305,41 @@ def reforecast():
|
||||
if 'error' in result:
|
||||
return render_template('index.html', error=result['error'])
|
||||
|
||||
# Update session with new parameters
|
||||
# Update forecast history if unique
|
||||
forecast_history = session.get('forecast_history', [])
|
||||
selected_indices = session.get('selected_indices', [])
|
||||
if do_forecasting and result['metrics']:
|
||||
new_entry = {
|
||||
'train_percent': train_percent * 100,
|
||||
'test_percent': test_percent * 100,
|
||||
'forecast_periods': forecast_periods,
|
||||
'mae': result['metrics']['MAE'] if result['metrics'] else None,
|
||||
'mse': result['metrics']['MSE'] if result['metrics'] else None,
|
||||
'rmse': result['metrics']['RMSE'] if result['metrics'] else None
|
||||
}
|
||||
# Check for duplicates
|
||||
if not any(entry['train_percent'] == new_entry['train_percent'] and
|
||||
entry['test_percent'] == new_entry['test_percent'] and
|
||||
entry['forecast_periods'] == new_entry['forecast_periods']
|
||||
for entry in forecast_history):
|
||||
forecast_history.append(new_entry)
|
||||
session['forecast_history'] = forecast_history
|
||||
if add_to_existing:
|
||||
selected_indices.append(len(forecast_history) - 1)
|
||||
else:
|
||||
selected_indices = [len(forecast_history) - 1]
|
||||
session['selected_indices'] = selected_indices
|
||||
session.modified = True
|
||||
|
||||
# Update session with current parameters
|
||||
session['train_percent'] = train_percent
|
||||
session['test_percent'] = test_percent
|
||||
session['forecast_periods'] = forecast_periods
|
||||
|
||||
# Generate comparison plot if multiple forecasts are selected
|
||||
if len(selected_indices) > 1:
|
||||
result['forecast_html'] = create_comparison_plot(filepath, forecast_history, selected_indices)
|
||||
|
||||
return render_template('results.html',
|
||||
do_decomposition=do_decomposition,
|
||||
do_forecasting=do_forecasting,
|
||||
@ -272,9 +347,153 @@ def reforecast():
|
||||
train_percent=train_percent * 100,
|
||||
test_percent=test_percent * 100,
|
||||
forecast_periods=forecast_periods,
|
||||
forecast_history=forecast_history,
|
||||
selected_indices=selected_indices,
|
||||
scroll_to_forecast=True,
|
||||
**result)
|
||||
|
||||
|
||||
def create_comparison_plot(filepath, forecast_history, selected_indices):
    """Build an HTML fragment overlaying several forecast runs on the historical series.

    The file at ``filepath`` is re-read and, for every selected run, a seasonal
    ARIMA model is fitted again on that run's training share of the data. Each
    resulting forecast is drawn as a dashed trace on a single Plotly figure.

    Args:
        filepath: Path to the data file. Paths ending in ``.csv`` are read with
            ``pd.read_csv``; anything else is read with ``pd.read_excel``. The
            first column is parsed as dates and used as the index, the second
            column is the plotted series.
        forecast_history: List of dicts, each providing ``train_percent`` and
            ``test_percent`` (on a 0-100 scale) and ``forecast_periods``.
        selected_indices: Positions in ``forecast_history`` of the runs to plot.
            Positions that are not integers inside ``0 .. len(forecast_history) - 1``
            are skipped instead of raising ``IndexError`` (or, for negative
            values, silently selecting a different run).

    Returns:
        str: The figure rendered with ``pio.to_html(fig, full_html=False)``.
    """
    # Read data
    if filepath.endswith('.csv'):
        df = pd.read_csv(filepath)
    else:
        df = pd.read_excel(filepath)

    date_col = df.columns[0]
    value_col = df.columns[1]
    df[date_col] = pd.to_datetime(df[date_col])
    df.set_index(date_col, inplace=True)

    # Create Plotly figure
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df.index, y=df[value_col], name='Historical', line=dict(color='black')))

    # Use Plotly qualitative colors
    colors = px.colors.qualitative.Plotly

    # Keep only positions that actually exist in the history; the selection can
    # originate from form data or an older session and may be stale.
    history_len = len(forecast_history)
    valid_indices = [run_idx for run_idx in selected_indices
                     if isinstance(run_idx, int) and 0 <= run_idx < history_len]

    # Generate forecasts for selected indices
    for idx, run_idx in enumerate(valid_indices):
        entry = forecast_history[run_idx]
        train_percent = entry['train_percent'] / 100
        forecast_periods = entry['forecast_periods']

        # Split data
        train_size = int(len(df) * train_percent)
        test_size = len(df) - train_size
        train_data = df[value_col].iloc[:train_size]
        # Empty slice when there is no test portion; only plotted if test_size > 0.
        test_data = df[value_col].iloc[train_size:]

        # Run ARIMA
        model = pm.auto_arima(train_data,
                              seasonal=True,
                              m=12,
                              start_p=0, start_q=0,
                              max_p=3, max_q=3,
                              start_P=0, start_Q=0,
                              max_P=2, max_Q=2,
                              d=1, D=1,
                              trace=False,
                              error_action='ignore',
                              suppress_warnings=True,
                              stepwise=True)

        # NOTE(review): auto_arima presumably already returns a fitted model,
        # which would make this second fit redundant work — confirm before removing.
        model_fit = model.fit(train_data)
        forecast = model_fit.predict(n_periods=forecast_periods)
        # NOTE(review): the model is fitted on the training slice only, yet the
        # forecast is plotted on dates that follow the LAST observation of the
        # full series. When a test portion exists the two may be misaligned —
        # confirm this is intended.
        forecast_dates = pd.date_range(start=df.index[-1], periods=forecast_periods + 1,
                                       freq=df.index.inferred_freq)[1:]

        # Add test data if available (only once to avoid clutter)
        if test_size > 0 and idx == 0:
            fig.add_trace(go.Scatter(x=df.index[train_size:], y=test_data, name='Test Data', line=dict(color='green')))

        # Add forecast
        label = f"Forecast Run {run_idx + 1}: {entry['train_percent']:.0f}/{entry['test_percent']:.0f}, {forecast_periods} periods"
        fig.add_trace(go.Scatter(x=forecast_dates, y=forecast, name=label,
                                 line=dict(dash='dash', color=colors[idx % len(colors)])))

    fig.update_layout(title='Forecast Comparison', height=400, showlegend=True)
    return pio.to_html(fig, full_html=False)
|
||||
|
||||
|
||||
@app.route('/compare_forecasts', methods=['POST'])
def compare_forecasts():
    """Render the results page with a comparison plot of the selected forecast runs.

    Reads the uploaded file path, analysis settings and forecast history from
    the session, and the selected run positions from the ``selected_forecasts``
    form field. The selection is validated before it is stored in the session:
    non-integer values and positions outside the forecast history produce an
    error page instead of an unhandled ``ValueError`` / ``IndexError``.

    Returns:
        The rendered ``results.html`` template on success, or ``index.html``
        with an ``error`` message when the session file is missing, nothing is
        selected, the selection is invalid, or processing reports an error.
    """
    filepath = session.get('filepath')
    if not filepath or not os.path.exists(filepath):
        return render_template('index.html', error='Session expired or file not found. Please upload the file again.')

    forecast_history = session.get('forecast_history', [])

    # Get selected forecast indices
    raw_indices = request.form.getlist('selected_forecasts')
    if not raw_indices:
        return render_template('index.html', error='No forecasts selected for comparison')

    # Form values are untrusted text: reject anything that is not an integer
    # position inside the stored history before it reaches the session.
    try:
        selected_indices = [int(idx) for idx in raw_indices]
    except ValueError:
        return render_template('index.html', error='Invalid forecast selection')
    if any(idx < 0 or idx >= len(forecast_history) for idx in selected_indices):
        return render_template('index.html', error='Invalid forecast selection')

    # Update session with selected indices
    session['selected_indices'] = selected_indices
    session.modified = True

    # Get current parameters and settings
    do_decomposition = session.get('do_decomposition', False)
    do_forecasting = session.get('do_forecasting', True)
    do_acf_pacf = session.get('do_acf_pacf', False)
    train_percent = session.get('train_percent', 0.8)
    test_percent = session.get('test_percent', 0.2)
    forecast_periods = session.get('forecast_periods', 12)

    # Generate comparison plot
    forecast_html = create_comparison_plot(filepath, forecast_history, selected_indices)

    # Re-run the current forecast to maintain other results
    result = process_time_series(filepath, do_decomposition, do_forecasting, do_acf_pacf, train_percent,
                                 forecast_periods)

    if 'error' in result:
        return render_template('index.html', error=result['error'])

    # Show the comparison figure in place of the single-run forecast plot.
    result['forecast_html'] = forecast_html

    return render_template('results.html',
                           do_decomposition=do_decomposition,
                           do_forecasting=do_forecasting,
                           do_acf_pacf=do_acf_pacf,
                           train_percent=train_percent * 100,
                           test_percent=test_percent * 100,
                           forecast_periods=forecast_periods,
                           forecast_history=forecast_history,
                           selected_indices=selected_indices,
                           scroll_to_forecast=True,
                           **result)
|
||||
|
||||
|
||||
@app.route('/download_forecast_history')
def download_forecast_history():
    """Send the session's forecast history to the client as an Excel workbook.

    Each history entry becomes one row; columns are relabelled for display and
    a 1-based ``Run`` column is placed first. If the session holds no history,
    ``index.html`` is rendered with an error message instead.
    """
    history_rows = session.get('forecast_history', [])
    if not history_rows:
        return render_template('index.html', error='No forecast history available')

    # Display labels for the keys stored in each history entry.
    column_labels = {
        'train_percent': 'Train Percent (%)',
        'test_percent': 'Test Percent (%)',
        'forecast_periods': 'Forecast Periods',
        'mae': 'MAE',
        'mse': 'MSE',
        'rmse': 'RMSE',
    }
    history_df = pd.DataFrame(history_rows).rename(columns=column_labels)
    run_numbers = range(1, len(history_df) + 1)
    history_df.insert(0, 'Run', run_numbers)

    # Write the workbook into memory and rewind so send_file reads from the start.
    workbook = io.BytesIO()
    history_df.to_excel(workbook, index=False)
    workbook.seek(0)

    return send_file(
        workbook,
        mimetype='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        as_attachment=True,
        download_name='forecast_history.xlsx',
    )
|
||||
|
||||
|
||||
@app.route('/download/<filename>')
|
||||
def download_file(filename):
|
||||
filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
|
||||
|
||||
Reference in New Issue
Block a user