Forecasting US Cocoa Bean Imports

Data: Census Data 2020-2024: US Cocoa Bean Imports

Goal: Forecast the next 3 months of US Cocoa Bean Imports (the first 3 months of 2025) in terms of weight, which would ultimately be used to estimate revenue.


Below is a preview of the results of the forecasting models that predict the next 3 months of US Cocoa Bean Imports. They were evaluated on MAE (mean absolute error) and MAPE (mean absolute percentage error), which average the absolute errors and absolute percentage errors of the forecasts across all the horizon/cutoff splits. H1, H2, H3 are horizons 1, 2, 3 - the number of months after the cutoff date (the point where the data is cut before forecasting). The Triple Exponential Smoothing model on the raw (non-log-transformed) data in the 1st row performed the best overall (see results at end for detail):
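
For reference, here is a minimal sketch (not part of the original pipeline) of how MAE and MAPE are computed for each horizon from a table of cross-validation results; the column names mirror the CV tables shown later:

import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error

# Illustrative rows taken from the tuned model's CV table further below
cv = pd.DataFrame({
    "horizon": [1, 1, 2, 2],
    "y":    [15_172_388, 11_741_002, 17_305_596, 64_702_033],   # actuals
    "yhat": [19_758_979, 17_314_661, 18_983_433, 39_890_376],   # forecasts
})

# Average the absolute (and absolute percentage) errors within each horizon
for h, grp in cv.groupby("horizon"):
    mae = mean_absolute_error(grp["y"], grp["yhat"])
    mape = mean_absolute_percentage_error(grp["y"], grp["yhat"])
    print(f"H{h}: MAE = {mae:,.0f} | MAPE = {mape:.0%}")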

In [1]:
from IPython.display import Image, display
display(Image(url="https://raw.githubusercontent.com/lindsayalexandra14/ds_portfolio/main/1_projects/machine_learning/time_series_and_survival/cocoa_imports/cocoa_imports_final_model_performance.png",
    width=800))

Final Model Fit & Forecast:

In [2]:
display(Image(url="https://raw.githubusercontent.com/lindsayalexandra14/ds_portfolio/main/1_projects/machine_learning/time_series_and_survival/cocoa_imports/cocoa_imports_final_model_plot.png",
    width=800))


Install Packages:

In [ ]:
!pip install pmdarima

Import Libraries:

In [4]:
import itertools
import logging
import os
import time
import warnings

import numpy as np
import pandas as pd
import requests
from tabulate import tabulate

import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import matplotlib.ticker as ticker
from matplotlib.ticker import FuncFormatter
import plotly.express as px
import seaborn as sns

import cmdstanpy
import prophet
from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics

import statsmodels.api as sm
import pmdarima as pm
from pmdarima.arima import auto_arima
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf, month_plot, quarter_plot
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.exponential_smoothing.ets import ETSModel
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

logging.getLogger('cmdstanpy').setLevel(logging.WARNING)
os.environ["TQDM_NOTEBOOK"] = "false"

warnings.filterwarnings("ignore")

Custom Palette:

In [5]:
custom_palette = [
    '#cb95f9',
    '#FAC287',
    '#895b3a',
    '#8383F7',
    '#C2C8FF',
    '#CACACC',
    '#C0F0EF',
    '#101827',
    '#1bc2bb'
]
In [6]:
sns.palplot(custom_palette)
plt.show()


I pulled the data from the US census.gov API for 2020-2024. It includes the total estimate of US Cocoa Bean Imports (weight in lbs, spend in $s), broken out by region/country:

Example URL:

https://api.census.gov/data/timeseries/intltrade/imports/enduse?get=CTY_CODE,CTY_NAME,I_ENDUSE,I_ENDUSE_LDESC,GEN_VAL_MO,CON_VAL_MO&I_ENDUSE=00010&YEAR=2013&MONTH=02

In [7]:
base_url = "https://api.census.gov/data/timeseries/intltrade/imports/enduse"
get_fields = "CTY_CODE,CTY_NAME,I_ENDUSE,I_ENDUSE_LDESC,GEN_VAL_MO,CON_VAL_MO,AIR_WGT_MO,CNT_WGT_MO,VES_WGT_MO"
I_ENDUSE = "00010"
start_year = 2020
end_year = 2024
months = range(1, 13)

all_data = []

for year in range(start_year, end_year + 1):
    for month in months:
        time_str = f"{year}-{month:02}"
        url = f"{base_url}?get={get_fields}&I_ENDUSE={I_ENDUSE}&time={time_str}"
        response = requests.get(url)
        if response.status_code == 200:
            data = response.json()
            headers = data[0]
            values = data[1:]
            all_data.extend(values)
        else:
            print(f"Error fetching {time_str}: {response.status_code}")

df = pd.DataFrame(all_data, columns=headers)
print(df)
     CTY_CODE            CTY_NAME I_ENDUSE I_ENDUSE_LDESC GEN_VAL_MO  \
0        6040    PAPUA NEW GUINEA    00010    COCOA BEANS    1825500   
1        2489             GRENADA    00010    COCOA BEANS       4172   
2        3010            COLOMBIA    00010    COCOA BEANS     254616   
3        3070           VENEZUELA    00010    COCOA BEANS     281799   
4        3310             ECUADOR    00010    COCOA BEANS    9281115   
...       ...                 ...      ...            ...        ...   
3018     2470  DOMINICAN REPUBLIC    00010    COCOA BEANS    8116783   
3019     7830            TANZANIA    00010    COCOA BEANS     105958   
3020     7880          MADAGASCAR    00010    COCOA BEANS     142291   
3021     2XXX     CENTRAL AMERICA    00010    COCOA BEANS    8116783   
3022     3XXX       SOUTH AMERICA    00010    COCOA BEANS   77767295   

     CON_VAL_MO AIR_WGT_MO CNT_WGT_MO VES_WGT_MO I_ENDUSE     time  
0       1825500          0     731520     731520    00010  2020-01  
1          4172       1090          0          0    00010  2020-01  
2        254616          0      75950      75950    00010  2020-01  
3        281799          0     113401     113401    00010  2020-01  
4       9281115          0    3359617    3359617    00010  2020-01  
...         ...        ...        ...        ...      ...      ...  
3018    8116783          0    1086180    1086180    00010  2024-12  
3019     105958          0      12826      12826    00010  2024-12  
3020     142291        210      13219      13219    00010  2024-12  
3021    8116783          0    1086180    1086180    00010  2024-12  
3022   77767295       5188    9285205    9285205    00010  2024-12  

[3023 rows x 11 columns]
In [8]:
df.to_csv("us_cocoa_imports_2020_2024.csv", index=False)

Variables:

https://api.census.gov/data/timeseries/intltrade/imports/enduse/variables.html

Variable Dictionary:

In [9]:
variable_dict = {
  "CTY_CODE": "Country Code",
  "CTY_NAME": "Country Name",
  "I_ENDUSE": "Import End Use Code",
  "I_ENDUSE_LDESC": "Import End Use Long Description",
  "GEN_VAL_MO": "General Imports Total Value",
  "CON_VAL_MO": "Imports for Consumption, Total Value",
  "AIR_WGT_MO": "Air Shipping Weight",
  "CNT_WGT_MO": "Containerized Vessel Shipping Weight",
  "VES_WGT_MO": "Vessel Shipping Weight"}
In [10]:
print(tabulate(variable_dict.items(), headers=["Variable", "Description"]))
Variable        Description
--------------  ------------------------------------
CTY_CODE        Country Code
CTY_NAME        Country Name
I_ENDUSE        Import End Use Code
I_ENDUSE_LDESC  Import End Use Long Description
GEN_VAL_MO      General Imports Total Value
CON_VAL_MO      Imports for Consumption, Total Value
AIR_WGT_MO      Air Shipping Weight
CNT_WGT_MO      Containerized Vessel Shipping Weight
VES_WGT_MO      Vessel Shipping Weight


The data was complete and there were no missing values:

There are ~3K monthly records for the timeframe that I pulled.

In [11]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3023 entries, 0 to 3022
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   CTY_CODE        3023 non-null   object
 1   CTY_NAME        3023 non-null   object
 2   I_ENDUSE        3023 non-null   object
 3   I_ENDUSE_LDESC  3023 non-null   object
 4   GEN_VAL_MO      3023 non-null   object
 5   CON_VAL_MO      3023 non-null   object
 6   AIR_WGT_MO      3023 non-null   object
 7   CNT_WGT_MO      3023 non-null   object
 8   VES_WGT_MO      3023 non-null   object
 9   I_ENDUSE        3023 non-null   object
 10  time            3023 non-null   object
dtypes: object(11)
memory usage: 259.9+ KB

Add "weight" variable, which is a combination of air, container, and vessel weights:

In [12]:
df["Weight"] = df["AIR_WGT_MO"].astype(int) + df["CNT_WGT_MO"].astype(int) + df["VES_WGT_MO"].astype(int)

Drop unneeded variables: the country code, the cocoa beans end-use code & description, the consumption value (CON_VAL_MO, imports for consumption only, vs. GEN_VAL_MO, which covers all imports), and the individual weight fields:

In [13]:
df.drop(columns=["CTY_CODE","I_ENDUSE","I_ENDUSE_LDESC","CON_VAL_MO","AIR_WGT_MO","CNT_WGT_MO","VES_WGT_MO"], inplace=True)

Rename the country, spend, and date columns:

In [14]:
df.rename(columns={'CTY_NAME':'Country'}, inplace=True)
df.rename(columns={'GEN_VAL_MO':'Spend'}, inplace=True)
df.rename(columns={'time':'Import Date'}, inplace=True)

Change datatypes:

In [15]:
df['Import Date'] = pd.to_datetime(
    df['Import Date'],
    format='%Y-%m',
    errors='raise'
)
In [16]:
df['Year'] = df['Import Date'].dt.year
In [17]:
df['Month'] = df['Import Date'].dt.month
In [18]:
df['Spend']=df['Spend'].astype(int)

Add field for price per pound:

In [19]:
df['Price per Pound'] = df['Spend'] / df['Weight']

Field creation introduced NAs (from dividing by zero weights); fill NAs with 0:

In [20]:
df['Price per Pound']=df['Price per Pound'].fillna(0)

View country names:

In [21]:
df['Country'].unique()
Out[21]:
array(['PAPUA NEW GUINEA', 'GRENADA', 'COLOMBIA', 'VENEZUELA', 'ECUADOR',
       'PERU', 'BRAZIL', 'FIJI', "COTE D'IVOIRE", 'GHANA',
       'DEMOCRATIC REPUBLIC OF THE CONGO', 'TOTAL FOR ALL COUNTRIES',
       'NORTH AMERICA', 'AFRICA', 'PACIFIC RIM COUNTRIES', 'CAFTA-DR',
       'AUSTRALIA AND OCEANIA', 'USMCA (NAFTA)',
       'TWENTY LATIN AMERICAN REPUBLICS', 'OECD', 'LAFTA', 'APEC', 'CACM',
       'MEXICO', 'GUATEMALA', 'COSTA RICA', 'DOMINICAN REPUBLIC',
       'TANZANIA', 'MADAGASCAR', 'CENTRAL AMERICA', 'SOUTH AMERICA',
       'CAMEROON', 'HAITI', 'INDIA', 'BOLIVIA', 'FRANCE', 'SWITZERLAND',
       'EUROPEAN UNION', 'EUROPE', 'ASIA', 'NATO', 'EURO AREA',
       'NICARAGUA', 'INDONESIA', 'PHILIPPINES', 'NIGERIA', 'CONGO',
       'ASEAN', 'NETHERLANDS', 'UGANDA', 'BELIZE', 'EL SALVADOR',
       'JAMAICA', 'TRINIDAD AND TOBAGO', 'HONDURAS', 'VIETNAM', 'TURKEY',
       'ITALY', 'CANADA', 'THAILAND', 'TOGO', 'BELGIUM', 'SIERRA LEONE',
       'MALAYSIA', 'GERMANY', 'CZECH REPUBLIC', 'PANAMA', 'SPAIN',
       'VANUATU', 'SAMOA'], dtype=object)
In [22]:
df['Country'].nunique()
Out[22]:
70

Identify groups of countries to remove, to keep only individual countries:

In [23]:
groups_to_remove = [
    "TOTAL FOR ALL COUNTRIES", "USMCA (NAFTA)", "OECD", "LAFTA", "APEC", "CACM",
    "EUROPEAN UNION", "EUROPE", "EURO AREA", "NATO", "ASEAN",
    "AFRICA", "ASIA", "NORTH AMERICA", "CENTRAL AMERICA", "SOUTH AMERICA",
    "AUSTRALIA AND OCEANIA", "TWENTY LATIN AMERICAN REPUBLICS",
    "PACIFIC RIM COUNTRIES"
]

df = df[~df["Country"].isin(groups_to_remove)]

Assign continent group to countries:

In [24]:
continent_map = {
    # Asia
    'INDONESIA': 'Asia', 'VIETNAM': 'Asia',
    'INDIA': 'Asia', 'THAILAND': 'Asia', 'PHILIPPINES': 'Asia', 'MALAYSIA': 'Asia',
    'TURKEY': 'Asia', #Turkey is 97% in Asia - 3% in Europe

    # Africa
    'CAMEROON': 'Africa', "COTE D'IVOIRE": 'Africa', 'GHANA': 'Africa', 'NIGERIA': 'Africa',
    'DEMOCRATIC REPUBLIC OF THE CONGO': 'Africa', 'CONGO': 'Africa', 'TANZANIA': 'Africa', 'UGANDA': 'Africa',
    'TOGO': 'Africa', 'SIERRA LEONE': 'Africa', 'MADAGASCAR': 'Africa',

    # Europe
    'NETHERLANDS': 'Europe', 'FRANCE': 'Europe', 'SWITZERLAND': 'Europe',
    'ITALY': 'Europe', 'BELGIUM': 'Europe', 'GERMANY': 'Europe', 'CZECH REPUBLIC': 'Europe', 'SPAIN': 'Europe',

    # North America
    'EL SALVADOR': 'North America', 'GRENADA': 'North America', 'PANAMA': 'North America',
    'CANADA': 'North America', 'MEXICO': 'North America', 'GUATEMALA': 'North America', 'HONDURAS': 'North America',
    'COSTA RICA': 'North America', 'HAITI': 'North America', 'DOMINICAN REPUBLIC': 'North America',
    'JAMAICA': 'North America',
    'TRINIDAD AND TOBAGO': 'North America', 'BELIZE': 'North America', 'NICARAGUA': 'North America',
    'CAFTA-DR': 'North America',

    # South America
    'ECUADOR': 'South America', 'PERU': 'South America', 'BRAZIL': 'South America', 'COLOMBIA': 'South America',
    'BOLIVIA': 'South America', 'VENEZUELA': 'South America',

    # Oceania
    'FIJI': 'Oceania', 'VANUATU': 'Oceania', 'SAMOA': 'Oceania', 'PAPUA NEW GUINEA': 'Oceania'

}

df['Continent'] = df['Country'].map(continent_map).fillna('Other')

print(df)
                 Country    Spend Import Date   Weight  Year  Month  \
0       PAPUA NEW GUINEA  1825500  2020-01-01  1463040  2020      1   
1                GRENADA     4172  2020-01-01     1090  2020      1   
2               COLOMBIA   254616  2020-01-01   151900  2020      1   
3              VENEZUELA   281799  2020-01-01   226802  2020      1   
4                ECUADOR  9281115  2020-01-01  6719234  2020      1   
...                  ...      ...         ...      ...   ...    ...   
3016          COSTA RICA        0  2024-12-01        0  2024     12   
3017               HAITI        0  2024-12-01        0  2024     12   
3018  DOMINICAN REPUBLIC  8116783  2024-12-01  2172360  2024     12   
3019            TANZANIA   105958  2024-12-01    25652  2024     12   
3020          MADAGASCAR   142291  2024-12-01    26648  2024     12   

      Price per Pound      Continent  
0            1.247744        Oceania  
1            3.827523  North America  
2            1.676208  South America  
3            1.242489  South America  
4            1.381276  South America  
...               ...            ...  
3016         0.000000  North America  
3017         0.000000  North America  
3018         3.736389  North America  
3019         4.130594         Africa  
3020         5.339650         Africa  

[1906 rows x 8 columns]
In [25]:
new_order = ['Continent','Country', 'Import Date', 'Month','Year', 'Weight', 'Price per Pound', 'Spend']

df = df[new_order]
In [26]:
df.head()
Out[26]:
Continent Country Import Date Month Year Weight Price per Pound Spend
0 Oceania PAPUA NEW GUINEA 2020-01-01 1 2020 1463040 1.247744 1825500
1 North America GRENADA 2020-01-01 1 2020 1090 3.827523 4172
2 South America COLOMBIA 2020-01-01 1 2020 151900 1.676208 254616
3 South America VENEZUELA 2020-01-01 1 2020 226802 1.242489 281799
4 South America ECUADOR 2020-01-01 1 2020 6719234 1.381276 9281115
In [27]:
df.info()
<class 'pandas.core.frame.DataFrame'>
Index: 1906 entries, 0 to 3020
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   Continent        1906 non-null   object        
 1   Country          1906 non-null   object        
 2   Import Date      1906 non-null   datetime64[ns]
 3   Month            1906 non-null   int32         
 4   Year             1906 non-null   int32         
 5   Weight           1906 non-null   int64         
 6   Price per Pound  1906 non-null   float64       
 7   Spend            1906 non-null   int64         
dtypes: datetime64[ns](1), float64(1), int32(2), int64(2), object(2)
memory usage: 119.1+ KB

There are ~1.9K records now, covering 51 countries:

In [28]:
df['Country'].nunique()
Out[28]:
51


Total US Cocoa Bean Imports | Spend by Year:

Spend in 2024 (1.2B) was a close 2nd to the highest which was in 2021 (1.3B):

In [29]:
spend_by_year = df.groupby(['Year'])['Spend'].sum().reset_index()

def axis_format(x, pos=None):
    if x >= 1e12:
        return f'{x/1e12:.1f}T'
    elif x >= 1e9:
        return f'{x/1e9:.1f}B'
    elif x >= 1e6:
        return f'{x/1e6:.1f}M'
    else:
        return f'{x:.0f}'

ax = sns.barplot(
    data=spend_by_year,
    x='Year',
    y='Spend',
    hue='Year',
    palette=custom_palette[:5],
    legend=False
)

for container in ax.containers:
    labels = [axis_format(v) for v in container.datavalues]
    ax.bar_label(container, labels=labels, padding=3)

ax.yaxis.set_major_formatter(mtick.FuncFormatter(axis_format))

plt.title('Total Spend by Year')
plt.ylabel('Total Spend')
plt.xlabel('Year')
plt.xticks(rotation=45)
plt.show()

Although spend was high, that was driven by high prices. The volume (weight) had actually been trending down and in 2024 was at its lowest in 5 years:

In [30]:
weight_by_year = df.groupby(['Year'])['Weight'].sum().reset_index()

def axis_format(x, pos=None):
    if x >= 1e12:
        return f'{x/1e12:.1f}T'
    elif x >= 1e9:
        return f'{x/1e9:.1f}B'
    elif x >= 1e6:
        return f'{x/1e6:.1f}M'
    else:
        return f'{x:.0f}'

ax = sns.barplot(
    data=weight_by_year,
    x='Year',
    y='Weight',
    hue='Year',
    palette=custom_palette[:5],
    legend=False
)

for container in ax.containers:
    labels = [axis_format(v) for v in container.datavalues]
    ax.bar_label(container, labels=labels, padding=3)

ax.yaxis.set_major_formatter(mtick.FuncFormatter(axis_format))

plt.title('Total Weight by Year')
plt.ylabel('Total Weight')
plt.xlabel('Year')
plt.xticks(rotation=45)
plt.show()

Monthly aggregated chart of Spend, Price per Pound, & Weight:

In [31]:
monthly_df = (
    df
    .groupby('Import Date', as_index=False)
    .agg({
        'Weight': 'sum',
        'Spend': 'sum'
    })
)

monthly_df['Price per Pound'] = monthly_df['Spend'] / monthly_df['Weight']
monthly_df['Year'] = monthly_df['Import Date'].dt.year
monthly_df['Month'] = monthly_df['Import Date'].dt.strftime('%b')
In [32]:
monthly_df.head()
Out[32]:
Import Date Weight Spend Price per Pound Year Month
0 2020-01-01 77863938 110827113 1.423343 2020 Jan
1 2020-02-01 118967413 183396139 1.541566 2020 Feb
2 2020-03-01 117594405 160633331 1.365995 2020 Mar
3 2020-04-01 72523979 109295892 1.507031 2020 Apr
4 2020-05-01 58180114 71059481 1.221371 2020 May

US Cocoa Bean Imports YOY | Spend:

In [33]:
plt.figure(figsize=(12, 8))

for i, year in enumerate(monthly_df['Year'].unique()):
    year_data = monthly_df[monthly_df['Year'] == year]
    plt.plot(year_data['Month'], year_data['Spend'],
             marker='o', linewidth=2.5, markersize=6,
             color=custom_palette[i % len(custom_palette)],
             label=str(year), markerfacecolor='white',
             markeredgewidth=2)

plt.gca().yaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,.0f}'))
plt.title('Cocoa Bean US Imports: Spend (YOY)', pad=20)
plt.xlabel('Month')
plt.ylabel('Total Spend')
plt.legend(title='Year', loc='center left', bbox_to_anchor=(1, 0.5))
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.subplots_adjust(right=0.85)
plt.show()

US Cocoa Bean Imports YOY | Price per Pound:

In [34]:
plt.figure(figsize=(12, 8))

for i, year in enumerate(monthly_df['Year'].unique()):
    year_data = monthly_df[monthly_df['Year'] == year]
    plt.plot(year_data['Month'], year_data['Price per Pound'],
             marker='o', linewidth=2.5, markersize=6,
             color=custom_palette[i % len(custom_palette)],
             label=str(year), markerfacecolor='white',
             markeredgewidth=2)

plt.gca().yaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,.2f}'))
plt.title('Cocoa Bean US Imports: Price per Pound (YOY)', pad=20)
plt.xlabel('Month')
plt.ylabel('Price per Pound')
plt.legend(title='Year', loc='center left', bbox_to_anchor=(1, 0.5))
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.subplots_adjust(right=0.85)
plt.show()

US Cocoa Bean Imports YOY | Weight:

In [35]:
plt.figure(figsize=(12, 8))

for i, year in enumerate(monthly_df['Year'].unique()):
    year_data = monthly_df[monthly_df['Year'] == year]
    plt.plot(year_data['Month'], year_data['Weight'],
             marker='o', linewidth=2.5, markersize=6,
             color=custom_palette[i % len(custom_palette)],
             label=str(year), markerfacecolor='white',
             markeredgewidth=2)
plt.gca().yaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,.0f}'))
plt.title('Cocoa Bean US Imports: Weight (YOY)', pad=20)
plt.xlabel('Month')
plt.ylabel('Total Weight')
plt.legend(title='Year', loc='center left', bbox_to_anchor=(1, 0.5))
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.subplots_adjust(right=0.85)
plt.show()


I first performed decomposition on the data to get an idea of its components (trend, seasonality, residuals):

Run sequence plots:

These charts plot the series as one continuous sequence (rather than split YOY, etc.). They are a good visual check for trend, seasonality, and other patterns.

In [36]:
def run_sequence_plot(x, y, title, xlabel="time", ylabel="series", width=13, height=4):
    plt.figure(figsize=(width, height))
    plt.plot(x, y, 'k-')
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.grid(alpha=0.3)
    plt.show()

The Weight and Spend show strong seasonal patterns:

In [37]:
run_sequence_plot(monthly_df['Import Date'], monthly_df['Weight'],
                  title="US Cocoa Bean Imports by Month (Weight lbs)")
In [38]:
run_sequence_plot(monthly_df['Import Date'], monthly_df['Spend'],
                  title="US Cocoa Bean Imports by Month (Spend)")

The Price shows a strong upward trend:

In [39]:
run_sequence_plot( monthly_df['Import Date'],  monthly_df['Price per Pound'],
                  title="US Cocoa Bean Imports by Month (Price per Pound)")

Transformations (to remove trend and seasonality):

Full decomposition for Weight (breaks out run sequence, trend, seasonality, residuals):

In [40]:
data = monthly_df.set_index('Import Date')['Weight']
In [41]:
display(data.head())
Import Date
2020-01-01     77863938
2020-02-01    118967413
2020-03-01    117594405
2020-04-01     72523979
2020-05-01     58180114
Name: Weight, dtype: int64

Period: 12 for yearly seasonality:

In [42]:
ss_decomposition = seasonal_decompose(x=data, model='additive', period=12)
estimated_trend = ss_decomposition.trend
estimated_seasonal = ss_decomposition.seasonal
estimated_residual = ss_decomposition.resid
In [43]:
fig, axes = plt.subplots(4, 1, sharex=True, sharey=False)
fig.set_figheight(10)
fig.set_figwidth(15)

axes[0].plot(data, label='Original', color='black')
axes[0].legend(loc='upper left')

axes[1].plot(estimated_trend, label='Trend', color='black')
axes[1].legend(loc='upper left')

axes[2].plot(estimated_seasonal, label='Seasonality', color='black')
axes[2].legend(loc='upper left')

axes[3].plot(estimated_residual, label='Residuals', color='black')
axes[3].legend(loc='upper left')
Out[43]:
<matplotlib.legend.Legend at 0x12dc59a60>

(The first chart in the group is the run sequence again)

Trend: The trend is an increasing or decreasing slope over time. This shows a downward trend in cocoa bean imports over the period.

Seasonality: Seasonality is a repeated, periodic pattern over time. This chart shows that, for the most part, cocoa bean imports peak at the beginning of each year and drop off throughout the year.

Residuals: The left-over fluctuations after removing the trend & seasonality. These appear mostly random and uncorrelated. When the residuals are random, uncorrelated, roughly normally distributed, and have a mean around 0, it means the trend and seasonality have effectively been identified and removed from the data. Here the mean is around 0, it is constant, and there is no trend or seasonality left.
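
A quick numeric check of those residual properties (a minimal sketch, run after the decomposition above; the NaN edge values are dropped first):

# Sanity-check the decomposition residuals
resid = estimated_residual.dropna()

print(f"mean: {resid.mean():,.0f}")   # should be near 0 relative to the series scale
print(f"std:  {resid.std():,.0f}")
print(f"skew: {resid.skew():.2f}")    # near 0 for a roughly symmetric distribution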

Plotting the residuals in a histogram shows that they roughly follow a normal distribution:

In [44]:
plt.figure(figsize=(10, 5))
plt.hist(estimated_residual, bins=30, color=custom_palette[0], edgecolor='gray')
plt.title('Histogram of Residuals')
plt.xlabel('Residual Value')
plt.ylabel('Frequency')
plt.show()

The first and last 6 values of the residuals are NaN and are removed:

In [45]:
print(estimated_residual[-6:])
Import Date
2024-07-01   NaN
2024-08-01   NaN
2024-09-01   NaN
2024-10-01   NaN
2024-11-01   NaN
2024-12-01   NaN
Name: resid, dtype: float64
In [46]:
print(estimated_residual[:6])
Import Date
2020-01-01   NaN
2020-02-01   NaN
2020-03-01   NaN
2020-04-01   NaN
2020-05-01   NaN
2020-06-01   NaN
Name: resid, dtype: float64

Autocorrelation: Autocorrelation shows whether the series is correlated with its previous periods (lags). Since the data is monthly, lag 12 corresponds to one year. There is significant (outside the blue band) correlation at the first few lags and at lag 12.

(Ignore lag 0: it always appears and is equal to 1, because it is the correlation of the current value with itself.)

Lags 1-2: this month's values are correlated with those from 1 and 2 months ago. Lag 12: this month's values are correlated with those from 12 months ago. High autocorrelation at small lags with slow decay is characteristic of a trend.

I plotted 24 lags to see if there is a seasonality cycle longer than 12, but there is not. 12 is the last significant lag, so the period for the decomposition should remain at 12 for annual seasonality.

Lags 1, 2 & 12 are significant in the autocorrelation (ACF), but in the partial autocorrelation (PACF), which removes the dependence explained by earlier lags, only lags 1 and 12 are significant. This shows the current value is correlated with the previous value (an AR(1) term) and with the value 12 months ago (a seasonal AR term, since the data is monthly).

In [47]:
plot_acf(data, lags=24)
plot_pacf(data, lags=24)
plt.show()
In [48]:
residuals_final = estimated_residual[6:-6]
#same as estimated_residual.dropna()

The decomposition residuals show autocorrelation at lag 4. I gave SARIMA the option to model this later (stepwise search turned off, orders allowed up to 4), but a model including this lag was not the best performer, so the effect was not that important:

In [49]:
plot_acf(residuals_final)
plot_pacf(residuals_final)
plt.show()

The Augmented Dickey-Fuller test confirms the original data is non-stationary: we cannot reject the null hypothesis that the data is non-stationary (p ≈ 0.96):

(Stationary data would have a constant mean & variance, no autocorrelation, and no periodic component)

In [50]:
adf_b4, pvalue_b4, usedlag_, nobs_, critical_values_, icbest_ = adfuller(data)
print(f"ADF: {adf_b4:.6f}")
print(f"p-value: {pvalue_b4:.9f}")
ADF: 0.021728
p-value: 0.960292504

Test the residuals for stationarity (after removing the first and last 6 values, since they are NaN):

After removing the trend and seasonality, the residuals are stationary (p-value < 0.05) and no further differencing is needed. This decomposition therefore adequately captures the patterns in the data and is a good way to understand it:

In [51]:
adf_b4, pvalue_b4, usedlag_, nobs_, critical_values_, icbest_ = adfuller(residuals_final)
print(f"ADF: {adf_b4:.6f}")
print(f"p-value: {pvalue_b4:.9f}")
ADF: -4.862790
p-value: 0.000041182


Model 1: Smoothing

I first performed Smoothing as a model option/baseline. I chose Triple Exponential Smoothing, as that is the best option when there is both trend and seasonality.

Smoothing calculates a moving average in order to reduce noise. Exponential Smoothing weights the more recent lags more heavily. Triple Exponential Smoothing (Holt-Winters) uses 3 components to smooth the level, the trend, and the seasonality.
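
To illustrate the recency weighting, below is a minimal sketch of simple (single) exponential smoothing, the building block that Triple Exponential Smoothing extends with trend and seasonal components; the alpha value here is purely illustrative:

# Each smoothed value is a weighted average of the current observation and the
# previous smoothed value, so older observations decay geometrically:
#   s_t = alpha * y_t + (1 - alpha) * s_{t-1}
def simple_exp_smooth(values, alpha=0.5):
    smoothed = [values[0]]            # initialize with the first observation
    for y in values[1:]:
        smoothed.append(alpha * y + (1 - alpha) * smoothed[-1])
    return smoothed

print(simple_exp_smooth([10, 12, 8, 11, 9], alpha=0.5))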

In [52]:
monthly_df.head()
Out[52]:
Import Date Weight Spend Price per Pound Year Month
0 2020-01-01 77863938 110827113 1.423343 2020 Jan
1 2020-02-01 118967413 183396139 1.541566 2020 Feb
2 2020-03-01 117594405 160633331 1.365995 2020 Mar
3 2020-04-01 72523979 109295892 1.507031 2020 Apr
4 2020-05-01 58180114 71059481 1.221371 2020 May
In [53]:
data = pd.DataFrame({
    'Import Date': monthly_df['Import Date'],
    'Weight': monthly_df['Weight']
})
In [54]:
data.head()
Out[54]:
Import Date Weight
0 2020-01-01 77863938
1 2020-02-01 118967413
2 2020-03-01 117594405
3 2020-04-01 72523979
4 2020-05-01 58180114
In [55]:
train = data[:-5]
test = data[-5:]
In [56]:
triple = ExponentialSmoothing(train['Weight'],
                              trend="additive",damped_trend=True,
                              seasonal="additive",
                              seasonal_periods=12).fit(optimized=True)
triple_preds = triple.forecast(len(test['Weight']))

The untuned Exponential Smoothing performs very poorly, greatly overestimating the downward trend:

In [57]:
plt.plot(train['Import Date'], train['Weight'], color=custom_palette[2], label="train")
plt.plot(test['Import Date'], test['Weight'], color=custom_palette[1], linestyle="--", label="test")
plt.plot(test['Import Date'], triple_preds, color=custom_palette[0], linestyle="--", label="predictions")

def millions(x, pos):
    return f'{int(x/1e6)}M'

plt.gca().yaxis.set_major_formatter(FuncFormatter(millions))

plt.legend(loc='upper left')
plt.title("Triple Exponential Smoothing")
plt.grid(alpha=0.3)
plt.show()

Below is a rolling CV for the model. It forecasts a 3-month horizon (add one month of data, predict the next 3 months, add another month, predict again, etc.), starting with a cutoff at month 45 (Sep '23) and predicting through the 60th month of data (Dec '24):

Reported are the forecast vs. actual (yhat vs. y), the absolute error (the difference between the two), and the absolute percentage error (error/actual):

In [58]:
horizon = 3
exp_ts_results = []

for cutoff in range(44, 60):
    if cutoff + horizon >= len(data):
        break

    train_series = data['Weight'].iloc[:cutoff + 1]

    triple = ExponentialSmoothing(
        train_series,
        trend="additive", damped_trend=True,
        seasonal="additive", seasonal_periods=12
    ).fit(optimized=True)

    yhat = triple.forecast(steps=horizon)
    y_true = data['Weight'].iloc[cutoff + 1 : cutoff + 1 + horizon]

    for h in range(horizon):
        ae = abs(yhat.iloc[h] - y_true.iloc[h])
        ape = ae / max(abs(y_true.iloc[h]), 1e-8)

        exp_ts_results.append({
            'cutoff': cutoff,
            'horizon': h + 1,
            'cutoff_month': data['Import Date'].iloc[cutoff].to_period('M').to_timestamp(),
            'month': data['Import Date'].iloc[cutoff + h + 1].to_period('M').to_timestamp(),
            'y': y_true.iloc[h],
            'yhat': yhat.iloc[h],
            'AE': ae,
            'APE': ape
        })

exp_ts_cv_df = pd.DataFrame(exp_ts_results)

exp_ts_cv_df.style.format({
    'cutoff_month': '{:%m-%d-%Y}',
    'month': '{:%m-%d-%Y}',
    'y': '{:,.0f}',
    'yhat': '{:,.0f}',
    'AE': '{:,.0f}',
    'APE': '{:.0%}'
})
Out[58]:
  cutoff horizon cutoff_month month y yhat AE APE
0 44 1 09-01-2023 10-01-2023 15,172,388 6,902,291 8,270,097 55%
1 44 2 09-01-2023 11-01-2023 11,741,002 3,755,247 7,985,755 68%
2 44 3 09-01-2023 12-01-2023 17,305,596 16,758,376 547,220 3%
3 45 1 10-01-2023 11-01-2023 11,741,002 6,045,882 5,695,120 49%
4 45 2 10-01-2023 12-01-2023 17,305,596 19,019,260 1,713,664 10%
5 45 3 10-01-2023 01-01-2024 64,702,033 66,344,501 1,642,468 3%
6 46 1 11-01-2023 12-01-2023 17,305,596 20,419,934 3,114,338 18%
7 46 2 11-01-2023 01-01-2024 64,702,033 67,744,089 3,042,056 5%
8 46 3 11-01-2023 02-01-2024 65,375,575 44,310,438 21,065,137 32%
9 47 1 12-01-2023 01-01-2024 64,702,033 65,781,660 1,079,627 2%
10 47 2 12-01-2023 02-01-2024 65,375,575 49,667,363 15,708,212 24%
11 47 3 12-01-2023 03-01-2024 62,364,919 83,586,898 21,221,979 34%
12 48 1 01-01-2024 02-01-2024 65,375,575 49,337,538 16,038,037 25%
13 48 2 01-01-2024 03-01-2024 62,364,919 83,257,077 20,892,158 33%
14 48 3 01-01-2024 04-01-2024 34,933,233 88,121,981 53,188,748 152%
15 49 1 02-01-2024 03-01-2024 62,364,919 88,140,823 25,775,904 41%
16 49 2 02-01-2024 04-01-2024 34,933,233 93,005,732 58,072,499 166%
17 49 3 02-01-2024 05-01-2024 35,504,037 69,119,308 33,615,271 95%
18 50 1 03-01-2024 04-01-2024 34,933,233 85,707,057 50,773,824 145%
19 50 2 03-01-2024 05-01-2024 35,504,037 61,820,637 26,316,600 74%
20 50 3 03-01-2024 06-01-2024 26,935,635 52,160,066 25,224,431 94%
21 51 1 04-01-2024 05-01-2024 35,504,037 43,120,813 7,616,776 21%
22 51 2 04-01-2024 06-01-2024 26,935,635 33,690,321 6,754,686 25%
23 51 3 04-01-2024 07-01-2024 19,175,494 854,284 18,321,210 96%
24 52 1 05-01-2024 06-01-2024 26,935,635 29,169,740 2,234,105 8%
25 52 2 05-01-2024 07-01-2024 19,175,494 -3,643,501 22,818,995 119%
26 52 3 05-01-2024 08-01-2024 21,449,146 -6,523,088 27,972,234 130%
27 53 1 06-01-2024 07-01-2024 19,175,494 -4,737,622 23,913,116 125%
28 53 2 06-01-2024 08-01-2024 21,449,146 -7,620,189 29,069,335 136%
29 53 3 06-01-2024 09-01-2024 23,828,291 2,650,308 21,177,983 89%
30 54 1 07-01-2024 08-01-2024 21,449,146 1,861,780 19,587,366 91%
31 54 2 07-01-2024 09-01-2024 23,828,291 12,006,067 11,822,224 50%
32 54 3 07-01-2024 10-01-2024 20,121,692 -3,091,509 23,213,201 115%
33 55 1 08-01-2024 09-01-2024 23,828,291 23,815,662 12,629 0%
34 55 2 08-01-2024 10-01-2024 20,121,692 9,571,620 10,550,072 52%
35 55 3 08-01-2024 11-01-2024 23,311,088 5,205,149 18,105,939 78%
36 56 1 09-01-2024 10-01-2024 20,121,692 9,577,936 10,543,756 52%
37 56 2 09-01-2024 11-01-2024 23,311,088 5,211,465 18,099,623 78%
38 56 3 09-01-2024 12-01-2024 31,759,202 12,984,396 18,774,806 59%

The MAPEs (mean absolute percentage error) are very high and increase from the one month out prediction to the 3 month out prediction:

In [59]:
exp_ts_cv = exp_ts_cv_df.groupby('horizon').agg(
    MAE=('AE', 'mean'),
    MAPE=('APE', 'mean'))

exp_ts_cv.style.format({
    'MAE': '{:,.0f}',
    'MAPE': '{:.0%}'})
Out[59]:
  MAE MAPE
horizon    
1 13,434,977 49%
2 17,911,221 65%
3 21,851,587 75%

The overall MAPE for the whole model is 63%, which is very high:

In [60]:
exp_ts_overall_MAE  = exp_ts_cv_df['AE'].mean()
exp_ts_overall_MAPE = exp_ts_cv_df['APE'].mean()

print(f"MAE: {exp_ts_overall_MAE:,.0f}")
print(f"MAPE (%): {exp_ts_overall_MAPE:.0%}")
MAE: 17,732,595
MAPE (%): 63%

Plot forecast (yhat) vs. actuals (y) for ALL data (not just a train/test split like above) to see model fit:

In [61]:
triple = ExponentialSmoothing(
    data['Weight'],
    trend="additive", damped_trend=True,
    seasonal="additive", seasonal_periods=12
).fit(optimized=True)

fitted = triple.fittedvalues

forecast_steps = 3
forecast = triple.forecast(steps=forecast_steps)

last_date = data['Import Date'].iloc[-1]
future_dates = pd.date_range(
    start=last_date + pd.offsets.MonthBegin(1),
    periods=forecast_steps,
    freq='MS'
)

combined_preds = np.concatenate([fitted.values, forecast.values])
combined_dates = pd.concat([data['Import Date'], pd.Series(future_dates)], ignore_index=True)

plt.figure(figsize=(12, 5))

plt.plot(data['Import Date'], data['Weight'], color=custom_palette[2], label="actual")

plt.plot(combined_dates, combined_preds, color=custom_palette[0], linestyle='--', label="model fit + forecast")

plt.axvspan(future_dates[0], future_dates[-1], color=custom_palette[0], alpha=0.1)

plt.gca().yaxis.set_major_formatter(FuncFormatter(lambda x, pos: f'{int(x/1e6)}M'))

plt.legend(loc='upper left')
plt.title("Triple Exponential Smoothing")
plt.xlabel("Date")
plt.ylabel("Weight")
plt.grid(alpha=0.3)
plt.show()

Next I tuned the Triple Exponential Smoothing by making the seasonality multiplicative. The data does look like it has a fair multiplicative effect, in that the seasonal jumps decrease in magnitude as the overall level falls. This adjustment improves the model greatly:
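
Before the full rolling CV, a quick (rough) way to sanity-check additive vs. multiplicative seasonality is to fit both specifications on the training data and compare in-sample AIC; this is only a sketch and a rough guide, not a substitute for the CV below:

# Compare information criteria for the two seasonal specifications
for seas in ["additive", "multiplicative"]:
    fit = ExponentialSmoothing(
        train['Weight'],
        trend="additive", damped_trend=True,
        seasonal=seas, seasonal_periods=12
    ).fit(optimized=True)
    print(f"{seas:15s} AIC: {fit.aic:,.1f}")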

In [62]:
triple_tuned = ExponentialSmoothing(train['Weight'],
                              trend="additive",damped_trend=True,
                              seasonal="multiplicative",
                              seasonal_periods=12).fit(optimized=True)
triple_tuned_preds = triple_tuned.forecast(len(test['Weight']))

The forecast for the last 3 months greatly improves vs. the previous untuned version:

In [63]:
plt.plot(train['Import Date'], train['Weight'], color=custom_palette[2], label="train")
plt.plot(test['Import Date'], test['Weight'], color=custom_palette[1], linestyle="--", label="test")
plt.plot(test['Import Date'], triple_tuned_preds, color=custom_palette[0], linestyle="--", label="predictions")

def millions(x, pos):
    return f'{int(x/1e6)}M'

plt.gca().yaxis.set_major_formatter(FuncFormatter(millions))

plt.legend(loc='upper left')
plt.title("Triple Exponential Smoothing (tuned)")
plt.grid(alpha=0.3)
plt.show()
In [64]:
horizon = 3
exp_ts_tuned_results = []

for cutoff in range(44, 60):
    if cutoff + horizon >= len(data):
        break

    train = data['Weight'].iloc[:cutoff + 1]

    triple_tuned = ExponentialSmoothing(
        train,
        trend="additive", damped_trend=True,
        seasonal="multiplicative", seasonal_periods=12
    ).fit(optimized=True)

    yhat = triple_tuned.forecast(steps=horizon)
    y_true = data['Weight'].iloc[cutoff + 1 : cutoff + 1 + horizon]

    for h in range(horizon):
        ae = abs(yhat.iloc[h] - y_true.iloc[h])
        ape = ae / max(abs(y_true.iloc[h]), 1e-8)

        exp_ts_tuned_results.append({
            'cutoff': cutoff,
            'horizon': h + 1,
            'cutoff_month': data['Import Date'].iloc[cutoff].to_period('M').to_timestamp(),
            'month': data['Import Date'].iloc[cutoff + h + 1].to_period('M').to_timestamp(),
            'y': y_true.iloc[h],
            'yhat': yhat.iloc[h],
            'AE': ae,
            'APE': ape
        })

exp_ts_tuned_cv_df = pd.DataFrame(exp_ts_tuned_results)
exp_ts_tuned_cv_df.style.format({
    'cutoff_month': '{:%m-%d-%Y}',
    'month': '{:%m-%d-%Y}',
    'y': '{:,.0f}',
    'yhat': '{:,.0f}',
    'AE': '{:,.0f}',
    'APE': '{:.0%}'
})
Out[64]:
  cutoff horizon cutoff_month month y yhat AE APE
0 44 1 09-01-2023 10-01-2023 15,172,388 19,758,979 4,586,591 30%
1 44 2 09-01-2023 11-01-2023 11,741,002 18,827,434 7,086,432 60%
2 44 3 09-01-2023 12-01-2023 17,305,596 20,571,930 3,266,334 19%
3 45 1 10-01-2023 11-01-2023 11,741,002 17,314,661 5,573,659 47%
4 45 2 10-01-2023 12-01-2023 17,305,596 18,983,433 1,677,837 10%
5 45 3 10-01-2023 01-01-2024 64,702,033 44,986,142 19,715,891 30%
6 46 1 11-01-2023 12-01-2023 17,305,596 16,869,278 436,318 3%
7 46 2 11-01-2023 01-01-2024 64,702,033 39,890,376 24,811,657 38%
8 46 3 11-01-2023 02-01-2024 65,375,575 39,351,743 26,023,832 40%
9 47 1 12-01-2023 01-01-2024 64,702,033 40,252,571 24,449,462 38%
10 47 2 12-01-2023 02-01-2024 65,375,575 39,708,749 25,666,826 39%
11 47 3 12-01-2023 03-01-2024 62,364,919 52,317,225 10,047,694 16%
12 48 1 01-01-2024 02-01-2024 65,375,575 47,579,349 17,796,226 27%
13 48 2 01-01-2024 03-01-2024 62,364,919 62,858,839 493,920 1%
14 48 3 01-01-2024 04-01-2024 34,933,233 54,889,734 19,956,501 57%
15 49 1 02-01-2024 03-01-2024 62,364,919 70,265,094 7,900,175 13%
16 49 2 02-01-2024 04-01-2024 34,933,233 61,276,429 26,343,196 75%
17 49 3 02-01-2024 05-01-2024 35,504,037 49,089,874 13,585,837 38%
18 50 1 03-01-2024 04-01-2024 34,933,233 59,021,731 24,088,498 69%
19 50 2 03-01-2024 05-01-2024 35,504,037 47,261,433 11,757,396 33%
20 50 3 03-01-2024 06-01-2024 26,935,635 43,538,778 16,603,143 62%
21 51 1 04-01-2024 05-01-2024 35,504,037 41,299,719 5,795,682 16%
22 51 2 04-01-2024 06-01-2024 26,935,635 38,068,462 11,132,827 41%
23 51 3 04-01-2024 07-01-2024 19,175,494 21,628,102 2,452,608 13%
24 52 1 05-01-2024 06-01-2024 26,935,635 36,359,652 9,424,017 35%
25 52 2 05-01-2024 07-01-2024 19,175,494 20,666,188 1,490,694 8%
26 52 3 05-01-2024 08-01-2024 21,449,146 20,135,102 1,314,044 6%
27 53 1 06-01-2024 07-01-2024 19,175,494 18,916,032 259,462 1%
28 53 2 06-01-2024 08-01-2024 21,449,146 18,452,365 2,996,781 14%
29 53 3 06-01-2024 09-01-2024 23,828,291 22,018,223 1,810,068 8%
30 54 1 07-01-2024 08-01-2024 21,449,146 18,532,698 2,916,448 14%
31 54 2 07-01-2024 09-01-2024 23,828,291 22,114,177 1,714,114 7%
32 54 3 07-01-2024 10-01-2024 20,121,692 16,034,955 4,086,737 20%
33 55 1 08-01-2024 09-01-2024 23,828,291 23,203,068 625,223 3%
34 55 2 08-01-2024 10-01-2024 20,121,692 16,811,767 3,309,925 16%
35 55 3 08-01-2024 11-01-2024 23,311,088 16,044,099 7,266,989 31%
36 56 1 09-01-2024 10-01-2024 20,121,692 16,951,574 3,170,118 16%
37 56 2 09-01-2024 11-01-2024 23,311,088 16,176,222 7,134,866 31%
38 56 3 09-01-2024 12-01-2024 31,759,202 17,721,125 14,038,077 44%

The MAPEs drastically decrease and fall in an acceptable range (20-30%) for volatile, seasonal data:

In [65]:
exp_ts_tuned_cv = exp_ts_tuned_cv_df.groupby('horizon').agg(
    MAE=('AE', 'mean'),
    MAPE=('APE', 'mean'))

exp_ts_tuned_cv.style.format({
    'MAE': '{:,.0f}',
    'MAPE': '{:.0%}'})
Out[65]:
  MAE MAPE
horizon    
1 8,232,452 24%
2 9,662,806 29%
3 10,782,135 30%

The overall MAPE is 27%:

In [66]:
exp_ts_tuned_overall_MAE  = exp_ts_tuned_cv_df['AE'].mean()
exp_ts_tuned_overall_MAPE = exp_ts_tuned_cv_df['APE'].mean()
print(f"MAE: {exp_ts_tuned_overall_MAE:,.0f}")
print(f"MAPE: {exp_ts_tuned_overall_MAPE:.0%}")
MAE: 9,559,131
MAPE: 27%

I want to see the full fitted model (fit vs. actual) and the forecast (shaded region) together, so I plot them below:

Fitted values: “What would I have predicted historically?”

Forecasts: “What do I predict now?”

The model fit looks very good given the volatility:

In [67]:
triple_tuned = ExponentialSmoothing(
    data['Weight'],
    trend="additive",
    damped_trend=True,
    seasonal="multiplicative",
    seasonal_periods=12
).fit(optimized=True)

fitted = triple_tuned.fittedvalues

forecast_steps = 3
forecast = triple_tuned.forecast(steps=forecast_steps)

last_date = data['Import Date'].iloc[-1]
future_dates = pd.date_range(
    start=last_date + pd.offsets.MonthBegin(1),
    periods=forecast_steps,
    freq='MS'
)

combined_preds = np.concatenate([fitted.values, forecast.values])
combined_dates = pd.concat([data['Import Date'], pd.Series(future_dates)], ignore_index=True)

plt.figure(figsize=(12, 5))

plt.plot(data['Import Date'], data['Weight'], color=custom_palette[2], label="actual")

plt.plot(combined_dates, combined_preds, color=custom_palette[0], linestyle='--', label="model fit + forecast")

plt.axvspan(future_dates[0], future_dates[-1], color=custom_palette[0], alpha=0.1)

plt.gca().yaxis.set_major_formatter(FuncFormatter(lambda x, pos: f'{int(x/1e6)}M'))

plt.legend(loc='upper left')
plt.title("Triple Exponential Smoothing (Tuned)")
plt.xlabel("Date")
plt.ylabel("Weight")
plt.grid(alpha=0.3)
plt.show()

The Exponential Smoothing model does not provide confidence intervals, so I have manually bootstrapped them below (and later switch to ETS to replicate the model and report its built-in CIs as another option):

Bootstrapped CI:

Note: I use an 80% CI to get a narrower, more practical forecast range, since the 95% CIs were very wide and uninformative. This is also consistent with the default interval from Prophet.
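
The ETS option mentioned above would look roughly like the sketch below, using statsmodels' ETSModel (imported earlier) with the same damped-additive-trend, multiplicative-seasonal structure; the error form chosen here and the exact prediction-interval API/column names are assumptions and can vary by statsmodels version, so treat this as an outline rather than the final implementation:

# ETS state-space version of the Holt-Winters model with built-in prediction intervals
ets = ETSModel(
    data['Weight'].astype(float),
    error="mul",                      # multiplicative error assumed for this sketch
    trend="add", damped_trend=True,
    seasonal="mul", seasonal_periods=12
).fit()

pred = ets.get_prediction(start=len(data), end=len(data) + 2)  # next 3 months
print(pred.summary_frame(alpha=0.2))  # point forecast with ~80% interval bounds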

In [68]:
def bootstrap_hw_intervals(model,train_values, horizon, n_boot=500):
    residuals = model.resid
    forecasts = np.zeros((n_boot, horizon))

    for i in range(n_boot):
        boot_sample = train_values + np.random.choice(residuals, size=len(train_values))
        boot_sample = np.maximum(boot_sample, 1e-6)

        boot_model = ExponentialSmoothing(
            boot_sample,
            trend="additive",
            damped_trend=True,
            seasonal="multiplicative",
            seasonal_periods=12
        ).fit(optimized=True)

        forecasts[i, :] = boot_model.forecast(steps=horizon)

    lower = np.percentile(forecasts, 10, axis=0)
    upper = np.percentile(forecasts, 90, axis=0)

    return pd.DataFrame({
        'horizon': range(1, horizon + 1),
        'point_forecast': model.forecast(steps=horizon),
        'ci_lower': lower,
        'ci_upper': upper
    })

Fit Holt-Winters Exponential Smoothing on full history:

In [69]:
np.random.seed(42)

train_values = data['Weight'].values
final_hw = ExponentialSmoothing(
    train_values,
    trend="additive", damped_trend=True,
    seasonal="multiplicative", seasonal_periods=12
).fit(optimized=True)

ci_df = bootstrap_hw_intervals(final_hw, train_values, horizon=3, n_boot=500)
In [70]:
ci_df.style.format("{:,.0f}")
Out[70]:
  horizon point_forecast ci_lower ci_upper
0 1 59,915,877 31,652,931 67,112,959
1 2 59,477,191 30,411,924 65,507,608
2 3 75,993,847 38,341,421 84,995,670
In [71]:
future_dates = pd.date_range(
    start=data['Import Date'].iloc[-1] + pd.offsets.MonthBegin(1),
    periods=3,
    freq='MS'
)

forecast_table = pd.DataFrame({
    'Date': future_dates,
    'Forecast': ci_df['point_forecast'].round().astype(int),
    'Lower_80%_CI': np.floor(ci_df['ci_lower']).astype(int),
    'Upper_80%_CI': np.ceil(ci_df['ci_upper']).astype(int)
})
print(forecast_table)
        Date  Forecast  Lower_80%_CI  Upper_80%_CI
0 2025-01-01  59915877      31652931      67112960
1 2025-02-01  59477191      30411924      65507609
2 2025-03-01  75993847      38341421      84995671
In [72]:
triple_tuned = ExponentialSmoothing(
    data['Weight'],
    trend="additive",
    damped_trend=True,
    seasonal="multiplicative",
    seasonal_periods=12
).fit(optimized=True)

fitted = triple_tuned.fittedvalues
forecast_steps = 3
forecast = triple_tuned.forecast(steps=forecast_steps)

last_date = data['Import Date'].iloc[-1]
future_dates = pd.date_range(
    start=last_date + pd.offsets.MonthBegin(1),
    periods=forecast_steps,
    freq='MS'
)

combined_preds = np.concatenate([fitted.values, forecast.values])
combined_dates = pd.concat([data['Import Date'], pd.Series(future_dates)], ignore_index=True)

plt.figure(figsize=(12, 5))
plt.plot(data['Import Date'], data['Weight'], color=custom_palette[2], label="actual")
plt.plot(combined_dates, combined_preds, color=custom_palette[0], linestyle='--',
         label="model fit + forecast")

plt.fill_between(
    future_dates,
    ci_df['ci_lower'].values,
    ci_df['ci_upper'].values,
    color=custom_palette[0],
    alpha=0.2,
    label='80% Confidence Interval'
)

plt.gca().yaxis.set_major_formatter(FuncFormatter(lambda x, pos: f'{int(x/1e6)}M'))
plt.legend(loc='upper left')
plt.title("Triple Exponential Smoothing (Tuned)")
plt.xlabel("Date")
plt.ylabel("Weight")
plt.grid(alpha=0.3)
plt.show()

Log transformation:

Since the data is volatile, I also try out log transforming the data:

This showed no improvement over the Tuned Triple Exponential Smoothing with original data:

In [73]:
train = data['Weight'][:-5]
test = data['Weight'][-5:]

triple_tuned_log = ExponentialSmoothing(np.log1p(train),
                              trend="additive",damped_trend=True,
                              seasonal="multiplicative",
                              seasonal_periods=12).fit(optimized=True)
triple_tuned_log_preds = np.expm1(triple_tuned_log.forecast(len(test)))
In [74]:
train = data[:-5]
test = data[-5:]
In [75]:
plt.plot(train['Import Date'], train['Weight'], color=custom_palette[2], label="train")
plt.plot(test['Import Date'], test['Weight'], color=custom_palette[1], linestyle="--", label="test")
plt.plot(test['Import Date'], triple_tuned_log_preds, color=custom_palette[0], linestyle="--", label="predictions")

def millions(x, pos):
    return f'{int(x/1e6)}M'

plt.gca().yaxis.set_major_formatter(FuncFormatter(millions))

plt.legend(loc='upper left')
plt.title("Triple Exponential Smoothing (tuned log data)")
plt.xlabel("Date")
plt.ylabel("Weight")
plt.grid(alpha=0.3)
plt.show()
In [76]:
horizon = 3
exp_ts_tuned_log_results = []

for cutoff in range(44, 60):
    if cutoff + horizon >= len(data):
        break

    train_log = np.log1p(data['Weight'].iloc[:cutoff + 1])

    triple_tuned_log = ExponentialSmoothing(
        train_log,
        trend="additive", damped_trend=True,
        seasonal="multiplicative", seasonal_periods=12
    ).fit(optimized=True)

    yhat_log = triple_tuned_log.forecast(steps=horizon)
    yhat = np.expm1(yhat_log)

    y_true = data['Weight'].iloc[cutoff + 1 : cutoff + 1 + horizon]

    for h in range(horizon):
        ae = abs(yhat.iloc[h] - y_true.iloc[h])
        ape = ae / max(abs(y_true.iloc[h]), 1e-8)

        exp_ts_tuned_log_results.append({
            'cutoff': cutoff,
            'horizon': h + 1,
            'cutoff_month': data['Import Date'].iloc[cutoff].to_period('M').to_timestamp(),
            'month': data['Import Date'].iloc[cutoff + h + 1].to_period('M').to_timestamp(),
            'y': y_true.iloc[h],
            'yhat': yhat.iloc[h],
            'AE': ae,
            'APE': ape
        })

exp_ts_tuned_log_cv_df = pd.DataFrame(exp_ts_tuned_log_results)
exp_ts_tuned_log_cv_df.style.format({
    'cutoff_month': '{:%m-%d-%Y}',
    'month': '{:%m-%d-%Y}',
    'y': '{:,.0f}',
    'yhat': '{:,.0f}',
    'AE': '{:,.0f}',
    'APE': '{:.0%}'
})
Out[76]:
  cutoff horizon cutoff_month month y yhat AE APE
0 44 1 09-01-2023 10-01-2023 15,172,388 18,717,066 3,544,678 23%
1 44 2 09-01-2023 11-01-2023 11,741,002 16,070,641 4,329,639 37%
2 44 3 09-01-2023 12-01-2023 17,305,596 18,858,823 1,553,227 9%
3 45 1 10-01-2023 11-01-2023 11,741,002 14,724,691 2,983,689 25%
4 45 2 10-01-2023 12-01-2023 17,305,596 17,270,935 34,661 0%
5 45 3 10-01-2023 01-01-2024 64,702,033 39,771,856 24,930,177 39%
6 46 1 11-01-2023 12-01-2023 17,305,596 15,722,629 1,582,967 9%
7 46 2 11-01-2023 01-01-2024 64,702,033 36,045,965 28,656,068 44%
8 46 3 11-01-2023 02-01-2024 65,375,575 33,847,619 31,527,956 48%
9 47 1 12-01-2023 01-01-2024 64,702,033 37,501,335 27,200,698 42%
10 47 2 12-01-2023 02-01-2024 65,375,575 35,207,647 30,167,928 46%
11 47 3 12-01-2023 03-01-2024 62,364,919 45,811,770 16,553,149 27%
12 48 1 01-01-2024 02-01-2024 65,375,575 42,546,741 22,828,834 35%
13 48 2 01-01-2024 03-01-2024 62,364,919 55,507,300 6,857,619 11%
14 48 3 01-01-2024 04-01-2024 34,933,233 53,048,403 18,115,170 52%
15 49 1 02-01-2024 03-01-2024 62,364,919 65,023,435 2,658,516 4%
16 49 2 02-01-2024 04-01-2024 34,933,233 62,077,162 27,143,929 78%
17 49 3 02-01-2024 05-01-2024 35,504,037 47,850,759 12,346,722 35%
18 50 1 03-01-2024 04-01-2024 34,933,233 61,005,118 26,071,885 75%
19 50 2 03-01-2024 05-01-2024 35,504,037 47,037,305 11,533,268 32%
20 50 3 03-01-2024 06-01-2024 26,935,635 41,935,796 15,000,161 56%
21 51 1 04-01-2024 05-01-2024 35,504,037 39,710,590 4,206,553 12%
22 51 2 04-01-2024 06-01-2024 26,935,635 35,457,392 8,521,757 32%
23 51 3 04-01-2024 07-01-2024 19,175,494 20,560,354 1,384,860 7%
24 52 1 05-01-2024 06-01-2024 26,935,635 34,303,281 7,367,646 27%
25 52 2 05-01-2024 07-01-2024 19,175,494 19,913,667 738,173 4%
26 52 3 05-01-2024 08-01-2024 21,449,146 19,408,741 2,040,405 10%
27 53 1 06-01-2024 07-01-2024 19,175,494 18,493,004 682,490 4%
28 53 2 06-01-2024 08-01-2024 21,449,146 18,030,222 3,418,924 16%
29 53 3 06-01-2024 09-01-2024 23,828,291 19,633,621 4,194,670 18%
30 54 1 07-01-2024 08-01-2024 21,449,146 18,245,027 3,204,119 15%
31 54 2 07-01-2024 09-01-2024 23,828,291 19,868,026 3,960,265 17%
32 54 3 07-01-2024 10-01-2024 20,121,692 14,249,175 5,872,517 29%
33 55 1 08-01-2024 09-01-2024 23,828,291 20,858,982 2,969,309 12%
34 55 2 08-01-2024 10-01-2024 20,121,692 14,943,318 5,178,374 26%
35 55 3 08-01-2024 11-01-2024 23,311,088 12,556,075 10,755,013 46%
36 56 1 09-01-2024 10-01-2024 20,121,692 15,513,082 4,608,610 23%
37 56 2 09-01-2024 11-01-2024 23,311,088 13,028,289 10,282,799 44%
38 56 3 09-01-2024 12-01-2024 31,759,202 16,112,426 15,646,776 49%
In [77]:
exp_ts_tuned_log_cv = exp_ts_tuned_log_cv_df.groupby('horizon').agg(
    MAE=('AE', 'mean'),
    MAPE=('APE', 'mean'))

exp_ts_tuned_log_cv.style.format({
    'MAE': '{:,.0f}',
    'MAPE': '{:.0%}'})
Out[77]:
  MAE MAPE
horizon    
1 8,454,615 24%
2 10,832,569 30%
3 12,301,600 33%

The performance is slightly worse than without the log transformation:

In [78]:
exp_ts_tuned_log_overall_MAE  = exp_ts_tuned_log_cv_df['AE'].mean()
exp_ts_tuned_log_overall_MAPE = exp_ts_tuned_log_cv_df['APE'].mean()

print(f"MAE: {exp_ts_tuned_log_overall_MAE:,.0f}")
print(f"MAPE (%): {exp_ts_tuned_log_overall_MAPE:.0%}")
MAE: 10,529,595
MAPE (%): 29%
In [79]:
triple_tuned_log = ExponentialSmoothing(
    np.log1p(data['Weight']),
    trend="additive",
    damped_trend=True,
    seasonal="multiplicative",
    seasonal_periods=12
).fit(optimized=True)

fitted_log = np.expm1(triple_tuned_log.fittedvalues)

forecast_steps = 3
forecast_log = np.expm1(triple_tuned_log.forecast(steps=forecast_steps))

last_date = data['Import Date'].iloc[-1]
future_dates = pd.date_range(
    start=last_date + pd.offsets.MonthBegin(1),
    periods=forecast_steps,
    freq='MS'
)

combined_preds_log = np.concatenate([fitted_log.values, forecast_log.values])
combined_dates_log = pd.concat([data['Import Date'], pd.Series(future_dates)], ignore_index=True)

plt.figure(figsize=(12, 5))

plt.plot(data['Import Date'], data['Weight'], color=custom_palette[2], label="actual")

plt.plot(combined_dates_log, combined_preds_log, color=custom_palette[0], linestyle='--', label="model fit + forecast")

plt.axvspan(future_dates[0], future_dates[-1], color=custom_palette[0], alpha=0.1)

plt.gca().yaxis.set_major_formatter(FuncFormatter(lambda x, pos: f'{int(x/1e6)}M'))

plt.legend(loc='upper left')
plt.title("Triple Exponential Smoothing (Tuned - Log Data)")
plt.xlabel("Date")
plt.ylabel("Weight")
plt.grid(alpha=0.3)
plt.show()

Add CI:

In [80]:
def bootstrap_hw_intervals(model,train_values, horizon, n_boot=500):
    residuals = model.resid
    forecasts = np.zeros((n_boot, horizon))

    for i in range(n_boot):
        boot_sample = train_values + np.random.choice(residuals, size=len(train_values))
        boot_sample = np.maximum(boot_sample, 1e-6)

        boot_model = ExponentialSmoothing(
            boot_sample,
            trend="additive",
            damped_trend=True,
            seasonal="multiplicative",
            seasonal_periods=12
        ).fit(optimized=True)

        forecasts[i, :] = np.expm1(boot_model.forecast(steps=horizon))

    lower = np.percentile(forecasts, 10, axis=0)
    upper = np.percentile(forecasts, 90, axis=0)

    return pd.DataFrame({
        'horizon': range(1, horizon + 1),
        'point_forecast': np.expm1(model.forecast(steps=horizon)),
        'ci_lower': lower,
        'ci_upper': upper
    })
In [81]:
np.random.seed(42)

train_values = np.log1p(data['Weight'].values)
final_hw = ExponentialSmoothing(
    train_values,
    trend="additive", damped_trend=True,
    seasonal="multiplicative", seasonal_periods=12
).fit(optimized=True)

ci_df = bootstrap_hw_intervals(final_hw, train_values, horizon=3, n_boot=500)
In [82]:
ci_df.style.format("{:.0f}")
Out[82]:
  horizon point_forecast ci_lower ci_upper
0 1 69648880 42252001 73726954
1 2 66175597 40121527 69833012
2 3 81596208 50389155 86459868
In [83]:
future_dates = pd.date_range(
    start=data['Import Date'].iloc[-1] + pd.offsets.MonthBegin(1),
    periods=3,
    freq='MS'
)

forecast_table = pd.DataFrame({
    'Date': future_dates,
    'Forecast': ci_df['point_forecast'].round().astype(int),
    'Lower_80_CI': np.floor(ci_df['ci_lower']).astype(int),
    'Upper_80_CI': np.ceil(ci_df['ci_upper']).astype(int)
})
print(forecast_table)
        Date  Forecast  Lower_80_CI  Upper_80_CI
0 2025-01-01  69648880     42252001     73726955
1 2025-02-01  66175597     40121527     69833012
2 2025-03-01  81596208     50389155     86459868
In [84]:
triple_tuned_log = ExponentialSmoothing(
    np.log1p(data['Weight']),
    trend="additive",
    damped_trend=True,
    seasonal="multiplicative",
    seasonal_periods=12
).fit(optimized=True)

fitted = np.expm1(triple_tuned_log.fittedvalues)
forecast_steps = 3
forecast = np.expm1(triple_tuned_log.forecast(steps=forecast_steps))

last_date = data['Import Date'].iloc[-1]
future_dates = pd.date_range(
    start=last_date + pd.offsets.MonthBegin(1),
    periods=forecast_steps,
    freq='MS'
)

combined_preds_log = np.concatenate([fitted.values, forecast.values])
combined_dates_log = pd.concat([data['Import Date'], pd.Series(future_dates)], ignore_index=True)

plt.figure(figsize=(12, 5))
plt.plot(data['Import Date'], data['Weight'], color=custom_palette[2], label="actual")
plt.plot(combined_dates_log, combined_preds_log, color=custom_palette[0], linestyle='--',
         label="model fit + forecast")

plt.fill_between(
    future_dates,
    ci_df['ci_lower'].values,
    ci_df['ci_upper'].values,
    color=custom_palette[0],
    alpha=0.2,
    label='80% Confidence Interval'
)

plt.gca().yaxis.set_major_formatter(FuncFormatter(lambda x, pos: f'{int(x/1e6)}M'))
plt.legend(loc='upper left')
plt.title("Triple Exponential Smoothing (Tuned - Log Data)")
plt.xlabel("Date")
plt.ylabel("Weight")
plt.grid(alpha=0.3)
plt.show()
[Plot: actuals vs. model fit and forecast with 80% confidence interval]

Model 2: SARIMA

I next chose the SARIMA model to see if performance improved. It seemed like a good option given that the data is nonstationary and there is seasonality:

From the earlier plots (non-log data): the ACF has significant spikes at lags 1, 2, and 12, and the PACF at lags 1 and 12.

Model | Plot | Observation
AR(p) | PACF | Cuts off after lag p
MA(q) | ACF  | Cuts off after lag q

PACF lag 1 → p = 1

ACF lag 1–2 → q = 2

Non-seasonal parameters: (p, q) = (1, 2). d in (p,d,q) will be determined from differencing below.

PACF at lag 12 → seasonal AR → P = 1

ACF at lag 12 → seasonal MA → Q = 1

There were no spikes at lags 24 or 36, so higher seasonal orders are not considered.

Seasonal differencing D: set D = 1, given the strong spike in the ACF at lag 12.

The results (including the differencing tests below) point to (1,1,2)(1,1,1,12) as the ideal orders for the non-log data.
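
To double-check these lag readings numerically rather than by eye, the ACF/PACF values can be printed directly (a minimal sketch using statsmodels' acf/pacf; the set of lags inspected here is my own choice):

from statsmodels.tsa.stattools import acf, pacf

# numeric ACF/PACF values to confirm which lags stand out
acf_vals = acf(data['Weight'], nlags=24)
pacf_vals = pacf(data['Weight'], nlags=24)
for lag in [1, 2, 12, 24]:
    print(f"lag {lag:>2}: ACF={acf_vals[lag]:.2f}  PACF={pacf_vals[lag]:.2f}")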

In [85]:
data.head()
Out[85]:
Import Date Weight
0 2020-01-01 77863938
1 2020-02-01 118967413
2 2020-03-01 117594405
3 2020-04-01 72523979
4 2020-05-01 58180114
In [86]:
plot_acf(data['Weight'], lags=24)
plot_pacf(data['Weight'], lags=24)
plt.show()
[Plots: ACF and PACF of Weight (24 lags)]
In [87]:
data['y_diff'] = data['Weight'].diff()
In [88]:
diff_for_df_test = pd.DataFrame(data, columns=['Import Date','y_diff'])
In [89]:
diff_for_df_test.head()
Out[89]:
Import Date y_diff
0 2020-01-01 NaN
1 2020-02-01 41103475.0
2 2020-03-01 -1373008.0
3 2020-04-01 -45070426.0
4 2020-05-01 -14343865.0

The series passes the Augmented Dickey-Fuller test after a single difference (it is now stationary), so no further differencing is needed. Set d = 1.

In [90]:
adf_b4, pvalue_b4, usedlag_, nobs_, critical_values_, icbest_ = adfuller(diff_for_df_test['y_diff'].dropna())

print(f"ADF: {adf_b4:.2f}")
print(f"p-value: {pvalue_b4:.8f}")
ADF: -5.83
p-value: 0.00000040

The ideal parameters appear to be (2,1,1) (1,1,1,12) for log data:

In [91]:
plot_acf(np.log1p(data['Weight']), lags=24)
plot_pacf(np.log1p(data['Weight']), lags=24)
plt.show()
[Plots: ACF and PACF of log-transformed Weight (24 lags)]

Differencing with Log (to validate d=1):

In [92]:
data_log = data.copy()
In [93]:
data_log['y_diff'] = np.log1p(data_log['Weight']).diff()
In [94]:
diff_for_df_test_log = pd.DataFrame(data_log, columns=['Import Date','y_diff'])
In [95]:
diff_for_df_test_log.head()
Out[95]:
Import Date y_diff
0 2020-01-01 NaN
1 2020-02-01 0.423887
2 2020-03-01 -0.011608
3 2020-04-01 -0.483324
4 2020-05-01 -0.220374
In [96]:
adf_b4, pvalue_b4, usedlag_, nobs_, critical_values_, icbest_ = adfuller(diff_for_df_test_log['y_diff'].dropna())

print(f"ADF: {adf_b4:.2f}")
print(f"p-value: {pvalue_b4:.4f}")
ADF: -3.08
p-value: 0.0282

d = 1, since one difference was enough to reach stationarity.
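
As a cross-check on d and D, pmdarima's built-in differencing estimators can be run directly (a small sketch; the choice of tests here simply mirrors common defaults rather than anything tuned for this series):

from pmdarima.arima.utils import ndiffs, nsdiffs

# estimate the required regular and seasonal differencing orders
print("suggested d:", ndiffs(data['Weight'], test='adf'))
print("suggested D:", nsdiffs(data['Weight'], m=12, test='ocsb'))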

Manual model with parameters from above:

In [97]:
sar_manual = sm.tsa.statespace.SARIMAX(data['Weight'],
                                order=(1,1,2),
                                seasonal_order=(1,1,1,12),
                                trend='c').fit()
In [98]:
sm.tsa.graphics.plot_acf(sar_manual.resid[sar_manual.loglikelihood_burn:]);
[Plot: ACF of manual SARIMA residuals]
In [99]:
sm.tsa.graphics.plot_pacf(sar_manual.resid[sar_manual.loglikelihood_burn:]);
[Plot: PACF of manual SARIMA residuals]

Diagnostics are pretty good: the residuals look like white noise, the histogram of the residuals shows only a slight right skew, and the Q-Q plot deviates somewhat at the upper right, but there is no residual autocorrelation (a numeric check follows the diagnostics plot below):

In [100]:
sar_manual.plot_diagnostics(figsize = (15,8),lags = 12);
[Plot: SARIMA residual diagnostics (standardized residuals, histogram, Q-Q plot, correlogram)]
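
As a numeric complement to the diagnostic plots, a Ljung-Box test on the residuals quantifies the "no autocorrelation" read (a sketch; the lag choices are my own):

from statsmodels.stats.diagnostic import acorr_ljungbox

# Ljung-Box on the SARIMA residuals (skipping the burn-in period);
# large p-values indicate no evidence of remaining autocorrelation
resid = sar_manual.resid[sar_manual.loglikelihood_burn:]
print(acorr_ljungbox(resid, lags=[6, 12], return_df=True))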

Rolling-origin cross-validation of the manual parameter choice (based on the visuals/tests above):

In [101]:
horizon = 3
manual_results = []

# Rolling-origin cross-validation: expand the training window one cutoff (month)
# at a time and forecast the next `horizon` months from each cutoff.
for cutoff in range(44, 60):
    if cutoff + horizon >= len(data):
        break

    train = data.iloc[:cutoff+1]

    sar_manual = sm.tsa.SARIMAX(
        train['Weight'],
        order=(1,1,2),
        seasonal_order=(1,1,1,12),
        trend='c',
        use_exact_diffuse=True
    ).fit(disp=False)

    yhat = sar_manual.forecast(steps=horizon)
    y_true = data['Weight'].iloc[cutoff+1: cutoff+1 + horizon]

    for h in range(horizon):
        ae = abs(yhat.iloc[h] - y_true.iloc[h])
        ape = ae / max(abs(y_true.iloc[h]), 1e-8)

        manual_results.append({
            'cutoff': cutoff,
            'horizon': h + 1,
            'cutoff_month': data['Import Date'].iloc[cutoff].to_period('M').to_timestamp(),
            'month': data['Import Date'].iloc[cutoff + h + 1].to_period('M').to_timestamp(),
            'y': y_true.iloc[h],
            'yhat': yhat.iloc[h],
            'AE': ae,
            'APE': ape
        })

sarima_manual_cv_df = pd.DataFrame(manual_results)
sarima_manual_cv_df.style.format({
    'cutoff_month': '{:%m-%d-%Y}',
    'month': '{:%m-%d-%Y}',
    'y': '{:,.0f}',
    'yhat': '{:,.0f}',
    'AE': '{:,.0f}',
    'APE': '{:.0%}'
})
Out[101]:
  cutoff horizon cutoff_month month y yhat AE APE
0 44 1 09-01-2023 10-01-2023 15,172,388 -902,180 16,074,568 106%
1 44 2 09-01-2023 11-01-2023 11,741,002 -7,499,335 19,240,337 164%
2 44 3 09-01-2023 12-01-2023 17,305,596 -8,421,289 25,726,885 149%
3 45 1 10-01-2023 11-01-2023 11,741,002 -1,016,610 12,757,612 109%
4 45 2 10-01-2023 12-01-2023 17,305,596 -6,792,540 24,098,136 139%
5 45 3 10-01-2023 01-01-2024 64,702,033 41,323,640 23,378,393 36%
6 46 1 11-01-2023 12-01-2023 17,305,596 -3,223,811 20,529,407 119%
7 46 2 11-01-2023 01-01-2024 64,702,033 40,478,792 24,223,241 37%
8 46 3 11-01-2023 02-01-2024 65,375,575 30,235,118 35,140,457 54%
9 47 1 12-01-2023 01-01-2024 64,702,033 55,283,347 9,418,686 15%
10 47 2 12-01-2023 02-01-2024 65,375,575 47,623,246 17,752,329 27%
11 47 3 12-01-2023 03-01-2024 62,364,919 63,009,160 644,241 1%
12 48 1 01-01-2024 02-01-2024 65,375,575 52,854,378 12,521,197 19%
13 48 2 01-01-2024 03-01-2024 62,364,919 66,474,374 4,109,455 7%
14 48 3 01-01-2024 04-01-2024 34,933,233 65,836,481 30,903,248 88%
15 49 1 02-01-2024 03-01-2024 62,364,919 72,268,490 9,903,571 16%
16 49 2 02-01-2024 04-01-2024 34,933,233 67,093,854 32,160,621 92%
17 49 3 02-01-2024 05-01-2024 35,504,037 38,321,713 2,817,676 8%
18 50 1 03-01-2024 04-01-2024 34,933,233 60,309,139 25,375,906 73%
19 50 2 03-01-2024 05-01-2024 35,504,037 34,852,914 651,123 2%
20 50 3 03-01-2024 06-01-2024 26,935,635 23,152,994 3,782,641 14%
21 51 1 04-01-2024 05-01-2024 35,504,037 21,708,625 13,795,412 39%
22 51 2 04-01-2024 06-01-2024 26,935,635 13,630,593 13,305,042 49%
23 51 3 04-01-2024 07-01-2024 19,175,494 -18,494,471 37,669,965 196%
24 52 1 05-01-2024 06-01-2024 26,935,635 23,568,084 3,367,551 13%
25 52 2 05-01-2024 07-01-2024 19,175,494 -12,111,752 31,287,246 163%
26 52 3 05-01-2024 08-01-2024 21,449,146 -14,159,671 35,608,817 166%
27 53 1 06-01-2024 07-01-2024 19,175,494 -8,456,164 27,631,658 144%
28 53 2 06-01-2024 08-01-2024 21,449,146 -10,905,403 32,354,549 151%
29 53 3 06-01-2024 09-01-2024 23,828,291 -10,683,873 34,512,164 145%
30 54 1 07-01-2024 08-01-2024 21,449,146 3,732,004 17,717,142 83%
31 54 2 07-01-2024 09-01-2024 23,828,291 -3,279,741 27,108,032 114%
32 54 3 07-01-2024 10-01-2024 20,121,692 -15,329,868 35,451,560 176%
33 55 1 08-01-2024 09-01-2024 23,828,291 2,210,775 21,617,516 91%
34 55 2 08-01-2024 10-01-2024 20,121,692 -15,338,954 35,460,646 176%
35 55 3 08-01-2024 11-01-2024 23,311,088 -20,710,154 44,021,242 189%
36 56 1 09-01-2024 10-01-2024 20,121,692 -3,428,949 23,550,641 117%
37 56 2 09-01-2024 11-01-2024 23,311,088 -15,752,620 39,063,708 168%
38 56 3 09-01-2024 12-01-2024 31,759,202 -12,801,763 44,560,965 140%
In [102]:
sarima_manual_cv = sarima_manual_cv_df.groupby('horizon').agg(
    MAE=('AE', 'mean'),
    MAPE=('APE', 'mean'))

sarima_manual_cv.style.format({
    'MAE': '{:,.0f}',
    'MAPE': '{:.0%}'})
Out[102]:
  MAE MAPE
horizon    
1 16,481,605 72%
2 23,139,574 99%
3 27,247,558 105%

The MAPE is extremely high, which is surprising given that this was the intuitively ideal combination of parameters. However, this is on the non-log-transformed data.

In [103]:
sarima_manual_overall_MAE  = sarima_manual_cv_df['AE'].mean()
sarima_manual_overall_MAPE = sarima_manual_cv_df['APE'].mean()

print(f"MAE: {sarima_manual_overall_MAE:,.0f}")
print(f"MAPE (%): {sarima_manual_overall_MAPE:.0%}")
MAE: 22,289,579
MAPE (%): 92%
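
The same rolling-origin CV loop will be reused for each SARIMA variant below, so it could be factored into a small helper; this is a sketch under my own naming (the log_transform flag mirrors the np.log1p/np.expm1 wrapping used for the log variants later on):

def rolling_sarimax_cv(series, dates, order, seasonal_order,
                       cutoffs=range(44, 60), horizon=3, log_transform=False):
    # expand the training window one cutoff (month) at a time,
    # forecast `horizon` steps ahead, and collect the errors
    rows = []
    for cutoff in cutoffs:
        if cutoff + horizon >= len(series):
            break
        y_train = series.iloc[:cutoff+1]
        if log_transform:
            y_train = np.log1p(y_train)
        fit = sm.tsa.SARIMAX(y_train, order=order, seasonal_order=seasonal_order,
                             trend='c', use_exact_diffuse=True).fit(disp=False)
        yhat = fit.forecast(steps=horizon)
        if log_transform:
            yhat = np.expm1(yhat)
        y_true = series.iloc[cutoff+1: cutoff+1 + horizon]
        for h in range(horizon):
            ae = abs(yhat.iloc[h] - y_true.iloc[h])
            rows.append({'cutoff': cutoff, 'horizon': h + 1,
                         'month': dates.iloc[cutoff + h + 1],
                         'y': y_true.iloc[h], 'yhat': yhat.iloc[h],
                         'AE': ae, 'APE': ae / max(abs(y_true.iloc[h]), 1e-8)})
    return pd.DataFrame(rows)

# e.g. rolling_sarimax_cv(data['Weight'], data['Import Date'], (1,1,2), (1,1,1,12))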

Run auto-ARIMA to either verify the selected parameters or identify more optimal ones:

This "auto" option iterates through candidate orders, with min/max bounds on the parameters based on the earlier decomposition/visuals/tests:

Forcing d and D to be 1 and letting the rest vary:

In [104]:
sarima_auto_model = pm.auto_arima(data['Weight'],
                          start_p=0,
                          d=1,
                          start_q=0,
                          max_p=5,
                          max_d=1,
                          max_q=5,
                          start_P=0,
                          D=1,
                          start_Q=0,
                          max_P=2,
                          max_D=1,
                          max_Q=2,
                          max_order=5,
                          m=12,
                          seasonal=True,
                          stepwise=False,
                          suppress_warnings=True,
                          error_action='trace',
                          trace=True,
                          random_state=42
                          )
 ARIMA(0,1,0)(0,1,0)[12]             : AIC=1739.523, Time=0.00 sec
 ARIMA(0,1,0)(0,1,1)[12]             : AIC=inf, Time=0.03 sec
 ARIMA(0,1,0)(0,1,2)[12]             : AIC=1727.007, Time=0.06 sec
 ARIMA(0,1,0)(1,1,0)[12]             : AIC=1738.266, Time=0.01 sec
 ARIMA(0,1,0)(1,1,1)[12]             : AIC=inf, Time=0.05 sec
 ARIMA(0,1,0)(1,1,2)[12]             : AIC=1727.890, Time=0.08 sec
 ARIMA(0,1,0)(2,1,0)[12]             : AIC=1736.318, Time=0.05 sec
 ARIMA(0,1,0)(2,1,1)[12]             : AIC=1736.740, Time=0.07 sec
 ARIMA(0,1,0)(2,1,2)[12]             : AIC=1727.517, Time=0.12 sec
 ARIMA(0,1,1)(0,1,0)[12]             : AIC=1735.337, Time=0.01 sec
 ARIMA(0,1,1)(0,1,1)[12]             : AIC=1721.028, Time=0.03 sec
 ARIMA(0,1,1)(0,1,2)[12]             : AIC=1720.941, Time=0.08 sec
 ARIMA(0,1,1)(1,1,0)[12]             : AIC=1733.606, Time=0.02 sec
 ARIMA(0,1,1)(1,1,1)[12]             : AIC=1722.198, Time=0.05 sec
 ARIMA(0,1,1)(1,1,2)[12]             : AIC=1721.538, Time=0.10 sec
 ARIMA(0,1,1)(2,1,0)[12]             : AIC=1721.445, Time=0.05 sec
 ARIMA(0,1,1)(2,1,1)[12]             : AIC=1719.400, Time=0.08 sec
 ARIMA(0,1,1)(2,1,2)[12]             : AIC=1721.047, Time=0.18 sec
 ARIMA(0,1,2)(0,1,0)[12]             : AIC=1735.734, Time=0.01 sec
 ARIMA(0,1,2)(0,1,1)[12]             : AIC=1721.761, Time=0.04 sec
 ARIMA(0,1,2)(0,1,2)[12]             : AIC=1721.263, Time=0.05 sec
 ARIMA(0,1,2)(1,1,0)[12]             : AIC=1734.642, Time=0.03 sec
 ARIMA(0,1,2)(1,1,1)[12]             : AIC=1722.765, Time=0.06 sec
 ARIMA(0,1,2)(1,1,2)[12]             : AIC=1721.810, Time=0.08 sec
 ARIMA(0,1,2)(2,1,0)[12]             : AIC=1722.370, Time=0.07 sec
 ARIMA(0,1,2)(2,1,1)[12]             : AIC=1719.626, Time=0.11 sec
 ARIMA(0,1,3)(0,1,0)[12]             : AIC=1738.216, Time=0.01 sec
 ARIMA(0,1,3)(0,1,1)[12]             : AIC=1723.205, Time=0.05 sec
 ARIMA(0,1,3)(0,1,2)[12]             : AIC=inf, Time=0.13 sec
 ARIMA(0,1,3)(1,1,0)[12]             : AIC=1736.785, Time=0.03 sec
 ARIMA(0,1,3)(1,1,1)[12]             : AIC=inf, Time=0.10 sec
 ARIMA(0,1,3)(2,1,0)[12]             : AIC=1724.118, Time=0.08 sec
 ARIMA(0,1,4)(0,1,0)[12]             : AIC=1752.785, Time=0.02 sec
 ARIMA(0,1,4)(0,1,1)[12]             : AIC=inf, Time=0.20 sec
 ARIMA(0,1,4)(1,1,0)[12]             : AIC=1749.054, Time=0.05 sec
 ARIMA(0,1,5)(0,1,0)[12]             : AIC=1749.277, Time=0.03 sec
 ARIMA(1,1,0)(0,1,0)[12]             : AIC=1737.477, Time=0.01 sec
 ARIMA(1,1,0)(0,1,1)[12]             : AIC=1722.759, Time=0.02 sec
 ARIMA(1,1,0)(0,1,2)[12]             : AIC=1723.188, Time=0.08 sec
 ARIMA(1,1,0)(1,1,0)[12]             : AIC=1735.043, Time=0.02 sec
 ARIMA(1,1,0)(1,1,1)[12]             : AIC=1724.120, Time=0.04 sec
 ARIMA(1,1,0)(1,1,2)[12]             : AIC=1723.898, Time=0.10 sec
 ARIMA(1,1,0)(2,1,0)[12]             : AIC=1723.241, Time=0.05 sec
 ARIMA(1,1,0)(2,1,1)[12]             : AIC=1721.606, Time=0.08 sec
 ARIMA(1,1,0)(2,1,2)[12]             : AIC=1723.274, Time=0.18 sec
 ARIMA(1,1,1)(0,1,0)[12]             : AIC=1734.593, Time=0.01 sec
 ARIMA(1,1,1)(0,1,1)[12]             : AIC=1722.593, Time=0.03 sec
 ARIMA(1,1,1)(0,1,2)[12]             : AIC=1722.358, Time=0.09 sec
 ARIMA(1,1,1)(1,1,0)[12]             : AIC=1733.523, Time=0.03 sec
 ARIMA(1,1,1)(1,1,1)[12]             : AIC=1723.656, Time=0.06 sec
 ARIMA(1,1,1)(1,1,2)[12]             : AIC=1723.142, Time=0.16 sec
 ARIMA(1,1,1)(2,1,0)[12]             : AIC=1723.147, Time=0.10 sec
 ARIMA(1,1,1)(2,1,1)[12]             : AIC=1721.285, Time=0.15 sec
 ARIMA(1,1,2)(0,1,0)[12]             : AIC=1737.860, Time=0.02 sec
 ARIMA(1,1,2)(0,1,1)[12]             : AIC=1723.370, Time=0.05 sec
 ARIMA(1,1,2)(0,1,2)[12]             : AIC=1722.906, Time=0.08 sec
 ARIMA(1,1,2)(1,1,0)[12]             : AIC=1736.527, Time=0.04 sec
 ARIMA(1,1,2)(1,1,1)[12]             : AIC=1724.354, Time=0.09 sec
 ARIMA(1,1,2)(2,1,0)[12]             : AIC=1724.165, Time=0.09 sec
 ARIMA(1,1,3)(0,1,0)[12]             : AIC=1740.450, Time=0.03 sec
 ARIMA(1,1,3)(0,1,1)[12]             : AIC=1724.894, Time=0.07 sec
 ARIMA(1,1,3)(1,1,0)[12]             : AIC=1738.903, Time=0.06 sec
 ARIMA(1,1,4)(0,1,0)[12]             : AIC=1754.413, Time=0.05 sec
 ARIMA(2,1,0)(0,1,0)[12]             : AIC=1738.596, Time=0.01 sec
 ARIMA(2,1,0)(0,1,1)[12]             : AIC=1722.994, Time=0.03 sec
 ARIMA(2,1,0)(0,1,2)[12]             : AIC=1723.012, Time=0.10 sec
 ARIMA(2,1,0)(1,1,0)[12]             : AIC=1736.581, Time=0.02 sec
 ARIMA(2,1,0)(1,1,1)[12]             : AIC=1724.229, Time=0.06 sec
 ARIMA(2,1,0)(1,1,2)[12]             : AIC=1723.460, Time=0.11 sec
 ARIMA(2,1,0)(2,1,0)[12]             : AIC=1723.305, Time=0.07 sec
 ARIMA(2,1,0)(2,1,1)[12]             : AIC=1721.126, Time=0.11 sec
 ARIMA(2,1,1)(0,1,0)[12]             : AIC=1736.721, Time=0.02 sec
 ARIMA(2,1,1)(0,1,1)[12]             : AIC=1724.152, Time=0.06 sec
 ARIMA(2,1,1)(0,1,2)[12]             : AIC=1723.896, Time=0.15 sec
 ARIMA(2,1,1)(1,1,0)[12]             : AIC=1735.717, Time=0.06 sec
 ARIMA(2,1,1)(1,1,1)[12]             : AIC=1725.208, Time=0.09 sec
 ARIMA(2,1,1)(2,1,0)[12]             : AIC=1724.794, Time=0.18 sec
 ARIMA(2,1,2)(0,1,0)[12]             : AIC=inf, Time=0.07 sec
 ARIMA(2,1,2)(0,1,1)[12]             : AIC=1725.331, Time=0.08 sec
 ARIMA(2,1,2)(1,1,0)[12]             : AIC=inf, Time=0.25 sec
 ARIMA(2,1,3)(0,1,0)[12]             : AIC=inf, Time=0.11 sec
 ARIMA(3,1,0)(0,1,0)[12]             : AIC=1739.770, Time=0.01 sec
 ARIMA(3,1,0)(0,1,1)[12]             : AIC=1724.591, Time=0.05 sec
 ARIMA(3,1,0)(0,1,2)[12]             : AIC=1724.467, Time=0.13 sec
 ARIMA(3,1,0)(1,1,0)[12]             : AIC=1738.185, Time=0.03 sec
 ARIMA(3,1,0)(1,1,1)[12]             : AIC=1725.766, Time=0.08 sec
 ARIMA(3,1,0)(2,1,0)[12]             : AIC=1724.963, Time=0.05 sec
 ARIMA(3,1,1)(0,1,0)[12]             : AIC=1738.339, Time=0.02 sec
 ARIMA(3,1,1)(0,1,1)[12]             : AIC=1726.101, Time=0.07 sec
 ARIMA(3,1,1)(1,1,0)[12]             : AIC=1737.398, Time=0.06 sec
 ARIMA(3,1,2)(0,1,0)[12]             : AIC=inf, Time=0.09 sec
 ARIMA(4,1,0)(0,1,0)[12]             : AIC=1738.851, Time=0.02 sec
 ARIMA(4,1,0)(0,1,1)[12]             : AIC=1723.257, Time=0.06 sec
 ARIMA(4,1,0)(1,1,0)[12]             : AIC=1737.427, Time=0.05 sec
 ARIMA(4,1,1)(0,1,0)[12]             : AIC=1740.138, Time=0.03 sec
 ARIMA(5,1,0)(0,1,0)[12]             : AIC=1740.185, Time=0.02 sec

Best model:  ARIMA(0,1,1)(2,1,1)[12]          
Total fit time: 6.406 seconds

Best model parameters:

In [105]:
print('order: ',sarima_auto_model.order)
print('seasonal order: ',sarima_auto_model.seasonal_order)
order:  (0, 1, 1)
seasonal order:  (2, 1, 1, 12)
In [106]:
horizon = 3
auto_results = []

for cutoff in range(44, 60):
    if cutoff + horizon >= len(data):
        break

    train = data.iloc[:cutoff+1]

    sar_auto = sm.tsa.SARIMAX(
        train['Weight'],
        order=sarima_auto_model.order,
        seasonal_order=sarima_auto_model.seasonal_order,
        trend='c',
        use_exact_diffuse=True
    ).fit(disp=False)

    yhat = sar_auto.forecast(steps=horizon)
    y_true = data['Weight'].iloc[cutoff+1: cutoff+1 + horizon]

    for h in range(horizon):
        ae = abs(yhat.iloc[h] - y_true.iloc[h])
        ape = ae / max(abs(y_true.iloc[h]), 1e-8)

        auto_results.append({
            'cutoff': cutoff,
            'horizon': h + 1,
            'cutoff_month': data['Import Date'].iloc[cutoff].to_period('M').to_timestamp(),
            'month': data['Import Date'].iloc[cutoff + h + 1].to_period('M').to_timestamp(),
            'y': y_true.iloc[h],
            'yhat': yhat.iloc[h],
            'AE': ae,
            'APE': ape
        })

sarima_auto_cv_df = pd.DataFrame(auto_results)
sarima_auto_cv_df.style.format({
    'cutoff_month': '{:%m-%d-%Y}',
    'month': '{:%m-%d-%Y}',
    'y': '{:,.0f}',
    'yhat': '{:,.0f}',
    'AE': '{:,.0f}',
    'APE': '{:.0%}'
})
Out[106]:
  cutoff horizon cutoff_month month y yhat AE APE
0 44 1 09-01-2023 10-01-2023 15,172,388 9,171,902 6,000,486 40%
1 44 2 09-01-2023 11-01-2023 11,741,002 -6,703,573 18,444,575 157%
2 44 3 09-01-2023 12-01-2023 17,305,596 21,158 17,284,438 100%
3 45 1 10-01-2023 11-01-2023 11,741,002 -4,836,305 16,577,307 141%
4 45 2 10-01-2023 12-01-2023 17,305,596 2,482,824 14,822,772 86%
5 45 3 10-01-2023 01-01-2024 64,702,033 53,761,074 10,940,959 17%
6 46 1 11-01-2023 12-01-2023 17,305,596 9,707,022 7,598,574 44%
7 46 2 11-01-2023 01-01-2024 64,702,033 61,134,415 3,567,618 6%
8 46 3 11-01-2023 02-01-2024 65,375,575 46,124,995 19,250,580 29%
9 47 1 12-01-2023 01-01-2024 64,702,033 65,319,948 617,915 1%
10 47 2 12-01-2023 02-01-2024 65,375,575 52,069,485 13,306,090 20%
11 47 3 12-01-2023 03-01-2024 62,364,919 86,732,571 24,367,652 39%
12 48 1 01-01-2024 02-01-2024 65,375,575 52,436,703 12,938,872 20%
13 48 2 01-01-2024 03-01-2024 62,364,919 87,391,728 25,026,809 40%
14 48 3 01-01-2024 04-01-2024 34,933,233 74,850,371 39,917,138 114%
15 49 1 02-01-2024 03-01-2024 62,364,919 92,159,107 29,794,188 48%
16 49 2 02-01-2024 04-01-2024 34,933,233 75,064,196 40,130,963 115%
17 49 3 02-01-2024 05-01-2024 35,504,037 46,796,129 11,292,092 32%
18 50 1 03-01-2024 04-01-2024 34,933,233 60,741,215 25,807,982 74%
19 50 2 03-01-2024 05-01-2024 35,504,037 31,769,359 3,734,678 11%
20 50 3 03-01-2024 06-01-2024 26,935,635 12,552,359 14,383,276 53%
21 51 1 04-01-2024 05-01-2024 35,504,037 11,965,763 23,538,274 66%
22 51 2 04-01-2024 06-01-2024 26,935,635 -10,041,439 36,977,074 137%
23 51 3 04-01-2024 07-01-2024 19,175,494 -37,034,263 56,209,757 293%
24 52 1 05-01-2024 06-01-2024 26,935,635 4,928,421 22,007,214 82%
25 52 2 05-01-2024 07-01-2024 19,175,494 -22,805,022 41,980,516 219%
26 52 3 05-01-2024 08-01-2024 21,449,146 -26,290,054 47,739,200 223%
27 53 1 06-01-2024 07-01-2024 19,175,494 -12,536,504 31,711,998 165%
28 53 2 06-01-2024 08-01-2024 21,449,146 -15,708,953 37,158,099 173%
29 53 3 06-01-2024 09-01-2024 23,828,291 -12,987,106 36,815,397 155%
30 54 1 07-01-2024 08-01-2024 21,449,146 -249,107 21,698,253 101%
31 54 2 07-01-2024 09-01-2024 23,828,291 1,400,654 22,427,637 94%
32 54 3 07-01-2024 10-01-2024 20,121,692 -15,332,896 35,454,588 176%
33 55 1 08-01-2024 09-01-2024 23,828,291 15,193,058 8,635,233 36%
34 55 2 08-01-2024 10-01-2024 20,121,692 -1,390,429 21,512,121 107%
35 55 3 08-01-2024 11-01-2024 23,311,088 -5,421,165 28,732,253 123%
36 56 1 09-01-2024 10-01-2024 20,121,692 4,311,734 15,809,958 79%
37 56 2 09-01-2024 11-01-2024 23,311,088 759,819 22,551,269 97%
38 56 3 09-01-2024 12-01-2024 31,759,202 8,134,898 23,624,304 74%
In [107]:
sarima_auto_cv = sarima_auto_cv_df.groupby('horizon').agg(
    MAE=('AE', 'mean'),
    MAPE=('APE', 'mean'))

sarima_auto_cv.style.format({
    'MAE': '{:,.0f}',
    'MAPE': '{:.0%}'})
Out[107]:
  MAE MAPE
horizon    
1 17,133,558 69%
2 23,203,094 97%
3 28,154,741 110%

Overall, the MAPE is essentially unchanged with this model (still 92%, with a slightly higher MAE), although horizons 1 and 2 improve modestly:

In [108]:
sarima_auto_overall_MAE  = sarima_auto_cv_df['AE'].mean()
sarima_auto_overall_MAPE = sarima_auto_cv_df['APE'].mean()

print(f"MAE: {sarima_auto_overall_MAE:,.0f}")
print(f"MAPE (%): {sarima_auto_overall_MAPE:.0%}")
MAE: 22,830,464
MAPE (%): 92%

The AIC search favors a simple non-seasonal component (0,1,1) paired with a stronger seasonal component (2,1,1,12), suggesting that seasonality drives most of the pattern; the cross-validated error, however, is essentially on par with the manual model.
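
One way to quantify how much seasonality dominates is the seasonal-strength measure F_s = max(0, 1 - Var(remainder) / Var(seasonal + remainder)), where values near 1 mean the seasonal component explains most of the detrended variation. A minimal sketch (using an additive decomposition purely for the variance calculation; the framing as a "strength" score is my own addition):

decomp_strength = seasonal_decompose(data['Weight'], model='additive', period=12)

# share of detrended variance explained by the seasonal component
remainder = decomp_strength.resid.dropna()
seas = decomp_strength.seasonal[remainder.index]
f_s = max(0.0, 1 - remainder.var() / (seas + remainder).var())
print(f"seasonal strength: {f_s:.2f}")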

Flexible full search:

This run does not fix any parameters except the 12-month seasonality (no setting for d or D), so it is a very flexible, wide grid search:

In [109]:
sarima_flex_full_auto_model = pm.auto_arima(data['Weight'], start_p=0, start_q=0,
                           max_p=5, max_q=5, m=12,
                           start_P=0, seasonal=True,
                           trace=True,
                           error_action='trace',
                           suppress_warnings=True,
                           stepwise=False)
 ARIMA(0,1,0)(0,0,0)[12] intercept   : AIC=2177.775, Time=0.00 sec
 ARIMA(0,1,0)(0,0,1)[12] intercept   : AIC=2165.766, Time=0.02 sec
 ARIMA(0,1,0)(0,0,2)[12] intercept   : AIC=2167.578, Time=0.03 sec
 ARIMA(0,1,0)(1,0,0)[12] intercept   : AIC=2165.753, Time=0.01 sec
 ARIMA(0,1,0)(1,0,1)[12] intercept   : AIC=2167.895, Time=0.02 sec
 ARIMA(0,1,0)(1,0,2)[12] intercept   : AIC=2168.258, Time=0.10 sec
 ARIMA(0,1,0)(2,0,0)[12] intercept   : AIC=2167.717, Time=0.03 sec
 ARIMA(0,1,0)(2,0,1)[12] intercept   : AIC=2169.315, Time=0.06 sec
 ARIMA(0,1,0)(2,0,2)[12] intercept   : AIC=2169.717, Time=0.08 sec
 ARIMA(0,1,1)(0,0,0)[12] intercept   : AIC=2179.894, Time=0.00 sec
 ARIMA(0,1,1)(0,0,1)[12] intercept   : AIC=2166.436, Time=0.01 sec
 ARIMA(0,1,1)(0,0,2)[12] intercept   : AIC=2168.188, Time=0.03 sec
 ARIMA(0,1,1)(1,0,0)[12] intercept   : AIC=2166.356, Time=0.01 sec
 ARIMA(0,1,1)(1,0,1)[12] intercept   : AIC=2168.056, Time=0.04 sec
 ARIMA(0,1,1)(1,0,2)[12] intercept   : AIC=2168.931, Time=0.16 sec
 ARIMA(0,1,1)(2,0,0)[12] intercept   : AIC=2168.324, Time=0.03 sec
 ARIMA(0,1,1)(2,0,1)[12] intercept   : AIC=2170.126, Time=0.08 sec
 ARIMA(0,1,1)(2,0,2)[12] intercept   : AIC=2170.268, Time=0.11 sec
 ARIMA(0,1,2)(0,0,0)[12] intercept   : AIC=2181.685, Time=0.01 sec
 ARIMA(0,1,2)(0,0,1)[12] intercept   : AIC=2169.206, Time=0.02 sec
 ARIMA(0,1,2)(0,0,2)[12] intercept   : AIC=2170.851, Time=0.04 sec
 ARIMA(0,1,2)(1,0,0)[12] intercept   : AIC=2167.322, Time=0.03 sec
 ARIMA(0,1,2)(1,0,1)[12] intercept   : AIC=2169.289, Time=0.06 sec
 ARIMA(0,1,2)(1,0,2)[12] intercept   : AIC=2170.894, Time=0.19 sec
 ARIMA(0,1,2)(2,0,0)[12] intercept   : AIC=2169.232, Time=0.08 sec
 ARIMA(0,1,2)(2,0,1)[12] intercept   : AIC=2170.714, Time=0.23 sec
 ARIMA(0,1,3)(0,0,0)[12] intercept   : AIC=inf, Time=0.02 sec
 ARIMA(0,1,3)(0,0,1)[12] intercept   : AIC=2163.536, Time=0.04 sec
 ARIMA(0,1,3)(0,0,2)[12] intercept   : AIC=2165.030, Time=0.09 sec
 ARIMA(0,1,3)(1,0,0)[12] intercept   : AIC=2163.944, Time=0.03 sec
 ARIMA(0,1,3)(1,0,1)[12] intercept   : AIC=2165.261, Time=0.08 sec
 ARIMA(0,1,3)(2,0,0)[12] intercept   : AIC=2165.940, Time=0.08 sec
 ARIMA(0,1,4)(0,0,0)[12] intercept   : AIC=2190.432, Time=0.01 sec
 ARIMA(0,1,4)(0,0,1)[12] intercept   : AIC=2166.588, Time=0.03 sec
 ARIMA(0,1,4)(1,0,0)[12] intercept   : AIC=2170.921, Time=0.02 sec
 ARIMA(0,1,5)(0,0,0)[12] intercept   : AIC=2207.147, Time=0.01 sec
 ARIMA(1,1,0)(0,0,0)[12] intercept   : AIC=2179.898, Time=0.00 sec
 ARIMA(1,1,0)(0,0,1)[12] intercept   : AIC=2166.756, Time=0.01 sec
 ARIMA(1,1,0)(0,0,2)[12] intercept   : AIC=2168.507, Time=0.03 sec
 ARIMA(1,1,0)(1,0,0)[12] intercept   : AIC=2166.761, Time=0.01 sec
 ARIMA(1,1,0)(1,0,1)[12] intercept   : AIC=2168.404, Time=0.04 sec
 ARIMA(1,1,0)(1,0,2)[12] intercept   : AIC=2169.811, Time=0.09 sec
 ARIMA(1,1,0)(2,0,0)[12] intercept   : AIC=2168.726, Time=0.03 sec
 ARIMA(1,1,0)(2,0,1)[12] intercept   : AIC=2170.041, Time=0.14 sec
 ARIMA(1,1,0)(2,0,2)[12] intercept   : AIC=2171.055, Time=0.10 sec
 ARIMA(1,1,1)(0,0,0)[12] intercept   : AIC=2181.893, Time=0.01 sec
 ARIMA(1,1,1)(0,0,1)[12] intercept   : AIC=2165.653, Time=0.06 sec
 ARIMA(1,1,1)(0,0,2)[12] intercept   : AIC=2167.068, Time=0.12 sec
 ARIMA(1,1,1)(1,0,0)[12] intercept   : AIC=2166.422, Time=0.04 sec
 ARIMA(1,1,1)(1,0,1)[12] intercept   : AIC=2167.478, Time=0.09 sec
 ARIMA(1,1,1)(1,0,2)[12] intercept   : AIC=2170.507, Time=0.18 sec
 ARIMA(1,1,1)(2,0,0)[12] intercept   : AIC=2168.412, Time=0.12 sec
 ARIMA(1,1,1)(2,0,1)[12] intercept   : AIC=2171.732, Time=0.08 sec
 ARIMA(1,1,2)(0,0,0)[12] intercept   : AIC=2176.453, Time=0.02 sec
 ARIMA(1,1,2)(0,0,1)[12] intercept   : AIC=2166.681, Time=0.03 sec
 ARIMA(1,1,2)(0,0,2)[12] intercept   : AIC=2168.227, Time=0.09 sec
 ARIMA(1,1,2)(1,0,0)[12] intercept   : AIC=2167.104, Time=0.04 sec
 ARIMA(1,1,2)(1,0,1)[12] intercept   : AIC=2168.535, Time=0.06 sec
 ARIMA(1,1,2)(2,0,0)[12] intercept   : AIC=2169.104, Time=0.08 sec
 ARIMA(1,1,3)(0,0,0)[12] intercept   : AIC=2178.578, Time=0.01 sec
 ARIMA(1,1,3)(0,0,1)[12] intercept   : AIC=2165.220, Time=0.03 sec
 ARIMA(1,1,3)(1,0,0)[12] intercept   : AIC=2166.425, Time=0.04 sec
 ARIMA(1,1,4)(0,0,0)[12] intercept   : AIC=2194.552, Time=0.02 sec
 ARIMA(2,1,0)(0,0,0)[12] intercept   : AIC=2181.819, Time=0.01 sec
 ARIMA(2,1,0)(0,0,1)[12] intercept   : AIC=2168.990, Time=0.01 sec
 ARIMA(2,1,0)(0,0,2)[12] intercept   : AIC=2170.728, Time=0.04 sec
 ARIMA(2,1,0)(1,0,0)[12] intercept   : AIC=2168.859, Time=0.02 sec
 ARIMA(2,1,0)(1,0,1)[12] intercept   : AIC=2170.606, Time=0.05 sec
 ARIMA(2,1,0)(1,0,2)[12] intercept   : AIC=2172.016, Time=0.10 sec
 ARIMA(2,1,0)(2,0,0)[12] intercept   : AIC=2170.848, Time=0.04 sec
 ARIMA(2,1,0)(2,0,1)[12] intercept   : AIC=2172.192, Time=0.15 sec
 ARIMA(2,1,1)(0,0,0)[12] intercept   : AIC=2178.184, Time=0.01 sec
 ARIMA(2,1,1)(0,0,1)[12] intercept   : AIC=2166.520, Time=0.03 sec
 ARIMA(2,1,1)(0,0,2)[12] intercept   : AIC=2168.028, Time=0.07 sec
 ARIMA(2,1,1)(1,0,0)[12] intercept   : AIC=2167.254, Time=0.03 sec
 ARIMA(2,1,1)(1,0,1)[12] intercept   : AIC=2168.418, Time=0.05 sec
 ARIMA(2,1,1)(2,0,0)[12] intercept   : AIC=2169.253, Time=0.08 sec
 ARIMA(2,1,2)(0,0,0)[12] intercept   : AIC=inf, Time=0.04 sec
 ARIMA(2,1,2)(0,0,1)[12] intercept   : AIC=2168.122, Time=0.06 sec
 ARIMA(2,1,2)(1,0,0)[12] intercept   : AIC=2168.469, Time=0.07 sec
 ARIMA(2,1,3)(0,0,0)[12] intercept   : AIC=2181.756, Time=0.03 sec
 ARIMA(3,1,0)(0,0,0)[12] intercept   : AIC=2183.352, Time=0.01 sec
 ARIMA(3,1,0)(0,0,1)[12] intercept   : AIC=2169.259, Time=0.02 sec
 ARIMA(3,1,0)(0,0,2)[12] intercept   : AIC=2170.921, Time=0.05 sec
 ARIMA(3,1,0)(1,0,0)[12] intercept   : AIC=2168.899, Time=0.02 sec
 ARIMA(3,1,0)(1,0,1)[12] intercept   : AIC=2170.722, Time=0.09 sec
 ARIMA(3,1,0)(2,0,0)[12] intercept   : AIC=2170.885, Time=0.05 sec
 ARIMA(3,1,1)(0,0,0)[12] intercept   : AIC=2175.367, Time=0.02 sec
 ARIMA(3,1,1)(0,0,1)[12] intercept   : AIC=2166.695, Time=0.05 sec
 ARIMA(3,1,1)(1,0,0)[12] intercept   : AIC=2167.554, Time=0.05 sec
 ARIMA(3,1,2)(0,0,0)[12] intercept   : AIC=2172.477, Time=0.04 sec
 ARIMA(4,1,0)(0,0,0)[12] intercept   : AIC=2181.824, Time=0.01 sec
 ARIMA(4,1,0)(0,0,1)[12] intercept   : AIC=2168.351, Time=0.03 sec
 ARIMA(4,1,0)(1,0,0)[12] intercept   : AIC=2167.570, Time=0.03 sec
 ARIMA(4,1,1)(0,0,0)[12] intercept   : AIC=2173.096, Time=0.02 sec
 ARIMA(5,1,0)(0,0,0)[12] intercept   : AIC=2183.108, Time=0.01 sec

Best model:  ARIMA(0,1,3)(0,0,1)[12] intercept
Total fit time: 4.848 seconds
In [110]:
print('order: ',sarima_flex_full_auto_model.order)
print('seasonal order: ',sarima_flex_full_auto_model.seasonal_order)
order:  (0, 1, 3)
seasonal order:  (0, 0, 1, 12)
In [111]:
horizon = 3
flex_full_results = []

for cutoff in range(44, 60):
    if cutoff + horizon >= len(data):
        break

    train = data.iloc[:cutoff+1]

    sar_flex_full = sm.tsa.SARIMAX(
        train['Weight'],
        order=sarima_flex_full_auto_model.order,
        seasonal_order=sarima_flex_full_auto_model.seasonal_order,
        trend='c',
        use_exact_diffuse=True
    ).fit(disp=False)

    yhat = sar_flex_full.forecast(steps=horizon)
    y_true = data['Weight'].iloc[cutoff+1: cutoff+1 + horizon]

    for h in range(horizon):
        ae = abs(yhat.iloc[h] - y_true.iloc[h])
        ape = ae / max(abs(y_true.iloc[h]), 1e-8)

        flex_full_results.append({
            'cutoff': cutoff,
            'horizon': h + 1,
            'cutoff_month': data['Import Date'].iloc[cutoff].to_period('M').to_timestamp(),
            'month': data['Import Date'].iloc[cutoff + h + 1].to_period('M').to_timestamp(),
            'y': y_true.iloc[h],
            'yhat': yhat.iloc[h],
            'AE': ae,
            'APE': ape
        })

sarima_flex_full_cv_df = pd.DataFrame(flex_full_results)
sarima_flex_full_cv_df.style.format({
    'cutoff_month': '{:%m-%d-%Y}',
    'month': '{:%m-%d-%Y}',
    'y': '{:,.0f}',
    'yhat': '{:,.0f}',
    'AE': '{:,.0f}',
    'APE': '{:.0%}'
})
Out[111]:
  cutoff horizon cutoff_month month y yhat AE APE
0 44 1 09-01-2023 10-01-2023 15,172,388 32,338,681 17,166,293 113%
1 44 2 09-01-2023 11-01-2023 11,741,002 25,880,186 14,139,184 120%
2 44 3 09-01-2023 12-01-2023 17,305,596 29,161,960 11,856,364 69%
3 45 1 10-01-2023 11-01-2023 11,741,002 16,121,887 4,380,885 37%
4 45 2 10-01-2023 12-01-2023 17,305,596 20,459,783 3,154,187 18%
5 45 3 10-01-2023 01-01-2024 64,702,033 46,031,411 18,670,622 29%
6 46 1 11-01-2023 12-01-2023 17,305,596 17,513,162 207,566 1%
7 46 2 11-01-2023 01-01-2024 64,702,033 43,713,798 20,988,235 32%
8 46 3 11-01-2023 02-01-2024 65,375,575 46,084,618 19,290,957 30%
9 47 1 12-01-2023 01-01-2024 64,702,033 44,154,691 20,547,342 32%
10 47 2 12-01-2023 02-01-2024 65,375,575 46,621,726 18,753,849 29%
11 47 3 12-01-2023 03-01-2024 62,364,919 51,571,601 10,793,318 17%
12 48 1 01-01-2024 02-01-2024 65,375,575 64,160,331 1,215,244 2%
13 48 2 01-01-2024 03-01-2024 62,364,919 72,742,594 10,377,675 17%
14 48 3 01-01-2024 04-01-2024 34,933,233 63,100,434 28,167,201 81%
15 49 1 02-01-2024 03-01-2024 62,364,919 73,723,208 11,358,289 18%
16 49 2 02-01-2024 04-01-2024 34,933,233 63,800,067 28,866,834 83%
17 49 3 02-01-2024 05-01-2024 35,504,037 48,208,784 12,704,747 36%
18 50 1 03-01-2024 04-01-2024 34,933,233 57,072,082 22,138,849 63%
19 50 2 03-01-2024 05-01-2024 35,504,037 42,185,222 6,681,185 19%
20 50 3 03-01-2024 06-01-2024 26,935,635 30,789,640 3,854,005 14%
21 51 1 04-01-2024 05-01-2024 35,504,037 29,899,045 5,604,992 16%
22 51 2 04-01-2024 06-01-2024 26,935,635 20,381,082 6,554,553 24%
23 51 3 04-01-2024 07-01-2024 19,175,494 27,261,872 8,086,378 42%
24 52 1 05-01-2024 06-01-2024 26,935,635 22,719,930 4,215,705 16%
25 52 2 05-01-2024 07-01-2024 19,175,494 28,662,119 9,486,625 49%
26 52 3 05-01-2024 08-01-2024 21,449,146 28,481,821 7,032,675 33%
27 53 1 06-01-2024 07-01-2024 19,175,494 29,251,524 10,076,030 53%
28 53 2 06-01-2024 08-01-2024 21,449,146 27,861,048 6,411,902 30%
29 53 3 06-01-2024 09-01-2024 23,828,291 24,449,049 620,758 3%
30 54 1 07-01-2024 08-01-2024 21,449,146 20,220,672 1,228,474 6%
31 54 2 07-01-2024 09-01-2024 23,828,291 16,707,720 7,120,571 30%
32 54 3 07-01-2024 10-01-2024 20,121,692 13,966,006 6,155,686 31%
33 55 1 08-01-2024 09-01-2024 23,828,291 18,496,186 5,332,105 22%
34 55 2 08-01-2024 10-01-2024 20,121,692 16,149,192 3,972,500 20%
35 55 3 08-01-2024 11-01-2024 23,311,088 18,531,665 4,779,423 21%
36 56 1 09-01-2024 10-01-2024 20,121,692 19,392,509 729,183 4%
37 56 2 09-01-2024 11-01-2024 23,311,088 21,489,295 1,821,793 8%
38 56 3 09-01-2024 12-01-2024 31,759,202 22,405,246 9,353,956 29%
In [112]:
sarima_flex_full_cv = sarima_flex_full_cv_df.groupby('horizon').agg(
    MAE=('AE', 'mean'),
    MAPE=('APE', 'mean'))

sarima_flex_full_cv.style.format({
    'MAE': '{:,.0f}',
    'MAPE': '{:.0%}'})
Out[112]:
  MAE MAPE
horizon    
1 8,015,458 29%
2 10,640,699 37%
3 10,874,315 33%

Performance improves significantly with the fully flexible auto-ARIMA grid search. This approaches the best Triple Exponential Smoothing model from earlier, which had a 27% overall MAPE:

In [113]:
sarima_flex_full_overall_MAE  = sarima_flex_full_cv_df['AE'].mean()
sarima_flex_full_overall_MAPE = sarima_flex_full_cv_df['APE'].mean()

print(f"MAE: {sarima_flex_full_overall_MAE:,.0f}")
print(f"MAPE (%): {sarima_flex_full_overall_MAPE:.0%}")
MAE: 9,843,491
MAPE (%): 33%

Again, I will also try a log transformation on the data and refit the models. First I look at the decomposition of the log-transformed data; it does not look materially different from the raw decomposition, so the intuitively ideal manual version should keep the same d and m (other orders will differ based on the autocorrelation of the log data):

In [114]:
decomp_raw = seasonal_decompose(data['Weight'], model='multiplicative', period=12)

data['Weight_log'] = np.log1p(data['Weight'])
decomp_log = seasonal_decompose(data['Weight_log'], model='additive', period=12)

fig, axes = plt.subplots(2, 2, figsize=(15, 10))

decomp_raw.trend.plot(ax=axes[0,0], color='black'); axes[0,0].set_title('Raw Trend')
decomp_log.trend.plot(ax=axes[0,1], color='black'); axes[0,1].set_title('Log Trend')
decomp_raw.seasonal.plot(ax=axes[1,0], color='black'); axes[1,0].set_title('Raw Seasonal')
decomp_log.seasonal.plot(ax=axes[1,1], color='black'); axes[1,1].set_title('Log Seasonal')

plt.tight_layout()
plt.show()
[Plots: trend and seasonal components, raw vs. log-transformed data]

Run auto-ARIMA on the log data, constraining d and D to 1 based on the earlier differencing tests:

In [115]:
sarima_log_auto = pm.auto_arima(np.log1p(data['Weight']),
                           start_p=0, start_q=0,
                           max_p=5, max_q=5, m=12,
                           start_P=0, seasonal=True,
                           d=1, D=1,
                           trace=True,
                           error_action='ignore',
                           suppress_warnings=True,
                           stepwise=False)
 ARIMA(0,1,0)(0,1,0)[12]             : AIC=53.203, Time=0.01 sec
 ARIMA(0,1,0)(0,1,1)[12]             : AIC=inf, Time=0.04 sec
 ARIMA(0,1,0)(0,1,2)[12]             : AIC=inf, Time=0.13 sec
 ARIMA(0,1,0)(1,1,0)[12]             : AIC=53.361, Time=0.02 sec
 ARIMA(0,1,0)(1,1,1)[12]             : AIC=inf, Time=0.09 sec
 ARIMA(0,1,0)(1,1,2)[12]             : AIC=inf, Time=0.30 sec
 ARIMA(0,1,0)(2,1,0)[12]             : AIC=42.788, Time=0.07 sec
 ARIMA(0,1,0)(2,1,1)[12]             : AIC=inf, Time=0.26 sec
 ARIMA(0,1,0)(2,1,2)[12]             : AIC=42.631, Time=0.20 sec
 ARIMA(0,1,1)(0,1,0)[12]             : AIC=49.652, Time=0.01 sec
 ARIMA(0,1,1)(0,1,1)[12]             : AIC=inf, Time=0.07 sec
 ARIMA(0,1,1)(0,1,2)[12]             : AIC=inf, Time=0.27 sec
 ARIMA(0,1,1)(1,1,0)[12]             : AIC=49.971, Time=0.03 sec
 ARIMA(0,1,1)(1,1,1)[12]             : AIC=inf, Time=0.11 sec
 ARIMA(0,1,1)(1,1,2)[12]             : AIC=inf, Time=0.26 sec
 ARIMA(0,1,1)(2,1,0)[12]             : AIC=41.387, Time=0.12 sec
 ARIMA(0,1,1)(2,1,1)[12]             : AIC=inf, Time=0.29 sec
 ARIMA(0,1,1)(2,1,2)[12]             : AIC=40.412, Time=0.43 sec
 ARIMA(0,1,2)(0,1,0)[12]             : AIC=50.687, Time=0.01 sec
 ARIMA(0,1,2)(0,1,1)[12]             : AIC=inf, Time=0.10 sec
 ARIMA(0,1,2)(0,1,2)[12]             : AIC=inf, Time=0.18 sec
 ARIMA(0,1,2)(1,1,0)[12]             : AIC=50.265, Time=0.03 sec
 ARIMA(0,1,2)(1,1,1)[12]             : AIC=inf, Time=0.20 sec
 ARIMA(0,1,2)(1,1,2)[12]             : AIC=inf, Time=0.14 sec
 ARIMA(0,1,2)(2,1,0)[12]             : AIC=42.050, Time=0.14 sec
 ARIMA(0,1,2)(2,1,1)[12]             : AIC=inf, Time=0.41 sec
 ARIMA(0,1,3)(0,1,0)[12]             : AIC=50.819, Time=0.03 sec
 ARIMA(0,1,3)(0,1,1)[12]             : AIC=inf, Time=0.23 sec
 ARIMA(0,1,3)(0,1,2)[12]             : AIC=inf, Time=0.61 sec
 ARIMA(0,1,3)(1,1,0)[12]             : AIC=50.793, Time=0.05 sec
 ARIMA(0,1,3)(1,1,1)[12]             : AIC=inf, Time=0.17 sec
 ARIMA(0,1,3)(2,1,0)[12]             : AIC=43.785, Time=0.22 sec
 ARIMA(0,1,4)(0,1,0)[12]             : AIC=inf, Time=0.10 sec
 ARIMA(0,1,4)(0,1,1)[12]             : AIC=inf, Time=0.26 sec
 ARIMA(0,1,4)(1,1,0)[12]             : AIC=inf, Time=0.23 sec
 ARIMA(0,1,5)(0,1,0)[12]             : AIC=inf, Time=0.11 sec
 ARIMA(1,1,0)(0,1,0)[12]             : AIC=50.840, Time=0.01 sec
 ARIMA(1,1,0)(0,1,1)[12]             : AIC=inf, Time=0.11 sec
 ARIMA(1,1,0)(0,1,2)[12]             : AIC=inf, Time=0.22 sec
 ARIMA(1,1,0)(1,1,0)[12]             : AIC=51.450, Time=0.02 sec
 ARIMA(1,1,0)(1,1,1)[12]             : AIC=inf, Time=0.13 sec
 ARIMA(1,1,0)(1,1,2)[12]             : AIC=inf, Time=0.37 sec
 ARIMA(1,1,0)(2,1,0)[12]             : AIC=42.415, Time=0.08 sec
 ARIMA(1,1,0)(2,1,1)[12]             : AIC=inf, Time=0.33 sec
 ARIMA(1,1,0)(2,1,2)[12]             : AIC=42.016, Time=0.20 sec
 ARIMA(1,1,1)(0,1,0)[12]             : AIC=inf, Time=0.05 sec
 ARIMA(1,1,1)(0,1,1)[12]             : AIC=inf, Time=0.12 sec
 ARIMA(1,1,1)(0,1,2)[12]             : AIC=inf, Time=0.31 sec
 ARIMA(1,1,1)(1,1,0)[12]             : AIC=inf, Time=0.09 sec
 ARIMA(1,1,1)(1,1,1)[12]             : AIC=inf, Time=0.21 sec
 ARIMA(1,1,1)(1,1,2)[12]             : AIC=inf, Time=0.50 sec
 ARIMA(1,1,1)(2,1,0)[12]             : AIC=inf, Time=0.42 sec
 ARIMA(1,1,1)(2,1,1)[12]             : AIC=inf, Time=0.59 sec
 ARIMA(1,1,2)(0,1,0)[12]             : AIC=inf, Time=0.06 sec
 ARIMA(1,1,2)(0,1,1)[12]             : AIC=inf, Time=0.23 sec
 ARIMA(1,1,2)(0,1,2)[12]             : AIC=inf, Time=0.27 sec
 ARIMA(1,1,2)(1,1,0)[12]             : AIC=inf, Time=0.12 sec
 ARIMA(1,1,2)(1,1,1)[12]             : AIC=inf, Time=0.26 sec
 ARIMA(1,1,2)(2,1,0)[12]             : AIC=inf, Time=0.57 sec
 ARIMA(1,1,3)(0,1,0)[12]             : AIC=inf, Time=0.10 sec
 ARIMA(1,1,3)(0,1,1)[12]             : AIC=inf, Time=0.19 sec
 ARIMA(1,1,3)(1,1,0)[12]             : AIC=inf, Time=0.21 sec
 ARIMA(1,1,4)(0,1,0)[12]             : AIC=inf, Time=0.10 sec
 ARIMA(2,1,0)(0,1,0)[12]             : AIC=52.606, Time=0.01 sec
 ARIMA(2,1,0)(0,1,1)[12]             : AIC=inf, Time=0.10 sec
 ARIMA(2,1,0)(0,1,2)[12]             : AIC=inf, Time=0.25 sec
 ARIMA(2,1,0)(1,1,0)[12]             : AIC=53.020, Time=0.03 sec
 ARIMA(2,1,0)(1,1,1)[12]             : AIC=inf, Time=0.24 sec
 ARIMA(2,1,0)(1,1,2)[12]             : AIC=inf, Time=0.28 sec
 ARIMA(2,1,0)(2,1,0)[12]             : AIC=43.435, Time=0.13 sec
 ARIMA(2,1,0)(2,1,1)[12]             : AIC=inf, Time=0.36 sec
 ARIMA(2,1,1)(0,1,0)[12]             : AIC=inf, Time=0.05 sec
 ARIMA(2,1,1)(0,1,1)[12]             : AIC=inf, Time=0.16 sec
 ARIMA(2,1,1)(0,1,2)[12]             : AIC=inf, Time=0.37 sec
 ARIMA(2,1,1)(1,1,0)[12]             : AIC=inf, Time=0.20 sec
 ARIMA(2,1,1)(1,1,1)[12]             : AIC=inf, Time=0.22 sec
 ARIMA(2,1,1)(2,1,0)[12]             : AIC=inf, Time=0.44 sec
 ARIMA(2,1,2)(0,1,0)[12]             : AIC=inf, Time=0.09 sec
 ARIMA(2,1,2)(0,1,1)[12]             : AIC=inf, Time=0.25 sec
 ARIMA(2,1,2)(1,1,0)[12]             : AIC=inf, Time=0.24 sec
 ARIMA(2,1,3)(0,1,0)[12]             : AIC=inf, Time=0.17 sec
 ARIMA(3,1,0)(0,1,0)[12]             : AIC=53.024, Time=0.01 sec
 ARIMA(3,1,0)(0,1,1)[12]             : AIC=inf, Time=0.11 sec
 ARIMA(3,1,0)(0,1,2)[12]             : AIC=inf, Time=0.41 sec
 ARIMA(3,1,0)(1,1,0)[12]             : AIC=53.495, Time=0.03 sec
 ARIMA(3,1,0)(1,1,1)[12]             : AIC=inf, Time=0.18 sec
 ARIMA(3,1,0)(2,1,0)[12]             : AIC=44.831, Time=0.10 sec
 ARIMA(3,1,1)(0,1,0)[12]             : AIC=52.793, Time=0.03 sec
 ARIMA(3,1,1)(0,1,1)[12]             : AIC=inf, Time=0.20 sec
 ARIMA(3,1,1)(1,1,0)[12]             : AIC=52.657, Time=0.08 sec
 ARIMA(3,1,2)(0,1,0)[12]             : AIC=inf, Time=0.09 sec
 ARIMA(4,1,0)(0,1,0)[12]             : AIC=51.680, Time=0.01 sec
 ARIMA(4,1,0)(0,1,1)[12]             : AIC=inf, Time=0.25 sec
 ARIMA(4,1,0)(1,1,0)[12]             : AIC=51.126, Time=0.06 sec
 ARIMA(4,1,1)(0,1,0)[12]             : AIC=53.524, Time=0.03 sec
 ARIMA(5,1,0)(0,1,0)[12]             : AIC=53.380, Time=0.02 sec

Best model:  ARIMA(0,1,1)(2,1,2)[12]          
Total fit time: 17.021 seconds
In [116]:
print('order: ',sarima_log_auto.order)
print('seasonal order: ',sarima_log_auto.seasonal_order)
order:  (0, 1, 1)
seasonal order:  (2, 1, 2, 12)
In [117]:
horizon = 3
log_auto_results = []

for cutoff in range(44, 60):
    if cutoff + horizon >= len(data):
        break

    train = data.iloc[:cutoff+1]

    sarima_log_auto_model = sm.tsa.SARIMAX(
        np.log1p(train['Weight']),
        order=sarima_log_auto.order,
        seasonal_order=sarima_log_auto.seasonal_order,
        trend='c',
        use_exact_diffuse=True
    ).fit(disp=False)

    yhat = np.expm1(sarima_log_auto_model.forecast(steps=horizon))
    y_true = data['Weight'].iloc[cutoff+1: cutoff+1 + horizon]

    for h in range(horizon):
        ae = abs(yhat.iloc[h] - y_true.iloc[h])
        ape = ae / max(abs(y_true.iloc[h]), 1e-8)

        log_auto_results.append({
            'cutoff': cutoff,
            'horizon': h + 1,
            'cutoff_month': data['Import Date'].iloc[cutoff].to_period('M').to_timestamp(),
            'month': data['Import Date'].iloc[cutoff + h + 1].to_period('M').to_timestamp(),
            'y': y_true.iloc[h],
            'yhat': yhat.iloc[h],
            'AE': ae,
            'APE': ape
        })

sarima_log_auto_cv_df = pd.DataFrame(log_auto_results)
sarima_log_auto_cv_df.style.format({
    'cutoff_month': '{:%m-%d-%Y}',
    'month': '{:%m-%d-%Y}',
    'y': '{:,.0f}',
    'yhat': '{:,.0f}',
    'AE': '{:,.0f}',
    'APE': '{:.0%}'
})
Out[117]:
  cutoff horizon cutoff_month month y yhat AE APE
0 44 1 09-01-2023 10-01-2023 15,172,388 18,132,359 2,959,971 20%
1 44 2 09-01-2023 11-01-2023 11,741,002 11,188,104 552,898 5%
2 44 3 09-01-2023 12-01-2023 17,305,596 15,839,031 1,466,565 8%
3 45 1 10-01-2023 11-01-2023 11,741,002 10,024,694 1,716,308 15%
4 45 2 10-01-2023 12-01-2023 17,305,596 14,694,612 2,610,984 15%
5 45 3 10-01-2023 01-01-2024 64,702,033 33,970,825 30,731,208 47%
6 46 1 11-01-2023 12-01-2023 17,305,596 16,423,567 882,029 5%
7 46 2 11-01-2023 01-01-2024 64,702,033 37,164,728 27,537,305 43%
8 46 3 11-01-2023 02-01-2024 65,375,575 32,163,379 33,212,196 51%
9 47 1 12-01-2023 01-01-2024 64,702,033 38,181,643 26,520,390 41%
10 47 2 12-01-2023 02-01-2024 65,375,575 32,519,650 32,855,925 50%
11 47 3 12-01-2023 03-01-2024 62,364,919 44,945,535 17,419,384 28%
12 48 1 01-01-2024 02-01-2024 65,375,575 36,089,098 29,286,477 45%
13 48 2 01-01-2024 03-01-2024 62,364,919 51,142,539 11,222,380 18%
14 48 3 01-01-2024 04-01-2024 34,933,233 42,249,976 7,316,743 21%
15 49 1 02-01-2024 03-01-2024 62,364,919 69,450,790 7,085,871 11%
16 49 2 02-01-2024 04-01-2024 34,933,233 46,086,651 11,153,418 32%
17 49 3 02-01-2024 05-01-2024 35,504,037 34,536,974 967,063 3%
18 50 1 03-01-2024 04-01-2024 34,933,233 42,693,625 7,760,392 22%
19 50 2 03-01-2024 05-01-2024 35,504,037 31,879,381 3,624,656 10%
20 50 3 03-01-2024 06-01-2024 26,935,635 27,200,542 264,907 1%
21 51 1 04-01-2024 05-01-2024 35,504,037 29,412,610 6,091,427 17%
22 51 2 04-01-2024 06-01-2024 26,935,635 26,180,885 754,750 3%
23 51 3 04-01-2024 07-01-2024 19,175,494 14,965,298 4,210,196 22%
24 52 1 05-01-2024 06-01-2024 26,935,635 27,846,940 911,305 3%
25 52 2 05-01-2024 07-01-2024 19,175,494 15,837,472 3,338,022 17%
26 52 3 05-01-2024 08-01-2024 21,449,146 13,438,783 8,010,363 37%
27 53 1 06-01-2024 07-01-2024 19,175,494 15,710,658 3,464,836 18%
28 53 2 06-01-2024 08-01-2024 21,449,146 13,269,210 8,179,936 38%
29 53 3 06-01-2024 09-01-2024 23,828,291 17,104,520 6,723,771 28%
30 54 1 07-01-2024 08-01-2024 21,449,146 13,818,446 7,630,700 36%
31 54 2 07-01-2024 09-01-2024 23,828,291 18,154,813 5,673,478 24%
32 54 3 07-01-2024 10-01-2024 20,121,692 9,713,765 10,407,927 52%
33 55 1 08-01-2024 09-01-2024 23,828,291 19,020,992 4,807,299 20%
34 55 2 08-01-2024 10-01-2024 20,121,692 10,760,367 9,361,325 47%
35 55 3 08-01-2024 11-01-2024 23,311,088 9,080,584 14,230,504 61%
36 56 1 09-01-2024 10-01-2024 20,121,692 12,840,900 7,280,792 36%
37 56 2 09-01-2024 11-01-2024 23,311,088 10,562,908 12,748,180 55%
38 56 3 09-01-2024 12-01-2024 31,759,202 17,556,889 14,202,313 45%
In [118]:
sarima_log_auto_cv = sarima_log_auto_cv_df.groupby('horizon').agg(
    MAE=('AE', 'mean'),
    MAPE=('APE', 'mean'))

sarima_log_auto_cv.style.format({
    'MAE': '{:,.0f}',
    'MAPE': '{:.0%}'})
Out[118]:
  MAE MAPE
horizon    
1 8,184,446 22%
2 9,970,251 27%
3 11,474,088 31%

Performance improves significantly, matching the 27% overall MAPE of the best-performing Triple Exponential Smoothing model from earlier:

In [119]:
sarima_log_auto_overall_MAE  = sarima_log_auto_cv_df['AE'].mean()
sarima_log_auto_overall_MAPE = sarima_log_auto_cv_df['APE'].mean()

print(f"MAE: {sarima_log_auto_overall_MAE:,.0f}")
print(f"MAPE: {sarima_log_auto_overall_MAPE:.0%}")
MAE: 9,876,261
MAPE: 27%

Fully flexible auto-ARIMA on the log data - nothing is fixed (d and D are left free):

In [120]:
sarima_log_full_flex = pm.auto_arima(np.log1p(data['Weight']),
                           start_p=0, start_q=0,
                           max_p=5, max_q=5, m=12,
                           start_P=0, seasonal=True,
                           trace=True,
                           error_action='ignore',
                           suppress_warnings=True,
                           stepwise=False)
 ARIMA(0,1,0)(0,0,0)[12] intercept   : AIC=74.720, Time=0.01 sec
 ARIMA(0,1,0)(0,0,1)[12] intercept   : AIC=58.069, Time=0.02 sec
 ARIMA(0,1,0)(0,0,2)[12] intercept   : AIC=60.069, Time=0.07 sec
 ARIMA(0,1,0)(1,0,0)[12] intercept   : AIC=59.645, Time=0.02 sec
 ARIMA(0,1,0)(1,0,1)[12] intercept   : AIC=inf, Time=0.13 sec
 ARIMA(0,1,0)(1,0,2)[12] intercept   : AIC=inf, Time=0.21 sec
 ARIMA(0,1,0)(2,0,0)[12] intercept   : AIC=61.620, Time=0.06 sec
 ARIMA(0,1,0)(2,0,1)[12] intercept   : AIC=inf, Time=0.11 sec
 ARIMA(0,1,0)(2,0,2)[12] intercept   : AIC=inf, Time=0.23 sec
 ARIMA(0,1,1)(0,0,0)[12] intercept   : AIC=76.596, Time=0.01 sec
 ARIMA(0,1,1)(0,0,1)[12] intercept   : AIC=59.177, Time=0.03 sec
 ARIMA(0,1,1)(0,0,2)[12] intercept   : AIC=61.165, Time=0.07 sec
 ARIMA(0,1,1)(1,0,0)[12] intercept   : AIC=59.840, Time=0.03 sec
 ARIMA(0,1,1)(1,0,1)[12] intercept   : AIC=inf, Time=0.15 sec
 ARIMA(0,1,1)(1,0,2)[12] intercept   : AIC=inf, Time=0.28 sec
 ARIMA(0,1,1)(2,0,0)[12] intercept   : AIC=61.839, Time=0.06 sec
 ARIMA(0,1,1)(2,0,1)[12] intercept   : AIC=60.055, Time=0.16 sec
 ARIMA(0,1,1)(2,0,2)[12] intercept   : AIC=inf, Time=0.30 sec
 ARIMA(0,1,2)(0,0,0)[12] intercept   : AIC=78.331, Time=0.01 sec
 ARIMA(0,1,2)(0,0,1)[12] intercept   : AIC=60.751, Time=0.04 sec
 ARIMA(0,1,2)(0,0,2)[12] intercept   : AIC=62.720, Time=0.09 sec
 ARIMA(0,1,2)(1,0,0)[12] intercept   : AIC=inf, Time=0.10 sec
 ARIMA(0,1,2)(1,0,1)[12] intercept   : AIC=inf, Time=0.12 sec
 ARIMA(0,1,2)(1,0,2)[12] intercept   : AIC=inf, Time=0.33 sec
 ARIMA(0,1,2)(2,0,0)[12] intercept   : AIC=inf, Time=0.24 sec
 ARIMA(0,1,2)(2,0,1)[12] intercept   : AIC=inf, Time=0.29 sec
 ARIMA(0,1,3)(0,0,0)[12] intercept   : AIC=inf, Time=0.07 sec
 ARIMA(0,1,3)(0,0,1)[12] intercept   : AIC=inf, Time=0.12 sec
 ARIMA(0,1,3)(0,0,2)[12] intercept   : AIC=inf, Time=0.23 sec
 ARIMA(0,1,3)(1,0,0)[12] intercept   : AIC=inf, Time=0.11 sec
 ARIMA(0,1,3)(1,0,1)[12] intercept   : AIC=inf, Time=0.16 sec
 ARIMA(0,1,3)(2,0,0)[12] intercept   : AIC=inf, Time=0.31 sec
 ARIMA(0,1,4)(0,0,0)[12] intercept   : AIC=inf, Time=0.04 sec
 ARIMA(0,1,4)(0,0,1)[12] intercept   : AIC=inf, Time=0.13 sec
 ARIMA(0,1,4)(1,0,0)[12] intercept   : AIC=inf, Time=0.15 sec
 ARIMA(0,1,5)(0,0,0)[12] intercept   : AIC=inf, Time=0.06 sec
 ARIMA(1,1,0)(0,0,0)[12] intercept   : AIC=76.610, Time=0.01 sec
 ARIMA(1,1,0)(0,0,1)[12] intercept   : AIC=59.273, Time=0.03 sec
 ARIMA(1,1,0)(0,0,2)[12] intercept   : AIC=61.265, Time=0.06 sec
 ARIMA(1,1,0)(1,0,0)[12] intercept   : AIC=60.149, Time=0.02 sec
 ARIMA(1,1,0)(1,0,1)[12] intercept   : AIC=inf, Time=0.11 sec
 ARIMA(1,1,0)(1,0,2)[12] intercept   : AIC=inf, Time=0.24 sec
 ARIMA(1,1,0)(2,0,0)[12] intercept   : AIC=62.148, Time=0.07 sec
 ARIMA(1,1,0)(2,0,1)[12] intercept   : AIC=inf, Time=0.17 sec
 ARIMA(1,1,0)(2,0,2)[12] intercept   : AIC=inf, Time=0.29 sec
 ARIMA(1,1,1)(0,0,0)[12] intercept   : AIC=inf, Time=0.03 sec
 ARIMA(1,1,1)(0,0,1)[12] intercept   : AIC=inf, Time=0.09 sec
 ARIMA(1,1,1)(0,0,2)[12] intercept   : AIC=inf, Time=0.20 sec
 ARIMA(1,1,1)(1,0,0)[12] intercept   : AIC=inf, Time=0.09 sec
 ARIMA(1,1,1)(1,0,1)[12] intercept   : AIC=55.142, Time=0.11 sec
 ARIMA(1,1,1)(1,0,2)[12] intercept   : AIC=inf, Time=0.29 sec
 ARIMA(1,1,1)(2,0,0)[12] intercept   : AIC=inf, Time=0.24 sec
 ARIMA(1,1,1)(2,0,1)[12] intercept   : AIC=inf, Time=0.32 sec
 ARIMA(1,1,2)(0,0,0)[12] intercept   : AIC=inf, Time=0.04 sec
 ARIMA(1,1,2)(0,0,1)[12] intercept   : AIC=inf, Time=0.10 sec
 ARIMA(1,1,2)(0,0,2)[12] intercept   : AIC=inf, Time=0.22 sec
 ARIMA(1,1,2)(1,0,0)[12] intercept   : AIC=inf, Time=0.12 sec
 ARIMA(1,1,2)(1,0,1)[12] intercept   : AIC=inf, Time=0.13 sec
 ARIMA(1,1,2)(2,0,0)[12] intercept   : AIC=55.573, Time=0.27 sec
 ARIMA(1,1,3)(0,0,0)[12] intercept   : AIC=inf, Time=0.04 sec
 ARIMA(1,1,3)(0,0,1)[12] intercept   : AIC=inf, Time=0.12 sec
 ARIMA(1,1,3)(1,0,0)[12] intercept   : AIC=inf, Time=0.13 sec
 ARIMA(1,1,4)(0,0,0)[12] intercept   : AIC=inf, Time=0.05 sec
 ARIMA(2,1,0)(0,0,0)[12] intercept   : AIC=78.450, Time=0.01 sec
 ARIMA(2,1,0)(0,0,1)[12] intercept   : AIC=61.165, Time=0.04 sec
 ARIMA(2,1,0)(0,0,2)[12] intercept   : AIC=63.157, Time=0.08 sec
 ARIMA(2,1,0)(1,0,0)[12] intercept   : AIC=61.923, Time=0.04 sec
 ARIMA(2,1,0)(1,0,1)[12] intercept   : AIC=inf, Time=0.12 sec
 ARIMA(2,1,0)(1,0,2)[12] intercept   : AIC=inf, Time=0.32 sec
 ARIMA(2,1,0)(2,0,0)[12] intercept   : AIC=63.922, Time=0.08 sec
 ARIMA(2,1,0)(2,0,1)[12] intercept   : AIC=inf, Time=0.29 sec
 ARIMA(2,1,1)(0,0,0)[12] intercept   : AIC=inf, Time=0.04 sec
 ARIMA(2,1,1)(0,0,1)[12] intercept   : AIC=inf, Time=0.08 sec
 ARIMA(2,1,1)(0,0,2)[12] intercept   : AIC=inf, Time=0.21 sec
 ARIMA(2,1,1)(1,0,0)[12] intercept   : AIC=inf, Time=0.12 sec
 ARIMA(2,1,1)(1,0,1)[12] intercept   : AIC=inf, Time=0.14 sec
 ARIMA(2,1,1)(2,0,0)[12] intercept   : AIC=inf, Time=0.29 sec
 ARIMA(2,1,2)(0,0,0)[12] intercept   : AIC=inf, Time=0.04 sec
 ARIMA(2,1,2)(0,0,1)[12] intercept   : AIC=inf, Time=0.11 sec
 ARIMA(2,1,2)(1,0,0)[12] intercept   : AIC=inf, Time=0.10 sec
 ARIMA(2,1,3)(0,0,0)[12] intercept   : AIC=inf, Time=0.05 sec
 ARIMA(3,1,0)(0,0,0)[12] intercept   : AIC=80.360, Time=0.01 sec
 ARIMA(3,1,0)(0,0,1)[12] intercept   : AIC=62.262, Time=0.05 sec
 ARIMA(3,1,0)(0,0,2)[12] intercept   : AIC=64.229, Time=0.09 sec
 ARIMA(3,1,0)(1,0,0)[12] intercept   : AIC=62.851, Time=0.05 sec
 ARIMA(3,1,0)(1,0,1)[12] intercept   : AIC=inf, Time=0.16 sec
 ARIMA(3,1,0)(2,0,0)[12] intercept   : AIC=64.851, Time=0.12 sec
 ARIMA(3,1,1)(0,0,0)[12] intercept   : AIC=inf, Time=0.05 sec
 ARIMA(3,1,1)(0,0,1)[12] intercept   : AIC=inf, Time=0.17 sec
 ARIMA(3,1,1)(1,0,0)[12] intercept   : AIC=inf, Time=0.12 sec
 ARIMA(3,1,2)(0,0,0)[12] intercept   : AIC=inf, Time=0.04 sec
 ARIMA(4,1,0)(0,0,0)[12] intercept   : AIC=81.534, Time=0.01 sec
 ARIMA(4,1,0)(0,0,1)[12] intercept   : AIC=63.438, Time=0.05 sec
 ARIMA(4,1,0)(1,0,0)[12] intercept   : AIC=62.527, Time=0.12 sec
 ARIMA(4,1,1)(0,0,0)[12] intercept   : AIC=inf, Time=0.08 sec
 ARIMA(5,1,0)(0,0,0)[12] intercept   : AIC=82.538, Time=0.02 sec

Best model:  ARIMA(1,1,1)(1,0,1)[12] intercept
Total fit time: 11.417 seconds
In [121]:
print('order: ',sarima_log_full_flex.order)
print('seasonal order: ',sarima_log_full_flex.seasonal_order)
order:  (1, 1, 1)
seasonal order:  (1, 0, 1, 12)
In [122]:
horizon = 3
log_full_flex_results = []

for cutoff in range(44, 60):
    if cutoff + horizon >= len(data):
        break

    train = data.iloc[:cutoff+1]

    sarima_log_full_flex_model = sm.tsa.SARIMAX(
        np.log1p(train['Weight']),
        order=sarima_log_full_flex.order,
        seasonal_order=sarima_log_full_flex.seasonal_order,
        trend='c',
        use_exact_diffuse=True
    ).fit(disp=False)

    yhat = np.expm1(sarima_log_full_flex_model.forecast(steps=horizon))
    y_true = data['Weight'].iloc[cutoff+1: cutoff+1 + horizon]

    for h in range(horizon):
        ae = abs(yhat.iloc[h] - y_true.iloc[h])
        ape = ae / max(abs(y_true.iloc[h]), 1e-8)

        log_full_flex_results.append({
            'cutoff': cutoff,
            'horizon': h + 1,
            'cutoff_month': data['Import Date'].iloc[cutoff].to_period('M').to_timestamp(),
            'month': data['Import Date'].iloc[cutoff + h + 1].to_period('M').to_timestamp(),
            'y': y_true.iloc[h],
            'yhat': yhat.iloc[h],
            'AE': ae,
            'APE': ape
        })

sarima_log_full_flex_cv_df = pd.DataFrame(log_full_flex_results)
sarima_log_full_flex_cv_df.style.format({
    'cutoff_month': '{:%m-%d-%Y}',
    'month': '{:%m-%d-%Y}',
    'y': '{:,.0f}',
    'yhat': '{:,.0f}',
    'AE': '{:,.0f}',
    'APE': '{:.0%}'
})
Out[122]:
  cutoff horizon cutoff_month month y yhat AE APE
0 44 1 09-01-2023 10-01-2023 15,172,388 19,022,105 3,849,717 25%
1 44 2 09-01-2023 11-01-2023 11,741,002 16,374,058 4,633,056 39%
2 44 3 09-01-2023 12-01-2023 17,305,596 13,644,569 3,661,027 21%
3 45 1 10-01-2023 11-01-2023 11,741,002 17,196,917 5,455,915 46%
4 45 2 10-01-2023 12-01-2023 17,305,596 21,634,850 4,329,254 25%
5 45 3 10-01-2023 01-01-2024 64,702,033 45,512,077 19,189,956 30%
6 46 1 11-01-2023 12-01-2023 17,305,596 17,042,136 263,460 2%
7 46 2 11-01-2023 01-01-2024 64,702,033 38,918,497 25,783,536 40%
8 46 3 11-01-2023 02-01-2024 65,375,575 39,278,741 26,096,834 40%
9 47 1 12-01-2023 01-01-2024 64,702,033 39,207,900 25,494,133 39%
10 47 2 12-01-2023 02-01-2024 65,375,575 39,392,927 25,982,648 40%
11 47 3 12-01-2023 03-01-2024 62,364,919 50,918,681 11,446,238 18%
12 48 1 01-01-2024 02-01-2024 65,375,575 53,263,101 12,112,474 19%
13 48 2 01-01-2024 03-01-2024 62,364,919 62,239,477 125,442 0%
14 48 3 01-01-2024 04-01-2024 34,933,233 60,362,747 25,429,514 73%
15 49 1 02-01-2024 03-01-2024 62,364,919 70,718,057 8,353,138 13%
16 49 2 02-01-2024 04-01-2024 34,933,233 65,343,812 30,410,579 87%
17 49 3 02-01-2024 05-01-2024 35,504,037 49,088,781 13,584,744 38%
18 50 1 03-01-2024 04-01-2024 34,933,233 61,667,036 26,733,803 77%
19 50 2 03-01-2024 05-01-2024 35,504,037 47,266,024 11,761,987 33%
20 50 3 03-01-2024 06-01-2024 26,935,635 41,055,774 14,120,139 52%
21 51 1 04-01-2024 05-01-2024 35,504,037 29,688,032 5,816,005 16%
22 51 2 04-01-2024 06-01-2024 26,935,635 22,949,698 3,985,937 15%
23 51 3 04-01-2024 07-01-2024 19,175,494 18,947,987 227,507 1%
24 52 1 05-01-2024 06-01-2024 26,935,635 24,509,724 2,425,911 9%
25 52 2 05-01-2024 07-01-2024 19,175,494 20,206,939 1,031,445 5%
26 52 3 05-01-2024 08-01-2024 21,449,146 24,359,621 2,910,475 14%
27 53 1 06-01-2024 07-01-2024 19,175,494 19,586,924 411,430 2%
28 53 2 06-01-2024 08-01-2024 21,449,146 21,227,726 221,420 1%
29 53 3 06-01-2024 09-01-2024 23,828,291 21,642,176 2,186,115 9%
30 54 1 07-01-2024 08-01-2024 21,449,146 20,717,126 732,020 3%
31 54 2 07-01-2024 09-01-2024 23,828,291 20,584,375 3,243,916 14%
32 54 3 07-01-2024 10-01-2024 20,121,692 16,798,862 3,322,830 17%
33 55 1 08-01-2024 09-01-2024 23,828,291 20,391,635 3,436,656 14%
34 55 2 08-01-2024 10-01-2024 20,121,692 16,740,644 3,381,048 17%
35 55 3 08-01-2024 11-01-2024 23,311,088 14,168,457 9,142,631 39%
36 56 1 09-01-2024 10-01-2024 20,121,692 18,667,555 1,454,137 7%
37 56 2 09-01-2024 11-01-2024 23,311,088 15,944,377 7,366,711 32%
38 56 3 09-01-2024 12-01-2024 31,759,202 18,232,002 13,527,200 43%
In [123]:
sarima_log_full_flex_cv = sarima_log_full_flex_cv_df.groupby('horizon').agg(
    MAE=('AE', 'mean'),
    MAPE=('APE', 'mean'))

sarima_log_full_flex_cv.style.format({
    'MAE': '{:,.0f}',
    'MAPE': '{:.0%}'})
Out[123]:
  MAE MAPE
horizon    
1 7,426,062 21%
2 9,404,383 27%
3 11,141,939 30%

By the overall metrics, the fully flexible grid search is actually marginally better here (26% overall MAPE vs. 27%):

In [124]:
sarima_log_full_flex_overall_MAE  = sarima_log_full_flex_cv_df['AE'].mean()
sarima_log_full_flex_overall_MAPE = sarima_log_full_flex_cv_df['APE'].mean()

print(f"MAE: {sarima_log_full_flex_overall_MAE:,.0f}")
print(f"MAPE (%): {sarima_log_full_flex_overall_MAPE:.0%}")
MAE: 9,324,128
MAPE (%): 26%

Switching to a manually specified model:

The manual (p,d,q)(P,D,Q,s) orders below are chosen for the log-transformed data, based on the earlier autocorrelation plots and decomposition.
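As a reminder of how those plots inform the choice, here is a minimal sketch (not part of the original run) of the kind of check involved; it assumes the same `data` DataFrame used above, and the lag count is an arbitrary choice:

import numpy as np
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Difference once at lag 1 (d=1) and once at the seasonal lag 12 (D=1, s=12)
log_weight = np.log1p(data['Weight'])
stationary = log_weight.diff().diff(12).dropna()

fig, axes = plt.subplots(1, 2, figsize=(12, 4))
plot_acf(stationary, lags=18, ax=axes[0])    # significant spikes here suggest MA (q / Q) terms
plot_pacf(stationary, lags=18, ax=axes[1])   # significant spikes here suggest AR (p / P) terms
plt.show()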

In [125]:
sarima_log_manual_order = (2,1,1)
sarima_log_manual_seasonal_order = (1,1,1,12)
In [126]:
horizon = 3
log_manual_results = []

for cutoff in range(44, 60):
    if cutoff + horizon >= len(data):
        break

    train = data.iloc[:cutoff+1]

    sarima_log_manual_model = sm.tsa.SARIMAX(
        np.log1p(train['Weight']),
        order=sarima_log_manual_order,
        seasonal_order=sarima_log_manual_seasonal_order,
        trend='c',
        use_exact_diffuse=True
    ).fit(disp=False)

    yhat = np.expm1(sarima_log_manual_model.forecast(steps=horizon))
    y_true = data['Weight'].iloc[cutoff+1: cutoff+1 + horizon]

    for h in range(horizon):
        ae = abs(yhat.iloc[h] - y_true.iloc[h])
        ape = ae / max(abs(y_true.iloc[h]), 1e-8)

        log_manual_results.append({
            'cutoff': cutoff,
            'horizon': h + 1,
            'cutoff_month': data['Import Date'].iloc[cutoff].to_period('M').to_timestamp(),
            'month': data['Import Date'].iloc[cutoff + h + 1].to_period('M').to_timestamp(),
            'y': y_true.iloc[h],
            'yhat': yhat.iloc[h],
            'AE': ae,
            'APE': ape

        })

sarima_log_manual_cv_df = pd.DataFrame(log_manual_results)
sarima_log_manual_cv_df.style.format({
    'cutoff_month': '{:%m-%d-%Y}',
    'month': '{:%m-%d-%Y}',
    'y': '{:,.0f}',
    'yhat': '{:,.0f}',
    'AE': '{:,.0f}',
    'APE': '{:.0%}'
})
Out[126]:
  cutoff horizon cutoff_month month y yhat AE APE
0 44 1 09-01-2023 10-01-2023 15,172,388 17,533,105 2,360,717 16%
1 44 2 09-01-2023 11-01-2023 11,741,002 14,875,708 3,134,706 27%
2 44 3 09-01-2023 12-01-2023 17,305,596 15,784,007 1,521,589 9%
3 45 1 10-01-2023 11-01-2023 11,741,002 14,100,880 2,359,878 20%
4 45 2 10-01-2023 12-01-2023 17,305,596 15,694,067 1,611,529 9%
5 45 3 10-01-2023 01-01-2024 64,702,033 38,782,641 25,919,392 40%
6 46 1 11-01-2023 12-01-2023 17,305,596 14,413,347 2,892,249 17%
7 46 2 11-01-2023 01-01-2024 64,702,033 37,130,369 27,571,664 43%
8 46 3 11-01-2023 02-01-2024 65,375,575 33,732,957 31,642,618 48%
9 47 1 12-01-2023 01-01-2024 64,702,033 39,630,134 25,071,899 39%
10 47 2 12-01-2023 02-01-2024 65,375,575 34,422,739 30,952,836 47%
11 47 3 12-01-2023 03-01-2024 62,364,919 42,204,836 20,160,083 32%
12 48 1 01-01-2024 02-01-2024 65,375,575 43,141,021 22,234,554 34%
13 48 2 01-01-2024 03-01-2024 62,364,919 46,246,949 16,117,970 26%
14 48 3 01-01-2024 04-01-2024 34,933,233 42,942,172 8,008,939 23%
15 49 1 02-01-2024 03-01-2024 62,364,919 57,197,381 5,167,538 8%
16 49 2 02-01-2024 04-01-2024 34,933,233 48,828,376 13,895,143 40%
17 49 3 02-01-2024 05-01-2024 35,504,037 33,518,250 1,985,787 6%
18 50 1 03-01-2024 04-01-2024 34,933,233 51,099,745 16,166,512 46%
19 50 2 03-01-2024 05-01-2024 35,504,037 34,455,289 1,048,748 3%
20 50 3 03-01-2024 06-01-2024 26,935,635 27,590,976 655,341 2%
21 51 1 04-01-2024 05-01-2024 35,504,037 28,543,211 6,960,826 20%
22 51 2 04-01-2024 06-01-2024 26,935,635 26,164,625 771,010 3%
23 51 3 04-01-2024 07-01-2024 19,175,494 14,860,194 4,315,300 23%
24 52 1 05-01-2024 06-01-2024 26,935,635 29,343,558 2,407,923 9%
25 52 2 05-01-2024 07-01-2024 19,175,494 15,632,127 3,543,367 18%
26 52 3 05-01-2024 08-01-2024 21,449,146 14,745,230 6,703,916 31%
27 53 1 06-01-2024 07-01-2024 19,175,494 15,005,852 4,169,642 22%
28 53 2 06-01-2024 08-01-2024 21,449,146 14,511,456 6,937,690 32%
29 53 3 06-01-2024 09-01-2024 23,828,291 14,480,263 9,348,028 39%
30 54 1 07-01-2024 08-01-2024 21,449,146 16,223,817 5,225,329 24%
31 54 2 07-01-2024 09-01-2024 23,828,291 15,324,391 8,503,900 36%
32 54 3 07-01-2024 10-01-2024 20,121,692 10,494,907 9,626,785 48%
33 55 1 08-01-2024 09-01-2024 23,828,291 17,382,719 6,445,572 27%
34 55 2 08-01-2024 10-01-2024 20,121,692 11,062,373 9,059,319 45%
35 55 3 08-01-2024 11-01-2024 23,311,088 8,674,143 14,636,945 63%
36 56 1 09-01-2024 10-01-2024 20,121,692 12,957,528 7,164,164 36%
37 56 2 09-01-2024 11-01-2024 23,311,088 9,499,250 13,811,838 59%
38 56 3 09-01-2024 12-01-2024 31,759,202 11,324,074 20,435,128 64%
In [127]:
sarima_log_manual_cv = sarima_log_manual_cv_df.groupby('horizon').agg(
    MAE=('AE', 'mean'),
    MAPE=('APE', 'mean'))

sarima_log_manual_cv.style.format({
    'MAE': '{:,.0f}',
    'MAPE': '{:.0%}'})
Out[127]:
  MAE MAPE
horizon    
1 8,355,908 24%
2 10,535,363 30%
3 11,919,988 33%

Performance is much better than the manual SARIMA on the raw data, although the 29% overall MAPE is still higher than the 27% from other models:

In [128]:
sarima_log_manual_overall_MAE  = sarima_log_manual_cv_df['AE'].mean()
sarima_log_manual_overall_MAPE = sarima_log_manual_cv_df['APE'].mean()

print(f"MAE: {sarima_log_manual_overall_MAE:,.0f}")
print(f"MAPE (%): {sarima_log_manual_overall_MAPE:.0%}")
MAE: 10,270,420
MAPE (%): 29%

Below I assign the best performing SARIMA configuration as the "final" SARIMA model, which was the short grid search ("auto" model) on the log-transformed data:

In [129]:
final_order = sarima_log_auto_model.model.order
final_seasonal_order = sarima_log_auto_model.model.seasonal_order
In [130]:
print(final_order)
print(final_seasonal_order)
(0, 1, 1)
(2, 1, 2, 12)
In [131]:
sarima_final_cv = sarima_log_auto_cv
In [132]:
sarima_final_cv.style.format({
    'MAE': '{:,.0f}',
    'MAPE': '{:.0%}'})
Out[132]:
  MAE MAPE
horizon    
1 8,184,446 22%
2 9,970,251 27%
3 11,474,088 31%
In [133]:
sarima_final_overall_MAE = sarima_log_auto_overall_MAE
sarima_final_overall_MAPE = sarima_log_auto_overall_MAPE
In [134]:
print(f"MAE: {sarima_final_overall_MAE:,.0f}")
print(f"MAPE (%): {sarima_final_overall_MAPE:.0%}")
MAE: 9,876,261
MAPE (%): 27%

Final SARIMA plot:

(Note: the fitted values are clipped to the 1st-99th percentile range in log space before back-transforming, since a few extreme fitted values created by the log transformation skewed the view of the plot)

In [135]:
sar_final = sm.tsa.SARIMAX(
    np.log1p(data['Weight']),
    order=final_order,
    seasonal_order=final_seasonal_order,
    trend='c',
    use_exact_diffuse=True
).fit(disp=False)

fitted_sar_final_log = sar_final.fittedvalues

log_min, log_max = np.percentile(np.log1p(data['Weight']), [1, 99])
fitted_sar_final_log = np.clip(fitted_sar_final_log, log_min, log_max)
fitted_sar_final = np.expm1(fitted_sar_final_log)

fitted_dates = data['Import Date'].iloc[-len(fitted_sar_final):].reset_index(drop=True)

forecast_steps = 3
forecast_obj = sar_final.get_forecast(steps=forecast_steps)
forecast_mean_log = forecast_obj.predicted_mean
forecast_mean = np.expm1(forecast_mean_log)
conf_int_log = forecast_obj.conf_int(alpha=0.1)
lower = np.expm1(conf_int_log.iloc[:, 0])
upper = np.expm1(conf_int_log.iloc[:, 1])

last_date = data['Import Date'].iloc[-1]
future_dates = pd.date_range(start=last_date + pd.offsets.MonthBegin(1), periods=forecast_steps, freq='MS')

combined_preds_sar_final = np.concatenate([fitted_sar_final.values, forecast_mean.values])
combined_dates_sar_final = pd.concat([fitted_dates, pd.Series(future_dates)], ignore_index=True)

plt.figure(figsize=(12, 5))
plt.plot(data['Import Date'], data['Weight'], color=custom_palette[2], label="actual")
plt.plot(combined_dates_sar_final, combined_preds_sar_final, color=custom_palette[0], linestyle='--', label="model fit + forecast")
plt.fill_between(future_dates, lower, upper, color=custom_palette[0], alpha=0.2, label='80% Confidence Interval')
plt.axvspan(future_dates[0], future_dates[-1], color=custom_palette[0], alpha=0.1)
plt.gca().yaxis.set_major_formatter(FuncFormatter(lambda x, pos: f'{int(x/1e6)}M'))
plt.legend(loc='upper left')
plt.title("SARIMA Model Final - Short Grid Search (Log Data)")
plt.xlabel("Date")
plt.ylabel("Weight")
plt.grid(alpha=0.3)
plt.show()
No description has been provided for this image

The model has good diagnostics now: the residuals look like white noise, the histogram looks more normal, the Q-Q plot is more in line, and there is no remaining autocorrelation:

In [136]:
sar_final.plot_diagnostics(figsize = (15,8),lags = 12);
No description has been provided for this image

Also checking lags further out (13-24), but I see no autocorrelation:

In [137]:
sar_final.plot_diagnostics(figsize=(15,8), lags=24)
Out[137]:
No description has been provided for this image
No description has been provided for this image
In [138]:
sar_final.summary()
Out[138]:
SARIMAX Results
Dep. Variable: Weight No. Observations: 60
Model: SARIMAX(0, 1, 1)x(2, 1, [1, 2], 12) Log Likelihood -26.152
Date: Tue, 13 Jan 2026 AIC 92.304
Time: 16:44:55 BIC 134.190
Sample: 0 HQIC 108.688
- 60
Covariance Type: opg
coef std err z P>|z| [0.025 0.975]
intercept 0.0003 0.024 0.014 0.989 -0.046 0.047
ma.L1 -0.3927 0.174 -2.261 0.024 -0.733 -0.052
ar.S.L12 0.0559 0.491 0.114 0.909 -0.907 1.019
ar.S.L24 -0.6244 0.245 -2.547 0.011 -1.105 -0.144
ma.S.L12 -1.6591 0.782 -2.122 0.034 -3.191 -0.127
ma.S.L24 0.7801 1.083 0.720 0.471 -1.342 2.902
sigma2 0.0312 0.032 0.973 0.331 -0.032 0.094
Ljung-Box (L1) (Q): 0.03 Jarque-Bera (JB): 1.33
Prob(Q): 0.86 Prob(JB): 0.52
Heteroskedasticity (H): 0.63 Skew: 0.03
Prob(H) (two-sided): 0.37 Kurtosis: 2.18


Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 5.34e+16. Standard errors may be unstable.

Statistical Tests also confirm good diagnostics and model fit:

In [139]:
norm_val, norm_p, skew, kurtosis = sar_final.test_normality('jarquebera')[0]
lb_val, lb_p = sar_final.test_serial_correlation(method='ljungbox',)[0]
het_val, het_p = sar_final.test_heteroskedasticity('breakvar')[0]

# Ljung-Box: look at largest lag
lb_val = lb_val[-1]
lb_p = lb_p[-1]
durbin_watson = sm.stats.stattools.durbin_watson(
    sar_final.filter_results.standardized_forecasts_error[0, sar_final.loglikelihood_burn:])

print('Normality: val={:.3f}, p={:.3f}'.format(norm_val, norm_p));
print('Ljung-Box: val={:.3f}, p={:.3f}'.format(lb_val, lb_p));
print('Heteroskedasticity: val={:.3f}, p={:.3f}'.format(het_val, het_p));
print('Durbin-Watson: d={:.2f}'.format(durbin_watson))
Normality: val=1.326, p=0.515
Ljung-Box: val=6.658, p=0.673
Heteroskedasticity: val=0.635, p=0.373
Durbin-Watson: d=1.95

Normality (Jarque-Bera): p > 0.05 → normal residuals ✓
Ljung-Box (max lag): p > 0.05 → no autocorrelation ✓
Heteroskedasticity: p > 0.05 → homoskedastic ✓
Durbin-Watson: d ≈ 2.0 → no serial correlation ✓
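As an optional cross-check (not in the original run), the same Ljung-Box test can be computed directly on the standardized residuals with statsmodels' acorr_ljungbox; this sketch assumes a recent statsmodels version where the function returns a DataFrame:

from statsmodels.stats.diagnostic import acorr_ljungbox

# Standardized residuals after the burn-in period, as used for Durbin-Watson above
resid = sar_final.filter_results.standardized_forecasts_error[0, sar_final.loglikelihood_burn:]

lb = acorr_ljungbox(resid, lags=12)
print(lb.tail(1))   # lb_pvalue > 0.05 at the largest lag → no evidence of autocorrelation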

Model 3: Prophet¶

I also fit the Prophet model to see if it improves performance:

In [140]:
df=monthly_df.copy()
df['ds'] = df['Import Date']
df['y'] = df['Weight']
In [141]:
df.drop(df.columns[0:6], axis=1, inplace=True)
In [142]:
df.head()
Out[142]:
ds y
0 2020-01-01 77863938
1 2020-02-01 118967413
2 2020-03-01 117594405
3 2020-04-01 72523979
4 2020-05-01 58180114

Fit the data with yearly seasonality (seasonality by month) as an initial baseline:

In [ ]:
m = Prophet(yearly_seasonality=True, weekly_seasonality=False, daily_seasonality=False)
m.fit(df)

Add future 3 months for forecast:

In [144]:
future = m.make_future_dataframe(periods=3, freq='MS')
print(future.head())
print(future.tail())
          ds
0 2020-01-01
1 2020-02-01
2 2020-03-01
3 2020-04-01
4 2020-05-01
           ds
58 2024-11-01
59 2024-12-01
60 2025-01-01
61 2025-02-01
62 2025-03-01

Forecast the next 3 months (with a CI):

In [145]:
forecast = m.predict(future)
print(forecast.columns)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()
Index(['ds', 'trend', 'yhat_lower', 'yhat_upper', 'trend_lower', 'trend_upper',
       'additive_terms', 'additive_terms_lower', 'additive_terms_upper',
       'yearly', 'yearly_lower', 'yearly_upper', 'multiplicative_terms',
       'multiplicative_terms_lower', 'multiplicative_terms_upper', 'yhat'],
      dtype='object')
Out[145]:
ds yhat yhat_lower yhat_upper
58 2024-11-01 2.387980e+06 -1.588334e+07 2.176104e+07
59 2024-12-01 1.790853e+07 -1.263356e+06 3.646359e+07
60 2025-01-01 6.511386e+07 4.651289e+07 8.381275e+07
61 2025-02-01 3.929141e+07 1.980159e+07 5.699398e+07
62 2025-03-01 7.277707e+07 5.357060e+07 9.143605e+07
In [146]:
plt.figure(figsize=(12, 6))

plt.plot(df["ds"], df["y"], label="Actual", color=custom_palette[2])

plt.plot(forecast["ds"], forecast["yhat"], label="Forecast", color=custom_palette[0], linestyle='--')

future_forecast = forecast[forecast['ds'] > df['ds'].max()]
plt.fill_between(
    future_forecast['ds'],
    future_forecast['yhat_lower'],
    future_forecast['yhat_upper'],
    color=custom_palette[0], alpha=0.2, label='80% Confidence Interval'
)

ax = plt.gca()
ax.yaxis.set_major_formatter(mtick.StrMethodFormatter('{x:,.0f}'))

plt.legend()
plt.ylabel("Cocoa Bean Imports (lbs)")
plt.xlabel("Date")
plt.title("Prophet")
plt.show()
No description has been provided for this image

Prophet's Decomposition:

In [147]:
fig = m.plot_components(forecast)

for ax in fig.axes:
    for line in ax.get_lines():
        line.set_color('black')
plt.show()
No description has been provided for this image

Cross validation for predicting rolling 1-3 months out from training data:

In [148]:
period = "1MS"  # Month start
cutoffs = pd.date_range(start="2023-09-01", end="2024-07-01", freq=period) # End is last starting date
horizon = "93 days"  # Forecast horizon for each cutoff
prophet_cv = cross_validation(m, cutoffs=cutoffs, horizon=horizon, disable_tqdm=True)

prophet_cv['AE'] = abs(prophet_cv['yhat'] - prophet_cv['y'])
prophet_cv['APE'] = abs(prophet_cv['yhat'] - prophet_cv['y']) / abs(prophet_cv['y'])

prophet_cv['ds'] = pd.to_datetime(prophet_cv['ds'])
prophet_cv['cutoff'] = pd.to_datetime(prophet_cv['cutoff'])
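# Horizon in months: month of the forecast date minus month of the cutoff date (as integer Period counts)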
prophet_cv['horizon'] = prophet_cv['ds'].dt.to_period('M').astype(int) - prophet_cv['cutoff'].dt.to_period('M').astype(int)

prophet_cv.style.format({'ds':'{:%m-%d-%Y}','cutoff':'{:%m-%d-%Y}','y':'{:,.0f}','yhat':'{:,.0f}','yhat_lower':'{:,.0f}','yhat_upper':'{:,.0f}','AE': '{:,.0f}', 'APE': '{:.2f}'})
Out[148]:
  ds yhat yhat_lower yhat_upper y cutoff AE APE horizon
0 10-01-2023 34,571,294 16,929,854 52,300,645 15,172,388 09-01-2023 19,398,906 1.28 1
1 11-01-2023 36,122,420 18,245,770 52,255,631 11,741,002 09-01-2023 24,381,418 2.08 2
2 12-01-2023 6,271,949 -9,598,850 23,445,941 17,305,596 09-01-2023 11,033,647 0.64 3
3 11-01-2023 39,374,964 23,385,461 56,874,108 11,741,002 10-01-2023 27,633,962 2.35 1
4 12-01-2023 8,371,796 -8,831,315 25,519,546 17,305,596 10-01-2023 8,933,800 0.52 2
5 01-01-2024 50,233,151 33,238,142 67,156,617 64,702,033 10-01-2023 14,468,882 0.22 3
6 12-01-2023 14,152,574 -3,117,462 31,062,282 17,305,596 11-01-2023 3,153,022 0.18 1
7 01-01-2024 49,940,755 32,061,018 68,962,455 64,702,033 11-01-2023 14,761,278 0.23 2
8 02-01-2024 78,604,042 60,051,070 95,263,367 65,375,575 11-01-2023 13,228,467 0.20 3
9 01-01-2024 49,872,672 32,245,314 67,248,874 64,702,033 12-01-2023 14,829,361 0.23 1
10 02-01-2024 78,598,462 61,289,389 97,393,705 65,375,575 12-01-2023 13,222,887 0.20 2
11 03-01-2024 106,659,231 89,317,085 123,659,483 62,364,919 12-01-2023 44,294,312 0.71 3
12 02-01-2024 79,579,136 62,051,163 96,549,506 65,375,575 01-01-2024 14,203,561 0.22 1
13 03-01-2024 108,929,837 91,319,256 126,149,664 62,364,919 01-01-2024 46,564,918 0.75 2
14 04-01-2024 64,384,252 48,206,371 81,351,503 34,933,233 01-01-2024 29,451,019 0.84 3
15 03-01-2024 106,524,646 88,859,720 123,044,772 62,364,919 02-01-2024 44,159,727 0.71 1
16 04-01-2024 62,391,175 44,772,206 79,368,691 34,933,233 02-01-2024 27,457,942 0.79 2
17 05-01-2024 52,155,620 34,999,897 69,397,706 35,504,037 02-01-2024 16,651,583 0.47 3
18 04-01-2024 62,532,257 44,824,399 82,380,783 34,933,233 03-01-2024 27,599,024 0.79 1
19 05-01-2024 51,432,949 32,080,191 68,912,081 35,504,037 03-01-2024 15,928,912 0.45 2
20 06-01-2024 51,258,120 32,798,947 70,240,545 26,935,635 03-01-2024 24,322,485 0.90 3
21 05-01-2024 51,418,592 33,825,254 69,345,604 35,504,037 04-01-2024 15,914,555 0.45 1
22 06-01-2024 50,860,837 31,374,903 69,865,398 26,935,635 04-01-2024 23,925,202 0.89 2
23 07-01-2024 20,165,735 1,083,522 38,248,827 19,175,494 04-01-2024 990,241 0.05 3
24 06-01-2024 50,956,609 32,704,522 70,184,129 26,935,635 05-01-2024 24,020,974 0.89 1
25 07-01-2024 20,052,688 1,345,047 38,428,082 19,175,494 05-01-2024 877,194 0.05 2
26 08-01-2024 11,858,470 -5,635,972 31,271,205 21,449,146 05-01-2024 9,590,676 0.45 3
27 07-01-2024 20,360,144 3,062,241 39,273,490 19,175,494 06-01-2024 1,184,650 0.06 1
28 08-01-2024 11,755,945 -5,622,807 30,095,651 21,449,146 06-01-2024 9,693,201 0.45 2
29 09-01-2024 21,479,723 3,124,096 39,337,768 23,828,291 06-01-2024 2,348,568 0.10 3
30 08-01-2024 11,759,345 -5,355,922 31,470,017 21,449,146 07-01-2024 9,689,801 0.45 1
31 09-01-2024 21,470,890 3,739,985 39,615,374 23,828,291 07-01-2024 2,357,401 0.10 2
32 10-01-2024 -2,962,042 -20,434,241 15,681,012 20,121,692 07-01-2024 23,083,734 1.15 3
In [149]:
MAE_by_horizon = prophet_cv.groupby(['horizon'])['AE'].mean()
MAE_by_horizon = pd.DataFrame(MAE_by_horizon).rename(columns={'AE': 'MAE'})

MAPE_by_horizon = prophet_cv.groupby(['horizon'])['APE'].mean()
MAPE_by_horizon = pd.DataFrame(MAPE_by_horizon).rename(columns={'APE': 'MAPE'})

prophet_horizon_table = MAE_by_horizon.merge(MAPE_by_horizon, on='horizon')
prophet_horizon_table.style.format({'MAE': '{:,.0f}', 'MAPE': '{:.0%}'})
Out[149]:
  MAE MAPE
horizon    
1 18,344,322 69%
2 17,100,378 59%
3 17,223,965 52%
In [150]:
prophet_p = performance_metrics(prophet_cv, rolling_window=1)
prophet_p.style.format({
    'mse': '{:,.0f}',
    'rmse': '{:,.0f}',
    'mae': '{:,.0f}',
    'mape': '{:.0%}',
    'mdape': '{:.0%}',
    'smape': '{:.0%}',
    'coverage': '{:.0%}'
})
Out[150]:
  horizon mse rmse mae mape mdape smape coverage
0 92 days 00:00:00 453,339,460,451,128 21,291,770 17,556,222 60% 45% 50% 61%
In [151]:
prophet_overall_MAE  = prophet_cv['AE'].mean()
prophet_overall_MAPE = prophet_cv['APE'].mean()

print(f"MAE:{prophet_overall_MAE:,.0f}")
print(f"MAPE (%): {prophet_overall_MAPE:.0%}")
MAE:17,556,222
MAPE (%): 60%

Tuning the model:

Grid Search with hyperparameter options:

In [ ]:
param_grid = {
    'changepoint_prior_scale': [0.001, 0.01, 0.02],
    'changepoint_range': [0.8, 0.95],
    'seasonality_prior_scale': [0.1, 1.0, 5.0],
    'seasonality_mode': ['additive', 'multiplicative'],
    'growth': ['linear', 'logistic']
}

all_params = list(itertools.product(*param_grid.values()))
rmses = []

for params in all_params:
    param_dict = dict(zip(param_grid.keys(), params))
    temp_df = df.copy()

    if param_dict['growth'] == 'logistic':
        temp_df['floor'] = 0
        temp_df['cap'] = temp_df['y'].max() * 1.2

    model = Prophet(
        yearly_seasonality=True,
        weekly_seasonality=False,
        daily_seasonality=False,
        **param_dict
    )
    model.fit(temp_df)

    df_cv = cross_validation(model, cutoffs=cutoffs, horizon='93 days', disable_tqdm=True)
    df_p = performance_metrics(df_cv, rolling_window=1)
    rmses.append(df_p['rmse'].mean())

tuning_results = pd.DataFrame([dict(zip(param_grid.keys(), p)) for p in all_params])
tuning_results['rmse'] = rmses
print(tuning_results.nsmallest(1, 'rmse'))

Best parameters:

In [153]:
best_params_prep = all_params[np.argmin(rmses)]
best_params = dict(zip(param_grid.keys(), best_params_prep))

print(best_params)
{'changepoint_prior_scale': 0.01, 'changepoint_range': 0.8, 'seasonality_prior_scale': 0.1, 'seasonality_mode': 'multiplicative', 'growth': 'linear'}

Plot the model fit and forecast. I include a floor and cap since the grid search chose multiplicative seasonality, which benefits from guardrails:

In [154]:
cap_value = df['y'].max() * 1.2

df = df.copy()
df['floor'] = 0
df['cap'] = cap_value

m = Prophet(
    yearly_seasonality=True,
    weekly_seasonality=False,
    daily_seasonality=False,
    **best_params
)

m.fit(df)

future = m.make_future_dataframe(periods=3, freq='MS')
future['floor'] = 0
future['cap'] = cap_value

forecast = m.predict(future)

print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']])

plt.figure(figsize=(12, 6))

plt.plot(df["ds"], df["y"], label="Actual", color=custom_palette[2])

plt.plot(forecast["ds"], forecast["yhat"], label="Forecast", color=custom_palette[0], linestyle='--')

future_forecast = forecast[forecast['ds'] > df['ds'].max()]
plt.fill_between(
    future_forecast['ds'],
    future_forecast['yhat_lower'],
    future_forecast['yhat_upper'],
    color=custom_palette[0], alpha=0.2, label='80% Confidence Interval'
)

ax = plt.gca()
ax.yaxis.set_major_formatter(mtick.StrMethodFormatter('{x:,.0f}'))

plt.legend()
plt.ylabel("Cocoa Bean Imports (lbs)")
plt.xlabel("Date")
plt.title("Prophet (Tuned)")
plt.show()
           ds          yhat    yhat_lower    yhat_upper
0  2020-01-01  9.962309e+07  7.728316e+07  1.192664e+08
1  2020-02-01  9.822106e+07  7.851117e+07  1.183878e+08
2  2020-03-01  1.212633e+08  1.003751e+08  1.409421e+08
3  2020-04-01  1.050092e+08  8.537475e+07  1.263587e+08
4  2020-05-01  8.638201e+07  6.760903e+07  1.061323e+08
..        ...           ...           ...           ...
58 2024-11-01  1.970418e+07  4.289487e+05  4.089143e+07
59 2024-12-01  2.307426e+07  4.615000e+06  4.479443e+07
60 2025-01-01  4.070195e+07  1.941385e+07  5.989525e+07
61 2025-02-01  3.952993e+07  1.908779e+07  5.936743e+07
62 2025-03-01  4.800308e+07  2.820561e+07  6.871246e+07

[63 rows x 4 columns]
No description has been provided for this image
In [155]:
fig = m.plot_components(forecast)

for ax in fig.axes:
    for line in ax.get_lines():
        line.set_color('black')
plt.show()
No description has been provided for this image

Cross validation for predicting rolling 1-3 months out from training data:

In [156]:
period = "1MS"  # Month start
cutoffs = pd.date_range(start="2023-09-01", end="2024-08-01", freq=period) # End is last starting date
horizon = "92 days"  # Forecast horizon for each cutoff
prophet_tuned_cv = cross_validation(m, cutoffs=cutoffs, horizon=horizon, disable_tqdm=True)

prophet_tuned_cv['AE'] = abs(prophet_tuned_cv['yhat'] - prophet_tuned_cv['y'])
prophet_tuned_cv['APE'] = (abs(prophet_tuned_cv['yhat'] - prophet_tuned_cv['y']) / abs(prophet_tuned_cv['y']))

prophet_tuned_cv['ds'] = pd.to_datetime(prophet_tuned_cv['ds'])
prophet_tuned_cv['cutoff'] = pd.to_datetime(prophet_tuned_cv['cutoff'])
prophet_tuned_cv['horizon'] = prophet_tuned_cv['ds'].dt.to_period('M').astype(int) - prophet_tuned_cv['cutoff'].dt.to_period('M').astype(int)
prophet_tuned_cv.style.format({'ds':'{:%m-%d-%Y}','cutoff':'{:%m-%d-%Y}','y':'{:,.0f}','yhat':'{:,.0f}','yhat_lower':'{:,.0f}','yhat_upper':'{:,.0f}','AE': '{:,.0f}', 'APE': '{:.0%}'})
Out[156]:
  ds yhat yhat_lower yhat_upper y cutoff AE APE horizon
0 10-01-2023 33,229,787 11,664,038 55,107,299 15,172,388 09-01-2023 18,057,399 119% 1
1 11-01-2023 29,595,579 7,474,233 52,542,841 11,741,002 09-01-2023 17,854,577 152% 2
2 12-01-2023 33,518,306 11,316,066 56,809,617 17,305,596 09-01-2023 16,212,710 94% 3
3 11-01-2023 29,070,280 7,444,246 51,398,724 11,741,002 10-01-2023 17,329,278 148% 1
4 12-01-2023 32,972,679 11,906,542 55,297,136 17,305,596 10-01-2023 15,667,083 91% 2
5 01-01-2024 55,687,186 33,451,409 79,134,164 64,702,033 10-01-2023 9,014,847 14% 3
6 12-01-2023 32,629,136 10,178,688 54,321,667 17,305,596 11-01-2023 15,323,540 89% 1
7 01-01-2024 54,859,769 32,539,603 77,908,837 64,702,033 11-01-2023 9,842,264 15% 2
8 02-01-2024 53,190,108 30,502,895 74,613,935 65,375,575 11-01-2023 12,185,467 19% 3
9 01-01-2024 53,960,554 31,453,828 76,575,435 64,702,033 12-01-2023 10,741,479 17% 1
10 02-01-2024 52,331,759 27,977,161 72,979,562 65,375,575 12-01-2023 13,043,816 20% 2
11 03-01-2024 65,484,230 44,422,431 87,358,738 62,364,919 12-01-2023 3,119,311 5% 3
12 02-01-2024 53,740,013 31,782,154 75,018,236 65,375,575 01-01-2024 11,635,562 18% 1
13 03-01-2024 67,386,820 44,595,487 89,681,636 62,364,919 01-01-2024 5,021,901 8% 2
14 04-01-2024 59,994,313 39,978,217 81,240,379 34,933,233 01-01-2024 25,061,080 72% 3
15 03-01-2024 69,350,348 49,191,804 89,845,352 62,364,919 02-01-2024 6,985,429 11% 1
16 04-01-2024 61,888,080 40,804,267 85,041,513 34,933,233 02-01-2024 26,954,847 77% 2
17 05-01-2024 49,866,421 28,641,993 71,141,075 35,504,037 02-01-2024 14,362,384 40% 3
18 04-01-2024 60,571,527 38,448,887 82,950,568 34,933,233 03-01-2024 25,638,294 73% 1
19 05-01-2024 48,773,749 26,712,346 69,985,555 35,504,037 03-01-2024 13,269,712 37% 2
20 06-01-2024 45,989,469 24,985,167 66,965,842 26,935,635 03-01-2024 19,053,834 71% 3
21 05-01-2024 46,132,527 23,910,277 68,482,260 35,504,037 04-01-2024 10,628,490 30% 1
22 06-01-2024 43,490,712 21,942,703 65,468,718 26,935,635 04-01-2024 16,555,077 61% 2
23 07-01-2024 28,107,791 4,560,907 48,738,353 19,175,494 04-01-2024 8,932,297 47% 3
24 06-01-2024 42,796,734 20,858,750 63,711,689 26,935,635 05-01-2024 15,861,099 59% 1
25 07-01-2024 27,625,300 5,703,997 49,364,851 19,175,494 05-01-2024 8,449,806 44% 2
26 08-01-2024 26,912,706 6,032,490 46,924,072 21,449,146 05-01-2024 5,463,560 25% 3
27 07-01-2024 27,330,458 6,202,446 49,208,356 19,175,494 06-01-2024 8,154,964 43% 1
28 08-01-2024 26,610,724 5,113,688 47,524,449 21,449,146 06-01-2024 5,161,578 24% 2
29 09-01-2024 30,252,317 8,513,849 52,273,577 23,828,291 06-01-2024 6,424,026 27% 3
30 08-01-2024 25,616,407 4,133,047 46,448,137 21,449,146 07-01-2024 4,167,261 19% 1
31 09-01-2024 28,990,145 7,503,903 50,644,769 23,828,291 07-01-2024 5,161,854 22% 2
32 10-01-2024 22,165,792 2,011,392 43,446,943 20,121,692 07-01-2024 2,044,100 10% 3
33 09-01-2024 28,701,339 10,205,185 48,470,442 23,828,291 08-01-2024 4,873,048 20% 1
34 10-01-2024 21,676,952 1,194,331 40,322,400 20,121,692 08-01-2024 1,555,260 8% 2
35 11-01-2024 18,732,463 -853,543 40,308,048 23,311,088 08-01-2024 4,578,625 20% 3
In [157]:
MAE_by_horizon = prophet_tuned_cv.groupby(['horizon'])['AE'].mean()
MAE_by_horizon = pd.DataFrame(MAE_by_horizon).rename(columns={'AE': 'MAE'})

MAPE_by_horizon = prophet_tuned_cv.groupby(['horizon'])['APE'].mean()
MAPE_by_horizon = pd.DataFrame(MAPE_by_horizon).rename(columns={'APE': 'MAPE'})

prophet_tuned_horizon_table = MAE_by_horizon.merge(MAPE_by_horizon,on='horizon')
prophet_tuned_horizon_table.style.format({'MAE': '{:,.0f}', 'MAPE': '{:.0%}'})
Out[157]:
  MAE MAPE
horizon    
1 12,449,653 54%
2 11,544,815 47%
3 10,537,687 37%
In [158]:
prophet_tuned_p = performance_metrics(prophet_tuned_cv, rolling_window=1)
prophet_tuned_p.style.format({
    'mse': '{:,.0f}',
    'rmse': '{:,.0f}',
    'mae': '{:,.0f}',
    'mape': '{:.0%}',
    'mdape': '{:.0%}',
    'smape': '{:.0%}',
    'coverage': '{:.0%}'
})
Out[158]:
  horizon mse rmse mae mape mdape smape coverage
0 92 days 00:00:00 175,730,234,379,226 13,256,328 11,510,718 46% 28% 34% 92%

Performance improves with tuning, but is still not great:

In [159]:
prophet_tuned_overall_MAE  = prophet_tuned_cv['AE'].mean()
prophet_tuned_overall_MAPE = prophet_tuned_cv['APE'].mean()

print(f"MAE: {prophet_tuned_overall_MAE:,.0f}")
print(f"MAPE (%): {prophet_tuned_overall_MAPE:.0%}")
MAE: 11,510,718
MAPE (%): 46%

Prophet with log transformation:

In [160]:
df_log = df.copy()

df_log['y'] = np.log1p(df_log['y'])
df_log['floor'] = np.log1p(df_log['floor'])
df_log['cap'] = np.log1p(df_log['cap'])

df_log.head()
Out[160]:
ds y floor cap
0 2020-01-01 18.170473 0.0 19.037795
1 2020-02-01 18.594360 0.0 19.037795
2 2020-03-01 18.582752 0.0 19.037795
3 2020-04-01 18.099428 0.0 19.037795
4 2020-05-01 17.879054 0.0 19.037795
In [ ]:
param_grid_log = {
    'changepoint_prior_scale': [0.001, 0.01, 0.02],
    'changepoint_range': [0.8, 0.95],
    'seasonality_prior_scale': [0.1, 1.0, 5.0],
    'seasonality_mode': ['additive', 'multiplicative'],
    'growth': ['linear', 'logistic']
}

all_params_log = list(itertools.product(*param_grid_log.values()))
rmses_log = []

for params in all_params_log:
    param_dict_log = dict(zip(param_grid_log.keys(), params))

    if param_dict_log['growth'] == 'logistic':
        df_log['floor'] = 0
        df_log['cap'] = df_log['y'].max() * 1.2

    model_log = Prophet(
        yearly_seasonality=True,
        weekly_seasonality=False,
        daily_seasonality=False,
        **param_dict_log
    )
    model_log.fit(df_log)

    cv_log = cross_validation(model_log, cutoffs=cutoffs, horizon='93 days', disable_tqdm=True)

    cv_orig = cv_log.copy()
    cv_orig['y_orig'] = np.expm1(cv_log['y'])
    cv_orig['yhat_orig'] = np.expm1(cv_log['yhat'])
    df_p_orig = performance_metrics(cv_orig, rolling_window=1)
    rmses_log.append(df_p_orig['rmse'].mean())

tuning_results_log = pd.DataFrame([dict(zip(param_grid_log.keys(), p)) for p in all_params_log])
tuning_results_log['rmse'] = rmses_log
print(tuning_results_log.nsmallest(1, 'rmse'))
In [163]:
best_params_prep_log = all_params_log[np.argmin(rmses_log)]
best_params_log = dict(zip(param_grid_log.keys(), best_params_prep_log))

print(best_params_log)
{'changepoint_prior_scale': 0.02, 'changepoint_range': 0.95, 'seasonality_prior_scale': 0.1, 'seasonality_mode': 'multiplicative', 'growth': 'logistic'}
In [164]:
cv_orig['AE'] = abs(cv_orig['yhat_orig'] - cv_orig['y_orig'])
cv_orig['APE'] = abs(cv_orig['yhat_orig'] - cv_orig['y_orig']) / abs(cv_orig['y_orig'])
In [165]:
cv_orig.style.format({
    'ds':'{:%m-%d-%Y}',
    'y':'{:,.2f}',
    'yhat':'{:,.2f}',
    'yhat_lower':'{:,.2f}',
    'yhat_upper':'{:,.2f}',
    'y_orig':'{:,.0f}',
    'cutoff':'{:%m-%d-%Y}',
    'yhat_orig':'{:,.0f}',
    'yhat_lower_orig':'{:,.0f}',
    'yhat_upper_orig':'{:,.0f}',
    'AE': '{:,.0f}',
    'APE': '{:.0%}'
})
Out[165]:
  ds yhat yhat_lower yhat_upper y cutoff y_orig yhat_orig AE APE
0 10-01-2023 17.47 17.20 17.71 16.53 09-01-2023 15,172,388 38,590,845 23,418,457 154%
1 11-01-2023 17.49 17.24 17.73 16.28 09-01-2023 11,741,002 39,427,067 27,686,065 236%
2 12-01-2023 16.42 16.16 16.68 16.67 09-01-2023 17,305,596 13,487,256 3,818,340 22%
3 11-01-2023 17.64 17.35 17.92 16.28 10-01-2023 11,741,002 45,870,173 34,129,171 291%
4 12-01-2023 16.51 16.23 16.78 16.67 10-01-2023 17,305,596 14,823,024 2,482,572 14%
5 01-01-2024 17.57 17.26 17.82 17.99 10-01-2023 64,702,033 42,506,011 22,196,022 34%
6 12-01-2023 16.81 16.51 17.15 16.67 11-01-2023 17,305,596 19,974,901 2,669,305 15%
7 01-01-2024 17.57 17.24 17.89 17.99 11-01-2023 64,702,033 42,553,675 22,148,358 34%
8 02-01-2024 17.75 17.43 18.06 18.00 11-01-2023 65,375,575 51,167,470 14,208,105 22%
9 01-01-2024 17.58 17.25 17.90 17.99 12-01-2023 64,702,033 43,184,329 21,517,704 33%
10 02-01-2024 17.76 17.45 18.07 18.00 12-01-2023 65,375,575 51,784,211 13,591,364 21%
11 03-01-2024 18.06 17.74 18.35 17.95 12-01-2023 62,364,919 69,550,697 7,185,778 12%
12 02-01-2024 17.82 17.49 18.14 18.00 01-01-2024 65,375,575 54,631,649 10,743,926 16%
13 03-01-2024 18.14 17.82 18.43 17.95 01-01-2024 62,364,919 75,588,776 13,223,857 21%
14 04-01-2024 17.74 17.43 18.05 17.37 01-01-2024 34,933,233 50,672,081 15,738,848 45%
15 03-01-2024 18.16 17.84 18.49 17.95 02-01-2024 62,364,919 77,281,349 14,916,430 24%
16 04-01-2024 17.76 17.44 18.07 17.37 02-01-2024 34,933,233 51,536,485 16,603,252 48%
17 05-01-2024 17.61 17.30 17.91 17.39 02-01-2024 35,504,037 44,366,014 8,861,977 25%
18 04-01-2024 17.76 17.44 18.08 17.37 03-01-2024 34,933,233 51,504,978 16,571,745 47%
19 05-01-2024 17.60 17.27 17.90 17.39 03-01-2024 35,504,037 44,167,722 8,663,685 24%
20 06-01-2024 17.60 17.28 17.95 17.11 03-01-2024 26,935,635 44,130,722 17,195,087 64%
21 05-01-2024 17.59 17.26 17.89 17.39 04-01-2024 35,504,037 43,407,845 7,903,808 22%
22 06-01-2024 17.58 17.26 17.90 17.11 04-01-2024 26,935,635 43,166,578 16,230,943 60%
23 07-01-2024 17.07 16.74 17.37 16.77 04-01-2024 19,175,494 25,913,220 6,737,726 35%
24 06-01-2024 17.58 17.27 17.89 17.11 05-01-2024 26,935,635 43,273,502 16,337,867 61%
25 07-01-2024 17.07 16.77 17.39 16.77 05-01-2024 19,175,494 25,927,086 6,751,592 35%
26 08-01-2024 16.84 16.51 17.13 16.88 05-01-2024 21,449,146 20,524,610 924,536 4%
27 07-01-2024 17.07 16.75 17.39 16.77 06-01-2024 19,175,494 25,962,683 6,787,189 35%
28 08-01-2024 16.83 16.51 17.14 16.88 06-01-2024 21,449,146 20,406,110 1,043,036 5%
29 09-01-2024 17.04 16.72 17.37 16.99 06-01-2024 23,828,291 25,145,438 1,317,147 6%
30 08-01-2024 16.83 16.51 17.13 16.88 07-01-2024 21,449,146 20,314,748 1,134,398 5%
31 09-01-2024 17.03 16.69 17.35 16.99 07-01-2024 23,828,291 24,945,848 1,117,557 5%
32 10-01-2024 16.48 16.18 16.81 16.82 07-01-2024 20,121,692 14,391,758 5,729,934 28%
33 09-01-2024 17.04 16.72 17.36 16.99 08-01-2024 23,828,291 25,143,218 1,314,927 6%
34 10-01-2024 16.49 16.17 16.81 16.82 08-01-2024 20,121,692 14,497,193 5,624,499 28%
35 11-01-2024 16.29 15.96 16.61 16.96 08-01-2024 23,311,088 11,830,985 11,480,103 49%
In [166]:
m_log = Prophet(
    yearly_seasonality=True,
    weekly_seasonality=False,
    daily_seasonality=False,
    **best_params_log
)

m_log.fit(df_log)

future_log = m_log.make_future_dataframe(periods=3, freq='MS')
future_log['floor'] = np.log1p(0)
future_log['cap'] = np.log1p(cap_value)

forecast_log = m_log.predict(future_log)

forecast_log['yhat_orig'] = np.expm1(forecast_log['yhat'])
forecast_log['yhat_lower_orig'] = np.expm1(forecast_log['yhat_lower'])
forecast_log['yhat_upper_orig'] = np.expm1(forecast_log['yhat_upper'])

plt.figure(figsize=(12, 6))

plt.plot(df['ds'], df['y'], label="Actual", color=custom_palette[2])

plt.plot(forecast_log['ds'], forecast_log['yhat_orig'], label="Forecast", color=custom_palette[0], linestyle='--')

future_forecast_log = forecast_log[forecast_log['ds'] > df_log['ds'].max()]
plt.fill_between(
    future_forecast_log['ds'],
    future_forecast_log['yhat_lower_orig'],
    future_forecast_log['yhat_upper_orig'],
    color=custom_palette[0],
    alpha=0.2,
    label='80% Confidence Interval'
)

ax = plt.gca()
ax.yaxis.set_major_formatter(mtick.StrMethodFormatter('{x:,.0f}'))

plt.legend()
plt.ylabel("Cocoa Bean Imports (lbs)")
plt.xlabel("Date")
plt.title("Prophet (Tuned - Log Data)")
plt.show()
No description has been provided for this image
In [167]:
fig = m_log.plot_components(forecast_log)

for ax in fig.axes:
    for line in ax.get_lines():
        line.set_color('black')
plt.show()
No description has been provided for this image
In [168]:
prophet_tuned_log_cv= cv_orig

prophet_tuned_log_cv['AE'] = abs(prophet_tuned_log_cv['yhat_orig'] - prophet_tuned_log_cv['y_orig'])
prophet_tuned_log_cv['APE'] = (abs(prophet_tuned_log_cv['yhat_orig'] - prophet_tuned_log_cv['y_orig']) / abs(prophet_tuned_log_cv['y_orig']))

prophet_tuned_log_cv['ds'] = pd.to_datetime(prophet_tuned_log_cv['ds'])
prophet_tuned_log_cv['cutoff'] = pd.to_datetime(prophet_tuned_log_cv['cutoff'])
prophet_tuned_log_cv['horizon'] = prophet_tuned_log_cv['ds'].dt.to_period('M').astype(int) - prophet_tuned_log_cv['cutoff'].dt.to_period('M').astype(int)
prophet_tuned_log_cv.style.format({'ds':'{:%m-%d-%Y}','cutoff':'{:%m-%d-%Y}','y':'{:,.2f}','y_orig':'{:,.0f}','yhat':'{:,.2f}','yhat_lower':'{:,.2f}',
                                   'yhat_upper':'{:,.2f}','yhat_orig':'{:,.0f}','yhat_lower_orig':'{:,.0f}',
                                   'yhat_upper_orig':'{:,.0f}','AE': '{:,.0f}', 'APE': '{:.0%}'})
Out[168]:
  ds yhat yhat_lower yhat_upper y cutoff y_orig yhat_orig AE APE horizon
0 10-01-2023 17.47 17.20 17.71 16.53 09-01-2023 15,172,388 38,590,845 23,418,457 154% 1
1 11-01-2023 17.49 17.24 17.73 16.28 09-01-2023 11,741,002 39,427,067 27,686,065 236% 2
2 12-01-2023 16.42 16.16 16.68 16.67 09-01-2023 17,305,596 13,487,256 3,818,340 22% 3
3 11-01-2023 17.64 17.35 17.92 16.28 10-01-2023 11,741,002 45,870,173 34,129,171 291% 1
4 12-01-2023 16.51 16.23 16.78 16.67 10-01-2023 17,305,596 14,823,024 2,482,572 14% 2
5 01-01-2024 17.57 17.26 17.82 17.99 10-01-2023 64,702,033 42,506,011 22,196,022 34% 3
6 12-01-2023 16.81 16.51 17.15 16.67 11-01-2023 17,305,596 19,974,901 2,669,305 15% 1
7 01-01-2024 17.57 17.24 17.89 17.99 11-01-2023 64,702,033 42,553,675 22,148,358 34% 2
8 02-01-2024 17.75 17.43 18.06 18.00 11-01-2023 65,375,575 51,167,470 14,208,105 22% 3
9 01-01-2024 17.58 17.25 17.90 17.99 12-01-2023 64,702,033 43,184,329 21,517,704 33% 1
10 02-01-2024 17.76 17.45 18.07 18.00 12-01-2023 65,375,575 51,784,211 13,591,364 21% 2
11 03-01-2024 18.06 17.74 18.35 17.95 12-01-2023 62,364,919 69,550,697 7,185,778 12% 3
12 02-01-2024 17.82 17.49 18.14 18.00 01-01-2024 65,375,575 54,631,649 10,743,926 16% 1
13 03-01-2024 18.14 17.82 18.43 17.95 01-01-2024 62,364,919 75,588,776 13,223,857 21% 2
14 04-01-2024 17.74 17.43 18.05 17.37 01-01-2024 34,933,233 50,672,081 15,738,848 45% 3
15 03-01-2024 18.16 17.84 18.49 17.95 02-01-2024 62,364,919 77,281,349 14,916,430 24% 1
16 04-01-2024 17.76 17.44 18.07 17.37 02-01-2024 34,933,233 51,536,485 16,603,252 48% 2
17 05-01-2024 17.61 17.30 17.91 17.39 02-01-2024 35,504,037 44,366,014 8,861,977 25% 3
18 04-01-2024 17.76 17.44 18.08 17.37 03-01-2024 34,933,233 51,504,978 16,571,745 47% 1
19 05-01-2024 17.60 17.27 17.90 17.39 03-01-2024 35,504,037 44,167,722 8,663,685 24% 2
20 06-01-2024 17.60 17.28 17.95 17.11 03-01-2024 26,935,635 44,130,722 17,195,087 64% 3
21 05-01-2024 17.59 17.26 17.89 17.39 04-01-2024 35,504,037 43,407,845 7,903,808 22% 1
22 06-01-2024 17.58 17.26 17.90 17.11 04-01-2024 26,935,635 43,166,578 16,230,943 60% 2
23 07-01-2024 17.07 16.74 17.37 16.77 04-01-2024 19,175,494 25,913,220 6,737,726 35% 3
24 06-01-2024 17.58 17.27 17.89 17.11 05-01-2024 26,935,635 43,273,502 16,337,867 61% 1
25 07-01-2024 17.07 16.77 17.39 16.77 05-01-2024 19,175,494 25,927,086 6,751,592 35% 2
26 08-01-2024 16.84 16.51 17.13 16.88 05-01-2024 21,449,146 20,524,610 924,536 4% 3
27 07-01-2024 17.07 16.75 17.39 16.77 06-01-2024 19,175,494 25,962,683 6,787,189 35% 1
28 08-01-2024 16.83 16.51 17.14 16.88 06-01-2024 21,449,146 20,406,110 1,043,036 5% 2
29 09-01-2024 17.04 16.72 17.37 16.99 06-01-2024 23,828,291 25,145,438 1,317,147 6% 3
30 08-01-2024 16.83 16.51 17.13 16.88 07-01-2024 21,449,146 20,314,748 1,134,398 5% 1
31 09-01-2024 17.03 16.69 17.35 16.99 07-01-2024 23,828,291 24,945,848 1,117,557 5% 2
32 10-01-2024 16.48 16.18 16.81 16.82 07-01-2024 20,121,692 14,391,758 5,729,934 28% 3
33 09-01-2024 17.04 16.72 17.36 16.99 08-01-2024 23,828,291 25,143,218 1,314,927 6% 1
34 10-01-2024 16.49 16.17 16.81 16.82 08-01-2024 20,121,692 14,497,193 5,624,499 28% 2
35 11-01-2024 16.29 15.96 16.61 16.96 08-01-2024 23,311,088 11,830,985 11,480,103 49% 3
In [169]:
MAE_by_horizon_log = prophet_tuned_log_cv.groupby(['horizon'])['AE'].mean()
MAE_by_horizon_log = pd.DataFrame(MAE_by_horizon_log).rename(columns={'AE': 'MAE'})

MAPE_by_horizon_log = prophet_tuned_log_cv.groupby(['horizon'])['APE'].mean()
MAPE_by_horizon_log = pd.DataFrame(MAPE_by_horizon_log).rename(columns={'APE': 'MAPE'})

prophet_tuned_log_horizon_table = MAE_by_horizon_log.merge(MAPE_by_horizon_log,on='horizon')
prophet_tuned_log_horizon_table.style.format({'MAE': '{:,.0f}', 'MAPE': '{:.0%}'})
Out[169]:
  MAE MAPE
horizon    
1 13,120,411 59%
2 11,263,898 44%
3 9,616,134 29%

The log transformation did not meaningfully improve the performance:

In [170]:
prophet_tuned_log_overall_MAE  = prophet_tuned_log_cv['AE'].mean()
prophet_tuned_log_overall_MAPE = prophet_tuned_log_cv['APE'].mean()

print(f"MAE: {prophet_tuned_log_overall_MAE:,.0f}")
print(f"MAPE (%): {prophet_tuned_log_overall_MAPE:.0%}")
MAE: 11,333,481
MAPE (%): 44%

Alt text

Overall Model Metrics:

Note: this only includes the tuned models from above:

In [171]:
final_table = pd.DataFrame({
    'Model': [
        'ExponentialSmoothing','ExponentialSmoothing',
        'Sarima','Sarima','Sarima','Sarima','Sarima','Sarima',
        'Prophet','Prophet'
    ],
    'Log Data': ['N','Y','N','N','N','Y','Y','Y','N','Y'],
    'Tuning': [
        'Manual','Manual',
        'Short Grid Search','Grid Search','Manual',
        'Short Grid Search','Grid Search','Manual',
        'Grid Search','Grid Search'
    ],

    'MAE': [
        exp_ts_tuned_overall_MAE,
        exp_ts_tuned_log_overall_MAE,
        sarima_auto_overall_MAE,
        sarima_flex_full_overall_MAE,
        sarima_manual_overall_MAE,
        sarima_log_auto_overall_MAE,
        sarima_log_full_flex_overall_MAE,
        sarima_log_manual_overall_MAE,
        prophet_tuned_overall_MAE,
        prophet_tuned_log_overall_MAE
    ],

    'MAPE': [
        exp_ts_tuned_overall_MAPE,
        exp_ts_tuned_log_overall_MAPE,
        sarima_auto_overall_MAPE,
        sarima_flex_full_overall_MAPE,
        sarima_manual_overall_MAPE,
        sarima_log_auto_overall_MAPE,
        sarima_log_full_flex_overall_MAPE,
        sarima_log_manual_overall_MAPE,
        prophet_tuned_overall_MAPE,
        prophet_tuned_log_overall_MAPE
    ],

    'MAE_H1': [
        exp_ts_tuned_cv['MAE'][1],
        exp_ts_tuned_log_cv['MAE'][1],
        sarima_auto_cv['MAE'][1],
        sarima_flex_full_cv['MAE'][1],
        sarima_manual_cv['MAE'][1],
        sarima_log_auto_cv['MAE'][1],
        sarima_log_full_flex_cv['MAE'][1],
        sarima_log_manual_cv['MAE'][1],
        prophet_tuned_horizon_table['MAE'][1],
        prophet_tuned_log_horizon_table['MAE'][1]
    ],

    'MAE_H2': [
        exp_ts_tuned_cv['MAE'][2],
        exp_ts_tuned_log_cv['MAE'][2],
        sarima_auto_cv['MAE'][2],
        sarima_flex_full_cv['MAE'][2],
        sarima_manual_cv['MAE'][2],
        sarima_log_auto_cv['MAE'][2],
        sarima_log_full_flex_cv['MAE'][2],
        sarima_log_manual_cv['MAE'][2],
        prophet_tuned_horizon_table['MAE'][2],
        prophet_tuned_log_horizon_table['MAE'][2]
    ],

    'MAE_H3': [
        exp_ts_tuned_cv['MAE'][3],
        exp_ts_tuned_log_cv['MAE'][3],
        sarima_auto_cv['MAE'][3],
        sarima_flex_full_cv['MAE'][3],
        sarima_manual_cv['MAE'][3],
        sarima_log_auto_cv['MAE'][3],
        sarima_log_full_flex_cv['MAE'][3],
        sarima_log_manual_cv['MAE'][3],
        prophet_tuned_horizon_table['MAE'][3],
        prophet_tuned_log_horizon_table['MAE'][3]
    ],

    'MAPE_H1': [
        exp_ts_tuned_cv['MAPE'][1],
        exp_ts_tuned_log_cv['MAPE'][1],
        sarima_auto_cv['MAPE'][1],
        sarima_flex_full_cv['MAPE'][1],
        sarima_manual_cv['MAPE'][1],
        sarima_log_auto_cv['MAPE'][1],
        sarima_log_full_flex_cv['MAPE'][1],
        sarima_log_manual_cv['MAPE'][1],
        prophet_tuned_horizon_table['MAPE'][1],
        prophet_tuned_log_horizon_table['MAPE'][1]
    ],

    'MAPE_H2': [
        exp_ts_tuned_cv['MAPE'][2],
        exp_ts_tuned_log_cv['MAPE'][2],
        sarima_auto_cv['MAPE'][2],
        sarima_flex_full_cv['MAPE'][2],
        sarima_manual_cv['MAPE'][2],
        sarima_log_auto_cv['MAPE'][2],
        sarima_log_full_flex_cv['MAPE'][2],
        sarima_log_manual_cv['MAPE'][2],
        prophet_tuned_horizon_table['MAPE'][2],
        prophet_tuned_log_horizon_table['MAPE'][2]
    ],

    'MAPE_H3': [
        exp_ts_tuned_cv['MAPE'][3],
        exp_ts_tuned_log_cv['MAPE'][3],
        sarima_auto_cv['MAPE'][3],
        sarima_flex_full_cv['MAPE'][3],
        sarima_manual_cv['MAPE'][3],
        sarima_log_auto_cv['MAPE'][3],
        sarima_log_full_flex_cv['MAPE'][3],
        sarima_log_manual_cv['MAPE'][3],
        prophet_tuned_horizon_table['MAPE'][3],
        prophet_tuned_log_horizon_table['MAPE'][3]
    ]
})
In [172]:
final_table.style.format({
    'MAE': '{:,.0f}',
    'MAPE': '{:.0%}',
    'MAE_H1': '{:,.0f}',
    'MAE_H2': '{:,.0f}',
    'MAE_H3': '{:,.0f}',
    'MAPE_H1': '{:.0%}',
    'MAPE_H2': '{:.0%}',
    'MAPE_H3': '{:.0%}'
})
Out[172]:
  Model Log Data Tuning MAE MAPE MAE_H1 MAE_H2 MAE_H3 MAPE_H1 MAPE_H2 MAPE_H3
0 ExponentialSmoothing N Manual 9,559,131 27% 8,232,452 9,662,806 10,782,135 24% 29% 30%
1 ExponentialSmoothing Y Manual 10,529,595 29% 8,454,615 10,832,569 12,301,600 24% 30% 33%
2 Sarima N Short Grid Search 22,830,464 92% 17,133,558 23,203,094 28,154,741 69% 97% 110%
3 Sarima N Grid Search 9,843,491 33% 8,015,458 10,640,699 10,874,315 29% 37% 33%
4 Sarima N Manual 22,289,579 92% 16,481,605 23,139,574 27,247,558 72% 99% 105%
5 Sarima Y Short Grid Search 9,876,261 27% 8,184,446 9,970,251 11,474,088 22% 27% 31%
6 Sarima Y Grid Search 9,324,128 26% 7,426,062 9,404,383 11,141,939 21% 27% 30%
7 Sarima Y Manual 10,270,420 29% 8,355,908 10,535,363 11,919,988 24% 30% 33%
8 Prophet N Grid Search 11,510,718 46% 12,449,653 11,544,815 10,537,687 54% 47% 37%
9 Prophet Y Grid Search 11,333,481 44% 13,120,411 11,263,898 9,616,134 59% 44% 29%

Results¶

Alt text

The tuned Triple Exponential Smoothing model on the non-log-transformed data performed the best, with a 27% overall MAPE and consistently low MAE across horizons 1, 2, and 3 (per the table above, which only includes the tuned models). I looked at MAE and MAPE together when evaluating. Prophet surprisingly performed poorly. SARIMA's best configuration was quite comparable, but SARIMA results varied widely depending on the adjustments.

In [173]:
final_model=final_table.iloc[[0]]
final_model.style.format({
    'MAE': '{:,.0f}',
    'MAPE': '{:.0%}',
    'MAE_H1': '{:,.0f}',
    'MAE_H2': '{:,.0f}',
    'MAE_H3': '{:,.0f}',
    'MAPE_H1': '{:.0%}',
    'MAPE_H2': '{:.0%}',
    'MAPE_H3': '{:.0%}'
})
Out[173]:
  Model Log Data Tuning MAE MAPE MAE_H1 MAE_H2 MAE_H3 MAPE_H1 MAPE_H2 MAPE_H3
0 ExponentialSmoothing N Manual 9,559,131 27% 8,232,452 9,662,806 10,782,135 24% 29% 30%
In [174]:
display(Image(url="https://raw.githubusercontent.com/lindsayalexandra14/ds_portfolio/main/1_projects/machine_learning/time_series_and_survival/cocoa_imports/cocoa_imports_final_model_plot.png",
    width=800))
No description has been provided for this image

The best predictions of US Cocoa Bean Imports for the 1st 3 months of 2025 are:

Date Forecast Lower_CI Upper_CI
2025-01-01 59,915,879 31,650,287 67,112,957
2025-02-01 59,477,191 30,411,927 65,506,481
2025-03-01 75,993,848 38,341,516 84,996,332
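
For reference, a forecast table like this can be produced from an ETS fit with statsmodels. The sketch below is illustrative only: the smoothing configuration shown is a placeholder rather than the tuned configuration from the Exponential Smoothing section, and the 80% interval width is an assumption made to match the plots.

from statsmodels.tsa.exponential_smoothing.ets import ETSModel

# Placeholder ETS configuration for illustration; the tuned model above used its own settings
ets_sketch = ETSModel(
    data['Weight'],
    error='add',
    trend='add',
    seasonal='add',
    seasonal_periods=12
).fit(disp=False)

# 3-month out-of-sample forecast with an 80% prediction interval
pred = ets_sketch.get_prediction(start=len(data), end=len(data) + 2)
print(pred.summary_frame(alpha=0.2)[['mean', 'pi_lower', 'pi_upper']])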

Future Improvements:

Additions to this project could include extra regressors, such as the global market price or supply of cocoa beans (as sketched below), and/or forecasting by country or continent. Removing outliers by country first may also improve performance; however, recent anomalies driven by supply or price may persist going forward, so removing them could be detrimental.
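
As one concrete direction, Prophet supports extra regressors via add_regressor. The sketch below is purely hypothetical: the cocoa_price series is made-up placeholder data, and the example only illustrates the mechanics of supplying a regressor for both the history and the future months.

import numpy as np
import pandas as pd
from prophet import Prophet

# Hypothetical price series aligned to the monthly history plus 3 future months (values are made up)
rng = np.random.default_rng(0)
price = pd.Series(2500 + rng.normal(0, 200, len(df) + 3))

df_reg = df[['ds', 'y']].copy()
df_reg['cocoa_price'] = price.iloc[:len(df)].values

m_reg = Prophet(yearly_seasonality=True, weekly_seasonality=False, daily_seasonality=False)
m_reg.add_regressor('cocoa_price')
m_reg.fit(df_reg)

future_reg = m_reg.make_future_dataframe(periods=3, freq='MS')
future_reg['cocoa_price'] = price.values   # the regressor must also be supplied for the forecast months
forecast_reg = m_reg.predict(future_reg)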