Question

Upload your Python notebook where the last cell shows a plot of Google Trends for all three drink categories between January 2012 and January 2014. Make sure the plot is well labeled (plot title, legend, axes).

Code used so far:

# Import basic libraries.
import pandas as pd
import numpy as np
import statsmodels as sm
import plotly.express as px

# This suppresses some annoying warnings.
# Do not do that before you are going to present
# the result -- warnings are often useful.
import warnings
warnings.filterwarnings("ignore")

trends_data = pd.read_csv('../Downloads/SkyRose.csv')

# Convert the separate year, month and day columns to a single date,
# use it as the index, and drop the original columns afterwards.
trends_data['date'] = pd.to_datetime(trends_data[['year', 'month', 'day']])
trends_data.index = trends_data.date
trends_data = trends_data.drop(['year', 'month', 'day', 'date'], axis=1)

# Let's take a look at the first few months.
trends_data.head()

name = 'WhiteWine'
df = trends_data[[name]]
df.head()

# If you want to aggregate data by year ('M' for month, 'D' for day).
# Note that this overwrites the monthly table with yearly totals;
# df, taken above, keeps the monthly frequency.
trends_data = trends_data.resample("Y").sum()
trends_data.head()

## Visualization

# df[name].diff().dropna()  # .dropna() removes the initial (1st) date with no value.

# Plot the data as it is, together with its first difference.
fig = px.line(df, labels={'date': 'Date', 'value': '# of Searches'})
fig.add_scatter(x=df.index, y=df[name].diff(), name='1st Difference')
fig.show()
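The deliverable plot covers only January 2012 through January 2014. Because the notebook already sets a DatetimeIndex, a partial-string .loc slice selects that window; a minimal sketch (standard pandas behavior, shown on df since trends_data was just overwritten with yearly totals):

# Partial-string indexing on a DatetimeIndex: both endpoints are inclusive,
# so this keeps every monthly row from 2012-01 through 2014-01.
window = df.loc['2012-01':'2014-01']
window.head()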
## Winters model (triple exponential smoothing)

from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Indicate for how many periods you want to forecast.
PERIODS_AHEAD = 24

# Fit the model and make the forecast.
ets_model = ExponentialSmoothing(df, trend='add', seasonal='mul', seasonal_periods=12)
ets_fit = ets_model.fit()

# Visualization of the initial data, the fitted model and the forecasts.
fig = px.line(df, labels={'date': 'Date', 'value': '# of Searches'})
fig.add_scatter(x=ets_fit.fittedvalues.index, y=ets_fit.fittedvalues, name='Fitted Values')
fig.add_scatter(x=ets_fit.forecast(PERIODS_AHEAD).index, y=ets_fit.forecast(PERIODS_AHEAD), name='Forecast')
fig.show()

## Cross-Validation

# Find how much data to put in the training set:
len(df) * 0.8

train = df[:57]
test = df[57:]

# 'add' = additive, 'mul' = multiplicative.
fitted_model = ExponentialSmoothing(train[name], trend='add', seasonal='add', seasonal_periods=12).fit()

fig = px.line(df, labels={'date': 'Date', 'value': '# of Searches'}, title="Winters model predictions")
fig.add_scatter(x=fitted_model.fittedvalues.index, y=fitted_model.fittedvalues, name='Fitted Values')
fig.add_scatter(x=fitted_model.forecast(len(test)).index, y=fitted_model.forecast(len(test)), name='Forecast')
fig.show()

percent_errors = np.abs((test[name] - fitted_model.forecast(len(test))) / test[name]) * 100
print("Winters Model MAPE = ", np.mean(percent_errors), "%")

## Explore autocorrelation

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
plot_acf(df[name]).show()   # plot_acf/plot_pacf and adfuller expect a 1-D series
plot_pacf(df[name]).show()

from statsmodels.tsa.stattools import adfuller
adf = adfuller(df[name])
print(f'ADF Statistic {adf[0]}')  # adf[0] -- the ADF statistic value
print(f'p-value {adf[1]}')        # adf[1] -- the p-value; if it is high, the data is non-stationary

# Explore autocorrelation after seasonal (lag-12) differencing.
plot_acf(df[name].diff(12).dropna()).show()
plot_pacf(df[name].diff(12).dropna()).show()

## Check if the data is stationary after the seasonal differencing
adf = adfuller(df[name].diff(12).dropna())
print(f'ADF Statistic {adf[0]}')
print(f'p-value {adf[1]}')

## ARIMA and SARIMA (Seasonal ARIMA)

from statsmodels.tsa.arima.model import ARIMA

# Fit an ARIMA model of order (p, d, q).
model = ARIMA(df, order=(12, 1, 1))
model_fit = model.fit()
print(model_fit.summary())

#### Check normality of the residuals of the model
model_fit.plot_diagnostics(figsize=(16, 6)).show()

### Fit an ARIMA model using the auto_arima method from pmdarima

# Install the pmdarima package (notebook shell command).
!pip install pmdarima
import pmdarima as pm

model_fit = pm.auto_arima(df, start_p=1, start_q=1,
                          max_p=12, max_q=12, max_d=12,
                          seasonal=False, stationary=False,
                          error_action='ignore', suppress_warnings=True,
                          trace=True, stepwise=True)
print(model_fit.summary())

### Forecast for the next 2 years and plot the output
n_periods = 24
fig = px.line(df, labels={'date': 'Date', 'value': '# of Searches'}, title="ARIMA model predictions")
fig.add_scatter(x=model_fit.predict(n_periods).index, y=model_fit.predict(n_periods), name='Forecast')
fig.show()

### Fit SARIMA -- a seasonal ARIMA model
model_fit = pm.auto_arima(df, seasonal=True, m=12,
                          start_p=1, start_q=1, start_P=1, start_Q=1,
                          max_order=12, maxiter=300, stationary=False,
                          trace=True, stepwise=True,
                          error_action='ignore', suppress_warnings=True)
print(model_fit.summary())

### Forecast for the next 2 years and plot the output
n_periods = 24
fig = px.line(df, labels={'date': 'Date', 'value': 'Google Trends'}, title="SARIMA model predictions")
fig.add_scatter(x=model_fit.predict(n_periods).index, y=model_fit.predict(n_periods), name='Forecast')
fig.show()
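One portability note on the two forecast plots above: depending on the pmdarima version, model_fit.predict(n_periods) can return a plain numpy array rather than a pandas Series, in which case the .index calls fail. A hedged sketch that builds the forecast index explicitly instead (it assumes the Google Trends dates are month-starts, hence freq='MS'):

# pd, np and px are already imported at the top of the notebook.
forecast = np.asarray(model_fit.predict(n_periods))  # works for a Series or an ndarray
# 25 month-start dates beginning at the last observation; drop the anchor date.
forecast_index = pd.date_range(df.index[-1], periods=n_periods + 1, freq='MS')[1:]

fig = px.line(df, labels={'date': 'Date', 'value': 'Google Trends'}, title="SARIMA model predictions")
fig.add_scatter(x=forecast_index, y=forecast, name='Forecast')
fig.show()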
## SARIMA cross-validation

train = df[:57]
test = df[57:]

model_fit = pm.auto_arima(train, seasonal=True, m=12,
                          start_p=1, start_q=1, start_P=1, start_Q=1,
                          max_order=12, maxiter=300, stationary=False,
                          trace=True, stepwise=True,
                          error_action='ignore', suppress_warnings=True)

fig = px.line(train, labels={'date': 'Date', 'value': '# of Searches'}, title="SARIMA model predictions")
fig.add_scatter(x=test.index, y=test[name], name='Test Data')
fig.add_scatter(x=model_fit.predict(len(test)).index, y=model_fit.predict(len(test)), name='Forecast')
fig.show()

percent_errors = np.abs((test[name] - model_fit.predict(len(test))) / test[name]) * 100
print("SARIMA Model MAPE = ", np.mean(percent_errors), "%")
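What is still missing is the last cell the question asks for: all three drink categories between January 2012 and January 2014 in one well-labeled figure. A minimal sketch follows; only 'WhiteWine' is confirmed by the code above, so 'Rose' and 'Vodka' are hypothetical placeholders for the other two columns of SkyRose.csv, and the raw file is re-read because the earlier resample cell replaced trends_data with yearly totals:

# Hypothetical column names -- replace 'Rose' and 'Vodka' with the actual
# category columns in SkyRose.csv ('WhiteWine' is the only confirmed one).
categories = ['WhiteWine', 'Rose', 'Vodka']

# Re-read the raw file so the table is monthly again.
trends_data = pd.read_csv('../Downloads/SkyRose.csv')
trends_data['date'] = pd.to_datetime(trends_data[['year', 'month', 'day']])
trends_data = trends_data.set_index('date').drop(['year', 'month', 'day'], axis=1)

# Inclusive partial-string slice: January 2012 through January 2014.
window = trends_data.loc['2012-01':'2014-01', categories]

# Wide-form plotly line plot: title, axis labels and a legend title.
fig = px.line(window,
              title='Google Trends: drink categories, Jan 2012 - Jan 2014',
              labels={'date': 'Date', 'value': '# of Searches', 'variable': 'Drink category'})
fig.show()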
