### Examples on fitting models and prediction ### Simulated data: Tot=1000; beta0 = 1; beta1 = 0.01; beta2 = 3; timet = seq(0, 999, by=1); X = arima.sim(Tot, model=list(ar=c(0.8))); lineartrend = beta0 + beta1*timet; seasontrend = beta2*cos(2*pi*10*timet/Tot); trend = beta0 + beta1*timet + beta2*cos(2*pi*10*timet/Tot); par(mfrow=c(3,1)); plot(lineartrend, type="l"); plot(seasontrend, type="l"); plot(trend, type="l"); tsdata = trend + X; par(mfrow=c(1,1)); plot(tsdata); ### Remove the linear trend by fitting a linear model linreg = glm(tsdata ~ timet, family="gaussian"); names(linreg) linfitval = linreg$coef[1] + linreg$coef[2]*timet; resid1 = tsdata - linfitval; par(mfrow=c(2,1)); plot(resid1); acf(resid1, lag.max=600); ### Remove the periodic/seasonality trend by differencing ### or by fitting a sine + cosine curve ### Estimate the periodicity par(mfrow=c(3,1)); plot(resid1); title(paste("TS with linear trend removed")); fftresid1 = fft(resid1); plot(abs(fftresid1)); title(paste("Absolute Fourier coefficients")) plot(abs(fftresid1[2:20])); title(paste("Absolute Fourier coefficients")) ### Fit the sine and cosine on the residuals sinx = sin(2*pi*10*timet/Tot); cosx = cos(2*pi*10*timet/Tot); par(mfrow=c(1,1)); plot(resid1/max(resid1)); lines((1:Tot), sinx, col=2); lines((1:Tot), cosx, col=3); ##### periodreg = glm(resid1 ~ sinx + cosx, family="gaussian"); summary(periodreg) periodfit = periodreg$coef[3]*cosx; resid2 = resid1 - periodfit; par(mfrow=c(3,1)); plot(resid2); title(paste("TS with linear and seasonality removed")) acf(resid2, lag.max=600); acf(resid1, lag.max=600); ### Is there seasonality present in the residuals? par(mfrow=c(2,1)); plot(resid2); title(paste("TS with linear trend and seasonality removed")); fftresid2 = fft(resid2); plot(abs(fftresid2)); title(paste("Absolute Fourier coefficients")) ### Rather than fitting the seasonal trend, estimate the ### seasonal trend by averaging across blocks of one cycle. BlockLen = floor(Tot/10); CycleData = matrix(nrow=BlockLen, ncol=10); for (k in 1:10){ indexstart=(k-1)*BlockLen + 1; indexend = k*BlockLen; CycleData[, k] = resid1[indexstart:indexend]; } ### Then average across all blocks seasonest1 = apply(CycleData, 1, mean); seasonest = rep(seasonest1, 10); resid3 = resid1 - seasonest; par(mfrow=c(2,1)); plot(resid3); title(paste("TS with linear and seasonality removed")) acf(resid3, ### The treshold (blue lines) should be larger in magnitude because ### we actually test simultaneously at MANY lags ### SUMMARY trend fit trendfit = linfitval + seasonest; resid = tsdata - trendfit; par(mfrow=c(3,1)); plot(trendfit); title(paste("Linear + Seasonal trend")); plot(resid); title(paste("TS with trend removed")); acf(resid, lag.max=600); ####################################################################### ####################################################################### ### Fit ARMA Model to the residuals ### Choose the best model by AICC resid=resid2; ### Fit AR(1) p=1; q=0; fit10 = arima(resid, order=c(p,0,q), method="ML") aic10 = -2*fit10$loglik + 2*Tot*(p+q+1)/(Tot - (p+q+2)); ### Fit ARMA(2,1) p=2; q=1; fit21 = arima(resid, order=c(p,0,q), method="ML") aic21 = -2*fit21$loglik + 2*Tot*(p+q+1)/(Tot - (p+q+2)); aicmat = matrix(nrow=2, ncol=2); Tot = length(resid); for (p in (1:2)){ for (q in (1:2)){ fitx = arima(resid, order=c(p,0,q), method="CSS-ML"); aicx = -2*fitx$loglik + 2*Tot*(p+q+1)/(Tot - (p+q+2)); aicmat[p,q] = aicx;}} ### Choose the best model by BIC bicmat = matrix(nrow=2, ncol=2); Tot = length(resid); for (p in (1:2)){ for (q in (1:2)){ fitx = arima(resid, order=c(p,0,q), method="ML"); bicx = (Tot - p - q)*log(Tot*fitx$sigma2/(Tot-p-q)) + Tot*(1 + log(sqrt(2*pi))) + (p+q)*log( ((sum(resid^2)) - Tot*fitx$sigma2)/(p+q) ); bicmat[p,q] = bicx;}} ### Select the best model from the AR family araicmat = c(1:10); arbicmat = c(1:10); q=0; for (p in 1:10){ fitx = arima(resid, order=c(p,0,q), method="CSS-ML"); aicx = -2*fitx$loglik + 2*Tot*(p+q+1)/(Tot - (p+q+2)); bicx = (Tot - p - q)*log(Tot*fitx$sigma2/(Tot-p-q)) + Tot*(1 + log(sqrt(2*pi))) + (p+q)*log( ((sum(resid^2)) - Tot*fitx$sigma2)/(p+q) ); araicmat[p] = aicx; arbicmat[p] = bicx;} cbind(araicmat, arbicmat) ### Choose AR(1) residmodel = arima(resid, order=c(1,0,0), method="ML"); ### Diagnostics on AR(1) ### Do the residuals of the AR(1) model look like ### white noise? par(mfrow=c(2,1)); plot(residmodel$resid); acf(residmodel$resid, lag.max=15); ########################################################################## ########################################################################## #### One-Step ahead forecast for the stochastic part predout = predict(residmodel, n.ahead=1); lowpi = predout$pred - 1.96*predout$se; hipi = predout$pred + 1.96*predout$se; ### Include the trend trend1001 = linreg$coef[1] + linreg$coef[2]*1001 + seasonest1[1]