?filter
?acf
set.seed(530476)
N = 16
w = rnorm(N)
x = filter(w, filter=rep(1/5,5))
some_ts = cbind(w, x)
some_ts
## Time Series:
## Start = 1
## End = 16
## Frequency = 1
## w x
## 1 1.23631391 NA
## 2 0.78710744 NA
## 3 0.81519587 1.10301804
## 4 1.19440525 0.63245977
## 5 1.48206772 0.32931051
## 6 -1.11647744 0.14128862
## 7 -0.72863888 -0.27947937
## 8 -0.12491356 -0.14966540
## 9 -0.90943470 0.09717911
## 10 2.13113758 0.25192277
## 11 0.11774511 0.47017839
## 12 0.04507944 0.82286839
## 13 0.96636451 0.78492754
## 14 0.85401530 0.86675345
## 15 1.94143334 NA
## 16 0.52687464 NA
plot(some_ts)
x_acf = acf(x, na.action = na.omit)
str(acf(x, na.action = na.pass, plot=F))
## List of 6
## $ acf : num [1:13, 1, 1] 1 0.64 0.265 -0.133 -0.478 ...
## $ type : chr "correlation"
## $ n.used: int 16
## $ lag : num [1:13, 1, 1] 0 1 2 3 4 5 6 7 8 9 ...
## $ series: chr "x"
## $ snames: NULL
## - attr(*, "class")= chr "acf"
str(acf(x, na.action = na.omit, plot=F))
## List of 6
## $ acf : num [1:11, 1, 1] 1 0.64 0.265 -0.133 -0.478 ...
## $ type : chr "correlation"
## $ n.used: int 12
## $ lag : num [1:11, 1, 1] 0 1 2 3 4 5 6 7 8 9 ...
## $ series: chr "x"
## $ snames: NULL
## - attr(*, "class")= chr "acf"
data.frame(lag = x_acf$lag,
acf = x_acf$acf)
## lag acf
## 1 0 1.00000000
## 2 1 0.64044331
## 3 2 0.26515841
## 4 3 -0.13337146
## 5 4 -0.47796442
## 6 5 -0.53302529
## 7 6 -0.43248404
## 8 7 -0.27296786
## 9 8 -0.02072622
## 10 9 0.15040616
## 11 10 0.16646444
By the way, could you tell what is the difference in those calls?
cbind(x, w)data.frame(x, w)ts(data.frame(x, w))Sedimentary deposits from one location in Massachusetts for 634 years, beginning nearly 12,000 years ago.
?astsa::varve
varve_more = cbind(astsa::varve, log(astsa::varve), diff(log(astsa::varve)))
plot(varve_more)
?log
?diff
Lets look at some built-in datasets in R.
library(help = "datasets")
For example, Quarterly Approval Ratings of US Presidents
presidents
## Qtr1 Qtr2 Qtr3 Qtr4
## 1945 NA 87 82 75
## 1946 63 50 43 32
## 1947 35 60 54 55
## 1948 36 39 NA NA
## 1949 69 57 57 51
## 1950 45 37 46 39
## 1951 36 24 32 23
## 1952 25 32 NA 32
## 1953 59 74 75 60
## 1954 71 61 71 57
## 1955 71 68 79 73
## 1956 76 71 67 75
## 1957 79 62 63 57
## 1958 60 49 48 52
## 1959 57 62 61 66
## 1960 71 62 61 57
## 1961 72 83 71 78
## 1962 79 71 62 74
## 1963 76 64 62 57
## 1964 80 73 69 69
## 1965 71 64 69 62
## 1966 63 46 56 44
## 1967 44 52 38 46
## 1968 36 49 35 44
## 1969 59 65 65 56
## 1970 66 53 61 52
## 1971 51 48 54 49
## 1972 49 61 NA NA
## 1973 68 44 40 27
## 1974 28 25 24 24
plot(presidents, main="Quarterly Approval Ratings of US Presidents")
For example, Monthly Airline Passenger Numbers 1949-1960
AirPassengers
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1949 112 118 132 129 121 135 148 148 136 119 104 118
## 1950 115 126 141 135 125 149 170 170 158 133 114 140
## 1951 145 150 178 163 172 178 199 199 184 162 146 166
## 1952 171 180 193 181 183 218 230 242 209 191 172 194
## 1953 196 196 236 235 229 243 264 272 237 211 180 201
## 1954 204 188 235 227 234 264 302 293 259 229 203 229
## 1955 242 233 267 269 270 315 364 347 312 274 237 278
## 1956 284 277 317 313 318 374 413 405 355 306 271 306
## 1957 315 301 356 348 355 422 465 467 404 347 305 336
## 1958 340 318 362 348 363 435 491 505 404 359 310 337
## 1959 360 342 406 396 420 472 548 559 463 407 362 405
## 1960 417 391 419 461 472 535 622 606 508 461 390 432
plot(cbind(AirPassengers, log10(AirPassengers)),
main="Monthly Airline Passenger Numbers 1949-1960")
AirPassengers_df = data.frame(
p = log10(AirPassengers),
t = time(AirPassengers),
m = as.factor(cycle(AirPassengers))
)
head(AirPassengers_df)
## p t m
## 1 2.049218 1949.000 1
## 2 2.071882 1949.083 2
## 3 2.120574 1949.167 3
## 4 2.110590 1949.250 4
## 5 2.082785 1949.333 5
## 6 2.130334 1949.417 6
makeDF = function(x) {
df = data.frame(
y = log10(x),
t = time(x),
s = as.factor(cycle(x))
)
}
seaLS1 = function(x) {
df = makeDF(x)
lm(y ~ t - 1 + s, data = df)
}
seaLS2 = function(x) {
df = makeDF(x)
lm(y ~ t + s, data = df)
}
seaLS3 = function(x) {
df = makeDF(x)
lm(y ~ t + s, data = df, contrasts = list(s = contr.sum))
}
seaLS4 = function(x) {
df = makeDF(x)
lm(y ~ t, data = df)
}
mod1 = seaLS1(AirPassengers)
mod2 = seaLS2(AirPassengers)
mod3 = seaLS3(AirPassengers)
mod4 = seaLS4(AirPassengers)
Can you tell the difference between seaLS1, seaLS2, seaLS3 and seaLS4? (Try model.matrix(mod1) etc!)
summary(mod1)
##
## Call:
## lm(formula = y ~ t - 1 + s, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.067911 -0.017813 0.001597 0.019139 0.057468
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## t 5.247e-02 6.217e-04 84.40 <2e-16 ***
## s1 -1.002e+02 1.215e+00 -82.47 <2e-16 ***
## s2 -1.002e+02 1.215e+00 -82.47 <2e-16 ***
## s3 -1.002e+02 1.215e+00 -82.42 <2e-16 ***
## s4 -1.002e+02 1.215e+00 -82.43 <2e-16 ***
## s5 -1.002e+02 1.215e+00 -82.43 <2e-16 ***
## s6 -1.001e+02 1.215e+00 -82.38 <2e-16 ***
## s7 -1.001e+02 1.216e+00 -82.34 <2e-16 ***
## s8 -1.001e+02 1.216e+00 -82.34 <2e-16 ***
## s9 -1.002e+02 1.216e+00 -82.39 <2e-16 ***
## s10 -1.002e+02 1.216e+00 -82.43 <2e-16 ***
## s11 -1.003e+02 1.216e+00 -82.48 <2e-16 ***
## s12 -1.002e+02 1.216e+00 -82.44 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02576 on 131 degrees of freedom
## Multiple R-squared: 0.9999, Adjusted R-squared: 0.9999
## F-statistic: 9.734e+04 on 13 and 131 DF, p-value: < 2.2e-16
summary(mod2)
##
## Call:
## lm(formula = y ~ t + s, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.067911 -0.017813 0.001597 0.019139 0.057468
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.002e+02 1.215e+00 -82.467 < 2e-16 ***
## t 5.247e-02 6.217e-04 84.399 < 2e-16 ***
## s2 -9.578e-03 1.051e-02 -0.911 0.36400
## s3 4.698e-02 1.052e-02 4.468 1.69e-05 ***
## s4 3.340e-02 1.052e-02 3.176 0.00186 **
## s5 3.237e-02 1.052e-02 3.078 0.00254 **
## s6 8.542e-02 1.052e-02 8.121 2.98e-13 ***
## s7 1.306e-01 1.052e-02 12.411 < 2e-16 ***
## s8 1.265e-01 1.052e-02 12.026 < 2e-16 ***
## s9 6.371e-02 1.052e-02 6.054 1.39e-08 ***
## s10 3.705e-03 1.052e-02 0.352 0.72537
## s11 -5.871e-02 1.053e-02 -5.577 1.34e-07 ***
## s12 -9.260e-03 1.053e-02 -0.879 0.38082
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02576 on 131 degrees of freedom
## Multiple R-squared: 0.9835, Adjusted R-squared: 0.982
## F-statistic: 649.4 on 12 and 131 DF, p-value: < 2.2e-16
summary(mod3)
##
## Call:
## lm(formula = y ~ t + s, data = df, contrasts = list(s = contr.sum))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.067911 -0.017813 0.001597 0.019139 0.057468
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.002e+02 1.215e+00 -82.419 < 2e-16 ***
## t 5.247e-02 6.217e-04 84.399 < 2e-16 ***
## s1 -3.709e-02 7.124e-03 -5.207 7.25e-07 ***
## s2 -4.667e-02 7.122e-03 -6.553 1.18e-09 ***
## s3 9.887e-03 7.121e-03 1.388 0.167355
## s4 -3.693e-03 7.119e-03 -0.519 0.604821
## s5 -4.724e-03 7.119e-03 -0.664 0.508153
## s6 4.832e-02 7.118e-03 6.789 3.56e-10 ***
## s7 9.347e-02 7.118e-03 13.130 < 2e-16 ***
## s8 8.943e-02 7.119e-03 12.562 < 2e-16 ***
## s9 2.661e-02 7.119e-03 3.738 0.000276 ***
## s10 -3.339e-02 7.121e-03 -4.689 6.81e-06 ***
## s11 -9.580e-02 7.122e-03 -13.451 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02576 on 131 degrees of freedom
## Multiple R-squared: 0.9835, Adjusted R-squared: 0.982
## F-statistic: 649.4 on 12 and 131 DF, p-value: < 2.2e-16
summary(mod4)
##
## Call:
## lm(formula = y ~ t, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.134016 -0.045113 -0.007798 0.042291 0.128280
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -99.969307 2.839818 -35.20 <2e-16 ***
## t 0.052367 0.001453 36.05 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06038 on 142 degrees of freedom
## Multiple R-squared: 0.9015, Adjusted R-squared: 0.9008
## F-statistic: 1300 on 1 and 142 DF, p-value: < 2.2e-16
anova(mod1, mod4)
## Analysis of Variance Table
##
## Model 1: y ~ t - 1 + s
## Model 2: y ~ t
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 131 0.08690
## 2 142 0.51774 -11 -0.43085 59.047 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot_two_hists = function(x1, x2) {
colvec = c(rgb(1,0,0,0.5), rgb(0,0,1,0.5))
rangevec = c(min(x1, x2), max(x1, x2))
hist(x1,
breaks=seq(rangevec[1], rangevec[2], length.out=20),
col=colvec[1],
xlim=rangevec,
main="Residual",
xlab="residual"
)
hist(x2,
breaks=seq(rangevec[1], rangevec[2], length.out=20),
col=colvec[2],
xlim=rangevec,
add=T
)
legend("topright",
legend=c(deparse(substitute(x1)), deparse(substitute(x2))),
col=colvec,
pch=15)
}
plot_two_hists(resid(mod1), resid(mod4))
qqnorm(resid(mod1)); qqline(resid(mod1))
qqnorm(resid(mod4)); qqline(resid(mod4))
Look at the example of JohnsonJohnson
?JohnsonJohnson
The help of JohnsonJohnson has the example:
JJ <- log10(JohnsonJohnson)
plot(cbind(x=JohnsonJohnson, logx=JJ),
main="Johnson & Johnson Quarterly Earnings")
fit <- StructTS(JJ, type = "BSM")
tsdiag(fit)
sm <- tsSmooth(fit)
plot(cbind(JJ, sm[, 1], sm[, 3]-0.5), plot.type = "single",
col = c("black", "green", "blue"))
abline(h = -0.5, col = "grey60")
monthplot(fit)