p.24-25の補講
<t検定を手作業で計算してみる>
データ
pulse1 <- c(98, 88, 100, 96, 107, 114)
pulse2 <- c(86, 73, 95, 92, 99, 116)
t検定表を使ってP値と自由度dfからt統計量を得る
http://www.biwako.shiga-u.ac.jp/sensei/mnaka/ut/tdistinvtab.html
<t検定のコマンドt.testの中身を確認する>
> help(t.test) 説明表示
> t.test
function (x, ...)
UseMethod("t.test")
<bytecode: 0x0000000008fb1c70>
<environment: namespace:stats>
> getS3method("t.test","default") t検定関数のコード表示
function (x, y = NULL, alternative = c("two.sided", "less", "greater"),
mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95,
...) デフォールト値が設定されている
{
alternative <- match.arg(alternative)
if (!missing(mu) && (length(mu) != 1 || is.na(mu)))
stop("'mu' must be a single number")
if (!missing(conf.level) && (length(conf.level) != 1 || !is.finite(conf.level) ||
conf.level < 0 || conf.level > 1))
stop("'conf.level' must be a single number between 0 and 1")
if (!is.null(y)) {
dname <- paste(deparse(substitute(x)), "and", deparse(substitute(y)))
if (paired)
xok <- yok <- complete.cases(x, y)
else {
yok <- !is.na(y)
xok <- !is.na(x)
}
y <- y[yok]
}
else {
dname <- deparse(substitute(x))
if (paired)
stop("'y' is missing for paired test")
xok <- !is.na(x)
yok <- NULL
}
x <- x[xok]
if (paired) {
x <- x - y
y <- NULL
}
nx <- length(x)
mx <- mean(x)
vx <- var(x)
if (is.null(y)) { データが1つだったら
if (nx < 2)
stop("not enough 'x' observations")
df <- nx - 1
stderr <- sqrt(vx/nx)
if (stderr < 10 * .Machine$double.eps * abs(mx))
stop("data are essentially constant")
tstat <- (mx - mu)/stderr
method <- if (paired)
"Paired t-test"
else "One Sample t-test"
estimate <- setNames(mx, if (paired)
"mean of the differences"
else "mean of x")
}
else { データが2つの場合
ny <- length(y)
if (nx < 1 || (!var.equal && nx < 2))
stop("not enough 'x' observations")
if (ny < 1 || (!var.equal && ny < 2))
stop("not enough 'y' observations")
if (var.equal && nx + ny < 3)
stop("not enough observations")
my <- mean(y)
vy <- var(y)
method <- paste(if (!var.equal)
"Welch", "Two Sample t-test")
estimate <- c(mx, my)
names(estimate) <- c("mean of x", "mean of y")
if (var.equal) {
df <- nx + ny - 2
v <- 0
if (nx > 1)
v <- v + (nx - 1) * vx
if (ny > 1)
v <- v + (ny - 1) * vy
v <- v/df
stderr <- sqrt(v * (1/nx + 1/ny))
}
else {
stderrx <- sqrt(vx/nx)
stderry <- sqrt(vy/ny)
stderr <- sqrt(stderrx^2 + stderry^2)
df <- stderr^4/(stderrx^4/(nx - 1) + stderry^4/(ny -
1))
}
if (stderr < 10 * .Machine$double.eps * max(abs(mx),
abs(my)))
stop("data are essentially constant")
tstat <- (mx - my - mu)/stderr
}
if (alternative == "less") {
pval <- pt(tstat, df) P値計算
cint <- c(-Inf, tstat + qt(conf.level, df))
}
else if (alternative == "greater") {
pval <- pt(tstat, df, lower.tail = FALSE)
cint <- c(tstat - qt(conf.level, df), Inf)
}
else {
pval <- 2 * pt(-abs(tstat), df)
alpha <- 1 - conf.level
cint <- qt(1 - alpha/2, df)
cint <- tstat + c(-cint, cint)
}
cint <- mu + cint * stderr 信頼区間計算
names(tstat) <- "t"
names(df) <- "df"
names(mu) <- if (paired || !is.null(y))
"difference in means"
else "mean"
attr(cint, "conf.level") <- conf.level
rval <- list(statistic = tstat, parameter = df, p.value = pval,
conf.int = cint, estimate = estimate, null.value = mu,
alternative = alternative, method = method, data.name = dname)
class(rval) <- "htest"
return(rval)
}
<bytecode: 0x00000000091922e8>
<environment: namespace:stats>
Rの結果と比較する
データ
pulse1 <- c(98, 88, 100, 96, 107, 114)
pulse2 <- c(86, 73, 95, 92, 99, 116)
> t.test(pulse1-pulse2)
One Sample t-test
data: pulse1 - pulse2
t = 2.8265, df = 5, p-value = 0.03683
alternative hypothesis: true mean is not equal to 0
95 percent confidence interval:
0.6338082 13.3661918
sample estimates:
mean of x
7
> tmp <- pulse1-pulse2 2データの差を変数に代入
> tstat <- ( mean(tmp) -0 ) / ( sqrt( var(tmp)/ length(tmp) ) ) t統計量計算
> df = length(tmp)-1 自由度df計算
> df 自由度値の確認
[1] 5
> pval <- 2 * pt(-abs(tstat), df) P値計算
> pval
[1] 0.03682573 P=0.05より小さいので、2データに有意差あり。
t分布の確認
x <- seq(-10,10, 0.1)
> x
[1] -10.0 -9.9 -9.8 -9.7 -9.6 -9.5 -9.4 -9.3 -9.2
[10] -9.1 -9.0 -8.9 -8.8 -8.7 -8.6 -8.5 -8.4 -8.3
[19] -8.2 -8.1 -8.0 -7.9 -7.8 -7.7 -7.6 -7.5 -7.4
[28] -7.3 -7.2 -7.1 -7.0 -6.9 -6.8 -6.7 -6.6 -6.5
[37] -6.4 -6.3 -6.2 -6.1 -6.0 -5.9 -5.8 -5.7 -5.6
[46] -5.5 -5.4 -5.3 -5.2 -5.1 -5.0 -4.9 -4.8 -4.7
[55] -4.6 -4.5 -4.4 -4.3 -4.2 -4.1 -4.0 -3.9 -3.8
[64] -3.7 -3.6 -3.5 -3.4 -3.3 -3.2 -3.1 -3.0
> plot(x,dt(x,5))
> par(new=T)
> plot(x,pt(x,5))