Skip to content

Latest commit

 

History

History
204 lines (168 loc) · 5.66 KB

150525sup.md

File metadata and controls

204 lines (168 loc) · 5.66 KB
p.24-25の補講


<t検定を手作業で計算してみる>

データ
pulse1 <- c(98, 88, 100, 96, 107, 114)
pulse2 <- c(86, 73, 95, 92, 99, 116)

t検定表を使ってP値と自由度dfからt統計量を得る
http://www.biwako.shiga-u.ac.jp/sensei/mnaka/ut/tdistinvtab.html






<t検定のコマンドt.testの中身を確認する>

> help(t.test)  説明表示


> t.test
function (x, ...) 
UseMethod("t.test")
<bytecode: 0x0000000008fb1c70>
<environment: namespace:stats>


> getS3method("t.test","default") t検定関数のコード表示


function (x, y = NULL, alternative = c("two.sided", "less", "greater"), 
    mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95, 
    ...)  デフォールト値が設定されている
{
    alternative <- match.arg(alternative)
    if (!missing(mu) && (length(mu) != 1 || is.na(mu))) 
        stop("'mu' must be a single number")
    if (!missing(conf.level) && (length(conf.level) != 1 || !is.finite(conf.level) || 
        conf.level < 0 || conf.level > 1)) 
        stop("'conf.level' must be a single number between 0 and 1")
    if (!is.null(y)) {
        dname <- paste(deparse(substitute(x)), "and", deparse(substitute(y)))
        if (paired) 
            xok <- yok <- complete.cases(x, y)
        else {
            yok <- !is.na(y)
            xok <- !is.na(x)
        }
        y <- y[yok]
    }
    else {
        dname <- deparse(substitute(x))
        if (paired) 
            stop("'y' is missing for paired test")
        xok <- !is.na(x)
        yok <- NULL
    }
    x <- x[xok]
    if (paired) {
        x <- x - y
        y <- NULL
    }

    nx <- length(x)
    mx <- mean(x)
    vx <- var(x)

    if (is.null(y)) { データが1つだったら
        if (nx < 2) 
            stop("not enough 'x' observations")
        df <- nx - 1
        stderr <- sqrt(vx/nx)
        if (stderr < 10 * .Machine$double.eps * abs(mx)) 
            stop("data are essentially constant")
        tstat <- (mx - mu)/stderr
        method <- if (paired) 
            "Paired t-test"
        else "One Sample t-test"
        estimate <- setNames(mx, if (paired) 
            "mean of the differences"
        else "mean of x")
    }
    else { データが2つの場合
        ny <- length(y)
        if (nx < 1 || (!var.equal && nx < 2)) 
            stop("not enough 'x' observations")
        if (ny < 1 || (!var.equal && ny < 2)) 
            stop("not enough 'y' observations")
        if (var.equal && nx + ny < 3) 
            stop("not enough observations")

        my <- mean(y)
        vy <- var(y)
        method <- paste(if (!var.equal) 
            "Welch", "Two Sample t-test")
        estimate <- c(mx, my)
        names(estimate) <- c("mean of x", "mean of y")

        if (var.equal) {
            df <- nx + ny - 2
            v <- 0
            if (nx > 1) 
                v <- v + (nx - 1) * vx
            if (ny > 1) 
                v <- v + (ny - 1) * vy
            v <- v/df
            stderr <- sqrt(v * (1/nx + 1/ny))
        }
        else {
            stderrx <- sqrt(vx/nx) 
            stderry <- sqrt(vy/ny) 
            stderr <- sqrt(stderrx^2 + stderry^2)
            df <- stderr^4/(stderrx^4/(nx - 1) + stderry^4/(ny - 
                1))
        }
        if (stderr < 10 * .Machine$double.eps * max(abs(mx), 
            abs(my))) 
            stop("data are essentially constant")
        tstat <- (mx - my - mu)/stderr
    }
    if (alternative == "less") { 
        pval <- pt(tstat, df)  P値計算
        cint <- c(-Inf, tstat + qt(conf.level, df))
    }
    else if (alternative == "greater") {
        pval <- pt(tstat, df, lower.tail = FALSE)
        cint <- c(tstat - qt(conf.level, df), Inf)
    }
    else {
        pval <- 2 * pt(-abs(tstat), df)
        alpha <- 1 - conf.level
        cint <- qt(1 - alpha/2, df)
        cint <- tstat + c(-cint, cint)
    }
    cint <- mu + cint * stderr 信頼区間計算

    names(tstat) <- "t"
    names(df) <- "df"
    names(mu) <- if (paired || !is.null(y)) 
        "difference in means"
    else "mean"
    attr(cint, "conf.level") <- conf.level
    rval <- list(statistic = tstat, parameter = df, p.value = pval, 
        conf.int = cint, estimate = estimate, null.value = mu, 
        alternative = alternative, method = method, data.name = dname)

    class(rval) <- "htest"

    return(rval)
}
<bytecode: 0x00000000091922e8>
<environment: namespace:stats>


Rの結果と比較する

データ
pulse1 <- c(98, 88, 100, 96, 107, 114)
pulse2 <- c(86, 73, 95, 92, 99, 116)

> t.test(pulse1-pulse2)

 One Sample t-test

data:  pulse1 - pulse2
t = 2.8265, df = 5, p-value = 0.03683
alternative hypothesis: true mean is not equal to 0
95 percent confidence interval:
  0.6338082 13.3661918
sample estimates:
mean of x 
        7 

> tmp <- pulse1-pulse2    2データの差を変数に代入
> tstat <- ( mean(tmp) -0 ) / ( sqrt( var(tmp)/ length(tmp) ) ) t統計量計算
> df = length(tmp)-1  自由度df計算
> df     自由度値の確認
[1] 5
> pval <- 2 * pt(-abs(tstat), df)    P値計算
> pval
[1] 0.03682573   P=0.05より小さいので、2データに有意差あり。


t分布の確認
x <-  seq(-10,10, 0.1)

> x 
  [1] -10.0  -9.9  -9.8  -9.7  -9.6  -9.5  -9.4  -9.3  -9.2
 [10]  -9.1  -9.0  -8.9  -8.8  -8.7  -8.6  -8.5  -8.4  -8.3
 [19]  -8.2  -8.1  -8.0  -7.9  -7.8  -7.7  -7.6  -7.5  -7.4
 [28]  -7.3  -7.2  -7.1  -7.0  -6.9  -6.8  -6.7  -6.6  -6.5
 [37]  -6.4  -6.3  -6.2  -6.1  -6.0  -5.9  -5.8  -5.7  -5.6
 [46]  -5.5  -5.4  -5.3  -5.2  -5.1  -5.0  -4.9  -4.8  -4.7
 [55]  -4.6  -4.5  -4.4  -4.3  -4.2  -4.1  -4.0  -3.9  -3.8
 [64]  -3.7  -3.6  -3.5  -3.4  -3.3  -3.2  -3.1  -3.0  

> plot(x,dt(x,5))
> par(new=T)
> plot(x,pt(x,5))