# R軟件學習筆記-6(各種檢驗)

# Draw 2000 standard-normal values and plot the N(0, 1) density over them.
# The x values are sorted so that type = "l" traces the curve left to right;
# the first plot() argument is the x value, the second the y value.
a <- rnorm(n = 2000, mean = 0, sd = 1)
plot(
  sort(a),
  dnorm(sort(a)),
  type = "l",
  main = "抽樣分佈",
  xlab = "統計量",
  ylab = "概率密度"
)


# Bootstrap simulation of the sampling distribution of the difference between
# two independent sample means, for populations with equal variances.
par(mfrow = c(2, 1), mar = c(4, 4, 4, 4))  # 2 x 1 layout, margin of 4 on each side
set.seed(12345)  # fix the RNG state so every run draws the same numbers
Pop1 <- rnorm(10000, mean = 2, sd = 2)   # both populations use sd = 2
Pop2 <- rnorm(10000, mean = 10, sd = 2)

# Preallocate the result vectors instead of growing them with c() on every
# iteration; the draws happen in the same order, so the values are unchanged.
Diff <- numeric(2000)
Sdx1 <- numeric(2000)
Sdx2 <- numeric(2000)
for (i in seq_along(Diff)) {
  # sample(): first argument is the population to draw from, size is the
  # number of draws, replace = TRUE samples with replacement
  x1 <- sample(Pop1, size = 100, replace = TRUE)
  x2 <- sample(Pop2, size = 120, replace = TRUE)
  Diff[i] <- mean(x1) - mean(x2)
  Sdx1[i] <- sd(x1)
  Sdx2[i] <- sd(x2)
}

plot(density(Diff), xlab = "mean(x1)-mean(x2)", ylab = "Density",
     main = "均值差的抽樣分佈(等方差)", cex.main = 0.7, cex.lab = 0.7)

# Add a point to the existing plot at (simulated mean, simulated sd of Diff).
# pch is the plotting character: 0 = open square, 1 = open circle,
# 2 = open triangle.
points(mean(Diff), sd(Diff), pch = 1, col = 1)

S1 <- mean(Sdx1)
S2 <- mean(Sdx2)
# Pooled variance of the two samples (sizes 100 and 120)
Sp <- ((100 - 1) * S1^2 + (120 - 1) * S2^2) / (100 + 120 - 2)

# Reference point: difference of the population means (2 - 10) and the
# standard error computed from the pooled variance
points((2 - 10), sqrt(Sp / 100 + Sp / 120), pch = 2, col = 2)




# Same simulation as above, but the two populations have unequal variances
# (sd = 2 versus sd = 4).
set.seed(12345)
Pop1 <- rnorm(10000, mean = 2, sd = 2)
Pop2 <- rnorm(10000, mean = 10, sd = 4)

# Preallocate rather than append with c() inside the loop; the sampling order
# is unchanged, so the simulated values are the same.
Diff <- numeric(2000)
Sdx1 <- numeric(2000)
Sdx2 <- numeric(2000)
for (i in seq_along(Diff)) {
  x1 <- sample(Pop1, size = 100, replace = TRUE)
  x2 <- sample(Pop2, size = 120, replace = TRUE)
  Diff[i] <- mean(x1) - mean(x2)
  Sdx1[i] <- sd(x1)
  Sdx2[i] <- sd(x2)
}
plot(density(Diff), xlab = "mean(x1)-mean(x2)", ylab = "Density",
     main = "均值差的抽樣分佈(不等方差)", cex.main = 0.7, cex.lab = 0.7)
# Simulated mean and standard deviation of the differences
points(mean(Diff), sd(Diff), pch = 1, col = 1)
S1 <- mean(Sdx1)
S2 <- mean(Sdx2)
# Reference point: population mean difference and the unpooled standard error
points((2 - 10), sqrt(S1^2 / 100 + S2^2 / 120), pch = 2, col = 2)



# Independent-samples t test example.
# read.table() only accepts a one-byte `sep`; the four-space separator would be
# rejected, so a tab is used instead.
# NOTE(review): assumes ForestData.txt is tab-delimited — confirm against the file.
Forest <- read.table(file = "ForestData.txt", header = TRUE, sep = "\t")
Forest$month <- factor(
  Forest$month,
  levels = c("jan", "feb", "mar", "apr", "may", "jun",
             "jul", "aug", "sep", "oct", "nov", "dec")
)
# Keep only the January and August observations
Tmp <- subset(Forest, Forest$month == "jan" | Forest$month == "aug")
# `paired` is not passed: FALSE is already the default, and the formula method
# of t.test() may refuse the argument in newer R versions (TODO confirm).
t.test(temp ~ month, data = Tmp, var.equal = TRUE)   # equal variances assumed
t.test(temp ~ month, data = Tmp, var.equal = FALSE)  # equal variances not assumed

# Levene's test for homogeneity of variance, centred on the group means
library("car")
leveneTest(Tmp$temp, Tmp$month, center = mean)


# Bootstrap simulation of the sampling distribution of the sample mean.
set.seed(12345)
Pop <- rnorm(100000, mean = 4, sd = 2)  # normal population, mean 4, sd 2

# Preallocate instead of growing MeanX with c() in the loop; the draws are
# made in the same order, so the values are unchanged.
MeanX <- numeric(2000)
for (i in seq_along(MeanX)) {
  x <- sample(Pop, size = 1000, replace = TRUE)
  MeanX[i] <- mean(x)
}
plot(density(MeanX), xlab = "mean(x)", ylab = "Density",
     main = "樣本均值的抽樣分佈", cex.main = 0.7, cex.lab = 0.7)
# Simulated mean and standard deviation of the sample means
points(mean(MeanX), sd(MeanX), pch = 1, col = 1)
# Reference point: population mean 4 and standard error sqrt(sd^2 / n)
points(4, sqrt(2^2 / 1000), pch = 2, col = 2)

# Paired-samples t test example: read the report-card data and drop every
# row that contains a missing value before comparing the chi and math columns.
ReportCard <- na.omit(
  read.table(file = "ReportCard.txt", header = TRUE, sep = " ")
)
t.test(ReportCard$chi, ReportCard$math, paired = TRUE)

# One-sample t test example: test whether the mean of the per-row
# chi - math differences is 0.
Diff <- ReportCard$chi - ReportCard$math
t.test(Diff, mu = 0)


# NOTE(review): hard-coded, machine-specific working directory; it is also set
# after earlier read.table() calls in this script — confirm the intended order.
setwd("C:\\Users\\Administrator\\Desktop\\統計軟件\\數據與程序")

# Power analysis for t tests.
# Install pwr only when it is missing instead of reinstalling on every run.
if (!requireNamespace("pwr", quietly = TRUE)) {
  install.packages("pwr")
}
library("pwr")
# Two independent samples of unequal size; `power` is not supplied here
pwr.t2n.test(n1 = 2, n2 = 184, d = 4.8, sig.level = 0.05,
             alternative = "two.sided")
# Paired design with n = 58 and power = 0.8; the effect size `d` is not supplied
pwr.t.test(n = 58, sig.level = 0.05, power = 0.8, type = "paired",
           alternative = "two.sided")

# Power analysis for a correlation-coefficient test.
ReportCard <- read.table(file = "ReportCard.txt", header = TRUE, sep = " ")
Tmp <- ReportCard[complete.cases(ReportCard), ]  # keep rows without missing values
# `alternative` is spelled out in full rather than relying on partial matching
# of "two.side".
cor.test(Tmp[, 5], Tmp[, 7], alternative = "two.sided", method = "pearson")
library("pwr")
# r, n and sig.level are given; `power` is not supplied
pwr.r.test(r = 0.75, sig.level = 0.05, n = 58, alternative = "two.sided")

# Power analysis for a chi-square test on a contingency table.
ReportCard <- read.table(
  file = "ReportCard.txt",
  header = TRUE,
  sep = " "
)
Tmp <- ReportCard[complete.cases(ReportCard), ]
# Cross-tabulate columns 2 and 12; the outer parentheses also print the result
(CrossTable <- table(Tmp[, c(2, 12)]))
# Chi-square test without continuity correction
(ResChisq <- chisq.test(CrossTable, correct = FALSE))
library("pwr")
# N, df, power and sig.level are given; the effect size `w` is not supplied
pwr.chisq.test(
  sig.level = 0.05,
  N = 58,
  power = 0.9,
  df = 3
)

# Compute the effect size from a 3 x 2 table of cell probabilities
# (written row by row), then use it in the chi-square power calculation.
(prob <- rbind(
  c(0.42, 0.28),
  c(0.03, 0.07),
  c(0.10, 0.10)
))
ES.w2(prob)
pwr.chisq.test(
  w = ES.w2(prob),
  df = (3 - 1) * (2 - 1),  # (rows - 1) * (columns - 1)
  sig.level = 0.05,
  power = 0.9
)


# Mann-Whitney U test for two independent samples.
# read.table() only accepts a one-byte `sep`; the four-space separator would be
# rejected, so a tab is used instead.
# NOTE(review): assumes ForestData.txt is tab-delimited — confirm against the file.
Forest <- read.table(file = "ForestData.txt", header = TRUE, sep = "\t")
Forest$month <- factor(
  Forest$month,
  levels = c("jan", "feb", "mar", "apr", "may", "jun",
             "jul", "aug", "sep", "oct", "nov", "dec")
)
# Keep only the January and August observations
Tmp <- subset(Forest, Forest$month == "jan" | Forest$month == "aug")
wilcox.test(temp ~ month, data = Tmp)

# Kolmogorov-Smirnov test on the January and August temperatures
x1 <- subset(Forest, Forest$month == "jan")
x2 <- subset(Forest, Forest$month == "aug")
ks.test(x1$temp, x2$temp)


# Wilcoxon signed-rank test for paired samples, on rows with no missing values.
ReportCard <- na.omit(
  read.table(file = "ReportCard.txt", header = TRUE, sep = " ")
)
wilcox.test(ReportCard$chi, ReportCard$math, paired = TRUE)

# Count, over all (i, j) combinations, how often chi[i] is below math[j],
# and then how often math[i] is below chi[j].
sum(outer(ReportCard$chi, ReportCard$math, "<"))
sum(outer(ReportCard$math, ReportCard$chi, "<"))


# Permutation test on a small hand-made data set.
# Install coin only when it is missing instead of reinstalling on every run.
if (!requireNamespace("coin", quietly = TRUE)) {
  install.packages("coin")
}
library("coin")

x1 <- c(15, 13, 11, 14, 12, 10)  # observed values
x2 <- c(1, 1, 1, 2, 2, 2)        # group label of each value
x <- data.frame(x1 = x1, x2 = x2)
# Exact permutation distribution of the two-group location test
oneway_test(x1 ~ as.factor(x2), data = x, distribution = "exact")

# Permutation test for the means of two independent samples.
# read.table() only accepts a one-byte `sep`; the four-space separator would be
# rejected, so a tab is used instead.
# NOTE(review): assumes ForestData.txt is tab-delimited — confirm against the file.
Forest <- read.table(file = "ForestData.txt", header = TRUE, sep = "\t")
Forest$month <- factor(
  Forest$month,
  levels = c("jan", "feb", "mar", "apr", "may", "jun",
             "jul", "aug", "sep", "oct", "nov", "dec")
)
Tmp <- subset(Forest, Forest$month == "jan" | Forest$month == "aug")
# Classical t test for comparison. `paired` is not passed: FALSE is already
# the default, and the formula method of t.test() may refuse the argument in
# newer R versions (TODO confirm).
t.test(temp ~ month, data = Tmp, var.equal = TRUE)
# Rebuild the factor from its character values so that only the months still
# present in Tmp remain as levels
Tmp$month <- as.vector(Tmp$month)
Tmp$month <- as.factor(Tmp$month)
# Exact, asymptotic and Monte Carlo (1000 resamples) reference distributions
oneway_test(temp ~ month, data = Tmp, distribution = "exact")
oneway_test(temp ~ month, data = Tmp, distribution = "asymptotic")
oneway_test(temp ~ month, data = Tmp, distribution = approximate(B = 1000))


# Permutation test for the Spearman rank correlation coefficient.
ReportCard <- read.table(file = "ReportCard.txt", header = TRUE, sep = " ")
Tmp <- ReportCard[complete.cases(ReportCard), ]  # keep rows without missing values
# `alternative` is spelled out in full rather than relying on partial matching
# of "two.side".
cor.test(Tmp[, 5], Tmp[, 7], alternative = "two.sided", method = "spearman")
set.seed(12345)  # make the Monte Carlo resampling reproducible
spearman_test(math ~ phy, data = Tmp, distribution = approximate(B = 1000))

# Chi-square permutation test.
Tmp <- ReportCard[complete.cases(ReportCard), ]
# Contingency table of sex by average-score grade
CrossTable <- table(Tmp[, c(2, 12)])
chisq.test(CrossTable, correct = FALSE)
# Same hypothesis via coin: asymptotic reference distribution first,
# then a seeded Monte Carlo approximation with 1000 resamples
chisq_test(
  sex ~ avScore,
  data = Tmp,
  distribution = "asymptotic"
)
set.seed(12345)
chisq_test(
  sex ~ avScore,
  data = Tmp,
  distribution = approximate(B = 1000)
)

# Permutation test for the difference between two paired distributions,
# run on the rows that have no missing values.
ReportCard <- na.omit(
  read.table(file = "ReportCard.txt", header = TRUE, sep = " ")
)
# Classical paired Wilcoxon test, followed by coin's signed-rank test
wilcox.test(ReportCard$chi, ReportCard$math, paired = TRUE)
wilcoxsign_test(
  chi ~ math,
  data = ReportCard,
  distribution = "asymptotic"
)

###############兩樣本均值差的自舉法檢驗

# Bootstrap statistic: difference between the means of two groups.
#
# DataSet: two-column matrix or data frame; column 1 holds the measurements,
#          column 2 the group label.
# indices: row indices that select the resample.
# Returns the mean of the first group minus the mean of the second, with the
# groups ordered by as.factor() on column 2. The result is NA when the
# resample contains only one group.
DiffMean <- function(DataSet, indices) {
  # drop = FALSE keeps the two-column shape even when a single row is selected
  ReSample <- DataSet[indices, , drop = FALSE]
  group_means <- tapply(
    ReSample[, 1],
    INDEX = as.factor(ReSample[, 2]),
    FUN = mean
  )
  group_means[1] - group_means[2]
}

# Bootstrap test for the difference between two sample means.
# Install boot only when it is missing instead of reinstalling on every run.
if (!requireNamespace("boot", quietly = TRUE)) {
  install.packages("boot")
}
library("boot")
# read.table() only accepts a one-byte `sep`; the four-space separator would be
# rejected, so a tab is used instead.
# NOTE(review): assumes ForestData.txt is tab-delimited — confirm against the file.
Forest <- read.table(file = "ForestData.txt", header = TRUE, sep = "\t")
Forest$month <- factor(
  Forest$month,
  levels = c("jan", "feb", "mar", "apr", "may", "jun",
             "jul", "aug", "sep", "oct", "nov", "dec")
)
Tmp <- subset(Forest, Forest$month == "jan" | Forest$month == "aug")
# Two-column matrix: temperature and month (cbind stores the factor as its
# integer codes)
Tmp <- cbind(Tmp$temp, Tmp$month)
set.seed(12345)  # make the resampling reproducible
BootObject <- boot(data = Tmp, statistic = DiffMean, R = 20)
BootObject$t0                       # statistic on the original data
mean(BootObject$t, na.rm = TRUE)    # mean of the replicates, ignoring NA
print(BootObject)
plot(BootObject)
boot.ci(BootObject, conf = 0.95, type = c("norm", "perc"))

# 發表評論
# 所有評論
# 還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
# 相關文章