R version 3.0.1 (2013-05-16) -- "Good Sport" # part a. 1x2, 1xK tables > # nightmares Hersen (1971) > # dichotomize (freq, sometimes) vs (seldom, never) > nt = c(115, 237) > prop.table(nt) [1] 0.3267045 0.6732955 > # note for grouped data (sucess, trials) for *.test > binom.test(115, 352) # trials, successes Exact binomial test data: 115 and 352 number of successes = 115, number of trials = 352, p-value = 7.312e-11 alternative hypothesis: true probability of success is not equal to 0.5 95 percent confidence interval: 0.2779287 0.3784258 sample estimates: probability of success 0.3267045 > prop.test(115, 352) # trials, successes 1-sample proportions test with continuity correction data: 115 out of 352, null probability 0.5 X-squared = 41.5938, df = 1, p-value = 1.124e-10 alternative hypothesis: true p is not equal to 0.5 95 percent confidence interval: 0.2784580 0.3788004 sample estimates: p 0.3267045 > chisq.test(nt) Chi-squared test for given probabilities data: nt X-squared = 42.2841, df = 1, p-value = 7.893e-11 > ?chisq.test > chisq.test(nt, simulate.p.value = TRUE, B = 2000) Chi-squared test for given probabilities with simulated p-value (based on 2000 replicates) data: nt X-squared = 42.2841, df = NA, p-value = 0.0004998 > chisq.test(nt, simulate.p.value = TRUE, B = 10000) Chi-squared test for given probabilities with simulated p-value (based on 10000 replicates) data: nt X-squared = 42.2841, df = NA, p-value = 9.999e-05 > data(Titanic) #version in base R; this is grouped data > dim(Titanic) [1] 4 2 2 2 > head(Titanic) [1] 0 0 35 0 0 0 > Titanic , , Age = Child, Survived = No Sex Class Male Female 1st 0 0 2nd 0 0 3rd 35 17 Crew 0 0 , , Age = Adult, Survived = No Sex Class Male Female 1st 118 4 2nd 154 13 3rd 387 89 Crew 670 3 , , Age = Child, Survived = Yes Sex Class Male Female 1st 5 1 2nd 11 13 3rd 13 14 Crew 0 0 , , Age = Adult, Survived = Yes Sex Class Male Female 1st 57 140 2nd 14 80 3rd 75 76 Crew 192 20 > str(Titanic) table [1:4, 1:2, 1:2, 1:2] 0 0 35 0 0 0 17 0 118 154 ... - attr(*, "dimnames")=List of 4 ..$ Class : chr [1:4] "1st" "2nd" "3rd" "Crew" ..$ Sex : chr [1:2] "Male" "Female" ..$ Age : chr [1:2] "Child" "Adult" ..$ Survived: chr [1:2] "No" "Yes" # Find a raw data version > install.packages("effects") > library(effects) > data(TitanicSurvival) > head(TitanicSurvival) survived sex age passengerClass Allen, Miss. Elisabeth Walton yes female 29.0000 1st Allison, Master. Hudson Trevor yes male 0.9167 1st Allison, Miss. Helen Loraine no female 2.0000 1st Allison, Mr. Hudson Joshua Crei no male 30.0000 1st Allison, Mrs. Hudson J C (Bessi no female 25.0000 1st Anderson, Mr. Harry yes male 48.0000 1st > str(TitanicSurvival) 'data.frame': 1309 obs. of 4 variables: $ survived : Factor w/ 2 levels "no","yes": 2 2 1 1 1 2 2 1 2 1 ... $ sex : Factor w/ 2 levels "female","male": 1 2 1 2 1 2 1 2 1 2 ... $ age : num 29 0.917 2 30 25 ... $ passengerClass: Factor w/ 3 levels "1st","2nd","3rd": 1 1 1 1 1 1 1 1 1 1 ... > # no crew in these data--diff between 2201 and 1309 > tsurv = TitanicSurvival # rename > dim(tsurv) [1] 1309 4 > attach(tsurv) > table(survived) survived no yes 809 500 > #673 crew also died > prop.table(survived) Error in Summary.factor(c(2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, : sum not meaningful for factors > prop.table(table(survived)) survived no yes 0.618029 0.381971 > prop.test(table(survived)) 1-sample proportions test with continuity correction data: table(survived), null probability 0.5 X-squared = 72.4706, df = 1, p-value < 2.2e-16 alternative hypothesis: true p is not equal to 0.5 95 percent confidence interval: 0.5910134 0.6443440 sample estimates: p 0.618029 > ?prop.test starting httpd help server ... done > # continuity correction TRUE by default > prop.test(table(survived), correct = FALSE) 1-sample proportions test without continuity correction data: table(survived), null probability 0.5 X-squared = 72.9419, df = 1, p-value < 2.2e-16 alternative hypothesis: true p is not equal to 0.5 95 percent confidence interval: 0.5913992 0.6439681 sample estimates: p 0.618029 > binom.test(table(survived)) Exact binomial test data: table(survived) number of successes = 809, number of trials = 1309, p-value < 2.2e-16 alternative hypothesis: true probability of success is not equal to 0.5 95 percent confidence interval: 0.5910851 0.6444417 sample estimates: probability of success 0.618029 > #this matters more when away from .5 > detach(tsurv) > 809/1309 [1] 0.618029 > prop.test(809, 1309) 1-sample proportions test with continuity correction data: 809 out of 1309, null probability 0.5 X-squared = 72.4706, df = 1, p-value < 2.2e-16 alternative hypothesis: true p is not equal to 0.5 95 percent confidence interval: 0.5910134 0.6443440 sample estimates: p 0.618029 > binom.test(809, 1309) Exact binomial test data: 809 and 1309 number of successes = 809, number of trials = 1309, p-value < 2.2e-16 alternative hypothesis: true probability of success is not equal to 0.5 95 percent confidence interval: 0.5910851 0.6444417 sample estimates: probability of success 0.618029 > table(sex, passengerClass, survived) , , survived = no passengerClass sex 1st 2nd 3rd female 5 12 110 male 118 146 418 , , survived = yes passengerClass sex 1st 2nd 3rd female 139 94 106 male 61 25 75 #----------------------------------------------------------------------------------- Multinomial Data (1 X C tables) SW exs: snapdragon colors (1x3) p.392; flax seed (1x6) p.395; chi-sq tests by hand Phillips death data exs (see linked readings, overhead ex) > # Categories 6 before to 5 after. count 119 is birth month > deaths = c(90, 100, 87, 96, 101, 86, 119, 118, 121, 114, 113, 106) > k = -6:5 > rbind(k, deaths) k -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 deaths 90 100 87 96 101 86 119 118 121 114 113 106 > sum(deaths) [1] 1251 > chisq.test(deaths) Chi-squared test for given probabilities data: deaths X-squared = 17.1918, df = 11, p-value = 0.1023 > sum((deaths-1251/12)^2/(1251/12)) #sum(obs-exp)^2/exp [1] 17.19185 > qchisq(.95, 11) [1] 19.67514 > #harvestmoon SW text p.398, Phillips ref > #another Phillips data set 348 deaths, (400 notable americans) chi-sq = 22.1 overheads