# Multiple imputation of the `nhanes` example data with the mice package.
#
# Refer to van Buuren S and Groothuis-Oudshoorn K (2011). mice: Multivariate
# Imputation by Chained Equations in R. Journal of Statistical Software,
# 45(3), 1-67. http://www.jstatsoft.org/v45/i03/
#
# The output reproduced in the comments below was recorded under
# R version 2.14.1 (2011-12-22). Other R / mice versions may print these
# results with a different layout or slightly different numbers.

# Setup ----

# One-time installation; kept commented out so that running the script does
# not reinstall the packages every time.
# install.packages("mice")  # imputation package
# install.packages("VIM")   # additional graphics

library(mice)
library(VIM)

# Round the numeric part of a table and leave everything else untouched.
#
# x:      a numeric matrix/vector or a data frame.
# digits: number of decimal places to keep.
# Returns `x` with its numeric values rounded.
#
# In the session recorded here, summary(pool(...)) was an all-numeric table
# and a bare round() worked. NOTE(review): newer mice releases appear to
# return a data frame that also holds a non-numeric `term` column, on which
# round() would stop with an error -- confirm against the installed version.
round_numeric <- function(x, digits = 2) {
  if (is.data.frame(x)) {
    is_num <- vapply(x, is.numeric, logical(1))
    x[is_num] <- lapply(x[is_num], round, digits = digits)
    return(x)
  }
  round(x, digits)
}

# Inspect the missing-data structure ----

data(nhanes)

# Results are wrapped in print() so they are also shown when the file is run
# through source(), which does not auto-print top-level values.
print(head(nhanes))
#   age  bmi hyp chl
# 1   1   NA  NA  NA
# 2   2 22.7   1 187
# 3   1   NA   1 187
# 4   3   NA  NA  NA
# 5   1 20.4   1 113
# 6   3   NA  NA 184

# Number of observations per pattern for all pairs of variables.
print(md.pairs(nhanes))
# $rr
#     age bmi hyp chl
# age  25  16  17  15
# bmi  16  16  16  13
# hyp  17  16  17  14
# chl  15  13  14  15
#
# $rm
#     age bmi hyp chl
# age   0   9   8  10
# bmi   0   0   0   3
# hyp   0   1   0   3
# chl   0   2   1   0
#
# $mr
#     age bmi hyp chl
# age   0   0   0   0
# bmi   9   0   1   2
# hyp   8   0   0   1
# chl  10   3   3   0
#
# $mm
#     age bmi hyp chl
# age   0   0   0   0
# bmi   0   9   8   7
# hyp   0   8   8   7
# chl   0   7   7  10

# Missing-data patterns; age is complete.
print(md.pattern(nhanes))
#    age hyp bmi chl
# 13   1   1   1   1  0
#  1   1   1   0   1  1
#  3   1   1   1   0  1
#  1   1   0   0   1  2
#  7   1   0   0   0  3
#      0   8   9  10 27

# Impute ----

# The multiply imputed data set is stored in the object impdrr of class mids.
# m = 5 imputations is the default.
impdrr <- mice(nhanes, seed = 23109)
#  iter imp variable
#   1   1  bmi  hyp  chl
#   1   2  bmi  hyp  chl
#   1   3  bmi  hyp  chl
#   ....................
#   5   3  bmi  hyp  chl
#   5   4  bmi  hyp  chl
#   5   5  bmi  hyp  chl

# Default method for numerical data: predictive mean matching (pmm).
# The complete() function extracts the five imputed data sets from the imputed
# object as a long (row-stacked) matrix with 125 records, e.g.
# complete(impdrr, "long").

# A lattice plot has to be printed explicitly to be drawn under source().
print(stripplot(impdrr, pch = 20, cex = 1.2))

# Analyse and pool ----

# The fit object has class mira and contains the results of 5 complete-data
# analyses.
fitdrr <- with(impdrr, lm(chl ~ age + bmi))

# Pool the separate results; these match the results in the JSS paper.
print(round_numeric(summary(pool(fitdrr)), 2))
#                est    se     t    df Pr(>|t|)   lo 95  hi 95 nmis  fmi lambda
# (Intercept) -34.16 76.07 -0.45  6.81     0.67 -215.05 146.73   NA 0.57   0.47
# age          34.33 14.86  2.31  4.04     0.08   -6.76  75.42    0 0.75   0.65
# bmi           6.21  2.21  2.81  8.80     0.02    1.20  11.23    9 0.48   0.37

# Try again with a different seed (or set m = 50, set.seed).
impdrr2 <- mice(nhanes, seed = 23009)
fitdrr2 <- with(impdrr2, lm(chl ~ age + bmi))

# This is not closer to the JSS results: different seed.
print(round_numeric(summary(pool(fitdrr2)), 2))
#                est    se     t    df Pr(>|t|)   lo 95  hi 95 nmis  fmi lambda
# (Intercept) -32.13 59.67 -0.54 14.04     0.60 -160.08  95.82   NA 0.29   0.20
# age          35.76 10.50  3.41  8.52     0.01   11.80  59.72    0 0.49   0.38
# bmi           6.12  1.87  3.28 14.86     0.01    2.14  10.11    9 0.27   0.17