 use "C:\Documents and Settings\gillian raab\My Documents\aprojects\peaslaptop\ex6datafiles\data\test2.dta", clear
/*------------------------------------------------------------------------------
Build the simulated test data.
Per the original note, the input file has 40 complete observations, 40 with
x missing and 40 with y missing; missingness is determined by var1
(below: var1==2 -> y set to missing, var1==3 -> x set to missing).
-------------------------------------------------------------------------*/
* Fix the random-number seed so the simulated x and y (and therefore every
* result further down) can be reproduced from run to run.
set seed 12345
gen  x=invnorm(uniform())
gen  y=6*x+ invnorm(uniform())
/*-------------complete data---------------------------------*/
save xys.dta, replace
* Blank out the values that are to be imputed later.
replace y=.  if var1==2
replace x=.  if var1==3
* Visual check of the observations that still have both x and y.
twoway (scatter x y)
* Incomplete data set that serves as input to the imputation runs.
save forimp.dta, replace
/*------------------------------ ready to impute-----------------------*/
* clear lets this section be re-run on its own while other data are in memory.
use forimp.dta, clear
sort x
/*-------sort var1-----------------------------------------------------------------------------
Testing with 30 imputations (m(30) below) to see if draw variability differs
 from that for predictive matching
--------------------------------------------------------------------------------------*/
ice2 x y using testimp1, match gen(miss) m(30) cycle(5) replace
* The data in memory were re-sorted after loading, and Stata refuses to
* replace changed data unless clear is given.
use testimp1.dta, clear
* Hollow markers: x imputed / y imputed; default markers: both observed.
* missx and missy are presumably the indicators created by gen(miss) - confirm.
graph twoway ///
    (scatter x y if missx==1  & missy==0, msymbol(Oh)) ///
    (scatter x y if missx==0  & missy==1, msymbol(Oh)) ///
    (scatter x y if missx==0  & missy==0)


/*------------------------------------------------------------------------------------
Same thing with ice; the results are saved under the same file name
(testimp1), overwriting the earlier output.

Input is forimp.dta: x and y are generated earlier in this script and saved,
with their missing values, to forimp.dta. test2.dta as loaded at the top of
the script cannot contain x (gen x would fail), so ice x y could not run on it.
--------------------------------------------------------------------------------------*/
use forimp.dta, clear
ice x y using testimp1, gen(miss) m(100) cycle(5) replace
* clear so the load cannot be refused because of unsaved data in memory.
use testimp1.dta, clear
* Only the groups with an imputed value (var1>1); points labelled by _i
* (presumably the original observation number - confirm).
twoway  (scatter x y, mlabel(_i) ) if var1>1
*tabstat x if var1==3, statistics( mean sd ) by(_i) columns(variables)
*tabstat y if var1==2, statistics( mean sd ) by(_i) columns(variables)

/*------------------------------------------------------------------------------------
Now with the draw option; results are saved to a new data set (testimp2).
This looks OK.

Input is forimp.dta: x and y are generated earlier in this script and saved,
with their missing values, to forimp.dta. test2.dta as loaded at the top of
the script cannot contain x (gen x would fail), so ice x y could not run on it.
--------------------------------------------------------------------------------------*/
use forimp.dta, clear
ice x y using testimp2, gen(miss) m(100) cycle(5) replace draw
* clear so the load cannot be refused because of unsaved data in memory.
use testimp2.dta, clear
* Only the groups with an imputed value (var1>1); points labelled by _i
* (presumably the original observation number - confirm).
twoway  (scatter x y, mlabel(_i) ) if var1>1
* Mean and sd of the imputed values within each _i group.
tabstat x if var1==3, statistics( mean sd ) by(_i) columns(variables)
tabstat y if var1==2, statistics( mean sd ) by(_i) columns(variables)
/*------------------------------------------------------------------------------------
Now with more cycles and predictive matching to see if this helps.

It does, but it shows that the variation between the predicted values
of the same point depends on the density of the data at that point.
This means that there is essentially no variation between imputations
for points in a sparse part of the distribution.

NOTE(review): no match option is passed to mvis below; presumably this
version of mvis uses predictive matching by default - confirm.

Input is forimp.dta: x and y are generated earlier in this script and saved,
with their missing values, to forimp.dta. test2.dta as loaded at the top of
the script cannot contain x (gen x would fail), so mvis x y could not run on it.
--------------------------------------------------------------------------------------*/
use forimp.dta, clear
mvis x y using testimp3, gen(miss) m(100) cycle(100) replace
* clear so the load cannot be refused because of unsaved data in memory.
use testimp3.dta, clear
* Only the groups with an imputed value (var1>1); points labelled by _i
* (presumably the original observation number - confirm).
twoway  (scatter x y, mlabel(_i) ) if var1>1
* Mean and sd of the imputed values within each _i group.
tabstat x if var1==3, statistics( mean sd ) by(_i) columns(variables)
tabstat y if var1==2, statistics( mean sd ) by(_i) columns(variables)


