/* This program implements the Manski IV bound and SV bound estimators (including inference) in BSV (2012) */ /* Date: October 9, 2010 */ /* Authors: Jay Bhattacharya, Azeem Shaikh, and Ed Vytlacil */ /* This version replaces the max verison of the Q(theta) definition with a sum of squares version */ #delimit ; capture clear; capture log close; set mem 50m; set seed 210391; set matsize 500; set more off; capture log close; log using "pqdbounds v3.log", replace; global alpha 0.05; global crit1 = invnorm(1 - $alpha/2); global crit2 = invnorm(1 - $alpha); global instrument weekday2; global numsubs = 1000; global subsampsz = 50; global quietly = ""; *set trace on; set tracesep on; capture program drop pqdbound; program define pqdbound; /* This program calculates the 90th and 95th percentiles of the Q(theta) distribution */; /* for PQD bounds using the subsample draws in sub_sample.dta. */; /* These are the critical values in the test in BSV paper. */; args t; /* The single argument is a number reflecting mortality horizon */; /* e.g. if t=7, then the program uses dth7 */; /* The program assumes that the data set consists of subsample draws from rhc.dta */; /* Calculate pieces of Qb(theta) for each subsample */; gen d_`t' = swang; /* E[D] */; gen dy_`t' = swang*dth`t'; /* E[DY] */; gen notdy_`t' = (1-swang)*dth`t'; /* E[(1-D)Y] */; gen dnoty_`t' = swang*(1-dth`t'); /* E[D(1-Y)] */; gen notdnoty_`t' = (1-swang)*(1-dth`t'); /* E[(1-D)(1-Y)] */; preserve; collapse (count) n_z = dth`t' (sd) sd_d_z = d_`t' sd_y_z = dth`t' sd_dy_z = dy_`t' sd_notdy_z = notdy_`t' sd_dnoty_z = dnoty_`t' sd_notdnoty_z = notdnoty_`t' (mean) e_d_z = d_`t' e_y_z = dth`t' e_dy_z = dy_`t' e_notdy_z = notdy_`t' e_dnoty_z = dnoty_`t' e_notdnoty_z = notdnoty_`t', by(subsampnum $instrument); * codebook subsamp; /* Put the conditional means from each subsample draws on a single row */; $quietly reshape wide n_z sd_d_z sd_y_z sd_dy_z sd_notdy_z sd_dnoty_z sd_notdnoty_z e_d_z e_y_z e_dy_z e_notdy_z e_dnoty_z e_notdnoty_z, i(subsampnum) j($instrument); /* Evaluate Q(theta) at different values of theta for each subsample. */; /* The grid over theta goes from -1 to 1 by 0.02. */; $quietly expand 101; sort subsamp; gen theta = -1.0 + 0.02*(_n-1) - 2.02*(subsampnum-1); $quietly replace theta = 0 if abs(theta) < 1.0e-12; tempfile cond_z; sort subsampnum; save `cond_z'; restore; preserve; collapse (count) n_zd = dth`t' (sd) sd_y_zd = dth`t' (mean) e_y_zd = dth`t', by(subsampnum $instrument d_`t'); * codebook subsamp; /* The first digit represents whether the instrument is 1 or 0; the second digit represents whether swang is 1 or 0 */; gen comb = 10*$instrument + d_`t'; drop $instrument; drop d_`t'; reshape wide n_zd sd_y_zd e_y_zd, i(subsampnum) j(comb); rename n_zd0 n_zd00; rename sd_y_zd0 sd_y_zd00; rename e_y_zd0 e_y_zd00; rename n_zd1 n_zd01; rename sd_y_zd1 sd_y_zd01; rename e_y_zd1 e_y_zd01; replace sd_y_zd01 = 0 if sd_y_zd01 == .; replace sd_y_zd11 = 0 if sd_y_zd11 == .; replace sd_y_zd00 = 0 if sd_y_zd00 == .; replace sd_y_zd10 = 0 if sd_y_zd10 == .; tempfile cond_zd; sort subsampnum; save `cond_zd'; use `cond_z'; merge subsampnum using `cond_zd'; drop _merge; /* Calculate Q(theta) for each subsample using theta and the conditional means. */; pqd_qtheta `t'; $quietly replace q_theta1 = q_theta1* sqrt($subsampsz); $quietly replace q_theta2 = q_theta2* sqrt($subsampsz); /* Calculate the 90th and 95th percentile of the Q(theta) distribution for each theta */; collapse (p90) pqd1bound`t'_90 = q_theta1 pqd2bound`t'_90 = q_theta2 (p95) pqd1bound`t'_95 = q_theta1 pqd2bound`t'_95 = q_theta2, by(theta); gen pqdbound`t'_90 = pqd1bound`t'_90; gen pqdbound`t'_95 = pqd1bound`t'_95; drop pqd1bound`t'_95 pqd2bound`t'_95 pqd1bound`t'_90 pqd2bound`t'_90; /* Save the result and restore the original subsample draws dataset */; sort theta; $quietly save pqdbound`t'_temp.dta, replace; restore; end; capture program drop pqdbound2; program define pqdbound2; /* This program calculates Q(theta) for PQD bounds using the the original dataset rhc.dta */; args t numobs; /* The argument t is a number reflecting mortality horizon */; /* e.g. if t=7, then the program uses dth7 */; /* The argument numobs is the number of observations in the */; /* original data. */; /* Calculate pieces of Qb(theta) for each subsample */; gen d_`t' = swang; /* E[D] */; gen dy_`t' = swang*dth`t'; /* E[DY] */; gen notdy_`t' = (1-swang)*dth`t'; /* E[(1-D)Y] */; gen dnoty_`t' = swang*(1-dth`t'); /* E[D(1-Y)] */; gen notdnoty_`t' = (1-swang)*(1-dth`t'); /* E[(1-D)(1-Y)] */; preserve; collapse (count) n_z = dth`t' (sd) sd_d_z = d_`t' sd_y_z = dth`t' sd_dy_z = dy_`t' sd_notdy_z = notdy_`t' sd_dnoty_z = dnoty_`t' sd_notdnoty_z = notdnoty_`t' (mean) e_d_z = d_`t' e_y_z = dth`t' e_dy_z = dy_`t' e_notdy_z = notdy_`t' e_dnoty_z = dnoty_`t' e_notdnoty_z = notdnoty_`t', by(subsampnum $instrument); /* Put the conditional means from each subsample draws on a single row */; $quietly reshape wide n_z sd_d_z sd_y_z sd_dy_z sd_notdy_z sd_dnoty_z sd_notdnoty_z e_d_z e_y_z e_dy_z e_notdy_z e_dnoty_z e_notdnoty_z, i(subsampnum) j($instrument); tempfile cond_z; replace subsampnum = 0; sort subsampnum; save `cond_z'; restore; preserve; collapse (count) n_zd = dth`t' (sd) sd_y_zd = dth`t' (mean) e_y_zd = dth`t', by(subsampnum $instrument d_`t'); /* The first digit represents whether the instrument is 1 or 0; the second digit represents whether swang is 1 or 0 */; gen comb = 10*$instrument + d_`t'; drop $instrument; drop d_`t'; reshape wide n_zd sd_y_zd e_y_zd, i(subsampnum) j(comb); rename n_zd0 n_zd00; rename sd_y_zd0 sd_y_zd00; rename e_y_zd0 e_y_zd00; rename n_zd1 n_zd01; rename sd_y_zd1 sd_y_zd01; rename e_y_zd1 e_y_zd01; replace sd_y_zd01 = 0 if sd_y_zd01 == .; replace sd_y_zd11 = 0 if sd_y_zd11 == .; replace sd_y_zd00 = 0 if sd_y_zd00 == .; replace sd_y_zd10 = 0 if sd_y_zd10 == .; tempfile cond_dz; replace subsampnum = 0; sort subsampnum; save `cond_dz'; use `cond_z'; $quietly merge subsampnum using `cond_dz'; drop _merge; /* Evaluate Q(theta) at different values of theta for each subsample. */; /* The grid over theta goes from -1 to 1 by 0.02. */; $quietly expand 101; sort subsamp; gen theta = -1.0 + 0.02*(_n-1); $quietly replace theta = 0 if abs(theta) < 1.0e-12; /* Calculate Q(theta) using theta and the conditional means. */; pqd_qtheta `t'; $quietly replace q_theta1 = q_theta1 * sqrt(`numobs'); $quietly replace q_theta2 = q_theta2 * sqrt(`numobs'); $quietly gen q_theta = q_theta1; /* Save the result and restore the original dataset */; sort theta; rename q_theta q_theta`t'; pqd_point `t'; $quietly save pqdbound`t'_rhc.dta, replace; restore; end; capture program drop pqd_qtheta; program define pqd_qtheta; /* This program calculates q_theta for the PQD bounds estimator using the formulas in BSV (2012) */; /* t is the number of days since hospital admission */; args t; $quietly gen a = e_d_z1 - e_d_z0; $quietly gen b = e_y_z1 - e_y_z0; $quietly gen c_tilde = e_dy_z1 - e_notdy_z0 - e_d_z0; $quietly gen c = e_y_zd11 - e_y_zd00; $quietly gen c1_prime = e_y_zd11 - e_y_zd10; $quietly gen c2_prime = e_y_zd01 - e_y_zd10; $quietly gen c3_prime = e_y_zd11 - e_y_zd00; $quietly gen c4_prime = e_y_zd01 - e_y_zd00; $quietly gen c1_dbprime = e_dy_z1 - e_notdy_z1; $quietly gen c2_dbprime = e_dy_z0 - e_notdy_z1; $quietly gen c3_dbprime = e_dy_z1 - e_notdy_z0; $quietly gen c4_dbprime = e_dy_z0 - e_notdy_z0; $quietly gen h1 = e_y_z1 - e_y_z0; $quietly gen h2 = e_y_z1 - e_y_zd00; $quietly gen h3 = e_y_zd11 - e_y_z0; $quietly gen h4 = e_y_zd11 - e_y_zd00; $quietly gen se_a = sqrt( (((sd_d_z1)^2) / n_z1) + (((sd_d_z0)^2) / n_z0) ) ; $quietly gen se_b = sqrt( (((sd_y_z1)^2) / n_z1) + (((sd_y_z0)^2) / n_z0) ) ; $quietly gen se_c_tilde = sqrt( (((sd_dy_z1)^2) / n_z1) + (((sd_notdnoty_z0)^2) / n_z0) ); $quietly gen se_c = sqrt( (((sd_y_zd11)^2) / n_zd11) + (((sd_y_zd00)^2) / n_zd00) ); $quietly gen se_c1_prime = sqrt( (((sd_y_zd11)^2) / n_zd11) + (((sd_y_zd10)^2) / n_zd10) ); $quietly gen se_c2_prime = sqrt( (((sd_y_zd01)^2) / n_zd01) + (((sd_y_zd10)^2) / n_zd10) ); $quietly gen se_c3_prime = sqrt( (((sd_y_zd11)^2) / n_zd11) + (((sd_y_zd00)^2) / n_zd00) ); $quietly gen se_c4_prime = sqrt( (((sd_y_zd01)^2) / n_zd01) + (((sd_y_zd00)^2) / n_zd00) ); $quietly gen se_c1_dbprime = sqrt(( 4*(sd_dy_z1)^2 + (sd_y_z1)^2 - 4*e_dy_z1*(1-e_y_z1) ) / n_z1); $quietly gen se_c2_dbprime = sqrt( (((sd_dy_z0)^2) / n_z0) + (((sd_notdy_z1)^2) / n_z1) ); $quietly gen se_c3_dbprime = sqrt( (((sd_dy_z1)^2) / n_z1) + (((sd_notdy_z0)^2) / n_z0) ); $quietly gen se_c4_dbprime = sqrt(( 4*(sd_dy_z0)^2 + (sd_y_z0)^2 - 4*e_dy_z0*(1-e_y_z0) ) / n_z0); $quietly gen se_h1 = sqrt( (((sd_y_z1)^2) / n_z1) + (((sd_y_z0)^2) / n_z0) ); $quietly gen se_h2 = sqrt( (((sd_y_z1)^2) / n_z1) + (((sd_y_zd00)^2) / n_zd00) ); $quietly gen se_h3 = sqrt( (((sd_y_zd11)^2) / n_zd11) + (((sd_y_z0)^2) / n_z0) ); $quietly gen se_h4 = sqrt( (((sd_y_zd11)^2) / n_zd11) + (((sd_y_zd00)^2) / n_zd00) ); * summ sd_y_zd10 sd_y_zd01 sd_y_zd00 n_zd10 n_zd01 n_zd00; * summ se_* c*_* h*; $quietly gen q_theta1 = max(-b/se_b, 0)^2 + max((b - theta)/se_b, 0)^2 + max((theta - c)/se_c, 0)^2 if theta > 0; $quietly replace q_theta1 = max( b/se_b, (c_tilde - theta)/se_c_tilde, (theta - h1)/se_h1, (theta - h2)/se_h2, (theta - h3)/se_h3, (theta - h4)/se_h4, 0 ) if theta < 0; $quietly replace q_theta1 = abs(b)/se_b if theta == 0; $quietly gen q_theta2 = max( abs(a)/se_a, (c1_dbprime - theta)/se_c1_dbprime, (c2_dbprime - theta)/se_c2_dbprime, (c3_dbprime - theta)/se_c3_dbprime, (c4_dbprime - theta)/se_c4_dbprime, (theta - c1_prime)/se_c1_prime, (theta - c2_prime)/se_c2_prime, (theta - c3_prime)/se_c3_prime, (theta - c4_prime)/se_c4_prime); end; capture program drop pqd_point; program define pqd_point; /* This program calculates the PQD bounds "point" estimator (no inference) */; /* t is the number of days since hospital admission */; args t; gen bl_pqd`t' = b if b>=0; gen bu_pqd`t' = c3_prime if b>=0; replace bl_pqd`t' = c_tilde if b<0; replace bu_pqd`t' = e_dy_z1 + (1-e_d_z1)*min(e_y_zd11,e_y_zd10) - e_notdy_z0 - e_d_z0*max(e_y_zd01,e_y_zd00) if b<0; end; capture program drop calc_bounds; program define calc_bounds; /* This program runs the pqdbound program for a given group of patients */; args grp; /* grp is the subset of data to be analyzed (all, cat11, cat13, cat18, cat19) */; use "sub_sample_`grp'.dta"; /* Calculate Q(theta) thresholds for the Manski bound for mortality at varying horizons */; pqdbound 7; /* For dth7 */; pqdbound 30; /* For dth30 */; pqdbound 60; /* For dth60 */; pqdbound 90; /* For dth90 */; pqdbound 180; /* For dth180 */; /* Merge all the horizons in to a single dataset */; /* Each row has thresholds for a different value of theta */; use pqdbound7_temp, clear; foreach i of numlist 30 60 90 180 {; $quietly merge theta using pqdbound`i'_temp; $quietly summ _merge; if ((r(mean) != 3)|(r(sd) !=0)) {; display "Merge with pqdbound`i'_temp.dta for `grp' failed" as error; exit; }; drop _merge; sort theta; }; save pqdbound_thresholds_`grp', replace; !del pqdbound7_temp.dta !del pqdbound30_temp.dta !del pqdbound60_temp.dta !del pqdbound90_temp.dta !del pqdbound180_temp.dta /* Calculate Q(theta) for the original data */; use "limited_data_`grp'.dta"; pqdbound2 7 _N; pqdbound2 30 _N; pqdbound2 60 _N; pqdbound2 90 _N; pqdbound2 180 _N; /* Merge all the horizons in to a single dataset */; /* Each row has q_theta for a different value of theta */; use pqdbound7_rhc, clear; foreach i of numlist 30 60 90 180 {; $quietly merge theta using pqdbound`i'_rhc; $quietly summ _merge; if ((r(mean) != 3)|(r(sd) !=0)) {; display "Merge with pqdbound`i'_rhc.dta for `grp' failed" as error; exit; }; drop _merge; sort theta; }; $quietly save pqdbound_qtheta_`grp'.dta, replace; !del pqdbound7_rhc.dta !del pqdbound30_rhc.dta !del pqdbound60_rhc.dta !del pqdbound90_rhc.dta !del pqdbound180_rhc.dta /* Merge with threshold file */; $quietly merge theta using pqdbound_thresholds_`grp'.dta; $quietly summ _merge; if ((r(mean) != 3)|(r(sd) !=0)) {; display "Merge with pqdbound_thresholds_`grp'.dta failed" as error; exit; }; drop _merge; * set more on; * list theta q_theta* *95; * set more off; /* Characterize the confidence interval */; foreach i of numlist 7 30 60 90 180 {; $quietly gen in`i'_90 = q_theta`i' <= pqdbound`i'_90; $quietly gen in`i'_95 = q_theta`i' <= pqdbound`i'_95; * list theta q_theta`i' in`i'_90 in`i'_95 pqdbound`i'_90 pqdbound`i'_95; gen in_pqd`i' = (bu_pqd`i' >= theta) * (bl_pqd`i' <= theta); replace in`i'_90 = 1 if in_pqd`i' == 1; replace in`i'_95 = 1 if in_pqd`i' == 1; gen t_in`i'_90 = theta*in`i'_90; replace t_in`i'_90 = . if t_in`i'_90==0; gen t_in`i'_95 = theta*in`i'_95; replace t_in`i'_95 = . if t_in`i'_95==0; list theta q_theta`i' in`i'_90 in`i'_95 in_pqd`i'; }; foreach i of numlist 7 30 60 90 180 {; egen lb`i'_90 = min(t_in`i'_90); egen ub`i'_90 = max(t_in`i'_90); egen lb`i'_95 = min(t_in`i'_95); egen ub`i'_95 = max(t_in`i'_95); }; save pqdbound_raw_conf_`grp'.dta, replace; collapse (max) lb7_90 lb30_90 lb60_90 lb90_90 lb180_90 lb7_95 lb30_95 lb60_95 lb90_95 lb180_95 (min) ub7_90 ub30_90 ub60_90 ub90_90 ub180_90 ub7_95 ub30_95 ub60_95 ub90_95 ub180_95; /* Put 90% and 95% confidence bounds into different rows */; $quietly gen id = 1; $quietly reshape long lb7_ lb30_ lb60_ lb90_ lb180_ ub7_ ub30_ ub60_ ub90_ ub180_, i(id) j(size); foreach i of numlist 7 30 60 90 180 {; rename lb`i'_ `grp'_lb_conf`i'; rename ub`i'_ `grp'_ub_conf`i'; }; drop id; $quietly reshape long `grp'_lb_conf `grp'_ub_conf, i(size) j(period); $quietly gen estimator = "pqdbound"; $quietly replace `grp'_ub_conf = 1 if `grp'_ub_conf == .; $quietly replace `grp'_lb_conf = -1 if `grp'_lb_conf == .; !del pqdbound_qtheta_`grp'.dta !del pqdbound_thresholds_`grp'.dta /* Save the final file */; sort size period; save pqdbound_conf_`grp'.dta, replace; list; end; /* ******************************************************************************** */; /* Calculate PQD bound confidence intervals */; calc_bounds all; foreach i of numlist 11/15 17/19 {; calc_bounds cat`i'; }; use pqdbound_conf_all.dta; foreach i of numlist 11/15 17/19 {; sort size period; merge size period using pqdbound_conf_cat`i'.dta; $quietly summ _merge; if ((r(mean) != 3)|(r(sd) !=0)) {; display "Merge with pqdbound_conf_cat`i'.dta failed" as error; exit; }; drop _merge; }; save pqdbound_conf.dta, replace; !del pqdbound_conf_all.dta !del pqdbound_conf_cat11.dta !del pqdbound_conf_cat12.dta !del pqdbound_conf_cat13.dta !del pqdbound_conf_cat14.dta !del pqdbound_conf_cat15.dta !del pqdbound_conf_cat17.dta !del pqdbound_conf_cat18.dta !del pqdbound_conf_cat19.dta clear; use pqdbound_raw_conf_all.dta; keep theta in*; gen str group = "all"; save pqdbound_raw_conf_all.dta, replace; foreach i of numlist 11/15 17/19 {; use pqdbound_raw_conf_cat`i'; keep theta in*; gen str group = "cat`i'"; save pqdbound_raw_conf_cat`i', replace; }; clear; use pqdbound_raw_conf_all.dta; foreach i of numlist 11/15 17/19 {; append using pqdbound_raw_conf_cat`i'; }; foreach i of numlist 7 30 60 90 180 {; foreach j of numlist 90 95 {; replace in`i'_`j' = . if in`i'_`j' == 0; if (`i' != `j') {; rename in`i'_`j' in`j'_`i'; }; }; }; reshape long in95_ in90_ in_pqd, i(theta group) j(days); rename in95_ in95; rename in90_ in90; replace in95 = theta if in95 == 1; replace in90 = theta if in90 == 1; replace in_pqd = theta if in_pqd == 1; replace in_pqd = . if in_pqd == 0; rename in95 in_pqd95; rename in90 in_pqd90; save pqdbound_raw_conf.dta, replace; !del pqdbound_raw_conf_all.dta !del pqdbound_raw_conf_cat11.dta !del pqdbound_raw_conf_cat12.dta !del pqdbound_raw_conf_cat13.dta !del pqdbound_raw_conf_cat14.dta !del pqdbound_raw_conf_cat15.dta !del pqdbound_raw_conf_cat17.dta !del pqdbound_raw_conf_cat18.dta !del pqdbound_raw_conf_cat19.dta /* Done! */; set more on;