/* Split KAZ2 by the ENGAGED flag in a single pass:
     engaged=0 rows go to KAZ20, engaged=1 rows go to KAZ21;
   rows with any other value of ENGAGED are written to neither.
   Both pieces are then sorted by SUBGROUP and VARIABLE. */
data kaz20 kaz21;
  set kaz2;
  if engaged=0 then output kaz20;
  else if engaged=1 then output kaz21;
run;

proc sort data=kaz20;
  by subgroup variable;
run;

proc sort data=kaz21;
  by subgroup variable;
run;
/* Build NEW: one row per (subgroup, Variable) pair that exists in BOTH
   KAZ20 (engaged=0) and KAZ21 (engaged=1), with the two groups' summary
   statistics (n, mean, SD, min, max) placed side by side.
   This is an inner join, so a (subgroup, Variable) pair present in only
   one of the two inputs is dropped from NEW.
   NOTE: the output column spellings varriable_name and enagage0_/enagage1_
   min/max are kept exactly as they were so that anything reading NEW by
   those names keeps working. */
proc sql;
create table new as
select
a.subgroup as school_level,
a.Variable as varriable_name,
a.n as engage0_n,
a.Mean as engage0_mean,
a.StdDev as engage0_SD,
a.Min as enagage0_min,
a.Max as enagage0_max,
b.n as engage1_n,
b.Mean as engage1_mean,
b.StdDev as engage1_SD,
b.Min as enagage1_min,
b.Max as enagage1_max
from kaz20 a
inner join kaz21 b
on a.subgroup=b.subgroup and a.Variable=b.Variable
;
/* End the SQL procedure explicitly instead of relying on the next step
   boundary to terminate it. */
quit;
/* Build NEW2 from NEW: for every row, test whether the engaged=1 mean
   differs from the engaged=0 mean and attach a significance marker.
   Adds the columns difference, POOLED_SE, T_value, P_value and sig. */
data new2;set new;
/*QC
engage1_n=100;
engage1_mean=.5;
engage1_SD=.2;
engage0_n=120;
engage0_mean=.55;
engage0_SD=.2;
*/
/*t-test*/
difference=engage1_mean-engage0_mean;
/*https://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm*/
/* Despite its name, this is the UNPOOLED standard error of the difference,
   sqrt(s1**2/n1 + s0**2/n0). */
POOLED_SE=sqrt( ( (engage1_SD*engage1_SD) / engage1_n ) + ( (engage0_SD*engage0_SD ) / engage0_n ) );
T_value=abs(difference)/POOLED_SE;
/* Two-sided p-value taken from the standard normal CDF (PROBNORM).
   NOTE(review): this treats T_value as a z statistic; for small group
   sizes a t distribution (PROBT with suitable degrees of freedom) would
   give larger p-values - confirm the normal approximation is intended. */
P_value=(1-probnorm(T_value))*2;
/* Declare SIG before its first assignment. Without this, SAS sizes SIG from
   the first literal assigned to it ("* ", 2 characters) and "***" is stored
   truncated as "**", making p<0.001 indistinguishable from p<0.01.
   The statement sits here so the column order of NEW2 is unchanged. */
length sig $3;
*if P_value < 0.1 then sig="t";
/* Later, stricter thresholds overwrite earlier ones, so the strongest
   marker that applies wins. */
if P_value < 0.05 then sig="* ";
if P_value < 0.01 then sig="** ";
if P_value < 0.001 then sig="***";
/* A missing P_value compares as smaller than any number in SAS and so
   passes every test above; this reset must therefore stay last. */
if P_value =. then sig="";
run;