/*-----------------------------------------------------------------------------
  Purpose: Tabulate weighted RHFS building, unit and occupied-unit counts by
           building size (number of units), build the comparable AHS 2011
           renter-occupied multifamily counts, merge the two, roll them up
           into size groups and export the result to Excel.

  Inputs : rhfsv2.InterviewProps_031214, ahs2011.owner, ahs2011.newhouse
  Output : totals8.xls (dataset COUNTS3)

  NOTE   : Step 4 uses RANUNI(-1) (clock-seeded), so results differ from run
           to run.
-----------------------------------------------------------------------------*/

libname rhfsv2 'J:\REH\Surveys\RHFS\2012 RHFS\PUF\V 2.0';

data rhfs;
    set rhfsv2.InterviewProps_031214;
run;

/* Step 1: Create a property-level occupancy rate from trenoc_r and numunits_r.
   Original author's note: a few RHFS cases have far more occupied units than
   numunits_r (so ocrate can exceed 1).
   The guard leaves ocrate missing when numunits_r is 0 -- the same value the
   unguarded division produced -- without raising a division-by-zero error
   in the log. */
data rhfs;
    set rhfs;
    if numunits_r ne 0 then ocrate = trenoc_r / numunits_r;
run;

/* Step 2: Create a dataset of individual buildings by size (number of units)
   for buildings 1 through 19. */

/* %rr(i=): for building size &i, count per property how many of unitsb1-unitsb19
   equal &i, convert that to weighted buildings / units / occupied units, and
   append the non-empty rows to WORK.JJ (which must already exist). */
%macro rr(i=);
    data tt;
        set rhfs;
        array _ub{19} unitsb1-unitsb19;
        count = 0;
        do _k = 1 to 19;
            if _ub{_k} = &i then count = count + 1;
        end;
        /* unitsb20pl is deliberately NOT counted here; it is handled in Step 4. */
        bldgsize = &i;
        bldgs = (weight*count);
        Units = ((weight*count)*&i);
        OCUnits = ((weight*count)*&i*ocrate);
        keep controlpuf bldgs units OCunits count bldgsize;
    run;

    /* Append and, in the same pass, drop the all-missing seed row and any
       property that has no building of this size. */
    data jj;
        set jj tt;
        if count^=.;
        if count^=0;
    run;
%mend;

/* %jj2(maxsize=): seed WORK.JJ and call %rr for every size 1..&maxsize.
   Default 244 is, per the original author's note, the highest number of units
   in any building 1 through 19. */
%macro jj2(maxsize=244);
    %local z;
    data jj;
        format controlpuf $10. bldgs units bldgsize;
    run;
    %do z=1 %to &maxsize;
        %rr(i=&z);
    %end;
%mend;
%jj2();

/* Step 3: Sum up the number of buildings and units by building size. */
proc sort data=jj;
    by bldgsize;
run;

proc means sum data=jj noprint;
    by bldgsize;
    var bldgs units ocunits;
    output out=jj3 sum=RHFS_Bldgs1_19 RHFS_Units1_19 RHFS_OcUnits1_19;
run;

data jj3;
    set jj3;
    drop _type_ _freq_;
run;

/* USER NOTE: At this point, the sum of the variable RHFS_Units1_19 in jj3
   should equal the sum of RHFS variables unitsb1 through unitsb19. */
/*proc means sum data=jj3;var RHFS_Units1_19;run;*/

/* Step 4: Create a dataset of individual buildings by unit size for THE 20+
   BLDG by creating pseudo-buildings based on the distribution of buildings
   1 - 19 within the same property. */

/* Number of random replications; also used as the divisor in Step 5. */
%let nruns = 50;

/* %pseduo(nruns=): run the random pseudo-building allocation &nruns times and
   stack each run's by-size totals into WORK.PPTEMP. */
%macro pseduo(nruns=50);
    %local z;

    /* Seed row (all missing) so PPTEMP exists before the first append;
       it is removed again in Step 5. */
    data pptemp;
        format bldgsize RHFS_Bldgs20 RHFS_Units20 RHFS_OcUnits20;
    run;

    /* --- Everything down to PPBASE does not depend on the run number, so it
           is built once instead of once per replication. --- */
    data tt4;
        set rhfs;
        if unitsb20pl>0;
        keep unitsb1-unitsb19 unitsb20pl controlpuf weight ocrate;
    run;

    /* The model for distributing units into pseudo-buildings: one row per
       (property, building 1-19) with the building's unit count in COL1. */
    proc sort data=tt4;
        by controlpuf unitsb20pl;
    run;

    proc transpose data=tt4 out=tt5;
        by controlpuf UNITSB20PL weight ocrate;
    run;

    data tt6;
        set tt5;
        if col1>0;
        if _name_ not in ('count4','count3','NUMUNITS_R','FOVER20BLD');
        drop _label_ _name_;
    run;

    /* 23 stacked copies of the candidate list.
       NOTE(review): presumably enough copies that the draw below never runs
       out of candidates before a property's 20+ unit total is used up --
       confirm with the methodology documentation. */
    data ppbase;
        set tt6 tt6 tt6 tt6 tt6 tt6 tt6 tt6 tt6 tt6 tt6 tt6
            tt6 tt6 tt6 tt6 tt6 tt6 tt6 tt6 tt6 tt6 tt6;
    run;

    %do z=1 %to &nruns;
        /* It is important to note that this algorithm includes a random
           component. As such, the results from multiple runs of this
           algorithm will vary. */
        data pp;
            set ppbase;
            random=RANUNI(-1);
        run;

        proc sort data=pp;
            by controlpuf random;
        run;

        /* Walk each property's candidates in random order, subtracting each
           candidate's size from the property's 20+ unit total. */
        data pp1;
            set pp;
            by controlpuf;
            if first.controlpuf then newtot=UNITSB20PL;
            newtot+-col1;
            num=1;
        run;

        /* Keep only candidates that still fit (running remainder >= 0) and
           turn them into weighted building / unit / occupied-unit rows.
           RENAME takes effect on output, so col1 and weight are still usable
           by their old names inside this step. */
        data pp2;
            set pp1;
            if newtot>=0;
            rename col1=bldgsize;
            rename num=count;
            rename weight=bldgs;
            drop random newtot UNITSB20PL;
            units=col1*weight;
            ocunits=col1*weight*ocrate;
        run;

        proc sort data=pp2;
            by bldgsize;
        run;

        proc means sum data=pp2 noprint;
            by bldgsize;
            var bldgs units ocunits;
            output out=pp3 sum=RHFS_Bldgs20 RHFS_Units20 RHFS_OcUnits20;
        run;

        data pp3;
            set pp3;
            drop _type_ _freq_;
        run;

        data pptemp;
            set pptemp pp3;
        run;
    %end;
%mend;
%pseduo(nruns=&nruns);

/* Step 5: Average the number of buildings in each building size across the
   &nruns runs.
   A run in which a given size was never drawn contributes NO row to PPTEMP,
   so a plain MEAN would average only over the runs where the size appeared
   and overstate rare sizes. Summing and dividing by the number of runs
   treats those absent runs as zero. */
proc sort data=pptemp;
    by bldgsize;
run;

proc means sum data=pptemp noprint;
    by bldgsize;
    var RHFS_Bldgs20 RHFS_Units20 RHFS_OcUnits20;
    output out=ppfinal sum=RHFS_Bldgs20 RHFS_Units20 RHFS_OcUnits20;
run;

data ppfinal;
    set ppfinal;
    drop _type_ _freq_;
    if bldgsize^=.;   /* removes the all-missing seed row */
    RHFS_Bldgs20   = RHFS_Bldgs20   / &nruns;
    RHFS_Units20   = RHFS_Units20   / &nruns;
    RHFS_OcUnits20 = RHFS_OcUnits20 / &nruns;
run;

/* Step 6: Add together the size groupings (buildings 1-19 plus the
   pseudo-buildings from the 20+ slot). Single-unit buildings are dropped. */
data rhfsbybldg;
    merge jj3 ppfinal;
    by bldgsize;
run;

data rhfsbybldg;
    set rhfsbybldg;
    if bldgsize^=1;
    if RHFS_Bldgs20=. then RHFS_Bldgs20=0;
    if RHFS_units20=. then RHFS_units20=0;
    if RHFS_ocunits20 = . then RHFS_ocunits20=0;
    RHFS_Bldgs = RHFS_BLDGS1_19 + RHFS_Bldgs20;
    RHFS_Units = RHFS_units1_19 + RHFS_units20;
    RHFS_OcUnits = RHFS_ocunits1_19 + RHFS_ocunits20;
    keep bldgsize rhfs_bldgs rhfs_units rhfs_ocunits;
run;

/* Step 7: Create an AHS dataset of MF buildings, by size, for comparison to
   RHFS. */
libname ahs2011 'J:\REH\Surveys\AHS\AHS2011\PUF\v1.2';

data ww;
    set ahs2011.owner;
    keep control ownhere;
run;

data zz;
    set ahs2011.newhouse;
    keep control condo nunits wgt90geo tenure istatus proj hudadmin market;
run;

proc sort data=ww;
    by control;
run;

proc sort data=zz;
    by control;
run;

data zz1;
    merge zz ww;
    by control;
    rename nunits=bldgsize;
run;

data zz2;
    set zz1;
    if condo='3';
    if istatus in ('1','2','3');   /* all types of interviews */
    if tenure in ('2','3');        /* only the renters, which when combined with the
                                      statement above amounts to only renter-occupied
                                      units */
    if bldgsize>1;                 /* get rid of single family units */
    if bldgsize in (2,3,4) and ownhere='1' then delete;
                                   /* get rid of small buildings where owner lives
                                      on site */
    if proj^='1';                  /* get rid of public housing */
    if hudadmin^='1';              /* get rid of public housing */
run;

/* Weighted count of renter-occupied units by building size; dividing by the
   building size converts units to an estimated number of buildings. */
proc freq data=zz2 noprint;
    weight wgt90geo;
    table bldgsize / out=zz3;
run;

data zz3;
    set zz3;
    ahs_bldgs=count/bldgsize;
    ahs_ocunits=count;
    keep bldgsize ahs_bldgs ahs_ocunits;
run;

/* Step 8: Merge RHFS and AHS estimates. */
proc sort data=rhfsbybldg;
    by bldgsize;
run;

proc sort data=zz3;
    by bldgsize;
run;

data COUNTS;
    merge rhfsbybldg zz3;
    by bldgsize;
run;

/* Step 9: Summarize by building size group. Sizes up to 24 keep their own
   group; larger sizes are banded 25-49, 50-99 and 100+. */
data counts2;
    set counts;
    ngrp=0;
    if bldgsize<=24 then ngrp=bldgsize;
    /* if nunits >= 10 and nunits <=24 then ngrp=1024; */
    if bldgsize >= 25 and bldgsize <=49 then ngrp=2549;
    if bldgsize >= 50 and bldgsize <=99 then ngrp=5099;
    if bldgsize >= 100 then ngrp=10000;
run;

/* Sort explicitly so the BY statement below does not depend on ngrp happening
   to be in ascending order after the bldgsize merge. */
proc sort data=counts2;
    by ngrp;
run;

/* NOTE(review): the second output name is "rhfs_unit" (no trailing s), unlike
   its input rhfs_units. It is left unchanged because it is the column name
   written to the exported sheet -- confirm before renaming. */
proc means sum data=counts2 noprint;
    by ngrp;
    var rhfs_bldgs rhfs_units rhfs_ocunits ahs_bldgs ahs_ocunits;
    output out=counts3 sum=rhfs_bldgs rhfs_unit rhfs_ocunits ahs_bldgs ahs_ocunits;
run;

data counts3;
    set counts3;
    drop _type_ _freq_;
run;

proc export data=counts3 outfile='J:\REH\Surveys\RHFS\2012 RHFS\Documentation\Weighting\totals8.xls' replace;
run;