'SAS loop through geocoded dataset to find closest locations from another geocoded dataset
I have 2 lists of locations that I have geocoded with latitude and longitude. I am able to take a single row from a dataset and loop through the other list grabbing the closest 3 locations. I am looking to loop through the entire list and create a new dataset with all of the closest ID's. I received a good amount of help from the SAS forum on geocoding in order to get this code.
/*Facility_ID*/
data _null_;
set LTC_ll(obs=1); \*I want the entire loop, not just first variable*\
call symputx('ll1', catx(',',y,x));
run;
/** lat/long of first location;*/
* extracting lat/long & coalesce ID numbers for identification of pharmacies;
proc sql outobs=10;
create table lat_long as
select Y, X, state, Provider_Pin
from pharm_ll;
*
place number of zip in a macro variable
in this example you know it is 5
but you might not know in another use of the SAS code
;
data _null_;
call symputx('nlls',obs);
stop;
set lat_long nobs=obs;
run;
* create a macro that contains a loop to access Google Maps multiple time;
%macro distance_time;
/* clear the log on each iteration */
dm 'clear log';
* delete any data set named DISTANCE_TIME that might exist in the WORK library;
proc datasets lib=work nolist;
delete distance_time;
quit;
%do j=1 %to &nlls;
data _null_;
nrec = &j;
set lat_long point=nrec;
call symputx('ll2', catx(',',y,x));
stop;
run;
* lat/long of centroid of zip 12203 hard-coded as part of the URL;
filename x url "https://www.google.com/maps/dir/&ll1/&ll2/?force=lite";
filename z temp;
* same technique used in the example with a pair of lat/long coodinates;
data _null_;
infile x recfm=f lrecl=1 end=eof;
file z recfm=f lrecl=1;
input @1 x $char1.;
put @1 x $char1.;
if eof;
call symputx('filesize',_n_);
run;
* drive time as a numeric variable;
data temp;
infile z recfm=f lrecl=&filesize. eof=done;
input @ 'miles' +(-15) @ '"' distance :comma12. text $30.;
units = scan(text,1,'"');
text = scan(text,3,'"');
* convert times to seconds;
select;
* combine days and hours;
when (find(text,'d') ne 0) time = sum(86400*input(scan(text,1,' '),best.),
3600*input(scan(text,3,' '),best.));
* combine hours and minutes;
when (find(text,'h') ne 0) time = sum(3600*input(scan(text,1,' '),best.),
60*input(scan(text,3,' '),best.));
* just minutes;
otherwise time = 60*input(scan(text,1,' '),best.);
end;
output;
keep distance time;
stop;
done:
output;
run;
filename x clear;
filename z clear;
* add an observation to the data set DISTANCE_TIME;
proc append base=distance_time data=temp;
run;
%end;
%mend;
* use the macro;
%distance_time;
*
add variables from original data set to new data set distance_time
use geodist function to calculate straight line distance
;
data distance_time;
set distance_time;
set lat_long point=_n_;
straight_line = round(geodist(&ll1,y,x,'DM'), 0.01);
run;
/*creating variables for the top 3 closest*/
proc sort data=distance_time;
by distance;
run;
data top3;
set distance_time;
keep time distance Provider_Pin;
format time time6.;
if _N_ <=3 then output;
run;
/*formats data for top ID and distance*/
proc summary data=top3 nway;
output out=pharm_form (drop=_:)
idgroup(out[3] (Provider_Pin distance)=)/autoname;
run;
proc print data=pharm_form;
run;
After looping through the first list, I would like to attach a ID from the original dataset to be able to match it back. The output should look something like this:
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|

