Wednesday 28 March 2018

xpt to sas

%macro xpttosas;
filename a "C:\Users\mypc\Desktop\sdata";
%let open=%sysfunc(dopen(a));
%put &open;
%let cl=%sysfunc(dnum(&open.));
%put &cl;
%let x=C:\Users\mypc\Desktop\sdata;
%let y=C:\Users\mypc\Desktop\sasd;
libname y1 "&y";
%do i=1 %to &cl;
%let dr=%scan(%sysfunc(dread(&open.,&i.)),1,'.');
libname xpts xport "&x\&dr..xpt";
proc copy in=xpts out=y1;
select &dr.;
run;
%end;
%mend xpttosas;

sas to xpt

%macro dd (s1,s2,s3,s4,s5);
%do i=1 %to 5;
data &&s&i;
set sashelp.class;
run;
%end;
%MEND;
%DD (dm,cm,ae,ce,sv);
%macro sasxpt;
proc sql;
select left(put(COUNT (memname),2.) )into :cnt from dictionary.tables where libname="WORK"   AND
MEMTYPE="DATA";
select memname into :x1-:x&cnt from dictionary.tables where libname="WORK"   AND
MEMTYPE="DATA";
QUIT;
%do i=1 %to &cnt;
proc cport data=&&x&i  file="C:\Users\hcl\Desktop\ra\&&x&i...xpt";
run;
%end;
%mend;
options symbolgen;
%sasxpt

%put &x5 ;

Thursday 22 February 2018

procedure :means freq print univariate Tabulate sort summary

PRINT: Syntax 
   PROC PRINT DATA= SAS-data-set
              DOUBLE
              NOOBS
              UNIFORM
              LABEL
              SPLIT= 'split-character'
              N
              ROUND
              HEADING= direction
              ROWS= page-format
              WIDTH= column-width;
      VAR variable-list;
      ID variable-list;
      BY variable-list;
           PAGEBY BY-variable;
           SUMBY BY-variable;
      SUM variable-list;

SORT: Syntax 
   PROC SORT DATA= SAS-data-set
             OUT= SAS-data-set
             NODUPLICATES|NODUPREC|NODUP
             NODUPKEY
             EQUALS|NOEQUALS
             REVERSE
             FORCE
             ASCII
             EBCDIC
             DANISH|NORWEGIAN
             FINNISH|SWEDISH
             NATIONAL
             SORTSEQ= collating-sequence
             SORTSIZE= memory-specification
             TAGSORT
             Host Options;
      BY <DESCENDING> variable ... ;     /* required */
UNIVARIATE: Syntax PROC UNIVARIATE DATA= SASdataset NOPRINT PLOT FREQ NORMAL PCTLDEF= value VARDEF= DF|WEIGHT|WGT|N|WDF ROUND= roundoff unit...; VAR variables; BY variables; FREQ variable; WEIGHT variable; ID variables; OUTPUT OUT= SASdataset keyword= names...; MEANS: Syntax PROC MEANS <option-list> <statistic-keyword-list>; VAR variable-list; BY variable-list; CLASS variable-list; FREQ variable; WEIGHT variable; ID variable-list; OUTPUT <OUT= SAS-data-set> <output-statistic-list> <MINID|MAXID <(var-1<(id-list-1)> <...var-n<(id-list-n)>>)>=name-list>;


Proc Freq: Syntax ;
The FREQ Procedure produces one-way to n-way frequency 

PROC FREQ < Options > ;
TABLES Requests < / Options > ;
BY Variables ;
WEIGHT Variable < / Options> ;
TEST Options ;
EXACT Statistic-Options < / computation-options > ;
OUTPUT < OUT=SAS-data-set > Options ;
RUN;

Proc Freq Data = Events;
Tables TRT*AE / Chisq;
Weight CNT;
Output Out=Chs_sta Chisq;
Run;


Proc SUMMARY and Proc MEANS are essentially the same procedure. Both procedures compute descriptive statistics. 
The main difference concerns the default type of output they produce. 
Proc MEANS by default produces printed output in the LISTING window or other open destination whereas Proc SUMMARY does not. 
Inclusion of the print option on the Proc SUMMARY statement will output results to the output window.

proc means data = sashelp.shoes;
run;

proc summary data = sashelp.shoes print; 
run;

PROC SUMMARY: syntax:

PROC SUMMARY <option>;
VAR variable(s)</options>;
BY variable;
CLASS variable(s) </ option(s)>;
FREQ variable;
ID variable(s);
OUTPUT <OUT=SAS-data-set><output-statistic-specification(s)>
<id-group-specification(s)> <maximum-id-specification(s)> 
<minimum-id-specification(s)></ option(s)> ;
TYPES request(s);
WAYS list;
WEIGHT variable;
run;

STATEMENT
BY-Calculate separate statistics for each BY group
CLASS-Identify variables whose values define subgroups for the analysis
FREQ-Identify a variable whose values represent the frequency of each observation
ID-Include additional identification variables in the output data set
OUTPUT-Create an output data set that contains specified statistics and identification variables
TYPES-Identify specific combinations of class variables to use to subdivide the data
VAR-Identify the analysis variables and their order in the results
WAYS-Specify the number of ways to make unique combinations of class variables
WEIGHT-Identify a variable whose values weight each observation in the statistical calculations.


Proc Tabulate :Syntax:- 
Proc Tabulate<options>;
Class variable list; 
Var analysis variable list; 
By variable list; 
Table table-specifications/<table options>; 
Classlev variable<s>/<options>; 
run;


Wednesday 21 February 2018

AVERAGE

proc sort data=sashelp.class out=avgd;
by sex;
run;

proc sql;
create table avg  as select avg(age)as avrg, sex from sashelp.class
group by sex;
quit;
data bb;
SET AVGD avg;
BY SEX;
if missing (age)THEN DO
SEX= ""  ;
age=avrg;
END;
DROP AVRG;
run;


average

data base;
input id visit$ aval;
cards;
101 scr  20
101 base 25
101 v1   25
101 v2   26
102 scr  25
102 base 30
102 v1   25
102 v2   30
103 scr  23
103 base 33
;
run;

create seq
data gg;
set base;
by id;
if first.id=1 then seq=1;
else seq+1;
run;


data ds avg1;
set gg;
by id;
if seq > 2 then output avg1;
else output ds;
run;

proc sql;
create table dos as select avg(aval) as avg,id from avg1
group by id;
quit;
data final;
set ds avg1 dos;
by id;
if missing (aval) then aval=avg;
/*drop seq avg;*/
run; 

Tuesday 20 February 2018

concept of LOCF WOCF BOCF MIN MAX

data HAVE;
input SUBJID  VISITN   AVAL;
cards;
001  1  26
001  2  41         
001  3  35
002  1  12
002  2  21
003  1  22
;
run;

/*locf*/
data want;
set have;
by SUBJID;
output;
if last.SUBJID;
DTYPE="LOCF";
if VISITN<5
then do VISITN = VISITN+1 to 5;
output;
end;
run;
/*wocf*/
proc sort data=have out=ds;
by subjid aval;
run;

data wocf;
set ds;
by subjid;
output;
if last.subjid=1;
dtype="wocf";
if visitn <5 then  do
visitn=visitn+1 to 5;
output;
end;
run;
proc sort data=wocf;
by subjid visitn aval;
run;

/*bocl*/
data base;
input id visit$ aval;
cards;
101 scr  20
101 base 25
101 v1   25
101 v2   26
102 scr  25
102 base 30
102 v1   25
102 v2   30
103 scr  23
103 base 33
;
run;

proc sort data=base out=base2;
by id decending visit ;
run;

data bocf;
set base2;
by id ;
if first.id=1 then seq=1;
else seq+1;
output;
if last.id=1;
dtype="bocf";
if seq <5 then do
seq=seq+1 to 5;
output;
end;
run;

proc sort data=bocf out=bocf1;
by id aval;
run;

data avg base1 post;
set base;
by id ;
if first.id=1 then seq=1;
else seq+1;
if  seq > 2 then output post;
else output base1;
run;
data ret;
set post;
by id;
if first.id=1 then x=aval;
else x+aval;
run;

Monday 19 February 2018

calculation of LOCF

data HAVE;
input SUBJID  VISITN   AVAL;
cards;
001  1  26
001  2  41         
001  3  35
002  1  12
002  2  21
003 1   22
;
run;
data want;
set have;
by SUBJID;
output;
if last.SUBJID;
DTYPE="LOCF";
if VISITN<5
then do VISITN = VISITN+1 to 5;
output;
end;
run;

Sunday 18 February 2018

highest nth value in proc sql

data have;
input EMPNO Salary empcode $;
cards;
111 4000 A
112 6000 A
114 2000 A
115 8000 A
223 2000 B
226 1000 B
228 3000 B
300 500 C
333 700 C
345 300 C
356 200 C
320 700 C
;
run;
proc sql;
create table want as
 select *,
  (select count(*) from 
    (select distinct Salary,empcode from have) as b 
   where b.empcode=a.empcode and b.Salary gt a.Salary) as n
  from have as a
   where calculated n=2;
quit;