Skip to content
Snippets Groups Projects
Commit 785c5343 authored by Angie Liu's avatar Angie Liu
Browse files

Upload New File

parent d0870a0e
No related branches found
No related tags found
No related merge requests found
/* ----------------------------------------------------- */
/* AEDE 6120 */
/* Lab 3 */
* Importing data files from the repository;
* Making plots;
* SQL in SAS;
* Descriptive statistics;
* T-test of difference in means;
* Linear regression;
/* ----------------------------------------------------- */
* SP21 AEDE6120 repository;
* 'https://code.osu.edu/liu.6200/sp21-aede6120/-/tree/master/LabData' ;
* 'https://code.osu.edu/liu.6200/sp21-aede6120/-/tree/master/LabCode' ;
/* Importing data files from the repository */
* Step 1: point the fileref MYDATA at the raw CSV in the course repository;
filename mydata url "https://code.osu.edu/liu.6200/sp21-aede6120/-/raw/master/LabData/wageEX.csv";

* Step 2: read the CSV into WORK.WAGEDATA, replacing any existing copy;
proc import
    datafile=mydata
    out=work.wagedata
    dbms=csv
    replace;
    * Take variable names from the first row (the default, made explicit here);
    getnames=yes;
    * Scan all rows before choosing types and lengths instead of only the first few;
    guessingrows=max;
run;

* Step 3: release the fileref once the import has finished;
filename mydata clear;
/* Making plots */
* Scatter plot of horsepower against city MPG;
proc sgplot data=sashelp.cars;
    scatter y=horsepower x=mpg_city;
run;

* Line plot of mean horsepower at each city MPG value;
proc sgplot data=sashelp.cars;
    vline mpg_city / stat=mean response=horsepower;
run;
* Time-series line plot;
* Step 1 - order the stock records by date and write them to STOCKS_SORT;
proc sort out=stocks_sort data=sashelp.stocks;
    by date;
run;

* Step 2 - plot closing price over time using only the rows where stock is IBM;
proc sgplot data=stocks_sort(where=(stock eq 'IBM'));
    series y=close x=date;
run;
* Customized plot: IBM closing price with markers, grid lines, an axis label and a fitted line;
* Expects STOCKS_SORT, the date-sorted copy of SASHELP.STOCKS;
title "IBM Closing Price 1986-2006";
proc sgplot data=stocks_sort (where=(stock='IBM'));
    * SYMBOL= is given as an unquoted marker name;
    series x=Date y=Close / markers markerattrs=(color=blue symbol=asterisk)
                            lineattrs=(color=red)
                            legendlabel="IBM";
    xaxis grid;
    yaxis label="Closing Price";
    * Overlay a linear regression fit of close on date;
    reg y=close x=date;
run;
* TITLE is a global statement, so clear it to keep it off the output of later steps;
title;
/* SQL in SAS */
* SELECT - columns (variables) that you want;
* FROM - tables (datasets) that you want;
* ON - join conditions that must be met;
* WHERE - row (observation) conditions that must be met;
* GROUP BY - summarize by these columns;
* HAVING - summary conditions that must be met;
* ORDER BY - sort by these columns;
* Example 1: keep rows where female is not 0, rename educ, add squared and log wage;
proc sql;
    create table sql_new as
        select wage,
               female,
               educ       as education,
               (wage)**2  as wagesq,
               log(wage)  as lwage
        from wagedata
        where female ne 0
        /* sort on the output column name so the ORDER BY item is in the SELECT list */
        order by education;
* PROC SQL executes each statement immediately and ignores RUN - QUIT is what ends the step;
quit;
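* Comparison operators and their mnemonic equivalents;
*   =   eq  (equal to);
*   >=  ge  (greater than or equal to);
*   <=  le  (less than or equal to);
*   ~=  ne  (not equal to);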
* Example 2: count of non-missing FEMALE values within each FEMALE group;
* Note that this reuses the table name SQL_NEW and therefore replaces the example 1 result;
proc sql;
    create table sql_new as
        select female,
               count(female) as countn
        from wagedata
        group by female;
* PROC SQL ignores RUN - QUIT is what ends the step;
quit;
/* Descriptive statistics */
* Overall summary statistics - with no VAR statement all numeric variables are analyzed;
proc means data=wagedata;
run;

* Summary statistics split by group;
* Step 1 - write a copy of the data ordered by FEMALE to WAGEDATA_SORT;
proc sort out=wagedata_sort data=wagedata;
    by female;
run;

* Step 2 - summarize within each level of FEMALE;
proc means data=wagedata_sort;
    class female;
run;
* Detailed distribution statistics for WAGE;
proc univariate data=wagedata_sort;
    * BY is optional and needs the input to be sorted by FEMALE already;
    by female;
    * more analysis variables can be listed on VAR;
    var wage;
run;
* Correlations within each group;
* Step 1 - sort WAGEDATA in place by FEMALE so that BY processing can be used;
proc sort data=wagedata;
    by female;
run;

* Step 2 - correlations among wage, educ and exper for each FEMALE value;
proc corr data=wagedata;
    by female;
    var wage educ exper;
run;
/* T-test of difference in means */
* Switch ODS graphics off before running the test;
ods graphics off;
* Compare the means of wage, educ and exper between the groups defined by FEMALE;
proc ttest data=wagedata;
    class female;
    var wage educ exper;
run;
/* Linear regression */
* Switch ODS graphics off before fitting the model;
ods graphics off;
* Regress wage on educ, exper and married, saving fitted values and residuals to OLS;
proc reg data=wagedata;
    model wage = educ exper married;
    output out=ols predicted=pred residual=resid;
run;
quit;
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment