https://data.oecd.org/healthcare/length-of-hospital-stay.htm
WWC version 4
Studying R on DataCamp
My memo
library(gapminder)
library(dplyr)
library(ggplot2)
# Median GDP per capita and median life expectancy for each continent,
# using only the 2007 rows of gapminder.
by_continent_2007 <- gapminder %>%
  filter(year == 2007) %>%
  group_by(continent) %>%
  summarise(
    medianGdpPercap = median(gdpPercap),
    medianLifeExp = median(lifeExp)
  )
# Scatter plot of the per-continent medians: GDP per capita on the x axis,
# life expectancy on the y axis, one color per continent.
ggplot(
  data = by_continent_2007,
  mapping = aes(x = medianGdpPercap, y = medianLifeExp, color = continent)
) +
  geom_point()
R regression models
# Keep the rows of core2 where year is 1, then regress zpost_test on the
# treatment indicator, pre_test, and the covariates listed in the formula.
year1 <- filter(core2, year == 1)
linearMod_y1 <- lm(
  zpost_test ~ treat + pre_test + Male +
    Grade09 + Grade10 + Grade11 + Grade12 + Other +
    Black + hispanic + Par_BA + par_edu_miss,
  data = year1
)
# Show the fitted model.
print(linearMod_y1)
Tire
Water bottle for Dogs
Memo for R from DataCamp
# Mean post_test and pre_test for every value of year in core2.
core2 %>%
  # filter(year == 4) %>%   # disabled: would restrict the summary to year 4
  group_by(year) %>%
  summarise(
    mean_score = mean(post_test),
    mean_score2 = mean(pre_test)
  )
# Mean post_test and pre_test for the year == 4 rows of core2 only.
core2 %>%
  filter(year == 4) %>%
  summarise(
    mean_score = mean(post_test),
    mean_score2 = mean(pre_test)
  )
# Scatter plot comparing pop and lifeExp, with color representing continent.
# The 1952 subset is built here first so that the plot does not depend on an
# object created somewhere else in the notes.
gapminder_1952 <- gapminder %>%
  filter(year == 1952)

# pop is drawn on a log10 x axis.
ggplot(gapminder_1952, aes(x = pop, y = lifeExp, color = continent)) +
  geom_point() +
  scale_x_log10()
library(gapminder)
library(dplyr)
library(ggplot2)
# Keep only the 1952 rows of gapminder.
gapminder_1952 <- filter(gapminder, year == 1952)
# Scatter plot of pop (x) against gdpPercap (y) for the 1952 subset,
# with log10 scales on both axes.
ggplot(
  data = gapminder_1952,
  mapping = aes(x = pop, y = gdpPercap)
) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10()
#this is a popular data editing package
library(tidyverse)
#this includes datasets
library(gapminder)
#this one provides Summarize() (capital S; not the same function as dplyr's summarize())
library(FSA)
# The gapminder data frame is supplied by the gapminder package.

# Summarize lifeExp within each continent.
result_mean1 <- Summarize(lifeExp ~ continent, data = gapminder)

# Summarize lifeExp within each continent-by-year combination.
result_mean2 <- Summarize(lifeExp ~ continent + year, data = gapminder)

# The two summaries are kept in result_mean1 and result_mean2.
# Multiply lifeExp by 12 in the returned data frame. The result is only
# printed; gapminder itself is not modified.
mutate(gapminder, lifeExp = lifeExp * 12)
library(gapminder)
library(dplyr)
# For the 2007 rows of gapminder, add lifeExpMonths (12 times lifeExp) and
# sort from the largest lifeExpMonths to the smallest.
gapminder %>%
  filter(year == 2007) %>%
  mutate(lifeExpMonths = 12 * lifeExp) %>%
  arrange(desc(lifeExpMonths))
# Scatter plot of GrowthMindset (x) against SelfEfficacy (y) from abc1.
ggplot(
  data = abc1,
  mapping = aes(x = GrowthMindset, y = SelfEfficacy)
) +
  geom_point()
# create scales
#
# Each scale score is the row-wise mean of its survey items. Taking
# rowMeans(..., na.rm = TRUE) directly produces a score even when items are
# missing (and NaN when every item is missing), so the helper below blanks
# out rows that do not have enough answered items.

# Row-wise mean of the item vectors passed in `...`.
#   ...        item columns (equal-length vectors), one per survey question
#   min_valid  minimum number of non-missing items a row needs to get a score;
#              NULL (the default) means every item must be answered
# Returns a numeric vector with one score per row, NA where too few items
# were answered.
scale_mean <- function(..., min_valid = NULL) {
  items <- cbind(...)
  if (is.null(min_valid)) {
    min_valid <- ncol(items)
  }
  answered <- rowSums(!is.na(items))
  score <- rowMeans(items, na.rm = TRUE)
  # Covers both partially answered rows and the all-missing NaN case.
  score[answered < min_valid] <- NA_real_
  score
}

# Keep the identifier/treatment columns and replace the raw items with
# the five scale scores.
wholedata <- transmute(
  wholedata,
  flag, dataID, commonID, treat,
  GrowthMindset = scale_mean(
    q0008_0001, q0008_0002, q0008_0003, q0008_0004,
    q0008_0005, q0008_0006, q0008_0007, q0008_0008
  ),
  SelfEfficacy = scale_mean(
    q0009_0001, q0009_0002, q0009_0003, q0009_0004, q0009_0005
  ),
  MSelfEfficacy = scale_mean(
    q0010_0001, q0010_0002, q0010_0003, q0010_0004,
    q0010_0005, q0010_0006, q0010_0007
  ),
  MathAnxiety = scale_mean(
    q0011_0001, q0011_0002, q0011_0003, q0011_0004,
    q0011_0005, q0011_0006
  ),
  TeacherUse = scale_mean(
    q0012_0001, q0012_0002, q0012_0003, q0012_0004,
    q0013_0001, q0013_0002, q0013_0003, q0013_0004
  )
)
R basics
Stack two datasets row-wise (the two datasets must have identical variables)
total <- rbind(a, b)
Keep the rows I want.
# Rows of products whose profit is above 12; rows with a missing profit are
# left out, as subset() does.
products[!is.na(products$profit) & products$profit > 12, , drop = FALSE]
Summarize()
Using R to do regular data things
# SINGLE AGGREGATE
# Mean of each scale column of abc, skipping missing values.
# The earlier attempt, kept for reference, took plain means with no NA handling:
#   sapply(abc[c("GrowthMindset", "SelfEfficacy", "MSelfEfficacy","MathAnxiety","TeacherUse")], mean)
vapply(
  abc[c(
    "GrowthMindset", "SelfEfficacy", "MSelfEfficacy",
    "MathAnxiety", "TeacherUse"
  )],
  function(x) mean(x, na.rm = TRUE),
  numeric(1)
)
# Table of means of three scales for each value of treat, stored in result1.
# NOTE(review): the formula interface of aggregate() drops rows with a missing
# value in any listed variable by default (na.action = na.omit) - confirm this
# is the intended sample.
result1 <- aggregate(
  cbind(GrowthMindset, SelfEfficacy, MSelfEfficacy) ~ treat,
  data = abc,
  FUN = function(x) mean(x, na.rm = TRUE)
)
# Full set of descriptives (sum, mean, min, quartiles, median, max, sd) of all
# five scales for each value of treat. The result is printed, not stored.
aggregate(
  cbind(GrowthMindset, SelfEfficacy, MSelfEfficacy, MathAnxiety, TeacherUse) ~ treat,
  data = abc,
  FUN = function(x) {
    quartiles <- quantile(x)
    c(
      sum = sum(x), mean = mean(x), min = min(x), q1 = quartiles[2],
      median = median(x), q3 = quartiles[4], max = max(x), sd = sd(x)
    )
  }
)