# day2_practice3
# Anscombe's quartet: per-column summaries, pairwise correlations, one simple
# linear regression per (x_i, y_i) pair, and a scatter plot with fitted line.
anscombe
colMeans(anscombe)
vapply(anscombe, sd, numeric(1))
cor(anscombe)

# Labels "x1 vs. y1", ..., "x4 vs. y4" shared by the tables and plot titles.
cname <- paste(paste0("x", 1:4), paste0("y", 1:4), sep = " vs. ")

# Correlation within each pair. with() scopes the column lookup to this one
# expression instead of attach()-ing the data frame to the search path.
cal_corr <- with(
  anscombe,
  c(cor(x1, y1), cor(x2, y2), cor(x3, y3), cor(x4, y4))
)
names(cal_corr) <- cname
cal_corr

# One regression per pair; data = anscombe resolves the column names.
res <- list(
  lm(y1 ~ x1, data = anscombe),
  lm(y2 ~ x2, data = anscombe),
  lm(y3 ~ x3, data = anscombe),
  lm(y4 ~ x4, data = anscombe)
)

# Rows: intercept, slope, error variance, R^2; one column per pair.
summ <- matrix(0, 4, 4)
colnames(summ) <- cname
rownames(summ) <- c("estimated beta0", "estmated beta1","estiamted error variance","R-squre")
for (i in 1:4) {
  fit_summary <- summary(res[[i]])
  # Components are taken by name rather than by position in the summary list.
  summ[1:2, i] <- fit_summary$coefficients[1:2, "Estimate"]
  # sigma is the residual standard error; the error variance is its square.
  summ[3, i] <- fit_summary$sigma^2
  summ[4, i] <- fit_summary$r.squared
}
summ

#x11()
#par(mfrow=c(2,2))
for (i in 1:4) {
  # Columns 1-4 of anscombe are x1..x4 and columns 5-8 are y1..y4.
  plot(anscombe[, i], anscombe[, i + 4], pch = 16, col = 2, main = cname[i],
       xlab = paste0("x", i),
       ylab = paste0("y", i),
       xlim = c(4, 20), ylim = c(3, 13))
  # Overlay the fitted regression line for this pair.
  abline(res[[i]], col = 4, lwd = 2)
}
# ggplot2
library(ggplot2)
head(mtcars)

# A plot is built incrementally: data + aesthetic mapping, then layers.
p <- ggplot(mtcars, aes(wt, mpg, color = cyl))
p <- p + geom_point()
p # print(p)

# Inspect the components stored in the plot object. Each component is
# spelled out in full: `$` partial matching (the former `p$coordinate`)
# is fragile and is not guaranteed to resolve to `coordinates`.
attributes(p)
p$data
p$layers
p$scales
p$mapping
p$theme
p$coordinates
p$facet
p$plot_env
p$labels
summary(p)

# Bar chart of cylinder counts, one panel per number of gears.
p <- ggplot(mtcars, aes(factor(cyl), fill = factor(cyl)))
p <- p + geom_bar(width = .5)
p <- p + facet_grid(. ~ gear)
p
# aes: aesthetics may be mapped once in ggplot() or separately in each layer.
p <- ggplot(data = iris, mapping = aes(x = Sepal.Length, y = Sepal.Width))
p + geom_point(mapping = aes(color = Species))

# Two point layers, each with its own x/y mapping. Colour and point shape
# are fixed constants here, not mapped to data.
p <- ggplot(data = iris) +
  xlab("Length") +
  ylab("Width") +
  geom_point(
    mapping = aes(x = Sepal.Length, y = Sepal.Width),
    colour = "blue", pch = 19
  ) +
  geom_point(
    mapping = aes(x = Petal.Length, y = Petal.Width),
    colour = "red", pch = 17
  )
p
summary(p)

# Constant colour and size for every point ...
p <- ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point(colour = "orange", size = 6)
p

# ... versus colour and size mapped to variables of the data.
p <- ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point(mapping = aes(colour = cyl, size = gear))
p
# geom_line
# dev.new() opens a fresh window on the platform's default graphics device;
# x11() only works where an X11 device is available.
dev.new()
p <- ggplot(data = mtcars, aes(x = wt, y = mpg))
p + geom_line()

# Second window: same data drawn as a thick, dashed, coloured line.
dev.new()
p2 <- ggplot(data = mtcars, aes(x = wt, y = mpg))
p2 + geom_line(aes(x = wt, y = mpg), linetype = 2, size = 2, color = 2)
# geom_abline: reference lines drawn on top of a scatter plot
# Intercept and slope of the least-squares fit of mpg on wt.
mw_coef <- coef(lm(mpg ~ wt, mtcars))

p <- ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  xlim(1, 5) +
  ylim(10, 35) +
  geom_point()

# Fitted regression line.
p + geom_abline(intercept = mw_coef[1], slope = mw_coef[2], color = 4)

# Vertical and horizontal reference lines.
p +
  geom_vline(xintercept = 3, color = 2) +
  geom_hline(yintercept = 20, color = 2)
# geom_smooth: smoothed trend line over the scatter plot
p <- ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point()

# One smoother for all observations.
p + geom_smooth()

# One smoother per cylinder group, distinguished by line type.
p + geom_smooth(mapping = aes(linetype = factor(cyl)))
# geom_bar: counts of cars per number of cylinders
p <- ggplot(data = mtcars, mapping = aes(factor(cyl)))

# Uniform fill with a black outline.
p + geom_bar(fill = "steelblue", color = "black")

# Bars split (stacked) by number of gears.
p + geom_bar(mapping = aes(fill = factor(gear)), color = "black")
# geom_area
# LakeHuron is a yearly time series (1875-1972); flatten it to a data frame.
huron <- data.frame(year = 1875:1972, level = as.vector(LakeHuron))
p <- ggplot(data = huron, aes(x = year, y = level))
p + geom_area()

# Zoom the y axis to the observed range (+/- 2) without dropping any data.
p + geom_area(fill = "steelblue") +
  coord_cartesian(ylim = c(min(huron$level) - 2, max(huron$level) + 2))

# Ribbon of +/- 2 around the level. Inside aes() the column is referenced by
# its bare name so it is evaluated in the layer data; `huron$level` would
# bypass the data argument and makes ggplot2 emit a warning.
p <- ggplot(data = huron,
            aes(x = year, y = level,
                ymin = level - 2,
                ymax = level + 2))
p + geom_ribbon(fill = "steelblue")
# geom_boxplot: distribution of mpg for each number of cylinders
p <- ggplot(data = mtcars, mapping = aes(x = factor(cyl), y = mpg))
p + geom_boxplot()

# Further split every box by number of carburetors.
p + geom_boxplot(mapping = aes(fill = factor(carb)))
# geom_histogram
# TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
movies <- read.csv(file = "movies.csv", header = TRUE)
head(movies)
dim(movies)

p <- ggplot(data = movies, aes(x = rating))
p + geom_histogram()
p + geom_histogram(binwidth = 1)

# Histogram on the density scale, bars shaded by their count, with a kernel
# density estimate drawn on top.
p + geom_histogram(binwidth = 1,
                   aes(y = ..density.., fill = ..count..)) +
  geom_density(color = "red") +
  scale_fill_gradient(low = "white", high = "#496ff5")
# geom_density: kernel density estimate of the movie ratings
p <- ggplot(data = movies, mapping = aes(x = rating))
p + geom_density()

# One semi-transparent density per MPAA category.
p + geom_density(mapping = aes(fill = factor(mpaa)), alpha = 0.25)
# geom_text: label every point with the car's name (the row names of mtcars)
p <- ggplot(
  data = mtcars,
  mapping = aes(x = wt, y = mpg, label = rownames(mtcars))
) +
  geom_point()

# Labels are shifted slightly to the right of their point and coloured by
# number of cylinders.
p + geom_text(
  mapping = aes(x = wt + 0.05, color = factor(cyl)),
  size = 5
)
# geom_map: choropleth of murder arrests by US state
library(reshape2)
library(mapproj)
library(maps)

# State names are lower-cased before being used as map ids below.
crimes <- data.frame(state = tolower(rownames(USArrests)), USArrests)
head(crimes)

states_map <- map_data("state")
head(states_map)

# expand_limits() makes the axes span the whole map outline.
p <- ggplot(data = crimes, mapping = aes(map_id = state)) +
  geom_map(mapping = aes(fill = Murder), map = states_map) +
  expand_limits(x = states_map$long, y = states_map$lat)
p + coord_map()
# ECDF: empirical cumulative distribution of two simulated normal samples
# 100 draws from N(0, sd = 2) followed by 100 draws from N(1, sd = 4);
# g marks which sample each draw belongs to.
df <- data.frame(
  x = c(rnorm(100, 0, 2), rnorm(100, 1, 4)),
  g = gl(2, 100)
)
p <- ggplot(data = df, mapping = aes(x, color = g))
p + stat_ecdf()
p + stat_ecdf(geom = "line", size = 1)
# stat_function: overlay a theoretical curve on an estimated density
dat <- data.frame(x = rnorm(100))

# Kernel density estimate of the simulated sample.
p <- ggplot(data = dat, mapping = aes(x = x)) +
  geom_density(fill = "green", alpha = 0.15)

# Standard normal density drawn as a filled area on top of it.
p + stat_function(
  fun = dnorm,
  color = "red",
  fill = "red",
  alpha = 0.15,
  geom = "area"
)
# coord_cartesian: zoom into a region of the plot
p <- ggplot(data = mtcars, mapping = aes(x = disp, y = wt)) +
  geom_smooth()
p

# Same smoother, view restricted to the given x and y ranges.
p + coord_cartesian(xlim = c(325, 500), ylim = c(3, 6))
# coord_flip: swap the x and y axes
# Horizontal box plots of mpg per number of cylinders.
p <- ggplot(data = mtcars, mapping = aes(x = factor(cyl), y = mpg))
p + geom_boxplot() + coord_flip()

# Horizontal bar chart of the number of cars per cylinder count.
p <- ggplot(data = mtcars, mapping = aes(factor(cyl)))
p + geom_bar(fill = "steelblue", color = "black") + coord_flip()
# coord_fixed(): fix the aspect ratio between the y and x units
x <- c(500, 350, 700, 600, 400)
y <- c(10, 20, 30, 30, 20)
dat <- data.frame(x, y)

p <- ggplot(data = dat, mapping = aes(x = x, y = y)) +
  geom_point(size = 5)
p

# With ratio = 1, one unit on x is drawn as long as one unit on y.
p + coord_fixed(ratio = 1)
# coord_map section: outline map of Korea
# (the code below draws it with coord_fixed(), not coord_map())
library(maps)
world <- map_data("world")
head(world)

# Keep the regions whose name ends in "Korea".
korea <- world[grepl("Korea$", world$region), ]
head(korea)

# group keeps every polygon's vertices together while drawing.
p <- ggplot(data = korea, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white", colour = "black")
p
p + coord_fixed(ratio = 1)
# coord_trans: transform the coordinate system
p <- ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, colour = factor(cut))
) +
  geom_point()
p

# Same points with both axes on a log10 scale.
p + coord_trans(x = "log10", y = "log10")
# ggmap: Google road map of Incheon with a single marker
library(ggmap)

# Longitude / latitude of the marker.
lon <- 126.653
lat <- 37.45

map <- get_googlemap(
  "Incheon",
  zoom = 12,
  maptype = "roadmap",
  markers = data.frame(lon, lat)
)
ggmap(map)
# get_map
# TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
food <- read.csv(file = "food.csv", header = TRUE)
head(food)

# Centre the map on the fixed Incheon coordinates (lon, lat) set for the
# get_googlemap() example. The script previously also called
# geocode("Incheon") here, but unconditionally overwrote its result with
# these coordinates, so that unused network request has been dropped.
loc <- c(lon, lat)
map <- get_map(loc, zoom = 13, maptype = "satellite")

# Every restaurant is a bubble sized and filled by its rating, with its name
# printed slightly up and to the right of the bubble.
p <- ggmap(map, extent = "device") +
  geom_point(
    aes(x = Lon, y = Lat, size = Rating, fill = Rating),
    alpha = 0.6, pch = 21, data = food) +
  scale_size(range = c(0, 10)) +
  geom_text(aes(x = Lon + 0.001, y = Lat + 0.001,
                label = Name), color = "white", size = 5, data = food)
p
# Wordcloud in R
# install.packages(c('tm', 'SnowballC', 'wordcloud'))
library(tm)
library(SnowballC)
library(wordcloud)

jeopQ <- read.csv("JEOPARDY_CSV.csv", stringsAsFactors = FALSE)

### Data cleaning
# Build a corpus with one document per question.
jeopCorpus <- Corpus(VectorSource(jeopQ$Question))

# Strip punctuation such as . , ? ! ( )
jeopCorpus <- tm_map(jeopCorpus, removePunctuation)

# Drop a few explicit words plus the English stop-word list
# (stopwords("en") is an equivalent spelling).
words_to_drop <- c("the", "this", "The", "This", stopwords("english"))
jeopCorpus <- tm_map(jeopCorpus, removeWords, words_to_drop)

# Reduce words to their stems, e.g. "questions" -> "question".
jeopCorpus <- tm_map(jeopCorpus, stemDocument)

# Plot at most 100 words, most frequent first.
wordcloud(jeopCorpus, max.words = 100, random.order = FALSE)
# day2_practice3.txt · Last modified: by 127.0.0.1
