# Exploratory-graphics walkthrough:
#   1. Anscombe's quartet with base R (summary statistics, lm fits, plots)
#   2. A tour of ggplot2 geoms, stats and coordinate systems
#   3. ggmap examples
#   4. A word cloud built with tm / SnowballC / wordcloud
#
# Files read from the working directory: movies.csv, food.csv,
# JEOPARDY_CSV.csv. The ggmap section calls remote map/geocoding services.

# Anscombe's quartet ----
anscombe
colMeans(anscombe)
vapply(anscombe, sd, numeric(1))
cor(anscombe)

pair_ids <- 1:4
x_names <- paste0("x", pair_ids)
y_names <- paste0("y", pair_ids)
# Labels "x1 vs. y1", ..., "x4 vs. y4", reused for names and plot titles.
cname <- paste(x_names, y_names, sep = " vs. ")

# Correlation within each (x_i, y_i) pair. Columns are read from the data
# frame directly instead of attach()-ing it to the search path.
cal_corr <- vapply(
  pair_ids,
  function(i) cor(anscombe[[x_names[i]]], anscombe[[y_names[i]]]),
  numeric(1)
)
names(cal_corr) <- cname
cal_corr

# One simple linear regression per pair.
res <- list(
  lm(y1 ~ x1, data = anscombe),
  lm(y2 ~ x2, data = anscombe),
  lm(y3 ~ x3, data = anscombe),
  lm(y4 ~ x4, data = anscombe)
)

# Rows: intercept, slope, error variance, R-squared; columns: the four pairs.
summ <- matrix(0, 4, 4)
colnames(summ) <- cname
rownames(summ) <- c(
  "estimated beta0", "estimated beta1",
  "estimated error variance", "R-square"
)
for (i in seq_along(res)) {
  fit_summary <- summary(res[[i]])
  summ[1:2, i] <- fit_summary$coefficients[1:2, "Estimate"]
  # summary.lm reports sigma (the residual standard error); square it so the
  # stored value matches the "error variance" row label.
  summ[3, i] <- fit_summary$sigma^2
  summ[4, i] <- fit_summary$r.squared
}
summ

#x11()
#par(mfrow=c(2,2))
# Scatter plot of each pair on common axes with its fitted line.
for (i in pair_ids) {
  plot(
    anscombe[[x_names[i]]], anscombe[[y_names[i]]],
    pch = 16, col = 2, main = cname[i],
    xlab = x_names[i], ylab = y_names[i],
    xlim = c(4, 20), ylim = c(3, 13)
  )
  abline(res[[i]], col = 4, lwd = 2)
}

# ggplot2 ----
library(ggplot2)
head(mtcars)

p <- ggplot(mtcars, aes(wt, mpg, color = cyl))
p <- p + geom_point()
p  # print(p)

# Inspect the components of a ggplot object.
attributes(p)
p$data
p$layers
p$scales
p$mapping
p$theme
p$coordinates  # full name; `p$coordinate` relied on partial matching
p$facet
p$plot_env
p$labels
summary(p)

p <- ggplot(mtcars, aes(factor(cyl), fill = factor(cyl)))
p <- p + geom_bar(width = .5)
p <- p + facet_grid(. ~ gear)
p

# aes ----
p <- ggplot(data = iris, aes(x = Sepal.Length, y = Sepal.Width))
p + geom_point(aes(color = Species))

# Two point layers with their own mappings on one plot.
p <- ggplot(data = iris)
p <- p + xlab("Length") + ylab("Width")
p <- p + geom_point(
  mapping = aes(x = Sepal.Length, y = Sepal.Width),
  colour = "blue", pch = 19
)
p <- p + geom_point(
  mapping = aes(x = Petal.Length, y = Petal.Width),
  colour = "red", pch = 17
)
p
summary(p)

# Constant aesthetics (set outside aes) ...
p <- ggplot(data = mtcars, aes(x = wt, y = mpg))
p <- p + geom_point(colour = "orange", size = 6)
p

# ... versus aesthetics mapped to variables (inside aes).
p <- ggplot(data = mtcars, aes(x = wt, y = mpg))
p <- p + geom_point(aes(colour = cyl, size = gear))
p

# geom_line ----
x11()
p <- ggplot(data = mtcars, aes(x = wt, y = mpg))
p + geom_line()

x11()
p2 <- ggplot(data = mtcars, aes(x = wt, y = mpg))
p2 + geom_line(aes(x = wt, y = mpg), linetype = 2, size = 2, color = 2)

# geom_abline / geom_vline / geom_hline ----
mw_coef <- coef(lm(mpg ~ wt, mtcars))
p <- ggplot(data = mtcars, aes(x = wt, y = mpg))
p <- p + xlim(1, 5) + ylim(10, 35) + geom_point()
p + geom_abline(intercept = mw_coef[1], slope = mw_coef[2], color = 4)
p + geom_vline(xintercept = 3, color = 2) +
  geom_hline(yintercept = 20, color = 2)

# geom_smooth ----
p <- ggplot(data = mtcars, aes(x = wt, y = mpg))
p <- p + geom_point()
p + geom_smooth()
p + geom_smooth(aes(linetype = factor(cyl)))

# geom_bar ----
p <- ggplot(data = mtcars, aes(factor(cyl)))
p + geom_bar(fill = "steelblue", color = "black")
p + geom_bar(aes(fill = factor(gear)), color = "black")

# geom_area / geom_ribbon ----
huron <- data.frame(year = 1875:1972, level = as.vector(LakeHuron))
p <- ggplot(data = huron, aes(x = year, y = level))
p + geom_area()
p + geom_area(fill = "steelblue") +
  coord_cartesian(ylim = c(min(huron$level) - 2, max(huron$level) + 2))

# Refer to the column by name inside aes() rather than via `huron$`.
p <- ggplot(
  data = huron,
  aes(x = year, y = level, ymin = level - 2, ymax = level + 2)
)
p + geom_ribbon(fill = "steelblue")

# geom_boxplot ----
p <- ggplot(mtcars, aes(factor(cyl), mpg))
p + geom_boxplot()
p + geom_boxplot(aes(fill = factor(carb)))

# geom_histogram ----
movies <- read.csv(file = "movies.csv", header = TRUE)
head(movies)
dim(movies)

p <- ggplot(data = movies, aes(x = rating))
p + geom_histogram()
p + geom_histogram(binwidth = 1)
# Density-scaled histogram, bars shaded by count, with a density curve on top.
p + geom_histogram(aes(y = ..density.., fill = ..count..), binwidth = 1) +
  geom_density(color = "red") +
  scale_fill_gradient(low = "white", high = "#496ff5")

# geom_density ----
p <- ggplot(movies, aes(x = rating))
p + geom_density()
p + geom_density(aes(fill = factor(mpaa)), alpha = 0.25)

# geom_text ----
p <- ggplot(mtcars, aes(x = wt, y = mpg, label = rownames(mtcars)))
p <- p + geom_point()
# Shift labels slightly right so they do not sit on top of the points.
p + geom_text(aes(x = wt + 0.05, color = factor(cyl)), size = 5)

# geom_map ----
library(reshape2)
library(mapproj)
library(maps)

# State names are lower-cased and used as the map_id for geom_map.
crimes <- data.frame(state = tolower(rownames(USArrests)), USArrests)
head(crimes)
states_map <- map_data("state")
head(states_map)

p <- ggplot(crimes, aes(map_id = state))
p <- p + geom_map(aes(fill = Murder), map = states_map)
p <- p + expand_limits(x = states_map$long, y = states_map$lat)
p + coord_map()

# ECDF ----
df <- data.frame(
  x = c(rnorm(100, 0, 2), rnorm(100, 1, 4)),
  g = gl(2, 100)
)
p <- ggplot(df, aes(x, color = g))
p + stat_ecdf()
p + stat_ecdf(geom = "line", size = 1)

# stat_function ----
dat <- data.frame(x = rnorm(100))
p <- ggplot(dat, aes(x = x))
p <- p + geom_density(fill = "green", alpha = 0.15)
p + stat_function(
  fun = dnorm, color = "red", fill = "red", alpha = 0.15, geom = "area"
)

# coord_cartesian ----
p <- ggplot(data = mtcars, aes(x = disp, y = wt))
p <- p + geom_smooth()
p
p + coord_cartesian(xlim = c(325, 500), ylim = c(3, 6))

# coord_flip ----
p <- ggplot(mtcars, aes(factor(cyl), mpg))
p + geom_boxplot() + coord_flip()

p <- ggplot(mtcars, aes(factor(cyl)))
p + geom_bar(fill = "steelblue", color = "black") + coord_flip()

# coord_fixed ----
x <- c(500, 350, 700, 600, 400)
y <- c(10, 20, 30, 30, 20)
dat <- data.frame(x, y)
p <- ggplot(data = dat, aes(x = x, y = y))
p <- p + geom_point(size = 5)
p
p + coord_fixed(ratio = 1)

# coord_map ----
library(maps)
world <- map_data("world")
head(world)
# Keep the regions whose name ends in "Korea".
korea <- world[grep("Korea$", world$region), ]
head(korea)

p <- ggplot(korea, aes(x = long, y = lat, group = group))
p <- p + geom_polygon(fill = "white", colour = "black")
p
p + coord_fixed(ratio = 1)

# coord_trans ----
p <- ggplot(data = diamonds, aes(x = carat, y = price, colour = factor(cut)))
p <- p + geom_point()
p
p + coord_trans(x = "log10", y = "log10")

# ggmap ----
library(ggmap)
lon <- 126.653
lat <- 37.45
map <- get_googlemap(
  "Incheon",
  zoom = 12, maptype = "roadmap",
  markers = data.frame(lon, lat)
)
ggmap(map)

# get_map ----
food <- read.csv(file = "food.csv", header = TRUE)
head(food)

loc <- geocode("Incheon")
loc <- as.numeric(loc)
# NOTE(review): the geocoded location is immediately replaced by the fixed
# coordinates above, so only (lon, lat) reaches get_map() -- confirm intent.
loc <- c(lon, lat)
map <- get_map(loc, zoom = 13, maptype = "satellite")

# Rated locations from food.csv drawn on the satellite map, with name labels
# nudged off the markers.
p <- ggmap(map, extent = "device") +
  geom_point(
    aes(x = Lon, y = Lat, size = Rating, fill = Rating),
    alpha = 0.6, pch = 21, data = food
  ) +
  scale_size(range = c(0, 10)) +
  geom_text(
    aes(x = Lon + 0.001, y = Lat + 0.001, label = Name),
    color = "white", size = 5, data = food
  )
p

# Wordcloud in R ----
#install.packages(c('tm', 'SnowballC', 'wordcloud'))
library(tm)
library(SnowballC)
library(wordcloud)

jeopQ <- read.csv("JEOPARDY_CSV.csv", stringsAsFactors = FALSE)

### Data cleaning
jeopCorpus <- Corpus(VectorSource(jeopQ$Question))
# Strip punctuation such as . , ? ! ( )
jeopCorpus <- tm_map(jeopCorpus, removePunctuation)
# Drop a few explicit words plus the English stop-word list
# (stopwords("en") is the same list).
jeopCorpus <- tm_map(
  jeopCorpus, removeWords,
  c("the", "this", "The", "This", stopwords("english"))
)
# Reduce words to their stems (suffix stripping, e.g. "questions" ->
# "question").
jeopCorpus <- tm_map(jeopCorpus, stemDocument)

wordcloud(jeopCorpus, max.words = 100, random.order = FALSE)