day2_practice1
Differences
This shows you the differences between two versions of the page.
day2_practice1 [2017/12/12 11:51] – created hyjeong | day2_practice1 [2021/03/17 13:09] (current) – external edit 127.0.0.1 | ||
---|---|---|---|
Line 1: | Line 1: | ||
+ | .Platform | ||
+ | setwd('/ | ||
+ | raw_dat = read.csv(file=" | ||
+ | | ||
+ | head(raw_dat[, | ||
+ | dim(raw_dat) | ||
+ | |||
+ | # gl ( group의 수, group의 크기 ) | ||
+ | # 1, | ||
+ | gr_ind = gl(2, 221) # 1 x 221, 2 x 221 = c(1, | ||
+ | |||
+ | dat_mat <- t(as.matrix(raw_dat[, | ||
+ | dim(dat_mat) | ||
+ | rownames(dat_mat) <- paste0(" | ||
+ | colnames(dat_mat) <- raw_dat[, | ||
+ | head(dat_mat[, | ||
+ | |||
+ | indx <- which(is.na(dat_mat), | ||
+ | indx | ||
+ | col_ind = indx[,2] | ||
+ | col_m = apply(dat_mat[, | ||
+ | col_m | ||
+ | dat_mat[indx] = col_m | ||
+ | sum(is.na(dat_mat)) | ||
+ | dim(dat_mat) | ||
+ | |||
+ | uq_names <- unique(colnames(dat_mat)) | ||
+ | p <- length(uq_names) ; n <- dim(dat_mat)[1] | ||
+ | expr_dat <- matrix(0, | ||
+ | for(i in 1:p) { | ||
+ | expr_dat[, | ||
+ | dat_mat[, | ||
+ | cat(' | ||
+ | colnames(expr_dat) <- uq_names | ||
+ | rownames(expr_dat) <- rownames(dat_mat) | ||
+ | head(expr_dat[, | ||
+ | dim(expr_dat); | ||
+ | |||
+ | #Set working directory | ||
+ | setwd(' | ||
+ | # Read a dataset | ||
+ | dat = read.table(" | ||
+ | head(dat) | ||
+ | dat$Job | ||
+ | attach(dat) | ||
+ | Job | ||
+ | plot(Job, main=" | ||
+ | box() | ||
+ | freq = table(dat[, | ||
+ | barplot(freq) | ||
+ | |||
+ | |||
+ | data(VADeaths) | ||
+ | library(gplots) | ||
+ | #x11() | ||
+ | barplot2(VADeaths, | ||
+ | col = gray(seq(0.4, | ||
+ | | ||
+ | ylim = c(0, 100)) | ||
+ | title(main = "Death Rates in Virginia", | ||
+ | |||
+ | hh <- t(VADeaths)[, | ||
+ | mybarcol <- " | ||
+ | ci.l <- hh * 0.85 | ||
+ | ci.u <- hh * 1.15 | ||
+ | mp <- barplot2(hh, | ||
+ | col = gray(seq(0.4, | ||
+ | legend = colnames(VADeaths), | ||
+ | ylim = c(0, 100), | ||
+ | main = "Death Rates in Virginia", | ||
+ | font.main = 4, | ||
+ | sub = "Faked 95 percent error bars", | ||
+ | cex.names = 1.5, | ||
+ | plot.ci = TRUE, | ||
+ | ci.l = ci.l, ci.u = ci.u, plot.grid = TRUE) | ||
+ | box() | ||
+ | |||
+ | |||
+ | |||
+ | #x11() | ||
+ | pie(freq, main=" | ||
+ | pie(rep(1, 24), col = rainbow(24), | ||
+ | pie.sales <- c(0.12, 0.3, 0.26, 0.16, 0.04, 0.12) | ||
+ | lbl = c(" | ||
+ | " | ||
+ | names(pie.sales) = paste0(lbl," | ||
+ | pie(pie.sales, | ||
+ | |||
+ | |||
+ | # histogram | ||
+ | x <- expr_dat[, | ||
+ | #x11() | ||
+ | hist(x, | ||
+ | hist(x, | ||
+ | hist(x, | ||
+ | |||
+ | |||
+ | #boxplot | ||
+ | mat = expr_dat[, | ||
+ | #x11() # 비어있는 그림 창 생성 | ||
+ | boxplot(mat) | ||
+ | res = boxplot(mat, | ||
+ | res | ||
+ | |||
+ | #x11() | ||
+ | c_name = colnames(expr_dat) | ||
+ | plot(expr_dat[, | ||
+ | # | ||
+ | plot(expr_dat[, | ||
+ | |||
+ | # plot | ||
+ | pop_dat = read.csv(file=' | ||
+ | #x11() | ||
+ | plot(pop_dat[, | ||
+ | # | ||
+ | plot(pop_dat[, | ||
+ | |||
+ | # scatter plot | ||
+ | #x11() | ||
+ | # pch는 점의 모양 선택, (e.g., pch=16 => 채워진 원) | ||
+ | ind1 = 8; ind2=12 | ||
+ | plot(expr_dat[, | ||
+ | | ||
+ | cor_mat = cor(expr_dat) | ||
+ | which.max(cor_mat[ind1, | ||
+ | ind1 = 8; ind2=200 | ||
+ | plot(expr_dat[, | ||
+ | | ||
+ | |||
+ | |||
+ | #x11() | ||
+ | #pairs example | ||
+ | ind = c(2, | ||
+ | pairs(expr_dat[, | ||
+ | pairs(expr_dat[, | ||
+ | pch = 21, bg = c(" | ||
+ | |||
+ | |||
+ | # Stat | ||
+ | mean(expr_dat[, | ||
+ | median(expr_dat[, | ||
+ | |||
+ | x = c(1, | ||
+ | tb_x = table(x); tb_x | ||
+ | as.numeric(names(tb_x)[which.max(tb_x)]) | ||
+ | Mode = function(vec) { | ||
+ | tb = table(vec) | ||
+ | return(as.numeric(names(tb)[which.max(tb)])) } | ||
+ | Mode(x) | ||
+ | |||
+ | quantile(expr_dat[, | ||
+ | quantile(expr_dat[, | ||
+ | |||
+ | min(expr_dat[, | ||
+ | max(expr_dat[, | ||
+ | range(expr_dat[, | ||
+ | |||
+ | x <- rnorm(100) | ||
+ | summary(x) # 수치형 자료의 summary | ||
+ | y <- c(' | ||
+ | summary(y) # 문자형 자료의 summary | ||
+ | f.y <- factor(y); summary(f.y) # 요인의 summary | ||
+ | |||
+ | var(expr_dat[, | ||
+ | sum((expr_dat[, | ||
+ | sd(expr_dat[, | ||
+ | |||
+ | # CV | ||
+ | height=c(72, | ||
+ | sd(height)/ | ||
+ | |||
+ | |||
+ | install.packages(" | ||
+ | library(moments) | ||
+ | skewness(expr_dat[, | ||
+ | kurtosis(expr_dat[, | ||
+ | hist(expr_dat[, | ||
+ | x = seq(5.5, | ||
+ | lines(x, | ||
+ | |||
+ | # contingency table | ||
+ | #1차원 도수분포표 | ||
+ | table(mtcars$cyl) | ||
+ | table(mtcars$am) | ||
+ | #2차원 분할표 | ||
+ | table(mtcars$cyl, | ||
+ | #3차원 분할표 | ||
+ | table(mtcars$cyl, | ||
+ | |||
+ | # cov , cor | ||
+ | cov(expr_dat[, | ||
+ | cov(expr_dat[, | ||
+ | var(expr_dat[, | ||
+ | |||
+ | cor(expr_dat[, | ||
+ | cor(expr_dat[, | ||
+ | |||
+ | |