day2_practice1
Differences
This shows you the differences between two versions of the page.
| day2_practice1 [2017/12/12 11:51] – created hyjeong | day2_practice1 [2021/03/17 13:09] (current) – external edit 127.0.0.1 | ||
|---|---|---|---|
| Line 1: | Line 1: | ||
| + | .Platform | ||
| + | setwd('/ | ||
| + | raw_dat = read.csv(file=" | ||
| + | | ||
| + | head(raw_dat[, | ||
| + | dim(raw_dat) | ||
| + | |||
| + | # gl(number of groups, size of each group) | ||
| + | # 1, | ||
| + | gr_ind = gl(2, 221) # 1 x 221, 2 x 221 = c(1, | ||
| + | |||
| + | dat_mat <- t(as.matrix(raw_dat[, | ||
| + | dim(dat_mat) | ||
| + | rownames(dat_mat) <- paste0(" | ||
| + | colnames(dat_mat) <- raw_dat[, | ||
| + | head(dat_mat[, | ||
| + | |||
| + | indx <- which(is.na(dat_mat), | ||
| + | indx | ||
| + | col_ind = indx[,2] | ||
| + | col_m = apply(dat_mat[, | ||
| + | col_m | ||
| + | dat_mat[indx] = col_m | ||
| + | sum(is.na(dat_mat)) | ||
| + | dim(dat_mat) | ||
| + | |||
| + | uq_names <- unique(colnames(dat_mat)) | ||
| + | p <- length(uq_names) ; n <- dim(dat_mat)[1] | ||
| + | expr_dat <- matrix(0, | ||
| + | for(i in 1:p) { | ||
| + | expr_dat[, | ||
| + | dat_mat[, | ||
| + | cat(' | ||
| + | colnames(expr_dat) <- uq_names | ||
| + | rownames(expr_dat) <- rownames(dat_mat) | ||
| + | head(expr_dat[, | ||
| + | dim(expr_dat); | ||
| + | |||
| + | #Set working directory | ||
| + | setwd(' | ||
| + | # Read a dataset | ||
| + | dat = read.table(" | ||
| + | head(dat) | ||
| + | dat$Job | ||
| + | attach(dat) | ||
| + | Job | ||
| + | plot(Job, main=" | ||
| + | box() | ||
| + | freq = table(dat[, | ||
| + | barplot(freq) | ||
| + | |||
| + | |||
| + | data(VADeaths) | ||
| + | library(gplots) | ||
| + | #x11() | ||
| + | barplot2(VADeaths, | ||
| + | col = gray(seq(0.4, | ||
| + | | ||
| + | ylim = c(0, 100)) | ||
| + | title(main = "Death Rates in Virginia", | ||
| + | |||
| + | hh <- t(VADeaths)[, | ||
| + | mybarcol <- " | ||
| + | ci.l <- hh * 0.85 | ||
| + | ci.u <- hh * 1.15 | ||
| + | mp <- barplot2(hh, | ||
| + | col = gray(seq(0.4, | ||
| + | legend = colnames(VADeaths), | ||
| + | ylim = c(0, 100), | ||
| + | main = "Death Rates in Virginia", | ||
| + | font.main = 4, | ||
| + | sub = "Faked 95 percent error bars", | ||
| + | cex.names = 1.5, | ||
| + | plot.ci = TRUE, | ||
| + | ci.l = ci.l, ci.u = ci.u, plot.grid = TRUE) | ||
| + | box() | ||
| + | |||
| + | |||
| + | |||
| + | #x11() | ||
| + | pie(freq, main=" | ||
| + | pie(rep(1, 24), col = rainbow(24), | ||
| + | pie.sales <- c(0.12, 0.3, 0.26, 0.16, 0.04, 0.12) | ||
| + | lbl = c(" | ||
| + | " | ||
| + | names(pie.sales) = paste0(lbl," | ||
| + | pie(pie.sales, | ||
| + | |||
| + | |||
| + | # histogram | ||
| + | x <- expr_dat[, | ||
| + | #x11() | ||
| + | hist(x, | ||
| + | hist(x, | ||
| + | hist(x, | ||
| + | |||
| + | |||
| + | #boxplot | ||
| + | mat = expr_dat[, | ||
| + | #x11() # open a new, empty graphics window | ||
| + | boxplot(mat) | ||
| + | res = boxplot(mat, | ||
| + | res | ||
| + | |||
| + | #x11() | ||
| + | c_name = colnames(expr_dat) | ||
| + | plot(expr_dat[, | ||
| + | # | ||
| + | plot(expr_dat[, | ||
| + | |||
| + | # plot | ||
| + | pop_dat = read.csv(file=' | ||
| + | #x11() | ||
| + | plot(pop_dat[, | ||
| + | # | ||
| + | plot(pop_dat[, | ||
| + | |||
| + | # scatter plot | ||
| + | #x11() | ||
| + | # pch selects the plotting symbol (e.g., pch=16 => filled circle) | ||
| + | ind1 = 8; ind2=12 | ||
| + | plot(expr_dat[, | ||
| + | | ||
| + | cor_mat = cor(expr_dat) | ||
| + | which.max(cor_mat[ind1, | ||
| + | ind1 = 8; ind2=200 | ||
| + | plot(expr_dat[, | ||
| + | | ||
| + | |||
| + | |||
| + | #x11() | ||
| + | #pairs example | ||
| + | ind = c(2, | ||
| + | pairs(expr_dat[, | ||
| + | pairs(expr_dat[, | ||
| + | pch = 21, bg = c(" | ||
| + | |||
| + | |||
| + | # Stat | ||
| + | mean(expr_dat[, | ||
| + | median(expr_dat[, | ||
| + | |||
| + | x = c(1, | ||
| + | tb_x = table(x); tb_x | ||
| + | as.numeric(names(tb_x)[which.max(tb_x)]) | ||
| + | # Mode: tabulate `vec` and return its most frequent value as a number. | ||
| + | # When several values share the top count, the first entry of the | ||
| + | # frequency table wins. | ||
| + | Mode <- function(vec) { | ||
| + | counts <- table(vec) | ||
| + | top <- which.max(counts) | ||
| + | as.numeric(names(counts)[top]) } | ||
| + | Mode(x) | ||
| + | |||
| + | quantile(expr_dat[, | ||
| + | quantile(expr_dat[, | ||
| + | |||
| + | min(expr_dat[, | ||
| + | max(expr_dat[, | ||
| + | range(expr_dat[, | ||
| + | |||
| + | x <- rnorm(100) | ||
| + | summary(x) # 수치형 자료의 summary | ||
| + | y <- c(' | ||
| + | summary(y) # 문자형 자료의 summary | ||
| + | f.y <- factor(y); summary(f.y) # 요인의 summary | ||
| + | |||
| + | var(expr_dat[, | ||
| + | sum((expr_dat[, | ||
| + | sd(expr_dat[, | ||
| + | |||
| + | # CV | ||
| + | height=c(72, | ||
| + | sd(height)/ | ||
| + | |||
| + | |||
| + | install.packages(" | ||
| + | library(moments) | ||
| + | skewness(expr_dat[, | ||
| + | kurtosis(expr_dat[, | ||
| + | hist(expr_dat[, | ||
| + | x = seq(5.5, | ||
| + | lines(x, | ||
| + | |||
| + | # contingency table | ||
| + | # 1-D frequency table | ||
| + | table(mtcars$cyl) | ||
| + | table(mtcars$am) | ||
| + | # 2-D contingency table | ||
| + | table(mtcars$cyl, | ||
| + | # 3-D contingency table | ||
| + | table(mtcars$cyl, | ||
| + | |||
| + | # cov , cor | ||
| + | cov(expr_dat[, | ||
| + | cov(expr_dat[, | ||
| + | var(expr_dat[, | ||
| + | |||
| + | cor(expr_dat[, | ||
| + | cor(expr_dat[, | ||
| + | |||
| + | | ||
