# day2_practice1
# Print the platform details of the running R session.
.Platform

# Relative file names used by this script are resolved against this directory.
setwd('/BiO/example/dataset')

# Load the example table: the first line of the file holds the column names
# and text columns are kept as character vectors.
raw_dat <- read.csv(
  file = "Ex_data.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
head(raw_dat[, 1:20])  # there are many columns, so show only columns 1:20
dim(raw_dat)

# gl(number of groups, size of each group) creates a factor vector whose
# levels are 1, 2, ..., number of groups.
gr_ind <- gl(2, 221)  # 221 ones then 221 twos = c(1,...,1,2,...,2)

# Drop the first column and transpose, so every remaining column of raw_dat
# becomes one row of the matrix.
dat_mat <- t(as.matrix(raw_dat[, -1]))
dim(dat_mat)

# Rows are labelled S1, S2, ...; columns are labelled with the values of the
# first column of raw_dat.
rownames(dat_mat) <- paste0("S", 1:nrow(dat_mat))
colnames(dat_mat) <- raw_dat[, 1]
head(dat_mat[, 1:20])
# Impute missing values: every NA cell is replaced by the mean of the
# non-missing values of its column.
# indx has one row per NA cell holding that cell's (row, col) position.
indx <- which(is.na(dat_mat), arr.ind = TRUE)
indx
col_ind <- indx[, 2]
# drop = FALSE keeps the subset a matrix even when only a single cell is
# missing; without it the subset collapses to a plain vector and a
# column-wise summary cannot be computed.
# col_m holds one mean per NA cell (a column with several NAs is repeated),
# so its elements line up with the rows of indx.
col_m <- colMeans(dat_mat[, col_ind, drop = FALSE], na.rm = TRUE)
col_m
dat_mat[indx] <- col_m  # matrix indexing: one replacement per (row, col) pair
sum(is.na(dat_mat))  # stays above 0 only if some column had no observed value
dim(dat_mat)
# Collapse columns that share a name: for each unique column name, average
# the matching columns of dat_mat within every row.
uq_names <- unique(colnames(dat_mat))
p <- length(uq_names)
n <- nrow(dat_mat)
expr_dat <- matrix(0, n, p)
for (i in seq_len(p)) {
  # drop = FALSE keeps the selection a matrix whether one or several columns
  # match (and whether dat_mat has one or many rows), so the mean is always
  # taken across columns within a row.
  same_name <- dat_mat[, colnames(dat_mat) == uq_names[i], drop = FALSE]
  expr_dat[, i] <- rowMeans(same_name)
  cat('\n', i, '-th step')  # progress trace
}
colnames(expr_dat) <- uq_names
rownames(expr_dat) <- rownames(dat_mat)
head(expr_dat[, 1:20])
dim(expr_dat); sum(is.na(expr_dat))
# Read a dataset (text table with a header line) from the current working
# directory. The former setwd('./') was a no-op and has been dropped.
dat <- read.table("Ex211.txt", header = TRUE)
head(dat)
dat$Job

# Bar chart of the Job column.
# The column is accessed explicitly instead of through attach(), which would
# put every column of dat on the search path. It is converted to a factor so
# that plot() draws one bar per category: since R 4.0.0 read.table() keeps
# text columns as character, and plot() cannot draw a character vector.
job <- factor(dat$Job)
job
plot(job, main="직업의 막대그림", ylab="인원수(명)", ylim=c(0,15))
box()

# Frequency table of the sixth column of dat, drawn as a bar plot.
# NOTE(review): column 6 is presumably Job - confirm against the data file.
freq <- table(dat[, 6])
barplot(freq)
# Grouped bar chart of the built-in VADeaths matrix drawn with
# gplots::barplot2, using five gray shades and the row names as legend.
data(VADeaths)
library(gplots)
# x11()
barplot2(
  VADeaths,
  beside = TRUE,
  col = gray(seq(0.4, 0.9, length.out = 5)),
  legend = rownames(VADeaths),
  ylim = c(0, 100)
)
title(main = "Death Rates in Virginia", font.main = 4)
# Transposed VADeaths with its columns taken in reverse order (5:1), plotted
# with illustrative error bars at 85% and 115% of each bar height.
hh <- t(VADeaths)[, 5:1]
mybarcol <- "gray20"
ci.l <- 0.85 * hh  # lower end of the faked interval
ci.u <- 1.15 * hh  # upper end of the faked interval

# mp keeps the value returned by barplot2().
mp <- barplot2(
  hh,
  beside = TRUE,
  col = gray(seq(0.4, 0.9, length.out = 5)),
  legend = colnames(VADeaths),
  ylim = c(0, 100),
  main = "Death Rates in Virginia",
  font.main = 4,
  sub = "Faked 95 percent error bars",
  cex.names = 1.5,
  plot.ci = TRUE,
  ci.l = ci.l,
  ci.u = ci.u,
  plot.grid = TRUE
)
box()
# x11()
# Pie chart of the frequency table freq.
pie(freq, main="직업의 원그림")

# 24 equal slices, each filled with one colour of a 24-step rainbow palette.
pie(rep(1, 24), col = rainbow(24), radius = 0.9)

# Pie chart whose slice labels have the form "<name> (<share>%)".
pie.sales <- c(0.12, 0.3, 0.26, 0.16, 0.04, 0.12)
lbl <- c(
  "Blueberry", "Cherry", "Apple",
  "Boston Cream", "Other", "Vanilla Cream"
)
names(pie.sales) <- paste0(lbl, " (", pie.sales * 100, "%)")
pie(pie.sales, col = rainbow(length(pie.sales)))
# Histograms of column 10 of expr_dat, titled with that column's name.
x <- expr_dat[, 10]
# x11()
hist(x, breaks = 20, col = "gray", main = uq_names[10])
# freq = FALSE puts densities instead of counts on the vertical axis.
hist(x, breaks = 40, freq = FALSE, col = "lightblue", main = uq_names[10])
# plot = FALSE draws nothing; the histogram object is printed instead.
hist(x, breaks = 40, plot = FALSE)

# Box plots of columns 3, 4, 7 and 8 of expr_dat.
mat <- expr_dat[, c(3, 4, 7, 8)]
# x11()  # opens an empty graphics window
boxplot(mat)
# Same call without drawing: keep the returned statistics and print them.
res <- boxplot(mat, plot = FALSE)
res
# x11()
# Line plots of one expr_dat column against another; each axis is labelled
# with the name of the column drawn on it.
c_name <- colnames(expr_dat)
plot(expr_dat[, 1], expr_dat[, 2], type = 'l',
     xlab = c_name[1], ylab = c_name[2])
# windows()
# Columns 3 and 4 are plotted here, so the labels use c_name[3] and
# c_name[4] (they previously repeated the names of columns 1 and 2).
plot(expr_dat[, 3], expr_dat[, 4], type = 'l',
     xlab = c_name[3], ylab = c_name[4])
# Plot the second column of table_2_2.csv against its first column.
pop_dat <- read.csv(file = 'table_2_2.csv')
# x11()
# type = 'l': lines only.
plot(pop_dat[, 1], pop_dat[, 2], type = 'l', xlab='연도', ylab='인구수')
# windows()
# type = 'b': both points and lines.
plot(pop_dat[, 1], pop_dat[, 2], type = 'b', xlab='연도', ylab='인구수')
# Scatter plots of one expr_dat column against another.
# x11()
# pch selects the point symbol (e.g., pch = 16 => filled circle).
ind1 <- 8; ind2 <- 12
plot(expr_dat[, ind1], expr_dat[, ind2], type = 'p', pch = 16,
     xlab = uq_names[ind1], ylab = uq_names[ind2])

# Find the column most correlated with column ind1.
# The self-correlation is masked with NA instead of being removed with
# -ind1: which.max() skips NA, and the position it reports is then a column
# index of expr_dat rather than an index into a vector that is one shorter.
cor_mat <- cor(expr_dat)
cor_with_ind1 <- cor_mat[ind1, ]
cor_with_ind1[ind1] <- NA
which.max(cor_with_ind1)

# NOTE(review): 200 is hard-coded - confirm it matches the column reported
# by which.max() above.
ind1 <- 8; ind2 <- 200
plot(expr_dat[, ind1], expr_dat[, ind2], type = 'p', pch = 16,
     xlab = uq_names[ind1], ylab = uq_names[ind2])
# x11()
# Scatter-plot matrices (pairs) of four expr_dat columns.
ind <- c(2, 8, 12, 200)
pairs(expr_dat[, ind])
# The title has to be passed as main =. The second positional argument of
# pairs() is labels (the variable names drawn on the diagonal), so an
# unnamed title string replaces those names instead of titling the plot.
# Points are filled red or blue according to gr_ind.
# NOTE(review): assumes gr_ind has one entry per row of expr_dat, in row
# order - confirm against the data.
pairs(expr_dat[, ind], main = "Expression Data",
      pch = 21, bg = c("red", "blue")[gr_ind])
# Descriptive statistics: mean and median of column 10 of expr_dat.
mean(expr_dat[, 10])
median(expr_dat[, 10])

# Mode by hand: tabulate the values, then convert the name of the most
# frequent entry back to a number.
x <- c(1, 2, 3, 1, 2, 5, 5, 3, 3, 3, 2)
tb_x <- table(x)
tb_x
as.numeric(names(tb_x)[which.max(tb_x)])
# Most frequent value (statistical mode) of a vector.
#
# vec: an atomic vector or factor. Missing values are not counted.
# Returns: for numeric input, the most frequent value as a number; for any
#   other input (character, factor, logical), its label as a character
#   string, where the previous version returned NA with a coercion warning.
#   When several values tie, the first one in the sorted order used by
#   table() is returned. Empty input gives a zero-length result.
Mode <- function(vec) {
  tb <- table(vec)
  top <- names(tb)[which.max(tb)]
  if (is.numeric(vec)) {
    as.numeric(top)
  } else {
    top
  }
}
# Apply Mode() to the example vector x.
Mode(x)

# Quantiles, extremes and range of the first column of expr_dat.
quantile(expr_dat[, 1], 0.25)
quantile(expr_dat[, 1], c(0.25, 0.5, 0.75))
min(expr_dat[, 1])
max(expr_dat[, 1])
range(expr_dat[, 1])

# summary() on three kinds of vectors.
x <- rnorm(100)
summary(x)  # summary of numeric data
y <- c('red', 'blue', 'red', 'white')
summary(y)  # summary of character data
f.y <- factor(y)
summary(f.y)  # summary of a factor

# Variance of the first column: built-in, then by hand as the sum of squared
# deviations from the mean divided by n - 1, followed by the standard
# deviation.
var(expr_dat[, 1])
sum((expr_dat[, 1] - mean(expr_dat[, 1]))^2) / (n - 1)
sd(expr_dat[, 1])

# CV (coefficient of variation), in percent.
height <- c(72, 74, 68, 76, 74, 69, 72, 79, 70, 69, 77, 73)
sd(height) / mean(height) * 100
# Skewness and kurtosis of the first column of expr_dat (package moments).
# The package is installed only when it is missing, so re-running the script
# does not download and reinstall it every time.
if (!requireNamespace("moments", quietly = TRUE)) {
  install.packages("moments")
}
library(moments)
skewness(expr_dat[, 1])
kurtosis(expr_dat[, 1])

# Density-scale histogram with a normal curve that uses the sample mean and
# standard deviation of the same column.
hist(expr_dat[, 1], freq = FALSE)
# NOTE(review): the grid limits 5.5 and 8.5 are hard-coded - confirm that
# they span the plotted data.
x <- seq(5.5, 8.5, length.out = 100)
lines(x, dnorm(x, mean = mean(expr_dat[, 1]), sd = sd(expr_dat[, 1])))
# Contingency tables built from the mtcars data set.
# One-way frequency tables
table(mtcars$cyl)
table(mtcars$am)
# Two-way contingency table
table(mtcars$cyl, mtcars$am)
# Three-way contingency table
table(mtcars$cyl, mtcars$am, mtcars$gear)

# Covariance and correlation.
# Two vectors give a single number; a matrix of columns gives the full
# covariance / correlation matrix.
cov(expr_dat[, 1], expr_dat[, 5])
cov(expr_dat[, c(1, 5, 8)])
var(expr_dat[, 1])
cor(expr_dat[, 1], expr_dat[, 5])
cor(expr_dat[, c(1, 5, 8)])
# day2_practice1.txt · Last modified: by 127.0.0.1
