# Genome Informatics Laboratory at KRIBB

# Set the working directory; the relative './datafile' path used later in
# this script is resolved against it.
# NOTE(review): this absolute path is machine-specific - confirm it exists
# on the machine running the script.
setwd('/BiO/example/dataset')

# `if` needs a single TRUE/FALSE, so a logical vector is collapsed with any().
x <- 1:5
y <- -2:2
if (any(x < 0)) print(x)       # no element of x is negative: nothing printed
if (any(y < 0)) print(abs(y))  # y has negative elements: prints abs(y)

# Intentional demo: a condition of length > 1 is an error in R >= 4.2 (older
# versions warn and use only the first element). try() lets the error message
# be shown without aborting the rest of the script.
try(if (y < 0) print(y))

if (any(y < 0)) {
  print(abs(y))
  cat("\n y contains negative values")
}
# print(x) displays the contents of x.

# if / else: exactly one of the two branches runs.
if (pi > 3) {
  cat("\n expr(T)")
} else {
  cat("\n expr(F)")
}
if (pi < 3) {
  cat("\n expr(T)")
} else {
  cat("\n expr(F)")
}

# Length is checked first; the sum is only looked at when the length is 5.
x <- 1:3
if (length(x) != 5) {
  cat("\n Vector x length !=", length(x))
} else if (sum(x) == 15) {
  cat("\n Vector x length=", length(x), ", sum = ", sum(x))
}
# Both tests can also be combined: if (length(x) == 5 && sum(x) == 15)


# ifelse(): element-wise choice between two vectors.
x <- c(10, 3, 6, 9)
y <- c(1, 5, 4, 12)
ifelse(x > y, x, y)  # the larger of each pair

# Label each score and show scores and labels side by side.
score <- c(80, 75, 40, 98)
grade <- ifelse(score >= 50, "pass", "fail")
data.frame(score, grade)

# Absolute value written with ifelse(), followed by the built-in abs().
y <- -2:2
ifelse(y >= 0, y, -y)
abs(y)

# Nested ifelse() gives a three-way classification.
tmp <- c(3, -1, 1, -2, 0)
sn <- ifelse(tmp == 0, "zero", ifelse(tmp > 0, "pos", "neg"))
data.frame(tmp, sn)

# for: s accumulates the running total, one term per iteration.
s <- 0
for (i in seq_len(100)) s <- s + i

# Same value from the vectorised sum(); more efficient than the loop.
sum(seq_len(100))

# Split a data frame into several tab-separated files of at most `cut` rows
# each: ./datafile/file_1.txt, ./datafile/file_2.txt, ...
dir.create('./datafile', showWarnings = FALSE)  # no warning when re-run
f_pre <- './datafile/file_'
f_post <- '.txt'
dat <- mtcars
n <- nrow(dat)
cut <- 10                  # rows per file
nfile <- ceiling(n / cut)  # the last file holds the remainder
# seq_len() is empty when nfile is 0, and clamping the upper index to n lets
# the same loop write the short final chunk. The previous 1:(nfile-1) loop
# counted down (1, 0) whenever all rows fit into a single file.
for (i in seq_len(nfile)) {
  ind <- (cut * (i - 1) + 1):min(cut * i, n)
  write.table(dat[ind, , drop = FALSE],
              file = paste0(f_pre, i, f_post), sep = '\t')
}


# while

# Sum of 1..100 again, this time with while.
# Unlike for, the loop body itself has to advance the variable that the
# condition tests.
s <- 0
i <- 1
while (i <= 100) {
  s <- s + i
  i <- i + 1
}

# Split each "/"-separated string into tokens until the sentinel "Quit" is
# met (or the input runs out), then tabulate all collected tokens.
ch <- c("A/B/C/D/F", "A/AA", "BB/B", "Quit", "CC/C")
xp <- list()
i <- 1
# A while condition is scalar, so use the short-circuit && and test the bound
# first: ch[i] is then never read past the end of ch.
while (i <= length(ch) && ch[i] != "Quit") {
  xp[[i]] <- unlist(strsplit(ch[i], '/'))
  print(xp[[i]])
  i <- i + 1
}
table(unlist(xp))

# break: leave the loop as soon as the running sum reaches 10.
s <- 0
for (i in seq_len(10)) {
  s <- s + i
  if (s >= 10) {
    break
  }
}
c(i, s)  # index at which the loop stopped, and the sum at that point

# next: skip odd values, so only the even values in 1..10 are summed.
s <- 0
for (i in seq_len(10)) {
  if (i %% 2 == 1) {
    next
  }
  s <- s + i
}

# system.time(): convenient for timing a single expression or function call.
system.time(sum(as.double(1:1e6)))

# A multi-statement computation such as a for loop has to be wrapped in a
# { } block to be passed as one expression.
system.time({
  s <- 0
  for (i in 1:1e6) {
    s <- s + i
  }
})
# 1e6 = 1 * 10^6, 1e-4 = 1 * 10^-4

# The result can be stored; its first three elements are printed one by one.
y <- system.time(sum(as.double(1:1e6)))
y[1]
y[2]
y[3]



# Sys.time(): record the clock before and after the work and take the
# difference yourself. Convenient for timing loops.
tic <- Sys.time()  # time before the computation
sum(as.double(1:1e6))
toc <- Sys.time()  # time after the computation
as.numeric(difftime(toc, tic, units = "secs"))

# Same measurement around an explicit loop.
tic <- Sys.time()
s <- 0
for (i in 1:1e6) s <- s + i
toc <- Sys.time()  # time after the loop
as.numeric(difftime(toc, tic, units = "secs"))

# Infinite-loop demo: the increment of i is left out on purpose, so the
# condition i <= 100 stays TRUE forever.
# Press Esc to break out, then inspect s to see how many iterations ran.
# Only an interactive session can be interrupted that way, so the loop is
# skipped in non-interactive runs, where it would never return.
s <- 0
i <- 1
if (interactive()) {
  while (i <= 100) {
    s <- s + i
  }
}


# User defined function

# Count how often each word occurs in a character vector.
#
# Every delimiter character -- ( ) , . ? ! / and space -- is replaced by
# `sep`; the text is then split on `sep` and empty tokens are dropped.
#
# Arguments:
#   x    character vector of text
#   sep  separator string, matched literally when splitting (default " ")
# Returns:
#   a table of counts, one entry per distinct word
wd_count <- function(x, sep = " ") {
  temp <- gsub("[(),.?!/ ]", sep, x)
  # fixed = TRUE: treat sep as a literal string. As a regular expression a
  # separator such as "." matches every character and leaves no words.
  temp <- unlist(strsplit(temp, sep, fixed = TRUE))
  temp <- temp[temp != ""]
  table(temp)
}

tx_data <- c("Hello, I like statistics.",
             "I want to go out.",
             "You need to take a break.",
             "You are so nice.")
res <- wd_count(tx_data)
sort(res, decreasing = TRUE)  # most frequent words first

# Fibonacci
#
# Return the first n Fibonacci numbers (1, 1, 2, 3, 5, ...).
#
# Arguments:
#   n  a single non-negative number: how many terms to produce
# Returns:
#   numeric vector of length n (empty when n is 0)
fibo <- function(n) {
  stopifnot(is.numeric(n), length(n) == 1, !is.na(n), n >= 0)
  x <- rep(1, n)  # the first two terms are already correct
  # Guard the loop: 3:n counts DOWN when n < 3, which made fibo(0) fail.
  if (n >= 3) {
    for (i in 3:n) {
      x[i] <- x[i - 1] + x[i - 2]
    }
  }
  x
}
fibo(1)
fibo(2)
fibo(10)

# function
a <- c(1, 3, 5, 6)

# Intentionally broken example: return() accepts a single value only.
# Defining the function succeeds; the error appears when it is called.
std.ftn <- function(x)
  return(mean(x),var(x), (x-mean(x))/sd(x))
# try() shows the error message without aborting the rest of the script.
try(std.ftn(a))  # error

# Working version: bundle the three results into one named list.
# Returns list(mean, var, std), where std is x centred on its mean and
# divided by its standard deviation.
std.ftn2 <- function(x) {
  list(mean = mean(x),
       var = var(x),
       std = (x - mean(x)) / sd(x))
}
std.ftn2(a)

# Typing a function's name without parentheses prints its definition
# instead of calling it.
ls # the function object itself: shows the definition, does not run it
ls() # calls the function

# The same works for the functions defined earlier in this script.
wd_count 
fibo
std.ftn
read.table # helpful for looking up the names of the arguments

# Scope demo 1: a variable created inside a function is local to that call.
a <- c(1, 3, 5)
noact <- function(x) {
  loc <- 3
  loc
}
noact(a)
# Intentional demo: loc does not exist outside noact(). try() shows the
# error message without aborting the rest of the script.
try(print(loc))  # error

# Scope demo 2: assigning to `a` with <- inside a function changes a local
# copy only; the `a` defined outside keeps its value.
a <- c(1, 3, 5)
noact <- function(x) {
  a[1] <- 3
  a
}
noact(10)  # the modified local copy
a          # the outer a

# Scope demo 3: <<- assigns outside the function, so glb is still there
# after the call, while the <- change to `a` stays local.
a <- c(1, 3, 5)
noact <- function(x) {
  a[1] <- 3
  glb <<- c(1, 2)
  a
}
noact(10)
a
glb

# Scope demo 4: `type` selects the assignment operator.
#   type 1: <-  changes a local copy of a
#   type 2: <<- changes the a defined outside the function
a <- c(1, 3, 5)
noact <- function(x, type = 1) {
  if (type == 1) {
    a[1] <- 3
  } else if (type == 2) {
    a[1] <<- 3
  }
  a
}
noact(10)  # same as noact(10, 1)
a
noact(5, 2)
a