1 Whtat is R?

1.1 History

  • 1980 from S (AT&T Bell Laboratories)
  • Created by Ross Ihaka and Robert Gentleman, University of Auckland, New Zealand
  • Developed currently by R Development Core Team

1.2 For What?

  • Statistical computing
  • Data analysis
  • Graphics

2 Why is R?

4 Simple examples

4.1 apply

n=200
p=6
x <- matrix(rnorm(n*p,0,1),nrow = n)
apply(x,2,sd)
apply(x,2,mean)
apply(x, 2, sort)

4.2 ls

n=200
p=6
x <- matrix(rnorm(n*p,0,1),nrow = n)
beta <- c(1,2,0,3,0,0)
y <- x%*%beta+rnorm(n)
fit <- lm(y~x-1)
summary(fit)
## glment
library("glmnet")
fit.glmnet <- cv.glmnet(x,y,family="gaussian")
plot(fit.glmnet)
coef(fit.glmnet)

4.3 glm

n=200
p=6
x <- matrix(rnorm(n*p,0,1),nrow = n)
beta <- c(1,2,0,3,0,0)
beta0 <- 1.5
mu <- exp(beta0+x%*%beta)
u <- runif(n)
y <- (u<=(mu/(1+mu)))
fit <- glm(y~x,family = gaussian)
fit
## glmnet
fit.glmnet <- cv.glmnet(x,y,family="binomial")
plot(fit.glmnet)
coef(fit.glmnet)

4.4 read.table

pvalues1 = read.table("pvaluesW6All.txt")
read.table(pvalues1, file="test_rt.txt")
save(pvalues1,file="test_save.txt")
write.csv(tab,file=filenames,row.names=F)

5 Visualization

5.1 Manhattan Plot

library(ggplot2)
source("ManhattanPlot.R")
pvalues1 = read.table("pvaluesW6All.txt")
ng = ftable(pvalues1[,7])
p = dim(pvalues1)[1]
snp_posit = NULL
for(k in 1:22)snp_posit = c(snp_posit,seq(ng[k]))
par(mar = c(5,8,1,1),oma = c(1,1,1,1), mgp=c(3,1.2,0))
par(mfrow=c(2,1))
for(k in c(1,3)){
  pvalue = data.frame(cbind(pvalues1[,7],c(1:p),snp_posit,pvalues1[,k]))
  colnames(pvalue) <- c("CHR","SNP","BP","P")
  manhattan(pvalue,pch=20)
}

5.2 plot

set.seed(1)
x <- rnorm(1000)
y <- 2*x + rnorm(1000)
fit <- lm(y~x)
plot(x,y,main="Linear regression", col="gray")
abline(coef(fit))

5.3 ggvis plot

library(ggvis)
mtcars %>%
  ggvis(~wt, ~mpg) %>%
  layer_smooths(span = input_slider(0.5, 1, value = 1, step=0.1)) %>%
  layer_points(size := input_slider(100, 1000, value = 100, ticks=F, 
                                    pre="pre_", post="_post"))

5.4 animation plot

library(animation)
library(plyr)
oopt = ani.options(interval = 0.3, nmax = 101)
a <- sort(rnorm(100, 2))
b <- sort(rnorm(100, 7))
out <- vector("list", 101)
for (i in 1:ani.options("nmax")) {
  ji <- seq(from = 0, to = 5, by = .05)
  a <- jitter(a, factor = 1, amount = ji[i])
  fab1 <- lm(a ~ b)
  coe <- summary(fab1)$coefficients
  r2 <- summary(fab1)$r.squared
  if (coe[2, 4] < .0001) p <- " < .0001"
  if (coe[2, 4] < .001 & coe[2, 4] > .0001) p <- " < .001"
  if (coe[2, 4] > .01) p <- round(coe[2, 4], 3)
  plot(a ~ b, main = "Linear model")
  abline(fab1, col = "red", lw = 2)
  text(x = min(b) + 2, y = max(a) - 1, 
       labels = paste("t = ", round(coe[2, 3], 3), ", p = ", p, ", R2 = ", round(r2, 3)))
  out[[i]] <- c(coe[2, 3], coe[2, 4], r2)
  ani.pause()
  }
ani.options(oopt)

5.5 3D plot (install XQuartz on MacOS)

# library(rgl)
# library(scatterplot3d)
x1=seq(-3,3,by = 0.1)
a1=1
a2=1
x2=sqrt((9-a1*x1^2)/a2)
x3=seq(-4,4,by = 0.1)
x4=sqrt((16-a1*x3^2)/a2)
plot(x3,x4)
points(x1,x2)

xy=rbind(cbind(x1,x2),cbind(x1,-x2),cbind(x3,x4),cbind(x3,-x4))

plot(xy[c(123:284),1],xy[c(123:284),2],col=2,pch = 16)
points(xy[c(1:122),1],xy[c(1:122),2],col=3,pch = 16)

z1=xy[,1]^2
z2=xy[,2]^2
z3=sqrt(2)*xy[,1]*xy[,2]
library(scatterplot3d)
scatterplot3d(z1,z2,z3,pch = 3)
library(rgl)
open3d()
plot3d(z1[c(1:122)], z2[c(1:122)], z3[c(1:122)],col = 3,size = 6)
plot3d(z1[c(123:284)], z2[c(123:284)], z3[c(123:284)],col = 2,size = 6,add = TRUE)

######
# install.packages("caTools")  # install external package
library(caTools)             # external package providing write.gif function
jet.colors <- colorRampPalette(c("red", "blue", "#007FFF", "cyan", "#7FFF7F",
                                 "yellow", "#FF7F00", "red", "#7F0000"))
dx <- 1500                    # define width
dy <- 1400                    # define height
C  <- complex(real = rep(seq(-2.2, 1.0, length.out = dx), each = dy),
              imag = rep(seq(-1.2, 1.2, length.out = dy), dx))
C <- matrix(C, dy, dx)       # reshape as square matrix of complex numbers
Z <- 0                       # initialize Z to zero
X <- array(0, c(dy, dx, 20)) # initialize output 3D array
for (k in 1:20) {            # loop with 20 iterations
  Z <- Z^2 + C               # the central difference equation
  X[, , k] <- exp(-abs(Z))   # capture results
}
write.gif(X, "Mandelbrot.gif", col = jet.colors, delay = 100)

5.6 More amazing plots:

5.7 Website:

library(shiny)
library(FactSum)
  # Application title
  #headerPanel("Factorial of n")
  
  # Sidebar with a slider input for number of observations
  inputPanel(
    selectInput("is_sum", "Calculate sum 1!+2+..+n!?", 
                choices = c("No", "Yes")),
    numericInput("obs", "Nature number input:", 10)
  )

  sumInput <- reactive({
    switch(input$is_sum,
           "No" = 0,
           "Yes" = 1)
  })
  
renderPrint({
    fit <- fact(input$obs, sumInput())
    cat("Length of factorial:",fit$len_fact,"\n")
    cat("Number of zeros in the last:",fit$nzeros,"\n")
    cat("Factorial of ",input$obs,":\n")
    cat(fit$fact)
    if(sumInput()){
      cat("\n Length of Sum of factorials:\n",fit$len_sum,"\n")
      cat("Sum of factorials",input$obs,":\n")
      cat(fit$fact_sum)
    }
  })
  • Example 2, Plots
inputPanel(
  selectInput("n_breaks", label = "Number of bins:",
              choices = c(10, 20, 35, 50), selected = 20),
  
  sliderInput("bw_adjust", label = "Bandwidth adjustment:",
              min = 0.2, max = 2, value = 1, step = 0.2)
)

renderPlot({
  hist(faithful$eruptions, probability = TRUE, breaks = as.numeric(input$n_breaks),
       xlab = "Duration (minutes)", main = "Geyser eruption duration")
  
  dens <- density(faithful$eruptions, adjust = input$bw_adjust)
  lines(dens, col = "blue")
})
  • Example 3, table
shinyAppDir(
  system.file("examples/06_tabsets", package = "shiny"),
  options = list(
    width = "100%", height = 550
  )
)

5.8 What else?

  • Animation
  • SVM: e1071
  • Optimization: DEoptim, quadqrog, linprog, CVXR, RcppDE
  • Parallel: doParallel, Rmpi, snow, multicore, RcppParallel
  • Rmarkdown
  • Deep learning: tf & keras
  • Workflowr
  • Distributed computation: sparklyr, SuperR

6 R package

6.1 How many are they?

  • more than 15,000 (to Sep. 2018)

6.3 How to use them?

  install.packages("rpk",dir)
  library(rpk)

6.4 Can I unzip them?

  • Yes, you can unzip them and
  • repack them

7 Developing R packages

7.1 How to develop them?

  • Rstudio
  • R command-line:
    package.skeleton("myrpk")

7.2 How to pack them?

  • cmd (windows) or terminal (MacOS):
    R CMD build myrpk
    R CMD Rd2pdf myrpk
    R CMD check myrpk

7.3 Where and How to share them?

8 Example: FactSum

8.1 Summary

R package "FactSum" calculates the factorial of a large integer, which may be much greater than the maximum memory of any data type. FactSum implements dramatically fast. It takes only 0.45 seconds to cumpute 10000! (it approximates 2.8E+35660), and 0.98 seconds to compute 10000! and sum=1!+2!+3!+...+10000! simultaneously. It takes only one minute to cumpute 100000! (it approximates 2.8E+456574), and less then two minutes to compute 10000! and sum=1!+2!+3!+...+100000! simultaneously.

8.2 Installation

    #install.packages("devtools")
    library(devtools)
    install_github("xliusufe/FactSum")

8.3 Usage

8.4 Example

library(FactSum)
fact(200,1)

8.5 Development

8.6 See also sqrtn

9 External languages

9.1 How to call C/C++ or FORTRAN

  • Rtools
  • Interface to compile code (For C/C++)
   .C
   .Call
   .External
  • foo.dll (Windows) or foo.so (MacOS):
     R CMD SHLIB foo.c
  • compile in Rpk

9.2 How to use Rcpp?