12 Useful R codes

# Convert numeric to factor
# Columns 2 through 9 of temdata are each passed through as.factor()
temdata[, 2:9] <- lapply(X = temdata[, 2:9], FUN = as.factor)

# Convert factor to numeric
#
# Recovers the numbers spelled out in a factor's level labels; calling
# as.numeric() directly on a factor would return its integer codes instead.
#   x: a factor whose levels are numeric strings. Any non-factor vector is
#      handed to as.numeric() as-is.
# Returns a numeric vector the same length as x.
as.numeric.factor <- function(x) {
  if (!is.factor(x)) {
    # levels() is NULL for non-factors, so the lookup below would yield only NAs
    return(as.numeric(x))
  }
  # Indexing by the factor uses its integer codes to pick the matching level
  as.numeric(levels(x))[x]
}
# Run the converter over columns 2 through 5 and write the results back
temdata[, 2:5] <- lapply(X = temdata[, 2:5], FUN = as.numeric.factor)

# Have frequencies table for multiple columns

library(plyr)
# Cross-tabulate each of columns 5 to 11 against grp. lapply() always returns a
# named list (one table per column) and keeps every column's own type.
dems <- lapply(temdata[, 5:11], function(x) table(x, temdata$grp))
# Stack the per-column tables into one long data frame
mydems <- ldply(dems, data.frame)

# Aggregate variables by grp

# Base R: mean of every other column within each level of grp
uncagg <- aggregate(. ~ grp, data = temdata, FUN = mean, na.rm = TRUE)

# Per-group means of all remaining columns, written with .SD and by=.
# NOTE(review): .SD / by= look like data.table syntax -- presumably temdata has
# to be a data.table for this line to run; confirm.
uncaggfaster <- temdata[, lapply(.SD, mean, na.rm = TRUE), by = grp]

# Find max in a table
# which.max() gives the position of the (first) largest element of x, not the
# value itself.
which.max(x)


# Update R
# Install installr only when it is missing, then attach it. library() stops
# with an error if the package still cannot be loaded, so updateR() is never
# reached without it.
if (!requireNamespace("installr", quietly = TRUE)) {
  install.packages("installr")
}
library(installr)
updateR()


# Create dummy variable from a factor
head(temdata)
# One 0/1 indicator column per distinct value of zp, named dummy_<value>
for (level in unique(temdata$zp)) {
  temdata[[paste("dummy", level, sep = "_")]] <- ifelse(
    temdata$zp == level,
    1,
    0
  )
}


# Using semi colon to send multiple input
# Draw 10000 normal values (mean 5, sd 10), then run three statements on one line
x <- rnorm(n = 10000, mean = 5, sd = 10)
mean(x); var(x); sqrt(var(x))

# Remove an object
y=rnorm(10)
rm(y)

# Empty the working space
# Caution: ls() lists every object in the current environment, so this removes
# all of them.
rm(list=ls())

# Remove all but some
# setdiff() takes the names to keep out of the ls() listing; only the remaining
# names are removed.
rm(list=setdiff(ls(),c("temdata", "temdata2")))


# Integer division
7 %/% 2

# Modulo = remainder
5 %% 2

# Define and print
# Wrapping an assignment in parentheses also prints the assigned value
(count <- c(25, 12, 7, 4, 6, 2, 1, 0, 2))

# Read csv by clicking
# file.choose() opens an interactive file picker and returns the chosen path
data <- read.csv(file.choose(), header = TRUE)

#Combine more than 1 csv files
# Pick up only .csv files from the working directory, and leave out the combined
# output file so that rerunning the snippet does not stack an earlier result
# onto itself.
output_file <- "temdata.binded.csv"
filenames <- setdiff(
  list.files(pattern = "\\.csv$", ignore.case = TRUE),
  output_file
)
# Read every file without a header row and stack them row-wise
temdata <- do.call("rbind", lapply(filenames, read.csv, header = FALSE))
write.table(temdata, file = output_file, sep = ",",
            col.names = FALSE, row.names = FALSE)


#Multiple QQ plot
  #split screen
# 3 x 3 grid of panels, filled column by column
layout(matrix(1:9, ncol = 3))
# One normal QQ plot (with reference line) per column among the first nine.
# The calls are made for their plotting side effect, so the list of results is
# discarded instead of being printed.
invisible(lapply(names(temdata)[1:9], function(x) {
  qqnorm(temdata[[x]], main = x)
  qqline(temdata[[x]])
}))




#Split for more plots
# mfrow = c(3, 3) asks for a 3-row by 3-column grid of plots on one device

par(mfrow=c(3,3))


#Double for loop
# Walk a 3 x 5 matrix row by row, printing each cell in turn
x <- matrix(1:15, nrow = 3, ncol = 5)
for (i in seq_len(nrow(x))) {
  for (j in seq_len(ncol(x))) {
    print(x[i, j])
  }
}



#While loop
# Print 0 through 9; the counter ends at 10
count <- 0
while (count < 10) {
  print(count)
  count <- count + 1
}


#Missing data
# convert -999s to NAs

# na.strings makes read.csv() treat the text "-999" as a missing value
read.csv("x.csv", na.strings = "-999")
# Replace every NA in temdata with 0
temdata[is.na(temdata)] <- 0


#convert NAs to -99s

# A comparison with NA (x == NA) is itself NA, never TRUE, so missing values
# have to be located with is.na()
vector[is.na(vector)] <- -99
temdata[is.na(temdata)] <- -99

#if you are having trouble converting <NA> (but not NA)
# Read text columns as character vectors instead of factors
temdata <- read.csv(file = "temdata.csv", stringsAsFactors = FALSE)


# add group mean
# Three alternative ways to attach per-grp means to every row of temdata.
# Columns that clash with existing names receive the ".mean" suffix.

# One variable (X), base R
temdata2 <- merge(
  temdata,
  aggregate(X ~ grp, data = temdata, FUN = mean, na.rm = TRUE),
  by = "grp", suffixes = c("", ".mean"), all = TRUE
)

# Several variables at once, base R
temdata2 <- merge(
  temdata,
  aggregate(cbind(X1, X2, X3, X4) ~ grp, data = temdata, FUN = mean,
            na.rm = TRUE),
  by = "grp", suffixes = c("", ".mean"), all = TRUE
)

# Several variables at once, plyr. na.rm = TRUE keeps missing values from
# turning a whole group mean into NA, in line with the aggregate() calls.
temdata2 <- merge(
  temdata,
  ddply(temdata, c("grp"), function(x) {
    colMeans(x[c("X1", "X2", "X3", "X4")], na.rm = TRUE)
  }),
  by = "grp", suffixes = c("", ".mean"), all = TRUE
)




#ifelse
y <- c(1, 2, 3, 4, 5, 5, 5)
# Keep every value except the 5s, which become NA
y2 <- ifelse(y != 5, y, NA)
y2


temdata <- data.frame(ID = c(2, 3, 4, 5), Hunger = c(415, 452, 550, 318))

# Map Hunger (the second column) onto the midpoint of its 100-wide band:
# [300, 400) -> 350, [400, 500) -> 450, [500, 600) -> 550, anything else -> NA
temdata$newcol <- with(
  temdata,
  ifelse(
    Hunger >= 300 & Hunger < 400, 350,
    ifelse(
      Hunger >= 400 & Hunger < 500, 450,
      ifelse(Hunger >= 500 & Hunger < 600, 550, NA)
    )
  )
)



#if
x <- 5
# Two-way choice: y becomes 0 because x is not above 6
y <- if (x > 6) 1 else 0
# Three-way choice: the middle branch matches, so y becomes 99
y <- if (x > 6) {
  1
} else if (x == 5) {
  99
} else {
  0
}


#sort a dataframe by the order of the elements in B
# order() returns the row permutation that puts B in ascending order
temdata[order(temdata$B), ]

#sort the dataframe in reverse order
# rev() flips that ascending permutation end to end
temdata[rev(order(temdata$B)), ]


#create combinations
m <- c(54, 38, 51, 62, 18, 31, 58, 74, 35, 34)
f <- c(41, 18, 19, 39, 44, 18, 58, 21, 38)

mean(m)
mean(f)

# Mean of every 8-element subset of m
combn(m, 8, FUN = mean)
# Every 8-element subset of f, one subset per column
combn(f, 8)

# Smallest / largest subset mean
min(combn(m, 8, FUN = mean))
max(combn(f, 8, mean))


#setting contrasts
# Show the current setting, switch to sum-to-zero coding, then switch to
# treatment coding (the last call is the one left in effect)
options("contrasts")
options(contrasts = c("contr.sum", "contr.poly"))
options(contrasts = c("contr.treatment", "contr.poly"))


# delete if all NA
# Keep a row when it has at least one non-missing cell
temdata <- temdata[rowSums(!is.na(temdata)) > 0, ]


# add group frequency
# ddply() hands transform() one grp slice at a time, so length(grp) is the
# number of rows in that group; it is repeated down the slice and stored in a
# column named cellsize.
temdata <- ddply(temdata, "grp", transform, cellsize = length(grp))


#create new folder
# The relative path means the folder is created under the working directory
dir.create("testdir")

#split data frame
library(datasets)
head(airquality)
# One data frame per value of Month, collected in a list named by month
splitdata <- split(x = airquality, f = airquality$Month)
splitdata
str(splitdata)
# Second element of the list
splitdata[[2]]


x <- list(a = 1:5, b = rnorm(10))
x
# Mean of each list element
lapply(x, mean)

# output is always a list

x <- 1:4
# runif(1), runif(2), ... : one draw vector per element of x
lapply(x, runif)
# Extra arguments after the function are forwarded to it
lapply(x, runif, min = 10, max = 20)


x <- list(a = matrix(1:4, 2, 2), b = matrix(1:6, 3, 2))

# First column of every matrix in the list
lapply(x, function(mat) mat[, 1])


# sapply

x <- list(a = 1:5, b = rnorm(10), c = runif(10))
x
# Same computation twice: lapply() returns a list, sapply() simplifies the
# three means to a named numeric vector
lapply(x, mean)
sapply(x, mean)



#apply generally used for rows or columns

x <- matrix(rnorm(200), nrow = 20, ncol = 10)
x
# Margin 2 = columns: one mean per column
apply(x, 2, mean)
# Margin 1 = rows: one sum per row
apply(x, 1, sum)

#tapply
x <- c(1:10, rnorm(10), runif(10, 3, 5))
# Two ways to build the same 3-level grouping, 10 values per level
f <- gl(3, 10)
?gl
h <- factor(rep(1:3, each = 10))
# Group means, with either grouping factor
tapply(x, f, mean)
tapply(x, h, mean)
# Keep the result as a list instead of simplifying it
tapply(x, h, mean, simplify = FALSE)
# Each group yields two numbers (min and max)
tapply(x, h, range)




#missing data proportion percentage
# For every column of temdata, build a one-row data frame holding
#   nmiss    - number of NA entries
#   n        - number of entries
#   propmiss - nmiss / n
# Returns a list with one such data frame per column.
propmiss <- function(temdata) {
  summarise_column <- function(x) {
    n_missing <- sum(is.na(x))
    n_total <- length(x)
    data.frame(nmiss = n_missing, n = n_total, propmiss = n_missing / n_total)
  }
  lapply(temdata, summarise_column)
}
# Per-column missing-data summary for temdata
propmiss(temdata)


#upper case
# Overwrite childid with its upper-cased version
temdata$childid <- toupper(x = temdata$childid)



# plot graph individual all variables
# Writes one page per column (columns 7 to 55 of temdata) to a PDF. Each page
# shows a density-scaled histogram, a normal curve with the column's mean and
# sd, a smoothed kernel density, the mean (blue) and mean +/- 3 sd (red).

plotpdf <- "C:/Users/Desktop/work/multiplePLOTS.pdf"
pdf(file = plotpdf)
# finally= closes the PDF device even when one of the columns makes a plotting
# call fail, so the file is never left open.
tryCatch(
  for (i in 7:55) {
    values <- temdata[, i]
    muis <- round(mean(values, na.rm = TRUE), 3)
    sdis <- round(sd(values, na.rm = TRUE), 3)
    meansc <- c("mean", muis)
    hist(values, freq = FALSE, main = names(temdata)[i], xlab = meansc)
    #lines(density(values, na.rm = TRUE))
    # x is the plotting variable that curve() itself supplies
    curve(dnorm(x, mean = muis, sd = sdis), add = TRUE)
    lines(density(values, na.rm = TRUE, adjust = 2),
          lty = "dotted", col = "darkgreen", lwd = 2)
    abline(v = muis, col = "blue")
    abline(v = muis + 3 * sdis, col = "red")
    abline(v = muis - 3 * sdis, col = "red")
  },
  finally = dev.off()
)


# read in upper directory
# ".." is the parent of the working directory
dd <- read.csv(file = file.path("..", "temdata.csv"))

12.1 More on the apaStyle package

Here are more details on the apaStyle package:

# library() stops with an error when a package is missing, instead of
# returning FALSE and letting the later calls fail
library(pastecs)
library(apaStyle)
# If loading rJava throws an error, point JAVA_HOME at the Java installation
# and try again
tryCatch(
  library(rJava),
  error = function(e) {
    Sys.setenv(JAVA_HOME = "C:\\Program Files\\Java\\jre1.8.0_111") # for 64-bit version
    library(rJava)
  }
)

#define a data set
# NOTE(review): temdataet is not defined anywhere in the visible code --
# confirm which data frame is meant.

# Descriptives table for the first five columns, saved as test.docx
apa.descriptives(
  data = temdataet[, 1:5],
  variables = names(temdataet[, 1:5]),
  report = "",
  title = "test",
  filename = "test.docx",
  note = NULL,
  position = "lower",
  merge = FALSE,
  landscape = FALSE,
  save = TRUE
)

# Three-column table with a single header row
example <- data.frame(
  c("Column 1", "Column 2", "Column 3"),
  c(3.45, 5.21, 2.64),
  c(1.23, 1.06, 1.12)
)
apa.table(data = example, level1.header = c("Variable", "M", "SD"))


# Five-column table with a two-level header: the level-1 headers span 1, 2 and
# 1 columns according to level1.colspan
example <- data.frame(
  c("Column 1", "Column 2", "Column 3"),
  c(3.45, 5.21, 2.64),
  c(1.23, 1.06, 1.12),
  c(8.22, 25.12, 30.27),
  c("+", "**", "***")
)


apa.table(
  data = example,
  level1.header = c("", "Descriptives", "Inferential"),
  level1.colspan = c(1, 2, 1),
  level2.header = c("Variable", "M", "SD", "t-value", "*")
)$table

12.2 A useful shiny application

Below is a Shiny app example (Figure 12.1) that calculates the sample size for an analysis of covariance design:

# Embed the hosted Shiny app in the page, 800px tall
knitr::include_app(
  "https://burakaydin.shinyapps.io/ancovaN/",
  height = "800px"
)

Figure 12.1: ANCOVA sample size calculator

12.3 Update bookdown

# Straight quotes are required: typographic quotes are not valid R string
# delimiters
bookdown::publish_book(render = "local")

Adler, Daniel, and Duncan Murdoch. 2017. Rgl: 3D Visualization Using Opengl. https://CRAN.R-project.org/package=rgl.

Aho, Ken. 2016. Asbio: A Collection of Statistical Tools for Biologists. https://CRAN.R-project.org/package=asbio.

Allaire, JJ, Joe Cheng, Yihui Xie, Jonathan McPherson, Winston Chang, Jeff Allen, Hadley Wickham, Aron Atkins, and Rob Hyndman. 2016. Rmarkdown: Dynamic Documents for R. http://rmarkdown.rstudio.com.

Appelbaum, Mark I., and Elliot M. Cramer. 1976. “Balancing - Analysis of Variance by Another Name.” Journal of Educational Statistics 1 (3): 233–52.

Bakeman, Roger. 2005. “Recommended Effect Size Statistics for Repeated Measures Designs.” Behavior Research Methods 37 (3): 379–84. doi:10.3758/BF03192707.

Bates, Douglas, Martin Mächler, Ben Bolker, and Steve Walker. 2015. “Fitting Linear Mixed-Effects Models Using lme4.” Journal of Statistical Software 67 (1): 1–48. doi:10.18637/jss.v067.i01.

Box, George E. P., and Norman R. Draper. 1987. Empirical Model-Building and Response Surfaces. New York: Wiley.

Breheny, Patrick, and Woodrow Burchett. 2016. Visreg: Visualization of Regression Models. https://CRAN.R-project.org/package=visreg.

Carlson, James E., and Neil H. Timm. 1974. “Analysis of Nonorthogonal Fixed-Effects Designs.” Psychological Bulletin 81 (9): 563–70.

Cohen, Jacob. 1962. “The Statistical Power of Abnormal-Social Psychological Research: A Review.” The Journal of Abnormal and Social Psychology 65 (3): 145–53.

Daunic, Ann P., Stephen W. Smith, Cynthia W. Garvan, Brian R. Barber, Mallory K. Becker, Christine D. Peters, Gregory G. Taylor, Christopher L. Van Loan, Wei Li, and Arlene H. Naranjo. 2012. “Reducing Developmental Risk for Emotional/Behavioral Problems: A Randomized Controlled Trial Examining the Tools for Getting Along Curriculum.” Journal of School Psychology 50 (2): 149–66.

de Vreeze, Jort. 2016. ApaStyle: Generate Apa Tables for Ms Word. https://CRAN.R-project.org/package=apaStyle.

Field, Andy P., Jeremy Miles, and Zoë Field. 2012. Discovering Statistics Using R. Thousand Oaks, Calif.; London: Sage.

Gelman, Andrew, and Jennifer Hill. 2007. Data Analysis Using Regression and Multilevel/Hierarchical Models. Cambridge; New York: Cambridge University Press.

Hirshleifer, Sarojini, David McKenzie, Rita Almeida, and Cristobal Ridao-Cano. 2016. “The Impact of Vocational Training for the Unemployed: Experimental Evidence from Turkey.” The Economic Journal 126 (597): 2115–46.

Holm, Sture. 1979. “A Simple Sequentially Rejective Multiple Test Procedure.” Scandinavian Journal of Statistics 6 (2): 65–70.

Højsgaard, Søren, and Ulrich Halekoh. 2016. DoBy: Groupwise Statistics, Lsmeans, Linear Contrasts, Utilities. https://CRAN.R-project.org/package=doBy.

Komsta, Lukasz, and Frederick Novomestky. 2015. Moments: Moments, Cumulants, Skewness, Kurtosis and Related Tests. https://CRAN.R-project.org/package=moments.

Lakens, Daniel. 2013. “Calculating and Reporting Effect Sizes to Facilitate Cumulative Science: A Practical Primer for T-Tests and Anovas.” Frontiers in Psychology 4: 863. doi:10.3389/fpsyg.2013.00863.

Lawrence, Michael A. 2016. Ez: Easy Analysis and Visualization of Factorial Experiments. https://CRAN.R-project.org/package=ez.

Lemon, Jim, Ben Bolker, Sander Oom, Eduardo Klein, Barry Rowlingson, Hadley Wickham, Anupam Tyagi, et al. 2016. Plotrix: Various Plotting Functions. https://CRAN.R-project.org/package=plotrix.

Lumley, Thomas, and Achim Zeileis. 2015. Sandwich: Robust Covariance Matrix Estimators. https://CRAN.R-project.org/package=sandwich.

Mair, Patrick, and Rand Wilcox. 2016. WRS2: A Collection of Robust Statistical Methods. https://CRAN.R-project.org/package=WRS2.

Muenchen, Robert A. 2011. R for Sas and Spss Users. 2nd ed. New York: Springer.

Myers, Jerome L., A. Well, Robert F. Lorch, and Ebooks Corporation. 2013. Research Design and Statistical Analysis. 3rd ed. New York: Routledge.

O’Brien, Robert M. 2007. “A Caution Regarding Rules of Thumb for Variance Inflation Factors.” Quality and Quantity 41 (5).

Olejnik, Stephen, and James Algina. 2003. “Generalized Eta and Omega Squared Statistics: Measures of Effect Size for Some Common Research Designs.” Psychological Methods 8 (4): 434–47.

Olsson, Ulf. 1979. “Maximum Likelihood Estimation of the Polychoric Correlation Coefficient.” Psychometrika 44 (4). Springer: 443–60.

Pearl, Judea. 2009. Causality: Models, Reasoning, and Inference. 2nd ed. Cambridge;New York: Cambridge University Press.

R Core Team. 2016a. Foreign: Read Data Stored by Minitab, S, Sas, Spss, Stata, Systat, Weka, dBase, ... https://CRAN.R-project.org/package=foreign.

———. 2016b. R: A Language and Environment for Statistical Computing. Vienna, Austria: R Foundation for Statistical Computing. https://www.R-project.org/.

Rawlings, John O., Sastry G. Pantula, and David A. Dickey. 1998. Applied Regression Analysis: A Research Tool. 2nd ed. New York: Springer.

Revelle, William. 2016. Psych: Procedures for Psychological, Psychometric, and Personality Research. https://CRAN.R-project.org/package=psych.

RStudio Team. 2016. RStudio: Integrated Development Environment for R. Boston, MA: RStudio, Inc. http://www.rstudio.com/.

Sarkar, Deepayan. 2016. Lattice: Trellis Graphics for R. https://CRAN.R-project.org/package=lattice.

Tippmann, Sylvia. 2015. “Programming Tools: Adventures with R: A Guide to the Popular, Free Statistics and Visualization Software That Gives Scientists Control of Their Own Data Analysis.” Nature, no. 7532: 109.

Torchiano, Marco. 2016. Effsize: Efficient Effect Size Computation. https://CRAN.R-project.org/package=effsize.

Uebersax, John S. 2015. “Introduction to the Tetrachoric and Polychoric Correlation Coefficients.” Retrieved from http://www.john-uebersax.com/stat/tetra.htm.

Verzani, John. 2014. Using R for Introductory Statistics. 2nd ed. Boca Raton: CRC Press, Taylor & Francis Group.

Wickham, Hadley. 2011. “The Split-Apply-Combine Strategy for Data Analysis.” Journal of Statistical Software 40 (1): 1–29. http://www.jstatsoft.org/v40/i01/.

———. 2016. Tidyr: Easily Tidy Data with ‘Spread()‘ and ‘Gather()‘ Functions. https://CRAN.R-project.org/package=tidyr.

Wickham, Hadley, and Winston Chang. 2016. Ggplot2: Create Elegant Data Visualisations Using the Grammar of Graphics. https://CRAN.R-project.org/package=ggplot2.

Wilcox, Rand R. 2012. Introduction to Robust Estimation and Hypothesis Testing. 3rd ed. US: Academic Press.

Xie, Yihui. 2016. Bookdown: Authoring Books with R Markdown. https://CRAN.R-project.org/package=bookdown.