R

MongoDB and R

Using a MongoDB server together with R Write data to database # Connection settings library(mongolite) USER = "me" PASS = "myfancypw" HOST = "myHost.com" DBCON = 'admin' db='myDb' collection = "MyCollection" How to work with a connection object # Make connection URI = sprintf("mongodb://%s:%s@%s/%s", USER, PASS, HOST, DBCON) c <- mongo(collection = collection, db = db, url = URI) # insert data.frame into Collection c$insert(d) # create index for field c$index(add = '{"depth" : 1}') # create index for field id in Collection with name Samples with unique property cmd = 'db.

Basic data operations

Basic data operations Indexing and subsetting Data aggregation Database operations Basic data operations Indexing and subsetting # Filter dataframe on value subset(data, Diagnosis != 'SIBS') # Get selection of data with matching columns bigData[! bigData$compareVal %in% smallData$compare,] # filter on complete data sets dt = dt[!is.na(colx),] Control structures # Switch mutationTypeTmp = switch(changeInBaseLength, '1->1' = 'SUB', '0->1' = 'INS', '1->0' = 'DEL', NA ) Data aggregation # pivot table table(data$Gender,data$Diagnosis) # apply function to values by factor aggregate(data$AGE~data$Diagnosis,FUN = mean) # transpose matrix/dataframe t(data) # apply conditionwise function to data aggregate(d$rt, by=list(d$condition), FUN=mean,na.

Environment

Environment Info about environment # .libPaths() Keep R updated # in Windows install.packages("installr") library(installr) updateR()

Plotting

Data visualization ggplot simple plot commands # adding title to ggplot-object gg + ggtitle("My title") #customize labels of axes gg +xlab("Dose (mg)") gg + ylab("Teeth length") # divide legend in two columns gg + guides(fill=guide_legend(ncol=2)) Correlation matrix z <- cor(myData,use="pairwise.complete.obs") xTable = xtable(z) pdf('CON.pdf') ggplot(z.m, aes(X1, X2, fill = value)) + geom_tile() + scale_fill_gradient(low = "white", high = "black") dev.off() Boxplot with error bars dist=ggplot(distdataSum, aes(x=Region, y=Value, colour=Diagnosis, group=Diagnosis)) + scale_colour_manual(values=c("#CC0000","#FF9900","#0000CC"),name="Diagnostic group")+ geom_errorbar(aes(ymin=Value-se, ymax=Value+se, group=Diagnosis), colour="black", width=.

Statistics

Statistics Linear statistics # get summary stats summary (lm(d$rt~d$condition)) # multiple regression lm1 = lm(d$values~d$factor_1 * d$factor2) anova (lm1); # mixed effect model lmer(DV~ IV1 + IV2 + (1|RandomEffect),data=subset(myData, !ID %in% bplot$out)) # Calculate z-scores zscores = (d$values - mean(d$values)) / sd(d$values) # Get probabilities pnorm(zscores) # set quantitative value to factor data$Diagnosis = factor (data$Diagnosis, levels = c('CON','SIBS','ADHD')) # filter outliers bplot = boxplot(data$dependent_measure~data$independent_measure) outliers = data$ID[data$dependent_measure %in% bplot$out ] Groupwise stats with dplyr library(dplyr) library(broom) lms = dt.

Utilities

Utilities Rscript Option parser # Add option parser library suppressPackageStartupMessages(library(optparse)) # Add options to script; option_list <- list( # opt$export is set to true in case parameter is provided (store_true), otherwise false make_option(c("--export"), action = "store_true", type = "logical", default = FALSE, help = "Export data"), make_option(c("--myParameter"), action = "store", type = "character", default = NULL, help = "Description of input parameter")) # Method call opt <- parse_args(OptionParser(option_list = option_list)) # make option mandatory if (is.

File in out

Important libraries connections {base} Read in # Read in table and fill empty values d = read.table(fName, sep ='\t', fill = TRUE, stringsAsFactors = FALSE) # Read in table with different separators d = read.delim(fName, fill = TRUE, stringsAsFactors = FALSE) d = read.csv(fName, FALSE) # Read json files library(jsonlite) a <-fromJSON(fName) # Read excel file library(xlsx) wb <- loadWorkbook(file = WF_TEMPLATE_FILENAME) # scan is more flexible but needs to know the expected data structure beforehand d = scan(fName, what = list(character(),double(0),double()), skip =1) # Fast read d = fread(fName, sep ='\t', stringsAsFactors = FALSE) # process script file source('~/Ranalyses/PaperOneFinal/getData.