- Basic data operations
- Indexing and subsetting
- Data aggregation
- Database operations
Basic data operations
Indexing and subsetting
# Filter dataframe on value
subset(data, Diagnosis != 'SIBS')
# Get selection of data with matching columns
bigData[! bigData$compareVal %in% smallData$compare,]
# filter on complete data sets
dt = dt[!is.na(colx),]
Control structures
# Switch
mutationTypeTmp = switch(changeInBaseLength,
'1->1' = 'SUB',
'0->1' = 'INS',
'1->0' = 'DEL',
NA
)
Data aggregation
# pivot table
table(data$Gender,data$Diagnosis)
# apply function to values by factor
aggregate(data$AGE~data$Diagnosis,FUN = mean)
# tranpose matrix/dataframe
t(data)
# apply conditionwise function to data
aggregate(d$rt, by=list(d$condition), FUN=mean,na.rm=TRUE)
# create new factor of equally sized bins
library(Hmisc)
d$bin = as.numeric(cut2(data$numVal, g=6))
# make long dataframe wide by adding levels of column B as rows to column A. Extra columns can be added to keep important row info
dcast(myData, extraInfoCol + colA ~ colB ,value.var= 'columnNameCommonValue')
# make wide dataframe long by keeping only identity columns A en B
melt(dWide, id =c('colA', 'ColB'))
Database operations
# Fetch Database data
query = 'Select * FROM MyTable'
data = dbGetQuery(mysql,query)