Basic data operations

  • Basic data operations
    • Indexing and subsetting
    • Data aggregation
  • Database operations

Basic data operations

Indexing and subsetting

# Filter dataframe on value
subset(data, Diagnosis != 'SIBS')

# Get selection of data with matching columns
bigData[! bigData$compareVal %in% smallData$compare,]

# filter on complete data sets
dt = dt[!is.na(colx),]

Control structures

# Switch
mutationTypeTmp = switch(changeInBaseLength,
                           '1->1' = 'SUB',
                           '0->1' = 'INS',
                           '1->0' = 'DEL',
                           NA
  )

Data aggregation

# pivot table
table(data$Gender,data$Diagnosis)

# apply function to values by factor
aggregate(data$AGE~data$Diagnosis,FUN = mean)

# tranpose matrix/dataframe
t(data)

# apply conditionwise function to data
aggregate(d$rt, by=list(d$condition), FUN=mean,na.rm=TRUE)


# create new factor of equally sized bins
library(Hmisc)
d$bin = as.numeric(cut2(data$numVal, g=6))  

# make long dataframe wide by adding levels of column B  as rows to column A. Extra columns can be added to keep important row info
dcast(myData, extraInfoCol + colA ~ colB  ,value.var= 'columnNameCommonValue')

# make wide dataframe long by keeping only identity columns A en B
melt(dWide, id =c('colA', 'ColB'))

Database operations

# Fetch Database data
query = 'Select * FROM MyTable'

data = dbGetQuery(mysql,query)