Everyday R code (2)

#filter

test<-c(1,4)

control<-c(2,5)

data2<-filter(data, type %in% c(test,control) & freq!=1)

#rank based on ID, for the same ID, choose one record of it

data$value<-seq(1,nrow(data)) #get a new column called value, 1,2,3,…

data<-data.table(data)

data<-data[,valRank:=rank(-value),by=c(“ID”,”SURVEY_ID”,”CODE”)]

data<-filter(data,valRank==1)

nrow(data)

data<-data.frame(data)

data2<-subset(data,select=-c(value,valRank)) #delete 2 columns

unique(data2$CODE)

a<-unique(data[,c(1,2,3)]) #selecting 3 columns

b<-data.frame(table(a$ID)) #count of each ID

head(b)

colnames(b)<-c(“ID”,”freq”) # rename columns

# merge, left outer join is the most commonly used one.

data2<-merge (data,b,by=” ID”, all.x=TRUE)

#Outer join: merge(x = df1, y = df2, by = “CustomerId”, all = TRUE)

#Left outer: merge(x = df1, y = df2, by = “CustomerId”, all.x = TRUE)

#Right outer: merge(x = df1, y = df2, by = “CustomerId”, all.y = TRUE)

#Cross join: merge(x = df1, y = df2, by = NULL)

#Removed rows with ID as na

data2<-data[!is.na(data$ID),]

## write 2 columns together into one

data$survey<-paste(data$NAME,data$CODE)