#filter
test<-c(1,4)
control<-c(2,5)
data2<-filter(data, type %in% c(test,control) & freq!=1)
#rank based on ID, for the same ID, choose one record of it
data$value<-seq(1,nrow(data)) #get a new column called value, 1,2,3,…
data<-data.table(data)
data<-data[,valRank:=rank(-value),by=c(“ID”,”SURVEY_ID”,”CODE”)]
data<-filter(data,valRank==1)
nrow(data)
data<-data.frame(data)
data2<-subset(data,select=-c(value,valRank)) #delete 2 columns
unique(data2$CODE)
a<-unique(data[,c(1,2,3)]) #selecting 3 columns
b<-data.frame(table(a$ID)) #count of each ID
head(b)
colnames(b)<-c(“ID”,”freq”) # rename columns
# merge, left outer join is the most commonly used one.
data2<-merge (data,b,by=” ID”, all.x=TRUE)
#Outer join: merge(x = df1, y = df2, by = “CustomerId”, all = TRUE)
#Left outer: merge(x = df1, y = df2, by = “CustomerId”, all.x = TRUE)
#Right outer: merge(x = df1, y = df2, by = “CustomerId”, all.y = TRUE)
#Cross join: merge(x = df1, y = df2, by = NULL)
#Removed rows with ID as na
data2<-data[!is.na(data$ID),]
## write 2 columns together into one
data$survey<-paste(data$NAME,data$CODE)
Very helpful. Can you post the file (skill.csv)? Thanks.