Everyday R code (8): logistic regression

# We have a data set with all numeric variables (some of the columns only have 0 or 1), and we want to run a logistic regression model. First we check the correlations and exclude the variables that have high correlation (remove one and keep the other). COR<-cor(Answered_data2) data<-subset(data,select=-c(A,C,D,AA,CC)) # We need to run the correlation multiple times … Read more

Everyday R code (7)

#If some code does not work, we need to figure out whether it is because some library is not loaded. For example, to use filter, we need the dplyr library to be loaded first. library(dplyr) T<-filter(P_HMD_TEST_PRED,Tabc==1) # We usually use conditions like this: A<-filter(data, EE==1 & EE==2). But what if we do not want the AND logic — we want … Read more

Everyday R code (6)

# R is simple to use for data analysis work. If you want to write complicated loops and algorithms, that would be another story. Here is the code for using R in a simple way.   #Sometimes we need to install new packages; our RStudio is running on the server, so it has a path for … Read more

Everyday R code (5)

#This is the most commonly used R code at work every day. #read data from data1.txt file and data2.txt file PP_C<-read.delim("data1.txt",header=T,sep='\t',comment.char="",quote="",row.names=NULL, stringsAsFactors= FALSE ) CES<-read.delim("data2.txt",header=T,sep='\t',comment.char="",quote="",row.names=NULL, stringsAsFactors= FALSE ) #check how many rows there are nrow(PP_C) nrow(CES) #check how many unique COMM_ID values there are length(unique(PP_C$COMM_ID)) length(unique(CES$COMM_ID)) #check column names for each file colnames(PP_C) colnames(CES) #select specific columns to be … Read more

Everyday R code (4)

#Otherwise, we usually write it into a flag column this way, before we pivot it into Excel and use an Excel pivot table there to handle tons of filter change requests from the manager. data$Int<-NA data$Int[data$Site %in% c("a", "b",  "c"  )]<-"Int" data$Int[data$Site  %in% c("C","G", "O","E","I")]<-"notInt" #pivot table tableSummary<-data[,list(ContactCount=length(ID)),by=list(Int,Group,SKILL,RESPONSE,HAS_POLL, TYPE,survey_question_code)] write.csv(tableSummary,'summaryTable.csv') # Make sure you have included all the possible filters, then … Read more

Everyday R code (3)

  #Excluding some rows: we need to write '!' in front of everything, because the expression df1$id %in% idNums1 produces a logical vector. To negate it, you need to negate the whole vector, like this: !(df1$id %in% idNums1) test<-filter(cesab7, !( KILL_ID %in% c(3,6,9,10))) #including some rows and filtering others out test<-filter(cesab7, KILL_ID %in% c(3,6,9,10))   ## … Read more

Everyday R code (2)

#filter test<-c(1,4) control<-c(2,5) data2<-filter(data, type %in% c(test,control) & freq!=1) #rank based on ID; for the same ID, choose one record data$value<-seq(1,nrow(data)) #get a new column called value: 1,2,3,… data<-data.table(data) data<-data[,valRank:=rank(-value),by=c("ID","SURVEY_ID","CODE")] data<-filter(data,valRank==1) nrow(data) data<-data.frame(data) data2<-subset(data,select=-c(value,valRank)) #delete 2 columns unique(data2$CODE) a<-unique(data[,c(1,2,3)]) #selecting 3 columns b<-data.frame(table(a$ID))   #count of each ID head(b) colnames(b)<-c("ID","freq") # rename columns # … Read more

Everyday R code (1)

Importing Data #call commonly used libraries library(RSQLite) library(plyr) library(dplyr) library(data.table) library(reshape) #Importing Data #Importing .txt file data<-read.delim("skill.txt",header=T,sep='\t',comment.char="",quote = "", row.names = NULL, stringsAsFactors = FALSE) #Importing .csv file data<-read.csv("skill.csv",header=T,stringsAsFactors = FALSE) #Importing .Rdata # The names depend on the original data name in the .Rdata file. load("skill.Rdata") # If previously you saved the file using … Read more