#This is the most commonly used R code at work everyday.
#read data from data.txt file and data2.txt file
PP_C<-read.delim(“data1.txt”,header=T,sep=’\t’,comment.char=””,quote=””,row.names=NULL, stringsAsFactors= FALSE )
CES<-read.delim(“data2.txt”,header=T,sep=’\t’,comment.char=””,quote=””,row.names=NULL, stringsAsFactors= FALSE )
#check how many rows there
nrow(PP_C)
nrow(CES)
#check how many unique COMM_ID there
length(unique(PP_C$COMM_ID))
length(unique(CES$COMM_ID))
#check column names for each file
colnames(PP_C)
colnames(CES)
#select specific columns to be used
D1<-subset(CES,select=c(COMM_ID,SITEID,SIC))
# check the rows on the top of it
head(D1)
# read data from .csv file
siteGroup<-read.csv(“data3.csv”,header=T,stringsAsFactors = FALSE)
head(siteGroup)
#left join siteGroup to D1 using SITEID which is unique here
D2<-merge(D1,siteGroup, by=”SITEID”,all.x=T)
#write a new column called IorG which would be an indicator for different Group
D2$IorG<-NA
D2$IorG[D2$Group %in% c(“India”, “Japan”, “China” )]<-“I”
D2$IorG[D2$Group %in% c(“US”,”CA”)]<-“G”
#count of IorG by Group in table format. For example, GroupA has 10 I & 20 G, Group B has 30 I & 50 G.
table(D2$Group,D2$IorG)
#usually it’s data frame and we need to transfer it to data.table format, then we can use the command
# for flagedtable
D2<-data.table(D2)
#this is very useful to get a table for pivot purpose with any filter change fast.
#data name is D2, it has columns with names ID,A,B,C,D, HASit,answered,IorG,Group,SITEID, and SIC.
#for each content combination of (HASit,answered,IorG,Group,SITEID,SIC) in by=list(), we calculated how many ID does each group has, and the summary of A, B,C,D.
#using this data which is written to flagedtable, we can export it to excel and do pivot table in excel #based on filter requirement and so on easily.
flagedtable<-D2[,list(ContactCount=length(ID),Acount=sum(A),Bcount=sum(B),NUM=sum(C),DC=sum(D)),by=list(HASit,answered,IorG,Group,SITEID,SIC)]
nrow(flagedtable)
#write it into a csv file. When doing pivot table in csv, it can only save one sheet which is the current one. So usually we need to save it as excel workbook and become a file like XXX.xlsx.
write.csv(flagedtable,’Q1.csv’)