Everyday R code (5)

#This is the most commonly used R code at work everyday.

#read data from data.txt file and data2.txt file

PP_C<-read.delim(“data1.txt”,header=T,sep=’\t’,comment.char=””,quote=””,row.names=NULL, stringsAsFactors= FALSE )

CES<-read.delim(“data2.txt”,header=T,sep=’\t’,comment.char=””,quote=””,row.names=NULL, stringsAsFactors= FALSE )

#check how many rows there

nrow(PP_C)

nrow(CES)

#check how many unique COMM_ID there

length(unique(PP_C$COMM_ID))

length(unique(CES$COMM_ID))

#check column names for each file

colnames(PP_C)

colnames(CES)

#select specific columns to be used

D1<-subset(CES,select=c(COMM_ID,SITEID,SIC))

# check the rows on the top of it

head(D1)

# read data from .csv file

siteGroup<-read.csv(“data3.csv”,header=T,stringsAsFactors = FALSE)

head(siteGroup)

#left join siteGroup to D1 using SITEID which is unique here

D2<-merge(D1,siteGroup, by=”SITEID”,all.x=T)

#write a new column called IorG which would be an indicator for different Group

D2$IorG<-NA

D2$IorG[D2$Group %in% c(“India”, “Japan”,  “China”  )]<-“I”

D2$IorG[D2$Group %in% c(“US”,”CA”)]<-“G”

#count of IorG by Group in table format. For example, GroupA has 10 I & 20 G, Group B has 30 I & 50 G.

table(D2$Group,D2$IorG)

#usually it’s data frame and we need to transfer it to data.table format, then we can use the command

# for flagedtable

D2<-data.table(D2)

#this is very useful to get a table for pivot purpose with any filter change fast.

#data name is D2, it has columns with names ID,A,B,C,D, HASit,answered,IorG,Group,SITEID, and SIC.

#for each content combination of (HASit,answered,IorG,Group,SITEID,SIC) in by=list(),  we calculated how many ID does each group has, and the summary of A, B,C,D.

#using this data which is written to flagedtable, we can export it to excel and do pivot table in excel #based on filter requirement and so on easily.

flagedtable<-D2[,list(ContactCount=length(ID),Acount=sum(A),Bcount=sum(B),NUM=sum(C),DC=sum(D)),by=list(HASit,answered,IorG,Group,SITEID,SIC)]

nrow(flagedtable)

#write it into a csv file. When doing pivot table in csv, it can only save one sheet which is the current one. So usually we need to save it as excel workbook and become a file like XXX.xlsx.

write.csv(flagedtable,’Q1.csv’)

Leave a Comment