# Importing Data
# Load commonly used libraries
library(RSQLite)
library(plyr)
library(dplyr)
library(data.table)
library(reshape)
# Importing data ----
# NOTE: every example below assigns to `data`, so running them one after the
# other keeps only the last result. Run the one that matches your file type.

# Tab-delimited .txt file.
# quote = "" and comment.char = "" turn off quote and comment processing, so
# quote characters and '#' inside fields are read as ordinary text.
data <- read.delim("skill.txt", header = TRUE, sep = "\t", comment.char = "",
                   quote = "", row.names = NULL, stringsAsFactors = FALSE)

# Comma-separated .csv file.
data <- read.csv("skill.csv", header = TRUE, stringsAsFactors = FALSE)

# .Rdata file.
# load() restores objects under the names they had when they were saved, so
# the resulting object names depend on what is stored in the file.
load("skill.Rdata")
# If the file was written with save(thedata, file = "skill.Rdata"), then
# load("skill.Rdata") recreates `thedata`; inspect it with colnames(thedata),
# nrow(thedata), and so on. The file name passed to load() must match the
# saved name exactly (including letter case on case-sensitive file systems).
# If the file was written with save.image(file = "skill.Rdata"), the whole
# workspace was saved: load("skill.Rdata") restores every object that existed
# at that time, which can take a long time.
# Sanity checks to run once the data has been imported into `data` ----
nrow(data)                              # number of rows
colnames(data)                          # column names
head(data)                              # first rows
tail(data)                              # last rows
length(unique(data$categoricalColumn))  # number of distinct values
table(data$categoricalColumn)           # frequency of each value
mean(is.na(data$ID))                    # proportion (0-1) of missing ID
table(data$SURVEYname, data$TYPE)       # cross-tabulation of the two columns
View(data)                              # it is `View`, not `view`
# R names are case-sensitive: the object is `data`, not `Data`.
summary(data)
View(head(data, 10))                    # view only the first 10 rows
sum(is.na(data$HAS_it))                 # count of missing HAS_it values
# Replace blank strings with NA in every column.
# `data == ""` yields a logical matrix; cells that are already NA compare as
# NA and are left unchanged by the assignment.
data[data == ""] <- NA
# Share of contacts with type == 1 (a proportion between 0 and 1).
# NOTE(review): `cesab2` is not created in the visible part of this script;
# confirm it is loaded before this section runs.
o <- unique(cesab2[, c("COMM_ID", "type")])  # one row per (COMM_ID, type) pair
# `o` is already de-duplicated, so the filtered rows need no second unique().
nrow(filter(o, type == 1)) / nrow(o)

# Number of distinct contact IDs with type in (3, 5, 10, 11) and CODE == "A".
# NOTE(review): `l` is not created in the visible part of this script;
# confirm which data frame is intended here.
o <- filter(l, type %in% c(3, 5, 10, 11) & CODE == "A")
length(unique(o$ID))

# Share of unique type-1 contacts that have HAS_it == "Y".
# which() drops rows where `type` is NA. Plain logical indexing would keep an
# all-NA row for them, inflating nrow(o) and the NA count below, and would
# disagree with filter(), which also drops NA conditions.
# Columns 1, 9 and 11 must include HAS_it and COMM_ID, which are used below.
o <- unique(data[which(data$type == 1), c(1, 9, 11)])  # unique contacts
nrow(filter(o, HAS_it == "Y")) / nrow(o)
# Share of those contacts whose COMM_ID is not missing.
1 - sum(is.na(o$COMM_ID)) / nrow(o)
table(data$HAS_it)
nrow(data)