#--------------------------------------------------------
# To install packages from Bioconductor
#--------------------------------------------------------

#if (!requireNamespace("BiocManager", quietly = TRUE))
#  install.packages("BiocManager")
#BiocManager::install("RDAVIDWebService")
#BiocManager::install("org.Hs.eg.db")
#--------------------------------------------------------

# Load packages
library("RDAVIDWebService")
library("org.Hs.eg.db")

# Read the dataset. Strings stay character vectors (no factors) and the
# column names are taken from the header as-is (check.names = FALSE).
DEG.list <- read.csv(
  "FC721_DEG_FC_YvsO.csv",
  header = TRUE, # spelled out: `T` is an ordinary variable and can be reassigned
  stringsAsFactors = FALSE,
  check.names = FALSE
)

# Explore dataset: first rows, column types, and per-column summaries
head(DEG.list)
str(DEG.list)
summary(DEG.list)

# When we explore the dataset we need to check:
# 1. Do the column types make sense (i.e. are numeric columns numeric, etc.)?
# 2. How many missing values do we have in each column?
# 3. Do the ranges of the numeric columns make sense?
#    For example, body temperature cannot be negative, p-values cannot exceed 1, etc.
# 4. Do we have appropriate column names?

# Here it does not look like the first column has a name.
# We can turn this column into "row names" for this dataset or we can give this column a name.
# Let's first check how many unique values are there in the first column
length(unique(DEG.list[, 1]))
# If that count equals the number of rows, all names in this column are unique
# and we can turn them into row names by re-reading the file.
# NOTE(review): this re-read originally pointed at "Session7_DEG_FC_YvsO.csv"
# while the first read uses "FC721_DEG_FC_YvsO.csv"; the same file is used here
# so the uniqueness check above applies to the data being loaded. Confirm that
# both names referred to the same dataset.
DEG.list <- read.csv(
  "FC721_DEG_FC_YvsO.csv",
  header = TRUE,
  row.names = 1, # column index that contains row names
  stringsAsFactors = FALSE,
  check.names = FALSE
)
# Let's check our dataframe again
str(DEG.list)
head(DEG.list)
 
 
# ------------------------------------------------------
#  Connect to DAVID Web Service
# ------------------------------------------------------
# Create the DAVID Web Service client. The e-mail must be registered with
# DAVID; the url points at the SOAP 1.2 endpoint of the service.
david <- DAVIDWebService$new(
  email = 'chz2009@bu.edu',
  url = "https://david.ncifcrf.gov/webservice/services/DAVIDWebService.DAVIDWebServiceHttpSoap12Endpoint/"
)

# Check if we connected successfully. Every later step talks to the service,
# so stop here with a clear message instead of failing later with an obscure one.
david.connected <- is.connected(david)
print(david.connected)
if (!isTRUE(david.connected)) {
  stop(
    "Could not connect to the DAVID Web Service; ",
    "check the registered e-mail address and the service URL.",
    call. = FALSE
  )
}

# Number of leading rows of DEG.list whose row names are sent for annotation
num <- 20
# head() returns at most `num` names, so a table with fewer than `num` rows
# does not produce NA keys the way indexing with [1:num] would.
# The row names are used as gene symbols (see keytype below).
keys <- head(row.names(DEG.list), num)


# org.Hs.eg.db - is an organism specific package, providing detailed information about species.
# To access the objects in this package we will use select() method from the package AnnotationDbi

# Map each gene symbol to its Entrez gene ID
egids <- AnnotationDbi::select(
  org.Hs.eg.db,
  keys = keys,
  columns = c("SYMBOL", "ENTREZID"),
  keytype = "SYMBOL"
)

# What kind of object is egids? What's inside the egids object?
class(egids)
str(egids)
head(egids)


# Collect the Entrez IDs to upload. The column is selected by name rather than
# by position; duplicates (a symbol lookup can return repeated IDs) and NA
# entries (symbols without a mapping) are dropped before contacting DAVID.
entrez.ids <- unique(egids[["ENTREZID"]])
entrez.ids <- entrez.ids[!is.na(entrez.ids)]
if (length(entrez.ids) == 0) {
  stop("None of the selected gene symbols could be mapped to an Entrez gene ID.",
       call. = FALSE)
}

# Upload the IDs to DAVID as a gene list named "Top DEGs"
result <- addList(
  david,
  entrez.ids,
  idType = "ENTREZ_GENE_ID",
  listName = "Top DEGs",
  listType = "Gene"
)
# What kind of object is result? What's inside the result object?
class(result)
str(result)

# Restrict the analysis to the annotation categories of interest,
# then request the functional annotation chart for the uploaded list
annotation.categories <- c("GOTERM_BP_DIRECT", "KEGG_PATHWAY")
setAnnotationCategories(david, annotation.categories)
results.DAVID <- getFunctionalAnnotationChart(david)

# Inspect the chart object: its class, structure, and first rows
class(results.DAVID)
str(results.DAVID)
head(results.DAVID)

# Have DAVID write the functional annotation chart to a file
chart.file <- "FuncAnnotChart.tsv"
getFunctionalAnnotationChartFile(david, chart.file)