#---------------------------------------------------  #
#                                                     #
#          Graphics (ggplot2)                         #
#                                                     #
#---------------------------------------------------  #

# There are 3 main environments in R to create graphics:
#   R base utilities
#   lattice package
#   ggplot2 package
# There is also the powerful "grid" package and a couple of in-development
# interactive packages.

# In this tutorial we will concentrate on the popular ggplot2 library.

## Useful online resources:
#   The R Graph Gallery: https://www.r-graph-gallery.com/
#   ggplot Quick reference: http://r-statistics.co/ggplot2-cheatsheet.html
#   Data Visualization with ggplot2: http://r4ds.had.co.nz/data-visualisation.html
#   Graphics for Communication: http://r4ds.had.co.nz/graphics-for-communication.html
#   A ggplot2 tutorial with examples: http://r-statistics.co/

# Load package (must run before any ggplot() call below)
library(ggplot2)

### Epilepsy attacks dataset
# Data from a clinical trial of 59 patients with epilepsy (Breslow, 1996).
# Thall and Vail reported data from a clinical trial of 59 patients with
# epilepsy, 31 of whom were randomized to receive the anti-epilepsy drug
# Progabide and 28 of whom received a placebo.
# Baseline data consisted of the patient's age and the number of epileptic
# seizures recorded during the 8-week period prior to randomization.
# The response consisted of counts of seizures occurring during the four
# consecutive follow-up periods of two weeks each.
#* ID    - Patient identification number
#* Y1    - Number of epilepsy attacks patients have during the first follow-up period
#* Y2    - Number of epilepsy attacks patients have during the second follow-up period
#* Y3    - Number of epilepsy attacks patients have during the third follow-up period
#* Y4    - Number of epilepsy attacks patients have during the fourth follow-up period
#* Base  - Number of epileptic attacks recorded during 8 week period prior to randomization
#* Age   - Age of the patients
#* Trt   - a factor with levels placebo progabide indicating whether the
#*         anti-epilepsy drug Progabide has been applied or not
#* Ysum  - Total number of epilepsy attacks patients have during the four follow-up periods
#* Age10 - Age of the patients divided by 10
#* Base4 - Variable Base divided by 4

# Read data (requires network access)
dt <- read.csv("http://rcs.bu.edu/classes/FC764/epilepsy.csv")

# Explore the data
str(dt)
head(dt)

#---------------------------------------------------  #
#          ggplot() function
#---------------------------------------------------  #

# The ggplot2 package works with data frames. A plot is built by adding more
# layers to an existing plot.
# The variables (column names from the data frame) that are used in the plot
# should be passed through the aes() function (aesthetic).
# This call only initializes the plot (axes, no points yet):
ggplot(dt, aes(x = Base, y = Ysum))

# Once the plot is initialized we can start adding "geom" layers to it.
# To make a simple scatterplot we use geom_point()
ggplot(dt, aes(x = Base, y = Ysum)) +
  geom_point()

# We can add a prediction line through the points using geom_smooth().
# By default it adds confidence bands to the graph:
ggplot(dt, aes(x = Base, y = Ysum)) +
  geom_point() +
  geom_smooth()

# If we want to draw a linear model regression line, we can specify the
# "lm" method:
ggplot(dt, aes(x = Base, y = Ysum)) +
  geom_point() +
  geom_smooth(method = "lm")

# To find all the various options of geom_smooth see the help for this function
?geom_smooth

# The graph can be stored in an R object and additional layers can be added
# to it later:
g <- ggplot(dt, aes(x = Base, y = Ysum))
g1 <- g + geom_point()
g2 <- g1 + geom_smooth(method = "lm")
g2

#---------------------------------------------------  #
###    Specifying the title and axis labels
#---------------------------------------------------  #

# Let's add a title and labels to our existing plot:
g2 +
  ggtitle("Epilepsy attacks", subtitle = "based on the data from Breslow, 1996") +
  xlab("Number of attacks prior to randomization") +
  ylab("Number of attacks after randomization")

# A similar result can be achieved using a single function:
g2 +
  labs(
    title = "Epilepsy attacks",
    caption = "Breslow, 1996",
    x = "Number of attacks prior to randomization",
    y = "Number of attacks after randomization"
  )

#---------------------------------------------------  #
#          Color and Size of the points
#---------------------------------------------------  #

# A fixed color and size for the elements of a layer are set as arguments of
# the related geom, outside of aes():
g <- ggplot(dt, aes(x = Base, y = Ysum))
g +
  geom_point(color = "blue", size = 3) +
  geom_smooth(method = "lm", color = "brown")

# To change the color based on the value of a categorical variable, map it
# inside the aes() function:
g <- ggplot(dt, aes(x = Base, y = Ysum))
g +
  geom_point(aes(color = Trt), size = 3) +
  geom_smooth(method = "lm", color = "firebrick")

# We can move the position of the legend with the theme() function:
g <- ggplot(dt, aes(x = Base, y = Ysum))
g +
  geom_point(aes(color = Trt), size = 3) +
  geom_smooth(method = "lm", color = "firebrick") +
  theme(legend.position = "bottom") +
  labs(color = "Treatment") # change legend title

# Set legend.position to "none" if you want to
# remove legend completely

#---------------------------------------------------  #
#          ggplot2 themes
#---------------------------------------------------  #

# There are a few themes you can use:
#  * theme_gray
#  * theme_bw
#  * theme_light
#  * theme_dark
#  * theme_minimal
#  * theme_classic
#  * theme_linedraw
#  * theme_void

# Final scatterplot:
g <- ggplot(dt, aes(x = Base, y = Ysum)) +
  geom_point(aes(color = Trt), size = 5) +
  geom_smooth(method = "lm", color = "firebrick") +
  theme(legend.position = "bottom") +
  ggtitle("Epilepsy attacks", subtitle = "based on the data from Breslow, 1996") +
  xlab("Number of attacks prior to randomization") +
  ylab("Number of attacks after randomization") +
  labs(color = "Treatment")
g

#---------------------------------------------------  #
#          Barplot
#---------------------------------------------------  #

ggplot(dt, aes(x = Trt)) +
  geom_bar(stat = "count")

# Add title, colors, theme
g <- ggplot(dt, aes(x = Trt)) +
  geom_bar(stat = "count", fill = "steelblue4") +
  xlab("Treatment") +
  ylab("count") +
  ggtitle("Epilepsy observations") +
  theme_bw() +
  theme(plot.title = element_text(size = 14, face = "bold"))

# Add text (on top of each bar).
# after_stat(count) refers to the count computed by stat = "count"; it
# replaces the deprecated ..count.. notation (needs ggplot2 >= 3.3.0).
g +
  geom_text(
    aes(label = after_stat(count)),
    stat = "count",
    size = 5,
    vjust = -1
  )

#---------------------------------------------------  #
#          Boxplot
#---------------------------------------------------  #

g <- ggplot(dt, aes(x = Trt, y = Ysum)) +
  geom_boxplot()
g

#### Exercise:
# Change X and Y labels and add a title to the plot

# Add a small amount of random variation to the location of each
# observation point
g +
  geom_jitter(width = 0.2)

# Let's change the size of the title and make it bold:
g +
  geom_jitter(width = 0.2) +
  ggtitle("Number of attacks vs. Treatment") +
  theme(plot.title = element_text(size = 14, face = "bold"))

#---------------------------------------------------  #
#          Histogram
#---------------------------------------------------  #

# Default histogram
ggplot(dt, aes(x = Age)) +
  geom_histogram()

# By default the number of bins is not necessarily optimal.
# Let's try some other number of bins (here by setting the bin width to 5)
ggplot(dt, aes(x = Age)) +
  geom_histogram(binwidth = 5)

# Let's improve the appearance:
g <- ggplot(dt, aes(x = Age)) +
  geom_histogram(binwidth = 5, color = "black", fill = "white")
g

# We can add a density curve to the plot.
# The histogram is drawn on the density scale so that it is comparable with
# geom_density(); after_stat(density) replaces the deprecated ..density..
# notation (needs ggplot2 >= 3.3.0).
g <- ggplot(dt, aes(x = Age)) +
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 5,
    color = "black",
    fill = "white"
  ) +
  geom_density(alpha = .2, fill = "darkorange")
g