# ============================
# ===     R Graphics       ===
# ===    Bob Muenchen      ===
# ===  muenchen@utk.edu    ===
# ============================

# === Graphics Books & Web Sites ===
#
# Hadley Wickham's web page:
# http://had.co.nz/ggplot2/
#
# Discussion list devoted to ggplot2:
# http://groups.google.com/group/ggplot2
#
# Blog devoted to R http://R-Bloggers.com
#
# Graphics site http://addictedtor.free.fr/graphiques/
#
# R Graphics, by Paul Murrell (Traditional)
#
# Lattice: Multivariate Data Visualization with R
# by Deepayan Sarkar
#
# The Grammar of Graphics, by Leland Wilkinson
#
# The ggplot2 Package, by Hadley Wickham
#
# R for SAS and SPSS Users, by Robert A. Muenchen
#
# R for Stata Users, by Muenchen & Joseph Hilbe

# Install the ggplot2 package one time:
# (guarded so that re-running the script does not reinstall it)
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}

# Load it in each program that uses it:
library("ggplot2")

# Load data, zap missing values & attach
# NOTE: setwd() and attach() are kept because the rest of this script
# refers to the columns (workshop, gender, pretest, posttest) by bare
# name. Adjust the folder to wherever mydata100.RData lives.
setwd("c:/myRfolder")
load("mydata100.RData")
mydata100 <- na.omit(mydata100)
attach(mydata100)
head(mydata100)

# ===TRADITIONAL OR BASE GRAPHICS===
#
# R's first graphics package
#
# The only one that is "generic"
#
# Extremely flexible
#
# Not easy to use with groups
#
# Uses Traditional Graphics System

plot(workshop)            # Bar plot
plot(workshop, gender)    # Split bar plot
plot(gender, workshop)    # Split the other way
plot(workshop, posttest)  # Box plot
plot(posttest, workshop)  # Strip plot
plot(posttest)            # Index plot
plot(pretest, posttest)   # Scatter plot

hist(posttest)            # Histogram
rug(posttest)             # Add "rug" points to X axis

# ---Adding Embellishments---
#
# These work with traditional
# graphics functions

plot(pretest, posttest)

plot(pretest, posttest,
     pch  = 19,           # Plot CHaracter
     cex  = 2,            # Character EXpansion
     main = "My Main Title",
     xlab = "My X Axis Label",
     ylab = "My Y Axis Label")
grid()

par()                     # Graphics PARameters

# Generic means it will plot...
methods(plot)

# ---Plotting Groups---

# Uses MultiFrame parameter
# 2 rows, 1 column
# par() returns the previous settings, so keep them and restore them
# afterwards instead of assuming the layout was 1 x 1.
old_par <- par(mfrow = c(2, 1))
plot(workshop[gender == "Female"], main = "The Females")
plot(workshop[gender == "Male"],   main = "The Males")
par(old_par)
# What is wrong with the plot?

# ---Plotting Scatter with Regression---

# Manual approach shows basic idea:
plot(pretest, posttest)
abline(a = 18.78, b = 0.845)  # y = 18.78 + 0.845x

# The more R-like approach:
plot(pretest, posttest)
myModel <- lm(posttest ~ pretest, data = mydata100)
names(myModel)
# coef() is used rather than myModel$coef, which only works through
# partial matching of the "coefficients" component name.
coef(myModel)
abline(coef = coef(myModel))

# ===Lattice Graphics===
# Also called trellis, panel, or "by" plots
#
# Good with groups
#
# Similar to SAS SGPLOT procedures
# and Stata graphics
#
# Not as flexible as the ggplot2 package
#
# Uses Grid Graphics System

library("lattice")

xyplot(pretest ~ posttest | workshop,
       data = mydata100,
       type = c("p", "r"))  # p=points, r=regression

# ===The ggplot2 Package===
# Follows Wilkinson's Grammar of Graphics
#
# Works with underlying graphics concepts,
# not pre-defined graph types
#
# Enables you to create any data graphic
# that you can conceive of (except mosaic)
#
# Used in R, Protovis, SPSS, Tableau
#
# Uses Grid Graphics System

# ---The "Quick" Approach: quickplot() or qplot()---
#
# Good:
#   It's quick and easy for simple plots
#   Its syntax imitates the plot function
#   Titles and labels done the same way
#   Can add to its plots with function below
#
# Bad: not very flexible (one dataset only)

qplot(workshop)            # Bar plot
qplot(posttest)            # Histogram
qplot(workshop, gender)    # Useless
qplot(workshop, posttest)  # Strip plot (vertical)
qplot(posttest, workshop)  # Strip plot (horizontal)
qplot(pretest, posttest)   # Scatter plot

# (Recall hard work of traditional)
# Bar plot of workshop by gender
qplot(workshop, facets = gender ~ .)
# Scatter plot of pretest and posttest
qplot(pretest, posttest)

# Again, with point & line "geom"
qplot(pretest, posttest,
      geom = c("point", "smooth"), method = "lm")

# ===The Grammar of Graphics Approach: ggplot()===

# ---The Six Parts of a Graph---
#
# Aesthetics: the impact of each variable
#
# Geoms: geometric objects
#
# Statistics: regression line...
#
# Scales: legend
#
# Coordinate System: Cartesian vs. polar
#
# Facets: plot by group(s)

# Histogram
ggplot(mydata100, aes(posttest)) +
  geom_histogram()

# ---Bar Plots---

# Simple Bar
ggplot(mydata100, aes(workshop)) +
  geom_bar()

# Bar plot of workshop with genders stacked
ggplot(mydata100, aes(workshop, fill = gender)) +
  geom_bar(position = "stack")

# Again, in grey scale
ggplot(mydata100, aes(workshop, fill = gender)) +
  geom_bar(position = "stack") +
  scale_fill_grey()

# Bar plot of workshop and gender
# "dodging" each other
ggplot(mydata100, aes(workshop, fill = gender)) +
  geom_bar(position = "dodge")

# Bar plot of workshop faceted by gender
ggplot(mydata100, aes(workshop)) +
  geom_bar() +
  facet_grid(gender ~ .)

# Box plot of posttest by workshop
ggplot(mydata100, aes(workshop, posttest)) +
  geom_boxplot() +
  geom_point()  # or geom_jitter()

# As above, with facets for gender
ggplot(mydata100, aes(workshop, posttest)) +
  geom_boxplot() +
  geom_point() +
  facet_grid(. ~ gender)

# ---Scatter Plots---

# Simple scatterplot
ggplot(mydata100, aes(pretest, posttest)) +
  geom_point()

# Set point shape by gender
ggplot(mydata100, aes(pretest, posttest, shape = gender)) +
  geom_point()

# Add regression lines by gender
ggplot(mydata100,
       aes(pretest, posttest, shape = gender, linetype = gender)) +
  geom_point() +
  geom_smooth(method = "lm")

# Repeat plot by workshop (rows) gender (columns)
ggplot(mydata100, aes(pretest, posttest, shape = gender)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_grid(workshop ~ gender)

# Same thing specifying all defaults
# Now you see ggplot's full power
# layer() has no "..." argument in ggplot2 >= 2.0.0: position must be
# given explicitly and stat/geom parameters go through params = list().
ggplot() +
  layer(
    data = mydata100,
    mapping = aes(x = pretest, y = posttest),
    geom = "point",
    stat = "identity",
    position = "identity"
  ) +
  layer(
    data = mydata100,
    mapping = aes(x = pretest, y = posttest),
    geom = "smooth",
    stat = "smooth",
    position = "identity",
    params = list(method = "lm")
  ) +
  facet_grid(workshop ~ gender) +
  coord_cartesian()

# From R for SAS and SPSS Users by Robert A. Muenchen:
# (Based on a version by Hadley Wickham)
# ----------------------------------------------------------------
# Strengths & Weaknesses of R's Graphics Packages
# ----------------------------------------------------------------
#                             Traditional   lattice     ggplot2
#                             (or base)
# ----------------------------------------------------------------
# Automatic output for
#   different objects         Yes           No          No
# Automatic legends           No            Sometimes   Yes
# Easily repeats plots
#   for different groups      No            Yes         Yes
# Easy to use with multiple
#   data sources              Yes           No          Yes
# Allows you to build
#   plots piece by piece      Yes           No          Yes
# Allows you to replace
#   pieces after creation     No            No          Yes
# Consistent functions        No            No          Yes
# Attractiveness of default
#   settings                  Good          Good        Excellent
# Can do mosaic plots         Yes           Yes         No
# Control extends beyond
#   data graphics             Yes           No          No
# Underlying graphics system  Traditional   Grid        Grid
# ----------------------------------------------------------------
#
#
# ================================================================
# =========             GGplot2 Options             ==============
# ========= From Elegant Graphics for Data Analysis ==============
# ==========           by Hadley Wickham            ==============
# ================================================================

# ----------------------------------------------------------------
# GEOMS AVAILABLE IN ggplot2
# ----------------------------------------------------------------
# Name        Description
# ----------------------------------------------------------------
# abline      Line, specified by slope and intercept
# area        Area plots
# bar         Bars, rectangles with bases on y-axis
# blank       Blank, draws nothing
# boxplot     Box-and-whisker plot
# contour     Display contours of a 3d surface in 2d
# crossbar    Hollow bar with middle indicated by horizontal line
# density     Display a smooth density estimate
# density_2d  Contours from a 2d density estimate
# errorbar    Error bars
# histogram   Histogram
# hline       Line, horizontal
# interval    Base for all interval (range) geoms
# jitter      Points, jittered to reduce overplotting
# line        Connect observations, in order of x value
# linerange   An interval represented by a vertical line
# path        Connect observations, in original order
# point       Points, as for a scatterplot
# pointrange  An interval represented by a vertical line,
#             with a point in the middle
# polygon     Polygon, a filled path
# quantile    Add quantile lines from a quantile regression
# ribbon      Ribbons, y range with continuous x values
# rug         Marginal rug plots
# segment     Single line segments
# smooth      Add a smoothed conditional mean
# step        Connect observations by stairs
# text        Textual annotations
# tile        Tile plot as densely as possible, assuming that
#             every tile is the same size
# vline       Line, vertical
# -----------------------------------------------------------
#
#
#
# -----------------------------------------------------------
# DEFAULT STATISTICS AND AESTHETICS
# -----------------------------------------------------------
# Name        Default    Aesthetics
#             stat
# -----------------------------------------------------------
# abline      abline     colour, linetype, size
# area        identity   colour, fill, linetype, size, x, y
# bar         bin        colour, fill, linetype, size, weight, x
# bin2d       bin2d      colour, fill, linetype, size, weight,
#                        xmax, xmin, ymax, ymin
# blank       identity
# boxplot     boxplot    colour, fill, lower, middle, size, upper,
#                        weight, x, ymax, ymin
# contour     contour    colour, linetype, size, weight, x, y
# crossbar    identity   colour, fill, linetype, size, x, y, ymax, ymin
# density     density    colour, fill, linetype, size, weight, x, y
# density2d   density2d  colour, linetype, size, weight, x, y
# errorbar    identity   colour, linetype, size, width, x, ymax, ymin
# freqpoly    bin        colour, linetype, size
# hex         binhex     colour, fill, size, x, y
# histogram   bin        colour, fill, linetype, size, weight, x
# hline       hline      colour, linetype, size
# jitter      identity   colour, fill, shape, size, x, y
# line        identity   colour, linetype, size, x, y
# linerange   identity   colour, linetype, size, x, ymax, ymin
# path        identity   colour, linetype, size, x, y
# point       identity   colour, fill, shape, size, x, y
# pointrange  identity   colour, fill, linetype, shape, size, x, y, ymax, ymin
# polygon     identity   colour, fill, linetype, size, x, y
# quantile    quantile   colour, linetype, size, weight, x, y
# rect        identity   colour, fill, linetype, size, xmax, xmin, ymax, ymin
# ribbon      identity   colour, fill, linetype, size, x, ymax, ymin
# rug         identity   colour, linetype, size
# segment     identity   colour, linetype, size, x, xend, y, yend
# smooth      smooth     alpha, colour, fill, linetype, size, weight, x, y
# step        identity   colour, linetype, size, x, y
# text        identity   angle, colour, hjust, label, size, vjust, x, y
# tile        identity   colour, fill, linetype, size, x, y
# vline       vline      colour, linetype, size
# ------------------------------------------------------------
#
#
#
# ------------------------------------------------------------
# STATS IN ggplot2
# ------------------------------------------------------------
# bin         Bin data
# boxplot     Calculate components of box-and-whisker plot
# contour     Contours of 3d data
# density     Density estimation, 1d
# density_2d  Density estimation, 2d
# function    Superimpose a function
# identity    Don't transform data
# qq          Calculation for quantile-quantile plot
# quantile    Continuous quantiles
# smooth      Add a smoother
# spoke       Convert angle and radius to xend and yend
# step        Create stair steps
# sum         Sum unique values. Useful for overplotting
#             on scatter-plots
# summary     Summarise y values at every unique x
# ------------------------------------------------------------
#