# install.packages('ggplot2')
library(ggplot2)
# install.packages('nycflights13')
# install.packages('dplyr')
library(nycflights13)
library(dplyr)
head(flights)
## Source: local data frame [6 x 16]
##
## year month day dep_time dep_delay arr_time arr_delay carrier tailnum
## (int) (int) (int) (int) (dbl) (int) (dbl) (chr) (chr)
## 1 2013 1 1 517 2 830 11 UA N14228
## 2 2013 1 1 533 4 850 20 UA N24211
## 3 2013 1 1 542 2 923 33 AA N619AA
## 4 2013 1 1 544 -1 1004 -18 B6 N804JB
## 5 2013 1 1 554 -6 812 -25 DL N668DN
## 6 2013 1 1 554 -4 740 12 UA N39463
## Variables not shown: flight (int), origin (chr), dest (chr), air_time
## (dbl), distance (dbl), hour (dbl), minute (dbl)
plot <- ggplot(flights, aes(dep_delay, arr_delay))
plot
plot + geom_point()
plot + geom_point(colour='blue', size=5, shape=25, alpha=.01)
plot + geom_point(aes(colour=origin))
plot + geom_point() +
geom_smooth(aes(colour=factor(origin)), method='lm')
Create a line chart with points as well. Place distance on the x axis and air time on the y axis. Use the origin as a factor for color for the lines and shape of the points. Make the points have alpha of .1 and be purple in color. Make sure you use a linear model as your method.
ggplot(flights, aes(distance, air_time)) +
geom_point(aes(shape=origin), alpha=.1, colour='purple') +
geom_smooth(aes(colour=origin), method='lm')
plot2 <- ggplot(flights, aes(distance, air_time))
plot2 + geom_boxplot(outlier.colour='red', outlier.shape=25)
plot <- ggplot(flights, aes(carrier))
plot + geom_bar(aes(weight=arr_delay, fill=origin), position='dodge')
plot <- ggplot(flights, aes(arr_delay))
plot + geom_histogram(aes(fill=origin, colour=origin))
plot + geom_histogram(binwidth = 0.5)
plot + geom_histogram(bins = 100)
plot + geom_histogram(aes(y=..density..))
plot + geom_density()
plot +
geom_density(aes(colour=origin, fill=origin), position='stack') +
xlim(0, 50)
Create a histogram of departure delay. Use 100 bins. Add a density chart of the same data. Make the line and fill color red, with an alpha of .1. Zoom in so the x axis only goes from 0 to 200.
ggplot(flights, aes(dep_delay)) +
geom_histogram(aes(y=..density..), bins=100) +
geom_density(colour='red', fill='red', alpha=.1) +
xlim(c(0, 200))
# install.packages('maps')
# install.packages('choroplethr')
library(maps)
library(choroplethr)
data(df_state_demographics)
states <- map_data('state')
map_data <- merge(df_state_demographics, states, by='region')
head(map_data)
## region total_population percent_white percent_black percent_asian
## 1 alabama 4799277 67 26 1
## 2 alabama 4799277 67 26 1
## 3 alabama 4799277 67 26 1
## 4 alabama 4799277 67 26 1
## 5 alabama 4799277 67 26 1
## 6 alabama 4799277 67 26 1
## percent_hispanic per_capita_income median_rent median_age long
## 1 4 23680 501 38.1 -87.46201
## 2 4 23680 501 38.1 -87.48493
## 3 4 23680 501 38.1 -87.52503
## 4 4 23680 501 38.1 -87.53076
## 5 4 23680 501 38.1 -87.57087
## 6 4 23680 501 38.1 -87.58806
## lat group order subregion
## 1 30.38968 1 1 <NA>
## 2 30.37249 1 2 <NA>
## 3 30.37249 1 3 <NA>
## 4 30.33239 1 4 <NA>
## 5 30.32665 1 5 <NA>
## 6 30.32665 1 6 <NA>
plot <- ggplot(map_data, aes(long, lat))
plot + geom_polygon(aes(group=group, fill=median_rent))
Create a map of the US where the color is median age.
data(df_state_demographics)
states <- map_data('state')
map_data <- merge(df_state_demographics, states, by='region')
ggplot(map_data, aes(long, lat)) +
geom_polygon(aes(group=group, fill=median_age))
labels <- map_data %>% group_by(region) %>%
summarise(lat=mean(unique(lat)), long=mean(unique(long)))
head(labels)
## Source: local data frame [6 x 3]
##
## region lat long
## (chr) (dbl) (dbl)
## 1 alabama 31.81524 -86.89066
## 2 arizona 34.11208 -113.09847
## 3 arkansas 34.56208 -91.45330
## 4 california 36.71384 -120.49842
## 5 colorado 39.34763 -105.37428
## 6 connecticut 41.36615 -72.75057
new_plot <- plot +
# add outlines
geom_polygon(aes(group=group, fill=median_rent),
colour='black', size = .2, alpha = 0.5) +
# add new color information
scale_fill_gradient2(midpoint = median(map_data$median_rent),
limits=c(min(map_data$median_rent),
max(map_data$median_rent)),
low='red', mid='white', high='blue') +
theme(axis.line = element_blank(), axis.text.x = element_blank(),
axis.text.y = element_blank(), axis.ticks = element_blank(),
axis.title.x = element_blank(), axis.title.y = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
plot.background = element_blank()) +
annotate("text", x=labels$long, y=labels$lat, label=labels$region,
color = "midnightblue", size = 3)
new_plot
# install.packages('plotly')
library(plotly)
head(diamonds)
## # A tibble: 6 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.20 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
set.seed(2)
d <- diamonds[sample(nrow(diamonds), 1000), ]
m = list(l = 50, r = 50, b = 100, t = 100, pad = 4)
plot_ly(d, x = carat, y = price, text = paste('Clarity: ', clarity),
mode = 'markers', color = cut, size = depth) %>%
layout(autosize = F, width = 1000, height = 500, margin = m)
Create a scatter plot with a sample of 1000 data points from diamonds. Use price as your y axis and depth as your x axis. In the text displayed on hover, have it print ‘Cut:’ and then the cut of the diamond. Make the color be the size be the carat and the color be the color of the diamond.
plot_ly(d, x = depth, y = price, text = paste('Cut: ', cut),
mode = 'markers', color = color, size = carat) %>%
layout(autosize = F, width = 1000, height = 500, margin = m)
plot_ly(d, x = price, color = cut, type = "box") %>%
layout(autosize = F, width = 1000, height = 500, margin = m)
plot <- ggplot(d, aes(carat, price)) +
geom_point(aes(colour=factor(cut))) +
geom_smooth() +
facet_wrap(~ cut)
ggplotly(plot) %>%
layout(autosize = F, width = 1000, height = 400, margin = m)
ggplotly(new_plot) %>%
layout(autosize = F, width = 1000, height = 500, margin = m)
Remember the histogram and density plot you made earlier? Create a histogram of departure delay. Use 100 bins. Add a density chart of the same data. Make the line and fill color red, with an alpha of .1. Zoom in so the x axis only goes from 0 to 200. Make it with ggplotly!
p <- ggplot(flights, aes(dep_delay)) +
geom_histogram(aes(y=..density..), bins=100) +
geom_density(colour='red', fill='red', alpha=.1) +
xlim(c(0, 200))
ggplotly(p) %>%
layout(autosize = F, width = 1000, height = 400, margin = m)
map_data2 <- map_data %>% group_by(region) %>%
summarise(total_population=mean(total_population),
median_age=mean(median_age), median_rent=mean(median_rent))
states <- data.frame('abb'=state.abb, 'name'=state.name,
'region'=tolower(state.name))
map_data2 <- map_data2 %>% left_join(states, by='region')
map_data2$hover <- with(map_data2, paste(name, '<br>', 'Population',
total_population, '<br>',
'Median Age', median_age))
l <- list(color = toRGB("white"), width = 2)
g <- list(
scope = 'usa',
projection = list(type = 'albers usa'),
showlakes = TRUE
)
plot_ly(map_data2, z = median_rent, text = hover, locations = abb,
type = 'choropleth', locationmode = 'USA-states',
color = median_rent, marker = list(line = l),
colorbar = list(title = 'USD')) %>%
layout(geo=g, autosize = F, width = 1000, height = 500, margin = m)
# install.packages('shiny')
library(shiny)
# install.packages('lubridate')
library(lubridate)
eco <- economics # from ggplot2
eco$year <- year(eco$date)
eco$month <- month(eco$date)
eco$unemploy_pct <- round((eco$unemploy / eco$pop)*100, 2)
head(eco)
## # A tibble: 6 x 9
## date pce pop psavert uempmed unemploy year month
## <date> <dbl> <int> <dbl> <dbl> <int> <dbl> <dbl>
## 1 1967-07-01 507.4 198712 12.5 4.5 2944 1967 7
## 2 1967-08-01 510.5 198911 12.5 4.7 2945 1967 8
## 3 1967-09-01 516.3 199113 11.7 4.6 2958 1967 9
## 4 1967-10-01 512.9 199311 12.5 4.9 3143 1967 10
## 5 1967-11-01 518.1 199498 12.5 4.7 3066 1967 11
## 6 1967-12-01 525.8 199657 12.1 4.8 3018 1967 12
## # ... with 1 more variables: unemploy_pct <dbl>
library(shiny)
shinyUI(fluidPage(
titlePanel('Shiny Example'),
sidebarLayout(
sidebarPanel(
sliderInput('year',
'Year:',
min = 1967,
max = 2015,
value = 1967,
animate = TRUE,
sep='')
),
mainPanel(
plotOutput('economics_plot'),
plotOutput('economics_plot_full')
)
)
))
library(shiny)
library(ggplot2)
eco <- economics # from ggplot2
eco$year <- year(eco$date)
eco$month <- month(eco$date)
eco$unemploy_pct <- round((eco$unemploy / eco$pop)*100, 2)
shinyServer(function(input, output) {
get_data <- reactive({
eco %>% filter(year==input$year)
})
output$economics_plot <- renderPlot({
data <- get_data()
year <- seq(min(eco$year), max(eco$year), length.out = input$year)
ggplot(data, aes(pce, unemploy_pct, size=uempmed)) +
geom_point() +
xlab('Personal Consumption Expenditures (Billions $)') +
ylab('Unemployment Percent') +
ggtitle('GGPlot Economics') +
theme_bw()
})
output$economics_plot_full <- renderPlot({
ggplot(eco, aes(pce, unemploy_pct)) +
geom_point(aes(size=uempmed, colour=year)) +
xlab('Personal Consumption Expenditures (Billions $)') +
ylab('Unemployment Percent') +
ggtitle('GGPlot Economics') +
theme_bw()
})
})
runApp('/Users/mm87597/Documents/Lab_Meetings/GWIS_GRiD_R_viz_workshop/shiny_example')
Use everything we’ve gone over today to make your own shiny app. Make one ggplot chart, one plotly chart, and one using both (ggplotly). Use whatever data you like from ggplot2, plotly, nycflights13, or maps. To see your options, you can do data()
. To see more information on a specific data set, such as diamonds, do ?diamonds
. We will be around to answer questions as you have them, so please let us know if you have any!