# Attach the packages whose functions this script calls without a namespace
# prefix: ggplot2 (ggplot, geom_bar, scale_*, labs, theme, ...), hms (as_hms)
# and scales (date_breaks, comma). Attaching them here keeps the script
# runnable on its own; the calls are no-ops if they are already attached.
library(ggplot2)
library(hms)
library(scales)

# Color blind friendly palette courtesy of Cookbook for R
# (http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/)
cbPalette <- c(
  "#999999", "#E69F00", "#56B4E9", "#009E73",
  "#F0E442", "#0072B2", "#D55E00", "#CC79A7"
)

# Disable large numbers from using scientific notation
options(scipen = 999)
# Read summary.csv (relative to the working directory) into a data frame.
# Later statements read its month, user_type, total_rides and avg_ride_time
# columns. The object shares its name with base::summary(); the name is kept
# because the rest of the script refers to it.
summary <- read.csv(file = "summary.csv")
Factor the month field so that months print in chronological order rather than the alphabetical default. Also factor the user_type column so that members are displayed before casual users.
# Make month a factor whose levels follow the calendar, so plots order the
# months Jan..Dec instead of alphabetically.
summary[["month"]] <- factor(
  summary[["month"]],
  levels = c(
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
  )
)

# Make user_type a factor with "member" ordered ahead of "casual".
summary[["user_type"]] <- factor(
  summary[["user_type"]],
  levels = c("member", "casual")
)
Plot the data, comparing the rides-per-month of members and casual users.
# Grouped (dodged) bar chart of total_rides per month, one bar per user_type.
ggplot(
  summary,
  aes(month, total_rides, fill = user_type, colour = user_type)
) +
  # geom_col() draws bars whose heights are the y values exactly as supplied.
  geom_col(position = "dodge") +
  # Show thousands separators on the ride-count axis.
  scale_y_continuous(labels = scales::comma) +
  # Fill and outline both take their colours from the shared palette.
  scale_fill_manual(values = cbPalette) +
  scale_colour_manual(values = cbPalette) +
  labs(
    title = "Total Rides Per Month",
    x = "",
    y = "Rides",
    fill = "User Type",
    colour = "User Type"
  ) +
  # Centre the title horizontally.
  theme(plot.title = element_text(hjust = 0.5))
As avg_ride_time is stored as a string, it must be converted to a time value before it can be manipulated as one.
# Append ":00" to every avg_ride_time string and parse the result with
# as_hms(), replacing the text column with an hms time column.
# NOTE(review): presumably the CSV stores the value as "HH:MM" - confirm.
summary[["avg_ride_time"]] <- as_hms(
  paste0(summary[["avg_ride_time"]], ":00")
)
Plot the data, comparing the average-ride-time per month of members and casual users.
# Grouped (dodged) bar chart of avg_ride_time per month, one bar per
# user_type.
ggplot(
  data = summary,
  aes(x = month, y = avg_ride_time, fill = user_type, color = user_type)
) +
  # geom_col() draws bars whose heights are the y values exactly as supplied.
  geom_col(position = "dodge") +
  # Place a y-axis break every 5 minutes. date_breaks() is reached through
  # the scales namespace (as scales::comma is elsewhere in this script), so
  # the plot does not depend on scales being attached.
  scale_y_time(breaks = scales::date_breaks("5 min")) +
  labs(x = "", y = "Avg. Ride Time", fill = "User Type", color = "User Type") +
  # Fill and outline both take their colours from the shared palette.
  scale_fill_manual(values = cbPalette) +
  scale_colour_manual(values = cbPalette) +
  ggtitle("Average Ride Time Per Month") +
  # Centre the title horizontally.
  theme(plot.title = element_text(hjust = 0.5))
Plot the data, comparing the rides-per-day-of-week of members and casual users.
# Read daily_total.csv (relative to the working directory) into a data frame.
# Later statements read its day, user_type and total_rides columns.
dailytotal <- read.csv(file = "daily_total.csv")
By default, R orders days alphabetically, so they must be factored to place them in the desired chronological order.
# Make day a factor whose levels run Sunday..Saturday, so plots order the
# weekdays chronologically instead of alphabetically.
dailytotal[["day"]] <- factor(
  dailytotal[["day"]],
  levels = c(
    "Sunday", "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday"
  )
)
# Grouped (dodged) bar chart of total_rides per weekday, one bar per
# user_type.
ggplot(
  data = dailytotal,
  aes(x = day, y = total_rides, fill = user_type, color = user_type)
) +
  # geom_col() draws bars whose heights are the y values exactly as supplied.
  geom_col(position = "dodge") +
  labs(x = "", y = "Rides", fill = "User Type", color = "User Type") +
  # Show thousands separators on the ride-count axis, matching the other
  # ride-count charts in this script.
  scale_y_continuous(labels = scales::comma) +
  # Fill and outline both take their colours from the shared palette.
  scale_fill_manual(values = cbPalette) +
  scale_colour_manual(values = cbPalette) +
  ggtitle("Total Rides Per Day") +
  # Centre the title horizontally.
  theme(plot.title = element_text(hjust = 0.5))
# Read daily_avg.csv (relative to the working directory) into a data frame.
# Later statements read its day, user_type and avg_ride_time columns.
dailyavg <- read.csv(file = "daily_avg.csv")
Since we’re using a new data set, we once again must factor the days into the desired order.
# Make day a factor whose levels run Sunday..Saturday, so plots order the
# weekdays chronologically instead of alphabetically.
dailyavg[["day"]] <- factor(
  dailyavg[["day"]],
  levels = c(
    "Sunday", "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday"
  )
)
Convert avg_ride_time from a text field to a timestamp.
# Append ":00" to every avg_ride_time string and parse the result with
# as_hms(), replacing the text column with an hms time column.
# NOTE(review): presumably the CSV stores the value as "HH:MM" - confirm.
dailyavg[["avg_ride_time"]] <- as_hms(
  paste0(dailyavg[["avg_ride_time"]], ":00")
)
Plot the data, comparing the average-ride-time per day of week of members and casual users.
# Grouped (dodged) bar chart of avg_ride_time per weekday, one bar per
# user_type.
ggplot(
  data = dailyavg,
  aes(x = day, y = avg_ride_time, fill = user_type, color = user_type)
) +
  # geom_col() draws bars whose heights are the y values exactly as supplied.
  geom_col(position = "dodge") +
  # Place a y-axis break every 5 minutes. date_breaks() is reached through
  # the scales namespace (as scales::comma is elsewhere in this script), so
  # the plot does not depend on scales being attached.
  scale_y_time(breaks = scales::date_breaks("5 min")) +
  labs(x = "", y = "Avg. Ride Time", fill = "User Type", color = "User Type") +
  # Fill and outline both take their colours from the shared palette.
  scale_fill_manual(values = cbPalette) +
  scale_colour_manual(values = cbPalette) +
  ggtitle("Average Ride Time Per Day") +
  # Centre the title horizontally.
  theme(plot.title = element_text(hjust = 0.5))
As the fifth visualization depicts a different category of data from the first four, I opted to change which colors it used to avoid any confusion.
# Second palette: eight hex colours in a different order, used for the
# bike-type chart's fill scale.
cbPalette2 <- c(
  "#D55E00",
  "#56B4E9",
  "#CC79A7",
  "#009E73",
  "#F0E442",
  "#0072B2",
  "#999999",
  "#E69F00"
)
# Read biketype.csv (relative to the working directory) into a data frame.
# Later statements read its user_type, bike_type and ride_count columns.
biketype <- read.csv(file = "biketype.csv")
Plot the data, comparing the bike type preferences of members and casual users.
# Grouped (dodged) bar chart of ride_count per user_type, one bar per
# bike_type, with each bar's count printed above it.
ggplot(data = biketype, aes(x = user_type, y = ride_count, fill = bike_type)) +
  # geom_col() draws bars whose heights are the y values exactly as supplied.
  geom_col(position = "dodge") +
  # NOTE(review): the legend labels are applied positionally to the bike_type
  # values - confirm the data holds exactly these two types in this order.
  scale_fill_manual(
    name = "Bike Types",
    labels = c("classic", "electric"),
    values = cbPalette2
  ) +
  # Show thousands separators on the ride-count axis. comma is reached
  # through the scales namespace, as it is elsewhere in this script, so the
  # plot does not depend on scales being attached.
  scale_y_continuous(labels = scales::comma) +
  # Axis titles are set once, here.
  labs(x = "Member Type", y = "Rides") +
  ggtitle("Rides by Bike Type") +
  # Print the formatted count just above each bar; the dodge width of 0.9
  # lines the labels up with the dodged bars.
  geom_text(
    aes(label = scales::comma(ride_count)),
    position = position_dodge(width = 0.9),
    vjust = -0.25
  ) +
  # Centre the title horizontally.
  theme(plot.title = element_text(hjust = 0.5))