Initial Options

# Colour-blind-friendly palette, courtesy of Cookbook for R
# (http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/).
cbPalette <- c(
  "#999999", "#E69F00", "#56B4E9", "#009E73",
  "#F0E442", "#0072B2", "#D55E00", "#CC79A7"
)

# Strongly prefer fixed notation over scientific notation when printing
# large numbers.
options(scipen = 999)

Visualization One

Read the Data

# Load summary.csv (resolved against the current working directory).
summary <- read.csv(file = "summary.csv")

Factoring Months

Factor the month field so that months display in chronological order rather than the alphabetical default. Also factor the user_type column so that members display before casual users.

# Fix the month levels in calendar order so axes are not sorted alphabetically.
summary$month <- factor(
  summary$month,
  levels = c(
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
  )
)

# List members ahead of casual users in legends and dodged bars.
summary$user_type <- factor(
  summary$user_type,
  levels = c("member", "casual")
)

Generating the first visualization

Plot the data, comparing the rides-per-month of members and casual users.

# Side-by-side bars of total rides for each month, one bar per user type.
ggplot(
  data = summary,
  aes(x = month, y = total_rides, fill = user_type, color = user_type)
) +
  # geom_col() draws bars from the y values as given (identity stat).
  geom_col(position = "dodge") +
  # Thousands separators on the ride counts.
  scale_y_continuous(labels = scales::comma) +
  scale_fill_manual(values = cbPalette) +
  scale_colour_manual(values = cbPalette) +
  labs(
    title = "Total Rides Per Month",
    x = "",
    y = "Rides",
    fill = "User Type",
    color = "User Type"
  ) +
  # Centre the title horizontally.
  theme(plot.title = element_text(hjust = 0.5))

Visualization Two

Manipulate Time Field

As avg_ride_time is stored as a string, it must be converted to a time value before it can be manipulated and plotted as one.

# Append ":00" to each avg_ride_time string and parse the result with as_hms().
# NOTE(review): presumably the CSV stores HH:MM and ":00" supplies the seconds
# field -- confirm against summary.csv.
summary$avg_ride_time <- as_hms(paste(summary$avg_ride_time, "00", sep = ":"))

Generate Second Visualization

Plot the data, comparing the average-ride-time per month of members and casual users.

# Side-by-side bars of average ride time for each month, one bar per user type.
ggplot(
  data = summary,
  aes(x = month, y = avg_ride_time, fill = user_type, color = user_type)
) +
  # geom_col() draws bars from the y values as given (identity stat).
  geom_col(position = "dodge") +
  # Time-formatted y axis with a break every five minutes.
  scale_y_time(breaks = date_breaks("5 min")) +
  scale_fill_manual(values = cbPalette) +
  scale_colour_manual(values = cbPalette) +
  labs(
    title = "Average Ride Time Per Month",
    x = "",
    y = "Avg. Ride Time",
    fill = "User Type",
    color = "User Type"
  ) +
  # Centre the title horizontally.
  theme(plot.title = element_text(hjust = 0.5))

Visualization Three

Plot the data, comparing the rides-per-day-of-week of members and casual users.

Read New Data

# Load daily_total.csv (resolved against the current working directory).
dailytotal <- read.csv(file = "daily_total.csv")

Factor Days

By default, R orders days alphabetically, so they must be factored to place them in the desired chronological order.

# Order weekdays Sunday-to-Saturday; R would otherwise sort them alphabetically.
dailytotal$day <- factor(
  dailytotal$day,
  levels = c(
    "Sunday", "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday"
  )
)

# Put members ahead of casual users, as is done for the monthly summary data,
# so each user type keeps the same legend position and palette colour in every
# chart. Without this, user_type sorts alphabetically (casual, member) and the
# colours swap between charts.
# union() keeps any unexpected user_type value as a trailing level instead of
# silently turning it into NA.
dailytotal$user_type <- factor(
  dailytotal$user_type,
  levels = union(
    c("member", "casual"),
    as.character(unique(dailytotal$user_type))
  )
)

Generate Third Visualization

# Side-by-side bars of total rides for each day of the week, one bar per
# user type.
ggplot(
  data = dailytotal,
  aes(x = day, y = total_rides, fill = user_type, color = user_type)
) +
  # geom_col() draws bars from the y values as given (identity stat).
  geom_col(position = "dodge") +
  # Thousands separators on the ride counts, consistent with the other
  # ride-count charts in this report.
  scale_y_continuous(labels = scales::comma) +
  scale_fill_manual(values = cbPalette) +
  scale_colour_manual(values = cbPalette) +
  labs(
    title = "Total Rides Per Day",
    x = "",
    y = "Rides",
    fill = "User Type",
    color = "User Type"
  ) +
  # Centre the title horizontally.
  theme(plot.title = element_text(hjust = 0.5))

Visualization Four

Read New Data

# Load daily_avg.csv (resolved against the current working directory).
dailyavg <- read.csv(file = "daily_avg.csv")

Factor Days

Since we’re using a new data set, we once again must factor the days into the desired order.

# Order weekdays Sunday-to-Saturday; R would otherwise sort them alphabetically.
dailyavg$day <- factor(
  dailyavg$day,
  levels = c(
    "Sunday", "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday"
  )
)

# Put members ahead of casual users, as is done for the monthly summary data,
# so each user type keeps the same legend position and palette colour in every
# chart. Without this, user_type sorts alphabetically (casual, member) and the
# colours swap between charts.
# union() keeps any unexpected user_type value as a trailing level instead of
# silently turning it into NA.
dailyavg$user_type <- factor(
  dailyavg$user_type,
  levels = union(
    c("member", "casual"),
    as.character(unique(dailyavg$user_type))
  )
)

Manipulate Time Field

Convert avg_ride_time from a text field to a timestamp.

# Append ":00" to each avg_ride_time string and parse the result with as_hms().
# NOTE(review): presumably the CSV stores HH:MM and ":00" supplies the seconds
# field -- confirm against daily_avg.csv.
dailyavg$avg_ride_time <- as_hms(
  paste(dailyavg$avg_ride_time, "00", sep = ":")
)

Generate Fourth Visualization

Plot the data, comparing the average-ride-time per day of week of members and casual users.

# Side-by-side bars of average ride time for each day of the week, one bar per
# user type.
ggplot(
  data = dailyavg,
  aes(x = day, y = avg_ride_time, fill = user_type, color = user_type)
) +
  # geom_col() draws bars from the y values as given (identity stat).
  geom_col(position = "dodge") +
  # Time-formatted y axis with a break every five minutes.
  scale_y_time(breaks = date_breaks("5 min")) +
  scale_fill_manual(values = cbPalette) +
  scale_colour_manual(values = cbPalette) +
  labs(
    title = "Average Ride Time Per Day",
    x = "",
    y = "Avg. Ride Time",
    fill = "User Type",
    color = "User Type"
  ) +
  # Centre the title horizontally.
  theme(plot.title = element_text(hjust = 0.5))

Visualization Five

Update Palette

As the fifth visualization depicts a different category of data from the first four, I opted to change which colors it used to avoid any confusion.

# Alternative ordering of the colour-blind-friendly palette, used for the
# bike-type chart so its colours are not confused with the user-type colours.
cbPalette2 <- c(
  "#D55E00", "#56B4E9", "#CC79A7", "#009E73",
  "#F0E442", "#0072B2", "#999999", "#E69F00"
)

Read New Data

# Load biketype.csv (resolved against the current working directory).
biketype <- read.csv(file = "biketype.csv")

Generate Fifth Visualization

Plot the data, comparing the bike type preferences of members and casual users.

# Side-by-side bars of ride counts for each user type, one bar per bike type,
# with the count printed above every bar.
ggplot(data = biketype, aes(x = user_type, y = ride_count, fill = bike_type)) +
  # geom_col() draws bars from the y values as given (identity stat).
  geom_col(position = "dodge") +
  # Dodge the labels by the same width as the bars so each count sits over its
  # own bar; the negative vjust lifts the text just above the bar top.
  geom_text(
    aes(label = scales::comma(ride_count)),
    position = position_dodge(width = 0.9),
    vjust = -0.25
  ) +
  # NOTE(review): the two legend labels are applied in level order, so this
  # assumes bike_type has exactly two values sorting as classic, electric --
  # confirm against biketype.csv.
  scale_fill_manual(
    name = "Bike Types",
    labels = c("classic", "electric"),
    values = cbPalette2
  ) +
  # Namespace-qualified like every other use of comma in this report, so the
  # formatter cannot be masked by another attached package.
  scale_y_continuous(labels = scales::comma) +
  # Axis titles are set once here rather than in both the scale and labs().
  labs(title = "Rides by Bike Type", x = "Member Type", y = "Rides") +
  # Centre the title horizontally.
  theme(plot.title = element_text(hjust = 0.5))