Date and time features
The first set of features we extract is date- and time-related. Specifically, we would like to know the day of the week and the time of day, bucketed into periods based on our own cutoffs.
library(dplyr)
library(lubridate)
# Derive date/time features for each trip: bucketed hour of day and day of week
# for both pickup and dropoff, plus the trip duration in seconds.

# Labels applied to wday() values 1..7 by the factor() calls below
# (1 -> 'Sun', ..., 7 -> 'Sat').
weekday_labels <- c('Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat')

# Break points (hour of day, 0-23) used to bucket hour of day into the periods
# named in hour_labels. Hours outside [1, 22) -- i.e. 22, 23 and 0 -- match no
# interval and are collected into a single wraparound bucket (see addNA below).
cut_levels <- c(1, 5, 9, 12, 16, 18, 22)

# One label per interval between consecutive break points, followed by the
# label for the wraparound bucket. The order must match the factor levels
# produced by addNA(cut(...)): the six intervals first, then the NA level.
hour_labels <- c('1AM-5AM', '5AM-9AM', '9AM-12PM', '12PM-4PM', '4PM-6PM', '6PM-10PM', '10PM-1AM')

nyc_taxi <- mutate(nyc_taxi,
  # right = FALSE makes the intervals left-closed: [1,5), [5,9), ..., [18,22).
  # With cut()'s default (right-closed) intervals, hour 5 would be labelled
  # '1AM-5AM', hour 1 would fall into the wraparound bucket, and hour 22 would
  # be labelled '6PM-10PM', contradicting the labels above.
  # addNA() turns the NA produced for hours 22, 23 and 0 into an explicit
  # (last) factor level so it can be relabelled as '10PM-1AM' below.
  # Note: a missing datetime also yields NA and so lands in that same bucket.
  pickup_hour = addNA(cut(hour(pickup_datetime), cut_levels, right = FALSE)),
  pickup_dow = factor(wday(pickup_datetime), levels = 1:7, labels = weekday_labels),
  dropoff_hour = addNA(cut(hour(dropoff_datetime), cut_levels, right = FALSE)),
  dropoff_dow = factor(wday(dropoff_datetime), levels = 1:7, labels = weekday_labels),
  # Elapsed time between pickup and dropoff, in whole seconds.
  trip_duration = as.integer(as.duration(dropoff_datetime - pickup_datetime))
)

# Replace the interval-style level names (and the NA level) with readable labels.
levels(nyc_taxi$pickup_hour) <- hour_labels
levels(nyc_taxi$dropoff_hour) <- hour_labels

head(nyc_taxi)
pickup_datetime dropoff_datetime passenger_count trip_distance
1 2015-01-15 19:05:40 2015-01-15 19:28:18 5 8.33
2 2015-01-25 00:13:06 2015-01-25 00:24:51 1 3.37
3 2015-01-25 00:13:08 2015-01-25 00:34:57 1 3.72
4 2015-01-25 00:13:09 2015-01-25 01:02:40 1 10.20
5 2015-01-04 13:44:52 2015-01-04 13:46:38 1 0.36
6 2015-01-04 13:44:52 2015-01-04 14:04:23 1 8.98
pickup_longitude pickup_latitude rate_code_id dropoff_longitude dropoff_latitude
1 -73.9 40.8 standard -74.0 40.8
2 -73.9 40.8 standard -74.0 40.8
3 -74.0 40.8 standard -74.0 40.7
4 -74.0 40.8 standard -73.9 40.7
5 -74.0 40.8 standard -74.0 40.8
6 -73.9 40.8 standard -74.0 40.8
payment_type fare_amount extra mta_tax tip_amount tolls_amount
1 card 26.0 1.0 0.5 8.08 5.33
2 card 12.5 0.5 0.5 0.00 0.00
3 card 16.5 0.5 0.5 3.56 0.00
4 cash 39.0 0.5 0.5 0.00 0.00
5 cash 3.5 0.0 0.5 0.00 0.00
6 card 27.0 0.0 0.5 0.00 5.33
improvement_surcharge total_amount pickup_hour pickup_dow dropoff_hour
1 0.3 41.2 6PM-10PM Thu 6PM-10PM
2 0.3 13.8 10PM-1AM Sun 10PM-1AM
3 0.3 21.4 10PM-1AM Sun 10PM-1AM
4 0.3 40.3 10PM-1AM Sun 10PM-1AM
5 0.3 4.3 12PM-4PM Sun 12PM-4PM
6 0.3 33.1 12PM-4PM Sun 12PM-4PM
dropoff_dow trip_duration
1 Thu 1358
2 Sun 705
3 Sun 1309
4 Sun 2971
5 Sun 106
6 Sun 1171