lubridate: 处理日期和时间的利器

R
packages
发布日期

2023年9月20日星期三

修改日期

2023年12月23日星期六


Setup

lubridate 可以单独导入,也可以通过导入 tidyverse 使用。

代码
library(tidyverse)

1. 生成日期和时间

代码
today() # 日期  
[1] "2024-01-26"
代码
now() # 日期和时间
[1] "2024-01-26 21:03:01 GMT"
代码
ymd(20230920)
[1] "2023-09-20"
代码
ymd("2017-3-30") # 年月日
[1] "2017-03-30"
代码
mdy("March 30th, 2017") # 月日年
[1] "2017-03-30"
代码
dmy("30-Mar-2017") # 日月年
[1] "2017-03-30"
代码
ymd_hms("2017-3-30 20:11:59") # 年月日 和小时、分钟、秒
[1] "2017-03-30 20:11:59 UTC"
代码
mdy_hm("03/30/2017 08:01")
[1] "2017-03-30 08:01:00 UTC"

1.1. Example

代码
library(nycflights13)
glimpse(flights)
Rows: 336,776
Columns: 19
$ year           <int> 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2…
$ month          <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ day            <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ dep_time       <int> 517, 533, 542, 544, 554, 554, 555, 557, 557, 558, 558, …
$ sched_dep_time <int> 515, 529, 540, 545, 600, 558, 600, 600, 600, 600, 600, …
$ dep_delay      <dbl> 2, 4, 2, -1, -6, -4, -5, -3, -3, -2, -2, -2, -2, -2, -1…
$ arr_time       <int> 830, 850, 923, 1004, 812, 740, 913, 709, 838, 753, 849,…
$ sched_arr_time <int> 819, 830, 850, 1022, 837, 728, 854, 723, 846, 745, 851,…
$ arr_delay      <dbl> 11, 20, 33, -18, -25, 12, 19, -14, -8, 8, -2, -3, 7, -1…
$ carrier        <chr> "UA", "UA", "AA", "B6", "DL", "UA", "B6", "EV", "B6", "…
$ flight         <int> 1545, 1714, 1141, 725, 461, 1696, 507, 5708, 79, 301, 4…
$ tailnum        <chr> "N14228", "N24211", "N619AA", "N804JB", "N668DN", "N394…
$ origin         <chr> "EWR", "LGA", "JFK", "JFK", "LGA", "EWR", "EWR", "LGA",…
$ dest           <chr> "IAH", "IAH", "MIA", "BQN", "ATL", "ORD", "FLL", "IAD",…
$ air_time       <dbl> 227, 227, 160, 183, 116, 150, 158, 53, 140, 138, 149, 1…
$ distance       <dbl> 1400, 1416, 1089, 1576, 762, 719, 1065, 229, 944, 733, …
$ hour           <dbl> 5, 5, 5, 5, 6, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 6, 6…
$ minute         <dbl> 15, 29, 40, 45, 0, 58, 0, 0, 0, 0, 0, 0, 0, 0, 0, 59, 0…
$ time_hour      <dttm> 2013-01-01 05:00:00, 2013-01-01 05:00:00, 2013-01-01 0…
代码
flights %>% 
  select(origin,year,month,day,hour,minute) %>% 
  head
# A tibble: 6 × 6
  origin  year month   day  hour minute
  <chr>  <int> <int> <int> <dbl>  <dbl>
1 EWR     2013     1     1     5     15
2 LGA     2013     1     1     5     29
3 JFK     2013     1     1     5     40
4 JFK     2013     1     1     5     45
5 LGA     2013     1     1     6      0
6 EWR     2013     1     1     5     58
代码
flights %>%
  mutate(flight_date = ymd_hm(paste(year, month, day, hour, minute))) %>%
  select(origin, dest, flight_date) %>%
  head()
# A tibble: 6 × 3
  origin dest  flight_date        
  <chr>  <chr> <dttm>             
1 EWR    IAH   2013-01-01 05:15:00
2 LGA    IAH   2013-01-01 05:29:00
3 JFK    MIA   2013-01-01 05:40:00
4 JFK    BQN   2013-01-01 05:45:00
5 LGA    ATL   2013-01-01 06:00:00
6 EWR    ORD   2013-01-01 05:58:00
代码
flights %>%
  mutate(flight_date = make_datetime(year, month, day, hour, minute)) %>%
  select(origin, dest, flight_date) %>%
  head() # 和上面结果一样,但更简洁
# A tibble: 6 × 3
  origin dest  flight_date        
  <chr>  <chr> <dttm>             
1 EWR    IAH   2013-01-01 05:15:00
2 LGA    IAH   2013-01-01 05:29:00
3 JFK    MIA   2013-01-01 05:40:00
4 JFK    BQN   2013-01-01 05:45:00
5 LGA    ATL   2013-01-01 06:00:00
6 EWR    ORD   2013-01-01 05:58:00

2. 提取日期和时间

代码
flights %>%
  mutate(flight_date = make_date(year, month, day)) %>%
  mutate(
    weekday = wday(flight_date, label = T),
    month_name = month(flight_date, label = T)
  ) %>%
  select(origin, flight_date, weekday, month_name) %>%
  head()
# A tibble: 6 × 4
  origin flight_date weekday month_name
  <chr>  <date>      <ord>   <ord>     
1 EWR    2013-01-01  Tue     Jan       
2 LGA    2013-01-01  Tue     Jan       
3 JFK    2013-01-01  Tue     Jan       
4 JFK    2013-01-01  Tue     Jan       
5 LGA    2013-01-01  Tue     Jan       
6 EWR    2013-01-01  Tue     Jan       
代码
birthday_tom <- "Tom's birthday is on Feb 7 1993 at 18:28."
mdy_hm(birthday_tom) 
[1] "1993-02-07 18:28:00 UTC"

3. 小工具

代码
now() + years(7) # 7年后的今天
[1] "2031-01-26 21:03:08 GMT"
代码
wday(now() + years(7) )# 7年后的今天是周几?
[1] 1
代码
flights %>%
  filter(time_hour >= ymd("2013-09-01") &
           time_hour < ymd("2013-10-01")) %>%
  select(origin, month) %>%
  head
# A tibble: 6 × 2
  origin month
  <chr>  <int>
1 JFK        9
2 JFK        9
3 EWR        9
4 JFK        9
5 JFK        9
6 EWR        9

4. 时间跨度

代码
dseconds()
[1] "1s"
代码
dminutes(1)
[1] "60s (~1 minutes)"
代码
dhours(1)
[1] "3600s (~1 hours)"

5. 计算

代码
months(3) + days(1) + minutes(8)
[1] "3m 1d 0H 8M 0S"
代码
months(3) * 2
[1] "6m 0d 0H 0M 0S"
代码
start <- ymd_hms("2023-01-01 12:00:00")
end <- ymd_hms("2023-01-15 12:00:00")
iv <- interval(start,end)
iv
[1] 2023-01-01 12:00:00 UTC--2023-01-15 12:00:00 UTC

6. 和ggplot2配合

代码
flights %>%
  filter(carrier %in% c("9E", "US", "AA", "MQ")) %>%
  mutate(week_day = wday(time_hour, label = T)) %>%
  ggplot(aes(week_day)) +
  geom_bar(fill = "steelblue", alpha = 0.8) +
  facet_wrap( ~ carrier) +
  theme_bw() +
  labs(title = "Number of flights by carrier and weekday",
       x = "Week days",
       y = "")

代码
theme(plot.title = element_text(hjust = 0.5)) # 注意:这一行没有用 "+" 接到前面的 ggplot 代码之后,因此只会打印主题对象,标题并不会居中;应在 labs(...) 末尾加上 "+"
List of 1
 $ plot.title:List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : num 0.5
  ..$ vjust        : NULL
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : NULL
  ..$ debug        : NULL
  ..$ inherit.blank: logi FALSE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 - attr(*, "class")= chr [1:2] "theme" "gg"
 - attr(*, "complete")= logi FALSE
 - attr(*, "validate")= logi TRUE
代码
flights %>%
  filter(time_hour<ymd("2013-10-01"),
    carrier %in% c("9E", "US", "AA", "MQ")) %>%
  mutate(week_day = wday(time_hour, label = T)) %>%
  ggplot(aes(time_hour,color=carrier)) +
  geom_freqpoly(linewidth=1.5) +
  theme_bw() +
  labs(title = "Number of flights by carrier",
       x = "",
       y = "")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

代码
theme(plot.title = element_text(hjust = 0.5)) # 注意:这一行没有用 "+" 接到前面的 ggplot 代码之后,因此只会打印主题对象,标题并不会居中;应在 labs(...) 末尾加上 "+"
List of 1
 $ plot.title:List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : num 0.5
  ..$ vjust        : NULL
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : NULL
  ..$ debug        : NULL
  ..$ inherit.blank: logi FALSE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 - attr(*, "class")= chr [1:2] "theme" "gg"
 - attr(*, "complete")= logi FALSE
 - attr(*, "validate")= logi TRUE

7. 视频

Lubridate - how to manipulate date and time data in R

回到顶部