


> crime
# A tibble: 8 x 8
  iso    year  theft robbery burglary theft_price robbery_price burglary_price
  <chr> <dbl>  <dbl>   <dbl>    <dbl>       <dbl>         <dbl>          <dbl>
1 ALB    2003   3694     199      874        32.9          115           49.3
2 ALB    2004   3694     199      874        38.2          134           57.3
3 ALB    2005   3694     199      874        42.8          150           64.2
4 ALB    2006   3450     164      779        47.0          165           70.5
5 AUS    2003 722334   14634   586266       408.4         1427          612.4 
6 AUS    2004 636717   14634   512551       481.3         1683          721.2 
7 AUS    2005 598700   14634   468558       536.7         1877          804.5 
8 AUS    2006 594111   14634   433974       564.8         1973          846.5 

我想为每种犯罪类型创建一个新列,其值为该犯罪的数量与其价格的乘积,即 theft × theft_price = theft_prod,依此类推。在我的实际数据集中,犯罪类型要多得多,因此我需要一种能够扩展到比这个子集更多变量的方法。


# Attempted approach (incomplete): multiply each crime-count column by its
# matching `<crime>_price` column. `????` is a placeholder for the unknown
# piece -- how to refer to the matching price column -- so this snippet is
# not runnable as written.
crime %>%
  mutate_at(vars(theft, robbery, burglary),
            funs(prod = . * ????))




  1. 用 `gather` 把所有度量列收集成长格式。
  2. 用 `mutate` 新建一列 `measure_type`,标明该值是计数还是价格,然后从 `crime_type` 中删除 `_price` 后缀。这样,犯罪类型和该犯罪所用的度量各占一列,每一行对应一个唯一的 iso-年份-犯罪类型-度量组合。
  3. 用 `spread` 把 `measure_type` 重新展开,这样每种犯罪都有单独的 `count` 和 `price` 列,然后用 `mutate` 将二者相乘。
  4. (可选)如果想恢复成宽格式,只需用 `gather` 收集 `count`、`price` 和新的 `product` 列,用 `unite` 将度量名与犯罪类型合并成列名,再用 `spread` 展开即可。
# Recreate the example data from whitespace-delimited text: an iso code, a
# year, three crime-count columns and their matching `<crime>_price` columns.
tbl <- read_table2(
"iso    year  theft robbery burglary theft_price robbery_price burglary_price
ALB    2003   3694     199      874        32.9          115           49.3
ALB    2004   3694     199      874        38.2          134           57.3
ALB    2005   3694     199      874        42.8          150           64.2
ALB    2006   3450     164      779        47.0          165           70.5
AUS    2003 722334   14634   586266       408.4         1427          612.4
AUS    2004 636717   14634   512551       481.3         1683          721.2
AUS    2005 598700   14634   468558       536.7         1877          804.5
AUS    2006 594111   14634   433974       564.8         1973          846.5"
)
# Reshape to one row per iso-year-crime with separate `count` and `price`
# columns, then multiply them. Scales to any number of crime types as long as
# each price column is named `<crime>_price`.
tidy_tbl <- tbl %>%
  # Stack every measure column into (crime_type, measure) pairs.
  gather(crime_type, measure, -iso, -year) %>%
  mutate(
    # Columns ending in "_price" hold prices; everything else is a count.
    measure_type = if_else(str_detect(crime_type, "_price$"), "price", "count"),
    # Strip only the trailing suffix so count and price rows share one
    # crime_type key (anchored to match the detection pattern above).
    crime_type = str_remove(crime_type, "_price$")
  ) %>%
  # One column per measure type: `count` and `price`.
  spread(measure_type, measure) %>%
  mutate(product = count * price)
#> # A tibble: 24 x 6
#>    iso    year crime_type count price product
#>    <chr> <int> <chr>      <dbl> <dbl>   <dbl>
#>  1 ALB    2003 burglary     874  49.3  43088.
#>  2 ALB    2003 robbery      199 115    22885 
#>  3 ALB    2003 theft       3694  32.9 121533.
#>  4 ALB    2004 burglary     874  57.3  50080.
#>  5 ALB    2004 robbery      199 134    26666 
#>  6 ALB    2004 theft       3694  38.2 141111.
#>  7 ALB    2005 burglary     874  64.2  56111.
#>  8 ALB    2005 robbery      199 150    29850 
#>  9 ALB    2005 theft       3694  42.8 158103.
#> 10 ALB    2006 burglary     779  70.5  54920.
#> # ... with 14 more rows

# Optional: return to a wide layout with one `<crime>_<measure>` column per
# crime type and measure (count, price, product).
tidy_tbl %>%
  # Stack the three measure columns back into long form.
  gather(key = "measure_type", value = "measure", count:product) %>%
  # Build the wide column names, e.g. "burglary_count".
  unite(col = "colname", crime_type, measure_type, sep = "_") %>%
  spread(key = colname, value = measure)
#> # A tibble: 8 x 11
#>   iso    year burglary_count burglary_price burglary_product robbery_count
#>   <chr> <int>          <dbl>          <dbl>            <dbl>         <dbl>
#> 1 ALB    2003            874           49.3           43088.           199
#> 2 ALB    2004            874           57.3           50080.           199
#> 3 ALB    2005            874           64.2           56111.           199
#> 4 ALB    2006            779           70.5           54920.           164
#> 5 AUS    2003         586266          612.        359029298.         14634
#> 6 AUS    2004         512551          721.        369651781.         14634
#> 7 AUS    2005         468558          804.        376954911          14634
#> 8 AUS    2006         433974          846.        367358991          14634
#> # ... with 5 more variables: robbery_price <dbl>, robbery_product <dbl>,
#> #   theft_count <dbl>, theft_price <dbl>, theft_product <dbl>



