Summarising each column while grouping by names

I want to calculate a weighted variance, using the weights supplied in the dataset, while grouping by country and capital city, but the function returns NAs:

library(Hmisc)  # for the 'wtd.var' function
weather_winter.std <- weather_winter %>% 
  group_by(country, capital_city) %>%
  summarise(across(starts_with("winter"), wtd.var))

The output from the console (once the data are in long format):

# A tibble: 35 x 3
# Groups:   country [35]
   country  capital_city winter
   <chr>    <chr>         <dbl>
 1 ALBANIA  Tirane           NA
 2 AUSTRIA  Vienna           NA
 3 BELGIUM  Brussels         NA
 4 BULGARIA Sofia            NA
 5 CROATIA  Zagreb           NA
 6 CYPRUS   Nicosia          NA
 7 CZECHIA  Prague           NA
 8 DENMARK  Copenhagen       NA
 9 ESTONIA  Tallinn          NA
10 FINLAND  Helsinki         NA
# … with 25 more rows

This is the code I used to convert the data from wide to long format:

  weather_winter <- weather_winter %>% pivot_longer(-c(31:33))
  weather_winter$name <- NULL
  names(weather_winter)[4] <- "winter"

Some sample data:

structure(list(`dec-wet_2011` = c(12.6199998855591, 12.6099996566772, 
14.75, 11.6899995803833, 18.2899990081787), `dec-wet_2012` = c(13.6300001144409, 
14.2199993133545, 14.2299995422363, 16.1000003814697, 18.0299987792969
), `dec-wet_2013` = c(4.67999982833862, 5.17000007629395, 4.86999988555908, 
7.56999969482422, 5.96000003814697), `dec-wet_2014` = c(14.2999992370605, 
14.4799995422363, 13.9799995422363, 15.1499996185303, 16.1599998474121
), `dec-wet_2015` = c(0.429999977350235, 0.329999983310699, 1.92999994754791, 
3.30999994277954, 7.42999982833862), `dec-wet_2016` = c(1.75, 
1.29999995231628, 3.25999999046326, 6.60999965667725, 8.67999935150146
), `dec-wet_2017` = c(13.3400001525879, 13.3499994277954, 15.960000038147, 
10.6599998474121, 14.4699993133545), `dec-wet_2018` = c(12.210000038147, 
12.4399995803833, 11.1799993515015, 10.75, 18.6299991607666), 
    `dec-wet_2019` = c(12.7199993133545, 13.3800001144409, 13.9899997711182, 
    10.5299997329712, 12.3099994659424), `dec-wet_2020` = c(15.539999961853, 
    16.5200004577637, 11.1799993515015, 14.7299995422363, 13.5499992370605
    ), `jan-wet_2011` = c(8.01999950408936, 7.83999967575073, 
    10.2199993133545, 13.8899993896484, 14.5299997329712), `jan-wet_2012` = c(11.5999994277954, 
    11.1300001144409, 12.5500001907349, 10.1700000762939, 22.6199989318848
    ), `jan-wet_2013` = c(17.5, 17.4099998474121, 15.5599994659424, 
    13.3199996948242, 20.9099998474121), `jan-wet_2014` = c(12.5099992752075, 
    12.2299995422363, 15.210000038147, 9.73999977111816, 9.63000011444092
    ), `jan-wet_2015` = c(17.6900005340576, 16.9799995422363, 
    11.75, 9.9399995803833, 19), `jan-wet_2016` = c(15.6099996566772, 
    15.5, 14.5099992752075, 10.3899993896484, 18.4499988555908
    ), `jan-wet_2017` = c(9.17000007629395, 9.61999988555908, 
    9.30999946594238, 15.8499994277954, 11.210000038147), `jan-wet_2018` = c(8.55999946594238, 
    9.10999965667725, 13.2599992752075, 9.85999965667725, 15.8899993896484
    ), `jan-wet_2019` = c(17.0699996948242, 16.8699989318848, 
    14.5699996948242, 19.0100002288818, 19.4699993133545), `jan-wet_2020` = c(6.75999975204468, 
    6.25999975204468, 6.00999975204468, 5.35999965667725, 8.15999984741211
    ), `feb-wet_2011` = c(9.1899995803833, 8.63999938964844, 
    6.21999979019165, 9.82999992370605, 4.67999982833862), `feb-wet_2012` = c(12.2699995040894, 
    11.6899995803833, 8.27999973297119, 14.9399995803833, 13.0499992370605
    ), `feb-wet_2013` = c(15.3599996566772, 15.9099998474121, 
    17.0599994659424, 13.3599996566772, 16.75), `feb-wet_2014` = c(10.1999998092651, 
    11.1399993896484, 13.8599996566772, 10.7399997711182, 7.35999965667725
    ), `feb-wet_2015` = c(11.9200000762939, 12.2699995040894, 
    8.01000022888184, 14.5299997329712, 5.71999979019165), `feb-wet_2016` = c(14.6999998092651, 
    14.7799997329712, 16.7899990081787, 4.90000009536743, 19.3500003814697
    ), `feb-wet_2017` = c(8.98999977111816, 9.17999935150146, 
    11.7699995040894, 6.3899998664856, 13.9899997711182), `feb-wet_2018` = c(16.75, 
    16.8599987030029, 12.0599994659424, 16.1900005340576, 8.51000022888184
    ), `feb-wet_2019` = c(7.58999967575073, 7.26999998092651, 
    8.21000003814697, 7.57999992370605, 8.81999969482422), `feb-wet_2020` = c(10.6399993896484, 
    10.4399995803833, 13.4399995803833, 8.53999996185303, 19.939998626709
    ), country = c("SERBIA", "SERBIA", "SLOVENIA", "GREECE", 
    "CZECHIA"), capital_city = c("Belgrade", "Belgrade", "Ljubljana", 
    "Athens", "Prague"), weight = c(20.25, 19.75, 14.25, 23.75, 
    14.25)), row.names = c(76L, 75L, 83L, 16L, 5L), class = "data.frame")

Your code seems to give the correct answer now that there is more data:

# A tibble: 4 x 3
# Groups:   country [4]
  country  capital_city winter
  <chr>    <chr>         <dbl>
1 CZECHIA  Prague         27.2
2 GREECE   Athens         14.6
3 SERBIA   Belgrade       19.1
4 SLOVENIA Ljubljana      16.3

Is this what you were looking for?

I took the liberty of simplifying your code a little:

weather_winter <- weather_winter %>% 
  pivot_longer(-c(31:33), values_to = "winter") %>% 
  select(-name)

weather_winter.std <- weather_winter %>% 
  group_by(country, capital_city) %>%
  summarise(winter = wtd.var(winter))

With only a single "winter" column, there is no need for across().
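For what it's worth, if you later go back to summarising several value columns at once, across() still works; you would just pass the weights through a lambda (more on the weights in the next point). A minimal sketch, assuming all the value columns start with "winter":

# Sketch only: the lambda pairs each value column with the weight column
weather_winter %>% 
  group_by(country, capital_city) %>%
  summarise(across(starts_with("winter"), ~ wtd.var(.x, weights = weight)))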

Finally, you were not using the weights. If you need them, change the last line to:

summarise(winter = wtd.var(winter, weights = weight))

Giving:

# A tibble: 4 x 3
# Groups:   country [4]
  country  capital_city winter
  <chr>    <chr>         <dbl>
1 CZECHIA  Prague         26.3
2 GREECE   Athens         14.2
3 SERBIA   Belgrade       18.8
4 SLOVENIA Ljubljana      15.8
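In case it helps when interpreting the numbers: as far as I know, with weights supplied and the default method = "unbiased", wtd.var() divides the weighted sum of squared deviations by (sum of weights - 1). A quick sanity-check sketch with made-up numbers (not taken from your data):

# Hand-rolled version of what I believe wtd.var() computes by default
# (method = "unbiased", normwt = FALSE); for comparison only.
manual_wtd_var <- function(x, w) {
  xbar <- sum(w * x) / sum(w)            # weighted mean
  sum(w * (x - xbar)^2) / (sum(w) - 1)   # weighted SS over (sum of weights - 1)
}

x <- c(12.6, 14.8, 11.7)                 # made-up values
w <- c(20.25, 14.25, 23.75)              # made-up weights
all.equal(manual_wtd_var(x, w), Hmisc::wtd.var(x, weights = w))  # expect TRUE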
