样品要么是野生型(`wt`),要么是突变型(`mut`)。我想根据 `res.sig` 数据框中 `Group1` 列取值为 `Responder`(应答者)的行,统计野生型与突变型样本的数量。在 `n_mutated_group1` 列中,第一个字符表示突变样本的数量,最后两位数字(`of` 之后)表示该组的样本总数。`wt` 是 `total` 与 `mut` 之间的差值。
# Number of samples in wild-type versus mutant with respect to the most significantly enriched genes
# Counts are parsed from n_mutated_group1 for rows where Group1 == "Responder".
# NOTE(review): substr() does not count negative positions from the end of the
# string; substr(x, -2, -1) returns "" for every row, as.numeric("") is NA, and
# so this sum evaluates to NA_real_ instead of the group total.
total <- sum(as.numeric(res.sig %>%
filter(Group1=="Responder") %>%
mutate(last_letter = substr(n_mutated_group1,-2,-1)) %>%
pull(last_letter)))
# Takes only the first character of each value as the mutated count and sums
# it over the filtered rows.
# NOTE(review): one character only covers counts below 10 - confirm this holds.
mut <- sum(as.numeric(res.sig %>%
filter(Group1=="Responder") %>%
mutate(first_letter = substr(n_mutated_group1,1,1)) %>%
pull(first_letter)))
# Wild-type count is the summed totals minus the summed mutated counts.
wt <- total-mut
`res.sig` 数据:
> dput(res.sig)
structure(list(Hugo_Symbol = c("ERCC2", "ERCC2", "AKAP9", "AKAP9",
"HERC1", "HERC1", "HECTD1", "HECTD1", "MACF1", "MACF1", "MROH2B",
"MROH2B", "KMT2C", "KMT2C"), Group1 = c("Non-Responder", "Responder",
"Non-Responder", "Responder", "Non-Responder", "Responder", "Non-Responder",
"Responder", "Non-Responder", "Responder", "Non-Responder", "Responder",
"Non-Responder", "Responder"), Group2 = c("Rest", "Rest", "Rest",
"Rest", "Rest", "Rest", "Rest", "Rest", "Rest", "Rest", "Rest",
"Rest", "Rest", "Rest"), n_mutated_group1 = c("0 of 25", "9 of 25",
"0 of 25", "6 of 25", "0 of 25", "6 of 25", "0 of 25", "6 of 25",
"0 of 25", "6 of 25", "0 of 25", "6 of 25", "1 of 25", "7 of 25"
), n_mutated_group2 = c("9 of 25", "0 of 25", "6 of 25", "0 of 25",
"6 of 25", "0 of 25", "6 of 25", "0 of 25", "6 of 25", "0 of 25",
"6 of 25", "0 of 25", "7 of 25", "1 of 25"), p_value = c(0.00163083541184905,
0.00163083541184905, 0.022289766970618, 0.022289766970618, 0.022289766970618,
0.022289766970618, 0.022289766970618, 0.022289766970618, 0.022289766970618,
0.022289766970618, 0.022289766970618, 0.022289766970618, 0.0487971536957187,
0.0487971536957187), OR = c(0, Inf, 0, Inf, 0, Inf, 0, Inf, 0,
Inf, 0, Inf, 0.111488645279478, 8.96952328636894), OR_low = c(0,
2.56647319276964, 0, 1.33358819424024, 0, 1.33358819424024, 0,
1.33358819424024, 0, 1.33358819424024, 0, 1.33358819424024, 0.00228988507629356,
1.0079479819766), OR_high = c(0.38963976043749, Inf, 0.749856668137133,
Inf, 0.749856668137133, Inf, 0.749856668137133, Inf, 0.749856668137133,
Inf, 0.749856668137133, Inf, 0.992114690322592, 436.703138665198
), fdr = c(0.109265972593886, 0.109265972593886, 0.248902397838568,
0.248902397838568, 0.248902397838568, 0.248902397838568, 0.248902397838568,
0.248902397838568, 0.248902397838568, 0.248902397838568, 0.248902397838568,
0.248902397838568, 0.467058471087594, 0.467058471087594)), row.names = c(NA,
-14L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x000002adab171ef0>)
问题:
在这里,我得到的 `total` 是 `NA_real_`,而不是总数。
对于 `total`,可以提取该列中字符串 `of` 之后的所有内容;对于 `mut`,可以提取 `of` 之前的所有内容。您可以使用以下代码:
library(tidyverse)

# Count wild-type vs. mutant samples among the "Responder" rows of res.sig.
# Each n_mutated_group1 value has the form "<mutated> of <group size>",
# e.g. "9 of 25".

# Filter once and reuse the strings for both counts.
responder_counts <- res.sig %>%
  filter(Group1 == "Responder") %>%
  pull(n_mutated_group1)

# Anchored pattern: capture group 1 is the mutated count (before "of"),
# capture group 2 is the group size (after "of"). Surrounding whitespace is
# excluded so as.numeric() receives clean digit strings.
count_pattern <- "^\\s*(\\d+)\\s+of\\s+(\\d+)\\s*$"

# Sum of the group sizes over all Responder rows.
total <- sum(as.numeric(sub(count_pattern, "\\2", responder_counts)))

# Sum of the mutated counts over all Responder rows.
mut <- sum(as.numeric(sub(count_pattern, "\\1", responder_counts)))

# Wild-type count is whatever is left after removing the mutated samples.
wt <- total - mut
wt
`wt` 的输出:
[1] 129
本文收集自互联网,转载请注明来源。
如有侵权,请联系 [email protected] 删除。
我来说两句