I am trying to work out how sub() works in R. I want to split each element of a character vector into two parts. For instance, I have this working for list_2, where var1 = 0_300 and var2 = minus5 for the first element.
# Names of the form "area_<from>_<to>_<offset>": three distance rings, each
# paired with the offsets minus5 ... minus1 and 0 ... 5.
list_2 <- c(
  "area_0_300_minus5",
  "area_0_300_minus4",
  "area_0_300_minus3",
  "area_0_300_minus2",
  "area_0_300_minus1",
  "area_0_300_0",
  "area_0_300_1",
  "area_0_300_2",
  "area_0_300_3",
  "area_0_300_4",
  "area_0_300_5",
  "area_300_600_minus5",
  "area_300_600_minus4",
  "area_300_600_minus3",
  "area_300_600_minus2",
  "area_300_600_minus1",
  "area_300_600_0",
  "area_300_600_1",
  "area_300_600_2",
  "area_300_600_3",
  "area_300_600_4",
  "area_300_600_5",
  "area_600_800_minus5",
  "area_600_800_minus4",
  "area_600_800_minus3",
  "area_600_800_minus2",
  "area_600_800_minus1",
  "area_600_800_0",
  "area_600_800_1",
  "area_600_800_2",
  "area_600_800_3",
  "area_600_800_4",
  "area_600_800_5"
)

# Ring part ("0_300", "300_600", ...): the pattern spans the whole string and
# the group captures "digits_digits" sitting between two underscores; the
# replacement "\\1" keeps only that captured text.
var1_working <- sub(
  pattern = "^.*_(\\d+_\\d+)_.*$",
  replacement = "\\1",
  x = list_2
)

# Offset part ("minus5", "0", "5", ...): the greedy ".*_" runs up to the last
# underscore, so the group captures everything after it.
var2_working <- sub(
  pattern = "^.*_(.*)$",
  replacement = "\\1",
  x = list_2
)
But for my list_1, I can't seem to extract 0_300 etc. as var1 and 'm5' / 'm4' / 'm3' / 'm2' / 'm1' / '0' / '1' etc. as var2.
# Names of the form
# "as.factor(radius_ring)<from>_<to>:as.factor(year_delta)<offset>:units":
# four distance rings for each of the offsets 0 ... 5 and m1 ... m5.
list_1 <- c(
  "as.factor(radius_ring)0_300:as.factor(year_delta)0:units",
  "as.factor(radius_ring)300_600:as.factor(year_delta)0:units",
  "as.factor(radius_ring)600_800:as.factor(year_delta)0:units",
  "as.factor(radius_ring)800_1000:as.factor(year_delta)0:units",
  "as.factor(radius_ring)0_300:as.factor(year_delta)1:units",
  "as.factor(radius_ring)300_600:as.factor(year_delta)1:units",
  "as.factor(radius_ring)600_800:as.factor(year_delta)1:units",
  "as.factor(radius_ring)800_1000:as.factor(year_delta)1:units",
  "as.factor(radius_ring)0_300:as.factor(year_delta)2:units",
  "as.factor(radius_ring)300_600:as.factor(year_delta)2:units",
  "as.factor(radius_ring)600_800:as.factor(year_delta)2:units",
  "as.factor(radius_ring)800_1000:as.factor(year_delta)2:units",
  "as.factor(radius_ring)0_300:as.factor(year_delta)3:units",
  "as.factor(radius_ring)300_600:as.factor(year_delta)3:units",
  "as.factor(radius_ring)600_800:as.factor(year_delta)3:units",
  "as.factor(radius_ring)800_1000:as.factor(year_delta)3:units",
  "as.factor(radius_ring)0_300:as.factor(year_delta)4:units",
  "as.factor(radius_ring)300_600:as.factor(year_delta)4:units",
  "as.factor(radius_ring)600_800:as.factor(year_delta)4:units",
  "as.factor(radius_ring)800_1000:as.factor(year_delta)4:units",
  "as.factor(radius_ring)0_300:as.factor(year_delta)5:units",
  "as.factor(radius_ring)300_600:as.factor(year_delta)5:units",
  "as.factor(radius_ring)600_800:as.factor(year_delta)5:units",
  "as.factor(radius_ring)800_1000:as.factor(year_delta)5:units",
  "as.factor(radius_ring)0_300:as.factor(year_delta)m1:units",
  "as.factor(radius_ring)300_600:as.factor(year_delta)m1:units",
  "as.factor(radius_ring)600_800:as.factor(year_delta)m1:units",
  "as.factor(radius_ring)800_1000:as.factor(year_delta)m1:units",
  "as.factor(radius_ring)0_300:as.factor(year_delta)m2:units",
  "as.factor(radius_ring)300_600:as.factor(year_delta)m2:units",
  "as.factor(radius_ring)600_800:as.factor(year_delta)m2:units",
  "as.factor(radius_ring)800_1000:as.factor(year_delta)m2:units",
  "as.factor(radius_ring)0_300:as.factor(year_delta)m3:units",
  "as.factor(radius_ring)300_600:as.factor(year_delta)m3:units",
  "as.factor(radius_ring)600_800:as.factor(year_delta)m3:units",
  "as.factor(radius_ring)800_1000:as.factor(year_delta)m3:units",
  "as.factor(radius_ring)0_300:as.factor(year_delta)m4:units",
  "as.factor(radius_ring)300_600:as.factor(year_delta)m4:units",
  "as.factor(radius_ring)600_800:as.factor(year_delta)m4:units",
  "as.factor(radius_ring)800_1000:as.factor(year_delta)m4:units",
  "as.factor(radius_ring)0_300:as.factor(year_delta)m5:units",
  "as.factor(radius_ring)300_600:as.factor(year_delta)m5:units",
  "as.factor(radius_ring)600_800:as.factor(year_delta)m5:units",
  "as.factor(radius_ring)800_1000:as.factor(year_delta)m5:units"
)

# Same pattern as for list_2. It needs "_digits_digits_", but here the ring is
# preceded by ")" and followed by ":", so nothing matches and sub() hands back
# every element unchanged.
var1_nonworking <- sub(
  pattern = "^.*_(\\d+_\\d+)_.*$",
  replacement = "\\1",
  x = list_1
)

# Same pattern as for list_2. The last underscore is the one inside
# "year_delta", so the group captures e.g. "delta)0:units" instead of "0".
var2_nonworking <- sub(
  pattern = "^.*_(.*)$",
  replacement = "\\1",
  x = list_1
)
I am actually just a bit unsure how the pattern extraction works: "^.*_(\\d+_\\d+)_.*$", "\\1"
means too little to me to be able to adapt it to my list_1.
I hope this makes sense.
Two approaches:
strcapture
returns a data frame, with one column per capture group.
# Pattern, left to right:
#   .*\\)      skip ahead to a ")"
#   ([^:]*)    group 1: everything up to the next ":"
#   .*\\)      skip ahead to a later ")"
#   ([^:]*)    group 2: everything up to the next ":"
#   :.*        the ":" itself and the rest of the string
# `proto` names the output columns and gives their type (character here).
# The trailing index only picks a few rows for display.
strcapture(
  pattern = ".*\\)([^:]*).*\\)([^:]*):.*",
  x = list_1,
  proto = list(var1 = "", var2 = "")
)[c(1:3, 42:44), ]
# var1 var2
# 1 0_300 0
# 2 300_600 0
# 3 600_800 0
# 42 300_600 m5
# 43 600_800 m5
# 44 800_1000 m5
gregexpr
extracts zero or more matches per element of the input vector.
# Find every run of non-":" characters that sits directly after a ")"
# (lookbehind) and directly before a ":" (lookahead). Lookarounds need
# perl = TRUE.
gre <- gregexpr(
  pattern = "(?<=\\))([^:]*)(?=:)",
  text = list_1,
  perl = TRUE
)

# Pull the matched substrings out: one character vector per input element.
# The trailing index only picks a few elements for display.
regmatches(x = list_1, m = gre)[c(1:3, 42:44)]
# [[1]]
# [1] "0_300" "0"
# [[2]]
# [1] "300_600" "0"
# [[3]]
# [1] "600_800" "0"
# [[4]]
# [1] "300_600" "m5"
# [[5]]
# [1] "600_800" "m5"
# [[6]]
# [1] "800_1000" "m5"
Collected from the Internet
Please contact [email protected] to delete if infringement.
Comments