我有一个 4032 行、48 列的数据框。前 24 列是针对若干化合物计算出的通量,每一行对应 30 分钟时间分辨率下的一个通量值。后 24 列是这些通量的计算检测限 (LOD),化合物的排列顺序与前 24 列相同。
我想逐行检查:每个化合物的通量是否 > 对应列中的 LOD,或者 < -LOD(即通量的绝对值是否超过该化合物的检测限)。最后,我想创建一个新的数据框:如果通量满足该条件,则写入该通量值,否则写入 NA。
我分享了我的数据集的简化版本:
structure(list(mz31_fluxmax = c(0.0075314, 0.039237, -0.0091778,
-0.0074935, -0.0062872, -0.012777), mz33_fluxmax = c(-0.10383,
0.26369, -0.073705, -0.052205, -0.055995, -0.30571), mz39_fluxmax = c(0.13112,
-0.24524, 0.099267, 0.14686, 0.23026, 0.2555), mz42_fluxmax = c(-0.0064381,
0.0068372, 0.010509, 0.013523, -0.0039596, 0.018889), mz45_fluxmax = c(0.024457,
0.10681, 0.033549, 0.034579, -0.052483, 0.057419), mz47_fluxmax = c(-0.030953,
-0.060969, -0.027106, 0.04804, 0.048647, -0.050288), mz59_fluxmax = c(0.030912,
0.063897, 0.03306, 0.042901, -0.032359, -0.052992), mz61_fluxmax = c(-0.013039,
-0.018731, -0.017816, 0.035933, 0.025714, 0.023489), mz69_fluxmax = c(0.02081,
0.021299, -0.0077438, 0.011213, 0.019074, -0.02709), mz71_fluxmax = c(0.008063,
-0.0069763, 0.0023735, 0.0043432, 0.003758, 0.010974), mz75_fluxmax = c(-1.8245e-17,
7.0344e-18, -0.0006465, 0.00086653, -0.00052278, 0.00056043),
mz79_fluxmax = c(-0.0099819, 0.029971, 0.011572, 0.009469,
0.02177, -0.032429), mz85_fluxmax = c(0.0068045, -0.021908,
0.0050362, -0.0090931, -0.0058598, -0.019743), mz87_fluxmax = c(0.0090713,
0.011222, 0.0051697, 0.0097271, 0.0021328, 0.0090713), mz93_fluxmax = c(-0.029838,
0.05316, 0.044835, 0.021252, -0.040539, 0.039774), mz99_fluxmax = c(-0.0072673,
0.0077081, -0.0037859, -0.0046982, -0.0010743, 0.0071997),
mz101_fluxmax = c(0.0048883, 0.011394, 0.0029878, -0.006759,
0.0065672, 0.010028), mz107_fluxmax = c(-0.027853, -0.054236,
0.023384, 0.022094, 0.022981, 0.036405), mz111_fluxmax = c(-0.0016328,
0.0066329, -0.0018345, 0.004555, 0.0015514, 0.0032013), mz113_fluxmax = c(-0.0013015,
0.0055934, 0.00089352, 0.0015395, -0.0011601, 0.0038798),
mz135_fluxmax = c(-0.0061842, -0.0098238, 0.0036505, 0.0052973,
0.0029078, 0.012724), mz137_fluxmax = c(0.026894, 0.034569,
0.016971, -0.00055361, 0.03223, 0.0020253), mz149_fluxmax = c(-0.0017587,
-0.0033536, 0.00090186, -0.00060427, -0.00083038, 0.0017915
), mz155_fluxmax = c(0.0011551, 0.00011869, 0.00052767, 0.00054035,
-5.7848e-05, -1.2613e-05), mz31_LOD = c(0.0056881436858662,
0.014850612037564, 0.00263459553228289, 0.00479935397746244,
0.0152440068257583, 0.0178542775892762), mz33_LOD = c(0.0125308028387973,
0.00911763719872646, 0.0151284350477026, 0.0372508988086331,
0.0402229125266234, 0.0936355242726306), mz39_LOD = c(0.0301850520395113,
0.0296992069156593, 0.0201949605533048, 0.217490160513958,
0.00223029803079041, 0.124007419481375), mz42_LOD = c(0.00320496355324591,
0.000990716035552583, 0.00114254522034714, 0.00153880263591558,
0.00948843346611039, 0.00829842969627028), mz45_LOD = c(0.0330936038635234,
0.0167556608587841, 0.0122716423260542, 0.000398211936512332,
0.00540592950218144, 0.0183693318587938), mz47_LOD = c(0.0111770867410492,
0.00282666705854054, 0.00172080651807461, 0.0115511710261517,
0.0156374551396285, 0.0544621906247567), mz59_LOD = c(0.0159506436971311,
0.0467280850597503, 0.00896526672250792, 0.00208209542259193,
0.0196628887796654, 0.00302598893847008), mz61_LOD = c(0.0016309734207739,
0.000905825894770442, 0.00793279030609907, 0.0131829166139475,
0.0108149832147901, 0.0153864222552258), mz69_LOD = c(0.00638344838052493,
0.00465756945316134, 0.000733281330641999, 0.00235604303405109,
0.00314352406984064, 0.00504395302927101), mz71_LOD = c(0.000455024687674437,
0.00326558077604542, 0.000174790097425541, 0.00121549851806748,
0.00163842732208755, 0.000892298876604984), mz75_LOD = c(NA,
NA, 0.00145895087681435, 4.90107803327739e-05, 0.000251573571031492,
0.00363292289535981), mz79_LOD = c(0.00521113925555237, 0.0103629801610154,
0.0118890958199121, 0.0122255131032432, 0.00536736523974168,
0.00568381024749507), mz85_LOD = c(0.0132788357415617, 0.00102839338218391,
0.00940732247246199, 0.000348774983294675, 0.00298067320381836,
0.00205641452275468), mz87_LOD = c(0.00201091935375826, 0.000776210717592691,
0.00279198390479745, 0.00141482880373932, 0.000748541000610013,
0.00281145814206216), mz93_LOD = c(0.00697408929207704, 0.0260339773064747,
0.00810401572478017, 0.00100041305177681, 0.00665795713420106,
0.00396693358778718), mz99_LOD = c(0.00402957819499522, 0.000566331511400743,
0.00155300896677703, 0.00232847303855026, 0.00464435693739678,
0.00171045854038109), mz101_LOD = c(0.00178420487408269,
0.00115586456923503, 0.00254601356943224, 0.00310985936245129,
0.00432584813531501, 0.00243251979505525), mz107_LOD = c(0.00407638866821389,
0.0229674850748965, 0.00701861441818298, 0.0116410684433383,
0.00485523640022218, 0.0155737255675545), mz111_LOD = c(0.000843805958946711,
0.00287785932050435, 0.00134575880747311, 0.000532630272225315,
0.00201047010477024, 0.00283236237275034), mz113_LOD = c(0.000636492422450974,
0.000453940678672287, 0.00108923919956853, 0.000493113580579477,
0.000200586155571694, 0.000500537860017757), mz135_LOD = c(0.00203273369486478,
0.00908905787659258, 0.000826768270592192, 0.00179533094202209,
0.00202657955605344, 0.00809631808214351), mz137_LOD = c(0.010197651904802,
0.00809757134440575, 0.00307654713824166, 0.00113203086563082,
0.00217444118117416, 0.00803526410617303), mz149_LOD = c(4.94861889361863e-05,
0.00217371652333924, 0.000952885071549479, 0.000215375843276559,
0.000171446563764392, 9.19079668394535e-05), mz155_LOD = c(0.000246712993094256,
0.00185548030033775, 9.85004369721625e-05, NA, 0.000121478907895942,
NA)), row.names = c(NA, 6L), class = "data.frame")
所以,具体举个例子:我想看看 mz31_fluxmax 第一行的值是 > mz31_LOD 还是 < -mz31_LOD。如果满足条件,则将 mz31_fluxmax 的值写入新数据框,否则写入 NA。然后对下一行依此类推。显然,我想对每一列重复这个过程。
我没有尝试任何代码。我真的不知道如何做到这一点。
像这样的东西?假设 df 是您的数据框:
# Split the data frame: columns 1-24 hold the fluxes, columns 25-48 hold the
# LODs for the same compounds in the same order.
mat1 <- df[, 1:24]
mat2 <- df[, 25:48]
# A flux passes when it is > LOD or < -LOD, i.e. when abs(flux) > LOD.
# The comparison is NA wherever the LOD (or the flux) is missing; those cells
# cannot be shown to pass, so they are blanked together with the failing ones.
above_lod <- abs(mat1) > mat2
mat1[is.na(above_lod) | !above_lod] <- NA
mat1
请注意,您的 LOD 列中有 NA,此时无法判断通量是否超过检测限。由于 LOD 列中的所有值都是正值,条件“通量 > LOD 或通量 < -LOD”可以简化为“通量的绝对值 > LOD”:满足该条件的值予以保留,其余的值设置为 NA。
本文收集自互联网,转载请注明来源。
如有侵权,请联系 [email protected] 删除。
我来说两句