我正在尝试将单字母的氨基酸(AA)变体代码替换为三字母代码(以便于阅读)。大部分结果正常,但有少数几个出错。以下是我带注释的代码。谢谢

# Example variant strings: "p." followed by a one-letter amino-acid code,
# a number, and another one-letter amino-acid code.
x <- c("p.G12C","p.F121S","p.P124S","p.P124L","p.E13D",
        "p.E203K","p.Q209P","p.Q209P","p.Q209L")

# Parallel lookup tables: aa3[i] is the three-letter code that replaces the
# one-letter code aa1[i].
aa3 <- c("Ala", "Arg", "Asn", "Asp", "Cys", "Glu", "Gln", "Gly", "His",
           "Ile", "Leu", "Lys", "Met", "Phe", "Pro", "Ser", "Thr", "Trp",
           "Tyr", "Val")
aa1 <- c("A", "R", "N", "D", "C", "E", "Q", "G", "H", "I", "L", "K",
           "M", "F", "P", "S", "T", "W", "Y", "V")

# NOTE(review): every iteration reads the untouched `x` and overwrites `xy`,
# so as written only the last substitution (aa1[20], "V" -> "Val") survives.
# The output reported below matches a cumulative variant in which each pass
# feeds the next one (e.g. `x <- gsub(...)`). In that form a later pattern
# also matches the capital letter of a code inserted by an earlier pass:
# E -> "Glu" and then G -> "Gly" yields "Glylu"; F -> "Phe" and then
# P -> "Pro" yields "Prohe"; Q -> "Gln" and then G -> "Gly" yields "Glyln".
for (i in 1:length(aa1))
{
  xy <- gsub(aa1[i],aa3[i],x,ignore.case = F)
}


输出

# Note that E, F and Q have unusual 3 letter replacement.
# I could not figure out what is causing this.
xy
[1] "p.Gly12Cys"    "p.Prohe121Ser" "p.Pro124Ser"   "p.Pro124Leu"
    "p.Glylu13Asp"  "p.Glylu203Lys" "p.Glyln209Pro" "p.Glyln209Pro" "p.Glyln209Leu"


预期输出

"p.Gly12Cys"    "p.Phe121Ser" "p.Pro124Ser"   "p.Pro124Leu"   "p.Glu13Asp"
"p.Glu203Lys" "p.Gln209Pro" "p.Gln209Pro" "p.Gln209Leu"


错误

outputs "p.Prohe121Ser" instead of "p.Phe121Ser"
"p.Glylu13Asp" instead of  "p.Glu13Asp"

最佳答案

我们可以使用mgsub

library(qdap)
# Replace each one-letter code in aa1 with the matching entry of aa3.
mgsub(aa1, aa3, x)
#[1] "p.Gly12Cys"    "p.Phe121Ser"   "p.Pro124Ser"   "p.Pro124Leu"
#[5] "p.Glu13Alasp"  "p.Glu203Leuys" "p.Gln209Pro"   "p.Gln209Pro"
#[9] "p.Gln209Leu"
# NOTE(review): elements 5 and 6 are still wrong ("Alasp" and "Leuys" where
# "Asp" and "Lys" are expected). Presumably mgsub also applies the patterns
# one after another, so the "A" of an inserted "Asp" and the "L" of an
# inserted "Lys" get replaced again -- confirm against the qdap docs.


更新

 # Split every variant into four comma-separated fields (two-character
 # prefix, one-letter code, digits, one-letter code) and parse the result
 # as a table with one row per variant.
 # colClasses = "character" keeps all fields as text. Without it, read.csv()
 # converts a letter column consisting only of "T"/"F" (Thr/Phe) to logical
 # TRUE/FALSE, and the lookup below then returns NA for those rows.
 d1 <- read.csv(text = sub("(..)(.)(\\d+)(.)", "\\1,\\2,\\3,\\4", x),
                header = FALSE, colClasses = "character",
                stringsAsFactors = FALSE)
 # Translate the two letter columns from one-letter to three-letter codes;
 # match() looks each whole field up once, so nothing is replaced twice.
 d1[c(2, 4)] <- lapply(d1[c(2, 4)], function(code) aa3[match(code, aa1)])
 # Glue the four columns of each row back together into one string.
 do.call(paste0, d1)
 #[1] "p.Gly12Cys"  "p.Phe121Ser" "p.Pro124Ser" "p.Pro124Leu" "p.Glu13Asp"
 #[6] "p.Glu203Lys" "p.Gln209Pro" "p.Gln209Pro" "p.Gln209Leu"




或使用gsubfn

library(gsubfn)
# Look every capital letter up by name in a list mapping the one-letter
# codes (aa1) to their three-letter codes (aa3) and substitute the result.
gsubfn("[A-Z]", as.list(setNames(aa3, aa1)), x)
#[1] "p.Gly12Cys"  "p.Phe121Ser" "p.Pro124Ser" "p.Pro124Leu" "p.Glu13Asp"
#[6] "p.Glu203Lys" "p.Gln209Pro" "p.Gln209Pro" "p.Gln209Leu"

08-28 21:45