求DNA序列的互补序列

 ds87vdsa 发布于 2023-02-12 22:41

假设我有一个DNA序列,我想得到它的互补序列。我使用了以下代码,但没有得到想要的结果。我究竟做错了什么?

s=readline()
ATCTCGGCGCGCATCGCGTACGCTACTAGC
p=unlist(strsplit(s,""))
h=rep("N",nchar(s))
unlist(lapply(p,function(d){
for b in (1:nchar(s)) {    
    if (p[b]=="A") h[b]="T"
    if (p[b]=="T") h[b]="A"
    if (p[b]=="G") h[b]="C"
    if (p[b]=="C") h[b]="G"
}

Spacedman.. 13

使用chartr,它正是为此目的而设计的:

> s
[1] "ATCTCGGCGCGCATCGCGTACGCTACTAGC"
> chartr("ATGC","TACG",s)
[1] "TAGAGCCGCGCGTAGCGCATGCGATGATCG"

只需给它两个等长的字符串和你要转换的字符串。它对待转换的参数也是向量化的:

> chartr("ATGC","TACG",c("AAAACG","TTTTT"))
[1] "TTTTGC" "AAAAA" 

注意我转换的是DNA的字符串表示,而不是字符向量。要转换向量,我会创建一个命名向量作为查找映射,然后用它进行索引:

> p
 [1] "A" "T" "C" "T" "C" "G" "G" "C" "G" "C" "G" "C" "A" "T" "C" "G" "C" "G" "T"
[20] "A" "C" "G" "C" "T" "A" "C" "T" "A" "G" "C"
> map=c("A"="T", "T"="A","G"="C","C"="G")
> unname(map[p])
 [1] "T" "A" "G" "A" "G" "C" "C" "G" "C" "G" "C" "G" "T" "A" "G" "C" "G" "C" "A"
[20] "T" "G" "C" "G" "A" "T" "G" "A" "T" "C" "G"


Martin Morga.. 12

Bioconductor的Biostrings包为这类操作提供了许多有用的函数。先安装一次:

source("http://bioconductor.org/biocLite.R")
biocLite("Biostrings")

然后用

library(Biostrings)
dna = DNAStringSet(c("ATCTCGGCGCGCATCGCGTACGCTACTAGC", "ACCGCTA"))
complement(dna)


42-.. 5

sapply(p, switch,  "A"="T", "T"="A","G"="C","C"="G")
  A   T   C   T   C   G   G   C   G   C   G   C   A   T   C   G   C   G   T 
"T" "A" "G" "A" "G" "C" "C" "G" "C" "G" "C" "G" "T" "A" "G" "C" "G" "C" "A" 
  A   C   G   C   T   A   C   T   A   G   C 
"T" "G" "C" "G" "A" "T" "G" "A" "T" "C" "G" 

如果您不想要结果中的名称,可以随时用unname将它们去掉。

unname(sapply(p, switch,  "A"="T", "T"="A","G"="C","C"="G") )
 [1] "T" "A" "G" "A" "G" "C" "C" "G" "C" "G" "C" "G" "T" "A" "G" "C" "G" "C"
[19] "A" "T" "G" "C" "G" "A" "T" "G" "A" "T" "C" "G"
> 

可能是由没有美感的人定制的!:) (2认同)


JeremyS.. 5

还有一个seqinr包

library(seqinr)
comp(seq) # gives complement
rev(comp(seq)) # gives the reverse complement

Biostrings的内存占用更小,但seqinr也很好,因为您可以选择碱基的大小写(包括混合大小写)并将它们更改为您想要的任何形式,例如,当您想要在同一序列中混合使用T和U时。而Biostrings强制您只能使用T或U其中之一。

5 个回答
  • 使用chartr,它正是为此目的而设计的:

    > s
    [1] "ATCTCGGCGCGCATCGCGTACGCTACTAGC"
    > chartr("ATGC","TACG",s)
    [1] "TAGAGCCGCGCGTAGCGCATGCGATGATCG"
    

    只需给它两个等长的字符串和你要转换的字符串。它对待转换的参数也是向量化的:

    > chartr("ATGC","TACG",c("AAAACG","TTTTT"))
    [1] "TTTTGC" "AAAAA" 
    

    注意我转换的是DNA的字符串表示,而不是字符向量。要转换向量,我会创建一个命名向量作为查找映射,然后用它进行索引:

    > p
     [1] "A" "T" "C" "T" "C" "G" "G" "C" "G" "C" "G" "C" "A" "T" "C" "G" "C" "G" "T"
    [20] "A" "C" "G" "C" "T" "A" "C" "T" "A" "G" "C"
    > map=c("A"="T", "T"="A","G"="C","C"="G")
    > unname(map[p])
     [1] "T" "A" "G" "A" "G" "C" "C" "G" "C" "G" "C" "G" "T" "A" "G" "C" "G" "C" "A"
    [20] "T" "G" "C" "G" "A" "T" "G" "A" "T" "C" "G"
    

    2023-02-12 22:53 回答
  • Bioconductor的Biostrings包为这类操作提供了许多有用的函数。先安装一次:

    source("http://bioconductor.org/biocLite.R")
    biocLite("Biostrings")
    

    然后用

    library(Biostrings)
    dna = DNAStringSet(c("ATCTCGGCGCGCATCGCGTACGCTACTAGC", "ACCGCTA"))
    complement(dna)
    

    2023-02-12 22:56 回答
  • 还有一个seqinr包

    library(seqinr)
    comp(seq) # gives complement
    rev(comp(seq)) # gives the reverse complement
    

    Biostrings的内存占用更小,但seqinr也很好,因为您可以选择碱基的大小写(包括混合大小写)并将它们更改为您想要的任何形式,例如,当您想要在同一序列中混合使用T和U时。而Biostrings强制您只能使用T或U其中之一。

    2023-02-12 23:03 回答
  • 要求互补序列,无论碱基是大写还是小写,都可以使用chartr():

    n <- "ACCTGccatGCATC"
    chartr("acgtACGT", "tgcaTGCA", n)
    # [1] "TGGACggtaCGTAG"
    

    要更进一步,求核苷酸序列的反向互补序列,可以使用以下函数:

    library(stringi)
    
    rc <- function(nucSeq) {
      # Reverse complement of a nucleotide string.
      # Each base is swapped for its pairing partner (a<->t, c<->g), keeping
      # the original letter case; characters outside "acgtACGT" pass through
      # unchanged. The complemented string is then reversed.
      complemented <- chartr("acgtACGT", "tgcaTGCA", nucSeq)
      stri_reverse(complemented)
    }
    
    rc("AcACGTgtT")
    # [1] "AacACGTgT"
    

    2023-02-12 23:04 回答
  • sapply(p, switch,  "A"="T", "T"="A","G"="C","C"="G")
      A   T   C   T   C   G   G   C   G   C   G   C   A   T   C   G   C   G   T 
    "T" "A" "G" "A" "G" "C" "C" "G" "C" "G" "C" "G" "T" "A" "G" "C" "G" "C" "A" 
      A   C   G   C   T   A   C   T   A   G   C 
    "T" "G" "C" "G" "A" "T" "G" "A" "T" "C" "G" 
    

    如果您不想要结果中的名称,可以随时用unname将它们去掉。

    unname(sapply(p, switch,  "A"="T", "T"="A","G"="C","C"="G") )
     [1] "T" "A" "G" "A" "G" "C" "C" "G" "C" "G" "C" "G" "T" "A" "G" "C" "G" "C"
    [19] "A" "T" "G" "C" "G" "A" "T" "G" "A" "T" "C" "G"
    > 
    

    2023-02-12 23:05 回答
撰写答案
今天,你开发时遇到什么问题呢?
立即提问
热门标签
PHP1.CN | 中国最专业的PHP中文社区 | PNG素材下载 | DevBox开发工具箱 | json解析格式化 |PHP资讯 | PHP教程 | 数据库技术 | 服务器技术 | 前端开发技术 | PHP框架 | 开发工具 | 在线工具
Copyright © 1998 - 2020 PHP1.CN. All Rights Reserved 京公网安备 11010802041100号 | 京ICP备19059560号-4 | PHP1.CN 第一PHP社区 版权所有