需求:

只获取 ath 物种的 hairpin 序列

文件格式如下所示,以>打头的为 header,紧跟的为序列[AUCG]+ (Perl5 regexp 格式

#!/usr/bin/env perl6
my Bool $now = False ;
# 词法变量还是用 my 声明
# 此外,perl5中的 local 在6 中是没有的
# our, state
# temp let 可以作用于声明好的变量上
my Str $seq; for @*ARGS[0].IO.lines -> $l {
# Perl5中的 @ARGV 在 perl6 中使用 动态变量@*ARGS 来代替, # $now = False ;
if $l ~~ /^\>/ {
#Perl6 正则中除了 a-z A-Z 和下划线不用转义,其他的都需要转义。
if $l ~~ /^ \> ath / {
#perl6 正则中的空白可以随便加,不作为匹配部分,你可以副词:s (:sigspace)来关闭
say $seq.flip.trans('A' => 'T','U' => 'A','C' => 'G','G' => 'C') if $seq.Bool;
# 把 pri-RNA 转成 cDNA
# 先反转整个序列,再 AUCG -> TAGC
$seq = '';
$now = True ;
say $l ;
next
}else{
$now = False
}
}
if $now and $l ~~ /^ <[AUCG]> + $/ {
say $l
}
}

文件 hairpin.fa

>ame-mir-9865 MI0031791 Apis mellifera miR-9865 stem-loop
AAGAUGGAAUUGAUUUAUGUGGUGAUUGUGCAGUAGCACAAUUAGAAGCUGAAAAACCAC
UGCACGAUACAUUACAUAGAUUACUUCCUAUUAA
>ame-mir-9866 MI0031792 Apis mellifera miR-9866 stem-loop
GAUGAGAGGAUUCGAGUCGGCGGGGUAUGAGUAAUACGUUCAGGCAUGUACCUCGUAUCC
CCUUGGCUCGCAAUGCCCUCUCCCU
>ame-mir-9867 MI0031793 Apis mellifera miR-9867 stem-loop
UCCUGAGGCAAGACGGUAUGGACGGUAGAGACGAGUCAUGAUCCUCGUCCUGCCGCGUCA
CCGUUAUCCUCAUCC
>ame-mir-9868 MI0031794 Apis mellifera miR-9868 stem-loop
UCGGCGAUAAAAGCUCGGCGUUUCAUCGAUGCCUGGCCUGCUCUUUCCCCGUGGUUAAGC
UCGAUGAAGUCGUCGGGUUUAGCCGG
...
>ath-MIR8167d MI0031739 Arabidopsis thaliana miR8167d stem-loop
CAUCUUUGAGAUUUUACACAGUAGUCAUGGAGUUUUUGGAAGAGAGAAAGUGGAGAUGUG
GAGAUCGUGGGGAUG
>ath-MIR8167e MI0031740 Arabidopsis thaliana miR8167e stem-loop
CAUCUUUGAGAUUUUACACAGUAGUCAUGGAGUUUUUGGAAGAGAGAAAGUGGAGAUGUG
GAGAUCGUGGGGAUG
>ath-MIR8167f MI0031741 Arabidopsis thaliana miR8167f stem-loop
CAUCUUUGAGAUUUUACACAGUAGUCAUGGAGUUUUUGGAAGAGAGAAAGUGGAGAUGUG
GAGAUCGUGGGGAUG
.....
05-08 08:08