我只是一名C语言编程的初学者,请帮我解决下面的问题。

问题:编写一个程序,在一个给定的数组中进行搜索,该数组存放着一个字符序列。这些字符只能是字母A、G、T或C。序列的最后一个字符被设置为代码0,以便于检测序列的结尾。

我找不出自己哪里做错了,但程序一直出错。

/*A program that searches through a given array that contains a sequence of characters. These characters are restricted
to be the letters A, G, T, or C. The last character in the sequence is set to be the code 0, so that the end is easily
detected. That array should be declared and initialized.*/

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
void input_sequence(int length,char input[]);
void search(char C[],char DNA[],int length);

/*
 * Interactive driver: repeatedly asks for the length of a search sequence and
 * the sequence itself, validates the characters and searches the fixed DNA
 * array for it. The loop ends when the entered length is zero or less (or
 * when no number can be read at all).
 */
int main(void) {
    //Given array
    char DNA[] = {'A', 'G', 'C', 'G', 'G', 'G', 'A', 'C', 'C', 'G', 'T', 'C',
          'C', 'C', 'G', 'A', 'C', 'A', 'T', 'T', 'G', 'A', 'T', 'G',
          'A', 'A', 'G', 'G', 'G', 'T', 'C', 'A', 'T', 'A', 'G', 'A',
          'C', 'C', 'C', 'A', 'A', 'T', 'A', 'C', 'G', 'C', 'C', 'A',
          'C', 'C', 'A', 'C', 'C', 'C', 'C', 'A', 'A', 'G', 'T', 'T',
          'T', 'T', 'C', 'C', 'T', 'G', 'T', 'G', 'T', 'C', 'T', 'T',
          'C', 'C', 'A', 'T', 'T', 'G', 'A', 'G', 'T', 'A', 'G', 'A',
          'T', 'T', 'G', 'A', 'C', 'A', 'C', 'T', 'C', 'C', 'C', 'A',
          'G', 'A', 'T', 'G', '\0'};
    int length;

    /*Program should repeatedly ask the user for two things: the length of a search sequence,
    and the search sequence itself*/
    /*The program should terminate when the length of the input sequence is zero or less*/
    do{
        printf("Enter length of DNA sequence to match: ");
        //EOF or non-numeric input would otherwise leave length unset and loop forever
        if(scanf("%d",&length) != 1){
            length = 0;
        }
        //input sequence length has to be >0
        if(length>0){
            //Search sequence buffer; allocated only once the length is known to be valid
            char *input = (char *)malloc((size_t)length);
            if(input == NULL){
                fprintf(stderr,"Out of memory\n");
                return (EXIT_FAILURE);
            }
            input_sequence(length,input);
            /*The elements of the search sequence may take on one of five characters: A,G,T,C and *. The
            meaning of the '*' character is that it matches all four nucleotides: A,G,T and C.*/
            int valid = 1;
            for(int i=0; i<length; i++){
                if(input[i]!='A'&&input[i]!='G'&&input[i]!='T'&&input[i]!='C'&&input[i]!='*'){
                    printf("Erroneous character input ’%c’ exiting\n",input[i]);
                    valid = 0;
                    break;
                }
            }
            if(valid){
                search(input,DNA,length);
            }
            free(input);
        }
    }
    while(length>0);
    printf("Goodbye");

    return (EXIT_SUCCESS);
}

//Function to search for input sequence in the given array
/*
 * Returns the index of the first position in the 0-terminated array DNA at
 * which the `length` characters of `pattern` match, or -1 when there is no
 * such position. A '*' in the pattern matches any single character of DNA.
 */
static int find_sequence(const char pattern[], const char DNA[], int length){
    if (length <= 0)
        return -1;
    for (int start = 0; DNA[start] != '\0'; start++) {
        int matched = 0;
        //stop at the terminator so the window never reads past the end of DNA
        while (matched < length
               && DNA[start + matched] != '\0'
               && (pattern[matched] == '*' || pattern[matched] == DNA[start + matched])) {
            matched++;
        }
        if (matched == length)
            return start;
    }
    return -1;
}

//Function to search for input sequence in the given array
/*
 * C      - search sequence of `length` characters (A, G, T, C or the wildcard '*');
 *          it does not need to be 0-terminated
 * DNA    - 0-terminated array to search in
 * length - number of characters in C
 * Prints the index of the first match; prints nothing when there is none.
 */
void search(char C[],char DNA[],int length){
    int foundIndex = find_sequence(C, DNA, length);
    if (foundIndex >= 0)
        printf("Match of search sequence found at element %d\n",foundIndex);
}

/*
 * Prompts for and reads `length` non-whitespace characters from stdin into
 * input[0..length-1]. The buffer is not 0-terminated.
 * If the input ends (or a read error occurs) before `length` characters have
 * been read, the remaining elements are set to '\0' so the caller never
 * inspects uninitialised memory.
 */
void input_sequence(int length,char input[]){
    int i = 0;
    printf("Enter %d characters (one of AGTC*) as a search sequence: ",length);
    //the leading space in the format skips blanks and newlines between characters
    while(i<length && scanf(" %c", &input[i]) == 1){
        i++;
    }
    for(; i<length; i++){
        input[i] = '\0';
    }
}

最佳答案

下面是使用GNU C library regexp的示例:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <regex.h>

/*
 * Prints every non-overlapping match of a POSIX basic regular expression in
 * the 0-terminated string DNA, one line per match with the matched text and
 * its byte range [start:finish), followed by "No more matches.".
 *
 * regexp_str - pattern handed to regcomp() with default (basic) syntax
 * DNA        - subject string
 * length     - unused; kept so the signature matches the non-regex version
 *
 * If the pattern does not compile, an error line is printed and nothing is
 * searched.
 */
void search(const char *regexp_str, const char *DNA, int length)
{
    regex_t regex;
    regmatch_t m[1]; /* only the whole-match slot is used */
    const char *p = DNA;
    (void)length;

    if(regcomp(&regex, regexp_str, 0) != 0) {
        printf("Could not compile regex: %s\n", regexp_str);
        return; /* nothing was compiled, so there is nothing to free */
    }

    for(;;) {//based on http://www.lemoda.net/c/unix-regex/
        /* past the first search, p no longer points at the start of the line */
        const int eflags = (p == DNA) ? 0 : REG_NOTBOL;
        if(regexec(&regex, p, 1, m, eflags) != 0) {
            break;
        }

        const int match_len = (int)(m[0].rm_eo - m[0].rm_so);
        const int start = (int)(m[0].rm_so + (p - DNA));
        const int finish = (int)(m[0].rm_eo + (p - DNA));
        printf("'%.*s' (bytes %d:%d)\n",
                match_len, p + m[0].rm_so,
                start, finish);

        if(match_len > 0) {
            p += m[0].rm_eo;
        } else if(p[m[0].rm_eo] != '\0') {
            /* an empty match would never advance p; step over one character */
            p += m[0].rm_eo + 1;
        } else {
            break; /* empty match at the end of the subject */
        }
    }
    printf ("No more matches.\n");
    /* leave the loop with break rather than return so the regex is always released */
    regfree(&regex);
}

/*
 * Interactive driver for the regex-based search: repeatedly reads a sequence
 * length and then a search sequence made of the characters AGTC*, converts
 * every '*' to the regex wildcard '.', and passes the pattern to search().
 * The loop ends when the length is zero or less, when it is too large for the
 * line buffer, or when standard input ends.
 */
int main(void) {
    const char *DNA = "AGCGGGACCGTCCCGACATTGATGAAGGGTCATAGACCCA"
                      "ATACGCCACCACCCCAAGTTTTCCTGTGTCTTCCATTGAG"
                      "TAGATTGACACTCCCAGATG";
    const char *validInputs = "AGTC*";
    char input[256];
    //fgets() needs room for the newline and the terminating 0
    const int max_length = (int)sizeof(input) - 2;

    for(;;) {
        printf("Enter length of DNA sequence to match: ");
        if(fgets(input, sizeof(input), stdin) == NULL) {
            break; //EOF or read error
        }
        const long requested = strtol(input, NULL, 10);
        if(requested <= 0) {//input sequence length has to be >0
            break;
        } else if(requested > max_length) {
            printf("ERROR: Too big length=%ld, max supported length=%d\n",
                   requested, max_length);
            break;
        }
        const int length = (int)requested;

        int valid = 0;
        while(!valid) {
            printf("Enter %d characters (one of AGTC*) as a search sequence: ",length);
            if(fgets(input, sizeof(input), stdin) == NULL) {
                //no more input: re-prompting would loop forever
                printf("Goodbye\n");
                return (EXIT_SUCCESS);
            }

            valid = 1;
            for(int i = 0; i < length; i++) {
                //strchr() also finds the terminator, so reject '\0' explicitly
                if(input[i] == '\0' || strchr(validInputs, input[i]) == NULL) {
                  printf("Erroneous character input '%c' in '%s'\n", input[i], input);
                  valid = 0;
                  break;
                }
            }
        }
        input[length] = 0; //drop the newline and anything typed beyond `length`
        //substitute '*' on '.' for using in regexp
        for(char *ptr = input; (ptr = strchr(ptr, '*')) != NULL; ) {
            *ptr = '.';
        }
        printf("search for: %s\n", input);
        search(input, DNA, length);
    }
    printf("Goodbye\n");
    return (EXIT_SUCCESS);
}


另一种做法是使用C++11的std::regex(只需要修改search()):

#include <regex>
#include <iterator>

/// @brief Prints the number of matches of a regular expression in a DNA
///        string, then one line per match with its text and start position.
/// @param C    Pattern, compiled with the default std::regex grammar.
/// @param DNA  0-terminated subject string.
/// @note The unnamed third parameter is unused; it keeps the signature
///       compatible with the C version of search().
/// @throws std::regex_error if C is not a valid pattern.
void search(const char *C, const char *DNA, int )
{
    const std::regex regex(C);
    const std::string str(DNA);
    const auto words_begin = std::sregex_iterator(str.begin(), str.end(), regex);
    const auto words_end = std::sregex_iterator();
    // std::distance() and match.position() yield ptrdiff_t values, which need
    // the 't' length modifier; plain %d is a format/argument mismatch.
    printf("Found %td matches:\n", std::distance(words_begin, words_end));
    for(std::sregex_iterator i = words_begin; i != words_end; ++i) {
        const std::smatch &match = *i;
        printf(" match: %s, pos=%td\n", match.str().c_str(), match.position());
    }
}

10-06 10:23
查看更多