本文介绍了试图加撇号的C特里的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我试图用C编写一个trie来读取文件并将文件中的所有单词添加到trie中,并且效果很好,但我无法接受撇号:

I'm trying to program a trie in C to read a file and add all the words in the file to the trie, and it works well, but I can't get it to accept apostrophes:

 typedef struct node
{
    bool wordBool;
    struct node* next[27]; // 26 letters and one space for the apostrophe
}
node;

node* base;
int numWords = 0;

bool load(const char* dictionary)
{

FILE* dictionaryf = fopen(dictionary, "r"); // the file to read

base = malloc(sizeof(node));

node variable;
node *currNode = &variable;

int n = 0;

while((n = fgetc(dictionaryf)) != EOF)
{
   if (n == '\n')
   {
      if (!currNode->wordBool)
      {
          currNode->wordBool = true;
          numWords++;
      }
      currNode = base;
   }
   else if (n == 39) //I tried putting this in so it would accept apostrophes
    {
         if(currNode->next[n-39] == NULL)
        {
            currNode->next[n-39] = malloc(sizeof(node));
        }
        currNode = currNode->next[n-39];
    }
   else {
        if(currNode->next[n-96] == NULL)
        {
            currNode->next[n-96] = malloc(sizeof(node));
        }
        currNode = currNode->next[n-96];
   }
}
if (currNode!= base && !currNode->wordBool)
{
    currNode->wordBool = true;
    numWords++;
}
printf("%i\n", numWords);
fclose(dictionaryf);
return true;
}

这是创建特里树的代码,但不会在其中添加撇号

this is the code that makes the trie, but it won't put apostrophes into the trie

推荐答案

此代码与您的代码紧密相关,但是解决了一个稍有不同的问题,因为它可以接受任意文本文件并对其进行处理而与其中的字符无关.在英语使用者的典型样式中,它将重音字符视为非字母"(部分是因为它不使用setlocale(),部分是因为它不处理多字节或宽字符).它计算每个单词出现的次数(在64位计算机上的数据结构中不占用额外空间).当然,它包括打印功能,这对于检查其是否正确完成工作很重要.

This code is closely based on yours, but tackles a slightly different problem in that it accepts an arbitrary text file and processes it regardless of the characters in it. It treats accented characters as 'non-alphabetic' in the typical style of English-speakers (in part because it doesn't use setlocale(), and in part because it doesn't process multi-byte or wide characters). It counts the number of times each word is present (which takes up no extra space in the data structure on a 64-bit machine). It includes a printing function which is important for checking that it did the job correctly, of course.

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct node
{
    bool wordBool;
    int  wordCount;
    struct node *next[27]; // 26 letters and one space for the apostrophe
} node;

static const char trie_map[] = "'abcdefghijklmnopqrstuvwxyz";
static node *base = 0;
static int numWords = 0;

static void oom(void)
{
    fprintf(stderr, "Out of memory\n");
    exit(EXIT_FAILURE);
}

static int trie_index(char c)
{
    char *p = strchr(trie_map, tolower(c));
    if (p == 0)
        return -1;
    else
        return (p - trie_map);
}

static
bool load(const char *dictionary)
{
    FILE *dictionaryf = fopen(dictionary, "r"); // the file to read
    if (dictionaryf == 0)
        return false;

    base = calloc(sizeof(node), 1);

    node *currNode = base;

    int n;

    while ((n = fgetc(dictionaryf)) != EOF)
    {
        n = trie_index(n);
        if (n >= 0)
        {
            if (currNode->next[n] == NULL)
            {
                currNode->next[n] = calloc(sizeof(node), 1);
                if (currNode->next[n] == NULL)
                    oom();
            }
            currNode = currNode->next[n];
        }
        else if (currNode != base)
        {
            if (!currNode->wordBool)
            {
                currNode->wordBool = true;
                numWords++;
            }
            currNode->wordCount++;
            currNode = base;
        }
        /* else: consecutive non-letters, non-apostrophes */
    }

    if (currNode != base && !currNode->wordBool)
    {
        currNode->wordBool = true;
        numWords++;
    }
    printf("%i distinct words\n", numWords);
    fclose(dictionaryf);
    return true;
}

static void print_trie(node *trie, char *buffer, size_t buflen)
{
    if (trie != 0)
    {
        if (trie->wordBool)
            printf("Word: %3d [%s]\n", trie->wordCount, buffer);
        size_t len = strlen(buffer);
        if (len >= buflen - 2)
        {
            fprintf(stderr, "Word too long!\n[%s]\n", buffer);
            exit(EXIT_FAILURE);
        }
        for (int i = 0; i < 27; i++)
        {
            if (trie->next[i] != 0)
            {
                buffer[len] = trie_map[i];
                buffer[len+1] = '\0';
                print_trie(trie->next[i], buffer, buflen);
            }
        }
    }
}

int main(int argc, char **argv)
{
    const char *data = "data";
    if (argc == 2)
        data = argv[1];
    if (load(data))
    {
        printf("Loaded file '%s' OK\n", data);
        char buffer[256] = "";
        print_trie(base, buffer, sizeof(buffer));
    }
    else
        printf("Load failed!\n");

    return 0;
}

在其自己的源代码(trie-31.c)上运行时,它会产生:

When run on its own source code (trie-31.c), it produces:

94 distinct words
Loaded file 'trie-31.c' OK
Word:   3 [']
Word:   1 ['abcdefghijklmnopqrstuvwxyz]
Word:   1 [and]
Word:   1 [apostrophe]
Word:   1 [apostrophes]
Word:   2 [argc]
Word:   2 [argv]
Word:   7 [base]
Word:   2 [bool]
Word:  10 [buffer]
Word:   3 [buflen]
Word:   2 [c]
Word:   2 [calloc]
Word:   8 [char]
Word:   1 [consecutive]
Word:   3 [const]
Word:   1 [ctype]
Word:  14 [currnode]
Word:   1 [d]
Word:   5 [data]
Word:   2 [dictionary]
Word:   4 [dictionaryf]
Word:   1 [distinct]
Word:   4 [else]
Word:   1 [eof]
Word:   4 [exit]
Word:   1 [failed]
Word:   2 [failure]
Word:   1 [false]
Word:   1 [fclose]
Word:   1 [fgetc]
Word:   3 [file]
Word:   1 [fopen]
Word:   2 [for]
Word:   2 [fprintf]
Word:   5 [h]
Word:   7 [i]
Word:  14 [if]
Word:   5 [include]
Word:   2 [index]
Word:   7 [int]
Word:   4 [len]
Word:   2 [letters]
Word:   3 [load]
Word:   1 [loaded]
Word:   1 [long]
Word:   1 [main]
Word:   4 [map]
Word:   1 [memory]
Word:  16 [n]
Word:   7 [next]
Word:   8 [node]
Word:   2 [non]
Word:   2 [null]
Word:   4 [numwords]
Word:   1 [of]
Word:   1 [ok]
Word:   1 [one]
Word:   2 [oom]
Word:   1 [out]
Word:   3 [p]
Word:   3 [print]
Word:   4 [printf]
Word:   1 [r]
Word:   1 [read]
Word:   5 [return]
Word:   2 [s]
Word:   1 [s']
Word:   2 [size]
Word:   3 [sizeof]
Word:   1 [space]
Word:   7 [static]
Word:   1 [stdbool]
Word:   2 [stderr]
Word:   1 [stdio]
Word:   1 [stdlib]
Word:   1 [strchr]
Word:   1 [string]
Word:   1 [strlen]
Word:   2 [struct]
Word:   2 [t]
Word:   2 [the]
Word:   1 [to]
Word:   1 [tolower]
Word:   1 [too]
Word:  15 [trie]
Word:   3 [true]
Word:   1 [typedef]
Word:   3 [void]
Word:   1 [while]
Word:   2 [word]
Word:   6 [wordbool]
Word:   3 [wordcount]
Word:   1 [words]

其中一些单词包含撇号(单独存在''abcdefghijklmnopqrstuvwxyzs').对于文件 great.panjandrum ,其中包含:

Some of the words include apostrophes (there's ' on its own, 'abcdefghijklmnopqrstuvwxyz and s'). For the file great.panjandrum which contains:

So she went into the garden
to cut a cabbage-leaf
to make an apple-pie
and at the same time
a great she-bear coming down the street
pops its head into the shop
What no soap
So he died
and she very imprudently married the Barber
and there were present
the Picninnies
and the Joblillies
and the Garyulies
and the great Panjandrum himself
with the little round button at top
and they all fell to playing the game of catch-as-catch-can
till the gunpowder ran out at the heels of their boots

输出为:

66 distinct words
Loaded file 'great.panjandrum' OK
Word:   2 [a]
Word:   1 [all]
Word:   1 [an]
Word:   7 [and]
Word:   1 [apple]
Word:   1 [as]
Word:   3 [at]
Word:   1 [barber]
Word:   1 [bear]
Word:   1 [boots]
Word:   1 [button]
Word:   1 [cabbage]
Word:   1 [can]
Word:   2 [catch]
Word:   1 [coming]
Word:   1 [cut]
Word:   1 [died]
Word:   1 [down]
Word:   1 [fell]
Word:   1 [game]
Word:   1 [garden]
Word:   1 [garyulies]
Word:   2 [great]
Word:   1 [gunpowder]
Word:   1 [he]
Word:   1 [head]
Word:   1 [heels]
Word:   1 [himself]
Word:   1 [imprudently]
Word:   2 [into]
Word:   1 [its]
Word:   1 [joblillies]
Word:   1 [leaf]
Word:   1 [little]
Word:   1 [make]
Word:   1 [married]
Word:   1 [no]
Word:   2 [of]
Word:   1 [out]
Word:   1 [panjandrum]
Word:   1 [picninnies]
Word:   1 [pie]
Word:   1 [playing]
Word:   1 [pops]
Word:   1 [present]
Word:   1 [ran]
Word:   1 [round]
Word:   1 [same]
Word:   3 [she]
Word:   1 [shop]
Word:   2 [so]
Word:   1 [soap]
Word:   1 [street]
Word:  13 [the]
Word:   1 [their]
Word:   1 [there]
Word:   1 [they]
Word:   1 [till]
Word:   1 [time]
Word:   3 [to]
Word:   1 [top]
Word:   1 [very]
Word:   1 [went]
Word:   1 [were]
Word:   1 [what]
Word:   1 [with]

这篇关于试图加撇号的C特里的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持!

08-15 10:02