python - 马尔可夫分析，格式化

我有一个程序，该程序从文本文件中读取大量文本，然后根据文本内容将内容随机显示为短故事。该程序有效，但最后一部分显示的内容过于笨拙且效率低下，我想知道是否有人对如何更有效地输入文本然后将其显示为字符串显示任何想法，但是允许它跨越多行（本质上是自动换行），这样它就不仅仅是在控制台右侧延续的巨大文本字符串。

from __future__ import print_function, division

import sys

import random

# global variables
suffix_map = {}        # map from prefixes to a list of suffixes
prefix = ()            # current tuple of words
big_list = []

def process_file(filename, order=2):
    """Reads a file and performs Markov analysis.

    filename: string
    order: integer number of words in the prefix

    returns: map from prefix to list of possible suffixes.
    """
    fp = open(filename)

    for line in fp:
        for word in line.rstrip().split():
            process_word(word, order)


def process_word(word, order=3):
    """Processes each word.

    word: string
    order: integer

    During the first few iterations, all we do is store up the words;
    after that we start adding entries to the dictionary.
    """
    global prefix
    if len(prefix) < order:
        prefix += (word,)
        return

    try:
        suffix_map[prefix].append(word)
    except KeyError:
        # if there is no entry for this prefix, make one
        suffix_map[prefix] = [word]

    prefix = shift(prefix, word)


def random_text(n=300):
    """Generates random wordsfrom the analyzed text.

    Starts with a random prefix from the dictionary.

    n: number of words to generate
    """
    global big_list
    # choose a random prefix (not weighted by frequency)
    start = random.choice(list(suffix_map.keys()))

    for i in range(n):
        suffixes = suffix_map.get(start, None)
        if suffixes == None:
            random_text(n-i)
            return
        # choose a random suffix
        word = random.choice(suffixes)
        big_list.append(word + " ")
        start = shift(start, word)


def shift(t, word):
    """Forms a new tuple by removing the head and adding word to the tail.

    t   : tuple of strings
    word: string

    Returns: tuple of strings
    """
    return t[1:] + (word,)


def list_to_str_format():
    global big_list
    whole = " ".join(str(i) for i in big_list)

    # 25 words per line
    l1 = big_list[:25]
    l2 = big_list[26:50]
    l3 = big_list[51:75]
    l4 = big_list[76:100]
    l5 = big_list[101:125]
    l6 = big_list[126:150]
    l7 = big_list[151:175]
    l8 = big_list[176:200]
    l9 = big_list[201:225]
    l10 = big_list[226:250]
    l11 = big_list[256:275]
    l12 = big_list[276:300]

    str_1 = " ".join(str(i) for i in l1).capitalize()
    str_2 = " ".join(str(i) for i in l2)
    str_3 = " ".join(str(i) for i in l3)
    str_4 = " ".join(str(i) for i in l4)
    str_5 = " ".join(str(i) for i in l5)
    str_6 = " ".join(str(i) for i in l6)
    str_7 = " ".join(str(i) for i in l7)
    str_8 = " ".join(str(i) for i in l8)
    str_9 = " ".join(str(i) for i in l9)
    str_10 = " ".join(str(i) for i in l10)
    str_11 = " ".join(str(i) for i in l11)
    str_12 = " ".join(str(i) for i in l12)

    print(str_1)
    print(str_2)
    print(str_3)
    print(str_4)
    print(str_5)
    print(str_6)
    print(str_7)
    print(str_8)
    print(str_9)
    print(str_10)
    print(str_11)
    print(str_12)


def main(filename, n=300, order=3):
    try:
        n = int(n)
        order = int(order)
    except ValueError as e:
        print('Usage: %d filename [# of words] [prefix length]' % e)
    else:
        process_file(filename, order)
        random_text(n)
        list_to_str_format()
        print()


main('C:\\Users\\Desktop\\TheBrothersKaramazov.txt')

最佳答案

我允许自己更改您的加入方式，从而增加了两倍的空间。您必须导入模块re

def list_to_str_format(line_length=80):
    global big_list
    whole = "".join(str(i) for i in big_list)
    regex = re.compile('(.*?(\s))*')
    while whole != "":
        break_pos = regex.match(whole[:line_length]).end()
        print(whole[:break_pos])
        whole = whole[break_pos:]

关于python - 马尔可夫分析，格式化，我们在Stack Overflow上找到一个类似的问题：https://stackoverflow.com/questions/50781490/