For learning purposes I am trying to port a Chef interpreter project to Python 3.4, updating the libraries it uses to their latest versions along the way, but funcparserlib is giving me some trouble.

Here is the Chef script:

from pprint import pprint
from collections import namedtuple
import re
import logging

import funcparserlib.parser as p
from funcparserlib.lexer import make_tokenizer
from funcparserlib.lexer import Spec
from funcparserlib.contrib.lexer import space, newline
from funcparserlib.contrib.common import sometok, unarg

from common import *

log = logging.getLogger('preserve.chefparser')
#log.addHandler(logging.StreamHandler())
#log.setLevel(logging.DEBUG)

pos = 0

# order matters
instruction_spec = [
    Spec(x.lower().split()[0], x) for x in [
        'Take', 'Put', 'Fold', 'Add', 'Remove', 'Combine', 'Divide', 'Stir', 'Mix', 'Clean', 'Pour', 'Set aside', 'Refrigerate', 'from', 'the', 'for', 'contents of the', 'until', 'refrigerator', 'minute', 'minutes', 'hour', 'hours', 'well'
    ]
]
instruction_spec.insert(0, Spec('to', r'to'))
instruction_spec.insert(0, Spec('into', r'into'))
instruction_spec.insert(0, Spec('add_dry', 'Add dry ingredients'))
instruction_spec.insert(0, Spec('liquefy', 'Liquefy|Liquify'))
instruction_spec.append(Spec('serve_with', r'Serve with'))

instruction_spec.append(Spec('bowl', 'mixing bowl'))
instruction_spec.append(Spec('dish', 'baking dish'))
instruction_spec.append(space)
instruction_spec.append(Spec('string', '[A-Za-z]+'))
instruction_spec.append(Spec('ordinal', '[0-9]+(st|nd|rd|th)'))
instruction_spec.append(Spec('number', '[0-9]+'))

tokens = [
    Spec('ingredients_start', 'Ingredients'),
    Spec('method_start', r'^Method', re.MULTILINE),
    Spec('dry_measure', r' g | kg | pinch[es]? '),
    Spec('liquid_measure', r' ml | l | dash[es]? '),
    Spec('mix_measure', r'cup[s]?|teaspoon[s]?|tablespoon[s]?'),
    Spec('measure_type', 'heaped|level'),
    # TODO hours minutes
    Spec('cooking_time', r'Cooking time:'),
    # TODO gas mark
    Spec('oven', r'Pre\-heat oven to'),
    Spec('oven_temp', 'degrees Celcius'),
    # serve is treated separate here as it is
    # not necessary for it to appear
    # following 'Method.'
    # But it is treated as just another
    # instruction by the interpreter
    Spec('serve', r'^Serves', re.MULTILINE),
    Spec('number', '[0-9]+'),
    space,
    Spec('period', r'\.'),
    Spec('string', r'[^\.\r\n]+'),
]

def tokenize_minus_whitespace(token_list, input):
    return [x for x in make_tokenizer(token_list)(input) if x.type not in ['space']]

def tokenize_instruction(spec):
    return tokenize_minus_whitespace(instruction_spec, spec)

def tokenize(input):
    return tokenize_minus_whitespace(tokens, input)

def parse_instruction(spec):
    string = p.oneplus(sometok('string')) >> (lambda x: ' '.join(x))
    ordinal = sometok('ordinal')
    bowl = sometok('bowl')
    the = sometok('the')
    dish = sometok('dish')
    to = sometok('to')
    into = sometok('into')

    concat = lambda list: ' '.join(list)

    take_i = sometok('take') + (p.oneplus(string) >> concat) + sometok('from') + sometok('refrigerator')

    put_i = sometok('put') + p.skip(p.maybe(the)) + (p.oneplus(string) >> concat)  + p.skip(into) + p.maybe(ordinal|the) + bowl

    liquefy_1 = sometok('liquefy') + sometok('contents') + p.maybe(ordinal) + bowl
    liquefy_2 = sometok('liquefy') + (p.oneplus(string) >> concat)
    liquefy_i = liquefy_1 | liquefy_2

    pour_i = sometok('pour') + sometok('contents') + p.maybe(ordinal) + bowl + sometok('into') + the + p.maybe(ordinal) + dish

    fold_i = sometok('fold') + p.skip(p.maybe(the)) + (p.oneplus(string) >> concat) + into + p.maybe(ordinal|the) + bowl

    # cleanup repetition
    add_i = sometok('add') + (p.oneplus(string) >> concat) + p.maybe(to + p.maybe(ordinal|the) + bowl)

    remove_i = sometok('remove') + (p.oneplus(string) >> concat) + p.maybe(sometok('from') + p.maybe(ordinal|the) + bowl)

    combine_i = sometok('combine') + (p.oneplus(string) >> concat) + p.maybe(into + p.maybe(ordinal|the) + bowl)

    divide_i = sometok('divide') + (p.oneplus(string) >> concat) + p.maybe(into + p.maybe(ordinal|the) + bowl)

    add_dry_i = sometok('add_dry') + p.maybe(to + p.maybe(ordinal|the) + bowl)

    stir_1 = sometok('stir') + p.maybe(the + p.maybe(ordinal|the) + bowl) + sometok('for') + sometok('number') + (sometok('minute')|sometok('minutes'))
    stir_2 = sometok('stir') + (p.oneplus(string) >> concat) + into + the + p.maybe(ordinal) + bowl
    stir_i = stir_1 | stir_2

    mix_i = sometok('mix') + p.maybe(the + p.maybe(ordinal) + bowl) + sometok('well')

    clean_i = sometok('clean') + p.maybe(ordinal|the) + bowl

    loop_start_i = (sometok('string') + p.maybe(the) + (p.oneplus(string) >> concat)) >> (lambda x: ('loop_start', x))
    loop_end_i = (sometok('string') + p.maybe(p.maybe(the) + (p.oneplus(string) >> concat)) + sometok('until') + string) >> (lambda x: ('loop_end', x))

    set_aside_i = sometok('set') >> (lambda x: (x, None))

    serve_with_i = sometok('serve_with') + (p.oneplus(string) >> concat)

    refrigerate_i = sometok('refrigerate') + p.maybe(sometok('for') + sometok('number') + (sometok('hour')|sometok('hours')))

    instruction = ( take_i
                  | put_i
                  | liquefy_i
                  | pour_i
                  | add_i
                  | fold_i
                  | remove_i
                  | combine_i
                  | divide_i
                  | add_dry_i
                  | stir_i
                  | mix_i
                  | clean_i
                  | loop_end_i      # -| ORDER matters
                  | loop_start_i    # -|
                  | set_aside_i
                  | serve_with_i
                  | refrigerate_i
                  ) >> (lambda x: Instruction(x[0].lower().replace(' ', '_'), x[1:]))

    return instruction.parse(tokenize_instruction(spec))

def parse(input):
    period = sometok('period')
    string = p.oneplus(sometok('string')) >> (lambda x: ' '.join(x))
    number = sometok('number')

    title = string + p.skip(period) >> RecipeTitle
    ingredients_start = sometok('ingredients_start') + p.skip(period) >> IngredientStart

    dry_measure = p.maybe(sometok('measure_type')) + sometok('dry_measure')
    liquid_measure = sometok('liquid_measure')
    mix_measure = sometok('mix_measure')

    # is this valid ? 'g of butter', unit w/o initial_value
    ingredient = (p.maybe(number)
                  + p.maybe(dry_measure
                           | liquid_measure
                           | mix_measure)
                  + string >> unarg(Ingredient)
                 )

    ingredients = p.many(ingredient)

    cooking_time = (p.skip(sometok('cooking_time'))
                    + (number
                      >> unarg(CookingTime))

                    + p.skip(sometok('period'))
                   )

    oven_temp = (p.skip(sometok('oven'))
                + p.many(number)
                + p.skip(sometok('oven_temp'))
                >> unarg(Oven)
                )

    method_start = sometok('method_start') + p.skip(period)

    comment = p.skip(p.many(string|period))
    header = title + p.maybe(comment)

    instruction = (string
                   + p.skip(period)
                  ) >> parse_instruction

    instructions = p.many(instruction)

    program = (method_start + instructions) >> unarg(MethodStart)

    serves = (sometok('serve') + number >> (lambda x: Serve('serve', x[1])) ) + p.skip(period)

    ingredients_section = (ingredients_start + ingredients) >> unarg(IngredientSection)

    recipe = ( header
             + p.maybe(ingredients_section)
             + p.maybe(cooking_time)
             + p.maybe(oven_temp)
             + p.maybe(program)
             + p.maybe(serves)
             ) >> RecipeNode

    main_parser = p.oneplus(recipe)
    return main_parser.parse(tokenize(input))


Running the script fails with:

ImportError: cannot import name 'Spec'


The version of funcparserlib.lexer I have is:

#Snipped some licence. Hint it's MIT.

__all__ = ['make_tokenizer', 'Token', 'LexerError']

import re


class LexerError(Exception):
    def __init__(self, place, msg):
        self.place = place
        self.msg = msg

    def __str__(self):
        s = 'cannot tokenize data'
        line, pos = self.place
        return '%s: %d,%d: "%s"' % (s, line, pos, self.msg)


class Token(object):
    def __init__(self, type, value, start=None, end=None):
        self.type = type
        self.value = value
        self.start = start
        self.end = end

    def __repr__(self):
        return 'Token(%r, %r)' % (self.type, self.value)

    def __eq__(self, other):
        # FIXME: Case sensitivity is assumed here
        return self.type == other.type and self.value == other.value

    def _pos_str(self):
        if self.start is None or self.end is None:
            return ''
        else:
            sl, sp = self.start
            el, ep = self.end
            return '%d,%d-%d,%d:' % (sl, sp, el, ep)

    def __str__(self):
        s = "%s %s '%s'" % (self._pos_str(), self.type, self.value)
        return s.strip()

    @property
    def name(self):
        return self.value

    def pformat(self):
        return "%s %s '%s'" % (self._pos_str().ljust(20),
                                self.type.ljust(14),
                                self.value)


def make_tokenizer(specs):
    """[(str, (str, int?))] -> (str -> Iterable(Token))"""

    def compile_spec(spec):
        name, args = spec
        return name, re.compile(*args)

    compiled = [compile_spec(s) for s in specs]

    def match_specs(specs, str, i, position):
        line, pos = position
        for type, regexp in specs:
            m = regexp.match(str, i)
            if m is not None:
                value = m.group()
                nls = value.count('\n')
                n_line = line + nls
                if nls == 0:
                    n_pos = pos + len(value)
                else:
                    n_pos = len(value) - value.rfind('\n') - 1
                return Token(type, value, (line, pos + 1), (n_line, n_pos))
        else:
            errline = str.splitlines()[line - 1]
            raise LexerError((line, pos + 1), errline)

    def f(str):
        length = len(str)
        line, pos = 1, 0
        i = 0
        while i < length:
            t = match_specs(compiled, str, i, (line, pos))
            yield t
            line, pos = t.end
            i += len(t.value)

    return f

# This is an example of a token spec. See also [this article][1] for a
# discussion of searching for multiline comments using regexps
# (including `*?`).
#
#   [1]: http://ostermiller.org/findcomment.html
_example_token_specs = [
    ('COMMENT', (r'\(\*(.|[\r\n])*?\*\)', re.MULTILINE)),
    ('COMMENT', (r'\{(.|[\r\n])*?\}', re.MULTILINE)),
    ('COMMENT', (r'//.*',)),
    ('NL', (r'[\r\n]+',)),
    ('SPACE', (r'[ \t\r\n]+',)),
    ('NAME', (r'[A-Za-z_][A-Za-z_0-9]*',)),
    ('REAL', (r'[0-9]+\.[0-9]*([Ee][+\-]?[0-9]+)*',)),
    ('INT', (r'[0-9]+',)),
    ('INT', (r'\$[0-9A-Fa-f]+',)),
    ('OP', (r'(\.\.)|(<>)|(<=)|(>=)|(:=)|[;,=\(\):\[\]\.+\-<>\*/@\^]',)),
    ('STRING', (r"'([^']|(''))*'",)),
    ('CHAR', (r'#[0-9]+',)),
    ('CHAR', (r'#\$[0-9A-Fa-f]+',)),
]
#tokenize = make_tokenizer(_example_token_specs)


And I can certainly see why it can't import Spec: there is no Spec in there! What is my best way forward here? Can I get the project moving with a simple find-and-replace? Trawling through the repositories I can find online (there are a few confusingly similar ones) hasn't helped me much, but maybe I'm missing something.

Best answer

You don't need the Spec class: in the current version of funcparserlib, if you need to set up a tokenizer you simply declare a list of tuples.

See the example in the lexer module:

_example_token_specs = [
    ('COMMENT', (r'\(\*(.|[\r\n])*?\*\)', re.MULTILINE)),
    ('COMMENT', (r'\{(.|[\r\n])*?\}', re.MULTILINE)),
    ('COMMENT', (r'//.*',)),
    ('NL', (r'[\r\n]+',)),
    ('SPACE', (r'[ \t\r\n]+',)),
    ('NAME', (r'[A-Za-z_][A-Za-z_0-9]*',)),
    ('REAL', (r'[0-9]+\.[0-9]*([Ee][+\-]?[0-9]+)*',)),
    ('INT', (r'[0-9]+',)),
    ('INT', (r'\$[0-9A-Fa-f]+',)),
    ('OP', (r'(\.\.)|(<>)|(<=)|(>=)|(:=)|[;,=\(\):\[\]\.+\-<>\*/@\^]',)),
    ('STRING', (r"'([^']|(''))*'",)),
    ('CHAR', (r'#[0-9]+',)),
    ('CHAR', (r'#\$[0-9A-Fa-f]+',)),
]


According to the funcparserlib sources, the Spec class is obsolete.
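
For what it's worth, here is a minimal sketch of how the Spec-based specs from the question might be rewritten against the tuple-based make_tokenizer. The sometok and unarg stand-ins below are my own guesses at what the removed funcparserlib.contrib helpers used to provide, not part of funcparserlib itself:

import re
from funcparserlib.lexer import make_tokenizer
from funcparserlib.parser import some

# Spec('name', r'regex', flags) becomes a ('name', (r'regex', flags)) tuple.
# Only a few of the original entries are shown; the rest follow the same pattern.
tokens = [
    ('ingredients_start', ('Ingredients',)),
    ('method_start', (r'^Method', re.MULTILINE)),
    ('dry_measure', (r' g | kg | pinch[es]? ',)),
    ('number', (r'[0-9]+',)),
    ('space', (r'[ \t]+',)),
    ('period', (r'\.',)),
    ('string', (r'[^\.\r\n]+',)),
]

tokenize = make_tokenizer(tokens)

# Stand-ins for the funcparserlib.contrib helpers the script imports
# (assuming they did roughly this):
def sometok(type_):
    # parser that accepts a single token of the given type
    return some(lambda t: t.type == type_)

def unarg(f):
    # unpack a tuple of parsed results into positional arguments
    return lambda args: f(*args)

With helpers like these in place, converting the rest of the script should mostly come down to a mechanical find-and-replace of the Spec(...) calls.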
