出于学习目的,我正尝试把一个Chef解释器项目移植到Python 3.4,并把它所依赖的库都升级到最新版本,但在处理funcparserlib时遇到了麻烦。
下面是这个Chef脚本:
from pprint import pprint
from collections import namedtuple
import re
import logging
import funcparserlib.parser as p
from funcparserlib.lexer import make_tokenizer
from funcparserlib.lexer import Spec
from funcparserlib.contrib.lexer import space, newline
from funcparserlib.contrib.common import sometok, unarg
from common import *
# Module-level logger; the two commented lines below enable debug output.
log = logging.getLogger('preserve.chefparser')
#log.addHandler(logging.StreamHandler())
#log.setLevel(logging.DEBUG)

pos = 0

# Token specs for a single instruction sentence, written out in their final
# list order.  The original author notes that order matters -- presumably the
# tokenizer tries the specs front to back, so longer phrases such as
# 'Add dry ingredients' and 'into' have to precede 'Add' and 'to'.
instruction_spec = (
    [
        Spec('liquefy', 'Liquefy|Liquify'),
        Spec('add_dry', 'Add dry ingredients'),
        Spec('into', r'into'),
        Spec('to', r'to'),
    ]
    # Keyword phrases: the token type is the first word of the phrase,
    # lower-cased (e.g. 'Set aside' -> 'set', 'contents of the' -> 'contents').
    + [
        Spec(phrase.lower().split()[0], phrase)
        for phrase in (
            'Take', 'Put', 'Fold', 'Add', 'Remove', 'Combine', 'Divide',
            'Stir', 'Mix', 'Clean', 'Pour', 'Set aside', 'Refrigerate',
            'from', 'the', 'for', 'contents of the', 'until', 'refrigerator',
            'minute', 'minutes', 'hour', 'hours', 'well',
        )
    ]
    + [
        Spec('serve_with', r'Serve with'),
        Spec('bowl', 'mixing bowl'),
        Spec('dish', 'baking dish'),
        space,
        Spec('string', '[A-Za-z]+'),
        Spec('ordinal', '[0-9]+(st|nd|rd|th)'),
        Spec('number', '[0-9]+'),
    ]
)
# Token specs for a whole recipe (title, ingredient list, cooking time, oven
# temperature, method, serves).  The catch-all 'string' spec is listed last so
# that the keyword specs above it get the first chance to match.
tokens = [
    Spec('ingredients_start', 'Ingredients'),
    Spec('method_start', r'^Method', re.MULTILINE),
    # The Chef spec writes these units as "pinch[es]" / "dash[es]", meaning an
    # optional "es" suffix.  As a regex, "[es]?" is a character class that
    # matches a single 'e' or 's', so "pinches"/"dashes" never matched; use an
    # optional group instead.
    Spec('dry_measure', r' g | kg | pinch(?:es)? '),
    Spec('liquid_measure', r' ml | l | dash(?:es)? '),
    Spec('mix_measure', r'cup[s]?|teaspoon[s]?|tablespoon[s]?'),
    Spec('measure_type', 'heaped|level'),
    # TODO hours minutes
    Spec('cooking_time', r'Cooking time:'),
    # TODO gas mark
    Spec('oven', r'Pre\-heat oven to'),
    # Accept the correct spelling "Celsius" as well as the historical
    # misspelling "Celcius" that this parser used to require.
    Spec('oven_temp', 'degrees Cel[sc]ius'),
    # 'serve' is handled separately here because it does not have to follow
    # 'Method.'; the interpreter still treats it as just another instruction.
    Spec('serve', r'^Serves', re.MULTILINE),
    Spec('number', '[0-9]+'),
    space,
    Spec('period', r'\.'),
    Spec('string', r'[^\.\r\n]+'),
]
def tokenize_minus_whitespace(token_list, input):
    """Tokenize *input* with the specs in *token_list*, dropping whitespace.

    Returns a list of the produced tokens, in order, without those whose
    ``type`` is ``'space'``.
    """
    tokenizer = make_tokenizer(token_list)
    kept = []
    for token in tokenizer(input):
        if token.type == 'space':
            continue
        kept.append(token)
    return kept
def tokenize_instruction(spec):
    """Return the non-whitespace tokens of one instruction sentence *spec*.

    Uses the module-level ``instruction_spec`` token specs.
    """
    instruction_tokens = tokenize_minus_whitespace(instruction_spec, spec)
    return instruction_tokens
def tokenize(input):
    """Return the non-whitespace tokens of the recipe text *input*.

    Uses the module-level ``tokens`` token specs.
    """
    recipe_tokens = tokenize_minus_whitespace(tokens, input)
    return recipe_tokens
def parse_instruction(spec):
    """Parse the text of a single Chef instruction sentence.

    *spec* is tokenized with ``tokenize_instruction`` and matched against the
    alternatives below.  The matched sequence ``x`` is turned into
    ``Instruction(x[0].lower().replace(' ', '_'), x[1:])``, i.e. the first
    matched item becomes the instruction name and the rest its arguments.

    NOTE(review): ``sometok`` and ``Instruction`` are defined outside this
    file (``funcparserlib.contrib.common`` and, presumably, ``common``); the
    grammar sketches in the comments assume ``sometok(t)`` matches one token
    of type ``t`` -- confirm against those modules.
    """
    # One or more 'string' tokens joined with single spaces.
    string = p.oneplus(sometok('string')) >> (lambda x: ' '.join(x))
    ordinal = sometok('ordinal')
    bowl = sometok('bowl')
    the = sometok('the')
    dish = sometok('dish')
    to = sometok('to')
    into = sometok('into')
    concat = lambda list: ' '.join(list)
    # take <words> from refrigerator
    take_i = sometok('take') + (p.oneplus(string) >> concat) + sometok('from') + sometok('refrigerator')
    # put [the] <words> into [<ordinal> | the] <bowl>
    put_i = sometok('put') + p.skip(p.maybe(the)) + (p.oneplus(string) >> concat) + p.skip(into) + p.maybe(ordinal|the) + bowl
    # liquefy contents [<ordinal>] <bowl>   (the 'contents' token is produced
    # by the 'contents of the' spec)
    liquefy_1 = sometok('liquefy') + sometok('contents') + p.maybe(ordinal) + bowl
    # liquefy <words>
    liquefy_2 = sometok('liquefy') + (p.oneplus(string) >> concat)
    liquefy_i = liquefy_1 | liquefy_2
    # pour contents [<ordinal>] <bowl> into the [<ordinal>] <dish>
    pour_i = sometok('pour') + sometok('contents') + p.maybe(ordinal) + bowl + sometok('into') + the + p.maybe(ordinal) + dish
    # fold [the] <words> into [<ordinal> | the] <bowl>
    fold_i = sometok('fold') + p.skip(p.maybe(the)) + (p.oneplus(string) >> concat) + into + p.maybe(ordinal|the) + bowl
    # TODO: clean up the repetition in the next rules; they all have the
    # shape  <verb> <words> [<preposition> [<ordinal> | the] <bowl>].
    add_i = sometok('add') + (p.oneplus(string) >> concat) + p.maybe(to + p.maybe(ordinal|the) + bowl)
    remove_i = sometok('remove') + (p.oneplus(string) >> concat) + p.maybe(sometok('from') + p.maybe(ordinal|the) + bowl)
    combine_i = sometok('combine') + (p.oneplus(string) >> concat) + p.maybe(into + p.maybe(ordinal|the) + bowl)
    divide_i = sometok('divide') + (p.oneplus(string) >> concat) + p.maybe(into + p.maybe(ordinal|the) + bowl)
    # add_dry [to [<ordinal> | the] <bowl>]
    add_dry_i = sometok('add_dry') + p.maybe(to + p.maybe(ordinal|the) + bowl)
    # stir [the [<ordinal> | the] <bowl>] for <number> (minute | minutes)
    stir_1 = sometok('stir') + p.maybe(the + p.maybe(ordinal|the) + bowl) + sometok('for') + sometok('number') + (sometok('minute')|sometok('minutes'))
    # stir <words> into the [<ordinal>] <bowl>
    stir_2 = sometok('stir') + (p.oneplus(string) >> concat) + into + the + p.maybe(ordinal) + bowl
    stir_i = stir_1 | stir_2
    # mix [the [<ordinal>] <bowl>] well
    mix_i = sometok('mix') + p.maybe(the + p.maybe(ordinal) + bowl) + sometok('well')
    # clean [<ordinal> | the] <bowl>
    clean_i = sometok('clean') + p.maybe(ordinal|the) + bowl
    # Loops start with an arbitrary word, so they are matched on a leading
    # 'string' token and tagged explicitly:
    #   loop start:  <word> [the] <words>
    #   loop end:    <word> [[the] <words>] until <words>
    loop_start_i = (sometok('string') + p.maybe(the) + (p.oneplus(string) >> concat)) >> (lambda x: ('loop_start', x))
    loop_end_i = (sometok('string') + p.maybe(p.maybe(the) + (p.oneplus(string) >> concat)) + sometok('until') + string) >> (lambda x: ('loop_end', x))
    # 'Set aside' is tokenized as type 'set'; it takes no arguments.
    set_aside_i = sometok('set') >> (lambda x: (x, None))
    # serve_with <words>
    serve_with_i = sometok('serve_with') + (p.oneplus(string) >> concat)
    # refrigerate [for <number> (hour | hours)]
    refrigerate_i = sometok('refrigerate') + p.maybe(sometok('for') + sometok('number') + (sometok('hour')|sometok('hours')))
    # Alternatives are tried in the order listed.
    instruction = ( take_i
                  | put_i
                  | liquefy_i
                  | pour_i
                  | add_i
                  | fold_i
                  | remove_i
                  | combine_i
                  | divide_i
                  | add_dry_i
                  | stir_i
                  | mix_i
                  | clean_i
                  | loop_end_i # -| ORDER matters: loop_end must be tried
                  | loop_start_i # -| before loop_start
                  | set_aside_i
                  | serve_with_i
                  | refrigerate_i
                  ) >> (lambda x: Instruction(x[0].lower().replace(' ', '_'), x[1:]))
    return instruction.parse(tokenize_instruction(spec))
def parse(input):
    """Parse the text of one or more Chef recipes.

    *input* is tokenized with ``tokenize`` and matched against one or more
    ``recipe`` productions; the result of ``main_parser.parse`` is returned.

    A recipe is a header (title plus optional comment) followed by these
    optional sections, in this order: ingredients, cooking time, oven
    temperature, method, serves.

    NOTE(review): ``sometok``/``unarg`` come from
    ``funcparserlib.contrib.common`` and the node constructors (RecipeTitle,
    IngredientStart, Ingredient, CookingTime, Oven, MethodStart, Serve,
    IngredientSection, RecipeNode) presumably from ``common``; their
    signatures are not visible in this file.
    """
    period = sometok('period')
    # One or more 'string' tokens joined with single spaces.
    string = p.oneplus(sometok('string')) >> (lambda x: ' '.join(x))
    number = sometok('number')
    # <title> "."
    title = string + p.skip(period) >> RecipeTitle
    # "Ingredients" "."
    ingredients_start = sometok('ingredients_start') + p.skip(period) >> IngredientStart
    dry_measure = p.maybe(sometok('measure_type')) + sometok('dry_measure')
    liquid_measure = sometok('liquid_measure')
    mix_measure = sometok('mix_measure')
    # Open question from the author: is a unit without an initial value
    # (e.g. 'g of butter') valid?  Both the amount and the unit are optional.
    ingredient = (p.maybe(number)
                  + p.maybe(dry_measure
                            | liquid_measure
                            | mix_measure)
                  + string >> unarg(Ingredient)
                  )
    ingredients = p.many(ingredient)
    # "Cooking time:" <number> "."
    cooking_time = (p.skip(sometok('cooking_time'))
                    + (number
                       >> unarg(CookingTime))
                    + p.skip(sometok('period'))
                    )
    # "Pre-heat oven to" <number>* "degrees Celcius"
    oven_temp = (p.skip(sometok('oven'))
                 + p.many(number)
                 + p.skip(sometok('oven_temp'))
                 >> unarg(Oven)
                 )
    method_start = sometok('method_start') + p.skip(period)
    # Free text after the title: any run of strings and periods, discarded.
    comment = p.skip(p.many(string|period))
    header = title + p.maybe(comment)
    # Each sentence of the method is re-tokenized and parsed on its own by
    # parse_instruction.
    instruction = (string
                   + p.skip(period)
                   ) >> parse_instruction
    instructions = p.many(instruction)
    program = (method_start + instructions) >> unarg(MethodStart)
    # "Serves" <number> "."
    serves = (sometok('serve') + number >> (lambda x: Serve('serve', x[1])) ) + p.skip(period)
    ingredients_section = (ingredients_start + ingredients) >> unarg(IngredientSection)
    recipe = ( header
             + p.maybe(ingredients_section)
             + p.maybe(cooking_time)
             + p.maybe(oven_temp)
             + p.maybe(program)
             + p.maybe(serves)
             ) >> RecipeNode
    main_parser = p.oneplus(recipe)
    return main_parser.parse(tokenize(input))
运行该脚本时失败,报错如下:
ImportError: cannot import name 'Spec'
我安装的funcparserlib.lexer版本如下:
#Snipped some licence. Hint it's MIT.
__all__ = ['make_tokenizer', 'Token', 'LexerError']
import re
class LexerError(Exception):
    """Error raised when the input cannot be tokenized.

    Attributes:
        place: ``(line, pos)`` tuple locating the failure.
        msg: text quoted in the error message.
    """

    def __init__(self, place, msg):
        # Pass the arguments on to Exception so that ``args`` is populated.
        # Without this ``args`` is empty and the exception cannot be rebuilt
        # from it, which breaks copying and pickling.
        super(LexerError, self).__init__(place, msg)
        self.place = place
        self.msg = msg

    def __str__(self):
        s = 'cannot tokenize data'
        line, pos = self.place
        return '%s: %d,%d: "%s"' % (s, line, pos, self.msg)
class Token(object):
    """A lexical token: a type tag, the matched text and an optional span.

    Attributes:
        type: name of the token spec that produced the token.
        value: the matched text.
        start: ``(line, pos)`` of the token start, or None.
        end: ``(line, pos)`` of the token end, or None.
    """

    def __init__(self, type, value, start=None, end=None):
        self.type = type
        self.value = value
        self.start = start
        self.end = end

    def __repr__(self):
        return 'Token(%r, %r)' % (self.type, self.value)

    def __eq__(self, other):
        """Compare by ``type`` and ``value`` only; positions are ignored."""
        # Comparing with None or any other non-token object used to raise
        # AttributeError; let Python fall back to its default instead.
        if not (hasattr(other, 'type') and hasattr(other, 'value')):
            return NotImplemented
        # FIXME: Case sensitivity is assumed here
        return self.type == other.type and self.value == other.value

    def _pos_str(self):
        """Return ``'sl,sp-el,ep:'``, or ``''`` when the span is unknown."""
        if self.start is None or self.end is None:
            return ''
        else:
            sl, sp = self.start
            el, ep = self.end
            return '%d,%d-%d,%d:' % (sl, sp, el, ep)

    def __str__(self):
        s = "%s %s '%s'" % (self._pos_str(), self.type, self.value)
        # strip() removes the leading space left by an empty position string.
        return s.strip()

    @property
    def name(self):
        """Alias for ``value``."""
        return self.value

    def pformat(self):
        """Return a column-aligned rendering: position, type, quoted value."""
        return "%s %s '%s'" % (self._pos_str().ljust(20),
                               self.type.ljust(14),
                               self.value)
def make_tokenizer(specs):
    """[(str, (str, int?))] -> (str -> Iterable(Token))

    Build a tokenizer function from an ordered list of token specs.

    Each spec is a ``(name, args)`` pair, where ``args`` is the argument
    tuple passed to ``re.compile`` (a pattern and, optionally, flags).  At
    every input position the specs are tried in list order and the first one
    that matches a non-empty string wins.

    The returned function is a generator yielding one ``Token`` per match
    until the whole input is consumed.  It raises ``LexerError`` when no spec
    matches at the current position.
    """
    compiled = [(name, re.compile(*args)) for name, args in specs]

    def match_specs(specs, text, i, position):
        # Return the Token for the first spec matching ``text`` at index
        # ``i``; ``position`` is the (line, pos) reached so far.
        line, pos = position
        for tok_type, regexp in specs:
            m = regexp.match(text, i)
            # A zero-length match would produce an empty token and never
            # advance the input, so the tokenizer would loop forever.
            # Treat it as "no match" and try the next spec.
            if m is None or m.end() == i:
                continue
            value = m.group()
            nls = value.count('\n')
            n_line = line + nls
            if nls == 0:
                n_pos = pos + len(value)
            else:
                # Position restarts after the last newline in the match.
                n_pos = len(value) - value.rfind('\n') - 1
            return Token(tok_type, value, (line, pos + 1), (n_line, n_pos))
        # No spec matched: report the current source line.
        errline = text.splitlines()[line - 1]
        raise LexerError((line, pos + 1), errline)

    def f(str):
        length = len(str)
        line, pos = 1, 0
        i = 0
        while i < length:
            t = match_specs(compiled, str, i, (line, pos))
            yield t
            line, pos = t.end
            i += len(t.value)

    return f
# This is an example of a token spec list. Each entry is a
# ``(token_type, re_compile_args)`` pair, where the second item is the
# argument tuple for ``re.compile``: ``(pattern,)`` or ``(pattern, flags)``.
# See also [this article][1] for a discussion of searching for multiline
# comments using regexps (including `*?`).
#
# [1]: http://ostermiller.org/findcomment.html
#
# NOTE(review): the COMMENT patterns contain no ``^`` or ``$``, so
# ``re.MULTILINE`` looks redundant there -- confirm before removing.
_example_token_specs = [
    ('COMMENT', (r'\(\*(.|[\r\n])*?\*\)', re.MULTILINE)),
    ('COMMENT', (r'\{(.|[\r\n])*?\}', re.MULTILINE)),
    ('COMMENT', (r'//.*',)),
    ('NL', (r'[\r\n]+',)),
    ('SPACE', (r'[ \t\r\n]+',)),
    ('NAME', (r'[A-Za-z_][A-Za-z_0-9]*',)),
    ('REAL', (r'[0-9]+\.[0-9]*([Ee][+\-]?[0-9]+)*',)),
    ('INT', (r'[0-9]+',)),
    ('INT', (r'\$[0-9A-Fa-f]+',)),
    ('OP', (r'(\.\.)|(<>)|(<=)|(>=)|(:=)|[;,=\(\):\[\]\.+\-<>\*/@\^]',)),
    ('STRING', (r"'([^']|(''))*'",)),
    ('CHAR', (r'#[0-9]+',)),
    ('CHAR', (r'#\$[0-9A-Fa-f]+',)),
]
# Example usage:
#tokenize = make_tokenizer(_example_token_specs)
我也明白它为什么无法导入Spec了:这个模块里根本没有Spec!各位,最好的解决办法是什么?我能否通过简单的“查找替换”让项目继续推进?我翻看了网上能找到的各个代码仓库(其中有些地方令人困惑),但帮助不大,不过也许是我遗漏了什么。
最佳答案
您不需要Spec类。在当前版本的funcparserlib中,要配置分词器(tokenizer),只需声明一个由元组组成的列表即可。
请参阅lexer模块中的示例:
# Token specs are plain tuples: (token_type, (pattern,)) or
# (token_type, (pattern, flags)); no Spec class is involved.
_example_token_specs = [
    ('COMMENT', (r'\(\*(.|[\r\n])*?\*\)', re.MULTILINE)),
    ('COMMENT', (r'\{(.|[\r\n])*?\}', re.MULTILINE)),
    ('COMMENT', (r'//.*',)),
    ('NL', (r'[\r\n]+',)),
    ('SPACE', (r'[ \t\r\n]+',)),
    ('NAME', (r'[A-Za-z_][A-Za-z_0-9]*',)),
    ('REAL', (r'[0-9]+\.[0-9]*([Ee][+\-]?[0-9]+)*',)),
    ('INT', (r'[0-9]+',)),
    ('INT', (r'\$[0-9A-Fa-f]+',)),
    ('OP', (r'(\.\.)|(<>)|(<=)|(>=)|(:=)|[;,=\(\):\[\]\.+\-<>\*/@\^]',)),
    ('STRING', (r"'([^']|(''))*'",)),
    ('CHAR', (r'#[0-9]+',)),
    ('CHAR', (r'#\$[0-9A-Fa-f]+',)),
]
根据funcparserlib的源码,Spec类已被弃用。
关于python - Funcparserlib.lexer.Spec ImportError:无法导入名称“Spec”,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/29063520/