projetAnsible/myenv/lib/python3.12/site-packages/arpeggio/peg.py
2024-12-09 06:16:28 +01:00

291 lines
10 KiB
Python

# -*- coding: utf-8 -*-
#######################################################################
# Name: peg.py
# Purpose: Implementing PEG language
# Author: Igor R. Dejanovic <igor DOT dejanovic AT gmail DOT com>
# Copyright: (c) 2009-2017 Igor R. Dejanovic <igor DOT dejanovic AT gmail DOT com>
# License: MIT License
#######################################################################
from __future__ import print_function, unicode_literals
import sys
import codecs
import copy
import re
from arpeggio import Sequence, OrderedChoice, Optional, ZeroOrMore, \
OneOrMore, UnorderedGroup, EOF, EndOfFile, PTNodeVisitor, \
SemanticError, CrossRef, GrammarError, StrMatch, And, Not, Parser, \
ParserPython, visit_parse_tree
from arpeggio import RegExMatch as _
# Select the native text type for the running interpreter.  The numeric
# ``sys.version_info`` tuple is compared instead of the ``sys.version``
# string: string comparison is lexicographic, so a major version such as
# "10" would wrongly sort before "3".
if sys.version_info[0] < 3:
    text = unicode  # noqa: F821 - this name only exists on Python 2
else:
    text = str
# Public API of this module: only the PEG-based parser class is exported
# by ``from ... import *``.
__all__ = ['ParserPEG']
# Lexical invariants
# Tokens of the PEG notation itself; the grammar rules and the visitor
# below refer to them by these names.
LEFT_ARROW = '<-'       # between a rule name and its definition
ORDERED_CHOICE = '/'    # between alternative sequences

# Suffix operators (see ``sufix``).
ZERO_OR_MORE = '*'
ONE_OR_MORE = '+'
OPTIONAL = '?'
UNORDERED_GROUP = '#'

# Prefix operators (see ``prefix``).
AND = '&'
NOT = '!'

# Grouping parentheses (see ``expression``).
OPEN = '('
CLOSE = ')'
# PEG syntax rules
def peggrammar():
    """Whole grammar: one or more ``rule`` definitions, then end of input."""
    rules = OneOrMore(rule)
    return (rules, EOF)
def rule():
    """One rule definition: name, ``<-``, an ordered choice and a ``;``."""
    terminator = ";"
    return (rule_name, LEFT_ARROW, ordered_choice, terminator)
def ordered_choice():
    """A sequence followed by zero or more ``/ sequence`` alternatives."""
    further_alternatives = ZeroOrMore(ORDERED_CHOICE, sequence)
    return (sequence, further_alternatives)
def sequence():
    """One or more ``prefix`` items in a row."""
    items = OneOrMore(prefix)
    return items
def prefix():
    """A ``sufix`` item, optionally preceded by ``&`` or ``!``."""
    predicate_marker = Optional([AND, NOT])
    return (predicate_marker, sufix)
def sufix():
    """An ``expression``, optionally followed by ``?``, ``*``, ``+`` or ``#``."""
    operators = [OPTIONAL, ZERO_OR_MORE, ONE_OR_MORE, UNORDERED_GROUP]
    return (expression, Optional(operators))
def expression():
    """Primary item: regex, rule reference, parenthesised choice or string."""
    grouped = (OPEN, ordered_choice, CLOSE)
    return [regex, rule_crossref, grouped, str_match]
# PEG Lexical rules
def regex():
    """Regex literal written as ``r'...'`` or ``r"..."``.

    The body may contain backslash-escaped characters, including the
    enclosing quote character.
    """
    single_quoted = ("r'", _(r'''[^'\\]*(?:\\.[^'\\]*)*'''), "'")
    double_quoted = ('r"', _(r'''[^"\\]*(?:\\.[^"\\]*)*'''), '"')
    return [single_quoted, double_quoted]
def rule_name():
    """Rule identifier: ASCII letter or ``_``, then letters, digits or ``_``."""
    identifier = _(r"[a-zA-Z_]([a-zA-Z_]|[0-9])*")
    return identifier
def rule_crossref():
    """Reference to another rule; syntactically just a ``rule_name``."""
    return rule_name
def str_match():
    """String literal in single or double quotes, quotes included.

    Backslash-escaped characters are allowed inside the literal, and
    ``(?s)`` lets an escaped character be a newline.
    """
    pattern = (r'''(?s)('[^'\\]*(?:\\.[^'\\]*)*')|'''
               r'''("[^"\\]*(?:\\.[^"\\]*)*")''')
    return _(pattern)
def comment():
    """Line comment: ``//`` and the rest of the line up to its newline."""
    rest_of_line = _(".*\n")
    return ("//", rest_of_line)
# Escape sequences supported in PEG literal string matches
# Matches a single backslash escape inside a PEG string literal; group 1 is
# the text following the backslash.  The pattern is compiled in verbose mode,
# so its layout whitespace and trailing ``#`` notes are not part of the match.
PEG_ESCAPE_SEQUENCES_RE = re.compile(
    r"""
\\ ( [\n\\'"abfnrtv] | # \\x single-character escapes
[0-7]{1,3} | # \\ooo octal escape
x[0-9A-Fa-f]{2} | # \\xXX hex escape
u[0-9A-Fa-f]{4} | # \\uXXXX hex escape
U[0-9A-Fa-f]{8} | # \\UXXXXXXXX hex escape
N\{[- 0-9A-Z]+\} # \\N{name} Unicode name or alias
)
""",
    flags=re.UNICODE | re.VERBOSE,
)
class PEGVisitor(PTNodeVisitor):
    """
    Visitor that transforms parse tree to a PEG parser for the given language.

    The ``visit_*`` methods are named after the PEG grammar rules and each
    returns the parser-model object for its node.  Rules are recorded in
    ``self.peg_rules`` as they are visited; ``visit_peggrammar`` then links
    the cross-references between them.
    """

    def __init__(self, root_rule_name, comment_rule_name, ignore_case,
                 *args, **kwargs):
        """
        Args:
            root_rule_name (str): Name of the rule returned as the root.
            comment_rule_name (str): Name of the comment rule, or None.
            ignore_case (bool): Passed to every string and regex match
                this visitor creates.
            *args, **kwargs: Forwarded to ``PTNodeVisitor.__init__``.
        """
        super(PEGVisitor, self).__init__(*args, **kwargs)
        self.root_rule_name = root_rule_name
        self.comment_rule_name = comment_rule_name
        self.ignore_case = ignore_case

        # Used for linking phase
        self.peg_rules = {
            "EOF": EndOfFile()
        }

    def visit_peggrammar(self, node, children):
        """
        Resolve cross-references and pick out the root and comment rules.

        Returns:
            tuple: ``(root_rule, comment_rule)``.  ``comment_rule`` is None
            when no visited rule is named ``self.comment_rule_name``.

        Raises:
            GrammarError: If no visited rule is named
                ``self.root_rule_name``.
            SemanticError: If a cross-reference names a rule that was
                never defined.
        """

        def get_rule_by_name(rule_name):
            """Look a rule up in ``self.peg_rules`` or raise SemanticError."""
            try:
                return self.peg_rules[rule_name]
            except KeyError:
                raise SemanticError("Rule \"{}\" does not exists."
                                    .format(rule_name))

        def resolve_rule_by_name(rule_name):
            """Follow a chain of CrossRefs until a concrete rule is found."""
            if self.debug:
                self.dprint("Resolving crossref {}".format(rule_name))

            resolved_rule = get_rule_by_name(rule_name)
            while type(resolved_rule) is CrossRef:
                target_rule = resolved_rule.target_rule_name
                resolved_rule = get_rule_by_name(target_rule)

            # If resolved rule hasn't got the same name it
            # should be cloned and preserved in the peg_rules cache
            if resolved_rule.rule_name != rule_name:
                resolved_rule = copy.copy(resolved_rule)
                resolved_rule.rule_name = rule_name
                self.peg_rules[rule_name] = resolved_rule

                if self.debug:
                    self.dprint("Resolving: cloned to {} = > {}"
                                .format(resolved_rule.rule_name,
                                        resolved_rule.name))
            return resolved_rule

        def _resolve(node):
            """
            Resolves CrossRefs from the parser model.
            """
            # Each node is processed once; this also stops the recursion
            # on nodes that are reachable from themselves.
            if node in self.resolved:
                return node
            self.resolved.add(node)

            if isinstance(node, CrossRef):
                # The rule itself is a cross-ref
                resolved_rule = resolve_rule_by_name(node.target_rule_name)
                return _resolve(resolved_rule)

            # Resolve children nodes in place
            for i, n in enumerate(node.nodes):
                node.nodes[i] = _resolve(n)
            return node

        # Find root and comment rules.  Both start as None so that a
        # missing root rule is reported below instead of surfacing as an
        # UnboundLocalError.
        self.resolved = set()
        root_rule = None
        comment_rule = None
        for rule in children:
            if rule.rule_name == self.root_rule_name:
                root_rule = _resolve(rule)
            if rule.rule_name == self.comment_rule_name:
                comment_rule = _resolve(rule)

        if root_rule is None:
            raise GrammarError('Root rule "{}" not found!'
                               .format(self.root_rule_name))
        return root_rule, comment_rule

    def visit_rule(self, node, children):
        """
        Build the expression for one rule and register it by name.

        ``children[0]`` is the rule name; the remaining children form the
        rule body and are wrapped in a ``Sequence`` when there are several.
        """
        rule_name = children[0]
        if len(children) > 2:
            retval = Sequence(nodes=children[1:])
        else:
            retval = children[1]

        retval.rule_name = rule_name
        retval.root = True

        # Keep a map of parser rules for cross reference
        # resolving.
        self.peg_rules[rule_name] = retval
        return retval

    def visit_sequence(self, node, children):
        """Return a ``Sequence`` of the children, or the only child as is."""
        if len(children) > 1:
            return Sequence(nodes=children[:])
        # If only one child rule exists reduce.
        return children[0]

    def visit_ordered_choice(self, node, children):
        """Return an ``OrderedChoice`` of the children, or the only child."""
        if len(children) > 1:
            return OrderedChoice(nodes=children[:])
        # If only one child rule exists reduce.
        return children[0]

    def visit_prefix(self, node, children):
        """Wrap the expression in ``Not``/``And`` when a prefix is present."""
        if len(children) != 2:
            # If there is no optional prefix reduce.
            return children[0]

        if children[0] == NOT:
            retval = Not()
        else:
            retval = And()
        # NOTE(review): the exact ``list`` type check is kept as found;
        # list subclasses are wrapped in a new list, not used directly.
        if type(children[1]) is list:
            retval.nodes = children[1]
        else:
            retval.nodes = [children[1]]
        return retval

    def visit_sufix(self, node, children):
        """Wrap the expression according to its suffix operator, if any."""
        if len(children) != 2:
            return children[0]

        # NOTE(review): exact ``list`` type check kept as found.
        if type(children[0]) is list:
            nodes = children[0]
        else:
            nodes = [children[0]]

        if children[1] == ZERO_OR_MORE:
            retval = ZeroOrMore(nodes=nodes)
        elif children[1] == ONE_OR_MORE:
            retval = OneOrMore(nodes=nodes)
        elif children[1] == OPTIONAL:
            retval = Optional(nodes=nodes)
        else:
            # Remaining suffix is UNORDERED_GROUP: the group takes over
            # the sub-expressions of the expression it is attached to.
            retval = UnorderedGroup(nodes=nodes[0].nodes)
        return retval

    def visit_rule_crossref(self, node, children):
        """Return a ``CrossRef`` placeholder for the referenced rule name."""
        return CrossRef(node.value)

    def visit_regex(self, node, children):
        """Return a compiled regex match built from the regex body text."""
        match = _(children[0], ignore_case=self.ignore_case)
        match.compile()
        return match

    def visit_str_match(self, node, children):
        """
        Return a ``StrMatch`` for a quoted literal with escapes decoded.

        Raises:
            GrammarError: If an escape sequence cannot be decoded.
        """
        # Strip the surrounding quote characters.
        match_str = node.value[1:-1]

        # Scan the string literal, and sequentially match those escape
        # sequences which are syntactically valid Python. Attempt to convert
        # those, raising ``GrammarError`` for any semantically invalid ones.
        def decode_escape(match):
            try:
                return codecs.decode(match.group(0), "unicode_escape")
            except UnicodeDecodeError:
                raise GrammarError("Invalid escape sequence '%s'." %
                                   match.group(0))
        match_str = PEG_ESCAPE_SEQUENCES_RE.sub(decode_escape, match_str)

        return StrMatch(match_str, ignore_case=self.ignore_case)
class ParserPEG(Parser):
    """Parser whose grammar is supplied as text in PEG notation."""

    def __init__(self, language_def, root_rule_name, comment_rule_name=None,
                 *args, **kwargs):
        """
        Build the parser model from a textual PEG grammar.

        Args:
            language_def (str): Grammar of the language, in PEG notation.
            root_rule_name (str): Name of the grammar's root rule.
            comment_rule_name (str): Name of the rule that matches
                comments; None by default.
            *args, **kwargs: Passed through to ``Parser.__init__``.
        """
        super(ParserPEG, self).__init__(*args, **kwargs)
        self.root_rule_name = root_rule_name
        self.comment_rule_name = comment_rule_name

        # PEG Abstract Syntax Graph
        model, comments = self._from_peg(language_def)
        self.parser_model = model
        self.comments_model = comments

        # When a comment rule was found, mark it as a named root rule.
        if comments:
            comments.root = True
            comments.rule_name = comment_rule_name

        # Debug mode only: dump the parser model to a dot file so it can
        # be visualized.
        if self.debug:
            from arpeggio.export import PMDOTExporter
            dot_file = "{}_peg_parser_model.dot".format(
                self.parser_model.rule_name)
            PMDOTExporter().exportFile(self.parser_model, dot_file)

    def _parse(self):
        """Run the root of the parser model against this parser's input."""
        return self.parser_model.parse(self)

    def _from_peg(self, language_def):
        """
        Parse the PEG grammar text and convert it to a parser model.

        Returns whatever the ``PEGVisitor`` produces for the grammar's
        parse tree.
        """
        peg_parser = ParserPython(peggrammar, comment, reduce_tree=False,
                                  debug=self.debug)
        peg_parser.root_rule_name = self.root_rule_name
        grammar_tree = peg_parser.parse(language_def)

        visitor = PEGVisitor(self.root_rule_name,
                             self.comment_rule_name,
                             self.ignore_case,
                             debug=self.debug)
        return visit_parse_tree(grammar_tree, visitor)