summaryrefslogtreecommitdiff
path: root/tools/net/sunrpc/xdrgen/xdr_parse.py
blob: 241e96c1fdd9c776c14298b132e31d7efe6b1aec (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
#!/usr/bin/env python3
# ex: set filetype=python:

"""Common parsing code for xdrgen"""

import sys
from typing import Callable

from lark import Lark
from lark.exceptions import UnexpectedInput, UnexpectedToken, VisitError


# Module-level flag written by set_xdr_annotate() and read by
# get_xdr_annotate(). Set to True to emit annotation comments in
# generated source
annotate: bool = False

# Module-level flag written by set_xdr_enum_validation() and read by
# get_xdr_enum_validation(). Set to True to emit enum value validation
# in decoders
enum_validation: bool = True

# Map internal Lark token names to human-readable names. Consulted when
# a parse error lists the tokens that were expected; a name missing from
# this table falls back to its lower-cased form with underscores
# replaced by spaces.
TOKEN_NAMES = {
    "__ANON_0": "identifier",
    "__ANON_1": "number",
    "SEMICOLON": "';'",
    "LBRACE": "'{'",
    "RBRACE": "'}'",
    "LPAR": "'('",
    "RPAR": "')'",
    "LSQB": "'['",
    "RSQB": "']'",
    "LESSTHAN": "'<'",
    "MORETHAN": "'>'",
    "EQUAL": "'='",
    "COLON": "':'",
    "COMMA": "','",
    "STAR": "'*'",
    "$END": "end of file",
}


class XdrParseError(Exception):
    """Signal that parsing of XDR input failed.

    The parse error handler raises this without a message, after it has
    already written the diagnostic to stderr.
    """


def set_xdr_annotate(set_it: bool) -> None:
    """Record whether --annotate was specified on the command line.

    Args:
        set_it: New value for the module-level 'annotate' flag
    """
    global annotate
    annotate = set_it


def get_xdr_annotate() -> bool:
    """Report the current value of the module-level 'annotate' flag.

    Returns:
        True if --annotate was specified on the command line
    """
    return annotate


def set_xdr_enum_validation(set_it: bool) -> None:
    """Record whether decoders should validate enum values.

    Args:
        set_it: New value for the module-level 'enum_validation' flag,
            as chosen by command line options
    """
    global enum_validation
    enum_validation = set_it


def get_xdr_enum_validation() -> bool:
    """Report the current value of the module-level 'enum_validation' flag.

    Returns:
        True when enum validation is enabled for decoder generation
    """
    return enum_validation


def make_error_handler(source: str, filename: str) -> Callable[[UnexpectedInput], bool]:
    """Create an error handler that reports the first parse error and aborts.

    Args:
        source: The XDR source text being parsed
        filename: The name of the file being parsed

    Returns:
        An error handler function for use with Lark's on_error parameter.
        The handler never returns normally: it writes a diagnostic to
        stderr and then raises XdrParseError.
    """
    # Split once up front so the handler can look up the offending line
    lines = source.splitlines()

    def handle_parse_error(e: UnexpectedInput) -> bool:
        """Report a parse error with context and abort parsing.

        Raises:
            XdrParseError: always, once the diagnostic has been written
        """
        line_num = e.line
        column = e.column
        # Line numbers are treated as 1-based; anything out of range
        # yields an empty context line rather than an IndexError
        line_text = lines[line_num - 1] if 0 < line_num <= len(lines) else ""

        # Build the error message
        msg_parts = [f"{filename}:{line_num}:{column}: parse error"]

        # Show what was found vs what was expected
        if isinstance(e, UnexpectedToken):
            token = e.token
            if token.type == "__ANON_0":
                found = f"identifier '{token.value}'"
            elif token.type == "__ANON_1":
                found = f"number '{token.value}'"
            else:
                found = f"'{token.value}'"
            msg_parts.append(f"Unexpected {found}")

            # Provide helpful expected tokens list; long lists are
            # omitted because they add noise rather than guidance
            expected = e.expected
            if expected:
                readable = [
                    TOKEN_NAMES.get(exp, exp.lower().replace("_", " "))
                    for exp in sorted(expected)
                ]
                if len(readable) == 1:
                    msg_parts.append(f"Expected {readable[0]}")
                elif len(readable) <= 4:
                    msg_parts.append(f"Expected one of: {', '.join(readable)}")
        else:
            # Other error kinds: keep only the first line of Lark's message
            msg_parts.append(str(e).split("\n")[0])

        # Show the offending line with a caret pointing to the error.
        # The displayed line is tab-expanded the same way as the prefix
        # that positions the caret; printing raw tabs after the 4-space
        # indent would land on different tab stops and misplace the caret.
        msg_parts.append("")
        msg_parts.append(f"    {line_text.expandtabs()}")
        # Clamp so a column below 1 cannot turn into a negative slice bound
        prefix = line_text[: max(column - 1, 0)].expandtabs()
        msg_parts.append(f"    {' ' * len(prefix)}^")

        sys.stderr.write("\n".join(msg_parts) + "\n")
        raise XdrParseError()

    return handle_parse_error


def handle_transform_error(e: VisitError, source: str, filename: str) -> None:
    """Report a transform error with context.

    Writes a "semantic error" diagnostic to stderr. When the failing
    node carries a source position, the offending line is shown with a
    caret under the reported column.

    Args:
        e: The VisitError from Lark's transformer
        source: The XDR source text being parsed
        filename: The name of the file being parsed
    """
    lines = source.splitlines()

    # Extract position from the tree node if available. The meta object
    # may exist without position attributes, so read them defensively
    # instead of letting the error reporter itself raise AttributeError.
    line_num = 0
    column = 0
    meta = getattr(e.obj, "meta", None)
    if meta:
        line_num = getattr(meta, "line", 0)
        column = getattr(meta, "column", 0)

    line_text = lines[line_num - 1] if 0 < line_num <= len(lines) else ""

    # Build the error message
    msg_parts = [f"{filename}:{line_num}:{column}: semantic error"]

    # The original exception is typically a KeyError for undefined types;
    # an argument-less KeyError falls through to the generic message
    if isinstance(e.orig_exc, KeyError) and e.orig_exc.args:
        msg_parts.append(f"Undefined type '{e.orig_exc.args[0]}'")
    else:
        msg_parts.append(str(e.orig_exc))

    # Show the offending line with a caret pointing to the error.
    # The displayed line is tab-expanded the same way as the prefix that
    # positions the caret; printing raw tabs after the 4-space indent
    # would land on different tab stops and misplace the caret.
    if line_text:
        msg_parts.append("")
        msg_parts.append(f"    {line_text.expandtabs()}")
        # Clamp so a column below 1 cannot turn into a negative slice bound
        prefix = line_text[: max(column - 1, 0)].expandtabs()
        msg_parts.append(f"    {' ' * len(prefix)}^")

    sys.stderr.write("\n".join(msg_parts) + "\n")


def xdr_parser() -> Lark:
    """Build a Lark parser for the XDR language.

    The grammar is read from grammars/xdr.lark, located relative to this
    file, and parsing starts at the "specification" rule.

    Returns:
        A Lark instance using the LALR parser with a contextual lexer
    """
    options = {
        "start": "specification",
        "debug": True,
        "strict": True,
        "propagate_positions": True,
        "parser": "lalr",
        "lexer": "contextual",
    }
    return Lark.open("grammars/xdr.lark", rel_to=__file__, **options)