"""Source code for textmate_grammar.parser."""

from __future__ import annotations

from abc import ABC, abstractmethod
from typing import TYPE_CHECKING

import onigurumacffi as re

from .elements import Capture, ContentBlockElement, ContentElement
from .handler import POS, ContentHandler, Pattern
from .utils.exceptions import IncludedParserNotFound
from .utils.logger import LOGGER, track_depth

if TYPE_CHECKING:
    from .parsers.base import LanguageParser


class GrammarParser(ABC):
    """The abstract grammar parser object.

    Concrete subclasses implement :meth:`_parse`; the static
    :meth:`initialize` factory inspects a grammar dictionary and selects
    the appropriate subclass.
    """

    @staticmethod
    def initialize(grammar: dict, **kwargs):
        """
        Initializes the parser based on the grammar.

        :param grammar: The grammar to initialize the parser with.
        :param kwargs: Additional keyword arguments.
        :return: The initialized parser, or the raw include string for
            ``include`` grammars (resolved later by ``_find_include``).
        """
        if "include" in grammar:
            return grammar["include"]
        elif "match" in grammar:
            return MatchParser(grammar, **kwargs)
        elif "begin" in grammar and "end" in grammar:
            return BeginEndParser(grammar, **kwargs)
        elif "begin" in grammar and "while" in grammar:
            return BeginWhileParser(grammar, **kwargs)
        elif "patterns" in grammar:
            return PatternsParser(grammar, **kwargs)
        else:
            # Fallback: only a token name is provided.
            return TokenParser(grammar, **kwargs)

    def __init__(
        self,
        grammar: dict,
        language_parser: LanguageParser | None = None,
        key: str = "",
        is_capture: bool = False,
        **kwargs,
    ) -> None:
        """
        Initialize a Parser object.

        :param grammar: The grammar dictionary.
        :param language_parser: The language parser object. Defaults to None.
        :param key: The key for the parser. Defaults to "".
        :param is_capture: Indicates if the parser is a capture. Defaults to False.
        :param kwargs: Additional keyword arguments.
        :return: None
        """
        self.grammar = grammar
        self.language_parser = language_parser
        self.key = key
        self.token = grammar.get("name", "")
        self.is_capture = is_capture
        # Set True once all includes in this parser's repository are resolved.
        self.initialized = False
        # Set True by subclasses whose pattern contains the \G anchor.
        self.anchored = False

    @property
    def comment(self) -> str:
        # Optional free-form comment carried by the grammar entry.
        return self.grammar.get("comment", "")

    @property
    def disabled(self) -> bool:
        # Disabled patterns are filtered out by the pattern parsers.
        return self.grammar.get("disabled", False)

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}:<{self.key}>"

    def _init_captures(self, grammar: dict, key: str = "captures", **kwargs) -> dict:
        """Initializes a captures dictionary, mapping capture-group numbers to parsers."""
        captures = {}
        if key in grammar:
            for group_id, pattern in grammar[key].items():
                captures[int(group_id)] = self.initialize(
                    pattern, language_parser=self.language_parser, is_capture=True
                )
        return captures

    def _find_include(self, key: str, **kwargs) -> GrammarParser:
        """Find the included grammars and during repository initialization.

        :param key: include target: ``$self``/``$base``, ``#repository-key``,
            or an external scope name.
        :raises IncludedParserNotFound: when no language parser is attached.
        """
        if not self.language_parser:
            raise IncludedParserNotFound(key)

        if key in ["$self", "$base"]:  # TODO there is a difference between these
            return self.language_parser
        elif key[0] == "#":
            # NOTE(review): returns None when the repository key is missing,
            # which callers do not guard against — confirm intended.
            return self.language_parser.repository.get(key[1:], None)
        else:
            return self.language_parser._find_include_scopes(key)

    @abstractmethod
    def _parse(
        self,
        handler: ContentHandler,
        starting: POS,
        **kwargs,
    ) -> tuple[bool, list[Capture | ContentElement], tuple[int, int] | None]:
        """The abstract method which all parsers must implement.

        The ``_parse`` method is called by ``parse``, which will additionally parse
        any nested Capture elements. The ``_parse`` method should contain all the rules
        for the extended parser.

        :param handler: The content handler to handle the parsed elements.
        :param starting: The starting position of the parsing.
        :param kwargs: Additional keyword arguments.
        :return: A tuple containing the parsing result, a list of parsed elements,
            and the ending position of the parsing.
        """
        pass

    def _initialize_repository(self, **kwargs) -> None:
        """Initializes the repository's inclusions.

        When the grammar has patterns, this method should called to initialize
        its inclusions. This should occur after all sub patterns have been initialized.
        """
        return

    def parse(
        self,
        handler: ContentHandler,
        starting: POS = (0, 0),
        boundary: POS | None = None,
        **kwargs,
    ) -> tuple[bool, list[Capture | ContentElement], tuple[int, int] | None]:
        """
        The method to parse a handler using the current grammar.

        :param handler: The ContentHandler object that will handle the parsed content.
        :param starting: The starting position for parsing. Defaults to (0, 0).
        :param boundary: The boundary position for parsing. Defaults to None.
        :param kwargs: Additional keyword arguments that can be passed to the parser.
        :return: A tuple containing:
            - parsed: A boolean indicating whether the parsing was successful.
            - elements: A list of Capture or ContentElement objects representing the parsed content.
            - span: A tuple containing the starting and ending positions of the parsed content,
              or None if parsing failed.
        """
        # Lazily resolve all repository includes on first use.
        if not self.initialized and self.language_parser is not None:
            self.language_parser._initialize_repository()
        parsed, elements, span = self._parse(handler, starting, boundary=boundary, **kwargs)
        return parsed, elements, span

    def match_and_capture(
        self,
        handler: ContentHandler,
        pattern: Pattern,
        starting: POS,
        boundary: POS,
        parsers: dict[int, GrammarParser] | None = None,
        parent_capture: Capture | None = None,
        **kwargs,
    ) -> tuple[tuple[POS, POS] | None, str, list[Capture | ContentElement]]:
        """Matches a pattern and its capture groups.

        Matches the pattern on the handler between the starting and boundary positions.
        If a pattern is matched, its capture groups are initialized as Capture objects.
        These are only parsed after the full handler has been parsed. This occurs in
        GrammarParser.parse when calling parse_captures.

        :param handler: The content handler to match the pattern on.
        :param pattern: The pattern to match.
        :param starting: The starting position for the match.
        :param boundary: The boundary position for the match.
        :param parsers: A dictionary of parsers.
        :param parent_capture: The parent capture object.
        :param kwargs: Additional keyword arguments.
        :return: A tuple containing the span of the match, the matched string,
            and a list of capture objects or content elements.
        """
        if parsers is None:
            parsers = {}

        matching, span = handler.search(pattern, starting=starting, boundary=boundary, **kwargs)

        if matching:
            if parsers:
                capture = Capture(
                    handler,
                    pattern,
                    matching,
                    parsers,
                    starting,
                    boundary,
                    key=self.key,
                    **kwargs,
                )
                # Guard against infinite recursion: an identical capture as the
                # parent means no progress was made.
                if parent_capture is not None and capture == parent_capture:
                    return None, "", []
                else:
                    return span, matching.group(), [capture]
            else:
                return span, matching.group(), []
        else:
            return None, "", []
class TokenParser(GrammarParser):
    """The parser for grammars for which only the token is provided."""

    def __init__(self, grammar: dict, **kwargs) -> None:
        super().__init__(grammar, **kwargs)
        # Nothing to resolve: a token-only grammar has no inclusions.
        self.initialized = True

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}:{self.token}"

    @track_depth
    def _parse(
        self,
        handler: ContentHandler,
        starting: POS,
        boundary: POS,
        **kwargs,
    ) -> tuple[bool, list[Capture | ContentElement], tuple[POS, POS] | None]:
        """Tag the whole span between ``starting`` and ``boundary``.

        No regex patterns are involved: a single element covering the given
        span is produced unconditionally and the handler anchor is moved to
        the boundary column.
        """
        content = handler.read_pos(starting, boundary)
        element = ContentElement(
            token=self.token,
            grammar=self.grammar,
            content=content,
            characters=handler.chars(starting, boundary),
        )
        handler.anchor = boundary[1]
        LOGGER.info(
            f"{self.__class__.__name__} found < {repr(content)} >",
            self,
            starting,
            kwargs.get("depth", 0),
        )
        elements: list[Capture | ContentElement] = [element]
        return True, elements, (starting, boundary)
class MatchParser(GrammarParser):
    """The parser for grammars for which a match pattern is provided."""

    def __init__(self, grammar: dict, **kwargs) -> None:
        super().__init__(grammar, **kwargs)
        self.exp_match = re.compile(grammar["match"])
        self.parsers = self._init_captures(grammar, key="captures")
        # \G anchors the pattern to the previous match position.
        self.anchored = "\\G" in grammar["match"]

    def __repr__(self) -> str:
        if self.token:
            return f"{self.__class__.__name__}:{self.token}"
        identifier = self.key if self.key else "_".join(self.comment.lower().split(" "))
        return f"{self.__class__.__name__}:<{identifier}>"

    def _initialize_repository(self, **kwargs) -> None:
        """When the grammar has patterns, this method should called to initialize its inclusions."""
        self.initialized = True
        # Swap include strings for the parsers they point to, then recurse.
        for group, sub in self.parsers.items():
            if not isinstance(sub, GrammarParser):
                self.parsers[group] = self._find_include(sub)
        for sub in self.parsers.values():
            if not sub.initialized:
                sub._initialize_repository()

    @track_depth
    def _parse(
        self,
        handler: ContentHandler,
        starting: POS,
        boundary: POS,
        **kwargs,
    ) -> tuple[bool, list[Capture | ContentElement], tuple[POS, POS] | None]:
        """The parse method for grammars for which a match pattern is provided."""
        span, content, captures = self.match_and_capture(
            handler,
            pattern=self.exp_match,
            starting=starting,
            boundary=boundary,
            parsers=self.parsers,
            **kwargs,
        )

        if span is None:
            LOGGER.debug(
                f"{self.__class__.__name__} no match",
                self,
                starting,
                kwargs.get("depth", 0),
            )
            return False, [], None

        LOGGER.info(
            f"{self.__class__.__name__} found < {repr(content)} >",
            self,
            starting,
            kwargs.get("depth", 0),
        )

        # Without a token the raw captures are passed through unchanged.
        if not self.token:
            return True, captures, span

        element = ContentElement(
            token=self.token,
            grammar=self.grammar,
            content=content,
            characters=handler.chars(*span),
            children=captures,
        )
        elements: list[Capture | ContentElement] = [element]
        return True, elements, span
class ParserHasPatterns(GrammarParser, ABC):
    """Base for parsers that carry a list of sub-patterns (patterns/begin-end)."""

    def __init__(self, grammar: dict, **kwargs) -> None:
        super().__init__(grammar, **kwargs)
        # Entries may be include strings until _initialize_repository resolves them.
        self.patterns = [
            self.initialize(pattern, language_parser=self.language_parser)
            for pattern in grammar.get("patterns", [])
        ]

    def _initialize_repository(self):
        """When the grammar has patterns, this method should called to initialize its inclusions."""
        self.initialized = True
        # Resolve include strings into the parsers they reference.
        self.patterns = [
            parser if isinstance(parser, GrammarParser) else self._find_include(parser)
            for parser in self.patterns
        ]
        for parser in self.patterns:
            if not parser.initialized:
                parser._initialize_repository()

        # Copy patterns from included pattern parsers: each nested PatternsParser
        # is spliced out and replaced in place by its own pattern list.
        # NOTE(review): .index() finds the first occurrence — assumes a given
        # PatternsParser instance appears at most once; confirm.
        pattern_parsers = [parser for parser in self.patterns if isinstance(parser, PatternsParser)]
        for parser in pattern_parsers:
            parser_index = self.patterns.index(parser)
            self.patterns[parser_index : parser_index + 1] = parser.patterns

        # Injection grammars: appended unless this parser's base scope is listed
        # in the injection's exception scopes.
        for exception_scopes, injection_pattern in self.language_parser.injections:
            if self.token:
                if self.token.split(".")[0] not in exception_scopes:
                    self.patterns.append(injection_pattern)
            elif self.is_capture:
                self.patterns.append(injection_pattern)
class PatternsParser(ParserHasPatterns):
    """The parser for grammars for which several patterns are provided."""

    @track_depth
    def _parse(
        self,
        handler: ContentHandler,
        starting: POS,
        boundary: POS | None = None,
        greedy: bool = False,
        find_one: bool = True,
        **kwargs,
    ) -> tuple[bool, list[Capture | ContentElement], tuple[POS, POS]]:
        """The parse method for grammars for which several patterns are provided.

        Repeatedly tries each (enabled) sub-pattern from ``starting`` until the
        boundary is reached. With ``find_one`` the first successful match is
        returned immediately; otherwise matches are accumulated. When no pattern
        matches without leading whitespace, a second greedy round picks the
        pattern whose match starts earliest (ties broken by pattern order).
        """
        if boundary is None:
            boundary = (len(handler.lines) - 1, handler.line_lengths[-1])

        parsed = False
        elements: list[Capture | ContentElement] = []
        patterns = [parser for parser in self.patterns if not parser.disabled]

        current = (starting[0], starting[1])

        while current < boundary:
            for parser in patterns:
                # Try to find patterns
                parsed, captures, span = parser._parse(
                    handler,
                    current,
                    boundary=boundary,
                    greedy=greedy,
                    **kwargs,
                )
                if parsed:
                    if find_one:
                        LOGGER.info(
                            f"{self.__class__.__name__} found single element",
                            self,
                            current,
                            kwargs.get("depth", 0),
                        )
                        return True, captures, span
                    elements.extend(captures)
                    current = span[1]
                    break
            else:
                if find_one:
                    break

            if not parsed and not greedy:
                # Try again if previously allowed no leading white space characters,
                # only when multiple patterns are to be found
                options_span, options_elements = {}, {}
                for parser in patterns:
                    parsed, captures, span = parser._parse(
                        handler,
                        current,
                        boundary=boundary,
                        greedy=True,
                        **kwargs,
                    )
                    if parsed:
                        options_span[parser] = span
                        options_elements[parser] = captures
                        LOGGER.debug(
                            f"{self.__class__.__name__} found pattern choice",
                            self,
                            current,
                            kwargs.get("depth", 0),
                        )

                if options_span:
                    # Choose the match that starts earliest; ties resolved by
                    # the pattern's position in the grammar.
                    parser = sorted(
                        options_span,
                        key=lambda parser: (
                            *options_span[parser][0],
                            patterns.index(parser),
                        ),
                    )[0]
                    current = options_span[parser][1]
                    elements.extend(options_elements[parser])
                    LOGGER.info(
                        f"{self.__class__.__name__} chosen pattern of {parser}",
                        self,
                        current,
                        kwargs.get("depth", 0),
                    )
                elif self != self.language_parser:
                    break
                else:
                    # Root-level parser: skip the unparseable remainder of the line.
                    remainder = handler.read_line(current)
                    if not remainder.isspace():
                        LOGGER.warning(
                            f"{self.__class__.__name__} remainder of line not parsed: {remainder}",
                            self,
                            current,
                            kwargs.get("depth", 0),
                        )
                    # NOTE(review): `current[0] + 1 <= len(handler.lines)` permits
                    # an index one past the last line — looks off-by-one; confirm
                    # against the ContentHandler bounds.
                    if current[0] + 1 <= len(handler.lines):
                        current = (current[0] + 1, 0)
                    else:
                        LOGGER.debug(
                            f"{self.__class__.__name__} EOF encountered",
                            self,
                            current,
                            kwargs.get("depth", 0),
                        )
                        break

            if current == starting:
                # No progress in a full round: bail out to avoid an infinite loop.
                LOGGER.warning(
                    f"{self.__class__.__name__} handler did not move after a search round",
                    self,
                    starting,
                    kwargs.get("depth", 0),
                )
                break

            line_length = handler.line_lengths[current[0]]
            if current[1] in [line_length, line_length - 1]:
                # At (or just before) the end of the line: jump to the next
                # line that actually has content; stop at EOF.
                try:
                    empty_lines = next(
                        i for i, v in enumerate(handler.line_lengths[current[0] + 1 :]) if v > 1
                    )
                    current = (current[0] + 1 + empty_lines, 0)
                except StopIteration:
                    break

        if self.token:
            elements = [
                ContentElement(
                    token=self.token,
                    grammar=self.grammar,
                    content=handler.read_pos(starting, boundary),
                    characters=handler.chars(starting, boundary),
                    children=elements,
                )
            ]

        return bool(elements), elements, (starting, current)
class BeginEndParser(ParserHasPatterns):
    """The parser for grammars for which a begin/end pattern is provided."""

    def __init__(self, grammar: dict, **kwargs) -> None:
        super().__init__(grammar, **kwargs)
        # contentName tags only the text between begin and end; name tags the
        # whole match including the begin/end patterns themselves.
        if "contentName" in grammar:
            self.token = grammar["contentName"]
            self.between_content = True
        else:
            self.token = grammar.get("name")
            self.between_content = False
        self.apply_end_pattern_last = grammar.get("applyEndPatternLast", False)
        self.exp_begin = re.compile(grammar["begin"])
        self.exp_end = re.compile(grammar["end"])
        self.parsers_begin = self._init_captures(grammar, key="beginCaptures")
        self.parsers_end = self._init_captures(grammar, key="endCaptures")
        # \G anchors the begin pattern to the previous match position.
        if "\\G" in grammar["begin"]:
            self.anchored = True

    def __repr__(self) -> str:
        if self.token:
            return f"{self.__class__.__name__}:{self.token}"
        else:
            identifier = self.key if self.key else "_".join(self.comment.lower().split(" "))
            return f"{self.__class__.__name__}:<{identifier}>"

    def _initialize_repository(self, **kwargs) -> None:
        """When the grammar has patterns, this method should called to initialize its inclusions."""
        self.initialized = True
        super()._initialize_repository()
        # Resolve include strings in the begin/end capture parsers, then recurse.
        for key, value in self.parsers_end.items():
            if not isinstance(value, GrammarParser):
                self.parsers_end[key] = self._find_include(value)
        for key, value in self.parsers_begin.items():
            if not isinstance(value, GrammarParser):
                self.parsers_begin[key] = self._find_include(value)
        for parser in self.parsers_begin.values():
            if not parser.initialized:
                parser._initialize_repository()
        for parser in self.parsers_end.values():
            if not parser.initialized:
                parser._initialize_repository()

    @track_depth
    def _parse(
        self,
        handler: ContentHandler,
        starting: POS,
        boundary: POS,
        greedy: bool = False,
        **kwargs,
    ) -> tuple[bool, list[Capture | ContentElement], tuple[POS, POS] | None]:
        """The parse method for grammars for which a begin/end pattern is provided.

        First matches the begin pattern, then repeatedly races the sub-patterns
        against the end pattern until the end wins (or the boundary is hit).
        """
        begin_span, _, begin_elements = self.match_and_capture(
            handler,
            self.exp_begin,
            starting,
            boundary=boundary,
            parsers=self.parsers_begin,
            greedy=greedy,
            **kwargs,
        )

        if not begin_span:
            LOGGER.debug(
                f"{self.__class__.__name__} no begin match",
                self,
                starting,
                kwargs.get("depth", 0),
            )
            return False, [], None
        LOGGER.info(
            f"{self.__class__.__name__} found begin",
            self,
            starting,
            kwargs.get("depth", 0),
        )

        # Get initial and boundary positions
        current = begin_span[1]
        if boundary is None:
            boundary = (len(handler.lines) - 1, handler.line_lengths[-1])

        # Define loop parameters
        end_elements: list[Capture | ContentElement] = []
        mid_elements: list[Capture | ContentElement] = []
        patterns = [parser for parser in self.patterns if not parser.disabled]
        first_run = True

        while current <= boundary:
            parsed = False
            # Create boolean that is enabled when a parser is recursively called. In this case its
            # end pattern should be applied last, otherwise the same span will be recognized as the
            # end pattern by the upper level parser
            apply_end_pattern_last = False

            # Try to find patterns first with no leading whitespace characters allowed
            for parser in patterns:
                parsed, capture_elements, capture_span = parser._parse(
                    handler, current, boundary=boundary, greedy=False, **kwargs
                )
                if parsed:
                    if parser == self:
                        apply_end_pattern_last = True
                    LOGGER.debug(
                        f"{self.__class__.__name__} found pattern (no ws)",
                        self,
                        current,
                        kwargs.get("depth", 0),
                    )
                    break

            # Try to find the end pattern with no leading whitespace characters allowed
            end_span, _, end_elements = self.match_and_capture(
                handler,
                self.exp_end,
                current,
                boundary=boundary,
                parsers=self.parsers_end,
                greedy=False,
                **kwargs,
            )

            if not parsed and not end_span:
                # Try to find the patterns and end pattern allowing for leading whitespace characters
                LOGGER.info(
                    f"{self.__class__.__name__} getting all pattern options",
                    self,
                    current,
                    kwargs.get("depth", 0),
                )
                options_span, options_elements = {}, {}
                for parser in patterns:
                    parsed, capture_elements, capture_span = parser._parse(
                        handler,
                        current,
                        boundary=boundary,
                        greedy=True,
                        **kwargs,
                    )
                    if parsed:
                        options_span[parser] = capture_span
                        options_elements[parser] = capture_elements
                        LOGGER.debug(
                            f"{self.__class__.__name__} found pattern choice",
                            self,
                            current,
                            kwargs.get("depth", 0),
                        )

                if options_span:
                    parsed = True
                    # Earliest-starting match wins; ties broken by pattern order.
                    parser = sorted(
                        options_span,
                        key=lambda parser: (
                            *options_span[parser][0],
                            patterns.index(parser),
                        ),
                    )[0]
                    capture_span = options_span[parser]
                    capture_elements = options_elements[parser]

                    if parser == self:
                        apply_end_pattern_last = True

                    LOGGER.info(
                        f"{self.__class__.__name__} chosen pattern of {parser}",
                        self,
                        current,
                        kwargs.get("depth", 0),
                    )

                end_span, end_content, end_elements = self.match_and_capture(
                    handler,
                    self.exp_end,
                    current,
                    boundary=boundary,
                    parsers=self.parsers_end,
                    greedy=True,
                    **kwargs,
                )

            if end_span:
                if parsed:
                    # Check whether the capture pattern has the same closing positions as the end pattern
                    capture_before_end = handler.prev(capture_span[1])
                    if handler.read(capture_before_end, skip_newline=False) == "\n":
                        # If capture pattern ends with \n, both left and right of \n is considered end
                        pattern_at_end = end_span[1] in [
                            capture_before_end,
                            capture_span[1],
                        ]
                    else:
                        pattern_at_end = end_span[1] == capture_span[1]

                    end_before_pattern = end_span[0] <= capture_span[0]
                    empty_span_end = end_span[1] == end_span[0]

                    if pattern_at_end and (end_before_pattern or empty_span_end):
                        if empty_span_end:
                            # Both found capture pattern and end pattern are accepted, break pattern search
                            LOGGER.debug(
                                f"{self.__class__.__name__} capture+end: both accepted, break",
                                self,
                                current,
                                kwargs.get("depth", 0),
                            )
                            mid_elements.extend(capture_elements)
                            closing = end_span[0] if self.between_content else end_span[1]
                            break
                        elif not self.apply_end_pattern_last and not apply_end_pattern_last:
                            # End pattern prioritized over capture pattern, break pattern search
                            LOGGER.debug(
                                f"{self.__class__.__name__} capture+end: end prioritized, break",
                                self,
                                current,
                                kwargs.get("depth", 0),
                            )
                            closing = end_span[0] if self.between_content else end_span[1]
                            break
                        else:
                            # Capture pattern prioritized over end pattern, continue pattern search
                            LOGGER.debug(
                                f"{self.__class__.__name__} capture+end: capture prioritized, continue",
                                self,
                                current,
                                kwargs.get("depth", 0),
                            )
                            mid_elements.extend(capture_elements)
                            current = capture_span[1]

                    elif capture_span[0] < end_span[0]:
                        # Capture pattern found before end pattern, continue pattern search
                        LOGGER.debug(
                            f"{self.__class__.__name__} capture<end: leading capture, continue",
                            self,
                            current,
                            kwargs.get("depth", 0),
                        )
                        mid_elements.extend(capture_elements)
                        current = capture_span[1]
                    else:
                        # End pattern found before capture pattern, break pattern search
                        LOGGER.debug(
                            f"{self.__class__.__name__} end<capture: leading end, break",
                            self,
                            current,
                            kwargs.get("depth", 0),
                        )
                        closing = end_span[0] if self.between_content else end_span[1]
                        break
                else:
                    # No capture pattern found, accept end pattern and break pattern search
                    LOGGER.debug(
                        f"{self.__class__.__name__} end: break",
                        self,
                        current,
                        kwargs.get("depth", 0),
                    )
                    closing = end_span[0] if self.between_content else end_span[1]
                    break
            else:  # No end pattern found
                if parsed:
                    # Append found capture pattern and find next starting position
                    mid_elements.extend(capture_elements)

                    if handler.read(capture_span[1], skip_newline=False) == "\n":
                        # Next character after capture pattern is newline
                        LOGGER.debug(
                            f"{self.__class__.__name__} capture: next is newline, continue",
                            self,
                            current,
                            kwargs.get("depth", 0),
                        )

                        end_span, _, _ = self.match_and_capture(
                            handler,
                            self.exp_end,
                            capture_span[1],
                            boundary=boundary,
                            parsers=self.parsers_end,
                            allow_leading_all=False,
                            **kwargs,
                        )

                        if end_span and end_span[1] <= handler.next(capture_span[1]):
                            # Potential end pattern can be found directly after the found capture pattern
                            current = capture_span[1]
                        else:
                            # Skip the newline character in the next pattern search round
                            current = handler.next(capture_span[1])
                    else:
                        LOGGER.debug(
                            f"{self.__class__.__name__} capture: continue",
                            self,
                            current,
                            kwargs.get("depth", 0),
                        )
                        current = capture_span[1]
                else:
                    # No capture patterns nor end patterns found. Skip the current line.
                    line = handler.read_line(current)

                    if line and not line.isspace():
                        LOGGER.warning(
                            f"No patterns found in line, skipping < {repr(line)} >",
                            self,
                            current,
                            kwargs.get("depth", 0),
                        )
                    current = handler.next((current[0], handler.line_lengths[current[0]]))

            if apply_end_pattern_last:
                # Step past the recursive match so the outer parser's end
                # pattern is not consumed again.
                current = handler.next(current)

            if first_run:
                # Skip all parsers that were anchored to the begin pattern after the first round
                patterns = [parser for parser in patterns if not parser.anchored]
                first_run = False
        else:
            # Did not break out of while loop, set closing to boundary
            closing = boundary
            end_span = ((0, 0), boundary)

        start = begin_span[1] if self.between_content else begin_span[0]
        content = handler.read_pos(start, closing)
        LOGGER.info(
            f"{self.__class__.__name__} found < {repr(content)} >",
            self,
            start,
            kwargs.get("depth", 0),
        )

        # Construct output elements
        if self.token:
            elements: list[Capture | ContentElement] = [
                ContentBlockElement(
                    token=self.token,
                    grammar=self.grammar,
                    content=content,
                    characters=handler.chars(start, closing),
                    children=mid_elements,
                    begin=begin_elements,
                    end=end_elements,
                )
            ]
        else:
            elements = begin_elements + mid_elements + end_elements

        return True, elements, (begin_span[0], end_span[1])
class BeginWhileParser(PatternsParser):
    """The parser for grammars for which a begin/while pattern is provided.

    Parsing itself is not implemented yet (:meth:`_parse` raises
    ``NotImplementedError``); only grammar/capture initialization is supported.
    """

    def __init__(self, grammar: dict, **kwargs) -> None:
        super().__init__(grammar, **kwargs)
        # contentName tags only the text between begin and while matches;
        # name tags the whole match.
        if "contentName" in grammar:
            self.token = grammar["contentName"]
            self.between_content = True
        else:
            self.token = grammar.get("name")
            self.between_content = False
        self.exp_begin = re.compile(grammar["begin"])
        self.exp_while = re.compile(grammar["while"])
        self.parsers_begin = self._init_captures(grammar, key="beginCaptures")
        self.parsers_while = self._init_captures(grammar, key="whileCaptures")

    def __repr__(self) -> str:
        if self.token:
            return f"{self.__class__.__name__}:{self.token}"
        else:
            identifier = self.key if self.key else "_".join(self.comment.lower().split(" "))
            return f"{self.__class__.__name__}:<{identifier}>"

    def _initialize_repository(self):
        """When the grammar has patterns, this method should called to initialize its inclusions.

        Resolves include strings in the begin/while capture parsers and
        recursively initializes them.
        """
        self.initialized = True
        super()._initialize_repository()
        # BUG FIX: this previously iterated ``self.parsers_end``, an attribute
        # that only BeginEndParser defines, so calling this method raised
        # AttributeError. The begin captures are the intended target.
        for key, value in self.parsers_begin.items():
            if not isinstance(value, GrammarParser):
                self.parsers_begin[key] = self._find_include(value)
        for key, value in self.parsers_while.items():
            if not isinstance(value, GrammarParser):
                self.parsers_while[key] = self._find_include(value)
        for parser in self.parsers_begin.values():
            if not parser.initialized:
                parser._initialize_repository()
        for parser in self.parsers_while.values():
            if not parser.initialized:
                parser._initialize_repository()

    def _parse(
        self,
        handler: ContentHandler,
        starting: POS,
        **kwargs,
    ):
        """The parse method for grammars for which a begin/while pattern is provided.

        :raises NotImplementedError: begin/while parsing is not supported yet.
        """
        raise NotImplementedError