From e1b585bbcbe511c87e19736fb5484f6d6b4fcb3d Mon Sep 17 00:00:00 2001 From: bygu4 Date: Thu, 2 Jan 2025 03:01:33 +0300 Subject: [PATCH] update regular_expression docs --- pyformlang/regular_expression/python_regex.py | 14 +- pyformlang/regular_expression/regex.py | 267 ++++++++---------- pyformlang/regular_expression/regex_reader.py | 38 ++- 3 files changed, 150 insertions(+), 169 deletions(-) diff --git a/pyformlang/regular_expression/python_regex.py b/pyformlang/regular_expression/python_regex.py index 0d446eb..66e73d0 100644 --- a/pyformlang/regular_expression/python_regex.py +++ b/pyformlang/regular_expression/python_regex.py @@ -1,6 +1,4 @@ -""" -A class to read Python format regex -""" +"""A class to read Python format regex.""" from typing import List, Tuple, Union, Pattern from re import compile as compile_regex @@ -56,7 +54,7 @@ class PythonRegex(Regex): - """ Represents a regular expression as used in Python. + r"""Represents a regular expression as used in Python. It adds the following features to the basic regex: @@ -70,9 +68,9 @@ class PythonRegex(Regex): Parameters ---------- - python_regex : Union[str, Pattern[str]] - The regex represented as a string or a compiled regex ( - re.compile(...)) + python_regex: + The regex represented as a string or a compiled regex + (re.compile(...)). 
Raises ------ @@ -95,10 +93,10 @@ class PythonRegex(Regex): True >>> p_regex.accepts(["d"]) False - """ def __init__(self, python_regex: Union[str, Pattern[str]]) -> None: + """Initializes the regex in python format.""" if isinstance(python_regex, str): compile_regex(python_regex) # Check if it is valid else: diff --git a/pyformlang/regular_expression/regex.py b/pyformlang/regular_expression/regex.py index 9fee921..abede98 100644 --- a/pyformlang/regular_expression/regex.py +++ b/pyformlang/regular_expression/regex.py @@ -1,6 +1,4 @@ -""" -Representation of a regular expression -""" +"""Representation of a regular expression.""" from typing import List, Iterable, Tuple, Optional @@ -16,10 +14,10 @@ class Regex(RegexReader): - """ Represents a regular expression + r"""Representation of a regular expression. - Pyformlang implements the operators of textbooks, which deviate slightly \ - from the operators in Python. For a representation closer to Python one, \ + Pyformlang implements the operators of textbooks, which deviate slightly + from the operators in Python. For a representation closer to Python one, please use :class:`~pyformlang.regular_expression.PythonRegex` * The concatenation can be represented either by a space or a dot (.) @@ -27,21 +25,21 @@ class Regex(RegexReader): * The Kleene star is represented by * * The epsilon symbol can either be "epsilon" or $ - It is also possible to use parentheses. All symbols except the space, ., \ - |, +, *, (, ), epsilon and $ can be part of the alphabet. All \ - other common regex operators (such as []) are syntactic sugar that can be \ - reduced to the previous operators. Another main difference is that the \ - alphabet is not reduced to single characters as it is the case in Python. \ - For example, "python" is a single symbol in Pyformlang, whereas it is the \ - concatenation of six symbols in regular Python. + It is also possible to use parentheses. 
All symbols except the space, ., + |, +, *, (, ), epsilon and $ can be part of the alphabet. All + other common regex operators (such as []) are syntactic sugar that can be + reduced to the previous operators. Another main difference is that the + alphabet is not reduced to single characters as it is the case in Python. + For example, "python" is a single symbol in Pyformlang, whereas it is the + concatenation of six symbols in regular Python. All special characters except epsilon can be escaped with a backslash (\ double backslash \\ in strings). Parameters ---------- - regex : str - The regex represented as a string + regex: + The regex represented as a string. Raises ------ @@ -50,7 +48,6 @@ class Regex(RegexReader): Examples -------- - >>> regex = Regex("abc|d") Check if the symbol "abc" is accepted @@ -84,26 +81,24 @@ class Regex(RegexReader): Give the equivalent finite-state automaton >>> regex_concat.to_epsilon_nfa() - """ def __init__(self, regex: str) -> None: + """Initializes the regex from the given string.""" super().__init__(regex) self.sons: List[Regex] # type: ignore self._counter = 0 self._enfa: Optional[EpsilonNFA] = None def get_number_symbols(self) -> int: - """ Gives the number of symbols in the regex + """Gets the number of symbols in the regex. Returns - ---------- - n_symbols : int - The number of symbols in the regex + ------- + The number of symbols in the regex. Examples -------- - >>> regex = Regex("a|b*") >>> regex.get_number_symbols() 2 @@ -115,52 +110,58 @@ def get_number_symbols(self) -> int: return 1 def get_number_operators(self) -> int: - """ Gives the number of operators in the regex + """Gets the number of operators in the regex. Returns - ---------- - n_operators : int - The number of operators in the regex + ------- + The number of operators in the regex. Examples -------- - >>> regex = Regex("a|b*") >>> regex.get_number_operators() 2 The two operators are "|" and "*". 
- """ if self.sons: return 1 + sum(son.get_number_operators() for son in self.sons) return 0 def to_minimal_dfa(self) -> DeterministicFiniteAutomaton: - """ Builds minimal dfa from current regex """ + """Builds a minimal DFA from current regex. + + Returns + ------- + The minimal DFA equivalent to the current regex. + """ enfa = self._to_epsilon_nfa_internal() dfa = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa) return dfa.minimize() def to_epsilon_nfa(self) -> EpsilonNFA: - """ Transforms the regular expression into an epsilon NFA + """Transforms the regular expression into an epsilon NFA. Returns - ---------- - enfa : :class:`~pyformlang.finite_automaton.EpsilonNFA` - An epsilon NFA equivalent to the regex + ------- + An epsilon NFA equivalent to the regex. Examples -------- - >>> regex = Regex("abc|d") >>> regex.to_epsilon_nfa() - """ return self._to_epsilon_nfa_internal().copy() def _to_epsilon_nfa_internal(self) -> EpsilonNFA: - """ Transforms the regular expression into an epsilon NFA """ + """Transforms the regular expression into an epsilon NFA. + + For internal use to prevent protected `enfa` member modification. + + Returns + ------- + An epsilon NFA equivalent to the regex. + """ if self._enfa is None: self._enfa = EpsilonNFA() s_initial = self._set_and_get_initial_state_in_enfa(self._enfa) @@ -182,14 +183,16 @@ def _process_to_enfa(self, enfa: EpsilonNFA, s_from: State, s_to: State) -> None: - """ Internal function to add a regex to a given epsilon NFA + """Internal function to add a regex to a given epsilon NFA. Parameters ---------- - s_from : :class:`~pyformlang.finite_automaton.State` - The source state - s_to : :class:`~pyformlang.finite_automaton.State` - The destination state + enfa: + Epsilon NFA to add the regex to. + s_from: + The source state. + s_to: + The destination state. 
""" if self.sons: self._process_to_enfa_when_sons(enfa, s_from, s_to) @@ -277,27 +280,25 @@ def _get_next_state_enfa(self) -> State: return s_final def get_tree_str(self, depth: int = 0) -> str: - """ Get a string representation of the tree behind the regex + """Get a string representation of the tree behind the regex. Parameters ---------- - depth: int - The current depth, 0 by default + depth: + The current depth, 0 by default. + Returns ------- - representation: str - The tree representation + The tree representation of the regex. Examples -------- - >>> regex = Regex("abc|d*") >>> print(regex.get_tree_str()) Operator(Union) Symbol(abc) Operator(Kleene Star) Symbol(d) - """ temp = " " * depth + str(self.head) + "\n" for son in self.sons: @@ -305,31 +306,27 @@ def get_tree_str(self, depth: int = 0) -> str: return temp def to_cfg(self, starting_symbol: str = "S") -> CFG: - """ - Turns the regex into a context-free grammar + """Turns the regex into a context-free grammar. Parameters ---------- - starting_symbol : :class:`~pyformlang.cfg.Variable`, optional - The starting symbol + starting_symbol: + The starting symbol of the grammar. Returns ------- - cfg : :class:`~pyformlang.cfg.CFG` - An equivalent context-free grammar + An equivalent context-free grammar. 
Examples -------- - >>> regex = Regex("(a|b)* c") >>> my_cfg = regex.to_cfg() >>> my_cfg.contains(["c"]) True - """ productions, _ = self._get_production(starting_symbol) cfg_res = CFG(start_symbol=to_variable(starting_symbol), - productions=set(productions)) + productions=set(productions)) return cfg_res def _get_production(self, current_symbol: str, count: int = 0) \ @@ -348,27 +345,26 @@ def _get_production(self, current_symbol: str, count: int = 0) \ return next_productions, count def __repr__(self) -> str: + """Gets the string representation of the regex.""" return self.head.get_str_repr([str(son) for son in self.sons]) def union(self, other: "Regex") -> "Regex": - """ Makes the union with another regex + """Makes the union with another regex. Equivalent to: - >>> regex0 or regex1 + >>> regex0 | regex1 Parameters ---------- - other : :class:`~pyformlang.regular_expression.Regex` - The other regex + other: + The other regex. Returns - ---------- - regex : :class:`~pyformlang.regular_expression.Regex` - The union of the two regex + ------- + The union of the two regexps. Examples -------- - >>> regex0 = Regex("a b") >>> regex1 = Regex("c") >>> regex_union = regex0.union(regex1) @@ -377,9 +373,8 @@ def union(self, other: "Regex") -> "Regex": Or equivalently: - >>> regex_union = regex0 or regex1 + >>> regex_union = regex0 | regex1 >>> regex_union.accepts(["a", "b"]) - """ regex = Regex("") regex.head = Union() @@ -387,21 +382,19 @@ def union(self, other: "Regex") -> "Regex": return regex def __or__(self, other: "Regex") -> "Regex": - """ Makes the union with another regex + """Makes the union with another regex. Parameters ---------- - other : :class:`~pyformlang.regular_expression.Regex` - The other regex + other: + The other regex. Returns - ---------- - regex : :class:`~pyformlang.regular_expression.Regex` - The union of the two regex + ------- + The union of the two regexps. 
Examples -------- - >>> regex0 = Regex("a b") >>> regex1 = Regex("c") >>> regex_union = regex0.union(regex1) @@ -412,31 +405,29 @@ def __or__(self, other: "Regex") -> "Regex": Or equivalently: - >>> regex_union = regex0 or regex1 + >>> regex_union = regex0 | regex1 >>> regex_union.accepts(["a", "b"]) True """ return self.union(other) def concatenate(self, other: "Regex") -> "Regex": - """ Concatenates a regular expression with an other one + """Concatenates a regular expression with another one. Equivalent to: - >>> regex0 + regex1 + >>> regex0 + regex1 Parameters ---------- - other : :class:`~pyformlang.regular_expression.Regex` - The other regex + other: + The other regex. Returns - ---------- - regex : :class:`~pyformlang.regular_expression.Regex` - The concatenation of the two regex + ------- + The concatenation of the two regexps. Examples -------- - >>> regex0 = Regex("a b") >>> regex1 = Regex("c") >>> regex_union = regex0.concatenate(regex1) @@ -457,21 +448,19 @@ def concatenate(self, other: "Regex") -> "Regex": return regex def __add__(self, other: "Regex") -> "Regex": - """ Concatenates a regular expression with an other one + """Concatenates a regular expression with another one. Parameters ---------- - other : :class:`~pyformlang.regular_expression.Regex` - The other regex + other: + The other regex. Returns - ---------- - regex : :class:`~pyformlang.regular_expression.Regex` - The concatenation of the two regex + ------- + The concatenation of the two regexps. Examples -------- - >>> regex0 = Regex("a b") >>> regex1 = Regex("c") >>> regex_union = regex0.concatenate(regex1) @@ -485,28 +474,24 @@ def __add__(self, other: "Regex") -> "Regex": >>> regex_union = regex0 + regex1 >>> regex_union.accepts(["a", "b", "c"]) True - """ return self.concatenate(other) def kleene_star(self) -> "Regex": - """ Makes the kleene star of the current regex + """Gets the kleene star of the current regex. 
Returns - ---------- - regex : :class:`~pyformlang.regular_expression.Regex` - The kleene star of the current regex + ------- + The kleene star of the current regex. Examples -------- - >>> regex = Regex("a") >>> regex_kleene = regex.kleene_star() >>> regex_kleene.accepts([]) True >>> regex_kleene.accepts(["a", "a", "a"]) True - """ regex = Regex("") regex.head = KleeneStar() @@ -514,19 +499,19 @@ def kleene_star(self) -> "Regex": return regex def from_string(self, regex_str: str) -> "Regex": - """ Construct a regex from a string. For internal usage. + """Construct a regex from a string. - Equivalent to the constructor of Regex + Equivalent to the constructor of Regex. Parameters ---------- - regex_str : str - The string representation of the regex + regex_str: + The string representation of the regex. Returns ------- - regex : :class:`~pyformlang.regular_expression.Regex` - The regex + regex: + The regex built from the given string. Examples -------- @@ -535,49 +520,42 @@ def from_string(self, regex_str: str) -> "Regex": , which is equivalent to: >>> Regex("a b c") - """ return Regex(regex_str) def accepts(self, word: Iterable[str]) -> bool: - """ - Check if a word matches (completely) the regex + """Check if a word matches (completely) the regex. Parameters ---------- - word : iterable of str - The word to check + word: + The word to check. Returns ------- - is_accepted : bool - Whether the word is recognized or not + Whether the word is recognized or not. Examples -------- - >>> regex = Regex("abc|d") Check if the symbol "abc" is accepted >>> regex.accepts(["abc"]) True - """ return self._to_epsilon_nfa_internal().accepts(word) @classmethod def from_finite_automaton(cls, automaton: FiniteAutomaton) -> "Regex": - """ Creates a regular expression from given finite automaton + """Creates a regular expression from given finite automaton. 
Returns - ---------- - regex : :class:`~pyformlang.regular_expression.Regex` - A regular expression equivalent to the current Epsilon NFA + ------- + A regular expression equivalent to the given finite automaton. Examples -------- - >>> enfa = EpsilonNFA() >>> enfa.add_transitions([(0, "abc", 1), (0, "d", 1), \ (0, "epsilon", 2)]) @@ -586,7 +564,6 @@ def from_finite_automaton(cls, automaton: FiniteAutomaton) -> "Regex": >>> regex = enfa.to_regex() >>> regex.accepts(["abc"]) True - """ copies = [automaton.copy() for _ in automaton.final_states] final_states = list(automaton.final_states) @@ -605,15 +582,16 @@ def from_finite_automaton(cls, automaton: FiniteAutomaton) -> "Regex": @classmethod def _get_regex_simple(cls, automaton: FiniteAutomaton) -> str: - """ Get the regex of an automaton when it only composed of a start and - a final state + """Gets the regex of the automaton in a simple form. + + Gets the regex of an automaton when it is only composed of a start and + a final state. CAUTION: For internal use only! Returns - ---------- - regex : str - A regex representing the automaton + ------- + A regex representing the automaton. """ if not automaton.final_states or not automaton.start_states: return "" @@ -636,19 +614,18 @@ def _get_regex_simple(cls, automaton: FiniteAutomaton) -> str: @classmethod def _get_bi_transitions(cls, automaton: FiniteAutomaton) \ -> Tuple[str, str, str, str]: - """ Internal method to compute the transition in the case of a \ - simple automaton + """Compute the transition in the case of a simple automaton. Returns - start_to_start : str - The transition from the start state to the start state - start_to_end : str - The transition from the start state to the end state - end_to_start : str - The transition from the end state to the start state - end_to_end : str - The transition from the end state to the end state - ---------- + ------- + start_to_start: + The transition from the start state to the start state. 
+ start_to_end: + The transition from the start state to the end state. + end_to_start: + The transition from the end state to the start state. + end_to_end: + The transition from the end state to the end state. """ start = list(automaton.start_states)[0] end = list(automaton.final_states)[0] @@ -674,13 +651,13 @@ def _get_bi_transitions(cls, automaton: FiniteAutomaton) \ @classmethod def _remove_all_basic_states(cls, automaton: FiniteAutomaton) -> None: - """ Remove all states which are not the start state or a final state + """Remove all states which are not the start state or a final state. CAREFUL: This method modifies the current automaton, for internal usage only! The function _create_or_transitions is supposed to be called before - calling this function + calling this function. """ cls._create_or_transitions(automaton) states = automaton.states.copy() @@ -691,7 +668,7 @@ def _remove_all_basic_states(cls, automaton: FiniteAutomaton) -> None: @classmethod def _remove_state(cls, automaton: FiniteAutomaton, state: State) -> None: - """ Removes a given state from the epsilon NFA + """Removes a given state from the epsilon NFA. CAREFUL: This method modifies the current automaton, for internal usage only! @@ -701,9 +678,8 @@ def _remove_state(cls, automaton: FiniteAutomaton, state: State) -> None: Parameters ---------- - state : :class:`~pyformlang.finite_automaton.State` - The state to remove - + state: + The state to remove. """ # First compute all endings out_transitions = {} @@ -737,9 +713,9 @@ def _remove_state(cls, automaton: FiniteAutomaton, state: State) -> None: @classmethod def _create_or_transitions(cls, automaton: FiniteAutomaton) -> None: - """ Creates a OR transition instead of several connections + """Creates a OR transition instead of several connections. - CAREFUL: This method modifies the automaton and is designed for \ + CAREFUL: This method modifies the automaton and is designed for internal use only! 
""" for state in automaton.states: @@ -770,7 +746,7 @@ def __get_regex_sub(cls, start_to_end: str, end_to_start: str, end_to_end: str) -> str: - """ Combines the transitions in the regex simple function """ + """Combines the transitions in the regex simple function.""" if not start_to_end: return "" temp, part1 = cls.__get_temp(start_to_end, end_to_start, end_to_end) @@ -789,10 +765,7 @@ def __get_temp(cls, start_to_end: str, end_to_start: str, end_to_end: str) -> Tuple[str, str]: - """ - Gets a temp values in the computation - of the simple automaton regex. - """ + """Gets temporary values for computing the simple FA regex.""" temp = "epsilon" if (start_to_end != "epsilon" or end_to_end != "epsilon" diff --git a/pyformlang/regular_expression/regex_reader.py b/pyformlang/regular_expression/regex_reader.py index 157847b..e999366 100644 --- a/pyformlang/regular_expression/regex_reader.py +++ b/pyformlang/regular_expression/regex_reader.py @@ -1,6 +1,4 @@ -""" -A class to read regex -""" +"""A class to parse regular expressions.""" from typing import List, Optional from re import sub @@ -16,12 +14,25 @@ class RegexReader: + """A class to parse regular expressions. + + Parses the given regex. + + Attributes + ---------- + head: + A root of the tree representing the regex. + sons: + The child regexps of the current one. + + Parameters + ---------- + regex: + The regex to parse. 
""" - A class to parse regular expressions - """ - # pylint: disable=too-few-public-methods def __init__(self, regex: str) -> None: + """Parses the given regex.""" self._current_node: Optional[Node] = None self.head: Node = Empty() self.sons: List[RegexReader] = [] @@ -93,7 +104,7 @@ def _compute_precedent_when_not_kleene_nor_union(self) -> None: 0, self._end_current_group) def _compute_precedence(self) -> None: - """ Add parenthesis for the first group in indicate precedence """ + """Adds parentheses around the first group to indicate precedence.""" self._setup_precedence() if isinstance(self._current_node, KleeneStar): self._add_parenthesis_around_part_of_componants( @@ -112,7 +123,7 @@ def _set_next_end_group_and_node(self) -> None: self._components[self._end_current_group]) def _set_end_first_group_in_components(self, idx_from: int = 0) -> None: - """ Gives the end of the first group """ + """Gives the end of the first group.""" if idx_from >= len(self._components): self._end_current_group = idx_from elif self._components[idx_from] == ")": @@ -174,17 +185,16 @@ def _check_is_valid_single_first_symbol(self, first_symbol: Node) -> None: raise MisformedRegexError(MISFORMED_MESSAGE, self._regex) def from_string(self, regex_str: str) -> "RegexReader": - """ - Read a regex from a string + """Reads a regex from a string. + Parameters ---------- - regex_str : str - A regular expression + regex_str: + A regular expression to read. Returns ------- - parsed_regex : :class:`~pyformlang.regular_expression.RegexReader` - The parsed regex + The parsed regex. """ return RegexReader(regex_str)