From e1b585bbcbe511c87e19736fb5484f6d6b4fcb3d Mon Sep 17 00:00:00 2001
From: bygu4 <sacha05bugaev@gmail.com>
Date: Thu, 2 Jan 2025 03:01:33 +0300
Subject: [PATCH] update regular_expression docs

---
 pyformlang/regular_expression/python_regex.py |  14 +-
 pyformlang/regular_expression/regex.py        | 267 ++++++++----------
 pyformlang/regular_expression/regex_reader.py |  38 ++-
 3 files changed, 150 insertions(+), 169 deletions(-)

diff --git a/pyformlang/regular_expression/python_regex.py b/pyformlang/regular_expression/python_regex.py
index 0d446eb..66e73d0 100644
--- a/pyformlang/regular_expression/python_regex.py
+++ b/pyformlang/regular_expression/python_regex.py
@@ -1,6 +1,4 @@
-"""
-A class to read Python format regex
-"""
+"""A class to read Python format regex."""
 
 from typing import List, Tuple, Union, Pattern
 from re import compile as compile_regex
@@ -56,7 +54,7 @@
 
 
 class PythonRegex(Regex):
-    """ Represents a regular expression as used in Python.
+    r"""Represents a regular expression as used in Python.
 
     It adds the following features to the basic regex:
 
@@ -70,9 +68,9 @@ class PythonRegex(Regex):
 
     Parameters
     ----------
-    python_regex : Union[str, Pattern[str]]
-        The regex represented as a string or a compiled regex (
-        re.compile(...))
+    python_regex:
+        The regex represented as a string or a compiled regex
+        (re.compile(...)).
 
     Raises
     ------
@@ -95,10 +93,10 @@ class PythonRegex(Regex):
     True
     >>> p_regex.accepts(["d"])
     False
-
     """
 
     def __init__(self, python_regex: Union[str, Pattern[str]]) -> None:
+        """Initializes the regex in Python format."""
         if isinstance(python_regex, str):
             compile_regex(python_regex)  # Check if it is valid
         else:
diff --git a/pyformlang/regular_expression/regex.py b/pyformlang/regular_expression/regex.py
index 9fee921..abede98 100644
--- a/pyformlang/regular_expression/regex.py
+++ b/pyformlang/regular_expression/regex.py
@@ -1,6 +1,4 @@
-"""
-Representation of a regular expression
-"""
+"""Representation of a regular expression."""
 
 from typing import List, Iterable, Tuple, Optional
 
@@ -16,10 +14,10 @@
 
 
 class Regex(RegexReader):
-    """ Represents a regular expression
+    r"""Representation of a regular expression.
 
-    Pyformlang implements the operators of textbooks, which deviate slightly \
-    from the operators in Python. For a representation closer to Python one, \
+    Pyformlang implements the operators of textbooks, which deviate slightly
+    from the operators in Python. For a representation closer to Python one,
     please use :class:`~pyformlang.regular_expression.PythonRegex`
 
     * The concatenation can be represented either by a space or a dot (.)
@@ -27,21 +25,21 @@ class Regex(RegexReader):
     * The Kleene star is represented by *
     * The epsilon symbol can either be "epsilon" or $
 
-    It is also possible to use parentheses. All symbols except the space, ., \
- |, +, *, (, ), epsilon and $ can be part of the alphabet. All \
- other common regex operators (such as []) are syntactic sugar that can be \
- reduced to the previous operators. Another main difference is that the \
- alphabet is not reduced to single characters as it is the case in Python. \
- For example, "python" is a single symbol in Pyformlang, whereas it is the \
- concatenation of six symbols in regular Python.
+    It is also possible to use parentheses. All symbols except the space, .,
+    |, +, *, (, ), epsilon and $ can be part of the alphabet. All
+    other common regex operators (such as []) are syntactic sugar that can be
+    reduced to the previous operators. Another main difference is that the
+    alphabet is not reduced to single characters as it is the case in Python.
+    For example, "python" is a single symbol in Pyformlang, whereas it is the
+    concatenation of six symbols in regular Python.
 
     All special characters except epsilon can be escaped with a backslash (\
     double backslash \\ in strings).
 
     Parameters
     ----------
-    regex : str
-        The regex represented as a string
+    regex:
+        The regex represented as a string.
 
     Raises
     ------
@@ -50,7 +48,6 @@ class Regex(RegexReader):
 
     Examples
     --------
-
     >>> regex = Regex("abc|d")
 
     Check if the symbol "abc" is accepted
@@ -84,26 +81,24 @@ class Regex(RegexReader):
     Give the equivalent finite-state automaton
 
     >>> regex_concat.to_epsilon_nfa()
-
     """
 
     def __init__(self, regex: str) -> None:
+        """Initializes the regex from the given string."""
         super().__init__(regex)
         self.sons: List[Regex] # type: ignore
         self._counter = 0
         self._enfa: Optional[EpsilonNFA] = None
 
     def get_number_symbols(self) -> int:
-        """ Gives the number of symbols in the regex
+        """Gets the number of symbols in the regex.
 
         Returns
-        ----------
-        n_symbols : int
-            The number of symbols in the regex
+        -------
+        The number of symbols in the regex.
 
         Examples
         --------
-
         >>> regex = Regex("a|b*")
         >>> regex.get_number_symbols()
         2
@@ -115,52 +110,58 @@ def get_number_symbols(self) -> int:
         return 1
 
     def get_number_operators(self) -> int:
-        """ Gives the number of operators in the regex
+        """Gets the number of operators in the regex.
 
         Returns
-        ----------
-        n_operators : int
-            The number of operators in the regex
+        -------
+        The number of operators in the regex.
 
         Examples
         --------
-
         >>> regex = Regex("a|b*")
         >>> regex.get_number_operators()
         2
 
         The two operators are "|" and "*".
-
         """
         if self.sons:
             return 1 + sum(son.get_number_operators() for son in self.sons)
         return 0
 
     def to_minimal_dfa(self) -> DeterministicFiniteAutomaton:
-        """ Builds minimal dfa from current regex """
+        """Builds a minimal DFA from current regex.
+
+        Returns
+        -------
+        The minimal DFA equivalent to the current regex.
+        """
         enfa = self._to_epsilon_nfa_internal()
         dfa = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa)
         return dfa.minimize()
 
     def to_epsilon_nfa(self) -> EpsilonNFA:
-        """ Transforms the regular expression into an epsilon NFA
+        """Transforms the regular expression into an epsilon NFA.
 
         Returns
-        ----------
-        enfa : :class:`~pyformlang.finite_automaton.EpsilonNFA`
-            An epsilon NFA equivalent to the regex
+        -------
+        An epsilon NFA equivalent to the regex.
 
         Examples
         --------
-
         >>> regex = Regex("abc|d")
         >>> regex.to_epsilon_nfa()
-
         """
         return self._to_epsilon_nfa_internal().copy()
 
     def _to_epsilon_nfa_internal(self) -> EpsilonNFA:
-        """ Transforms the regular expression into an epsilon NFA """
+        """Transforms the regular expression into an epsilon NFA.
+
+        For internal use to prevent protected `enfa` member modification.
+
+        Returns
+        -------
+        An epsilon NFA equivalent to the regex.
+        """
         if self._enfa is None:
             self._enfa = EpsilonNFA()
             s_initial = self._set_and_get_initial_state_in_enfa(self._enfa)
@@ -182,14 +183,16 @@ def _process_to_enfa(self,
                          enfa: EpsilonNFA,
                          s_from: State,
                          s_to: State) -> None:
-        """ Internal function to add a regex to a given epsilon NFA
+        """Internal function to add a regex to a given epsilon NFA.
 
         Parameters
         ----------
-        s_from : :class:`~pyformlang.finite_automaton.State`
-            The source state
-        s_to : :class:`~pyformlang.finite_automaton.State`
-            The destination state
+        enfa:
+            Epsilon NFA to add the regex to.
+        s_from:
+            The source state.
+        s_to:
+            The destination state.
         """
         if self.sons:
             self._process_to_enfa_when_sons(enfa, s_from, s_to)
@@ -277,27 +280,25 @@ def _get_next_state_enfa(self) -> State:
         return s_final
 
     def get_tree_str(self, depth: int = 0) -> str:
-        """ Get a string representation of the tree behind the regex
+        """Get a string representation of the tree behind the regex.
 
         Parameters
         ----------
-        depth: int
-            The current depth, 0 by default
+        depth:
+            The current depth, 0 by default.
+
         Returns
         -------
-        representation: str
-            The tree representation
+        The tree representation of the regex.
 
         Examples
         --------
-
         >>> regex = Regex("abc|d*")
         >>> print(regex.get_tree_str())
         Operator(Union)
          Symbol(abc)
          Operator(Kleene Star)
           Symbol(d)
-
         """
         temp = " " * depth + str(self.head) + "\n"
         for son in self.sons:
@@ -305,31 +306,27 @@ def get_tree_str(self, depth: int = 0) -> str:
         return temp
 
     def to_cfg(self, starting_symbol: str = "S") -> CFG:
-        """
-        Turns the regex into a context-free grammar
+        """Turns the regex into a context-free grammar.
 
         Parameters
         ----------
-        starting_symbol : :class:`~pyformlang.cfg.Variable`, optional
-            The starting symbol
+        starting_symbol:
+            The starting symbol of the grammar.
 
         Returns
         -------
-        cfg : :class:`~pyformlang.cfg.CFG`
-            An equivalent context-free grammar
+        An equivalent context-free grammar.
 
         Examples
         --------
-
         >>> regex = Regex("(a|b)* c")
         >>> my_cfg = regex.to_cfg()
         >>> my_cfg.contains(["c"])
         True
-
         """
         productions, _ = self._get_production(starting_symbol)
         cfg_res = CFG(start_symbol=to_variable(starting_symbol),
-                          productions=set(productions))
+                      productions=set(productions))
         return cfg_res
 
     def _get_production(self, current_symbol: str, count: int = 0) \
@@ -348,27 +345,26 @@ def _get_production(self, current_symbol: str, count: int = 0) \
         return next_productions, count
 
     def __repr__(self) -> str:
+        """Gets the string representation of the regex."""
         return self.head.get_str_repr([str(son) for son in self.sons])
 
     def union(self, other: "Regex") -> "Regex":
-        """ Makes the union with another regex
+        """Makes the union with another regex.
 
         Equivalent to:
-          >>> regex0 or regex1
+            >>> regex0 | regex1
 
         Parameters
         ----------
-        other : :class:`~pyformlang.regular_expression.Regex`
-            The other regex
+        other:
+            The other regex.
 
         Returns
-        ----------
-        regex : :class:`~pyformlang.regular_expression.Regex`
-            The union of the two regex
+        -------
+        The union of the two regexps.
 
         Examples
         --------
-
         >>> regex0 = Regex("a b")
         >>> regex1 = Regex("c")
         >>> regex_union = regex0.union(regex1)
@@ -377,9 +373,8 @@ def union(self, other: "Regex") -> "Regex":
 
         Or equivalently:
 
-        >>> regex_union = regex0 or regex1
+        >>> regex_union = regex0 | regex1
         >>> regex_union.accepts(["a", "b"])
-
         """
         regex = Regex("")
         regex.head = Union()
@@ -387,21 +382,19 @@ def union(self, other: "Regex") -> "Regex":
         return regex
 
     def __or__(self, other: "Regex") -> "Regex":
-        """ Makes the union with another regex
+        """Makes the union with another regex.
 
         Parameters
         ----------
-        other : :class:`~pyformlang.regular_expression.Regex`
-            The other regex
+        other:
+            The other regex.
 
         Returns
-        ----------
-        regex : :class:`~pyformlang.regular_expression.Regex`
-            The union of the two regex
+        -------
+        The union of the two regexps.
 
         Examples
         --------
-
         >>> regex0 = Regex("a b")
         >>> regex1 = Regex("c")
         >>> regex_union = regex0.union(regex1)
@@ -412,31 +405,29 @@ def __or__(self, other: "Regex") -> "Regex":
 
         Or equivalently:
 
-        >>> regex_union = regex0 or regex1
+        >>> regex_union = regex0 | regex1
         >>> regex_union.accepts(["a", "b"])
         True
         """
         return self.union(other)
 
     def concatenate(self, other: "Regex") -> "Regex":
-        """ Concatenates a regular expression with an other one
+        """Concatenates a regular expression with another one.
 
         Equivalent to:
-          >>> regex0 + regex1
+            >>> regex0 + regex1
 
         Parameters
         ----------
-        other : :class:`~pyformlang.regular_expression.Regex`
-            The other regex
+        other:
+            The other regex.
 
         Returns
-        ----------
-        regex : :class:`~pyformlang.regular_expression.Regex`
-            The concatenation of the two regex
+        -------
+        The concatenation of the two regexps.
 
         Examples
         --------
-
         >>> regex0 = Regex("a b")
         >>> regex1 = Regex("c")
         >>> regex_union = regex0.concatenate(regex1)
@@ -457,21 +448,19 @@ def concatenate(self, other: "Regex") -> "Regex":
         return regex
 
     def __add__(self, other: "Regex") -> "Regex":
-        """ Concatenates a regular expression with an other one
+        """Concatenates a regular expression with another one.
 
         Parameters
         ----------
-        other : :class:`~pyformlang.regular_expression.Regex`
-            The other regex
+        other:
+            The other regex.
 
         Returns
-        ----------
-        regex : :class:`~pyformlang.regular_expression.Regex`
-            The concatenation of the two regex
+        -------
+        The concatenation of the two regexps.
 
         Examples
         --------
-
         >>> regex0 = Regex("a b")
         >>> regex1 = Regex("c")
         >>> regex_union = regex0.concatenate(regex1)
@@ -485,28 +474,24 @@ def __add__(self, other: "Regex") -> "Regex":
         >>> regex_union = regex0 + regex1
         >>> regex_union.accepts(["a", "b", "c"])
         True
-
         """
         return self.concatenate(other)
 
     def kleene_star(self) -> "Regex":
-        """ Makes the kleene star of the current regex
+        """Gets the Kleene star of the current regex.
 
         Returns
-        ----------
-        regex : :class:`~pyformlang.regular_expression.Regex`
-            The kleene star of the current regex
+        -------
+        The Kleene star of the current regex.
 
         Examples
         --------
-
         >>> regex = Regex("a")
         >>> regex_kleene = regex.kleene_star()
         >>> regex_kleene.accepts([])
         True
         >>> regex_kleene.accepts(["a", "a", "a"])
         True
-
         """
         regex = Regex("")
         regex.head = KleeneStar()
@@ -514,19 +499,19 @@ def kleene_star(self) -> "Regex":
         return regex
 
     def from_string(self, regex_str: str) -> "Regex":
-        """ Construct a regex from a string. For internal usage.
+        """Construct a regex from a string.
 
-        Equivalent to the constructor of Regex
+        Equivalent to the constructor of Regex.
 
         Parameters
         ----------
-        regex_str : str
-            The string representation of the regex
+        regex_str:
+            The string representation of the regex.
 
         Returns
         -------
-        regex : :class:`~pyformlang.regular_expression.Regex`
-            The regex
+        regex:
+            The regex built from the given string.
 
         Examples
         --------
@@ -535,49 +520,42 @@ def from_string(self, regex_str: str) -> "Regex":
         , which is equivalent to:
 
         >>> Regex("a b c")
-
         """
         return Regex(regex_str)
 
     def accepts(self, word: Iterable[str]) -> bool:
-        """
-        Check if a word matches (completely) the regex
+        """Check if a word matches (completely) the regex.
 
         Parameters
         ----------
-        word : iterable of str
-            The word to check
+        word:
+            The word to check.
 
         Returns
         -------
-        is_accepted : bool
-            Whether the word is recognized or not
+        Whether the word is recognized or not.
 
         Examples
         --------
-
         >>> regex = Regex("abc|d")
 
         Check if the symbol "abc" is accepted
 
         >>> regex.accepts(["abc"])
         True
-
         """
         return self._to_epsilon_nfa_internal().accepts(word)
 
     @classmethod
     def from_finite_automaton(cls, automaton: FiniteAutomaton) -> "Regex":
-        """ Creates a regular expression from given finite automaton
+        """Creates a regular expression from given finite automaton.
 
         Returns
-        ----------
-        regex : :class:`~pyformlang.regular_expression.Regex`
-            A regular expression equivalent to the current Epsilon NFA
+        -------
+        A regular expression equivalent to the given finite automaton.
 
         Examples
         --------
-
         >>> enfa = EpsilonNFA()
         >>> enfa.add_transitions([(0, "abc", 1), (0, "d", 1), \
         (0, "epsilon", 2)])
@@ -586,7 +564,6 @@ def from_finite_automaton(cls, automaton: FiniteAutomaton) -> "Regex":
         >>> regex = enfa.to_regex()
         >>> regex.accepts(["abc"])
         True
-
         """
         copies = [automaton.copy() for _ in automaton.final_states]
         final_states = list(automaton.final_states)
@@ -605,15 +582,16 @@ def from_finite_automaton(cls, automaton: FiniteAutomaton) -> "Regex":
 
     @classmethod
     def _get_regex_simple(cls, automaton: FiniteAutomaton) -> str:
-        """ Get the regex of an automaton when it only composed of a start and
-        a final state
+        """Gets the regex of the automaton in a simple form.
+
+        Gets the regex of an automaton when it is only composed of a start and
+        a final state.
 
         CAUTION: For internal use only!
 
         Returns
-        ----------
-        regex : str
-            A regex representing the automaton
+        -------
+        A regex representing the automaton.
         """
         if not automaton.final_states or not automaton.start_states:
             return ""
@@ -636,19 +614,18 @@ def _get_regex_simple(cls, automaton: FiniteAutomaton) -> str:
     @classmethod
     def _get_bi_transitions(cls, automaton: FiniteAutomaton) \
             -> Tuple[str, str, str, str]:
-        """ Internal method to compute the transition in the case of a \
-        simple automaton
+        """Computes the transitions in the case of a simple automaton.
 
         Returns
-        start_to_start : str
-            The transition from the start state to the start state
-        start_to_end : str
-            The transition from the start state to the end state
-        end_to_start : str
-            The transition from the end state to the start state
-        end_to_end : str
-            The transition from the end state to the end state
-        ----------
+        -------
+        start_to_start:
+            The transition from the start state to the start state.
+        start_to_end:
+            The transition from the start state to the end state.
+        end_to_start:
+            The transition from the end state to the start state.
+        end_to_end:
+            The transition from the end state to the end state.
         """
         start = list(automaton.start_states)[0]
         end = list(automaton.final_states)[0]
@@ -674,13 +651,13 @@ def _get_bi_transitions(cls, automaton: FiniteAutomaton) \
 
     @classmethod
     def _remove_all_basic_states(cls, automaton: FiniteAutomaton) -> None:
-        """ Remove all states which are not the start state or a final state
+        """Remove all states which are not the start state or a final state.
 
         CAREFUL: This method modifies the current automaton, for internal usage
         only!
 
         The function _create_or_transitions is supposed to be called before
-        calling this function
+        calling this function.
         """
         cls._create_or_transitions(automaton)
         states = automaton.states.copy()
@@ -691,7 +668,7 @@ def _remove_all_basic_states(cls, automaton: FiniteAutomaton) -> None:
 
     @classmethod
     def _remove_state(cls, automaton: FiniteAutomaton, state: State) -> None:
-        """ Removes a given state from the epsilon NFA
+        """Removes a given state from the epsilon NFA.
 
         CAREFUL: This method modifies the current automaton, for internal usage
         only!
@@ -701,9 +678,8 @@ def _remove_state(cls, automaton: FiniteAutomaton, state: State) -> None:
 
         Parameters
         ----------
-        state : :class:`~pyformlang.finite_automaton.State`
-            The state to remove
-
+        state:
+            The state to remove.
         """
         # First compute all endings
         out_transitions = {}
@@ -737,9 +713,9 @@ def _remove_state(cls, automaton: FiniteAutomaton, state: State) -> None:
 
     @classmethod
     def _create_or_transitions(cls, automaton: FiniteAutomaton) -> None:
-        """ Creates a OR transition instead of several connections
+        """Creates an OR transition instead of several connections.
 
-        CAREFUL: This method modifies the automaton and is designed for \
+        CAREFUL: This method modifies the automaton and is designed for
         internal use only!
         """
         for state in automaton.states:
@@ -770,7 +746,7 @@ def __get_regex_sub(cls,
                         start_to_end: str,
                         end_to_start: str,
                         end_to_end: str) -> str:
-        """ Combines the transitions in the regex simple function """
+        """Combines the transitions in the regex simple function."""
         if not start_to_end:
             return ""
         temp, part1 = cls.__get_temp(start_to_end, end_to_start, end_to_end)
@@ -789,10 +765,7 @@ def __get_temp(cls,
                    start_to_end: str,
                    end_to_start: str,
                    end_to_end: str) -> Tuple[str, str]:
-        """
-        Gets a temp values in the computation
-        of the simple automaton regex.
-        """
+        """Gets temporary values used when computing the simple FA regex."""
         temp = "epsilon"
         if (start_to_end != "epsilon"
                 or end_to_end != "epsilon"
diff --git a/pyformlang/regular_expression/regex_reader.py b/pyformlang/regular_expression/regex_reader.py
index 157847b..e999366 100644
--- a/pyformlang/regular_expression/regex_reader.py
+++ b/pyformlang/regular_expression/regex_reader.py
@@ -1,6 +1,4 @@
-"""
-A class to read regex
-"""
+"""A class to parse regular expressions."""
 
 from typing import List, Optional
 from re import sub
@@ -16,12 +14,25 @@
 
 
 class RegexReader:
+    """A class to parse regular expressions.
+
+    Parses the given regex.
+
+    Attributes
+    ----------
+    head:
+        The root of the tree representing the regex.
+    sons:
+        The child regexps of the current one.
+
+    Parameters
+    ----------
+    regex:
+        The regex to parse.
     """
-    A class to parse regular expressions
-    """
-    # pylint: disable=too-few-public-methods
 
     def __init__(self, regex: str) -> None:
+        """Parses the given regex."""
         self._current_node: Optional[Node] = None
         self.head: Node = Empty()
         self.sons: List[RegexReader] = []
@@ -93,7 +104,7 @@ def _compute_precedent_when_not_kleene_nor_union(self) -> None:
                 0, self._end_current_group)
 
     def _compute_precedence(self) -> None:
-        """ Add parenthesis for the first group in indicate precedence """
+        """Adds parentheses for the first group to indicate precedence."""
         self._setup_precedence()
         if isinstance(self._current_node, KleeneStar):
             self._add_parenthesis_around_part_of_componants(
@@ -112,7 +123,7 @@ def _set_next_end_group_and_node(self) -> None:
                 self._components[self._end_current_group])
 
     def _set_end_first_group_in_components(self, idx_from: int = 0) -> None:
-        """ Gives the end of the first group """
+        """Sets the end of the first group."""
         if idx_from >= len(self._components):
             self._end_current_group = idx_from
         elif self._components[idx_from] == ")":
@@ -174,17 +185,16 @@ def _check_is_valid_single_first_symbol(self, first_symbol: Node) -> None:
             raise MisformedRegexError(MISFORMED_MESSAGE, self._regex)
 
     def from_string(self, regex_str: str) -> "RegexReader":
-        """
-        Read a regex from a string
+        """Reads a regex from a string.
+
         Parameters
         ----------
-        regex_str : str
-            A regular expression
+        regex_str:
+            A regular expression to read.
 
         Returns
         -------
-        parsed_regex : :class:`~pyformlang.regular_expression.RegexReader`
-            The parsed regex
+        The parsed regex.
         """
         return RegexReader(regex_str)