From e0fa5024cfb112d696486b881273f2c653e0e383 Mon Sep 17 00:00:00 2001 From: Dmitry Gritsenko Date: Mon, 4 Dec 2023 09:25:11 +0400 Subject: [PATCH 1/4] feat: wordstreamer integration --- README.md | 79 +++++++++++++++++++------------------ pyproject.toml | 3 +- rgx/entities.py | 91 ++++++++++++++++++++++++------------------- test/test_creation.py | 2 +- 4 files changed, 97 insertions(+), 78 deletions(-) diff --git a/README.md b/README.md index e871c3d..7a364d2 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,10 @@ -Many people complain about unreadable and complex syntax of regular expressions. +[![wordstreamer badge](https://img.shields.io/badge/renderable-what?label=wordstreamer&color=%2333bb33)](https://github.com/evtn/wordstreamer) + +Many people complain about unreadable and complex syntax of regular expressions. Many others complain about how they can't remember all constructs and features. -`rgx` solves those problems: it is a straightforward regexp builder. It also places non-capturing groups where needed to respect intended operator priority. -It can produce a regular expression string to use in `re.compile` or any other regex library of your choice. +`rgx` solves those problems: it is a straightforward regexp builder. It also places non-capturing groups where needed to respect intended operator priority. +It can produce a regular expression string to use in `re.compile` or any other regex library of your choice. In other words, with `rgx` you can build a regular expression from parts, using straightforward and simple expressions. @@ -68,19 +70,19 @@ int_regex = re.compile(str(integer)) ## Quickstart -*in this readme, `x` means some pattern object. Occasionaly, `y` is introduced to mean some other pattern object (or literal)* +_in this readme, `x` means some pattern object. 
Occasionally, `y` is introduced to mean some other pattern object (or literal)_ ### Literals and pattern objects -`rgx` operates mostly on so-called "pattern objects" — `rgx.entities.RegexPattern` istances. +`rgx` operates mostly on so-called "pattern objects" — `rgx.entities.RegexPattern` instances. Your starting point would be `rgx.pattern` — it creates pattern objects from literals (and from pattern objects, which doesn't make a lot of sense). -- `rgx.pattern(str, escape: bool = True)` creates a literal pattern — one that exactly matches given string. If you want to disable escaping, pass `escape=False` -- `rgx.pattern(tuple[AnyRegexPattern])` creates a non-capturing group of patterns (nested literals will be converted too) -- `rgx.pattern(list[str])` creates a character class (for example, `rgx.pattern(["a", "b", "c"])` creates pattern `[abc]`, that matches any character of those in brackets) - - Same can be achieved by `rgx.pattern("a").to("c")` or `rgx.pattern("a") | "b" | "c"` +- `rgx.pattern(str, escape: bool = True)` creates a literal pattern — one that exactly matches given string. 
If you want to disable escaping, pass `escape=False` +- `rgx.pattern(tuple[AnyRegexPattern])` creates a non-capturing group of patterns (nested literals will be converted too) +- `rgx.pattern(list[str])` creates a character class (for example, `rgx.pattern(["a", "b", "c"])` creates pattern `[abc]`, that matches any character of those in brackets) + - Same can be achieved by `rgx.pattern("a").to("c")` or `rgx.pattern("a") | "b" | "c"` -Most operations with pattern objects support using Python literals on one side, for example: `rgx.pattern("a") | b` would produce `[ab]` pattern object (specifically, `rgx.entities.Chars`) +Most operations with pattern objects support using Python literals on one side, for example: `rgx.pattern("a") | b` would produce `[ab]` pattern object (specifically, `rgx.entities.Chars`) ### Rendering patterns @@ -95,7 +97,7 @@ p = x | y rendered_with_str = str(p) # "one|two" rendered_with_method = p.render_str() # "one|two" rendered_with_method_flags = p.render_str("im") # (?im)one|two -``` +``` ### Capturing Groups @@ -119,8 +121,8 @@ print(named_x_reference) # (?P=x) ``` -To create a capturing group, use `x.capture()`, or `rgx.reference(group: int)` for a reference. -To create a named capturing group, use `rgx.named(name: str, x)`, or `rgx.named(name: str)` for a named reference. +To create a capturing group, use `x.capture()`, or `rgx.reference(group: int)` for a reference. +To create a named capturing group, use `rgx.named(name: str, x)`, or `rgx.named(name: str)` for a named reference. 
### Character classes @@ -131,7 +133,7 @@ from rgx import pattern, meta az = pattern("a").to("z") # rgx.Chars.to(other: str | Literal | Chars) print(az) # [a-z] -digits_or_space = pattern(["1", "2", "3", meta.WHITESPACE]) +digits_or_space = pattern(["1", "2", "3", meta.WHITESPACE]) print(digits_or_space) # [123\s] print(az | digits_or_space) # [a-z123\s] @@ -170,7 +172,7 @@ capture = x.capture() print( capture + conditional(1, y, z) ) -``` +``` ### Repeating patterns @@ -212,15 +214,16 @@ a.some() # a* # or (what) +-(a * 38) # a* ``` -Here's what's going on: -`pattern.repeat(count, lazy)` returns a `{count, count}` `Range` object + +Here's what's going on: +`pattern.repeat(count, lazy)` returns a `{count, count}` `Range` object `pattern * count` is the same as `pattern.repeat(count, False)` `Range` implements `or_more`, `or_less` and `to` methods: -- `Range.or_more()` [or `+Range`] moves (on a copy) upper bound of range to infinity (actually `None`) -- `Range.or_less()` [or `-Range`] moves (on a copy) lower bound of range to 0 -- `Range.to(count)` [or `Range >> count` (right shift)] replaces upper bound with given number +- `Range.or_more()` [or `+Range`] moves (on a copy) upper bound of range to infinity (actually `None`) +- `Range.or_less()` [or `-Range`] moves (on a copy) lower bound of range to 0 +- `Range.to(count)` [or `Range >> count` (right shift)] replaces upper bound with given number Also, RegexPattern implements unary plus (`+pattern`) as an alias for `pattern.many()` @@ -467,7 +470,8 @@ print(meta.CHAR_ESCAPE(320000)) # \U0004e200 `rgx.unicode_meta` is a collection of functions and constants, mostly for `\p` and `\P` usage: -Functions: +Functions: + ```python unicode_meta.PROPERTY(value: str) # renders into `\p{value}` (any character with property specified by value, e.g. 
`PROPERTY("Ll") -> \p{Ll}`) unicode_meta.PROPERTY_INVERSE(value: str) # matches all characters *not* matched by corresponding `PROPERTY` (`\P{value}`) @@ -475,7 +479,9 @@ unicode_meta.PROPERTY_INVERSE(value: str) # matches all characters *not* matched unicode_meta.NAMED_PROPERTY(name: str, value: str) # renders into `\p{name=value}` and matches any character which property `name` equals `value` unicode_meta.NAMED_PROPERTY_INVERSE(name: str, value: str) # same, but inverted (`\P{name=value}`) ``` -Constants: + +Constants: + ```python unicode_meta.LETTER = PROPERTY("L") unicode_meta.NON_LETTER = PROPERTY_INVERSE("L") @@ -486,14 +492,14 @@ unicode_meta.NON_WHITESPACE = PROPERTY_INVERSE("Z") unicode_meta.DIGIT = PROPERTY("Nd") unicode_meta.NON_DIGIT = PROPERTY("Nd") ``` + ## Extending -You can extend generation by subclassing one of the classes of `rgx.entities` module. -The one neccessary method to provide is `.render(self)`. It should return an iterable of strings (e.g. `["something"]`). +You can extend generation by subclassing one of the classes of `rgx.entities` module. +The one neccessary method to provide is `.render(self)`. It should return an iterable of strings (e.g. `["something"]`). Built-in components (and this section) are using generators for that purpose, but you're free to choose whatever works for you. For example, if you want to render a PCRE accept control verb - `(*ACCEPT)`, you can do it like this: - ```python from rgx.entities import RegexPattern, Concat from rgx import pattern @@ -511,7 +517,7 @@ def accept(self) -> Concat: RegexPattern.accept = accept -x = pattern("something").accept() +x = pattern("something").accept() print(x) # something(*ACCEPT) ``` @@ -542,8 +548,8 @@ x = pattern("something").accept() # something(*ACCEPT) ### Priority -If your extension has to rely on some priority, you can use `respect_priority` function. 
-Let's say you want to add a `x/y` operation, which does something (wow) and has prority between `a|b` and `ab` — so `a|b/cd` is the same as `a|(?:b/(?:cd))`. +If your extension has to rely on some priority, you can use `respect_priority` function. +Let's say you want to add a `x/y` operation, which does something (wow) and has prority between `a|b` and `ab` — so `a|b/cd` is the same as `a|(?:b/(?:cd))`. ```python from rgx.entities import RegexPattern, Concat, Option, AnyRegexPattern, respect_priority, pattern @@ -553,7 +559,7 @@ class MagicSlash(RegexPattern): priority = (Concat.priority + Option.priority) // 2 # let's take something in the middle def __init__(self, left: RegexPattern, right: RegexPattern): - self.left = respect_priority(left, self.priority) # you need to wrap all parts of your expression in respect_priority() + self.left = respect_priority(left, self.priority) # you need to wrap all parts of your expression in respect_priority() self.right = respect_priority(right, self.priority) # ...and pass your expression priority as a second argument def render(self) -> Iterable[str]: @@ -594,17 +600,16 @@ print( ``` - ## Common questions ### Difference between `(x, y)` and `x + y` -Previous examples used `()` and `+`, and the difference might not be so obvious. +Previous examples used `()` and `+`, and the difference might not be so obvious. 
-- `x + y` creates a concatenation of patterns (`rgx.entities.Concat`), with no extra characters apart from those of patterns -- `x + y` can be used only if at least one of the operands is a pattern object (that is, created with one of `rgx` functions or is one of `rgx` constants) -- `x + y` produces a pattern object itself, so you won't need to call `pattern` on it to call pattern methods +- `x + y` creates a concatenation of patterns (`rgx.entities.Concat`), with no extra characters apart from those of patterns +- `x + y` can be used only if at least one of the operands is a pattern object (that is, created with one of `rgx` functions or is one of `rgx` constants) +- `x + y` produces a pattern object itself, so you won't need to call `pattern` on it to call pattern methods -- `pattern((x, y))` creates a non-capturing group (`rgx.entities.NonCapturingGroup`): `pattern((x, y)).render_str()` -> `(?:xy)` -- `(x, y)` can be used with any pattern-like literals or pattern objects -- `(x, y)` is a tuple literal, so you can't use pattern methods on it directly or convert it into a complete expression (you need to use `rgx.pattern` on it first) \ No newline at end of file +- `pattern((x, y))` creates a non-capturing group (`rgx.entities.NonCapturingGroup`): `pattern((x, y)).render_str()` -> `(?:xy)` +- `(x, y)` can be used with any pattern-like literals or pattern objects +- `(x, y)` is a tuple literal, so you can't use pattern methods on it directly or convert it into a complete expression (you need to use `rgx.pattern` on it first) diff --git a/pyproject.toml b/pyproject.toml index 10ae16d..efc9520 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "rgx" -version = "1.4.5" +version = "2.0.0" description = "Typed, simple and readable regexp generation" authors = ["Dmitry Gritsenko "] license = "MIT" @@ -11,6 +11,7 @@ keywords = ["regex", "regexp", "regular expressions"] [tool.poetry.dependencies] python = "^3.7" +wordstreamer = "^0.1.2" 
[tool.poetry.dev-dependencies] diff --git a/rgx/entities.py b/rgx/entities.py index 41c69b0..cfd3394 100644 --- a/rgx/entities.py +++ b/rgx/entities.py @@ -12,14 +12,16 @@ TYPE_CHECKING, ) +from wordstreamer import Context, Renderable as BaseRenderable, Renderer, TokenStream +from wordstreamer.core import Marker +from wordstreamer.internal_types import Payload + if TYPE_CHECKING: from typing import Literal as LiteralType, Self -import itertools import re -StrGen = Iterable[str] CharType = Union[str, "CharRange", "Literal"] LiteralPart = Union[Tuple["AnyRegexPattern", ...], List[CharType], str] AnyRegexPattern = Union[LiteralPart, "RegexPattern"] @@ -98,11 +100,12 @@ def respect_priority(contents: AnyRegexPattern, other_priority: int) -> RegexPat return contents -class RegexPattern: +class RegexPattern(BaseRenderable): priority: int = 100 * priority_step optimized = False + default_context: Context = Context(Renderer()) - def render(self) -> StrGen: + def render(self, context: Context) -> TokenStream: """ Internal method @@ -110,6 +113,9 @@ def render(self) -> StrGen: """ return NotImplemented + def stream(self, context: Context) -> TokenStream: + return self.render(context) + def case_insensitive(self) -> RegexPattern: return self.set_flags("i") @@ -161,21 +167,23 @@ def merge_flags_abstract( return new_parts, common_flags - def render_str(self, flags: str = "") -> str: + def render_str(self, flags: str = "", payload: Payload | None = None) -> str: """ Renders given pattern into a string with specified global flags. 
""" - parts: list[Iterable[str]] = [] + renderer = Renderer(payload) + + parts: list[BaseRenderable] = [] if flags: - parts.append(GlobalFlags(flags).render()) + parts.append(GlobalFlags(flags)) - parts.append(self.optimize().render()) + parts.append(self.optimize()) - return "".join(itertools.chain(*parts)) + return "".join(map(renderer.render_string, parts)) def __repr__(self) -> str: return self.render_str() @@ -452,16 +460,16 @@ class GroupBase(RegexPattern): def __init__(self, *contents: AnyRegexPattern): self.contents = pattern(contents) - def render_prefix(self) -> StrGen: + def render_prefix(self) -> TokenStream: yield self.prefix def case_insensitive(self): return self.apply(lambda x: x.case_insensitive()) - def render(self) -> StrGen: + def render(self, context: Context) -> TokenStream: yield "(" yield from self.render_prefix() - yield from self.contents.render() + yield from self.contents.render(context) yield ")" def apply(self, fn: Processor) -> Self: @@ -602,15 +610,17 @@ def accepts(self, char: str) -> bool: return True return False - def render(self) -> StrGen: + def render(self, context: Context) -> TokenStream: if len(self.contents) == 1: contents = self.contents[0] if contents.is_single_char(): - yield from contents.render_literal() + yield from contents.render_literal(context) return yield "[" + for char in self.contents: - yield from char.render() + yield from char.render(context) + yield "]" def to(self, other: str | Literal | Chars) -> Chars: @@ -662,12 +672,12 @@ def exclude(self, chars: AnyRegexPattern) -> Chars: class ReversedChars(CharBase): - def render(self) -> StrGen: + def render(self, context: Context) -> TokenStream: yield "[" yield "^" for char in self.contents: if isinstance(char, (Literal, CharRange)): - yield from char.render() + yield from char.render(context) elif char in Chars.non_special: yield char else: @@ -692,7 +702,7 @@ def __or__(self, other: AnyRegexPattern) -> Union[Option, ReversedChars]: return Option(self, other) 
-class CharRange: +class CharRange(BaseRenderable): min_char = 0 max_char = 0x10FFFF @@ -726,7 +736,7 @@ def accepts(self, char: str) -> bool: def render_char(char: int) -> str: return re.escape(chr(char)) - def render(self) -> StrGen: + def stream(self, context: Context) -> TokenStream: if self.meta: yield self.meta return @@ -748,11 +758,14 @@ def render(self) -> StrGen: if self.stop != CharRange.max_char: yield self.render_char(self.stop) - def render_literal(self) -> StrGen: + def render(self, context: Context) -> TokenStream: + return self.stream(context) + + def render_literal(self, context: Context) -> TokenStream: if self.meta: yield self.meta return - yield from Literal(chr(self.start)).render() + yield from Literal(chr(self.start)).render(context) @staticmethod def exclude_bounds(bounds: Bounds, exclude: Bounds) -> list[Bounds]: @@ -805,7 +818,7 @@ def is_single_char(self) -> bool: return self.start == self.stop def __repr__(self): - return "".join(self.render()) + return Renderer().render_string(self) def __eq__(self, other: object): if not isinstance(other, CharRange): @@ -869,9 +882,9 @@ def __add__(self, other: AnyRegexPattern) -> Concat: def case_insensitive(self) -> RegexPattern: return self.apply(lambda x: x.case_insensitive()) - def render(self) -> StrGen: + def render(self, context: Context) -> TokenStream: for part in self.contents: - yield from part.render() + yield from part.render(context) def merge_flags(self) -> LocalFlags | Concat: processed, common_flags = self.merge_flags_abstract(self.contents) @@ -911,13 +924,13 @@ def merge_flags(self) -> LocalFlags | Option: return LocalFlags(new, "".join(common_flags)) - def render(self) -> StrGen: + def render(self, context: Context) -> TokenStream: if not self.alternatives: return - yield from self.alternatives[0].render() + yield from self.alternatives[0].render(context) for alternative in self.alternatives[1:]: yield "|" - yield from alternative.render() + yield from alternative.render(context) 
def __or__(self, other: AnyRegexPattern) -> Option: return Option(*self.alternatives, other) @@ -938,11 +951,11 @@ def __init__(self, contents: AnyRegexPattern, flags: str): def case_insensitive(self) -> RegexPattern: return self.apply(lambda x: x.case_insensitive()) - def render(self) -> StrGen: + def render(self, context: Context) -> TokenStream: yield "(?" yield self.flags yield ":" - yield from self.contents.render() + yield from self.contents.render(context) yield ")" def apply(self, fn: Processor) -> Self: @@ -1036,7 +1049,7 @@ def to(self, count: int) -> Range: def __rshift__(self, count: int) -> Range: return self.to(count) - def render_quantifier(self) -> StrGen: + def render_quantifier(self) -> TokenStream: if self.max_count is None: if not self.min_count: yield "*" @@ -1068,11 +1081,11 @@ def render_quantifier(self) -> StrGen: yield "}" - def render(self) -> StrGen: + def render(self, context: Context) -> TokenStream: if self.max_count == 0: return - yield from self.contents.render() + yield from self.contents.render(context) if self.min_count == self.max_count == 1: return @@ -1123,13 +1136,13 @@ def case_insensitive(self) -> RegexPattern: contents = self.contents.case_insensitive() if self.contents else None return NamedPattern(self.name, contents) - def render(self) -> StrGen: + def render(self, context: Context) -> TokenStream: yield "(?P" if self.contents: yield "<" yield self.name yield ">" - yield from self.contents.render() + yield from self.contents.render(context) else: yield "=" yield self.name @@ -1193,13 +1206,13 @@ def __init__( self.true_option = respect_priority(true_option, Option.priority + 1) self.false_option = respect_priority(false_option, Option.priority + 1) - def render(self) -> StrGen: + def render(self, context: Context) -> TokenStream: yield "(?(" yield str(self.group) yield ")" - yield from self.true_option.render() + yield from self.true_option.render(context) yield "|" - yield from self.false_option.render() + yield from 
self.false_option.render(context) yield ")" def apply(self, fn: Processor) -> Self: @@ -1222,7 +1235,7 @@ def __init__(self, contents: str) -> None: def to(self, other: str | Literal | Chars) -> Chars: return Chars([self]).to(other) - def render(self) -> StrGen: + def render(self, context: Context) -> TokenStream: yield re.escape(self.contents) def apply(self, fn: Processor) -> Self: @@ -1236,7 +1249,7 @@ class UnescapedLiteral(Literal): """ - def render(self) -> StrGen: + def render(self, context: Context) -> TokenStream: yield str(self.contents) diff --git a/test/test_creation.py b/test/test_creation.py index 6397ecd..f417b64 100644 --- a/test/test_creation.py +++ b/test/test_creation.py @@ -54,4 +54,4 @@ def test_flags(self): def test_that_render_on_regex_pattern_is_not_implemented_i_know_this_is_stupid_but_still( self, ): - assert RegexPattern().render() == NotImplemented + assert RegexPattern().render(RegexPattern.default_context) == NotImplemented From 9cd14de7cc1397583cce3eadc69a3342a3544b0c Mon Sep 17 00:00:00 2001 From: Dmitry Gritsenko Date: Mon, 4 Dec 2023 09:34:22 +0400 Subject: [PATCH 2/4] feat: some ws integration tweaks --- README.md | 22 +++++++++++----------- rgx/__init__.py | 9 ++++++++- rgx/entities.py | 19 +++++++++++++++---- 3 files changed, 34 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 7a364d2..c8f3ed8 100644 --- a/README.md +++ b/README.md @@ -496,18 +496,18 @@ unicode_meta.NON_DIGIT = PROPERTY("Nd") ## Extending You can extend generation by subclassing one of the classes of `rgx.entities` module. -The one neccessary method to provide is `.render(self)`. It should return an iterable of strings (e.g. `["something"]`). +The one necessary method to provide is `.render(self, context: rgx.Context)`. It should return an iterable of strings (e.g. `["something"]`). Built-in components (and this section) are using generators for that purpose, but you're free to choose whatever works for you. 
For example, if you want to render a PCRE accept control verb - `(*ACCEPT)`, you can do it like this: ```python from rgx.entities import RegexPattern, Concat -from rgx import pattern +from rgx import pattern, Context from typing import Iterable class Accept(RegexPattern): - def render(self) -> Iterable[str]: + def render(self, context: Context) -> Iterable[str]: yield "(*ACCEPT)" @@ -525,7 +525,7 @@ Or like this: ```python from rgx.entities import RegexPattern, Concat -from rgx import pattern +from rgx import pattern, Context from typing import Iterable @@ -533,12 +533,12 @@ class Accept(RegexPattern): def __init__(self, accepted_pattern: RegexPattern): self.accepted_pattern = accepted_pattern - def render(self) -> Iterable[str]: - yield from accepted_pattern.render() + def render(self, context: Context) -> Iterable[str]: + yield from self.accepted_pattern.render(context) yield "(*ACCEPT)" -def accept(self) -> Concat: +def accept(self) -> Accept: return Accept(self) RegexPattern.accept = accept @@ -552,7 +552,7 @@ If your extension has to rely on some priority, you can use `respect_priority` f Let's say you want to add a `x/y` operation, which does something (wow) and has prority between `a|b` and `ab` — so `a|b/cd` is the same as `a|(?:b/(?:cd))`. 
```python -from rgx.entities import RegexPattern, Concat, Option, AnyRegexPattern, respect_priority, pattern +from rgx.entities import RegexPattern, Concat, Option, AnyRegexPattern, respect_priority, pattern, Context from typing import Iterable class MagicSlash(RegexPattern): @@ -562,10 +562,10 @@ class MagicSlash(RegexPattern): self.left = respect_priority(left, self.priority) # you need to wrap all parts of your expression in respect_priority() self.right = respect_priority(right, self.priority) # ...and pass your expression priority as a second argument - def render(self) -> Iterable[str]: - yield from self.left.render() + def render(self, context: Context) -> Iterable[str]: + yield from self.left.render(context) yield "/" - yield from self.right.render() + yield from self.right.render(context) def slash(self, other: AnyRegexPattern) -> MagicSlash: # AnyRegexPattern is either a RegexPattern instance or a Python literal diff --git a/rgx/__init__.py b/rgx/__init__.py index aa604db..56dee0a 100644 --- a/rgx/__init__.py +++ b/rgx/__init__.py @@ -1 +1,8 @@ -from .entities import pattern, NamedPattern as named, group_reference as reference, ConditionalPattern as conditional, char_range \ No newline at end of file +from .entities import ( + pattern, + NamedPattern as named, + group_reference as reference, + ConditionalPattern as conditional, + char_range as char_range, + Context as Context, +) diff --git a/rgx/entities.py b/rgx/entities.py index cfd3394..eb05868 100644 --- a/rgx/entities.py +++ b/rgx/entities.py @@ -6,6 +6,7 @@ Tuple, List, Union, + cast, overload, Iterable, Sequence, @@ -94,10 +95,12 @@ def respect_priority(contents: AnyRegexPattern, other_priority: int) -> RegexPat if isinstance(contents, NonCapturingGroup): return respect_priority(contents.contents, other_priority) - if contents.priority < other_priority: - return NonCapturingGroup(contents) - - return contents + return cast( + RegexPattern, + contents.respect_priority( + 
_PriorityShell(other_priority), + ), + ) class RegexPattern(BaseRenderable): @@ -105,6 +108,9 @@ class RegexPattern(BaseRenderable): optimized = False default_context: Context = Context(Renderer()) + def wrap(self): + return NonCapturingGroup(self) + def render(self, context: Context) -> TokenStream: """ Internal method @@ -453,6 +459,11 @@ def named(self, name: str) -> NamedPattern: return NamedPattern(name, self) +class _PriorityShell(RegexPattern): + def __init__(self, priority: int) -> None: + self.priority = priority + + class GroupBase(RegexPattern): contents: RegexPattern prefix: str From 97d4b0828a2c6004227c998de3de611ddff37e1d Mon Sep 17 00:00:00 2001 From: Dmitry Gritsenko Date: Mon, 4 Dec 2023 09:40:54 +0400 Subject: [PATCH 3/4] fix: unused imports and better respect_priority --- rgx/entities.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/rgx/entities.py b/rgx/entities.py index eb05868..ff5f63c 100644 --- a/rgx/entities.py +++ b/rgx/entities.py @@ -8,14 +8,12 @@ Union, cast, overload, - Iterable, Sequence, TYPE_CHECKING, ) from wordstreamer import Context, Renderable as BaseRenderable, Renderer, TokenStream -from wordstreamer.core import Marker -from wordstreamer.internal_types import Payload +from wordstreamer.internal_types import Comparator, Payload if TYPE_CHECKING: from typing import Literal as LiteralType, Self @@ -90,14 +88,9 @@ def pattern(literal: AnyRegexPattern, escape: bool = True) -> RegexPattern: def respect_priority(contents: AnyRegexPattern, other_priority: int) -> RegexPattern: - contents = pattern(contents) - - if isinstance(contents, NonCapturingGroup): - return respect_priority(contents.contents, other_priority) - return cast( RegexPattern, - contents.respect_priority( + pattern(contents).respect_priority( _PriorityShell(other_priority), ), ) @@ -499,6 +492,14 @@ def optimize(self) -> RegexPattern: return self.contents.optimize() return super().optimize() + def respect_priority( + self, + 
operation: BaseRenderable, + comparator: Comparator | None = None, + side: str = "none", + ) -> BaseRenderable: + return self.contents.respect_priority(operation, comparator, side) + class Lookahead(GroupBase): prefix = "?=" From 808fb6e45620f54c6ed61858ae6802050dcaee90 Mon Sep 17 00:00:00 2001 From: Dmitry Gritsenko Date: Mon, 4 Dec 2023 10:47:42 +0400 Subject: [PATCH 4/4] fix: bump wordstreamer version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index efc9520..ad313b2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ keywords = ["regex", "regexp", "regular expressions"] [tool.poetry.dependencies] python = "^3.7" -wordstreamer = "^0.1.2" +wordstreamer = "^0.1.3" [tool.poetry.dev-dependencies]