diff --git a/guidance/_grammar.py b/guidance/_grammar.py index 7500b54a3..8b63df031 100644 --- a/guidance/_grammar.py +++ b/guidance/_grammar.py @@ -966,10 +966,6 @@ def _re_with_temperature(grammar, temperature, visited_set): # return ModelVariable(name) -def active_role_end() -> ModelVariable: - return ModelVariable("active_role_end") - - def eos_token() -> ModelVariable: return ModelVariable("eos_token") diff --git a/guidance/library/_gen.py b/guidance/library/_gen.py index 71b969e8c..775843103 100644 --- a/guidance/library/_gen.py +++ b/guidance/library/_gen.py @@ -8,7 +8,7 @@ from ._any_char import any_char from .._grammar import capture from ._regex import regex as regex_grammar -from .._grammar import token_limit, eos_token, active_role_end, with_temperature +from .._grammar import token_limit, eos_token, with_temperature from ._tool import Tool from ._block import block @@ -129,7 +129,7 @@ def gen( if isinstance(stop, str): stop = [stop] if regex is None: - stop = stop + [select([eos_token(), active_role_end()])] + stop = stop + [eos_token()] if stop_regex is None: stop_regex = [] diff --git a/guidance/library/_role.py b/guidance/library/_role.py index 843ed904e..737d86a56 100644 --- a/guidance/library/_role.py +++ b/guidance/library/_role.py @@ -1,27 +1,16 @@ from .._guidance import guidance -from ._block import block +from ._block import ContextBlock from ._set_attribute import set_attribute -nodisp_start = "<||_#NODISP_||>" -nodisp_end = "<||_/NODISP_||>" -span_start = "<||_html:_||>" -span_end = "<||_html:_||>" -@guidance -def role_opener(lm, role_name, **kwargs): - indent = getattr(lm, "indent_roles", True) +class RoleBlock(ContextBlock): + def __init__(self, role_name, opener, closer, name=None): + super().__init__(opener, closer, name=name) + self.role_name = role_name - # Block start container (centers elements) - if indent: - lm += f"<||_html:
{role_name.lower()}
_||>" - - # Start of either debug or HTML no disp block - if indent: - lm += nodisp_start - else: - lm += span_start - +@guidance +def role_opener(lm, role_name, **kwargs): # TODO [HN]: Temporary change while I instrument chat_template in transformers only. # Eventually have all models use chat_template. if hasattr(lm, "get_role_start"): @@ -32,48 +21,25 @@ def role_opener(lm, role_name, **kwargs): raise Exception( f"You need to use a chat model in order the use role blocks like `with {role_name}():`! Perhaps you meant to use the {type(lm).__name__}Chat class?" ) - - # End of either debug or HTML no disp block - if indent: - lm += nodisp_end - else: - lm += span_end - return lm @guidance def role_closer(lm, role_name, **kwargs): - indent = getattr(lm, "indent_roles", True) - # Start of either debug or HTML no disp block - if indent: - lm += nodisp_start - else: - lm += span_start - # TODO [HN]: Temporary change while I instrument chat_template in transformers only. # Eventually have all models use chat_template. if hasattr(lm, "get_role_end"): lm += lm.get_role_end(role_name) elif hasattr(lm, "chat_template"): lm += lm.chat_template.get_role_end(role_name) - - # End of either debug or HTML no disp block - if indent: - lm += nodisp_end - else: - lm += span_end - - # End of top container - if indent: - lm += "<||_html:
_||>" - return lm + # TODO HN: Add a docstring to better describe arbitrary role functions def role(role_name, text=None, **kwargs): if text is None: - return block( + return RoleBlock( + role_name=role_name, opener=role_opener(role_name, **kwargs), closer=role_closer(role_name, **kwargs), ) diff --git a/guidance/models/_model.py b/guidance/models/_model.py index bdde3f2c0..bbe44adb2 100644 --- a/guidance/models/_model.py +++ b/guidance/models/_model.py @@ -11,7 +11,7 @@ from pprint import pprint -from typing import Dict, TYPE_CHECKING +from typing import Dict, Optional, TYPE_CHECKING import numpy as np @@ -38,6 +38,8 @@ "Failed to load guidance.cpp, falling back to Python mirror implementations..." ) from .. import _cpp as cpp + +from ._model_state import ModelState, Text, Object, RoleOpener, RoleCloser from ._guidance_engine_metrics import GuidanceEngineMetrics from .._utils import softmax, CaptureEvents from .._parser import EarleyCommitParser, Parser @@ -62,14 +64,6 @@ # define some constants we will reuse many times _null_grammar = string("") -format_pattern = re.compile(r"<\|\|_.*?_\|\|>", flags=re.DOTALL) -nodisp_pattern = re.compile( - r"<\|\|_#NODISP_\|\|>.*?<\|\|_/NODISP_\|\|>", flags=re.DOTALL -) -html_pattern = re.compile(r"<\|\|_html:(.*?)_\|\|>", flags=re.DOTALL) -image_pattern = re.compile(r"<\|_image:(.*?)\|>") - - class EngineCallResponse: @@ -850,57 +844,19 @@ def __init__(self, engine, echo=True, **kwargs): self.echo = echo self.token_count = 0 # tracks how many tokens our byte state represents self.max_display_rate = 0.2 # this controls how frequently we are allowed to redraw the display (in seconds) - self.opened_blocks = {} # what context blocks have been opened but not closed + self.opened_blocks: dict["ContextBlock", tuple[int, Optional(Object)]] = {} # what context blocks have been opened but not closed # self.compute_log_probs = compute_log_probs # private attributes self._variables = {} # these are the state variables stored with the model self._variables_log_probs = {} # these are the state variables stored with the model self._cache_state = {} # mutable caching state used to save computation - self._state = "" # the current bytes that represent the state of the model + self._state = ModelState() # the current bytes that represent the state of the model self._event_queue = None # TODO: these are for streaming results in code, but that needs implemented self._event_parent = None self._last_display = 0 # used to track the last display call to enable throttling self._last_event_stream = 0 # used to track the last event streaming call to enable throttling - @property - def active_role_end(self): - """The default end patterns we should use for `gen` calls. - TODO: move this logic into the gen call...we can do with if we allow model_variables to run functions. - - These patterns are computed dynamically by the model object because they can depend on - what the current open roles are, which is something - """ - - # add any active non-empty role ends. Ignore role ends that are spaces - parts = [] - for _, role_end_str in self.opened_blocks.values(): - role_end_str = format_pattern.sub("", role_end_str) - if len(role_end_str) > 0 and not re.fullmatch(r"\s+", role_end_str): - parts.append(role_end_str) - - return select(parts) - - def _html(self): - """Generate HTML that displays the model object.""" - display_out = self._state - for context in reversed(self.opened_blocks): - display_out += self.opened_blocks[context][1] - display_out = html.escape(display_out) - display_out = nodisp_pattern.sub("", display_out) - display_out = html_pattern.sub(lambda x: html.unescape(x.group(1)), display_out) - display_out = image_pattern.sub( - lambda x: '', - display_out, - ) - display_out = ( - "
"
-            + display_out
-            + "
" - ) - return display_out def _send_to_event_queue(self, value): """For streaming in code. @@ -924,6 +880,7 @@ def copy(self): new_lm._variables = self._variables.copy() new_lm._variables_log_probs = self._variables_log_probs.copy() new_lm.opened_blocks = self.opened_blocks.copy() + new_lm._state = self._state.copy() # create a new clean event queue new_lm._event_queue = None # we start with no event queue because nobody is listening to us yet @@ -938,7 +895,7 @@ def copy(self): return new_lm - def _inplace_append(self, value, force_silent=False): + def _inplace_append(self, obj: Object, force_silent: bool = False): """This is the base way to add content to the current LM object that is being constructed. All updates to the model state should eventually use this function. @@ -951,7 +908,7 @@ def _inplace_append(self, value, force_silent=False): """ # update the byte state - self._state += str(value) # TODO: make _state to be bytes not a string + self._state.append(obj) # see if we should update the display if not force_silent: @@ -995,20 +952,30 @@ def reset(self, clear_variables=True): self._variables_log_probs = {} return self + def _html(self): + out = self._state._html() + for context in reversed(self.opened_blocks): + _, closer = self.opened_blocks[context] + if closer is not None: + out += closer._html() + return out + def _repr_html_(self): if ipython_is_imported: clear_output(wait=True) return self._html() - def _current_prompt(self): + def _current_prompt(self) -> str: """The current prompt in bytes (which is the state without the context close tags).""" - return format_pattern.sub("", self._state) + return str(self._state) def __str__(self): """A string representation of the current model object (that includes context closers).""" - out = self._current_prompt() + out = str(self._state) for context in reversed(self.opened_blocks): - out += format_pattern.sub("", self.opened_blocks[context][1]) + _, closer = self.opened_blocks[context] + if closer is not None: + out += str(closer) return out def __add__(self, value): @@ -1019,6 +986,8 @@ def __add__(self, value): value : guidance grammar The grammar used to extend the current model. """ + # Import in function to guard against circular import + from ..library._role import RoleBlock # create the new lm object we will return # (we need to do this since Model objects are immutable) @@ -1034,7 +1003,7 @@ def __add__(self, value): new_blocks.append(context) # mark this so we don't re-add when computing the opener or closer (even though we don't know the close text yet) - lm.opened_blocks[context] = (0, "") + lm.opened_blocks[context] = (0, None) # find what old blocks need to be removed old_blocks = [] @@ -1046,20 +1015,44 @@ def __add__(self, value): del lm.opened_blocks[context] # close any newly closed contexts - for (pos, close_text), context in old_blocks: + for (pos, closer), context in old_blocks: + assert closer is not None if context.name is not None: - lm._variables[context.name] = format_pattern.sub( - "", lm._state[pos:] - ) - lm += context.closer + # Capture + lm._variables[context.name] = str(lm._state[pos:]) + lm._inplace_append(closer) # apply any newly opened contexts (new from this object's perspective) for context in new_blocks: - lm += context.opener + if isinstance(context, RoleBlock): + # Apply the opener (a grammar) + with grammar_only(): + # TODO: be careful about the temp lm's display? (e.g. with silent()) + tmp = lm + context.opener + open_text = str(tmp._state[len(lm._state):]) # get the new state added by calling the opener + # Add that new state as text in a RoleOpener + lm._inplace_append( + RoleOpener( + role_name=context.role_name, + text=open_text, + indent=getattr(lm, "indent_roles", True) + ) + ) + else: + lm += context.opener with grammar_only(): + # TODO: be careful about the temp lm's display? (e.g. with silent()) tmp = lm + context.closer - close_text = tmp._state[len(lm._state):] # get the new state added by calling the closer - lm.opened_blocks[context] = (len(lm._state), close_text) + close_text = str(tmp._state[len(lm._state):]) # get the new state added by calling the closer + if isinstance(context, RoleBlock): + closer = RoleCloser( + role_name=context.role_name, + text=close_text, + indent=getattr(lm, "indent_roles", True) + ) + else: + closer = Text(text=close_text) + lm.opened_blocks[context] = (len(lm._state), closer) # clear out names that we override if context.name is not None: @@ -1075,7 +1068,9 @@ def __add__(self, value): # we have no embedded objects if len(parts) == 1: - lm._inplace_append(value) + lm._inplace_append( + Text(text=value) + ) out = lm # if we have embedded objects we have to convert the string to a grammar tree @@ -1119,7 +1114,8 @@ def __add__(self, value): ) # this flushes the display - out._inplace_append("") + # TODO: directly call _update_display? + out._inplace_append(Text(text="")) return out @@ -1147,9 +1143,8 @@ def __getitem__(self, key): else: for context in list(reversed(self.opened_blocks)): if context.name == key: - return format_pattern.sub( - "", self._state[self.opened_blocks[context][0] :] - ) + pos, _ = self.opened_blocks[context] + return str(self._state[pos:]) raise KeyError(f"Model does not contain the variable '{key}'") @@ -1328,11 +1323,19 @@ def _run_stateless(self, stateless_function, temperature=0.0, top_p=1.0, n=1): if len(chunk.new_bytes) > 0: generated_value += new_text if chunk.is_generated: - lm += f"<||_html:_||>" - lm += new_text - if chunk.is_generated: - lm += "<||_html:_||>" - + self._inplace_append( + Text( + text = new_text, + # TODO: this will be slightly wrong if we have a delayed byte string + probability = chunk.new_bytes_prob + ) + ) + else: + self._inplace_append( + Text( + text = new_text, + ) + ) # last_is_generated = chunk.is_generated if len(chunk.capture_groups) > 0: diff --git a/guidance/models/_model_state.py b/guidance/models/_model_state.py new file mode 100644 index 000000000..72cd02940 --- /dev/null +++ b/guidance/models/_model_state.py @@ -0,0 +1,129 @@ +from __future__ import annotations +from dataclasses import dataclass +from typing import Optional, overload +import base64 +import html + + +@dataclass(frozen=True, slots=True) +class Object: + def _html(self) -> str: + raise NotImplementedError + + def __str__(self) -> str: + raise NotImplementedError + + +@dataclass(frozen=True, slots=True) +class Text(Object): + text: str + probability: Optional[float] = None + + def __str__(self) -> str: + return self.text + + def _html(self) -> str: + escaped_text = html.escape(self.text) + if self.probability is not None: + style = f"background-color: rgba({165*(1-self.probability) + 0}, {165*self.probability + 0}, 0, {0.15}); border-radius: 3px;" + return f"{escaped_text}" + return escaped_text + + +@dataclass(frozen=True, slots=True) +class Image(Object): + id: str + data: bytes + + def __str__(self) -> str: + raise NotImplementedError + + def _html(self) -> str: + return f"""""" + + +@dataclass(frozen=True, slots=True) +class RoleOpener(Object): + role_name: str + text: str + indent: bool + + def _html(self) -> str: + out = "" + if self.indent: + out += f"
{self.role_name.lower()}
" + else: + out += "" + out += self.text + out += "" + return out + + def __str__(self) -> str: + return self.text + + +@dataclass(frozen=True, slots=True) +class RoleCloser(Object): + role_name: str + text: str + indent: bool + + def _html(self) -> str: + out = "" + if self.indent: + out += "
" + else: + out += "" + out += self.text + out += "" + return out + + def __str__(self) -> str: + return self.text + + +class ModelState: + __slots__ = ["objects"] + + def __init__(self) -> None: + self.objects: list[Object] = [] + + def copy(self) -> ModelState: + new = ModelState() + new.objects = self.objects.copy() + return new + + @overload + def __getitem__(self, index: int) -> Object: ... + + @overload + def __getitem__(self, index: slice) -> ModelState: ... + + def __getitem__(self, index): + if isinstance(index, int): + return self.objects[index] + elif isinstance(index, slice): + new = ModelState() + new.objects = self.objects[index] + return new + raise TypeError(f"Index must be int or slice, not {type(index)}") + + def append(self, obj: Object) -> None: + self.objects.append(obj) + + def __len__(self) -> int: + return len(self.objects) + + # TODO: opened blocks + def __str__(self) -> str: + out = "" + for obj in self.objects: + out += str(obj) + return out + + def _html(self) -> str: + out = "
"
+        for obj in self.objects:
+            out += obj._html()
+        out += "
" + return out