From ef6806da5186c9edf9473a35f1f2b3884cc8117f Mon Sep 17 00:00:00 2001
From: Ruihang Lai
Date: Sat, 16 Nov 2024 13:29:17 -0500
Subject: [PATCH] [Docs] README revamp and documentation initialization

This PR sets up the XGrammar documentation and reorganizes the README.
---
 .github/workflows/documentation.yaml      |  42 ++++
 README.md                                 | 199 +++++-------------
 docs/.gitignore                           |   1 +
 docs/Makefile                             |  20 ++
 docs/README.md                            |  30 +++
 .../img/mlc-logo-with-text-landscape.svg  |  87 ++++++++
 docs/conf.py                              | 102 +++++++++
 docs/index.rst                            |  32 +++
 docs/make.bat                             |  35 +++
 docs/requirements.txt                     |   8 +
 docs/start/install.rst                    |  95 +++++++++
 docs/start/quick_start.rst                |  35 +++
 docs/tutorials/json_generation.rst        | 199 ++++++++++++++++++
 scripts/build_site.sh                     |  10 +
 scripts/gh_deploy_site.sh                 |  20 ++
 15 files changed, 763 insertions(+), 152 deletions(-)
 create mode 100644 .github/workflows/documentation.yaml
 create mode 100644 docs/.gitignore
 create mode 100644 docs/Makefile
 create mode 100644 docs/README.md
 create mode 100644 docs/_static/img/mlc-logo-with-text-landscape.svg
 create mode 100644 docs/conf.py
 create mode 100644 docs/index.rst
 create mode 100644 docs/make.bat
 create mode 100644 docs/requirements.txt
 create mode 100644 docs/start/install.rst
 create mode 100644 docs/start/quick_start.rst
 create mode 100644 docs/tutorials/json_generation.rst
 create mode 100644 scripts/build_site.sh
 create mode 100644 scripts/gh_deploy_site.sh

diff --git a/.github/workflows/documentation.yaml b/.github/workflows/documentation.yaml
new file mode 100644
index 0000000..9c5e1ce
--- /dev/null
+++ b/.github/workflows/documentation.yaml
@@ -0,0 +1,42 @@
+name: Build Docs
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  deploy_docs:
+    name: Deploy Docs
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          submodules: recursive
+
+      - name: Configure build environment
+        run: |
+          sudo apt-get update
+          python -m pip install -U pip wheel
+
+      - name: Set up Ruby
+        uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: '3.0'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install -r docs/requirements.txt
+          gem install jekyll jekyll-remote-theme
+
+      - name: Deploy to GitHub Pages
+        if: github.ref == 'refs/heads/main'
+        run: |
+          git remote set-url origin https://x-access-token:${{ secrets.MLC_GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY
+          git config --global user.email "mlc-gh-actions-bot@nomail"
+          git config --global user.name "mlc-gh-actions-bot"
+          ./scripts/gh_deploy_site.sh
diff --git a/README.md b/README.md
index 98d9701..252d1a2 100644
--- a/README.md
+++ b/README.md
@@ -1,152 +1,47 @@
-## XGrammar
-
-Cross-platform Near-zero Overhead Grammar-guided Generation for LLMs
-
-- G1: Universal: support any common tokenizer, and common grammar
-- G2: Efficient: Grammar should not cause additional burden for generation
-- G3: Cross-platform: pure C++ impl, portable for every platform, construct E2E pipeline on every platform
-- G4: Easy to understand and maintain
-
-This project is under active development.
-
-### Compile and Install
-
-```bash
-# install requirements
-sudo apt install cmake
-python3 -m pip install ninja pybind11 torch
-
-# build XGrammar core and Python bindings
-# see scripts/config.cmake for configuration options
-mkdir build
-cd build
-# specify your own CUDA architecture
-cmake .. -G Ninja -DXGRAMMAR_CUDA_ARCHITECTURES=89
-ninja
-
-# install Python package
-cd ../python
-python3 -m pip install .
- -# optional: add the python directory to PATH -echo "export PATH=\$PATH:$(pwd)" >> ~/.bashrc -``` - -### Python Usage Guide - -#### Step 1:Construction of grammar - -```python -from xgrammar import BNFGrammar, BuiltinGrammar -from pydantic import BaseModel - -# Method 1: provide a GBNF grammar string -# For specification, see https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md -gbnf_grammar = """ -root ::= (expr "=" term "\n")+ -expr ::= term ([-+*/] term)* -term ::= num | "(" expr ")" -num ::= [0-9]+ -""" - -gbnf_grammar = BNFGrammar(gbnf_grammar) - -# Method 2: unconstrained JSON -json_grammar = BuiltinGrammar.json() - -# Method 3: provide a Pydantic model -class Person(BaseModel): - name: str - age: int -json_schema_pydantic = BuiltinGrammar.json_schema(Person) - -# Method 4: provide a JSON schema string -person_schema = { - "title": "Person", - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "age": { - "type": "integer", - } - }, - "required": ["name", "age"] -} -json_schema_str = BuiltinGrammar.json_schema(json.dumps(person_schema)) -``` - -#### Step 2: Compiling grammars -The compilation is multi-threaded and cached for every grammar. - -```python -from xgrammar import TokenizerInfo, CachedGrammarCompiler, CompiledGrammar, GrammarMatcher -from transformers import AutoTokenizer - -# 1. Convert huggingface tokenizer to TokenizerInfo (once per model) -tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct") -tokenizer_info = TokenizerInfo.from_huggingface(tokenizer) -``` - -Method 1: Use CachedGrammarCompiler to avoid compile grammar multiple times -```python -# 2. Construct CachedGrammarCompiler (once per model) -compiler = CachedGrammarCompiler(tokenizer_info, max_threads=8) - -# 3. Fetch CompiledGrammar and construct GrammarMatcher (once per request) -compiled_grammar = compiler.compile_json_schema(json_schema_str) -matcher = GrammarMatcher(compiled_grammar) -``` - -Method 2: Compile grammar directly -```python -# 2. Construct CompiledGrammar directly (once per grammar) -compiled_grammar = CompiledGrammar(grammar, tokenizer_info, max_threads=8) - -# 3. Construct GrammarMatcher (once per request) -matcher = GrammarMatcher(compiled_grammar) -``` - -#### Step 3: Grammar-guided generation - -For single-batch generation: -```python -import torch - -token_bitmask = GrammarMatcher.allocate_token_bitmask(matcher.vocab_size) -while True: - logits = LLM.inference() # logits is a tensor of shape (vocab_size,) on GPU - matcher.fill_next_token_bitmask(logits, token_bitmask) - GrammarMatcher.apply_token_bitmask_inplace(logits, token_bitmask) - - prob = torch.softmax(logits, dim=-1) # get probability from logits - next_token_id = Sampler.sample(logits) # use your own sampler - - matcher.accept_token(next_token_id) - if matcher.is_terminated(): # or your own termination condition - break -``` - -For multi-batch generation: -```python -import torch - -matchers: List[GrammarMatcher] # The grammar matcher for every request -token_bitmasks = GrammarMatcher.allocate_token_bitmask(matchers[0].vocab_size, batch_size) -while True: - logits = LLM.inference() # logits is a tensor of shape (batch_size, vocab_size) on GPU - # This for loop is parallelizable using threading.Thread. But estimate the overhead in your - # engine. 
- for i in range(len(matchers)): - matchers[i].fill_next_token_bitmask(token_bitmasks, i) - GrammarMatcher.apply_token_bitmask_inplace(logits, token_bitmasks) - - prob = torch.softmax(logits, dim=-1) # get probability from logits - next_token_ids = Sampler.sample(logits) # use your own sampler - - for i in range(len(matchers)): - matchers[i].accept_token(next_token_ids[i]) - if matchers[i].is_terminated(): # or your own termination condition - requests[i].terminate() -``` +
+ +# XGrammar + +[![Documentation](https://img.shields.io/badge/docs-latest-green)](https://xgrammar.mlc.ai/docs/) +[![License](https://img.shields.io/badge/license-apache_2-blue)](https://github.com/mlc-ai/xgrammar/blob/main/LICENSE) + +**Flexible, Portable and Fast Structured Generation** + + +[Get Started](#get-started) | [Documentation](https://xgrammar.mlc.ai/docs/) + +
+
+## Overview
+
+XGrammar is an open-source solution for flexible, portable, and fast structured generation,
+aiming to bring flexible, zero-overhead structured generation everywhere.
+It supports general context-free grammars to enable a broad range of structures,
+with careful system optimizations that bring execution down to tens of microseconds.
+XGrammar features a minimal and portable C++ backend that can be easily integrated into multiple environments and frameworks,
+and is co-designed with the LLM inference engine, enabling it to outperform existing structured-generation
+solutions and to deliver zero-overhead structured generation in LLM inference.
+
+
+
+## Get Started
+
+Please visit our [documentation](https://xgrammar.mlc.ai/docs/) to get started with XGrammar.
+- [Installation](https://xgrammar.mlc.ai/docs/installation)
+- [Quick start](https://xgrammar.mlc.ai/docs/quick_start)
+
+
+
diff --git a/docs/.gitignore b/docs/.gitignore
new file mode 100644
index 0000000..69fa449
--- /dev/null
+++ b/docs/.gitignore
@@ -0,0 +1 @@
+_build/
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..3449de1
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS ?=
+SPHINXBUILD ?= python -m sphinx
+SOURCEDIR = .
+BUILDDIR = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 0000000..29aed7f
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,30 @@
+# XGrammar Documentation
+
+The documentation is built with [Sphinx](https://www.sphinx-doc.org/en/master/).
+
+## Dependencies
+
+Run the following command in this directory to install dependencies first:
+
+```bash
+pip3 install -r requirements.txt
+```
+
+## Build the Documentation
+
+Then you can build the documentation by running:
+
+```bash
+make html
+```
+
+## View the Documentation
+
+Run the following command to start a simple HTTP server:
+
+```bash
+cd _build/html
+python3 -m http.server
+```
+
+Then you can view the documentation in your browser at `http://localhost:8000` (the port can be customized by appending `-p PORT_NUMBER` to the python command above).
diff --git a/docs/_static/img/mlc-logo-with-text-landscape.svg b/docs/_static/img/mlc-logo-with-text-landscape.svg
new file mode 100644
index 0000000..e122d32
--- /dev/null
+++ b/docs/_static/img/mlc-logo-with-text-landscape.svg
@@ -0,0 +1,87 @@
+[SVG markup omitted: MLC logo with text, landscape layout]
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000..efc890c
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,102 @@
+# -*- coding: utf-8 -*-
+import os
+import sys
+
+import tlcpack_sphinx_addon
+
+# -- General configuration ------------------------------------------------
+
+sys.path.insert(0, os.path.abspath("../python"))
+sys.path.insert(0, os.path.abspath("../"))
+autodoc_mock_imports = ["torch"]
+
+# General information about the project.
+project = "XGrammar"
+author = "XGrammar Contributors"
+copyright = "2024, %s" % author
+
+# Version information.
+
+version = "0.1.0"
+release = "0.1.0"
+
+extensions = [
+    "sphinx_tabs.tabs",
+    "sphinx_toolbox.collapse",
+    "sphinxcontrib.httpdomain",
+    "sphinx.ext.autodoc",
+    "sphinx.ext.napoleon",
+    "sphinx_reredirects",
+]
+
+redirects = {}
+
+source_suffix = [".rst"]
+
+language = "en"
+
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = "sphinx"
+
+# A list of ignored prefixes for module index sorting.
+# If true, `todo` and `todoList` produce output, else they produce nothing.
+todo_include_todos = False
+
+# -- Options for HTML output ----------------------------------------------
+
+# The theme is set by the make target
+import sphinx_rtd_theme
+
+html_theme = "sphinx_rtd_theme"
+html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
+
+templates_path = []
+
+html_static_path = []
+
+footer_copyright = "© 2024 XGrammar"
+footer_note = " "
+
+# html_logo = "_static/img/mlc-logo-with-text-landscape.svg"
+
+html_theme_options = {
+    "logo_only": False,
+}
+
+header_links = [
+    ("Home", "https://xgrammar.mlc.ai/"),
+    ("Github", "https://github.com/mlc-ai/xgrammar"),
+]
+
+header_dropdown = {
+    "name": "Other Resources",
+    "items": [
+        ("MLC Course", "https://mlc.ai/"),
+        ("MLC Blog", "https://blog.mlc.ai/"),
+        ("MLC LLM", "https://llm.mlc.ai/"),
+        ("Web LLM", "https://webllm.mlc.ai/"),
+        ("SGLang", "https://github.com/sgl-project/sglang"),
+    ],
+}
+
+html_context = {
+    "footer_copyright": footer_copyright,
+    "footer_note": footer_note,
+    "header_links": header_links,
+    "header_dropdown": header_dropdown,
+    "display_github": True,
+    "github_user": "mlc-ai",
+    "github_repo": "xgrammar",
+    "github_version": "main/docs/",
+    "theme_vcs_pageview_mode": "edit",
+    # "header_logo": "/path/to/logo",
+    # "header_logo_link": "",
+    # "version_selector": "",
+}
+
+
+# add additional overrides
+templates_path += [tlcpack_sphinx_addon.get_templates_path()]
+html_static_path += [tlcpack_sphinx_addon.get_static_path()]
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 0000000..daef3a0
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,32 @@
+👋 Welcome to XGrammar
+======================
+
+`GitHub <https://github.com/mlc-ai/xgrammar>`_
+
+
+
+XGrammar is an open-source solution for flexible, portable, and fast structured generation.
+The mission of this project is to bring flexible, zero-overhead structured generation everywhere.
+
+Quick Start
+-----------
+
+Check out :ref:`quick-start` for quick start examples of using XGrammar.
+
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Get Started
+   :hidden:
+
+   start/install.rst
+   start/quick_start.rst
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Tutorials
+   :hidden:
+
+   tutorials/json_generation.rst
+   .. tutorials/backend_integration.rst .. TODO
+   .. tutorials/web_sdk.rst .. TODO
diff --git a/docs/make.bat b/docs/make.bat
new file mode 100644
index 0000000..32bb245
--- /dev/null
+++ b/docs/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.https://www.sphinx-doc.org/
+	exit /b 1
+)
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 0000000..2658857
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,8 @@
+sphinx-tabs==3.4.1
+sphinx-rtd-theme
+sphinx==5.2.3
+sphinx-toolbox==3.4.0
+tlcpack-sphinx-addon==0.2.2
+sphinxcontrib-httpdomain==1.8.1
+sphinxcontrib-napoleon==0.7
+sphinx-reredirects==0.1.2
diff --git a/docs/start/install.rst b/docs/start/install.rst
new file mode 100644
index 0000000..f69cfd8
--- /dev/null
+++ b/docs/start/install.rst
@@ -0,0 +1,95 @@
+.. _installation:
+
+Installation
+============
+
+.. contents:: Table of Contents
+   :local:
+   :depth: 2
+
+The XGrammar Python package can be installed from a prebuilt developer package,
+or built from source.
+
+
+.. _installation_prebuilt_package:
+
+Option 1. Prebuilt Package
+--------------------------
+
+We provide nightly built pip wheels for XGrammar.
+Run the installation command below in your terminal:
+
+.. note::
+   ❗ Whenever using Python, it is highly recommended to use **conda** to manage an isolated Python environment to avoid missing dependencies, incompatible versions, and package conflicts.
+   Please make sure your conda environment has Python and pip installed.
+
+.. code-block:: bash
+
+   conda activate your-environment
+   # TODO
+   python -m pip install ...
+
+
+.. python -m pip install --pre -U -f https://mlc.ai/wheels mlc-llm-nightly-cpu mlc-ai-nightly-cpu
+
+
+Then you can verify the installation from the command line:
+
+.. code-block:: bash
+
+   python -c "import xgrammar; print(xgrammar)"
+   # Prints out: <module 'xgrammar' from '/path/to/xgrammar/__init__.py'>
+
+|
+
+.. _installation_build_from_source:
+
+Option 2. Build from Source
+---------------------------
+
+We also provide options to build XGrammar from source.
+This step is useful when you want to make modifications to, or obtain a specific version of, XGrammar.
+
+
+**Step 1. Set up build dependencies.** To build from source, you need to ensure that the following build dependencies are satisfied:
+
+* CMake >= 3.18
+* Git
+
+.. code-block:: bash
+
+   # make sure to start with a fresh environment
+   conda env remove -n xgrammar-venv
+   # create the conda environment with build dependencies
+   conda create -n xgrammar-venv -c conda-forge \
+       "cmake>=3.18" \
+       git \
+       python=3.11
+   # enter the build environment
+   conda activate xgrammar-venv
+   # install Python dependencies
+   python3 -m pip install ninja pybind11 torch
+
+
+**Step 2. Configure, build and install.** A standard git-based workflow is recommended to download XGrammar.
+
+.. code-block:: bash
+
+   # 1. clone from GitHub
+   git clone --recursive https://github.com/mlc-ai/xgrammar.git && cd xgrammar
+   # 2. build XGrammar core and Python bindings
+   mkdir build && cd build
+   cmake .. -G Ninja
+   ninja
+   # 3. install the Python package
+   cd ../python
+   python3 -m pip install .
+   # 4. (optional) add the python directory to PYTHONPATH for in-place use
+   echo "export PYTHONPATH=$(pwd):\$PYTHONPATH" >> ~/.bashrc
+
+**Step 3. Validate installation.** You can validate that XGrammar was compiled successfully from the command line.
+The printed path should point to the directory you just built from source:
+
+.. code-block:: bash
+
+   python -c "import xgrammar; print(xgrammar)"
diff --git a/docs/start/quick_start.rst b/docs/start/quick_start.rst
new file mode 100644
index 0000000..dd416c4
--- /dev/null
+++ b/docs/start/quick_start.rst
@@ -0,0 +1,35 @@
+.. _quick-start:
+
+Quick Start
+===========
+
+Example
+-------
+
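+A minimal sketch of grammar-guided generation, assembled from the APIs
+shown in :ref:`tutorial-json-generation` (see the tutorial for the full decoding loop):
+
+.. code-block:: python
+
+   from xgrammar import BuiltinGrammar, CompiledGrammar, GrammarMatcher, TokenizerInfo
+   from transformers import AutoTokenizer
+
+   # Obtain tokenizer information for the target model (once per model).
+   tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
+   tokenizer_info = TokenizerInfo.from_huggingface(tokenizer)
+
+   # Compile the builtin JSON grammar and create a matcher for one request.
+   compiled_grammar = CompiledGrammar(BuiltinGrammar.json(), tokenizer_info, max_threads=8)
+   matcher = GrammarMatcher(compiled_grammar)
+
+   # In the decoding loop, fill and apply the token bitmask before sampling.
+   # See :ref:`tutorial-json-generation` for the full decoding loop.
+   token_bitmask = GrammarMatcher.allocate_token_bitmask(matcher.vocab_size)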
+
+
+What to Do Next
+---------------
+
+- Check out :ref:`tutorial-json-generation` for a usage guide of XGrammar.
+- To report a problem or ask a question, open a new issue in our `GitHub repo <https://github.com/mlc-ai/xgrammar>`_.
+
diff --git a/docs/tutorials/json_generation.rst b/docs/tutorials/json_generation.rst
new file mode 100644
index 0000000..bd17b35
--- /dev/null
+++ b/docs/tutorials/json_generation.rst
@@ -0,0 +1,199 @@
+.. _tutorial-json-generation:
+
+JSON Generation
+===============
+
+
+Install XGrammar
+~~~~~~~~~~~~~~~~
+
+:ref:`XGrammar <installation>` is available via pip.
+It is always recommended to install it in an isolated conda virtual environment.
+
+
+.. _tutorial-json-generation-construct-grammar:
+
+Step 1: Construct a grammar
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+XGrammar provides the following methods to flexibly construct a grammar.
+You can construct a grammar from any of these sources.
+
+**Method 1: Construct with a GBNF string.**
+The GBNF (GGML BNF) specification is available
+`here <https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md>`__.
+
+
+.. code:: python
+
+    from xgrammar import BNFGrammar
+
+    # Method 1: Construct with a GBNF string (a raw string keeps "\n" as a GBNF escape).
+    gbnf_grammar = r"""
+    root ::= (expr "=" term "\n")+
+    expr ::= term ([-+*/] term)*
+    term ::= num | "(" expr ")"
+    num ::= [0-9]+
+    """
+    gbnf_grammar = BNFGrammar(gbnf_grammar)
+
+
+**Method 2: Use the builtin JSON grammar.**
+
+.. code:: python
+
+    from xgrammar import BuiltinGrammar
+
+    # Method 2: Use the builtin JSON grammar.
+    json_grammar = BuiltinGrammar.json()
+
+
+**Method 3: Construct from a Pydantic model.**
+
+.. code:: python
+
+    from xgrammar import BuiltinGrammar
+    from pydantic import BaseModel
+
+    # Method 3: Construct from a Pydantic model.
+    class Person(BaseModel):
+        name: str
+        age: int
+    json_schema_pydantic = BuiltinGrammar.json_schema(Person)
+
+**Method 4: Construct from a JSON schema string.**
+
+.. code:: python
+
+    import json
+    from xgrammar import BuiltinGrammar
+
+    # Method 4: Construct from a JSON schema string.
+    person_schema = {
+        "title": "Person",
+        "type": "object",
+        "properties": {
+            "name": {
+                "type": "string"
+            },
+            "age": {
+                "type": "integer"
+            }
+        },
+        "required": ["name", "age"]
+    }
+    json_schema_str = json.dumps(person_schema)
+    json_schema_grammar = BuiltinGrammar.json_schema(json_schema_str)
+
+.. _tutorial-json-generation-compile-grammar:
+
+Step 2: Compile grammars
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+XGrammar supports multi-threaded grammar compilation.
+In addition, the grammar compiler provides a cache to avoid
+repeatedly compiling the same grammar.
+
+To initialize a grammar compiler, we first need to obtain
+information from the target tokenizer.
+As an example, here we use the Llama-3 model tokenizer.
+
+.. code:: python
+
+    from xgrammar import TokenizerInfo
+    from transformers import AutoTokenizer
+
+    # Obtain XGrammar TokenizerInfo from HuggingFace tokenizer (once per model).
+    tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
+    tokenizer_info = TokenizerInfo.from_huggingface(tokenizer)
+
+
+Now we can create a grammar compiler :class:`xgrammar.CachedGrammarCompiler`
+and compile the constructed grammar.
+The compiler caches every compiled grammar, so each grammar is compiled
+at most once.
+
+.. code:: python
+
+    from xgrammar import CachedGrammarCompiler
+
+    # Construct CachedGrammarCompiler.
+    compiler = CachedGrammarCompiler(tokenizer_info, max_threads=8)
+    # Compile the grammar.
+    compiled_grammar = compiler.compile_json_schema(json_schema_str)
+
+
+Alternatively, you can construct a :class:`xgrammar.CompiledGrammar` directly,
+which compiles the grammar once on construction and does not cache it.
+
+.. code:: python
+
+    from xgrammar import CompiledGrammar
+
+    # Construct CompiledGrammar directly from a grammar (no cache).
+    # The grammar is compiled during construction.
+    compiled_grammar = CompiledGrammar(json_schema_grammar, tokenizer_info, max_threads=8)
+
+
+
+.. _tutorial-json-generation-grammar-guided-generation:
+
+Step 3: Grammar-guided generation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+We can now use the compiled grammar in structured generation.
+Below are two pseudocode examples in Python, for
+single-request generation and batch-request generation respectively.
+
+**Single-request generation.**
+
+.. code:: python
+
+    from xgrammar import GrammarMatcher
+    import torch
+
+    # Create a grammar matcher from the compiled grammar.
+    matcher = GrammarMatcher(compiled_grammar)
+
+    token_bitmask = GrammarMatcher.allocate_token_bitmask(matcher.vocab_size)
+    while True:
+        logits = LLM.inference()  # logits is a tensor of shape (vocab_size,) on GPU
+        matcher.fill_next_token_bitmask(token_bitmask)
+        GrammarMatcher.apply_token_bitmask_inplace(logits, token_bitmask)
+
+        prob = torch.softmax(logits, dim=-1)  # get probabilities from logits
+        next_token_id = Sampler.sample(prob)  # use your own sampler
+
+        matcher.accept_token(next_token_id)
+        if matcher.is_terminated():  # or your own termination condition
+            break
+
+
+**Batch-request generation.**
+
+.. code:: python
+
+    from xgrammar import GrammarMatcher
+    import torch
+
+    batch_size = 10
+    # Create a grammar matcher for each request.
+    matchers = [GrammarMatcher(compiled_grammar) for _ in range(batch_size)]
+    token_bitmasks = GrammarMatcher.allocate_token_bitmask(matchers[0].vocab_size, batch_size)
+    while True:
+        logits = LLM.inference()  # logits is a tensor of shape (batch_size, vocab_size) on GPU
+        # This loop can be parallelized with threading.Thread,
+        # but measure the overhead in your engine first.
+        for i in range(batch_size):
+            matchers[i].fill_next_token_bitmask(token_bitmasks, i)
+        GrammarMatcher.apply_token_bitmask_inplace(logits, token_bitmasks)
+
+        prob = torch.softmax(logits, dim=-1)  # get probabilities from logits
+        next_token_ids = Sampler.sample(prob)  # use your own sampler
+
+        for i in range(batch_size):
+            matchers[i].accept_token(next_token_ids[i])
+            if matchers[i].is_terminated():  # or your own termination condition
+                requests[i].terminate()
+
diff --git a/scripts/build_site.sh b/scripts/build_site.sh
new file mode 100644
index 0000000..062f809
--- /dev/null
+++ b/scripts/build_site.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+set -euxo pipefail
+
+export PYTHONPATH=$PWD/python
+cd docs && make html && cd ..
+
+cd site && jekyll build && cd ..
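+# Copy the freshly built Sphinx HTML into the Jekyll site output.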
+rm -rf site/_site/docs
+cp -r docs/_build/html site/_site/docs
diff --git a/scripts/gh_deploy_site.sh b/scripts/gh_deploy_site.sh
new file mode 100644
index 0000000..1b21c52
--- /dev/null
+++ b/scripts/gh_deploy_site.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# NOTE: this script is triggered automatically by GitHub Actions
+# when merged into main
+
+set -euxo pipefail
+
+scripts/build_site.sh
+
+git fetch
+git checkout -B gh-pages origin/gh-pages
+rm -rf docs .gitignore
+mkdir -p docs
+cp -rf site/_site/* docs
+touch docs/.nojekyll
+
+DATE=`date`
+git add docs && git commit -am "Build at ${DATE}"
+git push origin gh-pages
+git checkout main && git submodule update
+echo "Finished deployment at ${DATE}"