forked from microsoft/graphrag
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'microsoft-main' into cutomize
- Loading branch information
Showing
39 changed files
with
955 additions
and
2,892 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
name: Python Smoke Tests | ||
on: | ||
push: | ||
branches: [main] | ||
pull_request: | ||
branches: [main] | ||
|
||
permissions: | ||
contents: read | ||
pull-requests: read | ||
|
||
concurrency: | ||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} | ||
# Only run for the latest commit | ||
cancel-in-progress: true | ||
|
||
env: | ||
POETRY_VERSION: 1.8.3 | ||
|
||
jobs: | ||
python-ci: | ||
strategy: | ||
matrix: | ||
python-version: ["3.10", "3.11"] # add 3.12 once gensim supports it. TODO: watch this issue - https://github.com/piskvorky/gensim/issues/3510 | ||
os: [ubuntu-latest, windows-latest] | ||
env: | ||
DEBUG: 1 | ||
GRAPHRAG_LLM_TYPE: "azure_openai_chat" | ||
GRAPHRAG_EMBEDDING_TYPE: "azure_openai_embedding" | ||
GRAPHRAG_API_KEY: ${{ secrets.OPENAI_API_KEY }} | ||
GRAPHRAG_API_BASE: ${{ secrets.GRAPHRAG_API_BASE }} | ||
GRAPHRAG_API_VERSION: ${{ secrets.GRAPHRAG_API_VERSION }} | ||
GRAPHRAG_LLM_DEPLOYMENT_NAME: ${{ secrets.GRAPHRAG_LLM_DEPLOYMENT_NAME }} | ||
GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME: ${{ secrets.GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME }} | ||
GRAPHRAG_CACHE_CONTAINER_NAME: "cicache" | ||
GRAPHRAG_CACHE_BASE_DIR: "cache" | ||
GRAPHRAG_LLM_MODEL: ${{ secrets.GRAPHRAG_LLM_MODEL }} | ||
GRAPHRAG_EMBEDDING_MODEL: ${{ secrets.GRAPHRAG_EMBEDDING_MODEL }} | ||
GRAPHRAG_ENTITY_EXTRACTION_ENCODING_MODEL: ${{ secrets.GRAPHRAG_ENTITY_EXTRACTION_ENCODING_MODEL }} | ||
# We have Windows + Linux runners in 3.10 and 3.11, so we need to divide the rate limits by 4 | ||
GRAPHRAG_LLM_TPM: 45_000 # 180,000 / 4 | ||
GRAPHRAG_LLM_RPM: 270 # 1,080 / 4 | ||
GRAPHRAG_EMBEDDING_TPM: 87_500 # 350,000 / 4 | ||
GRAPHRAG_EMBEDDING_RPM: 525 # 2,100 / 4 | ||
GRAPHRAG_CHUNK_SIZE: 1200 | ||
GRAPHRAG_CHUNK_OVERLAP: 0 | ||
# Azure AI Search config | ||
AZURE_AI_SEARCH_URL_ENDPOINT: ${{ secrets.AZURE_AI_SEARCH_URL_ENDPOINT }} | ||
AZURE_AI_SEARCH_API_KEY: ${{ secrets.AZURE_AI_SEARCH_API_KEY }} | ||
|
||
runs-on: ${{ matrix.os }} | ||
steps: | ||
- uses: actions/checkout@v4 | ||
|
||
- uses: dorny/paths-filter@v3 | ||
id: changes | ||
with: | ||
filters: | | ||
python: | ||
- 'graphrag/**/*' | ||
- 'poetry.lock' | ||
- 'pyproject.toml' | ||
- '**/*.py' | ||
- '**/*.toml' | ||
- '**/*.ipynb' | ||
- '.github/workflows/python*.yml' | ||
- 'tests/smoke/*' | ||
- name: Set up Python ${{ matrix.python-version }} | ||
uses: actions/setup-python@v5 | ||
with: | ||
python-version: ${{ matrix.python-version }} | ||
|
||
- name: Install Poetry | ||
uses: abatilo/[email protected] | ||
with: | ||
poetry-version: $POETRY_VERSION | ||
|
||
- name: Install dependencies | ||
shell: bash | ||
run: | | ||
poetry self add setuptools wheel | ||
poetry run python -m pip install gensim | ||
poetry install | ||
- name: Build | ||
run: | | ||
poetry build | ||
- name: Install Azurite | ||
id: azuright | ||
uses: potatoqualitee/[email protected] | ||
|
||
- name: Smoke Test | ||
if: steps.changes.outputs.python == 'true' | ||
run: | | ||
poetry run poe test_smoke | ||
- uses: actions/upload-artifact@v4 | ||
if: always() | ||
with: | ||
name: smoke-test-artifacts-${{ matrix.python-version }}-${{ matrix.poetry-version }}-${{ runner.os }} | ||
path: tests/fixtures/*/output | ||
|
||
- name: E2E Test | ||
if: steps.changes.outputs.python == 'true' | ||
run: | | ||
./scripts/e2e-test.sh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
{ | ||
"changes": [ | ||
{ | ||
"description": "Added default columns for vector store at create_pipeline_config. No change for other cases.", | ||
"type": "patch" | ||
}, | ||
{ | ||
"description": "Change json parsing error in the map step of global search to warning", | ||
"type": "patch" | ||
}, | ||
{ | ||
"description": "Fix Local Search breaking when loading Embeddings input. Defaulting overwrite to True as in the rest of the vector store config", | ||
"type": "patch" | ||
}, | ||
{ | ||
"description": "Fix json parsing when LLM returns faulty responses", | ||
"type": "patch" | ||
}, | ||
{ | ||
"description": "Fix missing community reports and refactor community context builder", | ||
"type": "patch" | ||
}, | ||
{ | ||
"description": "Fixed a bug that erased the vector database, added a new parameter to specify the config file path, and updated the documentation accordingly.", | ||
"type": "patch" | ||
}, | ||
{ | ||
"description": "Try parsing json before even repairing", | ||
"type": "patch" | ||
}, | ||
{ | ||
"description": "Update Prompt Tuning meta prompts with finer examples", | ||
"type": "patch" | ||
}, | ||
{ | ||
"description": "Update default entity extraction and gleaning prompts to reduce hallucinations", | ||
"type": "patch" | ||
}, | ||
{ | ||
"description": "add encoding-model to entity/claim extraction config", | ||
"type": "patch" | ||
}, | ||
{ | ||
"description": "add encoding-model to text chunking config", | ||
"type": "patch" | ||
}, | ||
{ | ||
"description": "add user prompt to history-tracking llm", | ||
"type": "patch" | ||
}, | ||
{ | ||
"description": "update config reader to allow for zero gleans", | ||
"type": "patch" | ||
}, | ||
{ | ||
"description": "update config-reader to allow for empty chunk-by arrays", | ||
"type": "patch" | ||
}, | ||
{ | ||
"description": "update history-tracking LLM to use 'assistant' instead of 'system' in output history.", | ||
"type": "patch" | ||
}, | ||
{ | ||
"description": "use history argument in hash key computation; add history input to cache data", | ||
"type": "patch" | ||
} | ||
], | ||
"created_at": "2024-08-06T00:25:52+00:00", | ||
"version": "0.2.1" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
{ | ||
"changes": [ | ||
{ | ||
"description": "Add a check if there is no community record added in local search context", | ||
"type": "patch" | ||
}, | ||
{ | ||
"description": "Add separate workflow for Python Tests", | ||
"type": "patch" | ||
}, | ||
{ | ||
"description": "Docs updates", | ||
"type": "patch" | ||
}, | ||
{ | ||
"description": "Run smoke tests on 4o", | ||
"type": "patch" | ||
} | ||
], | ||
"created_at": "2024-08-08T22:40:57+00:00", | ||
"version": "0.2.2" | ||
} |
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.