
Commit

Merge branch 'main' into lite
TheoMcCabe authored May 16, 2024
2 parents 8977e8e + e23e02f commit 4ead7b1
Showing 32 changed files with 385 additions and 874 deletions.
6 changes: 6 additions & 0 deletions .gitignore
@@ -93,4 +93,10 @@ webapp/.next/
gpt_engineer/benchmark/benchmarks/apps/dataset
gpt_engineer/benchmark/benchmarks/mbpp/dataset

<<<<<<< lite
prompt
=======
gpt_engineer/benchmark/minimal_bench_config.toml

test.json
>>>>>>> main
22 changes: 22 additions & 0 deletions docs/open_models.md
@@ -114,6 +114,28 @@ That's it.

*If something doesn't work as expected, or you figure out how to improve the open LLM support, please let us know.*

Using Open Router models
==================

If you don't possess the hardware to run local LLMs yourself, you can use the hosting on [Open Router](https://openrouter.ai) and pay as you go for the tokens.

To set it up, sign in and purchase 💰 LLM credits. Pricing per token differs for [each model](https://openrouter.ai/models), but is mostly cheaper than OpenAI.

Then create an API key.

For example, to use [Meta: Llama 3 8B Instruct (extended)](https://openrouter.ai/models/meta-llama/llama-3-8b-instruct:extended) with `gpte`, set:

```bash
export OPENAI_API_BASE="https://openrouter.ai/api/v1"
export OPENAI_API_KEY="sk-key-from-open-router"
export MODEL_NAME="meta-llama/llama-3-8b-instruct:extended"
export LOCAL_MODEL=true
```

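Then run `gpte`, passing the model name:
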
```bash
gpte <project_dir> $MODEL_NAME --lite --temperature 0.1
```
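
To quickly sanity check the setup, you can call the OpenAI-compatible endpoint directly with the same environment variables. This is a minimal sketch, assuming the `requests` package is installed; it is not part of the official Open Router docs:

```python
import os

import requests

# Minimal sketch: one chat completion request through Open Router,
# reusing the environment variables exported above.
response = requests.post(
    f"{os.environ['OPENAI_API_BASE']}/chat/completions",
    headers={"Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}"},
    json={
        "model": os.environ["MODEL_NAME"],
        "messages": [{"role": "user", "content": "Say hello"}],
    },
    timeout=60,
)
response.raise_for_status()
print(response.json()["choices"][0]["message"]["content"])
```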

Using Azure models
==================

37 changes: 36 additions & 1 deletion gpt_engineer/applications/cli/file_selector.py
@@ -28,6 +28,7 @@
import toml

from gpt_engineer.core.default.disk_memory import DiskMemory
from gpt_engineer.core.default.file_store import FileStore
from gpt_engineer.core.default.paths import metadata_path
from gpt_engineer.core.files_dict import FilesDict
from gpt_engineer.core.git import filter_by_gitignore, is_git_repo
@@ -54,11 +55,15 @@ class FileSelector:
IGNORE_FOLDERS = {"site-packages", "node_modules", "venv", "__pycache__"}
FILE_LIST_NAME = "file_selection.toml"
COMMENT = (
"# Remove '#' to select a file.\n\n"
"# Remove '#' to select a file or turn off linting.\n\n"
"# Linting with BLACK (Python) enhances code suggestions from LLMs. "
"To disable linting, uncomment the relevant option in the linting settings.\n\n"
"# gpt-engineer can only read selected files. "
"Including irrelevant files will degrade performance, "
"cost additional tokens and potentially overflow token limit.\n\n"
)
LINTING_STRING = '[linting]\n# "linting" = "off"\n\n'
isLinting = True

def __init__(self, project_path: Union[str, Path]):
"""
@@ -113,6 +118,13 @@ def ask_for_files(self) -> FilesDict:
print(f"Warning: File not found {file_path}")
except UnicodeDecodeError:
print(f"Warning: File not UTF-8 encoded {file_path}, skipping")

if self.isLinting:
file_store = FileStore()
files = FilesDict(content_dict)
linted_files = file_store.linting(files)
return linted_files

return FilesDict(content_dict)

def editor_file_selector(
@@ -155,13 +167,25 @@ def editor_file_selector(
# Write to the toml file
with open(toml_file, "w") as f:
f.write(self.COMMENT)
f.write(self.LINTING_STRING)
f.write(s)

else:
# Load existing files from the .toml configuration
all_files = self.get_current_files(root_path)
s = toml.dumps({"files": {x: "selected" for x in all_files}})

# get linting status from the toml file
with open(toml_file, "r") as file:
linting_status = toml.load(file)
if (
"linting" in linting_status
and linting_status["linting"].get("linting", "").lower() == "off"
):
self.isLinting = False
self.LINTING_STRING = '[linting]\n"linting" = "off"\n\n'
print("\nLinting is disabled")

with open(toml_file, "r") as file:
selected_files = toml.load(file)

@@ -179,6 +203,7 @@ def editor_file_selector(
# Write the merged list back to the .toml for user review and modification
with open(toml_file, "w") as file:
file.write(self.COMMENT) # Ensure to write the comment
file.write(self.LINTING_STRING)
file.write(s)

print(
@@ -267,6 +292,16 @@ def get_files_from_toml(
selected_files = []
edited_tree = toml.load(toml_file) # Load the edited .toml file

# check if users have disabled linting or not
if (
"linting" in edited_tree
and edited_tree["linting"].get("linting", "").lower() == "off"
):
self.isLinting = False
print("\nLinting is disabled")
else:
self.isLinting = True

# Iterate through the files in the .toml and append selected files to the list
for file, _ in edited_tree["files"].items():
selected_files.append(file)
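The linting toggle introduced above is read back from `file_selection.toml` with the `toml` package. Below is a standalone sketch of the same check against a hypothetical selection file; it mirrors the logic in the diff but is not the repository code itself:

```python
import toml

# Sketch of the check added in this diff: linting stays on unless the
# user uncomments '"linting" = "off"' under the [linting] table of
# file_selection.toml.
def linting_enabled(toml_file: str) -> bool:
    with open(toml_file, "r") as f:
        config = toml.load(f)
    return config.get("linting", {}).get("linting", "").lower() != "off"

# Example selection file with linting turned off (hypothetical content).
with open("file_selection.toml", "w") as f:
    f.write('[linting]\n"linting" = "off"\n\n[files]\n"main.py" = "selected"\n')

print(linting_enabled("file_selection.toml"))  # -> False
```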
4 changes: 2 additions & 2 deletions gpt_engineer/applications/cli/main.py
@@ -247,7 +247,7 @@ def prompt_yesno() -> bool:
)
def main(
project_path: str = typer.Argument(".", help="path"),
model: str = typer.Argument("gpt-4-turbo", help="model id string"),
model: str = typer.Argument("gpt-4o", help="model id string"),
temperature: float = typer.Option(
0.1,
"--temperature",
@@ -460,7 +460,7 @@ def main(

if not files_dict or files_dict_before == files_dict:
print(
f"No changes applied. Could you please upload the debug_log_file.txt in {memory.path} folder in a github issue?"
f"No changes applied. Could you please upload the debug_log_file.txt in {memory.path}/logs folder in a github issue?"
)

else:
21 changes: 15 additions & 6 deletions gpt_engineer/benchmark/__main__.py
@@ -32,7 +32,7 @@
from gpt_engineer.applications.cli.main import load_env_if_needed
from gpt_engineer.benchmark.bench_config import BenchConfig
from gpt_engineer.benchmark.benchmarks.load import get_benchmark
from gpt_engineer.benchmark.run import print_results, run
from gpt_engineer.benchmark.run import export_yaml_results, print_results, run

app = typer.Typer() # creates a CLI app

@@ -72,8 +72,12 @@ def main(
),
],
bench_config: Annotated[
Optional[str], typer.Argument(help="optional task name in benchmark")
str, typer.Argument(help="optional task name in benchmark")
] = os.path.join(os.path.dirname(__file__), "default_bench_config.toml"),
yaml_output: Annotated[
Optional[str],
typer.Option(help="write results to a yaml file at the given path", show_default=False),
] = None,
verbose: Annotated[
bool, typer.Option(help="print results for each task", show_default=False)
] = False,
@@ -85,13 +89,12 @@
----------
path_to_agent : str
The file path to the Python module that contains a function called 'default_config_agent'.
benchmarks : str
A comma-separated string of benchmark names to run.
bench_config : Optional[str], default=default_bench_config.toml
bench_config : str, default=default_bench_config.toml
Configuration file for choosing which benchmark problems to run. See default config for more details.
yaml_output: Optional[str], default=None
Pass a path to a yaml file to have results written to file.
verbose : bool, default=False
A flag to indicate whether to print results for each task.
Returns
-------
None
@@ -101,6 +104,7 @@
config = BenchConfig.from_toml(bench_config)
print("using config file: " + bench_config)
benchmarks = list()
benchmark_results = dict()
for specific_config_name in vars(config):
specific_config = getattr(config, specific_config_name)
if hasattr(specific_config, "active"):
@@ -124,6 +128,11 @@
)
print_results(results)
print()
benchmark_results[benchmark_name] = {
"detailed": [result.to_dict() for result in results]
}
if yaml_output is not None:
export_yaml_results(yaml_output, benchmark_results, config.to_dict())


if __name__ == "__main__":
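`export_yaml_results` itself is defined in `gpt_engineer/benchmark/run.py`, which is not shown in this diff. As a rough illustration only — the signature comes from the call site above, the body is an assumption, not the actual implementation:

```python
import yaml  # assumes PyYAML is available


def export_yaml_results(yaml_path, complete_results, config):
    # Assumption: bundle the per-benchmark "detailed" results together
    # with the resolved benchmark config and write one YAML document.
    complete_results["config"] = config
    with open(yaml_path, "w") as f:
        yaml.dump(complete_results, f, indent=4)
```

With typer, the new `yaml_output` parameter surfaces as a `--yaml-output` option, e.g. `python -m gpt_engineer.benchmark <agent_path> --yaml-output results.yaml`.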
27 changes: 20 additions & 7 deletions gpt_engineer/benchmark/bench_config.py
@@ -1,6 +1,8 @@
from dataclasses import dataclass, field
from pathlib import Path

from tomlkit.items import Integer

from gpt_engineer.core.project_config import read_config


@@ -11,6 +13,7 @@ class AppsConfig:
test_end_index: int | None = 1
train_start_index: int | None = 0
train_end_index: int | None = 0
examples_per_problem: int | None = 10


@dataclass
@@ -25,19 +28,13 @@ class GptmeConfig:
active: bool | None = True


@dataclass
class GptengConfig:
active: bool | None = True


@dataclass
class BenchConfig:
"""Configuration for the GPT Engineer CLI and gptengineer.app via `gpt-engineer.toml`."""

apps: AppsConfig = field(default_factory=AppsConfig)
mbpp: MbppConfig = field(default_factory=MbppConfig)
gptme: GptmeConfig = field(default_factory=GptmeConfig)
gpteng: GptengConfig = field(default_factory=GptengConfig)

@classmethod
def from_toml(cls, config_file: Path | str):
Expand All @@ -52,5 +49,21 @@ def from_dict(cls, config_dict: dict):
apps=AppsConfig(**config_dict.get("apps", {})),
mbpp=MbppConfig(**config_dict.get("mbpp", {})),
gptme=GptmeConfig(**config_dict.get("gptme", {})),
gpteng=GptengConfig(**config_dict.get("gpteng", {})),
)

@staticmethod
def recursive_resolve(data_dict):
for key, value in data_dict.items():
if isinstance(value, Integer):
data_dict[key] = int(value)
elif isinstance(value, dict):
BenchConfig.recursive_resolve(value)

def to_dict(self):
dict_config = {
benchmark_name: {key: val for key, val in spec_config.__dict__.items()}
for benchmark_name, spec_config in self.__dict__.items()
}
BenchConfig.recursive_resolve(dict_config)

return dict_config
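`recursive_resolve` exists because values parsed by `tomlkit` come back as `tomlkit.items.Integer` rather than plain `int`; converting them keeps the `to_dict()` output made of plain Python types, presumably so the dict can be serialized cleanly later (e.g. for the new `--yaml-output` export). A small standalone illustration, not repository code:

```python
import tomlkit
from tomlkit.items import Integer

# tomlkit wraps parsed integers in its own Integer item type.
doc = tomlkit.loads("examples_per_problem = 10")
value = doc["examples_per_problem"]
print(type(value))  # <class 'tomlkit.items.Integer'>

# Same walk as BenchConfig.recursive_resolve: swap tomlkit Integers
# for plain ints, descending into nested dicts.
def recursive_resolve(data_dict):
    for key, val in data_dict.items():
        if isinstance(val, Integer):
            data_dict[key] = int(val)
        elif isinstance(val, dict):
            recursive_resolve(val)

config = {"apps": {"examples_per_problem": value}}
recursive_resolve(config)
print(type(config["apps"]["examples_per_problem"]))  # <class 'int'>
```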
5 changes: 3 additions & 2 deletions gpt_engineer/benchmark/benchmarks/apps/load.py
@@ -24,7 +24,6 @@
from gpt_engineer.core.prompt import Prompt

DATASET_PATH = Path(__file__).parent / "dataset"
MAX_N_TEST_EXAMPLES = 10


class AppsAssertion:
@@ -106,7 +105,9 @@ def load_apps(config: AppsConfig) -> Benchmark:
expected=problem.outputs[i],
command="python main.py" + ' "' + problem.inputs[i] + '"',
).evaluate
for i in range(min(len(problem.outputs), MAX_N_TEST_EXAMPLES))
for i in range(
min(len(problem.outputs), config.examples_per_problem)
)
},
)
)
8 changes: 0 additions & 8 deletions gpt_engineer/benchmark/benchmarks/gpteng/__init__.py

This file was deleted.
