
Commit

Merge branch 'main' into lite
TheoMcCabe authored May 16, 2024
2 parents 8977e8e + e23e02f commit 4ead7b1
Showing 32 changed files with 385 additions and 874 deletions.
6 changes: 6 additions & 0 deletions .gitignore
@@ -93,4 +93,10 @@ webapp/.next/
gpt_engineer/benchmark/benchmarks/apps/dataset
gpt_engineer/benchmark/benchmarks/mbpp/dataset

<<<<<<< lite
prompt
=======
gpt_engineer/benchmark/minimal_bench_config.toml

test.json
>>>>>>> main
22 changes: 22 additions & 0 deletions docs/open_models.md
@@ -114,6 +114,28 @@ That's it.

*If something doesn't work as expected, or you figure out how to improve the open LLM support, please let us know.*

Using Open Router models
==================

If you don't possess the hardware to run local LLMs yourself, you can use the hosting on [Open Router](https://openrouter.ai) and pay as you go for the tokens.

To set it up, sign in and purchase 💰 LLM credits. Pricing per token differs for [each model](https://openrouter.ai/models), but is mostly cheaper than OpenAI.

Then create an API key.

For example, to use [Meta: Llama 3 8B Instruct (extended)](https://openrouter.ai/models/meta-llama/llama-3-8b-instruct:extended) with `gpte`, set:

```bash
export OPENAI_API_BASE="https://openrouter.ai/api/v1"
export OPENAI_API_KEY="sk-key-from-open-router"
export MODEL_NAME="meta-llama/llama-3-8b-instruct:extended"
export LOCAL_MODEL=true
```

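Then run `gpte`, passing the model name:
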
```bash
gpte <project_dir> $MODEL_NAME --lite --temperature 0.1
```
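
To quickly sanity check the setup, you can call the OpenAI-compatible endpoint directly with the same environment variables. This is a minimal sketch, assuming the `requests` package is installed; it is not part of the official Open Router docs:

```python
import os

import requests

# Minimal sketch: one chat completion request through Open Router,
# reusing the environment variables exported above.
response = requests.post(
    f"{os.environ['OPENAI_API_BASE']}/chat/completions",
    headers={"Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}"},
    json={
        "model": os.environ["MODEL_NAME"],
        "messages": [{"role": "user", "content": "Say hello"}],
    },
    timeout=60,
)
response.raise_for_status()
print(response.json()["choices"][0]["message"]["content"])
```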

Using Azure models
==================

37 changes: 36 additions & 1 deletion gpt_engineer/applications/cli/file_selector.py
@@ -28,6 +28,7 @@
import toml

from gpt_engineer.core.default.disk_memory import DiskMemory
from gpt_engineer.core.default.file_store import FileStore
from gpt_engineer.core.default.paths import metadata_path
from gpt_engineer.core.files_dict import FilesDict
from gpt_engineer.core.git import filter_by_gitignore, is_git_repo
@@ -54,11 +55,15 @@ class FileSelector:
IGNORE_FOLDERS = {"site-packages", "node_modules", "venv", "__pycache__"}
FILE_LIST_NAME = "file_selection.toml"
COMMENT = (
"# Remove '#' to select a file.\n\n"
"# Remove '#' to select a file or turn off linting.\n\n"
"# Linting with BLACK (Python) enhances code suggestions from LLMs. "
"To disable linting, uncomment the relevant option in the linting settings.\n\n"
"# gpt-engineer can only read selected files. "
"Including irrelevant files will degrade performance, "
"cost additional tokens and potentially overflow token limit.\n\n"
)
LINTING_STRING = '[linting]\n# "linting" = "off"\n\n'
isLinting = True

def __init__(self, project_path: Union[str, Path]):
"""
@@ -113,6 +118,13 @@ def ask_for_files(self) -> FilesDict:
print(f"Warning: File not found {file_path}")
except UnicodeDecodeError:
print(f"Warning: File not UTF-8 encoded {file_path}, skipping")

if self.isLinting:
file_store = FileStore()
files = FilesDict(content_dict)
linted_files = file_store.linting(files)
return linted_files

return FilesDict(content_dict)

def editor_file_selector(
@@ -155,13 +167,25 @@ def editor_file_selector(
# Write to the toml file
with open(toml_file, "w") as f:
f.write(self.COMMENT)
f.write(self.LINTING_STRING)
f.write(s)

else:
# Load existing files from the .toml configuration
all_files = self.get_current_files(root_path)
s = toml.dumps({"files": {x: "selected" for x in all_files}})

# get linting status from the toml file
with open(toml_file, "r") as file:
linting_status = toml.load(file)
if (
"linting" in linting_status
and linting_status["linting"].get("linting", "").lower() == "off"
):
self.isLinting = False
self.LINTING_STRING = '[linting]\n"linting" = "off"\n\n'
print("\nLinting is disabled")

with open(toml_file, "r") as file:
selected_files = toml.load(file)

@@ -179,6 +203,7 @@ def editor_file_selector(
# Write the merged list back to the .toml for user review and modification
with open(toml_file, "w") as file:
file.write(self.COMMENT) # Ensure to write the comment
file.write(self.LINTING_STRING)
file.write(s)

print(
@@ -267,6 +292,16 @@ def get_files_from_toml(
selected_files = []
edited_tree = toml.load(toml_file) # Load the edited .toml file

# check if users have disabled linting or not
if (
"linting" in edited_tree
and edited_tree["linting"].get("linting", "").lower() == "off"
):
self.isLinting = False
print("\nLinting is disabled")
else:
self.isLinting = True

# Iterate through the files in the .toml and append selected files to the list
for file, _ in edited_tree["files"].items():
selected_files.append(file)
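The linting toggle introduced above is read back from `file_selection.toml` with the `toml` package. Below is a standalone sketch of the same check against a hypothetical selection file; it mirrors the logic in the diff but is not the repository code itself:

```python
import toml

# Sketch of the check added in this diff: linting stays on unless the
# user uncomments '"linting" = "off"' under the [linting] table of
# file_selection.toml.
def linting_enabled(toml_file: str) -> bool:
    with open(toml_file, "r") as f:
        config = toml.load(f)
    return config.get("linting", {}).get("linting", "").lower() != "off"

# Example selection file with linting turned off (hypothetical content).
with open("file_selection.toml", "w") as f:
    f.write('[linting]\n"linting" = "off"\n\n[files]\n"main.py" = "selected"\n')

print(linting_enabled("file_selection.toml"))  # -> False
```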
4 changes: 2 additions & 2 deletions gpt_engineer/applications/cli/main.py
@@ -247,7 +247,7 @@ def prompt_yesno() -> bool:
)
def main(
project_path: str = typer.Argument(".", help="path"),
model: str = typer.Argument("gpt-4-turbo", help="model id string"),
model: str = typer.Argument("gpt-4o", help="model id string"),
temperature: float = typer.Option(
0.1,
"--temperature",
@@ -460,7 +460,7 @@ def main(

if not files_dict or files_dict_before == files_dict:
print(
f"No changes applied. Could you please upload the debug_log_file.txt in {memory.path} folder in a github issue?"
f"No changes applied. Could you please upload the debug_log_file.txt in {memory.path}/logs folder in a github issue?"
)

else:
21 changes: 15 additions & 6 deletions gpt_engineer/benchmark/__main__.py
@@ -32,7 +32,7 @@
from gpt_engineer.applications.cli.main import load_env_if_needed
from gpt_engineer.benchmark.bench_config import BenchConfig
from gpt_engineer.benchmark.benchmarks.load import get_benchmark
from gpt_engineer.benchmark.run import print_results, run
from gpt_engineer.benchmark.run import export_yaml_results, print_results, run

app = typer.Typer() # creates a CLI app

@@ -72,8 +72,12 @@ def main(
),
],
bench_config: Annotated[
Optional[str], typer.Argument(help="optional task name in benchmark")
str, typer.Argument(help="optional task name in benchmark")
] = os.path.join(os.path.dirname(__file__), "default_bench_config.toml"),
yaml_output: Annotated[
Optional[str],
typer.Option(help="write results to a yaml file at the given path", show_default=False),
] = None,
verbose: Annotated[
bool, typer.Option(help="print results for each task", show_default=False)
] = False,
@@ -85,13 +89,12 @@
----------
path_to_agent : str
The file path to the Python module that contains a function called 'default_config_agent'.
benchmarks : str
A comma-separated string of benchmark names to run.
bench_config : Optional[str], default=default_bench_config.toml
bench_config : str, default=default_bench_config.toml
Configuration file for choosing which benchmark problems to run. See default config for more details.
yaml_output: Optional[str], default=None
Pass a path to a yaml file to have results written to file.
verbose : bool, default=False
A flag to indicate whether to print results for each task.
Returns
-------
None
@@ -101,6 +104,7 @@
config = BenchConfig.from_toml(bench_config)
print("using config file: " + bench_config)
benchmarks = list()
benchmark_results = dict()
for specific_config_name in vars(config):
specific_config = getattr(config, specific_config_name)
if hasattr(specific_config, "active"):
@@ -124,6 +128,11 @@
)
print_results(results)
print()
benchmark_results[benchmark_name] = {
"detailed": [result.to_dict() for result in results]
}
if yaml_output is not None:
export_yaml_results(yaml_output, benchmark_results, config.to_dict())


if __name__ == "__main__":
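`export_yaml_results` itself is defined in `gpt_engineer/benchmark/run.py`, which is not shown in this diff. As a rough illustration only — the signature comes from the call site above, the body is an assumption, not the actual implementation:

```python
import yaml  # assumes PyYAML is available


def export_yaml_results(yaml_path, complete_results, config):
    # Assumption: bundle the per-benchmark "detailed" results together
    # with the resolved benchmark config and write one YAML document.
    complete_results["config"] = config
    with open(yaml_path, "w") as f:
        yaml.dump(complete_results, f, indent=4)
```

With typer, the new `yaml_output` parameter surfaces as a `--yaml-output` option, e.g. `python -m gpt_engineer.benchmark <agent_path> --yaml-output results.yaml`.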
27 changes: 20 additions & 7 deletions gpt_engineer/benchmark/bench_config.py
@@ -1,6 +1,8 @@
from dataclasses import dataclass, field
from pathlib import Path

from tomlkit.items import Integer

from gpt_engineer.core.project_config import read_config


@@ -11,6 +13,7 @@ class AppsConfig:
test_end_index: int | None = 1
train_start_index: int | None = 0
train_end_index: int | None = 0
examples_per_problem: int | None = 10


@dataclass
@@ -25,19 +28,13 @@ class GptmeConfig:
active: bool | None = True


@dataclass
class GptengConfig:
active: bool | None = True


@dataclass
class BenchConfig:
"""Configuration for the GPT Engineer CLI and gptengineer.app via `gpt-engineer.toml`."""

apps: AppsConfig = field(default_factory=AppsConfig)
mbpp: MbppConfig = field(default_factory=MbppConfig)
gptme: GptmeConfig = field(default_factory=GptmeConfig)
gpteng: GptengConfig = field(default_factory=GptengConfig)

@classmethod
def from_toml(cls, config_file: Path | str):
Expand All @@ -52,5 +49,21 @@ def from_dict(cls, config_dict: dict):
apps=AppsConfig(**config_dict.get("apps", {})),
mbpp=MbppConfig(**config_dict.get("mbpp", {})),
gptme=GptmeConfig(**config_dict.get("gptme", {})),
gpteng=GptengConfig(**config_dict.get("gpteng", {})),
)

@staticmethod
def recursive_resolve(data_dict):
for key, value in data_dict.items():
if isinstance(value, Integer):
data_dict[key] = int(value)
elif isinstance(value, dict):
BenchConfig.recursive_resolve(value)

def to_dict(self):
dict_config = {
benchmark_name: {key: val for key, val in spec_config.__dict__.items()}
for benchmark_name, spec_config in self.__dict__.items()
}
BenchConfig.recursive_resolve(dict_config)

return dict_config
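`recursive_resolve` exists because values parsed by `tomlkit` come back as `tomlkit.items.Integer` rather than plain `int`; converting them keeps the `to_dict()` output made of plain Python types, presumably so the dict can be serialized cleanly later (e.g. for the new `--yaml-output` export). A small standalone illustration, not repository code:

```python
import tomlkit
from tomlkit.items import Integer

# tomlkit wraps parsed integers in its own Integer item type.
doc = tomlkit.loads("examples_per_problem = 10")
value = doc["examples_per_problem"]
print(type(value))  # <class 'tomlkit.items.Integer'>

# Same walk as BenchConfig.recursive_resolve: swap tomlkit Integers
# for plain ints, descending into nested dicts.
def recursive_resolve(data_dict):
    for key, val in data_dict.items():
        if isinstance(val, Integer):
            data_dict[key] = int(val)
        elif isinstance(val, dict):
            recursive_resolve(val)

config = {"apps": {"examples_per_problem": value}}
recursive_resolve(config)
print(type(config["apps"]["examples_per_problem"]))  # <class 'int'>
```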
5 changes: 3 additions & 2 deletions gpt_engineer/benchmark/benchmarks/apps/load.py
@@ -24,7 +24,6 @@
from gpt_engineer.core.prompt import Prompt

DATASET_PATH = Path(__file__).parent / "dataset"
MAX_N_TEST_EXAMPLES = 10


class AppsAssertion:
@@ -106,7 +105,9 @@ def load_apps(config: AppsConfig) -> Benchmark:
expected=problem.outputs[i],
command="python main.py" + ' "' + problem.inputs[i] + '"',
).evaluate
for i in range(min(len(problem.outputs), MAX_N_TEST_EXAMPLES))
for i in range(
min(len(problem.outputs), config.examples_per_problem)
)
},
)
)
8 changes: 0 additions & 8 deletions gpt_engineer/benchmark/benchmarks/gpteng/__init__.py

This file was deleted.
