diff --git a/.gitignore b/.gitignore index 5cbb23225b..e290f81e33 100644 --- a/.gitignore +++ b/.gitignore @@ -94,3 +94,5 @@ gpt_engineer/benchmark/benchmarks/apps/dataset gpt_engineer/benchmark/benchmarks/mbpp/dataset prompt + +.feature \ No newline at end of file diff --git a/gpt_engineer/applications/interactive_cli/agent.py b/gpt_engineer/applications/interactive_cli/agent.py index 11bb4634f0..33ae178988 100644 --- a/gpt_engineer/applications/interactive_cli/agent.py +++ b/gpt_engineer/applications/interactive_cli/agent.py @@ -1,12 +1,13 @@ from feature import Feature from file_selection import FileSelection from repository import Repository +from settings import Settings from agent_steps import ( initialize_new_feature, update_user_file_selection, check_for_unstaged_changes, confirm_feature_context_and_task_with_user, - run_improve_function, + run_task_loop, adjust_feature_task_or_files, update_task_description, ) @@ -34,17 +35,19 @@ def __init__( self.file_selection = FileSelection(project_path, repository) - def init(self): + def init(self, settings: Settings): - initialize_new_feature(self.ai, self.feature, self.repository) + initialize_new_feature( + self.ai, self.feature, self.repository, settings.no_branch + ) update_user_file_selection(self.file_selection) update_task_description(self.feature) - self.resume() + self.resume(settings) - def resume(self): + def resume(self, settings: Settings): implement = False @@ -57,7 +60,7 @@ def resume(self): check_for_unstaged_changes(self.repository) - run_improve_function( + run_task_loop( self.project_path, self.feature, self.repository, diff --git a/gpt_engineer/applications/interactive_cli/agent_steps.py b/gpt_engineer/applications/interactive_cli/agent_steps.py index cf8b85bd47..0d619cb6cf 100644 --- a/gpt_engineer/applications/interactive_cli/agent_steps.py +++ b/gpt_engineer/applications/interactive_cli/agent_steps.py @@ -28,7 +28,9 @@ def validate(self, document): ) -def initialize_new_feature(ai: AI, feature: Feature, repository: Repository): +def initialize_new_feature( + ai: AI, feature: Feature, repository: Repository, no_branch: bool +): feature.clear_feature() update_feature_description(feature) @@ -37,9 +39,9 @@ def initialize_new_feature(ai: AI, feature: Feature, repository: Repository): branch_name = cli_input("\nConfirm branch name: ", default=branch_name) - repository.create_branch(branch_name) - - print("\nFeature branch created.\n") + if not no_branch: + repository.create_branch(branch_name) + print("\nFeature branch created.\n") def update_user_file_selection(file_selection: FileSelection): @@ -77,14 +79,14 @@ def confirm_feature_context_and_task_with_user( feature: Feature, file_selection: FileSelection ): file_selection.update_yaml_from_tracked_files() + file_string = file_selection.get_pretty_from_yaml() feature_description = feature.get_description() - file_string = file_selection.get_pretty_from_yaml() task = feature.get_task() # list feature, files and task print(f"Feature: {feature_description}\n\n") - print(f"Files: {file_string}\n\n") + print(f"Files: \n\nrepo\n{file_string}\n\n") print(f"Task: {task}\n\n") # do you want to attempt this task? @@ -117,7 +119,7 @@ def adjust_feature_task_or_files(): # -def run_improve_function( +def run_task_loop( project_path, feature: Feature, repository: Repository, @@ -172,18 +174,19 @@ def review_changes( if result == "r": print("Deleting changes and rerunning generation...") repository.undo_unstaged_changes() - run_improve_function(project_path, feature, repository, ai, file_selection) + run_task_loop(project_path, feature, repository, ai, file_selection) if result == "c": - print("You have chosen to retry the generation.") + print("Completing task... ") repository.stage_all_changes() feature.complete_task() + file_selection.update_yaml_from_tracked_files() if cli_input("Do you want to start a new task? y/n: ").lower() in [ "y", "yes", ]: update_task_description(feature) - run_improve_function(project_path, feature, repository, ai, file_selection) + run_task_loop(project_path, feature, repository, ai, file_selection) return if result == "u": diff --git a/gpt_engineer/applications/interactive_cli/file_selection.py b/gpt_engineer/applications/interactive_cli/file_selection.py index ad3a03b09a..304877fe83 100644 --- a/gpt_engineer/applications/interactive_cli/file_selection.py +++ b/gpt_engineer/applications/interactive_cli/file_selection.py @@ -3,6 +3,7 @@ import subprocess from typing import List, Tuple +from collections import defaultdict import yaml @@ -17,40 +18,61 @@ def __init__(self, project_path: str, repository): self.yaml_path = os.path.join(project_path, ".feature", "files.yml") self._initialize() - def _create_nested_structure_from_file_paths(self, files_paths): - files_paths.sort() - file_structure = [] - for filepath in files_paths: - parts = filepath.split("/") - # Filter out the '.ticket' directory from paths - if ".ticket" in parts or ".feature" in parts: - continue - node = file_structure - for i, part in enumerate(parts[:-1]): - # Append '/' to part if it's a directory - directory = part if part.endswith("/") else part + "/" - found = False - for item in node: - if isinstance(item, dict) and directory in item: - node = item[directory] - found = True - break - if not found: - new_node = [] - # Insert directory at the correct position (before any file) - index = next( - (idx for idx, item in enumerate(node) if isinstance(item, str)), - len(node), - ) - node.insert(index, {directory: new_node}) - node = new_node - # Add the file to the last node, ensuring directories are listed first - if not parts[-1].endswith("/"): - node.append(parts[-1]) - - return file_structure - - def _write_yaml_with_comments(self, yaml_content): + def _paths_to_tree(self, paths): + def nested_dict(): + return defaultdict(nested_dict) + + tree = nested_dict() + + files_marker = "(./)" + + for path in paths: + parts = path.split(os.sep) + file = parts.pop() + d = tree + for part in parts: + d = d[part] + if files_marker not in d: + d[files_marker] = [] + d[files_marker].append(file) + + def default_to_regular(d): + if isinstance(d, defaultdict): + d = {k: default_to_regular(v) for k, v in d.items()} + return d + + def ordered_dict(data): + if isinstance(data, dict): + keys = sorted(data.keys(), key=lambda x: (x == files_marker, x)) + return {k: ordered_dict(data[k]) for k in keys} + return data + + ordered_tree = ordered_dict(default_to_regular(tree)) + + return ordered_tree + # return yaml.dump(tree, sort_keys=False) + + def _tree_to_paths(self, tree): + + files_marker = "(./)" + + def traverse_tree(tree, base_path=""): + paths = [] + if tree: + for key, value in tree.items(): + if key == files_marker: + if value: + for file in value: + paths.append(os.path.join(base_path, file)) + else: + subfolder_path = os.path.join(base_path, key) + paths.extend(traverse_tree(value, subfolder_path)) + return paths + + # tree = yaml.safe_load(yaml_content) + return traverse_tree(tree) + + def _write_yaml_with_header(self, yaml_content): with open(self.yaml_path, "w") as file: file.write( f"""# Complete list of files shared with the AI @@ -69,69 +91,44 @@ def _initialize(self): print("YAML file is missing or empty, generating YAML...") - file_structure = self._create_nested_structure_from_file_paths( - self.repository.get_tracked_files() - ) + tree = self._paths_to_tree(self.repository.get_tracked_files()) - self._write_yaml_with_comments( - yaml.safe_dump( - file_structure, default_flow_style=False, sort_keys=False, indent=2 - ) - ) + self._write_yaml_with_header(yaml.dump(tree, sort_keys=False)) def _get_from_yaml(self) -> Tuple[List[str], List[str]]: with open(self.yaml_path, "r") as file: - original_content = file.readlines()[3:] # Skip the 3 instruction lines + original_content_lines = file.readlines()[ + 3: + ] # Skip the 3 instruction lines # Create a version of the content with all lines uncommented - uncommented_content = "".join(line.lstrip("# ") for line in original_content) - - # Load the original and uncommented content as YAML - original_structure = yaml.safe_load("".join(original_content)) - uncommented_structure = yaml.safe_load(uncommented_content) + commented_content = "".join(original_content_lines) + uncommented_content = "".join( + line.replace("# ", "").replace("#", "") for line in original_content_lines + ) - def recurse_items(items, path=""): - paths = [] - if isinstance(items, dict): - for key, value in items.items(): - new_path = os.path.join(path, key) - paths.extend(recurse_items(value, new_path)) - elif isinstance(items, list): - for item in items: - if isinstance(item, dict): - paths.extend(recurse_items(item, path)) - else: - paths.append(os.path.join(path, item)) - else: - paths.append(path) - return paths + print(uncommented_content) - original_paths = recurse_items(original_structure) - uncommented_paths = recurse_items(uncommented_structure) + included_files = self._tree_to_paths(yaml.safe_load(commented_content)) + all_files = self._tree_to_paths(yaml.safe_load(uncommented_content)) # Determine excluded files by finding the difference - excluded_files = list(set(uncommented_paths) - set(original_paths)) + excluded_files = list(set(all_files) - set(included_files)) - return (original_paths, excluded_files) + return (included_files, excluded_files) def _set_to_yaml(self, selected_files, excluded_files): # Dont worry about commenting lines if they are no excluded files if not excluded_files: - file_structure = self._create_nested_structure_from_file_paths( - selected_files - ) + tree = self._paths_to_tree(selected_files) - self._write_yaml_with_comments( - yaml.safe_dump( - file_structure, default_flow_style=False, sort_keys=False, indent=2 - ) - ) + self._write_yaml_with_header(yaml.dump(tree, sort_keys=False)) return all_files = list(selected_files) + list(excluded_files) - current_structure = self._create_nested_structure_from_file_paths(all_files) + current_tree = self._paths_to_tree(all_files) # Add a # in front of files which are excluded. This is a marker for us to go back and properly comment them out def mark_excluded_files(structure, prefix=""): @@ -144,7 +141,7 @@ def mark_excluded_files(structure, prefix=""): if full_path in excluded_files: structure[i] = f"#{item}" - mark_excluded_files(current_structure) + mark_excluded_files(current_tree) # Find all files marked for commenting - add comment and remove the mark. def comment_marked_files(yaml_content): @@ -153,18 +150,16 @@ def comment_marked_files(yaml_content): updated_lines = [] for line in lines: if "#" in line: - line = "#" + line.replace("#", "").strip() + line = "#" + line.replace("#", "") updated_lines.append(line) return "\n".join(updated_lines) - content = yaml.safe_dump( - current_structure, default_flow_style=False, sort_keys=False, indent=2 - ) + content = yaml.dump(tree, sort_keys=False) updated_content = comment_marked_files(content) - self._write_yaml_with_comments(updated_content) + self._write_yaml_with_header(updated_content) return @@ -177,6 +172,8 @@ def update_yaml_from_tracked_files(self): selected_files, excluded_files = self._get_from_yaml() + print(excluded_files) + # If there are no changes, do nothing if set(tracked_files) == set(selected_files + excluded_files): return @@ -219,8 +216,11 @@ def insert_path(tree, path_parts): # Helper function to format the tree into a string with ASCII graphics def format_tree(tree, prefix=""): lines = [] - # Sorted to keep alphabetical order - items = sorted(tree.items()) + # Separate directories and files + directories = {k: v for k, v in tree.items() if v} + files = {k: v for k, v in tree.items() if not v} + # Sort items to keep alphabetical order, directories first + items = sorted(directories.items()) + sorted(files.items()) for i, (key, sub_tree) in enumerate(items): if i == len(items) - 1: # Last item uses └── lines.append(prefix + "└── " + key) diff --git a/gpt_engineer/applications/interactive_cli/generation_tools.py b/gpt_engineer/applications/interactive_cli/generation_tools.py index ad3487840b..29c43117c8 100644 --- a/gpt_engineer/applications/interactive_cli/generation_tools.py +++ b/gpt_engineer/applications/interactive_cli/generation_tools.py @@ -128,3 +128,67 @@ def generate_suggested_tasks( xml = messages[-1].content.strip() return parse_task_xml_to_class(xml).tasks + + +def fuzzy_parse_yaml_files(ai: AI, yaml_string: str) -> str: + system_prompt = """ +You are a fuzzy yaml parser, who correctly parses yaml even if it is not strictly valid. + +A user has been given a yaml representation of a file structure, represented in block collections like so: + +- folder1/: + - folder2/: + - file3 + - file4 + - file1 + - file2 +- file5 +- file6 + +They have been asked to comment out any files that they wish to be excluded. + +An example of the yaml file after commenting might be something like this: + +- folder1/: + - folder2/: + # - file1 + # - file2 + - folder3/: + - file3 + # - file4 + - file5 +# - file6 +- file7 + +Although this isnt strictly correct yaml, their intentions are clear. + +Your job is to return the list of included files, and the list of excluded files as json. + +The json you should return will be like this: + +{ + "included_files": [ + "folder1/file5", + "folder1/folder3/file3", + "file7" + ], + "excluded_files": [ + "folder1/folder2/file1", + "folder1/folder2/file2", + "folder1/folder3/file4", + "folder1/file5", + ] +} + +Files can only be included or excluded, not both. If you are confused about the state of a file make your best guess - and if you really arent sure then mark it as included. +""" + + # ai.llm.callbacks.clear() # silent + + messages = ai.start(system_prompt, yaml_string, step_name="fuzzy-parse-yaml") + + # ai.llm.callbacks.append(StreamingStdOutCallbackHandler()) + + xml = messages[-1].content.strip() + + return parse_task_xml_to_class(xml).tasks diff --git a/gpt_engineer/applications/interactive_cli/main.py b/gpt_engineer/applications/interactive_cli/main.py index 073413311c..b6835b7c16 100644 --- a/gpt_engineer/applications/interactive_cli/main.py +++ b/gpt_engineer/applications/interactive_cli/main.py @@ -4,6 +4,7 @@ from dotenv import load_dotenv from feature import Feature from repository import Repository +from settings import Settings from gpt_engineer.core.ai import AI @@ -15,6 +16,12 @@ def main( project_path: str = typer.Argument(".", help="path"), model: str = typer.Argument("gpt-4-turbo", help="model id string"), new: bool = typer.Option(False, "--new", "-n", help="Initialize new feature."), + no_branch: bool = typer.Option( + False, + "--no-branch", + "-nb", + help="Do not create a new feature branch for this work.", + ), temperature: float = typer.Option( 0.1, "--temperature", @@ -56,10 +63,12 @@ def main( agent = FeatureAgent(project_path, feature, repository, ai) + settings = Settings(no_branch) + if new: - agent.init() + agent.init(settings) else: - agent.resume() + agent.resume(settings) if __name__ == "__main__": diff --git a/gpt_engineer/applications/interactive_cli/settings.py b/gpt_engineer/applications/interactive_cli/settings.py new file mode 100644 index 0000000000..fa12988c65 --- /dev/null +++ b/gpt_engineer/applications/interactive_cli/settings.py @@ -0,0 +1,3 @@ +class Settings: + def __init__(self, no_branch: bool = False): + self.no_branch = no_branch diff --git a/quicktest.py b/quicktest.py new file mode 100644 index 0000000000..0f2a445d4e --- /dev/null +++ b/quicktest.py @@ -0,0 +1,66 @@ +import os +import yaml +from collections import defaultdict + + +def paths_to_yaml(paths): + def nested_dict(): + return defaultdict(nested_dict) + + tree = nested_dict() + + for path in paths: + parts = path.split(os.sep) + file = parts.pop() + d = tree + for part in parts: + d = d[part] + if "/" not in d: + d["/"] = [] + d["/"].append(file) + + def default_to_regular(d): + if isinstance(d, defaultdict): + d = {k: default_to_regular(v) for k, v in d.items()} + return d + + tree = default_to_regular(tree) + + return yaml.dump(tree, sort_keys=False) + + +def yaml_to_paths(yaml_content): + def traverse_tree(tree, base_path=""): + paths = [] + for key, value in tree.items(): + if key == "./": + for file in value: + paths.append(os.path.join(base_path, file)) + else: + subfolder_path = os.path.join(base_path, key) + paths.extend(traverse_tree(value, subfolder_path)) + return paths + + tree = yaml.safe_load(yaml_content) + return traverse_tree(tree) + + +# Example usage +yaml_content = """ +folder: + ./: + # - file1.txt + - file2.txt + subfolder: + ./: + - file3.txt +""" + +paths = yaml_to_paths(yaml_content) +print(paths) + + +# paths = ["folder/file1.txt", "folder/file2.txt", "folder/subfolder/file3.txt"] + +# yaml_output = paths_to_yaml(paths) +# print(yaml_output) diff --git a/tests/applications/interactive_cli/__init__.py b/tests/applications/interactive_cli/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/applications/interactive_cli/test_file_selection.py b/tests/applications/interactive_cli/test_file_selection.py new file mode 100644 index 0000000000..6a069e58cf --- /dev/null +++ b/tests/applications/interactive_cli/test_file_selection.py @@ -0,0 +1,118 @@ +import os +import shutil +import tempfile +import unittest + +from gpt_engineer.applications.interactive_cli.file_selection import FileSelection + + +class MockRepository: + def __init__(self, files): + self.files = files + + def get_tracked_files(self): + return self.files + + +class TestFileSelection(unittest.TestCase): + + def setUp(self): + # Create a temporary directory for the test + self.test_dir = tempfile.mkdtemp() + self.project_path = self.test_dir + os.makedirs(os.path.join(self.project_path, ".feature"), exist_ok=True) + + # Initial file structure for the mock repository + self.initial_files = [ + "folder1/file1", + "folder1/file2", + "folder1/folder2/file3", + "folder1/folder2/file4", + "file5", + "file6", + ] + self.repository = MockRepository(self.initial_files) + + # Initialize the FileSelection object + self.file_selection = FileSelection(self.project_path, self.repository) + + def tearDown(self): + # Remove the temporary directory after the test + shutil.rmtree(self.test_dir) + + def test_lifecycle(self): + # Step 1: Create YAML file from the mock repository + self.file_selection._initialize() + expected_yaml_initial = """# Complete list of files shared with the AI +# Please comment out any files not needed as context for this change +# This saves money and avoids overwhelming the AI +- folder1/: + - folder2/: + - file3 + - file4 + - file1 + - file2 +- file5 +- file6 +""" + with open(self.file_selection.yaml_path, "r") as file: + initial_yaml_content = file.read() + + self.assertEqual(initial_yaml_content, expected_yaml_initial) + + # Step 2: Update the YAML file directly (simulating user comments) + edited_yaml_content = """# Complete list of files shared with the AI +# Please comment out any files not needed as context for this change +# This saves money and avoids overwhelming the AI +- folder1/: + - folder2/: + # - file3 + # - file4 + # - file1 + - file2 +# - file5 +- file6 +""" + with open(self.file_selection.yaml_path, "w") as file: + file.write(edited_yaml_content) + + # Step 3: Update tracked files in the repository and update the YAML file + new_files = [ + "folder1/file1", + "folder1/file2", + "folder1/folder2/file3", + "folder1/folder2/file4", + "file5", + "file6", + "newfile7", + ] + self.repository.files = new_files + self.file_selection.update_yaml_from_tracked_files() + + expected_yaml_updated = """# Complete list of files shared with the AI +# Please comment out any files not needed as context for this change +# This saves money and avoids overwhelming the AI +- folder1/: + - folder2/: + # - file3 + # - file4 + # - file1 + - file2 +# - file5 +- file6 +- newfile7 +""" + with open(self.file_selection.yaml_path, "r") as file: + updated_yaml_content = file.read() + + self.assertEqual(updated_yaml_content, expected_yaml_updated) + + # Step 4: Get files from YAML and verify + selected_files = self.file_selection.get_from_yaml() + expected_selected_files = ["folder1/file2", "file6", "newfile7"] + + self.assertEqual(selected_files, expected_selected_files) + + +if __name__ == "__main__": + unittest.main()