-
Notifications
You must be signed in to change notification settings - Fork 5.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: +Reverse engineering related logic #800
Changes from all commits
78af904
7be58b0
831ddb1
af501f7
7ec0118
9e84e63
633c772
67bf899
1b9ce4a
aa09095
3a2f162
9bcaa67
c1552d7
23fa792
884a1df
bc5a509
2689cbc
865148d
fb7518c
6b527e3
525c62b
1008bbb
027f1e8
5f88e12
34225d0
df8f929
6d6248f
90182a5
b48f719
19126e4
cbdfac3
55d95fe
b366bf1
c380cf6
28727a0
e8d2819
fa622c2
1b0dfbc
dadd09b
0e864dc
c720c1d
739452e
5bc17f3
9263fb8
e14b43a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,10 +4,11 @@ | |
@Time : 2023/12/19 | ||
@Author : mashenquan | ||
@File : rebuild_class_view.py | ||
@Desc : Rebuild class view info | ||
@Desc : Reconstructs class diagram from a source code project. | ||
""" | ||
import re | ||
|
||
from pathlib import Path | ||
from typing import Optional, Set, Tuple | ||
|
||
import aiofiles | ||
|
||
|
@@ -21,86 +22,144 @@ | |
GRAPH_REPO_FILE_REPO, | ||
) | ||
from metagpt.logs import logger | ||
from metagpt.repo_parser import RepoParser | ||
from metagpt.schema import ClassAttribute, ClassMethod, ClassView | ||
from metagpt.utils.common import split_namespace | ||
from metagpt.repo_parser import DotClassInfo, RepoParser | ||
from metagpt.schema import UMLClassView | ||
from metagpt.utils.common import concat_namespace, split_namespace | ||
from metagpt.utils.di_graph_repository import DiGraphRepository | ||
from metagpt.utils.graph_repository import GraphKeyword, GraphRepository | ||
|
||
|
||
class RebuildClassView(Action): | ||
""" | ||
Reconstructs a graph repository about class diagram from a source code project. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This class needs a more explicit functional description and design: the current description is vague, the `run` interface has no real input parameters, and all of its inputs and outputs are implicit. |
||
|
||
Attributes: | ||
graph_db (Optional[GraphRepository]): The optional graph repository. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This simply duplicates the variable declaration. Since this class is actually a pydantic.BaseModel, I don't think the annotation here is needed; the fields should explain themselves. |
||
""" | ||
|
||
graph_db: Optional[GraphRepository] = None | ||
|
||
async def run(self, with_messages=None, format=config.prompt_schema): | ||
""" | ||
Implementation of `Action`'s `run` method. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
||
Args: | ||
with_messages (Optional[Type]): An optional argument specifying messages to react to. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This parameter needs a proper type annotation and a meaningful comment, rather than LLM-generated text that adds no information. |
||
format (str): The format for the prompt schema. | ||
""" | ||
iorisa marked this conversation as resolved.
Show resolved
Hide resolved
|
||
graph_repo_pathname = self.context.git_repo.workdir / GRAPH_REPO_FILE_REPO / self.context.git_repo.workdir.name | ||
graph_db = await DiGraphRepository.load_from(str(graph_repo_pathname.with_suffix(".json"))) | ||
self.graph_db = await DiGraphRepository.load_from(str(graph_repo_pathname.with_suffix(".json"))) | ||
repo_parser = RepoParser(base_directory=Path(self.i_context)) | ||
# use pylint | ||
class_views, relationship_views, package_root = await repo_parser.rebuild_class_views(path=Path(self.i_context)) | ||
await GraphRepository.update_graph_db_with_class_views(graph_db, class_views) | ||
await GraphRepository.update_graph_db_with_class_relationship_views(graph_db, relationship_views) | ||
await GraphRepository.update_graph_db_with_class_views(self.graph_db, class_views) | ||
await GraphRepository.update_graph_db_with_class_relationship_views(self.graph_db, relationship_views) | ||
await GraphRepository.rebuild_composition_relationship(self.graph_db) | ||
# use ast | ||
direction, diff_path = self._diff_path(path_root=Path(self.i_context).resolve(), package_root=package_root) | ||
symbols = repo_parser.generate_symbols() | ||
for file_info in symbols: | ||
# Align to the same root directory in accordance with `class_views`. | ||
file_info.file = self._align_root(file_info.file, direction, diff_path) | ||
await GraphRepository.update_graph_db_with_file_info(graph_db, file_info) | ||
await self._create_mermaid_class_views(graph_db=graph_db) | ||
await graph_db.save() | ||
|
||
async def _create_mermaid_class_views(self, graph_db): | ||
path = Path(self.context.git_repo.workdir) / DATA_API_DESIGN_FILE_REPO | ||
await GraphRepository.update_graph_db_with_file_info(self.graph_db, file_info) | ||
await self._create_mermaid_class_views() | ||
await self.graph_db.save() | ||
|
||
async def _create_mermaid_class_views(self) -> str: | ||
"""Creates a Mermaid class diagram using data from the `graph_db` graph repository. | ||
|
||
This method utilizes information stored in the graph repository to generate a Mermaid class diagram. | ||
Returns: | ||
mermaid class diagram file name. | ||
""" | ||
iorisa marked this conversation as resolved.
Show resolved
Hide resolved
|
||
path = self.context.git_repo.workdir / DATA_API_DESIGN_FILE_REPO | ||
path.mkdir(parents=True, exist_ok=True) | ||
pathname = path / self.context.git_repo.workdir.name | ||
async with aiofiles.open(str(pathname.with_suffix(".mmd")), mode="w", encoding="utf-8") as writer: | ||
filename = str(pathname.with_suffix(".mmd")) | ||
async with aiofiles.open(filename, mode="w", encoding="utf-8") as writer: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Generally speaking, the file should just be written once at the end; there seems to be no need to write to it repeatedly in the middle of the process. |
||
content = "classDiagram\n" | ||
logger.debug(content) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Delete line 81 (the `logger.debug(content)` call directly above). |
||
await writer.write(content) | ||
# class names | ||
rows = await graph_db.select(predicate=GraphKeyword.IS, object_=GraphKeyword.CLASS) | ||
rows = await self.graph_db.select(predicate=GraphKeyword.IS, object_=GraphKeyword.CLASS) | ||
class_distinct = set() | ||
relationship_distinct = set() | ||
for r in rows: | ||
await RebuildClassView._create_mermaid_class(r.subject, graph_db, writer, class_distinct) | ||
content = await self._create_mermaid_class(r.subject) | ||
if content: | ||
await writer.write(content) | ||
class_distinct.add(r.subject) | ||
for r in rows: | ||
await RebuildClassView._create_mermaid_relationship(r.subject, graph_db, writer, relationship_distinct) | ||
|
||
@staticmethod | ||
async def _create_mermaid_class(ns_class_name, graph_db, file_writer, distinct): | ||
content, distinct = await self._create_mermaid_relationship(r.subject) | ||
if content: | ||
logger.debug(content) | ||
await writer.write(content) | ||
relationship_distinct.update(distinct) | ||
logger.info(f"classes: {len(class_distinct)}, relationship: {len(relationship_distinct)}") | ||
|
||
if self.i_context: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What does `i_context` mean here? This needs to be explained in the comments of the method. |
||
r_filename = Path(filename).relative_to(self.context.git_repo.workdir) | ||
await self.graph_db.insert( | ||
subject=self.i_context, predicate="hasMermaidClassDiagramFile", object_=str(r_filename) | ||
) | ||
logger.info(f"{self.i_context} hasMermaidClassDiagramFile {filename}") | ||
return filename | ||
|
||
async def _create_mermaid_class(self, ns_class_name) -> str: | ||
"""Generates a Mermaid class diagram for a specific class using data from the `graph_db` graph repository. | ||
|
||
Args: | ||
ns_class_name (str): The namespace-prefixed name of the class for which the Mermaid class diagram is to be created. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could you provide an example here? |
||
|
||
Returns: | ||
str: A Mermaid code block object in markdown representing the class diagram. | ||
""" | ||
fields = split_namespace(ns_class_name) | ||
if len(fields) > 2: | ||
# Ignore sub-class | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why should we ignore sub-classes? What is the design purpose of doing so? |
||
return | ||
|
||
class_view = ClassView(name=fields[1]) | ||
rows = await graph_db.select(subject=ns_class_name) | ||
for r in rows: | ||
name = split_namespace(r.object_)[-1] | ||
name, visibility, abstraction = RebuildClassView._parse_name(name=name, language="python") | ||
if r.predicate == GraphKeyword.HAS_CLASS_PROPERTY: | ||
var_type = await RebuildClassView._parse_variable_type(r.object_, graph_db) | ||
attribute = ClassAttribute( | ||
name=name, visibility=visibility, abstraction=bool(abstraction), value_type=var_type | ||
) | ||
class_view.attributes.append(attribute) | ||
elif r.predicate == GraphKeyword.HAS_CLASS_FUNCTION: | ||
method = ClassMethod(name=name, visibility=visibility, abstraction=bool(abstraction)) | ||
await RebuildClassView._parse_function_args(method, r.object_, graph_db) | ||
class_view.methods.append(method) | ||
|
||
# update graph db | ||
await graph_db.insert(ns_class_name, GraphKeyword.HAS_CLASS_VIEW, class_view.model_dump_json()) | ||
return "" | ||
|
||
rows = await self.graph_db.select(subject=ns_class_name, predicate=GraphKeyword.HAS_DETAIL) | ||
if not rows: | ||
return "" | ||
dot_class_info = DotClassInfo.model_validate_json(rows[0].object_) | ||
class_view = UMLClassView.load_dot_class_info(dot_class_info) | ||
|
||
# update uml view | ||
await self.graph_db.insert(ns_class_name, GraphKeyword.HAS_CLASS_VIEW, class_view.model_dump_json()) | ||
# update uml isCompositeOf | ||
for c in dot_class_info.compositions: | ||
await self.graph_db.insert( | ||
subject=ns_class_name, | ||
predicate=GraphKeyword.IS + COMPOSITION + GraphKeyword.OF, | ||
object_=concat_namespace("?", c), | ||
) | ||
|
||
# update uml isAggregateOf | ||
for a in dot_class_info.aggregations: | ||
await self.graph_db.insert( | ||
subject=ns_class_name, | ||
predicate=GraphKeyword.IS + AGGREGATION + GraphKeyword.OF, | ||
object_=concat_namespace("?", a), | ||
) | ||
|
||
content = class_view.get_mermaid(align=1) | ||
logger.debug(content) | ||
await file_writer.write(content) | ||
distinct.add(ns_class_name) | ||
return content | ||
|
||
@staticmethod | ||
async def _create_mermaid_relationship(ns_class_name, graph_db, file_writer, distinct): | ||
async def _create_mermaid_relationship(self, ns_class_name: str) -> Tuple[str, Set]: | ||
"""Generates a Mermaid class relationship diagram for a specific class using data from the `graph_db` graph repository. | ||
|
||
Args: | ||
ns_class_name (str): The namespace-prefixed class name for which the Mermaid relationship diagram is to be created. | ||
|
||
Returns: | ||
Tuple[str, Set]: A tuple containing the relationship diagram as a string and a set of deduplication. | ||
""" | ||
s_fields = split_namespace(ns_class_name) | ||
if len(s_fields) > 2: | ||
# Ignore sub-class | ||
return | ||
return None, None | ||
|
||
predicates = {GraphKeyword.IS + v + GraphKeyword.OF: v for v in [GENERALIZATION, COMPOSITION, AGGREGATION]} | ||
mappings = { | ||
|
@@ -109,8 +168,9 @@ async def _create_mermaid_relationship(ns_class_name, graph_db, file_writer, dis | |
AGGREGATION: " o-- ", | ||
} | ||
content = "" | ||
distinct = set() | ||
for p, v in predicates.items(): | ||
rows = await graph_db.select(subject=ns_class_name, predicate=p) | ||
rows = await self.graph_db.select(subject=ns_class_name, predicate=p) | ||
for r in rows: | ||
o_fields = split_namespace(r.object_) | ||
if len(o_fields) > 2: | ||
|
@@ -121,94 +181,37 @@ async def _create_mermaid_relationship(ns_class_name, graph_db, file_writer, dis | |
distinct.add(link) | ||
content += f"\t{link}\n" | ||
|
||
if content: | ||
logger.debug(content) | ||
await file_writer.write(content) | ||
|
||
@staticmethod | ||
def _parse_name(name: str, language="python"): | ||
pattern = re.compile(r"<I>(.*?)<\/I>") | ||
result = re.search(pattern, name) | ||
|
||
abstraction = "" | ||
if result: | ||
name = result.group(1) | ||
abstraction = "*" | ||
if name.startswith("__"): | ||
visibility = "-" | ||
elif name.startswith("_"): | ||
visibility = "#" | ||
else: | ||
visibility = "+" | ||
return name, visibility, abstraction | ||
return content, distinct | ||
|
||
@staticmethod | ||
async def _parse_variable_type(ns_name, graph_db) -> str: | ||
rows = await graph_db.select(subject=ns_name, predicate=GraphKeyword.HAS_TYPE_DESC) | ||
if not rows: | ||
return "" | ||
vals = rows[0].object_.replace("'", "").split(":") | ||
if len(vals) == 1: | ||
return "" | ||
val = vals[-1].strip() | ||
return "" if val == "NoneType" else val + " " | ||
def _diff_path(path_root: Path, package_root: Path) -> (str, str): | ||
"""Returns the difference between the root path and the path information represented in the package name. | ||
|
||
@staticmethod | ||
async def _parse_function_args(method: ClassMethod, ns_name: str, graph_db: GraphRepository): | ||
rows = await graph_db.select(subject=ns_name, predicate=GraphKeyword.HAS_ARGS_DESC) | ||
if not rows: | ||
return | ||
info = rows[0].object_.replace("'", "") | ||
|
||
fs_tag = "(" | ||
ix = info.find(fs_tag) | ||
fe_tag = "):" | ||
eix = info.rfind(fe_tag) | ||
if eix < 0: | ||
fe_tag = ")" | ||
eix = info.rfind(fe_tag) | ||
args_info = info[ix + len(fs_tag) : eix].strip() | ||
method.return_type = info[eix + len(fe_tag) :].strip() | ||
if method.return_type == "None": | ||
method.return_type = "" | ||
if "(" in method.return_type: | ||
method.return_type = method.return_type.replace("(", "Tuple[").replace(")", "]") | ||
|
||
# parse args | ||
if not args_info: | ||
return | ||
splitter_ixs = [] | ||
cost = 0 | ||
for i in range(len(args_info)): | ||
if args_info[i] == "[": | ||
cost += 1 | ||
elif args_info[i] == "]": | ||
cost -= 1 | ||
if args_info[i] == "," and cost == 0: | ||
splitter_ixs.append(i) | ||
splitter_ixs.append(len(args_info)) | ||
args = [] | ||
ix = 0 | ||
for eix in splitter_ixs: | ||
args.append(args_info[ix:eix]) | ||
ix = eix + 1 | ||
for arg in args: | ||
parts = arg.strip().split(":") | ||
if len(parts) == 1: | ||
method.args.append(ClassAttribute(name=parts[0].strip())) | ||
continue | ||
method.args.append(ClassAttribute(name=parts[0].strip(), value_type=parts[-1].strip())) | ||
Args: | ||
path_root (Path): The root path. | ||
package_root (Path): The package root path. | ||
|
||
@staticmethod | ||
def _diff_path(path_root: Path, package_root: Path) -> (str, str): | ||
Returns: | ||
Tuple[str, str]: A tuple containing the representation of the difference ("+", "-", "=") and the path detail of the differing part. | ||
""" | ||
if len(str(path_root)) > len(str(package_root)): | ||
return "+", str(path_root.relative_to(package_root)) | ||
if len(str(path_root)) < len(str(package_root)): | ||
return "-", str(package_root.relative_to(path_root)) | ||
return "=", "." | ||
|
||
@staticmethod | ||
def _align_root(path: str, direction: str, diff_path: str): | ||
def _align_root(path: str, direction: str, diff_path: str) -> str: | ||
"""Aligns the path to the same root represented by `diff_path`. | ||
|
||
Args: | ||
path (str): The path to be aligned. | ||
direction (str): The direction of alignment ('+', '-', '='). | ||
diff_path (str): The path representing the difference. | ||
|
||
Returns: | ||
str: The aligned path. | ||
""" | ||
if direction == "=": | ||
return path | ||
if direction == "+": | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This needs very clear comments explaining its use and the scenarios it covers, to reduce the cost of understanding it.