-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrepo_topology_match.py
141 lines (127 loc) · 4.62 KB
/
repo_topology_match.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
from collections import defaultdict
from dataclasses import dataclass
from os.path import isdir
from statistics import mean, pstdev
from typing import List, Set
from click import command, option
import networkx as nx
from prettytable import PrettyTable
from tqdm import tqdm
from neo4j import GraphDatabase
import matplotlib.pyplot as plt
from random import sample
from sys import path
from os import scandir, remove
path.append("..")
from data_scripts.helpers import generate_image, fetch_path, to_json
@dataclass
class MTOCStatistics:
    """Per-repository tallies collected while comparing query matches to a graph.

    One instance is kept per repository; ``main`` creates it the first time a
    repository shows up in the query result and mutates it afterwards.
    """

    # Ids of graph nodes that occur in at least one match returned by the query.
    in_topology: Set
    # Ids of graph nodes that satisfy the requested type/status filter.
    candidates: Set
    # Number of query result records seen for the repository.
    matches: int
    # Number of graph nodes whose ``connected_component_size[0]`` exceeds 2.
    graph_nodes: int
def _mtro(stats) -> float:
    """Return the share of ``stats.candidates`` that is absent from ``stats.in_topology``.

    A repository without any candidate yields ``0.0`` instead of dividing by
    zero: with nothing to miss, nothing is counted as missed.
    """
    if not stats.candidates:
        return 0.0
    missed = stats.candidates.difference(stats.in_topology)
    return len(missed) / len(stats.candidates)


def _is_candidate(node, node_type, node_status, integrating) -> bool:
    """Tell whether a graph node passes the requested type/status filter.

    A node qualifies when its type and status equal the requested ones. With
    ``integrating`` set, closed issues and merged pull requests qualify too.
    """
    if node["type"] == node_type and node["status"] == node_status:
        return True
    if not integrating:
        return False
    return (node["type"], node["status"]) in (
        ("issue", "closed"),
        ("pull_request", "merged"),
    )


@command()
@option(
    "--cypher",
    "cypher_path",
    required=True,
    help="Path of the file containing the Cypher query to run.",
)
@option("--type", "node_type", help="Node type a candidate must have.")
@option("--status", "node_status", help="Node status a candidate must have.")
@option(
    "--latex",
    "latex",
    is_flag=True,
    default=False,
    help="Also print the table as LaTeX.",
)
@option(
    "--integrating",
    "integrating",
    is_flag=True,
    default=False,
    help="Also treat closed issues and merged pull requests as candidates.",
)
def main(
    cypher_path: str, node_type: str, node_status: str, latex: bool, integrating: bool
):
    """Report, per repository, how many candidate nodes a Cypher query misses.

    The query in ``cypher_path`` is run against the local Neo4j database. Each
    result record must expose a ``nodes`` list whose entries carry the
    ``repository`` and ``number`` properties. For every repository found, the
    graph in ``data/graph_<owner>-<name>.json`` is loaded and its nodes with
    ``connected_component_size[0] > 2`` are split into candidates (type/status
    filter) and matched nodes (id among the matched numbers). The printed MTRO
    column is the share of candidates that no match covers.

    Args:
        cypher_path: File holding the Cypher query.
        node_type: Value a node's ``type`` must equal to be a candidate.
        node_status: Value a node's ``status`` must equal to be a candidate.
        latex: Print the table a second time as LaTeX, with ``%`` escaped.
        integrating: Additionally accept closed issues and merged pull requests.
    """
    # Context managers make sure both files are closed even if reading fails.
    with open(cypher_path, "r") as query_file:
        query = query_file.read()
    with open("generate_neo4j_images/password", "r") as password_file:
        password = password_file.readline().strip()

    def run_query(tx):
        # Materialise inside the transaction; the cursor is unusable afterwards.
        return list(tx.run(query))

    # Driver and session are released on every exit path, including errors.
    with GraphDatabase.driver(
        "bolt://localhost:7687", auth=("neo4j", password)
    ) as db:
        with db.session(database="neo4j") as session:
            records = session.execute_read(run_query)

    repo_to_matches_map = {}
    # Ids of the size-filtered graph nodes per repository, kept so the graph
    # file is read and filtered once per repository instead of once per record.
    eligible_ids_by_repo = {}
    for record in tqdm(records, total=len(records), leave=False):
        cypher_nodes = record.get("nodes")
        if not cypher_nodes:
            # Nothing to attribute the match to without at least one node.
            continue
        repo = cypher_nodes[0]["repository"]
        stats = repo_to_matches_map.get(repo)
        if stats is None:
            graph = to_json(f"data/graph_{repo.replace('/','-')}.json")
            eligible = [
                node
                for node in graph["nodes"]
                if node["connected_component_size"][0] > 2
            ]
            stats = MTOCStatistics(
                set(),
                {
                    node["id"]
                    for node in eligible
                    if _is_candidate(node, node_type, node_status, integrating)
                },
                0,
                len(eligible),
            )
            repo_to_matches_map[repo] = stats
            eligible_ids_by_repo[repo] = {node["id"] for node in eligible}
        matched_numbers = {node["number"] for node in cypher_nodes}
        stats.in_topology.update(
            node_id
            for node_id in eligible_ids_by_repo[repo]
            if node_id in matched_numbers
        )
        stats.matches += 1

    # Highest MTRO first.
    repo_to_matches_map = dict(
        sorted(
            repo_to_matches_map.items(),
            key=lambda item: _mtro(item[1]),
            reverse=True,
        )
    )

    table = PrettyTable()
    table.field_names = ["Repository", "MTRO", "Matches"]
    for repo, mtoc in repo_to_matches_map.items():
        table.add_row([repo, f"{_mtro(mtoc):.2%}", mtoc.matches])
    print(table)
    if latex:
        print(table.get_latex_string().replace("%", "\\%"))

    ratios = [_mtro(mtoc) for mtoc in repo_to_matches_map.values()]
    # mean() and pstdev() raise StatisticsError on empty data, so the summary
    # is only printed when the query matched at least one repository.
    if ratios:
        print(f"Mean MTRO: {mean(ratios):.2%}")
        print(f"STDev MTRO: {pstdev(ratios):.2%}")
# Run the click command only when the file is executed as a script, so that
# importing the module does not trigger a database query.
if __name__ == "__main__":
    main()