-
Notifications
You must be signed in to change notification settings - Fork 0
/
summarize_results.py
83 lines (74 loc) · 3.08 KB
/
summarize_results.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#! python3
import os
from pathlib import Path
from collections import Counter
from glob import glob
from typing import Tuple
def process_file(path: Path, response_prefixes: Counter[str]) -> None:
    """Tally the leading label of every response section in a report file.

    The file is scanned line by line:

    * Any line containing ``##`` closes the section in progress.
    * Inside a section, lines are skipped until one containing "vulnerab"
      (case-insensitive) is seen; that line and every following line up to
      the next ``##`` line make up one response.

    For each response, the text of its first line up to the first ``:`` is
    lower-cased and used as the key whose count is incremented.

    Sections that never produce a response are not counted at all. This
    includes the final section of the file.

    Args:
        path: Report file to read (decoded as UTF-8).
        response_prefixes: Counter updated in place, keyed by the lower-cased
            response label.
    """
    responses = []
    current_lines = []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            if "##" in line:
                # Header line: close the running response, if there is one.
                if current_lines:
                    responses.append("".join(current_lines))
                current_lines = []
            elif current_lines or "vulnerab" in line.lower():
                # Either a response is already open, or this line opens one.
                current_lines.append(line)

    # Flush the trailing section only when it actually captured text, so it
    # is treated the same way as sections closed by a header line.
    if current_lines:
        responses.append("".join(current_lines))

    for response in responses:
        first_line = response.partition("\n")[0]
        prefix = first_line.partition(":")[0]
        response_prefixes[prefix.lower()] += 1
def count_number_of_bugs(path: Path, bug_counts: Counter[str]) -> None:
    """Count list-item lines in a report file and add them to ``bug_counts``.

    A line is counted as one bug when it starts with either

    * ``-``, or
    * an ASCII decimal number immediately followed by ``". "``
      (``"1. "``, ``"15. "``, ``"120. "``, ...).

    The numbered form has no upper limit, so long lists are counted in full.

    Args:
        path: Report file to read (decoded as UTF-8).
        bug_counts: Counter updated in place. The key is the last two
            components of ``path`` joined with ``/``; it is only created
            when at least one matching line is found.
    """
    short_path = "/".join(path.parts[-2:])
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            # Text before the first ". " must be purely digits for the line
            # to be a numbered list item.
            number, separator, _ = line.partition(". ")
            is_numbered = bool(separator) and number.isascii() and number.isdigit()
            if is_numbered or line.startswith("-"):
                bug_counts[short_path] += 1
def summarize_file_types() -> Tuple[int, str]:
    """Summarize file suffixes found under the current working directory.

    Every entry matched by the recursive glob ``./**/*`` that is a regular
    file is counted by its suffix. Entries whose glob-relative name contains
    ``.git`` are skipped, and the ``.md`` and empty-suffix buckets are
    dropped from the result.

    The ``.git`` filter is applied to the path relative to the working
    directory, not to the resolved absolute path. The result therefore does
    not depend on the names of the directories above the working directory.

    Returns:
        A tuple ``(num_code_files, file_frequency)``: the number of remaining
        files, and the remaining suffix counts rendered as the Counter's repr
        without its ``Counter(`` ... ``)`` wrapper.
    """
    suffix_counts = Counter()
    for name in glob("./**/*", recursive=True):
        if ".git" in name:
            continue
        file_path = Path(name).resolve()
        if file_path.is_file():
            suffix_counts[file_path.suffix] += 1

    # Counter's ``del`` is a no-op for missing keys, so these are safe even
    # when no such files were found.
    del suffix_counts[".md"]
    del suffix_counts[""]

    num_code_files = sum(suffix_counts.values())
    file_frequency = str(suffix_counts).removeprefix("Counter(")[:-1]
    return num_code_files, file_frequency
if __name__ == "__main__":
    import sys

    # Root directory holding the per-project report folders. An optional
    # first command-line argument overrides the original hard-coded location.
    default_code_dir = "/Users/chriskoch/projects/vulnerable_code_gpt3_analyzer"
    code_dir = sys.argv[1] if len(sys.argv) > 1 else default_code_dir

    # Collect every README.md found below each entry of code_dir. A README.md
    # sitting directly in code_dir is not picked up.
    readme_paths = [
        readme
        for entry in os.listdir(code_dir)
        for readme in (code_dir / Path(entry)).resolve().rglob("README.md")
    ]

    response_prefixes = Counter()
    bug_counts = Counter()
    for path in readme_paths:
        count_number_of_bugs(path, bug_counts)
        process_file(path, response_prefixes)

    total_bugs = sum(bug_counts.values())
    num_files_scanned = sum(response_prefixes.values())
    # File-type statistics are taken from the current working directory.
    num_code_files, file_frequency = summarize_file_types()

    # Every code file not tallied as "no vulnerabilities detected." is
    # reported as vulnerable; this includes files without any response.
    vulnerable_files = num_code_files - \
        response_prefixes["no vulnerabilities detected."]
    # Code files for which no response was tallied at all.
    response_prefixes["no response given"] += num_code_files - \
        num_files_scanned
    response_summary = str(response_prefixes).removeprefix("Counter(")[:-1]

    print(
        f"Vulnerabilities detected in {vulnerable_files} / {num_code_files} files.")
    print(f"Detected {total_bugs} vulnerabilities in total.\n")
    print("Frequency of cleaned GPT prompt response types (one response per file scanned):")
    print(response_summary, "\n")
    print("Distribution of file types in this repo: ")
    print(f"{num_code_files} files of code in total (excluding markdown and flatfiles)")
    print(file_frequency)