forked from Sensez/WS_GameSales
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrdf_parser.py
37 lines (32 loc) · 1.16 KB
/
rdf_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import csv
# Convert "subject,predicate,object" CSV rows into an N-Triples file.
#
# Two inputs are read: a user-chosen file under clean_data/ and the fixed
# clean_data/platforms_info.csv. Every row becomes one line of games.nt.

# IRI prefixes for entities (subjects and entity-valued objects) and predicates.
baseEntity = "http://www.games.com/entity/"
baseProperty = "http://www.games.com/pred/"

# Characters that the N-Triples grammar forbids unescaped inside "..." literals.
_LITERAL_ESCAPES = str.maketrans({
    '\\': '\\\\',
    '"': '\\"',
    '\n': '\\n',
    '\r': '\\r',
})

# Characters that the N-Triples grammar forbids inside <...>; they are
# percent-encoded. Every other character is passed through untouched so that
# IRIs generated for already-valid names stay exactly as before.
_IRI_ESCAPES = {ord(ch): '%{:02X}'.format(ord(ch)) for ch in '<>"{}|^`\\'}
_IRI_ESCAPES.update({code: '%{:02X}'.format(code) for code in range(0x21)})


def make_uri(base, name):
    """Return the N-Triples IRI term ``<base + slug>`` for *name*.

    The slug is *name* lower-cased with spaces turned into underscores;
    characters that are illegal inside an IRI reference are percent-encoded.
    """
    slug = str(name).lower().replace(' ', '_').translate(_IRI_ESCAPES)
    return '<' + base + slug + '>'


def make_literal(value):
    """Return *value* as a double-quoted N-Triples string literal.

    Backslashes, double quotes, line feeds and carriage returns are escaped
    so the literal always stays on one line and is well-formed.
    """
    return '"' + str(value).translate(_LITERAL_ESCAPES) + '"'


def parse_triples(lines, source='<input>'):
    """Parse CSV *lines* into a list of ``(subject, predicate, object)`` tuples.

    *lines* is any iterable of strings accepted by ``csv.reader``; *source*
    is only used to label error messages. Blank lines are skipped.

    Raises:
        ValueError: if a non-blank row does not have exactly three fields.
    """
    reader = csv.reader(lines)
    triples = []
    for row in reader:
        if not row:
            # csv.reader yields [] for an empty line (e.g. a trailing newline).
            continue
        if len(row) != 3:
            raise ValueError(
                '{}:{}: expected 3 fields, got {}'.format(
                    source, reader.line_num, len(row)))
        triples.append(tuple(row))
    return triples


def read_triples(path):
    """Read the UTF-8 CSV file at *path* and return its triples as a list."""
    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(path, 'r', encoding='utf-8', newline='') as handle:
        return parse_triples(handle, path)


def collect_entity_names(triples):
    """Return the set of object values used with 'Publisher' or 'Platform'.

    Objects in this set are emitted as entity IRIs instead of literals.
    """
    return {obj for _, pred, obj in triples
            if pred in ('Publisher', 'Platform')}


def to_ntriple(sub, pred, obj, entity_names):
    """Format one triple as a single N-Triples line (newline included).

    The object is written as an entity IRI when its raw value is in
    *entity_names*, otherwise as a string literal.
    """
    if obj in entity_names:
        obj_term = make_uri(baseEntity, obj)
    else:
        obj_term = make_literal(obj)
    return '{} {} {} .\n'.format(
        make_uri(baseEntity, sub), make_uri(baseProperty, pred), obj_term)


def write_ntriples(triples, entity_names, path='games.nt'):
    """Write *triples* to *path* as UTF-8 N-Triples, one statement per line."""
    # N-Triples documents are UTF-8; do not rely on the platform default.
    with open(path, 'w', encoding='utf-8') as file_out:
        file_out.writelines(
            to_ntriple(sub, pred, obj, entity_names)
            for sub, pred, obj in triples)


def main():
    """Prompt for the input file, merge the platform data, write games.nt."""
    filename = "clean_data/" + input("Filename: ")
    triples = read_triples(filename)
    # Publisher/platform names are taken from the user-selected file only;
    # platforms_info.csv rows are appended afterwards and can reference them.
    entity_names = collect_entity_names(triples)
    triples.extend(read_triples('clean_data/platforms_info.csv'))
    write_ntriples(triples, entity_names)
    print("games.nt created")


if __name__ == "__main__":
    main()