-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdcat-to-htmltable.js
127 lines (114 loc) · 5.47 KB
/
dcat-to-htmltable.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
const N3 = require('n3');
const fs = require('fs');
// Command-line usage: node dcat-to-htmltable.js <metadata.ttl> <datasetSubjectURI>
const args = process.argv.slice(2);
const metadataFilePath = args[0];
const datasetSubjectURI = args[1];
if (!metadataFilePath) {
  // Fail with a readable message instead of the TypeError readFileSync would throw.
  console.error('Usage: node dcat-to-htmltable.js <metadata.ttl> <datasetSubjectURI>');
  process.exit(1);
}

// Predicate that links a dataset to one of its distributions.
const DCAT_DISTRIBUTION = 'http://www.w3.org/ns/dcat#distribution';

/**
 * Escape text for safe use inside HTML element content or a double-quoted attribute.
 * @param {*} text - value to escape; coerced to a string first
 * @returns {string} the escaped text
 */
function escapeHtml(text) {
  return String(text)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Walk every quad in the store and gather what the HTML report needs.
 *
 * NOTE(review): as in the previous implementation, blank-node objects and
 * dcat:distribution links are collected from EVERY subject, not only from
 * `subjectURI`; only plain (IRI/literal) values are filtered by subject.
 * Confirm whether that is intended for files describing several datasets.
 *
 * @param {object} quadStore - N3.Store holding the parsed quads
 * @param {string} subjectURI - IRI of the dataset whose plain values are listed
 * @returns {{datasetInfo: Map, distributions: Map[]}}
 *   datasetInfo maps predicate IRI -> { values: string[], nested: Map<blankId, Map<predicate, value>> };
 *   distributions is a list of Map<property, value>, IRI-identified ones first.
 */
function collectMetadata(quadStore, subjectURI) {
  // Maps (not an Array with string keys) so that blank-node labels such as "0"
  // can never collide with positional entries.
  const datasetInfo = new Map();
  const namedDistributions = new Map();
  const blankDistributions = new Map();

  const getOrCreate = (map, key, create) => {
    if (!map.has(key)) {
      map.set(key, create());
    }
    return map.get(key);
  };
  const newPropertyMap = () => new Map();
  // Plain values and nested blank-node tables are kept side by side, so a
  // predicate that has both kinds of object no longer crashes or loses data.
  const newDatasetEntry = () => ({ values: [], nested: new Map() });

  // Copy all predicate/value pairs of `term` into `target`
  // (a later quad with the same predicate overwrites an earlier one).
  const copyProperties = (term, target) => {
    quadStore.getQuads(term, null, null).forEach((inner) => {
      target.set(inner.predicate.value, inner.object.value);
    });
    return target;
  };

  quadStore.getQuads(null, null, null).forEach((current) => {
    const predicate = current.predicate.value;
    const object = current.object;
    const isDistribution = predicate === DCAT_DISTRIBUTION;

    if (object.termType === 'BlankNode') {
      if (isDistribution) {
        copyProperties(object, getOrCreate(blankDistributions, object.value, newPropertyMap));
      } else {
        getOrCreate(datasetInfo, predicate, newDatasetEntry)
          .nested.set(object.value, copyProperties(object, new Map()));
      }
      return;
    }

    if (current.subject.value === subjectURI) {
      getOrCreate(datasetInfo, predicate, newDatasetEntry).values.push(object.value);
    }

    // Literal objects of dcat:distribution are skipped: they describe nothing
    // and previously produced an empty table.
    if (isDistribution && object.termType === 'NamedNode') {
      const distribution = getOrCreate(namedDistributions, object.value, newPropertyMap);
      distribution.set('distributionURI', object.value);
      // Also list the distribution's own properties, as is done for blank nodes.
      copyProperties(object, distribution);
    }
  });

  return {
    datasetInfo,
    distributions: [...namedDistributions.values(), ...blankDistributions.values()],
  };
}

/**
 * Build one table row: a linked property label plus an already-rendered value cell.
 * @param {string} key - property key (normally a predicate IRI)
 * @param {string} valueHtml - HTML for the value cell, inserted verbatim
 * @returns {string} the <tr> markup
 */
function renderPropertyRow(key, valueHtml) {
  const safeKey = escapeHtml(key);
  const safeLabel = escapeHtml(extractLabel(key));
  return `<tr><td><a href="${safeKey}" target="_blank">${safeLabel}</a></td><td itemprop="${safeKey}">${valueHtml}</td></tr>`;
}

/**
 * Render the properties of one blank node as a nested table.
 * @param {Map<string, string>} properties - predicate IRI -> value
 * @returns {string} the <table> markup
 */
function renderNestedTable(properties) {
  let html = '<table border="1" itemscope itemtype="http://schema.org/PropertyValue">';
  properties.forEach((value, key) => {
    html += `<tr itemprop="${escapeHtml(key)}"><td>${escapeHtml(extractLabel(key))}</td><td>${formatValue(key, value)}</td></tr>`;
  });
  return `${html}</table>`;
}

/**
 * Render the complete report: dataset table followed by one table per distribution.
 * @param {{datasetInfo: Map, distributions: Map[]}} metadata - result of collectMetadata
 * @returns {string} the HTML fragment
 */
function renderHtml(metadata) {
  let html = '<h2>Dataset Information</h2><table border="1" itemscope itemtype="http://schema.org/Dataset">';
  metadata.datasetInfo.forEach((entry, predicate) => {
    // Format each value on its own; joining first turned several IRIs into one broken link.
    let cellHtml = entry.values.map((value) => formatValue(predicate, value)).join(', ');
    entry.nested.forEach((properties) => {
      cellHtml += renderNestedTable(properties);
    });
    html += renderPropertyRow(predicate, cellHtml);
  });
  html += '</table>';

  html += '<h2>Distributions</h2>';
  metadata.distributions.forEach((distribution, index) => {
    html += `<h3>Distribution ${index + 1}</h3><table border="1" itemscope itemtype="http://schema.org/DataDownload">`;
    distribution.forEach((value, key) => {
      html += renderPropertyRow(key, formatValue(key, value));
    });
    html += '</table>';
  });
  return html;
}

// Read the DCAT TTL file
const ttlData = fs.readFileSync(metadataFilePath, 'utf-8');
// Parse the TTL data into an in-memory store
const parser = new N3.Parser();
const store = new N3.Store();
parser.parse(ttlData, (error, quad) => {
  if (error) {
    console.error('Error parsing TTL data:', error);
    // Make the failure visible to calling scripts; no report is written.
    process.exitCode = 1;
    return;
  }
  if (quad) {
    store.addQuad(quad);
    return;
  }
  // N3.js invokes the callback one last time with a null quad once parsing is
  // complete; only then is the store ready to be turned into the report.
  const html = renderHtml(collectMetadata(store, datasetSubjectURI));
  fs.writeFileSync('dataset_info.html', html);
  console.log('HTML table generated successfully.');
});
/**
 * Derive a short display label from a URI: the last non-empty segment after
 * splitting on "/" and "#", with a leading lowercase ASCII letter capitalised.
 *
 * Trailing delimiters are ignored, so "http://example.org/ns/" gives "Ns"
 * rather than an empty label (which would render as an empty link text).
 *
 * @param {string} uri - URI or plain key to label
 * @returns {string} the label; `uri` itself when it has no non-empty segment
 */
function extractLabel(uri) {
  const segments = uri.split(/[#/]/).filter((segment) => segment !== '');
  if (segments.length === 0) {
    return uri;
  }
  const lastSegment = segments[segments.length - 1];
  return lastSegment.replace(/^[a-z]/, (match) => match.toUpperCase());
}
/**
 * Render a property value as an HTML fragment.
 *
 * - rdf:type values become a link whose text is the short label of the type IRI.
 * - Values that start with "http://" or "https://" become a link to themselves.
 * - Any other string is returned HTML-escaped.
 * - Non-string values are returned unchanged.
 *
 * @param {string} predicate - predicate IRI the value belongs to
 * @param {*} value - the value to render
 * @returns {*} HTML markup for string values, otherwise `value` itself
 */
function formatValue(predicate, value) {
  const RDF_TYPE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
  const HTML_ENTITIES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  // Values come from the parsed input file, so escape them before embedding in markup.
  const escapeMarkup = (text) => text.replace(/[&<>"']/g, (ch) => HTML_ENTITIES[ch]);

  if (typeof value !== 'string') {
    return value;
  }
  if (predicate === RDF_TYPE) {
    // Declared locally: the previous bare assignment leaked a global `label`
    // and throws a ReferenceError in strict mode.
    const label = extractLabel(value);
    return `<a href="${escapeMarkup(value)}" target="_blank">${escapeMarkup(label)}</a>`;
  }
  // Require a real scheme separator so plain text such as "httpd notes" is not linked.
  if (/^https?:\/\//i.test(value)) {
    const safeUrl = escapeMarkup(value);
    return `<a href="${safeUrl}" target="_blank">${safeUrl}</a>`;
  }
  return escapeMarkup(value);
}