-
Notifications
You must be signed in to change notification settings - Fork 1
/
collection.xconf
114 lines (114 loc) · 7.24 KB
/
collection.xconf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
<collection xmlns="http://exist-db.org/collection-config/1.0"
            xmlns:tei="http://www.tei-c.org/ns/1.0">
    <index xmlns:xs="http://www.w3.org/2001/XMLSchema">
        <!-- Configure lucene full text index -->
        <lucene>
            <!-- Analyzer without @id is the default; "ws" is an alternative
                 that splits on whitespace only. -->
            <analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
            <analyzer id="ws" class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
            <!-- Binds the hoax: prefix used by the field expressions below.
                 The functions themselves live in modules/functions.xqm,
                 which is not part of this file. -->
            <module uri="http://www.obdurodon.org/hoax" prefix="hoax" at="modules/functions.xqm"/>
            <text qname="tei:body"/>
            <text qname="tei:placeName"/>
            <!-- All fields and facets below are attached to the document root,
                 so every expression is evaluated with tei:TEI as context. -->
            <text qname="tei:TEI">
                <!-- ====================================================
                     Word count (field)

                     Passes the tei:body descendant(s) to hoax:word-count(),
                     which counts words (here, in body).
                     ==================================================== -->
                <field
                    name="word-count"
                    expression="descendant::tei:body => hoax:word-count()"/>
                <!-- ====================================================
                     Political reference count (field)

                     Number of tei:rs elements with ref="pol" inside body.
                     ==================================================== -->
                <field
                    name="pol-count"
                    expression="descendant::tei:body/descendant::tei:rs[@ref='pol']
                                => count()"/>
                <!-- ====================================================
                     Religious reference count (field)

                     Number of tei:rs elements with ref="rel" inside body.
                     ==================================================== -->
                <field
                    name="rel-count"
                    expression="descendant::tei:body/descendant::tei:rs[@ref='rel']
                                => count()"/>
                <!-- ====================================================
                     Ghost reference count (field)

                     Number of tei:rs elements with ref="ghost" inside body.
                     ==================================================== -->
                <field
                    name="ghost-count"
                    expression="descendant::tei:body/descendant::tei:rs[@ref='ghost']
                                => count()"/>
                <!-- ====================================================
                     Place reference count (field)

                     Number of tei:placeName elements inside body.
                     ==================================================== -->
                <field
                    name="place-count"
                    expression="descendant::tei:body/descendant::tei:placeName
                                => count()"/>
                <!-- ====================================================
                     Publisher
                       facet: "publisher"            e.g. "Times, The"
                       field: "formatted-publisher"  e.g. "The Times"

                     A definite or indefinite article is stored in @rend
                     on <publisher>, e.g.
                       <publisher rend="The">Times</publisher>
                     The facet appends the article after a comma; the field
                     puts it back in front of the name.

                     normalize-space() is called inside the "!" mapping,
                     once per <publisher>. "!" binds more tightly than
                     "=>", so a trailing "=> normalize-space()" would be
                     handed the whole mapped sequence: a type error when a
                     document has more than one publisher, and an empty
                     string value when it has none.
                     ==================================================== -->
                <facet dimension="publisher"
                       expression="descendant::tei:sourceDesc/descendant::tei:bibl/tei:publisher
                                   ! normalize-space(string-join((., @rend), ', '))"/>
                <field name="formatted-publisher"
                       expression="descendant::tei:sourceDesc/descendant::tei:bibl/tei:publisher
                                   ! normalize-space(string-join((@rend, .), ' '))"/>
                <!-- ====================================================
                     Article title (field)

                     Each title in titleStmt is passed to
                     hoax:format-title(), which moves a definite or
                     indefinite article to the end, e.g., from "A Ghost"
                     to "Ghost, A".
                     ==================================================== -->
                <field
                    name="formatted-title"
                    expression="descendant::tei:titleStmt/tei:title
                                ! hoax:format-title(.)"/>
                <!-- ====================================================
                     Theme (field)

                     Every @ref value found on a tei:rs element anywhere
                     in the document.
                     ==================================================== -->
                <field name="theme" expression="descendant::tei:rs/@ref"/>
                <!-- ====================================================
                     Publication date (facet and field)

                     Facet "publication-date" is hierarchical: decade
                     (first three characters of @when plus "0") and then
                     ISO YYYY-MM (first seven characters of @when).
                     Field "formatted-date" is the human-readable full
                     date: month name, day, year.
                     ==================================================== -->
                <facet dimension="publication-date"
                       expression="descendant::tei:sourceDesc/descendant::tei:bibl/tei:date/@when
                                   ! (
                                       (substring(., 1, 3) || '0'),
                                       substring(., 1, 7)
                                   )"
                       hierarchical="yes"/>
                <field name="formatted-date"
                       expression="descendant::tei:sourceDesc/descendant::tei:bibl/tei:date/@when
                                   ! xs:date(.)
                                   ! format-date(., '[MNn] [D], [Y]')"/>
                <!-- ====================================================
                     Incipit (field)

                     First five whitespace-separated words of <body>,
                     followed by an ellipsis; all items are joined with
                     single spaces.
                     ==================================================== -->
                <field name="incipit"
                       expression="string-join((descendant::tei:body ! tokenize(.)[position() lt 6], '…'), ' ')"/>
            </text>
        </lucene>
    </index>
</collection>