17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
|
# File 'lib/logstash/filters/ebscohost.rb', line 17
# Parses an EBSCOhost URL and classifies the resource it points to.
#
# The whole input is percent-decoded before it is parsed as a URI.
# NOTE(review): CGI.parse decodes the query string a second time, so an
# encoded '&' or '%' inside a parameter value is not preserved. Confirm
# whether callers rely on the decode-first behaviour before changing it.
#
# @param input [String] the URL to analyse
# @return [Hash] always contains "provider"; depending on the URL it may also
#   contain "platform_name", "rtype", "mime", "unit_id", "first_page",
#   "last_page", "doi" and, for OpenURL requests, the fields named by
#   @openUrlFields
# @raise [URI::InvalidURIError] if the decoded input cannot be parsed as a URI
def Ebscohost.parse(input)
  # URI.unescape was removed in Ruby 3.0. It delegated to the default
  # parser's #unescape, so calling that directly keeps the same decoding.
  uri = URI(URI::DEFAULT_PARSER.unescape(input))
  # Opaque URIs (e.g. "mailto:...") have a nil path; treat it as empty so the
  # comparisons below cannot fail on nil.
  path = uri.path.to_s
  params = uri.query ? CGI.parse(uri.query) : {}
  data = { 'provider' => 'ebscohost' }

  if (match = %r{\A/(ehost|eds)/([a-z]+)(?:/[a-z]+)?\z}i.match(path))
    category = match[2].downcase
    data['platform_name'] = 'EBSCO Discovery Service' if match[1].downcase == 'eds'

    case category
    when 'results', 'resultsadvanced'
      data['rtype'] = 'TOC'
      data['mime'] = 'MISC'
    when 'ebookviewer'
      data['rtype'] = 'BOOK'
      data['mime'] = 'PDF'
    when 'pdfviewer'
      data['rtype'] = 'ARTICLE'
      data['mime'] = 'PDF'
    when 'search'
      data['rtype'] = 'SEARCH'
      data['mime'] = 'MISC'
    when 'detail'
      data['rtype'] = 'REF'
      data['mime'] = 'HTML'
      # The fragment is parsed as query-style pairs; its AN value, when
      # present, becomes the unit id.
      if uri.fragment
        fragment_params = CGI.parse(uri.fragment)
        data['unit_id'] = fragment_params['AN'].first if fragment_params.key?('AN')
      end
    end
  elsif (match = %r{\A/pdf[a-z0-9_]*/pdf/\S+/([a-z0-9]+)\.pdf\z}i.match(path))
    data['rtype'] = 'ARTICLE'
    data['mime'] = 'PDF'
    data['unit_id'] = match[1]
  elsif path.downcase == '/contentserver.asp'
    data['rtype'] = 'ARTICLE'
    data['mime'] = 'PDF'
    data['unit_id'] = params['K'].first if params.key?('K')
  elsif path.downcase == '/openurl'
    data['rtype'] = 'OPENURL'
    data['mime'] = 'HTML'

    # @openUrlFields is defined outside this method; it is used here as a
    # hash from OpenURL parameter name to output field name.
    params.each do |key, values|
      data[@openUrlFields[key]] = values.first if @openUrlFields.key?(key)
    end

    if params.key?('pages')
      # \A/\z anchor the whole value, so a decoded newline cannot let a
      # multi-line value pass as a page range.
      pages_match = /\A(\d+)-(\d+)\z/.match(params['pages'].first)
      if pages_match
        data['first_page'] = pages_match[1]
        data['last_page'] = pages_match[2]
      end
    end

    # A unit id of the form "doi:<value>" is stored without its prefix, both
    # as the unit id and as the DOI.
    unit_id = data['unit_id']
    if unit_id && unit_id.downcase.start_with?('doi:')
      data['doi'] = data['unit_id'] = unit_id[4..-1]
    end
  end

  data
end
|