Top Level Namespace
Defined Under Namespace
Modules: Configuration, Textminer
Classes: Array, Hash
Instance Method Summary
collapse
Instance Method Details
#detect_type(x) ⇒ Object
4
5
6
7
8
9
10
11
12
13
14
|
# File 'lib/textminer/mine_utils.rb', line 4
# Translates a 'content-type' value into a short type label.
#
# NOTE(review): `x.['content-type']` is not valid Ruby as shown here; a method
# name between `.` and `[` appears to be missing from this listing (possibly a
# headers accessor). Confirm against lib/textminer/mine_utils.rb.
#
# @param x [Object] object the 'content-type' value is looked up on
# @return [String, nil] 'xml', 'plain' or 'pdf'; nil when no branch matches
def detect_type(x)
ctype = x.['content-type']
# `when` with string literals compares by equality, so a value carrying extra
# parameters (e.g. a charset suffix) matches no branch and the result is nil.
case ctype
when 'text/xml'
'xml'
when 'text/plain'
'plain'
when 'application/pdf'
'pdf'
end
end
|
#is_elsevier_wiley(x) ⇒ Object
62
63
64
65
|
# File 'lib/textminer/mine_utils.rb', line 62
# Reports whether the given text mentions 'elsevier' or 'wiley'.
# The match is case-sensitive and may occur anywhere in the text.
#
# @param x [#match] text to inspect (typically a String such as a URL)
# @return [Boolean] true when either name occurs in x, false otherwise
def is_elsevier_wiley(x)
  hit = x.match('elsevier|wiley')
  hit ? true : false
end
|
#make_ext(x) ⇒ Object
16
17
18
19
20
21
22
23
24
25
|
# File 'lib/textminer/mine_utils.rb', line 16
# Looks up the file extension that belongs to a short type label.
#
# @param x [String] type label: 'xml', 'plain' or 'pdf'
# @return [String, nil] 'xml', 'txt' or 'pdf'; nil for any other label
def make_ext(x)
  # Built per call so callers always receive fresh, unshared strings.
  extensions = { 'xml' => 'xml', 'plain' => 'txt', 'pdf' => 'pdf' }
  extensions[x]
end
|
#make_path(type) ⇒ Object
27
28
29
30
31
32
33
34
35
|
# File 'lib/textminer/mine_utils.rb', line 27
# Builds a file name of the form "<random uuid>.<extension>".
#
# The extension comes from make_ext(type). When make_ext returns nil the
# string concatenation below raises TypeError.
#
# @param type [String] type label passed on to make_ext
# @return [String] a randomly generated UUID, a dot, then the extension
def make_path(type)
  extension = make_ext(type)
  UUIDTools::UUID.random_create.to_s + '.' + extension
end
|
#parse_links(x, just_urls) ⇒ Object
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
|
# File 'lib/textminer/link_methods_hash.rb', line 50
# Optionally reduces a collection of link records to their 'URL' values.
#
# @param x [Array, nil] link hashes, or arrays of link hashes
# @param just_urls [Boolean] when truthy, return only the 'URL' values
# @return [Array, nil] nil when x is nil; x itself when it is empty or
#   just_urls is falsy; otherwise a flat array of the 'URL' values
def parse_links(x, just_urls)
  return nil if x.nil?
  return x if x.empty? || !just_urls

  # The first element decides whether the input is treated as flat or nested.
  urls = if x[0].class != Array
           x.map { |link| link['URL'] }
         else
           x.map { |group| group.map { |link| link['URL'] } }
         end
  urls.flatten
end
|
#parse_pdf(x) ⇒ Object
58
59
60
|
# File 'lib/textminer/mine_utils.rb', line 58
# Hands x to Textminer and returns whatever that call produces.
#
# NOTE(review): as written, `Textminer.(x)` parses as `Textminer.call(x)`. A
# method name after the dot may have been dropped from this listing; confirm
# against lib/textminer/mine_utils.rb.
#
# @param x [Object] value passed through unchanged (presumably a path to a
#   PDF file; verify against callers)
# @return [Object] the result of the Textminer call
def parse_pdf(x)
return Textminer.(x)
end
|
#parse_plain(x) ⇒ Object
53
54
55
56
|
# File 'lib/textminer/mine_utils.rb', line 53
# Returns the contents of a plain-text file by delegating to read_disk.
#
# @param x [String] path handed to read_disk
# @return [Object] whatever read_disk returns for that path
def parse_plain(x)
  read_disk(x)
end
|
#parse_xml(x) ⇒ Object
47
48
49
50
51
|
# File 'lib/textminer/mine_utils.rb', line 47
# Reads a file through read_disk and parses its text with Nokogiri.
#
# @param x [String] path handed to read_disk
# @return [Object] the result of Nokogiri.parse on the file's text
def parse_xml(x)
  Nokogiri.parse(read_disk(x))
end
|
#pull_links(x, y) ⇒ Object
37
38
39
40
41
42
43
44
45
46
47
48
|
# File 'lib/textminer/link_methods_hash.rb', line 37
# Extracts the link records whose 'content-type' matches a pattern.
#
# Two response shapes are handled:
# * many records: x['message']['items'] is a list of records, each with a
#   'link' list;
# * single record: x['message']['link'] is the link list itself.
#
# @param x [Hash] response with a 'message' hash in one of the shapes above
# @param y [String, Regexp] pattern interpolated into a regular expression
#   and tested against each link's 'content-type'
# @return [Array, nil] single record: the matching link hashes, or nil when
#   the record has no 'link' entry; many records: one array of matching
#   links per record, with records that have no match left out
def pull_links(x, y)
  pattern = /#{y}/
  matching = ->(links) { links.select { |link| link['content-type'].match(pattern) } }
  items = x['message']['items']

  if items.nil?
    # Read from the argument: at top level `self` is not the response hash.
    links = x['message']['link']
    return nil if links.nil?

    matching.call(links).reject(&:empty?)
  else
    # A record without a 'link' key contributes no links instead of raising.
    items.map { |item| matching.call(item['link'] || []) }.reject(&:empty?)
  end
end
|
#read_disk(path) ⇒ Object
43
44
45
|
# File 'lib/textminer/mine_utils.rb', line 43
# Reads an entire file into a string.
#
# @param path [String] location of the file to read
# @return [String] the file's full contents
def read_disk(path)
  File.read(path)
end
|
#singlearray2hash(x) ⇒ Object
1
2
3
4
5
6
7
|
# File 'lib/textminer/tmutils.rb', line 1
# Unwraps a one-element Array; every other value is returned unchanged.
#
# The class is checked before the length so that values without a #length
# method (nil, numbers, ...) pass through instead of raising NoMethodError.
#
# @param x [Object] value to inspect
# @return [Object] x[0] when x is an Array holding exactly one element,
#   otherwise x itself
def singlearray2hash(x)
  x.instance_of?(Array) && x.length == 1 ? x[0] : x
end
|
#write_disk(res, path) ⇒ Object
37
38
39
40
41
|
# File 'lib/textminer/mine_utils.rb', line 37
# Writes a response body to a file in binary mode.
#
# The block form of File.open closes the handle even when reading the body
# or writing it raises, so a failed write does not leak an open file.
#
# @param res [#body] object whose #body is written to disk
# @param path [String] destination file; created or truncated
# @return [nil]
def write_disk(res, path)
  File.open(path, 'wb') { |file| file.write(res.body) }
  nil
end
|