Top Level Namespace

Defined Under Namespace

Modules: Configuration, Textminer
Classes: Array, Hash

Instance Method Summary

Instance Method Details

#detect_type(x) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
# File 'lib/textminer/mine_utils.rb', line 4

# Maps a response's Content-Type header to a short type label.
#
# The header is read via x.headers['content-type']. Anything after the first
# ';' (e.g. "; charset=UTF-8") is dropped and the remaining media type is
# compared case-insensitively, so "text/xml; charset=UTF-8" yields 'xml'.
#
# @param x [#headers] object whose #headers supports ['content-type']
# @return [String, nil] 'xml', 'plain' or 'pdf'; nil when the header is
#   missing or names any other media type
def detect_type(x)
  # to_s twice: once for a missing header (nil), once because
  # ''.split(';').first is nil.
  media_type = x.headers['content-type'].to_s.split(';').first.to_s.strip.downcase

  case media_type
  when 'text/xml'        then 'xml'
  when 'text/plain'      then 'plain'
  when 'application/pdf' then 'pdf'
  end
end

#is_elsevier_wiley(x) ⇒ Object



62
63
64
65
# File 'lib/textminer/mine_utils.rb', line 62

# Reports whether x contains "elsevier" or "wiley".
#
# @param x [#match] value to search (the pattern is passed to x.match)
# @return [Boolean] true when either name occurs in x, false otherwise
def is_elsevier_wiley(x)
  x.match('elsevier|wiley') ? true : false
end

#make_ext(x) ⇒ Object



16
17
18
19
20
21
22
23
24
25
# File 'lib/textminer/mine_utils.rb', line 16

# Translates a type label into a file extension.
#
# @param x [String] type label: 'xml', 'plain' or 'pdf'
# @return [String, nil] 'xml', 'txt' or 'pdf' respectively; nil for any
#   other value
def make_ext(x)
  extensions = {
    'xml'   => 'xml',
    'plain' => 'txt',
    'pdf'   => 'pdf'
  }
  extensions[x]
end

#make_path(type) ⇒ Object



27
28
29
30
31
32
33
34
35
# File 'lib/textminer/mine_utils.rb', line 27

# Builds a file name of the form "<random uuid>.<extension>".
#
# The extension comes from make_ext(type); the UUID is generated with
# UUIDTools::UUID.random_create.
#
# @param type [String] type label understood by make_ext
# @return [String] the generated file name
def make_path(type)
  extension = make_ext(type)
  UUIDTools::UUID.random_create.to_s + '.' + extension
end


50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/textminer/link_methods_hash.rb', line 50

# Optionally reduces a collection of link records to their 'URL' values.
#
# @param x [Array, nil] link hashes, or arrays of link hashes
# @param just_urls [Boolean] when truthy, return only the 'URL' values
# @return [Array, nil] nil when x is nil; x itself when it is empty or
#   just_urls is falsy; otherwise a flattened array of 'URL' values
def parse_links(x, just_urls)
  return nil if x.nil?
  return x if x.empty? || !just_urls

  # The first element decides whether x is a list of links or a list of
  # lists of links.
  urls =
    if x[0].class == Array
      x.map { |group| group.map { |link| link['URL'] } }
    else
      x.map { |link| link['URL'] }
    end
  urls.flatten
end

#parse_pdf(x) ⇒ Object



58
59
60
# File 'lib/textminer/mine_utils.rb', line 58

# Delegates to Textminer.extract.
#
# @param x [Object] argument forwarded unchanged to Textminer.extract
#   (presumably a path to a PDF file; confirm against Textminer.extract)
# @return [Object] whatever Textminer.extract returns
def parse_pdf(x)
  Textminer.extract(x)
end

#parse_plain(x) ⇒ Object



53
54
55
56
# File 'lib/textminer/mine_utils.rb', line 53

# Returns the contents of the file at x, as loaded by read_disk.
#
# @param x [String] path handed to read_disk
# @return [String] the file contents
def parse_plain(x)
  read_disk(x)
end

#parse_xml(x) ⇒ Object



47
48
49
50
51
# File 'lib/textminer/mine_utils.rb', line 47

# Loads the file at x with read_disk and parses it with Nokogiri.parse.
#
# @param x [String] path handed to read_disk
# @return [Object] the result of Nokogiri.parse on the file contents
def parse_xml(x)
  Nokogiri.parse(read_disk(x))
end


37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/textminer/link_methods_hash.rb', line 37

# Selects link entries whose 'content-type' matches the pattern y.
#
# Two response shapes are handled:
# * x['message']['items'] is nil: links are read from x['message']['link'].
# * otherwise: links are read from each element of x['message']['items'].
#
# The single-record branch reads from the x argument rather than self, so the
# method also works when the caller is not the response hash itself. Items
# without a 'link' key are treated as having no links.
#
# @param x [Hash] response with a 'message' key
# @param y [String] pattern interpolated into a Regexp and matched against
#   each link's 'content-type'
# @return [Array, nil] for a single record, the matching link hashes (nil
#   when the record has no 'link'); for an item list, one array of matching
#   links per item, with items that have no match removed
def pull_links(x, y)
  matching = lambda do |links|
    links.select { |link| link['content-type'].match(/#{y}/) }
  end

  items = x['message']['items']
  if items.nil?
    links = x['message']['link']
    return nil if links.nil?

    matching.call(links).reject(&:empty?)
  else
    items.map { |item| matching.call(item['link'] || []) }.reject(&:empty?)
  end
end

#read_disk(path) ⇒ Object



43
44
45
# File 'lib/textminer/mine_utils.rb', line 43

# Reads an entire file into a string.
#
# @param path [String] path of the file to read
# @return [String] the file contents
def read_disk(path)
  File.open(path) { |file| file.read }
end

#singlearray2hash(x) ⇒ Object



1
2
3
4
5
6
7
# File 'lib/textminer/tmutils.rb', line 1

# Unwraps a one-element Array; returns anything else unchanged.
#
# The class check runs before the length check, so values that do not
# respond to #length (nil, Integer, ...) are returned as-is instead of
# raising NoMethodError.
#
# @param x [Object] value to unwrap
# @return [Object] x[0] when x is exactly an Array with one element,
#   otherwise x itself
def singlearray2hash(x)
  if x.instance_of?(Array) && x.length == 1
    x[0]
  else
    x
  end
end

#write_disk(res, path) ⇒ Object



37
38
39
40
41
# File 'lib/textminer/mine_utils.rb', line 37

# Writes a response body to disk in binary mode.
#
# Uses the block form of File.open so the handle is closed even when
# res.body or the write raises.
#
# @param res [#body] object whose #body is the data to write
# @param path [String] destination file; created or truncated
# @return [nil]
def write_disk(res, path)
  File.open(path, 'wb') { |file| file.write(res.body) }
  nil
end