Class: Biblionet::Extractors::AuthorExtractor

Inherits:
Base
  • Object
show all
Defined in:
lib/bookshark/extractors/author_extractor.rb

Instance Attribute Summary collapse

Attributes inherited from Base

#biblionet_id, #filepath, #page, #url

Instance Method Summary collapse

Methods inherited from Base

#decode_text, decode_text, #load_page, #load_page_from_file, #load_page_from_url, #present?, #save_page

Methods included from FileManager

#list_directories, #list_files, #save_to

Constructor Details

#initialize(uri = nil) ⇒ AuthorExtractor

Returns a new instance of AuthorExtractor.



12
13
14
15
# File 'lib/bookshark/extractors/author_extractor.rb', line 12

# Sets up the extractor through Base#initialize and, when a uri was given
# and @page is populated afterwards, extracts the author straight away.
#
# @param uri [Object, nil] forwarded unchanged to Base#initialize
#   (presumably a URL or file path -- confirm against Base)
def initialize(uri = nil)
  super(uri)
  page_ready = !(uri.nil? || @page.nil?)
  extract_author if page_ready
end

Instance Attribute Details

#author ⇒ Object (readonly)

Returns the value of attribute author.



10
11
12
# File 'lib/bookshark/extractors/author_extractor.rb', line 10

# Read-only accessor for the author data.
#
# @return [Hash, nil] the hash stored in @author by #extract_author, or nil
#   if @author was never assigned
def author; @author; end

Instance Method Details

#extract_author(biblionet_id = @biblionet_id, author_page = @page) ⇒ Object

# Serialises the extracted author as pretty-printed JSON.
#
# @return [String, nil] the JSON text, or nil when @author is nil
def to_json_pretty
  return nil if @author.nil?

  JSON.pretty_generate(@author)
end



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/bookshark/extractors/author_extractor.rb', line 27

# Builds the author hash from an author page and stores it in @author.
#
# The page is wrapped in an AuthorDataExtractor, its full name is split with
# #split_name, and the pieces are collected together with the image, bio and
# awards reported by the extractor. A progress line is printed to stdout.
#
# @param biblionet_id [Object] identifier stored under :b_id
#   (defaults to @biblionet_id)
# @param author_page [Object] page handed to AuthorDataExtractor.new
#   (defaults to @page)
# @return [Hash, nil] the author hash (also assigned to @author), or nil when
#   neither a last name nor a first name was found; @author is left untouched
#   in that case
def extract_author(biblionet_id = @biblionet_id, author_page = @page)
  puts "Extracting author: #{biblionet_id}"
  page = AuthorDataExtractor.new(author_page)

  identity = split_name(page.fullname)
  firstname = identity[:firstname]
  lastname = identity[:lastname]

  # "Lastname, Firstname" when both parts exist; otherwise fall back to the
  # last name alone (which may itself be nil).
  display_name =
    if present?(lastname) && present?(firstname)
      "#{lastname}, #{firstname}"
    else
      lastname
    end

  author_hash = {
    name: display_name,
    firstname: firstname,
    lastname: lastname,
    extra_info: identity[:extra_info],
    image: page.image,
    bio: page.bio,
    # The key is singular although it holds page.awards; kept for callers.
    award: page.awards,
    b_id: biblionet_id
  }

  return nil if lastname.nil? && firstname.nil?

  @author = author_hash
end

#load_and_extract_author(uri = nil) ⇒ Object



18
19
20
21
# File 'lib/bookshark/extractors/author_extractor.rb', line 18

# Loads the page for +uri+ via #load_page and then extracts the author.
#
# #load_page is always called, even with a nil uri; extraction is skipped
# when the uri is nil or @page is nil afterwards.
#
# @param uri [Object, nil] forwarded unchanged to #load_page
# @return [Hash, nil] whatever #extract_author returns, or nil when
#   extraction was skipped
def load_and_extract_author(uri = nil)
  load_page(uri)
  return nil if uri.nil? || @page.nil?

  extract_author
end

#split_name(fullname) ⇒ Object



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/bookshark/extractors/author_extractor.rb', line 57

# Splits a comma-separated full name into last name, first name and extra info.
#
#   split_name('Tolkien, John Ronald Reuel, 1892-1973')
#   # => { lastname: 'Tolkien', firstname: 'John Ronald Reuel', extra_info: '1892-1973' }
#   split_name('Tolkien, John')  # => { lastname: 'Tolkien', firstname: 'John' }
#   split_name('Homer, 800-700') # => { lastname: 'Homer', extra_info: '800-700' }
#
# @param fullname [String, nil] the name text; nil is treated as an empty name
# @return [Hash] always has :lastname (nil for an empty name); :firstname and
#   :extra_info are present only when the corresponding part was found
def split_name(fullname)
  # Matches digits-digits or digits- in text like: Tolkien, John Ronald Reuel, 1892-1973
  years_re = /\d+-\d*/

  # to_s keeps a missing name (nil) from raising; it simply yields no parts.
  lastname, second, *rest = fullname.to_s.split(',').map(&:strip)

  identity = {}
  identity[:lastname] = lastname
  return identity if second.nil?

  if rest.empty?
    # Two parts only: the second is either a year range or the first name.
    if second =~ years_re
      identity[:extra_info] = second
    else
      identity[:firstname] = second
    end
  else
    identity[:firstname] = second
    # Keep every remaining part instead of dropping names with extra commas.
    identity[:extra_info] = rest.join(', ')
  end

  identity
end