Class: Nlg::Extractors::Base
- Inherits:
- Object
  - Object
  - Nlg::Extractors::Base
- Defined in:
- lib/bookshark/extractors/nlg/base.rb
Direct Known Subclasses
Instance Attribute Summary collapse
-
#nlg_id ⇒ Object
readonly
Returns the value of attribute nlg_id.
-
#page ⇒ Object
readonly
Returns the value of attribute page.
-
#url ⇒ Object
readonly
Returns the value of attribute url.
Class Method Summary collapse
- .decode_text(encoded_text) ⇒ Object
Instance Method Summary collapse
-
#decode_text(encoded_text) ⇒ Object
Decodes text with escaped html entities and returns the decoded text.
-
#initialize(id = nil) ⇒ Base
constructor
A new instance of Base.
- #load_page(id = nil) ⇒ Object
- #load_page_by_id(id) ⇒ Object
- #present?(value) ⇒ Boolean
Constructor Details
#initialize(id = nil) ⇒ Base
Returns a new instance of Base.
18 19 20 |
# File 'lib/bookshark/extractors/nlg/base.rb', line 18
# Builds a new extractor and hands +id+ straight to #load_page.
#
# Params:
# +id+:: optional record identifier; defaults to nil
def initialize(id = nil)
  load_page(id)
end
Instance Attribute Details
#nlg_id ⇒ Object (readonly)
Returns the value of attribute nlg_id.
16 17 18 |
# File 'lib/bookshark/extractors/nlg/base.rb', line 16
# Read-only accessor: returns the current value of @nlg_id.
def nlg_id
  @nlg_id
end
#page ⇒ Object (readonly)
Returns the value of attribute page.
16 17 18 |
# File 'lib/bookshark/extractors/nlg/base.rb', line 16
# Read-only accessor: returns the current value of @page.
def page
  @page
end
#url ⇒ Object (readonly)
Returns the value of attribute url.
16 17 18 |
# File 'lib/bookshark/extractors/nlg/base.rb', line 16
# Read-only accessor: returns the current value of @url.
def url
  @url
end
Class Method Details
.decode_text(encoded_text) ⇒ Object
70 71 72 73 74 |
# File 'lib/bookshark/extractors/nlg/base.rb', line 70
# Passes +encoded_text+ through a fresh HTMLEntities decoder and returns
# whatever that decoder's #decode call produces.
#
# Params:
# +encoded_text+:: the text handed to HTMLEntities#decode
def self.decode_text(encoded_text)
  HTMLEntities.new.decode(encoded_text)
end
Instance Method Details
#decode_text(encoded_text) ⇒ Object
Decodes text with escaped html entities and returns the decoded text.
Params:
- encoded_text: the text which contains encoded entities
66 67 68 |
# File 'lib/bookshark/extractors/nlg/base.rb', line 66
# Instance-level wrapper that forwards +encoded_text+ to the class-level
# decode_text of the receiver's class and returns its result.
#
# Params:
# +encoded_text+:: the text which contains encoded entities
def decode_text(encoded_text)
  owner = self.class
  owner.decode_text(encoded_text)
end
#load_page(id = nil) ⇒ Object
22 23 24 |
# File 'lib/bookshark/extractors/nlg/base.rb', line 22
# Loads the page for +id+ by calling load_page_by_id.
# When +id+ is nil nothing is loaded and nil is returned.
#
# Params:
# +id+:: optional record identifier; defaults to nil
def load_page(id = nil)
  return if id.nil?

  load_page_by_id(id)
end
#load_page_by_id(id) ⇒ Object
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
# File 'lib/bookshark/extractors/nlg/base.rb', line 26
# Downloads the MARCXML export of a record from nbib.nlg.gr and stores the
# response body in @page.
#
# @nlg_id is updated when +id+ is non-nil, and @url is rebuilt from @nlg_id.
# @page is reset before every attempt, so after any failure it is nil rather
# than the body of a previously loaded record. No exception from the request
# itself is propagated to the caller; failures are only printed with pp.
#
# Params:
# +id+:: record identifier placed in the request path; when nil the stored @nlg_id is reused
# +max_retries+:: how many times an unexpected StandardError is retried before giving up (default 5)
# +retry_wait+:: seconds to sleep before each retry (default 120)
def load_page_by_id(id, max_retries: 5, retry_wait: 120)
  @nlg_id = id unless id.nil? # id is expected to be the last number.
  record_path = "/Record/#{@nlg_id}/Export?style=MARCXML"
  @url = "http://nbib.nlg.gr#{record_path}"
  # Counter lives outside the begin block so that `retry` does not reset it.
  attempts = 0

  begin
    # Drop any body left over from an earlier load so @page always matches @url.
    @page = nil
    pp "Downloading page: #{@url}"

    Net::HTTP.start("nbib.nlg.gr") do |http|
      response = http.get(record_path)

      pp response.content_type
      pp response.code

      # Anything other than a 200 XML response is treated as an empty page.
      raise EmptyPageError, @url unless response.content_type == "text/xml" && response.code == "200"

      @page = response.body
    end
  rescue Errno::ENOENT => e
    pp "Page: #{@url} NOT FOUND."
    pp e
  rescue EmptyPageError => e
    pp "Page: #{@url} is EMPTY."
    pp e
    @page = nil
  rescue OpenURI::HTTPError => e
    pp e
    pp e.io.status
  rescue StandardError => e
    # Bounded retry: an unbounded `retry` would spin forever on a persistent error.
    if attempts < max_retries
      attempts += 1
      pp "Generic error #{e.class}. Will wait for #{retry_wait} seconds and then try again."
      pp e
      sleep(retry_wait)
      retry
    end

    pp "Generic error #{e.class}. Giving up after #{attempts} retries."
    pp e
  end
end
#present?(value) ⇒ Boolean
76 77 78 |
# File 'lib/bookshark/extractors/nlg/base.rb', line 76
# Tells whether +value+ carries any content.
#
# Values that respond to empty? (strings, arrays, hashes, ...) are present
# when they are not empty. Any other value is present when it is truthy, so
# nil and false are not present, while numbers and other objects are. This
# avoids the NoMethodError that calling empty? on such values would raise.
#
# Params:
# +value+:: the object to inspect
#
# Returns true or false.
def present?(value)
  return !value.empty? if value.respond_to?(:empty?)

  !!value
end