Class: Webtractor::Extractor
- Inherits: Object
- Inheritance hierarchy: Object → Webtractor::Extractor
- Defined in: lib/webtractor/extractor.rb
Instance Attribute Summary
- #filters ⇒ Object
  Returns the value of attribute filters.
Instance Method Summary
- #add_filter(filter) ⇒ Object
- #clear_filters ⇒ Object
- #extract(text) ⇒ Object
- #extract_from_url(url) ⇒ Object
- #extract_from_xml(page) ⇒ Object
- #initialize(params = {}) ⇒ Extractor (constructor)
  A new instance of Extractor.
- #remove_filter(filter) ⇒ Object
Constructor Details
#initialize(params = {}) ⇒ Extractor
Returns a new instance of Extractor.
5 6 7 8 9 |
# File 'lib/webtractor/extractor.rb', line 5

# Sets up the extractor state from an options hash.
#
# Each option is read with `||`, so a missing, nil or false value falls back
# to the default listed below.
#
# @param params [Hash] optional settings, looked up by symbol key:
#   :filters      - stored in @filters; default is a one-element array
#                   holding a new Filters::DefaultFilter
#   :cache        - stored in @cache; default is false
#   :cache_params - stored in @cache_params; default is an empty hash
def initialize(params = {})
  @filters = params[:filters] || [Filters::DefaultFilter.new]
  @cache = params[:cache] || false
  @cache_params = params[:cache_params] || {}
end
Instance Attribute Details
#filters ⇒ Object
Returns the value of attribute filters.
3 4 5 |
# File 'lib/webtractor/extractor.rb', line 3

# Reader for the current filter list.
#
# @return [Object] the object held in @filters (the same object, not a copy)
def filters
  @filters
end
Instance Method Details
#add_filter(filter) ⇒ Object
30 31 32 33 34 35 36 |
# File 'lib/webtractor/extractor.rb', line 30

# Appends a filter to @filters.
#
# @param filter [Class, Object] when a Class is given it is instantiated
#   with a no-argument `new`; any other object is appended as-is
# @return [Object] the result of `@filters << ...` (for an Array, @filters itself)
def add_filter(filter)
  instance = filter.is_a?(Class) ? filter.new : filter
  @filters << instance
end
#clear_filters ⇒ Object
43 44 45 |
# File 'lib/webtractor/extractor.rb', line 43

# Empties the filter list in place by calling `clear` on @filters.
#
# @return [Object] whatever `@filters.clear` returns (for an Array, the
#   now-empty array itself)
def clear_filters
  @filters.clear
end
#extract(text) ⇒ Object
11 12 13 |
# File 'lib/webtractor/extractor.rb', line 11

# Parses +text+ with Nokogiri::HTML and hands the parsed document to
# #extract_from_xml.
#
# @param text [Object] markup passed straight to Nokogiri::HTML
# @return [Object] whatever #extract_from_xml returns for the parsed document
def extract(text)
  document = Nokogiri::HTML(text)
  extract_from_xml(document)
end
#extract_from_url(url) ⇒ Object
23 24 25 26 27 28 |
# File 'lib/webtractor/extractor.rb', line 23

# Fetches the document at +url+ and runs it through #extract.
#
# The download happens inside the block given to Cachy.cache_if, which is
# called with @cache, the key "webtractor.<url>" and @cache_params.
# NOTE(review): presumably Cachy only caches the body when @cache is truthy;
# confirm against the Cachy documentation.
#
# @param url [String] location of the document to fetch
# @return [Object] whatever #extract returns for the fetched content
def extract_from_url(url)
  # Idempotent; guarantees URI.open is defined before it is used below.
  require 'open-uri'

  content = Cachy.cache_if(@cache, "webtractor.#{url}", @cache_params) do
    # URI.open replaces the bare Kernel#open call: open-uri's Kernel#open
    # override for URLs was deprecated in Ruby 2.7 and removed in 3.0.
    # The block form also closes the stream once it has been read.
    URI.open(url, &:read)
  end
  extract(content)
end
#extract_from_xml(page) ⇒ Object
15 16 17 18 19 20 21 |
# File 'lib/webtractor/extractor.rb', line 15

# Runs a parsed document through every filter and wraps the outcome.
#
# The title is read from the '//head/title' XPath before any filter runs.
# Each filter's `process` then receives the output of the previous filter,
# in @filters order.
#
# @param page [Object] parsed document responding to `xpath`
# @return [Result] built from the title text and the fully filtered page
def extract_from_xml(page)
  title = page.xpath('//head/title').text
  filtered = @filters.inject(page) { |doc, filter| filter.process(doc) }
  Result.new(title, filtered)
end
#remove_filter(filter) ⇒ Object
38 39 40 41 |
# File 'lib/webtractor/extractor.rb', line 38

# Removes every filter that is an instance of the given class.
#
# @param filter [Class, Object] a filter class, or an instance whose class
#   is used for matching (via `is_a?`, so subclasses match too)
# @return [Object] @filters after removal
def remove_filter(filter)
  filter = filter.class unless filter.is_a?(Class)
  # delete_if always returns the receiver. The previous
  # `@filters = @filters.reject! { ... }` set @filters to nil whenever
  # nothing matched, because reject! returns nil when it removes nothing.
  @filters.delete_if { |f| f.is_a?(filter) }
end