Class: RDig::ContentExtractors::ContentExtractor

Inherits:
Object
  • Object
show all
Defined in:
lib/rdig/content_extractors.rb

Overview

Base class for content extractors. Extractors inheriting from this class are auto-discovered, and the first one whose can_do method returns true for a content type is used to process content of that type.

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(config) ⇒ ContentExtractor

Returns a new instance of ContentExtractor.



43
44
45
# File 'lib/rdig/content_extractors.rb', line 43

# Builds an extractor and keeps the supplied configuration object.
#
# @param config the configuration object; it is stored unchanged in @config
def initialize(config)
  @config = config
end

Class Method Details

.extractor_instances ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/rdig/content_extractors.rb', line 23

# Returns the list of extractor instances, building it on first use.
#
# Every class in +extractors+ is instantiated with
# RDig.configuration.content_extraction. A class whose construction fails is
# logged through RDig.logger and left out of the list, so one broken
# extractor does not prevent the others from being used. The resulting
# array is memoized in a class variable.
#
# @return [Array] the successfully constructed extractor instances
def self.extractor_instances
  @@extractor_instances ||= extractors.map { |ex_class|
    RDig.logger.info "initializing content extractor: #{ex_class}"
    begin
      ex_class.new(RDig.configuration.content_extraction)
    rescue StandardError, ScriptError => e
      # ScriptError is included so a LoadError raised while setting up an
      # extractor is still treated as "extractor unavailable". Unlike a
      # blanket `rescue Exception`, this lets Interrupt, SystemExit and
      # NoMemoryError propagate.
      RDig.logger.error "error: #{e.message}\n#{Array(e.backtrace).join("\n")}"
      nil
    end
  }.compact
end

.extractors ⇒ Object



22
# File 'lib/rdig/content_extractors.rb', line 22

# Returns the array of registered extractor classes, creating an empty
# array the first time it is requested.
def self.extractors
  @@extractors ||= []
end

.inherited(extractor) ⇒ Object



17
18
19
20
# File 'lib/rdig/content_extractors.rb', line 17

# Subclass hook: forwards to the parent implementation, then appends the
# new subclass to the +extractors+ list.
#
# @param extractor [Class] the class that has just inherited from this one
def self.inherited(extractor)
  super
  extractors.push(extractor)
end

.process(content, content_type) ⇒ Object



36
37
38
39
40
41
# File 'lib/rdig/content_extractors.rb', line 36

# Hands +content+ to the first extractor instance whose can_do accepts
# +content_type+ and returns that extractor's result.
#
# When no extractor accepts the content type, this is reported through
# RDig.logger (the same channel extractor_instances uses) rather than
# printed to stdout, and nil is returned.
#
# @param content the content to pass to the matching extractor's process
# @param content_type the content type tested against each extractor's can_do
# @return the matching extractor's result, or nil if none matched
def self.process(content, content_type)
  extractor = extractor_instances.find { |candidate| candidate.can_do(content_type) }
  return extractor.process(content) if extractor

  RDig.logger.info "unable to handle content type #{content_type}"
  nil
end

Instance Method Details

#can_do(content_type) ⇒ Object



47
48
49
# File 'lib/rdig/content_extractors.rb', line 47

# Tells whether this extractor accepts the given content type.
#
# With no @pattern set the result is falsy (the unset @pattern value is
# returned as-is). Otherwise the result is that of matching +content_type+
# against @pattern: the match offset on success, nil on failure.
def can_do(content_type)
  return @pattern unless @pattern

  content_type =~ @pattern
end