Class: ContentMapping

Inherits:
Object
  • Object
show all
Defined in:
lib/content_scrapper/content_mapping.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ ContentMapping

Returns a new instance of ContentMapping.



7
8
9
# File 'lib/content_scrapper/content_mapping.rb', line 7

# Builds an empty mapping.
#
# Every instance variable the class reads is assigned here so the object's
# state is explicit from the start: no XPaths registered, and no URL pattern
# or encoding conversion until #url_pattern and #iconv are called.
def initialize
  @content_xpaths_list = []
  @url_pattern_regexp = nil
  @iconv_from = nil
  @iconv_to = nil
end

Instance Attribute Details

#content_xpaths_list ⇒ Object (readonly)

Returns the value of attribute content_xpaths_list.



5
6
7
# File 'lib/content_scrapper/content_mapping.rb', line 5

# Reader for the XPath expressions registered on this mapping.
#
# @return [Array] the same array that #initialize creates and #content_at
#   appends to (the array itself, not a copy)
def content_xpaths_list
  @content_xpaths_list
end

#iconv_from ⇒ Object (readonly)

Returns the value of attribute iconv_from.



5
6
7
# File 'lib/content_scrapper/content_mapping.rb', line 5

# Reader for the source encoding used by #scrap_content.
#
# @return [Object, nil] the +:from+ value last given to #iconv; nil when
#   #iconv has not been called (no other assignment is visible in this file)
def iconv_from
  @iconv_from
end

#iconv_to ⇒ Object (readonly)

Returns the value of attribute iconv_to.



5
6
7
# File 'lib/content_scrapper/content_mapping.rb', line 5

# Reader for the target encoding used by #scrap_content.
#
# #scrap_content only performs an encoding conversion when this is non-nil.
#
# @return [Object, nil] the +:to+ value last given to #iconv; nil when
#   #iconv has not been called (no other assignment is visible in this file)
def iconv_to
  @iconv_to
end

#url_pattern_regexp ⇒ Object (readonly)

Returns the value of attribute url_pattern_regexp.



5
6
7
# File 'lib/content_scrapper/content_mapping.rb', line 5

# Reader for the pattern that #matches_url? tests URLs against.
#
# @return [Object, nil] the value last given to #url_pattern; nil when
#   #url_pattern has not been called
def url_pattern_regexp
  @url_pattern_regexp
end

Instance Method Details

#content_at(content_xpath) ⇒ Object



15
16
17
# File 'lib/content_scrapper/content_mapping.rb', line 15

# Registers one more XPath expression as a candidate content location.
#
# Expressions are kept in registration order, which is the order
# #scrap_content tries them in.
#
# @param content_xpath [Object] expression later handed to +doc.xpath+
# @return [Array] the list of registered expressions, including the new one
def content_at(content_xpath)
  @content_xpaths_list.push(content_xpath)
end

#iconv(args) ⇒ Object



19
20
21
22
# File 'lib/content_scrapper/content_mapping.rb', line 19

# Configures the encoding conversion applied by #scrap_content.
#
# @param args [Hash] options read with the symbol keys +:from+ (source
#   encoding) and +:to+ (target encoding); a missing key stores nil
# @return [Object, nil] the +:to+ value
def iconv(args)
  source_encoding = args[:from]
  target_encoding = args[:to]

  @iconv_from = source_encoding
  @iconv_to = target_encoding
end

#matches_url?(url) ⇒ Boolean

Returns:

  • (Boolean)


24
25
26
# File 'lib/content_scrapper/content_mapping.rb', line 24

# Tells whether this mapping applies to +url+.
#
# @param url [String, nil] address to test against the pattern stored by
#   #url_pattern
# @return [Boolean] true when the pattern matches, false otherwise
def matches_url?(url)
  # `=~` yields the match offset or nil; collapse that to a strict boolean so
  # the predicate returns what its `?` name and documented type promise.
  !(url =~ @url_pattern_regexp).nil?
end

#scrap_content(doc, content_scrapper = nil) ⇒ Object



28
29
30
31
32
33
34
35
36
37
# File 'lib/content_scrapper/content_mapping.rb', line 28

# Extracts the content selected by the first registered XPath that matches
# anything in +doc+.
#
# XPaths are tried in registration order. The matched nodes are joined with
# newlines, passed through +content_scrapper.clean_content+ when a scrapper is
# given, and converted with Iconv when a target encoding has been configured
# via #iconv.
#
# @param doc [#xpath] parsed document (presumably a Nokogiri document —
#   confirm against callers)
# @param content_scrapper [#clean_content, nil] optional cleaner applied to
#   the joined text
# @return [Object, nil] the processed content of the first non-empty
#   selection, or nil when no XPath selects anything
def scrap_content(doc, content_scrapper = nil)
  @content_xpaths_list.each do |content_xpath|
    content_section = doc.xpath(content_xpath)
    # Check for an empty selection first: cleaning and transcoding a result
    # that is about to be discarded is wasted work.
    next unless content_section.count > 0

    content = content_section.to_a.join("\n")
    content = content_scrapper.clean_content(content) unless content_scrapper.nil?
    content = Iconv.conv(iconv_to, iconv_from, content) unless iconv_to.nil?
    return content
  end
  nil
end

#url_pattern(pattern) ⇒ Object



11
12
13
# File 'lib/content_scrapper/content_mapping.rb', line 11

# Stores the pattern that #matches_url? tests URLs against.
#
# @param pattern [Object] presumably a Regexp, since #matches_url? applies it
#   with `=~` — TODO confirm against callers
# @return [Object] the pattern that was passed in
def url_pattern(pattern)
  @url_pattern_regexp = pattern
end