Class: FreeScrape::Item

Inherits:
Object
  • Object
show all
Defined in:
lib/free_scrape/item.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url, options = {}) ⇒ Item

Creates a new Item object with the specified url and the given options (:name and :summary).



25
26
27
28
29
30
31
# File 'lib/free_scrape/item.rb', line 25

# Builds an Item for +url+.
#
# url::     stored unchanged in @url.
# options:: indexed with +:name+ and +:summary+; with the default empty
#           Hash both attributes end up nil.
def initialize(url, options = {})
  @url = url
  @name, @summary = options[:name], options[:summary]

  # Starts out empty; filled in later by whoever populates the item.
  @categories = {}
end

Instance Attribute Details

#categories ⇒ Object (readonly)

Categories the item is in



19
20
21
# File 'lib/free_scrape/item.rb', line 19

# Reader for @categories (the same object, not a copy).
def categories; @categories; end

#name ⇒ Object

Name of the item



13
14
15
# File 'lib/free_scrape/item.rb', line 13

# Reader for @name.
def name; @name; end

#summary ⇒ Object

Description of the item



16
17
18
# File 'lib/free_scrape/item.rb', line 16

# Reader for @summary.
def summary; @summary; end

#url ⇒ Object (readonly)

URL of the item



10
11
12
# File 'lib/free_scrape/item.rb', line 10

# Reader for @url.
def url; @url; end

Class Method Details

.from(descriptor) ⇒ Object

Returns the Item object with the specified descriptor, which can be either a URI to freebase.com, an Item GUID or an Item name.



37
38
39
40
41
42
43
44
45
46
47
# File 'lib/free_scrape/item.rb', line 37

# Resolves +descriptor+ to an Item by dispatching on its textual form.
#
# descriptor:: converted with +to_s+ and stripped of surrounding
#              whitespace, then classified as
#              * only lowercase hex digits              -> Item.guid
#              * http(s)://[www.]freebase.com/view...   -> Item.from_url
#              * anything else                          -> Item.named
#
# Returns whatever the selected constructor returns.
def Item.from(descriptor)
  # Surrounding whitespace (e.g. a trailing newline) would otherwise end up
  # inside the generated URL.
  descriptor = descriptor.to_s.strip

  case descriptor
  # \A and \z anchor the whole string; ^ and $ match per line and would
  # accept a multi-line descriptor in which only one line looks like a GUID.
  # NOTE(review): a name made up solely of digits and the letters a-f
  # (e.g. "cafe") is also routed to Item.guid -- confirm whether GUIDs have
  # a fixed length that could be required here.
  when /\A[0-9a-f]+\z/
    Item.guid(descriptor)
  # The dot in "freebase.com" is escaped so it only matches a literal dot.
  when %r{\Ahttps?://(?:www\.)?freebase\.com/view}
    Item.from_url(descriptor)
  else
    Item.named(descriptor)
  end
end

.from_url(url) ⇒ Object

Creates the Item at the specified url.

Item.from_url('http://www.freebase.com/view/guid/9202a8c04000641f800000000301146f')
# => #<FreeScrape::Item:0xb73fdba0 ...>


73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# File 'lib/free_scrape/item.rb', line 73

# Builds an Item from the page at +url+.
#
# url:: converted with +to_s+ and parsed with URI(); the resulting URI is
#       handed to FreeScrape.open_page and stored on the new Item.
#
# The parsed page is queried with CSS/XPath-like selectors (presumably an
# Hpricot-style document -- confirm against FreeScrape.open_page):
# * the item name is the stripped text of '#title//h1',
# * the summary is the stripped inner HTML of the article div,
# * every typebox container becomes a Category keyed by its name, holding
#   one entry per property box.
#
# A property value is
# * an Array of Hashes (column name => value) when the property holds a
#   'table.prop-table',
# * an Array of values when it holds a 'ul.prop-list',
# * nil otherwise.
#
# Returns the populated Item. Raises NoMethodError if an expected element
# is missing from the page (the lookups are not nil-checked).
def Item.from_url(url)
  url = URI(url.to_s)
  page = FreeScrape.open_page(url)
  new_item = Item.new(url)

  content = page.at('#content_main')

  new_item.name = content.at('#title//h1').inner_text.strip
  new_item.summary = content.at('#title/div.article-container/div.article').inner_html.strip

  # Turns one cell / list item into a value:
  # * an ItemLink when it contains an 'a.pv' link (href resolved against
  #   the item's own URL),
  # * its stripped text when it has no 'a.detail-view' link and the text
  #   is not empty,
  # * nil in every other case.
  extract_value = lambda { |elem|
    if (item_link = elem.at('a.pv'))
      link_url = new_item.url.merge(item_link['href'])

      ItemLink.new(item_link.inner_text.strip, link_url)
    elsif elem.at('a.detail-view').nil?
      text = elem.inner_text.strip

      text unless text.empty?
    end
  }

  content.search('div.domainsboxes//div.domainbox//div.typebox-container') do |domainbox|
    category_name = domainbox.at('//div.typebox-column-title/a').inner_text
    new_category = Category.new(category_name)

    domainbox.search('//div.prop-typebox') do |field|
      field_name = field.at('//span.prop-title').inner_text
      field_content = field.at('//div.prop-content')

      # Stays nil when the property holds neither a table nor a list.
      field_value = nil

      if (table = field_content.at('table.prop-table'))
        field_value = []

        column_names = table.search('tr/th/div.prop-table-cell').map do |div|
          div.inner_text.strip
        end

        # Only rows that contain data cells (skips the header row).
        table.search('tr[td]') do |row|
          field_row = {}
          index = 0

          row.search('td') do |cell|
            if (value = extract_value.call(cell))
              field_row[column_names[index]] = value
            end

            # Advance even for skipped cells so columns stay aligned.
            index += 1
          end

          field_value << field_row unless field_row.empty?
        end
      elsif (list = field_content.at('ul.prop-list'))
        field_value = []

        list.search('li.prop-list-item') do |list_item|
          if (value = extract_value.call(list_item))
            field_value << value
          end
        end
      end

      # Was `new_category.[field_name] = ...`, which does not parse.
      new_category[field_name] = field_value
    end

    new_item.categories[new_category.name] = new_category
  end

  return new_item
end

.guid(guid) ⇒ Object

Returns the Item object with the specified guid.



63
64
65
# File 'lib/free_scrape/item.rb', line 63

# Looks up an Item by GUID: interpolates +guid+ into the freebase.com
# "view/guid" URL and delegates to Item.from_url.
def Item.guid(guid)
  guid_url = "http://www.freebase.com/view/guid/#{guid}"
  Item.from_url(guid_url)
end

.named(name) ⇒ Object

Returns the Item object with the specified name.



52
53
54
55
56
57
58
# File 'lib/free_scrape/item.rb', line 52

# Looks up an Item by name.
#
# The name is split on whitespace, each word is downcased and the words are
# joined with underscores; the result is appended to the freebase.com view
# URL for FreeScrape.language and passed to Item.from_url.
def Item.named(name)
  words = name.split(' ')
  slug = words.map(&:downcase).join('_')

  Item.from_url("http://www.freebase.com/view/#{FreeScrape.language}/#{slug}")
end

Instance Method Details

#[](name) ⇒ Object

Returns the Category with the specified name of the item.



159
160
161
# File 'lib/free_scrape/item.rb', line 159

# Looks +name+ up in @categories and returns the stored entry
# (nil when the hash has no such key).
def [](name)
  return @categories[name]
end

#category_names ⇒ Object

Returns the category names of the item.



152
153
154
# File 'lib/free_scrape/item.rb', line 152

# Returns the keys of @categories as an Array, in insertion order.
def category_names
  @categories.each_key.to_a
end

#to_s ⇒ Object

Returns the name of the item.



166
167
168
# File 'lib/free_scrape/item.rb', line 166

def to_s
  @name.to_s
end