Class: SemExtractor::Calais

Inherits:
SemExtractor show all
Defined in:
lib/apis/calais.rb

Instance Attribute Summary

Attributes inherited from SemExtractor

#api_key, #categories, #context, #geos, #terms

Instance Method Summary collapse

Methods inherited from SemExtractor

#set

Constructor Details

#initialize(options = {}) ⇒ Calais

Returns a new instance of Calais.



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# File 'lib/apis/calais.rb', line 4

# Builds the extractor and fills it from the Open Calais RDF response.
#
# The options are handed to #set, then the XML returned by +remote_xml+ is
# parsed and every +rdf:Description+ node is dispatched on the URL found in
# its +rdf:type+ resource: categories, terms (entities), relevance scores
# and geos are appended to @categories, @terms and @geos respectively.
# NOTE(review): assumes #set leaves @categories, @terms and @geos as
# arrays before parsing starts -- confirm in SemExtractor.
#
# @param options [Hash] forwarded unchanged to #set
def initialize(options={})
  self.set(options)
  # each, not map: the block is run only for its side effects.
  Nokogiri::XML(remote_xml).xpath('//rdf:Description').each do |h|
    type_node = h.xpath('rdf:type').first
    node_type = type_node && type_node['resource']
    # A description without a usable rdf:type cannot be classified; skip it
    # instead of failing on nil.
    next unless node_type

    if node_type.include?('/type/cat/')
      @categories << { "name" => sanitize(h.xpath('c:categoryName')), "score" => sanitize(h.xpath('c:score')) }
    elsif node_type.include?('/type/em/')
      # The score is unknown at this point; a later RelevanceInfo node fills it in.
      @terms << { "name" => sanitize(h.xpath('c:name')), "score" => nil, "nationality" => sanitize(h.xpath('c:nationality')) }
    elsif node_type.include?('/type/sys/InstanceInfo/')
      # Nothing to do, no information to take from these nodes.
    elsif node_type.include?('/type/sys/RelevanceInfo/')
      # Relies on Open Calais emitting each RelevanceInfo right after the term
      # it describes. If none has been seen yet there is nothing to score.
      last_term = @terms.last
      last_term["score"] = sanitize(h.xpath('c:relevance')) if last_term
    elsif node_type.include?('/type/er/Geo/')
      @geos << { "name" => sanitize(h.xpath('c:name')) }
    end
  end
end

Instance Method Details

#post_params ⇒ Object



27
28
29
30
31
32
# File 'lib/apis/calais.rb', line 27

# Parameters sent to the gateway with each request.
#
# @return [Hash] 'licenseID' mapped to @api_key and 'content' mapped to
#   @context, in that order
def post_params
  params = {}
  params['licenseID'] = @api_key
  params['content'] = @context
  params
end

#uri ⇒ Object



23
24
25
# File 'lib/apis/calais.rb', line 23

# Full request URI: the gateway followed by #post_params as a query string.
#
# Each key and value is form-encoded on its own, so reserved characters in
# the content ('&', '=', '+', ...) cannot break the query apart. This also
# replaces URI.escape, which was removed in Ruby 3.0.
#
# @return [URI::Generic] the parsed URI
def uri
  query = post_params.map do |key, value|
    "#{URI.encode_www_form_component(key.to_s)}=#{URI.encode_www_form_component(value.to_s)}"
  end.join('&')
  URI.parse("#{gateway}?#{query}")
end