Module: Baiduserp::Helper

Defined in:
lib/baiduserp/helper.rb

Class Method Summary collapse

Class Method Details

.get_content_safe(noko) ⇒ Object

Safely get the stripped content of the first node in a Nokogiri search result; returns nil when the result is nil or empty.



7
8
9
10
11
# File 'lib/baiduserp/helper.rb', line 7

# Returns the stripped text content of the first element of a search result.
#
# @param noko [#empty?, #first, nil] a search result (e.g. a Nokogiri node
#   set) whose elements respond to +content+
# @return [String, nil] the first element's content with surrounding
#   whitespace removed, or nil when +noko+ is nil or empty
def get_content_safe(noko)
  # Nothing to read from a missing or empty result set.
  return nil if noko.nil? || noko.empty?

  first_node = noko.first
  first_node.content.strip
end

.normalize(data, weight_col = :weight, normalized_col = :normalized_weight) ⇒ Object

Normalize the weights of the given data. The data must be an array of hashes, for example: [{a: 1, b: 2}, {a: 2, b: 3}]



27
28
29
30
31
32
33
# File 'lib/baiduserp/helper.rb', line 27

# Adds a normalized weight to every hash in +data+, in place.
#
# Each entry's weight (read from +weight_col+ and coerced with +to_f+) is
# divided by the sum of all weights and stored under +normalized_col+.
#
# @param data [Array<Hash>] entries to normalize; each hash is mutated
# @param weight_col [Object] key holding the raw weight
# @param normalized_col [Object] key under which the normalized weight is stored
# @return [Array<Hash>] the same +data+ object that was passed in
def normalize(data, weight_col = :weight, normalized_col = :normalized_weight)
  total_weight = data.reduce(0.0) { |sum, d| sum + d[weight_col].to_f }
  data.each do |d|
    # A zero total would otherwise yield NaN (0.0 / 0.0) or +/-Infinity;
    # report a normalized weight of 0.0 instead.
    d[normalized_col] = total_weight.zero? ? 0.0 : d[weight_col].to_f / total_weight
  end
  data
end

.parse_data_click(str) ⇒ Object

Parse the data-click value from a Baidu div attribute, which is in a JSON-like format.



15
16
17
18
19
20
21
22
# File 'lib/baiduserp/helper.rb', line 15

# Parses a JSON-like string (single-quoted strings, possibly bare keys)
# into Ruby data.
#
# The text is rewritten into strict JSON in two steps before parsing:
# every single quote becomes a double quote (including any apostrophes
# inside values), then bare keys that directly follow "{" or "," are
# wrapped in double quotes.
#
# @param str [String] the JSON-like text
# @return [Object] the result of JSON.parse on the rewritten text
# @raise [JSON::ParserError] if the rewritten text is still not valid JSON
def parse_data_click(str)
  double_quoted = str.gsub("'", '"')
  keys_quoted = double_quoted.gsub(/({|,)([a-zA-Z0-9_]+):/, '\1"\2":')
  JSON.parse(keys_quoted)
end

.parse_site(url) ⇒ Object



35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/baiduserp/helper.rb', line 35

# Extracts the registrable site ("domain.public_suffix") from a URL.
#
# Parsing is delegated to Domainatrix. On any StandardError the failure is
# printed to stdout and an empty string is returned. Non-standard
# exceptions (Interrupt, SystemExit, NoMemoryError, ...) are deliberately
# not rescued so the process can still be interrupted or shut down.
#
# @param url [#to_s] the URL to parse
# @return [String] the site, or '' when the URL could not be parsed
def parse_site(url)
  # Keep the parsed result in its own local so the error log below still
  # shows the URL that was passed in.
  parsed = Domainatrix.parse(url.to_s)
  # Plain concatenation on purpose: a nil domain or suffix raises here and
  # falls through to the '' fallback instead of producing a partial site.
  parsed.domain + '.' + parsed.public_suffix
rescue StandardError => e
  puts "parse_site from url error:"
  puts url
  puts e.class
  puts e.message
  ''
end

.parse_subdomain(url) ⇒ Object



49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/baiduserp/helper.rb', line 49

# Extracts the subdomain part of a URL.
#
# Parsing is delegated to Domainatrix. On any StandardError the failure is
# printed to stdout and an empty string is returned. Non-standard
# exceptions (Interrupt, SystemExit, NoMemoryError, ...) are deliberately
# not rescued so the process can still be interrupted or shut down.
#
# @param url [#to_s] the URL to parse
# @return [Object] whatever Domainatrix reports as the subdomain, or ''
#   when the URL could not be parsed
def parse_subdomain(url)
  # Keep the parsed result in its own local so the error log below still
  # shows the URL that was passed in.
  parsed = Domainatrix.parse(url.to_s)
  parsed.subdomain
rescue StandardError => e
  puts "parse_subdomain from url error:"
  puts url
  puts e.class
  puts e.message
  ''
end