Class: Baiduserp::Analyser

Inherits:
Object
  • Object
show all
Defined in:
lib/baiduserp/analyser.rb

Instance Method Summary collapse

Constructor Details

#initialize(name, attrs = {}) ⇒ Analyser

Dir[File.expand_path('../analyser/*.rb', __FILE__)].each{|f| require f}



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/baiduserp/analyser.rb', line 11

# Prepares the working directory, the main SQLite database and the model
# classes for one analysis project.
#
# @param name [String] project name; used as the directory that holds the
#   per-date databases and as the basename of the "<name>.sqlite" file
# @param attrs [Hash] options; +:keywords+ is the default CSV path read by
#   #import_keywords
def initialize(name,attrs={})
  @name = name
  # File.exists?/Dir.exists? were deprecated aliases and are gone as of
  # Ruby 3.2, so the singular predicates are used here.
  Dir.mkdir @name unless Dir.exist? @name # store htmls and serps data under the dir

  @db_file = "#{@name}.sqlite"
  @attrs = attrs
  # Checked before the database is opened below; presumably opening or
  # migrating creates the file, which would make this always true afterwards.
  @keywords_imported = File.exist?(@db_file)

  @db = Sequel.sqlite(@db_file)

  migrate!

  @keywords = Class.new(Sequel::Model(@db[:keywords]))
  @weights = Class.new(Sequel::Model(@db[:weights]))

  # Keywords are only loaded from the CSV when the database file did not
  # already exist at construction time.
  import_keywords unless @keywords_imported
end

Instance Method Details

#_analyse_competitors(date = Date.today) ⇒ Object



123
124
125
126
127
128
129
130
# File 'lib/baiduserp/analyser.rb', line 123

# Prints, as YAML on stdout, how often each site occurs across the
# +sem_sites+ lists of the SERPs stored for the given date.
#
# @param date [Date] day whose SERP database is read (defaults to today)
def _analyse_competitors(date=Date.today)
  sites = Hash.new(0)
  # The constructor does not set @serps, so the per-date SERP model is
  # obtained through model_serps, the same way generate_weights does it.
  model_serps(date).each do |record|
    # NOTE(review): on Psych 4+ YAML.load is a safe load; if the stored SERP
    # is a custom object this may need YAML.unsafe_load — confirm.
    parsed = YAML.load(record[:content])
    parsed.sem_sites.each { |site| sites[site] += 1 }
  end
  puts YAML.dump(sites)
end

#generate_weights(date = Date.today) ⇒ Object



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# File 'lib/baiduserp/analyser.rb', line 88

# Walks the SERPs stored for +date+ and records each of their weight entries
# in the weights table, keyed by date, keyword, side and side rank.
#
# @param date [Date] day whose SERP database is read (defaults to today)
def generate_weights(date=Date.today)
  serp_records = model_serps(date)
  progress = ProgressBar.create(:title => "Generating Weights", :total => serp_records.count)
  # Columns copied verbatim from a weight entry onto the row.
  copied_columns = [:type, :name, :site, :subdomain, :url, :weight, :part, :normalized_weight]

  serp_records.each do |record|
    keyword_id = record[:keyword_id]
    parsed = YAML.load(record[:content])

    parsed.weights.each do |entry|
      lookup = {
        :date => date,
        :keyword_id => keyword_id,
        :side => entry[:side],
        :side_rank => entry[:side_rank]
      }
      # Presumably (per Sequel's find_or_create) the block only runs for a
      # row that has to be created; existing rows are left as they are.
      @weights.find_or_create(lookup) do |row|
        copied_columns.each { |column| row.send("#{column}=", entry[column]) }
      end
    end

    progress.log keyword_id
    progress.increment
  end
end

#import_keywords(file = @attrs[:keywords]) ⇒ Object



39
40
41
42
43
44
45
46
# File 'lib/baiduserp/analyser.rb', line 39

# Reads keywords from a CSV file into the keywords model. Each row is read
# positionally: term, search volume, category.
#
# @param file [String] path of the CSV file; defaults to the +:keywords+
#   entry of the attrs given to the constructor
def import_keywords(file=@attrs[:keywords])
  CSV.foreach(file) do |row|
    term, search_volume, category = row
    # Looked up by term; the extra columns are set inside the block.
    @keywords.find_or_create(:term => term) do |record|
      record.search_volume = search_volume
      record.category = category
    end
  end
end

#migrate!(db = @db, schema = 'weights') ⇒ Object



34
35
36
37
# File 'lib/baiduserp/analyser.rb', line 34

# Runs the Sequel migrations of one schema against a database.
#
# @param db [Object] database handle passed to Sequel::Migrator.apply;
#   defaults to the main database (@db)
# @param schema [String] name of the sub-directory of "migrations/" (resolved
#   relative to this file) that holds the migration files
def migrate!(db = @db, schema = 'weights')
  Sequel.extension(:migration, :core_extensions)
  migrations_dir = File.expand_path("../migrations/#{schema}/", __FILE__)
  Sequel::Migrator.apply(db, migrations_dir)
end

#model_htmls(date = Date.today) ⇒ Object



48
49
50
51
52
# File 'lib/baiduserp/analyser.rb', line 48

# Opens the per-date HTML database under the project directory, migrates it
# with the 'htmls' schema and returns a model class for its +htmls+ table.
#
# @param date [Date] day that selects the database file (defaults to today)
# @return [Class] anonymous subclass of the Sequel model for +htmls+
def model_htmls(date=Date.today)
  connection = Sequel.sqlite("#{@name}/htmls_#{date}.sqlite")
  migrate!(connection, 'htmls')
  base_model = Sequel::Model(connection[:htmls])
  Class.new(base_model)
end

#model_serps(date = Date.today) ⇒ Object



54
55
56
57
58
# File 'lib/baiduserp/analyser.rb', line 54

# Opens the per-date SERP database under the project directory, migrates it
# with the 'serps' schema and returns a model class for its +serps+ table.
#
# @param date [Date] day that selects the database file (defaults to today)
# @return [Class] anonymous subclass of the Sequel model for +serps+
def model_serps(date=Date.today)
  connection = Sequel.sqlite("#{@name}/serps_#{date}.sqlite")
  migrate!(connection, 'serps')
  base_model = Sequel::Model(connection[:serps])
  Class.new(base_model)
end

#regenerate_serps(date = Date.today) ⇒ Object



73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/baiduserp/analyser.rb', line 73

# Re-parses every HTML page stored for +date+ and overwrites the content of
# the SERP row with the same keyword id with the freshly parsed result.
#
# @param date [Date] day whose HTML and SERP databases are used (defaults to
#   today)
def regenerate_serps(date=Date.today)
  html_records = model_htmls(date)
  serps = model_serps(date)
  progress = ProgressBar.create(:title => "ReGenerating SERPS", :total => html_records.count)

  html_records.each do |html_record|
    keyword_id = html_record[:keyword_id]
    raw_html = html_record[:content]

    # The row is fetched (or created) first and then always updated, so
    # existing SERP content is replaced rather than kept.
    serp_record = serps.find_or_create(:keyword_id => keyword_id)
    serialized = YAML.dump(Baiduserp.parse(raw_html))
    serp_record.update(:content => serialized)

    progress.log keyword_id
    progress.increment
  end
end

#run(date = Date.today) ⇒ Object



29
30
31
32
# File 'lib/baiduserp/analyser.rb', line 29

# Entry point for one day's work: runs #search and then #generate_weights
# for the same date.
#
# @param date [Date] day to process (defaults to today)
# @return [Object] whatever #generate_weights returns
def run(date=Date.today)
  search date
  generate_weights date
end

#search(date = Date.today) ⇒ Object

Pipeline: search each keyword -> store the returned HTML -> parse it into a SERP



61
62
63
64
65
66
67
68
69
70
71
# File 'lib/baiduserp/analyser.rb', line 61

# For every keyword: fetches the search result HTML (unless a row for that
# keyword already exists in the day's HTML database), then parses it and
# stores the YAML-dumped SERP (unless a SERP row already exists).
#
# @param date [Date] day whose HTML and SERP databases are used (defaults to
#   today)
def search(date=Date.today)
  htmls = model_htmls(date)
  serps = model_serps(date)
  # Count through the model instead of materialising every row with
  # `.all` only to count them.
  progress = ProgressBar.create(:title => "Searching Keywords", :total => @keywords.count)

  @keywords.each do |keyword|
    keyword_id = keyword[:id]

    # find_or_create hands back the (existing or new) row, so there is no
    # need to query the HTML table a second time for its content.
    html_record = htmls.find_or_create(:keyword_id => keyword_id) do |row|
      row.content = Baiduserp.get_search_html(keyword[:term])
    end

    serps.find_or_create(:keyword_id => keyword_id) do |row|
      row.content = YAML.dump(Baiduserp.parse(html_record[:content]))
    end

    progress.log keyword.to_hash
    progress.increment
  end
end