Class: Baiduserp::Analyser

Inherits:
Object
  • Object
show all
Defined in:
lib/baiduserp/analyser.rb

Instance Method Summary collapse

Constructor Details

#initialize(name, attrs = {}) ⇒ Analyser

Dir[File.expand_path('../analyser/*.rb', __FILE__)].each{|f| require f}



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/baiduserp/analyser.rb', line 11

# Sets up an analysis project called +name+.
#
# Creates a directory named after the project (the per-date html/serp SQLite
# files built by #model_htmls and #model_serps live under it), opens
# "<name>.sqlite", runs the migrations, builds the keyword and weight models,
# and imports the keyword list when the database file did not exist before
# this call.
#
# @param name [String] project name; used as the data directory name and as
#   the prefix of the main database file
# @param attrs [Hash] options; +:keywords+ is the default CSV path used by
#   #import_keywords
def initialize(name, attrs = {})
  @name = name
  # `exist?` replaces the deprecated `exists?` aliases, which were removed in
  # Ruby 3.2 and would raise NoMethodError there.
  Dir.mkdir(@name) unless Dir.exist?(@name) # store htmls and serps data under the dir

  @db_file = @name + ".sqlite"
  @attrs = attrs
  # Recorded before the database is opened and migrated, so it reflects
  # whether the file was already present prior to this call.
  @keywords_imported = File.exist?(@db_file)

  @db = Sequel.sqlite(@db_file)

  migrate!

  @keywords = Class.new(Sequel::Model(@db[:keywords]))
  @weights = Class.new(Sequel::Model(@db[:weights]))

  import_keywords unless @keywords_imported
end

Instance Method Details

#_analyse_competitors(date = Date.today) ⇒ Object



120
121
122
123
124
125
126
127
# File 'lib/baiduserp/analyser.rb', line 120

# Prints a YAML tally of how many times each site appears in the SERP rows
# selected by +date+.
#
# Every selected row's :content is YAML-decoded and each entry of its
# +sem_sites+ increments that site's counter.
#
# NOTE(review): @serps is not assigned by any method visible in this file;
# confirm where it is set before relying on this method.
#
# @param date [Date] value used in the `:date => date` filter
# @return [nil] the result of +puts+
def _analyse_competitors(date = Date.today)
  tally = Hash.new(0)
  @serps.where(:date => date).each do |row|
    parsed = YAML.load(row[:content])
    parsed.sem_sites.each do |site|
      tally[site] += 1
    end
  end
  puts YAML.dump(tally)
end

#generate_weights(date = Date.today) ⇒ Object



86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/baiduserp/analyser.rb', line 86

# Builds weight records from the SERPs stored for +date+.
#
# Each row of the per-date serps model has its :content YAML-decoded; for
# every entry of the decoded object's +weights+ a @weights record is looked up
# (or created) by date, keyword id, side and side rank. The remaining
# attributes are assigned inside the block handed to find_or_create
# (in Sequel that block is only run when a new record is created).
#
# NOTE(review): on Psych 4+ a plain YAML.load refuses arbitrary Ruby objects;
# confirm the stored content only needs basic types, or that the Psych version
# in use is older.
#
# @param date [Date] the day whose SERPs are processed
def generate_weights(date = Date.today)
  serp_model = model_serps(date)
  progress = ProgressBar.create(:title => "Generating Weights", :total => serp_model.count, :format => '%t (%c/%C) %a %E |%w')
  serp_model.each do |row|
    keyword_id = row[:keyword_id]
    parsed = YAML.load(row[:content])

    parsed.weights.each do |w|
      lookup = { :date => date, :keyword_id => keyword_id, :side => w[:side], :side_rank => w[:side_rank] }

      @weights.find_or_create(lookup) do |record|
        record.type = w[:type]
        record.name = w[:name]
        record.site = w[:site]
        record.subdomain = w[:subdomain]
        record.url = w[:url]
        record.weight = w[:weight]
        record.part = w[:part]
        record.normalized_weight = w[:normalized_weight]
      end
    end
    progress.increment
  end
end

#import_keywords(file = @attrs[:keywords]) ⇒ Object



39
40
41
42
43
44
45
46
# File 'lib/baiduserp/analyser.rb', line 39

# Loads keywords from a CSV file into the keywords model.
#
# Each row is read as: column 0 = term, column 1 = search volume,
# column 2 = category. A record is looked up (or created) by term; volume and
# category are assigned in the block handed to find_or_create.
#
# @param file [String] path of the CSV file; defaults to @attrs[:keywords]
def import_keywords(file = @attrs[:keywords])
  CSV.foreach(file) do |term, search_volume, category|
    @keywords.find_or_create(:term => term) do |record|
      record.search_volume = search_volume
      record.category = category
    end
  end
end

#migrate!(db = @db, schema = 'weights') ⇒ Object



34
35
36
37
# File 'lib/baiduserp/analyser.rb', line 34

# Applies the migrations found in "migrations/<schema>/" (resolved relative to
# this file) to +db+.
#
# @param db the database handle to migrate; defaults to @db
# @param schema [String] name of the migrations sub-directory
def migrate!(db = @db, schema = 'weights')
  Sequel.extension :migration, :core_extensions
  migrations_dir = File.expand_path("../migrations/#{schema}/", __FILE__)
  Sequel::Migrator.apply(db, migrations_dir)
end

#model_htmls(date = Date.today) ⇒ Object



48
49
50
51
52
# File 'lib/baiduserp/analyser.rb', line 48

# Opens (and migrates with the 'htmls' schema) the SQLite file
# "<name>/htmls_<date>.sqlite" and returns a fresh anonymous model class
# bound to its :htmls table.
#
# @param date [Date] day whose html store is opened
# @return [Class] anonymous subclass of Sequel::Model for the :htmls table
def model_htmls(date = Date.today)
  path = "#{@name}/htmls_#{date}.sqlite"
  connection = Sequel.sqlite(path)
  migrate!(connection, 'htmls')
  Class.new(Sequel::Model(connection[:htmls]))
end

#model_serps(date = Date.today) ⇒ Object



54
55
56
57
58
# File 'lib/baiduserp/analyser.rb', line 54

# Opens (and migrates with the 'serps' schema) the SQLite file
# "<name>/serps_<date>.sqlite" and returns a fresh anonymous model class
# bound to its :serps table.
#
# @param date [Date] day whose serp store is opened
# @return [Class] anonymous subclass of Sequel::Model for the :serps table
def model_serps(date = Date.today)
  path = "#{@name}/serps_#{date}.sqlite"
  connection = Sequel.sqlite(path)
  migrate!(connection, 'serps')
  Class.new(Sequel::Model(connection[:serps]))
end

#regenerate_serps(date = Date.today) ⇒ Object



72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/baiduserp/analyser.rb', line 72

# Re-parses every html row stored for +date+ and overwrites the matching SERP
# row's content with the YAML dump of the fresh parse result.
#
# The SERP row is looked up (or created) by keyword id and then updated, so
# existing rows are replaced rather than kept.
#
# @param date [Date] day whose stored htmls are re-parsed
def regenerate_serps(date = Date.today)
  html_model = model_htmls(date)
  serp_model = model_serps(date)
  progress = ProgressBar.create(:title => "ReGenerating SERPS", :total => html_model.count, :format => '%t (%c/%C) %a %E |%w')
  html_model.each do |row|
    keyword_id = row[:keyword_id]
    markup = row[:content]
    record = serp_model.find_or_create(:keyword_id => keyword_id)
    record.update(:content => YAML.dump(Baiduserp.parse(markup)))

    progress.increment
  end
end

#run(date = Date.today) ⇒ Object



29
30
31
32
# File 'lib/baiduserp/analyser.rb', line 29

# Runs the full pipeline for +date+: first #search, then #generate_weights.
#
# @param date [Date] day to process; defaults to today
# @return whatever #generate_weights returns
def run(date = Date.today)
  search(date)
  generate_weights(date)
end

#search(date = Date.today) ⇒ Object

Search Keywords -> Store Html -> Parse SERP



61
62
63
64
65
66
67
68
69
70
# File 'lib/baiduserp/analyser.rb', line 61

# Search Keywords -> Store Html -> Parse SERP
#
# For every keyword, makes sure an html row and a serp row exist in the
# per-date stores. The html is fetched with Baiduserp.get_search_html and the
# serp content is the YAML dump of Baiduserp.parse applied to that html; both
# are assigned inside the blocks handed to find_or_create (in Sequel those
# blocks are only run when a new row is created).
#
# @param date [Date] day whose html/serp stores are filled
def search(date = Date.today)
  htmls = model_htmls(date)
  serps = model_serps(date)
  # Count on the model directly (as the other methods here do) instead of
  # loading every keyword row with `all` just to count them.
  progress = ProgressBar.create(:title => "Searching Keywords", :total => @keywords.count, :format => '%t (%c/%C) %a %E |%w')
  @keywords.each do |k|
    # find_or_create returns the found or newly created row, so it is reused
    # below rather than querying the html store again for the same keyword.
    html = htmls.find_or_create(:keyword_id => k[:id]) { |r| r.content = Baiduserp.get_search_html(k[:term]) }
    serps.find_or_create(:keyword_id => k[:id]) { |r| r.content = YAML.dump(Baiduserp.parse(html[:content])) }
    progress.increment
  end
end