Class: Newrank

Inherits:
Object
  • Object
show all
Defined in:
lib/newrank.rb

Instance Method Summary collapse

Instance Method Details

#_md5(str) ⇒ Object

Uses the JavaScript MD5 algorithm written by newrank; the script lives in assets/newrank_md5.js.



140
141
142
# File 'lib/newrank.rb', line 140

# Hashes a string with newrank's own JavaScript MD5 routine.
#
# Calls the `newrank_md5` function through the context returned by
# `js_context`, passing `bare: true` along with the input.
#
# @param str [String] text to hash
# @return [Object] whatever the JavaScript `newrank_md5` call returns
def _md5(str)
  context = js_context
  context.call('newrank_md5', str, bare: true)
end

#crawl(newrank_id) ⇒ Object

crawl newrank info



10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/newrank.rb', line 10

# Crawls the public newrank information for one account.
#
# Non-breaking spaces (U+00A0) are stripped from the id before the page is
# fetched. When no document is available, a hash of empty defaults is returned.
#
# @param newrank_id [String] account id as entered by the caller
# @return [Hash] with keys :active_users_count, :score, :introduce,
#   :week_data and :posts_data
def crawl(newrank_id)
  doc = document(newrank_id.gsub("\u{a0}", ""))

  if doc.nil?
    return {
      active_users_count: 0,
      score: 0,
      introduce: "",
      week_data: [],
      posts_data: {}
    }
  end

  score, uuid = score_and_uuid(doc)

  fans_node = doc.css(".detail-fans-counts")[0]
  fans = fans_node.nil? ? 0 : fans_node.text.gsub(",", "").to_i

  intro_node = doc.css(".info-detail-head-weixin-fun-introduce")[0]
  intro = intro_node.nil? ? "" : intro_node.text

  weekly = week_data(doc)

  # Posts are only requested when a uuid was extracted from the page.
  posts = uuid.nil? ? nil : fetch_post(uuid)

  {
    active_users_count: fans,
    score: (score || 0),
    introduce: intro,
    week_data: weekly,
    posts_data: (posts || {})
  }
end

#document(newrank_account) ⇒ Object

Fetches the account's detail page and returns it as a Nokogiri document.



70
71
72
73
74
# File 'lib/newrank.rb', line 70

# Downloads the newrank detail page for an account and parses it as HTML.
#
# Pauses via `wait_for_seconds` before issuing the request.
#
# NOTE(review): this relies on open-uri's patch of Kernel#open, which Ruby 3.0
# removed; switch to URI.open once the minimum supported Ruby allows it.
#
# @param newrank_account [String] account id appended to the detail-page URL
# @return [Object] the result of Nokogiri::HTML for the fetched page, read as UTF-8
def document(newrank_account)
  wait_for_seconds
  url = 'http://www.newrank.cn/public/info/detail.html?account=' + newrank_account
  page = open(url, "User-Agent" => "Mozilla/5.0 (Windows NT 6.2; rv:10.0.1) Gecko/20100101 Firefox/10.0.1", :read_timeout => 10)
  Nokogiri::HTML(page, nil, 'utf-8')
end

#fetch_post(uuid) ⇒ Object

crawl posts



44
45
46
47
48
49
50
51
# File 'lib/newrank.rb', line 44

# Requests the article list for an account and returns the parsed JSON.
#
# A fresh nonce and its matching `xyz` signature are generated first, then the
# method pauses via `wait_for_seconds` before POSTing.
#
# @param uuid [String] account uuid sent in the request payload
# @return [Object] the JSON-decoded response body
def fetch_post(uuid)
  nonce = gen_nonce
  signature = gen_xyz(nonce, uuid)

  wait_for_seconds

  endpoint = "http://www.newrank.cn/xdnphb/detail/getAccountArticle"
  payload = { uuid: uuid, nonce: nonce, xyz: signature, flag: true }
  headers = { "User-Agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36" }

  JSON.parse(RestClient.post(endpoint, payload, headers))
end

#gen_nonce ⇒ Object

generate parameter nonce



117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# File 'lib/newrank.rb', line 117

# Generates the `nonce` request parameter.
#
# @return [String] nine characters, each drawn at random from 0-9 and a-f
def gen_nonce
  hex_digits = '0123456789abcdef'
  # One pass is enough: only a single 9-character string is ever returned.
  Array.new(9) { hex_digits[rand(16)] }.join
end

#gen_xyz(nonce, uuid) ⇒ Object

generate parameter xyz



134
135
136
137
# File 'lib/newrank.rb', line 134

# Generates the `xyz` request parameter.
#
# Builds the signing string from the article endpoint path, the fixed AppKey,
# the uuid and the nonce, then hashes it with `_md5`.
#
# @param nonce [String] nonce produced for this request
# @param uuid [String] account uuid
# @return [Object] the value returned by `_md5` for the signing string
def gen_xyz(nonce, uuid)
  _md5("/xdnphb/detail/getAccountArticle?AppKey=joker&flag=true&uuid=#{uuid}&nonce=#{nonce}")
end

#js_context ⇒ Object

js context



145
146
147
148
# File 'lib/newrank.rb', line 145

# Returns the compiled JavaScript context for assets/newrank_md5.js.
#
# The context is compiled on first use and memoized in @context, so the
# script is only read and compiled when no context exists yet.
#
# @return [Object] the value produced by ExecJS.compile
def js_context
  @context ||= begin
    script_path = File.join(File.dirname(__FILE__), 'assets/newrank_md5.js')
    ExecJS.compile(File.read(script_path))
  end
end

#score_and_uuid(doc) ⇒ Object

find score and uuid



77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/newrank.rb', line 77

# Extracts the account score and uuid from the page's first
# `script[type='text/javascript']` element.
#
# The score is read from the "new_rank_index_mark" key of the first element of
# the first array literal in the script. The uuid is read from the "uuid"
# property of the variable declaration named "fgkcdg".
#
# NOTE(review): when the uuid cannot be located the method returns the string
# "uuid nil" rather than nil, so a caller's nil check will not catch it.
# Confirm whether callers rely on that before changing it.
#
# @param doc [#css] parsed detail page
# @return [Array] `[score, uuid]`; `[nil, nil]` when the page has no matching
#   script element, otherwise a Float score (0.0 if absent) and a String uuid
def score_and_uuid(doc)
  script = doc.css("script[type='text/javascript']")[0]
  return nil, nil if script.nil?

  ast = RKelly::Parser.new.parse(script.text.strip)

  # Find the first array node; a script without any array literal has none.
  array_node = ast.pointcut(RKelly::Nodes::ArrayNode).matches.first

  # Find the first element node inside that array and look up the score in it.
  element_node = array_node.nil? ? nil : array_node.pointcut(RKelly::Nodes::ElementNode).matches.first
  json_data = element_node.nil? ? {} : JSON.parse(element_node.to_ecma)
  raw_score = json_data["new_rank_index_mark"]
  score = raw_score ? raw_score.to_f : 0.0

  # Find the declaration that holds the uuid, then its "uuid" property.
  var_node = ast.pointcut(RKelly::Nodes::VarDeclNode).matches.find { |node| node.name == "fgkcdg" }
  uuid_property = var_node && var_node.pointcut(RKelly::Nodes::PropertyNode).matches.find { |n| n.name == '"uuid"' }

  # The property's value is a quoted string literal; drop the surrounding quotes.
  uuid = uuid_property ? uuid_property.value.value[1..-2] : "uuid nil"

  return score, uuid
end

#wait_for_seconds ⇒ Object

Waits a short, random time between requests to avoid sending too many.



112
113
114
# File 'lib/newrank.rb', line 112

# Pauses for a random duration of at least one and less than two seconds.
#
# @return [Object] whatever `sleep` returns
def wait_for_seconds
  delay = 1 * rand + 1
  sleep(delay)
end

#week_data(doc) ⇒ Object

crawl week data



54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/newrank.rb', line 54

# Extracts the weekly data from the first `script` element of the page.
#
# Every element of the first array literal in the script is rendered back to
# source with `to_ecma` and decoded as JSON.
#
# @param doc [#css] parsed detail page
# @return [Array] decoded elements; empty when the page has no script element
#   or the script contains no array literal
def week_data(doc)
  script = doc.css("script")[0]
  return [] if script.nil?

  ast = RKelly::Parser.new.parse(script.text.strip)
  array_node = ast.pointcut(RKelly::Nodes::ArrayNode).matches.first
  # A script without any array literal yields no array node.
  return [] if array_node.nil?

  array_node.pointcut(RKelly::Nodes::ElementNode).matches.map do |element_node|
    JSON.parse(element_node.to_ecma)
  end
end