Class: Newrank
- Inherits:
-
Object
- Object
- Newrank
- Defined in:
- lib/newrank.rb
Instance Method Summary collapse
-
#_md5(str) ⇒ Object
Uses the JS MD5 algorithm written by newrank; the script lives in assets/newrank_md5.js.
-
#crawl(newrank_id) ⇒ Object
crawl newrank info.
-
#document(newrank_account) ⇒ Object
get Nokogiri Document.
-
#fetch_post(uuid) ⇒ Object
crawl posts.
-
#gen_nonce ⇒ Object
generate parameter nonce.
-
#gen_xyz(nonce, uuid) ⇒ Object
generate parameter xyz.
-
#js_context ⇒ Object
js context.
-
#score_and_uuid(doc) ⇒ Object
find score and uuid.
-
#wait_for_seconds ⇒ Object
wait a few seconds to avoid sending too many requests.
-
#week_data(doc) ⇒ Object
crawl week data.
Instance Method Details
#_md5(str) ⇒ Object
Uses the JS MD5 algorithm written by newrank; the script lives in assets/newrank_md5.js
140 141 142 |
# File 'lib/newrank.rb', line 140
# Hashes a string by calling the `newrank_md5` function in the compiled
# JavaScript context returned by #js_context.
#
# @param str [String] the text to hash
# @return [Object] whatever the JavaScript `newrank_md5` function returns
def _md5(str)
  context = js_context
  context.call('newrank_md5', str, bare: true)
end
#crawl(newrank_id) ⇒ Object
crawl newrank info
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
# File 'lib/newrank.rb', line 10
# Crawls the newrank detail page of one account and summarizes it.
#
# Non-breaking spaces (U+00A0) are stripped from the id before the page is
# requested. When no document is available, a hash of empty defaults is
# returned instead.
#
# @param newrank_id [String] the account id to look up
# @return [Hash] with the keys :active_users_count, :score, :introduce,
#   :week_data and :posts_data
def crawl(newrank_id)
  page = document(newrank_id.gsub("\u{a0}", ""))

  if page.nil?
    return {
      active_users_count: 0,
      score: 0,
      introduce: "",
      week_data: [],
      posts_data: {}
    }
  end

  score, uuid = score_and_uuid(page)

  fans_node = page.css(".detail-fans-counts")[0]
  fans_count = fans_node.nil? ? 0 : fans_node.text.delete(",").to_i

  intro_node = page.css(".info-detail-head-weixin-fun-introduce")[0]
  intro_text = intro_node.nil? ? "" : intro_node.text

  weekly = week_data(page)

  # Posts can only be requested once a uuid has been found on the page.
  posts = uuid.nil? ? nil : fetch_post(uuid)

  {
    active_users_count: fans_count,
    score: (score || 0),
    introduce: intro_text,
    week_data: weekly,
    posts_data: (posts || {})
  }
end
#document(newrank_account) ⇒ Object
get Nokogiri Document
70 71 72 73 74 |
# File 'lib/newrank.rb', line 70
# Downloads the public detail page of an account and parses it as HTML.
#
# Pauses via #wait_for_seconds before the request. The page is fetched with a
# browser-like User-Agent and a 10 second read timeout, and parsed as UTF-8.
#
# @param newrank_account [String] the account id appended to the detail URL
# @return [Nokogiri::HTML::Document] the parsed page
def document(newrank_account)
  wait_for_seconds
  url = 'http://www.newrank.cn/public/info/detail.html?account=' + newrank_account
  options = {
    "User-Agent" => "Mozilla/5.0 (Windows NT 6.2; rv:10.0.1) Gecko/20100101 Firefox/10.0.1",
    :read_timeout => 10
  }
  # The block form lets open-uri close the handle once parsing is finished.
  parse = lambda { |io| Nokogiri::HTML(io, nil, 'utf-8') }

  # Kernel#open no longer opens URLs on Ruby 3.0+; URI.open is the supported
  # entry point. Older Rubies without URI.open fall back to the original call.
  if URI.respond_to?(:open)
    URI.open(url, options, &parse)
  else
    open(url, options, &parse)
  end
end
#fetch_post(uuid) ⇒ Object
crawl posts
44 45 46 47 48 49 50 51 |
# File 'lib/newrank.rb', line 44
# Requests the article list of an account and returns the decoded JSON.
#
# The request carries a fresh nonce and the matching `xyz` signature, and is
# preceded by a pause via #wait_for_seconds.
#
# @param uuid [String] the account uuid found on the detail page
# @return [Object] the parsed JSON response body
def fetch_post(uuid)
  nonce = gen_nonce
  signature = gen_xyz(nonce, uuid)
  payload = { uuid: uuid, nonce: nonce, xyz: signature, flag: true }
  headers = {
    "User-Agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36"
  }

  wait_for_seconds
  response = RestClient.post("http://www.newrank.cn/xdnphb/detail/getAccountArticle", payload, headers)
  JSON.parse(response)
end
#gen_nonce ⇒ Object
generate parameter nonce
117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
# File 'lib/newrank.rb', line 117
# Generates the `nonce` request parameter: nine random lowercase hex digits.
#
# The previous implementation built 500 such strings and returned only the
# last one; a single string is generated here.
#
# @return [String] a 9-character string of characters from 0-9 and a-f
def gen_nonce
  hex_digits = %w[0 1 2 3 4 5 6 7 8 9 a b c d e f]
  Array.new(9) { hex_digits[rand(16)] }.join
end
#gen_xyz(nonce, uuid) ⇒ Object
generate parameter xyz
134 135 136 137 |
# File 'lib/newrank.rb', line 134
# Generates the `xyz` request parameter by hashing the article endpoint path
# together with its query string via #_md5.
#
# @param nonce [String] the nonce sent with the same request
# @param uuid [String] the account uuid
# @return [Object] the value returned by #_md5 for the signed string
def gen_xyz(nonce, uuid)
  _md5("/xdnphb/detail/getAccountArticle?AppKey=joker&flag=true&uuid=#{uuid}&nonce=#{nonce}")
end
#js_context ⇒ Object
js context
145 146 147 148 |
# File 'lib/newrank.rb', line 145
# Returns the ExecJS context compiled from assets/newrank_md5.js (resolved
# relative to this file). The context is memoized in @context.
#
# @return [Object] the compiled ExecJS context
def js_context
  @context ||= begin
    script_path = File.join(File.dirname(__FILE__), 'assets/newrank_md5.js')
    ExecJS.compile(File.read(script_path))
  end
end
#score_and_uuid(doc) ⇒ Object
find score and uuid
77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
# File 'lib/newrank.rb', line 77
# Extracts the rank score and the account uuid from the first
# `text/javascript` script of the page.
#
# The score is read from the "new_rank_index_mark" key of the first element of
# the first array literal. The uuid is read from the "uuid" property of the
# variable declaration named "fgkcdg".
#
# @param doc [#css] the parsed page (responds to `css` like a Nokogiri document)
# @return [Array] `[score, uuid]`:
#   - `[nil, nil]` when there is no script or it yields no syntax tree;
#   - score is a Float, 0.0 when no score is found;
#   - uuid is a String, or the placeholder "uuid nil" when the declaration or
#     its "uuid" property is missing.
def score_and_uuid(doc)
  script = doc.css("script[type='text/javascript']")[0]
  return nil, nil if script.nil?

  ast = RKelly::Parser.new.parse(script.text.strip)
  # NOTE(review): some RKelly versions appear to return nil for unparseable
  # input -- treated like a missing script. TODO confirm.
  return nil, nil if ast.nil?

  # Find the first array node.
  array_node = ast.pointcut(RKelly::Nodes::ArrayNode).matches.first
  # Find the first element node inside that array and look for the score.
  element_node =
    if array_node.nil?
      nil
    else
      array_node.pointcut(RKelly::Nodes::ElementNode).matches.first
    end
  json_data = element_node.nil? ? {} : JSON.parse(element_node.to_ecma)
  score = json_data["new_rank_index_mark"] ? json_data["new_rank_index_mark"].to_f : 0.0

  # Find the node that carries the uuid.
  object_node = ast.pointcut(RKelly::Nodes::VarDeclNode).matches.find { |node| node.name == "fgkcdg" }
  property =
    if object_node.nil?
      nil
    else
      object_node.pointcut(RKelly::Nodes::PropertyNode).matches.find { |n| n.name == '"uuid"' }
    end
  # The property value is a quoted literal; [1..-2] strips the quotes.
  uuid = property.nil? ? "uuid nil" : property.value.value[1..-2]

  return score, uuid
end
#wait_for_seconds ⇒ Object
wait a few seconds to avoid sending too many requests
112 113 114 |
# File 'lib/newrank.rb', line 112
# Sleeps for a random duration of at least one and less than two seconds, so
# that consecutive requests are spaced out.
#
# @return [Integer] the value returned by Kernel#sleep
def wait_for_seconds
  jitter = rand
  sleep(1 + jitter)
end
#week_data(doc) ⇒ Object
crawl week data
54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
# File 'lib/newrank.rb', line 54
# Extracts the weekly data from the first script tag of the page.
#
# The script is parsed as JavaScript; each element of its first array literal
# is re-serialized and decoded as JSON.
#
# @param doc [#css] the parsed page (responds to `css` like a Nokogiri document)
# @return [Array] one decoded JSON value per array element; empty when the
#   page has no script, the script yields no syntax tree, or it contains no
#   array literal
def week_data(doc)
  script = doc.css("script")[0]
  return [] if script.nil?

  ast = RKelly::Parser.new.parse(script.text.strip)
  # NOTE(review): some RKelly versions appear to return nil for unparseable
  # input -- treated like a missing script. TODO confirm.
  return [] if ast.nil?

  array_node = ast.pointcut(RKelly::Nodes::ArrayNode).matches.first
  # A script without any array literal previously raised NoMethodError here.
  return [] if array_node.nil?

  array_node.pointcut(RKelly::Nodes::ElementNode).matches.map do |element_node|
    JSON.parse(element_node.to_ecma)
  end
end