Module: ZhSieve
- Defined in:
  - lib/zhSieve.rb
  - lib/zhSieve/cli.rb
  - lib/zhSieve/html2md.rb
  - lib/zhSieve/version.rb
  - lib/zhSieve/htmlpage.rb
Defined Under Namespace
Modules: Converter
Classes: CLI, HTMLPage, NoContents
Constant Summary
collapse
- BASE_URL = "https://www.zhihu.com"
- ZL_URI = "https://zhuanlan.zhihu.com/api/posts/"
- VERSION = "0.2.0"
Class Method Summary
collapse
Class Method Details
.crawl_answer(options) ⇒ Object
9
10
11
12
13
14
15
16
17
18
19
20
21
22
|
# File 'lib/zhSieve.rb', line 9
# Downloads a single answer page and returns its Markdown rendering.
#
# The request goes to "<BASE_URL>/question/<question_id>#answer-<answer_id>"
# through a Mechanize agent that loads its cookies from the relative path
# ./cookies.txt. The fetched page, together with both ids, is handed to
# HTMLPage, and the value of HTMLPage#answerMarkdown is returned unchanged.
#
# @param options [Hash] reads :question_id and :answer_id; each value is
#   stringified by interpolation (for a plain Hash a missing key therefore
#   becomes an empty string)
# @return [Object] whatever HTMLPage#answerMarkdown returns
def self.crawl_answer(options)
  # Interpolation is used instead of #to_s so that HTMLPage always receives
  # fresh String objects, never the caller's own instances.
  qid = "#{options[:question_id]}"
  aid = "#{options[:answer_id]}"
  target = "#{BASE_URL}/question/#{qid}#answer-#{aid}"

  browser = Mechanize.new.tap do |agent|
    agent.user_agent = 'Chrome/53.0.2785.143'
    agent.max_history = 1
    agent.cookie_jar.load_cookiestxt("./cookies.txt")
  end

  HTMLPage.new(contents: browser.get(target), question_id: qid, answer_id: aid).answerMarkdown
end
|
.crawl_zl_article(options) ⇒ Object
28
29
30
31
32
33
34
35
36
37
38
|
# File 'lib/zhSieve.rb', line 28
# Downloads a Zhuanlan post from the posts API and returns its Markdown
# rendering.
#
# The request goes to "<ZL_URI><article_id>" through a Mechanize agent that
# loads its cookies from the relative path ./cookies.txt. The response and
# the article id are handed to HTMLPage, and the value of
# HTMLPage#articleMarkdown is returned unchanged.
#
# @param options [Hash] reads :article_id; the value is stringified by
#   interpolation (for a plain Hash a missing key therefore becomes an
#   empty string)
# @return [Object] whatever HTMLPage#articleMarkdown returns
def self.crawl_zl_article(options)
  # Interpolation is used instead of #to_s so that HTMLPage always receives a
  # fresh String object, never the caller's own instance.
  post_id = "#{options[:article_id]}"
  endpoint = "#{ZL_URI}#{post_id}"

  client = Mechanize.new.tap do |agent|
    agent.user_agent = 'Chrome/53.0.2785.143'
    agent.max_history = 1
    agent.cookie_jar.load_cookiestxt("./cookies.txt")
  end

  response = client.get(endpoint)
  HTMLPage.new(contents: response, article_id: post_id).articleMarkdown
end
|
.crawl_zl_people(options) ⇒ Object
24
25
26
|
# File 'lib/zhSieve.rb', line 24
# Unimplemented stub: the argument is ignored and nil is returned.
# NOTE(review): the name suggests this is meant to crawl a Zhuanlan author's
# posts — confirm the intended behaviour before relying on it.
#
# @param options [Object] accepted but never read
# @return [nil]
def self.crawl_zl_people(options)
  nil
end
|