Method: EpubBook::Book#fetch_index

Defined in:
lib/epub_book/book.rb

#fetch_index(url = nil) ⇒ Object

得到书目索引



126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# File 'lib/epub_book/book.rb', line 126

# Builds the book's chapter index by scraping the index page(s).
#
# Resets book[:files], fetches the index page at +url+ (falling back to
# @index_url), and appends one {label:, url:} entry per node matched by
# @index_item_css. When both @page_css and @page_attr are set, the "next page"
# link is followed and entries from every page are accumulated. Afterwards
# each entry gets a :content file name ("0.html", "1.html", ...) and the
# result is persisted through save_book.
#
# If book[:title] is not yet set, get_des is called with either the index
# document itself or, when @des_url is configured, the document linked from
# the node matched by that selector.
#
# @param url [String, nil] index page to start from; defaults to @index_url
# @return [Object] the return value of save_book
def fetch_index(url = nil)
  book[:files] = []

  # Downloads +target+ and parses it as HTML.
  # NOTE: encoding detection (judge_encoding) was disabled in the original
  # code and stays disabled here.
  fetch_doc = lambda do |target|
    Nokogiri::HTML(HTTP.headers("User-Agent" => @user_agent, 'Referer' => @referer).get(target).to_s)
  end

  page_url = url || @index_url
  visited = {}

  # Pages are walked iteratively so that entries from every page accumulate
  # in book[:files] and the numbering/saving below happens exactly once.
  loop do
    visited[page_url] = true
    doc = fetch_doc.call(page_url)
    EpubBook.logger.info "------Fetch index--#{page_url}---------------"

    unless book[:title]
      des_doc = if @des_url.nil?
                  doc
                else
                  fetch_doc.call(generate_abs_url(doc.css(@des_url).attr("href").to_s))
                end
      get_des(des_doc)
    end

    EpubBook.logger.info @index_item_css

    doc.css(@index_item_css).each do |item|
      href = item.attr(@item_attr).to_s
      # Script pseudo-links and in-page anchors are not chapters.
      next if href.start_with?('javascript', '#')

      EpubBook.logger.info item.inspect
      EpubBook.logger.info item.text

      book[:files] << { label: item.text, url: generate_abs_url(href) }
    end

    # Pagination is optional: only follow a "next" link when configured.
    break unless @page_css && @page_attr

    # A missing link yields nil, and nil.to_s is "" (which is truthy in Ruby),
    # so emptiness must be checked explicitly.
    next_link = doc.css(@page_css).attr(@page_attr).to_s
    break if next_link.empty?

    next_url = generate_abs_url(next_link)
    # Guard against a "next" link that points back to a page already fetched.
    break if visited.key?(next_url)

    page_url = next_url
  end

  book[:files].each_with_index { |item, index| item[:content] = "#{index}.html" }

  # Persist the collected index.
  save_book
end