Class: EpubBook::Book
- Inherits: Object
  - Object
    - EpubBook::Book
- Defined in:
- lib/epub_book/book.rb
Constant Summary collapse
- UserAgent =
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36"
- Referer =
"http://www.baidu.com/"
- Reg =
/<script.*?>.*?<\/script>/m
Instance Attribute Summary collapse
-
#body_css ⇒ Object
Returns the value of attribute body_css.
-
#cover ⇒ Object
Returns the value of attribute cover.
-
#cover_css ⇒ Object
Returns the value of attribute cover_css.
-
#creator ⇒ Object
Returns the value of attribute creator.
-
#des_url ⇒ Object
Returns the value of attribute des_url.
-
#description_css ⇒ Object
Returns the value of attribute description_css.
-
#folder_name ⇒ Object
Returns the value of attribute folder_name.
-
#index_item_css ⇒ Object
Returns the value of attribute index_item_css.
-
#item_attr ⇒ Object
Returns the value of attribute item_attr.
-
#limit ⇒ Object
Returns the value of attribute limit.
-
#mail_to ⇒ Object
Returns the value of attribute mail_to.
-
#page_attr ⇒ Object
Returns the value of attribute page_attr.
-
#page_css ⇒ Object
Returns the value of attribute page_css.
-
#path ⇒ Object
Returns the value of attribute path.
-
#referer ⇒ Object
Returns the value of attribute referer.
-
#title_css ⇒ Object
Returns the value of attribute title_css.
-
#user_agent ⇒ Object
Returns the value of attribute user_agent.
Instance Method Summary collapse
- #book ⇒ Object
- #book_path ⇒ Object
- #fetch_book ⇒ Object
-
#fetch_index(url = nil) ⇒ Object
得到书目索引.
-
#generate_book(book_name = nil) {|_self| ... } ⇒ Object
创建书本.
-
#initialize(index_url, des_url = nil) {|_self| ... } ⇒ Book
constructor
A new instance of Book.
- #link_host ⇒ Object
- #save_book ⇒ Object
Constructor Details
#initialize(index_url, des_url = nil) {|_self| ... } ⇒ Book
Returns a new instance of Book.
29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
# File 'lib/epub_book/book.rb', line 29
#
# Sets up a book with its default scraping configuration, then hands the new
# instance to an optional block so the caller can override any attribute.
#
# @param index_url [String] URL of the index (table of contents) page; its
#   MD5 digest, URL-safe Base64 encoded, becomes the default folder name
# @param des_url [String, nil] stored as-is in @des_url
# @yield [_self] the freshly configured instance
def initialize(index_url, des_url = nil)
  @index_url = index_url
  @des_url   = des_url

  # Request headers sent with every page fetch.
  @user_agent = UserAgent
  @referer    = Referer

  # Working-directory name: encoded digest with its last two characters dropped.
  url_digest   = Digest::MD5.digest(@index_url)
  @folder_name = Base64.urlsafe_encode64(url_digest)[0..-3]

  # Default selectors and metadata.
  @creator        = 'javy_liu'
  @title_css      = '.wrapper h1.title1'
  @index_item_css = 'ul.list3>li>a'
  @cover          = 'cover.jpg'
  @body_css       = '.articlebody'
  @item_attr      = "href"

  yield(self) if block_given?
end
Instance Attribute Details
#body_css ⇒ Object
Returns the value of attribute body_css.
24 25 26 |
# File 'lib/epub_book/book.rb', line 24
#
# Reader for @body_css. #fetch_book passes this value to the parsed chapter
# page's `css` call to pick out the chapter body; #initialize defaults it to
# '.articlebody'.
#
# @return [Object] the current value of @body_css
def body_css
  @body_css
end
#cover ⇒ Object
Returns the value of attribute cover.
24 25 26 |
# File 'lib/epub_book/book.rb', line 24
#
# Reader for @cover, the cover image file name. #initialize defaults it to
# 'cover.jpg'; #generate_book joins it onto #book_path and passes it to the
# epub builder.
#
# @return [Object] the current value of @cover
def cover
  @cover
end
#cover_css ⇒ Object
Returns the value of attribute cover_css.
24 25 26 |
# File 'lib/epub_book/book.rb', line 24
#
# Reader for @cover_css. In #generate_book only its truthiness matters: the
# default cover image is rendered only when it is unset.
# NOTE(review): presumably a CSS selector for a cover image on the source
# site - confirm against the code that consumes it.
#
# @return [Object] the current value of @cover_css
def cover_css
  @cover_css
end
#creator ⇒ Object
Returns the value of attribute creator.
24 25 26 |
# File 'lib/epub_book/book.rb', line 24
#
# Reader for @creator. #generate_book uses it for both the epub creator and
# publisher fields; #initialize defaults it to 'javy_liu'.
#
# @return [Object] the current value of @creator
def creator
  @creator
end
#des_url ⇒ Object
Returns the value of attribute des_url.
24 25 26 |
# File 'lib/epub_book/book.rb', line 24
#
# Reader for @des_url, the optional second constructor argument. Despite the
# name, #fetch_index hands it to `doc.css` and reads the match's "href", so
# it acts as a CSS selector for the link to the description page.
#
# @return [Object, nil] the current value of @des_url
def des_url
  @des_url
end
#description_css ⇒ Object
Returns the value of attribute description_css.
24 25 26 |
# File 'lib/epub_book/book.rb', line 24
#
# Reader for @description_css. None of the methods listed on this page read
# it directly.
# NOTE(review): presumably the CSS selector for the book description text -
# confirm against the helper that extracts the description.
#
# @return [Object] the current value of @description_css
def description_css
  @description_css
end
#folder_name ⇒ Object
Returns the value of attribute folder_name.
24 25 26 |
# File 'lib/epub_book/book.rb', line 24
#
# Reader for @folder_name. #initialize derives it from the MD5 digest of the
# index URL (URL-safe Base64). It names the working directory in #book_path
# and appears in the epub identifier and default file name in #generate_book.
#
# @return [Object] the current value of @folder_name
def folder_name
  @folder_name
end
#index_item_css ⇒ Object
Returns the value of attribute index_item_css.
24 25 26 |
# File 'lib/epub_book/book.rb', line 24
#
# Reader for @index_item_css, the CSS selector #fetch_index uses to find the
# chapter entries on the index page; #initialize defaults it to
# 'ul.list3>li>a'.
#
# @return [Object] the current value of @index_item_css
def index_item_css
  @index_item_css
end
#item_attr ⇒ Object
Returns the value of attribute item_attr.
24 25 26 |
# File 'lib/epub_book/book.rb', line 24
#
# Reader for @item_attr, the attribute #fetch_index reads from each index
# entry to obtain the chapter URL; #initialize defaults it to "href".
#
# @return [Object] the current value of @item_attr
def item_attr
  @item_attr
end
#limit ⇒ Object
Returns the value of attribute limit.
24 25 26 |
# File 'lib/epub_book/book.rb', line 24
#
# Reader for @limit. When set, #fetch_book stops once the chapter index
# reaches it and #generate_book truncates the file list to that many
# entries; when unset, no cap is applied.
#
# @return [Object, nil] the current value of @limit
def limit
  @limit
end
#mail_to ⇒ Object
Returns the value of attribute mail_to.
24 25 26 |
# File 'lib/epub_book/book.rb', line 24
#
# Reader for @mail_to. When truthy, #generate_book sends the finished epub
# to this recipient through Mailer.
#
# @return [Object, nil] the current value of @mail_to
def mail_to
  @mail_to
end
#page_attr ⇒ Object
Returns the value of attribute page_attr.
24 25 26 |
# File 'lib/epub_book/book.rb', line 24
#
# Reader for @page_attr, the attribute #fetch_index reads from the element
# matched by @page_css to get the next index page's URL. Pagination is only
# followed when both @page_css and @page_attr are set.
#
# @return [Object, nil] the current value of @page_attr
def page_attr
  @page_attr
end
#page_css ⇒ Object
Returns the value of attribute page_css.
24 25 26 |
# File 'lib/epub_book/book.rb', line 24
#
# Reader for @page_css, the CSS selector #fetch_index uses to locate the
# "next page" element of a paginated index. Pagination is only followed when
# both @page_css and @page_attr are set.
#
# @return [Object, nil] the current value of @page_css
def page_css
  @page_css
end
#path ⇒ Object
Returns the value of attribute path.
24 25 26 |
# File 'lib/epub_book/book.rb', line 24
#
# Reader for @path, the base directory under which #book_path creates the
# book's working folder. When unset, #book_path falls back to the current
# working directory.
#
# @return [Object, nil] the current value of @path
def path
  @path
end
#referer ⇒ Object
Returns the value of attribute referer.
24 25 26 |
# File 'lib/epub_book/book.rb', line 24
#
# Reader for @referer, sent as the 'Referer' request header by #fetch_index
# and #fetch_book; #initialize defaults it to the Referer constant.
#
# @return [Object] the current value of @referer
def referer
  @referer
end
#title_css ⇒ Object
Returns the value of attribute title_css.
24 25 26 |
# File 'lib/epub_book/book.rb', line 24
#
# Reader for @title_css; #initialize defaults it to '.wrapper h1.title1'.
# None of the methods listed on this page read it directly.
# NOTE(review): presumably the CSS selector for the book title - confirm
# against the helper that extracts the title.
#
# @return [Object] the current value of @title_css
def title_css
  @title_css
end
#user_agent ⇒ Object
Returns the value of attribute user_agent.
24 25 26 |
# File 'lib/epub_book/book.rb', line 24
#
# Reader for @user_agent, sent as the "User-Agent" request header by
# #fetch_index and #fetch_book; #initialize defaults it to the UserAgent
# constant.
#
# @return [Object] the current value of @user_agent
def user_agent
  @user_agent
end
Instance Method Details
#book ⇒ Object
52 53 54 55 56 |
# File 'lib/epub_book/book.rb', line 52
#
# Returns the in-memory book index, loading it on first use.
#
# Creates the working directory (#book_path) if it does not exist yet. When
# a non-empty index.yml is present there it is parsed; otherwise a fresh
# index with an empty :files list is used. The result is memoized in @book.
#
# @return [Hash] the book index (symbol keys such as :files)
def book
  return @book if @book

  Dir.mkdir(book_path) unless File.directory?(book_path)
  index_file = File.join(book_path, 'index.yml')

  @book =
    if File.size?(index_file)
      # File.read closes the handle; the previous File.open without a block
      # left it open until garbage collection.
      yaml = File.read(index_file)
      # index.yml is written by #save_book from a Hash with Symbol keys.
      # On Psych 4+ plain YAML.load is the safe loader and rejects Symbols,
      # so use unsafe_load where it exists.
      # NOTE(review): this trusts index.yml as locally generated data; do not
      # point #path at a directory holding untrusted YAML.
      YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(yaml) : YAML.load(yaml)
    else
      { files: [] }
    end
end
#book_path ⇒ Object
44 45 46 |
# File 'lib/epub_book/book.rb', line 44
#
# Absolute-or-relative path of this book's working directory: the configured
# base directory (@path) joined with @folder_name. When no base directory is
# configured the process's current working directory is used.
#
# Dir.pwd replaces the former `pwd` shell-out, which spawned a subprocess
# and only worked where a `pwd` executable is available.
#
# @return [String] memoized directory path
def book_path
  @book_path ||= File.join(@path || Dir.pwd, @folder_name)
end
#fetch_book ⇒ Object
160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 |
# File 'lib/epub_book/book.rb', line 160
#
# Downloads every chapter listed in the book index into #book_path.
#
# The index is (re)built first when index.yml is missing or empty. Chapters
# whose content file already exists with a non-zero size are skipped, so the
# method can be re-run to resume an interrupted download. At most #limit
# chapters are processed when a limit is set. A chapter that fails to
# download is logged and skipped; it does not abort the run.
#
# @return [Object] the result of iterating book[:files]
def fetch_book
  # Rebuild the index if it does not exist yet (or is empty).
  fetch_index unless File.size?(File.join(book_path, 'index.yml'))

  EpubBook.logger.info "------Fetch book----------"

  book[:files].each_with_index do |item, index|
    break if limit && index >= limit

    content_path = File.join(book_path, item[:content])

    # Already downloaded (file exists and is non-empty): move on.
    next if File.size?(content_path)

    begin
      response = HTTP.headers("User-Agent" => @user_agent, 'Referer' => @referer).get(item[:url])
      doc_file = Nokogiri::HTML(judge_encoding(response.to_s))

      # Extract the body before opening the file, so a parsing failure cannot
      # leave a non-empty partial file that later runs would treat as done.
      body = doc_file.css(@body_css).to_s.gsub(Reg, '')

      File.open(content_path, 'w') do |f|
        f.write("<h3>#{item[:label]}</h3>")
        f.write(body)
      end
    rescue StandardError => e
      # StandardError rather than Exception, so Interrupt and SystemExit
      # still stop the run.
      EpubBook.logger.info "Error:#{e.message},#{item.inspect}"
      next
    end
  end
end
#fetch_index(url = nil) ⇒ Object
得到书目索引
121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
# File 'lib/epub_book/book.rb', line 121
#
# Builds the book index (list of chapters) and saves it to index.yml.
#
# Starting at +url+ (default: the index URL given to the constructor), each
# index page is fetched, its chapter links (@index_item_css / @item_attr)
# are appended to book[:files], and - when both @page_css and @page_attr are
# set - the "next page" link is followed. Pages are walked in a loop so the
# entries of earlier pages are kept; the walk stops when a page has no next
# link or the link points to a page already visited.
#
# While book[:title] is unset, the description page (or the index page
# itself when @des_url is nil) is passed to get_des.
#
# @param url [String, nil] index page to start from
# @return [Object] the result of #save_book
def fetch_index(url = nil)
  book[:files] = []

  # URI.encode was removed in Ruby 3.0; URI::DEFAULT_PARSER.escape is the
  # implementation it delegated to.
  fetch_document = lambda do |target|
    response = HTTP.headers("User-Agent" => @user_agent, 'Referer' => @referer)
                   .get(URI::DEFAULT_PARSER.escape(target))
    Nokogiri::HTML(judge_encoding(response.to_s))
  end

  page_url = url || @index_url
  visited = {}

  while page_url && !visited[page_url]
    visited[page_url] = true
    doc = fetch_document.call(page_url)

    # Collect book metadata once, from the description page if one is configured.
    unless book[:title]
      description_doc =
        if @des_url.nil?
          doc
        else
          fetch_document.call(generate_abs_url(doc.css(@des_url).attr("href").to_s))
        end
      get_des(description_doc)
    end

    doc.css(@index_item_css).each do |item|
      href = URI::DEFAULT_PARSER.escape(item.attr(@item_attr).to_s)
      next if href.start_with?('javascript') || href.start_with?('#')

      book[:files] << { label: item.text, url: generate_abs_url(href) }
    end

    # Follow pagination, if configured. An empty attribute value means there
    # is no further page.
    page_url = nil
    if @page_css && @page_attr
      next_page = doc.css(@page_css).attr(@page_attr).to_s
      page_url = generate_abs_url(next_page) unless next_page.empty?
    end
  end

  book[:files].each_with_index { |item, index| item[:content] = "#{index}.html" }

  # Persist the index.
  save_book
end
#generate_book(book_name = nil) {|_self| ... } ⇒ Object
创建书本
66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
# File 'lib/epub_book/book.rb', line 66
#
# Builds the epub file for this book and optionally mails it.
#
# Steps: download the chapters (#fetch_book); render a default cover with
# ImageMagick's `convert` when no @cover_css is set; fill in the epub
# metadata; bundle every chapter file that exists on disk (at most #limit
# of them) plus the cover; save the epub into #book_path; and, when
# #mail_to is set, send it as an attachment.
#
# @param book_name [String, nil] base name of the epub file; defaults to
#   @folder_name
# @yield [_self] this instance, just before the epub is written
# @return [Object, nil] the result of sending the mail, or nil when
#   #mail_to is unset
def generate_book(book_name = nil)
  # Fetch the epub source data.
  fetch_book

  if !@cover_css && @cover
    # Cover template shipped three directories above this file.
    template = File.expand_path("../../../#{@cover}", __FILE__)
    # The title comes from a scraped page, so it is never passed through a
    # shell: the multi-argument form of system executes convert directly.
    # Backslashes and single quotes are escaped for the -draw text primitive.
    # NOTE(review): confirm this escaping against ImageMagick's -draw syntax.
    escaped_title = book[:title].to_s.gsub(/[\\']/) { |char| "\\#{char}" }
    system('convert', template,
           '-font', 'tsxc.ttf',
           '-gravity', 'center',
           '-fill', 'red',
           '-pointsize', '16',
           '-draw', "text 0,0 '#{escaped_title}'",
           File.join(book_path, @cover))
  end

  epub = EeePub.make
  epub.title book[:title]
  epub.creator @creator
  epub.publisher @creator
  epub.date Time.now
  epub.identifier "http://javy_liu.com/book/#{@folder_name}", :scheme => 'URL'
  epub.uid "http://javy_liu.com/book/#{@folder_name}"
  epub.cover @cover
  epub.subject book[:title]
  epub.description book[:description] if book[:description]

  book[:files] = book[:files][0...limit] if limit

  # Keep only chapters whose content file was actually downloaded.
  book[:files].select! { |item| File.file?(File.join(book_path, item[:content])) }
  content_files = book[:files].map { |item| File.join(book_path, item[:content]) }

  epub.files content_files.push(File.join(book_path, @cover))
  epub.nav book[:files]
  book[:epub_file] = File.join(book_path, "#{book_name || @folder_name}.epub")

  yield self if block_given?

  epub.save(book[:epub_file])

  # Send the finished book by mail when a recipient is configured.
  if mail_to
    mailer = Mailer.new
    mailer.to = mail_to
    mailer.add_file book[:epub_file]
    mailer.body = "您创建的电子书[#{book[:title]}]见附件\n"
    mailer.send_mail
  end
end
#link_host ⇒ Object
48 49 50 |
# File 'lib/epub_book/book.rb', line 48
#
# Scheme-and-host prefix of the index URL (e.g. "http://example.com"),
# memoized in @link_host. Both http and https index URLs are recognised;
# previously only "http://" matched, so https sites produced nil.
#
# @return [String, nil] the prefix, or nil when the index URL does not look
#   like "<scheme>://<host>/<path...>"
def link_host
  @link_host ||= @index_url[%r{\A(https?://.*?)/\w+}, 1]
end
#save_book ⇒ Object
58 59 60 61 62 |
# File 'lib/epub_book/book.rb', line 58
#
# Writes the book index to index.yml inside #book_path as YAML.
#
# The index is obtained through #book rather than the raw @book variable, so
# calling this before the index has been loaded no longer overwrites
# index.yml with the YAML for nil.
#
# @return [Integer] number of bytes written
def save_book
  File.write(File.join(book_path, 'index.yml'), book.to_yaml)
end