Class: DocuBot::Bundle
- Inherits:
-
Object
- Object
- DocuBot::Bundle
- Defined in:
- lib/docubot/bundle.rb
Defined Under Namespace
Classes: PageCollision
Instance Attribute Summary
-
#broken_links ⇒ Object
readonly
Returns the value of attribute broken_links.
-
#external_links ⇒ Object
readonly
Returns the value of attribute external_links.
-
#extras ⇒ Object
readonly
Returns the value of attribute extras.
-
#file_links ⇒ Object
readonly
Returns the value of attribute file_links.
-
#global ⇒ Object
readonly
Returns the value of attribute global.
-
#glossary ⇒ Object
readonly
Returns the value of attribute glossary.
-
#index ⇒ Object
readonly
Returns the value of attribute index.
-
#internal_links ⇒ Object
readonly
Returns the value of attribute internal_links.
-
#page_by_file_path ⇒ Object
readonly
Returns the value of attribute page_by_file_path.
-
#page_by_html_path ⇒ Object
readonly
Returns the value of attribute page_by_html_path.
-
#pages ⇒ Object
readonly
Returns the value of attribute pages.
-
#pages_by_title ⇒ Object
readonly
Returns the value of attribute pages_by_title.
-
#source ⇒ Object
readonly
Returns the value of attribute source.
-
#toc ⇒ Object
readonly
Returns the value of attribute toc.
Instance Method Summary
- #create_pages(files_and_folders) ⇒ Object
- #find_page_collisions ⇒ Object
-
#initialize(source_directory) ⇒ Bundle
constructor
A new instance of Bundle.
- #validate_links ⇒ Object
- #warn_for_broken_links ⇒ Object
- #warn_for_missing_glossary_terms ⇒ Object
- #write(writer_type, destination = nil) ⇒ Object
Constructor Details
#initialize(source_directory) ⇒ Bundle
Returns a new instance of Bundle.
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
# File 'lib/docubot/bundle.rb', line 7
#
# Builds a bundle from the documentation tree rooted at +source_directory+:
# collects pages and extra files, wires up the glossary, index and table of
# contents, then validates links and checks for colliding output paths.
#
# @param source_directory [String] path to the documentation source; it is
#   expanded to an absolute path and stored in @source.
# @raise [RuntimeError] if the expanded directory does not exist.
# @raise [PageCollision] from #find_page_collisions when two pages share an html_path.
def initialize( source_directory )
  @source = File.expand_path( source_directory )
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  raise "DocuBot cannot find directory #{@source}. Exiting." unless File.exist?( @source )

  @pages             = []
  @extras            = []
  @pages_by_title    = Hash.new{ |h,k| h[k]=[] }
  @page_by_file_path = {}
  @page_by_html_path = {}

  # NOTE(review): String#/ is not core Ruby; presumably a project-defined
  # path-join helper -- confirm it is loaded before this class is used.
  @glossary = DocuBot::Glossary.new( self, @source/'_glossary' )
  @index    = DocuBot::Index.new( self )
  @toc      = DocuBot::LinkTree::Root.new( self )

  Dir.chdir( @source ) do
    # This might be nil; MetaSection.new is OK with that.
    index_file = Dir[ *DocuBot::Converter.types.map{ |t| "index.#{t}" } ][ 0 ]
    @global = DocuBot::MetaSection.new( {:title=>'DocuBot Documentation'}, index_file )
    @global.glossary = @glossary
    @global.index    = @index
    @global.toc      = @toc

    files_and_folders = Dir[ '**/*' ]

    # index files are handled by Page.new for a directory; no sections for
    # special folders (but process contents)
    files_and_folders.reject!{ |path| name = File.basename( path ); name =~ /^(?:index\.[^.]+)$/ }

    # Drop every path with a segment that starts with an underscore
    # (e.g. _templates); _static and _glossary files are re-added just below.
    files_and_folders.reject!{ |f| f =~ /(?:^|\/)_/ }
    files_and_folders.concat Dir[ '_static/**/*'   ].reject{ |f| File.directory?(f) }
    files_and_folders.concat Dir[ '_glossary/**/*' ].reject{ |f| File.directory?(f) }

    # Remove anything matched by the globs listed in the global "ignore" setting.
    @global.ignore.as_list.each do |glob|
      files_and_folders = files_and_folders - Dir[glob]
    end

    # Sort by leading digits, if present, interpreted as numbers.
    # Names without leading digits get 9e9 so they sort after numbered ones.
    files_and_folders.sort_by! do |path|
      path.split( %r{[/\\]} ).map do |name|
        name.tr( '_', ' ' ).scan( /\A(?:(\d+)\s+)?(.+)/ )[0].tap do |parts|
          parts[0] = parts[0] ? parts[0].to_i : 9e9
        end
      end
    end

    create_pages( files_and_folders )
  end
  # puts @toc.to_txt

  # Regenerate pages whose templates require full scanning to have completed
  # TODO: make this based off of a metasection attribute.
  @pages.select do |page|
    %w[ glossary ].include?( page.template )
  end.each do |page|
    page.dirty_template
  end

  # TODO: make this optional via global variable
  validate_links
  warn_for_broken_links

  # TODO: make this optional via global variable
  warn_for_missing_glossary_terms

  find_page_collisions
end
Instance Attribute Details
#broken_links ⇒ Object (readonly)
Returns the value of attribute broken_links.
5 6 7 |
# File 'lib/docubot/bundle.rb', line 5
#
# @return [Object] the current value of @broken_links.
def broken_links
  @broken_links
end
#external_links ⇒ Object (readonly)
Returns the value of attribute external_links.
5 6 7 |
# File 'lib/docubot/bundle.rb', line 5
#
# @return [Object] the current value of @external_links.
def external_links
  @external_links
end
#extras ⇒ Object (readonly)
Returns the value of attribute extras.
4 5 6 |
# File 'lib/docubot/bundle.rb', line 4
#
# @return [Object] the current value of @extras.
def extras
  @extras
end
#file_links ⇒ Object (readonly)
Returns the value of attribute file_links.
5 6 7 |
# File 'lib/docubot/bundle.rb', line 5
#
# @return [Object] the current value of @file_links.
def file_links
  @file_links
end
#global ⇒ Object (readonly)
Returns the value of attribute global.
4 5 6 |
# File 'lib/docubot/bundle.rb', line 4
#
# @return [Object] the current value of @global.
def global
  @global
end
#glossary ⇒ Object (readonly)
Returns the value of attribute glossary.
4 5 6 |
# File 'lib/docubot/bundle.rb', line 4
#
# @return [Object] the current value of @glossary.
def glossary
  @glossary
end
#index ⇒ Object (readonly)
Returns the value of attribute index.
4 5 6 |
# File 'lib/docubot/bundle.rb', line 4
#
# @return [Object] the current value of @index.
def index
  @index
end
#internal_links ⇒ Object (readonly)
Returns the value of attribute internal_links.
5 6 7 |
# File 'lib/docubot/bundle.rb', line 5
#
# @return [Object] the current value of @internal_links.
def internal_links
  @internal_links
end
#page_by_file_path ⇒ Object (readonly)
Returns the value of attribute page_by_file_path.
6 7 8 |
# File 'lib/docubot/bundle.rb', line 6
#
# @return [Object] the current value of @page_by_file_path.
def page_by_file_path
  @page_by_file_path
end
#page_by_html_path ⇒ Object (readonly)
Returns the value of attribute page_by_html_path.
6 7 8 |
# File 'lib/docubot/bundle.rb', line 6
#
# @return [Object] the current value of @page_by_html_path.
def page_by_html_path
  @page_by_html_path
end
#pages ⇒ Object (readonly)
Returns the value of attribute pages.
6 7 8 |
# File 'lib/docubot/bundle.rb', line 6
#
# @return [Object] the current value of @pages.
def pages
  @pages
end
#pages_by_title ⇒ Object (readonly)
Returns the value of attribute pages_by_title.
6 7 8 |
# File 'lib/docubot/bundle.rb', line 6
#
# @return [Object] the current value of @pages_by_title.
def pages_by_title
  @pages_by_title
end
#source ⇒ Object (readonly)
Returns the value of attribute source.
4 5 6 |
# File 'lib/docubot/bundle.rb', line 4
#
# @return [Object] the current value of @source.
def source
  @source
end
#toc ⇒ Object (readonly)
Returns the value of attribute toc.
4 5 6 |
# File 'lib/docubot/bundle.rb', line 4
#
# @return [Object] the current value of @toc.
def toc
  @toc
end
Instance Method Details
#create_pages(files_and_folders) ⇒ Object
67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
# File 'lib/docubot/bundle.rb', line 67
#
# Sorts each path into one of three buckets: extras (anything that is neither
# a directory nor a file with a registered converter), glossary pages (paths
# under _glossary/), or regular pages. Regular pages are also registered in
# the lookup hashes, the index and -- unless hidden -- the table of contents.
#
# @param files_and_folders [Array<String>] paths to process, in order.
# @return [Array<String>] the array that was passed in (the result of #each).
def create_pages( files_and_folders )
  files_and_folders.each do |path|
    extension    = File.extname( path )[ 1..-1 ]
    item_is_page = File.directory?( path ) || DocuBot::Converter.by_type[ extension ]

    unless item_is_page
      @extras << path
      next
    end

    page = DocuBot::Page.new( self, path )
    next if page.skip

    if path =~ %r{^_glossary/}
      @glossary << page
      next
    end

    @pages << page
    @page_by_file_path[ path ]           = page
    @page_by_html_path[ page.html_path ] = page
    @pages_by_title[ page.title ] << page
    @index.process_page( page )

    # Add the page (and any sub-links) to the toc
    next if page.hide
    @toc.add_to_link_hierarchy( page.title, page.html_path, page )

    page.toc.as_list.each do |id_or_text|
      if id_or_text[0..0] == '#'
        # Entry is an anchor such as "#intro": look the element up by CSS id.
        if ele = page.nokodoc.at_css( id_or_text )
          @toc.add_to_link_hierarchy( ele.inner_text, page.html_path + id_or_text, page )
        else
          warn "Could not find requested toc anchor #{id_or_text.inspect} on #{page.html_path}"
        end
      else
        # Entry is literal text: find the first text node that matches it.
        # TODO: Find an elegant way to handle quotes in XPath, for speed
        # Kramdown 'helpfully' converts quotes in the body to be curly, breaking direct text matching
        quotes    = /['‘’"“”]+/
        quoteless = id_or_text.gsub( quotes, '' )
        text_node = page.nokodoc.xpath( 'text()|.//text()' ).find do |node|
          node.content.gsub( quotes, '' ) == quoteless
        end

        if text_node
          ele = text_node.parent
          # FIXME: better unique ID generator
          ele['id'] = "item-#{Time.now.to_i}-#{rand 999999}" unless ele['id']
          @toc.add_to_link_hierarchy( id_or_text, page.html_path + '#' + ele['id'], page )
        else
          warn "Could not find requested toc anchor for #{id_or_text.inspect} on #{page.html_path}"
        end
      end
    end
  end
end
#find_page_collisions ⇒ Object
173 174 175 176 177 178 179 180 181 182 183 184 185 186 |
# File 'lib/docubot/bundle.rb', line 173
#
# Checks that no two pages in @pages share the same html_path.
#
# @return [nil] when every html_path is unique.
# @raise [PageCollision] listing each contested path with the title and
#   source file of every page that maps to it, one path per line.
def find_page_collisions
  # Find any and all pages that would collide
  collisions = @pages.group_by{ |page| page.html_path }.select{ |path,pages| pages.length>1 }
  return if collisions.empty?

  message = collisions.map do |path,pages|
    "#{path}: #{pages.map{ |page| "'#{page.title}' (#{page.file})" }.join(', ')}"
  end.join("\n")
  raise PageCollision.new, message
end
#validate_links ⇒ Object
118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
# File 'lib/docubot/bundle.rb', line 118
#
# Walks every <a href> on every page and files the (URL-unescaped) href under
# one of four per-page lists: @external_links (has a scheme such as "http:"),
# @internal_links (resolves to a known page and, if an anchor is given, the
# anchor exists), @file_links (resolves to a file on disk that is not a page
# source), or @broken_links (everything else).
#
# All four hashes are reset on each call and default missing keys to [].
def validate_links
  @external_links = Hash.new{ |h,k| h[k]=[] }
  @internal_links = Hash.new{ |h,k| h[k]=[] }
  @file_links     = Hash.new{ |h,k| h[k]=[] }
  @broken_links   = Hash.new{ |h,k| h[k]=[] }

  # Relative file checks below are resolved against the source directory.
  Dir.chdir( @source ) do
    @pages.each do |page|
      # TODO: set the xpath to .//a/@href once this is fixed: http://github.com/tenderlove/nokogiri/issues/#issue/213
      page.nokodoc.xpath('.//a').each do |a|
        next unless href = a['href']
        href = CGI.unescape(href)

        if href=~%r{\A[a-z]+:}i
          @external_links[page] << href
          next
        end

        # id keeps its leading '#', so it can be used directly as a CSS selector.
        id   = href[/#([a-z][\w.:-]*)?/i]
        file = href.sub(/#.*/,'')
        # NOTE(review): String#/ is not core Ruby; presumably a project-defined
        # path-join helper -- confirm.
        path = file.empty? ? page.html_path : Pathname.new( File.dirname(page.html_path) / file ).cleanpath.to_s

        if target=@page_by_html_path[path]
          if !id || id == "#" || target.nokodoc.at_css(id)
            @internal_links[page] << href
          else
            # id is always set here, so the warning is unconditional.
            warn "Could not find internal link for #{id.inspect} on #{page.html_path.inspect}"
            @broken_links[page] << href
          end
        elsif File.file?(path) && !@page_by_file_path[path]
          @file_links[page] << href
        else
          @broken_links[page] << href
        end
      end
    end
  end
end
#warn_for_broken_links ⇒ Object
156 157 158 159 160 161 162 |
# File 'lib/docubot/bundle.rb', line 156
#
# Emits one warning per entry in @broken_links, naming the page's source file
# and the offending link.
def warn_for_broken_links
  @broken_links.each do |page,links|
    links.each do |link|
      warn "Broken link on #{page.file}: '#{link}'"
    end
  end
end
#warn_for_missing_glossary_terms ⇒ Object
164 165 166 167 168 169 170 171 |
# File 'lib/docubot/bundle.rb', line 164
#
# Emits a warning for every term reported by @glossary.missing_terms, followed
# by one warning per page that referred to the term.
def warn_for_missing_glossary_terms
  @glossary.missing_terms.each do |term,referrers|
    warn "Glossary term '#{term}' never defined."
    referrers.each do |referring_page|
      warn "...seen on #{referring_page.file}."
    end
  end
end
#write(writer_type, destination = nil) ⇒ Object
188 189 190 191 192 193 194 195 |
# File 'lib/docubot/bundle.rb', line 188
#
# Writes the bundle using the writer class registered for +writer_type+.
#
# @param writer_type [#to_s] writer name; matched case-insensitively against
#   the keys of DocuBot::Writer.by_type.
# @param destination [Object, nil] passed through unchanged to the writer's
#   #write method.
# @return [Object] whatever the writer's #write returns.
# @raise [RuntimeError] if no writer is registered for +writer_type+.
def write( writer_type, destination=nil )
  writer = DocuBot::Writer.by_type[ writer_type.to_s.downcase ]
  unless writer
    raise "Unknown writer '#{writer_type}'; available types: #{DocuBot::Writer::INSTALLED_WRITERS.join ', '}"
  end

  writer.new( self ).write( destination )
end