Class: DocuBot::Bundle

Inherits:
Object
  • Object
show all
Defined in:
lib/docubot/bundle.rb

Defined Under Namespace

Classes: PageCollision

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(source_directory) ⇒ Bundle

Returns a new instance of Bundle.



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# File 'lib/docubot/bundle.rb', line 7

# Builds a bundle from every page and extra file found under +source_directory+.
#
# The constructor expands the directory to an absolute path, creates the
# glossary, index and table-of-contents objects, scans the directory for
# content (see #create_pages), then validates links, warns about missing
# glossary terms and checks for colliding output paths.
#
# source_directory - path (absolute or relative) of the documentation source.
#
# Raises RuntimeError if the directory does not exist.
# Raises PageCollision (via #find_page_collisions) if two pages share an html_path.
def initialize( source_directory )
	@source = File.expand_path( source_directory )
	# File.exists? was deprecated for years and removed in Ruby 3.2; File.exist? works everywhere.
	raise "DocuBot cannot find directory #{@source}. Exiting." unless File.exist?( @source )
	@pages  = []
	@extras = []
	@pages_by_title    = Hash.new{ |h,k| h[k]=[] }
	@page_by_file_path = {}
	@page_by_html_path = {}

	# NOTE(review): String#/ is not core Ruby; presumably a project extension that joins path segments.
	@glossary = DocuBot::Glossary.new( self, @source/'_glossary' )
	@index    = DocuBot::Index.new( self )
	@toc      = DocuBot::LinkTree::Root.new( self )

	Dir.chdir( @source ) do
		# This might be nil; MetaSection.new is OK with that.
		index_file = Dir[ *DocuBot::Converter.types.map{|t| "index.#{t}"} ][ 0 ]
		@global = DocuBot::MetaSection.new( {:title=>'DocuBot Documentation'}, index_file )
		@global.glossary = @glossary
		@global.index    = @index
		@global.toc      = @toc

		files_and_folders = Dir[ '**/*' ]

		# index files are handled by Page.new for a directory; no sections for special folders (but process contents)
		files_and_folders.reject!{ |path| name = File.basename( path ); name =~ /^(?:index\.[^.]+)$/ }

		# Ignore every path with a component that starts with an underscore
		# (e.g. _templates); _static and _glossary files are added back explicitly.
		files_and_folders.reject!{ |f| f =~ /(?:^|\/)_/ }
		files_and_folders.concat Dir[ '_static/**/*'   ].reject{ |f| File.directory?(f) }
		files_and_folders.concat Dir[ '_glossary/**/*' ].reject{ |f| File.directory?(f) }

		# Drop anything matching the globs listed in the global "ignore" setting
		@global.ignore.as_list.each do |glob|
			files_and_folders = files_and_folders - Dir[glob]
		end

		# Sort by leading digits, if present, interpreted as numbers.
		# Names without a numeric prefix get 9e9 so they sort after the numbered ones.
		files_and_folders.sort_by!{ |path| path.split(%r{[/\\]}).map{ |name| name.tr('_',' ').scan(/\A(?:(\d+)\s+)?(.+)/)[0].tap{ |parts| parts[0] = parts[0] ? parts[0].to_i : 9e9 } } }

		create_pages( files_and_folders )
	end
	# puts @toc.to_txt

	# Regenerate pages whose templates require full scanning to have completed
	# TODO: make this based off of a metasection attribute.
	@pages.select do |page|
		%w[ glossary ].include?( page.template )
	end.each do |page|
		page.dirty_template
	end

	# TODO: make this optional via global variable
	validate_links
	warn_for_broken_links

	# TODO: make this optional via global variable
	warn_for_missing_glossary_terms

	find_page_collisions
end

Instance Attribute Details

Returns the value of attribute broken_links.



5
6
7
# File 'lib/docubot/bundle.rb', line 5

# Hash of page => Array of hrefs that validate_links could not resolve to a
# page, a page anchor, or an existing file. Nil until validate_links has run.
def broken_links
  @broken_links
end

Returns the value of attribute external_links.



5
6
7
# File 'lib/docubot/bundle.rb', line 5

# Hash of page => Array of hrefs that begin with a URI scheme (e.g. "http:"),
# as collected by validate_links. Nil until validate_links has run.
def external_links
  @external_links
end

#extras ⇒ Object (readonly)

Returns the value of attribute extras.



4
5
6
# File 'lib/docubot/bundle.rb', line 4

# Array of source paths that create_pages found to be neither a directory
# nor a file type known to DocuBot::Converter.
def extras
  @extras
end

Returns the value of attribute file_links.



5
6
7
# File 'lib/docubot/bundle.rb', line 5

# Hash of page => Array of hrefs that point at an existing file which is not
# the source of any page, as collected by validate_links.
# Nil until validate_links has run.
def file_links
  @file_links
end

#global ⇒ Object (readonly)

Returns the value of attribute global.



4
5
6
# File 'lib/docubot/bundle.rb', line 4

# The DocuBot::MetaSection created by the constructor from the top-level
# index file (if any), with 'DocuBot Documentation' as its initial title.
def global
  @global
end

#glossary ⇒ Object (readonly)

Returns the value of attribute glossary.



4
5
6
# File 'lib/docubot/bundle.rb', line 4

# The DocuBot::Glossary created by the constructor; create_pages appends
# every page found under _glossary/ to it.
def glossary
  @glossary
end

#index ⇒ Object (readonly)

Returns the value of attribute index.



4
5
6
# File 'lib/docubot/bundle.rb', line 4

# The DocuBot::Index created by the constructor; create_pages passes every
# non-glossary page to its process_page method.
def index
  @index
end

Returns the value of attribute internal_links.



5
6
7
# File 'lib/docubot/bundle.rb', line 5

# Hash of page => Array of hrefs that validate_links resolved to another page
# in the bundle (and to an existing anchor, when one was given).
# Nil until validate_links has run.
def internal_links
  @internal_links
end

#page_by_file_path ⇒ Object (readonly)

Returns the value of attribute page_by_file_path.



6
7
8
# File 'lib/docubot/bundle.rb', line 6

# Hash of source path (as passed to create_pages) => page, for every
# non-glossary page.
def page_by_file_path
  @page_by_file_path
end

#page_by_html_path ⇒ Object (readonly)

Returns the value of attribute page_by_html_path.



6
7
8
# File 'lib/docubot/bundle.rb', line 6

# Hash of html_path => page for every non-glossary page. When two pages share
# an html_path the later one overwrites the earlier entry.
def page_by_html_path
  @page_by_html_path
end

#pages ⇒ Object (readonly)

Returns the value of attribute pages.



6
7
8
# File 'lib/docubot/bundle.rb', line 6

# Array of every non-glossary page, in the order create_pages added them.
def pages
  @pages
end

#pages_by_title ⇒ Object (readonly)

Returns the value of attribute pages_by_title.



6
7
8
# File 'lib/docubot/bundle.rb', line 6

# Hash of page title => Array of non-glossary pages with that title.
# Looking up an unknown title stores and returns a new empty Array.
def pages_by_title
  @pages_by_title
end

#source ⇒ Object (readonly)

Returns the value of attribute source.



4
5
6
# File 'lib/docubot/bundle.rb', line 4

# Absolute path of the source directory (the constructor argument after
# File.expand_path).
def source
  @source
end

#toc ⇒ Object (readonly)

Returns the value of attribute toc.



4
5
6
# File 'lib/docubot/bundle.rb', line 4

# The DocuBot::LinkTree::Root created by the constructor; create_pages adds
# every non-hidden page (and its requested sub-links) to it.
def toc
  @toc
end

Instance Method Details

#create_pages(files_and_folders) ⇒ Object



67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# File 'lib/docubot/bundle.rb', line 67

# Turns each path into a page or records it as an extra file.
#
# A path becomes a page when it is a directory or when its extension is a key
# of DocuBot::Converter.by_type; everything else is appended to @extras.
# Pages under _glossary/ go to the glossary. Other pages are registered in the
# lookup tables and the index and, unless hidden, added to the table of
# contents together with any sub-links the page requests.
#
# files_and_folders - list of paths; relative paths are checked against the
#                     current working directory.
#
# Returns files_and_folders.
def create_pages( files_and_folders )
	files_and_folders.each do |path|
		type = File.extname( path )[ 1..-1 ]
		unless File.directory?(path) || DocuBot::Converter.by_type[type]
			@extras << path
			next
		end

		page = DocuBot::Page.new( self, path )
		next if page.skip

		if path =~ %r{^_glossary/}
			@glossary << page
			next
		end

		@pages << page
		@page_by_file_path[path] = page
		@page_by_html_path[page.html_path] = page
		@pages_by_title[page.title] << page
		@index.process_page( page )

		# Hidden pages stay out of the table of contents entirely
		next if page.hide

		@toc.add_to_link_hierarchy( page.title, page.html_path, page )
		page.toc.as_list.each do |entry|
			if entry[0..0] == '#'
				# The entry is an id selector: link straight to that element
				anchor = page.nokodoc.at_css(entry)
				if anchor
					@toc.add_to_link_hierarchy( anchor.inner_text, page.html_path + entry, page )
				else
					warn "Could not find requested toc anchor #{entry.inspect} on #{page.html_path}"
				end
			else
				# TODO: Find an elegant way to handle quotes in XPath, for speed
				# Kramdown 'helpfully' converts quotes in the body to be curly, breaking direct text matching
				quote_run = /['‘’"“”]+/
				wanted = entry.gsub(quote_run,'')
				text_node = page.nokodoc.xpath('text()|.//text()').find do |node|
					node.content.gsub(quote_run,'') == wanted
				end
				if text_node
					holder = text_node.parent
					# FIXME: better unique ID generator
					holder['id'] ||= "item-#{Time.now.to_i}-#{rand 999999}"
					@toc.add_to_link_hierarchy( entry, page.html_path + '#' + holder['id'], page )
				else
					warn "Could not find requested toc anchor for #{entry.inspect} on #{page.html_path}"
				end
			end
		end
	end
end

#find_page_collisions ⇒ Object



173
174
175
176
177
178
179
180
181
182
183
184
185
186
# File 'lib/docubot/bundle.rb', line 173

# Checks that no two pages would be written to the same html_path.
#
# Returns nil when every page has a distinct html_path.
# Raises PageCollision otherwise; the message has one line per colliding path,
# listing the title and source file of each page that maps to it.
def find_page_collisions
	grouped = @pages.group_by{ |page| page.html_path }
	clashes = grouped.select{ |_path,sharing| sharing.length>1 }
	return if clashes.empty?

	report = clashes.map do |path,sharing|
		descriptions = sharing.map{ |page| "'#{page.title}' (#{page.file})" }
		"#{path}: #{descriptions.join(', ')}"
	end
	raise PageCollision.new, report.join("\n")
end


118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# File 'lib/docubot/bundle.rb', line 118

# Classifies every <a href> on every page into one of four per-page lists:
# @external_links, @internal_links, @file_links or @broken_links.
#
# An href is external when it starts with a URI scheme. Otherwise it is
# resolved relative to the linking page's html_path. It is internal when it
# lands on a known page (and on an existing anchor, if one is given). It is a
# file link when it lands on an existing file that is not a page source.
# Anything else is broken.
#
# All four hashes are reset on every call. File checks run with @source as the
# working directory.
def validate_links
	new_bucket = lambda{ Hash.new{ |h,k| h[k]=[] } }
	@external_links = new_bucket.call
	@internal_links = new_bucket.call
	@file_links     = new_bucket.call
	@broken_links   = new_bucket.call

	Dir.chdir( @source ) do
		@pages.each do |page|
			# TODO: set the xpath to .//a/@href once this is fixed: http://github.com/tenderlove/nokogiri/issues/#issue/213
			page.nokodoc.xpath('.//a').each do |anchor|
				raw = anchor['href']
				next unless raw
				href = CGI.unescape(raw)

				if href=~%r{\A[a-z]+:}i
					@external_links[page] << href
					next
				end

				# fragment is "#name", a bare "#", or nil when the href has no hash
				fragment = href[/#([a-z][\w.:-]*)?/i]
				file     = href.sub(/#.*/,'')
				path = if file.empty?
					page.html_path
				else
					Pathname.new( File.dirname(page.html_path) / file ).cleanpath.to_s
				end

				target = @page_by_html_path[path]
				if target
					if !fragment || fragment == "#" || target.nokodoc.at_css(fragment)
						@internal_links[page] << href
					else
						warn "Could not find internal link for #{fragment.inspect} on #{page.html_path.inspect}"
						@broken_links[page] << href
					end
				elsif File.file?(path) && !@page_by_file_path[path]
					@file_links[page] << href
				else
					@broken_links[page] << href
				end
			end
		end
	end
end


156
157
158
159
160
161
162
# File 'lib/docubot/bundle.rb', line 156

# Emits one warning per broken link recorded in @broken_links, naming the
# source file of the page that contains it.
#
# Returns @broken_links.
def warn_for_broken_links
	@broken_links.each do |page,hrefs|
		hrefs.each{ |href| warn "Broken link on #{page.file}: '#{href}'" }
	end
end

#warn_for_missing_glossary_terms ⇒ Object



164
165
166
167
168
169
170
171
# File 'lib/docubot/bundle.rb', line 164

# Warns about every glossary term that is referenced but never defined,
# followed by one warning per page that refers to it.
#
# Returns whatever @glossary.missing_terms.each returns.
def warn_for_missing_glossary_terms
	@glossary.missing_terms.each do |term,referrers|
		warn "Glossary term '#{term}' never defined."
		referrers.each{ |referrer| warn "...seen on #{referrer.file}." }
	end
end

#write(writer_type, destination = nil) ⇒ Object



188
189
190
191
192
193
194
195
# File 'lib/docubot/bundle.rb', line 188

# Writes the bundle using the writer registered for +writer_type+.
#
# writer_type - name of the writer; converted with to_s.downcase before being
#               looked up in DocuBot::Writer.by_type.
# destination - passed through to the writer's #write (default: nil).
#
# Returns the result of the writer's #write.
# Raises RuntimeError listing the installed writers when the type is unknown.
def write( writer_type, destination=nil )
	writer_class = DocuBot::Writer.by_type[ writer_type.to_s.downcase ]
	unless writer_class
		raise "Unknown writer '#{writer_type}'; available types: #{DocuBot::Writer::INSTALLED_WRITERS.join ', '}"
	end
	writer_class.new( self ).write( destination )
end