Class: Fit::Parse

Inherits:
ParseHolder show all
Defined in:
lib/fit/parse.rb

Constant Summary collapse

MAX_INT =

Hard-coded copy of Java's java.lang.Integer.MAX_VALUE.

2147483647
DEFAULT_TAGS =
['table', 'tr', 'td']
@@footnote_files =
0
@@footnote_path =

The original implementation of footnote hard-codes the path where footnote files are created, which makes it somewhat broken. Keeping the path in a class variable lets external clients (such as Rake and FitTask) decide where footnotes are generated.

'Reports/'

Instance Attribute Summary

Attributes inherited from ParseHolder

#body, #end, #leader, #more, #parts, #tag, #trailer

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from ParseHolder

#add_to_body, #add_to_tag, #at, create, #last, #leaf, #print, #size, #text

Constructor Details

#initialize(text, tags = DEFAULT_TAGS, level = 0, offset = 0) ⇒ Parse

Returns a new instance of Parse.

Raises:

(ParseException)


96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# File 'lib/fit/parse.rb', line 96

# Builds a parse node for the first element named tags[level] found in +text+.
#
# text   - HTML source to scan.
# tags   - tag names, indexed by +level+ (defaults to DEFAULT_TAGS).
# level  - index into +tags+ of the tag this node represents.
# offset - passed to ParseException on failure; advanced by the consumed
#          length when child and sibling nodes are created.
#
# Sets @leader, @tag, @body, @end and @trailer from slices of +text+, then
# creates child nodes in @parts and following sibling nodes in @more.
#
# Raises ParseException when the opening tag, the '>' closing it, the matching
# end tag, or the '>' closing the end tag cannot be found.
def initialize(text, tags = DEFAULT_TAGS, level = 0, offset = 0)
  tag = tags[level]
  opener = "<#{tag}"
  lc = text.downcase # every tag search below runs on this lower-cased copy

  start_tag = lc.index(opener)
  raise ParseException.new("Can't find tag: #{tag}", offset) if start_tag.nil?

  tag_close = lc.index('>', start_tag)
  raise ParseException.new("Can't find tag: #{tag}", offset) if tag_close.nil?
  end_tag = tag_close + 1

  start_end = find_matching_end_tag(lc, end_tag, tag, offset)
  raise ParseException.new("Can't find tag: #{tag}", offset) if start_end.nil?

  end_close = lc.index('>', start_end)
  raise ParseException.new("Can't find tag: #{tag}", offset) if end_close.nil?
  end_end = end_close + 1

  # Position of the next sibling with the same tag, if there is one.
  start_more = lc.index(opener, end_end)

  # Slices are taken from the original text so the source casing is kept.
  @leader = text[0...start_tag]
  @tag = text[start_tag...end_tag]
  @body = text[end_tag...start_end]
  @end = text[start_end...end_end]
  @trailer = text[end_end..-1]

  child_level = level + 1
  if child_level < tags.size
    # Not at the innermost level yet: the body is parsed into child nodes.
    @parts = Parse.new(@body, tags, child_level, offset + end_tag)
    @body = nil
  elsif !@body.index("<#{tags[0]}").nil?
    # Innermost level containing a nested table: restart from level 0.
    # NOTE(review): this lookup uses the original-case body, so unlike the
    # searches above it is case-sensitive - confirm that is intended.
    @parts = Parse.new(@body, tags, 0, offset + end_tag)
    @body = ''
  end

  return if start_more.nil?

  # The trailer holds further siblings; hand it over to the next node.
  @more = Parse.new(@trailer, tags, level, offset + end_end)
  @trailer = nil
end

Class Method Details

.condense_whitespace(s) ⇒ Object



207
208
209
210
211
212
# File 'lib/fit/parse.rb', line 207

# Collapses every run of whitespace in +s+ into a single space and strips the
# ends of the result.
#
# Non-breaking spaces (U+00A0) are first swapped for the literal text
# '&nbsp;' so that the whitespace-collapsing regexp cannot merge them.
# Afterwards every '&nbsp;' - including any already present in the input -
# becomes one ordinary space.
def Parse.condense_whitespace(s)
  nbsp = [0x00a0].pack('U')
  shielded = s.gsub(nbsp, '&nbsp;')
  collapsed = shielded.gsub(%r{\s+}, ' ')
  collapsed.gsub(%r{&nbsp;}, ' ').strip
end

.footnote_path ⇒ Object



219
# File 'lib/fit/parse.rb', line 219

# Returns the class-wide path prefix stored in @@footnote_path.
def Parse.footnote_path
  @@footnote_path
end

.footnote_path=(path) ⇒ Object



218
# File 'lib/fit/parse.rb', line 218

# Stores +path+ in @@footnote_path, the class-wide prefix that #footnote
# prepends to the files it writes.
def Parse.footnote_path=(path)
  @@footnote_path = path
end

.html_to_text(s) ⇒ Object



158
159
160
161
# File 'lib/fit/parse.rb', line 158

# Converts an HTML fragment to plain text.
#
# Every <br> variant and every "</p><p ...>" boundary is first rewritten to
# the canonical marker '<br />'. The result is then passed through
# remove_tags, condense_whitespace and unescape, in that order.
def Parse.html_to_text(s)
  line_break = '<br />'
  normalized = s.gsub(%r{<\s*br\s*/?\s*>}, line_break)
  normalized = normalized.gsub(%r{<\s*/\s*p\s*>\s*<\s*p( .*?)?\s*>}, line_break)
  without_tags = remove_tags(normalized)
  unescape(condense_whitespace(without_tags))
end

.remove_tags(s) ⇒ Object



163
164
165
# File 'lib/fit/parse.rb', line 163

# Deletes every "<...>" span from +s+ (the match may cross line breaks),
# except for the exact marker '<br />', which is left in place.
def Parse.remove_tags(s)
  s.gsub(/<.*?>/m) do |markup|
    markup == '<br />' ? markup : ''
  end
end

.unescape(s) ⇒ Object



167
168
169
170
171
172
173
174
175
176
177
178
# File 'lib/fit/parse.rb', line 167

# Turns HTML escapes in +s+ back into plain characters.
#
# Steps, in order: numeric entities (via Parse.unescape_numeric_entities),
# '<br />' markers to newlines, the named entities &lt; &gt; &nbsp; &quot;
# &amp;, and finally typographic ("smart") quotes to their ASCII forms.
# &amp; is handled last among the named entities.
def Parse.unescape(s)
  text = Parse.unescape_numeric_entities(s)
  text = text.gsub(%r{<br />}, "\n")

  # unescape HTML entities
  named_entities = [
    [%r{&lt;}, '<'],
    [%r{&gt;}, '>'],
    [%r{&nbsp;}, ' '],
    [%r{&quot;}, '"'],
    [%r{&amp;}, '&']
  ]
  named_entities.each { |pattern, plain| text = text.gsub(pattern, plain) }

  # unescape smart quotes
  smart_quotes = [
    [0x201c, '"'], # left double quotation mark
    [0x201d, '"'], # right double quotation mark
    [0x2018, "'"], # left single quotation mark
    [0x2019, "'"]  # right single quotation mark
  ]
  smart_quotes.inject(text) do |acc, (code_point, plain)|
    acc.gsub([code_point].pack('U'), plain)
  end
end

.unescape_numeric_entities(s) ⇒ Object



180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
# File 'lib/fit/parse.rb', line 180

# Replaces numeric character references in +s+ with the characters they name.
#
# Recognised forms are decimal ("&#65;") and hexadecimal ("&#x41;" or
# "&#X41;"). Digits are always read in the stated base, so a leading zero
# never switches to octal ("&#010;" is a line feed).
#
# A reference is left untouched when it is malformed, when its value is above
# 0xFFFF, or when it names a surrogate code point (0xD800..0xDFFF), which
# cannot be encoded as valid UTF-8.
#
# s - the String to process.
#
# Returns a new String; +s+ itself is not modified.
def self.unescape_numeric_entities(s)
  s.gsub(/&#(?:[xX]([0-9a-fA-F]+)|([0-9]+));/) do |entity|
    hex_digits = Regexp.last_match(1)
    code_point = hex_digits ? hex_digits.to_i(16) : Regexp.last_match(2).to_i
    if code_point > 0xFFFF || (0xD800..0xDFFF).include?(code_point)
      entity # not representable here: keep the original text
    else
      [code_point].pack('U')
    end
  end
end

Instance Method Details

#find_matching_end_tag(lc, match_from_here, tag, offset) ⇒ Object



134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# File 'lib/fit/parse.rb', line 134

# Finds the end tag that balances an already-opened +tag+, allowing for
# nested elements of the same name.
#
# lc              - lower-cased text to search.
# match_from_here - index just past the opening tag's '>'.
# tag             - tag name without angle brackets (e.g. 'td').
# offset          - passed to ParseException on failure.
#
# Returns the index of the '<' of the matching "</tag".
# Raises ParseException when no further opening or closing tag is found
# before the nesting is balanced, or when a tag that was found is never
# closed with '>'.
def find_matching_end_tag(lc, match_from_here, tag, offset)
  open_marker = "<#{tag}"
  close_marker = "</#{tag}"
  from_here = match_from_here
  depth = 1
  start_end = 0
  while depth > 0
    next_open = lc.index(open_marker, from_here)
    next_close = lc.index(close_marker, from_here)
    raise ParseException.new("Can't find tag: #{tag}", offset) if next_open.nil? && next_close.nil?

    # Whichever marker comes first decides whether nesting deepens or unwinds.
    if next_close.nil? || (next_open && next_open < next_close)
      depth += 1
      start_end = next_open
    else
      depth -= 1
      start_end = next_close
    end

    # Resume scanning after the tag just seen; a tag without a closing '>'
    # is reported as a parse error rather than failing on nil arithmetic.
    tag_close = lc.index('>', start_end)
    raise ParseException.new("Can't find tag: #{tag}", offset) if tag_close.nil?
    from_here = tag_close + 1
  end
  start_end
end

#footnote ⇒ Object



220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
# File 'lib/fit/parse.rb', line 220

# Writes this parse tree to a numbered footnote file and returns an HTML link
# to it.
#
# The file is "<@@footnote_path>footnotes/<n>.html", where n is the class-wide
# counter @@footnote_files after being incremented. The tree is written with
# #print.
#
# Returns the anchor markup on success, '[-]' once 25 footnotes have already
# been numbered, or '[!]' when writing fails (the error message and backtrace
# are printed with puts).
def footnote
  return '[-]' if @@footnote_files >= 25
  begin
    this_footnote = (@@footnote_files += 1)
    html = "footnotes/#{this_footnote}.html"
    # NOTE(review): the file path joins prefix and name directly while the
    # href below inserts a '/', so the two only agree when @@footnote_path
    # ends with '/'. Left unchanged to keep the emitted link stable.
    path = "#{@@footnote_path}#{html}"
    FileUtils.mkpath File.dirname(path)
    # Block form closes the file even if print raises.
    File.open(path, 'w') { |f| print f }
    "<a href=#{@@footnote_path}/#{html}>[#{this_footnote}]</a>"
  rescue StandardError => e
    # StandardError only: Interrupt, SystemExit and similar must propagate.
    puts e.message
    puts e.backtrace
    '[!]'
  end
end