Class: TruncatedSaxDocument

Inherits:
Nokogiri::XML::SAX::Document
  • Object
show all
Defined in:
lib/truncato/truncated_sax_document.rb

Constant Summary collapse

# Tag names this handler classifies as ignorable. Frozen so the shared list
# cannot be mutated at runtime.
IGNORABLE_TAGS = %w[html head body].freeze
# Tag names this handler classifies as single tags. Frozen for the same reason.
SINGLE_TAGS = %w[br img].freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options) ⇒ TruncatedSaxDocument

Returns a new instance of TruncatedSaxDocument.



12
13
14
15
16
# File 'lib/truncato/truncated_sax_document.rb', line 12

# Builds a new handler: creates the HTMLEntities coder, then hands the
# caller's options to capture_options and resets state via init_parsing_state.
#
# @param options forwarded unchanged to capture_options (its shape is not
#   visible from this method)
def initialize(options)
  @html_coder = HTMLEntities.new
  capture_options(options)
  init_parsing_state
end

Instance Attribute Details

#count_tags ⇒ Object (readonly)

Returns the value of attribute count_tags.



9
10
11
# File 'lib/truncato/truncated_sax_document.rb', line 9

# Read-only accessor: returns the current value of @count_tags.
def count_tags
  @count_tags
end

#filtered_attributes ⇒ Object (readonly)

Returns the value of attribute filtered_attributes.



9
10
11
# File 'lib/truncato/truncated_sax_document.rb', line 9

# Read-only accessor: returns the current value of @filtered_attributes.
def filtered_attributes
  @filtered_attributes
end

#filtered_tags ⇒ Object (readonly)

Returns the value of attribute filtered_tags.



9
10
11
# File 'lib/truncato/truncated_sax_document.rb', line 9

# Read-only accessor: returns the current value of @filtered_tags.
def filtered_tags
  @filtered_tags
end

#ignored_levels ⇒ Object (readonly)

Returns the value of attribute ignored_levels.



9
10
11
# File 'lib/truncato/truncated_sax_document.rb', line 9

# Read-only accessor: returns the current value of @ignored_levels.
def ignored_levels
  @ignored_levels
end

#max_length ⇒ Object (readonly)

Returns the value of attribute max_length.



9
10
11
# File 'lib/truncato/truncated_sax_document.rb', line 9

# Read-only accessor: returns the current value of @max_length.
def max_length
  @max_length
end

#max_length_reached ⇒ Object (readonly)

Returns the value of attribute max_length_reached.



9
10
11
# File 'lib/truncato/truncated_sax_document.rb', line 9

# Read-only accessor: returns the current value of @max_length_reached.
def max_length_reached
  @max_length_reached
end

#tail ⇒ Object (readonly)

Returns the value of attribute tail.



9
10
11
# File 'lib/truncato/truncated_sax_document.rb', line 9

# Read-only accessor: returns the current value of @tail.
def tail
  @tail
end

#truncated_string ⇒ Object (readonly)

Returns the value of attribute truncated_string.



9
10
11
# File 'lib/truncato/truncated_sax_document.rb', line 9

# Read-only accessor: returns the current value of @truncated_string.
def truncated_string
  @truncated_string
end

Instance Method Details

#characters(decoded_string) ⇒ Object



25
26
27
28
29
30
# File 'lib/truncato/truncated_sax_document.rb', line 25

# Handles a run of character data reported by the SAX parser.
#
# Does nothing once @max_length_reached is set or while ignore_mode? is true.
# Otherwise the text is cut with truncate_string when it is longer than the
# remaining budget, HTML-encoded through @html_coder, and appended together
# with the length of the unencoded text.
#
# @param decoded_string [String] text as delivered by the parser
# @return whatever append_to_truncated_string returns, or nil when skipped
def characters(decoded_string)
  return if @max_length_reached
  return if ignore_mode?

  # Remaining budget, with one unit held back (the "- 1").
  budget = max_length - @estimated_length - 1
  text =
    if decoded_string.length > budget
      truncate_string(decoded_string, budget)
    else
      decoded_string
    end

  append_to_truncated_string(@html_coder.encode(text), text.length)
end

#comment(string) ⇒ Object



32
33
34
35
36
37
# File 'lib/truncato/truncated_sax_document.rb', line 32

# Handles a comment reported by the SAX parser.
#
# Comments are only forwarded to process_comment when @comments is truthy
# and @max_length_reached is not yet set; otherwise nothing happens.
#
# @param string [String] the comment text
# @return the result of process_comment, or nil when the comment is skipped
def comment(string)
  return unless @comments
  return if @max_length_reached

  process_comment(string)
end

#end_document ⇒ Object



53
54
55
# File 'lib/truncato/truncated_sax_document.rb', line 53

# Called when the parser reaches the end of the document. Invokes
# close_truncated_document only if max_length_reached is truthy.
#
# @return the result of close_truncated_document, or nil when not truncated
def end_document
  return unless max_length_reached

  close_truncated_document
end

#end_element(name) ⇒ Object



39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/truncato/truncated_sax_document.rb', line 39

# Handles a closing tag reported by the SAX parser.
#
# A filtered tag that closes while ignore_mode? is true only leaves the
# ignored level. Otherwise the tag is skipped when @max_length_reached is
# set, when it is an ignorable tag, or while still in ignore mode. Single-tag
# elements get no closing markup; every other tag is popped from
# @closing_tags and its closing markup is appended.
#
# @param name [String] the element name
# @return the result of append_to_truncated_string, or nil when nothing is
#   appended
def end_element(name)
  leaving_filtered_tag = filtered_tags.include?(name) && ignore_mode?
  if leaving_filtered_tag
    exit_ignored_level
    return
  end

  return if @max_length_reached
  return if ignorable_tag?(name)
  return if ignore_mode?
  return if single_tag_element?(name)

  @closing_tags.pop
  append_to_truncated_string(closing_tag(name), overriden_tag_length)
end

#start_element(name, attributes) ⇒ Object



18
19
20
21
22
23
# File 'lib/truncato/truncated_sax_document.rb', line 18

# Handles an opening tag reported by the SAX parser.
#
# Opening a filtered tag always enters an ignored level first. The tag is
# then skipped when @max_length_reached is set, when it is an ignorable tag,
# or while ignore_mode? is true. Otherwise its name is pushed onto
# @closing_tags (unless it is a single-tag element) and its opening markup is
# appended.
#
# @param name [String] the element name
# @param attributes the element's attributes, passed through to opening_tag
# @return the result of append_to_truncated_string, or nil when skipped
def start_element(name, attributes)
  if filtered_tags.include?(name)
    enter_ignored_level
  end

  return if @max_length_reached
  return if ignorable_tag?(name)
  return if ignore_mode?

  unless single_tag_element?(name)
    @closing_tags.push(name)
  end

  append_to_truncated_string(opening_tag(name, attributes), overriden_tag_length)
end