Class: Boilerpipe::SAX::TagActionMap

Inherits:
Object
  • Object
show all
Defined in:
lib/boilerpipe/sax/tag_action_map.rb

Class Method Summary collapse

Class Method Details

.tag_actions ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/boilerpipe/sax/tag_action_map.rb', line 3

# Builds the default lookup table from upper-case tag-name symbols
# (e.g. :SCRIPT, :A, :H1) to the tag-action object registered for that tag.
#
# Every call assembles a brand-new Hash and instantiates a separate action
# object for each key; nothing is cached or shared between keys or calls.
# Keys are inserted in a fixed order, so iteration order is stable.
#
# @return [Hash{Symbol => Object}] tag-name symbol => tag-action instance
def self.tag_actions
  label_source = ::Boilerpipe::Labels
  actions = {}

  # Registers one fresh +klass+ instance per tag, in the order given.
  add = ->(klass, *tags) { tags.each { |tag| actions[tag] = klass.new } }

  # Wraps the given label symbols in a LabelAction carried by a BlockTagLabel.
  block_label = lambda do |*label_names|
    TagActions::BlockTagLabel.new(label_source::LabelAction.new(label_names))
  end

  add.call(TagActions::IgnorableElement, :STYLE, :SCRIPT, :OPTION, :OBJECT, :EMBED, :APPLET, :LINK)

  add.call(TagActions::AnchorText, :A)
  add.call(TagActions::Body, :BODY)

  add.call(TagActions::InlineNoWhitespace, :STRIKE, :U, :B, :I, :EM, :STRONG, :SPAN)

  # New in 1.1 (especially to improve extraction quality from Wikipedia etc.)
  add.call(TagActions::InlineNoWhitespace, :SUP)

  # New in 1.2
  add.call(TagActions::InlineNoWhitespace, :CODE, :TT, :SUB, :VAR)

  add.call(TagActions::InlineWhitespace, :ABBR, :ACRONYM)
  add.call(TagActions::InlineNoWhitespace, :FONT)

  # added in 1.1.1
  add.call(TagActions::IgnorableElement, :NOSCRIPT)

  # New in 1.3
  actions[:LI] = block_label.call(:LI)
  %i[H1 H2 H3].each { |tag| actions[tag] = block_label.call(tag, :HEADING) }

  actions
end