Class: Opener::ConstituentParsers::Base

Inherits:
Object
  • Object
show all
Defined in:
lib/opener/constituent_parsers/base.rb,
lib/opener/constituent_parsers/base/version.rb

Overview

The base constituent parser kernel that supports multiple languages such as English and Spanish.

Constant Summary collapse

DEFAULT_LANGUAGE =

The default language to use.

Returns:

  • (String)
'en'.freeze
ACCEPTED_LANGUAGES =
['en', 'es', 'it', 'fr'].freeze
DEFAULT_OPTIONS =

Hash containing the default options to use.

Returns:

  • (Hash)
{
  :args     => [],
  :language => DEFAULT_LANGUAGE
}.freeze
VERSION =
'1.0.5'

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Base

Returns a new instance of Base.

Parameters:

  • options (Hash) (defaults to: {})

Options Hash (options):

  • :args (Array)

    The commandline arguments to pass to the underlying Java code.

See Also:

  • DEFAULT_OPTIONS


55
56
57
58
59
# File 'lib/opener/constituent_parsers/base.rb', line 55

##
# Builds a parser from the given options, layered over DEFAULT_OPTIONS.
#
# @param [Hash] options
#
# @option options [Array] :args The commandline arguments to pass to the
#  underlying Java code.
#
def initialize(options = {})
  options = DEFAULT_OPTIONS.merge(options)

  # Hash#merge is shallow: when the caller gives no :args, the array found
  # here is the very object stored in DEFAULT_OPTIONS. Copy it so that
  # mutating the args of one instance cannot leak into the shared defaults.
  @args    = (options.delete(:args) || []).dup
  @options = options
end

Instance Attribute Details

#args ⇒ Array (readonly)

Returns:

  • (Array)


25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# File 'lib/opener/constituent_parsers/base.rb', line 25

##
# Base constituent parser. Reads a KAF document, hands it to the Java
# `ehu.parse.Annotate` annotator for the configured language and returns the
# annotated document.
#
class Base
  attr_reader :args, :options

  ##
  # The default language to use.
  #
  # @return [String]
  #
  DEFAULT_LANGUAGE = 'en'.freeze

  ##
  # Language codes for which {#run} performs parsing; any other language
  # makes {#run} return its input untouched.
  #
  # @return [Array<String>]
  #
  ACCEPTED_LANGUAGES = ['en', 'es', 'it', 'fr'].freeze

  ##
  # Hash containing the default options to use.
  #
  # @return [Hash]
  #
  DEFAULT_OPTIONS = {
    :args     => [],
    :language => DEFAULT_LANGUAGE
  }.freeze

  ##
  # @param [Hash] options
  #
  # @option options [Array] :args The commandline arguments to pass to the
  #  underlying Java code.
  # @option options [String] :language The language code to parse with,
  #  defaults to DEFAULT_LANGUAGE.
  #
  # @see Opener::ConstituentParsers::Base::DEFAULT_OPTIONS
  #
  def initialize(options = {})
    options = DEFAULT_OPTIONS.merge(options)

    # Hash#merge is shallow: without an explicit :args the array found here
    # is the very object stored in DEFAULT_OPTIONS. Copy it so that mutating
    # #args on one instance cannot leak into the defaults of all the others.
    @args    = (options.delete(:args) || []).dup
    @options = options
  end

  ##
  # Parses the given KAF input and returns the annotated document. When the
  # configured language is not listed in ACCEPTED_LANGUAGES nothing is
  # parsed and the input is handed back as-is.
  #
  # @param [String] input The input to process.
  # @return [String]
  #
  def run(input)
    return input unless ACCEPTED_LANGUAGES.include?(language)

    # Anything that is not already an IO is wrapped so it can be exposed to
    # the Java side as an input stream.
    input     = StringIO.new(input) unless input.kind_of?(IO)
    annotator = Java::ehu.parse.Annotate.new(language)
    reader    = InputStreamReader.new(input.to_inputstream)
    kaf       = KAFDocument.create_from_stream(reader)

    # NOTE(review): 'now' and '1.0' look like placeholder timestamp/version
    # values for the processor entry -- confirm against the KAFDocument API.
    kaf.add_linguistic_processor('constituents', "ehu-parse-#{language}", 'now', '1.0')

    if heads?
      annotator.parseWithHeads(kaf, CollinsHeadFinder.new(language))
    else
      annotator.parse(kaf)
    end

    kaf.to_string
  end

  ##
  # The language code this parser was configured with.
  #
  # @return [String]
  #
  def language
    options[:language]
  end

  ##
  # Whether {#run} should parse with head information (`parseWithHeads`)
  # rather than with the plain `parse` call. Always true in this class.
  #
  # @return [Boolean]
  #
  def heads?
    true
  end

end

#options ⇒ Hash (readonly)

Returns:

  • (Hash)


25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# File 'lib/opener/constituent_parsers/base.rb', line 25

##
# Base constituent parser. Reads a KAF document, hands it to the Java
# `ehu.parse.Annotate` annotator for the configured language and returns the
# annotated document.
#
class Base
  attr_reader :args, :options

  ##
  # The default language to use.
  #
  # @return [String]
  #
  DEFAULT_LANGUAGE = 'en'.freeze

  ##
  # Language codes for which {#run} performs parsing; any other language
  # makes {#run} return its input untouched.
  #
  # @return [Array<String>]
  #
  ACCEPTED_LANGUAGES = ['en', 'es', 'it', 'fr'].freeze

  ##
  # Hash containing the default options to use.
  #
  # @return [Hash]
  #
  DEFAULT_OPTIONS = {
    :args     => [],
    :language => DEFAULT_LANGUAGE
  }.freeze

  ##
  # @param [Hash] options
  #
  # @option options [Array] :args The commandline arguments to pass to the
  #  underlying Java code.
  # @option options [String] :language The language code to parse with,
  #  defaults to DEFAULT_LANGUAGE.
  #
  # @see Opener::ConstituentParsers::Base::DEFAULT_OPTIONS
  #
  def initialize(options = {})
    options = DEFAULT_OPTIONS.merge(options)

    # Hash#merge is shallow: without an explicit :args the array found here
    # is the very object stored in DEFAULT_OPTIONS. Copy it so that mutating
    # #args on one instance cannot leak into the defaults of all the others.
    @args    = (options.delete(:args) || []).dup
    @options = options
  end

  ##
  # Parses the given KAF input and returns the annotated document. When the
  # configured language is not listed in ACCEPTED_LANGUAGES nothing is
  # parsed and the input is handed back as-is.
  #
  # @param [String] input The input to process.
  # @return [String]
  #
  def run(input)
    return input unless ACCEPTED_LANGUAGES.include?(language)

    # Anything that is not already an IO is wrapped so it can be exposed to
    # the Java side as an input stream.
    input     = StringIO.new(input) unless input.kind_of?(IO)
    annotator = Java::ehu.parse.Annotate.new(language)
    reader    = InputStreamReader.new(input.to_inputstream)
    kaf       = KAFDocument.create_from_stream(reader)

    # NOTE(review): 'now' and '1.0' look like placeholder timestamp/version
    # values for the processor entry -- confirm against the KAFDocument API.
    kaf.add_linguistic_processor('constituents', "ehu-parse-#{language}", 'now', '1.0')

    if heads?
      annotator.parseWithHeads(kaf, CollinsHeadFinder.new(language))
    else
      annotator.parse(kaf)
    end

    kaf.to_string
  end

  ##
  # The language code this parser was configured with.
  #
  # @return [String]
  #
  def language
    options[:language]
  end

  ##
  # Whether {#run} should parse with head information (`parseWithHeads`)
  # rather than with the plain `parse` call. Always true in this class.
  #
  # @return [Boolean]
  #
  def heads?
    true
  end

end

Instance Method Details

#heads? ⇒ Boolean

Returns:

  • (Boolean)


96
97
98
# File 'lib/opener/constituent_parsers/base.rb', line 96

##
# Whether parsing should include head information. In this class the answer
# is unconditionally true.
#
# @return [Boolean]
#
def heads?
  true
end

#language ⇒ String

Returns:

  • (String)


92
93
94
# File 'lib/opener/constituent_parsers/base.rb', line 92

##
# Looks up the :language entry of the options hash.
#
# @return [String]
#
def language
  options[:language]
end

#run(input) ⇒ String

Parses the given KAF input and returns the resulting KAF document as a String. When the configured language is not one of ACCEPTED_LANGUAGES, the input is returned unchanged.

Parameters:

  • input (String)

    The input to process.

Returns:

  • (String)


68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/opener/constituent_parsers/base.rb', line 68

##
# Parses the given KAF input for the configured language and returns the
# serialized document. Input is handed back untouched when the language is
# not listed in ACCEPTED_LANGUAGES.
#
# @param [String] input The input to process.
# @return [String]
#
def run(input)
  # Unsupported language: nothing to parse, pass the input straight through.
  return input unless ACCEPTED_LANGUAGES.include?(language)

  # Non-IO input is wrapped so it can be exposed as a Java input stream.
  source   = input.kind_of?(IO) ? input : StringIO.new(input)
  parser   = Java::ehu.parse.Annotate.new(language)
  stream   = InputStreamReader.new(source.to_inputstream)
  document = KAFDocument.create_from_stream(stream)

  document.add_linguistic_processor(
    "constituents",
    "ehu-parse-" + language,
    "now",
    "1.0"
  )

  if heads?
    parser.parseWithHeads(document, CollinsHeadFinder.new(language))
  else
    parser.parse(document)
  end

  document.to_string
end