Module: ArkTweetNlp::Parser

Defined in:
lib/ark_tweet_nlp/parser.rb

Constant Summary collapse

# Maps each part-of-speech tag (a Symbol) to a human-readable description of
# the category it stands for.
#
# The hash is frozen so this shared lookup table cannot gain, lose or rebind
# entries at runtime (the freeze is shallow: it protects the table itself).
TAGSET = {
  :N => 'common noun',
  :O => 'pronoun, non possessive',
  :^ => 'proper noun',
  :S => 'nominal + possessive',
  :Z => 'proper noun + possessive',
  :V => 'verb including copula, auxiliaries',
  :L => 'nominal + verbal (e.g. i’m), verbal + nominal (let’s)',
  :M => 'proper noun + verbal',
  :A => 'adjective',
  :R => 'adverb',
  :! => 'interjection',
  :D => 'determiner',
  :P => 'pre- or postposition, or subordinating conjunction',
  :& => 'coordinating conjunction',
  :T => 'verb particle',
  :X => 'existential there, predeterminers',
  :Y => 'X + verbal',
  :'#' => 'hashtag (indicates topic/category for tweet)',
  :'@' => 'at-mention (indicates a user as a recipient of a tweet)',
  :~ => 'discourse marker, indications of continuation across multiple tweets',
  :U => 'URL or email address',
  :E => 'emoticon',
  :'$' => 'numeral',
  :',' => 'punctuation',
  :G => 'other abbreviations, foreign words, possessive endings, symbols, garbage'
}.freeze
# Path to the runTagger.sh script in the bin directory of the installed
# ark_tweet_nlp gem. One-expression form of:
#
#   spec     = Gem::Specification.find_by_name("ark_tweet_nlp")
#   gem_root = spec.gem_dir
#   gem_bin  = gem_root + "/bin"
#
# The gem lookup happens when this constant is evaluated, so loading fails if
# the ark_tweet_nlp gem cannot be found. The string is frozen so the shared
# path cannot be mutated in place.
TAGGER_PATH =
  File.join(Gem::Specification.find_by_name("ark_tweet_nlp").gem_dir, 'bin', 'runTagger.sh').freeze

Class Method Summary collapse

Class Method Details

.find_tags(text) ⇒ Object



42
43
44
45
# File 'lib/ark_tweet_nlp/parser.rb', line 42

# Runs the tagger on +text+ and converts every line of its output.
#
# Each run of tab characters in +text+ is collapsed to a single space before
# the text is passed to Parser.run_tagger. Output lines made of three tabs
# followed only by whitespace are dropped, and every remaining line is passed
# to Parser.convert_line.
#
# @param text [String] the text to tag
# @return [Array] the Parser.convert_line result for each remaining output line
def self.find_tags(text)
  # NOTE(review): presumably tabs are stripped because the tagger output is
  # tab-delimited — confirm against run_tagger.
  tagger_input = text.tr_s("\t", ' ')
  tagger_output = Parser.run_tagger(tagger_input)

  tagger_output
    .gsub(/\t\t\t\s*\n/, '')
    .split("\n")
    .map { |line| Parser.convert_line(line) }
end

.get_words_tagged_as(tagged_result, *tags) ⇒ Object



47
48
49
50
51
52
# File 'lib/ark_tweet_nlp/parser.rb', line 47

# Groups the words of a tagged result by tag and keeps only the requested tags.
#
# Every element of +tagged_result+ is iterated as word => tag pairs; words
# sharing a tag are collected, in order of appearance, under that tag.
#
# @param tagged_result [Enumerable<Hash>] word => tag mappings
#   (presumably the output of find_tags — confirm against caller)
# @param tags [Array] the tags to keep; with no tags the result is empty
# @return [Hash] tag => Array of words, restricted to +tags+
def self.get_words_tagged_as(tagged_result, *tags)
  words_by_tag = tagged_result.each_with_object({}) do |tagged_words, groups|
    tagged_words.each { |word, tag| (groups[tag] ||= []) << word }
  end

  words_by_tag.select { |tag, _words| tags.include?(tag) }
end

.ola ⇒ Object



38
39
40
# File 'lib/ark_tweet_nlp/parser.rb', line 38

# Returns the fixed string "ola".
#
# NOTE(review): looks like a leftover smoke-test helper — confirm it is still
# needed.
#
# @return [String] always "ola"
def self.ola
  'ola'
end