Module: Myasorubka::Treebank

Extended by:
Treebank
Included in:
Treebank
Defined in:
lib/myasorubka/treebank.rb

Overview

The Penn Treebank Project annotates naturally occurring text for linguistic structure. Most notably, we produce skeletal parses showing rough syntactic and semantic information — a bank of linguistic trees.

Treebanks are often created on top of a corpus that has already been annotated with part-of-speech tags. In turn, treebanks are sometimes enhanced with semantic or other linguistic information.

Instance Method Summary collapse

Instance Method Details

#english(tag) ⇒ Object

Convert the given tag from English Penn Treebank format to the English representation in the MULTEXT-East format.



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/myasorubka/treebank.rb', line 18

# Attributes assigned to the MSD for each recognised Penn Treebank tag.
# Each inner hash is applied in insertion order, :pos first.
#
# Both spellings of the proper-noun and pronoun tags are accepted: the
# early tagset names (NP, NPS, PP, PP$) and the names used in the released
# treebank (NNP, NNPS, PRP, PRP$).
PENN_ENGLISH_ATTRIBUTES = {
  'CC'   => { pos: :conjunction, type: :coordinating },
  'CD'   => { pos: :numeral, type: :cardinal },
  'DT'   => { pos: :determiner },
  'IN'   => { pos: :conjunction, type: :subordinating },
  'JJ'   => { pos: :adjective },
  'JJR'  => { pos: :adjective, degree: :comparative },
  'JJS'  => { pos: :adjective, degree: :superlative },
  'MD'   => { pos: :verb, type: :modal },
  'NN'   => { pos: :noun, type: :common, number: :singular },
  'NNS'  => { pos: :noun, type: :common, number: :plural },
  'NP'   => { pos: :noun, type: :proper, number: :singular },
  'NNP'  => { pos: :noun, type: :proper, number: :singular },
  'NPS'  => { pos: :noun, type: :proper, number: :plural },
  'NNPS' => { pos: :noun, type: :proper, number: :plural },
  'PDT'  => { pos: :determiner },
  'PP'   => { pos: :pronoun, type: :personal },
  'PRP'  => { pos: :pronoun, type: :personal },
  'PP$'  => { pos: :pronoun, type: :possessive },
  'PRP$' => { pos: :pronoun, type: :possessive },
  'RB'   => { pos: :adverb },
  'RBR'  => { pos: :adverb, degree: :comparative },
  'RBS'  => { pos: :adverb, degree: :superlative },
  'TO'   => { pos: :determiner },
  'UH'   => { pos: :interjection },
  'VB'   => { pos: :verb, type: :base },
  'VBD'  => { pos: :verb, type: :base, tense: :past },
  'VBG'  => { pos: :verb, type: :base, vform: :participle, tense: :present },
  'VBN'  => { pos: :verb, type: :base, vform: :participle, tense: :past },
  'VBP'  => { pos: :verb, type: :base, tense: :present, number: :singular },
  'VBZ'  => { pos: :verb, type: :base, tense: :present, person: :third,
              number: :singular },
  'WDT'  => { pos: :determiner },
  'WP'   => { pos: :pronoun },
  'WP$'  => { pos: :pronoun, type: :possessive },
  'WRB'  => { pos: :adverb }
}.each_value(&:freeze).freeze

# Attributes used for any tag that is not listed in the table above.
PENN_ENGLISH_RESIDUAL = { pos: :residual }.freeze

# Convert the given tag from English Penn Treebank format to the English
# representation in the MULTEXT-East format.
#
# @param tag [String] a Penn Treebank part-of-speech tag, e.g. 'NNS'.
# @return [Myasorubka::MSD] a new English MSD with the attributes for the
#   tag assigned; a tag that is not in the table gets only
#   +pos = :residual+.
def english(tag)
  msd = Myasorubka::MSD.new(Myasorubka::MSD::English)

  attributes = PENN_ENGLISH_ATTRIBUTES.fetch(tag, PENN_ENGLISH_RESIDUAL)
  attributes.each { |name, value| msd[name] = value }

  msd
end