Class: Dphil::LemmaList

Inherits:
Nokogiri::XML::SAX::Document
  • Object
show all
Includes:
Enumerable
Defined in:
lib/dphil/lemma_list.rb

Overview

An object containing a list of lemmata generated through SAX parsing of an XML document.

Immutable.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(source) ⇒ LemmaList

Returns a new instance of LemmaList.



15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/dphil/lemma_list.rb', line 15

# Builds the list by feeding +source+ to a Nokogiri SAX parser that uses this
# object as its document handler.
#
# The source is stringified and stripped first; when nothing is left, only the
# (empty) member array is set up and no parser is created.
#
# @param source [#to_s] XML text to parse
def initialize(source)
  @members = []
  xml = source.to_s.strip
  unless xml.empty?
    # Element names collected for the start/end handlers; the end set is the
    # start set extended with "pb" and "lb".
    @lemma_ignore_start_tags = Set.new(%w[TEI text body pre post div])
    @lemma_ignore_end_tags = @lemma_ignore_start_tags | %w[pb lb]

    # Initial parsing state (presumably consumed by SAX callbacks defined
    # elsewhere in the class -- confirm against the full file).
    @index = 0
    @open_elements = []
    @current_pb = []
    @current_lb = []
    @current_chars = ""
    @current_lemma = []
    @inside_hyphen = false
    @empty_element = true

    @parser = Nokogiri::XML::SAX::Parser.new(self)
    @parser.parse(xml)
  end
end

Instance Attribute Details

#name ⇒ Object (readonly)

Returns the value of attribute name.



13
14
15
# File 'lib/dphil/lemma_list.rb', line 13

# Reader for the +@name+ instance variable.
#
# @return [Object, nil] the current value of +@name+; +nil+ if it has never
#   been assigned
def name
  @name
end

Instance Method Details

#[](*args) ⇒ Object



43
44
45
# File 'lib/dphil/lemma_list.rb', line 43

# Element reference, delegated unchanged to the underlying member array.
#
# @param args [Array] the same arguments Array#[] accepts
#   (an index, a start/length pair, or a range)
# @return [Object, Array, nil] whatever the array lookup yields
def [](*args)
  @members.slice(*args)
end

#cx_tokens ⇒ Object



64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/dphil/lemma_list.rb', line 64

# Converts every member into a token hash.
#
# Each hash carries the member's text (+:t+), the result of
# Transliterate.normalize_iast on that text (+:n+), and the member's index
# (+:i+), page (+:p+), facs (+:f+) and line (+:l+). A warning is emitted for
# any token whose text is empty; the token is still included.
#
# @return [Array<Hash>] one hash per member, in member order
def cx_tokens
  @members.each_with_object([]) do |entry, tokens|
    token = {
      t: entry.text,
      n: Transliterate.normalize_iast(entry.text),
      i: entry.index,
      p: entry.page,
      f: entry.facs,
      l: entry.line,
    }
    warn "Token empty: #{token}" if token[:t].empty?
    tokens << token
  end
end

#each(&block) ⇒ Object



34
35
36
# File 'lib/dphil/lemma_list.rb', line 34

# Iterates over the members in order.
#
# @yield [member] each entry of the member array
# @return [Array, Enumerator] the member array when a block is given,
#   otherwise an Enumerator over it
def each(&block)
  return @members.each unless block

  @members.each(&block)
end

#get(index) ⇒ Object



47
48
49
50
51
52
53
54
# File 'lib/dphil/lemma_list.rb', line 47

# Looks up a member by 1-based position.
#
# A position below 1 emits a warning and is treated as 1.
#
# @param index [Numeric] 1-based position of the wanted member
# @return [Object, nil] the member at that position, or nil when the position
#   lies past the end of the list
# @raise [RuntimeError] if +index+ is not a Numeric
def get(index)
  raise "Non-numeric index passed to Lemma.get" unless index.kind_of?(Numeric)

  position =
    if index < 1
      warn "Minimum index of Lemma.get() is 1"
      1
    else
      index
    end
  @members[position - 1]
end

#members(limit = nil) ⇒ Object



38
39
40
41
# File 'lib/dphil/lemma_list.rb', line 38

# Returns the members, optionally only the first +limit+ of them.
#
# Without a numeric +limit+ the internal array itself is returned (not a
# copy), so callers must not mutate it. With a numeric +limit+ a new array
# holding at most that many leading members is returned.
#
# A negative +limit+ is treated as 0 and yields an empty array, so the method
# always returns an Array (a bare <tt>@members[0, limit]</tt> would return nil
# for a negative length).
#
# @param limit [Numeric, nil] maximum number of members to return; any
#   non-numeric value means "no limit"
# @return [Array] the selected members
def members(limit = nil)
  return @members unless limit.is_a? Numeric

  count = limit < 0 ? 0 : limit
  @members[0, count]
end

#size ⇒ Object



56
57
58
# File 'lib/dphil/lemma_list.rb', line 56

# Number of members currently held.
#
# @return [Integer] length of the member array
def size
  @members.length
end

#to_s ⇒ Object



60
61
62
# File 'lib/dphil/lemma_list.rb', line 60

# Renders the list as text: the +text+ of every member, one per line.
#
# @return [String] member texts joined with newline characters
#   (an empty string when there are no members)
def to_s
  lines = @members.map { |lemma| lemma.text }
  lines.join("\n")
end