Class: Perseus::IndexXML

Inherits:
CTSElement show all
Defined in:
lib/perseus/index_xml.rb

Direct Known Subclasses

FileIndexXML, NetworkIndexXML

Instance Attribute Summary collapse

Attributes inherited from CTSElement

#to_h, #to_json, #to_s

Instance Method Summary collapse

Instance Attribute Details

#corpus_by_edition ⇒ Object (readonly)

Returns the value of attribute corpus_by_edition.



6
7
8
# File 'lib/perseus/index_xml.rb', line 6

# Plain reader for @corpus_by_edition.
#
# Performs no computation: it returns whatever is currently stored in the
# instance variable, which is nil until something assigns it (by_edition
# memoizes its result into this same variable).
def corpus_by_edition
  @corpus_by_edition
end

#corpus_by_groupname ⇒ Object (readonly)

Returns the value of attribute corpus_by_groupname.



6
7
8
# File 'lib/perseus/index_xml.rb', line 6

# Plain reader for @corpus_by_groupname.
#
# Performs no computation: it returns whatever is currently stored in the
# instance variable, which is nil until something assigns it (by_groupname
# memoizes its result into this same variable).
def corpus_by_groupname
  @corpus_by_groupname
end

Instance Method Details

#by_edition ⇒ Object



10
11
12
# File 'lib/perseus/index_xml.rb', line 10

# Returns the edition-oriented structure, building it on first use.
#
# The result of generate_structure_by_edition is cached in
# @corpus_by_edition; later calls return the cached object as long as it is
# truthy.
def by_edition
  return @corpus_by_edition if @corpus_by_edition

  @corpus_by_edition = generate_structure_by_edition
end

#by_groupname ⇒ Object



7
8
9
# File 'lib/perseus/index_xml.rb', line 7

# Returns the textgroup-oriented structure, building it on first use.
#
# The result of generate_structure_by_group is cached in
# @corpus_by_groupname; later calls return the cached object as long as it
# is truthy.
def by_groupname
  return @corpus_by_groupname if @corpus_by_groupname

  @corpus_by_groupname = generate_structure_by_group
end

#generate_json_indeces ⇒ Object



89
90
91
92
93
94
95
# File 'lib/perseus/index_xml.rb', line 89

# Writes both indexes to disk as pretty-printed JSON.
#
# The by-groupname structure goes to Perseus::CTS_BY_GROUP_JSON_FILE first,
# then the by-edition structure goes to Perseus::ALL_EDITIONS_JSON. A
# progress line is printed before each file and a final "DONE" afterwards.
def generate_json_indeces
  puts "Generating index by groupname"
  group_path = Perseus::CTS_BY_GROUP_JSON_FILE
  File.write(group_path, JSON.pretty_generate(by_groupname))

  puts "Generating index by edition"
  edition_path = Perseus::ALL_EDITIONS_JSON
  File.write(edition_path, JSON.pretty_generate(by_edition))

  puts "DONE".green
end

#generate_structure_by_edition ⇒ Object



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/perseus/index_xml.rb', line 35

# Flattens the textgroup structure into one entry per edition/translation.
#
# Walks every work of every textgroup returned by by_groupname and emits a
# CorpusHash per edition and per translation. Each entry starts from
# groupname/language/type and is then merged with the edition (or
# translation) record itself; presumably the record's own keys win on
# collision, as with Hash#merge -- confirm against CorpusHash.
#
# The language of an edition is taken from the work's "xml:lang"; the
# language of a translation is taken from the translation's own "xml:lang".
#
# A work that raises a StandardError is reported on stdout and skipped;
# entries already produced for that work before the failure are kept.
#
# @return [Array] the flattened entries, in textgroup/work order
def generate_structure_by_edition
  new_corpus = []
  # Go through the memoizing accessor rather than the bare reader, so this
  # also works when the by-groupname structure has not been built yet.
  by_groupname.each do |t|
    groupname = t.groupname
    t.work.each do |work|
      begin
        unless work["edition"].nil?
          # A work carries either one edition or an Array of them;
          # normalise to an Array so both cases share one code path.
          editions = work.edition
          editions = [editions] unless editions.kind_of?(Array)
          editions.each do |edition|
            new_corpus.push(CorpusHash.new({
              groupname: groupname,
              language: work["xml:lang"],
              type: "edition",
            }).merge(edition))
          end
        end

        unless work["translation"].nil?
          # Same single-or-many normalisation for translations.
          translations = work.translation
          translations = [translations] unless translations.kind_of?(Array)
          translations.each do |translation|
            # NOTE(review): translations are tagged type "edition" exactly as
            # before; this looks like it may have been meant to be
            # "translation" -- confirm with consumers of the JSON.
            new_corpus.push(CorpusHash.new({
              groupname: groupname,
              language: translation["xml:lang"],
              type: "edition",
            }).merge(translation))
          end
        end
      rescue StandardError => e
        # Only StandardError is rescued so Interrupt/SystemExit still stop
        # the run. to_s keeps the report itself from raising on a nil
        # groupname.
        puts "exception: #{e.message.red}"
        puts "We were working in group: #{groupname.to_s.cyan} with the following data point:".green
        puts work.inspect.yellow
      end
    end
  end
  new_corpus
end

#generate_structure_by_group ⇒ Object



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/perseus/index_xml.rb', line 14

# Builds one CorpusHash per textgroup found under
# to_h["TextInventory"]["textgroup"], normalising each group's `work`.
#
# When iterating a group's `work` yields Array items (which is what
# happens when `work` is a single Hash and its key/value pairs are
# enumerated -- presumably a textgroup with only one work; confirm), the
# pairs are gathered into a fresh CorpusHash and `work` is replaced by a
# one-element Array holding it. The replacement happens only when the last
# item is an Array. Groups whose `work` items are not Arrays are returned
# untouched.
#
# @return [Array] the textgroups, in inventory order
def generate_structure_by_group
  textgroups = to_h["TextInventory"]["textgroup"].map { |text| CorpusHash.new(text) }

  textgroups.map do |group|
    collected = CorpusHash.new
    works = group.work

    works.each_with_index do |entry, index|
      next unless entry.kind_of?(Array)

      # entry is a [key, value] pair; rebuild the work record from them.
      collected[entry[0]] = entry[1]
      # Swap in the wrapped record once the final pair has been stored.
      group.work = [collected] if index == works.size - 1
    end

    group
  end
end