Module: XLSX2Shape

Defined in:
lib/xlsx2shape.rb

Instance Method Summary

Instance Method Details

#escape_turtle(str) ⇒ Object



119
120
121
# File 'lib/xlsx2shape.rb', line 119

# Escapes a string so it can be placed between double quotes as a Turtle
# short string literal.
#
# Backslash and double quote are backslash-escaped; line feed and carriage
# return are rewritten as the two-character sequences \n and \r, because a
# raw line break is not allowed inside a single-line quoted literal.
#
# @param str [String] raw text
# @return [String] a new string with the escapes applied
def escape_turtle(str)
  replacements = {
    "\\" => "\\\\",
    "\"" => "\\\"",
    "\n" => "\\n",
    "\r" => "\\r"
  }
  # Single pass over the input, so already-inserted backslashes are never re-escaped.
  str.gsub(/[\\"\n\r]/, replacements)
end

#format_property(property, value, lang = nil) ⇒ Object



106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/xlsx2shape.rb', line 106

# Renders one "property value" pair as an indented Turtle predicate/object
# fragment (two leading spaces, no trailing separator).
#
# When +value+ is an Array, every element is formatted with format_pvalue,
# the formatted results are sorted, and they are joined with ", " to form
# an object list. Otherwise the single value is formatted directly.
#
# @param property [String] predicate text, emitted as-is
# @param value [Object, Array] value or list of values handed to format_pvalue
# @param lang [String, nil] optional language tag forwarded to format_pvalue
#   for the scalar value and for each element of an Array
# @return [String] the formatted fragment
def format_property(property, value, lang = nil)
  formatted =
    if value.is_a?(Array)
      # Format each element once (with the language tag), then sort the
      # rendered forms so the output order is deterministic.
      value.map { |element| format_pvalue(element, lang) }.sort.join(", ")
    else
      format_pvalue(value, lang)
    end
  "  #{property} #{formatted}"
end

#format_pvalue(value, lang = nil) ⇒ Object



80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/xlsx2shape.rb', line 80

# Renders a single value as a Turtle object term.
#
# Dispatch, in order:
# * Hash             -> bracketed block; keys are sorted and each pair is
#                       rendered through format_property, joined by ";\n"
# * Integer          -> returned unchanged
# * http(s):// text  -> wrapped in angle brackets
# * prefix:local     -> returned unchanged
# * text^^pfx:type   -> quoted, escaped lexical form followed by ^^pfx:type
# * anything else    -> quoted, escaped literal, with @lang appended when
#                       +lang+ is given
#
# @param value [Hash, Integer, String] value to render
# @param lang [String, nil] language tag, used only for plain literals
# @return [String, Integer] the rendered term
def format_pvalue(value, lang = nil)
  if value.is_a?(Hash)
    members = value.keys.sort.map { |key| format_property(key, value[key]) }
    return ["[", members.join(";\n"), "  ]"].join("\n")
  end

  return value if value.is_a?(Integer)
  return "<#{value}>" if value =~ /\Ahttps?:\/\//
  return value if value =~ /\A\w+:[\w\-\.]+\Z/

  if value =~ /\A(.+?)\^\^(\w+:\w+)\z/
    # Capture both groups before calling out to the escaping helper.
    lexical = Regexp.last_match(1)
    datatype = Regexp.last_match(2)
    return "\"#{escape_turtle(lexical)}\"^^#{datatype}"
  end

  literal = "\"#{escape_turtle(value)}\""
  lang ? "#{literal}@#{lang}" : literal
end

#map_xlsx_row_headers(data_row, headers) ⇒ Object



73
74
75
76
77
78
79
# File 'lib/xlsx2shape.rb', line 73

# Pairs each header with the cell at the same position in +data_row+.
#
# Cells are converted with to_s, so a missing (nil) cell becomes "".
# If a header occurs more than once, the last occurrence wins.
#
# @param data_row [Array] cells of one row, indexed by column position
# @param headers [Array] header cells, in column order
# @return [Hash] header => stringified cell
def map_xlsx_row_headers(data_row, headers)
  headers.each_with_index.each_with_object({}) do |(header, column), mapped|
    mapped[header] = data_row[column].to_s
  end
end

#xlsx2shape(filename) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/xlsx2shape.rb', line 6

# Converts an Excel workbook describing SHACL shapes into Turtle text.
#
# Sheet handling:
# * A sheet named "prefix" (case-insensitive) supplies prefix declarations:
#   first cell is the prefix name, second cell the namespace. Rows whose
#   second cell is blank are skipped. The "sh" prefix is always present.
# * Every other sheet describes one node shape. Row 1 holds the headers and
#   its first cell is used as the shape URI. Each row is dispatched on its
#   first cell:
#   - "sh:targetClass": second cell becomes the target class (skipped when blank)
#   - "sh:property":    every non-blank column is emitted under its header
#                       inside a "sh:property [ ... ]" block, followed by an
#                       incrementing sh:order
#   - "sh:or":          remaining non-blank cells become a parenthesised list
#   Rows with any other first cell are ignored.
#
# Blank checks go through to_s because cells may not be Strings
# (presumably Roo yields nil for empty cells and numbers for numeric
# cells -- confirm against the Roo version in use).
#
# @param filename [String] path passed to Roo::Excelx.new
# @return [String] "@prefix" lines sorted by prefix, then each shape sorted by URI
def xlsx2shape(filename)
  shapes = {}
  prefix = { sh: "http://www.w3.org/ns/shacl#" }
  xlsx = Roo::Excelx.new(filename)
  xlsx.each_with_pagename do |name, sheet|
    if name =~ /\Aprefix\z/i
      sheet.each do |row|
        prefix[row[0].to_s.intern] = row[1] unless row[1].to_s.empty?
      end
    else
      headers = sheet.row(1)
      uri = headers.first
      shapes[uri] = ["<#{uri}> a sh:NodeShape"]
      order = 1
      sheet.each do |row|
        row_h = map_xlsx_row_headers(row, headers)
        case row.first
        when "sh:targetClass"
          shapes[uri] << format_property("sh:targetClass", row[1]) unless row[1].to_s.empty?
        when "sh:property"
          prop_values = []
          headers[1..-1].each do |prop|
            next if row_h[prop].empty?
            case prop
            when /\@(\w+)\z/
              # Header of the form "property@lang": strip the tag from the
              # property name and emit the cell as a language-tagged literal.
              lang = Regexp.last_match(1)
              property_name = prop.sub(/\@(\w+)\z/, "")
              prop_values << format_property(property_name, row_h[prop], lang)
            when "sh:minCount", "sh:maxCount"
              prop_values << format_property(prop, row_h[prop].to_i)
            when "sh:languageIn"
              languages = row_h[prop].split.map { |e| format_pvalue(e) }
              prop_values << "  sh:languageIn (#{languages.join(" ")})"
            when "sh:uniqueLang"
              case row_h[prop]
              when "true"
                prop_values << "  sh:uniqueLang true"
              when "false"
                prop_values << "  sh:uniqueLang false"
              else
                logger.warn "sh:uniqueLang value unknown: #{row_h[prop]} at #{uri}"
              end
            else
              prop_values << format_property(prop, row_h[prop])
            end
          end
          # sh:order records the position of this row among the sheet's property rows.
          prop_values << format_property("sh:order", order)
          order += 1
          str = prop_values.join(";\n  ")
          shapes[uri] << "  sh:property [\n  #{str}\n  ]"
        when "sh:or"
          alternatives = row[1..-1].reject { |e| e.to_s.empty? }.map { |e| format_pvalue(e) }
          shapes[uri] << "  sh:or (#{alternatives.join(" ")})"
        end
      end
    end
  end
  result = ""
  prefix.sort_by { |k, v| [k, v] }.each do |prefix_name, namespace|
    result << "@prefix #{prefix_name}: <#{namespace}>.\n"
  end
  shapes.sort_by { |shape_uri, _statements| shape_uri }.each do |_shape_uri, statements|
    result << "\n"
    result << statements.join(";\n")
    result << ".\n"
  end
  result
end