Class: HXL

Inherits:
Object
  • Object
show all
Defined in:
lib/hxl.rb

Defined Under Namespace

Classes: HXLColSpec, HXLColumn, HXLFormatError, HXLRow, HXLTableSpec

Class Method Summary collapse

Class Method Details

.foreach(path, &block) ⇒ Object

Raises:

  • (HXLFormatError) — if no HXL hashtag row is found in the file



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/hxl.rb', line 11

# Iterate over the logical HXL rows of the CSV file at +path+.
#
# Raw CSV rows are scanned until parse_hashtag_row recognises one as the HXL
# hashtag row. Every raw row after it is handed to parse_row, repeatedly
# while the returned disaggregation position is still below
# table_spec.get_disaggregation_count, and each resulting row is yielded.
#
# @param path [String] path passed straight to CSV.foreach
# @yield [hxl_row] each row object produced by parse_row
# @return [Enumerator, nil] an Enumerator over the same rows when called
#   without a block (mirroring CSV.foreach), otherwise nil
# @raise [HXLFormatError] if the file ends before a hashtag row was found
def self.foreach(path, &block)
  # Without a block there is nothing to yield to; hand back a lazy
  # enumerator instead of failing with LocalJumpError on the first data row.
  return enum_for(:foreach, path) unless block

  table_spec = nil
  prev_row = nil

  source_row_number = -1 # zero-based index of the raw CSV row
  row_number = -1        # zero-based index of the logical (yielded) row

  CSV.foreach(path) do |row|
    source_row_number += 1

    # Still looking for the row of HXL tags: try this row, then move on.
    # Whether or not it matched, it produces no data of its own.
    if table_spec.nil?
      table_spec = parse_hashtag_row(row, prev_row)
      # The row above the hashtag row is passed along so that
      # parse_hashtag_row can read fixed values from it.
      prev_row = row
      next
    end

    # One raw row may expand into several logical rows; parse_row reports
    # the next disaggregation position to continue from.
    disaggregation_position = 0

    loop do
      row_number += 1

      hxl_row, disaggregation_position = parse_row(row,
                                                   table_spec,
                                                   disaggregation_position,
                                                   row_number,
                                                   source_row_number)
      yield hxl_row

      break unless disaggregation_position < table_spec.get_disaggregation_count
    end
  end

  raise HXLFormatError, 'HXL hashtag row not found' if table_spec.nil?
end

.parse_hashtag(source_col_number, value) ⇒ Object



140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# File 'lib/hxl.rb', line 140

# Parse the text of one header cell as an HXL hashtag specification.
#
# Accepted forms are a single tag ("#tag" or "#tag/xx", where xx is a
# two-letter alphabetic suffix) or two such tags joined by "+". Whitespace
# around the whole value and around the "+" is tolerated.
#
# @param source_col_number [Integer] column index stored on the HXLColSpec
# @param value [String, nil] cell text to examine
# @return [HXLColSpec, nil] the column spec, or nil if +value+ is not a
#   hashtag specification
def self.parse_hashtag(source_col_number, value)
  # Pattern for a single tag with its optional "/xx" suffix
  tag_regex = /(#[\w]+)(?:\/([[:alpha:]]{2}))?/

  # Pattern for the full tag spec (optional second tag following '+').
  # Anchored with \A and \z rather than ^ and $: the line anchors would let
  # a multi-line cell through as soon as any single line of it looked like
  # a hashtag.
  full_regex = /\A\s*#{tag_regex}(?:\s*\+\s*#{tag_regex})?\s*\z/

  result = full_regex.match(value)
  return nil unless result

  first_tag = HXLColumn.new(result[1], result[2])
  return HXLColSpec.new(source_col_number, first_tag) unless result[3]

  # There were two tags. The second one is passed ahead of the first;
  # NOTE(review): presumably HXLColSpec takes (column, fixed_column) in that
  # order, making the first tag the fixed column -- confirm against HXLColSpec.
  second_tag = HXLColumn.new(result[3], result[4])
  HXLColSpec.new(source_col_number, second_tag, first_tag)
end

.parse_hashtag_row(row, prev_row) ⇒ Object



106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# File 'lib/hxl.rb', line 106

# Try to interpret a raw CSV row as the HXL hashtag row.
#
# Every non-blank cell must parse via parse_hashtag; blank or nil cells
# become untagged column specs. For a column spec that has a fixed column,
# the fixed value is read from the same position of +prev_row+.
#
# @param row [Array<String, nil>] the raw CSV row to test
# @param prev_row [Array<String, nil>, nil] the raw row preceding +row+, or
#   nil when +row+ is the first row of the file
# @return [HXLTableSpec, nil] the table spec, or nil if any non-blank cell
#   is not a hashtag or the row contains no hashtag at all
def self.parse_hashtag_row(row, prev_row)
  table_spec = HXLTableSpec.new
  seen_header = false

  row.each_with_index do |raw_value, col_num|
    value = raw_value ? raw_value.strip : nil

    # Blank cell: keep the column position with an untagged placeholder.
    if value.nil? || value.empty?
      table_spec.push HXLColSpec.new(col_num, HXLColumn.new)
      next
    end

    col_spec = self.parse_hashtag(col_num, value)
    # One non-hashtag cell disqualifies the whole row.
    return nil if col_spec.nil?

    seen_header = true

    if col_spec.fixed_column
      # There is no preceding row when the hashtag row comes first in the
      # file; leave the fixed value nil instead of indexing into nil.
      col_spec.fixed_value = prev_row ? prev_row[col_num] : nil
    end

    table_spec.push col_spec
  end

  seen_header ? table_spec : nil
end

.parse_row(row, table_spec, disaggregation_position, row_number, source_row_number) ⇒ Object



59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/hxl.rb', line 59

# Build one logical HXL row from a raw CSV row.
#
# Untagged columns are dropped and regular tagged columns are copied
# through. The first fixed (disaggregated) column encountered contributes
# two fields -- the fixed value and the cell value of the raw column that
# table_spec.get_raw_position returns for +disaggregation_position+ -- and
# any further fixed columns are skipped for this call.
#
# @param row [Array] raw CSV row
# @param table_spec [HXLTableSpec] spec built from the hashtag row
# @param disaggregation_position [Integer] which disaggregation to emit
# @param row_number [Integer] logical row number passed to HXLRow
# @param source_row_number [Integer] raw row number passed to HXLRow
# @return [Array(HXLRow, Integer)] the row and the disaggregation position
#   to use next (advanced by one if a fixed column was emitted)
def self.parse_row(row, table_spec, disaggregation_position, row_number, source_row_number)
  hxl_fields = []
  seen_fixed = false

  row.each_with_index do |value, source_col_number|
    col_spec = table_spec.col_specs[source_col_number]

    # Only parse HXL columns: skip untagged ones, and also cells that lie
    # beyond the width of the hashtag row (no col_spec at all) so that a
    # ragged data row does not fail on nil.
    next if col_spec.nil? || col_spec.column.hxl_tag.nil?

    # Regular column
    unless col_spec.fixed_column
      hxl_fields.push value
      next
    end

    # Disaggregated column: only the first one per call emits anything.
    next if seen_fixed

    raw_position = table_spec.get_raw_position(disaggregation_position)
    hxl_fields.push table_spec.col_specs[raw_position].fixed_value
    hxl_fields.push row[raw_position]

    seen_fixed = true
    disaggregation_position += 1
  end

  hxl_row = HXLRow.new(
    table_spec.hxl_headers,
    hxl_fields,
    false,
    row_number,
    source_row_number
  )

  [hxl_row, disaggregation_position]
end

.parse_source_row ⇒ Object



177
178
# File 'lib/hxl.rb', line 177

# Placeholder with no implementation: takes no arguments, performs no work
# and evaluates to nil.
def self.parse_source_row
  nil
end

.parse_table_spec(row, prev_row) ⇒ Object



170
171
172
173
174
175
# File 'lib/hxl.rb', line 170

# Intended to search for the HXL hashtag row.
#
# NOTE(review): as written this only delegates to parse_source_row and
# returns whatever that returns; +row+ and +prev_row+ are accepted but not
# consulted. The original comment described the goal as returning a
# HXLTableSpec on success and raising on failure -- not yet done here.
def self.parse_table_spec(row, prev_row)
  parse_source_row
end

.read(path) ⇒ Object



3
4
5
6
7
8
9
# File 'lib/hxl.rb', line 3

# Eagerly load the file at +path+: every row that foreach yields is
# collected, in order, into a new Array which is returned.
def self.read(path)
  [].tap do |collected|
    foreach(path) { |hxl_row| collected << hxl_row }
  end
end