Module: RegexpPropertyValues

Defined in:
lib/regexp_property_values.rb,
lib/regexp_property_values/version.rb,
lib/regexp_property_values/extension.rb

Defined Under Namespace

Modules: Extension

Constant Summary

LIST_URL =
'https://raw.githubusercontent.com/k-takata/Onigmo/master/doc/UnicodeProps.txt'
VERSION =
'0.3.0'

Class Method Summary

Class Method Details

.[](prop) ⇒ Object



76
77
78
# File 'lib/regexp_property_values.rb', line 76

# Decorates a property name with the Extension mixin.
#
# Unfrozen objects are extended in place and returned as-is. Frozen objects
# (e.g. string literals under `# frozen_string_literal: true`) cannot take a
# singleton mixin, so an unfrozen copy is extended and returned instead.
#
# @param prop [String] property name to decorate
# @return [String] prop itself, or a copy of it if prop was frozen,
#   extended with Extension
def [](prop)
  # Extending a frozen object raises FrozenError; work on a copy in that case.
  prop = prop.dup if prop.frozen?
  prop.extend(Extension)
end

.add_oniguruma_properties(props_by_category) ⇒ Object



46
47
48
# File 'lib/regexp_property_values.rb', line 46

# Appends the extra 'Newline' property to the 'Special' category.
#
# The 'Special' category is created if the given hash does not contain it
# yet, so an empty or partial property list does not raise NoMethodError.
#
# @param props_by_category [Hash{String => Array<String>}] properties grouped
#   by category name; modified in place
# @return [Array<String>] the updated 'Special' category
def add_oniguruma_properties(props_by_category)
  # dup: a frozen literal (frozen_string_literal) could not be extended.
  (props_by_category['Special'] ||= []) << 'Newline'.dup.extend(Extension)
end

.alias_hash ⇒ Object



50
51
52
53
54
55
56
57
58
59
60
# File 'lib/regexp_property_values.rb', line 50

# Builds a mapping from short property names to their long equivalents.
#
# Properties that match exactly the same codepoints are treated as aliases of
# each other. Names from the 'POSIX brackets' category are never used as the
# long name.
#
# @return [Hash{String => String}] short name => long name; empty when no
#   short names are known
# @raise [RuntimeError] if a group of two or more equivalent properties
#   contains no long name
def alias_hash
  abbreviations, full_names = short_and_long_names
  return {} if abbreviations.empty?

  canonical_names = full_names - by_category['POSIX brackets']
  aliases = {}
  by_matched_codepoints.each_value do |group|
    # A property without any equivalent has nothing to be aliased to.
    next unless group.size >= 2

    canonical = (group & canonical_names).first
    raise "no long name for #{group}" unless canonical

    (group & abbreviations).each { |abbreviation| aliases[abbreviation] = canonical }
  end
  aliases
end

.all ⇒ Object



25
26
27
# File 'lib/regexp_property_values.rb', line 25

# Lists every known property value, regardless of category.
#
# @return [Array] the values of all categories from by_category, flattened
#   in category order
def all
  grouped = by_category
  grouped.each_value.to_a.flatten
end

.all_for_current_ruby ⇒ Object



29
30
31
# File 'lib/regexp_property_values.rb', line 29

# Lists the property values that report themselves as supported by the
# running Ruby.
#
# @return [Array] the elements of all whose supported_by_current_ruby?
#   returns a truthy value
def all_for_current_ruby
  all.select { |property| property.supported_by_current_ruby? }
end

.by_category ⇒ Object



33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/regexp_property_values.rb', line 33

# Parses the property list file into a hash of property names per category.
#
# Lines of the form "* Name" start a category; lines indented by exactly four
# spaces add a value to the most recently started category. All other lines
# are ignored. Every value is extended with Extension, and the result is
# passed through add_oniguruma_properties before being returned.
#
# The file is re-read on every call.
#
# @return [Hash{String => Array<String>}] category name => property names
def by_category
  # Parser state lives in a local declared outside the block so that it
  # survives across lines without leaking into an instance variable that
  # would persist between calls and be shared across threads.
  current_category = nil
  result = File.foreach(file_path).each_with_object({}) do |line, hash|
    # The regexp literal must stay on the left-hand side of =~ for the named
    # captures to be assigned to local variables.
    if /^\* (?<category>\S.+)/ =~ line
      current_category = category
      hash[current_category] ||= []
    elsif /^ {4}(?<value_name>\S.*)/ =~ line
      hash[current_category] << value_name.extend(Extension)
    end
  end
  add_oniguruma_properties(result)
  result
end

.by_matched_codepoints ⇒ Object



71
72
73
74
# File 'lib/regexp_property_values.rb', line 71

# Groups the properties supported by the current Ruby by the codepoints they
# match, so that equivalent properties end up in the same group.
#
# Prints a progress notice to standard output first.
#
# @return [Hash] matched_codepoints value => array of properties sharing it
def by_matched_codepoints
  puts 'Establishing property codepoints, this may take a bit ...'
  candidates = all_for_current_ruby
  candidates.group_by { |property| property.matched_codepoints }
end

.file_path ⇒ Object



21
22
23
# File 'lib/regexp_property_values.rb', line 21

# Location of the property list file, which sits in the same directory as
# this source file.
#
# @return [String] absolute path to UnicodeProps.txt
def file_path
  source_dir = File.dirname(__FILE__)
  File.expand_path('UnicodeProps.txt', source_dir)
end

.short_and_long_names ⇒ Object



62
63
64
65
66
67
68
69
# File 'lib/regexp_property_values.rb', line 62

# Splits all property names into short names and long names, based on the
# category they are listed under.
#
# @return [Array(Array<String>, Array<String>)] a pair of
#   [short names, long names], each in category order
def short_and_long_names
  # Categories whose entries are treated as short names; everything else
  # counts as a long name.
  short_name_categories = [
    'Major and General Categories',
    'PropertyAliases',
    'PropertyValueAliases (Script)'
  ]
  short_names = []
  long_names = []
  by_category.each_pair do |cat_name, props|
    if short_name_categories.include?(cat_name)
      short_names.concat(props)
    else
      long_names.concat(props)
    end
  end
  [short_names, long_names]
end

.update ⇒ Object



14
15
16
17
18
19
# File 'lib/regexp_property_values.rb', line 14

# Downloads the property list from LIST_URL and stores it at file_path,
# printing progress messages to standard output.
#
# The local file is only opened (and thereby truncated) once the remote
# resource has been opened successfully, so a failed download leaves the
# existing list untouched. The remote stream is closed when the block exits.
#
# @return [nil]
def update
  puts "Downloading #{LIST_URL}"
  require 'open-uri' # loaded lazily; only needed when updating
  # Kernel#open no longer accepts URLs as of Ruby 3.0; URI.open is its
  # replacement. Fall back to Kernel#open on Rubies that predate URI.open.
  fetch = URI.respond_to?(:open) ? URI.method(:open) : method(:open)
  fetch.call(LIST_URL) do |remote|
    File.open(file_path, 'w') { |file| IO.copy_stream(remote, file) }
  end
  puts 'Done!'
end