Class: KvgCharacterRecognition::Template

Inherits:
Object
  • Object
show all
Extended by:
Trainer
Defined in:
lib/kvg_character_recognition/template.rb

Class Method Summary collapse

Methods included from Trainer

heatmaps, preprocess

Class Method Details

.parse_from_xml(xml, datastore, kanji_list = []) ⇒ Object

This method populates the datastore with template patterns parsed from the KanjiVG file in XML format. Params:

xml

Path to the KanjiVG XML file. Download the latest XML release from github.com/KanjiVG/kanjivg/releases

datastore

JSONDatastore or custom datastore type having methods store, persist!



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/kvg_character_recognition/template.rb', line 8

def self.parse_from_xml xml, datastore, kanji_list=[]
  file = File.open(xml) { |f| Nokogiri::XML(f) }

  file.xpath("//kanji").each do |kanji|
    #id has format: "kvg:kanji_CODEPOINT"
    codepoint = kanji.attributes["id"].value.split("_")[1]
    value = [codepoint.hex].pack("U")
    if kanji_list.empty?
      next unless codepoint.hex >= "04e00".hex && codepoint.hex <= "09faf".hex
    else
      next unless codepoint.hex >= "04e00".hex && codepoint.hex <= "09faf".hex && kanji_list.include?(value)
    end
    puts "#{codepoint} #{value}"

    # parse strokes
    strokes = kanji.xpath("g//path").map{|p| p.attributes["d"].value }.map{ |stroke| KvgCharacterRecognition::KvgParser::Stroke.new(stroke).to_a }

    strokes = preprocess(strokes)

    #Store to database
    #--------------
    character = {
      value: value,
      codepoint: codepoint.hex,
      number_of_strokes: strokes.count,
      number_of_points: @number_of_points,
      heatmaps: heatmaps(strokes)
    }

    datastore.store character
  end

  datastore.persist!
end