Class: Forspell::Loaders::Markdown

Inherits:
Base
  • Object
show all
Defined in:
lib/forspell/loaders/markdown.rb

Defined Under Namespace

Classes: FilteredHash

Constant Summary collapse

PARSER =
'GFM'
SPECIAL_CHARS_MAP =
{
  lsquo: "'",
  rsquo: "'",
  ldquo: '"',
  rdquo: '"'
}.freeze

Instance Method Summary collapse

Methods inherited from Base

#initialize, #read

Constructor Details

This class inherits a constructor from Forspell::Loaders::Base

Instance Method Details

#extract_words ⇒ Object



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/forspell/loaders/markdown.rb', line 39

# Extracts the words to be spell-checked from the Markdown held in +@input+.
#
# The input is parsed by Kramdown using the +PARSER+ input format, converted
# with +FilteredHash+, and reduced to chunks by +extract_chunks+. Chunks that
# share a +:location+ are concatenated (values found in +SPECIAL_CHARS_MAP+
# are replaced by their mapped character) and the resulting text is split
# into words on whitespace and on punctuation, except for the characters
# <tt>- ' _ . / \ :</tt>, which are kept inside words.
#
# @return [Array<Word>] one +Word+ per non-empty token, built from +@file+,
#   the chunk location (0 when the location is nil) and the token text;
#   empty when there are no chunks
# @raise [Forspell::Loaders::ParsingError] when a RuntimeError is raised
#   while parsing the document or extracting words
def extract_words
  document = Kramdown::Document.new(@input, input: PARSER)
  tree = FilteredHash.new.convert(document.root, document.options)

  extract_chunks(tree).group_by { |chunk| chunk[:location] }.flat_map do |location, chunks|
    text = chunks.map { |chunk| SPECIAL_CHARS_MAP[chunk[:value]] || chunk[:value] }.join

    # The `&&` intersection only has its set meaning inside a bracket
    # expression. Written outside the brackets it is a literal "&&", and the
    # punctuation alternative would effectively never match.
    text.split(%r{[[:punct:]&&[^-'_./\\:]]|\s})
        .reject(&:empty?)
        .map { |word| Word.new(@file, location || 0, word) }
  end
rescue RuntimeError => e
  raise Forspell::Loaders::ParsingError, e.message
end