Module: AtCoderFriends::Parser::PageParser

Defined in:
lib/at_coder_friends/parser/page_parser.rb

Overview

Parses a problem page and collects the problem information (constraints, input format, and sample inputs/outputs).

Constant Summary collapse

# Heading-recognition table used by find_key.
#
# Each entry is a Hash with:
#   :key      - format template for the section key; the sample entries
#               take a `no` field (the sample number).
#   :patterns - regexp sources (as Strings) that find_key compiles
#               case-insensitively and matches against a normalized heading.
#
# The table is written as [key, patterns] pairs and expanded into hashes.
SECTION_TYPES =
[
  [
    'constraints',
    ['^制約$', '^Constraints$']
  ],
  [
    'input format',
    [
      '^入出?力(形式)?$',
      '^Inputs?\s*(,|and)?\s*(Outputs?)?\s*(Format)?$'
    ]
  ],
  [
    'sample input %<no>s',
    [
      '^入力例\s*(?<no>\d+)?$',
      '^入力\s*(?<no>\d+)$',
      '^Sample\s*Input\s*(?<no>\d+)?$',
      '^Input\s*Example\s*(?<no>\d+)?$',
      '^Input\s*(?<no>\d+)$'
    ]
  ],
  [
    'sample output %<no>s',
    [
      '^出力例\s*(?<no>\d+)?$',
      '^出力\s*(?<no>\d+)$',
      '^入力例\s*(?<no>\d+)?\s*に対する出力例$',
      '^Sample\s*Output\s*(?<no>\d+)?$',
      '^Output\s*Example\s*(?<no>\d+)?$',
      '^Output\s*(?<no>\d+)$',
      '^Output\s*for\s*(the)?\s*Sample\s*Input\s*(?<no>\d+)?$'
    ]
  ]
].map { |key, patterns| { key: key, patterns: patterns } }.freeze

Class Method Summary collapse

Class Method Details

.apply_sections(pbm, sections) ⇒ Object



95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/at_coder_friends/parser/page_parser.rb', line 95

# Transfers the collected sections onto the problem object.
#
# - 'constraints'     : section text is appended to pbm.desc
# - 'input format'    : section text is appended to pbm.desc and the code
#                       block is stored in pbm.fmt
# - 'sample input N'  : code block is registered via pbm.add_smp(N, :in, code)
# - 'sample output N' : code block is registered via pbm.add_smp(N, :exp, code)
#
# Any other key is ignored. N is passed as the matched String.
#
# @param pbm [#desc, #desc=, #fmt=, #add_smp] problem object to fill in
# @param sections [Hash{String => Array(String, String)}] key => [text, code]
# @return [Hash] the +sections+ argument (result of +each+)
def apply_sections(pbm, sections)
  sections.each do |name, (body, snippet)|
    case name
    when 'constraints'
      pbm.desc += body
    when 'input format'
      pbm.desc += body
      pbm.fmt = snippet
    when /^sample input (?<no>\d+)$/
      # the `when` above just matched, so last_match holds the sample number
      pbm.add_smp(Regexp.last_match(:no), :in, snippet)
    when /^sample output (?<no>\d+)$/
      pbm.add_smp(Regexp.last_match(:no), :exp, snippet)
    end
  end
end

.collect_sections(page) ⇒ Object



53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/at_coder_friends/parser/page_parser.rb', line 53

# Scans the page headings and builds a table of recognized sections.
#
# All h2 headings are visited first, then all h3 headings. A heading whose
# title find_key does not recognize is skipped, and once a key has been
# stored, later headings mapping to the same key are ignored.
#
# @param page [#search] parsed page; +search(tag)+ must yield heading nodes
# @return [Hash{String => Array(String, String)}] section key => [text, code]
def collect_sections(page)
  %w[h2 h3].each_with_object({}) do |tag, found|
    page.search(tag).each do |heading|
      name = find_key(heading)
      next unless name

      # first occurrence wins; parse_section is not called for duplicates
      found[name] ||= parse_section(heading)
    end
  end
end

.find_key(h) ⇒ Object



66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/at_coder_friends/parser/page_parser.rb', line 66

# Maps a heading node to a section key using SECTION_TYPES.
#
# The heading text is normalized, then tried against every pattern of every
# section type in table order (case-insensitively). The first match decides
# the key; the sample number comes from the +no+ capture and defaults to '1'
# when the pattern has no such group or the group did not participate.
#
# @param h [#content] heading node
# @return [String, nil] section key, or nil when no pattern matches
def find_key(h)
  heading = normalize(h.content)
  SECTION_TYPES.each do |section_type|
    section_type[:patterns].each do |source|
      hit = Regexp.new(source, Regexp::IGNORECASE).match(heading)
      next if hit.nil?

      # named_captures yields nil both for a missing and an unmatched group
      number = hit.named_captures['no'] || '1'
      return format(section_type[:key], no: number)
    end
  end
  nil
end

.normalize(s) ⇒ Object



111
112
113
114
115
116
# File 'lib/at_coder_friends/parser/page_parser.rb', line 111

# Normalizes a heading title so it can be matched against SECTION_TYPES.
#
# Steps:
# 1. Fold the ideographic space (U+3000) and full-width digits/letters
#    (U+FF10-FF19, U+FF21-FF3A, U+FF41-FF5A) to their ASCII counterparts.
#    The source set is spelled with \u escapes on purpose: written as literal
#    characters it is easily flattened to plain ASCII, which turns the +tr+
#    into a no-op and lets step 2 delete full-width sample numbers.
# 2. Drop every character that is not kanji, kana, the long-vowel mark,
#    an underscore, an ASCII letter/digit or a space.
# 3. Strip leading and trailing whitespace.
#
# @param s [String] raw heading text
# @return [String] normalized title
def normalize(s)
  s
    .tr("\u3000\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A", ' 0-9A-Za-z')
    .gsub(/[^一-龠_ぁ-ん_ァ-ヶーa-zA-Z0-9 ]/, '')
    .strip
end

.parse_section(h) ⇒ Object



80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/at_coder_friends/parser/page_parser.rb', line 80

# Extracts the body of the section introduced by heading +h+.
#
# The section consists of the nodes following +h+ (via +next+) up to, but not
# including, the next node with the same name as +h+.
#
# @param h [#next, #name] heading node
# @return [Array(String, String)] +[text, code]+ where
#   +text+ is the concatenated content of the section nodes (CRLF -> LF), and
#   +code+ is the text of the first pre/blockquote found (the node itself or
#   its first matching descendant, preferring pre within each node), with
#   leading whitespace removed and CRLF -> LF; '' when none is found.
def parse_section(h)
  # 1. gather the nodes belonging to this section
  siblings = []
  node = h.next
  while node && node.name != h.name
    siblings << node
    node = node.next
  end

  # 2. plain text of the whole section
  text = siblings.reduce('') do |acc, sib|
    acc + sib.content.gsub("\r\n", "\n")
  end

  # 3. first code-like element, scanning nodes in document order
  block = nil
  siblings.each do |sib|
    %w[pre blockquote].each do |tag|
      block ||= (sib.name == tag ? sib : sib.search(tag)[0])
    end
  end

  raw = block.nil? ? '' : (block.text || '')
  [text, raw.lstrip.gsub("\r\n", "\n")]
end

.process(pbm) ⇒ Object



48
49
50
51
# File 'lib/at_coder_friends/parser/page_parser.rb', line 48

# Entry point: collects the sections found on +pbm.page+ and applies them
# to +pbm+.
#
# @param pbm [#page] problem object; also passed on to apply_sections
# @return [Object] the result of apply_sections
def process(pbm)
  apply_sections(pbm, collect_sections(pbm.page))
end