Module: AtCoderFriends::Parser::PageParser
- Defined in:
- lib/at_coder_friends/parser/page_parser.rb
Overview
Parses a problem page and collects the problem information.
Constant Summary collapse
# Table that classifies page headings into section kinds.
#
# Each entry holds a `:key` (a format string; `%<no>s` is the placeholder
# for the sample number) and `:patterns`, a list of regexp sources that a
# heading title is tested against. Only the outer array is frozen.
SECTION_TYPES = [
  {
    key: 'constraints',
    patterns: [
      '^制約$',
      '^Constraints$'
    ]
  },
  {
    key: 'input format',
    patterns: [
      '^入出?力(形式)?$',
      '^Inputs?\s*(,|and)?\s*(Outputs?)?\s*(Format)?$'
    ]
  },
  {
    key: 'sample input %<no>s',
    patterns: [
      '^入力例\s*(?<no>\d+)?$',
      '^入力\s*(?<no>\d+)$',
      '^Sample\s*Input\s*(?<no>\d+)?$',
      '^Input\s*Example\s*(?<no>\d+)?$',
      '^Input\s*(?<no>\d+)$'
    ]
  },
  {
    key: 'sample output %<no>s',
    patterns: [
      '^出力例\s*(?<no>\d+)?$',
      '^出力\s*(?<no>\d+)$',
      '^入力例\s*(?<no>\d+)?\s*に対する出力例$',
      '^Sample\s*Output\s*(?<no>\d+)?$',
      '^Output\s*Example\s*(?<no>\d+)?$',
      '^Output\s*(?<no>\d+)$',
      '^Output\s*for\s*(the)?\s*Sample\s*Input\s*(?<no>\d+)?$'
    ]
  }
].freeze
Class Method Summary collapse
- .apply_sections(pbm, sections) ⇒ Object
- .collect_sections(page) ⇒ Object
- .find_key(h) ⇒ Object
- .normalize(s) ⇒ Object
- .parse_section(h) ⇒ Object
- .process(pbm) ⇒ Object
Class Method Details
.apply_sections(pbm, sections) ⇒ Object
95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
# File 'lib/at_coder_friends/parser/page_parser.rb', line 95
#
# Copies the collected section contents onto the problem object.
#
# @param pbm the problem object; must respond to `desc`, `desc=`, `fmt=`
#   and `add_smp(no, kind, code)`
# @param sections a hash of section key => [text, code]
# @return the `sections` argument (the result of `each`)
def apply_sections(pbm, sections)
  sections.each do |key, (text, code)|
    if key == 'constraints'
      pbm.desc += text
    elsif key == 'input format'
      # The format section contributes both description text and the
      # input format definition.
      pbm.desc += text
      pbm.fmt = code
    elsif (sample_in = /^sample input (?<no>\d+)$/.match(key))
      pbm.add_smp(sample_in[:no], :in, code)
    elsif (sample_out = /^sample output (?<no>\d+)$/.match(key))
      pbm.add_smp(sample_out[:no], :exp, code)
    end
    # Any other key is ignored.
  end
end
.collect_sections(page) ⇒ Object
53 54 55 56 57 58 59 60 61 62 63 64 |
# File 'lib/at_coder_friends/parser/page_parser.rb', line 53
#
# Scans the page headings and builds a hash of recognised sections.
#
# All `h2` headings are visited before any `h3` heading. For each key the
# first heading that yields it wins; later headings with the same key are
# not parsed.
#
# @param page an object responding to `search(tag)` and returning the
#   matching heading nodes
# @return [Hash] section key => result of `parse_section` for that heading
def collect_sections(page)
  %w[h2 h3].each_with_object({}) do |tag, found|
    page.search(tag).each do |heading|
      key = find_key(heading)
      next unless key

      found[key] ||= parse_section(heading)
    end
  end
end
.find_key(h) ⇒ Object
66 67 68 69 70 71 72 73 74 75 76 77 78 |
# File 'lib/at_coder_friends/parser/page_parser.rb', line 66
#
# Determines which section a heading introduces.
#
# The heading content is normalized and tested, case-insensitively, against
# every pattern in SECTION_TYPES in declaration order. The first match
# decides the key. When the pattern has no `no` capture, or the capture did
# not participate in the match, the number defaults to '1'.
#
# @param h a heading node responding to `content`
# @return [String, nil] the formatted section key, or nil when no pattern
#   matches
def find_key(h)
  heading = normalize(h.content)
  SECTION_TYPES.each do |section|
    section[:patterns].each do |source|
      matched = Regexp.new(source, Regexp::IGNORECASE).match(heading)
      next if matched.nil?

      number = matched.names.include?('no') ? matched['no'] : nil
      return format(section[:key], no: number || '1')
    end
  end
  nil
end
.normalize(s) ⇒ Object
111 112 113 114 115 116 |
# File 'lib/at_coder_friends/parser/page_parser.rb', line 111
#
# Normalizes a heading title so it can be matched against section patterns.
#
# Steps, in order:
# 1. full-width space, digits and Latin letters are converted to their
#    half-width (ASCII) equivalents;
# 2. every character outside the allowed set (the ranges 一-龠, ぁ-ん and
#    ァ-ヶ, the prolonged sound mark ー, underscore, ASCII letters and digits,
#    and space) is removed;
# 3. leading and trailing whitespace is stripped.
#
# @param s [String] the raw heading text
# @return [String] the normalized title
def normalize(s)
  s
    # The source set is spelled with \u escapes (U+3000, U+FF10-FF19,
    # U+FF21-FF3A, U+FF41-FF5A) so the full-width characters cannot be
    # silently folded into their ASCII look-alikes, which would turn this
    # conversion into a no-op and let the gsub below delete them.
    .tr("\u3000\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A", ' 0-9A-Za-z')
    .gsub(/[^一-龠_ぁ-ん_ァ-ヶーa-zA-Z0-9 ]/, '')
    .strip
end
.parse_section(h) ⇒ Object
80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
# File 'lib/at_coder_friends/parser/page_parser.rb', line 80
#
# Extracts the body of the section that follows a heading.
#
# Walks the heading's following siblings until the end of the sibling chain
# or the next node with the same name as the heading. The content of every
# visited sibling is concatenated (CRLF converted to LF). The code block is
# the first `pre` or `blockquote` found, checking `pre` before `blockquote`
# on each sibling: either the sibling itself or the first such node found
# by searching inside it.
#
# @param h the heading node; it and its siblings must respond to `name`,
#   `next`, `content` and `search(tag)`
# @return [Array(String, String)] the section text and the code block text
#   (leading whitespace removed, CRLF converted to LF; '' when none found)
def parse_section(h)
  body = +''
  block = nil
  node = h.next
  until node.nil? || node.name == h.name
    body << node.content.gsub("\r\n", "\n")
    %w[pre blockquote].each do |tag|
      # Once a block has been found it is never replaced.
      block ||= node.name == tag ? node : node.search(tag)[0]
    end
    node = node.next
  end
  raw = block&.text || ''
  [body, raw.lstrip.gsub("\r\n", "\n")]
end
.process(pbm) ⇒ Object
48 49 50 51 |
# File 'lib/at_coder_friends/parser/page_parser.rb', line 48
#
# Entry point: collects the sections of the problem's page and applies them
# to the problem object.
#
# @param pbm the problem object; must respond to `page` plus whatever
#   `apply_sections` requires
# @return whatever `apply_sections` returns
def process(pbm)
  apply_sections(pbm, collect_sections(pbm.page))
end