Class: Aozora2Html

Inherits:
Object
  • Object
show all
Defined in:
lib/aozora2html.rb,
lib/aozora2html/tag.rb,
lib/aozora2html/zip.rb,
lib/aozora2html/i18n.rb,
lib/aozora2html/error.rb,
lib/aozora2html/utils.rb,
lib/aozora2html/header.rb,
lib/aozora2html/tag/dir.rb,
lib/aozora2html/tag/img.rb,
lib/aozora2html/version.rb,
lib/aozora2html/tag/ruby.rb,
lib/aozora2html/tag/block.rb,
lib/aozora2html/tag/gaiji.rb,
lib/aozora2html/tag/accent.rb,
lib/aozora2html/tag/indent.rb,
lib/aozora2html/tag/inline.rb,
lib/aozora2html/tag/jisage.rb,
lib/aozora2html/tag/jizume.rb,
lib/aozora2html/tag/kunten.rb,
lib/aozora2html/tag_parser.rb,
lib/aozora2html/ruby_buffer.rb,
lib/aozora2html/style_stack.rb,
lib/aozora2html/tag/midashi.rb,
lib/aozora2html/text_buffer.rb,
lib/aozora2html/yaml_loader.rb,
lib/aozora2html/tag/chitsuki.rb,
lib/aozora2html/tag/decorate.rb,
lib/aozora2html/tag/kaeriten.rb,
lib/aozora2html/accent_parser.rb,
lib/aozora2html/tag/font_size.rb,
lib/aozora2html/tag/keigakomi.rb,
lib/aozora2html/tag/multiline.rb,
lib/aozora2html/tag/okurigana.rb,
lib/aozora2html/midashi_counter.rb,
lib/aozora2html/tag/editor_note.rb,
lib/aozora2html/tag/embed_gaiji.rb,
lib/aozora2html/string_refinements.rb,
lib/aozora2html/tag/inline_caption.rb,
lib/aozora2html/tag/oneline_indent.rb,
lib/aozora2html/tag/oneline_jisage.rb,
lib/aozora2html/tag/un_embed_gaiji.rb,
lib/aozora2html/tag/inline_yokogumi.rb,
lib/aozora2html/tag/multiline_style.rb,
lib/aozora2html/tag/dakuten_katakana.rb,
lib/aozora2html/tag/inline_font_size.rb,
lib/aozora2html/tag/inline_keigakomi.rb,
lib/aozora2html/tag/multiline_jisage.rb,
lib/aozora2html/tag/oneline_chitsuki.rb,
lib/aozora2html/tag/multiline_caption.rb,
lib/aozora2html/tag/multiline_midashi.rb,
lib/aozora2html/tag/multiline_chitsuki.rb,
lib/aozora2html/tag/multiline_yokogumi.rb,
lib/aozora2html/tag/reference_mentioned.rb

Overview

Complex ruby markup: once CSS3 is widely supported, please fix the ruby position using the “ruby-position” property. See also: www.w3.org/TR/2001/WD-css3-ruby-20010216/

Direct Known Subclasses

AccentParser, TagParser

Defined Under Namespace

Modules: StringRefinements, Utils
Classes: AccentParser, Error, Header, I18n, MidashiCounter, RubyBuffer, StyleStack, Tag, TagParser, TextBuffer, YamlLoader, Zip

Constant Summary collapse

KU =

全角バックスラッシュが出せないから直打ち

['18e5'].pack('h*').force_encoding('shift_jis')
NOJI =
['18f5'].pack('h*').force_encoding('shift_jis')
DAKUTEN =
['18d8'].pack('h*').force_encoding('shift_jis')
GAIJI_MARK =
''.to_sjis
IGETA_MARK =
''.to_sjis
RUBY_BEGIN_MARK =
''.to_sjis
RUBY_END_MARK =
''.to_sjis
PAREN_BEGIN_MARK =
''.to_sjis
PAREN_END_MARK =
''.to_sjis
SIZE_SMALL =
''.to_sjis
SIZE_MIDDLE =
''.to_sjis
SIZE_LARGE =
''.to_sjis
TEIHON_MARK =
'底本:'.to_sjis
COMMAND_BEGIN =
''.to_sjis
COMMAND_END =
''.to_sjis
ACCENT_BEGIN =
''.to_sjis
ACCENT_END =
''.to_sjis
AOZORABUNKO =
'青空文庫'.to_sjis
PAT_EDITOR =

PAT_EDITOR = /[校訂|編|編集|編集校訂|校訂編集]$/

'(校訂|編|編集)$'.to_sjis
PAT_HENYAKU =
'編訳$'.to_sjis
PAT_TRANSLATOR =
'訳$'.to_sjis
RUBY_PREFIX =
''.to_sjis
PAT_RUBY =
/#{'《.*?》'.to_sjis}/.freeze
PAT_DIRECTION =
'(右|左|上|下)に(.*)'.to_sjis
PAT_REF =
'^「.+」'.to_sjis
CHUUKI_COMMAND =
'注記付き'.to_sjis
TCY_COMMAND =
'縦中横'.to_sjis
KEIGAKOMI_COMMAND =
'罫囲み'.to_sjis
YOKOGUMI_COMMAND =
'横組み'.to_sjis
CAPTION_COMMAND =
'キャプション'.to_sjis
WARIGAKI_COMMAND =
'割書'.to_sjis
KAERITEN_COMMAND =
'返り点'.to_sjis
KUNTEN_OKURIGANA_COMMAND =
'訓点送り仮名'.to_sjis
MIDASHI_COMMAND =
'見出し'.to_sjis
OMIDASHI_COMMAND =
'大見出し'.to_sjis
NAKAMIDASHI_COMMAND =
'中見出し'.to_sjis
KOMIDASHI_COMMAND =
'小見出し'.to_sjis
DOGYO_OMIDASHI_COMMAND =
'同行大見出し'.to_sjis
DOGYO_NAKAMIDASHI_COMMAND =
'同行中見出し'.to_sjis
DOGYO_KOMIDASHI_COMMAND =
'同行小見出し'.to_sjis
MADO_OMIDASHI_COMMAND =
'窓大見出し'.to_sjis
MADO_NAKAMIDASHI_COMMAND =
'窓中見出し'.to_sjis
MADO_KOMIDASHI_COMMAND =
'窓小見出し'.to_sjis
LEFT_MARK =
''.to_sjis
UNDER_MARK =
''.to_sjis
OVER_MARK =
''.to_sjis
MAIN_MARK =
'本文'.to_sjis
END_MARK =
'終わり'.to_sjis
TEN_MARK =
''.to_sjis
SEN_MARK =
''.to_sjis
OPEN_MARK =
'ここから'.to_sjis
CLOSE_MARK =
'ここで'.to_sjis
MADE_MARK =
'まで'.to_sjis
DOGYO_MARK =
'同行'.to_sjis
MADO_MARK =
''.to_sjis
JIAGE_COMMAND =
'字上げ'.to_sjis
JISAGE_COMMAND =
'字下げ'.to_sjis
PHOTO_COMMAND =
'写真'.to_sjis
ORIKAESHI_COMMAND =
'折り返して'.to_sjis
ONELINE_COMMAND =
'この行'.to_sjis
NON_0213_GAIJI =
'非0213外字'.to_sjis
WARICHU_COMMAND =
'割り注'.to_sjis
TENTSUKI_COMMAND =
'天付き'.to_sjis
PAT_REST_NOTES =
'(左|下)に「(.*)」の(ルビ|注記|傍記)'.to_sjis
PAT_KUTEN =
/#{'「※」[は|の]'.to_sjis}/.freeze
PAT_KUTEN_DUAL =
'※.*※'.to_sjis
PAT_GAIJI =
'(?:#)(.*)(?:、)(.*)'.to_sjis
PAT_KAERITEN =
'^([一二三四五六七八九十レ上中下甲乙丙丁天地人]+)$'.to_sjis
PAT_OKURIGANA =
'^((.+))$'.to_sjis
PAT_REMOVE_OKURIGANA =
/#{'[()]'.to_sjis}/.freeze
PAT_CHITSUKI =
/#{'(地付き|字上げ)(終わり)*$'.to_sjis}/.freeze
PAT_ORIKAESHI_JISAGE =
'折り返して(\\d*)字下げ'.to_sjis
PAT_ORIKAESHI_JISAGE2 =
'(\\d*)字下げ、折り返して(\\d*)字下げ'.to_sjis
PAT_JI_LEN =
'([0-9]+)字'.to_sjis
PAT_INLINE_RUBY =
'「(.*)」の注記付き'.to_sjis
PAT_IMAGE =
'(.*)((fig.+\\.png)(、横([0-9]+)×縦([0-9]+))*)入る'.to_sjis
PAT_FRONTREF =
'「([^「」]*(?:「.+」)*[^「」]*)」[にはの](「.+」の)*(.+)'.to_sjis
PAT_RUBY_DIR =
'(左|下)に「([^」]*)」の(ルビ|注記)'.to_sjis
PAT_CHUUKI =
/#{'「(.+?)」の注記'.to_sjis}/.freeze
PAT_BOUKI =
/#{'「(.)」の傍記'.to_sjis}/.freeze
PAT_CHARSIZE =
/#{'(.*)段階(..)な文字'.to_sjis}/.freeze
REGEX_HIRAGANA =
Regexp.new('[ぁ-んゝゞ]'.to_sjis)
REGEX_KATAKANA =
Regexp.new('[ァ-ンーヽヾヴ]'.to_sjis)
REGEX_ZENKAKU =
Regexp.new('[0-9A-Za-zΑ-Ωα-ωА-Яа-я−&’,.]'.to_sjis)
REGEX_HANKAKU =
Regexp.new("[A-Za-z0-9#\\-\\&'\\,]".to_sjis)
REGEX_KANJI =
Regexp.new('[亜-熙々※仝〆〇ヶ]'.to_sjis)
DYNAMIC_CONTENTS =
"<div id=\"card\">\r\n<hr />\r\n<br />\r\n<a href=\"JavaScript:goLibCard();\" id=\"goAZLibCard\">●図書カード</a><script type=\"text/javascript\" src=\"../../contents.js\"></script>\r\n<script type=\"text/javascript\" src=\"../../golibcard.js\"></script>\r\n</div>".to_sjis
ACCENT_TABLE =
loader.load('../yml/accent_table.yml')
COMMAND_TABLE =
class, tag
loader.load('../yml/command_table.yml')
JIS2UCS =
loader.load('../yml/jis2ucs.yml')
INDENT_TYPE =
{
  jisage: '字下げ'.to_sjis,
  chitsuki: '地付き'.to_sjis,
  midashi: '見出し'.to_sjis,
  jizume: '字詰め'.to_sjis,
  yokogumi: '横組み'.to_sjis,
  keigakomi: '罫囲み'.to_sjis,
  caption: 'キャプション'.to_sjis,
  futoji: '太字'.to_sjis,
  shatai: '斜体'.to_sjis,
  dai: '大きな文字'.to_sjis,
  sho: '小さな文字'.to_sjis
}.freeze
DAKUTEN_KATAKANA_TABLE =
{
  '2' => 'ワ゛'.to_sjis,
  '3' => 'ヰ゛'.to_sjis,
  '4' => 'ヱ゛'.to_sjis,
  '5' => 'ヲ゛'.to_sjis
}.freeze
VERSION =
'3.0.1'

Instance Method Summary collapse

Constructor Details

#initialize(input, output, gaiji_dir: nil, css_files: nil) ⇒ Aozora2Html

Returns a new instance of Aozora2Html.



149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
# File 'lib/aozora2html.rb', line 149

# Sets up the converter's input stream, output sink and parser state.
#
# @param input [#read, String] an object responding to +read+ is used as-is;
#   anything else is passed to File.open and read as binary Shift_JIS
# @param output [#print, String] an object responding to +print+ is used as-is;
#   anything else is passed to File.open in write mode
# @param gaiji_dir [String, nil] gaiji directory; defaults to '../../../gaiji/'
# @param css_files [Array<String>, nil] stylesheets; defaults to ['../../aozora.css']
def initialize(input, output, gaiji_dir: nil, css_files: nil)
  # Readable IO objects are wrapped directly; otherwise +input+ is opened as a file.
  source = input.respond_to?(:read) ? input : File.open(input, 'rb:Shift_JIS')
  @stream = Jstream.new(source)
  # Writable IO objects are used directly; otherwise +output+ is opened for writing.
  @out = output.respond_to?(:print) ? output : File.open(output, 'w')
  @gaiji_dir = gaiji_dir || '../../../gaiji/'
  @css_files = css_files || ['../../aozora.css']

  @buffer = TextBuffer.new
  @ruby_buf = RubyBuffer.new
  # Section currently being processed
  # (:head, :head_end, :chuuki, :chuuki_in, :body, :tail).
  @section = :head
  # Array of header lines.
  @header = Aozora2Html::Header.new(css_files: @css_files)
  # Stack of styles.
  @style_stack = StyleStack.new
  # Remembers which notes have to be emitted at the end.
  @chuuki_table = {}
  # Keeps the gaiji images that were used.
  @images = []
  # Normally holds symbols, but holds the div tag string for hanging indents.
  @indent_stack = []
  @tag_stack = []
  # Heading counter; the increment depends on the kind of heading.
  @midashi_counter = MidashiCounter.new(0)
  # Newline control flag (the name "terpri" possibly comes from Lisp).
  @terprip = true
  # Character that ends parsing; differs in AccentParser and TagParser.
  @endchar = :eof
  # Flag: whether to output nothing when the end of a line has been read.
  @noprint = nil
end

Instance Method Details

#block_allowed_context? ⇒ Boolean

Returns:

  • (Boolean)


182
183
184
185
# File 'lib/aozora2html.rb', line 182

# Tells whether the current context allows a block: true exactly when
# @style_stack is empty.
#
# @return [Boolean] result of +@style_stack.empty?+
def block_allowed_context?
  # Checking that no inline tag is open is sufficient.
  @style_stack.empty?
end

#detect_command_mode(command) ⇒ Symbol

コマンド文字列からモードのシンボルを取り出す

Returns:

  • (Symbol)


230
231
232
233
234
235
236
237
238
239
240
241
# File 'lib/aozora2html.rb', line 230

# Extracts the mode symbol from a command string.
#
# A command matching the chitsuki label or JIAGE_COMMAND immediately followed
# by END_MARK is always reported as :chitsuki. Otherwise the key of the first
# INDENT_TYPE entry (in hash order) whose label matches the command is used.
#
# @param command [String] command text to inspect
# @return [Symbol, nil] an INDENT_TYPE key, or nil when nothing matches
def detect_command_mode(command)
  terminators = [INDENT_TYPE[:chitsuki] + END_MARK, JIAGE_COMMAND + END_MARK]
  return :chitsuki if terminators.any? { |pattern| command.match?(pattern) }

  hit = INDENT_TYPE.find { |_mode, label| command.match?(label) }
  hit&.first
end

#kuten2png(substring) ⇒ Object



212
213
214
215
216
217
218
219
220
221
222
223
224
# File 'lib/aozora2html.rb', line 212

# Turns a gaiji description into an EmbedGaiji tag when it carries a code of
# the form "P-R-C" (P is 1 or 2, R and C are one or two digits).
#
# The description is +substring+ with PAT_KUTEN removed. A tag is built only
# when the description contains such a code and matches neither NON_0213_GAIJI
# nor PAT_KUTEN_DUAL; in that case @chuuki_table[:newjis] is set to true.
#
# @param substring [String] note text describing the character
# @return [Aozora2Html::Tag::EmbedGaiji, String] the tag, or +substring+
#   unchanged when no tag can be built
def kuten2png(substring)
  desc = substring.gsub(PAT_KUTEN, '')
  matched = desc.match(/[12]-\d{1,2}-\d{1,2}/)
  return substring unless matched && !desc.match?(NON_0213_GAIJI) && !desc.match?(PAT_KUTEN_DUAL)

  @chuuki_table[:newjis] = true
  # Parse explicitly as base 10: handing the raw strings to sprintf('%d')
  # makes a zero-padded "08"/"09" be read as invalid octal and raise.
  codes = matched[0].split('-').map { |part| Integer(part, 10) }
  folder = sprintf('%1d-%02d', codes[0], codes[1])
  code = sprintf('%1d-%02d-%02d', *codes)
  # Non-destructive gsub: gsub! returns nil when IGETA_MARK is absent, which
  # would hand the tag a nil name instead of the description.
  name = desc.gsub(IGETA_MARK, '')
  Aozora2Html::Tag::EmbedGaiji.new(self, folder, code, name, gaiji_dir: @gaiji_dir)
end

#line_number ⇒ Object



178
179
180
# File 'lib/aozora2html.rb', line 178

# Reports the position of the input stream by delegating to +@stream.line+.
#
# @return [Object] whatever +@stream.line+ returns (presumably the current
#   line number of the input — confirm against Jstream)
def line_number
  @stream.line
end

#new_midashi_id(size) ⇒ Object



208
209
210
# File 'lib/aozora2html.rb', line 208

# Asks the heading counter for a new id by delegating to
# +@midashi_counter.generate_id+.
#
# @param size [Object] passed through unchanged to +generate_id+
#   (presumably identifies the heading size — confirm against MidashiCounter)
# @return [Object] whatever +generate_id+ returns
def new_midashi_id(size)
  @midashi_counter.generate_id(size)
end

#process ⇒ Object

parseする

終了時(終端まで来た場合)にはthrow :terminateで脱出する



191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
# File 'lib/aozora2html.rb', line 191

# Runs the whole conversion: parses, then calls tail_output, finalize and close.
#
# +parse+ runs inside +catch(:terminate)+; per the original notes it leaves by
# +throw :terminate+ once the end of the input is reached.
#
# An Aozora2Html::Error raised while parsing is reported on standard output
# (its message is built with the current line number) and the process exits
# with status 2. Any other StandardError is reported with the current line
# number and re-raised.
#
# @raise [StandardError] any non-Aozora2Html::Error failure, after reporting it
def process
  catch(:terminate) do
    begin
      parse
    rescue Aozora2Html::Error => e
      # The rescue clause already guarantees the error class, so exit directly.
      puts e.message(line_number)
      exit(2)
    end
  end
  tail_output # final call
  finalize
  close
rescue StandardError
  puts "ERROR: line: #{line_number}"
  raise
end