Class: ConverterBase

Inherits:

Object

Object
ConverterBase

Defined in: lib/converterbase.rb

Constant Summary collapse

KANJI_NUM =

"〇一二三四五六七八九"

ENGLISH_SENTENCES_CHARACTERS =

/[\w.,!?'" &:;_-]+/

ENGLISH_SENTENCES_MIN_LENGTH = この文字数以上アルファベットが続くと半角のまま

KANJI_NUM_UNITS =

%w(万 億 兆 京).unshift("")

KANJI_KURAI =

%w(十 百 千).unshift("")

KANJI_NUM_UNITS_DIGIT =

{
  "十" => 1, "百" => 2, "千" => 3, "万" => 4, "億" => 8, "兆" => 12, "京" => 16
}

RECONVERT_KANJI_TO_NUM_PATTERN_UNIT =

"％㎜㎝㎞㎎㎏㏄㎡㎥"

ROME_NUM_ALPHABET =

%w(II III IV VI VII VIII IX ii iii iv vi vii viii ix)

ROME_NUM =

%w(Ⅱ Ⅲ Ⅳ Ⅵ Ⅶ Ⅷ Ⅸ ⅱ ⅲ ⅳ ⅵ ⅶ ⅷ ⅸ)

SINGLE_MINUTE_FAMILY = ミニュート（ノノカギ）化する記号定義

%!‘’'!

DOUBLE_MINUTE_FAMILY =

%!“”〝〟"!

HALF_INDENT_TARGET =

/^[ 　\t]*((?:[〔「『(（【〈《≪〝])|(?:※［＃始め二重山括弧］))/

FULL_INDENT_TARGET =

/^[ 　\t]*(――)/

AUTO_INDENT_IGNORE_INDENT_CHAR =

Inspector::IGNORE_INDENT_CHAR.sub("・", "")

AUTHOR_INTRODUCTION_SPLITTER = 前書き・後書きの検出及び処理 ==============================

/^　*[\*＊]{44}$/

AUTHOR_POSTSCRIPT_SPLITTER =

/^　*[\*＊]{48}$/

AUTHOR_COMMENT_CHUKI =

{
  introduction: {
    open: "［＃ここから前書き］", close: "［＃ここで前書き終わり］"
  },
  postscript: {
    open: "［＃ここから後書き］", close: "［＃ここで後書き終わり］"
  }
}

BRACKETS =

[%w(「 」), %w(『 』)]

OPENCLOSE_REGEXPS = ネストに対応したかぎ括弧の正規表現

BRACKETS.map { |bracket|
  bo, bc = bracket
  /(?<oc>#{bo}[^#{bo+bc}]*(?:\g<oc>[^#{bo+bc}]*)*#{bc})/m
}

CHARACTER_OF_RUBY =

"一-龠Ａ-Ｚａ-ｚA-Za-z"

AUTO_RUBY_CHARACTERS =

"([ぁ-んァ-ヶーゝゞ・ 　]{,20})"

KANA =

"ァ-ヶー"

WORD_SEPARATOR = zws = zero width space

"［＃zws］"

Instance Attribute Summary collapse

#current_index ⇒ Object

現在処理してる subtitles 内でのインデックス.
#data_type ⇒ Object

Returns the value of attribute data_type.
#output_text_dir ⇒ Object

Returns the value of attribute output_text_dir.
#subtitles ⇒ Object

Returns the value of attribute subtitles.
#use_dakuten_font ⇒ Object readonly

Returns the value of attribute use_dakuten_font.

Class Method Summary collapse

.rebuild_brackets(data, stack) ⇒ Object

Instance Method Summary collapse

#__calc_kanji_num_with_unit(string) ⇒ Object
#__calc_sum_unit(units) ⇒ Object
#after(io, text_type) ⇒ Object
#after_convert(io) ⇒ Object
#alphabet_to_zenkaku(data, force = false) ⇒ Object

半角アルファベットを全角に変換する.
#author_comment_force_close ⇒ Object
#auto_indent(data) ⇒ Object

行頭字下げ.
#auto_join_in_brackets(data) ⇒ Object

かぎ括弧内自動連結.
#auto_join_line(data) ⇒ Object

手動折り返しの自動連結.
#before(io, text_type) ⇒ Object
#before_convert(io) ⇒ Object
#blank_line?(line) ⇒ Boolean
#border_symbol?(line) ⇒ Boolean
#build_ruby(m1, m2, f1, f2) ⇒ Object

なろうのルビ対象文字を辿って｜を挿入する（青空文庫となろうのルビ仕様の差異吸収のため）。空白もルビ対象文字に含むのはなろうの仕様である.
#calc_cr_count(str) ⇒ Object
#comments_block?(line) ⇒ Boolean

コメントブロックを検出する.
#convert(text, text_type) ⇒ Object
#convert_arrow(data) ⇒ Object

おかしくなりやすい矢印文字の変換.
#convert_dakuten_char_to_font(data) ⇒ Object

濁点のついてない文字に濁点をつける表現を対応.
#convert_double_angle_quotation_to_gaiji(data) ⇒ Object

ギュメを二重山括弧（の外字）に変換.
#convert_for_all_data(data) ⇒ Object

小説データ全体に対して施す変換.
#convert_fraction_and_date(data) ⇒ Object

分数表記を○分の○表記に変更、及び日付表記を検出.
#convert_head_half_spaces(data) ⇒ Object

間違えて行頭字下げに半角スペースを使ってるっぽいのを全角スペースにする.
#convert_horizontal_ellipsis(data) ⇒ Object

中黒(・)や句読点を並べて三点リーダーもどきにしているのを三点リーダーに変換.
#convert_kanji_num_with_unit(data, lower_digit_zero = 0) ⇒ Object

漢数字を単位を使った表現に変換.
#convert_main(io) ⇒ Object

変換処理本体.
#convert_novel_rule(data) ⇒ Object

小説のルールに沿うように変換.
#convert_numbers(data) ⇒ Object

数字の変換.
#convert_page_break(data) ⇒ Object

一定以上の連続する空行を改ページに変換.
#convert_prolonged_sound_mark_to_dash(data) ⇒ Object
#convert_rome_numeric(data) ⇒ Object

ローマ数字っぽいアルファベットをローマ数字に変換.
#convert_special_characters(data) ⇒ Object

特定の表現・記号を変換していく.
#convert_tatechuyoko(data) ⇒ Object

縦中横にすべき表現を変換.
#delay_outputs(data = "") ⇒ Object
#delete_dust_char(data) ⇒ Object

表示上化けてしまうゴミ削除.
#enchant_midashi(data) ⇒ Object

［＃改ページ］直後の行を見出しに設定する.
#erase_comments_block(data) ⇒ Object

コメントブロックを削除する.
#erase_introduction(data) ⇒ Object

前書きを削除する.
#erase_postscript(data) ⇒ Object

後書きを削除する.
#exception_reconvert_kanji_to_num(data) ⇒ Object

アラビア数字を使うべきところはアラビア数字に戻す.
#find_introduction? ⇒ Boolean

前書きの検出.
#force_indent_special_chapter(data) ⇒ Object

章見出しっぽい文字列を字下げする.
#half_indent_bracket(data) ⇒ Object

行頭かぎ括弧(等)に二分アキを追加する.
#hankaku_num_to_zenkaku_num(data) ⇒ Object

半角アラビア数字の全角化.
#hankakukana_to_zenkakukana(data) ⇒ Object

半角カナと｢｣｡､･等を全角に変換.
#inclusion_author_comment_block?(line) ⇒ Boolean
#initialize(setting, inspector, illustration) ⇒ ConverterBase constructor

A new instance of ConverterBase.
#insert_blank_before_line_and_behind_to_special_chapter(line) ⇒ Object
#insert_blank_line_to_border_symbol(line) ⇒ Object

■などの区切りの前後には空行が必ず存在するようにする.
#insert_char_separator(str) ⇒ Object

文字単位でzwsを挿入する.
#insert_separate_space(data) ⇒ Object

特定の記号の直後は全角アキを挿入する.
#insert_separator_for_selection(str) ⇒ Object

Kindle端末で単語選択がしやすいように０幅スペースを挿入する.
#insert_word_separator(str) ⇒ Object

単語単位でzwsを挿入する.
#is_sesame?(str, ten, last_char) ⇒ Boolean
#jisage(line, num) ⇒ Object

行頭空白を考慮した字下げ.
#join_inner_bracket(str) ⇒ Object

改行を連結した文章を作る.
#kanji_num_to_integer(string) ⇒ Object
#leave_author_comment_block?(line) ⇒ Boolean
#midashi(str) ⇒ Object
#modify_kana_ni_to_kanji_ni(data) ⇒ Object

漢字の二じゃなくて間違えてカタカナのニを使ってるのを校正する.
#narou_ruby(data) ⇒ Object

小説家になろうのルビ対策.
#num_to_kanji(data) ⇒ Object

アラビア数字を漢数字に.
#object_of_ruby?(char) ⇒ Boolean
#outputs(data = "", force = false) ⇒ Object
#page_break?(line) ⇒ Boolean

改ページある？.
#process_author_comment(line) ⇒ Object
#rebuild_english_sentences(data) ⇒ Object

英文を再構成する.
#rebuild_force_indent_special_chapter(data) ⇒ Object
#rebuild_hankaku_num_and_comma(data) ⇒ Object
#rebuild_illust(data) ⇒ Object
#rebuild_kanji_num(data) ⇒ Object
#rebuild_kome_to_gaiji(data) ⇒ Object

※の外字注記化.
#rebuild_url(data) ⇒ Object
#replace_by_replace_txt(text) ⇒ Object

replace.txt により単純置換.
#replace_illust_tag(data) ⇒ Object

挿絵タグやimgタグ等を挿絵注釈に変換する。挿絵画像が存在しなければダウンロードして保存する.
#replace_narou_tag(data) ⇒ Object

小説家になろう専用タグを置換.
#replace_tatesen(str) ⇒ Object
#replace_url(data) ⇒ Object

URL っぽい文字列を一旦別のIDに置き換えてあとで復元することで、変換処理の影響を受けさせない.
#reset_member_values ⇒ Object

.convert が実行されるたびに呼ばれるメンバ変数リセット用メソッド。インスタンス作成時に一度だけ初期化したい場合は initialize で初期化する.
#rstrip_all_lines(data) ⇒ Object

すべての行の行末空白を削除.
#ruby_youon_to_big(ruby) ⇒ Object

ルビの拗音(ぁ、ぃ等)を商業書籍のように大きくする.
#sentence?(match) ⇒ Boolean
#sesame(str) ⇒ Object
#should_word_be_hankaku?(word) ⇒ Boolean
#stash_hankaku_num_and_comma(num) ⇒ Object
#stash_kanji_num(data) ⇒ Object
#stash_kome(data) ⇒ Object

先に外字注記にしてしまうと border_symbol? 等で困るので、あとで外字注記化出来るようにする.
#symbols_to_zenkaku(data) ⇒ Object

半角記号を全角に変換.
#tcy(str) ⇒ Object

縦中横注記取得.
#to_ruby(match, m1, m2, openclose_symbols) ⇒ Object
#zenkaku_num_to_hankaku_num(num) ⇒ Object

全角数字(漢数字含む)を半角アラビア数字に.
#zenkaku_num_to_kanji(str) ⇒ Object

全角アラビア数字を漢数字に.
#zenkaku_rstrip(line) ⇒ Object

全角版 String#rstrip!.

Constructor Details

#initialize(setting, inspector, illustration) ⇒ `ConverterBase`

Returns a new instance of ConverterBase.

# File 'lib/converterbase.rb', line 39

# Builds a converter around the given collaborators and puts every piece of
# per-instance state into its initial value.
#
# setting      - stored in @setting
# inspector    - stored in @inspector
# illustration - stored in @illustration
def initialize(setting, inspector, illustration)
  @setting, @inspector, @illustration = setting, inspector, illustration
  # Attribute defaults.
  @current_index = 0
  @data_type = "text"
  @subtitles = nil
  @output_text_dir = nil
  @use_dakuten_font = false
  # State that is also reset on every conversion.
  reset_member_values
end

Instance Attribute Details

#current_index ⇒ `Object`

現在処理してる subtitles 内でのインデックス



23
24
25

# File 'lib/converterbase.rb', line 23

# Reader for @current_index: the index, within subtitles, of the entry that is
# currently being processed.
def current_index
  @current_index
end

#data_type ⇒ `Object`

Returns the value of attribute data_type.



22
23
24

# File 'lib/converterbase.rb', line 22

# Reader for @data_type.
def data_type
  @data_type
end

#output_text_dir ⇒ `Object`

Returns the value of attribute output_text_dir.



22
23
24

# File 'lib/converterbase.rb', line 22

# Reader for @output_text_dir.
def output_text_dir
  @output_text_dir
end

#subtitles ⇒ `Object`

Returns the value of attribute subtitles.



22
23
24

# File 'lib/converterbase.rb', line 22

# Reader for @subtitles.
def subtitles
  @subtitles
end

#use_dakuten_font ⇒ `Object` (readonly)

Returns the value of attribute use_dakuten_font.



21
22
23

# File 'lib/converterbase.rb', line 21

# Read-only accessor for @use_dakuten_font.
def use_dakuten_font
  @use_dakuten_font
end

Class Method Details

.rebuild_brackets(data, stack) ⇒ `Object`

# File 'lib/converterbase.rb', line 863

# Puts stashed bracket text back in place of its numbered placeholder.
#
# data  - String containing ［＃かぎ括弧＝N］ placeholders.
# stack - collection indexed by the Integer N of each placeholder.
#
# Returns a new String; data itself is not modified.
def self.rebuild_brackets(data, stack)
  placeholder = /［＃かぎ括弧＝(\d+)］/
  data.gsub(placeholder) { stack[Regexp.last_match(1).to_i] }
end

Instance Method Details

#__calc_kanji_num_with_unit(string) ⇒ `Object`

# File 'lib/converterbase.rb', line 183

# Adds up a kanji numeral written as runs of KANJI_NUM digits, each optionally
# followed by the small units 十/百/千.
#
# string - text scanned for (digits, units) pairs.
#
# Returns the Integer sum of all pairs found before the first empty match.
def __calc_kanji_num_with_unit(string)
  total = 0
  string.scan(/([#{KANJI_NUM}]*)([十百千]*)/) do |num, units|
    # Both groups are optional; an empty pair means nothing numeric is left here.
    break if num + units == ""
    # A bare unit (no digits in front of it) counts as one of that unit.
    num = "1" if num.empty?
    num_tr = num.tr(KANJI_NUM, "0-9")
    if units.empty?
      total += num_tr.to_i
    else
      # Append the summed unit value minus its first character to the digits,
      # then read the concatenation as a decimal number.
      total += (num_tr + __calc_sum_unit(units).to_s[1, 99]).to_i
    end
  end
  total
end

#__calc_sum_unit(units) ⇒ `Object`

# File 'lib/converterbase.rb', line 177

# Sums the numeric value of every unit character in units.
#
# units - String of unit characters that are keys of KANJI_NUM_UNITS_DIGIT,
#         which maps each one to its power of ten.
#
# Returns the Integer sum (0 for an empty string).
def __calc_sum_unit(units)
  units.each_char.inject(0) do |acc, unit|
    acc + 10**KANJI_NUM_UNITS_DIGIT[unit]
  end
end

#after(io, text_type) ⇒ `Object`



35
36
37

# File 'lib/converterbase.rb', line 35

# Post-conversion hook. This implementation does not use text_type and returns
# io unchanged.
def after(io, text_type)
  io
end

#after_convert(io) ⇒ `Object`



1188
1189
1190

# File 'lib/converterbase.rb', line 1188

# Calls the after hook with io and the current @text_type, returning its result.
def after_convert(io)
  after(io, @text_type)
end

#alphabet_to_zenkaku(data, force = false) ⇒ `Object`

半角アルファベットを全角に変換する

force : 強制的に全アルファベットを全角にするか？

false の場合、英文章（半角スペースで区切られた2単語以上）を半角のままにする
英文の定義： 1. 半角スペースで区切られた２単語以上の文章、
             2. 一定以上の長さの一文字以上アルファベットを含む文章

# File 'lib/converterbase.rb', line 521

# Converts half-width ASCII letters in data to full-width letters, in place.
#
# data  - String to rewrite.
# force - when true every ASCII letter is widened. When false, each run
#         matching ENGLISH_SENTENCES_CHARACTERS that sentence? or
#         should_word_be_hankaku? accepts is stashed in @english_sentences and
#         replaced by a ［＃英文＝N］ placeholder; other runs are widened.
#
# Returns the result of gsub! (data, or nil when nothing matched).
def alphabet_to_zenkaku(data, force = false)
  half, full = "a-zA-Z", "ａ-ｚＡ-Ｚ"
  return data.gsub!(/[a-zA-Z]+/) { |letters| letters.tr(half, full) } if force

  data.gsub!(ENGLISH_SENTENCES_CHARACTERS) do |run|
    next run.tr(half, full) unless sentence?(run) || should_word_be_hankaku?(run)

    @english_sentences << run
    "［＃英文＝#{@english_sentences.size - 1}］"
  end
end

#author_comment_force_close ⇒ `Object`

# File 'lib/converterbase.rb', line 805

# Emits the closing annotation of the author-comment block that is still open,
# if there is one. @in_author_comment_block holds the key of the open block
# in AUTHOR_COMMENT_CHUKI, or a falsy value when no block is open.
def author_comment_force_close
  open_block = @in_author_comment_block
  return unless open_block

  outputs(AUTHOR_COMMENT_CHUKI[open_block][:close])
end

#auto_indent(data) ⇒ `Object`

行頭字下げ

必ず下げなければいけないところは強制的に字下げする。他の部分は全体的に判断して字下げする。enable_force_indent が有効なら強制的に字下げする。

# File 'lib/converterbase.rb', line 622

# Indents line heads with a full-width space, in place.
#
# Lines matching FULL_INDENT_TARGET are always indented. All other lines are
# indented when enable_force_indent is set, or when enable_auto_indent is set
# and @inspector.inspect_indent(data) returns a truthy value.
def auto_indent(data)
  data.gsub!(FULL_INDENT_TARGET, "　\\1")
  if @setting.enable_force_indent || (@setting.enable_auto_indent && @inspector.inspect_indent(data))
    data.gsub!(/^([^#{AUTO_INDENT_IGNORE_INDENT_CHAR}])/) do
      # Handles lines that begin with a run of middle dots (・・・) used in place
      # of an ellipsis.
      # https://github.com/whiteleaf7/narou/issues/35
      # A single leading middle dot is a common notation, so it is left unindented.
      if $1 == "・" && $'[0] != "・"
        "・"
      else
        # A leading half- or full-width space is replaced by one full-width
        # space; any other character gets a full-width space in front of it.
        $1 == " " || $1 == "　" ? "　" : "　#{$1}"
      end
    end
  end
end

#auto_join_in_brackets(data) ⇒ `Object`

かぎ括弧内自動連結

# File 'lib/converterbase.rb', line 839

# Joins text inside corner brackets and/or inspects bracket pairing.
#
# Does nothing unless enable_auto_join_in_brackets or enable_inspect is set.
# For each bracket pair, every balanced bracket span is swapped for a numbered
# placeholder while its (possibly joined) text is kept in stack; the
# placeholders are expanded again at the end of the pass.
def auto_join_in_brackets(data)
  if !@setting.enable_auto_join_in_brackets && !@setting.enable_inspect
    return
  end
  OPENCLOSE_REGEXPS.each_with_index do |openclose, i|
    stack = {}
    data.gsub!(openclose).with_index do |match, j|
      joined_str = join_inner_bracket(match)
      if @setting.enable_auto_join_in_brackets && joined_str
        # Keep the original text when the inspector reports an error for the
        # joined version.
        error = @inspector.validate_joined_inner_brackets(match, joined_str, BRACKETS[i])
        stack[j] = error ? match : joined_str
      else
        stack[j] = match
      end
      "［＃かぎ括弧＝#{j}］"
    end
    if @setting.enable_inspect
      # At this point only brackets that are not closed properly remain in data.
      @inspector.inspect_invalid_openclose_brackets(data, BRACKETS[i], stack)
    end
    data.replace(ConverterBase.rebuild_brackets(data, stack))
  end
end

#auto_join_line(data) ⇒ `Object`

手動折り返しの自動連結

# File 'lib/converterbase.rb', line 872

# Re-joins lines that were wrapped by hand right after a 、, in place.
#
# A break is removed only when the next line starts with exactly one
# full-width space. If that line then opens with a bracket, symbol or digit
# from the excluded set (or a second full-width space), the break is treated
# as intentional and kept.
#
# Returns the result of gsub! (data, or nil when nothing was joined).
def auto_join_line(data)
  manual_wrap = /([^、])、\n　([^「『(（【<＜〈《≪・■…‥―　１-９一-九])/
  data.gsub!(manual_wrap) { "#{$1}、#{$2}" }
end

#before(io, text_type) ⇒ `Object`

# File 'lib/converterbase.rb', line 25

# Pre-conversion hook that edits the buffer behind io in place.
#
# Page breaks are generated for "body" and "textfile" text. For every type
# except "story", blank lines are packed when enable_pack_blank_line is set.
# The decisions use @text_type; the text_type argument is not consulted.
#
# Returns io.
def before(io, text_type)
  text = io.string
  convert_page_break(text) if @text_type == "body" || @text_type == "textfile"
  pack_blank_lines = @text_type != "story" && @setting.enable_pack_blank_line
  if pack_blank_lines
    text.gsub!("\n\n", "\n")
    # Reduce three newline-only lines to two.
    text.gsub!(/(^\n){3}/m, "\n\n")
  end
  io
end

#before_convert(io) ⇒ `Object`



1184
1185
1186

# File 'lib/converterbase.rb', line 1184

# Calls the before hook with io and the current @text_type, returning its result.
def before_convert(io)
  before(io, @text_type)
end

#blank_line?(line) ⇒ `Boolean`

Returns:

(Boolean)



690
691
692

# File 'lib/converterbase.rb', line 690

# Tests whether the first line of the given text holds nothing but half-width
# spaces, full-width spaces and tabs.
#
# Returns 0 (the match offset) when blank, nil otherwise.
def blank_line?(line)
  /\A[ 　\t]*$/ =~ line
end

#border_symbol?(line) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/converterbase.rb', line 685

# Tests whether line consists only of border symbols, optionally preceded by
# spaces or tabs.
#
# The symbol set is the content of bordersymbols.txt in Narou.preset_dir. It is
# read on first use, cached in @@symbols, and interpolated into a character class.
#
# Returns the match offset, or nil when the line does not match.
def border_symbol?(line)
  @@symbols ||= File.read(Narou.preset_dir.join("bordersymbols.txt"), encoding: "BOM|UTF-8")
  line =~ /^[ 　\t]*[#{@@symbols}]+$/
end

#build_ruby(m1, m2, f1, f2) ⇒ `Object`

なろうのルビ対象文字を辿って｜を挿入する（青空文庫となろうのルビ仕様の差異吸収のため）。空白もルビ対象文字に含むのはなろうの仕様である。

# File 'lib/converterbase.rb', line 961

# Inserts the ruby start marker (［＃ルビ用縦線］) in front of the ruby target at
# the end of m1 and appends the ruby text.
#
# m1 - text whose tail is the ruby target.
# m2 - ruby text for the whole target.
# f1 - ruby text for the part before the space (used when f2 is not "").
# f2 - ruby text for the part after the space; "" means m2 applies to the
#      part after the space only.
#
# A target made of two runs of ruby characters separated by one half- or
# full-width space is handled specially; otherwise the trailing run gets m2.
def build_ruby(m1, m2, f1, f2)
  ruby_run = "[#{CHARACTER_OF_RUBY}]+"
  spaced_tail = /(#{ruby_run})([ 　])(#{ruby_run})$/
  unless m1 =~ spaced_tail
    return m1.sub(/(#{ruby_run})$/, "［＃ルビ用縦線］\\1") + "《#{ruby_youon_to_big(m2)}》"
  end

  m1.sub(spaced_tail) do
    head, gap, tail = $1, $2, $3
    if f2 == ""
      "#{head}#{gap}［＃ルビ用縦線］#{tail}《#{ruby_youon_to_big(m2)}》"
    else
      "［＃ルビ用縦線］#{head}《#{ruby_youon_to_big(f1)}》#{gap}［＃ルビ用縦線］#{tail}《#{ruby_youon_to_big(f2)}》"
    end
  end
end

#calc_cr_count(str) ⇒ `Object`

# File 'lib/converterbase.rb', line 1081

# Counts the newlines at the head of str, capped at 2.
#
# Returns 0 when str contains nothing but newlines (or is empty).
def calc_cr_count(str)
  leading_newlines = str.index(/[^\n]/) || 0
  [leading_newlines, 2].min
end

#comments_block?(line) ⇒ `Boolean`

コメントブロックを検出する

コメントブロックの定義は - のみが50回以上連続された行に囲まれている間

Returns:

(Boolean)

# File 'lib/converterbase.rb', line 552

# Tracks comment blocks, which are delimited by lines made of 50 or more "-".
#
# A delimiter line toggles @in_comment_block and is itself reported as part of
# the block. Any other line reports the current state.
#
# Returns true for a delimiter line, otherwise the value of @in_comment_block.
def comments_block?(line)
  return @in_comment_block unless line =~ /^-{50,}$/

  @in_comment_block ^= 1
  true
end

#convert(text, text_type) ⇒ `Object`

# File 'lib/converterbase.rb', line 1321

# Converts one piece of text of the given type and returns the result.
#
# text      - String to convert; it is re-tagged as UTF-8 in place.
# text_type - stored in @text_type for the stages that follow.
#
# The text runs through before_convert, convert_main and after_convert (the
# returned io is rewound after each), then through replace_by_replace_txt and
# insert_separator_for_selection. An empty text yields "".
def convert(text, text_type)
  return "" if text == ""

  # NOTE(review): this value is computed but never used in this method.
  _output_text_dir = @output_text_dir || @setting.archive_path
  @text_type = text_type
  text.force_encoding(Encoding::UTF_8)
  io = StringIO.new(rstrip_all_lines(text))
  %i[before_convert convert_main after_convert].each do |stage|
    io = send(stage, io)
    io.rewind
  end
  insert_separator_for_selection(replace_by_replace_txt(io.read))
end

#convert_arrow(data) ⇒ `Object`

おかしくなりやすい矢印文字の変換

# File 'lib/converterbase.rb', line 424

# Replaces the double arrows ⇒ and ⇐ with → and ←, in place.
#
# Only applied when @device is set and reports kindle?; the behavior has only
# been checked on a Kindle PW, so other devices are left alone.
#
# Returns the result of tr!, or nil when the device check fails.
def convert_arrow(data)
  return unless @device && @device.kindle?

  data.tr!("⇒⇐", "→←")
end

#convert_dakuten_char_to_font(data) ⇒ `Object`

濁点のついてない文字に濁点をつける表現を対応

濁点つきフォントに部分的に切り替える

# File 'lib/converterbase.rb', line 470

# Wraps a kana (or ι) that is followed by a standalone dakuten mark in
# ［＃濁点］…［＃濁点終わり］ and drops the mark, in place.
#
# Does nothing unless enable_dakuten_font is set. @use_dakuten_font becomes
# true as soon as one replacement is made.
def convert_dakuten_char_to_font(data)
  return unless @setting.enable_dakuten_font

  data.gsub!(/([ぁ-んァ-ヶι])[゛ﾞ]/) do
    base_char = $1
    @use_dakuten_font = true
    "［＃濁点］#{base_char}［＃濁点終わり］"
  end
end

#convert_double_angle_quotation_to_gaiji(data) ⇒ `Object`

ギュメを二重山括弧（の外字）に変換

# File 'lib/converterbase.rb', line 451

# Replaces ≪ and ≫ with the gaiji annotations for double angle brackets,
# in place.
#
# Returns the result of the second gsub! (nil when data has no ≫).
def convert_double_angle_quotation_to_gaiji(data)
  opening = "※［＃始め二重山括弧］"
  closing = "※［＃終わり二重山括弧］"
  data.gsub!("≪", opening)
  data.gsub!("≫", closing)
end

#convert_for_all_data(data) ⇒ `Object`

小説データ全体に対して施す変換

# File 'lib/converterbase.rb', line 1158

# Runs every whole-text conversion on data, in a fixed order.
#
# Each step receives the same data object. The kanji-with-units step runs only
# when number-to-kanji conversion and enable_kanji_num_with_units are both
# enabled and the text is neither a "subtitle" nor a "chapter".
def convert_for_all_data(data)
  hankakukana_to_zenkakukana(data)
  auto_join_in_brackets(data)
  auto_join_line(data) if @setting.enable_auto_join_line
  erase_comments_block(data)
  replace_illust_tag(data)
  replace_url(data)
  replace_narou_tag(data)
  convert_rome_numeric(data)
  alphabet_to_zenkaku(data, @setting.enable_alphabet_force_zenkaku)
  force_indent_special_chapter(data)
  convert_numbers(data)
  exception_reconvert_kanji_to_num(data)
  if @setting.enable_convert_num_to_kanji && @text_type != "subtitle" && @text_type != "chapter" \
     && @setting.enable_kanji_num_with_units
    convert_kanji_num_with_unit(data, @setting.kanji_num_with_units_lower_digit_zero)
  end
  rebuild_kanji_num(data)
  insert_separate_space(data)
  convert_special_characters(data)
  convert_fraction_and_date(data)
  modify_kana_ni_to_kanji_ni(data)
  convert_dakuten_char_to_font(data)
  convert_prolonged_sound_mark_to_dash(data)
end

#convert_fraction_and_date(data) ⇒ `Object`

分数表記を○分の○表記に変更、及び日付表記を検出

スラッシュで区切られた数字が２個なら分数、３個なら日付と定義

# File 'lib/converterbase.rb', line 265

# Rewrites slash-separated numbers in data, in place.
#
# Two numbers separated by a slash are treated as a fraction and rewritten as
# "<denominator>分の<numerator>". Three numbers are treated as a date and
# rewritten with @setting.date_format. Anything else is left as it is.
#
# Does nothing when both enable_transform_fraction and enable_transform_date
# are off. A run is also left untouched when:
#   * its kind (fraction or date) is disabled,
#   * one of its fields is empty (leading, trailing or doubled slash),
#   * the three fields do not form a valid date.
#
# Returns the result of gsub!, or nil when both transformations are disabled.
def convert_fraction_and_date(data)
  if !@setting.enable_transform_fraction && !@setting.enable_transform_date
    return
  end
  # The backslash must be doubled: inside a double-quoted string "\d" is just
  # the letter "d", which would drop ASCII digits from the character class.
  target_num = "\\d０-９#{KANJI_NUM}十百千万億兆京垓"
  data.gsub!(/[#{target_num}\/／]+/) do |match|
    # limit -1 keeps trailing empty fields, so "５/３/" is not mistaken for the
    # fraction "５/３" (which would silently drop the last slash).
    numerics = match.split(/[\/／]/, -1)
    case numerics.size
    when 2
      # Fraction
      if @setting.enable_transform_fraction && numerics.none?(&:empty?)
        "#{zenkaku_num_to_kanji(numerics[1])}分の#{zenkaku_num_to_kanji(numerics[0])}"
      else
        match
      end
    when 3
      # Date
      if @setting.enable_transform_date
        begin
          date = Date.new(*numerics.map { |s|
            s.tr("0-9０-９#{KANJI_NUM}", "0-90-90-9").to_i
          })
        rescue ArgumentError
          # Not a real calendar date; keep the text.
          match
        else
          convert_numbers(date.strftime(@setting.date_format))
        end
      else
        # The block value replaces the match, so the text must be returned
        # explicitly; returning nil here would delete it.
        match
      end
    else
      match
    end
  end
end

#convert_head_half_spaces(data) ⇒ `Object`

間違えて行頭字下げに半角スペースを使ってるっぽいのを全角スペースにする

# File 'lib/converterbase.rb', line 434

# Replaces half-width spaces at the head of a line, which look like a mistaken
# indent, with full-width spaces, in place.
#
# Every two half-width spaces become one full-width space, rounding up.
def convert_head_half_spaces(data)
  data.gsub!(/^ +/) { |spaces| "　" * ((spaces.length + 1) / 2) }
end

#convert_horizontal_ellipsis(data) ⇒ `Object`

中黒(・)や句読点を並べて三点リーダーもどきにしているのを三点リーダーに変換

# File 'lib/converterbase.rb', line 1032

# Turns runs of ・ 。 、 ． used as a makeshift ellipsis into real ellipsis
# characters, in place.
#
# Skipped when enable_convert_horizontal_ellipsis is off or the text is a
# "subtitle" or "chapter". A run of three or more identical characters becomes
# an even number of … (two per six characters, rounded up), unless a ― sits
# directly before or after it. Finally 。。 and 、、 are collapsed to one.
def convert_horizontal_ellipsis(data)
  return unless @setting.enable_convert_horizontal_ellipsis
  return if @text_type == "subtitle" || @text_type == "chapter"

  ["・", "。", "、", "．"].each do |dot|
    data.gsub!(/#{dot}{3,}/) do |run|
      beside_dash = $`[-1] == "―" || $'[0] == "―"
      beside_dash ? run : "…" * (((run.length + 5) / 6) * 2)
    end
  end
  data.gsub!("。。", "。")
  data.gsub!("、、", "、")
end

#convert_kanji_num_with_unit(data, lower_digit_zero = 0) ⇒ `Object`

漢数字を単位を使った表現に変換

８００万１０００といったような表現は、内部で一度 8001000 に変換する。lower_digit_zero はこの最後の 000 に適用される。

# File 'lib/converterbase.rb', line 212

# Rewrites kanji numbers in data into the form that uses units, in place.
#
# data             - String to rewrite.
# lower_digit_zero - a number is rewritten only when its digit string ends
#                    with at least this many 〇.
#
# Each run of kanji digits/units is converted to an integer by
# kanji_num_to_integer. Runs whose integer has more digits than 京 plus four
# allows are left as they are.
def convert_kanji_num_with_unit(data, lower_digit_zero = 0)
  data.gsub!(/([#{KANJI_NUM}十百千万億兆京]+)/) do |match|
    total = kanji_num_to_integer($1)
    next match if total.to_s.length > KANJI_NUM_UNITS_DIGIT["京"] + 4
    m1 = total.to_s.tr("0-9", KANJI_NUM)
    if m1 =~ /〇{#{lower_digit_zero},}$/
      digits = m1.reverse.scan(/.{1,4}/).map(&:reverse).reverse   # groups of four digits, cut from the lowest digit
      keta = digits.size - 1
      digits.map.with_index { |nums, keta_i|
        four_digit_num = nums.scan(/./).map.with_index { |d, di|
          next "" if d == "〇"
          kurai = KANJI_KURAI[nums.length - di - 1]
          if d == "一"
            # 一 is not required before 千 in a four-digit number, but it is
            # written before 千 when the number has five or more digits.
            # 1100 -> 千百, 11100 -> 一万一千百
            if kurai != "" && !(keta > 0 && kurai == "千")
              d = ""
            end
          end
          d + kurai
        }.join
        # A group of four zeros produces nothing, not even its large unit.
        if four_digit_num.length > 0
          four_digit_num + KANJI_NUM_UNITS[keta - keta_i]
        else
          ""
        end
      }.join
    else
      match
    end
  end
end

#convert_main(io) ⇒ `Object`

変換処理本体

@text_type: 渡されるテキストの種類。

subtitle, introduction, body, postscript, textfile, chapter, story

# File 'lib/converterbase.rb', line 1341

# Main body of the conversion.
#
# io - input stream holding the text to convert.
#
# @text_type is the kind of text being processed: subtitle, introduction,
# body, postscript, textfile, chapter or story.
#
# Returns @write_fp, a StringIO whose buffer holds the converted text. For
# "introduction" / "postscript" text an empty StringIO is returned when the
# corresponding erase setting is enabled.
def convert_main(io)
  @write_fp = StringIO.new
  case @text_type
  when "introduction"
    return @write_fp if @setting.enable_erase_introduction
  when "postscript"
    return @write_fp if @setting.enable_erase_postscript
  end
  title_and_author = nil
  if @text_type == "textfile"
    # Set the title and author lines aside so they skip the conversion.
    # to_s guards against files with fewer than two lines (gets returns nil).
    title_and_author = io.gets.to_s + io.gets.to_s
  end
  data = io.read
  reset_member_values
  convert_for_all_data(data)
  progressbar = nil
  if @text_type == "textfile"
    # The exact number of lines is only known after
    # convert_for_all_data -> replace_narou_tag has produced the line breaks.
    progressbar = ProgressBar.new(data.count("\n") + 1)
    progressbar.output(0)
  end
  @read_fp = StringIO.new(data)
  if @text_type == "subtitle"
    @write_fp.write(data)
  else
    @read_fp.each_with_index do |line, i|
      progressbar.output(i) if progressbar
      @request_skip_output_line = false
      zenkaku_rstrip(line)
      if @request_insert_blank_next_line
        outputs unless blank_line?(line)
        @request_insert_blank_next_line = false
        @before_line = ""
      end
      process_author_comment(line) if @text_type == "textfile"
      insert_blank_before_line_and_behind_to_special_chapter(line)
      insert_blank_line_to_border_symbol(line)

      outputs(line)
      if @delay_outputs_buffer.empty?
        @before_line = line
      else
        @write_fp.write(@delay_outputs_buffer)
        # Keep a copy: assigning the buffer itself would alias it, and the
        # clear below would then empty @before_line as well.
        @before_line = @delay_outputs_buffer.dup
        @delay_outputs_buffer.clear
      end
    end
    author_comment_force_close if @text_type == "textfile"
  end

  @write_fp.rewind
  # data is the buffer of @write_fp, so the in-place edits below change what
  # @write_fp holds.
  data = @write_fp.string
  if @text_type == "textfile"
    if @setting.enable_author_comments
      erase_introduction(data) if @setting.enable_erase_introduction
      erase_postscript(data) if @setting.enable_erase_postscript
    end
    if @setting.enable_enchant_midashi
      enchant_midashi(data)
    end
  end
  rebuild_illust(data)
  rebuild_url(data)
  rebuild_english_sentences(data)
  rebuild_hankaku_num_and_comma(data)
  rebuild_kome_to_gaiji(data)
  if @text_type == "body" || @text_type == "textfile"
    half_indent_bracket(data)
    auto_indent(data)
  end
  rebuild_force_indent_special_chapter(data)
  # Ruby is processed here because the rebuilt text may itself carry ruby.
  narou_ruby(data) if @setting.enable_ruby
  # Ellipsis conversion comes after ruby processing, because ・・・ may be used
  # as emphasis dots inside ruby.
  convert_horizontal_ellipsis(data)
  # Guillemets that were not consumed as ruby become gaiji double angle brackets.
  convert_double_angle_quotation_to_gaiji(data)
  delete_dust_char(data)
  if title_and_author
    data.replace(title_and_author + data)
  end
  data.rstrip!
  @write_fp
ensure
  if @text_type == "textfile" && progressbar
    progressbar.clear
  end
end

#convert_novel_rule(data) ⇒ `Object`

小説のルールに沿うように変換

# File 'lib/converterbase.rb', line 488

# Adjusts data to common typesetting rules for novels, in place.
#
#   * a 。 directly before a closing 」 』 ） is removed
#   * runs of … and ‥ are padded to an even length
#     (― should be paired too, but it is also used as a plain symbol, so it
#     is left alone)
#   * a full-width space right after 。 is removed (a typo seen now and then)
#
# Returns the result of the last gsub!.
def convert_novel_rule(data)
  data.gsub!(/。([」』）])/) { $1 }
  ["…", "‥"].each do |leader|
    data.gsub!(/#{leader}+/) { |run| leader * (run.length + run.length % 2) }
  end
  data.gsub!(/。　/, "。")
end

#convert_numbers(data) ⇒ `Object`

数字の変換

# File 'lib/converterbase.rb', line 93

# Converts the numbers in data, in place, and returns data.
#
# A decimal point (. or ．) between two digits of the same kind (ASCII,
# full-width or kanji) becomes ・. Then digits are turned into kanji when
# enable_convert_num_to_kanji is set and the text is not a subtitle, chapter
# or story; otherwise half-width digits are made full-width.
def convert_numbers(data)
  digit_kinds = [/\d/, /[０-９]/, /[#{KANJI_NUM}]/]
  data.gsub!(/([\d０-９#{KANJI_NUM}]+?)[\.．]([\d０-９#{KANJI_NUM}]+?)/) do |match|
    integer, decimal = $1, $2
    same_kind = digit_kinds.any? { |kind| integer[-1] =~ kind && decimal[0] =~ kind }
    same_kind ? "#{integer}・#{decimal}" : match
  end
  to_kanji = @setting.enable_convert_num_to_kanji &&
             @text_type != "subtitle" && @text_type != "chapter" && @text_type != "story"
  if to_kanji
    num_to_kanji(data)
  else
    hankaku_num_to_zenkaku_num(data)
  end
  data
end

#convert_page_break(data) ⇒ `Object`

一定以上の連続する空行を改ページに変換

# File 'lib/converterbase.rb', line 1139

# Replaces runs of blank lines with a page break, in place.
#
# Active only when enable_convert_page_break is set; a run must contain at
# least to_page_break_threshold newline-only lines. Generated breaks use
# ［＃改頁］ instead of ［＃改ページ］ so they can be told apart from real ones
# (real ones take part in heading assignment).
def convert_page_break(data)
  return unless @setting.enable_convert_page_break

  min_blank_lines = @setting.to_page_break_threshold
  data.gsub!(/(^\n){#{min_blank_lines},}/, "［＃改頁］\n")
end

#convert_prolonged_sound_mark_to_dash(data) ⇒ `Object`

# File 'lib/converterbase.rb', line 478

# Replaces runs of two or more prolonged sound marks (ー) with the same number
# of dashes (―), in place. Active only when
# enable_prolonged_sound_mark_to_dash is set.
def convert_prolonged_sound_mark_to_dash(data)
  return unless @setting.enable_prolonged_sound_mark_to_dash

  data.gsub!(/(ー{2,})/) { |marks| "―" * marks.length }
end

#convert_rome_numeric(data) ⇒ `Object`

ローマ数字っぽいアルファベットをローマ数字に変換

※alphabet_to_zenkaku の前に実行する必要あり

# File 'lib/converterbase.rb', line 331

# Replaces letter sequences that look like Roman numerals (ROME_NUM_ALPHABET)
# with the matching Roman numeral characters (ROME_NUM), in place.
#
# Must run before alphabet_to_zenkaku.
#
# A sequence is converted only when it is not adjacent to another ASCII
# letter. The neighbours are checked with lookarounds instead of being part of
# the match, so a numeral at the very start or end of data is converted, and
# so are repeated numerals separated by a single character ("II II").
#
# Returns ROME_NUM_ALPHABET (the receiver of each_with_index).
def convert_rome_numeric(data)
  ROME_NUM_ALPHABET.each_with_index do |rome, i|
    data.gsub!(/(?<![a-zA-Z])#{rome}(?![a-zA-Z])/, ROME_NUM[i])
  end
end

#convert_special_characters(data) ⇒ `Object`

特定の表現・記号を変換していく

# File 'lib/converterbase.rb', line 340

# Applies the conversions for particular expressions and symbols to data, one
# after another in this fixed order.
def convert_special_characters(data)
  stash_kome(data)
  convert_double_angle_quotation_to_gaiji(data)   # guillemets present from the start are not ruby targets, so they become gaiji annotations
  symbols_to_zenkaku(data)
  convert_tatechuyoko(data)
  convert_novel_rule(data)
  convert_arrow(data)
  convert_head_half_spaces(data)
end

#convert_tatechuyoko(data) ⇒ `Object`

縦中横にすべき表現を変換

# File 'lib/converterbase.rb', line 384

# Wraps exclamation/question mark runs that should be set tate-chu-yoko
# (horizontal-in-vertical) in the annotation produced by tcy, in place.
#
# AozoraEPUB3 could do this automatically, but not inside headings, so the
# annotation is written explicitly. ! and ? are expected to be full-width
# already when this runs.
#
# Pass 1 (runs of ！ not touching a ？):
#   3 marks   -> one group "!!!"
#   4 or more -> groups of "!!", rounding an odd count up
# Pass 2 (mixed runs of ！ and ？):
#   2 marks            -> one group
#   ！！？ or ？！！    -> one group (the only 3-mark patterns accepted, for looks)
#   anything else      -> unchanged
def convert_tatechuyoko(data)
  data.gsub!(/！+/) do |run|
    neighbours = "#{$`[-1]}#{$'[0]}"
    next run if neighbours.include?("？")

    count = run.length
    if count >= 4
      tcy("!!") * ((count + 1) / 2)
    elsif count == 3
      tcy("!!!")
    else
      run
    end
  end
  data.gsub!(/[！？]+/) do |marks|
    grouped = marks.length == 2 || %w(！！？ ？！！).include?(marks)
    grouped ? tcy(marks.tr("！？", "!?")) : marks
  end
end

#delay_outputs(data = "") ⇒ `Object`

# File 'lib/converterbase.rb', line 77

# Appends data plus a newline to @delay_outputs_buffer, unless output of the
# current line has been cancelled through @request_skip_output_line.
#
# Returns the buffer, or nil when the line is skipped.
def delay_outputs(data = "")
  return if @request_skip_output_line

  @delay_outputs_buffer << (data + "\n")
end

#delete_dust_char(data) ⇒ `Object`

表示上化けてしまうゴミ削除

# File 'lib/converterbase.rb', line 1150

# Removes stray characters that show up garbled on screen, in place.
#
# Each literal below contains an invisible character.
# NOTE(review): the two literals look identical when rendered - confirm their
# code points in the source file before editing either line.
def delete_dust_char(data)
  data.gsub!("︎", "")
  data.gsub!("︎", "")
end

#enchant_midashi(data) ⇒ `Object`

［＃改ページ］直後の行を見出しに設定する

# File 'lib/converterbase.rb', line 1074

# Makes the line that follows each ［＃改ページ］ a heading, in place.
#
# NOTE(review): the two nested defs define midashi and calc_cr_count as
# methods every time enchant_midashi runs.
def enchant_midashi(data)
  # Builds the heading annotation for str after removing a half-indent marker
  # and surrounding whitespace, and reports the title to the inspector.
  def midashi(str)
    midashi_title = str.gsub("［＃半字下げ］", "").gsub(/^[　\s]+/, "").gsub(/[　\s]+$/, "")
    @inspector.subtitle = midashi_title
    "［＃３字下げ］［＃中見出し］#{midashi_title}［＃中見出し終わり］"
  end

  # Number of newlines at the head of str, capped at 2.
  def calc_cr_count(str)
    head_cr_count = str.index(/[^\n]/) || 0
    head_cr_count > 2 ? 2 : head_cr_count
  end

  # Attach the headings.
  data.gsub!(/［＃改ページ］\n(.+?)\n/) do |match|
    m1 = $1
    rest = $'
    # When an introduction follows the page break, leave it for now; the
    # heading is attached by the next substitution.
    if $1 =~ /#{AUTHOR_COMMENT_CHUKI[:introduction][:open]}/
      match
    else
      # Add blank lines when the heading is not already followed by them.
      add_tail = "\n" * (2 - calc_cr_count(rest))
      # Two blank lines go between the heading and the body.
      "［＃改ページ］\n\n#{midashi(m1)}\n#{add_tail}"
    end
  end
  # With an introduction present, swap the order introduction -> heading into
  # heading -> introduction.
  data.gsub!(/(［＃改ページ］\n)(#{AUTHOR_COMMENT_CHUKI[:introduction][:open]}.+?#{AUTHOR_COMMENT_CHUKI[:introduction][:close]}\n)(.+?\n)/m) do
    m1, m2, m3 = $1, $2, $3
    add_tail = $' =~ /\A$/ ? "" : "\n"
    "#{m1 + midashi(m3) + m2}#{add_tail}"
  end
end

#erase_comments_block(data) ⇒ `Object`

コメントブロックを削除する

# File 'lib/converterbase.rb', line 563

# Deletes comment blocks from data, in place, when the text is a "textfile".
#
# A comment block starts and ends with a line of 50 or more "-"; both
# delimiter lines are removed together with everything between them.
#
# Returns data.
def erase_comments_block(data)
  return data unless @text_type == "textfile"

  data.gsub!(/^-{50,}\n.*?^-{50,}\n/m, "")
  data
end

#erase_introduction(data) ⇒ `Object`

前書きを削除する

# File 'lib/converterbase.rb', line 1111

# Deletes every introduction block that directly follows a ［＃改ページ］ line,
# in place, keeping the page break itself.
#
# The number of deleted blocks is reported through @inspector.info when at
# least one was removed.
def erase_introduction(data)
  chuki = AUTHOR_COMMENT_CHUKI[:introduction]
  removed = 0
  data.gsub!(/(［＃改ページ］)\n#{chuki[:open]}.+?#{chuki[:close]}/m) do
    removed += 1
    $1
  end
  return unless removed > 0

  @inspector.info("前書きをすべて削除しました。削除した数は#{removed}個です。")
end

#erase_postscript(data) ⇒ `Object`

後書きを削除する

# File 'lib/converterbase.rb', line 1125

# Deletes every postscript block that is followed by a ［＃改ページ］ or by the
# end of the text, in place, keeping that page break.
#
# The number of deleted blocks is reported through @inspector.info when at
# least one was removed.
def erase_postscript(data)
  chuki = AUTHOR_COMMENT_CHUKI[:postscript]
  removed = 0
  data.gsub!(/#{chuki[:open]}.+?#{chuki[:close]}\n(［＃改ページ］|\z)/m) do
    removed += 1
    $1
  end
  return unless removed > 0

  @inspector.info("後書きをすべて削除しました。削除した数は#{removed}個です。")
end

#exception_reconvert_kanji_to_num(data) ⇒ `Object`

アラビア数字を使うべきところはアラビア数字に戻す

# File 'lib/converterbase.rb', line 250

# Turns kanji digits back into full-width Arabic digits where Arabic digits
# read better, in place. Active only when enable_convert_num_to_kanji is set.
#
#   * digits (with ・ or ～ between them) right after a full-width letter;
#     patterns such as ｖｅｒ１・０１ are accepted as well
#   * digits right before a full-width letter or one of the unit characters in
#     RECONVERT_KANJI_TO_NUM_PATTERN_UNIT
def exception_reconvert_kanji_to_num(data)
  return unless @setting.enable_convert_num_to_kanji

  to_arabic = ->(kanji) { kanji.tr(KANJI_NUM, "０-９") }
  data.gsub!(/([Ａ-Ｚａ-ｚ])([#{KANJI_NUM}・～]+)/) do
    letter, digits = $1, $2
    letter + to_arabic.call(digits)
  end
  data.gsub!(/([#{KANJI_NUM}・～]+)([Ａ-Ｚａ-ｚ#{RECONVERT_KANJI_TO_NUM_PATTERN_UNIT}])/) do
    digits, suffix = $1, $2
    to_arabic.call(digits) + suffix
  end
end

#find_introduction? ⇒ `Boolean`

前書きの検出

Returns:

(Boolean)

# File 'lib/converterbase.rb', line 762

# Looks ahead in @read_fp for an introduction separator line.
#
# Reading stops at the next page break. The read position is restored before
# returning, so the caller continues where it was.
#
# Returns true when a line matching AUTHOR_INTRODUCTION_SPLITTER was found
# before a page break, false otherwise.
def find_introduction?
  saved_pos = @read_fp.pos
  found = false
  @read_fp.each do |line|
    break if page_break?(line)
    next unless line =~ AUTHOR_INTRODUCTION_SPLITTER

    found = true
    break
  end
  @read_fp.pos = saved_pos
  found
end

#force_indent_special_chapter(data) ⇒ `Object`

章見出しっぽい文字列を字下げする

# File 'lib/converterbase.rb', line 641

# Stashes lines that look like chapter headings (a 1-3 digit number, optionally
# wrapped in dashes or angle brackets) behind a placeholder, for "body" and
# "textfile" text.
#
# The indented replacement text is stored in @force_indent_special_chapter_list
# and the line becomes ［＃章見出しっぽい文＝N］, where N is a zero-padded
# counter kept in a class variable.
def force_indent_special_chapter(data)
  return unless @text_type == "body" || @text_type == "textfile"
  @@count_of_rebuild_container ||= 0
  data.gsub!(/^[ 　\t]*([－―<＜〈-]*)([0-9０-９#{KANJI_NUM}]{1,3})([－―>＞〉-]*)$/) do
    top, chapter, bottom = $1, $2, $3
    if top != "" && "―－-".include?(top)   # include? is true for "", so the emptiness check is required
      top = "― "
      bottom = " ―"
    end
    str = +"　　　#{top}"
    str += hankaku_num_to_zenkaku_num(chapter.tr("０-９", "0-9"))
    str += "#{bottom}"
    # Blank lines are wanted before and after, but they are added later in the
    # per-line loop.
    symbols_to_zenkaku(str)
    index = @@count_of_rebuild_container += 1
    # The list key is the counter after convert_numbers, while the placeholder
    # written into data carries the plain zero-padded counter.
    @force_indent_special_chapter_list[convert_numbers(index.to_s.rjust(10,"0"))] = str
    "［＃章見出しっぽい文＝#{index.to_s.rjust(10,"0")}］"
  end
end

#half_indent_bracket(data) ⇒ `Object`

行頭かぎ括弧(等)に二分アキを追加する

「や（などの前にカスタム注記（［＃二分アキ］）を追加し、半文字分字下げする(二分アキ)。 kindle paperwhite で鍵括弧のインデントがおかしいことへの対応

# File 'lib/converterbase.rb', line 605

# Handles opening brackets at the head of a line (HALF_INDENT_TARGET), in place.
#
# Leading whitespace before the bracket is always dropped. When
# enable_half_indent_bracket is set, the custom annotation ［＃二分アキ］ is put
# in front of the bracket to shift it by half a character; this works around
# the odd indentation of brackets on the Kindle Paperwhite.
def half_indent_bracket(data)
  data.gsub!(HALF_INDENT_TARGET) do
    opener = $1
    @setting.enable_half_indent_bracket ? "［＃二分アキ］#{opener}" : opener
  end
end

#hankaku_num_to_zenkaku_num(data) ⇒ `Object`

半角アラビア数字の全角化

1桁、3桁以上：全角化 2桁：縦中横化

# File 'lib/converterbase.rb', line 583

# Converts half-width Arabic digits in data, in place, and returns data.
#
#   * 2 digits                                  -> tate-chu-yoko via tcy
#   * 3 digits at the very start of a subtitle  -> tate-chu-yoko via tcy
#   * everything else                           -> full-width digits
def hankaku_num_to_zenkaku_num(data)
  data.gsub!(/\d+/) do |digits|
    as_tcy = digits.length == 2 ||
             (digits.length == 3 && @text_type == "subtitle" && $`.empty?)
    as_tcy ? tcy(digits) : digits.tr("0-9", "０-９")
  end
  data
end

#hankakukana_to_zenkakukana(data) ⇒ `Object`

半角カナと｢｣｡､･等を全角に変換



353
354
355

# File 'lib/converterbase.rb', line 353

# Converts half-width kana (and half-width ｢｣｡､･ etc.) in data to full-width
# using NKF, then replaces U+2014 with ―, in place.
#
# Returns data.
def hankakukana_to_zenkakukana(data)
  widened = NKF.nkf("-wWX", data)
  widened.tr!("\u2014", "―")
  data.replace(widened)
end

#inclusion_author_comment_block?(line) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/converterbase.rb', line 776

# Decides whether line opens an author-comment block and records which kind.
#
# A page break opens an :introduction block when find_introduction? reports an
# introduction ahead. A line matching AUTHOR_POSTSCRIPT_SPLITTER opens a
# :postscript block. The kind is stored in @in_author_comment_block.
#
# Returns true when a block was opened, false otherwise.
def inclusion_author_comment_block?(line)
  if page_break?(line)
    return false unless find_introduction?

    @in_author_comment_block = :introduction
    true
  elsif line =~ AUTHOR_POSTSCRIPT_SPLITTER
    @in_author_comment_block = :postscript
    true
  else
    false
  end
end

#insert_blank_before_line_and_behind_to_special_chapter(line) ⇒ `Object`

# File 'lib/converterbase.rb', line 667

# Makes sure a chapter-heading placeholder line is surrounded by blank lines.
#
# When line contains ［＃章見出しっぽい文＝, a newline is put in front of it
# unless the previous line (@before_line) is blank, and a blank line is
# requested after it through @request_insert_blank_next_line.
#
# line is modified in place; returns the result of sub!.
def insert_blank_before_line_and_behind_to_special_chapter(line)
  is_heading = line =~ /［＃章見出しっぽい文＝/
  lead = is_heading && !blank_line?(@before_line) ? "\n" : ""
  @request_insert_blank_next_line = true if is_heading
  line.sub!(/\A/, lead)
end

#insert_blank_line_to_border_symbol(line) ⇒ `Object`

■などの区切りの前後には空行が必ず存在するようにする

# File 'lib/converterbase.rb', line 697

# Makes sure a separator line (■ and the like) always has a blank line before
# and after it.
#
# For a border-symbol line, a newline is put in front of it unless the
# previous line (@before_line) is blank, a blank line is requested after it
# through @request_insert_blank_next_line, and the line is indented with
# jisage(line, 4).
#
# line is modified in place; returns the result of sub!.
def insert_blank_line_to_border_symbol(line)
  is_border = border_symbol?(line)
  lead = is_border && !blank_line?(@before_line) ? "\n" : ""
  if is_border
    @request_insert_blank_next_line = true
    jisage(line, 4)
  end
  line.sub!(/\A/, lead)
end

#insert_char_separator(str) ⇒ `Object`

文字単位でzwsを挿入する

# File 'lib/converterbase.rb', line 1273

# Inserts WORD_SEPARATOR after each ordinary character of str.
#
# Handling per character read from the scanner:
# * "｜" followed by text up to "》", and "［" followed by "＃…］", are copied
#   through verbatim without separators.
# * "<" followed by text up to ">" is copied through verbatim.
# * Opening brackets are copied without a separator and clear the
#   "previous was a symbol" state.
# * Symbol characters (and a lone "<") get no separator after them; a separator
#   is inserted before the next non-symbol character instead.
#
# Returns a new String.
def insert_char_separator(str)
  out = +""
  scanner = StringScanner.new(str)
  after_symbol = false
  while (char = scanner.getch)
    case char
    when "｜", "［"
      span = char == "｜" ? /.+?》/ : /^＃.+?］/
      out << char
      if scanner.scan(span)
        out << scanner.matched
      else
        after_symbol = false
      end
      next
    when "<"
      if scanner.scan(/.+?>/)
        out << "<" << scanner.matched
        next
      end
      is_symbol = true
    when /[〔「『\(（【〈《≪〝]/
      out << char
      after_symbol = false
      next
    when /[―…!?！？※]/
      is_symbol = true
    else
      is_symbol = false
    end
    out << WORD_SEPARATOR if after_symbol && !is_symbol
    out << char
    out << WORD_SEPARATOR unless is_symbol
    after_symbol = is_symbol
  end
  out
end

#insert_separate_space(data) ⇒ `Object`

特定の記号の直後は全角アキを挿入する

# File 'lib/converterbase.rb', line 303

# Inserts a full-width space after a run of ! / ? marks (half- or full-width).
#
# The single character following the run is inspected: a half-width space,
# "、" or "。" is first replaced by a full-width space. A full-width space is
# then inserted between the marks and the following character unless that
# character is one of the excluded characters listed in the pattern below.
#
# data - String, modified in place.
#
# Returns the result of String#gsub!.
def insert_separate_space(data)
  data.gsub!(/([!?！？]+)([^!?！？])/) do
    marks = Regexp.last_match(1)
    following = Regexp.last_match(2)
    following = "　" if following =~ /[ 、。]/
    gap = following =~ /[^」］｝\]\}』】〉》〕＞>≫)）"”’〟　☆★♪［―]/ ? "　" : ""
    "#{marks}#{gap}#{following}"
  end
end

#insert_separator_for_selection(str) ⇒ `Object`

Kindle端末で単語選択がしやすいように０幅スペースを挿入する

# File 'lib/converterbase.rb', line 1197

# Dispatches to the word- or character-level separator inserter.
#
# str is returned untouched unless @device is set and reports kindle?, and
# @text_type is "body" or "textfile". Otherwise
# @setting.enable_insert_word_separator takes precedence over
# @setting.enable_insert_char_separator; with neither enabled str is returned.
def insert_separator_for_selection(str)
  return str unless @device && @device.kindle?
  return str unless ["body", "textfile"].include?(@text_type)
  return insert_word_separator(str) if @setting.enable_insert_word_separator
  return insert_char_separator(str) if @setting.enable_insert_char_separator
  str
end

#insert_word_separator(str) ⇒ `Object`

単語単位でzwsを挿入する

# File 'lib/converterbase.rb', line 1212

# Inserts WORD_SEPARATOR after each word-like run of str.
#
# When @text_type is "textfile", the first two newline-terminated lines are
# copied through untouched before scanning starts.
#
# Handling per character read from the scanner:
# * "｜" plus the text up to the next "》" is emitted as one unit.
# * "［" followed by "＃…］" and "<" followed by text up to ">" are copied
#   through verbatim without separators.
# * Runs of digits, hiragana, katakana, alphabet (with half-width spaces) and
#   kanji are each emitted as one unit followed by a separator.
# * Opening brackets are copied without a separator.
# * Anything else counts as a symbol: no separator after it, but one is
#   inserted before the next non-symbol unit.
#
# Returns a new String.
def insert_word_separator(str)
  buffer = +""
  ss = StringScanner.new(str)
  before_symbol = false

  if @text_type == "textfile"
    # scan returns nil when str has fewer than two newline-terminated lines,
    # and String#<< raises TypeError on nil, so only append an actual match.
    header = ss.scan(/(.+\n){2}/)
    buffer << header if header
  end

  while (char = ss.getch)
    symbol = false
    case char
    when "｜"
      ss.scan(/.+?》/)
    when "［"
      buffer << char
      if ss.scan(/^＃.+?］/)
        buffer << "#{ss.matched}"
      else
        before_symbol = false
      end
      next
    when "<"
      if ss.scan(/.+?>/)
        buffer << "<#{ss.matched}"
        next
      end
      symbol = true
    when /[\d０-９]/
      ss.scan(/[\d０-９]+/)
    when /[ぁ-んゝゞ]/
      ss.scan(/[ぁ-んゝゞー]+/)
    when /[ァ-ヶ]/
      ss.scan(/[ァ-ヶー・]+/)
    when /[Ａ-Ｚａ-ｚA-Za-z]/
      ss.scan(/[Ａ-Ｚａ-ｚA-Za-z ]+/)
    when /[一-龥朗-鶴]/
      ss.scan(/[一-龥朗-鶴]+/)
    when /[〔「『\(（【〈《≪〝]/
      buffer << char
      before_symbol = false
      next
    else
      symbol = true
    end
    if before_symbol && !symbol
      buffer << WORD_SEPARATOR
    end
    buffer << char
    unless symbol
      # matched? reflects the scan issued in the branch above; a failed scan
      # means the unit is just the single character already appended.
      buffer << ss.matched if ss.matched?
      buffer << WORD_SEPARATOR
    end
    before_symbol = symbol
  end
  buffer
end

#is_sesame?(str, ten, last_char) ⇒ `Boolean`

Returns:

(Boolean)



905
906
907

# File 'lib/converterbase.rb', line 905

# Tells whether ten consists solely of "・" / "、" characters and str either
# contains "｜" or ends in a character accepted by object_of_ruby?.
#
# Returns a truthy value when both hold, otherwise nil or false.
def is_sesame?(str, ten, last_char)
  dots_only = ten =~ /^[・、]+$/
  dots_only && (str.include?("｜") || object_of_ruby?(last_char))
end

#jisage(line, num) ⇒ `Object`

行頭空白を考慮した字下げ



681
682
683

# File 'lib/converterbase.rb', line 681

# Replaces the leading run of half-width spaces, full-width spaces and tabs
# of line with num full-width spaces.
#
# line - String, modified in place.
# num  - Integer number of full-width spaces to indent with.
#
# Returns the result of String#sub!.
def jisage(line, num)
  indent = "　" * num
  line.sub!(/^[ 　\t]*/, indent)
end

#join_inner_bracket(str) ⇒ `Object`

改行を連結した文章を作る

改行がひとつもなかった場合は nil を返す

# File 'lib/converterbase.rb', line 826

# Builds a single-line version of str by joining its lines.
#
# A "。" is inserted after a line-ending "…" or "―", and leading full-width
# spaces are stripped from every line before the lines are concatenated.
#
# Returns the joined String, or nil when str contains no newline.
def join_inner_bracket(str)
  return nil unless str.include?("\n")
  terminated = str.gsub(/([…―])\n/, "\\1。\n")
  lines = terminated.split("\n")
  lines.map { |segment| segment.sub(/^　+/, "") }.join
end

#kanji_num_to_integer(string) ⇒ `Object`

# File 'lib/converterbase.rb', line 198

# Converts a kanji numeral string into an Integer.
#
# The string is scanned for groups made of a digit part (KANJI_NUM digits and
# 十/百/千) followed by zero or more large-unit characters (万/億/兆/京). Each
# digit part is evaluated with __calc_kanji_num_with_unit, then padded with as
# many zeros as KANJI_NUM_UNITS_DIGIT gives for each unit character, and the
# group values are summed.
#
# Returns the Integer total (0 when nothing matched).
def kanji_num_to_integer(string)
  sum = 0
  string.scan(/([#{KANJI_NUM}十百千]+)([万億兆京]*)/) do |digits, units|
    head = __calc_kanji_num_with_unit(digits).to_s
    zeros = units.each_char.map { |unit| "0" * KANJI_NUM_UNITS_DIGIT[unit] }.join
    sum += (head + zeros).to_i
  end
  sum
end

#leave_author_comment_block?(line) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/converterbase.rb', line 790

# Tells whether line closes the author comment block currently recorded in
# @in_author_comment_block.
#
# An :introduction block ends on a line matching AUTHOR_INTRODUCTION_SPLITTER;
# a :postscript block ends on a page-break line.
#
# Returns true or false.
def leave_author_comment_block?(line)
  closes =
    case @in_author_comment_block
    when :introduction then line =~ AUTHOR_INTRODUCTION_SPLITTER
    when :postscript then page_break?(line)
    end
  closes ? true : false
end

#midashi(str) ⇒ `Object`

# File 'lib/converterbase.rb', line 1075

# Builds a mid-level heading annotation from str.
#
# The "［＃半字下げ］" annotation and the whitespace (including full-width
# spaces) at the start and end of each line are removed to obtain the title.
# The title is stored in @inspector.subtitle.
#
# Returns the title wrapped in the heading annotations.
def midashi(str)
  title = str.gsub("［＃半字下げ］", "")
  title = title.gsub(/^[　\s]+/, "")
  title = title.gsub(/[　\s]+$/, "")
  @inspector.subtitle = title
  "［＃３字下げ］［＃中見出し］#{title}［＃中見出し終わり］"
end

#modify_kana_ni_to_kanji_ni(data) ⇒ `Object`

漢字の二じゃなくて間違えてカタカナのニを使ってるのを校正する

# File 'lib/converterbase.rb', line 1054

# Corrects a katakana "ニ" that has two non-KANA characters on each side to
# the kanji "二", when @setting.enable_kana_ni_to_kanji_ni is truthy.
#
# Each correction is reported through @inspector.info together with an excerpt
# of up to ten characters of context on either side of the match.
#
# data - String, modified in place.
#
# Returns data.
def modify_kana_ni_to_kanji_ni(data)
  return data unless @setting.enable_kana_ni_to_kanji_ni
  data.gsub!(/([^#{KANA}]{2})ニ([^#{KANA}]{2})/) do
    before = $`
    # Keep only the last ten characters of the text preceding the match.
    prefix = before.length > 10 ? before[-10, 10] : before
    excerpt = prefix + $1 + "ニ" + $2 + $'[0, 10]
    @inspector.info(<<-EOS % excerpt)
カタカナのニを漢字の二に修正しました
≫≫≫ 該当箇所
...%s...
    EOS
    "#{$1}二#{$2}"
  end
  data
end

#narou_ruby(data) ⇒ `Object`

小説家になろうのルビ対策

# File 'lib/converterbase.rb', line 884

# Converts ruby notation in data (in place).
#
# Unless @text_type is "subtitle" or "chapter", every "base≪reading≫" match is
# passed to to_ruby; when @data_type is "text", "base（reading）" matches whose
# reading is made of AUTO_RUBY_CHARACTERS are passed to to_ruby as well.
# Afterwards data is run through replace_tatesen, and the temporary
# "［＃ルビ用縦線］" annotation is turned back into "｜".
#
# Returns the result of the final String#gsub!.
def narou_ruby(data)
  unless ["subtitle", "chapter"].include?(@text_type)
    # Handle ≪≫-style ruby.
    data.gsub!(/(.+?)≪([^≪]+?)≫/) do |whole|
      to_ruby(whole, $1, $2, ["≪", "≫"])
    end
    if @data_type == "text"
      # Handle （）-style ruby.
      data.gsub!(/(.+?)（#{AUTO_RUBY_CHARACTERS}）/) do |whole|
        to_ruby(whole, $1, $2, ["（", "）"])
      end
    end
  end
  escaped = replace_tatesen(data)
  data.replace(escaped)
  data.gsub!("［＃ルビ用縦線］", "｜")
end

#num_to_kanji(data) ⇒ `Object`

アラビア数字を漢数字に

カンマ区切りの数字はアラビア数字のままにしておく。もともと漢数字なのは、他の変換を受けないように退避させておく。

# File 'lib/converterbase.rb', line 119

# Converts Arabic numerals in data to kanji numerals (in place).
#
# Existing kanji numerals are stashed first via stash_kanji_num. Then each run
# of digits and commas is handled as follows:
# * no comma: half-width digits are mapped to KANJI_NUM and the result is
#   passed through zenkaku_num_to_kanji;
# * comma plus at least one half-width digit: full-width commas are
#   normalized and the run is stashed via stash_hankaku_num_and_comma;
# * comma without any half-width digit: left as is.
#
# Returns data.
def num_to_kanji(data)
  stash_kanji_num(data)
  data.gsub!(/[\d０-９,，]+/) do |token|
    next zenkaku_num_to_kanji(token.tr("0-9", KANJI_NUM)) unless token =~ /[,，]/
    next token unless token =~ /[\d]/
    stash_hankaku_num_and_comma(token.tr("，", ","))
  end
  data
end

#object_of_ruby?(char) ⇒ `Boolean`

Returns:

(Boolean)



901
902
903

# File 'lib/converterbase.rb', line 901

# Tests char against the character class built from CHARACTER_OF_RUBY.
#
# Returns the match index (truthy) or nil.
def object_of_ruby?(char)
  ruby_target = /[#{CHARACTER_OF_RUBY}]/
  char =~ ruby_target
end

#outputs(data = "", force = false) ⇒ `Object`

# File 'lib/converterbase.rb', line 71

# Writes data to @write_fp with puts.
#
# Nothing is written while @request_skip_output_line is set, unless force is
# truthy.
#
# Returns nil.
def outputs(data = "", force = false)
  return if @request_skip_output_line && !force
  @write_fp.puts(data)
end

#page_break?(line) ⇒ `Boolean`

改ページある？

Returns:

(Boolean)



712
713
714

# File 'lib/converterbase.rb', line 712

# Tests whether line contains the page-break annotation "［＃改ページ］".
#
# Returns the match index (truthy) or nil.
def page_break?(line)
  page_break_chuki = /［＃改ページ］/
  line =~ page_break_chuki
end

#process_author_comment(line) ⇒ `Object`

# File 'lib/converterbase.rb', line 731

# Tracks author comment blocks (introduction / postscript) line by line and
# emits the matching open/close annotations from AUTHOR_COMMENT_CHUKI.
#
# Does nothing unless @setting.enable_author_comments is truthy.
#
# Inside a block: when leave_author_comment_block?(line) holds, the close
# annotation is written with outputs. For an introduction the splitter line is
# cleared and marked to be skipped; for a postscript the state is reset and the
# same line is processed again.
#
# Outside a block: when inclusion_author_comment_block?(line) holds, the open
# annotation is queued with delay_outputs; for a postscript the splitter line
# is cleared and marked to be skipped.
def process_author_comment(line)
  return unless @setting.enable_author_comments
  if @in_author_comment_block
    return unless leave_author_comment_block?(line)
    outputs(AUTHOR_COMMENT_CHUKI[@in_author_comment_block][:close])
    case @in_author_comment_block
    when :introduction
      @request_skip_output_line = true
      line.clear
      @in_author_comment_block = nil
    when :postscript
      @in_author_comment_block = nil
      # A ［＃改ページ］ (where an introduction can start) was detected, so
      # run the introduction detection again on this line.
      process_author_comment(line)
    end
  elsif inclusion_author_comment_block?(line)
    # outputs would place the annotation before the page break, so
    # delay_outputs is used to defer it until after line has been written.
    delay_outputs(AUTHOR_COMMENT_CHUKI[@in_author_comment_block][:open])
    if @in_author_comment_block == :postscript
      @request_skip_output_line = true
      line.clear
    end
  end
end

#rebuild_english_sentences(data) ⇒ `Object`

英文を再構成する

# File 'lib/converterbase.rb', line 541

# Restores stashed English sentences.
#
# For each entry of @english_sentences, the first "［＃英文＝ID］" placeholder
# (ID being the entry index passed through convert_numbers) in data is
# replaced by the sentence.
#
# data - String, modified in place.
#
# Returns @english_sentences.
def rebuild_english_sentences(data)
  @english_sentences.each_with_index do |sentence, index|
    placeholder = "［＃英文＝#{convert_numbers(index.to_s)}］"
    data.sub!(placeholder, sentence)
  end
end

#rebuild_force_indent_special_chapter(data) ⇒ `Object`

# File 'lib/converterbase.rb', line 661

# Replaces every "［＃章見出しっぽい文＝KEY］" placeholder in data with the
# value stored under KEY in @force_indent_special_chapter_list.
#
# data - String, modified in place.
#
# Returns the result of String#gsub!.
def rebuild_force_indent_special_chapter(data)
  data.gsub!(/［＃章見出しっぽい文＝(.+?)］/) do
    key = Regexp.last_match(1)
    @force_indent_special_chapter_list[key]
  end
end

#rebuild_hankaku_num_and_comma(data) ⇒ `Object`

# File 'lib/converterbase.rb', line 142

# Replaces every "［＃半角数字＝ID］" placeholder in data with the value
# stored in @num_and_comma_list under ID converted with to_i.
#
# data - String, modified in place.
#
# Returns the result of String#gsub!.
def rebuild_hankaku_num_and_comma(data)
  data.gsub!(/［＃半角数字＝(.+?)］/) do
    id = Regexp.last_match(1).to_i
    @num_and_comma_list[id]
  end
end

#rebuild_illust(data) ⇒ `Object`

# File 'lib/converterbase.rb', line 1023

# Restores stashed illustration annotations.
#
# For each entry of @illust_chuki_list, the first "［＃挿絵＝ID］" placeholder
# (ID being the entry index passed through convert_numbers) in data is
# replaced by the stored annotation.
#
# data - String, modified in place.
#
# Returns @illust_chuki_list.
def rebuild_illust(data)
  @illust_chuki_list.each_with_index do |chuki, index|
    placeholder = "［＃挿絵＝#{convert_numbers(index.to_s)}］"
    data.sub!(placeholder, chuki)
  end
end

#rebuild_kanji_num(data) ⇒ `Object`

# File 'lib/converterbase.rb', line 158

# Replaces every "［＃漢数字＝KEY］" placeholder in data with the value stored
# under KEY in @kanji_num_list.
#
# data - String, modified in place.
#
# Returns the result of String#gsub!.
def rebuild_kanji_num(data)
  data.gsub!(/［＃漢数字＝(.+?)］/) do
    key = Regexp.last_match(1)
    @kanji_num_list[key]
  end
end

#rebuild_kome_to_gaiji(data) ⇒ `Object`

※の外字注記化

stash_kome で2つにしておいた※を外字注記化する



461
462
463

# File 'lib/converterbase.rb', line 461

# Turns each doubled "※※" in data into "※" followed by its gaiji annotation.
#
# data - String, modified in place.
#
# Returns the result of String#gsub!.
def rebuild_kome_to_gaiji(data)
  doubled = "※※"
  data.gsub!(doubled, "※［＃米印、1-2-8］")
end

#rebuild_url(data) ⇒ `Object`

# File 'lib/converterbase.rb', line 1004

# Restores stashed URLs as HTML anchors.
#
# For each entry of @url_list, the first "［＃ＵＲＬ＝ID］" placeholder (ID
# being the entry index passed through convert_numbers) in data is replaced by
# an <a> element whose href and text are both the URL.
#
# data - String, modified in place.
#
# Returns @url_list.
def rebuild_url(data)
  @url_list.each_with_index do |url, index|
    placeholder = "［＃ＵＲＬ＝#{convert_numbers(index.to_s)}］"
    anchor = "<a href=\"#{url}\">#{url}</a>"
    data.sub!(placeholder, anchor)
  end
end

#replace_by_replace_txt(text) ⇒ `Object`

replace.txt により単純置換

# File 'lib/converterbase.rb', line 1438

# Applies the simple replacement pairs to a copy of text.
#
# The pairs come from @setting.replace_pattern followed by
# Narou.global_replace_pattern, and are applied in that order with
# String#gsub!.
#
# Returns the new String; text itself is not modified.
def replace_by_replace_txt(text)
  patterns = @setting.replace_pattern + Narou.global_replace_pattern
  patterns.each_with_object(text.dup) do |(src, dst), replaced|
    replaced.gsub!(src, dst)
  end
end

#replace_illust_tag(data) ⇒ `Object`

挿絵タグやimgタグ等を挿絵注釈に変換する。挿絵画像が存在しなければダウンロードして保存する。

# File 'lib/converterbase.rb', line 1015

# Hands data to @illustration.scanner and supplies the replacement for each
# annotation it yields.
#
# When @setting.enable_illust is falsy the replacement is an empty string.
# Otherwise the yielded annotation is appended to @illust_chuki_list and the
# replacement is a "［＃挿絵＝ID］" placeholder (ID = its index in the list)
# followed by a newline.
#
# Returns whatever @illustration.scanner returns.
def replace_illust_tag(data)
  @illustration.scanner(data) do |chuki|
    if @setting.enable_illust
      @illust_chuki_list << chuki
      "［＃挿絵＝#{@illust_chuki_list.size - 1}］\n"
    else
      ""
    end
  end
end

#replace_narou_tag(data) ⇒ `Object`

小説家になろう専用タグを置換

# File 'lib/converterbase.rb', line 318

# Removes "【改ページ】" from data and turns <KBR> / <PBR> tags
# (case-insensitive) into newlines.
#
# data - String, modified in place.
#
# Returns the result of the last String#gsub! (the <PBR> replacement).
def replace_narou_tag(data)
  line_break = "\n"
  data.gsub!("【改ページ】", "")
  data.gsub!(/<KBR>/i, line_break)
  data.gsub!(/<PBR>/i, line_break)
end

#replace_tatesen(str) ⇒ `Object`



924
925
926

# File 'lib/converterbase.rb', line 924

# Returns a copy of str in which every "｜" is replaced by "※［＃縦線］".
def replace_tatesen(str)
  tatesen_chuki = "※［＃縦線］"
  str.gsub("｜") { tatesen_chuki }
end

#replace_url(data) ⇒ `Object`

URL っぽい文字列を一旦別のIDに置き換えてあとで復元することで、変換処理の影響を受けさせない

# File 'lib/converterbase.rb', line 997

# Replaces every http/https URL found in data with a "［＃ＵＲＬ＝ID］"
# placeholder and appends the URL to @url_list (ID = its index in that list).
#
# data - String, modified in place.
#
# Returns the result of String#gsub! (data, or nil when no URL was found).
def replace_url(data)
  # On newer uri versions URI::DEFAULT_PARSER is the RFC 3986 parser, where
  # make_regexp only survives as an obsolete compatibility shim. Use the
  # RFC 2396 parser explicitly when the constant exists; on older versions
  # DEFAULT_PARSER is that same parser.
  parser = defined?(URI::RFC2396_PARSER) ? URI::RFC2396_PARSER : URI::DEFAULT_PARSER
  data.gsub!(parser.make_regexp(%w(http https))) do |match|
    @url_list << match
    "［＃ＵＲＬ＝#{@url_list.size - 1}］"
  end
end

#reset_member_values ⇒ `Object`

.convert が実行されるたびに呼ばれるメンバ変数リセット用メソッド。インスタンス作成時に一度だけ初期化したい場合は initialize で初期化する。

# File 'lib/converterbase.rb', line 55

# Resets the per-conversion instance state to its initial values and
# refreshes @device from Narou.get_device.
def reset_member_values
  # Line-processing flags and buffers.
  @request_insert_blank_next_line = @request_skip_output_line = @in_comment_block = false
  @in_author_comment_block = nil
  @before_line = ""
  @delay_outputs_buffer = +""

  # Stashes filled while converting and consumed by the rebuild_* methods.
  @english_sentences = []
  @url_list = []
  @illust_chuki_list = []
  @kanji_num_list = {}
  @num_and_comma_list = {}
  @force_indent_special_chapter_list = {}

  @device = Narou.get_device
end

#rstrip_all_lines(data) ⇒ `Object`

すべての行の行末空白を削除



86
87
88

# File 'lib/converterbase.rb', line 86

# Returns a copy of data with the trailing half-width spaces, full-width
# spaces and tabs removed from the end of every line. data is not modified.
def rstrip_all_lines(data)
  trailing_blank = /[ 　\t]+$/m
  data.gsub(trailing_blank, "")
end

#ruby_youon_to_big(ruby) ⇒ `Object`

ルビの拗音(ぁ、ぃ等)を商業書籍のように大きくする

# File 'lib/converterbase.rb', line 978

# Enlarges small kana in a ruby reading when
# @setting.enable_ruby_youon_to_big is truthy.
#
# Returns the converted copy, or ruby itself when the setting is off.
def ruby_youon_to_big(ruby)
  return ruby unless @setting.enable_ruby_youon_to_big
  ruby.tr("ぁぃぅぇぉゃゅょゎっァィゥェォャュョヮッヵヶ",
          "あいうえおやゆよわつアイウエオヤユヨワツカケ")
end

#sentence?(match) ⇒ `Boolean`

Returns:

(Boolean)



509
510
511

# File 'lib/converterbase.rb', line 509

# Tells whether match splits into two or more space-separated words.
#
# Returns true or false.
def sentence?(match)
  words = match.split(" ")
  words.size >= 2
end

#sesame(str) ⇒ `Object`

# File 'lib/converterbase.rb', line 909

# Wraps the emphasized part of str in the 傍点 (emphasis dots) annotations.
#
# When str contains "｜", the first "｜" becomes the opening annotation.
# Otherwise the opening annotation is placed before the trailing run of
# CHARACTER_OF_RUBY characters / full-width spaces, after any leading
# full-width spaces of that run. The closing annotation is always appended.
#
# Returns a new String.
def sesame(str)
  closing = "［＃傍点終わり］"
  return str.sub("｜", "［＃傍点］") + closing if str.include?("｜")

  opened = str.sub(/([#{CHARACTER_OF_RUBY}　]+)$/) do
    target = Regexp.last_match(1)
    spaces = target[/^　+/]
    if spaces
      "#{spaces}［＃傍点］#{target[spaces.length..-1]}"
    else
      "［＃傍点］#{target}"
    end
  end
  opened + closing
end

#should_word_be_hankaku?(word) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/converterbase.rb', line 504

# Tells whether word should stay half-width.
#
# That is the case when word contains at least one ASCII letter and either its
# length reaches ENGLISH_SENTENCES_MIN_LENGTH or
# @setting.disable_alphabet_word_to_zenkaku is truthy.
#
# Returns a truthy MatchData when it should, otherwise nil or false.
def should_word_be_hankaku?(word)
  long_enough = word.length >= ENGLISH_SENTENCES_MIN_LENGTH
  keep_hankaku = long_enough || @setting.disable_alphabet_word_to_zenkaku
  keep_hankaku && word.match(/[a-z]/i)
end

#stash_hankaku_num_and_comma(num) ⇒ `Object`

# File 'lib/converterbase.rb', line 135

# Stores num in @num_and_comma_list under a fresh id and returns the
# placeholder that refers to it.
#
# The id comes from the class-level counter @@num_and_comma_list_counter,
# which is created on first use and incremented on every call.
#
# Returns the "［＃半角数字＝ID］" placeholder String.
def stash_hankaku_num_and_comma(num)
  @@num_and_comma_list_counter ||= 0
  id = (@@num_and_comma_list_counter += 1)
  @num_and_comma_list[id] = num
  "［＃半角数字＝#{id}］"
end

#stash_kanji_num(data) ⇒ `Object`

# File 'lib/converterbase.rb', line 148

# Replaces runs of kanji numeral characters in data with "［＃漢数字＝N］"
# placeholders and records the original text in @kanji_num_list.
#
# data - String, modified in place.
#
# N is the running index supplied by with_index; it advances for every match,
# including the ones that are left untouched. The list key is that index
# passed through convert_numbers, while the placeholder carries the raw index.
def stash_kanji_num(data)
  data.gsub!(/[#{KANJI_NUM}十百千万億兆京]+/).with_index do |match, i|
    # Leave the run alone when the character right before or right after it
    # is an Arabic digit (half- or full-width).
    if "#{$`[-1]}#{$'[0]}" =~ /[\d０-９]/
      next match
    end
    @kanji_num_list[convert_numbers(i.to_s)] = match
    "［＃漢数字＝#{i}］"
  end
end

#stash_kome(data) ⇒ `Object`

先に外字注記にしてしまうと border_symbol? 等で困るので、あとで外字注記化出来るようにする



444
445
446

# File 'lib/converterbase.rb', line 444

# Doubles every "※" in data so it can be turned into a gaiji annotation later.
#
# data - String, modified in place.
#
# Returns the result of String#gsub!.
def stash_kome(data)
  kome = "※"
  data.gsub!(kome, "※※")
end

#symbols_to_zenkaku(data) ⇒ `Object`

半角記号を全角に変換

# File 'lib/converterbase.rb', line 364

# Converts half-width symbols in data to full-width ones (in place).
#
# Steps, in order:
# 1. Text enclosed by characters from SINGLE_MINUTE_FAMILY, and then by
#    characters from DOUBLE_MINUTE_FAMILY, is re-wrapped in 〝…〟. The enclosed
#    text may not contain a double quote or a newline.
# 2. The listed half-width symbols are mapped one-to-one to their counterparts
#    in the second tr! argument.
# 3. Backslashes become "￥".
#
# Returns data.
def symbols_to_zenkaku(data)
  # MEMO: almost no fonts can display single minute marks, so they are turned into double ones
  data.gsub!(/[#{SINGLE_MINUTE_FAMILY}]([^"\n]+?)[#{SINGLE_MINUTE_FAMILY}]/, "〝\\1〟")
  data.gsub!(/[#{DOUBLE_MINUTE_FAMILY}]([^"\n]+?)[#{DOUBLE_MINUTE_FAMILY}]/, "〝\\1〟")
  data.tr!("-=+/*《》'\"%$#&!?<>＜＞()|‐,._;:\[\]{}",
           "－＝＋／＊≪≫’〝％＄＃＆！？〈〉〈〉（）｜－，．＿；：［］｛｝")
  data.gsub!("\\", "￥")
  data
end

#tcy(str) ⇒ `Object`

縦中横注記取得



377
378
379

# File 'lib/converterbase.rb', line 377

# Returns str wrapped in the 縦中横 (horizontal-in-vertical) annotations.
def tcy(str)
  format("［＃縦中横］%s［＃縦中横終わり］", str)
end

#to_ruby(match, m1, m2, openclose_symbols) ⇒ `Object`

# File 'lib/converterbase.rb', line 928

# Decides how a "base + bracketed reading" match is rendered.
#
# match             - the whole matched text, returned as is when no
#                     conversion applies.
# m1                - the text before the opening symbol.
# m2                - the text between the symbols (the reading).
# openclose_symbols - two-element Array with the opening and closing symbol.
#
# Returns the replacement String.
def to_ruby(match, m1, m2, openclose_symbols)
  last_char = m1[-1]

  # A reading that starts with a half-width space is rejected.
  return match if m2[0] == " "
  # Two or more trailing half-width spaces are rejected (a single one is fine).
  return match if m2 =~ / {2,}$/

  if last_char == "｜"
    # A "｜" right before the opening symbol suppresses ruby conversion.
    return "#{m1[0...-1]}#{openclose_symbols[0]}#{m2}#{openclose_symbols[1]}"
  end
  return sesame(m1) if is_sesame?(m1, m2, last_char)
  if m1.include?("｜")
    return "#{m1.sub(/｜([^｜]*)$/, "［＃ルビ用縦線］\\1")}《#{ruby_youon_to_big(m2)}》"
  end
  return match unless object_of_ruby?(last_char)

  # Even for ≪ ≫ ruby, automatic conversion without "｜" only applies to
  # readings made of kana and the like.
  return match if openclose_symbols[0] == "≪" && m2 !~ /^#{AUTO_RUBY_CHARACTERS}$/

  if m2 =~ /^([ぁ-んァ-ヶーゝゞ・]+)[ 　]?([ぁ-んァ-ヶーゝゞ・]*)$/
    build_ruby(m1, m2, $1, $2)
  else
    match
  end
end

#zenkaku_num_to_hankaku_num(num) ⇒ `Object`

全角数字(漢数字含む)を半角アラビア数字に



573
574
575

# File 'lib/converterbase.rb', line 573

# Returns a copy of num in which full-width digits and KANJI_NUM digits are
# replaced by half-width Arabic digits.
def zenkaku_num_to_hankaku_num(num)
  wide_digits = "０-９#{KANJI_NUM}"
  num.tr(wide_digits, "0-90-9")
end

#zenkaku_num_to_kanji(str) ⇒ `Object`

全角アラビア数字を漢数字に



167
168
169

# File 'lib/converterbase.rb', line 167

# Returns a copy of str in which full-width Arabic digits are replaced by the
# corresponding KANJI_NUM characters.
def zenkaku_num_to_kanji(str)
  kanji_digits = KANJI_NUM
  str.tr("０-９", kanji_digits)
end

#zenkaku_rstrip(line) ⇒ `Object`

全角版 String#rstrip!



990
991
992

# File 'lib/converterbase.rb', line 990

# Strips trailing whitespace, including full-width spaces, from the end of
# line (in place).
#
# Returns line when something was removed, nil otherwise (the String#gsub!
# result).
def zenkaku_rstrip(line)
  trailing_blank = /[　\s]+\z/
  line.gsub!(trailing_blank, "")
end

Class: ConverterBase

Constant Summary collapse

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(setting, inspector, illustration) ⇒ ConverterBase

Instance Attribute Details

#current_index ⇒ Object

#data_type ⇒ Object

#output_text_dir ⇒ Object

#subtitles ⇒ Object

#use_dakuten_font ⇒ Object (readonly)

Class Method Details

.rebuild_brackets(data, stack) ⇒ Object

Instance Method Details

#__calc_kanji_num_with_unit(string) ⇒ Object

#__calc_sum_unit(units) ⇒ Object

#after(io, text_type) ⇒ Object

#after_convert(io) ⇒ Object

#alphabet_to_zenkaku(data, force = false) ⇒ Object

#author_comment_force_close ⇒ Object

#auto_indent(data) ⇒ Object

#auto_join_in_brackets(data) ⇒ Object

#auto_join_line(data) ⇒ Object

#before(io, text_type) ⇒ Object

#before_convert(io) ⇒ Object

#blank_line?(line) ⇒ Boolean

#border_symbol?(line) ⇒ Boolean

#build_ruby(m1, m2, f1, f2) ⇒ Object

#calc_cr_count(str) ⇒ Object

#comments_block?(line) ⇒ Boolean

#convert(text, text_type) ⇒ Object

#convert_arrow(data) ⇒ Object

#convert_dakuten_char_to_font(data) ⇒ Object

#convert_double_angle_quotation_to_gaiji(data) ⇒ Object

#convert_for_all_data(data) ⇒ Object

#convert_fraction_and_date(data) ⇒ Object

#convert_head_half_spaces(data) ⇒ Object

#convert_horizontal_ellipsis(data) ⇒ Object

#convert_kanji_num_with_unit(data, lower_digit_zero = 0) ⇒ Object

#convert_main(io) ⇒ Object

#convert_novel_rule(data) ⇒ Object

#convert_numbers(data) ⇒ Object

#convert_page_break(data) ⇒ Object

#convert_prolonged_sound_mark_to_dash(data) ⇒ Object

#convert_rome_numeric(data) ⇒ Object

#convert_special_characters(data) ⇒ Object

#convert_tatechuyoko(data) ⇒ Object

#delay_outputs(data = "") ⇒ Object

#delete_dust_char(data) ⇒ Object

#enchant_midashi(data) ⇒ Object

#erase_comments_block(data) ⇒ Object

#erase_introduction(data) ⇒ Object

#erase_postscript(data) ⇒ Object

#exception_reconvert_kanji_to_num(data) ⇒ Object

#find_introduction? ⇒ Boolean

#force_indent_special_chapter(data) ⇒ Object

#half_indent_bracket(data) ⇒ Object

#hankaku_num_to_zenkaku_num(data) ⇒ Object

#hankakukana_to_zenkakukana(data) ⇒ Object

#inclusion_author_comment_block?(line) ⇒ Boolean

#insert_blank_before_line_and_behind_to_special_chapter(line) ⇒ Object

#insert_blank_line_to_border_symbol(line) ⇒ Object

#insert_char_separator(str) ⇒ Object

#insert_separate_space(data) ⇒ Object

#insert_separator_for_selection(str) ⇒ Object

#insert_word_separator(str) ⇒ Object

#is_sesame?(str, ten, last_char) ⇒ Boolean

#jisage(line, num) ⇒ Object

#join_inner_bracket(str) ⇒ Object

#kanji_num_to_integer(string) ⇒ Object

#leave_author_comment_block?(line) ⇒ Boolean

#midashi(str) ⇒ Object

#modify_kana_ni_to_kanji_ni(data) ⇒ Object

#narou_ruby(data) ⇒ Object

#num_to_kanji(data) ⇒ Object

#object_of_ruby?(char) ⇒ Boolean

#outputs(data = "", force = false) ⇒ Object

#page_break?(line) ⇒ Boolean

#initialize(setting, inspector, illustration) ⇒ `ConverterBase`

#current_index ⇒ `Object`

#data_type ⇒ `Object`

#output_text_dir ⇒ `Object`

#subtitles ⇒ `Object`

#use_dakuten_font ⇒ `Object` (readonly)

.rebuild_brackets(data, stack) ⇒ `Object`

#__calc_kanji_num_with_unit(string) ⇒ `Object`

#__calc_sum_unit(units) ⇒ `Object`

#after(io, text_type) ⇒ `Object`

#after_convert(io) ⇒ `Object`

#alphabet_to_zenkaku(data, force = false) ⇒ `Object`

#author_comment_force_close ⇒ `Object`

#auto_indent(data) ⇒ `Object`

#auto_join_in_brackets(data) ⇒ `Object`

#auto_join_line(data) ⇒ `Object`

#before(io, text_type) ⇒ `Object`

#before_convert(io) ⇒ `Object`

#blank_line?(line) ⇒ `Boolean`

#border_symbol?(line) ⇒ `Boolean`

#build_ruby(m1, m2, f1, f2) ⇒ `Object`

#calc_cr_count(str) ⇒ `Object`

#comments_block?(line) ⇒ `Boolean`

#convert(text, text_type) ⇒ `Object`

#convert_arrow(data) ⇒ `Object`

#convert_dakuten_char_to_font(data) ⇒ `Object`

#convert_double_angle_quotation_to_gaiji(data) ⇒ `Object`

#convert_for_all_data(data) ⇒ `Object`

#convert_fraction_and_date(data) ⇒ `Object`

#convert_head_half_spaces(data) ⇒ `Object`

#convert_horizontal_ellipsis(data) ⇒ `Object`

#convert_kanji_num_with_unit(data, lower_digit_zero = 0) ⇒ `Object`

#convert_main(io) ⇒ `Object`

#convert_novel_rule(data) ⇒ `Object`

#convert_numbers(data) ⇒ `Object`

#convert_page_break(data) ⇒ `Object`

#convert_prolonged_sound_mark_to_dash(data) ⇒ `Object`

#convert_rome_numeric(data) ⇒ `Object`

#convert_special_characters(data) ⇒ `Object`

#convert_tatechuyoko(data) ⇒ `Object`

#delay_outputs(data = "") ⇒ `Object`

#delete_dust_char(data) ⇒ `Object`

#enchant_midashi(data) ⇒ `Object`

#erase_comments_block(data) ⇒ `Object`

#erase_introduction(data) ⇒ `Object`

#erase_postscript(data) ⇒ `Object`

#exception_reconvert_kanji_to_num(data) ⇒ `Object`

#find_introduction? ⇒ `Boolean`

#force_indent_special_chapter(data) ⇒ `Object`

#half_indent_bracket(data) ⇒ `Object`

#hankaku_num_to_zenkaku_num(data) ⇒ `Object`

#hankakukana_to_zenkakukana(data) ⇒ `Object`

#inclusion_author_comment_block?(line) ⇒ `Boolean`

#insert_blank_before_line_and_behind_to_special_chapter(line) ⇒ `Object`

#insert_blank_line_to_border_symbol(line) ⇒ `Object`

#insert_char_separator(str) ⇒ `Object`

#insert_separate_space(data) ⇒ `Object`

#insert_separator_for_selection(str) ⇒ `Object`

#insert_word_separator(str) ⇒ `Object`

#is_sesame?(str, ten, last_char) ⇒ `Boolean`

#jisage(line, num) ⇒ `Object`

#join_inner_bracket(str) ⇒ `Object`

#kanji_num_to_integer(string) ⇒ `Object`

#leave_author_comment_block?(line) ⇒ `Boolean`

#midashi(str) ⇒ `Object`

#modify_kana_ni_to_kanji_ni(data) ⇒ `Object`

#narou_ruby(data) ⇒ `Object`

#num_to_kanji(data) ⇒ `Object`

#object_of_ruby?(char) ⇒ `Boolean`

#outputs(data = "", force = false) ⇒ `Object`

#page_break?(line) ⇒ `Boolean`

#process_author_comment(line) ⇒ `Object`

#rebuild_english_sentences(data) ⇒ `Object`

#rebuild_force_indent_special_chapter(data) ⇒ `Object`

#rebuild_hankaku_num_and_comma(data) ⇒ `Object`

#rebuild_illust(data) ⇒ `Object`

#rebuild_kanji_num(data) ⇒ `Object`

#rebuild_kome_to_gaiji(data) ⇒ `Object`

#rebuild_url(data) ⇒ `Object`

#replace_by_replace_txt(text) ⇒ `Object`