Class: CBETA

Inherits:
Object
  • Object
show all
Defined in:
lib/cbeta.rb

Defined Under Namespace

Classes: BMToText, CharCount, CharFrequency, Gaiji, HTMLToPDF, HTMLToText, P5aToHTML, P5aToHTMLForEveryEdition, P5aToHTMLForPDF, P5aToSimpleHTML, P5aToText, P5aValidator

Constant Summary collapse

# Regexp alternation source for a canon ID: the two-letter IDs DA, GA and GB
# are listed first, followed by any single uppercase letter. The class methods
# of this class interpolate it into their regular expressions.
CANON =
'DA|GA|GB|[A-Z]'
# Path of the 'data' directory that sits next to this source file.
DATA =
File.join(File.dirname(__FILE__), 'data')
# A string of punctuation characters.
# NOTE(review): presumably the set treated as punctuation during text
# processing; its users are not visible here, confirm before relying on it.
PUNCS =
'.[]。,、?「」『』《》<>〈〉〔〕[]【】〖〗'

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ CBETA

載入藏經資料



122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/cbeta.rb', line 122

# Loads the canon metadata shipped alongside this file.
#
# Reads 'data/canons.csv' (parsed with a header row) and fills two lookup
# tables keyed by the 'id' column:
# * @canon_nickname - the 'nickname' column, stored whenever it is not nil
# * @canon_abbr     - the 'abbreviation' column, stored only when it is
#                     neither nil nor empty
# Then reads 'data/categories.json' and keeps the parsed result in @categories.
def initialize()
  base_dir = File.dirname(__FILE__)

  csv_text = File.read(File.join(base_dir, 'data/canons.csv'))
  @canon_abbr = {}
  @canon_nickname = {}
  CSV.parse(csv_text, :headers => true) do |row|
    canon_id = row['id']

    nickname = row['nickname']
    @canon_nickname[canon_id] = nickname unless nickname.nil?

    abbreviation = row['abbreviation']
    # Rows without a usable abbreviation contribute a nickname only.
    next if abbreviation.nil? || abbreviation.empty?
    @canon_abbr[canon_id] = abbreviation
  end

  json_text = File.read(File.join(base_dir, 'data/categories.json'))
  @categories = JSON.parse(json_text)
end

Class Method Details

.get_canon_from_vol(vol) ⇒ String

由 冊號 取得 藏經 ID



23
24
25
# File 'lib/cbeta.rb', line 23

# Derives the canon ID from a volume ID.
#
# @param vol [String] volume ID, e.g. "T01" or "GA001"
# @return [String] the leading canon ID; when no line of +vol+ starts with a
#   canon ID the string is returned unchanged
def self.get_canon_from_vol(vol)
  canon_then_rest = /^(#{CANON}).*$/
  # Replace the whole matched line with just its canon prefix.
  vol.sub(canon_then_rest) { Regexp.last_match(1) }
end

.get_canon_id_from_linehead(linehead) ⇒ String

由 行首資訊 取得 藏經 ID



16
17
18
# File 'lib/cbeta.rb', line 16

# Derives the canon ID from a line-head string.
#
# @param linehead [String] line-head information, e.g. "T85n2838_p1291a03"
# @return [String] the leading canon ID; when no line of +linehead+ starts
#   with a canon ID the string is returned unchanged
def self.get_canon_id_from_linehead(linehead)
  pattern = /^(#{CANON}).*$/
  linehead.sub(pattern) { |_matched| Regexp.last_match(1) }
end

.get_sort_order_from_canon_id(canon) ⇒ String

由「藏經 ID」取得「排序用編號」,例如:傳入 “T” 回傳 “A”;傳入 “X” 回傳 “B”



56
57
58
59
60
61
62
63
# File 'lib/cbeta.rb', line 56

# Maps a canon ID to a single-letter sort key: the canon's position in the
# ordering table is added to the code of "A", so "T" yields "A", "X" yields
# "B", and so on.
#
# Aborts the process with an "unknown canon id" message when the ID is not in
# the table.
#
# @param canon [String] canon ID, e.g. "T"
# @return [String] one-character sort key
def self.get_sort_order_from_canon_id(canon)
  # Full-text search ordering table supplied by CBETA, as finalized by
  # Ven. Huimin, 2016-06-03.
  ordering = %w[T X A K S F C D U P J L G M N H I W B GA GB]
  position = ordering.index(canon)
  abort "unknown canon id: #{canon}" unless position

  ('A'.ord + position).chr
end

.get_work_id_from_file_basename(fn) ⇒ String

由 XML檔主檔名 取得 典籍編號



49
50
51
# File 'lib/cbeta.rb', line 49

# Derives the work ID from an XML file basename by dropping the two- or
# three-digit volume number and the "n" separator that follow the canon ID.
#
# @param fn [String] XML file basename, e.g. "T01n0001"
# @return [String] work ID, e.g. "T0001"; the input is returned unchanged when
#   it does not have the expected shape
def self.get_work_id_from_file_basename(fn)
  basename_pattern = /^(#{CANON})\d{2,3}n(.*)$/
  fn.sub(basename_pattern) do
    canon_id = Regexp.last_match(1)
    work_number = Regexp.last_match(2)
    "#{canon_id}#{work_number}"
  end
end

.get_xml_file_from_vol_and_work(vol, work) ⇒ String

由 冊號 及 典籍編號 取得 XML 主檔名



31
32
33
# File 'lib/cbeta.rb', line 31

# Builds the XML file basename from a volume ID and a work ID: the volume ID,
# the letter "n", then the work ID with its leading canon ID removed.
#
# @param vol [String] volume ID, e.g. "T01"
# @param work [String] work ID, e.g. "T0001"
# @return [String] XML file basename, e.g. "T01n0001"
def self.get_xml_file_from_vol_and_work(vol, work)
  prefix = vol + 'n'
  # Keep only what follows the canon ID in the work ID.
  work_number = work.sub(/^(#{CANON})(.*)$/) { Regexp.last_match(2) }
  prefix + work_number
end

.linehead_to_s(linehead) ⇒ String

將行首資訊轉為引用格式

Examples:

CBETA.linehead_to_s('T85n2838_p1291a03')
# return "T85, no. 2838, p. 1291, a03"


73
74
75
76
77
78
# File 'lib/cbeta.rb', line 73

# Formats a line-head string as a citation.
#
# @example
#   CBETA.linehead_to_s('T85n2838_p1291a03')
#   # => "T85, no. 2838, p. 1291, a03"
#
# @param linehead [String] line-head information
# @return [String, nil] the citation, or nil when +linehead+ does not have the
#   "<volume>n<work>_p<page><column+line>" shape
def self.linehead_to_s(linehead)
  parts = linehead.match(/^((?:#{CANON})\d+)n(.*)_p(\d+)([a-z]\d+)$/)
  return nil if parts.nil?

  volume, work_no, page, column_line = parts.captures
  "#{volume}, no. #{work_no}, p. #{page}, #{column_line}"
end

.linehead_to_xml_file_path(linehead) ⇒ String

由 行首資訊 取得 XML檔相對路徑



38
39
40
41
42
43
44
# File 'lib/cbeta.rb', line 38

# Derives the relative path of the XML file from a line-head string. The path
# is "<canon>/<volume>/<volume>n<work number>.xml", where the work number may
# carry one trailing letter.
#
# @param linehead [String] line-head information, e.g. "T85n2838_p1291a03"
# @return [String, nil] relative path such as "T/T85/T85n2838.xml", or nil
#   when +linehead+ does not match the expected shape
def self.linehead_to_xml_file_path(linehead)
  parts = linehead.match(/^(?<work>(?<vol>(?<canon>#{CANON})\d+)n\d+[a-zA-Z]?).*$/)
  return nil unless parts

  file_name = parts[:work] + '.xml'
  File.join(parts[:canon], parts[:vol], file_name)
end

.normalize_vol(vol) ⇒ Object



80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# File 'lib/cbeta.rb', line 80

# Zero-pads the numeric part of a volume ID to the width its canon uses:
# three digits for canons A, C, G, GA, GB, L, M, P and U, two digits for every
# other canon. Numbers already at or beyond that width are left as they are.
#
# Aborts the process with an "unknown vol format" message when +vol+ does not
# start with a canon ID.
#
# @param vol [String] volume ID, e.g. "T1" or "A1"
# @return [String] normalized volume ID, e.g. "T01" or "A001"
def self.normalize_vol(vol)
  parsed = vol.match(/^(#{CANON})(.*)$/)
  abort "unknown vol format: #{vol}" if parsed.nil?

  canon_id = parsed[1]
  number = parsed[2]
  # These canons use three-digit volume numbers.
  width = %w[A C G GA GB L M P U].include?(canon_id) ? 3 : 2
  canon_id + number.rjust(width, '0')
end

.open_xml(fn) ⇒ Object



97
98
99
100
101
102
# File 'lib/cbeta.rb', line 97

# Reads an XML file and parses it with Nokogiri, then strips namespaces from
# the parsed document before returning it.
#
# @param fn [String] path of the XML file
# @return [Object] the document returned by Nokogiri::XML
def self.open_xml(fn)
  xml_text = File.read(fn)
  Nokogiri::XML(xml_text).tap { |document| document.remove_namespaces!() }
end

.pua(gid) ⇒ Object

傳入 缺字碼,傳回 Unicode PUA 字元



105
106
107
# File 'lib/cbeta.rb', line 105

# Converts a gaiji (missing-character) code into a Unicode Private Use Area
# character. The first two characters of +gid+ are skipped, the remainder is
# read as a decimal number and added to 0xF0000.
#
# @param gid [String] gaiji code, e.g. "CB00123"
# @return [String] a one-character UTF-8 string
def self.pua(gid)
  serial = gid[2..-1].to_i
  code_point = 0xf0000 + serial
  [code_point].pack('U')
end

.ranjana_pua(gid) ⇒ Object

傳入 蘭札體 缺字碼,傳回 Unicode PUA 字元



110
111
112
113
# File 'lib/cbeta.rb', line 110

# Converts a Ranjana-script gaiji code into a Unicode Private Use Area
# character. The last four characters of +gid+ are read as a hexadecimal
# number and added to 0x100000, the start of Supplementary Private Use Area-B.
#
# The base used to be 0x10000, which lies in plane 1 among assigned characters
# rather than in any Private Use Area.
#
# @param gid [String] Ranjana gaiji code ending in four hex digits
# @return [String] a one-character UTF-8 string
def self.ranjana_pua(gid)
  offset = gid[-4..-1].hex
  [0x100000 + offset].pack("U")
end

.siddham_pua(gid) ⇒ Object

傳入 悉曇字 缺字碼,傳回 Unicode PUA 字元



116
117
118
119
# File 'lib/cbeta.rb', line 116

# Converts a Siddham gaiji code into a Unicode Private Use Area character.
# The last four characters of +gid+ are read as a hexadecimal number and
# added to 0xFA000.
#
# @param gid [String] Siddham gaiji code ending in four hex digits
# @return [String] a one-character UTF-8 string
def self.siddham_pua(gid)
  offset = gid[-4..-1].hex
  [0xFA000 + offset].pack("U")
end

Instance Method Details

#get_canon_abbr(id) ⇒ String

取得藏經略名

Examples:

cbeta = CBETA.new
cbeta.get_canon_abbr('T') # return "大"


170
171
172
173
174
# File 'lib/cbeta.rb', line 170

# Returns the short name of a canon: its symbol with the enclosing 【 】
# brackets removed.
#
# @example
#   cbeta = CBETA.new
#   cbeta.get_canon_abbr('T') # => "大"
#
# @param id [String] canon ID
# @return [String, nil] the short name, or nil when the canon has no symbol
def get_canon_abbr(id)
  symbol = get_canon_symbol(id)
  if symbol.nil?
    nil
  else
    symbol.sub(/^【(.*?)】$/, '\1')
  end
end

#get_canon_nickname(id) ⇒ String



144
145
146
147
# File 'lib/cbeta.rb', line 144

# Looks up the nickname recorded for a canon.
#
# @param id [String] canon ID
# @return [String, nil] the nickname, or nil when none is recorded for +id+
def get_canon_nickname(id)
  @canon_nickname.fetch(id, nil)
end

#get_canon_symbol(id) ⇒ String

取得藏經略符

Examples:

cbeta = CBETA.new
cbeta.get_canon_symbol('T') # return "【大】"


157
158
159
160
# File 'lib/cbeta.rb', line 157

# Looks up the bracketed symbol recorded for a canon.
#
# @example
#   cbeta = CBETA.new
#   cbeta.get_canon_symbol('T') # => "【大】"
#
# @param id [String] canon ID
# @return [String, nil] the symbol, or nil when none is recorded for +id+
def get_canon_symbol(id)
  @canon_abbr.fetch(id, nil)
end

#get_category(book_id) ⇒ String

傳入經號,取得部類

Examples:

cbeta = CBETA.new
cbeta.get_category('T0220') # return '般若部類'


183
184
185
# File 'lib/cbeta.rb', line 183

# Looks up the category of a work in the data loaded from categories.json.
#
# @example
#   cbeta = CBETA.new
#   cbeta.get_category('T0220') # => '般若部類'
#
# @param book_id [String] work ID
# @return [String, nil] whatever is stored under +book_id+
def get_category(book_id)
  categories = @categories
  categories[book_id]
end