Class: Me2Text::Token

Inherits:
Object
  • Object
show all
Defined in:
lib/me2text/token.rb

Direct Known Subclasses

Keyword, Link, Plain

Constant Summary collapse

# Pattern for quoted links of the form  "anchor text":http://url
#
# Capture groups:
#   1 - the anchor text between the opening and closing quote
#   2 - the http/https URL (everything up to the next whitespace)
#   3 - the single whitespace character that follows the URL, or "" when the
#       URL runs to the end of a line
#
# The quote characters are spelled as backslash-escaped UTF-8 byte sequences
# and interpolated into a /u regexp. Besides the ASCII double quote (\x22)
# the lists contain several Unicode double-quote look-alikes.
REGEX_ME2LINK =
begin
  # Alternation of characters accepted as the opening quote.
  dquota_s = '\\xe2\\x80\\x9c|\\xe2\\x80\\x9f|\\xe2\\x9d\\x9d|\\xe2\\x80\\xb6|\\xe2\\x80\\x9d|\\x22|\\xef\\xbc\\x82'
  # Alternation of characters accepted as the closing quote.
  dquota_e = '\\xe2\\x80\\x9d|\\xe2\\x80\\x9e|\\xe2\\x9d\\x9e|\\xcb\\x9d|\\xe2\\x80\\xb3|\\xe2\\x80\\x9e|\\xe2\\x80\\x9f|\\x22|\\xef\\xbc\\x82'
  # The closing-quote characters again, without separators, for use inside a
  # negated bracket expression (the anchor text may not contain any of them).
  dquota_ne = '\\xe2\\x80\\x9d\\xe2\\x80\\x9e\\xe2\\x9d\\x9e\\xcb\\x9d\\xe2\\x80\\xb3\\xe2\\x80\\x9e\\xe2\\x80\\x9f\\x22\\xef\\xbc\\x82'
  /(?:#{dquota_s})([^#{dquota_ne}]*)(?:#{dquota_e}):(http[s]?:\/\/[^\s]*)(\s|$)/u
end
# Matches a bare http/https URL.
#
# Capture groups:
#   1 - the URL itself
#   2 - the character that terminated it, or "" at the end of a line
#
# NOTE(review): inside the bracket expressions `|` and `^` are literal
# characters, not alternation/negation, so as written a URL also stops at a
# `|` or `^`. This looks unintended -- confirm against callers before changing.
REGEX_URL =
/(http[s]?:\/\/[^\s|^\'|^\"]*)([\'|\"|\s]|$)/u
# Placeholder that `tokenize` substitutes for every backslash-escaped double
# quote (\") before the text is scanned for links.
# The bytes 0xC2 0xA0 are the UTF-8 encoding of U+00A0 (no-break space).
# NOTE(review): presumably chosen so escaped quotes are not mistaken for link
# delimiters; where it is converted back is not visible here -- confirm.
ESCAPE_CHAR =
"\xc2\xa0"

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#text ⇒ Object

Returns the value of attribute text.



14
15
16
# File 'lib/me2text/token.rb', line 14

# Reader for the token's text.
#
# @return [Object] the current value of @text
def text
  @text
end

Class Method Details

.join_tokens(tokens, format, options) ⇒ Object



143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# File 'lib/me2text/token.rb', line 143

# Renders +tokens+ into one string, optionally stopping once the cumulative
# token length reaches options[:limit].
#
# Without a :limit every token is rendered with #to_s and the pieces are
# joined. With a :limit, tokens are rendered in order while they fit; the
# first token that would exceed the limit (or that fills it exactly while
# more tokens are still pending) is rendered through #truncate instead, and
# nothing after it is emitted.
#
# @param tokens [Array] objects responding to #length, #to_s(format, options)
#   and #truncate(nos, format, options)
# @param format [Symbol] output format, passed through to each token
# @param options [Hash] passed through to each token; :limit (Integer, optional)
#   is the maximum cumulative length
# @return [String] the rendered text
def join_tokens(tokens, format, options)
  limit = options[:limit]
  return tokens.map { |token| token.to_s(format, options) }.join if limit.nil?

  # dup keeps the accumulator mutable even if string literals are frozen.
  result = "".dup
  consumed = 0
  last_index = tokens.size - 1

  tokens.each_with_index do |token, index|
    total = consumed + token.length
    remaining = limit - total

    # Either there is room to spare, or the final token fits exactly:
    # emit the token unchanged.
    if remaining > 0 || (remaining == 0 && index == last_index)
      result << token.to_s(format, options)
      consumed = total
      next
    end

    # The limit is hit exactly while more tokens are pending (overshoot 0),
    # or this token is -remaining units too long. Emit its truncated form and
    # stop; only the first element of #truncate's result is needed.
    overshoot = -remaining
    result << token.truncate(overshoot, format, options).first
    break
  end

  result
end

.tokenize(text, options = {}) ⇒ Object



95
96
97
# File 'lib/me2text/token.rb', line 95

# Entry point of the tokenizer.
#
# Every backslash-escaped double quote (\") in +text+ is first swapped for
# ESCAPE_CHAR; the resulting string is then handed to tokenize_me2link.
#
# @param text [String] raw input
# @param options [Hash] forwarded unchanged to tokenize_me2link
# @return [Object] whatever tokenize_me2link returns
def tokenize(text, options = {})
  escaped = text.gsub(/\\\"/u, ESCAPE_CHAR)
  tokenize_me2link(escaped, options)
end

.tokenize_keyword(text, options = {}) ⇒ Object



125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/me2text/token.rb', line 125

# Splits +text+ into Keyword tokens and the plain-text runs between them.
#
# The text is scanned left to right with Keyword::KEYWORD_REGEX. Each match
# becomes a Keyword; any non-empty text before a match, and any non-empty
# text left after the last match, goes through tokenize_plaintext.
#
# @param text [String] text to scan
# @param options [Hash] forwarded to Keyword.new and tokenize_plaintext
# @return [Array] tokens in input order
def tokenize_keyword(text, options = {})
  tokens = []
  remaining = text

  while (match = remaining.match(Keyword::KEYWORD_REGEX))
    leading = match.pre_match
    tokens.concat(tokenize_plaintext(leading, options)) unless leading.empty?
    tokens << Keyword.new(match.to_s, options)
    remaining = match.post_match
  end

  tokens.concat(tokenize_plaintext(remaining, options)) unless remaining.empty?
  tokens
end


99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# File 'lib/me2text/token.rb', line 99

# Splits +text+ into Link tokens and the stretches of text around them.
#
# The text is scanned left to right with REGEX_ME2LINK. For each match:
# * non-empty text before the match is passed to tokenize_keyword;
# * when the URL group (2) is present, the URL is optionally rewritten by
#   options[:link_handler] (called with the URL and +options+) and a Link is
#   built from the anchor text (group 1), the URL, and a flag telling whether
#   group 3 was non-empty;
# * otherwise the anchor text alone is passed to tokenize_keyword.
# Non-empty text left after the last match is passed to tokenize_keyword too.
#
# @param text [String] text to scan
# @param options [Hash] forwarded to tokenize_keyword; :link_handler, when
#   not nil, must respond to #call(url, options)
# @return [Array] tokens in input order
def tokenize_me2link(text, options = {})
  tokens = []
  rest = text

  while (match = rest.match(REGEX_ME2LINK))
    # Handle the text that precedes the match.
    before = match.pre_match
    tokens.concat(tokenize_keyword(before, options)) unless before.empty?

    target = match[2]
    label = match[1]
    if target
      handler = options[:link_handler]
      target = handler.call(target, options) unless handler.nil?
      tokens << Link.new(label, target, match[3].length > 0)
    else
      tokens.concat(tokenize_keyword(label, options))
    end

    rest = match.post_match
  end

  tokens.concat(tokenize_keyword(rest, options)) unless rest.empty?
  tokens
end

.tokenize_plaintext(text, options = {}) ⇒ Object



139
140
141
# File 'lib/me2text/token.rb', line 139

# Wraps +text+ in a single Plain token.
#
# @param text [String] text for the token
# @param options [Hash] accepted for signature parity; not used here
# @return [Array] a one-element array holding the new Plain
def tokenize_plaintext(text, options = {})
  plain = Plain.new(text)
  [plain]
end

Instance Method Details

#length(options = {}) ⇒ Object



29
30
31
# File 'lib/me2text/token.rb', line 29

# Length this token contributes when joined; this implementation always
# reports 1.
#
# @param options [Hash] not used here
# @return [Integer] 1
def length(options = {})
  1
end

#to_html(options = {}) ⇒ Object



23
24
# File 'lib/me2text/token.rb', line 23

# HTML rendering of the token. This implementation has no body of its own
# and returns nil.
#
# @param options [Hash] not used here
# @return [nil]
def to_html(options = {})
  nil
end

#to_s(format, options = {}) ⇒ Object



16
17
18
19
20
21
# File 'lib/me2text/token.rb', line 16

# Renders the token in the requested format by dispatching to #to_html or
# #to_text.
#
# @param format [Symbol] :html or :text
# @param options [Hash] forwarded to the format-specific method
# @return [Object] whatever the format-specific method returns
# @raise [ArgumentError] when +format+ is neither :html nor :text
def to_s(format, options = {})
  case format
  when :html then to_html(options)
  when :text then to_text(options)
  else
    raise ArgumentError, "Unknown format: #{format.inspect}"
  end
end

#to_text(options = {}) ⇒ Object



26
27
# File 'lib/me2text/token.rb', line 26

# Plain-text rendering of the token. This implementation has no body of its
# own and returns nil.
#
# @param options [Hash] not used here
# @return [nil]
def to_text(options = {})
  nil
end

#truncate(nos, format, options = {}) ⇒ Object



33
34
35
36
37
38
39
# File 'lib/me2text/token.rb', line 33

# Truncated rendering of the token. This implementation ignores +nos+ and
# +format+ and replaces the whole token with the ellipsis.
#
# @param nos [Integer] not used here
# @param format [Symbol] not used here
# @param options [Hash] :ellipsis overrides the default "…"
# @return [Array] two elements: the ellipsis and the number 1
def truncate(nos, format, options = {})
  settings = { :ellipsis => "…" }.merge(options)
  [settings[:ellipsis], 1]
end