Class: Me2Text::Token
- Inherits:
-
Object
- Object
- Me2Text::Token
- Defined in:
- lib/me2text/token.rb
Constant Summary collapse
# Pattern for a "me2link": a double-quoted anchor text immediately followed
# by a colon and an http(s) URL.
#
# Capture groups:
#   1 - the anchor text between the quotes
#   2 - the URL (everything up to the next whitespace)
#   3 - the single whitespace character that ended the URL, or "" when the
#       URL ran to the end of a line
#
# The quote alternatives are spelled as UTF-8 byte escapes so that several
# typographic double-quote characters are accepted in addition to ASCII \x22.
REGEX_ME2LINK =
  begin
    # Characters accepted as the opening quote.
    dquota_s = '\\xe2\\x80\\x9c|\\xe2\\x80\\x9f|\\xe2\\x9d\\x9d|\\xe2\\x80\\xb6|\\xe2\\x80\\x9d|\\x22|\\xef\\xbc\\x82'
    # Characters accepted as the closing quote.
    dquota_e = '\\xe2\\x80\\x9d|\\xe2\\x80\\x9e|\\xe2\\x9d\\x9e|\\xcb\\x9d|\\xe2\\x80\\xb3|\\xe2\\x80\\x9e|\\xe2\\x80\\x9f|\\x22|\\xef\\xbc\\x82'
    # The closing-quote characters again, without separators, for use inside
    # a negated character class (the anchor text may not contain any of them).
    dquota_ne = '\\xe2\\x80\\x9d\\xe2\\x80\\x9e\\xe2\\x9d\\x9e\\xcb\\x9d\\xe2\\x80\\xb3\\xe2\\x80\\x9e\\xe2\\x80\\x9f\\x22\\xef\\xbc\\x82'
    /(?:#{dquota_s})([^#{dquota_ne}]*)(?:#{dquota_e}):(http[s]?:\/\/[^\s]*)(\s|$)/u
  end
# Pattern for a bare http(s) URL.
#
# Capture groups:
#   1 - the URL
#   2 - the character that terminated it, or "" at the end of a line
#
# NOTE(review): inside a character class `|` and `^` (when not first) are
# literal characters, so as written the URL also stops at `|` and `^`, and
# `|` is accepted as a terminator. This looks unintended, but callers may
# depend on it, so the pattern is left unchanged - confirm before tightening.
REGEX_URL = /(http[s]?:\/\/[^\s|^\'|^\"]*)([\'|\"|\s]|$)/u
# Placeholder substituted for backslash-escaped double quotes before
# tokenizing. The two bytes 0xC2 0xA0 are the UTF-8 encoding of U+00A0.
ESCAPE_CHAR = "\xc2\xa0"
Instance Attribute Summary collapse
-
#text ⇒ Object
Returns the value of attribute text.
Class Method Summary collapse
- .join_tokens(tokens, format, options) ⇒ Object
- .tokenize(text, options = {}) ⇒ Object
- .tokenize_keyword(text, options = {}) ⇒ Object
- .tokenize_me2link(text, options = {}) ⇒ Object
- .tokenize_plaintext(text, options = {}) ⇒ Object
Instance Method Summary collapse
- #length(options = {}) ⇒ Object
- #to_html(options = {}) ⇒ Object
- #to_s(format, options = {}) ⇒ Object
- #to_text(options = {}) ⇒ Object
- #truncate(nos, format, options = {}) ⇒ Object
Instance Attribute Details
#text ⇒ Object
Returns the value of attribute text.
14 15 16 |
# File 'lib/me2text/token.rb', line 14
# Reader for the +text+ attribute.
#
# @return [Object] the current value of @text
def text
  @text
end
Class Method Details
.join_tokens(tokens, format, options) ⇒ Object
143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 |
# File 'lib/me2text/token.rb', line 143
# Renders every token in +format+ and concatenates the results, optionally
# cutting the output off once options[:limit] units of length are used up.
#
# Each token must respond to:
#   to_s(format, options)          - the rendered string
#   length                         - how much of the limit it consumes
#   truncate(n, format, options)   - an array whose first element is the
#                                    shortened rendering
#
# @param tokens  [Array]  tokens to render, in order
# @param format  [Symbol] passed through to each token
# @param options [Hash]   passed through to each token; :limit (optional)
#                         is the maximum total length
# @return [String] the joined rendering
def join_tokens(tokens, format, options)
  limit = options[:limit]
  # Without a limit every token is rendered in full.
  return tokens.map { |token| token.to_s(format, options) }.join if limit.nil?

  result = ""
  used = 0
  last_index = tokens.size - 1
  tokens.each_with_index do |token, index|
    token_length = token.length
    # How far past the limit we would be after adding this token.
    overflow = used + token_length - limit

    # The token fits: either room is left over, or it lands exactly on the
    # limit and nothing follows it.
    if overflow < 0 || (overflow.zero? && index == last_index)
      result << token.to_s(format, options)
      used += token_length
    else
      # Either the token is too long by +overflow+, or it fits exactly but
      # more tokens follow (overflow == 0); in both cases the token decides
      # how to shorten itself and joining stops here.
      result << token.truncate(overflow, format, options).first
      break
    end
  end
  result
end
.tokenize(text, options = {}) ⇒ Object
95 96 97 |
# File 'lib/me2text/token.rb', line 95
# Entry point for tokenizing: replaces every backslash-escaped double quote
# (\") in +text+ with ESCAPE_CHAR and hands the result to tokenize_me2link.
#
# @param text    [String] raw input
# @param options [Hash]   passed through to tokenize_me2link
# @return [Object] whatever tokenize_me2link returns
def tokenize(text, options = {})
  tokenize_me2link(text.gsub(/\\\"/u, ESCAPE_CHAR), options)
end
.tokenize_keyword(text, options = {}) ⇒ Object
125 126 127 128 129 130 131 132 133 134 135 136 137 |
# File 'lib/me2text/token.rb', line 125
# Splits +text+ around every match of Keyword::KEYWORD_REGEX. Each match
# becomes a Keyword token; the text between matches is passed to
# tokenize_plaintext.
#
# @param text    [String] input text
# @param options [Hash]   passed to Keyword.new and tokenize_plaintext
# @return [Array] tokens in input order
def tokenize_keyword(text, options = {})
  tokens = []
  while (m = text.match(Keyword::KEYWORD_REGEX))
    tokens.concat(tokenize_plaintext(m.pre_match, options)) unless m.pre_match.empty?
    tokens << Keyword.new(m.to_s, options)
    # Continue scanning after the keyword just consumed.
    text = m.post_match
  end
  # Whatever is left after the last keyword is plain text.
  tokens.concat(tokenize_plaintext(text, options)) unless text.empty?
  tokens
end
.tokenize_me2link(text, options = {}) ⇒ Object
99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
# File 'lib/me2text/token.rb', line 99
# Splits +text+ around every match of REGEX_ME2LINK. Each match becomes a
# Link token; the text between matches is passed to tokenize_keyword.
#
# @param text    [String] input text
# @param options [Hash]   passed to tokenize_keyword; when :link_handler is
#                         set it is called as handler.call(url, options) and
#                         its return value replaces the URL
# @return [Array] tokens in input order
def tokenize_me2link(text, options = {})
  tokens = []
  while (m = text.match(REGEX_ME2LINK))
    # Handle the text that precedes the match.
    tokens.concat(tokenize_keyword(m.pre_match, options)) unless m.pre_match.empty?

    anchor_text = m[1]
    url = m[2]
    if url
      handler = options[:link_handler]
      url = handler.call(url, options) unless handler.nil?
      # Third argument: whether a whitespace character followed the URL
      # (capture 3 is non-empty).
      tokens << Link.new(anchor_text, url, !m[3].empty?)
    else
      # No URL captured: treat the anchor text as ordinary text.
      tokens.concat(tokenize_keyword(anchor_text, options))
    end
    text = m.post_match
  end
  # Whatever is left after the last link.
  tokens.concat(tokenize_keyword(text, options)) unless text.empty?
  tokens
end
.tokenize_plaintext(text, options = {}) ⇒ Object
139 140 141 |
# File 'lib/me2text/token.rb', line 139
# Wraps +text+ in a single Plain token.
#
# @param text    [String] text to wrap
# @param options [Hash]   accepted for signature parity with the other
#                         tokenize_* methods; not used here
# @return [Array] a one-element array holding the Plain token
def tokenize_plaintext(text, options = {})
  [Plain.new(text)]
end
Instance Method Details
#length(options = {}) ⇒ Object
29 30 31 |
# File 'lib/me2text/token.rb', line 29
# Length of this token; this implementation always reports 1.
#
# @param options [Hash] not used here
# @return [Integer] 1
def length(options = {})
  1
end
#to_html(options = {}) ⇒ Object
23 24 |
# File 'lib/me2text/token.rb', line 23
# HTML rendering of the token. The body is empty here, so it returns nil.
#
# @param options [Hash] not used here
# @return [nil]
def to_html(options = {})
end
#to_s(format, options = {}) ⇒ Object
16 17 18 19 20 21 |
# File 'lib/me2text/token.rb', line 16
# Renders the token in the requested format by dispatching to
# to_html or to_text.
#
# @param format  [Symbol] :html or :text
# @param options [Hash]   passed through to the format-specific method
# @return [Object] whatever to_html / to_text returns
# @raise [ArgumentError] if +format+ is neither :html nor :text
def to_s(format, options = {})
  unless [:html, :text].include?(format)
    raise ArgumentError, "Unknown format: #{format.inspect}"
  end

  # The format was validated above, so only to_html / to_text can be reached.
  send("to_#{format}", options)
end
#to_text(options = {}) ⇒ Object
26 27 |
# File 'lib/me2text/token.rb', line 26
# Plain-text rendering of the token. The body is empty here, so it
# returns nil.
#
# @param options [Hash] not used here
# @return [nil]
def to_text(options = {})
end
#truncate(nos, format, options = {}) ⇒ Object
33 34 35 36 37 38 39 |
# File 'lib/me2text/token.rb', line 33
# Shortened rendering of the token. This implementation ignores +nos+ and
# +format+ and always yields just the ellipsis.
#
# @param nos     [Integer] not used here
# @param format  [Symbol]  not used here
# @param options [Hash]    :ellipsis overrides the default "…"
# @return [Array] two elements: the ellipsis string and the integer 1
def truncate(nos, format, options = {})
  # Caller-supplied options win over the default ellipsis.
  options = { :ellipsis => "…" }.merge(options)
  [options[:ellipsis], 1]
end