Class: SiSU_TeX_Pdf::SpecialCharacters

Inherits:
Object
  • Object
show all
Includes:
SiSU_Parts_TeXpdf
Defined in:
lib/sisu/texpdf_format.rb

Instance Method Summary collapse

Methods included from SiSU_Parts_TeXpdf

#set_fonts, #tex_close, #tex_open, #the_font, #the_line_break, #txt_close, #txt_open, #url_decoration

Methods included from SiSU_Parts_Generic

#footer_signature, #home, #home_txt, #i_choice, #i_home_button, #i_ico, #i_new, #rl_root, #root_http, #sisu, #sisu_txt, #sisudoc, #site, #the_icon, #the_text, #the_url, #txt_home, #txt_hp, #txt_hp_alias, #txt_signature, #urify, #url_close, #url_open

Constructor Details

#initialize(md, str, is = :default) ⇒ SpecialCharacters

Returns a new instance of SpecialCharacters.



1233
1234
1235
1236
# File 'lib/sisu/texpdf_format.rb', line 1233

# Builds a converter for one piece of text.
#
# md  - stored in @md (not read by the methods visible in this class listing)
# str - the text the transformation methods operate on (kept in @txt)
# is  - mode symbol, :default unless given; the methods of this class compare
#       it against :code and :no_urls
def initialize(md,str,is=:default)
  @md=md
  @txt=str
  @is=is
  # the engine lookup is cached in a class variable: the SystemCall object is
  # only built while @@tex3pdf is still unset (nil or false)
  @@tex3pdf ||=SiSU_Env::SystemCall.new.tex2pdf_engine
  @tex2pdf=@@tex3pdf
end

Instance Method Details

#characters_code_listings ⇒ Object

special characters - some substitutions are sequence sensitive, rearrange with care



1471
1472
1473
1474
1475
# File 'lib/sisu/texpdf_format.rb', line 1471

# special characters - some substitutions are sequence sensitive, rearrange with care
#
# Passes the stored text (@txt) together with the stored mode (@is) through
# #xetex_code_listings and keeps the result in @txt. A nil @txt is left alone.
#
# Returns the (possibly nil) value now held in @txt.
def characters_code_listings
  @txt=xetex_code_listings(@txt,@is) unless @txt.nil?
  @txt
end

#special_characters ⇒ Object

special characters - some substitutions are sequence sensitive, rearrange with care



1438
1439
1440
1441
1442
1443
1444
1445
# File 'lib/sisu/texpdf_format.rb', line 1438

# special characters - some substitutions are sequence sensitive, rearrange with care
#
# Runs the stored text (@txt) through the four conversion passes below, in this
# order, and stores the outcome back in @txt. Each pass is skipped when the
# text reaching it is nil, so a nil @txt stays nil.
#
# Returns the value stored in @txt.
def special_characters
  is=@is
  passes=[
    lambda { |s| xetex_special_characters_1(s,is) },
    lambda { |s| special_characters_unsafe_1(s) }, #xetex_special_characters_unsafe_1(@txt)
    lambda { |s| xetex_special_characters_2(s,is) }, #issues with xetex
    lambda { |s| xetex_special_characters_3(s) },
  ]
  @txt=passes.inject(@txt) { |s,pass| s.nil? ? s : pass.call(s) }
end

#special_characters_code ⇒ Object



1476
1477
1478
1479
1480
# File 'lib/sisu/texpdf_format.rb', line 1476

# Rewrites a space followed by two backslashes - when these are followed by a
# run of spaces / Mx[:br_nl] characters or sit at the end of a line - as
# ' \textbackslash\textbackslash\hardspace' plus the matched trailing run.
#
# Unlike the other public converters of this class it does not write the
# result back to @txt; it only returns the converted copy.
def special_characters_code
  doubled_backslash=/ \\\\([ #{Mx[:br_nl]}]+|$)/
  @txt.gsub(doubled_backslash,' \textbackslash\textbackslash\hardspace\1')
end

#special_characters_code_fix(str) ⇒ Object



1429
1430
1431
1432
# File 'lib/sisu/texpdf_format.rb', line 1429

# Expands every '<=tilde>' placeholder in str to the LaTeX fragment
# '{$\tilde$}'.
#
# str - String to convert (not modified)
#
# Returns a new String.
def special_characters_code_fix(str)
  str.gsub(/<=tilde>/,'{$\tilde$}')
end

#special_characters_safe ⇒ Object

special characters - some substitutions are sequence sensitive, rearrange with care



1457
1458
1459
1460
1461
1462
1463
# File 'lib/sisu/texpdf_format.rb', line 1457

# special characters - some substitutions are sequence sensitive, rearrange with care
#
# Runs the stored text (@txt) through #xetex_special_characters_1,
# #xetex_special_characters_2 (both with the stored mode @is) and
# #special_characters_safe_close, in that order, then stores the result in
# @txt. A stage is skipped when the text reaching it is nil.
#
# Returns the value stored in @txt.
def special_characters_safe
  is=@is
  stages=[
    [:xetex_special_characters_1,[is]],
    [:xetex_special_characters_2,[is]],                                  # remove this to start with, causes issues
    [:special_characters_safe_close,[]],
  ]
  @txt=stages.inject(@txt) do |text,(stage,extra_args)|
    text.nil? ? text : send(stage,text,*extra_args)
  end
end

#special_characters_safe_close(str) ⇒ Object



1422
1423
1424
1425
1426
1427
1428
# File 'lib/sisu/texpdf_format.rb', line 1422

# Expands the placeholder tokens '<=tilde>', '<=hash>', '<=amp>' and
# '<=copymark>' in str into LaTeX markup.
#
# str - String to convert (not modified)
#
# Returns a new String in which
#   '<=tilde>'         becomes '{$\tilde$}'
#   '<=hash>'          becomes '{\#}'
#   '<=amp>'           becomes '{\&}'
#   '<=copymark> rest' becomes '^\copyright \textnormal{rest} ', where rest is
#                      everything up to the end of that line
def special_characters_safe_close(str)
  str.gsub(/<=tilde>/,'{$\tilde$}').
    gsub(/<=hash>/,'{\#}').
    gsub(/<=amp>/,'{\\\&}'). #changed ... 2005
    # the pattern has a single capture group; the former trailing '\2'
    # back-reference always expanded to nothing and has been dropped (the
    # trailing space it followed is kept, so the output is unchanged)
    gsub(/<=copymark>\s*(.+)/,
      '^\copyright \textnormal{\1} ') # watch likely to be problematic
end

#special_characters_safe_no_urls ⇒ Object



1464
1465
1466
1467
1468
1469
1470
# File 'lib/sisu/texpdf_format.rb', line 1464

# Same pipeline as #special_characters_safe, except that the mode handed to
# the two xetex passes is always :no_urls (the stored @is is ignored).
# The result is stored in @txt and returned; a stage is skipped when the text
# reaching it is nil.
def special_characters_safe_no_urls
  mode=:no_urls
  text=@txt
  text=text.nil? ? nil : xetex_special_characters_1(text,mode)
  text=text.nil? ? nil : xetex_special_characters_2(text,mode) # remove this to start with, causes issues
  text=text.nil? ? nil : special_characters_safe_close(text)
  @txt=text
end

#special_characters_unsafe_1(str) ⇒ Object

deprecated, make obsolete



1433
1434
1435
1436
1437
# File 'lib/sisu/texpdf_format.rb', line 1433

# deprecated, make obsolete
#
# Turns '\textbackslash copyright', '\textbackslash clearpage' and
# '\textbackslash newpage' back into the LaTeX commands '\copyright',
# '\clearpage' and '\newpage'.
# some substitutions are sequence sensitive, rearrange with care.
#
# str - String to convert (not modified)
#
# Returns a new String.
def special_characters_unsafe_1(str)
  #kludge bad solution, find out where tail is sent through specChar !
  str.gsub(/\\textbackslash (copyright|clearpage|newpage)/,"\\\\\\1")
end

#special_number_break_points ⇒ Object



1452
1453
1454
1455
1456
# File 'lib/sisu/texpdf_format.rb', line 1452

# Appends '\-' after every run of eight hexadecimal digits (either case)
# found in @txt, stores the result in @txt and returns it.
def special_number_break_points
  @txt=@txt.gsub(/([0-9a-f]{8})/i,'\1\-')
end

#special_word_break_points ⇒ Object



1446
1447
1448
1449
1450
1451
# File 'lib/sisu/texpdf_format.rb', line 1446

# Inserts '\-' into @txt in two steps:
#   1. after each of the characters _ , . ; : / | =
#   2. between a double hyphen and a following run of four or more
#      non-whitespace characters
# The result is stored in @txt and returned.
def special_word_break_points
  after_punctuation=@txt.gsub(/([_,.;:\/|=])/,'\1\-')
  @txt=after_punctuation.gsub(/(--)(\S{4,})/,'\1\-\2')
end

#xetex_code_listings(str, is = :default) ⇒ Object

~ ^ $ & % _ { } #LaTeX special characters - KEEP list



1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
# File 'lib/sisu/texpdf_format.rb', line 1237

# ~ ^ $ & % _ { }  #LaTeX special characters - KEEP list
#
# Converts text for a code listing: glyph markers of the form
# Mx[:gl_o]#NNN Mx[:gl_c] and a few escaped characters are turned back into
# the plain characters they stand for, structural markers are removed, and
# '\copy', '\copyright' and '\copymark' become the '<=copymark>' placeholder.
#
# str - text to convert; must respond to #scan (nil raises NoMethodError)
# is  - mode symbol; with :code the re-joined text is not stripped
#
# The text is split into whitespace-separated words plus newline tokens and
# re-joined with single spaces, so runs of blanks collapse and every newline
# token ends up with a space on either side.
#
# Returns the converted String.
def xetex_code_listings(str,is=:default)
  # String#scan always returns an Array, so there is no nil case to fall back
  # from (the former "else ''" branch could never run)
  words=str.scan(/\S+|\n/).map do |w| # _ - / # | : ! ^ ~  #unless line =~/^(?:@\S|%+\s)/
    w.gsub(/#{Mx[:gl_o]}#lt#{Mx[:gl_c]}/,'<').gsub(/#{Mx[:gl_o]}#gt#{Mx[:gl_c]}/,'>').
      gsub(/[\\]?~/,'~').
      gsub(/[#{Mx[:br_line]}#{Mx[:br_paragraph]}]/,"\n").              #watch
      gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/,'~').                #126 usual
      gsub(/\\?\||#{Mx[:gl_o]}#124#{Mx[:gl_c]}/,'|')                   #unless is=='code' #unless w=~/<~\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+>/ # | SiSU not really special sisu character but done, also LaTeX
  end
  str=words.join(' ')
  str=str.strip unless is==:code
  str.gsub(/\s*#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}\s*/,' ').
    gsub(/.+?<-#>/,'').
    gsub(/#{Mx[:br_eof]}/,'').
    gsub(/#{Mx[:br_endnotes]}/,'').
  #problem sequence ->
    gsub(/&(?:lt|#060);/,'<').                                           # < SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#(?:gt|062)#{Mx[:gl_c]}/,'>').                     # > SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'{').                            # { SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#125#{Mx[:gl_c]}/,'}').                            # } SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/,'~').                    # ~ SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#035#{Mx[:gl_c]}/,'#').                            # SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#033#{Mx[:gl_c]}/,'!').                            # ! SiSU not really special sisu character but done, also LaTeX
   #gsub(/(^|\s)\*\s/,'\1\asterisk ').                                   # * should you wish to escape astrisk e.g. describing \*{bold}*
    gsub(/#{Mx[:gl_o]}#042#{Mx[:gl_c]}/,'*').                            # * should you wish to escape astrisk e.g. describing \*{bold}*
    gsub(/#{Mx[:gl_o]}#045#{Mx[:gl_c]}/,'-').                            # - SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#043#{Mx[:gl_c]}/,'+').                            # + SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#044#{Mx[:gl_c]}/,',').                            # , SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#038#{Mx[:gl_c]}/,'&').                            #unless @txt=~/<:code>/  # & SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#047#{Mx[:gl_c]}/,'/').                            # / SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#092#{Mx[:gl_c]}/,'\\').                           # \ SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#095#{Mx[:gl_c]}/,'_').                            # _ SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#124#{Mx[:gl_c]}/,'|').                            # | SiSU not really special sisu character but done, also LaTeX
    gsub(/#{Mx[:gl_o]}#058#{Mx[:gl_c]}/,':').                            # : SiSU not really special sisu character but done, also LaTeX
    gsub(/#{Mx[:gl_o]}#094#{Mx[:gl_c]}|\^/,'^').                         # ^ SiSU not really special sisu character but done, also LaTeX
  ##watch placement, problem sequence ^
    gsub(/<sup><font face=symbol>&atild;<\/font><\/sup>/,' ').
    gsub(/\\copy(right|mark)?/,'<=copymark>')                            # ok problem with superscript
end

#xetex_special_characters_1(str, is = :default) ⇒ Object

~ ^ $ & % _ { } #LaTeX special characters - KEEP list



1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
# File 'lib/sisu/texpdf_format.rb', line 1282

# ~ ^ $ & % _ { }  #LaTeX special characters - KEEP list
#
# First conversion pass. Works word by word, then on the re-joined text:
#   * words that are not http(s) links, contain a '/.../' segment and are
#     longer than six characters get '\-' inserted after each '_', '.', '/'
#   * tildes and '#' become the placeholders '<=tilde>' / '<=hash>' ('#' is
#     left alone in words containing Mx[:rel_o] or a glyph marker opener)
#   * glyph markers Mx[:gl_o]#NNN Mx[:gl_c] and some literal characters are
#     replaced by LaTeX commands ('\pipe', '\caret', '\curlyopen', ...) or by
#     further placeholders ('<=amp>', '<=underscore>', '<=copymark>')
#   * structural markers (name anchors, Mx[:br_eof], Mx[:br_endnotes]) are removed
#
# str - text to convert; must respond to #scan (nil raises NoMethodError)
# is  - mode symbol; with :code the re-joined text is not stripped and
#       non-breaking spaces are not turned into '\hardspace'
#
# Whitespace-separated words and newline tokens are re-joined with single
# spaces, so runs of blanks collapse.
#
# Returns the converted String.
def xetex_special_characters_1(str,is=:default)
  # String#scan always returns an Array, so there is no nil case to fall back
  # from (the former "else ''" branch could never run)
  words=str.scan(/\S+|\n/).map do |w| # _ - / # | : ! ^ ~  #unless line =~/^(?:@\S|%+\s)/
    if w !~ /https?:/ && w =~ /\/\S+?\// && w.length > 6
      w=w.gsub(/([_.\/])/,'\1\-')
    end
    w=w.gsub(/#{Mx[:gl_o]}#lt#{Mx[:gl_c]}/,'<').gsub(/#{Mx[:gl_o]}#gt#{Mx[:gl_c]}/,'>').
      gsub(/[\\]?~/,'<=tilde>').
      gsub(/[#{Mx[:br_line]}#{Mx[:br_paragraph]}]/,' \newline ').      #watch
      gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/,'<=tilde>').         #126 usual
      gsub(/\\?\||#{Mx[:gl_o]}#124#{Mx[:gl_c]}/,'\pipe')               #unless is=='code' #unless w=~/<~\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+>/ # | SiSU not really special sisu character but done, also LaTeX
    if w !~ /#{Mx[:rel_o]}/ && w !~ /#{Mx[:gl_o]}#/
      w=w.gsub(/\#/,'<=hash>')
    end
    w
  end
  str=words.join(' ')
  str=str.strip unless is==:code
  str=str.gsub(/\s*#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}\s*/,' ').
    gsub(/.+?<-#>/,'').
    gsub(/#{Mx[:br_eof]}/,'').
    gsub(/#{Mx[:br_endnotes]}/,'')
  #problem sequence ->
  str=str.gsub(/&(?:nbsp);|#{Mx[:nbsp]}/,'\hardspace') unless is==:code  # non-breaking space
  str.gsub(/&(?:lt|#060);/,'\lt').                                       # < SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#(?:gt|062)#{Mx[:gl_c]}/,'\gt').                   # > SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'\curlyopen').                   # { SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#125#{Mx[:gl_c]}/,'\curlyclose').                  # } SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/,'<=tilde>').             # ~ SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#035#{Mx[:gl_c]}/,'\#').                           # # SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#033#{Mx[:gl_c]}/,'!').                            # ! SiSU not really special sisu character but done, also LaTeX
    gsub(/(^|\s)\*\s/,'\1\asterisk ').                                   # * should you wish to escape astrisk e.g. describing \*{bold}*
    gsub(/#{Mx[:gl_o]}#042#{Mx[:gl_c]}/,'\*').                           # * should you wish to escape astrisk e.g. describing \*{bold}*
    gsub(/#{Mx[:gl_o]}#045#{Mx[:gl_c]}/,'-').                            # - SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#043#{Mx[:gl_c]}/,'+').                            # + SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#044#{Mx[:gl_c]}/,',').                            # , SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#038#{Mx[:gl_c]}/,'<=amp>'). #unless @txt=~/<:code>/  # & SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#047#{Mx[:gl_c]}/,'\slash').                       # / SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#092#{Mx[:gl_c]}/,'\textbackslash').               # \ SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#095#{Mx[:gl_c]}/,'<=underscore>').                # _ SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#124#{Mx[:gl_c]}/,'|').                            # | SiSU not really special sisu character but done, also LaTeX
    gsub(/#{Mx[:gl_o]}#058#{Mx[:gl_c]}/,':').                            # : SiSU not really special sisu character but done, also LaTeX
    gsub(/#{Mx[:gl_o]}#094#{Mx[:gl_c]}|\^/,'\caret').                    # ^ SiSU not really special sisu character but done, also LaTeX
  ##watch placement, problem sequence ^
    gsub(/<sup><font face=symbol>&atild;<\/font><\/sup>/,' ').
    gsub(/\\copy(right|mark)?/,'<=copymark>') # ok problem with superscript
end

#xetex_special_characters_2(str, is = :default) ⇒ Object



1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
# File 'lib/sisu/texpdf_format.rb', line 1337

# Second conversion pass: escapes LaTeX special characters, wraps urls and
# turns inline markup markers into LaTeX commands.
#
# str - text to convert (not modified)
# is  - mode symbol:
#       :code    - '&' and the tilde/hash placeholders are resolved here,
#                  bare urls are not decorated, quote marks are left alone
#       :no_urls - no url markup of any kind is produced
#       other    - '&' becomes the '<=amp>' placeholder, non-breaking spaces
#                  become '~', urls are wrapped in \url{} and straight quotes
#                  are converted to typographic ones
#
# The order of the substitutions matters (later patterns rely on the escapes
# made by earlier ones), so rearrange with care.
#
# Returns the converted String.
def xetex_special_characters_2(str,is=:default)
  str=str.gsub(/#{Mx[:gl_o]}#156#{Mx[:gl_c]}/,'\oe ').
    gsub(/\$/,'\$').
    gsub(/\#/,'\#').
    gsub(/\%/,'\%').
    gsub(/\~/,'\~') #revist, should not be necessary to mark remaining tildes
  # underscores are left untouched in lines that start with an image link
  if str !~/^\s*#{Mx[:lnk_o]}.+?#{Mx[:lnk_c]}image\s/
    str=str.gsub(/_/,'\_')
  end
  str=str.gsub(/\{/,'\{').
    gsub(/\}/,'\}')
  str=if is==:code
    str.gsub(/&/,'{\\\&}').
      gsub(/\\~(\\\{)/,'{$\tilde$}\1').
      gsub(/(\\\})\\~/,'\1{$\tilde$}').
      gsub(/\\~(\[)/,'{$\tilde$}\1').
      gsub(/(\])\\~/,'\1{$\tilde$}').
      gsub(/<=tilde>/,'{$\tilde$}').
      gsub(/<=hash>/,'{\#}')
  else
    str.gsub(/&nbsp;|#{Mx[:nbsp]}/,'~'). # ~ character for hardspace
      gsub(/&/,'<=amp>')
  end
  str=str.gsub(/&\S+?;/,' ').
    gsub(/§/u,'\S'). #latex: space between next character not preserved? #str.gsub(/§ /,'\S ')
    gsub(/£/u,'\pounds').
    gsub(/<a href=".+?">/,' ').
    gsub(/<\/a>/,' ')
  unless is==:no_urls
    # NOTE(review): the first pattern below shows two capture groups, so the
    # '\3' in its replacement expands to nothing unless one of the Mx markers
    # itself contains a group - confirm against the Mx definitions
    str=str.gsub(/((?:^|\s)#{Mx[:lnk_c]})#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,
        '\1\begin{scriptsize}\url{\2}\end{scriptsize}\3'). #special case \{ e.g. \}http://url
      gsub(/#{Mx[:url_o]}\\_(\S+?)#{Mx[:url_c]}/,
        '\begin{scriptsize}\url{\1}\end{scriptsize}'). #special case \{ e.g. \}http://url
      gsub(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,
        '\begin{scriptsize}\\url{\1}\end{scriptsize}') #specially escaped url no decoration
  end
  if is !=:code && is !=:no_urls
    str=str.gsub(/(^|#{Mx[:gl_c]}|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"\s]+?)([;.,]?(?=\s|$))/,
      "\\1#{url_decoration.tex_open}\\begin{scriptsize}\\url{\\2}\\end{scriptsize}#{url_decoration.tex_close}\\3") #url matching with decoration <url> positive lookahead, sequence issue with { linked }http://url cannot use \b at start
  end
  str=str.gsub(/<:ee>/,'').
    gsub(/<!>/,' ').  #proposed change, insert, but may be redundant
    gsub(/<(br|p)>|<\/\s*(br|p)>|<(br|p)\s*\/>/," #{Tex[:backslash]*2} "). # Work Area
    gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\begin{bfseries}\1 \end{bfseries}').
    gsub(/<h\d+>(.+?)<\/h\d+>/,'\begin{bfseries}\1 \end{bfseries}').
    gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\emph{\1}').
    gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\uline{\1}'). # ulem
    gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,"``\\1''"). # quote #CHECK
    gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\uline{\1}'). # ulem
    gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\sout{\1}'). # ulem
    gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,"\$^{\\textrm{\\1}}\$").
    gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,"\$_{\\textrm{\\1}}\$").
    gsub(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/,'\begin{monosp}\1\end{monosp}')
  unless is==:code
    str=str.gsub(/"(.+?)"/,'“\1”').  # quote marks / quotations open & close " need condition exclude for code
      # an unpaired opening quote after whitespace becomes ' “'; the earlier
      # empty replacement deleted both the quote and the whitespace, fusing
      # the neighbouring words
      gsub(/\s+"/,' “').                                # open "
      gsub(/^(#{Mx[:lv_o]}[1-6-]:\S*?#{Mx[:lv_c]}|<.+?>)?\s*"/,'\1“'). #fix Mx[:lv_o] # open "
      gsub(/"(\s|\.|,|:|;)/,'”\1').                     # close "
      gsub(/"(#{Mx[:lv_o]}[1-6-]:\S*?#{Mx[:lv_c]}|<.+?>)?\s*$/,'”\1'). #fix Mx[:lv_o] # close "
      # keeps the captured punctuation instead of dropping it; the broader
      # close-quote rule two lines up already covers these cases
      gsub(/"(\.|,)/,'”\1').                            # close "
      gsub(/\s+'/,' `').                                # open '
      gsub(/^(#{Mx[:lv_o]}[1-6-]:\S*?#{Mx[:lv_c]}|<.+?>)?\s*'/,'\1`') #fix Mx[:lv_o] # open '
  end
  str=str.gsub(/(<font.*?>|<\/font>)/,'').
    gsub(/\s*#{Mx[:fa_superscript_o]}(\S+?)#{Mx[:fa_superscript_c]}/,'^\1')
  str
end

#xetex_special_characters_3(str) ⇒ Object



1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
# File 'lib/sisu/texpdf_format.rb', line 1405

# Final conversion pass: cleans up stray '<br' / '/>' fragments and resolves
# the placeholders '<=tilde>', '<=hash>', '<=amp>' and '<=copymark>'.
#
# Inside http(s) urls the placeholders are turned back into the plain
# characters '~', '#', '&'; elsewhere they become the LaTeX fragments
# '{$\tilde$}', '{\#}' and '{\&}'. Tilde and ampersand placeholders in urls
# are replaced repeatedly until none is left; for '#' only one substitution
# pass is made.
#
# str - text to convert (not modified)
#
# Returns the converted String.
def xetex_special_characters_3(str)
  str=str.gsub(/<br(\s*[^\/][^>])/,'\1'). # clean up, incredibly messy :-( footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder
    gsub(/([^<][^b][^r]\s+)\/>/,'\1') # clean up, incredibly messy :-( footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder
  # each gsub resolves the first placeholder of a url (the greedy \S+ swallows
  # the rest), hence the loop until no url contains one any more
  while str =~/(https?:\/\/\S+?)(?:<=tilde>\S+)+/ #tilde in urls \href treated differently from text #FIX
    str=str.gsub(/(https?:\/\/\S+?)(?:<=tilde>(\S+))+/,'\1~\2')
  end
  str=str.gsub(/<=tilde>/,'{$\tilde$}').
    gsub(/(https?:\/\/\S+?)(?:(?:<=hash>)(\S+))+/,'\1#\2'). #hash in urls \href treated differently from text #FIX
    gsub(/<=hash>/,'{\#}')
  while str =~/(https?:\/\/\S+?)(?:<=amp>\S+)+/ #amp in urls \href treated differently from text #FIX
    str=str.gsub(/(https?:\/\/\S+?)(?:<=amp>(\S+))+/,'\1&\2')
  end
  # the copymark pattern has a single capture group; the former trailing '\2'
  # back-reference always expanded to nothing and has been dropped (the
  # trailing space it followed is kept, so the output is unchanged)
  str.gsub(/<=amp>/,'{\\\&}'). #changed ... 2005
    gsub(/<=copymark>\s*(.+)/,
      '^\copyright \textnormal{\1} ') # watch likely to be problematic
end