Module: TypoHero

Extended by:
TypoHero
Included in:
TypoHero
Defined in:
lib/typohero.rb,
lib/typohero/latex.rb,
lib/typohero/version.rb

Constant Summary collapse

EXCLUDED_TAGS =
%w(head pre code kbd math script style textarea)
EXCLUDED_TAGS_RE =
/\A<(\/)?(?:#{EXCLUDED_TAGS.join('|')})[\p{Space}\/>]/im
TOKENIZER_RE =
%r{
  <!--(?:(?:(?!-->).)*)-->|            # comment
  <!\[CDATA\[(?:(?:(?!\]\]>).)*)\]\]>| # cdata
  <[^>]+>|                             # opening or closing tag
  \\[\(\)\[\]]|                        # latex begin/end
  \$\$|                                # dollar latex begin/end
  (?:(?:(?!\$\$|\\[\(\)\[\]])[^<])+)   # text without double dollar or latex
}xm
ESCAPE =
{
  '\\\\'  => '&#92;',
  '\"'    => '&#34;',
  "\\'"   => '&#39;',
  '\.'    => '&#46;',
  '\,'    => '&#44;',
  '\-'    => '&#45;',
  '\`'    => '&#96;',
}
UNESCAPE =
Hash[ESCAPE.map {|k,v| [v,k[1..-1]] }]
ESCAPE_RE =
Regexp.union(*ESCAPE.keys)
UNESCAPE_RE =
Regexp.union(*UNESCAPE.keys)
NBSP =
"\u00a0"
NBSP_THIN =
"\u202F"
MDASH =
"\u2014"
NDASH =
"\u2013"
LDQUO =
"\u201C"
RDQUO =
"\u201D"
LSQUO =
"\u2018"
RSQUO =
"\u2019"
BDQUO =
"\u201E"
ELLIPSIS =
"\u2026"
SPECIAL =
{
  # enhance!
  ' - '      => " #{NDASH} ",
  '---'      => MDASH,
  '--'       => NDASH,
  '...'      => ELLIPSIS,
  '. . .'    => ELLIPSIS,
  '``'       => LDQUO,
  "''"       => RDQUO,
  '`'        => LSQUO,
  #'\''        => RSQUO, # needs more complex treatment
  ',,'       => BDQUO,
  '(c)'      => "\u00A9",
  '(C)'      => "\u00A9",
  '(r)'      => "\u00AE",
  '(R)'      => "\u00AE",
  '(tm)'     => "\u2122",
  '(TM)'     => "\u2122",
  # normalize for further processing
  '&ldquo;'  => LDQUO,
  '&rdquo;'  => RDQUO,
  '&lsquo;'  => LSQUO,
  '&rsquo;'  => RSQUO,
  '&nbsp;'   => NBSP,
  '&ndash;'  => NDASH,
  '&mdash;'  => MDASH
}
SPECIAL_RE =
Regexp.union(*SPECIAL.keys)
LATEX_RE =
/(#{Regexp.union *LATEX.keys})(?=\p{Space}|$)/m
DASH_RE =
"[#{MDASH}#{NDASH}]"
AMP_RE =
'&(?:amp;)?'
LEFT_QUOTE_RE =
"[#{LDQUO}#{LSQUO}#{BDQUO}]"
PRIME_RE =
/(?<=\d)(''?)(?=[\p{Space}\dNEWS]|$)/m
PRIMES =
{
 "'"   => "\u2032",
 "''"  => "\u2033",
 "'''" => "\u2034",
}
ORDINAL_RE =
/(?<=\d)(st|nd|rd|th)(?=\p{Space}|$)/
MDASH_SPACE_RE =
/\p{Space}*#{MDASH}\p{Space}*/
NDASH_SPACE_RE =
/\p{Space}*#{NDASH}\p{Space}*/
MDASH_SPACE =
"#{NBSP_THIN}#{MDASH}#{NBSP_THIN}"
NDASH_SPACE =
"#{NBSP}#{NDASH}#{NBSP}"
REPLACE_AMP_RE =
/(?<=\p{Space})#{AMP_RE}(?=\p{Space})/
CAPS_BEGIN_RE =
"(^|\\p{Space}|#{LEFT_QUOTE_RE})"
CAPS_INNER_RE =

right quote for possession (e.g. JIMMY’S)

"(?:#{AMP_RE}|[A-Z\\d\\.]|#{RSQUO})*"
CAPS_RE =
/#{CAPS_BEGIN_RE}([A-Z\d]#{CAPS_INNER_RE}[A-Z]#{CAPS_INNER_RE}|[A-Z]#{CAPS_INNER_RE}[A-Z\d]#{CAPS_INNER_RE})/m
RIGHT_QUOTE_RE =
%r{
  ^['"](?=\p{Punct})\B|                       # Very first character is a closing quote followed by punctuation at a non-word-break
  (?<!^|#{DASH_RE}|\p{Space}|[\[\{\(\-])['"]| # Not after dash, space or opening parentheses
  ['"](?=\p{Space}|$)|                        # Followed by space or end of line
  's\b|                                       # Apostrophe
  (?<=#{DASH_RE})['"](?=\p{Punct})|           # Dash quote punctuation (e.g. --'!), for quotations
  '(?=(\d\d(?:s|\p{Space}|$)))                # Decade abbreviations (the '80s)
}xm
LEFT_QUOTES =
{
  "'" => LSQUO,
  '"' => LDQUO,
}
RIGHT_QUOTES =
{
  "'" => RSQUO,
  '"' => RDQUO,
}
TWO_QUOTES =
{
  '"\'' => LDQUO + LSQUO,
  '\'"' => LSQUO + LDQUO
}
PARAGRAPH_RE =
'h[1-6]|p|li|dt|dd|div'
INLINE_RE =
'a|em|span|strong|i|b'
WIDONT_PARAGRAPH_RE =
/\A<\/(?:#{PARAGRAPH_RE})>\Z/im
WIDONT_INLINE_RE =
/\A<\/?(?:#{INLINE_RE})[^>]*>\Z/im
WIDONT_NBSP_RE =
/[#{NBSP}#{NBSP_THIN}<>]/
INITIAL_QUOTE_RE =
/(?=(?:<(?:#{PARAGRAPH_RE})[^>]*>|^)(?:<(?:#{INLINE_RE})[^>]*>|\p{Space})*)#{LEFT_QUOTE_RE}/m
INITIAL_QUOTES =
{
  LSQUO => "<span class=\"quo\">#{LSQUO}</span>",
  LDQUO => "<span class=\"dquo\">#{LDQUO}</span>",
  BDQUO => "<span class=\"bdquo\">#{BDQUO}</span>",
}
# Maps LaTeX-style commands (including the leading backslash) to the Unicode
# character that replaces them. The keys are also used to build LATEX_RE.
#
# Every key must be unique: in a Ruby hash literal a repeated key silently
# overwrites the earlier value.
LATEX =
{
  '\AA' => "\u00c5",
  '\AE' => "\u00c6",
  '\Alpha' => "\u0391",
  '\Beta' => "\u0392",
  '\Box' => "\u25a1",
  '\Bumpeq' => "\u224e",
  '\Cap' => "\u22d2",
  '\Chi' => "\u03a7",
  '\Cup' => "\u22d3",
  '\DH' => "\u00d0",
  '\DJ' => "\u0110",
  '\Delta' => "\u0394",
  '\ESH' => "\u01a9",
  '\Epsilon' => "\u0395",
  '\Eta' => "\u0397",
  '\Gamma' => "\u0393",
  '\IJ' => "\u0132",
  '\Iota' => "\u0399",
  '\Kappa' => "\u039a",
  '\L' => "\u0141",
  '\Lambda' => "\u039b",
  '\Leftrightarrow' => "\u21d4",
  '\Mu' => "\u039c",
  '\NG' => "\u014a",
  '\Nu' => "\u039d",
  '\O' => "\u00d8",
  '\OE' => "\u0152",
  '\OHORN' => "\u01a0",
  '\Omega' => "\u03a9",
  '\Omicron' => "\u039f",
  '\P' => "\u00b6",
  '\Phi' => "\u03a6",
  '\Pi' => "\u03a0",
  '\Psi' => "\u03a8",
  '\Rho' => "\u03a1",
  '\Rightarrow' => "\u21d2",
  '\S' => "\u00a7",
  '\Sigma' => "\u03a3",
  '\Subset' => "\u22d0",
  '\Supset' => "\u22d1",
  '\TH' => "\u00de",
  '\Tau' => "\u03a4",
  '\Theta' => "\u0398",
  '\Thorn' => "\u00de",
  '\UHORN' => "\u01af",
  '\Upsilon' => "\u03a5",
  '\Vdash' => "\u22a9",
  '\Vvdash' => "\u22aa",
  '\Xi' => "\u039e",
  '\Zeta' => "\u0396",
  '\aa' => "\u00e5",
  '\ae' => "\u00e6",
  '\ain' => "\u02bf",
  '\alpha' => "\u03b1",
  '\angle' => "\u2220",
  '\approx' => "\u2248",
  '\approxeq' => "\u224a",
  '\ast' => "\u2217",
  '\asymp' => "\u224d",
  '\ayn' => "\u02bf",
  '\backsim' => "\u223d",
  '\backsimeq' => "\u22cd",
  '\barwedge' => "\u22bc",
  '\because' => "\u2235",
  '\beta' => "\u03b2",
  '\between' => "\u226c",
  '\bigcap' => "\u22c2",
  '\bigcup' => "\u22c3",
  '\bigvee' => "\u22c1",
  '\bigwedge' => "\u22c0",
  '\bot' => "\u22a5",
  '\bowtie' => "\u22c8",
  '\boxdot' => "\u22a1",
  '\boxminus' => "\u229f",
  '\boxplus' => "\u229e",
  '\boxtimes' => "\u22a0",
  '\bullet' => "\u2219",
  '\bumpeq' => "\u224f",
  '\cap' => "\u2229",
  '\cdot' => "\u22c5",
  '\cdots' => "\u22ef",
  '\chi' => "\u03c7",
  '\circ' => "\u2218",
  '\circeq' => "\u2257",
  '\circledast' => "\u229b",
  '\circledcirc' => "\u229a",
  '\circleddash' => "\u229d",
  '\clubsuit' => "\u2663",
  '\complement' => "\u2201",
  '\cong' => "\u2245",
  '\coprod' => "\u2210",
  '\copyright' => "\u00a9",
  '\cup' => "\u222a",
  '\curlyeqprec' => "\u22de",
  '\curlyeqsucc' => "\u22df",
  '\curlyvee' => "\u22ce",
  '\curlywedge' => "\u22cf",
  '\dag' => "\u2020",
  '\dashv' => "\u22a3",
  '\ddag' => "\u2021",
  '\ddots' => "\u22f1",
  '\delta' => "\u03b4",
  '\dh' => "\u00f0",
  '\diamond' => "\u22c4",
  '\diamondsuit' => "\u2662",
  '\div' => "\u00f7",
  '\divideontimes' => "\u22c7",
  '\dj' => "\u0111",
  '\doteq' => "\u2250",
  '\doteqdot' => "\u2251",
  '\dotplus' => "\u2214",
  '\dots' => "\u2026",
  '\downarrow' => "\u2193",
  '\eqcirc' => "\u2256",
  '\equiv' => "\u2261",
  '\eta' => "\u03b7",
  '\euro' => "\u20ac",
  '\exists' => "\u2203",
  '\fallingdotseq' => "\u2252",
  '\flat' => "\u266d",
  '\forall' => "\u2200",
  '\gamma' => "\u03b3",
  '\geq' => "\u2265",
  '\geqq' => "\u2267",
  '\gg' => "\u226b",
  '\ggg' => "\u22d9",
  '\gneqq' => "\u2269",
  '\gnsim' => "\u22e7",
  '\gtrdot' => "\u22d7",
  '\gtreqless' => "\u22db",
  '\gtrless' => "\u2277",
  '\gtrsim' => "\u2273",
  '\guillemotleft' => "\u00ab",
  '\guillemotright' => "\u00bb",
  '\guilsinglleft' => "\u2039",
  '\guilsinglright' => "\u203a",
  '\hamza' => "\u02be",
  '\heartsuit' => "\u2661",
  '\hv' => "\u0195",
  '\i' => "\u0131",
  '\iiint' => "\u222d",
  '\iint' => "\u222c",
  '\ij' => "\u0133",
  '\in' => "\u2208",
  '\infty' => "\u221e",
  '\int' => "\u222b",
  '\intercal' => "\u22ba",
  '\iota' => "\u03b9",
  '\kappa' => "\u03ba",
  '\l' => "\u0142",
  '\lambda' => "\u03bb",
  '\langle' => "\u27e8",
  '\lceil' => "\u2308",
  '\leadsto' => "\u219d",
  '\leftarrow' => "\u2190",
  '\leftrightarrow' => "\u2194",
  '\leftthreetimes' => "\u22cb",
  '\leq' => "\u2264",
  '\leqq' => "\u2266",
  '\lessdot' => "\u22d6",
  '\lesseqgtr' => "\u22da",
  '\lessgtr' => "\u2276",
  '\lesssim' => "\u2272",
  '\lfloor' => "\u230a",
  '\lhd' => "\u22b2",
  '\ll' => "\u226a",
  '\lll' => "\u22d8",
  '\lneqq' => "\u2268",
  '\lnot' => "\u00ac",
  '\lnsim' => "\u22e6",
  '\ltimes' => "\u22c9",
  '\measuredangle' => "\u2221",
  '\mid' => "\u2223",
  '\mp' => "\u2213",
  '\mu' => "\u03bc",
  '\multimap' => "\u22b8",
  '\nVdash' => "\u22ae",
  '\nabla' => "\u2207",
  '\natural' => "\u266e",
  '\ncong' => "\u2247",
  '\neq' => "\u2260",
  '\nexists' => "\u2204",
  '\ng' => "\u014b",
  '\ngeq' => "\u2271",
  '\ngtr' => "\u226f",
  '\ni' => "\u220b",
  '\nleq' => "\u2270",
  '\nless' => "\u226e",
  '\nmid' => "\u2224",
  '\nobreakspace' => "\u00a0",
  '\notin' => "\u2209",
  '\nparallel' => "\u2226",
  '\nprec' => "\u2280",
  '\nsim' => "\u2241",
  '\nsubseteq' => "\u2288",
  '\nsucc' => "\u2281",
  '\nsupseteq' => "\u2289",
  '\ntriangleleft' => "\u22ea",
  '\ntrianglelefteq' => "\u22ec",
  '\ntriangleright' => "\u22eb",
  '\ntrianglerighteq' => "\u22ed",
  '\nu' => "\u03bd",
  '\o' => "\u00f8",
  '\odot' => "\u2299",
  '\oe' => "\u0153",
  '\ohorn' => "\u01a1",
  '\oint' => "\u222e",
  '\omega' => "\u03c9",
  '\omicron' => "\u03bf",
  '\ominus' => "\u2296",
  '\oplus' => "\u2295",
  '\oslash' => "\u2298",
  '\otimes' => "\u2297",
  '\parallel' => "\u2225",
  '\partial' => "\u2202",
  '\pi' => "\u03c0",
  '\pitchfork' => "\u22d4",
  '\pm' => "\u00b1",
  '\pounds' => "\u00a3",
  '\prec' => "\u227a",
  '\preccurlyeq' => "\u227c",
  '\precnsim' => "\u22e8",
  '\precsim' => "\u227e",
  '\prod' => "\u220f",
  '\propto' => "\u221d",
  '\psi' => "\u03c8",
  '\quotedblbase' => "\u201e",
  '\quotesinglbase' => "\u201a",
  '\rangle' => "\u27e9",
  '\rceil' => "\u2309",
  '\rfloor' => "\u230b",
  '\rhd' => "\u22b3",
  '\rightarrow' => "\u2192",
  '\rightleftharpoons' => "\u21cc",
  '\rightthreetimes' => "\u22cc",
  '\risingdotseq' => "\u2253",
  '\rtimes' => "\u22ca",
  '\set' => "\u2205",
  '\setminus' => "\u2216",
  '\sharp' => "\u266f",
  '\sigma' => "\u03c3",
  '\sim' => "\u223c",
  '\simeq' => "\u2243",
  '\spadesuit' => "\u2660",
  '\sphericalangle' => "\u2222",
  '\sqcap' => "\u2293",
  '\sqcup' => "\u2294",
  '\sqsubset' => "\u228f",
  '\sqsubseteq' => "\u2291",
  '\sqsupset' => "\u2290",
  '\sqsupseteq' => "\u2292",
  '\ss' => "\u00df",
  '\star' => "\u22c6",
  '\subset' => "\u2282",
  '\subseteq' => "\u2286",
  '\subsetneq' => "\u228a",
  '\succ' => "\u227b",
  '\succcurlyeq' => "\u227d",
  '\succnsim' => "\u22e9",
  '\succsim' => "\u227f",
  '\sum' => "\u2211",
  '\supset' => "\u2283",
  '\supseteq' => "\u2287",
  '\supsetneq' => "\u228b",
  '\surd' => "\u221a",
  '\tau' => "\u03c4",
  '\textBhook' => "\u0181",
  '\textChook' => "\u0187",
  '\textDafrican' => "\u0189",
  '\textDhook' => "\u018a",
  '\textEopen' => "\u0190",
  '\textEreversed' => "\u018e",
  '\textEsh' => "\u01a9",
  '\textEzh' => "\u01b7",
  '\textFhook' => "\u0191",
  '\textGammaafrican' => "\u0194",
  '\textHbar' => "\u0126",
  '\textIotaafrican' => "\u0196",
  '\textKhook' => "\u0198",
  '\textNhookleft' => "\u019d",
  '\textOopen' => "\u0186",
  '\textPhook' => "\u01a4",
  '\textTbar' => "\u0166",
  '\textThook' => "\u01ac",
  '\textTretroflexhook' => "\u01ae",
  '\textTstroke' => "\u0166",
  '\textVhook' => "\u01b2",
  '\textYhook' => "\u01b3",
  '\textampersand' => "\u0026",
  '\textasciiacute' => "\u00b4",
  '\textasciicedilla' => "\u00b8",
  '\textasciicircum' => "\u005e",
  '\textasciidieresis' => "\u00a8",
  '\textasciigrave' => "\u0060",
  '\textasciimacron' => "\u00af",
  '\textasciitilde' => "\u007e",
  '\textasteriskcentered' => "\u002a",
  '\textbackslash' => "\u005c",
  '\textbar' => "\u007c",
  '\textbardotlessj' => "\u025f",
  '\textbarglotstop' => "\u02a1",
  '\textbari' => "\u0268",
  '\textbarl' => "\u0142",
  '\textbaro' => "\u0275",
  '\textbarrevglotstop' => "\u02a2",
  '\textbaru' => "\u0289",
  '\textbeltl' => "\u026c",
  '\textbhook' => "\u0253",
  '\textbraceleft' => "\u007b",
  '\textbraceright' => "\u007d",
  '\textbrokenbar' => "\u00a6",
  '\textbullet' => "\u2022",
  '\textbullseye' => "\u0298",
  '\textcent' => "\u00a2",
  '\textcentereddot' => "\u00b7",
  '\textchook' => "\u0188",
  '\textcloseepsilon' => "\u029a",
  '\textcloseomega' => "\u0277",
  '\textcloserevepsilon' => "\u025e",
  '\textcolonmonetary' => "\u20a1",
  '\textcopyright' => "\u00a9",
  '\textcrb' => "\u0180",
  '\textcrd' => "\u0111",
  '\textcrh' => "\u0127",
  '\textcrlambda' => "\u019b",
  '\textctc' => "\u0255",
  '\textctesh' => "\u0286",
  '\textctj' => "\u029d",
  '\textctyogh' => "\u0293",
  '\textctz' => "\u0291",
  '\textcurrency' => "\u00a4",
  '\textdctzlig' => "\u02a5",
  '\textdegree' => "\u00b0",
  '\textdhook' => "\u0257",
  '\textdiv' => "\u00f7",
  '\textdollar' => "\u0024",
  '\textdong' => "\u20ab",
  '\textdtail' => "\u0256",
  '\textdyoghlig' => "\u02a4",
  '\textdzlig' => "\u02a3",
  '\textemdash' => "\u2014",
  '\textendash' => "\u2013",
  '\texteopen' => "\u025b",
  '\textepsilon' => "\u025b",
  '\textequals' => "\u003d",
  '\textesh' => "\u0283",
  '\texteturned' => "\u01dd",
  '\texteuro' => "\u20ac",
  '\textexclamdown' => "\u00a1",
  '\textezh' => "\u0292",
  '\textfishhookr' => "\u027e",
  '\textflorin' => "\u0192",
  '\textg' => "\u0067",
  '\textgamma' => "\u0263",
  '\textgammalatinsmall' => "\u0263",
  '\textglotstop' => "\u0294",
  '\textgreater' => "\u003e",
  '\texthash' => "\u0023",
  '\texthbar' => "\u0127",
  '\texthtb' => "\u0253",
  '\texthtbardotlessj' => "\u0284",
  '\texthtc' => "\u0188",
  '\texthtd' => "\u0257",
  '\texthtg' => "\u0260",
  '\texthth' => "\u0266",
  '\texththeng' => "\u0267",
  '\texthtk' => "\u0199",
  '\texthtp' => "\u01a5",
  '\texthtq' => "\u02a0",
  '\texthtscg' => "\u029b",
  '\texthtt' => "\u01ad",
  '\texthvlig' => "\u0195",
  '\textinterrobang' => "\u203d",
  '\textinvglotstop' => "\u0296",
  '\textinvscr' => "\u0281",
  '\textiota' => "\u0269",
  '\textiotalatin' => "\u0269",
  '\textkhook' => "\u0199",
  '\textkra' => "\u0138",
  '\textlengthmark' => "\u02d0",
  '\textless' => "\u003c",
  '\textlhti' => "\u027f",
  '\textlira' => "\u20a4",
  '\textlogicalnot' => "\u00ac",
  '\textlonglegr' => "\u027c",
  '\textlooptoprevesh' => "\u01aa",
  '\textltailm' => "\u0271",
  '\textltailn' => "\u0272",
  '\textltilde' => "\u026b",
  '\textlyoghlig' => "\u026e",
  '\textminus' => "\u2212",
  '\textmu' => "\u00b5",
  '\textnaira' => "\u20a6",
  '\textnhookleft' => "\u0272",
  '\textnumero' => "\u2116",
  '\textonehalf' => "\u00bd",
  '\textonequarter' => "\u00bc",
  '\textonesuperior' => "\u00b9",
  '\textoopen' => "\u0254",
  '\textopeno' => "\u0254",
  '\textordfeminine' => "\u00aa",
  '\textordmasculine' => "\u00ba",
  '\textoverline' => "\u203e",
  '\textpalhookbelow' => "\u01ab",
  '\textparagraph' => "\u00b6",
  '\textpercent' => "\u0025",
  '\textperiodcentered' => "\u00b7",
  '\textpertenthousand' => "\u2031",
  '\textperthousand' => "\u2030",
  '\textphi' => "\u0278",
  '\textphook' => "\u01a5",
  '\textpm' => "\u00b1",
  '\textprimstress' => "\u02c8",
  '\textquestiondown' => "\u00bf",
  '\textquotedbl' => "\u0022",
  '\textquotedblleft' => "\u201c",
  '\textquotedblright' => "\u201d",
  '\textquoteleft' => "\u2018",
  '\textquoteright' => "\u2019",
  '\textquotesingle' => "\u0027",
  '\textraisevibyi' => "\u0285",
  '\textramshorns' => "\u0264",
  '\textreferencemark' => "\u203b",
  '\textregistered' => "\u00ae",
  '\textreve' => "\u0258",
  '\textrevepsilon' => "\u025c",
  '\textrevglotstop' => "\u0295",
  '\textrhookrevepsilon' => "\u025d",
  '\textrhookschwa' => "\u025a",
  '\textrtaild' => "\u0256",
  '\textrtaill' => "\u026d",
  '\textrtailn' => "\u0273",
  '\textrtailr' => "\u027d",
  '\textrtails' => "\u0282",
  '\textrtailt' => "\u0288",
  '\textrtailz' => "\u0290",
  '\textscb' => "\u0299",
  '\textscg' => "\u0262",
  '\textsch' => "\u029c",
  '\textschwa' => "\u0259",
  '\textsci' => "\u026a",
  '\textscl' => "\u029f",
  '\textscn' => "\u0274",
  '\textscoelig' => "\u0276",
  '\textscr' => "\u0280",
  '\textscripta' => "\u0251",
  '\textscriptg' => "\u0261",
  '\textscriptv' => "\u028b",
  '\textscy' => "\u028f",
  '\textsection' => "\u00a7",
  '\textsterling' => "\u00a3",
  '\textstretchc' => "\u0297",
  '\texttbar' => "\u0167",
  '\texttctclig' => "\u02a8",
  '\texttesh' => "\u02a7",
  '\textteshlig' => "\u02a7",
  '\textthook' => "\u01ad",
  '\textthorn' => "\u00fe",
  '\textthornvari' => "\u00fe",
  '\textthornvarii' => "\u00fe",
  '\textthornvariii' => "\u00fe",
  '\textthornvariv' => "\u00fe",
  '\textthreequarters' => "\u00be",
  '\textthreesuperior' => "\u00b3",
  '\texttimes' => "\u00d7",
  '\texttrademark' => "\u2122",
  '\texttretroflexhook' => "\u0288",
  '\texttslig' => "\u02a6",
  '\texttstroke' => "\u0167",
  '\textturna' => "\u0250",
  '\textturnh' => "\u0265",
  '\textturnk' => "\u029e",
  '\textturnlonglegr' => "\u027a",
  '\textturnm' => "\u026f",
  '\textturnmrleg' => "\u0270",
  '\textturnr' => "\u0279",
  '\textturnrrtail' => "\u027b",
  '\textturnscripta' => "\u0252",
  '\textturnt' => "\u0287",
  '\textturnv' => "\u028c",
  '\textturnw' => "\u028d",
  '\textturny' => "\u028e",
  '\texttwosuperior' => "\u00b2",
  '\textunderscore' => "\u005f",
  '\textupsilon' => "\u028a",
  '\textvhook' => "\u028b",
  '\textwon' => "\u20a9",
  '\textyen' => "\u00a5",
  '\textyhook' => "\u01b4",
  '\textyogh' => "\u0292",
  '\th' => "\u00fe",
  '\therefore' => "\u2234",
  '\times' => "\u00d7",
  '\tone1' => "\u02e9",
  '\tone2' => "\u02e8",
  '\tone3' => "\u02e7",
  '\tone4' => "\u02e6",
  '\tone5' => "\u02e5",
  '\top' => "\u22a4",
  '\triangleq' => "\u225c",
  '\uhorn' => "\u01b0",
  '\unlhd' => "\u22b4",
  '\unrhd' => "\u22b5",
  '\uparrow' => "\u2191",
  '\updownarrow' => "\u2195",
  '\uplus' => "\u228e",
  '\upsilon' => "\u03c5",
  '\epsilon' => "\u03b5",
  '\varepsilon' => "\u03b5",
  '\phi' => "\u03c6",
  '\varphi' => "\u03c6",
  '\rho' => "\u03c1",
  '\varrho' => "\u03c1",
  # '\sigma' is already defined above as U+03C3. It must not be repeated here
  # with the final-sigma glyph, or the later entry would override it.
  '\varsigma' => "\u03c2",
  '\theta' => "\u03b8",
  '\vartheta' => "\u03b8",
  '\vdash' => "\u22a2",
  '\vdots' => "\u22ee",
  '\vee' => "\u2228",
  '\veebar' => "\u22bb",
  '\wedge' => "\u2227",
  '\wr' => "\u2240",
  '\xi' => "\u03be",
  '\zeta' => "\u03b6",
  '\not\approx' => "\u2249",
  '\not\asymp' => "\u226d",
  '\not\equiv' => "\u2262",
  '\not\gtrsim' => "\u2275",
  '\not\lesssim' => "\u2274",
  '\not\ni' => "\u220c",
  '\not\preccurlyeq' => "\u22e0",
  '\not\simeq' => "\u2244",
  '\not\sqsubseteq' => "\u22e2",
  '\not\sqsupseteq' => "\u22e3",
  '\not\subset' => "\u2284",
  '\not\succcurlyeq' => "\u22e1",
  '\not\supset' => "\u2285",
}
VERSION =
'0.0.3'

Instance Method Summary collapse

Instance Method Details

#amp(s) ⇒ Object



341
342
343
# File 'lib/typohero.rb', line 341

# Wraps every ampersand matched by REPLACE_AMP_RE (a bare "&" or "&amp;"
# with whitespace on both sides) in a <span class="amp"> element.
#
# @param s [String] text to rewrite; modified in place
# @return [String, nil] +s+ if something was wrapped, +nil+ otherwise
def amp(s)
  wrapped = '<span class="amp">&amp;</span>'
  s.gsub!(REPLACE_AMP_RE, wrapped)
end

#caps(s) ⇒ Object



345
346
347
# File 'lib/typohero.rb', line 345

# Wraps runs of capitals matched by CAPS_RE in a <span class="caps"> element.
# The leading context captured by the pattern (line start, whitespace or an
# opening quote) is kept in front of the span.
#
# @param s [String] text to rewrite; modified in place
# @return [String, nil] +s+ if something was wrapped, +nil+ otherwise
def caps(s)
  markup = '\1<span class="caps">\2</span>'
  s.gsub!(CAPS_RE, markup)
end

#dash_spaces(s) ⇒ Object



336
337
338
339
# File 'lib/typohero.rb', line 336

# Normalises the whitespace around dashes: every em dash (with any
# surrounding whitespace) becomes MDASH_SPACE, then every en dash becomes
# NDASH_SPACE.
#
# @param s [String] text to rewrite; modified in place
# @return [String, nil] result of the en-dash substitution only: +s+ if an
#   en dash was rewritten, +nil+ otherwise
def dash_spaces(s)
  rules = [
    [MDASH_SPACE_RE, MDASH_SPACE],
    [NDASH_SPACE_RE, NDASH_SPACE]
  ]
  rules.map { |pattern, spaced| s.gsub!(pattern, spaced) }.last
end

#decode(s) ⇒ Object



313
314
315
316
317
318
# File 'lib/typohero.rb', line 313

# Replaces numeric character references (decimal "&#65;" or hexadecimal
# "&#x41;") in +s+ with the character they denote.
#
# The ampersand (code point 38) is emitted as "&amp;" rather than a bare "&".
# References that do not denote a valid Unicode scalar value (surrogates,
# values above U+10FFFF, absurdly large numbers) are left untouched instead
# of raising RangeError.
#
# @param s [String] text to rewrite; modified in place
# @return [String, nil] +s+ if any reference was found, +nil+ otherwise
def decode(s)
  s.gsub!(/&#x([0-9A-F]+);|&#([0-9]+);/i) do |entity|
    codepoint = $1 ? $1.to_i(16) : $2.to_i(10)
    next '&amp;' if codepoint == 38

    begin
      codepoint.chr('UTF-8')
    rescue RangeError
      # Not encodable in UTF-8: keep the original reference verbatim.
      entity
    end
  end
end

#enhance(input) ⇒ Object



250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
# File 'lib/typohero.rb', line 250

# Runs the whole typographic pipeline over +input+.
#
# The input is split with #tokenize. Each :text token is rewritten in place
# (entity decoding, escapes, primes, special sequences, LaTeX commands,
# quotes, dash spacing). #widont then runs over the complete token list, and
# a second pass over the text tokens adds markup (initial quotes, ampersands,
# caps, ordinals, no-break spans) and finally restores escaped characters.
#
# @param input [String] HTML or plain text
# @return [String] the enhanced text, passed through #html_safe
def enhance(input)
  all_tokens = []
  text_tokens = []
  previous_tail = nil

  tokenize(input) do |token, kind|
    if kind == :text
      # Remember the untouched last character: #quotes uses it for the
      # next text token.
      tail = token[-1]

      decode(token)
      escape(token)
      primes(token)
      special(token)
      latex(token)
      quotes(token, previous_tail)
      dash_spaces(token)

      previous_tail = tail
      text_tokens << token
    end
    all_tokens << token
  end

  widont(all_tokens)

  # The text tokens are the same String objects held in all_tokens, so the
  # in-place edits below show up in the joined result.
  text_tokens.each do |token|
    initial_quotes(token)
    amp(token)
    caps(token)
    ordinals(token)
    nobr(token)
    unescape(token)
  end

  html_safe(input, all_tokens.join)
end

#escape(s) ⇒ Object



320
321
322
# File 'lib/typohero.rb', line 320

# Replaces each backslash escape listed in ESCAPE (e.g. "\-" or "\.") with
# its numeric character reference.
#
# @param s [String] text to rewrite; modified in place
# @return [String, nil] +s+ if an escape was replaced, +nil+ otherwise
def escape(s)
  s.gsub!(ESCAPE_RE) { |sequence| ESCAPE[sequence] }
end

#html_safe(src, dst) ⇒ Object



309
310
311
# File 'lib/typohero.rb', line 309

# Carries the "HTML safe" marking of +src+ over to +dst+.
#
# @param src [Object] the original input; consulted only if it responds to
#   +html_safe?+
# @param dst [String] the processed output
# @return [Object] +dst.html_safe+ when +src+ reports itself as HTML safe,
#   otherwise +dst+ unchanged
def html_safe(src, dst)
  return dst unless src.respond_to?(:html_safe?) && src.html_safe?

  dst.html_safe
end

#initial_quotes(s) ⇒ Object



349
350
351
# File 'lib/typohero.rb', line 349

# Wraps each opening quote matched by INITIAL_QUOTE_RE in the span that
# INITIAL_QUOTES assigns to that quote character.
#
# @param s [String] text to rewrite; modified in place
# @return [String, nil] +s+ if a quote was wrapped, +nil+ otherwise
def initial_quotes(s)
  s.gsub!(INITIAL_QUOTE_RE) { |quote| INITIAL_QUOTES[quote] }
end

#latex(s) ⇒ Object



332
333
334
# File 'lib/typohero.rb', line 332

# Replaces each LaTeX command matched by LATEX_RE with the character that
# LATEX maps it to.
#
# @param s [String] text to rewrite; modified in place
# @return [String, nil] +s+ if a command was replaced, +nil+ otherwise
def latex(s)
  s.gsub!(LATEX_RE) { |command| LATEX[command] }
end

#nobr(s) ⇒ Object



353
354
355
# File 'lib/typohero.rb', line 353

# Wraps hyphenated compounds (word characters joined by one or more hyphens,
# e.g. "state-of-the-art") in a <span class="nobr"> element.
#
# @param s [String] text to rewrite; modified in place
# @return [String, nil] +s+ if a compound was wrapped, +nil+ otherwise
def nobr(s)
  hyphenated = /[\p{Digit}\p{Word}]+(-[\p{Digit}\p{Word}]+)+/
  s.gsub!(hyphenated, '<span class="nobr">\0</span>')
end

#ordinals(s) ⇒ Object



362
363
364
# File 'lib/typohero.rb', line 362

# Wraps ordinal suffixes (st, nd, rd, th) matched by ORDINAL_RE in a
# <span class="ord"> element. The pattern requires a digit directly before
# the suffix and whitespace or end of line after it.
#
# @param s [String] text to rewrite; modified in place
# @return [String, nil] +s+ if a suffix was wrapped, +nil+ otherwise
def ordinals(s)
  markup = '<span class="ord">\1</span>'
  s.gsub!(ORDINAL_RE, markup)
end

#primes(s) ⇒ Object



357
358
359
360
# File 'lib/typohero.rb', line 357

# Converts straight quotes that follow a digit into prime marks, as used for
# feet/inches and minutes/seconds. PRIME_RE selects the quotes and PRIMES
# supplies the replacement character.
#
# @param s [String] text to rewrite; modified in place
# @return [String, nil] +s+ if a prime was inserted, +nil+ otherwise
def primes(s)
  s.gsub!(PRIME_RE) { |marks| PRIMES[marks] }
end

#quotes(s, prev_last_char) ⇒ Object



366
367
368
369
370
371
372
373
374
375
376
377
# File 'lib/typohero.rb', line 366

# Converts straight quotes in +s+ to typographic quotes, in place.
#
# A token that consists of a single quote character (optionally followed by
# one newline) carries no context of its own, so the direction is taken from
# +prev_last_char+: a closing quote if the previous text ended in a
# non-space character, an opening quote otherwise.
#
# @param s [String] text token to rewrite; modified in place
# @param prev_last_char [String, nil] last character of the previous text
#   token, or +nil+ if there was none
# @return [String, nil] +nil+ for the lone-quote case; otherwise the result
#   of the final substitution (+s+ or +nil+)
def quotes(s, prev_last_char)
  # The previous pattern /\A['"]\Z/ also matched a quote followed by a
  # newline, but then looked up the whole two-character string in the quote
  # tables, got nil and crashed in String#replace. Only the quote character
  # is swapped now, so a trailing newline is preserved.
  if s =~ /\A['"]\n?\z/
    quote_map = prev_last_char =~ /\P{Space}/ ? RIGHT_QUOTES : LEFT_QUOTES
    s[0] = quote_map[s[0]]
    return
  end

  # Special case for double sets of quotes, e.g.
  #   <p>He said, "'Quoted' words in a larger quote."</p>
  s.gsub!(/(?:"'|'")(?=\p{Word})/, TWO_QUOTES)
  # Closing quotes are identified by context; whatever is left is opening.
  s.gsub!(RIGHT_QUOTE_RE, RIGHT_QUOTES)
  s.gsub!(/['"]/,         LEFT_QUOTES)
end

#special(s) ⇒ Object



328
329
330
# File 'lib/typohero.rb', line 328

# Replaces each sequence matched by SPECIAL_RE (dashes, ellipses, backtick
# quotes, "(c)"-style marks and a few named entities) with the character
# that SPECIAL maps it to.
#
# @param s [String] text to rewrite; modified in place
# @return [String, nil] +s+ if a sequence was replaced, +nil+ otherwise
def special(s)
  s.gsub!(SPECIAL_RE) { |sequence| SPECIAL[sequence] }
end

#strip_tags(input) ⇒ Object



244
245
246
247
248
# File 'lib/typohero.rb', line 244

# Returns +input+ reduced to its :text and :latex tokens. Every other token
# type produced by #tokenize is dropped.
#
# @param input [String] HTML or plain text
# @return [String] the remaining text, passed through #html_safe
def strip_tags(input)
  kept = ''
  tokenize(input) do |token, kind|
    next unless [:text, :latex].include?(kind)

    kept << token
  end
  html_safe(input, kept)
end

#tokenize(input) ⇒ Object



135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# File 'lib/typohero.rb', line 135

# Splits +input+ with TOKENIZER_RE and yields every token together with its
# type.
#
# Token types:
#   :comment  - an HTML comment
#   :cdata    - a CDATA section
#   :tag      - an HTML tag outside excluded elements
#   :excluded - a tag listed in EXCLUDED_TAGS, or anything inside one
#   :latex    - a LaTeX delimiter (\( \) \[ \] or $$), or anything between
#               an opening and a closing delimiter
#   :text     - everything else
#
# @param input [String] HTML or plain text
# @yieldparam s [String] the token
# @yieldparam type [Symbol] one of the types listed above
# @return [String] +input+ (the return value of String#scan with a block)
def tokenize(input)
  # excluded: nesting depth of excluded elements
  # latex:    nesting depth of \( ... \) and \[ ... \] groups
  # dollar:   number of $$ delimiters seen; odd means "inside $$ ... $$"
  excluded, latex, dollar = 0, 0, 0
  input.scan TOKENIZER_RE do |s|
    type =
      if s =~ /\A<!--/
        :comment
      elsif s =~ /\A<!\[/
        :cdata
      end

    # Tags are only interpreted outside of LaTeX sections.
    if !type && latex == 0 && dollar.even?
      if s =~ /\A</
        if s =~ EXCLUDED_TAGS_RE
          # $1 is the "/" of a closing tag.
          excluded += $1 ? -1 : 1
          excluded = 0 if excluded < 0
          type = :excluded
        else
          type = excluded == 0 ? :tag : :excluded
        end
      end
    end

    # LaTeX delimiters are only interpreted outside of excluded elements.
    if !type && excluded == 0
      case s
      when /\A\\[\(\[]\Z/
        latex += 1
        type = :latex
      when /\A\\[\)\]]\Z/
        latex -= 1 if latex > 0
        type = :latex
      when '$$'
        dollar += 1
        type = :latex
      end
    end

    # Anything still untyped inherits the type of the section it is in.
    type ||=
      if excluded != 0
        :excluded
      elsif latex != 0 || dollar.odd?
        :latex
      else
        :text
      end

    yield(s, type)
  end
end

#tokenize_with_tags(input) ⇒ Object



184
185
186
187
188
189
190
191
192
193
194
195
196
# File 'lib/typohero.rb', line 184

# Like #tokenize, but additionally tracks which tags are currently open.
#
# An opening :tag token pushes its name onto the stack. A closing tag pops
# names until the matching one has been removed (or the stack is empty).
# The same array instance is yielded every time and keeps being mutated, so
# callers that need a snapshot must copy it.
#
# @param input [String] HTML or plain text
# @yieldparam s [String] the token
# @yieldparam type [Symbol] the token type reported by #tokenize
# @yieldparam tags [Array<String>] names of the tags open after this token
def tokenize_with_tags(input)
  open_tags = []
  tokenize(input) do |token, kind|
    if kind == :tag && token =~ /\A<(\/)?([^\p{Space}\/>]+)/
      closing, name = $1, $2
      if closing
        # Unwind the stack up to and including the matching opening tag.
        loop do
          break if open_tags.empty? || open_tags.pop == name
        end
      else
        open_tags << name
      end
    end
    yield(token, kind, open_tags)
  end
end

#truncate(input, *max_words_or_separator) ⇒ Object



198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
# File 'lib/typohero.rb', line 198

# Truncates +input+ after a number of words and/or at a separator, appends
# an ellipsis and closes any tags that are still open at the cut.
#
# @param input [String] HTML or plain text
# @param max_words_or_separator [Array] up to two optional arguments in any
#   order: an Integer word limit, and a separator given as a String, an
#   Array of patterns or a Regexp. A separator that does not occur in
#   +input+ is ignored.
# @return [String] the (possibly truncated) text, passed through #html_safe
def truncate(input, *max_words_or_separator)
  # Integer rather than Fixnum: Fixnum was deprecated in Ruby 2.4 and removed
  # in Ruby 3.2, where referencing it raises NameError.
  max_words = max_words_or_separator.select {|i| Integer === i }.first
  if separator = max_words_or_separator.reject {|i| Integer === i }.first
    separator = Regexp.union(*separator) unless Regexp === separator
    separator = nil unless input =~ separator
  end
  out, tail, truncated = '', '', false
  tokenize_with_tags(input) do |s, type, tags|
    if separator && (type == :comment || type == :text || type == :latex || type == :tag) && separator === s
      # Keep the text in front of the separator ($` is set by the match above).
      out << $` if type == :text
      if type == :tag
        if s =~ /\A<\//
          tail << s
        else
          # The separator is an opening tag that was just pushed: drop it
          # again so that no closing tag is generated for it.
          tags.pop
        end
      end
      truncated = tags
      break
    elsif max_words == 0
      # Word budget used up: stop at the next text, but keep the tags that
      # follow directly.
      if type == :text
        truncated = tags
        break
      end
      tail << s
    else
      if max_words && type == :text
        s =~ /\A(\p{Space}*)(.*)\Z/m
        ws, w = $1, $2.split(/\p{Space}+/)
        if w.size > max_words
          out << ws << w[0...max_words].join(' ')
          truncated = tags
          break
        end
        max_words -= w.size
      end
      out << s
    end
  end
  if truncated
    out.sub!(/[\p{Space}\p{Punct}]*\Z/, ELLIPSIS)
    until truncated.empty?
      name = truncated.pop
      # Void elements (e.g. <br>, <img>) have no closing tag; emitting
      # "</br>" would be invalid HTML.
      next if name =~ /\A(?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)\z/i
      tail << "</#{name}>"
    end
  end
  html_safe(input, out << tail)
end

#unescape(s) ⇒ Object



324
325
326
# File 'lib/typohero.rb', line 324

# Reverses #escape: each numeric character reference listed in UNESCAPE is
# replaced with the plain character it stands for.
#
# @param s [String] text to rewrite; modified in place
# @return [String, nil] +s+ if a reference was replaced, +nil+ otherwise
def unescape(s)
  s.gsub!(UNESCAPE_RE) { |reference| UNESCAPE[reference] }
end

#widont(tokens) ⇒ Object



279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
# File 'lib/typohero.rb', line 279

# Joins the last word of a run of text to the word before it with a
# non-breaking space (NBSP), so that it cannot end up alone on a line.
#
# The token list is scanned from the last token to the first and the token
# strings are modified in place. +state+ tracks the progress of the search:
#
#   0 - nothing to do until the next closing paragraph-level tag
#   1 - looking for the last word (initial state, and after a closing tag
#       matched by WIDONT_PARAGRAPH_RE)
#   2 - the whitespace in front of the last word sits at the very start of
#       the token remembered in +widow+; an earlier token still has to
#       supply the preceding word
#   3 - (part of) the last word has been seen, but no whitespace before it
#
# Tags matched by WIDONT_INLINE_RE are skipped. Any other token that
# contains "<", ">", NBSP or NBSP_THIN stops the search (state 0).
#
# @param tokens [Array<String>] tokens in document order
# @return [nil] the result of the +while+ loop
def widont(tokens)
  state, i, widow = 1, tokens.size - 1, nil
  while i >= 0
    if tokens[i] =~ WIDONT_PARAGRAPH_RE
      # End of a paragraph-level element: start a new search in front of it.
      state = 1
    elsif tokens[i] !~ WIDONT_INLINE_RE
      if tokens[i] =~ WIDONT_NBSP_RE
        # Another tag, or text that already contains a non-breaking space.
        state = 0
      elsif state == 1 || state == 3
        # State 1 needs a last word ($3) at the end of the token; state 3
        # already has one and needs whitespace ($2) in front of it.
        # $1 is the word before that whitespace, if this token contains it.
        if tokens[i] =~ (state == 1 ? /(\P{Space}+)?(\p{Space}+)?(\P{Space}+\p{Space}*)\Z/m :
                                      /(\P{Space}+)?(\p{Space}+)(\P{Space}*)\Z/m)
          if $1 && $2
            # Word, whitespace and last word are all here: swap the
            # whitespace for NBSP ($` is the text in front of the match).
            tokens[i].replace "#{$`}#{$1}#{NBSP}#{$3}"
            state = 0
          elsif $2
            # Whitespace found, but the word before it is in an earlier token.
            state = 2
            widow = tokens[i]
          else
            # Only (part of) the last word so far.
            state = 3
          end
        end
      elsif state == 2 && tokens[i] =~ /(\P{Space}+\p{Space}*)\Z/m
        # Found the preceding word: replace the leading whitespace of the
        # remembered token with NBSP.
        widow.sub!(/\A\p{Space}*/, NBSP)
        state = 0
      end
    end
    i -= 1
  end
end