Module: Omml2Mathml

Defined in:
lib/omml2mathml.rb,
lib/omml2mathml/convert.rb,
lib/omml2mathml/version.rb

Constant Summary

VERSION =
"0.0.12".freeze

Class Method Summary

Class Method Details

.convert(filename) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
# File 'lib/omml2mathml/convert.rb', line 6

# Reads an HTML file containing Office Math (OMML) markup written with an
# "m:" prefix and returns the document as XML with every oMath / oMathPara
# element replaced by a <mml:math> element produced by the bundled
# "xhtml-mathml.xsl" stylesheet.
#
# Side effects: assigns the instance variables @tags, @mathml and @xslt.
#
# @param filename [String] path of the HTML file; it is read as UTF-8
# @return [String] the serialised XML document
def convert(filename)
  # Local names of the OMML elements this method recognises.
  @tags = %w{
    acc
    accPr
    aln
    alnScr
    argPr
    argSz
    bar
    barPr
    baseJc
    begChr
    borderBox
    borderBoxPr
    box
    boxPr
    brk
    brkBin
    brkBinSub
    cGp
    cGpRule
    chr
    count
    cSp
    ctrlPr
    d
    defJc
    deg
    degHide
    den
    diff
    dispDef
    dPr
    e
    endChr
    eqArr
    eqArrPr
    f
    fName
    fPr
    func
    funcPr
    groupChr
    groupChrPr
    grow
    hideBot
    hideLeft
    hideRight
    hideTop
    interSp
    intLim
    intraSp
    jc
    lim
    limLoc
    limLow
    limLowPr
    limUpp
    limUppPr
    lit
    lMargin
    m
    mathFont
    mathPr
    maxDist
    mc
    mcJc
    mcPr
    mcs
    mPr
    mr
    nary
    naryLim
    naryPr
    noBreak
    nor
    num
    objDist
    oMath
    oMathPara
    oMathParaPr
    opEmu
    phant
    phantPr
    plcHide
    pos
    postSp
    preSp
    r
    rad
    radPr
    rMargin
    rPr
    rSp
    rSpRule
    scr
    sepChr
    show
    shp
    smallFrac
    sPre
    sPrePr
    sSub
    sSubPr
    sSubSup
    sSubSupPr
    sSup
    sSupPr
    strikeBLTR
    strikeH
    strikeTLBR
    strikeV
    sty
    sub
    subHide
    sup
    supHide
    t
    transp
    type
    vertJc
    wrapIndent
    wrapRight
    zeroAsc
    zeroDesc
    zeroWid
  }

  # Lookup from the lower-cased "m_<tag>" element name back to the correctly
  # cased OMML tag name (presumably needed because the HTML parser
  # lower-cases element names -- confirm against Nokogiri's behaviour).
  @mathml = {}
  @tags.each { |tag| @mathml["m_#{tag.downcase}"] = tag }

  # Pre-process the raw markup: drop carriage returns, turn the "m:" prefix
  # into "m_", and rewrite the "<![endif]>" / "<![if !msEquation]>"
  # conditional markers into HTML comments.
  source = File.read(filename, encoding: "utf-8")
    .gsub(/\r/, "").gsub(/<m:/, "<m_")
    .gsub(/<\/m:/, "</m_")
    .gsub(/<!\[endif\]>/, "<!--endif-->")
    .gsub(/<!\[endif\]-->/, "<!--endif-- -->")
    .gsub(/<!\[if !msEquation\]>/, "<!--if !msEquation-->")
  html = Nokogiri::HTML.parse(source)

  # Block form so the stylesheet file handle is closed once it has been
  # parsed; an IO (not a String) is still what gets handed to Nokogiri.
  stylesheet_path = File.join(File.dirname(__FILE__), "xhtml-mathml.xsl")
  @xslt = File.open(stylesheet_path, "rb") { |io| Nokogiri::XSLT(io) }

  html.traverse do |n|
    next unless n.comment?

    if /^\[if gte msEquation 12\]>/.match?(n.text)
      # Replace the comment with its own content, minus the conditional
      # wrapper, so the math markup inside it becomes part of the document.
      n.replace(n.text.sub(/\[if gte msEquation 12\]>/, "")
                .sub(/<!--endif-->/, ""))
    elsif /^if !msEquation/.match?(n.text)
      # Drop the node following the marker together with the marker itself.
      # The marker may have no following sibling, hence the safe navigation.
      n.next&.remove
      n.remove
    else
      n.remove
    end
  end

  # Re-parse as XML, then restore the original tag casing and attach the
  # "m" namespace to every recognised element.
  xml = Nokogiri::XML(html.to_xhtml)
  ns = xml.root.add_namespace "m", "http://schemas.microsoft.com/office/2004/12/omml"
  xml.traverse do |t|
    next unless t.element? && @mathml.key?(t.name)

    t.name = @mathml[t.name]
    t.namespace = ns
  end
  # xml.xpath("//xmlns:link | //xmlns:style | //*[@class = 'MsoToc1'] | //*[@class = 'MsoToc2'] |//*[@class = 'MsoToc3'] |//*[@class = 'MsoToc4'] |//*[@class = 'MsoToc5'] |//*[@class = 'MsoToc6'] |//*[@class = 'MsoToc7'] |//*[@class = 'MsoToc8'] |//*[@class = 'MsoToc9'] ").each { |x| x.remove }
  xml.xpath("//*[local-name()='oMath' or local-name()='oMathPara']").each do |x|
    # Prepare the stylesheet input: declare the officeDocument math namespace
    # on the fragment's opening tag. The pattern only matches an opening tag
    # that carries no attributes.
    input = Nokogiri::XML(x.to_xml.sub(/<m:(oMath|oMathPara)>/,
                                       "<m:\\1 xmlns:m='http://schemas.openxmlformats.org/officeDocument/2006/math'>"))
    out = @xslt.transform(input)
    # Strip the XML declaration, prefix the unprefixed result tags with
    # "mml:", and remove the namespace declarations from the output.
    mml = out.to_xml.gsub(/<\?xml[^>]+>/, "")
      .gsub(%r{<([^:/! >]+ xmlns="http://www.w3.org/1998/Math/MathML")},
            "<mml:\\1")
      .gsub(%r{<([^:/!>]+)>}, "<mml:\\1>")
      .gsub(%r{</([^:/!>]+)>}, "</mml:\\1>")
      .gsub(%r{ xmlns="http://www.w3.org/1998/Math/MathML"}, "")
      .gsub(%r{ xmlns:mml="http://www.w3.org/1998/Math/MathML"}, "")
      .gsub(%r{ xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"}, "")
      .gsub(%r{ xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math"}, "")
    x.replace("<mml:math>#{mml}</mml:math>")
  end
  xml.to_s
end