Class: Stepmod::Utils::TermsExtractor

Inherits:
Object
  • Object
show all
Defined in:
lib/stepmod/utils/terms_extractor.rb

Constant Summary collapse

ACCEPTED_STAGES =

# TODO: we may want a command-line option to override this in the future.

%w(IS DIS FDIS TS).freeze
# Value of the `status` attribute that #call compares index entries against
# in order to skip withdrawn modules and resources.
WITHDRAWN_STATUS =
"withdrawn".freeze
# Matches a line of the form "A <x> is a type of {{<y>}}" (or "An ..."),
# optionally followed by whitespace and a single trailing period.
# `^` and `$` are per-line anchors in Ruby, so this matches any such line
# inside a multi-line string.
REDUNDENT_NOTE_REGEX =
/^An? .*? is a type of \{\{[^}]*\}\}\s*?\.?$/.freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(stepmod_dir, index_path, stdout) ⇒ TermsExtractor

Returns a new instance of TermsExtractor.



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/stepmod/utils/terms_extractor.rb', line 38

# Builds an extractor with empty result containers.
#
# @param stepmod_dir [String] STEPmod directory; it is resolved with
#   Pathname#realpath, which raises if the directory does not exist
# @param index_path [String] path of the repository index, stored as a String
# @param stdout [#puts] stream that #log writes to
def initialize(stepmod_dir, index_path, stdout)
  # Caller-supplied inputs.
  @stdout, @stepmod_dir = stdout, stepmod_dir
  @stepmod_path = Pathname.new(stepmod_dir).realpath
  @index_path = Pathname.new(index_path).to_s

  # Concept collections, one for general and one for resource concepts.
  @general_concepts, @resource_concepts =
    Array.new(2) { Glossarist::ManagedConceptCollection.new }

  # List-shaped accumulators.
  @parsed_bibliography, @part_concepts = [], []

  # Hash-shaped accumulators.
  @added_bibdata, @part_resources, @part_modules, @encountered_terms = {}, {}, {}, {}

  @sequence = 0
end

Instance Attribute Details

#encountered_terms ⇒ Object (readonly)

Returns the value of attribute encountered_terms.



21
22
23
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Reader for @encountered_terms, which #initialize sets to an empty Hash.
# NOTE(review): presumably filled while term files are processed; the code
# that writes to it is not visible here.
def encountered_terms
  @encountered_terms
end

#general_concepts ⇒ Object (readonly)

Returns the value of attribute general_concepts.



21
22
23
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Reader for @general_concepts, the Glossarist::ManagedConceptCollection
# created in #initialize. It is the first element of the array returned by #call.
def general_concepts
  @general_concepts
end

#git_rev ⇒ Object (readonly)

Returns the value of attribute git_rev.



21
22
23
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Reader for @git_rev. The variable is assigned in #call from the output of
# `git rev-parse HEAD` run inside stepmod_path; #initialize does not set it,
# so this returns nil until #call has run.
def git_rev
  @git_rev
end

#index_path ⇒ Object (readonly)

Returns the value of attribute index_path.



21
22
23
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Reader for @index_path: the constructor's index_path argument converted to
# a String via Pathname. #call reads this file and parses it as XML.
def index_path
  @index_path
end

#parsed_bibliography ⇒ Object (readonly)

Returns the value of attribute parsed_bibliography.



21
22
23
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Reader for @parsed_bibliography, which #initialize sets to an empty Array.
# It is the third element of the array returned by #call.
def parsed_bibliography
  @parsed_bibliography
end

#part_concepts ⇒ Object (readonly)

Returns the value of attribute part_concepts.



21
22
23
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Reader for @part_concepts, which #initialize sets to an empty Array.
# It is the fourth element of the array returned by #call.
def part_concepts
  @part_concepts
end

#part_modules ⇒ Object (readonly)

Returns the value of attribute part_modules.



21
22
23
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Reader for @part_modules, which #initialize sets to an empty Hash.
# #call returns its non-nil values as the last element of its result.
def part_modules
  @part_modules
end

#part_resources ⇒ Object (readonly)

Returns the value of attribute part_resources.



21
22
23
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Reader for @part_resources, which #initialize sets to an empty Hash.
# #call returns its non-nil values as the fifth element of its result.
def part_resources
  @part_resources
end

#resource_concepts ⇒ Object (readonly)

Returns the value of attribute resource_concepts.



21
22
23
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Reader for @resource_concepts, the Glossarist::ManagedConceptCollection
# created in #initialize. It is the second element of the array returned by #call.
def resource_concepts
  @resource_concepts
end

#stdout ⇒ Object (readonly)

Returns the value of attribute stdout.



21
22
23
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Reader for @stdout, the output stream handed to the constructor.
# #log writes its messages to this object with `puts`.
def stdout
  @stdout
end

#stepmod_dir ⇒ Object (readonly)

Returns the value of attribute stepmod_dir.



21
22
23
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Reader for @stepmod_dir: the stepmod_dir constructor argument, stored
# exactly as it was passed in (see #stepmod_path for the resolved form).
def stepmod_dir
  @stepmod_dir
end

#stepmod_path ⇒ Object (readonly)

Returns the value of attribute stepmod_path.



21
22
23
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Reader for @stepmod_path: the Pathname obtained in #initialize by calling
# `realpath` on the stepmod_dir argument. File paths in #call and
# #published_part_numbers are built by joining onto it.
def stepmod_path
  @stepmod_path
end

Class Method Details

.call(stepmod_dir, index_path, stdout = $stdout) ⇒ Object



34
35
36
# File 'lib/stepmod/utils/terms_extractor.rb', line 34

# Convenience entry point: builds an extractor and runs it in one step.
#
# @param stepmod_dir [String] forwarded to the constructor
# @param index_path [String] forwarded to the constructor
# @param stdout [#puts] forwarded to the constructor; defaults to $stdout
# @return [Object] whatever the instance-level #call returns
def self.call(stepmod_dir, index_path, stdout = $stdout)
  extractor = new(stepmod_dir, index_path, stdout)
  extractor.call
end

Instance Method Details

#call ⇒ Object



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# File 'lib/stepmod/utils/terms_extractor.rb', line 76

# Runs the extraction over the STEPmod repository.
#
# Records the repository's Git revision, collects the annotated EXPRESS files
# of every module and resource in the repository index that is published in
# `library/docs.xml` and not withdrawn, and passes the resulting list to
# process_term_files (defined outside this excerpt).
#
# @return [Array] six elements, in order: general_concepts, resource_concepts,
#   parsed_bibliography, part_concepts, the non-nil part_resources values and
#   the non-nil part_modules values
def call
  log "INFO: STEPmod directory set to #{stepmod_dir}."
  log "INFO: Detecting paths..."

  log "INFO: Detecting Git SHA..."
  # Backticks always return a String, so no nil fallback is needed.
  # NOTE(review): the value keeps git's trailing newline; confirm that
  # consumers of git_rev expect that.
  @git_rev = Dir.chdir(stepmod_path) { `git rev-parse HEAD` }

  published_part_nos = published_part_numbers
  repo_index = Nokogiri::XML(File.read(@index_path)).root

  files = []

  # add module paths
  module_schemas = { "ARM" => "arm_annotated.exp", "MIM" => "mim_annotated.exp" }
  repo_index.xpath("//module").each do |x|
    unless published_part_nos.include? x['part']
      log "INFO: skipping module #{x['name']} as part #{x['part']} is not published in `docs.xml`."
      next
    end

    if x['status'] == WITHDRAWN_STATUS
      log "INFO: skipping module #{x['name']} as it is withdrawn."
      next
    end

    # Each module may provide an ARM and a MIM schema; take whichever exist.
    module_schemas.each do |kind, filename|
      path = @stepmod_path.join("modules/#{x['name']}/#{filename}")
      if File.exist? path
        files << path
      else
        log "INFO: skipping module #{kind} for #{x['name']} as it does not exist at #{path}."
      end
    end
  end

  # Should ignore these because the `<resource_docs>` elements do not provide any EXPRESS schemas
  # # add resource_docs paths
  # repo_index.xpath("//resource_doc").each do |x|
  #   next if x['status'] == WITHDRAWN_STATUS

  #   path = Pathname.new("#{stepmod_dir}/resource_docs/#{x['name']}/resource.xml")
  #   files << path if File.exists? path
  # end

  # add resource paths
  out_of_scope_resources = %w[
    iso13584_expressions_schema
    iso13584_generic_expressions_schema
  ]
  repo_index.xpath("//resource").each do |x|
    unless published_part_nos.include? x['part']
      log "INFO: skipping resource #{x['name']} as part #{x['part']} is not published in `docs.xml`."
      next
    end

    if x['status'] == WITHDRAWN_STATUS
      log "INFO: skipping resource #{x['name']} as it is withdrawn."
      next
    end

    if out_of_scope_resources.include? x["name"]
      log "INFO: skipping resource #{x['name']} as the ISO 13584 series is out of scope."
      next
    end

    path = @stepmod_path.join("resources/#{x['name']}/#{x['name']}_annotated.exp")
    if File.exist? path
      files << path
    else
      log "INFO: skipping resource #{x['name']} as it does not exist at #{path}."
    end
  end

  # Should ignore these because we are skipping Clause 3 terms
  # add business_object_models paths
  # repo_index.xpath("//business_object_model").each do |x|
  #   next if x['status'] == WITHDRAWN_STATUS

  #   annotated_path = Pathname.new("#{stepmod_dir}/business_object_models/#{x['name']}/bom_annotated.exp")
  #   path = Pathname.new("#{stepmod_dir}/business_object_models/#{x['name']}/bom.exp")
  #   files << if File.exists?(annotated_path)
  #              annotated_path
  #            elsif File.exists?(path)
  #              path
  #            end
  # end

  # Should ignore these because there are no EXPRESS schemas here (they are implemented inside modules)
  # # add application_protocols paths
  # repo_index.xpath("//application_protocol").each do |x|
  #   next if x['status'] == WITHDRAWN_STATUS

  #   path = Pathname.new("#{stepmod_dir}/application_protocols/#{x['name']}/application_protocol.xml")
  #   files << path if File.exists? path
  # end

  # `compact` returns a copy, so chaining `sort!`/`uniq!` onto it would only
  # change that throwaway copy. Reassign so the cleaned, de-duplicated and
  # sorted list is what actually gets processed.
  files = files.compact.uniq.sort
  process_term_files(files)

  [
    general_concepts, # Should be empty because skipping all Clause 3 terms
    resource_concepts,
    parsed_bibliography,
    part_concepts, # Should be empty because skipping all Clause 3 terms
    part_resources.values.compact,
    part_modules.values.compact,
  ]
end

#log(message) ⇒ Object



54
55
56
# File 'lib/stepmod/utils/terms_extractor.rb', line 54

# Writes one tagged line to the extractor's output stream.
#
# @param message [#to_s] text to print after the "[stepmod-utils] " prefix
def log(message)
  line = "[stepmod-utils] #{message}"
  stdout.puts(line)
end

#published_part_numbers ⇒ Object



69
70
71
72
73
74
# File 'lib/stepmod/utils/terms_extractor.rb', line 69

# Lists the part numbers declared in `library/docs.xml` under the STEPmod path.
#
# @return [Array] the distinct `part` attribute values of all `<doc>`
#   elements, sorted
def published_part_numbers
  docs_file = @stepmod_path.join('library/docs.xml')
  document = Nokogiri::XML(File.read(docs_file))
  part_numbers = document.xpath("//doc").map { |doc| doc['part'] }
  part_numbers.uniq.sort
end

#term_special_category(bibdata) ⇒ Object



58
59
60
61
62
63
64
65
66
67
# File 'lib/stepmod/utils/terms_extractor.rb', line 58

# Tells whether a bibliographic item's part number is in one of the special ranges.
#
# @param bibdata [#part] object whose `part` responds to `to_i`
# @return [Boolean] true when the part number is 41–47, 51, or 56–112
#   (inclusive); false otherwise
def term_special_category(bibdata)
  case bibdata.part.to_i
  # Ranges must be given bare: `when [56..112]` would compare the integer
  # against an Array and never match.
  when 41..47, 51, 56..112
    true
  else
    false
  end
end