Class: Stepmod::Utils::TermsExtractor

Inherits:
Object
  • Object
show all
Defined in:
lib/stepmod/utils/terms_extractor.rb

Constant Summary collapse

ACCEPTED_STAGES =

TODO: We may want a command-line option to override this in the future.

%w(IS DIS FDIS TS).freeze
# Value of the `status` attribute that marks a repository-index entry as
# withdrawn; #call skips modules and resources carrying this status.
WITHDRAWN_STATUS =
"withdrawn".freeze
# Matches a line of the form "A <x> is a type of {{<ref>}}" (or "An ..."),
# optionally followed by whitespace and a single trailing period.
# `^`/`$` anchor to line boundaries, not to the whole string.
# NOTE(review): presumably used to drop notes that only restate the
# supertype — the call site is not visible here; confirm.
REDUNDENT_NOTE_REGEX =
/^An? .*? is a type of \{\{[^}]*\}\}\s*?\.?$/.freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(stepmod_dir, index_path, stdout) ⇒ TermsExtractor

Returns a new instance of TermsExtractor.



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/stepmod/utils/terms_extractor.rb', line 38

# Prepares an extractor for a single STEPmod checkout.
#
# @param stepmod_dir [String] STEPmod directory; it is resolved with
#   Pathname#realpath, so it has to exist on disk
# @param index_path [String, Pathname] location of the repository index;
#   stored as a String
# @param stdout [#puts] sink that #log writes its messages to
def initialize(stepmod_dir, index_path, stdout)
  # Inputs: kept as given, plus their normalized path forms.
  @stdout, @stepmod_dir = stdout, stepmod_dir
  @stepmod_path = Pathname.new(@stepmod_dir).realpath
  @index_path = Pathname.new(index_path).to_s

  # Result containers, all empty until #call fills them.
  @general_concepts = Glossarist::ManagedConceptCollection.new
  @resource_concepts = Glossarist::ManagedConceptCollection.new
  @parsed_bibliography, @part_concepts = [], []
  @added_bibdata, @part_resources, @part_modules, @encountered_terms = {}, {}, {}, {}

  # Running counter, starts at zero.
  @sequence = 0
end

Instance Attribute Details

#encountered_terms ⇒ Object (readonly)

Returns the value of attribute encountered_terms.



21
22
23
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Reader for +@encountered_terms+, which the constructor initializes to an
# empty Hash.
#
# @return [Object] the current value of +@encountered_terms+
def encountered_terms
  @encountered_terms
end

#general_concepts ⇒ Object (readonly)

Returns the value of attribute general_concepts.



21
22
23
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Reader for +@general_concepts+, which the constructor initializes to a new
# Glossarist::ManagedConceptCollection. It is the first element of the array
# returned by #call.
#
# @return [Object] the current value of +@general_concepts+
def general_concepts
  @general_concepts
end

#git_rev ⇒ Object (readonly)

Returns the value of attribute git_rev.



21
22
23
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Reader for +@git_rev+. The constructor does not set this variable; #call
# assigns it the raw output of `git rev-parse HEAD`, so it is nil until
# #call has run.
#
# @return [String, nil] the current value of +@git_rev+
def git_rev
  @git_rev
end

#index_path ⇒ Object (readonly)

Returns the value of attribute index_path.



21
22
23
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Reader for +@index_path+, which the constructor sets to the String form of
# the +index_path+ argument. #call reads the repository index XML from it.
#
# @return [String] the current value of +@index_path+
def index_path
  @index_path
end

#parsed_bibliography ⇒ Object (readonly)

Returns the value of attribute parsed_bibliography.



21
22
23
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Reader for +@parsed_bibliography+, which the constructor initializes to an
# empty Array. It is the third element of the array returned by #call.
#
# @return [Object] the current value of +@parsed_bibliography+
def parsed_bibliography
  @parsed_bibliography
end

#part_concepts ⇒ Object (readonly)

Returns the value of attribute part_concepts.



21
22
23
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Reader for +@part_concepts+, which the constructor initializes to an empty
# Array. It is the fourth element of the array returned by #call.
#
# @return [Object] the current value of +@part_concepts+
def part_concepts
  @part_concepts
end

#part_modules ⇒ Object (readonly)

Returns the value of attribute part_modules.



21
22
23
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Reader for +@part_modules+, which the constructor initializes to an empty
# Hash. #call returns its non-nil values as the last element of its result.
#
# @return [Object] the current value of +@part_modules+
def part_modules
  @part_modules
end

#part_resources ⇒ Object (readonly)

Returns the value of attribute part_resources.



21
22
23
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Reader for +@part_resources+, which the constructor initializes to an empty
# Hash. #call returns its non-nil values as the fifth element of its result.
#
# @return [Object] the current value of +@part_resources+
def part_resources
  @part_resources
end

#resource_concepts ⇒ Object (readonly)

Returns the value of attribute resource_concepts.



21
22
23
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Reader for +@resource_concepts+, which the constructor initializes to a new
# Glossarist::ManagedConceptCollection. It is the second element of the array
# returned by #call.
#
# @return [Object] the current value of +@resource_concepts+
def resource_concepts
  @resource_concepts
end

#stdout ⇒ Object (readonly)

Returns the value of attribute stdout.



21
22
23
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Reader for +@stdout+, the output object handed to the constructor.
# #log sends its messages to it via +puts+.
#
# @return [Object] the current value of +@stdout+
def stdout
  @stdout
end

#stepmod_dir ⇒ Object (readonly)

Returns the value of attribute stepmod_dir.



21
22
23
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Reader for +@stepmod_dir+, the STEPmod directory exactly as it was passed
# to the constructor. #call interpolates it into the schema file paths.
#
# @return [Object] the current value of +@stepmod_dir+
def stepmod_dir
  @stepmod_dir
end

#stepmod_path ⇒ Object (readonly)

Returns the value of attribute stepmod_path.



21
22
23
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Reader for +@stepmod_path+, which the constructor sets to the real path
# (Pathname#realpath) of the STEPmod directory. #call changes into this
# directory to query Git.
#
# @return [Pathname] the current value of +@stepmod_path+
def stepmod_path
  @stepmod_path
end

Class Method Details

.call(stepmod_dir, index_path, stdout = $stdout) ⇒ Object



34
35
36
# File 'lib/stepmod/utils/terms_extractor.rb', line 34

# Convenience entry point: builds an extractor and runs it in one step.
#
# @param stepmod_dir [String] forwarded to the constructor
# @param index_path [String, Pathname] forwarded to the constructor
# @param stdout [#puts] forwarded to the constructor; defaults to $stdout
# @return [Object] whatever the instance-level #call returns
def self.call(stepmod_dir, index_path, stdout = $stdout)
  extractor = new(stepmod_dir, index_path, stdout)
  extractor.call
end

Instance Method Details

#call ⇒ Object



69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# File 'lib/stepmod/utils/terms_extractor.rb', line 69

# Runs the extraction over the annotated EXPRESS schemas listed in the
# repository index.
#
# Steps, in order:
# 1. logs the STEPmod directory and stores the raw output of
#    `git rev-parse HEAD` (run inside +stepmod_path+) in +@git_rev+;
# 2. parses the XML repository index found at +@index_path+;
# 3. collects the ARM and MIM annotated schemas of every non-withdrawn
#    module and the annotated schema of every non-withdrawn resource,
#    keeping only files that exist on disk;
# 4. hands the sorted, de-duplicated file list to +process_term_files+.
#
# @return [Array] six elements, in this order: +general_concepts+,
#   +resource_concepts+, +parsed_bibliography+, +part_concepts+, the non-nil
#   values of +part_resources+ and the non-nil values of +part_modules+
def call
  log "INFO: STEPmod directory set to #{stepmod_dir}."
  log "INFO: Detecting paths..."

  # Run `git rev-parse HEAD` inside the checkout to find out its revision.
  # Backticks always return a String, so no nil fallback is needed.
  log "INFO: Detecting Git SHA..."
  Dir.chdir(stepmod_path) do
    @git_rev = `git rev-parse HEAD`
  end

  repo_index = Nokogiri::XML(File.read(@index_path)).root

  files = []

  # add module paths
  repo_index.xpath("//module").each do |x|
    next if x['status'] == WITHDRAWN_STATUS

    arm_path = Pathname.new("#{stepmod_dir}/modules/#{x['name']}/arm_annotated.exp")
    mim_path = Pathname.new("#{stepmod_dir}/modules/#{x['name']}/mim_annotated.exp")

    files << arm_path if File.exist? arm_path
    files << mim_path if File.exist? mim_path
  end

  # Should ignore these because the `<resource_docs>` elements do not provide any EXPRESS schemas
  # # add resource_docs paths
  # repo_index.xpath("//resource_doc").each do |x|
  #   next if x['status'] == WITHDRAWN_STATUS

  #   path = Pathname.new("#{stepmod_dir}/resource_docs/#{x['name']}/resource.xml")
  #   files << path if File.exists? path
  # end

  # add resource paths
  repo_index.xpath("//resource").each do |x|
    next if x["status"] == WITHDRAWN_STATUS || x["name"] == "iso13584_expressions_schema"

    path = Pathname.new("#{stepmod_dir}/resources/#{x['name']}/#{x['name']}_annotated.exp")
    files << path if File.exist? path
  end

  # Should ignore these because we are skipping Clause 3 terms
  # add business_object_models paths
  # repo_index.xpath("//business_object_model").each do |x|
  #   next if x['status'] == WITHDRAWN_STATUS

  #   annotated_path = Pathname.new("#{stepmod_dir}/business_object_models/#{x['name']}/bom_annotated.exp")
  #   path = Pathname.new("#{stepmod_dir}/business_object_models/#{x['name']}/bom.exp")
  #   files << if File.exists?(annotated_path)
  #              annotated_path
  #            elsif File.exists?(path)
  #              path
  #            end
  # end

  # Should ignore these because there are no EXPRESS schemas here (they are implemented inside modules)
  # # add application_protocols paths
  # repo_index.xpath("//application_protocol").each do |x|
  #   next if x['status'] == WITHDRAWN_STATUS

  #   path = Pathname.new("#{stepmod_dir}/application_protocols/#{x['name']}/application_protocol.xml")
  #   files << path if File.exists? path
  # end

  # Reassign instead of chaining bang methods: `compact` returns a copy, so
  # `sort!`/`uniq!` chained onto it never reach `files`, and `uniq!` returns
  # nil when there is nothing to remove.
  files = files.compact.uniq.sort
  process_term_files(files)

  [
    general_concepts, # Should be empty because skipping all Clause 3 terms
    resource_concepts,
    parsed_bibliography,
    part_concepts, # Should be empty because skipping all Clause 3 terms
    part_resources.values.compact,
    part_modules.values.compact,
  ]
end

#log(message) ⇒ Object



54
55
56
# File 'lib/stepmod/utils/terms_extractor.rb', line 54

# Writes one tagged line to the extractor's output object.
#
# @param message [#to_s] text to print after the "[stepmod-utils] " tag
# @return [Object] whatever +stdout.puts+ returns
def log(message)
  tagged_line = "[stepmod-utils] #{message}"
  stdout.puts(tagged_line)
end

#term_special_category(bibdata) ⇒ Object



58
59
60
61
62
63
64
65
66
67
# File 'lib/stepmod/utils/terms_extractor.rb', line 58

# Tells whether a bibliographic item belongs to one of the specially
# treated parts: 41-47, 51 and 56-112 (bounds included).
#
# The ranges are listed directly in the +when+ clause so that +case+ tests
# them with Range#===. Wrapping a range in an Array (+[56..112]+) makes
# +case+ use Array#===, which compares the whole array for equality and
# therefore never matches an Integer.
#
# @param bibdata [#part] object whose +part+ responds to +to_i+
# @return [Boolean] true when the part number is in a special category
def term_special_category(bibdata)
  case bibdata.part.to_i
  when 41..47, 51, 56..112
    true
  else
    false
  end
end