Class: Stepmod::Utils::TermsExtractor
- Inherits:
-
Object
- Object
- Stepmod::Utils::TermsExtractor
- Defined in:
- lib/stepmod/utils/terms_extractor.rb
Constant Summary collapse
- ACCEPTED_STAGES =
TODO: we may want a command line option to override this in the future
%w(IS DIS FDIS TS).freeze
- WITHDRAWN_STATUS =
"withdrawn".freeze
- REDUNDENT_NOTE_REGEX =
/^An? .*? is a type of \{\{[^}]*\}\}\s*?\.?$/.freeze
Instance Attribute Summary collapse
-
#encountered_terms ⇒ Object
readonly
Returns the value of attribute encountered_terms.
-
#general_concepts ⇒ Object
readonly
Returns the value of attribute general_concepts.
-
#git_rev ⇒ Object
readonly
Returns the value of attribute git_rev.
-
#index_path ⇒ Object
readonly
Returns the value of attribute index_path.
-
#parsed_bibliography ⇒ Object
readonly
Returns the value of attribute parsed_bibliography.
-
#part_concepts ⇒ Object
readonly
Returns the value of attribute part_concepts.
-
#part_modules ⇒ Object
readonly
Returns the value of attribute part_modules.
-
#part_resources ⇒ Object
readonly
Returns the value of attribute part_resources.
-
#resource_concepts ⇒ Object
readonly
Returns the value of attribute resource_concepts.
-
#stdout ⇒ Object
readonly
Returns the value of attribute stdout.
-
#stepmod_dir ⇒ Object
readonly
Returns the value of attribute stepmod_dir.
-
#stepmod_path ⇒ Object
readonly
Returns the value of attribute stepmod_path.
Class Method Summary collapse
Instance Method Summary collapse
- #call ⇒ Object
-
#initialize(stepmod_dir, index_path, stdout) ⇒ TermsExtractor
constructor
A new instance of TermsExtractor.
- #log(message) ⇒ Object
- #published_part_numbers ⇒ Object
- #term_special_category(bibdata) ⇒ Object
Constructor Details
#initialize(stepmod_dir, index_path, stdout) ⇒ TermsExtractor
Returns a new instance of TermsExtractor.
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
# File 'lib/stepmod/utils/terms_extractor.rb', line 38

# Prepares an extractor for one STEPmod checkout. No files are read here;
# the only filesystem access is resolving +stepmod_dir+ to its real path.
#
# @param stepmod_dir [String, Pathname] STEPmod directory; kept as given and
#   also resolved with Pathname#realpath (which raises if it does not exist)
# @param index_path [String, Pathname] path of the repository index, stored
#   as a String
# @param stdout [#puts] sink that #log writes to
def initialize(stepmod_dir, index_path, stdout)
  # Caller-supplied values and their normalised path forms.
  @stdout = stdout
  @stepmod_dir = stepmod_dir
  @stepmod_path = Pathname.new(stepmod_dir).realpath
  @index_path = Pathname.new(index_path).to_s

  # Concept containers, all starting empty.
  @general_concepts = Glossarist::ManagedConceptCollection.new
  @resource_concepts = Glossarist::ManagedConceptCollection.new
  @part_concepts = []

  # Bibliography bookkeeping.
  @parsed_bibliography = []
  @added_bibdata = {}

  # Per-part lookup tables and term tracking.
  @part_resources = {}
  @part_modules = {}
  @encountered_terms = {}

  @sequence = 0
end
Instance Attribute Details
#encountered_terms ⇒ Object (readonly)
Returns the value of attribute encountered_terms.
21 22 23 |
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Read-only accessor for @encountered_terms
# (set to an empty Hash by the constructor).
#
# @return [Object] the current value of @encountered_terms
def encountered_terms
  @encountered_terms
end
#general_concepts ⇒ Object (readonly)
Returns the value of attribute general_concepts.
21 22 23 |
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Read-only accessor for @general_concepts (set to a new
# Glossarist::ManagedConceptCollection by the constructor).
#
# @return [Object] the current value of @general_concepts
def general_concepts
  @general_concepts
end
#git_rev ⇒ Object (readonly)
Returns the value of attribute git_rev.
21 22 23 |
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Read-only accessor for @git_rev. The constructor does not assign it;
# #call stores the output of `git rev-parse HEAD` there.
#
# @return [Object, nil] the current value of @git_rev
def git_rev
  @git_rev
end
#index_path ⇒ Object (readonly)
Returns the value of attribute index_path.
21 22 23 |
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Read-only accessor for @index_path (the constructor stores the
# index path as a String).
#
# @return [Object] the current value of @index_path
def index_path
  @index_path
end
#parsed_bibliography ⇒ Object (readonly)
Returns the value of attribute parsed_bibliography.
21 22 23 |
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Read-only accessor for @parsed_bibliography
# (set to an empty Array by the constructor).
#
# @return [Object] the current value of @parsed_bibliography
def parsed_bibliography
  @parsed_bibliography
end
#part_concepts ⇒ Object (readonly)
Returns the value of attribute part_concepts.
21 22 23 |
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Read-only accessor for @part_concepts
# (set to an empty Array by the constructor).
#
# @return [Object] the current value of @part_concepts
def part_concepts
  @part_concepts
end
#part_modules ⇒ Object (readonly)
Returns the value of attribute part_modules.
21 22 23 |
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Read-only accessor for @part_modules
# (set to an empty Hash by the constructor).
#
# @return [Object] the current value of @part_modules
def part_modules
  @part_modules
end
#part_resources ⇒ Object (readonly)
Returns the value of attribute part_resources.
21 22 23 |
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Read-only accessor for @part_resources
# (set to an empty Hash by the constructor).
#
# @return [Object] the current value of @part_resources
def part_resources
  @part_resources
end
#resource_concepts ⇒ Object (readonly)
Returns the value of attribute resource_concepts.
21 22 23 |
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Read-only accessor for @resource_concepts (set to a new
# Glossarist::ManagedConceptCollection by the constructor).
#
# @return [Object] the current value of @resource_concepts
def resource_concepts
  @resource_concepts
end
#stdout ⇒ Object (readonly)
Returns the value of attribute stdout.
21 22 23 |
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Read-only accessor for @stdout, the output sink handed to the
# constructor and written to by #log.
#
# @return [Object] the current value of @stdout
def stdout
  @stdout
end
#stepmod_dir ⇒ Object (readonly)
Returns the value of attribute stepmod_dir.
21 22 23 |
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Read-only accessor for @stepmod_dir, the STEPmod directory exactly as
# it was passed to the constructor.
#
# @return [Object] the current value of @stepmod_dir
def stepmod_dir
  @stepmod_dir
end
#stepmod_path ⇒ Object (readonly)
Returns the value of attribute stepmod_path.
21 22 23 |
# File 'lib/stepmod/utils/terms_extractor.rb', line 21

# Read-only accessor for @stepmod_path (the constructor stores the
# real path of the STEPmod directory as a Pathname).
#
# @return [Object] the current value of @stepmod_path
def stepmod_path
  @stepmod_path
end
Class Method Details
.call(stepmod_dir, index_path, stdout = $stdout) ⇒ Object
34 35 36 |
# File 'lib/stepmod/utils/terms_extractor.rb', line 34

# Convenience entry point: builds an instance and runs it immediately.
#
# @param stepmod_dir passed through to the constructor
# @param index_path passed through to the constructor
# @param stdout passed through to the constructor; defaults to $stdout
# @return whatever the instance-level #call returns
def self.call(stepmod_dir, index_path, stdout = $stdout)
  extractor = new(stepmod_dir, index_path, stdout)
  extractor.call
end
Instance Method Details
#call ⇒ Object
76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 |
# File 'lib/stepmod/utils/terms_extractor.rb', line 76

# Runs the extraction: records the Git revision of the STEPmod checkout,
# collects the annotated EXPRESS files of every published, non-withdrawn
# module and resource listed in the repository index, and passes the
# sorted, de-duplicated file list to process_term_files.
#
# @return [Array] six elements, in this order: general_concepts,
#   resource_concepts, parsed_bibliography, part_concepts,
#   the non-nil values of part_resources, the non-nil values of part_modules
def call
  log "INFO: STEPmod directory set to #{stepmod_dir}."
  log "INFO: Detecting paths..."
  log "INFO: Detecting Git SHA..."

  Dir.chdir(stepmod_path) do
    # Backticks always return a String, so no nil fallback is needed.
    # The raw command output is stored as-is (it is not stripped).
    @git_rev = `git rev-parse HEAD`
  end

  published_part_nos = published_part_numbers
  repo_index = Nokogiri::XML(File.read(@index_path)).root

  files = []

  # add module paths
  module_schemas = { "ARM" => "arm_annotated.exp", "MIM" => "mim_annotated.exp" }
  repo_index.xpath("//module").each do |mod|
    unless published_part_nos.include? mod['part']
      log "INFO: skipping module #{mod['name']} as part #{mod['part']} is not published in `docs.xml`."
      next
    end

    if mod['status'] == WITHDRAWN_STATUS
      log "INFO: skipping module #{mod['name']} as it is withdrawn."
      next
    end

    module_schemas.each do |label, file_name|
      schema_path = @stepmod_path.join("modules/#{mod['name']}/#{file_name}")
      if File.exist? schema_path
        files << schema_path
      else
        log "INFO: skipping module #{label} for #{mod['name']} as it does not exist at #{schema_path}."
      end
    end
  end

  # Should ignore these because the `<resource_docs>` elements do not provide any EXPRESS schemas
  # # add resource_docs paths
  # repo_index.xpath("//resource_doc").each do |x|
  #   next if x['status'] == WITHDRAWN_STATUS
  #   path = Pathname.new("#{stepmod_dir}/resource_docs/#{x['name']}/resource.xml")
  #   files << path if File.exists? path
  # end

  # add resource paths
  out_of_scope = %w[iso13584_expressions_schema iso13584_generic_expressions_schema]
  repo_index.xpath("//resource").each do |resource|
    unless published_part_nos.include? resource['part']
      log "INFO: skipping resource #{resource['name']} as part #{resource['part']} is not published in `docs.xml`."
      next
    end

    if resource['status'] == WITHDRAWN_STATUS
      log "INFO: skipping resource #{resource['name']} as it is withdrawn."
      next
    end

    if out_of_scope.include?(resource["name"])
      log "INFO: skipping resource #{resource['name']} as the ISO 13584 series is out of scope."
      next
    end

    path = @stepmod_path.join("resources/#{resource['name']}/#{resource['name']}_annotated.exp")
    if File.exist? path
      files << path
    else
      log "INFO: skipping resource #{resource['name']} as it does not exist at #{path}."
    end
  end

  # Should ignore these because we are skipping Clause 3 terms
  # add business_object_models paths
  # repo_index.xpath("//business_object_model").each do |x|
  #   next if x['status'] == WITHDRAWN_STATUS
  #   annotated_path = Pathname.new("#{stepmod_dir}/business_object_models/#{x['name']}/bom_annotated.exp")
  #   path = Pathname.new("#{stepmod_dir}/business_object_models/#{x['name']}/bom.exp")
  #   files << if File.exists?(annotated_path)
  #     annotated_path
  #   elsif File.exists?(path)
  #     path
  #   end
  # end

  # Should ignore these because there are no EXPRESS schemas here (they are implemented inside modules)
  # # add application_protocols paths
  # repo_index.xpath("//application_protocol").each do |x|
  #   next if x['status'] == WITHDRAWN_STATUS
  #   path = Pathname.new("#{stepmod_dir}/application_protocols/#{x['name']}/application_protocol.xml")
  #   files << path if File.exists? path
  # end

  # The previous `files.compact.sort!.uniq!` sorted and de-duplicated a
  # temporary copy returned by `compact`, leaving `files` untouched.
  # Reassign so the processed list really is sorted and unique.
  files = files.compact.sort.uniq
  process_term_files(files)

  [
    general_concepts, # Should be empty because skipping all Clause 3 terms
    resource_concepts,
    parsed_bibliography,
    part_concepts, # Should be empty because skipping all Clause 3 terms
    part_resources.values.compact,
    part_modules.values.compact,
  ]
end
#log(message) ⇒ Object
54 55 56 |
# File 'lib/stepmod/utils/terms_extractor.rb', line 54

# Writes one line to the configured output sink, prefixed with
# "[stepmod-utils] ".
#
# The listing had lost the parameter and its interpolation
# (`def log()` / `#{}`); both are restored to match the documented
# signature `log(message)` that every caller relies on.
#
# @param message [#to_s] text to print after the prefix
# @return whatever `stdout.puts` returns
def log(message)
  stdout.puts "[stepmod-utils] #{message}"
end
#published_part_numbers ⇒ Object
69 70 71 72 73 74 |
# File 'lib/stepmod/utils/terms_extractor.rb', line 69

# Reads `library/docs.xml` under the STEPmod path and gathers the `part`
# attribute of every `<doc>` element.
#
# @return [Array] the distinct `part` attribute values, sorted
def published_part_numbers
  docs_file = @stepmod_path.join('library/docs.xml')
  document = Nokogiri::XML(File.read(docs_file))
  part_numbers = document.xpath("//doc").map { |doc| doc['part'] }
  part_numbers.uniq.sort
end
#term_special_category(bibdata) ⇒ Object
58 59 60 61 62 63 64 65 66 67 |
# File 'lib/stepmod/utils/terms_extractor.rb', line 58

# Tells whether a document's part number falls in the set treated as a
# special category: 41-47, 51, or 56-112 (inclusive).
#
# The earlier `when [56..112]` wrapped the range in an Array; `case`
# compares with `===`, and Array#=== is plain equality, so that branch
# could never match an Integer. The bare range is used instead.
#
# @param bibdata [#part] object whose `part` is converted with `to_i`
# @return [Boolean] true when the part number is in the special set
def term_special_category(bibdata)
  case bibdata.part.to_i
  when 41..47, 51, 56..112
    true
  else
    false
  end
end