Class: Etna::Clients::Magma::FileLinkingWorkflow

Inherits:
Struct
  • Object
show all
Defined in:
lib/etna/clients/magma/workflows/file_linking_workflow.rb

Constant Summary collapse

PATIENT_TIMEPOINT_REGEX =
/([^-]+-[^-]+)-(DN?[0-9]+).*/

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(opts) ⇒ FileLinkingWorkflow

Returns a new instance of FileLinkingWorkflow.



10
11
12
# File 'lib/etna/clients/magma/workflows/file_linking_workflow.rb', line 10

# Builds the workflow from an options hash.
#
# +attribute_options+ defaults to an empty hash and +matching_expressions+ to
# an empty array; any key present in +opts+ overrides the default. The merged
# options are forwarded to the superclass constructor as keyword arguments.
#
# @param opts [Hash] attribute values keyed by attribute name
def initialize(opts)
  defaults = {attribute_options: {}, matching_expressions: []}
  super(**defaults.merge(opts))
end

Instance Attribute Details

#attribute_options ⇒ Object

Returns the value of attribute attribute_options

Returns:

  • (Object)

    the current value of attribute_options



7
8
9
# File 'lib/etna/clients/magma/workflows/file_linking_workflow.rb', line 7

# Reader for the attribute_options attribute.
#
# @return [Object] the current value of @attribute_options
def attribute_options
  @attribute_options
end

#bucket_name ⇒ Object

Returns the value of attribute bucket_name

Returns:

  • (Object)

    the current value of bucket_name



7
8
9
# File 'lib/etna/clients/magma/workflows/file_linking_workflow.rb', line 7

# Reader for the bucket_name attribute.
#
# @return [Object] the current value of @bucket_name
def bucket_name
  @bucket_name
end

#magma_crud ⇒ Object

Returns the value of attribute magma_crud

Returns:

  • (Object)

    the current value of magma_crud



7
8
9
# File 'lib/etna/clients/magma/workflows/file_linking_workflow.rb', line 7

# Reader for the magma_crud attribute.
#
# @return [Object] the current value of @magma_crud
def magma_crud
  @magma_crud
end

#matching_expressions ⇒ Object

Returns the value of attribute matching_expressions

Returns:

  • (Object)

    the current value of matching_expressions



7
8
9
# File 'lib/etna/clients/magma/workflows/file_linking_workflow.rb', line 7

# Reader for the matching_expressions attribute.
#
# @return [Object] the current value of @matching_expressions
def matching_expressions
  @matching_expressions
end

#metis_client ⇒ Object

Returns the value of attribute metis_client

Returns:

  • (Object)

    the current value of metis_client



7
8
9
# File 'lib/etna/clients/magma/workflows/file_linking_workflow.rb', line 7

# Reader for the metis_client attribute.
#
# @return [Object] the current value of @metis_client
def metis_client
  @metis_client
end

#model_name ⇒ Object

Returns the value of attribute model_name

Returns:

  • (Object)

    the current value of model_name



7
8
9
# File 'lib/etna/clients/magma/workflows/file_linking_workflow.rb', line 7

# Reader for the model_name attribute.
#
# @return [Object] the current value of @model_name
def model_name
  @model_name
end

#project_name ⇒ Object

Returns the value of attribute project_name

Returns:

  • (Object)

    the current value of project_name



7
8
9
# File 'lib/etna/clients/magma/workflows/file_linking_workflow.rb', line 7

# Reader for the project_name attribute.
#
# @return [Object] the current value of @project_name
def project_name
  @project_name
end

Instance Method Details

#containing_record_workflow ⇒ Object



104
105
106
# File 'lib/etna/clients/magma/workflows/file_linking_workflow.rb', line 104

# Lazily builds and caches the EnsureContainingRecordWorkflow for this
# workflow, constructed from +magma_crud+ and +models+.
#
# @return [EnsureContainingRecordWorkflow] the cached workflow instance
def containing_record_workflow
  return @containing_record_workflow if @containing_record_workflow

  @containing_record_workflow = EnsureContainingRecordWorkflow.new(magma_crud: magma_crud, models: models)
end

#each_revision ⇒ Object



74
75
76
77
78
79
80
81
82
83
# File 'lib/etna/clients/magma/workflows/file_linking_workflow.rb', line 74

# Yields one +[id, revision]+ pair for every matched file.
#
# For each key returned by +find_matches+, the match map is converted to
# record identifiers, the containing record is ensured once via
# +containing_record_workflow+, and then a revision is built for each of the
# key's file paths.
#
# When called without a block an Enumerator is returned before any work is
# done, rather than failing with LocalJumpError after records have already
# been ensured.
#
# @yield [pair] a two-element array +[id, revision]+
# @return [Enumerator] when no block is given
def each_revision
  return enum_for(:each_revision) unless block_given?

  find_matches.each do |(match_map, attribute_name), file_paths|
    record_identifiers = matches_to_record_identifiers(match_map)
    # Ensure the record once per key, not once per file.
    id = containing_record_workflow.ensure_record(model_name, record_identifiers)

    file_paths.each do |file_path|
      yield [id, revision_for(id, attribute_name, file_path, match_map, record_identifiers)]
    end
  end
end

#find_matches ⇒ Object



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/etna/clients/magma/workflows/file_linking_workflow.rb', line 18

# Scans every folder of the configured Metis bucket and groups the paths of
# the files that match one of +matching_expressions+.
#
# Each entry of +matching_expressions+ is destructured as a
# (regex, attribute_name) pair. A file path may match at most one expression;
# paths matching none are ignored.
#
# @return [Hash] keys are +[match_map, attribute_name]+ pairs, where
#   +match_map+ pairs the regex's group names with the captured text; values
#   are arrays of matching file paths. Attributes flagged +:file_collection+
#   in +attribute_options+ may accumulate several paths per key; all others
#   hold exactly one.
# @raise [RuntimeError] when a file path matches more than one expression, or
#   when two files resolve to the same key for a non-collection attribute
def find_matches
  {}.tap do |all_matches|
    metis_client.folders(project_name: project_name, bucket_name: bucket_name).each do |folder|
      listing = metis_client.list_folder(
        Etna::Clients::Metis::ListFolderRequest.new(
          project_name: project_name,
          bucket_name: bucket_name,
          folder_path: folder.folder_path
        )
      )

      listing.files.all.each do |file|
        matches = matching_expressions
                  .map { |regex, attribute_name| [regex.match(file.file_path), regex, attribute_name] }
                  .reject { |match, _regex, _attribute_name| match.nil? }

        if matches.length > 1
          # Core Ruby arrays have no #second (it is an ActiveSupport extension),
          # so pull the regex out of each triple by destructuring.
          regexes = matches.map { |_match, regex, _attribute_name| regex }
          raise "File path #{file.file_path} matches multiple regex, #{regexes}.  Please modify the matching expressions to disambiguate"
        end

        next if matches.empty?

        match, _regex, attribute_name = matches.first
        match_map = match.names.zip(match.captures).to_h
        key = [match_map, attribute_name]

        if attribute_options.dig(attribute_name, :file_collection)
          # Collections gather every file that resolves to the same key.
          (all_matches[key] ||= []).push(file.file_path)
        else
          if all_matches.key?(key)
            raise "Field #{attribute_name} for #{match_map} identified for two files, #{file.file_path} and #{all_matches[key]}.  Please modify the existing matching expressions to disambiguate"
          end

          all_matches[key] = [file.file_path]
        end
      end
    end
  end
end


85
86
87
88
89
90
91
# File 'lib/etna/clients/magma/workflows/file_linking_workflow.rb', line 85

# Applies every revision produced by +each_revision+ inside a single
# +magma_crud.update_records+ call, registering each one against +model_name+.
#
# @return [Object] whatever +magma_crud.update_records+ returns
def link_files
  magma_crud.update_records do |request|
    each_revision { |record_id, revision| request.update_revision(model_name, record_id, revision) }
  end
end

#magma_client ⇒ Object



14
15
16
# File 'lib/etna/clients/magma/workflows/file_linking_workflow.rb', line 14

# Convenience accessor that delegates to +magma_crud+.
#
# @return [Object] the value of +magma_crud.magma_client+
def magma_client
  magma_crud.magma_client
end

#matches_to_record_identifiers(match_data) ⇒ Object

Subclasses should override this to implement custom logic for how regex matches map to the identifiers of the record being linked.



60
61
62
# File 'lib/etna/clients/magma/workflows/file_linking_workflow.rb', line 60

# Converts a regex match map into the identifiers of the record to link.
#
# This base implementation ignores +match_data+ and identifies only the
# project record.
#
# @param match_data [Object] the match map for a file; unused here
# @return [Hash] a hash with the single key "project" set to +project_name+
def matches_to_record_identifiers(match_data)
  identifiers = {}
  identifiers["project"] = project_name
  identifiers
end

#models ⇒ Object



108
109
110
111
112
# File 'lib/etna/clients/magma/workflows/file_linking_workflow.rb', line 108

# Retrieves the project's models through +magma_client+ (requesting the model
# name 'all') and caches the result for later calls.
#
# @return [Object] the +models+ value of the retrieval response
def models
  return @models if @models

  request = RetrievalRequest.new(project_name: project_name, model_name: 'all')
  @models = magma_client.retrieve(request).models
end

#patient_timepoint_from(word) ⇒ Object



64
65
66
67
68
69
70
71
72
# File 'lib/etna/clients/magma/workflows/file_linking_workflow.rb', line 64

# Extracts patient and timepoint identifiers from +word+ using
# PATIENT_TIMEPOINT_REGEX.
#
# @param word [String] text to search
# @return [Hash] an empty hash when the regex does not match; otherwise
#   'patient' holds the first capture and 'timepoint' holds the first and
#   second captures joined by a dash
def patient_timepoint_from(word)
  matched = PATIENT_TIMEPOINT_REGEX.match(word)

  if matched
    patient = matched[1]
    {
      'patient' => patient,
      'timepoint' => "#{patient}-#{matched[2]}",
    }
  else
    {}
  end
end

#revision_for(id, attribute_name, file_path, match_map, record_identifiers) ⇒ Object



93
94
95
96
97
98
99
100
101
102
# File 'lib/etna/clients/magma/workflows/file_linking_workflow.rb', line 93

# Builds the revision hash that links one file to +attribute_name+.
#
# For attributes flagged +:file_collection+ in +attribute_options+, the value
# is a Metis browse URL for the folder containing the file. For all other
# attributes it is a hash carrying the file's metis:// path and its base name.
#
# Both branches use the top-level +::File+ so that path handling cannot be
# shadowed by a constant named File in an enclosing namespace.
#
# @param id [Object] record identifier; unused by this implementation
# @param attribute_name [Object] attribute being revised; used as the hash key
# @param file_path [String] path of the file within the bucket
# @param match_map [Object] unused by this implementation
# @param record_identifiers [Object] unused by this implementation
# @return [Hash] a single-entry hash keyed by +attribute_name+
def revision_for(id, attribute_name, file_path, match_map, record_identifiers)
  if attribute_options.dig(attribute_name, :file_collection)
    # A collection links to the containing folder rather than the file itself.
    folder_path = ::File.dirname(file_path)
    return {attribute_name => "https://metis.ucsf.edu/#{project_name}/browse/#{bucket_name}/#{folder_path}"}
  end

  {
    attribute_name => {
      path: "metis://#{project_name}/#{bucket_name}/#{file_path}",
      original_filename: ::File.basename(file_path),
    },
  }
end