Class: RightScraper::Scrapers::Workflow

Inherits:
Base
  • Object
show all
Defined in:
lib/right_scraper/scrapers/workflow.rb

Overview

Workflow scraper

Instance Attribute Summary

Attributes inherited from Base

#resources

Instance Method Summary collapse

Methods inherited from Base

#close, #next_resource, #scrape, scraper

Constructor Details

#initialize(options) ⇒ Workflow

Initialize the list of known workflows on top of the base scraper initialization.



33
34
35
36
# File 'lib/right_scraper/scrapers/workflow.rb', line 33

# Set up the scraper, starting with an empty record of the workflow
# definitions that have already been returned.
#
# === Parameters
# options:: forwarded unchanged to the parent class initializer
def initialize(options)
  # Assigned before delegating so the list already exists while the
  # parent initializer runs.
  @known_workflows = []
  # Bare super forwards the same arguments this method received.
  super
end

Instance Method Details

#default_builders ⇒ Object

List of default builders for this scraper

Return

Array<Builder>

Default builders



85
86
87
# File 'lib/right_scraper/scrapers/workflow.rb', line 85

# Builders used by this scraper when none are specified.
#
# === Return
# Array:: a new single-element array holding the Filesystem builder class
def default_builders
  filesystem_builder = RightScraper::Builders::Filesystem
  [filesystem_builder]
end

#default_scanners ⇒ Object

List of default scanners for this scraper

Return

Array<Scanner>

Default scanners



76
77
78
79
# File 'lib/right_scraper/scrapers/workflow.rb', line 76

# Scanners used by this scraper when none are specified.
#
# === Return
# Array:: a new array holding the WorkflowMetadata scanner class followed
#         by the WorkflowManifest scanner class
def default_scanners
  scanners = []
  scanners << RightScraper::Scanners::WorkflowMetadata
  scanners << RightScraper::Scanners::WorkflowManifest
  scanners
end

#find_next(dir) ⇒ Object

Find the next workflow, starting in dir.

Parameters

dir(Dir)

directory to begin search in



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/right_scraper/scrapers/workflow.rb', line 42

# Find the next workflow, starting in dir.
#
# A workflow is a definition file (DEFINITION_EXT) that has a sibling
# metadata file (same base name, METADATA_EXT) and that has not already
# been returned by a previous call.
#
# === Parameters
# dir(Dir):: directory to begin search in
#
# === Return
# Whatever the logger's 'operation' call yields back: the block evaluates
# to the newly built workflow resource when one is found in dir, and to
# the result of 'search_dirs' otherwise.
# NOTE(review): assumes @logger.operation returns its block's value -- confirm.
def find_next(dir)
  @logger.operation(:finding_next_workflow, "in #{dir.path}") do

    # Note: there could be multiple workflow definitions in one directory
    # so we need to record the current position whether we found a workflow
    # or not. The next iteration will search again in the current directory
    # even if we found one. If we don't find one then we call
    # 'search_dirs' which will recurse in the sub-directories.
    @stack << dir

    def_ext = RightScraper::Resources::Workflow::DEFINITION_EXT
    meta_ext = RightScraper::Resources::Workflow::METADATA_EXT

    # First definition that has not been returned yet and has its metadata
    # file alongside. File.exist? is used because File.exists? was removed
    # in Ruby 3.2.
    wdef = Dir[File.join(dir.path, "*#{def_ext}")].find do |candidate|
      next false if @known_workflows.include?(candidate)
      File.exist?(candidate.chomp(File.extname(candidate)) + meta_ext)
    end

    if wdef
      relative_def = strip_repo_dir(wdef)
      @logger.operation(:reading_workflow, "#{relative_def}") do
        workflow = RightScraper::Resources::Workflow.new(@repository, relative_def)
        @builder.go(File.dirname(wdef), workflow)
        # Remember the definition only after the builder ran, so a failure
        # in the builder does not mark it as already handled.
        @known_workflows << wdef
        workflow
      end
    else
      search_dirs
    end
  end
end