Class: RightScraper::Scrapers::Workflow

Inherits:
Base show all
Defined in:
lib/right_scraper/scrapers/workflow.rb

Overview

Workflow scraper

Instance Attribute Summary

Attributes inherited from Base

#resources

Instance Method Summary collapse

Methods inherited from Base

#close, #next_resource, registration_module, #scrape, scraper

Methods inherited from RegisteredBase

query_registered_type, register_class, register_self, registered_types, registration_module

Constructor Details

#initialize(options) ⇒ Workflow

Initialize the list of known workflows, then run the base scraper initialization.



35
36
37
38
# File 'lib/right_scraper/scrapers/workflow.rb', line 35

# Start with an empty record of workflows that have already been read, then
# delegate to the parent class initializer.
#
# === Parameters
# options:: forwarded unchanged to the superclass initializer
def initialize(options)
  # Set before calling super, exactly as the original ordering did.
  @known_workflows = []
  # Bare super re-sends this method's own arguments (i.e. options).
  super
end

Instance Method Details

#default_builders ⇒ Object

List of default builders for this scraper

Return

Array<Builder>

Default builders



87
88
89
# File 'lib/right_scraper/scrapers/workflow.rb', line 87

# Builders used by this scraper when none are specified.
#
# === Return
# Array<Builder>:: a one-element array holding the Filesystem builder class
def default_builders
  filesystem_builder = RightScraper::Builders::Filesystem
  [filesystem_builder]
end

#default_scanners ⇒ Object

List of default scanners for this scraper

Return

Array<Scanner>

Default scanners



78
79
80
81
# File 'lib/right_scraper/scrapers/workflow.rb', line 78

# Scanners used by this scraper when none are specified.
#
# NOTE(review): the first entry had lost its constant name in this listing
# (it read "RightScraper::Scanners::,", which does not parse). It is
# restored here as WorkflowMetadata, by analogy with WorkflowManifest and
# with the metadata-file check in find_next -- confirm against the real
# lib/right_scraper/scrapers/workflow.rb.
#
# === Return
# Array<Scanner>:: Default scanners
def default_scanners
  [
    RightScraper::Scanners::WorkflowMetadata,
    RightScraper::Scanners::WorkflowManifest
  ]
end

#find_next(dir) ⇒ Object

Find the next workflow, starting in dir.

Parameters

dir(Dir)

directory to begin search in



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/right_scraper/scrapers/workflow.rb', line 44

# Find the next workflow, starting in dir.
#
# A candidate is a file in dir whose name ends with DEFINITION_EXT, that has
# a sibling file with the same base name and METADATA_EXT, and that is not
# already recorded in @known_workflows.
#
# === Parameters
# dir(Dir):: directory to begin search in
#
# === Return
# The value of the outer @logger.operation call. The block given to it
# evaluates to the Workflow resource that was just read, or to the result of
# search_dirs when this directory holds no unread workflow.
def find_next(dir)
  @logger.operation(:finding_next_workflow, "in #{dir.path}") do
    # Note: there could be multiple workflow definitions in one directory,
    # so we need to record the current position whether we found a workflow
    # or not. The next iteration will search again in the current directory
    # even if we found one. If we don't find one then we call
    # 'search_dirs', which will recurse into the sub-directories.
    @stack << dir

    def_ext = RightScraper::Resources::Workflow::DEFINITION_EXT
    # NOTE(review): the constant name was missing from this listing (the line
    # ended at "Workflow::"); METADATA_EXT is assumed by parallel with
    # DEFINITION_EXT -- confirm against RightScraper::Resources::Workflow.
    meta_ext = RightScraper::Resources::Workflow::METADATA_EXT

    # First definition file that has its metadata file alongside it and has
    # not been handed out yet. File.exist? is used because File.exists? was
    # removed in Ruby 3.2.
    wdef = Dir[File.join(dir.path, "*#{def_ext}")].find do |candidate|
      metadata_path = candidate.chomp(File.extname(candidate)) + meta_ext
      File.exist?(metadata_path) && !@known_workflows.include?(candidate)
    end

    if wdef
      relative_def = strip_repo_dir(wdef)
      @logger.operation(:reading_workflow, relative_def.to_s) do
        workflow = RightScraper::Resources::Workflow.new(@repository, relative_def)
        @builder.go(File.dirname(wdef), workflow)
        # Remember the absolute path so the same definition is skipped on the
        # next call for this directory.
        @known_workflows << wdef
        workflow
      end
    else
      search_dirs
    end
  end
end