Module: ScrapedPageArchive

Extended by:
ScrapedPageArchive
Included in:
ScrapedPageArchive
Defined in:
lib/scraped_page_archive.rb,
lib/scraped_page_archive/version.rb

Constant Summary collapse

VERSION =
'0.1.0'.freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#github_repo_url ⇒ Object



77
78
79
# File 'lib/scraped_page_archive.rb', line 77

# URL of the repository used for the archive.
#
# Prefers the value reported by #git_remote_get_url_origin and falls back to
# the MORPH_SCRAPER_CACHE_GITHUB_REPO_URL environment variable. A truthy
# result is cached in @github_repo_url.
#
# @return [String, nil] the repository URL, or nil when neither source has one
def github_repo_url
  return @github_repo_url if @github_repo_url

  @github_repo_url = git_remote_get_url_origin || ENV['MORPH_SCRAPER_CACHE_GITHUB_REPO_URL']
end

Instance Method Details

#branch_name ⇒ Object

TODO: This should be configurable.



57
58
59
# File 'lib/scraped_page_archive.rb', line 57

# Name of the git branch that archived pages are committed to.
# The value is cached in @branch_name.
#
# @return [String]
def branch_name
  return @branch_name if @branch_name

  @branch_name = 'scraped-pages-archive'
end

#git ⇒ Object



61
62
63
# File 'lib/scraped_page_archive.rb', line 61

# Handle on the archive repository: clones #git_url into #tmpdir on first use
# and returns the same object on later calls.
#
# @return [Object] whatever Git.clone returns
def git
  return @git if @git

  @git = Git.clone(git_url, tmpdir)
end

#git_remote_get_url_origin ⇒ Object



81
82
83
84
85
86
# File 'lib/scraped_page_archive.rb', line 81

# URL of the `origin` remote, as printed by `git remote get-url origin` run in
# the current working directory.
#
# The result is cached after the first call, including a nil result, so the
# git command is executed at most once.
#
# @return [String, nil] the remote URL, or nil when the command prints nothing
#   or the git executable cannot be found
def git_remote_get_url_origin
  # `defined?` rather than `||=`: a nil answer must be remembered too,
  # otherwise every call would shell out again.
  return @git_remote_get_url_origin if defined?(@git_remote_get_url_origin)

  @git_remote_get_url_origin = begin
    remote_url = `git remote get-url origin`.chomp
    remote_url.empty? ? nil : remote_url
  rescue Errno::ENOENT
    # No git executable available: treat this the same as "no origin remote".
    nil
  end
end

#git_url ⇒ Object



69
70
71
72
73
74
75
# File 'lib/scraped_page_archive.rb', line 69

# Clone URL for the archive repository: #github_repo_url with its password
# component replaced by the SCRAPED_PAGE_ARCHIVE_GITHUB_TOKEN environment
# variable. The resulting string is cached in @git_url.
#
# @return [String]
def git_url
  return @git_url if @git_url

  repo_uri = URI.parse(github_repo_url)
  repo_uri.password = ENV['SCRAPED_PAGE_ARCHIVE_GITHUB_TOKEN']
  @git_url = repo_uri.to_s
end

#record(&block) ⇒ Object



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/scraped_page_archive.rb', line 18

# Runs the given block inside a VCR cassette whose storage location is the
# working directory of the archive clone, then commits every recorded
# interaction to the archive branch and pushes that branch to `origin`.
#
# When no repository URL can be determined, a warning is printed and the block
# is simply called without any recording.
#
# @yield the code whose HTTP interactions should be archived
# @return [Object] the value returned by the block
# @raise [RuntimeError] if the orphan archive branch cannot be created
def record(&block)
  if github_repo_url.nil?
    warn "Could not determine git repo for 'scraped_page_archive' to use.\n\n" \
      "See https://github.com/everypolitician/scraped_page_archive#usage for details."
    return block.call
  end

  VCR::Archive::Persister.storage_location = git.dir.path
  if git.branches[branch_name] || git.branches["origin/#{branch_name}"]
    git.checkout(branch_name)
  else
    git.chdir do
      # FIXME: It's not currently possible to create an orphan branch with ruby-git
      # @see https://github.com/schacon/ruby-git/pull/140
      #
      # Stop if the checkout fails: carrying on would run `git rm` and the
      # initial commit against whichever branch is currently checked out.
      unless system('git', 'checkout', '--orphan', branch_name)
        raise "Could not create orphan branch '#{branch_name}'"
      end
      system('git', 'rm', '--quiet', '-rf', '.')
    end
    git.commit('Initial commit', allow_empty: true)
  end

  ret = VCR.use_cassette('', &block)

  # NOTE: This is a workaround for a ruby-git bug.
  # @see https://github.com/schacon/ruby-git/issues/23
  git.status.changed.each { git.diff.entries }

  status = git.status
  files = status.changed.keys + status.untracked.keys
  return ret if files.empty?

  # For each interaction, commit the yml and html along with a message built
  # from the response status and the request URI.
  files.select { |f| f.end_with?('.yml') }.each do |f|
    interaction = git.chdir { YAML.load_file(f) }
    status_line = interaction['response']['status'].values_at('code', 'message').join(' ')
    git.add([f, f.sub(/\.yml\z/, '.html')])
    # Commit failures propagate to the caller instead of dropping into a debugger.
    git.commit("#{status_line} #{interaction['request']['uri']}")
  end

  # FIXME: Auto-pushing should be optional if the user wants to manually do it at the end.
  git.push('origin', branch_name)
  ret
end

#tmpdir ⇒ Object



65
66
67
# File 'lib/scraped_page_archive.rb', line 65

# Path of a temporary directory created with Dir.mktmpdir on first use and
# reused on later calls.
#
# @return [String]
def tmpdir
  @tmpdir = Dir.mktmpdir unless @tmpdir
  @tmpdir
end