Class: TagCrawler::Main

Inherits:
Object
  • Object
show all
Defined in:
lib/tag_crawler.rb

Instance Method Summary collapse

Instance Method Details

#run ⇒ Object



11
12
13
14
15
16
17
18
19
20
21
22
23
# File 'lib/tag_crawler.rb', line 11

# Entry point: validates the command-line arguments, scrapes the target
# url and writes the results to the output file.
#
# Reads @url (expected to be set as a side effect of validate_args) and
# ARGV[1] (the output filename). Prints a progress line to stdout before
# each scraping step and before the file is written.
def run
	validate_args
	crawler = TagCrawler::WebScraper.new(@url)

	puts "getting links..."
	found_links = crawler.get_links

	puts "getting tags..."
	found_tags = crawler.get_tags

	puts "getting sequences..."
	found_sequences = crawler.get_sequences

	puts "creating file..."
	TagCrawler::FileWriter.new(ARGV[1]).write(found_links, found_tags, found_sequences)
end

#validate_args ⇒ Object



25
26
27
28
29
30
31
# File 'lib/tag_crawler.rb', line 25

# Checks the command-line arguments before crawling.
#
# Expects exactly two entries in ARGV (a url and a filename). On any other
# count it prints an error message and terminates the process with a
# non-zero exit status, so that shells and calling scripts can detect the
# failure. Otherwise it passes ARGV[0] on to validate_url.
def validate_args
	unless ARGV.length == 2
		puts "Error: You must provide 2 arguments: a url and a filename."
		# A bare `exit` would report success (status 0) for an error path.
		exit 1
	end
	validate_url(ARGV[0])
end

#validate_url(url) ⇒ Object



33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/tag_crawler.rb', line 33

# Stores +url+ in @url, adding a default protocol or rejecting an
# unsupported one.
#
# The url is matched against PROTOCOL (defined elsewhere; presumably the
# "://" separator -- confirm against its definition):
# * no match: "http://" is prepended to @url;
# * match whose preceding text is not listed in VALID_PROTOCOLS: an error
#   is printed and the process terminates with a non-zero exit status;
# * otherwise @url is left as given.
#
# @param url [String] the url taken from the command line
def validate_url(url)
	@url = url
	url_protocol = @url.match(PROTOCOL)
	if url_protocol.nil?
		# if no protocol, add http://
		@url = "http://" + @url
	elsif !VALID_PROTOCOLS.include?(url_protocol.pre_match)
		# print error if not valid protocol
		puts "Error: URL must have http or https protocol"
		# A bare `exit` would report success (status 0) for an error path.
		exit 1
	end
end