Class: UKPlanningScraper::Authority
- Inherits:
-
Object
- Object
- UKPlanningScraper::Authority
- Defined in:
- lib/uk_planning_scraper/idox.rb,
lib/uk_planning_scraper/authority.rb,
lib/uk_planning_scraper/northgate.rb,
lib/uk_planning_scraper/authority_scrape_params.rb
Constant Summary collapse
[]
Instance Attribute Summary collapse
-
#name ⇒ Object
readonly
Returns the value of attribute name.
-
#url ⇒ Object
readonly
Returns the value of attribute url.
Class Method Summary collapse
- .all ⇒ Object
- .load ⇒ Object
- .named(name) ⇒ Object
-
.not_tagged(tag) ⇒ Object
Not tagged x.
-
.tagged(tag) ⇒ Object
Tagged x.
-
.tags ⇒ Object
List all the tags in use.
-
.untagged ⇒ Object
Authorities with no tags.
Instance Method Summary collapse
-
#add_tag(tag) ⇒ Object
Add a single tag to existing tags.
-
#add_tags(tags) ⇒ Object
Add multiple tags to existing tags.
- #applicant_name(s) ⇒ Object
- #application_type(s) ⇒ Object
- #decided_days(n) ⇒ Object
- #development_type(s) ⇒ Object
-
#initialize(name, url) ⇒ Authority
constructor
A new instance of Authority.
- #received_days(n) ⇒ Object
- #scrape(options = {}) ⇒ Object
- #system ⇒ Object
- #tagged?(tag) ⇒ Boolean
- #tags ⇒ Object
-
#validated_days(n) ⇒ Object
Parameter methods for Authority#scrape. Designed to be method chained.
Constructor Details
#initialize(name, url) ⇒ Authority
Returns a new instance of Authority.
9 10 11 12 13 14 15 |
# File 'lib/uk_planning_scraper/authority.rb', line 9

# Returns a new instance of Authority with no tags, no applications and no
# scrape parameters set.
#
# @param name [String] authority name; leading/trailing whitespace is stripped
# @param url [String] authority URL; leading/trailing whitespace is stripped
def initialize(name, url)
  @name = name.strip
  @url = url.strip
  @tags = [] # Strings in arbitrary order
  @applications = [] # Application objects
  @scrape_params = {}
end
Dynamic Method Handling
This class handles dynamic methods through the method_missing method
#method_missing(method_name, *args) ⇒ Object (private)
Handle the simple params with this
90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
# File 'lib/uk_planning_scraper/authority_scrape_params.rb', line 90

# Handles the simple scrape parameters (validated/received/decided date
# bounds and keywords) dynamically: type-checks the first argument, stores it
# in @scrape_params under the method's name and returns self for chaining.
#
# @param method_name [Symbol] name of the missing method
# @param args [Array] call arguments; only the first one is used
# @return [self]
# @raise [ArgumentError] if the value is a Date later than today
# @raise [NoMethodError] if method_name is not one of the simple parameters
def method_missing(method_name, *args)
  expected_class = simple_scrape_param_types[method_name]
  # Anything that is not a simple parameter gets the standard NoMethodError.
  return super unless expected_class

  value = args[0]
  check_class(value, expected_class, method_name.to_s)

  # Non-destructive strip: the caller's string is left untouched, and frozen
  # strings (e.g. under frozen_string_literal) no longer raise FrozenError.
  value = value.strip if value.class == String

  if value.class == Date && value > Date.today
    raise ArgumentError.new("#{method_name} can't be a date in the " \
      "future (#{value.to_s})")
  end

  @scrape_params[method_name] = value
  self
end

# Keeps respond_to? consistent with the methods method_missing accepts.
def respond_to_missing?(method_name, include_private = false)
  simple_scrape_param_types.key?(method_name) || super
end

# Maps each dynamically handled parameter to the class its value must have.
def simple_scrape_param_types
  {
    validated_from: Date,
    validated_to: Date,
    received_from: Date,
    received_to: Date,
    decided_from: Date,
    decided_to: Date,
    keywords: String
  }
end
Instance Attribute Details
#name ⇒ Object (readonly)
Returns the value of attribute name.
5 6 7 |
# File 'lib/uk_planning_scraper/authority.rb', line 5

# Read-only accessor.
#
# @return [Object] the value of attribute name
def name
  @name
end
#url ⇒ Object (readonly)
Returns the value of attribute url.
5 6 7 |
# File 'lib/uk_planning_scraper/authority.rb', line 5

# Read-only accessor.
#
# @return [Object] the value of attribute url
def url
  @url
end
Class Method Details
.all ⇒ Object
84 85 86 |
# File 'lib/uk_planning_scraper/authority.rb', line 84

# @return [Object] the class-level @@authorities collection itself (not a copy)
def self.all
  @@authorities
end
.load ⇒ Object
122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
# File 'lib/uk_planning_scraper/authority.rb', line 122

# Populates @@authorities from authorities.csv. Each row becomes an Authority
# built from its 'authority_name' and 'url' columns, tagged with the
# whitespace-separated entries of its 'tags' column (when present) and with
# the name of its detected planning system.
def self.load
  # Don't run this method more than once
  return unless @@authorities.empty?

  csv_path = File.join(File.dirname(__dir__), 'uk_planning_scraper',
                       'authorities.csv')

  CSV.foreach(csv_path, :headers => true) do |line|
    auth = Authority.new(line['authority_name'], line['url'])
    auth.add_tags(line['tags'].split(/\s+/)) if line['tags']
    auth.add_tag(auth.system)
    @@authorities << auth
  end
end
.named(name) ⇒ Object
95 96 97 98 99 |
# File 'lib/uk_planning_scraper/authority.rb', line 95

# Looks up an authority by exact name.
#
# @param name [Object] compared with == against each authority's name
# @return [Authority] the first authority whose name matches
# @raise [AuthorityNotFound] if no authority has that name
def self.named(name)
  authority = @@authorities.find { |a| name == a.name }
  raise AuthorityNotFound if authority.nil?

  authority
end
.not_tagged(tag) ⇒ Object
Not tagged x
109 110 111 112 113 |
# File 'lib/uk_planning_scraper/authority.rb', line 109

# Not tagged x.
#
# @param tag [Object] tag passed to each authority's tagged?
# @return [Array] authorities for which tagged?(tag) is false
def self.not_tagged(tag)
  @@authorities.reject { |a| a.tagged?(tag) }
end
.tagged(tag) ⇒ Object
Tagged x
102 103 104 105 106 |
# File 'lib/uk_planning_scraper/authority.rb', line 102

# Tagged x.
#
# @param tag [Object] tag passed to each authority's tagged?
# @return [Array] authorities for which tagged?(tag) is true
def self.tagged(tag)
  @@authorities.select { |a| a.tagged?(tag) }
end
.tags ⇒ Object
List all the tags in use
89 90 91 92 93 |
# File 'lib/uk_planning_scraper/authority.rb', line 89

# List all the tags in use.
#
# @return [Array] every tag carried by any authority, de-duplicated and sorted
def self.tags
  @@authorities.flat_map { |a| a.tags }.uniq.sort
end
.untagged ⇒ Object
Authorities with no tags
116 117 118 119 120 |
# File 'lib/uk_planning_scraper/authority.rb', line 116

# Authorities with no tags.
#
# @return [Array] authorities whose tags list is empty
def self.untagged
  @@authorities.select { |a| a.tags.empty? }
end
Instance Method Details
#add_tag(tag) ⇒ Object
Add a single tag to existing tags
61 62 63 64 |
# File 'lib/uk_planning_scraper/authority.rb', line 61

# Add a single tag to existing tags. The tag is normalised first: surrounding
# whitespace stripped, lower-cased, and all space characters removed.
#
# @param tag [String] tag to add
# @return [Array, nil] @tags after appending, or nil when the normalised tag
#   was already present
def add_tag(tag)
  clean_tag = tag.strip.downcase.delete(' ')
  @tags << clean_tag unless tagged?(clean_tag) # prevent duplicates
end
#add_tags(tags) ⇒ Object
Add multiple tags to existing tags
56 57 58 |
# File 'lib/uk_planning_scraper/authority.rb', line 56

# Add multiple tags to existing tags by passing each one to add_tag.
#
# @param tags [Enumerable] tags to add
def add_tags(tags)
  tags.each { |t| add_tag(t) }
end
#applicant_name(s) ⇒ Object
54 55 56 57 58 59 60 61 62 63 |
# File 'lib/uk_planning_scraper/authority_scrape_params.rb', line 54

# Sets the applicant_name scrape parameter (stored stripped).
#
# @param s [String] applicant name to search for
# @return [self] so calls can be chained
# @raise [NoMethodError] if this authority's system is not 'idox'
def applicant_name(s)
  unless system == 'idox'
    # Adjacent literals instead of a backslash inside the string, so the
    # source indentation no longer ends up in the message.
    raise NoMethodError.new("applicant_name is only implemented for Idox. " \
      "This authority (#{@name}) is #{system.capitalize}.")
  end

  check_class(s, String)
  @scrape_params[:applicant_name] = s.strip
  self
end
#application_type(s) ⇒ Object
65 66 67 68 69 70 71 72 73 74 |
# File 'lib/uk_planning_scraper/authority_scrape_params.rb', line 65

# Sets the application_type scrape parameter (stored stripped).
#
# @param s [String] application type to search for
# @return [self] so calls can be chained
# @raise [NoMethodError] if this authority's system is not 'idox'
def application_type(s)
  unless system == 'idox'
    # Adjacent literals instead of a backslash inside the string, so the
    # source indentation no longer ends up in the message.
    raise NoMethodError.new("application_type is only implemented for " \
      "Idox. This authority (#{@name}) is #{system.capitalize}.")
  end

  check_class(s, String)
  @scrape_params[:application_type] = s.strip
  self
end
#decided_days(n) ⇒ Object
40 41 42 43 44 45 46 47 48 49 50 51 52 |
# File 'lib/uk_planning_scraper/authority_scrape_params.rb', line 40

# Sets decided_from / decided_to to cover the last n days, today included
# (Date.today - (n - 1) up to Date.today).
# Assumes that every scraper/system can do a date range search.
#
# @param n [Integer] number of days; must be greater than 0
# @return [self] so calls can be chained
# @raise [ArgumentError] if n is not greater than 0
def decided_days(n)
  # Integer, not Fixnum: Fixnum was deprecated in Ruby 2.4 and removed in 3.2.
  check_class(n, Integer)

  unless n > 0
    raise ArgumentError.new("decided_days must be greater than 0")
  end

  decided_from(Date.today - (n - 1))
  decided_to(Date.today)
  self
end
#development_type(s) ⇒ Object
76 77 78 79 80 81 82 83 84 85 |
# File 'lib/uk_planning_scraper/authority_scrape_params.rb', line 76

# Sets the development_type scrape parameter (stored stripped).
#
# @param s [String] development type to search for
# @return [self] so calls can be chained
# @raise [NoMethodError] if this authority's system is not 'idox'
def development_type(s)
  unless system == 'idox'
    # Adjacent literals instead of a backslash inside the string, so the
    # source indentation no longer ends up in the message.
    raise NoMethodError.new("development_type is only implemented for " \
      "Idox. This authority (#{@name}) is #{system.capitalize}.")
  end

  check_class(s, String)
  @scrape_params[:development_type] = s.strip
  self
end
#received_days(n) ⇒ Object
26 27 28 29 30 31 32 33 34 35 36 37 38 |
# File 'lib/uk_planning_scraper/authority_scrape_params.rb', line 26

# Sets received_from / received_to to cover the last n days, today included
# (Date.today - (n - 1) up to Date.today).
# Assumes that every scraper/system can do a date range search.
#
# @param n [Integer] number of days; must be greater than 0
# @return [self] so calls can be chained
# @raise [ArgumentError] if n is not greater than 0
def received_days(n)
  # Integer, not Fixnum: Fixnum was deprecated in Ruby 2.4 and removed in 3.2.
  check_class(n, Integer)

  unless n > 0
    raise ArgumentError.new("received_days must be greater than 0")
  end

  received_from(Date.today - (n - 1))
  received_to(Date.today)
  self
end
#scrape(options = {}) ⇒ Object
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
# File 'lib/uk_planning_scraper/authority.rb', line 17

# Runs the scraper matching this authority's planning system with the
# accumulated scrape parameters, then clears those parameters.
#
# @param options [Hash] overrides for the defaults ({ delay: 10 })
# @return [Array] to_hash output of every scraped application that is valid?
# @raise [SystemNotSupported] if the system is neither 'idox' nor 'northgate'
def scrape(options = {})
  default_options = {
    delay: 10,
  }
  # The user-supplied options override the defaults
  options = default_options.merge(options)

  # Select which scraper to use
  @applications =
    case system
    when 'idox'
      scrape_idox(@scrape_params, options)
    when 'northgate'
      scrape_northgate(@scrape_params, options)
    else
      # Adjacent literals instead of a backslash inside the string, so the
      # source indentation no longer ends up in the message.
      raise SystemNotSupported.new("Planning system not supported for " \
        "#{@name} at URL: #{@url}")
    end

  # Post processing
  # NOTE(review): the attribute name was missing from the extracted listing;
  # authority_name is restored here - confirm against the Application class.
  @applications.each do |app|
    app.authority_name = @name
  end

  # Output as an array of hashes
  # FIXME - silently ignores invalid apps. How should we handle them?
  output = @applications.each_with_object([]) do |app, hashes|
    hashes << app.to_hash if app.valid?
  end

  # Reset so that old params don't get used for new scrapes
  clear_scrape_params

  output # Single point of successful exit
end
#system ⇒ Object
70 71 72 73 74 75 76 77 78 79 80 81 82 |
# File 'lib/uk_planning_scraper/authority.rb', line 70

# Identifies the planning system behind this authority from patterns in @url.
# Patterns are tried in order and the first match wins.
#
# @return [String] 'idox', 'northgate', 'ocellaweb', 'agileplanning' or
#   'unknownsystem'
def system
  case @url
  when /search\.do\?action=advanced/i then 'idox'
  when /generalsearch\.aspx/i then 'northgate'
  when /ocellaweb/i then 'ocellaweb'
  when %r{/apas/} then 'agileplanning' # case-sensitive, unlike the others
  else 'unknownsystem'
  end
end
#tagged?(tag) ⇒ Boolean
66 67 68 |
# File 'lib/uk_planning_scraper/authority.rb', line 66

# @param tag [Object] tag to look for, compared as-is (no normalisation here)
# @return [Boolean] whether @tags includes tag
def tagged?(tag)
  @tags.include?(tag)
end
#tags ⇒ Object
51 52 53 |
# File 'lib/uk_planning_scraper/authority.rb', line 51

# @return [Array] a sorted copy of this authority's tags; @tags itself is
#   left in its original order
def tags
  @tags.sort
end
#validated_days(n) ⇒ Object
Parameter methods for Authority#scrape. Designed to be method chained, e.g.:
applications = UKPlanningScraper::Authority.named("Barnet"). \
  development_type("Q22").keywords("illuminat"). \
  validated_days(30).scrape
12 13 14 15 16 17 18 19 20 21 22 23 24 |
# File 'lib/uk_planning_scraper/authority_scrape_params.rb', line 12

# Sets validated_from / validated_to to cover the last n days, today included
# (Date.today - (n - 1) up to Date.today).
# Assumes that every scraper/system can do a date range search.
#
# @param n [Integer] number of days; must be greater than 0
# @return [self] so calls can be chained
# @raise [ArgumentError] if n is not greater than 0
def validated_days(n)
  # Integer, not Fixnum: Fixnum was deprecated in Ruby 2.4 and removed in 3.2.
  check_class(n, Integer)

  unless n > 0
    raise ArgumentError.new("validated_days must be greater than 0")
  end

  validated_from(Date.today - (n - 1))
  validated_to(Date.today)
  self
end