Class: UKPlanningScraper::Authority

Inherits:
Object
  • Object
show all
Defined in:
lib/uk_planning_scraper/idox.rb,
lib/uk_planning_scraper/authority.rb,
lib/uk_planning_scraper/northgate.rb,
lib/uk_planning_scraper/authority_scrape_params.rb

Constant Summary collapse

@@authorities =
[]

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(name, url) ⇒ Authority

Returns a new instance of Authority.



9
10
11
12
13
14
15
# File 'lib/uk_planning_scraper/authority.rb', line 9

# Builds an authority from its name and URL.
#
# @param name [String] authority name; surrounding whitespace is stripped
# @param url [String] authority URL; surrounding whitespace is stripped
def initialize(name, url)
  @name = name.strip
  @url = url.strip

  # Every per-instance collection starts out empty.
  @tags = []          # tag strings, in arbitrary order
  @applications = []  # Application objects
  @scrape_params = {} # search parameters for the next scrape
end

Dynamic Method Handling

This class handles dynamic methods through the method_missing method

#method_missing(method_name, *args) ⇒ Object (private)

Handles the simple scrape parameters (the date-range bounds and keywords) through method_missing.



107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/uk_planning_scraper/authority_scrape_params.rb', line 107

# Handles the simple scrape parameters (date-range bounds and keywords)
# dynamically, so each one does not need its own method.
#
# The first argument is type-checked with check_class, strings are stripped,
# and the value is stored in @scrape_params under the method's name.
#
# @param method_name [Symbol] name of the method that was called
# @param args [Array] call arguments; only the first one is used
# @return [self] so that parameter calls can be chained
# @raise [ArgumentError] if a Date value lies in the future
# @raise [NoMethodError] if method_name is not a simple scrape parameter
def method_missing(method_name, *args)
  expected_class = simple_scrape_param_classes[method_name]
  # Not one of our parameters: let the default implementation raise the
  # standard NoMethodError.
  return super unless expected_class

  value = args[0]
  check_class(value, expected_class, method_name.to_s)

  # Strip non-destructively: strip! would modify the caller's string and
  # raises FrozenError when that string is frozen.
  value = value.strip if value.instance_of?(String)

  if value.instance_of?(Date) && value > Date.today
    raise ArgumentError, "#{method_name} can't be a date in the " \
      "future (#{value})"
  end

  @scrape_params[method_name] = value
  self
end

# Keeps respond_to? in agreement with method_missing.
def respond_to_missing?(method_name, include_private = false)
  simple_scrape_param_classes.key?(method_name) || super
end

# Maps each dynamically handled parameter name to the class its value must be.
def simple_scrape_param_classes
  {
    validated_from: Date,
    validated_to: Date,
    received_from: Date,
    received_to: Date,
    decided_from: Date,
    decided_to: Date,
    keywords: String
  }
end

Instance Attribute Details

#name ⇒ Object (readonly)

Returns the value of attribute name.



5
6
7
# File 'lib/uk_planning_scraper/authority.rb', line 5

# Reader for the authority's name.
#
# @return [Object] the current value of @name
def name
  @name
end

#url ⇒ Object (readonly)

Returns the value of attribute url.



5
6
7
# File 'lib/uk_planning_scraper/authority.rb', line 5

# Reader for the authority's URL.
#
# @return [Object] the current value of @url
def url
  @url
end

Class Method Details

.all ⇒ Object



84
85
86
# File 'lib/uk_planning_scraper/authority.rb', line 84

# Returns the class-wide list of authorities.
#
# @return [Array] the @@authorities array itself (not a copy)
def self.all
  @@authorities
end

.load ⇒ Object



122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# File 'lib/uk_planning_scraper/authority.rb', line 122

# Fills the class-wide authority list from the bundled authorities.csv.
# Does nothing when the list already has entries, so calling it again is a
# no-op.
def self.load
  return unless @@authorities.empty?

  csv_path = File.join(File.dirname(__dir__), 'uk_planning_scraper',
                       'authorities.csv')

  CSV.foreach(csv_path, headers: true) do |row|
    authority = Authority.new(row['authority_name'], row['url'])

    # The tags column is optional; when present it is whitespace-separated.
    tag_list = row['tags']
    authority.add_tags(tag_list.split(/\s+/)) if tag_list

    # Every authority is also tagged with the name of its planning system.
    authority.add_tag(authority.system)
    @@authorities << authority
  end
end

.named(name) ⇒ Object

Raises:

  • (AuthorityNotFound)



95
96
97
98
99
# File 'lib/uk_planning_scraper/authority.rb', line 95

# Looks up an authority by exact name.
#
# @param name [String] name to compare against each authority's name
# @return [Authority] the first authority whose name equals +name+
# @raise [AuthorityNotFound] if no authority has that name
def self.named(name)
  match = @@authorities.detect { |candidate| name == candidate.name }
  return match unless match.nil?

  raise AuthorityNotFound
end

.not_tagged(tag) ⇒ Object

Returns the authorities that are not tagged with the given tag.



109
110
111
112
113
# File 'lib/uk_planning_scraper/authority.rb', line 109

# Lists the authorities that do not carry the given tag.
#
# @param tag [String] tag to test each authority for
# @return [Array] a new array of the authorities for which tagged?(tag) is false
def self.not_tagged(tag)
  @@authorities.reject { |authority| authority.tagged?(tag) }
end

.tagged(tag) ⇒ Object

Returns the authorities that are tagged with the given tag.



102
103
104
105
106
# File 'lib/uk_planning_scraper/authority.rb', line 102

# Lists the authorities that carry the given tag.
#
# @param tag [String] tag to test each authority for
# @return [Array] a new array of the authorities for which tagged?(tag) is true
def self.tagged(tag)
  @@authorities.select { |authority| authority.tagged?(tag) }
end

.tags ⇒ Object

List all the tags in use



89
90
91
92
93
# File 'lib/uk_planning_scraper/authority.rb', line 89

# Lists every tag in use across all authorities.
#
# @return [Array] the distinct tags, sorted
def self.tags
  @@authorities.map(&:tags).flatten.uniq.sort
end

.untagged ⇒ Object

Authorities with no tags



116
117
118
119
120
# File 'lib/uk_planning_scraper/authority.rb', line 116

# Lists the authorities that have no tags at all.
#
# @return [Array] a new array of the authorities whose tags list is empty
def self.untagged
  @@authorities.select { |authority| authority.tags.empty? }
end

Instance Method Details

#add_tag(tag) ⇒ Object

Add a single tag to existing tags



61
62
63
64
# File 'lib/uk_planning_scraper/authority.rb', line 61

# Adds one tag to this authority's tags. The tag is normalised first:
# surrounding whitespace stripped, lower-cased, and spaces removed.
#
# @param tag [String] the tag to add
# @return [Array, nil] the tags array when the tag was added, nil when the
#   normalised tag was already present
def add_tag(tag)
  normalised = tag.strip.downcase.delete(' ')
  return if tagged?(normalised) # already present, so keep the list unique

  @tags << normalised
end

#add_tags(tags) ⇒ Object

Add multiple tags to existing tags



56
57
58
# File 'lib/uk_planning_scraper/authority.rb', line 56

# Adds several tags to this authority's tags by passing each one to add_tag.
#
# @param tags [Array<String>] the tags to add
# @return [Array<String>] the +tags+ argument that was passed in
def add_tags(tags)
  tags.each do |tag|
    add_tag(tag)
  end
end

#applicant_name(s) ⇒ Object



54
55
56
57
58
59
60
61
62
63
# File 'lib/uk_planning_scraper/authority_scrape_params.rb', line 54

# Sets the applicant name to search for. Only available for Idox authorities.
#
# @param s [String] applicant name; surrounding whitespace is stripped
# @return [self] so that parameter calls can be chained
# @raise [NoMethodError] if this authority's system is not 'idox'
def applicant_name(s)
  unless system == 'idox'
    # Adjacent literals are joined with no extra whitespace. A backslash
    # continuation inside one literal would embed the next line's
    # indentation in the message.
    raise NoMethodError, 'applicant_name is only implemented for Idox. ' \
      "This authority (#{@name}) is #{system.capitalize}."
  end

  check_class(s, String)
  @scrape_params[:applicant_name] = s.strip
  self
end

#application_type(s) ⇒ Object



76
77
78
79
80
81
82
83
84
85
# File 'lib/uk_planning_scraper/authority_scrape_params.rb', line 76

# Sets the application type to search for. Only available for Idox
# authorities.
#
# @param s [String] application type; surrounding whitespace is stripped
# @return [self] so that parameter calls can be chained
# @raise [NoMethodError] if this authority's system is not 'idox'
def application_type(s)
  unless system == 'idox'
    # Adjacent literals are joined with no extra whitespace. A backslash
    # continuation inside one literal would embed the next line's
    # indentation in the message.
    raise NoMethodError, 'application_type is only implemented for Idox. ' \
      "This authority (#{@name}) is #{system.capitalize}."
  end

  check_class(s, String)
  @scrape_params[:application_type] = s.strip
  self
end

#case_officer_code(s) ⇒ Object



65
66
67
68
69
70
71
72
73
74
# File 'lib/uk_planning_scraper/authority_scrape_params.rb', line 65

# Sets the case officer code to search for. Only available for Northgate
# authorities.
#
# @param s [String] case officer code; surrounding whitespace is stripped
# @return [self] so that parameter calls can be chained
# @raise [NoMethodError] if this authority's system is not 'northgate'
def case_officer_code(s)
  unless system == 'northgate'
    # Adjacent literals are joined with no extra whitespace. A backslash
    # continuation inside one literal would embed the next line's
    # indentation in the message.
    raise NoMethodError, 'case_officer_code is only implemented for ' \
      "Northgate. This authority (#{@name}) is #{system.capitalize}."
  end

  check_class(s, String)
  @scrape_params[:case_officer_code] = s.strip
  self
end

#decided_days(n) ⇒ Object



40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/uk_planning_scraper/authority_scrape_params.rb', line 40

# Restricts the search to applications decided within the last +n+ days,
# counting today as the first day.
# Assumes that every scraper/system can do a date range search.
#
# @param n [Integer] number of days; must be greater than 0
# @return [self] so that parameter calls can be chained
# @raise [ArgumentError] if +n+ is not greater than 0
def decided_days(n)
  # Integer, not Fixnum: Fixnum was merged into Integer in Ruby 2.4 and the
  # constant was removed in Ruby 3.2, where referencing it raises NameError.
  check_class(n, Integer)

  raise ArgumentError, 'decided_days must be greater than 0' unless n > 0

  # Read the date once so both ends of the range use the same day.
  today = Date.today
  decided_from(today - (n - 1))
  decided_to(today)
  self
end

#development_type(s) ⇒ Object



87
88
89
90
91
92
93
94
95
96
# File 'lib/uk_planning_scraper/authority_scrape_params.rb', line 87

# Sets the development type to search for. Only available for Idox
# authorities.
#
# @param s [String] development type; surrounding whitespace is stripped
# @return [self] so that parameter calls can be chained
# @raise [NoMethodError] if this authority's system is not 'idox'
def development_type(s)
  unless system == 'idox'
    # Adjacent literals are joined with no extra whitespace. A backslash
    # continuation inside one literal would embed the next line's
    # indentation in the message.
    raise NoMethodError, 'development_type is only implemented for Idox. ' \
      "This authority (#{@name}) is #{system.capitalize}."
  end

  check_class(s, String)
  @scrape_params[:development_type] = s.strip
  self
end

#received_days(n) ⇒ Object



26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/uk_planning_scraper/authority_scrape_params.rb', line 26

# Restricts the search to applications received within the last +n+ days,
# counting today as the first day.
# Assumes that every scraper/system can do a date range search.
#
# @param n [Integer] number of days; must be greater than 0
# @return [self] so that parameter calls can be chained
# @raise [ArgumentError] if +n+ is not greater than 0
def received_days(n)
  # Integer, not Fixnum: Fixnum was merged into Integer in Ruby 2.4 and the
  # constant was removed in Ruby 3.2, where referencing it raises NameError.
  check_class(n, Integer)

  raise ArgumentError, 'received_days must be greater than 0' unless n > 0

  # Read the date once so both ends of the range use the same day.
  today = Date.today
  received_from(today - (n - 1))
  received_to(today)
  self
end

#scrape(options = {}) ⇒ Object



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/uk_planning_scraper/authority.rb', line 17

# Runs a scrape against this authority using the parameters collected in
# @scrape_params, then clears those parameters.
#
# @param options [Hash] scraper options; :delay defaults to 10 and any
#   user-supplied key overrides the default
# @return [Array<Hash>] one hash (from Application#to_hash) per valid
#   application
# @raise [SystemNotSupported] if the planning system is neither 'idox' nor
#   'northgate'
def scrape(options = {})
  # The user-supplied options override the defaults
  options = { delay: 10 }.merge(options)

  # Select which scraper to use
  @applications =
    case system
    when 'idox'
      scrape_idox(@scrape_params, options)
    when 'northgate'
      scrape_northgate(@scrape_params, options)
    else
      # Adjacent literals are joined with no extra whitespace. A backslash
      # continuation inside one literal would embed the next line's
      # indentation in the message.
      raise SystemNotSupported, 'Planning system not supported for ' \
        "#{@name} at URL: #{@url}"
    end

  # Post processing
  @applications.each { |app| app.authority_name = @name }

  # Output as an array of hashes
  # FIXME - silently ignores invalid apps. How should we handle them?
  output = @applications.select(&:valid?).map(&:to_hash)

  # Reset so that old params don't get used for new scrapes
  clear_scrape_params

  output # Single point of successful exit
end

#status(s) ⇒ Object



98
99
100
101
102
# File 'lib/uk_planning_scraper/authority_scrape_params.rb', line 98

# Sets the application status to search for.
#
# @param s [String] status value; surrounding whitespace is stripped
# @return [self] so that parameter calls can be chained
def status(s)
  check_class(s, String)
  @scrape_params.store(:status, s.strip)
  self
end

#system ⇒ Object



70
71
72
73
74
75
76
77
78
79
80
81
82
# File 'lib/uk_planning_scraper/authority.rb', line 70

# Works out which planning system this authority uses by matching @url
# against known URL patterns, checked in order.
#
# @return [String] 'idox', 'northgate', 'ocellaweb', 'agileplanning', or
#   'unknownsystem' when no pattern matches
def system
  # Order matters: the first matching pattern wins.
  url_signatures = [
    [/search\.do\?action=advanced/i, 'idox'],
    [/\.aspx/i, 'northgate'],
    [/ocellaweb/i, 'ocellaweb'],
    [/\/apas\//, 'agileplanning']
  ]

  _pattern, detected = url_signatures.find { |pattern, _| @url.match(pattern) }
  detected || 'unknownsystem'
end

#tagged?(tag) ⇒ Boolean

Returns:

  • (Boolean)


66
67
68
# File 'lib/uk_planning_scraper/authority.rb', line 66

# Reports whether this authority carries the given tag.
# The comparison is exact; the tag is not normalised here.
#
# @param tag [String] tag to look for
# @return [Boolean] true when @tags contains +tag+
def tagged?(tag)
  @tags.member?(tag)
end

#tags ⇒ Object



51
52
53
# File 'lib/uk_planning_scraper/authority.rb', line 51

# Returns this authority's tags in sorted order.
#
# @return [Array] a new sorted array; @tags itself is left in its stored order
def tags
  sorted_tags = @tags.sort
  sorted_tags
end

#validated_days(n) ⇒ Object

Parameter methods for Authority#scrape. Designed to be method chained, e.g.:

applications = UKPlanningScraper::Authority.named("Barnet").development_type("Q22").keywords("illuminat").validated_days(30).scrape



12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/uk_planning_scraper/authority_scrape_params.rb', line 12

# Restricts the search to applications validated within the last +n+ days,
# counting today as the first day.
# Assumes that every scraper/system can do a date range search.
#
# @param n [Integer] number of days; must be greater than 0
# @return [self] so that parameter calls can be chained
# @raise [ArgumentError] if +n+ is not greater than 0
def validated_days(n)
  # Integer, not Fixnum: Fixnum was merged into Integer in Ruby 2.4 and the
  # constant was removed in Ruby 3.2, where referencing it raises NameError.
  check_class(n, Integer)

  raise ArgumentError, 'validated_days must be greater than 0' unless n > 0

  # Read the date once so both ends of the range use the same day.
  today = Date.today
  validated_from(today - (n - 1))
  validated_to(today)
  self
end