Class: Factbook::Page

Inherits:
Object
  • Object
show all
Includes:
LogUtils::Logging
Defined in:
lib/factbook-readers/page.rb

Constant Summary collapse

SITE_BASE =

standard version (note: requires https)

'https://www.cia.gov/library/publications/the-world-factbook/geos/{code}.html'

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(code = nil, json: nil, html: nil, cache: false, info: nil) ⇒ Page

Returns a new instance of Page.



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/factbook-readers/page.rb', line 46

## Builds a page from one of three inputs (checked in this order):
##   1) json: - json text, handed to JsonBuilder
##   2) html: - html text, handed to Builder
##   3) code  - html page looked up via the url built from SITE_BASE
##
##  code  - two-letter code (e.g. au) or a Codes::Code struct (its code gets used);
##          only looked at if neither json nor html is passed in
##  json  - json as a string/text (NOT yet parsed to structured data)
##  html  - "custom" passed-in html page (for debugging and testing)
##  cache - if true and the web cache has the page url, read the page from the cache
##          instead of downloading
##  info  - if passed in, overwrites the page info reported by the builder
##
##  raises ArgumentError if a page download is required but code is nil
def initialize( code=nil,
                json: nil,
                html: nil,
                cache: false,
                info: nil )
  builder =
    if json
      ## note: json is expected to be the raw string/text here
      JsonBuilder.new( json )
    else
      ## no "custom" html passed in? get the page via its two-letter code
      unless html
        ## a code struct works too - just pluck the two-letter code from it
        code = code.code   if code.is_a?( Codes::Code )

        if code.nil?
          raise ArgumentError, "two letter code (e.g. au) required to download page & build page url"
        end

        page_url = SITE_BASE.sub( '{code}', code )
        cached   = cache && Webcache.exist?( page_url )
        ## for debugging - read from cache (if asked for and available)
        html     = cached ? Webcache.read( page_url ) : download_page( page_url )
      end
      Builder.new( html )
    end

  @sects    = builder.sects
  page_info = builder.info

  ## todo/fix/quick hack:
  ##  a passed-in info wins over the page info from the builder
  ##  -- use proper header to setup page info - why, why not??
  @info = info ? info : page_info

  ## "plain" hash access: one entry per section (title => data)
  @data = {}
  @sects.each { |section| @data[ section.title ] = section.data }
end

Instance Attribute Details

#data ⇒ Object (readonly)

“plain” access with vanilla hash



10
11
12
# File 'lib/factbook-readers/page.rb', line 10

## "plain" access - returns the value stored in @data
##   (the constructor fills it with one entry per section: section title => section data)
def data; @data; end

#info ⇒ Object (readonly)

meta info e.g. country_code, country_name, region_name, last_updated, etc.



9
10
11
# File 'lib/factbook-readers/page.rb', line 9

## page (meta) info - returns the value stored in @info
##   (the constructor sets it from the builder, or from the info: option if passed in)
def info; @info; end

#sects ⇒ Object (readonly)

“structured” access e.g. sects/subsects/etc.



8
9
10
# File 'lib/factbook-readers/page.rb', line 8

## "structured" access - returns the value stored in @sects
##   (the sections as reported by the builder in the constructor)
def sects; @sects; end

Class Method Details

.download(code, cache: false) ⇒ Object



35
36
37
# File 'lib/factbook-readers/page.rb', line 35

## build a page for the passed-in code - shortcut for new( code, cache: cache )
##
##  code  - passed on to the constructor as is
##  cache - passed on to the constructor as the cache: option (default: false)
def self.download( code, cache: false )
  options = { cache: cache }
  new( code, **options )
end

.parse(html) ⇒ Object Also known as: parse_html

parse html from string



17
18
19
# File 'lib/factbook-readers/page.rb', line 17

## parse html from string - shortcut for new( html: html )
def self.parse( html )
  page = new( html: html )
  page
end

.parse_json(json) ⇒ Object

parse json from string



26
27
28
# File 'lib/factbook-readers/page.rb', line 26

## parse json from string - shortcut for new( json: json )
##   note: pass in the json text as is (the constructor hands it to the json builder)
def self.parse_json( json )
  options = { json: json }
  new( **options )
end

.read(path) ⇒ Object Also known as: read_html



21
22
23
24
# File 'lib/factbook-readers/page.rb', line 21

## read a html page from a file and build a page from it
##
##  path - location of the html file; the file gets read as utf-8 text
##
##  note: a leading utf-8 byte order mark (BOM), if present, gets dropped
##    so that the html text starts with the first real character
##    (without the BOM mode the mark stays in the string passed on to the builder)
##
##  returns the new page - same as new( html: html );
##  errors raised by File.read (e.g. Errno::ENOENT for a missing file) are passed through
def self.read( path )
  html = File.read( path, mode: 'r:BOM|UTF-8' )
  new( html: html )
end

.read_json(path) ⇒ Object



30
31
32
33
# File 'lib/factbook-readers/page.rb', line 30

## read a json document from a file and build a page from it
##
##  path - location of the json file; the file gets read as utf-8 text
##
##  note: a leading utf-8 byte order mark (BOM), if present, gets dropped;
##    json parsers reject text that starts with a BOM,
##    thus, strip it here before passing the text on
##
##  returns the new page - same as new( json: json );
##  errors raised by File.read (e.g. Errno::ENOENT for a missing file) are passed through
def self.read_json( path )
  json = File.read( path, mode: 'r:BOM|UTF-8' )
  new( json: json )
end

Instance Method Details

#[](key) ⇒ Object

convenience shortcut



99
100
101
102
103
104
105
106
107
108
# File 'lib/factbook-readers/page.rb', line 99

## convenience shortcut - looks up key in the "plain" data hash
##
##   page['geo']   is the same as   page.data['geo']
##
##  todo/fix: delegate [] to data via the forwardable lib - why?? why not??
def []( key )
  hash = data
  hash[ key ]
end

#to_json(minify: false) ⇒ Object

convenience helper for data.to_json; note: pretty print by default!



90
91
92
93
94
95
96
# File 'lib/factbook-readers/page.rb', line 90

## convenience helper for data.to_json; note: pretty print by default!
##
##  args   - optional positional arguments (e.g. the generator state) that the
##           json library passes along when it calls to_json on an object nested
##           in an array or hash; forwarded as is to data.to_json.
##           without accepting them a nested page raises an ArgumentError
##  minify - if true, return the compact one-line json (default: false)
##
##  returns the json text (String):
##    - called with no arguments          => pretty printed (JSON.pretty_generate)
##    - minify: true or called with args  => data.to_json( *args )
def to_json( *args, minify: false )
  if minify || !args.empty?
    ## compact, or formatted per the generator state passed in by the json library
    data.to_json( *args )
  else ## note: pretty print by default!
    JSON.pretty_generate( data )
  end
end