Module: Unisat::Puppeteer

Defined in:
lib/unisat.rb

Class Method Summary collapse

Class Method Details

.cache ⇒ Object

Shortcut helper: returns Unisat.cache.



37
# File 'lib/unisat.rb', line 37

# Shortcut helper: hands back whatever Unisat.cache returns.
def self.cache
  Unisat.cache
end

.chrome_path ⇒ Object



30
# File 'lib/unisat.rb', line 30

# Reader for the configured chrome executable path
# (nil as long as nothing has been assigned to @chrome_path).
def self.chrome_path
  @chrome_path
end

.chrome_path=(path) ⇒ Object

todo: use a config block in the future - why? why not?



19
20
21
22
23
24
25
26
27
28
# File 'lib/unisat.rb', line 19

# Remember the path to the chrome executable.
#
# Prints a note to stdout whether or not the path exists; a missing
# path terminates the process via exit 1 before anything gets stored.
def self.chrome_path=( path )
  unless File.exist?( path )
    puts "*** ERROR - sorry; cannot find chrome executable @ path >#{path}<"
    exit 1
  end

  puts "** bingo! found chrome executable @ path >#{path}<"
  @chrome_path = path
end

.search(q, offset: 1, limit: 1, force: false) ⇒ Object

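Search unisat.io for q and add the HTML of each result page to the cache (pages already in the cache are skipped unless force is set).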



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/unisat.rb', line 42

# Search unisat.io for +q+ in a (non-headless) chrome browser and add the
# html of every result list (div.sats-list) to the cache, one page at a time.
#
# For every page the url is printed; pages already in the cache are skipped
# (unless forced). For fetched pages the response headers, the result
# summary and the pagination text get printed too.
#
# @param q [String] search text; gets percent-encoded for use in the url,
#   the cache lookups/adds use q as passed in
# @param offset [Integer] number of the first result page (the p= url param)
# @param limit [Integer] number of consecutive result pages to process
# @param force [Boolean] the cache check only happens if force is exactly
#   false; pass in true to (re)fetch pages already in the cache
def self.search( q, offset: 1,
                    limit: 1,
                    force: false )
  require 'erb'   ## for ERB::Util.url_encode (no-op if already loaded)

  opts = {}
  opts[:headless]        = false
  opts[:executable_path] = chrome_path  if chrome_path   ## add only if set (default is nil)

  ## note: percent-encode the query - otherwise chars such as & # + %
  ##         or spaces change the meaning of (or break) the page url
  q_encoded = ERB::Util.url_encode( q )

  ## pause between page load and scraping (read once - same for all pages)
  pause = wait_in_s

  ::Puppeteer.launch( **opts ) do |browser|

    page = browser.new_page

    limit.times do |i|
      count = offset+i

      page_url  = "https://unisat.io/search?q=#{q_encoded}&type=text&p=#{count}"
      print page_url

      ## check if already in cache
      if force == false && cache.exist?( q, offset: count )
        print "...in cache\n"
        next
      end

      print "... goto page ...\n"
      response = page.goto( page_url )
      pp response.headers

      puts "sleeping #{pause} sec(s)..."
      sleep( pause )

      ## print search result summary / counts
      page.wait_for_selector('div.result-notice')
      el =  page.query_selector("div.result-notice")
      puts
      puts el.evaluate("el => el.innerText")
      #=> Searched for "biixel" among 3252336 records, found 1105 results.


      ## print pagination
      page.wait_for_selector('ul.ant-pagination')
      el =  page.query_selector("ul.ant-pagination")
      puts
      puts el.evaluate("el => el.innerText")
      #=> 1 ••• 32 33 34 35


      ## get search results (32 inscribes per page)
      page.wait_for_selector('div.sats-list')
      el =  page.query_selector("div.sats-list")

      html = el.evaluate("el => el.innerHTML")
      # puts
      # puts html

      cache.add_page( html, q,
                            offset: count )
    end

    ## give the browser a moment before the launch block closes it down
    puts "sleeping 2 secs before shutdown..."
    sleep( 2 )
  end
end

.wait_in_s ⇒ Object



34
# File 'lib/unisat.rb', line 34

# Pause in seconds; falls back to (and remembers) 10 when nothing is set.
def self.wait_in_s
  @wait_in_s ||= 10
end

.wait_in_s=(s) ⇒ Object

use/rename to wait_in_secs - why? why not?



33
# File 'lib/unisat.rb', line 33

# Writer for the pause (stored as-is in @wait_in_s, no validation).
def self.wait_in_s=( s )
  @wait_in_s = s
end