Class: GoogleVideo::Client

Inherits:
Record
  • Object
show all
Defined in:
lib/google-video.rb

Overview

The main client object providing interface methods for retrieving information from the Google Video server. Parameters are specified via a hash passed to the object on construction mapping attribute names to their respective values.

Constant Summary collapse

DEFAULT_HOST =

the default hostname queried to retrieve google video content.

'video.google.com'
DEFAULT_AGENT =

the default user agent submitted with http requests of google video.

'google-video for Ruby (http://www.rubyforge.org/projects/google-video/)'

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(params = nil) ⇒ Client

Constructs a Client for querying the Google Video server. Optional parameters to be specified as a hash include:

  • host: optional alternate host name to query instead of the default host.

  • agent: optional alternate user agent to submit with http requests instead of the default agent.



462
463
464
465
466
467
# File 'lib/google-video.rb', line 462

# Constructs a Client for querying the Google Video server.
#
# params:: optional hash of attribute values, passed unchanged to the
#          superclass initializer (presumably the place where @host and
#          @agent get populated from the hash -- confirm against Record).
#
# Whichever of @host / @agent is still unset (nil or false) once the
# superclass initializer has run is filled in with DEFAULT_HOST /
# DEFAULT_AGENT respectively.
def initialize(params = nil)
  super(params)

  @host ||= DEFAULT_HOST
  @agent ||= DEFAULT_AGENT
end

Instance Attribute Details

#agent ⇒ Object (readonly)

the user agent submitted with http requests of google video.



449
450
451
# File 'lib/google-video.rb', line 449

# Reader for @agent, the user agent submitted with http requests to google video.
def agent; @agent; end

#host ⇒ Object (readonly)

the hostname queried to retrieve google video content.



446
447
448
# File 'lib/google-video.rb', line 446

# Reader for @host, the hostname queried to retrieve google video content.
def host; @host; end

Instance Method Details

#top_videos(top_request) ⇒ Object

Looks up top videos on Google Video with the parameters specified in the supplied TopVideosRequest and returns a TopVideosResponse.



698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
# File 'lib/google-video.rb', line 698

# Fetches the Google Video "top videos" listing described by +top_request+
# and scrapes it into a TopVideosResponse.
#
# top_request:: a TopVideosRequest; anything else raises ArgumentError.
#
# Returns a TopVideosResponse built from the requested url and one TopVideo
# (movement, today's rank, yesterday's rank, Video) per data row of the
# 'table-top100' table.
def top_videos(top_request)
  # reject anything that is not a proper request object
  unless top_request.is_a?(TopVideosRequest)
    raise ArgumentError, "invalid argument, request must be a GoogleVideo::TopVideosRequest"
  end

  # fetch the page and hand it to the html parser
  url = _top_videos_url(top_request)
  page = _request(url)
  doc = Hpricot(page)

  # every ranked video lives in its own table row; the leading row only
  # carries the column headers, so it is dropped
  table_rows = doc / "table[@class='table-top100']/tr"
  table_rows.shift

  entries = table_rows.map do |table_row|
    # name the five cells of the row after what they hold
    movement_cell, today_cell, yesterday_cell, thumbnail_cell, detail_cell = table_row / "td"

    # the arrow image tells us which way the rank moved since yesterday
    arrow_html = (movement_cell / "img").to_html
    movement = case arrow_html
               when /up\.gif/ then 1
               when /down\.gif/ then -1
               else 0
               end

    # today's and yesterday's rank
    rank_today = today_cell.inner_html.to_i
    rank_yesterday = yesterday_cell.inner_html.to_i

    # thumbnail image
    thumbnail_img = thumbnail_cell % "a/img"
    thumbnail_image_url = _decode_html(thumbnail_img['src'])

    # link to the video page doubles as the title
    video_link = detail_cell % "a"
    page_url = "http://#{@host}#{video_link['href']}"
    title = _decode_html(video_link.inner_html.strip)

    # star rating
    star_count = _parse_star_elements(detail_cell / "div[@class='meta']/span/font/img[@class='star']")

    # number of ratings; the span is optional
    raters = detail_cell % "div[@class='meta']/span/font/span[@id='numOfRaters']"
    rating_count = raters ? raters.inner_html.to_i : 0

    # duration text from the meta block
    duration = (detail_cell % "div[@class='meta']").all_text.gsub(/ /, '').strip

    # description is the full text of the detail cell
    description = _decode_html(detail_cell.all_text.strip)

    video = Video.new(:title => title,
                      :page_url => page_url,
                      :thumbnail_image_url => thumbnail_image_url,
                      :star_count => star_count,
                      :rating_count => rating_count,
                      :duration => duration,
                      :description => description)

    TopVideo.new(:movement => movement,
                 :rank_today => rank_today,
                 :rank_yesterday => rank_yesterday,
                 :video => video)
  end

  TopVideosResponse.new(:request_url => url,
                        :videos => entries)
end

#video_details(details_request) ⇒ Object

Looks up detailed information on a specific Video on Google Video with the parameters specified in the supplied VideoDetailsRequest and returns a VideoDetailsResponse.



567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
# File 'lib/google-video.rb', line 567

# Looks up detailed information on one video and returns it wrapped in a
# VideoDetailsResponse.
#
# details_request:: a VideoDetailsRequest; anything else raises ArgumentError.
#
# The details page is fetched through _request and scraped with Hpricot.
# Page elements that this method treats as optional (description, rating
# widgets, download button, "up next" playlist) are tolerated when absent:
# the corresponding Video attributes are then built from empty values.
def video_details(details_request)
  # validate parameters
  unless details_request.is_a?(VideoDetailsRequest)
    raise ArgumentError, "invalid argument, request must be a GoogleVideo::VideoDetailsRequest"
  end

  # gather response data from the server
  url = _video_details_url(details_request)
  response = _request(url)
  doc = Hpricot(response)

  # parse title
  title = (doc/"div[@id='pvprogtitle']").inner_html.strip

  # parse description: the short text plus, when present, the expanded remainder
  font_description = (doc%"div[@id='description']/font")
  description = font_description ? font_description.all_text.strip : ''
  span_wholedescr = (doc%"span[@id='wholedescr']")
  description += ' ' + span_wholedescr.all_text.strip if span_wholedescr
  description = _decode_html(description)

  # parse star count.  % returns nil when the rating widget is not on the
  # page, so hand the star helper an empty element list in that case rather
  # than calling / on nil
  span_rating = (doc%"span[@id='communityRating']")
  star_images = span_rating ? (span_rating/"img[@class='star']") : Hpricot::Elements[]
  star_count = _parse_star_elements(star_images)

  # parse rating count
  span_raters = (doc%"span[@id='numOfRaters']")
  rating_count = span_raters ? span_raters.inner_html.to_i : 0

  # parse upload user, duration, upload date, upload user domain, upload
  # user url.  unfortunately this is a bit messy since, unlike much of the
  # rest of google's lovely html, there are no useful id or class names we
  # can hang our hat on.  rather, there are anywhere from one to three
  # rows of text, with only the middle row (in the three-row scenario)
  # containing duration and upload date, omnipresent.  still, we buckle
  # down and have at it with fervor and tenacity.
  duration_etc_html = (doc%"div[@id='durationetc']").inner_html
  # NOTE(review): this pattern needs at least one character between "<br"
  # and ">", so a bare "<br>" is not treated as a row separator -- confirm
  # against real page markup
  duration_parts = duration_etc_html.split(/<br[^>]+>/)
  # see if the first line looks like it has a date formatted ala 'Nov 9, 2006'
  if (duration_parts[0] =~ /\-  [A-Za-z]{3} \d+, \d{4}/)
    # first line is duration / upload_date, and there is no upload username
    upload_user = ''
    duration_upload_html = duration_parts[0]
    upload_user_domain = duration_parts[1]
  else
    upload_user = _decode_html(_clean_string(duration_parts[0]))
    duration_upload_html = duration_parts[1]
    upload_user_domain = duration_parts[2]
  end

  # parse the duration and upload date, which are separated by a dash
  ( duration, upload_date ) = duration_upload_html.split(/\-/)
  duration = _clean_string(duration)
  upload_date = _parse_date(_clean_string(upload_date))

  # parse the upload user url and domain if present
  if (upload_user_domain =~ /<a.*?href="([^"]+)"[^>]+>([^<]+)<\/a>/)
    upload_user_url = "http://#{@host}#{_decode_html(_clean_string($1))}"
    upload_user_domain = _clean_string($2)
  else
    upload_user_url = ''
    upload_user_domain = ''
  end

  # pull out view count and rank info table row elements
  tr_statsall = (doc/"div[@id='statsall']/table/tr")

  # remove the first row which just contains header info
  tr_statsall.shift

  # parse each of the view count and rank rows
  (view_count, rank) = _parse_statsall_row(tr_statsall.shift)
  (view_count_yesterday, rank_yesterday) = _parse_statsall_row(tr_statsall.shift)
  (view_count_email, rank_email) = _parse_statsall_row(tr_statsall.shift)
  (view_count_embed, rank_embed) = _parse_statsall_row(tr_statsall.shift)

  # pull out the url to the video .gvp file if present.  the url is taken to
  # be the second comma-separated argument of the onDownloadClick(...) call in
  # the button's onclick handler; splitting only the captured argument list
  # keeps the call's closing paren (and anything after it) out of the url.
  # when the call cannot be located we fall back to splitting the whole script.
  video_file_url = ''
  img_download = (doc%"img[@src='/static/btn_download.gif']")
  if img_download
    onclick_script = _decode_html(img_download['onclick'])
    call_args = (onclick_script =~ /onDownloadClick\(([^\)]+)\)/) ? $1 : onclick_script
    url_argument = call_args.split(",")[1]
    video_file_url = url_argument.gsub(/"/, '') if url_argument
  end

  # pull out the video frame thumbnails
  video_frame_thumbnails = (doc/"img[@class='detailsimage']").map do |frame_image|
    _parse_video_frame_thumbnail(frame_image)
  end

  # pull out the playlist entries; a page without an "up next" table simply
  # has no playlist
  table_upnext = (doc%"table[@id='upnexttable']")
  playlist_rows = table_upnext ? (table_upnext/"tr") : []
  playlist_entries = playlist_rows.map do |tr_playlist|
    _parse_playlist_entry(tr_playlist)
  end

  # create the new, fully populated video record
  video = Video.new(:description => description,
                    :duration => duration,
                    :page_url => url,
                    :playlist_entries => playlist_entries,
                    :rank => rank,
                    :rank_yesterday => rank_yesterday,
                    :rank_email => rank_email,
                    :rank_embed => rank_embed,
                    :rating_count => rating_count,
                    :star_count => star_count,
                    :title => title,
                    :upload_date => upload_date,
                    :upload_user => upload_user,
                    :upload_user_domain => upload_user_domain,
                    :upload_user_url => upload_user_url,
                    :video_file_url => video_file_url,
                    :video_frame_thumbnails => video_frame_thumbnails,
                    :view_count => view_count,
                    :view_count_yesterday => view_count_yesterday,
                    :view_count_email => view_count_email,
                    :view_count_embed => view_count_embed)

  # build and return the response
  VideoDetailsResponse.new(:request_url => url, :video => video)
end

#video_search(search_request) ⇒ Object

Runs a search query on Google Video with the parameters specified in the supplied VideoSearchRequest and returns a VideoSearchResponse.



471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
# File 'lib/google-video.rb', line 471

# Runs a search query on Google Video and returns a VideoSearchResponse.
#
# search_request:: a VideoSearchRequest; anything else raises ArgumentError.
#
# Raises GoogleVideoException when the result statistics header of the page
# is missing or cannot be parsed.
#
# When the page reports "No results", the response carries zero indices, a
# zero total, the reported execution time and an empty video list.  Otherwise
# it carries the parsed start/end index, total result count, execution time
# and one Video per row of the 'searchresult' table.
def video_search(search_request)
  # validate parameters
  unless search_request.is_a?(VideoSearchRequest)
    raise ArgumentError, "invalid argument, request must be a GoogleVideo::VideoSearchRequest"
  end

  # gather response data from the server
  url = _search_url(search_request)
  response = _request(url)
  doc = Hpricot(response)

  # parse the overall search query stats.  % returns nil when the header is
  # not on the page; treat that as unparseable stats so the sanity check
  # below reports it, instead of failing on nil here
  stats_node = (doc%"#resultsheadertable/tr/td/font")
  stats_html = stats_node ? stats_node.inner_html : ''

  # see if we've got no results
  if stats_html =~ /No results \(<b>([0-9.]+)/
    # nothing doing, so return a simplified response and we're done
    return VideoSearchResponse.new(:request_url => url,
                                   :start_index => 0,
                                   :end_index => 0,
                                   :total_result_count => 0,
                                   :execution_time => $1.to_f,
                                   :videos => [])
  end

  # pull out the actual stats now that we know we can
  unless stats_html =~ /([0-9,]+) \- ([0-9,]+)<\/b> of about <b>([0-9,]+)<\/b> \(<b>([0-9.]+)/
    # simple sanity-check here, even though we don't bother below, since if this
    # doesn't work nothing else is likely to operate properly
    raise GoogleVideoException, "failed to parse search query stats"
  end
  raw_start, raw_end, raw_total, raw_time = $1, $2, $3, $4
  # the counts may carry thousands separators ("12,345"); to_i stops at the
  # first comma, so strip them before converting
  start_index, end_index, total_result_count =
    [raw_start, raw_end, raw_total].map { |count| count.delete(',').to_i }
  execution_time = raw_time.to_f

  # parse the video results, one per table row
  result_rows = doc/"table[@class='searchresult']/tr"
  videos = result_rows.map do |result_row|
    # parse the thumbnail image
    thumbnail_image_url = _decode_html((result_row%"img[@class='searchresultimg']")['src'])

    # parse the title and page url
    a_title = (result_row%"div[@class='resulttitle']/a")
    page_url = "http://#{@host}/#{_decode_html(a_title['href'])}"
    title = _decode_html(a_title.inner_html.strip)

    # parse the description text
    description = _decode_html((result_row%"div[@class='snippet']").inner_html.strip)

    # parse the upload username: everything before the first dash of the
    # (optional) channel span
    span_channel = (result_row%"span[@class='channel']")
    channel_html = span_channel ? span_channel.inner_html : ''
    channel_html =~ /([^\-]+)/
    upload_user = _decode_html(_clean_string($1))

    # stars
    star_count = _parse_star_elements(result_row/"img[@class='star']")

    # rating count
    span_raters = (result_row%"span[@id='numOfRaters']")
    rating_count = span_raters ? span_raters.inner_html.to_i : 0

    # duration and upload date share one span, separated by " - "
    span_date = (result_row%"span[@class='date']")
    date_html = span_date.inner_html
    date_html =~ /([^\-]+) \- (.*)$/
    duration = _clean_string($1)
    upload_date = _parse_date(_clean_string($2))

    # construct the video object for this row
    Video.new(:title => title,
              :page_url => page_url,
              :thumbnail_image_url => thumbnail_image_url,
              :description => description,
              :star_count => star_count,
              :rating_count => rating_count,
              :duration => duration,
              :upload_date => upload_date,
              :upload_user => upload_user)
  end

  # construct the final search response with all info we've gathered
  VideoSearchResponse.new(:request_url => url,
                          :start_index => start_index,
                          :end_index => end_index,
                          :total_result_count => total_result_count,
                          :execution_time => execution_time,
                          :videos => videos)
end