Class: Grabepg::GrabTvsou
- Inherits:
-
Object
- Object
- Grabepg::GrabTvsou
- Includes:
- Grabepg
- Defined in:
- lib/grabepg/grab_tvsou.rb
Constant Summary collapse
- ChannelTypeMap =
{"yg_ys_li"=>"央视","yg_ws_li"=>"卫视","yg_hw_li"=>"海外","yg_df_li"=>"地方"}
Instance Attribute Summary collapse
-
#channels ⇒ Object
readonly
频道存储.
-
#default_min_interval ⇒ Object
readonly
两个节目间的最小间隔时间.
-
#grabbase ⇒ Object
readonly
Returns the value of attribute grabbase.
-
#home_page ⇒ Object
readonly
首页.
-
#proxy_list ⇒ Object
readonly
代理列表.
-
#schedules ⇒ Object
readonly
时间表存储.
Instance Method Summary collapse
-
#dispose_channel_page(url, channel_type) ⇒ Object
获取频道列表 url是获取频道列表的首页 地方需要调用此函数.
-
#dispose_home_page ⇒ Object
对首页进行处理，获取部分频道的URL和信息.
-
#dispose_href_schedule_data(href, start_time, use_time) ⇒ Object
获取频道时间表URL.
-
#dispose_schedule_page(url, start_time, use_time) ⇒ Object
根据URL解析时间表页面.
-
#dispose_show_info(url) ⇒ Object
解析节目详情页面.
-
#get_channel_logo(_url, channel_type, no_dis = false) ⇒ Object
获取频道图标地址 url 手机表的URL值 channel_type 频道类型 no_dis 直接使用URL 不处理.
-
#get_data(start_time, use_time) ⇒ Object
获取时间 start_time 时间起始点 use_time 天数.
- #get_data_year_month_day(time) ⇒ Object
- #get_proxy_list ⇒ Object
-
#get_url(type) ⇒ Object
获取从tvsou的什么网站上获取 type: mobile,webpage.
-
#initialize(grabtype, proxy_list) ⇒ GrabTvsou
constructor
type 从mobile还是网站接口抓取数据.
Constructor Details
Instance Attribute Details
#channels ⇒ Object (readonly)
频道存储
19 20 21 |
# File 'lib/grabepg/grab_tvsou.rb', line 19
# Read-only accessor for the channel store.
#
# @return [Object] the current value of @channels
def channels
  @channels
end
#default_min_interval ⇒ Object (readonly)
两个节目间的最小间隔时间
25 26 27 |
# File 'lib/grabepg/grab_tvsou.rb', line 25
# Read-only accessor for the minimum interval between two programmes.
# NOTE(review): the unit of the interval is not visible here - confirm.
#
# @return [Object] the current value of @default_min_interval
def default_min_interval
  @default_min_interval
end
#grabbase ⇒ Object (readonly)
Returns the value of attribute grabbase.
16 17 18 |
# File 'lib/grabepg/grab_tvsou.rb', line 16
# Read-only accessor for the helper object used to fetch pages.
#
# @return [Object] the current value of @grabbase
def grabbase
  @grabbase
end
#home_page ⇒ Object (readonly)
首页
11 12 13 |
# File 'lib/grabepg/grab_tvsou.rb', line 11
# Read-only accessor for the home page URL.
#
# @return [Object] the current value of @home_page
def home_page
  @home_page
end
#proxy_list ⇒ Object (readonly)
代理列表
14 15 16 |
# File 'lib/grabepg/grab_tvsou.rb', line 14
# Read-only accessor for the proxy list.
#
# @return [Object] the current value of @proxy_list
def proxy_list
  @proxy_list
end
#schedules ⇒ Object (readonly)
时间表存储
22 23 24 |
# File 'lib/grabepg/grab_tvsou.rb', line 22
# Read-only accessor for the schedule store.
#
# @return [Object] the current value of @schedules
def schedules
  @schedules
end
Instance Method Details
#dispose_channel_page(url, channel_type) ⇒ Object
获取频道列表 url是获取频道列表的首页 地方需要调用此函数
118 119 120 |
# File 'lib/grabepg/grab_tvsou.rb', line 118
# Placeholder for fetching a channel list from a channel-list start page
# (described as needed for local channels). The method has no
# implementation: both arguments are ignored and nil is returned.
#
# @param url [Object] start page of the channel list (unused)
# @param channel_type [Object] channel category (unused)
# @return [nil]
def dispose_channel_page(url, channel_type)
  nil
end
#dispose_home_page ⇒ Object
对首页进行处理，获取部分频道的URL和信息
77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
# File 'lib/grabepg/grab_tvsou.rb', line 77
# Fetches the home page and collects the channel links found in the <li>
# elements whose class maps (via ChannelTypeMap) to "央视" or "卫视".
# Each channel is merged into @channels as
#   channel name => { url: <href>, type: "CCTV" | "WTV" }
# The "海外" and "地方" categories are recognised but nothing is collected
# for them here.
#
# Parsing is attempted up to five times, re-fetching the page each time.
# Exceptions raised by the fetch itself are not retried and propagate.
#
# @return [Hash] @channels after merging
# @raise [RuntimeError] with the last parse error's message once all five
#   attempts have failed
def dispose_home_page
  # Builds the name => info hash for one <li>; within a single <li> the
  # first anchor with a given text wins.
  get_channellist = lambda do |li, type|
    li.css('a').each_with_object({}) do |a, channellist|
      next if channellist.has_key?(a.content)
      channellist[a.content] = { url: a.get_attribute("href"), type: type }
    end
  end

  max_attempts = 5
  last_error = nil
  # A local, bounded counter replaces the shared @error_num bookkeeping so
  # a previous failure can never push the count past the limit unnoticed.
  max_attempts.times do
    doc = @grabbase.get_doc_with_proxy(@proxy_list, @home_page)
    begin
      doc.css("li").each do |li|
        case ChannelTypeMap[li.get_attribute("class")]
        when "央视"
          @channels.merge!(get_channellist.call(li, "CCTV"))
        when "卫视"
          @channels.merge!(get_channellist.call(li, "WTV"))
        when "海外", "地方"
          # Known categories that are not collected from the home page.
        end
      end
      @error_num = 0
      return @channels
    rescue => err
      last_error = err
    end
  end
  raise last_error.to_s
end
#dispose_href_schedule_data(href, start_time, use_time) ⇒ Object
获取频道时间表URL
147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
# File 'lib/grabepg/grab_tvsou.rb', line 147
# Builds one schedule URL per day by replacing the value of the
# "&programDT=" query parameter in +href+ with each date produced by
# get_data(start_time, use_time). Parameters that follow programDT in the
# original href are kept, in order.
#
# @param href [String] URL containing "&programDT=<date>"
# @param start_time [Object] forwarded to get_data
# @param use_time [Object] forwarded to get_data
# @return [Array<Hash>] entries of the form { url:, time:, date: }
# @raise [NoMethodError] when href has no "&programDT=" part (the text
#   after the marker is nil)
def dispose_href_schedule_data(href, start_time, use_time)
  prefix, after_marker = href.split("&programDT=")
  # The first piece is the old date value; everything after it is kept.
  trailing_params = after_marker.split("&").drop(1)

  get_data(start_time, use_time).map do |day|
    day_url = prefix + "&programDT=" + day[:time]
    trailing_params.each { |param| day_url += "&" + param }
    { url: day_url, time: day[:time], date: day[:date] }
  end
end
#dispose_schedule_page(url, start_time, use_time) ⇒ Object
根据URL解析时间表页面
163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 |
# File 'lib/grabepg/grab_tvsou.rb', line 163
# Parses the schedule pages of one channel.
#
# Steps:
# 1. Fetch +url+ and read the href of the first link inside
#    div[class='week'] (retried up to five times, fetch included).
# 2. Expand that href into one URL per day via dispose_href_schedule_data.
# 3. For every day page, read each li[class='gray'] inside
#    div[class="time"] into { time:, schedule_name:, url: }.
#
# When two consecutive entries are less than 5 apart (times compared as
# HHMM integers, e.g. "19:02" -> 1902), the earlier entry is dropped.
# Entries that cannot be parsed are logged with `p` and skipped; day pages
# that are nil or lack the time div are logged and left out of the result.
# NOTE(review): @site is not assigned in this method - presumably the site
# root set elsewhere; confirm.
#
# @param url [String] schedule page URL of the channel
# @param start_time [Object] forwarded to dispose_href_schedule_data
# @param use_time [Object] forwarded to dispose_href_schedule_data
# @return [Hash] day[:date] => Array of schedule hashes
# @raise [RuntimeError] with the last error's message when the week link
#   cannot be read in five attempts
def dispose_schedule_page(url, start_time, use_time)
  base = url.split("?")[0]
  max_attempts = 5
  attempts = 0
  begin
    doc = @grabbase.get_doc_with_proxy(@proxy_list, url)
    week_href = doc.css("div[class='week']")[0].css('a')[0].get_attribute("href")
    # Reset only after the page was fetched AND parsed successfully.
    @error_num = 0
  rescue => err
    attempts += 1
    raise err.to_s if attempts >= max_attempts
    retry
  end

  day_pages = dispose_href_schedule_data(base + week_href, start_time, use_time)
  ret = {}
  last_time = -5
  last_schedule = nil
  day_pages.each do |day|
    p "Grab url: #{day}"
    next unless day

    page = @grabbase.get_doc_with_proxy(@proxy_list, day[:url])
    div = page ? page.css('div[class="time"]')[0] : nil
    unless div
      p "Error In this url: #{day} couldn't get doc.css('div[class=time]')[0]"
      next
    end

    schedules = []
    div.css("li[class='gray']").each do |schedule|
      begin
        label = schedule.css("span")[0].text
        # The item's text is "<time><label>"; removing the label leaves the time.
        time = schedule.content.gsub(label, "")
        links = schedule.css('a')
        # Items without a link are kept with a nil url; the last link of
        # the item is the one used otherwise.
        show_url = links[0] ? @site + "/" + links[links.count - 1].get_attribute("href") : nil
        # Strip the literal " 剧情" link label. String#delete would remove
        # every space, 剧 and 情 character from the title instead.
        entry = { time: time, schedule_name: label.gsub(" 剧情", ""), url: show_url }
        schedules << entry
        now = time.gsub(":", "").to_i
        if (now - last_time) < 5
          # Remove the previous entry by identity so an identical new
          # entry is not removed along with it.
          schedules.delete_if { |kept| kept.equal?(last_schedule) }
        end
        last_schedule = entry
        last_time = now
      rescue StandardError
        p "Schedule: #{schedule}"
      end
    end
    ret.merge!({ day[:date] => schedules })
  end
  ret
end
#dispose_show_info(url) ⇒ Object
解析节目详情页面
222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 |
# File 'lib/grabepg/grab_tvsou.rb', line 222
# Parses a programme detail page.
#
# Reads the title from div[class="tv_info_top"], and the first <img> src
# (spaces removed) and first <p> text (with "[全文]" removed) from
# div[class='tv_info'].
#
# The page is fetched and parsed up to five times; a nil document or a
# parse error counts as a failed attempt. Exceptions raised by the fetch
# itself are not retried and propagate.
#
# @param url [String] URL of the programme detail page
# @return [Hash] { show_name:, img_url:, show_info: }
# @raise [RuntimeError] with the last failure's message once all five
#   attempts have failed
def dispose_show_info(url)
  max_attempts = 5
  last_error = nil
  # A local, bounded counter: every failed attempt is counted exactly once
  # and the limit cannot be skipped over.
  max_attempts.times do
    doc = @grabbase.get_doc_with_proxy(@proxy_list, url)
    begin
      raise "no document returned for #{url}" if doc.nil?

      show_name = doc.css('div[class="tv_info_top"]')[0].content
      info = doc.css("div[class='tv_info']")
      img_url = info.css("img")[0].get_attribute("src").gsub(" ", "")
      show_info = info.css("p")[0].content.gsub("[全文]", "")
      @error_num = 0
      return { show_name: show_name, img_url: img_url, show_info: show_info }
    rescue => err
      last_error = err
    end
  end
  raise last_error.to_s
end
#get_channel_logo(_url, channel_type, no_dis = false) ⇒ Object
获取频道图标地址 url 手机表的URL值 channel_type 频道类型 no_dis 直接使用URL 不处理
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
# File 'lib/grabepg/grab_tvsou.rb', line 126
# Returns the src of the channel logo image, i.e. the first <img> inside
# div[id='epg_m1'] of the channel's EPG page.
#
# Unless +no_dis+ is set, the EPG page URL is derived from the "TVid" and
# "Channelid" query values of +_url+ and from +channel_type+:
#   "CCTV" -> http://epg.tvsou.com/programys/TV_<tvid>/Channel_<channelid>/W1.htm
#   "WTV"  -> http://epg.tvsou.com/programws/TV_<tvid>/Channel_<channelid>/W1.htm
#
# @param _url [String] URL carrying TVid/Channelid, or the page URL itself
#   when +no_dis+ is true
# @param channel_type [String] "CCTV" or "WTV" (ignored when +no_dis+)
# @param no_dis [Boolean] fetch +_url+ as-is, without deriving a new URL
# @return [String, nil] value of the logo's src attribute
# @raise [ArgumentError] when +channel_type+ is not supported or +_url+
#   lacks TVid/Channelid (only when +no_dis+ is false)
# @raise [NoMethodError] when the fetched document is nil or contains no
#   logo image
def get_channel_logo(_url, channel_type, no_dis = false)
  if no_dis
    url = _url
  else
    # Path segment of the EPG site for each supported channel type.
    sections = { "CCTV" => "programys", "WTV" => "programws" }
    section = sections[channel_type]
    # Fail early instead of fetching a nil URL for unknown types.
    raise ArgumentError, "unsupported channel_type: #{channel_type.inspect}" unless section

    tvid = _url[/TVid=([^&]*)/, 1]
    channelid = _url[/Channelid=([^&]*)/, 1]
    unless tvid && channelid
      raise ArgumentError, "url must contain TVid= and Channelid=: #{_url.inspect}"
    end

    url = "http://epg.tvsou.com/#{section}/TV_#{tvid}/Channel_#{channelid}/W1.htm"
  end

  doc = @grabbase.get_doc_with_proxy(@proxy_list, url)
  doc.css("div[id='epg_m1']").css("img")[0].get_attribute("src")
end
#get_data(start_time, use_time) ⇒ Object
获取时间 start_time 时间起始点 use_time 天数
65 66 67 68 69 70 71 72 73 |
# File 'lib/grabepg/grab_tvsou.rb', line 65
# Builds the date descriptors for +use_time+ consecutive days, starting
# +start_time+ days from now. Each day is Time.now shifted by a multiple
# of 24*60*60 seconds and passed to get_data_year_month_day.
#
# @param start_time [Numeric] offset of the first day, in days from now
# @param use_time [Integer] number of days to generate
# @return [Array] one get_data_year_month_day result per day
def get_data(start_time, use_time)
  first_day = Time.now + start_time * 24 * 60 * 60
  use_time.times.map do |offset|
    get_data_year_month_day(first_day + offset * 24 * 60 * 60)
  end
end
#get_data_year_month_day(time) ⇒ Object
49 50 51 52 53 54 55 56 57 58 59 60 |
# File 'lib/grabepg/grab_tvsou.rb', line 49
# Formats a Time into the two strings used for schedule lookups:
#   :time -> "<year>-<month>-<day>" (day zero-padded, month NOT padded)
#   :date -> "<weekday>(<MM>-<DD>)", where the weekday label comes from
#            @grabbase.conversion_what_day(time.wday)
# NOTE(review): the month in :time is not zero-padded although the day is;
# this may be the format the site expects or an oversight - confirm before
# changing.
#
# @param time [Time] the day to format
# @return [Hash] { time: String, date: String }
def get_data_year_month_day(time)
  padded_month = time.month.to_s.rjust(2, "0")
  padded_day = time.day.to_s.rjust(2, "0")
  {
    time: "#{time.year}-#{time.month}-#{padded_day}",
    date: "#{@grabbase.conversion_what_day(time.wday)}(#{padded_month}-#{padded_day})"
  }
end
#get_proxy_list ⇒ Object
39 40 41 |
# File 'lib/grabepg/grab_tvsou.rb', line 39
# Returns the proxy list held in @proxy_list.
#
# @return [Object] the current value of @proxy_list
def get_proxy_list
  @proxy_list
end
#get_url(type) ⇒ Object
获取从tvsou的什么网站上获取 type: mobile,webpage
45 46 47 |
# File 'lib/grabepg/grab_tvsou.rb', line 45
# Returns the tvsou entry URL for the given site type. Only the string
# "mobile" is mapped; any other value (including "webpage") yields nil.
#
# @param type [Object] site type; compared with eql?, so it must be the
#   String "mobile" to match
# @return [String, nil] the mobile index URL, or nil for other types
def get_url(type)
  type.eql?("mobile") ? "http://m.tvsou.com/index.asp" : nil
end