Script para pegar a programação do dia da TV fechada (cabo) no site http://www.hagah.com.br
O script faz uso do BeautifulSoup para fazer o parsing da página.
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 import urllib2
4 from BeautifulSoup import BeautifulSoup
5 from datetime import datetime
# Today's date formatted as DD/MM/YYYY; it is substituted into the `data`
# query parameter of every schedule request below.
hoje = datetime.now().strftime('%d/%m/%Y')
print(hoje)
9
# Channels to fetch, as [code, display name] pairs.
#   * code         -> substituted into the `canal` query parameter of the
#                     schedule URL
#   * display name -> printed as the header above each channel's listing
# Entries that are commented out are skipped; uncomment a line to include
# that channel in the output.
canais = [
    ["MDO", "A & E Mundo"],
    # ["APL", "Animal Planet"],
    # ["LOC", "Animax"],
    ["UNO", "AXN"],
    # ["BAN", "Bandeirantes"],
    # ["BBC", "BBC World"],
    # ["BIT", "Bloomberg"],
    # ["BMG", "Boomerang"],
    # ["CBR", "Canal Brasil"],
    # ["FUT", "Canal Futura"],
    # ["CAR", "Cartoon Network"],
    # ["MAX", "Cinemax"],
    # ["MXE", "Cinemax e"],
    # ["CLI", "Clima Tempo"],
    # ["CNE", "CNN Espanhol"],
    # ["CNN", "CNN International"],
    # ["CNT", "CNT"],
    # ["DWL", "Deutsche Welle"],
    # ["DIS", "Discovery Channel"],
    # ["HEA", "Discovery Home & Health"],
    # ["DIK", "Discovery Kids"],
    # ["TRV", "Discovery Travel & Living"],
    # ["DNY", "Disney Channel"],
    # ["EET", "E! Entertainment "],
    # ["ESB", "ESPN Brasil"],
    # ["ESP", "ESPN International"],
    # ["EUR", "EuroChannel"],
    # ["FAS", "Fashion TV"],
    # ["GLS", "For Man"],
    # ["FOX", "Fox"],
    # ["FLI", "Fox Life"],
    # ["FNE", "Fox News"],
    ["CFX", "FX"],
    # ["GLN", "Globo News"],
    # ["GNT", "GNT"],
    ["HAL", "Hallmark"],
    # ["HBO", "HBO"],
    # ["HBE", "HBO e"],
    # ["HFA", "HBO Family"],
    # ["HFE", "HBO Family e"],
    # ["HPL", "HBO Plus"],
    # ["HPE", "HBO Plus e"],
    # ["KID", "Jetix"],
    # ["MAP", "Max Prime"],
    # ["MPE", "Max Prime e"],
    # ["MGM", "MGM"],
    # ["MTV", "MTV Brasil"],
    # ["MTH", "MTV Hits"],
    # ["MSW", "Multishow"],
    # ["SUP", "National Geographic"],
    # ["NIC", "Nickelodeon"],
    ["TRA", "People+Arts"],
    # ["PLA", "Playboy TV"],
    # ["RAI", "RAI (italiano)"],
    # ["POA", "RBS TV"],
    # ["REC", "Record"],
    # ["RTV", "Rede TV"],
    # ["VDA", "Rede Vida"],
    # ["NAC", "Sesc TV"],
    # ["HOT", "Sexy Hot"],
    # ["SHO", "Shoptime"],
    # ["SIC", "SIC Internacional"],
    ["SET", "Sony Entertainment Television"],
    # ["SPE", "Speed Channel"],
    # ["SPO", "SPORTV"],
    # ["SP2", "SPORTV 2"],
    # ["TC2", "Telecine Action"],
    # ["TC5", "Telecine Cult"],
    # ["TC3", "Telecine Light"],
    # ["TC4", "Telecine Pipoca"],
    # ["TC1", "Telecine Premium"],
    # ["TGC", "The Golf Channel"],
    # ["HIS", "The History Channel"],
    # ["TNT", "TNT"],
    # ["TCM", "Turner Classic Movies"],
    # ["CAM", "TV Câmara"],
    # ["CNV", "TV Canção Nova"],
    # ["CUL", "TV Cultura"],
    # ["ESC", "TV Escola"],
    # ["JUS", "TV Justiça"],
    # ["RTB", "TV Rá Tim Bum"],
    # ["SEN", "TV Senado"],
    # ["TVV", "TV Terra Viva"],
    # ["TED", "TVE Brasil"],
    # ["TVE", "TVE Internacional"],
    # ["TV5", "TV5MONDE"],
    # ["USA", "Universal Channel"],
    # ["VH1", "VH1"],
    ["WBT", "Warner"],
]
101
# Schedule page for one channel on one day. The two %s placeholders are the
# channel code and the date (DD/MM/YYYY), in that order.
# Fix: the original URL contained "®ionId" -- "&regionId" with the "&reg"
# prefix mangled into the (R) sign -- which corrupted the query string.
URL_PROGRAMACAO = (
    "http://www.hagah.com.br/programacao-tv/jsp/default.jsp"
    "?uf=1&local=1&regionId=1&action=programacao_canal"
    "&canal=%s&operadora=14&data=%s"
)

# For every enabled channel: download its schedule page, then print one
# "time |programme" line per <strong> element found in the second cell of
# each table row, followed by a separator line.
for canal in canais:
    pagina = urllib2.urlopen(URL_PROGRAMACAO % (canal[0], hoje))
    try:
        print("***** %s *****" % canal[1])
        # Fix: the encoding hint was spelled "iso-8858-1", which is not a
        # real codec name; the intended Latin-1 name is "iso-8859-1".
        conteudo = BeautifulSoup(pagina.read(), fromEncoding="iso-8859-1")
    finally:
        # Always release the HTTP connection, even if reading/parsing fails.
        pagina.close()

    for linha in conteudo.findAll('tr'):
        dado = linha.findAll('td')
        # Rows without both a time cell and a programme cell (header or
        # layout rows, empty time cells) carry no schedule entry; skip them
        # instead of failing with IndexError.
        if len(dado) < 2 or not dado[0].contents:
            continue
        hora = dado[0].contents[0]
        for item in dado[1].findAll('strong'):
            # An empty <strong></strong> has no text to print.
            if item.contents:
                print("%s |%s " % (hora, item.contents[0]))
    print("----------------------------------------------------------------------------------------------------")
116 print "%s |%s " % (hora.contents[0],item.contents[0])
117 print "----------------------------------------------------------------------------------------------------"