"""Crawl the dytt8.net movie listing and print each movie's download links."""
import re
import sys

import requests

_SITE_ROOT = 'http://www.dytt8.net'
_LIST_URL_TEMPLATE = _SITE_ROOT + '/html/gndy/dyzz/list_23_%s.html'
# Seconds to wait on a request; without a timeout a stalled server would
# block the crawl indefinitely.
_REQUEST_TIMEOUT = 10

# Patterns are compiled once because they are reused for every page.
# Captures are bounded (non-greedy / "no quote") so that a line holding
# several tags cannot be swallowed into a single over-long match.
_TITLE_RE = re.compile(r'<h1><font color=#07519a>(.*?)</f')
_MAGNET_RE = re.compile(r'/><a href="([^"\n]*)"><str')
# Matches an anchor whose link text starts with "ft"; the original code
# calls this value the "torrent" link.
_TORRENT_RE = re.compile(r'ddf"><a href="([^"\n]*)">ft')
_DETAIL_LINK_RE = re.compile(r'<a href="([^"\n]*)" class="ulink"')


def getdetail(url):
    """Fetch one movie detail page and print its title and download links.

    Args:
        url: Absolute URL of a movie detail page.

    Returns:
        A one-entry dict ``{title: [magnet_link, torrent_link]}`` (the same
        object that is printed), or ``None`` when the page does not contain
        all three expected fragments.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    # The page is decoded as GBK; errors='replace' keeps a stray invalid
    # byte from aborting the whole page with UnicodeDecodeError.
    html = response.content.decode('gbk', errors='replace')

    # Title, magnet link, torrent link -- in that order.
    matches = [
        pattern.search(html)
        for pattern in (_TITLE_RE, _MAGNET_RE, _TORRENT_RE)
    ]
    if any(match is None for match in matches):
        # re.search returns None on no match; skip the page instead of
        # crashing on .group(1).
        print('skipping %s: expected title/links not found' % url,
              file=sys.stderr)
        return None

    movie_title, movie_magnet, movie_torrent = (m.group(1) for m in matches)

    # Map the title to its two download links.
    movie_dict = {movie_title: [movie_magnet, movie_torrent]}
    print(movie_dict)
    return movie_dict


def getpage(first_page=1, last_page=177):
    """Walk the listing pages and process every movie detail link on them.

    Args:
        first_page: Number of the first listing page to visit (inclusive).
        last_page: Number of the last listing page to visit (inclusive).
            The defaults reproduce the original hard-coded range 1..177.

    Network failures on a listing page or a detail page are reported on
    stderr and skipped so that one bad request does not end the crawl.
    """
    for page_number in range(first_page, last_page + 1):
        list_url = _LIST_URL_TEMPLATE % page_number
        try:
            response = requests.get(list_url, timeout=_REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            print('failed to fetch %s: %s' % (list_url, exc), file=sys.stderr)
            continue

        # Only href values are extracted here, so the encoding guessed by
        # requests for .text is kept as in the original (hrefs are expected
        # to be plain ASCII paths).
        for movie_item in _DETAIL_LINK_RE.findall(response.text):
            movie_url = _SITE_ROOT + movie_item
            try:
                getdetail(movie_url)
            except requests.RequestException as exc:
                print('failed to fetch %s: %s' % (movie_url, exc),
                      file=sys.stderr)


if __name__ == '__main__':
    getpage()
最新评论