|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | +""" |
| 3 | +Created on Tue Mar 13 15:11:32 2018 |
| 4 | +
|
| 5 | +@author: Administrator |
| 6 | +""" |
| 7 | + |
| 8 | +from bs4 import BeautifulSoup |
| 9 | +import urllib.request |
| 10 | +import urllib |
| 11 | +import xlwt |
| 12 | + |
def gethtml(url, headers=None):
    """Download *url* and return the response body decoded as UTF-8.

    Args:
        url: Address to request.
        headers: Optional mapping of HTTP request header names to values.
            ``None`` (the default) sends no extra headers.

    Returns:
        The full response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # Copy into a fresh dict per call rather than sharing a mutable default.
    req = urllib.request.Request(url, headers=dict(headers or {}))
    # The context manager closes the response even if read()/decode() raises.
    with urllib.request.urlopen(req) as response:
        return response.read().decode('utf-8')
def set_style(name, height, bold=False):
    """Build an ``xlwt.XFStyle`` whose font uses the given settings.

    Args:
        name: Font family name assigned to ``font.name``.
        height: Value assigned to ``font.height`` (callers in this file
            pass 220).
        bold: Whether the font is bold. Defaults to ``False``.

    Returns:
        A new ``xlwt.XFStyle`` carrying the configured font.
    """
    # Configure the font first, then attach it to a fresh style object.
    cell_font = xlwt.Font()
    cell_font.name = name
    cell_font.bold = bold
    cell_font.color_index = 4  # fixed colour index used for every cell
    cell_font.height = height

    cell_style = xlwt.XFStyle()
    cell_style.font = cell_font
    return cell_style
| 31 | + |
def parsehtmlMusicList(html, output_path='C:/Users/Administrator/Desktop/xlwtDemo.xls'):
    """Parse the playlist listing page and export it to an ``.xls`` workbook.

    One header row is written, followed by one row per playlist holding the
    playlist description, its link, its play count and its author.

    Args:
        html: Markup of the playlist listing page.
        output_path: Where the workbook is saved. Defaults to the path the
            script has always written to.

    Returns:
        None. The result is the workbook written to ``output_path``.
    """
    soup = BeautifulSoup(html, 'lxml')
    # Cover images are not exported; they only take part in bounding the
    # number of rows, exactly as the original length check did.
    list_pic = soup.select('ul#m-pl-container li div img')
    list_nameUrl = soup.select('ul#m-pl-container li div a.msk')
    list_num = soup.select('div.bottom span.nb')
    # NOTE(review): this selector matches every <a> under any <p> inside a
    # list item; confirm it yields exactly one author link per playlist.
    list_author = soup.select('ul#m-pl-container li p a')

    # Create the workbook and its single sheet.
    workbook = xlwt.Workbook(encoding='utf-8')
    data_sheet = workbook.add_sheet('demo')

    # Column widths. The fifth entry targets a column that receives no data;
    # it is kept so the generated file is unchanged.
    for col, width in enumerate((9999, 9999, 4444, 3333, 3333)):
        data_sheet.col(col).width = width

    # Every cell uses the same style, so build it once instead of per cell.
    cell_style = set_style('Times New Roman', 220, True)

    # Header row.
    row0 = [u'歌单介绍', u'歌曲链接地址', u'歌曲播放次数', u'歌单作者']
    for col, title in enumerate(row0):
        data_sheet.write(0, col, title, cell_style)

    # zip() stops at the shortest list, so a page where the selectors return
    # different counts no longer raises IndexError part-way through.
    records = zip(list_pic, list_nameUrl, list_num, list_author)
    for row_index, (_pic, name_link, play_count, author_link) in enumerate(records, start=1):
        row = [
            name_link['title'],    # playlist description
            name_link['href'],     # playlist link
            play_count.text,       # play count
            author_link['title'],  # playlist author
        ]
        for col, value in enumerate(row):
            data_sheet.write(row_index, col, value, cell_style)

    workbook.save(output_path)
# Script entry: download the playlist discovery page and export it to Excel.
# `url` ends up holding the downloaded HTML text, which is then parsed.
url = gethtml(
    'http://music.163.com/discover/playlist',
    headers={
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
        'Host': 'music.163.com',
    },
)
parsehtmlMusicList(url)
| 75 | + |
| 76 | + |
0 commit comments