闲聊的时候,突然想写个爬虫,爬取两个人在网易云音乐上共同听过的歌曲,于是一鼓作气,花了一个多小时写了一个。支持"最近一周"和"所有时间"两种听歌排行,前提是用户没有关闭听歌排行的公开显示。

How to start

使用到的工具是 Selenium,一个 Web 自动化测试工具,提供 IDE 以及插件等多种使用方式。Python 下的安装方式:

pip install selenium

chromedriver:下载与本机 Chrome 版本对应的 chromedriver,仓库里带了一份对应 Chrome version 78 的 chromedriver.exe(代码默认从 ./drv/chromedriver 加载)。

代码不多,60行,我就直接贴代码了。

python main.py


# -*- coding:utf-8 -*-
from enum import Enum
from time import sleep

from selenium import webdriver
import selenium.webdriver.support.ui as ui
import sys

# Python 2 only: force the process-wide default codec to UTF-8 so implicit
# str/unicode conversions of non-ASCII text do not raise. `reload` is not a
# builtin on Python 3 (and its default encoding is already UTF-8), so the
# hack is skipped there instead of crashing with NameError at import time.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding('utf8')


def get_song_rank_list(url, songs_time):
    """Scrape a listening-rank page and return the songs shown on it.

    Opens ``url`` in a Selenium-driven Chrome, switches into the
    ``g_iframe`` frame, waits for the ``g-bd`` element, optionally clicks
    the ``songsall`` tab, then reads every ``<li>`` under the ``m-record``
    element. The page header and each song line are printed as they are read.

    Args:
        url: Address of the rank page to open.
        songs_time: ``1`` clicks the ``songsall`` tab before reading; any
            other value reads the list as the page first renders it.

    Returns:
        dict mapping each song name to the string ``"name, singer, times"``.
        Later entries with the same name overwrite earlier ones.

    Raises:
        selenium.common.exceptions.TimeoutException: if the ``g-bd`` element
            does not appear within 15 seconds.
    """
    print("\n-------------Start---------------")
    driver = webdriver.Chrome(
        executable_path='./drv/chromedriver')
    # https://sites.google.com/a/chromium.org/chromedriver/downloads
    # (download the driver matching the installed Chrome version)

    song_dict = {}
    try:
        driver.get(url)
        driver.switch_to.frame('g_iframe')
        wait = ui.WebDriverWait(driver, 15)

        # until() either returns the located element or raises
        # TimeoutException, so no truthiness check is needed afterwards.
        wait.until(lambda drv: drv.find_element_by_class_name('g-bd'))

        if songs_time == 1:
            driver.find_element_by_id('rHeader').find_element_by_id('songsall').click()
            # Fixed pause to give the list time to re-render after the click.
            sleep(3)

        header = driver.find_element_by_id('rHeader').find_element_by_tag_name('h4').text
        print(header)

        items = driver.find_element_by_class_name('m-record').find_elements_by_tag_name('li')
        print("Top{}:".format(len(items)))
        for item in items:
            # WebElement.text is already text, so no .decode() is applied:
            # on Python 3 str has no decode(), and on Python 2 it was only a
            # round trip through the default codec.
            name = item.find_element_by_tag_name('b').text
            singer = item.find_element_by_class_name('s-fc8').text.replace('-', '')
            # Raw inline style of the 'bg' element; presumably its width
            # encodes the relative play count -- TODO confirm against the page.
            times = item.find_element_by_class_name('bg').get_attribute('style')
            song = "{}, {}, {}".format(name, singer, times)
            print(song)
            song_dict[name] = song
    finally:
        # Always shut the browser down; otherwise every call leaves a Chrome
        # window and a chromedriver process running, even on failure.
        driver.quit()
    return song_dict


def compare_song(dict1, dict2):
    """Print every song of ``dict1`` whose name is also a key of ``dict2``.

    Args:
        dict1: Mapping of song name to a printable song description; its
            values are what gets printed.
        dict2: Mapping whose keys are song names; only membership is used.

    Returns:
        None. The matches are written to stdout between banner lines.
    """
    print("\n--------------------------------")
    print("\n---->Same song between us")
    # items() exists on both Python 2 and Python 3 dicts, whereas
    # iteritems() is Python-2-only and raises AttributeError on Python 3.
    for name, song in dict1.items():
        if name in dict2:
            print(song)
    print("---->End")
    print("\n--------------------------------")

# Values for the `songs_time` argument of get_song_rank_list: SONGS_ALL (1)
# makes it click the "songsall" tab; SONGS_WEEK (0) leaves the page on the
# list it shows by default (presumably the last-week ranking).
SONGS_WEEK, SONGS_ALL = 0, 1

if __name__ == '__main__':
    # Scrape both users' all-time rankings (first user first), then print
    # the songs whose names appear in both.
    girl_url = "https://music.163.com/#/user/songs/rank?id=288007045"
    boy_url = "https://music.163.com/#/user/songs/rank?id=39661960"
    compare_song(
        get_song_rank_list(girl_url, SONGS_ALL),
        get_song_rank_list(boy_url, SONGS_ALL),
    )

Console

12-25 11:55
查看更多