1. 頁面分析
① 獲取每支球員頁面的url; ② 利用Python代碼獲取每個網頁中的數據; ③ 將獲取到的數據,存儲至不同的數據庫;
# 76人
https://nba.stats.qq.com/player/list.htm#teamId=20
# 火箭
https://nba.stats.qq.com/player/list.htm#teamId=10
# 熱火
https://nba.stats.qq.com/player/list.htm#teamId=14
部分截圖如下
2. 數據爬取
from selenium import webdriver
# Scrape one team's player table with Selenium and collect each column into
# a plain list of strings (one entry per matched table row).
from selenium.webdriver.common.by import By

# Create the browser object; this opens a Chrome browser window.
browser = webdriver.Chrome()
# Ask the browser to load the player-list page for teamId=20.
browser.get("https://nba.stats.qq.com/player/list.htm#teamId=20")
# Maximize the window.
browser.maximize_window()

# NOTE: the find_elements_by_xpath() helpers were removed in Selenium 4.3;
# find_elements(By.XPATH, ...) is available in both Selenium 3 and 4.

# Chinese names: text of the link in the 2nd cell of each row.
chinese_names = browser.find_elements(By.XPATH, '//div[@class="players"]//tr[@class="show"]/td[2]/a')
chinese_names_list = [i.text for i in chinese_names]
# English names: taken from the "title" attribute of the link in the 3rd cell.
english_names = browser.find_elements(By.XPATH, '//div[@class="players"]//tr[@class="show"]/td[3]/a')
english_names_list = [i.get_attribute('title') for i in english_names]
# Jersey numbers (4th cell).
numbers = browser.find_elements(By.XPATH, '//div[@class="players"]//tr[@class="show"]/td[4]')
numbers_list = [i.text for i in numbers]
# Positions (5th cell).
locations = browser.find_elements(By.XPATH, '//div[@class="players"]//tr[@class="show"]/td[5]')
locations_list = [i.text for i in locations]
# Heights (6th cell).
heights = browser.find_elements(By.XPATH, '//div[@class="players"]//tr[@class="show"]/td[6]')
heights_list = [i.text for i in heights]
# Weights (7th cell).
weights = browser.find_elements(By.XPATH, '//div[@class="players"]//tr[@class="show"]/td[7]')
weights_list = [i.text for i in weights]
# Ages (8th cell). Iterate the located elements (`ages`); iterating
# `ages_list` here would raise NameError because it does not exist yet.
ages = browser.find_elements(By.XPATH, '//div[@class="players"]//tr[@class="show"]/td[8]')
ages_list = [i.text for i in ages]
# Years in the league (9th cell). Same fix: iterate `qiu_lings`.
qiu_lings = browser.find_elements(By.XPATH, '//div[@class="players"]//tr[@class="show"]/td[9]')
qiu_lings_list = [i.text for i in qiu_lings]
3. 存儲至txt
# Append one line per player to NBA.txt. The file is opened once for the
# whole loop rather than being re-opened for every row.
# NOTE(review): the original indentation was lost; this assumes both newline
# writes belong to each row (row text, then a blank line) — confirm.
with open("NBA.txt", "a+", encoding="utf-8") as f:
    for row in zip(chinese_names_list, english_names_list, numbers_list, locations_list,
                   heights_list, weights_list, ages_list, qiu_lings_list):
        # zip() yields a tuple; str() renders it as "(...)" and the slice
        # strips the surrounding parentheses.
        f.write(str(row)[1:-1])
        # End the current row, then leave an empty line before the next one.
        f.write("\n")
        f.write("\n")
4. 存儲至excel
import pandas as pd
# Organize the scraped columns into a DataFrame, one column per field.
df = pd.DataFrame({
    "中文名": chinese_names_list,
    "英文名": english_names_list,
    "球員號碼": numbers_list,
    "位置": locations_list,
    "身高": heights_list,
    "體重": weights_list,
    "年齡": ages_list,
    "球齡": qiu_lings_list,
})
# Write the sheet without the row index. The `encoding` keyword is not passed:
# it was deprecated in pandas 1.5 and removed in 2.0, where it raises
# TypeError. `index=False` is the documented way to omit the index column.
df.to_excel("NBA.xlsx", index=False)
5. 存儲至mysql
① 創建一個表nba
import pymysql
# 1. Connect to the database.
db = pymysql.connect(host='localhost',user='root', password='123456',port=3306, db='demo', charset='utf8')
try:
    # 2. Create the table.
    # Create a cursor object.
    cursor = db.cursor()
    # Table definition: one varchar(20) column per scraped field.
    sql = """
create table NBA(
chinese_names_list varchar(20),
english_names_list varchar(20),
numbers_list varchar(20),
locations_list varchar(20),
heights_list varchar(20),
weights_list varchar(20),
ages_list varchar(20),
qiu_lings_list varchar(20)
)charset=utf8
"""
    # Execute the SQL statement.
    cursor.execute(sql)
finally:
    # Always release the connection, even when the statement fails
    # (for example, when the table already exists).
    db.close()
② 往表nba中插入數據
import pymysql
# 1. Organize the data: one tuple per player, in the column order used by
#    the insert statement below.
data_list = list(zip(chinese_names_list, english_names_list, numbers_list, locations_list,
                     heights_list, weights_list, ages_list, qiu_lings_list))
# 2. Connect to the database.
db = pymysql.connect(host='localhost',user='root', password='123456',port=3306, db='demo', charset='utf8')
try:
    # Create a cursor object.
    cursor = db.cursor()
    # 3. Insert the data.
    # The table is created as `NBA`; MySQL table names are case-sensitive on
    # case-sensitive file systems, so the same spelling is used here.
    sql = 'insert into NBA(chinese_names_list,english_names_list,numbers_list,locations_list,heights_list,weights_list,ages_list,qiu_lings_list) values(%s,%s,%s,%s,%s,%s,%s,%s)'
    try:
        cursor.executemany(sql, data_list)
        db.commit()
        print("插入成功")
    except Exception as exc:
        # Report the cause instead of silently swallowing it, then undo the
        # partial insert.
        print("插入失敗", exc)
        db.rollback()
finally:
    # Close the connection whether or not the insert succeeded.
    db.close()








暫無數據