这是我为网络抓取编写的代码。我想先把所有数据保存到字典中,然后再存入数据框。直到最后一次迭代,字典里都还保存着数据,但是退出循环之后,字典中的所有列表都变成了空列表。
# Download the Sky Sports Premier League table page for every season from 2011
# to 2020 and store, per season, the text of the 10th "standing-table__cell"
# of each row (the team's points, going by the variable name) in league_data.
#
# NOTE: this is the question's original, intentionally unfixed version.
# league_data[i] is bound to the very same list object as team_points, so the
# team_points.clear() call at the end of each iteration also empties every
# list already stored in league_data. Storing team_points.copy() avoids this.
i=2011
# The initialisation is commented out, so league_data must already exist when
# this runs (presumably left over from an earlier run in the same session).
#league_data={}
team_names=[]
team_points=[]
while i<2021:
    print(i)
    url="https://www.skysports.com/premier-league-table/"+str(i)
    page=requests.get(url)
    #print(page.status_code)
    soup= BeautifulSoup(page.text,'html.parser')
    league=soup.find('table',class_ ='standing-table__table')
    league_table = league.find_all('tbody')
    for league_teams in league_table:
        rows = league_teams.find_all('tr')
        for row in rows:
            # Team names are collected only once, from the first season.
            if i==2011:
                team_name = row.find('td', class_ ='standing-table__cell standing-table__cell--name').text.strip()
                team_names.append(team_name)
            # Index 9 selects the 10th cell of the row.
            team_point = row.find_all('td', class_ = 'standing-table__cell')[9].text.strip()
            team_points.append(team_point)
    print(team_points)
    # BUG (subject of the question): stores a reference, not a snapshot.
    league_data[i]=team_points
    print(league_data)
    # Clears the shared list, and therefore every league_data entry as well.
    team_points.clear()
    i=i+1
#print(team_names)
#print(len(team_names))
print(league_data)
这是输出。在输出中,我在每次迭代中都打印了列表和字典状态
2011
['89', '89', '70', '69', '65', '64', '56', '52', '52', '47', '47', '47', '45', '45', '43', '38', '37', '36', '31', '25']
{2011: ['89', '89', '70', '69', '65', '64', '56', '52', '52', '47', '47', '47', '45', '45', '43', '38', '37', '36', '31', '25'], 2012: [], 2013: [], 2014: [], 2015: [], 2016: [], 2017: [], 2018: [], 2019: [], 2020: []}
2012
['89', '78', '75', '73', '72', '63', '61', '49', '46', '46', '44', '43', '42', '41', '41', '41', '39', '36', '28', '25']
{2011: ['89', '78', '75', '73', '72', '63', '61', '49', '46', '46', '44', '43', '42', '41', '41', '41', '39', '36', '28', '25'], 2012: ['89', '78', '75', '73', '72', '63', '61', '49', '46', '46', '44', '43', '42', '41', '41', '41', '39', '36', '28', '25'], 2013: [], 2014: [], 2015: [], 2016: [], 2017: [], 2018: [], 2019: [], 2020: []}
2013
['86', '84', '82', '79', '72', '69', '64', '56', '50', '49', '45', '42', '40', '38', '38', '37', '36', '33', '32', '30']
{2011: ['86', '84', '82', '79', '72', '69', '64', '56', '50', '49', '45', '42', '40', '38', '38', '37', '36', '33', '32', '30'], 2012: ['86', '84', '82', '79', '72', '69', '64', '56', '50', '49', '45', '42', '40', '38', '38', '37', '36', '33', '32', '30'], 2013: ['86', '84', '82', '79', '72', '69', '64', '56', '50', '49', '45', '42', '40', '38', '38', '37', '36', '33', '32', '30'], 2014: [], 2015: [], 2016: [], 2017: [], 2018: [], 2019: [], 2020: []}
2014
['87', '79', '75', '70', '64', '62', '60', '56', '54', '48', '47', '47', '44', '41', '39', '38', '38', '35', '33', '30']
{2011: ['87', '79', '75', '70', '64', '62', '60', '56', '54', '48', '47', '47', '44', '41', '39', '38', '38', '35', '33', '30'], 2012: ['87', '79', '75', '70', '64', '62', '60', '56', '54', '48', '47', '47', '44', '41', '39', '38', '38', '35', '33', '30'], 2013: ['87', '79', '75', '70', '64', '62', '60', '56', '54', '48', '47', '47', '44', '41', '39', '38', '38', '35', '33', '30'], 2014: ['87', '79', '75', '70', '64', '62', '60', '56', '54', '48', '47', '47', '44', '41', '39', '38', '38', '35', '33', '30'], 2015: [], 2016: [], 2017: [], 2018: [], 2019: [], 2020: []}
2015
['81', '71', '70', '66', '66', '63', '62', '60', '51', '50', '47', '47', '45', '43', '42', '42', '39', '37', '34', '17']
{2011: ['81', '71', '70', '66', '66', '63', '62', '60', '51', '50', '47', '47', '45', '43', '42', '42', '39', '37', '34', '17'], 2012: ['81', '71', '70', '66', '66', '63', '62', '60', '51', '50', '47', '47', '45', '43', '42', '42', '39', '37', '34', '17'], 2013: ['81', '71', '70', '66', '66', '63', '62', '60', '51', '50', '47', '47', '45', '43', '42', '42', '39', '37', '34', '17'], 2014: ['81', '71', '70', '66', '66', '63', '62', '60', '51', '50', '47', '47', '45', '43', '42', '42', '39', '37', '34', '17'], 2015: ['81', '71', '70', '66', '66', '63', '62', '60', '51', '50', '47', '47', '45', '43', '42', '42', '39', '37', '34', '17'], 2016: [], 2017: [], 2018: [], 2019: [], 2020: []}
2016
['93', '86', '78', '76', '75', '69', '61', '46', '46', '45', '45', '44', '44', '41', '41', '40', '40', '34', '28', '24']
{2011: ['93', '86', '78', '76', '75', '69', '61', '46', '46', '45', '45', '44', '44', '41', '41', '40', '40', '34', '28', '24'], 2012: ['93', '86', '78', '76', '75', '69', '61', '46', '46', '45', '45', '44', '44', '41', '41', '40', '40', '34', '28', '24'], 2013: ['93', '86', '78', '76', '75', '69', '61', '46', '46', '45', '45', '44', '44', '41', '41', '40', '40', '34', '28', '24'], 2014: ['93', '86', '78', '76', '75', '69', '61', '46', '46', '45', '45', '44', '44', '41', '41', '40', '40', '34', '28', '24'], 2015: ['93', '86', '78', '76', '75', '69', '61', '46', '46', '45', '45', '44', '44', '41', '41', '40', '40', '34', '28', '24'], 2016: ['93', '86', '78', '76', '75', '69', '61', '46', '46', '45', '45', '44', '44', '41', '41', '40', '40', '34', '28', '24'], 2017: [], 2018: [], 2019: [], 2020: []}
2017
['100', '81', '77', '75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31']
{2011: ['100', '81', '77', '75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31'], 2012: ['100', '81', '77', '75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31'], 2013: ['100', '81', '77', '75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31'], 2014: ['100', '81', '77', '75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31'], 2015: ['100', '81', '77', '75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31'], 2016: ['100', '81', '77', '75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31'], 2017: ['100', '81', '77', '75', '70', '63', '54', '49', '47', '44', '44', '44', '42', '41', '40', '37', '36', '33', '33', '31'], 2018: [], 2019: [], 2020: []}
2018
['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16']
{2011: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2012: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2013: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2014: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2015: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2016: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2017: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2018: ['98', '97', '72', '71', '70', '66', '57', '54', '52', '52', '50', '49', '45', '45', '40', '39', '36', '34', '26', '16'], 2019: [], 2020: []}
2019
['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21']
{2011: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2012: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2013: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2014: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2015: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2016: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2017: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2018: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2019: ['99', '81', '66', '66', '62', '59', '59', '56', '54', '54', '52', '49', '44', '43', '41', '39', '35', '34', '34', '21'], 2020: []}
2020
['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5']
{2011: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2012: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2013: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2014: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2015: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2016: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2017: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2018: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2019: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5'], 2020: ['40', '38', '38', '34', '33', '32', '32', '29', '29', '27', '26', '23', '23', '22', '19', '19', '17', '12', '11', '5']}
{2011: [], 2012: [], 2013: [], 2014: [], 2015: [], 2016: [], 2017: [], 2018: [], 2019: [], 2020: []}
问题出在 league_data[i]=team_points 这一行:执行此行之后,league_data[i] 和 team_points 指向同一个列表对象(如下面的输出所示,两者的 id 相同)。因此随后调用 team_points.clear() 时,字典中保存的列表也会被一并清空。
# Diagnostic version of the scraping loop: it prints id() of league_data[i]
# and of team_points to show that both names refer to the same list object,
# which is why team_points.clear() also empties the entry stored in the dict.
i=2011
league_data={}
team_names=[]
team_points=[]
while i<2021:
    print(i)
    url="https://www.skysports.com/premier-league-table/"+str(i)
    page=requests.get(url)
    #print(page.status_code)
    soup= BeautifulSoup(page.text,'html.parser')
    league=soup.find('table',class_ ='standing-table__table')
    league_table = league.find_all('tbody')
    for league_teams in league_table:
        rows = league_teams.find_all('tr')
        for row in rows:
            # Team names are collected only once, from the first season.
            if i==2011:
                team_name = row.find('td', class_ ='standing-table__cell standing-table__cell--name').text.strip()
                team_names.append(team_name)
            # Index 9 selects the 10th cell of the row.
            team_point = row.find_all('td', class_ = 'standing-table__cell')[9].text.strip()
            team_points.append(team_point)
    print(team_points)
    # Intentionally left as plain assignment: this binds the dict entry to the
    # same list object instead of a copy, which the id() prints below confirm.
    league_data[i]=team_points
    print(league_data)
    print("Id of league_data[i]:", id(league_data[i]))
    print("Id of team_points :", id(team_points))
    # Empties the shared list, so league_data[i] becomes [] too.
    team_points.clear()
    i=i+1
    # Stop after the first season; one iteration is enough for the demo.
    break
#print(team_names)
#print(len(team_names))
print(league_data)
2011
['89', '89', '70', '69', '65', '64', '56', '52', '52', '47', '47', '47', '45', '45', '43', '38', '37', '36', '31', '25']
{2011: ['89', '89', '70', '69', '65', '64', '56', '52', '52', '47', '47', '47', '45', '45', '43', '38', '37', '36', '31', '25']}
Id of league_data[i]: 140615373022336
Id of team_points : 140615373022336
{2011: []}
解决方案:只需将 league_data[i]=team_points 这一行修改为 league_data[i]=team_points.copy(),问题就解决了。
# Fixed version of the scraping loop: each season's list is stored in
# league_data as an independent copy, so clearing the working list
# team_points no longer empties the data already saved in the dict.
i=2011
league_data={}
team_names=[]
team_points=[]
while i<2021:
    print(i)
    url="https://www.skysports.com/premier-league-table/"+str(i)
    page=requests.get(url)
    #print(page.status_code)
    soup= BeautifulSoup(page.text,'html.parser')
    league=soup.find('table',class_ ='standing-table__table')
    league_table = league.find_all('tbody')
    for league_teams in league_table:
        rows = league_teams.find_all('tr')
        for row in rows:
            # Team names are collected only once, from the first season.
            if i==2011:
                team_name = row.find('td', class_ ='standing-table__cell standing-table__cell--name').text.strip()
                team_names.append(team_name)
            # Index 9 selects the 10th cell of the row.
            team_point = row.find_all('td', class_ = 'standing-table__cell')[9].text.strip()
            team_points.append(team_point)
    print(team_points)
    # The fix: store a shallow copy so the dict entry is a separate list.
    league_data[i]=team_points.copy()
    print(league_data)
    # The two ids now differ, confirming the objects are distinct.
    print("Id of league_data[i]:", id(league_data[i]))
    print("Id of team_points :", id(team_points))
    # Safe now: only the working list is emptied for the next season.
    team_points.clear()
    i=i+1
    # Stop after the first season; one iteration is enough for the demo.
    break
#print(team_names)
#print(len(team_names))
print(league_data)
2011
['89', '89', '70', '69', '65', '64', '56', '52', '52', '47', '47', '47', '45', '45', '43', '38', '37', '36', '31', '25']
{2011: ['89', '89', '70', '69', '65', '64', '56', '52', '52', '47', '47', '47', '45', '45', '43', '38', '37', '36', '31', '25']}
Id of league_data[i]: 140615375754176
Id of team_points : 140614558230912
{2011: ['89', '89', '70', '69', '65', '64', '56', '52', '52', '47', '47', '47', '45', '45', '43', '38', '37', '36', '31', '25']}
本文收集自互联网,转载请注明来源。
如有侵权,请联系 [email protected] 删除。
我来说两句