如何使用 Selenium 和 Python 在 Python 類中調用方法

Gogi 发表于 Dev

高吉

我剛開始學習寫程式，需要解決一個實際問題：從網站上獲取信息並將其寫入 Excel（這部分我希望可以照著教程自己完成）。但主要問題是我無法進入該網站（該網站是免費的）。你能看看我的代碼嗎？當我運行它時，我得到的輸出是：

[] 進程完成，退出代碼 0

import requests
from bs4 import BeautifulSoup
import pytest
import time
import json
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

# Page of the PASS Online service that the requests-based scraper fetches.
URL = 'http://way2drug.com/passonline/'

# Request headers that make the HTTP client identify itself like desktop Chrome.
HEADERS = {
  'user-agent': (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/96.0.4664.45 Safari/537.36'
  ),
  'accept': (
    'text/html,application/xhtml+xml,application/xml;q=0.9,'
    'image/avif,image/webp,image/apng,*/*;q=0.8,'
    'application/signed-exchange;v=b3;q=0.9'
  ),
}


class Test1():
  """Browser test that walks through the PASS Online page with Selenium.

  The steps open the site, fill in the ``user_login`` / ``user_password``
  fields, and type a SMILES string into the ``smi`` input before clicking
  the final form button.  Every element is fetched through an explicit wait
  so that a step does not run before the previous click has finished
  updating the page.
  """

  # Maximum number of seconds to wait for any single element.
  WAIT_TIMEOUT = 15

  def setup_method(self, method):
    """Start a fresh Chrome session before each test method."""
    self.driver = webdriver.Chrome()
    self.vars = {}

  def teardown_method(self, method):
    """Shut the browser down after each test method."""
    self.driver.quit()

  def _wait_clickable(self, by, locator):
    """Return the element found by (by, locator) once it is clickable.

    Raises selenium's TimeoutException when the element does not become
    clickable within WAIT_TIMEOUT seconds.
    """
    waiter = WebDriverWait(self.driver, self.WAIT_TIMEOUT)
    return waiter.until(expected_conditions.element_to_be_clickable((by, locator)))

  def test_1(self):
    """Run the recorded click/type sequence against the live site."""
    # Test name: 1
    # Step # | name | target | value | comment
    # 1 | open | /passonline/ |  |
    self.driver.get("http://way2drug.com/passonline/")
    # 2 | setWindowSize | 1920x1030 |  |
    self.driver.set_window_size(1920, 1030)
    # 3 | click | css=#registration img |  |
    self._wait_clickable(By.CSS_SELECTOR, "#registration img").click()
    # 4-5 | click, then type | name=user_login |  |
    # The field is looked up once and reused for both actions.
    login_field = self._wait_clickable(By.NAME, "user_login")
    login_field.click()
    login_field.send_keys("MY USER")
    # 6 | click | id=page1 |  |
    self._wait_clickable(By.ID, "page1").click()
    # 7 | type | name=user_password |  |
    self._wait_clickable(By.NAME, "user_password").send_keys("MY PASS")
    # 8 | click | id=register |  |
    self._wait_clickable(By.ID, "register").click()
    # 9 | click | id=myHeader1 |  |
    self._wait_clickable(By.ID, "myHeader1").click()
    # 10 | click | id=smiles |  |
    self._wait_clickable(By.ID, "smiles").click()
    # Focus the SMILES input, then type the structure into it.
    smiles_field = self._wait_clickable(By.ID, "smi")
    smiles_field.click()
    smiles_field.send_keys("CC1(C)C(O)CC[C@@]2(C)C1CC[C@]3(C)C2CCC4[C@@]3(C)CC[C@]5(C(O)=O)C4[C@H](C)C(C)=CC5")
    # Click the fourth child input of #myContent4 (presumably the submit
    # button of the SMILES form -- confirm against the page).
    self._wait_clickable(By.CSS_SELECTOR, "#myContent4 input:nth-child(4)").click()



def get_html(url, params=None, *, timeout=30):
  """Fetch *url* with the module-level HEADERS and return the response.

  Args:
    url: Address to request.
    params: Optional query-string parameters passed through to requests.
    timeout: Seconds to wait for the server before requests raises a
      timeout error.  Without a timeout a stalled server would block the
      script forever.

  Returns:
    The ``requests.Response`` object; the caller inspects ``status_code``
    and ``text`` itself.
  """
  return requests.get(url, headers=HEADERS, params=params, timeout=timeout)

def get_content(html):
  """Print and return every ``<a class="Antineoplastic">`` element in *html*.

  Args:
    html: Markup of the page as a string.

  Returns:
    The result of ``find_all`` (empty when nothing matches), so that a
    caller can process the matches further instead of only seeing them
    printed.
  """
  soup = BeautifulSoup(html, 'html.parser')
  items = soup.find_all('a', class_='Antineoplastic')

  print(items)
  return items

def parse():
  """Download URL and hand its markup to get_content.

  Any status code other than 200 only prints the failure message; nothing
  is parsed in that case.
  """
  response = get_html(URL)
  if response.status_code != 200:
    print('ALL YOUR BASE ARE BELONG TO US')
    return
  get_content(response.text)


# Script entry point: runs the requests-based scrape as soon as this module
# is executed.  NOTE(review): the Selenium steps in Test1 are not invoked here.
parse()

刺猬

請注意：永遠不要公開提供您的憑據（用戶名和密碼）。

發生什麼了？

您提到必須先登錄，Selenium 是一個不錯的選擇，但您實際執行的是通過 requests 發送請求的 parse()，它完全沒有經過 Selenium 的登錄步驟。因此，如果您查看 soup 的內容，就不會找到您要查找的元素。

怎麼修？

先執行您的 Selenium 操作，導航到您想要抓取的頁面。下一步，將 driver.page_source 傳入 BeautifulSoup 並查找您的元素：

# Parse the markup that the Selenium-driven browser currently holds rather
# than issuing a separate HTTP request.
soup = BeautifulSoup(markup=driver.page_source, features='html.parser')
items = soup.find_all(name='a', class_='Antineoplastic')

print(items)

如果您的選擇器是正確的，就會得到想要的結果。

編輯

關於您的評論，下面是一個提示，展示最終代碼大致的樣子；至於中間的調試步驟，您應該另外提出一個帶有針對性示例的新問題：

import requests
from bs4 import BeautifulSoup
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

# Imported here so this script section stays runnable on its own.
from selenium.common.exceptions import TimeoutException

# Maximum number of seconds to wait for any single page element.
WAIT_TIMEOUT = 15

driver = webdriver.Chrome()


def wait_clickable(by, locator):
    """Return the element found by (by, locator) once it is clickable."""
    waiter = WebDriverWait(driver, WAIT_TIMEOUT)
    return waiter.until(expected_conditions.element_to_be_clickable((by, locator)))


# try/finally guarantees the browser is closed even when a step fails.
try:
    driver.get("http://way2drug.com/passonline/")
    driver.set_window_size(1920, 1030)
    wait_clickable(By.CSS_SELECTOR, "#registration img").click()

    # Fill in the login form; each field is located only once.
    login_field = wait_clickable(By.NAME, "user_login")
    login_field.click()
    login_field.send_keys("MY USER")
    wait_clickable(By.ID, "page1").click()
    wait_clickable(By.NAME, "user_password").send_keys("MY PASS")
    wait_clickable(By.ID, "register").click()

    # Open the SMILES input and type the structure into it.
    wait_clickable(By.ID, "myHeader1").click()
    wait_clickable(By.ID, "smiles").click()
    smiles_field = wait_clickable(By.ID, "smi")
    smiles_field.click()
    smiles_field.send_keys("CC1(C)C(O)CC[C@@]2(C)C1CC[C@]3(C)C2CCC4[C@@]3(C)CC[C@]5(C(O)=O)C4[C@H](C)C(C)=CC5")
    wait_clickable(By.CSS_SELECTOR, "#myContent4 input:nth-child(4)").click()

    # Give the page a chance to render matching links before the markup is
    # read; reading page_source straight after the click can capture the
    # page before any result exists.
    try:
        WebDriverWait(driver, WAIT_TIMEOUT).until(
            lambda d: d.find_elements(By.CSS_SELECTOR, "a.Antineoplastic")
        )
    except TimeoutException:
        # Nothing matched in time: fall through and parse the page as it is,
        # which prints an empty list exactly as the unwaited version would.
        pass

    soup = BeautifulSoup(driver.page_source, 'html.parser')
    items = soup.find_all('a', class_='Antineoplastic')

    print(items)
finally:
    driver.quit()