python selenium UI自動化解決驗證碼的4種方法

發布時間：2020-08-31 09:50:15 來源：腳本之家 閱讀：227 作者：地空神一 欄目：開發技術

本文介紹了python selenium UI自動化解決驗證碼的4種方法，分享給大家，具體如下：

測試環境

windows7+
firefox50+
geckodriver # firefox瀏覽器驅動
python3
selenium3

selenium UI自動化解決驗證碼的4種方法：去掉驗證碼、設置萬能碼、驗證碼識別技術-tesseract、添加cookie登錄，本次主要講解驗證碼識別技術-tesseract和添加cookie登錄。

1. 去掉驗證碼

去掉驗證碼，直接通過用戶名和密碼登錄網站。

2. 設置萬能碼

設置萬能碼，就是不管什麼情況，輸入萬能碼，都可以成功登錄網站。

3. 驗證碼識別技術-tesseract

準備條件

tesseract，下載地址：https://github.com/parrot-office/tesseract/releases/tag/3.5.1
Python3.x，下載地址：https://www.python.org/downloads/
pillow（Python3圖像處理庫）

安裝好Python，通過pip install pillow安裝pillow庫。然後將tesseract中的tesseract.exe和tessdata文件夾放到測試腳本所在目錄下，tessdata中默認有eng.traineddata和osd.traineddata，如果要識別漢語，請自行下載對應包。

以下是兩個主要文件，TesseractPy3.py是通過python代碼去調用tesseract以達到識別驗證碼的效果。code.py是通過selenium獲取驗證碼圖片，進而使用TesseractPy3中的函數得到驗證碼，實現網站的自動化登錄。

TesseractPy3.py

#coding=utf-8

import os
import subprocess
import traceback
import logging

from PIL import Image # 來源于Pillow庫

TESSERACT = 'tesseract' # name of the local command that is invoked
TEMP_IMAGE_NAME = "temp.bmp" # temporary file holding the converted image
TEMP_RESULT_NAME = "temp" # base name of the temporary file holding the recognized text ('.txt' is appended when it is read)
CLEANUP_TEMP_FLAG = True # default for whether temporary files are removed afterwards
INCOMPATIBLE = True # default for whether a failed recognition is retried on a converted copy of the image

def image_to_scratch(image, TEMP_IMAGE_NAME):
  """Save *image* to the scratch file used as tesseract input.

  image: object exposing a ``save(path, dpi=...)`` method (a Pillow image
    in this module).
  TEMP_IMAGE_NAME: path of the scratch file to write.

  The image is written with a dpi setting of 200x200. Returns None.
  """
  scratch_path = TEMP_IMAGE_NAME
  resolution = (200, 200)
  image.save(scratch_path, dpi=resolution)

def retrieve_text(TEMP_RESULT_NAME):
  """Return the recognized text stored in ``TEMP_RESULT_NAME + '.txt'``.

  TEMP_RESULT_NAME: base name (without the '.txt' suffix) of the result file.

  Raises OSError (e.g. FileNotFoundError) when the result file cannot be
  opened; image_to_string relies on that to detect a failed recognition.
  """
  # The context manager closes the file even if read() raises.
  with open(TEMP_RESULT_NAME + '.txt', 'r') as inf:
    return inf.read()

def perform_cleanup(TEMP_IMAGE_NAME, TEMP_RESULT_NAME):
  """Delete the temporary files left behind by a recognition run.

  Removes the scratch image, the ``TEMP_RESULT_NAME + '.txt'`` result file
  and "tesseract.log" from the current directory. Removal is best effort:
  a file that is missing or cannot be deleted is skipped silently.
  """
  leftovers = [TEMP_IMAGE_NAME, TEMP_RESULT_NAME + '.txt', "tesseract.log"]
  for path in leftovers:
    try:
      os.remove(path)
    except OSError:
      # Nothing to clean up for this entry; move on to the next one.
      continue

def call_tesseract(image, result, lang):
  """Run the tesseract command on an image file and wait for it to finish.

  image: path of the image file to recognize.
  result: output base name passed to tesseract; the caller later reads the
    recognized text from ``result + '.txt'``.
  lang: language code passed via ``-l`` (e.g. 'eng').

  Returns the exit status of the tesseract process. Raises OSError when the
  command cannot be started.
  """
  args = [TESSERACT, image, result, '-l', lang]
  proc = subprocess.Popen(args)
  # communicate() returns (stdout, stderr), not the exit status; it is only
  # used here to wait for the process to finish.
  proc.communicate()
  return proc.returncode

def image_to_string(image, lang, cleanup = CLEANUP_TEMP_FLAG, incompatible = INCOMPATIBLE):
  """Recognize the text contained in an image file with tesseract.

  image: path of the image file to recognize.
  lang: tesseract language code (e.g. 'eng').
  cleanup: when true, the temporary files are removed before returning.
  incompatible: when true and the first attempt fails, the image is
    re-saved as TEMP_IMAGE_NAME and recognition is retried on that copy.

  Returns the recognized text, or None when recognition failed; the
  failure traceback is logged to 'tesseract.log'.
  """
  logging.basicConfig(filename='tesseract.log')
  try:
    try:
      call_tesseract(image, TEMP_RESULT_NAME, lang)
      text = retrieve_text(TEMP_RESULT_NAME)
    except Exception:
      if not incompatible:
        raise
      # Retry on a converted copy; the context manager releases the source
      # image file handle once the copy has been written.
      with Image.open(image) as source_image:
        image_to_scratch(source_image, TEMP_IMAGE_NAME)
      call_tesseract(TEMP_IMAGE_NAME, TEMP_RESULT_NAME, lang)
      text = retrieve_text(TEMP_RESULT_NAME)
    return text
  except Exception:
    # Only ordinary errors are logged and swallowed; KeyboardInterrupt and
    # SystemExit propagate to the caller.
    logging.error(traceback.format_exc())
    return None
  finally:
    if cleanup:
      # NOTE(review): perform_cleanup also removes 'tesseract.log', the file
      # the error above is logged to, so the log may not survive a failed
      # call when cleanup is enabled - confirm whether that is intended.
      perform_cleanup(TEMP_IMAGE_NAME, TEMP_RESULT_NAME)

code.py

#coding=utf-8

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoAlertPresentException
from PIL import Image
import unittest, time, re
from TesseractPy3 import *

class lgoin(unittest.TestCase):
  """Log in to the site under test, reading the captcha with tesseract OCR."""

  def setUp(self):
    """Start the browser and store the settings the test uses."""
    self.driver = webdriver.Ie()
    # Shut the browser and its driver process down after every test, even
    # when the test fails, so no processes are left behind.
    self.addCleanup(self.driver.quit)
    self.driver.implicitly_wait(30)
    self.base_url = 'http://127.0.0.1:8080/test' # URL under test
    self.title = '某管理平臺' # title of the site under test
    self.verificationErrors = []
    self.accept_next_alert = True

  def test_lgoin(self):
    """Read the captcha from a page screenshot and submit the login form."""
    driver = self.driver
    driver.get(self.base_url)
    driver.maximize_window()
    driver.save_screenshot('All.png') # full-page screenshot that contains the captcha
    imgelement = driver.find_element(By.CLASS_NAME, 'kaptchaImage')
    location = imgelement.location # x/y position of the captcha
    size = imgelement.size # width/height of the captcha
    # Crop box (left, upper, right, lower) of the captcha inside the screenshot.
    rangle = (
      int(location['x']),
      int(location['y']),
      int(location['x'] + size['width']),
      int(location['y'] + size['height']),
    )
    with Image.open("All.png") as screenshot:
      # The screenshot may carry an alpha channel, which JPEG cannot store,
      # so the crop is converted to RGB before it is saved.
      screenshot.crop(rangle).convert('RGB').save('result.jpg')

    recognized = image_to_string('result.jpg', 'eng')
    if recognized is None:
      # image_to_string returns None when recognition failed.
      self.fail('captcha recognition failed; see tesseract.log')
    text = recognized.strip()

    self.assertIn(self.title, driver.title)

    driver.find_element(By.ID, u'userCode').clear()
    driver.find_element(By.ID, u'userCode').send_keys('XXXXXX') # user name
    driver.find_element(By.ID, u'password').clear()
    driver.find_element(By.ID, u'password').send_keys('XXXXXX') # password
    driver.find_element(By.NAME, 'verifyCode').send_keys(text)
    driver.find_element(By.NAME, 'submit').submit()

  def is_element_present(self, how, what):
    """Return True when an element matching (how, what) can be located."""
    try:
      self.driver.find_element(by=how, value=what)
    except NoSuchElementException:
      return False
    return True

  def is_alert_present(self):
    """Return True when switching to an alert does not raise."""
    try:
      self.driver.switch_to.alert
    except NoAlertPresentException:
      return False
    return True

  def close_alert_and_get_its_text(self):
    """Accept or dismiss the current alert and return its text.

    The alert is accepted when ``accept_next_alert`` is true and dismissed
    otherwise; the flag is reset to True afterwards in either case.
    """
    try:
      alert = self.driver.switch_to.alert
      alert_text = alert.text
      if self.accept_next_alert:
        alert.accept()
      else:
        alert.dismiss()
      return alert_text
    finally:
      self.accept_next_alert = True

  def tearDown(self):
    """Fail the test if any verification errors were collected."""
    self.assertEqual([], self.verificationErrors)

# Run the test case when this file is executed as a script.
if __name__ == "__main__":
  unittest.main()

最後，執行命令python code.py，就可以成功自動登錄網站。

注意：

由於受驗證碼圖片質量以及清晰度的影響，並不是每一次都能成功登錄。

4. 添加cookie登錄

首先獲取網站登錄後的cookie，然後通過添加cookie的方式，實現網站登錄的目的。我們用cook來表示xxxxxx的登錄後的cookie。

# coding=utf-8

from selenium import webdriver
import time

# Cookie captured from a session that is already logged in to the site.
# Placeholder values: replace them with the real cookie before running.
# add_cookie expects a dict with at least the 'name' and 'value' keys.
cook = {'name': 'xxxxxx', 'value': 'xxxxxx'}

driver = webdriver.Firefox()
driver.get("http://www.xxxxxx.com/") # the site to log in to

driver.add_cookie(cook) # add the cookie here; when there are several cookies, call add_cookie once per cookie
time.sleep(3)

# Refresh the page to see the logged-in state.
driver.refresh()

注意：

登錄時有勾選下次自動登錄的請勾選，瀏覽器提示是否保存用戶密碼時請選擇確定，這樣獲取的cookie成功登陸的機率比較高

以上就是本文的全部內容，希望對大家的學習有所幫助，也希望大家多多支持億速云。

向AI問一下細節

python selenium UI自動化解決驗證碼的4種方法

猜你喜歡

最新資訊

相關(guān)推薦

相關(guān)標(biāo)簽