一、安装好 chromedriver 之后,在命令行运行 "chrome.exe"(完整路径) --remote-debugging-port=9222,以远程调试模式打开谷歌浏览器。
二、使用selenium打开网页:
def opendriver(url, domainlist):
    """
    Attach to an already-running Chrome instance, load ``url`` and run the
    batch query.

    The browser must be reachable on ``127.0.0.1:9222`` (i.e. started with
    ``--remote-debugging-port=9222``).

    :param url: address of the query page to open.
    :param domainlist: domains handed on to ``icp_query``.
    :return: whatever ``icp_query`` returns for ``domainlist``.
    """
    options = Options()
    # Connect to the browser listening on the local debugging port.
    options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
    browser = webdriver.Chrome(options=options)
    browser.get(url)
    # Fixed pause before interacting; presumably gives the page time to load.
    time.sleep(5)
    return icp_query(browser, url, domainlist)
三、获取输入框批量输入数据,并破解验证码:
使用 driver.find_element_by_xpath 获取element时,xpath可通过浏览器打开网站,使用检查功能,找到element所对应行,右键复制xpath。
def icp_query(driver, url, domainlist):
    """
    Query every domain in ``domainlist``, solving the slider captcha each time.

    For each domain the function types it into the search box, clicks the
    query button, extracts the captcha images embedded in the page as base64
    ``src`` attributes, saves them to ``bg.jpg`` / ``tp.png``, computes the gap
    offset, drags the slider and finally scrapes the result table. Failures
    are printed and the step is retried, so the call only returns once every
    domain has produced a result row.

    :param driver: Selenium WebDriver already showing the query page.
    :param url: address of the query page; reloaded after a failure.
    :param domainlist: sequence of domains to query.
    :return: a ``pandas.DataFrame`` with one row per domain, or the string
        ``'no query list!'`` when ``domainlist`` is empty.
    """
    if len(domainlist) == 0:
        return 'no query list!'

    img_xpath = '/html/body/div/div/div/div/div/div//img/@src'
    # Rows are collected in a list and turned into a DataFrame once at the
    # end; DataFrame.append() no longer exists in pandas 2.x.
    rows = []
    for inputtext in domainlist:
        while True:  # retry this domain until a result row is obtained
            try:
                search_box = driver.find_element("css selector", "input.el-input__inner")
                # Enter the domain to query.
                search_box.clear()
                time.sleep(0.5)
                search_box.send_keys(inputtext)
                while True:  # retry the captcha until the form can be read
                    try:
                        time.sleep(2)
                        getcheck = driver.find_element("xpath", "/html/body/div[1]/div/header/div[3]/div/button")
                        # Hover over the button, then click it to open the
                        # slider captcha.
                        ActionChains(driver).move_to_element(getcheck).perform()
                        getcheck.click()
                        print("click button")
                        time.sleep(1.5)
                        # Collect the non-empty image sources from the page.
                        html = etree.HTML(driver.page_source)
                        srcs = html.xpath(img_xpath)
                        print(srcs)
                        imgbases = list(filter(None, srcs))
                        big_base = imgbases[1].split('base64,')[-1]
                        print('bg: ' + big_base)
                    except Exception as e:
                        print(e)
                        # NOTE(review): WebElement.text of an <input> is
                        # normally empty, so this branch is presumably always
                        # taken - confirm whether the field's "value"
                        # attribute was intended.
                        if not search_box.text:
                            alertcheck = driver.find_element("xpath", "/html/body/div[4]/ul/li[3]/input")
                            ActionChains(driver).move_to_element(alertcheck).perform()
                            alertcheck.click()
                            driver.refresh()
                        continue
                    if big_base != '':
                        save_picture('bg.jpg', base64.b64decode(big_base))
                        # Slider-piece image: strip the data-URI prefix the
                        # same way as for the background image.
                        small_base = imgbases[2].split('base64,')[-1]
                        print('tp: ' + small_base)
                        save_picture('tp.png', base64.b64decode(small_base))
                        # Compute the slide distance and drag the slider.
                        distance = identify_gap('bg.jpg', 'tp.png')
                        slide_block(driver, distance)
                        try:
                            formdict = getform(driver, inputtext)
                            print(formdict)
                            break
                        except Exception as e:
                            print(e)
                    else:
                        print('没保存成功,重试')
                        driver.refresh()
                if not rows:
                    print('add first line to dataframe')
                else:
                    print('append to dataframe')
                rows.append(formdict)
                break
            except Exception as e:
                print(e)
                # Start over from a freshly loaded query page.
                driver.get(url)
                time.sleep(4)
    return pd.DataFrame(rows)
其中,弹出滑动验证码后,保存图片并计算图片左端到缺口的距离
def save_picture(file_name, avatar_bytes):
    """
    Write ``avatar_bytes`` to ``file_name`` and report success on stdout.

    :param file_name: path of the file to create or overwrite.
    :param avatar_bytes: raw bytes to store.
    """
    # The ``with`` block closes the file on exit, so no explicit close() is
    # needed; write-only binary mode suffices because nothing is read back.
    with open(file_name, 'wb') as f:
        f.write(avatar_bytes)
    print('图片保存成功')
def identify_gap(bg, tp):
    """
    Locate the captcha gap by template matching.

    :param bg: path of the background image.
    :param tp: path of the slider-piece (template) image.
    :return: x coordinate of the best-match location reported by
        ``cv.minMaxLoc``; the caller uses it as the slide distance.
    """
    background_gray = cv.cvtColor(cv.imread(bg), cv.COLOR_BGR2GRAY)
    # Flag 0 makes imread load the template as a grayscale image.
    piece_gray = cv.imread(tp, 0)
    scores = cv.matchTemplate(background_gray, piece_gray, cv.TM_CCOEFF_NORMED)
    extrema = cv.minMaxLoc(scores)
    print(extrema)
    # minMaxLoc yields (min_val, max_val, min_loc, max_loc); entry 3 is the
    # location of the highest score and [0] is its x coordinate.
    return extrema[3][0]
之后,为模拟人工滑动效果,先计算滑动轨迹,再按轨迹滑动滑块:
def get_track(distance):
    """
    Build the list of per-step x offsets used to drag the slider.

    The simulated motion accelerates until two thirds of ``distance`` has
    been covered and decelerates afterwards. While decelerating, a step
    shorter than 3 resets the speed so the motion keeps advancing. The last
    entry is a correction step chosen so that the returned offsets sum to
    exactly ``distance`` (it is negative when the simulated motion overshot).

    :param distance: total offset the slider has to travel.
    :return: list of offsets; all integers when ``distance`` is an integer.
    """
    track = []
    current = 0  # un-rounded distance covered so far; drives the loop
    mid = distance * 3 / 4.5  # switch-over point from speeding up to slowing down
    t = random.randint(2, 3) / 4.5  # duration of one simulated step
    v = 0
    move = 0  # defined up front so the deceleration branch never reads an unbound name
    while current < distance:
        if current < mid:
            a = random.uniform(7, 8)
        else:
            a = random.uniform(-11, -9)
            if move < 3:
                # The previous step was too short: restore a base speed.
                v = 10
                a = 2
        v0 = v
        v = v0 + a * t
        move = v0 * t + 0.5 * a * t * t
        current += move
        track.append(round(move))
    # Correct against the steps actually emitted (which were rounded), not
    # against the un-rounded running total, so the offsets add up exactly.
    track.append(distance - sum(track))
    return track
def slide_block(driver, distance):
    """
    Drag the captcha slider horizontally by roughly ``distance``.

    The slider element is pressed and held, moved step by step along the
    offsets produced by ``get_track``, nudged one unit back and forth, and
    released after a short random pause.

    :param driver: Selenium WebDriver showing the slider captcha.
    :param distance: horizontal offset from the slider's start to the gap.
    """
    # Locate the slider handle.
    slider_button = driver.find_element("id", 'sildeBox')
    # Press the handle and keep the mouse button down.
    ActionChains(driver).click_and_hold(on_element=slider_button).perform()
    print('distance: ' + str(distance))
    # A random extra offset of 0-3 imitates sliding slightly past the gap.
    track_list = get_track(distance + random.randint(0, 3))
    # Follow the track one step at a time.
    moved = 0
    for track in track_list:
        moved += track
        ActionChains(driver).move_by_offset(xoffset=track, yoffset=0).perform()
    print('track: %d'%(moved))
    # Small back-and-forth nudge with a random pause, imitating a manual
    # correction around the gap position.
    ActionChains(driver).move_by_offset(xoffset=-1, yoffset=0).perform()
    time.sleep(random.uniform(0.2, 0.8) / 10)
    ActionChains(driver).move_by_offset(xoffset=1, yoffset=0).perform()
    # Release with a fresh chain so the release does not depend on whether
    # perform() cleared the click-and-hold queued on the earlier chain.
    ActionChains(driver).pause(random.uniform(5, 9) / 10).release().perform()
    time.sleep(1.5)
四、破解完成后页面跳转,获取表单数据(dict 的键需根据实际网站的页面结构调整):
def getform(driver, inputtext):
    """
    Read the first row of the result table into a dict.

    The table header is looked up first, outside the ``try``; if it is
    missing, the lookup error propagates to the caller. If any cell of the
    row cannot be found, ``'find no item!'`` is printed and the fields
    gathered so far are returned.

    :param driver: Selenium WebDriver showing the result page.
    :param inputtext: the queried value, stored under the ``'input'`` key.
    :return: dict with ``'input'`` and, when found, ``'name'``,
        ``'filingnum'``, ``'checkdate'`` and ``'ifretrict'``.
    """
    time.sleep(1)
    formdata = {'input': inputtext}
    container = '/html/body/div[1]/div/section/div/div/div[1]/div/div[1]'
    # Existence check only: raises when the header table is not present.
    driver.find_element("xpath", container + '/div[2]/table')
    row = container + '/div[3]/table/tbody/tr'
    cells = (
        ('name', '/td[2]/div'),
        ('filingnum', '/td[3]/div'),
        ('checkdate', '/td[4]/div'),
        ('ifretrict', '/td[5]/div/span'),
    )
    try:
        for key, cell in cells:
            formdata[key] = driver.find_element("xpath", row + cell).text
    except Exception:
        # Best effort: a missing cell leaves the remaining keys unset.
        print('find no item!')
    return formdata
到了这里,关于selenium破解滑块验证码自动查询+获取后续表单数据的文章就介绍完了。如果您还想了解更多内容,请在右上角搜索TOY模板网以前的文章或继续浏览下面的相关文章,希望大家以后多多支持TOY模板网!