python实现豆瓣自动滑块验证,自动登录并根据自己加入的小组发送房屋出租广告

wylc123 9月前 ⋅ 180 阅读

一 、背景

        个人需要寻找合租室友,复制粘贴发广告太麻烦,搞个自动的。能自动登录个人豆瓣账号,根据自己加入的豆瓣租房小组,发送出租广告。如果发送速度过快,官方会弹出滑块验证来确认是否为人工操作。验证码由两部分组成:一张带缺口的背景图片,和一张用于补全缺口的滑块图片。

二 、 技术栈

      1.  selenium实现自动化

      2. python图片处理

三 、上代码

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#!文件类型: python
#!创建时间: 2019/1/23 15:03
#!作者: SongBin
#!文件名称: Douban.py
#!简介:豆瓣出租房子
import os
import socket
import time
import urllib
import win32api
import cv2
import numpy as np
import pyperclip
import win32con
from PIL import Image
from pykeyboard import PyKeyboard
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, ElementNotInteractableException, \
    ElementNotVisibleException, TimeoutException, UnexpectedAlertPresentException
from selenium.webdriver import DesiredCapabilities, ActionChains
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import logging
# Shared Chrome options object; DouBanIssue.__init__ passes it to webdriver.Chrome().
chrome_options = Options()
# Uncomment to start Chrome headless.
# NOTE(review): the posting flow pastes text with simulated OS key presses
# (PyKeyboard), which presumably needs a visible, focused window - confirm
# before enabling.
# chrome_options.add_argument("--headless")
class DouBanIssue():
    def __init__(self):
        """Launch Chrome and set up the attributes the other methods rely on.

        Attributes set:
            users: placeholder list of Douban account names.
            browser: Chrome WebDriver created with the module-level
                ``chrome_options`` and a fixed chromedriver path.
            wait: ``WebDriverWait`` on ``browser`` with a 20 second timeout.
        """
        # Replace with one or more of your own Douban accounts.
        self.users = ["xxxx", "yyyyyy"]
        # Location of the chromedriver binary; another driver (Firefox,
        # PhantomJS, ...) could be constructed here instead.
        driver_path = 'C:\\softs\\chrome\\chromedriver.exe'
        driver = webdriver.Chrome(executable_path=driver_path, options=chrome_options)
        self.browser = driver
        self.wait = WebDriverWait(driver, 20)
    def get_track(self, distance):
        """Build a list of integer x-offsets whose sum equals ``round(distance)``.

        The motion is simulated in fixed time slices of 0.2: it accelerates
        (a = 2) until four fifths of the distance is covered and decelerates
        (a = -3) afterwards. Each list entry is the whole-pixel move for one
        slice; entries may be 0 while the simulated speed is still low.

        The fractional part of every slice is carried over to the next one
        instead of being rounded away independently, and the last step is
        clamped, so the emitted steps neither drift from nor overshoot the
        requested distance.

        :param distance: total offset to cover (int or float)
        :return: list of non-negative ints; empty when ``round(distance) <= 0``
        """
        track = []
        target = int(round(distance))
        if target <= 0:
            return track

        # Displacement at which acceleration flips to deceleration.
        mid = distance * 4 / 5
        # Duration of one simulation slice.
        t = 0.2
        # Current velocity.
        v = 0
        # Exact (floating point) simulated displacement.
        current = 0.0
        # Sum of the integer steps already appended to ``track``.
        emitted = 0

        while emitted < target:
            a = 2 if current < mid else -3
            v0 = v
            # v = v0 + a*t
            v = v0 + a * t
            # x = v0*t + 1/2*a*t^2
            move = v0 * t + 1 / 2 * a * t * t
            if move <= 0:
                # The simulated motion has stalled before reaching the target;
                # finish in a single step rather than loop forever.
                track.append(target - emitted)
                break
            current += move
            # Emit only the whole pixels gained so far, never beyond target.
            step = min(int(round(current)), target) - emitted
            track.append(step)
            emitted += step
        return track
    def move_to_gap(self, slider, track):
        """Drag ``slider`` horizontally along ``track`` and then release it.

        :param slider: the slider element to press and hold
        :param track: iterable of x-offsets, applied one at a time
        :return: None
        """
        driver = self.browser
        # Press and hold the mouse button on the slider.
        ActionChains(driver).click_and_hold(slider).perform()
        # Every offset is performed as its own action chain.
        for step in track:
            ActionChains(driver).move_by_offset(xoffset=step, yoffset=0).perform()
        # Short pause at the final position before letting go.
        time.sleep(0.5)
        ActionChains(driver).release().perform()

    # 截取当前页面验证码
    def get_vcodeimg(self):
        """Screenshot the page and crop out the two captcha images.

        Locates the elements with ids ``slideBkg`` (background) and
        ``slideBlock`` (slider piece), saves a screenshot to
        ``./images/sys.png``, crops both element rectangles out of it into
        ``./images/bg.png`` and ``./images/tb.png`` and finally deletes the
        screenshot. The ``./images`` directory is created when missing.

        :return: ``''`` on success; ``None`` when any step raised (the
            exception is printed, not propagated)
        """
        def element_box(element_id):
            # Return the (left, upper, right, lower) crop box of an element,
            # printing its location, size and box along the way.
            element = self.browser.find_element_by_id(element_id)
            location = element.location
            print(location)
            size = element.size
            print(size)
            box = (int(location['x']), int(location['y']),
                   int(location['x'] + size['width']),
                   int(location['y'] + size['height']))
            print(box)
            return box

        try:
            # Background image.
            bg_box = element_box("slideBkg")
            # Slider piece.
            tb_box = element_box("slideBlock")

            image_dir = "./images"
            sys_path = "./images/sys.png"
            bg_path = "./images/bg.png"
            tb_path = "./images/tb.png"

            # Without the directory the screenshot cannot be written.
            os.makedirs(image_dir, exist_ok=True)
            self.browser.save_screenshot(sys_path)

            # Close the screenshot explicitly so it can be removed afterwards.
            with Image.open(sys_path) as screenshot:
                screenshot.crop(bg_box).save(bg_path)
                screenshot.crop(tb_box).save(tb_path)
            print("图片截取成功!")

            if os.path.exists(sys_path):
                os.remove(sys_path)
                print('已删除中间全屏截图')
            else:
                print("要删除的文件不存在!")
            return ''
        except Exception as e:
            print("异常信息:"+str(e))
            return None
        finally:
            print("结束")
    def get_img_by_el(self,imgid,imgname):
        # 分别处理每个图片,取出名称及地址
        # 背景图 slideBkg
        pic = self.browser.find_element_by_id(imgid)
        imgurl = pic.get_attribute("src")
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0'}
        try:
            req = urllib.request.Request(imgurl, headers=headers)
            res = urllib.request.urlopen(req, None, 5)
            content = res.read()
        except UnicodeDecodeError as e:
            print('-----UnicodeDecodeErrorurl:', imgurl)
            # 当图片链接失效时,做标记
            # print(traceback.print_exc())

        except urllib.error.URLError as e:
            print("-----urlErrorurl:", imgurl)
            # 当图片链接失效时,做标记
            # print(traceback.print_exc())

        except socket.timeout as e:
            print("-----socket timout:", imgurl)
            # 当图片链接失效时,做标记
            # print(traceback.print_exc())
        basepath = "./images"
        if not os.path.exists(basepath):
            os.makedirs(basepath)
        picname = imgname
        print(picname)
        file_name = os.path.join(basepath, picname)
        with open(file_name, 'wb') as fp:
            fp.write(content)
            fp.close()

    def show(name):
        cv2.imshow('Show', name)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
    def get_gap(self):
        """Estimate how far the slider must travel to reach the gap.

        Reads ``./images/tb.png`` (slider piece) and ``./images/bg.jpg``
        (background), writes greyscale working copies to
        ``./images/targ.jpg`` and ``./images/temp.jpg``, inverts the piece,
        and runs normalised cross-correlation template matching between the
        two. The column of the best match is scaled by ``280/680`` and
        reduced by 15.

        NOTE(review): 280/680 presumably converts from downloaded-image
        pixels to on-page pixels and 15 presumably compensates the slider's
        starting offset - confirm against the captcha layout.

        :return: the computed horizontal distance (float)
        """
        piece_path = './images/tb.png'
        background_path = './images/bg.jpg'
        # Flag 0 loads both images as single-channel greyscale.
        piece = cv2.imread(piece_path, 0)
        background = cv2.imread(background_path, 0)
        piece_height, piece_width = piece.shape

        # Persist the greyscale versions as working copies.
        background_copy = './images/temp.jpg'
        piece_copy = './images/targ.jpg'
        cv2.imwrite(background_copy, background)
        cv2.imwrite(piece_copy, piece)

        # Reload the piece, convert to grey, invert it and store it again.
        piece = cv2.cvtColor(cv2.imread(piece_copy), cv2.COLOR_BGR2GRAY)
        piece = abs(255 - piece)
        cv2.imwrite(piece_copy, piece)

        # Match using the images as read back from the working copies.
        piece = cv2.imread(piece_copy)
        background = cv2.imread(background_copy)
        scores = cv2.matchTemplate(piece, background, cv2.TM_CCOEFF_NORMED)
        # (row, column) index of the highest score.
        best_row, best_col = np.unravel_index(scores.argmax(), scores.shape)

        # Outline the matched region (only visible if the image is displayed).
        cv2.rectangle(background, (best_col, best_row),
                      (best_col + piece_width, best_row + piece_height),
                      (7, 249, 151), 2)
        return best_col*(280/680) - 15
    def crack(self):
        """Log in to Douban and post the rental ad to each joined group, forever.

        Each pass of the outer loop:

        1. opens https://www.douban.com/, switches into the first iframe,
           clicks the login tab, fills in username and password and submits;
        2. loads the hard-coded "joined groups" page and collects group links;
        3. for every link from index ``m`` on, opens ``<link>new_topic``,
           types the title, solves the slider captcha if the dialog element
           ``dui-dialog0`` is present, pastes the ad body and the image paths
           through the clipboard with simulated Ctrl+V, submits, and sleeps
           600 seconds;
        4. after the last group resets ``m`` to 0 and sleeps one hour.

        ``m`` starts at 2 on the first pass. Any exception is logged and
        printed, and the outer loop starts again from the login step with
        ``m`` unchanged. The method never returns.

        Log records are appended to ``C:\\pylogs\\output.log``; the directory
        is created when it does not exist.
        """
        # Raw string: "\p" and "\o" are not valid escape sequences.
        log_file = r'C:\pylogs\output.log'
        log_dir = os.path.dirname(log_file)
        if log_dir:
            os.makedirs(log_dir, exist_ok=True)
        logging.basicConfig(level=logging.INFO,
                            filename=log_file,
                            # 'a' appends to the log, 'w' would overwrite it.
                            filemode='a',
                            format=
                            '%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s'
                            )

        # ``count`` is never changed, so the loop below runs until the
        # process is stopped; set it to 0 in the handler to stop on error.
        count = 1
        # Index of the next group to post to.
        m = 2
        while count > 0:
            try:
                try:
                    logging.error("程序开始运行###########")
                    print("程序开始运行###########")
                    self.browser.implicitly_wait(10)
                    loginurl = 'https://www.douban.com/'
                    self.browser.maximize_window()
                    self.browser.get(loginurl)
                    # Switch into the login iframe.
                    iframe = self.browser.find_elements_by_tag_name("iframe")[0]
                    self.browser.switch_to.frame(iframe)
                    self.browser.find_element_by_xpath("/html/body/div[1]/div[1]/ul[1]/li[2]").click()
                except UnexpectedAlertPresentException:
                    # An alert interrupted the page: print its text, accept it.
                    dig_alert = self.browser.switch_to.alert
                    time.sleep(1)
                    print(dig_alert.text)
                    dig_alert.accept()
                    time.sleep(1)

                # Several Douban accounts may be listed; the first one is used.
                users = ["18612100611","15001260611"]
                WebDriverWait(self.browser, 5, 1).until(EC.presence_of_element_located((By.ID, 'username')))
                self.browser.find_element_by_id('username').send_keys(users[0])
                WebDriverWait(self.browser, 5, 1).until(EC.presence_of_element_located((By.ID, 'password')))
                # Password of the selected account.
                self.browser.find_element_by_id('password').send_keys('XXXXX')
                self.browser.find_element_by_xpath('/html/body/div[1]/div[2]/div[1]/div[5]/a').click()
                time.sleep(10)

                # Open the list of groups this account has joined.
                self.browser.get("https://www.douban.com/group/people/179416843/joins")
                time.sleep(5)
                alist = self.browser.find_elements_by_css_selector("ul>li>div.pic>a")
                hrefs = []
                for a in alist:
                    href = a.get_attribute("href")
                    print(a.get_attribute("href"))
                    hrefs.append(href)

                while m < len(hrefs):
                    href = hrefs[m]
                    self.browser.get(href+"new_topic")
                    time.sleep(5)
                    # Fill in the topic title.
                    self.browser.find_element_by_class_name("group-editor-input").send_keys("北京8号线育新地铁站附近南店新村主卧带阳台无中介出租")

                    # A slider captcha dialog usually appears at this point.
                    try:
                        elt = self.browser.find_element_by_id("dui-dialog0")
                    except NoSuchElementException:
                        elt = None
                    if elt is not None:
                        try:
                            # Switch into the captcha iframe.
                            iframeEl = self.browser.find_element_by_css_selector("#TCaptcha > iframe")
                            self.browser.switch_to.frame(iframeEl)
                            # Download both captcha images and locate the gap.
                            self.get_img_by_el("slideBkg","bg.jpg")
                            self.get_img_by_el("slideBlock","tb.png")
                            gap = self.get_gap()
                            print("偏移距离:",gap)
                            # Reload the captcha until the distance is at least 100.
                            while gap < 100:
                                self.browser.find_element_by_id("e_reload").click()
                                self.get_img_by_el("slideBkg", "bg.jpg")
                                self.get_img_by_el("slideBlock", "tb.png")
                                gap = self.get_gap()
                                print("偏移距离:", gap)
                            track = self.get_track(gap)
                            print('滑动轨迹', track)
                            # Drag the slider.
                            slider = self.browser.find_element_by_id("slideBlock")
                            self.move_to_gap(slider, track)
                            time.sleep(3)
                            self.browser.switch_to.default_content()
                        except Exception as e:
                            logging.error("异常信息:" + str(e))
                            print("异常信息:" + str(e))
                            # Leave the captcha iframe even when solving failed.
                            self.browser.switch_to.default_content()

                    # Put the ad body on the clipboard, focus the editor and
                    # paste it with a simulated Ctrl+V.
                    pyperclip.copy("北京8号线育新地铁站旁边(1000米),南店新村1号楼,主卧带阳台出租,无中介费用,随时可以入住,押一付三,男女不限,可以做饭菜,紧邻西小口东升科技园北领地,上班出行方便。有意向租房者可联系房东陈女士:18401378131或微信:chenfen1981426。")
                    self.browser.find_element_by_class_name("public-DraftStyleDefault-ltr").click()
                    time.sleep(1)
                    k = PyKeyboard()
                    k.press_key(k.control_key)
                    k.tap_key('v')
                    k.release_key(k.control_key)

                    # Upload images: click the image button, paste the quoted
                    # file paths with Ctrl+V and confirm with Enter.
                    self.browser.find_element_by_class_name("DRE-ui-button-image").click()
                    time.sleep(1)
                    pyperclip.copy('"C:\\chuzu\\1.jpg" "C:\\chuzu\\2.jpg" "C:\\chuzu\\3.jpg" "C:\\chuzu\\4.jpg"')
                    time.sleep(1)
                    k2 = PyKeyboard()
                    k2.press_key(k2.control_key)
                    k2.tap_key('v')
                    k2.release_key(k2.control_key)
                    k12 = PyKeyboard()
                    k12.tap_key(k12.enter_key)
                    time.sleep(5)

                    # Submit the topic, then wait ten minutes before the next group.
                    self.browser.find_element_by_class_name("group-editor-button-submit").click()
                    time.sleep(600)
                    print("放送次數:"+str(m))
                    m = m + 1

                print("发完一遍!")
                m = 0
                # Post the whole round again after one hour.
                time.sleep(3600)
            except Exception as e:
                # Log the failure and restart from the login step; ``m`` is
                # kept, so posting resumes at the group that failed.
                logging.error("异常信息:"+str(e))
                print("异常信息:"+str(e))
if __name__ == '__main__':
    # Script entry point: create the bot (this launches Chrome) and start
    # the posting loop.
    DouBanIssue().crack()

 


全部评论: 0

    我有话说: