首页 > 其他分享 >exchange邮件爬虫

exchange邮件爬虫

时间:2022-10-05 08:22:25浏览次数:60  
标签:account exchange items def 爬虫 user print 邮件 inbox

#!/usr/bin/python3
# coding=utf8

from __future__ import print_function
import shutil
from exchangelib import Credentials, Account, Configuration, DELEGATE, FileAttachment, EWSDateTime
from multiprocessing.pool import Pool

from exchangelib.protocol import BaseProtocol
from exchangelib.protocol import NoVerifyHTTPAdapter
from urllib3.exceptions import InsecureRequestWarning
import urllib3
import time, os, sys, linecache
urllib3.disable_warnings(InsecureRequestWarning)

URL = ""
MAIL_SERVER = ""
suffix = "" #邮箱尾缀

# Tell exchangelib to use this adapter class instead of the default
BaseProtocol.HTTP_ADAPTER_CLS = NoVerifyHTTPAdapter
BaseProtocol.USERAGENT = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36"

FILTER = ['admin_meical', 'e_news', 'LanguageCenter', 'rtaf_news', 'weather', "Welfare", "dict1"]
AttachFilter = 'ppt'

TSTART = EWSDateTime(2020, 7, 3)
TEND = EWSDateTime(2022, 7, 3)

if not os.path.isdir("attach"):
    os.mkdir("attach")

pwd_path = os.getcwd()
AttachDir = os.path.join(pwd_path, 'attach')

def R(message):
    """Return *message* wrapped in the ANSI codes for bold bright red."""
    start = "\033[1;91m"
    reset = "\033[0;m"
    return start + "{}".format(message) + reset

def G(message):
    """Return *message* wrapped in the ANSI codes for bold bright green."""
    start = "\033[1;92m"
    reset = "\033[0;m"
    return start + "{}".format(message) + reset

def Y(message):
    """Return *message* wrapped in the ANSI codes for bold bright yellow."""
    start = "\033[1;93m"
    reset = "\033[0;m"
    return start + "{}".format(message) + reset

def B(message):
    """Return *message* wrapped in the ANSI codes for bold bright blue."""
    start = "\033[1;94m"
    reset = "\033[0;m"
    return start + "{}".format(message) + reset

def PrintException():
    """Print the exception currently being handled, in red, with its source line.

    The exception is taken from ``sys.exc_info()``. The line number and text
    reported come from the first traceback entry, i.e. the frame in which the
    exception is being handled, not necessarily the innermost frame that
    raised it.

    Returns:
        None. Does nothing when no exception is active.
    """
    _, exc_obj, tb = sys.exc_info()
    if tb is None:
        # Called outside an ``except`` block: nothing to report, and
        # dereferencing ``tb`` would raise AttributeError.
        return
    frame = tb.tb_frame
    lineno = tb.tb_lineno
    filename = frame.f_code.co_filename
    # Discard stale cache entries so a file edited on disk is re-read.
    linecache.checkcache(filename)
    line = linecache.getline(filename, lineno, frame.f_globals)
    print(R('EXCEPTION IN (LINE {} "{}"): {}'.format(lineno, line.strip(), exc_obj)))

def getAccount(username, password):
    if not username.endswith(suffix):
        username += "@" + suffix
    credentials = Credentials(username, password)
    config = Configuration(server=MAIL_SERVER, credentials=credentials)
    account = Account(primary_smtp_address=username, config=config,
                      autodiscover=False, access_type=DELEGATE)
    return account

def log(user, text):
    with open(os.path.join(user, "log.txt"), "a", encoding="utf-8") as f:
        f.write(text+"\n")

def getinfo(user, account):
    print(user, "[*]Found {} mails in inbox, {} unread".format(
        account.inbox.total_count, account.inbox.unread_count))
    print("trash", account.trash.total_count)
    print("outbox", account.outbox.total_count)
    print("sent", account.sent.total_count)

def mkuserdir(user):
    if not os.path.isdir(user):
        os.mkdir(user)
    else:
        print(B("[*]Dir %s already exists" % user))

def download_attachments(items, user):
    for item in items:
        try:
            #print("[*]Find message: %s" % (item.message_id))
            pathh = "%s_%s_%s" % (item.sender.email_address.split("@")[0], str(item.datetime_received).split()[0], item.importance)
            pathh = os.path.join(user, pathh)
            if item.has_attachments:
                for attachment in item.attachments:
                    if isinstance(attachment, FileAttachment):
                        if AttachFilter not in attachment.name:
                            continue
                        if not os.path.isdir(pathh):
                            os.mkdir(pathh)
                        if len(attachment.name) > 60:
                            name, ext = attachment.name.rsplit(".",1)
                            attach_name = "{}.{}".format(name[:55], ext)
                        else:
                            attach_name = attachment.name
                        attach_path = os.path.join(pathh, attach_name)
                        with open(attach_path, 'wb') as f, attachment.fp as fp:
                            buffer = fp.read(1024)
                            while buffer:
                                f.write(buffer)
                                buffer = fp.read(1024)
                        shutil.copy(attach_path, AttachDir)
                        log(user, '[+]Attachment saved: ' + attachment.name)
                        print(G("[+]Saved attachment: %s for user: %s" % (attachment.name, user)))
        except Exception as e:
            pass
            #PrintException()

def getinbox(account, user):
    #print(B("[*]Getting attachments in inbox: %s" % user))
    # starting = account.default_timezone.localize(TSTART)
    # end = account.default_timezone.localize(TEND)
    # items = account.inbox.filter(datetime_received__range=(starting, end))
    items = account.inbox.all()
    download_attachments(items, user)
    filtered_items = account.inbox.filter(subject__contains='foo').exclude(categories__icontains='bar')

def gettrash(account, user):
    #print(B("[*]Getting attachments in th trash: %s" % user))
    # starting = account.default_timezone.localize(TSTART)
    # end = account.default_timezone.localize(TEND)
    # items = account.inbox.filter(datetime_received__range=(starting, end))
    items = account.inbox.all()
    download_attachments(items, user)
    return 0

def getoutbox(account, user):
    #print(B("[*]Getting attachments in the outbox: %s" % user))
    # starting = account.default_timezone.localize(TSTART)
    # end = account.default_timezone.localize(TEND)
    # items = account.inbox.filter(datetime_received__range=(starting, end))
    items = account.inbox.all()
    download_attachments(items, user)
    return 0

def getsent(account, user):
    #print(B("[*]Getting attachments in the sent: %s" % user))
    # starting = account.default_timezone.localize(TSTART)
    # end = account.default_timezone.localize(TEND)
    # items = account.inbox.filter(datetime_received__range=(starting, end))
    items = account.inbox.all()
    download_attachments(items, user)
    return 0

def usermail(user, passwd):
    mkuserdir(user)

    tries1 = 0
    while tries1<2:
        try:
            account = getAccount(user, passwd)
            #getinfo(user, account)
            break
        except Exception as e:
            #PrintException()
            tries1 += 1
            time.sleep(20)
    if tries1 == 3:
        return False
    #return True

    tries = 0
    while tries<2:
        try:
            if [].count(user) == 0:
                getinbox(account, user)
            gettrash(account, user)
            getoutbox(account, user)
            getsent(account, user)
            return True
        except Exception as e:
            #PrintException()
            tries += 1
            #print(Y("[*]Sleep 20s and try again"))
            time.sleep(10)
    return False

def account_entry(fp):
    for i in fp:
        user, passwd = i.split()
        #print(Y("==================================Enter Next Account=================================="))
        # if usermail(user, passwd):
        #     print(G("[+]Complete download attachments for %s" % (user)))
        # else:
        #     print(R("[-]Fail download attachments for %s" % (user)))
        usermail(user, passwd)

    #print(B("[*]Getting all attachment"))

if __name__ == '__main__':

    # 分割用户名:哈希:密码,保留用户名和密码
    # with open("ori.txt", 'r', encoding='utf-8') as f:
    #     with open("new.txt", 'a', encoding='utf-8') as fp:
    #         lines = f.readlines()
    #         for line in lines:
    #             [u, h, p] = line.split(":")
    #             line2 = u + ' ' + p
    #             fp.write(line2)

    # 将1600个用户分给20个线程处理,每个线程处理80个用户
    fd = open("new.txt").read().split("\n")
    c = 0
    fd_list = []
    all_list = []
    thread_jobs = 80
    for f in fd:
        c = c + 1
        if c>thread_jobs:
            all_list.append(fd_list)
            fd_list = []
            c = 0
        fd_list.append(f)
    if not fd_list:
        all_list.append(fd_list)

    thread_num = len(all_list)
    print(G("[*]Thread num is: %s" % str(thread_num)))

    pool = Pool(processes=thread_num)
    pool.map(account_entry, all_list)

标签:account,exchange,items,def,爬虫,user,print,邮件,inbox
From: https://www.cnblogs.com/z5onk0/p/16755019.html

相关文章

  • 07-RabbitMQ核心API-Direct Exchange
    DirectExchange简介所有发送到directexchange的消息被转发到Routekey中指定的Queue注意:Direct模式可以使用RabbitMQ自带的Exchange(defaultexchange),所以不需......
  • 08-RabbitMQ核心API-Topic Exchange
    TopicExchange简介所有发送到TopicExchange的消息被转发到所有关心RouteKey中指定Topic的Queue上Exchange将RouteKey和某Topic进行模糊匹配,此时队列需要绑定一个T......
  • 09-RabbitMQ核心API-Fanout Exchange
    FanoutExchange简介不处理路由键,只需要简单的将队列绑定到交换机上发送到交换机的消息都会被转发到与该交换机绑定的所有队列上Fanout交换机转发消息是最快的......
  • 06-RabbitMQ核心API-Exchange
    Exchange流程图接收消息,并根据路由键转发消息所绑定的队列Exchange属性属性含义name交换机名称type交换机类型[direct|topic|fanout......
  • python 爬虫(正则)
    ......
  • python爬虫配置随机请求头headers伪装User-Agent
    python爬虫随机headers伪装fake_useragentfake_useragent库调用方法ua.random可以随机返回一个headers(User-Agent)fromfake_useragentimportUserAgent#下载:pipins......
  • 拉钩网爬虫
    【lg.py】importscrapyimporttest1.itemsclassLgSpider(scrapy.Spider):name='lg'#允许爬取的域allowed_domains=['lagou.com']#爬虫入......
  • 盘点一个Python抓取有道翻译爬虫中的报错问题
    大家好,我是皮皮。一、前言前几天在Python白银交流群【斌】问了一个Python网络爬虫的问题,提问截图如下:报错截图如下:粉丝需要的数据如下:二、实现过程有道翻译之前有做过很多,确......
  • 一个爬虫使用教程
    前言用了\(GitHub\)上一个项目作为载体,该项目中有些代码需要修改https://github.com/dataabc/weibo-search/安装python准备工作进入\(weibo-search-master\)......
  • 邮件正则无表达式编写
    email_reg=/^\w{3,}(\.\w+)*@[A-z0-9]+(\.[A-z]+){1,2}$/;说明:^\w表示以字母、数字或下划线开头,{3,}表示至少要三个字符\.表示邮件中的.,\是转义用的,w+表示一个或多个......