**注意**:本爬虫使用 Qt 5.14.2 开发,依赖 QtWebKit 模块(QWebView 等类)。该版本的 Qt 并不自带 QtWebKit(Qt 5.15 和 Qt 6 中同样没有),因此需要自行配置 QtWebKit,并安装与所用 Qt 版本相匹配的 OpenSSL。
头文件:widget.h
#ifndef WIDGET_H
#define WIDGET_H
#include <QWidget>
#include <QtWebKitWidgets/QWebView>
#include<QWebSettings>
#include<QWebElementCollection>
#include <QtWebKit/qwebsettings.h>
#include <QWebFrame>
// Forward declaration of Ui::Widget: this header only stores a pointer to it,
// so the full class definition is not needed here.
QT_BEGIN_NAMESPACE
namespace Ui { class Widget; }
QT_END_NAMESPACE
/// Main window of the crawler.
///
/// Hosts a QWebView that loads the page typed by the user, extracts the image
/// elements from the loaded page, downloads and saves them, and then moves on
/// to the next page.
class Widget : public QWidget
{
    Q_OBJECT
public:
    Widget(QWidget *parent = nullptr);
    ~Widget();

    QWebView *view; ///< Web view used to load the target URL.

public slots:
    /// Loads the URL currently typed into the line edit.
    void on_pushButton_clicked();
    /// Displays the page-load progress (0-100).
    void onProgress(int progress);
    /// Runs once a page load has ended; @p ok is the result reported by the view.
    void onLoadFinished(bool ok);

private:
    /// Downloads the image at @p url and returns it decoded.
    QImage loadimg(const QString &url);

    Ui::Widget *ui;
};
#endif // WIDGET_H
源文件:widget.cpp
#include "widget.h"
#include "ui_widget.h"

#include <QFileInfo>
#include <QNetworkAccessManager>
#include <QNetworkReply>
#include <QNetworkRequest>
#include <QTimer>
#include <QWebElementCollection>
#include <QWebFrame>
#include <QWebSettings>
#include <QWebView>

#include <stdio.h>
#include <string.h>
#include <windows.h>
/// Builds the UI, embeds the QWebView into the layout named `lay`, and connects
/// the view's load signals to this widget's slots.
///
/// @param parent  Parent widget, forwarded to QWidget.
Widget::Widget(QWidget *parent)
    : QWidget(parent)
    , ui(new Ui::Widget)
{
    ui->setupUi(this);

    // The view is created with this widget as its QObject parent.
    view = new QWebView(this);
    ui->lay->addWidget(view);

    // Log whether SSL is usable and which SSL library version Qt was built against.
    qDebug() << "OpenSSL支持情况:" << QSslSocket::supportsSsl();
    qDebug()<<"QSslSocket="<<QSslSocket::sslLibraryBuildVersionString();

    // Do not auto-load images inside the view, and disable Java.
    // NOTE(review): the original comment said "images and js", but JavaEnabled
    // is a different setting from QWebSettings::JavascriptEnabled, which is left
    // untouched here — confirm which one was intended.
    QWebSettings *settings = view->settings();
    settings->setAttribute(QWebSettings::AutoLoadImages, false);
    settings->setAttribute(QWebSettings::JavaEnabled, false);

    // Pointer-to-member connections are checked at compile time; the string
    // based SIGNAL/SLOT form only fails at runtime when a slot name is wrong.
    connect(view, &QWebView::loadProgress, this, &Widget::onProgress);
    connect(view, &QWebView::loadFinished, this, &Widget::onLoadFinished);
}
// Releases the UI object allocated with `new Ui::Widget` in the constructor.
Widget::~Widget()
{
delete ui;
}
void Widget::on_pushButton_clicked()
{
QUrl url=ui->lineEdit->text();
view->load(url);
}
void Widget::onProgress(int progress)
{
//进度显示
QString title=QString::number(progress)+"%";
this->setWindowTitle(title);
}
void Widget::onLoadFinished(bool ok)
{
qDebug()<<"finish"<<ok;
//取网页的的照片标签
QWebElementCollection element=view->page()->mainFrame()->findAllElements("body> div.container.container-default > main > section > ul > li > article > a > img");
//循环遍历当前网页的照片标签并保存默认目录
for(int i=0;i<element.count();++i)
{
const QWebElement &ele=element.at(i);
QString url=ele.attribute("src");
qDebug()<<url;
QImage image=loadimg(url);
QFileInfo info(url);
QString fileName=info.fileName();
image.save(fileName);
qDebug()<<"保存的图片名称 "<<fileName;
}
qDebug()<<"断点1";
//通过谷歌浏览器可以得到网页下一页的标签为a.next,
QString test="#tuchong-com > div.container.container-default > main > section > div > div > div > a.next";
element = view->page()->mainFrame()->findAllElements(test);
qDebug()<<"断点2";
//只取10页的照片
if(element.count()>=10)
{
qDebug()<<"没有了";
return;
}
Sleep(8000);
const QWebElement &ele=element.at(0);
![请添加图片描述](https://i-blog.csdnimg.cn/direct/d985708f000948ef90bd65dc3d3e7e11.png)
//当前网页的网站显示为https://landscape.tuchong.com/?page=, 通过谷歌浏览器可以知道href标签的值为?page=,这里是页数
QString urlnext="https://landscape.tuchong.com/"+ele.attribute("href");
qDebug()<<urlnext;
QUrl nexturl=urlnext;
//加载新网页
view->load(nexturl);
}
/// Downloads the resource at @p url and decodes it as an image.
///
/// The call does not return until the request has finished: it runs a local
/// event loop that quits on the reply's finished() signal.
///
/// @param url  Address of the image to download.
/// @return The decoded image, or a null QImage when the request reports an
///         error or the downloaded data cannot be decoded.
QImage Widget::loadimg(const QString &url)
{
    QNetworkAccessManager manager;
    QNetworkReply *reply = manager.get(QNetworkRequest(QUrl(url)));

    // Wait here until the reply signals that it has finished.
    QEventLoop waitLoop;
    connect(reply, &QNetworkReply::finished, &waitLoop, &QEventLoop::quit);
    waitLoop.exec();

    // Report a failed request explicitly instead of trying to decode its body.
    if (reply->error() != QNetworkReply::NoError)
    {
        qDebug()<<"图片请求失败 "<<url<<reply->errorString();
        return QImage();
    }

    const QByteArray data = reply->readAll();
    return QImage::fromData(data);
}
入口文件:main.cpp
#include "widget.h"
#include <QApplication>
int main(int argc, char *argv[])
{
QApplication a(argc, argv);
Widget w;
w.show();
return a.exec();
}