标签：info 断点续传 setopt quot libcurl easy curl 多线程 CURLOPT

libcurl多线程下载一步步实现

创建时间: 2024年12月1日 17:35
标签: libcurl, linux, 下载, 多线程
最后编辑: 2025年1月16日 23:43

平台是WSL的Ubuntu22，使用Gcc编译。

单线程下载

编译命令gcc -o trans trans.c -lcurl

/* trans.c */
#include <curl/curl.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <string.h>
#include <stdlib.h>
// 文件信息

struct fileInfo{

char* fileptr;

int offset;

};
// 回调函数，负责将数据写入文件

size_t writeFunc(void* ptr, size_t size, size_t memb, void* userdata) {

// ptr是请求到的数据，sizememb表示传输数据块的大小，userdata是我们传进来的参数，前面三个是固定的

struct fileInfo info = (struct fileInfo)userdata;

memcpy(info->fileptr + info->offset, ptr, sizememb); // 每次拷贝sizememb的数据到fileptr，并将文件指针偏移sizememb

info->offset+=sizememb;

printf("%ld\n", sizememb);

return size*memb;

}
// 回调函数，负责消除curl_easy_setopt(curl, CURLOPT_HEADER, 1);将数据打印到控制台的行为

size_t writeNoFunc(void* ptr, size_t size, size_t memb, void* userdata) {
return size*memb;

}
// 获取下载文件的大小

double getDownloadFileLength(const char* url) {

double downloadFileLength = 0;  // 创建变量记录文件长度，文件长度是个复数，其他类型变量无法正确接收文件长度

CURL* curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_USERAGENT, &quot;Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36&quot;);
curl_easy_setopt(curl, CURLOPT_HEADER, 1);
curl_easy_setopt(curl, CURLOPT_NOBODY,1);  // 这两行指明执行时只获取响应头，不包含响应体
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeNoFunc);

CURLcode res =  curl_easy_perform(curl);
if(res == CURLE_OK) {
    curl_easy_getinfo(curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &amp;downloadFileLength);
}else {
    downloadFileLength = -1;
}
curl_easy_cleanup(curl);

return downloadFileLength;

}
// 下载

int download(const char* url, const char* filename) {
long fileLength = getDownloadFileLength(url);
printf(&quot;fileLength: %ld\n&quot;, fileLength);
// write
int fd = open(filename, O_RDWR|O_CREAT, S_IRUSR|S_IWUSR );  // 打开文件
if(fd == -1) {
    return -1;
}

if(-1 == lseek(fd, fileLength-1, SEEK_SET))  // 开辟空间
{
    perror(&quot;lseek&quot;);
    close(fd);
    return -1;
}

if(1!=write(fd, &quot;&quot;, 1)){  // 写入&quot;&quot;就相当于写入\0，标志着字符串结尾
    perror(&quot;write&quot;);
    close(fd);
    return -1;
}

char* fileptr = (char*)mmap(NULL, fileLength, PROT_READ|PROT_WRITE, MAP_SHARED,fd, 0); // 将文件映射到内存,强转成(char*)是因为要向这块内存写入字符串
if(fileptr == MAP_FAILED){  // 映射失败处理
    perror(&quot;mmap&quot;);
    close(fd);
    return 1;
}

struct fileInfo* info = (struct fileInfo*)malloc(sizeof(struct fileInfo));
if(info == NULL){  // 结构体内存分配失败处理
    munmap(fileptr, fileLength);
    close(fd);
    return -1;
}

info-&gt;fileptr = fileptr;  // 让结构体中的fileptr指针指向映射文件的内存
info-&gt;offset = 0;

// curl
CURL* curl = curl_easy_init();

curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeFunc);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, info);  // 将info 传入writeFunc进行操作

CURLcode res =  curl_easy_perform(curl);
if(res != CURLE_OK) {
    printf(&quot;res %d\n&quot;, res);
}
curl_easy_cleanup(curl);  // 清理curl
free(info);  // 释放结构体指针
close(fd);  // 关闭文件
munmap(fileptr, fileLength);  // 释放映射的内存

return 0;

}
int main() {

const char* url = "https://www.sordum.org/files/download/dns-jumper/DnsJumper.zip";

const char* filename = "./get.zip";

download(url, filename);

return 0;

}

下载完成后可以通过md5sum 文件名查看md5码来验证下载是否正确。

程序流程：

获取要下载的文件大小，
open打开一个文件，返回文件描述符fd，
通过lseek和write操作fd开辟出相当于要下载文件大小的空间，
使用mmap将这块磁盘空间映射到内存，接着只需要使用指向这块内存的指针对这块内存读写即可。
通过curl下载文件
最后则是释放内存，清理资源

开辟空间的具体做法是，通过lseek将读写指针移动到合适的位置，然后通过write在该位置写入\0，这样就将这一块存储空间固定下来。

多线程版本

编译命令gcc -o trans trans.c -lcurl -lpthread，加入了pthread库，因此编译时添加-lpthread后缀

#include <curl/curl.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <string.h>
#include <stdlib.h>
#include <pthread.h>
define THREAD_NUM 10
// 文件信息

struct fileInfo{

const char* url;

char* fileptr;

int offset;

int end;

pthread_t thid;

};
// 回调函数，负责将数据写入文件

size_t writeFunc(void* ptr, size_t size, size_t memb, void* userdata) {

// ptr是请求到的数据，sizememb表示传输数据块的大小，userdata是我们传进来的参数，前面三个是固定的

struct fileInfo info = (struct fileInfo)userdata;

memcpy(info->fileptr + info->offset, ptr, sizememb); // 每次拷贝sizememb的数据到fileptr，并将文件指针偏移sizememb

info->offset+=sizememb;

printf("write: %ld\n", sizememb);

return size*memb;

}
// 回调函数，负责消除curl_easy_setopt(curl, CURLOPT_HEADER, 1);将数据打印到控制台的行为

size_t writeNoFunc(void* ptr, size_t size, size_t memb, void* userdata) {
return size*memb;

}
// 获取下载文件的大小

double getDownloadFileLength(const char* url) {

double downloadFileLength = 0;  // 创建变量记录文件长度，文件长度是个复数，其他类型变量无法正确接收文件长度

CURL* curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_USERAGENT, &quot;Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36&quot;);
curl_easy_setopt(curl, CURLOPT_HEADER, 1);
curl_easy_setopt(curl, CURLOPT_NOBODY,1);  // 这两行指明执行时只获取响应头，不包含响应体
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeNoFunc);

CURLcode res =  curl_easy_perform(curl);
if(res == CURLE_OK) {
    curl_easy_getinfo(curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &amp;downloadFileLength);
}else {
    downloadFileLength = -1;
}
curl_easy_cleanup(curl);

return downloadFileLength;

}
// 线程函数

void* worker(void* arg){

struct fileInfo* info = (struct fileInfo*)arg;
char range[64] = {0};
snprintf(range, 64, &quot;%d-%d&quot;, info-&gt;offset, info-&gt;end);  //设置range字符数组的值
printf(&quot;threadId: %ld, download from %d to %d\n&quot;, info-&gt;thid, info-&gt;offset, info-&gt;end);
// curl
CURL* curl = curl_easy_init();

curl_easy_setopt(curl, CURLOPT_URL, info-&gt;url);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeFunc); 
curl_easy_setopt(curl, CURLOPT_WRITEDATA, info);  // 将info 传入writeFunc进行操作
curl_easy_setopt(curl, CURLOPT_RANGE, range);  // 根据range字符串指定获取的数据范围
CURLcode res =  curl_easy_perform(curl);
if(res != CURLE_OK) {
    printf(&quot;res %d\n&quot;, res);
}
curl_easy_cleanup(curl);  // 清理curl

return NULL;

}
// 下载

int download(const char* url, const char* filename) {
long fileLength = getDownloadFileLength(url);
printf(&quot;fileLength: %ld\n&quot;, fileLength);
// write
int fd = open(filename, O_RDWR|O_CREAT, S_IRUSR|S_IWUSR );  // 打开文件
if(fd == -1) {
    return -1;
}

if(-1 == lseek(fd, fileLength-1, SEEK_SET))  // 开辟空间
{
    perror(&quot;lseek&quot;);
    close(fd);
    return -1;
}

if(1!=write(fd, &quot;&quot;, 1)){  // 写入&quot;&quot;就相当于写入\0，标志着字符串结尾
    perror(&quot;write&quot;);
    close(fd);
    return -1;
}

char* fileptr = (char*)mmap(NULL, fileLength, PROT_READ|PROT_WRITE, MAP_SHARED,fd, 0); // 将文件映射到内存,强转成(char*)是因为要向这块内存写入字符串
if(fileptr == MAP_FAILED){  // 映射失败处理
    perror(&quot;mmap&quot;);
    close(fd);
    return 1;
}

// 多线程
int i=0;
long partSize = fileLength / THREAD_NUM;  // 分块
struct fileInfo* info[THREAD_NUM+1] = {0};  // 创建结构体指针数组
for(i=0; i&lt;=THREAD_NUM; i++){  // 给每个结构体指针分配内存，并设置其中的值
    info[i] = (struct fileInfo*)malloc(sizeof(struct fileInfo));
    info[i]-&gt;offset = i*partSize;
    if(i&lt;THREAD_NUM){
        info[i]-&gt;end = (i+1)*partSize - 1;
    }else{
        info[i]-&gt;end = fileLength-1;
    }
    info[i]-&gt;fileptr = fileptr;
    info[i]-&gt;url = url;
    // printf(&quot;offset:%d, end:%d\n&quot;,info[i]-&gt;offset, info[i]-&gt;end);
}

// 线程创建
for(i=0; i&lt;=THREAD_NUM; i++){
    pthread_create(&amp;(info[i]-&gt;thid), NULL, worker, info[i]);  // info[i[作为worker参数
}

// 线程启动
for(i=0; i&lt;=THREAD_NUM; i++){
    pthread_join(info[i]-&gt;thid, NULL);
}

// 释放结构体指针指向的内存
for(i=0; i&lt;=THREAD_NUM; i++){
    free(info[i]);  
}
close(fd);  // 关闭文件IO
munmap(fileptr, fileLength);  // 释放映射的内存

return 0;

}
int main() {

const char* url = "https://www.sordum.org/files/download/dns-jumper/DnsJumper.zip";

const char* filename = "./get.zip";

download(url, filename);

return 0;

}

程序流程：

getDownloadFileLength获取下载文件大小
根据文件大小，使用open打开一个文件，并且通过lseek和write开辟磁盘空间
使用mmap将文件映射到内存中，并将该内存块首地址强转为char*类型，以供写入数据
将文件分块，程序中是分为10+1块，将文件总长度除以10，得到10块中每块的长度，1则是余下的文件大小
创建线程函数worker，worker根据传入的参数来下载文件
创建10+1个线程，每个线程写入一块文件
启动多线程
最后则是释放内存，清理资源

加入进度条

#include <curl/curl.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <string.h>
#include <stdlib.h>
#include <pthread.h>
#include <signal.h>
define THREAD_NUM 10
// 文件信息

struct fileInfo{

const char* url;

char* fileptr;

int offset;

int end;

pthread_t thid;

long fileLength;

};
// 进度条

int progress_bar(double x)

{

int i;

x*=10000;

x = ((double)((int)x))/100;

printf("\r\033[K");

printf("%.2f%%\t[", x);

for(i = 0; i < (x/10); i++)

{

printf("#");

}

if(x==100){

printf("]\n");

}else{

printf("]");

}

fflush(stdout);//立刻输出
return 0;

}
// 回调函数，负责将数据写入文件

size_t writeFunc(void* ptr, size_t size, size_t memb, void* userdata) {

// ptr是请求到的数据，sizememb表示传输数据块的大小，userdata是我们传进来的参数，前面三个是固定的

struct fileInfo info = (struct fileInfo)userdata;

memcpy(info->fileptr + info->offset, ptr, sizememb); // 每次拷贝sizememb的数据到fileptr，并将文件指针偏移sizememb

info->offset+=sizememb;

// print progress bar

static double progress=0;

progress += sizememb;

double x = progress/(info->fileLength);

progress_bar(x);

return size*memb;

}
// 回调函数，负责消除curl_easy_setopt(curl, CURLOPT_HEADER, 1);将数据打印到控制台的行为

size_t writeNoFunc(void* ptr, size_t size, size_t memb, void* userdata) {
return size*memb;

}
// 获取下载文件的大小

double getDownloadFileLength(const char* url) {

double downloadFileLength = 0;  // 创建变量记录文件长度，文件长度是个复数，其他类型变量无法正确接收文件长度

CURL* curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_USERAGENT, &quot;Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36&quot;);
curl_easy_setopt(curl, CURLOPT_HEADER, 1);
curl_easy_setopt(curl, CURLOPT_NOBODY,1);  // 这两行指明执行时只获取响应头，不包含响应体
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeNoFunc);

CURLcode res =  curl_easy_perform(curl);
if(res == CURLE_OK) {
    curl_easy_getinfo(curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &amp;downloadFileLength);
}else {
    downloadFileLength = -1;
}
curl_easy_cleanup(curl);

return downloadFileLength;

}
// 线程函数

void* worker(void* arg){

struct fileInfo* info = (struct fileInfo*)arg;
char range[64] = {0};
snprintf(range, 64, &quot;%d-%d&quot;, info-&gt;offset, info-&gt;end);  //设置range字符数组的值
printf(&quot;threadId: %ld, download from %d to %d\n&quot;, info-&gt;thid, info-&gt;offset, info-&gt;end);
// curl
CURL* curl = curl_easy_init();

curl_easy_setopt(curl, CURLOPT_URL, info-&gt;url);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeFunc); 
curl_easy_setopt(curl, CURLOPT_WRITEDATA, info);  // 将info 传入writeFunc进行操作
curl_easy_setopt(curl, CURLOPT_RANGE, range);  // 根据range字符串指定获取的数据范围
CURLcode res =  curl_easy_perform(curl);
if(res != CURLE_OK) {
    printf(&quot;res %d\n&quot;, res);
}
curl_easy_cleanup(curl);  // 清理curl

return NULL;

}
// 下载

int download(const char* url, const char* filename) {
long fileLength = getDownloadFileLength(url);
printf(&quot;fileLength: %ld\n&quot;, fileLength);
// write
int fd = open(filename, O_RDWR|O_CREAT, S_IRUSR|S_IWUSR );  // 打开文件
if(fd == -1) {
    return -1;
}

if(-1 == lseek(fd, fileLength-1, SEEK_SET))  // 开辟空间
{
    perror(&quot;lseek&quot;);
    close(fd);
    return -1;
}

if(1!=write(fd, &quot;&quot;, 1)){  // 写入&quot;&quot;就相当于写入\0，标志着字符串结尾
    perror(&quot;write&quot;);
    close(fd);
    return -1;
}

char* fileptr = (char*)mmap(NULL, fileLength, PROT_READ|PROT_WRITE, MAP_SHARED,fd, 0); // 将文件映射到内存,强转成(char*)是因为要向这块内存写入字符串
if(fileptr == MAP_FAILED){  // 映射失败处理
    perror(&quot;mmap&quot;);
    close(fd);
    return 1;
}

// 多线程
int i=0;
long partSize = fileLength / THREAD_NUM;  // 分块
struct fileInfo* info[THREAD_NUM+1] = {0};  // 创建结构体指针数组
for(i=0; i&lt;=THREAD_NUM; i++){  // 给每个结构体指针分配内存，并设置其中的值
    info[i] = (struct fileInfo*)malloc(sizeof(struct fileInfo));
    info[i]-&gt;offset = i*partSize;
    if(i&lt;THREAD_NUM){
        info[i]-&gt;end = (i+1)*partSize - 1;
    }else{
        info[i]-&gt;end = fileLength-1;
    }
    info[i]-&gt;fileptr = fileptr;
    info[i]-&gt;url = url;
    info[i]-&gt;fileLength = fileLength;
    // printf(&quot;offset:%d, end:%d\n&quot;,info[i]-&gt;offset, info[i]-&gt;end);
}

// 线程创建
for(i=0; i&lt;=THREAD_NUM; i++){
    pthread_create(&amp;(info[i]-&gt;thid), NULL, worker, info[i]);
}

// 线程启动
for(i=0; i&lt;=THREAD_NUM; i++){
    pthread_join(info[i]-&gt;thid, NULL);
}

// 释放结构体指针指向的内存
for(i=0; i&lt;=THREAD_NUM; i++){
    free(info[i]);  
}
close(fd);  // 关闭文件IO
munmap(fileptr, fileLength);  // 释放映射的内存

return 0;

}
void signal_handler(int signum){

printf("signum: %d\n", signum);

exit(1);

}
int main() {

const char* url = "https://www.sordum.org/files/download/dns-jumper/DnsJumper.zip";

const char* filename = "./get.zip";

if(SIG_ERR == signal(SIGINT, signal_handler))

{

perror("signal\n");

return -1;

}

download(url, filename);

return 0;

}

实现断点续传

思路：在下载过程中停止下载时，通过C标准库中的signal函数发出一个信号，在该信号的处理函数中，将当前下载情况记录到一个文件中，继续下载时打开该文件从文件中获取之前的下载情况继续下载。（实际逻辑是，每次下载时检查该文件是否存在，如果存在，就从该文件中记录的下载位置下载，不存在就从头开始下载。且一旦正常下载完成，就删除该文件）
同时要更改进度条，原因是libcurl传入到progressFunc函数中的totalDownload是根据 curl_easy_setopt(curl, CURLOPT_RANGE, range)中的range变量决定的，而range是一个“offset-end”形式的字符串，即totalDownload = end-offset；而nowDownload则是根据writeFunc的返回值累计而成的。也就是说nowDownload的取值范围是[0, end-offset]。需要说明的是，每个线程都有一个totalDownload。先前我们计算进度条的方式是将downloadFileLength作为分母，如果不需要断点续传，这样是没问题的。但是断点续传时，range的offset不再是从头开始，此时的offset是原来的offset加上了断点前下载的数据大小，这就导致新的end-offset得到的值小于原来的end-offset，想象一下，断点前的offset-end是0-100，下载到50时断开了，现在的offset-end是50-100，然而downloadFileLength是不变的，所以现在能得到的进度条变成【0-50】。
更改进度条的两种方式：

一是更改进度条的分母，也就是总文件大小，将总文件大小改成断点续传时剩下的要下载的总文件大小。即分母不取总文件大小，而是取每个线程的totalDownload的和。

二是将分子加上之前已下载的数据量。将断点的每个线程的offset的和减去初始的每个线程的offset和，就可以得到已下载的数据量。

#include <curl/curl.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <string.h>
#include <stdlib.h>
#include <pthread.h>
#include <signal.h>
define THREAD_NUM 10
// 文件信息

struct fileInfo{

const char* url;

char* fileptr;

int offset;

int end;

pthread_t thid;

double nowDownload;

double totalDownload;

FILE* recordFile;

};
struct fileInfo** pInfoTable; // 全局变量，指向结构体指针数组
// 回调函数，负责将数据写入文件

size_t writeFunc(void* ptr, size_t size, size_t memb, void* userdata) {

// ptr是请求到的数据，sizememb表示传输数据块的大小，userdata是我们传进来的参数，前面三个是固定的

struct fileInfo info = (struct fileInfo)userdata;

memcpy(info->fileptr + info->offset, ptr, sizememb); // 每次拷贝sizememb的数据到fileptr，并将文件指针偏移sizememb

info->offset+=size*memb;
return size*memb;

}
// 回调函数，负责消除curl_easy_setopt(curl, CURLOPT_HEADER, 1);将数据打印到控制台的行为

size_t writeNoFunc(void* ptr, size_t size, size_t memb, void* userdata) {
return size*memb;

}
// 下载进度会传入该函数处理

int progressFunc(void* userdata, double totalDownload, double nowDownload, double totalUpload, double nowUpload)

{  // 每个线程都会单独调用progressFunc，并且获取到的nowDownload和totalDownload也是当前线程下载的文件块的下载信息

int percent = 0;

static int print = 0;

struct fileInfo* info = (struct fileInfo*)userdata;

info->nowDownload = nowDownload; //更新当前线程操作的结构体的download

info->totalDownload = totalDownload;
if(totalDownload&gt;0){
    int i=0;
    double allNowDownload = 0;
    double allTotalDownload = 0;
    for(i=0; i&lt;=THREAD_NUM; i++){//将所有线程，也就是info[0-10]的download加在一起
        allNowDownload+=pInfoTable[i]-&gt;nowDownload;
        allTotalDownload+=pInfoTable[i]-&gt;totalDownload;
    }

    percent = (int)(allNowDownload/allTotalDownload*100);
}
if(percent&gt;print){
    printf(&quot;percent: %d%%\n&quot;, percent);  // 由于一些原因，percent总体上趋于100，但是局部会有下降，因此这样写确保它是一直上升的
    print=percent;
}

// printf(&quot;nowDownload: %f, totalDownload: %f\n&quot;, nowDownload,totalDownload);

return 0;

}
// 获取下载文件的大小

double getDownloadFileLength(const char* url) {

double downloadFileLength = 0;  // 创建变量记录文件长度，文件长度是个复数，其他类型变量无法正确接收文件长度

CURL* curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_USERAGENT, &quot;Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36&quot;);
curl_easy_setopt(curl, CURLOPT_HEADER, 1);
curl_easy_setopt(curl, CURLOPT_NOBODY,1);  // 这两行指明执行时只获取响应头，不包含响应体
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeNoFunc);  

CURLcode res =  curl_easy_perform(curl);
if(res == CURLE_OK) {
    curl_easy_getinfo(curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &amp;downloadFileLength);
}else {
    perror(&quot;downloadFileLength error\n&quot;);
    downloadFileLength = -1;
}
curl_easy_cleanup(curl);

return downloadFileLength;

}
// 线程函数

void* worker(void* arg){

struct fileInfo* info = (struct fileInfo*)arg;
char range[64] = {0};
if(info-&gt;recordFile){
    fscanf(info-&gt;recordFile, &quot;%d-%d&quot;, &amp;info-&gt;offset, &amp;info-&gt;end);  // fscanf每次读取会更新内部指针，下次读取会从上次读取的末尾开始，这就使得多个线程读取一个文件时不会读取到相同的数据，而是按线程启动的顺序读完整个文件
}
if(info-&gt;offset &gt; info-&gt;end) return NULL;  // 当一段文件被下载完时，offset = end+1，因此作判断返回
snprintf(range, 64, &quot;%d-%d&quot;, info-&gt;offset, info-&gt;end);  //设置range字符数组的值

printf(&quot;threadId: %ld, download from %d to %d\n&quot;, info-&gt;thid, info-&gt;offset, info-&gt;end);
// curl
CURL* curl = curl_easy_init();

curl_easy_setopt(curl, CURLOPT_URL, info-&gt;url);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeFunc); 
curl_easy_setopt(curl, CURLOPT_WRITEDATA, info);  // 将info 传入writeFunc进行操作

curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
curl_easy_setopt(curl, CURLOPT_PROGRESSFUNCTION, progressFunc);
curl_easy_setopt(curl, CURLOPT_PROGRESSDATA, info);

curl_easy_setopt(curl, CURLOPT_RANGE, range);  // 根据range字符串指定获取的数据范围
CURLcode res =  curl_easy_perform(curl);
if(res != CURLE_OK) {
    printf(&quot;res %d\n&quot;, res);
}
curl_easy_cleanup(curl);  // 清理curl

return NULL;

}
// 下载

int download(const char* url, const char* filename) {
long fileLength = getDownloadFileLength(url);
printf(&quot;fileLength: %ld\n&quot;, fileLength);
// write
int fd = open(filename, O_RDWR|O_CREAT, S_IRUSR|S_IWUSR );  // 打开文件
if(fd == -1) {
    return -1;
}

if(-1 == lseek(fd, fileLength-1, SEEK_SET))  // 开辟空间
{
    perror(&quot;lseek&quot;);
    close(fd);
    return -1;
}

if(1!=write(fd, &quot;&quot;, 1)){  // 写入&quot;&quot;就相当于写入\0，标志着字符串结尾
    perror(&quot;write&quot;);
    close(fd);
    return -1;
}

char* fileptr = (char*)mmap(NULL, fileLength, PROT_READ|PROT_WRITE, MAP_SHARED,fd, 0); // 将文件映射到内存,强转成(char*)是因为要向这块内存写入字符串
if(fileptr == MAP_FAILED){  // 映射失败处理
    perror(&quot;mmap&quot;);
    close(fd);
    return -1;
}

FILE* fp = fopen(&quot;a.txt&quot;, &quot;r&quot;);  // 打开记录文件传输断点的文件

// 多线程
int i=0;
long partSize = fileLength / THREAD_NUM;  // 分块
struct fileInfo* info[THREAD_NUM+1] = {NULL};  // 创建结构体指针数组

for(i=0; i&lt;=THREAD_NUM; i++){  // 给每个结构体指针分配内存，并设置其中的值
    info[i] = (struct fileInfo*)malloc(sizeof(struct fileInfo));
    memset(info[i], 0, sizeof(struct fileInfo));  // 将info[i]指向的结构体中的所有值置为0
    info[i]-&gt;offset = i*partSize;
    if(i&lt;THREAD_NUM){
        info[i]-&gt;end = (i+1)*partSize - 1;
    }else{
        info[i]-&gt;end = fileLength-1;
    }
    info[i]-&gt;fileptr = fileptr;
    info[i]-&gt;url = url;
    info[i]-&gt;recordFile = fp;
    // printf(&quot;offset:%d, end:%d\n&quot;,info[i]-&gt;offset, info[i]-&gt;end);
}
pInfoTable = info;
// 线程创建
for(i=0; i&lt;=THREAD_NUM; i++){
    pthread_create(&amp;(info[i]-&gt;thid), NULL, worker, info[i]);
    usleep(1);
}

// 线程启动
for(i=0; i&lt;=THREAD_NUM; i++){
    pthread_join(info[i]-&gt;thid, NULL);
}

// 释放结构体指针指向的内存
for(i=0; i&lt;=THREAD_NUM; i++){
    free(info[i]);  
}
if(fp){  // 验证a.txt是否存在，正常下载的情况下没断开过下载的情况下，不会产生a.txt，则fp=NULL，此时fclose(fp)会报错，因此要先验证fp是否存在
    fclose(fp);
    if(remove(&quot;a.txt&quot;) !=0){  // 文件正常下载完成且a.txt存在才会执行到这里，删除a.txt
        perror(&quot;error delete a.txt!&quot;);
    }
}
close(fd);  // 关闭文件IO
munmap(fileptr, fileLength);  // 释放映射的内存


return 0;

}
// 断开下载时，会执行以下代码

void signal_handler(int signum){

printf("signum: %d\n", signum);

int i=0;

int fd = open("a.txt", O_RDWR|O_CREAT, S_IRUSR|S_IWUSR);

if(fd==-1)

{

exit(1);

}

for(i=0; i<=THREAD_NUM; i++)

{  // 把断开下载时，每个线程的下载情况写入a.txt

char range[64] = {0};

snprintf(range, 64, "%d-%d\r\n", pInfoTable[i]->offset, pInfoTable[i]->end);
    write(fd, range, strlen(range));
}
close(fd);
exit(1);

}
// 根据URL创建文件名

void getFileName(const char* url, char** filename){

// const char* delim = '/';

const char* lastSlash = strrchr(url, '/');

if(lastSlash!=NULL){

filename = (char)malloc(strlen(lastSlash+1)+3);

strcpy(filename, ++lastSlash);

}

printf("%s\n", lastSlash);

size_t len = strlen(filename);

(filename)[len-4] = 'c';

(filename)[len-3] = 'p';

(filename)[len-2] = 'y';

(filename)[len-1] = lastSlash[len-4];

(filename)[len] = lastSlash[len-3];

(filename)[len+1] = lastSlash[len-2];

(filename)[len+2] = lastSlash[len-1];

(filename)[len+3] = '\0';

}
int main() {
// const char* url = &quot;https://www.sordum.org/files/download/dns-jumper/DnsJumper.zip&quot;;
const char* url = &quot;https://uu.gdl.netease.com/5051/UU-5.30.1.exe&quot;;
char* filename;
getFileName(url, &amp;filename);
if(SIG_ERR == signal(SIGINT, signal_handler))  // 断开下载时调用signal_handler
{
    perror(&quot;signal\n&quot;);
    return -1;
}
download(url, filename);
return 0;

}

常用函数

初始化和清理
- curl_global_init()：初始化libcurl环境，通常在程序开始时调用一次。
- curl_global_cleanup()：清理libcurl环境，通常在程序结束时调用。
- curl_easy_init()：初始化一个CURL类型的指针，用于创建一个新的curl会话。
- curl_easy_cleanup()：释放curl会话资源。
设置选项
- curl_easy_setopt¹：用于设置curl会话的选项。
- curl_easy_getinfo()²：用于获取curl会话的信息。
执行请求：
- curl_easy_perform()：执行一个curl会话的请求。

问题记录

curl_easy_setopt(curl, CURLOPT_WRITEDATA, &file);  // 写入响应体数据(body)
curl_easy_setopt(curl, CURLOPT_HEADERDATA, &file);  // 写入响应头数据(header)

在linux环境下，不会出现任何问题。但是在windows下配置的linux编译环境中，会有问题产生。 这两行代码，只执行第一行时，可以正常将body的数据写入文件，都执行则可以将header和body的数据都写入文件。但是只执行第二行的代码，文件中将不会写入任何数据。

想要单独获取响应头信息，需要通过
curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback);，设置处理头信息的回调函数。回调函数的格式如下：

size_t header_callback(char *buffer, size_t size, size_t nitems, void *userdata) {
    // 这里处理头部数据，例如打印或者保存到文件
    printf("Header: %.*s", (int)(nitems * size), buffer);
    return nitems * size;
}

但是，实际情况依然是无法单独得到响应头。

curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback);
curl_easy_setopt(curl, CURLOPT_HEADERDATA, &file);

如果只执行上面两行代码，则会将响应体的内容打印出来，而响应头的位置则是空行。

总结

curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data); 
// curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &file);
curl_easy_setopt(curl, CURLOPT_HEADERDATA, &file);
res = curl_easy_perform(curl);

对于以上五行代码

注释掉第二行的情况下，可以将响应头和响应体都写入到文件中（正常状态）
注释掉第二、三行的情况下，文件中写入空行
注释掉第一、三行时，会将响应体打印到控制台，而响应头的位置是空行
不注释，则会将响应体写入到文件，控制台打印空行

curl_easy_setopt详细介绍

curl_easy_setopt详细介绍
1. curl_easy_setopt(curl, CURLOPT_URL, info->url);
  
  设置请求的url
2. curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeFunc)
  1. CURLOPT_WRITEFUNCTION设置一个自定义的回调函数writeFunc来处理从服务器下载的数据，传入的writeFunc是函数指针
  2. writeFunc 的原型 size_t writeFunc(void *ptr, size_t size, size_t nmemb, void *stream);
    - ptr：指向包含接收到的数据的内存块的指针。
    - size：每个数据块的大小。
    - nmemb：数据块的数量。
    - stream：一个用户定义的指针，通常用来传递额外的数据给回调函数，比如可以将文件描述符传送进来，就可以将数据写入文件
    - **size*nmemb**是每次下载的数据长度，并且要将该值返回 return **size*nmemb**
3. curl_easy_setopt(curl，cURLoPT_WRITEDATA，info);
  - 将info传入给writeFunc处理，info就是writeFunc中的stream
4. curl_easy_setopt(curl, CURLOPT_NOPROGRESS, OL);
  - CURLOPT_NOPROGRESS 是一个选项，用于控制是否显示进度信息。当设置 CURLOPT_NOPROGRESS 为 0L（即 long 类型的 0），libcurl 将显示进度信息；如果你设置为 1L（即 long 类型的 1），libcurl 将不显示进度信息。
5. curl_easy_setopt(curl, CURLOPT_PROGRESSFUNCTION, progressFunc)
  1. CURLOPT_PROGRESSFUNCTION：指定了当 libcurl 在传输数据时应该调用的进度回调函数。progressFunc值应该是一个指向函数的指针
  2. progressFunc原型
    int progressFunc(void* userdata, curl_off_t dltotal, curl_off_t dlnow, curl_off_t ultotal, curl_off_t ulnow);
    - userdata是一个用户定义的指针，可以在回调函数中使用。
    - dltotal 和 dlnow 分别表示下载的总字节数和当前已下载的字节数。
    - ultotal 和 ulnow 分别表示上传的总字节数和当前已上传的字节数。
6. curl_easy_setopt(curl, CURLOPT_PROGRESSDATA, info)
  - 将info传递给progressFunc中的userdata
7. curl_easy_setopt(curl, CURLOPT_RANGE, range);
  1. 这行代码是用于设置 CURL 选项，使得 CURL 请求时可以指定下载文件的特定范围。CURLOPT_RANGE 选项允许你指定想要检索的数据范围，格式通常是 "X-Y"，其中 X 和 Y 是字节索引，X 和 Y 都可以省略。内容对于 HTTP 传输，还支持多个间隔，用逗号分隔，如 "X-Y,N-M"。
  2. 例如，如果你想请求一个文件的前200个字节，可以设置range字符串为为 "0-199"。如果你想从第200个字节开始获取文件的其余部分，可以设置为 "200-"。还可以请求多个范围，如 "0-199,1000-1199"，这将请求文件的前200个字节和从1000字节开始的200个字节。
  3. 需要注意的是，服务器是否支持范围请求（byte range）取决于服务器的配置。即使客户端设置了 CURLOPT_RANGE，服务器也可能因为各种原因不遵守这个请求，而是返回完整的响应内容。因此，使用这个选项时，应当准备好处理完整的响应内容，而不仅仅是请求的范围。
  4. int progressFunc(void* userdata, curl_off_t dltotal, curl_off_t dlnow, curl_off_t ultotal, curl_off_t ulnow)中的dltotal = Y-X，dlnow则是从0开始，不断加上size_t writeFunc(void *ptr, size_t size, size_t nmemb, void *stream)的返回值，最高等于Y-X。 ↩
curl_easy_getinfo

curl_easy_getinfo

上次编辑时间: 2024年12月6日 18:00
创建时间: 2024年12月6日 14:46

CURLINFO_CONTENT_LENGTH_DOWNLOAD 和 CURLINFO_CONTENT_LENGTH_DOWNLOAD_T 都是 libcurl 提供的选项，用于获取下载内容的长度。它们的主要区别在于返回值的类型和对大文件的支持。
1. CURLINFO_CONTENT_LENGTH_DOWNLOAD：
  - 这个选项要求传递一个 double 类型的指针，用来存放下载内容的 Content-Length 信息。
  - 如果文件大小无法获取，那么函数返回值为 1。
  - 这个选项在 libcurl 7.19.4 版本之后，如果大小未知会返回 1。
  - 这个选项已经被标记为弃用（Deprecated since 7.55.0）。
2. CURLINFO_CONTENT_LENGTH_DOWNLOAD_T：
  - 这个选项是 CURLINFO_CONTENT_LENGTH_DOWNLOAD 的更新替代品，它要求传递一个 curl_off_t 类型的指针，curl_off_t 是一个更大的数据类型，能够支持更大的文件大小。
  - 这个选项在 libcurl 7.55.0 版本中被加入，用于替代 CURLINFO_CONTENT_LENGTH_DOWNLOAD。
  - 如果内容长度未知，这个选项也会返回 1。
  - 这个选项专门用于 HTTP 协议，并且能够更准确地处理大文件的 Content-Length。
也就是说，CURLINFO_CONTENT_LENGTH_DOWNLOAD_T 是一个更新的选项，它使用 curl_off_t 类型来支持更大的文件大小，并且是 CURLINFO_CONTENT_LENGTH_DOWNLOAD 的替代品。

示例：
```
int main(void) {
    CURL *curl = curl_easy_init();
    if(curl) {
        CURLcode res;
        curl_easy_setopt(curl, CURLOPT_URL, "https://example.com");
        /* Perform the request */
        res = curl_easy_perform(curl);
        if(!res) {
            /* check the size */
            double cl;
            res = curl_easy_getinfo(curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &cl);
            if(!res) {
                printf("Size: %.0f\n", cl);
            }
        }
    }
}
```
↩

标签：info,断点续传,setopt,quot,libcurl,easy,curl,多线程,CURLOPT
From： https://www.cnblogs.com/xue3z/p/18676982/libcurl-multi-threaded-download-supports-breakpoi

libcurl多线程下载，支持断点续传

libcurl多线程下载一步步实现

单线程下载

多线程版本

define THREAD_NUM 10

加入进度条

define THREAD_NUM 10

实现断点续传

define THREAD_NUM 10

常用函数

问题记录

curl_easy_setopt详细介绍

curl_easy_setopt详细介绍

curl_easy_getinfo

curl_easy_getinfo

相关文章

赞助商

阅读排行