一、环境
.NET 6.0
vs2022 控制台应用程序
NuGet 引入:
AngleSharp 1.1.0 用于HTML解析
Downloader 3.0.6 用于下载文件
ShellProgressBar 5.2.0 用于进度条显示
二、效果
三、相关代码
1.Program.cs
using ShellProgressBar;
using Spider;
using System.Collections;

// Entry point: scrape every <img> inside the article body of one page and
// download the images into an "Images" folder next to the executable, showing
// nested console progress bars (all albums -> one album -> one file).

var url = "https://blog.csdn.net/u011127019/article/details/124248757";
var pageUri = new Uri(url);
var data = await HttpHelper.GetHtmlDocument(url);
DownloadHandler downloadHandler = new DownloadHandler();

// Collect the image URLs of the page into a single album.
var collectedImages = new List<string>();
foreach (var item in data.QuerySelectorAll("#article_content img"))
{
    var href = item.GetAttribute("src");
    if (string.IsNullOrWhiteSpace(href))
    {
        continue;
    }

    // Resolve relative ("/a.png") and protocol-relative ("//host/a.png") sources
    // against the page address; skip anything that is not fetchable over HTTP(S)
    // (for example inline "data:" images).
    if (!Uri.TryCreate(pageUri, href, out var absolute))
    {
        continue;
    }

    if (absolute.Scheme != Uri.UriSchemeHttp && absolute.Scheme != Uri.UriSchemeHttps)
    {
        continue;
    }

    collectedImages.Add(absolute.AbsoluteUri);
}

List<ImageList> imageList = new List<ImageList>
{
    new ImageList
    {
        Name = "图片目录",
        Images = collectedImages,
        ImageCount = collectedImages.Count
    }
};
var list = imageList; // the albums to download

ProgressBarOptions BarOptions = new()
{
    ProgressCharacter = '─',
    ProgressBarOnBottom = true,
    ForegroundColor = ConsoleColor.Yellow,
    ForegroundColorDone = ConsoleColor.DarkGreen,
    BackgroundColor = ConsoleColor.DarkGray,
    BackgroundCharacter = '\u2593'
};
ProgressBarOptions ChildBarOptions = new()
{
    ForegroundColor = ConsoleColor.Green,
    BackgroundColor = ConsoleColor.DarkGreen,
    ProgressCharacter = '─'
};

// Path.Combine avoids the doubled separator and the Windows-only "\\" of a
// hand-built path; the directory is created up front so the first write cannot
// fail on a missing folder.
var imagesDirectory = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Images");
Directory.CreateDirectory(imagesDirectory);

using var bar = new ProgressBar(list.Count, "正在下载所有图片", BarOptions);
foreach (var item in list)
{
    bar.Message = $"图集:{item.Name}";

    // Images is declared nullable; treat a missing list as an empty album.
    var images = item.Images ?? new List<string>();
    if (images.Count > 0)
    {
        // One child bar per album, advanced once per finished image, so it
        // actually reaches 100% when the album is done.
        using (var childBar = bar.Spawn(images.Count, $"图集:{item.Name}", ChildBarOptions))
        {
            int i = 1;
            foreach (var imgUrl in images)
            {
                childBar.Message = $"图片:{imgUrl}";

                // Take the extension from the URL path only, so query strings are
                // ignored and formats other than .png/.jpg keep their extension.
                string extension = Path.GetExtension(new Uri(imgUrl).AbsolutePath);

                await downloadHandler.Download(childBar, imgUrl, Path.Combine(imagesDirectory, i + extension));
                childBar.Tick();
                i++;
            }
        }
    }

    // Advance the outer bar only after the album has been processed.
    bar.Tick();
}
2.HttpHelper.cs
using AngleSharp.Html.Dom;
using AngleSharp.Html.Parser;
using Downloader;
using System.Net;
using System.Text;

namespace Spider
{
    /// <summary>
    /// Shared HTTP plumbing for the spider: one <see cref="HttpClient"/> for fetching
    /// pages and one <see cref="IDownloadService"/> for downloading files.
    /// </summary>
    public static class HttpHelper
    {
        /// <summary>User-Agent header sent with page requests and file downloads.</summary>
        public const string UserAgent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36";

        /// <summary>Download service shared by all callers; created once in the static constructor.</summary>
        public static IDownloadService Downloader { get; }

        /// <summary>
        /// Builds a fresh download configuration on every access. <see cref="Downloader"/>
        /// is constructed from one such instance, so changing a configuration obtained
        /// here later does not affect it.
        /// </summary>
        public static DownloadConfiguration DownloadConf => new()
        {
            BufferBlockSize = 10240, // hosts usually support at most 8000 bytes; the default is 8000
            ChunkCount = 8, // number of chunks the file is split into; the default is 1
            // MaximumBytesPerSecond = 1024 * 50, // download speed limit; the default is zero (unlimited)
            MaxTryAgainOnFailover = 5, // maximum number of retries on failure
            ParallelDownload = true, // download the chunks in parallel; the default is false
            Timeout = 1000, // timeout of each stream reader in milliseconds; the default is 1000
            RequestConfiguration =
            {
                Accept = "*/*",
                AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate,
                CookieContainer = new CookieContainer(), // Add your cookies
                Headers = new WebHeaderCollection(), // Add your custom headers
                KeepAlive = true,
                ProtocolVersion = HttpVersion.Version11, // Default value is HTTP 1.1
                UseDefaultCredentials = false,
                UserAgent = UserAgent
            }
        };

        /// <summary>Handler backing <see cref="Client"/>.</summary>
        public static HttpClientHandler Handler { get; }

        /// <summary>Process-wide HTTP client; it sends <see cref="UserAgent"/> with every request.</summary>
        public static HttpClient Client { get; }

        static HttpHelper()
        {
            // .NET Core / .NET 5+ only ship a handful of encodings by default; register the
            // code-page provider so charsets such as "gbk" or "gb2312" can be resolved by
            // Encoding.GetEncoding in GetHtmlDocument(url, charset).
            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);

            Handler = new HttpClientHandler();
            Client = new HttpClient(Handler);
            Client.DefaultRequestHeaders.Add("User-Agent", UserAgent);
            Downloader = new DownloadService(DownloadConf);
        }

        /// <summary>
        /// Downloads <paramref name="url"/> as text (encoding chosen by the HTTP client)
        /// and parses it as HTML.
        /// </summary>
        /// <param name="url">Address of the page to fetch.</param>
        /// <returns>The parsed HTML document.</returns>
        /// <exception cref="HttpRequestException">The request failed or returned a non-success status.</exception>
        public static async Task<IHtmlDocument> GetHtmlDocument(string url)
        {
            var html = await Client.GetStringAsync(url);
            return new HtmlParser().ParseDocument(html);
        }

        /// <summary>
        /// Downloads <paramref name="url"/> as raw bytes, decodes them with the named
        /// <paramref name="charset"/> and parses the result as HTML.
        /// </summary>
        /// <param name="url">Address of the page to fetch.</param>
        /// <param name="charset">Encoding name used to decode the response body, e.g. "utf-8" or "gbk".</param>
        /// <returns>The parsed HTML document.</returns>
        /// <exception cref="HttpRequestException">The request failed or returned a non-success status.</exception>
        /// <exception cref="ArgumentException"><paramref name="charset"/> is not a known encoding name.</exception>
        public static async Task<IHtmlDocument> GetHtmlDocument(string url, string charset)
        {
            // Dispose the response so the underlying connection is released promptly.
            using var res = await Client.GetAsync(url);

            // Match the other overload, which throws on a non-success status,
            // instead of silently parsing an error page.
            res.EnsureSuccessStatusCode();

            var resBytes = await res.Content.ReadAsByteArrayAsync();
            var resStr = Encoding.GetEncoding(charset).GetString(resBytes);
            return new HtmlParser().ParseDocument(resStr);
        }
    }
}
3.DownloadHandler.cs
using Downloader;
using ShellProgressBar;
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Diagnostics;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading.Tasks;

namespace Spider
{
    /// <summary>
    /// Downloads a single file through the shared <see cref="HttpHelper.Downloader"/>
    /// while showing a percentage bar underneath a parent progress bar.
    /// </summary>
    public class DownloadHandler
    {
        /// <summary>
        /// Downloads <paramref name="url"/> to <paramref name="filepath"/> and reports
        /// the progress on a child bar spawned from <paramref name="bar"/>.
        /// </summary>
        /// <param name="bar">Parent progress bar under which the per-file bar is spawned.</param>
        /// <param name="url">Address of the file to download.</param>
        /// <param name="filepath">Destination path of the downloaded file.</param>
        public async Task Download(IProgressBar bar, string url, string filepath)
        {
            var barOptions = new ProgressBarOptions
            {
                ForegroundColor = ConsoleColor.Yellow,
                BackgroundColor = ConsoleColor.DarkYellow,
                ForegroundColorError = ConsoleColor.Red,
                ForegroundColorDone = ConsoleColor.Green,
                BackgroundCharacter = '\u2593',
                ProgressBarOnBottom = true,
                EnableTaskBarProgress = RuntimeInformation.IsOSPlatform(OSPlatform.Windows),
                DisplayTimeInRealTime = false,
                ShowEstimatedDuration = false
            };

            // Disposed when the method exits, whether or not the completed event fires.
            using var percentageBar = bar.Spawn(100, $"正在下载:{Path.GetFileName(url)}", barOptions);

            var downloader = HttpHelper.Downloader;
            downloader.DownloadStarted += DownloadStarted;
            downloader.DownloadFileCompleted += DownloadFileCompleted;
            downloader.DownloadProgressChanged += DownloadProgressChanged;
            try
            {
                await downloader.DownloadFileTaskAsync(url, filepath);
            }
            finally
            {
                // The download service is shared between calls: without unsubscribing,
                // every later download would also invoke these handlers and touch this
                // (by then disposed) progress bar.
                downloader.DownloadStarted -= DownloadStarted;
                downloader.DownloadFileCompleted -= DownloadFileCompleted;
                downloader.DownloadProgressChanged -= DownloadProgressChanged;
            }

            void DownloadStarted(object? sender, DownloadStartedEventArgs e)
            {
                Trace.WriteLine(
                    $"图片, FileName:{Path.GetFileName(e.FileName)}, TotalBytesToReceive:{e.TotalBytesToReceive}");
            }

            void DownloadFileCompleted(object? sender, AsyncCompletedEventArgs e)
            {
                if (e.Error is not null)
                {
                    // Surface a failure reported through the event instead of logging success.
                    Trace.WriteLine($"下载失败, filepath:{filepath}, error:{e.Error.Message}");
                    return;
                }

                if (e.Cancelled)
                {
                    Trace.WriteLine($"下载已取消, filepath:{filepath}");
                    return;
                }

                Trace.WriteLine($"下载完成, filepath:{filepath}");
            }

            void DownloadProgressChanged(object? sender, DownloadProgressChangedEventArgs e)
            {
                // The bar was created with 100 ticks, so the reported percentage is used
                // directly as the absolute tick position.
                percentageBar.Tick((int)e.ProgressPercentage);
            }
        }
    }
}
4.Images.cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace Spider
{
    /// <summary>
    /// A named group of image URLs (an "album") to be downloaded.
    /// </summary>
    public class ImageList
    {
        /// <summary>Image URLs belonging to this album; <c>null</c> until assigned.</summary>
        public List<string>? Images { get; set; }

        /// <summary>Number of images counted for this album; maintained by the caller.</summary>
        public int ImageCount { get; set; }

        /// <summary>Display name of the album; defaults to the empty string.</summary>
        public string Name { get; set; } = "";
    }
}
四、源码下载
链接:https://pan.baidu.com/s/1VnnH05Har9hUhxAsIfKSMw?pwd=paws
提取码:paws