/// <summary>
/// Entry point: crawls listing pages 10 and 11 of www.ivsky.com/tupian, saves the images
/// found on them under D:\Picture\, then prints the image count and the elapsed time.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Start timing right away. A Stopwatch that is only constructed never runs,
    // which would make the elapsed time printed below always 0.
    Stopwatch stopwatch = Stopwatch.StartNew();

    // Listing pages look like: https://www.ivsky.com/tupian/index_8.html
    // Running total of images, shared across all pages through the ref parameter.
    int temp = 0;
    HttpCrawelHelper.CreatFile();

    // Destination folder; it does not change between pages, so it is built once.
    string path = @"D:\Picture\";

    // Walk the listing pages.
    for (int i = 10; i < 12; i++)
    {
        string url = "https://www.ivsky.com/tupian/index_" + i + ".html";
        HttpCrawelHelper.HttpGetHandle(url, path, 1, ref temp);
    }

    stopwatch.Stop();
    Console.WriteLine("————-———爬取成功!—————");
    Console.WriteLine("\n_______总共爬取了" + temp + "张图片!_______________");
    // Integer division: reports whole seconds only.
    Console.WriteLine("\n一共耗时" + stopwatch.ElapsedMilliseconds / 1000 + "秒");
}
/// <summary>
/// Helpers for downloading the images referenced by a web page into a local folder.
/// </summary>
class HttpCrawelHelper
{
    // Output folder checked/created by CreatFile.
    private const string PictureDirectory = @"D:\Picture\";

    // Pause between image downloads, in milliseconds.
    private const int DownloadDelayMilliseconds = 200;

    // Matches every <img ...> tag in the page markup. Built once instead of per call.
    private static readonly Regex ImgTagRegex = new Regex(@"<img[^>]+>");

    #region Crawl images
    /// <summary>
    /// Downloads the page at <paramref name="url"/>, finds every &lt;img&gt; tag in it and saves
    /// each referenced image as "<paramref name="path"/> + counter + .jpg".
    /// </summary>
    /// <param name="url">Address of the HTML page to scan.</param>
    /// <param name="path">Prefix for the saved files; the counter and ".jpg" are appended directly to it.</param>
    /// <param name="name">Not used for file naming; kept so existing callers keep compiling.</param>
    /// <param name="temp">
    /// Running image counter. It is advanced once per successfully saved image and supplies
    /// the file name, so numbering continues across calls.
    /// </param>
    /// <remarks>
    /// Failures while fetching the page itself propagate to the caller. A failure on a single
    /// image is logged to the console and the remaining images are still processed.
    /// </remarks>
    public static void HttpGetHandle(string url, string path, int name, ref int temp)
    {
        string html = DownloadPage(url);
        if (string.IsNullOrEmpty(html))
        {
            Console.WriteLine("—错误—");
            Console.ReadKey();
            // An empty page cannot contain any <img> tag, so there is nothing left to do.
            return;
        }

        using (WebClient client = new WebClient())
        {
            foreach (Match imgTag in ImgTagRegex.Matches(html))
            {
                string imageUrl;
                if (!TryExtractImageUrl(imgTag.Value, out imageUrl))
                {
                    // Tag does not have the expected attribute layout; skip it rather than crash.
                    continue;
                }

                // Only commit the counter after the file was actually saved, so the total
                // reported by the caller counts real downloads.
                int fileNumber = temp + 1;
                try
                {
                    client.DownloadFile(imageUrl, path + fileNumber + ".jpg");
                    temp = fileNumber;
                    Console.WriteLine("\n正在爬取———" + "|" + temp);
                }
                catch (WebException ex)
                {
                    // One broken image must not abort the rest of the page.
                    Console.WriteLine("-------" + ex);
                }

                // Throttle requests between images.
                System.Threading.Thread.Sleep(DownloadDelayMilliseconds);
            }
        }
    }

    /// <summary>
    /// Performs an HTTP GET on <paramref name="url"/> and returns the response body decoded as UTF-8.
    /// </summary>
    /// <param name="url">Address to request.</param>
    /// <returns>The full response body as text.</returns>
    private static string DownloadPage(string url)
    {
        HttpWebRequest webRequest = WebRequest.CreateHttp(url);
        webRequest.Method = "GET";
        webRequest.UserAgent = " Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0";

        // Dispose the response, its stream and the reader even if reading throws.
        using (WebResponse webResponse = webRequest.GetResponse())
        using (Stream responseStream = webResponse.GetResponseStream())
        using (StreamReader streamReader = new StreamReader(responseStream, Encoding.UTF8))
        {
            return streamReader.ReadToEnd();
        }
    }

    /// <summary>
    /// Extracts the image address from an &lt;img&gt; tag using fixed offsets around the first
    /// and last '=' characters of the tag.
    /// </summary>
    /// <param name="imgTag">The complete tag text, e.g. as matched by the tag regex.</param>
    /// <param name="imageUrl">On success, "https://" followed by the extracted text; otherwise null.</param>
    /// <returns>True when the offsets describe a non-empty span inside the tag.</returns>
    private static bool TryExtractImageUrl(string imgTag, out string imageUrl)
    {
        imageUrl = null;

        int firstEquals = imgTag.IndexOf('=');
        if (firstEquals < 0)
        {
            return false;
        }

        // The span starts 4 characters after the first '=' and stops 5 characters before
        // the last '='.
        // NOTE(review): presumably this targets tags shaped like <img src="//host/path" alt="...">
        // (skipping the opening quote plus "//", and the closing quote plus " alt") — confirm
        // against the site's current markup.
        int start = firstEquals + 4;
        int length = imgTag.LastIndexOf('=') - start - 5;
        if (length <= 0)
        {
            // Single-attribute or otherwise unexpected tag: Substring would throw here.
            return false;
        }

        imageUrl = "https://" + imgTag.Substring(start, length);
        return true;
    }
    #endregion

    #region Create the output folder
    /// <summary>
    /// Ensures the output folder D:\Picture\ exists. Prints the start banner when the folder
    /// is already present; otherwise creates it (without printing the banner).
    /// </summary>
    public static void CreatFile()
    {
        if (Directory.Exists(PictureDirectory))
        {
            Console.WriteLine("\n—————开始—————");
        }
        else
        {
            Directory.CreateDirectory(PictureDirectory);
        }
    }
    #endregion
}