/// <summary>
/// Scans <paramref name="content"/> for every occurrence of <paramref name="searchStr"/>
/// and appends the text found between that occurrence and the next double quote
/// to <paramref name="list"/>.
/// </summary>
/// <remarks>
/// Intended to be called with an attribute prefix such as <c>src="</c> or <c>href="</c>.
/// Values containing <c>data:image/png;base64</c> are skipped, and a value already present
/// in <paramref name="list"/> is not added a second time. Scanning stops at the first
/// occurrence that has no closing double quote. Nothing happens when
/// <paramref name="content"/> or <paramref name="searchStr"/> is null or empty.
/// </remarks>
/// <param name="content">The text to scan.</param>
/// <param name="searchStr">The marker that immediately precedes each value to extract.</param>
/// <param name="list">The list that receives the distinct extracted values, in order of appearance.</param>
public static void GetListHtmlString(string content, string searchStr, List<string> list)
{
    if (string.IsNullOrEmpty(content) || string.IsNullOrEmpty(searchStr))
    {
        return;
    }

    int searchFrom = 0;
    while (true)
    {
        // Ordinal matching: markup is machine text, so culture rules must not affect the search.
        int matchIndex = content.IndexOf(searchStr, searchFrom, System.StringComparison.Ordinal);
        if (matchIndex == -1)
        {
            break;
        }

        int valueStart = matchIndex + searchStr.Length;
        int valueEnd = content.IndexOf('"', valueStart);
        if (valueEnd == -1)
        {
            break;
        }

        // Advance the cursor before any 'continue' below; otherwise a skipped value
        // would be found again on the next iteration and the loop would never end.
        searchFrom = valueEnd;

        string value = content.Substring(valueStart, valueEnd - valueStart);
        if (value.Contains("data:image/png;base64"))
        {
            continue;
        }

        if (!list.Contains(value))
        {
            list.Add(value);
        }
    }
}
调用:
// Usage example: collect every src="..." and href="..." value into a single list.
string html;
// Dispose the reader so the file handle is released as soon as the text has been read.
using (var reader = new StreamReader("C:\\html.txt"))
{
    html = reader.ReadToEnd();
}

// NOTE(review): html is never passed to the calls below; they scan db.CanceledRemark
// instead. Confirm which input is actually intended.
var list = new List<string>();
GetListHtmlString(db.CanceledRemark, "src=\"", list);
GetListHtmlString(db.CanceledRemark, "href=\"", list);
return SuccessMsg(list);
返回结果:
标签:string,list,抓取,content,startIndex,html,str,net From: https://www.cnblogs.com/cheua/p/17853411.html