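// Command-line tool that checks Chrome bookmarks: it reads the bookmark file
// exported from Chrome and sends an HTTP GET request to every URL in it to
// verify that the link is still valid.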
package main
import (
"bufio"
"errors"
"flag"
"fmt"
"io"
"net"
"net/http"
"net/url"
"os"
"strings"
"sync"
"time"
)
// main parses the command-line flags and runs the bookmark check.
//
// Flags:
//
//	-f  path of the bookmark file to check (required)
//	-p  proxy address handed to checkChromeBookmark (optional)
//
// The process exits with status 2 when -f is missing and with status 1 when
// the check returns an error, so callers and scripts can detect failure.
func main() {
	path := flag.String("f", "", "bookmark path")
	proxy := flag.String("p", "", "proxy address")
	flag.Parse()

	// Without a path the only possible outcome is an opaque open error, so
	// show the usage text instead.
	if *path == "" {
		flag.Usage()
		os.Exit(2)
	}

	if err := checkChromeBookmark(*path, *proxy); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}
// checkChromeBookmark reads the Chrome-exported bookmark HTML file at path and
// probes every link in it with checkUrl, running the probes concurrently.
//
// Folder nesting is derived from indentation: every 4 leading spaces is one
// level, and only lines whose indentation is a positive multiple of 4 are
// considered. For each link that fails, one report is printed to stdout with
// the folder trail, the link title, the URL and the error.
//
// proxy is optional. When it parses as a URL with a host, a link whose direct
// request timed out is retried through that proxy; if the retry succeeds the
// link is still reported, with a message saying that only the proxied request
// worked. Any other proxy value (empty, unparsable, or without a host)
// disables the retry.
//
// The returned error is a failure to open path or a scanner error; individual
// link failures are only printed.
func checkChromeBookmark(path, proxy string) error {
	fw, err := os.Open(path)
	if err != nil {
		return err
	}
	//goland:noinspection GoUnhandledErrorResult
	defer fw.Close()

	const (
		// to is the timeout applied to every request.
		to = time.Second * 5
		// maxLineBytes is the longest line accepted. Bookmark lines can be
		// long, and the Scanner default of 64 KiB would end the scan early
		// with bufio.ErrTooLong.
		maxLineBytes = 4 << 20
		// maxConcurrent bounds the number of probes in flight so a large
		// bookmark file does not open one connection per link at once.
		maxConcurrent = 32
	)

	r := bufio.NewScanner(fw)
	r.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), maxLineBytes)

	dc := &http.Client{Timeout: to}

	// pc stays nil unless a usable proxy was given. url.Parse accepts the
	// empty string, so the host is checked as well.
	var pc *http.Client
	if proxy != "" {
		if up, perr := url.Parse(proxy); perr == nil && up.Host != "" {
			pc = &http.Client{
				Timeout:   to,
				Transport: &http.Transport{Proxy: http.ProxyURL(up)},
			}
		}
	}

	// data holds the folder names from the outermost level down to the
	// folder most recently seen.
	var data []string
	var wg sync.WaitGroup
	sem := make(chan struct{}, maxConcurrent)

	for r.Scan() {
		line := r.Text()
		indent := strings.IndexFunc(line, func(c rune) bool { return c != ' ' })
		if indent <= 0 || indent%4 != 0 {
			continue
		}
		// Every 4 spaces is one level; entries at 4 spaces are level 0.
		level := indent/4 - 1
		// A line nested deeper than the folders seen so far is attached to
		// the deepest known folder instead of indexing past the trail.
		if level > len(data) {
			level = len(data)
		}

		if strings.Contains(line, `<DT><H3 ADD_DATE="`) { // folder
			// Install the folder at its level and drop any deeper names left
			// over from a previous branch.
			data = append(data[:level], bookmarkText(line, "</H3>"))
			continue
		}

		ul, ok := bookmarkHref(line) // link
		if !ok {
			continue
		}
		info := fmt.Sprintf("\n\"%s\", %s, %s:\n\t",
			strings.Join(data[:level], `" -> "`),
			bookmarkText(line, "</A>"),
			ul)

		sem <- struct{}{} // wait for a free slot before starting another probe
		wg.Add(1)
		go func() {
			defer wg.Done()
			defer func() { <-sem }()

			// cerr is local to this goroutine; sharing one error variable
			// between probes would be a data race.
			cerr := checkUrl(dc, ul)
			var ne net.Error
			// Only a timed-out direct request is retried through the proxy.
			if cerr != nil && pc != nil && errors.As(cerr, &ne) && ne.Timeout() {
				if cerr = checkUrl(pc, ul); cerr == nil {
					cerr = errors.New("direct connection failed Proxy success")
				}
			}
			if cerr != nil {
				fmt.Println(info, cerr)
			}
		}()
	}
	wg.Wait()
	return r.Err()
}

// bookmarkText returns the text after the last `">` on line with closeTag
// removed from its end, or "" when the line has no `">`.
func bookmarkText(line, closeTag string) string {
	i := strings.LastIndex(line, `">`)
	if i < 0 {
		return ""
	}
	return strings.TrimSuffix(line[i+2:], closeTag)
}

// bookmarkHref returns the HREF value of a `<DT><A HREF="..." ADD_DATE="...">`
// line; ok is false when the line does not have that shape.
func bookmarkHref(line string) (href string, ok bool) {
	const open = `<DT><A HREF="`
	i := strings.Index(line, open)
	if i < 0 {
		return "", false
	}
	rest := line[i+len(open):]
	j := strings.Index(rest, `" ADD_DATE="`)
	if j < 0 {
		return "", false
	}
	return rest[:j], true
}
// checkUrl sends a GET request for u through c, with browser-like headers, and
// reports whether the link looks alive.
//
// It returns nil only when the response status is 200 and the body contains at
// least one byte. Otherwise it returns the request or transport error, an
// error of the form "code <status>" for any other status, an error from
// reading the body, or "n <= 0" for an empty body.
func checkUrl(c *http.Client, u string) error {
	// maxProbe caps how much of the body is read. One byte is enough to show
	// that the body is not empty; the cap keeps a link to a large download from
	// being fetched in full, which can run into the client's timeout and make
	// a valid link look dead.
	const maxProbe = 64 << 10

	req, err := http.NewRequest(http.MethodGet, u, nil)
	if err != nil {
		return err
	}
	req.Header.Set("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36")
	req.Header.Set("accept", "*/*")

	resp, err := c.Do(req)
	if err != nil {
		return err
	}
	//goland:noinspection GoUnhandledErrorResult
	defer resp.Body.Close()

	// The status decides first: there is no point reading the body of a
	// response that is already known to be a failure.
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("code %d", resp.StatusCode)
	}

	n, err := io.Copy(io.Discard, io.LimitReader(resp.Body, maxProbe))
	if err != nil {
		return err
	}
	if n <= 0 {
		return fmt.Errorf("n <= 0")
	}
	return nil
}
// Tags: http, err, nil, Chrome, strings, links, return, line, bookmarks
// From: https://www.cnblogs.com/janbar/p/17086074.html