首页 > 其他分享 >检查Chrome收藏夹链接是否有效

检查Chrome收藏夹链接是否有效

时间:2023-02-02 15:13:26浏览次数:62  
标签:http err nil Chrome strings 链接 return line 收藏夹

检查Chrome浏览器书签:读取Chrome导出的收藏夹文件,逐个发送HTTP GET请求,检查每个URL是否有效。

package main

import (
	"bufio"
	"errors"
	"flag"
	"fmt"
	"io"
	"net"
	"net/http"
	"net/url"
	"os"
	"strings"
	"sync"
	"time"
)

// main parses the command-line flags and runs the bookmark check.
//
// Flags:
//
//	-f  path of the bookmark file to check (required)
//	-p  proxy address handed to checkChromeBookmark (optional)
//
// The process exits with status 2 when -f is missing and with status 1 when
// checkChromeBookmark returns an error, so callers and scripts can detect
// failure.
func main() {
	path := flag.String("f", "", "bookmark path")
	proxy := flag.String("p", "", "proxy address")
	flag.Parse()

	// Without a path os.Open("") would only produce an opaque "open :" error,
	// so show the usage text instead.
	if *path == "" {
		flag.Usage()
		os.Exit(2)
	}

	if err := checkChromeBookmark(*path, *proxy); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// checkChromeBookmark scans the bookmark file at path line by line and probes
// every link it finds with checkUrl, running the probes concurrently.
//
// Folder nesting is derived from indentation (four spaces per level) so each
// report can show the chain of folders that contains the link. Links that fail
// are printed to standard output; links that succeed produce no output. When
// proxy parses to a URL with a host, a link whose direct request timed out is
// retried through that proxy.
//
// The returned error is either the failure to open path or the scanner's read
// error. Per-link failures are only printed, never returned.
func checkChromeBookmark(path, proxy string) error {
	fw, err := os.Open(path)
	if err != nil {
		return err
	}
	//goland:noinspection GoUnhandledErrorResult
	defer fw.Close()

	const (
		to          = time.Second * 5 // timeout applied to every request
		maxLine     = 4 << 20         // longest line the scanner accepts
		maxInFlight = 64              // upper bound on simultaneous probes
		dirTag      = `<DT><H3 ADD_DATE="`
		linkTag     = `<DT><A HREF="`
	)

	r := bufio.NewScanner(fw)
	// A line may be far longer than the scanner's default token limit (for
	// example when it carries inline icon data), which would otherwise abort
	// the whole scan with bufio.ErrTooLong.
	r.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), maxLine)

	// dc connects directly.
	dc := &http.Client{Timeout: to}
	// pc goes through the proxy and stays nil when no usable proxy was given.
	var pc *http.Client
	// url.Parse accepts the empty string and host-less values, so require a
	// host before building the proxy client.
	if up, perr := url.Parse(proxy); perr == nil && up.Host != "" {
		pc = &http.Client{
			Timeout:   to,
			Transport: &http.Transport{Proxy: http.ProxyURL(up)},
		}
	}

	var (
		data []string // current folder name at each nesting level
		wg   sync.WaitGroup
	)
	// sem caps the number of probes in flight so a large bookmark file cannot
	// exhaust sockets or file descriptors.
	sem := make(chan struct{}, maxInFlight)

	for r.Scan() {
		line := r.Text()
		indent := strings.IndexFunc(line, func(c rune) bool { return c != ' ' })
		if indent <= 0 || indent%4 != 0 {
			continue
		}
		level := indent/4 - 1 // four spaces per level; the first indent is level 0

		switch {
		case strings.Contains(line, dirTag): // folder
			name, ok := bookmarkText(line, "</H3>")
			if !ok {
				continue // malformed folder line
			}
			if level >= len(data) {
				data = append(data, name) // one level deeper: append
			} else {
				data[level] = name // same or shallower level: overwrite
			}

		case strings.Contains(line, linkTag): // link
			rest := line[strings.Index(line, linkTag)+len(linkTag):]
			end := strings.IndexByte(rest, '"')
			if end < 0 {
				continue // HREF attribute is not terminated
			}
			ul := rest[:end]
			// A missing closing tag only costs the title in the report; the
			// URL is still worth checking.
			title, _ := bookmarkText(line, "</A>")

			// Clamp so a link nested deeper than the folders seen so far
			// cannot slice past the end of data.
			depth := level
			if depth > len(data) {
				depth = len(data)
			}
			info := fmt.Sprintf("\n\"%s\", %s, %s:\n\t",
				strings.Join(data[:depth], `" -> "`), title, ul)

			sem <- struct{}{}
			wg.Add(1)
			go func() {
				defer func() {
					<-sem
					wg.Done()
				}()

				// err is local to this goroutine; sharing the enclosing
				// function's err across goroutines would be a data race.
				err := checkUrl(dc, ul)
				var ne net.Error
				// Only a direct-connection timeout is retried via the proxy.
				if err != nil && pc != nil && errors.As(err, &ne) && ne.Timeout() {
					if err = checkUrl(pc, ul); err == nil {
						err = errors.New("direct connection failed Proxy success")
					}
				}
				if err != nil {
					fmt.Println(info, err)
				}
			}()
		}
	}
	wg.Wait()
	return r.Err()
}

// bookmarkText returns the text between the last `">` in line and the last
// occurrence of closeTag after it. ok is false when either marker is missing.
func bookmarkText(line, closeTag string) (text string, ok bool) {
	start := strings.LastIndex(line, `">`)
	if start < 0 {
		return "", false
	}
	text = line[start+2:]
	end := strings.LastIndex(text, closeTag)
	if end < 0 {
		return "", false
	}
	return text[:end], true
}

// checkUrl sends a GET request for u through client c and reports whether the
// link looks alive.
//
// It returns nil when the server answers 200 OK with a non-empty body.
// Otherwise it returns the error from building or sending the request, an
// error reading "code <status>" for any other status, the error from reading
// the body, or "n <= 0" when the body is empty.
func checkUrl(c *http.Client, u string) error {
	req, err := http.NewRequest(http.MethodGet, u, nil)
	if err != nil {
		return err
	}
	req.Header.Set("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36")
	req.Header.Set("accept", "*/*")

	resp, err := c.Do(req)
	if err != nil {
		return err
	}
	//goland:noinspection GoUnhandledErrorResult
	defer resp.Body.Close()

	// The status is known as soon as the headers arrive, so report it before
	// spending any time on the body.
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("code %d", resp.StatusCode)
	}

	// A small prefix is enough to prove the body is non-empty. Downloading the
	// whole body would make a live link to a large or slow resource fail once
	// the client's timeout expires.
	const probeLimit = 4 << 10
	n, err := io.Copy(io.Discard, io.LimitReader(resp.Body, probeLimit))
	if err != nil {
		return err
	}
	if n <= 0 {
		return fmt.Errorf("n <= 0")
	}
	return nil
}

标签:http,err,nil,Chrome,strings,链接,return,line,收藏夹
From: https://www.cnblogs.com/janbar/p/17086074.html

相关文章