首页 > 其他分享 >wechat crawler url拼接 url解析 微信爬虫 json序列化 反序列化

wechat crawler url拼接 url解析 微信爬虫 json序列化 反序列化

时间:2024-08-10 19:38:54浏览次数:9  
标签:JsonProperty set string get url 微信 int 序列化 public

WechatPublicRequest\Program.cs

using System.Collections.Specialized;
using System.Diagnostics;
using System.Web;
using Newtonsoft.Json;

class Program
{

    /// <summary>
    /// Program entry point. Reads the request parameters from the newest "*tmp.txt" file,
    /// loads the saved offset from config.json, requests a page of messages, dumps the
    /// response to a timestamped JSON file and collects the article URLs found in it.
    /// </summary>
    /// <exception cref="FileNotFoundException">No "*tmp.txt" file could be located.</exception>
    /// <exception cref="InvalidOperationException">
    /// config.json or the embedded message list could not be deserialized.
    /// </exception>
    static async Task Main()
    {
        var latestTxtFilePath = GetLatestTxtFilePath();
        System.Console.WriteLine(latestTxtFilePath);

        // GetLatestTxtFilePath returns an empty string when nothing matched; fail with a
        // clear message instead of letting File.ReadAllText("") throw an ArgumentException.
        if (string.IsNullOrEmpty(latestTxtFilePath))
        {
            throw new FileNotFoundException("No *tmp.txt file was found to read the request parameters from.");
        }

        string htmlContent = File.ReadAllText(latestTxtFilePath);
        (string biz, string uin, string pass_ticket, string key, string appmsg_token) = ExtractParameters(htmlContent);

        var configFilePath = "config.json";
        string currentOffset = File.ReadAllText(configFilePath);
        OffsetConfig offsetConfig = JsonConvert.DeserializeObject<OffsetConfig>(currentOffset)
            ?? throw new InvalidOperationException("OffsetConfig is null");

        for (int i = offsetConfig.CurrnetOffset; i <= 10000; i += 10)
        {
            string newUrl = GenerateNewUrl(i, 10, biz, uin, pass_ticket, key, appmsg_token);
            string v = await RequestArticles(newUrl);
            WeChatMessageResponse? articlResponseDto = JsonConvert.DeserializeObject<WeChatMessageResponse>(v);

            // The response is dumped before the null check, so an unparsable body is recorded as "null".
            File.WriteAllText($"response_{DateTime.Now.ToString("yyyyMMdd_HHmmss")}.json", JsonConvert.SerializeObject(articlResponseDto));

            if (articlResponseDto == null)
            {
                break;
            }

            if (articlResponseDto.Ret == -3)
            {
                // Persist the offset that failed so the next run resumes from it.
                System.Console.WriteLine("Res=-3");
                offsetConfig.CurrnetOffset = i;
                string v1 = JsonConvert.SerializeObject(offsetConfig);
                File.WriteAllText(configFilePath, v1);
                break;
            }

            if (articlResponseDto.CanMessageContinue == 0)
            {
                break;
            }

            // general_msg_list is itself a JSON document carried as a string; Json.NET throws
            // ArgumentNullException on a null input, so treat null/empty like an unparsable list.
            string? listJson = articlResponseDto.GeneralMessageListJson;
            GeneralMessageList generalMessageList =
                (string.IsNullOrEmpty(listJson) ? null : JsonConvert.DeserializeObject<GeneralMessageList>(listJson))
                ?? throw new InvalidOperationException("GeneralMessageList is null");

            var articleUrls = new List<string>();
            foreach (var article in generalMessageList.List ?? new List<MessageItem>())
            {
                // Entries whose JSON has no app_msg_ext_info deserialize with a null
                // AppMsgExtInfo; skip them instead of dereferencing null.
                AppMsgExtInfo? extInfo = article?.AppMsgExtInfo;
                if (extInfo is null)
                {
                    continue;
                }

                if (!string.IsNullOrEmpty(extInfo.ContentUrl))
                {
                    articleUrls.Add(extInfo.ContentUrl);
                }

                if (extInfo.IsMulti == 1 && extInfo.MultiAppMsgItemList is not null)
                {
                    foreach (var a in extInfo.MultiAppMsgItemList)
                    {
                        if (!string.IsNullOrEmpty(a?.ContentUrl))
                        {
                            articleUrls.Add(a.ContentUrl);
                        }
                    }
                }
            }

            // NOTE(review): articleUrls is collected but never consumed, and this unconditional
            // break stops after the first page. Both are kept as in the original; confirm
            // whether paging through all offsets was intended before removing the break.
            break;
        }
    }

    /// <summary>
    /// Finds the most recently created file matching "*tmp.txt" in a directory.
    /// </summary>
    /// <param name="path">
    /// Directory to search. Defaults to the location the original code hard-coded, so
    /// existing parameterless calls behave as before.
    /// </param>
    /// <returns>
    /// The full path of the file with the latest <see cref="FileSystemInfo.CreationTime"/>,
    /// or <see cref="string.Empty"/> when the directory does not exist or holds no match.
    /// </returns>
    private static string GetLatestTxtFilePath(string path = @"C:\Users\user\AppData\Local\Temp")
    {
        var directoryInfo = new DirectoryInfo(path);

        // A missing directory is reported the same way as "no matching file".
        if (!directoryInfo.Exists)
        {
            return string.Empty;
        }

        FileInfo? newestFile = directoryInfo
            .GetFiles("*tmp.txt")
            .OrderByDescending(f => f.CreationTime)
            .FirstOrDefault();

        return newestFile?.FullName ?? string.Empty;
    }


    /// <summary>
    /// Pulls the request parameters out of the saved page source by locating the
    /// JavaScript assignments that carry them.
    /// </summary>
    /// <param name="htmlContent">Full text of the saved page.</param>
    /// <returns>
    /// The five values as a tuple. A value is <see cref="string.Empty"/> when its marker
    /// does not occur in <paramref name="htmlContent"/>.
    /// </returns>
    private static (string biz, string uin, string pass_ticket, string key, string appmsg_token) ExtractParameters(string htmlContent)
    {
        string appmsg_token = ExtractValue(htmlContent, @"window.appmsg_token = """, @""";");
        string key = ExtractValue(htmlContent, @"var key = """, @""" || """";");
        string pass_ticket = ExtractValue(htmlContent, @"var pass_ticket = """, @""" || """";");
        string uin = ExtractValue(htmlContent, @"var uin = """, @""" || """";");
        string biz = ExtractValue(htmlContent, @"__biz: '", @"',");
        return (biz, uin, pass_ticket, key, appmsg_token);
    }

    /// <summary>
    /// Returns the text between the last occurrence of <paramref name="marker"/> and the end
    /// of that line, with every occurrence of <paramref name="trailer"/> removed and
    /// surrounding whitespace trimmed.
    /// </summary>
    private static string ExtractValue(string source, string marker, string trailer)
    {
        int markerIndex = source.LastIndexOf(marker, StringComparison.Ordinal);
        if (markerIndex < 0)
        {
            // Without this guard the first line of the whole document would be returned.
            return string.Empty;
        }

        string rest = source.Substring(markerIndex + marker.Length);
        int lineEnd = rest.IndexOf('\n');
        string line = lineEnd < 0 ? rest : rest.Substring(0, lineEnd);

        // Trim() also drops the '\r' left behind when the page uses CRLF line endings.
        return line.Replace(trailer, "").Trim();
    }

    /// <summary>
    /// Builds the profile_ext "getmsg" request URL for one page of messages.
    /// </summary>
    /// <param name="offset">Value sent as the <c>offset</c> query parameter.</param>
    /// <param name="count">Value sent as the <c>count</c> query parameter.</param>
    /// <param name="biz">Value sent as <c>__biz</c>.</param>
    /// <param name="uin">Value sent as <c>uin</c>.</param>
    /// <param name="pass_ticket">Value sent as <c>pass_ticket</c>.</param>
    /// <param name="key">Value sent as <c>key</c>.</param>
    /// <param name="appmsg_token">Value sent as <c>appmsg_token</c>.</param>
    /// <returns>The absolute URL including the rendered query string.</returns>
    public static string GenerateNewUrl(int offset, int count, string biz, string uin, string pass_ticket, string key, string appmsg_token)
    {
        // Parameters in the order they are appended to the query string.
        (string Name, string Value)[] parameters =
        {
            ("action", "getmsg"),
            ("__biz", biz),
            ("f", "json"),
            ("offset", offset.ToString()),
            ("count", count.ToString()),
            ("is_ok", "1"),
            ("scene", "124"),
            ("uin", uin),
            ("key", key),
            ("pass_ticket", pass_ticket),
            ("appmsg_token", appmsg_token),
            ("wxtoken", string.Empty),
            ("x5", "0"),
        };

        // Start from an empty query-string collection and let its ToString() render the pairs.
        NameValueCollection queryBuilder = HttpUtility.ParseQueryString(string.Empty);
        foreach (var (name, value) in parameters)
        {
            queryBuilder.Add(name, value);
        }

        return "https://mp.weixin.qq.com/mp/profile_ext?" + queryBuilder.ToString();
    }

    // One client for the lifetime of the process: creating an HttpClient per request and
    // never disposing it leaks connections when this method is called repeatedly.
    private static readonly HttpClient SharedHttpClient = new HttpClient();

    /// <summary>
    /// Sends a GET request to <paramref name="url"/> with the fixed header set below and
    /// returns the response body as text.
    /// </summary>
    /// <param name="url">Absolute URL to request.</param>
    /// <returns>The response body.</returns>
    /// <exception cref="HttpRequestException">The response status code was not a success code.</exception>
    public static async Task<string> RequestArticles(string url)
    {
        // NOTE(review): the referer and Cookie values below embed what look like session
        // credentials (key, pass_ticket, wap_sid2) directly in source. They are left
        // unchanged here; consider loading them from configuration instead.
        using var request = new HttpRequestMessage
        {
            Method = HttpMethod.Get,

            RequestUri = new Uri(url),

            Headers =
            {
                { "host", "mp.weixin.qq.com" },
                {
                    "user-agent",
                    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36 QBCore/4.0.1301.400 QQBrowser/9.0.2524.400 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2875.116 Safari/537.36 NetType/WIFI MicroMessenger/7.0.5 WindowsWechat"
                },
                { "x-requested-with", "XMLHttpRequest" },
                { "accept", "*/*" },
                {
                    "referer",
                    "https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MzIyODU3MzUzNA==&scene=124&uin=NDA4Mzg1NTk1&key=daf9bdc5abc4e8d09279856c85ab26663b0e996252fd607b2035c6db5ff6ab2c3e1e84d19704a7b17655085e77f37bffcf54bbd45d057c86f15cff8940697f997eeb46f73ddfffb2cabff6fb3172ca365a809c8bdd9b9658e11ab0a2eee1f64e0b97c00725713646c85dd157805fd263219bbbed79013f1cd1740092726a24cb&devicetype=Windows+10+x64&version=62090538&lang=zh_CN&a8scene=7&acctmode=0&pass_ticket=YzPgVcUQZmmEtdUZZh+9UodacIwMMAAkaFIFgqe8hnLbKuO53V4Pr0Es/hHANfsJ&winzoom=1"
                },
                // { "accept-language", @"zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.5;q=0.4" },
                {
                    "Cookie",
                    "wxuin=408385595; devicetype=android-33; version=2800315a; lang=zh_CN; pass_ticket=YzPgVcUQZmmEtdUZZh9UodacIwMMAAkaFIFgqe8hnLbKuO53V4Pr0Es/hHANfsJ; wap_sid2=CLvw3cIBEooBeV9IUDdmR1gyc1gzVFhWb3djVkRYWGdPejRGZDF0LXVrSHNoaHVONWthOWpBclZFU3MxMi13ZTAzbUNuUGRCNGFpekI3ZmEyUlR2Y3FWQzZKX0QxWEdDQk9VQUltYUJiUm9FU2dHZ1VCOWFrZW5PcmNFYnVUcWVVRjRjRTZpeTlxYmNOOFNBQUF+MMfe0rUGOA1AlU4="
                },
            },
        };

        using var response = await SharedHttpClient.SendAsync(request);
        response.EnsureSuccessStatusCode();
        return await response.Content.ReadAsStringAsync();
    }
}



WechatPublicRequest\ArticleEntity.cs

using System;
using System.Collections.Generic;
using Newtonsoft.Json;

/// <summary>
/// Top-level JSON object returned by the message-list request.
/// Reference-typed members are nullable because they stay null when the
/// corresponding key is absent from the response.
/// </summary>
public class WeChatMessageResponse
{
    /// <summary>Result code. Program.Main saves the current offset and stops when this is -3.</summary>
    [JsonProperty("ret")]
    public int Ret { get; set; }

    [JsonProperty("errmsg")]
    public string? ErrorMessage { get; set; }

    [JsonProperty("msg_count")]
    public int MessageCount { get; set; }

    /// <summary>Program.Main stops requesting when this is 0.</summary>
    [JsonProperty("can_msg_continue")]
    public int CanMessageContinue { get; set; }

    /// <summary>
    /// The message list as a JSON document embedded in a string; Program.Main
    /// deserializes it into <see cref="GeneralMessageList"/>.
    /// </summary>
    [JsonProperty("general_msg_list")]
    public string? GeneralMessageListJson { get; set; }

    [JsonProperty("next_offset")]
    public int NextOffset { get; set; }

    [JsonProperty("video_count")]
    public int VideoCount { get; set; }

    [JsonProperty("use_video_tab")]
    public int UseVideoTab { get; set; }

    [JsonProperty("real_type")]
    public int RealType { get; set; }

    [JsonProperty("home_page_list")]
    public List<object>? HomePageList { get; set; }
}

/// <summary>
/// Wrapper object whose "list" member holds the individual message entries.
/// </summary>
public class GeneralMessageList
{
    /// <summary>The message entries; null when the "list" key is absent from the JSON.</summary>
    [JsonProperty("list")]
    public List<MessageItem>? List { get; set; }
}

/// <summary>
/// One entry of the message list: common metadata plus the article payload.
/// Either part is null when its key is absent from the JSON, so callers must
/// check before dereferencing.
/// </summary>
public class MessageItem
{
    [JsonProperty("comm_msg_info")]
    public CommMsgInfo? CommMsgInfo { get; set; }

    [JsonProperty("app_msg_ext_info")]
    public AppMsgExtInfo? AppMsgExtInfo { get; set; }
}

/// <summary>
/// Metadata carried under the "comm_msg_info" key of a message entry.
/// String members are null when their key is absent from the JSON.
/// </summary>
public class CommMsgInfo
{
    [JsonProperty("id")]
    public long Id { get; set; }

    [JsonProperty("type")]
    public int Type { get; set; }

    /// <summary>Timestamp of the message; the original Program.Main read it as Unix seconds.</summary>
    [JsonProperty("datetime")]
    public long Datetime { get; set; }

    [JsonProperty("fakeid")]
    public string? FakeId { get; set; }

    [JsonProperty("status")]
    public int Status { get; set; }

    [JsonProperty("content")]
    public string? Content { get; set; }
}

/// <summary>
/// Article payload carried under the "app_msg_ext_info" key of a message entry.
/// Reference-typed members are null when their key is absent from the JSON.
/// </summary>
public class AppMsgExtInfo
{
    [JsonProperty("title")]
    public string? Title { get; set; }

    [JsonProperty("digest")]
    public string? Digest { get; set; }

    [JsonProperty("content")]
    public string? Content { get; set; }

    [JsonProperty("fileid")]
    public int FileId { get; set; }

    /// <summary>URL of the article; Program.Main collects this value.</summary>
    [JsonProperty("content_url")]
    public string? ContentUrl { get; set; }

    [JsonProperty("source_url")]
    public string? SourceUrl { get; set; }

    [JsonProperty("cover")]
    public string? Cover { get; set; }

    [JsonProperty("subtype")]
    public int Subtype { get; set; }

    /// <summary>Program.Main also walks <see cref="MultiAppMsgItemList"/> when this is 1.</summary>
    [JsonProperty("is_multi")]
    public int IsMulti { get; set; }

    [JsonProperty("multi_app_msg_item_list")]
    public List<AppMsgExtInfoMulti>? MultiAppMsgItemList { get; set; }

    [JsonProperty("author")]
    public string? Author { get; set; }

    [JsonProperty("copyright_stat")]
    public int CopyrightStat { get; set; }

    [JsonProperty("duration")]
    public int Duration { get; set; }

    [JsonProperty("del_flag")]
    public int DelFlag { get; set; }

    [JsonProperty("item_show_type")]
    public int ItemShowType { get; set; }

    [JsonProperty("audio_fileid")]
    public int AudioFileId { get; set; }

    [JsonProperty("play_url")]
    public string? PlayUrl { get; set; }

    [JsonProperty("malicious_title_reason_id")]
    public int MaliciousTitleReasonId { get; set; }

    [JsonProperty("malicious_content_type")]
    public int MaliciousContentType { get; set; }
}

/// <summary>
/// One element of the "multi_app_msg_item_list" array of an article payload.
/// String members are null when their key is absent from the JSON.
/// </summary>
public class AppMsgExtInfoMulti
{
    [JsonProperty("title")]
    public string? Title { get; set; }

    [JsonProperty("digest")]
    public string? Digest { get; set; }

    [JsonProperty("content")]
    public string? Content { get; set; }

    [JsonProperty("fileid")]
    public int FileId { get; set; }

    /// <summary>URL of the article; Program.Main collects this value.</summary>
    [JsonProperty("content_url")]
    public string? ContentUrl { get; set; }

    [JsonProperty("source_url")]
    public string? SourceUrl { get; set; }

    [JsonProperty("cover")]
    public string? Cover { get; set; }

    [JsonProperty("subtype")]
    public int Subtype { get; set; }


    [JsonProperty("author")]
    public string? Author { get; set; }

    [JsonProperty("copyright_stat")]
    public int CopyrightStat { get; set; }

    [JsonProperty("duration")]
    public int Duration { get; set; }

    [JsonProperty("del_flag")]
    public int DelFlag { get; set; }

    [JsonProperty("item_show_type")]
    public int ItemShowType { get; set; }

    [JsonProperty("audio_fileid")]
    public int AudioFileId { get; set; }

    [JsonProperty("play_url")]
    public string? PlayUrl { get; set; }

    [JsonProperty("malicious_title_reason_id")]
    public int MaliciousTitleReasonId { get; set; }

    [JsonProperty("malicious_content_type")]
    public int MaliciousContentType { get; set; }
}

/// <summary>
/// Shape of config.json: the offset to start requesting from and a biz identifier.
/// </summary>
public class OffsetConfig
{
    /// <summary>
    /// Offset at which Program.Main starts its request loop. The spelling matches the
    /// key used in config.json and must not be changed without migrating that file.
    /// </summary>
    public int CurrnetOffset { get; set; }

    /// <summary>Biz identifier stored in config.json; null when the key is absent.</summary>
    public string? Biz { get; set; }
}

WechatPublicRequest\config.json

{"CurrnetOffset":0,"Biz":"MzI5NDg3NTQyMw=="}

标签:JsonProperty,set,string,get,url,微信,int,序列化,public
From: https://www.cnblogs.com/zhuoss/p/18352705

相关文章

  • 【笔记】微信抢红包-3千万的技术架构
    总体思路Redis服务器两台虚拟机,2C4G规格redis服务部署客户端pom文件<?xmlversion="1.0"encoding="UTF-8"?><projectxmlns="http://maven.apache.org/POM/4.0.0"xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"......
  • Java计算机毕业设计基于微信小程序的HPV疫苗预约与抢苗系统的设计与实现(开题+源码+论
    本系统(程序+源码)带文档lw万字以上 文末可获取一份本项目的java源码和数据库参考。系统程序文件列表开题报告内容研究背景随着健康意识的提升,人们对疾病预防的重视程度日益增强,尤其是针对女性健康的HPV(人乳头瘤病毒)疫苗,其作为预防宫颈癌等恶性肿瘤的有效手段,需求量急剧增......
  • Java计算机毕业设计基于微信小程序的网络文学管理平台(开题+源码+论文)
    本系统(程序+源码)带文档lw万字以上 文末可获取一份本项目的java源码和数据库参考。系统程序文件列表开题报告内容研究背景随着移动互联网的迅猛发展,网络文学已成为大众文化消费的重要组成部分,其便捷性、互动性和丰富性深受读者喜爱。然而,传统网络文学平台多依赖于网页或AP......
  • 如何在Java项目中使用自定义序列化器处理URL
    如何在Java项目中使用自定义序列化器处理URL在Java开发中,处理和序列化URL是一个常见的需求,尤其是在涉及到图像资源时。如果项目需要根据特定条件处理图像URL(如添加前缀),可以自定义一个序列化器来简化这一过程。本文将介绍如何创建一个自定义的ImgJsonSerializer类,处理单个URL和UR......
  • php程序对微信昵称的表情处理导出excel文件
     php程序对微信昵称的表情处理,若转义存数据库,怎么读出并导出成excel文件.<?phpinclude(dirname(__FILE__).'/phpexcel-1.7.7/Classes/PHPExcel.php');include(dirname(__FILE__).'/phpexcel-1.7.7/Classes/PHPExcel/IOFactory.php');$name2="[[EMOJI:%F0%9F%......
  • http协议中url中的编码和解码
    问题引出当我们进入百度页面:此时的域名是:www.baidu.com当我们搜索aaa+@///+bbb时此时的域名是:https://www.baidu.com/s?wd=aaa%2B%40%2F%2F%2F%2Bbbb&rsv_spt=1&rsv_iqid=0xc051be38000e103b&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&rqlang=cn&tn=baiduhome_p......
  • 使用微信小程序开发制作一个简易的在线投票应用
    一、项目概述在线投票应用是一个基于微信小程序开发的应用,用于用户创建投票活动并进行投票。用户可以创建投票活动,设置投票选项和投票方式,然后将活动分享给其他用户进行投票。二、开发环境准备微信开发者工具:用于项目的开发和调试。小程序注册账号:用于获取小程序的AppID。云......
  • Java计算机毕业设计共享茶室预约微信小程序(开题报告+源码+论文)
    本系统(程序+源码)带文档lw万字以上 文末可获取一份本项目的java源码和数据库参考。系统程序文件列表开题报告内容研究背景在快节奏的现代生活中,人们对于休闲放松的需求日益增长,茶文化作为中国传统文化的重要组成部分,正逐渐成为一种流行的生活方式。然而,传统茶室在预约、管......
  • 毕业设计:基于Springboot的口腔医院微信小程序【代码+论文+PPT】
    全文内容包括:1、采用技术;2、系统功能;3、系统截图;4、配套内容。索取方式见文末微信号,欢迎关注收藏!一、采用技术语言:Java1.8框架:SpringBoot数据库:MySQL5.7、8.0开发工具:IntelliJIDEA旗舰版、微信开发工具其他:Maven3.8以上二、系统功能系统首页:展示口腔医院微信小程序的主......
  • Java基于微信小程序的图书销售购物商城系统 毕业设计
    文末获取资源,收藏关注不迷路文章目录项目介绍设计任务技术介绍项目界面关键代码目录项目介绍微信作为国内最大的社交平台之一,拥有数亿的用户群体,这为基于微信小程序的图书销售购物商城系统提供了巨大的用户潜力和市场。随着智能手机的普及和移动互联网的快速发展,越......