首页 > 编程语言 >python

python

时间:2022-11-13 22:55:43浏览次数:43  
标签:style python house hezu basic response desc

import scrapy
from scrapy.spiders.crawl import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from ..items import RentItem


class RentspiderSpider(CrawlSpider):
    """Crawl rental listings on hz.58.com and yield one ``RentItem`` per detail page.

    The crawl starts at the first listing page under ``/caihongcheng/chuzu/``,
    follows the pagination links (``pn<N>``) and hands every ``/zufang/`` or
    ``/hezu/`` detail page to :meth:`parse_zufang`.
    """

    name = 'rentspider'
    allowed_domains = ['hz.58.com']
    start_urls = ['https://hz.58.com/caihongcheng/chuzu/pn1']

    # CSS prefix shared by every selector that reads the basic-description list.
    _DESC_ITEM = '.house-basic-info .house-basic-right .house-basic-desc .house-desc-item'

    # Patterns are raw strings with literal dots escaped. LinkExtractor applies
    # them with a regex search, so no trailing ".*" is needed.
    # Detail URLs look like:
    #   //hz.58.com/zufang/50541694381583x.shtml?houseId=2547295196438538&...
    #   //hz.58.com/hezu/50659210723596x.shtml?houseId=2562337288563716&...
    rules = (
        # Listing pagination: keep following, nothing to parse on these pages.
        Rule(LinkExtractor(allow=r'https://hz\.58\.com/caihongcheng/chuzu/pn\d+'), follow=True),
        # Detail pages of both URL families share the same parser.
        Rule(LinkExtractor(allow=r'/zufang/\d+x\.shtml\?houseId=\d+'), follow=False, callback='parse_zufang'),
        Rule(LinkExtractor(allow=r'/hezu/\d+x\.shtml\?houseId=\d+'), follow=False, callback='parse_zufang'),
    )

    def parse_hezu(self, response):
        """Debug callback: log the raw body of *response* and yield nothing.

        No active rule references this callback; it is kept so that rules
        pointing at ``parse_hezu`` can be re-enabled without code changes.
        """
        self.logger.debug('hezu page %s:\n%s', response.url, response.text)

    @staticmethod
    def _split_house_summary(summary):
        """Split the whitespace-separated summary text into item fields.

        Args:
            summary: Text of the second description row, or ``None`` when the
                selector matched nothing.

        Returns:
            A ``(house_style, house_area, house_deco)`` tuple built from token
            0, tokens 1 + 2 concatenated, and token 3 respectively. Missing
            tokens become empty strings instead of raising ``IndexError``.
        """
        tokens = (summary or '').split()
        # Pad so that short or empty summaries still unpack cleanly.
        tokens += [''] * (4 - len(tokens))
        return tokens[0], tokens[1] + tokens[2], tokens[3]

    def parse_zufang(self, response):
        """Extract one ``RentItem`` from a rental detail page.

        Args:
            response: The downloaded detail page.

        Yields:
            A ``RentItem`` with title, price, rent style, house style/area/
            decoration, location, address, description and url. Fields whose
            selector matches nothing are ``None`` (title, rent_style, url) or
            an empty string (all others) rather than aborting the page.
        """

        def stripped(selector):
            # A default of '' keeps .strip() safe when the node is absent.
            return response.css(selector).get(default='').strip()

        base = self._DESC_ITEM
        self.logger.debug('parsing detail page %s', response.url)

        house_style, house_area, house_deco = self._split_house_summary(
            response.css(base + ' li:nth-child(2) span:nth-child(2)::text').get())

        description_parts = response.css(
            '.house-detail-desc .main-detail-info .house-word-introduce .introduce-item li .a2::text').getall()

        yield RentItem(
            title=response.css('.house-title h1::text').get(),
            price=stripped('.house-basic-info .house-basic-right .strongbox::text'),
            rent_style=response.css(base + ' li span:nth-child(2)::text').get(),
            house_style=house_style,
            house_deco=house_deco,
            house_area=house_area,
            house_location=stripped(base + ' li:nth-child(4) span:nth-child(2) a::text'),
            address=stripped(base + ' li:nth-child(6) span:nth-child(2)::text'),
            description=''.join(description_parts).strip(),
            # First <link> in <head>; presumably the canonical URL - TODO confirm.
            url=response.xpath('/html/head/link[1]/@href').get(),
        )

 

import scrapy
from scrapy.spiders.crawl  import CrawlSpider, Rule
from scrapy.linkextractors  import LinkExtractor
from ..items  import RentItem


class RentspiderSpider(CrawlSpider):
    """Crawl rental listings on hz.58.com and yield one ``RentItem`` per detail page.

    The crawl starts at the first listing page under ``/caihongcheng/chuzu/``,
    follows the pagination links (``pn<N>``) and hands every ``/zufang/`` or
    ``/hezu/`` detail page to :meth:`parse_zufang`.
    """

    name = 'rentspider'
    allowed_domains = ['hz.58.com']
    start_urls = ['https://hz.58.com/caihongcheng/chuzu/pn1']

    # CSS prefix shared by every selector that reads the basic-description list.
    _DESC_ITEM = '.house-basic-info .house-basic-right .house-basic-desc .house-desc-item'

    # Patterns are raw strings with literal dots escaped. LinkExtractor applies
    # them with a regex search, so no trailing ".*" is needed.
    # Detail URLs look like:
    #   //hz.58.com/zufang/50541694381583x.shtml?houseId=2547295196438538&...
    #   //hz.58.com/hezu/50659210723596x.shtml?houseId=2562337288563716&...
    rules = (
        # Listing pagination: keep following, nothing to parse on these pages.
        Rule(LinkExtractor(allow=r'https://hz\.58\.com/caihongcheng/chuzu/pn\d+'), follow=True),
        # Detail pages of both URL families share the same parser.
        Rule(LinkExtractor(allow=r'/zufang/\d+x\.shtml\?houseId=\d+'), follow=False, callback='parse_zufang'),
        Rule(LinkExtractor(allow=r'/hezu/\d+x\.shtml\?houseId=\d+'), follow=False, callback='parse_zufang'),
    )

    def parse_hezu(self, response):
        """Debug callback: log the raw body of *response* and yield nothing.

        No active rule references this callback; it is kept so that rules
        pointing at ``parse_hezu`` can be re-enabled without code changes.
        """
        self.logger.debug('hezu page %s:\n%s', response.url, response.text)

    @staticmethod
    def _split_house_summary(summary):
        """Split the whitespace-separated summary text into item fields.

        Args:
            summary: Text of the second description row, or ``None`` when the
                selector matched nothing.

        Returns:
            A ``(house_style, house_area, house_deco)`` tuple built from token
            0, tokens 1 + 2 concatenated, and token 3 respectively. Missing
            tokens become empty strings instead of raising ``IndexError``.
        """
        tokens = (summary or '').split()
        # Pad so that short or empty summaries still unpack cleanly.
        tokens += [''] * (4 - len(tokens))
        return tokens[0], tokens[1] + tokens[2], tokens[3]

    def parse_zufang(self, response):
        """Extract one ``RentItem`` from a rental detail page.

        Args:
            response: The downloaded detail page.

        Yields:
            A ``RentItem`` with title, price, rent style, house style/area/
            decoration, location, address, description and url. Fields whose
            selector matches nothing are ``None`` (title, rent_style, url) or
            an empty string (all others) rather than aborting the page.
        """

        def stripped(selector):
            # A default of '' keeps .strip() safe when the node is absent.
            return response.css(selector).get(default='').strip()

        base = self._DESC_ITEM
        self.logger.debug('parsing detail page %s', response.url)

        house_style, house_area, house_deco = self._split_house_summary(
            response.css(base + ' li:nth-child(2) span:nth-child(2)::text').get())

        description_parts = response.css(
            '.house-detail-desc .main-detail-info .house-word-introduce .introduce-item li .a2::text').getall()

        yield RentItem(
            title=response.css('.house-title h1::text').get(),
            price=stripped('.house-basic-info .house-basic-right .strongbox::text'),
            rent_style=response.css(base + ' li span:nth-child(2)::text').get(),
            house_style=house_style,
            house_deco=house_deco,
            house_area=house_area,
            house_location=stripped(base + ' li:nth-child(4) span:nth-child(2) a::text'),
            address=stripped(base + ' li:nth-child(6) span:nth-child(2)::text'),
            description=''.join(description_parts).strip(),
            # First <link> in <head>; presumably the canonical URL - TODO confirm.
            url=response.xpath('/html/head/link[1]/@href').get(),
        )

  

标签:style,python,house,hezu,basic,response,desc
From: https://www.cnblogs.com/Jeffreywuyu/p/16887588.html

相关文章

  • Eclipse Python IDE安装
     一、下载JDK首先下载JDK,进入JDK官网下载最新版本的JDK并安装:https://www.oracle.com/in/java/technologies/downloads/下载最新版本与时俱进(非常建议)。选择合适的文......
  • python赋值运算符
    = 赋值运算符;+= 加法运算符;-= 减法运算符;*= 乘法运算符;/= 除法运算符;%= 取模运算符(如果可以被整除,返回0;如果不能被整除,返回余数);**= 幂运算符。f=10......
  • Python: 函数传参、默认参数
    python函数是值传递还是引用传递答案:都不是,应该是传对象或对象的引用函数参数在传递的过程中将整个对象传入,对可变对象的修改在函数外部以及内部都可以见,调用者和......
  • python字典类型
    什么是字典字典是由多个键(key)及其对应的值(value)所组成的一种数据类型a=dict()a={}person={'name':'qjb','age':33}字典支持的数据类型key支持字符串......
  • 极客编程python入门-调用函数
    调用函数Python内置了很多有用的函数,我们可以直接调用。要调用一个函数,需要知道函数的名称和参数a=abs(100.123)print(a)a=abs(-123)print(a)a=abs(-123.123)print(a......
  • Python_字符串常用操作
    string.count(str)统计str在string中出现的次数s="一花一木一世界"print(s.count("一"))  string.replace(old_str,new_str)将string中的old_str替换为ne......
  • 【Python】第4章-8 求分数序列前N项和
    本题要求编写程序,计算序列2/1+3/2+5/3+8/5+...的前N项之和。注意该序列从第2项起,每一项的分子是前一项分子与分母的和,分母是前一项的分子。输入格式:输入在一行中给出......
  • python pip 安装使用国内镜像源
    python pip 安装使用国内镜像源。一般我们 Python 安装库是通过:pip install 库。不过上面的库安装速度太慢,有时候会不成功,因为它用的是国外的镜像库,所以我们不如用国内的镜像库,安......
  • Python蟒蛇绘制
    Python之turtlePython蟒蛇绘制:importturtleturtle.setup(650,350,200,200)turtle.penup()turtle.fd(-250)turtle.pendown()turtle.pensize(25)turtle.pencolor("purple")tu......
  • 【Python】第4章-7 统计学生平均成绩与及格人数
    本题要求编写程序,计算学生们的平均成绩,并统计及格(成绩不低于60分)的人数。题目保证输入与输出均在整型范围内。输入格式:输入在第一行中给出非负整数N,即学生人数。第二行......