新零云博客 - 专注活动,软件,教程分享!

新零云博客
XingLy.Cn
ad

[8-5] Python快速爬取当前城市所有租房网站房源及配置

引入

使用Python爬取小猪短租网站上的房源信息,本文以徐州为例。

代码效果图请看下方,其他部分请查看附件,一起学习,谢谢

如有问题可及时在讨论区回复,感谢大家支持!

# -*- coding: utf-8 -*-
"""
@Time : 2020/3/18 22:23
@Auth : Suk
@File : 5.小猪短租相关信息.py
@IDE  : PyCharm
@Motto: Knowing your ignorance is the best way to succeed.
@Tips : 版权所有,转载,转发请注明,如有侵权请联系,谢谢.
"""
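# Xiaozhu short-term rental data: listing name, address, price, host, detail-page link, etc.
# Crawls the search-result pages (the number of pages is entered by the user), then follows each listing's detail link and scrapes the detail page.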
 
import bs4
import requests
from bs4 import BeautifulSoup
 
# Request headers sent with the detail-page request in houseDetails():
# a desktop browser User-Agent string.
kv = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/83.0.4103.97 Safari/537.36 Edg/83.0.478.45'
    ),
}
# Row template for the listing table printed by main(). Fields 0-4 are the
# row's columns; field 5 is the fill character used to pad columns 1-4
# (main() passes chr(12288)).
format = '\t'.join((
    '{0:<10}',
    '{1:{5}<40}',
    '{2:{5}<10}',
    '{3:{5}^30}',
    '{4:{5}<10}',
))
 
 
def main():
    """Show the table of scraped listings and ask which one to inspect.

    Prints one row for every entry of the module-level ``allInitMessage``
    list, then prompts until the user enters a listing number between 1 and
    the number of listings. The choice is stored in the module-level
    variable ``a`` and is also returned.

    Returns:
        int: The 1-based listing number chosen by the user.

    Raises:
        SystemExit: If ``allInitMessage`` is empty, since there is nothing
            to choose from.
    """
    global a
    total = len(allInitMessage)
    print('为您找到了{}条信息,您可以根据需要检索相关信息'.format(total))
    print('-------------------------------------------------------------------------')
    print(
        '序号             房屋名称                                                            价格                  地址                                               房东         ')
    for row in allInitMessage:
        print(format.format(row[0], row[1], row[2], row[3], row[4], chr(12288)))

    if not total:
        raise SystemExit('未找到任何房源信息,程序退出。')

    while True:
        raw = input('请输入你想了解的房屋序号:')
        # int() rather than eval(): the text comes straight from the user and
        # must never be executed as Python code.
        try:
            choice = int(raw.strip())
        except ValueError:
            print('ERROR!输入错误!')
            continue
        # Reject 0, negatives and numbers past the end so the caller never
        # indexes the wrong listing or runs off the list.
        if 1 <= choice <= total:
            break
        print('ERROR!输入错误!')

    a = choice
    return a
 
 
def _print_tokens(text, skip_words=()):
    """Print each whitespace-separated token of *text* on its own tab-indented line.

    Tokens that contain any of the strings in *skip_words* are omitted.
    """
    for token in text.split():
        if any(word in token for word in skip_words):
            continue
        print("\t" + token)


def houseDetails(num):
    """Fetch and print the detail page of one listing, then ask what to do next.

    The detail URL is the last element of ``allInitMessage[num - 1]``. After
    printing the page sections the user is asked whether to return to the
    listing table; the question is repeated until the answer is ``y`` or
    ``n`` (case-insensitive).

    Args:
        num: 1-based listing number, as shown in the table printed by main().

    Returns:
        bool: True if the user chose to go back (main() has already been run
        again), False if the user chose to quit.

    Raises:
        IndexError: If *num* is not between 1 and ``len(allInitMessage)``.
        Other exceptions (for example AttributeError or IndexError) propagate
        when the downloaded page does not contain the elements looked up
        below.
    """
    # Guard explicitly: without it 0 or a negative number would silently
    # select a listing counted from the end of the list.
    if not 1 <= num <= len(allInitMessage):
        raise IndexError('listing number out of range: {}'.format(num))

    deurl = allInitMessage[num - 1][-1]
    # A timeout keeps an unresponsive server from hanging the program forever.
    detailRe = requests.get(deurl, headers=kv, timeout=30)
    sp = BeautifulSoup(detailRe.text, 'lxml')

    print('标题:' + sp.title.string.strip())
    print("价格:" + sp.find('div', class_='fl').text.strip())
    print("地址:" + sp.find('span', class_='pr5').text.strip())
    print("房东:" + sp.find('a', class_='lorder_name').text.strip())

    print('详细信息:')
    # Only tokens that contain a colon or the "宜住" marker are shown here.
    for token in sp.find('ul', class_='house_info clearfix').text.split():
        if ':' in token or "宜住" in token:
            print("\t" + token)

    print("个性描述:")
    _print_tokens(
        sp.find('div', class_='box_white clearfix detail_intro_item').text,
        ("个性描述", "查看全部", "收起"),
    )

    print("内部情况:")
    _print_tokens(
        sp.find('div', class_='box_gray clearfix detail_intro_item').text,
        ("内部情况", "查看全部", "收起"),
    )

    # The remaining sections are addressed by their position among the
    # 'info_r' blocks, so query them once and index into the result.
    info_blocks = sp.find_all('div', class_='info_r')

    print("交通情况:")
    _print_tokens(info_blocks[2].text, ("交通信息", "交通情况", "查看全部", "收起"))

    print("周边情况:")
    # NOTE(review): this skip list filters "交通信息", not a "周边..." heading;
    # kept as in the original - confirm against the live page.
    _print_tokens(info_blocks[3].text, ("交通信息", "查看全部", "收起"))

    print("配套条件:")
    print('\t', end="")
    for child in info_blocks[4].children:
        if not isinstance(child, bs4.element.Tag) or child.ul is None:
            continue
        for item in child.ul.children:
            if not isinstance(item, bs4.element.Tag):
                continue
            # Items whose first CSS class contains 'no' are skipped; a tag
            # without a class attribute is treated as having an empty class.
            classes = item.get('class') or ['']
            if 'no' in classes[0]:
                continue
            print(item.text.strip() + ',', end="")

    print('\n' + "入住须知:")
    _print_tokens(info_blocks[5].text)

    print("押金及其他费用")
    for token in sp.find('div', class_='clause_box').text.split():
        if any(word in token for word in ("押金及其他费用", "查看全部", "收起", ">")):
            continue
        if token.endswith(":"):
            # A token ending in a colon stays on the same line as what follows.
            print("\t" + token, end="")
        else:
            print("\t" + token)

    while True:
        # Compare the whole answer: a substring test such as `x in 'yY'`
        # would also accept an empty answer.
        select = input('是否返回主页面(y/n)?').strip().lower()
        if select == 'y':
            try:
                main()
            except Exception:
                # main() prompts the user; report a failure there and ask
                # again instead of aborting. KeyboardInterrupt and SystemExit
                # are deliberately not caught.
                print("ERROR!输入错误!")
                continue
            return True
        if select == 'n':
            print("退出成功!")
            return False
        print("ERROR!输入错误!")
 
 
# Search-result URL prefix for Xuzhou; the page number and '-0/' are appended.
url = 'http://xuzhou.xiaozhu.com/search-duanzufang-p'
allhref = []         # detail-page URL of every listing collected so far
allInitMessage = []  # one row per listing: [number, title, price, address, host, url]

# int() rather than eval(): the text comes straight from the user and must
# never be executed as Python code.
page_count = int(input('当前城市:徐州\t请输入你想检索的页数,共13页:'))
for page in range(1, page_count + 1):
    print('\t正在检索第' + str(page) + '页')
    # Send the same headers houseDetails() uses for this site, and a timeout
    # so an unresponsive server cannot hang the crawl.
    listing_page = requests.get(url + str(page) + '-0/', headers=kv, timeout=30)
    soup = BeautifulSoup(listing_page.text, 'lxml')

    # Collect the first link of every element child of the result list.
    hrefs = []
    for item in soup.find('ul', class_='pic_list clearfix list_code').children:
        if not isinstance(item, bs4.element.Tag):
            continue
        link = item.find("a")
        if link is not None and link.get('href'):
            hrefs.append(link.get('href'))

    # Visit each detail page and record the summary row shown by main().
    for href in hrefs:
        detail_page = requests.get(href, headers=kv, timeout=30)
        detail = BeautifulSoup(detail_page.text, 'lxml')
        allInitMessage.append([
            str(len(allInitMessage) + 1) + '.',
            detail.title.string,
            detail.find('div', class_='fl').text.strip(),
            detail.find('span', class_='pr5').text.strip(),
            detail.find('a', class_='lorder_name').text.strip(),
            href,
        ])
        # Record the single link; appending the whole `hrefs` list here
        # would store one copy of the page's list per listing.
        allhref.append(href)
    print("\n\n\tMission Success!\n\n\n")

main()
while True:
    # main() stores the chosen listing number in the module-level `a`.
    # Only an explicit False means "quit"; any other result shows details again.
    if houseDetails(a) is False:
        # SystemExit instead of exit(): exit() is a helper injected by the
        # `site` module and is not guaranteed to exist.
        raise SystemExit(0)


效果展示

效果

素质三连!收藏+评分+有用

本原创文章未经允许不得转载 | 当前页面:新零云博客 - 专注活动,软件,教程分享! » [8-5] Python快速爬取当前城市所有租房网站房源及配置

评论