
Scraping Biquge novels with Python (Python learning share: crawler source code for a Biquge full-novel download tool)

2023-12-18 08:35:27

# Import modules
import requests
import parsel
from lxml import etree
import re
import pandas as pd
import datetime
import time
from tqdm import tqdm
import prettytable as pt

# Request headers
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'}

curr_time = datetime.datetime.now()
times = datetime.datetime.strftime(curr_time, '%Y-%m-%d %H:%M:%S')
print(f' 现在是:{times} by 琴棋书画')
print()
print('***********欢迎使用笔趣阁小说全本下载工具*************')
print()

while True:
    # Request the search results
    keyword = input('请输入你想要搜索的小说名字(0退出):')
    if keyword == '0':
        break
    seacher_url = f'https://www.biquge9.com/s?q={keyword}'
    respones = requests.get(seacher_url, headers=headers)
    # print(respones.text)

    # Parse the search results
    selector = etree.HTML(respones.text)
    xs_urls = selector.xpath('//h4/a[contains(@href,"/book/")]/@href')        # novel URL list
    xs_names = selector.xpath('//a[contains(@href,"/book/")]/text()')         # novel title list
    xs_authers = selector.xpath('//div[contains(@class,"author")]/text()')    # novel author list
    # selector = parsel.Selector(respones.text)
    # xs_urls = selector.css('.bookinfo .bookname a::attr(href)').getall()
    # xs_names = selector.css('bookinfo .bookname a::text').getall()
    # xs_authers = selector.css('.bookinfo div::text').getall()
    # print(xs_authers, xs_names, xs_urls)

    tb = pt.PrettyTable()
    tb.field_names = ['序号', '书名', '作者', '小说ID']
    num = 0
    if xs_names:
        # print(xs_urls, xs_names, xs_authers)
        xs_list = []
        # Iterate over the search results
        for xs_url, xs_name, xs_auther in zip(xs_urls, xs_names, xs_authers):
            xs_id = xs_url.split('/')[2]                     # novel ID
            xs_url = 'https://www.biquge9.com' + xs_url      # novel URL
            xs_name = xs_name.strip()                        # novel title
            xs_auther = xs_auther.split(':')[1]              # novel author
            # print(xs_id, xs_url, xs_name, xs_auther)
            dict = {'书名': xs_name, '作者': xs_auther, 'ID': xs_id}  # build a record
            xs_list.append(dict)                             # novel info list
            tb.add_row([num, xs_name, xs_auther, xs_id])
            num = num + 1
        print(tb)
        # print()
        print(f'一共搜索到{len(xs_list)}条数据')
        print()
        # sea_data = pd.DataFrame(xs_list)
        # print(sea_data)          # output the novel info list
        print()
        key_num = input('请输入你要下载的小说序号:')
        print()
        print('小说正在下载,已完成......')
        xs_ID = xs_list[int(key_num)]['ID']
        url = 'https://www.biquge9.com/book/' + xs_ID
        urls = [url + '/{}.html'.format(str(i)) for i in range(1, 5000)]  # build chapter URL list
        # print(urls)
        for url1 in tqdm(urls):                              # iterate chapter URLs
            data_1 = requests.get(url1, headers=headers)     # request chapter page
            selector = parsel.Selector(data_1.text)
            # print(selector)
            title = selector.css('.content h1::text').get()  # chapter title
            # print(title)
            content = selector.css('#chaptercontent::text').getall()  # chapter body text
            # print(content)
            content_1 = '\n'.join(content)
            # print(content_1)
            with open(xs_name + '.txt', 'a+', encoding='utf-8') as file:  # append novel content to a TXT file
                if title:
                    file.write(title)
                    file.write('\n')
                    file.write(content_1)
                    file.write('\n')
                    # print('正在保存', title)
                else:
                    file.write(content_1)
                    file.write('\n')
    else:
        print('请正确输入小说名字或作者名字,没有查到这本书的数据.....')
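One weakness of the listing above is that it always requests 5,000 chapter URLs, whatever the actual length of the book, and keeps appending even after the chapters run out. Below is a minimal sketch of an early-exit variant, assuming that biquge9.com answers a non-existent chapter with a non-200 status code or an empty #chaptercontent node; the helper name download_book, the max_chapters parameter and both stop conditions are illustrative assumptions rather than part of the original source.

import requests
import parsel
from tqdm import tqdm

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'}

def download_book(book_url, out_path, max_chapters=5000):
    # Fetch chapters one by one and stop at the first page that looks like
    # the end of the book (error status or no chapter text) instead of
    # blindly walking a fixed list of 5,000 URLs.
    with open(out_path, 'a', encoding='utf-8') as file:
        for i in tqdm(range(1, max_chapters + 1)):
            resp = requests.get(f'{book_url}/{i}.html', headers=headers)
            if resp.status_code != 200:      # assumption: missing chapters return an error status
                break
            selector = parsel.Selector(resp.text)
            title = selector.css('.content h1::text').get()
            content = selector.css('#chaptercontent::text').getall()
            if not content:                  # assumption: an empty body also means the book has ended
                break
            if title:
                file.write(title + '\n')
            file.write('\n'.join(content) + '\n')

Called as download_book('https://www.biquge9.com/book/' + xs_ID, xs_name + '.txt'), it would replace the urls list and the inner for loop of the original script.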

