安裝第三方庫
pip install requests beautifulsoup4 lxml tqdm
一、代碼
# -*-coding:utf-8-*-
"""
@File : 單線程下爬取筆趣小說.py
@Time : 2021.5.11
@Author : 老白
@Software: IntelliJ IDEA 2019.3.5 x64
@python : Python 3.7.3
"""
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
#定義獲取文本內容
def get_content(target, timeout=(3, 7)):
    """Download one chapter page and return its text split into paragraphs.

    Args:
        target: URL of the chapter page to fetch.
        timeout: ``(connect, read)`` timeout in seconds handed to
            ``requests.get``. Without one, a stalled server would block
            the whole download forever.

    Returns:
        A list of strings: the stripped text of the ``<div id="content">``
        element, split on runs of four non-breaking spaces. An empty list
        is returned when the page contains no such element.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status (4xx/5xx).
    """
    response = requests.get(url=target, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as a chapter.
    response.raise_for_status()
    # The decoding is forced to UTF-8 rather than trusting the response
    # headers (presumably the site mislabels its charset - TODO confirm).
    response.encoding = 'utf-8'
    page = BeautifulSoup(response.text, 'lxml')
    body = page.find('div', id='content')
    if body is None:
        # No chapter body on this page; give the caller an empty chapter
        # rather than crashing with an AttributeError on None.
        return []
    # Four consecutive NBSPs are used as the paragraph separator
    # (presumably the site's paragraph indent - verify against the markup).
    return body.text.strip().split('\xa0' * 4)
#輸入書名獲取目標文本鏈接
# Ask for a book title and look it up through the site's search page.
keyword = input("輸入書名:")
server = 'https://www.xxbiquge.net/'
# Passing the keyword via ``params`` lets requests URL-encode it, so titles
# containing '&', '#', '+' or spaces no longer corrupt the query string.
req = requests.get(
    url=server + 'search.php',
    params={'keyword': keyword},
    timeout=(3, 7),
)
req.raise_for_status()
req.encoding = 'utf-8'
html = req.text
soup = BeautifulSoup(html, 'lxml')

# The first anchor of the search page is taken as the link to the book.
# NOTE(review): this assumes the first <a> is the best match - confirm
# against the real search-result markup.
first_link = soup.a
if first_link is None or not first_link.get('href'):
    raise SystemExit('搜索結果頁面中沒有找到鏈接: ' + keyword)
# urljoin leaves absolute hrefs untouched and resolves relative ones
# against the site root.
target = urljoin(server, first_link['href'])
print(target)

book_name = keyword + '.txt'

# Fetch the book's index page and collect every chapter link in div#list.
req = requests.get(url=target, timeout=(3, 7))
req.raise_for_status()
req.encoding = 'utf-8'
html = req.text
chapter_bs = BeautifulSoup(html, 'lxml')
chapter_list = chapter_bs.find('div', id='list')
if chapter_list is None:
    raise SystemExit('頁面中沒有找到章節列表: ' + target)
chapters = chapter_list.find_all('a')

# Download the chapters one by one and append them to the output file.
# The file is opened once (still in append mode, as before) instead of
# being reopened for every chapter.
with open(book_name, 'a', encoding='utf-8') as f:
    for chapter in tqdm(chapters):
        href = chapter.get('href')
        if not href:
            # An anchor without a target cannot be downloaded.
            continue
        # get_text() always yields a str, whereas .string is None for
        # anchors that wrap nested tags, which would make f.write() raise.
        chapter_name = chapter.get_text()
        # The site root stays the base (as in the original server + href),
        # but urljoin avoids the doubled slash for root-absolute hrefs.
        url = urljoin(server, href)
        content = get_content(url)
        f.write(chapter_name)
        f.write('\n')
        f.write('\n'.join(content))
        f.write('\n')
二、示例效果
idea
有段時間沒更新了，我自己的博客服務器配置太拉了就停了。