爬取小说斗破苍穹

news/2024/7/19 8:43:54 标签: python, 爬虫

从网上寻找小说斗破苍穹,爬取并保存。

import requests
import time
import re
from lxml import etree
from bs4 import BeautifulSoup
# Request headers passed to every page fetch (browser-like User-Agent).
headers = {
    'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36 SE 2.X MetaSr 1.0'
}

path = 'D:\\doupo.txt'  # where the downloaded novel text is saved
# Append mode, with an explicit UTF-8 encoding: the chapter text is Chinese,
# so relying on the platform default codec can raise UnicodeEncodeError when
# a character is not representable in it.
f = open(path, 'a+', encoding='utf-8')

def get_info(url):
    """Fetch one chapter page and append its paragraph text to the output file.

    Every ``<p>...</p>`` fragment found in the page is written to the
    module-level file ``f``, one fragment per line. Pages that do not answer
    with HTTP 200 are reported on stdout and skipped.

    Args:
        url: Address of the chapter page to download.

    Raises:
        requests.RequestException: If the request fails or times out.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    # A timeout keeps one stalled connection from hanging the whole crawl;
    # without it the request may wait indefinitely.
    res = requests.get(url, headers=headers, timeout=10)
    if res.status_code != 200:
        # Report the skipped page instead of silently leaving a gap.
        print("下载失败: %s (状态码 %d)" % (url, res.status_code))
        return
    # re.S lets '.' match newlines, so a paragraph may span several lines.
    contents = re.findall('<p>(.*?)</p>', res.content.decode('utf-8'), re.S)
    for content in contents:
        f.write(content + '\n')

if __name__ == '__main__':
    # URL template shared by every chapter page.
    page_template = 'http://www.doupoxs.com/doupocangqiong/{}.html'

    # Chapter 1 is fetched up front, with no delay and no progress message.
    f.write('\n\n' + "第1章" + '\n\n')
    get_info(page_template.format(str(1)))

    # Remaining chapters: 2 through 1664 inclusive.
    for chapter in range(2, 1665):
        f.write('\n\n' + "第%d章" % chapter + '\n\n')
        get_info(page_template.format(str(chapter)))
        time.sleep(1)  # pause 1s between requests to avoid hitting the site too fast
        print("第%d章下载" % chapter)

# Close the output file opened at module level.
f.close()

http://www.niftyadmin.cn/n/1269258.html

相关文章

python 最大公约数

三种方法求最大公约数 import time #连续整数检测 def calculation(a,b):x a % bwhile (x ! 0):a bb xx a % breturn b #分解质因数 def primefactor(a,b):i 2j 1while((i<a) and (i<b)):if(((a % i) 0) and ((b % i) 0)):a a / ib b / ij j * ielse:i i 1r…

数值分析 插值法

import numpy as np import matplotlib.pyplot as plt # 得到差商表def get_meandiff_tabel(X, Y):n len(X)A np.zeros([n, n])for i in range(0, n):A[i][0] Y[i]for j in range(1,n):for i in range(j,n):A[i][j] (A[i][j-1] - A[i-1][j-1])/(X[i]-X[i-j])return AX [0.…

python 从一个文件调用另一个文件的方法

view.py #-*- coding:utf-8 -*-import time class View(object):admin 1password 1def printAdminView(self):print("******************************************")print("* *")print("* 欢迎登录…

控制发送邮件 python

#-*- coding:utf-8 -*- import smtplib # # # # smtpobj smtplib.SMTP("smtp.qq.com",587) # print(type(smtpobj)) # #如果是250 表示连接成功 # print(smtpobj.ehlo()) # # print(smtpobj.login(1090036582qq.com,nykbzerukhkajbfa)) # #断开连接 # print(smtpobj…

python基础-类

#举例说明如何定义类和实例化所定义的类#定义类 class People(object):name "默认"age "默认"def __init__(self,name,age):self.name nameself.age agedef pprint(self):print("名字是:%s"%(self.name))print("年龄是&…

python 字符串匹配

#-*- coding:utf-8 -*- import datetime#BF算法 def bf(ss,s):lens len(ss)llen len(s)for i in range(0,lens):for j in range(0,llen):if ss[i j]! s[j]:j -1breakif j llen-1:return Trueif (lens - i) < llen:return False#KMP算法 def same_start_end(s):"&qu…

python 列表 字典 元组

#-*- coding:utf-8 -*-#什么是编译,什么叫解释? #答;编译器是把源程序的每一条语句都编译成机器语言,并保存成二进制文件; # 解释器则是只在执行程序时,才一条一条的解释成机器语言给计算机来执行#除了Python语言,你…

python 蛮力法之最近对

#-*- coding:utf-8 -*-from math import sqrt import time # 蛮力法 def solve(points):n len(points)min_d float("inf") # 最小距离:无穷大min_ps None # 最近点对for i in range(n-1):for j in range(i1, n):d sqrt((points[i][0] - points[j][…