import requests
import json
from bs4 import BeautifulSoup
# Scrape the topic list from https://www.lmonkey.com/t and save it as JSON.
#
# Data source : https://www.lmonkey.com/t
# Fields      : article title, article link, author, publication time
# Tool        : Python (requests + BeautifulSoup using the 'lxml' parser)
#
# Output: ./yuanquan.json (UTF-8, keys sorted, 4-space indent). After the file
# is written it is read back and echoed to stdout.

url = 'https://www.lmonkey.com/t'
headers = {
    # NOTE(review): the spaces around "/" in this value look like a formatting
    # artifact (a conventional product token is "Mozilla/5.0"). The string is
    # kept byte-for-byte because it is sent on the wire -- confirm and fix.
    'User-Agent': 'Mozilla / 5.0(Windows NT 10.0; WOW64) AppleWebKit / 537.36(KHTML, like Gecko) Chrome / 72.0.3626.81 Safari / 537.36 SE 2.X MetaSr 1.0',
}

# Without a timeout requests waits indefinitely on a stalled server.
res = requests.get(url=url, headers=headers, timeout=10)

if res.status_code == 200:
    print("请求发送成功")
    soup = BeautifulSoup(res.text, 'lxml')
    divs = soup.find_all('div', class_="list-group-item list-group-item-action p-06")

    varlist = []
    for item in divs:
        title_div = item.find('div', class_="topic_title")
        if title_div is None:
            # List items without a topic title block are not articles; skip.
            continue

        # Each lookup below returns None when the tag is absent, so guard
        # before dereferencing instead of crashing on one malformed item.
        link = item.a
        time_span = item.span
        author_link = item.strong.a if item.strong is not None else None
        posted = time_span.get('title') if time_span is not None else None

        print(posted)
        varlist.append({
            # Only the first line of the title block's text is the title.
            'title': title_div.text.split('\n')[0],
            'url': link.get('href') if link is not None else None,
            'author': author_link.text if author_link is not None else None,
            'time': posted,
        })

    print(varlist)

    # ensure_ascii=False keeps non-ASCII text readable in the output file.
    with open('./yuanquan.json', 'w', encoding='utf-8') as fp:
        json.dump(varlist, fp, ensure_ascii=False, sort_keys=True, indent=4)

    # Echo the saved file so the result can be checked from the console.
    with open('./yuanquan.json', 'r', encoding='utf-8') as f:
        print(f.read())
else:
    # Report the failure instead of exiting silently with no output.
    print("请求失败,状态码:", res.status_code)