前置条件:
1. 已安装 Node.js
2. 已安装 cheerio(npm i cheerio)
3. 了解 jQuery 选择器的基础知识
const https = require('https');
const cheerio = require('cheerio');

// Jianshu home page.
const url = "https://www.jianshu.com/";

/**
 * Extract note summaries from crawled HTML.
 *
 * Looks inside every `.note-list` element and builds one record per
 * `.content` element found there.
 *
 * @param {string} html - Raw HTML markup to parse.
 * @returns {Array<{author: string, title: string, abstract: string}>}
 *   One record per `.content` element; empty when nothing matches.
 */
function filterSection(html) {
  // Load the crawled HTML structure.
  const $ = cheerio.load(html);
  const sectionData = [];

  // Visit each `.content` element on its own: calling .text() on a selection
  // that matches several elements returns their combined text, which would
  // merge every note into a single record.
  // NOTE(review): assumes each note is wrapped in its own `.content`
  // element — confirm against the live page markup.
  $('.note-list')
    .find('.content')
    .each(function () {
      const content = $(this);
      sectionData.push({
        // Note author.
        author: content.find('.author').find('.info').text(),
        // Note title.
        title: content.find('.title').text(),
        // Note abstract (content summary).
        abstract: content.find('.abstract').text(),
      });
    });

  return sectionData;
}

/**
 * Print every record to stdout as author, title and abstract on separate
 * lines, followed by a blank line.
 *
 * @param {Array<{author: string, title: string, abstract: string}>} sectionData
 *   Records as returned by `filterSection`.
 * @returns {void}
 */
function print(sectionData) {
  sectionData.forEach(({ author, title, abstract }) => {
    console.log(`${author}\n${title}\n${abstract}\n`);
  });
}

// Fetch the page, accumulate the body, then parse and print it.
https
  .get(url, (res) => {
    // Decode at the stream level so a multi-byte UTF-8 character that is
    // split across two chunks is not corrupted by per-chunk stringification.
    res.setEncoding('utf8');

    let html = '';
    res.on('data', (chunk) => {
      html += chunk;
    });
    res.on('end', () => {
      print(filterSection(html));
    });
    // Failures while reading the response body.
    res.on('error', (err) => {
      console.log('错误', err.message);
    });
  })
  // Failures on the request itself (DNS, connection refused, TLS, ...).
  // These are emitted on the request object, not on the response.
  .on('error', (err) => {
    console.log('错误', err.message);
  });