准备
(1)可以在pycharm中安装requests库(在pycharm的终端中输入pip install requests),我这里已经安装过了。

(2)也可以在pycharm中,依次点击file — settings — project:python — python interpreter,在该界面中搜索并安装requests库。


代码
所爬取的是这个壁纸网站的图片:http://pic.netbian.com/index.html
import requests
import re
import os
#导入相关包
# Wallpaper scraper for pic.netbian.com.
#
# Walks the listing pages index.html, index_2.html, ..., index_<j>.html,
# follows every "/tupian/<id>.html" link found on each of them, pulls the
# first <img src="..." data-pic ...> address out of that detail page and
# saves the image as <i>.jpg in the output folder.

# Request headers sent with every call; add more fields here if needed.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3706.400 SLBrowser/10.0.4040.400'}

# Site root and the local folder the downloaded images are written to.
base_url = "http://pic.netbian.com"
save_dir = "C:\\Users\\Lenovo\\Desktop\\images"

# Seconds to wait on a single request, so a stalled connection cannot hang
# the whole crawl.
request_timeout = 10

# i numbers the saved files (1.jpg, 2.jpg, ...); j is the last listing page.
i = 1
j = 10

# Create the output folder once up front instead of re-checking per image.
os.makedirs(save_dir, exist_ok=True)

for x in range(1, j + 1):
    # The first listing page is index.html; later ones are index_<x>.html.
    if x == 1:
        listing_url = base_url + "/index.html"
    else:
        listing_url = base_url + "/index_" + str(x) + ".html"

    try:
        response_one = requests.get(listing_url, headers=headers, timeout=request_timeout)
        response_one.raise_for_status()
    except requests.RequestException as err:
        # One unreachable listing page should not abort the remaining pages.
        print("skipping listing page", listing_url, "-", err)
        continue
    response_one.encoding = "utf-8"
    html_one = response_one.text

    # Capture the <id> part of every /tupian/<id>.html link on the page.
    urls = re.findall(r'<a href="/tupian/(.*?).html" ', html_one)

    for url in urls:
        detail_url = base_url + "/tupian/" + url + ".html"
        try:
            response_two = requests.get(detail_url, headers=headers, timeout=request_timeout)
            response_two.raise_for_status()
            response_two.encoding = "utf-8"
            html_two = response_two.text

            # Site-relative image path taken from the tag carrying data-pic.
            url_end = re.findall(r'<img src="(.*?)" data-pic', html_two)
            if not url_end:
                # No matching tag on this page: nothing to download.
                print("no image found on", detail_url)
                continue

            images = requests.get(base_url + url_end[0], headers=headers, timeout=request_timeout)
            images.raise_for_status()
        except requests.RequestException as err:
            # Skip this picture only; keep going with the others.
            print("skipping", detail_url, "-", err)
            continue

        # Raw bytes are written as-is (binary mode, no text encoding involved).
        with open(os.path.join(save_dir, str(i) + ".jpg"), "wb") as f:
            f.write(images.content)
        # Advance the counter only after a successful save so the file
        # numbering stays gap-free.
        i += 1
牛牛牛牛牛牛牛牛牛牛牛牛牛牛