思路
打开想要爬取的微博博主的相册页面,按 F12 打开开发者工具,可以找到返回相册数据的请求;我们可以用正则表达式把下载图片所需的参数从响应中提取出来。

请求 URL 中的 owner_uid、viewer_uid、page_id、__rnd 基本是不变的,page 是页数,所以翻页时只需要从上一页的响应中取出 since_id 就可以了。

代码
import requests
import re
# ---------------------------------------------------------------------------
# User configuration: copy these values from the album-loading request shown
# in the browser's developer tools (F12). The placeholder strings are kept
# exactly as in the original script.
# ---------------------------------------------------------------------------
header = {
    'Cookie': '填写自己的cookie'
}
owner_uid = '填写f12获取url中的参数'
viewer_uid = '填写f12获取url中的参数 '
page_id = '填写f12获取url中的参数 '
__rnd = '填写f12获取url中的参数 '

# Fixed part of the album-loading endpoint; the variable query parameters are
# appended by build_album_url().
ALBUM_URL = 'https://weibo.com/p/aj/album/loading?ajwvr=6&type=photo'
# Prefix prepended to every extracted image name to form the download URL.
IMAGE_URL_PREFIX = 'https://wxt.sinaimg.cn/mw1024/'
# Directory (with trailing separator) that images are written to. It is not
# created by this script and must already exist.
SAVE_DIR = 'E:\\photo\\'
# Seconds to wait for any single HTTP request before giving up.
REQUEST_TIMEOUT = 10
# Maximum number of times one album page is requested when the response
# contains neither images nor a since_id.
MAX_ATTEMPTS = 5

# Matches the text between a literal "thumb300\/" and the next "tags".
# The captured text normally still carries the "?" that starts the query
# string; extract_image_names() cuts it off.
IMAGE_PATTERN = re.compile(r'thumb300\\/(.*?)?tags')
# Matches the value between "&since_id=" and a literal backslash + '">'.
SINCE_ID_PATTERN = re.compile(r'&since_id=(.*?)\\">')


def build_album_url(page, since_id=None):
    """Return the album-loading URL for *page*.

    The first page is requested without a ``since_id`` parameter; later pages
    insert ``&since_id=<value>`` between ``viewer_uid`` and ``page_id``.
    """
    parts = [ALBUM_URL, '&owner_uid=', owner_uid, '&viewer_uid=', viewer_uid]
    if since_id is not None:
        parts += ['&since_id=', since_id]
    parts += ['&page_id=', page_id, '&page=', str(page),
              '&ajax_call=1&__rnd=', __rnd]
    return ''.join(parts)


def extract_image_names(text):
    """Return the image file names found in an album response *text*.

    Each name is the part of a ``thumb300\\/...tags`` match that precedes the
    first ``?``. Empty names are dropped so they never produce a bogus
    download URL.
    """
    names = (captured.partition('?')[0]
             for captured in IMAGE_PATTERN.findall(text))
    return [name for name in names if name]


def extract_since_id(text):
    """Return the first ``since_id`` value in *text*, or ``None`` if absent."""
    found = SINCE_ID_PATTERN.search(text)
    return found.group(1) if found else None


def fetch_album_page(url):
    """Request *url* and return ``(image_names, since_id)``.

    A response that yields neither images nor a since_id is requested again,
    up to ``MAX_ATTEMPTS`` times in total, instead of being retried forever.
    """
    names, since_id = [], None
    for _ in range(MAX_ATTEMPTS):
        response = requests.get(url, headers=header, timeout=REQUEST_TIMEOUT)
        response.encoding = 'utf-8'
        names = extract_image_names(response.text)
        since_id = extract_since_id(response.text)
        if names or since_id is not None:
            break
    return names, since_id


def download_image(name, index):
    """Download image *name* and save it as ``<SAVE_DIR><index>.jpg``.

    Returns ``True`` when the file was written and ``False`` when the
    download failed; a failed download is reported and skipped so that an
    error page is never stored as a ``.jpg`` file.
    """
    image_url = IMAGE_URL_PREFIX + name
    try:
        response = requests.get(image_url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as err:
        print('下载失败,已跳过:' + image_url + ' (' + str(err) + ')')
        return False
    # The with-statement closes the file; no explicit close() is needed.
    with open(SAVE_DIR + str(index) + '.jpg', 'wb') as f:
        f.write(response.content)
    print('第' + str(index) + '张图片:' + image_url)
    return True


def main():
    """Walk the album page by page and download every image found.

    The crawl stops after the first page whose response carries no since_id,
    because there is then nothing to request the next page with.
    """
    page = 1
    saved = 0  # number of images written so far; also numbers the files
    since_id = None
    while True:
        names, since_id = fetch_album_page(build_album_url(page, since_id))
        for name in names:
            if download_image(name, saved + 1):
                saved += 1
        if not since_id:
            break
        page += 1


if __name__ == '__main__':
    main()