本文介绍了如何从 Kickstarter 网页上抓取所有图片网址,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!
问题描述
我想从此 Kickstarter 网页上抓取所有图片网址,但是以下代码并未给出所有图片:
I want to scrape all the image urls from this Kickstarter webpage, but the following code does not give all the images:
# The asker's original attempt: fetch the project page and keep only the
# <img> tags whose `src` already points at the imgix asset host.
url = 'https://www.kickstarter.com/projects/1878352656/sleep-yoga-go-travel-pillow?ref=category_newest'
page = requests.get(url)
soup = BeautifulSoup(page.text, 'html.parser')
x = soup.select('img[src^="https://ksr-ugc.imgix.net/assets/"]')
print(x)

# NOTE: as stated in the question, this does not yield every image. The
# recommended answer instead reads the `data-src` attribute of the <img>
# tags in the story HTML returned by the site's GraphQL endpoint.
img_links = [img['src'] for img in x]
for link in img_links:
    print(link)
推荐答案
from urllib.parse import urlsplit

import requests
from bs4 import BeautifulSoup
# GraphQL document for the "Campaign" operation. It asks for one project
# (looked up by slug) and, among other fields, its `story` rendered with
# assetWidth 680. Written one source line per query line; the adjacent
# literals concatenate to exactly the same string.
_CAMPAIGN_QUERY = (
    "query Campaign($slug: String!) {\n"
    " project(slug: $slug) {\n"
    " id\n"
    " isSharingProjectBudget\n"
    " risks\n"
    " story(assetWidth: 680)\n"
    " currency\n"
    " spreadsheet {\n"
    " displayMode\n"
    " public\n"
    " url\n"
    " data {\n"
    " name\n"
    " value\n"
    " phase\n"
    " rowNum\n"
    " __typename\n"
    " }\n"
    " dataLastUpdatedAt\n"
    " __typename\n"
    " }\n"
    " environmentalCommitments {\n"
    " id\n"
    " commitmentCategory\n"
    " description\n"
    " __typename\n"
    " }\n"
    " __typename\n"
    " }\n"
    "}\n"
)

# Request payload: a list holding a single operation, serialized as JSON
# when posted.
data = [
    {
        "operationName": "Campaign",
        "query": _CAMPAIGN_QUERY,
        "variables": {"slug": "1878352656/sleep-yoga-go-travel-pillow"},
    },
]

# Base request headers: a desktop Firefox User-Agent string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:80.0) "
        "Gecko/20100101 Firefox/80.0"
    ),
}
def main(url, timeout=30):
    """Print and return the story image URLs of the project named in `data`.

    Steps:
      1. GET the site root (scheme + host of ``url``) and read the CSRF
         token from its ``<meta name="csrf-token">`` tag.
      2. POST the module-level ``data`` payload as JSON to ``url``, sending
         the module-level ``headers`` plus ``X-CSRF-Token``.
      3. Parse the ``story`` HTML from the first item of the JSON response
         and collect the ``data-src`` attribute of every ``<img>`` that
         has one.

    Args:
        url: GraphQL endpoint URL, e.g. "https://www.kickstarter.com/graph".
        timeout: Seconds to wait on each HTTP request.

    Returns:
        The list of ``data-src`` values, in document order (also printed).

    Raises:
        requests.HTTPError: If either request returns an error status.
        RuntimeError: If the root page has no usable CSRF token meta tag.
    """
    # Derive the site root from the URL itself rather than slicing a fixed
    # number of characters, so the function works for any host length.
    parts = urlsplit(url)
    site_root = "{0.scheme}://{0.netloc}".format(parts)

    with requests.Session() as session:
        # Same session for both calls so cookies set by the first response
        # accompany the token on the POST.
        landing = session.get(site_root, timeout=timeout)
        landing.raise_for_status()

        token_tag = BeautifulSoup(landing.content, 'html.parser').select_one(
            "meta[name=csrf-token]")
        if token_tag is None or not token_tag.get('content'):
            raise RuntimeError(
                "CSRF token meta tag not found at {}".format(site_root))

        # Work on a copy: the shared module-level `headers` dict must not
        # accumulate a per-session token.
        request_headers = dict(headers)
        request_headers['X-CSRF-Token'] = token_tag['content']
        session.headers.update(request_headers)

        response = session.post(url, json=data, timeout=timeout)
        response.raise_for_status()
        # The payload is a one-element batch, so the reply is indexed at 0.
        story_html = response.json()[0]['data']['project']['story']

    story = BeautifulSoup(story_html, 'html.parser')
    # Only <img> tags that actually carry a `data-src` attribute.
    image_urls = [
        img['data-src'] for img in story.find_all("img", {'data-src': True})
    ]
    print(image_urls)
    return image_urls


if __name__ == "__main__":
    main("https://www.kickstarter.com/graph")
输出:
['https://ksr-ugc.imgix.net/assets/018/947/295/e28df5848b46dd364b0ccf7f08874ed1_original.png?ixlib=rb-2.1.0&w=680&fit=max&v=1509125786&auto=format&frame=1&lossless=true&s=aa182d32433644ed4b67536f9249b9a4', 'https://ksr-ugc.imgix.net/assets/019/532/467/0921999530e580a28726d31817e89219_original.JPG?ixlib=rb-2.1.0&w=680&fit=max&v=1512690440&auto=format&frame=1&q=92&s=12289756eee63bd8229e43d3fdb697e2', 'https://ksr-ugc.imgix.net/assets/018/950/941/145ffd2dcc872e18c0bb7f62a74f0ac9_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1509143419&auto=format&frame=1&q=92&s=312107a6ab70eb9b8d274d8e9536d759', 'https://ksr-ugc.imgix.net/assets/019/532/475/e5a27a164a960efdf14be1dfc3d937a8_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1512690498&auto=format&frame=1&q=92&s=c84f7614d7d26a16b71cb0a78eb5b964', 'https://ksr-ugc.imgix.net/assets/019/532/479/a1892bbea9c95e10abdb71bc3db3a18f_original.JPG?ixlib=rb-2.1.0&w=680&fit=max&v=1512690515&auto=format&frame=1&q=92&s=6a7efea040c55c59f084bea7ddf49713', 'https://ksr-ugc.imgix.net/assets/019/532/484/5eea3d7d665f9a28607615fb7e76520d_original.JPG?ixlib=rb-2.1.0&w=680&fit=max&v=1512690539&auto=format&frame=1&q=92&s=01323684393c3a32d89827f7d17034d3', 'https://ksr-ugc.imgix.net/assets/019/532/486/be1c40c7e1d2bba356e34c78e0a6cebb_original.JPG?ixlib=rb-2.1.0&w=680&fit=max&v=1512690553&auto=format&frame=1&q=92&s=0413e46ca9ffd51686188ea3aa496dc2', 'https://ksr-ugc.imgix.net/assets/018/915/460/61dba7d4eee548c30826c4ac1e7c1adf_original.JPG?ixlib=rb-2.1.0&w=680&fit=max&v=1508953589&auto=format&frame=1&q=92&s=ee0206f737d2a5dc987a7a8d7c5c0181', 'https://ksr-ugc.imgix.net/assets/019/532/556/67874ab9e61b5a62111bb8ffd5a86ec7_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1512690990&auto=format&frame=1&q=92&s=1a32cc86ea8356fb8f773a9946db1078', 'https://ksr-ugc.imgix.net/assets/018/916/228/07a65eaae32a1f35a2fc010368808597_original.gif?ixlib=rb-2.1.0&w=680&fit=max&v=1508956933&auto=format&gif-q=50&q=92&s=629409cb25ff98768e8244a855cd6537', 
'https://ksr-ugc.imgix.net/assets/018/951/269/ae4a074d834e1017594ea570061f8693_original.gif?ixlib=rb-2.1.0&w=680&fit=max&v=1509145394&auto=format&gif-q=50&q=92&s=ba37411b148e6aaf68ec15b12bb08b4a', 'https://ksr-ugc.imgix.net/assets/019/532/499/8108b7c345be49d1a7f24c9a808883ea_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1512690652&auto=format&frame=1&q=92&s=7e706e7c22a89c177e7269e360219d6d', 'https://ksr-ugc.imgix.net/assets/019/532/503/9d14734ab1a118f333abc8c726be25dd_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1512690688&auto=format&frame=1&q=92&s=ebac74715b62fa63baa97348f555db39', 'https://ksr-ugc.imgix.net/assets/019/020/550/d03e197225cc7dff72a8b0781d971e2b_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1509562418&auto=format&frame=1&q=92&s=3dacbcc7501b93325aa5f3b96f4dd6bb', 'https://ksr-ugc.imgix.net/assets/018/984/033/1690b96aa7cf9c8a82d546959f76078e_original.JPG?ixlib=rb-2.1.0&w=680&fit=max&v=1509389255&auto=format&frame=1&q=92&s=c261edca6ff33ca4932692ec77b630a8', 'https://ksr-ugc.imgix.net/assets/019/532/505/989aa88cd062029b6d952f08418fcd79_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1512690721&auto=format&frame=1&q=92&s=5e61d60f95fec622497779fa84cce01e', 'https://ksr-ugc.imgix.net/assets/019/532/507/a4d83eac4c0f53d893e23ec163842c5b_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1512690737&auto=format&frame=1&q=92&s=fffba5b95e026d8727290e893191fa83', 'https://ksr-ugc.imgix.net/assets/019/023/799/c5619d8153edaa43789d5912a0c875ee_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1509574534&auto=format&frame=1&q=92&s=e7e0efcd372067e2b9a46d7b9dd5b195', 'https://ksr-ugc.imgix.net/assets/018/983/735/83c71def243879b687956cbf8b806b14_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1509388186&auto=format&frame=1&q=92&s=5e454a646cd64af594c00730b7b4c87a', 'https://ksr-ugc.imgix.net/assets/019/024/228/747a531dedc814049501a75141766164_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1509576227&auto=format&frame=1&q=92&s=faa3f0ec1fa32d4d4dd53835fb098f09', 
'https://ksr-ugc.imgix.net/assets/019/042/081/530fbfc825cbc110df875ec0524feacc_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1509667007&auto=format&frame=1&q=92&s=4bf397a28271ec71ec1aa7f635cdb8f0', 'https://ksr-ugc.imgix.net/assets/018/920/236/d940db3bb0d32e4d4e2584a35d719993_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1508974842&auto=format&frame=1&q=92&s=0c4df9694d53562e9bd3b9cff38f53bf', 'https://ksr-ugc.imgix.net/assets/018/951/628/2f16dddba5c345c618af882cd1d3283c_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1509147803&auto=format&frame=1&q=92&s=03ab16df99ca9cdc7473d85370ce8616', 'https://ksr-ugc.imgix.net/assets/018/920/245/6d4cebd599b4c5b908139587b1888263_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1508974879&auto=format&frame=1&q=92&s=fdadb27846cdd0cd722a5a3f209b008c', 'https://ksr-ugc.imgix.net/assets/019/023/916/444a8d28593f4c013d99e225280dd00e_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1509574972&auto=format&frame=1&q=92&s=6b1ccf939b3bba80402a618087173f7a']
这篇关于如何从 Kickstarter 网页上抓取所有图片网址的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持!