我是Scrapy和Python的新手。我正在尝试使用Scrapy示例中的FormRequest,但似乎formdata参数没有解析“ Air”中的“ []”。有什么解决办法吗?
这是代码:
import scrapy
import re
import json
from scrapy.http import FormRequest
class AirfareSpider(scrapy.Spider):
name = 'airfare'
start_urls = [
'http://www.viajanet.com.br/busca/voos-resultados#/POA/MEX/RT/01-03-2017/15-03-2017/-/-/-/1/0/0/-/-/-/-'
]
def parse(self, response):
return [FormRequest(url='http://www.viajanet.com.br/busca/resources/api/AvailabilityStatusAsync',
formdata={"Partner":{
"Token":"p0C6ezcSU8rS54+24+zypDumW+ZrLkekJQw76JKJVzWUSUeGHzltXDhUfEntPPLFLR3vJpP7u5CZZYauiwhshw==",
"Key":"OsHQtrHdMZPme4ynIP4lcsMEhv0=",
"Id":"52",
"ConsolidatorSystemAccountId":"80",
"TravelAgencySystemAccountId":"80",
"Name":"B2C"
},
"Air":[{
"Arrival":{
"Iata":"MEX",
"Date":"2017-03-15T15:00:00.000Z"
},
"Departure":{
"Iata":"POA",
"Date":"2017-03-01T15:00:00.000Z"
},
"InBoundTime":"0",
"OutBoundTime":"0",
"CiaCodeList":"[]",
"BookingClass":"-1",
"IsRoundTrip":"true",
"Stops":"-1",
"FareType":"-"
}],
"Pax":{
"adt":"1",
"chd":"0",
"inf":"0"
},
"DisplayTotalAmount":"false",
"GetDeepLink":"false",
"GetPriceMatrixOnly":"false",
"PageLength":"10",
"PageNumber":"2"
}
, callback=self.parse_airfare)]
def parse_airfare(self, response):
data = json.loads(response.body)
最佳答案
尝试使用FormRequest.from_response函数
https://doc.scrapy.org/en/latest/topics/request-response.html#using-formrequest-from-response-to-simulate-a-user-login
import scrapy
class LoginSpider(scrapy.Spider):
name = 'example.com'
start_urls = ['http://www.example.com/users/login.php']
def parse(self, response):
return scrapy.FormRequest.from_response(
response,
formdata={'username': 'john', 'password': 'secret'},
callback=self.after_login
)
def after_login(self, response):
# check login succeed before going on
if "authentication failed" in response.body:
self.logger.error("Login failed")
return
关于python - FormRequest Scrapy,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/39571221/