我正在尝试重写我的代码以正确利用Promise。
完整的程序应该从T恤站点中抓取数据。该第一段代码应该进入网站的首页,获取立即可用的产品页面,然后将URL存储在数组中。其余的URL将存储在“ remainder”中,以便稍后进行第二次抓取。
当前手动对每个部分进行单元测试:
//TASK: Create a command line application that goes to an ecommerce site to get the latest prices.
//Save the scraped data in a spreadsheet (CSV format).
//Modules being used:
var cheerio = require('cheerio');
var request = require('request');
//harcoded url
var url = 'http://shirts4mike.com/';
//url for tshirt pages
var urlSet = new Set();
var remainder;
const requestPromise = function(url) {
return new Promise(function(resolve, reject) {
request(url, function(error, response, html) {
if(error) return reject(error);
if(!error && response.statusCode == 200){
return resolve(html);
}
});
});
}
function firstScrape (url) {
return requestPromise(url)
.then(function(html) {
var $ = cheerio.load(html);
var links = [];
//get all the links
$('a[href*=shirt]').each(function(){
var a = $(this).attr('href');
//add into link array
links.push(url + a);
});
return links;
// return this array you've made
});
}
function nextStep (arrayOfLinks) {
var promiseArray = [];
for(var link in arrayOfLinks){
promiseArray.push(requestPromise(link));
return Promise.all(promiseArray);
}
}
function lastStep (arrayOfHTMLresults){
for(var html in arrayOfHTMLresults){
var $ = cheerio.load(html);
//if page has a submit it must be a product page
if($('[type=submit]').length !== 0){
//add page to set
urlSet.add(scrapeLink);
} else if(remainder == undefined) {
//if not a product page, add it to remainder so it another scrape can be performed.
remainder = scrapeLink;
}
}
console.log(urlSet);
console.log(remainder);
}
firstScrape(url)
.then(nextStep)
.then(lastStep);
我目前遇到以下错误:
(node:71094) UnhandledPromiseRejectionWarning: Unhandled promise rejection (rejection id: 3): Error: Invalid URI "0"
这是我要承诺的代码:
// Load front page of shirts4mike
function firstScrape(){
request(url, function(error, response, html) {
if(!error && response.statusCode == 200){
var $ = cheerio.load(html);
//iterate over links with 'shirt'
$('a[href*=shirt]').each(function(){
var a = $(this).attr('href');
//create new link
var scrapeLink = url + a;
//for each new link, go in and find out if there is a submit button.
//If there, add it to the set
request(scrapeLink, function(error,response, html){
if(!error && response.statusCode == 200) {
var $ = cheerio.load(html);
//if page has a submit it must be a product page
if($('[type=submit]').length !== 0){
//add page to set
urlSet.add(scrapeLink);
} else if(remainder == undefined) {
//if not a product page, add it to remainder so it another scrape can be performed.
remainder = scrapeLink;
}
}
});
});
}
});
}
我无法解决的是当我不知道
urlSet.add(scrapeLink);
是什么时如何在lastStep()
中使用scrapeLink
?知道为什么吗?谢谢
最佳答案
.add()
不是Array.prototype
方法,您也可以在return
循环中使用promiseArray
for
,而不是将Promise
推送到promiseArray
并使用Promise.all()
function nextStep (arrayOfLinks) {
var promiseArray = [];
for(var i = 0; i < arrayOfLinks.length; i++) {
var link = requestPromise(arrayOfLinks[i]);
promiseArray.push(link);
}
return Promise.all(promiseArray)
}