我正在尝试重写我的代码以正确利用Promise。

完整的程序应该从T恤站点中抓取数据。该第一段代码应该进入网站的首页,获取立即可用的产品页面,然后将URL存储在数组中。其余的URL将存储在“ remainder”中,以便稍后进行第二次抓取。

当前手动对每个部分进行单元测试:

//TASK: Create a command line application that goes to an ecommerce site to get the latest prices.
    //Save the scraped data in a spreadsheet (CSV format).



//Modules being used:
var cheerio = require('cheerio');
var request = require('request');

//harcoded url
var url = 'http://shirts4mike.com/';

//url for tshirt pages
var urlSet = new Set();

var remainder;

   const requestPromise = function(url) {
    return new Promise(function(resolve, reject) {
        request(url, function(error, response, html) {

            if(error) return reject(error);

            if(!error && response.statusCode == 200){
                return resolve(html);
            }
        });
    });
}


function firstScrape (url) {
    return requestPromise(url)
        .then(function(html) {
            var $ = cheerio.load(html);

            var links = [];

            //get all the links
            $('a[href*=shirt]').each(function(){
                var a = $(this).attr('href');

                //add into link array
                links.push(url + a);

            });
            return links;
            // return this array you've made
        });
}


function nextStep (arrayOfLinks) {
    var promiseArray = [];

    for(var link in arrayOfLinks){
        promiseArray.push(requestPromise(link));
        return Promise.all(promiseArray);
    }
}


function lastStep (arrayOfHTMLresults){
    for(var html in arrayOfHTMLresults){
        var $ = cheerio.load(html);

        //if page has a submit it must be a product page
        if($('[type=submit]').length !== 0){

            //add page to set
            urlSet.add(scrapeLink);

        } else if(remainder == undefined) {
            //if not a product page, add it to remainder so it another scrape can be performed.
            remainder = scrapeLink;
        }
    }
    console.log(urlSet);
    console.log(remainder);
}


firstScrape(url)
    .then(nextStep)
    .then(lastStep);


我目前遇到以下错误:

(node:71094) UnhandledPromiseRejectionWarning: Unhandled promise rejection (rejection id: 3): Error: Invalid URI "0"

这是我要承诺的代码:

// Load front page of shirts4mike
function firstScrape(){
    request(url, function(error, response, html) {
        if(!error && response.statusCode == 200){
            var $ = cheerio.load(html);

        //iterate over links with 'shirt'
            $('a[href*=shirt]').each(function(){
                var a = $(this).attr('href');

                //create new link
                var scrapeLink = url + a;

                //for each new link, go in and find out if there is a submit button.
                //If there, add it to the set
                request(scrapeLink, function(error,response, html){
                    if(!error && response.statusCode == 200) {
                        var $ = cheerio.load(html);

                        //if page has a submit it must be a product page
                        if($('[type=submit]').length !== 0){

                            //add page to set
                            urlSet.add(scrapeLink);

                        } else if(remainder == undefined) {
                            //if not a product page, add it to remainder so it another scrape can be performed.
                            remainder = scrapeLink;
                        }
                    }
                });
            });
        }
    });
}


我无法解决的是当我不知道urlSet.add(scrapeLink);是什么时如何在lastStep()中使用scrapeLink

知道为什么吗?谢谢

最佳答案

.add()不是Array.prototype方法,您也可以在return循环中使用promiseArray for,而不是将Promise推送到promiseArray并使用Promise.all()

function nextStep (arrayOfLinks) {
    var promiseArray = [];

    for(var i = 0; i < arrayOfLinks.length; i++) {
        var link = requestPromise(arrayOfLinks[i]);
        promiseArray.push(link);
    }

    return Promise.all(promiseArray)
}

09-10 11:59