def get_user_data(self,start_url):
        """Download ``start_url`` and insert one row per ``ct`` span text.

        The page is fetched with ``self.headers`` and ``self.cookies``, parsed
        as HTML, and the text nodes of ``<span class="ctt">`` (content) and
        ``<span class="ct">`` (time/device line) are collected.

        NOTE(review): the two loops below run one after the other, not in
        lockstep. The first loop keeps overwriting ``data['content']``, so
        when the second loop starts only the LAST content is left and every
        inserted row carries that same content value.

        Raises:
            ValueError: if a ``ct`` text does not split into exactly three
                whitespace-separated fields.
        """
        html = requests.get(url=start_url,headers=self.headers,cookies=self.cookies).content
        selector = etree.fromstring(html,etree.HTMLParser(encoding='utf-8'))
        contents = selector.xpath('//span[@class="ctt"]/text()')
        times = selector.xpath('//span[@class="ct"]/text()')
        # A single dict is shared by both loops and by every insert call.
        data = {}
        for each_text in contents:

            # Strips zero-width spaces (U+200B). Each iteration overwrites the
            # previous value, so only the final content survives this loop.
            data['content'] = each_text.encode().decode('utf-8').replace('\u200b','')

        for each_time in times:
            # Split on whitespace into at most three fields; the third field
            # keeps any remaining whitespace-separated words together.
            month_day, time, device = each_time.split(maxsplit=2)
            data['mobile_phone'] = device
            # The two fields are concatenated with no separator between them.
            data['create_time'] = month_day + time
            data['crawl_time'] = datetime.strftime(datetime.now(),'%Y-%m-%d %H:%M:%S')
            # 'content' is unchanged here, so it repeats on every row; if
            # ``contents`` was empty the key is missing altogether.
            self.mysql.insert(data)

我想将数据插入到数据库中,但是插入的每条记录里 data['content'] 字段都是重复的(同一个值),我应该如何修改它?

最佳答案

您应该并行地遍历 contents 和 times,而不是先后分别迭代它们。尝试使用 zip:

def get_user_data(self, start_url):
    """Download ``start_url`` and insert one database row per post.

    The page is fetched with ``self.headers`` and ``self.cookies`` and parsed
    as HTML. Each ``<span class="ctt">`` text node (content) is paired with
    the ``<span class="ct">`` text node (time/device line) at the same
    position, and one dict per pair is passed to ``self.mysql.insert``.

    Args:
        start_url: URL of the page to download.

    Raises:
        ValueError: If a ``ct`` text does not split into exactly three
            whitespace-separated fields.
    """
    response = requests.get(
        url=start_url, headers=self.headers, cookies=self.cookies
    )
    selector = etree.fromstring(
        response.content, etree.HTMLParser(encoding='utf-8')
    )
    contents = selector.xpath('//span[@class="ctt"]/text()')
    times = selector.xpath('//span[@class="ct"]/text()')

    # NOTE: zip() stops at the shorter sequence, so if the page yields a
    # different number of content and time nodes the surplus is dropped
    # silently and pairs after the gap may be misaligned.
    for each_text, each_time in zip(contents, times):
        # At most three fields: the third keeps any remaining words together.
        month_day, clock, device = each_time.split(maxsplit=2)
        # A fresh dict per row, so no value leaks from one insert to the next.
        row = {
            # The text is already a str, so no encode/decode round trip is
            # needed; only the zero-width spaces (U+200B) are removed.
            'content': each_text.replace('\u200b', ''),
            'mobile_phone': device,
            # Concatenated without a separator, as the stored format expects.
            'create_time': month_day + clock,
            'crawl_time': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        }
        self.mysql.insert(row)

10-07 19:39
查看更多