我正在尝试检测商品在Amazon上的可用性。为什么此代码不起作用?

from simplified_scrapy.request import req
from simplified_scrapy.simplified_doc import SimplifiedDoc
import requests
import re
from bs4 import BeautifulSoup
from collections import OrderedDict
from time import sleep
import time
from lxml import html
import json

def check(url):
    """Return the availability text of an Amazon product page.

    Fetches *url* and extracts the text content of the ``#availability``
    element. Returns the stripped text, or ``None`` when the element is
    missing (e.g. Amazon served a captcha/robot page).
    """
    # Import locally under an alias so that rebinding the module-level
    # name ``html`` elsewhere in the script cannot shadow the lxml parser.
    # That shadowing is what raised
    # "AttributeError: 'unicode' object has no attribute 'fromstring'".
    from lxml import html as lxml_html
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    page = requests.get(url, headers = headers)
    # The original ``for i in range(20)`` loop returned unconditionally on
    # its first pass, so the 20 retries and sleep(3) were dead code:
    # parse the already-fetched response exactly once.
    doc = lxml_html.fromstring(page.content)
    raw_availability = doc.xpath('//div[@id ="availability"]//text()')
    return ''.join(raw_availability).strip() if raw_availability else None

file_name = raw_input("Enter file name: ")
filepath = "%s"%(file_name)

with open(filepath) as f:
    listoflinks = [line.rstrip('\n') for line in f]

all_links = []
for i in listoflinks:
    html = req.get(i)
    doc = SimplifiedDoc(html)
    amazon_links = doc.getElements('a')
    amazon_links = amazon_links.containsOr(['https://www.amazon.com/','https://amzn.to/'],attr='href')
    for a in amazon_links:
        if a.href not in all_links:
            all_links.append(a.href)

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

for i in all_links:
    print "LINK:"
    print i
    response = requests.get(i, headers=headers)
    #soup = BeautifulSoup(html, "lxml")
    soup = BeautifulSoup(response.content, features="lxml")
    title = soup.select("#productTitle")[0].get_text().strip()

    if check(i) == 'In stock.':
        price = soup.select("#priceblock_saleprice")[0].get_text()
    else:
        price = "UNAVAILABLE"
    review_count = int(soup.select("#acrCustomerReviewText")[0].get_text().split()[0])
    jsonObject = {'title': title, 'price': price, 'review_count': review_count}
    print json.dumps(jsonObject, indent=2)
    print "////////////////////////////////////////////////"

print "..............................................."
print "FINALLY..."
print "# OF LINKS RETRIEVED:"
print len(all_links)


当我执行它时,出现此错误:


  文件 "scra.py",第 17 行

  doc = html.fromstring(page.content)

  AttributeError: 'unicode' 对象没有属性 'fromstring'


请帮我。我已经尝试过将page转换为pagedata = page.json(),但这只会使情况变得更糟。

最佳答案

出错的根本原因是循环里的 html = req.get(i) 把从 lxml 导入的 html 模块重新绑定成了网页字符串,于是 html.fromstring 不再存在。把那个变量改成别的名字即可;或者不再依赖 lxml,用 BeautifulSoup 代替 html.fromstring:

doc = BeautifulSoup(page.content, 'html.parser')
doc = doc.prettify()

关于python - AttributeError:“unicode”对象没有属性“fromstring”。如何解决这个问题?,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/59774103/

10-10 10:29