我写了一个宏来从零售商的网页上获取产品信息。它运行良好,但不会在我的工作表中呈现任何结果。我很难理解为什么。我在搜索输入框中输入“sale”,得到以下url:
http://www.shopjustice.com/search/?q=sale&originPageName=home
我想在我的工作表中列出产品的名称、以前的价格和当前的价格。这个
这些元素的HTML如下:
<div class="subCatName">
<a href="/girls-clothing/colored-jeggings/6611358/651?pageSort=W3sidHlwZSI6InJlbGV2YW5jZSIsInZhbCI6IiJ9XQ==&productOrigin=search%20page&productGridPlacement=1-1" id="anchor2_6611358" class="auxSubmit">Colored Jeggings</a>
</div>
<div class="cat-list-price subCatPrice">
<div class="priceContainer">
<span class="mobile-was-price">
was
$26.90</span>
<span class="mobile-now-price">
now
$10.49</span>
</div>
<div class="price_description">
<span class="mobile-extra">
Extra 30% off clearance!</span>
</div>
</div>
代码如下:
Sub test2()
Dim RowCount, erow As Long
Dim sht As Object
Dim ele As IHTMLElement
Dim eles As IHTMLElementCollection
Dim doc As HTMLDocument
Set sht = Sheets("JUSTICESALE")
RowCount = 1
sht.Range("A" & RowCount) = "Clothing Item"
sht.Range("B" & RowCount) = "SKU"
sht.Range("C" & RowCount) = "Former Price"
sht.Range("D" & RowCount) = "Sale Price"
Set ie = CreateObject("InternetExplorer.application")
searchterm = InputBox("ENTER SEARCH TERM")
Application.StatusBar = "LOADING JUSTICE SEARCH"
With ie
.Visible = True
.navigate "http://www.shopjustice.com/"
Do While .busy Or _
.readystate <> 4
DoEvents
Loop
Set doc = ie.document
doc.getelementsbyname("q").Item.innertext = searchterm
doc.getElementsByClassName("searchbtn").Item.Click
Application.StatusBar = "EXTRACTING PRODUCT DATA"
Set eles = doc.getElementsByClassName("subCatName")
For Each ele In eles
If ele.className = "subCatName" Then
erow = sht.Cells(Rows.count, 1).End(xlUp).Offset(1, 0).Row
Cells(erow, 1) = doc.getElementsByClassName("auxSubmit")(RowCount).innertext
Cells(erow, 2) = doc.getElementsByClassName("mobile-was-price")(RowCount).innertext
RowCount = RowCount + 1
End If
Next ele
End With
Set ie = Nothing
Application.StatusBar = ""
End Sub
任何帮助都将不胜感激。
编辑:
嗨,彼得,我很欣赏你的洞察力。它当然预先解决了一些问题。但是,在添加下面的代码之前,由于缺少类名循环,它仍然没有写入到excel。
Do While ie.readyState <> READYSTATE_COMPLETE
DoEvents
Loop
我错过了什么?
我还为不同零售商的网页提供了另一种方法,尽管是相同的概念,如下所示。你对这种方法有什么看法?我唯一的问题是在Select Case行出现权限被拒绝错误70。
Sub test5()
Dim erow As Long
Dim ele As Object
Set sht = Sheets("CARTERS")
RowCount = 1
sht.Range("A" & RowCount) = "Clothing Item"
sht.Range("B" & RowCount) = "SKU"
sht.Range("C" & RowCount) = "Former Price"
sht.Range("D" & RowCount) = "Sale Price"
erow = Sheet1.Cells(Rows.count, 1).End(xlUp).Offset(1, 0).Row
Set objIE = CreateObject("Internetexplorer.application")
searchterm = InputBox("ENTER CARTER'S SEARCH TERM")
With objIE
.Visible = True
.navigate "http://www.carters.com/"
Do While .Busy Or _
.readyState <> 4
DoEvents
Loop
.document.getElementsByName("q").Item.innerText = searchterm
.document.getElementsByClassName("btn_search").Item.Click
Do While .readyState <> READYSTATE_COMPLETE
DoEvents
Loop
For Each ele In .document.all
Select Case ele.className
Case “product - name”
RowCount = RowCount + 1
sht.Range("A" & RowCount) = ele.innerText
Case “product - standard - price”
sht.Range("B" & RowCount) = ele.innerText
Case "product-sales-price"
sht.Range("C" & RowCount) = ele.innerText
End Select
Next ele
End With
Set objIE = Nothing
End Sub
再次感谢你的帮助。
最佳答案
你的代码运行良好,有两个警告。。。
首先,在您“点击”主页面上的搜索按钮后,您的代码不会等待结果页面加载。因此,查找每个项的循环失败,因为没有任何内容。
其次,在解析某些字段的HTML时需要一些错误处理,以便在这些字段丢失时处理这种情况。例如,查看此处的代码并将其应用于您的情况:
For Each ele In eles
If ele.className = "subCatName" Then
erow = sht.Cells(Rows.Count, 1).End(xlUp).Offset(1, 0).Row
On Error Resume Next
Cells(erow, 1) = doc.getElementsByClassName("auxSubmit")(RowCount).innerText
If Err.Number <> 0 Then
Cells(erow, 1) = "ERR: 'auxSubmit' Class Name Not Found!"
Err.Clear
Else
End If
Cells(erow, 2) = doc.getElementsByClassName("mobile-was-price")(RowCount).innerText
If Err.Number <> 0 Then
Cells(erow, 2) = "ERR: 'mobile-was-price' Class Name Not Found!"
Err.Clear
End If
On Error GoTo 0
RowCount = RowCount + 1
End If
Next ele