问题描述
我对VBA和HTML/XHTML还是很陌生,但是通过在线研究和其他出色成员的帮助,我设法编写了一个代码来提取所需的数据.因为它是XHTML,所以我很难确定想要的元素的ID,所以我认为这是我最不喜欢它的地方.
I'm very new to VBA and HTML/XHTML, but through online research and help from other wonderful members on here I've managed to write a code to pull the data I want. I had a hard time identifying the IDs of the elements I want since it's in XHTML, so I think that's where I've botched it the most.
网站: http://www.usbanklocations.com/banks.php?q=&ct=&ml=30&lc=
这是我要代码执行的操作:拉银行名称,地址,电话号码,总存款和总资产-给出我在Excel工作表中提供的银行名称和城市.
Here is what I want the code to do:Pull Bank Name, Address, Phone Number, Total Deposits and Total Assets -- GIVEN the bank name and city I provide in my excel sheet.
这是我的代码:
Private Declare Sub Sleep Lib "kernel32.dll" (ByVal dwMilliseconds As Long)
Sub CommunityBanks()
Dim IE As Object, TableResults As Object, webRow As Object, BankName As Variant, page As Long, pageTotal As Long, r As Long
Dim beginTime As Date, i As Long, myvalue As Variant
Set IE = CreateObject("internetexplorer.application")
IE.navigate "http://www.usbanklocations.com/banks.php?name=" & Range("A2").Value & "+Bank&ml=30&lc=" & Range("B2").Value & "%2C+TX"
IE.Visible = True
Do While IE.Busy Or IE.readystate <> 4 '4 = READYSTATE_COMPLETE
DoEvents
Loop
'input bank name into form
'myvalue = InputBox("Enter City. Press okay to begin search", "Bank Search")
'Range("F3").Value = myvalue
'IE.document.getelementbyid("MainContent_txtCity").Value = "LegacyTexas"
'click find button
'IE.document.getelementbyid("MainContent_btn").Click
'Sleep 5 * 1000
IE.document.getelementbytagname("table").getelementsbyclassname("btn").Click
Sleep 5 * 1000
'total pages
pageTotal = IE.document.getelementbyid("lsortby").innertext
page = 0
Do Until page = pageTotal
DoEvents
page = IE.document.getelementbyclassname("lsortby").innertext
With IE.document.getelementbyid("main")
For r = 1 To .Rows.Length - 1
If Not IsArray(BankName) Then
ReDim BankName(7, 0) As Variant
Else
ReDim Preserve BankName(7, UBound(BankName, 2) + 1) As Variant
End If
BankName(0, UBound(BankName, 2)) = .Rows(r).Cells(0).innertext
Next r
End With
If page < pageTotal Then
IE.document.getelementbyclassname("panelpn").Click
beginTime = Now
Application.Wait (Now + TimeValue("00:00:05"))
End If
Loop
For r = 0 To UBound(BankName, 2)
IE.navigate "http://www.usbanklocations.com/" & BankName(0, r)
Do While IE.Busy Or IE.readystate <> 4 '4 = READYSTATE_COMPLETE
DoEvents
Loop
'wait 5 sec. for screen refresh
Sleep 5 * 1000
With IE.document.getelementbytagname("table")
For i = 0 To .Rows.Length - 1
DoEvents
Select Case .Rows(i).Cells(0).innertext
Case "Name:"
BankName(1, r) = .Rows(i).Cells(1).innertext
Case "Location:"
BankName(2, r) = .Rows(i).Cells(1).innertext
Case "Phone:"
BankName(3, r) = .Rows(i).Cells(1).innertext
Case "Branch Deposit:"
BankName(4, r) = Replace(Replace(.Rows(i).Cells(1).innertext, ",", ""), "$", "")
Case "Total Assets:"
BankName(5, r) = Replace(Replace(.Rows(i).Cells(1).innertext, ",", ""), "$", "")
End Select
Next i
End With
Next r
IE.Quit
Set IE = Nothing
'post result on Excel cell
Worksheets(1).Range("A9").Resize(UBound(BankName, 2) + 1, UBound(BankName, 1) + 1).Value = Application.Transpose(BankName)
End Sub
先谢谢您!我将不胜感激任何帮助.
Thank you in advance! I would greatly appreciate any help.
推荐答案
考虑以下示例,该示例使用XHR而不是IE和基于拆分的HTML内容解析:
Consider the below example which uses XHR instead of IE and split-based HTML content parsing:
Option Explicit
Sub Test_usbanklocations()
Dim oSource, oDestination, y, oSrcRow, sName, sCity, sDist, sUrl0, sUrl1, sUrl2, lPage, sResp1, sResp2, i, a1, a2, a3, a4, a5
Set oSource = Sheets(1)
Set oDestination = Sheets(2)
oDestination.Cells.Delete
DataOutput oDestination, 1, Array("Name", "Location", "Phone", "Total Assets", "Total Deposits")
y = 2
For Each oSrcRow In oSource.UsedRange.Rows
sName = oSrcRow.Cells(1, 1).Value
sCity = oSrcRow.Cells(1, 2).Value
sDist = oSrcRow.Cells(1, 3).Value
sUrl0 = "http://www.usbanklocations.com/banks.php?q=" & EncodeUriComponent(sName) & "&lc=" & EncodeUriComponent(sCity) & "&ml=" & sDist
sUrl1 = sUrl0
lPage = 1
Do
sResp1 = GetXHR(sUrl1)
If InStr(sResp1, "We can not find the address you provided. Please check.") > 0 Then Exit Do
a1 = Split(sResp1, "<div class=""pl")
For i = 1 To UBound(a1)
a2 = Split(a1(i), "</div>", 3)
a3 = Split(a2(1), "<a href=""", 2)
a4 = Split(a3(1), """>", 2)
sUrl2 = "http://www.usbanklocations.com" & a4(0)
sResp2 = GetXHR(sUrl2)
a5 = Array( _
GetFragment(sResp2, "<b>Name:</b></td><td>", "</td>"), _
Replace(GetFragment(sResp2, "<b>Location:</b></td><td>", "</td>"), "View Other Branches", ""), _
GetFragment(sResp2, "<b>Phone:</b></td>", "</td>"), _
GetFragment(sResp2, "<b>Total Assets:</b></td><td>", "</td>"), _
GetFragment(sResp2, "<b>Total Deposits:</b></td><td>", "</td>") _
)
DataOutput oDestination, y, a5
y = y + 1
DoEvents
Next
If InStr(sResp1, "Next Page >") = 0 Then Exit Do
lPage = lPage + 1
sUrl1 = sUrl0 & "&ps=" & lPage
DoEvents
Loop
Next
MsgBox "Completed"
End Sub
Function GetXHR(sUrl)
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", sUrl, False
.Send
GetXHR = .ResponseText
End With
End Function
Sub DataOutput(oSht, y, aValues)
With oSht.Cells(y, 1).Resize(1, UBound(aValues) + 1)
.NumberFormat = "@"
.Value = aValues
End With
End Sub
Function GetFragment(sText, sPatt1, sPatt2)
Dim a1, a2
a1 = Split(sText, sPatt1, 2)
If UBound(a1) <> 1 Then Exit Function
a2 = Split(a1(1), sPatt2, 2)
If UBound(a2) <> 1 Then Exit Function
GetFragment = GetInnerText(a2(0))
End Function
Function EncodeUriComponent(sText)
Static objHtmlfile As Object
If objHtmlfile Is Nothing Then
Set objHtmlfile = CreateObject("htmlfile")
objHtmlfile.parentWindow.execScript "function encode(s) {return encodeURIComponent(s)}", "jscript"
End If
EncodeUriComponent = objHtmlfile.parentWindow.encode(sText)
End Function
Function GetInnerText(sText)
With CreateObject("htmlfile")
.Write ("<body>" & sText & "</body>")
GetInnerText = .DocumentElement.Document.GetElementsByTagName("body")(0).InnerText
End With
End Function
例如,第一个工作表包含要搜索的数据(银行名称,位置和距离以作为依据):
As an example, the first worksheet contains data to search (Bank name, Location and Distance to refine by):
然后在第二个工作表上的结果如下:
Then result on the second worksheet is as follows:
这篇关于XHTML网站抓取指南的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持!