下面的代码可以正常工作,只有一个问题:所需的输出被重复打印了好几次 =)
#! /usr/bin/env python2.7
from PyQt4.QtCore import *
from PyQt4.QtGui import *
from PyQt4.QtWebKit import *
from bs4 import BeautifulSoup as bs
from random import choice
from urllib import urlretrieve
from urllib2 import *
import sys
import os
# Settings for browser
class MyBrowser(QWebPage):
    """QWebPage variant with image loading turned off and a fixed user agent."""

    # User-agent string reported for every URL (see userAgentForUrl).
    _USER_AGENT = "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.15 (KHTML, like Gecko) Chrome/24.0.1295.0 Safari/537.15"

    def __init__(self):
        """Initialise the page and disable automatic image loading."""
        QWebPage.__init__(self)
        page_settings = self.settings()
        # Only the HTML is scraped, so images are not loaded automatically.
        page_settings.setAttribute(QWebSettings.AutoLoadImages, False)

    def userAgentForUrl(self, url):
        """Return the same user-agent string regardless of *url*."""
        return self._USER_AGENT
class Name_Creater(QWebView):
def __init__(self):
QWebView.__init__(self)
self.setPage(MyBrowser())
self.loadFinished.connect(self.grab_first_name)
self.frame = self.page().mainFrame()
def grab_first_name(self):
html = unicode(self.frame.toHtml()).encode('utf-8')
soup = bs(html)
for name in soup.findAll('li', text=True):
print name
if __name__ == '__main__':
    # A QApplication must exist before any widget is created.
    app = QApplication(sys.argv)
    br = Name_Creater()
    # Page listing the first names to scrape.
    url_first_names = QUrl("http://www.genealogyroadtrip.com/Census/male_names_1.htm")
    br.load(url_first_names)
    br.show()
    # Enter the Qt event loop; loading and the loadFinished slot run inside it.
    app.exec_()
最佳答案
问题在于您连接的是 QWebView 的 loadFinished 信号,该信号每加载一个页面就会发出一次。因此,如果页面中包含多个框架(frame),就会发出多次 loadFinished 信号。
解决方法是改为连接 mainFrame 的 loadFinished 信号:
# Install the custom page first, then take the main frame from that page.
self.setPage(MyBrowser())
self.frame = self.page().mainFrame()
# Connect the slot to the main frame's own loadFinished signal rather than
# the view's, so additional frames in the page do not trigger it again.
self.frame.loadFinished.connect(self.grab_first_name)