下面的代码基本可以正常工作,只有一个问题:期望的输出被重复打印了好几次 =)

#! /usr/bin/env python2.7

from PyQt4.QtCore import *
from PyQt4.QtGui import *
from PyQt4.QtWebKit import *
from bs4 import BeautifulSoup as bs
from random import choice
from urllib import urlretrieve
from urllib2 import *
import sys
import os


# Settings for browser
class MyBrowser(QWebPage):
    """Web page with automatic image loading off and a fixed user agent."""

    def __init__(self):
        QWebPage.__init__(self)
        # Turn off automatic image loading for pages handled by this object.
        page_settings = self.settings()
        page_settings.setAttribute(QWebSettings.AutoLoadImages, False)

    def userAgentForUrl(self, url):
        """Return the same user agent string regardless of ``url``."""
        user_agent = (
            "Mozilla/5.0 (Windows NT 6.2; WOW64) "
            "AppleWebKit/537.15 (KHTML, like Gecko) "
            "Chrome/24.0.1295.0 Safari/537.15"
        )
        return user_agent



class Name_Creater(QWebView):
    def __init__(self):
        QWebView.__init__(self)
        self.setPage(MyBrowser())
        self.loadFinished.connect(self.grab_first_name)
        self.frame = self.page().mainFrame()

    def grab_first_name(self):
        html = unicode(self.frame.toHtml()).encode('utf-8')
        soup = bs(html)
        for name in soup.findAll('li', text=True):
            print name

if __name__ == '__main__':
    # A QApplication must exist before any widget is created.
    app = QApplication(sys.argv)

    # Build the view, then point it at the page that lists the names.
    br = Name_Creater()
    url_first_names = QUrl(
        "http://www.genealogyroadtrip.com/Census/male_names_1.htm"
    )
    br.load(url_first_names)
    br.show()

    # Enter the Qt event loop; the loadFinished handler runs from inside it.
    app.exec_()

最佳答案

问题在于您连接的是 QWebView 的 loadFinished 信号,该信号会针对所加载的每个页面各发出一次。因此,如果页面中有多个框架(frame),就会发出多个 loadFinished 信号。

解决方法是改为连接主框架(mainFrame)的 loadFinished 信号:

# Fragment for the view's __init__: install the custom page, keep its main
# frame, and connect the handler to the frame's loadFinished signal.
self.setPage(MyBrowser())
self.frame = self.page().mainFrame()
self.frame.loadFinished.connect(self.grab_first_name)

10-07 21:08