python爬虫、软件安装

python爬虫、软件安装

生成器、迭代器、可迭代对象

生成器

简单生成器示例
def demo_gen():
    """Generator function that yields 1, 2 and 3, one value per ``next()``."""
    for value in (1, 2, 3):
        yield value
# Calling the generator function returns a generator object; no body code
# runs until next() is called on it.
gen = demo_gen()
print(next(gen))
print(next(gen))
print(next(gen))
# demo_gen yields only three values, so this fourth call is the deliberate
# demonstration of what happens on an exhausted generator.
print(next(gen))  # once the generator is exhausted, calling next() again raises StopIteration
生成器表达式

以下代码在 IPython 中运行(安装:pip install ipython)

In [1]: (i for i in range(10))
Out[1]: <generator object <genexpr> at 0x107798200>
类示例代码:
class Sentence:
    """Iterable sentence whose ``__iter__`` is a generator function.

    The text is split into words once, at construction time; every call to
    ``iter()`` on an instance returns a fresh generator over those words.
    """

    def __init__(self, text):
        """Store *text* and the list of words found in it.

        Args:
            text: The string to split into words.
        """
        # Imported locally because this file has no top-level ``import re``.
        import re

        self.text = text
        # The original called ``RE_WORD.findall``, but ``RE_WORD`` is never
        # defined in this file, so construction raised NameError.
        # NOTE(review): ``\w+`` is assumed to be the intended word pattern
        # -- confirm.
        self.words = re.findall(r'\w+', self.text)

    def __iter__(self):
        """Return a generator that yields the stored words in order."""
        for word in self.words:
            yield word

迭代器

使用while循环模拟for循环对迭代器的处理过程
s = 'ABC'
# Ask the iterable for an iterator, then drive it by hand.
it = iter(s)
while True:
    try:
        # Fetch and print the next item.
        print(next(it))
    except StopIteration:
        # The iterator is exhausted: drop the reference and leave the loop.
        del it
        break
实现迭代器的必备方法
  1. __next__

    返回下一个可用的元素,如果没有元素了,抛出StopIteration异常。

  2. __iter__

    返回self,以便在应该使用可迭代对象的地方使用迭代器,例如for循环中。

实现代码:
class IterableDemo:
    """Iterator over the items of an indexable sequence such as a string.

    Implements both halves of the iterator protocol: ``__next__`` hands out
    the next item and ``__iter__`` returns the iterator itself.
    """

    def __init__(self, text):
        """Remember *text* and start at its first item.

        Args:
            text: Indexable sequence to step through (e.g. a ``str``).
        """
        self.text = text
        self.index = 0  # position of the item the next __next__ call returns

    def __iter__(self):
        """Return ``self`` so the object also works where an iterable is expected."""
        # Returning iter(self) here would call __iter__ again and recurse
        # until RecursionError; an iterator must return itself.
        return self

    def __next__(self):
        """Return the next item of ``text``.

        Raises:
            StopIteration: When every item has already been returned.
        """
        try:
            item = self.text[self.index]
        except IndexError:
            # Running past the end is the normal end-of-iteration signal,
            # so the IndexError is not chained onto the StopIteration.
            raise StopIteration from None
        self.index += 1
        return item

if __name__ == '__main__':
    # Pull the first three items by hand with next().
    it = IterableDemo('Hello,world')
    for _ in range(3):
        print(next(it))

可迭代对象

from collections.abc import Iterable

class Sentence:
    """Minimal class that defines ``__iter__``.

    Defining ``__iter__`` is what makes ``issubclass(Sentence, Iterable)``
    true, even though the class does not inherit from ``Iterable``.
    """

    def __iter__(self):
        """Return an iterator over ``self.words``."""
        # The original returned iter(self), which calls __iter__ again and
        # recurses until RecursionError.
        # NOTE(review): this demo class never assigns ``words`` itself; the
        # attribute must be set on the instance before iterating.
        return iter(self.words)


print(issubclass(Sentence, Iterable))  # == True

class Sentence:
    """Demo class that exposes only ``__getitem__`` and no ``__iter__``."""

    def __getitem__(self, index):
        """Return ``self.words[index]``."""
        words = self.words
        return words[index]


# Without __iter__ the class is not recognised as an Iterable subclass.
print(issubclass(Sentence, Iterable))  # == False

实现__getitem__方法
def __getitem__(self, index):
    """Return the entry of ``self.words`` selected by *index*."""
    words = self.words
    return words[index]
实现__iter__方法
def __iter__(self):
    """Return a fresh iterator over ``self.words``."""
    words = self.words
    return iter(words)
示例代码:
class Sentence:
    """Iterable collection of the words found in a piece of text.

    Iteration is delegated to the built-in list iterator over ``words``.
    """

    def __init__(self, text):
        """Store *text* and the list of words found in it.

        Args:
            text: The string to split into words.
        """
        # Imported locally because this file has no top-level ``import re``.
        import re

        self.text = text
        # The original called ``RE_WORD.findall``, but ``RE_WORD`` is never
        # defined in this file, so construction raised NameError.
        # NOTE(review): ``\w+`` is assumed to be the intended word pattern
        # -- confirm.
        self.words = re.findall(r'\w+', self.text)

    def __iter__(self):
        """Return a new iterator over the stored words."""
        return iter(self.words)

    # __getitem__ is intentionally left out here: defining __iter__ alone is
    # enough for instances to be used in a for loop.

    def __len__(self):
        """Return the number of words."""
        return len(self.words)

    def __str__(self):
        """Return ``Sentence(...)`` with an abbreviated repr of the word list."""
        # Imported locally because this file has no top-level
        # ``import reprlib``; reprlib.repr shortens long lists with '...'.
        import reprlib

        return 'Sentence(%s)' % reprlib.repr(self.words)


# Sentence defines __iter__, so an instance can be used directly as the
# target of a for loop; each pass prints one word.
s = Sentence('Hello world how are you')
for word in s:
    print(word)
04-05 09:36