这里就不做详细讲解了,毕竟不是一两句话就能说清楚的,所以我把代码和注释放到了这里,谢谢!

import json

import pymysql
from redis import Redis
# 写入mysql
class WangyiMysql(object):
    """Item pipeline that stores each item's title and content in MySQL.

    Every processed item becomes one row of the ``news`` table in the
    ``spider`` database on the local MySQL server.
    """

    # Database connection; created in open_spider.
    conn = None
    # Cursor reused for all inserts; created lazily on the first item.
    cursor = None

    def open_spider(self, spider):
        """Open the MySQL connection used for the rest of the crawl."""
        self.conn = pymysql.Connection(
            host='127.0.0.1',
            port=3306,
            user='root',
            password='',
            db='spider',
            charset='utf8',
        )
        print(self.conn)

    def process_item(self, item, spider):
        """Insert ``item['title']`` and ``item['content']`` into ``news``.

        A failed insert is printed and rolled back instead of being raised,
        so one bad row does not stop the crawl.

        Returns:
            The unchanged ``item``.

        Raises:
            KeyError: if ``item`` lacks ``'title'`` or ``'content'``.
        """
        # Let the driver escape the values: scraped text may contain quotes
        # that would break (or inject into) a string-formatted statement.
        sql = 'insert into news values (%s, %s)'
        params = (item['title'], item['content'])
        if self.cursor is None:
            self.cursor = self.conn.cursor()
        try:
            self.cursor.execute(sql, params)
            self.conn.commit()
        except Exception as e:
            print(e)
            self.conn.rollback()
        return item

    def close_spider(self, spider):
        """Close the cursor and the connection if they were opened."""
        # The cursor stays None when no item was ever processed.
        if self.cursor is not None:
            self.cursor.close()
        if self.conn is not None:
            self.conn.close()


# Write to Redis
class WangyiRedis(object):
    """Item pipeline that pushes every item onto the Redis list ``news``."""

    # Redis client; created in open_spider.
    conn = None

    def open_spider(self, spider):
        """Create the Redis client for the server at 127.0.0.1:6379."""
        self.conn = Redis(host='127.0.0.1', port=6379)
        print(self.conn)

    def process_item(self, item, spider):
        """Push ``item`` onto the head of the ``news`` list as a JSON string.

        Returns:
            The unchanged ``item``, so that later pipelines still receive it.
        """
        # redis-py 3.x only accepts bytes/str/int/float values, so the
        # mapping is serialized explicitly instead of being pushed as-is.
        payload = json.dumps(dict(item), ensure_ascii=False)
        self.conn.lpush('news', payload)
        return item


# Write to file
class ChoutiproPipeline(object):
    """Item pipeline that writes each item to ``chouti.txt`` as ``title:content``."""

    # Handle of the output file; set in open_spider.
    fp = None

    def open_spider(self, spider):
        """Announce the start of the crawl and open the output file.

        Per the original author's note, this hook is executed only once.
        """
        print('开始爬虫......')
        self.fp = open('chouti.txt', 'w', encoding='utf-8')

    def process_item(self, item, spider):
        """Write one ``title:content`` line for ``item`` and return the item.

        ``item`` is the object submitted by the spider class.
        """
        line = ''.join((item['title'], ':', item['content'], '\n'))
        self.fp.write(line)
        # Returning the item passes it on to the next pipeline class to run.
        return item

    def close_spider(self, spider):
        """Close the output file."""
        self.fp.close()

  如果你想让这三个管道同时执行,记得在 settings.py 里面配置一下 ITEM_PIPELINES:

    # Maps each pipeline's import path to an integer priority. Scrapy runs
    # pipelines from the lowest value to the highest (customarily 0-1000).
    # NOTE(review): the priority values were missing in the original text;
    # 300/301 are assumed here - adjust them to the order you need.
    ITEM_PIPELINES = {
        'first_blod.pipelines.FirstBlodPipeline': 300,
        'first_blod.pipelines.MysqlPileLine': 301,
        # Remember to list the class path of every pipeline you want to run.
    }
05-08 08:24