下面是一段简短的代码,展示如何把爬取到的内容写入 Redis:
#! /usr/bin/env python
# -*- coding=utf-8 -*-
import json
import logging
import re
import sys

import redis
import requests
# Python 2 only: sys.setdefaultencoding is removed from the sys module at
# interpreter start-up, so the module is reloaded to get it back.
reload(sys)
# Use UTF-8 (instead of ASCII) for implicit str <-> unicode conversions.
sys.setdefaultencoding("utf-8")
# NOTE(review): classinfo, f and num are not referenced by any code visible
# in this file -- confirm whether they are still needed.
classinfo = []
# Mode 'w' creates or truncates info.txt as soon as this module is executed;
# nothing in the visible code writes to or closes this handle.
f = open('info.txt','w')
num = 0
class RedisTT(object):
    """Small wrapper around a redis client that pushes items onto one list.

    Attributes:
        keyName: name of the redis list that items are pushed onto.
        host: redis server host name.
        port: redis server TCP port.
        re: the underlying ``redis.Redis`` client (the attribute name is kept
            for backward compatibility; it is unrelated to the ``re`` module).
    """

    def __init__(self, host='localhost', port=6379, keyName='DouBan'):
        """Create the client.

        Args:
            host: redis server host name.
            port: redis server port. Defaults to 6379, the standard redis
                port; the previous hard-coded empty string was not a usable
                port value.
            keyName: name of the redis list used by ``insertRedis``.
        """
        self.keyName = keyName
        self.host = host
        self.port = port
        self.re = redis.Redis(host=self.host, port=self.port)

    def insertRedis(self, jsonStr):
        """Store ``jsonStr`` in redis by LPUSH-ing it onto ``self.keyName``."""
        self.re.lpush(self.keyName, jsonStr)
class Douban(object):
    """Fetch the Douban movie home page and store its movie entries in redis.

    Each entry is the text found between ``data-tit`` and ``data-enough`` in
    the page markup; six attribute values are extracted from it and pushed to
    redis as one JSON object per entry.
    """

    # Seconds to wait for the HTTP response; without a timeout requests.get
    # can block indefinitely.
    REQUEST_TIMEOUT = 10

    # Captures the text of one movie entry.
    _ENTRY_RE = re.compile('data-tit(.*?)data-enough', re.S)

    # (output key, pattern) pairs, in the order the keys are added to the
    # record. Each pattern captures one attribute value in group 1.
    _FIELD_RES = (
        ('title', re.compile('le="(.*?)"', re.S)),
        ('year', re.compile('data-release="(.*?)" data', re.S)),
        ('Rating', re.compile('data-rate="(.*?)" data-star', re.S)),
        ('time', re.compile('data-duration="(.*?)" data-re', re.S)),
        ('reg', re.compile('data-region="(.*?)" data-dir', re.S)),
        ('act', re.compile('data-actors="(.*?)" data-in', re.S)),
    )

    def _parse_entry(self, chunk):
        """Return the field dict for one entry, or None if a field is missing."""
        info = {}
        for key, pattern in self._FIELD_RES:
            match = pattern.search(chunk)
            if match is None:
                logging.warning(
                    'Skipping movie entry without a %r field', key)
                return None
            info[key] = match.group(1)
        return info

    def write(self, htm):
        """Extract every movie entry from ``htm.text`` and push it to redis.

        Args:
            htm: response-like object whose ``text`` attribute holds the page
                markup.

        Entries that lack one of the expected attributes are skipped (with a
        warning) instead of aborting the whole run.
        """
        store = None
        for chunk in self._ENTRY_RE.findall(htm.text):
            info = self._parse_entry(chunk)
            if info is None:
                continue
            if store is None:
                # One client for the whole page rather than one per entry;
                # created lazily so nothing is built when there is no data.
                store = RedisTT()
            store.insertRedis(json.dumps(info))

    def getremen(self):
        """Download the Douban movie home page and store its entries."""
        url = 'http://movie.douban.com/'
        html = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        html.encoding = 'utf-8'
        self.write(html)
# Script entry point: run one scrape when executed directly.
if __name__ == "__main__":
    scraper = Douban()
    scraper.getremen()
结果如下: