代码如下:
#载入RCurl
library(RCurl)
# Scrape image URLs from a range of jandan.net "ooxx" pages and append them,
# one per line, to "url.txt" in the current working directory.

# Custom client headers sent with every page request.
myheader <- c(
  "User-Agent" = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36",
  "Accept" = "*/*",
  "Accept-Language" = "zh-CN,zh;q=0.8",
  "Connection" = "keep-alive"
)
# For debugging: the header supplied by the teacher did not seem to work.
# "Referer"="http://jandan.net/ooxx/page-1197"

# Build the page URLs page-1191 .. page-1197.
page <- 1:7
urllist <- paste0("http://jandan.net/ooxx/page-119", page)

# Regular expressions (loop-invariant, so defined once up front).
# NOTE(review): the original `pattern` literal was truncated in the source
# (its HTML-like content was lost), leaving an unterminated string. It is
# reconstructed here as "capture the src attribute of an <img> tag" --
# confirm against the actual page markup.
pattern <- '<img src="([^"]+)"'
pattern2 <- 'jpg" />'

for (url in urllist) {
  # Fetch the page; on failure, warn and move on instead of aborting the run.
  temp <- tryCatch(
    getURL(url, httpheader = myheader),
    error = function(e) {
      warning("Failed to fetch ", url, ": ", conditionMessage(e), call. = FALSE)
      NULL
    }
  )

  if (!is.null(temp)) {
    # Split into lines; accept both CRLF and bare LF line endings.
    k <- strsplit(temp, "\r?\n")[[1]]

    # Keep lines that contain an <img> tag and end the tag with a .jpg source.
    tempurl <- k[grep(pattern, k)]
    temp2 <- tempurl[grep(pattern2, tempurl, fixed = TRUE)]

    # Extract the captured URL (element 2 of each match: 1 = full match,
    # 2 = first capture group).
    urladdr <- regexec(pattern, temp2)
    murl <- regmatches(temp2, urladdr)
    img_urls <- vapply(murl, function(m) m[2], character(1))

    # Append the extracted image URLs to the output file, one per line.
    if (length(img_urls) > 0) {
      cat(img_urls, file = "url.txt", sep = "\n", append = TRUE)
    }
  }

  # Be polite to the server: pause between page requests.
  Sys.sleep(10)
}