爬取淘宝商品信息(Python 脚本:抓取搜索结果并写入 MySQL):
import pymysql
import requests
import json
import re

# Patterns for the fields embedded in the search page's inline JSON.  The
# string-valued fields accept backslash escapes so that an escaped quote
# inside a value does not end the match early.
_PRICE_RE = re.compile(r'"view_price":"([\d.]*)"')
_TITLE_RE = re.compile(r'"raw_title":"((?:[^"\\]|\\.)*)"')
_PIC_RE = re.compile(r'"pic_url":"((?:[^"\\]|\\.)*)"')


def getHTMLText(url):
    """Fetch *url* and return the response body as text.

    Returns an empty string when the request fails (connection error,
    timeout or non-2xx status), so callers can treat "" as "no data".
    """
    # NOTE(review): this is a hard-coded, logged-in session cookie.  It will
    # expire and should not live in source control; consider loading it from
    # the environment or a local config file instead.
    kv = {'cookie': 'thw=cn; hng=CN%7Czh-CN%7CCNY%7C156; enc=ooWAQ8HPiBkBlDgWaQ2BoQXFD4cHXejeOP0Nq7xvbCuGN5yubT%2ByBjrb2j417KSrQkoR9YQxMFoqYufejy7Hlw%3D%3D; _m_h5_tk=9cc0be22588c97655e9e0ed031f29703_1589472803622; _m_h5_tk_enc=8fd3fcd9077f0f17bcb2dc4f9d593617; cookie2=1a0da2cc9535ebe4f7bd2787bebb9da1; t=0a472589a79eda4e33e9b072e3446525; _tb_token_=e136e0330e37e; alitrackid=www.taobao.com; _samesite_flag_=true; cna=cAVGFNpuDkUCAXkcRV0prgNa; sgcookie=EGxNSorLw1t5Dg21WTFJw; unb=3361002229; uc3=nk2=qA%2Fo8e0UjX1l%2BUs%3D&lg2=UtASsssmOIJ0bQ%3D%3D&id2=UNN78Eg15kheYA%3D%3D&vt3=F8dBxGZobO%2BfXgtBG40%3D; csg=228d2d4a; lgc=%5Cu5E05%5Cu6C14%5Cu5CF0happy; cookie17=UNN78Eg15kheYA%3D%3D; dnk=%5Cu5E05%5Cu6C14%5Cu5CF0happy; skt=aa63ca1a4a6e356c; existShop=MTU4OTYzNTEyOQ%3D%3D; uc4=nk4=0%40qjS8tzpCQQHfNZapqmDNrmd4%2F2Dhnw%3D%3D&id4=0%40UgQz06zOiEqwpAtViK7HqZlIKslx; tracknick=%5Cu5E05%5Cu6C14%5Cu5CF0happy; _cc_=U%2BGCWk%2F7og%3D%3D; _l_g_=Ug%3D%3D; sg=y99; _nk_=%5Cu5E05%5Cu6C14%5Cu5CF0happy; cookie1=UNJSu1S2nK7AhsBSrVKq4Nd7T4K1fH40ygcHPrTYWeA%3D; lastalitrackid=login.taobao.com; tfstk=ccVGB7cFtRk12ge_PFG_ovbuN0aGaiXZfSPUT5GPww8ivnNE7sYkLLSdMwmTjSpf.; uc1=cookie16=URm48syIJ1yk0MX2J7mAAEhTuw%3D%3D&cookie21=VT5L2FSpccLuJBreK%2BBd&cookie15=UIHiLt3xD8xYTw%3D%3D&existShop=false&pas=0&cookie14=UoTUM2YYf7HXaw%3D%3D; mt=ci=21_1; v=0; JSESSIONID=EC27C014A7BFB337D51D380F31E05C14; l=eBTRDRMIq3U2_UibBOfwourza77OSIRAguPzaNbMiOCPO_fp5GiGWZb3Hg89C3GVh67HR3J_JUNTBeYBqIv4n5U62j-la_kmn; isg=BA4O1D8B3Mb76GvkyGwHSEkCX-TQj9KJQPZkSjhXepHMm671oB8imbRZ08f3g8qh',
          'user-agent': 'Mozilla/5.0'}
    try:
        r = requests.get(url, headers=kv, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Best effort: a failed page is reported as empty text.
        return ""


def _unquote_json(raw):
    """Decode the body of a JSON string literal (text between the quotes).

    Replaces the former ``eval`` call: scraped text must never be executed.
    Falls back to the raw text if it is not a valid JSON string body.
    """
    try:
        return json.loads('"' + raw + '"')
    except ValueError:
        return raw


def parsePage(ilt, html):
    """Extract goods from one search-result page and append them to *ilt*.

    Each appended row is ``[seq, title, price, old_price, 100, image_url]``:

    * ``seq`` continues from the current length of *ilt*, so rows collected
      from several pages get distinct numbers.
    * ``price`` and ``old_price`` are the same string (the page only exposes
      ``view_price``).
    * ``100`` is a fixed placeholder value (presumably stock or sales count;
      verify against the ``meat`` table schema).

    Prices, titles and pictures are paired positionally; surplus matches of
    any one field are ignored.
    """
    if not html:
        return
    prices = _PRICE_RE.findall(html)
    titles = _TITLE_RE.findall(html)
    pics = _PIC_RE.findall(html)
    for price, raw_title, raw_pic in zip(prices, titles, pics):
        title = _unquote_json(raw_title)
        img = _unquote_json(raw_pic)
        # pic_url is normally protocol-relative ("//host/path").
        if not img.startswith(('http://', 'https://')):
            img = "https:" + img
        ilt.append([len(ilt) + 1, title, price, price, 100, img])


def printGoodsList(ilt):
    """Insert the rows of *ilt* into the ``meat`` table and print a summary.

    The database step is skipped when *ilt* is empty.  The connection is
    always closed, even if the insert fails.
    """
    if ilt:
        # NOTE(review): credentials are hard-coded; move them to configuration.
        # Keyword arguments are required by PyMySQL >= 1.0.
        db = pymysql.connect(host="localhost", user="root",
                             password="fengge666", database="phpwork",
                             charset='utf8')
        try:
            with db.cursor() as cursor:
                sql_cixian = "INSERT INTO meat values (%s,%s,%s,%s,%s,%s)"
                cursor.executemany(sql_cixian, ilt)
            db.commit()
        finally:
            db.close()

    tplt = "{:4}\t{:8}\t{:16}\t{:16}"
    print(tplt.format("序号", "价格", "商品名称", "商品图片"))
    for count, g in enumerate(ilt, start=1):
        # Row layout: [seq, title, price, old_price, 100, image_url].
        print(tplt.format(count, g[2], g[1], g[5]))


def main(goods='肉', depth=2):
    """Scrape *depth* result pages for *goods*, store and print the rows.

    Taobao paginates search results in steps of 44 items via the ``s``
    query parameter.
    """
    start_url = 'https://s.taobao.com/search?q=' + goods
    infoList = []
    for i in range(depth):
        url = start_url + '&s=' + str(44 * i)
        parsePage(infoList, getHTMLText(url))
    printGoodsList(infoList)


if __name__ == "__main__":
    main()
通过 Ajax 异步请求从 PHP 接口获取商品数据,并渲染到 HTML 页面:
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>我的博客</title>
<link rel="stylesheet" href="https://cdn.staticfile.org/twitter-bootstrap/3.3.7/css/bootstrap.min.css">
<script src="https://cdn.staticfile.org/jquery/2.1.1/jquery.min.js"></script>
<script src="https://cdn.staticfile.org/twitter-bootstrap/3.3.7/js/bootstrap.min.js"></script>
<style>
/* Page-wide defaults: spacing reset, base font and background image. */
body {
/* Zero the body's default padding and margin so the page has no white border and can fill the screen */
padding: 0;
margin: 0;
/* Font size and font families */
font: 12px/1.5 Helvetica Neue, Helvetica, Arial, Hiragino Sans GB, \5FAE\8F6F\96C5\9ED1, tahoma, simsun, \5b8b\4f53;
font-weight: 500;
overflow-x: hidden;
background-image: url("img/background2.png");
}
/* Styles for the user-name label, the main container and the search bar. */
#name{
color:white;
padding: 15px;
font-family: "楷体";
font-size: 15px;
} .container {
position: relative;
width: 1090px;
height: 2660px;
margin: 0 auto;
} /* Search bar as a whole */
.searchBar {
width: 1090px;
height: 100px;
margin: 0 auto;
margin-top: -12px;
} /* The logo on the left is a hyperlink */
.searchBar a {
display: block;
text-decoration: none;
font-size: 13px;
color: black;
} /* Logo image on the left */
.searchBar a img {
float: left;
width: 210px;
height: 80px;
cursor: pointer;
margin-top: 20px;
z-index: 9;
} /* Search input area */
.searchText {
float: right;
width: 510px;
padding-left: 1px;
margin-top: 28px;
margin-right: 62px;
background-color: #ffffff;
border: 2px solid #ff2337;
border-radius: 40px;
} /* Text input styling */
.searchText input[type="text"] {
display: block;
width: 80%;
margin-top: 9px;
margin-left: 36px;
border: 0px;
} /* Placeholder text of the input is rendered in italics */
.searchText input[type="text"]::placeholder {
font-style: italic;
} /* Hide the input's default outline while it has focus */
.searchText input[type="text"]:focus {
outline: none;
} .searchText img {
transform: scale(0.5);
border: 1px solid;
margin-top: -4px;
background-color: rgb(214, 214, 214);
} /* Small magnifier icon at the left of the input */
/* all_Icon.png is used as a sprite sheet (per the original author): background-position selects the region at -466px -667px */
.searchText .zoomIcon {
position: absolute;
width: 14px;
height: 14px;
margin-left: 10px;
margin-top: 11px;
background-position: -466px -667px;
background-image: url('img/all_Icon.png');
border: 1px;
} /* Search button */
.searchButton {
display: block;
float: right;
width: 56px;
height: 38px;
margin-right: -4px;
margin-top: -29px;
background-color: #ff2337;
border-radius: 30px;
cursor: pointer;
} /* Magnifier icon inside the search button */
.searchButton i {
display: block;
width: 56px;
height: 38px;
margin-right: -4px;
margin-top: 0px;
background-image: url('img/放大镜大.png');
background-repeat: no-repeat;
background-position: 50%;
border-radius: 30px;
} /* Appearance of the search button while hovered */
.searchButton:hover {
background: linear-gradient(270deg, #f85a7d, #ff3234);
}
/* Shopping-cart link */
.shopCar {
display: block;
float: right;
width: 106px;
height: 36px;
margin-top: 28px;
margin-right: 75px;
line-height: 36px;
border: 2px solid #ff1e32;
text-decoration: none;
border-radius: 36px;
font-size: 1rem;
font-weight: 500;
text-align: center;
} /* On hover, change the cart link's text colour and background colour */
.shopCar:hover {
color: #ff1e32;
background-color: #fff4f5;
} /* Cart icon */
.shopIcon {
display: inline-block;
width: 20px;
height: 20px;
margin-right: 2px;
margin-bottom: -5px;
background: url(img/购物车.png) no-repeat 0 0;
}
a{
text-decoration: none;
} /* classificationNav = classification + Nav, i.e. the category navigation */
.classificationNav {
position: relative;
width: 1090px;
z-index: 9999;
} /* Horizontal list of the category navigation */
.classificationNav ul.horizontal {
width: 100%;
height: 40px;
} /* Lay the items of the horizontal list out as inline blocks */
.classificationNav ul.horizontal li {
display: inline-block;
height: 40px;
font-size: 14px;
font-weight: bolder;
line-height: 40px;
overflow: hidden;
margin-right: 80px;
}
.classificationNav ul.horizontal li a{
color: black;
}
/* When hovering a navigation link, turn it red and show the pointer (hand) cursor */
.classificationNav ul.horizontal li a:hover {
color: red;
cursor: pointer;
} /* Round images */
.roundimg{
width: 140px;
height: 140px;
margin-left: 100px;
margin-top: 5px;
float: left;
border-radius: 200px;
} a{
color: black;
}
h3{
font-size: 15px;
}
p{
margin-bottom: 4px;
}
/* ------------------------- Goods display section -------------------------- */
.goodsDisplay {
width: 1000px;
height: 800px;
background-color: rgb(255, 255, 255);
}
.shopTitle {
width: 1000px;
height: 60px;
text-align: center;
line-height: 60px;
font-size: 28px;
color: rgb(255, 255, 255);
background-color: rgb(227, 20, 54);
margin-top: 6px;
margin-bottom: 12px;
}
/* Goods list */
.goodsList {
position: relative;
width: 1000px;
height: 630px;
margin-top: -12px;
background-color: rgb(240, 240, 240);
} .goodsList ul {
margin-left: -40px;
} .goodsList ul li {
display: block;
float: left;
position: relative;
height: 279px;
width: 190px;
background-color: #fff;
margin-top: 10px;
margin-bottom: 16px;
margin-left: 8px;
} /* A single product tile */
.goodsList ul li .goods {
height: 296px;
background-color: #f0f0f0;
border: 1px solid transparent;
background-color: inherit;
transition: all 0.2s ease;
} /* Small red badge at the top-left corner of a product tile */
.goodsList ul li .leftTop {
display: block;
position: absolute;
top: 0;
margin-top: 1px;
margin-left: -1px;
width: fit-content;
height: 20px;
padding: 0 5px 0 3px;
color: #fff;
font-size: 12px;
line-height: 20px;
background-color: #ff8385;
z-index: 1;
} /* Small badge at the bottom-left corner of a product tile */
.goodsList ul li .leftBottom {
display: block;
position: absolute;
height: 15px;
left: 8px;
border: 1px solid #e31436;
border-radius: 3px;
background: #fff;
color: #e31436;
font-size: 12px;
line-height: 15px;
} /* Product name */
.goodsList ul li h3 {
color: black;
display: block;
height: auto;
width: 180px;
white-space: nowrap;
text-overflow: ellipsis;
font-weight: bold;
margin-left: 4px;
overflow: hidden;
} /* On hover over a product tile, switch white-space to normal so the full title is shown */
.goodsList ul li:hover .goods {
height: 338px;
border: 1px solid red;
white-space: normal;
} .goodsList ul li:hover p {
white-space: normal;
} .goodsList ul li h3:hover p {
cursor: pointer;
text-decoration: underline;
} .goodsList ul li img {
width: 100%;
} /* Description text of a single product */
.goodsList ul li .describe {
color: #666;
margin-left: 4px;
} /* Price of a single product, shown in bright red */
.goodsList ul li .value {
position: relative;
color: rgb(227, 20, 54);
background-color: #fff;
line-height: 22px;
margin-bottom: 14px;
z-index: 9999;
} .goodsList ul li .value a {
display: block;
float: right;
width: fit-content;
height: 24px;
margin-right: 4px;
bottom: 14px;
padding: 0 6px;
border-radius: 3px;
background: #e31436;
color: #fff;
font-size: 12px;
line-height: 24px;
text-align: center;
} /* Original price (the <del> element), shown in grey; typically used to advertise a discount */
.goodsList ul li .value del {
color: #999;
} /* Right-hand navigation bar */
ul.rightNav {
position: fixed;
right: 60px;
top: 30vh;
width: 128px;
z-index: 9999;
} /* Items (li) of the right-hand navigation bar */
ul.rightNav li {
display: block;
list-style: none;
width: 100px;
line-height: 30px;
height: 30px;
white-space: nowrap;
text-overflow: ellipsis;
overflow: hidden;
text-align: center;
color: #fff;
background-color: #f03048;
border-bottom: 1px solid rgba(0, 0, 0, 0.2);
cursor: pointer;
z-index: 100;
} /* When hovering an item of the right-hand navigation bar, its background turns dark red */
ul.rightNav li:hover {
background-color: rgb(175, 36, 52);
} /* Cart icon used in the first item of the right-hand navigation bar */
#oneCar {
display: block;
float: left;
margin-left: 18px;
margin-top: 5px;
width: 20px;
height: 20px;
background-size: 20px 21px;
background-image: url('img/购物车_1.png');
} /* Cart label next to the icon */
#oneCar+span {
margin-left: -20px;
} /* Up-arrow icon used in the last item of the right-hand navigation bar */
#oneUp {
display: block;
margin-left: 40px;
margin-top: 0px;
width: 20px;
height: 16px;
background-size: 20px 21px;
background-image: url('img/上箭头.png');
} #oneUp+span {
display: block;
line-height: 16px;
} </style>
<script>
$(function () {
    /**
     * Escape a value for safe interpolation into HTML text or a quoted
     * attribute. Values are stringified first, so null/undefined render as
     * "null"/"undefined" exactly as plain string concatenation would.
     */
    function escapeHtml(value) {
        return String(value)
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;")
            .replace(/"/g, "&quot;")
            .replace(/'/g, "&#39;");
    }

    /**
     * Build the <ul> markup for an array of goods records. Each record is
     * read for the fields img, title, salenumber, newprice and oldprice.
     * All values are escaped because they originate from scraped data.
     */
    function buildGoodsList(data) {
        var items = data || [];
        var html = "<ul>";
        for (var idx = 0; idx < items.length; idx++) {
            var ls = items[idx];
            html += "<li>" +
                "<div class='goods'>" +
                "<img src=\"" + escapeHtml(ls.img) + "\" alt=\"\">" +
                "<span class='leftTop'>热销</span>" +
                "<span class=\"leftBottom\">新人专享</span>" +
                "<h3><a><p>" + escapeHtml(ls.title) + "</p></a></h3>" +
                "<p class='describe'>月销:" + escapeHtml(ls.salenumber) + "</p>" +
                "<div class='value'>" +
                "<span style='margin-left: 6px;'>¥</span>" +
                "<span style='font-size: 20px !important;font-weight:700;margin-left: -3px;'>" + escapeHtml(ls.newprice) + "</span>" +
                "<del>" + escapeHtml(ls.oldprice) + "</del>" +
                "<a href=''>立即购买</a>" +
                "</div>" +
                "</div>" +
                "</li>";
        }
        return html + "</ul>";
    }

    /**
     * POST to `url`, expect a JSON array of goods and render it into the
     * element matched by `targetSelector`. The request is asynchronous so
     * the page stays responsive while the four lists load.
     */
    function loadGoods(url, targetSelector) {
        $.ajax({
            url: url,
            dataType: "json", // response format
            type: "post",     // request method
            success: function (data) {
                $(targetSelector).html(buildGoodsList(data));
            },
            error: function () {
                alert("出错了!");
            }
        });
    }

    /* Show the user name taken from the page URL: the text between the first
       and second "=" of the address. It is inserted as text, never as HTML,
       because the URL is attacker-controllable. */
    var urlParts = window.location.href.split("=");
    if (urlParts.length > 1) {
        var userName;
        try {
            userName = decodeURI(urlParts[1]);
        } catch (e) {
            // Malformed percent-encoding: fall back to the raw value.
            userName = urlParts[1];
        }
        $("#name").empty().append(
            $("<a>").attr("style", "color: white;text-decoration: none;").text(userName)
        );
    }

    /* Carousel: switch the banner image every 2 seconds. A function is passed
       instead of a string so nothing is evaluated at runtime. */
    setInterval(function () { changeImg(); }, 2000);

    /* Load the four goods lists. */
    loadGoods('getseefood.php', "#goodsList");
    loadGoods('getfruit.php', "#goodsList2");
    loadGoods('getmeat.php', "#goodsList3");
    loadGoods('getvegetable.php', "#goodsList4");

    //-------------- Right-hand navigation bar -------------------
    const rightNav = document.getElementsByClassName('rightNav')[0];

    // Show the right-hand navigation bar once the page has been scrolled
    // down by at least 500px, and hide it otherwise.
    function toggleRightNav() {
        if (!rightNav) {
            return;
        }
        if (document.documentElement.scrollTop >= 500) {
            rightNav.style.display = "block";
        } else {
            rightNav.style.display = "none";
        }
    }
    window.onscroll = toggleRightNav;
    // Apply the rule immediately so the bar is not shown at the top of the
    // page before the first scroll event.
    toggleRightNav();

    // Clicking an item of the right-hand navigation bar highlights it in dark
    // red and resets every other item to the default red.
    const rightNavLi = document.querySelectorAll('.rightNav li');
    for (let i = 0; i < rightNavLi.length; i++) {
        rightNavLi[i].onclick = function () {
            for (let j = 0; j < rightNavLi.length; j++) {
                rightNavLi[j].style.backgroundColor = '#f03048';
            }
            this.style.backgroundColor = 'rgb(175, 36, 52)';
        };
    }
});
// Number of carousel images (img/img1.jpg .. img/img3.jpg).
var CAROUSEL_IMAGE_COUNT = 3;
// Number of the image currently displayed; the page starts on img1.
var carouselIndex = 1;

/**
 * Advance the carousel to the next image, wrapping from the last image back
 * to the first. Does nothing if the #img1 element is not on the page.
 */
function changeImg() {
    var banner = document.getElementById("img1");
    if (!banner) {
        return;
    }
    // Advance before displaying so the first tick moves to img2 rather than
    // re-showing the image that is already visible.
    carouselIndex = carouselIndex % CAROUSEL_IMAGE_COUNT + 1;
    banner.src = "img/img" + carouselIndex + ".jpg";
}
</script>
</head>
<body>
<!-- Top navigation bar -->
<nav class="navbar navbar-inverse" role="navigation">
    <div class="container-fluid">
        <div class="navbar-header">
            <a class="navbar-brand" href="#">胡吃海喝购物网站</a>
        </div>
        <div>
            <ul class="nav navbar-nav">
                <li class="active"><a href="#"><span style="font-size: large;">首页</span></a></li>
                <li><a href="#">我的订单</a></li>
                <li class="dropdown">
                    <a href="#" class="dropdown-toggle" data-toggle="dropdown">
                        关于我<b class="caret"></b>
                    </a>
                    <ul class="dropdown-menu">
                        <li><a href="#">个人信息</a></li>
                        <li class="divider"></li>
                        <li><a href="#">我的红包</a></li>
                        <li class="divider"></li>
                        <li><a href="#">我的优惠券</a></li>
                    </ul>
                </li>
                <li><a href="#"></a></li>
                <li><a href="#">客户服务</a></li>
                <li><a href="#">充值中心</a></li>
            </ul>
        </div>
        <ul class="nav navbar-nav navbar-right">
            <li><a href="login/register.html"><span class="glyphicon glyphicon-user"></span> 注册</a></li>
            <li><a href="login/login.html"><span class="glyphicon glyphicon-log-in"></span> 登录</a></li>
            <!-- Filled by the page script with the user name taken from the URL -->
            <li><div id="name"><a></a></div></li>
        </ul>
    </div>
</nav>
<!-- Logo, search box and shopping cart -->
<!-- Main content -->
<div class="container">
    <!-- Search bar row -->
    <div class="searchBar">
        <a href=""><img src="img/logo2.png" alt=""></a>
        <a href="" class="shopCar"><i class="shopIcon"></i> 购物车</a>
        <div class="searchText">
            <span class="zoomIcon"></span>
            <input type="text" placeholder="食品生鲜吃顿好的">
            <span class="searchButton"><i></i></span>
        </div>
    </div>
    <!-- Category navigation -->
    <div class="classificationNav">
        <ul class="horizontal">
            <li><a style="color: red;">首页</a></li>
            <li><a style="text-decoration: none">海鲜</a></li>
            <li><a style="text-decoration: none"> 水果</a></li>
            <li><a style="text-decoration: none"> 蔬菜</a></li>
            <li><a style="text-decoration: none"> 鲜肉</a></li>
            <li><a style="text-decoration: none">充值</a></li>
        </ul>
    </div>
    <!-- Fresh-food categories (href is not a valid attribute on img, so it was dropped) -->
    <img class="roundimg" src="img/seefood.jpg" alt="海鲜">
    <img class="roundimg" src="img/fruits.jpg" alt="水果">
    <img class="roundimg" src="img/vegetable.jpg" alt="蔬菜">
    <img class="roundimg" src="img/meat.jpg" alt="鲜肉">
    <!-- Carousel: the page script swaps the src of #img1 -->
    <div>
        <img src="img/img1.jpg" class="img-rounded" id="img1" width="100%" height="400" alt="">
    </div>
    <!-- Goods display section: each .goodsList is filled by an Ajax request -->
    <div class="goodsDisplay">
        <div class="shopTitle"><a name="fruit" id="fruit">★新品上市★</a></div>
        <div class="goodsList" id="goodsList2"></div>
        <div class="shopTitle"><a name="seefood" id="seefood">★口碑爆款★</a></div>
        <div class="goodsList" id="goodsList"></div>
        <div class="shopTitle"><a name="meat" id="meat">★绝味鲜肉★</a></div>
        <div class="goodsList" id="goodsList3"></div>
        <div class="shopTitle"><a name="vegetable" id="vegetable">★新鲜蔬菜★</a></div>
        <div class="goodsList" id="goodsList4"></div>
        <br>
    </div>
    <!-- Right-hand navigation bar. Only li elements are children of the ul,
         and each link targets the section whose title it shows. -->
    <ul class="rightNav">
        <li style="height: 34px;line-height: 34px;"><a href="" style="color: white;text-decoration: none"><span id="oneCar"></span><span>购物车</span></a></li>
        <li><a href="#fruit" style="color: white;text-decoration: none">★新品上市★</a></li>
        <li><a href="#seefood" style="color: white;text-decoration: none">★口碑爆款★</a></li>
        <li><a href="#meat" style="color: white;text-decoration: none">★绝味鲜肉★</a></li>
        <li><a href="#vegetable" style="color: white;text-decoration: none">★新鲜蔬菜★</a></li>
        <li style="height: 34px;line-height: 34px;"><a href="#top" style="color: white;text-decoration: none"><span id="oneUp"></span><span>TOP</span></a></li>
    </ul>
</div>
</body>
</html>
PHP“胡吃海喝”项目首页完成。