pandas模块:
读取excel / json / sql / csv /ini 文件--
import pandas as pd
DataFrame数据结构:
DataFrame是一个表格型的数据结构,含有一组有序的列。
DataFrame可以被看做是由Series组成的字典,并且共用一个索引。
产生时间对象数组:date_range
#参数
start 开始时间
end 结束时间
periods 要生成的时间点个数
freq 时间频率,默认为'D'(按天)
#属性和方法
dtypes 查看各列的数据类型
index 查看行索引(行标签)
columns 查看各列的标签
values 查看数据框内的数据,即不含表头和索引的数据
describe 查看每一列的统计信息(计数、均值、标准差、极值、中位数等分位数),默认只统计数值型数据
transpose 转置,也可用T来操作
sort_index 按索引排序,可按行索引或列标签排序输出
sort_values 按数据值来排序
import pandas as pd
import numpy as np
#--
# Build a DatetimeIndex of 6 consecutive month-end dates starting from 2019-01-01.
# The keyword is `freq` (the original `frep` raised TypeError). The month-end
# frequency is passed as an offset object because the string alias differs
# between pandas releases ('M' on older versions, 'ME' on newer ones).
date = pd.date_range('20190101', periods=6, freq=pd.offsets.MonthEnd())
print(date)
#列参数
DatetimeIndex(['2019-01-31', '2019-02-28', '2019-03-31', '2019-04-30',
'2019-05-31', '2019-06-30'],
dtype='datetime64[ns]', freq='M')
# Generate random numbers: fix the seed so the values are reproducible, then
# draw a 6x4 array from the standard normal distribution and scale it by 10.
# (`np.random.randon` does not exist; `randn` takes the dimensions positionally.)
np.random.seed(1)
arr = 10 * np.random.randn(6, 4)
print(arr)
#行参数
# Wrap the random array in a DataFrame: rows are labelled by the `date`
# index created earlier, columns are named c1..c4.
df = pd.DataFrame(arr, index=date, columns=['c1', 'c2', 'c3', 'c4'])
print(df)  # prints the frame as a table
# Inspect the data type of each column
print(df.dtypes)
# Inspect the column labels
print(df.columns)
Index(['c1', 'c2', 'c3', 'c4'], dtype='object')
# Sort by the row index (axis=0). sort_index sorts in ascending order by
# default; pass ascending=False to get largest-to-smallest order.
df.sort_index(axis=0)
# Sort by the column labels [c1, c2, c3, c4] (axis=1), also ascending by default.
df.sort_index(axis=1)
# Select rows by label range with .loc (label slices include both endpoints)
df.loc['2019-01-01':'2019-01-03']
# Positional slicing, like indexing a numpy array: rows 1-3, columns 1-3
df.iloc[1:4, 1:4]
# Boolean filtering: keep only the rows whose c1 value is greater than 0
df[df['c1'] > 0]
# Replacing values in a DataFrame: set the first 3 rows of the first 2 columns to 0
df.iloc[0:3, 0:2] = 0
#读取csv文件
import pandas as pd
from io import StringIO
# In-memory CSV text with missing fields; wrapped in StringIO so that
# read_csv can consume it like a file.
test_data = StringIO('''
5.1,,1.4,0.2
4.9,3.0,1.4,0.2
4.7,3.2,,0.2
7.0,3.2,4.7,1.4
6.4,3.2,4.5,1.5
6.9,3.1,4.9,
,,,
''')
# The data has no header row, so the column names are supplied directly.
df = pd.read_csv(test_data, header=None, names=['c1', 'c2', 'c3', 'c4'])
df
c1 c2 c3 c4
0 5.1 NaN 1.4 0.2
1 4.9 3.0 1.4 0.2
2 4.7 3.2 NaN 0.2
3 7.0 3.2 4.7 1.4
4 6.4 3.2 4.5 1.5
5 6.9 3.1 4.9 NaN
6 NaN NaN NaN NaN
#读取json文件
# JSON text holding an array of records; each object becomes one DataFrame row.
strtext = '[{"ttery":"min","issue":"20130801-3391","code":"8,4,5,2,9","code1":"297734529","code2":null,"time":1013395466000},\
{"ttery":"min","issue":"20130801-3390","code":"7,8,2,1,2","code1":"298058212","code2":null,"time":1013395406000},\
{"ttery":"min","issue":"20130801-3389","code":"5,9,1,2,9","code1":"298329129","code2":null,"time":1013395346000},\
{"ttery":"min","issue":"20130801-3388","code":"3,8,7,3,3","code1":"298588733","code2":null,"time":1013395286000},\
{"ttery":"min","issue":"20130801-3387","code":"0,8,5,2,7","code1":"298818527","code2":null,"time":1013395226000}]'
# Wrap the text in StringIO: handing a literal JSON string straight to
# read_json is deprecated in recent pandas releases, a file-like object is not.
df = pd.read_json(StringIO(strtext), orient='records')
df
#通过sql语句读取数据
import numpy as np
import pandas as pd
import pymysql
def conn(sql):
    """Run *sql* against the local MySQL database and return the result.

    A new pymysql connection is opened for every call, the query result is
    read with ``pd.read_sql``, and the connection is always closed afterwards.

    Args:
        sql: SQL query text handed to ``pd.read_sql``.

    Returns:
        Whatever ``pd.read_sql`` returns on success, or ``None`` when reading
        fails (the error is printed instead of being raised).
    """
    # NOTE(review): host and credentials are hard-coded here; consider moving
    # them to configuration before using this outside of a local demo.
    # The local name differs from the function name to avoid shadowing it.
    connection = pymysql.connect(
        host="localhost",
        port=3306,
        user="root",
        passwd="123",
        db="db1",
    )
    try:
        return pd.read_sql(sql, con=connection)
    except Exception as e:
        # Best-effort behavior kept from the original: report and return None.
        # The underlying error is now printed too, since failures other than
        # bad SQL also end up here.
        print("SQL is not correct!", e)
        return None
    finally:
        # Release the connection whether or not the query succeeded.
        connection.close()
sql = "select * from test1 limit 0, 10"  # SQL statement: at most 10 rows from test1
data = conn(sql)
# conn() returns None when the query fails, so guard before using the result.
if data is not None:
    print(data.columns.tolist())  # list the field (column) names
    print(data)  # show the fetched rows