Counter 类可以统计列表中每个元素出现的次数
most_common(10) 可以提取出现次数最多的前十个元素及其计数

from collections import Counter

a = ['q','q','w','w','w']

count = Counter(a)

count.most_common(1)

[('w', 3)]

count

Counter({'q': 2, 'w': 3})

pandas 中的 Series 对象有一个 value_counts 方法可以计数
.fillna() 方法可以替换缺失值（NA）

import numpy as np

from numpy.random import randn

data = {i : randn() for i in range(7)}

data

{0: -0.2657989059225722,

 1: -1.2517286143172295,

 2: -0.6360811023039581,

 3: 1.2009891917346602,

 4: 1.7528414640242418,

 5: -0.24155970563487628,

 6: -0.7637924413712933}

在 IPython 中，最近的两个输出结果分别保存在变量 _ 和 __ 中

9*3

%pwd

# 获得当前工作目录

'D:\\Code\\Python\\code'

魔法命令
%time 一条语句的执行时间
%timeit 执行多次的平均时间

numpy基础

import numpy as np

data1 = [6, 7.5, 8],[2, 0 ,1]

arr1 = np.array(data1)

arr1

array([[6. , 7.5, 8. ],

       [2. , 0. , 1. ]])

arr1.ndim

arr1.shape

(2, 3)

arr1.dtype

dtype('float64')

np.zeros((2, 3, 4))

array([[[0., 0., 0., 0.],

        [0., 0., 0., 0.],

        [0., 0., 0., 0.]],

       [[0., 0., 0., 0.],

        [0., 0., 0., 0.],

        [0., 0., 0., 0.]]])

np.ones((3))

array([1., 1., 1.])

np.arange(9)

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

np.eye(3,)

array([[1., 0., 0.],

       [0., 1., 0.],

       [0., 0., 1.]])

数组切片后的修改会反映到原始数组上

arr = np.arange(9)

arr2 = arr[5:8]

arr2[:] = 4

arr

array([0, 1, 2, 3, 4, 4, 4, 4, 8])

arr = np.arange(9)

arr2 = arr[5:8].copy()

arr2[:] = 4

arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

三维数组先是层、行、列

data = np.random.randn(4,3)

data

array([[ 0.7144327 ,  0.87144603,  1.10651404],

       [-0.19509352, -0.01102958,  1.8051039 ],

       [ 0.03106339,  0.83767495,  0.20094192],

       [ 0.96032146, -0.40303045,  1.4522938 ]])

data[[1,2,0],[1,2,0]]

# 取出来的数据为位置 (1,1)、(2,2)、(0,0) 上的元素

array([-0.01102958,  0.20094192,  0.7144327 ])

不连续提取数据

data[[0,2]][:,[0,2]]

# 跳着取方法一

array([[0.7144327 , 1.10651404],

       [0.03106339, 0.20094192]])

data[np.ix_([0,2],[0,2])]

# 跳着取方法二

array([[0.7144327 , 1.10651404],

       [0.03106339, 0.20094192]])

data.T

array([[ 0.7144327 , -0.19509352,  0.03106339,  0.96032146],

       [ 0.87144603, -0.01102958,  0.83767495, -0.40303045],

       [ 1.10651404,  1.8051039 ,  0.20094192,  1.4522938 ]])

np.sqrt(data)

F:\Anaconda\lib\site-packages\ipykernel_launcher.py:1: RuntimeWarning: invalid value encountered in sqrt

  """Entry point for launching an IPython kernel.

array([[0.84524121, 0.93351274, 1.05190971],

       [       nan,        nan, 1.34354155],

       [0.17624808, 0.91524584, 0.44826546],

       [0.97995993,        nan, 1.20511153]])

np.exp(data)

array([[2.04302734, 2.39036489, 3.02379915],

       [0.82275771, 0.98903102, 6.0806032 ],

       [1.03155089, 2.31098757, 1.22255377],

       [2.61253617, 0.66829175, 4.27290447]])

np.rint(data)

# 舍入到最接近的整数（恰好为 .5 时取最近的偶数，与传统的四舍五入略有不同）

array([[ 1.,  1.,  1.],

       [-0., -0.,  2.],

       [ 0.,  1.,  0.],

       [ 1., -0.,  1.]])

np.modf(data)

# 将数据分为小数和整数部分

(array([[ 0.7144327 ,  0.87144603,  0.10651404],

        [-0.19509352, -0.01102958,  0.8051039 ],

        [ 0.03106339,  0.83767495,  0.20094192],

        [ 0.96032146, -0.40303045,  0.4522938 ]]), array([[ 0.,  0.,  1.],

        [-0., -0.,  1.],

        [ 0.,  0.,  0.],

        [ 0., -0.,  1.]]))

np.isnan(data)

array([[False, False, False],

       [False, False, False],

       [False, False, False],

       [False, False, False]])

np.where(data > 0,9,data)

array([[ 9.        ,  9.        ,  9.        ],

       [-0.19509352, -0.01102958,  9.        ],

       [ 9.        ,  9.        ,  9.        ],

       [ 9.        , -0.40303045,  9.        ]])

axis=0 表示沿竖向（对每一列）进行计算，axis=1 表示沿横向（对每一行）进行计算；求和、求均值等聚合函数均适用

np.mean(data,axis=1)

array([0.89746426, 0.5329936 , 0.35656009, 0.6698616 ])

np.in1d(data, [1,2,3])

# 查看data中每个元素是否在1，2，3内

array([False, False, False, False, False, False, False, False, False,

       False, False, False])

from numpy.linalg import inv, qr

from numpy.random import randn

x = randn(5,5)

mat = x.dot(inv(x))

# inv(x) 求 x 的逆矩阵；x 与其逆矩阵相乘应得到（近似的）单位矩阵

mat = np.rint(mat)

mat

array([[ 1.,  0., -0.,  0.,  0.],

       [ 0.,  1., -0.,  0.,  0.],

       [ 0., -0.,  1.,  0.,  0.],

       [-0.,  0., -0.,  1., -0.],

       [-0.,  0.,  0., -0.,  1.]])

np.diag(mat)

# 返回对角线元素

array([1., 1., 1., 1., 1.])

np.random.permutation(mat)

# 返回序列的随机排列（对多维数组只沿第一个轴打乱，即按行重排）

array([[ 0., -0.,  1.,  0.,  0.],

       [-0.,  0., -0.,  1., -0.],

       [ 1.,  0., -0.,  0.,  0.],

       [-0.,  0.,  0., -0.,  1.],

       [ 0.,  1., -0.,  0.,  0.]])

np.random.randint(0,2,12)

array([1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1])

numpy基础

python学习笔记（三）：numpy基础

numpy基础