numpy 文档

In [1]:
# https://numpy.org/devdocs/user/quickstart.html

#推荐 https://www.yiibai.com/numpy/
#https://www.jianshu.com/p/83c8ef18a1e8
# NumPy 通常与 SciPy(Scientific Python)和 Matplotlib(绘图库)一起使用。

#NumPy是Python语言的一个扩充程序库。支持高级大量的维度数组与矩阵运算,
#此外也针对数组运算提供大量的数学函数库。Numpy内部解除了Python的PIL(全局解释器锁),
#运算效率极好,是大量机器学习框架的基础库!

# pip install numpy

import numpy as np

a=[1,2,3,4]
b=np.array(a)
print(a)
print(b)

print("1>",b.size) #数组元素个数
print("2>",b.shape) #数组形状
print("3>",b.ndim) #数组维度
print("4>",b.dtype) #数组元素类型
print("5>",b.itemsize) #对象中每个元素的大小,以字节为单位
[1, 2, 3, 4]
[1 2 3 4]
1> 4
2> (4,)
3> 1
4> int32
5> 4

创建与获取

一维数组

In [2]:
# 一维数组
a=np.array([1,23,4])
b=np.array((1,23,4))
print('a=',a, '; b=',b)
a= [ 1 23  4] ; b= [ 1 23  4]
In [3]:
## 指定数据类型
a=np.array([1,23,4],dtype=np.int)
print(a.dtype, a) #np.array([1,23,4])

a=np.array([1,23,4],dtype=np.float)
print(a.dtype, b) #float64
int32 [ 1 23  4]
float64 [ 1 23  4]

二维数组

In [4]:
# 二维数组
array=np.array([
    [1,2,3],
    [4,5,6]
])
array
Out[4]:
array([[1, 2, 3],
       [4, 5, 6]])
In [5]:
# 获取元素(从0开始计数)
print(len(array), array[0]) #2个元素(每个元素也是数组),第0个元素是数组[1 2 3]
print(len(array[0]), array[0][1], array[0,1]) #第0个元素长度是3,里面的第一个元素是2;
# 二维数组的元素,可以[i][j]获取,也可以[i,j]获取
2 [1 2 3]
3 2 2
In [6]:
# 获取数组的元素总数,形状,维度个数
print(array.size)
print(array.shape)
print(array.ndim)
6
(2, 3)
2
In [7]:
print(array.dtype)
print(array.dtype.name)
print(array.itemsize)

type(array)
int32
int32
4
Out[7]:
numpy.ndarray
In [8]:
# 行列转置
array.T
Out[8]:
array([[1, 4],
       [2, 5],
       [3, 6]])

reshape变形、多维数组

reshape()能把arange()产生的数据序列再排序,按照第一维从左到右,其余从上到下;

In [9]:
#一维数组
a=np.arange(12)
print("a=",a)

#变成4行3列
a2=a.reshape(4,3)
print('a2=',a2)
a= [ 0  1  2  3  4  5  6  7  8  9 10 11]
a2= [[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]
In [10]:
#变成竖排的4行3列,思路:先变成3行4列,再行列转置
a3=a.reshape(3,4).T
print('a3=',a3)
a3= [[ 0  4  8]
 [ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]]
In [11]:
# 更高维度的数组
a=np.arange(24).reshape(2,3,4) #2个,每个3行4列。C语言风格的下标,越往后的维度,下标变动越快。
a
Out[11]:
array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]]])
In [12]:
print(a[0], "\n") #最外围的维度,包含元素越多。
print(a[0][0], a[0][0][1], "\n") #最外围的第一个维度中,下一维度是行,最后是列(一个数字)
print(a[1])
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]] 

[0 1 2 3] 1 

[[12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]]

创建0矩阵、单位矩阵

In [13]:
a=np.zeros((3,4));
print(a) #3行4列的0矩阵。
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
In [14]:
np.ones((2,4)) #2行4列的1矩阵
Out[14]:
array([[1., 1., 1., 1.],
       [1., 1., 1., 1.]])
In [15]:
np.identity(4) #单位矩阵
Out[15]:
array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])
In [16]:
np.empty( (6, 6) )   # uninitialized #空矩阵,受到上一个初始化矩阵的影响
Out[16]:
array([[4.45619116e-313, 1.38338381e-322, 9.47957064e-312,
        4.94065646e-324, 0.00000000e+000, 5.51718906e-313],
       [7.90505033e-323, 9.47972338e-312, 9.47972338e-312,
        9.47972338e-312, 3.60739285e-313, 2.96439388e-323],
       [9.47972338e-312, 9.47972338e-312, 0.00000000e+000,
        0.00000000e+000, 0.00000000e+000, 1.69759663e-313],
       [4.94065646e-324, 9.88131292e-324, 4.94065646e-324,
        9.47972338e-312, 8.39911598e-323, 9.72228417e-315],
       [0.00000000e+000, 0.00000000e+000, 5.51718906e-313,
        1.38338381e-322, 9.47955314e-312, 4.94065646e-324],
       [0.00000000e+000, 2.33419537e-313, 4.94065646e-324,
        9.47972338e-312, 0.00000000e+000, 0.00000000e+000]])

创建等差数列

有2种实现:

  • np.arange() 指定首尾,间隔。
  • np.linspace() 指定首尾,输出几个。
In [17]:
# arange区间是[左闭,右开)的,参数顺序 start, end, space
a1=np.arange(10,20) #array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])
a2=np.arange(10,20,2) #等差是2 array([10, 12, 14, 16, 18])
print(a1)
print(a2)
[10 11 12 13 14 15 16 17 18 19]
[10 12 14 16 18]
In [18]:
#在-5,5之间,每隔0.5产生一个数
np.arange(-5,5,0.5)
Out[18]:
array([-5. , -4.5, -4. , -3.5, -3. , -2.5, -2. , -1.5, -1. , -0.5,  0. ,
        0.5,  1. ,  1.5,  2. ,  2.5,  3. ,  3.5,  4. ,  4.5])
In [19]:
#太多则显示时会跳过
print(np.arange(10000))
[   0    1    2 ... 9997 9998 9999]
In [20]:
#更严谨的,对小数位数控制更精确的是np.linspace()
#在-5,5之间,相同间隔,产生n=10个数字
np.linspace(-5,5,10)
Out[20]:
array([-5.        , -3.88888889, -2.77777778, -1.66666667, -0.55555556,
        0.55555556,  1.66666667,  2.77777778,  3.88888889,  5.        ])
In [21]:
# 常用于画图时的坐标
x = np.linspace( 0, 2*np.pi, 50 ) # useful to evaluate function at lots of points

f = np.sin(x)
print(len(f), f[0:10])

import matplotlib.pyplot as plt
plt.scatter(x,f)
plt.show()
50 [0.         0.12787716 0.25365458 0.375267   0.49071755 0.59811053
 0.69568255 0.78183148 0.85514276 0.91441262]
<Figure size 640x480 with 1 Axes>

切片后还是数组

In [22]:
# 切片也是[左闭右开)区间
t=np.arange(10)**3
print(t[0]) #取出一位
print(t[0:1]) #数组切片后还是数组
t
0
[0]
Out[22]:
array([  0,   1,   8,  27,  64, 125, 216, 343, 512, 729], dtype=int32)
In [23]:
t[8:100] #多了也没有,也不报错。只显示能显示的位数
Out[23]:
array([512, 729], dtype=int32)
In [24]:
t[2:5] # [左闭右开)
Out[24]:
array([ 8, 27, 64], dtype=int32)
In [25]:
# 第三个参数是间隔
print(t[::2]) # from start to end, every 2 item: 0,2,4, ...
print(t[:6:2]) # [0, 6) 每隔2个取一个
t[::-1]  # reversed
[  0   8  64 216 512]
[ 0  8 64]
Out[25]:
array([729, 512, 343, 216, 125,  64,  27,   8,   1,   0], dtype=int32)
In [26]:
for i in t:
    print(i, "\t", i**(1 / 3))
0 	 0.0
1 	 1.0
8 	 2.0
27 	 3.0
64 	 3.9999999999999996
125 	 5.0
216 	 5.999999999999999
343 	 6.999999999999999
512 	 7.999999999999999
729 	 8.999999999999998

新增、删除元素

In [27]:
# np增加一个元素
a1=np.array([1,2,3])
a2=np.append(a1,400)
print(a1,a2) #说明不改变原来的值
[1 2 3] [  1   2   3 400]
In [28]:
# 删除一个元素
a=np.array([1,2,3])
a2=np.delete(a,1,axis=0)
print(a,a2)
[1 2 3] [1 3]
In [29]:
# 删除多个元素
a=np.array([1,2,3,4,5,6])
a2=np.delete(a,[2,3,4],axis=0) #删除2,3,4位置的元素: 3,4,5,留下的是1,2,6
print(a,a2)
[1 2 3 4 5 6] [1 2 6]

创建随机矩阵

In [30]:
np.random.random((2,3))
Out[30]:
array([[0.62029509, 0.60048788, 0.9172909 ],
       [0.79063187, 0.42748123, 0.21276437]])

查找元素的位置 np.where

In [31]:
a=np.arange(12).reshape(3,4)
a
Out[31]:
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])
In [32]:
a==8
Out[32]:
array([[False, False, False, False],
       [False, False, False, False],
       [ True, False, False, False]])
In [33]:
b=np.where(a==8)
print(b)
print(b[0], b[1]) #8在第2行第0列
(array([2], dtype=int64), array([0], dtype=int64))
[2] [0]
In [34]:
print(b[0], b[1], '\na[b]=',a[b], '\na[a==8]=',a[a==8])
[2] [0] 
a[b]= [8] 
a[a==8]= [8]
In [35]:
help(np.where)
Help on function where in module numpy:

where(...)
    where(condition, [x, y])
    
    Return elements chosen from `x` or `y` depending on `condition`.
    
    .. note::
        When only `condition` is provided, this function is a shorthand for
        ``np.asarray(condition).nonzero()``. Using `nonzero` directly should be
        preferred, as it behaves correctly for subclasses. The rest of this
        documentation covers only the case where all three arguments are
        provided.
    
    Parameters
    ----------
    condition : array_like, bool
        Where True, yield `x`, otherwise yield `y`.
    x, y : array_like
        Values from which to choose. `x`, `y` and `condition` need to be
        broadcastable to some shape.
    
    Returns
    -------
    out : ndarray
        An array with elements from `x` where `condition` is True, and elements
        from `y` elsewhere.
    
    See Also
    --------
    choose
    nonzero : The function that is called when x and y are omitted
    
    Notes
    -----
    If all the arrays are 1-D, `where` is equivalent to::
    
        [xv if c else yv
         for c, xv, yv in zip(condition, x, y)]
    
    Examples
    --------
    >>> a = np.arange(10)
    >>> a
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    >>> np.where(a < 5, a, 10*a)
    array([ 0,  1,  2,  3,  4, 50, 60, 70, 80, 90])
    
    This can be used on multidimensional arrays too:
    
    >>> np.where([[True, False], [True, True]],
    ...          [[1, 2], [3, 4]],
    ...          [[9, 8], [7, 6]])
    array([[1, 8],
           [3, 4]])
    
    The shapes of x, y, and the condition are broadcast together:
    
    >>> x, y = np.ogrid[:3, :4]
    >>> np.where(x < y, x, 10 + y)  # both x and 10+y are broadcast
    array([[10,  0,  0,  0],
           [10, 11,  1,  1],
           [10, 11, 12,  2]])
    
    >>> a = np.array([[0, 1, 2],
    ...               [0, 2, 4],
    ...               [0, 3, 6]])
    >>> np.where(a < 4, a, -1)  # -1 is broadcast
    array([[ 0,  1,  2],
           [ 0,  2, -1],
           [ 0,  3, -1]])

通过函数创建矩阵

In [36]:
# 通过函数赋值,传入参数是下标

def f2(x,y): #传入坐标
    return 10*x + y +0.2

b=np.fromfunction(f2, (5,4), dtype=int)
b
Out[36]:
array([[ 0.2,  1.2,  2.2,  3.2],
       [10.2, 11.2, 12.2, 13.2],
       [20.2, 21.2, 22.2, 23.2],
       [30.2, 31.2, 32.2, 33.2],
       [40.2, 41.2, 42.2, 43.2]])
In [37]:
for row in b: #按row迭代
    print(row)
[0.2 1.2 2.2 3.2]
[10.2 11.2 12.2 13.2]
[20.2 21.2 22.2 23.2]
[30.2 31.2 32.2 33.2]
[40.2 41.2 42.2 43.2]
In [38]:
#按元素迭代 b.flat
for ele in b.flat:
    print(ele)
0.2
1.2
2.2
3.2
10.2
11.2
12.2
13.2
20.2
21.2
22.2
23.2
30.2
31.2
32.2
33.2
40.2
41.2
42.2
43.2

运算

np数据类型,是对每个元素分别运算。

使用numpy,能减少了for循环的使用。

算术运算

In [39]:
a=np.array([10,20,40,30])
b=np.arange(4)
print(a,b) #[10 20 40 30] [0 1 2 3]

np.add(a, b) #逐位相加
[10 20 40 30] [0 1 2 3]
Out[39]:
array([10, 21, 42, 33])
In [40]:
#加减法,就是逐位运算
c=a-b
print(c)
[10 19 38 27]
In [41]:
a*b #逐位相乘
Out[41]:
array([ 0, 20, 80, 90])
In [42]:
b**2 #乘方
Out[42]:
array([0, 1, 4, 9], dtype=int32)
In [43]:
10*np.sin(a) #三角函数 
Out[43]:
array([-5.44021111,  9.12945251,  7.4511316 , -9.88031624])
In [44]:
X=np.array([1,2,3]) #幂运算
print(np.exp(X))

np.power(2, [0,1,2,3,4,5])
[ 2.71828183  7.3890561  20.08553692]
Out[44]:
array([ 1,  2,  4,  8, 16, 32], dtype=int32)
In [45]:
np.sqrt(b) #开方
Out[45]:
array([0.        , 1.        , 1.41421356, 1.73205081])
In [46]:
#逻辑运算
b>2
Out[46]:
array([False, False, False,  True])

矩阵的运算

创建随机矩阵

In [47]:
np.random.randint(low=5, high=30, size=(5, 5))
Out[47]:
array([[25, 17, 15, 14, 16],
       [27,  9, 11, 27, 26],
       [25, 23, 16, 15, 11],
       [16, 12, 20,  5, 19],
       [13, 13, 11,  6, 15]])

单位矩阵 np.eye(n)

In [48]:
np.eye(4)
Out[48]:
array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

点乘 - 矩阵的乘法(加权和)

In [49]:
count=np.array([1,2,3])
price=np.array([20,3,5])

count.dot(price.T)
Out[49]:
41
In [50]:
count.T.dot(price)
Out[50]:
41
In [51]:
count.T #一维没有转置的概念?
Out[51]:
array([1, 2, 3])
In [52]:
#二维行向量,A.BT是数,AT.B是矩阵
count=np.array([[1,2,3]])
price=np.array([[20,3,5]])

count.dot(price.T)
Out[52]:
array([[41]])
In [53]:
count.T.dot(price)
Out[53]:
array([[20,  3,  5],
       [40,  6, 10],
       [60,  9, 15]])
In [54]:
A = np.array( [[1,1,0],
             [0,1,2]] )
B = np.array( [[2,0],
             [3,4],
              [5,6]] )
print(A)
print()

print(B)
[[1 1 0]
 [0 1 2]]

[[2 0]
 [3 4]
 [5 6]]
In [55]:
print(A@B) #或者 
A.dot(B)
[[ 5  4]
 [13 16]]
Out[55]:
array([[ 5,  4],
       [13, 16]])

矩阵的逆

In [56]:
import numpy as np
a=np.array([
    [1,2,3],
    [4,5,6],
    [7,8,0]
])
print(a)
[[1 2 3]
 [4 5 6]
 [7 8 0]]
In [57]:
## 变为矩阵
matA=np.mat(a)
b=matA.I

print(matA)
b
[[1 2 3]
 [4 5 6]
 [7 8 0]]
Out[57]:
matrix([[-1.77777778,  0.88888889, -0.11111111],
        [ 1.55555556, -0.77777778,  0.22222222],
        [-0.11111111,  0.22222222, -0.11111111]])
In [58]:
type(matA)
Out[58]:
numpy.matrix
In [59]:
matA@b #矩阵*其逆矩阵 = 单位矩阵
Out[59]:
matrix([[ 1.00000000e+00,  5.55111512e-17,  1.38777878e-17],
        [ 5.55111512e-17,  1.00000000e+00,  2.77555756e-17],
        [ 1.77635684e-15, -8.88178420e-16,  1.00000000e+00]])
In [60]:
matA@b-np.eye(3)
Out[60]:
matrix([[-1.11022302e-16,  5.55111512e-17,  1.38777878e-17],
        [ 5.55111512e-17,  0.00000000e+00,  2.77555756e-17],
        [ 1.77635684e-15, -8.88178420e-16,  0.00000000e+00]])

集合运算

In [61]:
# 删除b数组中出现过的元素,就是求差集
a=np.array([1,2,3,400])
b=np.array([2,3,4,500])
c=np.setdiff1d(a,b) #a-b的差集
print(a,b,c)
[  1   2   3 400] [  2   3   4 500] [  1 400]

统计函数(指定轴)

In [62]:
np.random.seed(1)
a=np.random.random([2,3])
print(a)

print('sum=', a.sum())
print('mean=', a.mean())
print('min=', a.min())
print('max=', a.max())
[[4.17022005e-01 7.20324493e-01 1.14374817e-04]
 [3.02332573e-01 1.46755891e-01 9.23385948e-02]]
sum= 1.6788879311798277
mean= 0.27981465519663795
min= 0.00011437481734488664
max= 0.7203244934421581
In [63]:
b=np.arange(12).reshape(4,3)
print(b)

print( b.sum() ) #总和 66
print('axis=0:', b.sum(axis=0) ) #列求和
print('axis=1:',  b.sum(axis=1) ) #行求和

b.cumsum(axis=1) #每一行的累加
[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]
66
axis=0: [18 22 26]
axis=1: [ 3 12 21 30]
Out[63]:
array([[ 0,  1,  3],
       [ 3,  7, 12],
       [ 6, 13, 21],
       [ 9, 19, 30]], dtype=int32)
In [64]:
print(b.sum(axis=0) )  # sum of each column
print(b.sum(axis=1) ) # sum of each row

print( b.min(axis=1) ) # min of each row


#
b.cumsum(axis=1)  # cumulative sum along each row
[18 22 26]
[ 3 12 21 30]
[0 3 6 9]
Out[64]:
array([[ 0,  1,  3],
       [ 3,  7, 12],
       [ 6, 13, 21],
       [ 9, 19, 30]], dtype=int32)
In [65]:
print(b)
print(b.ndim) #2 维度
print(b.shape) #(4,3) 4行3列
print(b.size) #12
print(b.dtype) #int64

print('='*10)
print(b.itemsize) #8 等价于下句
print(b.dtype.itemsize) #8

print(b.data) #<memory at 0x0000025D17EDB708> 不常用
print( id(b) )
print(type(b)) #<class 'numpy.ndarray'>
[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]
2
(4, 3)
12
int32
==========
4
4
<memory at 0x000001BEDEE568B8>
1919297979376
<class 'numpy.ndarray'>

矩阵变形

有 reshape, resize, ravel 等几个函数。

In [66]:
a=np.arange(12).reshape((3,4))
print(a.shape)
a
(3, 4)
Out[66]:
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])
In [67]:
#返回摊平的数组,但是不改变原始数组
#展开的时候,是C语言风格,就是最右侧的下标变动最快
a.ravel() 
Out[67]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])
In [68]:
a.reshape([6,2])
Out[68]:
array([[ 0,  1],
       [ 2,  3],
       [ 4,  5],
       [ 6,  7],
       [ 8,  9],
       [10, 11]])
In [69]:
a.reshape(6,2) #参数中加不加[]都一样
Out[69]:
array([[ 0,  1],
       [ 2,  3],
       [ 4,  5],
       [ 6,  7],
       [ 8,  9],
       [10, 11]])
In [70]:
print(a.T) # returns the array, transposed 转置

print("转置前", a.shape)
print("转置后", a.T.shape)
[[ 0  4  8]
 [ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]]
转置前 (3, 4)
转置后 (4, 3)
In [71]:
# reshape 的一个参数如果是-1,则会根据另一个参数会自动计算
a.reshape(6, -1)
Out[71]:
array([[ 0,  1],
       [ 2,  3],
       [ 4,  5],
       [ 6,  7],
       [ 8,  9],
       [10, 11]])
In [72]:
# reshape 不改变原始矩阵,会返回一个新矩阵的 view。
# 但是 resize 则直接改变原始矩阵
a.resize(2,6)
a
Out[72]:
array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11]])

数组拼接

In [73]:
a=np.arange(4).reshape(2,2)
b=np.arange(10,14).reshape(2,-1)
print(a,"\n\n", b)
[[0 1]
 [2 3]] 

 [[10 11]
 [12 13]]
In [74]:
np.vstack( (a,b) ) #垂直合并
Out[74]:
array([[ 0,  1],
       [ 2,  3],
       [10, 11],
       [12, 13]])
In [75]:
np.hstack( (a,b) ) #水平合并
Out[75]:
array([[ 0,  1, 10, 11],
       [ 2,  3, 12, 13]])
In [76]:
# column_stack 能把1d 数组 当做列 合并为2d数组。对于2d数组,它和hstack输出是一样的。
np.column_stack( (a,b) )  # with 2D arrays, 就是 hstack
Out[76]:
array([[ 0,  1, 10, 11],
       [ 2,  3, 12, 13]])
In [77]:
np.row_stack( (a,b) ) #对于任意输入,都和vstack一样
Out[77]:
array([[ 0,  1],
       [ 2,  3],
       [10, 11],
       [12, 13]])
In [78]:
# 对于输入1d数组, column_stack 和 hstack 结果不一样。
a1=np.array([1,2,3])
a2=np.array([4,5,6])
print(a1, "\n")
print(a2, "\n")
print(np.column_stack((a1,  a2))) #把每个1d数组当做一列,输出2d数组
print(np.hstack( (a1, a2) )) #把数组水平拼接起来
[1 2 3] 

[4 5 6] 

[[1 4]
 [2 5]
 [3 6]]
[1 2 3 4 5 6]
In [79]:
print(np.column_stack is np.hstack) #F 对于输入2d数组时,输出没有差异。输入1d数组时,输出不一样。
print(np.row_stack is np.vstack) #T
False
True
In [80]:
help(np.concatenate)
Help on function concatenate in module numpy:

concatenate(...)
    concatenate((a1, a2, ...), axis=0, out=None)
    
    Join a sequence of arrays along an existing axis.
    
    Parameters
    ----------
    a1, a2, ... : sequence of array_like
        The arrays must have the same shape, except in the dimension
        corresponding to `axis` (the first, by default).
    axis : int, optional
        The axis along which the arrays will be joined.  If axis is None,
        arrays are flattened before use.  Default is 0.
    out : ndarray, optional
        If provided, the destination to place the result. The shape must be
        correct, matching that of what concatenate would have returned if no
        out argument were specified.
    
    Returns
    -------
    res : ndarray
        The concatenated array.
    
    See Also
    --------
    ma.concatenate : Concatenate function that preserves input masks.
    array_split : Split an array into multiple sub-arrays of equal or
                  near-equal size.
    split : Split array into a list of multiple sub-arrays of equal size.
    hsplit : Split array into multiple sub-arrays horizontally (column wise).
    vsplit : Split array into multiple sub-arrays vertically (row wise).
    dsplit : Split array into multiple sub-arrays along the 3rd axis (depth).
    stack : Stack a sequence of arrays along a new axis.
    block : Assemble arrays from blocks.
    hstack : Stack arrays in sequence horizontally (column wise).
    vstack : Stack arrays in sequence vertically (row wise).
    dstack : Stack arrays in sequence depth wise (along third dimension).
    column_stack : Stack 1-D arrays as columns into a 2-D array.
    
    Notes
    -----
    When one or more of the arrays to be concatenated is a MaskedArray,
    this function will return a MaskedArray object instead of an ndarray,
    but the input masks are *not* preserved. In cases where a MaskedArray
    is expected as input, use the ma.concatenate function from the masked
    array module instead.
    
    Examples
    --------
    >>> a = np.array([[1, 2], [3, 4]])
    >>> b = np.array([[5, 6]])
    >>> np.concatenate((a, b), axis=0)
    array([[1, 2],
           [3, 4],
           [5, 6]])
    >>> np.concatenate((a, b.T), axis=1)
    array([[1, 2, 5],
           [3, 4, 6]])
    >>> np.concatenate((a, b), axis=None)
    array([1, 2, 3, 4, 5, 6])
    
    This function will not preserve masking of MaskedArray inputs.
    
    >>> a = np.ma.arange(3)
    >>> a[1] = np.ma.masked
    >>> b = np.arange(2, 5)
    >>> a
    masked_array(data=[0, --, 2],
                 mask=[False,  True, False],
           fill_value=999999)
    >>> b
    array([2, 3, 4])
    >>> np.concatenate([a, b])
    masked_array(data=[0, 1, 2, 2, 3, 4],
                 mask=False,
           fill_value=999999)
    >>> np.ma.concatenate([a, b])
    masked_array(data=[0, --, 2, 2, 3, 4],
                 mask=[False,  True, False, False, False, False],
           fill_value=999999)

In [81]:
a=np.arange(5)
b=np.array([11,22,33])
print(a,b)
[0 1 2 3 4] [11 22 33]
In [82]:
print(np.concatenate((a,b) ) )
np.concatenate((a,b),axis=0) #axis沿着x轴(默认)方向合并数组
[ 0  1  2  3  4 11 22 33]
Out[82]:
array([ 0,  1,  2,  3,  4, 11, 22, 33])
In [83]:
a1=np.array([[1,2],[3,4]])
a2=np.array([[10,20]])
print('a1=',a1)
print('a2=',a2)
#
np.concatenate( (a1,a2), axis=0) #加到第0维后面
a1= [[1 2]
 [3 4]]
a2= [[10 20]]
Out[83]:
array([[ 1,  2],
       [ 3,  4],
       [10, 20]])
In [84]:
np.concatenate( (a1,a2.T), axis=1) #加到第一维后面
Out[84]:
array([[ 1,  2, 10],
       [ 3,  4, 20]])

多维数组的拼接

In [85]:
# 对一组多维数据拼接
a1=np.linspace(1,24,24).reshape(2,3,4)
a1
a2=np.linspace(-1,-24,24).reshape(2,3,4)
a2
a3=np.linspace(1,24,24).reshape(2,3,4)
a3
Out[85]:
array([[[ 1.,  2.,  3.,  4.],
        [ 5.,  6.,  7.,  8.],
        [ 9., 10., 11., 12.]],

       [[13., 14., 15., 16.],
        [17., 18., 19., 20.],
        [21., 22., 23., 24.]]])
In [86]:
s=list()
s.append(a1)
s.append(a2)
s.append(a3)
print(s)
[array([[[ 1.,  2.,  3.,  4.],
        [ 5.,  6.,  7.,  8.],
        [ 9., 10., 11., 12.]],

       [[13., 14., 15., 16.],
        [17., 18., 19., 20.],
        [21., 22., 23., 24.]]]), array([[[ -1.,  -2.,  -3.,  -4.],
        [ -5.,  -6.,  -7.,  -8.],
        [ -9., -10., -11., -12.]],

       [[-13., -14., -15., -16.],
        [-17., -18., -19., -20.],
        [-21., -22., -23., -24.]]]), array([[[ 1.,  2.,  3.,  4.],
        [ 5.,  6.,  7.,  8.],
        [ 9., 10., 11., 12.]],

       [[13., 14., 15., 16.],
        [17., 18., 19., 20.],
        [21., 22., 23., 24.]]])]
In [87]:
ei=np.concatenate(s, axis=1)
ei
Out[87]:
array([[[  1.,   2.,   3.,   4.],
        [  5.,   6.,   7.,   8.],
        [  9.,  10.,  11.,  12.],
        [ -1.,  -2.,  -3.,  -4.],
        [ -5.,  -6.,  -7.,  -8.],
        [ -9., -10., -11., -12.],
        [  1.,   2.,   3.,   4.],
        [  5.,   6.,   7.,   8.],
        [  9.,  10.,  11.,  12.]],

       [[ 13.,  14.,  15.,  16.],
        [ 17.,  18.,  19.,  20.],
        [ 21.,  22.,  23.,  24.],
        [-13., -14., -15., -16.],
        [-17., -18., -19., -20.],
        [-21., -22., -23., -24.],
        [ 13.,  14.,  15.,  16.],
        [ 17.,  18.,  19.,  20.],
        [ 21.,  22.,  23.,  24.]]])
In [88]:
es=ei.shape
es
Out[88]:
(2, 9, 4)
In [89]:
output=ei.reshape(es[0]*es[1], -1)
output
Out[89]:
array([[  1.,   2.,   3.,   4.],
       [  5.,   6.,   7.,   8.],
       [  9.,  10.,  11.,  12.],
       [ -1.,  -2.,  -3.,  -4.],
       [ -5.,  -6.,  -7.,  -8.],
       [ -9., -10., -11., -12.],
       [  1.,   2.,   3.,   4.],
       [  5.,   6.,   7.,   8.],
       [  9.,  10.,  11.,  12.],
       [ 13.,  14.,  15.,  16.],
       [ 17.,  18.,  19.,  20.],
       [ 21.,  22.,  23.,  24.],
       [-13., -14., -15., -16.],
       [-17., -18., -19., -20.],
       [-21., -22., -23., -24.],
       [ 13.,  14.,  15.,  16.],
       [ 17.,  18.,  19.,  20.],
       [ 21.,  22.,  23.,  24.]])
In [90]:
output.reshape(es[0], -1)
Out[90]:
array([[  1.,   2.,   3.,   4.,   5.,   6.,   7.,   8.,   9.,  10.,  11.,
         12.,  -1.,  -2.,  -3.,  -4.,  -5.,  -6.,  -7.,  -8.,  -9., -10.,
        -11., -12.,   1.,   2.,   3.,   4.,   5.,   6.,   7.,   8.,   9.,
         10.,  11.,  12.],
       [ 13.,  14.,  15.,  16.,  17.,  18.,  19.,  20.,  21.,  22.,  23.,
         24., -13., -14., -15., -16., -17., -18., -19., -20., -21., -22.,
        -23., -24.,  13.,  14.,  15.,  16.,  17.,  18.,  19.,  20.,  21.,
         22.,  23.,  24.]])

拆分矩阵 Splitting

In [91]:
a=np.arange(24).reshape(2,12)
print(a)

np.hsplit(a, 3) #水平拆分成3个
[[ 0  1  2  3  4  5  6  7  8  9 10 11]
 [12 13 14 15 16 17 18 19 20 21 22 23]]
Out[91]:
[array([[ 0,  1,  2,  3],
        [12, 13, 14, 15]]), array([[ 4,  5,  6,  7],
        [16, 17, 18, 19]]), array([[ 8,  9, 10, 11],
        [20, 21, 22, 23]])]
In [92]:
# Split `a` after the third and the fourth column
np.hsplit(a, (3, 4))
Out[92]:
[array([[ 0,  1,  2],
        [12, 13, 14]]), array([[ 3],
        [15]]), array([[ 4,  5,  6,  7,  8,  9, 10, 11],
        [16, 17, 18, 19, 20, 21, 22, 23]])]
In [93]:
np.vsplit(a, 2) #沿着竖直方向拆分成2个
Out[93]:
[array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11]]),
 array([[12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]])]
In [94]:
# 自己指定方向
np.array_split(a, 2, axis=1) #沿着竖直方向,拆分成2个
Out[94]:
[array([[ 0,  1,  2,  3,  4,  5],
        [12, 13, 14, 15, 16, 17]]), array([[ 6,  7,  8,  9, 10, 11],
        [18, 19, 20, 21, 22, 23]])]

不复制

In [95]:
a=np.array([
    [1,2,3],
    [4,5,6]
])
b=a
b is a
Out[95]:
True
In [96]:
# 函数传参不复制
def f3(x):
    print(id(x))

print(id(a))
f3(a)
1919298081296
1919298081296

浅拷贝

In [97]:
c=a.view() #复制一个副本
c is a
Out[97]:
False
In [98]:
c.base is a   # c is a view of the data owned by a ## bug 文档是 True
Out[98]:
True
In [99]:
print(a)
c
[[1 2 3]
 [4 5 6]]
Out[99]:
array([[1, 2, 3],
       [4, 5, 6]])
In [100]:
print(c.flags)

a.flags
  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : False
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

Out[100]:
  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False
In [101]:
print(c.flags.owndata)
print(a.flags.owndata)
False
True
In [102]:
c=c.reshape((3,2)) # a's shape doesn't change
print(c.shape)
a.shape
(3, 2)
Out[102]:
(2, 3)
In [103]:
a[1,:]=1234

c #也随着a而改变
Out[103]:
array([[   1,    2],
       [   3, 1234],
       [1234, 1234]])
In [104]:
a=a.reshape(2,3)
print(a)
c
[[   1    2    3]
 [1234 1234 1234]]
Out[104]:
array([[   1,    2],
       [   3, 1234],
       [1234, 1234]])
In [105]:
# reshape 返回的是一个view
a0=np.arange(12)
a1=a0.reshape(2,6)
print(a1)
print(a1.base is a0)
a1.base
[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]]
True
Out[105]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])
In [106]:
# 切片返回的也是一个view
s=a[:, 1:2]
s[:]=10 #对s的修改,能影响到a
print(s)
a
[[10]
 [10]]
Out[106]:
array([[   1,   10,    3],
       [1234,   10, 1234]])
In [107]:
# 对a的修改也能影响到切片 s
a[0,1]=1000
print(a)
s
[[   1 1000    3]
 [1234   10 1234]]
Out[107]:
array([[1000],
       [  10]])

深拷贝

In [108]:
a=np.array([
    [1,2],
    [3,4]
])
a
Out[108]:
array([[1, 2],
       [3, 4]])
In [109]:
d = a.copy()  # a new array object with new data is created
d is a
Out[109]:
False
In [110]:
d.base is a
Out[110]:
False
In [111]:
d[0,0]=1000 #对d的修改不影响a
print(d)
a
[[1000    2]
 [   3    4]]
Out[111]:
array([[1, 2],
       [3, 4]])
In [112]:
#如果只想保留切片,删除原始数据,则删除前需要 copy 一下。否则被引用着就一直删不掉
a=np.arange(int(1e5))
b=a[:10].copy
del a
b
Out[112]:
<function ndarray.copy>