Numpy
介紹
NumPy 是 N 維 array 的 library,實作了眾多相關的操作,發展到現在幾乎已經是 Python 科學運算公認的 API,就算是新開發的 library 也常常會與 NumPy 保持一定程度的 API 相容。
Numpy API 相容的其他 library:
- CuPy: Numpy API for CUDA
- Sparse: Numpy API for sparse arrays
- Dask: Numpy API in parallel for multi-core workstations or distributed clusters
Numpy Slicing
>>> import numpy as np
>>> data = np.mat('1 2 3; 4 5 6; 7 8 9')
>>> data
matrix([[1, 2, 3],
[4, 5, 6],
[7, 8, 9]])
>>> data[0, 1] # row 0, col 1 (note: data[0][1] fails on np.matrix, since data[0] is still 2-D)
2
>>> data[:, 0] # row : all, col : 0
matrix([[1],
[4],
[7]])
>>> data[1:, 1:]
matrix([[5, 6],
[8, 9]])
>>> data[:, (0, 2)] # row : all, col : (0, 2)
matrix([[1, 3],
[4, 6],
[7, 9]])
>>> data[1:-1, 1:-1]
matrix([[5]])
>>> data[(0, 2), (1, 1)] # parameters : rows & cols, access (0, 1) & (2, 1)
matrix([[2, 8]])
>>> data[(0, 2),
... (1, 1)]
matrix([[2, 8]])
>>> data[[1, 0]]
matrix([[4, 5, 6],
[1, 2, 3]])
>>> data[[[1, 0]]] # the same
matrix([[4, 5, 6],
[1, 2, 3]])
>>> data[[[[1, 0]]]]
matrix([[[4, 5, 6],
[1, 2, 3]]])
>>> data[((0, 0), (1, 2)), # rows
... ((2, 1), (2, 1))] # cols
matrix([[3, 2],
[6, 8]])
>>> data[(((0, 0), (1, 2)), ((0, 0), (1, 2))),
... (((2, 1), (2, 1)), ((2, 1), (2, 1)))]
matrix([[[3, 2],
[6, 8]],
[[3, 2],
[6, 8]]])
>>> data[ np.mat('0 0; 1 2') ]
matrix([[[1, 2, 3],
[1, 2, 3]],
[[4, 5, 6],
[7, 8, 9]]])
Numpy get n smallest data
np.partition(a, k)
會回傳一份新的 array(不會更動 a),其中第 k 小的 element 會在 index k,
不大於它的值在 [:k],不小於它的值在 [k+1:](兩側各自的順序不保證),
index 從 0 開始 (所以第五小的值會在 index 4)。
np.argpartition(a, k) 則回傳對應的 indices。
>>> import numpy as np
>>> data = np.array([7, 8, 1, 2, 5, 9, 0])
>>> data
array([7, 8, 1, 2, 5, 9, 0])
>>> np.argpartition(data, 0) # indices
array([6, 1, 2, 3, 4, 5, 0])
>>> data[np.argpartition(data, 0)] # guarantee the smallest value is at index 0
array([0, 8, 1, 2, 5, 9, 7])
>>> data[np.argpartition(data, 0)[0]] # smallest
0
>>> np.argpartition(data, 3) # indices
array([2, 6, 3, 4, 0, 5, 1])
>>> data[np.argpartition(data, 3)] # guarantee the 4th smallest value is at index 3
array([1, 0, 2, 5, 7, 9, 8])
>>> data[np.argpartition(data, 3)[3]] # 4th smallest
5
>>> np.partition(data, 3) # new array, like ``data[np.argpartition(data, 3)]``
array([1, 0, 2, 5, 7, 9, 8])
>>> np.partition(data, 3)[3] # 4th smallest
5
>>> data = np.mat('2 3 1; 5 6 4; 8 9 7')
>>> data
matrix([[2, 3, 1],
[5, 6, 4],
[8, 9, 7]])
>>> np.partition(data, 0)
matrix([[1, 3, 2],
[4, 6, 5],
[7, 9, 8]])
>>> np.partition(data, 0)[:, 0]
matrix([[1],
[4],
[7]])
>>> data = np.array([[7, 8, 1, 2, 5, 9, 0], [7, 8, 1, 2, 5, 9, 0]])
>>> data
array([[7, 8, 1, 2, 5, 9, 0],
[7, 8, 1, 2, 5, 9, 0]])
>>> np.argpartition(data, 0)
array([[6, 1, 2, 3, 4, 5, 0],
[6, 1, 2, 3, 4, 5, 0]])
>>> data.flat # flat iterator
<numpy.flatiter object at 0x13c04b0>
>>> np.argpartition(data.flat, 0)
array([ 6, 1, 2, 3, 4, 5, 0, 7, 8, 9, 10, 11, 12, 13])
>>> data.flat[np.argpartition(data.flat, 0)[0]]
0
>>> np.argpartition(data.flat, 1)
array([ 6, 13, 2, 3, 4, 5, 0, 7, 8, 9, 10, 11, 12, 1])
>>> data.flat[np.argpartition(data.flat, 1)[1]]
0
Assign with condition
>>> import numpy as np
>>> data = np.array(range(10))
>>> data
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
>>> data > 5
array([False, False, False, False, False, False, True, True, True, True], dtype=bool)
>>> data[data > 5]
array([6, 7, 8, 9])
>>> data[data > 5] = -1
>>> data
array([ 0, 1, 2, 3, 4, 5, -1, -1, -1, -1])
Extend Numpy Array With Values
>>> import numpy as np
>>> data = np.array([range(5)]*3)
>>> data
array([[0, 1, 2, 3, 4],
[0, 1, 2, 3, 4],
[0, 1, 2, 3, 4]])
>>> np.pad(data, ((0, 2), (2, 0)), 'constant', constant_values=9)
array([[9, 9, 0, 1, 2, 3, 4],
[9, 9, 0, 1, 2, 3, 4],
[9, 9, 0, 1, 2, 3, 4],
[9, 9, 9, 9, 9, 9, 9],
[9, 9, 9, 9, 9, 9, 9]])
Print The Full Numpy Array
>>> import numpy
>>> data = numpy.array(range(10000))
>>> print(data)
[ 0 1 2 ..., 9997 9998 9999]
>>> numpy.set_printoptions(threshold=numpy.inf)
>>> print(data) # print all the data
Generate An Array By Specific Function And Index
>>> import numpy
>>> numpy.fromfunction(lambda x, y: x+y, (3, 3), dtype=int)
array([[0, 1, 2],
[1, 2, 3],
[2, 3, 4]])
>>> numpy.fromfunction(lambda x, y: (-1)**(x+y), (3, 3), dtype=int)
array([[ 1, -1, 1],
[-1, 1, -1],
[ 1, -1, 1]])
#
# numpy.fromfunction(function, shape)
#
# function : called with one coordinate (index) array per axis
# shape    : shape of the array you want
“Complex” Numpy Array To “Real” Numpy Array
>>> data = np.array([i+i*2j for i in range(10)])
>>> data
array([ 0. +0.j, 1. +2.j, 2. +4.j, 3. +6.j, 4. +8.j, 5.+10.j,
6.+12.j, 7.+14.j, 8.+16.j, 9.+18.j])
>>> data.real
array([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])
>>> data.imag
array([ 0., 2., 4., 6., 8., 10., 12., 14., 16., 18.])
Insert Numpy Array
>>> data = np.array([range(5), range(5)])
>>> data
array([[0, 1, 2, 3, 4],
[0, 1, 2, 3, 4]])
>>> np.insert(data, slice(None, None), 42, axis=1)
array([[42, 0, 42, 1, 42, 2, 42, 3, 42, 4],
[42, 0, 42, 1, 42, 2, 42, 3, 42, 4]])
檢查使用狀況
>>> a = np.array((range(10),range(10)))
>>> np.who()
Name Shape Bytes Type
===========================================================
a 2 x 10 160 int64
Upper bound on total bytes = 160
自己為 NumPy Array 加額外 Attributes
# Sample data
old_array = np.array([1, 2, 3])


# Custom ndarray subclass, used only to carry extra attributes
class MyNumpyClass(np.ndarray):
    myattribute = 42


# View casting: reinterpret the existing array as MyNumpyClass without
# copying, so both names refer to the same block of memory.
new_array = old_array.view(MyNumpyClass)

new_array[0] = 42
print(new_array)
print(old_array)    # changed as well

old_array[1] = 5566
print(new_array)    # changed as well
print(old_array)

print(new_array.myattribute)    # 42

# new_array can now be passed to any NumPy function
NumPy Array 轉 Bytes
>>> import numpy
>>> data = numpy.array(range(10))
>>> data.tobytes()
b'\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\t\x00\x00\x00\x00\x00\x00\x00'
>>> data.view('S8') # S 表示 bytestring
array([b'', b'\x01', b'\x02', b'\x03', b'\x04', b'\x05', b'\x06', b'\x07',
b'\x08', b'\t'],
dtype='|S8')
效能
不管幾維的資料,記憶體內的儲存方式都是一維, 存放順序則會跟 layout 方式有關, C-order 是 Row Major Order, 會依序先存 row 1 再存 row 2, Fortran-order 是 Column Major Order, 會依序先存 column 1 再存 column 2。
import numpy as np

# C order is row-major: row 1 is stored first, then row 2, ...
row_major = np.zeros((10, 10), order='C')   # C
# Fortran order is column-major: column 1 is stored first, then column 2, ...
col_major = np.zeros((10, 10), order='F')   # Fortran
講求效能的話要注意用的 method 是 copy 還是 view 版本。
> import numpy as np
> a = np.zeros((200, 200), order='C')
> b = np.zeros((200, 200), order='F')
> np.concatenate((a, a), axis=0)
> np.concatenate((b, b), axis=0)
# 在 IPython 內
> %timeit np.concatenate((a, a), axis=0)
10000 loops, best of 3: 20.8 µs per loop
> %timeit np.concatenate((b, b), axis=0)
10000 loops, best of 3: 32.8 µs per loop
> %timeit np.vstack((a, a))
10000 loops, best of 3: 23.8 µs per loop
> %timeit np.vstack((b, b))
10000 loops, best of 3: 34.8 µs per loop
> %timeit a.ravel() # flat, no copy
1000000 loops, best of 3: 234 ns per loop # 1 ns = 0.001 µs
> %timeit a.flatten() # flat, copy
100000 loops, best of 3: 10.3 µs per loop