Scipy and Numpy

  • linear algbra linalg
In [3]:
import numpy as np

a1 = np.ones((2,5))
a1
Out[3]:
array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])
In [7]:
from scipy import linalg as la
arr = np.array([[1,2],[2,3]])
la.det(arr)
Out[7]:
-1.0

Array

  • append()
  • insert()
  • read()

ndarray

  • ndarray.ndim()
  • ndarray.shape()
  • ndarray.size()
  • ndarray.dtype()
  • ndarray.itemsize()
  • numpy.arange()
  • numpy.random.random((2,2))
  • numpy.linspace()
  • numpy.ones()
  • numpy.zeros()
In [9]:
import numpy as np
np.linspace(1,2,10,endpoint=False)
Out[9]:
array([1. , 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9])
In [10]:
np.fromfunction(lambda i,j:(i+1)*(j+1),(5,5))
Out[10]:
array([[ 1.,  2.,  3.,  4.,  5.],
       [ 2.,  4.,  6.,  8., 10.],
       [ 3.,  6.,  9., 12., 15.],
       [ 4.,  8., 12., 16., 20.],
       [ 5., 10., 15., 20., 25.]])
In [11]:
arr = np.array([[1,2,3],[4,5,6]])
arr[0:2]
Out[11]:
array([[1, 2, 3],
       [4, 5, 6]])
In [13]:
arr = np.array([[1,2,3],[4,5,6]])
arr[:,0:2]
Out[13]:
array([[1, 2],
       [4, 5]])
In [15]:
for row in arr:
    print(row)
[1 2 3]
[4 5 6]

Difference between reshape and resize

In [16]:
arr.reshape(3,2)
arr
Out[16]:
array([[1, 2, 3],
       [4, 5, 6]])
In [17]:
arr.resize(3,2)
arr
Out[17]:
array([[1, 2],
       [3, 4],
       [5, 6]])
In [25]:
arr1 = np.arange(0,16).reshape(4,4)
arr1
Out[25]:
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])
In [26]:
arr1.reshape(2,-1)
Out[26]:
array([[ 0,  1,  2,  3,  4,  5,  6,  7],
       [ 8,  9, 10, 11, 12, 13, 14, 15]])
  • arr.hstack and arr.vstack
In [30]:
a1 = np.array([1,2,3])
a2 = np.array([4,5,6])
np.vstack((a1,a2))
Out[30]:
array([[1, 2, 3],
       [4, 5, 6]])

Arithmetic operator

  • '+', '-', '*', '/', and so on.
  • .sum()
  • .sum(axis=0)
  • .sum(axis=1)
  • .min()
  • .argmax() # return the index of max
  • .mean()
  • .var()
  • .std()
In [32]:
a3 = np.array([[4,5,6],[7,8,9]])
a1 + a3
Out[32]:
array([[ 5,  7,  9],
       [ 8, 10, 12]])
In [33]:
a3+2
Out[33]:
array([[ 6,  7,  8],
       [ 9, 10, 11]])
In [36]:
a3.sum(axis=0)
Out[36]:
array([11, 13, 15])
In [40]:
a3.argmax() 
Out[40]:
5

linear algbra in numpy

  • np.linalg.det(x)
  • np.linalg.inv(x)
  • np.dot(x,y)

ufunc (universal function)

  • apply_along_axis
  • bincount
  • argsort
  • argmin
  • ceil
  • clip
  • comsum
  • comprod
  • exp
  • cross
  • corrcoef
In [45]:
import math
import time
import numpy as np

x = np.arange(0,100,0.01)
tm1 = time.process_time()
for i,t in enumerate(x):
    x[i] = math.pow(math.sin(t),2)
tm2 = time.process_time()
    
y = np.arange(0,100,0.01)
tn1 = time.process_time()
y = np.power(np.sin(y),2)
tn2 = time.process_time()

print('running time by math:', tm2-tm1)
print('running time by numpy:', tn2-tn1)
running time by math: 0.015625
running time by numpy: 0.0

Series in Pandas

  • consist of both index and data
  • assign the index by yourself
  • acess a member according to index
  • arithmetic operator
In [48]:
import pandas as pd
s1 = pd.Series([1,2,3,4,'a'])
s1
Out[48]:
0    1
1    2
2    3
3    4
4    a
dtype: object
In [49]:
s2 = pd.Series([1,2,3,'a'], index=[1,2,3,4])
s2
Out[49]:
1    1
2    2
3    3
4    a
dtype: object
In [50]:
s3 = pd.Series([1,2,3,'SUFE'], index=['a','b','c','d'])
s3
Out[50]:
a       1
b       2
c       3
d    SUFE
dtype: object
In [51]:
s3['b']
Out[51]:
2
In [54]:
data = {'B':88, 'A':90,'T':91}
index1 = ['B','A','T','H']
s4 = pd.Series(data,index1)
s4
Out[54]:
B    88.0
A    90.0
T    91.0
H     NaN
dtype: float64
In [55]:
pd.isnull(s4)
Out[55]:
B    False
A    False
T    False
H     True
dtype: bool
In [57]:
s5 = s4
s6 = s4
s4['H'] = 98
s5 + s6
Out[57]:
B    176.0
A    180.0
T    182.0
H    196.0
dtype: float64

Datafram in Pandas

  • consist of index and value
  • df.index
  • df.columns
  • df.values
  • acess clumns: df.name, df['names'], df.iloc[:,1]
  • acess members: df.iloc[0,1]
In [61]:
data = {'names':['wang','li','yang','liu','zhou'],'salaries':[4000,5000,3000,6000,4400]}
df1 = pd.DataFrame(data)
df1
Out[61]:
names salaries
0 wang 4000
1 li 5000
2 yang 3000
3 liu 6000
4 zhou 4400
In [100]:
data = np.array([('wang',4000),('li',5000),('yang',3000),('liu',6000),('zhou',4400)])
df2 = pd.DataFrame(data,index=range(1,6),columns=['names','salaries'])
df2.index
df2.columns
df2.values
Out[100]:
array([['wang', '4000'],
       ['li', '5000'],
       ['yang', '3000'],
       ['liu', '6000'],
       ['zhou', '4400']], dtype=object)
In [101]:
df2.salaries
df2['salaries']
Out[101]:
1    4000
2    5000
3    3000
4    6000
5    4400
Name: salaries, dtype: object
In [102]:
df2.iloc[:,1]
Out[102]:
1    4000
2    5000
3    3000
4    6000
5    4400
Name: salaries, dtype: object
In [81]:
df2.iloc[2,1]
Out[81]:
'3000'
In [85]:
df2.iloc[:2,1]
Out[85]:
1    4000
2    5000
Name: salaries, dtype: object
In [95]:
data = np.array([('wang',4000),('li',5000),('yang',3000),('liu',6000),('zhou',4400)])
df3 = pd.DataFrame(data,index=range(1,6),columns=['names','salaries'])
df3['names'] = 'admin'
df3
Out[95]:
names salaries
1 admin 4000
2 admin 5000
3 admin 3000
4 admin 6000
5 admin 4400
In [96]:
del df3['salaries']
df3
Out[96]:
names
1 admin
2 admin
3 admin
4 admin
5 admin
In [97]:
df2.salaries.min()
Out[97]:
'3000'
In [103]:
df2[df2.salaries>='5000']
Out[103]:
names salaries
2 li 5000
4 liu 6000
In [107]:
np.eye(3)
np.eye(4,k=1)
np.eye(4,k=-1)
Out[107]:
array([[0., 0., 0., 0.],
       [1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.]])
In [109]:
np.random.uniform(0,1,5)
Out[109]:
array([0.02100736, 0.47109388, 0.06358623, 0.36710694, 0.15601807])
In [121]:
x1 = np.random.rand(3,5)
x1
#x1.shape[0]
Out[121]:
array([[0.56770254, 0.89093686, 0.93961713, 0.40366624, 0.33346843],
       [0.72013447, 0.01398888, 0.04531643, 0.93985577, 0.67832158],
       [0.40438896, 0.49029484, 0.38928032, 0.66411173, 0.48375558]])
In [123]:
y1 = np.random.choice(np.arange(x1.shape[0]),2,replace=True)
y1
Out[123]:
array([0, 1])
In [125]:
x1[y1]
Out[125]:
array([[0.56770254, 0.89093686, 0.93961713, 0.40366624, 0.33346843],
       [0.72013447, 0.01398888, 0.04531643, 0.93985577, 0.67832158]])
In [126]:
x1[:,y1]
Out[126]:
array([[0.56770254, 0.89093686],
       [0.72013447, 0.01398888],
       [0.40438896, 0.49029484]])
In [135]:
z1 = np.arange(5)
#z1<=2
x1[:,z1<=2]
Out[135]:
array([[0.56770254, 0.89093686, 0.93961713],
       [0.72013447, 0.01398888, 0.04531643],
       [0.40438896, 0.49029484, 0.38928032]])
In [136]:
np.where(z1<=2)
Out[136]:
(array([0, 1, 2], dtype=int64),)
In [137]:
np.where(z1%2)
Out[137]:
(array([1, 3], dtype=int64),)