运算#

import pandas as pd
import numpy as np

DataFrame运算#

df = pd.DataFrame(np.random.rand(4, 3), index=['a', 'b', 'c', 'd'], columns=['first', 'second', 'third'])
df
first second third
a 0.753379 0.518270 0.843628
b 0.415295 0.641808 0.183964
c 0.458310 0.527492 0.628082
d 0.123897 0.896220 0.021434
# point-wise
np.exp(df)
first second third
a 2.124166 1.679120 2.324786
b 1.514818 1.899914 1.201973
c 1.581400 1.694677 1.874012
d 1.131899 2.450325 1.021666

列间运算#

df['minus'] = df['first'] - df['second']
df
first second third minus
a 0.753379 0.518270 0.843628 0.235109
b 0.415295 0.641808 0.183964 -0.226513
c 0.458310 0.527492 0.628082 -0.069182
d 0.123897 0.896220 0.021434 -0.772324
df['quadratic'] = df['third'] ** 2
df
first second third minus quadratic
a 0.753379 0.518270 0.843628 0.235109 0.711708
b 0.415295 0.641808 0.183964 -0.226513 0.033843
c 0.458310 0.527492 0.628082 -0.069182 0.394487
d 0.123897 0.896220 0.021434 -0.772324 0.000459
# 使用drop函数删除列(改用index参数或axis=0即可删除行)
df.drop(columns=['minus', 'quadratic'], inplace=True)
df
first second third
a 0.753379 0.518270 0.843628
b 0.415295 0.641808 0.183964
c 0.458310 0.527492 0.628082
d 0.123897 0.896220 0.021434

replace和apply函数#

df['third'] = 1
# 用100代替1
df.replace(1, 100, inplace=True)
df
first second third
a 0.753379 0.518270 100
b 0.415295 0.641808 100
c 0.458310 0.527492 100
d 0.123897 0.896220 100
# 每个元素开平方
df.apply(lambda x: x ** 0.5)
first second third
a 0.867974 0.719910 10.0
b 0.644434 0.801130 10.0
c 0.676986 0.726287 10.0
d 0.351990 0.946689 10.0
# 统计各行的和(axis=1 表示对每一行横向求和;若要统计各列的和应使用 axis=0)
df.apply(np.sum, axis=1)
a    101.271649
b    101.057103
c    100.985803
d    101.020117
dtype: float64

合并DataFrame#

df1 = pd.DataFrame(np.random.randint(0, 10, (2, 3)), index=np.arange(2), columns=['a', 'b', 'c'])
df1
a b c
0 5 8 6
1 8 6 2
df2 = pd.DataFrame(np.random.randint(5, 15, (3, 3)), index=np.arange(4, 7), columns=['a', 'b', 'c'])
df2
a b c
4 6 14 11
5 9 11 13
6 9 5 6
# concat合并
pd.concat([df1, df2])
a b c
0 5 8 6
1 8 6 2
4 6 14 11
5 9 11 13
6 9 5 6