# 运算
import pandas as pd
import numpy as np
## DataFrame 运算
df = pd.DataFrame(np.random.rand(4, 3), index=['a', 'b', 'c', 'd'], columns=['first', 'second', 'third'])
df
| | first | second | third |
---|---|---|---|
a | 0.753379 | 0.518270 | 0.843628 |
b | 0.415295 | 0.641808 | 0.183964 |
c | 0.458310 | 0.527492 | 0.628082 |
d | 0.123897 | 0.896220 | 0.021434 |
# point-wise
np.exp(df)
| | first | second | third |
---|---|---|---|
a | 2.124166 | 1.679120 | 2.324786 |
b | 1.514818 | 1.899914 | 1.201973 |
c | 1.581400 | 1.694677 | 1.874012 |
d | 1.131899 | 2.450325 | 1.021666 |
## 列间运算
df['minus'] = df['first'] - df['second']
df
| | first | second | third | minus |
---|---|---|---|---|
a | 0.753379 | 0.518270 | 0.843628 | 0.235109 |
b | 0.415295 | 0.641808 | 0.183964 | -0.226513 |
c | 0.458310 | 0.527492 | 0.628082 | -0.069182 |
d | 0.123897 | 0.896220 | 0.021434 | -0.772324 |
df['quadratic'] = df['third'] ** 2
df
| | first | second | third | minus | quadratic |
---|---|---|---|---|---|
a | 0.753379 | 0.518270 | 0.843628 | 0.235109 | 0.711708 |
b | 0.415295 | 0.641808 | 0.183964 | -0.226513 | 0.033843 |
c | 0.458310 | 0.527492 | 0.628082 | -0.069182 | 0.394487 |
d | 0.123897 | 0.896220 | 0.021434 | -0.772324 | 0.000459 |
# 使用 drop 函数删除列(删除行时改用 index 参数)
df.drop(columns=['minus', 'quadratic'], inplace=True)
df
| | first | second | third |
---|---|---|---|
a | 0.753379 | 0.518270 | 0.843628 |
b | 0.415295 | 0.641808 | 0.183964 |
c | 0.458310 | 0.527492 | 0.628082 |
d | 0.123897 | 0.896220 | 0.021434 |
## replace 和 apply 函数
df['third'] = 1
# 用100代替1
df.replace(1, 100, inplace=True)
df
| | first | second | third |
---|---|---|---|
a | 0.753379 | 0.518270 | 100 |
b | 0.415295 | 0.641808 | 100 |
c | 0.458310 | 0.527492 | 100 |
d | 0.123897 | 0.896220 | 100 |
# 每个元素开平方
df.apply(lambda x: x ** 0.5)
| | first | second | third |
---|---|---|---|
a | 0.867974 | 0.719910 | 10.0 |
b | 0.644434 | 0.801130 | 10.0 |
c | 0.676986 | 0.726287 | 10.0 |
d | 0.351990 | 0.946689 | 10.0 |
# 统计各行的和(axis=1 表示对每一行跨列求和;统计各列的和应使用 axis=0)
df.apply(np.sum, axis=1)
a 101.271649
b 101.057103
c 100.985803
d 101.020117
dtype: float64
## 合并 DataFrame
df1 = pd.DataFrame(np.random.randint(0, 10, (2, 3)), index=np.arange(2), columns=['a', 'b', 'c'])
df1
| | a | b | c |
---|---|---|---|
0 | 5 | 8 | 6 |
1 | 8 | 6 | 2 |
df2 = pd.DataFrame(np.random.randint(5, 15, (3, 3)), index=np.arange(4, 7), columns=['a', 'b', 'c'])
df2
| | a | b | c |
---|---|---|---|
4 | 6 | 14 | 11 |
5 | 9 | 11 | 13 |
6 | 9 | 5 | 6 |
# concat合并
pd.concat([df1, df2])
| | a | b | c |
---|---|---|---|
0 | 5 | 8 | 6 |
1 | 8 | 6 | 2 |
4 | 6 | 14 | 11 |
5 | 9 | 11 | 13 |
6 | 9 | 5 | 6 |