# 变更行和列

import pandas as pd
import numpy as np

## 修改行和列的名称

df = pd.read_csv('sample.csv', index_col=0)
df
duration playtime up favorite comment share
a 8 66 30 34 8 40
b 59 84 47 31 50 91
c 31 18 78 64 28 57
d 19 47 20 8 12 96
e 99 64 57 19 33 63
f 73 82 48 44 57 4
g 25 65 98 9 83 97
h 96 31 66 13 13 29
df = df[df['up'] != 75]
df
duration playtime up favorite comment share
a 8 66 30 34 8 40
b 59 84 47 31 50 91
c 31 18 78 64 28 57
d 19 47 20 8 12 96
e 99 64 57 19 33 63
f 73 82 48 44 57 4
g 25 65 98 9 83 97
h 96 31 66 13 13 29
df.columns
Index(['duration', 'playtime', 'up', 'favorite', 'comment', 'share'], dtype='object')
# 可以直接给 df.columns 赋值来修改列名;行索引(df.index)同理
df.columns = np.arange(6)
df
0 1 2 3 4 5
a 8 66 30 34 8 40
b 59 84 47 31 50 91
c 31 18 78 64 28 57
d 19 47 20 8 12 96
e 99 64 57 19 33 63
f 73 82 48 44 57 4
g 25 65 98 9 83 97
h 96 31 66 13 13 29

## 修改行、列的顺序

# 使用 reindex 按给定的标签重排行:未列出的行(a、b)会被丢弃,
# 原索引中不存在的标签(xx、yy)对应的行以 NaN 填充,因此整数列会变成浮点型
df.reindex(['c', 'd', 'e', 'f', 'g', 'h', 'xx', 'yy'])
0 1 2 3 4 5
c 31.0 18.0 78.0 64.0 28.0 57.0
d 19.0 47.0 20.0 8.0 12.0 96.0
e 99.0 64.0 57.0 19.0 33.0 63.0
f 73.0 82.0 48.0 44.0 57.0 4.0
g 25.0 65.0 98.0 9.0 83.0 97.0
h 96.0 31.0 66.0 13.0 13.0 29.0
xx NaN NaN NaN NaN NaN NaN
yy NaN NaN NaN NaN NaN NaN
# 用列名列表选取即可调整列的顺序(返回新的 DataFrame,原 df 不变)
df[[5, 4, 3, 2, 1, 0]]
5 4 3 2 1 0
a 40 8 34 30 66 8
b 91 50 31 47 84 59
c 57 28 64 78 18 31
d 96 12 8 20 47 19
e 63 33 19 57 64 99
f 4 57 44 48 82 73
g 97 83 9 98 65 25
h 29 13 13 66 31 96

## 改变索引

# 使用 reset_index 将索引重置为默认的整数索引;drop=False 时原索引会保留为名为 index 的新列
df.reset_index(drop=False)
index 0 1 2 3 4 5
0 a 8 66 30 34 8 40
1 b 59 84 47 31 50 91
2 c 31 18 78 64 28 57
3 d 19 47 20 8 12 96
4 e 99 64 57 19 33 63
5 f 73 82 48 44 57 4
6 g 25 65 98 9 83 97
7 h 96 31 66 13 13 29
# 使用 set_index 将现有的列设为索引;传入多个列名(此处为 0 和 1)会得到多级索引(MultiIndex)
df.set_index(keys=[0, 1])
2 3 4 5
0 1
8 66 30 34 8 40
59 84 47 31 50 91
31 18 78 64 28 57
19 47 20 8 12 96
99 64 57 19 33 63
73 82 48 44 57 4
25 65 98 9 83 97
96 31 66 13 13 29