索引和遍历

import pandas as pd
import numpy as np

以字典的方式索引

# Demo frame used throughout: 3 labelled rows x 2 labelled columns filled with
# uniform random floats in [0, 1). No seed is set, so values differ per run.
row_labels = ['a', 'b', 'c']
column_labels = ['first', 'second']
df = pd.DataFrame(
    data=np.random.rand(len(row_labels), len(column_labels)),
    index=row_labels,
    columns=column_labels,
)
df
first second
a 0.241914 0.936741
b 0.721130 0.812072
c 0.707245 0.819285
"""
可以直接用列名取出某一列
不能用同样的方式直接取某一行:'a' 是行索引而不是列名,df['a'] 会抛出 KeyError(取行请用 df.loc['a'])
"""
# Dict-style lookup by column label; the result is that column as a Series.
df['first']
a    0.241914
b    0.721130
c    0.707245
Name: first, dtype: float64
# Chained lookup: pick the column first, then the row label inside that Series.
# Use this form for reading only; to assign a cell, go through df.loc['b', 'first'].
df['first']['b']
0.7211298502376191

loc与iloc

# Select a single row by its index label with loc; the row comes back as a Series.
df.loc['a']
first     0.241914
second    0.936741
Name: a, dtype: float64
# With loc the row selector comes first, then the column selector.
# Label slices include both endpoints, so :'b' keeps rows a and b.
df.loc[:'b', :'first']
first
a 0.241914
b 0.721130
# iloc indexes by integer position, the same way a NumPy array is indexed
# (the end of a positional slice is excluded).
df.iloc[:2, :2]
first second
a 0.241914 0.936741
b 0.721130 0.812072
# Overwrite a single cell by integer position: row 2, column 1
# (the row labelled 'c', column 'second').
df.iloc[2, 1] = 100
df
first second
a 0.241914 0.936741
b 0.721130 0.812072
c 0.707245 100.000000
# Overwrite one cell by position through the DataFrame's own indexer.
# Do NOT write through df.values: whether that array shares memory with the
# frame is an implementation detail (a mixed-dtype frame hands back a copy, and
# with Copy-on-Write enabled the array is read-only), so such a write can be
# silently lost or raise. iloc always updates the frame itself.
df.iloc[0, 1] = 1
df
first second
a 0.241914 1.000000
b 0.721130 0.812072
c 0.707245 100.000000

按条件索引

# Boolean-mask filtering: keep only the rows whose 'second' value is >= 0.5.
df[df['second'] >= 0.5]
first second
a 0.241914 1.000000
b 0.721130 0.812072
c 0.707245 100.000000
# Combine conditions element-wise with & (not `and`); each comparison needs its
# own parentheses because & binds more tightly than >= and <=.
df[(df['first'] >= 0.5) & (df['first'] <= 1.0)]
first second
b 0.721130 0.812072
c 0.707245 100.000000

遍历dataframe

# Row-wise iteration: iterrows() yields (index label, row as a Series) pairs.
for label, row in df.iterrows():
    print(label)            # the row's index label
    print(row)              # the whole row as a Series
    print(row['second'])    # a single cell, looked up by column label
a
first     0.241914
second    1.000000
Name: a, dtype: float64
1.0
b
first     0.721130
second    0.812072
Name: b, dtype: float64
0.8120721605424106
c
first       0.707245
second    100.000000
Name: c, dtype: float64
100.0
# Column-wise iteration: items() yields (column label, column as a Series) pairs.
for x, y in df.items():
    # x: the column label
    print(x)
    # y: the Series holding that column's values
    print(y)
first
a    0.241914
b    0.721130
c    0.707245
Name: first, dtype: float64
second
a      1.000000
b      0.812072
c    100.000000
Name: second, dtype: float64