# 索引和遍历

In [1]:
import pandas as pd
import numpy as np

## 以字典的方式索引

In [2]:
# Demo frame used by every cell below: rows labelled 'a'..'c', columns
# 'first' and 'second', filled with uniform random floats in [0, 1).
# No seed is set, so the actual values differ on each run.
df = pd.DataFrame(
    data=np.random.rand(3, 2),
    index=['a', 'b', 'c'],
    columns=['first', 'second'],
)
df

Unnamed: 0,first,second
a,0.254904,0.054261
b,0.454977,0.87699
c,0.870276,0.734116


In [3]:
"""
Dict-style indexing selects a column directly.
A row cannot be selected this way: df['a'], for example, raises an error.
"""
df['first']

a    0.254904
b    0.454977
c    0.870276
Name: first, dtype: float64

In [4]:
# Chained dict-style lookup: column label first, then index label.
# (Fine for reading a value; for assignment use a single df.loc[row, col] call.)
df['first']['b']

0.45497650367236453

## loc与iloc

In [5]:
# loc selects a row by its index label; the result is a Series keyed by column name
df.loc['a']

first     0.254904
second    0.054261
Name: a, dtype: float64

In [6]:
# With loc the index (row) selector comes first, then the column selector.
# Label slices include their end label, so this keeps rows 'a'..'b' and column 'first'.
df.loc[:'b', :'first']

Unnamed: 0,first
a,0.254904
b,0.454977


In [7]:
# iloc is purely positional and is indexed the same way as a NumPy array
# (the end position of a slice is excluded)
df.iloc[:2, :2]

Unnamed: 0,first,second
a,0.254904,0.054261
b,0.454977,0.87699


In [8]:
# Modify a single value in place by position: row 2 ('c'), column 1 ('second')
df.iloc[2, 1] = 100
df

Unnamed: 0,first,second
a,0.254904,0.054261
b,0.454977,0.87699
c,0.870276,100.0


In [9]:
# Overwrite one cell by position: row 0 ('a'), column 1 ('second').
# Do not write through df.values: it may hand back a copy (e.g. when the
# columns have mixed dtypes) or a read-only array (pandas Copy-on-Write), so
# the assignment can be silently lost or raise. Assigning through iloc always
# updates the frame itself.
df.iloc[0, 1] = 1
df

Unnamed: 0,first,second
a,0.254904,1.0
b,0.454977,0.87699
c,0.870276,100.0


## 按条件索引

In [10]:
# Boolean-mask indexing: keep only the rows whose 'second' value is at least 0.5
df[df['second'] >= 0.5]

Unnamed: 0,first,second
a,0.254904,1.0
b,0.454977,0.87699
c,0.870276,100.0


In [11]:
# Combine masks with & (element-wise AND). Each comparison needs its own
# parentheses because & binds more tightly than >= and <=.
df[(df['first'] >= 0.5) & (df['first'] <= 1.0)]

Unnamed: 0,first,second
c,0.870276,100.0


## 遍历dataframe

In [12]:
# Row-wise iteration: iterrows() yields (index label, row as a Series) pairs
for row_label, row in df.iterrows():
    print(row_label)       # the index label of this row
    print(row)             # the whole row as a Series keyed by column name
    print(row['second'])   # a single value, looked up by column label

a
first     0.254904
second    1.000000
Name: a, dtype: float64
1.0
b
first     0.454977
second    0.876990
Name: b, dtype: float64
0.8769898565356357
c
first       0.870276
second    100.000000
Name: c, dtype: float64
100.0


In [13]:
# Column-wise iteration: items() yields (column name, column as a Series) pairs
for column_name, column in df.items():
    print(column_name)   # the column label
    print(column)        # the whole column as a Series keyed by index label

first
a    0.254904
b    0.454977
c    0.870276
Name: first, dtype: float64
second
a      1.00000
b      0.87699
c    100.00000
Name: second, dtype: float64
