{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 索引和遍历"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 以字典的方式索引"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" first | \n",
" second | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 0.254904 | \n",
" 0.054261 | \n",
"
\n",
" \n",
" b | \n",
" 0.454977 | \n",
" 0.876990 | \n",
"
\n",
" \n",
" c | \n",
" 0.870276 | \n",
" 0.734116 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" first second\n",
"a 0.254904 0.054261\n",
"b 0.454977 0.876990\n",
"c 0.870276 0.734116"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame(np.random.rand(3, 2), columns=['first', 'second'], index=['a', 'b', 'c'])\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"a 0.254904\n",
"b 0.454977\n",
"c 0.870276\n",
"Name: first, dtype: float64"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"\"\"\n",
"可直接取某列\n",
"不能直接取某行,如df['a']会报错\n",
"\"\"\"\n",
"df['first']"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.45497650367236453"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 先column,再index\n",
"df['first']['b']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## loc与iloc"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"first 0.254904\n",
"second 0.054261\n",
"Name: a, dtype: float64"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 使用loc取某行\n",
"df.loc['a']"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" first | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 0.254904 | \n",
"
\n",
" \n",
" b | \n",
" 0.454977 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" first\n",
"a 0.254904\n",
"b 0.454977"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 使用loc时,先index,再column\n",
"df.loc[:'b', :'first']"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" first | \n",
" second | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 0.254904 | \n",
" 0.054261 | \n",
"
\n",
" \n",
" b | \n",
" 0.454977 | \n",
" 0.876990 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" first second\n",
"a 0.254904 0.054261\n",
"b 0.454977 0.876990"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 使用iloc时,索引方式和数组相同\n",
"df.iloc[:2, :2]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" first | \n",
" second | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 0.254904 | \n",
" 0.054261 | \n",
"
\n",
" \n",
" b | \n",
" 0.454977 | \n",
" 0.876990 | \n",
"
\n",
" \n",
" c | \n",
" 0.870276 | \n",
" 100.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" first second\n",
"a 0.254904 0.054261\n",
"b 0.454977 0.876990\n",
"c 0.870276 100.000000"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 修改数值\n",
"df.iloc[2, 1] = 100\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" first | \n",
" second | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 0.254904 | \n",
" 1.00000 | \n",
"
\n",
" \n",
" b | \n",
" 0.454977 | \n",
" 0.87699 | \n",
"
\n",
" \n",
" c | \n",
" 0.870276 | \n",
" 100.00000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" first second\n",
"a 0.254904 1.00000\n",
"b 0.454977 0.87699\n",
"c 0.870276 100.00000"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 直接修改df.values\n",
"df.values[0, 1] = 1\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 按条件索引"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" first | \n",
" second | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 0.254904 | \n",
" 1.00000 | \n",
"
\n",
" \n",
" b | \n",
" 0.454977 | \n",
" 0.87699 | \n",
"
\n",
" \n",
" c | \n",
" 0.870276 | \n",
" 100.00000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" first second\n",
"a 0.254904 1.00000\n",
"b 0.454977 0.87699\n",
"c 0.870276 100.00000"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['second'] >= 0.5]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" first | \n",
" second | \n",
"
\n",
" \n",
" \n",
" \n",
" c | \n",
" 0.870276 | \n",
" 100.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" first second\n",
"c 0.870276 100.0"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[(df['first'] >= 0.5) & (df['first'] <= 1.0)]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 遍历dataframe"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"a\n",
"first 0.254904\n",
"second 1.000000\n",
"Name: a, dtype: float64\n",
"1.0\n",
"b\n",
"first 0.454977\n",
"second 0.876990\n",
"Name: b, dtype: float64\n",
"0.8769898565356357\n",
"c\n",
"first 0.870276\n",
"second 100.000000\n",
"Name: c, dtype: float64\n",
"100.0\n"
]
}
],
"source": [
"# 按行遍历\n",
"for x, y in df.iterrows():\n",
" # index\n",
" print(x)\n",
" # 此行的series\n",
" print(y)\n",
" print(y['second'])"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"first\n",
"a 0.254904\n",
"b 0.454977\n",
"c 0.870276\n",
"Name: first, dtype: float64\n",
"second\n",
"a 1.00000\n",
"b 0.87699\n",
"c 100.00000\n",
"Name: second, dtype: float64\n"
]
}
],
"source": [
"# 按列遍历\n",
"for x, y in df.items():\n",
" # column\n",
" print(x)\n",
" # 此列的series\n",
" print(y)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}