{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 索引和遍历" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 以字典的方式索引" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
firstsecond
a0.2549040.054261
b0.4549770.876990
c0.8702760.734116
\n", "
" ], "text/plain": [ " first second\n", "a 0.254904 0.054261\n", "b 0.454977 0.876990\n", "c 0.870276 0.734116" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Build a 3x2 frame of random floats with string row and column labels\n", "df = pd.DataFrame(\n", "    np.random.rand(3, 2),\n", "    index=['a', 'b', 'c'],\n", "    columns=['first', 'second'],\n", ")\n", "df" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "a 0.254904\n", "b 0.454977\n", "c 0.870276\n", "Name: first, dtype: float64" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# A column can be selected directly with df[column_label].\n", "# A row cannot: df['a'] fails because 'a' is not a column label.\n", "df['first']" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.45497650367236453" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Dict-style access: column label first, then row label\n", "df['first']['b']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## loc与iloc" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "first 0.254904\n", "second 0.054261\n", "Name: a, dtype: float64" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Select a single row by its label with .loc\n", "df.loc['a']" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
first
a0.254904
b0.454977
\n", "
" ], "text/plain": [ " first\n", "a 0.254904\n", "b 0.454977" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# With .loc the row labels come first, then the column labels.\n", "# Label slices include their end label: rows up to 'b', columns up to 'first'.\n", "df.loc[:'b', :'first']" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
firstsecond
a0.2549040.054261
b0.4549770.876990
\n", "
" ], "text/plain": [ " first second\n", "a 0.254904 0.054261\n", "b 0.454977 0.876990" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# .iloc indexes by integer position, the same way a NumPy array is indexed\n", "df.iloc[:2, :2]" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
firstsecond
a0.2549040.054261
b0.4549770.876990
c0.870276100.000000
\n", "
" ], "text/plain": [ " first second\n", "a 0.254904 0.054261\n", "b 0.454977 0.876990\n", "c 0.870276 100.000000" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Overwrite one cell in place: row position 2, column position 1\n", "df.iloc[2, 1] = 100\n", "df" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
firstsecond
a0.2549041.00000
b0.4549770.87699
c0.870276100.00000
\n", "
" ], "text/plain": [ " first second\n", "a 0.254904 1.00000\n", "b 0.454977 0.87699\n", "c 0.870276 100.00000" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Assign through an indexer instead of writing into df.values:\n", "# .values is not guaranteed to be a view of the frame's data (it is a copy\n", "# for mixed dtypes and read-only under Copy-on-Write), so an in-place edit\n", "# of that array can be silently lost or raise.\n", "df.iloc[0, 1] = 1\n", "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 按条件索引" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
firstsecond
a0.2549041.00000
b0.4549770.87699
c0.870276100.00000
\n", "
" ], "text/plain": [ " first second\n", "a 0.254904 1.00000\n", "b 0.454977 0.87699\n", "c 0.870276 100.00000" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Boolean mask: keep only the rows whose 'second' value is at least 0.5\n", "second_at_least_half = df['second'] >= 0.5\n", "df[second_at_least_half]" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
firstsecond
c0.870276100.0
\n", "
" ], "text/plain": [ " first second\n", "c 0.870276 100.0" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Combine conditions with &; each comparison needs its own parentheses\n", "first_in_range = (df['first'] >= 0.5) & (df['first'] <= 1.0)\n", "df[first_in_range]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 遍历dataframe" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "a\n", "first 0.254904\n", "second 1.000000\n", "Name: a, dtype: float64\n", "1.0\n", "b\n", "first 0.454977\n", "second 0.876990\n", "Name: b, dtype: float64\n", "0.8769898565356357\n", "c\n", "first 0.870276\n", "second 100.000000\n", "Name: c, dtype: float64\n", "100.0\n" ] } ], "source": [ "# Row-wise iteration: iterrows() yields (index label, row as a Series)\n", "for row_label, row in df.iterrows():\n", "    print(row_label)\n", "    print(row)\n", "    # A single value is read from the row Series by its column label\n", "    print(row['second'])" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "first\n", "a 0.254904\n", "b 0.454977\n", "c 0.870276\n", "Name: first, dtype: float64\n", "second\n", "a 1.00000\n", "b 0.87699\n", "c 100.00000\n", "Name: second, dtype: float64\n" ] } ], "source": [ "# Column-wise iteration: items() yields (column label, column as a Series)\n", "for column_label, column in df.items():\n", "    print(column_label)\n", "    print(column)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" } }, "nbformat": 4, "nbformat_minor": 4 }