{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 运算" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## DataFrame运算" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
firstsecondthird
a0.0502730.7301080.892286
b0.6347890.4524580.716244
c0.1578290.8295390.441068
d0.2336970.9283380.292920
\n", "
" ], "text/plain": [ " first second third\n", "a 0.050273 0.730108 0.892286\n", "b 0.634789 0.452458 0.716244\n", "c 0.157829 0.829539 0.441068\n", "d 0.233697 0.928338 0.292920" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.DataFrame(np.random.rand(4, 3), index=['a', 'b', 'c', 'd'], columns=['first', 'second', 'third'])\n", "df" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
firstsecondthird
a1.0515582.0753042.440703
b1.8866241.5721722.046732
c1.1709662.2922621.554367
d1.2632622.5303001.340336
\n", "
" ], "text/plain": [ " first second third\n", "a 1.051558 2.075304 2.440703\n", "b 1.886624 1.572172 2.046732\n", "c 1.170966 2.292262 1.554367\n", "d 1.263262 2.530300 1.340336" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# point-wise\n", "np.exp(df)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 列间运算" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
firstsecondthirdminus
a0.0502730.7301080.892286-0.679835
b0.6347890.4524580.7162440.182331
c0.1578290.8295390.441068-0.671710
d0.2336970.9283380.292920-0.694640
\n", "
" ], "text/plain": [ " first second third minus\n", "a 0.050273 0.730108 0.892286 -0.679835\n", "b 0.634789 0.452458 0.716244 0.182331\n", "c 0.157829 0.829539 0.441068 -0.671710\n", "d 0.233697 0.928338 0.292920 -0.694640" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['minus'] = df['first'] - df['second']\n", "df" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
firstsecondthirdminusquadratic
a0.0502730.7301080.892286-0.6798350.796174
b0.6347890.4524580.7162440.1823310.513006
c0.1578290.8295390.441068-0.6717100.194541
d0.2336970.9283380.292920-0.6946400.085802
\n", "
" ], "text/plain": [ " first second third minus quadratic\n", "a 0.050273 0.730108 0.892286 -0.679835 0.796174\n", "b 0.634789 0.452458 0.716244 0.182331 0.513006\n", "c 0.157829 0.829539 0.441068 -0.671710 0.194541\n", "d 0.233697 0.928338 0.292920 -0.694640 0.085802" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['quadratic'] = df['third'] ** 2\n", "df" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
firstsecondthird
a0.0502730.7301080.892286
b0.6347890.4524580.716244
c0.1578290.8295390.441068
d0.2336970.9283380.292920
\n", "
" ], "text/plain": [ " first second third\n", "a 0.050273 0.730108 0.892286\n", "b 0.634789 0.452458 0.716244\n", "c 0.157829 0.829539 0.441068\n", "d 0.233697 0.928338 0.292920" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 使用drop函数删列、行\n", "df.drop(columns=['minus', 'quadratic'], inplace=True)\n", "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## replace和apply函数" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
firstsecondthird
a0.0502730.730108100
b0.6347890.452458100
c0.1578290.829539100
d0.2336970.928338100
\n", "
" ], "text/plain": [ " first second third\n", "a 0.050273 0.730108 100\n", "b 0.634789 0.452458 100\n", "c 0.157829 0.829539 100\n", "d 0.233697 0.928338 100" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['third'] = 1\n", "# 用100代替1\n", "df.replace(1, 100, inplace=True)\n", "df" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
firstsecondthird
a0.2242160.85446310.0
b0.7967360.67265010.0
c0.3972770.91079010.0
d0.4834230.96350310.0
\n", "
" ], "text/plain": [ " first second third\n", "a 0.224216 0.854463 10.0\n", "b 0.796736 0.672650 10.0\n", "c 0.397277 0.910790 10.0\n", "d 0.483423 0.963503 10.0" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 每个元素开平方\n", "df.apply(lambda x: x ** 0.5)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "a 100.780381\n", "b 101.087247\n", "c 100.987368\n", "d 101.162035\n", "dtype: float64" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Sum of each row (axis=1 applies np.sum across the columns of every row)\n", "df.apply(np.sum, axis=1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 合并DataFrame" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abc
0108
1208
\n", "
" ], "text/plain": [ " a b c\n", "0 1 0 8\n", "1 2 0 8" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1 = pd.DataFrame(np.random.randint(0, 10, (2, 3)), index=np.arange(2), columns=['a', 'b', 'c'])\n", "df1" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abc
41075
5699
613119
\n", "
" ], "text/plain": [ " a b c\n", "4 10 7 5\n", "5 6 9 9\n", "6 13 11 9" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2 = pd.DataFrame(np.random.randint(5, 15, (3, 3)), index=np.arange(4, 7), columns=['a', 'b', 'c'])\n", "df2" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abc
0108
1208
41075
5699
613119
\n", "
" ], "text/plain": [ " a b c\n", "0 1 0 8\n", "1 2 0 8\n", "4 10 7 5\n", "5 6 9 9\n", "6 13 11 9" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# concat合并\n", "pd.concat([df1, df2])" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" } }, "nbformat": 4, "nbformat_minor": 4 }