{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 运算"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## DataFrame运算"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" first | \n",
" second | \n",
" third | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 0.050273 | \n",
" 0.730108 | \n",
" 0.892286 | \n",
"
\n",
" \n",
" b | \n",
" 0.634789 | \n",
" 0.452458 | \n",
" 0.716244 | \n",
"
\n",
" \n",
" c | \n",
" 0.157829 | \n",
" 0.829539 | \n",
" 0.441068 | \n",
"
\n",
" \n",
" d | \n",
" 0.233697 | \n",
" 0.928338 | \n",
" 0.292920 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" first second third\n",
"a 0.050273 0.730108 0.892286\n",
"b 0.634789 0.452458 0.716244\n",
"c 0.157829 0.829539 0.441068\n",
"d 0.233697 0.928338 0.292920"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame(np.random.rand(4, 3), index=['a', 'b', 'c', 'd'], columns=['first', 'second', 'third'])\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" first | \n",
" second | \n",
" third | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 1.051558 | \n",
" 2.075304 | \n",
" 2.440703 | \n",
"
\n",
" \n",
" b | \n",
" 1.886624 | \n",
" 1.572172 | \n",
" 2.046732 | \n",
"
\n",
" \n",
" c | \n",
" 1.170966 | \n",
" 2.292262 | \n",
" 1.554367 | \n",
"
\n",
" \n",
" d | \n",
" 1.263262 | \n",
" 2.530300 | \n",
" 1.340336 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" first second third\n",
"a 1.051558 2.075304 2.440703\n",
"b 1.886624 1.572172 2.046732\n",
"c 1.170966 2.292262 1.554367\n",
"d 1.263262 2.530300 1.340336"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# NumPy ufuncs such as np.exp are applied element-wise and keep the index/column labels\n",
"np.exp(df)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 列间运算"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" first | \n",
" second | \n",
" third | \n",
" minus | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 0.050273 | \n",
" 0.730108 | \n",
" 0.892286 | \n",
" -0.679835 | \n",
"
\n",
" \n",
" b | \n",
" 0.634789 | \n",
" 0.452458 | \n",
" 0.716244 | \n",
" 0.182331 | \n",
"
\n",
" \n",
" c | \n",
" 0.157829 | \n",
" 0.829539 | \n",
" 0.441068 | \n",
" -0.671710 | \n",
"
\n",
" \n",
" d | \n",
" 0.233697 | \n",
" 0.928338 | \n",
" 0.292920 | \n",
" -0.694640 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" first second third minus\n",
"a 0.050273 0.730108 0.892286 -0.679835\n",
"b 0.634789 0.452458 0.716244 0.182331\n",
"c 0.157829 0.829539 0.441068 -0.671710\n",
"d 0.233697 0.928338 0.292920 -0.694640"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['minus'] = df['first'] - df['second']\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" first | \n",
" second | \n",
" third | \n",
" minus | \n",
" quadratic | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 0.050273 | \n",
" 0.730108 | \n",
" 0.892286 | \n",
" -0.679835 | \n",
" 0.796174 | \n",
"
\n",
" \n",
" b | \n",
" 0.634789 | \n",
" 0.452458 | \n",
" 0.716244 | \n",
" 0.182331 | \n",
" 0.513006 | \n",
"
\n",
" \n",
" c | \n",
" 0.157829 | \n",
" 0.829539 | \n",
" 0.441068 | \n",
" -0.671710 | \n",
" 0.194541 | \n",
"
\n",
" \n",
" d | \n",
" 0.233697 | \n",
" 0.928338 | \n",
" 0.292920 | \n",
" -0.694640 | \n",
" 0.085802 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" first second third minus quadratic\n",
"a 0.050273 0.730108 0.892286 -0.679835 0.796174\n",
"b 0.634789 0.452458 0.716244 0.182331 0.513006\n",
"c 0.157829 0.829539 0.441068 -0.671710 0.194541\n",
"d 0.233697 0.928338 0.292920 -0.694640 0.085802"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['quadratic'] = df['third'] ** 2\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" first | \n",
" second | \n",
" third | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 0.050273 | \n",
" 0.730108 | \n",
" 0.892286 | \n",
"
\n",
" \n",
" b | \n",
" 0.634789 | \n",
" 0.452458 | \n",
" 0.716244 | \n",
"
\n",
" \n",
" c | \n",
" 0.157829 | \n",
" 0.829539 | \n",
" 0.441068 | \n",
"
\n",
" \n",
" d | \n",
" 0.233697 | \n",
" 0.928338 | \n",
" 0.292920 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" first second third\n",
"a 0.050273 0.730108 0.892286\n",
"b 0.634789 0.452458 0.716244\n",
"c 0.157829 0.829539 0.441068\n",
"d 0.233697 0.928338 0.292920"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Remove the two derived columns in place; drop(index=[...]) removes rows instead\n",
"df.drop(columns=['minus', 'quadratic'], inplace=True)\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## replace和apply函数"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" first | \n",
" second | \n",
" third | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 0.050273 | \n",
" 0.730108 | \n",
" 100 | \n",
"
\n",
" \n",
" b | \n",
" 0.634789 | \n",
" 0.452458 | \n",
" 100 | \n",
"
\n",
" \n",
" c | \n",
" 0.157829 | \n",
" 0.829539 | \n",
" 100 | \n",
"
\n",
" \n",
" d | \n",
" 0.233697 | \n",
" 0.928338 | \n",
" 100 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" first second third\n",
"a 0.050273 0.730108 100\n",
"b 0.634789 0.452458 100\n",
"c 0.157829 0.829539 100\n",
"d 0.233697 0.928338 100"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['third'] = 1\n",
"# 用100代替1\n",
"df.replace(1, 100, inplace=True)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" first | \n",
" second | \n",
" third | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 0.224216 | \n",
" 0.854463 | \n",
" 10.0 | \n",
"
\n",
" \n",
" b | \n",
" 0.796736 | \n",
" 0.672650 | \n",
" 10.0 | \n",
"
\n",
" \n",
" c | \n",
" 0.397277 | \n",
" 0.910790 | \n",
" 10.0 | \n",
"
\n",
" \n",
" d | \n",
" 0.483423 | \n",
" 0.963503 | \n",
" 10.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" first second third\n",
"a 0.224216 0.854463 10.0\n",
"b 0.796736 0.672650 10.0\n",
"c 0.397277 0.910790 10.0\n",
"d 0.483423 0.963503 10.0"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 每个元素开平方\n",
"df.apply(lambda x: x ** 0.5)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"a 100.780381\n",
"b 101.087247\n",
"c 100.987368\n",
"d 101.162035\n",
"dtype: float64"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sum each row across its columns (axis=1 gives one total per index label)\n",
"df.apply(np.sum, axis=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 合并DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" c | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 8 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" 0 | \n",
" 8 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b c\n",
"0 1 0 8\n",
"1 2 0 8"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1 = pd.DataFrame(np.random.randint(0, 10, (2, 3)), index=np.arange(2), columns=['a', 'b', 'c'])\n",
"df1"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" c | \n",
"
\n",
" \n",
" \n",
" \n",
" 4 | \n",
" 10 | \n",
" 7 | \n",
" 5 | \n",
"
\n",
" \n",
" 5 | \n",
" 6 | \n",
" 9 | \n",
" 9 | \n",
"
\n",
" \n",
" 6 | \n",
" 13 | \n",
" 11 | \n",
" 9 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b c\n",
"4 10 7 5\n",
"5 6 9 9\n",
"6 13 11 9"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2 = pd.DataFrame(np.random.randint(5, 15, (3, 3)), index=np.arange(4, 7), columns=['a', 'b', 'c'])\n",
"df2"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" c | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 8 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" 0 | \n",
" 8 | \n",
"
\n",
" \n",
" 4 | \n",
" 10 | \n",
" 7 | \n",
" 5 | \n",
"
\n",
" \n",
" 5 | \n",
" 6 | \n",
" 9 | \n",
" 9 | \n",
"
\n",
" \n",
" 6 | \n",
" 13 | \n",
" 11 | \n",
" 9 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b c\n",
"0 1 0 8\n",
"1 2 0 8\n",
"4 10 7 5\n",
"5 6 9 9\n",
"6 13 11 9"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# concat合并\n",
"pd.concat([df1, df2])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}