保存与加载#
import pandas as pd
import numpy as np
与dict转换#
"""
使用指定data,index,columns的方式创建dataframe
下面使用了ord和chr来遍历字母表
"""
# Row labels are the eight consecutive letters starting at 'a'.
index = list(map(chr, range(ord('a'), ord('a') + 8)))
columns = ['duration', 'playtime', 'up', 'favorite', 'comment', 'share']
# Fill a (rows x columns) table with random integers drawn from [1, 100).
df = pd.DataFrame(
    data=np.random.randint(low=1, high=100, size=(len(index), len(columns))),
    index=index,
    columns=columns,
)
df.values
array([[19, 52, 71, 7, 86, 90],
[13, 39, 57, 62, 96, 91],
[30, 92, 98, 53, 60, 29],
[79, 85, 99, 93, 23, 93],
[82, 27, 2, 81, 32, 96],
[96, 80, 3, 89, 85, 38],
[18, 30, 84, 17, 64, 81],
[36, 96, 78, 33, 59, 65]])
# DataFrame -> dict: orient='index' yields {row label: {column name: value}}
data_dict = df.to_dict(orient='index')
data_dict
{'a': {'duration': 19,
'playtime': 52,
'up': 71,
'favorite': 7,
'comment': 86,
'share': 90},
'b': {'duration': 13,
'playtime': 39,
'up': 57,
'favorite': 62,
'comment': 96,
'share': 91},
'c': {'duration': 30,
'playtime': 92,
'up': 98,
'favorite': 53,
'comment': 60,
'share': 29},
'd': {'duration': 79,
'playtime': 85,
'up': 99,
'favorite': 93,
'comment': 23,
'share': 93},
'e': {'duration': 82,
'playtime': 27,
'up': 2,
'favorite': 81,
'comment': 32,
'share': 96},
'f': {'duration': 96,
'playtime': 80,
'up': 3,
'favorite': 89,
'comment': 85,
'share': 38},
'g': {'duration': 18,
'playtime': 30,
'up': 84,
'favorite': 17,
'comment': 64,
'share': 81},
'h': {'duration': 36,
'playtime': 96,
'up': 78,
'favorite': 33,
'comment': 59,
'share': 65}}
# dict -> DataFrame: with orient='index' the outer keys become the row labels
df_dict = pd.DataFrame.from_dict(data_dict, orient='index')
df_dict
| | duration | playtime | up | favorite | comment | share |
---|---|---|---|---|---|---|
a | 19 | 52 | 71 | 7 | 86 | 90 |
b | 13 | 39 | 57 | 62 | 96 | 91 |
c | 30 | 92 | 98 | 53 | 60 | 29 |
d | 79 | 85 | 99 | 93 | 23 | 93 |
e | 82 | 27 | 2 | 81 | 32 | 96 |
f | 96 | 80 | 3 | 89 | 85 | 38 |
g | 18 | 30 | 84 | 17 | 64 | 81 |
h | 36 | 96 | 78 | 33 | 59 | 65 |
csv文件#
# Save the DataFrame as a CSV file
df.to_csv('sample.csv')
# index_col=0 must be given so the first column of the file is used as the index
df_csv = pd.read_csv('sample.csv', index_col=0)
df_csv.values
array([[19, 52, 71, 7, 86, 90],
[13, 39, 57, 62, 96, 91],
[30, 92, 98, 53, 60, 29],
[79, 85, 99, 93, 23, 93],
[82, 27, 2, 81, 32, 96],
[96, 80, 3, 89, 85, 38],
[18, 30, 84, 17, 64, 81],
[36, 96, 78, 33, 59, 65]])
excel文件#
# Save the DataFrame as an Excel file
df.to_excel('sample.xlsx')
# Without index_col=0 the saved index is read back as an extra column (not a
# row) named 'Unnamed: 0'
df_excel = pd.read_excel('sample.xlsx')
df_excel
| | Unnamed: 0 | duration | playtime | up | favorite | comment | share |
---|---|---|---|---|---|---|---|
0 | a | 19 | 52 | 71 | 7 | 86 | 90 |
1 | b | 13 | 39 | 57 | 62 | 96 | 91 |
2 | c | 30 | 92 | 98 | 53 | 60 | 29 |
3 | d | 79 | 85 | 99 | 93 | 23 | 93 |
4 | e | 82 | 27 | 2 | 81 | 32 | 96 |
5 | f | 96 | 80 | 3 | 89 | 85 | 38 |
6 | g | 18 | 30 | 84 | 17 | 64 | 81 |
7 | h | 36 | 96 | 78 | 33 | 59 | 65 |