Python中快速掌握Data Frame的常用操作_Python

掌握DataFrame的常用操作

一. 查看DataFrame的常用属性

DataFrame的基础属性有：values(元素)、index(索引)、columns(列名)、dtypes(类型)、size(元素个数)、ndim(维度数)和shape(形状)，另外还可以使用T属性进行转置。

				?

									import pandas as pd

									# Load the order-detail workbook with pandas.read_excel.
									# The path is a raw string so the backslashes of the Windows path are
									# kept literally instead of being parsed as (invalid) escape sequences.
									detail = pd.read_excel(r'E:\data\meal_order_detail.xlsx')

									# print(detail)  # uncomment to dump the whole table

									# Basic attributes: index, values, columns, dtypes, size, ndim, shape.
									# NOTE(review): "所以" in the next two labels looks like a typo for
									# "所有" ("all"); the runtime strings are left unchanged here.
									print("索引", detail.index)
									print("所以 值 ：", detail.values)
									print("所以列名：", detail.columns)
									print("数据类型：", detail.dtypes)
									print("元素个数：", detail.size)
									print("维度：", detail.ndim)
									print("形状大小 尺寸：", detail.shape)

									# Transpose with the T attribute and compare the shapes.
									print("转置前的形状：", detail.shape)
									print("转置后的形状：", detail.T.shape)

二. 查改增删DataFrame数据

1.查看访问DataFrame中的数据
（1.1）DataFrame数据的基本查看方式

				?

									# Column access, dictionary style.
									order_id = detail['order_id']
									print("订单详情表的order_id的形状：", order_id.shape)

									# Column access, attribute style.
									dishes_name = detail.dishes_name
									print("订单详情表中的dishes_name的形状：", dishes_name.shape)

									# One column, several rows: pick the column, then slice the rows.
									dishes_name5 = detail['dishes_name'][:5]
									print(dishes_name5)

									# Several columns, several rows.
									orderDish = detail[['order_id', 'dishes_name']][:5]
									print(orderDish)

									# Several rows, all columns. A slice inside [] selects rows, so the
									# extra detail[:] step only re-sliced every row first and is dropped.
									# The end of the slice is exclusive: 1:6 yields the rows at
									# positions 1 through 5.
									order5 = detail[1:6]
									print("订单详情表中的1~6行元素的数据：\n", order5)

									# head() and tail() return 5 rows when called without an argument.
									print('订单详情表中前5行数据：\n', detail.head())
									print('订单详情表中后5行数据：\n', detail.tail())

（1.2）DataFrame的loc和iloc访问方式

				?

									# DataFrame.loc[row labels or condition, column labels]
									dishes_name1 = detail.loc[:, 'dishes_name']
									print("使用loc提取dishes_name列的size：", dishes_name1.size)

									# DataFrame.iloc[row positions, column positions]
									# NOTE(review): position 3 is assumed to be the intended column;
									# confirm against the sheet's column order.
									dishes_name2 = detail.iloc[:, 3]
									print("使用iloc提取第3列的size：", dishes_name2.size)

									# Multi-column selection with loc and iloc.
									orderDish1 = detail.loc[:, ['order_id', 'dishes_name']]
									print(orderDish1.size)
									orderDish2 = detail.iloc[:, [1, 3]]
									print(orderDish2.size)

									# Fancy indexing with loc and iloc.
									print(
									    "列名为order_id和dishes_name 的行名为3的数据：\n",
									    detail.loc[3, ['order_id', 'dishes_name']],
									)
									print(
									    '列名为order_id和dishes_name 行名为2、3、4、5、6的数据为：\n',
									    detail.loc[2:6, ['order_id', 'dishes_name']],
									)
									# loc cannot be used for the next two lookups: loc takes row/column
									# labels (or a condition), whereas iloc takes positions.
									print(
									    '列名1和3，行位置为3的数据为：\n',
									    detail.iloc[3, [1, 3]],
									)
									# Positional slice: the end position 7 is not included.
									print(
									    '列位置为1和3，行位置为2，3，4，5，6的数据和：\n',
									    detail.iloc[2:7, [1, 3]],
									)

									# Conditional selection with loc: the boolean Series is passed directly.
									print(
									    'detail中order_id为458的dishes_name为：\n',
									    detail.loc[detail['order_id'] == 458, ['order_id', 'dishes_name']],
									)
									# Conditional selection with iloc: .values extracts the underlying
									# array of the boolean Series before it is handed to iloc.
									print(
									    "detail中order_id为458 的第1、5列的数据为：\n",
									    detail.iloc[(detail['order_id'] == 458).values, [1, 5]],
									)

（1.3）ix切片方法（注意：ix 已在 pandas 1.0 中被移除，新版本请改用 loc 或 iloc）

				?

									# Compare slicing with loc, iloc and the legacy ix indexer
									# (DataFrame.ix[row label/position/condition, column label or position]).

									# loc: label-based; the row slice 2:6 includes label 6.
									print('列名为dishes_name行名为2，3，4，5，6的数据为：\n', detail.loc[2:6, ['dishes_name']])

									# iloc: position-based; the row slice 2:6 stops before position 6.
									print('列位置为5行名为2~6的数据为：\n', detail.iloc[2:6, 5])

									# DataFrame.ix was removed in pandas 1.0, so the former
									# detail.ix[2:6, 5] is spelled with loc instead: rows by label
									# (end inclusive) and the column at position 5 looked up by its name.
									# NOTE(review): assumes the default integer row index and
									# non-integer column labels, which is how ix resolved the lookup.
									print('列位置为5行名为2~6的数据为：\n', detail.loc[2:6, detail.columns[5]])

2.更改DataFrame中的数据

				?

									# Rewrite order_id 458 as 45800 (an integer, hence no quotes).
									detail.loc[detail['order_id'] == 458, 'order_id'] = 45800

									# Show the rows matching the old value, then those matching the new one.
									print(
									    '更改后detail中的order_id为 458 的：\n',
									    detail.loc[detail['order_id'] == 458, 'order_id'],
									)
									print(
									    '更改后detail中的order_id为 45800 的：\n',
									    detail.loc[detail['order_id'] == 45800, 'order_id'],
									)

									# Write 458 back into the rows that were changed to 45800.
									detail.loc[detail['order_id'] == 45800, 'order_id'] = 458

3.为DataFrame增添数据

				?

									# Add a computed column: element-wise product of counts and amounts.
									detail['payment'] = detail['counts'] * detail['amounts']
									print(
									    'detail新增列payment的前5行数据为：\n',
									    detail['payment'].head(),
									)

									# Add a constant column: the same string is assigned to every row.
									detail['pay_way'] = '现金支付'
									print(
									    'detail新增列的前5行的数据为：\n',
									    detail['pay_way'].head(),
									)

4.删除某行或某列的数据(drop)

									# Drop one column in place (axis=1 addresses columns) and show the
									# column index before and after.
									print('删除pay_way前 detail中的列索引为：\n', detail.columns)
									detail.drop(
									    labels='pay_way',
									    axis=1,
									    inplace=True,
									)
									print('删除pay_way后 detail中的列索引为：\n', detail.columns)

									# Drop the rows labelled 1 through 10 in place (axis=0 addresses rows)
									# and show the row count before and after.
									print('删除1~10行 前 detail的长度：', len(detail))
									detail.drop(
									    labels=range(1, 11),
									    axis=0,
									    inplace=True,
									)
									print('删除1~10行 后 detail的长度：', len(detail))