【数据分析&数据挖掘】计算每日营业额&最火菜品统计

lmseohy

2019-12-29

import pandas as pd
import numpy as np

# Daily revenue: load the order-detail data.
detail = pd.read_excel("./meal_order_detail.xlsx")
print("detail: \n", detail)
print("detail的列名称: \n", detail.columns)

# 1. Revenue of each order line: counts * amounts
#    (presumably quantity sold x unit price -- confirm against the data).
detail["pay"] = detail["counts"] * detail["amounts"]
print(detail)

# 2. Build the per-day grouping key.
# Parse the order timestamps into pandas datetimes first. Plain column
# assignment replaces the whole column, so the datetime64 dtype sticks;
# `detail.loc[:, col] = ...` writes into the existing column and, on recent
# pandas versions, can leave it with its old (object) dtype.
detail["place_order_time"] = pd.to_datetime(detail["place_order_time"])

# Vectorised day-of-month through the .dt accessor instead of a Python loop.
# NOTE(review): this is the day of the month (1-31). If the data ever spans
# more than one month, group by `.dt.date` instead so that same-numbered days
# of different months are not merged.
detail["day"] = detail["place_order_time"].dt.day

print(detail)

# 3. Daily revenue: group the rows by day and sum `pay`.
# Without the aggregation, groupby(...)["pay"] is only a lazy group-by object.
res = detail.groupby(by="day")["pay"].sum()

print("res: \n", res)

# Find the restaurant's most popular dish and how often it was ordered.
import pandas as pd

# Load the order-detail data.
detail = pd.read_excel("./meal_order_detail.xlsx")
print("detail: \n", detail)
print("detail的列名称: \n", detail.columns)

# Convert dishes_name to the category dtype. Plain column assignment is used
# because `detail.loc[:, col] = ...` writes into the existing column and, on
# recent pandas versions, can leave it with its old (object) dtype.
detail["dishes_name"] = detail["dishes_name"].astype("category")

# describe() on a categorical column reports count / unique / top / freq.
print("对于菜品最火及售卖份数的统计分析: \n", detail["dishes_name"].describe())

# "白饭/大碗" is not treated as a dish, so its rows are excluded.
bool_index = detail["dishes_name"] == "白饭/大碗"
# Print the mask itself (the original printed only the literal name).
print("bool_index: \n", bool_index)

# Keep only the rows that are NOT "白饭/大碗". A single boolean-mask filter
# replaces the original drop-by-index followed by a second, no-op filter;
# `detail.drop(index=detail.index[bool_index])` would be equivalent.
# .copy() gives an independent frame so the assignment below is unambiguous.
detail = detail.loc[~bool_index].copy()

# The column is already categorical; discard the now-unused "白饭/大碗" category.
detail["dishes_name"] = detail["dishes_name"].cat.remove_unused_categories()

# Re-run the summary on the filtered data. The original printed the whole
# DataFrame here instead of the describe() result.
# NOTE(review): `freq` is the number of rows for the top dish; if "portions
# sold" should account for a per-row quantity column, sum that column per
# dish instead -- confirm the intended metric.
print("对于菜品最火以及售卖份数的统计分析: \n", detail["dishes_name"].describe())

营业额