您好,登錄后才能下訂單哦!
本篇內容主要講解“怎么用Python可視化圖顯示數據某化妝品企業銷售情況”,感興趣的朋友不妨來看看。本文介紹的方法操作簡單快捷,實用性強。下面就讓小編來帶大家學習“怎么用Python可視化圖顯示數據某化妝品企業銷售情況”吧!
對象:用戶;銷售
關注點:找到影響銷售的增長因素
目標:發現問題&提出解決方案
分析銷售趨勢,找到影響企業營收增長的商品或區域
按月份銷售趨勢圖(整體)
商品銷售額對比(一級、二級,找出最低、最高)
區域銷售額對比(下鉆:區、省,找出最低、最高)
探索不同商品的銷售狀況,為企業的商品銷售,提出策略建議
不同月份的各個產品的銷售額占比情況
產品相關分析
分析用戶特征、購買頻率、留存率等
購買頻率分布
復購率(重復購買用戶數量(兩天都有購買過算重復)/用戶數量)
同期群分析(按月)
獲取數據(excel)
為某化妝品企業 2019 年 1 月-2019 年 9 月每日訂單詳情數據和企業的商品信息數據,包括兩個數據表,銷售訂單表和商品信息表。其中銷售訂單表為每個訂單的情況明細,一個訂單對應一次銷售、一個訂單可包含多個商品。
import warnings

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Select a CJK-capable font so the Chinese titles and labels used by the
# charts below are rendered instead of empty boxes.
mpl.rcParams['font.family'] = 'SimHei'
# Silence every warning for the rest of the session.
warnings.filterwarnings("ignore")

# Load the first sheet of the workbook (the order table).
# `encoding` is not a parameter of `pd.read_excel`; current pandas releases
# reject it as an unexpected keyword argument, so it is not passed here.
data = pd.read_excel('C:/Users/cherich/Desktop/日化.xlsx')
data.head()
# Load the product-information sheet from the same workbook.
# `encoding` is not a parameter of `pd.read_excel`; current pandas releases
# reject it as an unexpected keyword argument, so it is not passed here.
data_info = pd.read_excel(
    'C:/Users/cherich/Desktop/日化.xlsx',
    sheet_name='商品信息表',
)
data_info
數據清洗和加工
# NOTE(review): some column names below contain parenthesised fragments
# (e.g. '訂購數(shù)量') that look like a text-conversion artifact; confirm
# them against the real workbook headers before running.

# Discard every row that has at least one missing value.
data = data.dropna()


def _strip_unit(value, unit):
    """Return ``value`` without its trailing ``unit`` character.

    Values that do not end with ``unit`` (including empty strings) are
    returned unchanged.
    """
    text = str(value)
    return text[:-1] if text.endswith(unit) else value


def proess_date(df):
    """Rewrite a '#'-separated date such as '2019#3#11' as '2019-3-11'.

    Parameters
    ----------
    df : object
        A single cell value from the order-date column.

    Returns
    -------
    object
        The dash-separated string when ``df`` contains '#', otherwise
        ``df`` itself, untouched.

    Raises
    ------
    IndexError
        If the value contains '#' but splits into fewer than three parts.
    """
    text = str(df)
    if '#' not in text:
        return df
    parts = text.split('#')
    return parts[0] + '-' + parts[1] + '-' + parts[2]


def _normalise_cn_date(value):
    """Rewrite a '2019年3月11日'-style date as '2019-3-11'.

    A trailing '日' is removed as well, so the result stays parseable by
    ``pd.to_datetime``. Values without '年' are returned unchanged.
    """
    text = str(value)
    if '年' not in text:
        return value
    return text.replace('年', '-').replace('月', '-').replace('日', '')


# Some quantities carry a trailing '個' unit character.
data['訂購數(shù)量'] = data['訂購數(shù)量'].apply(lambda x: _strip_unit(x, '個'))
data['訂購數(shù)量'] = data['訂購數(shù)量'].astype(int)

# Some unit prices carry a trailing '元' unit character.
data['訂購單價'] = data['訂購單價'].apply(lambda x: _strip_unit(x, '元'))
data['訂購單價'] = data['訂購單價'].astype(int)

# Order dates come in several spellings ('2019#3#11', '2019年3月11日', ...);
# bring them to one dash-separated form before parsing.
data['訂單日期'] = data['訂單日期'].apply(proess_date)
data['訂單日期'] = data['訂單日期'].apply(_normalise_cn_date)
data['訂單日期'] = pd.to_datetime(data['訂單日期'])

# Keep the first occurrence of every fully duplicated row.
data = data.drop_duplicates()

# Two-digit month label ('01' .. '12') used for the monthly groupings.
data['月份'] = data['訂單日期'].dt.strftime('%m')
data
數據可視化
# Combine the two tables: attach the product attributes to every order row.
# A left join keeps order rows even when their product code has no match in
# the product table.
total_data = data.merge(data_info, on='商品編號', how='left')
total_data
# Monthly totals: x = month labels, y = sales amount, z = number of order rows.
groups = data.groupby('月份')
x = [month for month, _ in groups]
y = [frame['金額'].sum() for _, frame in groups]
z = [frame['金額'].count() for _, frame in groups]

# Average per month over the months actually present in the data, rather than
# a fixed divisor; fall back to 1 so an empty table does not divide by zero.
n_months = len(x) or 1
money_mean = data['金額'].sum() / n_months
order_mean = data['金額'].count() / n_months

plt.figure(figsize=(18, 10), dpi=80)

# Left panel: monthly sales amount with the monthly average as a dashed line
# and the span between the '07' and '08' ticks shaded.
plt.subplot(221)
plt.plot(x, y, linewidth=2)
plt.axvspan('07', '08', color='#EE7621', alpha=0.3)
plt.axhline(money_mean, color='#EE7621', linestyle='--', linewidth=1)
plt.title("每月銷售額趨勢圖", color='#4A708B', fontsize=24)
plt.ylabel("金額/(億)", fontsize=16)

# Right panel: monthly order count with the monthly average and a marker
# at the '07' tick.
plt.subplot(222)
plt.plot(x, z, linewidth=2, color='#EE7621')
plt.axvline('07', color='#4A708B', linestyle='--', linewidth=1)
plt.axhline(order_mean, color='#4A708B', linestyle='--', linewidth=1)
plt.title("每月訂單量趨勢圖", color='#4A708B', fontsize=24)
plt.ylabel("訂單/(單)", fontsize=16)

plt.show()
圖表說明:從整體來看,銷售額和訂單量從4月開始大幅度上升,均高于均值;8月份開始呈下降趨勢,處于均值水平。
# Sales per (month, top-level category), pivoted to one row per month so that
# both bar series are aligned by month even when a month has no sales in one
# of the categories (missing combinations become 0).
groups_category = total_data.groupby(['月份', '商品大類'])
monthly = groups_category['金額'].sum().unstack(fill_value=0)

# category1 is the '彩妝' column; category2 collects everything else,
# which is drawn under the '護膚品' label.
is_makeup = monthly.columns == '彩妝'
category1 = monthly.loc[:, is_makeup].sum(axis=1).tolist()
category2 = monthly.loc[:, ~is_makeup].sum(axis=1).tolist()

# Tick labels come from the same pivot, so they always match the bars.
labels = list(monthly.index)
width = 0.5
p = np.arange(len(labels))
xticks = p

fig, ax = plt.subplots(figsize=(18, 8))
# Two bars per month, placed side by side around the month's tick.
rects1 = ax.bar(p - width / 2, category1, width, label='彩妝', color='#FFEC8B')
rects2 = ax.bar(p + width / 2, category2, width, label='護膚品', color='#4A708B')

ax.set_ylabel('銷售額/(億)')
ax.set_title('每月護膚品和彩妝的銷售額對比圖(大類)')
ax.set_xticks(xticks)
ax.set_xticklabels(labels)
ax.legend()
plt.show()
圖表說明:護膚品需求滿足大多數人,明顯高于彩妝。并且5月—8月是護膚品需求旺季。相比彩妝的變化不明顯。
# Total sales per product sub-category: x = sub-category names, y = totals.
groups_categorys = total_data.groupby('商品小類')
sales_by_category = groups_categorys['金額'].sum()
x = list(sales_by_category.index)
y = list(sales_by_category)

fig = plt.figure(figsize=(18, 8), dpi=80)
plt.title('各個品類的銷售額對比圖', color='#4A708B', fontsize=24)
plt.ylabel('銷售額(元)', fontsize=15)

colors = ['#6699cc', '#4A708B', '#CDCD00', '#DAA520', '#EE7621', '#FFEC8B',
          '#CDCD00', '#4A708B', '#6699cc', '#DAA520', '#4A708B', '#FFEC8B']

for i, (name, amount) in enumerate(zip(x, y)):
    # Cycle through the palette so more sub-categories than colours
    # does not raise an IndexError.
    bars = plt.bar(name, amount, width=0.8, color=colors[i % len(colors)])
    for rect in bars:
        # Print the integer total just above each bar.
        height = rect.get_height()
        plt.text(rect.get_x() + rect.get_width() / 2, height + 1, int(height),
                 ha="center", fontsize=12)

plt.xticks(fontsize=15)
plt.grid()
plt.show()
圖表說明:面膜的銷售額第一,其次是面霜、爽膚水。銷售額最低的是蜜粉,眼影。
# NOTE(review): the region column name and the replacement strings below
# contain parenthesised fragments that look like a text-conversion artifact;
# confirm them against the real data before running.

# Drop rows with any missing value (e.g. orders without a product match).
total_data = total_data.dropna()


def _fix_region(value):
    """Apply the two spelling replacements used to normalise region names."""
    return str(value).replace('男區(qū)', '南區(qū)').replace('西 區(qū)', '西區(qū)')


total_data['所在區(qū)域'] = total_data['所在區(qū)域'].apply(_fix_region)

# Sub-category names are needed to zero-fill the table; define them here so
# this cell does not depend on a later cell having been run first.
category_names = list(total_data['商品小類'].unique())

# Build {region: {sub-category: sales}}, starting every region at 0 for all
# sub-categories and storing each total under its own sub-category key.
groups_area = total_data.groupby(['所在區(qū)域', '商品小類'])
results = {}
for (area, cate), frame in groups_area:
    if area not in results:
        results[area] = dict.fromkeys(category_names, 0)
    results[area][cate] = int(frame['金額'].sum())
# Flatten to {region: [sales per sub-category, in category_names order]}.
results = {area: list(row.values()) for area, row in results.items()}


def survey1(results, category_names):
    """Draw a horizontal stacked bar chart with a value label on each segment.

    Parameters
    ----------
    results : dict
        Maps each bar label to a list of numbers, one per entry of
        ``category_names`` and in the same order.
    category_names : list of str
        Legend labels for the stacked segments.

    Returns
    -------
    tuple
        The ``(fig, ax)`` pair of the created chart.
    """
    labels = list(results.keys())
    values = np.array(list(results.values()))
    # The running total along each row gives the right edge of every segment.
    values_cum = values.cumsum(axis=1)
    category_colors = plt.get_cmap('RdYlGn')(
        np.linspace(0.15, 0.85, values.shape[1]))

    fig, ax = plt.subplots(figsize=(25, 8))
    ax.invert_yaxis()
    ax.xaxis.set_visible(False)
    ax.set_xlim(0, np.sum(values, axis=1).max())

    for i, (colname, color) in enumerate(zip(category_names, category_colors)):
        widths = values[:, i]
        starts = values_cum[:, i] - widths
        ax.barh(labels, widths, left=starts, height=0.5,
                label=colname, color=color)
        xcenters = starts + widths / 2

        # White text on darker segments, grey text on lighter ones.
        r, g, b, _ = color
        text_color = 'white' if r * g * b < 0.5 else 'darkgrey'
        for y, (x, c) in enumerate(zip(xcenters, widths)):
            ax.text(x, y, str(int(c)), ha='center', va='center',
                    color=text_color)
    ax.legend(ncol=len(category_names), bbox_to_anchor=(0, 1),
              loc='lower left', fontsize='small')
    return fig, ax


survey1(results, category_names)
plt.show()
圖表說明:東部地區占市場份額的35%左右,份額最低的是西部地區。
# Despite its name, area_names holds the product sub-category names; they are
# the stacked segments (and legend entries) of the per-province chart.
area_names = list(total_data['商品小類'].unique())

# Build {province: {sub-category: sales}}, starting every province at 0 for
# all sub-categories and storing each total under its own sub-category key.
groups_priv = total_data.groupby(['所在省份', '商品小類'])
results = {}
for (province, cate), frame in groups_priv:
    if province not in results:
        results[province] = dict.fromkeys(area_names, 0)
    results[province][cate] = int(frame['金額'].sum())
# Flatten to {province: [sales per sub-category, in area_names order]}.
results = {province: list(row.values()) for province, row in results.items()}


def survey2(results, category_names):
    """Draw a tall horizontal stacked bar chart without value labels.

    Parameters
    ----------
    results : dict
        Maps each bar label to a list of numbers, one per entry of
        ``category_names`` and in the same order.
    category_names : list of str
        Legend labels for the stacked segments.

    Returns
    -------
    tuple
        The ``(fig, ax)`` pair of the created chart.
    """
    labels = list(results.keys())
    values = np.array(list(results.values()))
    # The running total along each row gives the right edge of every segment.
    values_cum = values.cumsum(axis=1)
    category_colors = plt.get_cmap('RdYlGn')(
        np.linspace(0.15, 0.85, values.shape[1]))

    fig, ax = plt.subplots(figsize=(25, 20))
    ax.invert_yaxis()
    ax.xaxis.set_visible(False)
    ax.set_xlim(0, np.sum(values, axis=1).max())

    for i, (colname, color) in enumerate(zip(category_names, category_colors)):
        widths = values[:, i]
        starts = values_cum[:, i] - widths
        ax.barh(labels, widths, left=starts, height=0.5,
                label=colname, color=color)
    ax.legend(ncol=len(category_names), bbox_to_anchor=(0, 1),
              loc='lower left', fontsize='small')
    return fig, ax


survey2(results, area_names)
plt.show()
圖表說明:江蘇銷售額第一,其次是廣東省;銷售額最低的是寧夏、內蒙、海南
import numpy as np
import matplotlib.pyplot as plt

category_names = list(total_data['商品小類'].unique())

# Build {month: {sub-category: sales}}, starting every month at 0 for all
# sub-categories and storing each total under its own sub-category key.
groups_small_category = total_data.groupby(['月份', '商品小類'])
results = {}
for (month, cate), frame in groups_small_category:
    if month not in results:
        results[month] = dict.fromkeys(category_names, 0)
    results[month][cate] = int(frame['金額'].sum())
# Flatten to {month: [sales per sub-category, in category_names order]}.
results = {month: list(row.values()) for month, row in results.items()}


def survey(results, category_names):
    """Draw a horizontal stacked bar chart without value labels.

    Parameters
    ----------
    results : dict
        Maps each bar label to a list of numbers, one per entry of
        ``category_names`` and in the same order.
    category_names : list of str
        Legend labels for the stacked segments.

    Returns
    -------
    tuple
        The ``(fig, ax)`` pair of the created chart.
    """
    labels = list(results.keys())
    values = np.array(list(results.values()))
    # The running total along each row gives the right edge of every segment.
    values_cum = values.cumsum(axis=1)
    category_colors = plt.get_cmap('RdYlGn')(
        np.linspace(0.15, 0.85, values.shape[1]))

    fig, ax = plt.subplots(figsize=(25, 8))
    ax.invert_yaxis()
    ax.xaxis.set_visible(False)
    ax.set_xlim(0, np.sum(values, axis=1).max())

    for i, (colname, color) in enumerate(zip(category_names, category_colors)):
        widths = values[:, i]
        starts = values_cum[:, i] - widths
        ax.barh(labels, widths, left=starts, height=0.5,
                label=colname, color=color)
    ax.legend(ncol=len(category_names), bbox_to_anchor=(0, 1),
              loc='lower left', fontsize='small')
    return fig, ax


survey(results, category_names)
plt.show()
圖表說明:眼霜、爽膚水、面膜:4,5,6,7,8月份需求量最大;粉底、防曬霜、隔離霜、睫毛膏、蜜粉1,2,3月份需求量最大。
# Number of order rows recorded for each customer code.
# NOTE(review): the chart strings below contain parenthesised fragments that
# look like a text-conversion artifact; they are kept exactly as found.
orders_per_row = total_data['訂單編碼']
data_user_buy = orders_per_row.groupby(total_data['客戶編碼']).count()
data_user_buy

# Histogram of how many customers fall into each purchase-count bin.
fig, ax = plt.subplots(figsize=(10, 4), dpi=80)
ax.hist(data_user_buy, color='#FFEC8B')
ax.set_title('用戶購買次數(shù)分布', fontsize=16)
ax.set_xlabel('購買次數(shù)')
ax.set_ylabel('用戶數(shù)')
plt.show()
圖表說明:大部分用戶購買次數在10次-35次之間,極少部分用戶購買次數80次以上
# Number of distinct order dates per customer code (missing dates, if any,
# count as one distinct value).
date_rebuy = (
    total_data.groupby('客戶編碼')['訂單日期']
    .nunique(dropna=False)
    .rename('rebuy_count')
)
date_rebuy

# A customer counts as a repeat buyer when they ordered on two or more
# distinct dates; the rate is their share of all customers.
repeat_buyers = date_rebuy.loc[date_rebuy >= 2]
rebuy_rate = repeat_buyers.count() / date_rebuy.count()
print('復購率:', round(rebuy_rate, 4))
# Year-month label ('YYYY-MM') of every order row.
total_data['時間標簽'] = total_data['訂單日期'].astype(str).str[:7]
# Rows labelled 2050-06 are excluded before the cohorts are built
# (presumably a data-entry error -- confirm against the source data).
total_data = total_data[total_data['時間標簽'] != '2050-06']
total_data = total_data.sort_values(by='時間標簽')
month_lst = total_data['時間標簽'].unique()

# Distinct customers seen in each month, computed once instead of
# re-filtering the whole table inside the loops.
users_by_month = {
    month: set(frame['客戶編碼'].dropna())
    for month, frame in total_data.groupby('時間標簽')
}

# One row per cohort month (the last month is skipped: it has no later month
# to measure retention against). Column 0 holds the number of customers first
# seen in that month; column k holds how many of them ordered again k months
# later. Unused trailing cells stay 0 so all rows have the same length.
rows = []
seen_users = set()
for i, month in enumerate(month_lst[:-1]):
    count = [0] * len(month_lst)
    # New customers are those not present in any earlier month.
    new_target_users = users_by_month[month] - seen_users
    seen_users |= users_by_month[month]
    count[0] = len(new_target_users)
    for ct, later_month in enumerate(month_lst[i + 1:], start=1):
        count[ct] = len(new_target_users & users_by_month[later_month])
    rows.append(count)

# Column labels follow the number of months present instead of a fixed list
# of nine names, so the table also works for a different date range.
columns = ['當月新增'] + ['+{}月'.format(k) for k in range(1, len(month_lst))]
final = pd.DataFrame(rows, index=list(month_lst[:-1]), columns=columns)

# Retention rate = returning customers / new customers of the cohort.
# A cohort with no new customers yields NaN for its rates.
result = final.divide(final['當月新增'], axis=0)
# Keep the absolute cohort size in the first column instead of the ratio 1.0.
result['當月新增'] = final['當月新增']
result.round(2)
到此,相信大家對“怎么用Python可視化圖顯示數據某化妝品企業銷售情況”有了更深的了解,不妨來實際操作一番吧!這里是億速云網站,更多相關內容可以進入相關頻道進行查詢,關注我們,繼續學習!
免責聲明:本站發布的內容(圖片、視頻和文字)以原創、轉載和分享為主,文章觀點不代表本網站立場,如果涉及侵權請聯系站長郵箱:is@yisu.com進行舉報,并提供相關證據,一經查實,將立刻刪除涉嫌侵權內容。