本文共 9183 字,大约阅读时间需要 30 分钟。
"""Matplotlib / pandas / seaborn plotting walkthrough.

Each section below loads one data set and draws one chart type: pie, bar,
stacked bar, grouped bar, histogram, KDE, box plot, violin plot, line chart,
scatter plot, bubble chart, heat map and finally a word cloud.

The sections run top to bottom and share state: ``GDP`` is sorted in place in
the horizontal-bar section and reused, already sorted, by the seaborn bar plot.
"""
import jieba
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import wordcloud

# Data directories. Concatenation yields exactly the Windows paths the
# tutorial was written against.
BOOK_DIR = r'E:\PyCharmProject\venv\StataAnalysis_Files\从零开始学Python--数据分析与挖掘'
CH5_DIR = BOOK_DIR + r'\第5章 Python数据处理工具--Pandas'
CH6_DIR = BOOK_DIR + r'\第6章 Python数据可视化'

# Use a CJK-capable font and keep the minus sign renderable with it.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# ---------------------------------------------------------------- pie chart
edu = [0.2515, 0.3724, 0.3336, 0.0368, 0.0057]
labels = ['中专', '大专', '本科', '硕士', '其他']
plt.pie(x=edu, labels=labels, autopct='%.1f%%')
plt.title('失信用户的教育水平分布')
plt.show()

# ------------------------------------------------------- vertical bar chart
GDP = pd.read_excel(CH6_DIR + r'\Province GDP 2017.xlsx')
plt.bar(range(GDP.shape[0]), GDP.GDP, tick_label=GDP.Province, color='steelblue')
plt.ylabel('GDP(万亿)')
plt.title('2017年度6个省份GDP分布')
# Print each value slightly above its bar.
for x, y in enumerate(GDP.GDP):
    plt.text(x, y + 0.1, f'{round(y, 1)}', ha='center')
plt.show()

# ----------------------------------------------------- horizontal bar chart
# In-place sort: the seaborn bar plot further down sees the sorted frame too.
GDP.sort_values(by='GDP', inplace=True)
plt.barh(range(GDP.shape[0]), width=GDP.GDP, tick_label=GDP.Province, color='steelblue')
plt.xlabel('GDP(万亿)')
plt.title('2017年度6个省份GDP分布')
for y, x in enumerate(GDP.GDP):
    plt.text(x + 0.1, y, f'{round(x, 1)}', va='center')
plt.show()

# -------------------------------------------------------- stacked bar chart
Industry_GDP = pd.read_excel(CH6_DIR + r'\Industry_GDP.xlsx')
Quarters = Industry_GDP.Quarter.unique()
# Take plain arrays: the three filtered Series keep disjoint row labels from
# the parent frame, so adding them as Series would align on the index and
# produce NaN instead of the positional sum needed for `bottom`.
Industry1 = Industry_GDP.GPD[Industry_GDP.Industry_Type == '第一产业'].values
Industry2 = Industry_GDP.GPD[Industry_GDP.Industry_Type == '第二产业'].values
Industry3 = Industry_GDP.GPD[Industry_GDP.Industry_Type == '第三产业'].values
quarter_pos = range(len(Quarters))
plt.bar(quarter_pos, height=Industry1, color='steelblue', label='第一产业')
plt.bar(quarter_pos, height=Industry2, bottom=Industry1, color='green', label='第二产业')
plt.bar(quarter_pos, height=Industry3, bottom=Industry1 + Industry2, color='red', label='第三产业')
plt.ylabel('生成总值(亿)')
plt.title('2017年各季度三产业总值')
plt.legend()
plt.show()

# ---------------------------------------------- grouped bar chart (pyplot)
HuRun = pd.read_excel(CH5_DIR + r'\HuRun.xlsx')
Cities = HuRun.City.unique()
Counts2016 = HuRun.Counts[HuRun.Year == 2016]
Counts2017 = HuRun.Counts[HuRun.Year == 2017]
bar_width = 0.4
city_pos = np.arange(len(Cities))
plt.bar(city_pos, height=Counts2016, label='2016', color='steelblue', width=bar_width)
plt.bar(city_pos + bar_width, height=Counts2017, label='2017', color='indianred', width=bar_width)
# Centre each tick between the two bars of its city.
plt.xticks(city_pos + bar_width / 2, Cities)
plt.ylabel('亿万资产家庭数')
plt.title('近两年5个城市亿万资产家庭数比较')
plt.legend()
plt.show()

# ---------------------------------------------- grouped bar chart (pandas)
HuRun_reshape = HuRun.pivot_table(index='City', columns='Year', values='Counts').reset_index()
HuRun_reshape.sort_values(by=2016, ascending=False, inplace=True)
HuRun_reshape.plot(
    x='City',
    y=[2016, 2017],
    kind='bar',
    color=['steelblue', 'indianred'],
    rot=0,
    width=0.8,
    title='近两年5个城市亿万资产家庭数比较',
)
plt.xlabel('')
plt.ylabel('亿万资产家庭数')
plt.show()

# -------------------------------------------------------- seaborn bar plot
sns.barplot(y='Province', x='GDP', data=GDP, color='steelblue', orient='horizontal')
plt.xlabel('GDP(万亿)')
plt.ylabel('')
plt.title('2017年度6个省份GDP分布')
for y, x in enumerate(GDP.GDP):
    plt.text(x, y, f'{round(x, 1)}', va='center')
plt.show()

# ---------------------------------------------------------------- histogram
# NOTE(review): the original snippet uses `Titanic` without ever loading it.
# The file name below is an assumption; point it at the real Titanic data set
# (it must provide `Age` and `Sex` columns) before running.
Titanic = pd.read_csv(CH6_DIR + r'\titanic_train.csv')
Titanic.dropna(subset=['Age'], inplace=True)
plt.hist(Titanic.Age, bins=20, color='steelblue', edgecolor='black')
plt.xlabel('年龄')
plt.ylabel('频数')
plt.title('乘客年龄分布')
plt.show()

# ------------------------------------------------- histogram + KDE overlay
Titanic.Age.plot(kind='hist', bins=20, color='steelblue', edgecolor='black', density=True, label='直方图')
Titanic.Age.plot(kind='kde', color='red', label='核密度图')
plt.xlabel('年龄')
plt.ylabel('核密度值')
plt.title('乘客年龄分布')
plt.legend()
plt.show()

# ------------------------------------------ per-sex histograms and KDEs
# NOTE(review): `sns.distplot` is deprecated in recent seaborn releases in
# favour of `histplot` / `kdeplot`; the calls are kept as the tutorial wrote
# them.
Age_Male = Titanic.Age[Titanic.Sex == 'male']
Age_Female = Titanic.Age[Titanic.Sex == 'female']
sns.distplot(Age_Male, bins=20, kde=False, hist_kws={'color': 'steelblue'}, label='男性')
sns.distplot(Age_Female, bins=20, kde=False, hist_kws={'color': 'purple'}, label='女性')
plt.title('男女乘客的年龄直方图')
plt.legend()
plt.show()

sns.distplot(Age_Male, hist=False, kde_kws={'color': 'red', 'linestyle': '-'}, norm_hist=True, label='男性')
sns.distplot(Age_Female, hist=False, kde_kws={'color': 'black', 'linestyle': '--'}, norm_hist=True, label='女性')
plt.title('男女乘客的年龄核密度图')
plt.legend()
plt.show()

# ---------------------------------------------------------------- box plots
Sec_Buildings = pd.read_excel(CH6_DIR + r'\sec_buildings.xlsx')
# Styling shared by both box plots.
BOX_STYLE = {
    'boxprops': {'color': 'black', 'facecolor': 'steelblue'},
    'flierprops': {'marker': 'o', 'markerfacecolor': 'red', 'markersize': 3},
    'meanprops': {'marker': 'D', 'markerfacecolor': 'indianred', 'markersize': 4},
    'medianprops': {'linestyle': '--', 'color': 'orange'},
}
plt.boxplot(x=Sec_Buildings.price_unit, patch_artist=True, showmeans=True, **BOX_STYLE)
plt.title('二手房单价分布的箱线图')
plt.show()

# One box per region, ordered by descending mean unit price.
group_region = (
    Sec_Buildings.groupby('region')
    .agg({'price_unit': 'mean'})
    .sort_values('price_unit', ascending=False)
)
region_price = [
    Sec_Buildings.price_unit[Sec_Buildings.region == region]
    for region in group_region.index
]
plt.boxplot(x=region_price, labels=group_region.index, patch_artist=True, **BOX_STYLE)
plt.ylabel('单价(元)')
plt.title('不同行政区域的二手房单价对比')
plt.show()

# -------------------------------------------------------------- violin plot
tips = pd.read_csv(CH6_DIR + r'\tips.csv')
sns.violinplot(
    x='total_bill',
    y='day',
    hue='sex',
    data=tips,
    order=['Thur', 'Fri', 'Sat', 'Sun'],
    scale='count',
    split=True,
    palette='RdBu',
)
plt.title('每天不同性别客户的消费额情况')
plt.legend(loc='upper center', ncol=2)
plt.show()

# -------------------------------------------------------------- line charts
wechat = pd.read_excel(CH6_DIR + r'\wechat.xlsx')
plt.plot(
    wechat.Date,
    wechat.Counts,
    linestyle='-',
    linewidth=2,
    color='steelblue',
    marker='o',
    markersize=6,
    markeredgecolor='black',
    markerfacecolor='brown',
)
plt.ylabel('人数')
plt.title('每天微信文章阅读人数趋势')
plt.show()

plt.plot(wechat.Date, wechat.Counts, linestyle='-', color='steelblue', label='阅读人数')
plt.plot(wechat.Date, wechat.Times, linestyle='--', color='indianred', label='阅读人次')
ax = plt.gca()
# Show dates as month-day and limit the axis to ten evenly spaced ticks.
date_format = mpl.dates.DateFormatter("%m-%d")
ax.xaxis.set_major_formatter(date_format)
xlocator = mpl.ticker.LinearLocator(10)
ax.xaxis.set_major_locator(xlocator)
plt.xticks(rotation=45)
plt.ylabel('人数')
plt.title('每天微信文章阅读人数与人次趋势')
plt.legend()
plt.show()

weather = pd.read_excel(CH6_DIR + r'\weather.xlsx')
data = weather.pivot_table(index='month', columns='year', values='high')
# `style` is a DataFrame.plot argument (one line style per column);
# pyplot.plot does not accept it, so draw through pandas.
data.plot(kind='line', style=['-', '--', ':'])
plt.xlabel('月份')
plt.ylabel('气温')
plt.title('每月平均最高气温波动趋势')
plt.show()

# ------------------------------------------------------------ scatter plots
iris = pd.read_csv(CH6_DIR + r'\iris.csv')
plt.scatter(iris.Petal_Width, iris.Petal_Length, color='steelblue')
plt.xlabel('花瓣宽度')
plt.ylabel('花瓣长度')
plt.title('鸢尾花的花瓣宽度与长度关系')
plt.show()

iris.plot(x='Petal_Width', y='Petal_Length', kind='scatter', title='鸢尾花的花瓣宽度与长度关系')
plt.xlabel('花瓣宽度')
plt.ylabel('花瓣长度')
plt.show()

sns.lmplot(x='Petal_Width', y='Petal_Length', hue='Species', data=iris, legend_out=False, truncate=True)
plt.xlabel('花瓣宽度')
plt.ylabel('花瓣长度')
plt.title('鸢尾花的花瓣宽度与长度关系')
plt.show()

# ------------------------------------------------------------- bubble chart
Prod_Category = pd.read_excel(CH6_DIR + r'\SuperMarket.xlsx')
# Min-max scale the profit ratio; the small offset keeps the smallest bubble
# from having zero area.
range_diff = Prod_Category.Profit_Ratio.max() - Prod_Category.Profit_Ratio.min()
Prod_Category['std_ratio'] = (
    (Prod_Category.Profit_Ratio - Prod_Category.Profit_Ratio.min()) / range_diff + 0.001
)
for category, colour in (('办公用品', 'steelblue'), ('技术产品', 'indianred'), ('家具产品', 'black')):
    subset = Prod_Category[Prod_Category.Category == category]
    plt.scatter(
        subset.Sales,
        subset.Profit,
        s=subset.std_ratio * 1000,
        color=colour,
        label=category,
        alpha=0.6,
    )
plt.xlabel('销售额')
plt.ylabel('利润')
plt.title('销售额、利润及利润率的气泡图')
plt.legend()
plt.show()

# ----------------------------------------------------------------- heat map
Sales = pd.read_excel(CH6_DIR + r'\Sales.xlsx')
Sales['year'] = Sales.Date.dt.year
Sales['month'] = Sales.Date.dt.month
Summary = Sales.pivot_table(index='month', columns='year', values='Sales', aggfunc='sum')
sns.heatmap(data=Summary, cmap='PuBuGn', linewidths=.1, annot=True, fmt='.1e')
plt.title('每年各月份销售总额热力图')
plt.show()

# --------------------------------------------------------------- word cloud
# Read the corpus with a context manager so the file handle is closed.
with open(r'text.txt', encoding='utf-8') as text_file:
    text = text_file.read()


def split_words(text):
    """Segment *text* with jieba and save a word-cloud image of the tokens.

    The tokens are joined with commas, rendered with the SimHei font on a
    white background, and written to ``词云分词.png`` in the working directory.

    Args:
        text: The raw text to segment.
    """
    cut_text = jieba.cut(text)
    string = ','.join(cut_text)
    stop_words = ['我们', '你们']
    word_cloud = wordcloud.WordCloud(
        font_path=r'.\simhei.ttf',
        background_color='white',
        width=500,
        height=350,
        max_font_size=100,
        min_font_size=10,
        stopwords=stop_words,
        scale=15,
    )
    word_cloud.generate(string)
    word_cloud.to_file(r'词云分词.png')


split_words(text=text)
以上为多种数据可视化方法的实例展示,每种方法都配有完整的代码和图表说明,适用于不同数据分析场景。
转载地址:http://mhwu.baihongyu.com/