Python - 数据显示不正确(Matplotlib)

时间:2017-01-16 06:16:49

标签: python numpy matplotlib

我是python的新手,我有一个问题。我正在从CSV文件加载数据,删除重复,保存删除的重复csv文件,然后加载正确的CSV文件并生成图形。然而,我的问题是,图表没有正确显示,因为总数是错误的。我知道程序运行正常,因为如果我删除第1部分(请参阅下面的#section1),我会显示正确的数据。我看不出第1节中的内容会扭曲数据......任何帮助都会非常感激。感谢。

摘要:无法在同一个py文件中运行第1节和第2节,否则数据会被错误地计算。想知道为什么?如何在不运行单独的py文件的情况下避免它。

from collections import Counter
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import csv
import itertools

第1节

 # Create database of duplicates - check if the mac and os pairs have     duplicates
reader = csv.reader(open('Workbook1.csv', 'r'), delimiter=',')
writer = csv.writer(open('remacos.csv', 'w'), delimiter=',')
entries = set()

for row in reader:
key = (row[1], row[2])

if key not in entries:
    writer.writerow(row)
    entries.add(key)

entries.clear()
# Create database of duplicates - check if the mac and browser pairs     have duplicates
reader = csv.reader(open('Workbook1.csv', 'r'), delimiter=',')    
writer = csv.writer(open('remacbrowser.csv', 'w'), delimiter=',')
entries = set()

for row in reader:
key = (row[1], row[3])

if key not in entries:
    writer.writerow(row)
    entries.add(key)

第2节

# Read Removed Duplicated entries Database and Count Values for OS.
df = pd.read_csv('remacos.csv', index_col="mac")         
counteros = Counter(df['os'])
os_names = counteros.keys()
os_counts = counteros.values()

# Read Removed Duplicated entries Database and Count Values for     Browsers.
df = pd.read_csv('remacbrowser.csv', index_col="mac")         
counterbrowsers = Counter(df['browser'])
browser_names = counterbrowsers.keys()
browser_counts = counterbrowsers.values()

创建2个条形图和饼图

# Plot histogram using matplotlib bar() for OS.
indexes = np.arange(len(os_names))
width = 0.7
plt.bar(indexes, os_counts, width)
plt.xticks(indexes + width * 0.5, os_names)
plt.show()

# Plot histogram using matplotlib bar() for Browsers.
indexes = np.arange(len(browser_names))
width = 0.7
plt.bar(indexes, browser_counts, width)
plt.xticks(indexes + width * 0.5, browser_names)
plt.show()

# Make Pie Chart for OS's
plt.figure()
values = os_counts
labels = os_names
def make_autopct(values):
def my_autopct(pct):
total = sum(values)
val = int(round(pct*total/100.0))
return '{p:.2f}%  ({v:d})'.format(p=pct,v=val)
return my_autopct
plt.pie(values, labels=labels, autopct=make_autopct(values))
#plt.pie(values, labels=labels) #autopct??
plt.show()
# Make Pie Chart for Browsers
plt.figure()
values = browser_counts
labels = browser_names
def make_autopct(values):
def my_autopct(pct):
total = sum(values)
val = int(round(pct*total/100.0))
return '{p:.2f}%  ({v:d})'.format(p=pct,v=val)
return my_autopct
plt.pie(values, labels=labels, autopct=make_autopct(values))
#plt.pie(values, labels=labels) #autopct??
plt.show()'

1 个答案:

答案 0 :(得分:-1)

解决方案是在每个创建重复数据库

之后添加'del writer'