我正在尝试使用Pytest对涉及CSV文件的函数进行单元测试。虽然我的函数可以正常工作,但为了测试这些函数而在项目目录中创建“示例”csv文件时,我感觉出现了很多重复代码。保存实际数据的真实csv文件有数百万条记录。
这些并不是我必须在模块中测试的唯一csv文件,因此如果能了解针对不同文件结构测试函数的最佳方法,将大有帮助。
现在,我正在创建一个非常短的csv文件,该文件以单行数据加上通过函数处理文件后的预期数据帧输出来模拟实际文件架构。
也许使用模拟(mocking)才是正确的做法?但是我觉得这种测试应该不需要用到模拟。
@pytest.mark.parametrize('test_file, expected', [
    (r'Path\To\Project\Output\Folder\mock_sales1.csv',
     pd.DataFrame([['A0A0A0', 1, 4000]], columns=['Postal_Code', 'Store_Num', 'Sales'])),
    (r'Path\To\Project\Output\Folder\mock_sales2.csv',
     pd.DataFrame([['A0A0A0', 1, 4000]], columns=['Postal_Code', 'Store_Num', 'Sales'])),
])
def test_sales_dataframe(test_file, expected):
    """Check that sales_dataframe reads 7- and 8-column sample files correctly.

    Both sample files are written on every parametrized run and removed
    afterwards, even when the assertion fails.
    """
    # This part is repetitive: different tests each need a separate file
    # written within the test function.
    # Writing sample file to test that files with 7 columns are read correctly.
    mock_sales1 = [['Data0', 'A0A0A0', 1, 'Data3', 'Data4', 'Data5', 4000]]
    # newline='' stops the csv module's own line endings from being translated again.
    with open(r'Path\To\Project\Output\Folder\mock_sales1.csv', 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerows(mock_sales1)
    # Writing sample file to test that files with 8 columns are read correctly.
    mock_sales2 = [['Data0', 'A0A0A0', 1, 'Data3', 'Data4', 'Data5', 'Data6', 4000]]
    with open(r'Path\To\Project\Output\Folder\mock_sales2.csv', 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerows(mock_sales2)
    try:
        sales_df = sales_dataframe(test_file)
        testing.assert_frame_equal(expected, sales_df)
    finally:
        # Remove the sample files whether or not the comparison succeeded.
        os.remove(r'Path\To\Project\Output\Folder\mock_sales1.csv')
        os.remove(r'Path\To\Project\Output\Folder\mock_sales2.csv')
def sales_dataframe(file):
    """Read the postal code, store number and sales columns from a sales CSV.

    The file is read without a header row. Columns at positions 1 and 2 become
    'Postal_Code' and 'Store_Num'; the last column, whose position depends on
    how many columns the file has, becomes 'Sales'.

    Args:
        file: Path of the CSV file to read.

    Returns:
        A DataFrame with the columns 'Postal_Code', 'Store_Num' and 'Sales'.

    Raises:
        FileNotFoundError: If ``file`` does not exist.
        ValueError: If ``file`` is empty, so its column count cannot be determined.
    """
    # Peek at the first row only to learn how many columns this file has.
    # newline='' is what the csv module expects for files handed to csv.reader.
    with open(file, 'r', newline='') as f:
        first_row = next(csv.reader(f), None)
    if first_row is None:
        # Without this check an empty file would leak a bare StopIteration.
        raise ValueError('CSV file is empty: {}'.format(file))
    # Number of columns is variable, so the index of the last column is
    # computed here to specify exactly which columns should be read.
    columns = [1, 2, len(first_row) - 1]
    return pd.read_csv(file, usecols=columns, names=['Postal_Code', 'Store_Num', 'Sales'])
测试按预期通过。但是,对于每个不同的测试,我都必须在测试函数中创建一个示例csv文件,并在测试完成后删除每个文件。可以想象,单个测试函数中有很多重复的代码,感觉很笨拙,特别是在对测试进行参数化时。
答案 0 :(得分:1)
我认为问题在于您的测试输入和预期输出是紧密关联的,但是位于两个不同的位置,一个位于参数中,另一个位于测试代码中。
如果更改其中一个参数,那么除了代码重复之外,您还必须修改测试方法的主体(在我看来这并不合理)。
我认为您应该把测试的参数设为 (test_data, expected_output),
并将输入注入到临时文件中。
然后调用函数并比较预期输出和实际输出。
@pytest.mark.parametrize('test_data, expected', [
    ([['Data0', 'A0A0A0', 1, 'Data3', 'Data4', 'Data5', 4000]],
     pd.DataFrame([['A0A0A0', 1, 4000]], columns=['Postal_Code', 'Store_Num', 'Sales'])),
    ([['Data0', 'A0A0A0', 1, 'Data3', 'Data4', 'Data5', 'Data6', 4000]],
     pd.DataFrame([['A0A0A0', 1, 4000]], columns=['Postal_Code', 'Store_Num', 'Sales'])),
])
def test_sales_dataframe(test_data, expected, tmp_path):
    """Check sales_dataframe against rows written to a throwaway CSV file.

    Args:
        test_data: Rows to write to the temporary CSV file.
        expected: DataFrame the function should produce for those rows.
        tmp_path: pytest's built-in per-test temporary directory fixture.
    """
    # Write the test data to a file inside pytest's temporary directory, so
    # nothing is left behind in the project even when the assertion fails.
    tmp_file = tmp_path / 'tmp.csv'
    # newline='' stops the csv module's own line endings from being translated again.
    with open(tmp_file, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerows(test_data)
    # Process the data
    sales_df = sales_dataframe(str(tmp_file))
    # Compare expected and actual output
    testing.assert_frame_equal(expected, sales_df)
您还可以创建.csv并将其添加为测试资源,但是输入和预期输出的位置将有所不同。
答案 1 :(得分:1)
减少重复的一种方法是使用 TestCase 的 setUp 和 tearDown 方法
import os
import csv
import unittest
# Name of the scratch CSV file that each test writes and then removes.
test_file = 'test.csv'
# Sample rows written to the scratch file and expected back when reading it.
rows = [
    ['0a', '0b', '0c'],
    ['1a', '1b', '1c'],
]


class TestCsv(unittest.TestCase):
    """Show how setUp/tearDown create and remove a CSV fixture once per test."""

    def setUp(self):
        """Write the sample rows to the scratch file before each test."""
        with open(test_file, 'w', newline='') as csv_file:
            writer = csv.writer(csv_file, dialect='excel')
            writer.writerows(rows)

    def tearDown(self):
        """Delete the scratch file after each test, whether it passed or failed."""
        os.remove(test_file)

    def test_read_line(self):
        """Read the scratch file back and compare its first two rows to the samples."""
        # newline='' is what the csv module expects for files handed to csv.reader.
        with open(test_file, 'r', newline='') as csv_file:
            reader = csv.reader(csv_file, dialect='excel')
            self.assertEqual(next(reader), rows[0])
            self.assertEqual(next(reader), rows[1])


if __name__ == "__main__":
    unittest.main()