我是 Python 的初学者。我正在编写以下脚本:
代码:
import csv
import os
import pandas as pd
# Directory whose entries are each read as a CSV file.
path = "/Users/<mylocaldir>/Documents/Data/"
file_list = os.listdir(path)
# NOTE(review): the indentation of this listing was lost; the statements
# below are presumably the body of the loop -- confirm against the original.
for file in file_list:
fullpath = os.path.join(path, file)
data = pd.read_csv(fullpath)
# drop_duplicates() returns a new DataFrame without the repeated rows.
newdata = data.drop_duplicates()
# Intended output path: the source path with ".csv" replaced by "_new.csv".
newfile = fullpath.replace(".csv","_new.csv")
# NOTE(review): "newfile" is a quoted string literal here, so the output is
# written to a file literally named "newfile" instead of the path stored in
# the variable newfile computed on the previous line.
newdata.to_csv ("newfile", index=True, header=True)
运行脚本时没有显示任何错误,但是 *_new.csv 文件并未创建。
有人能帮我解决这个问题吗?
答案 0 :(得分:0)
我不熟悉 pandas,但您并不需要它。您可以尝试如下做法:
import os


def unique_lines(lines):
    """Yield each line of *lines* the first time it appears, keeping order.

    Lines are compared without their trailing line terminator, so a final
    line that lacks a newline still matches an earlier identical row.
    The yielded lines themselves are passed through unmodified.
    """
    # A set gives constant-time membership tests; a list would make the
    # whole pass quadratic in the number of lines.
    seen = set()
    for line in lines:
        key = line.rstrip('\r\n')
        if key not in seen:
            seen.add(key)
            yield line


# Every entry of the current working directory is a candidate.
file_list = os.listdir()

# Loop through the list.
for filename in file_list:
    # Don't process any non-CSV file.
    if not filename.endswith('.csv'):
        continue

    # Swap only the trailing extension; str.replace would also rewrite any
    # earlier ".csv" occurring inside the file name.
    new_file = filename[:-len('.csv')] + '_new.csv'

    # Open two files: the CSV to read and the new CSV to write. Each line is
    # written only the first time it is encountered.
    with open(filename, 'r') as fr, open(new_file, 'w') as fw:
        fw.writelines(unique_lines(fr))

print('Done')
结果
原始文件
cat name.csv
id,name
1,john
1,john
2,matt
1,john
新文件
cat name_new.csv
id,name
1,john
2,matt
另一个原始文件
cat pay.csv
id,pay
1,100
2,300
1,100
4,400
4,400
2,300
4,400
这是新文件
id,pay
1,100
2,300
4,400
答案 1 :(得分:0)
更新
以下脚本略作修改即可从Src文件夹读取并写入Dest文件夹:
Traceback (most recent call last):
File "manage.py", line 21, in <module>
main()
File "manage.py", line 17, in main
execute_from_command_line(sys.argv)
File "/Users/peggy/candles/.env/lib/python3.7/site-packages/django/core/management/__init__.py", line 401, in execute_from_command_line
utility.execute()
File "/Users/peggy/candles/.env/lib/python3.7/site-packages/django/core/management/__init__.py", line 395, in execute
self.fetch_command(subcommand).run_from_argv(self.argv)
File "/Users/peggy/candles/.env/lib/python3.7/site-packages/django/core/management/base.py", line 328, in run_from_argv
self.execute(*args, **cmd_options)
File "/Users/peggy/candles/.env/lib/python3.7/site-packages/django/core/management/base.py", line 369, in execute
output = self.handle(*args, **options)
File "/Users/peggy/candles/.env/lib/python3.7/site-packages/django/core/management/commands/check.py", line 64, in handle
fail_level=getattr(checks, options['fail_level']),
File "/Users/peggy/candles/.env/lib/python3.7/site-packages/django/core/management/base.py", line 395, in check
include_deployment_checks=include_deployment_checks,
File "/Users/peggy/candles/.env/lib/python3.7/site-packages/django/core/management/base.py", line 382, in _run_checks
return checks.run_checks(**kwargs)
File "/Users/peggy/candles/.env/lib/python3.7/site-packages/django/core/checks/registry.py", line 72, in run_checks
new_errors = check(app_configs=app_configs)
File "/Users/peggy/candles/.env/lib/python3.7/site-packages/django/core/checks/urls.py", line 40, in check_url_namespaces_unique
all_namespaces = _load_all_namespaces(resolver)
File "/Users/peggy/candles/.env/lib/python3.7/site-packages/django/core/checks/urls.py", line 57, in _load_all_namespaces
url_patterns = getattr(resolver, 'url_patterns', [])
File "/Users/peggy/candles/.env/lib/python3.7/site-packages/django/utils/functional.py", line 48, in __get__
res = instance.__dict__[self.name] = self.func(instance)
File "/Users/peggy/candles/.env/lib/python3.7/site-packages/django/urls/resolvers.py", line 588, in url_patterns
patterns = getattr(self.urlconf_module, "urlpatterns", self.urlconf_module)
File "/Users/peggy/candles/.env/lib/python3.7/site-packages/django/utils/functional.py", line 48, in __get__
res = instance.__dict__[self.name] = self.func(instance)
File "/Users/peggy/candles/.env/lib/python3.7/site-packages/django/urls/resolvers.py", line 581, in urlconf_module
return import_module(self.urlconf_name)
File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.7/lib/python3.7/importlib/__init__.py", line 127, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 1006, in _gcd_import
File "<frozen importlib._bootstrap>", line 983, in _find_and_load
File "<frozen importlib._bootstrap>", line 967, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 677, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 728, in exec_module
File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
File "/Users/peggy/candles/candles/urls.py", line 29, in <module>
path('review/', include('review.urls')),
File "/Users/peggy/candles/.env/lib/python3.7/site-packages/django/urls/conf.py", line 34, in include
urlconf_module = import_module(urlconf_module)
File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.7/lib/python3.7/importlib/__init__.py", line 127, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 1006, in _gcd_import
File "<frozen importlib._bootstrap>", line 983, in _find_and_load
File "<frozen importlib._bootstrap>", line 967, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 677, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 728, in exec_module
File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
File "/Users/peggy/candles/review/urls.py", line 3, in <module>
from review import views
File "/Users/peggy/candles/review/views.py", line 4, in <module>
from .forms import *
File "/Users/peggy/candles/review/forms.py", line 6, in <module>
class ReviewForm(forms.Form):
File "/Users/peggy/candles/review/forms.py", line 12, in ReviewForm
widget=forms.Select(choices=SCORE_CHOICES),
NameError: name 'SCORE_CHOICES' is not defined
我还添加了一项检查,以查看目标文件夹中是否已经存在文件。
我很想知道是否有更好的方法来编写此脚本。