如何使用python 3.7在CSV文件中进行读写?

时间:2019-08-25 02:57:15

标签: python-3.7 import-csv

我正在预处理我的文本数据。数据在CSV文件(输入文件)中。现在,我想打开我的数据,读取它,然后将结果保存到另一个CSV文件(输出文件)中。 我尝试了不同的方法,并且也查看了Internet和StackOverflow,但是没有一个答案可以解决我的问题。

import re, string, unicodedata
import nltk
import csv
import inflect
from nltk.stem import LancasterStemmer,  WordNetLemmatizer
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
# Module-level stemmer / lemmatizer instances.
ps = PorterStemmer()
# WordNetLemmatizer must be instantiated: binding the bare class would make
# any later `lem.lemmatize(word)` call fail for lack of a `self` argument.
lem = WordNetLemmatizer()

# Read every cell of the input CSV and tokenise the combined text.
# The reader is lazy, so it has to be consumed while the file is still open;
# newline='' is what the csv module expects for files it parses.
# The output file is deliberately not opened here: a writer created in this
# block would already be closed before any results exist.
with open('file.csv', 'r', newline='') as csv_file:
    reader = csv.reader(csv_file)
    raw_text = ' '.join(cell for row in reader for cell in row)

text = nltk.word_tokenize(raw_text)


def non_ascii(text):
    """Return the tokens of *text* with every non-ASCII character removed.

    Each token is first decomposed with NFKD so that accented letters split
    into a base letter plus combining marks; everything that cannot be
    encoded as ASCII is then dropped. Tokens that end up empty are kept.
    """
    def _to_ascii(token):
        decomposed = unicodedata.normalize('NFKD', token)
        return decomposed.encode('ascii', 'ignore').decode('utf-8', 'ignore')

    return [_to_ascii(token) for token in text]

def remove_punct(text):
    """Strip punctuation from each token and drop tokens left empty.

    Every character that is neither a word character nor whitespace is
    removed; a token consisting only of such characters disappears from
    the result.
    """
    stripped = (re.sub(r'[^\w\s]', '', token) for token in text)
    return [token for token in stripped if token]


def to_lower(text):
    """Return a new list holding the lower-cased form of every token."""
    return [token.lower() for token in text]


def replace_numbers(text):
    """Spell out all-digit tokens in words, leaving other tokens untouched.

    A token counts as a number when ``str.isdigit`` is true for it; such
    tokens are converted with an ``inflect`` engine's ``number_to_words``.
    """
    engine = inflect.engine()
    return [
        engine.number_to_words(token) if token.isdigit() else token
        for token in text
    ]


def remove_stopwords(text):
    """Return the tokens of *text* that are not English stop words.

    The comparison is an exact string match against NLTK's English stop-word
    list, so differently-cased variants of a stop word are not removed.

    Args:
        text: iterable of token strings.

    Returns:
        A new list with the remaining tokens in their original order.
    """
    # Fetch the list once and keep it as a set: asking the corpus reader for
    # it on every token and scanning the list linearly made the original
    # loop needlessly slow on large inputs.
    english_stopwords = frozenset(stopwords.words('english'))
    return [word for word in text if word not in english_stopwords]


def stem_words(text):
    """Return the Lancaster stem of every token, in input order."""
    stem = LancasterStemmer().stem
    return [stem(token) for token in text]


def lemmatize_verbs(text):
    """Lemmatize every token as a verb (``pos='v'``) with WordNet."""
    wordnet = WordNetLemmatizer()
    return [wordnet.lemmatize(token, pos='v') for token in text]


def normalize(text):
    """Run the cleaning pipeline over a list of tokens and return the result.

    The steps run in a fixed order: strip non-ASCII characters, remove
    punctuation, lower-case, spell out numbers, drop English stop words.
    """
    pipeline = (
        non_ascii,
        remove_punct,
        to_lower,
        replace_numbers,
        remove_stopwords,
    )
    for step in pipeline:
        text = step(text)
    return text

# Rebind `text` to the cleaned token list returned by the normalize() pipeline.
text = normalize(text)

def stem_lemmatize(text):
    """Return a ``(stems, lemmas)`` pair computed from the same tokens."""
    return stem_words(text), lemmatize_verbs(text)

stems, lemmas = stem_lemmatize(text)
print('Stemmed:\n', stems)
print('\nLemmatized:\n', lemmas)

# Save the results: one header row, then one row per token holding the token,
# its stem and its lemma. The output file is opened here, once the data
# exists, and the `with` block closes it, so no explicit close() is needed.
# writerow() requires the row as its argument; calling it with none raises
# TypeError. newline='' stops the csv module's own line endings from being
# translated a second time on platforms that rewrite '\n'.
with open('new_file.csv', 'w', newline='') as out_file:
    writer = csv.writer(out_file)
    writer.writerow(['token', 'stem', 'lemma'])
    writer.writerows(zip(text, stems, lemmas))

我要将结果保存到CSV文件中。

1 个答案:

答案 0 :(得分:0)

根据您希望在输出 CSV 文件中存储数据的方式,可以考虑使用 csv.writer 对象的 .writerow() 方法(注意方法名是 writerow,中间没有下划线)。

您可以将信息存储在每一行中,例如

# csv writer objects expose writerow(); there is no write_row() method.
writeObject.writerow(["rowNumber", "day", "dollars"])

我发现了一个有用的website,它也可以帮助您写入CSV文件。