Question

我在Python中有一个代码来预处理一些文本并将其写入文件。它删除了主题标签，用户名，符号和链接，停用词，也获得了词根

import tweepy
import time
import os
import sys
import json
import argparse
import re
from collections import defaultdict
import glob
from nltk.stem.snowball import SnowballStemmer

text = "shit.txt"

def process_text(text=text):
    text=re.sub('\\B@[a-zA-Z0-9_]*\\b','',text)
    text=re.sub('\\B#[a-zA-Z0-9_]*\\b','',text)
    text=re.sub('\\B$[a-zA-Z0-9_]*\\b','',text)
    text=re.sub('\\bRT\\b','',text)
    text = text.lower()
    text = re.sub("(https?://[^ ]+)",'',text)
    if text:
        a1 = [line.split("-")[0] for line in file("ListOfShortWords.txt")]
        a2 = [re.sub("\n",'',line.split("-")[1]).encode("utf-8")for line in file("ListOfShortWords.txt")]
        HashList = defaultdict(lambda:"nil")
        for c in range(0,len(a1)):
            HashList[a1[c]] = a2[c]
        text = re.sub(r'([aeiou])\1{2,}', r'\1', text)
        text = re.sub(r'([^aeiou])\1{2,}', r'\1\1',text)
        text = re.sub(r'(.)\1{2,}\\b', r'\1', text)
        for key in HashList.keys():
            text = re.sub("\\b"+str(key)+"\\b",str(HashList[key]),text)
        for stopword in    ['about','above','after','ain\'t','aint','again','against','all','am','an','and','any','are','as','at','be','because','been','before','being','below','between','both','but','by','could','did','do','does','doing','down','during','each','few','for','from','further','had','has','have','having','he','he\'d','he\'ll','he\'s''here''here\'s''hers''herself''him''himself','her','his','how','how\'s','i','i\'d','i\'ll','i\'m','i\'ve','ive','if','in','into','is','it','it\'s','its','itself','let\'s','lets','me','more','most','my','myself','no','nor','not','of','off','on','once','only','or','other','ought','our','ours','ourselves','out','over','own','same','she','she\'d','she\'ll','she\'s','shes','should','so','some','such','than','that','that\'s','thats','the','their','theirs','them','themselves','then','there','there\'s','theres','these','they','they\'d','theyd','they\'ll','they\'re','they\'ve','theyll','theyre','theyve','this','those','through','to','too','under','until','up','very','was','we','we\'d','we\'ll','we\'re','we\'ve','were','what','what\'s','whats','when','when\'s','whens','where','where\'s','wheres','which','while','who','who\'s','whos','whom','why','why\'s','whys','with','won\'t','wont','would','you','you\'d','youd','you\'ll','youll','you\'re','you\'ve','youre','youve','your','yours','yourself','yourselves','\'tis','\'twas','tis','twas']:
            text = re.sub("\\b"+stopword+"\\b",'',text)
        for ch in ['&','$',',','.','/',':',';','"','{','[','}',']','|','\\','+','=','-','_',')','(','*','^','%','!','~','`','?']:
            text = text.replace(ch,' ')
        text = re.sub("\\b[0-9]*\\b",'',text)
        text = text.replace('\'','')
        text = re.sub('\\b[a-z]\\b','',text)
        text = re.sub(r'[^\x00-\x7F]+',' ',text)
        text = ' '.join(text.split())
    return text

for pp in ['pos','neg','neu','irr']:
    a = 1
    for fil in glob.glob("Senti/"+str(pp)+"/*.txt"):
        for line in file(fil):
            t = process_text(text=line)
            realline=''
            for word in t.split():
                realline = realline+" "+str(SnowballStemmer("english").stem(word)
            with open("Processed Senti/"+str(pp)+"/"+str(a)+".txt", 'w') as outf:
                outf.write(realline)
            a=a+1

我收到错误说

with open("Processed Senti/"+str(pp)+"/"+str(a)+".txt", 'w') as outf:
   ^
SyntaxError: invalid syntax

代码有什么问题？存在所有必需的文件夹和文件

Answer 1

上一行中有a）缺失... str（）函数未正确关闭。

Python中的WIth循环错误

1 个答案: