从 Python 的字符串列表中移除或删除所有特殊字符

时间:2018-09-11 16:04:27

标签: python

我有一个字符串列表,我必须删除所有特殊字符(,-'“。)。

我的代码是

import glob
import re

# Collect, for every matching file, the list of its lines converted to lower case.
files = []
for path in glob.glob("*.txt.txt"):
    with open(path) as handle:
        files.append([row.lower() for row in handle])

# Collapse each file's list of lines into one string, giving a flat list of strings.
files1 = ["".join(rows) for rows in files]

我尝试了很多方法,包括 replace、strip 和 re。

当我使用strip(如下所示)时,代码会运行,但是输出中看不到任何变化。

# str.strip("'") only removes apostrophes at the very start and end of each string,
# so apostrophes anywhere else in the text are left untouched.
files1 = [line.strip("'") for line in files1]

当我使用 re 时,出现 TypeError: expected string or bytes-like object(需要字符串或类似字节的对象)。我已经把列表的列表转换成了字符串列表,以便可以使用 re。这种做法已经被提到过很多次,但并没有解决我的问题。

# Fails with TypeError: re.sub needs a single str (or bytes-like object) as its
# third argument, but files1 is a list of strings here.
files1 = re.sub(r"[-()\"#/@;:<>{}`+=~|.!?,]", "", files1)

我无法使用 replace,因为它会引发 AttributeError,即 replace 不能用于列表。

请建议我如何摆脱所有特殊字符。

3 个答案:

答案 0 :(得分:2)

您应该将re.sub函数应用于单个对象,而不是列表。

# re.sub works on one string at a time, so apply it to every joined file text in
# files1 (the flat list of strings). Iterating the nested list `files` would hand
# re.sub a list and raise the same TypeError again.
files_cleaned = [re.sub(r"[-()\"#/@;:<>{}`+=~|.!?,]", "", text) for text in files1]

如果您只想接受字母数字字符,则可以改为:

# Keep only ASCII letters and digits; every other character (whitespace included)
# is removed. Iterate files1, the flat list of strings, because re.sub cannot
# process the inner lists stored in `files`.
files_cleaned = [re.sub(r"[^a-zA-Z0-9]", "", text) for text in files1]

答案 1 :(得分:0)

尝试以下示例:

import sys
import re
from selenium import webdriver
import pyautogui


from PyQt5.QtWidgets import (QApplication, QWidget, QInputDialog, QLineEdit,
                             QLabel, QVBoxLayout, QPushButton)

# Shared Selenium Chrome session used by App.ipFormatChk; it is created as soon
# as this module is loaded.
driver = webdriver.Chrome()


class App(QWidget):
    """Window that asks for an IP address and opens lookup sites in the browser.

    The browser is driven through the module-level Selenium ``driver`` and
    through ``pyautogui`` keyboard events.
    """

    # Dotted-quad IPv4 address with every octet limited to 0-255.
    _IP_PATTERN = re.compile(
        r"\b(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\."
        r"(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b"
    )

    # Sites typed into additional browser tabs, in order, after the first page.
    _EXTRA_TAB_URLS = (
        'https://www.virustotal.com/',
        'http://www.ipvoid.com/ip-blacklist-check',  # Blacklist
        'https://www.shodan.io',
    )

    def __init__(self):
        """Store the window title and geometry, then build the UI."""
        super().__init__()
        self.title  = 'IP / Domain'
        self.left   = 50
        self.top    = 50
        self.width  = 640
        self.height = 480

        self.initUI()

    def initUI(self):
        """Create the status label and the input button, then show the window."""
        self.setWindowTitle(self.title)
        self.setGeometry(self.left, self.top, self.width, self.height)

        self.label = QLabel()
        self.label.setStyleSheet("color: green; font: 16px;")

        layout = QVBoxLayout()
        layout.addWidget(self.label)
        layout.addWidget(QPushButton("Enter IP-address", clicked=self.getText))
        self.setLayout(layout)
        self.show()

    def getText(self):
        """Prompt for an IP address and check it once if the user confirmed.

        Blank input and a cancelled dialog are ignored. Previously the check
        ran twice for non-blank input and once even for blank input.
        """
        userInput, okPressed = QInputDialog.getText(
            self, "Input IP-address", "Your IP-address:", QLineEdit.Normal, "")
        address = userInput.strip()
        if okPressed and address:
            self.ipFormatChk(address)

    def ipFormatChk(self, userInput):
        """Ask for Basic/Advanced mode and act on ``userInput`` accordingly.

        A valid IPv4 address in Basic mode opens the lookup sites; a valid
        address in Advanced mode prints ``advanced``; anything else prints
        ``something``. Nothing happens if the mode dialog is cancelled.
        """
        advBasicOptions = ("Basic", "Advanced")
        self.selection, okPressed = QInputDialog.getItem(
            self, "Select Basic or Advanced", "", advBasicOptions, 0, False)
        if not okPressed:
            # Do not launch browser tabs when the user backed out of the dialog.
            return

        isIpAddress = self._IP_PATTERN.match(userInput) is not None

        if isIpAddress and self.selection == advBasicOptions[0]:
            self.label.setStyleSheet("color: lightgreen; font: 24px;")
            # The message was previously assigned to a local and never displayed.
            self.label.setText("This is IP-address")

            print('IP Basic')
            self._openLookupTabs()
        elif isIpAddress and self.selection == advBasicOptions[1]:
            # This branch used to sit after a `return` and compared against
            # advBasicOptions[0], so it could never run.
            print('advanced')
        else:
            print("something")

    def _openLookupTabs(self):
        """Open each lookup site in its own tab, then return to the first tab."""
        driver.get(
            'https://www.talosintelligence.com')
        pyautogui.press('enter')

        # switch_to.window() takes a window handle, not a widget; point the
        # driver at the first tab before opening more.
        driver.switch_to.window(driver.window_handles[0])

        for tabIndex, url in enumerate(self._EXTRA_TAB_URLS, start=1):
            pyautogui.hotkey('ctrl', 't')
            driver.switch_to.window(driver.window_handles[tabIndex])
            pyautogui.typewrite(url)
            pyautogui.press('enter')

        driver.switch_to.window(driver.window_handles[0])


if __name__ == '__main__':
    # Start the Qt application, create the main window (App.initUI shows it)
    # and hand the event loop's exit status back to the interpreter.
    qt_app = QApplication(sys.argv)
    main_window = App()
    exit_code = qt_app.exec_()
    sys.exit(exit_code)

示例(删除每个字符串中的非字母数字字符):

# Sample input: strings mixing alphanumeric and special characters.
files = ["Hello%", "&*hhf", "ddh", "GTD@JJ"]

# For every string, keep only the characters for which str.isalnum() is true
# and join the survivors back into a single string.
result = ["".join(ch for ch in text if ch.isalnum()) for text in files]

# Show the cleaned strings.
print(result)

答案 2 :(得分:0)

您可以使用str.isalnum

如果 str 中的所有字符都是字母数字,并且至少包含一个字符,str.isalnum() 将返回 True,否则返回 False。