我有一个字符串列表,我必须删除所有特殊字符(,-'“。)。
我的代码是
import glob
import re
# Read every matching file and keep its lower-cased lines, one list per file.
files = []
for text in glob.glob("*.txt.txt"):
    with open(text) as f:
        fileRead = [line.lower() for line in f]
    files.append(fileRead)
# Collapse each file's list of lines into a single string, so files1 is a flat
# list of strings (one per file) instead of a list of lists.
files1 = [''.join(item) for item in files]
我尝试了很多方法,包括 replace、strip 和 re。
当我使用strip(如下所示)时,代码会运行,但是输出中看不到任何变化。
# str.strip("'") only trims apostrophes from the two ends of each string;
# apostrophes inside the text are left alone, so the output looks unchanged.
files1 = [text.strip("'") for text in files1]
当我使用 re 时,出现 TypeError: expected string or bytes-like object(需要字符串或类字节对象)。我已经把列表的列表改成了字符串列表,以便可以使用 re。这种做法已被多次推荐,但并没有解决我的问题。
# NOTE: files1 is a list at this point, but re.sub expects a single string (or
# bytes-like object) as its third argument, so this call raises
# "TypeError: expected string or bytes-like object".
files1 = re.sub(r"[-()\"#/@;:<>{}`+=~|.!?,]", "", files1)
我也无法使用 replace,因为它会引发 AttributeError:列表(list)对象没有 replace 方法。
请建议我如何摆脱所有特殊字符。
答案 0 :(得分:2)
您应该将re.sub函数应用于单个对象,而不是列表。
# re.sub handles one string at a time, so apply it to each element of the
# flattened list of strings (files1). `files` is still a list of lists and
# would trigger the same TypeError.
files_cleaned = [re.sub(r"[-()\"#/@;:<>{}`+=~|.!?,]", "", file) for file in files1]
如果您只想接受字母数字字符,则可以改为:
# Keep only ASCII letters and digits. Everything else (including whitespace) is
# removed. Iterate files1, the list of strings, because `files` holds lists.
files_cleaned = [re.sub(r"[^a-zA-Z0-9]", "", file) for file in files1]
答案 1 :(得分:0)
尝试以下示例:
import sys
import re
from selenium import webdriver
import pyautogui
from PyQt5.QtWidgets import (QApplication, QWidget, QInputDialog, QLineEdit,
QLabel, QVBoxLayout, QPushButton)
# Module-level Selenium Chrome driver, created when this module is loaded and
# used by App.ipFormatChk to open the lookup pages.
driver = webdriver.Chrome()
class App(QWidget):
    """Window that asks for an IP address and opens lookup sites for it.

    The window shows a status label and a single button. Pressing the button
    prompts for an IP address; confirmed, non-blank input is passed to
    ipFormatChk, which asks for a "Basic" or "Advanced" mode and acts on it.
    """

    def __init__(self):
        super().__init__()
        self.title = 'IP / Domain'
        # Window geometry (x, y, width, height) applied in initUI.
        self.left = 50
        self.top = 50
        self.width = 640
        self.height = 480
        self.initUI()

    def initUI(self):
        """Build the window: a status label above the button that starts the prompt."""
        self.setWindowTitle(self.title)
        self.setGeometry(self.left, self.top, self.width, self.height)
        self.label = QLabel()
        self.label.setStyleSheet("color: green; font: 16px;")
        layout = QVBoxLayout()
        layout.addWidget(self.label)
        layout.addWidget(QPushButton("Enter IP-address", clicked=self.getText))
        self.setLayout(layout)
        self.show()

    def getText(self):
        """Prompt for an IP address and check it once if the user confirmed real text."""
        userInput, okPressed = QInputDialog.getText(
            self, "Input IP-address", "Your IP-address:", QLineEdit.Normal, "")
        # Run the check exactly once, and only for confirmed, non-blank input.
        # Previously it ran twice for valid input, showing the mode dialog twice.
        if okPressed and userInput.strip():
            self.ipFormatChk(userInput)

    def ipFormatChk(self, userInput):
        """Ask for Basic/Advanced mode and act on userInput if it starts with an IPv4 address.

        In Basic mode the lookup sites are opened in separate browser tabs and
        the driver is switched back to the first tab. Advanced mode currently
        only prints a marker. Anything else prints "something".
        """
        # Dotted-quad IPv4 pattern: four octets in the range 0-255.
        pattern = (
            r"\b(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\."
            r"(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b"
        )
        advBasicOptions = ("Basic", "Advanced")
        self.selection, _ = QInputDialog.getItem(
            self, "Select Basic or Advanced", "", advBasicOptions, 0, False)
        # Evaluate the pattern once; both branches below depend on it.
        isIp = re.match(pattern, userInput) is not None

        if isIp and self.selection == advBasicOptions[0]:
            self.label.setStyleSheet("color: lightgreen; font: 24px;")
            print('IP Basic')
            driver.get(
                'https://www.talosintelligence.com')
            pyautogui.press('enter')
            # switch_to.window expects a browser window handle, not this Qt
            # widget; stay on the first (and so far only) browser window.
            driver.switch_to.window(driver.window_handles[0])
            # Each further site: open a new tab via the keyboard, point the
            # driver at it, then type the URL into the address bar.
            pyautogui.hotkey('ctrl', 't')
            driver.switch_to.window(driver.window_handles[1])
            pyautogui.typewrite('https://www.virustotal.com/')
            pyautogui.press('enter')
            pyautogui.hotkey('ctrl', 't')
            driver.switch_to.window(driver.window_handles[2])
            pyautogui.typewrite('http://www.ipvoid.com/ip-blacklist-check')  # Blacklist
            pyautogui.press('enter')
            pyautogui.hotkey('ctrl', 't')
            driver.switch_to.window(driver.window_handles[3])
            pyautogui.typewrite('https://www.shodan.io')
            pyautogui.press('enter')
            driver.switch_to.window(driver.window_handles[0])
            return

        # The second branch must test the "Advanced" option; it previously
        # repeated the "Basic" test and could never be taken.
        if isIp and self.selection == advBasicOptions[1]:
            print('advanced')
        else:
            print("something")
if __name__ == '__main__':
    # Create the Qt application, show the main window, and exit the process
    # with the event loop's return code.
    app = QApplication(sys.argv)
    ex = App()
    sys.exit(app.exec_())
输出:
# Sample input: strings that mix letters with special symbols.
files = ["Hello%", "&*hhf", "ddh", "GTD@JJ"]
# For every string, keep only its alphanumeric characters, join them back
# together, and collect the cleaned strings in a new list.
result = ["".join(ch for ch in text if ch.isalnum()) for text in files]
print(result)  # show the cleaned strings
答案 2 :(得分:0)
您可以使用str.isalnum:
如果字符串中的所有字符都是字母或数字(并且至少有一个字符),它将返回True。