import re
string = "this is my BНОРAL"
# Raw string so that `\s` reaches the regex engine instead of being treated
# as an (invalid) string escape. The character class is ASCII-only: every
# character that is not A-Z or whitespace is removed, which includes capital
# letters from other scripts that merely look like Latin ones.
all_caps_string = re.sub(r'[^A-Z\s]+', '', string)
print(all_caps_string)
上述代码的输出为“   BAL”(前面带有三个空格),其中的“НОР”三个字母被删掉了。
这个“BНОРAL”是我通过 Google Vision OCR 得到的单词。其中的“Н”看起来像拉丁字母 H,但检查后发现 ord('Н') 是 1053(它其实是西里尔字母),而真正的拉丁字母 H 的 ord 是 72。我希望字符串中所有的大写字母都能被保留下来。有什么解决方法吗?
编辑:按上述方式提取出大写字母之后,我想将结果与列表中已保存的数据进行匹配。该列表包含“BHOPAL”,因此如果匹配成功,就应该把它作为输出打印出来。
# Known city names that the extracted text is compared against.
list_cities = [
    "BHOPAL",
    "JAIPUR",
]
def only_upper(s: str) -> str:
    """Return the uppercase characters of ``s`` in their original order.

    A character is kept when ``str.isupper()`` is true for it, so digits,
    whitespace, punctuation and lowercase letters are dropped. The check is
    Unicode-aware: uppercase letters from other scripts (for example Cyrillic
    capitals that look like Latin ones) are kept unchanged, not converted.
    """
    # join() assembles the result in one pass instead of repeated `+=` copies.
    return "".join(char for char in s if char.isupper())
# OCR output can contain capitals from other scripts that are visually
# identical to Latin letters (e.g. U+041D looks like "H"). They pass
# str.isupper() but never compare equal to their Latin twins, so map the
# common Cyrillic look-alikes to Latin before matching. Escapes are used
# because the characters cannot be told apart by eye; extend the table if
# other look-alikes show up in the OCR output.
_LOOKALIKE_TO_LATIN = str.maketrans({
    "\u0405": "S",  # CYRILLIC CAPITAL LETTER DZE
    "\u0406": "I",  # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
    "\u0408": "J",  # CYRILLIC CAPITAL LETTER JE
    "\u0410": "A",  # CYRILLIC CAPITAL LETTER A
    "\u0412": "B",  # CYRILLIC CAPITAL LETTER VE
    "\u0415": "E",  # CYRILLIC CAPITAL LETTER IE
    "\u041a": "K",  # CYRILLIC CAPITAL LETTER KA
    "\u041c": "M",  # CYRILLIC CAPITAL LETTER EM
    "\u041d": "H",  # CYRILLIC CAPITAL LETTER EN
    "\u041e": "O",  # CYRILLIC CAPITAL LETTER O
    "\u0420": "P",  # CYRILLIC CAPITAL LETTER ER
    "\u0421": "C",  # CYRILLIC CAPITAL LETTER ES
    "\u0422": "T",  # CYRILLIC CAPITAL LETTER TE
    "\u0425": "X",  # CYRILLIC CAPITAL LETTER HA
})

# Keep only the capitals, then fold look-alikes so the text is pure Latin.
cities_extracted = only_upper("this is my 123BНОРAL").translate(_LOOKALIKE_TO_LATIN)

# Print every known city that equals the normalized OCR text.
for city in list_cities:
    if city == cities_extracted:
        print(city)