我有一个词典列表
' Sends a SOAP GetListItems request to the SharePoint Lists.asmx web service,
' asking for the first item (ordered by ID) of the "AllNames" list whose Name
' field equals "Cloe", returning only the Name field.
'
' On any HTTP status other than 200 the response body is shown in a message box
' and execution is halted with End. On success the raw response text is stored
' in tudo.
'
' Requires a project reference to the Microsoft XML (MSXML2) library.
Sub selectSharepoint()
    Dim objXMLHTTP As MSXML2.XMLHTTP
    Dim strSoapBody As String

    ' NOTE(review): credentials are hard-coded in source; move them out of the
    ' code before this is shared or committed.
    ' UserName / Password are not declared in this procedure; presumably they
    ' are module-level (or implicit Variants) - verify against the module.
    UserName = "john@email.com"
    Password = "123456"

    Set objXMLHTTP = New MSXML2.XMLHTTP

    ' Third argument False = synchronous call: send blocks until the response arrives.
    objXMLHTTP.Open "POST", "https://ts.sharepoint.com/sites/2018_teste/_vti_bin/lists.asmx", False, UserName, Password
    'objXMLHTTP.Open "POST", "https://ts.sharepoint.com/sites/2018_teste/_vti_bin/lists.asmx", False 'This way worked the same, and got the same error.

    objXMLHTTP.setRequestHeader "Content-Type", "text/xml; charset=UTF-8"
    ' No Content-Length header is set here on purpose: the previous literal
    ' value "length" was not a valid length, and the real length is derived
    ' from the body when send is called.
    'objXMLHTTP.setRequestHeader "Authorization", "Basic " + Base64Encode(UserName & ":" & Password) 'makes no effect, with or without Base64Encode
    objXMLHTTP.setRequestHeader "SOAPAction", "http://schemas.microsoft.com/sharepoint/soap/GetListItems"

    ' SOAP envelope: list name plus a CAML query (Where Name = 'Cloe', ordered by ID).
    strSoapBody = "<?xml version='1.0' encoding='utf-8'?>" & _
        " <soap:Envelope xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:soap='http://schemas.xmlsoap.org/soap/envelope/'>" & _
        " <soap:Body>" & _
        " <GetListItems xmlns='http://schemas.microsoft.com/sharepoint/soap/'>" & _
        " <listName>AllNames</listName>" & _
        " <query>" & _
        " <Query xmlns=''>" & _
        " <Where>" & _
        " <Eq>" & _
        " <FieldRef Name='Name' />" & _
        " <Value Type='Text'>Cloe</Value>" & _
        " </Eq>" & _
        " </Where>" & _
        " <OrderBy>" & _
        " <FieldRef Name='ID' />" & _
        " </OrderBy>" & _
        " </Query>" & _
        " </query>"

    ' Second half of the envelope: row limit and the fields to return.
    strSoapBody = strSoapBody & _
        " <rowLimit>1</rowLimit>" & _
        " <viewFields><ViewFields>" & _
        " <FieldRef xsi:type='s:string' Name='Name'></FieldRef>" & _
        " </ViewFields></viewFields>" & _
        " </GetListItems>" & _
        " </soap:Body>" & _
        " </soap:Envelope>"

    objXMLHTTP.send strSoapBody

    If objXMLHTTP.Status <> 200 Then
        MsgBox ("Error! " & Chr(10) & objXMLHTTP.responseText)
        ' End stops all running code, not just this procedure.
        End
    Else
        ' tudo is not declared in this procedure; presumably a module-level
        ' variable read elsewhere - verify against the module.
        tudo = objXMLHTTP.responseText
    End If
End Sub
但我无法去掉其中的特殊字符和非英语单词。这次 Twitter 抓取的目标是创建一个简单的单词频率统计。
有没有最好的方法来创建循环并删除所有非英语单词/字符?
这段代码我用来创建wordcount词典
Amazon 120
b 19
maji_opai 1
am\xcd\x9ca\xcd\x89zon\xe2\x80\xa6 1
\xcb\x99\xea\x92\xb3\xe2\x80\x8b\xcb\x99 1
b'RT 46
WorkingGIrl 1
For 1
people 1
love 1
REAL 1
paperback 1
THE 3
PARIS 1
EFFECT 1
10 1
right 1
答案 0 :(得分:0)
如果你所说的 "english" 指的是 "ascii"，那么可以使用 string.printable。
它是由所有可打印 ASCII 字符组成的字符串。
import string
# Every printable ASCII character, held in a set for fast membership tests.
ascii_chars = set(string.printable)


def remove_non_ascii_prinatble_from_list(list_of_words):
    """Return a new list holding only the words made of printable ASCII.

    A word is kept when every one of its characters is a member of
    ``ascii_chars``; an empty word therefore passes. The relative order of
    the kept words is unchanged and the input list is not modified.
    """
    kept = []
    for candidate in list_of_words:
        # Skip the word as soon as one character falls outside the allowed set.
        if any(ch not in ascii_chars for ch in candidate):
            continue
        kept.append(candidate)
    return kept
或者，对字典进行同样的过滤：
def remove_non_ascii_prinatble_keys_from_dict(dict_of_words):
    """Return a new dict without the entries whose key is not printable ASCII.

    A key is kept when every one of its characters is a member of the
    module-level ``ascii_chars`` set; its value is carried over unchanged.
    The input dict is not modified.
    """
    filtered = {}
    for word, count in dict_of_words.items():
        # Drop the entry as soon as one character falls outside the allowed set.
        if any(ch not in ascii_chars for ch in word):
            continue
        filtered[word] = count
    return filtered