Question

我的知识非常有限，但我正在努力。

我想把一个字符串拆分成多个字符串变量。

目前，我使用的是下面这个 HTTP 服务器脚本（感谢原作者）：https://gist.github.com/bradmontgomery/2219997

我对它稍作了修改：脚本会接收 POST 字符串并对其解码，我希望把解码后的字符串拆分成多个不同的字符串或变量。

这是完整的代码：

#!/usr/bin/env python
"""
Very simple HTTP server in python.
Usage::
    ./dummy-web-server.py [<port>]
Send a GET request::
    curl http://localhost
Send a HEAD request::
    curl -I http://localhost
Send a POST request::
    curl -d "foo=bar&bin=baz" http://localhost
"""
from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
import SocketServer
import sys
import time
import csv
import urllib
import psycopg2


# Module-level placeholder. No code in this script declares `global con`,
# so this binding is never replaced; do_POST works with its own local
# connection object instead.
con = None

class S(BaseHTTPRequestHandler):
    """Request handler that greets GET requests and logs POST bodies.

    Every POST body is appended to two CSV files (raw and URL-decoded)
    and inserted into the ``log`` table of a PostgreSQL database.
    """

    def _set_headers(self):
        """Send a 200 status line followed by a text/html Content-type header."""
        self.send_response(200)
        self.send_header('Content-type', 'text/html')
        self.end_headers()

    def do_GET(self):
        """Answer GET with a fixed HTML greeting."""
        self._set_headers()
        self.wfile.write("<html><body><h1>hi!</h1></body></html>")

    def do_HEAD(self):
        """Answer HEAD with the response headers only."""
        self._set_headers()

    def _append_csv_row(self, path, row):
        """Append *row* as one Excel-dialect CSV record to the file at *path*."""
        # Python 2's csv module requires the file to be opened in binary
        # mode; text mode produces stray "\r" characters on Windows.
        with open(path, 'ab') as resultFile:
            csv.writer(resultFile, dialect='excel').writerow(row)

    def do_POST(self):
        """Log the POST body to CSV files and to the ``log`` database table.

        The 200 response headers are sent before any logging happens, so
        a failure in the CSV or database step is not reported to the client.
        """
        # A request without Content-Length is treated as having an empty
        # body instead of raising KeyError inside the handler.
        content_length = int(self.headers.get('Content-Length', 0))
        post_data = self.rfile.read(content_length)
        print(post_data)
        self._set_headers()

        dataEncoded = post_data
        dataString = urllib.unquote(post_data)
        timeString = time.strftime("%d %m %Y %H:%M:%S")

        self._append_csv_row("decoded_log.csv", [dataString, timeString])
        self._append_csv_row("encoded_log.csv", [dataEncoded, timeString])

        con = psycopg2.connect("host=localhost dbname=data_log user=USER password=PASSWORD")
        try:
            print("DB Connection successful.")
            cur = con.cursor()
            # Parameterized query: the driver quotes the values itself.
            cur.execute("INSERT INTO log(data,date_time) VALUES (%s, %s)", (dataString, timeString))
            con.commit()
        finally:
            # A connection is opened per request, so it must be released
            # per request as well; otherwise every POST leaks one.
            con.close()

def run(server_class=HTTPServer, handler_class=S, port=5400):
    """Start an HTTP server on *port* and serve requests until interrupted.

    Args:
        server_class: Server type to instantiate with ``(address, handler)``.
        handler_class: Request handler class passed to the server.
        port: TCP port to listen on; the host part of the address is the
            empty string.
    """
    server_address = ('', port)
    httpd = server_class(server_address, handler_class)
    print('Starting httpd...')
    try:
        httpd.serve_forever()
    finally:
        # Release the listening socket even when serve_forever() is
        # aborted by an exception such as KeyboardInterrupt.
        httpd.server_close()

if __name__ == "__main__":
    # A single optional command-line argument overrides the default port.
    if len(sys.argv) == 2:
        run(port=int(sys.argv[1]))
    else:
        run()

    # Kept inside the guard: at module level this call would terminate
    # any interpreter that merely imports the file.
    sys.exit()

http://pastebin.com/nkbkD2iZ

然后我收到这个字符串：

%22%22%22%5B001%5D%22%22%3A%22%22C3643%22%22%2C%22%22%7C%5B002%5D%22%22%3A%22%2232303138%22%22%2C%22%22%7C%22%22%3D%22=

解码的是：

""""C3643""""32303138"",""|""="=

所以，我想做却一直没能做到的是：先提取出 C3643 和 32303138，然后再把 32303138 拆分成：

32 303 138

所以一旦完成，我想要这个字符串：

a = C3643 b = 32 c = 303 d = 138

这样，我就可以把每个值分别插入 PostgreSQL 的对应列中。

提前谢谢你！

Answer 1

用正则表达式提取单词，然后像这样切片：

import re

# Decoded POST payload from the question: two alphanumeric tokens buried
# in quote, comma, pipe and equals-sign noise.
s1 = '""""C3643""""32303138"",""|""="='
print(s1)

# \w+ matches each run of letters, digits or underscores, so every
# punctuation character acts as a separator.
s2 = re.findall(r'\w+', s1)
print(s2)

a = s2[0]
s3 = s2[1]

# Fixed-width slices of the second token: 2 characters, 3 characters,
# then whatever remains.
b, c, d = s3[:2], s3[2:5], s3[5:]

# One value per line, in the order a, b, c, d. The single-argument
# print(...) form produces the same output on Python 2 and Python 3.
for value in (a, b, c, d):
    print(value)

输出：

""""C3643""""32303138"",""|""="=
['C3643', '32303138']
C3643
32
303
138

Answer 2

对于我们这些觉得正则表达式难以理解的人，可以这样做：

$ cat a.txt
mary; 24; female;1993; student
john; 21; male; 1982; student
luke; 22; male; 1988; student

$ cat a.py
import json


def parse_line(line):
    """Turn one semicolon-separated record into a dict.

    The line is split around ';' and each piece is stripped of
    surrounding whitespace. Fields are read by position: name, age,
    gender, year, occupation; age and year are converted to int.

    Raises:
        IndexError: if the line has fewer than five fields.
        ValueError: if the age or year field is not an integer.
    """
    # A list (not a lazy map object) so the fields can be indexed on
    # both Python 2 and Python 3.
    fields = [piece.strip() for piece in line.split(';')]
    return {
        "name": fields[0],
        "age": int(fields[1]),
        "gender": fields[2],
        "year": int(fields[3]),
        "occupation": fields[4],
    }


if __name__ == "__main__":
    arr = []
    with open('a.txt', 'r') as f:
        for line in f:
            # Skip blank lines (e.g. a trailing newline at end of file)
            # instead of failing with IndexError on them.
            if line.strip():
                arr.append(parse_line(line))
    print(json.dumps(arr, indent=2))

$ python a.py
[
  {
    "gender": "female",
    "age": 24,
    "occupation": "student",
    "name": "mary",
    "year": 1993
  },
  {
    "gender": "male",
    "age": 21,
    "occupation": "student",
    "name": "john",
    "year": 1982
  },
  {
    "gender": "male",
    "age": 22,
    "occupation": "student",
    "name": "luke",
    "year": 1988
  }
]

将字符串拆分为多个字符串

2 个答案: