如何在 Python 中从文件里仅提取指定结构体大括号之间的内容

时间:2018-08-18 06:18:31

标签: python python-2.7

a。我的函数需要读取一个包含若干 C 结构体定义的文件（如下所示），提取每个结构体 {} 括号之间的所有信息，并将它们存储在数组中。

# Collect the stylesheet URLs referenced by each page under base_link and
# print them.
base_link = 'http://kteq.in/'
hrefs = ['index']

# Accumulate across ALL pages; creating the list inside the page loop would
# throw away the results of every page but the last one.
css_links = []
for page in hrefs:
    url = base_link + page
    # Use the response as a context manager so the connection is closed.
    with urllib.request.urlopen(url) as html_page:
        soup = BeautifulSoup(html_page, 'html.parser')
    for tag in soup.findAll('link'):
        href = tag.get('href')
        if not href:
            # <link> without an href: re.search(pattern, None) would raise
            # TypeError, so skip it.
            continue
        # The dot before "css" is escaped so only a literal ".css" matches.
        css_links.append(re.search(r"[A-Za-z0-9:/.-]+\.css", href))

for i in css_links:
    # re.search returns None for hrefs that are not stylesheets.
    if i is None:
        continue
    print(i[0])

b。该函数接收两个参数：第一个是包含结构体定义的文件，第二个是结构体名称列表，只需提取列表中这些结构体的内容。为了提取 {} 之间的所有信息，我尝试了下面的一些示例。

package whatstruct;

  typedef struct packed {
    int [4:0] version;
    char party;
    float parity; 
    char ccc; 
    int [7:0] spend;
  } party_s;

  typedef struct packed {
    int [5:0] cod3;
    int [1:0] group;
    int [51:0] duty;
    char calloff; 
    char selfi;
    int [11:0] snap; 
    int [5:0] longtrip;
    int [1:0] whattodo;
    int [9:0] sky;
    int [7:0] yahoo;
    int [5:0] hurrey;
    int [3:0] appology;
    int [1:0] temp;
    int [2:0] iddd;  
    float dontknow; 
  } dict_s;

  typedef struct packed {
    int [4:0] jan;
    int [12:0] feb;
    int [1:0] mar;
    logic mar; 
    logic april; 
  } months_c;

  typedef struct packed {
    var apple;
    int mango;
    float banana;
    int lichi
  } fruits_s; 
endpackage: whatstruct

c。如果第二个参数为 "all"，我已经能够提取 {} 之间的所有信息并将其转储。但是，如果第二个参数是包含结构体名称的字符串，则只需要提取这些结构体的内容。我不清楚该怎么做。可以使用正则表达式吗？还是有其他更好的方法？

任何建议都非常有帮助!

更新的代码:

from Tkinter import *
import subprocess
import shlex
import os 
import time
import string
import threading
import sys, argparse
import ttk
import re
import logging
import warnings
import os.path
import gzip


# Module-level placeholders, all initialised to None.
readstructfile = filename = structnames = None


def readfileanddump(filename, structnames):
    """Print and return the text found between the braces of selected structs.

    Args:
        filename: Path of the file that contains the struct definitions.
        structnames: Which structs to extract. ``"all"``, ``None`` or the
            builtin ``all`` selects every struct. Otherwise either a
            comma-separated string of names or an iterable of names; a struct
            is selected when the identifier that follows its closing brace
            is one of them.

    Returns:
        A list with the raw text between ``{`` and ``}`` of every selected
        struct, in file order. Each entry is also printed.
    """
    if structnames is None or structnames is all or structnames == "all":
        wanted = None  # None means "no filtering"
    elif hasattr(structnames, "split"):
        wanted = set(name.strip() for name in structnames.split(","))
    else:
        wanted = set(str(name).strip() for name in structnames)

    # Read the file as ONE string. Running the regex over str(readlines())
    # would search the repr of a list, leaking "\n', '" into every match.
    with open(filename, "r") as readstructfile:
        content = readstructfile.read()

    bodies = []
    # Group 1: text between the braces; group 2: the typedef name after "}".
    for match in re.finditer(r"\{(.*?)\}\s*(\w*)", content, re.DOTALL):
        if wanted is None or match.group(2) in wanted:
            print(match.group(1))
            bodies.append(match.group(1))
    return bodies

# Pass the string "all" (dump every struct), not the builtin function `all`.
readfileanddump("structpkg.c", "all")

1 个答案:

答案 0 :(得分:-1)

已按我的需求实现，代码如下：

from Tkinter import *
import subprocess
import shlex
import os 
import time
import string
import threading
import sys, argparse
import ttk
import re
import logging
import warnings
import os.path
import gzip

#import xlwt
#import xlrd 

# Module-level names kept so existing code that reads them keeps working.
# Only `structnames` and `final_struct` are still written by
# readfileanddump(); parser state is now local to each call.
readstructfile = None
filename = None
structnames = []
filename_and_structnames_l = []

found_struct_idx = {}
found_struct = 0
temp_struct = []
final_struct = []

# Closing line of a struct: "}" optionally followed by the typedef name.
_STRUCT_END_RE = re.compile(r'\}\s*(\w*)')


def _split_request(filename_and_structnames):
    """Split "file,name1,name2" into (file, [names]); no names means ['all']."""
    parts = filename_and_structnames.split(",")
    names = [name.strip() for name in parts[1:] if name.strip()]
    return parts[0], names or ['all']


def _iter_structs(lines):
    """Yield (name, fields) for each "typedef struct ... } name;" in lines."""
    fields = None  # None while outside a struct body
    for line in lines:
        if fields is None:
            if 'typedef struct' in line:
                fields = []
            continue
        closing = _STRUCT_END_RE.search(line)
        if closing:
            yield closing.group(1), fields
            fields = None
            continue
        field = line.strip().replace(";", "")
        if field:  # ignore blank lines inside the body
            fields.append(field)


def readfileanddump(filename_and_structnames):
    """Extract the member lines of selected structs from a file.

    Args:
        filename_and_structnames: A comma-separated string whose first item
            is the file path and whose remaining items are struct names,
            e.g. ``"structpkg.c,party_s,dict_s"``. With no names (or only
            empty ones) every struct is extracted, as if ``"all"`` had been
            given.

    Returns:
        A flat list of the member lines (whitespace-stripped, ``;`` removed)
        of every selected struct, in file order. The same items are stored
        in the module-level ``final_struct`` list, and the requested names
        in ``structnames``; both are replaced on every call instead of
        accumulating across calls.
    """
    path, wanted = _split_request(filename_and_structnames)
    structnames[:] = wanted
    logging.debug("Value of filename %s and structnames %s", path, wanted)

    take_all = wanted == ['all']
    wanted_names = set(wanted)
    selected = []
    # Read-only access is enough, and `with` closes the file on every path.
    with open(path, "r") as handle:
        for name, fields in _iter_structs(handle):
            if take_all or name in wanted_names:
                logging.debug("Value of struct %s fields %s", name, fields)
                selected.extend(fields)

    # Update in place so references held by other modules stay valid.
    final_struct[:] = selected
    return selected