正则表达式与可选的后续文本匹配

时间:2016-11-10 18:52:46

标签: python regex regex-negation

我刚接触正则表达式,需要帮助来找到正确的正则表达式。

我有一个表格的文本文件:

apple 4
bananas 5
bananas 5 7
apple 3
apple 6
bananas 3
bananas 4 5
apple 3
bananas 9 

我正在寻找一个正则表达式,用来匹配每个"apple.*"之后最后一次出现的"bananas.*";请注意,某些"apple.*"之后可能没有任何"bananas.*"。该正则表达式应匹配以下内容:

bananas 5 7 
bananas 4 5
bananas 9

提前致谢。如果有帮助的话,我在python中这样做。

2 个答案:

答案 0 :(得分:1)

使用正则表达式实际上是可以做到的:

//-----------------------------------------------------------------------------
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<assert.h>
#include"Dictionary.h"
#define MAX_LENGTH 200

// private types --------------------------------------------------------------

// NodeObj
// One node of a singly linked list holding a key/value pair. Both strings
// are stored inline in fixed-size buffers of MAX_LENGTH bytes.
typedef struct NodeObj{
    // key text
    char key[MAX_LENGTH];
    // value text associated with key
    char value[MAX_LENGTH];
    // following node in the list; newNode() initializes it to NULL
    struct NodeObj* next;
} NodeObj;

// Node
// Pointer handle to a NodeObj; the list functions in this file pass nodes
// around through this type.
typedef NodeObj* Node;

// newNode()
// constructor of the Node type
Node newNode(char* k, char* v) {
    Node N = malloc(sizeof(NodeObj));
    assert(N!=NULL);
    strcpy(N->key, k);
    strcpy(N->value, v);
    N->next = NULL;
    return(N);
}

// freeNode()
// Destructor for the Node type.
// Releases the node that *pN refers to and resets *pN to NULL. Does nothing
// when pN itself or the node it refers to is NULL.
void freeNode(Node* pN){
    if( pN==NULL ) return;
    if( *pN==NULL ) return;

    Node doomed = *pN;
    *pN = NULL;
    free(doomed);
}


// DictionaryObj
// State of one dictionary: a singly linked list of key/value nodes plus an
// item counter. newDictionary() allocates one of these and returns it as a
// Dictionary (presumably a pointer typedef declared in Dictionary.h).
typedef struct DictionaryObj{
   // first node of the list; NULL when the list holds no nodes
   Node head;
   // item counter maintained by the functions in this file
   int numItems;
} DictionaryObj;


// public functions -----------------------------------------------------------

// newDictionary()
// Constructor of the Dictionary type.
// Returns a newly allocated dictionary with no nodes and an item count of 0.
// The allocation result is checked with assert().
Dictionary newDictionary(void){
   Dictionary fresh = malloc(sizeof(DictionaryObj));
   assert(fresh!=NULL);

   fresh->numItems = 0;
   fresh->head = NULL;
   return fresh;
}

// freeDictionary()
// Destructor of the Dictionary type.
// Empties the dictionary *pD via makeEmpty(), releases the dictionary object
// and sets *pD to NULL. Does nothing when pD or *pD is NULL.
void freeDictionary(Dictionary* pD){
    // Validate the handle first: isEmpty() terminates the program on a NULL
    // dictionary, and *pD must not be read through a NULL pD.
    if( pD==NULL || *pD==NULL ) return;

    // Resetting an already empty dictionary is harmless, so no isEmpty() test
    // is needed here.
    makeEmpty(*pD);
    free(*pD);
    *pD = NULL;
}

// isEmpty()
// returns 1 (true) if the item counter of D is zero, 0 (false) otherwise
// pre: D is not NULL; a NULL reference prints an error to stderr and
//      terminates the program
int isEmpty(Dictionary D){
   if( D==NULL ){
      fprintf(stderr, 
              "Dictionary Error: calling isEmpty() on NULL Dictionary reference\n");
      exit(EXIT_FAILURE);
   }
   return(D->numItems==0);
}

// size()
// Returns the item counter of D.
// pre: D is not NULL; a NULL reference prints an error to stderr and
//      terminates the program (same policy as isEmpty())
int size(Dictionary D) {
    if( D==NULL ){
        fprintf(stderr,
                "Dictionary Error: calling size() on NULL Dictionary reference\n");
        exit(EXIT_FAILURE);
    }
    return D->numItems;
}

// lookup()
// Searches the dictionary for the key kee.
// Returns a pointer to the value buffer inside the first node whose key
// equals kee, or NULL when no node has that key (or kee is NULL). The
// returned pointer refers to storage owned by the node.
// pre: D is not NULL; a NULL reference prints an error to stderr and
//      terminates the program
char* lookup(Dictionary D, char* kee){
   if( D==NULL ){
      fprintf(stderr,
              "Dictionary Error: calling lookup() on NULL Dictionary reference\n");
      exit(EXIT_FAILURE);
   }
   if( kee==NULL ){
      return NULL;
   }
   // Walk the links themselves rather than counting to numItems, so the
   // search can neither stop early nor run past the end of the list.
   for(Node N = D->head; N!=NULL; N = N->next){
       if(strcmp(N->key, kee) == 0){
           return N->value;
       }
   }
   return NULL;
}
// insert()
// Appends a new (kee, valu) node at the end of the list and increments the
// item counter. If lookup() already finds kee, prints
// "cannot insert duplicate keys" to stderr and leaves D unchanged.
// pre: D is not NULL; a NULL reference prints an error to stderr and
//      terminates the program
void insert(Dictionary D, char* kee, char* valu){
   if( D==NULL ){
      fprintf(stderr,
              "Dictionary Error: calling insert() on NULL Dictionary reference\n");
      exit(EXIT_FAILURE);
   }
   if(lookup(D, kee) != NULL) {
       fprintf(stderr, "cannot insert duplicate keys\n");
       return;
   }

   Node fresh = newNode(kee, valu);
   if( D->head==NULL ){
      // first node of an empty list
      D->head = fresh;
   }
   else {
      Node N = D->head;
      while(N->next != NULL) {
          N = N->next;
      }
      N->next = fresh;
   }
   // Count every successful insertion, including the very first one.
   D->numItems++;
}

// delete()
// Removes the node whose key equals kee, releases its memory and decrements
// the item counter. If no node has that key, prints
// "cannot delete non-existent key" to stderr and leaves D unchanged.
// pre: D is not NULL and holds at least one node; violating either prints an
//      error to stderr and terminates the program
void delete(Dictionary D, char* kee){
   if( D==NULL ){
      fprintf(stderr, "Dictionary Error: calling delete() on NULL Dictionary reference\n");
      exit(EXIT_FAILURE);
   }
   if( D->head==NULL ){
      fprintf(stderr, "Dictionary Error: calling delete() on empty Dictionary\n");
      exit(EXIT_FAILURE);
   }

   // link always addresses the pointer that leads to the node being examined:
   // first D->head, then each predecessor's next field. This handles removal
   // of the head node and of interior/tail nodes with the same code.
   Node* link = &D->head;
   while( kee!=NULL && *link!=NULL && strcmp((*link)->key, kee)!=0 ){
       link = &(*link)->next;
   }
   if( kee==NULL || *link==NULL ){
       fprintf(stderr, "cannot delete non-existent key\n");
       return;
   }

   Node doomed = *link;
   *link = doomed->next;   // unlink before releasing
   free(doomed);
   // Guard keeps the counter from ever becoming negative.
   if( D->numItems>0 ){
       D->numItems--;
   }
}


// makeEmpty()
// Erases the dictionary: releases every node of the list, then resets D to
// the empty state (head NULL, item counter 0).
// pre: D is not NULL; a NULL reference prints an error to stderr and
//      terminates the program
void makeEmpty(Dictionary D){
   if( D==NULL ){
      fprintf(stderr,
              "Dictionary Error: calling makeEmpty() on NULL Dictionary reference\n");
      exit(EXIT_FAILURE);
   }
   Node N = D->head;
   while( N!=NULL ){
      Node after = N->next;   // read the link before the node is released
      free(N);
      N = after;
   }
   D->head = NULL;
   D->numItems = 0;
}

// printDictionary()
// Prints a text representation of D to the file pointed to by out: one
// "key value" line per node, in list order.
// pre: D is not NULL; a NULL reference prints an error to stderr and
//      terminates the program
void printDictionary(FILE* out, Dictionary D){
   if( D==NULL ){
      fprintf(stderr,
              "Dictionary Error: calling printDictionary() on NULL Dictionary reference\n");
      exit(EXIT_FAILURE);
   }
   for(Node N = D->head; N!=NULL; N = N->next){
      // No character after the newline, so every line starts at column 0.
      fprintf(out, "%s %s\n", N->key, N->value);
   }
}

// main()
// Test driver for the Dictionary functions: inserts seven key/value pairs,
// prints the dictionary, deletes three keys, prints again, looks up every
// original key, then exercises isEmpty(), size(), makeEmpty() and
// freeDictionary(). Command-line arguments are not used.
int main(int argc, char* argv[]){
   Dictionary A = newDictionary();
   char* word1[] = {"1","2","3","4","5","6","7"};
   char* word2[] = {"a","b","c","d","e","f","g"};
   int i;

   (void)argc;   // unused
   (void)argv;   // unused

   for(i=0; i<7; i++){  //test insert
      insert(A, word1[i], word2[i]);
   }

   printDictionary(stdout, A); //test printDictionary

   //insert(A, "5", "z"); //errorThrown:cannot enter duplicate key.

   delete(A, "1");
   delete(A, "3");
   delete(A, "7");

   printDictionary(stdout, A); //show that they were deleted by printing

   for(i=0; i<7; i++){ // test lookup on pairs with keys that do and don't exist
      char* k = word1[i];
      char* v = lookup(A, k);
      // Passing NULL for a %s conversion is undefined behavior, so an empty
      // string is printed when the key was not found.
      printf("key=%s %s%s\n", k, (v==NULL?"not found ":"value="), (v==NULL?"":v));
   }

   // delete(A, "0");  //errorThrown:cannot delete what doesnt exist
   // delete(A, "3");  //errorThrown:cannot delete what doesnt exist

   printf("%s\n", (isEmpty(A)?"isEmpty:true":"isEmpty:false"));//test isEmpty,    size, and makeEmpty
   printf("size:%d\n", size(A));
   makeEmpty(A);
   printf("%s\n", (isEmpty(A)?"isEmpty:true":"isEmpty:false"));

   freeDictionary(&A); //test freeDictionary

   return(EXIT_SUCCESS);
}

请参阅 regex101.com 上的演示,注意所使用的修饰符,并取每个匹配中的捕获组:^apple.+[\n\r] (?:(bananas.*)[\n\r]?)+

<小时/> 作为完整的1代码:

Python

请参阅 ideone.com 上的演示。

答案 1 :(得分:0)

没有什么需要递归的。这是一个可行的模式:

>>> fruit_lit = """apple 4
bananas 5
bananas 5 7
apple 3
apple 6
bananas 3
bananas 4 5
apple 3
bananas 9"""

>>>  re.findall(r'apple\s*\d*\s*\n(?:bananas\s*(?:\d+\s*)+\n)*(bananas(?:\s*\d+)+)\s*', fruit_list)
['bananas 5 7', 'bananas 4 5', 'bananas 9']

正如许多评论所提到的,正则表达式可能不是实现这一目标的最佳方法。逐行迭代,先测试 line.startswith('apple'),再对其后的每一行测试 line.startswith('banana'),可能是更好的方法。