下面是一些文本示例(这里是 C 源代码,但实际上可以是任何结构化文本)。我需要读取若干长度和结构各不相同的文件,并检查例如每个 #define 之后是否都跟有 #include "test.h"。
无论是在预处理条件指令(#if、#ifdef、#ifndef、#endif)之内还是之外,这条规则都同样适用。此外,如果 #define 位于某个 if 指令块内部,那么 #include "test.h" 也可以出现在对应的 #endif 之后。
一些注意事项:
我先从文件中提取文本并做一些预处理,例如删除注释、无关的行和空行,然后把文件内容作为 string[] 返回。这样就可以通过 SourceCode[i] 逐行访问文本。
我试图用几种不同的方法来实现解决方案,但从未设法完全解决问题。
我的第一次尝试包括无数的if-else语句和while循环,用于逻辑中每条可以想象的路径。最终导致难以维护,混乱和难以控制。
接下来,我尝试实现一个状态机,以跟踪我在文本文件中的位置,并根据需要跳转到不同的状态。我找不到完美的解决方案。
我的另一种尝试是使用堆栈:把 #define 压入堆栈并检查其内容,再把下一行压入堆栈,检查它是否为 #include,如果不是就返回一个错误。当然,一旦涉及条件指令,情况就会变得更复杂,因为一个 #include 可以同时满足多个 #define。
我研究过解析器(主要是 Antlr),但意识到对于这样的问题,这可能有些小题大做(overkill);而且我对解析器一无所知,还需要自己编写语法。
源代码示例
// directives
#if (TEST == true)
#define START_TEST_1
#include "test.h"
#else
#define START_TEST_2
#include "test.h"
#endif
#if (TEST == true)
#define STOP_TEST_1
#else
#define STOP_TEST_2
#endif
#include "test.h"
// no directives
#define START_TEST_3
#include "test.h"
#define STOP_TEST_3
#include "test.h"
有人有一些一般性提示,也许可以指出我的具体方向。什么是解决此问题的合适解决方案?
编辑:@jdweng
dt.Columns.Add("Next #elif State", typeof(int));
dt.Rows.Add(new object[] { 12, DEFINE_STATE.FOUND_ELIF, 13, 0, 2, 7, 12, 10, ERROR.NO_ERROR, ACTION.NONE });
dt.Rows.Add(new object[] { 13, DEFINE_STATE.FOUND_DEFINE_IN_ELIF, -1, 14, 2, 7, 12, 10, ERROR.DEFINE_FOLLOWED_BY_DEFINE, ACTION.SET_DEFINE_ELIF_LINE_NUMBER });
dt.Rows.Add(new object[] { 14, DEFINE_STATE.FOUND_INCLUDE_IN_ELIF, 13, 0, 2, 7, 12, 10, ERROR.NO_ERROR, ACTION.RESET_DEFINE_ELIF_LINE_NUMBER });
我添加了一个检查以查看elif_level是否== 0,如果是,则照常进行操作,删除if-nest的级别。否则,我先删除elif_level,然后再删除if-nest级别。
以上行看起来正确吗?我正在考虑向发现elif的状态添加一个bool变量,然后在找到#endif时弹出所有将elif设置为true的状态。
答案 0 :(得分:2)
我分析这样的文本文件已有40多年了。这是一个复杂的逻辑问题,因此对于任何复杂的逻辑问题,我都将使用状态机。首先,我画了一个状态图
然后我写了代码来实现状态表
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Text.RegularExpressions;
using System.Data;
namespace ConsoleApplication1
{
/// <summary>
/// States of the define/include checker. Positive values are real states that
/// have a row in the transition table built in Program; zero and negative
/// values are markers that can appear in the table's transition columns or be
/// used by the line loop.
/// </summary>
public enum DEFINE_STATE
{
    // ---- markers (no table row) ----

    /// <summary>Define followed by IF.</summary>
    SPECIAL = -4,

    /// <summary>Value the line loop starts from before testing the line for directives.</summary>
    NONE = -3,

    /// <summary>Compile will give error, cannot occur.</summary>
    INVALID = -2,

    /// <summary>Transition that is reported as an error (table value -1).</summary>
    ERROR = -1,

    /// <summary>Transition that leaves the current state unchanged (table value 0).</summary>
    DO_NOT_CARE = 0,

    // ---- real states (one table row each) ----

    START = 1,
    FOUND_IF = 2,
    FOUND_DEFINE_IN_IF = 3,
    FOUND_DEFINE_NOT_IN_IF = 4,
    FOUND_INCLUDE_IN_IF = 5,
    FOUND_ELSE = 6,
    FOUND_DEFINE_IN_ELSE = 7,
    FOUND_INCLUDE_IN_ELSE = 8,
    FOUND_INCLUDE_NOT_IN_IF = 9,
    FOUND_END_IF = 10,
    RETURN = 11,
    FOUND_ELIF = 12,
    FOUND_DEFINE_IN_ELIF = 13,
    FOUND_INCLUDE_IN_ELIF = 14,
}
/// <summary>
/// Error codes stored in the "Error Number" column of the transition table.
/// </summary>
public enum ERROR
{
    NO_ERROR = 0,
    DEFINE_FOLLOWED_BY_DEFINE = 1,
    DEFINE_FOLLOWED_BY_DEFINE_OR_IF = 2
}
/// <summary>
/// Ordinals of the transition-table columns. The values are cast to int and
/// used as column indexes, so they must match the order in which the columns
/// are added to the DataTable.
/// </summary>
public enum TABLE_COLUMN
{
    STATE = 0,        // number of the state this row describes
    DESCRIPTION = 1,  // the same state as a DEFINE_STATE value
    DEFINE = 2,       // next state when the line contains #define
    INCLUDE = 3,      // next state when the line matches the include pattern
    IF = 4,           // next state when the line contains #if
    ELSE = 5,         // next state when the line contains #else
    ELIF = 6,         // next state when the line contains #elif
    END_IF = 7,       // next state when the line contains #endif
    ERROR = 8,        // ERROR value associated with the row
    ACTION = 9        // ACTION value associated with the row
}
/// <summary>
/// Bookkeeping action attached to a state's table row. The SET_* actions
/// record the current line number in the matching State_Variables field and
/// the RESET_* actions clear that field back to 0.
/// </summary>
public enum ACTION
{
    NONE = 0,

    // clear a recorded define line number
    RESET_DEFINE_LINE_NUMBER = 1,
    RESET_DEFINE_IF_LINE_NUMBER = 2,
    RESET_DEFINE_ELSE_LINE_NUMBER = 3,
    RESET_DEFINE_ELIF_LINE_NUMBER = 4,

    // record the line number of a define
    SET_DEFINE_LINE_NUMBER = 5,
    SET_DEFINE_IF_LINE_NUMBER = 6,
    SET_DEFINE_ELSE_LINE_NUMBER = 7,
    SET_DEFINE_ELIF_LINE_NUMBER = 8,
}
/// <summary>
/// Per-nesting-level bookkeeping for the define/include checker. Program keeps
/// one instance per open conditional level (plus one for the top level) in a
/// list that it uses as a stack.
/// </summary>
public class State_Variables
{
    // Line numbers of defines that have been recorded and not yet cleared by
    // a matching include; 0 means nothing is recorded. One slot per place a
    // define can occur: outside any conditional, in #if, in #else, in #elif.
    public int define_Line_Number;
    public int define_If_Line_Number;
    public int define_Else_Line_Number;
    public int define_Elif_Line_Number;

    // Stack level that the line loop unwinds down to after this level's #endif.
    public int return_end_level;

    // Current state of this level; a new level begins in START.
    public DEFINE_STATE state = DEFINE_STATE.START;

    // Transition-table row belonging to the current state.
    public DataRow row { get; set; }
}
/// <summary>
/// Console tool that reads a (comment-stripped) C source file line by line and
/// reports every #define that is not followed by #include "test.h", using a
/// table-driven state machine with one State_Variables entry per conditional
/// nesting level.
/// </summary>
class Program
{
    // File that is checked.
    const string FILENAME = @"c:\temp\test.txt";

    /// <summary>
    /// Runs the check on <see cref="FILENAME"/> and writes the findings to the
    /// console, then waits for Enter.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        string pattern = "#include\\s+\"test.h\"";
        DataTable dt = BuildStateTable();

        int level = 0;
        List<State_Variables> states = new List<State_Variables>();
        State_Variables newState = new State_Variables();
        states.Add(newState);
        DEFINE_STATE nextState = DEFINE_STATE.START;
        ACTION action = ACTION.NONE;
        int line_number = 0;

        // The using block guarantees the file handle is released even if the
        // loop throws.
        using (StreamReader reader = new StreamReader(FILENAME))
        {
            string input;
            while ((input = reader.ReadLine()) != null)
            {
                line_number++;
                input = input.Trim();
                if (input.StartsWith("//")) continue; //ignore comments
                if (input.Length == 0) continue;
                Boolean returnFromIF = false;
                Match match = Regex.Match(input, pattern);
                //test if end if is followed by include
                if (states[level].state == DEFINE_STATE.FOUND_END_IF)
                {
                    // Unwind every level opened by the #if and its #elif
                    // branches. Unless this line is the include, each level's
                    // still-recorded defines are reported.
                    int return_end_level = 0;
                    do
                    {
                        if (!match.Success)
                        {
                            int define_If_Line_Number = states[level].define_If_Line_Number;
                            int define_Else_Line_Number = states[level].define_Else_Line_Number;
                            int define_Elif_Line_Number = states[level].define_Elif_Line_Number;
                            if (define_If_Line_Number != 0)
                            {
                                Console.WriteLine("Define in IF at line {0} does not have and include", define_If_Line_Number.ToString());
                            }
                            if (define_Else_Line_Number != 0)
                            {
                                Console.WriteLine("Define in ELSE at line {0} does not have and include", define_Else_Line_Number.ToString());
                            }
                            if (define_Elif_Line_Number != 0)
                            {
                                // Fixed: this used to print "ELSE" together
                                // with the ELSE line number.
                                Console.WriteLine("Define in ELIF at line {0} does not have and include", define_Elif_Line_Number.ToString());
                            }
                        }
                        return_end_level = states[level].return_end_level;
                        states.RemoveAt(level--);
                    } while (level > return_end_level);
                    returnFromIF = true;
                }
                else
                {
                    states[level].row = FindRow(dt, states[level].state);
                }
                nextState = DEFINE_STATE.NONE;
                //check if defines are terminated with include
                if (input.Contains("#define"))
                {
                    nextState = (DEFINE_STATE)states[level].row.Field<int>((int)TABLE_COLUMN.DEFINE);
                }
                if (match.Success)
                {
                    if (returnFromIF)
                    {
                        nextState = states[level].state;
                    }
                    else
                    {
                        nextState = (DEFINE_STATE)states[level].row.Field<int>((int)TABLE_COLUMN.INCLUDE);
                    }
                }
                if (input.Contains("#if"))
                {
                    // "#if" also matches #ifdef and #ifndef. The transition is
                    // read from the enclosing level before the new level is
                    // pushed.
                    nextState = (DEFINE_STATE)states[level].row.Field<int>((int)TABLE_COLUMN.IF);
                    states.Add(new State_Variables());
                    level++;
                    states[level].return_end_level = level - 1;
                }
                if (input.Contains("#else"))
                {
                    nextState = (DEFINE_STATE)states[level].row.Field<int>((int)TABLE_COLUMN.ELSE);
                }
                if (input.Contains("#elif"))
                {
                    // An #elif branch gets its own level but inherits the
                    // unwind target of the #if it belongs to.
                    nextState = (DEFINE_STATE)states[level].row.Field<int>((int)TABLE_COLUMN.ELIF);
                    states.Add(new State_Variables());
                    level++;
                    states[level].return_end_level = states[level - 1].return_end_level;
                }
                if (input.Contains("#endif"))
                {
                    nextState = (DEFINE_STATE)states[level].row.Field<int>((int)TABLE_COLUMN.END_IF);
                }
                // INVALID must not become the current state: it has no table
                // row, so the next line's row lookup would yield null and the
                // following Field() call would throw NullReferenceException.
                if ((nextState != DEFINE_STATE.ERROR) && (nextState != DEFINE_STATE.DO_NOT_CARE) && (nextState != DEFINE_STATE.NONE) && (nextState != DEFINE_STATE.INVALID))
                {
                    states[level].state = nextState;
                }
                switch (nextState)
                {
                    case DEFINE_STATE.DO_NOT_CARE:
                        //stay at current state
                        break;
                    case DEFINE_STATE.NONE: //stay at current state
                        Console.WriteLine("Did not find state at line {0}", line_number);
                        break;
                    case DEFINE_STATE.INVALID: //stay at current state
                        Console.WriteLine("Invalid IF/ELSE/END_IF at line {0}", line_number);
                        break;
                    case DEFINE_STATE.ERROR:
                        // Report the previously recorded define, then record
                        // the define on the current line in its place.
                        action = states[level].row.Field<ACTION>((int)TABLE_COLUMN.ACTION);
                        switch (action)
                        {
                            case ACTION.SET_DEFINE_LINE_NUMBER:
                                Console.WriteLine("Define followed by Define at line {0}", states[level].define_Line_Number.ToString());
                                states[level].define_Line_Number = line_number;
                                break;
                            case ACTION.SET_DEFINE_IF_LINE_NUMBER:
                                Console.WriteLine("Define in IF followed by Define by at line {0}", states[level].define_If_Line_Number.ToString());
                                states[level].define_If_Line_Number = line_number;
                                break;
                            case ACTION.SET_DEFINE_ELSE_LINE_NUMBER:
                                Console.WriteLine("Define in ELSE followed by Define at line {0}", states[level].define_Else_Line_Number.ToString());
                                states[level].define_Else_Line_Number = line_number;
                                break;
                            case ACTION.SET_DEFINE_ELIF_LINE_NUMBER:
                                Console.WriteLine("Define in ELIF followed by Define at line {0}", states[level].define_Elif_Line_Number.ToString());
                                states[level].define_Elif_Line_Number = line_number;
                                break;
                        }
                        break;
                    case DEFINE_STATE.SPECIAL:
                        // SPECIAL is only produced by the #if branch above,
                        // which has already pushed a new level. The offending
                        // define therefore lives on the enclosing level
                        // (level - 1); the new level's line number is always 0.
                        Console.WriteLine("Define followed IF at line {0}", states[level - 1].define_Line_Number.ToString());
                        states[level - 1].state = DEFINE_STATE.START;
                        states[level].state = DEFINE_STATE.FOUND_IF;
                        nextState = DEFINE_STATE.FOUND_IF;
                        states[level].row = FindRow(dt, nextState);
                        break;
                    default:
                        // Regular transition: load the new state's row and
                        // apply its bookkeeping action.
                        states[level].row = FindRow(dt, nextState);
                        action = states[level].row.Field<ACTION>((int)TABLE_COLUMN.ACTION);
                        switch (action)
                        {
                            case ACTION.RESET_DEFINE_LINE_NUMBER:
                                states[level].define_Line_Number = 0;
                                break;
                            case ACTION.RESET_DEFINE_IF_LINE_NUMBER:
                                states[level].define_If_Line_Number = 0;
                                break;
                            case ACTION.RESET_DEFINE_ELSE_LINE_NUMBER:
                                states[level].define_Else_Line_Number = 0;
                                break;
                            case ACTION.RESET_DEFINE_ELIF_LINE_NUMBER:
                                states[level].define_Elif_Line_Number = 0;
                                break;
                            case ACTION.SET_DEFINE_LINE_NUMBER:
                                states[level].define_Line_Number = line_number;
                                break;
                            case ACTION.SET_DEFINE_IF_LINE_NUMBER:
                                states[level].define_If_Line_Number = line_number;
                                break;
                            case ACTION.SET_DEFINE_ELSE_LINE_NUMBER:
                                states[level].define_Else_Line_Number = line_number;
                                break;
                            case ACTION.SET_DEFINE_ELIF_LINE_NUMBER:
                                states[level].define_Elif_Line_Number = line_number;
                                break;
                        }
                        states[level].state = nextState;
                        break;
                }
            }
        }

        //final checks
        int define_Line_Number = states[level].define_Line_Number;
        if (define_Line_Number != 0)
        {
            Console.WriteLine("Define at line {0} does not have and include", define_Line_Number.ToString());
        }
        if (level != 0)
        {
            Console.WriteLine("Did not close all IFs with End_If");
        }
        Console.WriteLine("Done");
        Console.ReadLine();
    }

    // Builds the transition table: one row per real state, columns in
    // TABLE_COLUMN order.
    private static DataTable BuildStateTable()
    {
        DataTable dt = new DataTable();
        dt.Columns.Add("State", typeof(int));
        dt.Columns.Add("Description", typeof(DEFINE_STATE));
        dt.Columns.Add("Next Define State", typeof(int));
        dt.Columns.Add("Next Include State", typeof(int));
        dt.Columns.Add("Next IF State", typeof(int));
        dt.Columns.Add("Next Else State", typeof(int));
        dt.Columns.Add("Next ELIF State", typeof(int));
        dt.Columns.Add("Next ENDIF State", typeof(int));
        dt.Columns.Add("Error Number", typeof(ERROR));
        dt.Columns.Add("Action", typeof(ACTION));
        //0 do not care
        //-1 error
        //-2 invalid
        //-4 special (define followed by IF)
        dt.Rows.Add(new object[] { 1, DEFINE_STATE.START, 4, 0, 2, -2, -2, -2, ERROR.NO_ERROR, ACTION.NONE });
        dt.Rows.Add(new object[] { 2, DEFINE_STATE.FOUND_IF, 3, 0, 2, 6, 12, 10, ERROR.NO_ERROR, ACTION.NONE });
        dt.Rows.Add(new object[] { 3, DEFINE_STATE.FOUND_DEFINE_IN_IF, -1, 5, 2, 6, 12, 10, ERROR.DEFINE_FOLLOWED_BY_DEFINE, ACTION.SET_DEFINE_IF_LINE_NUMBER });
        dt.Rows.Add(new object[] { 4, DEFINE_STATE.FOUND_DEFINE_NOT_IN_IF, -1, 9, -4, -2, -2, -2, ERROR.DEFINE_FOLLOWED_BY_DEFINE, ACTION.SET_DEFINE_LINE_NUMBER });
        dt.Rows.Add(new object[] { 5, DEFINE_STATE.FOUND_INCLUDE_IN_IF, 3, 0, 2, 6, 12, 10, ERROR.NO_ERROR, ACTION.RESET_DEFINE_IF_LINE_NUMBER });
        dt.Rows.Add(new object[] { 6, DEFINE_STATE.FOUND_ELSE, 7, 0, 2, -2, -2, 10, ERROR.NO_ERROR, ACTION.NONE });
        dt.Rows.Add(new object[] { 7, DEFINE_STATE.FOUND_DEFINE_IN_ELSE, -1, 8, 2, -2, -2, 10, ERROR.DEFINE_FOLLOWED_BY_DEFINE, ACTION.SET_DEFINE_ELSE_LINE_NUMBER });
        dt.Rows.Add(new object[] { 8, DEFINE_STATE.FOUND_INCLUDE_IN_ELSE, 7, 0, 2, -2, -2, 10, ERROR.NO_ERROR, ACTION.RESET_DEFINE_ELSE_LINE_NUMBER });
        dt.Rows.Add(new object[] { 9, DEFINE_STATE.FOUND_INCLUDE_NOT_IN_IF, 4, 0, 2, -2, -2, -2, ERROR.NO_ERROR, ACTION.RESET_DEFINE_LINE_NUMBER });
        dt.Rows.Add(new object[] { 10, DEFINE_STATE.FOUND_END_IF, 11, 1, 2, -2, -2, -2, ERROR.NO_ERROR, ACTION.NONE });
        dt.Rows.Add(new object[] { 11, DEFINE_STATE.RETURN, -2, -2, 2, -2, -2, -2, ERROR.NO_ERROR, ACTION.NONE });
        // In the three ELIF rows #else goes to FOUND_ELSE (6), exactly as in
        // the IF rows: #else after #elif is legal C. The previous values
        // rejected it as invalid (-2) or jumped to FOUND_DEFINE_IN_ELSE (7),
        // which recorded the #else line itself as a define.
        dt.Rows.Add(new object[] { 12, DEFINE_STATE.FOUND_ELIF, 13, 0, 2, 6, 12, 10, ERROR.NO_ERROR, ACTION.NONE });
        dt.Rows.Add(new object[] { 13, DEFINE_STATE.FOUND_DEFINE_IN_ELIF, -1, 14, 2, 6, 12, 10, ERROR.DEFINE_FOLLOWED_BY_DEFINE, ACTION.SET_DEFINE_ELIF_LINE_NUMBER });
        dt.Rows.Add(new object[] { 14, DEFINE_STATE.FOUND_INCLUDE_IN_ELIF, 13, 0, 2, 6, 12, 10, ERROR.NO_ERROR, ACTION.RESET_DEFINE_ELIF_LINE_NUMBER });
        return dt;
    }

    // Returns the table row whose State column equals the given state, or
    // null when the state has no row (the zero and negative marker values).
    private static DataRow FindRow(DataTable table, DEFINE_STATE state)
    {
        int stateNumber = (int)state;
        return table.AsEnumerable().FirstOrDefault(r => r.Field<int>((int)TABLE_COLUMN.STATE) == stateNumber);
    }
}
}
答案 1 :(得分:1)
如果您要分析的所有文本的公分母是某种层次结构的文档,那么您可能应该首先将其转换为该文档,然后对已解析的文档进行其余的分析,而不要同时进行这两件事。也许将其转换为XML文档就足以满足您的情况,然后使用XSLT / XPath(或者,如果愿意,可以使用LINQ for XDocument)进行分析。基本上,这也是执行其他代码分析的方式(当然,带有花哨的Roslyn代码分析器除外)。