重新初始化Stanford CoreNLP管道以匹配RegexNER文件更改

时间:2019-03-26 07:33:55

标签: stanford-nlp

我正在开发一个应用程序,它将通过REST HTTP PATCH端点将新规则发送到RegexNER文件。

[Route("RegexNER")]
[HttpPatch]
public IHttpActionResult UpdateRegexNERFile([FromBody] List<RegexNERUpdateDTO> UpdateEntries)
{
    if (UpdateEntries == null)
        return Content(HttpStatusCode.BadRequest, new { message = "Cannot update Regex NER for invalid requests!" });

    string regexNERFilePath = Path.Combine(HttpContext.Current.Server.MapPath("~"), "Models", "regexner_custom.txt");
    string regexFileContents = File.ReadAllText(regexNERFilePath);

    List<string> newEntryList = new List<string>();

    foreach (RegexNERUpdateDTO entry in UpdateEntries)
    {
        foreach (string entityEntry in entry.Entries)
        {
            string newEntry = $"{entityEntry}\t{entry.Entity}";

            if (regexFileContents.IndexOf(newEntry, StringComparison.OrdinalIgnoreCase) == -1)
                newEntryList.Add(newEntry);
        }
    }

    // Remove read only file access
    FileInfo myFile = new FileInfo(regexNERFilePath)
    {
        IsReadOnly = false
    };

    File.AppendAllText(regexNERFilePath, Environment.NewLine + string.Join(Environment.NewLine, newEntryList));
    NLPInitializer.InitializeCoreNLP();
    return Ok();
}

这是用于初始化CoreNLP管道的代码:

public static class NLPInitializer
{
    public static StanfordCoreNLP Pipeline { get; set; }

    /// <summary>
    /// This method is called only once (at startup) to initialize the StanfordCoreNLP pipeline object
    /// </summary>
    public static void InitializeCoreNLP()
    {
        // Path to the folder with models extracted from `stanford-corenlp-3.8.0-models.jar`
        string jarRoot = ConfigurationManager.AppSettings["CoreNLPModelPath"];
        string modelsDirectory = Path.Combine(jarRoot, "edu", "stanford", "nlp", "models");

        // SUTime configuration
        string sutimeRules = Path.Combine(modelsDirectory, "sutime", "defs.sutime.txt") + "," +
                          // Path.Combine(modelsDirectory, "sutime", "english.holidays.sutime.txt") + "," +
                          Path.Combine(modelsDirectory, "sutime", "english.sutime.txt");
        try
        {
            Properties props = new Properties();
            //props.setProperty("ner.model", Path.Combine(modelsDirectory + "ner", "english.muc.7class.distsim.crf.ser.gz"));
            string regexNerFilePath = Path.Combine(HttpContext.Current.Server.MapPath("~"), "Models", "regexner_custom.txt");
            props.setProperty("regexner.mapping", regexNerFilePath);
            props.setProperty("regexner.ignorecase", "true");
            props.setProperty("coref.algorithm", "neural");
            //props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse,depparse,mention,coref,sentiment,regexner,relation,natlog,openie");
            props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse,depparse,coref,sentiment,regexner,relation,natlog,openie");
            //props.setProperty("ner.useSUTime", "0");
            props.setProperty("sutime.rules", sutimeRules);
            props.setProperty("sutime.binders", "0");
            props.setProperty("openie.resolve_coref", "false");

            string curDir = Environment.CurrentDirectory;
            Directory.SetCurrentDirectory(jarRoot);
            Pipeline = new StanfordCoreNLP(props);
            Pipeline.addAnnotator(new TimeAnnotator("sutime", props));
            Directory.SetCurrentDirectory(curDir);
        }
        catch (Exception ex)
        {
            throw;
        }
    }
}

但是问题出在这一行 NLPInitializer.InitializeCoreNLP();

我想重新初始化Stanford CoreNLP管道,以获取对RegexNER文件所做的最新更改并根据更新后的规则查找实体。

但是它的管道没有重新初始化,我不知道为什么。

要注意的一件事是,当我重新启动IIS或从inetmgr重新启动应用程序时,CoreNLP将获取对RegexNER文件所做的最新更改。

但是对于每个HTTP PATCH请求,重新启动IIS服务器都是不可行的,此问题是否有替代方法。

我正在使用CoreNLP v3.9.1 english modelCoreNLP 3.9.1 C# Nuget

0 个答案:

没有答案