用于生成英语风格名称的上下文无关文法

时间:2017-06-22 22:35:40

标签: random context-free-grammar auto-generate

我目前正在编写一个会生成随机数据的应用程序,特别是随机名称。我取得了一些不错的进展,但对许多生成的名称并不满意。问题出在我的产生式规则上,我已将它们附在本文末尾。

基本思想是:辅音、元音、辅音、元音,但有些辅音会映射为其自身后接一个元音(例如 b<VO>)。

我还没有写完全部规则,但最终的设想将遵循下面所示的格式。不过,与其直接把它写完,我更希望先为这些产生式规则找到一个更好的基础。

我试图找到一份参考资料,其中讨论以下两者之一:已经为英语单词创建好的上下文无关文法(CFG),或者把英语单词拆解为字母组合基本格式的参考资料。不幸的是,我没能找到能帮助我在现有基础上更进一步的有用资源。有谁知道我应该去哪里找,或者有什么可以参考的资料吗?

另外:在您看来,上下文相关文法是否可能更合适?

    //the following will deal with single vowels and consonants
//
// Grammar layout: each rule is an array of alternative productions. A
// production is a string of literal letters mixed with non-terminals written
// as <NAME>, where NAME matches the `name` property of another rule. The
// parallel `probabilities` array holds one percentage weight per production,
// in the same order, so both arrays must always have the same length.
// Several weight lists sum to slightly less than 100 (e.g. CO, VO, MID, OE).
// NOTE(review): the code that picks a production is not part of this section;
// confirm it tolerates weight totals that are not exactly 100.
//
// The rules are kept as `var` globals so that code outside this section which
// refers to them by name keeps working.
var CO = ['b','c','d','f','g','h','j','k','l','m','n','p','qu','r','s','t','v','w','x','y','z'];
CO.probabilities = [2.41,4.49,6.87,3.59,3.25,9.84,0.24,1.24,6.5,3.88,10.9,3.11,0.153,9.67,10.2,14.6,1.58,3.81,0.242,3.19,0.12];
CO.name = "CO";
var VO = ['a','e','i','o','u'];
VO.probabilities = [21.43,33.33,18.28,19.7,7.23];
VO.name = "VO";

// Any single letter: either a vowel or a consonant.
var LETTER = ['<VO>','<CO>'];
LETTER.probabilities = [38.1,61.9];
LETTER.name = "LETTER";

//the following deal with consonant pairs
var BH = ['c','p','r','s','t']; //the first part of a th, ph, sh, pair (before H)
BH.probabilities = [20,10,20,25,25];
BH.name = "BH";

var BL = ['b','c','f','g','p','s']; //before letter l
BL.probabilities = [10,20,10,10,25,25];
BL.name = "BL";

var COP = ['<BH>h','<BL>l']; //consonant pairs
COP.probabilities = [50,50];
COP.name = "COP";

//this is a generic syllable, that does not take grammar rules into consideration
var SYL = ['<CO><VO>','<VO><CO>','<CO><VO><VO>'];
SYL.probabilities = [50,20,30];
SYL.name = "SYL";

//the following deal with mid word syllables
var CLOSED = ['<CO><VO><CO>','<CO><VO><CO><CO>'];
CLOSED.probabilities = [75,25];
CLOSED.name = "CLOSED";

var OPEN = ['<CO><VO>','<CO><CO><VO>'];
OPEN.probabilities = [60,40];
OPEN.name = "OPEN";

var VR = ['<VO>r']; //vowel-r
VR.probabilities = [100];
VR.name = "VR";

var MID = ['<CLOSED>','<OPEN>','<VR>'];
MID.probabilities = [33,33,33];
MID.name = "MID";

//the following will deal with ending syllables
var VCE = ['<VO><CO>e','<LETTER><VO><CO>e'];
VCE.probabilities = [75,25];
VCE.name = "VCE";

var CLE = ['<CO>le'];
CLE.probabilities = [100];
CLE.name = "CLE";

var OE = ['tion','age','ive']; //other endings
OE.probabilities = [33,33,33];
OE.name = "OE";

var ES = ['<VCE>','<CLE>','<OE>','<VR>']; //contains all ending syllables
// One weight per production. The list previously held only three weights
// (40,40,20) for four productions, leaving '<VR>' without any weight. The
// original 2:2:1 ratio of the first three is kept (scaled to 80%) and '<VR>'
// receives the remaining 20%; that share is a starting value, tune as needed.
ES.probabilities = [32,32,16,20];
ES.name = "ES";

// Every grammar rule defined above, collected in one list.
var rules = [CO,VO,BH,BL,COP,LETTER,SYL,CLOSED,OPEN,VR,MID,VCE,CLE,OE,ES];

//These are some highly-defined production rules
// Fixed word lists rather than letter-level grammar rules. Only streetSuffix
// carries a `probabilities` array (one percentage weight per entry, same
// order); the remaining lists have no weights attached.
var streetSuffix = ['road','street','way','avenue','drive','grove','lane','gardens','place','crescent','close','square','hill','circus','mews','vale','rise','mead'];
streetSuffix.probabilities = [15,15,5,10,5,2.7,2.7,2.7,2.7,2.7,2.7,2.7,2.7,2.7,2.7,2.7,2.7,2.7];
// US states and territories. 'Virgin Island' was corrected to 'Virgin Islands'.
var states = ['Alabama','Alaska','American Samoa','Arizona','Arkansas','California','Colorado','Connecticut','Delaware','Florida','Georgia','Guam','Hawaii','Idaho','Illinois','Indiana','Iowa','Kansas','Kentucky','Louisiana','Maine','Marshall Islands','Maryland','Massachusetts','Michigan','Minnesota','Mississippi','Missouri','Montana','Nebraska','Nevada','New Hampshire','New Jersey','New Mexico','New York','North Carolina','North Dakota','Ohio','Oklahoma','Oregon','Palau','Pennsylvania','Puerto Rico','Rhode Island','South Carolina','South Dakota','Tennessee','Texas','Utah','Vermont','Virgin Islands','Virginia','Washington','West Virginia','Wisconsin','Wyoming'];
// City suffixes written as a separate word; '' means no suffix.
var cityNewWordSuffix = ['city','town',''];
// City suffixes appended directly to the name; '' means no suffix.
// 'gurg' was corrected to 'burg' (as in Pittsburg/Hamburg).
var cityEndWordSuffix = ['polis','ville','ford','furt','forth','shire','berg','burg','borough','brough','field','kirk','bury','stadt',''];

// Top-level domains for generated site names.
var siteSuffix = ['com','org','net','edu'];

/**
 * Generates one random name.
 *
 * Starts from the fixed pattern "<COP><VO><MID><VO><ES>" and keeps passing
 * the string through replaceFirstNonTerminal() for as long as
 * hasNonTerminal() reports that something is left to expand. Takes no
 * arguments; the length of the result depends on the expansions made.
 *
 * @returns {string} the string once hasNonTerminal() returns a falsy value.
 */
function generateRandomName() {
    var expansion = "<COP><VO><MID><VO><ES>";

    while (true) {
        // Nothing left to expand: this is the finished name.
        if (!hasNonTerminal(expansion)) {
            return expansion;
        }
        expansion = replaceFirstNonTerminal(expansion);
    }
}

以下是机器在当前状态下生成的几个单词:

  

“cheiroene”,   “sloeraase”   “sledehgeute”   “rhaorenone”   “rheerisute”   “chaereehe”   “sletraoege”   “sluureese”   “chaheyleete”   “chierauhe”   “ploclooate”   “glawofhaice”   “thanisgoage”   “slelaodose”   “blaereode”   “shihudeife”   “slaereene”   “pleheaele”   “rhepicsaile”   “ploeruoge”   “sliareuhe”   “thaereafe”   “thaaraeke”   “cheoreate”   “shofetniote”   “phiraoese”   “clilniueye”   “slepceikede”   “cligloueohe”   “phitleoime”,

0 个答案:

没有答案