我目前正在编写一个会生成随机数据的应用程序,尤其是随机名称。我已经取得了一些不错的进展,但对生成的许多名称并不满意。问题出在我的产生式规则(production rules)上,我已将这些规则附在本文下方。
基本思想是:按"辅音、元音、辅音、元音"的顺序交替排列,但有些辅音本身还会再映射出一个元音(例如 b<VO>)。
我还没有把规则全部写完,但最终的思路会沿用下面所示的格式。不过,与其先把它补全,我更希望先为这些产生式规则找到一个更好的基础。
我试图找到这样的参考资料:要么是已经为英语单词构建好的上下文无关文法(CFG),要么是把英语单词拆解成基本字母组合模式的英语资料。不幸的是,我没能找到可以让我在现有基础上更进一步的有用资源。有谁知道我应该去哪里找,或者有什么参考资料可以看吗?
另外:在您看来,使用上下文相关文法(context-sensitive grammar)会不会更好?
// Single-letter symbols. Each table is an array of productions that also
// carries two extra properties:
//   probabilities - one number per production, at the same index
//                   (presumably selection weights; the code that reads them
//                   is not shown here)
//   name          - the symbol name as it appears between '<' and '>'

// Consonants; note that 'qu' is listed as a single entry.
var CO = [
  'b', 'c', 'd', 'f', 'g', 'h', 'j',
  'k', 'l', 'm', 'n', 'p', 'qu', 'r',
  's', 't', 'v', 'w', 'x', 'y', 'z'
];
CO.probabilities = [
  2.41, 4.49, 6.87, 3.59, 3.25, 9.84, 0.24,
  1.24, 6.5, 3.88, 10.9, 3.11, 0.153, 9.67,
  10.2, 14.6, 1.58, 3.81, 0.242, 3.19, 0.12
];
CO.name = "CO";

// Vowels.
var VO = ['a', 'e', 'i', 'o', 'u'];
VO.probabilities = [21.43, 33.33, 18.28, 19.7, 7.23];
VO.name = "VO";

// Any single letter: expands to either a vowel or a consonant.
var LETTER = [
  '<VO>',
  '<CO>'
];
LETTER.probabilities = [38.1, 61.9];
LETTER.name = "LETTER";
// Consonant pairs. Each table carries a parallel `probabilities` list and the
// symbol `name` used inside '<...>' references.

// First letter of an "h" pair (ch, ph, rh, sh, th).
var BH = ['c', 'p', 'r', 's', 't'];
BH.probabilities = [20, 10, 20, 25, 25];
BH.name = "BH";

// First letter of an "l" pair (bl, cl, fl, gl, pl, sl).
var BL = ['b', 'c', 'f', 'g', 'p', 's'];
BL.probabilities = [10, 20, 10, 10, 25, 25];
BL.name = "BL";

// A consonant pair: either an "h" pair or an "l" pair.
var COP = [
  '<BH>h',
  '<BL>l'
];
COP.probabilities = [50, 50];
COP.name = "COP";
// Generic syllable built only from consonant/vowel slots; it does not take
// any of the grammar rules into consideration.
var SYL = [
  '<CO><VO>',
  '<VO><CO>',
  '<CO><VO><VO>'
];
SYL.probabilities = [50, 20, 30]; // one entry per production above
SYL.name = "SYL";
// Mid-word syllables. Each table carries a parallel `probabilities` list and
// the symbol `name` used inside '<...>' references.

// Closed syllable: ends in one or two consonants.
var CLOSED = [
  '<CO><VO><CO>',
  '<CO><VO><CO><CO>'
];
CLOSED.probabilities = [75, 25];
CLOSED.name = "CLOSED";

// Open syllable: ends in a vowel, after one or two consonants.
var OPEN = [
  '<CO><VO>',
  '<CO><CO><VO>'
];
OPEN.probabilities = [60, 40];
OPEN.name = "OPEN";

// Vowel followed by a literal 'r'.
var VR = ['<VO>r'];
VR.probabilities = [100];
VR.name = "VR";

// Any mid-word syllable.
var MID = [
  '<CLOSED>',
  '<OPEN>',
  '<VR>'
];
MID.probabilities = [33, 33, 33];
MID.name = "MID";
// Ending syllables. Each table carries a parallel `probabilities` list (one
// entry per production, same index) and the symbol `name` used inside
// '<...>' references.

// Vowel-consonant-'e' ending, optionally preceded by one extra letter.
var VCE = ['<VO><CO>e', '<LETTER><VO><CO>e'];
VCE.probabilities = [75, 25];
VCE.name = "VCE";

// Consonant followed by a literal 'le'.
var CLE = ['<CO>le'];
CLE.probabilities = [100];
CLE.name = "CLE";

// Other fixed endings.
var OE = ['tion', 'age', 'ive'];
OE.probabilities = [33, 33, 33];
OE.name = "OE";

// All ending syllables.
var ES = ['<VCE>', '<CLE>', '<OE>', '<VR>'];
// The original list had only three entries ([40,40,20]) for these four
// productions, leaving '<VR>' without any weight. One entry per production is
// required; the exact split below is a judgement call and may need tuning.
ES.probabilities = [35, 35, 20, 10];
ES.name = "ES";
// Every grammar table defined above, collected in one list.
var rules = [
  CO, VO,
  BH, BL, COP,
  LETTER, SYL,
  CLOSED, OPEN, VR, MID,
  VCE, CLE, OE, ES
];
// Fixed word lists (no '<...>' references) used for other kinds of random data.

// Street-name suffixes, with one `probabilities` entry per suffix (same index).
var streetSuffix = ['road','street','way','avenue','drive','grove','lane','gardens','place','crescent','close','square','hill','circus','mews','vale','rise','mead'];
streetSuffix.probabilities = [15,15,5,10,5,2.7,2.7,2.7,2.7,2.7,2.7,2.7,2.7,2.7,2.7,2.7,2.7,2.7];

// US states and other listed areas.
// Fixed: 'Virgin Island' was missing its trailing 's'.
var states = [
  'Alabama','Alaska','American Samoa','Arizona','Arkansas','California','Colorado','Connecticut',
  'Delaware','Florida','Georgia','Guam','Hawaii','Idaho','Illinois','Indiana','Iowa','Kansas',
  'Kentucky','Louisiana','Maine','Marshall Islands','Maryland','Massachusetts','Michigan',
  'Minnesota','Mississippi','Missouri','Montana','Nebraska','Nevada','New Hampshire','New Jersey',
  'New Mexico','New York','North Carolina','North Dakota','Ohio','Oklahoma','Oregon','Palau',
  'Pennsylvania','Puerto Rico','Rhode Island','South Carolina','South Dakota','Tennessee','Texas',
  'Utah','Vermont','Virgin Islands','Virginia','Washington','West Virginia','Wisconsin','Wyoming'
];

// City-name suffixes written as a separate word; '' means no suffix.
var cityNewWordSuffix = ['city','town',''];

// City-name suffixes attached to the end of the word; '' means no suffix.
// Fixed: 'gurg' looked like a typo for 'burg' (it sat between 'berg' and
// 'borough') - NOTE(review): confirm this was not intentional.
var cityEndWordSuffix = ['polis','ville','ford','furt','forth','shire','berg','burg','borough','brough','field','kirk','bury','stadt',''];

// Top-level domains for generated site names.
var siteSuffix = ['com','org','net','edu'];
/**
 * Generates a random name.
 *
 * Starts from the fixed template "<COP><VO><MID><VO><ES>" and keeps passing
 * the string through replaceFirstNonTerminal() for as long as
 * hasNonTerminal() reports that it still contains a non-terminal. Both
 * helpers are defined elsewhere in this file.
 *
 * @returns {string} the fully expanded name
 */
function generateRandomName() {
  var expansion = "<COP><VO><MID><VO><ES>";
  for (;;) {
    if (!hasNonTerminal(expansion)) {
      return expansion;
    }
    expansion = replaceFirstNonTerminal(expansion);
  }
}
以下是程序在当前状态下生成的一些单词:
“cheiroene”, “sloeraase”, “sledehgeute”, “rhaorenone”, “rheerisute”, “chaereehe”, “sletraoege”, “sluureese”, “chaheyleete”, “chierauhe”, “ploclooate”, “glawofhaice”, “thanisgoage”, “slelaodose”, “blaereode”, “shihudeife”, “slaereene”, “pleheaele”, “rhepicsaile”, “ploeruoge”, “sliareuhe”, “thaereafe”, “thaaraeke”, “cheoreate”, “shofetniote”, “phiraoese”, “clilniueye”, “slepceikede”, “cligloueohe”, “phitleoime”