正则表达式没有捕获所有匹配项,只得到最后一个

时间:2018-01-30 14:19:35

标签: java regex

我有以下正则表达式:(?:cassy\.jobs \((?:([a-z]+(?:_[a-z]+)?) [a-z]+(?:, )?)+(?:PRIMARY KEY \(([a-z]+(?:_[a-z]+)?)\)\))?)

以下字符串:

CREATE TABLE cassy.jobs (job_id int, job_description text, maximum_salary double, minimum_salary double, PRIMARY KEY (job_id)) WITH read_repair_chance = 0.0 AND dclocal_read_repair_chance = 0.1 AND gc_grace_seconds = 864000 AND bloom_filter_fp_chance = 0.01 AND caching = { 'keys' : 'ALL', 'rows_per_partition' : 'NONE' } AND comment = '' AND compaction = { 'class' : 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold' : 32, 'min_threshold' : 4 } AND compression = { 'chunk_length_in_kb' : 64, 'class' : 'org.apache.cassandra.io.compress.LZ4Compressor' } AND default_time_to_live = 0 AND speculative_retry = '99PERCENTILE' AND min_index_interval = 128 AND max_index_interval = 2048 AND crc_check_chance = 1.0 AND cdc = false;, CREATE TABLE cassy.employees (employee_id int, email_add text, frst_name text, hire_date date, job_id int, last_name text, salary double, PRIMARY KEY (employee_id)) WITH read_repair_chance = 0.0 AND dclocal_read_repair_chance = 0.1 AND gc_grace_seconds = 864000 AND bloom_filter_fp_chance = 0.01 AND caching = { 'keys' : 'ALL', 'rows_per_partition' : 'NONE' } AND comment = '' AND compaction = { 'class' : 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold' : 32, 'min_threshold' : 4 } AND compression = { 'chunk_length_in_kb' : 64, 'class' : 'org.apache.cassandra.io.compress.LZ4Compressor' } AND default_time_to_live = 0 AND speculative_retry = '99PERCENTILE' AND min_index_interval = 128 AND max_index_interval = 2048 AND crc_check_chance = 1.0 AND cdc = false;]

我想捕获给定表的列名和主键(如果存在),但我只得到第一个捕获组的最后一次匹配以及主键。

我已经读过"重复捕获组与捕获重复组"但解决方案似乎不起作用。经过几个小时的搜索,我无法找到答案。

我得到了什么:

group #1: minimum_salary
group #2: job_id

结果应该是什么:

group #1: job_id
group #2: job_description
group #3: maximum_salary
group #4: minimum_salary
group #5: job_id

编辑:添加了java标记,如果仅使用regex无法实现,则可以使用java代码。

1 个答案:

答案 0 :(得分:0)

可以只用正则表达式实现,但需要两个正则表达式才能得到最终结果。使用你的正则表达式的简化版本,再加上第二个正则表达式,我们就可以从这个特定的表中提取列名:

regex1 = /(?:cassy\.jobs \(.*?\))/
regex2 = /(?:\s*([a-z_]+) [a-z]+|PRIMARY KEY\s\((.*?)\))/gm

我对Java不是很熟悉,但在JS中,我会这样做:

// Extracts the column names and the primary key of the `cassy.jobs` table from
// a dump of CREATE TABLE statements, in two passes:
//   1. regex1 isolates the `cassy.jobs (...` definition;
//   2. regex2 is applied repeatedly (global flag) to that definition, yielding
//      one column name or the primary key per match.

// Pass 1 pattern. The lazy `.*?` stops at the first `)`, which in the sample
// input is the one closing `PRIMARY KEY (job_id)`.
const regex1 = /(?:cassy\.jobs \(.*?\))/;
// Pass 2 pattern. Group 1 captures a column name (`<name> <type>`), group 2
// captures the content of `PRIMARY KEY (...)`. Only one of the two groups
// participates in any single match.
const regex2 = /(?:\s*([a-z_]+) [a-z]+|PRIMARY KEY\s\((.*?)\))/gm;
const string = "CREATE TABLE cassy.jobs (job_id int, job_description text, maximum_salary double, minimum_salary double, PRIMARY KEY (job_id)) WITH read_repair_chance = 0.0 AND dclocal_read_repair_chance = 0.1 AND gc_grace_seconds = 864000 AND bloom_filter_fp_chance = 0.01 AND caching = { 'keys' : 'ALL', 'rows_per_partition' : 'NONE' } AND comment = '' AND compaction = { 'class' : 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold' : 32, 'min_threshold' : 4 } AND compression = { 'chunk_length_in_kb' : 64, 'class' : 'org.apache.cassandra.io.compress.LZ4Compressor' } AND default_time_to_live = 0 AND speculative_retry = '99PERCENTILE' AND min_index_interval = 128 AND max_index_interval = 2048 AND crc_check_chance = 1.0 AND cdc = false;, CREATE TABLE cassy.employees (employee_id int, email_add text, frst_name text, hire_date date, job_id int, last_name text, salary double, PRIMARY KEY (employee_id)) WITH read_repair_chance = 0.0 AND dclocal_read_repair_chance = 0.1 AND gc_grace_seconds = 864000 AND bloom_filter_fp_chance = 0.01 AND caching = { 'keys' : 'ALL', 'rows_per_partition' : 'NONE' } AND comment = '' AND compaction = { 'class' : 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold' : 32, 'min_threshold' : 4 } AND compression = { 'chunk_length_in_kb' : 64, 'class' : 'org.apache.cassandra.io.compress.LZ4Compressor' } AND default_time_to_live = 0 AND speculative_retry = '99PERCENTILE' AND min_index_interval = 128 AND max_index_interval = 2048 AND crc_check_chance = 1.0 AND cdc = false;]";
// Match array for the table definition, or null when the table is absent.
const createTable = regex1.exec(string);
const columnNames = [];

// Scan the matched text itself rather than relying on the match array being
// coerced to a string; an absent table yields an empty scan, not "null".
const tableDefinition = createTable === null ? '' : createTable[0];

// Put all capture group matches into the columnNames array.
// `match` is declared explicitly: assigning to an undeclared name creates an
// implicit global in sloppy mode and throws in strict mode / ES modules.
let match;
while ((match = regex2.exec(tableDefinition)) !== null) {
  // Index 0 is the whole match; the remaining entries are the capture groups,
  // and the group that did not participate is undefined.
  for (const captured of match.slice(1)) {
    if (captured !== undefined) {
      columnNames.push(captured);
    }
  }
}

console.log(columnNames);