我想根据给定的化学式字符串(String)统计其中各原子的数量。到目前为止,我能够得到这样的结果:
iex(1)> Chemistry.tokenize("In[Cu(SO4)2H2]3O")
[{:atom, "In", 1}, {:bracket, "Cu(SO4)2H2", 3}, {:atom, "O", 1}]
我想请教一下:您认为这是一个好方法吗?即先分词(tokenize),然后递归处理得到的列表?如果是,接下来应如何处理结果?我的想法是对每个括号片段继续求值,直到只剩下原子为止。但我不确定该如何实现。感谢您的任何建议。
这是我到目前为止编写的代码。
defmodule Chemistry do
  @moduledoc """
  Tokenizer for chemical formulas such as `"In[Cu(SO4)2H2]3O"`.

  `tokenize/1` splits a formula into its top-level tokens. The contents of a
  bracketed group are returned verbatim (not tokenized recursively), so callers
  can call `tokenize/1` again on the inner string of each `:bracket` token.
  """

  @typedoc "A top-level token: an element symbol or a bracketed group, with its multiplier."
  @type token :: {:atom | :bracket, String.t(), non_neg_integer()}

  # Opening bracket byte => the closing byte that must terminate the group.
  @brackets %{?( => ?), ?[ => ?], ?{ => ?}}
  @openers Map.keys(@brackets)
  @closers Map.values(@brackets)

  @doc """
  Splits `chunk` into a list of top-level tokens.

  Each token is `{:atom, symbol, count}` or `{:bracket, inner, count}`, where
  `count` is the number following the symbol / closing bracket, or `1` when no
  number is present.

  Groups are matched against their *balanced* closing bracket, so sibling
  groups such as `"(SO4)2(OH)3"` yield two separate `:bracket` tokens.

  Raises `ArgumentError` when `chunk` contains an unbalanced bracket or a
  character that cannot start a token.

  ## Examples

      iex> Chemistry.tokenize("In[Cu(SO4)2H2]3O")
      [{:atom, "In", 1}, {:bracket, "Cu(SO4)2H2", 3}, {:atom, "O", 1}]

      iex> Chemistry.tokenize("(SO4)2(OH)3")
      [{:bracket, "SO4", 2}, {:bracket, "OH", 3}]

  """
  @spec tokenize(String.t()) :: [token()]
  def tokenize(""), do: []

  # Two-letter element symbol: an uppercase letter followed by a lowercase one.
  def tokenize(<<upper, lower, rest::binary>>) when upper in ?A..?Z and lower in ?a..?z,
    do: emit(:atom, <<upper, lower>>, rest)

  # One-letter element symbol.
  def tokenize(<<upper, rest::binary>>) when upper in ?A..?Z,
    do: emit(:atom, <<upper>>, rest)

  # Bracketed group: locate the balanced closing bracket instead of the last
  # one in the string, so that adjacent groups are not merged together.
  def tokenize(<<open, rest::binary>> = chunk) when open in @openers do
    case group_size(rest, [Map.fetch!(@brackets, open)], 0) do
      {:ok, size} ->
        <<inner::binary-size(size), _closer, tail::binary>> = rest
        emit(:bracket, inner, tail)

      :error ->
        raise ArgumentError, "unbalanced bracket in formula fragment: #{inspect(chunk)}"
    end
  end

  def tokenize(chunk) do
    raise ArgumentError, "cannot tokenize formula fragment: #{inspect(chunk)}"
  end

  # Builds the token for `text`, reading its optional multiplier from the start
  # of `rest`, and continues tokenizing whatever follows the multiplier.
  defp emit(tag, text, rest) do
    {count, tail} = split_count(rest)
    [{tag, text, count} | tokenize(tail)]
  end

  # Splits off the leading run of decimal digits; an absent count means 1.
  defp split_count(rest) do
    case digit_prefix_size(rest, 0) do
      0 ->
        {1, rest}

      size ->
        <<digits::binary-size(size), tail::binary>> = rest
        {String.to_integer(digits), tail}
    end
  end

  defp digit_prefix_size(<<digit, rest::binary>>, size) when digit in ?0..?9,
    do: digit_prefix_size(rest, size + 1)

  defp digit_prefix_size(_rest, size), do: size

  # Returns `{:ok, n}` where `n` is the byte size of the group's contents, i.e.
  # the offset of the closing bracket that balances the already-consumed opener.
  # `expected` is the stack of closing brackets still owed, innermost first.
  defp group_size(<<ch, _::binary>>, [ch], size), do: {:ok, size}

  defp group_size(<<ch, rest::binary>>, [ch | expected], size),
    do: group_size(rest, expected, size + 1)

  defp group_size(<<ch, rest::binary>>, expected, size) when ch in @openers,
    do: group_size(rest, [Map.fetch!(@brackets, ch) | expected], size + 1)

  # A closing bracket that does not match the innermost open group.
  defp group_size(<<ch, _::binary>>, _expected, _size) when ch in @closers, do: :error

  defp group_size(<<_other, rest::binary>>, expected, size),
    do: group_size(rest, expected, size + 1)

  # Input ended while at least one group was still open.
  defp group_size(<<>>, _expected, _size), do: :error
end
答案 0 :(得分:1)
对于这样的问题,我倾向于使用这样的递归解析器:
defmodule Parser do
  @moduledoc """
  Skeleton of a recursive-descent formula parser.

  Only the end-of-input and opening-bracket clauses are shown here; the
  remaining clauses are elided (see the `# ...` placeholder below).
  """

  # Bytes that open a group.
  @open_p ~c"(["
  # The attributes below are not referenced by the clauses shown here.
  @close_p ~c")]"
  @lower ?a..?z
  @upper ?A..?Z

  @doc """
  Parses `string`, starting with an empty stack and an empty buffer.
  """
  def parse(string), do: walk(string, [], "")

  # End of input: flush a non-empty buffer onto the stack.
  defp walk(<<>>, groups, pending) do
    case pending do
      "" -> groups
      _ -> [pending | groups]
    end
  end

  # Opening bracket: push a new group, seeded with the pending buffer if there
  # is one, and continue with an empty buffer.
  defp walk(<<ch, rest::binary>>, groups, pending) when ch in @open_p do
    opened = if pending == "", do: [], else: [pending]
    walk(rest, [opened | groups], "")
  end

  # ...
end
如果为每个原子构建类似 {"H", 6} 的元组,就可以用 Enum.reduce 遍历这些元组,累积出最终的映射(map)。
另外顺便提一下(FYI),您问题中的以下代码:
# Selects the bracket regex that corresponds to the opening delimiter of
# `chunk` and passes both to process_bracket/2.
cond do
String.starts_with?(chunk, "{") ->
process_bracket(chunk, @curly_bracket_regex)
String.starts_with?(chunk, "[") ->
process_bracket(chunk, @square_bracket_regex)
String.starts_with?(chunk, "(") ->
process_bracket(chunk, @round_bracket_regex)
end
如果把 process_bracket 的参数顺序颠倒一下(正则在前、chunk 在后),就可以改用下面这样的模式匹配来简化:
# The case expression evaluates to the regex for the opening delimiter of
# `chunk`; the pipe then supplies that regex as the FIRST argument, so this
# calls process_bracket(regex, chunk).
case chunk do
"{" <> _ -> @curly_bracket_regex
"[" <> _ -> @square_bracket_regex
"(" <> _ -> @round_bracket_regex
end
|> process_bracket(chunk)
答案 1 :(得分:0)
如果您遵循我在下面概述的算法,我认为您不需要显式递归。以您的示例说明如下。初始状态:
In[Cu(SO4)2H2]3O
在每个小写字母后面加:
In[Cu:(SO4)2H2]3O
在每个后面没有小写字母的大写字母后面加:
In:[Cu:(S:O:4)2H:2]3O:
在每个后面没有数字的 : 后面补上 1
`In:1[Cu:1(S:1O:4)2H:2]3O:1`
对于紧跟在右括号之后的每个数字 K,按如下方式处理:
将该括号内的每个 letter:number
修改为 letter:K*number
(同时去掉括号后面的数字 K)
In:1[Cu:3*1(S:3*2*1O:3*2*4)H:3*2]O:1
删除所有括号
In:1Cu:3*1S:3*2*1O:3*2*4H:3*2O:1
按原子拆分成单独的项
In:1
Cu:3*1
S:3*2*1
O:3*2*4
H:3*2
O:1
然后计算各项的乘积,并使用 Elixir 的 Map 函数合并同类项……