假设我有一个YAML文件,如下所示:
en:
  errors:
    # Some comment
    format: "%{attribute} %{message}"

    # One more comment
    messages:
      "1": "Message 1"
      "2": "Message 2"

    long_error_message: |
      This is a
      multiline message

  date:
    format: "YYYY-MM-DD"
我怎样才能把它读成下面这样的 Ruby Hash?
{
  'en': {
    'errors': {
      'format': { value: '%{attribute} %{message}', line: 4 },
      'messages': {
        '1': { value: 'Message 1', line: 8 },
        '2': { value: 'Message 2', line: 9 }
      },
      'long_error_message': { value: "This is a\nmultiline message", line: 11 }
    },
    'date': {
      'format': { value: 'YYYY-MM-DD', line: 16 }
    }
  }
}
我尝试以"YAML: Find line number of key?"中提到的提示为起点,实现了一个 Psych::Handler,但感觉必须重写 Psych 中的大量代码才能使其工作。
有什么办法可以解决这个问题吗?
答案 0 :(得分:5)
看起来您想要把所有作为映射值的标量替换为一个哈希:其中 `value` 键保存原始值,`line` 键保存行号。
以下代码几乎可行,主要问题出在多行字符串上:给出的行号是 YAML 中下一项开始的位置。原因是,当处理程序的 `scalar` 方法被调用时,解析器已经越过了我们关心的标量,所以 `mark` 给出的是解析器得知该标量已结束时所在的行。对于您示例中的大多数情况这无关紧要,但在多行情况下会得到错误的值。在不深入 Psych 的 C 代码的前提下,我看不出有什么办法能通过 `mark` 获得标量起始位置的信息。
require 'psych'
# Psych first parses the YAML into an AST of Node objects. Adding the
# accessor to the Psych::Nodes::Node base class gives every node a place to
# remember the line it was parsed at.
Psych::Nodes::Node.class_eval do
  attr_accessor :line
end
# Tree-building handler that records, on every scalar node, the 0-based line
# on which the scalar starts. Psych::TreeBuilder is the "usual" handler that
# creates the AST; this subclass only adds the line bookkeeping.
class LineNumberHandler < Psych::TreeBuilder
  # The parser driving this handler. It is only consulted (via Parser#mark)
  # as a fallback when the node carries no start position of its own.
  attr_accessor :parser

  # Builds the scalar node exactly as TreeBuilder does, then stores its line
  # in Node#line (an accessor this file adds to Psych::Nodes::Node).
  #
  # By the time this callback fires the parser has already scanned past the
  # scalar, so Parser#mark points at the *next* token. That is harmless for
  # single-line values but wrong for block scalars (`|`, `>`). Psych releases
  # that report event locations expose Node#start_line on the built node;
  # prefer it and only fall back to the mark when it is unavailable.
  #
  # Returns the Psych::Nodes::Scalar that was appended to the tree.
  def scalar(value, anchor, tag, plain, quoted, style)
    fallback_line = parser.mark.line if parser
    node = super
    start_line = node.start_line if node.respond_to?(:start_line)
    node.line = start_line || fallback_line
    node
  end
end
# The next step is to convert the AST to a Ruby object. Psych does this
# using the visitor pattern with the ToRuby visitor. ToRuby is patched in
# place (rather than subclassed) so that Node#to_ruby picks the change up.
class Psych::Visitors::ToRuby
  # Builds a Ruby Hash from a mapping node, replacing every scalar value that
  # carries a line number with { "value" => ..., "line" => ... }.
  #
  # hash    - the Hash to fill in; it is also the return value.
  # o       - the mapping node whose children alternate key, value.
  # _tagged - accepted and ignored. Newer Psych releases appear to call
  #           revive_hash with a third argument for some tagged mappings;
  #           a fixed two-argument signature would raise ArgumentError there.
  #
  # Because this replaces ToRuby#revive_hash for the whole process, nodes
  # produced by an ordinary TreeBuilder (e.g. through Psych.load) also pass
  # through here. Their `line` is nil, so they are left untouched instead of
  # failing on `nil + 1`.
  #
  # Code dealing with << (for merging hashes) is omitted, and mappings that
  # have tags may need further care. If you need merge keys, copy the logic
  # from:
  # https://github.com/tenderlove/psych/blob/v2.0.13/lib/psych/visitors/to_ruby.rb#L333-L365
  def revive_hash(hash, o, _tagged = false)
    o.children.each_slice(2) do |k, v|
      key = accept(k)
      val = accept(v)

      # This is the important bit: a scalar value with a known line is
      # replaced by the desired hash. Node#line is 0-based, hence the + 1.
      if v.is_a?(::Psych::Nodes::Scalar) && v.line
        val = { "value" => val, "line" => v.line + 1 }
      end

      hash[key] = val
    end
    hash
  end
end
# Obtain the YAML text; get_yaml_from_wherever stands in for however the
# caller gets hold of it.
yaml = get_yaml_from_wherever

# Wire handler and parser to each other: the parser feeds events to the
# handler, and the handler asks the parser for its current position.
handler = LineNumberHandler.new
parser = Psych::Parser.new(handler)
handler.parser = parser

# Build the AST, then convert it. ToRuby was patched rather than subclassed,
# so the stock Node#to_ruby can be used here.
parser.parse(yaml)
converted = handler.root.to_ruby
puts converted
答案 1 :(得分:3)
诀窍可能是对 `TreeBuilder#scalar` 方法打猴子补丁(monkeypatch):
# Sample document used by the example. Indentation is significant in YAML:
# without it the nesting collapses and the block scalar becomes invalid.
# The blank lines presumably mirror the original layout that the line
# numbers quoted in the expected output were taken from.
y='
en:
  errors:
    # Some comment
    format: "%{attribute} %{message}"

    # One more comment
    messages:
      "1": "Message 1"
      "2": "Message 2"

    long_error_message: |
      This is a
      multiline message

  date:
    format: "YYYY-MM-DD"'
require 'yaml'
# Anonymous subclass of whatever handler class YAML.parser uses. For scalars
# whose style is greater than 1 it swaps the scalar's value for
# { value:, line: }, where the line is the one remembered in $line from the
# previous scalar callback.
yphc = Class.new(YAML.parser.handler.class) do
  def scalar(value, anchor, tag, plain, quoted, style)
    # Read $line *before* refreshing it below.
    payload = style > 1 ? { value: value, line: $line } : value
    # handle multilines properly
    $line = $parser.mark.line + 1
    super(payload, anchor, tag, plain, quoted, style)
  end
end
# The handler reads the parser through the $parser global, so it has to be
# assigned before parsing starts.
$parser = Psych::Parser.new(yphc.new)
# more careful handling required for multidocs
parsed = $parser.parse(y)
result = parsed.handler.root.to_ruby.first
实际上,我们差不多完成了。唯一剩下的就是只在叶子节点中保留带行号的值(去掉键上的包装)。我有意没有把这段逻辑放进解析代码里。
# Returns a copy of +hash+ in which every key that is itself a Hash has been
# replaced by that Hash's :value entry. Hash values are processed the same
# way recursively; all other values are carried over unchanged.
def unmark_keys(hash)
  hash.each_with_object({}) do |(key, value), cleaned|
    plain_key = key.is_a?(Hash) ? key[:value] : key
    cleaned[plain_key] = value.is_a?(Hash) ? unmark_keys(value) : value
  end
end
# Print the parsed result after unmark_keys has unwrapped the keys; the
# output reported by the answer is reproduced in the comments below.
p unmark_keys result
#⇒ {"en"=>
#⇒ {"errors"=>
#⇒ {
#⇒ "format"=>{:value=>"%{attribute} %{message}", :line=>4},
#⇒ "messages"=>
#⇒ {
#⇒ "1"=>{:value=>"Message 1", :line=>8},
#⇒ "2"=>{:value=>"Message 2", :line=>9}
#⇒ }
#⇒ },
#⇒ "long_error_message"=>{
#⇒ :value=>"This is a\nmultiline message\n", :line=>11
#⇒ },
#⇒ "date"=>{"format"=>{:value=>"YYYY-MM-DD", :line=>16}}
#⇒ }
#⇒ }
当然,有人可能想去掉全局变量等等。我尽量让核心实现保持简洁。
就是这样,希望对您有所帮助。
UPD 感谢 @matt 指出,上面的代码在下面这种写法的标量上会失败:
key1:
  val1
key2: val2
YAML 允许这种语法,但上述方法无法正确处理它,不会为这样的值返回任何行号。除了对这类标量缺乏支持这一恼人的问题之外,其他情况下的行号都能正确报告,更多细节请参阅此答案的评论。
答案 2 :(得分:2)
我采用了 @matt 的解决方案,并创建了一个不需要猴子补丁(monkey patching)的版本。它还能处理跨越多行的值以及 YAML 的 `<<` 运算符。
require "psych"
require "pp"
# Pairs a converted scalar (`value`) with `lines`, the range of line numbers
# attributed to it by ToRubyWithLineNumbers#revive_hash.
ValueWithLineNumbers = Struct.new(:value, :lines)
# Scalar node that additionally remembers a line number handed to it at
# construction time.
class Psych::Nodes::ScalarWithLineNumber < Psych::Nodes::Scalar
  # The line number supplied by whoever built the node.
  attr_reader :line_number

  # The last positional argument is the line number; everything before it is
  # forwarded untouched to Psych::Nodes::Scalar#initialize.
  def initialize(*scalar_args, line_number)
    @line_number = line_number
    super(*scalar_args)
  end
end
# TreeBuilder variant that emits ScalarWithLineNumber nodes, stamping each
# one with the parser's current mark line.
class Psych::TreeWithLineNumbersBuilder < Psych::TreeBuilder
  # The parser feeding this handler; it must be assigned before parsing
  # because #scalar reads its mark.
  attr_accessor :parser

  # Creates the scalar node, appends it to the children of the node
  # currently being built (@last), and returns it.
  def scalar(*args)
    Psych::Nodes::ScalarWithLineNumber.new(*args, parser.mark.line).tap do |scalar_node|
      @last.children << scalar_node
    end
  end
end
class Psych::Visitors::ToRubyWithLineNumbers < Psych::Visitors::ToRuby
def visit_Psych_Nodes_ScalarWithLineNumber(node)
visit_Psych_Nodes_Scalar(node)
end
private
def revive_hash(hash, node)
node.children.each_slice(2) do |k, v|
key = accept(k)
val = accept(v)
if v.is_a? Psych::Nodes::ScalarWithLineNumber
start_line = end_line = v.line_number + 1
if k.is_a? Psych::Nodes::ScalarWithLineNumber
start_line = k.line_number + 1
end
val = ValueWithLineNumbers.new(val, start_line..end_line)
end
if key == SHOVEL && k.tag != "tag:yaml.org,2002:str"
case v
when Psych::Nodes::Alias, Psych::Nodes::Mapping
begin
hash.merge! val
rescue TypeError
hash[key] = val
end
when Psych::Nodes::Sequence
begin
h = {}
val.reverse_each do |value|
h.merge! value
end
hash.merge! h
rescue TypeError
hash[key] = val
end
else
hash[key] = val
end
else
hash[key] = val
end
end
hash
end
end
# Usage: build the AST with the line-aware builder, then convert it with the
# line-aware visitor. `yaml` is expected to hold the YAML source text.
handler = Psych::TreeWithLineNumbersBuilder.new
handler.parser = Psych::Parser.new(handler)
handler.parser.parse(yaml)

visitor = Psych::Visitors::ToRubyWithLineNumbers.create
ruby_with_line_numbers = visitor.accept(handler.root)
pp ruby_with_line_numbers
我把上述代码连同一些注释和示例发布成了一个 gist。
答案 3 :(得分:0)
我们可以手动添加行号:递归遍历 Psych 解析得到的哈希,并在源文件中查找每个键所在的行。以下代码将得到与您指定的结果一致的输出。
require 'psych'
# Replaces every leaf value in +hash+ (recursively, in place) with
#   { "value" => original_value, "line" => line_number }
# where line_number is the 1-based index in +lines+ of the line that holds
# the leaf's key, or nil when no such line is found.
#
# lines - Array of the YAML source lines, in file order.
# hash  - the Hash parsed from those lines; nested Hashes are descended
#         into, anything else counts as a leaf.
#
# Keys are looked up in document order, each search resuming after the
# previous hit, so a key name that occurs under several parents (such as
# `format`) resolves to its own line rather than to the first occurrence.
# Only block-style `key:` lines are recognised; the key may be quoted.
#
# Returns +hash+.
def add_line_numbers(lines, hash)
  annotate_line_numbers(lines, hash, 0)
  hash
end

# Annotates +hash+, searching +lines+ from index +cursor+ onwards. Returns
# the index just past the last key line that was matched.
def annotate_line_numbers(lines, hash, cursor)
  # Iterate over a snapshot of the keys; values are replaced along the way.
  hash.keys.each do |key|
    value = hash[key]
    index = find_key_line(lines, key, cursor)
    cursor = index + 1 if index

    if value.is_a?(Hash)
      cursor = annotate_line_numbers(lines, value, cursor)
    else
      hash[key] = { "value" => value, "line" => index && index + 1 }
    end
  end
  cursor
end

# Returns the index of the first line at or after +from+ that starts with
# +key+ (optionally indented and/or quoted) followed by a colon, or nil.
def find_key_line(lines, key, from)
  # Escape the key so characters such as `.` or `*` are matched literally.
  pattern = /\A\s*["']?#{Regexp.escape(key.to_s)}["']?\s*:/
  (from...lines.size).find { |i| lines[i] =~ pattern }
end
# Load foo.yml from the directory above this file, keeping the raw lines
# around so add_line_numbers can look the keys up in them.
yaml_file = File.expand_path('../foo.yml', __FILE__)
lines = File.readlines(yaml_file)
# File.readlines keeps each line's trailing newline, so the lines are joined
# without a separator; joining with "\n" would double every line break and
# change the content of block scalars.
data = Psych.load(lines.join)
add_line_numbers(lines, data)
puts data