使用批处理提取引号之间的数字,相乘后写回

时间:2017-01-23 10:42:31

标签: xml batch-file

我有一个文本文件,我使用以下批处理代码提取所需的部分:

@echo off > newfile & setLocal enableDELAYedeXpansioN
rem Purpose: copy the lines located strictly between the line holding the
rem opening face tag and the line holding the closing face tag of the input
rem text file into the output text file.
rem NOTE(review): the redirection on the first line sends the output of
rem "echo off" to a file named "newfile" in the current directory, so that
rem file is created or truncated on every run - confirm this is intended.

set H=
set T=
rem "find /n" prefixes every matching line with its line number in square
rem brackets; splitting at the brackets leaves that number in the first token.
rem H receives the number of the last line containing the opening face tag.
for /f "tokens=1* delims=[]" %%a in ('find /n "<face>" ^< C:\XML\INTERM\image-0000001.txt') do (
set H=%%a
)
rem T receives the number of the last line containing the closing face tag.
for /f "tokens=1* delims=[]" %%a in ('find /n "</face>" ^< C:\XML\INTERM\image-0000001.txt') do (
set T=%%a
)
rem "find /n /v" with an empty search string numbers every line of the file.
rem The text of each line whose number is greater than H and less than T is
rem echoed; the output of the whole loop is appended to the output file.
for /f "tokens=1* delims=[]" %%a in ('find /n /v "" ^< C:\XML\INTERM\image-0000001.txt') do (
if %%a gtr !H! if %%a lss !T! echo.%%b
)>> c:\XML\ADD\image-0000001.txt
goto :eof

我的结果文本文件是:

<attribute name="personName">Test</attribute>
<attribute name="face-type">human</attribute>
<geometry>
<LinearRing ptCount="36">
<Point x="968.13" y="762.91"/>
<Point x="940.54" y="764.38"/>
<Point x="921.17" y="755.09"/>
<Point x="905.96" y="742.91"/>
<Point x="909" y="705.92"/>
<Point x="918.23" y="660.54"/>
<Point x="926.39" y="629"/>
<Point x="875.92" y="638.23"/>
<Point x="869.77" y="664.38"/>
<Point x="862.08" y="689.77"/>
<Point x="847.46" y="701.31"/>
<Point x="829.77" y="702.85"/>
<Point x="840.3" y="655.52"/>
<Point x="849" y="607.7"/>
<Point x="854.22" y="560.3"/>
<Point x="886.39" y="520.74"/>
<Point x="922.48" y="501.17"/>
<Point x="966.83" y="476.39"/>
<Point x="1001.61" y="445.96"/>
<Point x="1027.26" y="412.04"/>
<Point x="1038.23" y="393.62"/>
<Point x="1051.31" y="417.46"/>
<Point x="1062.08" y="445.92"/>
<Point x="1067.46" y="472.08"/>
<Point x="1075.15" y="505.15"/>
<Point x="1078.23" y="536.69"/>
<Point x="1073.62" y="562.85"/>
<Point x="1062.85" y="592.85"/>
<Point x="1039.87" y="622.91"/>
<Point x="1017.7" y="640.3"/>
<Point x="1003.35" y="658.13"/>
<Point x="1005.09" y="675.09"/>
<Point x="1003.35" y="691.61"/>
<Point x="999.87" y="712.04"/>
<Point x="992.91" y="728.13"/>
<Point x="982.04" y="742.48"/>
</LinearRing>
</geometry>
<eye>
<geometry>
<Rectangle x="995.9565217391305" y="661.6086956521739" width="5.217391304347757" height="12.608695652173992"/>
</geometry>
<eye-bounds>
<attribute name="occlusion">hidden</attribute>
<attribute name="open-closed-state">half-open</attribute>
<geometry>
<Rectangle x="973.3478260869565" y="659.4347826086956" width="29.565217391304373" height="21.739130434782624"/>
</geometry>
</eye-bounds>
</eye>

我需要从 x、y、width、height 这几个属性(不包括 LinearRing 的 ptCount="36")的双引号之间提取每个数字,将每个数字乘以 2,然后逐一写回原位置。需要说明的是,Point 标签的数量(本例中是 36 个)可能会有所不同。我的操作系统是 Windows 10。

2 个答案:

答案 0 :(得分:2)

这是一个依赖JREPL.BAT的“简单”快速解决方案。它完全取代了整个脚本。

@echo off
rem Purpose: take the text between the opening and closing face tags of the
rem input file, double the numeric values of the x, y, width and height
rem attributes, and write the result to the output file.
rem Requires JREPL.BAT, a third-party regular expression utility that is not
rem part of Windows, to be reachable from this script.
setlocal
set "input=C:\XML\INTERM\image-0000001.txt"
set "output=c:\XML\ADD\image-0000001.txt"

rem First JREPL call: captures everything between the face tags and emits
rem only that capture. Second JREPL call: reads that text from the pipe and
rem rebuilds every matched attribute as prefix, Number of the value times 2,
rem and closing quote.
rem NOTE(review): the option meanings assumed here - /m multi-line search,
rem /jmatch print matches only, /x extended escapes so that \q stands for a
rem double quote, /jq JScript replacement stored in $txt, /f input file,
rem /o output file - should be confirmed against the JREPL help text.
rem NOTE(review): the attribute name alternatives are not anchored, so an
rem attribute whose name merely ends in x or y would presumably be matched
rem as well - confirm that this cannot occur in the data.
jrepl "<face>([\s\S]*)</face>" $1 /m /jmatch /f "%input%" | ^
jrepl "((?:x|y|width|height)=\q)([^\q]*)(\q)" "$txt=$1+(Number($2)*2)+$3" /x /jq /o "%output%"

但实际上你应该使用一个专门用来处理XML文件的工具。

答案 1 :(得分:1)

这是一个纯批处理(pure batch)的解决方案,它可以完成您想要的任务,但运行速度非常慢。

以下限制适用:

  • 将XML文件视为“普通”文本文件;
  • 该文件必须是ANSI编码的;
  • 该文件必须是Windows文本文件(行尾标记为回车加换行,即 CR+LF);
  • 空行丢失;
  • 行不得超过约8190个字符/字节;
  • 感兴趣的标签(Point、Rectangle)必须各自独占一行;
  • 感兴趣的标签必须像<Point/>;不允许使用<Point></Point>之类的内容;
  • 标签名称以及属性名称(x、y、width、height)必须完全匹配大小写;
  • 属性值必须是(带符号)浮点数(不允许指数格式);
  • 将浮点数舍入为不超过八位小数;
  • 浮点数的绝对值必须小于 $10^8$;
  • 用来与浮点数相乘的整数因子不得超过 21;
  • 返回的结果(相乘)浮点数恰好是八位小数;

所以这是代码:

@echo off
rem // Purpose: print the given text file line by line. A line that consists
rem // of a single self-closing tag named in _TAG[] and carries only attributes
rem // named in _ATT[] is printed with every attribute value multiplied by the
rem // integer factor; every other line is printed as it was read.
rem // Arguments: first = path of the input file, second = integer factor.
rem // The result is written to the console; redirect it to capture it.
setlocal EnableExtensions DisableDelayedExpansion

rem // Define constants here:
set "_FILE=%~1"         & rem // (use file provided as first command line argument)
set "_FACTOR=%~2"       & rem // (use factor provided as second command line argument)
set "_TAG[1]=Point"     & rem // (name of XML tag containing numeric values)
set "_TAG[2]=Rectangle" & rem // (name of XML tag containing numeric values)
rem "_TAG[...]=..."
set "_ATT[1]=x"         & rem // (name of XML attribute holding a numeric value)
set "_ATT[2]=y"         & rem // (name of XML attribute holding a numeric value)
set "_ATT[3]=width"     & rem // (name of XML attribute holding a numeric value)
set "_ATT[4]=height"    & rem // (name of XML attribute holding a numeric value)
rem "_ATT[...]=..."

rem // Main routine: check tag names and tag format:
rem // The outer loop reads the file with for /F, which does not deliver empty
rem // lines. LINE holds the current line and FLAG is set once the line has
rem // been rewritten into COLL by the sub-routine ATTRIBUTE.
rem // The inner loop walks through the output of the set command for all
rem // _TAG[] variables; its second loop variable receives the tag name.
rem // A line is accepted when findstr finds optional blanks, the tag opening
rem // with that name, at least one blank, a non-empty attribute part that
rem // contains none of the characters excluded by the bracket expression,
rem // the self-closing tag end and optional trailing blanks.
rem // The line text is expanded by a child cmd instance started with /V,
rem // presumably to keep special characters from being interpreted here.
rem // NOTE: a rewritten line is rebuilt from the tag name and the attributes,
rem // so it is printed without the leading blanks of the original line.
for /F "usebackq delims=" %%K in ("%_FILE%") do (
    set "LINE=%%K"
    set "FLAG="
    for /F "tokens=2,* delims=[]=" %%I in ('2^> nul set _TAG[') do (
        if not defined FLAG (
            setlocal EnableDelayedExpansion
            (cmd /V /C echo(^^!LINE^^!| > nul findstr /R ^
                /C:"^ *<%%J  *[^</>^&(|)][^</>^&(|)]*/> *$"
            ) && (
                rem // Split the line into the tag name and the remaining text
                for /F "tokens=1,* delims=< " %%L in ("!LINE!") do (
                    endlocal
                    set "COLL=%%L"
                    rem // Cut the remaining text at the tag end to get the attributes
                    for /F "delims=/>" %%N in ("%%M") do (
                        call :ATTRIBUTE COLL "%%N" && set "FLAG=#"
                    )
                    rem // Re-open the environment that the endlocal below closes
                    setlocal EnableDelayedExpansion
                )
            )
            endlocal
        )
    )
    rem // Print the rewritten tag if there is one, else the unchanged line
    setlocal EnableDelayedExpansion
    if defined FLAG (
        echo(^<!COLL!/^>
    ) else (
        echo(!LINE!
    )
    endlocal
)

endlocal
exit /B


:ATTRIBUTE  rtn_line_string  val_attribute
    rem // Sub-routine: check attribute names and format:
    rem // Arguments: first  = name of the variable that holds the line string
    rem //                     built so far; the main routine stores the tag
    rem //                     name in it before calling
    rem //            second = attribute part of the tag, blank-separated
    rem // Every attribute has to be one of the names in _ATT[] followed by an
    rem // equal-to sign and a quoted value whose first character is a digit,
    rem // a dot, a plus or a minus sign and whose other characters are digits
    rem // or dots. Each value is multiplied by _FACTOR through MULTIPLY and
    rem // appended to the line string in the form name="product".
    rem // Returns exit code 0 with the line string updated, or exit code 1
    rem // with the variable cleared as soon as one attribute is not accepted
    rem // or its multiplication fails.
    setlocal DisableDelayedExpansion
    set "STR=%~2"
    setlocal EnableDelayedExpansion
    rem // Each pass of the loop below consumes the first attribute of STR
    :LOOP
    for /F "tokens=1,* delims= " %%A in ("!STR!") do (
        endlocal
        rem // NEW is the attribute to check now and STR keeps the remaining ones
        set "NEW=%%A"
        set "STR=%%B"
        set "FLG="
        setlocal EnableDelayedExpansion
        rem // Try every attribute name from _ATT[] until one has been accepted
        for /F "tokens=1,* delims==" %%E in ('2^> nul set _ATT[') do (
            if not defined FLG (
                (cmd /V /C echo(^^!NEW^^!| > nul findstr /R ^
                    /C:"^%%F=\"[0-9\.+-][0-9\.]*\"$"
                ) && (
                    for /F "tokens=2 delims==" %%C in ("!NEW!") do (
                        call :MULTIPLY PRO %%~C %_FACTOR% && (
                            for /F "delims=" %%D in ("!%~1! %%F="!PRO!"") do (
                                endlocal
                                set "%~1=%%D"
                                set "FLG=#"
                                setlocal EnableDelayedExpansion
                            )
                        )
                    )
                )
            )
        )
        rem // Nothing accepted this attribute so give up on the whole line
        if not defined FLG (
            endlocal
            endlocal
            set "%~1="
            exit /B 1
        )
    )
    if defined STR goto :LOOP
    rem // Carry the finished line string across both endlocal commands
    for /F "delims=" %%D in (^""!%~1!"^") do (
        endlocal
        endlocal
        set "%~1=%%~D"
    )
    exit /B 0


:MULTIPLY  rtn_product  val_float_num  val_integer
    rem // Sub-routine: multiply of floating-point number by integer:
    rem // Arguments: first  = name of the variable that receives the product
    rem //            second = decimal number in non-exponential notation
    rem //            third  = integer multiplier, taken as 1 when it is missing
    rem // Returns exit code 0 and stores the product with exactly eight
    rem // fractional digits, or exit code 1 and clears the variable when an
    rem // overflow is detected.
    rem // The integer part and the fractional part are multiplied separately
    rem // with integer arithmetics and the carry of the fractional product is
    rem // added to the integer product afterwards.
    setlocal DisableDelayedExpansion
    rem // Normalise the multiplier and keep track of the sign in NEG:
    set "MUL=%~3" & if not defined MUL set /A "MUL=1"
    set /A "MUL+=0" & set "NEG="
    if %MUL% LSS 0 set /A "MUL=-MUL" & set "NEG=-"
    set "NUM=%~2" & if not defined NUM set "NUM=0"
    if "%NUM:~,1%"=="-" if defined NEG (set "NEG=") else (set "NEG=-")
    rem // Split the number at the decimal point:
    for /F "tokens=1,2 delims=." %%X in ("%NUM%") do (
        set "INT=%%X" & set "FRA=%%Y"
    )
    rem // Remove sign characters and leading zeros from the integer part so
    rem // that set /A does not read it as an octal number:
    for /F "tokens=* delims=+-0" %%Z in ("%INT%") do set "INT=%%Z"
    if not defined INT set "INT=0"
    rem // More than eight integer digits are not supported:
    if not "%INT:~,-8%"=="" goto :OVERFLOW
    rem // Put a guard digit 1 in front of the fractional digits and pad with
    rem // zeros; the guard digit keeps leading zeros from being read as octal
    rem // and is removed again by the modulo operation below:
    set "FRA=1%FRA%00000000"
    rem // Offset 0 is the guard digit and offsets 1 to 8 are the eight kept
    rem // fractional digits, hence the ninth fractional digit at offset 9
    rem // decides about rounding up (offset 8 would test a kept digit):
    if %FRA:~9,1% GEQ 5 (set /A "RND=1") else (set /A "RND=0")
    set /A "FRA=%FRA:~,9%%%100000000+RND"
    set /A "RSI=INT*MUL, RSF=FRA*MUL"
    rem // A negative product or one smaller than its operand is treated as
    rem // an overflow, presumably caused by signed 32-bit wrap-around:
    if %RSF% LSS 0 goto :OVERFLOW
    if %RSF% NEQ 0 if %RSF% LSS %FRA% goto :OVERFLOW
    rem // Add the digits above the lowest eight of the fractional product to
    rem // the integer product as carry; without carry the expression is
    rem // incomplete and the resulting error message is suppressed:
    2> nul set /A "RSI+=%RSF:~,-8%" & set "RSF=00000000%RSF%"
    if %RSI% LSS 0 goto :OVERFLOW
    if %RSI% NEQ 0 if %RSI% LSS %INT% goto :OVERFLOW
    rem // The whole block is expanded before endlocal runs, which lets the
    rem // result survive the end of the localised environment:
    (
        endlocal
        set "%~1=%NEG%%RSI%.%RSF:~-8%"
    )
    exit /B 0
    :OVERFLOW
    endlocal
    set "%~1="
    exit /B 1

要使用此脚本(我们把它称为 xml-multiply.bat),请将XML文件(result.xml)作为第一个命令行参数,将用于与属性值相乘的整数因子(2 表示把它们加倍)作为第二个命令行参数,像这样:

xml-multiply.bat "result.xml" 2

要将输出写入文件(result_NEW.xml),请使用:

xml-multiply.bat "result.xml" 2 > "result_NEW.xml"

要替换原始XML文件,请执行以下操作:

xml-multiply.bat "result.xml" 2 > "result_NEW.xml" && move /Y "result_NEW.xml" "result.xml" > nul