试图转储文件 - Python

时间:2017-04-28 11:28:59

标签: python json web

我试图将网页转储到文件中,以获取项目所需的数据。网页看起来像这样:Image1

结果文件是:



<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html>
    <head>        
        <!--Meta tags-->  
        <meta http-equiv="Content-Type" content="text/html;charset=ISO-8859-1">
        <meta name="viewport" content="width=1280"/>
        <meta name="viewport" content="initial-scale=0.8"/>

        <!--Stylesheets--> 
        <link href="../../../css/layout.css"  rel="stylesheet" type="text/css" />
        <link rel="shortcut icon" href="favicon.ico" type="image/x-icon" />

        <!--Janitza Library-->
        <script type="text/javascript" src="../../../lib/jquery.js" ></script>
        <script type="text/javascript" src="../../../lib/jquery-ui.js" ></script>
        <script type="text/javascript" src="../../../lib/lib_full_min.js" ></script>

        <script type="text/javascript" src="../../../lib/app.js" ></script>
        
        <!--Other Javascripts-->
        <script type="text/javascript" src="../../../javascripts/menu.js" ></script>
        
        <!--Javascripts - Language-->   
        <script>
            var vars = null;
            var data = null;
            $.ajax({
                dataType: "json",
                url: "../../../../json.do?_LANGUAGE",
                async: false,
                data: data,
                success: function(data){
                    vars = data;
                }
              });
            var language = vars["_LANGUAGE"][0];
            var systime = new Date().getTime();
            document.write('<script type="text/javascript" src="../../../egg.js?' + systime + '"></sc' + 'ript>');
            document.write('<script type="text/javascript" src="../../../lang/info.js?' + systime + '"></sc' + 'ript>');
            document.write('<script type="text/javascript" src="../../../lang/' + language + '.js?' + systime + '" encoding="utf-8" ></sc' + 'ript>');
            document.write('<script type="text/javascript" src="../../../lang/en.js?' + systime + '" encoding="utf-8" ></sc' + 'ript>');
            
            CurrentLanguage = language.toLowerCase();
            CurrentChallenge = "";         
            
            $( document ).ready(function() {
                InsertsEggs();
              });     
        </script>
        
        <!--Title-->  
        <title></title>
    </head>

    <body>
        <div id="main">
            <div id="top-bar">
                <div class="wrapper">
                    <div class="logo"></div>
                    <div class="devicetype"></div>
                    <div id="languagebox">
                        <ul id="languageselector">
                            <!-- Ausgabe der Flaggen fuer die Sprachauswahl -->
                            <LangSelect></LangSelect>
                            <!-- ENDE Sprachauswahl -->
                        </ul>
                    </div>
                </div>
                <!--End .wrapper--> 
            </div>
            <!--End top-bar--> 
            <div id="menu-bar">
                <div class="wrapper">
                    <div id="navigationbar">
                        <ul id="navigation">

                        </ul>        
                    </div>

                </div>
                <!--End .wrapper--> 
            </div>
            <!--End menu-bar--> 

            <div id="content">
                <div class="wrapper">


    
    <script type="text/javascript" src="javascripts/functions.js" ></script>
    
    <div class="full-width-frame" >

        <div class="top">

            <span class="trans">menu.soverview</span>

        </div>

        <div class="center">

            <div class="whitebar">
                <!-- Status strip: device name, time and frequency.
                     The icons sit next to text labels, so they carry an empty alt text. -->
                <div class="white-one">
                    <div class="white-img"><img src="../layout/devicename.png" alt=""></div>
                    <div class="white-text"><span class="trans">soverview.device</span></div>
                    <div class="white-value"></div>
                </div>
                <div class="white-two">
                    <div class="white-img"><img src="../layout/time.png" alt=""></div>
                    <div class="white-text"><span class="trans">soverview.time</span></div>
                    <div class="white-value" id="time">
                        <script>
                            // Run hideL4() and time() once now, then again every 3000 ms.
                            // Both functions are defined outside this file.
                            hideL4();
                            time();
                            setInterval(function(){
                                hideL4();
                                time();
                            }, 3000);
                            
                        </script>
                    </div>
                </div>
                <div class="white-three">
                    <div class="white-img"><img src="../layout/freq.png" alt=""></div>
                    <div class="white-text"><span class="trans">soverview.freq</span></div>
                    <div class="white-value"><script>document.write(sysvar("_FREQ", AUTOUPDATE, 2));</script></div>
                </div>
            </div>


            <table style="width: 1100px; margin: 0 auto">
                <tr>
                    <td VALIGN="top">
                        <table class="showtable" >
                            <!-- Voltage table: each value cell writes the result of sysvar(...)
                                 for the named system variable into the document. -->
                            <tr>
                                <th><span class="trans">soverview.phase</span></th>
                                <th><span class="trans">soverview.uvll</span></th>
                                <th><span class="trans">soverview.uvln</span></th>
                            </tr>
                            <tr>
                                <!-- Line breaks use <br />; the former "</br>" was an invalid end tag. -->
                                <td>L1/L2<br />L1/N</td>
                                <td><script>document.write(sysvar("_ULL[0]", AUTOUPDATE, 2, HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_ULN[0]", AUTOUPDATE, 2, HIDEUNIT));</script></td>
                            </tr>
                            <tr>
                                <td>L2/L3<br />L2/N</td>
                                <td><script>document.write(sysvar("_ULL[1]", AUTOUPDATE, 2, HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_ULN[1]", AUTOUPDATE, 2, HIDEUNIT));</script></td>
                            </tr>
                            <tr>
                                <td>L3/L1<br />L3/N</td>
                                <td><script>document.write(sysvar("_ULL[2]", AUTOUPDATE, 2, HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_ULN[2]", AUTOUPDATE, 2, HIDEUNIT));</script></td>
                            </tr>
                            <tr>
                                <td>L4/N</td>
                                <td></td>
                                <td><script>document.write(sysvar("_ULN[3]", AUTOUPDATE, 2, HIDEUNIT));</script></td>
                            </tr>
                        </table>
                    </td>
                    <td VALIGN="top" align="center">
                        <table class="showtable" >
                            <!-- Power/energy table: each value cell writes the result of sysvar(...)
                                 for the named system variable. Every call passes its arguments in the
                                 same order: name, AUTOUPDATE, digits, ["k"], HIDEUNIT. -->
                            <tr>
                                <th><span class="trans">soverview.phase</span></th>
                                <th><span class="trans">kW</span></th>
                                <th><span class="trans">kWh</span></th>
                                <th><span class="trans">kvar</span></th>
                                <th><span class="trans">kvarh</span></th>
                            </tr>
                            <tr>
                                <td><span class="trans">L1</span></td>
                                <td><script>document.write(sysvar("_PLN[0]", AUTOUPDATE, 2, ["k"], HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_WH[0]", AUTOUPDATE, 0, ["k"], HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_QLN[0]", AUTOUPDATE, 2, ["k"],  HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_QH[0]", AUTOUPDATE, 0, ["k"],  HIDEUNIT));</script></td>
                            </tr>
                            <tr>
                                <td><span class="trans">L2</span></td>
                                <!-- Argument order aligned with the sibling rows (was: ["k"], 2). -->
                                <td><script>document.write(sysvar("_PLN[1]", AUTOUPDATE, 2, ["k"], HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_WH[1]", AUTOUPDATE, 0, ["k"], HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_QLN[1]", AUTOUPDATE, 2, ["k"],  HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_QH[1]", AUTOUPDATE, 0, ["k"],  HIDEUNIT));</script></td>
                            </tr>
                            <tr>
                                <td><span class="trans">L3</span></td>
                                <td><script>document.write(sysvar("_PLN[2]", AUTOUPDATE, 2, ["k"], HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_WH[2]", AUTOUPDATE, 0, ["k"], HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_QLN[2]", AUTOUPDATE, 2, ["k"],  HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_QH[2]", AUTOUPDATE, 0, ["k"],  HIDEUNIT));</script></td>
                            </tr>
                            <tr>
                                <td><span class="trans">L4</span></td>
                                <td><script>document.write(sysvar("_PLN[3]", AUTOUPDATE, 2, ["k"], HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_WH[3]", AUTOUPDATE, 0, ["k"], HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_QLN[3]", AUTOUPDATE, 2, ["k"],  HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_QH[3]", AUTOUPDATE, 0, ["k"],  HIDEUNIT));</script></td>
                            </tr>
                            <tr></tr>
                            <tr>
                                <td><span class="trans">L1..L3</span></td>
                                <td><script>document.write(sysvar("_P_SUM3", AUTOUPDATE, 2, ["k"], HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_WH[4]", AUTOUPDATE, 0, ["k"], HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_Q_SUM3", AUTOUPDATE, 2, ["k"],  HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_QH[4]", AUTOUPDATE, 0, ["k"],  HIDEUNIT));</script></td>
                            </tr>
                            <tr>
                                <td><span class="trans">L1..L4</span></td>
                                <td><script>document.write(sysvar("_P_SUM", AUTOUPDATE, 2, ["k"], HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_WH[5]", AUTOUPDATE, 0, ["k"], HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_Q_SUM", AUTOUPDATE, 2, ["k"],  HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_QH[5]", AUTOUPDATE, 0, ["k"],  HIDEUNIT));</script></td>
                            </tr>
                        </table>

                    </td>
                    <td VALIGN="top" align="right">
                        <!-- Current / cos-phi / THD table: each value cell writes the result of
                             sysvar(...) for the named system variable into the document. -->
                        <table class="showtable" >
                            <tr>
                                <th><span class="trans">soverview.phase</span></th>
                                <th><span class="trans">IinA</span></th>
                                <th><span class="trans">cos-phi</span></th>
                                <th><span class="trans">THD-U</span></th>
                                <th><span class="trans">THD-I</span></th>
                            </tr>
                            <!-- Per-phase rows L1..L4 use array indices [0]..[3]. -->
                            <tr>
                                <td><span class="trans">L1</span></td>
                                <td><script>document.write(sysvar("_ILN[0]", AUTOUPDATE, 2, HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_COS_PHI[0]", AUTOUPDATE, 2, HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_THD_ULN[0]", AUTOUPDATE, 2, HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_THD_IL[0]", AUTOUPDATE, 2, HIDEUNIT));</script></td>
                            </tr>
                            <tr>
                                <td><span class="trans">L2</span></td>
                                <td><script>document.write(sysvar("_ILN[1]", AUTOUPDATE, 2, HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_COS_PHI[1]", AUTOUPDATE, 2, HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_THD_ULN[1]", AUTOUPDATE, 2, HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_THD_IL[1]", AUTOUPDATE, 2, HIDEUNIT));</script></td>
                            </tr>
                            <tr>
                                <td><span class="trans">L3</span></td>
                                <td><script>document.write(sysvar("_ILN[2]", AUTOUPDATE, 2, HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_COS_PHI[2]", AUTOUPDATE, 2, HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_THD_ULN[2]", AUTOUPDATE, 2, HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_THD_IL[2]", AUTOUPDATE, 2, HIDEUNIT));</script></td>
                            </tr>
                            <tr>
                                <td><span class="trans">L4</span></td>
                                <td><script>document.write(sysvar("_ILN[3]", AUTOUPDATE, 2, HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_COS_PHI[3]", AUTOUPDATE, 2, HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_THD_ULN[3]", AUTOUPDATE, 2, HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_THD_IL[3]", AUTOUPDATE, 2, HIDEUNIT));</script></td>
                            </tr>
                            <!-- Empty row (presumably a visual separator before the sum rows - TODO confirm). -->
                            <tr></tr>
                            <!-- Sum rows: only current and cos-phi are written; the THD cells stay empty. -->
                            <tr>
                                <td><span class="trans">L1..L3</span></td>
                                <td><script>document.write(sysvar("_I_SUM3", AUTOUPDATE, 2, HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_COS_SUM3", AUTOUPDATE, 2, HIDEUNIT));</script></td>
                                <td></td>
                                <td></td>
                            </tr>
                            <tr>
                                <td><span class="trans">L1..L4</span></td>
                                <td><script>document.write(sysvar("_I_SUM", AUTOUPDATE, 2, HIDEUNIT));</script></td>
                                <td><script>document.write(sysvar("_COS_SUM", AUTOUPDATE, 2, HIDEUNIT));</script></td>
                                <td></td>
                                <td></td>
                            </tr>
                        </table>

                    </td>
                </tr>
            </table>

        </div>

        <div class="bottom"></div>

    </div>

    <!--End full-width-frame--> 

    
                </div>
            </div>
            <!--End content--> 

            <div id="footer-bar">
                <div class="wrapper">
                    <div class="footdevice"></div>
                </div>
                <!--End .wrapper--> 
            </div>
            <!--End footer-bar--> 
        </div>
        <!--End main--> 
        
        
    </body>
</html>
&#13;
&#13;
&#13;

也就是说,本应显示图像中那些数字的位置,出现的却是一段奇怪的脚本。

我用来下载网页的命令是:

curl http://www.phl.org/cgi-bin/fidsarrival.pl -o "arrival.txt"

我需要提取这些数字,以便把它们插入到其他地方。

1 个答案:

答案 0 :(得分:2)

所以你想抓取一个网页。

您面临的问题是该网页包含动态内容。curl 只是从服务器获取网页的 HTML;而当你在浏览器中打开页面时,页面上的 JavaScript 会被执行,从服务器获取一些 JSON 数据,然后更新网页。

可以看看 Selenium,它适合处理动态页面。还有其他一些网络抓取工具/框架也能帮到你。