
时间:2011-03-27 14:37:24

网络应用程序托管在我的家庭服务器上,也可以从那里下载。

如果有人可以帮助我解决这个问题,我将非常感激 ief2

<hr/> PS:代码的某些部分是用荷兰语写的,所以这里有一些翻译:

  • Gemeente =城镇/城市
  • Plaats =位置
  • Nummer = 号码
  • Datum = 日期
  • Dag = 日
  • Maand =月
  • Jaar =年
  • Uur =小时
  • Aankomst =抵达
  • Vertrek =离境
  • Berekenen =计算

<hr/> PPS:下载链接显然不起作用,但下载它没有问题,所以这里有一些代码片段:


<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html>
    <head>
        <title>De Lijn Mobile</title>
        <meta name="viewport" content="width = device-width">
    </head>
    <body>
        <!-- Route request form. The first Gemeente/Straat/Nummer group is the
             departure address (vertrek*), the second the arrival address
             (aankomst*). Everything is posted to calculateRoute.php. -->
        <form name="main" action="calculateRoute.php" method="post">
            Gemeente: <input type="text" name="vertrekGemeente"><br>
            Straat: <input type="text" name="vertrekStraat"><br>
            Nummer: <input type="text" name="vertrekNummer"><br>
            Gemeente: <input type="text" name="aankomstGemeente"><br>
            Straat: <input type="text" name="aankomstStraat"><br>
            Nummer: <input type="text" name="aankomstNummer"><br>
            <?php
                // Used to pre-fill the date and time fields below.
                // NOTE(review): Date is a project class that is not part of
                // this listing; presumably a no-argument Date is "now" and the
                // file defining it is included above - confirm.
                $now = new Date();
            ?>
            <input type="radio" name="datumType" value="aankomst" checked> Aankomst<br>
            <input type="radio" name="datumType" value="vertrek"> Vertrek<br>
            Dag: <input type="text" size="2" name="datumDag" value="<?php echo $now->day; ?>"><br>
            Maand: <input type="text" size="2" name="datumMaand" value="<?php echo $now->month; ?>"><br>
            Jaar: <input type="text" size="4" name="datumJaar" value="<?php echo $now->year; ?>"><br>
            Tijdstip: <input type="text" size="2" name="datumUur" value="<?php echo $now->hour; ?>"> : 
            <input type="text" size="2" name="datumMinuten" value="<?php echo $now->minutes; ?>"><br>
            <input type="submit" value="Bereken"><br>
        </form>
    </body>
</html>


<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html>
    <head>
        <title>De Lijn Mobile - Berekeningen</title>
    </head>
    <body>
        <?php
            // Reads the route request posted by the "main" form and builds the
            // departure place, arrival place and requested date from it.
            // NOTE(review): Plaats and Date are project classes that are not
            // part of this listing; the file defining them must be included
            // before this point - confirm.
            echo "Gathering data...<br>";

            // Departure address
            $gemeente = $_POST["vertrekGemeente"];
            $straat = $_POST["vertrekStraat"];
            $nummer = $_POST["vertrekNummer"];
            $vertrekPlaats = new Plaats($gemeente, $straat, $nummer);

            // Arrival address (the temporaries above are reused)
            $gemeente = $_POST["aankomstGemeente"];
            $straat = $_POST["aankomstStraat"];
            $nummer = $_POST["aankomstNummer"];
            $aankomstPlaats = new Plaats($gemeente, $straat, $nummer);

            // "aankomst" or "vertrek": whether the date below is an arrival
            // or a departure time
            $datumType = $_POST["datumType"];
            $dag = $_POST["datumDag"];
            $maand = $_POST["datumMaand"];
            $jaar = $_POST["datumJaar"];
            $uur = $_POST["datumUur"];
            $min = $_POST["datumMinuten"];
            $datum = Date::withDate($jaar, $maand, $dag, $uur, $min);
            // NOTE(review): the month is assigned again after construction;
            // kept as-is because Date::withDate is not visible here - confirm
            // whether this is a workaround or redundant.
            $datum->month = $maand;

            echo "Searching...<br>";
        ?>
    </body>
</html>





// ==== Values of the "datumType" form field
define('DATE_ARRIVAL', "aankomst");
define('DATE_DEPARTURE', "vertrek");

// ==== Address of the remote route planner form handler
// TODO: the URL is missing from this listing; fill it in. While it is empty,
// searchDeLijn() returns null without contacting any server.
if(!defined('DE_LIJN_SEARCH_URL')) {
    define('DE_LIJN_SEARCH_URL', "");
}

// ==== Returns whatever extractRoutesFromXMLData() yields for the response
// ==== (an array, or null), or null when the request itself fails
//   $dep, $ar:  objects with ->gemeente, ->straat and ->nummer
//   $dateType:  DATE_DEPARTURE selects "vertrekken"; any other value
//               selects "aankomen"
//   $date:      object with ->day, ->month, ->year, ->hour and ->minutes
function searchDeLijn($dep, $ar, $dateType, $date) {
    $vertrekkenOfAankomen = "aankomen";
    if(DATE_DEPARTURE === $dateType) {
        $vertrekkenOfAankomen = "vertrekken";
    }

    // Round the minutes down to a multiple of five.
    // NOTE(review): presumably the remote minutes combo only offers 5-minute
    // steps - confirm.
    $myMins = (int)$date->minutes;
    $myMins -= ($myMins % 5);

    // The (string)(int) casts strip leading zeros ("03" becomes "3").
    $postFields = array(
        "form1:vertrekGemeenteInput" => $dep->gemeente,
        "form1:vertrekStraatInput" => $dep->straat,
        "form1:vertrekNrInput" => $dep->nummer,

        "form1:aankomstGemeenteInput" => $ar->gemeente,
        "form1:aankomstStraatInput" => $ar->straat,
        "form1:aankomstNrInput" => $ar->nummer,

        "form1:vertrekkenOfAankomenRadio" => $vertrekkenOfAankomen,
        "form1:dagCombo" => (string)(int)$date->day,
        "form1:maandCombo" => (string)(int)$date->month,
        "form1:jaarCombo" => $date->year,
        "form1:uurCombo" => (string)(int)$date->hour,
        "form1:minutenCombo" => (string)$myMins);

    if(DE_LIJN_SEARCH_URL === "") {
        return null;
    }

    // do the curl
    $ch = curl_init();
    if($ch === false) {
        return null;
    }
    curl_setopt($ch, CURLOPT_URL, DE_LIJN_SEARCH_URL);
    curl_setopt($ch, CURLOPT_POST, 1);
    // Passing an array makes cURL send multipart/form-data; a query string is
    // sent as application/x-www-form-urlencoded, which is what a browser
    // sends for a plain HTML form.
    // NOTE(review): assumes the remote form has no multipart enctype - confirm.
    curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($postFields));
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

    $contents = curl_exec($ch);
    curl_close($ch); // release the handle on every path
    if($contents === false || $contents === "") {
        return null;
    }

    // Dumps the raw response into the page.
    // NOTE(review): looks like debugging output - confirm before removing.
    echo $contents;

    $myRouteObjects = extractRoutesFromXMLData($contents);
    return $myRouteObjects;
}

// ==== Returns an array holding one node per row of the results table,
// ==== or null when the table body or its rows cannot be obtained.
// NOTE(review): the original header promised Route objects, but the rows are
// returned as plain nodes; no conversion happens here.
function extractRoutesFromXMLData($dataString) {
    $tableBody = getResultsTableBody($dataString);
    if($tableBody === null) { return null; }

    $tableRows = getTableRowsOfTableBody($tableBody);
    if($tableRows === null) { return null; }

    // put them in an array
    $myArray = array();
    $count = $tableRows->length;
    for($i = 0; $i < $count; $i++) {
        // item($i), not item(0): every row, not the first row $count times
        $myArray[] = $tableRows->item($i);
    }

    return $myArray;
}

// ==== Returns a DOMDocument holding the <tbody> of the
// ==== "routeplanner_overzicht" table found in $dataString, or null when
// ==== the table or its body is missing or cannot be parsed
function getResultsTableBody($dataString) {
    // Get table element
    $status = preg_match('/<TABLE id="routeplanner_overzicht".*?>.*?<\/TABLE>/is',
            $dataString, $matches);
    if($status !== 1) { // 0 = no match, false = regex error
        return null;
    }

    $tableElement = $matches[0];

    // Extract body
    $status = preg_match('/<TBODY>.*?<\/TBODY>/is',
            $tableElement, $matches);
    if($status !== 1) {
        return null;
    }

    $tableBody = $matches[0];

    // Parse as HTML: the HTML parser tolerates markup that is not well-formed
    // XML and lower-cases element names. Parser warnings are collected
    // internally instead of being printed into the page.
    // NOTE(review): loadHTML guesses the encoding when none is declared;
    // confirm non-ASCII text survives.
    $previous = libxml_use_internal_errors(true);
    $doc = new DOMDocument();
    $loaded = $doc->loadHTML('<table>' . $tableBody . '</table>');
    libxml_clear_errors();
    libxml_use_internal_errors($previous);
    if(!$loaded) {
        return null;
    }

    return $doc;
}

// ==== Returns a DOMNodeList with the <tr> children of the first <tbody>
// ==== under each parent element of $xmlDoc (possibly empty), or null when
// ==== $xmlDoc is not a DOMDocument or the query fails
// Element names are matched in lower case.
function getTableRowsOfTableBody($xmlDoc) {
    if(!($xmlDoc instanceof DOMDocument)) {
        return null;
    }

    $xpath = new DOMXPath($xmlDoc);
    // XPath positions are 1-based: tbody[0] never matches anything.
    $xpathres = $xpath->evaluate("//tbody[1]/tr");
    if($xpathres === false) {
        return null;
    }

    return $xpathres;
}


<strong>HTMLFormExtractor.py</strong>

import sys
import getopt
import urllib
import re

# ############################
# This code may be used by anyone. It may be used in both free
# and commercial software. It may be copied, modified and even
# be sold. The creator of this code takes no responsibility for
# any damage this script could do.
# ############################

# ############################
# ############################
# Usage: ./exec [-x] [URL]
# This application logs all forms of an HTML document and its
# objects which have the HTML 'name'-attribute set. The program
# currently only works when the attributes of the objects are
# styled like the XML format (eg: name="myname").
# Options:
#   -x: Create an XML document of the following form:
#           ==== BEGIN XML ====
#           formlist
#               form (variable)
#                   attribute (variable)
#                       name
#                       value
#                   object (variable)
#                       type (eg: input)
#                       name (eg: username)
#           ==== END XML ====
#   URL: a URL pointing to an available HTML file. If it's not
#       specified, the program will read the HTML document
#       from the standard input.
# ############################

# ===== DATA =====

# ===== CLASSES =====
class HTMLAttribute:
    """A single name="value" attribute of an HTML tag.

    Attributes:
        name: the attribute name, case preserved.
        value: the attribute value without its surrounding quotes.
        originalString: the text the attribute was created from, or None
            when no such text was given.
    """

    def __init__(self, name, value, orString=None):
        self.name = name
        self.value = value
        # Keep the source text; it used to be discarded unconditionally.
        self.originalString = orString

    @classmethod
    def withAttributeString(cls, string):
        """Create an attribute from a string of the form attrName="value".

        The value may be quoted with single or double quotes. The closing
        quote must be the same character as the opening one, so a value
        such as "it's" is kept whole.

        Raises:
            ValueError: if string contains no name="value" pair.
        """
        match = re.search(r"""(\w+)=(["'])(.*?)\2""", string)
        if match is None:
            raise ValueError("not an attribute string: %r" % (string,))
        return cls(match.group(1), match.group(3), string)

class HTMLObject:
    """An HTML tag: its tag name plus the attributes parsed from it.

    Attributes:
        name: the tag name (e.g. "input").
        attributes: list of HTMLAttribute, in the order they were added.
    """

    def __init__(self, aName):
        self.name = aName
        self.attributes = [] # contains HTMLAttribute

    def addAttribute(self, anAttribute):
        """Append anAttribute to this object's attribute list."""
        self.attributes.append(anAttribute)

    def getAttributeWithName(self, aName):
        """Return the first attribute named aName (case-insensitive) or None."""
        aName = aName.lower()
        for anAttribute in self.attributes:
            if anAttribute.name.lower() == aName:
                return anAttribute
        return None

    @classmethod
    def withTagString(cls, string):
        """Takes a string of the form <aTagName attrName="value" ... >

        Only the first tag in string is parsed; anything after its closing
        '>' is ignored. Attributes are recognised only when their value is
        quoted.

        Raises:
            ValueError: if string contains no tag, or the tag has no name.
        """
        tagMatch = re.search(r"<.*?>", string, re.S)
        if tagMatch is None:
            raise ValueError("no HTML tag found in %r" % (string,))
        tag = tagMatch.group(0)

        nameMatch = re.match(r"<\s*(\w+)", tag)
        if nameMatch is None:
            raise ValueError("tag without a name: %r" % (tag,))

        myObj = cls(nameMatch.group(1))
        # The back-reference makes the closing quote match the opening one.
        for attrMatch in re.finditer(r"""\w+=(["']).*?\1""", tag):
            attrObj = HTMLAttribute.withAttributeString(attrMatch.group(0))
            myObj.addAttribute(attrObj)

        return myObj

class HTMLForm:
    """Holds a form name together with a list of HTMLObject."""

    def __init__(self, name, htmlObjects):
        # HTMLObjects is expected to be a list of HTMLObject.
        self.name, self.HTMLObjects = name, htmlObjects

# ===== FUNCTIONS =====
def getFormsFromHTML(htmlData):
    """Return the source text of every <form ...>...</form> in htmlData.

    Matching ignores case and spans line breaks. The result is a list of
    strings in document order; it is empty when there is no form.
    """
    formPattern = re.compile("<form.*?>.*?</form>", re.IGNORECASE | re.S)
    return [found.group(0) for found in formPattern.finditer(htmlData)]

def getFormObjects(aForm):
    """Returns a list of HTMLObjects

    One HTMLObject is created for every tag in aForm whose tag name is
    listed in FORM_OBJECTS_TAG_NAME (compared case-insensitively). An
    empty FORM_OBJECTS_TAG_NAME yields an empty list.

    NOTE(review): FORM_OBJECTS_TAG_NAME is not defined in the visible part
    of this file; it is expected to be an iterable of tag-name strings -
    confirm.
    """
    tagNames = [re.escape(aTagName) for aTagName in FORM_OBJECTS_TAG_NAME]
    if not tagNames:
        return []

    # \b stops a listed name from matching a longer tag that merely starts
    # with it.
    myRegex = "<(?:" + "|".join(tagNames) + r")\b.*?>"
    regObj = re.compile(myRegex, re.S | re.I)

    # Each parsed tag is collected; previously the result was dropped and
    # the function always returned an empty list.
    return [HTMLObject.withTagString(aTag) for aTag in regObj.findall(aForm)]

def printForms(foundForms, foundObjects):
    """Print a readable summary of the forms to standard output.

    Pass on a list of HTMLObject and a list of lists of HTMLObjects.
    The first list are the forms, the second are the objects contained by
    the forms at the corresponding index of the first list.

    For every form this prints its attributes, the number of objects found
    in that form, and each of those objects that has a 'name' attribute.
    """
    for counter, aForm in enumerate(foundForms):
        formObjects = foundObjects[counter]

        # Single-argument print(...) behaves the same under the Python 2
        # print statement and the Python 3 print function.
        print("===== FORM " + str(counter + 1) + " =====")

        print("\tATTRIBUTES:")
        for anAttribute in aForm.attributes:
            print("\t\t" + anAttribute.name + ": '" + anAttribute.value + "'")

        # Count this form's objects, not the number of forms.
        print("\n\t" + str(len(formObjects)) + " OBJECTS:")
        for anObject in formObjects:
            nameAttribute = anObject.getAttributeWithName("name")
            if nameAttribute is not None:
                print("\t\t" + anObject.name + " (name=\"" + nameAttribute.value + "\")")

        print("\n")

def createXMLString(foundForms, foundObjects):
    """Return the forms and their named objects as an XML string.

    Pass on a list of HTMLObject and a list of lists of HTMLObjects.
    The first list are the forms, the second are the objects contained by
    the forms at the corresponding index of the first list.

    Layout of the result:

        formlist
            form (mult)
                attribute (mult)
                    name
                    value
                object (mult)
                    type (eg: input)
                    name (eg: username)

    Objects without a 'name' attribute are left out. All text is
    XML-escaped, so '&', '<' and '>' in attribute values cannot break the
    document.
    """
    from xml.sax.saxutils import escape

    parts = ["<formlist>\n"]
    for counter, aForm in enumerate(foundForms):
        # make form child
        parts.append("\t<form>\n")

        # add all attributes
        for anAttr in aForm.attributes:
            parts.append("\t\t<attribute>\n")
            parts.append("\t\t\t<name>" + escape(anAttr.name) + "</name>\n")
            parts.append("\t\t\t<value>" + escape(anAttr.value) + "</value>\n")
            parts.append("\t\t</attribute>\n")

        # add all input objects if they have a name
        for anObject in foundObjects[counter]:
            nameAttr = anObject.getAttributeWithName("name")
            if nameAttr is not None:
                parts.append("\t\t<object>\n")
                parts.append("\t\t\t<type>" + escape(anObject.name) + "</type>\n")
                parts.append("\t\t\t<name>" + escape(nameAttr.value) + "</name>\n")
                parts.append("\t\t</object>\n")

        # end child (a real closing tag) and leave a blank line after it
        parts.append("\t</form>\n\n")

    # Drop the final newline (the blank line after the last form, or the one
    # after <formlist> when there are no forms), then close the document.
    xmlString = "".join(parts)
    return xmlString[:-1] + "</formlist>\n"

# ===== MAIN =====
# Reads an HTML document from the URL given on the command line (or from
# standard input when there is none) and reports its forms, either as a
# readable listing or, with -x, as XML.

# Parse the command line options
userArgv = sys.argv[1:]
flags, arguments = getopt.getopt(userArgv, "x")
wantsXMLFormat = ('-x', '') in flags
hasURL = len(arguments) > 0

# Get the HTML data
myHTML = None
if hasURL:
    myURL = arguments[0]
    try:
        # Python 2 urllib: raises IOError when the URL cannot be opened.
        urlHandle = urllib.urlopen(myURL)
    except IOError:
        print("Failed to open the URL")
        sys.exit(1)
    try:
        myHTML = urlHandle.read()
    finally:
        urlHandle.close()
else:
    # Only fall back to stdin when no URL was given; it used to overwrite
    # the downloaded document.
    myHTML = sys.stdin.read()

# Get all forms
htmlForms = getFormsFromHTML(myHTML)

# Loop with all forms
foundForms = []
foundObjects = [] # list of list
for aFormTag in htmlForms:
    # append the form (only its opening tag is parsed)
    foundForms.append(HTMLObject.withTagString(aFormTag))

    # append the form's input objects, at the same index as the form
    foundObjects.append(getFormObjects(aFormTag))

# Print or create xml
if not wantsXMLFormat:
    printForms(foundForms, foundObjects)
else:
    myXMLString = createXMLString(foundForms, foundObjects)
    print(myXMLString)