如何查找字段名称并填充其值?

时间:2015-05-26 13:50:42

标签: javascript web-scraping web-crawler casperjs

我有以下用于登录应用程序的HTML表单。我只对表单中的"username"和"pwd"这两个字段元素感兴趣。

<!-- Login form named "loginForm"; it posts to /arsys/servlet/LoginServlet. -->
<form name="loginForm" METHOD="post"
    ACTION="/arsys/servlet/LoginServlet"
        enctype="x-www-form-encoded">
    <tbody>
    <tr>
        <td class="login" nowrap="nowrap" width="20px">&nbsp;</td>
        <td class="login" colspan="2" nowrap="nowrap">
        <em class="subhead">Please log in.</em>
        </td>
    </tr>
    <!-- User name field: name="username", id="username-id". -->
    <tr>
        <td class="login" nowrap="nowrap" width="20px" >&nbsp;</td>
        <td class="login" nowrap="nowrap" id="LoginLabel-id">
            <b><label style="color:#FFFFFF;" for="username-id">User Name</label></b>
        </td>
        <td>
        <input name="username" maxlength="254" id="username-id" value="" class="loginfield" size="30" type="text">
        </td>
    </tr>
    <!-- Password field: name="pwd", id="pwd-id". -->
    <tr>
        <td class="login" nowrap="nowrap" width="20px">&nbsp;</td>
        <td class="login" id="PasswordLabel-id" nowrap="nowrap">
            <label style="color:#FFFFFF;" for="pwd-id">Password</label>
        </td>
        <td>
        <input name="pwd" maxlength="61" id="pwd-id" class="loginfield" size="30" autocomplete="off" type="password">
        </td>
    </tr>
    <!-- Authentication field: name="auth", id="auth-id". -->
    <tr>
        <td class="Login" nowrap="nowrap" width="20px">&nbsp;</td>
        <td class="Login" name="auth_label" nowrap="nowrap">
            <label style="color:#FFFFFF;" for="auth-id">Authentication</label>
        </td>
        <td><input type="text" NAME="auth" id="auth-id" maxlength="2048" class="loginfield" size="30"></td>
    </tr>                           
    <!-- Both controls below are type="button", not type="submit": the "Log In"
         button runs doLogin() from its onClick handler, and "Clear" runs clearLogin(). -->
    <tr>
        <td class="Login" nowrap="nowrap" width="20px">&nbsp;</td>
        <td class="loginfield" nowrap="nowrap">&nbsp;</td>
        <td>
            <input type="button" name="login" value="Log In" onClick="doLogin();"><!--;-->&nbsp;
            <input type="button" name="clear" value="Clear" onClick="clearLogin();"><!--;-->
        </td>
    </tr>
    <!-- Hidden inputs that belong to the same form. -->
    <tr>
        <td class="Login" nowrap="nowrap">&nbsp;</td>
        <td class="Login" nowrap="nowrap">&nbsp;</td>
        <td>
            <input type="hidden" name="timezone" value="">
            <input type="hidden" name="encpwd" value="1">
            <input type="hidden" name="goto" value="" >
            <input type="hidden" name="server" value="" >
            <input type="hidden" name="ipoverride" value="0">
            <input type="hidden" name="initialState" value="-1">
            <input type="hidden" name="returnBack" value="">
        </td>
    </tr>
</tbody>
</form>

这是我第一次尝试使用casperjs,因为它是唯一既能执行javascript函数、又能提交表单的解决方案。

// CasperJS script as posted in the question: it opens the login page, asserts
// that the login form exists, then tries to fill in and submit the credentials.
var utils = require('utils');
var casper = require('casper').create({   
    verbose: true, 
    logLevel: 'debug',
    pageSettings: {
         loadImages:  false,         // The WebPage instance used by Casper will
         loadPlugins: false,         // use these settings
         userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.4'
    }
});

// Echo every console message emitted inside the headless browser context.
casper.on('remote.message', function(msg) {
    this.echo('remote message caught: ' + msg);
});

// Echo JavaScript errors raised by the page itself, styled as errors.
// The `trace` argument is received but not printed.
casper.on("page.error", function(msg, trace) {
    this.echo("Page Error: " + msg, "ERROR");
});

// Log in: load the login page, then fill in and submit the form.
var login_url = 'https://usms.upc.biz/arsys/shared/login.jsp';
casper.start(login_url, function() {
   this.test.assertExists('form[name="loginForm"]', 'form is found');
   // utils.dump(this);
   // NOTE: the keys below are what trigger the errors in the log that follows.
   // fill() wraps each key as [name="<key>"], so it expects the bare value of a
   // field's `name` attribute (e.g. "username", "pwd"), not a CSS selector.
   // "username-id" is also the input's id, not its name.
   this.fill('form[name="loginForm"]', { 
        'input[name="username-id"]': 'SNIP!', 
        'input[name="pwd"]':  'SNIP!'
    }, true);
});

casper.run();

但是我错误地引用了这些字段,我找不到如何列出所有字段名称以及如何访问它们:

PASS form is found
[info] [remote] attempting to fetch form element from selector: 'form[name="loginForm"]'
[error] [remote] findAll(): invalid selector provided "[name="input[name="username-id"]"]":Error: SYNTAX_ERR: DOM Exception 12
[error] [remote] findAll(): invalid selector provided "[name="input[name="pwd"]"]":Error: SYNTAX_ERR: DOM Exception 12
CasperError: Errors encountered while filling form: no field named "input[name="username-id"]" in form; no field named "input[name="pwd"]" in form

请问如何设置上述2个输入字段,然后触发onClick="doLogin();"?

1 个答案:

答案 0 :(得分:1)

假设您使用的是当前最新的casperjs 1.1。

如果使用fill(),您需要指定元素的name属性值:

// fill() looks fields up by their `name` attribute, so each key is the bare
// name of an input inside the form. The trailing `true` submits the form.
var credentialsByName = {
    username: 'SNIP!',
    pwd: 'SNIP!'
};
this.fill('form[name="loginForm"]', credentialsByName, true);

您还可以使用fillSelectors(),通过ID选择器来定位输入框:

// fillSelectors() takes CSS selectors as keys, so the inputs can be addressed
// by id here. The trailing `true` submits the form.
var credentialsBySelector = {};
credentialsBySelector['input#username-id'] = 'SNIP!';
credentialsBySelector['input#pwd-id'] = 'SNIP!';
this.fillSelectors('form[name="loginForm"]', credentialsBySelector, true);

更新:以下内容全部归功于Artjom B。

您应该使用waitForSelector()等待username输入框出现在DOM中:

  

等待远程DOM中出现与所提供的选择器表达式匹配的元素,然后再处理后续步骤。

// Wait until the username input exists in the DOM before touching the form,
// so the fill does not run against a page that has not rendered it yet.
casper.waitForSelector("#username-id", function() {
    // fill() matches fields by their `name` attribute. Pass `false` so the
    // form is NOT submitted directly: the page's "Log In" control is a plain
    // type="button" whose onClick runs doLogin(), and a direct submit would
    // skip that handler.
    this.fill('form[name="loginForm"]', {
        'username': 'SNIP!',
        'pwd': 'SNIP!'
    }, false);
    // Click the button so the page's own doLogin() handler performs the login.
    this.click('form[name="loginForm"] input[name="login"]');
});