处理html表单提交中的UTF字符

时间:2011-02-28 22:25:58

标签: php forms unicode encoding w3c-validation

我的html表单有一些文本输入字段,用户可能在其中输入中文、日文、欧洲语言的字符以及特殊字符(如£等)。简而言之,就是各种unicode字符。

要在服务器端(使用php)处理这些值,我是否可以假设所有浏览器在提交表单时默认都以UTF-8格式编码这些字符?

或者是否有办法告诉浏览器始终以UTF8编码提交这些字符,以便我们可以使用utf8_decode处理这些值?

感谢。

3 个答案:

答案 0 :(得分:2)

在输出HTML之前设置表单页面的字符编码。

// Declare the response as UTF-8 encoded HTML; header() has to be called before any output is sent.
header('Content-Type: text/html; charset=utf-8');

答案 1 :(得分:0)

摘自here

/**
 * Input cleaner: makes incoming values (usually _GET / _POST data) safe to embed in HTML.
 *
 * @usage $var = $this->clean__makesafe_value( $var, array( "urldecode" ), true );
 *     OR $this->clean__makesafe_value( $var, array( "urldecode" ) );
 */
class Input
{
    /**
     * Makesafe: cleans a single value in place.
     *
     * Steps, in order: run the extra filters, trim, strip control characters, normalise line
     * breaks to "\n", escape HTML-significant characters, turn line breaks into "<br />",
     * let already well-formed entities through, and rewrite numeric entities as named ones.
     *
     * @param   mixed     REFERENCE: Data to make safe (replaced by its cleaned string form)
     * @param   mixed     KEY [unused; it is parameter-2 of the callback signature of array_walk_recursive()]
     * @param   array     Additional functions to filter the value through, prior to cleaning.
     *                    Each entry is either a function name or an array( $object, "method" ) pair.
     * @return  boolean   Always TRUE; the result is delivered through the referenced $val
     * @throws  Exception When an entry of $filters is neither an existing function nor an existing method
     */
    private function _clean__makesafe ( &$val, $key, $filters = array() )
    {
        if ( $val === '' )                                                                         // Literally empty string, integer 0 excluded
        {
            return true;
        }

        # Let's apply additional functions, if any, to clean further
        if ( is_array( $filters ) and count( $filters ) )
        {
            foreach ( $filters as $_filter )
            {
                if (
                    is_array( $_filter )
                    and isset( $_filter[0], $_filter[1] )
                    and is_object( $_filter[0] )
                    and is_string( $_filter[1] )
                    and method_exists( $_filter[0], $_filter[1] )
                )
                {
                    // Braces are required: without them PHP 7+ reads "$obj->$_filter[1]()" as "($obj->$_filter)[1]()".
                    $val = $_filter[0]->{$_filter[1]}( $val );
                }
                elseif ( is_string( $_filter ) and function_exists( $_filter ) )
                {
                    $val = $_filter( $val );
                }
                else
                {
                    throw new Exception ("Parameter-2 of Input::_clean__makesafe() must be a valid function/method callback!");
                }
            }
        }

        // The cast keeps the result identical for scalars/null while avoiding the null-argument deprecation of trim().
        $val = trim( (string) $val );
        // $val = $this->clean__stripslashes( $val );

        // Decode the numeric entity of a plain space (searching for a bare space here would be a no-op).
        $val = str_replace( "&#032;" , " " , $val );

        $val = $this->clean__control_characters( $val );

        # Convert all carriage return combos (double quotes, so that real CR/LF characters are matched)
        $val = str_replace( array( "\r\n", "\n\r", "\r" ), "\n", $val );

        # Continue with cleaning...

        // The ampersand has to be escaped first, otherwise the entities produced below would be escaped again.
        $val = str_replace( "&"             , "&amp;"           , $val );
        $val = str_replace( "<!--"          , "&#060;&#033;--"  , $val );
        $val = str_replace( "-->"           , "--&#062;"        , $val );
        $val = preg_replace( "/<script/i"   , "&#060;script"    , $val );
        $val = str_replace( ">"             , "&gt;"            , $val );
        $val = str_replace( "<"             , "&lt;"            , $val );
        $val = str_replace( '"'             , "&quot;"          , $val );
        $val = str_replace( "\n"            , "<br />"          , $val );                          // Convert real newline characters
        $val = str_replace( '$'             , "&#36;"           , $val );
        $val = str_replace( "!"             , "&#33;"           , $val );
        $val = str_replace( "'"             , "&#39;"           , $val );                          // Escapes single quotes; NOT a substitute for parameterised SQL queries.

        # Ensure unicode chars are OK: let "&#123;" / "&name;" sequences typed by the user through un-escaped
        $val = preg_replace("/&amp;(#[0-9]+|[a-z]+);/s", "&\\1;", $val );

        # Convert HTML entities into friendly versions of them.
        # Runs after the un-escape above: before it, every user-supplied "&#NNN;" still reads "&amp;#NNN;".
        $val = $this->_clean__named_entities( $val );

        # Try and fix up HTML entities with missing ;
        $val = preg_replace( "/&#(\d+?)([^\d;])/i", "&#\\1;\\2", $val );

        return true;
    }

    /**
     * Removes non-printable ASCII control characters from a string.
     *
     * Strips NUL, the other C0 control characters except tab, line feed and carriage return,
     * and DEL. Those byte values never occur inside a multi-byte UTF-8 sequence, so UTF-8 text
     * is left intact.
     *
     * @param    string   Text to clean
     * @return   string   Text without the control characters listed above
     */
    public function clean__control_characters ( $val )
    {
        return preg_replace( '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/', '', (string) $val );
    }

    /**
     * Replaces decimal numeric character references with their named HTML entity.
     *
     * @param    string   Text possibly containing references such as "&#163;"
     * @return   string   Text with the known references rewritten, e.g. "&pound;"
     */
    private function _clean__named_entities ( $val )
    {
        // One explicit map instead of two parallel lists, so that a pair can never get out of step.
        $_map = array(
            # Latin-1
            "&#160;" => "&nbsp;",   "&#161;" => "&iexcl;",  "&#162;" => "&cent;",   "&#163;" => "&pound;",
            "&#164;" => "&curren;", "&#165;" => "&yen;",    "&#166;" => "&brvbar;", "&#167;" => "&sect;",
            "&#168;" => "&uml;",    "&#169;" => "&copy;",   "&#170;" => "&ordf;",   "&#171;" => "&laquo;",
            "&#172;" => "&not;",    "&#173;" => "&shy;",    "&#174;" => "&reg;",    "&#175;" => "&macr;",
            "&#176;" => "&deg;",    "&#177;" => "&plusmn;", "&#178;" => "&sup2;",   "&#179;" => "&sup3;",
            "&#180;" => "&acute;",  "&#181;" => "&micro;",  "&#182;" => "&para;",   "&#183;" => "&middot;",
            "&#184;" => "&cedil;",  "&#185;" => "&sup1;",   "&#186;" => "&ordm;",   "&#187;" => "&raquo;",
            "&#188;" => "&frac14;", "&#189;" => "&frac12;", "&#190;" => "&frac34;", "&#191;" => "&iquest;",
            "&#192;" => "&Agrave;", "&#193;" => "&Aacute;", "&#194;" => "&Acirc;",  "&#195;" => "&Atilde;",
            "&#196;" => "&Auml;",   "&#197;" => "&Aring;",  "&#198;" => "&AElig;",  "&#199;" => "&Ccedil;",
            "&#200;" => "&Egrave;", "&#201;" => "&Eacute;", "&#202;" => "&Ecirc;",  "&#203;" => "&Euml;",
            "&#204;" => "&Igrave;", "&#205;" => "&Iacute;", "&#206;" => "&Icirc;",  "&#207;" => "&Iuml;",
            "&#208;" => "&ETH;",    "&#209;" => "&Ntilde;", "&#210;" => "&Ograve;", "&#211;" => "&Oacute;",
            "&#212;" => "&Ocirc;",  "&#213;" => "&Otilde;", "&#214;" => "&Ouml;",   "&#215;" => "&times;",
            "&#216;" => "&Oslash;", "&#217;" => "&Ugrave;", "&#218;" => "&Uacute;", "&#219;" => "&Ucirc;",
            "&#220;" => "&Uuml;",   "&#221;" => "&Yacute;", "&#222;" => "&THORN;",  "&#223;" => "&szlig;",
            "&#224;" => "&agrave;", "&#225;" => "&aacute;", "&#226;" => "&acirc;",  "&#227;" => "&atilde;",
            "&#228;" => "&auml;",   "&#229;" => "&aring;",  "&#230;" => "&aelig;",  "&#231;" => "&ccedil;",
            "&#232;" => "&egrave;", "&#233;" => "&eacute;", "&#234;" => "&ecirc;",  "&#235;" => "&euml;",
            "&#236;" => "&igrave;", "&#237;" => "&iacute;", "&#238;" => "&icirc;",  "&#239;" => "&iuml;",
            "&#240;" => "&eth;",    "&#241;" => "&ntilde;", "&#242;" => "&ograve;", "&#243;" => "&oacute;",
            "&#244;" => "&ocirc;",  "&#245;" => "&otilde;", "&#246;" => "&ouml;",   "&#247;" => "&divide;",
            "&#248;" => "&oslash;", "&#249;" => "&ugrave;", "&#250;" => "&uacute;", "&#251;" => "&ucirc;",
            "&#252;" => "&uuml;",   "&#253;" => "&yacute;", "&#254;" => "&thorn;",  "&#255;" => "&yuml;",
            "&#402;" => "&fnof;",

            # Greek
            "&#913;" => "&Alpha;",   "&#914;" => "&Beta;",    "&#915;" => "&Gamma;",   "&#916;" => "&Delta;",
            "&#917;" => "&Epsilon;", "&#918;" => "&Zeta;",    "&#919;" => "&Eta;",     "&#920;" => "&Theta;",
            "&#921;" => "&Iota;",    "&#922;" => "&Kappa;",   "&#923;" => "&Lambda;",  "&#924;" => "&Mu;",
            "&#925;" => "&Nu;",      "&#926;" => "&Xi;",      "&#927;" => "&Omicron;", "&#928;" => "&Pi;",
            "&#929;" => "&Rho;",     "&#931;" => "&Sigma;",   "&#932;" => "&Tau;",     "&#933;" => "&Upsilon;",
            "&#934;" => "&Phi;",     "&#935;" => "&Chi;",     "&#936;" => "&Psi;",     "&#937;" => "&Omega;",
            "&#945;" => "&alpha;",   // was the lone hexadecimal "&#x03B1;" in an otherwise decimal list
            "&#946;" => "&beta;",    "&#947;" => "&gamma;",   "&#948;" => "&delta;",   "&#949;" => "&epsilon;",
            "&#950;" => "&zeta;",    "&#951;" => "&eta;",     "&#952;" => "&theta;",   "&#953;" => "&iota;",
            "&#954;" => "&kappa;",   "&#955;" => "&lambda;",  "&#956;" => "&mu;",      "&#957;" => "&nu;",
            "&#958;" => "&xi;",      "&#959;" => "&omicron;", "&#960;" => "&pi;",      "&#961;" => "&rho;",
            "&#962;" => "&sigmaf;",  "&#963;" => "&sigma;",   "&#964;" => "&tau;",     "&#965;" => "&upsilon;",
            "&#966;" => "&phi;",     "&#967;" => "&chi;",     "&#968;" => "&psi;",     "&#969;" => "&omega;",
            "&#977;" => "&thetasym;", "&#978;" => "&upsih;",  "&#982;" => "&piv;",

            # Punctuation, letter-like symbols, arrows
            "&#8226;" => "&bull;",   "&#8230;" => "&hellip;", "&#8242;" => "&prime;",  "&#8243;" => "&Prime;",
            "&#8254;" => "&oline;",  "&#8260;" => "&frasl;",  "&#8472;" => "&weierp;", "&#8465;" => "&image;",
            "&#8476;" => "&real;",   "&#8482;" => "&trade;",  "&#8501;" => "&alefsym;",
            "&#8592;" => "&larr;",   "&#8593;" => "&uarr;",   "&#8594;" => "&rarr;",   "&#8595;" => "&darr;",
            "&#8596;" => "&harr;",   "&#8629;" => "&crarr;",  "&#8656;" => "&lArr;",   "&#8657;" => "&uArr;",
            "&#8658;" => "&rArr;",   "&#8659;" => "&dArr;",   "&#8660;" => "&hArr;",

            # Mathematical operators
            "&#8704;" => "&forall;", "&#8706;" => "&part;",   "&#8707;" => "&exist;",  "&#8709;" => "&empty;",
            "&#8711;" => "&nabla;",  "&#8712;" => "&isin;",   "&#8713;" => "&notin;",  "&#8715;" => "&ni;",
            "&#8719;" => "&prod;",   "&#8721;" => "&sum;",    "&#8722;" => "&minus;",  "&#8727;" => "&lowast;",
            "&#8730;" => "&radic;",  "&#8733;" => "&prop;",   "&#8734;" => "&infin;",  "&#8736;" => "&ang;",
            "&#8743;" => "&and;",    "&#8744;" => "&or;",     "&#8745;" => "&cap;",    "&#8746;" => "&cup;",
            "&#8747;" => "&int;",    "&#8756;" => "&there4;", "&#8764;" => "&sim;",    "&#8773;" => "&cong;",
            "&#8776;" => "&asymp;",  "&#8800;" => "&ne;",     "&#8801;" => "&equiv;",  "&#8804;" => "&le;",
            "&#8805;" => "&ge;",     "&#8834;" => "&sub;",    "&#8835;" => "&sup;",    "&#8836;" => "&nsub;",
            "&#8838;" => "&sube;",   "&#8839;" => "&supe;",   "&#8853;" => "&oplus;",  "&#8855;" => "&otimes;",
            "&#8869;" => "&perp;",   "&#8901;" => "&sdot;",

            # Technical and geometric symbols, card suits
            "&#8968;" => "&lceil;",  "&#8969;" => "&rceil;",  "&#8970;" => "&lfloor;", "&#8971;" => "&rfloor;",
            "&#9001;" => "&lang;",   "&#9002;" => "&rang;",   "&#9674;" => "&loz;",    "&#9824;" => "&spades;",
            "&#9827;" => "&clubs;",  "&#9829;" => "&hearts;", "&#9830;" => "&diams;",

            # Markup-significant characters
            "&#34;" => "&quot;",     "&#38;" => "&amp;",      "&#60;" => "&lt;",       "&#62;" => "&gt;",

            # Latin Extended, modifier letters, general punctuation, currency
            "&#338;" => "&OElig;",   "&#339;" => "&oelig;",   "&#352;" => "&Scaron;",  "&#353;" => "&scaron;",
            "&#376;" => "&Yuml;",    "&#710;" => "&circ;",    "&#732;" => "&tilde;",
            "&#8194;" => "&ensp;",   "&#8195;" => "&emsp;",   "&#8201;" => "&thinsp;", "&#8204;" => "&zwnj;",
            "&#8205;" => "&zwj;",    "&#8206;" => "&lrm;",    "&#8207;" => "&rlm;",    "&#8211;" => "&ndash;",
            "&#8212;" => "&mdash;",  "&#8216;" => "&lsquo;",  "&#8217;" => "&rsquo;",  "&#8218;" => "&sbquo;",
            "&#8220;" => "&ldquo;",  "&#8221;" => "&rdquo;",  "&#8222;" => "&bdquo;",  "&#8224;" => "&dagger;",
            "&#8225;" => "&Dagger;", "&#8240;" => "&permil;", "&#8249;" => "&lsaquo;", "&#8250;" => "&rsaquo;",
            "&#8364;" => "&euro;",
        );

        return str_replace( array_keys( $_map ), array_values( $_map ), $val );
    }

    /**
     * WRAPPER for _clean__makesafe(): Cleans incoming values (usually _GET, _POST)
     *
     * @param    mixed    REF: Mixed value to parse; arrays are cleaned leaf by leaf, in place
     * @param    array    Additional functions to filter the value through, prior to cleaning
     * @param    boolean  Whether to return the result or not, defaults to FALSE
     * @return   mixed    MIXED Cleaned value if $do_output is set on; BOOLEAN TRUE otherwise
     * @throws   Exception When an entry of $filters is not a usable function/method callback
     */
    public function clean__makesafe_value ( &$val, $filters = array(), $do_output = false )
    {
        # If its an array, 'walk-through-it' recursively with Input::_clean__makesafe() ...
        if ( is_array( $val ) )
        {
            array_walk_recursive( $val, array( $this, "_clean__makesafe" ), $filters );
        }
        # ... otherwise, just apply Input::_clean__makesafe() to it.
        else
        {
            $this->_clean__makesafe( $val, null, $filters );
        }

        # If explicit return is requested, comply - otherwise go Boolean.
        if ( $do_output )
        {
            return $val;
        }
        return true;
    }
}

答案 2 :(得分:-1)

关于提交数据时所使用的内容类型编码:
通过XMLHttpRequest发送的请求可以使用UTF-8字符集。
因此,要让表单以UTF-8提交,可以通过XMLHttpRequest发送表单数据,或者让您的站点页面本身使用UTF-8字符集。