我使用 Summernote 文本编辑器,在插入数据库之前执行htmlspecialchars($VALUE)
,之后在编辑器中载入内容进行修改时执行htmlspecialchars_decode($VALUE)
...
但是我还需要为这一行做搜索功能,那么当使用htmlspecialchars()编码row(VARCHAR)时如何使用LIKE函数?是否有任何SQL函数在选择执行LIKE函数时去除这些标记?
P.S
我正在使用PDO,我的查询如下:
// NOTE(review): $value is interpolated directly into the SQL text here, so prepare() does not bind or escape it.
$this->db->prepare("SELECT * FROM t_tasks WHERE a_text LIKE '%$value%'");
在这里,a_text看起来像
&lt;p&gt;&lt;i style=&quot;background-color: rgb(255, 255, 0);&quot;&gt;cdsfsadfasdf&lt;/i&gt;&lt;/p&gt;
答案 0 :(得分:1)
嘈杂的免责声明:以下内容按设计运行,并解决了所提出的问题,但这并不是一个好主意或最佳做法的示例。
恰恰相反,我建议。可靠性完全失败,正如您在查看代码时所看到的那样,我不得不经历一些不必要的杂耍和旋转,因为SQL根本不适合这项工作。
不过,我曾身处一个别无选择的环境:只能处理以编码后的HTML实体形式存储的数据,而且由于遗留原因无法更改 - 于是我写了这个函数。
它是一个MySQL存储函数,可将实体转换为其utf8编码的等效字符。例如:
mysql> SELECT decode_entities('I &hearts; doing &ldquo;clever&rdquo; things.') AS decoded_string;
+----------------------------------+
| decoded_string |
+----------------------------------+
| I ♥ doing “clever” things. |
+----------------------------------+
1 row in set (0.00 sec)
因此,对于您的查询,如果我们想测试是否...
&lt;p&gt;&lt;i style=&quot;background-color: rgb(255, 255, 0);&quot;&gt;cdsfsadfasdf&lt;/i&gt;&lt;/p&gt;
...是LIKE '<p><i style=%'
...
mysql> SELECT decode_entities('&lt;p&gt;&lt;i style=&quot;background-color: rgb(255, 255, 0);&quot;&gt;cdsfsadfasdf&lt;/i&gt;&lt;/p&gt;') LIKE '<p><i style=%' AS this_matches;
+--------------+
| this_matches |
+--------------+
| 1 |
+--------------+
1 row in set (0.00 sec)
......我们发现它是。
定义函数后,您可以这样使用:
// Search the decoded text; the search term is bound as :value instead of being interpolated into the SQL.
$stmt = $this->db->prepare("SELECT t.*, decode_entities(t.a_text) AS a_text_decoded FROM t_tasks t WHERE decode_entities(t.a_text) LIKE CONCAT('%', :value, '%')");
函数定义如下:
DELIMITER $$
DROP FUNCTION IF EXISTS `decode_entities` $$
CREATE FUNCTION `decode_entities`(str LONGTEXT charset utf8) RETURNS longtext CHARSET utf8
    NO SQL
    DETERMINISTIC
BEGIN
    -- Decode HTML entities in a database string into their utf8 characters.
    --
    -- Parameter: str - text that may contain entities; NULL is returned as NULL.
    -- Returns:   str with every recognised entity replaced, in place, by its character.
    --
    -- The database is not the ideal place to do this work, so the function first returns
    -- quickly for strings that cannot possibly contain an entity. Otherwise it walks the
    -- string looking for & ... ; and checks the text in between for a decimal (&#nnn;) or
    -- hexadecimal (&#xhhh;) literal, and failing that, for a named entity in the static list.
    -- When a code point is found, the entity is replaced by the utf8 encoded character and the
    -- scan resumes immediately after that character, so decoded output is never decoded again
    -- ('&amp;lt;' becomes '&lt;', not '<').
    -- Anything that looks like an entity but cannot be deciphered is left as it was. This
    -- includes code points above U+FFFF, which the 3-byte utf8 return type cannot represent,
    -- and the UTF-16 surrogate range.
    --
    -- Entity names are case sensitive (the list is a BLOB so the lookup is binary). The order
    -- of the list is a minor performance consideration: entries near the front match slightly
    -- faster, so the most frequently seen entities may be moved first.
    -- Adapted from https://stackoverflow.com/a/49498332/1695906
    IF str IS NULL OR str NOT LIKE '%&%;%' THEN
        RETURN str;
    END IF;
    BEGIN
        -- name,codepoint pairs; names sit at odd positions, decimal code points at even positions
        DECLARE entities BLOB DEFAULT 'AElig,198,Aacute,193,Acirc,194,Agrave,192,Alpha,913,Aring,197,Atilde,195,Auml,196,Beta,914,Ccedil,199,Chi,935,Dagger,8225,Delta,916,ETH,208,Eacute,201,Ecirc,202,Egrave,200,Epsilon,917,Eta,919,Euml,203,Gamma,915,Iacute,205,Icirc,206,Igrave,204,Iota,921,Iuml,207,Kappa,922,Lambda,923,Mu,924,Ntilde,209,Nu,925,OElig,338,Oacute,211,Ocirc,212,Ograve,210,Omega,937,Omicron,927,Oslash,216,Otilde,213,Ouml,214,Phi,934,Pi,928,Prime,8243,Psi,936,Rho,929,Scaron,352,Sigma,931,THORN,222,Tau,932,Theta,920,Uacute,218,Ucirc,219,Ugrave,217,Upsilon,933,Uuml,220,Xi,926,Yacute,221,Yuml,376,Zeta,918,aacute,225,acirc,226,acute,180,aelig,230,agrave,224,alefsym,8501,alpha,945,amp,38,and,8743,ang,8736,apos,39,aring,229,asymp,8776,atilde,227,auml,228,bdquo,8222,beta,946,brvbar,166,bull,8226,cap,8745,ccedil,231,cedil,184,cent,162,chi,967,circ,710,clubs,9827,cong,8773,copy,169,crarr,8629,cup,8746,curren,164,dArr,8659,dagger,8224,darr,8595,deg,176,delta,948,diams,9830,divide,247,eacute,233,ecirc,234,egrave,232,empty,8709,emsp,8195,ensp,8194,epsilon,949,equiv,8801,eta,951,eth,240,euml,235,euro,8364,exist,8707,fnof,402,forall,8704,frac12,189,frac14,188,frac34,190,frasl,8260,gamma,947,ge,8805,gt,62,hArr,8660,harr,8596,hearts,9829,hellip,8230,iacute,237,icirc,238,iexcl,161,igrave,236,image,8465,infin,8734,int,8747,iota,953,iquest,191,isin,8712,iuml,239,kappa,954,lArr,8656,lambda,955,lang,9001,laquo,171,larr,8592,lceil,8968,ldquo,8220,le,8804,lfloor,8970,lowast,8727,loz,9674,lrm,8206,lsaquo,8249,lsquo,8216,lt,60,macr,175,mdash,8212,micro,181,middot,183,minus,8722,mu,956,nabla,8711,nbsp,160,ndash,8211,ne,8800,ni,8715,not,172,notin,8713,nsub,8836,ntilde,241,nu,957,oacute,243,ocirc,244,oelig,339,ograve,242,oline,8254,omega,969,omicron,959,oplus,8853,or,8744,ordf,170,ordm,186,oslash,248,otilde,245,otimes,8855,ouml,246,para,182,part,8706,permil,8240,perp,8869,phi,966,pi,960,piv,982,plusmn,177,pound,163,prime,8242,prod,8719,prop,8733,psi,968,quot,34,rArr,8658,radic,8730,rang,9002,raquo,187,rarr,8594,rceil,8969,rdquo,8221,real,8476,reg,174,rfloor,8971,rho,961,rlm,8207,rsaquo,8250,rsquo,8217,sbquo,8218,scaron,353,sdot,8901,sect,167,shy,173,sigma,963,sigmaf,962,sim,8764,spades,9824,sub,8834,sube,8838,sum,8721,sup1,185,sup2,178,sup3,179,sup,8835,supe,8839,szlig,223,tau,964,there4,8756,theta,952,thetasym,977,thinsp,8201,thorn,254,tilde,732,times,215,trade,8482,uArr,8657,uacute,250,uarr,8593,ucirc,251,ugrave,249,uml,168,upsih,978,upsilon,965,uuml,252,weierp,8472,xi,958,yacute,253,yen,165,yuml,255,zeta,950,zwj,8205,zwnj,8204';
        DECLARE ptr BIGINT UNSIGNED DEFAULT 0;          -- position of the last '&' already handled
        DECLARE nxtamp BIGINT UNSIGNED DEFAULT NULL;    -- position of the '&' being examined
        DECLARE nxtsem BIGINT UNSIGNED DEFAULT NULL;    -- position of the first ';' after nxtamp
        DECLARE inner_len BIGINT UNSIGNED DEFAULT NULL; -- number of characters between '&' and ';'
        DECLARE sbstr VARCHAR(16) CHARSET utf8 DEFAULT NULL;
        DECLARE decval BIGINT UNSIGNED DEFAULT NULL;    -- wide enough for any literal the patterns below accept
        DECLARE setpos SMALLINT UNSIGNED DEFAULT NULL;
        DECLARE uenc VARCHAR(4) CHARSET utf8 DEFAULT NULL;

        walk: LOOP
            SET nxtamp = LOCATE('&', str, ptr + 1);
            IF nxtamp = 0 THEN
                LEAVE walk;
            END IF;

            -- No ';' after this '&' means there is none after any later '&' either.
            SET nxtsem = LOCATE(';', str, nxtamp + 1);
            IF nxtsem = 0 THEN
                LEAVE walk;
            END IF;

            -- Resume after this '&' whatever happens below; this guarantees progress and
            -- keeps a freshly decoded '&' from being treated as the start of a new entity.
            SET ptr = nxtamp;

            -- The longest accepted candidate is '#' plus 10 digits; skip anything else
            -- without extracting or pattern-matching a potentially huge substring.
            SET inner_len = nxtsem - nxtamp - 1;
            IF inner_len = 0 OR inner_len > 11 THEN
                ITERATE walk;
            END IF;

            SET sbstr = SUBSTRING(str FROM nxtamp + 1 FOR inner_len);

            IF sbstr RLIKE '^#[0-9]{1,10}$' THEN
                SET decval = CAST(SUBSTRING(sbstr FROM 2) AS UNSIGNED);
            ELSEIF sbstr RLIKE '^#[xX][0-9A-Fa-f]{1,8}$' THEN
                SET decval = CAST(CONV(SUBSTRING(sbstr FROM 3), 16, 10) AS UNSIGNED);
            ELSEIF sbstr RLIKE '^[A-Za-z][A-Za-z0-9]*$' THEN
                -- The pattern above rejects digits-only text, so the lookup can only land
                -- on a name (odd position), never on one of the numeric values.
                SET setpos = FIND_IN_SET(sbstr, entities);
                IF setpos = 0 THEN
                    ITERATE walk;
                END IF;
                SET decval = CAST(SUBSTRING_INDEX(SUBSTRING_INDEX(entities, ',', setpos + 1), ',', -1) AS UNSIGNED);
            ELSE
                ITERATE walk;
            END IF;

            -- Encode only code points that 3-byte utf8 can hold (1..65535), excluding the
            -- surrogate range 55296..57343 (U+D800..U+DFFF), which is not valid in UTF-8.
            IF decval > 0 AND decval <= 65535 AND decval NOT BETWEEN 55296 AND 57343 THEN
                SET uenc = CHAR(CASE
                        WHEN decval <= 0x7F THEN decval
                        WHEN decval <= 0x7FF THEN 0xC080 | ((decval >> 6) << 8) | (decval & 0x3F)
                        ELSE 0xE08080 | (((decval >> 12) & 0x0F) << 16) | (((decval >> 6) & 0x3F) << 8) | (decval & 0x3F)
                    END USING utf8);
                IF uenc IS NOT NULL AND LENGTH(uenc) > 0 THEN
                    SET str = INSERT(str, nxtamp, 1 + nxtsem - nxtamp, uenc);
                END IF;
            END IF;
        END LOOP walk;

        RETURN str;
    END;
END $$
DELIMITER ;
(注:这些东西不叫“标签”——它们是“HTML 实体”。)
答案 1 :(得分:0)
使用绑定参数,例如:
// Bind the search term as a parameter; CONCAT adds the LIKE wildcards on the SQL side.
$stmt = $this->db->prepare("SELECT * FROM t_tasks WHERE a_text LIKE CONCAT('%', :value, '%')");
$stmt->bindParam(':value', $value);