如何在mysql或PHP中获得匹配结果的百分比?

时间:2017-07-03 11:20:41

标签: php mysql search

例如,我在某个特定的列中搜索一段文字,比如 "abcdefgio",而数据库中该字段的值是 "abcdefghijk ..."。我希望只返回与搜索字符串匹配度在 70% - 80% 之间的结果。

请为我推荐一些算法或库。

2 个答案:

答案 0 :(得分:2)

你可以使用levenshtein距离:

-- Levenshtein edit distance between s1 and s2, counted in characters:
-- the minimum number of single-character insertions, deletions and
-- substitutions needed to turn one string into the other.
--
-- Returns NULL when either argument is NULL, 0 when the strings compare
-- equal, otherwise the distance as an INT.
--
-- Limitation: every cell of the working rows (cv0 / cv1) is stored as ONE
-- byte via UNHEX(HEX(n)) and read back with SUBSTRING(..., 1). A value above
-- 255 would occupy two bytes and shift every following cell, so the result
-- is only reliable while both strings are at most 255 characters long,
-- regardless of the larger VARBINARY buffer declared below.
--
-- Note: the body contains semicolons, so when it is run from the mysql
-- command-line client the statement has to be wrapped in a DELIMITER change.
CREATE FUNCTION levenshtein( s1 text, s2 text )
  RETURNS INT
  DETERMINISTIC
  BEGIN
    DECLARE s1_len, s2_len, i, j, c, c_temp, cost INT;
    DECLARE s1_char CHAR;
    -- cv1 holds the previous row of the distance matrix, cv0 the row being built.
    DECLARE cv0, cv1 VARBINARY(10240);

    -- Without this guard a NULL length makes the loops below silently skip,
    -- and the function would report 0 (a "perfect match") for levenshtein(NULL, x).
    IF s1 IS NULL OR s2 IS NULL THEN
      RETURN NULL;
    END IF;

    SET s1_len = CHAR_LENGTH(s1), s2_len = CHAR_LENGTH(s2), cv1 = 0x00, j = 1, i = 1, c = 0;

    IF s1 = s2 THEN
      RETURN 0;
    ELSEIF s1_len = 0 THEN
      -- Distance from the empty string is the length of the other string.
      RETURN s2_len;
    ELSEIF s2_len = 0 THEN
      RETURN s1_len;
    ELSE
      -- Row 0 of the matrix: 0, 1, 2, ..., s2_len.
      WHILE j <= s2_len DO
        SET cv1 = CONCAT(cv1, UNHEX(HEX(j))), j = j + 1;
      END WHILE;

      WHILE i <= s1_len DO
        -- Start row i: its first cell is i; c tracks the cell to the left.
        SET s1_char = SUBSTRING(s1, i, 1), c = i, cv0 = UNHEX(HEX(i)), j = 1;

        WHILE j <= s2_len DO
          -- Candidate 1: cell to the left + 1.
          SET c = c + 1;

          IF s1_char = SUBSTRING(s2, j, 1) THEN
            SET cost = 0;
          ELSE
            SET cost = 1;
          END IF;

          -- Candidate 2: diagonal cell of the previous row + substitution cost.
          SET c_temp = CONV(HEX(SUBSTRING(cv1, j, 1)), 16, 10) + cost;
          IF c > c_temp THEN
            SET c = c_temp;
          END IF;

          -- Candidate 3: cell directly above in the previous row + 1.
          SET c_temp = CONV(HEX(SUBSTRING(cv1, j+1, 1)), 16, 10) + 1;
          IF c > c_temp THEN
            SET c = c_temp;
          END IF;

          -- Append the minimum of the three candidates to the current row.
          SET cv0 = CONCAT(cv0, UNHEX(HEX(c))), j = j + 1;
        END WHILE;

        -- The finished row becomes the "previous" row for the next character.
        SET cv1 = cv0, i = i + 1;
      END WHILE;
    END IF;

    -- c is the last cell written, i.e. the bottom-right cell of the matrix.
    RETURN c;
  END;

-- Similarity of s1 and s2 as a whole-number percentage:
-- ROUND((1 - distance / longer_length) * 100), where distance comes from
-- LEVENSHTEIN(s1, s2) and lengths are counted in characters.
--
-- Returns NULL when either argument is NULL, 100 when both strings are empty.
CREATE FUNCTION levenshtein_ratio( s1 text, s2 text )
  RETURNS INT
  DETERMINISTIC
  BEGIN
    DECLARE max_len INT;

    -- A NULL operand has no meaningful similarity; without this guard the
    -- comparison below would be NULL and a NULL column could score 100.
    IF s1 IS NULL OR s2 IS NULL THEN
      RETURN NULL;
    END IF;

    -- CHAR_LENGTH (characters), not LENGTH (bytes): the distance is counted
    -- in characters, so a byte length would inflate the ratio for
    -- multi-byte text.
    SET max_len = GREATEST(CHAR_LENGTH(s1), CHAR_LENGTH(s2));

    -- Two empty strings are identical; returning here also avoids dividing by zero.
    IF max_len = 0 THEN
      RETURN 100;
    END IF;

    RETURN ROUND((1 - LEVENSHTEIN(s1, s2) / max_len) * 100);
  END;

使用:

-- Rows of table1 whose txt1 and txt2 columns are more than 80% similar.
SELECT *
FROM table1 AS t1
WHERE levenshtein_ratio(t1.txt1, t1.txt2) > 80

另一种方法是,通过LIKE在MYSQL中搜索并在结果上使用函数levenshtein

答案 1 :(得分:1)

这个回答可能来得有点晚,但也许仍然有用。有两种处理方法: 1. 使用 @MisterX 给出的 MySQL Levenshtein 函数 2. 使用 PHP 的 similar_text 函数

方法如下:

1。为MySQL使用Levenshtein函数

第1步。

在终端的 mysql 提示符下,或者在 phpMyAdmin 的 SQL 界面中,使用以下代码创建 LEVENSHTEIN 函数(复制、粘贴,然后点击界面上的"执行(GO)"按钮即可)

-- LEVENSHTEIN(s1, s2): edit distance between two strings, counted in
-- characters (insertions, deletions and substitutions each cost 1).
--
-- Returns NULL when either argument is NULL, 0 when the strings compare
-- equal, otherwise the distance as an INT. Both parameters are VARCHAR(255),
-- which matches the one-byte-per-cell working rows used below.
--
-- DELIMITER is switched to $$ so the semicolons inside the body do not end
-- the CREATE FUNCTION statement early in the mysql client / phpMyAdmin.
DELIMITER $$
DROP FUNCTION IF EXISTS LEVENSHTEIN $$
CREATE FUNCTION LEVENSHTEIN(s1 VARCHAR(255) CHARACTER SET utf8, s2 VARCHAR(255) CHARACTER SET utf8)
  RETURNS INT
  DETERMINISTIC
  BEGIN
    DECLARE s1_len, s2_len, i, j, c, c_temp, cost INT;
    DECLARE s1_char CHAR CHARACTER SET utf8;
    -- max strlen=255 for this function
    -- cv1 holds the previous row of the distance matrix, cv0 the row being
    -- built; each cell is one byte, so a row needs at most 255 + 1 bytes.
    DECLARE cv0, cv1 VARBINARY(256);

    -- Without this guard a NULL length makes the loops below silently skip,
    -- and LEVENSHTEIN(NULL, x) would report 0, i.e. a perfect match.
    IF (s1 IS NULL OR s2 IS NULL) THEN
      RETURN (NULL);
    END IF;

    SET s1_len = CHAR_LENGTH(s1),
        s2_len = CHAR_LENGTH(s2),
        cv1 = 0x00,
        j = 1,
        i = 1,
        c = 0;

    IF (s1 = s2) THEN
      RETURN (0);
    ELSEIF (s1_len = 0) THEN
      -- Distance from the empty string is the length of the other string.
      RETURN (s2_len);
    ELSEIF (s2_len = 0) THEN
      RETURN (s1_len);
    END IF;

    -- Row 0 of the matrix: 0, 1, 2, ..., s2_len.
    WHILE (j <= s2_len) DO
      SET cv1 = CONCAT(cv1, CHAR(j)),
          j = j + 1;
    END WHILE;

    WHILE (i <= s1_len) DO
      -- Start row i: its first cell is i; c tracks the cell to the left.
      SET s1_char = SUBSTRING(s1, i, 1),
          c = i,
          cv0 = CHAR(i),
          j = 1;

      WHILE (j <= s2_len) DO
        -- Candidate 1: cell to the left + 1.
        SET c = c + 1,
            cost = IF(s1_char = SUBSTRING(s2, j, 1), 0, 1);

        -- Candidate 2: diagonal cell of the previous row + substitution cost.
        SET c_temp = ORD(SUBSTRING(cv1, j, 1)) + cost;
        IF (c > c_temp) THEN
          SET c = c_temp;
        END IF;

        -- Candidate 3: cell directly above in the previous row + 1.
        SET c_temp = ORD(SUBSTRING(cv1, j+1, 1)) + 1;
        IF (c > c_temp) THEN
          SET c = c_temp;
        END IF;

        -- Append the minimum of the three candidates to the current row.
        SET cv0 = CONCAT(cv0, CHAR(c)),
            j = j + 1;
      END WHILE;

      -- The finished row becomes the "previous" row for the next character.
      SET cv1 = cv0,
          i = i + 1;
    END WHILE;

    -- c is the last cell written, i.e. the bottom-right cell of the matrix.
    RETURN (c);
  END $$

DELIMITER ;

第2步 使用以下代码创建levenshtein_ratio函数

-- levenshtein_ratio(s1, s2): similarity as a whole-number percentage,
-- ROUND((1 - distance / longer_length) * 100), where distance comes from
-- LEVENSHTEIN(s1, s2) and lengths are counted in characters.
--
-- Returns NULL when either argument is NULL, 100 when both strings are empty.
--
-- DELIMITER is switched to $$ so the semicolons inside the body do not end
-- the CREATE FUNCTION statement early; the DROP makes the script re-runnable.
DELIMITER $$
DROP FUNCTION IF EXISTS levenshtein_ratio $$
CREATE FUNCTION levenshtein_ratio( s1 text, s2 text )
  RETURNS INT
  DETERMINISTIC
  BEGIN
    DECLARE max_len INT;

    -- A NULL operand has no meaningful similarity; without this guard the
    -- comparison below would be NULL and a NULL column could score 100.
    IF s1 IS NULL OR s2 IS NULL THEN
      RETURN NULL;
    END IF;

    -- CHAR_LENGTH (characters), not LENGTH (bytes): the distance is counted
    -- in characters, so a byte length would inflate the ratio for
    -- multi-byte text.
    SET max_len = GREATEST(CHAR_LENGTH(s1), CHAR_LENGTH(s2));

    -- Two empty strings are identical; returning here also avoids dividing by zero.
    IF max_len = 0 THEN
      RETURN 100;
    END IF;

    RETURN ROUND((1 - LEVENSHTEIN(s1, s2) / max_len) * 100);
  END $$
DELIMITER ;

第3步:像这样在您的php代码中使用该功能:

 // Fetch rows whose table_column is 70-80% similar to the user's search term.
 // The term comes straight from the request and is untrusted, so it is bound
 // as a statement parameter instead of being interpolated into the SQL text
 // (the interpolated form was both injectable and, being unquoted, invalid SQL).
 $searchString = (isset($_GET["search"]) && is_string($_GET["search"])) ? $_GET["search"] : "";

 // BETWEEN is inclusive on both ends (>= 70 AND <= 80) and lets the
 // per-row levenshtein_ratio() call be written only once.
 $sql = "select * from table_name
         where levenshtein_ratio(?, table_column) between 70 and 80";

 // $result stays false when the statement cannot be prepared or executed.
 $result = false;
 $stmt = mysqli_prepare($conn, $sql);
 if ($stmt !== false) {
     mysqli_stmt_bind_param($stmt, "s", $searchString);
     if (mysqli_stmt_execute($stmt)) {
         $result = mysqli_stmt_get_result($stmt);
     }
 }

其中 $conn 是您的数据库连接(由 mysqli_connect 返回的连接对象)。

完成

  2. 使用 PHP 的 similar_text 函数。

您这样查询数据库:

// Pre-filter rows with LIKE in MySQL, then keep only those whose
// similar_text() percentage against the search term lies between 70 and 80.
// The term comes straight from the request and is untrusted, so it is bound
// as a statement parameter rather than interpolated into the SQL text.
$searchString = (isset($_GET["search"]) && is_string($_GET["search"])) ? $_GET["search"] : "";

// Escape the LIKE wildcards (% and _) and the escape character itself so the
// user's text is matched literally, then wrap it for a "contains" search.
$likePattern = "%" . addcslashes($searchString, "%_\\") . "%";
$sql = "select  * from table_name where table_column like ?";

$between70and80 = array();
$stmt = mysqli_prepare($conn, $sql);
if ($stmt !== false) {
    mysqli_stmt_bind_param($stmt, "s", $likePattern);
    if (mysqli_stmt_execute($stmt)) {
        $result = mysqli_stmt_get_result($stmt);
        while ($result !== false && ($row = mysqli_fetch_array($result))) {
            // Read the same column the query filters on.
            $string1 = $row["table_column"];

            // Compare in a single case; similar_text() writes the match
            // percentage into $perc (its return value is not needed here).
            similar_text(strtoupper($searchString), strtoupper($string1), $perc);

            // Keep matches inside the 70-80% window (inclusive).
            if ($perc >= 70 && $perc <= 80) {
                array_push($between70and80, $string1);
            }
        }
    }
}

// Now do what you want with $between70and80.

请注意,我在调用 similar_text 函数时使用了 strtoupper :两个字符串必须统一为相同的大小写,否则大小写的差异会影响匹配百分比。

希望这对某人有帮助。