我有两个函数:findNested 用于判断一个字符串是否是另一个字符串的子字符串,findOverlap 用于为相互重叠的字符串返回组合后的(一致,consensus)字符串。我想要做的是新建一个数组,或者直接使用存储所有数据的数组($all_data):如果 $all_data[$i]['linear_sequence'] 元素之间存在嵌套或重叠,就返回合并后的信息;如果不存在,就直接使用该行本身。$all_data 数组中的每个元素的结构都与 $array1 和 $array2 相同。我考虑使用 array_merge_recursive 来合并数据,但不知道应该把它放在哪里,才能合并多个 $all_data 元素。目前新数组中显示的都是相同的值,我不明白为什么。
// Sample records; each element of $all_data is described as having this shape.
$array1 = [
    'linear_sequence' => 'QYDIKYTWNVPKIAPKS',
    'E_ID' => 1416503,
    'ant_source_organism_name' => 'Burkholderia pseudomallei K96243',
];
$array2 = [
    'linear_sequence' => 'QQYDIKYTWNVPKIAPKSEN',
    'E_ID' => 1409864,
    'ant_source_organism_name' => 'Burkholderia pseudomallei K96243',
];
/**
 * Report whether one string is strictly contained in the other.
 *
 * The longer of the two strings is searched for the shorter one. Strings of
 * equal length are never reported as nested, even when they are identical.
 *
 * @param string $str1 First sequence.
 * @param string $str2 Second sequence.
 * @return string|false The longer string when it contains the shorter one,
 *                      otherwise false.
 */
function findNested($str1, $str2){
    $len1 = strlen($str1);
    $len2 = strlen($str2);

    // Equal lengths: neither string can strictly contain the other.
    if ($len1 === $len2) {
        return false;
    }

    // Decide which string plays the container and which the candidate.
    if ($len1 > $len2) {
        $outer = $str1;
        $inner = $str2;
    } else {
        $outer = $str2;
        $inner = $str1;
    }

    return strpos($outer, $inner) === false ? false : $outer;
}
/**
 * Collect every overlap where the end of $str1 matches the start of $str2.
 *
 * For each length from 1 up to the length of the shorter string, the suffix
 * of $str1 is compared with the prefix of $str2 of the same length. Matches
 * are returned in order of increasing length, so the last element is the
 * longest overlap.
 *
 * @param string $str1 String whose suffixes are tested.
 * @param string $str2 String whose prefixes are tested.
 * @return array|false List of overlapping substrings (shortest first), or
 *                     false when there is no overlap at all.
 */
function findOverlap($str1, $str2){
    $overlaps = array();
    $max = min(strlen($str1), strlen($str2));

    for ($i = 1; $i <= $max; $i++) {
        $suffix = substr($str1, -$i);
        // Strict comparison: with ==, numeric-looking strings such as "100"
        // and "1e2" are compared by value and would count as an overlap.
        if ($suffix === substr($str2, 0, $i)) {
            $overlaps[] = $suffix;
        }
    }

    return empty($overlaps) ? false : $overlaps;
}
/**
 * Merge two strings into one consensus string across their overlap.
 *
 * The overlap is a suffix of $str1 that equals a prefix of $str2, as found by
 * findOverlap(). The consensus is $str1 followed by the part of $str2 that
 * comes after the overlap.
 *
 * @param string $str1          Left-hand string.
 * @param string $str2          Right-hand string.
 * @param string $length        "short" uses the shortest overlap found; "long"
 *                              (or any other value) uses the longest.
 * @param int    $min_overlap   Minimum overlap length required to merge
 *                              (default 9).
 * @param int    $max_consensus Maximum allowed length of the merged string
 *                              (default 20).
 * @return string|false The consensus string, or false when there is no
 *                      overlap, the overlap is too short, or the consensus is
 *                      too long.
 */
function replaceOverlap($str1, $str2, $length = "long", $min_overlap = 9, $max_consensus = 20){
    $overlaps = findOverlap($str1, $str2);
    if (!$overlaps) {
        return false;
    }

    // findOverlap() lists overlaps from shortest to longest.
    switch ($length) {
        case "short":
            $overlap = $overlaps[0];
            break;
        case "long":
        default:
            $overlap = $overlaps[count($overlaps) - 1];
            break;
    }

    $overlapLength = strlen($overlap);
    if ($overlapLength < $min_overlap) {
        return false;
    }

    // $str1 already ends with the overlap, so only the rest of $str2 is appended.
    $consensus = $str1 . substr($str2, $overlapLength);
    if (strlen($consensus) > $max_consensus) {
        return false;
    }

    return $consensus;
}
/*str1=epitope1 and str2=epitope2*/
/**
 * Group rows whose 'linear_sequence' values overlap or are nested.
 *
 * For every row, its sequence is compared against every row's sequence and
 * grown into the longest consensus found via replaceOverlap() (both orders)
 * and findNested(). That consensus becomes the key of the row's cluster. The
 * first row of a cluster is stored as is; later rows with the same key are
 * combined into it with array_merge_recursive().
 *
 * Side effects: prints each pair of sequences that extended a consensus, then
 * prints every resulting cluster with print_r().
 *
 * @param array $all_data List (0-based, consecutive keys) of rows, each with
 *                        a 'linear_sequence' string.
 * @return array Clusters keyed by consensus sequence.
 */
function clustering($all_data) {
    $result = array();
    $total = count($all_data);

    for ($i = 0; $i < $total; ++$i) {
        $row = $all_data[$i];
        $str1 = $row['linear_sequence'];

        for ($j = 0; $j < $total; ++$j) {
            $str2 = $all_data[$j]['linear_sequence'];

            // All three candidates are computed from the consensus as it was
            // at the start of this iteration, then applied in order.
            $candidates = array(
                replaceOverlap($str1, $str2),
                replaceOverlap($str2, $str1),
                findNested($str1, $str2),
            );

            foreach ($candidates as $candidate) {
                // Only accept a candidate that makes the consensus longer.
                if ($candidate !== false && strlen($candidate) > strlen($str1)) {
                    print $i.' '.$row['linear_sequence'];echo"</br>";
                    print $str2;echo"</br>";echo"</br>";
                    $str1 = $candidate;
                }
            }
        }

        // Store the first row of a cluster unchanged and merge only later
        // rows. Running both steps for a new key merged the row with itself
        // and duplicated every value.
        if (array_key_exists($str1, $result)) {
            $result[$str1] = array_merge_recursive($result[$str1], $row);
        } else {
            $result[$str1] = $row;
        }
    }

    foreach ($result as $epitope) {
        print_r($epitope);echo"</br>";
    }

    return $result;
}