Алгоритм сравнения строк Оливера


Известный также по функции PHP similar_text()

/**
 * Find the longest common contiguous substring of two strings.
 *
 * Positions and lengths are counted in characters as defined by the mb_* functions, not in bytes.
 * When several common substrings share the maximum length, the first one encountered wins:
 * the lowest start offset in $s1, and for that offset the lowest start offset in $s2.
 *
 * @param string $s1 first string
 * @param string $s2 second string
 * @return array three integers: [length of the match, start offset in $s1, start offset in $s2];
 *               [0, 0, 0] when the strings share no character or either one is empty
 */
function SimStrLen($s1, $s2) {
    $len1 = mb_strlen($s1);
    $len2 = mb_strlen($s2);

    // Extract every character once, up front. Calling mb_substr() inside the innermost loop
    // repeats the same extraction for every (p, q, offset) combination, which dominates the
    // running time on longer inputs. The same mb_* calls are used here, so character
    // boundaries are exactly the ones the comparison loop relied on before.
    $chars1 = array();
    for ($i = 0; $i < $len1; $i++) {
        $chars1[] = mb_substr($s1, $i, 1);
    }
    $chars2 = array();
    for ($j = 0; $j < $len2; $j++) {
        $chars2[] = mb_substr($s2, $j, 1);
    }

    $maxLen = 0;
    $pos1 = 0;
    $pos2 = 0;

    // Try every pair of start offsets and measure how far the two strings agree from there.
    for ($p = 0; $p < $len1; $p++) {
        for ($q = 0; $q < $len2; $q++) {
            $run = 0;
            while (
                ($p + $run) < $len1
                && ($q + $run) < $len2
                && $chars1[$p + $run] === $chars2[$q + $run]
            ) {
                $run++;
            }

            // Strict ">" keeps the earliest match when later ones are equally long.
            if ($run > $maxLen) {
                $maxLen = $run;
                $pos1 = $p;
                $pos2 = $q;
            }
        }
    }

    return array($maxLen, $pos1, $pos2);
}

/**
 * Count the characters two strings have in common, in order.
 *
 * Takes the longest common substring reported by SimStrLen(), then applies the same procedure
 * recursively to the parts to the left of that match and to the parts to the right of it,
 * and adds the three lengths together.
 *
 * @param string $s1 first string
 * @param string $s2 second string
 * @return int total number of matched characters (0 when nothing is shared)
 */
function SimFullLen($s1, $s2) {
    list($common, $start1, $start2) = SimStrLen($s1, $s2);

    // Nothing in common: no point in splitting further.
    if ($common == 0) {
        return 0;
    }

    $total = $common;
    $end1 = $start1 + $common;
    $end2 = $start2 + $common;

    // Left-hand remainders: only worth comparing when both sides have one.
    if ($start1 > 0 && $start2 > 0) {
        $leftOfFirst = mb_substr($s1, 0, $start1);
        $leftOfSecond = mb_substr($s2, 0, $start2);
        $total += SimFullLen($leftOfFirst, $leftOfSecond);
    }

    // Right-hand remainders: same condition, measured from the end of the match.
    if ($end1 < mb_strlen($s1) && $end2 < mb_strlen($s2)) {
        $rightOfFirst = mb_substr($s1, $end1);
        $rightOfSecond = mb_substr($s2, $end2);
        $total += SimFullLen($rightOfFirst, $rightOfSecond);
    }

    return $total;
}

/**
 * Similarity of two strings as a percentage.
 *
 * Computed as (matched characters * 2 * 100) / (length of $s1 + length of $s2), with the
 * matched-character count taken from SimFullLen() and lengths from mb_strlen().
 *
 * @param string $s1 first string
 * @param string $s2 second string
 * @return int|float value in the range 0..100; the division yields a float unless it is exact,
 *                   and 0 is returned when either string compares equal to ''
 */
function SimText($s1, $s2) {
    $eitherEmpty = ($s1 == '') || ($s2 == '');
    if ($eitherEmpty) {
        return 0;
    }

    $matched = SimFullLen($s1, $s2);
    $combinedLength = mb_strlen($s1) + mb_strlen($s2);

    return $matched * 200 / $combinedLength;
}

// Demo: print the percentage from the built-in similar_text() and then the one from SimText()
// for the same pair of words, one value per line.
$a = 'Moscow';
$b = 'Moskva';

// similar_text() writes its percentage into the third argument, which is passed by reference.
$c = 0;
similar_text($a, $b, $c);
echo $c, "\n";

echo SimText($a, $b), "\n";