Paste: jarowinklerdistance
Author: | mrjbq7 |
Mode: | text |
Date: | Sat, 21 Jun 2025 16:15:21 |
Plain Text |
"""
    jarowinklerdistance(s1, s2)

Return the Jaro-Winkler distance between the sequences `s1` and `s2` as a
`Float64` in `[0, 1]`: `0.0` means the sequences are identical and `1.0` means
they have no matching elements.

# Arguments
- `s1`, `s2`: strings, or any 1-based indexable collections that support
  `length`, iteration and `eltype` (for example `Vector{Char}` or `Vector{Int}`).
  Strings are compared character by character.

# Details
- Two elements match when they are equal and their positions differ by at most
  `max(0, L ÷ 2 - 1)`, where `L` is the length of the longer sequence.
- The Winkler bonus uses the length of the common prefix (capped at 4) with a
  scaling factor of 0.1.
- Two empty inputs have distance `0.0`; one empty and one non-empty input have
  distance `1.0`.

# Examples
```julia
jarowinklerdistance("MARTHA", "MARHTA")   # ≈ 0.0389
jarowinklerdistance("abc", "abc")         # 0.0
jarowinklerdistance("", "abc")            # 1.0
```
"""
function jarowinklerdistance(s1, s2)
    # Strings are indexed by byte offset while `length` counts characters, so
    # work on character vectors to keep positional indexing valid for Unicode.
    a = s1 isa AbstractString ? collect(s1) : s1
    b = s2 isa AbstractString ? collect(s2) : s2
    # Make `a` the longer sequence so the flags only need to cover the shorter one.
    if length(a) < length(b)
        a, b = b, a
    end
    len1, len2 = length(a), length(b)
    if len2 == 0
        # Both empty: identical. Only one empty: nothing in common.
        return len1 == 0 ? 0.0 : 1.0
    end
    delta = max(0, len1 ÷ 2 - 1)
    flag = zeros(Bool, len2)  # flag[j] is true once b[j] has been matched
    ch1_match = eltype(a)[]   # matched elements of `a`, in `a` order
    for (i, ch1) in enumerate(a)
        # Symmetric, inclusive window of positions in `b` that may match a[i].
        lo = max(i - delta, 1)
        hi = min(i + delta, len2)
        for j in lo:hi
            if !flag[j] && ch1 == b[j]
                flag[j] = true
                push!(ch1_match, ch1)
                break
            end
        end
    end
    matches = length(ch1_match)
    matches == 0 && return 1.0
    # Walk the matched elements of `b` in order and count those that disagree
    # with the matched elements of `a`; each transposition is counted twice.
    transpositions, k = 0, 0
    for (j, ch2) in enumerate(b)
        if flag[j]
            k += 1
            transpositions += (ch2 != ch1_match[k])
        end
    end
    jaro = (matches / len1 + matches / len2 +
            (matches - transpositions / 2) / matches) / 3.0
    # Length of the common prefix, stopping at the first mismatch, capped at 4.
    commonprefix = 0
    for p in 1:min(len2, 4)
        a[p] == b[p] || break
        commonprefix += 1
    end
    return 1 - (jaro + commonprefix * 0.1 * (1 - jaro))
end
Author: | mrjbq7 |
Mode: | text |
Date: | Sat, 21 Jun 2025 19:04:00 |
Plain Text |
"""
    jarowinklerdistance(t1::AbstractString, t2::AbstractString)

Return the Jaro-Winkler distance between the strings `t1` and `t2` as a
`Float64` in `[0, 1]`: `0.0` means the strings are identical and `1.0` means
they share no matching characters.

Both strings are first collected into `Vector{Char}` so that positions refer to
characters rather than byte offsets.

# Details
- Two characters match when they are equal and their positions differ by at
  most `max(0, L ÷ 2 - 1)`, where `L` is the length of the longer string.
- The Winkler bonus uses the length of the common prefix (capped at 4) with a
  scaling factor of 0.1.
- Two empty strings have distance `0.0`; an empty and a non-empty string have
  distance `1.0`.

# Examples
```julia
jarowinklerdistance("DIXON", "DICKSONX")  # ≈ 0.1867
jarowinklerdistance("abc", "abc")         # 0.0
jarowinklerdistance("abc", "")            # 1.0
```
"""
function jarowinklerdistance(t1::AbstractString, t2::AbstractString)
    longer = collect(t1)
    shorter = collect(t2)
    if length(longer) < length(shorter)
        longer, shorter = shorter, longer
    end
    nlong, nshort = length(longer), length(shorter)
    if nshort == 0
        # Both empty: identical. Only one empty: nothing in common.
        return nlong == 0 ? 0.0 : 1.0
    end
    radius = max(0, nlong ÷ 2 - 1)
    used = zeros(Bool, nshort)  # used[j] is true once shorter[j] has been matched
    matched = Char[]            # matched characters of `longer`, in order
    for (i, c) in enumerate(longer)
        # Symmetric, inclusive window of candidate positions in `shorter`.
        for j in max(i - radius, 1):min(i + radius, nshort)
            if !used[j] && c == shorter[j]
                used[j] = true
                push!(matched, c)
                break
            end
        end
    end
    matches = length(matched)
    matches == 0 && return 1.0
    # Compare the matched characters of both strings in order; each
    # transposition shows up as two disagreeing positions, hence the / 2 below.
    transpositions, k = 0, 0
    for (j, c) in enumerate(shorter)
        if used[j]
            k += 1
            transpositions += (c != matched[k])
        end
    end
    jaro = (matches / nlong + matches / nshort +
            (matches - transpositions / 2) / matches) / 3.0
    # Length of the common prefix, stopping at the first mismatch, capped at 4.
    commonprefix = 0
    for p in 1:min(nshort, 4)
        longer[p] == shorter[p] || break
        commonprefix += 1
    end
    return 1 - (jaro + commonprefix * 0.1 * (1 - jaro))
end
New Annotation