from scipy.spatial.distance import hamming
# --- Binary strings ---
# SciPy's hamming() compares two equal-length sequences element-wise and
# returns the *fraction* of positions that differ, not the count.
string1 = "1010101"
string2 = "1000101"
# Convert each string to a list of integer bits for the hamming function
arr1 = [int(bit) for bit in string1]
arr2 = [int(bit) for bit in string2]
# Scale the fraction back to a count. Round instead of truncating: the
# product is a float and can land just below the true integer
# (e.g. 1/49 * 49 == 0.9999999999999999), which int() alone would cut to 0.
distance = int(round(float(hamming(arr1, arr2)) * len(arr1)))
print(f"Hamming distance: {distance}")  # Output: Hamming distance: 1
# --- DNA sequences ---
# The same approach works on lists of characters.
sequence1 = "ATCGTACT"
sequence2 = "ATCGCACT"
distance = int(round(float(hamming(list(sequence1), list(sequence2))) * len(sequence1)))
print(f"Hamming distance: {distance}")  # Output: Hamming distance: 1
def hamming_distance(str1: str, str2: str) -> int:
    """Calculate the Hamming distance between two strings.

    The Hamming distance is the number of positions at which the
    corresponding characters of the two strings differ.

    Args:
        str1: First string.
        str2: Second string; must have the same length as ``str1``.

    Returns:
        The count of positions where ``str1`` and ``str2`` differ.

    Raises:
        ValueError: If the two strings have different lengths.
    """
    # The distance is only defined for equal-length inputs; without this
    # check zip() would silently ignore the tail of the longer string.
    if len(str1) != len(str2):
        raise ValueError("Strings must be of equal length")
    # Each mismatch yields True, which sum() counts as 1.
    return sum(c1 != c2 for c1, c2 in zip(str1, str2))
# Example usage: the two bit strings differ in exactly one position
print(hamming_distance(str1="1010101", str2="1000101"))  # Output: 1
#' Hamming distance between two strings
#'
#' Counts the positions at which two equal-length strings differ.
#'
#' @param str1 A single, non-missing character string.
#' @param str2 A single, non-missing character string with the same number
#'   of characters as `str1`.
#' @return An integer: the number of positions where the characters differ.
#' @examples
#' hamming_distance("1010101", "1000101")
#' hamming_distance("ATCGTACT", "ATCGCACT")
hamming_distance <- function(str1, str2) {
  # Guard against vectors, NA and non-character input up front; otherwise
  # the failure surfaces as an unrelated error from `if` or `strsplit()`,
  # and `[[1]]` below would silently ignore any extra elements.
  is_string <- function(x) is.character(x) && length(x) == 1L && !is.na(x)
  if (!is_string(str1) || !is_string(str2)) {
    stop("Both inputs must be single, non-missing character strings")
  }
  if (nchar(str1) != nchar(str2)) {
    stop("Strings must be equal length")
  }
  # Split each string into single characters and count the mismatches.
  chars1 <- strsplit(str1, "")[[1]]
  chars2 <- strsplit(str2, "")[[1]]
  sum(chars1 != chars2)
}
# Example usage: binary strings that differ in one position
hamming_distance(str1 = "1010101", str2 = "1000101")
# [1] 1
# DNA sequences are compared the same way
hamming_distance(str1 = "ATCGTACT", str2 = "ATCGCACT")
# [1] 1
Damerau-Levenshtein distance
Damerau-Levenshtein distance
Author
Vinod Chugani
As an experienced expert in data science, machine learning, and generative AI, Vinod is dedicated to sharing knowledge and empowering aspiring data scientists to succeed in this dynamic field.
Learn the fundamentals of statistics, including measures of center and spread, probability distributions, and hypothesis testing with no coding involved!