I have two different lists of objects and want to find the similar entries between them, based on weights assigned to some of their properties. The quickest way seems to be implementing the IEquatable interface, and that is what I have done:
/// <summary>
/// A lead/sale record that is matched against other records by a weighted
/// comparison of its contact, name, location and company fields.
/// </summary>
/// <remarks>
/// <para>
/// <see cref="Equals(CompareEntry)"/> is a fuzzy "same person" test, not a strict
/// equality. It is NOT transitive: A may match B on e-mail and B may match C on
/// phone while A and C share nothing. Results of hash-based set operations
/// (Distinct, Intersect, HashSet, ...) can therefore depend on insertion order.
/// Prefer <see cref="GetSimilarityWeight"/> for explicit pairwise matching.
/// </para>
/// <para>
/// <see cref="GetHashCode"/> deliberately returns a constant. Two entries are
/// equal as soon as any single contact field matches, so no hash derived from
/// one particular field (for example only Email) can satisfy the rule that equal
/// objects must have equal hash codes.
/// </para>
/// </remarks>
public class CompareEntry : IEquatable<CompareEntry>
{
    // Score added when any contact field (e-mail or one of the phones) matches.
    private const int ContactWeight = 100;

    // Score added when the last name or the first name matches.
    private const int NameWeight = 25;

    // Score added when the city or the zip code matches.
    private const int LocationWeight = 12;

    // Score added when the company name matches.
    private const int CompanyWeight = 5;

    // Two entries are considered equal when their score is strictly above this.
    // Name + location + company add up to 42, so with the current weights only
    // a contact match can exceed the threshold.
    private const int MatchThreshold = 50;

    public int LeadId { get; set; }
    public int SaleId { get; set; }
    public string Email { get; set; }
    public string PhonePrivate { get; set; }
    public string PhoneMobile { get; set; }
    public string PhoneCompany { get; set; }
    public string FirstName { get; set; }
    public string Name { get; set; }
    public string City { get; set; }
    public string ZipCode { get; set; }
    public string CompanyName { get; set; }

    /// <summary>
    /// Computes the similarity score between this entry and <paramref name="other"/>.
    /// </summary>
    /// <param name="other">The entry to compare with; may be null.</param>
    /// <returns>
    /// The sum of the weights of all matching field groups, or 0 when
    /// <paramref name="other"/> is null. Null or empty values never match.
    /// </returns>
    public int GetSimilarityWeight(CompareEntry other)
    {
        if (Object.ReferenceEquals(other, null))
        {
            return 0;
        }

        int weight = 0;

        if (this.Matches(this.Email, other.Email)
            || this.Matches(this.PhonePrivate, other.PhonePrivate)
            || this.Matches(this.PhoneMobile, other.PhoneMobile)
            || this.Matches(this.PhoneCompany, other.PhoneCompany))
        {
            weight += ContactWeight;
        }

        if (this.Matches(this.Name, other.Name)
            || this.Matches(this.FirstName, other.FirstName))
        {
            weight += NameWeight;
        }

        if (this.Matches(this.City, other.City)
            || this.Matches(this.ZipCode, other.ZipCode))
        {
            weight += LocationWeight;
        }

        if (this.Matches(this.CompanyName, other.CompanyName))
        {
            weight += CompanyWeight;
        }

        return weight;
    }

    /// <summary>
    /// Determines whether <paramref name="other"/> is similar enough to this entry
    /// to be treated as the same record.
    /// </summary>
    /// <param name="other">The entry to compare with; may be null.</param>
    /// <returns>
    /// <c>true</c> when both references are the same object or the similarity
    /// score is above the match threshold; otherwise <c>false</c>.
    /// </returns>
    public bool Equals(CompareEntry other)
    {
        if (Object.ReferenceEquals(other, null))
        {
            return false;
        }

        // The same instance is always equal to itself, even if all fields are empty.
        if (Object.ReferenceEquals(this, other))
        {
            return true;
        }

        return this.GetSimilarityWeight(other) > MatchThreshold;
    }

    /// <summary>
    /// Non-generic equality; forwards to <see cref="Equals(CompareEntry)"/> so that
    /// both entry points agree.
    /// </summary>
    /// <param name="obj">The object to compare with; may be null or of another type.</param>
    /// <returns><c>true</c> when <paramref name="obj"/> is a matching <see cref="CompareEntry"/>.</returns>
    public override bool Equals(object obj)
    {
        return this.Equals(obj as CompareEntry);
    }

    /// <summary>
    /// Returns the same hash code for every entry.
    /// </summary>
    /// <remarks>
    /// Equal objects must produce equal hash codes. Because a match on any one
    /// of several independent fields makes two entries equal, the only hash that
    /// honours this is a constant. Hash-based lookups consequently compare every
    /// pair with <see cref="Equals(CompareEntry)"/>, i.e. they degrade to linear
    /// scans per lookup.
    /// </remarks>
    /// <returns>Always 0.</returns>
    public override int GetHashCode()
    {
        return 0;
    }

    /// <summary>
    /// Returns true when both values are non-empty and identical (ordinal comparison).
    /// </summary>
    private bool Matches(string prop, string otherProp)
    {
        return this.CheckProperties(prop, otherProp) && prop == otherProp;
    }

    /// <summary>
    /// Returns true when neither value is null or empty, so that two missing
    /// values are never counted as a match.
    /// </summary>
    private bool CheckProperties(string prop, string otherProp)
    {
        return !string.IsNullOrEmpty(prop) && !string.IsNullOrEmpty(otherProp);
    }
}
The problem is that when I override the GetHashCode() method, I only get the entries that are exactly the same or, in this particular case, only those with the same Email.
How can I also take the weight into account in the GetHashCode() method, so that the Equals method is used correctly? Or is there some other way to do the similarity check that still performs well?
`Equals` / `GetHashCode` aren't designed to compare things which are "mostly equal". Equality is just a Boolean property in this case. In particular, having a fuzzy "mostly equal" approach leads to problems with transitivity. The documentation of `Object.Equals` includes this requirement:
If `(x.Equals(y) && y.Equals(z))` returns `true`, then `x.Equals(z)` returns `true`.
That simply doesn't hold when you have fuzzy equality. Just because `x` is "quite like" `y` and `y` is "quite like" `z` doesn't mean that `x` is "quite like" `z`.
Now what you can do is have an equality comparer which only compares phone numbers, another equality comparer which only compares names, etc - but that won't really get you fuzzy matching.
See more on this question at Stackoverflow