我有一个数据库表,其中有一列SQLServer Soundex编码为姓氏和名字。
在我的C#程序中,我想使用soundex转换字符串以在查询中使用。

在dotnet库中是否有用于soundex的标准字符串函数,或者是实现该函数的开源库(也许是对string的扩展方法)?

最佳答案

我知道这很晚,但是我也需要类似的东西(尽管不涉及数据库),唯一的答案是不准确的(“Tymczak”和“Pfister”失败)。

这是我想出的:

class Program
{
    public static void Main(string[] args)
    {
                Assert.AreEqual(Soundex.Generate("H"), "H000");
                Assert.AreEqual(Soundex.Generate("Robert"), "R163");
                Assert.AreEqual(Soundex.Generate("Rupert"), "R163");
                Assert.AreEqual(Soundex.Generate("Rubin"), "R150");
                Assert.AreEqual(Soundex.Generate("Ashcraft"), "A261");
                Assert.AreEqual(Soundex.Generate("Ashcroft"), "A261");
                Assert.AreEqual(Soundex.Generate("Tymczak"), "T522");
                Assert.AreEqual(Soundex.Generate("Pfister"), "P236");
                Assert.AreEqual(Soundex.Generate("Gutierrez"), "G362");
                Assert.AreEqual(Soundex.Generate("Jackson"), "J250");
                Assert.AreEqual(Soundex.Generate("VanDeusen"), "V532");
                Assert.AreEqual(Soundex.Generate("Deusen"), "D250");
                Assert.AreEqual(Soundex.Generate("Sword"), "S630");
                Assert.AreEqual(Soundex.Generate("Sord"), "S630");
                Assert.AreEqual(Soundex.Generate("Log-out"), "L230");
                Assert.AreEqual(Soundex.Generate("Logout"), "L230");
                Assert.AreEqual(Soundex.Generate("123"), Soundex.Empty);
                Assert.AreEqual(Soundex.Generate(""), Soundex.Empty);
                Assert.AreEqual(Soundex.Generate(null), Soundex.Empty);
    }
}

public static class Soundex
{
    public const string Empty = "0000";

    private static readonly Regex Sanitiser = new Regex(@"[^A-Z]", RegexOptions.Compiled);
    private static readonly Regex CollapseRepeatedNumbers = new Regex(@"(\d)?\1*[WH]*\1*", RegexOptions.Compiled);
    private static readonly Regex RemoveVowelSounds = new Regex(@"[AEIOUY]", RegexOptions.Compiled);

    public static string Generate(string Phrase)
    {
        // Remove non-alphas
        Phrase = Sanitiser.Replace((Phrase ?? string.Empty).ToUpper(), string.Empty);

        // Nothing to soundex, return empty
        if (string.IsNullOrEmpty(Phrase))
            return Empty;

        // Convert consonants to numerical representation
        var Numified = Numify(Phrase);

        // Remove repeated numberics (characters of the same sound class), even if separated by H or W
        Numified = CollapseRepeatedNumbers.Replace(Numified, @"$1");

        if (Numified.Length > 0 && Numified[0] == Numify(Phrase[0]))
        {
            // Remove first numeric as first letter in same class as subsequent letters
            Numified = Numified.Substring(1);
        }

        // Remove vowels
        Numified = RemoveVowelSounds.Replace(Numified, string.Empty);

        // Concatenate, pad and trim to ensure X### format.
        return string.Format("{0}{1}", Phrase[0], Numified).PadRight(4, '0').Substring(0, 4);
    }

    private static string Numify(string Phrase)
    {
        return new string(Phrase.ToCharArray().Select(Numify).ToArray());
    }

    private static char Numify(char Character)
    {
        switch (Character)
        {
            case 'B': case 'F': case 'P': case 'V':
                return '1';
            case 'C': case 'G': case 'J': case 'K': case 'Q': case 'S': case 'X': case 'Z':
                return '2';
            case 'D': case 'T':
                return '3';
            case 'L':
                return '4';
            case 'M': case 'N':
                return '5';
            case 'R':
                return '6';
            default:
                return Character;
        }
    }
}

关于.net - DotNet Soundex功能,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/11121936/

10-11 02:38