问题描述
我正在尝试运行一个仿真,以测试随机二进制字符串之间的平均 Levenshtein 距离.
I am trying to run a simulation to test the average Levenshtein distance between random binary strings.
我的程序是用 Python 编写的,但我正在使用这个 C 扩展.其中相关且耗时最多的函数用于计算两个字符串之间的 Levenshtein 距离,如下所示.
My program is in Python, but I am using this C extension. The relevant function, which takes most of the time, computes the Levenshtein distance between two strings; here it is.
/**
 * Compute the Levenshtein edit distance between two byte strings.
 *
 * Only a single row of the cost matrix is kept in memory.
 *
 * @param len1    Number of elements in string1.
 * @param string1 First string; exactly len1 elements are read (no terminator
 *                is required).
 * @param len2    Number of elements in string2.
 * @param string2 Second string; exactly len2 elements are read.
 * @param xcost   If nonzero, a mismatch can only be resolved by a deletion
 *                plus an insertion (i.e. a substitution effectively costs 2);
 *                if zero, a substitution costs 1.
 * @return The edit distance, or (size_t)(-1) if the working row cannot be
 *         allocated (allocation failure or size overflow).
 */
size_t
lev_edit_distance(size_t len1, const lev_byte *string1,
                  size_t len2, const lev_byte *string2,
                  int xcost)
{
  size_t i;
  size_t *row;  /* we only need to keep one row of costs */
  size_t *end;
  size_t half;

  /* strip common prefix */
  while (len1 > 0 && len2 > 0 && *string1 == *string2) {
    len1--;
    len2--;
    string1++;
    string2++;
  }

  /* strip common suffix */
  while (len1 > 0 && len2 > 0 && string1[len1-1] == string2[len2-1]) {
    len1--;
    len2--;
  }

  /* catch trivial cases */
  if (len1 == 0)
    return len2;
  if (len2 == 0)
    return len1;

  /* make the inner cycle (i.e. string2) the longer one */
  if (len1 > len2) {
    size_t nx = len1;
    const lev_byte *sx = string1;
    len1 = len2;
    len2 = nx;
    string1 = string2;
    string2 = sx;
  }
  /* check len1 == 1 separately */
  if (len1 == 1) {
    if (xcost)
      return len2 + 1 - 2*(memchr(string2, *string1, len2) != NULL);
    else
      return len2 - (memchr(string2, *string1, len2) != NULL);
  }
  /* from here on len1 and len2 are matrix dimensions (string length + 1) */
  len1++;
  len2++;
  half = len1 >> 1;

  /* initialize first row; refuse sizes whose byte count would wrap around,
   * otherwise malloc() would return a row that is too short */
  if (len2 > (size_t)(-1) / sizeof *row)
    return (size_t)(-1);
  row = malloc(len2 * sizeof *row);
  if (!row)
    return (size_t)(-1);
  end = row + len2 - 1;
  for (i = 0; i < len2 - (xcost ? 0 : half); i++)
    row[i] = i;

  /* go through the matrix and compute the costs.  yes, this is an extremely
   * obfuscated version, but also extremely memory-conservative and relatively
   * fast. */
  if (xcost) {
    for (i = 1; i < len1; i++) {
      size_t *p = row + 1;
      const lev_byte char1 = string1[i - 1];
      const lev_byte *char2p = string2;
      /* D carries (diagonal cost + 1) into each step, x the cost to the left */
      size_t D = i;
      size_t x = i;
      while (p <= end) {
        if (char1 == *(char2p++))
          x = --D;  /* match: take the diagonal cost unchanged */
        else
          x++;      /* mismatch: left cost + 1 */
        D = *p;     /* cost from the row above ... */
        D++;        /* ... + 1; also the next step's diagonal + 1 */
        if (x > D)
          x = D;
        *(p++) = x;
      }
    }
  }
  else {
    /* in this case we don't have to scan two corner triangles (of size len1/2)
     * in the matrix because no best path can go through them. note this
     * breaks when len1 == len2 == 2 so the memchr() special case above is
     * necessary */
    row[0] = len1 - half - 1;
    for (i = 1; i < len1; i++) {
      size_t *p;
      const lev_byte char1 = string1[i - 1];
      const lev_byte *char2p;
      size_t D, x;
      /* skip the upper triangle */
      if (i >= len1 - half) {
        size_t offset = i - (len1 - half);
        size_t c3;

        char2p = string2 + offset;
        p = row + offset;
        c3 = *(p++) + (char1 != *(char2p++));
        x = *p;
        x++;
        D = x;
        if (x > c3)
          x = c3;
        *(p++) = x;
      }
      else {
        p = row + 1;
        char2p = string2;
        D = x = i;
      }
      /* skip the lower triangle */
      if (i <= half + 1)
        end = row + len2 + i - half - 2;
      /* main */
      while (p <= end) {
        /* c3: diagonal cost plus 1 if the characters differ */
        size_t c3 = --D + (char1 != *(char2p++));
        x++;
        if (x > c3)
          x = c3;
        D = *p;
        D++;
        if (x > D)
          x = D;
        *(p++) = x;
      }
      /* lower triangle sentinel */
      if (i <= half) {
        size_t c3 = --D + (char1 != *char2p);
        x++;
        if (x > c3)
          x = c3;
        *p = x;
      }
    }
  }

  /* the result is the last cell of the final row */
  i = *end;
  free(row);
  return i;
}
可以加快速度吗?
我将在AMD FX(tm)-8350八核处理器上的32位ubuntu中运行代码.
I will be running the code in 32 bit ubuntu on an AMD FX(tm)-8350 Eight-Core Processor.
这是调用它的python代码.
Here is the python code that calls it.
from Levenshtein import distance
import random
for i in xrange(16):
sum = 0
for j in xrange(1000):
str1 = bin(random.getrandbits(2**i))[2:].zfill(2**i)
str2 = bin(random.getrandbits(2**i))[2:].zfill(2**i)
sum += distance(str1,str2)
print i,sum/(1000*2**i)
推荐答案
也许可以并行运行.在开始时生成一个庞大的随机数列表,然后在循环中,每次启动一批线程(8个线程),让每个线程处理列表中的一个块,并将其最终结果加到sum变量中.或者一次生成一个包含8项的列表,然后每次处理8项.
You could maybe run this in parallel. Generate one giant list of randoms at the start, then in your loop, spawn threads (8 threads) at a time to each process one chunk of the list and add its final result to the sum variable. Or generate a list of 8 at once and do 8 at a time.
openmp建议的问题在于:"由于存在大量数据依赖关系,该算法的并行性很差"-Wikipedia
The problem with the openmp suggestion is "This algorithm parallelizes poorly, due to a large number of data dependencies" - Wikipedia
from threading import Lock, Thread

sum = 0
sum_lock = Lock()  # guards `sum`: += on a shared global is not atomic


def calc_distance(offset):
    """Add the distance between the pair at randoms[offset] to the global sum."""
    global sum  # without this, `sum += ...` raises UnboundLocalError
    d = distance(randoms[offset][0], randoms[offset][1])  # use whatever addressing scheme is best
    with sum_lock:
        sum += d


threads = []
for i in xrange(8):
    # args must be a tuple; (i) is just i, (i,) is a 1-tuple
    t = Thread(target=calc_distance, args=(i,))
    t.start()
    threads.append(t)

# ... later ...
for t in threads:
    t.join()
我认为,如果有levenshtein距离内核可用(或可编码),此方法也将在以后很好地移植到opencl.
I think this method would port nicely to OpenCL later as well, if a Levenshtein distance kernel were available (or codable).
这只是凭记忆匆忙写下的帖子,因此可能还有一些问题需要解决.
This is just a quick post from memory so there are probably some kinks to work out.
这篇关于如何加快Levenshtein距离的计算的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持!