问题描述
我们有提升的路径映射到字符串对像名称:位置(绝对位置路径一拉 USR / MyFolder中/
)。我们给出了一些位置的拉 USR / MyFolder中/ mysubfolder / MYFILE
。如何找到它的地图位置合适的给定的URL最?
We have map of boost path to string pairs like name:location (absolute location paths a la usr/myfolder/
). We are given with some location a la usr/myfolder/mysubfolder/myfile
. How to find which of maps location fit to given url most?
例子,我们有一个像地图而如果我们需要,我们可以求助于:
Example we have a map like which we can resort if we need:
service1:myfolder/
service2:myfolder/mysubfolder/
service3:myfolder/myothersubfolder/
service4:myfolder/mysubfolder/myfile
我们正在给定值 MyFolder中/ mysubfolder / MYFILE / blablabla /
(路径)。
我们要找出这在我们的地图项涉及最多。
搜索结果应为服务4
与最相关的内容的地图项。
We are given value myfolder/mysubfolder/myfile/blablabla/
(path).We want to find out to which item in our map it relates the most.Search result shall be service4
as map item with most related content.
因此,如何通过给定的字符串值找到哪个地图元素,它涉及的是什么?
So how to find by given string value to which map element it relates the most?
所以original问题是关于通用串案件,但我有一些重构所以没有我只是提升路径工作。
So original question was about general string case but I had some reconfiguration so no I just work on boost paths.
推荐答案
您可以使用 Levenshtein距离一>
修改
因为我终于需要类似的东西我自己,这个问题仍然开放。下面是一些code我打得四处。这两个直线上升字符串的距离,也将Levenshtein算法的路径标记。
Since I finally needed something similar myself, and this question remains open. Here is some code I played around with. Both straight up string distance and also applying the Levenshtein algorithm to the path tokens.
C ++ code
/*
----- String based Levenshtein ----
Matching : this/is/a/strange/path
0 : this/is/a/strange/path
2 : this/is/a/strong/path
2 : that/is/a/strange/path
4 : is/this/a/strange/path
5 : this/is/a/strange
13 : this/is/a
15 : this/is
16 : that/was
18 : this
24 : completely/different/folder
----- Path based Levenshtein ----
Matching : this/is/a/strange/path
0 : this/is/a/strange/path
1 : this/is/a/strange
2 : this/is/a/strong/path
2 : that/is/a/strange/path
2 : this/is/a
2 : is/this/a/strange/path
3 : this/is
4 : this
7 : that/was
8 : completely/different/folder
*/
#include <string>
#include <vector>
#include <algorithm>
#include <stdio.h>
/// returns the smaller of two parameters
template< typename T >
T levmin( T v1, T v2 )
{ return ( v1 < v2 ) ? v1 : v2; }
/// Returns the Levenshtein distance between the specified strings
template < typename T, typename T_STR >
typename T_STR::size_type levstr(const T_STR &s1, const T_STR &s2)
{
typename T_STR::size_type l1 = s1.length(), l2 = s2.length();
std::vector< typename T_STR::size_type > d( ( l1 + 1 ) * ( l2 + 1 ) );
typename T_STR::size_type i, j;
for ( i = 0; i <= l1; i++ )
d[ i * l2 ] = i;
for ( i = 0; i <= l2; i++ )
d[ i ] = i;
for ( i = 1; i <= l1; i++ )
for ( j = 1; j <= l2; j++ )
d[ i * l2 + j ] = levmin( levmin( d[ ( i - 1 ) * l2 + j ] + 1, d[ i * l2 + ( j - 1 ) ] + 1 ),
d[ ( i - 1 ) * l2 + ( j - 1 ) ] + ( s1[ i - 1 ] == s2[ j - 1 ] ? 0 : 1 )
);
return d[ ( l1 * l2 ) + l2 ];
}
/// Returns the Levenshtein distance between the specified split paths
template < typename T, typename T_STR, typename T_LST >
typename T_STR::size_type levpath(const T_LST &lst1, const T_LST &lst2)
{
typename T_STR::size_type l1 = lst1.size(), l2 = lst2.size();
std::vector< typename T_STR::size_type > d( ( l1 + 1 ) * ( l2 + 1 ) );
typename T_STR::size_type i, j;
for ( i = 0; i <= l1; i++ )
d[ i * l2 ] = i;
for ( i = 0; i <= l2; i++ )
d[ i ] = i;
for ( i = 1; i <= l1; i++ )
for ( j = 1; j <= l2; j++ )
d[ i * l2 + j ] = levmin( levmin( d[ ( i - 1 ) * l2 + j ] + 1, d[ i * l2 + ( j - 1 ) ] + 1 ),
d[ ( i - 1 ) * l2 + ( j - 1 ) ] + levstr< T, T_STR>( lst1[ i - 1 ], lst2[ j - 1 ] )
);
return d[ ( l1 * l2 ) + l2 ];
}
/// Generic split path function
/*
Returns a vector of path tokens
*/
template < typename T, typename T_STR, typename T_LST >
T_LST splitpath( const T_STR &s, const T sep )
{ T_LST lst;
typename T_STR::size_type i = 0, l = 0;
while( T_STR::npos != ( i = s.find_first_of( sep, i ) ) )
{ if ( l < i )
lst.push_back( T_STR( s, l, i - l ) );
l = ++i;
} // end while
if ( l < s.length() )
lst.push_back( T_STR( s, l, s.length() - l ) );
return lst;
}
/// Generic join path function
/*
Returns a string of joined vector tokens
*/
template < typename T, typename T_STR, typename T_LST >
T_STR joinpath( const T_LST &p, const T sep )
{ T_STR s;
for ( typename T_LST::const_iterator it = p.begin(); it != p.end(); it++ )
{ if ( s.length() ) s += sep; s += *it; }
return s;
}
// String types
typedef char t_levchar;
typedef std::basic_string< t_levchar > t_levstr;
typedef std::vector< t_levstr > t_levpath;
typedef std::vector< t_levpath > t_levpathlist;
// Sort compare for split paths
template< typename T, typename T_STR, typename T_LST > struct levcmp
{ levcmp( const T_LST &p ) { m_p = p; }
bool operator() ( const T_LST &i, const T_LST &j )
{ return levpath< T, T_STR, T_LST >( i, m_p ) < levpath< T, T_STR, T_LST >( j, m_p ); }
T_LST m_p;
};
// Sort compare for strings
template< typename T, typename T_STR > struct levcmp_str
{ levcmp_str( const T_STR &s ) { m_s = s; }
bool operator() ( const T_STR &i, const T_STR &j )
{ return levstr< T, T_STR >( i, m_s ) < levstr< T, T_STR >( j, m_s ); }
T_STR m_s;
};
int main( int argc, char* argv[] )
{
// Path to compare with
const t_levchar* compare_path = "this/is/a/strange/path";
// Paths to sort
const t_levchar* path_list[] =
{
"this/is/a/strong/path",
"that/is/a/strange/path",
"this/is/a/strange",
"this/is",
"this/is/a",
"this",
"this/is/a/strange/path",
"is/this/a/strange/path",
"that/was",
"completely/different/folder",
0
};
printf( "\n----- String based Levenshtein ----\n"
"Matching : %s\n\n", compare_path );
// Create vector from paths
std::vector< t_levstr > paths;
for( int i = 0; path_list[ i ]; i++ )
paths.push_back( path_list[ i ] );
// Sort the paths
std::sort( paths.begin(), paths.end(), levcmp_str< t_levchar, t_levstr >( compare_path ) );
// Show the result
for ( std::vector< t_levstr >::iterator it = paths.begin(); it != paths.end(); it++ )
printf( "%d : %s\n",
(int)levstr< t_levchar, t_levstr >( *it, compare_path ),
(const char*)it->c_str() );
printf( "\n----- Path based Levenshtein ----\n"
"Matching : %s\n\n", compare_path );
// Create vector from split paths
t_levpath splitcompare = splitpath< t_levchar, t_levstr, t_levpath >( compare_path, '/' );
t_levpathlist splitpaths;
for ( int i = 0; path_list[ i ]; i++ )
splitpaths.push_back( splitpath< t_levchar, t_levstr, t_levpath >( path_list[ i ], '/' ) );
// Sort the paths
std::sort( splitpaths.begin(), splitpaths.end(),
levcmp< t_levchar, t_levstr, t_levpath >( splitcompare ) );
// Show the results
for ( t_levpathlist::iterator it = splitpaths.begin(); it != splitpaths.end(); it++ )
printf( "%d : %s\n",
(int)levpath< t_levchar, t_levstr, t_levpath >( *it, splitcompare ),
(const char*)joinpath< t_levchar, t_levstr, t_levpath >( *it, '/' ).c_str() );
return 0;
}
这篇关于经与路径的地图如何比较艾德里安给定的路径?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持!