MyDiffTest.java:
import java.io.BufferedReader;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.List;
public class MyDiffTest {
private static final String path = "\\xxx\\";
private static final List<String> lines_v1 = readFile2List( path + "txt1.txt" );
private static final List<String> lines_v2 = readFile2List( path + "txt2.txt" );
public static void main(String[] args) {
MinimumTransferWayVO way = calculateMinimumTransferWay(lines_v1, lines_v2);
printOpeartions( way.getOperations() );
}
private static List<String> readFile2List( String filePath ){
BufferedReader reader = null;
try {
List<String> lines = new ArrayList<>();
reader = new BufferedReader(new FileReader(filePath));
String line = reader.readLine();
while (line != null) {
lines.add( line );
line = reader.readLine();
}
return lines;
} catch (Exception e) {
e.printStackTrace();
return null;
} finally {
if (reader != null) {
try {
reader.close();
} catch (Exception e) {
e.printStackTrace();
}
}
}
}
private static List<Character> string2CharacterList(String str) {
List<Character> characters = new ArrayList<>();
int length = str.length();
for (int i = 0; i < length; i++) {
char c = str.charAt(i);
characters.add( c );
}
return characters;
}
// 把连续的编辑操作,-+-+-+ 换成 ---++++输出
private static void printOpeartions(List<OperationVO> operations) {
List<EditOperationVO> editOperations = new ArrayList<>();
for( OperationVO operation:operations ){
if( operation instanceof EditOperationVO ){
EditOperationVO editOperation = (EditOperationVO) operation;
editOperations.add( editOperation );
}else {
// 批量输出编辑操作并清空
if( editOperations.size() > 0 ){
for( EditOperationVO editOperation:editOperations ){
System.out.println( "- " + lines_v1.get( editOperation.getIndex1() ) );
}
for( EditOperationVO editOperation:editOperations ){
System.out.println( "+ " + lines_v2.get( editOperation.getIndex2() ) );
}
editOperations.clear();
}
if( operation instanceof ReadOperationVO){
ReadOperationVO readOperation = (ReadOperationVO) operation;
System.out.println( " " + lines_v1.get( readOperation.getIndex1() ) );
}else if( operation instanceof InsertOperationVO){
InsertOperationVO insertOperation = (InsertOperationVO) operation;
System.out.println( "+ " + lines_v2.get( insertOperation.getIndex2() ) );
}else if( operation instanceof DeleteOperationVO){
DeleteOperationVO deleteOperation = (DeleteOperationVO) operation;
System.out.println( "- " + lines_v1.get( deleteOperation.getIndex1() ) );
}
}
}
// 批量输出编辑操作并清空
if( editOperations.size() > 0 ){
for( EditOperationVO editOperation:editOperations ){
System.out.println( "- " + lines_v1.get( editOperation.getIndex1() ) );
}
for( EditOperationVO editOperation:editOperations ){
System.out.println( "+ " + lines_v2.get( editOperation.getIndex2() ) );
}
editOperations.clear();
}
}
private static MinimumTransferWayVO calculateMinimumTransferWay(List<String> lines_v1, List<String> lines_v2 ){
// dp[i][j] 表示的是将 characters1 的前i个元素变换为 characters2 中的前j个元素需要使用的最优( 即需要转换步骤最少 )的转换方式
int size_v1 = lines_v1.size();
int size_v2 = lines_v2.size();
MinimumTransferWayVO[][] dp = new MinimumTransferWayVO[ size_v1 ][ size_v2 ];
for (int index1 = 0; index1 < size_v1; index1++) {
String line_v1 = lines_v1.get( index1 );
String line_v1_trim = line_v1.trim();
for (int index2 = 0; index2 < size_v2; index2++) {
String line_v2 = lines_v2.get( index2 );
String line_v2_trim = line_v2.trim();
MinimumTransferWayVO way = new MinimumTransferWayVO();
if( index1 == 0 ){
if( index2 == 0 ){
// v1 和 v2 都只有 1 行文本,此时最简单,只需要检测这 1 行文本是否相同?
/*
v1 v2
1111111111111111111111 ==》 1111111111111111111111
*/
// todo 不需要操作,最初可以不初始化,直接为null,后面使用的地方再判断是否为null.为空的话,当做长度为0处理
List<OperationVO> operations = new ArrayList<>();
if( line_v1_trim.equals( line_v2_trim ) ){
// 相同,不需要任何变换,放置一个 ReadOperationVO 方便输出转换过程
ReadOperationVO readOperation = new ReadOperationVO();
readOperation.setIndex1( index1 );
readOperation.setIndex2( index2 );
operations.add( readOperation );
way.setOperationCount( 0 );
}else {
// 不相同,需要 1 步修改操作
EditOperationVO editOperation = new EditOperationVO();
editOperation.setIndex1( index1 );
editOperation.setIndex2( index2 );
operations.add( editOperation );
way.setOperationCount( 1 );
}
way.setOperations( operations );
}else {
// v1只有1行文本,v2有多行文本,此时的变换也比较简单,
// 检测 line_v1 在 lines_v2 中是否存在
List<OperationVO> operations = new ArrayList<>();
if( getFirstEqualIndex( lines_v2,line_v1_trim,0,index2 ) != -1 ){
// line_v1 在 lines_v2 中存在
/*
v1 v2
1111111111111111111111 ==》 2222222222222222222222
1111111111111111111111
444444444444444444444444
555555555555555555
如下一种情形,v1转换为v2需要在 "1111111111111111111111" 上面插入一行 "2222222222222222222222",
然后在 "1111111111111111111111" 下面插入 "444444444444444444444444"、"555555555555555555" 行
*/
// firstEqualIndex 介于 0 和 lineNum_v2 之间
int firstEqualIndex = getFirstEqualIndex(lines_v2, line_v1_trim, 0, index2);
int operationCount = 0;
for (int lineNum = 0; lineNum <=index2 ; lineNum++) {
if( lineNum == firstEqualIndex ){
ReadOperationVO readOperation = new ReadOperationVO();
readOperation.setIndex1( index1 );
readOperation.setIndex2( lineNum );
operations.add( readOperation );
}else {
// 插入行
InsertOperationVO insertOperation = new InsertOperationVO();
insertOperation.setIndex2( lineNum );
operations.add( insertOperation );
operationCount++;
}
}
way.setOperationCount( operationCount );
}else {
// line_v1 在 lines_v2 中不存在
/*
v1 v2
1111111111111111111111 ==》 2222222222222222222222
3333333333333333333
444444444444444444444444
555555555555555555
此时,v1 转换成 v2,需要先删除 "1111111111111111111111" 行,
再插入 "2222222222222222222222"、
"3333333333333333333"、
"444444444444444444444444"、
"555555555555555555" 行
*/
DeleteOperationVO deleteOperation = new DeleteOperationVO();
deleteOperation.setIndex1( index1 );
operations.add( deleteOperation );
for (int lineNum = 0; lineNum <= index2; lineNum++) {
InsertOperationVO insertOperation = new InsertOperationVO();
insertOperation.setIndex2( lineNum );
operations.add( insertOperation );
}
way.setOperationCount( index2 + 1 );
}
way.setOperations( operations );
}
}else {
if( index2 == 0 ){
// v1有多行文本,v1只有1行文本,
List<OperationVO> operations = new ArrayList<>();
// 检测 line_v2 是否在 lines_v1 中存在
if( getFirstEqualIndex(lines_v1, line_v2_trim, 0, index1) != -1 ){
// line_v2 在 lines_v1 中存在
/*
v1 v2
11111111111111 333333333333
333333333333
444444444444444
5555555555
*/
// 此时,v1转换为 v2需要删除 "333333333333" 上面的 "11111111111111"行,删除 "333333333333" 下面的 "444444444444444"、"5555555555" 行
int firstEqualIndex = getFirstEqualIndex(lines_v1, line_v2_trim, 0, index1);
int operationCount = 0;
for (int index = 0; index <=index1 ; index++) {
if( index == firstEqualIndex){
ReadOperationVO readOperation= new ReadOperationVO();
readOperation.setIndex1( index );
readOperation.setIndex2( index2 );
operations.add( readOperation );
}else {
// 删除行
DeleteOperationVO deleteOperation = new DeleteOperationVO();
deleteOperation.setIndex1( index );
operations.add( deleteOperation );
operationCount++;
}
}
way.setOperationCount( operationCount );
way.setOperations( operations );
}else {
// line_v2 在 lines_v1 中不存在
/*
v1 v2
11111111111111 22222222222222
333333333333
444444444444444
5555555555
*/
// 此时,v1 转换为 v2 需要删除 "11111111111111"、 "333333333333"、 "444444444444444"、 "5555555555",再新增 "22222222222222"
for (int index = 0; index <=index1 ; index++) {
// 删除行
DeleteOperationVO deleteOperation = new DeleteOperationVO();
deleteOperation.setIndex1( index );
operations.add( deleteOperation );
}
// 插入行
InsertOperationVO insertOperation = new InsertOperationVO();
insertOperation.setIndex2( index2 );
operations.add( insertOperation );
way.setOperationCount( index1 + 2 );
way.setOperations( operations );
}
}else {
List<OperationVO> operations = null;
// v1 和 v2 都有多行文本,最复杂。
// 检测 v1 的 v2 的当前行是否相同
if( line_v1_trim.equals( line_v2_trim ) ){
// line_v1 和 line_v2 相同
/*
v1 v2
11111111 1111111
22222222 33333
... ...
44444444 555555
66666666 66666666
*/
// 如上所示,v1的当前行 "66666666" 和 v2 的当前行 "66666666" 相同,只需要看 v1 点位前面部分 转换wield v2的前面部分的最小转换方式就行了,
MinimumTransferWayVO way_prev = dp[index1 - 1][index2 - 1];
operations = way_prev.getOperations();
// 追加一个读操作,方便输出转换过程
ReadOperationVO readOperation = new ReadOperationVO();
readOperation.setIndex1( index1 );
readOperation.setIndex2( index2 );
operations.add( readOperation );
// minimumTransferWay_prev 不需要了
dp[index1 - 1][index2 - 1] = null;
way.setOperationCount( way_prev.getOperationCount() );
way.setOperations( operations );
}else {
// line_v1 和 line_v2 不相同
/*
v1 v2
11111111 1111111
22222222 33333
... ...
44444444 555555
66666666 7777777
*/
// 如上所示,v1 的当前行 "66666666" 和 v2 的当前行 "7777777" 不相同,则有3个候选转换方式,需要从中选择最小的转换方式
// 1. v1 的前面部分 转换为 v2 的前面部分 + v2 的当前行部分,删除 v1 的当前行 "66666666"
MinimumTransferWayVO way_prev1 = dp[index1 - 1][index2];
// 2. v1 的前面部分 + v1 的当前行部分 转换为 v2 的前面部分,v2 新增当前行 "7777777"
MinimumTransferWayVO way_prev2 = dp[index1 ][index2 - 1];
// 3. v1 的前面部分转换为 v2的前面部分,v1的 当前行 "66666666" 修改为 v2 的当前行 "7777777"
MinimumTransferWayVO way_prev3 = dp[index1 - 1][index2 - 1];
boolean isOne = true;
boolean isTwo = false;
boolean isThree = false;
MinimumTransferWayVO way_prev = way_prev1;
int opeartionCount = way_prev1.getOperationCount();
if( way_prev2.getOperationCount() < opeartionCount ){
way_prev = way_prev2;
opeartionCount = way_prev2.getOperationCount();
isOne = false;
isTwo = true;
isThree = false;
}
if( way_prev3.getOperationCount() < opeartionCount ){
way_prev = way_prev3;
opeartionCount = way_prev3.getOperationCount();
isOne = false;
isTwo = false;
isThree = true;
}
if( isOne ){
operations = new ArrayList<>();
operations.addAll( way_prev.getOperations() );
// 删除 v1 的当前行 "66666666"
DeleteOperationVO deleteOperation = new DeleteOperationVO();
deleteOperation.setIndex1( index1 );
operations.add( deleteOperation );
}else if( isTwo ){
operations = new ArrayList<>();
operations.addAll( way_prev.getOperations() );
// v2 新增当前行 "7777777"
InsertOperationVO insertOperation = new InsertOperationVO();
insertOperation.setIndex2( index2 );
operations.add( insertOperation );
}else if( isThree ){
operations = way_prev.getOperations();
// v1的 当前行 "66666666" 修改为 v2 的当前行 "7777777"
EditOperationVO editOperation = new EditOperationVO();
editOperation.setIndex1( index1 );
editOperation.setIndex2( index2 );
operations.add( editOperation );
}
opeartionCount++;
// 此时 minimumTransferWay_prev3 不需要了
dp[index1 - 1][index2 - 1] = null;
way.setOperationCount( opeartionCount );
way.setOperations( operations );
}
}
}
dp[ index1 ][ index2 ] = way;
}
}
return dp[ size_v1 -1 ][ size_v2 -1 ];
}
private static int getFirstEqualIndex(List<String> lines, String targetLine, int beginIndex, int endIndex) {
for (int i = beginIndex; i <=endIndex ; i++) {
if( targetLine.equals( lines.get( i ).trim() ) ){
return i;
}
}
return -1;
}
}
DemoClass1.java:
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.text.similarity.LevenshteinDistance;
@Slf4j
public class DemoClass1 {
private static final LevenshteinDistance LEVENSHTEIN_DISTANCE = LevenshteinDistance.getDefaultInstance();
public static String null2emptyWithTrim( String str ){
if( str == null ){
str = "";
}
str = str.trim();
return str;
}
public static String requiredStringParamCheck(String param, String paramRemark) {
param = null2emptyWithTrim( param );
if( param.length() == 0 ){
String msg = "操作失败,请求参数 \"" + paramRemark + "\" 为空";
log.error( msg );
throw new BusinessLogicException( msg );
}
return param;
}
public static double calculateSimilarity( String str1,String str2 ){
int distance = LEVENSHTEIN_DISTANCE.apply(str1, str2);
double similarity = 1 - (double) distance / Math.max(str1.length(), str2.length());
System.out.println("相似度:" + similarity);
return similarity;
}
}
DemoClass2.java:
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.text.similarity.LevenshteinDistance;
@Slf4j
public class DemoClass2 {
private static final LevenshteinDistance LEVENSHTEIN_DISTANCE = LevenshteinDistance.getDefaultInstance();
private static final LevenshteinDistance LEVENSHTEIN_DISTANCE1 = LevenshteinDistance.getDefaultInstance();
private static final LevenshteinDistance LEVENSHTEIN_DISTANCE2 = LevenshteinDistance.getDefaultInstance();
public static String null2emptyWithTrim( String str ){
// if( str == null ){
// str = "";
// }
// str = str.trim();
return str;
}
public static String requiredStringParamCheck(String param, String paramRemark) {
return null;
}
public static double calculateSimilarity( String str1,String str2 ){
try {
int distance = LEVENSHTEIN_DISTANCE.apply(str1, str2);
double similarity = 1 - (double) distance / Math.max(str1.length(), str2.length());
return similarity;
}catch ( Exception e ){
e.printStackTrace();
return 0d;
}
}
}
DeleteOperationVO.java:
import lombok.Getter;
import lombok.Setter;
@Getter
@Setter
public class DeleteOperationVO extends OperationVO {
private int index1;
}
EditOperationVO.java:
import lombok.Getter;
import lombok.Setter;
@Getter
@Setter
public class EditOperationVO extends OperationVO {
private int index1;
private int index2;
}
InsertOperationVO.java:
import lombok.Getter;
import lombok.Setter;
@Getter
@Setter
public class InsertOperationVO extends OperationVO {
private int index2;
}
MinimumTransferWayVO.java:
import lombok.Getter;
import lombok.Setter;
import java.io.Serializable;
import java.util.List;
@Getter
@Setter
public class MinimumTransferWayVO implements Serializable {
private List<OperationVO> operations;
}
OperationVO.java:
import lombok.Getter;
import lombok.Setter;
import java.io.Serializable;
@Getter
@Setter
public class OperationVO implements Serializable {
}
ReadOperationVO.java:
import lombok.Getter;
import lombok.Setter;
/**
* ReadOperationVO 不贡献操作步数,只是为了方便输出v1到v2的整个变换过程
**/
@Getter
@Setter
public class ReadOperationVO extends OperationVO {
// 此时 lines_v1[ lineNum_v1 ] 和 lines_v2[ lineNum_v2 ] 相同
private int index1;
private int index2;
}
测试输出:
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.text.similarity.LevenshteinDistance;
@Slf4j
- public class DemoClass1 {
+ public class DemoClass2 {
private static final LevenshteinDistance LEVENSHTEIN_DISTANCE = LevenshteinDistance.getDefaultInstance();
+ private static final LevenshteinDistance LEVENSHTEIN_DISTANCE1 = LevenshteinDistance.getDefaultInstance();
+ private static final LevenshteinDistance LEVENSHTEIN_DISTANCE2 = LevenshteinDistance.getDefaultInstance();
public static String null2emptyWithTrim( String str ){
- if( str == null ){
- str = "";
- }
- str = str.trim();
+ // if( str == null ){
+ // str = "";
+ // }
+ // str = str.trim();
return str;
}
public static String requiredStringParamCheck(String param, String paramRemark) {
- param = null2emptyWithTrim( param );
+ return null;
- if( param.length() == 0 ){
- String msg = "操作失败,请求参数 \"" + paramRemark + "\" 为空";
- log.error( msg );
- throw new BusinessLogicException( msg );
- }
- return param;
}
public static double calculateSimilarity( String str1,String str2 ){
+ try {
int distance = LEVENSHTEIN_DISTANCE.apply(str1, str2);
double similarity = 1 - (double) distance / Math.max(str1.length(), str2.length());
- System.out.println("相似度:" + similarity);
return similarity;
+ }catch ( Exception e ){
+ e.printStackTrace();
+ return 0d;
+ }
}
}