最短超弦问题

给定一组n个字符串arr[],找到包含给定集合中每个字符串的最小字符串作为子字符串。我们可以假设arr[]中没有字符串是另一个字符串的子字符串。 例如:

null
Input:  arr[] = {"geeks", "quiz", "for"}Output: geeksquizforInput:  arr[] = {"catg", "ctaagt", "gcta", "ttca", "atgcatc"}Output: gctaagttcatgcatc

最短超弦贪婪近似算法 最短超弦问题是一个非常复杂的问题 NP难 问题总是找到最短超弦的解决方案需要指数时间。下面是一个近似的贪婪算法。

Let arr[] be given set of strings.1) Create an auxiliary array of strings, temp[].  Copy contents   of arr[] to temp[]2) While temp[] contains more than one strings     a) Find the most overlapping string pair in temp[]. Let this        pair be 'a' and 'b'.      b) Replace 'a' and 'b' with the string obtained after combining        them.3) The only string left in temp[] is the result, return it.

若一个字符串的前缀和另一个字符串的后缀相同或相反,则两个字符串是重叠的。匹配前缀和后缀的最大重叠平均长度最大。 上述算法的工作原理:

arr[] = {"catgc", "ctaagt", "gcta", "ttca", "atgcatc"}Initialize:temp[] = {"catgc", "ctaagt", "gcta", "ttca", "atgcatc"}The most overlapping strings are "catgc" and "atgcatc"(Suffix of length 4 of "catgc" is same as prefix of "atgcatc")Replace two strings with "catgcatc", we gettemp[] = {"catgcatc", "ctaagt", "gcta", "ttca"}The most overlapping strings are "ctaagt" and "gcta"(Prefix of length 3 of "ctaagt" is same as suffix of "gcta")Replace two strings with "gctaagt", we gettemp[] = {"catgcatc", "gctaagt", "ttca"}The most overlapping strings are "catgcatc" and "ttca"(Prefix of length 2 of "catgcatc" as suffix of "ttca")Replace two strings with "ttcatgcatc", we gettemp[] = {"ttcatgcatc", "gctaagt"}Now there are only two strings in temp[], after combingthe two in optimal way, we get tem[] = {"gctaagttcatgcatc"}Since temp[] has only one string now, return it.

下面是上述算法的实现。

C++

// C++ program to find shortest
// superstring using Greedy
// Approximate Algorithm
#include <bits/stdc++.h>
using namespace std;
// Utility function to calculate
// minimum of two numbers
int min( int a, int b)
{
return (a < b) ? a : b;
}
// Function to calculate maximum
// overlap in two given strings
int findOverlappingPair(string str1,
string str2, string &str)
{
// Max will store maximum
// overlap i.e maximum
// length of the matching
// prefix and suffix
int max = INT_MIN;
int len1 = str1.length();
int len2 = str2.length();
// Check suffix of str1 matches
// with prefix of str2
for ( int i = 1; i <=
min(len1, len2); i++)
{
// Compare last i characters
// in str1 with first i
// characters in str2
if (str1.compare(len1-i, i, str2,
0, i) == 0)
{
if (max < i)
{
// Update max and str
max = i;
str = str1 + str2.substr(i);
}
}
}
// Check prefix of str1 matches
// with suffix of str2
for ( int i = 1; i <=
min(len1, len2); i++)
{
// compare first i characters
// in str1 with last i
// characters in str2
if (str1.compare(0, i, str2,
len2-i, i) == 0)
{
if (max < i)
{
// Update max and str
max = i;
str = str2 + str1.substr(i);
}
}
}
return max;
}
// Function to calculate
// smallest string that contains
// each string in the given
// set as substring.
string findShortestSuperstring(string arr[],
int len)
{
// Run len-1 times to
// consider every pair
while (len != 1)
{
// To store  maximum overlap
int max = INT_MIN;
// To store array index of strings
int l, r;
// Involved in maximum overlap
string resStr;
// Maximum overlap
for ( int i = 0; i < len; i++)
{
for ( int j = i + 1; j < len; j++)
{
string str;
// res will store maximum
// length of the matching
// prefix and suffix str is
// passed by reference and
// will store the resultant
// string after maximum
// overlap of arr[i] and arr[j],
// if any.
int res = findOverlappingPair(arr[i],
arr[j], str);
// check for maximum overlap
if (max < res)
{
max = res;
resStr.assign(str);
l = i, r = j;
}
}
}
// Ignore last element in next cycle
len--;
// If no overlap, append arr[len] to arr[0]
if (max == INT_MIN)
arr[0] += arr[len];
else
{
// Copy resultant string to index l
arr[l] = resStr;
// Copy string at last index to index r
arr[r] = arr[len];
}
}
return arr[0];
}
// Driver program
int main()
{
string arr[] = { "catgc" , "ctaagt" ,
"gcta" , "ttca" , "atgcatc" };
int len = sizeof (arr)/ sizeof (arr[0]);
// Function Call
cout << "The Shortest Superstring is "
<< findShortestSuperstring(arr, len);
return 0;
}
// This code is contributed by Aditya Goel


JAVA

// Java program to find shortest
// superstring using Greedy
// Approximate Algorithm
import java.io.*;
import java.util.*;
class GFG
{
static String str;
// Utility function to calculate
// minimum of two numbers
static int min( int a, int b)
{
return (a < b) ? a : b;
}
// Function to calculate maximum
// overlap in two given strings
static int findOverlappingPair(String str1,
String str2)
{
// max will store maximum
// overlap i.e maximum
// length of the matching
// prefix and suffix
int max = Integer.MIN_VALUE;
int len1 = str1.length();
int len2 = str2.length();
// check suffix of str1 matches
// with prefix of str2
for ( int i = 1 ; i <=
min(len1, len2); i++)
{
// compare last i characters
// in str1 with first i
// characters in str2
if (str1.substring(len1 - i).compareTo(
str2.substring( 0 , i)) == 0 )
{
if (max < i)
{
// Update max and str
max = i;
str = str1 + str2.substring(i);
}
}
}
// check prefix of str1 matches
// with suffix of str2
for ( int i = 1 ; i <=
min(len1, len2); i++)
{
// compare first i characters
// in str1 with last i
// characters in str2
if (str1.substring( 0 , i).compareTo(
str2.substring(len2 - i)) == 0 )
{
if (max < i)
{
// pdate max and str
max = i;
str = str2 + str1.substring(i);
}
}
}
return max;
}
// Function to calculate smallest
// string that contains
// each string in the given set as substring.
static String findShortestSuperstring(
String arr[], int len)
{
// run len-1 times to consider every pair
while (len != 1 )
{
// To store maximum overlap
int max = Integer.MIN_VALUE;
// To store array index of strings
// involved in maximum overlap
int l = 0 , r = 0 ;
// to store resultant string after
// maximum overlap
String resStr = "" ;
for ( int i = 0 ; i < len; i++)
{
for ( int j = i + 1 ; j < len; j++)
{
// res will store maximum
// length of the matching
// prefix and suffix str is
// passed by reference and
// will store the resultant
// string after maximum
// overlap of arr[i] and arr[j],
// if any.
int res = findOverlappingPair
(arr[i], arr[j]);
// Check for maximum overlap
if (max < res)
{
max = res;
resStr = str;
l = i;
r = j;
}
}
}
// Ignore last element in next cycle
len--;
// If no overlap,
// append arr[len] to arr[0]
if (max == Integer.MIN_VALUE)
arr[ 0 ] += arr[len];
else
{
// Copy resultant string
// to index l
arr[l] = resStr;
// Copy string at last index
// to index r
arr[r] = arr[len];
}
}
return arr[ 0 ];
}
// Driver Code
public static void main(String[] args)
{
String[] arr = { "catgc" , "ctaagt" ,
"gcta" , "ttca" , "atgcatc" };
int len = arr.length;
System.out.println( "The Shortest Superstring is " +
findShortestSuperstring(arr, len));
}
}
// This code is contributed by
// sanjeev2552


C#

// C# program to find shortest
// superstring using Greedy
// Approximate Algorithm
using System;
class GFG
{
static String str;
// Utility function to calculate
// minimum of two numbers
static int min( int a, int b)
{
return (a < b) ? a : b;
}
// Function to calculate maximum
// overlap in two given strings
static int findOverlappingPair(String str1,
String str2)
{
// max will store maximum
// overlap i.e maximum
// length of the matching
// prefix and suffix
int max = Int32.MinValue;
int len1 = str1.Length;
int len2 = str2.Length;
// check suffix of str1 matches
// with prefix of str2
for ( int i = 1; i <=
min(len1, len2); i++)
{
// compare last i characters
// in str1 with first i
// characters in str2
if (str1.Substring(len1 - i).CompareTo(
str2.Substring(0, i)) == 0)
{
if (max < i)
{
// Update max and str
max = i;
str = str1 + str2.Substring(i);
}
}
}
// check prefix of str1 matches
// with suffix of str2
for ( int i = 1; i <=
min(len1, len2); i++)
{
// compare first i characters
// in str1 with last i
// characters in str2
if (str1.Substring(0, i).CompareTo(
str2.Substring(len2 - i)) == 0)
{
if (max < i)
{
// pdate max and str
max = i;
str = str2 + str1.Substring(i);
}
}
}
return max;
}
// Function to calculate smallest
// string that contains
// each string in the given set as substring.
static String findShortestSuperstring(String []arr, int len)
{
// run len-1 times to consider every pair
while (len != 1)
{
// To store maximum overlap
int max = Int32.MinValue;
// To store array index of strings
// involved in maximum overlap
int l = 0, r = 0;
// to store resultant string after
// maximum overlap
String resStr = "" ;
for ( int i = 0; i < len; i++)
{
for ( int j = i + 1; j < len; j++)
{
// res will store maximum
// length of the matching
// prefix and suffix str is
// passed by reference and
// will store the resultant
// string after maximum
// overlap of arr[i] and arr[j],
// if any.
int res = findOverlappingPair
(arr[i], arr[j]);
// Check for maximum overlap
if (max < res)
{
max = res;
resStr = str;
l = i;
r = j;
}
}
}
// Ignore last element in next cycle
len--;
// If no overlap,
// append arr[len] to arr[0]
if (max == Int32.MinValue)
arr[0] += arr[len];
else
{
// Copy resultant string
// to index l
arr[l] = resStr;
// Copy string at last index
// to index r
arr[r] = arr[len];
}
}
return arr[0];
}
// Driver Code
public static void Main(String[] args)
{
String[] arr = { "catgc" , "ctaagt" ,
"gcta" , "ttca" , "atgcatc" };
int len = arr.Length;
Console.Write( "The Shortest Superstring is " +
findShortestSuperstring(arr, len));
}
}
// This code is contributed by shivanisinghss2110


输出

The Shortest Superstring is gctaagttcatgcatc

上述算法的性能: 证明了上述贪心算法是4个近似值(即该算法生成的超弦长度永远不会超过最短超弦的4倍)。该算法被推测为2近似值(没有人发现它产生的最坏结果超过两倍的情况)。推测的最坏情况示例是{ab K B K c、 b k+1 }例如{“abb”、“bbc”、“bbb”},上述算法可能会生成“abbcbb”(如果选择“abb”和“bbc”作为第一对),但实际最短的超弦是“abbbc”。这里的比例是7/5,但对于大钾,比例接近2。

另一种方法:

我所说的“贪婪方法”是指:每次我们以最大重叠长度合并两个字符串时,将它们从字符串数组中移除,并将合并的字符串放入字符串数组中。

然后问题就变成了:在这个图中找到每个节点只访问一次的最短路径。这是一个旅行推销员的问题。

应用旅行推销员问题DP解决方案。记住记录路径。

以下是上述方法的实施情况:

JAVA

// Java program for above approach
import java.io.*;
import java.util.*;
class Solution
{
// Function to calculate shortest
// super string
public static String shortestSuperstring(
String[] A)
{
int n = A.length;
int [][] graph = new int [n][n];
// Build the graph
for ( int i = 0 ; i < n; i++)
{
for ( int j = 0 ; j < n; j++)
{
graph[i][j] = calc(A[i], A[j]);
graph[j][i] = calc(A[j], A[i]);
}
}
// Creating dp array
int [][] dp = new int [ 1 << n][n];
// Creating path array
int [][] path = new int [ 1 << n][n];
int last = - 1 , min = Integer.MAX_VALUE;
// start TSP DP
for ( int i = 1 ; i < ( 1 << n); i++)
{
Arrays.fill(dp[i], Integer.MAX_VALUE);
// Iterate j from 0 to n - 1
for ( int j = 0 ; j < n; j++)
{
if ((i & ( 1 << j)) > 0 )
{
int prev = i - ( 1 << j);
// Check if prev is zero
if (prev == 0 )
{
dp[i][j] = A[j].length();
}
else
{
// Iterate k from 0 to n - 1
for ( int k = 0 ; k < n; k++)
{
if (dp[prev][k] < Integer.MAX_VALUE &&
dp[prev][k] + graph[k][j] < dp[i][j])
{
dp[i][j] = dp[prev][k] + graph[k][j];
path[i][j] = k;
}
}
}
}
if (i == ( 1 << n) - 1 && dp[i][j] < min)
{
min = dp[i][j];
last = j;
}
}
}
// Build the path
StringBuilder sb = new StringBuilder();
int cur = ( 1 << n) - 1 ;
// Creating a stack
Stack<Integer> stack = new Stack<>();
// Until cur is zero
// push last
while (cur > 0 )
{
stack.push(last);
int temp = cur;
cur -= ( 1 << last);
last = path[temp][last];
}
// Build the result
int i = stack.pop();
sb.append(A[i]);
// Until stack is empty
while (!stack.isEmpty())
{
int j = stack.pop();
sb.append(A[j].substring(A[j].length() -
graph[i][j]));
i = j;
}
return sb.toString();
}
// Function to check
public static int calc(String a, String b)
{
for ( int i = 1 ; i < a.length(); i++)
{
if (b.startsWith(a.substring(i)))
{
return b.length() - a.length() + i;
}
}
// Return size of b
return b.length();
}
// Driver Code
public static void main(String[] args)
{
String[] arr = { "catgc" , "ctaagt" ,
"gcta" , "ttca" , "atgcatc" };
// Function Call
System.out.println( "The Shortest Superstring is " +
shortestSuperstring(arr));
}
}


输出

The Shortest Superstring is gctaagttcatgcatc

时间复杂性: O(n^2*2^n)

对于这个问题,存在更好的近似算法。请参考下面的链接。 最短超弦问题|集合2(使用集合覆盖) 应用: 在基因组计划中很有用,因为它将允许研究人员从一组片段中确定整个编码区。 参考: http://fileadmin.cs.lth.se/cs/Personal/Andrzej_Lingas/superstring.pdf http://math.mit.edu/~goemans/18434S06/乐乐超弦。pdf 本文由 皮尤什 。如果您发现任何不正确的地方,或者您想分享有关上述主题的更多信息,请发表评论。

© 版权声明
THE END
喜欢就支持一下吧
点赞8 分享