给定一个包含较低字母字符的字符串,我们需要找出这个字符串的这样一个子字符串,其长度和频率的乘积在所有可能的子字符串选择中是最大的。 例如:
null
Input : String str = “abddab”Output : 6All unique substring with product of their frequency and length are,Val["a"] = 2 * 1 = 2 Val["ab"] = 2 * 2 = 4Val["abd"] = 1 * 3 = 3Val["abdd"] = 1 * 4 = 4Val["abdda"] = 1 * 5 = 5Val["abddab"] = 1 * 6 = 6Val["b"] = 2 * 1 = 2Val["bd"] = 1 * 2 = 2Val["bdd"] = 1 * 3 = 3Val["bdda"] = 1 * 4 = 4Val["bddab"] = 1 * 5 = 5Val["d"] = 2 * 1 = 2Val["da"] = 1 * 2 = 2Val["dab"] = 1 * 3 = 3Val["dd"] = 1 * 2 = 2Val["dda"] = 1 * 3 = 3Val["ddab"] = 1 * 4 = 4Input : String str = “zzzzzz”Output : 12In above string maximum value 12 can be obtained with substring “zzzz”
A. 简单解决方案 是一个一个地考虑所有子串。对于每个子字符串,计算它在整个字符串中出现的次数。 一 有效解决方案 首先通过构造 最长公共前缀 数组,现在假设lcp[i]的值是K,那么我们可以说第i个和(i+1)个后缀有K个相同的长度前缀,也就是说,有一个长度为K的子字符串重复了两次。同样,假设lcp的三个连续值是(K,K-2,K+1),那么我们可以说有一个长度为(K-2)的子串在字符串中重复三次。 在以上观察之后,我们可以看到我们的结果将是这样一个lcp阵列的范围,其最小元素乘以范围内元素的数量是最大的,因为范围将对应于字符串的频率,范围的最小元素将对应于重复字符串的长度,现在这个改进的问题可以类似于 直方图问题中的最大矩形 . 在下面的代码中,lcp数组由 Kasai算法 .
CPP
// C++ program to find substring with highest // frequency length product #include <bits/stdc++.h> using namespace std; // Structure to store information of a suffix struct suffix { int index; // To store original index int rank[2]; // To store ranks and next rank pair }; // A comparison function used by sort() to compare // two suffixes. Compares two pairs, returns 1 if // first pair is smaller int cmp( struct suffix a, struct suffix b) { return (a.rank[0] == b.rank[0])? (a.rank[1] < b.rank[1] ?1: 0): (a.rank[0] < b.rank[0] ?1: 0); } // This is the main function that takes a string // 'txt' of size n as an argument, builds and // return the suffix array for the given string vector< int > buildSuffixArray(string txt, int n) { // A structure to store suffixes and their indexes struct suffix suffixes[n]; // Store suffixes and their indexes in an array // of structures. The structure is needed to sort // the suffixes alphabetically and maintain their // old indexes while sorting for ( int i = 0; i < n; i++) { suffixes[i].index = i; suffixes[i].rank[0] = txt[i] - 'a' ; suffixes[i].rank[1] = ((i+1) < n)? (txt[i + 1] - 'a' ): -1; } // Sort the suffixes using the comparison function // defined above. sort(suffixes, suffixes+n, cmp); // At his point, all suffixes are sorted according to first // 2 characters. Let us sort suffixes according to first 4 // characters, then first 8 and so on // This array is needed to get the index in suffixes[] // from original index. This mapping is needed to get // next suffix. int ind[n]; for ( int k = 4; k < 2*n; k = k*2) { // Assigning rank and index values to first suffix int rank = 0; int prev_rank = suffixes[0].rank[0]; suffixes[0].rank[0] = rank; ind[suffixes[0].index] = 0; // Assigning rank to suffixes for ( int i = 1; i < n; i++) { // If first rank and next ranks are same as // that of previous suffix in array, assign // the same new rank to this suffix if (suffixes[i].rank[0] == prev_rank && suffixes[i].rank[1] == suffixes[i-1].rank[1]) { prev_rank = suffixes[i].rank[0]; suffixes[i].rank[0] = rank; } else // Otherwise increment rank and assign { prev_rank = suffixes[i].rank[0]; suffixes[i].rank[0] = ++rank; } ind[suffixes[i].index] = i; } // Assign next rank to every suffix for ( int i = 0; i < n; i++) { int nextindex = suffixes[i].index + k/2; suffixes[i].rank[1] = (nextindex < n)? suffixes[ind[nextindex]].rank[0]: -1; } // Sort the suffixes according to first k characters sort(suffixes, suffixes+n, cmp); } // Store indexes of all sorted suffixes in the suffix array vector< int >suffixArr; for ( int i = 0; i < n; i++) suffixArr.push_back(suffixes[i].index); // Return the suffix array return suffixArr; } /* To construct and return LCP */ vector< int > kasai(string txt, vector< int > suffixArr) { int n = suffixArr.size(); // To store LCP array vector< int > lcp(n, 0); // An auxiliary array to store inverse of suffix array // elements. For example if suffixArr[0] is 5, the // invSuff[5] would store 0. This is used to get next // suffix string from suffix array. vector< int > invSuff(n, 0); // Fill values in invSuff[] for ( int i=0; i < n; i++) invSuff[suffixArr[i]] = i; // Initialize length of previous LCP int k = 0; // Process all suffixes one by one starting from // first suffix in txt[] for ( int i=0; i<n; i++) { /* If the current suffix is at n-1, then we don’t have next substring to consider. So lcp is not defined for this substring, we put zero. */ if (invSuff[i] == n-1) { k = 0; continue ; } /* j contains index of the next substring to be considered to compare with the present substring, i.e., next string in suffix array */ int j = suffixArr[invSuff[i]+1]; // Directly start matching from k'th index as // at-least k-1 characters will match while (i+k<n && j+k<n && txt[i+k]==txt[j+k]) k++; lcp[invSuff[i]] = k; // lcp for the present suffix. // Deleting the starting character from the string. if (k>0) k--; } // return the constructed lcp array return lcp; } // method to get LCP array vector< int > getLCPArray(string str) { vector< int >suffixArr = buildSuffixArray(str, str.length()); return kasai(str, suffixArr); } // The main function to find the maximum rectangular // area under given histogram with n bars int getMaxArea( int hist[], int n) { // Create an empty stack. The stack holds indexes // of hist[] array. The bars stored in stack are // always in increasing order of their heights. stack< int > s; int max_area = 0; // Initialize max area int tp; // To store top of stack // To store area with top bar as the smallest bar int area_with_top; // Run through all bars of given histogram int i = 0; while (i < n) { // If this bar is higher than the bar on // top stack, push it to stack if (s.empty() || hist[s.top()] <= hist[i]) s.push(i++); // If this bar is lower than top of stack, // then calculate area of rectangle with // stack top as the smallest (or minimum // height) bar. 'i' is 'right index' for // the top and element before top in stack // is 'left index' else { tp = s.top(); // store the top index s.pop(); // pop the top // Calculate the area with hist[tp] // stack as smallest bar area_with_top = hist[tp] * (s.empty() ? (i + 1) : i - s.top()); // update max area, if needed if (max_area < area_with_top) max_area = area_with_top; } } // Now pop the remaining bars from stack // and calculate area with every // popped bar as the smallest bar while (s.empty() == false ) { tp = s.top(); s.pop(); area_with_top = hist[tp] * (s.empty() ? (i + 1) : i - s.top()); if (max_area < area_with_top) max_area = area_with_top; } return max_area; } // Returns maximum product of frequency and length // of a substring. int maxProductOfFreqLength(string str) { // get LCP array by Kasai's algorithm vector< int > lcp = getLCPArray(str); int hist[lcp.size()]; // copy lcp array into hist array for ( int i = 0; i < lcp.size(); i++) hist[i] = lcp[i]; // get the maximum area under lcp histogram int substrMaxValue = getMaxArea(hist, lcp.size()); // if string length itself is greater than // histogram area, then return that if (str.length() > substrMaxValue) return str.length(); else return substrMaxValue; } // Driver code to test above methods int main() { string str = "abddab" ; cout << maxProductOfFreqLength(str) << endl; return 0; } |
输出:
6
本文由 乌特卡什·特里维迪 .如果你喜欢GeekSforgek,并想贡献自己的力量,你也可以使用 写极客。组织 或者把你的文章寄去评论-team@geeksforgeeks.org.看到你的文章出现在Geeksforgeks主页上,并帮助其他极客。 如果您发现任何不正确的地方,或者您想分享有关上述主题的更多信息,请写下评论。
© 版权声明
文章版权归作者所有,未经允许请勿转载。
THE END