Given an array of strings words
, return the smallest string that contains each string in words
as a substring. If there are multiple valid strings of the smallest length, return any of them.
You may assume that no string in words
is a substring of another string in words.
文章目录
- 问题理解
- 分步解法
- 第一步:计算字符串间的重叠部分
- 第二步:初始化动态规划表
- 第三步:状态转移
- 第四步:找到最优解
- 第五步:回溯构建结果字符串
- C++ 完整实现
- 复杂度分析
Example 1:
Input: words = ["alex","loves","leetcode"]
Output: "alexlovesleetcode"
Explanation: All permutations of "alex","loves","leetcode" would also be accepted.
Example 2:
Input: words = ["catg","ctaagt","gcta","ttca","atgcatc"]
Output: "gctaagttcatgcatc"
问题理解
我们需要找到一个最短的字符串S,使得给定的字符串数组A中的所有字符串都是S的子串。例如:
- 输入:[“alex”,“loves”,“leetcode”]
- 输出:“alexlovesleetcode”(也可以是其他排列组合)
分步解法
第一步:计算字符串间的重叠部分
目标: 对于每对字符串A[i]
和A[j]
,计算A[j]
可以重叠在A[i]
后面的最大字符数。
实现:
int calcOverlap(const string& a, const string& b) {int max_len = min(a.length(), b.length());for (int k = max_len; k >= 1; --k) {if (a.substr(a.length() - k) == b.substr(0, k)) {return k;}}return 0;
}
示例:
calcOverlap("alex", "loves") → 1 ("x"和"l"不匹配,"e"和"l"不匹配,"l"和"l"匹配)
calcOverlap("loves", "leetcode") → 2 ("es"和"le"不匹配,"es"和"le"不匹配,"oves"和"leet"不匹配,"ves"和"eet"不匹配)
第二步:初始化动态规划表
定义状态:
dp[mask][i]
:使用mask
代表的字符串集合,且最后一个字符串是A[i]时的最短超级字符串长度parent[mask][i]
:记录前一个字符串的索引
初始化:
vector<vector<int>> dp(1 << n, vector<int>(n, INT_MAX));
vector<vector<int>> parent(1 << n, vector<int>(n, -1));// 只包含一个字符串的情况
for (int i = 0; i < n; ++i) {dp[1 << i][i] = A[i].length();
}
解释: 当mask
只有第i
位为1时,最短长度就是A[i]
本身的长度
第三步:状态转移
转移方程:
对于每个状态mask
和每个可能的最后一个字符串i
:
- 找到不包含
i
的前一个状态prev_mask
- 遍历
prev_mask
中的所有可能的前一个字符串j - 计算新长度
= dp[prev_mask][j] + len(A[i]) - overlap[j][i]
- 如果新长度更优,则更新
dp
表和parent
表
实现:
for (int mask = 1; mask < (1 << n); ++mask) {for (int i = 0; i < n; ++i) {if (!(mask & (1 << i))) continue;int prev_mask = mask ^ (1 << i);if (prev_mask == 0) continue;for (int j = 0; j < n; ++j) {if (prev_mask & (1 << j)) {int new_len = dp[prev_mask][j] + A[i].length() - overlap[j][i];if (new_len < dp[mask][i]) {dp[mask][i] = new_len;parent[mask][i] = j;}}}}
}
第四步:找到最优解
步骤:
- 找到包含所有字符串的状态
mask = (1<<n)-1
- 在这个状态下找到长度最小的
dp[mask][i]
- 记录最后一个字符串的索引
last
int mask = (1 << n) - 1;
int last = min_element(dp[mask].begin(), dp[mask].end()) - dp[mask].begin();
第五步:回溯构建结果字符串
步骤:
- 从最后一个字符串开始,通过parent表回溯路径
- 反转路径得到正确的顺序
- 按照路径顺序拼接字符串,利用预先计算的重叠部分
vector<int> path;
int curr_mask = mask;
int curr = last;while (curr_mask) {path.push_back(curr);int prev = parent[curr_mask][curr];if (prev == -1) break;curr_mask ^= (1 << curr);curr = prev;
}reverse(path.begin(), path.end());string result = A[path[0]];
for (int i = 1; i < path.size(); ++i) {int prev = path[i-1];int curr = path[i];int overlap_len = overlap[prev][curr];result += A[curr].substr(overlap_len);
}
C++ 完整实现
#include <vector>
#include <string>
#include <algorithm>
#include <climits>using namespace std;class Solution {
public:string shortestSuperstring(vector<string>& A) {int n = A.size();// 计算重叠矩阵vector<vector<int>> overlap(n, vector<int>(n, 0));for (int i = 0; i < n; ++i) {for (int j = 0; j < n; ++j) {if (i != j) {overlap[i][j] = calcOverlap(A[i], A[j]);}}}// DP表:dp[mask][i] 表示使用mask代表的字符串集合,最后一个字符串是i时的最短长度vector<vector<int>> dp(1 << n, vector<int>(n, INT_MAX));vector<vector<int>> parent(1 << n, vector<int>(n, -1));// 初始化:只包含一个字符串的情况for (int i = 0; i < n; ++i) {dp[1 << i][i] = A[i].length();}// 状态转移for (int mask = 1; mask < (1 << n); ++mask) {for (int i = 0; i < n; ++i) {if (!(mask & (1 << i))) continue;int prev_mask = mask ^ (1 << i);if (prev_mask == 0) continue;for (int j = 0; j < n; ++j) {if (prev_mask & (1 << j)) {int new_len = dp[prev_mask][j] + A[i].length() - overlap[j][i];if (new_len < dp[mask][i]) {dp[mask][i] = new_len;parent[mask][i] = j;}}}}}// 找到最优解int mask = (1 << n) - 1;int last = min_element(dp[mask].begin(), dp[mask].end()) - dp[mask].begin();int min_len = dp[mask][last];// 回溯构建结果字符串string result;int curr_mask = mask;int curr = last;vector<int> path;while (curr_mask) {path.push_back(curr);int prev = parent[curr_mask][curr];if (prev == -1) break;curr_mask ^= (1 << curr);curr = prev;}reverse(path.begin(), path.end());// 构建最终字符串result = A[path[0]];for (int i = 1; i < path.size(); ++i) {int prev = path[i-1];int curr = path[i];int overlap_len = overlap[prev][curr];result += A[curr].substr(overlap_len);}return result;}private:// 计算两个字符串的最大重叠长度int calcOverlap(const string& a, const string& b) {int max_len = min(a.length(), b.length());for (int k = max_len; k >= 1; --k) {if (a.substr(a.length() - k) == b.substr(0, k)) {return k;}}return 0;}
};
复杂度分析
-
时间复杂度: O ( n 2 ⋅ 2 n ) O(n²·2ⁿ) O(n2⋅2n)
- 外层循环 2 n 2ⁿ 2n个状态
- 内层循环 n n n个字符串
- 每个状态转移最多考虑 n n n种可能
-
空间复杂度: O ( n ⋅ 2 n ) O(n·2ⁿ) O(n⋅2n)
- 存储DP表和parent表