-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDNASequence.txt
36 lines (32 loc) · 1.21 KB
/
DNASequence.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
187. Repeated DNA Sequences
The DNA sequence is composed of a series of nucleotides abbreviated as 'A', 'C', 'G', and 'T'.
For example, "ACGAATTCCG" is a DNA sequence.
When studying DNA, it is useful to identify repeated sequences within the DNA.
Given a string s that represents a DNA sequence, return all the 10-letter-long sequences (substrings) that occur more than once in s.
You may return the answer in any order.
/// Solver for the "repeated DNA sequences" problem: find every 10-letter
/// substring that appears more than once in a DNA string.
class Solution {
public:
    /// @brief Returns all 10-letter substrings of `s` that occur more than once.
    ///
    /// Each nucleotide is packed into 2 bits, so a 10-letter window maps to a
    /// unique 20-bit integer key. The key is updated in O(1) as the window
    /// slides: shift in the new base and mask off the base that left.
    ///
    /// @param s DNA string; expected to contain only 'A', 'C', 'G' and 'T'.
    ///          Any other character is tolerated: windows that contain it are
    ///          simply never counted.
    /// @return Each repeated window exactly once, ordered by the position of
    ///         its second occurrence. Empty if `s` has fewer than 10 letters.
    std::vector<std::string> findRepeatedDnaSequences(std::string s) {
        using Index = std::string::size_type;

        constexpr Index kWindow = 10;
        // Keeps only the low 2 * kWindow bits, i.e. the last kWindow bases.
        constexpr unsigned kMask = (1u << (2 * kWindow)) - 1u;

        std::vector<std::string> repeated;
        if (s.size() < kWindow) {
            return repeated;
        }

        std::unordered_map<unsigned, int> occurrences;
        unsigned key = 0;  // packed encoding of the most recent valid bases
        Index run = 0;     // number of consecutive valid bases ending at i

        for (Index i = 0; i < s.size(); ++i) {
            unsigned code = 0;
            switch (s[i]) {
                case 'A': code = 0u; break;
                case 'C': code = 1u; break;
                case 'G': code = 2u; break;
                case 'T': code = 3u; break;
                default:
                    // Not a nucleotide: no window may span this position.
                    // Stale bits in `key` are harmless because a full run of
                    // kWindow valid bases shifts them out before the key is
                    // used again.
                    run = 0;
                    continue;
            }

            key = ((key << 2) | code) & kMask;
            if (++run < kWindow) {
                continue;  // window not full yet
            }

            // Report a window the moment it is seen for the second time so
            // that it appears in the result exactly once.
            if (++occurrences[key] == 2) {
                repeated.push_back(s.substr(i + 1 - kWindow, kWindow));
            }
        }
        return repeated;
    }
};