-
Notifications
You must be signed in to change notification settings - Fork 33
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Daniel Lemire
committed
Feb 4, 2024
1 parent
ee7f0f1
commit 4d26cbc
Showing
17 changed files
with
1,336 additions
and
1,428 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,105 +1,111 @@ | ||
#include <fstream> | ||
#include <string> | ||
#include "cyclichash.h" | ||
#include "rabinkarphash.h" | ||
#include "generalhash.h" | ||
#include "rabinkarphash.h" | ||
#include "threewisehash.h" | ||
#include "ztimer.h" | ||
#include <fstream> | ||
#include <string> | ||
|
||
using namespace std; | ||
|
||
|
||
template<class hashfunction> | ||
double hashALot( int n, int L, uint ttimes,uint sizeoftest , vector<uint32> & recorder) { | ||
ZTimer t; | ||
for(uint times = 0; times<ttimes; ++times) { | ||
hashfunction hf(n,L); | ||
for(uint k = 0; k<static_cast<uint>(n); ++k) { | ||
hf.eat(static_cast<unsigned char>(k)); | ||
} | ||
for(uint k = n; k<sizeoftest; ++k) { | ||
hf.update(static_cast<unsigned char>(k-n),static_cast<unsigned char>(k)); | ||
} | ||
/* The goal of the recorder is to prevent | ||
the compiler from deciding that this whole computation | ||
is not required! | ||
*/ | ||
recorder.push_back(hf.hashvalue); | ||
template <class hashfunction> | ||
double hashALot(int n, int L, uint ttimes, uint sizeoftest, | ||
vector<uint32> &recorder) { | ||
ZTimer t; | ||
for (uint times = 0; times < ttimes; ++times) { | ||
hashfunction hf(n, L); | ||
for (uint k = 0; k < static_cast<uint>(n); ++k) { | ||
hf.eat(static_cast<unsigned char>(k)); | ||
} | ||
for (uint k = n; k < sizeoftest; ++k) { | ||
hf.update(static_cast<unsigned char>(k - n), | ||
static_cast<unsigned char>(k)); | ||
} | ||
return t.split()/(1000.0*ttimes); | ||
/* The goal of the recorder is to prevent | ||
the compiler from deciding that this whole computation | ||
is not required! | ||
*/ | ||
recorder.push_back(hf.hashvalue); | ||
} | ||
return t.split() / (1000.0 * ttimes); | ||
} | ||
|
||
|
||
template<class hashfunction> | ||
double hashALot( int n, int L, uint ttimes , vector<uint32> & recorder, vector<unsigned char> & data) { | ||
ZTimer t; | ||
for(uint times = 0; times<ttimes; ++times) { | ||
hashfunction hf(n,L); | ||
for(uint k = 0; k<static_cast<uint>(n); ++k) { | ||
hf.eat(data[k]); | ||
} | ||
for(uint k = n; k<data.size(); ++k) { | ||
hf.update(data[k-n],data[k]); | ||
} | ||
/* The goal of the recorder is to prevent | ||
the compiler from deciding that this whole computation | ||
is not required! | ||
*/ | ||
recorder.push_back(hf.hashvalue); | ||
template <class hashfunction> | ||
double hashALot(int n, int L, uint ttimes, vector<uint32> &recorder, | ||
vector<unsigned char> &data) { | ||
ZTimer t; | ||
for (uint times = 0; times < ttimes; ++times) { | ||
hashfunction hf(n, L); | ||
for (uint k = 0; k < static_cast<uint>(n); ++k) { | ||
hf.eat(data[k]); | ||
} | ||
for (uint k = n; k < data.size(); ++k) { | ||
hf.update(data[k - n], data[k]); | ||
} | ||
return t.split()/1000.0; | ||
/* The goal of the recorder is to prevent | ||
the compiler from deciding that this whole computation | ||
is not required! | ||
*/ | ||
recorder.push_back(hf.hashvalue); | ||
} | ||
return t.split() / 1000.0; | ||
} | ||
|
||
void synthetic() { | ||
int L = 19; | ||
vector<uint32> recorder; | ||
uint sizeoftest = 100000000; | ||
cout<<"#n three-wise General BufferedGeneral Cyclic Karp-Rabin "<<endl; | ||
for(uint n = 1; n+L<=32; ++n) { | ||
cout<<n<<" "<<hashALot<ThreeWiseHash<> >(n,L,1,sizeoftest,recorder)<<" "; | ||
cout<<hashALot<GeneralHash<NOPRECOMP> >(n,L,1,sizeoftest,recorder)<<" "; | ||
cout<<hashALot<GeneralHash<FULLPRECOMP> >(n,L,1,sizeoftest,recorder)<<" "; | ||
cout<<hashALot<CyclicHash<> >(n,L+n,1,sizeoftest,recorder)<< " "; | ||
cout<<hashALot<KarpRabinHash<> >(n,L,1,sizeoftest,recorder)<<endl; | ||
} | ||
cout <<"# L= "<<L<<" char-length= "<<sizeoftest<<endl; | ||
int L = 19; | ||
vector<uint32> recorder; | ||
uint sizeoftest = 100000000; | ||
cout << "#n three-wise General BufferedGeneral Cyclic Karp-Rabin " << endl; | ||
for (uint n = 1; n + L <= 32; ++n) { | ||
cout << n << " " << hashALot<ThreeWiseHash<>>(n, L, 1, sizeoftest, recorder) | ||
<< " "; | ||
cout << hashALot<GeneralHash<NOPRECOMP>>(n, L, 1, sizeoftest, recorder) | ||
<< " "; | ||
cout << hashALot<GeneralHash<FULLPRECOMP>>(n, L, 1, sizeoftest, recorder) | ||
<< " "; | ||
cout << hashALot<CyclicHash<>>(n, L + n, 1, sizeoftest, recorder) << " "; | ||
cout << hashALot<KarpRabinHash<>>(n, L, 1, sizeoftest, recorder) << endl; | ||
} | ||
cout << "# L= " << L << " char-length= " << sizeoftest << endl; | ||
} | ||
|
||
/**
 * Appends the content of a text file to data, line by line.
 *
 * Every line of the file is appended, including the first one. Line
 * terminators are consumed by getline and are not stored. If the file cannot
 * be opened, data is left untouched.
 *
 * @param data     destination buffer; existing content is kept
 * @param filename path of the file to read
 */
void grabFileContent(std::vector<unsigned char> &data, std::string filename) {
  std::ifstream file(filename.c_str());
  std::string line;
  // Testing the stream after each read stops exactly at end of file and never
  // skips or re-processes a line.
  while (std::getline(file, line)) {
    data.insert(data.end(), line.begin(), line.end());
  }
  // The ifstream destructor closes the file.
}
/**
 * Benchmarks every hash family on the content of a file and prints one row
 * per window length n, for all n with n + L <= 32.
 *
 * Each row holds n followed by the hashALot timing of ThreeWiseHash,
 * GeneralHash<NOPRECOMP>, GeneralHash<FULLPRECOMP>, CyclicHash (constructed
 * with L + n instead of L) and KarpRabinHash. A trailing comment line reports
 * L, the number of bytes read and the repeat count.
 *
 * If the file yields fewer bytes than the largest window (32 - L), a
 * diagnostic is written to cerr and nothing is benchmarked. Priming a window
 * longer than the buffer would read past its end.
 *
 * @param filename path of the file whose content is hashed
 */
void realdata(string filename) {
  int L = 19;
  vector<uint32> recorder;
  uint repeats = 1;
  vector<unsigned char> data;
  grabFileContent(data, filename);

  // Largest n reached by the loop below.
  const uint maxWindow = 32 - L;
  if (data.size() < maxWindow) {
    cerr << "# cannot benchmark '" << filename << "': need at least "
         << maxWindow << " bytes, got " << data.size() << endl;
    return;
  }

  cout << "#n three-wise General BufferedGeneral Cyclic Karp-Rabin " << endl;
  for (uint n = 1; n + L <= 32; ++n) {
    cout << n << " " << hashALot<ThreeWiseHash<>>(n, L, repeats, recorder, data)
         << " ";
    cout << hashALot<GeneralHash<NOPRECOMP>>(n, L, repeats, recorder, data)
         << " ";
    cout << hashALot<GeneralHash<FULLPRECOMP>>(n, L, repeats, recorder, data)
         << " ";
    cout << hashALot<CyclicHash<>>(n, L + n, repeats, recorder, data) << " ";
    cout << hashALot<KarpRabinHash<>>(n, L, repeats, recorder, data) << endl;
  }
  cout << "# L= " << L << " char-length= " << data.size()
       << " repeats=" << repeats << endl;
}
|
||
int main(int params, char ** args) { | ||
if (params == 1) | ||
synthetic(); | ||
else | ||
realdata(args[1]); | ||
int main(int params, char **args) { | ||
if (params == 1) | ||
synthetic(); | ||
else | ||
realdata(args[1]); | ||
|
||
return 0; | ||
return 0; | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,56 +1,45 @@ | ||
#include <iostream> | ||
#include <memory> | ||
#include <string> | ||
#include <vector> | ||
#include <memory> | ||
#include <iostream> | ||
|
||
#include "rabinkarphash.h" | ||
|
||
int main(int argc, char * argv[]) | ||
{ | ||
size_t q = 3; | ||
size_t k = 4; | ||
typedef KarpRabinHash<> HashFunction; | ||
std::vector<std::unique_ptr<HashFunction> > hashPtr(q); | ||
for(size_t z = 0; z < hashPtr.size(); ++z) | ||
{ | ||
std::unique_ptr<HashFunction> & ptr = hashPtr[z]; | ||
ptr.reset(new HashFunction(k, 12)); | ||
} | ||
|
||
std::string str = "ACGTAACGT"; | ||
for (size_t j = 0; j < k; j++) | ||
{ | ||
for(size_t z = 0; z < hashPtr.size(); ++z) | ||
{ | ||
std::unique_ptr<HashFunction> & ptr = hashPtr[z]; | ||
ptr->eat(str[j]); | ||
} | ||
int main() { | ||
size_t q = 3; | ||
size_t k = 4; | ||
typedef KarpRabinHash<> HashFunction; | ||
std::vector<std::unique_ptr<HashFunction>> hashPtr(q); | ||
for (size_t z = 0; z < hashPtr.size(); ++z) { | ||
std::unique_ptr<HashFunction> &ptr = hashPtr[z]; | ||
ptr.reset(new HashFunction(k, 12)); | ||
} | ||
|
||
std::string str = "ACGTAACGT"; | ||
for (size_t j = 0; j < k; j++) { | ||
for (size_t z = 0; z < hashPtr.size(); ++z) { | ||
std::unique_ptr<HashFunction> &ptr = hashPtr[z]; | ||
ptr->eat(str[j]); | ||
} | ||
} | ||
|
||
for (size_t i = 0;; i++) | ||
{ | ||
std::cout << std::string(str.begin() + i, str.begin() + i + k); | ||
for(size_t z = 0; z < hashPtr.size(); ++z) | ||
{ | ||
std::unique_ptr<HashFunction> & ptr = hashPtr[z]; | ||
std::cout << ' ' << ptr->hashvalue; | ||
} | ||
for (size_t i = 0;; i++) { | ||
std::cout << std::string(str.begin() + i, str.begin() + i + k); | ||
for (size_t z = 0; z < hashPtr.size(); ++z) { | ||
std::unique_ptr<HashFunction> &ptr = hashPtr[z]; | ||
std::cout << ' ' << ptr->hashvalue; | ||
} | ||
|
||
std::cout << std::endl; | ||
if (i + k < str.size()) | ||
{ | ||
for(size_t z = 0; z < hashPtr.size(); ++z) | ||
{ | ||
std::unique_ptr<HashFunction> & ptr = hashPtr[z]; | ||
ptr->update(str[i], str[i + k]); | ||
} | ||
} | ||
else | ||
{ | ||
break; | ||
} | ||
std::cout << std::endl; | ||
if (i + k < str.size()) { | ||
for (size_t z = 0; z < hashPtr.size(); ++z) { | ||
std::unique_ptr<HashFunction> &ptr = hashPtr[z]; | ||
ptr->update(str[i], str[i + k]); | ||
} | ||
} else { | ||
break; | ||
} | ||
} | ||
|
||
return 0; | ||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,34 +1,34 @@ | ||
#include <iostream> | ||
#include <memory> | ||
#include <string> | ||
#include <vector> | ||
#include <memory> | ||
#include <iostream> | ||
|
||
// given hash value of "ABCD", can I have value of | ||
// "ABCDE", without computing the whole hash value? | ||
|
||
#include "cyclichash.h" | ||
|
||
|
||
int main(int argc, char * argv[]) | ||
{ | ||
CyclicHash<> hf(5,19); | ||
string input = "ABCDE"; | ||
hf.eat(input[0]);//A | ||
hf.eat(input[1]);//B | ||
hf.eat(input[2]);//C | ||
hf.eat(input[3]);//D | ||
cout<<"Hash value of ABCD is " << hf.hashvalue << endl; | ||
// we check the answer going the long way... | ||
const std::vector<unsigned char> charvectslice(input.begin(), input.begin()+4); | ||
uint32_t trueanswerslice = hf.hash(charvectslice); | ||
if(trueanswerslice != hf.hashvalue ) throw runtime_error("bug"); | ||
// we continue | ||
hf.eat(input[4]);//E | ||
cout<<"Hash value of ABCDE is " << hf.hashvalue << endl; | ||
// we check the answer going the long way | ||
const std::vector<unsigned char> charvect(input.begin(), input.end()); | ||
uint32_t trueanswer = hf.hash(charvect); | ||
if(trueanswer != hf.hashvalue ) throw runtime_error("bug"); | ||
return 0; | ||
|
||
int main() { | ||
CyclicHash<> hf(5, 19); | ||
string input = "ABCDE"; | ||
hf.eat(input[0]); // A | ||
hf.eat(input[1]); // B | ||
hf.eat(input[2]); // C | ||
hf.eat(input[3]); // D | ||
cout << "Hash value of ABCD is " << hf.hashvalue << endl; | ||
// we check the answer going the long way... | ||
const std::vector<unsigned char> charvectslice(input.begin(), | ||
input.begin() + 4); | ||
uint32_t trueanswerslice = hf.hash(charvectslice); | ||
if (trueanswerslice != hf.hashvalue) | ||
throw runtime_error("bug"); | ||
// we continue | ||
hf.eat(input[4]); // E | ||
cout << "Hash value of ABCDE is " << hf.hashvalue << endl; | ||
// we check the answer going the long way | ||
const std::vector<unsigned char> charvect(input.begin(), input.end()); | ||
uint32_t trueanswer = hf.hash(charvect); | ||
if (trueanswer != hf.hashvalue) | ||
throw runtime_error("bug"); | ||
return 0; | ||
} |
Oops, something went wrong.