Skip to content

Commit

Permalink
format
Browse files Browse the repository at this point in the history
  • Loading branch information
Daniel Lemire committed Feb 4, 2024
1 parent ee7f0f1 commit 4d26cbc
Show file tree
Hide file tree
Showing 17 changed files with 1,336 additions and 1,428 deletions.
170 changes: 88 additions & 82 deletions benchmarks/speedtesting.cpp
Original file line number Diff line number Diff line change
@@ -1,105 +1,111 @@
#include <fstream>
#include <string>
#include "cyclichash.h"
#include "rabinkarphash.h"
#include "generalhash.h"
#include "rabinkarphash.h"
#include "threewisehash.h"
#include "ztimer.h"
#include <fstream>
#include <string>

using namespace std;


template<class hashfunction>
double hashALot( int n, int L, uint ttimes,uint sizeoftest , vector<uint32> & recorder) {
ZTimer t;
for(uint times = 0; times<ttimes; ++times) {
hashfunction hf(n,L);
for(uint k = 0; k<static_cast<uint>(n); ++k) {
hf.eat(static_cast<unsigned char>(k));
}
for(uint k = n; k<sizeoftest; ++k) {
hf.update(static_cast<unsigned char>(k-n),static_cast<unsigned char>(k));
}
/* The goal of the recorder is to prevent
the compiler from deciding that this whole computation
is not required!
*/
recorder.push_back(hf.hashvalue);
template <class hashfunction>
double hashALot(int n, int L, uint ttimes, uint sizeoftest,
vector<uint32> &recorder) {
ZTimer t;
for (uint times = 0; times < ttimes; ++times) {
hashfunction hf(n, L);
for (uint k = 0; k < static_cast<uint>(n); ++k) {
hf.eat(static_cast<unsigned char>(k));
}
for (uint k = n; k < sizeoftest; ++k) {
hf.update(static_cast<unsigned char>(k - n),
static_cast<unsigned char>(k));
}
return t.split()/(1000.0*ttimes);
/* The goal of the recorder is to prevent
the compiler from deciding that this whole computation
is not required!
*/
recorder.push_back(hf.hashvalue);
}
return t.split() / (1000.0 * ttimes);
}


template<class hashfunction>
double hashALot( int n, int L, uint ttimes , vector<uint32> & recorder, vector<unsigned char> & data) {
ZTimer t;
for(uint times = 0; times<ttimes; ++times) {
hashfunction hf(n,L);
for(uint k = 0; k<static_cast<uint>(n); ++k) {
hf.eat(data[k]);
}
for(uint k = n; k<data.size(); ++k) {
hf.update(data[k-n],data[k]);
}
/* The goal of the recorder is to prevent
the compiler from deciding that this whole computation
is not required!
*/
recorder.push_back(hf.hashvalue);
template <class hashfunction>
double hashALot(int n, int L, uint ttimes, vector<uint32> &recorder,
vector<unsigned char> &data) {
ZTimer t;
for (uint times = 0; times < ttimes; ++times) {
hashfunction hf(n, L);
for (uint k = 0; k < static_cast<uint>(n); ++k) {
hf.eat(data[k]);
}
for (uint k = n; k < data.size(); ++k) {
hf.update(data[k - n], data[k]);
}
return t.split()/1000.0;
/* The goal of the recorder is to prevent
the compiler from deciding that this whole computation
is not required!
*/
recorder.push_back(hf.hashvalue);
}
return t.split() / 1000.0;
}

// Runs the synthetic benchmark and prints one row per window length n.
//
// For every n with n + L <= 32 (L fixed at 19) each hash family is timed once
// over a 100000000-byte synthetic stream via hashALot. CyclicHash is built
// with L + n as its second argument, the other families with L.
// Output goes to cout: a '#'-prefixed header, the rows, and a '#'-prefixed
// footer with the parameters.
void synthetic() {
  int L = 19;
  vector<uint32> recorder;
  uint sizeoftest = 100000000;
  cout << "#n three-wise General BufferedGeneral Cyclic Karp-Rabin " << endl;
  for (uint n = 1; n + L <= 32; ++n) {
    cout << n << " " << hashALot<ThreeWiseHash<>>(n, L, 1, sizeoftest, recorder)
         << " ";
    cout << hashALot<GeneralHash<NOPRECOMP>>(n, L, 1, sizeoftest, recorder)
         << " ";
    cout << hashALot<GeneralHash<FULLPRECOMP>>(n, L, 1, sizeoftest, recorder)
         << " ";
    cout << hashALot<CyclicHash<>>(n, L + n, 1, sizeoftest, recorder) << " ";
    cout << hashALot<KarpRabinHash<>>(n, L, 1, sizeoftest, recorder) << endl;
  }
  cout << "# L= " << L << " char-length= " << sizeoftest << endl;
}

// Appends the contents of the text file `filename` to `data`, line by line.
//
// Line terminators are not copied (std::getline strips them). If the file
// cannot be opened nothing is appended.
//
// data     - destination buffer; existing contents are kept
// filename - path of the file to read
void grabFileContent(std::vector<unsigned char> &data, std::string filename) {
  std::ifstream file(filename.c_str());
  std::string line;
  // Testing the stream state right after each read keeps every line that was
  // actually read, including the first one and a final line that has no
  // trailing newline.
  while (std::getline(file, line)) {
    data.insert(data.end(), line.begin(), line.end());
  }
  // The ifstream destructor closes the file.
}
// Runs the benchmark on the contents of a file and prints one row per window
// length n.
//
// The file is loaded with grabFileContent; then, for every n with
// n + L <= 32 (L fixed at 19), each hash family is timed over the loaded
// bytes via hashALot. CyclicHash is built with L + n as its second argument,
// the other families with L. Output goes to cout: a '#'-prefixed header, the
// rows, and a '#'-prefixed footer with the parameters.
//
// filename - path of the input file
void realdata(string filename) {
  int L = 19;
  vector<uint32> recorder;
  uint repeats = 1;
  vector<unsigned char> data;
  grabFileContent(data, filename);
  // Windows of up to 32 - L bytes are benchmarked below; an unreadable or
  // too-short file cannot fill the largest window, so stop here.
  if (data.size() < static_cast<uint>(32 - L)) {
    cerr << "# not enough data read from " << filename << " (" << data.size()
         << " bytes)" << endl;
    return;
  }
  cout << "#n three-wise General BufferedGeneral Cyclic Karp-Rabin " << endl;
  for (uint n = 1; n + L <= 32; ++n) {
    cout << n << " " << hashALot<ThreeWiseHash<>>(n, L, repeats, recorder, data)
         << " ";
    cout << hashALot<GeneralHash<NOPRECOMP>>(n, L, repeats, recorder, data)
         << " ";
    cout << hashALot<GeneralHash<FULLPRECOMP>>(n, L, repeats, recorder, data)
         << " ";
    cout << hashALot<CyclicHash<>>(n, L + n, repeats, recorder, data) << " ";
    cout << hashALot<KarpRabinHash<>>(n, L, repeats, recorder, data) << endl;
  }
  cout << "# L= " << L << " char-length= " << data.size()
       << " repeats=" << repeats << endl;
}

// Entry point of the speed test.
//
// Without command-line arguments (params == 1) the synthetic benchmark runs;
// otherwise the first argument is passed to realdata() as the input file.
int main(int params, char **args) {
  if (params == 1)
    synthetic();
  else
    realdata(args[1]);

  return 0;
}

77 changes: 33 additions & 44 deletions examples/example.cpp
Original file line number Diff line number Diff line change
@@ -1,56 +1,45 @@
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include <memory>
#include <iostream>

#include "rabinkarphash.h"

// Example: rolls q independent KarpRabinHash instances over the string
// "ACGTAACGT" with a window of k characters, printing each window followed by
// the current hash value of every instance.
int main() {
  size_t q = 3;
  size_t k = 4;
  typedef KarpRabinHash<> HashFunction;
  std::vector<std::unique_ptr<HashFunction>> hashPtr(q);
  for (size_t z = 0; z < hashPtr.size(); ++z) {
    std::unique_ptr<HashFunction> &ptr = hashPtr[z];
    ptr.reset(new HashFunction(k, 12));
  }

  // Prime every hasher with the first k characters.
  std::string str = "ACGTAACGT";
  for (size_t j = 0; j < k; j++) {
    for (size_t z = 0; z < hashPtr.size(); ++z) {
      std::unique_ptr<HashFunction> &ptr = hashPtr[z];
      ptr->eat(str[j]);
    }
  }

  // Print the window starting at i, then slide by one character until the
  // window reaches the end of the string.
  for (size_t i = 0;; i++) {
    std::cout << std::string(str.begin() + i, str.begin() + i + k);
    for (size_t z = 0; z < hashPtr.size(); ++z) {
      std::unique_ptr<HashFunction> &ptr = hashPtr[z];
      std::cout << ' ' << ptr->hashvalue;
    }

    std::cout << std::endl;
    if (i + k < str.size()) {
      for (size_t z = 0; z < hashPtr.size(); ++z) {
        std::unique_ptr<HashFunction> &ptr = hashPtr[z];
        ptr->update(str[i], str[i + k]);
      }
    } else {
      break;
    }
  }

  return 0;
}
50 changes: 25 additions & 25 deletions examples/example2.cpp
Original file line number Diff line number Diff line change
@@ -1,34 +1,34 @@
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include <memory>
#include <iostream>

// given hash value of "ABCD", can I have value of
// "ABCDE", without computing the whole hash value?

#include "cyclichash.h"


// Example: extends the hash of "ABCD" to the hash of "ABCDE" with a single
// eat() call, and checks both incremental values against hf.hash() computed
// over the whole character vector. Throws std::runtime_error on a mismatch.
int main() {
  CyclicHash<> hf(5, 19);
  std::string input = "ABCDE";
  hf.eat(input[0]); // A
  hf.eat(input[1]); // B
  hf.eat(input[2]); // C
  hf.eat(input[3]); // D
  std::cout << "Hash value of ABCD is " << hf.hashvalue << std::endl;
  // we check the answer going the long way...
  const std::vector<unsigned char> charvectslice(input.begin(),
                                                 input.begin() + 4);
  uint32_t trueanswerslice = hf.hash(charvectslice);
  if (trueanswerslice != hf.hashvalue)
    throw std::runtime_error("bug");
  // we continue
  hf.eat(input[4]); // E
  std::cout << "Hash value of ABCDE is " << hf.hashvalue << std::endl;
  // we check the answer going the long way
  const std::vector<unsigned char> charvect(input.begin(), input.end());
  uint32_t trueanswer = hf.hash(charvect);
  if (trueanswer != hf.hashvalue)
    throw std::runtime_error("bug");
  return 0;
}
Loading

0 comments on commit 4d26cbc

Please sign in to comment.