Revert "Allows to choose orthogonal sub-vectors for LSH without using a static table among LshTable instances"
This reverts commit e63d7de87c3565555e4a73220a465ed39488160d.
This commit is contained in:
parent
4c54b287e4
commit
0be18aca81
@ -109,22 +109,10 @@ public:
|
||||
*/
|
||||
void buildIndex()
|
||||
{
|
||||
std::vector<size_t> indices(feature_size_ * CHAR_BIT);
|
||||
|
||||
tables_.resize(table_number_);
|
||||
for (unsigned int i = 0; i < table_number_; ++i) {
|
||||
|
||||
//re-initialize the random indices table that the LshTable will use to pick its sub-dimensions
|
||||
if( (indices.size() == feature_size_ * CHAR_BIT) || (indices.size() < key_size_) )
|
||||
{
|
||||
indices.resize( feature_size_ * CHAR_BIT );
|
||||
for (size_t i = 0; i < feature_size_ * CHAR_BIT; ++i)
|
||||
indices[i] = i;
|
||||
std::random_shuffle(indices.begin(), indices.end());
|
||||
}
|
||||
|
||||
lsh::LshTable<ElementType>& table = tables_[i];
|
||||
table = lsh::LshTable<ElementType>(feature_size_, key_size_, indices);
|
||||
table = lsh::LshTable<ElementType>(feature_size_, key_size_);
|
||||
|
||||
// Add the features to the table
|
||||
table.add(dataset_);
|
||||
|
@ -153,7 +153,7 @@ public:
|
||||
* @param feature_size is the size of the feature (considered as a ElementType[])
|
||||
* @param key_size is the number of bits that are turned on in the feature
|
||||
*/
|
||||
LshTable(unsigned int /*feature_size*/, unsigned int /*key_size*/, std::vector<size_t> & /*indices*/)
|
||||
LshTable(unsigned int /*feature_size*/, unsigned int /*key_size*/)
|
||||
{
|
||||
std::cerr << "LSH is not implemented for that type" << std::endl;
|
||||
assert(0);
|
||||
@ -339,20 +339,34 @@ private:
|
||||
// Specialization for unsigned char
|
||||
|
||||
template<>
|
||||
inline LshTable<unsigned char>::LshTable( unsigned int feature_size,
|
||||
unsigned int subsignature_size,
|
||||
std::vector<size_t> & indices )
|
||||
inline LshTable<unsigned char>::LshTable(unsigned int feature_size, unsigned int subsignature_size)
|
||||
{
|
||||
initialize(subsignature_size);
|
||||
// Allocate the mask
|
||||
mask_ = std::vector<size_t>((size_t)ceil((float)(feature_size * sizeof(char)) / (float)sizeof(size_t)), 0);
|
||||
|
||||
// A bit brutal but fast to code
|
||||
static std::vector<size_t>* indices = NULL;
|
||||
|
||||
//Ensure the Nth bit will be selected only once among the different LshTables
|
||||
//to avoid having two different tables with signatures sharing many dimensions/many bits
|
||||
if( indices == NULL )
|
||||
{
|
||||
indices = new std::vector<size_t>( feature_size * CHAR_BIT );
|
||||
}
|
||||
else if( indices->size() < key_size_ )
|
||||
{
|
||||
indices->resize( feature_size * CHAR_BIT );
|
||||
for (size_t i = 0; i < feature_size * CHAR_BIT; ++i) {
|
||||
(*indices)[i] = i;
|
||||
}
|
||||
std::random_shuffle(indices->begin(), indices->end());
|
||||
}
|
||||
|
||||
// Generate a random set of order of subsignature_size_ bits
|
||||
for (unsigned int i = 0; i < key_size_; ++i) {
|
||||
//Ensure the Nth bit will be selected only once among the different LshTables
|
||||
//to avoid having two different tables with signatures sharing many dimensions/many bits
|
||||
size_t index = indices[0];
|
||||
indices.erase( indices.begin() );
|
||||
size_t index = (*indices)[0];
|
||||
indices->erase( indices->begin() );
|
||||
|
||||
// Set that bit in the mask
|
||||
size_t divisor = CHAR_BIT * sizeof(size_t);
|
||||
|
Loading…
x
Reference in New Issue
Block a user