Revert "Allows to choose orthogonal sub-vectors for LSH without using a static table among LshTable instances"
This reverts commit e63d7de87c3565555e4a73220a465ed39488160d.
This commit is contained in:
parent
4c54b287e4
commit
0be18aca81
@ -109,22 +109,10 @@ public:
|
|||||||
*/
|
*/
|
||||||
void buildIndex()
|
void buildIndex()
|
||||||
{
|
{
|
||||||
std::vector<size_t> indices(feature_size_ * CHAR_BIT);
|
|
||||||
|
|
||||||
tables_.resize(table_number_);
|
tables_.resize(table_number_);
|
||||||
for (unsigned int i = 0; i < table_number_; ++i) {
|
for (unsigned int i = 0; i < table_number_; ++i) {
|
||||||
|
|
||||||
//re-initialize the random indices table that the LshTable will use to pick its sub-dimensions
|
|
||||||
if( (indices.size() == feature_size_ * CHAR_BIT) || (indices.size() < key_size_) )
|
|
||||||
{
|
|
||||||
indices.resize( feature_size_ * CHAR_BIT );
|
|
||||||
for (size_t i = 0; i < feature_size_ * CHAR_BIT; ++i)
|
|
||||||
indices[i] = i;
|
|
||||||
std::random_shuffle(indices.begin(), indices.end());
|
|
||||||
}
|
|
||||||
|
|
||||||
lsh::LshTable<ElementType>& table = tables_[i];
|
lsh::LshTable<ElementType>& table = tables_[i];
|
||||||
table = lsh::LshTable<ElementType>(feature_size_, key_size_, indices);
|
table = lsh::LshTable<ElementType>(feature_size_, key_size_);
|
||||||
|
|
||||||
// Add the features to the table
|
// Add the features to the table
|
||||||
table.add(dataset_);
|
table.add(dataset_);
|
||||||
|
@ -153,7 +153,7 @@ public:
|
|||||||
* @param feature_size is the size of the feature (considered as a ElementType[])
|
* @param feature_size is the size of the feature (considered as a ElementType[])
|
||||||
* @param key_size is the number of bits that are turned on in the feature
|
* @param key_size is the number of bits that are turned on in the feature
|
||||||
*/
|
*/
|
||||||
LshTable(unsigned int /*feature_size*/, unsigned int /*key_size*/, std::vector<size_t> & /*indices*/)
|
LshTable(unsigned int /*feature_size*/, unsigned int /*key_size*/)
|
||||||
{
|
{
|
||||||
std::cerr << "LSH is not implemented for that type" << std::endl;
|
std::cerr << "LSH is not implemented for that type" << std::endl;
|
||||||
assert(0);
|
assert(0);
|
||||||
@ -339,20 +339,34 @@ private:
|
|||||||
// Specialization for unsigned char
|
// Specialization for unsigned char
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
inline LshTable<unsigned char>::LshTable( unsigned int feature_size,
|
inline LshTable<unsigned char>::LshTable(unsigned int feature_size, unsigned int subsignature_size)
|
||||||
unsigned int subsignature_size,
|
|
||||||
std::vector<size_t> & indices )
|
|
||||||
{
|
{
|
||||||
initialize(subsignature_size);
|
initialize(subsignature_size);
|
||||||
// Allocate the mask
|
// Allocate the mask
|
||||||
mask_ = std::vector<size_t>((size_t)ceil((float)(feature_size * sizeof(char)) / (float)sizeof(size_t)), 0);
|
mask_ = std::vector<size_t>((size_t)ceil((float)(feature_size * sizeof(char)) / (float)sizeof(size_t)), 0);
|
||||||
|
|
||||||
|
// A bit brutal but fast to code
|
||||||
|
static std::vector<size_t>* indices = NULL;
|
||||||
|
|
||||||
|
//Ensure the Nth bit will be selected only once among the different LshTables
|
||||||
|
//to avoid having two different tables with signatures sharing many dimensions/many bits
|
||||||
|
if( indices == NULL )
|
||||||
|
{
|
||||||
|
indices = new std::vector<size_t>( feature_size * CHAR_BIT );
|
||||||
|
}
|
||||||
|
else if( indices->size() < key_size_ )
|
||||||
|
{
|
||||||
|
indices->resize( feature_size * CHAR_BIT );
|
||||||
|
for (size_t i = 0; i < feature_size * CHAR_BIT; ++i) {
|
||||||
|
(*indices)[i] = i;
|
||||||
|
}
|
||||||
|
std::random_shuffle(indices->begin(), indices->end());
|
||||||
|
}
|
||||||
|
|
||||||
// Generate a random set of order of subsignature_size_ bits
|
// Generate a random set of order of subsignature_size_ bits
|
||||||
for (unsigned int i = 0; i < key_size_; ++i) {
|
for (unsigned int i = 0; i < key_size_; ++i) {
|
||||||
//Ensure the Nth bit will be selected only once among the different LshTables
|
size_t index = (*indices)[0];
|
||||||
//to avoid having two different tables with signatures sharing many dimensions/many bits
|
indices->erase( indices->begin() );
|
||||||
size_t index = indices[0];
|
|
||||||
indices.erase( indices.begin() );
|
|
||||||
|
|
||||||
// Set that bit in the mask
|
// Set that bit in the mask
|
||||||
size_t divisor = CHAR_BIT * sizeof(size_t);
|
size_t divisor = CHAR_BIT * sizeof(size_t);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user