123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429 |
- #include "pch.h"
- /*
- *
- * Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology)
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the Delft University of Technology.
- * 4. Neither the name of the Delft University of Technology nor the names of
- * its contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS
- * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
- * EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
- * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
- * OF SUCH DAMAGE.
- *
- */
- #include <math.h>
- #include <float.h>
- #include <stdlib.h>
- #include <stdio.h>
- #include <cmath>
- #include "sptree.h"
- // Constructs cell
- Cell::Cell(unsigned int inp_dimension) {
- dimension = inp_dimension;
- corner = (double*) malloc(dimension * sizeof(double));
- width = (double*) malloc(dimension * sizeof(double));
- }
- Cell::Cell(unsigned int inp_dimension, double* inp_corner, double* inp_width) {
- dimension = inp_dimension;
- corner = (double*) malloc(dimension * sizeof(double));
- width = (double*) malloc(dimension * sizeof(double));
- for(int d = 0; d < dimension; d++) setCorner(d, inp_corner[d]);
- for(int d = 0; d < dimension; d++) setWidth( d, inp_width[d]);
- }
- // Destructs cell
- Cell::~Cell() {
- free(corner);
- free(width);
- }
- double Cell::getCorner(unsigned int d) {
- return corner[d];
- }
- double Cell::getWidth(unsigned int d) {
- return width[d];
- }
- void Cell::setCorner(unsigned int d, double val) {
- corner[d] = val;
- }
- void Cell::setWidth(unsigned int d, double val) {
- width[d] = val;
- }
- // Checks whether a point lies in a cell
- bool Cell::containsPoint(double point[])
- {
- for(int d = 0; d < dimension; d++) {
- if(corner[d] - width[d] > point[d]) return false;
- if(corner[d] + width[d] < point[d]) return false;
- }
- return true;
- }
- // Default constructor for SPTree -- build tree, too!
- SPTree::SPTree(unsigned int D, double* inp_data, unsigned int N)
- {
-
- // Compute mean, width, and height of current map (boundaries of SPTree)
- int nD = 0;
- double* mean_Y = (double*) calloc(D, sizeof(double));
- double* min_Y = (double*) malloc(D * sizeof(double)); for(unsigned int d = 0; d < D; d++) min_Y[d] = DBL_MAX;
- double* max_Y = (double*) malloc(D * sizeof(double)); for(unsigned int d = 0; d < D; d++) max_Y[d] = -DBL_MAX;
- for(unsigned int n = 0; n < N; n++) {
- for(unsigned int d = 0; d < D; d++) {
- mean_Y[d] += inp_data[n * D + d];
- if(inp_data[nD + d] < min_Y[d]) min_Y[d] = inp_data[nD + d];
- if(inp_data[nD + d] > max_Y[d]) max_Y[d] = inp_data[nD + d];
- }
- nD += D;
- }
- for(int d = 0; d < D; d++) mean_Y[d] /= (double) N;
-
- // Construct SPTree
- double* width = (double*) malloc(D * sizeof(double));
- for(int d = 0; d < D; d++) width[d] = fmax(max_Y[d] - mean_Y[d], mean_Y[d] - min_Y[d]) + 1e-5;
- init(NULL, D, inp_data, mean_Y, width);
- fill(N);
-
- // Clean up memory
- free(mean_Y);
- free(max_Y);
- free(min_Y);
- free(width);
- }
- // Constructor for SPTree with particular size and parent -- build the tree, too!
- SPTree::SPTree(unsigned int D, double* inp_data, unsigned int N, double* inp_corner, double* inp_width)
- {
- init(NULL, D, inp_data, inp_corner, inp_width);
- fill(N);
- }
- // Constructor for SPTree with particular size (do not fill the tree)
- SPTree::SPTree(unsigned int D, double* inp_data, double* inp_corner, double* inp_width)
- {
- init(NULL, D, inp_data, inp_corner, inp_width);
- }
- // Constructor for SPTree with particular size and parent (do not fill tree)
- SPTree::SPTree(SPTree* inp_parent, unsigned int D, double* inp_data, double* inp_corner, double* inp_width) {
- init(inp_parent, D, inp_data, inp_corner, inp_width);
- }
- // Constructor for SPTree with particular size and parent -- build the tree, too!
- SPTree::SPTree(SPTree* inp_parent, unsigned int D, double* inp_data, unsigned int N, double* inp_corner, double* inp_width)
- {
- init(inp_parent, D, inp_data, inp_corner, inp_width);
- fill(N);
- }
- // Main initialization function
- void SPTree::init(SPTree* inp_parent, unsigned int D, double* inp_data, double* inp_corner, double* inp_width)
- {
- parent = inp_parent;
- dimension = D;
- no_children = 2;
- for(unsigned int d = 1; d < D; d++) no_children *= 2;
- data = inp_data;
- is_leaf = true;
- size = 0;
- cum_size = 0;
-
- boundary = new Cell(dimension);
- for(unsigned int d = 0; d < D; d++) boundary->setCorner(d, inp_corner[d]);
- for(unsigned int d = 0; d < D; d++) boundary->setWidth( d, inp_width[d]);
-
- children = (SPTree**) malloc(no_children * sizeof(SPTree*));
- for(unsigned int i = 0; i < no_children; i++) children[i] = NULL;
- center_of_mass = (double*) malloc(D * sizeof(double));
- for(unsigned int d = 0; d < D; d++) center_of_mass[d] = .0;
-
- buff = (double*) malloc(D * sizeof(double));
- }
- // Destructor for SPTree
- SPTree::~SPTree()
- {
- for(unsigned int i = 0; i < no_children; i++) {
- if(children[i] != NULL) delete children[i];
- }
- free(children);
- free(center_of_mass);
- free(buff);
- delete boundary;
- }
- // Update the data underlying this tree
- void SPTree::setData(double* inp_data)
- {
- data = inp_data;
- }
- // Get the parent of the current tree
- SPTree* SPTree::getParent()
- {
- return parent;
- }
- // Insert a point into the SPTree
- bool SPTree::insert(unsigned int new_index)
- {
- // Ignore objects which do not belong in this quad tree
- double* point = data + new_index * dimension;
- if(!boundary->containsPoint(point))
- return false;
-
- // Online update of cumulative size and center-of-mass
- cum_size++;
- double mult1 = (double) (cum_size - 1) / (double) cum_size;
- double mult2 = 1.0 / (double) cum_size;
- for(unsigned int d = 0; d < dimension; d++) center_of_mass[d] *= mult1;
- for(unsigned int d = 0; d < dimension; d++) center_of_mass[d] += mult2 * point[d];
-
- // If there is space in this quad tree and it is a leaf, add the object here
- if(is_leaf && size < QT_NODE_CAPACITY) {
- index[size] = new_index;
- size++;
- return true;
- }
-
- // Don't add duplicates for now (this is not very nice)
- bool any_duplicate = false;
- for(unsigned int n = 0; n < size; n++) {
- bool duplicate = true;
- for(unsigned int d = 0; d < dimension; d++) {
- if(point[d] != data[index[n] * dimension + d]) { duplicate = false; break; }
- }
- any_duplicate = any_duplicate | duplicate;
- }
- if(any_duplicate) return true;
-
- // Otherwise, we need to subdivide the current cell
- if(is_leaf) subdivide();
-
- // Find out where the point can be inserted
- for(unsigned int i = 0; i < no_children; i++) {
- if(children[i]->insert(new_index)) return true;
- }
-
- // Otherwise, the point cannot be inserted (this should never happen)
- return false;
- }
-
- // Create four children which fully divide this cell into four quads of equal area
- void SPTree::subdivide() {
-
- // Create new children
- double* new_corner = (double*) malloc(dimension * sizeof(double));
- double* new_width = (double*) malloc(dimension * sizeof(double));
- for(unsigned int i = 0; i < no_children; i++) {
- unsigned int div = 1;
- for(unsigned int d = 0; d < dimension; d++) {
- new_width[d] = .5 * boundary->getWidth(d);
- if((i / div) % 2 == 1) new_corner[d] = boundary->getCorner(d) - .5 * boundary->getWidth(d);
- else new_corner[d] = boundary->getCorner(d) + .5 * boundary->getWidth(d);
- div *= 2;
- }
- children[i] = new SPTree(this, dimension, data, new_corner, new_width);
- }
- free(new_corner);
- free(new_width);
-
- // Move existing points to correct children
- for(unsigned int i = 0; i < size; i++) {
- bool success = false;
- for(unsigned int j = 0; j < no_children; j++) {
- if(!success) success = children[j]->insert(index[i]);
- }
- index[i] = -1;
- }
-
- // Empty parent node
- size = 0;
- is_leaf = false;
- }
- // Build SPTree on dataset
- void SPTree::fill(unsigned int N)
- {
- for(unsigned int i = 0; i < N; i++) insert(i);
- }
- // Checks whether the specified tree is correct
- bool SPTree::isCorrect()
- {
- for(unsigned int n = 0; n < size; n++) {
- double* point = data + index[n] * dimension;
- if(!boundary->containsPoint(point)) return false;
- }
- if(!is_leaf) {
- bool correct = true;
- for(int i = 0; i < no_children; i++) correct = correct && children[i]->isCorrect();
- return correct;
- }
- else return true;
- }
- // Build a list of all indices in SPTree
- void SPTree::getAllIndices(unsigned int* indices)
- {
- getAllIndices(indices, 0);
- }
- // Build a list of all indices in SPTree
- unsigned int SPTree::getAllIndices(unsigned int* indices, unsigned int loc)
- {
-
- // Gather indices in current quadrant
- for(unsigned int i = 0; i < size; i++) indices[loc + i] = index[i];
- loc += size;
-
- // Gather indices in children
- if(!is_leaf) {
- for(int i = 0; i < no_children; i++) loc = children[i]->getAllIndices(indices, loc);
- }
- return loc;
- }
- unsigned int SPTree::getDepth() {
- if(is_leaf) return 1;
- int depth = 0;
- for(unsigned int i = 0; i < no_children; i++) depth = fmax(depth, children[i]->getDepth());
- return 1 + depth;
- }
- // Compute non-edge forces using Barnes-Hut algorithm
- void SPTree::computeNonEdgeForces(unsigned int point_index, double theta, double neg_f[], double* sum_Q)
- {
-
- // Make sure that we spend no time on empty nodes or self-interactions
- if(cum_size == 0 || (is_leaf && size == 1 && index[0] == point_index)) return;
-
- // Compute distance between point and center-of-mass
- double D = .0;
- unsigned int ind = point_index * dimension;
- for(unsigned int d = 0; d < dimension; d++) buff[d] = data[ind + d] - center_of_mass[d];
- for(unsigned int d = 0; d < dimension; d++) D += buff[d] * buff[d];
-
- // Check whether we can use this node as a "summary"
- double max_width = 0.0;
- double cur_width;
- for(unsigned int d = 0; d < dimension; d++) {
- cur_width = boundary->getWidth(d);
- max_width = (max_width > cur_width) ? max_width : cur_width;
- }
- if(is_leaf || max_width / sqrt(D) < theta) {
-
- // Compute and add t-SNE force between point and current node
- D = 1.0 / (1.0 + D);
- double mult = cum_size * D;
- *sum_Q += mult;
- mult *= D;
- for(unsigned int d = 0; d < dimension; d++) neg_f[d] += mult * buff[d];
- }
- else {
- // Recursively apply Barnes-Hut to children
- for(unsigned int i = 0; i < no_children; i++) children[i]->computeNonEdgeForces(point_index, theta, neg_f, sum_Q);
- }
- }
- // Computes edge forces
- void SPTree::computeEdgeForces(unsigned int* row_P, unsigned int* col_P, double* val_P, int N, double* pos_f)
- {
-
- // Loop over all edges in the graph
- unsigned int ind1 = 0;
- unsigned int ind2 = 0;
- double D;
- for(unsigned int n = 0; n < N; n++) {
- for(unsigned int i = row_P[n]; i < row_P[n + 1]; i++) {
-
- // Compute pairwise distance and Q-value
- D = 1.0;
- ind2 = col_P[i] * dimension;
- for(unsigned int d = 0; d < dimension; d++) buff[d] = data[ind1 + d] - data[ind2 + d];
- for(unsigned int d = 0; d < dimension; d++) D += buff[d] * buff[d];
- D = val_P[i] / D;
-
- // Sum positive force
- for(unsigned int d = 0; d < dimension; d++) pos_f[ind1 + d] += D * buff[d];
- }
- ind1 += dimension;
- }
- }
- // Print out tree
- void SPTree::print()
- {
- if(cum_size == 0) {
- printf("Empty node\n");
- return;
- }
- if(is_leaf) {
- printf("Leaf node; data = [");
- for(int i = 0; i < size; i++) {
- double* point = data + index[i] * dimension;
- for(int d = 0; d < dimension; d++) printf("%f, ", point[d]);
- printf(" (index = %d)", index[i]);
- if(i < size - 1) printf("\n");
- else printf("]\n");
- }
- }
- else {
- printf("Intersection node with center-of-mass = [");
- for(int d = 0; d < dimension; d++) printf("%f, ", center_of_mass[d]);
- printf("]; children are:\n");
- for(int i = 0; i < no_children; i++) children[i]->print();
- }
- }
|