sptree.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429
  1. #include "pch.h"
  2. /*
  3. *
  4. * Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology)
  5. * All rights reserved.
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions are met:
  9. * 1. Redistributions of source code must retain the above copyright
  10. * notice, this list of conditions and the following disclaimer.
  11. * 2. Redistributions in binary form must reproduce the above copyright
  12. * notice, this list of conditions and the following disclaimer in the
  13. * documentation and/or other materials provided with the distribution.
  14. * 3. All advertising materials mentioning features or use of this software
  15. * must display the following acknowledgement:
  16. * This product includes software developed by the Delft University of Technology.
  17. * 4. Neither the name of the Delft University of Technology nor the names of
  18. * its contributors may be used to endorse or promote products derived from
  19. * this software without specific prior written permission.
  20. *
  21. * THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS
  22. * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  23. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
  24. * EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  26. * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  27. * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
  29. * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
  30. * OF SUCH DAMAGE.
  31. *
  32. */
  33. #include <math.h>
  34. #include <float.h>
  35. #include <stdlib.h>
  36. #include <stdio.h>
  37. #include <cmath>
  38. #include "sptree.h"
  39. // Constructs cell
  40. Cell::Cell(unsigned int inp_dimension) {
  41. dimension = inp_dimension;
  42. corner = (double*) malloc(dimension * sizeof(double));
  43. width = (double*) malloc(dimension * sizeof(double));
  44. }
  45. Cell::Cell(unsigned int inp_dimension, double* inp_corner, double* inp_width) {
  46. dimension = inp_dimension;
  47. corner = (double*) malloc(dimension * sizeof(double));
  48. width = (double*) malloc(dimension * sizeof(double));
  49. for(int d = 0; d < dimension; d++) setCorner(d, inp_corner[d]);
  50. for(int d = 0; d < dimension; d++) setWidth( d, inp_width[d]);
  51. }
  52. // Destructs cell
  53. Cell::~Cell() {
  54. free(corner);
  55. free(width);
  56. }
  57. double Cell::getCorner(unsigned int d) {
  58. return corner[d];
  59. }
  60. double Cell::getWidth(unsigned int d) {
  61. return width[d];
  62. }
  63. void Cell::setCorner(unsigned int d, double val) {
  64. corner[d] = val;
  65. }
  66. void Cell::setWidth(unsigned int d, double val) {
  67. width[d] = val;
  68. }
  69. // Checks whether a point lies in a cell
  70. bool Cell::containsPoint(double point[])
  71. {
  72. for(int d = 0; d < dimension; d++) {
  73. if(corner[d] - width[d] > point[d]) return false;
  74. if(corner[d] + width[d] < point[d]) return false;
  75. }
  76. return true;
  77. }
  78. // Default constructor for SPTree -- build tree, too!
  79. SPTree::SPTree(unsigned int D, double* inp_data, unsigned int N)
  80. {
  81. // Compute mean, width, and height of current map (boundaries of SPTree)
  82. int nD = 0;
  83. double* mean_Y = (double*) calloc(D, sizeof(double));
  84. double* min_Y = (double*) malloc(D * sizeof(double)); for(unsigned int d = 0; d < D; d++) min_Y[d] = DBL_MAX;
  85. double* max_Y = (double*) malloc(D * sizeof(double)); for(unsigned int d = 0; d < D; d++) max_Y[d] = -DBL_MAX;
  86. for(unsigned int n = 0; n < N; n++) {
  87. for(unsigned int d = 0; d < D; d++) {
  88. mean_Y[d] += inp_data[n * D + d];
  89. if(inp_data[nD + d] < min_Y[d]) min_Y[d] = inp_data[nD + d];
  90. if(inp_data[nD + d] > max_Y[d]) max_Y[d] = inp_data[nD + d];
  91. }
  92. nD += D;
  93. }
  94. for(int d = 0; d < D; d++) mean_Y[d] /= (double) N;
  95. // Construct SPTree
  96. double* width = (double*) malloc(D * sizeof(double));
  97. for(int d = 0; d < D; d++) width[d] = fmax(max_Y[d] - mean_Y[d], mean_Y[d] - min_Y[d]) + 1e-5;
  98. init(NULL, D, inp_data, mean_Y, width);
  99. fill(N);
  100. // Clean up memory
  101. free(mean_Y);
  102. free(max_Y);
  103. free(min_Y);
  104. free(width);
  105. }
  106. // Constructor for SPTree with particular size and parent -- build the tree, too!
  107. SPTree::SPTree(unsigned int D, double* inp_data, unsigned int N, double* inp_corner, double* inp_width)
  108. {
  109. init(NULL, D, inp_data, inp_corner, inp_width);
  110. fill(N);
  111. }
  112. // Constructor for SPTree with particular size (do not fill the tree)
  113. SPTree::SPTree(unsigned int D, double* inp_data, double* inp_corner, double* inp_width)
  114. {
  115. init(NULL, D, inp_data, inp_corner, inp_width);
  116. }
  117. // Constructor for SPTree with particular size and parent (do not fill tree)
  118. SPTree::SPTree(SPTree* inp_parent, unsigned int D, double* inp_data, double* inp_corner, double* inp_width) {
  119. init(inp_parent, D, inp_data, inp_corner, inp_width);
  120. }
  121. // Constructor for SPTree with particular size and parent -- build the tree, too!
  122. SPTree::SPTree(SPTree* inp_parent, unsigned int D, double* inp_data, unsigned int N, double* inp_corner, double* inp_width)
  123. {
  124. init(inp_parent, D, inp_data, inp_corner, inp_width);
  125. fill(N);
  126. }
  127. // Main initialization function
  128. void SPTree::init(SPTree* inp_parent, unsigned int D, double* inp_data, double* inp_corner, double* inp_width)
  129. {
  130. parent = inp_parent;
  131. dimension = D;
  132. no_children = 2;
  133. for(unsigned int d = 1; d < D; d++) no_children *= 2;
  134. data = inp_data;
  135. is_leaf = true;
  136. size = 0;
  137. cum_size = 0;
  138. boundary = new Cell(dimension);
  139. for(unsigned int d = 0; d < D; d++) boundary->setCorner(d, inp_corner[d]);
  140. for(unsigned int d = 0; d < D; d++) boundary->setWidth( d, inp_width[d]);
  141. children = (SPTree**) malloc(no_children * sizeof(SPTree*));
  142. for(unsigned int i = 0; i < no_children; i++) children[i] = NULL;
  143. center_of_mass = (double*) malloc(D * sizeof(double));
  144. for(unsigned int d = 0; d < D; d++) center_of_mass[d] = .0;
  145. buff = (double*) malloc(D * sizeof(double));
  146. }
  147. // Destructor for SPTree
  148. SPTree::~SPTree()
  149. {
  150. for(unsigned int i = 0; i < no_children; i++) {
  151. if(children[i] != NULL) delete children[i];
  152. }
  153. free(children);
  154. free(center_of_mass);
  155. free(buff);
  156. delete boundary;
  157. }
  158. // Update the data underlying this tree
  159. void SPTree::setData(double* inp_data)
  160. {
  161. data = inp_data;
  162. }
  163. // Get the parent of the current tree
  164. SPTree* SPTree::getParent()
  165. {
  166. return parent;
  167. }
  168. // Insert a point into the SPTree
  169. bool SPTree::insert(unsigned int new_index)
  170. {
  171. // Ignore objects which do not belong in this quad tree
  172. double* point = data + new_index * dimension;
  173. if(!boundary->containsPoint(point))
  174. return false;
  175. // Online update of cumulative size and center-of-mass
  176. cum_size++;
  177. double mult1 = (double) (cum_size - 1) / (double) cum_size;
  178. double mult2 = 1.0 / (double) cum_size;
  179. for(unsigned int d = 0; d < dimension; d++) center_of_mass[d] *= mult1;
  180. for(unsigned int d = 0; d < dimension; d++) center_of_mass[d] += mult2 * point[d];
  181. // If there is space in this quad tree and it is a leaf, add the object here
  182. if(is_leaf && size < QT_NODE_CAPACITY) {
  183. index[size] = new_index;
  184. size++;
  185. return true;
  186. }
  187. // Don't add duplicates for now (this is not very nice)
  188. bool any_duplicate = false;
  189. for(unsigned int n = 0; n < size; n++) {
  190. bool duplicate = true;
  191. for(unsigned int d = 0; d < dimension; d++) {
  192. if(point[d] != data[index[n] * dimension + d]) { duplicate = false; break; }
  193. }
  194. any_duplicate = any_duplicate | duplicate;
  195. }
  196. if(any_duplicate) return true;
  197. // Otherwise, we need to subdivide the current cell
  198. if(is_leaf) subdivide();
  199. // Find out where the point can be inserted
  200. for(unsigned int i = 0; i < no_children; i++) {
  201. if(children[i]->insert(new_index)) return true;
  202. }
  203. // Otherwise, the point cannot be inserted (this should never happen)
  204. return false;
  205. }
  206. // Create four children which fully divide this cell into four quads of equal area
  207. void SPTree::subdivide() {
  208. // Create new children
  209. double* new_corner = (double*) malloc(dimension * sizeof(double));
  210. double* new_width = (double*) malloc(dimension * sizeof(double));
  211. for(unsigned int i = 0; i < no_children; i++) {
  212. unsigned int div = 1;
  213. for(unsigned int d = 0; d < dimension; d++) {
  214. new_width[d] = .5 * boundary->getWidth(d);
  215. if((i / div) % 2 == 1) new_corner[d] = boundary->getCorner(d) - .5 * boundary->getWidth(d);
  216. else new_corner[d] = boundary->getCorner(d) + .5 * boundary->getWidth(d);
  217. div *= 2;
  218. }
  219. children[i] = new SPTree(this, dimension, data, new_corner, new_width);
  220. }
  221. free(new_corner);
  222. free(new_width);
  223. // Move existing points to correct children
  224. for(unsigned int i = 0; i < size; i++) {
  225. bool success = false;
  226. for(unsigned int j = 0; j < no_children; j++) {
  227. if(!success) success = children[j]->insert(index[i]);
  228. }
  229. index[i] = -1;
  230. }
  231. // Empty parent node
  232. size = 0;
  233. is_leaf = false;
  234. }
  235. // Build SPTree on dataset
  236. void SPTree::fill(unsigned int N)
  237. {
  238. for(unsigned int i = 0; i < N; i++) insert(i);
  239. }
  240. // Checks whether the specified tree is correct
  241. bool SPTree::isCorrect()
  242. {
  243. for(unsigned int n = 0; n < size; n++) {
  244. double* point = data + index[n] * dimension;
  245. if(!boundary->containsPoint(point)) return false;
  246. }
  247. if(!is_leaf) {
  248. bool correct = true;
  249. for(int i = 0; i < no_children; i++) correct = correct && children[i]->isCorrect();
  250. return correct;
  251. }
  252. else return true;
  253. }
  254. // Build a list of all indices in SPTree
  255. void SPTree::getAllIndices(unsigned int* indices)
  256. {
  257. getAllIndices(indices, 0);
  258. }
  259. // Build a list of all indices in SPTree
  260. unsigned int SPTree::getAllIndices(unsigned int* indices, unsigned int loc)
  261. {
  262. // Gather indices in current quadrant
  263. for(unsigned int i = 0; i < size; i++) indices[loc + i] = index[i];
  264. loc += size;
  265. // Gather indices in children
  266. if(!is_leaf) {
  267. for(int i = 0; i < no_children; i++) loc = children[i]->getAllIndices(indices, loc);
  268. }
  269. return loc;
  270. }
  271. unsigned int SPTree::getDepth() {
  272. if(is_leaf) return 1;
  273. int depth = 0;
  274. for(unsigned int i = 0; i < no_children; i++) depth = fmax(depth, children[i]->getDepth());
  275. return 1 + depth;
  276. }
  277. // Compute non-edge forces using Barnes-Hut algorithm
  278. void SPTree::computeNonEdgeForces(unsigned int point_index, double theta, double neg_f[], double* sum_Q)
  279. {
  280. // Make sure that we spend no time on empty nodes or self-interactions
  281. if(cum_size == 0 || (is_leaf && size == 1 && index[0] == point_index)) return;
  282. // Compute distance between point and center-of-mass
  283. double D = .0;
  284. unsigned int ind = point_index * dimension;
  285. for(unsigned int d = 0; d < dimension; d++) buff[d] = data[ind + d] - center_of_mass[d];
  286. for(unsigned int d = 0; d < dimension; d++) D += buff[d] * buff[d];
  287. // Check whether we can use this node as a "summary"
  288. double max_width = 0.0;
  289. double cur_width;
  290. for(unsigned int d = 0; d < dimension; d++) {
  291. cur_width = boundary->getWidth(d);
  292. max_width = (max_width > cur_width) ? max_width : cur_width;
  293. }
  294. if(is_leaf || max_width / sqrt(D) < theta) {
  295. // Compute and add t-SNE force between point and current node
  296. D = 1.0 / (1.0 + D);
  297. double mult = cum_size * D;
  298. *sum_Q += mult;
  299. mult *= D;
  300. for(unsigned int d = 0; d < dimension; d++) neg_f[d] += mult * buff[d];
  301. }
  302. else {
  303. // Recursively apply Barnes-Hut to children
  304. for(unsigned int i = 0; i < no_children; i++) children[i]->computeNonEdgeForces(point_index, theta, neg_f, sum_Q);
  305. }
  306. }
  307. // Computes edge forces
  308. void SPTree::computeEdgeForces(unsigned int* row_P, unsigned int* col_P, double* val_P, int N, double* pos_f)
  309. {
  310. // Loop over all edges in the graph
  311. unsigned int ind1 = 0;
  312. unsigned int ind2 = 0;
  313. double D;
  314. for(unsigned int n = 0; n < N; n++) {
  315. for(unsigned int i = row_P[n]; i < row_P[n + 1]; i++) {
  316. // Compute pairwise distance and Q-value
  317. D = 1.0;
  318. ind2 = col_P[i] * dimension;
  319. for(unsigned int d = 0; d < dimension; d++) buff[d] = data[ind1 + d] - data[ind2 + d];
  320. for(unsigned int d = 0; d < dimension; d++) D += buff[d] * buff[d];
  321. D = val_P[i] / D;
  322. // Sum positive force
  323. for(unsigned int d = 0; d < dimension; d++) pos_f[ind1 + d] += D * buff[d];
  324. }
  325. ind1 += dimension;
  326. }
  327. }
  328. // Print out tree
  329. void SPTree::print()
  330. {
  331. if(cum_size == 0) {
  332. printf("Empty node\n");
  333. return;
  334. }
  335. if(is_leaf) {
  336. printf("Leaf node; data = [");
  337. for(int i = 0; i < size; i++) {
  338. double* point = data + index[i] * dimension;
  339. for(int d = 0; d < dimension; d++) printf("%f, ", point[d]);
  340. printf(" (index = %d)", index[i]);
  341. if(i < size - 1) printf("\n");
  342. else printf("]\n");
  343. }
  344. }
  345. else {
  346. printf("Intersection node with center-of-mass = [");
  347. for(int d = 0; d < dimension; d++) printf("%f, ", center_of_mass[d]);
  348. printf("]; children are:\n");
  349. for(int i = 0; i < no_children; i++) children[i]->print();
  350. }
  351. }