SHOGUN  v3.0.0
orderTree.h
Go to the documentation of this file.
1 /* This program is free software: you can redistribute it and/or modify
2  * it under the terms of the GNU General Public License as published by
3  * the Free Software Foundation, either version 3 of the License, or
4  * (at your option) any later version.
5  *
6  * This program is distributed in the hope that it will be useful,
7  * but WITHOUT ANY WARRANTY; without even the implied warranty of
8  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9  * GNU General Public License for more details.
10  *
11  * You should have received a copy of the GNU General Public License
12  * along with this program. If not, see <http://www.gnu.org/licenses/>.
13  *
14  * Copyright (C) 2009 - 2012 Jun Liu and Jieping Ye
15  */
16
17 #ifndef ORDERTREE_SLEP
18 #define ORDERTREE_SLEP
19
20 #define IGNORE_IN_CLASSLIST
21
22 #include <stdlib.h>
23 #include <stdio.h>
24 #include <time.h>
25 #include <math.h>
26
27
28 /*
29  * In this file, we propose an O(n^2) algorithm for solving the problem:
30  *
31  * min 1/2 \|x - u\|^2
32  * s.t. x_i \ge x_j \ge 0, (i,j) \in I,
33  *
34  * where I is the edge set of the tree
35  *
36  *
37  */
38
39 /*
40  * Last updated on January, 21, 2011
41  *
42  * 1) the function merge is a non-recursive process for merging one tree with the other
43  *
44  * 2) we follow the writeup to revise the function computeMaximalMean
45  *
46  */
47
48 #ifndef DOXYGEN_SHOULD_SKIP_THIS
49 IGNORE_IN_CLASSLIST struct NodeNum
50 {
51  int node_num;
52  struct NodeNum *next;
53 };
54
55 IGNORE_IN_CLASSLIST struct ChildrenNum
56 {
57  int children_num;
58  int *children;
59 };
60
61 IGNORE_IN_CLASSLIST struct Node
62 {
63  int flag; /*if the maximal root-tree of the subtree rooted at this node has been computed, flag=1, otherwise 0*/
64  double m; /*During the computation, it stores the maximal mean from this node to (grandson) child node
65  *The number of nodes on this path is stored in num
66  *
67  *It is intialized with the value of u(node_num)
68  */
69  int num; /*the number of nodes, whose avarage gives the maximal mean---x*/
70  struct Node *brother; /*the pointer to the brother node(s)*/
71  struct Node *child; /*the pointer to the child node(s)*/
72  struct NodeNum *firstNode; /*the first node in the "maximal mean" group*/
73  struct NodeNum *lastNode; /*the last node in the "maximal mean" group*/
74 };
75 #endif
76
77 /*
78  * We build a tree with the input from a file
79  *
80  * The file has n rows represented in the following format
81  *
82  | parent | number of children | children
83  18 3 10 13 17
84  10 3 5 8 9
85  13 2 11 12
86  17 3 13 14 15
87  5 2 1 4
88  8 2 6 7
89  9 0
90  11 0
91  12 0
92  14 0
93  15 0
94  16 0
95  1 0
96  4 2 2 3
97  6 0
98  7 0
99  2 0
100  3 0
101  *
102  *
103  * Each row provides the information of one parent node and its children
104  *
105  * If a parent node is not included in any row, it is regarded that it is the leaf node.
106  * For example, it is valid that the rows with zero children can be deleted.
107  *
108  * Node number is numbered from 1 to n, where n denotes the number of nodes.
109  *
110  * In the program, we deduct the number by 1, as C starts from 0, instead of 1.
111  *
112  */
113
114 void readFromFile(char * FileName, struct ChildrenNum ** TreeInfo, int n){
115  FILE *fp;
116  struct ChildrenNum * treeInfo;
117  int i, j, num, nodeId;
118
119
120  fp=fopen(FileName, "r");
121
122  if(!fp){
123  printf("\n\n Fatal Error!!!");
124  printf("\n\n Failure in reading the file:%s!", FileName);
125  printf("\n\n The program does not check the correctness of the tree provided in the file: %s!", FileName);
126  return;
127  }
128
129  treeInfo=(struct ChildrenNum *)malloc(sizeof(struct ChildrenNum)*n);
130
131  if(!treeInfo){
132  printf("\n Allocation of treeInfo failure!");
133  return;
134  }
135
136  for(i=0;i<n;i++){
137  treeInfo[i].children_num=0;
138  treeInfo[i].children=NULL;
139  }
140
141
142  while (!feof(fp)) {
143
144  i=-1;num=-1;
145  if ( fscanf(fp, "%d %d", &i, &num)!=2){
146
147  /*if this is due to extra spaces/breaks etc., we terminate reading the file */
148  if(feof(fp))
149  break;
150
151  printf("\n For each row, it should has at least two numbers: nodeNum and number of children");
152  return;
153  }
154
155  if (i>n || i<1){
156  printf("\n The node number should be between [1, %d]!",n);
157  return;
158  }
159
160  i=i-1;
161  /*i=i-1, as C starts from 0 instead of 1*/
162  if (num>0){
163  treeInfo[i].children_num=num;
164
165  treeInfo[i].children=(int *)malloc(sizeof(int)*num);
166
167  if(!treeInfo[i].children){
168  printf("\n Allocation of treeInfo failure!");
169  return;
170  }
171
172  for(j=0;j<num;j++){
173  if(!fscanf(fp, "%d", &nodeId) ){
174  printf("\n This row should have %d children nodes!", num);
175  return;
176  }
177  else{
178  if (nodeId>n || nodeId<1){
179  printf("\n The node number should be between [1, %d]!", n);
180  return;
181  }
182
183  treeInfo[i].children[j]=nodeId-1;
184  /*add -1, as C starts from 0 instead of 1*/
185  }
186
187  }
188  }
189  }
190
191  fclose(fp);
192
193  /*
194  printf("\n In readFromFile!");
195  for(i=0;i<n;i++){
196  printf("\n %d: %d:",i, treeInfo[i].children_num);
197
198  for(j=0;j<treeInfo[i].children_num;j++)
199  printf(" %d", treeInfo[i].children[j]);
200  }
201  printf("\n Out of readFromFile!");
202  */
203
204
205  *TreeInfo=treeInfo;/*set value for TreeInfo*/
206 }
207
208
209 /*
210  *
211  * We build the tree in a recursive manner
212  *
213  */
214 void buildTree(struct Node* root, struct ChildrenNum * treeInfo, double *u){
215
216
217  struct Node * newNode;
218  struct NodeNum * currentNode;
219  int currentRoot=root->firstNode->node_num;
220  int numberOfChildren=treeInfo[currentRoot].children_num;
221  int i;
222
223  /* insert the children nodes of the current root
224  */
225  for(i=0;i<numberOfChildren;i++){
226
227
228  newNode=(struct Node *)malloc(sizeof(struct Node));
229  currentNode=(struct NodeNum *)malloc(sizeof(struct NodeNum));
230
231  if(!newNode){
232  printf("\n Allocation in buildTree failure!");
233  return;
234  }
235
236  if(!currentNode){
237  printf("\n Allocation in buildTree failure!");
238  return;
239  }
240
241
242  newNode->flag=0;
243  newNode->m=u[treeInfo[currentRoot].children[i]];
244  newNode->num=1;
245  newNode->child=NULL;
246
247  currentNode->node_num=treeInfo[currentRoot].children[i];
248  currentNode->next=NULL;
249  newNode->firstNode=newNode->lastNode=currentNode;
250
251  /*
252  * insert newnode to be the children nodes of root
253  *
254  */
255  newNode->brother=root->child;
256  root->child=newNode;
257
258  /*
259  * treat newNode as the root, and add its children
260  *
261  */
262
263  buildTree(newNode, treeInfo, u);
264  }
265 }
266
267 /*
268  * initilize the root, which means that the tree is built by this function.
269  * as the root is the starting point of a tree
270  *
271  * we use the input file for building the tree
272  */
273
274 void initializeRoot(struct Node ** Root, char * FileName, double *u, int rootNum, int n){
275
276  struct NodeNum * currentNode;
277  struct Node *root;
278  struct ChildrenNum * treeInfo;
279  int i;
280
281  /*read the from the file to construct treeInfo*/
282  readFromFile(FileName, &treeInfo, n);
283
284  if(rootNum>n || rootNum <1){
285  printf("\n The node number of the root should be between [1, %d]!", n);
286  return;
287  }
288
289  rootNum=rootNum-1;
290  /*add -1, as C starts from 0 instead of 1*/
291
292  root=(struct Node *)malloc(sizeof(struct Node));
293  currentNode=(struct NodeNum *)malloc(sizeof(struct NodeNum));
294
295  if(!root){
296  printf("\n Allocation in computeGroups failure!");
297  return;
298  }
299
300  if(!currentNode){
301  printf("\n Allocation in computeGroups failure!");
302  return;
303  }
304
305
306  root->flag=0;
307  root->m=u[rootNum];
308  root->num=1;
309  root->brother=root->child=NULL;
310
311  currentNode->node_num=rootNum;
312  currentNode->next=NULL;
313  root->firstNode=root->lastNode=currentNode;
314
315  /*build the tree using buildTree*/
316  buildTree(root, treeInfo, u);
317
318  /*free treeInfo*/
319  for(i=0;i<n;i++){
320  if (treeInfo[i].children_num)
321  free(treeInfo[i].children);
322  }
323  free(treeInfo);
324
325  *Root=root;
326 }
327
328
329
330 /*
331  * initilize the root for the full binary tree
332  *
333  * We do not need to give the input file, as binary tree is very special
334  */
335
336 void initializeRootBinary(struct Node ** Root, double *u, int n){
337
338  struct NodeNum * currentNode;
339  struct Node *root;
340  struct ChildrenNum * treeInfo;
341  int rootNum=1;
342  int i, half=n/2;
343
344  /*
345  *
346  * readFromFile(FileName, &treeInfo, n);
347  *
348  * Replace the above function.
349  *
350  * we build treeInfo here directly using the special structure
351  *
352  */
353
354  treeInfo=(struct ChildrenNum *)malloc(sizeof(struct ChildrenNum)*n);
355  if(!treeInfo){
356  printf("\n Allocation of treeInfo failure!");
357  return;
358  }
359
360  for(i=0;i<half;i++){
361  treeInfo[i].children_num=2;
362  treeInfo[i].children=(int *)malloc(sizeof(int)*2);
363  treeInfo[i].children[0]=2*i+1;
364  treeInfo[i].children[1]=2*i+2;
365  }
366
367  for(i=half;i<n;i++){
368  treeInfo[i].children_num=0;
369  treeInfo[i].children=NULL;
370  }
371
372
373  rootNum=rootNum-1;
374  /*add -1, as C starts from 0 instead of 1*/
375
376  root=(struct Node *)malloc(sizeof(struct Node));
377  currentNode=(struct NodeNum *)malloc(sizeof(struct NodeNum));
378
379  if(!root){
380  printf("\n Allocation in computeGroups failure!");
381  return;
382  }
383
384  if(!currentNode){
385  printf("\n Allocation in computeGroups failure!");
386  return;
387  }
388
389
390  root->flag=0;
391  root->m=u[rootNum];
392  root->num=1;
393  root->brother=root->child=NULL;
394
395  currentNode->node_num=rootNum;
396  currentNode->next=NULL;
397  root->firstNode=root->lastNode=currentNode;
398
399  /*build the tree using buildTree*/
400  buildTree(root, treeInfo, u);
401
402  /*free treeInfo*/
403  for(i=0;i<half;i++){
404  free(treeInfo[i].children);
405  }
406  free(treeInfo);
407
408  *Root=root;
409 }
410
411
412 /*
413  * initilize the root for the full binary tree
414  *
415  * We do not need to give the input file, as tree of depth 1 is very special
416  */
417
418 void initializeRootDepth1(struct Node ** Root, double *u, int n){
419
420  struct NodeNum * currentNode;
421  struct Node *root;
422  struct ChildrenNum * treeInfo;
423  int rootNum=1;
424  int i;
425
426  /*
427  * readFromFile(FileName, &treeInfo, n);
428  *
429  * we build treeInfo here, using the special structure of the tree with depth 1
430  *
431  */
432
433  treeInfo=(struct ChildrenNum *)malloc(sizeof(struct ChildrenNum)*n);
434  if(!treeInfo){
435  printf("\n Allocation of treeInfo failure!");
436  return;
437  }
438
439  for(i=0;i<n;i++){
440  treeInfo[i].children_num=0;
441  treeInfo[i].children=NULL;
442  }
443
444  /*process the root*/
445  if (n>1){
446  treeInfo[0].children_num=n-1;
447  treeInfo[0].children=(int *)malloc(sizeof(int)*(n-1));
448  for(i=1;i<n;i++)
449  treeInfo[0].children[i-1]=i;
450  }
451
452  rootNum=rootNum-1;
453  /*add -1, as C starts from 0 instead of 1*/
454
455  root=(struct Node *)malloc(sizeof(struct Node));
456  currentNode=(struct NodeNum *)malloc(sizeof(struct NodeNum));
457
458  if(!root){
459  printf("\n Allocation in computeGroups failure!");
460  return;
461  }
462
463  if(!currentNode){
464  printf("\n Allocation in computeGroups failure!");
465  return;
466  }
467
468
469  root->flag=0;
470  root->m=u[rootNum];
471  root->num=1;
472  root->brother=root->child=NULL;
473
474  currentNode->node_num=rootNum;
475  currentNode->next=NULL;
476  root->firstNode=root->lastNode=currentNode;
477
478  /*build the tree using buildTree*/
479  buildTree(root, treeInfo, u);
480
481  /*free treeInfo*/
482  if(n>1)
483  free(treeInfo[0].children);
484  free(treeInfo);
485
486  *Root=root;
487 }
488
489
490
491 /*
492  * merge root with maxNode
493  */
494 void merge(struct Node * root, struct Node * maxNode ){
495  struct Node * childrenNode, *maxNodeChild;
496
497  root->m= (root->m* root->num + maxNode->m *maxNode->num)/(root->num + maxNode->num);
498  root->num+=maxNode->num;
499  root->lastNode->next=maxNode->firstNode;
500  root->lastNode=maxNode->lastNode;
501
502  /*
503  * update the brother list of maxNode (when removing maxNode)
504  *
505  */
506  if (root->child==maxNode){
507  root->child=maxNode->brother;
508  }
509  else{
510  childrenNode=root->child;
511
512  while(childrenNode->brother!=maxNode){
513  childrenNode=childrenNode->brother;
514  }
515  /*childrenNode's brother is maxNode*/
516  childrenNode->brother=maxNode->brother;
517  }
518
519
520  /*
521  * change the children of maxNode to the children of root
522  */
523  maxNodeChild=maxNode->child;
524  if (maxNodeChild){
525  /*if maxNode has at least a child*/
526
527  while(maxNodeChild->brother)
528  maxNodeChild=maxNodeChild->brother;
529  /*maxNodeChild points to the last child of maxNode*/
530
531  maxNodeChild->brother=root->child;
532  root->child=maxNode->child;
533  }
534
535  /*
536  * remove maxNode from the children list of root
537  */
538  free(maxNode);
539
540 }
541
542
543
544 /*
545  * compute the maximal mean for each node
546  *
547  */
548
549 void computeMaximalMean(struct Node * root){
550  struct Node * childrenNode, *maxNode;
551  double mean;
552
553  /*if root already points to a leaf node, we do nothing*/
554  if(!root->child){
555
556  /*the value of a maximal root-tree is non-negative*/
557  if (root->m <0)
558  root->m =0;
559
560  root->flag=1;
561  return;
562  }
563
564  /*the following loop corresponds to line 5-20 of the algorithm*/
565  while(1){
566
567  childrenNode=root->child;
568  if(!childrenNode){
569
570  if (root->m <0)
571  root->m =0;
572
573  root->flag=1;
574  return;
575  }
576
577  /*we note that, childrenNode->m >=0*/
578
579  mean=0;
580
581  /*visit all its children nodes, to get the maximal "mean" and corresponding maxNode*/
582  while(childrenNode){
583
584  /*if the maximal root-tree at childrenNode is not computed, we compute it*/
585  if (!childrenNode->flag)
586  computeMaximalMean(childrenNode);
587
588  if (childrenNode->m >= mean){
589  mean=childrenNode->m;
590  maxNode=childrenNode;
591  }
592
593  childrenNode=childrenNode->brother;
594  }
595
596  if ( (root->m <= 0) && (mean==0) ){
597  /* merge root with all its children, in this case,
598  * its children is a super-node
599  * (thus does not has any other children, due to merge)*/
600
601  childrenNode=root->child;
602  while(childrenNode){
603  merge(root, childrenNode);
604  childrenNode=root->child;
605  }
606
607  root->m =0;
608  root->flag=1;
609  return;
610  }
611
612  if (root->m > mean){
613
614  root->flag=1;
615  return;
616  }
617
618  merge(root,maxNode);
619  }
620
621 }
622
623
624
625 /*
626  * compute the maximal mean for each node, without the non-negative constraint
627  *
628  * Composed on November 23, 2011.
629  *
630  */
631
632 void computeMaximalMean_without_nonnegative(struct Node * root){
633  struct Node * childrenNode, *maxNode;
634  double mean;
635  int mean_flag;
636
637  /*if root already points to a leaf node, we do nothing*/
638  if(!root->child){
639
640  /*the value of a maximal root-tree is not necessarily non-negative, when the non-negative constraint is not imposed*/
641
642  /*
643  The following is removed
644  if (root->m <0)
645  root->m =0;
646  */
647
648
649  root->flag=1;
650  return;
651  }
652
653  /*the following loop corresponds to line 5-20 of the algorithm */
654  while(1){
655
656  childrenNode=root->child;
657  if(!childrenNode){
658
659  /*the value of a maximal root-tree is not necessarily non-negative, when the non-negative constraint is not imposed*/
660
661  /*
662  The following is removed
663
664  if (root->m <0)
665  root->m =0;
666  */
667
668  root->flag=1;
669  return;
670  }
671
672  /*we note that, childrenNode->m >=0 does not necesarily hold.
673  Therefore, for mean, we need to initialize with a small value. We initialize it with the value of its first child node
674  */
675
676  mean_flag=0; /*0 denotes that "mean" has not been really specified*/
677
678  /*visit all its children nodes, to get the maximal "mean" and corresponding maxNode*/
679  while(childrenNode){
680
681  /*if the maximal root-tree at childrenNode is not computed, we compute it*/
682  if (!childrenNode->flag)
684
685  /*if mean has not been specified, let us specify it, and set mean_flag to 1*/
686  if (!mean_flag){
687  mean=childrenNode->m;
688  mean_flag=1;
689  }
690
691  if (childrenNode->m >= mean){
692  mean=childrenNode->m;
693  maxNode=childrenNode;
694  }
695
696  childrenNode=childrenNode->brother;
697  }
698
699  if (root->m > mean){
700
701  root->flag=1;
702  return;
703  }
704
705  merge(root,maxNode);
706  }
707
708 }
709
710
711 /*
712  * computeSolution
713  *
714  */
715
716
717 void computeSolution(double *x, struct Node *root){
718  struct Node * child;
719  struct NodeNum *currentNode;
720  double mean;
721
722  if (root){
723  /*
724  * process the root
725  *
726  * set the value for x
727  */
728
729  mean=root->m;
730
731  currentNode=root->firstNode;
732  while(currentNode){
733  x[currentNode->node_num]=mean;
734  currentNode=currentNode->next;
735  }
736
737  /*process the children of root*/
738  child=root->child;
739  while(child){
740  computeSolution(x, child);
741
742  child=child->brother;
743  }
744  }
745 }
746
747 /*
748  * traverse the tree
749  * used for debugging the correctness of the code
750  */
751
752 void traversalTree(struct Node *root){
753  struct Node * child;
754  struct NodeNum *currentNode;
755
756  if (root){
757  printf("\n\n root->m =%2.5f, num:%d \n Nodes:",root->m,root->num);
758
759  currentNode=root->firstNode;
760  while(currentNode){
761  printf(" %d ", currentNode->node_num);
762  currentNode=currentNode->next;
763  }
764
765  printf("\n root: %d, child:", root->m);
766
767  /*print out the children of root*/
768  child=root->child;
769  while(child){
770  printf(" %d", child->m);
771  child=child->brother;
772  }
773
774  /*print out the children of children*/
775  child=root->child;
776  while(child){
777  traversalTree(child);
778
779  child=child->brother;
780  }
781  }
782 }
783
784
785
786
787
788 /*
789  * free the dynamic space generated by alloc
790  */
791
792 void deleteTree(struct Node *root){
793  struct Node *child, *temp;
794  struct NodeNum *currentNode;
795
796  if (root){
797
798  child=root->child;
799
800  while(child){
801
802  temp=child->brother;
803  /*point to its brother*/
804
805  deleteTree(child);
806  /*free its chlidren*/
807
808  child=temp;
809  }
810
811  /*
812  * free root
813  *
814  * 1. free NodeNum pointed by firstNode and lastNode
815  * 2. free Node
816  */
817  currentNode=root->firstNode;
818  while(currentNode){
819  root->firstNode=currentNode->next;
820  free(currentNode);
821
822  currentNode=root->firstNode;
823  }
824  root->lastNode=NULL;
825  free(root);
826  }
827 }
828
829 /*
830  * This is the main function for the general tree
831  *
832  */
833
834 void orderTree(double *x, char * FileName, double *u, int rootNum, int n){
835  struct Node * root;
836
837  /*
838  * build the tree using initializeRoot
839  */
840  initializeRoot(&root, FileName, u, rootNum, n);
841
842  /*
843  printf("\n\n Before computation");
844  traversalTree(root);
845  */
846
847
848  /*
849  * compute the maximal average for each node
850  */
851
852  computeMaximalMean(root);
853
854
855  /*compute the solution from the tree*/
856
857  computeSolution(x, root);
858
859
860  /*
861  printf("\n\n After computation");
862  traversalTree(root);
863  */
864
865
866  /* delete the tree
867  */
868  deleteTree(root);
869 }
870
871
872 /*
873  * This is the main function for the general tree, without the non-negative constraint
874  *
875  */
876
877 void orderTree_without_nonnegative(double *x, char * FileName, double *u, int rootNum, int n){
878  struct Node * root;
879
880  /*
881  * build the tree using initializeRoot
882  */
883  initializeRoot(&root, FileName, u, rootNum, n);
884
885  /*
886  printf("\n\n Before computation");
887  traversalTree(root);
888  */
889
890
891  /*
892  * compute the maximal average for each node
893  */
894
896
897
898  /*compute the solution from the tree*/
899
900  computeSolution(x, root);
901
902
903  /*
904  printf("\n\n After computation");
905  traversalTree(root);
906  */
907
908
909  /* delete the tree
910  */
911  deleteTree(root);
912 }
913
914
915
916 /*
917  * This is the main function for the full binary tree
918  *
919  */
920
921 void orderTreeBinary(double *x, double *u, int n){
922  struct Node * root;
923
924  /*
925  * build the tree using initializeRootBinary for the binary tree
926  *
927  * please make sure that n=2^{depth +1} -1
928  *
929  */
930
931  initializeRootBinary(&root, u, n);
932
933  /*
934  printf("\n\n Before computation");
935  traversalTree(root);
936  */
937
938
939  /*
940  * compute the maximal average for each node
941  */
942
943  computeMaximalMean(root);
944
945
946  /*compute the solution from the tree*/
947
948  computeSolution(x, root);
949
950
951  /*
952  printf("\n\n After computation");
953  traversalTree(root);
954  */
955
956
957  /* delete the tree
958  */
959  deleteTree(root);
960 }
961
962
963 /*
964  * This is the main function for the tree with depth 1
965  *
966  */
967
968 void orderTreeDepth1(double *x, double *u, int n){
969  struct Node * root;
970
971  /*
972  * build the tree using initializeRootDepth1 for the tree with depth 1
973  *
974  */
975
976  initializeRootDepth1(&root, u, n);
977
978  /*
979  printf("\n\n Before computation");
980  traversalTree(root);
981  */
982
983
984  /*
985  * compute the maximal average for each node
986  */
987
988  computeMaximalMean(root);
989
990
991  /*compute the solution from the tree*/
992
993  computeSolution(x, root);
994
995
996  /*
997  printf("\n\n After computation");
998  traversalTree(root);
999  */
1000
1001
1002  /* delete the tree
1003  */
1004  deleteTree(root);
1005 }
1006 #endif /* ----- #ifndef ORDERTREE_SLEP ----- */

SHOGUN Machine Learning Toolbox - Documentation