Point Cloud Library (PCL)  1.14.0-dev
decision_tree_trainer.h
1 /*
2  * Software License Agreement (BSD License)
3  *
4  * Point Cloud Library (PCL) - www.pointclouds.org
5  * Copyright (c) 2010-2011, Willow Garage, Inc.
6  *
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * * Redistributions of source code must retain the above copyright
14  * notice, this list of conditions and the following disclaimer.
15  * * Redistributions in binary form must reproduce the above
16  * copyright notice, this list of conditions and the following
17  * disclaimer in the documentation and/or other materials provided
18  * with the distribution.
19  * * Neither the name of Willow Garage, Inc. nor the names of its
20  * contributors may be used to endorse or promote products derived
21  * from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
27  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
29  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
31  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
33  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  *
36  */
37 
38 #pragma once
39 
40 #include <pcl/common/common.h>
41 #include <pcl/ml/dt/decision_tree.h>
42 #include <pcl/ml/dt/decision_tree_data_provider.h>
43 #include <pcl/ml/feature_handler.h>
44 #include <pcl/ml/stats_estimator.h>
45 
46 #include <vector>
47 
48 namespace pcl {
49 
50 /** Trainer for decision trees. */
51 template <class FeatureType,
52  class DataSet,
53  class LabelType,
54  class ExampleIndex,
55  class NodeType>
57 
58 public:
59  /** Constructor. */
61 
62  /** Destructor. */
64 
65  /** Sets the feature handler used to create and evaluate features.
66  *
67  * \param[in] feature_handler the feature handler
68  */
69  inline void
72  {
73  feature_handler_ = &feature_handler;
74  }
75 
76  /** Sets the object for estimating the statistics for tree nodes.
77  *
78  * \param[in] stats_estimator the statistics estimator
79  */
80  inline void
83  {
84  stats_estimator_ = &stats_estimator;
85  }
86 
87  /** Sets the maximum depth of the learned tree.
88  *
89  * \param[in] max_tree_depth maximum depth of the learned tree
90  */
91  inline void
92  setMaxTreeDepth(const std::size_t max_tree_depth)
93  {
94  max_tree_depth_ = max_tree_depth;
95  }
96 
97  /** Sets the number of features used to find optimal decision features.
98  *
99  * \param[in] num_of_features the number of features
100  */
101  inline void
102  setNumOfFeatures(const std::size_t num_of_features)
103  {
104  num_of_features_ = num_of_features;
105  }
106 
107  /** Sets the number of thresholds tested for finding the optimal decision
108  * threshold on the feature responses.
109  *
110  * \param[in] num_of_threshold the number of thresholds
111  */
112  inline void
113  setNumOfThresholds(const std::size_t num_of_threshold)
114  {
115  num_of_thresholds_ = num_of_threshold;
116  }
117 
118  /** Sets the input data set used for training.
119  *
120  * \param[in] data_set the data set used for training
121  */
122  inline void
123  setTrainingDataSet(DataSet& data_set)
124  {
125  data_set_ = data_set;
126  }
127 
128  /** Example indices that specify the data used for training.
129  *
130  * \param[in] examples the examples
131  */
132  inline void
133  setExamples(std::vector<ExampleIndex>& examples)
134  {
135  examples_ = examples;
136  }
137 
138  /** Sets the label data corresponding to the example data.
139  *
140  * \param[in] label_data the label data
141  */
142  inline void
143  setLabelData(std::vector<LabelType>& label_data)
144  {
145  label_data_ = label_data;
146  }
147 
148  /** Sets the minimum number of examples to continue growing a tree.
149  *
150  * \param[in] n number of examples
151  */
152  inline void
153  setMinExamplesForSplit(std::size_t n)
154  {
155  min_examples_for_split_ = n;
156  }
157 
158  /** Specify the thresholds to be used when evaluating features.
159  *
160  * \param[in] thres the threshold values
161  */
162  void
163  setThresholds(std::vector<float>& thres)
164  {
165  thresholds_ = thres;
166  }
167 
168  /** Specify the data provider.
169  *
170  * \param[in] dtdp the data provider that should implement getDatasetAndLabels()
171  * function
172  */
173  void
175  typename pcl::DecisionTreeTrainerDataProvider<FeatureType,
176  DataSet,
177  LabelType,
178  ExampleIndex,
179  NodeType>::Ptr& dtdp)
180  {
181  decision_tree_trainer_data_provider_ = dtdp;
182  }
183 
184  /** Specify if the features are randomly generated at each split node.
185  *
186  * \param[in] b do it or not
187  */
188  void
190  {
191  random_features_at_split_node_ = b;
192  }
193 
194  /** Trains a decision tree using the set training data and settings.
195  *
196  * \param[out] tree destination for the trained tree
197  */
198  void
199  train(DecisionTree<NodeType>& tree);
200 
201 protected:
202  /** Trains a decision tree node from the specified features, label data, and
203  * examples.
204  *
205  * \param[in] features the feature pool used for training
206  * \param[in] examples the examples used for training
207  * \param[in] label_data the label data corresponding to the examples
208  * \param[in] max_depth the maximum depth of the remaining tree
209  * \param[out] node the resulting node
210  */
211  void
212  trainDecisionTreeNode(std::vector<FeatureType>& features,
213  std::vector<ExampleIndex>& examples,
214  std::vector<LabelType>& label_data,
215  std::size_t max_depth,
216  NodeType& node);
217 
218  /** Creates uniformly distributed thresholds over the range of the supplied
219  * values.
220  *
221  * \param[in] num_of_thresholds the number of thresholds to create
222  * \param[in] values the values for estimating the expected value range
223  * \param[out] thresholds the resulting thresholds
224  */
225  static void
226  createThresholdsUniform(const std::size_t num_of_thresholds,
227  std::vector<float>& values,
228  std::vector<float>& thresholds);
229 
230 private:
231  /** Maximum depth of the learned tree. */
232  std::size_t max_tree_depth_{15};
233  /** Number of features used to find optimal decision features. */
234  std::size_t num_of_features_{1000};
235  /** Number of thresholds. */
236  std::size_t num_of_thresholds_{10};
237 
238  /** FeatureHandler instance, responsible for creating and evaluating features. */
240  /** StatsEstimator instance, responsible for gathering stats about a node. */
242  nullptr};
243 
244  /** The training data set. */
245  DataSet data_set_{};
246  /** The label data. */
247  std::vector<LabelType> label_data_{};
248  /** The example data. */
249  std::vector<ExampleIndex> examples_{};
250 
251  /** Minimum number of examples to split a node. */
252  std::size_t min_examples_for_split_{0u};
253  /** Thresholds to be used instead of generating uniform distributed thresholds. */
254  std::vector<float> thresholds_{};
255  /** The data provider which is called before training a specific tree, if pointer is
256  * NULL, then data_set_ is used. */
257  typename pcl::DecisionTreeTrainerDataProvider<FeatureType,
258  DataSet,
259  LabelType,
260  ExampleIndex,
261  NodeType>::Ptr
262  decision_tree_trainer_data_provider_{nullptr};
263  /** If true, random features are generated at each node, otherwise, at start of
264  * training the tree */
265  bool random_features_at_split_node_{false};
266 };
267 
268 } // namespace pcl
269 
270 #include <pcl/ml/impl/dt/decision_tree_trainer.hpp>
Class representing a decision tree.
Definition: decision_tree.h:49
Trainer for decision trees.
void setRandomFeaturesAtSplitNode(bool b)
Specify if the features are randomly generated at each split node.
void setStatsEstimator(pcl::StatsEstimator< LabelType, NodeType, DataSet, ExampleIndex > &stats_estimator)
Sets the object for estimating the statistics for tree nodes.
void setDecisionTreeDataProvider(typename pcl::DecisionTreeTrainerDataProvider< FeatureType, DataSet, LabelType, ExampleIndex, NodeType >::Ptr &dtdp)
Specify the data provider.
void setMaxTreeDepth(const std::size_t max_tree_depth)
Sets the maximum depth of the learned tree.
void setNumOfThresholds(const std::size_t num_of_threshold)
Sets the number of thresholds tested for finding the optimal decision threshold on the feature respon...
void setFeatureHandler(pcl::FeatureHandler< FeatureType, DataSet, ExampleIndex > &feature_handler)
Sets the feature handler used to create and evaluate features.
void setTrainingDataSet(DataSet &data_set)
Sets the input data set used for training.
void setMinExamplesForSplit(std::size_t n)
Sets the minimum number of examples to continue growing a tree.
virtual ~DecisionTreeTrainer()
Destructor.
void setThresholds(std::vector< float > &thres)
Specify the thresholds to be used when evaluating features.
void setLabelData(std::vector< LabelType > &label_data)
Sets the label data corresponding to the example data.
void setExamples(std::vector< ExampleIndex > &examples)
Example indices that specify the data used for training.
void setNumOfFeatures(const std::size_t num_of_features)
Sets the number of features used to find optimal decision features.
DecisionTreeTrainer()
Constructor.
Utility class interface which is used for creating and evaluating features.
Define standard C methods and C++ classes that are common to all methods.
#define PCL_EXPORTS
Definition: pcl_macros.h:323