11 #include <boost/array.hpp>
24 class Histogram :
public boost::array<std::uint32_t,NUMLABELS> {
26 inline Histogram() { std::fill(begin(), end(), 0); }
// Copy NUMATTRIBS elements from B.attribs into attribs, element by element.
76 std::copy(
B.attribs,
B.attribs + NUMATTRIBS,
attribs );
// Sum h[0..NUMLABELS-1] into a 64-bit total. Each entry is converted to
// std::uint64_t before it is added, so the accumulation is done in 64 bits.
85 std::uint64_t Ntotal = 0;
86 for(
int li=0;li<NUMLABELS;++li) Ntotal += std::uint64_t(h[li]);
96 for(
int li=0;li<NUMLABELS;++li) {
98 double p = double(h[li]) / Ntotal;
114 for(
int li=0;li<NUMLABELS;++li) {
115 std::uint64_t Ni = std::uint64_t(htrue[li]) + std::uint64_t(hfalse[li]);
117 double p = double(Ni) / Ntotal;
// Combined count of Ntrue and Nfalse, held as a double so the ratios below
// are computed in floating point.
134 double Ntotal = Ntrue + Nfalse;
// Guard for the divisions below: with a zero total the result is defined as 0.
137 if( Ntotal == 0 )
return 0.;
// e0 minus etrue and efalse, each scaled by its side's fraction of Ntotal.
138 return e0 - (Ntrue/Ntotal)*etrue - (Nfalse/Ntotal)*efalse;
// Write every entry h[0..NUMLABELS-1] to os in index order. Each value is
// followed by one space, so the output ends with a trailing space.
145 for(
int li=0;li<NUMLABELS;++li) os<< h[li]<<
" ";
// Extract NUMLABELS values from is into h[0..NUMLABELS-1], in index order.
// The stream state is not checked in the lines shown here.
151 for(
int li=0;li<NUMLABELS;++li) is >> h[li];
// Write all elements of lfs to os as one raw block of
// sizeof(LabeledFeature)*lfs.size() bytes, exactly as they are laid out in
// memory (no per-field formatting).
// data() replaces &lfs[0]: indexing element 0 of an empty vector is undefined
// behaviour, whereas data() is valid for any size and a zero-length write is
// then harmless.
174 os.write(
reinterpret_cast<const char*>(lfs.data()),
sizeof(
LabeledFeature)*lfs.size() );
// Open the output file; a file that cannot be opened is reported by throwing
// std::runtime_error with the filename appended to the message.
204 std::ofstream fout(filename.c_str() );
205 if( !fout.is_open() )
throw std::runtime_error(std::string(
"(E) could not open ") + filename );
// attribId and threshold are both converted to int before being written, so
// they appear as decimal numbers, separated by one space and ended by a newline.
207 fout<<int(attribId)<<
" "<<int(threshold)<<
"\n";
// Open the split-info file for reading; a file that cannot be opened is
// reported by throwing std::runtime_error.
217 std::ifstream fin(filename.c_str() );
218 if( !fin.is_open() )
// The message ends with a space so the filename is not glued to the text
// (same wording as the message used when opening the file for writing).
throw std::runtime_error(std::string(
"(E) could not open ") + filename );
// Extract the four fields in order: attribId, threshold, gain, HP.
// NOTE(review): the writer emits threshold as int(threshold). If Attrib is a
// character type, this extraction reads a single character rather than a
// number - confirm against Attrib's definition.
220 fin>>attribId >>threshold>>gain>>HP;
// Any failed extraction leaves failbit set; report it as a malformed file.
221 if( fin.fail() )
throw std::runtime_error(std::string(
"(E) malformed splitInfo file ") + filename );
void writeInfoFile(const std::string &filename, int attribId, Attrib threshold, double gain, const HistogramPair &HP)
static double entropy(const Histogram &h)
Computes the entropy of the label counts stored in histogram h.
static std::uint64_t numElements(const Histogram &h)
std::istream & operator>>(std::istream &is, AttribLocation &aloc)
static double informationGain(const HistogramPair &hp)
This will compute the gain in information resulting from the split.
static void writeLabeledFeatureVec(std::ostream &os, const std::vector< LabeledFeature > &lfs)
std::ostream & operator<<(std::ostream &os, const AttribLocation &aloc)
void readInfoFile(const std::string &filename, int &attribId, Attrib &threshold, double &gain, HistogramPair &HP)
static double entropy_merged(const HistogramPair &hp)
Computes the entropy of the histogram obtained by adding the true and false histograms of hp label by label.
void accumTrue(const Label label)
void accumFalse(const Label label)
const Histogram h_false() const
const Histogram h_true() const
LabeledAttrib(const Label &label, const Attrib &attrib)
Attrib attribs[NUMATTRIBS]
LabeledFeature(const LabeledFeature &B)
SplitPoint(int ai, Attrib t)