Skip to content

Commit

Permalink
fix sometimes python cachelist problem
Browse files Browse the repository at this point in the history
  • Loading branch information
tqchen committed May 20, 2014
1 parent ccde443 commit 76c4407
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 11 deletions.
4 changes: 2 additions & 2 deletions python/xgboost_python.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ namespace xgboost{
private:
bool init_trainer, init_model;
public:
Booster(const std::vector<const regrank::DMatrix *> mats){
Booster(const std::vector<regrank::DMatrix *> mats){
silent = 1;
init_trainer = false;
init_model = false;
Expand Down Expand Up @@ -223,7 +223,7 @@ extern "C"{

// xgboost implementation
void *XGBoosterCreate( void *dmats[], size_t len ){
std::vector<const xgboost::regrank::DMatrix*> mats;
std::vector<xgboost::regrank::DMatrix*> mats;
for( size_t i = 0; i < len; ++i ){
DMatrix *dtr = static_cast<DMatrix*>(dmats[i]);
dtr->CheckInit();
Expand Down
24 changes: 17 additions & 7 deletions regrank/xgboost_regrank.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ namespace xgboost{
* \brief a regression booster associated with training and evaluating data
* \param mats array of pointers to the matrices whose prediction results need to be cached
*/
RegRankBoostLearner(const std::vector<const DMatrix *>& mats){
RegRankBoostLearner(const std::vector<DMatrix *>& mats){
silent = 0;
obj_ = NULL;
name_obj_ = "reg:linear";
Expand All @@ -45,7 +45,7 @@ namespace xgboost{
* data matrices to continue training otherwise it will cause error
* \param mats array of pointers to the matrices whose prediction results need to be cached
*/
inline void SetCacheData(const std::vector<const DMatrix *>& mats){
inline void SetCacheData(const std::vector<DMatrix *>& mats){
// estimate feature bound
int num_feature = 0;
// assign buffer index
Expand All @@ -58,7 +58,9 @@ namespace xgboost{
if( mats[i] == mats[j] ) dupilicate = true;
}
if( dupilicate ) continue;
cache_.push_back( CacheEntry( mats[i], buffer_size ) );
// set mats[i]'s cache learner pointer to this
mats[i]->cache_learner_ptr_ = this;
cache_.push_back( CacheEntry( mats[i], buffer_size, mats[i]->Size() ) );
buffer_size += static_cast<unsigned>(mats[i]->Size());
num_feature = std::max(num_feature, (int)(mats[i]->data.NumCol()));
}
Expand Down Expand Up @@ -342,17 +344,25 @@ namespace xgboost{
private:
struct CacheEntry{
const DMatrix *mat_;
int buffer_offset_;
CacheEntry(const DMatrix *mat, int buffer_offset)
:mat_(mat), buffer_offset_(buffer_offset){}
int buffer_offset_;
size_t num_row_;
CacheEntry(const DMatrix *mat, int buffer_offset, size_t num_row)
:mat_(mat), buffer_offset_(buffer_offset), num_row_(num_row){}
};
/*! \brief the entries indicates that we have internal prediction cache */
std::vector<CacheEntry> cache_;
private:
// find the internal buffer offset for the given matrix; return -1 if it is not cached
inline int FindBufferOffset(const DMatrix &mat){
for(size_t i = 0; i < cache_.size(); ++i){
if( cache_[i].mat_ == &mat ) return cache_[i].buffer_offset_;
if( cache_[i].mat_ == &mat && mat.cache_learner_ptr_ == this ) {
if( cache_[i].num_row_ == mat.Size() ){
return cache_[i].buffer_offset_;
}else{
fprintf( stderr, "warning: number of rows in input matrix changed as remembered in cachelist, ignore cached results\n" );
fflush( stderr );
}
}
}
return -1;
}
Expand Down
8 changes: 7 additions & 1 deletion regrank/xgboost_regrank_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,15 @@ namespace xgboost{
booster::FMatrixS data;
/*! \brief information fields */
Info info;
/*!
* \brief cache pointer used to verify whether this data structure is cached in some learner;
* this is a bit ugly, but we need a double-check verification, so that if one side gets deleted
* and some strange re-allocation returns the same pointer, we will still be fine
*/
void *cache_learner_ptr_;
public:
/*! \brief default constructor */
DMatrix(void){}
DMatrix(void):cache_learner_ptr_(NULL){}
/*! \brief get the number of instances */
inline size_t Size() const{
return data.NumRow();
Expand Down
2 changes: 1 addition & 1 deletion regrank/xgboost_regrank_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ namespace xgboost{
deval.back()->CacheLoad(eval_data_paths[i].c_str(), silent != 0, use_buffer != 0);
devalall.push_back(deval.back());
}
std::vector<const DMatrix *> dcache(1, &data);
std::vector<DMatrix *> dcache(1, &data);
for( size_t i = 0; i < deval.size(); ++ i){
dcache.push_back( deval[i] );
}
Expand Down

0 comments on commit 76c4407

Please sign in to comment.