From 76c44072d14cecfda002783fd1b729c8ae79c83c Mon Sep 17 00:00:00 2001 From: tqchen Date: Tue, 20 May 2014 15:42:19 -0700 Subject: [PATCH] fix sometimes python cachelist problem --- python/xgboost_python.cpp | 4 ++-- regrank/xgboost_regrank.h | 24 +++++++++++++++++------- regrank/xgboost_regrank_data.h | 8 +++++++- regrank/xgboost_regrank_main.cpp | 2 +- 4 files changed, 27 insertions(+), 11 deletions(-) diff --git a/python/xgboost_python.cpp b/python/xgboost_python.cpp index 7c63fc6ac6a3..9ad0f34afd9d 100644 --- a/python/xgboost_python.cpp +++ b/python/xgboost_python.cpp @@ -112,7 +112,7 @@ namespace xgboost{ private: bool init_trainer, init_model; public: - Booster(const std::vector mats){ + Booster(const std::vector mats){ silent = 1; init_trainer = false; init_model = false; @@ -223,7 +223,7 @@ extern "C"{ // xgboost implementation void *XGBoosterCreate( void *dmats[], size_t len ){ - std::vector mats; + std::vector mats; for( size_t i = 0; i < len; ++i ){ DMatrix *dtr = static_cast(dmats[i]); dtr->CheckInit(); diff --git a/regrank/xgboost_regrank.h b/regrank/xgboost_regrank.h index e6559b06389f..356b82683fa5 100644 --- a/regrank/xgboost_regrank.h +++ b/regrank/xgboost_regrank.h @@ -31,7 +31,7 @@ namespace xgboost{ * \brief a regression booter associated with training and evaluating data * \param mats array of pointers to matrix whose prediction result need to be cached */ - RegRankBoostLearner(const std::vector& mats){ + RegRankBoostLearner(const std::vector& mats){ silent = 0; obj_ = NULL; name_obj_ = "reg:linear"; @@ -45,7 +45,7 @@ namespace xgboost{ * data matrices to continue training otherwise it will cause error * \param mats array of pointers to matrix whose prediction result need to be cached */ - inline void SetCacheData(const std::vector& mats){ + inline void SetCacheData(const std::vector& mats){ // estimate feature bound int num_feature = 0; // assign buffer index @@ -58,7 +58,9 @@ namespace xgboost{ if( mats[i] == mats[j] ) dupilicate = true; } 
if( dupilicate ) continue; - cache_.push_back( CacheEntry( mats[i], buffer_size ) ); + // set mats[i]'s cache learner pointer to this + mats[i]->cache_learner_ptr_ = this; + cache_.push_back( CacheEntry( mats[i], buffer_size, mats[i]->Size() ) ); buffer_size += static_cast(mats[i]->Size()); num_feature = std::max(num_feature, (int)(mats[i]->data.NumCol())); } @@ -342,9 +344,10 @@ namespace xgboost{ private: struct CacheEntry{ const DMatrix *mat_; - int buffer_offset_; - CacheEntry(const DMatrix *mat, int buffer_offset) - :mat_(mat), buffer_offset_(buffer_offset){} + int buffer_offset_; + size_t num_row_; + CacheEntry(const DMatrix *mat, int buffer_offset, size_t num_row) + :mat_(mat), buffer_offset_(buffer_offset), num_row_(num_row){} }; /*! \brief the entries indicates that we have internal prediction cache */ std::vector cache_; @@ -352,7 +355,14 @@ namespace xgboost{ // find internal bufer offset for certain matrix, if not exist, return -1 inline int FindBufferOffset(const DMatrix &mat){ for(size_t i = 0; i < cache_.size(); ++i){ - if( cache_[i].mat_ == &mat ) return cache_[i].buffer_offset_; + if( cache_[i].mat_ == &mat && mat.cache_learner_ptr_ == this ) { + if( cache_[i].num_row_ == mat.Size() ){ + return cache_[i].buffer_offset_; + }else{ + fprintf( stderr, "warning: number of rows in input matrix changed as remembered in cachelist, ignore cached results\n" ); + fflush( stderr ); + } + } } return -1; } diff --git a/regrank/xgboost_regrank_data.h b/regrank/xgboost_regrank_data.h index f9c78f51cb13..7c3138089833 100644 --- a/regrank/xgboost_regrank_data.h +++ b/regrank/xgboost_regrank_data.h @@ -52,9 +52,15 @@ namespace xgboost{ booster::FMatrixS data; /*! \brief information fields */ Info info; + /*! 
+ * \brief cache pointer to verify if the data structure is cached in some learner + * this is a bit ugly, but we need double-check verification, so that if one side gets deleted + * and some strange re-allocation gets the same pointer, we will still be fine + */ + void *cache_learner_ptr_; public: /*! \brief default constructor */ - DMatrix(void){} + DMatrix(void):cache_learner_ptr_(NULL){} /*! \brief get the number of instances */ inline size_t Size() const{ return data.NumRow(); diff --git a/regrank/xgboost_regrank_main.cpp b/regrank/xgboost_regrank_main.cpp index be7bbbb35953..4e8a59564745 100644 --- a/regrank/xgboost_regrank_main.cpp +++ b/regrank/xgboost_regrank_main.cpp @@ -126,7 +126,7 @@ namespace xgboost{ deval.back()->CacheLoad(eval_data_paths[i].c_str(), silent != 0, use_buffer != 0); devalall.push_back(deval.back()); } - std::vector dcache(1, &data); + std::vector dcache(1, &data); for( size_t i = 0; i < deval.size(); ++ i){ dcache.push_back( deval[i] ); }