diff --git a/CMakeLists.txt b/CMakeLists.txt index bfea8b35..1737fb05 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,7 +34,7 @@ add_library(DataFrame::DataFrame ALIAS DataFrame) target_sources(DataFrame PRIVATE src/Utils/DateTime.cc) -target_compile_features(DataFrame PUBLIC cxx_std_20) +target_compile_features(DataFrame PUBLIC cxx_std_23) target_compile_definitions( DataFrame PRIVATE $<$:HMDF_HAVE_CLOCK_GETTIME> diff --git a/README.md b/README.md index 07c5c20e..2ef8428c 100644 --- a/README.md +++ b/README.md @@ -24,15 +24,13 @@ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. --> +[![C++23](https://img.shields.io/badge/C%2B%2B-23-blue.svg)](https://isocpp.org/std/the-standard ) [![Build status](https://ci.appveyor.com/api/projects/status/hjw01qui3bvxs8yi?svg=true)](https://ci.appveyor.com/project/hosseinmoein/dataframe) +
![GitHub](https://img.shields.io/github/license/hosseinmoein/DataFrame.svg?color=red&style=popout) -[![C++20](https://img.shields.io/badge/C%2B%2B-20-blue.svg)](https://isocpp.org/std/the-standard ) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/db646376a4014c3788c7224e670fe451)](https://app.codacy.com/manual/hosseinmoein/DataFrame?utm_source=github.com&utm_medium=referral&utm_content=hosseinmoein/DataFrame&utm_campaign=Badge_Grade_Dashboard)
-[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/hosseinmoein/DataFrame/master) -[![Maintenance](https://img.shields.io/badge/Maintained%3F-yes-green.svg)](https://GitHub.com/hosseinmoein/DataFrame/graphs/commit-activity) ![GitHub tag (latest by date)](https://img.shields.io/github/tag-date/hosseinmoein/DataFrame.svg?color=blue&label=Official%20Release&style=popout) -
![Conan Center](https://img.shields.io/conan/v/dataframe) [![VCPKG package](https://repology.org/badge/version-for-repo/vcpkg/dataframe.svg)](https://repology.org/project/dataframe/versions) diff --git a/benchmarks/dataframe_performance.cc b/benchmarks/dataframe_performance.cc index 16e502e5..f617a91c 100644 --- a/benchmarks/dataframe_performance.cc +++ b/benchmarks/dataframe_performance.cc @@ -36,7 +36,8 @@ using namespace hmdf; using namespace std::chrono; constexpr std::size_t ALIGNMENT = 64; -constexpr std::size_t SIZE = 300000000; +// constexpr std::size_t SIZE = 300000000; +constexpr std::size_t SIZE = 10000000; typedef StdDataFrame64 MyDataFrame; @@ -57,7 +58,7 @@ int main(int, char *[]) { std::cout << "Data generation/load time: " << double(duration_cast(second - first).count()) / 1000000.0 - << std::endl; + << " secs" << std::endl; MeanVisitor n_mv; VarVisitor ln_vv; @@ -81,14 +82,25 @@ int main(int, char *[]) { const auto fourth = high_resolution_clock::now(); + // df.sort("log_normal", sort_spec::ascen, + // "exponential", sort_spec::ascen); + // std::cout << "1001th value in normal column: " + // << df.get_column("normal")[1001] << std::endl; + + const auto fifth = high_resolution_clock::now(); + std::cout << "Calculation time: " << double(duration_cast(third - second).count()) / 1000000.0 - << '\n' + << " secs\n" << "Selection time: " << double(duration_cast(fourth - third).count()) / 1000000.0 - << '\n' + << " secs\n" + // << "Sorting time: " + // << double(duration_cast(fifth - fourth).count()) / 1000000.0 + // << " secs\n" << "Overall time: " - << double(duration_cast(fourth - first).count()) / 1000000.0 + << double(duration_cast(fifth - first).count()) / 1000000.0 + << " secs" << std::endl; return (0); } diff --git a/benchmarks/polars_performance.py b/benchmarks/polars_performance.py index 31596244..f6b9463f 100644 --- a/benchmarks/polars_performance.py +++ b/benchmarks/polars_performance.py @@ -4,7 +4,8 @@ # 
------------------------------------------------------------------------------ -SIZE: int = 300000000 +# SIZE: int = 300000000 +SIZE: int = 10000000 first = datetime.datetime.now() df = pl.DataFrame({"normal": np.random.normal(size=SIZE), @@ -13,7 +14,7 @@ }) second = datetime.datetime.now() print(f"Data generation/load time: " - f"{(second - first).seconds}.{(second - first).microseconds}") + f"{(second - first).seconds}.{(second - first).microseconds} secs") df2 = df.select( mean = pl.col("normal").mean(), @@ -32,9 +33,14 @@ print(f"Number of rows after select: {df3.select(pl.count()).item()}") fourth = datetime.datetime.now() -print(f"Calculation time: {(third - second).seconds}.{(third - second).microseconds}") -print(f"Selection time: {(fourth - third).seconds}.{(fourth - third).microseconds}") -print(f"Overall time: {(fourth - first).seconds}.{(fourth - first).microseconds}") +# df4 = df.sort(["log_normal", "exponential"]); +# print(f"1001th value in normal column: {df4['normal'][1001]}") +fifth = datetime.datetime.now() + +print(f"Calculation time: {(third - second).seconds}.{(third - second).microseconds} secs") +print(f"Selection time: {(fourth - third).seconds}.{(fourth - third).microseconds} secs") +# print(f"Sorting time: {(fifth - fourth).seconds}.{(fifth - fourth).microseconds} secs") +print(f"Overall time: {(fifth - first).seconds}.{(fifth - first).microseconds} secs") # ------------------------------------------------------------------------------ diff --git a/include/DataFrame/DataFrameStatsVisitors.h b/include/DataFrame/DataFrameStatsVisitors.h index e07611e2..014d90cf 100644 --- a/include/DataFrame/DataFrameStatsVisitors.h +++ b/include/DataFrame/DataFrameStatsVisitors.h @@ -4842,6 +4842,9 @@ struct LowessVisitor { const Y &y_begin, const Y &y_end, // dependent variable const X &x_begin, const X &x_end) { // independent variable + using bool_vec_t = + std::vector::type>; + assert(frac_ >= 0 && frac_ <= 1); assert(loop_n_ > 2); @@ -4862,7 +4865,10 
@@ struct LowessVisitor { [] (auto lhs, auto rhs) -> bool { return (lhs < rhs); }); - _sort_by_sorted_index_(yvals, sorting_idxs, col_s); + + bool_vec_t done_vec (col_s); + + _sort_by_sorted_index_(yvals, sorting_idxs, done_vec, col_s); lowess_(idx_begin, idx_end, yvals.begin(), yvals.end(), xvals.begin(), xvals.end()); diff --git a/include/DataFrame/DataFrameTypes.h b/include/DataFrame/DataFrameTypes.h index 7e2977ff..f4fe86f7 100644 --- a/include/DataFrame/DataFrameTypes.h +++ b/include/DataFrame/DataFrameTypes.h @@ -579,10 +579,10 @@ struct RandGenParams { std::size_t t_dist { 1 }; // The μ distribution parameter (the mean of the distribution) // - double mean { 1.0 }; + double mean { 0 }; // the σ distribution parameter (standard deviation) // - double std { 0 }; + double std { 1 }; // The λ distribution parameter (the rate parameter) // double lambda { 1.0 }; diff --git a/include/DataFrame/Internals/DataFrame.tcc b/include/DataFrame/Internals/DataFrame.tcc index f88f398e..538a2cb1 100644 --- a/include/DataFrame/Internals/DataFrame.tcc +++ b/include/DataFrame/Internals/DataFrame.tcc @@ -31,9 +31,11 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include #include #include +#include // ---------------------------------------------------------------------------- @@ -117,28 +119,6 @@ void DataFrame::remove_lock () { lock_ = nullptr; } // ---------------------------------------------------------------------------- - -template -template -void DataFrame:: -sort_common_(DataFrame &df, CF &&comp_func, bool ignore_index) { - - const size_type idx_s = df.indices_.size(); - StlVecType sorting_idxs(idx_s, 0); - - std::iota(sorting_idxs.begin(), sorting_idxs.end(), 0); - std::sort(sorting_idxs.begin(), sorting_idxs.end(), comp_func); - - sort_functor_ functor (sorting_idxs, idx_s); - - for (const auto &iter : df.data_) [[likely]] - iter.change(functor); - if (! 
ignore_index) - _sort_by_sorted_index_(df.indices_, sorting_idxs, idx_s); -} - -// ---------------------------------------------------------------------------- - template template void @@ -691,81 +671,67 @@ sort(const char *name, sort_spec dir, bool ignore_index) { make_consistent(); - const SpinGuard guard (lock_); + ColumnVecType *vec { nullptr}; + const SpinGuard guard (lock_); if (! ::strcmp(name, DF_INDEX_COL_NAME)) { - const auto &idx_vec = get_index(); - auto a = - [&idx_vec](size_type i, size_type j) -> bool { - return (idx_vec[i] < idx_vec[j]); - }; - auto d = - [&idx_vec](size_type i, size_type j) -> bool { - return (idx_vec[i] > idx_vec[j]); - }; - auto aa = - [&idx_vec](size_type i, size_type j) -> bool { - return (abs__(idx_vec[i]) < abs__(idx_vec[j])); - }; - auto ad = - [&idx_vec](size_type i, size_type j) -> bool { - return (abs__(idx_vec[i]) > abs__(idx_vec[j])); - }; - - if (dir == sort_spec::ascen) - sort_common_(*this, - std::move(a), - ignore_index); - else if (dir == sort_spec::desce) - sort_common_(*this, - std::move(d), - ignore_index); - else if (dir == sort_spec::abs_ascen) - sort_common_(*this, - std::move(aa), - ignore_index); - else if (dir == sort_spec::abs_desce) - sort_common_(*this, - std::move(ad), - ignore_index); + vec = reinterpret_cast *>(&indices_); + ignore_index = true; } - else { - const auto &col_vec = get_column(name); - auto a = - [&col_vec](size_type i, size_type j) -> bool { - return (col_vec[i] < col_vec[j]); - }; - auto d = - [&col_vec](size_type i, size_type j) -> bool { - return (col_vec[i] > col_vec[j]); - }; - auto aa = - [&col_vec](size_type i, size_type j) -> bool { - return (abs__(col_vec[i]) < abs__(col_vec[j])); - }; - auto ad = - [&col_vec](size_type i, size_type j) -> bool { - return (abs__(col_vec[i]) > abs__(col_vec[j])); - }; - - if (dir == sort_spec::ascen) [[likely]] - sort_common_(*this, - std::move(a), - ignore_index); - else if (dir == sort_spec::desce) - sort_common_(*this, - std::move(d), - 
ignore_index); - else if (dir == sort_spec::abs_ascen) - sort_common_(*this, - std::move(aa), - ignore_index); - else if (dir == sort_spec::abs_desce) - sort_common_(*this, - std::move(ad), - ignore_index); + else + vec = &(get_column(name, false)); + + auto a = [](const auto &lhs, const auto &rhs) -> bool { + return (std::get<0>(lhs) < std::get<0>(rhs)); + }; + auto d = [](const auto &lhs, const auto &rhs) -> bool { + return (std::get<0>(lhs) > std::get<0>(rhs)); + }; + auto aa = [](const auto &lhs, const auto &rhs) -> bool { + return (abs__(std::get<0>(lhs)) < abs__(std::get<0>(rhs))); + }; + auto ad = [](const auto &lhs, const auto &rhs) -> bool { + return (abs__(std::get<0>(lhs)) > abs__(std::get<0>(rhs))); + }; + + const size_type idx_s = indices_.size(); + StlVecType sorting_idxs(idx_s); + + std::iota(sorting_idxs.begin(), sorting_idxs.end(), 0); + + auto zip = std::ranges::views::zip(*vec, sorting_idxs); + auto zip_idx = std::ranges::views::zip(*vec, indices_, sorting_idxs); + + if (dir == sort_spec::ascen) { + if (! ignore_index) + std::ranges::sort(zip_idx, a); + else + std::ranges::sort(zip, a); + } + else if (dir == sort_spec::desce) { + if (! ignore_index) + std::ranges::sort(zip_idx, d); + else + std::ranges::sort(zip, d); + } + else if (dir == sort_spec::abs_ascen) { + if (! ignore_index) + std::ranges::sort(zip_idx, aa); + else + std::ranges::sort(zip, aa); + } + else if (dir == sort_spec::abs_desce) { + if (! 
ignore_index) + std::ranges::sort(zip_idx, ad); + else + std::ranges::sort(zip, ad); } + sort_functor_ functor (sorting_idxs, idx_s); + + for (const auto &citer : column_list_) [[likely]] + if (citer.first != name) + data_[citer.second].change(functor); return; } @@ -774,219 +740,275 @@ sort(const char *name, sort_spec dir, bool ignore_index) { template template void DataFrame:: -sort(const char *name1, sort_spec dir1, const char *name2, sort_spec dir2, +sort(const char *name1, sort_spec dir1, + const char *name2, sort_spec dir2, bool ignore_index) { make_consistent(); - const ColumnVecType *vec1 { nullptr}; - const ColumnVecType *vec2 { nullptr}; - const SpinGuard guard (lock_); + ColumnVecType *vec1 { nullptr}; + ColumnVecType *vec2 { nullptr}; + const SpinGuard guard (lock_); - if (! ::strcmp(name1, DF_INDEX_COL_NAME)) + if (! ::strcmp(name1, DF_INDEX_COL_NAME)) { vec1 = reinterpret_cast *>(&indices_); + ignore_index = true; + } else vec1 = &(get_column(name1, false)); - if (! ::strcmp(name2, DF_INDEX_COL_NAME)) + if (! 
::strcmp(name2, DF_INDEX_COL_NAME)) { vec2 = reinterpret_cast *>(&indices_); + ignore_index = true; + } else vec2 = &(get_column(name2, false)); auto a_a = - [vec1, vec2](size_type i, size_type j) -> bool { - if (vec1->at(i) < vec1->at(j)) + [](const auto &lhs, const auto &rhs) -> bool { + if (std::get<0>(lhs) < std::get<0>(rhs)) return (true); - else if (vec1->at(i) > vec1->at(j)) + else if (std::get<0>(lhs) > std::get<0>(rhs)) return (false); - return (vec2->at(i) < vec2->at(j)); + return (std::get<1>(lhs) < std::get<1>(rhs)); }; auto d_d = - [vec1, vec2](size_type i, size_type j) -> bool { - if (vec1->at(i) > vec1->at(j)) + [](const auto &lhs, const auto &rhs) -> bool { + if (std::get<0>(lhs) > std::get<0>(rhs)) return (true); - else if (vec1->at(i) < vec1->at(j)) + else if (std::get<0>(lhs) < std::get<0>(rhs)) return (false); - return (vec2->at(i) > vec2->at(j)); + return (std::get<1>(lhs) > std::get<1>(rhs)); }; auto a_d = - [vec1, vec2](size_type i, size_type j) -> bool { - if (vec1->at(i) < vec1->at(j)) + [](const auto &lhs, const auto &rhs) -> bool { + if (std::get<0>(lhs) < std::get<0>(rhs)) return (true); - else if (vec1->at(i) > vec1->at(j)) + else if (std::get<0>(lhs) > std::get<0>(rhs)) return (false); - return (vec2->at(i) > vec2->at(j)); + return (std::get<1>(lhs) > std::get<1>(rhs)); }; auto d_a = - [vec1, vec2](size_type i, size_type j) -> bool { - if (vec1->at(i) > vec1->at(j)) + [](const auto &lhs, const auto &rhs) -> bool { + if (std::get<0>(lhs) > std::get<0>(rhs)) return (true); - else if (vec1->at(i) < vec1->at(j)) + else if (std::get<0>(lhs) < std::get<0>(rhs)) return (false); - return (vec2->at(i) < vec2->at(j)); + return (std::get<1>(lhs) < std::get<1>(rhs)); }; auto aa_aa = - [vec1, vec2](size_type i, size_type j) -> bool { - if (abs__(vec1->at(i)) < abs__(vec1->at(j))) + [](const auto &lhs, const auto &rhs) -> bool { + if (abs__(std::get<0>(lhs)) < abs__(std::get<0>(rhs))) return (true); - else if (abs__(vec1->at(i)) > 
abs__(vec1->at(j))) + else if (abs__(std::get<0>(lhs)) > abs__(std::get<0>(rhs))) return (false); - return (abs__(vec2->at(i)) < abs__(vec2->at(j))); + return (abs__(std::get<1>(lhs)) < abs__(std::get<1>(rhs))); }; auto ad_ad = - [vec1, vec2](size_type i, size_type j) -> bool { - if (abs__(vec1->at(i)) > abs__(vec1->at(j))) + [](const auto &lhs, const auto &rhs) -> bool { + if (abs__(std::get<0>(lhs)) > abs__(std::get<0>(rhs))) return (true); - else if (abs__(vec1->at(i)) < abs__(vec1->at(j))) + else if (abs__(std::get<0>(lhs)) < abs__(std::get<0>(rhs))) return (false); - return (abs__(vec2->at(i)) > abs__(vec2->at(j))); + return (abs__(std::get<1>(lhs)) > abs__(std::get<1>(rhs))); }; auto aa_ad = - [vec1, vec2](size_type i, size_type j) -> bool { - if (abs__(vec1->at(i)) < abs__(vec1->at(j))) + [](const auto &lhs, const auto &rhs) -> bool { + if (abs__(std::get<0>(lhs)) < abs__(std::get<0>(rhs))) return (true); - else if (abs__(vec1->at(i)) > abs__(vec1->at(j))) + else if (abs__(std::get<0>(lhs)) > abs__(std::get<0>(rhs))) return (false); - return (abs__(vec2->at(i)) > abs__(vec2->at(j))); + return (abs__(std::get<1>(lhs)) > abs__(std::get<1>(rhs))); }; auto ad_aa = - [vec1, vec2](size_type i, size_type j) -> bool { - if (abs__(vec1->at(i)) > abs__(vec1->at(j))) + [](const auto &lhs, const auto &rhs) -> bool { + if (abs__(std::get<0>(lhs)) > abs__(std::get<0>(rhs))) return (true); - else if (abs__(vec1->at(i)) < abs__(vec1->at(j))) + else if (abs__(std::get<0>(lhs)) < abs__(std::get<0>(rhs))) return (false); - return (abs__(vec2->at(i)) < abs__(vec2->at(j))); + return (abs__(std::get<1>(lhs)) < abs__(std::get<1>(rhs))); }; auto a_aa = - [vec1, vec2](size_type i, size_type j) -> bool { - if (vec1->at(i) < vec1->at(j)) + [](const auto &lhs, const auto &rhs) -> bool { + if (std::get<0>(lhs) < std::get<0>(rhs)) return (true); - else if (vec1->at(i) > vec1->at(j)) + else if (std::get<0>(lhs) > std::get<0>(rhs)) return (false); - return (abs__(vec2->at(i)) < 
abs__(vec2->at(j))); + return (abs__(std::get<1>(lhs)) < abs__(std::get<1>(rhs))); }; auto a_ad = - [vec1, vec2](size_type i, size_type j) -> bool { - if (vec1->at(i) < vec1->at(j)) + [](const auto &lhs, const auto &rhs) -> bool { + if (std::get<0>(lhs) < std::get<0>(rhs)) return (true); - else if (vec1->at(i) > vec1->at(j)) + else if (std::get<0>(lhs) > std::get<0>(rhs)) return (false); - return (abs__(vec2->at(i)) > abs__(vec2->at(j))); + return (abs__(std::get<1>(lhs)) > abs__(std::get<1>(rhs))); }; auto d_aa = - [vec1, vec2](size_type i, size_type j) -> bool { - if (vec1->at(i) > vec1->at(j)) + [](const auto &lhs, const auto &rhs) -> bool { + if (std::get<0>(lhs) > std::get<0>(rhs)) return (true); - else if (vec1->at(i) < vec1->at(j)) + else if (std::get<0>(lhs) < std::get<0>(rhs)) return (false); - return (abs__(vec2->at(i)) < abs__(vec2->at(j))); + return (abs__(std::get<1>(lhs)) < abs__(std::get<1>(rhs))); }; auto d_ad = - [vec1, vec2](size_type i, size_type j) -> bool { - if (vec1->at(i) > vec1->at(j)) + [](const auto &lhs, const auto &rhs) -> bool { + if (std::get<0>(lhs) > std::get<0>(rhs)) return (true); - else if (vec1->at(i) < vec1->at(j)) + else if (std::get<0>(lhs) < std::get<0>(rhs)) return (false); - return (abs__(vec2->at(i)) > abs__(vec2->at(j))); + return (abs__(std::get<1>(lhs)) > abs__(std::get<1>(rhs))); }; auto aa_a = - [vec1, vec2](size_type i, size_type j) -> bool { - if (abs__(vec1->at(i)) < abs__(vec1->at(j))) + [](const auto &lhs, const auto &rhs) -> bool { + if (abs__(std::get<0>(lhs)) < abs__(std::get<0>(rhs))) return (true); - else if (abs__(vec1->at(i)) > abs__(vec1->at(j))) + else if (abs__(std::get<0>(lhs)) > abs__(std::get<0>(rhs))) return (false); - return (vec2->at(i) < vec2->at(j)); + return (std::get<1>(lhs) < std::get<1>(rhs)); }; auto ad_a = - [vec1, vec2](size_type i, size_type j) -> bool { - if (abs__(vec1->at(i)) > abs__(vec1->at(j))) + [](const auto &lhs, const auto &rhs) -> bool { + if (abs__(std::get<0>(lhs)) > 
abs__(std::get<0>(rhs))) return (true); - else if (abs__(vec1->at(i)) < abs__(vec1->at(j))) + else if (abs__(std::get<0>(lhs)) < abs__(std::get<0>(rhs))) return (false); - return (vec2->at(i) < vec2->at(j)); + return (std::get<1>(lhs) < std::get<1>(rhs)); }; auto aa_d = - [vec1, vec2](size_type i, size_type j) -> bool { - if (abs__(vec1->at(i)) < abs__(vec1->at(j))) + [](const auto &lhs, const auto &rhs) -> bool { + if (abs__(std::get<0>(lhs)) < abs__(std::get<0>(rhs))) return (true); - else if (abs__(vec1->at(i)) > abs__(vec1->at(j))) + else if (abs__(std::get<0>(lhs)) > abs__(std::get<0>(rhs))) return (false); - return (vec2->at(i) > vec2->at(j)); + return (std::get<1>(lhs) > std::get<1>(rhs)); }; auto ad_d = - [vec1, vec2](size_type i, size_type j) -> bool { - if (abs__(vec1->at(i)) > abs__(vec1->at(j))) + [](const auto &lhs, const auto &rhs) -> bool { + if (abs__(std::get<0>(lhs)) > abs__(std::get<0>(rhs))) return (true); - else if (abs__(vec1->at(i)) < abs__(vec1->at(j))) + else if (abs__(std::get<0>(lhs)) < abs__(std::get<0>(rhs))) return (false); - return (vec2->at(i) > vec2->at(j)); + return (std::get<1>(lhs) > std::get<1>(rhs)); }; + const size_type idx_s = indices_.size(); + StlVecType sorting_idxs(idx_s); + + std::iota(sorting_idxs.begin(), sorting_idxs.end(), 0); - if (dir1 == sort_spec::ascen && dir2 == sort_spec::ascen) - sort_common_(*this, - std::move(a_a), - ignore_index); - else if (dir1 == sort_spec::desce && dir2 == sort_spec::desce) - sort_common_(*this, - std::move(d_d), - ignore_index); - else if (dir1 == sort_spec::ascen && dir2 == sort_spec::desce) - sort_common_(*this, - std::move(a_d), - ignore_index); - else if (dir1 == sort_spec::desce && dir2 == sort_spec::ascen) - sort_common_(*this, - std::move(d_a), - ignore_index); - else if (dir1 == sort_spec::abs_ascen && dir2 == sort_spec::abs_ascen) - sort_common_(*this, - std::move(aa_aa), - ignore_index); - else if (dir1 == sort_spec::abs_desce && dir2 == sort_spec::abs_desce) - 
sort_common_(*this, - std::move(ad_ad), - ignore_index); - else if (dir1 == sort_spec::abs_ascen && dir2 == sort_spec::abs_desce) - sort_common_(*this, - std::move(aa_ad), - ignore_index); - else if (dir1 == sort_spec::abs_desce && dir2 == sort_spec::abs_ascen) - sort_common_(*this, - std::move(ad_aa), - ignore_index); - else if (dir1 == sort_spec::ascen && dir2 == sort_spec::abs_ascen) - sort_common_(*this, - std::move(a_aa), - ignore_index); - else if (dir1 == sort_spec::ascen && dir2 == sort_spec::abs_desce) - sort_common_(*this, - std::move(a_ad), - ignore_index); - else if (dir1 == sort_spec::desce && dir2 == sort_spec::abs_ascen) - sort_common_(*this, - std::move(d_aa), - ignore_index); - else if (dir1 == sort_spec::desce && dir2 == sort_spec::abs_desce) - sort_common_(*this, - std::move(d_ad), - ignore_index); - else if (dir1 == sort_spec::abs_ascen && dir2 == sort_spec::ascen) - sort_common_(*this, - std::move(aa_a), - ignore_index); - else if (dir1 == sort_spec::abs_desce && dir2 == sort_spec::ascen) - sort_common_(*this, - std::move(ad_a), - ignore_index); - else if (dir1 == sort_spec::abs_ascen && dir2 == sort_spec::desce) - sort_common_(*this, - std::move(aa_d), - ignore_index); - else // dir1 == sort_spec::abs_desce && dir2 == sort_spec::desce - sort_common_(*this, - std::move(ad_d), - ignore_index); + auto zip = std::ranges::views::zip(*vec1, *vec2, sorting_idxs); + auto zip_idx = + std::ranges::views::zip(*vec1, *vec2, indices_, sorting_idxs); + + if (dir1 == sort_spec::ascen && dir2 == sort_spec::ascen) { + // if (! ignore_index) + // std::sort(std::execution::par_unseq, + // zip_idx.begin(), zip_idx.end(), a_a); + // else + // std::sort(std::execution::par_unseq, zip.begin(), zip.end(), a_a); + if (! ignore_index) + std::ranges::sort(zip_idx, a_a); + else + std::ranges::sort(zip, a_a); + } + else if (dir1 == sort_spec::desce && dir2 == sort_spec::desce) { + if (! 
ignore_index) + std::ranges::sort(zip_idx, d_d); + else + std::ranges::sort(zip, d_d); + } + else if (dir1 == sort_spec::ascen && dir2 == sort_spec::desce) { + if (! ignore_index) + std::ranges::sort(zip_idx, a_d); + else + std::ranges::sort(zip, a_d); + } + else if (dir1 == sort_spec::desce && dir2 == sort_spec::ascen) { + if (! ignore_index) + std::ranges::sort(zip_idx, d_a); + else + std::ranges::sort(zip, d_a); + } + else if (dir1 == sort_spec::abs_ascen && dir2 == sort_spec::abs_ascen) { + if (! ignore_index) + std::ranges::sort(zip_idx, aa_aa); + else + std::ranges::sort(zip, aa_aa); + } + else if (dir1 == sort_spec::abs_desce && dir2 == sort_spec::abs_desce) { + if (! ignore_index) + std::ranges::sort(zip_idx, ad_ad); + else + std::ranges::sort(zip, ad_ad); + } + else if (dir1 == sort_spec::abs_ascen && dir2 == sort_spec::abs_desce) { + if (! ignore_index) + std::ranges::sort(zip_idx, aa_ad); + else + std::ranges::sort(zip, aa_ad); + } + else if (dir1 == sort_spec::abs_desce && dir2 == sort_spec::abs_ascen) { + if (! ignore_index) + std::ranges::sort(zip_idx, ad_aa); + else + std::ranges::sort(zip, ad_aa); + } + else if (dir1 == sort_spec::ascen && dir2 == sort_spec::abs_ascen) { + if (! ignore_index) + std::ranges::sort(zip_idx, a_aa); + else + std::ranges::sort(zip, a_aa); + } + else if (dir1 == sort_spec::ascen && dir2 == sort_spec::abs_desce) { + if (! ignore_index) + std::ranges::sort(zip_idx, a_ad); + else + std::ranges::sort(zip, a_ad); + } + else if (dir1 == sort_spec::desce && dir2 == sort_spec::abs_ascen) { + if (! ignore_index) + std::ranges::sort(zip_idx, d_aa); + else + std::ranges::sort(zip, d_aa); + } + else if (dir1 == sort_spec::desce && dir2 == sort_spec::abs_desce) { + if (! ignore_index) + std::ranges::sort(zip_idx, d_ad); + else + std::ranges::sort(zip, d_ad); + } + else if (dir1 == sort_spec::abs_ascen && dir2 == sort_spec::ascen) { + if (! 
ignore_index) + std::ranges::sort(zip_idx, aa_a); + else + std::ranges::sort(zip, aa_a); + } + else if (dir1 == sort_spec::abs_desce && dir2 == sort_spec::ascen) { + if (! ignore_index) + std::ranges::sort(zip_idx, ad_a); + else + std::ranges::sort(zip, ad_a); + } + else if (dir1 == sort_spec::abs_ascen && dir2 == sort_spec::desce) { + if (! ignore_index) + std::ranges::sort(zip_idx, aa_d); + else + std::ranges::sort(zip, aa_d); + } + else { // dir1 == sort_spec::abs_desce && dir2 == sort_spec::desce + if (! ignore_index) + std::ranges::sort(zip_idx, ad_d); + else + std::ranges::sort(zip, ad_d); + } + + sort_functor_ functor (sorting_idxs, idx_s); + + for (const auto &citer : column_list_) [[likely]] + if (citer.first != name1 && citer.first != name2) + data_[citer.second].change(functor); return; } @@ -1002,90 +1024,115 @@ sort(const char *name1, sort_spec dir1, make_consistent(); - const ColumnVecType *vec1 { nullptr}; - const ColumnVecType *vec2 { nullptr}; - const ColumnVecType *vec3 { nullptr}; - const SpinGuard guard (lock_); + ColumnVecType *vec1 { nullptr}; + ColumnVecType *vec2 { nullptr}; + ColumnVecType *vec3 { nullptr}; + const SpinGuard guard (lock_); - if (! ::strcmp(name1, DF_INDEX_COL_NAME)) + if (! ::strcmp(name1, DF_INDEX_COL_NAME)) { vec1 = reinterpret_cast *>(&indices_); + ignore_index = true; + } else vec1 = &(get_column(name1, false)); - if (! ::strcmp(name2, DF_INDEX_COL_NAME)) + if (! ::strcmp(name2, DF_INDEX_COL_NAME)) { vec2 = reinterpret_cast *>(&indices_); + ignore_index = true; + } else vec2 = &(get_column(name2, false)); - if (! ::strcmp(name3, DF_INDEX_COL_NAME)) + if (! 
::strcmp(name3, DF_INDEX_COL_NAME)) { vec3 = reinterpret_cast *>(&indices_); + ignore_index = true; + } else vec3 = &(get_column(name3, false)); auto cf = - [vec1, vec2, vec3, dir1, dir2, dir3] - (size_type i, size_type j) -> bool { + [dir1, dir2, dir3](const auto &lhs, const auto &rhs) -> bool { if (dir1 == sort_spec::ascen) { - if (vec1->at(i) < vec1->at(j)) + if (std::get<0>(lhs) < std::get<0>(rhs)) return (true); - else if (vec1->at(i) > vec1->at(j)) + else if (std::get<0>(lhs) > std::get<0>(rhs)) return (false); } else if (dir1 == sort_spec::desce) { - if (vec1->at(i) > vec1->at(j)) + if (std::get<0>(lhs) > std::get<0>(rhs)) return (true); - else if (vec1->at(i) < vec1->at(j)) + else if (std::get<0>(lhs) < std::get<0>(rhs)) return (false); } else if (dir1 == sort_spec::abs_ascen) { - if (abs__(vec1->at(i)) < abs__(vec1->at(j))) + if (abs__(std::get<0>(lhs)) < abs__(std::get<0>(rhs))) return (true); - else if (abs__(vec1->at(i)) > abs__(vec1->at(j))) + else if (abs__(std::get<0>(lhs)) > abs__(std::get<0>(rhs))) return (false); } else { // sort_spec::abs_desce - if (abs__(vec1->at(i)) > abs__(vec1->at(j))) + if (abs__(std::get<0>(lhs)) > abs__(std::get<0>(rhs))) return (true); - else if (abs__(vec1->at(i)) < abs__(vec1->at(j))) + else if (abs__(std::get<0>(lhs)) < abs__(std::get<0>(rhs))) return (false); } if (dir2 == sort_spec::ascen) { - if (vec2->at(i) < vec2->at(j)) + if (std::get<1>(lhs) < std::get<1>(rhs)) return (true); - else if (vec2->at(i) > vec2->at(j)) + else if (std::get<1>(lhs) > std::get<1>(rhs)) return (false); } else if (dir2 == sort_spec::desce) { - if (vec2->at(i) > vec2->at(j)) + if (std::get<1>(lhs) > std::get<1>(rhs)) return (true); - else if (vec2->at(i) < vec2->at(j)) + else if (std::get<1>(lhs) < std::get<1>(rhs)) return (false); } else if (dir2 == sort_spec::abs_ascen) { - if (abs__(vec2->at(i)) < abs__(vec2->at(j))) + if (abs__(std::get<1>(lhs)) < abs__(std::get<1>(rhs))) return (true); - else if (abs__(vec2->at(i)) > 
abs__(vec2->at(j))) + else if (abs__(std::get<1>(lhs)) > abs__(std::get<1>(rhs))) return (false); } else { // sort_spec::abs_desce - if (abs__(vec2->at(i)) > abs__(vec2->at(j))) + if (abs__(std::get<1>(lhs)) > abs__(std::get<1>(rhs))) return (true); - else if (abs__(vec2->at(i)) < abs__(vec2->at(j))) + else if (abs__(std::get<1>(lhs)) < abs__(std::get<1>(rhs))) return (false); } if (dir3 == sort_spec::ascen) - return (vec3->at(i) < vec3->at(j)); + return (std::get<2>(lhs) < std::get<2>(rhs)); else if (dir3 == sort_spec::desce) - return (vec3->at(i) > vec3->at(j)); + return (std::get<2>(lhs) > std::get<2>(rhs)); else if (dir3 == sort_spec::abs_ascen) - return (abs__(vec3->at(i)) < abs__(vec3->at(j))); + return (abs__(std::get<2>(lhs)) < abs__(std::get<2>(rhs))); else // sort_spec::abs_desce - return (abs__(vec3->at(i)) > abs__(vec3->at(j))); + return (abs__(std::get<2>(lhs)) > abs__(std::get<2>(rhs))); }; - sort_common_(*this, std::move(cf), ignore_index); + const size_type idx_s = indices_.size(); + StlVecType sorting_idxs(idx_s); + + std::iota(sorting_idxs.begin(), sorting_idxs.end(), 0); + + auto zip = std::ranges::views::zip(*vec1, *vec2, *vec3, sorting_idxs); + auto zip_idx = + std::ranges::views::zip(*vec1, *vec2, *vec3, indices_, sorting_idxs); + + if (! ignore_index) + std::ranges::sort(zip_idx, cf); + else + std::ranges::sort(zip, cf); + + sort_functor_ functor (sorting_idxs, idx_s); + + for (const auto &citer : column_list_) [[likely]] + if (citer.first != name1 && + citer.first != name2 && + citer.first != name3) + data_[citer.second].change(functor); return; } @@ -1108,115 +1155,145 @@ sort(const char *name1, sort_spec dir1, const ColumnVecType *vec4 { nullptr}; const SpinGuard guard (lock_); - if (! ::strcmp(name1, DF_INDEX_COL_NAME)) + if (! ::strcmp(name1, DF_INDEX_COL_NAME)) { vec1 = reinterpret_cast *>(&indices_); + ignore_index = true; + } else vec1 = &(get_column(name1, false)); - if (! ::strcmp(name2, DF_INDEX_COL_NAME)) + if (! 
::strcmp(name2, DF_INDEX_COL_NAME)) { vec2 = reinterpret_cast *>(&indices_); + ignore_index = true; + } else vec2 = &(get_column(name2, false)); - if (! ::strcmp(name3, DF_INDEX_COL_NAME)) + if (! ::strcmp(name3, DF_INDEX_COL_NAME)) { vec3 = reinterpret_cast *>(&indices_); + ignore_index = true; + } else vec3 = &(get_column(name3, false)); - if (! ::strcmp(name4, DF_INDEX_COL_NAME)) + if (! ::strcmp(name4, DF_INDEX_COL_NAME)) { vec4 = reinterpret_cast *>(&indices_); + ignore_index = true; + } else vec4 = &(get_column(name4, false)); auto cf = - [vec1, vec2, vec3, vec4, dir1, dir2, dir3, dir4] - (size_type i, size_type j) -> bool { + [dir1, dir2, dir3, dir4](const auto &lhs, const auto &rhs) -> bool { if (dir1 == sort_spec::ascen) { - if (vec1->at(i) < vec1->at(j)) + if (std::get<0>(lhs) < std::get<0>(rhs)) return (true); - else if (vec1->at(i) > vec1->at(j)) + else if (std::get<0>(lhs) > std::get<0>(rhs)) return (false); } else if (dir1 == sort_spec::desce) { - if (vec1->at(i) > vec1->at(j)) + if (std::get<0>(lhs) > std::get<0>(rhs)) return (true); - else if (vec1->at(i) < vec1->at(j)) + else if (std::get<0>(lhs) < std::get<0>(rhs)) return (false); } else if (dir1 == sort_spec::abs_ascen) { - if (abs__(vec1->at(i)) < abs__(vec1->at(j))) + if (abs__(std::get<0>(lhs)) < abs__(std::get<0>(rhs))) return (true); - else if (abs__(vec1->at(i)) > abs__(vec1->at(j))) + else if (abs__(std::get<0>(lhs)) > abs__(std::get<0>(rhs))) return (false); } else { // sort_spec::abs_desce - if (abs__(vec1->at(i)) > abs__(vec1->at(j))) + if (abs__(std::get<0>(lhs)) > abs__(std::get<0>(rhs))) return (true); - else if (abs__(vec1->at(i)) < abs__(vec1->at(j))) + else if (abs__(std::get<0>(lhs)) < abs__(std::get<0>(rhs))) return (false); } if (dir2 == sort_spec::ascen) { - if (vec2->at(i) < vec2->at(j)) + if (std::get<1>(lhs) < std::get<1>(rhs)) return (true); - else if (vec2->at(i) > vec2->at(j)) + else if (std::get<1>(lhs) > std::get<1>(rhs)) return (false); } else if (dir2 == 
sort_spec::desce) { - if (vec2->at(i) > vec2->at(j)) + if (std::get<1>(lhs) > std::get<1>(rhs)) return (true); - else if (vec2->at(i) < vec2->at(j)) + else if (std::get<1>(lhs) < std::get<1>(rhs)) return (false); } else if (dir2 == sort_spec::abs_ascen) { - if (abs__(vec2->at(i)) < abs__(vec2->at(j))) + if (abs__(std::get<1>(lhs)) < abs__(std::get<1>(rhs))) return (true); - else if (abs__(vec2->at(i)) > abs__(vec2->at(j))) + else if (abs__(std::get<1>(lhs)) > abs__(std::get<1>(rhs))) return (false); } else { // sort_spec::abs_desce - if (abs__(vec2->at(i)) > abs__(vec2->at(j))) + if (abs__(std::get<1>(lhs)) > abs__(std::get<1>(rhs))) return (true); - else if (abs__(vec2->at(i)) < abs__(vec2->at(j))) + else if (abs__(std::get<1>(lhs)) < abs__(std::get<1>(rhs))) return (false); } if (dir3 == sort_spec::ascen) { - if (vec3->at(i) < vec3->at(j)) + if (std::get<2>(lhs) < std::get<2>(rhs)) return (true); - else if (vec3->at(i) > vec3->at(j)) + else if (std::get<2>(lhs) > std::get<2>(rhs)) return (false); } else if (dir3 == sort_spec::desce) { - if (vec3->at(i) > vec3->at(j)) + if (std::get<2>(lhs) > std::get<2>(rhs)) return (true); - else if (vec3->at(i) < vec3->at(j)) + else if (std::get<2>(lhs) < std::get<2>(rhs)) return (false); } else if (dir3 == sort_spec::abs_ascen) { - if (abs__(vec3->at(i)) < abs__(vec3->at(j))) + if (abs__(std::get<2>(lhs)) < abs__(std::get<2>(rhs))) return (true); - else if (abs__(vec3->at(i)) > abs__(vec3->at(j))) + else if (abs__(std::get<2>(lhs)) > abs__(std::get<2>(rhs))) return (false); } else { // sort_spec::abs_desce - if (abs__(vec3->at(i)) > abs__(vec3->at(j))) + if (abs__(std::get<2>(lhs)) > abs__(std::get<2>(rhs))) return (true); - else if (abs__(vec3->at(i)) < abs__(vec3->at(j))) + else if (abs__(std::get<2>(lhs)) < abs__(std::get<2>(rhs))) return (false); } if (dir4 == sort_spec::ascen) - return (vec4->at(i) < vec4->at(j)); + return (std::get<3>(lhs) < std::get<3>(rhs)); else if (dir4 == sort_spec::desce) - return (vec4->at(i) > 
vec4->at(j)); + return (std::get<3>(lhs) > std::get<3>(rhs)); else if (dir4 == sort_spec::abs_ascen) - return (abs__(vec4->at(i)) < abs__(vec4->at(j))); + return (abs__(std::get<3>(lhs)) < abs__(std::get<3>(rhs))); else // sort_spec::abs_desce - return (abs__(vec4->at(i)) > abs__(vec4->at(j))); + return (abs__(std::get<3>(lhs)) > abs__(std::get<3>(rhs))); }; - sort_common_(*this, std::move(cf), ignore_index); + const size_type idx_s = indices_.size(); + StlVecType sorting_idxs(idx_s); + + std::iota(sorting_idxs.begin(), sorting_idxs.end(), 0); + + auto zip = + std::ranges::views::zip(*vec1, *vec2, *vec3, *vec4, sorting_idxs); + auto zip_idx = + std::ranges::views::zip(*vec1, *vec2, *vec3, *vec4, + indices_, sorting_idxs); + + if (! ignore_index) + std::ranges::sort(zip_idx, cf); + else + std::ranges::sort(zip, cf); + + sort_functor_ functor (sorting_idxs, idx_s); + + for (const auto &citer : column_list_) [[likely]] + if (citer.first != name1 && + citer.first != name2 && + citer.first != name3 && + citer.first != name4) + data_[citer.second].change(functor); return; } @@ -1242,145 +1319,180 @@ sort(const char *name1, sort_spec dir1, const ColumnVecType *vec5 { nullptr}; const SpinGuard guard (lock_); - if (! ::strcmp(name1, DF_INDEX_COL_NAME)) + if (! ::strcmp(name1, DF_INDEX_COL_NAME)) { vec1 = reinterpret_cast *>(&indices_); + ignore_index = true; + } else vec1 = &(get_column(name1, false)); - if (! ::strcmp(name2, DF_INDEX_COL_NAME)) + if (! ::strcmp(name2, DF_INDEX_COL_NAME)) { vec2 = reinterpret_cast *>(&indices_); + ignore_index = true; + } else vec2 = &(get_column(name2, false)); - if (! ::strcmp(name3, DF_INDEX_COL_NAME)) + if (! ::strcmp(name3, DF_INDEX_COL_NAME)) { vec3 = reinterpret_cast *>(&indices_); + ignore_index = true; + } else vec3 = &(get_column(name3, false)); - if (! ::strcmp(name4, DF_INDEX_COL_NAME)) + if (! 
::strcmp(name4, DF_INDEX_COL_NAME)) { vec4 = reinterpret_cast *>(&indices_); + ignore_index = true; + } else vec4 = &(get_column(name4, false)); - if (! ::strcmp(name4, DF_INDEX_COL_NAME)) + if (! ::strcmp(name5, DF_INDEX_COL_NAME)) { vec5 = reinterpret_cast *>(&indices_); + ignore_index = true; + } else vec5 = &(get_column(name5, false)); auto cf = - [vec1, vec2, vec3, vec4, vec5, dir1, dir2, dir3, dir4, dir5] - (size_type i, size_type j) -> bool { + [dir1, dir2, dir3, dir4, dir5] + (const auto &lhs, const auto &rhs) -> bool { if (dir1 == sort_spec::ascen) { - if (vec1->at(i) < vec1->at(j)) + if (std::get<0>(lhs) < std::get<0>(rhs)) return (true); - else if (vec1->at(i) > vec1->at(j)) + else if (std::get<0>(lhs) > std::get<0>(rhs)) return (false); } else if (dir1 == sort_spec::desce) { - if (vec1->at(i) > vec1->at(j)) + if (std::get<0>(lhs) > std::get<0>(rhs)) return (true); - else if (vec1->at(i) < vec1->at(j)) + else if (std::get<0>(lhs) < std::get<0>(rhs)) return (false); } else if (dir1 == sort_spec::abs_ascen) { - if (abs__(vec1->at(i)) < abs__(vec1->at(j))) + if (abs__(std::get<0>(lhs)) < abs__(std::get<0>(rhs))) return (true); - else if (abs__(vec1->at(i)) > abs__(vec1->at(j))) + else if (abs__(std::get<0>(lhs)) > abs__(std::get<0>(rhs))) return (false); } else { // sort_spec::abs_desce - if (abs__(vec1->at(i)) > abs__(vec1->at(j))) + if (abs__(std::get<0>(lhs)) > abs__(std::get<0>(rhs))) return (true); - else if (abs__(vec1->at(i)) < abs__(vec1->at(j))) + else if (abs__(std::get<0>(lhs)) < abs__(std::get<0>(rhs))) return (false); } if (dir2 == sort_spec::ascen) { - if (vec2->at(i) < vec2->at(j)) + if (std::get<1>(lhs) < std::get<1>(rhs)) return (true); - else if (vec2->at(i) > vec2->at(j)) + else if (std::get<1>(lhs) > std::get<1>(rhs)) return (false); } else if (dir2 == sort_spec::desce) { - if (vec2->at(i) > vec2->at(j)) + if (std::get<1>(lhs) > std::get<1>(rhs)) return (true); - else if (vec2->at(i) < vec2->at(j)) + else if (std::get<1>(lhs) < 
std::get<1>(rhs)) return (false); } else if (dir2 == sort_spec::abs_ascen) { - if (abs__(vec2->at(i)) < abs__(vec2->at(j))) + if (abs__(std::get<1>(lhs)) < abs__(std::get<1>(rhs))) return (true); - else if (abs__(vec2->at(i)) > abs__(vec2->at(j))) + else if (abs__(std::get<1>(lhs)) > abs__(std::get<1>(rhs))) return (false); } else { // sort_spec::abs_desce - if (abs__(vec2->at(i)) > abs__(vec2->at(j))) + if (abs__(std::get<1>(lhs)) > abs__(std::get<1>(rhs))) return (true); - else if (abs__(vec2->at(i)) < abs__(vec2->at(j))) + else if (abs__(std::get<1>(lhs)) < abs__(std::get<1>(rhs))) return (false); } if (dir3 == sort_spec::ascen) { - if (vec3->at(i) < vec3->at(j)) + if (std::get<2>(lhs) < std::get<2>(rhs)) return (true); - else if (vec3->at(i) > vec3->at(j)) + else if (std::get<2>(lhs) > std::get<2>(rhs)) return (false); } else if (dir3 == sort_spec::desce) { - if (vec3->at(i) > vec3->at(j)) + if (std::get<2>(lhs) > std::get<2>(rhs)) return (true); - else if (vec3->at(i) < vec3->at(j)) + else if (std::get<2>(lhs) < std::get<2>(rhs)) return (false); } else if (dir3 == sort_spec::abs_ascen) { - if (abs__(vec3->at(i)) < abs__(vec3->at(j))) + if (abs__(std::get<2>(lhs)) < abs__(std::get<2>(rhs))) return (true); - else if (abs__(vec3->at(i)) > abs__(vec3->at(j))) + else if (abs__(std::get<2>(lhs)) > abs__(std::get<2>(rhs))) return (false); } else { // sort_spec::abs_desce - if (abs__(vec3->at(i)) > abs__(vec3->at(j))) + if (abs__(std::get<2>(lhs)) > abs__(std::get<2>(rhs))) return (true); - else if (abs__(vec3->at(i)) < abs__(vec3->at(j))) + else if (abs__(std::get<2>(lhs)) < abs__(std::get<2>(rhs))) return (false); } if (dir4 == sort_spec::ascen) { - if (vec4->at(i) < vec4->at(j)) + if (std::get<3>(lhs) < std::get<3>(rhs)) return (true); - else if (vec4->at(i) > vec4->at(j)) + else if (std::get<3>(lhs) > std::get<3>(rhs)) return (false); } else if (dir4 == sort_spec::desce) { - if (vec4->at(i) > vec4->at(j)) + if (std::get<3>(lhs) > std::get<3>(rhs)) return (true); - 
else if (vec4->at(i) < vec4->at(j)) + else if (std::get<3>(lhs) < std::get<3>(rhs)) return (false); } else if (dir4 == sort_spec::abs_ascen) { - if (abs__(vec4->at(i)) < abs__(vec4->at(j))) + if (abs__(std::get<3>(lhs)) < abs__(std::get<3>(rhs))) return (true); - else if (abs__(vec4->at(i)) > abs__(vec4->at(j))) + else if (abs__(std::get<3>(lhs)) > abs__(std::get<3>(rhs))) return (false); } else { // sort_spec::abs_desce - if (abs__(vec4->at(i)) > abs__(vec4->at(j))) + if (abs__(std::get<3>(lhs)) > abs__(std::get<3>(rhs))) return (true); - else if (abs__(vec4->at(i)) < abs__(vec4->at(j))) + else if (abs__(std::get<3>(lhs)) < abs__(std::get<3>(rhs))) return (false); } if (dir5 == sort_spec::ascen) - return (vec5->at(i) < vec5->at(j)); + return (std::get<4>(lhs) < std::get<4>(rhs)); else if (dir5 == sort_spec::desce) - return (vec5->at(i) > vec5->at(j)); + return (std::get<4>(lhs) > std::get<4>(rhs)); else if (dir5 == sort_spec::abs_ascen) - return (abs__(vec5->at(i)) < abs__(vec5->at(j))); + return (abs__(std::get<4>(lhs)) < abs__(std::get<4>(rhs))); else // sort_spec::abs_desce - return (abs__(vec5->at(i)) > abs__(vec5->at(j))); + return (abs__(std::get<4>(lhs)) > abs__(std::get<4>(rhs))); }; - sort_common_(*this, std::move(cf), ignore_index); + const size_type idx_s = indices_.size(); + StlVecType sorting_idxs(idx_s); + + std::iota(sorting_idxs.begin(), sorting_idxs.end(), 0); + + auto zip = + std::ranges::views::zip(*vec1, *vec2, *vec3, *vec4, *vec5, + sorting_idxs); + auto zip_idx = + std::ranges::views::zip(*vec1, *vec2, *vec3, *vec4, *vec5, + indices_, sorting_idxs); + + if (! 
ignore_index) + std::ranges::sort(zip_idx, cf); + else + std::ranges::sort(zip, cf); + + sort_functor_ functor (sorting_idxs, idx_s); + + for (const auto &citer : column_list_) [[likely]] + if (citer.first != name1 && + citer.first != name2 && + citer.first != name3 && + citer.first != name4 && + citer.first != name5) + data_[citer.second].change(functor); return; } diff --git a/include/DataFrame/Internals/DataFrame_functors.h b/include/DataFrame/Internals/DataFrame_functors.h index 754c4f05..10c4885f 100644 --- a/include/DataFrame/Internals/DataFrame_functors.h +++ b/include/DataFrame/Internals/DataFrame_functors.h @@ -64,11 +64,11 @@ template struct sort_functor_ : DataVec::template visitor_base { inline sort_functor_ (const StlVecType &si, size_t is) - : sorted_idxs(si), idx_s(is) { } + : sorted_idxs(si), idx_s(is), done_vec(idx_s) { } const StlVecType &sorted_idxs; - StlVecType sorted_idxs_copy; - const size_t idx_s; + const size_t idx_s; + StlVecType done_vec; template void operator() (T2 &vec); diff --git a/include/DataFrame/Internals/DataFrame_misc.tcc b/include/DataFrame/Internals/DataFrame_misc.tcc index 2c920315..205f9283 100644 --- a/include/DataFrame/Internals/DataFrame_misc.tcc +++ b/include/DataFrame/Internals/DataFrame_misc.tcc @@ -77,8 +77,7 @@ template void DataFrame::sort_functor_::operator() (T2 &vec) { - sorted_idxs_copy = sorted_idxs; - _sort_by_sorted_index_(vec, sorted_idxs_copy, idx_s); + _sort_by_sorted_index_(vec, sorted_idxs, done_vec, idx_s); return; } diff --git a/include/DataFrame/Internals/DataFrame_private_decl.h b/include/DataFrame/Internals/DataFrame_private_decl.h index 75060265..828a2511 100644 --- a/include/DataFrame/Internals/DataFrame_private_decl.h +++ b/include/DataFrame/Internals/DataFrame_private_decl.h @@ -56,10 +56,6 @@ void read_csv2_(std::istream &file, size_type starting_row, size_type num_rows); -template -static void -sort_common_(DataFrame &df, CF &&comp_func, bool ignore_index); - template static void 
fill_missing_value_(ColumnVecType &vec, diff --git a/include/DataFrame/Internals/DataFrame_standalone.tcc b/include/DataFrame/Internals/DataFrame_standalone.tcc index e655e485..a654ab5c 100644 --- a/include/DataFrame/Internals/DataFrame_standalone.tcc +++ b/include/DataFrame/Internals/DataFrame_standalone.tcc @@ -863,25 +863,28 @@ inline static O _remove_copy_if_(I first, I last, O d_first, PRE predicate) { // ---------------------------------------------------------------------------- -template +template static inline void -_sort_by_sorted_index_(T &to_be_sorted, V &sorting_idxs, size_t idx_s) { - - if (idx_s > 0) { - idx_s -= 1; - for (size_t i = 0; i < idx_s; ++i) [[likely]] { - // while the element i is not yet in place - // - while (sorting_idxs[i] != sorting_idxs[sorting_idxs[i]]) { - // swap it with the element at its final place - // - const size_t j = sorting_idxs[i]; - - std::swap(to_be_sorted[j], to_be_sorted[sorting_idxs[j]]); - std::swap(sorting_idxs[i], sorting_idxs[j]); +_sort_by_sorted_index_(T &to_be_sorted, + const V &sorting_idxs, + BV &done_vec, + size_t idx_s) { + + std::fill(done_vec.begin(), done_vec.end(), false); + for (std::size_t i = 0; i < idx_s; ++i) [[likely]] + if (! 
done_vec[i]) { + done_vec[i] = true; + + std::size_t prev_j = i; + std::size_t j = sorting_idxs[i]; + + while (i != j) { + std::swap(to_be_sorted[prev_j], to_be_sorted[j]); + done_vec[j] = true; + prev_j = j; + j = sorting_idxs[j]; } } - } } // ---------------------------------------------------------------------------- diff --git a/src/Makefile.Linux.GCC64 b/src/Makefile.Linux.GCC64 index e67e1a0b..38f35349 100644 --- a/src/Makefile.Linux.GCC64 +++ b/src/Makefile.Linux.GCC64 @@ -8,10 +8,11 @@ BUILD_DEFINE = Linux_GCC64 CXX = /usr/bin/g++ -INCLUDES = -I/usr/include/c++/7 -I/usr/include +# INCLUDES = -I/usr/include/c++/7 -I/usr/include +INCLUDES = LFLAGS = -CXXFLAGS = -O3 $(INCLUDES) $(DEFINES) -std=c++20 +CXXFLAGS = -O3 $(INCLUDES) $(DEFINES) -std=c++2b PLATFORM_LIBS = -lpthread -ldl -lm -lstdc++ diff --git a/src/Makefile.Linux.GCC64D b/src/Makefile.Linux.GCC64D index 51f30664..ed438717 100644 --- a/src/Makefile.Linux.GCC64D +++ b/src/Makefile.Linux.GCC64D @@ -8,11 +8,12 @@ BUILD_DEFINE = Linux_GCC64 CXX = /usr/bin/g++ -INCLUDES = -I/usr/include/c++/7 -I/usr/inc17lude +# INCLUDES = -I/usr/include/c++/7 -I/usr/include +INCLUDES = LFLAGS = -CXXFLAGS = -g $(INCLUDES) $(DEFINES) -D_GLIBCXX_DEBUG -pedantic -Wall -Wextra -std=c++20 -# CXXFLAGS = -g $(INCLUDES) $(DEFINES) -std=c++20 +CXXFLAGS = -g $(INCLUDES) $(DEFINES) -D_GLIBCXX_DEBUG -pedantic -Wall -Wextra -std=c++2b +# CXXFLAGS = -g $(INCLUDES) $(DEFINES) -std=c++2b PLATFORM_LIBS = -lpthread -ldl -lm -lstdc++ -fsanitize-address-use-after-scope -fsanitize=address