Skip to content

Commit

Permalink
Merge pull request #270 from hosseinmoein/Hossein/Cpp23
Browse files (browse the repository at this point in the history)
Using C++23 to reimplement sort by using zip
  • Loading branch information
hosseinmoein authored Nov 29, 2023
2 parents 6e382c3 + 0abd9e4 commit b721632
Show file tree
Hide file tree
Showing 13 changed files with 517 additions and 383 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ add_library(DataFrame::DataFrame ALIAS DataFrame)

target_sources(DataFrame PRIVATE src/Utils/DateTime.cc)

target_compile_features(DataFrame PUBLIC cxx_std_20)
target_compile_features(DataFrame PUBLIC cxx_std_23)
target_compile_definitions(
DataFrame
PRIVATE $<$<BOOL:${HMDF_HAVE_CLOCK_GETTIME}>:HMDF_HAVE_CLOCK_GETTIME>
Expand Down
6 changes: 2 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,13 @@ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->
[![C++23](https://img.shields.io/badge/C%2B%2B-23-blue.svg)](https://isocpp.org/std/the-standard )
[![Build status](https://ci.appveyor.com/api/projects/status/hjw01qui3bvxs8yi?svg=true)](https://ci.appveyor.com/project/hosseinmoein/dataframe)
<BR>
![GitHub](https://img.shields.io/github/license/hosseinmoein/DataFrame.svg?color=red&style=popout)
[![C++20](https://img.shields.io/badge/C%2B%2B-20-blue.svg)](https://isocpp.org/std/the-standard )
[![Codacy Badge](https://api.codacy.com/project/badge/Grade/db646376a4014c3788c7224e670fe451)](https://app.codacy.com/manual/hosseinmoein/DataFrame?utm_source=github.com&utm_medium=referral&utm_content=hosseinmoein/DataFrame&utm_campaign=Badge_Grade_Dashboard)
<BR>
[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/hosseinmoein/DataFrame/master)
[![Maintenance](https://img.shields.io/badge/Maintained%3F-yes-green.svg)](https://GitHub.com/hosseinmoein/DataFrame/graphs/commit-activity)
![GitHub tag (latest by date)](https://img.shields.io/github/tag-date/hosseinmoein/DataFrame.svg?color=blue&label=Official%20Release&style=popout)
<BR>
![Conan Center](https://img.shields.io/conan/v/dataframe)
[![VCPKG package](https://repology.org/badge/version-for-repo/vcpkg/dataframe.svg)](https://repology.org/project/dataframe/versions)

Expand Down
22 changes: 17 additions & 5 deletions benchmarks/dataframe_performance.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ using namespace hmdf;
using namespace std::chrono;

constexpr std::size_t ALIGNMENT = 64;
constexpr std::size_t SIZE = 300000000;
// constexpr std::size_t SIZE = 300000000;
constexpr std::size_t SIZE = 10000000;

typedef StdDataFrame64<time_t> MyDataFrame;

Expand All @@ -57,7 +58,7 @@ int main(int, char *[]) {

std::cout << "Data generation/load time: "
<< double(duration_cast<microseconds>(second - first).count()) / 1000000.0
<< std::endl;
<< " secs" << std::endl;

MeanVisitor<double, time_t> n_mv;
VarVisitor<double, time_t> ln_vv;
Expand All @@ -81,14 +82,25 @@ int main(int, char *[]) {

const auto fourth = high_resolution_clock::now();

// df.sort<double, double, double>("log_normal", sort_spec::ascen,
// "exponential", sort_spec::ascen);
// std::cout << "1001th value in normal column: "
// << df.get_column<double>("normal")[1001] << std::endl;

const auto fifth = high_resolution_clock::now();

std::cout << "Calculation time: "
<< double(duration_cast<microseconds>(third - second).count()) / 1000000.0
<< '\n'
<< " secs\n"
<< "Selection time: "
<< double(duration_cast<microseconds>(fourth - third).count()) / 1000000.0
<< '\n'
<< " secs\n"
// << "Sorting time: "
// << double(duration_cast<microseconds>(fifth - fourth).count()) / 1000000.0
// << " secs\n"
<< "Overall time: "
<< double(duration_cast<microseconds>(fourth - first).count()) / 1000000.0
<< double(duration_cast<microseconds>(fifth - first).count()) / 1000000.0
<< " secs"
<< std::endl;
return (0);
}
Expand Down
16 changes: 11 additions & 5 deletions benchmarks/polars_performance.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

# ------------------------------------------------------------------------------

SIZE: int = 300000000
# SIZE: int = 300000000
SIZE: int = 10000000

first = datetime.datetime.now()
df = pl.DataFrame({"normal": np.random.normal(size=SIZE),
Expand All @@ -13,7 +14,7 @@
})
second = datetime.datetime.now()
print(f"Data generation/load time: "
f"{(second - first).seconds}.{(second - first).microseconds}")
f"{(second - first).seconds}.{(second - first).microseconds} secs")

df2 = df.select(
mean = pl.col("normal").mean(),
Expand All @@ -32,9 +33,14 @@
print(f"Number of rows after select: {df3.select(pl.count()).item()}")
fourth = datetime.datetime.now()

print(f"Calculation time: {(third - second).seconds}.{(third - second).microseconds}")
print(f"Selection time: {(fourth - third).seconds}.{(fourth - third).microseconds}")
print(f"Overall time: {(fourth - first).seconds}.{(fourth - first).microseconds}")
# df4 = df.sort(["log_normal", "exponential"]);
# print(f"1001th value in normal column: {df4['normal'][1001]}")
fifth = datetime.datetime.now()

print(f"Calculation time: {(third - second).seconds}.{(third - second).microseconds} secs")
print(f"Selection time: {(fourth - third).seconds}.{(fourth - third).microseconds} secs")
# print(f"Sorting time: {(fifth - fourth).seconds}.{(fifth - fourth).microseconds} secs")
print(f"Overall time: {(fifth - first).seconds}.{(fifth - first).microseconds} secs")

# ------------------------------------------------------------------------------

Expand Down
8 changes: 7 additions & 1 deletion include/DataFrame/DataFrameStatsVisitors.h
Original file line number Diff line number Diff line change
Expand Up @@ -4842,6 +4842,9 @@ struct LowessVisitor {
const Y &y_begin, const Y &y_end, // dependent variable
const X &x_begin, const X &x_end) { // independent variable

using bool_vec_t =
std::vector<bool, typename allocator_declare<bool, A>::type>;

assert(frac_ >= 0 && frac_ <= 1);
assert(loop_n_ > 2);

Expand All @@ -4862,7 +4865,10 @@ struct LowessVisitor {
[] (auto lhs, auto rhs) -> bool {
return (lhs < rhs);
});
_sort_by_sorted_index_(yvals, sorting_idxs, col_s);

bool_vec_t done_vec (col_s);

_sort_by_sorted_index_(yvals, sorting_idxs, done_vec, col_s);
lowess_(idx_begin, idx_end,
yvals.begin(), yvals.end(),
xvals.begin(), xvals.end());
Expand Down
4 changes: 2 additions & 2 deletions include/DataFrame/DataFrameTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -579,10 +579,10 @@ struct RandGenParams {
std::size_t t_dist { 1 };
// The μ distribution parameter (the mean of the distribution)
//
double mean { 1.0 };
double mean { 0 };
// The σ distribution parameter (the standard deviation)
//
double std { 0 };
double std { 1 };
// The λ distribution parameter (the rate parameter)
//
double lambda { 1.0 };
Expand Down
Loading

0 comments on commit b721632

Please sign in to comment.