From 3bd3f480c66a81bc012efd838cef6e0cbda52870 Mon Sep 17 00:00:00 2001 From: Muhammad Haris <101793258+headlessNode@users.noreply.github.com> Date: Thu, 3 Oct 2024 12:54:30 +0500 Subject: [PATCH] feat: add C `ndarray` API and refactor `blas/ext/base/dcusumpw` PR-URL: https://github.com/stdlib-js/stdlib/pull/2981 Co-authored-by: Athan Reines Reviewed-by: Athan Reines Signed-off-by: Athan Reines --- .../@stdlib/blas/ext/base/dcusumpw/README.md | 139 +++++++++++++++++- .../ext/base/dcusumpw/benchmark/benchmark.js | 9 +- .../dcusumpw/benchmark/benchmark.native.js | 9 +- .../dcusumpw/benchmark/benchmark.ndarray.js | 9 +- .../benchmark/benchmark.ndarray.native.js | 9 +- .../dcusumpw/benchmark/c/benchmark.length.c | 63 +++++++- .../blas/ext/base/dcusumpw/docs/repl.txt | 6 +- .../ext/base/dcusumpw/examples/c/example.c | 12 +- .../blas/ext/base/dcusumpw/examples/index.js | 7 +- .../include/stdlib/blas/ext/base/dcusumpw.h | 9 +- .../blas/ext/base/dcusumpw/lib/dcusumpw.js | 22 +-- .../ext/base/dcusumpw/lib/ndarray.native.js | 13 +- .../blas/ext/base/dcusumpw/manifest.json | 32 ++-- .../blas/ext/base/dcusumpw/src/addon.c | 30 +++- .../base/dcusumpw/src/{dcusumpw.c => main.c} | 69 ++++----- 15 files changed, 313 insertions(+), 125 deletions(-) rename lib/node_modules/@stdlib/blas/ext/base/dcusumpw/src/{dcusumpw.c => main.c} (54%) diff --git a/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/README.md b/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/README.md index 1ecdb3b9d7c..c5f2098bd11 100644 --- a/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/README.md +++ b/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/README.md @@ -115,7 +115,7 @@ The function has the following additional parameters: - **offsetX**: starting index for `x`. - **offsetY**: starting index for `y`. -While [`typed array`][mdn-typed-array] views mandate a view offset based on the underlying `buffer`, `offsetX` and `offsetY` parameters support indexing semantics based on a starting indices. 
For example, to calculate the cumulative sum of every other value in the strided input array starting from the second value and to store in the last `N` elements of the strided output array starting from the last element +While [`typed array`][mdn-typed-array] views mandate a view offset based on the underlying buffer, offset parameters support indexing semantics based on starting indices. For example, to calculate the cumulative sum of every other value in the strided input array starting from the second value and to store in the last `N` elements of the strided output array starting from the last element ```javascript var Float64Array = require( '@stdlib/array/float64' ); @@ -149,12 +149,13 @@ dcusumpw.ndarray( 4, 0.0, x, 2, 1, y, -1, y.length-1 ); ```javascript -var discreteUniform = require( '@stdlib/random/base/discrete-uniform' ).factory; -var filledarrayBy = require( '@stdlib/array/filled-by' ); +var discreteUniform = require( '@stdlib/random/array/discrete-uniform' ); var Float64Array = require( '@stdlib/array/float64' ); var dcusumpw = require( '@stdlib/blas/ext/base/dcusumpw' ); -var x = filledarrayBy( 10, 'float64', discreteUniform( 0, 100 ) ); +var x = discreteUniform( 10, -100, 100, { + 'dtype': 'float64' +}); var y = new Float64Array( x.length ); console.log( x ); @@ -168,8 +169,138 @@ console.log( y ); + + * * * +
+ +## C APIs + + + +
+ +
+ + + + + +
+ +### Usage + +```c +#include "stdlib/blas/ext/base/dcusumpw.h" +``` + +#### stdlib_strided_dcusumpw( N, sum, \*X, strideX, \*Y, strideY ) + +Computes the cumulative sum of double-precision floating-point strided array elements using pairwise summation. + +```c +const double x[] = { 1.0, 2.0, 3.0, 4.0 }; +double y[] = { 0.0, 0.0, 0.0, 0.0 }; + +stdlib_strided_dcusumpw( 4, 0.0, x, 1, y, 1 ); +``` + +The function accepts the following arguments: + +- **N**: `[in] CBLAS_INT` number of indexed elements. +- **sum**: `[in] double` initial sum. +- **X**: `[in] double*` input array. +- **strideX**: `[in] CBLAS_INT` index increment for `X`. +- **Y**: `[out] double*` output array. +- **strideY**: `[in] CBLAS_INT` index increment for `Y`. + +```c +void stdlib_strided_dcusumpw( const CBLAS_INT N, const double sum, const double *X, const CBLAS_INT strideX, double *Y, const CBLAS_INT strideY ); +``` + + + +#### stdlib_strided_dcusumpw_ndarray( N, sum, \*X, strideX, offsetX, \*Y, strideY, offsetY ) + + + +Computes the cumulative sum of double-precision floating-point strided array elements using pairwise summation and alternative indexing semantics. + +```c +const double x[] = { 1.0, 2.0, 3.0, 4.0 }; +double y[] = { 0.0, 0.0, 0.0, 0.0 }; + +stdlib_strided_dcusumpw_ndarray( 4, 0.0, x, 1, 0, y, 1, 0 ); +``` + +The function accepts the following arguments: + +- **N**: `[in] CBLAS_INT` number of indexed elements. +- **sum**: `[in] double` initial sum. +- **X**: `[in] double*` input array. +- **strideX**: `[in] CBLAS_INT` index increment for `X`. +- **offsetX**: `[in] CBLAS_INT` starting index for `X`. +- **Y**: `[out] double*` output array. +- **strideY**: `[in] CBLAS_INT` index increment for `Y`. +- **offsetY**: `[in] CBLAS_INT` starting index for `Y`. + +```c +void stdlib_strided_dcusumpw_ndarray( const CBLAS_INT N, const double sum, const double *X, const CBLAS_INT strideX, const CBLAS_INT offsetX, double *Y, const CBLAS_INT strideY, const CBLAS_INT offsetY ); +``` + +
+ + + + + +
+ +
+ + + + + +
+ +### Examples + +```c +#include "stdlib/blas/ext/base/dcusumpw.h" +#include <stdio.h> + +int main( void ) { + // Create strided arrays: + const double x[] = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0 }; + double y[] = { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; + + // Specify the number of elements: + const int N = 4; + + // Specify stride lengths: + const int strideX = 2; + const int strideY = -2; + + // Compute the cumulative sum: + stdlib_strided_dcusumpw( N, 0.0, x, strideX, y, strideY ); + + // Print the result: + for ( int i = 0; i < 8; i++ ) { + printf( "y[ %d ] = %lf\n", i, y[ i ] ); + } +} +``` + +
+ + + +
+ + +
## References diff --git a/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/benchmark/benchmark.js b/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/benchmark/benchmark.js index 672fd14498c..e85a79967f7 100644 --- a/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/benchmark/benchmark.js +++ b/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/benchmark/benchmark.js @@ -21,8 +21,7 @@ // MODULES // var bench = require( '@stdlib/bench' ); -var uniform = require( '@stdlib/random/base/uniform' ).factory; -var filledarrayBy = require( '@stdlib/array/filled-by' ); +var uniform = require( '@stdlib/random/array/uniform' ); var isnan = require( '@stdlib/math/base/assert/is-nan' ); var pow = require( '@stdlib/math/base/special/pow' ); var Float64Array = require( '@stdlib/array/float64' ); @@ -32,7 +31,9 @@ var dcusumpw = require( './../lib/dcusumpw.js' ); // VARIABLES // -var rand = uniform( -10.0, 10.0 ); +var options = { + 'dtype': 'float64' +}; // FUNCTIONS // @@ -45,7 +46,7 @@ var rand = uniform( -10.0, 10.0 ); * @returns {Function} benchmark function */ function createBenchmark( len ) { - var x = filledarrayBy( len, 'float64', rand ); + var x = uniform( len, -100, 100, options ); var y = new Float64Array( len ); return benchmark; diff --git a/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/benchmark/benchmark.native.js b/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/benchmark/benchmark.native.js index b2c8bcd5ec6..98a5a2629a7 100644 --- a/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/benchmark/benchmark.native.js +++ b/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/benchmark/benchmark.native.js @@ -22,8 +22,7 @@ var resolve = require( 'path' ).resolve; var bench = require( '@stdlib/bench' ); -var uniform = require( '@stdlib/random/base/uniform' ).factory; -var filledarrayBy = require( '@stdlib/array/filled-by' ); +var uniform = require( '@stdlib/random/array/uniform' ); var isnan = require( '@stdlib/math/base/assert/is-nan' ); var pow = require( 
'@stdlib/math/base/special/pow' ); var Float64Array = require( '@stdlib/array/float64' ); @@ -33,7 +32,9 @@ var pkg = require( './../package.json' ).name; // VARIABLES // -var rand = uniform( -10.0, 10.0 ); +var options = { + 'dtype': 'float64' +}; var dcusumpw = tryRequire( resolve( __dirname, './../lib/dcusumpw.native.js' ) ); var opts = { 'skip': ( dcusumpw instanceof Error ) @@ -50,7 +51,7 @@ var opts = { * @returns {Function} benchmark function */ function createBenchmark( len ) { - var x = filledarrayBy( len, 'float64', rand ); + var x = uniform( len, -100, 100, options ); var y = new Float64Array( len ); return benchmark; diff --git a/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/benchmark/benchmark.ndarray.js b/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/benchmark/benchmark.ndarray.js index 6119c2d3011..b98bdc47497 100644 --- a/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/benchmark/benchmark.ndarray.js +++ b/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/benchmark/benchmark.ndarray.js @@ -21,8 +21,7 @@ // MODULES // var bench = require( '@stdlib/bench' ); -var uniform = require( '@stdlib/random/base/uniform' ).factory; -var filledarrayBy = require( '@stdlib/array/filled-by' ); +var uniform = require( '@stdlib/random/array/uniform' ); var isnan = require( '@stdlib/math/base/assert/is-nan' ); var pow = require( '@stdlib/math/base/special/pow' ); var Float64Array = require( '@stdlib/array/float64' ); @@ -32,7 +31,9 @@ var dcusumpw = require( './../lib/ndarray.js' ); // VARIABLES // -var rand = uniform( -10.0, 10.0 ); +var options = { + 'dtype': 'float64' +}; // FUNCTIONS // @@ -45,7 +46,7 @@ var rand = uniform( -10.0, 10.0 ); * @returns {Function} benchmark function */ function createBenchmark( len ) { - var x = filledarrayBy( len, 'float64', rand ); + var x = uniform( len, -100, 100, options ); var y = new Float64Array( len ); return benchmark; diff --git a/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/benchmark/benchmark.ndarray.native.js 
b/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/benchmark/benchmark.ndarray.native.js index 466aef05f41..db79814788d 100644 --- a/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/benchmark/benchmark.ndarray.native.js +++ b/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/benchmark/benchmark.ndarray.native.js @@ -22,8 +22,7 @@ var resolve = require( 'path' ).resolve; var bench = require( '@stdlib/bench' ); -var uniform = require( '@stdlib/random/base/uniform' ).factory; -var filledarrayBy = require( '@stdlib/array/filled-by' ); +var uniform = require( '@stdlib/random/array/uniform' ); var isnan = require( '@stdlib/math/base/assert/is-nan' ); var pow = require( '@stdlib/math/base/special/pow' ); var Float64Array = require( '@stdlib/array/float64' ); @@ -33,7 +32,9 @@ var pkg = require( './../package.json' ).name; // VARIABLES // -var rand = uniform( -10.0, 10.0 ); +var options = { + 'dtype': 'float64' +}; var dcusumpw = tryRequire( resolve( __dirname, './../lib/ndarray.native.js' ) ); var opts = { 'skip': ( dcusumpw instanceof Error ) @@ -50,7 +51,7 @@ var opts = { * @returns {Function} benchmark function */ function createBenchmark( len ) { - var x = filledarrayBy( len, 'float64', rand ); + var x = uniform( len, -100, 100, options ); var y = new Float64Array( len ); return benchmark; diff --git a/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/benchmark/c/benchmark.length.c b/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/benchmark/c/benchmark.length.c index 821307b2e10..13e609fb684 100644 --- a/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/benchmark/c/benchmark.length.c +++ b/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/benchmark/c/benchmark.length.c @@ -94,13 +94,16 @@ static double rand_double( void ) { * @param len array length * @return elapsed time in seconds */ -static double benchmark( int iterations, int len ) { +static double benchmark1( int iterations, int len ) { double elapsed; - double x[ len ]; - double y[ len ]; + double *x; + double *y; double 
t; int i; + x = (double *)malloc( len * sizeof(double) ); + y = (double *)malloc( len * sizeof(double) ); + for ( i = 0; i < len; i++ ) { x[ i ] = ( rand_double() * 20000.0 ) - 10000.0; y[ i ] = 0.0; @@ -118,6 +121,47 @@ static double benchmark( int iterations, int len ) { if ( y[ len-1 ] != y[ len-1 ] ) { printf( "should not return NaN\n" ); } + free( x ); + free( y ); + return elapsed; +} + +/** +* Runs a benchmark. +* +* @param iterations number of iterations +* @param len array length +* @return elapsed time in seconds +*/ +static double benchmark2( int iterations, int len ) { + double elapsed; + double *x; + double *y; + double t; + int i; + + x = (double *)malloc( len * sizeof(double) ); + y = (double *)malloc( len * sizeof(double) ); + + for ( i = 0; i < len; i++ ) { + x[ i ] = ( rand_double() * 20000.0 ) - 10000.0; + y[ i ] = 0.0; + } + t = tic(); + for ( i = 0; i < iterations; i++ ) { + x[ 0 ] += 1.0; + stdlib_strided_dcusumpw_ndarray( len, 0.0, x, 1, 0, y, 1, 0 ); + if ( y[ 0 ] != y[ 0 ] ) { + printf( "should not return NaN\n" ); + break; + } + } + elapsed = tic() - t; + if ( y[ len-1 ] != y[ len-1 ] ) { + printf( "should not return NaN\n" ); + } + free( x ); + free( y ); return elapsed; } @@ -143,7 +187,18 @@ int main( void ) { for ( j = 0; j < REPEATS; j++ ) { count += 1; printf( "# c::%s:len=%d\n", NAME, len ); - elapsed = benchmark( iter, len ); + elapsed = benchmark1( iter, len ); + print_results( iter, elapsed ); + printf( "ok %d benchmark finished\n", count ); + } + } + for ( i = MIN; i <= MAX; i++ ) { + len = pow( 10, i ); + iter = ITERATIONS / pow( 10, i-1 ); + for ( j = 0; j < REPEATS; j++ ) { + count += 1; + printf( "# c::%s:ndarray:len=%d\n", NAME, len ); + elapsed = benchmark2( iter, len ); print_results( iter, elapsed ); printf( "ok %d benchmark finished\n", count ); } diff --git a/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/docs/repl.txt b/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/docs/repl.txt index c51084fb34b..fef83956712 
100644 --- a/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/docs/repl.txt +++ b/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/docs/repl.txt @@ -3,7 +3,7 @@ Computes the cumulative sum of double-precision floating-point strided array elements using pairwise summation. - The `N` and `stride` parameters determine which elements in the strided + The `N` and stride parameters determine which elements in the strided arrays are accessed at runtime. Indexing is relative to the first index. To introduce an offset, use a typed @@ -66,8 +66,8 @@ elements using pairwise summation and alternative indexing semantics. While typed array views mandate a view offset based on the underlying - buffer, the `offset` parameter supports indexing semantics based on a - starting index. + buffer, offset parameters support indexing semantics based on starting + indices. Parameters ---------- diff --git a/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/examples/c/example.c b/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/examples/c/example.c index 2cc20a53e79..c2178a2585a 100644 --- a/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/examples/c/example.c +++ b/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/examples/c/example.c @@ -17,9 +17,7 @@ */ #include "stdlib/blas/ext/base/dcusumpw.h" -#include #include -#include int main( void ) { // Create strided arrays: @@ -27,17 +25,17 @@ int main( void ) { double y[] = { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; // Specify the number of elements: - const int64_t N = 4; + const int N = 4; // Specify stride lengths: - const int64_t strideX = 2; - const int64_t strideY = -2; + const int strideX = 2; + const int strideY = -2; // Compute the cumulative sum: stdlib_strided_dcusumpw( N, 0.0, x, strideX, y, strideY ); // Print the result: - for ( int64_t i = 0; i < 8; i++ ) { - printf( "y[ %"PRId64" ] = %lf\n", i, y[ i ] ); + for ( int i = 0; i < 8; i++ ) { + printf( "y[ %d ] = %lf\n", i, y[ i ] ); } } diff --git 
a/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/examples/index.js b/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/examples/index.js index 478000ca3f8..c08332d22f7 100644 --- a/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/examples/index.js +++ b/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/examples/index.js @@ -18,12 +18,13 @@ 'use strict'; -var discreteUniform = require( '@stdlib/random/base/discrete-uniform' ).factory; -var filledarrayBy = require( '@stdlib/array/filled-by' ); +var discreteUniform = require( '@stdlib/random/array/discrete-uniform' ); var Float64Array = require( '@stdlib/array/float64' ); var dcusumpw = require( './../lib' ); -var x = filledarrayBy( 10, 'float64', discreteUniform( 0, 100 ) ); +var x = discreteUniform( 10, -100, 100, { + 'dtype': 'float64' +}); var y = new Float64Array( x.length ); console.log( x ); diff --git a/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/include/stdlib/blas/ext/base/dcusumpw.h b/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/include/stdlib/blas/ext/base/dcusumpw.h index f0103709984..65337e636f4 100644 --- a/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/include/stdlib/blas/ext/base/dcusumpw.h +++ b/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/include/stdlib/blas/ext/base/dcusumpw.h @@ -19,7 +19,7 @@ #ifndef STDLIB_BLAS_EXT_BASE_DCUSUMPW_H #define STDLIB_BLAS_EXT_BASE_DCUSUMPW_H -#include +#include "stdlib/blas/base/shared.h" /* * If C++, prevent name mangling so that the compiler emits a binary file having undecorated names, thus mirroring the behavior of a C compiler. @@ -31,7 +31,12 @@ extern "C" { /** * Computes the cumulative sum of double-precision floating-point strided array elements using pairwise summation. 
*/ -void stdlib_strided_dcusumpw( const int64_t N, const double sum, const double *X, const int64_t strideX, double *Y, const int64_t strideY ); +void API_SUFFIX(stdlib_strided_dcusumpw)( const CBLAS_INT N, const double sum, const double *X, const CBLAS_INT strideX, double *Y, const CBLAS_INT strideY ); + +/** +* Computes the cumulative sum of double-precision floating-point strided array elements using pairwise summation and alternative indexing semantics. +*/ +void API_SUFFIX(stdlib_strided_dcusumpw_ndarray)( const CBLAS_INT N, const double sum, const double *X, const CBLAS_INT strideX, const CBLAS_INT offsetX, double *Y, const CBLAS_INT strideY, const CBLAS_INT offsetY ); #ifdef __cplusplus } diff --git a/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/lib/dcusumpw.js b/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/lib/dcusumpw.js index d2ad1a7a483..3fcf53053b0 100644 --- a/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/lib/dcusumpw.js +++ b/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/lib/dcusumpw.js @@ -20,7 +20,8 @@ // MODULES // -var cusum = require( './ndarray.js' ); +var stride2offset = require( '@stdlib/strided/base/stride2offset' ); +var ndarray = require( './ndarray.js' ); // MAIN // @@ -54,23 +55,8 @@ var cusum = require( './ndarray.js' ); * // returns [ 1.0, -1.0, 1.0 ] */ function dcusumpw( N, sum, x, strideX, y, strideY ) { - var ix; - var iy; - - if ( N <= 0 ) { - return y; - } - if ( strideX < 0 ) { - ix = (1-N) * strideX; - } else { - ix = 0; - } - if ( strideY < 0 ) { - iy = (1-N) * strideY; - } else { - iy = 0; - } - return cusum( N, sum, x, strideX, ix, y, strideY, iy ); + ndarray( N, sum, x, strideX, stride2offset( N, strideX ), y, strideY, stride2offset( N, strideY ) ); // eslint-disable-line max-len + return y; } diff --git a/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/lib/ndarray.native.js b/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/lib/ndarray.native.js index 4e689127267..61bda1aa498 100644 --- 
a/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/lib/ndarray.native.js +++ b/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/lib/ndarray.native.js @@ -20,9 +20,7 @@ // MODULES // -var minViewBufferIndex = require( '@stdlib/strided/base/min-view-buffer-index' ); -var offsetView = require( '@stdlib/strided/base/offset-view' ); -var addon = require( './dcusumpw.native.js' ); +var addon = require( './../src/addon.node' ); // MAIN // @@ -50,14 +48,7 @@ var addon = require( './dcusumpw.native.js' ); * // returns [ 1.0, -1.0, 1.0, 5.0, 0.0, 0.0, 0.0, 0.0 ] */ function dcusumpw( N, sum, x, strideX, offsetX, y, strideY, offsetY ) { - var viewX; - var viewY; - offsetX = minViewBufferIndex( N, strideX, offsetX ); - offsetY = minViewBufferIndex( N, strideY, offsetY ); - - viewX = offsetView( x, offsetX ); - viewY = offsetView( y, offsetY ); - addon( N, sum, viewX, strideX, viewY, strideY ); + addon.ndarray( N, sum, x, strideX, offsetX, y, strideY, offsetY ); return y; } diff --git a/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/manifest.json b/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/manifest.json index 68f3a5646e2..0ec8bf37bc2 100644 --- a/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/manifest.json +++ b/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/manifest.json @@ -28,50 +28,52 @@ { "task": "build", "src": [ - "./src/dcusumpw.c" + "./src/main.c" ], "include": [ "./include" ], - "libraries": [ - "-lm" - ], + "libraries": [], "libpath": [], "dependencies": [ "@stdlib/napi/argv", "@stdlib/napi/argv-int64", "@stdlib/napi/argv-double", "@stdlib/napi/argv-strided-float64array", - "@stdlib/napi/export" + "@stdlib/napi/export", + "@stdlib/blas/base/shared", + "@stdlib/strided/base/stride2offset" ] }, { "task": "benchmark", "src": [ - "./src/dcusumpw.c" + "./src/main.c" ], "include": [ "./include" ], - "libraries": [ - "-lm" - ], + "libraries": [], "libpath": [], - "dependencies": [] + "dependencies": [ + "@stdlib/blas/base/shared", + 
"@stdlib/strided/base/stride2offset" + ] }, { "task": "examples", "src": [ - "./src/dcusumpw.c" + "./src/main.c" ], "include": [ "./include" ], - "libraries": [ - "-lm" - ], + "libraries": [], "libpath": [], - "dependencies": [] + "dependencies": [ + "@stdlib/blas/base/shared", + "@stdlib/strided/base/stride2offset" + ] } ] } diff --git a/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/src/addon.c b/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/src/addon.c index 2d37a4bfab2..ec136a456b1 100644 --- a/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/src/addon.c +++ b/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/src/addon.c @@ -17,11 +17,13 @@ */ #include "stdlib/blas/ext/base/dcusumpw.h" +#include "stdlib/blas/base/shared.h" +#include "stdlib/napi/export.h" #include "stdlib/napi/argv.h" #include "stdlib/napi/argv_int64.h" #include "stdlib/napi/argv_double.h" #include "stdlib/napi/argv_strided_float64array.h" -#include "stdlib/napi/export.h" +#include /** * Receives JavaScript callback invocation data. @@ -38,9 +40,29 @@ static napi_value addon( napi_env env, napi_callback_info info ) { STDLIB_NAPI_ARGV_INT64( env, strideY, argv, 5 ); STDLIB_NAPI_ARGV_STRIDED_FLOAT64ARRAY( env, X, N, strideX, argv, 2 ); STDLIB_NAPI_ARGV_STRIDED_FLOAT64ARRAY( env, Y, N, strideY, argv, 4 ); - stdlib_strided_dcusumpw( N, sum, X, strideX, Y, strideY ); + API_SUFFIX(stdlib_strided_dcusumpw)( N, sum, X, strideX, Y, strideY ); + return NULL; +} + +/** +* Receives JavaScript callback invocation data. 
+* +* @param env environment under which the function is invoked +* @param info callback data +* @return Node-API value +*/ +static napi_value addon_method( napi_env env, napi_callback_info info ) { + STDLIB_NAPI_ARGV( env, info, argv, argc, 8 ); + STDLIB_NAPI_ARGV_INT64( env, N, argv, 0 ); + STDLIB_NAPI_ARGV_DOUBLE( env, sum, argv, 1 ); + STDLIB_NAPI_ARGV_INT64( env, strideX, argv, 3 ); + STDLIB_NAPI_ARGV_INT64( env, offsetX, argv, 4 ); + STDLIB_NAPI_ARGV_INT64( env, strideY, argv, 6 ); + STDLIB_NAPI_ARGV_INT64( env, offsetY, argv, 7 ); + STDLIB_NAPI_ARGV_STRIDED_FLOAT64ARRAY( env, X, N, strideX, argv, 2 ); + STDLIB_NAPI_ARGV_STRIDED_FLOAT64ARRAY( env, Y, N, strideY, argv, 5 ); + API_SUFFIX(stdlib_strided_dcusumpw_ndarray)( N, sum, X, strideX, offsetX, Y, strideY, offsetY ); return NULL; } - // Register a Node-API module: -STDLIB_NAPI_MODULE_EXPORT_FCN( addon ); +STDLIB_NAPI_MODULE_EXPORT_FCN_WITH_METHOD( addon, "ndarray", addon_method ) diff --git a/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/src/dcusumpw.c b/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/src/main.c similarity index 54% rename from lib/node_modules/@stdlib/blas/ext/base/dcusumpw/src/dcusumpw.c rename to lib/node_modules/@stdlib/blas/ext/base/dcusumpw/src/main.c index 06c0e5692f8..fc64d0d8642 100644 --- a/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/src/dcusumpw.c +++ b/lib/node_modules/@stdlib/blas/ext/base/dcusumpw/src/main.c @@ -17,11 +17,28 @@ */ #include "stdlib/blas/ext/base/dcusumpw.h" -#include +#include "stdlib/strided/base/stride2offset.h" +#include "stdlib/blas/base/shared.h" /** * Computes the cumulative sum of double-precision floating-point strided array elements using pairwise summation. 
* +* @param N number of indexed elements +* @param sum initial sum +* @param X input array +* @param strideX X stride length +* @param Y output array +* @param strideY Y stride length +*/ +void API_SUFFIX(stdlib_strided_dcusumpw)( const CBLAS_INT N, const double sum, const double *X, const CBLAS_INT strideX, double *Y, const CBLAS_INT strideY ) { + const CBLAS_INT ox = stdlib_strided_stride2offset( N, strideX ); + const CBLAS_INT oy = stdlib_strided_stride2offset( N, strideY ); + API_SUFFIX(stdlib_strided_dcusumpw_ndarray)( N, sum, X, strideX, ox, Y, strideY, oy ); +} + +/** +* Computes the cumulative sum of double-precision floating-point strided array elements using pairwise summation and alternative indexing semantics. +* * ## Method * * - This implementation uses pairwise summation, which accrues rounding error `O(log2 N)` instead of `O(N)`. The recursion depth is also `O(log2 N)`. @@ -33,34 +50,24 @@ * @param N number of indexed elements * @param sum initial sum * @param X input array -* @param strideX X stride length +* @param strideX X index increment +* @param offsetX X starting index * @param Y output array -* @param strideY Y stride length +* @param strideY Y index increment +* @param offsetY Y starting index */ -void stdlib_strided_dcusumpw( const int64_t N, const double sum, const double *X, const int64_t strideX, double *Y, const int64_t strideY ) { - double *xp1; - double *xp2; - double *yp1; - double *yp2; - int64_t ix; - int64_t iy; - int64_t i; - int64_t n; +void API_SUFFIX(stdlib_strided_dcusumpw_ndarray)( const CBLAS_INT N, const double sum, const double *X, const CBLAS_INT strideX, const CBLAS_INT offsetX, double *Y, const CBLAS_INT strideY, const CBLAS_INT offsetY ) { + CBLAS_INT ix; + CBLAS_INT iy; + CBLAS_INT i; + CBLAS_INT n; double s; if ( N <= 0 ) { return; } - if ( strideX < 0 ) { - ix = (1-N) * strideX; - } else { - ix = 0; - } - if ( strideY < 0 ) { - iy = (1-N) * strideY; - } else { - iy = 0; - } + ix = offsetX; + iy = offsetY; // Blocksize for
pairwise summation... if ( N <= 128 ) { s = 0.0; @@ -73,22 +80,8 @@ void stdlib_strided_dcusumpw( const int64_t N, const double sum, const double *X return; } n = N / 2; - if ( strideX < 0 ) { - xp1 = (double *)X + ( (n-N)*strideX ); - xp2 = (double *)X; - } else { - xp1 = (double *)X; - xp2 = (double *)X + ( n*strideX ); - } - if ( strideY < 0 ) { - yp1 = Y + ( (n-N)*strideY ); - yp2 = Y; - } else { - yp1 = Y; - yp2 = Y + ( n*strideY ); - } - stdlib_strided_dcusumpw( n, sum, xp1, strideX, yp1, strideY ); + API_SUFFIX(stdlib_strided_dcusumpw_ndarray)( n, sum, X, strideX, ix, Y, strideY, iy ); iy += (n-1) * strideY; - stdlib_strided_dcusumpw( N-n, Y[ iy ], xp2, strideX, yp2, strideY ); + API_SUFFIX(stdlib_strided_dcusumpw_ndarray)( N-n, Y[ iy ], X, strideX, ix+(n*strideX), Y, strideY, iy+strideY ); return; }