Skip to content

Commit

Permalink
Further performance improvements for FP math (#905)
Browse files Browse the repository at this point in the history
* Further performance improvements for FP math

More FDIV->FMUL opportunities unlocked, following in the
spirit of #852

* Formatting fix

* Update src/h3lib/lib/localij.c

Co-authored-by: Nick Rabinowitz <[email protected]>

* Add #905 to CHANGELOG.md

* Save one fdiv and maybe a cosine

---------

Co-authored-by: Nick Rabinowitz <[email protected]>
  • Loading branch information
heshpdx and nrabinowitz authored Sep 19, 2024
1 parent a1037eb commit 4899d29
Show file tree
Hide file tree
Showing 7 changed files with 33 additions and 31 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ The public API of this library consists of the functions declared in file
### Changed
- Replace internal algorithm for `polygonToCells` with a new version that is more memory-efficient (#785)
- Reorganize tests into public / internal. (#762)
- Performance enhancement for aarch64, should not affect other platforms (#790, #792, #852)
- Performance enhancement for aarch64, may improve other platforms (#790, #792, #852, #905)
- `clang-format` upgraded to version 14. (#834)
- Fixed tests that incorrectly did not test resolution 15. (#820)
- Use `CMAKE_INSTALL_LIBDIR` when choosing where to install library files. (#819)
Expand Down
3 changes: 3 additions & 0 deletions src/h3lib/include/constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@
/** one third **/
#define M_ONETHIRD 0.333333333333333333333333333333333333333

/** one seventh (1/7) **/
#define M_ONESEVENTH 0.14285714285714285714285714285714285

/** rotation angle between Class II and Class III resolution axes
* (asin(sqrt(3.0 / 28.0))) */
#define M_AP7_ROT_RADS 0.333473172251832115336090755351601070065900389
Expand Down
10 changes: 5 additions & 5 deletions src/h3lib/lib/bbox.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,10 @@ bool bboxIsTransmeridian(const BBox *bbox) { return bbox->east < bbox->west; }
* @param center Output center coordinate
*/
void bboxCenter(const BBox *bbox, LatLng *center) {
center->lat = (bbox->north + bbox->south) / 2.0;
center->lat = (bbox->north + bbox->south) * 0.5;
// If the bbox crosses the antimeridian, shift east 360 degrees
double east = bboxIsTransmeridian(bbox) ? bbox->east + M_2PI : bbox->east;
center->lng = constrainLng((east + bbox->west) / 2.0);
center->lng = constrainLng((east + bbox->west) * 0.5);
}

/**
Expand Down Expand Up @@ -265,8 +265,8 @@ H3Error lineHexEstimate(const LatLng *origin, const LatLng *destination,
void scaleBBox(BBox *bbox, double scale) {
double width = bboxWidthRads(bbox);
double height = bboxHeightRads(bbox);
double widthBuffer = (width * scale - width) / 2;
double heightBuffer = (height * scale - height) / 2;
double widthBuffer = (width * scale - width) * 0.5;
double heightBuffer = (height * scale - height) * 0.5;
// Scale north and south, clamping to latitude domain
bbox->north += heightBuffer;
if (bbox->north > M_PI_2) bbox->north = M_PI_2;
Expand Down Expand Up @@ -308,4 +308,4 @@ void bboxNormalization(const BBox *a, const BBox *b,
: aIsTransmeridian ? NORMALIZE_EAST
: aToBTrendsEast ? NORMALIZE_WEST
: NORMALIZE_EAST;
}
}
16 changes: 8 additions & 8 deletions src/h3lib/lib/coordijk.c
Original file line number Diff line number Diff line change
Expand Up @@ -345,8 +345,8 @@ H3Error _upAp7Checked(CoordIJK *ijk) {
}
}

ijk->i = (int)lround(((i * 3) - j) / 7.0);
ijk->j = (int)lround((i + (j * 2)) / 7.0);
ijk->i = (int)lround(((i * 3) - j) * M_ONESEVENTH);
ijk->j = (int)lround((i + (j * 2)) * M_ONESEVENTH);
ijk->k = 0;

// Expected not to be reachable, because max + min or max - min would need
Expand Down Expand Up @@ -393,8 +393,8 @@ H3Error _upAp7rChecked(CoordIJK *ijk) {
}
}

ijk->i = (int)lround(((i * 2) + j) / 7.0);
ijk->j = (int)lround(((j * 3) - i) / 7.0);
ijk->i = (int)lround(((i * 2) + j) * M_ONESEVENTH);
ijk->j = (int)lround(((j * 3) - i) * M_ONESEVENTH);
ijk->k = 0;

// Expected not to be reachable, because max + min or max - min would need
Expand All @@ -417,8 +417,8 @@ void _upAp7(CoordIJK *ijk) {
int i = ijk->i - ijk->k;
int j = ijk->j - ijk->k;

ijk->i = (int)lround((3 * i - j) / 7.0);
ijk->j = (int)lround((i + 2 * j) / 7.0);
ijk->i = (int)lround((3 * i - j) * M_ONESEVENTH);
ijk->j = (int)lround((i + 2 * j) * M_ONESEVENTH);
ijk->k = 0;
_ijkNormalize(ijk);
}
Expand All @@ -434,8 +434,8 @@ void _upAp7r(CoordIJK *ijk) {
int i = ijk->i - ijk->k;
int j = ijk->j - ijk->k;

ijk->i = (int)lround((2 * i + j) / 7.0);
ijk->j = (int)lround((3 * j - i) / 7.0);
ijk->i = (int)lround((2 * i + j) * M_ONESEVENTH);
ijk->j = (int)lround((3 * j - i) * M_ONESEVENTH);
ijk->k = 0;
_ijkNormalize(ijk);
}
Expand Down
4 changes: 2 additions & 2 deletions src/h3lib/lib/faceijk.c
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ void _geoToHex2d(const LatLng *g, int res, int *face, Vec2d *v) {
_geoToClosestFace(g, face, &sqd);

// cos(r) = 1 - 2 * sin^2(r/2) = 1 - 2 * (sqd / 4) = 1 - sqd/2
double r = acos(1 - sqd / 2);
double r = acos(1 - sqd * 0.5);

if (r < EPSILON) {
v->x = v->y = 0.0;
Expand All @@ -413,7 +413,7 @@ void _geoToHex2d(const LatLng *g, int res, int *face, Vec2d *v) {
r = tan(r);

// scale for current resolution length u
r /= RES0_U_GNOMONIC;
r *= INV_RES0_U_GNOMONIC;
for (int i = 0; i < res; i++) r *= M_SQRT7;

// we now have (r, theta) in hex2d with theta ccw from x-axes
Expand Down
19 changes: 10 additions & 9 deletions src/h3lib/lib/latLng.c
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,8 @@ double normalizeLng(const double lng,
* @return the great circle distance in radians between a and b
*/
double H3_EXPORT(greatCircleDistanceRads)(const LatLng *a, const LatLng *b) {
double sinLat = sin((b->lat - a->lat) / 2.0);
double sinLng = sin((b->lng - a->lng) / 2.0);
double sinLat = sin((b->lat - a->lat) * 0.5);
double sinLng = sin((b->lng - a->lng) * 0.5);

double A = sinLat * sinLat + cos(a->lat) * cos(b->lat) * sinLng * sinLng;

Expand Down Expand Up @@ -258,9 +258,10 @@ void _geoAzDistanceRads(const LatLng *p1, double az, double distance,
p2->lat = -M_PI_2;
p2->lng = 0.0;
} else {
sinlng = sin(az) * sin(distance) / cos(p2->lat);
double invcosp2lat = 1.0 / cos(p2->lat);
sinlng = sin(az) * sin(distance) * invcosp2lat;
coslng = (cos(distance) - sin(p1->lat) * sin(p2->lat)) /
cos(p1->lat) / cos(p2->lat);
cos(p1->lat) * invcosp2lat;
if (sinlng > 1.0) sinlng = 1.0;
if (sinlng < -1.0) sinlng = -1.0;
if (coslng > 1.0) coslng = 1.0;
Expand Down Expand Up @@ -355,12 +356,12 @@ H3Error H3_EXPORT(getNumCells)(int res, int64_t *out) {
* @return area in radians^2 of triangle on unit sphere
*/
double triangleEdgeLengthsToArea(double a, double b, double c) {
double s = (a + b + c) / 2;
double s = (a + b + c) * 0.5;

a = (s - a) / 2;
b = (s - b) / 2;
c = (s - c) / 2;
s = s / 2;
a = (s - a) * 0.5;
b = (s - b) * 0.5;
c = (s - c) * 0.5;
s = s * 0.5;

return 4 * atan(sqrt(tan(s) * tan(a) * tan(b) * tan(c)));
}
Expand Down
10 changes: 4 additions & 6 deletions src/h3lib/lib/localij.c
Original file line number Diff line number Diff line change
Expand Up @@ -702,12 +702,10 @@ H3Error H3_EXPORT(gridPathCells)(H3Index start, H3Index end, H3Index *out) {
ijkToCube(&startIjk);
ijkToCube(&endIjk);

double iStep =
distance ? (double)(endIjk.i - startIjk.i) / (double)distance : 0;
double jStep =
distance ? (double)(endIjk.j - startIjk.j) / (double)distance : 0;
double kStep =
distance ? (double)(endIjk.k - startIjk.k) / (double)distance : 0;
double invDistance = distance ? 1.0 / (double)distance : 0;
double iStep = (double)(endIjk.i - startIjk.i) * invDistance;
double jStep = (double)(endIjk.j - startIjk.j) * invDistance;
double kStep = (double)(endIjk.k - startIjk.k) * invDistance;

CoordIJK currentIjk = {startIjk.i, startIjk.j, startIjk.k};
for (int64_t n = 0; n <= distance; n++) {
Expand Down

0 comments on commit 4899d29

Please sign in to comment.