Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: adding explicit batching #107

Merged
merged 9 commits into from
Oct 2, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
## 0.12.0 [unreleased]

### Features

1. [#107](https://github.com/InfluxCommunity/influxdb3-go/pull/107): Add `Batcher` to simplify the process of writing data in batches.

## 0.11.0 [2024-09-27]

### Bug Fixes
Expand Down
8 changes: 4 additions & 4 deletions examples/IOx/iox.go → examples/Basic/basic.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ import (
"os"
"time"

"github.com/apache/arrow/go/v15/arrow"
"github.com/InfluxCommunity/influxdb3-go/influxdb3"
"github.com/apache/arrow/go/v15/arrow"
)

func main() {
Expand Down Expand Up @@ -37,7 +37,7 @@ func main() {
// Create a Point using the full params constructor.
p := influxdb3.NewPoint("stat",
map[string]string{"location": "Paris"},
map[string]interface{}{
map[string]any{
"temperature": 24.5,
"humidity": 40,
},
Expand All @@ -49,7 +49,7 @@ func main() {
panic(err)
}

// Create a Point using the fluent interface (method chaining).
// Create a Point using the fluent interface (method chaining).
p = influxdb3.NewPointWithMeasurement("stat").
SetTag("location", "London").
SetField("temperature", 17.1).
Expand All @@ -71,7 +71,7 @@ func main() {
Time time.Time `lp:"timestamp"`
}{"stat", "Madrid", 33.8, 35, time.Now()}

// Write the data.
// Write the data.
err = client.WriteData(context.Background(), []any{sensorData})
if err != nil {
panic(err)
Expand Down
148 changes: 148 additions & 0 deletions examples/Batching/batching.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
package main

import (
"context"
"fmt"
"math/rand"
"os"
"text/tabwriter"
"time"

"github.com/InfluxCommunity/influxdb3-go/influxdb3"
"github.com/InfluxCommunity/influxdb3-go/influxdb3/batching"
"github.com/apache/arrow/go/v15/arrow"
)

const NumPoints = 54

// main demonstrates two ways of using the batcher: polling it with Ready and
// Emit, and letting it hand over full batches through callbacks. It then
// queries the written points back and prints them as a table.
//
// Connection settings are read from the INFLUX_URL, INFLUX_TOKEN and
// INFLUX_DATABASE environment variables. Any error aborts the program.
func main() {
	// Create a random number generator with a fixed seed
	r := rand.New(rand.NewSource(456))
	// Retrieve credentials from environment variables.
	url := os.Getenv("INFLUX_URL")
	token := os.Getenv("INFLUX_TOKEN")
	database := os.Getenv("INFLUX_DATABASE")

	// Instantiate a client using your credentials.
	client, err := influxdb3.New(influxdb3.ClientConfig{
		Host:     url,
		Token:    token,
		Database: database,
	})
	if err != nil {
		panic(err)
	}

	// Close the client when finished and raise any errors.
	defer func(client *influxdb3.Client) {
		err := client.Close()
		if err != nil {
			panic(err)
		}
	}(client)

	// Synchronous use

	// Create a Batcher with a size of 5
	b := batching.NewBatcher(batching.WithSize(5))

	// Start NumPoints seconds in the past; every point is one second apart
	t := time.Now().Add(-NumPoints * time.Second)

	// Add the points to the batcher and write each batch as soon as it is ready
	for range NumPoints {
		p := influxdb3.NewPoint("stat",
			map[string]string{"location": "Paris"},
			map[string]any{
				"temperature": 15 + r.Float64()*20,
				"humidity":    30 + r.Int63n(40),
			},
			t)

		// Add the point to the batcher
		b.Add(p)

		// Update time
		t = t.Add(time.Second)

		// If the batcher is ready, emit the batch and write it to the client
		if b.Ready() {
			if err := client.WritePoints(context.Background(), b.Emit()); err != nil {
				panic(err)
			}
		}
	}

	// Write the final (possibly partial) batch to the client
	err = client.WritePoints(context.Background(), b.Emit())
	if err != nil {
		panic(err)
	}

	// Asynchronous use

	// Create a batcher with a size of 5, a ready callback and an emit callback to write the batch to the client
	b = batching.NewBatcher(
		batching.WithSize(5),
		batching.WithReadyCallback(func() { fmt.Println("-- ready --") }),
		batching.WithEmitCallback(func(points []*influxdb3.Point) {
			// Keep the error local to the callback instead of overwriting
			// the err variable of the enclosing function.
			if err := client.WritePoints(context.Background(), points); err != nil {
				panic(err)
			}
		}),
	)

	// Start NumPoints seconds in the past; every point is one second apart
	t = time.Now().Add(-NumPoints * time.Second)

	// Add the points to the batcher; full batches are written by the emit callback
	for range NumPoints {
		p := influxdb3.NewPoint("stat",
			map[string]string{"location": "Madrid"},
			map[string]any{
				"temperature": 15 + r.Float64()*20,
				"humidity":    30 + r.Int63n(40),
			},
			t)

		// Add the point to the batcher
		b.Add(p)

		// Update time
		t = t.Add(time.Second)
	}

	// Write the final (possibly partial) batch to the client
	err = client.WritePoints(context.Background(), b.Emit())
	if err != nil {
		panic(err)
	}

	// Prepare an SQL query
	query := `
    SELECT *
    FROM stat
    WHERE time >= now() - interval '5 minutes'
    AND location IN ('Paris', 'Madrid')
  `

	// Run the query
	iterator, err := client.Query(context.Background(), query)
	if err != nil {
		panic(err)
	}

	// Use a tabwriter to format the output
	w := tabwriter.NewWriter(os.Stdout, 1, 1, 1, ' ', 0)
	defer w.Flush()

	fmt.Fprintln(w, "\nTime\tLocation\tTemperature\tHumidity")
	// Process the data
	for iterator.Next() {
		value := iterator.Value()
		t := (value["time"].(arrow.Timestamp)).ToTime(arrow.Nanosecond).Format(time.RFC3339)
		fmt.Fprintf(w, "%v\t%s\t%.1f\t%d\n", t, value["location"], value["temperature"], value["humidity"])
	}
}
4 changes: 3 additions & 1 deletion examples/README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# Examples

- [Write and query data](IOx/iox.go) - A complete Go example that demonstrates the different ways of writing data, and then queries your data stored in InfluxDB v3 (formerly InfluxDB IOx).
- [Write and query data](Basic/basic.go) - A complete Go example that demonstrates the different ways of writing data, and then queries your data stored in InfluxDB v3 (formerly InfluxDB IOx).
- [Downsampling](Downsampling/downsampling.go) - A complete Go example that uses a downsampling query and then writes downsampled data back to a different table.
- [HTTP Error Handling](HTTPErrorHandled/httpErrorHandled.go) - A complete Go example for reading HTTP headers when a server error occurs.
- [Batching write](Batching/batching.go) - A complete Go example that demonstrates how to write data in batches.
153 changes: 153 additions & 0 deletions influxdb3/batching/batcher.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
/*
The MIT License

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

// Package batching provides a batcher to collect points and emit them as batches.
package batching

import (
"sync"

"github.com/InfluxCommunity/influxdb3-go/influxdb3"
)

// Option is a functional option that adjusts the configuration of a Batcher.
type Option func(b *Batcher)

// WithSize returns an Option that sets the number of points per emitted batch.
func WithSize(size int) Option {
	setSize := func(b *Batcher) { b.size = size }
	return setSize
}

// WithCapacity returns an Option that sets the initial capacity of the
// points buffer.
func WithCapacity(capacity int) Option {
	setCapacity := func(b *Batcher) { b.capacity = capacity }
	return setCapacity
}

// WithReadyCallback returns an Option that registers f to be called whenever
// a new batch is ready. The batcher waits for f to return, so keep it short
// and hand long-running work over to a separate goroutine.
func WithReadyCallback(f func()) Option {
	register := func(b *Batcher) { b.callbackReady = f }
	return register
}

// WithEmitCallback returns an Option that registers f to receive each batch
// of points once it is ready. The batcher waits for f to return, so keep it
// short and hand long-running work over to a separate goroutine.
func WithEmitCallback(f func([]*influxdb3.Point)) Option {
	register := func(b *Batcher) { b.callbackEmit = f }
	return register
}

const (
	// DefaultBatchSize is the default number of points emitted per batch.
	DefaultBatchSize = 1000

	// DefaultCapacity is the default initial capacity of the point buffer;
	// it is twice the default batch size.
	DefaultCapacity = 2 * DefaultBatchSize
)

// Batcher collects points and emits them as batches.
type Batcher struct {
	// size is the number of points that make up one batch.
	size int
	// capacity is the initial capacity of the points buffer.
	capacity int

	// callbackReady, if set, is invoked when a batch becomes ready.
	callbackReady func()
	// callbackEmit, if set, is invoked with each ready batch.
	callbackEmit func([]*influxdb3.Point)

	// The embedded mutex is held by the exported methods while they access
	// the buffered points.
	sync.Mutex
	points []*influxdb3.Point
}

// NewBatcher returns a Batcher configured by the given options. Without
// options the batch size is DefaultBatchSize and the initial buffer capacity
// is DefaultCapacity. Options are applied in the order they are passed.
func NewBatcher(options ...Option) *Batcher {
	// Start from the defaults.
	b := new(Batcher)
	b.size = DefaultBatchSize
	b.capacity = DefaultCapacity

	// Let every option adjust the configuration.
	for i := range options {
		options[i](b)
	}

	// Allocate the buffer only after the options had a chance to change
	// the capacity.
	b.points = make([]*influxdb3.Point, 0, b.capacity)

	return b
}

// Add appends the point p to the batcher's buffer. If the buffer then holds
// at least one full batch, the ready callback (if set) is called, followed by
// the emit callback (if set), which receives the batch removed from the buffer.
//
// Both callbacks run while the batcher's lock is held, so they must not call
// Add, Ready or Emit on the same batcher.
func (b *Batcher) Add(p *influxdb3.Point) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was wondering if this could be a variadic function, so that []*influxdb3.Point could be used as an argument as well, or whether we might want o support adding []*influxdb3.Point in another function.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. Variadic parameters look good.

b.Lock()
defer b.Unlock()

// Add the point to the end of the buffer
b.points = append(b.points, p)

// Call callbacks if a new batch is ready. Without an emit callback the
// batch stays buffered until it is fetched with Emit.
if b.isReady() {
if b.callbackReady != nil {
b.callbackReady()
}
if b.callbackEmit != nil {
b.callbackEmit(b.emitPoints())
}
}
}

// Ready reports to the caller whether a new batch is ready to be emitted.
func (b *Batcher) Ready() bool {
	b.Lock()
	ready := b.isReady()
	b.Unlock()

	return ready
}

// isReady reports whether the buffer holds at least one full batch. It does
// not take the lock itself.
func (b *Batcher) isReady() bool {
	return b.size <= len(b.points)
}

// Emit removes the next batch from the batcher and returns it. The batch
// holds the configured batch size of points, or all remaining points if
// fewer are buffered. Call Emit at the end of your processing to drain the
// points that did not fill up a complete batch.
func (b *Batcher) Emit() []*influxdb3.Point {
	b.Lock()
	defer b.Unlock()

	batch := b.emitPoints()
	return batch
}

// emitPoints removes up to b.size points from the front of the buffer and
// returns them as a batch. It does not take the lock itself; callers are
// expected to hold it.
func (b *Batcher) emitPoints() []*influxdb3.Point {
	l := min(b.size, len(b.points))

	// Limit the capacity of the batch to its length. The batch shares its
	// backing array with the buffer, so without the limit an append on the
	// returned slice would overwrite points that are still buffered.
	points := b.points[:l:l]
	b.points = b.points[l:]

	return points
}
Loading