Skip to content

Commit

Permalink
Fix concurrent map write fatal error.
Browse files Browse the repository at this point in the history
Occurred on macOS due to use of a static map that got shared among all
reporters, but revealed a badly designed constructor for the reporter
which is refactored in this commit.

The fatal error that is fixed:

    fatal error: concurrent map writes

    goroutine 42 [running]:
    github.com/telepresenceio/telepresence/v2/pkg/client/scout.getInstallIDFromFilesystem({0x1061f0b68, 0xc0005edbc0}, 0xc0001f13b0, {0x1056c9d01, 0x3})
    	github.com/telepresenceio/telepresence/v2/pkg/client/scout/reporter.go:149 +0x7e5
    github.com/telepresenceio/telepresence/v2/pkg/client/scout.NewReporterForInstallType.func1(0xc0001f13b0)
    	github.com/telepresenceio/telepresence/v2/pkg/client/scout/reporter.go:186 +0x32

Signed-off-by: Thomas Hallgren <[email protected]>
  • Loading branch information
thallgren committed Nov 25, 2023
1 parent fa73ab7 commit 8cf1fa2
Show file tree
Hide file tree
Showing 8 changed files with 90 additions and 104 deletions.
17 changes: 13 additions & 4 deletions CHANGELOG.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,21 +35,30 @@ items:
notes:
- type: bugfix
title: The DNS search path on Windows is now restored when Telepresence quits
body: The DNS search path that Telepresence uses to simulate the DNS lookup functionality in the connected
body: >-
The DNS search path that Telepresence uses to simulate the DNS lookup functionality in the connected
cluster namespace was not removed by a <code>telepresence quit</code>, resulting in connectivity problems
from the workstation. Telepresence will now remove the entries that it has added to the search list when
it quits.
- type: bugfix
title: The user-daemon would sometimes get killed when used by multiple simultaneous CLI clients.
body: >-
The user-daemon would die with a fatal &quot;fatal error: concurrent map writes&quot; error in the
<code>connector.log</code>, effectively killing the ongoing connection.
- type: bugfix
title: Multiple services ports using the same target port would not get intercepted correctly.
body: Intercepts didn't work when multiple service ports were using the same container port. Telepresence would
body: >-
Intercepts didn't work when multiple service ports were using the same container port. Telepresence would
think that one of the ports wasn't intercepted and therefore disable the intercept of the container port.
- type: bugfix
title: Root daemon refuses to disconnect.
body: The root daemon would sometimes hang forever when attempting to disconnect due to a deadlock in
body: >-
The root daemon would sometimes hang forever when attempting to disconnect due to a deadlock in
the VIF-device.
- type: bugfix
title: Fix panic in user daemon when traffic-manager was unreachable
body: The user daemon would panic if the traffic-manager was unreachable. It will now instead report
body: >-
The user daemon would panic if the traffic-manager was unreachable. It will now instead report
a proper error to the client.
- type: change
title: Removal of backward support for versions predating 2.6.0
Expand Down
2 changes: 1 addition & 1 deletion integration_test/helm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ func (s *helmSuite) Test_HelmMultipleInstalls() {
s.Contains(stdout, "Connected to context")
s.Eventually(func() bool {
return itest.Run(ctx, "curl", "--silent", "--connect-timeout", "1", fmt.Sprintf("%s.%s", svc, s.appSpace2)) == nil
}, 7*time.Second, 1*time.Second)
}, 15*time.Second, 3*time.Second)
})

s.Run("Can intercept", func() {
Expand Down
11 changes: 4 additions & 7 deletions pkg/client/scout/os_metadata_darwin.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,10 @@ import (
"github.com/datawire/dlib/dlog"
)

func getOsMetadata(ctx context.Context) map[string]any {
osMeta := map[string]any{
"os_version": "unknown",
"os_build_version": "unknown",
"os_name": "unknown",
}
func setOsMetadata(ctx context.Context, osMeta map[string]any) {
osMeta["os_version"] = "unknown"
osMeta["os_build_version"] = "unknown"
osMeta["os_name"] = "unknown"
cmd := dexec.CommandContext(ctx, "sw_vers")
cmd.DisableLogging = true
if r, err := cmd.Output(); err != nil {
Expand All @@ -36,5 +34,4 @@ func getOsMetadata(ctx context.Context) map[string]any {
}
}
}
return osMeta
}
8 changes: 3 additions & 5 deletions pkg/client/scout/os_metadata_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@ func isWSL(ctx context.Context) bool {
return strings.Contains(v, "WSL") || strings.Contains(v, "Windows")
}

func getOsMetadata(ctx context.Context) map[string]any {
osMeta := map[string]any{}
func setOsMetadata(ctx context.Context, osMeta map[string]any) {
osMeta["os_docker"] = isDocker(ctx)
osMeta["os_wsl"] = isWSL(ctx)
f, err := os.Open("/etc/os-release")
Expand All @@ -38,7 +37,7 @@ func getOsMetadata(ctx context.Context) map[string]any {
}
if err != nil {
dlog.Warnf(ctx, "Unable to open /etc/os-release or /usr/lib/os-release: %v", err)
return osMeta
return
}
scanner := bufio.NewScanner(f)
osRelease := map[string]string{}
Expand All @@ -49,7 +48,7 @@ func getOsMetadata(ctx context.Context) map[string]any {
}
if err := scanner.Err(); err != nil {
dlog.Warnf(ctx, "Unable to scan contents of /etc/os-release: %v", err)
return osMeta
return
}
// Different Linuxes will report things in different ways, so this will scan the
// contents of osRelease and look for each of the different keys that a value might be under
Expand All @@ -65,5 +64,4 @@ func getOsMetadata(ctx context.Context) map[string]any {
osMeta["os_name"] = getFromOSRelease("ID", "NAME")
osMeta["os_version"] = getFromOSRelease("VERSION", "VERSION_ID")
osMeta["os_build_version"] = getFromOSRelease("BUILD_ID")
return osMeta
}
3 changes: 2 additions & 1 deletion pkg/client/scout/os_metadata_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ import (

func TestOsMetadata(t *testing.T) {
ctx := dlog.NewTestContext(t, false)
osMeta := getOsMetadata(ctx)
osMeta := make(map[string]any)
setOsMetadata(ctx, osMeta)
for _, k := range []string{"os_version", "os_name"} {
v := osMeta[k]
if v == "" || v == "unknown" {
Expand Down
6 changes: 2 additions & 4 deletions pkg/client/scout/os_metadata_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,13 @@ import (
"github.com/telepresenceio/telepresence/v2/pkg/proc"
)

func getOsMetadata(ctx context.Context) map[string]any {
func setOsMetadata(ctx context.Context, osMeta map[string]any) {
cmd := proc.CommandContext(ctx, "wmic", "os", "get", "Caption,Version,BuildNumber", "/value")
cmd.DisableLogging = true
r, err := cmd.Output()
osMeta := map[string]any{}
if err != nil {
dlog.Warnf(ctx, "Error running wmic: %v", err)
return osMeta
return
}
scanner := bufio.NewScanner(bytes.NewReader(r))
for scanner.Scan() {
Expand All @@ -42,5 +41,4 @@ func getOsMetadata(ctx context.Context) map[string]any {
if err := scanner.Err(); err != nil {
dlog.Warnf(ctx, "Unable to scan wmic output: %v", err)
}
return osMeta
}
96 changes: 45 additions & 51 deletions pkg/client/scout/reporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,11 @@ var idFiles = map[InstallType]string{ //nolint:gochecknoglobals // constant
Docker: "docker_id",
}

// getInstallIDFromFilesystem returns the telepresence install ID, and also sets the reporter base
// metadata to include any conflicting install IDs written by old versions of the product.
func getInstallIDFromFilesystem(ctx context.Context, reporter *metriton.Reporter, installType InstallType) (string, error) {
// setInstallIDFromFilesystem sets the telepresence install ID in the given map, including any conflicting
// install IDs written by old versions of the product.
//
//nolint:gochecknoglobals // can be overridden for test purposes
var setInstallIDFromFilesystem = func(ctx context.Context, installType InstallType, md map[string]any) (string, error) {
type filecacheEntry struct {
Body string
Err error
Expand Down Expand Up @@ -146,7 +148,7 @@ func getInstallIDFromFilesystem(ctx context.Context, reporter *metriton.Reporter
}
}

reporter.BaseMetadata["new_install"] = len(allIDs) == 0
md["new_install"] = len(allIDs) == 0

// We don't want to add the extra ids until we've decided if it's a new install or not
// this is because we'd like a new install of type A to be reported even if there's already
Expand All @@ -167,7 +169,7 @@ func getInstallIDFromFilesystem(ctx context.Context, reporter *metriton.Reporter

for product, id := range allIDs {
if id != retID {
reporter.BaseMetadata["install_id_"+product] = id
md["install_id_"+product] = id
}
}
return retID, nil
Expand All @@ -178,23 +180,28 @@ func getInstallIDFromFilesystem(ctx context.Context, reporter *metriton.Reporter
const bufferSize = 40

func NewReporterForInstallType(ctx context.Context, mode string, installType InstallType, reportAnnotators []ReportAnnotator, reportMutators []ReportMutator) Reporter {
r := &reporter{
reporter: &metriton.Reporter{
Application: "telepresence2",
Version: client.Version(),
GetInstallID: func(r *metriton.Reporter) (string, error) {
id, err := getInstallIDFromFilesystem(ctx, r, installType)
if err != nil {
id = "00000000-0000-0000-0000-000000000000"
r.BaseMetadata["new_install"] = true
r.BaseMetadata["install_id_error"] = err.Error()
}
return id, nil
},
},
reportAnnotators: reportAnnotators,
reportMutators: reportMutators,
md := make(map[string]any, 12)
setOsMetadata(ctx, md)
installID, err := setInstallIDFromFilesystem(ctx, installType, md)
if err != nil {
installID = "00000000-0000-0000-0000-000000000000"
md["new_install"] = true
md["install_id_error"] = err.Error()
}
// Fixed (growing) metadata passed with every report
md["mode"] = mode
md["trace_id"] = uuid.NewString() // It's sent as JSON so might as well convert it to a string once here.
md["goos"] = runtime.GOOS
md["goarch"] = runtime.GOARCH

// Discover how Telepresence was installed based on the binary's location
installMethod, err := client.GetInstallMechanism()
if err != nil {
dlog.Errorf(ctx, "scout error getting executable: %s", err)
}
md["install_method"] = installMethod
setDefaultEnvironmentMetadata(md)

if env := client.GetEnv(ctx); env != nil && !env.ScoutDisable {
// Some tests disable scout reporting by setting the host IP to 127.0.0.1. This spams
// the logs with lots of "connection refused" messages and makes them hard to read.
Expand All @@ -208,8 +215,21 @@ func NewReporterForInstallType(ctx context.Context, mode string, installType Ins
}
}
}
r.initialize(ctx, mode, runtime.GOOS, runtime.GOARCH)
return r

return &reporter{
reporter: &metriton.Reporter{
Application: "telepresence2",
Version: client.Version(),
GetInstallID: func(r *metriton.Reporter) (string, error) {
return installID, nil
},
BaseMetadata: md,
},
reportAnnotators: reportAnnotators,
reportMutators: reportMutators,
buffer: make(chan bufEntry, bufferSize),
done: make(chan struct{}),
}
}

// DefaultReportAnnotators are the default annotator functions that the NewReporter function will pass to NewReporterForInstallType.
Expand Down Expand Up @@ -267,30 +287,6 @@ func SetMetadatum(ctx context.Context, key string, value any) {
}
}

// initialization broken out or constructor for the benefit of testing.
func (r *reporter) initialize(ctx context.Context, mode, goos, goarch string) {
r.buffer = make(chan bufEntry, bufferSize)
r.done = make(chan struct{})

// Fixed (growing) metadata passed with every report
baseMeta := getOsMetadata(ctx)
baseMeta["mode"] = mode
baseMeta["trace_id"] = uuid.NewString() // It's sent as JSON so might as well convert it to a string once here.
baseMeta["goos"] = goos
baseMeta["goarch"] = goarch

// Discover how Telepresence was installed based on the binary's location
installMethod, err := client.GetInstallMechanism()
if err != nil {
dlog.Errorf(ctx, "scout error getting executable: %s", err)
}
baseMeta["install_method"] = installMethod
for k, v := range getDefaultEnvironmentMetadata() {
baseMeta[k] = v
}
r.reporter.BaseMetadata = baseMeta
}

func (r *reporter) InstallID() string {
return r.reporter.InstallID()
}
Expand Down Expand Up @@ -399,15 +395,13 @@ func (r *reporter) doReport(ctx context.Context, be *bufEntry) {
}
}

// Returns a metadata map containing all the additional environment variables to be reported.
func getDefaultEnvironmentMetadata() map[string]string {
metadata := map[string]string{}
// setDefaultEnvironmentMetadata sets all the additional environment variables to be reported.
func setDefaultEnvironmentMetadata(metadata map[string]any) {
for _, e := range os.Environ() {
pair := strings.SplitN(e, "=", 2)
if strings.HasPrefix(pair[0], EnvironmentMetadataPrefix) {
key := strings.ToLower(strings.TrimPrefix(pair[0], EnvironmentMetadataPrefix))
metadata[key] = pair[1]
}
}
return metadata
}
51 changes: 20 additions & 31 deletions pkg/client/scout/reporter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -334,13 +334,9 @@ func TestInstallID(t *testing.T) {

func TestReport(t *testing.T) {
const (
mockVersion = "v2.4.5-test"
mockApplication = "telepresence2"
mockInstallID = "00000000-1111-2222-3333-444444444444"
mockMode = "test-mode"
mockOS = "linux"
mockARCH = "amd64"
mockAction = "test-action"
mockInstallID = "00000000-1111-2222-3333-444444444444"
mockMode = "test-mode"
mockAction = "test-action"
)
type testcase struct {
InputEnv map[string]string
Expand All @@ -353,8 +349,8 @@ func TestReport(t *testing.T) {
ExpectedMetadata: map[string]any{
"action": mockAction,
"mode": mockMode,
"goos": mockOS,
"goarch": mockARCH,
"goos": runtime.GOOS,
"goarch": runtime.GOARCH,
},
},
"with-additional-scout-meta": {
Expand All @@ -371,8 +367,8 @@ func TestReport(t *testing.T) {
ExpectedMetadata: map[string]any{
"action": mockAction,
"mode": mockMode,
"goos": mockOS,
"goarch": mockARCH,
"goos": runtime.GOOS,
"goarch": runtime.GOARCH,
"extra_field_1": "extra value 1",
"extra_field_2": "extra value 2",
},
Expand All @@ -385,8 +381,8 @@ func TestReport(t *testing.T) {
ExpectedMetadata: map[string]any{
"action": mockAction,
"mode": mockMode,
"goos": mockOS,
"goarch": mockARCH,
"goos": runtime.GOOS,
"goarch": runtime.GOARCH,
"extra_field_1": "extra value 1",
"extra_field_2": "extra value 2",
},
Expand All @@ -405,8 +401,8 @@ func TestReport(t *testing.T) {
ExpectedMetadata: map[string]any{
"action": mockAction,
"mode": mockMode,
"goos": mockOS,
"goarch": mockARCH,
"goos": runtime.GOOS,
"goarch": runtime.GOARCH,
"extra_field_1": "extra value 1",
},
},
Expand All @@ -420,8 +416,8 @@ func TestReport(t *testing.T) {
ExpectedMetadata: map[string]any{
"action": mockAction,
"mode": "overridden mode",
"goos": mockOS,
"goarch": mockARCH,
"goos": runtime.GOOS,
"goarch": runtime.GOARCH,
},
},
"with-report-annotators": {
Expand All @@ -445,8 +441,8 @@ func TestReport(t *testing.T) {
ExpectedMetadata: map[string]any{
"action": "overridden action",
"mode": "overridden mode",
"goos": mockOS,
"goarch": mockARCH,
"goos": runtime.GOOS,
"goarch": runtime.GOARCH,
"extra_field": "extra value", // Not overridden by annotation
"annotation": "annotated value",
},
Expand Down Expand Up @@ -488,19 +484,12 @@ func TestReport(t *testing.T) {
for k, v := range tcData.InputEnv {
os.Setenv(k, v)
}
scout := &reporter{
buffer: make(chan bufEntry, 40),
reporter: &metriton.Reporter{
Application: mockApplication,
Version: mockVersion,
GetInstallID: func(r *metriton.Reporter) (string, error) {
return mockInstallID, nil
},
Endpoint: testServer.URL,
},
reportAnnotators: tcData.ReportAnnotators,

setInstallIDFromFilesystem = func(ctx context.Context, installType InstallType, md map[string]any) (string, error) {
return mockInstallID, nil
}
scout.initialize(ctx, mockMode, mockOS, mockARCH)
scout := NewReporterForInstallType(ctx, mockMode, CLI, tcData.ReportAnnotators, nil).(*reporter)
scout.reporter.Endpoint = testServer.URL

// Start scout report processing...
sc, cancel := context.WithCancel(dcontext.WithSoftness(ctx))
Expand Down

0 comments on commit 8cf1fa2

Please sign in to comment.