Skip to content

Commit

Permalink
Analytics (#1599)
Browse files Browse the repository at this point in the history
Co-authored-by: Eyal Bukchin <[email protected]>
  • Loading branch information
aviramha and eyalb181 authored Jul 2, 2023
1 parent babdb3b commit 5139e73
Show file tree
Hide file tree
Showing 24 changed files with 519 additions and 25 deletions.
1 change: 1 addition & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ concurrency:

env:
CARGO_NET_GIT_FETCH_WITH_CLI: "true"
MIRRORD_TELEMETRY: false

jobs:
towncrier_check:
Expand Down
24 changes: 24 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] }
futures = "0.3"
thiserror = "1"
k8s-openapi = { version = "0.18", features = ["v1_24"] }
reqwest = { version = "0.11", default-features = false, features = ["blocking", "rustls-tls"]}
reqwest = { version = "0.11", default-features = false, features = ["blocking", "rustls-tls", "json"] }
kube = { version = "0.82", default-features = false, features = ["runtime", "derive", "client", "ws", "rustls-tls"] }
trust-dns-resolver = { version = "0.22", features = ["serde-config", "tokio-runtime"] }
tokio-util = { version = "0.7", features = ["net", "codec"] }
Expand Down
24 changes: 13 additions & 11 deletions TELEMETRY.md
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
# Telemetry
# Telemetry / Analytics

mirrord sends anonymous usage statistics to our systems. The information sent is:
1. mirrord version.
2. cli/extension.
3. platform (linux, macos).
mirrord sends anonymous usage statistics to our systems.
We don't store IP addresses, and we don't create any unique identifier for the user.

In our databases, we don't store IP, and we don't create any unique identifier for the user.
Data collected is session duration and what features were used (steal/mirror/fs mode, etc).
This helps us improve the product by better understanding our users.
Types of data sent:
1. Feature on/off
2. Feature enum value (steal/mirror, read/write)
3. Feature count (how many ports in listen_ports)

## Disabling

### CLI
You can disable telemetry by specifying `--no-telemetry`.

### Extension
In the settings of the extension.
Telemetry can be disabled by specifying the following in the mirrord config file:
```json
{"telemetry": false}
```
1 change: 1 addition & 0 deletions changelog.d/+analytics.added.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added new analytics, see TELEMETRY.md for more details.
10 changes: 9 additions & 1 deletion mirrord-schema.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "LayerFileConfig",
"description": "mirrord allows for a high degree of customization when it comes to which features you want to enable, and how they should function.\n\nAll of the configuration fields have a default value, so a minimal configuration would be no configuration at all.\n\nTo help you get started, here are examples of a basic configuration file, and a complete configuration file containing all fields.\n\n### Basic `config.json` {#root-basic}\n\n```json { \"target\": \"pod/bear-pod\", \"feature\": { \"env\": true, \"fs\": \"read\", \"network\": true } } ```\n\n### Complete `config.json` {#root-complete}\n\n```json { \"accept_invalid_certificates\": false, \"skip_processes\": \"ide-debugger\", \"pause\": false, \"target\": { \"path\": \"pod/bear-pod\", \"namespace\": \"default\" }, \"connect_tcp\": null, \"agent\": { \"log_level\": \"info\", \"namespace\": \"default\", \"image\": \"ghcr.io/metalbear-co/mirrord:latest\", \"image_pull_policy\": \"IfNotPresent\", \"image_pull_secrets\": [ { \"secret-key\": \"secret\" } ], \"ttl\": 30, \"ephemeral\": false, \"communication_timeout\": 30, \"startup_timeout\": 360, \"network_interface\": \"eth0\", \"flush_connections\": true }, \"feature\": { \"env\": { \"include\": \"DATABASE_USER;PUBLIC_ENV\", \"exclude\": \"DATABASE_PASSWORD;SECRET_ENV\", \"overrides\": { \"DATABASE_CONNECTION\": \"db://localhost:7777/my-db\", \"LOCAL_BEAR\": \"panda\" } }, \"fs\": { \"mode\": \"write\", \"read_write\": \".+\\.json\" , \"read_only\": [ \".+\\.yaml\", \".+important-file\\.txt\" ], \"local\": [ \".+\\.js\", \".+\\.mjs\" ] }, \"network\": { \"incoming\": { \"mode\": \"steal\", \"http_header_filter\": { \"filter\": \"host: api\\..+\", \"ports\": [80, 8080] }, \"port_mapping\": [[ 7777, 8888 ]], \"ignore_localhost\": false, \"ignore_ports\": [9999, 10000] }, \"outgoing\": { \"tcp\": true, \"udp\": true, \"ignore_localhost\": false, \"unix_streams\": \"bear.+\" }, \"dns\": false }, \"capture_error_trace\": false }, \"operator\": true, 
\"kubeconfig\": \"~/.kube/config\", \"sip_binaries\": \"bash\" } ```\n\n# Options {#root-options}",
"description": "mirrord allows for a high degree of customization when it comes to which features you want to enable, and how they should function.\n\nAll of the configuration fields have a default value, so a minimal configuration would be no configuration at all.\n\nTo help you get started, here are examples of a basic configuration file, and a complete configuration file containing all fields.\n\n### Basic `config.json` {#root-basic}\n\n```json { \"target\": \"pod/bear-pod\", \"feature\": { \"env\": true, \"fs\": \"read\", \"network\": true } } ```\n\n### Complete `config.json` {#root-complete}\n\n```json { \"accept_invalid_certificates\": false, \"skip_processes\": \"ide-debugger\", \"pause\": false, \"target\": { \"path\": \"pod/bear-pod\", \"namespace\": \"default\" }, \"connect_tcp\": null, \"agent\": { \"log_level\": \"info\", \"namespace\": \"default\", \"image\": \"ghcr.io/metalbear-co/mirrord:latest\", \"image_pull_policy\": \"IfNotPresent\", \"image_pull_secrets\": [ { \"secret-key\": \"secret\" } ], \"ttl\": 30, \"ephemeral\": false, \"communication_timeout\": 30, \"startup_timeout\": 360, \"network_interface\": \"eth0\", \"flush_connections\": true }, \"feature\": { \"env\": { \"include\": \"DATABASE_USER;PUBLIC_ENV\", \"exclude\": \"DATABASE_PASSWORD;SECRET_ENV\", \"overrides\": { \"DATABASE_CONNECTION\": \"db://localhost:7777/my-db\", \"LOCAL_BEAR\": \"panda\" } }, \"fs\": { \"mode\": \"write\", \"read_write\": \".+\\.json\" , \"read_only\": [ \".+\\.yaml\", \".+important-file\\.txt\" ], \"local\": [ \".+\\.js\", \".+\\.mjs\" ] }, \"network\": { \"incoming\": { \"mode\": \"steal\", \"http_header_filter\": { \"filter\": \"host: api\\..+\", \"ports\": [80, 8080] }, \"port_mapping\": [[ 7777, 8888 ]], \"ignore_localhost\": false, \"ignore_ports\": [9999, 10000] }, \"outgoing\": { \"tcp\": true, \"udp\": true, \"ignore_localhost\": false, \"unix_streams\": \"bear.+\" }, \"dns\": false }, \"capture_error_trace\": false }, \"operator\": true, 
\"kubeconfig\": \"~/.kube/config\", \"sip_binaries\": \"bash\", \"telemetry\": true } ```\n\n# Options {#root-options}",
"type": "object",
"properties": {
"accept_invalid_certificates": {
Expand Down Expand Up @@ -116,6 +116,14 @@
"type": "null"
}
]
},
"telemetry": {
"title": "telemetry {#root-telemetry}",
"description": "Controls whether or not mirrord sends telemetry data to MetalBear cloud. Telemetry sent doesn't contain personal identifiers or any data that should be considered sensitive. It is used to improve the product. [For more information](https://github.com/metalbear-co/mirrord/blob/main/TELEMETRY.md)",
"type": [
"boolean",
"null"
]
}
},
"additionalProperties": false,
Expand Down
25 changes: 25 additions & 0 deletions mirrord/analytics/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
[package]
name = "mirrord-analytics"
version.workspace = true
authors.workspace = true
description.workspace = true
documentation.workspace = true
readme.workspace = true
homepage.workspace = true
repository.workspace = true
license.workspace = true
keywords.workspace = true
categories.workspace = true
publish.workspace = true
edition.workspace = true


[dependencies]

serde.workspace = true
reqwest.workspace = true
tracing.workspace = true

[dev-dependencies]
serde_json.workspace = true
assert-json-diff = "2"
182 changes: 182 additions & 0 deletions mirrord/analytics/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
use std::collections::HashMap;

use serde::{Deserialize, Serialize};
use tracing::info;

/// Version of this crate, read from Cargo's `CARGO_PKG_VERSION` at compile time.
const CURRENT_VERSION: &str = env!("CARGO_PKG_VERSION");

/// A single value that can be stored under an analytics key.
///
/// The set of variants is deliberately narrow so that free-form (and thus
/// potentially sensitive) data cannot be reported by accident.
/// Do not add a string variant.
///
/// The enum is `untagged`: each variant is serialized as its bare inner value.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum AnalyticValue {
    /// A boolean flag.
    Bool(bool),
    /// An unsigned number.
    Number(u32),
    /// A nested group of analytics entries.
    Nested(Analytics),
}

/// Key/value store for analytics data.
///
/// Values are limited to what [`AnalyticValue`] can represent. The inner map
/// is flattened on (de)serialization, so the keys appear directly in the
/// resulting JSON object.
///
/// # Examples
///
/// The usage below produces the following JSON:
/// ```json
/// {
///     "a": true,
///     "b": false,
///     "c": 3,
///     "extra": {
///         "d": true,
///         "e": true
///     }
/// }
/// ```
/// ```
/// use mirrord_analytics::{Analytics, CollectAnalytics};
/// let mut analytics = Analytics::default();
/// analytics.add("a", true);
/// analytics.add("b", false);
/// // The suffix is required: both `u32` and `usize` convert into
/// // `AnalyticValue`, so a bare integer literal would be ambiguous.
/// analytics.add("c", 3u32);
///
/// struct A {}
/// impl CollectAnalytics for A {
///     fn collect_analytics(&self, analytics: &mut Analytics) {
///         analytics.add("d", true);
///     }
/// }
///
/// struct B {}
/// impl CollectAnalytics for B {
///     fn collect_analytics(&self, analytics: &mut Analytics) {
///         let a = A {};
///         a.collect_analytics(analytics);
///         analytics.add("e", true);
///     }
/// }
/// let b = B {};
/// analytics.add("extra", b);
/// ```
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct Analytics {
    /// Collected entries, keyed by name.
    #[serde(flatten)]
    data: HashMap<String, AnalyticValue>,
}

impl Analytics {
    /// Records `value` under `key`.
    ///
    /// The key is turned into a `String` via `ToString` and the value is
    /// converted via `Into<AnalyticValue>`. Adding a key that is already
    /// present replaces the previously stored value.
    pub fn add<Key, Value>(&mut self, key: Key, value: Value)
    where
        Key: ToString,
        Value: Into<AnalyticValue>,
    {
        let name = key.to_string();
        let converted: AnalyticValue = value.into();
        self.data.insert(name, converted);
    }
}

/// Implemented by types that can report analytics about themselves.
pub trait CollectAnalytics {
    /// Adds this value's analytics entries to the given [`Analytics`].
    fn collect_analytics(&self, analytics: &mut Analytics);
}

impl From<bool> for AnalyticValue {
fn from(b: bool) -> Self {
AnalyticValue::Bool(b)
}
}

impl From<u32> for AnalyticValue {
fn from(n: u32) -> Self {
AnalyticValue::Number(n)
}
}

impl From<usize> for AnalyticValue {
fn from(n: usize) -> Self {
AnalyticValue::Number(u32::try_from(n).unwrap_or(u32::MAX))
}
}

impl From<Analytics> for AnalyticValue {
fn from(analytics: Analytics) -> Self {
AnalyticValue::Nested(analytics)
}
}

impl<T: CollectAnalytics> From<T> for AnalyticValue {
fn from(other: T) -> Self {
let mut analytics = Analytics::default();
other.collect_analytics(&mut analytics);
analytics.into()
}
}

/// Payload that `send_analytics` serializes as the JSON body of its request.
#[derive(Debug, Serialize, Deserialize)]
struct AnalyticsReport {
    /// Analytics entries gathered by the caller.
    event_properties: Analytics,
    /// Operating system name (filled from `std::env::consts::OS`).
    platform: String,
    /// Duration as passed in by the caller.
    /// NOTE(review): the unit is not visible in this file - confirm with callers.
    duration: u32,
    /// Version of this crate (filled from `CURRENT_VERSION`).
    version: String,
    /// Operator flag as passed in by the caller.
    operator: bool,
}

/// Sends an analytics report to the analytics endpoint.
///
/// The report combines the given `analytics` with the current OS name, this
/// crate's version, and the `duration` and `operator` values supplied by the
/// caller, and is POSTed as JSON.
///
/// This is best-effort: any failure (transport error, timeout, or a non-success
/// HTTP status) is only logged at `info` level and never returned to the caller.
pub async fn send_analytics(analytics: Analytics, duration: u32, operator: bool) {
    // Upper bound for the whole request. Without it a stalled connection would
    // keep the caller waiting indefinitely on data that is purely optional.
    const REQUEST_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);

    let report = AnalyticsReport {
        event_properties: analytics,
        platform: std::env::consts::OS.to_string(),
        version: CURRENT_VERSION.to_string(),
        duration,
        operator,
    };

    let client = reqwest::Client::new();
    let res = client
        .post("https://analytics.metalbear.co/api/v1/event")
        .timeout(REQUEST_TIMEOUT)
        .json(&report)
        .send()
        .await
        // `send` only fails on transport errors; turn 4xx/5xx responses into
        // errors as well so a rejected report is logged too.
        .and_then(reqwest::Response::error_for_status);
    if let Err(e) = res {
        info!("Failed to send analytics: {e}");
    }
}

#[cfg(test)]
mod tests {
    use assert_json_diff::assert_json_eq;
    use serde_json::json;

    use super::*;

    /// Builds an `Analytics` with flat entries plus one nested entry,
    /// serializes it and verifies that the resulting JSON is correct.
    #[test]
    fn happy_flow() {
        let mut analytics = Analytics::default();
        analytics.add("a", true);
        analytics.add("b", false);
        // Explicit suffix: both `u32` and `usize` convert into `AnalyticValue`,
        // so a bare integer literal would be ambiguous.
        analytics.add("c", 3u32);

        struct A {}
        impl CollectAnalytics for A {
            fn collect_analytics(&self, analytics: &mut Analytics) {
                analytics.add("d", true);
            }
        }

        struct B {}
        impl CollectAnalytics for B {
            fn collect_analytics(&self, analytics: &mut Analytics) {
                let a = A {};
                a.collect_analytics(analytics);
                analytics.add("e", true);
            }
        }
        let b = B {};
        analytics.add("extra", b);

        assert_json_eq!(
            analytics,
            json!({
                "a": true,
                "b": false,
                "c": 3,
                "extra": {
                    "d": true,
                    "e": true
                }
            })
        );
    }
}
1 change: 1 addition & 0 deletions mirrord/cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ mirrord-kube = { path = "../kube" }
mirrord-config = { path = "../config" }
mirrord-protocol = { path = "../protocol" }
mirrord-console = { path = "../console" }
mirrord-analytics = { path = "../analytics" }

actix-codec.workspace = true
clap.workspace = true
Expand Down
6 changes: 5 additions & 1 deletion mirrord/cli/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,10 +169,14 @@ pub(super) struct ExecArgs {
#[arg(long)]
pub no_udp_outgoing: bool,

/// Disable telemetry - this also disables version check. See https://github.com/metalbear-co/mirrord/blob/main/TELEMETRY.md
/// Disable telemetry. See https://github.com/metalbear-co/mirrord/blob/main/TELEMETRY.md
#[arg(long)]
pub no_telemetry: bool,

#[arg(long)]
/// Disable version check on startup.
pub disable_version_check: bool,

/// Load config from config file
#[arg(short = 'f', long)]
pub config_file: Option<PathBuf>,
Expand Down
Loading

0 comments on commit 5139e73

Please sign in to comment.