From defa69734adc2c45f71d2c8694c04ba903de5d80 Mon Sep 17 00:00:00 2001
From: Matthew Hodgson <matthew@matrix.org>
Date: Tue, 5 Nov 2024 14:03:17 +0000
Subject: [PATCH] finish hooking up livekit

---
 compose.yml                                   |  58 +++-
 data-template/element-call/config.json        |  14 +
 data-template/livekit/config.yaml             | 321 ++++++++++++++++++
 data-template/nginx/conf.d/app.conf           |  21 ++
 .../nginx/www/.well-known/matrix/client       |   2 +-
 data-template/synapse/homeserver.yaml         |   9 +
 init/init.sh                                  |   2 +-
 init/livekit-jwt-entrypoint.sh                |  10 +
 8 files changed, 426 insertions(+), 11 deletions(-)
 create mode 100644 data-template/element-call/config.json
 create mode 100644 data-template/livekit/config.yaml
 create mode 100755 init/livekit-jwt-entrypoint.sh

diff --git a/compose.yml b/compose.yml
index 24312db..0765a3f 100644
--- a/compose.yml
+++ b/compose.yml
@@ -3,10 +3,14 @@ networks:
   backend:
 
 secrets:
-   postgres_password:
-      file: secrets/postgres/postgres_password
-   synapse_signing_key:
-      file: secrets/synapse/${DOMAIN}.signing.key
+  postgres_password:
+    file: secrets/postgres/postgres_password
+  synapse_signing_key:
+    file: secrets/synapse/${DOMAIN}.signing.key
+  livekit_api_key:
+    file: secrets/livekit/livekit_api_key
+  livekit_secret_key:
+    file: secrets/livekit/livekit_secret_key
 
 services:
   # dependencies for optionally generating default configs + secrets
@@ -145,7 +149,7 @@ services:
       redis:
         condition: service_started
       postgres:
-        condition: service_healthy
+        condition: service_started # NOTE(review): was service_healthy; confirm dropping the readiness gate on postgres is intended
       init:
         condition: service_completed_successfully
 
@@ -208,7 +212,7 @@ services:
     command: "server --config=/data/config.yaml"
     depends_on:
       postgres:
-        condition: service_healthy
+        condition: service_started # NOTE(review): was service_healthy; confirm dropping the readiness gate on postgres is intended
       init:
         condition: service_completed_successfully
 
@@ -261,9 +265,16 @@ services:
     command: --config /etc/livekit.yaml --node-ip ${LIVEKIT_NODE_IP}
     ports:
       # - 7880:7880 # HTTP listener
-      - 7881:7881 # WS signalling
-      # - 50000-60000:50000-60000/tcp # TCP media
-      # - 50000-60000:50000-60000/udp # UDP media
+      - "7881:7881" # TCP WebRTC transport, advertised via SDP
+
+      # TODO: expose livekit-turn on TCP & UDP 443 via nginx
+      # At least this would allow UDP turn on port 443 for better perf.
+
+      # You can't expose a massive range here as it literally sets up 10,000 userland listeners, which takes forever
+      # and will clash with any existing high-numbered ports.
+      # So for now, tunnel everything via TCP 7881. FIXME!
+      # - "50000-60000:50000-60000/tcp" # TCP media
+      # - "50000-60000:50000-60000/udp" # UDP media
     networks:
       - backend
     depends_on:
@@ -271,3 +282,32 @@ services:
         condition: service_completed_successfully
       redis:
         condition: service_started
+
+  livekit-jwt:
+    build:
+      # evil hack to pull in bash so we can run an entrypoint.sh (--no-cache keeps the apk index out of the image)
+      # FIXME: it's a bit wasteful; the alternative would be to modify lk-jwt-service to pick up secrets from disk
+      dockerfile_inline: |
+        FROM ghcr.io/element-hq/lk-jwt-service:latest-ci AS builder
+        FROM alpine:latest
+        RUN apk add --no-cache bash
+        COPY --from=builder /lk-jwt-service /
+    restart: unless-stopped
+    volumes:
+      - ${VOLUME_PATH}/data/nginx/ssl/rootCA.pem:/etc/ssl/certs/ca-certificates.crt:ro
+      - ${VOLUME_PATH}/init/livekit-jwt-entrypoint.sh:/entrypoint.sh:ro
+    entrypoint: /entrypoint.sh
+    env_file: .env
+    deploy:
+      restart_policy:
+        condition: on-failure
+    networks:
+      - backend
+    secrets:
+      - livekit_api_key
+      - livekit_secret_key
+    depends_on:
+      init:
+        condition: service_completed_successfully
+      livekit:
+        condition: service_started
diff --git a/data-template/element-call/config.json b/data-template/element-call/config.json
new file mode 100644
index 0000000..3860378
--- /dev/null
+++ b/data-template/element-call/config.json
@@ -0,0 +1,14 @@
+{
+  "default_server_config": {
+    "m.homeserver": {
+      "base_url": "https://${ELEMENT_CALL_FQDN}",
+      "server_name": "${DOMAIN}"
+    }
+  },
+  "livekit": {
+    "livekit_service_url": "https://${LIVEKIT_JWT_FQDN}"
+  },
+  "features": {
+    "feature_use_device_session_member_events": true
+  }
+}
\ No newline at end of file
diff --git a/data-template/livekit/config.yaml b/data-template/livekit/config.yaml
new file mode 100644
index 0000000..fad2a77
--- /dev/null
+++ b/data-template/livekit/config.yaml
@@ -0,0 +1,321 @@
+${CONFIG_HEADER}
+# From https://github.com/livekit/livekit/blob/master/config-sample.yaml
+
+# Copyright 2024 LiveKit, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# main TCP port for RoomService and RTC endpoint
+# for production setups, this port should be placed behind a load balancer with TLS
+port: 7880
+
+# when redis is set, LiveKit will automatically operate in a fully distributed fashion
+# clients could connect to any node and be routed to the same room
+redis:
+  address: redis:6379
+  # db: 0
+  # username: myuser
+  # password: mypassword
+  # To use sentinel remove the address key above and add the following
+  # sentinel_master_name: livekit
+  # sentinel_addresses:
+  # - livekit-redis-node-0.livekit-redis-headless:26379
+  # - livekit-redis-node-1.livekit-redis-headless:26379
+  # If you use a different set of credentials for sentinel add
+  # sentinel_username: user
+  # sentinel_password: pass
+  #
+  # To use TLS with redis
+  # tls:
+  #   enabled: true
+  #   # when set to true, LiveKit will not verify the server's certificate, defaults to true
+  #   insecure: false
+  #   server_name: myserver.com
+  #   # file containing trusted root certificates for verification
+  #   ca_cert_file: /path/to/ca.crt
+  #   client_cert_file: /path/to/client.crt
+  #   client_key_file: /path/to/client.key
+  #
+  # To use cluster remove the address key above and add the following
+  # cluster_addresses:
+  # - livekit-redis-node-0.livekit-redis-headless:6379
+  # - livekit-redis-node-1.livekit-redis-headless:6380
+  # And it will use the password key above as cluster password
+  # The db key is ignored in this case, because cluster mode does not support it.
+
+# WebRTC configuration
+rtc:
+  # UDP ports to use for client traffic.
+  # this port range should be open for inbound traffic on the firewall
+  port_range_start: 50000
+  port_range_end: 60000
+  # when set, LiveKit enables WebRTC ICE over TCP when UDP isn't available
+  # this port *cannot* be behind load balancer or TLS, and must be exposed on the node
+  # WebRTC transports are encrypted and do not require additional encryption
+  # only 80/443 on public IP are allowed if less than 1024
+  tcp_port: 7881
+  # when set to true, attempts to discover the host's public IP via STUN
+  # this is useful for cloud environments such as AWS & Google where hosts have an internal IP
+  # that maps to an external one
+  use_external_ip: false
+  # # when set, LiveKit will attempt to use a UDP mux so all UDP traffic goes through
+  # # listed port(s). To maximize system performance, we recommend using a range of ports
+  # # greater or equal to the number of vCPUs on the machine.
+  # # port_range_start & end must not be set for this config to take effect
+  # udp_port: 7882-7892
+  # # when set to true, the server will use a lite ICE agent, which speeds up ICE connection but
+  # # might cause connection issues if the server is running behind NAT.
+  # use_ice_lite: true
+  # # optional STUN servers for LiveKit clients to use. Clients will be configured to use these STUN servers automatically.
+  # # by default LiveKit clients use Google's public STUN servers
+  # stun_servers:
+  #   - server1
+  # # optional TURN servers for clients. This isn't necessary if using embedded TURN server (see below).
+  # turn_servers:
+  #   - host: myhost.com
+  #     port: 443
+  #     # tls, tcp, or udp
+  #     protocol: tls
+  #     username: ""
+  #     credential: ""
+  # # allows LiveKit to monitor congestion when sending streams and automatically
+  # # manage bandwidth utilization to avoid congestion/loss. Enabled by default
+  # congestion_control:
+  #   enabled: true
+  #   # in the unlikely event of highly congested networks, SFU may choose to pause some tracks
+  #   # in order to allow others to stream smoothly. You can disable this behavior here
+  #   allow_pause: true
+  # # allows automatic connection fallback to TCP and TURN/TLS (if configured) when UDP has been unstable, default true
+  # allow_tcp_fallback: true
+  # # number of packets to buffer in the SFU for video, defaults to 500
+  # packet_buffer_size_video: 500
+  # # number of packets to buffer in the SFU for audio, defaults to 200
+  # packet_buffer_size_audio: 200
+  # # minimum amount of time between pli/fir rtcp packets being sent to an individual
+  # # producer. Increasing these times can lead to longer black screens when new participants join,
+  # # while reducing them can lead to higher stream bitrate.
+  # pli_throttle:
+  #   low_quality: 500ms
+  #   mid_quality: 1s
+  #   high_quality: 1s
+  # # when set, LiveKit will collect loopback candidates; this is useful for VMs that have a public address mapped to their loopback interface.
+  # enable_loopback_candidate: true
+  # # network interface filter. If the machine has more than one network interface and you'd like it to use or skip specific interfaces
+  # # both inclusion and exclusion filters can be used together. If neither is defined (default), all interfaces on the machine will be used.
+  # # If both of them are set, then only include takes effect.
+  # interfaces:
+  #   includes:
+  #     - en0
+  #   excludes:
+  #     - docker0
+  # # ip address filter. If the machine has more than one ip address and you'd like it to use or skip specific ips,
+  # # both inclusion and exclusion CIDR filters can be used together. If neither is defined (default), all ip on the machine will be used.
+  # # If both of them are set, then only include takes effect.
+  # ips:
+  #   includes:
+  #     - 10.0.0.0/16
+  #   excludes:
+  #     - 192.168.1.0/24
+  # # Set to true to enable mDNS name candidate. This should be left disabled for most users.
+  # # when enabled, it will impact performance since each PeerConnection will process the same mDNS message independently
+  # use_mdns: true
+  # # Set to false to disable strict ACKs for peer connections where LiveKit is the dialing side,
+  # # ie. subscriber peer connections. Disabling strict ACKs will prevent clients that do not ACK
+  # # peer connections from getting kicked out of rooms by the monitor. Note that if strict ACKs
+  # # are disabled and clients don't ACK opened peer connections, only reliable, ordered delivery
+  # # will be available.
+  # strict_acks: true
+  # # enable batch write to merge network write system calls to reduce cpu usage. Outgoing packets
+  # # will be queued until length of queue equal to `batch_size` or time elapsed since last write exceeds `max_flush_interval`.
+  # batch_io:
+  #    batch_size: 128
+  #    max_flush_interval: 2ms
+  # # max number of bytes to buffer for data channel. 0 means unlimited.
+  # # when this limit is breached, data messages will be dropped till the buffered amount drops below this limit.
+  # data_channel_max_buffered_amount: 0
+
+# when enabled, LiveKit will expose prometheus metrics on :6789/metrics
+# prometheus_port: 6789
+
+# API key / secret pairs.
+# Keys are used for JWT authentication, server APIs would require a keypair in order to generate access tokens
+# and make calls to the server
+keys:
+  ${SECRETS_LIVEKIT_API_KEY}: '${SECRETS_LIVEKIT_SECRET_KEY}'
+# Logging config
+# logging:
+#   # log level, valid values: debug, info, warn, error
+#   level: info
+#   # log level for pion, default error
+#   pion_level: error
+#   # when set to true, emit json fields
+#   json: false
+#   # for production setups, enables sampling algorithm
+#   # https://github.com/uber-go/zap/blob/master/FAQ.md#why-sample-application-logs
+#   sample: false
+
+# Default room config
+# Each room created will inherit these settings. If rooms are created explicitly with CreateRoom, they will take
+# precedence over defaults
+# room:
+#   # allow rooms to be automatically created when participants join, defaults to true
+#   # auto_create: false
+#   # number of seconds to keep the room open if no one joins
+#   empty_timeout: 300
+#   # number of seconds to keep the room open after everyone leaves
+#   departure_timeout: 20
+#   # limit number of participants that can be in a room, 0 for no limit
+#   max_participants: 0
+#   # only accept specific codecs for clients publishing to this room
+#   # this is useful to standardize codecs across clients
+#   # other supported codecs are video/h264, video/vp9, video/av1, audio/red
+#   enabled_codecs:
+#     - mime: audio/opus
+#     - mime: video/vp8
+#   # allow tracks to be unmuted remotely, defaults to false
+#   # tracks can always be muted from the Room Service APIs
+#   enable_remote_unmute: true
+#   # control playout delay in ms of video track (and associated audio track)
+#   playout_delay:
+#     enabled: true
+#     min: 100
+#     max: 2000
+#   # improves A/V sync when playout_delay is set to a value larger than 200ms. It disables transceiver re-use,
+#   # so it is not recommended for rooms with frequent subscription changes
+#   sync_streams: true
+
+# Webhooks
+# when configured, LiveKit notifies your URL handler with room events
+# webhook:
+#   # the API key to use in order to sign the message
+#   # this must match one of the keys LiveKit is configured with
+#   api_key: <api_key>
+#   # list of URLs to be notified of room events
+#   urls:
+#     - https://your-host.com/handler
+
+# Signal Relay
+# since v1.4.0, a more reliable, psrpc based signal relay is available
+# this gives us the ability to reliably proxy messages between a signal server and RTC node
+# signal_relay:
+#   # amount of time a message delivery is tried before giving up
+#   retry_timeout: 30s
+#   # minimum amount of time to wait for RTC node to ack,
+#   # retries use exponentially increasing wait on every subsequent try
+#   # with an upper bound of max_retry_interval
+#   min_retry_interval: 500ms
+#   # maximum amount of time to wait for RTC node to ack
+#   max_retry_interval: 5s
+#   # number of messages to buffer before dropping
+#   stream_buffer_size: 1000
+
+# PSRPC
+# since v1.5.1, a more reliable, psrpc based internal rpc
+# psrpc:
+#   # maximum number of rpc attempts
+#   max_attempts: 3
+#   # initial time to wait for calls to complete
+#   timeout: 500ms
+#   # amount of time added to the timeout after each failure
+#   backoff: 500ms
+#   # number of messages to buffer before dropping
+#   buffer_size: 1000
+
+# customize audio level sensitivity
+# audio:
+#   # minimum level to be considered active, 0-127, where 0 is loudest
+#   # defaults to 30
+#   active_level: 30
+#   # percentile to measure, a participant is considered active if it has exceeded the
+#   # ActiveLevel more than MinPercentile% of the time
+#   # defaults to 40
+#   min_percentile: 40
+#   # frequency in ms to notify changes to clients, defaults to 500
+#   update_interval: 500
+#   # to prevent speaker updates from being too jumpy, smooth out values over N samples
+#   smooth_intervals: 4
+#   # enable red encoding downtrack for opus only audio up track
+#   active_red_encoding: true
+
+# turn server
+# turn:
+#   # Uses TLS. Requires cert and key pem files by either:
+#   # - using turn.secretName if deploying with our helm chart, or
+#   # - setting LIVEKIT_TURN_CERT and LIVEKIT_TURN_KEY env vars with file locations, or
+#   # - using cert_file and key_file below
+#   # defaults to false
+#   enabled: false
+#   # defaults to 3478 - recommended to 443 if not running HTTP3/QUIC server
+#   # only 53/80/443 are allowed if less than 1024
+#   udp_port: 3478
+#   # defaults to 5349 - if not using a load balancer, this must be set to 443
+#   tls_port: 5349
+#   # set UDP port range for TURN relay to connect to LiveKit SFU; by default it uses any available port
+#   relay_range_start: 1024
+#   relay_range_end: 30000
+#   # set external_tls to true if using a L4 load balancer to terminate TLS. when enabled,
+#   # LiveKit expects unencrypted traffic on tls_port, and still advertise tls_port as a TURN/TLS candidate.
+#   external_tls: true
+#   # needs to match tls cert domain
+#   domain: turn.myhost.com
+#   # optional (set only if not using external TLS termination)
+#   # cert_file: /path/to/cert.pem
+#   # key_file: /path/to/key.pem
+
+# ingress server
+# ingress:
+#   # Prefix used to generate RTMP URLs for RTMP ingress.
+#   rtmp_base_url: "rtmp://my.domain.com/live"
+#   # Prefix used to generate WHIP URLs for WHIP ingress.
+#   whip_base_url: "http://my.domain.com/whip"
+
+# Region of the current node. Required if using regionaware node selector
+# region: us-west-2
+
+# # node selector
+# node_selector:
+#   # default: any. valid values: any, sysload, cpuload, regionaware
+#   kind: sysload
+#   # priority used for selection of node when multiple are available
+#   # default: random. valid values: random, sysload, cpuload, rooms, clients, tracks, bytespersec
+#   sort_by: sysload
+#   # used in sysload and regionaware
+#   # do not assign room to node if load per CPU exceeds sysload_limit
+#   sysload_limit: 0.7
+#   # used in regionaware
+#   # list of regions and their lat/lon coordinates
+#   regions:
+#     - name: us-west-2
+#       lat: 44.19434095976287
+#       lon: -123.0674908379146
+
+# # node limits
+# # set to -1 to disable a limit
+# limit:
+#   # defaults to 400 tracks in & out per CPU, up to 8000
+#   num_tracks: -1
+#   # defaults to 1 GB/s, or just under 10 Gbps
+#   bytes_per_sec: 1_000_000_000
+#   # how many tracks (audio / video) that a single participant can subscribe at same time.
+#   # if the limit is exceeded, subscriptions will be pending until any subscribed track has been unsubscribed.
+#   # value less or equal than 0 means no limit.
+#   subscription_limit_video: 0
+#   subscription_limit_audio: 0
+#   # limit size of room and participant's metadata, 0 for no limit
+#   max_metadata_size: 0
+#   # limit size of participant attributes, 0 for no limit
+#   max_attributes_size: 0
+#   # limit length of room names
+#   max_room_name_length: 0
+#   # limit length of participant identity
+#   max_participant_identity_length: 0
diff --git a/data-template/nginx/conf.d/app.conf b/data-template/nginx/conf.d/app.conf
index 8ffa4ad..6f8b361 100644
--- a/data-template/nginx/conf.d/app.conf
+++ b/data-template/nginx/conf.d/app.conf
@@ -3,6 +3,11 @@ ${CONFIG_HEADER}
 # taken from https://element-hq.github.io/synapse/latest/reverse_proxy.html
 # mixed with https://github.com/wmnnd/nginx-certbot/tree/master/etc/nginx/conf.d/nginx
 
+# log_format vhosts '$host $remote_addr - $remote_user [$time_local] '
+#                   '"$request" $status $body_bytes_sent '
+#                   '"$http_referer" "$http_user_agent"';
+# access_log /dev/stdout vhosts;
+
 server {
     server_name ${DOMAIN};
     server_tokens off;
@@ -92,6 +97,22 @@ server {
 
     location / {
         proxy_pass http://livekit:7880;
+        proxy_http_version 1.1;
+        proxy_set_header Upgrade ${DOLLAR}http_upgrade;
+        proxy_set_header Connection "upgrade";
+        proxy_set_header Host ${DOLLAR}host;
+        proxy_set_header X-Forwarded-For ${DOLLAR}remote_addr;
+    }
+}
+
+server {
+    server_name ${LIVEKIT_JWT_FQDN};
+    server_tokens off;
+
+    include /etc/nginx/conf.d/include/ssl.conf;
+
+    location / {
+        proxy_pass http://livekit-jwt:8080;
         proxy_set_header X-Forwarded-For ${DOLLAR}remote_addr;
     }
 }
diff --git a/data-template/nginx/www/.well-known/matrix/client b/data-template/nginx/www/.well-known/matrix/client
index 9d32a7c..2a26322 100644
--- a/data-template/nginx/www/.well-known/matrix/client
+++ b/data-template/nginx/www/.well-known/matrix/client
@@ -12,7 +12,7 @@
     "org.matrix.msc4143.rtc_foci": [
         {
             "type": "livekit",
-            "livekit_service_url": "https://${LIVEKIT_FQDN}"
+            "livekit_service_url": "https://${LIVEKIT_JWT_FQDN}"
         }
     ]
 }
diff --git a/data-template/synapse/homeserver.yaml b/data-template/synapse/homeserver.yaml
index 2707ba2..e3bf564 100644
--- a/data-template/synapse/homeserver.yaml
+++ b/data-template/synapse/homeserver.yaml
@@ -102,4 +102,13 @@ experimental_features:
     admin_token: '${SECRETS_MAS_MATRIX_SECRET}'
     account_management_url: "https://${MAS_FQDN}/account"
 
+  # MSC3266: Room summary API. Used for knocking over federation
+  msc3266_enabled: true
+
+# The maximum allowed duration by which sent events can be delayed, as
+# per MSC4140. Must be a positive value if set.  Defaults to no
+# duration (null), which disallows sending delayed events.
+# Needed for MatrixRTC
+max_event_delay_duration: 24h
+
 # vim:ft=yaml
\ No newline at end of file
diff --git a/init/init.sh b/init/init.sh
index e7c6d93..69950fe 100755
--- a/init/init.sh
+++ b/init/init.sh
@@ -96,4 +96,4 @@ export DOLLAR='$' # evil hack to escape dollars in config files
 
 template "/data-template/element-web"
 template "/data-template/element-call"
-template "/data-template/nginx"
\ No newline at end of file
+template "/data-template/nginx"
diff --git a/init/livekit-jwt-entrypoint.sh b/init/livekit-jwt-entrypoint.sh
new file mode 100755
index 0000000..3135b58
--- /dev/null
+++ b/init/livekit-jwt-entrypoint.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+# Launch lk-jwt-service with its LiveKit credentials read from the mounted secret files.
+# Abort on an unreadable secret file or an unset LIVEKIT_FQDN instead of starting with blank values.
+set -euo pipefail
+
+LIVEKIT_KEY="$(</run/secrets/livekit_api_key)"
+LIVEKIT_SECRET="$(</run/secrets/livekit_secret_key)"
+export LK_JWT_PORT=8080 LIVEKIT_URL="wss://${LIVEKIT_FQDN}" LIVEKIT_KEY LIVEKIT_SECRET
+
+exec /lk-jwt-service
\ No newline at end of file