Compare commits

..

17 Commits

29 changed files with 211 additions and 18 deletions

1
.gitattributes vendored
View File

@@ -1,2 +1,3 @@
_secrets.yaml filter=git-crypt diff=git-crypt
secrets.yaml filter=git-crypt diff=git-crypt
*.agekey filter=git-crypt diff=git-crypt

28
.pre-commit-config.yaml Normal file
View File

@@ -0,0 +1,28 @@
default_install_hook_types: [pre-commit, commit-msg]
exclude: gotk-.*.yaml
repos:
- repo: builtin
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
args:
- --allow-multiple-documents
- id: check-added-large-files
- id: check-merge-conflict
- id: check-executables-have-shebangs
- repo: https://github.com/crate-ci/typos
rev: v1.40.0
hooks:
- id: typos
- repo: https://github.com/sirwart/ripsecrets
rev: v0.1.11
hooks:
- id: ripsecrets-system
- repo: https://github.com/crate-ci/committed
rev: v1.1.8
hooks:
- id: committed

3
.secretsignore Normal file
View File

@@ -0,0 +1,3 @@
_secrets.yaml
secrets.yaml
*.agekey

View File

@@ -65,3 +65,9 @@ Upgrading talos or changing the schematic:
```bash
talosctl upgrade --nodes <node_id> --image factory.talos.dev/metal-installer/<schematic_id>:<version>
```
To upgrade kubernetes or inline manifests, first apply the updated controlplane configs, then run:
```bash
talosctl upgrade-k8s
```

2
committed.toml Normal file
View File

@@ -0,0 +1,2 @@
style = "conventional"
ignore_author_re = "Flux"

View File

@@ -21,11 +21,23 @@ dns:
ntp: nl.pool.ntp.org
install: true
autoInstall: false
advertiseRoutes: true
patches:
- !patch hostname
- !patch install-disk
- !patch network
- !patch vip
- !patch tailscale
- !patch cilium
- !patch spegel
- !patch longhorn
- !patch longhorn-user-volume
- !patch local-path-provisioner-volume
- !patch limit-ephemeral
- !patch metrics
patchesControlPlane:
- !patch allow-control-plane-workloads
- !patch sops
- !patch cluster-variables
- !patch metrics-cluster
- !patch gateway-api

BIN
nodes/testing/_age.agekey Normal file

Binary file not shown.

View File

@@ -4,5 +4,7 @@ installDisk: /dev/vda
autoInstall: true
cluster:
name: testing
production: false
controlPlaneIp: 192.168.1.100
secretsFile: !realpath _secrets.yaml
sopsKeyFile: !realpath _age.agekey

BIN
nodes/titan/_age.agekey Normal file

Binary file not shown.

View File

@@ -2,6 +2,8 @@ netmask: 255.255.252.0
gateway: 10.0.0.1
installDisk: /dev/sda
cluster:
name: hellas
name: titan
production: true
controlPlaneIp: 10.0.2.1
secretsFile: !realpath _secrets.yaml
sopsKeyFile: !realpath _age.agekey

11
patches/cilium.yaml Normal file
View File

@@ -0,0 +1,11 @@
machine:
features:
hostDNS:
# This option is enabled by default and causes issues with cilium
forwardKubeDNSToHost: false
cluster:
network:
cni:
name: none
proxy:
disabled: true

View File

@@ -0,0 +1,16 @@
cluster:
inlineManifests:
- name: cluster-variables
contents: |
apiVersion: v1
kind: Namespace
metadata:
name: flux-system
---
apiVersion: v1
kind: ConfigMap
metadata:
name: cluster-variables
namespace: flux-system
data:
cluster_env: {%- if node.cluster.production %} production {%- else %} staging {%- endif %}

3
patches/gateway-api.yaml Normal file
View File

@@ -0,0 +1,3 @@
cluster:
extraManifests:
- https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.4.1/standard-install.yaml

View File

@@ -0,0 +1,5 @@
apiVersion: v1alpha1
kind: VolumeConfig
name: EPHEMERAL
provisioning:
maxSize: 30GB

View File

@@ -0,0 +1,8 @@
apiVersion: v1alpha1
kind: UserVolumeConfig
name: local-path-provisioner
provisioning:
diskSelector:
match: system_disk
grow: true
maxSize: 10GB

View File

@@ -0,0 +1,8 @@
apiVersion: v1alpha1
kind: UserVolumeConfig
name: longhorn
provisioning:
diskSelector:
match: system_disk
grow: true
maxSize: 2000GB

10
patches/longhorn.yaml Normal file
View File

@@ -0,0 +1,10 @@
machine:
kubelet:
extraMounts:
- destination: /var/lib/longhorn
type: bind
source: /var/lib/longhorn
options:
- bind
- rshared
- rw

View File

@@ -0,0 +1,4 @@
cluster:
extraManifests:
- https://raw.githubusercontent.com/alex1989hu/kubelet-serving-cert-approver/main/deploy/standalone-install.yaml
- https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml

4
patches/metrics.yaml Normal file
View File

@@ -0,0 +1,4 @@
machine:
kubelet:
extraArgs:
rotate-server-certificates: true

16
patches/openebs.yaml Normal file
View File

@@ -0,0 +1,16 @@
machine:
# This is only needed on nodes that will have storage
sysctls:
vm.nr_hugepages: "1024"
nodeLabels:
openebs.io/engine: mayastor
# This is needed on ALL nodes
kubelet:
extraMounts:
- destination: /var/local
type: bind
source: /var/local
options:
- bind
- rshared
- rw

17
patches/sops.yaml Normal file
View File

@@ -0,0 +1,17 @@
cluster:
inlineManifests:
- name: sops-key
contents: |
apiVersion: v1
kind: Namespace
metadata:
name: flux-system
---
apiVersion: v1
kind: Secret
metadata:
name: sops-gpg
namespace: flux-system
data:
age.agekey: |
{{ helper.load_secret(node.cluster.sopsKeyFile) }}

7
patches/spegel.yaml Normal file
View File

@@ -0,0 +1,7 @@
machine:
files:
- path: /etc/cri/conf.d/20-customization.part
op: create
content: |
[plugins."io.containerd.cri.v1.images"]
discard_unpacked_layers = false

View File

@@ -3,5 +3,5 @@ kind: ExtensionServiceConfig
name: tailscale
environment:
- TS_AUTHKEY={{ config.tailscale.authKey }}
- TS_EXTRA_ARGS=--login-server {{ config.tailscale.loginServer }}
- TS_ROUTES={{ helper.tailscale_subnet(node.gateway, node.netmask) }}
- TS_EXTRA_ARGS=--login-server {{ config.tailscale.loginServer }} --advertise-tags=tag:cluster-{{ node.cluster.name }}
- TS_ROUTES={% if node.advertiseRoutes -%} {{ helper.tailscale_subnet(node.gateway, node.netmask) }} {%- endif %}

View File

@@ -3,6 +3,7 @@
# Adapted from: https://enix.io/en/blog/pxe-talos/
import base64
import functools
import json
import pathlib
@@ -12,7 +13,7 @@ import git
import requests
import yaml
from jinja2 import Environment, FileSystemLoader, StrictUndefined, Template
from mergedeep import merge
from mergedeep import Strategy, merge
from netaddr import IPAddress
REPO = git.Repo(sys.path[0], search_parent_directories=True)
@@ -38,12 +39,24 @@ TEMPLATES = Environment(
)
# When we try to make a deep copy of the nodes dict it fails as the Template
# does not implement __deepcopy__, so this wrapper type facilitates that
class TemplateWrapper:
def __init__(self, template: Template):
self.template = template
def __deepcopy__(self, memo):
# NOTE: This is not a true deepcopy, but since we know we won't modify
# the template this is fine.
return self
def render_templates(node: dict, args: dict):
class Inner(json.JSONEncoder):
def default(self, o):
if isinstance(o, Template):
if isinstance(o, TemplateWrapper):
try:
rendered = o.render(args | {"node": node})
rendered = o.template.render(args | {"node": node})
except Exception as e:
e.add_note(f"While rendering for: {node['hostname']}")
raise e
@@ -59,6 +72,9 @@ def tailscale_subnet(gateway: str, netmask: str):
netmask_bits = IPAddress(netmask).netmask_bits()
return f"{IPAddress(gateway) & IPAddress(netmask)}/{netmask_bits}"
def load_secret(path: str):
with open(path) as f:
return base64.b64encode(f.read().encode()).decode()
@functools.cache
def get_schematic_id(schematic: str):
@@ -84,7 +100,7 @@ def template_constructor(environment: Environment):
patch_name = loader.construct_scalar(node)
try:
template = environment.get_template(f"{patch_name}.yaml")
return template
return TemplateWrapper(template)
except Exception:
raise yaml.MarkedYAMLError("Failed to load patch", node.start_mark)
@@ -125,7 +141,12 @@ def get_defaults(directory: pathlib.Path, root: pathlib.Path):
# Stop recursion when reaching root directory
if directory != root:
return get_defaults(directory.parent, root) | yml_data
return merge(
{},
get_defaults(directory.parent, root),
yml_data,
strategy=Strategy.TYPESAFE_REPLACE,
)
else:
return yml_data
@@ -143,12 +164,12 @@ def main():
config = yaml.safe_load(fyaml)
with open(ROOT.joinpath("secrets.yaml")) as fyaml:
merge(config, yaml.safe_load(fyaml))
merge(config, yaml.safe_load(fyaml), strategy=Strategy.TYPESAFE_REPLACE)
template_args = {
"config": config,
"root": ROOT,
"helper": {"tailscale_subnet": tailscale_subnet},
"helper": {"tailscale_subnet": tailscale_subnet, "load_secret": load_secret},
}
nodes = []
@@ -157,7 +178,12 @@ def main():
with open(fullname) as fyaml:
yml_data = yaml.load(fyaml, Loader=get_loader(fullname.parent))
yml_data = get_defaults(fullname.parent, NODES) | yml_data
yml_data = merge(
{},
get_defaults(fullname.parent, NODES),
yml_data,
strategy=Strategy.TYPESAFE_REPLACE,
)
yml_data["hostname"] = fullname.stem
yml_data["filename"] = filename
nodes.append(yml_data)
@@ -172,11 +198,13 @@ def main():
)
)
# Get all clusters
# HACK: We can't hash a dict, so we first convert it to json, the use set
# to get all the unique entries, and then convert it back
# NOTE: This assumes that all nodes in the cluster use the same definition for the cluster
clusters = [
dict(s) for s in set(frozenset(node["cluster"].items()) for node in nodes)
]
clusters = list(
json.loads(cluster)
for cluster in set(json.dumps(node["cluster"]) for node in nodes)
)
template_args |= {"nodes": nodes, "clusters": clusters}

View File

@@ -3,9 +3,9 @@ set -euo pipefail
ROOT=$(git rev-parse --show-toplevel)
VM_NAME="talos-vm"
VCPUS="2"
RAM_MB="2048"
DISK_GB="10"
VCPUS="6"
RAM_MB="16384"
DISK_GB="100"
NETWORK=talos
CONNECTION="qemu:///system"