Compare commits

..

17 Commits

29 changed files with 211 additions and 18 deletions

1
.gitattributes vendored
View File

@@ -1,2 +1,3 @@
_secrets.yaml filter=git-crypt diff=git-crypt _secrets.yaml filter=git-crypt diff=git-crypt
secrets.yaml filter=git-crypt diff=git-crypt secrets.yaml filter=git-crypt diff=git-crypt
*.agekey filter=git-crypt diff=git-crypt

28
.pre-commit-config.yaml Normal file
View File

@@ -0,0 +1,28 @@
default_install_hook_types: [pre-commit, commit-msg]
exclude: gotk-.*.yaml
repos:
- repo: builtin
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
args:
- --allow-multiple-documents
- id: check-added-large-files
- id: check-merge-conflict
- id: check-executables-have-shebangs
- repo: https://github.com/crate-ci/typos
rev: v1.40.0
hooks:
- id: typos
- repo: https://github.com/sirwart/ripsecrets
rev: v0.1.11
hooks:
- id: ripsecrets-system
- repo: https://github.com/crate-ci/committed
rev: v1.1.8
hooks:
- id: committed

3
.secretsignore Normal file
View File

@@ -0,0 +1,3 @@
_secrets.yaml
secrets.yaml
*.agekey

View File

@@ -65,3 +65,9 @@ Upgrading talos or changing the schematic:
```bash ```bash
talosctl upgrade --nodes <node_id> --image factory.talos.dev/metal-installer/<schematic_id>:<version> talosctl upgrade --nodes <node_id> --image factory.talos.dev/metal-installer/<schematic_id>:<version>
``` ```
To upgrade Kubernetes or the inline manifests, first apply the updated control plane configs, then run:
```bash
talosctl upgrade-k8s
```

2
committed.toml Normal file
View File

@@ -0,0 +1,2 @@
style = "conventional"
ignore_author_re = "Flux"

View File

@@ -21,11 +21,23 @@ dns:
ntp: nl.pool.ntp.org ntp: nl.pool.ntp.org
install: true install: true
autoInstall: false autoInstall: false
advertiseRoutes: true
patches: patches:
- !patch hostname - !patch hostname
- !patch install-disk - !patch install-disk
- !patch network - !patch network
- !patch vip - !patch vip
- !patch tailscale - !patch tailscale
- !patch cilium
- !patch spegel
- !patch longhorn
- !patch longhorn-user-volume
- !patch local-path-provisioner-volume
- !patch limit-ephemeral
- !patch metrics
patchesControlPlane: patchesControlPlane:
- !patch allow-control-plane-workloads - !patch allow-control-plane-workloads
- !patch sops
- !patch cluster-variables
- !patch metrics-cluster
- !patch gateway-api

BIN
nodes/testing/_age.agekey Normal file

Binary file not shown.

View File

@@ -4,5 +4,7 @@ installDisk: /dev/vda
autoInstall: true autoInstall: true
cluster: cluster:
name: testing name: testing
production: false
controlPlaneIp: 192.168.1.100 controlPlaneIp: 192.168.1.100
secretsFile: !realpath _secrets.yaml secretsFile: !realpath _secrets.yaml
sopsKeyFile: !realpath _age.agekey

BIN
nodes/titan/_age.agekey Normal file

Binary file not shown.

View File

@@ -2,6 +2,8 @@ netmask: 255.255.252.0
gateway: 10.0.0.1 gateway: 10.0.0.1
installDisk: /dev/sda installDisk: /dev/sda
cluster: cluster:
name: hellas name: titan
production: true
controlPlaneIp: 10.0.2.1 controlPlaneIp: 10.0.2.1
secretsFile: !realpath _secrets.yaml secretsFile: !realpath _secrets.yaml
sopsKeyFile: !realpath _age.agekey

11
patches/cilium.yaml Normal file
View File

@@ -0,0 +1,11 @@
machine:
features:
hostDNS:
# This option is enabled by default and causes issues with cilium
forwardKubeDNSToHost: false
cluster:
network:
cni:
name: none
proxy:
disabled: true

View File

@@ -0,0 +1,16 @@
cluster:
inlineManifests:
- name: cluster-variables
contents: |
apiVersion: v1
kind: Namespace
metadata:
name: flux-system
---
apiVersion: v1
kind: ConfigMap
metadata:
name: cluster-variables
namespace: flux-system
data:
cluster_env: {%- if node.cluster.production %} production {%- else %} staging {%- endif %}

3
patches/gateway-api.yaml Normal file
View File

@@ -0,0 +1,3 @@
cluster:
extraManifests:
- https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.4.1/standard-install.yaml

View File

@@ -0,0 +1,5 @@
apiVersion: v1alpha1
kind: VolumeConfig
name: EPHEMERAL
provisioning:
maxSize: 30GB

View File

@@ -0,0 +1,8 @@
apiVersion: v1alpha1
kind: UserVolumeConfig
name: local-path-provisioner
provisioning:
diskSelector:
match: system_disk
grow: true
maxSize: 10GB

View File

@@ -0,0 +1,8 @@
apiVersion: v1alpha1
kind: UserVolumeConfig
name: longhorn
provisioning:
diskSelector:
match: system_disk
grow: true
maxSize: 2000GB

10
patches/longhorn.yaml Normal file
View File

@@ -0,0 +1,10 @@
machine:
kubelet:
extraMounts:
- destination: /var/lib/longhorn
type: bind
source: /var/lib/longhorn
options:
- bind
- rshared
- rw

View File

@@ -0,0 +1,4 @@
cluster:
extraManifests:
- https://raw.githubusercontent.com/alex1989hu/kubelet-serving-cert-approver/main/deploy/standalone-install.yaml
- https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml

4
patches/metrics.yaml Normal file
View File

@@ -0,0 +1,4 @@
machine:
kubelet:
extraArgs:
rotate-server-certificates: true

16
patches/openebs.yaml Normal file
View File

@@ -0,0 +1,16 @@
machine:
# This is only needed on nodes that will have storage
sysctls:
vm.nr_hugepages: "1024"
nodeLabels:
openebs.io/engine: mayastor
# This is needed on ALL nodes
kubelet:
extraMounts:
- destination: /var/local
type: bind
source: /var/local
options:
- bind
- rshared
- rw

17
patches/sops.yaml Normal file
View File

@@ -0,0 +1,17 @@
cluster:
inlineManifests:
- name: sops-key
contents: |
apiVersion: v1
kind: Namespace
metadata:
name: flux-system
---
apiVersion: v1
kind: Secret
metadata:
name: sops-gpg
namespace: flux-system
data:
age.agekey: |
{{ helper.load_secret(node.cluster.sopsKeyFile) }}

7
patches/spegel.yaml Normal file
View File

@@ -0,0 +1,7 @@
machine:
files:
- path: /etc/cri/conf.d/20-customization.part
op: create
content: |
[plugins."io.containerd.cri.v1.images"]
discard_unpacked_layers = false

View File

@@ -3,5 +3,5 @@ kind: ExtensionServiceConfig
name: tailscale name: tailscale
environment: environment:
- TS_AUTHKEY={{ config.tailscale.authKey }} - TS_AUTHKEY={{ config.tailscale.authKey }}
- TS_EXTRA_ARGS=--login-server {{ config.tailscale.loginServer }} - TS_EXTRA_ARGS=--login-server {{ config.tailscale.loginServer }} --advertise-tags=tag:cluster-{{ node.cluster.name }}
- TS_ROUTES={{ helper.tailscale_subnet(node.gateway, node.netmask) }} - TS_ROUTES={% if node.advertiseRoutes -%} {{ helper.tailscale_subnet(node.gateway, node.netmask) }} {%- endif %}

View File

@@ -3,6 +3,7 @@
# Adapted from: https://enix.io/en/blog/pxe-talos/ # Adapted from: https://enix.io/en/blog/pxe-talos/
import base64
import functools import functools
import json import json
import pathlib import pathlib
@@ -12,7 +13,7 @@ import git
import requests import requests
import yaml import yaml
from jinja2 import Environment, FileSystemLoader, StrictUndefined, Template from jinja2 import Environment, FileSystemLoader, StrictUndefined, Template
from mergedeep import merge from mergedeep import Strategy, merge
from netaddr import IPAddress from netaddr import IPAddress
REPO = git.Repo(sys.path[0], search_parent_directories=True) REPO = git.Repo(sys.path[0], search_parent_directories=True)
@@ -38,12 +39,24 @@ TEMPLATES = Environment(
) )
# Deep-copying the nodes dict fails because jinja2's Template does not implement
# __deepcopy__, so this wrapper type supplies one.
class TemplateWrapper:
def __init__(self, template: Template):
self.template = template
def __deepcopy__(self, memo):
# NOTE: This is not a true deepcopy, but since we know we won't modify
# the template this is fine.
return self
def render_templates(node: dict, args: dict): def render_templates(node: dict, args: dict):
class Inner(json.JSONEncoder): class Inner(json.JSONEncoder):
def default(self, o): def default(self, o):
if isinstance(o, Template): if isinstance(o, TemplateWrapper):
try: try:
rendered = o.render(args | {"node": node}) rendered = o.template.render(args | {"node": node})
except Exception as e: except Exception as e:
e.add_note(f"While rendering for: {node['hostname']}") e.add_note(f"While rendering for: {node['hostname']}")
raise e raise e
@@ -59,6 +72,9 @@ def tailscale_subnet(gateway: str, netmask: str):
netmask_bits = IPAddress(netmask).netmask_bits() netmask_bits = IPAddress(netmask).netmask_bits()
return f"{IPAddress(gateway) & IPAddress(netmask)}/{netmask_bits}" return f"{IPAddress(gateway) & IPAddress(netmask)}/{netmask_bits}"
def load_secret(path: str):
with open(path) as f:
return base64.b64encode(f.read().encode()).decode()
@functools.cache @functools.cache
def get_schematic_id(schematic: str): def get_schematic_id(schematic: str):
@@ -84,7 +100,7 @@ def template_constructor(environment: Environment):
patch_name = loader.construct_scalar(node) patch_name = loader.construct_scalar(node)
try: try:
template = environment.get_template(f"{patch_name}.yaml") template = environment.get_template(f"{patch_name}.yaml")
return template return TemplateWrapper(template)
except Exception: except Exception:
raise yaml.MarkedYAMLError("Failed to load patch", node.start_mark) raise yaml.MarkedYAMLError("Failed to load patch", node.start_mark)
@@ -125,7 +141,12 @@ def get_defaults(directory: pathlib.Path, root: pathlib.Path):
# Stop recursion when reaching root directory # Stop recursion when reaching root directory
if directory != root: if directory != root:
return get_defaults(directory.parent, root) | yml_data return merge(
{},
get_defaults(directory.parent, root),
yml_data,
strategy=Strategy.TYPESAFE_REPLACE,
)
else: else:
return yml_data return yml_data
@@ -143,12 +164,12 @@ def main():
config = yaml.safe_load(fyaml) config = yaml.safe_load(fyaml)
with open(ROOT.joinpath("secrets.yaml")) as fyaml: with open(ROOT.joinpath("secrets.yaml")) as fyaml:
merge(config, yaml.safe_load(fyaml)) merge(config, yaml.safe_load(fyaml), strategy=Strategy.TYPESAFE_REPLACE)
template_args = { template_args = {
"config": config, "config": config,
"root": ROOT, "root": ROOT,
"helper": {"tailscale_subnet": tailscale_subnet}, "helper": {"tailscale_subnet": tailscale_subnet, "load_secret": load_secret},
} }
nodes = [] nodes = []
@@ -157,7 +178,12 @@ def main():
with open(fullname) as fyaml: with open(fullname) as fyaml:
yml_data = yaml.load(fyaml, Loader=get_loader(fullname.parent)) yml_data = yaml.load(fyaml, Loader=get_loader(fullname.parent))
yml_data = get_defaults(fullname.parent, NODES) | yml_data yml_data = merge(
{},
get_defaults(fullname.parent, NODES),
yml_data,
strategy=Strategy.TYPESAFE_REPLACE,
)
yml_data["hostname"] = fullname.stem yml_data["hostname"] = fullname.stem
yml_data["filename"] = filename yml_data["filename"] = filename
nodes.append(yml_data) nodes.append(yml_data)
@@ -172,11 +198,13 @@ def main():
) )
) )
# Get all clusters # HACK: We can't hash a dict, so we first convert it to json, then use set
# to get all the unique entries, and then convert it back
# NOTE: This assumes that all nodes in the cluster use the same definition for the cluster # NOTE: This assumes that all nodes in the cluster use the same definition for the cluster
clusters = [ clusters = list(
dict(s) for s in set(frozenset(node["cluster"].items()) for node in nodes) json.loads(cluster)
] for cluster in set(json.dumps(node["cluster"]) for node in nodes)
)
template_args |= {"nodes": nodes, "clusters": clusters} template_args |= {"nodes": nodes, "clusters": clusters}

View File

@@ -3,9 +3,9 @@ set -euo pipefail
ROOT=$(git rev-parse --show-toplevel) ROOT=$(git rev-parse --show-toplevel)
VM_NAME="talos-vm" VM_NAME="talos-vm"
VCPUS="2" VCPUS="6"
RAM_MB="2048" RAM_MB="16384"
DISK_GB="10" DISK_GB="100"
NETWORK=talos NETWORK=talos
CONNECTION="qemu:///system" CONNECTION="qemu:///system"