286 lines
9.5 KiB
Python
286 lines
9.5 KiB
Python
import os
|
|
import json
|
|
import subprocess
|
|
from datetime import datetime
|
|
from flask import Flask, jsonify, render_template
|
|
import re
|
|
|
|
# Flask application object; the route handlers in this file register on it.
app = Flask(__name__)
|
|
|
|
# --- Settings read once from the environment at import time ---

# Polling interval in seconds; passed to the index template and the status payload.
POLL_SECONDS = int(os.environ.get("POLL_SECONDS", "10"))
# Base domain used when building an app's default URL.
APP_DOMAIN = os.environ.get("APP_DOMAIN", "peterstockings.com")
# Path of the docker CLI binary used for every command.
DOCKER = os.environ.get("DOCKER_BIN", "/usr/bin/docker")
# Infra containers are reported only when the variable is exactly "1".
SHOW_INFRA = "1" == os.environ.get("SHOW_INFRA", "1")

# Byte multipliers keyed by lower-cased size suffix:
# decimal (kb..tb, powers of 1000) and binary (kib..tib, powers of 1024).
_UNIT = {"b": 1}
_UNIT.update(
    {f"{prefix}b": 1000**power for power, prefix in enumerate("kmgt", start=1)}
)
_UNIT.update(
    {f"{prefix}ib": 1024**power for power, prefix in enumerate("kmgt", start=1)}
)
|
|
|
|
def parse_url_overrides(raw: str) -> dict:
    """Parse the APP_URL_OVERRIDES setting into an app-name -> URL mapping.

    Args:
        raw: JSON text, expected to encode an object.

    Returns:
        The decoded dict, or an empty dict when *raw* is not valid JSON or
        decodes to anything other than a JSON object (list, string, null, ...).
        A non-dict value would otherwise break the membership test and
        indexing done when an app URL is looked up.
    """
    try:
        parsed = json.loads(raw)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return {}
    return parsed if isinstance(parsed, dict) else {}


# Optional JSON map: {"gitea":"https://gitea.peterstockings.com", "bloodpressure":"https://bp.peterstockings.com"}
APP_URL_OVERRIDES = parse_url_overrides(os.getenv("APP_URL_OVERRIDES", "{}"))
|
|
|
|
def sh(cmd: list[str]) -> str:
    """Run *cmd* and return its standard output, stripped of surrounding whitespace.

    stderr is captured separately instead of being merged into stdout, so
    diagnostic text written to stderr cannot end up in output that callers
    split on tabs or feed to ``json.loads``.

    Args:
        cmd: Program and arguments as a list (no shell is involved).

    Returns:
        The command's stdout as text, stripped.

    Raises:
        subprocess.CalledProcessError: the command exited non-zero; the
            captured stderr is available on the exception's ``stderr``.
        OSError: the executable could not be started.
    """
    completed = subprocess.run(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        check=True,
    )
    return completed.stdout.strip()
|
|
|
|
def docker_ps_all() -> list[dict]:
    """Return one dict per container printed by ``docker ps``.

    The command is run with a tab-separated ``--format`` template and without
    the ``-a`` flag.

    Returns:
        A list of dicts with the string keys ``name``, ``image``, ``status``
        and ``ports``, in the order docker printed them. Lines that do not
        split into exactly four tab-separated fields are skipped rather than
        aborting the whole listing with a ValueError.
    """
    # Name + Image + Status + Ports
    fmt = "{{.Names}}\t{{.Image}}\t{{.Status}}\t{{.Ports}}"
    rows = []
    for line in sh([DOCKER, "ps", "--format", fmt]).splitlines():
        fields = line.split("\t")
        if len(fields) != 4:
            # Not a row produced by our template (blank or stray text).
            continue
        name, image, status, ports = fields
        rows.append({"name": name, "image": image, "status": status, "ports": ports})
    return rows
|
|
|
|
def docker_stats() -> dict:
    """Return a one-shot ``docker stats`` snapshot keyed by container name.

    Returns:
        ``{name: {"cpu", "mem_used", "mem_limit", "mem_pct"}}`` where every
        value is the string docker printed (e.g. ``"58.84MiB"``, ``"15.32%"``).
        Lines that do not split into exactly four tab-separated fields are
        skipped; a memory-usage field without a ``/`` yields an empty
        ``mem_limit`` instead of raising.
    """
    # Name + CPU + MemUsage + MemPerc
    fmt = "{{.Name}}\t{{.CPUPerc}}\t{{.MemUsage}}\t{{.MemPerc}}"
    stats = {}
    for line in sh([DOCKER, "stats", "--no-stream", "--format", fmt]).splitlines():
        fields = line.split("\t")
        if len(fields) != 4:
            # Not a row produced by our template (blank or stray text).
            continue
        name, cpu, mem_usage, mem_pct = fields
        # mem_usage like: "58.84MiB / 384MiB"; partition() never raises,
        # unlike unpacking the result of split("/").
        mem_used, _, mem_limit = mem_usage.partition("/")
        stats[name] = {
            "cpu": cpu,
            "mem_used": mem_used.strip(),
            "mem_limit": mem_limit.strip(),
            "mem_pct": mem_pct,
        }
    return stats
|
|
|
|
import re
|
|
|
|
def docker_info() -> dict:
    """Return the decoded JSON output of ``docker info``."""
    # Asking for "{{json .}}" makes docker print the whole info structure as JSON.
    command = [DOCKER, "info", "--format", "{{json .}}"]
    return json.loads(sh(command))
|
|
|
|
def docker_system_df() -> dict:
    """Parse the text table printed by ``docker system df``.

    Returns:
        A dict keyed by the first column (e.g. ``"Images"``) whose values are
        dicts with the string fields ``total``, ``active``, ``size`` and
        ``reclaimable``. Rows with fewer than five columns are left out.
    """
    # Sample of the table being parsed:
    #   Images          175       18        15.15GB   13.93GB (91%)
    #   Containers      27        26        145.1MB   16.57kB (0%)
    #   Local Volumes   47        1         817.7MB   817.7MB (100%)
    #   Build Cache     889       0         423B      423B
    column_names = ("total", "active", "size", "reclaimable")
    table = {}
    for raw_line in sh([DOCKER, "system", "df"]).splitlines():
        text = raw_line.strip()
        if text == "" or text.startswith("TYPE"):
            # Blank line or the header row.
            continue
        # Columns are separated by runs of two or more whitespace characters,
        # which keeps single-spaced labels such as "Local Volumes" intact.
        cells = re.split(r"\s{2,}", text)
        if len(cells) < 5:
            continue
        table[cells[0]] = dict(zip(column_names, cells[1:5]))
    return table
|
|
|
|
def system_summary() -> dict:
    """Combine selected ``docker info`` fields with the ``docker system df`` table.

    Returns:
        A dict of host/daemon fields (each defaulting to ``""`` when docker
        did not report it) plus ``system_df`` holding the parsed disk table.
    """
    info = docker_info()
    disk_table = docker_system_df()

    # (output key, key in the docker info JSON)
    field_map = (
        ("name", "Name"),
        ("server_version", "ServerVersion"),
        ("operating_system", "OperatingSystem"),
        ("os_type", "OSType"),
        ("architecture", "Architecture"),
        ("kernel_version", "KernelVersion"),
        ("cpus", "NCPU"),
        ("mem_total", "MemTotal"),  # bytes
        ("containers", "Containers"),
        ("containers_running", "ContainersRunning"),
        ("containers_stopped", "ContainersStopped"),
        ("images", "Images"),
        ("docker_root_dir", "DockerRootDir"),
    )
    summary = {out_key: info.get(info_key, "") for out_key, info_key in field_map}
    summary["system_df"] = disk_table
    return summary
|
|
|
|
def format_bytes(n: int) -> str:
    """Format a byte count with one decimal place and a B/KB/MB/GB/TB suffix.

    Each step up divides by 1024; values of 1024 TB or more stay in TB.
    Used for the host's total memory.
    """
    value = float(n)
    for suffix in ("B", "KB", "MB", "GB"):
        if value < 1024:
            return f"{value:.1f}{suffix}"
        value /= 1024
    # Largest unit: reported regardless of magnitude.
    return f"{value:.1f}TB"
|
|
|
|
def docker_inspect_restart_count(container_name: str) -> int:
    """Return the container's RestartCount from ``docker inspect``, or 0 on failure.

    RestartCount is useful when stuff is flapping / OOMing. The lookup is
    best-effort: a failing docker command, a missing docker binary, or output
    that is not an integer all yield 0 instead of an exception.
    """
    try:
        out = sh([DOCKER, "inspect", "-f", "{{.RestartCount}}", container_name])
        return int(out)
    except (subprocess.SubprocessError, OSError, ValueError):
        # SubprocessError: docker exited non-zero; OSError: binary could not
        # be started; ValueError: output was not an integer (or undecodable).
        return 0
|
|
|
|
def is_app_web_container(name: str) -> bool:
    """Tell whether *name* looks like an app's first web container.

    Dokku apps typically have containers like "<app>.web.1"; names starting
    with "dokku." are excluded.
    """
    if name.startswith("dokku."):
        return False
    return name.endswith(".web.1")
|
|
|
|
def infer_app_name(container_name: str) -> str:
    """Derive the app name from a container name: "<app>.web.1" -> "<app>".

    Everything before the last occurrence of ".web.1" is returned; a name
    that does not contain ".web.1" is returned unchanged.
    """
    head, marker, _tail = container_name.rpartition(".web.1")
    if not marker:
        # Marker absent: rpartition put the whole name in the tail.
        return container_name
    return head
|
|
|
|
def infer_url(app_name: str) -> str:
    """Return the URL for *app_name*.

    An entry in APP_URL_OVERRIDES takes precedence; otherwise the URL is
    built as https://<app_name>.<APP_DOMAIN>.
    """
    if app_name not in APP_URL_OVERRIDES:
        # No explicit override: fall back to the conventional subdomain.
        return f"https://{app_name}.{APP_DOMAIN}"
    return APP_URL_OVERRIDES[app_name]
|
|
|
|
def classify_infra(container_name: str) -> bool:
    """Tell whether *container_name* is one of the recognized infra containers.

    Matches the dokku postgres/redis/mysql/mongodb name prefixes, plus the
    exact names "dokku.minio.storage" and "logspout".
    """
    service_prefixes = (
        "dokku.postgres.",
        "dokku.redis.",
        "dokku.mysql.",
        "dokku.mongodb.",
    )
    exact_names = ("dokku.minio.storage", "logspout")
    if container_name.startswith(service_prefixes):
        return True
    return container_name in exact_names
|
|
|
|
def _collect_warnings(apps: list[dict]) -> list[str]:
    """Build warning strings for apps with RAM at or above 85% or 3+ restarts."""
    warnings = []
    for a in apps:
        # mem_pct is like "15.32%"; empty or unparsable values count as 0.
        if pct_str_to_float(a["mem_pct"]) >= 85:
            warnings.append(f"{a['app']} RAM high ({a['mem_pct']})")
        if a["restarts"] >= 3:
            warnings.append(f"{a['app']} restarting (restarts={a['restarts']})")
    return warnings


def _compute_gauges(stats: dict, sysinfo: dict) -> dict:
    """Compute the CPU / RAM / docker-image-disk gauge values from raw readings."""
    total_cpu_pct = sum(
        (pct_str_to_float(s.get("cpu", "0%")) for s in stats.values()), 0.0
    )
    total_mem_used_bytes = sum(
        parse_human_bytes(s.get("mem_used", "0B")) for s in stats.values()
    )

    # Host total RAM (bytes) comes from docker info
    host_mem_total = int(sysinfo.get("mem_total") or 0)
    ram_pct = (total_mem_used_bytes / host_mem_total * 100.0) if host_mem_total else 0.0

    # Docker disk: images "Size" and "Reclaimable"
    df_images = sysinfo.get("system_df", {}).get("Images", {})
    images_size_bytes = parse_human_bytes(df_images.get("size", "0B"))

    # Reclaimable looks like "13.93GB (91%)" so grab the first token
    reclaimable_raw = (df_images.get("reclaimable") or "").split(" ", 1)[0]
    images_reclaimable_bytes = parse_human_bytes(reclaimable_raw) if reclaimable_raw else 0

    images_used_bytes = max(0, images_size_bytes - images_reclaimable_bytes)
    disk_pct = (images_used_bytes / images_size_bytes * 100.0) if images_size_bytes else 0.0

    return {
        # sum of container CPU%, can exceed 100 if multi-core; we clamp for display
        "cpu_total_pct": clamp(total_cpu_pct),
        "ram_used_bytes": total_mem_used_bytes,
        "ram_total_bytes": host_mem_total,
        "ram_pct": clamp(ram_pct),
        "docker_images_size_bytes": images_size_bytes,
        "docker_images_used_bytes": images_used_bytes,
        "docker_images_pct": clamp(disk_pct),
    }


def collect():
    """Gather container, host and gauge data for one dashboard refresh.

    Runs ``docker ps`` and ``docker stats`` once, one ``docker inspect`` per
    displayed container, and the host summary once.

    Returns:
        A dict with the keys ``generated_at`` (UTC ISO timestamp ending in
        "Z"), ``poll_seconds``, ``domain``, ``system`` (host summary plus the
        human-readable ``mem_total_h``), ``gauges``, ``apps`` (sorted by app
        name), ``infra`` (sorted by container name; empty unless SHOW_INFRA)
        and ``warnings``.
    """
    # Local import: only this function needs it, and the output format below
    # is kept identical to the previous naive-UTC timestamp.
    from datetime import timezone

    ps_rows = docker_ps_all()
    stats = docker_stats()

    apps = []
    infra = []

    for r in ps_rows:
        name = r["name"]
        is_app = is_app_web_container(name)
        if not is_app and not (SHOW_INFRA and classify_infra(name)):
            # Container is not displayed, so skip its `docker inspect` call.
            continue

        s = stats.get(name, {})
        row = {
            "container": name,
            "image": r["image"],
            "status": r["status"],
            "ports": r["ports"],
            "cpu": s.get("cpu", ""),
            "mem_used": s.get("mem_used", ""),
            "mem_limit": s.get("mem_limit", ""),
            "mem_pct": s.get("mem_pct", ""),
            "restarts": docker_inspect_restart_count(name),
        }

        if is_app:
            app_name = infer_app_name(name)
            row["app"] = app_name
            row["url"] = infer_url(app_name)
            apps.append(row)
        else:
            infra.append(row)

    # Sort stable
    apps.sort(key=lambda x: x["app"])
    infra.sort(key=lambda x: x["container"])

    # Simple top-line summary
    warnings = _collect_warnings(apps)

    # Fetched exactly once: a second fetch would replace this dict and drop
    # the mem_total_h key added below.
    sysinfo = system_summary()

    # format mem bytes nicely
    try:
        sysinfo["mem_total_h"] = format_bytes(int(sysinfo["mem_total"]))
    except (TypeError, ValueError):
        # mem_total missing ("") or not numeric
        sysinfo["mem_total_h"] = ""

    # --- Gauges (live-ish) ---
    gauges = _compute_gauges(stats, sysinfo)

    generated_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

    return {
        "generated_at": generated_at,
        "poll_seconds": POLL_SECONDS,
        "domain": APP_DOMAIN,
        "system": sysinfo,
        "gauges": gauges,
        "apps": apps,
        "infra": infra,
        "warnings": warnings,
    }
|
|
|
|
def parse_human_bytes(s: str) -> int:
    """Convert a size string such as "58.84MiB", "145.1MB" or "423B" to bytes.

    The suffix is matched case-insensitively against the _UNIT table.
    Returns 0 when the text is not "<number><letters>" or the suffix is not
    in the table; fractional byte results are truncated.
    """
    parsed = re.match(r"^([0-9]*\.?[0-9]+)\s*([A-Za-z]+)$", s.strip())
    if parsed is None:
        return 0
    number_text, suffix = parsed.groups()
    multiplier = _UNIT.get(suffix.lower(), 0)
    return int(float(number_text) * multiplier)
|
|
|
|
def pct_str_to_float(p: str) -> float:
    """Convert a percentage string such as "15.32%" to a float.

    Any failure (empty text, non-numeric text, non-string input) yields 0.0.
    """
    try:
        without_sign = p.strip().replace("%", "")
        value = float(without_sign)
    except Exception:
        return 0.0
    else:
        return value
|
|
|
|
def clamp(n: float, lo: float = 0.0, hi: float = 100.0) -> float:
    """Limit *n* to the range [lo, hi] (0 to 100 by default)."""
    capped = min(hi, n)  # apply the upper bound first
    return max(lo, capped)  # then the lower bound
|
|
|
|
@app.get("/")
def index():
    """Render index.html, passing the polling interval to the template."""
    page = render_template("index.html", poll_seconds=POLL_SECONDS)
    return page
|
|
|
|
@app.get("/partial/apps")
def partial_apps():
    """Render apps_table.html from a freshly collected snapshot."""
    return render_template("apps_table.html", data=collect())
|
|
|
|
@app.get("/api/status")
def api_status():
    """Return a freshly collected snapshot as a JSON response."""
    snapshot = collect()
    return jsonify(snapshot)
|