package apps import ( "fmt" "io/fs" "net/url" "os" "path/filepath" "strings" ) // Cache stores fetched URL responses on disk under /_app/. // Files are name-keyed by upstream host + path so operators can list // and inspect them by hand. There is no metadata, no SHA-256, no // expiration — fetch-once-and-keep-forever. To force a refetch, // delete the cache file. type Cache struct { root string } // NewCache creates a Cache rooted at the given path. The directory is // created if missing. Stale *.tmp files left over from interrupted // writes are swept on construction. func NewCache(root string) (*Cache, error) { root = filepath.Clean(root) if err := os.MkdirAll(root, 0o755); err != nil { return nil, fmt.Errorf("create cache root: %w", err) } c := &Cache{root: root} if err := c.sweepTemps(); err != nil { return nil, fmt.Errorf("sweep temps: %w", err) } return c, nil } // Root returns the cache directory absolute path. func (c *Cache) Root() string { return c.root } // keyForURL converts a URL into a relative filesystem path under the // cache root. // // Layout: /[:]/. The full origin tuple is in // the key so two URLs that resolve different content cannot collide: // // https://example.com/x.html → https/example.com/x.html // http://example.com/x.html → http/example.com/x.html // https://example.com:8443/x.html → https/example.com:8443/x.html // // No port stripping. The previous behavior — collapsing :443 onto bare // host for https (and :80 for http) — was a defensible HTTP convention // but conflated "the operator wrote a URL with the default port" with // "the operator wrote a bare-host URL". With explicit port preserved, // every URL maps to exactly one filesystem path; operators can still // `ls _app/https/example.com/` to inspect what's cached. Scheme // segregation prevents an http:// hit from masquerading as an https:// // hit when both are deliberately distinct (rare, but real on // reverse-proxied stacks where http and https serve different bytes). // // Host is lowercased so the canonical-host normalization survives // case-insensitive DNS. Port is preserved verbatim. func keyForURL(rawURL string) (string, error) { u, err := url.Parse(rawURL) if err != nil { return "", fmt.Errorf("parse URL: %w", err) } if u.Scheme != "http" && u.Scheme != "https" { return "", fmt.Errorf("unsupported scheme %q", u.Scheme) } if u.Host == "" { return "", fmt.Errorf("URL is missing host") } if u.RawQuery != "" { return "", fmt.Errorf("URL must not contain query string: %s", rawURL) } // Lowercase the host part but preserve the port verbatim. Without // this we'd lowercase a numeric port unnecessarily, which is fine // but pointless; with this the ASCII-cased host normalization // works the same for both default and explicit-port URLs. host := u.Host if i := strings.Index(host, ":"); i >= 0 { host = strings.ToLower(host[:i]) + host[i:] } else { host = strings.ToLower(host) } p := u.Path for strings.Contains(p, "//") { p = strings.ReplaceAll(p, "//", "/") } p = strings.TrimPrefix(p, "/") if p == "" { p = "index.html" } cleaned := filepath.Clean("/" + p) if strings.Contains(cleaned, "..") { return "", fmt.Errorf("URL path contains '..'") } return u.Scheme + "/" + host + cleaned, nil } func (c *Cache) pathFor(rawURL string) (string, error) { key, err := keyForURL(rawURL) if err != nil { return "", err } return filepath.Join(c.root, filepath.FromSlash(key)), nil } // Has reports whether a cache entry exists for the URL. func (c *Cache) Has(rawURL string) bool { p, err := c.pathFor(rawURL) if err != nil { return false } _, err = os.Stat(p) return err == nil } // Read returns the cached body or os.ErrNotExist. func (c *Cache) Read(rawURL string) ([]byte, error) { p, err := c.pathFor(rawURL) if err != nil { return nil, err } return os.ReadFile(p) } // Write atomically stores body for the URL. Parent directories are // created as needed. Writes via tmp+rename so partial files are never // observable. func (c *Cache) Write(rawURL string, body []byte) error { p, err := c.pathFor(rawURL) if err != nil { return err } if err := os.MkdirAll(filepath.Dir(p), 0o755); err != nil { return err } return writeAtomic(p, body) } func writeAtomic(path string, data []byte) error { dir := filepath.Dir(path) tmp, err := os.CreateTemp(dir, filepath.Base(path)+".tmp.*") if err != nil { return err } tmpName := tmp.Name() cleanup := func() { _ = os.Remove(tmpName) } if _, err := tmp.Write(data); err != nil { _ = tmp.Close() cleanup() return err } if err := tmp.Sync(); err != nil { _ = tmp.Close() cleanup() return err } if err := tmp.Close(); err != nil { cleanup() return err } if err := os.Rename(tmpName, path); err != nil { cleanup() return err } return nil } func (c *Cache) sweepTemps() error { err := filepath.WalkDir(c.root, func(p string, d fs.DirEntry, err error) error { if err != nil { return err } if d.IsDir() { return nil } if strings.Contains(d.Name(), ".tmp.") { _ = os.Remove(p) } return nil }) if err != nil && !os.IsNotExist(err) { return err } return nil }