use std::{
    cmp::Ordering,
    ffi::CString,
    hint::unreachable_unchecked,
    os::{
        fd::{AsFd, AsRawFd, BorrowedFd, FromRawFd, OwnedFd},
        unix::io::RawFd,
    },
    ptr,
};

use libseccomp::ScmpFilterContext;
use nix::{
    errno::Errno,
    fcntl::OFlag,
    libc::c_char,
    sys::{
        ptrace::{cont, seize, Options},
        signal::{kill, Signal},
        wait::{Id, WaitPidFlag},
    },
    unistd::{isatty, read, setpgid, tcsetpgrp, write, Pid},
};

use crate::{
    compat::{waitid, WaitStatus},
    config::CHLD_STACK_SIZE,
    fs::{duprand, pidfd_getfd, retry_on_eintr},
    unshare::{child, config::Config, Child, Command, Executable},
};

/// Boxed callback stored in [`ChildInfo::pre_exec`]; returns an `Errno` on failure.
// NOTE(review): presumably invoked in the child before exec -- confirm in `child.rs`.
type ChildPreExecFunc = Box<dyn Fn() -> Result<(), Errno>>;
/// Two pipes, each stored as a pair of owned descriptors.
///
/// The parent treats the first element of each pair as the read end and
/// the second as the write end: it reads from `.1 .0` and writes to `.0 .1`.
type PipePair = ((OwnedFd, OwnedFd), (OwnedFd, OwnedFd));

/// What the child should run, in a form that can be handed across `clone(2)`.
#[derive(Debug)]
pub enum Exe<'a> {
    /// A borrowed, already loaded dynamic library.
    Library(&'a libloading::os::unix::Library),
    /// A program given as `(filename, argv)`.
    ///
    /// `filename` points at a C string and `argv` is a NULL-terminated
    /// array of C string pointers. These are raw pointers into `CString`s
    /// owned elsewhere (the `Command` in `spawn`), so they are only valid
    /// while those strings are alive.
    Program((*const c_char, Vec<*const c_char>)),
}

/// Everything the child entry point needs after `clone(2)`.
///
/// `Command::spawn` boxes this structure and passes it as the opaque
/// argument of `child::child_after_clone`.
pub struct ChildInfo<'a> {
    /// What to execute (library or program).
    pub exe: Exe<'a>,
    /// The command's configuration.
    pub cfg: Config,
    /// Optional callback taken from the command.
    // NOTE(review): presumably run in the child before exec -- confirm in `child.rs`.
    pub pre_exec: Option<ChildPreExecFunc>,
    /// Optional PTY descriptor taken from the command.
    pub pty_fd: Option<OwnedFd>,
    /// Optional seccomp filter context taken from the command.
    pub seccomp_filter: Option<ScmpFilterContext>,
    /// The child's handles to the two pipes used to hand the seccomp notify
    /// descriptor over to the parent; the child closes them itself.
    pub seccomp_pipefd: PipePair,
}

/// Build a NULL-terminated array of C string pointers, as expected by the
/// `exec*` family for `argv`/`envp`.
///
/// The result has `arr.len() + 1` entries: one pointer per string, in
/// order, followed by a terminating null pointer.
///
/// The returned pointers borrow from `arr`; they are only valid while the
/// `CString`s in `arr` are alive and unmodified.
fn raw_with_null(arr: &[CString]) -> Vec<*const c_char> {
    // `Chain` reports an exact size hint, so `collect` allocates once.
    arr.iter()
        .map(|s| s.as_ptr())
        .chain(std::iter::once(ptr::null()))
        .collect()
}

impl Command {
    /// Spawn the command and return a handle that can be waited for
    pub fn spawn(mut self) -> Result<Child, Errno> {
        let exe = match self.exe {
            Executable::Library(ref lib) => Exe::Library(lib),
            Executable::Program((ref filename, ref args)) => {
                let c_args = raw_with_null(args);
                Exe::Program((filename.as_ptr(), c_args))
            }
        };

        let mut pid_fd: libc::c_int = -1;
        let clone_flags = libc::SIGCHLD | libc::CLONE_PIDFD;

        // SAFETY: Child owns its copy of the pipes,
        // and is responsible for closing them.
        let seccomp_pipefd = unsafe {
            (
                (
                    OwnedFd::from_raw_fd(self.seccomp_pipefd.0 .0),
                    OwnedFd::from_raw_fd(self.seccomp_pipefd.0 .1),
                ),
                (
                    OwnedFd::from_raw_fd(self.seccomp_pipefd.1 .0),
                    OwnedFd::from_raw_fd(self.seccomp_pipefd.1 .1),
                ),
            )
        };

        let child_info = Box::new(ChildInfo {
            exe,
            cfg: self.config,
            pre_exec: std::mem::take(&mut self.pre_exec),
            pty_fd: std::mem::take(&mut self.pty_fd),
            seccomp_filter: std::mem::take(&mut self.seccomp_filter),
            seccomp_pipefd,
        });
        let child_info_ptr: *mut libc::c_void = Box::into_raw(child_info) as *mut libc::c_void;

        // 2M stack by default, see config.rs.
        let mut stack = [0u8; CHLD_STACK_SIZE];

        // SAFETY: nix's clone does not support CLONE_PIDFD,
        // so we use libc::clone instead.
        let child = unsafe {
            let ptr = stack.as_mut_ptr().add(stack.len());
            let ptr_aligned = ptr.sub(ptr as usize % 16);
            libc::clone(
                child::child_after_clone as extern "C" fn(*mut libc::c_void) -> libc::c_int,
                ptr_aligned as *mut libc::c_void,
                clone_flags,
                child_info_ptr,
                &mut pid_fd,
            )
        };

        // SAFETY: Randomize the pid FD for hardening.
        // The created fd is O_CLOEXEC too, and we'll
        // send the number to the child to close it.
        // O_EXCL closes oldfd on success.
        let pid_fd = duprand(pid_fd, OFlag::O_CLOEXEC | OFlag::O_EXCL)?;

        // SAFETY: duprand returns a valid FD on success.
        let pid_fd = unsafe { BorrowedFd::borrow_raw(pid_fd) };

        match child.cmp(&0) {
            Ordering::Less => {
                // SAFETY: Reconstruct and drop.
                let _ = unsafe { Box::from_raw(child_info_ptr as *mut ChildInfo) };
                Err(Errno::last())
            }
            Ordering::Greater => {
                let child = Pid::from_raw(child);

                let seccomp_fd = match self.after_start(child, &pid_fd) {
                    Ok(seccomp_fd) => seccomp_fd,
                    Err(e) => loop {
                        match waitid(Id::PIDFd(pid_fd.as_fd()), WaitPidFlag::WEXITED) {
                            Ok(WaitStatus::Exited(_, errno)) => return Err(Errno::from_raw(errno)),
                            Err(Errno::EINTR) => {}
                            _ => return Err(e),
                        }
                    },
                };

                Ok(Child {
                    pid: child.into(),
                    pid_fd: pid_fd.as_raw_fd(),
                    seccomp_fd,
                    status: None,
                })
            }
            // SAFETY: This can never happen because clone child
            // jumps to the specified function.
            _ => unsafe { unreachable_unchecked() },
        }
    }

    /// Parent-side handshake run right after the child has been cloned.
    ///
    /// In order, this:
    /// 1. if `config.stop` is set, waits for the child to stop itself,
    ///    seizes it with ptrace and resumes it;
    /// 2. if `config.make_group_leader` is set, moves the child into its
    ///    own process group and, when stdin is a TTY, makes that group the
    ///    foreground group;
    /// 3. runs the optional `before_unfreeze` callback with the child's pid;
    /// 4. reads the child's seccomp notify fd number from the second pipe,
    ///    fetches the descriptor with `pidfd_getfd`, and acknowledges by
    ///    writing one byte to the first pipe.
    ///
    /// Returns the seccomp notify descriptor relocated by `duprand`.
    ///
    /// # Errors
    ///
    /// Propagates the `Errno` of any failing call. Additionally returns
    /// `EIO` if either pipe hits EOF (the child died) and `EINVAL` if the
    /// bytes read cannot be converted to a `RawFd`.
    ///
    /// # Panics
    ///
    /// Panics if the child reports an unexpected wait status during the
    /// ptrace handshake, or if the one-byte pipe write reports more than
    /// one byte written.
    #[allow(clippy::cognitive_complexity)]
    fn after_start<F: AsRawFd>(mut self, pid: Pid, pid_fd: &F) -> Result<RawFd, Errno> {
        if self.config.stop {
            // Seize the process for tracing.
            // This must happen before reading the seccomp fd.
            // TODO: Make ptrace options configurable.
            let ptrace_options: Options = Options::PTRACE_O_TRACEFORK
                | Options::PTRACE_O_TRACEVFORK
                | Options::PTRACE_O_TRACECLONE
                | Options::PTRACE_O_TRACEEXEC     // used by Exec TOCTOU mitigator.
                | Options::PTRACE_O_TRACEEXIT     // used by SegvGuard.
                | Options::PTRACE_O_TRACESECCOMP  // used by chdir and exec hooks.
                | Options::PTRACE_O_TRACESYSGOOD  // ditto.
                | Options::PTRACE_O_EXITKILL; // we also set PDEATHSIG so this is the second layer.

            // SAFETY: Prefer to use the PIDFd rather than the PID
            // for waitid(2) calls. This gives us safety against
            // e.g. PID recycling and ensures a secure attach process.
            let pid_fd = unsafe { BorrowedFd::borrow_raw(pid_fd.as_raw_fd()) };
            // Step 1: Wait for the process to stop itself.
            // Note, we also wait for EXITED so that if the process
            // exits instead of stopping, the wait returns and the
            // assertion below fails rather than blocking forever.
            let status = waitid(
                Id::PIDFd(pid_fd),
                WaitPidFlag::WEXITED | WaitPidFlag::WSTOPPED | WaitPidFlag::__WNOTHREAD,
            )?;
            assert_eq!(status, WaitStatus::Stopped(pid, libc::SIGSTOP));
            // Step 2: Seize the process.
            // We use PTRACE_SEIZE in the parent rather than
            // PTRACE_TRACEME in the child for its improved
            // behaviour/API.  This also gives us the chance to deny
            // PTRACE_TRACEME and further confine the sandbox against
            // e.g. trivial ptrace detectors.
            seize(pid, ptrace_options)?;
            // The seized, already stopped child reports a group-stop event.
            let status = waitid(
                Id::PIDFd(pid_fd),
                WaitPidFlag::WEXITED | WaitPidFlag::WSTOPPED | WaitPidFlag::__WNOTHREAD,
            )?;
            assert_eq!(
                status,
                WaitStatus::PtraceEvent(pid, libc::SIGSTOP, libc::PTRACE_EVENT_STOP)
            );
            // SAFETY: nix does not have a wrapper for PTRACE_LISTEN.
            Errno::result(unsafe { libc::ptrace(libc::PTRACE_LISTEN, pid.as_raw(), 0, 0) })?;
            // Step 3: Successfully attached, resume the process.
            // We have to do a simple signal ping-pong here but
            // it's done once and it's worth the trouble.
            kill(pid, Signal::SIGCONT)?;
            let status = waitid(
                Id::PIDFd(pid_fd),
                WaitPidFlag::WEXITED | WaitPidFlag::WSTOPPED | WaitPidFlag::__WNOTHREAD,
            )?;
            assert_eq!(
                status,
                WaitStatus::PtraceEvent(pid, libc::SIGTRAP, libc::PTRACE_EVENT_STOP)
            );
            cont(pid, None)?;
            // Next the SIGCONT itself is reported as a signal-delivery stop.
            let status = waitid(
                Id::PIDFd(pid_fd),
                WaitPidFlag::WEXITED | WaitPidFlag::WSTOPPED | WaitPidFlag::__WNOTHREAD,
            )?;
            assert_eq!(status, WaitStatus::PtraceEvent(pid, libc::SIGCONT, 0));
            // Resume the child and deliver the SIGCONT to it.
            cont(pid, Some(Signal::SIGCONT))?;
        }

        // A failing isatty check is treated as "not a TTY".
        let stdin_isatty = isatty(std::io::stdin()).unwrap_or(false);
        if self.config.make_group_leader {
            // SAFETY: Put sandbox process in a new process group
            // to isolate Syd processes from the sandbox processes.
            setpgid(pid, pid)?;

            // Check if standard input is a TTY.
            if stdin_isatty {
                // Set foreground process group to the sandbox process.
                tcsetpgrp(std::io::stdin(), pid)?;
            }
        }

        // SAFETY: Note we don't want to put Syd into a new process
        // group here so as to defend the parent process against
        // signals in cases when Landlock protection is not available.
        // Landlock signal scopes are new in ABI 6, new in Linux>=6.12.

        // Give the caller a chance to act on the child before it is
        // unblocked; a callback error aborts the handshake.
        if let Some(ref mut callback) = self.before_unfreeze {
            #[allow(clippy::cast_sign_loss)]
            callback(i32::from(pid) as u32)?;
        }

        // SAFETY: Parent owns its copy of the pipes,
        // and is responsible for closing them.
        let seccomp_pipefd = unsafe {
            (
                (
                    OwnedFd::from_raw_fd(self.seccomp_pipefd.0 .0),
                    OwnedFd::from_raw_fd(self.seccomp_pipefd.0 .1),
                ),
                (
                    OwnedFd::from_raw_fd(self.seccomp_pipefd.1 .0),
                    OwnedFd::from_raw_fd(self.seccomp_pipefd.1 .1),
                ),
            )
        };

        // We'll read seccomp notify fd from the second pipe,
        // and write the acknowledgement notification to
        // the first pipe.
        let (pipe_ro, pipe_rw) = (seccomp_pipefd.1 .0, seccomp_pipefd.0 .1);

        // Close the unused ends of the pipes.
        drop(seccomp_pipefd.0 .0);
        drop(seccomp_pipefd.1 .1);

        // Read the value of the file descriptor from the pipe.
        // Handle interrupts and partial reads.
        // EOF means process died before writing to the pipe.
        let mut buf = vec![0u8; std::mem::size_of::<RawFd>()];
        let mut nread = 0;
        while nread < buf.len() {
            #[allow(clippy::arithmetic_side_effects)]
            match read(&pipe_ro, &mut buf[nread..]) {
                Ok(0) => return Err(Errno::EIO),
                Ok(n) => nread += n,
                Err(Errno::EINTR | Errno::EAGAIN) => continue,
                Err(errno) => return Err(errno),
            }
        }

        // Close the read end of the pipe.
        drop(pipe_ro);

        // The fd number is decoded from little-endian bytes.
        let remote_seccomp_fd = match buf.as_slice().try_into() {
            Ok(buf) => RawFd::from_le_bytes(buf),
            Err(_) => return Err(Errno::EINVAL),
        };

        // Get the seccomp notify fd using pidfd_getfd(2).
        // The child is waiting on the read end of the pipe,
        // for us to safely transfer the file descriptor.
        let seccomp_fd = pidfd_getfd(pid_fd.as_raw_fd(), remote_seccomp_fd)?;

        // Unblock the child to safely continue and close
        // their copy of the seccomp notify file descriptor.
        // Handle interrupts.
        // Partial write is not possible.
        // EOF means process died before reading from the pipe.
        let buf = [42u8; 1];
        match retry_on_eintr(|| write(&pipe_rw, &buf))? {
            0 => return Err(Errno::EIO),
            1 => {}
            n => unreachable!("BUG: invalid pipe write of size {n}!"),
        };

        // Close the write end of the pipe.
        drop(pipe_rw);

        // SAFETY: Randomize the seccomp fd for hardening.
        // Old seccomp fd will be closed by Drop on function exit.
        duprand(seccomp_fd.as_raw_fd(), OFlag::O_CLOEXEC)
    }
}
