#include "stdafx.h"
#include "ReplaceActive.h"
#include "ControlFlow.h"
#include "ControlFlowDiff.h"
#include "Code/Listing.h"
#include "Code/Arena.h"
#include "Engine.h"

namespace storm {

	/**
	 * Convenient data structures:
	 */


	/**
	 * Data that describes the replacement we are trying to do: the old code, the new function,
	 * and the tables that relate locations in the two. It is passed by reference to the helpers
	 * below that generate the thunks.
	 */
	struct ActiveReplace {
		// Arena. Used both to compile the generated thunks and as allocation site for
		// temporary objects.
		code::Arena *arena;

		// Old function. Passed to 'codeBinary' to get at the binary of the old version.
		const void *oldFn;

		// New function. Its 'directRef' is what the generated thunk finally jumps into.
		Function *newFn;

		// Source listing for the new function.
		code::Listing *newSource;

		// Transformed source listing (with some additional labels).
		code::Listing *newTransformed;

		// Variable offsets for the new function. Indexed by variable key.
		Array<Offset> *newVarOffsets;

		// Layout in 'oldFn'.
		Array<ControlFlowItem> *oldCf;

		// Mapped layout in 'newFn'. Refers to instructions in 'newSource'. Traversed in parallel
		// with 'oldCf' (same index in both arrays), so the two are expected to have the same shape.
		Array<ControlFlowItem> *newCf;

		// Map locations in 'newCf' to instructions in 'newTransformed'. First location after the call instr.
		Map<Nat, Nat> *toTransformedInside;

		// Map locations in 'newCf' to instructions in 'newTransformed'. First location after the expanded FnCall.
		Map<Nat, Nat> *toTransformedAfter;

		// Map locations in 'newCf' to offsets, to right after the corresponding call instruction.
		Map<Nat, Nat> *toOffsetInside;

		// Map locations in 'newCf' to offsets, to after the result has been stored (after the expanded FnCall).
		Map<Nat, Nat> *toOffsetAfter;
	};


	/**
	 * Translates operands that are expressed relative to the frame pointer of another function
	 * into operands that are valid in the current one (the adapter function). This is needed
	 * since some architectures have a frame pointer that points to different locations depending
	 * on the size of the stack frame.
	 *
	 * A default-constructed translator performs no translation at all. Once 'set' has been called
	 * with a reference variable, operands relative to 'ptrFrame' are re-expressed relative to that
	 * variable instead.
	 */
	class OffsetTranslator {
	public:
		// Create a translator that leaves all operands untouched.
		OffsetTranslator() : refVar(), offset() {}

		// Is this translator a no-op?
		Bool empty() const {
			return refVar == code::Var();
		}

		// Does this translator actually translate anything?
		Bool any() const {
			return !empty();
		}

		// Use 'refVar' as the new zero point. 'offset' is added to all translated offsets.
		void set(code::Var refVar, code::Offset offset) {
			this->offset = offset;
			this->refVar = refVar;
		}

		// Translate 'o'. Operands that are not relative to 'ptrFrame' are returned as they are.
		code::Operand operator ()(const code::Operand &o) const {
			// Note: Offset references need no handling - they are not used for stack-allocated
			// variables.
			Bool frameRelative = any()
				&& o.type() == code::opRelative
				&& code::same(o.reg(), code::ptrFrame);

			if (!frameRelative)
				return o;

			return xRel(o.size(), refVar, offset + o.offset());
		}

	private:
		// Variable that acts as the zero point. Equal to 'Var()' if no translation is active.
		code::Var refVar;

		// Offset to add to operands when translating them.
		code::Offset offset;
	};


	// Locate the source listing for 'fn'. Generated code is compiled if necessary, and delegated
	// code is followed to the function it delegates to. Returns null if no listing was found.
	static MAYBE(code::Listing *) findSource(Function *fn) {
		Code *code = fn->getCode();

		if (GeneratedCode *generated = as<GeneratedCode>(code)) {
			// The listing is only available once the code has been compiled.
			generated->compile();
			return generated->source();
		}

		// Not generated. See if it delegates to some other named function we can examine instead.
		DelegatedCode *delegated = as<DelegatedCode>(fn->getCode());
		if (!delegated)
			return null;

		NamedSource *target = as<NamedSource>(delegated->to().source());
		if (!target)
			return null;

		Function *next = as<Function>(target->named());
		if (!next)
			return null;

		return findSource(next);
	}

	// Replace all elements in 'vars' that are parameters in 'l' with the empty variable, 'Var()'.
	static void clearParams(code::Listing *l, Array<code::Var> *vars) {
		for (Nat pos = 0; pos < vars->count(); pos++) {
			code::Var &slot = vars->at(pos);
			if (!l->isParam(slot))
				continue;

			slot = code::Var();
		}
	}

	// Build the Value that corresponds to the type and reference flag stored in 'info'.
	static Value asValue(code::Listing::VarInfo *info) {
		Value result(info->type, info->ref);
		return result;
	}

	// Emit code to 'l' that default-initializes the variable described by 'info', which is
	// located at 'target'. Primitive values are zeroed, other types are initialized by calling
	// their default constructor. A warning is printed if no default constructor exists.
	static void defaultInitVar(code::Listing *l, code::Listing::VarInfo *info, code::Operand target) {
		using namespace code;

		Value type = asValue(info);

		// Int, float, etc. We can simply zero it through a pointer to the target.
		if (type.isValue() && type.isPrimitive()) {
			*l << lea(ptrA, target);
			*l << mov(xRel(type.size(), ptrA), xConst(type.size(), 0));
			return;
		}

		Function *ctor = info->type->defaultCtor();
		if (!ctor) {
			// Should this be a hard error?
			WARNING(L"No default constructor!");
			return;
		}

		TypeDesc *ptrDesc = l->engine().ptrDesc();

		// Make 'ptrA' refer to the memory that the constructor shall initialize.
		if (type.isClass()) {
			// Allocate the object, and store the pointer to it in the variable.
			*l << fnParam(ptrDesc, info->type->typeRef());
			*l << fnCall(l->engine().ref(builtin::alloc), false, ptrDesc, ptrA);
			*l << lea(ptrC, target);
			*l << mov(ptrRel(ptrC), ptrA);
		} else {
			// Construct the value in place.
			*l << lea(ptrA, target);
		}

		// Call the constructor.
		*l << fnParam(ptrDesc, ptrA);
		*l << fnCall(ctor->ref(), true);
	}

	// Recursive search for the call in 'oldCf' that is located at, or as closely before, 'offset'
	// as possible. 'oldCf' and 'newCf' are traversed in parallel. Whenever a better candidate is
	// found, 'best' is updated to the distance to the candidate, and 'result' is set to the
	// corresponding item in 'newCf'. 'parent' is the item in the new layout that contains 'newCf'.
	static void findMapTarget(Array<ControlFlowItem> *oldCf, Array<ControlFlowItem> *newCf, size_t offset,
							const ControlFlowItem &parent,
							ControlFlowItem &result, size_t &best) {
		for (Nat pos = 0; pos < oldCf->count(); pos++) {
			const ControlFlowItem &oldItem = oldCf->at(pos);
			const ControlFlowItem &mapped = newCf->at(pos);

			// Loops contain their own list of items. Search them as well.
			if (oldItem.isLoop()) {
				findMapTarget(oldItem.loop(), mapped.loop(), offset, mapped, result, best);
				continue;
			}

			if (!oldItem.isCall())
				continue;

			// Only calls that are located at or before 'offset' are candidates.
			if (oldItem.offset() > offset)
				continue;

			// Only keep candidates that are strictly closer than what we have found so far.
			size_t distance = offset - oldItem.offset();
			if (distance >= best)
				continue;

			best = distance;
			if (!mapped.isStart()) {
				result = mapped;
				continue;
			}

			// The new item is the start, use 'parent' instead. This maps 'start' inside loops
			// onto the loop itself. It is marked as 'none' so that we start at the top of the loop.
			result = parent;
			result.status(ControlFlowItem::none);
		}
	}
	// Find the item in the new layout that execution at 'offset' in the old function should be
	// migrated to. Returns a default-constructed item if no suitable call was found.
	static ControlFlowItem findMapTarget(ActiveReplace &replace, size_t offset) {
		// Items at the top level have no parent, so an empty item is used in its place.
		ControlFlowItem noParent;
		ControlFlowItem closest;

		// Start at the largest possible distance so that any candidate is an improvement.
		size_t distance = ~size_t(0);

		findMapTarget(replace.oldCf, replace.newCf, offset, noParent, closest, distance);
		return closest;
	}

	// Result from generating one of the thunks: the compiled code, together with the location
	// inside it where execution should start.
	struct GeneratedThunk {
		// The compiled thunk.
		code::Binary *generated;

		// Offset of the entry point inside 'generated'. The generators in this file place a label
		// after the prolog and store the offset of that label here.
		size_t offset;
	};

	// Create an operand of size 'size' that refers to the same location as 'base', but with
	// 'offset' added to its offset. 'base' needs to be relative either to a register or to a
	// variable. An InternalError is thrown for all other kinds of operands.
	static code::Operand offsetIn(code::Size size, code::Operand base, Nat offset) {
		using namespace code;

		if (base.type() == opVariable)
			return xRel(size, base.var(), base.offset() + Offset(offset));

		if (base.type() == opRelative)
			return xRel(size, base.reg(), base.offset() + Offset(offset));

		throw new (runtime::someEngine()) InternalError(S("Unsupported operand type!"));
	}

	// Emit a sequence of 'mov' instructions to 'out' that copies 'size' units of memory from
	// 'from' to 'to'. As much as possible is copied in pointer-sized chunks. The remainder is
	// copied in int-sized chunks, and finally byte by byte.
	static void movMemcpy(code::Listing *out, code::Size size, code::Operand from, code::Operand to) {
		using namespace code;

		// Chunk sizes to try, largest first.
		const Size chunks[] = { Size::sPtr, Size::sInt, Size::sByte };

		const Nat total = size.current();
		Nat copied = 0;

		for (Nat c = 0; c < ARRAY_COUNT(chunks); c++) {
			Size chunk = chunks[c];

			// Copy as long as an entire chunk of this size fits in what remains.
			for (; copied + chunk.current() <= total; copied += chunk.current())
				*out << mov(offsetIn(chunk, to, copied), offsetIn(chunk, from, copied));
		}
	}

	// Emit code to 'out' that moves a value of type 'type' from 'from' to 'to'. The strategy
	// depends on the type:
	// - No type: a plain 'mov' through a register, trusting the sizes of the operands.
	// - Types that fit in a register: a 'mov' through a register of the size of the type.
	// - Other types: a copy, either by copying the memory or by calling the copy constructor,
	//   followed by destruction of the source. The destructor is not called if the copy
	//   constructor is marked as pure.
	// Uses 'ptrA' and 'ptrC' as temporary registers.
	static void copyValue(code::Listing *out, Value type, code::Operand from, code::Operand to) {
		using namespace code;

		if (type.type == null) {
			// We need to trust sizes in the operands. Prefer 'from'.
			Reg tmp = asSize(ptrA, from.size());
			*out << mov(tmp, from);
			if (to.size() != from.size()) {
				// Sizes differ, write through a pointer to be able to pick the size ourselves.
				*out << lea(ptrC, to);
				*out << mov(xRel(from.size(), ptrC), tmp);
			} else {
				*out << mov(to, tmp);
			}
			return;
		}

		if (type.isAsmType()) {
			Reg tmp = asSize(ptrA, type.size());
			Bool sizesMatch = from.size() == type.size() && to.size() == type.size();
			if (sizesMatch) {
				*out << mov(tmp, from);
				*out << mov(to, tmp);
			} else {
				// Sizes of 'from' and 'to' may be wrong, so we work with pointers:
				*out << lea(ptrA, from);
				*out << lea(ptrC, to);
				*out << mov(tmp, xRel(type.size(), ptrA, Offset()));
				*out << mov(xRel(type.size(), ptrC, Offset()), tmp);
			}
			return;
		}

		Function *copyCtor = type.type->copyCtor();
		TypeDesc *ptrDesc = out->engine().ptrDesc();

		// A pure copy constructor means that we can copy the memory, and skip the destructor.
		Bool pureCopy = copyCtor && (copyCtor->fnFlags() & fnPure);

		if (copyCtor && !pureCopy) {
			// Call copy-ctor.
			*out << lea(ptrA, to);
			*out << lea(ptrC, from);
			*out << fnParam(ptrDesc, ptrA);
			*out << fnParam(ptrDesc, ptrC);
			*out << fnCall(copyCtor->ref(), true);
		} else {
			// Memcpy
			movMemcpy(out, type.size(), from, to);
		}

		// Run the dtor on the source if applicable:
		Function *dtor = type.type->destructor();
		if (dtor && !pureCopy) {
			*out << lea(ptrA, from);
			*out << fnParam(ptrDesc, ptrA);
			*out << fnCall(dtor->ref(), true);
		}
	}

	// Emit code to 'out' that performs all moves in 'moves', in the order they appear in the array.
	static void commitMoves(code::Listing *out, Array<DataMove> *moves) {
		using namespace code;

		Nat pending = moves->count();
		for (Nat pos = 0; pos < pending; pos++) {
			const DataMove &current = moves->at(pos);
			copyValue(out, current.type, current.source, current.target);
		}
	}

	// Schedule a move of a value of type 'type' from 'from' to 'to'. The value is immediately
	// copied into a fresh temporary variable in 'auxBlock' (code is emitted to 'l'). The move
	// from the temporary to 'to' is added to 'out', so that it can be emitted later on. Nothing
	// is done if 'from' and 'to' are the same operand.
	static void moveVar(Array<DataMove> *out, code::Listing *l, code::Block auxBlock,
						Value type, code::Operand from, code::Operand to) {
		using namespace code;

		Bool alreadyInPlace = from == to;
		if (alreadyInPlace)
			return;

		// Step 1 (now): source -> temporary.
		Var temporary = l->createVar(auxBlock, type.size());
		copyValue(l, type, from, temporary);

		// Step 2 (later): temporary -> target.
		DataMove pending(temporary, to, type);
		out->push(pending);
	}

	// Make sure that the registers preserved by the new function ('newFn') end up where the new
	// function expects to find them, given the state left by the old function ('oldFn').
	// - Registers saved by the new function are copied to temporaries in 'auxBlock' (code emitted
	//   to 'out'). The final moves into place are added to 'move'. The value is taken from where
	//   the old function saved the register, or from the register itself if the old function did
	//   not save it.
	// - Registers that only the old function saved are restored from their saved location.
	// Saved locations are translated using 'translateOld' and 'translateNew' respectively.
	static void movePreservedRegs(Array<DataMove> *move, code::Listing *out,
								const OffsetTranslator &translateOld, const OffsetTranslator &translateNew,
								code::Block auxBlock, code::Arena::Skeleton *oldFn, code::Arena::Skeleton *newFn) {
		using namespace code;

		// Registers we need to keep: stash them in separate variables.
		for (Nat n = 0; n < newFn->savedRegs->count(); n++) {
			// If the old function did not save the register, the value is still in the register.
			Operand from = newFn->savedRegs->at(n);

			// Otherwise, the value is where the old function saved it.
			Nat o = 0;
			while (o < oldFn->savedRegs->count() && !same(oldFn->savedRegs->at(o).reg(), from.reg()))
				o++;
			if (o < oldFn->savedRegs->count())
				from = oldFn->savedLocs->at(o);

			from = translateOld(from);
			Operand to = translateNew(newFn->savedLocs->at(n));

			// If it is already in the proper location, we don't need to do anything.
			if (from == to)
				continue;

			Var stash = out->createVar(auxBlock, Size::sPtr);
			*out << mov(ptrA, from);
			*out << mov(stash, ptrA);
			move->push(DataMove(stash, to, Value()));
		}

		// Registers that are no longer saved: restore them now.
		for (Nat o = 0; o < oldFn->savedRegs->count(); o++) {
			Operand reg = oldFn->savedRegs->at(o);

			Bool stillSaved = false;
			for (Nat n = 0; !stillSaved && n < newFn->savedRegs->count(); n++)
				stillSaved = same(reg.reg(), newFn->savedRegs->at(n).reg());

			if (stillSaved)
				continue;

			// Avoid making 'reg' a preserved register by using 'shadowMov'.
			*out << shadowMov(reg, translateOld(oldFn->savedLocs->at(o)));
		}
	}

	// Helper data structure to look up variables based on their offsets. Only variables that are
	// visible from a particular block, and that are neither named nor parameters, are included.
	class OffsetLookup {
	public:
		// Variables that are visible here. Contains one element for each variable in the listing,
		// indexed by the key of the variable. Elements for variables that are not included in the
		// lookup are set to 'Var()'.
		Array<code::Var> *visibleVars;

		// Offset-based lookup. One entry for each included variable.
		struct Entry {
			// Offset of the variable.
			code::Offset offset;

			// The variable itself.
			code::Var var;

			Entry(code::Offset offset, code::Var var)
				: offset(offset), var(var) {}

			// Order entries by offset. Used by 'std::sort'.
			bool operator <(const Entry &o) const {
				return *this < o.offset;
			}

			// Compare against a plain offset. Used by 'std::lower_bound'.
			bool operator <(const code::Offset &o) const {
				return offset.current() < o.current();
			}
		};

		// All included variables, sorted by offset.
		vector<Entry> lookup;

		// Create. 'block' is the block whose visible variables we are interested in, and 'offsets'
		// contains the offset of each variable in 'l', indexed by the key of the variable.
		OffsetLookup(code::Listing *l, code::Block block, Array<code::Offset> *offsets) {
			// Get all variables to get the total number of variables we need to worry about. We
			// only use the size of the array, all elements are cleared.
			visibleVars = l->allVars();
			for (Nat i = 0; i < visibleVars->count(); i++)
				visibleVars->at(i) = code::Var();

			// Fill in all variables that are visible, from 'block' and all its parents.
			for (code::Block c = block; c != code::Block(); c = l->parent(c)) {
				Array<code::Var> *here = l->allVars(c);
				for (Nat i = 0; i < here->count(); i++) {
					code::Var v = here->at(i);

					// Don't include named variables or parameters.
					if (l->varInfo(v))
						continue;
					if (l->isParam(v))
						continue;

					visibleVars->at(v.key()) = v;
				}
			}

			// Put all offsets in an array so that we can look them up later on.
			for (Nat i = 0; i < visibleVars->count(); i++) {
				code::Var v = visibleVars->at(i);
				if (v == code::Var())
					continue;

				lookup.push_back(Entry(offsets->at(v.key()), v));
			}

			std::sort(lookup.begin(), lookup.end());
		}

		// Lookup an offset into a variable. Returns the variable that starts at 'offset', or the
		// variable that 'offset' is located inside of. Returns 'Var()' if there is no such variable.
		code::Var find(code::Offset offset) const {
			// Find the first entry that starts at or after 'offset'.
			vector<Entry>::const_iterator found = std::lower_bound(lookup.begin(), lookup.end(), offset);

			// Exact match?
			if (found != lookup.end() && found->offset.current() == offset.current())
				return found->var;

			// Otherwise, 'offset' may be inside the entry before 'found'. Note that this is the
			// case also when 'found' is the end of the array: 'offset' may be inside the last entry.
			if (found == lookup.begin())
				return code::Var();

			--found;

			// The previous entry starts before 'offset', so 'offset' is inside it if the entry ends
			// after 'offset'.
			Int end = found->offset.current() + Int(found->var.size().current());
			if (offset.current() < end)
				return found->var;
			return code::Var();
		}

		// Lookup an entire Operand. Only return something sensible if it is relative to ptrFrame.
		code::Var find(const code::Operand &op) const {
			if (op.type() == code::opRelative && op.reg() == code::ptrFrame)
				return find(op.offset());
			return code::Var();
		}
	};

	// Insert 'reg' into 'to', unless it is the stack pointer or the frame pointer (in any size).
	static void putReg(code::RegSet *to, code::Reg reg) {
		Bool reserved = code::same(reg, code::ptrStack) || code::same(reg, code::ptrFrame);
		if (!reserved)
			to->put(reg);
	}

	// Trace the contents of 'toTrace' backwards in 'src', starting from the instruction just
	// before 'current'. Returns true if the value can be traced to a 'lea' that takes the address
	// of one of the variables in 'lookup'. Used to find writes through pointers to variables we
	// are interested in.
	static Bool dependsOnVariable(code::Operand toTrace, code::Listing *src, Nat current, const OffsetLookup &lookup) {
		using namespace code;

		// Only pointer-sized operands may contain the address of a variable.
		if (toTrace.size() != Size::sPtr)
			return false;

		for (Nat pos = current; pos > 0; pos--) {
			Nat at = pos - 1;
			Instr *instr = src->at(at);

			// We are only interested in instructions that write to 'toTrace'.
			if ((instr->mode() & destWrite) == 0)
				continue;
			if (instr->dest() != toTrace)
				continue;

			// End-case: a 'lea'. Then we know exactly what the address refers to.
			if (instr->op() == op::lea)
				return lookup.find(instr->src()) != Var();

			// Otherwise, the value originates from the source operand. Trace that as well if it
			// is a register, or a location in the stack frame (the register might have been
			// spilled to memory).
			Operand origin = instr->src();
			Bool traceable = origin.type() == opRegister
				|| (origin.type() == opRelative && origin.reg() == ptrFrame);
			if (traceable && dependsOnVariable(origin, src, at, lookup))
				return true;

			// If the instruction also reads the destination (e.g. add ptrX, 10), earlier writes
			// are still relevant. Otherwise, we have found the origin of the value and are done.
			if ((instr->mode() & destRead) == 0)
				return false;
		}

		return false;
	}

	// Remove the first element in 'in' that has the same current offset as 'find'. Returns true
	// if an element was removed, and false if no matching element was found.
	static Bool findAndRemove(const code::Offset &find, Array<code::Offset> *in) {
		Nat pos = 0;
		while (pos < in->count() && in->at(pos).current() != find.current())
			pos++;

		if (pos == in->count())
			return false;

		in->remove(pos);
		return true;
	}

	// Find the index in 'src' where the label 'find' is located. This is a linear search through
	// all labels in the listing, so it is slow. Returns 'src->count()' if the label is not found.
	static Nat findLabelTarget(code::Listing *src, code::Label find) {
		const Nat total = src->count();

		for (Nat pos = 0; pos < total; pos++) {
			Array<code::Label> *here = src->labels(pos);
			for (Nat l = 0; l < here->count(); l++) {
				if (here->at(l) == find)
					return pos;
			}
		}

		return total;
	}

	// Replace the source operand of 'instr' if it refers to a label in 'listing', since labels
	// in 'listing' are not available in the code we generate. Two cases are handled:
	// - A label at the very start of the listing is replaced with a reference to 'newFn'.
	// - A label that refers to a 'dat' instruction causes the data to be added to 'largeData'.
	//   The operand is replaced by a reference relative to 'largeLbl', assuming that each element
	//   in 'largeData' occupies one word.
	// In all other cases, 'instr' is returned without modifications.
	static code::Instr *replaceInstrLabel(code::Instr *instr, code::Listing *listing, Function *newFn,
										Array<code::Operand> *largeData, code::Label largeLbl) {
		using namespace code;

		Operand src = instr->src();
		Bool plainLabel = src.type() == opLabel;
		Bool relativeLabel = src.type() == opRelativeLbl;
		if (!plainLabel && !relativeLabel)
			return instr;

		// Find the label:
		Nat target = findLabelTarget(listing, src.label());
		if (target >= listing->count())
			return instr;

		// Start of the listing itself. This is used on X86 to store a pointer to the code itself
		// for EH.
		if (plainLabel && target == 0) {
			src = newFn->directRef();
			return instr->alterSrc(src);
		}

		// Reference to data. This is used to load large-ish constants from memory. We need to
		// copy the data to our listing. Start by finding the instruction that is referred to:
		Int words = 0;
		if (relativeLabel)
			words = src.offset().v64() / Offset::sWord.v64();
		if (Int(target) + words < 0 || Nat(target + words) >= listing->count())
			return instr;

		Instr *data = listing->at(Nat(target + words));
		if (data->op() != op::dat)
			return instr;

		// The data is placed last in 'largeData', so refer to that location.
		src = xRel(src.size(), largeLbl, Offset::sWord * largeData->count());
		largeData->push(data->src());
		return instr->alterSrc(src);
	}

	// Initialize all non-named variables in the listing based on 'src' that is expected to be a
	// transformed version of the new code. Note that, since 'src' is transformed, we can not easily
	// speak of variables. We only have offsets to work with.
	//
	// This is done by copying the instructions in 'src' that produce the values that are needed by
	// the code from 'srcOffset' and onwards. The copied instructions are added to 'out'.
	//
	// Parameters:
	// - arena: used to find out which registers are clobbered by function calls.
	// - out: listing that receives the copied instructions.
	// - src: transformed listing for the new function.
	// - translate: applied to all operands of the copied instructions.
	// - activeBlock: block that is active at 'srcOffset'. Determines which variables are visible.
	// - srcOffset: index in 'src' where execution will continue.
	// - srcLayout: offsets of all variables in 'src', indexed by variable key.
	// - extraMetadata: offsets relative to ptrFrame. The first write to each of them is copied as
	//   well. The array is not modified, we work on a copy.
	// - newFn: the new function. Used when replacing references to the start of the listing.
	static void initializeNonNamed(code::Arena *arena, code::Listing *out,
								code::Listing *src, const OffsetTranslator &translate,
								code::Block activeBlock, Nat srcOffset, Array<code::Offset> *srcLayout,
								Array<code::Offset> *extraMetadata, Function *newFn) {
		using namespace code;

		// TODO: Try to trace values between function calls as well! That is nice in cases with
		// expressions with multiple function calls and lots of temporaries.

		OffsetLookup lookup(src, activeBlock, srcLayout);

		// 1: Walk the code forward to find which variables will be used in the future. We stop at
		// the first occurrence of each of them. This is a bit tricky, since the same stack space is
		// re-used in subsequent blocks. We are therefore likely to over-estimate what we need.
		// 'toTrack' is indexed by variable key. 'Var()' means that the variable is not tracked.
		Array<Var> *toTrack = new (src) Array<Var>(lookup.visibleVars->count(), Var());
		for (Nat i = srcOffset; i < src->count(); i++) {
			Instr *instr = src->at(i);

			// The source operand is always read.
			Var srcVar = lookup.find(instr->src());
			if (srcVar != Var())
				toTrack->at(srcVar.key()) = srcVar;

			// The destination operand is only of interest if it is read.
			if (instr->mode() & destRead) {
				Var destVar = lookup.find(instr->dest());
				if (destVar != Var())
					toTrack->at(destVar.key()) = destVar;
			}
		}

		// We will trash 'extraMetadata', so make our own copy.
		extraMetadata = new (extraMetadata) Array<Offset>(*extraMetadata);

		// Data that is referred to by the instructions we copy, and the label where the data is
		// eventually placed in 'out' (see 'replaceInstrLabel').
		Array<Operand> *largeData = new (out) Array<Operand>();
		Label largeLbl = out->label();

		// 2: Walk the code backwards to find assignments to the affected variables. Copy them to
		// the output.
		Array<Instr *> *keep = new (out) Array<Instr *>(); // Instructions to copy, in reverse order.
		RegSet *regs = new (out) RegSet(); // Registers that contain valuable data.
		Bool addNext = false; // To keep flags intact if we need to.
		for (Nat i = srcOffset; i > 0; i--) {
			Instr *instr = src->at(i - 1);

			// Skip instructions prefixed with threadLocal (x86 mainly), they don't behave in the
			// way we expect.
			if (i >= 2 && src->at(i - 2)->op() == op::threadLocal)
				continue;

			// Shall we keep this instruction? Starts out as true if the instruction we kept last
			// asked for it.
			Bool add = addNext;
			addNext = false;

			if (instr->op() == op::call) {
				// Function calls are special in that they clear registers.
				arena->removeFnRegs(regs);

				// TODO: If we are interested in some function call register, we might want to keep
				// the call. This does, however, mean that we have to understand quite a bit about
				// the calling convention. For example, we need to retain stack setup/cleanup, which
				// is not easy. Since we generally know which function was called, we could inspect
				// it to figure out parameter layout for example. Also, we need to re-do the call
				// since we have a larger stack frame. If we keep the old code we risk destroying
				// our saved temporaries.

			} else if (instr->mode() & destWrite) {
				// See if it produces something we are interested in!
				Operand dest = instr->dest();
				Var destVar = lookup.find(dest);
				if (destVar != Var() && toTrack->at(destVar.key()) != Var()) {
					// Writes to a variable we are tracking. We don't need to track it anymore.
					add = true;
					// TODO: This does not account for if the *entire* variable has been read/written.
					toTrack->at(destVar.key()) = Var();
				} else if (dest.type() == opRegister && regs->has(dest.reg())) {
					// Writes to a register that contains data we need.
					add = true;
					regs->remove(dest.reg());
				} else if (dest.type() == opRelative) {
					// We have different things to consider here:
					if (same(dest.reg(), ptrFrame)) {
						// 1: if the base register is ptrFrame, check if this is an extra metadata access.
						add |= findAndRemove(dest.offset(), extraMetadata);
					} else if (same(dest.reg(), ptrStack)) {
						// 2: ptrStack can just be skipped.
					} else {
						// Check if the source originates from a lea of the address of the variable.
						add |= dependsOnVariable(dest.reg(), src, i - 1, lookup);
					}
				}
			}

			// Add it if we want to keep it!
			if (add) {
				// If 'src' is a label, we might need to modify it a bit.
				instr = replaceInstrLabel(instr, src, newFn, largeData, largeLbl);

				keep->push(instr);

				// If we just kept 'setCond', we want to add the next one if it is a 'cmp' or
				// 'test', even if those instructions don't produce any relevant output. They affect
				// flags, and we are interested in flags!
				// ("next" is in the direction we are walking, i.e. the previous instruction in 'src')
				if (instr->op() == op::setCond) {
					if (i >= 2) {
						Instr *prev = src->at(i - 2);
						addNext = (prev->op() == op::cmp) || (prev->op() == op::test);
					}
				}

				// Look at inputs from the instruction and update what we should keep track of.

				// Note the special case xor <reg>, <reg> has no inputs.
				Bool skipInputs = false;
				if (instr->op() == op::bxor) {
					skipInputs = instr->src() == instr->dest();
				}

				if (!skipInputs) {
					{
						// The source operand: either a variable we need to start tracking, or
						// some register (possibly used for addressing) that we need to preserve.
						Operand op = instr->src();
						Var v = lookup.find(op);
						if (v != Var() && lookup.visibleVars->at(v.key()) != Var()) {
							toTrack->at(v.key()) = v;
						} else if (op.type() == opRegister || op.type() == opRelative) {
							if (op.reg() != ptrFrame && op.reg() != ptrStack)
								regs->put(op.reg());
						}
					}

					// The destination operand is an input as well if the instruction reads it.
					Operand op = instr->dest();
					Var v = lookup.find(op);
					if (instr->mode() & destRead) {
						if (v != Var() && lookup.visibleVars->at(v.key()) != Var()) {
							toTrack->at(v.key()) = v;
						} else if (op.type() == opRegister) {
							putReg(regs, op.reg());
						}
					}
					// Addressing reads are a bit special:
					// (the base register is read even if the destination is only written to)
					if ((instr->mode() & (destRead | destWrite)) && op.type() == opRelative) {
						putReg(regs, op.reg());
					}
				}
			}
		}

		// 3: Put things back to "out".
		// 'keep' was built while walking backwards, so walk it in reverse to get the original order.
		for (Nat i = keep->count(); i > 0; i--) {
			Instr *instr = keep->at(i - 1);
			// Translate offsets if required:
			instr = instr->alter(translate(instr->dest()), translate(instr->src()));
			// PVAR(instr);
			*out << instr;
		}

		// 4: Put the large constants out if we need them.
		// The data is placed inside the code, so we need to jump past it.
		if (largeData->any()) {
			Label end = out->label();
			*out << jmp(end);
			*out << align(Offset::sWord);
			*out << largeLbl;
			for (Nat i = 0; i < largeData->count(); i++)
				*out << dat(largeData->at(i));
			*out << end;
		}
	}

	// Generate the first stage of the thunk. This stage uses the listing in 'skeleton' in order
	// to get a stack frame that has the same size as the stack frame of the old function. All it
	// does is to resize the stack frame to match 'stage2' and then jump to 'stage2'.
	//
	// Throws an InternalError if the stack size of the generated code differs from the stack size
	// of the old function. 'offset' is currently not used.
	static GeneratedThunk generateStage1(ActiveReplace &replace, size_t offset,
										code::Arena::Skeleton *skeleton, GeneratedThunk stage2) {
		using namespace code;

		Binary *oldBinary = codeBinary(replace.oldFn);
		Listing *thunk = skeleton->listing;

		// To initialize the stack frame and EH properly. We will actually jump *over* this, since
		// we do not want to re-initialize the stack frame.
		*thunk << prolog();

		// Add dummy "mov(reg, 0)" to make sure that the saved registers are clobbered.
		for (Nat r = 0; r < skeleton->savedRegs->count(); r++) {
			Operand saved = skeleton->savedRegs->at(r);
			*thunk << mov(saved, xConst(saved.size(), 0));
		}

		// This is where we actually start executing the thunk!
		Label entry = thunk->label();
		*thunk << entry;

		// The only thing left to do is to jump to stage 2. First, update the stack size. ptrC is
		// safe to use here. It is neither volatile, nor used for the result.
		replace.arena->resizeStackFrame(thunk, ptrC, stage2.generated);

		// Then, jump to the entry point in stage 2.
		RefSource *stage2Ref = new (thunk) StrRefSource(S("<stage 2>"), stage2.generated);
		*thunk << mov(ptrC, Operand(Ref(stage2Ref)));
		*thunk << add(ptrC, ptrConst(Nat(stage2.offset)));
		*thunk << jmp(ptrC);

		Binary::Info compiled = Binary::compile(replace.arena, thunk);

		// Check the assumption that the stack frame of what we have created is the same as the
		// stack frame of the old function. Otherwise, exceptions will not work!
		size_t oldStackSize = oldBinary->stackSize();
		if (compiled.binary->stackSize() != oldStackSize) {
			StrBuf *msg = new (replace.arena) StrBuf();
			*msg << S("Mismatched stack sizes during replacement. ");
			*msg << S("Expected ") << oldStackSize << S(" but got ") << compiled.binary->stackSize() << S(".");
			throw new (replace.arena) InternalError(msg->toS());
		}

		GeneratedThunk result;
		result.generated = compiled.binary;
		result.offset = compiled.offsets->at(entry.key());
		return result;
	}

	// Emit 'begin' instructions to 'out' for 'block' and all its parents, outermost block first.
	// No 'begin' is emitted for the root block.
	static void activateBlock(code::Listing *out, code::Block block) {
		if (block != out->root()) {
			// Parents need to be activated before their children.
			activateBlock(out, out->parent(block));
			*out << code::begin(block);
		}
	}

	static GeneratedThunk generateStage2(ActiveReplace &replace, size_t offset, code::Arena::Skeleton *stage1) {
		using namespace code;

		Binary *newBinary = codeBinary(replace.newFn->directRef().address());
		size_t newStackSize = newBinary->stackSize();
		size_t oldStackSize = codeBinary(replace.oldFn)->stackSize();

		ControlFlowItem target = findMapTarget(replace, offset);

		// Figure out the target offset:
		Nat targetOffset = 0;
		Nat transformedOffset = 0;
		switch (target.status()) {
		case ControlFlowItem::none:
			targetOffset = replace.toOffsetInside->get(target.offset());
			transformedOffset = replace.toTransformedInside->get(target.offset());
			break;
		case ControlFlowItem::removed:
			// Skip storing the result, the call was removed:
			targetOffset = replace.toOffsetAfter->get(target.offset());
			transformedOffset = replace.toTransformedAfter->get(target.offset());
			break;
		}

		Listing *srcListing = replace.newTransformed;

		// Create our own listing with the proper layout.
		Listing *l = srcListing->createShell(replace.arena);

		// Generate a skeleton for the compiled version of the new function. This will give us
		// information about saved registers, active blocks, etc.
		Arena::Skeleton *skeleton = replace.arena->compatibleFrameSkeleton(newBinary, targetOffset);

		*l << prolog();

		// Add dummy "mov(reg, 0)" to make sure that they are clobbered.
		for (Nat i = 0; i < skeleton->savedRegs->count(); i++) {
			Operand op = skeleton->savedRegs->at(i);
			*l << mov(op, xConst(op.size(), 0));
		}

		// Activate the right block, for EH to work properly. (Note: we can't look in the listing -
		// transformation removes 'begin' and 'end' instructions).
		Block activeBlock = Block::fromNat(skeleton->currentBlock);
		activateBlock(l, activeBlock);

		// Create a new block to increase the size of the stack frame. This allows us to store
		// registers that need to be preserved for the result. A separate block ensures that these
		// variables are *after* any local variables in the function.
		RegSet *fnRegsToSave = replace.arena->fnResultRegs();
		Array<Var> *storedFnRegs = new (l) Array<Var>();

		Block auxBlock = l->createBlock(activeBlock);
		*l << begin(auxBlock);

		// Reference point for offset translations.
		Var translatorZero;

		// If the new stack frame is smaller than the old version, add padding here so that we don't
		// accidentally overwrite old data when we try to move everything into place.
		if (newStackSize < oldStackSize) {
			Nat extraSize = Nat(oldStackSize - newStackSize);
			translatorZero = l->createVar(auxBlock, Size(extraSize));

			// Then start a new block here to make sure that everything new we add comes after the padding.
			auxBlock = l->createBlock(auxBlock);
			*l << begin(auxBlock);
		}

		// Initialize the translators if necessary.
		OffsetTranslator translateFromOld;
		// Note: 'translateFromNew' is only necessary when looking at transformed code.
		OffsetTranslator translateFromNew;
		if (stage1->accessMode >= 0) {
			assert(skeleton->accessMode >= 0, L"Both skeletons should have similar properties.");

			// If we need to translate offsets, we need a reference point.
			if (translatorZero == Var()) {
				translatorZero = l->createVar(auxBlock, Size::sPtr);
				auxBlock = l->createBlock(auxBlock);
				*l << begin(auxBlock);
			}

			// Set up the translators:
			Offset oldOffset = -Offset(stage1->accessMode);
			Offset newOffset = oldOffset +
				Offset(Int(oldStackSize + stage1->accessMode)) -
				Offset(Int(newStackSize + skeleton->accessMode));

			translateFromOld.set(translatorZero, oldOffset);
			translateFromNew.set(translatorZero, newOffset);
		}

		// Actual starting point.
		Label start = l->label();
		*l << start;

		// We only need to save result registers if the call-site was not removed (if it was removed,
		// we might need to run destructors, however).
		Nat fnRegsSize = 0;
		if (target.status() != ControlFlowItem::removed) {
			for (RegSet::Iter i = fnRegsToSave->begin(); i != fnRegsToSave->end(); i++) {
				Var v = l->createVar(auxBlock, size(i.v()));
				*l << replace.arena->saveFnResultReg(i.v(), v);
				storedFnRegs->push(v);
				// Note: this under-estimates the size (due to not considering alignment), which is
				// fine since we only care about not having a stack frame that is smaller than the
				// old version's stack frame. It is fine if we accidentally add "too much" padding,
				// for example.
				fnRegsSize += v.size().current();
			}
		}

		// Store which items need to be restored.
		Array<DataMove> *toMove = new (replace.arena) Array<DataMove>();

		// Figure out which registers need to be moved, to make sure any preserved registers are in
		// their proper place.
		movePreservedRegs(toMove, l, translateFromOld, translateFromNew, auxBlock, stage1, skeleton);

		// Go through all visible variables and migrate them.
		Array<Var> *toInit = new (replace.arena) Array<Var>();
		Array<Var> *toRemove = new (replace.arena) Array<Var>();
		{
			// Note: Both of these are skeletons, so they only contain visible variables.
			// Note: Due to how the Arena generates the skeleton, variables will be from
			// leafmost block to rootmost block.
			Array<Var> *allOldVars = stage1->listing->allVars();
			Array<Var> *allNewVars = skeleton->listing->allVars();

			// We don't support migrating parameters yet. They need to be the same.
			clearParams(stage1->listing, allOldVars);
			clearParams(skeleton->listing, allNewVars);

			// TODO: We could look at the types as well. That could help to find renamed variables.
			for (Nat i = 0; i < allNewVars->count(); i++) {
				Var newVar = allNewVars->at(i);
				Listing::VarInfo *newInfo = skeleton->listing->varInfo(newVar);
				if (!newInfo)
					continue;

				for (Nat j = 0; j < allOldVars->count(); j++) {
					Var oldVar = allOldVars->at(j);
					Listing::VarInfo *oldInfo = stage1->listing->varInfo(oldVar);
					if (!oldInfo)
						continue;

					if (*newInfo->name != *oldInfo->name)
						continue;

					moveVar(toMove, l, auxBlock, asValue(newInfo),
							translateFromOld(stage1->varOffsets->at(oldVar.key())),
							// Compatible frames, so we can just use 'newVar' here instead of:
							// translateFromNew(skeleton->varOffsets->at(newVar.key())));
							newVar);

					allNewVars->at(i) = Var();
					allOldVars->at(j) = Var();
					break;
				}
			}

			// Now, we can just look at any remaining variables in 'allNewVars' to find things to initialize...
			for (Nat i = 0; i < allNewVars->count(); i++)
				if (allNewVars->at(i) != Var())
					toInit->push(allNewVars->at(i));

			// ...and in 'allOldVars' to find things to destroy.
			for (Nat i = 0; i < allOldVars->count(); i++)
				if (allOldVars->at(i) != Var())
					toRemove->push(allOldVars->at(i));
		}

		// Destroy old variables.
		for (Nat i = 0; i < toRemove->count(); i++) {
			Var var = toRemove->at(i);
			Listing::VarInfo *info = stage1->listing->varInfo(var);
			if (!info || !info->type)
				continue;

			if (Function *dtor = info->type->destructor()) {
				*l << lea(ptrA, translateFromOld(stage1->varOffsets->at(var.key())));
				*l << fnParam(l->engine().ptrDesc(), ptrA);
				*l << fnCall(dtor->ref(), true);
			}
		}

		// Now that everything is copied away from the old frame, we can copy everything back!
		commitMoves(l, toMove);

		// Initialize any new named variables:
		for (Nat i = 0; i < toInit->count(); i++) {
			Var var = toInit->at(i);
			Listing::VarInfo *info = skeleton->listing->varInfo(var);
			if (!info || !info->type)
				continue;

			// TODO: Find initialization of the variable in replace.newSource and use that if possible.
			// Note: We can just use the variable itself. Frames are compatible.
			// defaultInitVar(l, info, translateFromNew(skeleton->varOffsets->at(var.key())));
			defaultInitVar(l, info, var);
		}

		// Copy code from the new version of the function to initialize any non-named variables.
		initializeNonNamed(replace.arena, l, srcListing, translateFromNew, activeBlock, transformedOffset,
						replace.newVarOffsets, stage1->extraMetadata, replace.newFn);

		// Restore function result registers if we need to.
		if (target.status() != ControlFlowItem::removed) {
			Nat id = 0;
			for (RegSet::Iter i = fnRegsToSave->begin(); i != fnRegsToSave->end(); i++, id++) {
				*l << replace.arena->restoreFnResultReg(i.v(), storedFnRegs->at(id));
			}
		}

		// Update the stack pointer using ptrC as a temporary.
		replace.arena->resizeStackFrame(l, ptrC, newBinary);

		// Jump to the right location.
		// Note 1: we use ptrC, since that register is always saved by the caller and never contains
		// the return value.
		// Note 2: we intentionally *do not* execute the epilog of the function.
		*l << mov(ptrC, replace.newFn->directRef());
		*l << add(ptrC, ptrConst(targetOffset));
		*l << jmp(ptrC);

		// PVAR(l);

		Binary::Info info = Binary::compile(replace.arena, l);
		GeneratedThunk r = {
			info.binary,
			info.offsets->at(start.key()),
		};
		return r;
	}

	// Generate the thunk used to resume execution of the old function at 'offset' inside the new
	// function. The returned value is whatever 'generateStage1' produces.
	static GeneratedThunk generateThunk(ActiveReplace &replace, size_t offset) {
		// Two thunks are generated: stage 1 and stage 2.
		//
		// Stage 1 uses the stack layout of the old function. This makes exceptions work reliably,
		// and lets us read/write data members as we wish. It does not do very much: it mainly
		// updates the frame pointer and jumps to stage 2.
		//
		// Stage 2 uses the stack layout of the new function. It starts by shuffling data around to
		// match the new stack layout. It then runs any initialization code for non-named variables
		// (or new local variables), before jumping to the original version. Stage 2 actually
		// allocates some extra stack space to be able to save the registers that may contain the
		// return value.

		// The frame skeleton of the old function is needed by both stages.
		code::Binary *binaryOld = code::codeBinary(replace.oldFn);
		code::Arena::Skeleton *oldFrame = replace.arena->compatibleFrameSkeleton(binaryOld, Nat(offset));

		// Stage 2 has to exist before stage 1, since stage 1 is connected to it.
		GeneratedThunk second = generateStage2(replace, offset, oldFrame);
		return generateStage1(replace, offset, oldFrame, second);
	}

	// Generate a thunk for 'offset' and ask 'tasks' to redirect the active instances of the old
	// function to it. Returns false if no thunk could be generated, or if the number of replaced
	// instances differs from 'offset.count'.
	static Bool replaceWithThunk(ActiveReplace &replace, ActiveOffset offset, ReplaceTasks *tasks) {
		GeneratedThunk result = generateThunk(replace, offset.offset);
		if (!result.generated)
			return false;

		// Attach a RefSource to get better output in stack traces, etc.
		// TODO: Eventually, we want some special representation here, so that we can detect and
		// account for the case when we are trying to replace a function that was previously
		// replaced, but not yet executed.
		new (replace.arena) code::StrRefSource(S("<thunk>"), result.generated);

		size_t numReplaced = tasks->replaceActive(
			replace.oldFn, offset.offset,
			result.generated->address(), result.offset);
		return numReplaced == offset.count;
	}

	// Find the labels in 'checkFor' that are also present in 'checkIn'. Returns an array with the
	// indices into 'checkFor' of all matches (ordered by their position in 'checkIn'). Returns null
	// if 'checkIn' is null, if 'checkFor' is empty, or if nothing matched.
	static MAYBE(Array<Nat> *) hasLabel(Array<code::Label> *checkFor, MAYBE(Array<code::Label> *) checkIn) {
		if (!checkIn || checkFor->empty())
			return null;

		// Allocated lazily, so that "no match" is reported as null.
		Array<Nat> *matches = null;

		for (Nat in = 0; in < checkIn->count(); in++) {
			Nat wanted = checkIn->at(in).key();

			// TODO: We could binary search here, they should be sorted.
			for (Nat candidate = 0; candidate < checkFor->count(); candidate++) {
				if (checkFor->at(candidate).key() == wanted) {
					if (!matches)
						matches = new (checkFor) Array<Nat>();
					matches->push(candidate);
				}
			}
		}

		return matches;
	}

	// Compute mappings from the locations in 'targets' (instruction indices in 'replace.newSource')
	// to instruction indices in the transformed listing, and to the offsets reported by the arena's
	// label output.
	//
	// When 'targets' contains at least one location, the following members of 'replace' are set:
	// 'newTransformed', 'newVarOffsets', 'toTransformedInside', 'toTransformedAfter',
	// 'toOffsetInside' and 'toOffsetAfter'. All four maps additionally contain the entry 0 -> 0 to
	// handle "jump to start of function".
	//
	// NOTE(review): If 'targets' flattens to an empty list, we return before any of the members
	// above are assigned, so they keep the value they had on entry. Verify that the thunk
	// generation copes with that.
	static void srcLocToMapping(ActiveReplace &replace, Array<ControlFlowItem> *targets) {
		// The labels below are inserted into this copy of the source listing.
		code::Listing *code = new (replace.arena) code::Listing(*replace.newSource);

		// Figure out which offsets are necessary:
		// Note: This excludes loops. Currently, we don't need them, but we might in the future.
		Array<ControlFlowItem> *offsets = flatten(targets);
		offsets->sort();
		offsets->removeDuplicates();
		if (offsets->empty())
			return;

		// Insert labels. 'innerLabels' and 'endLabels' are parallel to 'offsets': element 'i' in
		// each of them belongs to 'offsets->at(i)'.
		Array<code::Label> *innerLabels = new (code) Array<code::Label>();
		Array<code::Label> *endLabels = new (code) Array<code::Label>();

		for (Nat i = 0; i < offsets->count(); i++) {
			ControlFlowItem item = offsets->at(i);
			if (item.isLoop()) {
				// Loop:
				code::Label start = code->label();
				code::Label end = code->label();
				code->insert(item.offset(), start);
				code->insert(item.endOffset() + 1, end); // After the jump instruction.
				innerLabels->push(start);
				endLabels->push(end);
			} else {
				// Function call:
				code::Label lbl = code->label();
				// Note: Inserting just after the function call.
				code->insert(item.offset() + 1, lbl);
				innerLabels->push(lbl);
				endLabels->push(lbl);
			}
		}

		// Transform into low-level representation:
		code::Arena::TransformInfo tfmInfo = replace.arena->transformInfo(code);
		code::Listing *transformed = tfmInfo.listing;
		replace.newTransformed = tfmInfo.listing;
		replace.newVarOffsets = tfmInfo.varLayout;

		// Find the labels in the transformed version, move them earlier to just after the call instruction.
		for (Nat i = 0; i < transformed->count(); i++) {
			// 'found' contains indices into 'endLabels', and thereby also into 'offsets'.
			Array<Nat> *found = hasLabel(endLabels, transformed->labels(i));
			if (!found || !found->any())
				continue;

			// See if at least one element is a call. Note that we need to look up the item through
			// 'found', using the position in 'found' directly would inspect unrelated items.
			Bool anyCalls = false;
			for (Nat j = 0; j < found->count(); j++) {
				if (!offsets->at(found->at(j)).isLoop())
					anyCalls = true;
			}
			if (!anyCalls)
				continue;

			// Find the call op by scanning backwards, starting at the instruction before the
			// label. Written so that 'call' never wraps around when 'i' is zero.
			Nat call = i;
			Bool callFound = false;
			while (call > 0) {
				call--;
				if (transformed->at(call)->op() == code::op::call) {
					callFound = true;
					break;
				}
			}

			// Without a preceding call, there is nowhere sensible to move the label. Keep the inner
			// labels equal to the end labels in that case.
			if (!callFound)
				continue;

			// Update labels if we need to:
			if (call + 1 != i) {
				code::Label lbl = transformed->label();
				transformed->insert(call + 1, lbl);
				for (Nat j = 0; j < found->count(); j++) {
					Nat id = found->at(j);
					// The inner label of a loop marks the start of the loop. Only calls are moved.
					if (!offsets->at(id).isLoop())
						innerLabels->at(id) = lbl;
				}
			}
		}

		// Compute offsets for all labels in 'transformed':
		{
			// Maps label keys to the index of the instruction they are attached to. Labels that are
			// not attached to any instruction keep the initial value 'transformed->count()'.
			Array<Nat> *labelTarget = new (code) Array<Nat>(transformed->labelCount(), transformed->count());
			for (Nat i = 0; i < transformed->count(); i++) {
				// Mark labels:
				if (Array<code::Label> *labels = transformed->labels(i)) {
					for (Nat j = 0; j < labels->count(); j++) {
						labelTarget->at(labels->at(j).key()) = i;
					}
				}
			}

			replace.toTransformedInside = new (code) Map<Nat, Nat>();
			replace.toTransformedAfter = new (code) Map<Nat, Nat>();

			// Values to handle "jump to start of function".
			replace.toTransformedInside->put(0, 0);
			replace.toTransformedAfter->put(0, 0);

			// NOTE(review): 'toTransformedAfter' is keyed on 'endOffset()', while 'toOffsetAfter'
			// below is keyed on 'offset()'. These presumably coincide for function calls, confirm
			// which one is intended before loops are included in 'offsets'.
			for (Nat i = 0; i < offsets->count(); i++) {
				const ControlFlowItem &item = offsets->at(i);
				replace.toTransformedInside->put(item.offset(), labelTarget->at(innerLabels->at(i).key()));
				replace.toTransformedAfter->put(item.endOffset(), labelTarget->at(endLabels->at(i).key()));
			}
		}

		// Compute the offset of all labels:
		code::LabelOutput *lblOutput = replace.arena->labelOutput();
		replace.arena->output(transformed, lblOutput);

		// Find the offsets and store them:
		replace.toOffsetInside = new (code) Map<Nat, Nat>();
		replace.toOffsetAfter = new (code) Map<Nat, Nat>();

		// Values to handle "jump to start of function".
		replace.toOffsetInside->put(0, 0);
		replace.toOffsetAfter->put(0, 0);

		for (Nat i = 0; i < offsets->count(); i++) {
			Nat o = offsets->at(i).offset();
			replace.toOffsetInside->put(o, lblOutput->offsets->at(innerLabels->at(i).key()));
			replace.toOffsetAfter->put(o, lblOutput->offsets->at(endLabels->at(i).key()));
		}
	}


	// Replace all active instances of 'oldFn' with 'newFn', using 'tasks' to find and update them.
	// Returns true if there was nothing to replace, or if all replacements succeeded. Returns false
	// if no source listing was found for 'newFn', or if at least one replacement failed.
	//
	// Note: Some way of telling old types apart from new types may be needed. This code expects to
	// run *after* all types have been replaced, so it can not simply look at Type pointers. That
	// might require a context that contains replacement code, unless ReplaceTasks already
	// provides it.
	Bool replaceActiveFunction(const void *oldFn, Function *newFn, ReplaceTasks *tasks) {
		// Ask 'tasks' for the active offsets in 'oldFn'. Without any, there is nothing to do.
		vector<ActiveOffset> activeOffsets = tasks->findActive(oldFn);
		if (activeOffsets.empty())
			return true;

		// Members that are not listed here are value-initialized, i.e. they start out as null.
		ActiveReplace replace = {
			newFn->engine().arena(),
			oldFn,
			newFn,
		};

		// We can't do very much without a Listing for the new function.
		replace.newSource = findSource(newFn);
		if (!replace.newSource)
			return false;

		// Pre-compute control flow and mappings once, they are shared by all offsets below.
		replace.oldCf = controlFlowListRaw((void *)replace.oldFn);
		replace.newCf = diff(replace.oldCf, controlFlowList(replace.newSource));
		srcLocToMapping(replace, replace.newCf);
		// PVAR(formatDiff(replace.oldCf, replace.newCf));

		// Attempt every replacement, even if an earlier one failed.
		Bool allOk = true;
		for (size_t i = 0; i < activeOffsets.size(); i++) {
			if (!replaceWithThunk(replace, activeOffsets[i], tasks))
				allOk = false;
		}

		return allOk;
	}

}
