diff -u -r Makefile.target Makefile.target
--- Makefile.target	2007-02-06 00:01:54.000000000 +0100
+++ Makefile.target	2007-07-25 19:21:53.000000000 +0200
@@ -23,6 +23,9 @@
 endif
 BASE_CFLAGS=
 BASE_LDFLAGS=
+ifeq ($(CONFIG_DARWIN),yes)
+CFLAGS+=-mdynamic-no-pic
+endif
 #CFLAGS+=-Werror
 LIBS=
 HELPER_CFLAGS=$(CFLAGS)
@@ -68,13 +71,17 @@
 ifdef CONFIG_STATIC
 BASE_LDFLAGS+=-static
 endif
+ifeq ($(TARGET_ARCH), x86_64)
+# XXX globally save %ebx, %esi, %edi on entry to generated function
+OP_CFLAGS+= -fcall-used-ebx -fcall-used-esi -fcall-used-edi
+endif
 
 # We require -O2 to avoid the stack setup prologue in EXIT_TB
 OP_CFLAGS = -Wall -O2 -g -fno-strict-aliasing
 
 ifeq ($(ARCH),i386)
 HELPER_CFLAGS+=-fomit-frame-pointer
-OP_CFLAGS+=-mpreferred-stack-boundary=2 -fomit-frame-pointer
+OP_CFLAGS+=-fomit-frame-pointer
 ifeq ($(HAVE_GCC3_OPTIONS),yes)
 OP_CFLAGS+= -falign-functions=0 -fno-gcse
 else
@@ -415,8 +422,8 @@
 endif
 VL_OBJS+=vnc.o
 ifdef CONFIG_COCOA
-VL_OBJS+=cocoa.o
-COCOA_LIBS=-F/System/Library/Frameworks -framework Cocoa -framework IOKit
+VL_OBJS+=cocoaQemuMain.o cocoaQemuController.o cocoaQemu.o cocoaQemuProgressWindow.o cocoaQemuWindow.o cocoaQemuOpenGLView.o cocoaQemuQuartzView.o cocoaQemuQuickDrawView.o cocoaPopUpView.o cocoaCpuView.o FSController.o FSRoundedView.o FSToolbarController.o FSTransparentButton.o
+COCOA_LIBS=-F/System/Library/Frameworks -framework Cocoa -framework IOKit -framework CoreFoundation -framework OpenGL -framework ApplicationServices
 ifdef CONFIG_COREAUDIO
 COCOA_LIBS+=-framework CoreAudio
 endif
@@ -465,7 +472,11 @@
 $(QEMU_SYSTEM): $(VL_OBJS) libqemu.a
 	$(CC) $(VL_LDFLAGS) -o $@ $^ $(LIBS) $(SDL_LIBS) $(COCOA_LIBS) $(VL_LIBS)
 
-cocoa.o: cocoa.m
+
+cocoa%.o: host-cocoa/cocoa%.m
+	$(CC) $(CFLAGS) $(CPPFLAGS) $(BASE_CFLAGS) -c -o $@ $<
+
+FS%.o: host-cocoa/FSControls/FS%.m
 	$(CC) $(CFLAGS) $(CPPFLAGS) $(BASE_CFLAGS) -c -o $@ $<
 
 sdl.o: sdl.c keymaps.c sdl_keysym.h
diff -u -r block-qcow2.c block-qcow2.c
--- block-qcow2.c	2007-02-06 00:01:54.000000000 +0100
+++ block-qcow2.c	2007-07-25 19:21:53.000000000 +0200
@@ -1886,6 +1886,8 @@
     int64_t table_offset;
     uint64_t data64;
     uint32_t data32;
+    int old_table_size;
+    int64_t old_table_offset;
 
     if (min_size <= s->refcount_table_size)
         return 0;
@@ -1931,10 +1933,14 @@
                     &data32, sizeof(data32)) != sizeof(data32))
         goto fail;
     qemu_free(s->refcount_table);
+    old_table_offset = s->refcount_table_offset;
+    old_table_size = s->refcount_table_size;
     s->refcount_table = new_table;
     s->refcount_table_size = new_table_size;
+    s->refcount_table_offset = table_offset;
 
     update_refcount(bs, table_offset, new_table_size2, 1);
+    free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t));
     return 0;
  fail:
     free_clusters(bs, table_offset, new_table_size2);
diff -u -r block.c block.c
--- block.c	2007-02-06 00:01:54.000000000 +0100
+++ block.c	2007-07-25 19:21:53.000000000 +0200
@@ -497,6 +497,7 @@
 int bdrv_read(BlockDriverState *bs, int64_t sector_num, 
               uint8_t *buf, int nb_sectors)
 {
+    bs->activityLED = 1;
     BlockDriver *drv = bs->drv;
 
     if (!drv)
@@ -534,6 +535,7 @@
 int bdrv_write(BlockDriverState *bs, int64_t sector_num, 
                const uint8_t *buf, int nb_sectors)
 {
+    bs->activityLED = 1;
     BlockDriver *drv = bs->drv;
     if (!bs->drv)
         return -ENOMEDIUM;
@@ -1058,6 +1060,7 @@
                                 uint8_t *buf, int nb_sectors,
                                 BlockDriverCompletionFunc *cb, void *opaque)
 {
+    bs->activityLED = 1;
     BlockDriver *drv = bs->drv;
 
     if (!drv)
@@ -1078,6 +1081,7 @@
                                  const uint8_t *buf, int nb_sectors,
                                  BlockDriverCompletionFunc *cb, void *opaque)
 {
+    bs->activityLED = 1;
     BlockDriver *drv = bs->drv;
 
     if (!drv)
diff -u -r block_int.h block_int.h
--- block_int.h	2007-02-06 00:01:54.000000000 +0100
+++ block_int.h	2007-07-25 19:21:53.000000000 +0200
@@ -87,6 +87,7 @@
     int removable; /* if true, the media can be removed */
     int locked;    /* if true, the media cannot temporarily be ejected */
     int encrypted; /* if true, the media is encrypted */
+    int activityLED; /* if true, the media is accessed atm */
     /* event callback when inserting/removing */
     void (*change_cb)(void *opaque);
     void *change_opaque;
diff -u -r cpu-all.h cpu-all.h
--- cpu-all.h	2007-02-06 00:01:54.000000000 +0100
+++ cpu-all.h	2007-07-25 19:21:53.000000000 +0200
@@ -339,7 +339,13 @@
 
 static inline void stq_le_p(void *ptr, uint64_t v)
 {
+#if defined(__i386__) && __GNUC__ >= 4
+    const union { uint64_t v; uint32_t p[2]; } x = { .v = v };
+    ((uint32_t *)ptr)[0] = x.p[0];
+    ((uint32_t *)ptr)[1] = x.p[1];
+#else
     *(uint64_t *)ptr = v;
+#endif
 }
 
 /* float access */
diff -u -r cpu-exec.c cpu-exec.c
--- cpu-exec.c	2007-02-06 00:01:54.000000000 +0100
+++ cpu-exec.c	2007-07-25 19:21:53.000000000 +0200
@@ -652,6 +652,15 @@
                               : /* no outputs */
                               : "r" (gen_func)
                               : "r1", "r2", "r3", "r8", "r9", "r10", "r12", "r14");
+#elif defined(TARGET_X86_64) && defined(__i386__)
+                asm volatile ("push %%ebx\n"
+                              "push %%esi\n"
+                              "push %%edi\n"
+                              "call *%0\n"
+                              "pop %%edi\n"
+                              "pop %%esi\n"
+                              "pop %%ebx\n"
+                              : : "r" (gen_func) : "ebx", "esi", "edi");
 #elif defined(TARGET_I386) && defined(USE_CODE_COPY)
 {
     if (!(tb->cflags & CF_CODE_COPY)) {
diff -u -r dyngen-exec.h dyngen-exec.h
--- dyngen-exec.h	2007-02-06 00:01:54.000000000 +0100
+++ dyngen-exec.h	2007-07-25 19:21:53.000000000 +0200
@@ -191,7 +191,12 @@
 #endif
 
 /* force GCC to generate only one epilog at the end of the function */
+#if defined(__i386__) || defined(__x86_64__)
+/* Also add 4 bytes of padding so that we can replace the ret with a jmp.  */
+#define FORCE_RET() asm volatile ("nop;nop;nop;nop");
+#else
 #define FORCE_RET() __asm__ __volatile__("" : : : "memory");
+#endif
 
 #ifndef OPPROTO
 #define OPPROTO
@@ -241,12 +246,26 @@
 #endif
 
 #ifdef __i386__
-#define EXIT_TB() asm volatile ("ret")
-#define GOTO_LABEL_PARAM(n) asm volatile ("jmp " ASM_NAME(__op_gen_label) #n)
+/* Dyngen will replace hlt instructions with a ret instruction.  Inserting a
+   ret directly would confuse dyngen.  */
+#define EXIT_TB() asm volatile ("hlt")
+/* Dyngen will replace cli with 0xe9 (jmp).
+   We generate the offset manually.  */
+#if defined(__APPLE__)
+/* XXX Different relocations are generated for MacOS X for Intel
+   (please as from cctools).  */
+#define GOTO_LABEL_PARAM(n) \
+  asm volatile ("cli;.long " ASM_NAME(__op_gen_label) #n)
+#else
+#define GOTO_LABEL_PARAM(n) \
+  asm volatile ("cli;.long " ASM_NAME(__op_gen_label) #n " - 1f;1:")
+#endif
 #endif
 #ifdef __x86_64__
-#define EXIT_TB() asm volatile ("ret")
-#define GOTO_LABEL_PARAM(n) asm volatile ("jmp " ASM_NAME(__op_gen_label) #n)
+/* The same as i386.  */
+#define EXIT_TB() asm volatile ("hlt")
+#define GOTO_LABEL_PARAM(n) \
+  asm volatile ("cli;.long " ASM_NAME(__op_gen_label) #n " - 1f;1:")
 #endif
 #ifdef __powerpc__
 #define EXIT_TB() asm volatile ("blr")
diff -u -r dyngen.c dyngen.c
--- dyngen.c	2007-02-06 00:01:54.000000000 +0100
+++ dyngen.c	2007-07-25 19:21:53.000000000 +0200
@@ -32,6 +32,8 @@
 
 #include "config-host.h"
 
+//#define DEBUG_OP
+
 /* NOTE: we test CONFIG_WIN32 instead of _WIN32 to enabled cross
    compilation */
 #if defined(CONFIG_WIN32)
@@ -182,6 +184,20 @@
 #include <mach-o/reloc.h>
 #include <mach-o/ppc/reloc.h>
 
+#ifdef HOST_PPC
+
+#define MACH_CPU_TYPE CPU_TYPE_POWERPC
+#define mach_check_cputype(x) ((x) == CPU_TYPE_POWERPC)
+
+#elif defined(HOST_I386)
+
+#define MACH_CPU_TYPE CPU_TYPE_I386
+#define mach_check_cputype(x) ((x) == CPU_TYPE_I386)
+
+#else
+#error unsupported CPU - please update the code
+#endif
+
 # define check_mach_header(x) (x.magic == MH_MAGIC)
 typedef int32_t host_long;
 typedef uint32_t host_ulong;
@@ -938,22 +954,23 @@
 {
     unsigned int tocindex, symindex, size;
     const char *name = 0;
+	int section_type;
     
     /* Sanity check */
     if(!( address >= sec_hdr->addr && address < (sec_hdr->addr + sec_hdr->size) ) )
         return (char*)0;
-		
-	if( sec_hdr->flags & S_SYMBOL_STUBS ){
+
+	section_type = sec_hdr->flags & SECTION_TYPE;
+	if( section_type == S_SYMBOL_STUBS ){
 		size = sec_hdr->reserved2;
 		if(size == 0)
 		    error("size = 0");
-		
 	}
-	else if( sec_hdr->flags & S_LAZY_SYMBOL_POINTERS ||
-	            sec_hdr->flags & S_NON_LAZY_SYMBOL_POINTERS)
+	else if( section_type == S_LAZY_SYMBOL_POINTERS ||
+			 section_type == S_NON_LAZY_SYMBOL_POINTERS)
 		size = sizeof(unsigned long);
 	else
-		return 0;
+		return NULL;
 		
     /* Compute our index in toc */
 	tocindex = (address - sec_hdr->addr)/size;
@@ -987,8 +1004,27 @@
 	/* init the slide value */
 	*sslide = 0;
 	
-	if(R_SCATTERED & rel->r_address)
-		return (char *)find_reloc_name_given_its_address(sca_rel->r_value);
+	if (R_SCATTERED & rel->r_address) {
+        char *name = (char *)find_reloc_name_given_its_address(sca_rel->r_value);
+
+        /* search it in the full symbol list, if not found */
+        if (!name) {
+            int i;
+            for (i = 0; i < nb_syms; i++) {
+                EXE_SYM *sym = &symtab[i];
+                if (sym->st_value == sca_rel->r_value) {
+                    name = get_sym_name(sym);
+                    switch (sca_rel->r_type) {
+                    case GENERIC_RELOC_VANILLA:
+                        *sslide = *(uint32_t *)(text + sca_rel->r_address) - sca_rel->r_value;
+                        break;
+                    }
+                    break;
+                }
+            }
+        }
+        return name;
+    }
 
 	if(rel->r_extern)
 	{
@@ -1020,14 +1056,21 @@
 			sectoffset = ( *(uint32_t *)(text + rel->r_address) & 0x03fffffc );
 			if (sectoffset & 0x02000000) sectoffset |= 0xfc000000;
 			break;
+        case GENERIC_RELOC_VANILLA:
+            sectoffset  = *(uint32_t *)(text + rel->r_address);
+            break;
 		default:
-			error("switch(rel->type) not found");
+			error("switch(rel->type=%d) not found", rel->r_type);
 	}
 
-	if(rel->r_pcrel)
+	if(rel->r_pcrel) {
 		sectoffset += rel->r_address;
-			
-	if (rel->r_type == PPC_RELOC_BR24)
+#ifdef HOST_I386
+        sectoffset += (1 << rel->r_length);
+#endif
+    }
+
+	if (rel->r_type == PPC_RELOC_BR24 || rel->r_pcrel)
 		name = (char *)find_reloc_name_in_sec_ptr((int)sectoffset, &section_hdr[sectnum-1]);
 
 	/* search it in the full symbol list, if not found */
@@ -1079,7 +1122,7 @@
         error("bad Mach header");
     }
     
-    if (mach_hdr.cputype != CPU_TYPE_POWERPC)
+    if (!mach_check_cputype(mach_hdr.cputype))
         error("Unsupported CPU");
         
     if (mach_hdr.filetype != MH_OBJECT)
@@ -1414,6 +1457,677 @@
 #endif
 
 
+#if defined(HOST_I386) || defined(HOST_X86_64)
+
+/* This byte is the first byte of an instruction.  */
+#define FLAG_INSN     (1 << 0)
+/* This byte has been processed as part of an instruction.  */
+#define FLAG_SCANNED  (1 << 1)
+/* This instruction is a return instruction.  Gcc sometimes generates prefix
+   bytes, so may be more than one byte long.  */
+#define FLAG_RET      (1 << 2)
+/* This is either the target of a jump, or the preceding instruction uses
+   a pc-relative offset.  */
+#define FLAG_TARGET   (1 << 3)
+/* This is a magic instruction that needs fixing up.  */
+#define FLAG_EXIT     (1 << 4)
+/* This instruction clobbers the stack pointer.  */
+/* XXX only supports push, pop, add/sub $imm,%esp  */
+#define FLAG_STACK    (1 << 5)
+#define MAX_EXITS     5
+
+static void bad_opcode(const char *name, uint32_t op)
+{
+    int digits = (op > 0xff) ? 4 : 2; /* two-byte opcodes print 4 hex digits */
+    error("Unsupported opcode %0*x in %s", digits, op, name);
+}
+
+/* Flag the next len bytes of the instruction at insn as scanned and return
+   the updated insn_size.  Reports an error if a byte was already scanned.  */
+static int
+eat_bytes(const char *name, char *flags, int insn, int insn_size, int len)
+{
+    char *cur = flags + insn + insn_size;
+    char *end = cur + (len > 0 ? len : 0);
+    for (; cur < end; cur++) {
+        /* A byte claimed twice means two decoded insns overlap.  */
+        if (*cur & FLAG_SCANNED)
+            error ("Overlapping instructions in %s", name);
+        *cur |= FLAG_SCANNED;
+    }
+    return insn_size + (len > 0 ? len : 0);
+}
+
+/* Decode the insn at start_p[insn] and record its bytes and kind in flags.  */
+static void trace_i386_insn (const char *name, uint8_t *start_p, char *flags,
+                             int insn, int len)
+{
+    uint8_t *ptr;
+    uint8_t op;
+    int modrm;
+    int is_prefix;
+    int op_size;
+    int addr_size;
+    int insn_size;
+    int is_ret;
+    int is_condjmp;
+    int is_jmp;
+    int is_exit;
+    int is_pcrel;
+    int is_stack;
+    int immed;
+    int seen_rexw;
+    int32_t disp;
+
+    ptr = start_p + insn;
+    /* nonzero if this insn has a ModR/M byte.  */
+    modrm = 1;
+    /* The size of the immediate value in this instruction.  */
+    immed = 0;
+    /* The operand size.  */
+    op_size = 4;
+    /* The address size */
+    addr_size = 4;
+    /* The total length of this instruction.  */
+    insn_size = 0;
+    is_prefix = 1;
+    is_ret = 0;
+    is_condjmp = 0;
+    is_jmp = 0;
+    is_exit = 0;
+    seen_rexw = 0;
+    is_pcrel = 0;
+    is_stack = 0;
+
+    while (is_prefix) {
+        op = ptr[insn_size];
+        insn_size = eat_bytes(name, flags, insn, insn_size, 1);
+        is_prefix = 0;
+        switch (op >> 4) {
+        case 0:
+        case 1:
+        case 2:
+        case 3:
+            if (op == 0x0f) {
+                /* two-byte opcode.  */
+                op = ptr[insn_size];
+                insn_size = eat_bytes(name, flags, insn, insn_size, 1);
+                switch (op >> 4) {
+                case 0:
+                    if ((op & 0xf) > 3)
+                      modrm = 0;
+                    break;
+                case 1: /* vector move or prefetch */
+                case 2: /* various moves and vector compares.  */
+                case 4: /* cmov */
+                case 5: /* vector instructions */
+                case 6:
+                case 13:
+                case 14:
+                case 15:
+                    break;
+                case 7: /* mmx */
+                    if (op == 0x77) /* emms */
+                      modrm = 0;
+                    break;
+                case 3: /* wrmsr, rdtsc, rdmsr, rdpmc, sysenter, sysexit */
+                    modrm = 0;
+                    break;
+                case 8: /* long conditional jump */
+                    is_condjmp = 1;
+                    immed = op_size;
+                    modrm = 0;
+                    break;
+                case 9: /* setcc */
+                    break;
+                case 10:
+                    switch (op & 0x7) {
+                    case 0: /* push fs/gs */
+                    case 1: /* pop fs/gs */
+                        is_stack = 1;
+                    case 2: /* cpuid/rsm */
+                        modrm = 0;
+                        break;
+                    case 4: /* shld/shrd immediate */
+                        immed = 1;
+                        break;
+                    default: /* Normal instructions with a ModR/M byte.  */
+                        break;
+                    }
+                    break;
+                case 11:
+                    switch (op & 0xf) {
+                    case 10: /* bt, bts, btr, btc */
+                        immed = 1;
+                        break;
+                    default:
+                        /* cmpxchg, lss, btr, lfs, lgs, movzx, btc, bsf, bsr
+                           undefined, and movsx */
+                        break;
+                    }
+                    break;
+                case 12:
+                    if (op & 8) {
+                        /* bswap */
+                        modrm = 0;
+                    } else {
+                        switch (op & 0x7) {
+                        case 2:
+                        case 4:
+                        case 5:
+                        case 6:
+                            immed = 1;
+                            break;
+                        default:
+                            break;
+                        }
+                    }
+                    break;
+                }
+            } else if ((op & 0x07) <= 0x3) {
+                /* General arithmetic ax.  */
+            } else if ((op & 0x07) <= 0x5) {
+                /* General arithmetic ax, immediate.  */
+                if (op & 0x01)
+                    immed = op_size;
+                else
+                    immed = 1;
+                modrm = 0;
+            } else if ((op & 0x23) == 0x22) {
+                /* Segment prefix.  */
+                is_prefix = 1;
+            } else {
+                /* Segment register push/pop or DAA/AAA/DAS/AAS.  */
+                modrm = 0;
+            }
+            break;
+
+#if defined(HOST_X86_64)
+        case 4: /* rex prefix.  */
+            is_prefix = 1;
+            /* The address/operand size is actually 64-bit, but the immediate
+               values in the instruction are still 32-bit.  */
+            op_size = 4;
+            addr_size = 4;
+            if (op & 8)
+                seen_rexw = 1;
+            break;
+#else
+        case 4: /* inc/dec register.  */
+#endif
+        case 5: /* push/pop general register.  */
+            modrm = 0;
+            is_stack = 1;
+            break;
+
+        case 6:
+            switch (op & 0x0f) {
+            case 0: /* pusha */
+            case 1: /* popa */
+                modrm = 0;
+                is_stack = 1;
+                break;
+            case 2: /* bound */
+            case 3: /* arpl */
+                break;
+            case 4: /* FS */
+            case 5: /* GS */
+                is_prefix = 1;
+                break;
+            case 6: /* opcode size prefix.  */
+                op_size = 2;
+                is_prefix = 1;
+                break;
+            case 7: /* Address size prefix.  */
+                addr_size = 2;
+                is_prefix = 1;
+                break;
+            case 8: /* push immediate */
+                immed = op_size;
+                modrm = 0;
+                is_stack = 1;
+                break;
+            case 10: /* push 8-bit immediate */
+                immed = 1;
+                modrm = 0;
+                is_stack = 1;
+                break;
+            case 9: /* imul immediate */
+                immed = op_size;
+                break;
+            case 11: /* imul 8-bit immediate */
+                immed = 1;
+                break;
+            case 12: /* insb */
+            case 13: /* insw */
+            case 14: /* outsb */
+            case 15: /* outsw */
+                modrm = 0;
+                break;
+            }
+            break;
+
+        case 7: /* Short conditional jump.  */
+            is_condjmp = 1;
+            immed = 1;
+            modrm = 0;
+            break;
+          
+        case 8:
+            if ((op & 0xf) <= 3) {
+                /* arithmetic immediate.  */
+                if ((op & 3) == 1)
+                    immed = op_size;
+                else
+                    immed = 1;
+                if (op == 0x81 || op == 0x83) {
+                    /* add, sub */
+                    op = ptr[insn_size];
+                    switch ((op >> 3) & 7) {
+                    case 0:
+                    case 5:
+                        is_stack = (op & 7) == 4;
+                        break;
+                    }
+                }
+            }
+            else if ((op & 0xf) == 0xf) {
+                /* pop general.  */
+                is_stack = 1;
+            }
+            /* else test, xchg, mov, lea.  */
+            break;
+
+        case 9:
+            /* Various single-byte opcodes with no modrm byte.  */
+            modrm = 0;
+            if (op == 0x9a) {
+                /* Far call: 16-bit selector plus op_size-byte offset.  */
+                immed = op_size + 2;
+            }
+            break;
+
+        case 10:
+            switch ((op & 0xe) >> 1) {
+            case 0: /* mov absolute immediate.  */
+            case 1:
+                if (seen_rexw)
+                    immed = 8;
+                else
+                    immed = addr_size;
+                break;
+            case 4: /* test immediate.  */
+                if (op & 1)
+                    immed = op_size;
+                else
+                    immed = 1;
+                break;
+            default: /* Various string ops.  */
+                break;
+            }
+            modrm = 0;
+            break;
+
+        case 11: /* move immediate to register */
+            if (op & 8) {
+                if (seen_rexw)
+                    immed = 8;
+                else
+                    immed = op_size;
+            } else {
+                immed = 1;
+            }
+            modrm = 0;
+            break;
+
+          case 12:
+            switch (op & 0xf) {
+            case 0: /* shift immediate */
+            case 1:
+                immed = 1;
+                break;
+            case 2: /* ret immediate */
+                immed = 2;
+                modrm = 0;
+                bad_opcode(name, op);
+                break;
+            case 3: /* ret */
+                modrm = 0;
+                is_ret = 1;
+            case 4: /* les */
+            case 5: /* lds */
+                break;
+            case 6: /* mov immediate byte */
+                immed = 1;
+                break;
+            case 7: /* mov immediate */
+                immed = op_size;
+                break;
+            case 8: /* enter */
+                /* TODO: Is this right?  */
+                immed = 3;
+                modrm = 0;
+                break;
+            case 10: /* retf immediate */
+                immed = 2;
+                modrm = 0;
+                bad_opcode(name, op);
+                break;
+            case 13: /* int */
+                immed = 1;
+                modrm = 0;
+                break;
+            case 11: /* retf */
+            case 15: /* iret */
+                modrm = 0;
+                bad_opcode(name, op);
+                break;
+            default: /* leave, int3 or into */
+                modrm = 0;
+                break;
+            }
+            break;
+
+        case 13:
+            if ((op & 0xf) >= 8) {
+                /* Coprocessor escape.  For our purposes this is just a normal
+                   instruction with a ModR/M byte.  */
+            } else if ((op & 0xf) >= 4) {
+                /* AAM, AAD or XLAT */
+                modrm = 0;
+            }
+            /* else shift instruction */
+            break;
+
+        case 14:
+            switch ((op & 0xc) >> 2) {
+            case 0: /* loop or jcxz */
+                is_condjmp = 1;
+                immed = 1;
+                break;
+            case 1: /* in/out immed */
+                immed = 1;
+                break;
+            case 2: /* call or jmp */
+                switch (op & 3) {
+                case 0: /* call */
+                    immed = op_size;
+                    break;
+                case 1: /* long jump */
+                    immed = 4;
+                    is_jmp = 1;
+                    break;
+                case 2: /* far jmp */
+                    bad_opcode(name, op);
+                    break;
+                case 3: /* short jmp */
+                    immed = 1;
+                    is_jmp = 1;
+                    break;
+                }
+                break;
+            case 3: /* in/out register */
+                break;
+            }
+            modrm = 0;
+            break;
+
+        case 15:
+            switch ((op & 0xe) >> 1) {
+            case 0:
+            case 1:
+                is_prefix = 1;
+                break;
+            case 2:
+            case 4:
+            case 5:
+            case 6:
+                modrm = 0;
+                /* Some privileged insns are used as markers.  */
+                switch (op) {
+                case 0xf4: /* hlt: Exit translation block.  */
+                    is_exit = 1;
+                    break;
+                case 0xfa: /* cli: Jump to label.  */
+                    is_exit = 1;
+                    immed = 4;
+                    break;
+                case 0xfb: /* sti: TB patch jump.  */
+                    /* Mark the insn for patching, but continue scanning.  */
+                    flags[insn] |= FLAG_EXIT;
+                    immed = 4;
+                    break;
+                }
+                break;
+            case 3: /* unary grp3 */
+                if ((ptr[insn_size] & 0x38) == 0) {
+                    if (op == 0xf7)
+                        immed = op_size;
+                    else
+                        immed = 1; /* test immediate */
+                }
+                break;
+            case 7: /* inc/dec grp4/5 */
+                /* TODO: This includes indirect jumps.  We should fail if we
+                   encounter one of these. */
+                break;
+            }
+            break;
+        }
+    }
+
+    if (modrm) {
+        if (addr_size != 4)
+            error("16-bit addressing mode used in %s", name);
+
+        disp = 0;
+        modrm = ptr[insn_size];
+        insn_size = eat_bytes(name, flags, insn, insn_size, 1);
+        modrm &= 0xc7;
+        switch ((modrm & 0xc0) >> 6) {
+        case 0:
+            if (modrm == 5)
+              disp = 4;
+            break;
+        case 1:
+            disp = 1;
+            break;
+        case 2:
+            disp = 4;
+            break;
+        }
+        if ((modrm & 0xc0) != 0xc0 && (modrm & 0x7) == 4) {
+            /* SIB byte */
+            if (modrm == 4 && (ptr[insn_size] & 0x7) == 5) {
+                disp = 4;
+                is_pcrel = 1;
+            }
+            insn_size = eat_bytes(name, flags, insn, insn_size, 1);
+        }
+        insn_size = eat_bytes(name, flags, insn, insn_size, disp);
+    }
+    insn_size = eat_bytes(name, flags, insn, insn_size, immed);
+    if (is_condjmp || is_jmp) {
+        if (immed == 1) {
+            disp = (int8_t)*(ptr + insn_size - 1);
+        } else {
+            disp = (((int32_t)*(ptr + insn_size - 1)) << 24)
+                   | (((int32_t)*(ptr + insn_size - 2)) << 16)
+                   | (((int32_t)*(ptr + insn_size - 3)) << 8)
+                   | *(ptr + insn_size - 4);
+        }
+        disp += insn_size;
+        /* Jumps to external symbols point to the address of the offset
+           before relocation.  */
+        /* ??? These are probably a tailcall.  We could fix them up by
+           replacing them with jmp to EOB + call, but it's easier to just
+           prevent the compiler generating them.  */
+        if (disp == 1)
+            error("Unconditional jump (sibcall?) in %s", name);
+        disp += insn;
+        if (disp < 0 || disp > len)
+            error("Jump outside instruction in %s", name);
+
+        if ((flags[disp] & (FLAG_INSN | FLAG_SCANNED)) == FLAG_SCANNED)
+            error("Overlapping instructions in %s", name);
+
+        flags[disp] |= (FLAG_INSN | FLAG_TARGET);
+        is_pcrel = 1; 
+    }
+    if (is_pcrel) {
+        /* Mark the following insn as a jump target.  This will stop
+           this instruction being moved.  */
+        flags[insn + insn_size] |= FLAG_TARGET;
+    }
+    if (is_ret)
+      flags[insn] |= FLAG_RET;
+
+    if (is_exit)
+      flags[insn] |= FLAG_EXIT;
+
+    if (is_stack)
+      flags[insn] |= FLAG_STACK;
+
+    if (!(is_jmp || is_ret || is_exit))
+      flags[insn + insn_size] |= FLAG_INSN;
+}
+
+/* Scan a function body.  Returns the position of the return sequence.
+   Sets *patch_bytes to the number of bytes that need to be copied from that
+   location, or to -1 if no patching is required (ie. the return is the last
+   insn).  *plen is the number of code bytes to copy.  exit_addrs receives at
+   most MAX_EXITS - 1 exit offsets followed by a -1 terminator.  */
+static int trace_i386_op(const char * name, uint8_t *start_p, int *plen,
+                         int *patch_bytes, int *exit_addrs)
+{
+    char *flags;
+    int more;
+    int insn;
+    int retpos;
+    int bytes;
+    int num_exits;
+    int len;
+    int last_insn;
+    int stack_clobbered;
+
+    len = *plen;
+    flags = malloc(len + 1);
+    memset(flags, 0, len + 1);
+    flags[0] |= FLAG_INSN;
+    more = 1;
+    while (more) {
+        more = 0;
+        for (insn = 0; insn < len; insn++) {
+            if ((flags[insn] & (FLAG_INSN | FLAG_SCANNED)) == FLAG_INSN) {
+                trace_i386_insn(name, start_p, flags, insn, len);
+                more = 1;
+            }
+        }
+    }
+
+    /* Strip any unused code at the end of the function.  */
+    while (len > 0 && flags[len - 1] == 0)
+      len--;
+
+    retpos = -1;
+    num_exits = 0;
+    last_insn = 0;
+    stack_clobbered = 0;
+    for (insn = 0; insn < len; insn++) {
+        if (flags[insn] & FLAG_RET) {
+            /* ??? In theory it should be possible to handle multiple return
+               points.  In practice it's not worth the effort.  */
+            if (retpos != -1)
+                error("Multiple return instructions in %s", name);
+            retpos = insn;
+        }
+        if (flags[insn] & FLAG_EXIT) {
+            if (stack_clobbered)
+                error("Stack clobbered in %s", name);
+            if (num_exits >= MAX_EXITS - 1)
+                error("Too many block exits in %s", name);
+            exit_addrs[num_exits] = insn;
+            num_exits++;
+        }
+        if (flags[insn] & FLAG_INSN)
+            last_insn = insn;
+        if (flags[insn] & FLAG_STACK)
+            stack_clobbered = 1;
+    }
+
+    exit_addrs[num_exits] = -1;
+    if (retpos == -1) {
+        if (num_exits == 0) {
+            error ("No return instruction found in %s", name);
+        } else {
+            retpos = len;
+            last_insn = len;
+        }
+    }
+    
+    /* If the return instruction is the last instruction we can just 
+       remove it.  */
+    if (retpos == last_insn)
+        *patch_bytes = -1;
+    else
+        *patch_bytes = 0;
+
+    /* Back up over any nop instructions.  */
+    while (retpos > 0
+           && (flags[retpos] & FLAG_TARGET) == 0
+           && (flags[retpos - 1] & FLAG_INSN) != 0
+           && start_p[retpos - 1] == 0x90) {
+        retpos--;
+    }
+
+    if (*patch_bytes == -1) {
+        *plen = retpos;
+        free (flags);
+        return retpos;
+    }
+    *plen = len;
+
+    /* The ret is in the middle of the function.  Find four more bytes so
+       that the ret can be replaced by a jmp. */
+    /* ??? Use a short jump where possible. */
+    bytes = 4;
+    insn = retpos + 1;
+    /* We can clobber everything up to the next jump target.  */
+    while (insn < len && bytes > 0 && (flags[insn] & FLAG_TARGET) == 0) {
+        insn++;
+        bytes--;
+    }
+    if (bytes > 0) {
+        /* ???: Strip out nop blocks.  */
+        /* We can't do the replacement without clobbering anything important.
+           Copy preceding instruction(s) to give us some space.  */
+        while (retpos > 0) {
+            /* If this byte is the target of a jmp we can't move it.  */
+            if (flags[retpos] & FLAG_TARGET)
+                break;
+
+            (*patch_bytes)++;
+            bytes--;
+            retpos--;
+
+            /* Break out of the loop if we have enough space and this is either 
+               the first byte of an instruction or a pad byte.  */
+            if ((flags[retpos] & (FLAG_INSN | FLAG_SCANNED)) != FLAG_SCANNED
+                && bytes <= 0) {
+                break;
+            }
+        }
+    }
+
+    if (bytes > 0)
+        error("Unable to replace ret with jmp in %s\n", name);
+
+    free(flags);
+    return retpos;
+}
+
+#endif
+
 #define MAX_ARGS 3
 
 /* generate op code */
@@ -1427,6 +2141,11 @@
     uint8_t args_present[MAX_ARGS];
     const char *sym_name, *p;
     EXE_RELOC *rel;
+#if defined(HOST_I386) || defined(HOST_X86_64)
+    int patch_bytes;
+    int retpos;
+    int exit_addrs[MAX_EXITS];
+#endif
 
     /* Compute exact size excluding prologue and epilogue instructions.
      * Increment start_offset to skip epilogue instructions, then compute
@@ -1437,33 +2156,12 @@
     p_end = p_start + size;
     start_offset = offset;
 #if defined(HOST_I386) || defined(HOST_X86_64)
-#ifdef CONFIG_FORMAT_COFF
-    {
-        uint8_t *p;
-        p = p_end - 1;
-        if (p == p_start)
-            error("empty code for %s", name);
-        while (*p != 0xc3) {
-            p--;
-            if (p <= p_start)
-                error("ret or jmp expected at the end of %s", name);
-        }
-        copy_size = p - p_start;
-    }
-#else
     {
         int len;
         len = p_end - p_start;
-        if (len == 0)
-            error("empty code for %s", name);
-        if (p_end[-1] == 0xc3) {
-            len--;
-        } else {
-            error("ret or jmp expected at the end of %s", name);
-        }
+        retpos = trace_i386_op(name, p_start, &len, &patch_bytes, exit_addrs);
         copy_size = len;
     }
-#endif    
 #elif defined(HOST_PPC)
     {
         uint8_t *p;
@@ -1675,6 +2373,13 @@
     }
 
     if (gen_switch == 2) {
+#if defined(HOST_I386) || defined(HOST_X86_64)
+        if (patch_bytes != -1) /* -1: the ret was the last instruction and was simply dropped */
+            copy_size += patch_bytes; /* room for the bytes that get copied past the end of the op */
+#ifdef DEBUG_OP
+        copy_size += 2; /* room for the two-byte 0x9090 pad emitted under DEBUG_OP */
+#endif
+#endif
         fprintf(outfile, "DEF(%s, %d, %d)\n", name + 3, nb_args, copy_size);
     } else if (gen_switch == 1) {
 
@@ -1809,6 +2514,82 @@
         /* patch relocations */
 #if defined(HOST_I386)
             {
+#ifdef CONFIG_FORMAT_MACH
+                struct scattered_relocation_info *scarel;
+                struct relocation_info * rel;
+				char final_sym_name[256];
+				const char *sym_name;
+				const char *p;
+				int slide, sslide;
+				int i;
+	
+				for (i = 0, rel = relocs; i < nb_relocs; i++, rel++) { /* visit every relocation entry */
+					unsigned int offset, length, value = 0;
+					unsigned int type, pcrel, isym = 0;
+					unsigned int usesym = 0;
+				
+					if (R_SCATTERED & rel->r_address) { /* scattered entry: read the fields through scarel */
+						scarel = (struct scattered_relocation_info*)rel;
+						offset = (unsigned int)scarel->r_address;
+						length = scarel->r_length;
+						pcrel = scarel->r_pcrel;
+						type = scarel->r_type;
+						value = scarel->r_value;
+					}
+                    else {
+						value = isym = rel->r_symbolnum;
+						usesym = (rel->r_extern);
+						offset = rel->r_address;
+						length = rel->r_length;
+						pcrel = rel->r_pcrel;
+						type = rel->r_type;
+					}
+				
+					slide = offset - start_offset; /* position of the relocated field relative to the op start */
+		
+					if (!(offset >= start_offset && offset < start_offset + size)) 
+						continue;  /* not in our range */
+
+					sym_name = get_reloc_name(rel, &sslide);
+					
+					if (usesym && symtab[isym].n_type & N_STAB)
+						continue; /* don't handle STAB (debug sym) */
+					
+					if (sym_name && strstart(sym_name, "__op_jmp", &p)) { /* __op_jmpN: only record where the jump operand lives */
+						int n;
+						n = strtol(p, NULL, 10);
+						fprintf(outfile, "    jmp_offsets[%d] = %d + (gen_code_ptr - gen_code_buf);\n", n, slide);
+						continue; /* Nothing more to do */
+					}
+					
+					if (!sym_name) {
+						fprintf(outfile, "/* #warning relocation not handled in %s (value 0x%x, %s, offset 0x%x, length 0x%x, %s, type 0x%x) */\n",
+                                name, value, usesym ? "use sym" : "don't use sym", offset, length, pcrel ? "pcrel":"", type);
+						continue; /* dunno how to handle without final_sym_name */
+					}
+
+                    get_reloc_expr(final_sym_name, sizeof(final_sym_name),
+                                   sym_name);
+
+                    if (length != 2) /* length is a log2 byte count (see the message below), so 2 means 32 bits */
+                        error("unsupported %d-bit relocation", 8 * (1 << length));
+
+					switch (type) {
+					case GENERIC_RELOC_VANILLA:
+                        if (pcrel || strstart(sym_name,"__op_gen_label",&p)) { /* store a displacement measured from the end of the 4-byte field */
+                            fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %d) = %s - (long)(gen_code_ptr + %d) - 4;\n",
+                                    slide, final_sym_name, slide);
+                        }
+                        else { /* store the symbol expression plus sslide as an absolute value */
+                            fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %d) = (%s + %d);\n", 
+                                    slide, final_sym_name, sslide);
+                        }
+                        break;
+                    default:
+                        error("unsupported i386 relocation (%d)", type);
+                    }
+                }
+#else
                 char name[256];
                 int type;
                 int addend;
@@ -1879,7 +2660,44 @@
 #error unsupport object format
 #endif
                 }
+               }
+#endif
+                /* Replace the marker instructions (hlt/cli/sti) listed in exit_addrs[], which ends with -1, by the actual opcodes.  */
+                for (i = 0; exit_addrs[i] != -1; i++) {
+                    int op;
+                    switch (p_start[exit_addrs[i]])
+                      {
+                      case 0xf4: op = 0xc3; break; /* hlt -> ret */
+                      case 0xfa: op = 0xe9; break; /* cli -> jmp */
+                      case 0xfb: op = 0xe9; break; /* sti -> jmp */
+                      default: error("Internal error");
+                      }
+                    fprintf(outfile, 
+                            "    *(uint8_t *)(gen_code_ptr + %d) = 0x%x;\n",
+                            exit_addrs[i], op);
+                }
+                /* Fix up the return instruction; patch_bytes == -1 means it was the final instruction and needs no jmp.  */
+                if (patch_bytes != -1) {
+                    if (patch_bytes) { /* first copy the bytes the jmp will overwrite to the end of the op */
+                        fprintf(outfile, "    memcpy(gen_code_ptr + %d,"
+                                "gen_code_ptr + %d, %d);\n",
+                                copy_size, retpos, patch_bytes);
+                    }
+                    fprintf(outfile, /* opcode byte 0xe9 (jmp) at retpos */
+                            "    *(uint8_t *)(gen_code_ptr + %d) = 0xe9;\n",
+                            retpos);
+                    fprintf(outfile, /* 32-bit displacement from the end of the 5-byte jmp to the current end of the op */
+                            "    *(uint32_t *)(gen_code_ptr + %d) = 0x%x;\n",
+                            retpos + 1, copy_size - (retpos + 5));
+                    
+                    copy_size += patch_bytes; /* the op grew by the bytes copied to its end */
                 }
+#ifdef DEBUG_OP
+                fprintf(outfile, /* append two 0x90 bytes after the op */
+                        "    *(uint16_t *)(gen_code_ptr + %d) = 0x9090;\n",
+                        copy_size);
+                copy_size += 2;
+#endif
             }
 #elif defined(HOST_X86_64)
             {
@@ -1913,6 +2731,42 @@
                     }
                 }
                 }
+                /* Replace the marker instructions (hlt/cli/sti) listed in exit_addrs[], which ends with -1, by the actual opcodes.  */
+                for (i = 0; exit_addrs[i] != -1; i++) {
+                    int op;
+                    switch (p_start[exit_addrs[i]])
+                      {
+                      case 0xf4: op = 0xc3; break; /* hlt -> ret */
+                      case 0xfa: op = 0xe9; break; /* cli -> jmp */
+                      case 0xfb: op = 0xe9; break; /* sti -> jmp */
+                      default: error("Internal error");
+                      }
+                    fprintf(outfile, 
+                            "    *(uint8_t *)(gen_code_ptr + %d) = 0x%x;\n",
+                            exit_addrs[i], op);
+                }
+                /* Fix up the return instruction; patch_bytes == -1 means it was the final instruction and needs no jmp.  */
+                if (patch_bytes != -1) {
+                    if (patch_bytes) { /* first copy the bytes the jmp will overwrite to the end of the op */
+                        fprintf(outfile, "    memcpy(gen_code_ptr + %d,"
+                                "gen_code_ptr + %d, %d);\n",
+                                copy_size, retpos, patch_bytes);
+                    }
+                    fprintf(outfile, /* opcode byte 0xe9 (jmp) at retpos */
+                            "    *(uint8_t *)(gen_code_ptr + %d) = 0xe9;\n",
+                            retpos);
+                    fprintf(outfile, /* 32-bit displacement from the end of the 5-byte jmp to the current end of the op */
+                            "    *(uint32_t *)(gen_code_ptr + %d) = 0x%x;\n",
+                            retpos + 1, copy_size - (retpos + 5));
+                    
+                    copy_size += patch_bytes; /* the op grew by the bytes copied to its end */
+                }
+#ifdef DEBUG_OP
+                fprintf(outfile, /* append two 0x90 bytes after the op */
+                        "    *(uint16_t *)(gen_code_ptr + %d) = 0x9090;\n",
+                        copy_size);
+                copy_size += 2;
+#endif
             }
 #elif defined(HOST_PPC)
             {
diff -u -r exec-all.h exec-all.h
--- exec-all.h	2007-02-06 00:01:54.000000000 +0100
+++ exec-all.h	2007-07-25 19:21:53.000000000 +0200
@@ -326,16 +326,31 @@
 
 #elif defined(__i386__) && defined(USE_DIRECT_JUMP)
 
-/* we patch the jump instruction directly */
+/* we patch the jump instruction directly.  Use sti in place of the actual
+   jmp instruction so that dyngen can patch in the correct result.  */
+#if defined(__APPLE__)
+/* XXX Different relocations are generated for MacOS X for Intel
+   (please as from cctools).  */
 #define GOTO_TB(opname, tbparam, n)\
 do {\
-    asm volatile (".section .data\n"\
+    asm volatile (ASM_DATA_SECTION\
 		  ASM_OP_LABEL_NAME(n, opname) ":\n"\
 		  ".long 1f\n"\
 		  ASM_PREVIOUS_SECTION \
-                  "jmp " ASM_NAME(__op_jmp) #n "\n"\
+                  "sti;.long " ASM_NAME(__op_jmp) #n "\n"\
 		  "1:\n");\
 } while (0)
+#else
+#define GOTO_TB(opname, tbparam, n)\
+do {\
+    asm volatile (ASM_DATA_SECTION\
+		  ASM_OP_LABEL_NAME(n, opname) ":\n"\
+		  ".long 1f\n"\
+		  ASM_PREVIOUS_SECTION \
+                  "sti;.long " ASM_NAME(__op_jmp) #n " - 1f\n"\
+		  "1:\n");\
+} while (0)
+#endif
 
 #else
 
diff -u -r hw/pc.c hw/pc.c
--- hw/pc.c	2007-02-06 00:01:54.000000000 +0100
+++ hw/pc.c	2007-07-25 19:21:53.000000000 +0200
@@ -154,7 +154,7 @@
 }
 
 /* hd_table must contain 4 block drivers */
-static void cmos_init(int ram_size, int boot_device, BlockDriverState **hd_table)
+static void cmos_init(int ram_size, int boot_device, int boot_device_2, BlockDriverState **hd_table)
 {
     RTCState *s = rtc_state;
     int val;
@@ -185,19 +185,32 @@
     rtc_set_memory(s, 0x34, val);
     rtc_set_memory(s, 0x35, val >> 8);
     
+    int bd2_val = 0; /* second boot device code for bits 4-7 of CMOS 0x3d; stays 0 unless boot_device_2 is a/c/d */
+    switch(boot_device_2) { /* same device codes as below (1 floppy, 2 hard disk, 3 CD-ROM), shifted into the high nibble */
+    case 'a':
+        bd2_val = 0x10;
+        break;
+    case 'c':
+        bd2_val = 0x20;
+        break;
+    case 'd':
+        bd2_val = 0x30;
+        break;
+    }
+
     switch(boot_device) {
     case 'a':
     case 'b':
-        rtc_set_memory(s, 0x3d, 0x01); /* floppy boot */
+        rtc_set_memory(s, 0x3d, bd2_val | 0x01); /* floppy boot */
         if (!fd_bootchk)
             rtc_set_memory(s, 0x38, 0x01); /* disable signature check */
         break;
     default:
     case 'c':
-        rtc_set_memory(s, 0x3d, 0x02); /* hard drive boot */
+        rtc_set_memory(s, 0x3d, bd2_val | 0x02); /* hard drive boot */
         break;
     case 'd':
-        rtc_set_memory(s, 0x3d, 0x03); /* CD-ROM boot */
+        rtc_set_memory(s, 0x3d, bd2_val | 0x03); /* CD-ROM boot */
         break;
     }
 
@@ -443,7 +456,8 @@
 }
 
 /* PC hardware initialisation */
-static void pc_init1(int ram_size, int vga_ram_size, int boot_device,
+static void pc_init1(int ram_size, int vga_ram_size,
+                     int boot_device, int boot_device_2,
                      DisplayState *ds, const char **fd_filename, int snapshot,
                      const char *kernel_filename, const char *kernel_cmdline,
                      const char *initrd_filename,
@@ -692,7 +706,7 @@
 
     floppy_controller = fdctrl_init(6, 2, 0, 0x3f0, fd_table);
 
-    cmos_init(ram_size, boot_device, bs_table);
+    cmos_init(ram_size, boot_device, boot_device_2, bs_table);
 
     if (pci_enabled && usb_enabled) {
         usb_uhci_init(pci_bus, piix3_devfn + 2);
@@ -730,27 +744,29 @@
 #endif
 }
 
-static void pc_init_pci(int ram_size, int vga_ram_size, int boot_device,
+static void pc_init_pci(int ram_size, int vga_ram_size,
+                        int boot_device, int boot_device_2,
                         DisplayState *ds, const char **fd_filename, 
                         int snapshot, 
                         const char *kernel_filename, 
                         const char *kernel_cmdline,
                         const char *initrd_filename)
 {
-    pc_init1(ram_size, vga_ram_size, boot_device,
+    pc_init1(ram_size, vga_ram_size, boot_device, boot_device_2,
              ds, fd_filename, snapshot,
              kernel_filename, kernel_cmdline,
              initrd_filename, 1);
 }
 
-static void pc_init_isa(int ram_size, int vga_ram_size, int boot_device,
+static void pc_init_isa(int ram_size, int vga_ram_size,
+                        int boot_device, int boot_device_2,
                         DisplayState *ds, const char **fd_filename, 
                         int snapshot, 
                         const char *kernel_filename, 
                         const char *kernel_cmdline,
                         const char *initrd_filename)
 {
-    pc_init1(ram_size, vga_ram_size, boot_device,
+    pc_init1(ram_size, vga_ram_size, boot_device, boot_device_2,
              ds, fd_filename, snapshot,
              kernel_filename, kernel_cmdline,
              initrd_filename, 0);
diff -u -r hw/vga.c hw/vga.c
--- hw/vga.c	2007-02-06 00:01:54.000000000 +0100
+++ hw/vga.c	2007-07-25 19:21:53.000000000 +0200
@@ -814,22 +814,38 @@
 
 static inline unsigned int rgb_to_pixel8(unsigned int r, unsigned int g, unsigned b)
 {
+#if __LITTLE_ENDIAN__ /* same 3-3-2 packing as the #else branch with r and b exchanged; NOTE(review): confirm which hosts define this macro */
+    return ((b >> 5) << 5) | ((g >> 5) << 2) | (r >> 6);
+#else
     return ((r >> 5) << 5) | ((g >> 5) << 2) | (b >> 6);
+#endif
 }
 
 static inline unsigned int rgb_to_pixel15(unsigned int r, unsigned int g, unsigned b)
 {
+#if __LITTLE_ENDIAN__ /* same 5-5-5 packing as the #else branch with r and b exchanged */
+    return ((b >> 3) << 10) | ((g >> 3) << 5) | (r >> 3);
+#else
     return ((r >> 3) << 10) | ((g >> 3) << 5) | (b >> 3);
+#endif
 }
 
 static inline unsigned int rgb_to_pixel16(unsigned int r, unsigned int g, unsigned b)
 {
+#if __LITTLE_ENDIAN__ /* same 5-6-5 packing as the #else branch with r and b exchanged */
+    return ((b >> 3) << 11) | ((g >> 2) << 5) | (r >> 3);
+#else
     return ((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3);
+#endif
 }
 
 static inline unsigned int rgb_to_pixel32(unsigned int r, unsigned int g, unsigned b)
 {
+#if __LITTLE_ENDIAN__ /* same 8-8-8 packing as the #else branch with r and b exchanged */
+    return (b << 16) | (g << 8) | r;
+#else
     return (r << 16) | (g << 8) | b;
+#endif
 }
 
 static inline unsigned int rgb_to_pixel32bgr(unsigned int r, unsigned int g, unsigned b)
diff -u -r softmmu_header.h softmmu_header.h
--- softmmu_header.h	2007-02-06 00:01:54.000000000 +0100
+++ softmmu_header.h	2007-07-25 19:21:53.000000000 +0200
@@ -108,7 +108,7 @@
 void REGPARM(2) glue(glue(__st, SUFFIX), MMUSUFFIX)(target_ulong addr, DATA_TYPE v, int is_user);
 
 #if (DATA_SIZE <= 4) && (TARGET_LONG_BITS == 32) && defined(__i386__) && \
-    (ACCESS_TYPE <= 1) && defined(ASM_SOFTMMU)
+    (ACCESS_TYPE <= 1) && defined(ASM_SOFTMMU) && (__GNUC__ < 4)
 
 #define CPU_TLB_ENTRY_BITS 4
 
@@ -150,7 +150,7 @@
                   "m" (*(uint32_t *)offsetof(CPUState, tlb_table[CPU_MEM_INDEX][0].addr_read)),
                   "i" (CPU_MEM_INDEX),
                   "m" (*(uint8_t *)&glue(glue(__ld, SUFFIX), MMUSUFFIX))
-                  : "%eax", "%ecx", "%edx", "memory", "cc");
+                  : "%eax", "%edx", "memory", "cc");
     return res;
 }
 
@@ -197,13 +197,14 @@
                   "m" (*(uint32_t *)offsetof(CPUState, tlb_table[CPU_MEM_INDEX][0].addr_read)),
                   "i" (CPU_MEM_INDEX),
                   "m" (*(uint8_t *)&glue(glue(__ld, SUFFIX), MMUSUFFIX))
-                  : "%eax", "%ecx", "%edx", "memory", "cc");
+                  : "%eax", "%edx", "memory", "cc");
     return res;
 }
 #endif
 
-static inline void glue(glue(st, SUFFIX), MEMSUFFIX)(target_ulong ptr, RES_TYPE v)
+static inline void glue(glue(st, SUFFIX), MEMSUFFIX)(target_ulong ptr, RES_TYPE val)
 {
+    RES_TYPE v = val;
     asm volatile ("movl %0, %%edx\n"
                   "movl %0, %%eax\n"
                   "shrl %3, %%edx\n"
@@ -240,16 +241,14 @@
                   "2:\n"
                   : 
                   : "r" (ptr), 
-/* NOTE: 'q' would be needed as constraint, but we could not use it
-   with T1 ! */
-                  "r" (v), 
+                  "q" (v), 
                   "i" ((CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS), 
                   "i" (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), 
                   "i" (TARGET_PAGE_MASK | (DATA_SIZE - 1)),
                   "m" (*(uint32_t *)offsetof(CPUState, tlb_table[CPU_MEM_INDEX][0].addr_write)),
                   "i" (CPU_MEM_INDEX),
                   "m" (*(uint8_t *)&glue(glue(__st, SUFFIX), MMUSUFFIX))
-                  : "%eax", "%ecx", "%edx", "memory", "cc");
+                  : "%eax", "%edx", "memory", "cc");
 }
 
 #else
diff -u -r target-i386/helper.c target-i386/helper.c
--- target-i386/helper.c	2007-02-06 00:01:54.000000000 +0100
+++ target-i386/helper.c	2007-07-25 19:21:53.000000000 +0200
@@ -29,68 +29,6 @@
     (raise_exception_err)(a, b);\
 } while (0)
 #endif
-
-const uint8_t parity_table[256] = {
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
-    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
-};
-
-/* modulo 17 table */
-const uint8_t rclw_table[32] = {
-    0, 1, 2, 3, 4, 5, 6, 7, 
-    8, 9,10,11,12,13,14,15,
-   16, 0, 1, 2, 3, 4, 5, 6,
-    7, 8, 9,10,11,12,13,14,
-};
-
-/* modulo 9 table */
-const uint8_t rclb_table[32] = {
-    0, 1, 2, 3, 4, 5, 6, 7, 
-    8, 0, 1, 2, 3, 4, 5, 6,
-    7, 8, 0, 1, 2, 3, 4, 5, 
-    6, 7, 8, 0, 1, 2, 3, 4,
-};
-
-const CPU86_LDouble f15rk[7] =
-{
-    0.00000000000000000000L,
-    1.00000000000000000000L,
-    3.14159265358979323851L,  /*pi*/
-    0.30102999566398119523L,  /*lg2*/
-    0.69314718055994530943L,  /*ln2*/
-    1.44269504088896340739L,  /*l2e*/
-    3.32192809488736234781L,  /*l2t*/
-};
     
 /* thread support */
 
@@ -3452,8 +3390,15 @@
         nb_xmm_regs = 8 << data64;
         addr = ptr + 0xa0;
         for(i = 0; i < nb_xmm_regs; i++) {
+#if defined(__i386__) && __GNUC__ >= 4 /* on this host/compiler pair, fill each register with four ldl() loads at 4-byte steps */
+            env->xmm_regs[i].XMM_L(0) = ldl(addr);
+            env->xmm_regs[i].XMM_L(1) = ldl(addr + 4);
+            env->xmm_regs[i].XMM_L(2) = ldl(addr + 8);
+            env->xmm_regs[i].XMM_L(3) = ldl(addr + 12);
+#else /* otherwise two ldq() loads at 8-byte steps cover the same 16 bytes */
             env->xmm_regs[i].XMM_Q(0) = ldq(addr);
             env->xmm_regs[i].XMM_Q(1) = ldq(addr + 8);
+#endif
             addr += 16;
         }
     }
diff -u -r target-i386/op.c target-i386/op.c
--- target-i386/op.c	2007-02-06 00:01:54.000000000 +0100
+++ target-i386/op.c	2007-07-25 19:21:53.000000000 +0200
@@ -21,6 +21,69 @@
 #define ASM_SOFTMMU
 #include "exec.h"
 
+const uint8_t parity_table[256] = {
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+    0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+};
+
+/* modulo 17 table */
+const uint8_t rclw_table[32] = {
+    0, 1, 2, 3, 4, 5, 6, 7, 
+    8, 9,10,11,12,13,14,15,
+   16, 0, 1, 2, 3, 4, 5, 6,
+    7, 8, 9,10,11,12,13,14,
+};
+
+/* modulo 9 table */
+const uint8_t rclb_table[32] = {
+    0, 1, 2, 3, 4, 5, 6, 7, 
+    8, 0, 1, 2, 3, 4, 5, 6,
+    7, 8, 0, 1, 2, 3, 4, 5, 
+    6, 7, 8, 0, 1, 2, 3, 4,
+};
+
+const CPU86_LDouble f15rk[7] =
+{
+    0.00000000000000000000L,
+    1.00000000000000000000L,
+    3.14159265358979323851L,  /*pi*/
+    0.30102999566398119523L,  /*lg2*/
+    0.69314718055994530943L,  /*ln2*/
+    1.44269504088896340739L,  /*l2e*/
+    3.32192809488736234781L,  /*l2t*/
+};
+
+    
 /* n must be a constant to be efficient */
 static inline target_long lshift(target_long x, int n)
 {
@@ -1531,6 +1594,7 @@
     eflags = cc_table[CC_OP].compute_all();
     eflags &= ~CC_C;
     CC_SRC = eflags;
+    FORCE_RET();
 }
 
 void OPPROTO op_stc(void)
@@ -1547,6 +1611,7 @@
     eflags = cc_table[CC_OP].compute_all();
     eflags ^= CC_C;
     CC_SRC = eflags;
+    FORCE_RET();
 }
 
 void OPPROTO op_salc(void)
diff -u -r target-i386/ops_sse.h target-i386/ops_sse.h
--- target-i386/ops_sse.h	2007-02-06 00:01:54.000000000 +0100
+++ target-i386/ops_sse.h	2007-07-25 19:21:53.000000000 +0200
@@ -34,6 +34,12 @@
 #define Q(n) XMM_Q(n)
 #define SUFFIX _xmm
 #endif
+/* RegCopy(d, s): copy register value s into lvalue d; done as a byte copy on i386 with GCC >= 4, plain assignment otherwise. */
+#if defined(__i386__) && __GNUC__ >= 4
+#define RegCopy(d, s) __builtin_memcpy(&(d), &(s), sizeof(d))
+#else
+#define RegCopy(d, s) ((d) = (s))
+#endif
 
 void OPPROTO glue(op_psrlw, SUFFIX)(void)
 {
@@ -589,7 +595,7 @@
     r.W(1) = s->W((order >> 2) & 3);
     r.W(2) = s->W((order >> 4) & 3);
     r.W(3) = s->W((order >> 6) & 3);
-    *d = r;
+    RegCopy(*d, r);
 }
 #else
 void OPPROTO op_shufps(void)
diff -u -r target-ppc/exec.h target-ppc/exec.h
--- target-ppc/exec.h	2007-02-06 00:01:54.000000000 +0100
+++ target-ppc/exec.h	2007-07-25 19:21:53.000000000 +0200
@@ -37,11 +37,7 @@
 #define FT1 (env->ft1)
 #define FT2 (env->ft2)
 
-#if defined (DEBUG_OP)
-# define RETURN() __asm__ __volatile__("nop" : : : "memory");
-#else
-# define RETURN() __asm__ __volatile__("" : : : "memory");
-#endif
+#define RETURN() FORCE_RET()
 
 #include "cpu.h"
 #include "exec-all.h"
diff -u -r vl.c vl.c
--- vl.c	2007-02-06 00:01:54.000000000 +0100
+++ vl.c	2007-07-25 19:21:53.000000000 +0200
@@ -131,6 +131,7 @@
 const char* keyboard_layout = NULL;
 int64_t ticks_per_sec;
 int boot_device = 'c';
+int boot_device_2 = 'd';
 int ram_size;
 int pit_min_timer_count = 0;
 int nb_nics;
@@ -6021,7 +6022,11 @@
            "-hda/-hdb file  use 'file' as IDE hard disk 0/1 image\n"
            "-hdc/-hdd file  use 'file' as IDE hard disk 2/3 image\n"
            "-cdrom file     use 'file' as IDE cdrom image (cdrom is ide1 master)\n"
+#ifdef TARGET_I386
+           "-boot d1(,d2)   boot on floppy (a), hard disk (c), CD-ROM (d), or network (n)\n"
+#else
            "-boot [a|c|d|n] boot on floppy (a), hard disk (c), CD-ROM (d), or network (n)\n"
+#endif
            "-snapshot       write to temporary files instead of disk image files\n"
 #ifdef CONFIG_SDL
            "-no-quit        disable SDL window close capability\n"
@@ -6721,6 +6726,15 @@
                 break;
             case QEMU_OPTION_boot:
                 boot_device = optarg[0];
+#ifdef TARGET_I386
+                if (strlen(optarg) == 3 && optarg[1] == ',') { /* "d1,d2": a second boot device only when the separator is really a comma */
+                    boot_device_2 = optarg[2];
+                    if (boot_device_2 != 'a' && boot_device_2 != 'c' && boot_device_2 != 'd') {
+                        fprintf(stderr, "qemu: invalid second boot device '%c'\n", boot_device_2);
+                        exit(1);
+                    }
+                }
+#endif
                 if (boot_device != 'a' && 
 #if defined(TARGET_SPARC) || defined(TARGET_I386)
 		    // Network boot
@@ -7199,8 +7213,11 @@
                 qemu_chr_printf(parallel_hds[i], "parallel%d console\r\n", i);
         }
     }
-
+#ifdef TARGET_I386
+    machine->init(ram_size, vga_ram_size, boot_device, boot_device_2,
+#else
     machine->init(ram_size, vga_ram_size, boot_device,
+#endif
                   ds, fd_filename, snapshot,
                   kernel_filename, kernel_cmdline, initrd_filename);
 
diff -u -r vl.h vl.h
--- vl.h	2007-02-06 00:01:54.000000000 +0100
+++ vl.h	2007-07-25 19:21:53.000000000 +0200
@@ -683,6 +683,9 @@
 
 typedef void QEMUMachineInitFunc(int ram_size, int vga_ram_size, 
                                  int boot_device,
+#ifdef TARGET_I386
+                                 int boot_device_2,
+#endif
              DisplayState *ds, const char **fd_filename, int snapshot,
              const char *kernel_filename, const char *kernel_cmdline,
              const char *initrd_filename);
