Enable providing a backing file for the PC's RAM.

The file is specified by the new -pcram-file option. The file is mmap'd with
MAP_SHARED, so guest writes land in the file itself and the RAMBlock that it
backs doesn't need to be saved by vm_save / migration.

We have found this small feature very useful for experimenting with memory
migration techniques. By exposing PC memory through a simple interface (i.e.,
the filesystem), we can implement various memory migration techniques
independently of QEMU. For example, one can map a VM's ram to a file being
served over a network, thus implementing on-demand fetching.

In the future, RAMBlocks could be mmap'd privately to implement memory sharing.

Note that unlike the existing -mem-path option, which specifies a (hugetlbfs)
directory in which files for all RAMBlocks are to be created, -pcram-file
specifies a single, already existing file to be mapped for the "pc.ram"
RAMBlock only. The file's size must equal the size of that RAMBlock.

Refreshed for qemu-1.3.0

Signed-off-by: Peter Feiner <peter@gridcentric.com>
Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>

Index: qemu/arch_init.c
===================================================================
--- qemu.orig/arch_init.c	2013-01-17 10:56:55.040404548 -0600
+++ qemu/arch_init.c	2013-01-17 10:56:55.024404548 -0600
@@ -336,7 +336,8 @@
 static unsigned long *migration_bitmap;
 static uint64_t migration_dirty_pages;
 
-static inline bool migration_bitmap_test_and_reset_dirty(MemoryRegion *mr,
+static inline bool migration_bitmap_test_and_reset_dirty(RAMBlock *block,
+                                                         MemoryRegion *mr,
                                                          ram_addr_t offset)
 {
     bool ret;
@@ -344,13 +345,14 @@
 
     ret = test_and_clear_bit(nr, migration_bitmap);
 
-    if (ret) {
+    if (ret && !block->do_not_save) {
         migration_dirty_pages--;
     }
     return ret;
 }
 
-static inline bool migration_bitmap_set_dirty(MemoryRegion *mr,
+static inline bool migration_bitmap_set_dirty(RAMBlock *block,
+                                              MemoryRegion *mr,
                                               ram_addr_t offset)
 {
     bool ret;
@@ -358,7 +360,7 @@
 
     ret = test_and_set_bit(nr, migration_bitmap);
 
-    if (!ret) {
+    if (!ret && !block->do_not_save) {
         migration_dirty_pages++;
     }
     return ret;
@@ -385,7 +387,7 @@
         for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
             if (memory_region_get_dirty(block->mr, addr, TARGET_PAGE_SIZE,
                                         DIRTY_MEMORY_MIGRATION)) {
-                migration_bitmap_set_dirty(block->mr, addr);
+                migration_bitmap_set_dirty(block, block->mr, addr);
             }
         }
         memory_region_reset_dirty(block->mr, 0, block->length,
@@ -424,10 +426,17 @@
 
     if (!block)
         block = QLIST_FIRST(&ram_list.blocks);
+    
+    while (block->do_not_save) {
+        block = QLIST_NEXT(block, next);
+        if (!block) {
+            return 0;
+        }
+    }
 
     do {
         mr = block->mr;
-        if (migration_bitmap_test_and_reset_dirty(mr, offset)) {
+        if (migration_bitmap_test_and_reset_dirty(block, mr, offset)) {
             uint8_t *p;
             int cont = (block == last_block) ? RAM_SAVE_FLAG_CONTINUE : 0;
 
@@ -467,6 +476,16 @@
             block = QLIST_NEXT(block, next);
             if (!block)
                 block = QLIST_FIRST(&ram_list.blocks);
+            /* We need to skip pcram if do_not_save, otherwise we'll loop all
+             * over again. */
+            while (block->do_not_save) {
+                block = QLIST_NEXT(block, next);
+                /* This will not iterate forever because we only set pcram to
+                 * do_not_save, and there are at least one other
+                 * ramblock(e.g. pc.rom or pc.bios). */
+                if (!block)
+                    block = QLIST_FIRST(&ram_list.blocks);
+            }
         }
     } while (block != last_block || offset != last_offset);
 
@@ -570,6 +589,14 @@
     migration_bitmap = bitmap_new(ram_pages);
     bitmap_set(migration_bitmap, 1, ram_pages);
     migration_dirty_pages = ram_pages;
+    {
+        RAMBlock *block = QLIST_FIRST(&ram_list.blocks);
+        while (block) {
+            if (block->do_not_save)
+                migration_dirty_pages -= block->length >> TARGET_PAGE_BITS;
+            block = QLIST_NEXT(block, next);
+        }
+    }
 
     bytes_transferred = 0;
     reset_ram_globals();
Index: qemu/cpu-all.h
===================================================================
--- qemu.orig/cpu-all.h	2013-01-17 10:56:55.040404548 -0600
+++ qemu/cpu-all.h	2013-01-17 10:56:55.028404548 -0600
@@ -488,6 +488,7 @@
     uint32_t flags;
     char idstr[256];
     QLIST_ENTRY(RAMBlock) next;
+    int do_not_save;
 #if defined(__linux__) && !defined(TARGET_S390X)
     int fd;
 #endif
@@ -499,6 +500,17 @@
 } RAMList;
 extern RAMList ram_list;
 
+typedef struct MemFile {        /* one RAMBlock-name -> backing-file pair */
+    const char *idstr;          /* compared against block->mr->name */
+    const char *path;           /* file opened O_RDWR and mmap'd MAP_SHARED */
+    QLIST_ENTRY(MemFile) next;  /* link in MemFileList.files */
+} MemFile;
+
+typedef struct MemFileList {
+    QLIST_HEAD(files, MemFile) files;  /* entries added by add_memory_file() */
+} MemFileList;
+extern MemFileList mem_file_list;
+
 extern const char *mem_path;
 extern int mem_prealloc;
 
Index: qemu/exec.c
===================================================================
--- qemu.orig/exec.c	2013-01-17 10:56:55.040404548 -0600
+++ qemu/exec.c	2013-01-17 10:56:55.028404548 -0600
@@ -109,6 +109,8 @@
 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
 static MemoryRegion io_mem_subpage_ram;
 
+MemFileList mem_file_list = { .files = QLIST_HEAD_INITIALIZER(mem_file_list) };
+
 #endif
 
 CPUArchState *first_cpu;
@@ -2341,6 +2343,76 @@
 }
 
 #if defined(__linux__) && !defined(TARGET_S390X)
+/*
+ * Back a RAMBlock with a user-supplied file, if one is registered for it.
+ *
+ * mem_file_list is searched for an entry whose idstr equals block->mr->name.
+ * If one exists, its file is opened read/write, required to be exactly
+ * @memory bytes long, and mapped MAP_SHARED.  The descriptor is left open in
+ * block->fd and block->do_not_save is set.
+ *
+ * Returns the mapping's host address, or NULL when no file is registered for
+ * this block.  Every failure on a registered file is fatal: a message is
+ * printed to stderr and abort() is called.
+ */
+static void *mem_file_ram_alloc(RAMBlock *block,
+                                ram_addr_t memory)
+{
+    void *host;
+    MemFile *mf;
+    struct stat buf;
+
+    QLIST_FOREACH(mf, &mem_file_list.files, next) {
+        if (strcmp(mf->idstr, block->mr->name)) {
+            continue;
+        }
+
+        if (kvm_enabled() && !kvm_has_sync_mmu()) {
+            fprintf(stderr, "host lacks kvm mmu notifiers, "
+                            "MemFile unsupported, abort!\n");
+            abort();
+        }
+
+        block->fd = open(mf->path, O_RDWR);
+        if (block->fd == -1) {
+            fprintf(stderr, "Could not open %s for RAMBlock %s, abort!\n",
+                    mf->path, mf->idstr);
+            abort();
+        }
+        if (fstat(block->fd, &buf) != 0) {
+            fprintf(stderr, "Could not stat %s for RAMBlock %s, abort!\n",
+                    mf->path, mf->idstr);
+            abort();
+        }
+        if ((uint64_t)buf.st_size != (uint64_t)memory) {
+            /*
+             * off_t and ram_addr_t need not be "unsigned long", so cast to a
+             * type that is guaranteed to match the conversion specifiers.
+             * The block is named via mf->idstr, like the other messages.
+             */
+            fprintf(stderr, "File %s has size %lluB. "
+                    "RAMBlock %s expects %lluB. Abort!\n",
+                    mf->path, (unsigned long long)buf.st_size, mf->idstr,
+                    (unsigned long long)memory);
+            abort();
+        }
+
+        host = mmap(NULL, memory, PROT_READ | PROT_WRITE, MAP_SHARED,
+                    block->fd, 0);
+        if (host == MAP_FAILED) {
+            fprintf(stderr, "Failed to mmap %s for RAMBlock %s, abort!\n",
+                    mf->path, mf->idstr);
+            abort();
+        }
+        /* Flag checked by the RAM save code to leave this block out. */
+        block->do_not_save = 1;
+        return host;
+    }
+    return NULL;
+}
+#endif
+
+#if defined(__linux__) && !defined(TARGET_S390X)
 
 #include <sys/vfs.h>
 
@@ -2543,6 +2598,43 @@
     return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
 }
 
+/*
+ * Register @path as the backing file for the RAMBlock named @idstr.
+ *
+ * Both pointers are stored as-is (not copied), so the strings must stay
+ * valid for as long as mem_file_list is in use.  Registering the same
+ * @idstr twice, or calling this in a build where file-backed RAM is not
+ * compiled in, prints a message to stderr and aborts.
+ */
+void add_memory_file(const char *idstr, const char *path)
+{
+#if defined(__linux__) && !defined(TARGET_S390X)
+    MemFile *mf;
+
+    QLIST_FOREACH(mf, &mem_file_list.files, next) {
+        if (!strcmp(mf->idstr, idstr)) {
+            fprintf(stderr, "MemFile for \"%s\" already specified, abort!\n",
+                    idstr);
+            abort();
+        }
+    }
+    mf = g_malloc0(sizeof(*mf));
+    mf->idstr = idstr;
+    mf->path = path;
+    QLIST_INSERT_HEAD(&mem_file_list.files, mf, next);
+#elif defined(__linux__)
+    /*
+     * The consumer of mem_file_list is compiled out for this target, so
+     * refuse the request instead of silently ignoring the file.
+     */
+    fprintf(stderr, "MemFile not supported on this target, abort!\n");
+    abort();
+#else
+    fprintf(stderr, "MemFile only supported on Linux, abort!\n");
+    abort();
+#endif
+}
+
 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                    MemoryRegion *mr)
 {
@@ -2553,6 +2630,12 @@
 
     new_block->mr = mr;
     new_block->offset = find_ram_offset(size);
+#if defined(__linux__) && !defined(TARGET_S390X)
+    new_block->host = mem_file_ram_alloc(new_block, size);
+    if (new_block->host) {
+        assert(!host);
+    } else
+#endif
     if (host) {
         new_block->host = host;
         new_block->flags |= RAM_PREALLOC_MASK;
Index: qemu/qemu-common.h
===================================================================
--- qemu.orig/qemu-common.h	2013-01-17 10:56:55.040404548 -0600
+++ qemu/qemu-common.h	2013-01-17 10:56:55.028404548 -0600
@@ -405,6 +405,8 @@
 void os_parse_cmd_args(int index, const char *optarg);
 void os_pidfile_error(void);
 
+void add_memory_file(const char *idstr, const char *path);
+
 /* Convert a byte between binary and BCD.  */
 static inline uint8_t to_bcd(uint8_t val)
 {
Index: qemu/qemu-options.hx
===================================================================
--- qemu.orig/qemu-options.hx	2013-01-17 10:56:55.040404548 -0600
+++ qemu/qemu-options.hx	2013-01-17 10:56:55.032404548 -0600
@@ -459,6 +459,19 @@
 ETEXI
 #endif
 
+#ifdef __linux__
+DEF("pcram-file", HAS_ARG, QEMU_OPTION_pcram_file,
+    "-pcram-file FILE  provide backing storage for PC RAM\n", QEMU_ARCH_I386)
+STEXI
+@item -pcram-file @var{path}
+Populate guest PC RAM with memory mapped file @var{path}. All changes to guest
+ram are reflected in the file (i.e., it is a @code{MAP_SHARED} mapping).
+
+PC RAM is neither migrated nor saved.
+ETEXI
+#endif
+
+
 DEF("k", HAS_ARG, QEMU_OPTION_k,
     "-k language     use keyboard layout (for example 'fr' for French)\n",
     QEMU_ARCH_ALL)
Index: qemu/vl.c
===================================================================
--- qemu.orig/vl.c	2013-01-17 10:56:55.040404548 -0600
+++ qemu/vl.c	2013-01-17 10:56:55.032404548 -0600
@@ -2938,6 +2938,9 @@
                 }
                 break;
             }
+            case QEMU_OPTION_pcram_file:
+                add_memory_file("pc.ram", optarg); 
+                break;
             case QEMU_OPTION_mempath:
                 mem_path = optarg;
                 break;
