
OverlayFS is a modern union filesystem that is similar to AUFS, but faster and with a simpler implementation. Docker provides two storage drivers for OverlayFS: the original overlay, and the newer and more stable overlay2.



overlay vs overlay2

If you are still using the overlay driver rather than overlay2, see How the overlay driver works instead.

OverlayFS layers two directories on a single Linux host and presents them as a single directory. These directories are called layers and the unification process is referred to as a union mount. OverlayFS refers to the lower directory as lowerdir and the upper directory as upperdir. The unified view is exposed through its own directory called merged.

While the overlay driver only works with a single lower OverlayFS layer and hence requires hard links for implementation of multi-layered images, the overlay2 driver natively supports up to 128 lower OverlayFS layers. This capability provides better performance for layer-related Docker commands such as docker build and docker commit, and consumes fewer inodes on the backing filesystem.


To create a container, the overlay driver combines the directory representing the image’s top layer plus a new directory for the container. The image’s top layer is the lowerdir in the overlay and is read-only. The new directory for the container is the upperdir and is writable.

moby overlay的代码:moby/moby/daemon/graphdriver/overlay

  • Create()方法也就是产生rootfs的过程中都是copy.DirCopy(parentUpperDir, upperDir, copy.Content, true)也就是拷贝的内容(将parentUpperDir的内容拷贝到upperDir,因为只支持一层lowerDir,同时不能将parentUpperDir hard-link到upperDir目录,因为upper层是可读写的,hard-link也会破坏原有的parent)
  • ApplyDiff()也就是构建镜像过程中调用的函数都是使用的hard-link的方式copy.DirCopy(parentRootDir, tmpRootDir, copy.Hardlink, true)

docker commit过程需要Create() -> Diff() -> ApplyDiff()的过程,因为Create()会拷贝文件内容,因此会消耗多余的inode。


func (o *snapshotter) Prepare(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) {
    return o.createSnapshot(ctx, snapshots.KindActive, key, parent, opts)

func (o *snapshotter) createSnapshot(ctx context.Context, kind snapshots.Kind, key, parent string, opts []snapshots.Opt) ([]mount.Mount, error) {
    ctx, t, err := o.ms.TransactionContext(ctx, true)
    if err != nil {
        return nil, err

    var td, path string
    defer func() {
        if err != nil {
            if td != "" {
                if err1 := os.RemoveAll(td); err1 != nil {
                    log.G(ctx).WithError(err1).Warn("failed to cleanup temp snapshot directory")
            if path != "" {
                if err1 := os.RemoveAll(path); err1 != nil {
                    log.G(ctx).WithError(err1).WithField("path", path).Error("failed to reclaim snapshot directory, directory may need removal")
                    err = errors.Wrapf(err, "failed to remove path: %v", err1)

    snapshotDir := filepath.Join(o.root, "snapshots")
    td, err = o.prepareDirectory(ctx, snapshotDir, kind)
    if err != nil {
        if rerr := t.Rollback(); rerr != nil {
            log.G(ctx).WithError(rerr).Warn("failed to rollback transaction")
        return nil, errors.Wrap(err, "failed to create prepare snapshot dir")
    rollback := true
    defer func() {
        if rollback {
            if rerr := t.Rollback(); rerr != nil {
                log.G(ctx).WithError(rerr).Warn("failed to rollback transaction")

    s, err := storage.CreateSnapshot(ctx, kind, key, parent, opts...)
    if err != nil {
        return nil, errors.Wrap(err, "failed to create snapshot")

    if len(s.ParentIDs) > 0 {
        st, err := os.Stat(o.upperPath(s.ParentIDs[0]))
        if err != nil {
            return nil, errors.Wrap(err, "failed to stat parent")

        stat := st.Sys().(*syscall.Stat_t)

        // 设置目录权限和parent的相同
        if err := os.Lchown(filepath.Join(td, "fs"), int(stat.Uid), int(stat.Gid)); err != nil {
            if rerr := t.Rollback(); rerr != nil {
                log.G(ctx).WithError(rerr).Warn("failed to rollback transaction")
            return nil, errors.Wrap(err, "failed to chown")

    path = filepath.Join(snapshotDir, s.ID)
    if err = os.Rename(td, path); err != nil {
        return nil, errors.Wrap(err, "failed to rename")
    td = ""

    // 这里不太理解为什么rollback=false? 
    rollback = false
    if err = t.Commit(); err != nil {
        return nil, errors.Wrap(err, "commit failed")

    // merge目录不由snapshotter决定,挂载在哪,哪就是merge层
    return o.mounts(s), nil

// mounts builds the mount description for snapshot s.
//
//   - No parents: a single "bind" mount whose source is the snapshot's own
//     upper directory.
//   - Not KindActive with exactly one parent: a single "bind" mount whose
//     source is that parent's upper directory.
//   - Otherwise: a single "overlay" mount whose options carry
//     "lowerdir=<parent upper dirs joined by ':'>", preceded by "workdir=" and
//     "upperdir=" when the snapshot is KindActive.
//
// NOTE(review): this excerpt has lost its closing braces and the entries of
// both bind-mount Options lists; restore them from the upstream source before
// compiling.
func (o *snapshotter) mounts(s storage.Snapshot) []mount.Mount {
    if len(s.ParentIDs) == 0 {
        // if we only have one layer/no parents then just return a bind mount as overlay
        // will not work
        roFlag := "rw"
        if s.Kind == snapshots.KindView {
            roFlag = "ro"

        return []mount.Mount{
                Source: o.upperPath(s.ID),
                Type:   "bind",
                // NOTE(review): the option entries are missing here; roFlag is
                // presumably one of them — confirm against upstream.
                Options: []string{
    var options []string

    if s.Kind == snapshots.KindActive {
        options = append(options,
            // filepath.Join(o.root, "snapshots", id, "work")
            // Work directory the mounted filesystem uses for temporary and
            // intermediate files.
            fmt.Sprintf("workdir=%s", o.workPath(s.ID)),
            // Upper directory, i.e. where the filesystem stores its data; it
            // can be thought of as the container's RW layer.
            // filepath.Join(o.root, "snapshots", id, "fs")
            fmt.Sprintf("upperdir=%s", o.upperPath(s.ID)),
    } else if len(s.ParentIDs) == 1 {
        // Only one parent and a non-active snapshot is requested: as the
        // comment above explains, a bind mount of the parent is enough.
        return []mount.Mount{
                Source: o.upperPath(s.ParentIDs[0]),
                Type:   "bind",
                // NOTE(review): the option entries are missing here as well.
                Options: []string{

    // Whether or not a read-write layer is being returned, lowerdir always has
    // to be added to the options.
    parentPaths := make([]string, len(s.ParentIDs))
    for i := range s.ParentIDs {
        parentPaths[i] = o.upperPath(s.ParentIDs[i])

    options = append(options, fmt.Sprintf("lowerdir=%s", strings.Join(parentPaths, ":")))
    // How the mount is actually performed is up to the mount package; see
    // containerd/containerd/sys/mount_linux.go.
    return []mount.Mount{
            Type:    "overlay",
            Source:  "overlay",
            Options: options,


// 和native的一样的,没什么好说的
func (o *snapshotter) Commit(ctx context.Context, name, key string, opts ...snapshots.Opt) error {
    ctx, t, err := o.ms.TransactionContext(ctx, true)
    if err != nil {
        return err

    defer func() {
        if err != nil {
            if rerr := t.Rollback(); rerr != nil {
                log.G(ctx).WithError(rerr).Warn("failed to rollback transaction")

    // grab the existing id
    id, _, _, err := storage.GetInfo(ctx, key)
    if err != nil {
        return err

    usage, err := fs.DiskUsage(ctx, o.upperPath(id))
    if err != nil {
        return err

    if _, err = storage.CommitActive(ctx, key, name, snapshots.Usage(usage), opts...); err != nil {
        return errors.Wrap(err, "failed to commit snapshot")
    return t.Commit()

