Documentation/ABI/testing/debugfs-aufs                          |    50 +
 Documentation/ABI/testing/sysfs-aufs                            |    31 +
 Documentation/admin-guide/kernel-parameters.txt                 |    11 +
 Documentation/block/bfq-iosched.txt                             |    74 +-
 Documentation/filesystems/aufs/README                           |   393 +
 Documentation/filesystems/aufs/design/01intro.txt               |   171 +
 Documentation/filesystems/aufs/design/02struct.txt              |   258 +
 Documentation/filesystems/aufs/design/03atomic_open.txt         |    85 +
 Documentation/filesystems/aufs/design/03lookup.txt              |   113 +
 Documentation/filesystems/aufs/design/04branch.txt              |    74 +
 Documentation/filesystems/aufs/design/05wbr_policy.txt          |    64 +
 Documentation/filesystems/aufs/design/06dirren.dot              |    31 +
 Documentation/filesystems/aufs/design/06dirren.txt              |   102 +
 Documentation/filesystems/aufs/design/06fhsm.txt                |   120 +
 Documentation/filesystems/aufs/design/06mmap.txt                |    72 +
 Documentation/filesystems/aufs/design/06xattr.txt               |    96 +
 Documentation/filesystems/aufs/design/07export.txt              |    58 +
 Documentation/filesystems/aufs/design/08shwh.txt                |    52 +
 Documentation/filesystems/aufs/design/10dynop.txt               |    47 +
 Documentation/power/tuxonice-internals.txt                      |   532 +
 Documentation/power/tuxonice.txt                                |   948 +
 Documentation/scheduler/sched-BFS.txt                           |   351 +
 Documentation/scheduler/sched-MuQSS.txt                         |   373 +
 Documentation/sysctl/kernel.txt                                 |    37 +
 Documentation/tp_smapi.txt                                      |   275 +
 Documentation/vm/00-INDEX                                       |     2 +
 Documentation/vm/uksm.txt                                       |    61 +
 MAINTAINERS                                                     |    19 +
 Makefile                                                        |     4 +
 arch/powerpc/platforms/cell/spufs/sched.c                       |     5 -
 arch/x86/Kconfig                                                |    91 +-
 arch/x86/Kconfig.cpu                                            |   231 +-
 arch/x86/Makefile                                               |    35 +-
 arch/x86/Makefile_32.cpu                                        |    24 +-
 arch/x86/include/asm/module.h                                   |    40 +
 block/Kconfig.iosched                                           |    50 +
 block/Makefile                                                  |     4 +-
 block/bfq-cgroup-included.c                                     |  1354 +
 block/bfq-ioc.c                                                 |    36 +
 block/bfq-mq-iosched.c                                          |  6187 ++++
 block/bfq-mq.h                                                  |  1011 +
 block/bfq-sched.c                                               |  2066 ++
 block/bfq-sq-iosched.c                                          |  5647 ++++
 block/bfq.h                                                     |  1002 +
 block/blk-core.c                                                |     5 +
 block/elevator.c                                                |     4 +
 block/genhd.c                                                   |    81 +
 block/uuid.c                                                    |   510 +
 drivers/block/loop.c                                            |    18 +
 drivers/cpufreq/cpufreq_ondemand.c                              |    12 +-
 drivers/gpu/drm/amd/display/dc/basics/logger.c                  |     2 +-
 drivers/gpu/drm/i915/intel_dp.c                                 |     3 +-
 drivers/gpu/drm/i915/intel_dp_link_training.c                   |    26 +-
 drivers/infiniband/core/addr.c                                  |     1 +
 drivers/input/mouse/synaptics.c                                 |     4 +-
 drivers/input/mouse/synaptics.h                                 |     1 +
 drivers/macintosh/Kconfig                                       |     7 +
 drivers/macintosh/adbhid.c                                      |    83 +-
 drivers/platform/x86/Kconfig                                    |    19 +
 drivers/platform/x86/Makefile                                   |     2 +
 drivers/platform/x86/hdaps.c                                    |   945 +-
 drivers/platform/x86/thinkpad_ec.c                              |   513 +
 drivers/platform/x86/tp_smapi.c                                 |  1493 +
 drivers/scsi/Kconfig                                            |     2 +
 drivers/scsi/Makefile                                           |     1 +
 drivers/scsi/vhba/Kconfig                                       |     9 +
 drivers/scsi/vhba/Makefile                                      |     4 +
 drivers/scsi/vhba/vhba.c                                        |  1076 +
 drivers/tty/Kconfig                                             |    13 +
 drivers/usb/host/xhci-hub.c                                     |     2 +-
 drivers/video/logo/Kconfig                                      |    95 +-
 drivers/video/logo/Makefile                                     |    12 +
 drivers/video/logo/logo.c                                       |   190 +-
 drivers/video/logo/logo_arch_clut224.ppm                        | 43204 ++++++++++++++++++++++++++
 drivers/video/logo/logo_bsd_clut224.ppm                         |  2403 ++
 drivers/video/logo/logo_debian_clut224.ppm                      |   883 +
 drivers/video/logo/logo_exherbo_clut224.ppm                     |   963 +
 drivers/video/logo/logo_fbsd_clut224.ppm                        |  2403 ++
 drivers/video/logo/logo_fedoraglossy_clut224.ppm                |  1123 +
 drivers/video/logo/logo_fedorasimple_clut224.ppm                |  1123 +
 drivers/video/logo/logo_gentoo_clut224.ppm                      |   803 +
 drivers/video/logo/logo_oldzen_clut224.ppm                      |   882 +
 drivers/video/logo/logo_slackware_clut224.ppm                   |  1123 +
 drivers/video/logo/logo_tits_clut224.ppm                        |  1443 +
 drivers/video/logo/logo_zen_clut224.ppm                         |  2043 ++
 fs/Kconfig                                                      |     2 +
 fs/Makefile                                                     |     2 +
 fs/aufs/Kconfig                                                 |   199 +
 fs/aufs/Makefile                                                |    46 +
 fs/aufs/aufs.h                                                  |    60 +
 fs/aufs/branch.c                                                |  1432 +
 fs/aufs/branch.h                                                |   333 +
 fs/aufs/conf.mk                                                 |    40 +
 fs/aufs/cpup.c                                                  |  1441 +
 fs/aufs/cpup.h                                                  |    99 +
 fs/aufs/dbgaufs.c                                               |   437 +
 fs/aufs/dbgaufs.h                                               |    48 +
 fs/aufs/dcsub.c                                                 |   225 +
 fs/aufs/dcsub.h                                                 |   136 +
 fs/aufs/debug.c                                                 |   440 +
 fs/aufs/debug.h                                                 |   225 +
 fs/aufs/dentry.c                                                |  1152 +
 fs/aufs/dentry.h                                                |   266 +
 fs/aufs/dinfo.c                                                 |   553 +
 fs/aufs/dir.c                                                   |   759 +
 fs/aufs/dir.h                                                   |   131 +
 fs/aufs/dirren.c                                                |  1315 +
 fs/aufs/dirren.h                                                |   139 +
 fs/aufs/dynop.c                                                 |   369 +
 fs/aufs/dynop.h                                                 |    74 +
 fs/aufs/export.c                                                |   836 +
 fs/aufs/f_op.c                                                  |   817 +
 fs/aufs/fhsm.c                                                  |   426 +
 fs/aufs/file.c                                                  |   856 +
 fs/aufs/file.h                                                  |   340 +
 fs/aufs/finfo.c                                                 |   148 +
 fs/aufs/fstype.h                                                |   400 +
 fs/aufs/hbl.h                                                   |    64 +
 fs/aufs/hfsnotify.c                                             |   289 +
 fs/aufs/hfsplus.c                                               |    56 +
 fs/aufs/hnotify.c                                               |   719 +
 fs/aufs/i_op.c                                                  |  1459 +
 fs/aufs/i_op_add.c                                              |   920 +
 fs/aufs/i_op_del.c                                              |   511 +
 fs/aufs/i_op_ren.c                                              |  1246 +
 fs/aufs/iinfo.c                                                 |   285 +
 fs/aufs/inode.c                                                 |   527 +
 fs/aufs/inode.h                                                 |   695 +
 fs/aufs/ioctl.c                                                 |   219 +
 fs/aufs/loop.c                                                  |   147 +
 fs/aufs/loop.h                                                  |    52 +
 fs/aufs/magic.mk                                                |    31 +
 fs/aufs/module.c                                                |   266 +
 fs/aufs/module.h                                                |   101 +
 fs/aufs/mvdown.c                                                |   704 +
 fs/aufs/opts.c                                                  |  1891 ++
 fs/aufs/opts.h                                                  |   224 +
 fs/aufs/plink.c                                                 |   515 +
 fs/aufs/poll.c                                                  |    53 +
 fs/aufs/posix_acl.c                                             |   102 +
 fs/aufs/procfs.c                                                |   170 +
 fs/aufs/rdu.c                                                   |   381 +
 fs/aufs/rwsem.h                                                 |    72 +
 fs/aufs/sbinfo.c                                                |   304 +
 fs/aufs/super.c                                                 |  1051 +
 fs/aufs/super.h                                                 |   626 +
 fs/aufs/sysaufs.c                                               |   104 +
 fs/aufs/sysaufs.h                                               |   101 +
 fs/aufs/sysfs.c                                                 |   376 +
 fs/aufs/sysrq.c                                                 |   159 +
 fs/aufs/vdir.c                                                  |   893 +
 fs/aufs/vfsub.c                                                 |   894 +
 fs/aufs/vfsub.h                                                 |   354 +
 fs/aufs/wbr_policy.c                                            |   830 +
 fs/aufs/whout.c                                                 |  1061 +
 fs/aufs/whout.h                                                 |    85 +
 fs/aufs/wkq.c                                                   |   390 +
 fs/aufs/wkq.h                                                   |    93 +
 fs/aufs/xattr.c                                                 |   355 +
 fs/aufs/xino.c                                                  |  1469 +
 fs/dcache.c                                                     |     4 +-
 fs/drop_caches.c                                                |     7 +
 fs/exec.c                                                       |     9 +-
 fs/exfat/Kconfig                                                |    39 +
 fs/exfat/LICENSE                                                |   339 +
 fs/exfat/Makefile                                               |    54 +
 fs/exfat/README.md                                              |    98 +
 fs/exfat/dkms.conf                                              |     7 +
 fs/exfat/exfat-km.mk                                            |    11 +
 fs/exfat/exfat_api.c                                            |   528 +
 fs/exfat/exfat_api.h                                            |   206 +
 fs/exfat/exfat_bitmap.c                                         |    63 +
 fs/exfat/exfat_bitmap.h                                         |    55 +
 fs/exfat/exfat_blkdev.c                                         |   197 +
 fs/exfat/exfat_blkdev.h                                         |    73 +
 fs/exfat/exfat_cache.c                                          |   784 +
 fs/exfat/exfat_cache.h                                          |    85 +
 fs/exfat/exfat_config.h                                         |    69 +
 fs/exfat/exfat_core.c                                           |  5138 +++
 fs/exfat/exfat_core.h                                           |   671 +
 fs/exfat/exfat_data.c                                           |    77 +
 fs/exfat/exfat_data.h                                           |    58 +
 fs/exfat/exfat_nls.c                                            |   448 +
 fs/exfat/exfat_nls.h                                            |    91 +
 fs/exfat/exfat_oal.c                                            |   196 +
 fs/exfat/exfat_oal.h                                            |    74 +
 fs/exfat/exfat_super.c                                          |  2711 ++
 fs/exfat/exfat_super.h                                          |   171 +
 fs/exfat/exfat_upcase.c                                         |   405 +
 fs/exfat/exfat_version.h                                        |    19 +
 fs/fcntl.c                                                      |     5 +-
 fs/file_table.c                                                 |     4 +
 fs/inode.c                                                      |     3 +-
 fs/namespace.c                                                  |     9 +
 fs/notify/group.c                                               |     4 +
 fs/notify/mark.c                                                |     4 +
 fs/open.c                                                       |     6 +
 fs/proc/base.c                                                  |     4 +-
 fs/proc/meminfo.c                                               |     4 +
 fs/proc/nommu.c                                                 |     5 +-
 fs/proc/task_mmu.c                                              |     7 +-
 fs/proc/task_nommu.c                                            |     5 +-
 fs/read_write.c                                                 |    26 +
 fs/splice.c                                                     |    12 +-
 fs/super.c                                                      |     2 +-
 fs/sync.c                                                       |     3 +-
 fs/xattr.c                                                      |     1 +
 include/Documentation/ABI/testing/debugfs-aufs                  |    50 +
 include/Documentation/ABI/testing/sysfs-aufs                    |    31 +
 include/Documentation/filesystems/aufs/README                   |   393 +
 include/Documentation/filesystems/aufs/design/01intro.txt       |   171 +
 include/Documentation/filesystems/aufs/design/02struct.txt      |   258 +
 include/Documentation/filesystems/aufs/design/03atomic_open.txt |    85 +
 include/Documentation/filesystems/aufs/design/03lookup.txt      |   113 +
 include/Documentation/filesystems/aufs/design/04branch.txt      |    74 +
 include/Documentation/filesystems/aufs/design/05wbr_policy.txt  |    64 +
 include/Documentation/filesystems/aufs/design/06dirren.dot      |    31 +
 include/Documentation/filesystems/aufs/design/06dirren.txt      |   102 +
 include/Documentation/filesystems/aufs/design/06fhsm.txt        |   120 +
 include/Documentation/filesystems/aufs/design/06mmap.txt        |    72 +
 include/Documentation/filesystems/aufs/design/06xattr.txt       |    96 +
 include/Documentation/filesystems/aufs/design/07export.txt      |    58 +
 include/Documentation/filesystems/aufs/design/08shwh.txt        |    52 +
 include/Documentation/filesystems/aufs/design/10dynop.txt       |    47 +
 include/asm-generic/pgtable.h                                   |    17 +-
 include/fs/aufs/Kconfig                                         |   199 +
 include/fs/aufs/Makefile                                        |    46 +
 include/fs/aufs/aufs.h                                          |    60 +
 include/fs/aufs/branch.c                                        |  1432 +
 include/fs/aufs/branch.h                                        |   333 +
 include/fs/aufs/conf.mk                                         |    40 +
 include/fs/aufs/cpup.c                                          |  1442 +
 include/fs/aufs/cpup.h                                          |    99 +
 include/fs/aufs/dbgaufs.c                                       |   437 +
 include/fs/aufs/dbgaufs.h                                       |    48 +
 include/fs/aufs/dcsub.c                                         |   225 +
 include/fs/aufs/dcsub.h                                         |   136 +
 include/fs/aufs/debug.c                                         |   440 +
 include/fs/aufs/debug.h                                         |   225 +
 include/fs/aufs/dentry.c                                        |  1152 +
 include/fs/aufs/dentry.h                                        |   266 +
 include/fs/aufs/dinfo.c                                         |   553 +
 include/fs/aufs/dir.c                                           |   759 +
 include/fs/aufs/dir.h                                           |   131 +
 include/fs/aufs/dirren.c                                        |  1315 +
 include/fs/aufs/dirren.h                                        |   139 +
 include/fs/aufs/dynop.c                                         |   369 +
 include/fs/aufs/dynop.h                                         |    74 +
 include/fs/aufs/export.c                                        |   836 +
 include/fs/aufs/f_op.c                                          |   817 +
 include/fs/aufs/fhsm.c                                          |   427 +
 include/fs/aufs/file.c                                          |   856 +
 include/fs/aufs/file.h                                          |   340 +
 include/fs/aufs/finfo.c                                         |   148 +
 include/fs/aufs/fstype.h                                        |   400 +
 include/fs/aufs/hbl.h                                           |    64 +
 include/fs/aufs/hfsnotify.c                                     |   289 +
 include/fs/aufs/hfsplus.c                                       |    56 +
 include/fs/aufs/hnotify.c                                       |   719 +
 include/fs/aufs/i_op.c                                          |  1459 +
 include/fs/aufs/i_op_add.c                                      |   920 +
 include/fs/aufs/i_op_del.c                                      |   511 +
 include/fs/aufs/i_op_ren.c                                      |  1246 +
 include/fs/aufs/iinfo.c                                         |   285 +
 include/fs/aufs/inode.c                                         |   527 +
 include/fs/aufs/inode.h                                         |   695 +
 include/fs/aufs/ioctl.c                                         |   219 +
 include/fs/aufs/loop.c                                          |   147 +
 include/fs/aufs/loop.h                                          |    52 +
 include/fs/aufs/magic.mk                                        |    31 +
 include/fs/aufs/module.c                                        |   266 +
 include/fs/aufs/module.h                                        |   101 +
 include/fs/aufs/mvdown.c                                        |   704 +
 include/fs/aufs/opts.c                                          |  1891 ++
 include/fs/aufs/opts.h                                          |   224 +
 include/fs/aufs/plink.c                                         |   515 +
 include/fs/aufs/poll.c                                          |    53 +
 include/fs/aufs/posix_acl.c                                     |   102 +
 include/fs/aufs/procfs.c                                        |   170 +
 include/fs/aufs/rdu.c                                           |   381 +
 include/fs/aufs/rwsem.h                                         |    72 +
 include/fs/aufs/sbinfo.c                                        |   304 +
 include/fs/aufs/super.c                                         |  1046 +
 include/fs/aufs/super.h                                         |   626 +
 include/fs/aufs/sysaufs.c                                       |   104 +
 include/fs/aufs/sysaufs.h                                       |   101 +
 include/fs/aufs/sysfs.c                                         |   376 +
 include/fs/aufs/sysrq.c                                         |   159 +
 include/fs/aufs/vdir.c                                          |   893 +
 include/fs/aufs/vfsub.c                                         |   894 +
 include/fs/aufs/vfsub.h                                         |   360 +
 include/fs/aufs/wbr_policy.c                                    |   830 +
 include/fs/aufs/whout.c                                         |  1061 +
 include/fs/aufs/whout.h                                         |    85 +
 include/fs/aufs/wkq.c                                           |   390 +
 include/fs/aufs/wkq.h                                           |    93 +
 include/fs/aufs/xattr.c                                         |   355 +
 include/fs/aufs/xino.c                                          |  1469 +
 include/linux/bio.h                                             |     2 +
 include/linux/blk_types.h                                       |     3 +-
 include/linux/blkdev.h                                          |    10 +-
 include/linux/file.h                                            |     1 +
 include/linux/fs.h                                              |    21 +
 include/linux/fs_uuid.h                                         |    19 +
 include/linux/gfp.h                                             |     6 +-
 include/linux/init_task.h                                       |     4 +
 include/linux/ioprio.h                                          |     2 +
 include/linux/ksm.h                                             |    44 +-
 include/linux/linux_logo.h                                      |    12 +
 include/linux/lockdep.h                                         |     3 +
 include/linux/mm.h                                              |    23 +
 include/linux/mm_types.h                                        |     5 +
 include/linux/mmzone.h                                          |     5 +-
 include/linux/mnt_namespace.h                                   |     3 +
 include/linux/page-flags.h                                      |    25 +-
 include/linux/sched.h                                           |    60 +-
 include/linux/sched/nohz.h                                      |     4 +-
 include/linux/sched/prio.h                                      |    12 +
 include/linux/sched/rt.h                                        |     2 +
 include/linux/sched/task.h                                      |     2 +-
 include/linux/skip_list.h                                       |    33 +
 include/linux/splice.h                                          |     6 +
 include/linux/sradix-tree.h                                     |    77 +
 include/linux/suspend.h                                         |    67 +
 include/linux/swap.h                                            |     7 +
 include/linux/thinkpad_ec.h                                     |    47 +
 include/linux/thread_info.h                                     |     5 +-
 include/linux/tuxonice.h                                        |    48 +
 include/linux/uksm.h                                            |   149 +
 include/trace/events/fs.h                                       |    53 +
 include/trace/events/mmflags.h                                  |    10 +
 include/uapi/linux/aufs_type.h                                  |   447 +
 include/uapi/linux/netlink.h                                    |     4 +-
 include/uapi/linux/sched.h                                      |     9 +-
 include/uapi/linux/vt.h                                         |    15 +-
 init/Kconfig                                                    |    75 +-
 init/do_mounts.c                                                |     2 +
 init/do_mounts_initrd.c                                         |     6 +
 init/init_task.c                                                |    10 +
 init/main.c                                                     |     3 +-
 kernel/Makefile                                                 |     2 +-
 kernel/delayacct.c                                              |     2 +-
 kernel/exit.c                                                   |     4 +-
 kernel/fork.c                                                   |    21 +-
 kernel/kthread.c                                                |    30 +-
 kernel/livepatch/transition.c                                   |     8 +-
 kernel/locking/lockdep.c                                        |     4 +-
 kernel/power/Kconfig                                            |   238 +
 kernel/power/Makefile                                           |    31 +
 kernel/power/hibernate.c                                        |    34 +-
 kernel/power/power.h                                            |    37 +
 kernel/power/snapshot.c                                         |   325 +-
 kernel/power/tuxonice.h                                         |   260 +
 kernel/power/tuxonice_alloc.c                                   |   308 +
 kernel/power/tuxonice_alloc.h                                   |    54 +
 kernel/power/tuxonice_atomic_copy.c                             |   471 +
 kernel/power/tuxonice_atomic_copy.h                             |    25 +
 kernel/power/tuxonice_bio.h                                     |    80 +
 kernel/power/tuxonice_bio_chains.c                              |  1121 +
 kernel/power/tuxonice_bio_core.c                                |  1944 ++
 kernel/power/tuxonice_bio_internal.h                            |   101 +
 kernel/power/tuxonice_bio_signature.c                           |   403 +
 kernel/power/tuxonice_builtin.c                                 |   502 +
 kernel/power/tuxonice_builtin.h                                 |    41 +
 kernel/power/tuxonice_checksum.c                                |   401 +
 kernel/power/tuxonice_checksum.h                                |    31 +
 kernel/power/tuxonice_cluster.c                                 |  1058 +
 kernel/power/tuxonice_cluster.h                                 |    18 +
 kernel/power/tuxonice_compress.c                                |   452 +
 kernel/power/tuxonice_copy_before_write.c                       |   240 +
 kernel/power/tuxonice_extent.c                                  |   144 +
 kernel/power/tuxonice_extent.h                                  |    45 +
 kernel/power/tuxonice_file.c                                    |   484 +
 kernel/power/tuxonice_highlevel.c                               |  1429 +
 kernel/power/tuxonice_incremental.c                             |   402 +
 kernel/power/tuxonice_io.c                                      |  1936 ++
 kernel/power/tuxonice_io.h                                      |    72 +
 kernel/power/tuxonice_modules.c                                 |   520 +
 kernel/power/tuxonice_modules.h                                 |   212 +
 kernel/power/tuxonice_netlink.c                                 |   324 +
 kernel/power/tuxonice_netlink.h                                 |    62 +
 kernel/power/tuxonice_pagedir.c                                 |   345 +
 kernel/power/tuxonice_pagedir.h                                 |    50 +
 kernel/power/tuxonice_pageflags.c                               |    18 +
 kernel/power/tuxonice_pageflags.h                               |   106 +
 kernel/power/tuxonice_power_off.c                               |   286 +
 kernel/power/tuxonice_power_off.h                               |    24 +
 kernel/power/tuxonice_prepare_image.c                           |  1090 +
 kernel/power/tuxonice_prepare_image.h                           |    38 +
 kernel/power/tuxonice_prune.c                                   |   406 +
 kernel/power/tuxonice_storage.c                                 |   282 +
 kernel/power/tuxonice_storage.h                                 |    45 +
 kernel/power/tuxonice_swap.c                                    |   474 +
 kernel/power/tuxonice_sysfs.c                                   |   333 +
 kernel/power/tuxonice_sysfs.h                                   |   137 +
 kernel/power/tuxonice_ui.c                                      |   247 +
 kernel/power/tuxonice_ui.h                                      |    97 +
 kernel/power/tuxonice_userui.c                                  |   658 +
 kernel/rcu/Kconfig                                              |     2 +-
 kernel/sched/Makefile                                           |    14 +-
 kernel/sched/MuQSS.c                                            |  7231 +++++
 kernel/sched/MuQSS.h                                            |   768 +
 kernel/sched/cpufreq_schedutil.c                                |     4 +
 kernel/sched/cputime.c                                          |    22 +-
 kernel/sched/fair.c                                             |    25 +
 kernel/sched/idle.c                                             |    10 +-
 kernel/sched/sched.h                                            |    30 +
 kernel/sched/topology.c                                         |     8 +
 kernel/skip_list.c                                              |   148 +
 kernel/sysctl.c                                                 |    64 +-
 kernel/task_work.c                                              |     1 +
 kernel/time/clockevents.c                                       |     5 +
 kernel/time/posix-cpu-timers.c                                  |     8 +-
 kernel/time/timer.c                                             |     7 +-
 kernel/trace/trace_selftest.c                                   |     5 +
 kernel/user_namespace.c                                         |     7 +
 lib/Kconfig.debug                                               |     3 +-
 lib/Makefile                                                    |     2 +-
 lib/sradix-tree.c                                               |   476 +
 lib/swiotlb.c                                                   |     1 +
 mm/Kconfig                                                      |    26 +
 mm/Makefile                                                     |     5 +-
 mm/filemap.c                                                    |     2 +-
 mm/memory.c                                                     |    36 +-
 mm/mmap.c                                                       |    70 +-
 mm/nommu.c                                                      |    10 +-
 mm/page-writeback.c                                             |     8 +
 mm/prfile.c                                                     |    86 +
 mm/rmap.c                                                       |     4 +-
 mm/swapfile.c                                                   |    64 +-
 mm/uksm.c                                                       |  5584 ++++
 mm/vmscan.c                                                     |    17 +-
 mm/vmstat.c                                                     |     3 +
 net/ipv4/Kconfig                                                |     4 +
 scripts/mkcompile_h                                             |     4 +-
 scripts/tuxonice_output_to_csv.sh                               |    35 +
 security/commoncap.c                                            |     2 +
 security/device_cgroup.c                                        |     2 +
 security/security.c                                             |    10 +
 439 files changed, 203523 insertions(+), 763 deletions(-)

diff --git a/Documentation/ABI/testing/debugfs-aufs b/Documentation/ABI/testing/debugfs-aufs
new file mode 100644
index 000000000..99642d105
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-aufs
@@ -0,0 +1,50 @@
+What:		/debug/aufs/si_<id>/
+Date:		March 2009
+Contact:	J. R. Okajima <hooanon05g@gmail.com>
+Description:
+		Under /debug/aufs, a directory named si_<id> is created
+		per aufs mount, where <id> is a unique id generated
+		internally.
+
+What:		/debug/aufs/si_<id>/plink
+Date:		Apr 2013
+Contact:	J. R. Okajima <hooanon05g@gmail.com>
+Description:
+		It has three lines and shows the information about the
+		pseudo-link. The first line is a single number
+		representing a number of buckets. The second line is a
+		number of pseudo-links per buckets (separated by a
+		blank). The last line is a single number representing a
+		total number of psedo-links.
+		When the aufs mount option 'noplink' is specified, it
+		will show "1\n0\n0\n".
+
+What:		/debug/aufs/si_<id>/xib
+Date:		March 2009
+Contact:	J. R. Okajima <hooanon05g@gmail.com>
+Description:
+		It shows the consumed blocks by xib (External Inode Number
+		Bitmap), its block size and file size.
+		When the aufs mount option 'noxino' is specified, it
+		will be empty. About XINO files, see the aufs manual.
+
+What:		/debug/aufs/si_<id>/xino0, xino1 ... xinoN
+Date:		March 2009
+Contact:	J. R. Okajima <hooanon05g@gmail.com>
+Description:
+		It shows the consumed blocks by xino (External Inode Number
+		Translation Table), its link count, block size and file
+		size.
+		When the aufs mount option 'noxino' is specified, it
+		will be empty. About XINO files, see the aufs manual.
+
+What:		/debug/aufs/si_<id>/xigen
+Date:		March 2009
+Contact:	J. R. Okajima <hooanon05g@gmail.com>
+Description:
+		It shows the consumed blocks by xigen (External Inode
+		Generation Table), its block size and file size.
+		If CONFIG_AUFS_EXPORT is disabled, this entry will not
+		be created.
+		When the aufs mount option 'noxino' is specified, it
+		will be empty. About XINO files, see the aufs manual.
diff --git a/Documentation/ABI/testing/sysfs-aufs b/Documentation/ABI/testing/sysfs-aufs
new file mode 100644
index 000000000..82f951849
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-aufs
@@ -0,0 +1,31 @@
+What:		/sys/fs/aufs/si_<id>/
+Date:		March 2009
+Contact:	J. R. Okajima <hooanon05g@gmail.com>
+Description:
+		Under /sys/fs/aufs, a directory named si_<id> is created
+		per aufs mount, where <id> is a unique id generated
+		internally.
+
+What:		/sys/fs/aufs/si_<id>/br0, br1 ... brN
+Date:		March 2009
+Contact:	J. R. Okajima <hooanon05g@gmail.com>
+Description:
+		It shows the abolute path of a member directory (which
+		is called branch) in aufs, and its permission.
+
+What:		/sys/fs/aufs/si_<id>/brid0, brid1 ... bridN
+Date:		July 2013
+Contact:	J. R. Okajima <hooanon05g@gmail.com>
+Description:
+		It shows the id of a member directory (which is called
+		branch) in aufs.
+
+What:		/sys/fs/aufs/si_<id>/xi_path
+Date:		March 2009
+Contact:	J. R. Okajima <hooanon05g@gmail.com>
+Description:
+		It shows the abolute path of XINO (External Inode Number
+		Bitmap, Translation Table and Generation Table) file
+		even if it is the default path.
+		When the aufs mount option 'noxino' is specified, it
+		will be empty. About XINO files, see the aufs manual.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 1d1d53f85..c38f78639 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3812,6 +3812,14 @@
 			Memory area to be used by remote processor image,
 			managed by CMA.
 
+	rqshare=	[X86] Select the MuQSS scheduler runqueue sharing type.
+			Format: <string>
+			smt -- Share SMT (hyperthread) sibling runqueues
+			mc -- Share MC (multicore) sibling runqueues
+			smp -- Share SMP runqueues
+			none -- So not share any runqueues
+			Default value is mc
+
 	rw		[KNL] Mount root device read-write on boot
 
 	S		[KNL] Run init in single mode
@@ -4447,6 +4455,9 @@
 					HIGHMEM regardless of setting
 					of CONFIG_HIGHPTE.
 
+	uuid_debug=	(Boolean) whether to enable debugging of TuxOnIce's
+			uuid support.
+
 	vdso=		[X86,SH]
 			On X86_32, this is an alias for vdso32=.  Otherwise:
 
diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt
index 8d8d8f06c..30ef2dba8 100644
--- a/Documentation/block/bfq-iosched.txt
+++ b/Documentation/block/bfq-iosched.txt
@@ -11,6 +11,15 @@ controllers), BFQ's main features are:
   groups (switching back to time distribution when needed to keep
   throughput high).
 
+If bfq-mq patches have been applied, then the following three
+instances of BFQ are available (otherwise only the first instance):
+- bfq: mainline version of BFQ, for blk-mq
+- bfq-mq: development version of BFQ for blk-mq; this version contains
+   also all latest features and fixes not yet landed in mainline, plus many
+   safety checks
+- bfq-sq: BFQ for legacy blk; also this version contains latest features
+   and fixes, as well as safety checks
+
 In its default configuration, BFQ privileges latency over
 throughput. So, when needed for achieving a lower latency, BFQ builds
 schedules that may lead to a lower throughput. If your main or only
@@ -22,27 +31,42 @@ latency and throughput, or on how to maximize throughput.
 
 BFQ has a non-null overhead, which limits the maximum IOPS that a CPU
 can process for a device scheduled with BFQ. To give an idea of the
-limits on slow or average CPUs, here are, first, the limits of BFQ for
-three different CPUs, on, respectively, an average laptop, an old
-desktop, and a cheap embedded system, in case full hierarchical
-support is enabled (i.e., CONFIG_BFQ_GROUP_IOSCHED is set), but
+limits on slow or average CPUs, here are, first, the limits of bfq-mq
+and bfq for three different CPUs, on, respectively, an average laptop,
+an old desktop, and a cheap embedded system, in case full hierarchical
+support is enabled (i.e., CONFIG_MQ_BFQ_GROUP_IOSCHED is set for
+bfq-mq, or CONFIG_BFQ_GROUP_IOSCHED is set for bfq), but
 CONFIG_DEBUG_BLK_CGROUP is not set (Section 4-2):
 - Intel i7-4850HQ: 400 KIOPS
 - AMD A8-3850: 250 KIOPS
 - ARM CortexTM-A53 Octa-core: 80 KIOPS
 
-If CONFIG_DEBUG_BLK_CGROUP is set (and of course full hierarchical
-support is enabled), then the sustainable throughput with BFQ
-decreases, because all blkio.bfq* statistics are created and updated
-(Section 4-2). For BFQ, this leads to the following maximum
-sustainable throughputs, on the same systems as above:
+As for bfq-sq, it cannot reach the above IOPS, because of the
+inherent, lower parallelism of legacy blk and of the components within
+it (including bfq-sq itself). In particular, results with
+CONFIG_DEBUG_BLK_CGROUP unset are rather fluctuating. The limits
+reported below for the case CONFIG_DEBUG_BLK_CGROUP set will however
+provide a lower bound to the limits of bfq-sq.
+
+Turning back to bfq-mq and bfq, If CONFIG_DEBUG_BLK_CGROUP is set (and
+of course full hierarchical support is enabled), then the sustainable
+throughput with bfq-mq and bfq decreases, because all blkio.bfq*
+statistics are created and updated (Section 4-2).  For bfq-mq and bfq,
+this leads to the following maximum sustainable throughputs, on the
+same systems as above:
 - Intel i7-4850HQ: 310 KIOPS
 - AMD A8-3850: 200 KIOPS
 - ARM CortexTM-A53 Octa-core: 56 KIOPS
 
-BFQ works for multi-queue devices too.
+Finally, if CONFIG_DEBUG_BLK_CGROUP is set (and full hierarchical
+support is enabled), then bfq-sq exhibits the following limits:
+- Intel i7-4850HQ: 250 KIOPS
+- AMD A8-3850: 170 KIOPS
+- ARM CortexTM-A53 Octa-core: 45 KIOPS
+
+BFQ works for multi-queue devices too (bfq and bfq-mq instances).
 
-The table of contents follow. Impatients can just jump to Section 3.
+The table of contents follows. Impatients can just jump to Section 3.
 
 CONTENTS
 
@@ -509,25 +533,27 @@ To get proportional sharing of bandwidth with BFQ for a given device,
 BFQ must of course be the active scheduler for that device.
 
 Within each group directory, the names of the files associated with
-BFQ-specific cgroup parameters and stats begin with the "bfq."
-prefix. So, with cgroups-v1 or cgroups-v2, the full prefix for
-BFQ-specific files is "blkio.bfq." or "io.bfq." For example, the group
-parameter to set the weight of a group with BFQ is blkio.bfq.weight
+BFQ-specific cgroup parameters and stats begin with the "bfq.",
+"bfq-sq." or "bfq-mq." prefix, depending on which instance of bfq you
+want to use. So, with cgroups-v1 or cgroups-v2, the full prefix for
+BFQ-specific files is "blkio.bfqX." or "io.bfqX.", where X can be ""
+(i.e., null string), "-sq" or "-mq". For example, the group parameter
+to set the weight of a group with the mainline BFQ is blkio.bfq.weight
 or io.bfq.weight.
 
 As for cgroups-v1 (blkio controller), the exact set of stat files
-created, and kept up-to-date by bfq, depends on whether
-CONFIG_DEBUG_BLK_CGROUP is set. If it is set, then bfq creates all
+created, and kept up-to-date by bfq*, depends on whether
+CONFIG_DEBUG_BLK_CGROUP is set. If it is set, then bfq* creates all
 the stat files documented in
 Documentation/cgroup-v1/blkio-controller.txt. If, instead,
-CONFIG_DEBUG_BLK_CGROUP is not set, then bfq creates only the files
-blkio.bfq.io_service_bytes
-blkio.bfq.io_service_bytes_recursive
-blkio.bfq.io_serviced
-blkio.bfq.io_serviced_recursive
+CONFIG_DEBUG_BLK_CGROUP is not set, then bfq* creates only the files
+blkio.bfq*.io_service_bytes
+blkio.bfq*.io_service_bytes_recursive
+blkio.bfq*.io_serviced
+blkio.bfq*.io_serviced_recursive
 
 The value of CONFIG_DEBUG_BLK_CGROUP greatly influences the maximum
-throughput sustainable with bfq, because updating the blkio.bfq.*
+throughput sustainable with bfq*, because updating the blkio.bfq*
 stats is rather costly, especially for some of the stats enabled by
 CONFIG_DEBUG_BLK_CGROUP.
 
@@ -536,7 +562,7 @@ Parameters to set
 
 For each group, there is only the following parameter to set.
 
-weight (namely blkio.bfq.weight or io.bfq-weight): the weight of the
+weight (namely blkio.bfqX.weight or io.bfqX.weight): the weight of the
 group inside its parent. Available values: 1..10000 (default 100). The
 linear mapping between ioprio and weights, described at the beginning
 of the tunable section, is still valid, but all weights higher than
diff --git a/Documentation/filesystems/aufs/README b/Documentation/filesystems/aufs/README
new file mode 100644
index 000000000..fa82b6394
--- /dev/null
+++ b/Documentation/filesystems/aufs/README
@@ -0,0 +1,393 @@
+
+Aufs4 -- advanced multi layered unification filesystem version 4.x
+http://aufs.sf.net
+Junjiro R. Okajima
+
+
+0. Introduction
+----------------------------------------
+In the early days, aufs was entirely re-designed and re-implemented
+Unionfs Version 1.x series. Adding many original ideas, approaches,
+improvements and implementations, it becomes totally different from
+Unionfs while keeping the basic features.
+Recently, Unionfs Version 2.x series begin taking some of the same
+approaches to aufs1's.
+Unionfs is being developed by Professor Erez Zadok at Stony Brook
+University and his team.
+
+Aufs4 supports linux-4.0 and later, and for linux-3.x series try aufs3.
+If you want older kernel version support, try aufs2-2.6.git or
+aufs2-standalone.git repository, aufs1 from CVS on SourceForge.
+
+Note: it becomes clear that "Aufs was rejected. Let's give it up."
+      According to Christoph Hellwig, linux rejects all union-type
+      filesystems but UnionMount.
+<http://marc.info/?l=linux-kernel&m=123938533724484&w=2>
+
+PS. Al Viro seems have a plan to merge aufs as well as overlayfs and
+    UnionMount, and he pointed out an issue around a directory mutex
+    lock and aufs addressed it. But it is still unsure whether aufs will
+    be merged (or any other union solution).
+<http://marc.info/?l=linux-kernel&m=136312705029295&w=1>
+
+
+1. Features
+----------------------------------------
+- unite several directories into a single virtual filesystem. The member
+  directory is called as a branch.
+- you can specify the permission flags to the branch, which are 'readonly',
+  'readwrite' and 'whiteout-able.'
+- by upper writable branch, internal copyup and whiteout, files/dirs on
+  readonly branch are modifiable logically.
+- dynamic branch manipulation, add, del.
+- etc...
+
+Also there are many enhancements in aufs, such as:
+- test only the highest one for the directory permission (dirperm1)
+- copyup on open (coo=)
+- 'move' policy for copy-up between two writable branches, after
+  checking free space.
+- xattr, acl
+- readdir(3) in userspace.
+- keep inode number by external inode number table
+- keep the timestamps of file/dir in internal copyup operation
+- seekable directory, supporting NFS readdir.
+- whiteout is hardlinked in order to reduce the consumption of inodes
+  on branch
+- do not copyup, nor create a whiteout when it is unnecessary
+- revert a single systemcall when an error occurs in aufs
+- remount interface instead of ioctl
+- maintain /etc/mtab by an external command, /sbin/mount.aufs.
+- loopback mounted filesystem as a branch
+- kernel thread for removing the dir who has a plenty of whiteouts
+- support copyup sparse file (a file which has a 'hole' in it)
+- default permission flags for branches
+- selectable permission flags for ro branch, whether whiteout can
+  exist or not
+- export via NFS.
+- support <sysfs>/fs/aufs and <debugfs>/aufs.
+- support multiple writable branches, some policies to select one
+  among multiple writable branches.
+- a new semantics for link(2) and rename(2) to support multiple
+  writable branches.
+- no glibc changes are required.
+- pseudo hardlink (hardlink over branches)
+- allow a direct access manually to a file on branch, e.g. bypassing aufs.
+  including NFS or remote filesystem branch.
+- userspace wrapper for pathconf(3)/fpathconf(3) with _PC_LINK_MAX.
+- and more...
+
+Currently these features are dropped temporary from aufs4.
+See design/08plan.txt in detail.
+- nested mount, i.e. aufs as readonly no-whiteout branch of another aufs
+  (robr)
+- statistics of aufs thread (/sys/fs/aufs/stat)
+
+Features or just an idea in the future (see also design/*.txt),
+- reorder the branch index without del/re-add.
+- permanent xino files for NFSD
+- an option for refreshing the opened files after add/del branches
+- light version, without branch manipulation. (unnecessary?)
+- copyup in userspace
+- inotify in userspace
+- readv/writev
+
+
+2. Download
+----------------------------------------
+There are three GIT trees for aufs4, aufs4-linux.git,
+aufs4-standalone.git, and aufs-util.git. Note that there is no "4" in
+"aufs-util.git."
+While the aufs-util is always necessary, you need either of aufs4-linux
+or aufs4-standalone.
+
+The aufs4-linux tree includes the whole linux mainline GIT tree,
+git://git.kernel.org/.../torvalds/linux.git.
+And you cannot select CONFIG_AUFS_FS=m for this version, eg. you cannot
+build aufs4 as an external kernel module.
+Several extra patches are not included in this tree. Only
+aufs4-standalone tree contains them. They are described in the later
+section "Configuration and Compilation."
+
+On the other hand, the aufs4-standalone tree has only aufs source files
+and necessary patches, and you can select CONFIG_AUFS_FS=m.
+But you need to apply all aufs patches manually.
+
+You will find GIT branches whose name is in form of "aufs4.x" where "x"
+represents the linux kernel version, "linux-4.x". For instance,
+"aufs4.0" is for linux-4.0. For latest "linux-4.x-rcN", use
+"aufs4.x-rcN" branch.
+
+o aufs4-linux tree
+$ git clone --reference /your/linux/git/tree \
+	git://github.com/sfjro/aufs4-linux.git aufs4-linux.git
+- if you don't have linux GIT tree, then remove "--reference ..."
+$ cd aufs4-linux.git
+$ git checkout origin/aufs4.0
+
+Or You may want to directly git-pull aufs into your linux GIT tree, and
+leave the patch-work to GIT.
+$ cd /your/linux/git/tree
+$ git remote add aufs4 git://github.com/sfjro/aufs4-linux.git
+$ git fetch aufs4
+$ git checkout -b my4.0 v4.0
+$ (add your local change...)
+$ git pull aufs4 aufs4.0
+- now you have v4.0 + your_changes + aufs4.0 in you my4.0 branch.
+- you may need to solve some conflicts between your_changes and
+  aufs4.0. in this case, git-rerere is recommended so that you can
+  solve the similar conflicts automatically when you upgrade to 4.1 or
+  later in the future.
+
+o aufs4-standalone tree
+$ git clone git://github.com/sfjro/aufs4-standalone.git aufs4-standalone.git
+$ cd aufs4-standalone.git
+$ git checkout origin/aufs4.0
+
+o aufs-util tree
+$ git clone git://git.code.sf.net/p/aufs/aufs-util aufs-util.git
+- note that the public aufs-util.git is on SourceForge instead of
+  GitHUB.
+$ cd aufs-util.git
+$ git checkout origin/aufs4.0
+
+Note: The 4.x-rcN branch is to be used with `rc' kernel versions ONLY.
+The minor version number, 'x' in '4.x', of aufs may not always
+follow the minor version number of the kernel.
+Because changes in the kernel that cause the use of a new
+minor version number do not always require changes to aufs-util.
+
+Since aufs-util has its own minor version number, you may not be
+able to find a GIT branch in aufs-util for your kernel's
+exact minor version number.
+In this case, you should git-checkout the branch for the
+nearest lower number.
+
+For (an unreleased) example:
+If you are using "linux-4.10" and the "aufs4.10" branch
+does not exist in aufs-util repository, then "aufs4.9", "aufs4.8"
+or something numerically smaller is the branch for your kernel.
+
+Also you can view all branches by
+	$ git branch -a
+
+
+3. Configuration and Compilation
+----------------------------------------
+Make sure you have git-checkout'ed the correct branch.
+
+For aufs4-linux tree,
+- enable CONFIG_AUFS_FS.
+- set other aufs configurations if necessary.
+
+For aufs4-standalone tree,
+There are several ways to build.
+
+1.
+- apply ./aufs4-kbuild.patch to your kernel source files.
+- apply ./aufs4-base.patch too.
+- apply ./aufs4-mmap.patch too.
+- apply ./aufs4-standalone.patch too, if you have a plan to set
+  CONFIG_AUFS_FS=m. otherwise you don't need ./aufs4-standalone.patch.
+- copy ./{Documentation,fs,include/uapi/linux/aufs_type.h} files to your
+  kernel source tree. Never copy $PWD/include/uapi/linux/Kbuild.
+- enable CONFIG_AUFS_FS, you can select either
+  =m or =y.
+- and build your kernel as usual.
+- install the built kernel.
+  Note: Since linux-3.9, every filesystem module requires an alias
+  "fs-<fsname>". You should make sure that "fs-aufs" is listed in your
+  modules.aliases file if you set CONFIG_AUFS_FS=m.
+- install the header files too by "make headers_install" to the
+  directory where you specify. By default, it is $PWD/usr.
+  "make help" shows a brief note for headers_install.
+- and reboot your system.
+
+2.
+- module only (CONFIG_AUFS_FS=m).
+- apply ./aufs4-base.patch to your kernel source files.
+- apply ./aufs4-mmap.patch too.
+- apply ./aufs4-standalone.patch too.
+- build your kernel, don't forget "make headers_install", and reboot.
+- edit ./config.mk and set other aufs configurations if necessary.
+  Note: You should read $PWD/fs/aufs/Kconfig carefully which describes
+  every aufs configurations.
+- build the module by simple "make".
+  Note: Since linux-3.9, every filesystem module requires an alias
+  "fs-<fsname>". You should make sure that "fs-aufs" is listed in your
+  modules.aliases file.
+- you can specify ${KDIR} make variable which points to your kernel
+  source tree.
+- install the files
+  + run "make install" to install the aufs module, or copy the built
+    $PWD/aufs.ko to /lib/modules/... and run depmod -a (or reboot simply).
+  + run "make install_headers" (instead of headers_install) to install
+    the modified aufs header file (you can specify DESTDIR which is
+    available in aufs standalone version's Makefile only), or copy
+    $PWD/usr/include/linux/aufs_type.h to /usr/include/linux or wherever
+    you like manually. By default, the target directory is $PWD/usr.
+- no need to apply aufs4-kbuild.patch, nor copying source files to your
+  kernel source tree.
+
+Note: The header file aufs_type.h is necessary to build aufs-util
+      as well as "make headers_install" in the kernel source tree.
+      headers_install is subject to be forgotten, but it is essentially
+      necessary, not only for building aufs-util.
+      You may not meet problems without headers_install in some older
+      version though.
+
+And then,
+- read README in aufs-util, build and install it
+- note that your distribution may contain an obsoleted version of
+  aufs_type.h in /usr/include/linux or something. When you build aufs
+  utilities, make sure that your compiler refers the correct aufs header
+  file which is built by "make headers_install."
+- if you want to use readdir(3) in userspace or pathconf(3) wrapper,
+  then run "make install_ulib" too. And refer to the aufs manual in
+  detail.
+
+There several other patches in aufs4-standalone.git. They are all
+optional. When you meet some problems, they will help you.
+- aufs4-loopback.patch
+  Supports a nested loopback mount in a branch-fs. This patch is
+  unnecessary until aufs produces a message like "you may want to try
+  another patch for loopback file".
+- vfs-ino.patch
+  Modifies a system global kernel internal function get_next_ino() in
+  order to stop assigning 0 for an inode-number. Not directly related to
+  aufs, but recommended generally.
+- tmpfs-idr.patch
+  Keeps the tmpfs inode number as the lowest value. Effective to reduce
+  the size of aufs XINO files for tmpfs branch. Also it prevents the
+  duplication of inode number, which is important for backup tools and
+  other utilities. When you find aufs XINO files for tmpfs branch
+  growing too much, try this patch.
+- lockdep-debug.patch
+  Because aufs is not only an ordinary filesystem (callee of VFS), but
+  also a caller of VFS functions for branch filesystems, subclassing of
+  the internal locks for LOCKDEP is necessary. LOCKDEP is a debugging
+  feature of linux kernel. If you enable CONFIG_LOCKDEP, then you will
+  need to apply this debug patch to expand several constant values.
+  If don't know what LOCKDEP, then you don't have apply this patch.
+
+
+4. Usage
+----------------------------------------
+At first, make sure aufs-util are installed, and please read the aufs
+manual, aufs.5 in aufs-util.git tree.
+$ man -l aufs.5
+
+And then,
+$ mkdir /tmp/rw /tmp/aufs
+# mount -t aufs -o br=/tmp/rw:${HOME} none /tmp/aufs
+
+Here is another example. The result is equivalent.
+# mount -t aufs -o br=/tmp/rw=rw:${HOME}=ro none /tmp/aufs
+  Or
+# mount -t aufs -o br:/tmp/rw none /tmp/aufs
+# mount -o remount,append:${HOME} /tmp/aufs
+
+Then, you can see whole tree of your home dir through /tmp/aufs. If
+you modify a file under /tmp/aufs, the one on your home directory is
+not affected, instead the same named file will be newly created under
+/tmp/rw. And all of your modification to a file will be applied to
+the one under /tmp/rw. This is called the file based Copy on Write
+(COW) method.
+Aufs mount options are described in aufs.5.
+If you run chroot or something and make your aufs as a root directory,
+then you need to customize the shutdown script. See the aufs manual in
+detail.
+
+Additionally, there are some sample usages of aufs which are a
+diskless system with network booting, and LiveCD over NFS.
+See sample dir in CVS tree on SourceForge.
+
+
+5. Contact
+----------------------------------------
+When you have any problems or strange behaviour in aufs, please let me
+know with:
+- /proc/mounts (instead of the output of mount(8))
+- /sys/module/aufs/*
+- /sys/fs/aufs/* (if you have them)
+- /debug/aufs/* (if you have them)
+- linux kernel version
+  if your kernel is not plain, for example modified by distributor,
+  the url where i can download its source is necessary too.
+- aufs version which was printed at loading the module or booting the
+  system, instead of the date you downloaded.
+- configuration (define/undefine CONFIG_AUFS_xxx)
+- kernel configuration or /proc/config.gz (if you have it)
+- behaviour which you think to be incorrect
+- actual operation, reproducible one is better
+- mailto: aufs-users at lists.sourceforge.net
+
+Usually, I don't watch the Public Areas(Bugs, Support Requests, Patches,
+and Feature Requests) on SourceForge. Please join and write to
+aufs-users ML.
+
+
+6. Acknowledgements
+----------------------------------------
+Thanks to everyone who have tried and are using aufs, whoever
+have reported a bug or any feedback.
+
+Especially donators:
+Tomas Matejicek(slax.org) made a donation (much more than once).
+	Since Apr 2010, Tomas M (the author of Slax and Linux Live
+	scripts) is making "doubling" donations.
+	Unfortunately I cannot list all of the donators, but I really
+	appreciate.
+	It ends Aug 2010, but the ordinary donation URL is still available.
+	<http://sourceforge.net/donate/index.php?group_id=167503>
+Dai Itasaka made a donation (2007/8).
+Chuck Smith made a donation (2008/4, 10 and 12).
+Henk Schoneveld made a donation (2008/9).
+Chih-Wei Huang, ASUS, CTC donated Eee PC 4G (2008/10).
+Francois Dupoux made a donation (2008/11).
+Bruno Cesar Ribas and Luis Carlos Erpen de Bona, C3SL serves public
+	aufs2 GIT tree (2009/2).
+William Grant made a donation (2009/3).
+Patrick Lane made a donation (2009/4).
+The Mail Archive (mail-archive.com) made donations (2009/5).
+Nippy Networks (Ed Wildgoose) made a donation (2009/7).
+New Dream Network, LLC (www.dreamhost.com) made a donation (2009/11).
+Pavel Pronskiy made a donation (2011/2).
+Iridium and Inmarsat satellite phone retailer (www.mailasail.com), Nippy
+	Networks (Ed Wildgoose) made a donation for hardware (2011/3).
+Max Lekomcev (DOM-TV project) made a donation (2011/7, 12, 2012/3, 6 and
+11).
+Sam Liddicott made a donation (2011/9).
+Era Scarecrow made a donation (2013/4).
+Bor Ratajc made a donation (2013/4).
+Alessandro Gorreta made a donation (2013/4).
+POIRETTE Marc made a donation (2013/4).
+Alessandro Gorreta made a donation (2013/4).
+lauri kasvandik made a donation (2013/5).
+"pemasu from Finland" made a donation (2013/7).
+The Parted Magic Project made a donation (2013/9 and 11).
+Pavel Barta made a donation (2013/10).
+Nikolay Pertsev made a donation (2014/5).
+James B made a donation (2014/7 and 2015/7).
+Stefano Di Biase made a donation (2014/8).
+Daniel Epellei made a donation (2015/1).
+OmegaPhil made a donation (2016/1).
+Tomasz Szewczyk made a donation (2016/4).
+James Burry made a donation (2016/12).
+
+Thank you very much.
+Donations are always, including future donations, very important and
+helpful for me to keep on developing aufs.
+
+
+7.
+----------------------------------------
+If you are an experienced user, no explanation is needed. Aufs is
+just a linux filesystem.
+
+
+Enjoy!
+
+# Local variables: ;
+# mode: text;
+# End: ;
diff --git a/Documentation/filesystems/aufs/design/01intro.txt b/Documentation/filesystems/aufs/design/01intro.txt
new file mode 100644
index 000000000..aa1052982
--- /dev/null
+++ b/Documentation/filesystems/aufs/design/01intro.txt
@@ -0,0 +1,171 @@
+
+# Copyright (C) 2005-2018 Junjiro R. Okajima
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Introduction
+----------------------------------------
+
+aufs [ei ju: ef es] | /ey-yoo-ef-es/ | [a u f s]
+1. abbrev. for "advanced multi-layered unification filesystem".
+2. abbrev. for "another unionfs".
+3. abbrev. for "auf das" in German which means "on the" in English.
+   Ex. "Butter aufs Brot"(G) means "butter onto bread"(E).
+   But "Filesystem aufs Filesystem" is hard to understand.
+4. abbrev. for "African Urban Fashion Show".
+
+AUFS is a filesystem with features:
+- multi layered stackable unification filesystem, the member directory
+  is called as a branch.
+- branch permission and attribute, 'readonly', 'real-readonly',
+  'readwrite', 'whiteout-able', 'link-able whiteout', etc. and their
+  combination.
+- internal "file copy-on-write".
+- logical deletion, whiteout.
+- dynamic branch manipulation, adding, deleting and changing permission.
+- allow bypassing aufs, user's direct branch access.
+- external inode number translation table and bitmap which maintains the
+  persistent aufs inode number.
+- seekable directory, including NFS readdir.
+- file mapping, mmap and sharing pages.
+- pseudo-link, hardlink over branches.
+- loopback mounted filesystem as a branch.
+- several policies to select one among multiple writable branches.
+- revert a single systemcall when an error occurs in aufs.
+- and more...
+
+
+Multi Layered Stackable Unification Filesystem
+----------------------------------------------------------------------
+Most people already knows what it is.
+It is a filesystem which unifies several directories and provides a
+merged single directory. When users access a file, the access will be
+passed/re-directed/converted (sorry, I am not sure which English word is
+correct) to the real file on the member filesystem. The member
+filesystem is called 'lower filesystem' or 'branch' and has a mode
+'readonly' and 'readwrite.' And the deletion for a file on the lower
+readonly branch is handled by creating 'whiteout' on the upper writable
+branch.
+
+On LKML, there have been discussions about UnionMount (Jan Blunck,
+Bharata B Rao and Valerie Aurora) and Unionfs (Erez Zadok). They took
+different approaches to implement the merged-view.
+The former tries putting it into VFS, and the latter implements as a
+separate filesystem.
+(If I misunderstand about these implementations, please let me know and
+I shall correct it. Because it is a long time ago when I read their
+source files last time).
+
+UnionMount's approach will be able to small, but may be hard to share
+branches between several UnionMount since the whiteout in it is
+implemented in the inode on branch filesystem and always
+shared. According to Bharata's post, readdir does not seems to be
+finished yet.
+There are several missing features known in this implementations such as
+- for users, the inode number may change silently. eg. copy-up.
+- link(2) may break by copy-up.
+- read(2) may get an obsoleted filedata (fstat(2) too).
+- fcntl(F_SETLK) may be broken by copy-up.
+- unnecessary copy-up may happen, for example mmap(MAP_PRIVATE) after
+  open(O_RDWR).
+
+In linux-3.18, "overlay" filesystem (formerly known as "overlayfs") was
+merged into mainline. This is another implementation of UnionMount as a
+separated filesystem. All the limitations and known problems which
+UnionMount are equally inherited to "overlay" filesystem.
+
+Unionfs has a longer history. When I started implementing a stackable
+filesystem (Aug 2005), it already existed. It has virtual super_block,
+inode, dentry and file objects and they have an array pointing lower
+same kind objects. After contributing many patches for Unionfs, I
+re-started my project AUFS (Jun 2006).
+
+In AUFS, the structure of filesystem resembles to Unionfs, but I
+implemented my own ideas, approaches and enhancements and it became
+totally different one.
+
+Comparing DM snapshot and fs based implementation
+- the number of bytes to be copied between devices is much smaller.
+- the type of filesystem must be one and only.
+- the fs must be writable, no readonly fs, even for the lower original
+  device. so the compression fs will not be usable. but if we use
+  loopback mount, we may address this issue.
+  for instance,
+	mount /cdrom/squashfs.img /sq
+	losetup /sq/ext2.img
+	losetup /somewhere/cow
+	dmsetup "snapshot /dev/loop0 /dev/loop1 ..."
+- it will be difficult (or needs more operations) to extract the
+  difference between the original device and COW.
+- DM snapshot-merge may help a lot when users try merging. in the
+  fs-layer union, users will use rsync(1).
+
+You may want to read my old paper "Filesystems in LiveCD"
+(http://aufs.sourceforge.net/aufs2/report/sq/sq.pdf).
+
+
+Several characters/aspects/persona of aufs
+----------------------------------------------------------------------
+
+Aufs has several characters, aspects or persona.
+1. a filesystem, callee of VFS helper
+2. sub-VFS, caller of VFS helper for branches
+3. a virtual filesystem which maintains persistent inode number
+4. reader/writer of files on branches such like an application
+
+1. Callee of VFS Helper
+As an ordinary linux filesystem, aufs is a callee of VFS. For instance,
+unlink(2) from an application reaches sys_unlink() kernel function and
+then vfs_unlink() is called. vfs_unlink() is one of VFS helper and it
+calls filesystem specific unlink operation. Actually aufs implements the
+unlink operation but it behaves like a redirector.
+
+2. Caller of VFS Helper for Branches
+aufs_unlink() passes the unlink request to the branch filesystem as if
+it were called from VFS. So the called unlink operation of the branch
+filesystem acts as usual. As a caller of VFS helper, aufs should handle
+every necessary pre/post operation for the branch filesystem.
+- acquire the lock for the parent dir on a branch
+- lookup in a branch
+- revalidate dentry on a branch
+- mnt_want_write() for a branch
+- vfs_unlink() for a branch
+- mnt_drop_write() for a branch
+- release the lock on a branch
+
+3. Persistent Inode Number
+One of the most important issue for a filesystem is to maintain inode
+numbers. This is particularly important to support exporting a
+filesystem via NFS. Aufs is a virtual filesystem which doesn't have a
+backend block device for its own. But some storage is necessary to
+keep and maintain the inode numbers. It may be a large space and may not
+suit to keep in memory. Aufs rents some space from its first writable
+branch filesystem (by default) and creates file(s) on it. These files
+are created by aufs internally and removed soon (currently) keeping
+opened.
+Note: Because these files are removed, they are totally gone after
+      unmounting aufs. It means the inode numbers are not persistent
+      across unmount or reboot. I have a plan to make them really
+      persistent which will be important for aufs on NFS server.
+
+4. Read/Write Files Internally (copy-on-write)
+Because a branch can be readonly, when you write a file on it, aufs will
+"copy-up" it to the upper writable branch internally. And then write the
+originally requested thing to the file. Generally kernel doesn't
+open/read/write file actively. In aufs, even a single write may cause a
+internal "file copy". This behaviour is very similar to cp(1) command.
+
+Some people may think it is better to pass such work to user space
+helper, instead of doing in kernel space. Actually I am still thinking
+about it. But currently I have implemented it in kernel space.
diff --git a/Documentation/filesystems/aufs/design/02struct.txt b/Documentation/filesystems/aufs/design/02struct.txt
new file mode 100644
index 000000000..f5fb6a8ad
--- /dev/null
+++ b/Documentation/filesystems/aufs/design/02struct.txt
@@ -0,0 +1,258 @@
+
+# Copyright (C) 2005-2018 Junjiro R. Okajima
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Basic Aufs Internal Structure
+
+Superblock/Inode/Dentry/File Objects
+----------------------------------------------------------------------
+As like an ordinary filesystem, aufs has its own
+superblock/inode/dentry/file objects. All these objects have a
+dynamically allocated array and store the same kind of pointers to the
+lower filesystem, branch.
+For example, when you build a union with one readwrite branch and one
+readonly, mounted /au, /rw and /ro respectively.
+- /au = /rw + /ro
+- /ro/fileA exists but /rw/fileA
+
+Aufs lookup operation finds /ro/fileA and gets dentry for that. These
+pointers are stored in a aufs dentry. The array in aufs dentry will be,
+- [0] = NULL (because /rw/fileA doesn't exist)
+- [1] = /ro/fileA
+
+This style of an array is essentially same to the aufs
+superblock/inode/dentry/file objects.
+
+Because aufs supports manipulating branches, ie. add/delete/change
+branches dynamically, these objects has its own generation. When
+branches are changed, the generation in aufs superblock is
+incremented. And a generation in other object are compared when it is
+accessed. When a generation in other objects are obsoleted, aufs
+refreshes the internal array.
+
+
+Superblock
+----------------------------------------------------------------------
+Additionally aufs superblock has some data for policies to select one
+among multiple writable branches, XIB files, pseudo-links and kobject.
+See below in detail.
+About the policies which supports copy-down a directory, see
+wbr_policy.txt too.
+
+
+Branch and XINO(External Inode Number Translation Table)
+----------------------------------------------------------------------
+Every branch has its own xino (external inode number translation table)
+file. The xino file is created and unlinked by aufs internally. When two
+members of a union exist on the same filesystem, they share the single
+xino file.
+The struct of a xino file is simple, just a sequence of aufs inode
+numbers which is indexed by the lower inode number.
+In the above sample, assume the inode number of /ro/fileA is i111 and
+aufs assigns the inode number i999 for fileA. Then aufs writes 999 as
+4(8) bytes at 111 * 4(8) bytes offset in the xino file.
+
+When the inode numbers are not contiguous, the xino file will be sparse
+which has a hole in it and doesn't consume as much disk space as it
+might appear. If your branch filesystem consumes disk space for such
+holes, then you should specify 'xino=' option at mounting aufs.
+
+Aufs has a mount option to free the disk blocks for such holes in XINO
+files on tmpfs or ramdisk. But it is not so effective actually. If you
+meet a problem of disk shortage due to XINO files, then you should try
+"tmpfs-ino.patch" (and "vfs-ino.patch" too) in aufs4-standalone.git.
+The patch localizes the assignment inumbers per tmpfs-mount and avoid
+the holes in XINO files.
+
+Also a writable branch has three kinds of "whiteout bases". All these
+are existed when the branch is joined to aufs, and their names are
+whiteout-ed doubly, so that users will never see their names in aufs
+hierarchy.
+1. a regular file which will be hardlinked to all whiteouts.
+2. a directory to store a pseudo-link.
+3. a directory to store an "orphan"-ed file temporary.
+
+1. Whiteout Base
+   When you remove a file on a readonly branch, aufs handles it as a
+   logical deletion and creates a whiteout on the upper writable branch
+   as a hardlink of this file in order not to consume inode on the
+   writable branch.
+2. Pseudo-link Dir
+   See below, Pseudo-link.
+3. Step-Parent Dir
+   When "fileC" exists on the lower readonly branch only and it is
+   opened and removed with its parent dir, and then user writes
+   something into it, then aufs copies-up fileC to this
+   directory. Because there is no other dir to store fileC. After
+   creating a file under this dir, the file is unlinked.
+
+Because aufs supports manipulating branches, ie. add/delete/change
+dynamically, a branch has its own id. When the branch order changes,
+aufs finds the new index by searching the branch id.
+
+
+Pseudo-link
+----------------------------------------------------------------------
+Assume "fileA" exists on the lower readonly branch only and it is
+hardlinked to "fileB" on the branch. When you write something to fileA,
+aufs copies-up it to the upper writable branch. Additionally aufs
+creates a hardlink under the Pseudo-link Directory of the writable
+branch. The inode of a pseudo-link is kept in aufs super_block as a
+simple list. If fileB is read after unlinking fileA, aufs returns
+filedata from the pseudo-link instead of the lower readonly
+branch. Because the pseudo-link is based upon the inode, to keep the
+inode number by xino (see above) is essentially necessary.
+
+All the hardlinks under the Pseudo-link Directory of the writable branch
+should be restored in a proper location later. Aufs provides a utility
+to do this. The userspace helpers executed at remounting and unmounting
+aufs by default.
+During this utility is running, it puts aufs into the pseudo-link
+maintenance mode. In this mode, only the process which began the
+maintenance mode (and its child processes) is allowed to operate in
+aufs. Some other processes which are not related to the pseudo-link will
+be allowed to run too, but the rest have to return an error or wait
+until the maintenance mode ends. If a process already acquires an inode
+mutex (in VFS), it has to return an error.
+
+
+XIB(external inode number bitmap)
+----------------------------------------------------------------------
+Addition to the xino file per a branch, aufs has an external inode number
+bitmap in a superblock object. It is also an internal file such like a
+xino file.
+It is a simple bitmap to mark whether the aufs inode number is in-use or
+not.
+To reduce the file I/O, aufs prepares a single memory page to cache xib.
+
+As well as XINO files, aufs has a feature to truncate/refresh XIB to
+reduce the number of consumed disk blocks for these files.
+
+
+Virtual or Vertical Dir, and Readdir in Userspace
+----------------------------------------------------------------------
+In order to support multiple layers (branches), aufs readdir operation
+constructs a virtual dir block on memory. For readdir, aufs calls
+vfs_readdir() internally for each dir on branches, merges their entries
+with eliminating the whiteout-ed ones, and sets it to file (dir)
+object. So the file object has its entry list until it is closed. The
+entry list will be updated when the file position is zero and becomes
+obsoleted. This decision is made in aufs automatically.
+
+The dynamically allocated memory block for the name of entries has a
+unit of 512 bytes (by default) and stores the names contiguously (no
+padding). Another block for each entry is handled by kmem_cache too.
+During building dir blocks, aufs creates hash list and judging whether
+the entry is whiteouted by its upper branch or already listed.
+The merged result is cached in the corresponding inode object and
+maintained by a customizable life-time option.
+
+Some people may call it can be a security hole or invite DoS attack
+since the opened and once readdir-ed dir (file object) holds its entry
+list and becomes a pressure for system memory. But I'd say it is similar
+to files under /proc or /sys. The virtual files in them also holds a
+memory page (generally) while they are opened. When an idea to reduce
+memory for them is introduced, it will be applied to aufs too.
+For those who really hate this situation, I've developed readdir(3)
+library which operates this merging in userspace. You just need to set
+LD_PRELOAD environment variable, and aufs will not consume no memory in
+kernel space for readdir(3).
+
+
+Workqueue
+----------------------------------------------------------------------
+Aufs sometimes requires privilege access to a branch. For instance,
+in copy-up/down operation. When a user process is going to make changes
+to a file which exists in the lower readonly branch only, and the mode
+of one of ancestor directories may not be writable by a user
+process. Here aufs copy-up the file with its ancestors and they may
+require privilege to set its owner/group/mode/etc.
+This is a typical case of a application character of aufs (see
+Introduction).
+
+Aufs uses workqueue synchronously for this case. It creates its own
+workqueue. The workqueue is a kernel thread and has privilege. Aufs
+passes the request to call mkdir or write (for example), and wait for
+its completion. This approach solves a problem of a signal handler
+simply.
+If aufs didn't adopt the workqueue and changed the privilege of the
+process, then the process may receive the unexpected SIGXFSZ or other
+signals.
+
+Also aufs uses the system global workqueue ("events" kernel thread) too
+for asynchronous tasks, such like handling inotify/fsnotify, re-creating a
+whiteout base and etc. This is unrelated to a privilege.
+Most of aufs operation tries acquiring a rw_semaphore for aufs
+superblock at the beginning, at the same time waits for the completion
+of all queued asynchronous tasks.
+
+
+Whiteout
+----------------------------------------------------------------------
+The whiteout in aufs is very similar to Unionfs's. That is represented
+by its filename. UnionMount takes an approach of a file mode, but I am
+afraid several utilities (find(1) or something) will have to support it.
+
+Basically the whiteout represents "logical deletion" which stops aufs to
+lookup further, but also it represents "dir is opaque" which also stop
+further lookup.
+
+In aufs, rmdir(2) and rename(2) for dir uses whiteout alternatively.
+In order to make several functions in a single systemcall to be
+revertible, aufs adopts an approach to rename a directory to a temporary
+unique whiteouted name.
+For example, in rename(2) dir where the target dir already existed, aufs
+renames the target dir to a temporary unique whiteouted name before the
+actual rename on a branch, and then handles other actions (make it opaque,
+update the attributes, etc). If an error happens in these actions, aufs
+simply renames the whiteouted name back and returns an error. If all are
+succeeded, aufs registers a function to remove the whiteouted unique
+temporary name completely and asynchronously to the system global
+workqueue.
+
+
+Copy-up
+----------------------------------------------------------------------
+It is a well-known feature or concept.
+When user modifies a file on a readonly branch, aufs operate "copy-up"
+internally and makes change to the new file on the upper writable branch.
+When the trigger systemcall does not update the timestamps of the parent
+dir, aufs reverts it after copy-up.
+
+
+Move-down (aufs3.9 and later)
+----------------------------------------------------------------------
+"Copy-up" is one of the essential feature in aufs. It copies a file from
+the lower readonly branch to the upper writable branch when a user
+changes something about the file.
+"Move-down" is an opposite action of copy-up. Basically this action is
+ran manually instead of automatically and internally.
+For desgin and implementation, aufs has to consider these issues.
+- whiteout for the file may exist on the lower branch.
+- ancestor directories may not exist on the lower branch.
+- diropq for the ancestor directories may exist on the upper branch.
+- free space on the lower branch will reduce.
+- another access to the file may happen during moving-down, including
+  UDBA (see "Revalidate Dentry and UDBA").
+- the file should not be hard-linked nor pseudo-linked. they should be
+  handled by auplink utility later.
+
+Sometimes users want to move-down a file from the upper writable branch
+to the lower readonly or writable branch. For instance,
+- the free space of the upper writable branch is going to run out.
+- create a new intermediate branch between the upper and lower branch.
+- etc.
+
+For this purpose, use "aumvdown" command in aufs-util.git.
diff --git a/Documentation/filesystems/aufs/design/03atomic_open.txt b/Documentation/filesystems/aufs/design/03atomic_open.txt
new file mode 100644
index 000000000..1b0699f8b
--- /dev/null
+++ b/Documentation/filesystems/aufs/design/03atomic_open.txt
@@ -0,0 +1,85 @@
+
+# Copyright (C) 2015-2018 Junjiro R. Okajima
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Support for a branch who has its ->atomic_open()
+----------------------------------------------------------------------
+The filesystems who implement its ->atomic_open() are not majority. For
+example NFSv4 does, and aufs should call NFSv4 ->atomic_open,
+particularly for open(O_CREAT|O_EXCL, 0400) case. Other than
+->atomic_open(), NFSv4 returns an error for this open(2). While I am not
+sure whether all filesystems who have ->atomic_open() behave like this,
+but NFSv4 surely returns the error.
+
+In order to support ->atomic_open() for aufs, there are a few
+approaches.
+
+A. Introduce aufs_atomic_open()
+   - calls one of VFS:do_last(), lookup_open() or atomic_open() for
+     branch fs.
+B. Introduce aufs_atomic_open() calling create, open and chmod. this is
+   an aufs user Pip Cet's approach
+   - calls aufs_create(), VFS finish_open() and notify_change().
+   - pass fake-mode to finish_open(), and then correct the mode by
+     notify_change().
+C. Extend aufs_open() to call branch fs's ->atomic_open()
+   - no aufs_atomic_open().
+   - aufs_lookup() registers the TID to an aufs internal object.
+   - aufs_create() does nothing when the matching TID is registered, but
+     registers the mode.
+   - aufs_open() calls branch fs's ->atomic_open() when the matching
+     TID is registered.
+D. Extend aufs_open() to re-try branch fs's ->open() with superuser's
+   credential
+   - no aufs_atomic_open().
+   - aufs_create() registers the TID to an internal object. this info
+     represents "this process created this file just now."
+   - when aufs gets EACCES from branch fs's ->open(), then confirm the
+     registered TID and re-try open() with superuser's credential.
+
+Pros and cons for each approach.
+
+A.
+   - straightforward but highly depends upon VFS internal.
+   - the atomic behavaiour is kept.
+   - some of parameters such as nameidata are hard to reproduce for
+     branch fs.
+   - large overhead.
+B.
+   - easy to implement.
+   - the atomic behavaiour is lost.
+C.
+   - the atomic behavaiour is kept.
+   - dirty and tricky.
+   - VFS checks whether the file is created correctly after calling
+     ->create(), which means this approach doesn't work.
+D.
+   - easy to implement.
+   - the atomic behavaiour is lost.
+   - to open a file with superuser's credential and give it to a user
+     process is a bad idea, since the file object keeps the credential
+     in it. It may affect LSM or something. This approach doesn't work
+     either.
+
+The approach A is ideal, but it hard to implement. So here is a
+variation of A, which is to be implemented.
+
+A-1. Introduce aufs_atomic_open()
+     - calls branch fs ->atomic_open() if exists. otherwise calls
+       vfs_create() and finish_open().
+     - the demerit is that the several checks after branch fs
+       ->atomic_open() are lost. in the ordinary case, the checks are
+       done by VFS:do_last(), lookup_open() and atomic_open(). some can
+       be implemented in aufs, but not all I am afraid.
diff --git a/Documentation/filesystems/aufs/design/03lookup.txt b/Documentation/filesystems/aufs/design/03lookup.txt
new file mode 100644
index 000000000..80ae63bce
--- /dev/null
+++ b/Documentation/filesystems/aufs/design/03lookup.txt
@@ -0,0 +1,113 @@
+
+# Copyright (C) 2005-2018 Junjiro R. Okajima
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Lookup in a Branch
+----------------------------------------------------------------------
+Since aufs has a character of sub-VFS (see Introduction), it operates
+lookup for branches as VFS does. It may be a heavy work. But almost all
+lookup operation in aufs is the simplest case, ie. lookup only an entry
+directly connected to its parent. Digging down the directory hierarchy
+is unnecessary. VFS has a function lookup_one_len() for that use, and
+aufs calls it.
+
+When a branch is a remote filesystem, aufs basically relies upon its
+->d_revalidate(), also aufs forces the hardest revalidate tests for
+them.
+For d_revalidate, aufs implements three levels of revalidate tests. See
+"Revalidate Dentry and UDBA" in detail.
+
+
+Test Only the Highest One for the Directory Permission (dirperm1 option)
+----------------------------------------------------------------------
+Let's try case study.
+- aufs has two branches, upper readwrite and lower readonly.
+  /au = /rw + /ro
+- "dirA" exists under /ro, but /rw. and its mode is 0700.
+- user invoked "chmod a+rx /au/dirA"
+- the internal copy-up is activated and "/rw/dirA" is created and its
+  permission bits are set to world readable.
+- then "/au/dirA" becomes world readable?
+
+In this case, /ro/dirA is still 0700 since it exists in readonly branch,
+or it may be a natively readonly filesystem. If aufs respects the lower
+branch, it should not respond readdir request from other users. But user
+allowed it by chmod. Should really aufs rejects showing the entries
+under /ro/dirA?
+
+To be honest, I don't have a good solution for this case. So aufs
+implements 'dirperm1' and 'nodirperm1' mount options, and leave it to
+users.
+When dirperm1 is specified, aufs checks only the highest one for the
+directory permission, and shows the entries. Otherwise, as usual, checks
+every dir existing on all branches and rejects the request.
+
+As a side effect, dirperm1 option improves the performance of aufs
+because the number of permission check is reduced when the number of
+branch is many.
+
+
+Revalidate Dentry and UDBA (User's Direct Branch Access)
+----------------------------------------------------------------------
+Generally VFS helpers re-validate a dentry as a part of lookup.
+0. digging down the directory hierarchy.
+1. lock the parent dir by its i_mutex.
+2. lookup the final (child) entry.
+3. revalidate it.
+4. call the actual operation (create, unlink, etc.)
+5. unlock the parent dir
+
+If the filesystem implements its ->d_revalidate() (step 3), then it is
+called. Actually aufs implements it and checks the dentry on a branch is
+still valid.
+But it is not enough. Because aufs has to release the lock for the
+parent dir on a branch at the end of ->lookup() (step 2) and
+->d_revalidate() (step 3) while the i_mutex of the aufs dir is still
+held by VFS.
+If the file on a branch is changed directly, eg. bypassing aufs, after
+aufs released the lock, then the subsequent operation may cause
+something unpleasant result.
+
+This situation is a result of VFS architecture, ->lookup() and
+->d_revalidate() is separated. But I never say it is wrong. It is a good
+design from VFS's point of view. It is just not suitable for sub-VFS
+character in aufs.
+
+Aufs supports such case by three level of revalidation which is
+selectable by user.
+1. Simple Revalidate
+   Addition to the native flow in VFS's, confirm the child-parent
+   relationship on the branch just after locking the parent dir on the
+   branch in the "actual operation" (step 4). When this validation
+   fails, aufs returns EBUSY. ->d_revalidate() (step 3) in aufs still
+   checks the validation of the dentry on branches.
+2. Monitor Changes Internally by Inotify/Fsnotify
+   Addition to above, in the "actual operation" (step 4) aufs re-lookup
+   the dentry on the branch, and returns EBUSY if it finds different
+   dentry.
+   Additionally, aufs sets the inotify/fsnotify watch for every dir on branches
+   during it is in cache. When the event is notified, aufs registers a
+   function to kernel 'events' thread by schedule_work(). And the
+   function sets some special status to the cached aufs dentry and inode
+   private data. If they are not cached, then aufs has nothing to
+   do. When the same file is accessed through aufs (step 0-3) later,
+   aufs will detect the status and refresh all necessary data.
+   In this mode, aufs has to ignore the event which is fired by aufs
+   itself.
+3. No Extra Validation
+   This is the simplest test and doesn't add any additional revalidation
+   test, and skip the revalidation in step 4. It is useful and improves
+   aufs performance when system surely hide the aufs branches from user,
+   by over-mounting something (or another method).
diff --git a/Documentation/filesystems/aufs/design/04branch.txt b/Documentation/filesystems/aufs/design/04branch.txt
new file mode 100644
index 000000000..0c1289737
--- /dev/null
+++ b/Documentation/filesystems/aufs/design/04branch.txt
@@ -0,0 +1,74 @@
+
+# Copyright (C) 2005-2018 Junjiro R. Okajima
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Branch Manipulation
+
+Since aufs supports dynamic branch manipulation, ie. add/remove a branch
+and changing its permission/attribute, there are a lot of works to do.
+
+
+Add a Branch
+----------------------------------------------------------------------
+o Confirm the adding dir exists outside of aufs, including loopback
+  mount, and its various attributes.
+o Initialize the xino file and whiteout bases if necessary.
+  See struct.txt.
+
+o Check the owner/group/mode of the directory
+  When the owner/group/mode of the adding directory differs from the
+  existing branch, aufs issues a warning because it may impose a
+  security risk.
+  For example, when a upper writable branch has a world writable empty
+  top directory, a malicious user can create any files on the writable
+  branch directly, like copy-up and modify manually. If something like
+  /etc/{passwd,shadow} exists on the lower readonly branch but the upper
+  writable branch, and the writable branch is world-writable, then a
+  malicious guy may create /etc/passwd on the writable branch directly
+  and the infected file will be valid in aufs.
+  I am afraid it can be a security issue, but aufs can do nothing except
+  producing a warning.
+
+
+Delete a Branch
+----------------------------------------------------------------------
+o Confirm the deleting branch is not busy
+  To be general, there is one merit to adopt "remount" interface to
+  manipulate branches. It is to discard caches. At deleting a branch,
+  aufs checks the still cached (and connected) dentries and inodes. If
+  there are any, then they are all in-use. An inode without its
+  corresponding dentry can be alive alone (for example, inotify/fsnotify case).
+
+  For the cached one, aufs checks whether the same named entry exists on
+  other branches.
+  If the cached one is a directory, because aufs provides a merged view
+  to users, as long as one dir is left on any branch aufs can show the
+  dir to users. In this case, the branch can be removed from aufs.
+  Otherwise aufs rejects deleting the branch.
+
+  If any file on the deleting branch is opened by aufs, then aufs
+  rejects deleting.
+
+
+Modify the Permission of a Branch
+----------------------------------------------------------------------
+o Re-initialize or remove the xino file and whiteout bases if necessary.
+  See struct.txt.
+
+o rw --> ro: Confirm the modifying branch is not busy
+  Aufs rejects the request if any of these conditions are true.
+  - a file on the branch is mmap-ed.
+  - a regular file on the branch is opened for write and there is no
+    same named entry on the upper branch.
diff --git a/Documentation/filesystems/aufs/design/05wbr_policy.txt b/Documentation/filesystems/aufs/design/05wbr_policy.txt
new file mode 100644
index 000000000..cc5b7979c
--- /dev/null
+++ b/Documentation/filesystems/aufs/design/05wbr_policy.txt
@@ -0,0 +1,64 @@
+
+# Copyright (C) 2005-2018 Junjiro R. Okajima
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Policies to Select One among Multiple Writable Branches
+----------------------------------------------------------------------
+When the number of writable branch is more than one, aufs has to decide
+the target branch for file creation or copy-up. By default, the highest
+writable branch which has the parent (or ancestor) dir of the target
+file is chosen (top-down-parent policy).
+By user's request, aufs implements some other policies to select the
+writable branch, for file creation several policies, round-robin,
+most-free-space, and other policies. For copy-up, top-down-parent,
+bottom-up-parent, bottom-up and others.
+
+As expected, the round-robin policy selects the branch in circular. When
+you have two writable branches and creates 10 new files, 5 files will be
+created for each branch. mkdir(2) systemcall is an exception. When you
+create 10 new directories, all will be created on the same branch.
+And the most-free-space policy selects the one which has most free
+space among the writable branches. The amount of free space will be
+checked by aufs internally, and users can specify its time interval.
+
+The policies for copy-up is more simple,
+top-down-parent is equivalent to the same named on in create policy,
+bottom-up-parent selects the writable branch where the parent dir
+exists and the nearest upper one from the copyup-source,
+bottom-up selects the nearest upper writable branch from the
+copyup-source, regardless the existence of the parent dir.
+
+There are some rules or exceptions to apply these policies.
+- If there is a readonly branch above the policy-selected branch and
+  the parent dir is marked as opaque (a variation of whiteout), or the
+  target (creating) file is whiteout-ed on the upper readonly branch,
+  then the result of the policy is ignored and the target file will be
+  created on the nearest upper writable branch than the readonly branch.
+- If there is a writable branch above the policy-selected branch and
+  the parent dir is marked as opaque or the target file is whiteouted
+  on the branch, then the result of the policy is ignored and the target
+  file will be created on the highest one among the upper writable
+  branches who has diropq or whiteout. In case of whiteout, aufs removes
+  it as usual.
+- link(2) and rename(2) systemcalls are exceptions in every policy.
+  They try selecting the branch where the source exists as possible
+  since copyup a large file will take long time. If it can't be,
+  ie. the branch where the source exists is readonly, then they will
+  follow the copyup policy.
+- There is an exception for rename(2) when the target exists.
+  If the rename target exists, aufs compares the index of the branches
+  where the source and the target exists and selects the higher
+  one. If the selected branch is readonly, then aufs follows the
+  copyup policy.
diff --git a/Documentation/filesystems/aufs/design/06dirren.dot b/Documentation/filesystems/aufs/design/06dirren.dot
new file mode 100644
index 000000000..2d62bb6dd
--- /dev/null
+++ b/Documentation/filesystems/aufs/design/06dirren.dot
@@ -0,0 +1,31 @@
+
+// to view this graph, run dot(1) command in GRAPHVIZ.
+
+digraph G {
+node [shape=box];
+whinfo [label="detailed info file\n(lower_brid_root-hinum, h_inum, namelen, old name)"];
+
+node [shape=oval];
+
+aufs_rename -> whinfo [label="store/remove"];
+
+node [shape=oval];
+inode_list [label="h_inum list in branch\ncache"];
+
+node [shape=box];
+whinode [label="h_inum list file"];
+
+node [shape=oval];
+brmgmt [label="br_add/del/mod/umount"];
+
+brmgmt -> inode_list [label="create/remove"];
+brmgmt -> whinode [label="load/store"];
+
+inode_list -> whinode [style=dashed,dir=both];
+
+aufs_rename -> inode_list [label="add/del"];
+
+aufs_lookup -> inode_list [label="search"];
+
+aufs_lookup -> whinfo [label="load/remove"];
+}
diff --git a/Documentation/filesystems/aufs/design/06dirren.txt b/Documentation/filesystems/aufs/design/06dirren.txt
new file mode 100644
index 000000000..1f3cb86d9
--- /dev/null
+++ b/Documentation/filesystems/aufs/design/06dirren.txt
@@ -0,0 +1,102 @@
+
+# Copyright (C) 2017-2018 Junjiro R. Okajima
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Special handling for renaming a directory (DIRREN)
+----------------------------------------------------------------------
+First, let's assume we have a simple usecase.
+
+- /u = /rw + /ro
+- /rw/dirA exists
+- /ro/dirA and /ro/dirA/file exist too
+- there is no dirB on both branches
+- a user issues rename("dirA", "dirB")
+
+Now, what should aufs behave against this rename(2)?
+There are a few possible cases.
+
+A. returns EROFS.
+   since dirA exists on a readonly branch which cannot be renamed.
+B. returns EXDEV.
+   it is possible to copy-up dirA (only the dir itself), but the child
+   entries ("file" in this case) should not be. it must be a bad
+   approach to copy-up recursively.
+C. returns a success.
+   even the branch /ro is readonly, aufs tries renaming it. Obviously it
+   is a violation of aufs' policy.
+D. construct an extra information which indicates that /ro/dirA should
+   be handled as the name of dirB.
+   overlayfs has a similar feature called REDIRECT.
+
+Until now, aufs implements the case B only which returns EXDEV, and
+expects the userspace application behaves like mv(1) which tries
+issueing rename(2) recursively.
+
+A new aufs feature called DIRREN is introduced which implements the case
+D. There are several "extra information" added.
+
+1. detailed info per renamed directory
+   path: /rw/dirB/$AUFS_WH_DR_INFO_PFX.<lower branch-id>
+2. the inode-number list of directories on a branch
+   path: /rw/dirB/$AUFS_WH_DR_BRHINO
+
+The filename of "detailed info per directory" represents the lower
+branch, and its format is
+- a type of the branch id
+  one of these.
+  + uuid (not implemented yet)
+  + fsid
+  + dev
+- the inode-number of the branch root dir
+
+And it contains these info in a single regular file.
+- magic number
+- branch's inode-number of the logically renamed dir
+- the name of the before-renamed dir
+
+The "detailed info per directory" file is created in aufs rename(2), and
+loaded in any lookup.
+The info is considered in lookup for the matching case only. Here
+"matching" means that the root of branch (in the info filename) is same
+to the current looking-up branch. After looking-up the before-renamed
+name, the inode-number is compared. And the matched dentry is used.
+
+The "inode-number list of directories" is a regular file which contains
+simply the inode-numbers on the branch. The file is created or updated
+in removing the branch, and loaded in adding the branch. Its lifetime is
+equal to the branch.
+The list is refered in lookup, and when the current target inode is
+found in the list, the aufs tries loading the "detailed info per
+directory" and get the changed and valid name of the dir.
+
+Theoretically these "extra informaiton" may be able to be put into XATTR
+in the dir inode. But aufs doesn't choose this way because
+1. XATTR may not be supported by the branch (or its configuration)
+2. XATTR may have its size limit.
+3. XATTR may be less easy to convert than a regular file, when the
+   format of the info is changed in the future.
+At the same time, I agree that the regular file approach is much slower
+than XATTR approach. So, in the future, aufs may take the XATTR or other
+better approach.
+
+This DIRREN feature is enabled by aufs configuration, and is activated
+by a new mount option.
+
+For the more complicated case, there is a work with UDBA option, which
+is to dected the direct access to the branches (by-passing aufs) and to
+maintain the cashes in aufs. Since a single cached aufs dentry may
+contains two names, before- and after-rename, the name comparision in
+UDBA handler may not work correctly. In this case, the behaviour will be
+equivalen to udba=reval case.
diff --git a/Documentation/filesystems/aufs/design/06fhsm.txt b/Documentation/filesystems/aufs/design/06fhsm.txt
new file mode 100644
index 000000000..ddfebecdf
--- /dev/null
+++ b/Documentation/filesystems/aufs/design/06fhsm.txt
@@ -0,0 +1,120 @@
+
+# Copyright (C) 2011-2018 Junjiro R. Okajima
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+
+File-based Hierarchical Storage Management (FHSM)
+----------------------------------------------------------------------
+Hierarchical Storage Management (or HSM) is a well-known feature in the
+storage world. Aufs provides this feature as file-based with multiple
+writable branches, based upon the principle of "Colder, the Lower".
+Here the word "colder" means that the less used files, and "lower" means
+that the position in the order of the stacked branches vertically.
+These multiple writable branches are prioritized, ie. the topmost one
+should be the fastest drive and be used heavily.
+
+o Characters in aufs FHSM story
+- aufs itself and a new branch attribute.
+- a new ioctl interface to move-down and to establish a connection with
+  the daemon ("move-down" is a converse of "copy-up").
+- userspace tool and daemon.
+
+The userspace daemon establishes a connection with aufs and waits for
+the notification. The notified information is very similar to struct
+statfs containing the number of consumed blocks and inodes.
+When the consumed blocks/inodes of a branch exceeds the user-specified
+upper watermark, the daemon activates its move-down process until the
+consumed blocks/inodes reaches the user-specified lower watermark.
+
+The actual move-down is done by aufs based upon the request from
+user-space since we need to maintain the inode number and the internal
+pointer arrays in aufs.
+
+Currently aufs FHSM handles the regular files only. Additionally they
+must not be hard-linked nor pseudo-linked.
+
+
+o Cowork of aufs and the user-space daemon
+  During the userspace daemon established the connection, aufs sends a
+  small notification to it whenever aufs writes something into the
+  writable branch. But it may cost high since aufs issues statfs(2)
+  internally. So user can specify a new option to cache the
+  info. Actually the notification is controlled by these factors.
+  + the specified cache time.
+  + classified as "force" by aufs internally.
+  Until the specified time expires, aufs doesn't send the info
+  except the forced cases. When aufs decide forcing, the info is always
+  notified to userspace.
+  For example, the number of free inodes is generally large enough and
+  the shortage of it happens rarely. So aufs doesn't force the
+  notification when creating a new file, directory and others. This is
+  the typical case which aufs doesn't force.
+  When aufs writes the actual filedata and the files consumes any of new
+  blocks, the aufs forces notifying.
+
+
+o Interfaces in aufs
+- New branch attribute.
+  + fhsm
+    Specifies that the branch is managed by FHSM feature. In other word,
+    participant in the FHSM.
+    When nofhsm is set to the branch, it will not be the source/target
+    branch of the move-down operation. This attribute is set
+    independently from coo and moo attributes, and if you want full
+    FHSM, you should specify them as well.
+- New mount option.
+  + fhsm_sec
+    Specifies a second to suppress many less important info to be
+    notified.
+- New ioctl.
+  + AUFS_CTL_FHSM_FD
+    create a new file descriptor which userspace can read the notification
+    (a subset of struct statfs) from aufs.
+- Module parameter 'brs'
+  It has to be set to 1. Otherwise the new mount option 'fhsm' will not
+  be set.
+- mount helpers /sbin/mount.aufs and /sbin/umount.aufs
+  When there are two or more branches with fhsm attributes,
+  /sbin/mount.aufs invokes the user-space daemon and /sbin/umount.aufs
+  terminates it. As a result of remounting and branch-manipulation, the
+  number of branches with fhsm attribute can be one. In this case,
+  /sbin/mount.aufs will terminate the user-space daemon.
+
+
+Finally the operation is done as these steps in kernel-space.
+- make sure that,
+  + no one else is using the file.
+  + the file is not hard-linked.
+  + the file is not pseudo-linked.
+  + the file is a regular file.
+  + the parent dir is not opaqued.
+- find the target writable branch.
+- make sure the file is not whiteout-ed by the upper (than the target)
+  branch.
+- make the parent dir on the target branch.
+- mutex lock the inode on the branch.
+- unlink the whiteout on the target branch (if exists).
+- lookup and create the whiteout-ed temporary name on the target branch.
+- copy the file as the whiteout-ed temporary name on the target branch.
+- rename the whiteout-ed temporary name to the original name.
+- unlink the file on the source branch.
+- maintain the internal pointer array and the external inode number
+  table (XINO).
+- maintain the timestamps and other attributes of the parent dir and the
+  file.
+
+And of course, in every step, an error may happen. So the operation
+should restore the original file state after an error happens.
diff --git a/Documentation/filesystems/aufs/design/06mmap.txt b/Documentation/filesystems/aufs/design/06mmap.txt
new file mode 100644
index 000000000..9a0a096b1
--- /dev/null
+++ b/Documentation/filesystems/aufs/design/06mmap.txt
@@ -0,0 +1,72 @@
+
+# Copyright (C) 2005-2018 Junjiro R. Okajima
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+mmap(2) -- File Memory Mapping
+----------------------------------------------------------------------
+In aufs, the file-mapped pages are handled by a branch fs directly, no
+interaction with aufs. It means aufs_mmap() calls the branch fs's
+->mmap().
+This approach is simple and good, but there is one problem.
+Under /proc, several entries show the mmapped files by its path (with
+device and inode number), and the printed path will be the path on the
+branch fs's instead of virtual aufs's.
+This is not a problem in most cases, but some utilities lsof(1) (and its
+user) may expect the path on aufs.
+
+To address this issue, aufs adds a new member called vm_prfile in struct
+vm_area_struct (and struct vm_region). The original vm_file points to
+the file on the branch fs in order to handle everything correctly as
+usual. The new vm_prfile points to a virtual file in aufs, and the
+show-functions in procfs refers to vm_prfile if it is set.
+Also we need to maintain several other places where touching vm_file
+such like
+- fork()/clone() copies vma and the reference count of vm_file is
+  incremented.
+- merging vma maintains the ref count too.
+
+This is not a good approach. It just fakes the printed path. But it
+leaves all behaviour around f_mapping unchanged. This is surely an
+advantage.
+Actually aufs had adopted another complicated approach which calls
+generic_file_mmap() and handles struct vm_operations_struct. In this
+approach, aufs met a hard problem and I could not solve it without
+switching the approach.
+
+There may be one more another approach which is
+- bind-mount the branch-root onto the aufs-root internally
+- grab the new vfsmount (ie. struct mount)
+- lazy-umount the branch-root internally
+- in open(2) the aufs-file, open the branch-file with the hidden
+  vfsmount (instead of the original branch's vfsmount)
+- ideally this "bind-mount and lazy-umount" should be done atomically,
+  but it may be possible from userspace by the mount helper.
+
+Adding the internal hidden vfsmount and using it in opening a file, the
+file path under /proc will be printed correctly. This approach looks
+smarter, but is not possible I am afraid.
+- aufs-root may be bind-mount later. when it happens, another hidden
+  vfsmount will be required.
+- it is hard to get the chance to bind-mount and lazy-umount
+  + in kernel-space, FS can have vfsmount in open(2) via
+    file->f_path, and aufs can know its vfsmount. But several locks are
+    already acquired, and if aufs tries to bind-mount and lazy-umount
+    here, then it may cause a deadlock.
+  + in user-space, bind-mount doesn't invoke the mount helper.
+- since /proc shows dev and ino, aufs has to give vma these info. it
+  means a new member vm_prinode will be necessary. this is essentially
+  equivalent to vm_prfile described above.
+
+I have to give up this "looks-smater" approach.
diff --git a/Documentation/filesystems/aufs/design/06xattr.txt b/Documentation/filesystems/aufs/design/06xattr.txt
new file mode 100644
index 000000000..be441e8d3
--- /dev/null
+++ b/Documentation/filesystems/aufs/design/06xattr.txt
@@ -0,0 +1,96 @@
+
+# Copyright (C) 2014-2018 Junjiro R. Okajima
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+
+Listing XATTR/EA and getting the value
+----------------------------------------------------------------------
+For the inode standard attributes (owner, group, timestamps, etc.), aufs
+shows the values from the topmost existing file. This behaviour is good
+for the non-dir entries since the bahaviour exactly matches the shown
+information. But for the directories, aufs considers all the same named
+entries on the lower branches. Which means, if one of the lower entry
+rejects readdir call, then aufs returns an error even if the topmost
+entry allows it. This behaviour is necessary to respect the branch fs's
+security, but can make users confused since the user-visible standard
+attributes don't match the behaviour.
+To address this issue, aufs has a mount option called dirperm1 which
+checks the permission for the topmost entry only, and ignores the lower
+entry's permission.
+
+A similar issue can happen around XATTR.
+getxattr(2) and listxattr(2) families behave as if dirperm1 option is
+always set. Otherwise these very unpleasant situation would happen.
+- listxattr(2) may return the duplicated entries.
+- users may not be able to remove or reset the XATTR forever,
+
+
+XATTR/EA support in the internal (copy,move)-(up,down)
+----------------------------------------------------------------------
+Generally the extended attributes of inode are categorized as these.
+- "security" for LSM and capability.
+- "system" for posix ACL, 'acl' mount option is required for the branch
+  fs generally.
+- "trusted" for userspace, CAP_SYS_ADMIN is required.
+- "user" for userspace, 'user_xattr' mount option is required for the
+  branch fs generally.
+
+Moreover there are some other categories. Aufs handles these rather
+unpopular categories as the ordinary ones, ie. there is no special
+condition nor exception.
+
+In copy-up, the support for XATTR on the dst branch may differ from the
+src branch. In this case, the copy-up operation will get an error and
+the original user operation which triggered the copy-up will fail. It
+can happen that even all copy-up will fail.
+When both of src and dst branches support XATTR and if an error occurs
+during copying XATTR, then the copy-up should fail obviously. That is a
+good reason and aufs should return an error to userspace. But when only
+the src branch support that XATTR, aufs should not return an error.
+For example, the src branch supports ACL but the dst branch doesn't
+because the dst branch may natively un-support it or temporary
+un-support it due to "noacl" mount option. Of course, the dst branch fs
+may NOT return an error even if the XATTR is not supported. It is
+totally up to the branch fs.
+
+Anyway when the aufs internal copy-up gets an error from the dst branch
+fs, then aufs tries removing the just copied entry and returns the error
+to the userspace. The worst case of this situation will be all copy-up
+will fail.
+
+For the copy-up operation, there two basic approaches.
+- copy the specified XATTR only (by category above), and return the
+  error unconditionally if it happens.
+- copy all XATTR, and ignore the error on the specified category only.
+
+In order to support XATTR and to implement the correct behaviour, aufs
+chooses the latter approach and introduces some new branch attributes,
+"icexsec", "icexsys", "icextr", "icexusr", and "icexoth".
+They correspond to the XATTR namespaces (see above). Additionally, to be
+convenient, "icex" is also provided which means all "icex*" attributes
+are set (here the word "icex" stands for "ignore copy-error on XATTR").
+
+The meaning of these attributes is to ignore the error from setting
+XATTR on that branch.
+Note that aufs tries copying all XATTR unconditionally, and ignores the
+error from the dst branch according to the specified attributes.
+
+Some XATTR may have its default value. The default value may come from
+the parent dir or the environment. If the default value is set at the
+file creating-time, it will be overwritten by copy-up.
+Some contradiction may happen I am afraid.
+Do we need another attribute to stop copying XATTR? I am unsure. For
+now, aufs implements the branch attributes to ignore the error.
diff --git a/Documentation/filesystems/aufs/design/07export.txt b/Documentation/filesystems/aufs/design/07export.txt
new file mode 100644
index 000000000..bb700cb18
--- /dev/null
+++ b/Documentation/filesystems/aufs/design/07export.txt
@@ -0,0 +1,58 @@
+
+# Copyright (C) 2005-2018 Junjiro R. Okajima
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Export Aufs via NFS
+----------------------------------------------------------------------
+Here is an approach.
+- like xino/xib, add a new file 'xigen' which stores aufs inode
+  generation.
+- iget_locked(): initialize aufs inode generation for a new inode, and
+  store it in xigen file.
+- destroy_inode(): increment aufs inode generation and store it in xigen
+  file. it is necessary even if it is not unlinked, because any data of
+  inode may be changed by UDBA.
+- encode_fh(): for a root dir, simply return FILEID_ROOT. otherwise
+  build file handle by
+  + branch id (4 bytes)
+  + superblock generation (4 bytes)
+  + inode number (4 or 8 bytes)
+  + parent dir inode number (4 or 8 bytes)
+  + inode generation (4 bytes))
+  + return value of exportfs_encode_fh() for the parent on a branch (4
+    bytes)
+  + file handle for a branch (by exportfs_encode_fh())
+- fh_to_dentry():
+  + find the index of a branch from its id in handle, and check it is
+    still exist in aufs.
+  + 1st level: get the inode number from handle and search it in cache.
+  + 2nd level: if not found in cache, get the parent inode number from
+    the handle and search it in cache. and then open the found parent
+    dir, find the matching inode number by vfs_readdir() and get its
+    name, and call lookup_one_len() for the target dentry.
+  + 3rd level: if the parent dir is not cached, call
+    exportfs_decode_fh() for a branch and get the parent on a branch,
+    build a pathname of it, convert it a pathname in aufs, call
+    path_lookup(). now aufs gets a parent dir dentry, then handle it as
+    the 2nd level.
+  + to open the dir, aufs needs struct vfsmount. aufs keeps vfsmount
+    for every branch, but not itself. to get this, (currently) aufs
+    searches in current->nsproxy->mnt_ns list. it may not be a good
+    idea, but I didn't get other approach.
+  + test the generation of the gotten inode.
+- every inode operation: they may get EBUSY due to UDBA. in this case,
+  convert it into ESTALE for NFSD.
+- readdir(): call lockdep_on/off() because filldir in NFSD calls
+  lookup_one_len(), vfs_getattr(), encode_fh() and others.
diff --git a/Documentation/filesystems/aufs/design/08shwh.txt b/Documentation/filesystems/aufs/design/08shwh.txt
new file mode 100644
index 000000000..1dad573f6
--- /dev/null
+++ b/Documentation/filesystems/aufs/design/08shwh.txt
@@ -0,0 +1,52 @@
+
+# Copyright (C) 2005-2018 Junjiro R. Okajima
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Show Whiteout Mode (shwh)
+----------------------------------------------------------------------
+Generally aufs hides the name of whiteouts. But in some cases, to show
+them is very useful for users. For instance, creating a new middle layer
+(branch) by merging existing layers.
+
+(borrowing aufs1 HOW-TO from a user, Michael Towers)
+When you have three branches,
+- Bottom: 'system', squashfs (underlying base system), read-only
+- Middle: 'mods', squashfs, read-only
+- Top: 'overlay', ram (tmpfs), read-write
+
+The top layer is loaded at boot time and saved at shutdown, to preserve
+the changes made to the system during the session.
+When larger changes have been made, or smaller changes have accumulated,
+the size of the saved top layer data grows. At this point, it would be
+nice to be able to merge the two overlay branches ('mods' and 'overlay')
+and rewrite the 'mods' squashfs, clearing the top layer and thus
+restoring save and load speed.
+
+This merging is simplified by the use of another aufs mount, of just the
+two overlay branches using the 'shwh' option.
+# mount -t aufs -o ro,shwh,br:/livesys/overlay=ro+wh:/livesys/mods=rr+wh \
+	aufs /livesys/merge_union
+
+A merged view of these two branches is then available at
+/livesys/merge_union, and the new feature is that the whiteouts are
+visible!
+Note that in 'shwh' mode the aufs mount must be 'ro', which will disable
+writing to all branches. Also the default mode for all branches is 'ro'.
+It is now possible to save the combined contents of the two overlay
+branches to a new squashfs, e.g.:
+# mksquashfs /livesys/merge_union /path/to/newmods.squash
+
+This new squashfs archive can be stored on the boot device and the
+initramfs will use it to replace the old one at the next boot.
diff --git a/Documentation/filesystems/aufs/design/10dynop.txt b/Documentation/filesystems/aufs/design/10dynop.txt
new file mode 100644
index 000000000..710313c08
--- /dev/null
+++ b/Documentation/filesystems/aufs/design/10dynop.txt
@@ -0,0 +1,47 @@
+
+# Copyright (C) 2010-2018 Junjiro R. Okajima
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Dynamically customizable FS operations
+----------------------------------------------------------------------
+Generally FS operations (struct inode_operations, struct
+address_space_operations, struct file_operations, etc.) are defined as
+"static const", but it never means that FS have only one set of
+operation. Some FS have multiple sets of them. For instance, ext2 has
+three sets, one for XIP, for NOBH, and for normal.
+Since aufs overrides and redirects these operations, sometimes aufs has
+to change its behaviour according to the branch FS type. More importantly
+VFS acts differently if a function (member in the struct) is set or
+not. It means aufs should have several sets of operations and select one
+among them according to the branch FS definition.
+
+In order to solve this problem and not to affect the behaviour of VFS,
+aufs defines these operations dynamically. For instance, aufs defines
+dummy direct_IO function for struct address_space_operations, but it may
+not be set to the address_space_operations actually. When the branch FS
+doesn't have it, aufs doesn't set it to its address_space_operations
+while the function definition itself is still alive. So the behaviour
+itself will not change, and it will return an error when direct_IO is
+not set.
+
+The lifetime of these dynamically generated operation object is
+maintained by aufs branch object. When the branch is removed from aufs,
+the reference counter of the object is decremented. When it reaches
+zero, the dynamically generated operation object will be freed.
+
+This approach is designed to support AIO (io_submit), Direct I/O and
+XIP (DAX) mainly.
+Currently this approach is applied to address_space_operations for
+regular files only.
diff --git a/Documentation/power/tuxonice-internals.txt b/Documentation/power/tuxonice-internals.txt
new file mode 100644
index 000000000..0c6a2163a
--- /dev/null
+++ b/Documentation/power/tuxonice-internals.txt
@@ -0,0 +1,532 @@
+		   TuxOnIce 4.0 Internal Documentation.
+			Updated to 23 March 2015
+
+(Please note that incremental image support mentioned in this document is work
+in progress. This document may need updating prior to the actual release of
+4.0!)
+
+1.  Introduction.
+
+    TuxOnIce 4.0 is an addition to the Linux Kernel, designed to
+    allow the user to quickly shutdown and quickly boot a computer, without
+    needing to close documents or programs. It is equivalent to the
+    hibernate facility in some laptops. This implementation, however,
+    requires no special BIOS or hardware support.
+
+    The code in these files is based upon the original implementation
+    prepared by Gabor Kuti and additional work by Pavel Machek and a
+    host of others. This code has been substantially reworked by Nigel
+    Cunningham, again with the help and testing of many others, not the
+    least of whom are Bernard Blackham and Michael Frank. At its heart,
+    however, the operation is essentially the same as Gabor's version.
+
+2.  Overview of operation.
+
+    The basic sequence of operations is as follows:
+
+	a. Quiesce all other activity.
+	b. Ensure enough memory and storage space are available, and attempt
+	   to free memory/storage if necessary.
+	c. Allocate the required memory and storage space.
+	d. Write the image.
+	e. Power down.
+
+    There are a number of complicating factors which mean that things are
+    not as simple as the above would imply, however...
+
+    o The activity of each process must be stopped at a point where it will
+    not be holding locks necessary for saving the image, or unexpectedly
+    restart operations due to something like a timeout and thereby make
+    our image inconsistent.
+
+    o It is desirous that we sync outstanding I/O to disk before calculating
+    image statistics. This reduces corruption if one should suspend but
+    then not resume, and also makes later parts of the operation safer (see
+    below).
+
+    o We need to get as close as we can to an atomic copy of the data.
+    Inconsistencies in the image will result in inconsistent memory contents at
+    resume time, and thus in instability of the system and/or file system
+    corruption. This would appear to imply a maximum image size of one half of
+    the amount of RAM, but we have a solution... (again, below).
+
+    o In 2.6 and later, we choose to play nicely with the other suspend-to-disk
+    implementations.
+
+3.  Detailed description of internals.
+
+    a. Quiescing activity.
+
+    Safely quiescing the system is achieved using three separate but related
+    aspects.
+
+    First, we use the vanilla kerne's support for freezing processes. This code
+    is based on the observation that the vast majority of processes don't need
+    to run during suspend. They can be 'frozen'. The kernel therefore
+    implements a refrigerator routine, which processes enter and in which they
+    remain until the cycle is complete. Processes enter the refrigerator via
+    try_to_freeze() invocations at appropriate places.  A process cannot be
+    frozen in any old place. It must not be holding locks that will be needed
+    for writing the image or freezing other processes. For this reason,
+    userspace processes generally enter the refrigerator via the signal
+    handling code, and kernel threads at the place in their event loops where
+    they drop locks and yield to other processes or sleep. The task of freezing
+    processes is complicated by the fact that there can be interdependencies
+    between processes. Freezing process A before process B may mean that
+    process B cannot be frozen, because it stops at waiting for process A
+    rather than in the refrigerator. This issue is seen where userspace waits
+    on freezeable kernel threads or fuse filesystem threads. To address this
+    issue, we implement the following algorithm for quiescing activity:
+
+	- Freeze filesystems (including fuse - userspace programs starting
+		new requests are immediately frozen; programs already running
+		requests complete their work before being frozen in the next
+		step)
+	- Freeze userspace
+	- Thaw filesystems (this is safe now that userspace is frozen and no
+		fuse requests are outstanding).
+	- Invoke sys_sync (noop on fuse).
+	- Freeze filesystems
+	- Freeze kernel threads
+
+    If we need to free memory, we thaw kernel threads and filesystems, but not
+    userspace. We can then free caches without worrying about deadlocks due to
+    swap files being on frozen filesystems or such like.
+
+    b. Ensure enough memory & storage are available.
+
+    We have a number of constraints to meet in order to be able to successfully
+    suspend and resume.
+
+    First, the image will be written in two parts, described below. One of
+    these parts needs to have an atomic copy made, which of course implies a
+    maximum size of one half of the amount of system memory. The other part
+    ('pageset') is not atomically copied, and can therefore be as large or
+    small as desired.
+
+    Second, we have constraints on the amount of storage available. In these
+    calculations, we may also consider any compression that will be done. The
+    cryptoapi module allows the user to configure an expected compression ratio.
+
+    Third, the user can specify an arbitrary limit on the image size, in
+    megabytes. This limit is treated as a soft limit, so that we don't fail the
+    attempt to suspend if we cannot meet this constraint.
+
+    c. Allocate the required memory and storage space.
+
+    Having done the initial freeze, we determine whether the above constraints
+    are met, and seek to allocate the metadata for the image. If the constraints
+    are not met, or we fail to allocate the required space for the metadata, we
+    seek to free the amount of memory that we calculate is needed and try again.
+    We allow up to four iterations of this loop before aborting the cycle. If
+    we do fail, it should only be because of a bug in TuxOnIce's calculations
+    or the vanilla kernel code for freeing memory.
+
+    These steps are merged together in the prepare_image function, found in
+    prepare_image.c. The functions are merged because of the cyclical nature
+    of the problem of calculating how much memory and storage is needed. Since
+    the data structures containing the information about the image must
+    themselves take memory and use storage, the amount of memory and storage
+    required changes as we prepare the image. Since the changes are not large,
+    only one or two iterations will be required to achieve a solution.
+
+    The recursive nature of the algorithm is miminised by keeping user space
+    frozen while preparing the image, and by the fact that our records of which
+    pages are to be saved and which pageset they are saved in use bitmaps (so
+    that changes in number or fragmentation of the pages to be saved don't
+    feedback via changes in the amount of memory needed for metadata). The
+    recursiveness is thus limited to any extra slab pages allocated to store the
+    extents that record storage used, and the effects of seeking to free memory.
+
+    d. Write the image.
+
+    We previously mentioned the need to create an atomic copy of the data, and
+    the half-of-memory limitation that is implied in this. This limitation is
+    circumvented by dividing the memory to be saved into two parts, called
+    pagesets.
+
+    Pageset2 contains most of the page cache - the pages on the active and
+    inactive LRU lists that aren't needed or modified while TuxOnIce is
+    running, so they can be safely written without an atomic copy. They are
+    therefore saved first and reloaded last. While saving these pages,
+    TuxOnIce carefully ensures that the work of writing the pages doesn't make
+    the image inconsistent. With the support for Kernel (Video) Mode Setting
+    going into the kernel at the time of writing, we need to check for pages
+    on the LRU that are used by KMS, and exclude them from pageset2. They are
+    atomically copied as part of pageset 1.
+
+    Once pageset2 has been saved, we prepare to do the atomic copy of remaining
+    memory. As part of the preparation, we power down drivers, thereby providing
+    them with the opportunity to have their state recorded in the image. The
+    amount of memory allocated by drivers for this is usually negligible, but if
+    DRI is in use, video drivers may require significants amounts. Ideally we
+    would be able to query drivers while preparing the image as to the amount of
+    memory they will need. Unfortunately no such mechanism exists at the time of
+    writing. For this reason, TuxOnIce allows the user to set an
+    'extra_pages_allowance', which is used to seek to ensure sufficient memory
+    is available for drivers at this point. TuxOnIce also lets the user set this
+    value to 0. In this case, a test driver suspend is done while preparing the
+    image, and the difference (plus a margin) used instead. TuxOnIce will also
+    automatically restart the hibernation process (twice at most) if it finds
+    that the extra pages allowance is not sufficient. It will then use what was
+    actually needed (plus a margin, again). Failure to hibernate should thus
+    be an extremely rare occurence.
+
+    Having suspended the drivers, we save the CPU context before making an
+    atomic copy of pageset1, resuming the drivers and saving the atomic copy.
+    After saving the two pagesets, we just need to save our metadata before
+    powering down.
+
+    As we mentioned earlier, the contents of pageset2 pages aren't needed once
+    they've been saved. We therefore use them as the destination of our atomic
+    copy. In the unlikely event that pageset1 is larger, extra pages are
+    allocated while the image is being prepared. This is normally only a real
+    possibility when the system has just been booted and the page cache is
+    small.
+
+    This is where we need to be careful about syncing, however. Pageset2 will
+    probably contain filesystem meta data. If this is overwritten with pageset1
+    and then a sync occurs, the filesystem will be corrupted - at least until
+    resume time and another sync of the restored data. Since there is a
+    possibility that the user might not resume or (may it never be!) that
+    TuxOnIce might oops, we do our utmost to avoid syncing filesystems after
+    copying pageset1.
+
+    e. Incremental images
+
+    TuxOnIce 4.0 introduces a new incremental image mode which changes things a
+    little. When incremental images are enabled, we save a 'normal' image the
+    first time we hibernate. One resume however, we do not free the image or
+    the associated storage. Instead, it is retained until the next attempt at
+    hibernating and a mechanism is enabled which is used to track which pages
+    of memory are modified between the two cycles. The modified pages can then
+    be added to the existing image, rather than unmodified pages being saved
+    again unnecessarily.
+
+    Incremental image support is available in 64 bit Linux only, due to the
+    requirement for extra page flags.
+
+    This support is accomplished in the following way:
+
+    1) Tracking of pages.
+
+    The tracking of changed pages is accomplished using the page fault
+    mechanism. When we reach a point at which we want to start tracking
+    changes, most pages are marked read-only and also flagged as being
+    read-only because of this support. Since this cannot happen for every page
+    of RAM, some are marked as untracked and always treated as modified whn
+    preparing an incremental iamge. When a process attempts to modify a page
+    that is marked read-only in this way, a page fault occurs, with TuxOnIce
+    code marking the page writable and dirty before allowing the write to
+    continue. In this way, the effect of incremental images on performance is
+    minimised - a page only causes a fault once. Small modifications to the
+    page allocator further reduce the number of faults that occur - free pages
+    are not tracked; they are made writable and marked as dirty as part of
+    being allocated.
+
+    2) Saving the incremental image / atomicity.
+
+    The page fault mechanism is also used to improve the means by which
+    atomicity of the image is acheived. When it is time to do an atomic copy,
+    the flags for pages are reset, with the result being that it is no longer
+    necessary for us to do an atomic of pageset1. Instead, we normally write
+    the uncopied pages to disk. When an attempt is made to modify a page that
+    has not yet been saved, the page-fault mechanism makes a copy of the page
+    prior to allowing the write. This copy is then written to disk. Likewise,
+    on resume, if a process attempts to write to a page that has been read
+    while the rest of the image is still being loaded, a copy of that page is
+    made prior to the write being allowed. At the end of loading the image,
+    modified pages can thus be restored to their 'atomic copy' contents prior
+    to restarting normal operation. We also mark pages that are yet to be read
+    as invalid PFNs, so that we can capture as a bug any attempt by a
+    half-restored kernel to access a page that hasn't yet been reloaded.
+
+    f. Power down.
+
+    Powering down uses standard kernel routines. TuxOnIce supports powering down
+    using the ACPI S3, S4 and S5 methods or the kernel's non-ACPI power-off.
+    Supporting suspend to ram (S3) as a power off option might sound strange,
+    but it allows the user to quickly get their system up and running again if
+    the battery doesn't run out (we just need to re-read the overwritten pages)
+    and if the battery does run out (or the user removes power), they can still
+    resume.
+
+4.  Data Structures.
+
+    TuxOnIce uses three main structures to store its metadata and configuration
+    information:
+
+    a) Pageflags bitmaps.
+
+    TuxOnIce records which pages will be in pageset1, pageset2, the destination
+    of the atomic copy and the source of the atomically restored image using
+    bitmaps. The code used is that written for swsusp, with small improvements
+    to match TuxOnIce's requirements.
+
+    The pageset1 bitmap is thus easily stored in the image header for use at
+    resume time.
+
+    As mentioned above, using bitmaps also means that the amount of memory and
+    storage required for recording the above information is constant. This
+    greatly simplifies the work of preparing the image. In earlier versions of
+    TuxOnIce, extents were used to record which pages would be stored. In that
+    case, however, eating memory could result in greater fragmentation of the
+    lists of pages, which in turn required more memory to store the extents and
+    more storage in the image header. These could in turn require further
+    freeing of memory, and another iteration. All of this complexity is removed
+    by having bitmaps.
+
+    Bitmaps also make a lot of sense because TuxOnIce only ever iterates
+    through the lists. There is therefore no cost to not being able to find the
+    nth page in order 0 time. We only need to worry about the cost of finding
+    the n+1th page, given the location of the nth page. Bitwise optimisations
+    help here.
+
+    b) Extents for block data.
+
+    TuxOnIce supports writing the image to multiple block devices. In the case
+    of swap, multiple partitions and/or files may be in use, and we happily use
+    them all (with the exception of compcache pages, which we allocate but do
+    not use). This use of multiple block devices is accomplished as follows:
+
+    Whatever the actual source of the allocated storage, the destination of the
+    image can be viewed in terms of one or more block devices, and on each
+    device, a list of sectors. To simplify matters, we only use contiguous,
+    PAGE_SIZE aligned sectors, like the swap code does.
+
+    Since sector numbers on each bdev may well not start at 0, it makes much
+    more sense to use extents here. Contiguous ranges of pages can thus be
+    represented in the extents by contiguous values.
+
+    Variations in block size are taken account of in transforming this data
+    into the parameters for bio submission.
+
+    We can thus implement a layer of abstraction wherein the core of TuxOnIce
+    doesn't have to worry about which device we're currently writing to or
+    where in the device we are. It simply requests that the next page in the
+    pageset or header be written, leaving the details to this lower layer.
+    The lower layer remembers where in the sequence of devices and blocks each
+    pageset starts. The header always starts at the beginning of the allocated
+    storage.
+
+    So extents are:
+
+    struct extent {
+      unsigned long minimum, maximum;
+      struct extent *next;
+    }
+
+    These are combined into chains of extents for a device:
+
+    struct extent_chain {
+      int size; /* size of the extent ie sum (max-min+1) */
+      int allocs, frees;
+      char *name;
+      struct extent *first, *last_touched;
+    };
+
+    For each bdev, we need to store a little more info (simplified definition):
+
+    struct toi_bdev_info {
+       struct block_device *bdev;
+
+       char uuid[17];
+       dev_t dev_t;
+       int bmap_shift;
+       int blocks_per_page;
+    };
+
+    The uuid is the main means used to identify the device in the storage
+    image. This means we can cope with the dev_t representation of a device
+    changing between saving the image and restoring it, as may happen on some
+    bioses or in the LVM case.
+
+    bmap_shift and blocks_per_page apply the effects of variations in blocks
+    per page settings for the filesystem and underlying bdev. For most
+    filesystems, these are the same, but for xfs, they can have independant
+    values.
+
+    Combining these two structures together, we have everything we need to
+    record what devices and what blocks on each device are being used to
+    store the image, and to submit i/o using bio_submit.
+
+    The last elements in the picture are a means of recording how the storage
+    is being used.
+
+    We do this first and foremost by implementing a layer of abstraction on
+    top of the devices and extent chains which allows us to view however many
+    devices there might be as one long storage tape, with a single 'head' that
+    tracks a 'current position' on the tape:
+
+    struct extent_iterate_state {
+      struct extent_chain *chains;
+      int num_chains;
+      int current_chain;
+      struct extent *current_extent;
+      unsigned long current_offset;
+    };
+
+    That is, *chains points to an array of size num_chains of extent chains.
+    For the filewriter, this is always a single chain. For the swapwriter, the
+    array is of size MAX_SWAPFILES.
+
+    current_chain, current_extent and current_offset thus point to the current
+    index in the chains array (and into a matching array of struct
+    suspend_bdev_info), the current extent in that chain (to optimise access),
+    and the current value in the offset.
+
+    The image is divided into three parts:
+    - The header
+    - Pageset 1
+    - Pageset 2
+
+    The header always starts at the first device and first block. We know its
+    size before we begin to save the image because we carefully account for
+    everything that will be stored in it.
+
+    The second pageset (LRU) is stored first. It begins on the next page after
+    the end of the header.
+
+    The first pageset is stored second. It's start location is only known once
+    pageset2 has been saved, since pageset2 may be compressed as it is written.
+    This location is thus recorded at the end of saving pageset2. It is page
+    aligned also.
+
+    Since this information is needed at resume time, and the location of extents
+    in memory will differ at resume time, this needs to be stored in a portable
+    way:
+
+    struct extent_iterate_saved_state {
+        int chain_num;
+        int extent_num;
+        unsigned long offset;
+    };
+
+    We can thus implement a layer of abstraction wherein the core of TuxOnIce
+    doesn't have to worry about which device we're currently writing to or
+    where in the device we are. It simply requests that the next page in the
+    pageset or header be written, leaving the details to this layer, and
+    invokes the routines to remember and restore the position, without having
+    to worry about the details of how the data is arranged on disk or such like.
+
+    c) Modules
+
+    One aim in designing TuxOnIce was to make it flexible. We wanted to allow
+    for the implementation of different methods of transforming a page to be
+    written to disk and different methods of getting the pages stored.
+
+    In early versions (the betas and perhaps Suspend1), compression support was
+    inlined in the image writing code, and the data structures and code for
+    managing swap were intertwined with the rest of the code. A number of people
+    had expressed interest in implementing image encryption, and alternative
+    methods of storing the image.
+
+    In order to achieve this, TuxOnIce was given a modular design.
+
+    A module is a single file which encapsulates the functionality needed
+    to transform a pageset of data (encryption or compression, for example),
+    or to write the pageset to a device. The former type of module is called
+    a 'page-transformer', the later a 'writer'.
+
+    Modules are linked together in pipeline fashion. There may be zero or more
+    page transformers in a pipeline, and there is always exactly one writer.
+    The pipeline follows this pattern:
+
+		---------------------------------
+		|          TuxOnIce Core        |
+		---------------------------------
+				|
+				|
+		---------------------------------
+		|	Page transformer 1	|
+		---------------------------------
+				|
+				|
+		---------------------------------
+		|	Page transformer 2	|
+		---------------------------------
+				|
+				|
+		---------------------------------
+		|            Writer		|
+		---------------------------------
+
+    During the writing of an image, the core code feeds pages one at a time
+    to the first module. This module performs whatever transformations it
+    implements on the incoming data, completely consuming the incoming data and
+    feeding output in a similar manner to the next module.
+
+    All routines are SMP safe, and the final result of the transformations is
+    written with an index (provided by the core) and size of the output by the
+    writer. As a result, we can have multithreaded I/O without needing to
+    worry about the sequence in which pages are written (or read).
+
+    During reading, the pipeline works in the reverse direction. The core code
+    calls the first module with the address of a buffer which should be filled.
+    (Note that the buffer size is always PAGE_SIZE at this time). This module
+    will in turn request data from the next module and so on down until the
+    writer is made to read from the stored image.
+
+    Part of definition of the structure of a module thus looks like this:
+
+        int (*rw_init) (int rw, int stream_number);
+        int (*rw_cleanup) (int rw);
+        int (*write_chunk) (struct page *buffer_page);
+        int (*read_chunk) (struct page *buffer_page, int sync);
+
+    It should be noted that the _cleanup routine may be called before the
+    full stream of data has been read or written. While writing the image,
+    the user may (depending upon settings) choose to abort suspending, and
+    if we are in the midst of writing the last portion of the image, a portion
+    of the second pageset may be reread. This may also happen if an error
+    occurs and we seek to abort the process of writing the image.
+
+    The modular design is also useful in a number of other ways. It provides
+    a means where by we can add support for:
+
+    - providing overall initialisation and cleanup routines;
+    - serialising configuration information in the image header;
+    - providing debugging information to the user;
+    - determining memory and image storage requirements;
+    - dis/enabling components at run-time;
+    - configuring the module (see below);
+
+    ...and routines for writers specific to their work:
+    - Parsing a resume= location;
+    - Determining whether an image exists;
+    - Marking a resume as having been attempted;
+    - Invalidating an image;
+
+    Since some parts of the core - the user interface and storage manager
+    support - have use for some of these functions, they are registered as
+    'miscellaneous' modules as well.
+
+    d) Sysfs data structures.
+
+    This brings us naturally to support for configuring TuxOnIce. We desired to
+    provide a way to make TuxOnIce as flexible and configurable as possible.
+    The user shouldn't have to reboot just because they want to now hibernate to
+    a file instead of a partition, for example.
+
+    To accomplish this, TuxOnIce implements a very generic means whereby the
+    core and modules can register new sysfs entries. All TuxOnIce entries use
+    a single _store and _show routine, both of which are found in
+    tuxonice_sysfs.c in the kernel/power directory. These routines handle the
+    most common operations - getting and setting the values of bits, integers,
+    longs, unsigned longs and strings in one place, and allow overrides for
+    customised get and set options as well as side-effect routines for all
+    reads and writes.
+
+    When combined with some simple macros, a new sysfs entry can then be defined
+    in just a couple of lines:
+
+        SYSFS_INT("progress_granularity", SYSFS_RW, &progress_granularity, 1,
+                        2048, 0, NULL),
+
+    This defines a sysfs entry named "progress_granularity" which is rw and
+    allows the user to access an integer stored at &progress_granularity, giving
+    it a value between 1 and 2048 inclusive.
+
+    Sysfs entries are registered under /sys/power/tuxonice, and entries for
+    modules are located in a subdirectory named after the module.
+
diff --git a/Documentation/power/tuxonice.txt b/Documentation/power/tuxonice.txt
new file mode 100644
index 000000000..3bf0575ef
--- /dev/null
+++ b/Documentation/power/tuxonice.txt
@@ -0,0 +1,948 @@
+	--- TuxOnIce, version 3.0 ---
+
+1.  What is it?
+2.  Why would you want it?
+3.  What do you need to use it?
+4.  Why not just use the version already in the kernel?
+5.  How do you use it?
+6.  What do all those entries in /sys/power/tuxonice do?
+7.  How do you get support?
+8.  I think I've found a bug. What should I do?
+9.  When will XXX be supported?
+10  How does it work?
+11. Who wrote TuxOnIce?
+
+1. What is it?
+
+   Imagine you're sitting at your computer, working away. For some reason, you
+   need to turn off your computer for a while - perhaps it's time to go home
+   for the day. When you come back to your computer next, you're going to want
+   to carry on where you left off. Now imagine that you could push a button and
+   have your computer store the contents of its memory to disk and power down.
+   Then, when you next start up your computer, it loads that image back into
+   memory and you can carry on from where you were, just as if you'd never
+   turned the computer off. You have far less time to start up, no reopening of
+   applications or finding what directory you put that file in yesterday.
+   That's what TuxOnIce does.
+
+   TuxOnIce has a long heritage. It began life as work by Gabor Kuti, who,
+   with some help from Pavel Machek, got an early version going in 1999. The
+   project was then taken over by Florent Chabaud while still in alpha version
+   numbers. Nigel Cunningham came on the scene when Florent was unable to
+   continue, moving the project into betas, then 1.0, 2.0 and so on up to
+   the present series. During the 2.0 series, the name was contracted to
+   Suspend2 and the website suspend2.net created. Beginning around July 2007,
+   a transition to calling the software TuxOnIce was made, to seek to help
+   make it clear that TuxOnIce is more concerned with hibernation than suspend
+   to ram.
+
+   Pavel Machek's swsusp code, which was merged around 2.5.17 retains the
+   original name, and was essentially a fork of the beta code until Rafael
+   Wysocki came on the scene in 2005 and began to improve it further.
+
+2. Why would you want it?
+
+   Why wouldn't you want it?
+
+   Being able to save the state of your system and quickly restore it improves
+   your productivity - you get a useful system in far less time than through
+   the normal boot process. You also get to be completely 'green', using zero
+   power, or as close to that as possible (the computer may still provide
+   minimal power to some devices, so they can initiate a power on, but that
+   will be the same amount of power as would be used if you told the computer
+   to shutdown.
+
+3. What do you need to use it?
+
+   a. Kernel Support.
+
+   i) The TuxOnIce patch.
+
+   TuxOnIce is part of the Linux Kernel. This version is not part of Linus's
+   2.6 tree at the moment, so you will need to download the kernel source and
+   apply the latest patch. Having done that, enable the appropriate options in
+   make [menu|x]config (under Power Management Options - look for "Enhanced
+   Hibernation"), compile and install your kernel. TuxOnIce works with SMP,
+   Highmem, preemption, fuse filesystems, x86-32, PPC and x86_64.
+
+   TuxOnIce patches are available from http://tuxonice.net.
+
+   ii) Compression support.
+
+   Compression support is implemented via the cryptoapi. You will therefore want
+   to select any Cryptoapi transforms that you want to use on your image from
+   the Cryptoapi menu while configuring your kernel. We recommend the use of the
+   LZO compression method - it is very fast and still achieves good compression.
+
+   You can also tell TuxOnIce to write its image to an encrypted and/or
+   compressed filesystem/swap partition. In that case, you don't need to do
+   anything special for TuxOnIce when it comes to kernel configuration.
+
+   iii) Configuring other options.
+
+   While you're configuring your kernel, try to configure as much as possible
+   to build as modules. We recommend this because there are a number of drivers
+   that are still in the process of implementing proper power management
+   support. In those cases, the best way to work around their current lack is
+   to build them as modules and remove the modules while hibernating. You might
+   also bug the driver authors to get their support up to speed, or even help!
+
+   b. Storage.
+
+   i) Swap.
+
+   TuxOnIce can store the hibernation image in your swap partition, a swap file or
+   a combination thereof. Whichever combination you choose, you will probably
+   want to create enough swap space to store the largest image you could have,
+   plus the space you'd normally use for swap. A good rule of thumb would be
+   to calculate the amount of swap you'd want without using TuxOnIce, and then
+   add the amount of memory you have. This swapspace can be arranged in any way
+   you'd like. It can be in one partition or file, or spread over a number. The
+   only requirement is that they be active when you start a hibernation cycle.
+
+   There is one exception to this requirement. TuxOnIce has the ability to turn
+   on one swap file or partition at the start of hibernating and turn it back off
+   at the end. If you want to ensure you have enough memory to store a image
+   when your memory is fully used, you might want to make one swap partition or
+   file for 'normal' use, and another for TuxOnIce to activate & deactivate
+   automatically. (Further details below).
+
+   ii) Normal files.
+
+   TuxOnIce includes a 'file allocator'. The file allocator can store your
+   image in a simple file. Since Linux has the concept of everything being a
+   file, this is more powerful than it initially sounds. If, for example, you
+   were to set up a network block device file, you could hibernate to a network
+   server. This has been tested and works to a point, but nbd itself isn't
+   stateless enough for our purposes.
+
+   Take extra care when setting up the file allocator. If you just type
+   commands without thinking and then try to hibernate, you could cause
+   irreversible corruption on your filesystems! Make sure you have backups.
+
+   Most people will only want to hibernate to a local file. To achieve that, do
+   something along the lines of:
+
+   echo "TuxOnIce" > /hibernation-file
+   dd if=/dev/zero bs=1M count=512 >> /hibernation-file
+
+   This will create a 512MB file called /hibernation-file. To get TuxOnIce to use
+   it:
+
+   echo /hibernation-file > /sys/power/tuxonice/file/target
+
+   Then
+
+   cat /sys/power/tuxonice/resume
+
+   Put the results of this into your bootloader's configuration (see also step
+   C, below):
+
+   ---EXAMPLE-ONLY-DON'T-COPY-AND-PASTE---
+   # cat /sys/power/tuxonice/resume
+   file:/dev/hda2:0x1e001
+
+   In this example, we would edit the append= line of our lilo.conf|menu.lst
+   so that it included:
+
+   resume=file:/dev/hda2:0x1e001
+   ---EXAMPLE-ONLY-DON'T-COPY-AND-PASTE---
+
+   For those who are thinking 'Could I make the file sparse?', the answer is
+   'No!'. At the moment, there is no way for TuxOnIce to fill in the holes in
+   a sparse file while hibernating. In the longer term (post merge!), I'd like
+   to change things so that the file could be dynamically resized and have
+   holes filled as needed. Right now, however, that's not possible and not a
+   priority.
+
+   c. Bootloader configuration.
+
+   Using TuxOnIce also requires that you add an extra parameter to
+   your lilo.conf or equivalent. Here's an example for a swap partition:
+
+   append="resume=swap:/dev/hda1"
+
+   This would tell TuxOnIce that /dev/hda1 is a swap partition you
+   have. TuxOnIce will use the swap signature of this partition as a
+   pointer to your data when you hibernate. This means that (in this example)
+   /dev/hda1 doesn't need to be _the_ swap partition where all of your data
+   is actually stored. It just needs to be a swap partition that has a
+   valid signature.
+
+   You don't need to have a swap partition for this purpose. TuxOnIce
+   can also use a swap file, but usage is a little more complex. Having made
+   your swap file, turn it on and do
+
+   cat /sys/power/tuxonice/swap/headerlocations
+
+   (this assumes you've already compiled your kernel with TuxOnIce
+   support and booted it). The results of the cat command will tell you
+   what you need to put in lilo.conf:
+
+   For swap partitions like /dev/hda1, simply use resume=/dev/hda1.
+   For swapfile `swapfile`, use resume=swap:/dev/hda2:0x242d.
+
+   If the swapfile changes for any reason (it is moved to a different
+   location, it is deleted and recreated, or the filesystem is
+   defragmented) then you will have to check
+   /sys/power/tuxonice/swap/headerlocations for a new resume_block value.
+
+   Once you've compiled and installed the kernel and adjusted your bootloader
+   configuration, you should only need to reboot for the most basic part
+   of TuxOnIce to be ready.
+
+   If you only compile in the swap allocator, or only compile in the file
+   allocator, you don't need to add the "swap:" part of the resume=
+   parameters above. resume=/dev/hda2:0x242d will work just as well. If you
+   have compiled both and your storage is on swap, you can also use this
+   format (the swap allocator is the default allocator).
+
+   When compiling your kernel, one of the options in the 'Power Management
+   Support' menu, just above the 'Enhanced Hibernation (TuxOnIce)' entry is
+   called 'Default resume partition'. This can be used to set a default value
+   for the resume= parameter.
+
+   d. The hibernate script.
+
+   Since the driver model in 2.6 kernels is still being developed, you may need
+   to do more than just configure TuxOnIce. Users of TuxOnIce usually start the
+   process via a script which prepares for the hibernation cycle, tells the
+   kernel to do its stuff and then restore things afterwards. This script might
+   involve:
+
+   - Switching to a text console and back if X doesn't like the video card
+     status on resume.
+   - Un/reloading drivers that don't play well with hibernation.
+
+   Note that you might not be able to unload some drivers if there are
+   processes using them. You might have to kill off processes that hold
+   devices open. Hint: if your X server accesses an USB mouse, doing a
+   'chvt' to a text console releases the device and you can unload the
+   module.
+
+   Check out the latest script (available on tuxonice.net).
+
+   e. The userspace user interface.
+
+   TuxOnIce has very limited support for displaying status if you only apply
+   the kernel patch - it can printk messages, but that is all. In addition,
+   some of the functions mentioned in this document (such as cancelling a cycle
+   or performing interactive debugging) are unavailable. To utilise these
+   functions, or simply get a nice display, you need the 'userui' component.
+   Userui comes in three flavours, usplash, fbsplash and text. Text should
+   work on any console. Usplash and fbsplash require the appropriate
+   (distro specific?) support.
+
+   To utilise a userui, TuxOnIce just needs to be told where to find the
+   userspace binary:
+
+   echo "/usr/local/sbin/tuxoniceui_fbsplash" > /sys/power/tuxonice/user_interface/program
+
+   The hibernate script can do this for you, and a default value for this
+   setting can be configured when compiling the kernel. This path is also
+   stored in the image header, so if you have an initrd or initramfs, you can
+   use the userui during the first part of resuming (prior to the atomic
+   restore) by putting the binary in the same path in your initrd/ramfs.
+   Alternatively, you can put it in a different location and do an echo
+   similar to the above prior to the echo > do_resume. The value saved in the
+   image header will then be ignored.
+
+4. Why not just use the version already in the kernel?
+
+   The version in the vanilla kernel has a number of drawbacks. The most
+   serious of these are:
+	- it has a maximum image size of 1/2 total memory;
+	- it doesn't allocate storage until after it has snapshotted memory.
+	  This means that you can't be sure hibernating will work until you
+	  see it start to write the image;
+	- it does not allow you to press escape to cancel a cycle;
+	- it does not allow you to press escape to cancel resuming;
+	- it does not allow you to automatically swapon a file when
+	  starting a cycle;
+	- it does not allow you to use multiple swap partitions or files;
+	- it does not allow you to use ordinary files;
+	- it just invalidates an image and continues to boot if you
+	  accidentally boot the wrong kernel after hibernating;
+	- it doesn't support any sort of nice display while hibernating;
+	- it is moving toward requiring that you have an initrd/initramfs
+	  to ever have a hope of resuming (uswsusp). While uswsusp will
+	  address some of the concerns above, it won't address all of them,
+          and will be more complicated to get set up;
+        - it doesn't have support for suspend-to-both (write a hibernation
+	  image, then suspend to ram; I think this is known as ReadySafe
+	  under M$).
+
+5. How do you use it?
+
+   A hibernation cycle can be started directly by doing:
+
+	echo > /sys/power/tuxonice/do_hibernate
+
+   In practice, though, you'll probably want to use the hibernate script
+   to unload modules, configure the kernel the way you like it and so on.
+   In that case, you'd do (as root):
+
+	hibernate
+
+   See the hibernate script's man page for more details on the options it
+   takes.
+
+   If you're using the text or splash user interface modules, one feature of
+   TuxOnIce that you might find useful is that you can press Escape at any time
+   during hibernating, and the process will be aborted.
+
+   Due to the way hibernation works, this means you'll have your system back and
+   perfectly usable almost instantly. The only exception is when it's at the
+   very end of writing the image. Then it will need to reload a small (usually
+   4-50MBs, depending upon the image characteristics) portion first.
+
+   Likewise, when resuming, you can press escape and resuming will be aborted.
+   The computer will then powerdown again according to settings at that time for
+   the powerdown method or rebooting.
+
+   You can change the settings for powering down while the image is being
+   written by pressing 'R' to toggle rebooting and 'O' to toggle between
+   suspending to ram and powering down completely).
+
+   If you run into problems with resuming, adding the "noresume" option to
+   the kernel command line will let you skip the resume step and recover your
+   system. This option shouldn't normally be needed, because TuxOnIce modifies
+   the image header prior to the atomic restore, and will thus prompt you
+   if it detects that you've tried to resume an image before (this flag is
+   removed if you press Escape to cancel a resume, so you won't be prompted
+   then).
+
+   Recent kernels (2.6.24 onwards) add support for resuming from a different
+   kernel to the one that was hibernated (thanks to Rafael for his work on
+   this - I've just embraced and enhanced the support for TuxOnIce). This
+   should further reduce the need for you to use the noresume option.
+
+6. What do all those entries in /sys/power/tuxonice do?
+
+   /sys/power/tuxonice is the directory which contains files you can use to
+   tune and configure TuxOnIce to your liking. The exact contents of
+   the directory will depend upon the version of TuxOnIce you're
+   running and the options you selected at compile time. In the following
+   descriptions, names in brackets refer to compile time options.
+   (Note that they're all dependant upon you having selected CONFIG_TUXONICE
+   in the first place!).
+
+   Since the values of these settings can open potential security risks, the
+   writeable ones are accessible only to the root user. You may want to
+   configure sudo to allow you to invoke your hibernate script as an ordinary
+   user.
+
+   - alloc/failure_test
+
+   This debugging option provides a way of testing TuxOnIce's handling of
+   memory allocation failures. Each allocation type that TuxOnIce makes has
+   been given a unique number (see the source code). Echo the appropriate
+   number into this entry, and when TuxOnIce attempts to do that allocation,
+   it will pretend there was a failure and act accordingly.
+
+   - alloc/find_max_mem_allocated
+
+   This debugging option will cause TuxOnIce to find the maximum amount of
+   memory it used during a cycle, and report that information in debugging
+   information at the end of the cycle.
+
+   - alt_resume_param
+
+   Instead of powering down after writing a hibernation image, TuxOnIce
+   supports resuming from a different image. This entry lets you set the
+   location of the signature for that image (the resume= value you'd use
+   for it). Using an alternate image and keep_image mode, you can do things
+   like using an alternate image to power down an uninterruptible power
+   supply.
+
+   - block_io/target_outstanding_io
+
+   This value controls the amount of memory that the block I/O code says it
+   needs when the core code is calculating how much memory is needed for
+   hibernating and for resuming. It doesn't directly control the amount of
+   I/O that is submitted at any one time - that depends on the amount of
+   available memory (we may have more available than we asked for), the
+   throughput that is being achieved and the ability of the CPU to keep up
+   with disk throughput (particularly where we're compressing pages).
+
+   - checksum/enabled
+
+   Use cryptoapi hashing routines to verify that Pageset2 pages don't change
+   while we're saving the first part of the image, and to get any pages that
+   do change resaved in the atomic copy. This should normally not be needed,
+   but if you're seeing issues, please enable this. If your issues stop you
+   being able to resume, enable this option, hibernate and cancel the cycle
+   after the atomic copy is done. If the debugging info shows a non-zero
+   number of pages resaved, please report this to Nigel.
+
+   - compression/algorithm
+
+   Set the cryptoapi algorithm used for compressing the image.
+
+   - compression/expected_compression
+
+   These values allow you to set an expected compression ratio, which TuxOnice
+   will use in calculating whether it meets constraints on the image size. If
+   this expected compression ratio is not attained, the hibernation cycle will
+   abort, so it is wise to allow some spare. You can see what compression
+   ratio is achieved in the logs after hibernating.
+
+   - debug_info:
+
+   This file returns information about your configuration that may be helpful
+   in diagnosing problems with hibernating.
+
+   - did_suspend_to_both:
+
+   This file can be used when you hibernate with powerdown method 3 (ie suspend
+   to ram after writing the image). There can be two outcomes in this case. We
+   can resume from the suspend-to-ram before the battery runs out, or we can run
+   out of juice and and up resuming like normal. This entry lets you find out,
+   post resume, which way we went. If the value is 1, we resumed from suspend
+   to ram. This can be useful when actions need to be run post suspend-to-ram
+   that don't need to be run if we did the normal resume from power off.
+
+   - do_hibernate:
+
+   When anything is written to this file, the kernel side of TuxOnIce will
+   begin to attempt to write an image to disk and power down. You'll normally
+   want to run the hibernate script instead, to get modules unloaded first.
+
+   - do_resume:
+
+   When anything is written to this file TuxOnIce will attempt to read and
+   restore an image. If there is no image, it will return almost immediately.
+   If an image exists, the echo > will never return. Instead, the original
+   kernel context will be restored and the original echo > do_hibernate will
+   return.
+
+   - */enabled
+
+   These option can be used to temporarily disable various parts of TuxOnIce.
+
+   - extra_pages_allowance
+
+   When TuxOnIce does its atomic copy, it calls the driver model suspend
+   and resume methods. If you have DRI enabled with a driver such as fglrx,
+   this can result in the driver allocating a substantial amount of memory
+   for storing its state. Extra_pages_allowance tells TuxOnIce how much
+   extra memory it should ensure is available for those allocations. If
+   your attempts at hibernating end with a message in dmesg indicating that
+   insufficient extra pages were allowed, you need to increase this value.
+
+   - file/target:
+
+   Read this value to get the current setting. Write to it to point TuxOnice
+   at a new storage location for the file allocator. See section 3.b.ii above
+   for details of how to set up the file allocator.
+
+   - freezer_test
+
+   This entry can be used to get TuxOnIce to just test the freezer and prepare
+   an image without actually doing a hibernation cycle. It is useful for
+   diagnosing freezing and image preparation issues.
+
+   - full_pageset2
+
+   TuxOnIce divides the pages that are stored in an image into two sets. The
+   difference between the two sets is that pages in pageset 1 are atomically
+   copied, and pages in pageset 2 are written to disk without being copied
+   first. A page CAN be written to disk without being copied first if and only
+   if its contents will not be modified or used at any time after userspace
+   processes are frozen. A page MUST be in pageset 1 if its contents are
+   modified or used at any time after userspace processes have been frozen.
+
+   Normally (ie if this option is enabled), TuxOnIce will put all pages on the
+   per-zone LRUs in pageset2, then remove those pages used by any userspace
+   user interface helper and TuxOnIce storage manager that are running,
+   together with pages used by the GEM memory manager introduced around 2.6.28
+   kernels.
+
+   If this option is disabled, a much more conservative approach will be taken.
+   The only pages in pageset2 will be those belonging to userspace processes,
+   with the exclusion of those belonging to the TuxOnIce userspace helpers
+   mentioned above. This will result in a much smaller pageset2, and will
+   therefore result in smaller images than are possible with this option
+   enabled.
+
+   - ignore_rootfs
+
+   TuxOnIce records which device is mounted as the root filesystem when
+   writing the hibernation image. It will normally check at resume time that
+   this device isn't already mounted - that would be a cause of filesystem
+   corruption. In some particular cases (RAM based root filesystems), you
+   might want to disable this check. This option allows you to do that.
+
+   - image_exists:
+
+   Can be used in a script to determine whether a valid image exists at the
+   location currently pointed to by resume=. Returns up to three lines.
+   The first is whether an image exists (-1 for unsure, otherwise 0 or 1).
+   If an image eixsts, additional lines will return the machine and version.
+   Echoing anything to this entry removes any current image.
+
+   - image_size_limit:
+
+   The maximum size of hibernation image written to disk, measured in megabytes
+   (1024*1024).
+
+   - last_result:
+
+   The result of the last hibernation cycle, as defined in
+   include/linux/suspend-debug.h with the values SUSPEND_ABORTED to
+   SUSPEND_KEPT_IMAGE. This is a bitmask.
+
+   - late_cpu_hotplug:
+
+   This sysfs entry controls whether cpu hotplugging is done - as normal - just
+   before (unplug) and after (replug) the atomic copy/restore (so that all
+   CPUs/cores are available for multithreaded I/O). The alternative is to
+   unplug all secondary CPUs/cores at the start of hibernating/resuming, and
+   replug them at the end of resuming. No multithreaded I/O will be possible in
+   this configuration, but the odd machine has been reported to require it.
+
+   - lid_file:
+
+   This determines which ACPI button file we look in to determine whether the
+   lid is open or closed after resuming from suspend to disk or power off.
+   If the entry is set to "lid/LID", we'll open /proc/acpi/button/lid/LID/state
+   and check its contents at the appropriate moment. See post_wake_state below
+   for more details on how this entry is used.
+
+   - log_everything (CONFIG_PM_DEBUG):
+
+   Setting this option results in all messages printed being logged. Normally,
+   only a subset are logged, so as to not slow the process and not clutter the
+   logs. Useful for debugging. It can be toggled during a cycle by pressing
+   'L'.
+
+   - no_load_direct:
+
+   This is a debugging option. If, when loading the atomically copied pages of
+   an image, TuxOnIce finds that the destination address for a page is free,
+   it will normally allocate the image, load the data directly into that
+   address and skip it in the atomic restore. If this option is disabled, the
+   page will be loaded somewhere else and atomically restored like other pages.
+
+   - no_flusher_thread:
+
+   When doing multithreaded I/O (see below), the first online CPU can be used
+   to _just_ submit compressed pages when writing the image, rather than
+   compressing and submitting data. This option is normally disabled, but has
+   been included because Nigel would like to see whether it will be more useful
+   as the number of cores/cpus in computers increases.
+
+   - no_multithreaded_io:
+
+   TuxOnIce will normally create one thread per cpu/core on your computer,
+   each of which will then perform I/O. This will generally result in
+   throughput that's the maximum the storage medium can handle. There
+   shouldn't be any reason to disable multithreaded I/O now, but this option
+   has been retained for debugging purposes.
+
+   - no_pageset2
+
+   See the entry for full_pageset2 above for an explanation of pagesets.
+   Enabling this option causes TuxOnIce to do an atomic copy of all pages,
+   thereby limiting the maximum image size to 1/2 of memory, as swsusp does.
+
+   - no_pageset2_if_unneeded
+
+   See the entry for full_pageset2 above for an explanation of pagesets.
+   Enabling this option causes TuxOnIce to act like no_pageset2 was enabled
+   if and only it isn't needed anyway. This option may still make TuxOnIce
+   less reliable because pageset2 pages are normally used to store the
+   atomic copy - drivers that want to do allocations of larger amounts of
+   memory in one shot will be more likely to find that those amounts aren't
+   available if this option is enabled.
+
+   - pause_between_steps (CONFIG_PM_DEBUG):
+
+   This option is used during debugging, to make TuxOnIce pause between
+   each step of the process. It is ignored when the nice display is on.
+
+   - post_wake_state:
+
+   TuxOnIce provides support for automatically waking after a user-selected
+   delay, and using a different powerdown method if the lid is still closed.
+   (Yes, we're assuming a laptop).  This entry lets you choose what state
+   should be entered next. The values are those described under
+   powerdown_method, below. It can be used to suspend to RAM after hibernating,
+   then powerdown properly (say) 20 minutes. It can also be used to power down
+   properly, then wake at (say) 6.30am and suspend to RAM until you're ready
+   to use the machine.
+
+   - powerdown_method:
+
+   Used to select a method by which TuxOnIce should powerdown after writing the
+   image. Currently:
+
+   0: Don't use ACPI to power off.
+   3: Attempt to enter Suspend-to-ram.
+   4: Attempt to enter ACPI S4 mode.
+   5: Attempt to power down via ACPI S5 mode.
+
+   Note that these options are highly dependant upon your hardware & software:
+
+   3: When succesful, your machine suspends to ram instead of powering off.
+      The advantage of using this mode is that it doesn't matter whether your
+      battery has enough charge to make it through to your next resume. If it
+      lasts, you will simply resume from suspend to ram (and the image on disk
+      will be discarded). If the battery runs out, you will resume from disk
+      instead. The disadvantage is that it takes longer than a normal
+      suspend-to-ram to enter the state, since the suspend-to-disk image needs
+      to be written first.
+   4/5: When successful, your machine will be off and comsume (almost) no power.
+      But it might still react to some external events like opening the lid or
+      trafic on  a network or usb device. For the bios, resume is then the same
+      as warm boot, similar to a situation where you used the command `reboot'
+      to reboot your machine. If your machine has problems on warm boot or if
+      you want to protect your machine with the bios password, this is probably
+      not the right choice. Mode 4 may be necessary on some machines where ACPI
+      wake up methods need to be run to properly reinitialise hardware after a
+      hibernation cycle.
+   0: Switch the machine completely off. The only possible wakeup is the power
+      button. For the bios, resume is then the same as a cold boot, in
+      particular you would  have to provide your bios boot password if your
+      machine uses that feature for booting.
+
+   - progressbar_granularity_limit:
+
+   This option can be used to limit the granularity of the progress bar
+   displayed with a bootsplash screen. The value is the maximum number of
+   steps. That is, 10 will make the progress bar jump in 10% increments.
+
+   - reboot:
+
+   This option causes TuxOnIce to reboot rather than powering down
+   at the end of saving an image. It can be toggled during a cycle by pressing
+   'R'.
+
+   - resume:
+
+   This sysfs entry can be used to read and set the location in which TuxOnIce
+   will look for the signature of an image - the value set using resume= at
+   boot time or CONFIG_PM_STD_PARTITION ("Default resume partition"). By
+   writing to this file as well as modifying your bootloader's configuration
+   file (eg menu.lst), you can set or reset the location of your image or the
+   method of storing the image without rebooting.
+
+   - replace_swsusp (CONFIG_TOI_REPLACE_SWSUSP):
+
+   This option makes
+
+     echo disk > /sys/power/state
+
+   activate TuxOnIce instead of swsusp. Regardless of whether this option is
+   enabled, any invocation of swsusp's resume time trigger will cause TuxOnIce
+   to check for an image too. This is due to the fact that at resume time, we
+   can't know whether this option was enabled until we see if an image is there
+   for us to resume from. (And when an image exists, we don't care whether we
+   did replace swsusp anyway - we just want to resume).
+
+   - resume_commandline:
+
+   This entry can be read after resuming to see the commandline that was used
+   when resuming began. You might use this to set up two bootloader entries
+   that are the same apart from the fact that one includes a extra append=
+   argument "at_work=1". You could then grep resume_commandline in your
+   post-resume scripts and configure networking (for example) differently
+   depending upon whether you're at home or work. resume_commandline can be
+   set to arbitrary text if you wish to remove sensitive contents.
+
+   - swap/swapfilename:
+
+   This entry is used to specify the swapfile or partition that
+   TuxOnIce will attempt to swapon/swapoff automatically. Thus, if
+   I normally use /dev/hda1 for swap, and want to use /dev/hda2 for specifically
+   for my hibernation image, I would
+
+   echo /dev/hda2 > /sys/power/tuxonice/swap/swapfile
+
+   /dev/hda2 would then be automatically swapon'd and swapoff'd. Note that the
+   swapon and swapoff occur while other processes are frozen (including kswapd)
+   so this swap file will not be used up when attempting to free memory. The
+   parition/file is also given the highest priority, so other swapfiles/partitions
+   will only be used to save the image when this one is filled.
+
+   The value of this file is used by headerlocations along with any currently
+   activated swapfiles/partitions.
+
+   - swap/headerlocations:
+
+   This option tells you the resume= options to use for swap devices you
+   currently have activated. It is particularly useful when you only want to
+   use a swap file to store your image. See above for further details.
+
+   - test_bio
+
+   This is a debugging option. When enabled, TuxOnIce will not hibernate.
+   Instead, when asked to write an image, it will skip the atomic copy,
+   just doing the writing of the image and then returning control to the
+   user at the point where it would have powered off. This is useful for
+   testing throughput in different configurations.
+
+   - test_filter_speed
+
+   This is a debugging option. When enabled, TuxOnIce will not hibernate.
+   Instead, when asked to write an image, it will not write anything or do
+   an atomic copy, but will only run any enabled compression algorithm on the
+   data that would have been written (the source pages of the atomic copy in
+   the case of pageset 1). This is useful for comparing the performance of
+   compression algorithms and for determining the extent to which an upgrade
+   to your storage method would improve hibernation speed.
+
+   - user_interface/debug_sections (CONFIG_PM_DEBUG):
+
+   This value, together with the console log level, controls what debugging
+   information is displayed. The console log level determines the level of
+   detail, and this value determines what detail is displayed. This value is
+   a bit vector, and the meaning of the bits can be found in the kernel tree
+   in include/linux/tuxonice.h. It can be overridden using the kernel's
+   command line option suspend_dbg.
+
+   - user_interface/default_console_level (CONFIG_PM_DEBUG):
+
+   This determines the value of the console log level at the start of a
+   hibernation cycle. If debugging is compiled in, the console log level can be
+   changed during a cycle by pressing the digit keys. Meanings are:
+
+   0: Nice display.
+   1: Nice display plus numerical progress.
+   2: Errors only.
+   3: Low level debugging info.
+   4: Medium level debugging info.
+   5: High level debugging info.
+   6: Verbose debugging info.
+
+   - user_interface/enable_escape:
+
+   Setting this to "1" will enable you abort a hibernation cycle or resuming by
+   pressing escape, "0" (default) disables this feature. Note that enabling
+   this option means that you cannot initiate a hibernation cycle and then walk
+   away from your computer, expecting it to be secure. With feature disabled,
+   you can validly have this expectation once TuxOnice begins to write the
+   image to disk. (Prior to this point, it is possible that TuxOnice might
+   about because of failure to freeze all processes or because constraints
+   on its ability to save the image are not met).
+
+   - user_interface/program
+
+   This entry is used to tell TuxOnice what userspace program to use for
+   providing a user interface while hibernating. The program uses a netlink
+   socket to pass messages back and forward to the kernel, allowing all of the
+   functions formerly implemented in the kernel user interface components.
+
+   - version:
+
+   The version of TuxOnIce you have compiled into the currently running kernel.
+
+   - wake_alarm_dir:
+
+   As mentioned above (post_wake_state), TuxOnIce supports automatically waking
+   after some delay. This entry allows you to select which wake alarm to use.
+   It should contain the value "rtc0" if you're wanting to use
+   /sys/class/rtc/rtc0.
+
+   - wake_delay:
+
+   This value determines the delay from the end of writing the image until the
+   wake alarm is triggered. You can set an absolute time by writing the desired
+   time into /sys/class/rtc/<wake_alarm_dir>/wakealarm and leaving these values
+   empty.
+
+   Note that for the wakeup to actually occur, you may need to modify entries
+   in /proc/acpi/wakeup. This is done by echoing the name of the button in the
+   first column (eg PBTN) into the file.
+
+7. How do you get support?
+
+   Glad you asked. TuxOnIce is being actively maintained and supported
+   by Nigel (the guy doing most of the kernel coding at the moment), Bernard
+   (who maintains the hibernate script and userspace user interface components)
+   and its users.
+
+   Resources availble include HowTos, FAQs and a Wiki, all available via
+   tuxonice.net.  You can find the mailing lists there.
+
+8. I think I've found a bug. What should I do?
+
+   By far and a way, the most common problems people have with TuxOnIce
+   related to drivers not having adequate power management support. In this
+   case, it is not a bug with TuxOnIce, but we can still help you. As we
+   mentioned above, such issues can usually be worked around by building the
+   functionality as modules and unloading them while hibernating. Please visit
+   the Wiki for up-to-date lists of known issues and work arounds.
+
+   If this information doesn't help, try running:
+
+   hibernate --bug-report
+
+   ..and sending the output to the users mailing list.
+
+   Good information on how to provide us with useful information from an
+   oops is found in the file REPORTING-BUGS, in the top level directory
+   of the kernel tree. If you get an oops, please especially note the
+   information about running what is printed on the screen through ksymoops.
+   The raw information is useless.
+
+9. When will XXX be supported?
+
+   If there's a feature missing from TuxOnIce that you'd like, feel free to
+   ask. We try to be obliging, within reason.
+
+   Patches are welcome. Please send to the list.
+
+10. How does it work?
+
+   TuxOnIce does its work in a number of steps.
+
+   a. Freezing system activity.
+
+   The first main stage in hibernating is to stop all other activity. This is
+   achieved in stages. Processes are considered in fours groups, which we will
+   describe in reverse order for clarity's sake: Threads with the PF_NOFREEZE
+   flag, kernel threads without this flag, userspace processes with the
+   PF_SYNCTHREAD flag and all other processes. The first set (PF_NOFREEZE) are
+   untouched by the refrigerator code. They are allowed to run during hibernating
+   and resuming, and are used to support user interaction, storage access or the
+   like. Other kernel threads (those unneeded while hibernating) are frozen last.
+   This leaves us with userspace processes that need to be frozen. When a
+   process enters one of the *_sync system calls, we set a PF_SYNCTHREAD flag on
+   that process for the duration of that call. Processes that have this flag are
+   frozen after processes without it, so that we can seek to ensure that dirty
+   data is synced to disk as quickly as possible in a situation where other
+   processes may be submitting writes at the same time. Freezing the processes
+   that are submitting data stops new I/O from being submitted. Syncthreads can
+   then cleanly finish their work. So the order is:
+
+   - Userspace processes without PF_SYNCTHREAD or PF_NOFREEZE;
+   - Userspace processes with PF_SYNCTHREAD (they won't have NOFREEZE);
+   - Kernel processes without PF_NOFREEZE.
+
+   b. Eating memory.
+
+   For a successful hibernation cycle, you need to have enough disk space to store the
+   image and enough memory for the various limitations of TuxOnIce's
+   algorithm. You can also specify a maximum image size. In order to attain
+   to those constraints, TuxOnIce may 'eat' memory. If, after freezing
+   processes, the constraints aren't met, TuxOnIce will thaw all the
+   other processes and begin to eat memory until its calculations indicate
+   the constraints are met. It will then freeze processes again and recheck
+   its calculations.
+
+   c. Allocation of storage.
+
+   Next, TuxOnIce allocates the storage that will be used to save
+   the image.
+
+   The core of TuxOnIce knows nothing about how or where pages are stored. We
+   therefore request the active allocator (remember you might have compiled in
+   more than one!) to allocate enough storage for our expect image size. If
+   this request cannot be fulfilled, we eat more memory and try again. If it
+   is fulfiled, we seek to allocate additional storage, just in case our
+   expected compression ratio (if any) isn't achieved. This time, however, we
+   just continue if we can't allocate enough storage.
+
+   If these calls to our allocator change the characteristics of the image
+   such that we haven't allocated enough memory, we also loop. (The allocator
+   may well need to allocate space for its storage information).
+
+   d. Write the first part of the image.
+
+   TuxOnIce stores the image in two sets of pages called 'pagesets'.
+   Pageset 2 contains pages on the active and inactive lists; essentially
+   the page cache. Pageset 1 contains all other pages, including the kernel.
+   We use two pagesets for one important reason: We need to make an atomic copy
+   of the kernel to ensure consistency of the image. Without a second pageset,
+   that would limit us to an image that was at most half the amount of memory
+   available. Using two pagesets allows us to store a full image. Since pageset
+   2 pages won't be needed in saving pageset 1, we first save pageset 2 pages.
+   We can then make our atomic copy of the remaining pages using both pageset 2
+   pages and any other pages that are free. While saving both pagesets, we are
+   careful not to corrupt the image. Among other things, we use lowlevel block
+   I/O routines that don't change the pagecache contents.
+
+   The next step, then, is writing pageset 2.
+
+   e. Suspending drivers and storing processor context.
+
+   Having written pageset2, TuxOnIce calls the power management functions to
+   notify drivers of the hibernation, and saves the processor state in preparation
+   for the atomic copy of memory we are about to make.
+
+   f. Atomic copy.
+
+   At this stage, everything else but the TuxOnIce code is halted. Processes
+   are frozen or idling, drivers are quiesced and have stored (ideally and where
+   necessary) their configuration in memory we are about to atomically copy.
+   In our lowlevel architecture specific code, we have saved the CPU state.
+   We can therefore now do our atomic copy before resuming drivers etc.
+
+   g. Save the atomic copy (pageset 1).
+
+   TuxOnice can then write the atomic copy of the remaining pages. Since we
+   have copied the pages into other locations, we can continue to use the
+   normal block I/O routines without fear of corruption our image.
+
+   f. Save the image header.
+
+   Nearly there! We save our settings and other parameters needed for
+   reloading pageset 1 in an 'image header'. We also tell our allocator to
+   serialise its data at this stage, so that it can reread the image at resume
+   time.
+
+   g. Set the image header.
+
+   Finally, we edit the header at our resume= location. The signature is
+   changed by the allocator to reflect the fact that an image exists, and to
+   point to the start of that data if necessary (swap allocator).
+
+   h. Power down.
+
+   Or reboot if we're debugging and the appropriate option is selected.
+
+   Whew!
+
+   Reloading the image.
+   --------------------
+
+   Reloading the image is essentially the reverse of all the above. We load
+   our copy of pageset 1, being careful to choose locations that aren't going
+   to be overwritten as we copy it back (We start very early in the boot
+   process, so there are no other processes to quiesce here). We then copy
+   pageset 1 back to its original location in memory and restore the process
+   context. We are now running with the original kernel. Next, we reload the
+   pageset 2 pages, free the memory and swap used by TuxOnIce, restore
+   the pageset header and restart processes. Sounds easy in comparison to
+   hibernating, doesn't it!
+
+   There is of course more to TuxOnIce than this, but this explanation
+   should be a good start. If there's interest, I'll write further
+   documentation on range pages and the low level I/O.
+
+11. Who wrote TuxOnIce?
+
+   (Answer based on the writings of Florent Chabaud, credits in files and
+   Nigel's limited knowledge; apologies to anyone missed out!)
+
+   The main developers of TuxOnIce have been...
+
+   Gabor Kuti
+   Pavel Machek
+   Florent Chabaud
+   Bernard Blackham
+   Nigel Cunningham
+
+   Significant portions of swsusp, the code in the vanilla kernel which
+   TuxOnIce enhances, have been worked on by Rafael Wysocki. Thanks should
+   also be expressed to him.
+
+   The above mentioned developers have been aided in their efforts by a host
+   of hundreds, if not thousands of testers and people who have submitted bug
+   fixes & suggestions. Of special note are the efforts of Michael Frank, who
+   had his computers repetitively hibernate and resume for literally tens of
+   thousands of cycles and developed scripts to stress the system and test
+   TuxOnIce far beyond the point most of us (Nigel included!) would consider
+   testing. His efforts have contributed as much to TuxOnIce as any of the
+   names above.
diff --git a/Documentation/scheduler/sched-BFS.txt b/Documentation/scheduler/sched-BFS.txt
new file mode 100644
index 000000000..c0282002a
--- /dev/null
+++ b/Documentation/scheduler/sched-BFS.txt
@@ -0,0 +1,351 @@
+BFS - The Brain Fuck Scheduler by Con Kolivas.
+
+Goals.
+
+The goal of the Brain Fuck Scheduler, referred to as BFS from here on, is to
+completely do away with the complex designs of the past for the cpu process
+scheduler and instead implement one that is very simple in basic design.
+The main focus of BFS is to achieve excellent desktop interactivity and
+responsiveness without heuristics and tuning knobs that are difficult to
+understand, impossible to model and predict the effect of, and when tuned to
+one workload cause massive detriment to another.
+
+
+Design summary.
+
+BFS is best described as a single runqueue, O(n) lookup, earliest effective
+virtual deadline first design, loosely based on EEVDF (earliest eligible virtual
+deadline first) and my previous Staircase Deadline scheduler. Each component
+shall be described in order to understand the significance of, and reasoning for
+it. The codebase when the first stable version was released was approximately
+9000 lines less code than the existing mainline linux kernel scheduler (in
+2.6.31). This does not even take into account the removal of documentation and
+the cgroups code that is not used.
+
+Design reasoning.
+
+The single runqueue refers to the queued but not running processes for the
+entire system, regardless of the number of CPUs. The reason for going back to
+a single runqueue design is that once multiple runqueues are introduced,
+per-CPU or otherwise, there will be complex interactions as each runqueue will
+be responsible for the scheduling latency and fairness of the tasks only on its
+own runqueue, and to achieve fairness and low latency across multiple CPUs, any
+advantage in throughput of having CPU local tasks causes other disadvantages.
+This is due to requiring a very complex balancing system to at best achieve some
+semblance of fairness across CPUs and can only maintain relatively low latency
+for tasks bound to the same CPUs, not across them. To increase said fairness
+and latency across CPUs, the advantage of local runqueue locking, which makes
+for better scalability, is lost due to having to grab multiple locks.
+
+A significant feature of BFS is that all accounting is done purely based on CPU
+used and nowhere is sleep time used in any way to determine entitlement or
+interactivity. Interactivity "estimators" that use some kind of sleep/run
+algorithm are doomed to fail to detect all interactive tasks, and to falsely tag
+tasks that aren't interactive as being so. The reason for this is that it is
+close to impossible to determine that when a task is sleeping, whether it is
+doing it voluntarily, as in a userspace application waiting for input in the
+form of a mouse click or otherwise, or involuntarily, because it is waiting for
+another thread, process, I/O, kernel activity or whatever. Thus, such an
+estimator will introduce corner cases, and more heuristics will be required to
+cope with those corner cases, introducing more corner cases and failed
+interactivity detection and so on. Interactivity in BFS is built into the design
+by virtue of the fact that tasks that are waking up have not used up their quota
+of CPU time, and have earlier effective deadlines, thereby making it very likely
+they will preempt any CPU bound task of equivalent nice level. See below for
+more information on the virtual deadline mechanism. Even if they do not preempt
+a running task, because the rr interval is guaranteed to have a bound upper
+limit on how long a task will wait for, it will be scheduled within a timeframe
+that will not cause visible interface jitter.
+
+
+Design details.
+
+Task insertion.
+
+BFS inserts tasks into each relevant queue as an O(1) insertion into a double
+linked list. On insertion, *every* running queue is checked to see if the newly
+queued task can run on any idle queue, or preempt the lowest running task on the
+system. This is how the cross-CPU scheduling of BFS achieves significantly lower
+latency per extra CPU the system has. In this case the lookup is, in the worst
+case scenario, O(n) where n is the number of CPUs on the system.
+
+Data protection.
+
+BFS has one single lock protecting the process local data of every task in the
+global queue. Thus every insertion, removal and modification of task data in the
+global runqueue needs to grab the global lock. However, once a task is taken by
+a CPU, the CPU has its own local data copy of the running process' accounting
+information which only that CPU accesses and modifies (such as during a
+timer tick) thus allowing the accounting data to be updated lockless. Once a
+CPU has taken a task to run, it removes it from the global queue. Thus the
+global queue only ever has, at most,
+
+	(number of tasks requesting cpu time) - (number of logical CPUs) + 1
+
+tasks in the global queue. This value is relevant for the time taken to look up
+tasks during scheduling. This will increase if many tasks with CPU affinity set
+in their policy to limit which CPUs they're allowed to run on if they outnumber
+the number of CPUs. The +1 is because when rescheduling a task, the CPU's
+currently running task is put back on the queue. Lookup will be described after
+the virtual deadline mechanism is explained.
+
+Virtual deadline.
+
+The key to achieving low latency, scheduling fairness, and "nice level"
+distribution in BFS is entirely in the virtual deadline mechanism. The one
+tunable in BFS is the rr_interval, or "round robin interval". This is the
+maximum time two SCHED_OTHER (or SCHED_NORMAL, the common scheduling policy)
+tasks of the same nice level will be running for, or looking at it the other
+way around, the longest duration two tasks of the same nice level will be
+delayed for. When a task requests cpu time, it is given a quota (time_slice)
+equal to the rr_interval and a virtual deadline. The virtual deadline is
+offset from the current time in jiffies by this equation:
+
+	jiffies + (prio_ratio * rr_interval)
+
+The prio_ratio is determined as a ratio compared to the baseline of nice -20
+and increases by 10% per nice level. The deadline is a virtual one only in that
+no guarantee is placed that a task will actually be scheduled by this time, but
+it is used to compare which task should go next. There are three components to
+how a task is next chosen. First is time_slice expiration. If a task runs out
+of its time_slice, it is descheduled, the time_slice is refilled, and the
+deadline reset to that formula above. Second is sleep, where a task no longer
+is requesting CPU for whatever reason. The time_slice and deadline are _not_
+adjusted in this case and are just carried over for when the task is next
+scheduled. Third is preemption, and that is when a newly waking task is deemed
+higher priority than a currently running task on any cpu by virtue of the fact
+that it has an earlier virtual deadline than the currently running task. The
+earlier deadline is the key to which task is next chosen for the first and
+second cases. Once a task is descheduled, it is put back on the queue, and an
+O(n) lookup of all queued-but-not-running tasks is done to determine which has
+the earliest deadline and that task is chosen to receive CPU next.
+
+The CPU proportion of different nice tasks works out to be approximately the
+
+	(prio_ratio difference)^2
+
+The reason it is squared is that a task's deadline does not change while it is
+running unless it runs out of time_slice. Thus, even if the time actually
+passes the deadline of another task that is queued, it will not get CPU time
+unless the current running task deschedules, and the time "base" (jiffies) is
+constantly moving.
+
+Task lookup.
+
+BFS has 103 priority queues. 100 of these are dedicated to the static priority
+of realtime tasks, and the remaining 3 are, in order of best to worst priority,
+SCHED_ISO (isochronous), SCHED_NORMAL, and SCHED_IDLEPRIO (idle priority
+scheduling). When a task of these priorities is queued, a bitmap of running
+priorities is set showing which of these priorities has tasks waiting for CPU
+time. When a CPU is made to reschedule, the lookup for the next task to get
+CPU time is performed in the following way:
+
+First the bitmap is checked to see what static priority tasks are queued. If
+any realtime priorities are found, the corresponding queue is checked and the
+first task listed there is taken (provided CPU affinity is suitable) and lookup
+is complete. If the priority corresponds to a SCHED_ISO task, they are also
+taken in FIFO order (as they behave like SCHED_RR). If the priority corresponds
+to either SCHED_NORMAL or SCHED_IDLEPRIO, then the lookup becomes O(n). At this
+stage, every task in the runlist that corresponds to that priority is checked
+to see which has the earliest set deadline, and (provided it has suitable CPU
+affinity) it is taken off the runqueue and given the CPU. If a task has an
+expired deadline, it is taken and the rest of the lookup aborted (as they are
+chosen in FIFO order).
+
+Thus, the lookup is O(n) in the worst case only, where n is as described
+earlier, as tasks may be chosen before the whole task list is looked over.
+
+
+Scalability.
+
+The major limitations of BFS will be that of scalability, as the separate
+runqueue designs will have less lock contention as the number of CPUs rises.
+However they do not scale linearly even with separate runqueues as multiple
+runqueues will need to be locked concurrently on such designs to be able to
+achieve fair CPU balancing, to try and achieve some sort of nice-level fairness
+across CPUs, and to achieve low enough latency for tasks on a busy CPU when
+other CPUs would be more suited. BFS has the advantage that it requires no
+balancing algorithm whatsoever, as balancing occurs by proxy simply because
+all CPUs draw off the global runqueue, in priority and deadline order. Despite
+the fact that scalability is _not_ the prime concern of BFS, it both shows very
+good scalability to smaller numbers of CPUs and is likely a more scalable design
+at these numbers of CPUs.
+
+It also has some very low overhead scalability features built into the design
+when it has been deemed their overhead is so marginal that they're worth adding.
+The first is the local copy of the running process' data to the CPU it's running
+on to allow that data to be updated lockless where possible. Then there is
+deference paid to the last CPU a task was running on, by trying that CPU first
+when looking for an idle CPU to use the next time it's scheduled. Finally there
+is the notion of cache locality beyond the last running CPU. The sched_domains
+information is used to determine the relative virtual "cache distance" that
+other CPUs have from the last CPU a task was running on. CPUs with shared
+caches, such as SMT siblings, or multicore CPUs with shared caches, are treated
+as cache local. CPUs without shared caches are treated as not cache local, and
+CPUs on different NUMA nodes are treated as very distant. This "relative cache
+distance" is used by modifying the virtual deadline value when doing lookups.
+Effectively, the deadline is unaltered between "cache local" CPUs, doubled for
+"cache distant" CPUs, and quadrupled for "very distant" CPUs. The reasoning
+behind the doubling of deadlines is as follows. The real cost of migrating a
+task from one CPU to another is entirely dependant on the cache footprint of
+the task, how cache intensive the task is, how long it's been running on that
+CPU to take up the bulk of its cache, how big the CPU cache is, how fast and
+how layered the CPU cache is, how fast a context switch is... and so on. In
+other words, it's close to random in the real world where we do more than just
+one sole workload. The only thing we can be sure of is that it's not free. So
+BFS uses the principle that an idle CPU is a wasted CPU and utilising idle CPUs
+is more important than cache locality, and cache locality only plays a part
+after that. Doubling the effective deadline is based on the premise that the
+"cache local" CPUs will tend to work on the same tasks up to double the number
+of cache local CPUs, and once the workload is beyond that amount, it is likely
+that none of the tasks are cache warm anywhere anyway. The quadrupling for NUMA
+is a value I pulled out of my arse.
+
+When choosing an idle CPU for a waking task, the cache locality is determined
+according to where the task last ran and then idle CPUs are ranked from best
+to worst to choose the most suitable idle CPU based on cache locality, NUMA
+node locality and hyperthread sibling business. They are chosen in the
+following preference (if idle):
+
+* Same core, idle or busy cache, idle threads
+* Other core, same cache, idle or busy cache, idle threads.
+* Same node, other CPU, idle cache, idle threads.
+* Same node, other CPU, busy cache, idle threads.
+* Same core, busy threads.
+* Other core, same cache, busy threads.
+* Same node, other CPU, busy threads.
+* Other node, other CPU, idle cache, idle threads.
+* Other node, other CPU, busy cache, idle threads.
+* Other node, other CPU, busy threads.
+
+This shows the SMT or "hyperthread" awareness in the design as well which will
+choose a real idle core first before a logical SMT sibling which already has
+tasks on the physical CPU.
+
+Early benchmarking of BFS suggested scalability dropped off at the 16 CPU mark.
+However this benchmarking was performed on an earlier design that was far less
+scalable than the current one so it's hard to know how scalable it is in terms
+of both CPUs (due to the global runqueue) and heavily loaded machines (due to
+O(n) lookup) at this stage. Note that in terms of scalability, the number of
+_logical_ CPUs matters, not the number of _physical_ CPUs. Thus, a dual (2x)
+quad core (4X) hyperthreaded (2X) machine is effectively a 16X. Newer benchmark
+results are very promising indeed, without needing to tweak any knobs, features
+or options. Benchmark contributions are most welcome.
+
+
+Features
+
+As the initial prime target audience for BFS was the average desktop user, it
+was designed to not need tweaking, tuning or have features set to obtain benefit
+from it. Thus the number of knobs and features has been kept to an absolute
+minimum and should not require extra user input for the vast majority of cases.
+There are precisely 2 tunables, and 2 extra scheduling policies. The rr_interval
+and iso_cpu tunables, and the SCHED_ISO and SCHED_IDLEPRIO policies. In addition
+to this, BFS also uses sub-tick accounting. What BFS does _not_ now feature is
+support for CGROUPS. The average user should neither need to know what these
+are, nor should they need to be using them to have good desktop behaviour.
+
+rr_interval
+
+There is only one "scheduler" tunable, the round robin interval. This can be
+accessed in
+
+	/proc/sys/kernel/rr_interval
+
+The value is in milliseconds, and the default value is set to 6 on a
+uniprocessor machine, and automatically set to a progressively higher value on
+multiprocessor machines. The reasoning behind increasing the value on more CPUs
+is that the effective latency is decreased by virtue of there being more CPUs on
+BFS (for reasons explained above), and increasing the value allows for less
+cache contention and more throughput. Valid values are from 1 to 1000
+Decreasing the value will decrease latencies at the cost of decreasing
+throughput, while increasing it will improve throughput, but at the cost of
+worsening latencies. The accuracy of the rr interval is limited by HZ resolution
+of the kernel configuration. Thus, the worst case latencies are usually slightly
+higher than this actual value. The default value of 6 is not an arbitrary one.
+It is based on the fact that humans can detect jitter at approximately 7ms, so
+aiming for much lower latencies is pointless under most circumstances. It is
+worth noting this fact when comparing the latency performance of BFS to other
+schedulers. Worst case latencies being higher than 7ms are far worse than
+average latencies not being in the microsecond range.
+
+Isochronous scheduling.
+
+Isochronous scheduling is a unique scheduling policy designed to provide
+near-real-time performance to unprivileged (ie non-root) users without the
+ability to starve the machine indefinitely. Isochronous tasks (which means
+"same time") are set using, for example, the schedtool application like so:
+
+	schedtool -I -e amarok
+
+This will start the audio application "amarok" as SCHED_ISO. How SCHED_ISO works
+is that it has a priority level between true realtime tasks and SCHED_NORMAL
+which would allow them to preempt all normal tasks, in a SCHED_RR fashion (ie,
+if multiple SCHED_ISO tasks are running, they purely round robin at rr_interval
+rate). However if ISO tasks run for more than a tunable finite amount of time,
+they are then demoted back to SCHED_NORMAL scheduling. This finite amount of
+time is the percentage of _total CPU_ available across the machine, configurable
+as a percentage in the following "resource handling" tunable (as opposed to a
+scheduler tunable):
+
+	/proc/sys/kernel/iso_cpu
+
+and is set to 70% by default. It is calculated over a rolling 5 second average
+Because it is the total CPU available, it means that on a multi CPU machine, it
+is possible to have an ISO task running as realtime scheduling indefinitely on
+just one CPU, as the other CPUs will be available. Setting this to 100 is the
+equivalent of giving all users SCHED_RR access and setting it to 0 removes the
+ability to run any pseudo-realtime tasks.
+
+A feature of BFS is that it detects when an application tries to obtain a
+realtime policy (SCHED_RR or SCHED_FIFO) and the caller does not have the
+appropriate privileges to use those policies. When it detects this, it will
+give the task SCHED_ISO policy instead. Thus it is transparent to the user.
+Because some applications constantly set their policy as well as their nice
+level, there is potential for them to undo the override specified by the user
+on the command line of setting the policy to SCHED_ISO. To counter this, once
+a task has been set to SCHED_ISO policy, it needs superuser privileges to set
+it back to SCHED_NORMAL. This will ensure the task remains ISO and all child
+processes and threads will also inherit the ISO policy.
+
+Idleprio scheduling.
+
+Idleprio scheduling is a scheduling policy designed to give out CPU to a task
+_only_ when the CPU would be otherwise idle. The idea behind this is to allow
+ultra low priority tasks to be run in the background that have virtually no
+effect on the foreground tasks. This is ideally suited to distributed computing
+clients (like setiathome, folding, mprime etc) but can also be used to start
+a video encode or so on without any slowdown of other tasks. To avoid this
+policy from grabbing shared resources and holding them indefinitely, if it
+detects a state where the task is waiting on I/O, the machine is about to
+suspend to ram and so on, it will transiently schedule them as SCHED_NORMAL. As
+per the Isochronous task management, once a task has been scheduled as IDLEPRIO,
+it cannot be put back to SCHED_NORMAL without superuser privileges. Tasks can
+be set to start as SCHED_IDLEPRIO with the schedtool command like so:
+
+	schedtool -D -e ./mprime
+
+Subtick accounting.
+
+It is surprisingly difficult to get accurate CPU accounting, and in many cases,
+the accounting is done by simply determining what is happening at the precise
+moment a timer tick fires off. This becomes increasingly inaccurate as the
+timer tick frequency (HZ) is lowered. It is possible to create an application
+which uses almost 100% CPU, yet by being descheduled at the right time, records
+zero CPU usage. While the main problem with this is that there are possible
+security implications, it is also difficult to determine how much CPU a task
+really does use. BFS tries to use the sub-tick accounting from the TSC clock,
+where possible, to determine real CPU usage. This is not entirely reliable, but
+is far more likely to produce accurate CPU usage data than the existing designs
+and will not show tasks as consuming no CPU usage when they actually are. Thus,
+the amount of CPU reported as being used by BFS will more accurately represent
+how much CPU the task itself is using (as is shown for example by the 'time'
+application), so the reported values may be quite different to other schedulers.
+Values reported as the 'load' are more prone to problems with this design, but
+per process values are closer to real usage. When comparing throughput of BFS
+to other designs, it is important to compare the actual completed work in terms
+of total wall clock time taken and total work done, rather than the reported
+"cpu usage".
+
+
+Con Kolivas <kernel@kolivas.org> Fri Aug 27 2010
diff --git a/Documentation/scheduler/sched-MuQSS.txt b/Documentation/scheduler/sched-MuQSS.txt
new file mode 100644
index 000000000..ae28b85c9
--- /dev/null
+++ b/Documentation/scheduler/sched-MuQSS.txt
@@ -0,0 +1,373 @@
+MuQSS - The Multiple Queue Skiplist Scheduler by Con Kolivas.
+
+MuQSS is a per-cpu runqueue variant of the original BFS scheduler with
+one 8 level skiplist per runqueue, and fine grained locking for much more
+scalability.
+
+
+Goals.
+
+The goal of the Multiple Queue Skiplist Scheduler, referred to as MuQSS from
+here on (pronounced mux) is to completely do away with the complex designs of
+the past for the cpu process scheduler and instead implement one that is very
+simple in basic design. The main focus of MuQSS is to achieve excellent desktop
+interactivity and responsiveness without heuristics and tuning knobs that are
+difficult to understand, impossible to model and predict the effect of, and when
+tuned to one workload cause massive detriment to another, while still being
+scalable to many CPUs and processes.
+
+
+Design summary.
+
+MuQSS is best described as per-cpu multiple runqueue, O(log n) insertion, O(1)
+lookup, earliest effective virtual deadline first tickless design, loosely based
+on EEVDF (earliest eligible virtual deadline first) and my previous Staircase
+Deadline scheduler, and evolved from the single runqueue O(n) BFS scheduler.
+Each component shall be described in order to understand the significance of,
+and reasoning for it.
+
+
+Design reasoning.
+
+In BFS, the use of a single runqueue across all CPUs meant that each CPU would
+need to scan the entire runqueue looking for the process with the earliest
+deadline and schedule that next, regardless of which CPU it originally came
+from. This made BFS deterministic with respect to latency and provided
+guaranteed latencies dependent on number of processes and CPUs. The single
+runqueue, however, meant that all CPUs would compete for the single lock
+protecting it, which would lead to increasing lock contention as the number of
+CPUs rose and appeared to limit scalability of common workloads beyond 16
+logical CPUs. Additionally, the O(n) lookup of the runqueue list obviously
+increased overhead proportionate to the number of queued proecesses and led to
+cache thrashing while iterating over the linked list.
+
+MuQSS is an evolution of BFS, designed to maintain the same scheduling
+decision mechanism and be virtually deterministic without relying on the
+constrained design of the single runqueue by splitting out the single runqueue
+to be per-CPU and use skiplists instead of linked lists.
+
+The original reason for going back to a single runqueue design for BFS was that
+once multiple runqueues are introduced, per-CPU or otherwise, there will be
+complex interactions as each runqueue will be responsible for the scheduling
+latency and fairness of the tasks only on its own runqueue, and to achieve
+fairness and low latency across multiple CPUs, any advantage in throughput of
+having CPU local tasks causes other disadvantages. This is due to requiring a
+very complex balancing system to at best achieve some semblance of fairness
+across CPUs and can only maintain relatively low latency for tasks bound to the
+same CPUs, not across them. To increase said fairness and latency across CPUs,
+the advantage of local runqueue locking, which makes for better scalability, is
+lost due to having to grab multiple locks.
+
+MuQSS works around the problems inherent in multiple runqueue designs by
+making its skip lists priority ordered and through novel use of lockless
+examination of each other runqueue it can decide if it should take the earliest
+deadline task from another runqueue for latency reasons, or for CPU balancing
+reasons. It still does not have a balancing system, choosing to allow the
+next task scheduling decision and task wakeup CPU choice to allow balancing to
+happen by virtue of its choices.
+
+As a further evolution of the design, MuQSS normally configures sharing of
+runqueues in a logical fashion for when CPU resources are shared for improved
+latency and throughput. By default it shares runqueues and locks between
+multicore siblings. Optionally it can be configured to run with sharing of
+SMT siblings only, all SMP packages or no sharing at all. Additionally it can
+be selected at boot time.
+
+
+Design details.
+
+Custom skip list implementation:
+
+To avoid the overhead of building up and tearing down skip list structures,
+the variant used by MuQSS has a number of optimisations making it specific for
+its use case in the scheduler. It uses static arrays of 8 'levels' instead of
+building up and tearing down structures dynamically. This makes each runqueue
+only scale O(log N) up to 64k tasks. However as there is one runqueue per CPU
+it means that it scales O(log N) up to 64k x number of logical CPUs which is
+far beyond the realistic task limits each CPU could handle. By being 8 levels
+it also makes the array exactly one cacheline in size. Additionally, each
+skip list node is bidirectional making insertion and removal amortised O(1),
+being O(k) where k is 1-8. Uniquely, we are only ever interested in the very
+first entry in each list at all times with MuQSS, so there is never a need to
+do a search and thus look up is always O(1). In interactive mode, the queues
+will be searched beyond their first entry if the first task is not suitable
+for affinity or SMT nice reasons.
+
+Task insertion:
+
+MuQSS inserts tasks into a per CPU runqueue as an O(log N) insertion into
+a custom skip list as described above (based on the original design by William
+Pugh). Insertion is ordered in such a way that there is never a need to do a
+search by ordering tasks according to static priority primarily, and then
+virtual deadline at the time of insertion.
+
+Niffies:
+
+Niffies are a monotonic forward moving timer not unlike the "jiffies" but are
+of nanosecond resolution. Niffies are calculated per-runqueue from the high
+resolution TSC timers, and in order to maintain fairness are synchronised
+between CPUs whenever both runqueues are locked concurrently.
+
+Virtual deadline:
+
+The key to achieving low latency, scheduling fairness, and "nice level"
+distribution in MuQSS is entirely in the virtual deadline mechanism. The one
+tunable in MuQSS is the rr_interval, or "round robin interval". This is the
+maximum time two SCHED_OTHER (or SCHED_NORMAL, the common scheduling policy)
+tasks of the same nice level will be running for, or looking at it the other
+way around, the longest duration two tasks of the same nice level will be
+delayed for. When a task requests cpu time, it is given a quota (time_slice)
+equal to the rr_interval and a virtual deadline. The virtual deadline is
+offset from the current time in niffies by this equation:
+
+	niffies + (prio_ratio * rr_interval)
+
+The prio_ratio is determined as a ratio compared to the baseline of nice -20
+and increases by 10% per nice level. The deadline is a virtual one only in that
+no guarantee is placed that a task will actually be scheduled by this time, but
+it is used to compare which task should go next. There are three components to
+how a task is next chosen. First is time_slice expiration. If a task runs out
+of its time_slice, it is descheduled, the time_slice is refilled, and the
+deadline reset to that formula above. Second is sleep, where a task no longer
+is requesting CPU for whatever reason. The time_slice and deadline are _not_
+adjusted in this case and are just carried over for when the task is next
+scheduled. Third is preemption, and that is when a newly waking task is deemed
+higher priority than a currently running task on any cpu by virtue of the fact
+that it has an earlier virtual deadline than the currently running task. The
+earlier deadline is the key to which task is next chosen for the first and
+second cases.
+
+The CPU proportion of different nice tasks works out to be approximately the
+
+	(prio_ratio difference)^2
+
+The reason it is squared is that a task's deadline does not change while it is
+running unless it runs out of time_slice. Thus, even if the time actually
+passes the deadline of another task that is queued, it will not get CPU time
+unless the current running task deschedules, and the time "base" (niffies) is
+constantly moving.
+
+Task lookup:
+
+As tasks are already pre-ordered according to anticipated scheduling order in
+the skip lists, lookup for the next suitable task per-runqueue is always a
+matter of simply selecting the first task in the 0th level skip list entry.
+In order to maintain optimal latency and fairness across CPUs, MuQSS does a
+novel examination of every other runqueue in cache locality order, choosing the
+best task across all runqueues. This provides near-determinism of how long any
+task across the entire system may wait before receiving CPU time. The other
+runqueues are first examine lockless and then trylocked to minimise the
+potential lock contention if they are likely to have a suitable better task.
+Each other runqueue lock is only held for as long as it takes to examine the
+entry for suitability. In "interactive" mode, the default setting, MuQSS will
+look for the best deadline task across all CPUs, while in !interactive mode,
+it will only select a better deadline task from another CPU if it is more
+heavily laden than the current one.
+
+Lookup is therefore O(k) where k is number of CPUs.
+
+
+Latency.
+
+Through the use of virtual deadlines to govern the scheduling order of normal
+tasks, queue-to-activation latency per runqueue is guaranteed to be bound by
+the rr_interval tunable which is set to 6ms by default. This means that the
+longest a CPU bound task will wait for more CPU is proportional to the number
+of running tasks and in the common case of 0-2 running tasks per CPU, will be
+under the 7ms threshold for human perception of jitter. Additionally, as newly
+woken tasks will have an early deadline from their previous runtime, the very
+tasks that are usually latency sensitive will have the shortest interval for
+activation, usually preempting any existing CPU bound tasks.
+
+Tickless expiry:
+
+A feature of MuQSS is that it is not tied to the resolution of the chosen tick
+rate in Hz, instead depending entirely on the high resolution timers where
+possible for sub-millisecond accuracy on timeouts regarless of the underlying
+tick rate. This allows MuQSS to be run with the low overhead of low Hz rates
+such as 100 by default, benefiting from the improved throughput and lower
+power usage it provides. Another advantage of this approach is that in
+combination with the Full No HZ option, which disables ticks on running task
+CPUs instead of just idle CPUs, the tick can be disabled at all times
+regardless of how many tasks are running instead of being limited to just one
+running task. Note that this option is NOT recommended for regular desktop
+users.
+
+
+Scalability and balancing.
+
+Unlike traditional approaches where balancing is a combination of CPU selection
+at task wakeup and intermittent balancing based on a vast array of rules set
+according to architecture, busyness calculations and special case management,
+MuQSS indirectly balances on the fly at task wakeup and next task selection.
+During initialisation, MuQSS creates a cache coherency ordered list of CPUs for
+each logical CPU and uses this to aid task/CPU selection when CPUs are busy.
+Additionally it selects any idle CPUs, if they are available, at any time over
+busy CPUs according to the following preference:
+
+ * Same thread, idle or busy cache, idle or busy threads
+ * Other core, same cache, idle or busy cache, idle threads.
+ * Same node, other CPU, idle cache, idle threads.
+ * Same node, other CPU, busy cache, idle threads.
+ * Other core, same cache, busy threads.
+ * Same node, other CPU, busy threads.
+ * Other node, other CPU, idle cache, idle threads.
+ * Other node, other CPU, busy cache, idle threads.
+ * Other node, other CPU, busy threads.
+
+Mux is therefore SMT, MC and Numa aware without the need for extra
+intermittent balancing to maintain CPUs busy and make the most of cache
+coherency.
+
+
+Features
+
+As the initial prime target audience for MuQSS was the average desktop user, it
+was designed to not need tweaking, tuning or have features set to obtain benefit
+from it. Thus the number of knobs and features has been kept to an absolute
+minimum and should not require extra user input for the vast majority of cases.
+There are 3 optional tunables, and 2 extra scheduling policies. The rr_interval,
+interactive, and iso_cpu tunables, and the SCHED_ISO and SCHED_IDLEPRIO
+policies. In addition to this, MuQSS also uses sub-tick accounting. What MuQSS
+does _not_ now feature is support for CGROUPS. The average user should neither
+need to know what these are, nor should they need to be using them to have good
+desktop behaviour. However since some applications refuse to work without
+cgroups, one can enable them with MuQSS as a stub and the filesystem will be
+created which will allow the applications to work.
+
+rr_interval:
+
+	/proc/sys/kernel/rr_interval
+
+The value is in milliseconds, and the default value is set to 6. Valid values
+are from 1 to 1000 Decreasing the value will decrease latencies at the cost of
+decreasing throughput, while increasing it will improve throughput, but at the
+cost of worsening latencies. It is based on the fact that humans can detect
+jitter at approximately 7ms, so aiming for much lower latencies is pointless
+under most circumstances. It is worth noting this fact when comparing the
+latency performance of MuQSS to other schedulers. Worst case latencies being
+higher than 7ms are far worse than average latencies not being in the
+microsecond range.
+
+interactive:
+
+	/proc/sys/kernel/interactive
+
+The value is a simple boolean of 1 for on and 0 for off and is set to on by
+default. Disabling this will disable the near-determinism of MuQSS when
+selecting the next task by not examining all CPUs for the earliest deadline
+task, or which CPU to wake to, instead prioritising CPU balancing for improved
+throughput. Latency will still be bound by rr_interval, but on a per-CPU basis
+instead of across the whole system.
+
+Runqueue sharing.
+
+By default MuQSS chooses to share runqueue resources (specifically the skip
+list and locking) between multicore siblings. It is configurable at build time
+to select between None, SMT, MC and SMP, corresponding to no sharing, sharing
+only between simultaneous mulithreading siblings, multicore siblings, or
+symmetric multiprocessing physical packages. Additionally it can be se at
+bootime with the use of the rqshare parameter. The reason for configurability
+is that some architectures have CPUs with many multicore siblings (>= 16)
+where it may be detrimental to throughput to share runqueues and another
+sharing option may be desirable. Additionally, more sharing than usual can
+improve latency on a system-wide level at the expense of throughput if desired.
+
+The options are:
+none, smt, mc, smp
+
+eg:
+	rqshare=mc
+
+Isochronous scheduling:
+
+Isochronous scheduling is a unique scheduling policy designed to provide
+near-real-time performance to unprivileged (ie non-root) users without the
+ability to starve the machine indefinitely. Isochronous tasks (which means
+"same time") are set using, for example, the schedtool application like so:
+
+	schedtool -I -e amarok
+
+This will start the audio application "amarok" as SCHED_ISO. How SCHED_ISO works
+is that it has a priority level between true realtime tasks and SCHED_NORMAL
+which would allow them to preempt all normal tasks, in a SCHED_RR fashion (ie,
+if multiple SCHED_ISO tasks are running, they purely round robin at rr_interval
+rate). However if ISO tasks run for more than a tunable finite amount of time,
+they are then demoted back to SCHED_NORMAL scheduling. This finite amount of
+time is the percentage of CPU available per CPU, configurable as a percentage in
+the following "resource handling" tunable (as opposed to a scheduler tunable):
+
+iso_cpu:
+
+	/proc/sys/kernel/iso_cpu
+
+and is set to 70% by default. It is calculated over a rolling 5 second average
+Because it is the total CPU available, it means that on a multi CPU machine, it
+is possible to have an ISO task running as realtime scheduling indefinitely on
+just one CPU, as the other CPUs will be available. Setting this to 100 is the
+equivalent of giving all users SCHED_RR access and setting it to 0 removes the
+ability to run any pseudo-realtime tasks.
+
+A feature of MuQSS is that it detects when an application tries to obtain a
+realtime policy (SCHED_RR or SCHED_FIFO) and the caller does not have the
+appropriate privileges to use those policies. When it detects this, it will
+give the task SCHED_ISO policy instead. Thus it is transparent to the user.
+
+
+Idleprio scheduling:
+
+Idleprio scheduling is a scheduling policy designed to give out CPU to a task
+_only_ when the CPU would be otherwise idle. The idea behind this is to allow
+ultra low priority tasks to be run in the background that have virtually no
+effect on the foreground tasks. This is ideally suited to distributed computing
+clients (like setiathome, folding, mprime etc) but can also be used to start a
+video encode or so on without any slowdown of other tasks. To avoid this policy
+from grabbing shared resources and holding them indefinitely, if it detects a
+state where the task is waiting on I/O, the machine is about to suspend to ram
+and so on, it will transiently schedule them as SCHED_NORMAL. Once a task has
+been scheduled as IDLEPRIO, it cannot be put back to SCHED_NORMAL without
+superuser privileges since it is effectively a lower scheduling policy. Tasks
+can be set to start as SCHED_IDLEPRIO with the schedtool command like so:
+
+schedtool -D -e ./mprime
+
+Subtick accounting:
+
+It is surprisingly difficult to get accurate CPU accounting, and in many cases,
+the accounting is done by simply determining what is happening at the precise
+moment a timer tick fires off. This becomes increasingly inaccurate as the timer
+tick frequency (HZ) is lowered. It is possible to create an application which
+uses almost 100% CPU, yet by being descheduled at the right time, records zero
+CPU usage. While the main problem with this is that there are possible security
+implications, it is also difficult to determine how much CPU a task really does
+use. Mux uses sub-tick accounting from the TSC clock to determine real CPU
+usage. Thus, the amount of CPU reported as being used by MuQSS will more
+accurately represent how much CPU the task itself is using (as is shown for
+example by the 'time' application), so the reported values may be quite
+different to other schedulers. When comparing throughput of MuQSS to other
+designs, it is important to compare the actual completed work in terms of total
+wall clock time taken and total work done, rather than the reported "cpu usage".
+
+Symmetric MultiThreading (SMT) aware nice:
+
+SMT, a.k.a. hyperthreading, is a very common feature on modern CPUs. While the
+logical CPU count rises by adding thread units to each CPU core, allowing more
+than one task to be run simultaneously on the same core, the disadvantage of it
+is that the CPU power is shared between the tasks, not summating to the power
+of two CPUs. The practical upshot of this is that two tasks running on
+separate threads of the same core run significantly slower than if they had one
+core each to run on. While smart CPU selection allows each task to have a core
+to itself whenever available (as is done on MuQSS), it cannot offset the
+slowdown that occurs when the cores are all loaded and only a thread is left.
+Most of the time this is harmless as the CPU is effectively overloaded at this
+point and the extra thread is of benefit. However when running a niced task in
+the presence of an un-niced task (say nice 19 v nice 0), the nice task gets
+precisely the same amount of CPU power as the unniced one. MuQSS has an
+optional configuration feature known as SMT-NICE which selectively idles the
+secondary niced thread for a period proportional to the nice difference,
+allowing CPU distribution according to nice level to be maintained, at the
+expense of a small amount of extra overhead. If this is configured in on a
+machine without SMT threads, the overhead is minimal.
+
+
+Con Kolivas <kernel@kolivas.org> Sat, 29th October 2016
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 412314eeb..eeb966fc7 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -40,6 +40,7 @@ show up in /proc/sys/kernel:
 - hung_task_timeout_secs
 - hung_task_warnings
 - kexec_load_disabled
+- iso_cpu
 - kptr_restrict
 - l2cr                        [ PPC only ]
 - modprobe                    ==> Documentation/debugging-modules.txt
@@ -74,6 +75,7 @@ show up in /proc/sys/kernel:
 - randomize_va_space
 - real-root-dev               ==> Documentation/admin-guide/initrd.rst
 - reboot-cmd                  [ SPARC only ]
+- rr_interval
 - rtsig-max
 - rtsig-nr
 - seccomp/                    ==> Documentation/userspace-api/seccomp_filter.rst
@@ -95,6 +97,7 @@ show up in /proc/sys/kernel:
 - unknown_nmi_panic
 - watchdog
 - watchdog_thresh
+- yield_type
 - version
 
 ==============================================================
@@ -411,6 +414,16 @@ When kptr_restrict is set to (2), kernel pointers printed using
 
 ==============================================================
 
+iso_cpu: (MuQSS CPU scheduler only).
+
+This sets the percentage cpu that the unprivileged SCHED_ISO tasks can
+run effectively at realtime priority, averaged over a rolling five
+seconds over the -whole- system, meaning all cpus.
+
+Set to 70 (percent) by default.
+
+==============================================================
+
 l2cr: (PPC only)
 
 This flag controls the L2 cache of G3 processor boards. If
@@ -837,6 +850,20 @@ rebooting. ???
 
 ==============================================================
 
+rr_interval: (MuQSS CPU scheduler only)
+
+This is the smallest duration that any cpu process scheduling unit
+will run for. Increasing this value can increase throughput of cpu
+bound tasks substantially but at the expense of increased latencies
+overall. Conversely decreasing it will decrease average and maximum
+latencies but at the expense of throughput. This value is in
+milliseconds and the default value chosen depends on the number of
+cpus available at scheduler initialisation with a minimum of 6.
+
+Valid values are from 1-1000.
+
+==============================================================
+
 rtsig-max & rtsig-nr:
 
 The file rtsig-max can be used to tune the maximum number
@@ -1075,3 +1102,13 @@ The softlockup threshold is (2 * watchdog_thresh). Setting this
 tunable to zero will disable lockup detection altogether.
 
 ==============================================================
+
+yield_type: (MuQSS CPU scheduler only)
+
+This determines what type of yield calls to sched_yield will perform.
+
+ 0: No yield.
+ 1: Yield only to better priority/deadline tasks. (default)
+ 2: Expire timeslice and recalculate deadline.
+
+==============================================================
diff --git a/Documentation/tp_smapi.txt b/Documentation/tp_smapi.txt
new file mode 100644
index 000000000..a249678a8
--- /dev/null
+++ b/Documentation/tp_smapi.txt
@@ -0,0 +1,275 @@
+tp_smapi version 0.42
+IBM ThinkPad hardware functions driver
+
+Author:  Shem Multinymous <multinymous@gmail.com>
+Project: http://sourceforge.net/projects/tpctl
+Wiki:    http://thinkwiki.org/wiki/tp_smapi
+List:    linux-thinkpad@linux-thinkpad.org
+         (http://mailman.linux-thinkpad.org/mailman/listinfo/linux-thinkpad)
+
+Description
+-----------
+
+ThinkPad laptops include a proprietary interface called SMAPI BIOS
+(System Management Application Program Interface) which provides some
+hardware control functionality that is not accessible by other means.
+
+This driver exposes some features of the SMAPI BIOS through a sysfs
+interface. It is suitable for newer models, on which SMAPI is invoked
+through IO port writes. Older models use a different SMAPI interface;
+for those, try the "thinkpad" module from the "tpctl" package.
+
+WARNING:
+This driver uses undocumented features and direct hardware access.
+It thus cannot be guaranteed to work, and may cause arbitrary damage
+(especially on models it wasn't tested on).
+
+
+Module parameters
+-----------------
+
+thinkpad_ec module:
+  force_io=1 lets thinkpad_ec load on some recent ThinkPad models
+  (e.g., T400 and T500) whose BIOS's ACPI DSDT reserves the ports we need.
+tp_smapi module:
+  debug=1    enables verbose dmesg output.
+
+
+Usage
+-----
+
+Control of battery charging thresholds (in percents of current full charge
+capacity):
+
+# echo 40 > /sys/devices/platform/smapi/BAT0/start_charge_thresh
+# echo 70 > /sys/devices/platform/smapi/BAT0/stop_charge_thresh
+# cat /sys/devices/platform/smapi/BAT0/*_charge_thresh
+
+    (This is useful since Li-Ion batteries wear out much faster at very
+     high or low charge levels. The driver will also keeps the thresholds
+     across suspend-to-disk with AC disconnected; this isn't done
+     automatically by the hardware.)
+
+Inhibiting battery charging for 17 minutes (overrides thresholds):
+
+# echo 17 > /sys/devices/platform/smapi/BAT0/inhibit_charge_minutes
+# echo 0  > /sys/devices/platform/smapi/BAT0/inhibit_charge_minutes  # stop
+# cat /sys/devices/platform/smapi/BAT0/inhibit_charge_minutes
+
+    (This can be used to control which battery is charged when using an
+     Ultrabay battery.)
+
+Forcing battery discharging even if AC power available:
+
+# echo 1 > /sys/devices/platform/smapi/BAT0/force_discharge  # start discharge
+# echo 0 > /sys/devices/platform/smapi/BAT0/force_discharge  # stop discharge
+# cat /sys/devices/platform/smapi/BAT0/force_discharge
+
+    (When AC is connected, forced discharging will automatically stop
+     when battery is fully depleted -- this is useful for calibration.
+     Also, this attribute can be used to control which battery is discharged
+     when both a system battery and an Ultrabay battery are connected.)
+
+Misc read-only battery status attributes (see note about HDAPS below):
+
+/sys/devices/platform/smapi/BAT0/installed   # 0 or 1
+/sys/devices/platform/smapi/BAT0/state       # idle/charging/discharging
+/sys/devices/platform/smapi/BAT0/cycle_count # integer counter
+/sys/devices/platform/smapi/BAT0/current_now # instantaneous current
+/sys/devices/platform/smapi/BAT0/current_avg # last minute average
+/sys/devices/platform/smapi/BAT0/power_now   # instantaneous power
+/sys/devices/platform/smapi/BAT0/power_avg   # last minute average
+/sys/devices/platform/smapi/BAT0/last_full_capacity         # in mWh
+/sys/devices/platform/smapi/BAT0/remaining_percent          # remaining percent of energy (set by calibration)
+/sys/devices/platform/smapi/BAT0/remaining_percent_error    # error range of remaing_percent (not reset by calibration)
+/sys/devices/platform/smapi/BAT0/remaining_running_time     # in minutes, by last minute average power
+/sys/devices/platform/smapi/BAT0/remaining_running_time_now # in minutes, by instantenous power
+/sys/devices/platform/smapi/BAT0/remaining_charging_time    # in minutes
+/sys/devices/platform/smapi/BAT0/remaining_capacity         # in mWh
+/sys/devices/platform/smapi/BAT0/design_capacity            # in mWh
+/sys/devices/platform/smapi/BAT0/voltage           # in mV
+/sys/devices/platform/smapi/BAT0/design_voltage    # in mV
+/sys/devices/platform/smapi/BAT0/charging_max_current  # max charging current
+/sys/devices/platform/smapi/BAT0/charging_max_voltage  # max charging voltage
+/sys/devices/platform/smapi/BAT0/group{0,1,2,3}_voltage # see below
+/sys/devices/platform/smapi/BAT0/manufacturer      # string
+/sys/devices/platform/smapi/BAT0/model             # string
+/sys/devices/platform/smapi/BAT0/barcoding         # string
+/sys/devices/platform/smapi/BAT0/chemistry         # string
+/sys/devices/platform/smapi/BAT0/serial            # integer
+/sys/devices/platform/smapi/BAT0/manufacture_date  # YYYY-MM-DD
+/sys/devices/platform/smapi/BAT0/first_use_date    # YYYY-MM-DD
+/sys/devices/platform/smapi/BAT0/temperature  # in milli-Celsius
+/sys/devices/platform/smapi/BAT0/dump         # see below
+/sys/devices/platform/smapi/ac_connected      # 0 or 1
+
+The BAT0/group{0,1,2,3}_voltage attribute refers to the separate cell groups
+in each battery. For example, on the ThinkPad 600, X3x, T4x and R5x models,
+the battery contains 3 cell groups in series, where each group consisting of 2
+or 3 cells  connected in parallel. The voltage of each group is given by these
+attributes, and their sum (roughly) equals the "voltage" attribute.
+(The effective performance of the battery is determined by the weakest group,
+i.e., the one those voltage changes most rapidly during dis/charging.)
+
+The "BAT0/dump" attribute gives a a hex dump of the raw status data, which
+contains additional data now in the above (if you can figure it out). Some
+unused values are autodetected and replaced by "--":
+
+In all of the above, replace BAT0 with BAT1 to address the 2nd battery (e.g.
+in the UltraBay).
+
+
+Raw SMAPI calls:
+
+/sys/devices/platform/smapi/smapi_request
+This performs raw SMAPI calls. It uses a bad interface that cannot handle
+multiple simultaneous access. Don't touch it, it's for development only.
+If you did touch it, you would so something like
+# echo '211a 100 0 0' > /sys/devices/platform/smapi/smapi_request
+# cat /sys/devices/platform/smapi/smapi_request
+and notice that in the output "211a 34b b2 0 0 0 'OK'", the "4b" in the 2nd
+value, converted to decimal is 75: the current charge stop threshold.
+
+
+Model-specific status
+---------------------
+
+Works (at least partially) on the following ThinkPad model:
+* A30
+* G41
+* R40, R50p, R51, R52
+* T23, T40, T40p, T41, T41p, T42, T42p, T43, T43p, T60, T61, T400, T410, T420 (partially)
+* X24, X31, X32, X40, X41, X60, X61, X200, X201, X220 (partially)
+* Z60t, Z61m
+
+Does not work on:
+* X230 and newer
+* T430 and newer
+* Any ThinkPad Edge
+* Any ThinkPad Yoga
+* Any ThinkPad L series
+* Any ThinkPad P series
+
+Not all functions are available on all models; for detailed status, see:
+  http://thinkwiki.org/wiki/tp_smapi
+
+Please report success/failure by e-mail or on the Wiki.
+If you get a "not implemented" or "not supported" message, your laptop
+probably just can't do that (at least not via the SMAPI BIOS).
+For negative reports, follow the bug reporting guidelines below.
+If you send me the necessary technical data (i.e., SMAPI function
+interfaces), I will support additional models.
+
+
+Additional HDAPS features
+-------------------------
+
+The modified hdaps driver has several improvements on the one in mainline
+(beyond resolving the conflict with thinkpad_ec and tp_smapi):
+
+- Fixes reliability and improves support for recent ThinkPad models
+  (especially *60 and newer). Unlike the mainline driver, the modified hdaps
+  correctly follows the Embedded Controller communication protocol.
+
+- Extends the "invert" parameter to cover all possible axis orientations.
+  The possible values are as follows.
+  Let X,Y denote the hardware readouts.
+  Let R denote the laptop's roll (tilt left/right).
+  Let P denote the laptop's pitch (tilt forward/backward).
+    invert=0:   R= X  P= Y   (same as mainline)
+    invert=1:   R=-X  P=-Y   (same as mainline)
+    invert=2:   R=-X  P= Y   (new)
+    invert=3:   R= X  P=-Y   (new)
+    invert=4:   R= Y  P= X   (new)
+    invert=5:   R=-Y  P=-X   (new)
+    invert=6:   R=-Y  P= X   (new)
+    invert=7:   R= Y  P=-X   (new)
+  It's probably easiest to just try all 8 possibilities and see which yields
+  correct results (e.g., in the hdaps-gl visualisation).
+
+- Adds a whitelist which automatically sets the correct axis orientation for
+  some models. If the value for your model is wrong or missing, you can override
+  it using the "invert" parameter. Please also update the tables at
+  http://www.thinkwiki.org/wiki/tp_smapi and
+  http://www.thinkwiki.org/wiki/List_of_DMI_IDs
+  and submit a patch for the whitelist in hdaps.c.
+
+- Provides new attributes:
+  /sys/devices/platform/hdaps/sampling_rate:
+    This determines the frequency at which the host queries the embedded
+    controller for accelerometer data (and informs the hdaps input devices).
+    Default=50.
+  /sys/devices/platform/hdaps/oversampling_ratio:
+    When set to X, the embedded controller is told to do physical accelerometer
+    measurements at a rate that is X times higher than the rate at which
+    the driver reads those measurements (i.e., X*sampling_rate). This
+    makes the readouts from the embedded controller more fresh, and is also
+    useful for the running average filter (see next). Default=5
+  /sys/devices/platform/hdaps/running_avg_filter_order:
+    When set to X, reported readouts will be the average of the last X physical
+    accelerometer measurements. Current firmware allows 1<=X<=8. Setting to a
+    high value decreases readout fluctuations. The averaging is handled by the
+    embedded controller, so no CPU resources are used. Higher values make the
+    readouts smoother, since it averages out both sensor noise (good) and abrupt
+    changes (bad). Default=2.
+
+- Provides a second input device, which publishes the raw accelerometer
+  measurements (without the fuzzing needed for joystick emulation). This input
+  device can be matched by a udev rule such as the following (all on one line):
+    KERNEL=="event[0-9]*", ATTRS{phys}=="hdaps/input1",
+    ATTRS{modalias}=="input:b0019v1014p5054e4801-*",
+    SYMLINK+="input/hdaps/accelerometer-event
+
+A new version of the hdapsd userspace daemon, which uses the input device
+interface instead of polling sysfs, is available seprately. Using this reduces
+the total interrupts per second generated by hdaps+hdapsd (on tickless kernels)
+to 50, down from a value that fluctuates between 50 and 100. Set the
+sampling_rate sysfs attribute to a lower value to further reduce interrupts,
+at the expense of response latency.
+
+Licensing note: all my changes to the HDAPS driver are licensed under the
+GPL version 2 or, at your option and to the extent allowed by derivation from
+prior works, any later version. My version of hdaps is derived work from the
+mainline version, which at the time of writing is available only under
+GPL version 2.
+
+Bug reporting
+-------------
+
+Mail <multinymous@gmail.com>. Please include:
+* Details about your model,
+* Relevant "dmesg" output. Make sure thinkpad_ec and tp_smapi are loaded with
+  the "debug=1" parameter (e.g., use "make load HDAPS=1 DEBUG=1").
+* Output of "dmidecode | grep -C5 Product"
+* Does the failed functionality works under Windows?
+
+
+More about SMAPI
+----------------
+
+For hints about what may be possible via the SMAPI BIOS and how, see:
+
+* IBM Technical Reference Manual for the ThinkPad 770
+  (http://www-307.ibm.com/pc/support/site.wss/document.do?lndocid=PFAN-3TUQQD)
+* Exported symbols in PWRMGRIF.DLL or TPPWRW32.DLL (e.g., use "objdump -x").
+* drivers/char/mwave/smapi.c in the Linux kernel tree.*
+* The "thinkpad" SMAPI module (http://tpctl.sourceforge.net).
+* The SMAPI_* constants in tp_smapi.c.
+
+Note that in the above Technical Reference and in the "thinkpad" module,
+SMAPI is invoked through a function call to some physical address. However,
+the interface used by tp_smapi and the above mwave drive, and apparently
+required by newer ThinkPad, is different: you set the parameters up in the
+CPU's registers and write to ports 0xB2 (the APM control port) and 0x4F; this
+triggers an SMI (System Management Interrupt), causing the CPU to enter
+SMM (System Management Mode) and run the BIOS firmware; the results are
+returned in the CPU's registers. It is not clear what is the relation between
+the two variants of SMAPI, though the assignment of error codes seems to be
+similar.
+
+In addition, the embedded controller on ThinkPad laptops has a non-standard
+interface at IO ports 0x1600-0x161F (mapped to LCP channel 3 of the H8S chip).
+The interface provides various system management services (currently known:
+battery information and accelerometer readouts). For more information see the
+thinkpad_ec module and the H8S hardware documentation:
+http://documentation.renesas.com/eng/products/mpumcu/rej09b0300_2140bhm.pdf
diff --git a/Documentation/vm/00-INDEX b/Documentation/vm/00-INDEX
index 11d3d8dcb..5269998fe 100644
--- a/Documentation/vm/00-INDEX
+++ b/Documentation/vm/00-INDEX
@@ -20,6 +20,8 @@ idle_page_tracking.txt
 	- description of the idle page tracking feature.
 ksm.txt
 	- how to use the Kernel Samepage Merging feature.
+uksm.txt
+	- Introduction to Ultra KSM
 numa
 	- information about NUMA specific code in the Linux vm.
 numa_memory_policy.txt
diff --git a/Documentation/vm/uksm.txt b/Documentation/vm/uksm.txt
new file mode 100644
index 000000000..be19a3127
--- /dev/null
+++ b/Documentation/vm/uksm.txt
@@ -0,0 +1,61 @@
+The Ultra Kernel Samepage Merging feature
+----------------------------------------------
+/*
+ * Ultra KSM. Copyright (C) 2011-2012 Nai Xia
+ *
+ * This is an improvement upon KSM. Some basic data structures and routines
+ * are borrowed from ksm.c .
+ *
+ * Its new features:
+ * 1. Full system scan:
+ *      It automatically scans all user processes' anonymous VMAs. Kernel-user
+ *      interaction to submit a memory area to KSM is no longer needed.
+ *
+ * 2. Rich area detection:
+ *      It automatically detects rich areas containing abundant duplicated
+ *      pages based. Rich areas are given a full scan speed. Poor areas are
+ *      sampled at a reasonable speed with very low CPU consumption.
+ *
+ * 3. Ultra Per-page scan speed improvement:
+ *      A new hash algorithm is proposed. As a result, on a machine with
+ *      Core(TM)2 Quad Q9300 CPU in 32-bit mode and 800MHZ DDR2 main memory, it
+ *      can scan memory areas that does not contain duplicated pages at speed of
+ *      627MB/sec ~ 2445MB/sec and can merge duplicated areas at speed of
+ *      477MB/sec ~ 923MB/sec.
+ *
+ * 4. Thrashing area avoidance:
+ *      Thrashing area(an VMA that has frequent Ksm page break-out) can be
+ *      filtered out. My benchmark shows it's more efficient than KSM's per-page
+ *      hash value based volatile page detection.
+ *
+ *
+ * 5. Misc changes upon KSM:
+ *      * It has a fully x86-opitmized memcmp dedicated for 4-byte-aligned page
+ *        comparison. It's much faster than default C version on x86.
+ *      * rmap_item now has an struct *page member to loosely cache a
+ *        address-->page mapping, which reduces too much time-costly
+ *        follow_page().
+ *      * The VMA creation/exit procedures are hooked to let the Ultra KSM know.
+ *      * try_to_merge_two_pages() now can revert a pte if it fails. No break_
+ *        ksm is needed for this case.
+ *
+ * 6. Full Zero Page consideration(contributed by Figo Zhang)
+ *    Now uksmd consider full zero pages as special pages and merge them to an
+ *    special unswappable uksm zero page.
+ */
+
+ChangeLog:
+
+2012-05-05 The creation of this Doc
+2012-05-08 UKSM 0.1.1.1 libc crash bug fix, api clean up, doc clean up.
+2012-05-28 UKSM 0.1.1.2 bug fix release
+2012-06-26 UKSM 0.1.2-beta1 first beta release for 0.1.2
+2012-07-2  UKSM 0.1.2-beta2
+2012-07-10 UKSM 0.1.2-beta3
+2012-07-26 UKSM 0.1.2 Fine grained speed control, more scan optimization.
+2012-10-13 UKSM 0.1.2.1 Bug fixes.
+2012-12-31 UKSM 0.1.2.2 Minor bug fixes.
+2014-07-02 UKSM 0.1.2.3 Fix a " __this_cpu_read() in preemptible bug".
+2015-04-22 UKSM 0.1.2.4 Fix a race condition that can sometimes trigger anonying warnings.
+2016-09-10 UKSM 0.1.2.5 Fix a bug in dedup ratio calculation.
+2017-02-26 UKSM 0.1.2.6 Fix a bug in hugetlbpage handling and a race bug with page migration.
diff --git a/MAINTAINERS b/MAINTAINERS
index 6e950b8b4..871b8a97d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2485,6 +2485,19 @@ F:	include/linux/audit.h
 F:	include/uapi/linux/audit.h
 F:	kernel/audit*
 
+AUFS (advanced multi layered unification filesystem) FILESYSTEM
+M:	"J. R. Okajima" <hooanon05g@gmail.com>
+L:	linux-unionfs@vger.kernel.org
+L:	aufs-users@lists.sourceforge.net (members only)
+W:	http://aufs.sourceforge.net
+T:	git://github.com/sfjro/aufs4-linux.git
+S:	Supported
+F:	Documentation/filesystems/aufs/
+F:	Documentation/ABI/testing/debugfs-aufs
+F:	Documentation/ABI/testing/sysfs-aufs
+F:	fs/aufs/
+F:	include/uapi/linux/aufs_type.h
+
 AUXILIARY DISPLAY DRIVERS
 M:	Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>
 W:	http://miguelojeda.es/auxdisplay.htm
@@ -14196,6 +14209,12 @@ S:	Maintained
 F:	drivers/tc/
 F:	include/linux/tc.h
 
+TUXONICE (ENHANCED HIBERNATION)
+P:	Nigel Cunningham
+M:	nigel@nigelcunningham.com.au
+W:	http://nigelcunningham.com.au
+S:	Maintained
+
 TW5864 VIDEO4LINUX DRIVER
 M:	Bluecherry Maintainers <maintainers@bluecherrydvr.com>
 M:	Anton Sviridenko <anton@corp.bluecherry.net>
diff --git a/Makefile b/Makefile
index cae354266..bce3b6818 100644
--- a/Makefile
+++ b/Makefile
@@ -647,12 +647,16 @@ ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
 KBUILD_CFLAGS	+= $(call cc-option,-Oz,-Os)
 KBUILD_CFLAGS	+= $(call cc-disable-warning,maybe-uninitialized,)
 else
+ifdef CONFIG_CC_OPTIMIZE_HARDER
+KBUILD_CFLAGS	+= -O3 $(call cc-disable-warning,maybe-uninitialized,)
+else
 ifdef CONFIG_PROFILE_ALL_BRANCHES
 KBUILD_CFLAGS	+= -O2 $(call cc-disable-warning,maybe-uninitialized,)
 else
 KBUILD_CFLAGS   += -O2
 endif
 endif
+endif
 
 KBUILD_CFLAGS += $(call cc-ifversion, -lt, 0409, \
 			$(call cc-disable-warning,maybe-uninitialized,))
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 9033c8194..a7437387a 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -64,11 +64,6 @@ static struct task_struct *spusched_task;
 static struct timer_list spusched_timer;
 static struct timer_list spuloadavg_timer;
 
-/*
- * Priority of a normal, non-rt, non-niced'd process (aka nice level 0).
- */
-#define NORMAL_PRIO		120
-
 /*
  * Frequency of the spu scheduler tick.  By default we do one SPU scheduler
  * tick for every 10 CPU scheduler ticks.
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0fa71a78e..1a063ea4f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1016,10 +1016,26 @@ config SCHED_SMT
 	depends on SMP
 	---help---
 	  SMT scheduler support improves the CPU scheduler's decision making
-	  when dealing with Intel Pentium 4 chips with HyperThreading at a
+	  when dealing with Intel P4/Core 2 chips with HyperThreading at a
 	  cost of slightly increased overhead in some places. If unsure say
 	  N here.
 
+config SMT_NICE
+	bool "SMT (Hyperthreading) aware nice priority and policy support"
+	depends on SCHED_MUQSS && SCHED_SMT
+	default y
+	---help---
+	  Enabling Hyperthreading on Intel CPUs decreases the effectiveness
+	  of the use of 'nice' levels and different scheduling policies
+	  (e.g. realtime) due to sharing of CPU power between hyperthreads.
+	  SMT nice support makes each logical CPU aware of what is running on
+	  its hyperthread siblings, maintaining appropriate distribution of
+	  CPU according to nice levels and scheduling policies at the expense
+	  of slightly increased overhead.
+
+	  If unsure say Y here.
+
+
 config SCHED_MC
 	def_bool y
 	prompt "Multi-core scheduler support"
@@ -1050,6 +1066,79 @@ config SCHED_MC_PRIO
 
 	  If unsure say Y here.
 
+choice
+	prompt "CPU scheduler runqueue sharing"
+	default RQ_MC if SCHED_MUQSS
+	default RQ_NONE
+
+config RQ_NONE
+	bool "No sharing"
+	help
+	  This is the default behaviour where the CPU scheduler has one runqueue
+	  per CPU, whether it is a physical or logical CPU (hyperthread).
+
+	  This can still be enabled runtime with the boot parameter
+	  rqshare=none
+
+	  If unsure, say N.
+
+config RQ_SMT
+	bool "SMT (hyperthread) siblings"
+	depends on SCHED_SMT && SCHED_MUQSS
+
+	help
+	  With this option enabled, the CPU scheduler will have one runqueue
+	  shared by SMT (hyperthread) siblings. As these logical cores share
+	  one physical core, sharing the runqueue resource can lead to decreased
+	  overhead, lower latency and higher throughput.
+
+	  This can still be enabled runtime with the boot parameter
+	  rqshare=smt
+
+	  If unsure, say N.
+
+config RQ_MC
+	bool "Multicore siblings"
+	depends on SCHED_MC && SCHED_MUQSS
+	help
+	  With this option enabled, the CPU scheduler will have one runqueue
+	  shared by multicore siblings in addition to any SMT siblings.
+	  As these physical cores share caches, sharing the runqueue resource
+	  will lead to lower latency, but its effects on overhead and throughput
+	  are less predictable. As a general rule, 6 or fewer cores will likely
+	  benefit from this, while larger CPUs will only derive a latency
+	  benefit. If your workloads are primarily single threaded, this will
+	  possibly worsen throughput. If you are only concerned about latency
+	  then enable this regardless of how many cores you have.
+
+	  This can still be enabled runtime with the boot parameter
+	  rqshare=mc
+
+	  If unsure, say Y.
+
+config RQ_SMP
+	bool "Symmetric Multi-Processing"
+	depends on SMP && SCHED_MUQSS
+	help
+	  With this option enabled, the CPU scheduler will have one runqueue
+	  shared by all physical CPUs unless they are on separate NUMA nodes.
+	  As physical CPUs usually do not share resources, sharing the runqueue
+	  will normally worsen throughput but improve latency. If you only
+	  care about latency enable this.
+
+	  This can still be enabled runtime with the boot parameter
+	  rqshare=smp
+
+	  If unsure, say N.
+endchoice
+
+config SHARERQ
+	int
+	default 0 if RQ_NONE
+	default 1 if RQ_SMT
+	default 2 if RQ_MC
+	default 3 if RQ_SMP
+
 source "kernel/Kconfig.preempt"
 
 config UP_LATE_INIT
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 638411f22..e5110f4ae 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -116,6 +116,7 @@ config MPENTIUMM
 config MPENTIUM4
 	bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/older Xeon"
 	depends on X86_32
+	select X86_P6_NOP
 	---help---
 	  Select this for Intel Pentium 4 chips.  This includes the
 	  Pentium 4, Pentium D, P4-based Celeron and Xeon, and
@@ -148,9 +149,8 @@ config MPENTIUM4
 		-Paxville
 		-Dempsey
 
-
 config MK6
-	bool "K6/K6-II/K6-III"
+	bool "AMD K6/K6-II/K6-III"
 	depends on X86_32
 	---help---
 	  Select this for an AMD K6-family processor.  Enables use of
@@ -158,7 +158,7 @@ config MK6
 	  flags to GCC.
 
 config MK7
-	bool "Athlon/Duron/K7"
+	bool "AMD Athlon/Duron/K7"
 	depends on X86_32
 	---help---
 	  Select this for an AMD Athlon K7-family processor.  Enables use of
@@ -166,12 +166,83 @@ config MK7
 	  flags to GCC.
 
 config MK8
-	bool "Opteron/Athlon64/Hammer/K8"
+	bool "AMD Opteron/Athlon64/Hammer/K8"
 	---help---
 	  Select this for an AMD Opteron or Athlon64 Hammer-family processor.
 	  Enables use of some extended instructions, and passes appropriate
 	  optimization flags to GCC.
 
+config MK8SSE3
+	bool "AMD Opteron/Athlon64/Hammer/K8 with SSE3"
+	---help---
+	  Select this for improved AMD Opteron or Athlon64 Hammer-family processors.
+	  Enables use of some extended instructions, and passes appropriate
+	  optimization flags to GCC.
+
+config MK10
+	bool "AMD 61xx/7x50/PhenomX3/X4/II/K10"
+	---help---
+	  Select this for an AMD 61xx Eight-Core Magny-Cours, Athlon X2 7x50,
+		Phenom X3/X4/II, Athlon II X2/X3/X4, or Turion II-family processor.
+	  Enables use of some extended instructions, and passes appropriate
+	  optimization flags to GCC.
+
+config MBARCELONA
+	bool "AMD Barcelona"
+	---help---
+	  Select this for AMD Family 10h Barcelona processors.
+
+	  Enables -march=barcelona
+
+config MBOBCAT
+	bool "AMD Bobcat"
+	---help---
+	  Select this for AMD Family 14h Bobcat processors.
+
+	  Enables -march=btver1
+
+config MJAGUAR
+	bool "AMD Jaguar"
+	---help---
+	  Select this for AMD Family 16h Jaguar processors.
+
+	  Enables -march=btver2
+
+config MBULLDOZER
+	bool "AMD Bulldozer"
+	---help---
+	  Select this for AMD Family 15h Bulldozer processors.
+
+	  Enables -march=bdver1
+
+config MPILEDRIVER
+	bool "AMD Piledriver"
+	---help---
+	  Select this for AMD Family 15h Piledriver processors.
+
+	  Enables -march=bdver2
+
+config MSTEAMROLLER
+	bool "AMD Steamroller"
+	---help---
+	  Select this for AMD Family 15h Steamroller processors.
+
+	  Enables -march=bdver3
+
+config MEXCAVATOR
+	bool "AMD Excavator"
+	---help---
+	  Select this for AMD Family 15h Excavator processors.
+
+	  Enables -march=bdver4
+
+config MPCK
+	bool "AMD Zen"
+	---help---
+	  Select this for AMD Family 17h Zen processors.
+
+	  Enables -march=znver1
+
 config MCRUSOE
 	bool "Crusoe"
 	depends on X86_32
@@ -253,6 +324,7 @@ config MVIAC7
 
 config MPSC
 	bool "Intel P4 / older Netburst based Xeon"
+	select X86_P6_NOP
 	depends on X86_64
 	---help---
 	  Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey
@@ -262,8 +334,19 @@ config MPSC
 	  using the cpu family field
 	  in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one.
 
+config MATOM
+	bool "Intel Atom"
+	select X86_P6_NOP
+	---help---
+
+	  Select this for the Intel Atom platform. Intel Atom CPUs have an
+	  in-order pipelining architecture and thus can benefit from
+	  accordingly optimized code. Use a recent GCC with specific Atom
+	  support in order to fully benefit from selecting this option.
+
 config MCORE2
-	bool "Core 2/newer Xeon"
+	bool "Intel Core 2"
+	select X86_P6_NOP
 	---help---
 
 	  Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and
@@ -271,14 +354,88 @@ config MCORE2
 	  family in /proc/cpuinfo. Newer ones have 6 and older ones 15
 	  (not a typo)
 
-config MATOM
-	bool "Intel Atom"
+	  Enables -march=core2
+
+config MNEHALEM
+	bool "Intel Nehalem"
+	select X86_P6_NOP
 	---help---
 
-	  Select this for the Intel Atom platform. Intel Atom CPUs have an
-	  in-order pipelining architecture and thus can benefit from
-	  accordingly optimized code. Use a recent GCC with specific Atom
-	  support in order to fully benefit from selecting this option.
+	  Select this for 1st Gen Core processors in the Nehalem family.
+
+	  Enables -march=nehalem
+
+config MWESTMERE
+	bool "Intel Westmere"
+	select X86_P6_NOP
+	---help---
+
+	  Select this for the Intel Westmere formerly Nehalem-C family.
+
+	  Enables -march=westmere
+
+config MSILVERMONT
+	bool "Intel Silvermont"
+	select X86_P6_NOP
+	---help---
+
+	  Select this for the Intel Silvermont platform.
+
+	  Enables -march=silvermont
+
+config MSANDYBRIDGE
+	bool "Intel Sandy Bridge"
+	select X86_P6_NOP
+	---help---
+
+	  Select this for 2nd Gen Core processors in the Sandy Bridge family.
+
+	  Enables -march=sandybridge
+
+config MIVYBRIDGE
+	bool "Intel Ivy Bridge"
+	select X86_P6_NOP
+	---help---
+
+	  Select this for 3rd Gen Core processors in the Ivy Bridge family.
+
+	  Enables -march=ivybridge
+
+config MHASWELL
+	bool "Intel Haswell"
+	select X86_P6_NOP
+	---help---
+
+	  Select this for 4th Gen Core processors in the Haswell family.
+
+	  Enables -march=haswell
+
+config MBROADWELL
+	bool "Intel Broadwell"
+	select X86_P6_NOP
+	---help---
+
+	  Select this for 5th Gen Core processors in the Broadwell family.
+
+	  Enables -march=broadwell
+
+config MSKYLAKE
+	bool "Intel Skylake"
+	select X86_P6_NOP
+	---help---
+
+	  Select this for 6th Gen Core processors in the Skylake family.
+
+	  Enables -march=skylake
+
+config MSKYLAKEX
+	bool "Intel Skylake X"
+	select X86_P6_NOP
+	---help---
+
+	  Select this for 6th Gen Core processors in the Skylake X family.
+
+	  Enables -march=skylake-avx512
 
 config GENERIC_CPU
 	bool "Generic-x86-64"
@@ -287,6 +444,19 @@ config GENERIC_CPU
 	  Generic x86-64 CPU.
 	  Run equally well on all x86-64 CPUs.
 
+config MNATIVE
+ bool "Native optimizations autodetected by GCC"
+ ---help---
+
+   GCC 4.2 and above support -march=native, which automatically detects
+   the optimum settings to use based on your processor. -march=native
+   also detects and applies additional settings beyond -march specific
+   to your CPU, (eg. -msse4). Unless you have a specific reason not to
+   (e.g. distcc cross-compiling), you should probably be using
+   -march=native rather than anything listed below.
+
+   Enables -march=native
+
 endchoice
 
 config X86_GENERIC
@@ -311,7 +481,7 @@ config X86_INTERNODE_CACHE_SHIFT
 config X86_L1_CACHE_SHIFT
 	int
 	default "7" if MPENTIUM4 || MPSC
-	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
+	default "6" if MK7 || MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MPCK || MJAGUAR || MPENTIUMM || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MNATIVE || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
 	default "4" if MELAN || M486 || MGEODEGX1
 	default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
 
@@ -329,35 +499,36 @@ config X86_ALIGNMENT_16
 
 config X86_INTEL_USERCOPY
 	def_bool y
-	depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
+	depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK8SSE3 || MK7 || MEFFICEON || MCORE2 || MK10 || MBARCELONA || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MNATIVE
 
 config X86_USE_PPRO_CHECKSUM
 	def_bool y
-	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
+	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MK10 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MK8SSE3 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MATOM || MNATIVE
 
 config X86_USE_3DNOW
 	def_bool y
 	depends on (MCYRIXIII || MK7 || MGEODE_LX) && !UML
 
-#
-# P6_NOPs are a relatively minor optimization that require a family >=
-# 6 processor, except that it is broken on certain VIA chips.
-# Furthermore, AMD chips prefer a totally different sequence of NOPs
-# (which work on all CPUs).  In addition, it looks like Virtual PC
-# does not understand them.
-#
-# As a result, disallow these if we're not compiling for X86_64 (these
-# NOPs do work on all x86-64 capable chips); the list of processors in
-# the right-hand clause are the cores that benefit from this optimization.
-#
 config X86_P6_NOP
-	def_bool y
-	depends on X86_64
-	depends on (MCORE2 || MPENTIUM4 || MPSC)
+	default n
+	bool "Support for P6_NOPs on Intel chips"
+	depends on (MCORE2 || MPENTIUM4 || MPSC || MATOM || MNEHALEM || MWESTMERE || MSILVERMONT  || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MNATIVE)
+	---help---
+	P6_NOPs are a relatively minor optimization that require a family >=
+	6 processor, except that it is broken on certain VIA chips.
+	Furthermore, AMD chips prefer a totally different sequence of NOPs
+	(which work on all CPUs).  In addition, it looks like Virtual PC
+	does not understand them.
+
+	As a result, disallow these if we're not compiling for X86_64 (these
+	NOPs do work on all x86-64 capable chips); the list of processors in
+	the right-hand clause are the cores that benefit from this optimization.
+
+	Say Y if you have Intel CPU newer than Pentium Pro, N otherwise.
 
 config X86_TSC
 	def_bool y
-	depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64
+	depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MK8SSE3 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MNATIVE || MATOM) || X86_64
 
 config X86_CMPXCHG64
 	def_bool y
@@ -367,7 +538,7 @@ config X86_CMPXCHG64
 # generates cmov.
 config X86_CMOV
 	def_bool y
-	depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX)
+	depends on (MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MPCK || MJAGUAR || MK7 || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MNATIVE || MATOM || MGEODE_LX)
 
 config X86_MINIMUM_CPU_FAMILY
 	int
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 1c4d01255..2b6e05fd5 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -124,13 +124,42 @@ else
 	KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup)
 
         # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
+        cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native)
         cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
+        cflags-$(CONFIG_MK8SSE3) += $(call cc-option,-march=k8-sse3,-mtune=k8)
+        cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10)
+        cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona)
+        cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1)
+        cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2)
+        cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1)
+        cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2)
+        cflags-$(CONFIG_MSTEAMROLLER) += $(call cc-option,-march=bdver3)
+        cflags-$(CONFIG_MEXCAVATOR) += $(call cc-option,-march=bdver4)
+        cflags-$(CONFIG_MPCK) += $(call cc-option,-march=znver1)
         cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
 
         cflags-$(CONFIG_MCORE2) += \
-                $(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
-	cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \
-		$(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
+                $(call cc-option,-march=core2,$(call cc-option,-mtune=core2))
+        cflags-$(CONFIG_MNEHALEM) += \
+                $(call cc-option,-march=nehalem,$(call cc-option,-mtune=nehalem))
+        cflags-$(CONFIG_MWESTMERE) += \
+                $(call cc-option,-march=westmere,$(call cc-option,-mtune=westmere))
+        cflags-$(CONFIG_MSILVERMONT) += \
+                $(call cc-option,-march=silvermont,$(call cc-option,-mtune=silvermont))
+        cflags-$(CONFIG_MSANDYBRIDGE) += \
+                $(call cc-option,-march=sandybridge,$(call cc-option,-mtune=sandybridge))
+        cflags-$(CONFIG_MIVYBRIDGE) += \
+                $(call cc-option,-march=ivybridge,$(call cc-option,-mtune=ivybridge))
+        cflags-$(CONFIG_MHASWELL) += \
+                $(call cc-option,-march=haswell,$(call cc-option,-mtune=haswell))
+        cflags-$(CONFIG_MBROADWELL) += \
+                $(call cc-option,-march=broadwell,$(call cc-option,-mtune=broadwell))
+        cflags-$(CONFIG_MSKYLAKE) += \
+                $(call cc-option,-march=skylake,$(call cc-option,-mtune=skylake))
+        cflags-$(CONFIG_MSKYLAKEX) += \
+                $(call cc-option,-march=skylake-avx512,$(call cc-option,-mtune=skylake-avx512))
+        cflags-$(CONFIG_MATOM) += $(call cc-option,-march=bonnell) \
+                $(call cc-option,-mtune=bonnell,$(call cc-option,-mtune=generic))
         cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
         KBUILD_CFLAGS += $(cflags-y)
 
diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
index 1f5faf860..3be678325 100644
--- a/arch/x86/Makefile_32.cpu
+++ b/arch/x86/Makefile_32.cpu
@@ -23,7 +23,18 @@ cflags-$(CONFIG_MK6)		+= -march=k6
 # Please note, that patches that add -march=athlon-xp and friends are pointless.
 # They make zero difference whatsosever to performance at this time.
 cflags-$(CONFIG_MK7)		+= -march=athlon
+cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native)
 cflags-$(CONFIG_MK8)		+= $(call cc-option,-march=k8,-march=athlon)
+cflags-$(CONFIG_MK8SSE3)		+= $(call cc-option,-march=k8-sse3,-march=athlon)
+cflags-$(CONFIG_MK10)	+= $(call cc-option,-march=amdfam10,-march=athlon)
+cflags-$(CONFIG_MBARCELONA)	+= $(call cc-option,-march=barcelona,-march=athlon)
+cflags-$(CONFIG_MBOBCAT)	+= $(call cc-option,-march=btver1,-march=athlon)
+cflags-$(CONFIG_MJAGUAR)	+= $(call cc-option,-march=btver2,-march=athlon)
+cflags-$(CONFIG_MBULLDOZER)	+= $(call cc-option,-march=bdver1,-march=athlon)
+cflags-$(CONFIG_MPILEDRIVER)	+= $(call cc-option,-march=bdver2,-march=athlon)
+cflags-$(CONFIG_MSTEAMROLLER)	+= $(call cc-option,-march=bdver3,-march=athlon)
+cflags-$(CONFIG_MEXCAVATOR)	+= $(call cc-option,-march=bdver4,-march=athlon)
+cflags-$(CONFIG_MPCK)	+= $(call cc-option,-march=znver1,-march=athlon)
 cflags-$(CONFIG_MCRUSOE)	+= -march=i686 -falign-functions=0 -falign-jumps=0 -falign-loops=0
 cflags-$(CONFIG_MEFFICEON)	+= -march=i686 $(call tune,pentium3) -falign-functions=0 -falign-jumps=0 -falign-loops=0
 cflags-$(CONFIG_MWINCHIPC6)	+= $(call cc-option,-march=winchip-c6,-march=i586)
@@ -32,8 +43,17 @@ cflags-$(CONFIG_MCYRIXIII)	+= $(call cc-option,-march=c3,-march=i486) -falign-fu
 cflags-$(CONFIG_MVIAC3_2)	+= $(call cc-option,-march=c3-2,-march=i686)
 cflags-$(CONFIG_MVIAC7)		+= -march=i686
 cflags-$(CONFIG_MCORE2)		+= -march=i686 $(call tune,core2)
-cflags-$(CONFIG_MATOM)		+= $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \
-	$(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
+cflags-$(CONFIG_MNEHALEM)	+= -march=i686 $(call tune,nehalem)
+cflags-$(CONFIG_MWESTMERE)	+= -march=i686 $(call tune,westmere)
+cflags-$(CONFIG_MSILVERMONT)	+= -march=i686 $(call tune,silvermont)
+cflags-$(CONFIG_MSANDYBRIDGE)	+= -march=i686 $(call tune,sandybridge)
+cflags-$(CONFIG_MIVYBRIDGE)	+= -march=i686 $(call tune,ivybridge)
+cflags-$(CONFIG_MHASWELL)	+= -march=i686 $(call tune,haswell)
+cflags-$(CONFIG_MBROADWELL)	+= -march=i686 $(call tune,broadwell)
+cflags-$(CONFIG_MSKYLAKE)	+= -march=i686 $(call tune,skylake)
+cflags-$(CONFIG_MSKYLAKEX)	+= -march=i686 $(call tune,skylake-avx512)
+cflags-$(CONFIG_MATOM)		+= $(call cc-option,-march=bonnell,$(call cc-option,-march=core2,-march=i686)) \
+	$(call cc-option,-mtune=bonnell,$(call cc-option,-mtune=generic))
 
 # AMD Elan support
 cflags-$(CONFIG_MELAN)		+= -march=i486
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index 7948a17fe..b8ed87f28 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -25,6 +25,26 @@ struct mod_arch_specific {
 #define MODULE_PROC_FAMILY "586MMX "
 #elif defined CONFIG_MCORE2
 #define MODULE_PROC_FAMILY "CORE2 "
+#elif defined CONFIG_MNATIVE
+#define MODULE_PROC_FAMILY "NATIVE "
+#elif defined CONFIG_MNEHALEM
+#define MODULE_PROC_FAMILY "NEHALEM "
+#elif defined CONFIG_MWESTMERE
+#define MODULE_PROC_FAMILY "WESTMERE "
+#elif defined CONFIG_MSILVERMONT
+#define MODULE_PROC_FAMILY "SILVERMONT "
+#elif defined CONFIG_MSANDYBRIDGE
+#define MODULE_PROC_FAMILY "SANDYBRIDGE "
+#elif defined CONFIG_MIVYBRIDGE
+#define MODULE_PROC_FAMILY "IVYBRIDGE "
+#elif defined CONFIG_MHASWELL
+#define MODULE_PROC_FAMILY "HASWELL "
+#elif defined CONFIG_MBROADWELL
+#define MODULE_PROC_FAMILY "BROADWELL "
+#elif defined CONFIG_MSKYLAKE
+#define MODULE_PROC_FAMILY "SKYLAKE "
+#elif defined CONFIG_MSKYLAKE
+#define MODULE_PROC_FAMILY "SKYLAKEX "
 #elif defined CONFIG_MATOM
 #define MODULE_PROC_FAMILY "ATOM "
 #elif defined CONFIG_M686
@@ -43,6 +63,26 @@ struct mod_arch_specific {
 #define MODULE_PROC_FAMILY "K7 "
 #elif defined CONFIG_MK8
 #define MODULE_PROC_FAMILY "K8 "
+#elif defined CONFIG_MK8SSE3
+#define MODULE_PROC_FAMILY "K8SSE3 "
+#elif defined CONFIG_MK10
+#define MODULE_PROC_FAMILY "K10 "
+#elif defined CONFIG_MBARCELONA
+#define MODULE_PROC_FAMILY "BARCELONA "
+#elif defined CONFIG_MBOBCAT
+#define MODULE_PROC_FAMILY "BOBCAT "
+#elif defined CONFIG_MBULLDOZER
+#define MODULE_PROC_FAMILY "BULLDOZER "
+#elif defined CONFIG_MPILEDRIVER
+#define MODULE_PROC_FAMILY "PILEDRIVER "
+#elif defined CONFIG_MSTEAMROLLER
+#define MODULE_PROC_FAMILY "STEAMROLLER "
+#elif defined CONFIG_MJAGUAR
+#define MODULE_PROC_FAMILY "JAGUAR "
+#elif defined CONFIG_MEXCAVATOR
+#define MODULE_PROC_FAMILY "EXCAVATOR "
+#elif defined CONFIG_MPCK
+#define MODULE_PROC_FAMILY "PCK "
 #elif defined CONFIG_MELAN
 #define MODULE_PROC_FAMILY "ELAN "
 #elif defined CONFIG_MCRUSOE
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index a4a8914bf..299a6861f 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -40,6 +40,26 @@ config CFQ_GROUP_IOSCHED
 	---help---
 	  Enable group IO scheduling in CFQ.
 
+config IOSCHED_BFQ_SQ
+	tristate "BFQ-SQ I/O scheduler"
+	default n
+	---help---
+	The BFQ-SQ I/O scheduler (for legacy blk: SQ stands for
+	SingleQueue) distributes bandwidth among all processes
+	according to their weights, regardless of the device
+	parameters and with any workload. It also guarantees a low
+	latency to interactive and soft real-time applications.
+	Details in Documentation/block/bfq-iosched.txt
+
+config BFQ_SQ_GROUP_IOSCHED
+	bool "BFQ-SQ hierarchical scheduling support"
+	depends on IOSCHED_BFQ_SQ && BLK_CGROUP
+	default n
+	---help---
+
+	Enable hierarchical scheduling in BFQ-SQ, using the blkio
+	(cgroups-v1) or io (cgroups-v2) controller.
+
 choice
 
 	prompt "Default I/O scheduler"
@@ -54,6 +74,16 @@ choice
 	config DEFAULT_CFQ
 		bool "CFQ" if IOSCHED_CFQ=y
 
+	config DEFAULT_BFQ_SQ
+		bool "BFQ-SQ" if IOSCHED_BFQ_SQ=y
+		help
+		  Selects BFQ-SQ as the default I/O scheduler which will be
+		  used by default for all block devices.
+		  The BFQ-SQ I/O scheduler aims at distributing the bandwidth
+		  as desired, independently of the disk parameters and with
+		  any workload. It also tries to guarantee low latency to
+		  interactive and soft real-time applications.
+
 	config DEFAULT_NOOP
 		bool "No-op"
 
@@ -63,8 +93,28 @@ config DEFAULT_IOSCHED
 	string
 	default "deadline" if DEFAULT_DEADLINE
 	default "cfq" if DEFAULT_CFQ
+	default "bfq-sq" if DEFAULT_BFQ_SQ
 	default "noop" if DEFAULT_NOOP
 
+config MQ_IOSCHED_BFQ
+	tristate "BFQ-MQ I/O Scheduler"
+	default y
+	---help---
+	BFQ I/O scheduler for BLK-MQ. BFQ-MQ distributes bandwidth
+	among all processes according to their weights, regardless of
+	the device parameters and with any workload. It also
+	guarantees a low latency to interactive and soft real-time
+	applications.  Details in Documentation/block/bfq-iosched.txt
+
+config MQ_BFQ_GROUP_IOSCHED
+	bool "BFQ-MQ hierarchical scheduling support"
+	depends on MQ_IOSCHED_BFQ && BLK_CGROUP
+	default n
+	---help---
+
+	Enable hierarchical scheduling in BFQ-MQ, using the blkio
+	(cgroups-v1) or io (cgroups-v2) controller.
+
 config MQ_IOSCHED_DEADLINE
 	tristate "MQ deadline I/O scheduler"
 	default y
diff --git a/block/Makefile b/block/Makefile
index 6a56303b9..0934dafda 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
 			blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
 			blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
 			blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
-			blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
+			blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o uuid.o \
 			genhd.o partition-generic.o ioprio.o \
 			badblocks.o partitions/
 
@@ -24,6 +24,8 @@ obj-$(CONFIG_MQ_IOSCHED_DEADLINE)	+= mq-deadline.o
 obj-$(CONFIG_MQ_IOSCHED_KYBER)	+= kyber-iosched.o
 bfq-y				:= bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
 obj-$(CONFIG_IOSCHED_BFQ)	+= bfq.o
+obj-$(CONFIG_IOSCHED_BFQ_SQ)	+= bfq-sq-iosched.o
+obj-$(CONFIG_MQ_IOSCHED_BFQ)	+= bfq-mq-iosched.o
 
 obj-$(CONFIG_BLOCK_COMPAT)	+= compat_ioctl.o
 obj-$(CONFIG_BLK_CMDLINE_PARSER)	+= cmdline-parser.o
diff --git a/block/bfq-cgroup-included.c b/block/bfq-cgroup-included.c
new file mode 100644
index 000000000..613f154e9
--- /dev/null
+++ b/block/bfq-cgroup-included.c
@@ -0,0 +1,1354 @@
+/*
+ * BFQ: CGROUPS support.
+ *
+ * Based on ideas and code from CFQ:
+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
+ *
+ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
+ *		      Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Copyright (C) 2016 Paolo Valente <paolo.valente@linaro.org>
+ *
+ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ
+ * file.
+ */
+
+#if defined(BFQ_GROUP_IOSCHED_ENABLED) && defined(CONFIG_DEBUG_BLK_CGROUP)
+
+/* bfqg stats flags */
+enum bfqg_stats_flags {
+	BFQG_stats_waiting = 0,
+	BFQG_stats_idling,
+	BFQG_stats_empty,
+};
+
+#define BFQG_FLAG_FNS(name)						\
+static void bfqg_stats_mark_##name(struct bfqg_stats *stats)	\
+{									\
+	stats->flags |= (1 << BFQG_stats_##name);			\
+}									\
+static void bfqg_stats_clear_##name(struct bfqg_stats *stats)	\
+{									\
+	stats->flags &= ~(1 << BFQG_stats_##name);			\
+}									\
+static int bfqg_stats_##name(struct bfqg_stats *stats)		\
+{									\
+	return (stats->flags & (1 << BFQG_stats_##name)) != 0;		\
+}									\
+
+BFQG_FLAG_FNS(waiting)
+BFQG_FLAG_FNS(idling)
+BFQG_FLAG_FNS(empty)
+#undef BFQG_FLAG_FNS
+
+#ifdef BFQ_MQ
+/* This should be called with the scheduler lock held. */
+#else
+/* This should be called with the queue_lock held. */
+#endif
+static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats)
+{
+	unsigned long long now;
+
+	if (!bfqg_stats_waiting(stats))
+		return;
+
+	now = sched_clock();
+	if (time_after64(now, stats->start_group_wait_time))
+		blkg_stat_add(&stats->group_wait_time,
+			      now - stats->start_group_wait_time);
+	bfqg_stats_clear_waiting(stats);
+}
+
+#ifdef BFQ_MQ
+/* This should be called with the scheduler lock held. */
+#else
+/* This should be called with the queue_lock held. */
+#endif
+static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
+						 struct bfq_group *curr_bfqg)
+{
+	struct bfqg_stats *stats = &bfqg->stats;
+
+	if (bfqg_stats_waiting(stats))
+		return;
+	if (bfqg == curr_bfqg)
+		return;
+	stats->start_group_wait_time = sched_clock();
+	bfqg_stats_mark_waiting(stats);
+}
+
+#ifdef BFQ_MQ
+/* This should be called with the scheduler lock held. */
+#else
+/* This should be called with the queue_lock held. */
+#endif
+static void bfqg_stats_end_empty_time(struct bfqg_stats *stats)
+{
+	unsigned long long now;
+
+	if (!bfqg_stats_empty(stats))
+		return;
+
+	now = sched_clock();
+	if (time_after64(now, stats->start_empty_time))
+		blkg_stat_add(&stats->empty_time,
+			      now - stats->start_empty_time);
+	bfqg_stats_clear_empty(stats);
+}
+
+static void bfqg_stats_update_dequeue(struct bfq_group *bfqg)
+{
+	blkg_stat_add(&bfqg->stats.dequeue, 1);
+}
+
+static void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg)
+{
+	struct bfqg_stats *stats = &bfqg->stats;
+
+	if (blkg_rwstat_total(&stats->queued))
+		return;
+
+	/*
+	 * group is already marked empty. This can happen if bfqq got new
+	 * request in parent group and moved to this group while being added
+	 * to service tree. Just ignore the event and move on.
+	 */
+	if (bfqg_stats_empty(stats))
+		return;
+
+	stats->start_empty_time = sched_clock();
+	bfqg_stats_mark_empty(stats);
+}
+
+static void bfqg_stats_update_idle_time(struct bfq_group *bfqg)
+{
+	struct bfqg_stats *stats = &bfqg->stats;
+
+	if (bfqg_stats_idling(stats)) {
+		unsigned long long now = sched_clock();
+
+		if (time_after64(now, stats->start_idle_time))
+			blkg_stat_add(&stats->idle_time,
+				      now - stats->start_idle_time);
+		bfqg_stats_clear_idling(stats);
+	}
+}
+
+static void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg)
+{
+	struct bfqg_stats *stats = &bfqg->stats;
+
+	stats->start_idle_time = sched_clock();
+	bfqg_stats_mark_idling(stats);
+}
+
+static void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg)
+{
+	struct bfqg_stats *stats = &bfqg->stats;
+
+	blkg_stat_add(&stats->avg_queue_size_sum,
+		      blkg_rwstat_total(&stats->queued));
+	blkg_stat_add(&stats->avg_queue_size_samples, 1);
+	bfqg_stats_update_group_wait_time(stats);
+}
+
+static void bfqg_stats_update_io_add(struct bfq_group *bfqg,
+				struct bfq_queue *bfqq, unsigned int op)
+{
+	blkg_rwstat_add(&bfqg->stats.queued, op, 1);
+	bfqg_stats_end_empty_time(&bfqg->stats);
+	if (!(bfqq == ((struct bfq_data *)bfqg->bfqd)->in_service_queue))
+		bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq));
+}
+
+static void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op)
+{
+	blkg_rwstat_add(&bfqg->stats.queued, op, -1);
+}
+
+static void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op)
+{
+	blkg_rwstat_add(&bfqg->stats.merged, op, 1);
+}
+
+static void bfqg_stats_update_completion(struct bfq_group *bfqg,
+		uint64_t start_time, uint64_t io_start_time, unsigned int op)
+{
+	struct bfqg_stats *stats = &bfqg->stats;
+	unsigned long long now = sched_clock();
+
+	if (time_after64(now, io_start_time))
+		blkg_rwstat_add(&stats->service_time, op,
+				now - io_start_time);
+	if (time_after64(io_start_time, start_time))
+		blkg_rwstat_add(&stats->wait_time, op,
+				io_start_time - start_time);
+}
+
+#else /* BFQ_GROUP_IOSCHED_ENABLED && CONFIG_DEBUG_BLK_CGROUP */
+
+static inline void bfqg_stats_update_io_add(struct bfq_group *bfqg,
+			struct bfq_queue *bfqq, unsigned int op) { }
+static inline void
+bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op) { }
+static inline void
+bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) { }
+static inline void bfqg_stats_update_completion(struct bfq_group *bfqg,
+		uint64_t start_time, uint64_t io_start_time,
+		unsigned int op) { }
+static inline void
+bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
+		struct bfq_group *curr_bfqg) { }
+static inline void bfqg_stats_end_empty_time(struct bfqg_stats *stats) { }
+static inline void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { }
+static inline void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { }
+static inline void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { }
+static inline void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg) { }
+static inline void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg) { }
+
+#endif /* BFQ_GROUP_IOSCHED_ENABLED && CONFIG_DEBUG_BLK_CGROUP */
+
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+static struct blkcg_policy blkcg_policy_bfq;
+
+/*
+ * blk-cgroup policy-related handlers
+ * The following functions help in converting between blk-cgroup
+ * internal structures and BFQ-specific structures.
+ */
+
+static struct bfq_group *pd_to_bfqg(struct blkg_policy_data *pd)
+{
+	return pd ? container_of(pd, struct bfq_group, pd) : NULL;
+}
+
+static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg)
+{
+	return pd_to_blkg(&bfqg->pd);
+}
+
+static struct bfq_group *blkg_to_bfqg(struct blkcg_gq *blkg)
+{
+	struct blkg_policy_data *pd = blkg_to_pd(blkg, &blkcg_policy_bfq);
+
+	return pd_to_bfqg(pd);
+}
+
+/*
+ * bfq_group handlers
+ * The following functions help in navigating the bfq_group hierarchy
+ * by allowing to find the parent of a bfq_group or the bfq_group
+ * associated to a bfq_queue.
+ */
+
+static struct bfq_group *bfqg_parent(struct bfq_group *bfqg)
+{
+	struct blkcg_gq *pblkg = bfqg_to_blkg(bfqg)->parent;
+
+	return pblkg ? blkg_to_bfqg(pblkg) : NULL;
+}
+
+static struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
+{
+	struct bfq_entity *group_entity = bfqq->entity.parent;
+
+	return group_entity ? container_of(group_entity, struct bfq_group,
+					   entity) :
+			      bfqq->bfqd->root_group;
+}
+
+/*
+ * The following two functions handle get and put of a bfq_group by
+ * wrapping the related blk-cgroup hooks.
+ */
+
+static void bfqg_get(struct bfq_group *bfqg)
+{
+#ifdef BFQ_MQ
+	bfqg->ref++;
+#else
+	blkg_get(bfqg_to_blkg(bfqg));
+#endif
+}
+
+static void bfqg_put(struct bfq_group *bfqg)
+{
+#ifdef BFQ_MQ
+	bfqg->ref--;
+
+	BUG_ON(bfqg->ref < 0);
+	if (bfqg->ref == 0)
+		kfree(bfqg);
+#else
+	blkg_put(bfqg_to_blkg(bfqg));
+#endif
+}
+
+#ifdef BFQ_MQ
+static void bfqg_and_blkg_get(struct bfq_group *bfqg)
+{
+	/* see comments in bfq_bic_update_cgroup for why refcounting bfqg */
+	bfqg_get(bfqg);
+
+	blkg_get(bfqg_to_blkg(bfqg));
+}
+
+static void bfqg_and_blkg_put(struct bfq_group *bfqg)
+{
+	bfqg_put(bfqg);
+
+	blkg_put(bfqg_to_blkg(bfqg));
+}
+#endif
+
+/* @stats = 0 */
+static void bfqg_stats_reset(struct bfqg_stats *stats)
+{
+#ifdef CONFIG_DEBUG_BLK_CGROUP
+	/* queued stats shouldn't be cleared */
+	blkg_rwstat_reset(&stats->merged);
+	blkg_rwstat_reset(&stats->service_time);
+	blkg_rwstat_reset(&stats->wait_time);
+	blkg_stat_reset(&stats->time);
+	blkg_stat_reset(&stats->avg_queue_size_sum);
+	blkg_stat_reset(&stats->avg_queue_size_samples);
+	blkg_stat_reset(&stats->dequeue);
+	blkg_stat_reset(&stats->group_wait_time);
+	blkg_stat_reset(&stats->idle_time);
+	blkg_stat_reset(&stats->empty_time);
+#endif
+}
+
+/* @to += @from */
+static void bfqg_stats_add_aux(struct bfqg_stats *to, struct bfqg_stats *from)
+{
+	if (!to || !from)
+		return;
+
+#ifdef CONFIG_DEBUG_BLK_CGROUP
+	/* queued stats shouldn't be cleared */
+	blkg_rwstat_add_aux(&to->merged, &from->merged);
+	blkg_rwstat_add_aux(&to->service_time, &from->service_time);
+	blkg_rwstat_add_aux(&to->wait_time, &from->wait_time);
+	blkg_stat_add_aux(&from->time, &from->time);
+	blkg_stat_add_aux(&to->avg_queue_size_sum, &from->avg_queue_size_sum);
+	blkg_stat_add_aux(&to->avg_queue_size_samples,
+			  &from->avg_queue_size_samples);
+	blkg_stat_add_aux(&to->dequeue, &from->dequeue);
+	blkg_stat_add_aux(&to->group_wait_time, &from->group_wait_time);
+	blkg_stat_add_aux(&to->idle_time, &from->idle_time);
+	blkg_stat_add_aux(&to->empty_time, &from->empty_time);
+#endif
+}
+
+/*
+ * Transfer @bfqg's stats to its parent's dead_stats so that the ancestors'
+ * recursive stats can still account for the amount used by this bfqg after
+ * it's gone.
+ */
+static void bfqg_stats_xfer_dead(struct bfq_group *bfqg)
+{
+	struct bfq_group *parent;
+
+	if (!bfqg) /* root_group */
+		return;
+
+	parent = bfqg_parent(bfqg);
+
+	lockdep_assert_held(bfqg_to_blkg(bfqg)->q->queue_lock);
+
+	if (unlikely(!parent))
+		return;
+
+	bfqg_stats_add_aux(&parent->stats, &bfqg->stats);
+	bfqg_stats_reset(&bfqg->stats);
+}
+
+static void bfq_init_entity(struct bfq_entity *entity,
+			    struct bfq_group *bfqg)
+{
+	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+
+	entity->weight = entity->new_weight;
+	entity->orig_weight = entity->new_weight;
+	if (bfqq) {
+		bfqq->ioprio = bfqq->new_ioprio;
+		bfqq->ioprio_class = bfqq->new_ioprio_class;
+#ifdef BFQ_MQ
+		/*
+		 * Make sure that bfqg and its associated blkg do not
+		 * disappear before entity.
+		 */
+		bfq_log_bfqq(bfqq->bfqd, bfqq, "getting bfqg %p and blkg\n",
+		bfqg);
+
+		bfqg_and_blkg_get(bfqg);
+#else
+		bfqg_get(bfqg);
+#endif
+	}
+	entity->parent = bfqg->my_entity; /* NULL for root group */
+	entity->sched_data = &bfqg->sched_data;
+}
+
+static void bfqg_stats_exit(struct bfqg_stats *stats)
+{
+#ifdef CONFIG_DEBUG_BLK_CGROUP
+	blkg_rwstat_exit(&stats->merged);
+	blkg_rwstat_exit(&stats->service_time);
+	blkg_rwstat_exit(&stats->wait_time);
+	blkg_rwstat_exit(&stats->queued);
+	blkg_stat_exit(&stats->time);
+	blkg_stat_exit(&stats->avg_queue_size_sum);
+	blkg_stat_exit(&stats->avg_queue_size_samples);
+	blkg_stat_exit(&stats->dequeue);
+	blkg_stat_exit(&stats->group_wait_time);
+	blkg_stat_exit(&stats->idle_time);
+	blkg_stat_exit(&stats->empty_time);
+#endif
+}
+
+static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp)
+{
+#ifdef CONFIG_DEBUG_BLK_CGROUP
+	if (blkg_rwstat_init(&stats->merged, gfp) ||
+	    blkg_rwstat_init(&stats->service_time, gfp) ||
+	    blkg_rwstat_init(&stats->wait_time, gfp) ||
+	    blkg_rwstat_init(&stats->queued, gfp) ||
+	    blkg_stat_init(&stats->time, gfp) ||
+	    blkg_stat_init(&stats->avg_queue_size_sum, gfp) ||
+	    blkg_stat_init(&stats->avg_queue_size_samples, gfp) ||
+	    blkg_stat_init(&stats->dequeue, gfp) ||
+	    blkg_stat_init(&stats->group_wait_time, gfp) ||
+	    blkg_stat_init(&stats->idle_time, gfp) ||
+	    blkg_stat_init(&stats->empty_time, gfp)) {
+		bfqg_stats_exit(stats);
+		return -ENOMEM;
+	}
+#endif
+
+	return 0;
+}
+
+static struct bfq_group_data *cpd_to_bfqgd(struct blkcg_policy_data *cpd)
+{
+	return cpd ? container_of(cpd, struct bfq_group_data, pd) : NULL;
+}
+
+static struct bfq_group_data *blkcg_to_bfqgd(struct blkcg *blkcg)
+{
+	return cpd_to_bfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_bfq));
+}
+
+static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp)
+{
+	struct bfq_group_data *bgd;
+
+	bgd = kzalloc(sizeof(*bgd), gfp);
+	if (!bgd)
+		return NULL;
+	return &bgd->pd;
+}
+
+static void bfq_cpd_init(struct blkcg_policy_data *cpd)
+{
+	struct bfq_group_data *d = cpd_to_bfqgd(cpd);
+
+	d->weight = cgroup_subsys_on_dfl(io_cgrp_subsys) ?
+		CGROUP_WEIGHT_DFL : BFQ_WEIGHT_LEGACY_DFL;
+}
+
+static void bfq_cpd_free(struct blkcg_policy_data *cpd)
+{
+	kfree(cpd_to_bfqgd(cpd));
+}
+
+static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node)
+{
+	struct bfq_group *bfqg;
+
+	bfqg = kzalloc_node(sizeof(*bfqg), gfp, node);
+	if (!bfqg)
+		return NULL;
+
+	if (bfqg_stats_init(&bfqg->stats, gfp)) {
+		kfree(bfqg);
+		return NULL;
+	}
+#ifdef BFQ_MQ
+	/* see comments in bfq_bic_update_cgroup for why refcounting */
+	bfqg_get(bfqg);
+#endif
+	return &bfqg->pd;
+}
+
+static void bfq_pd_init(struct blkg_policy_data *pd)
+{
+	struct blkcg_gq *blkg;
+	struct bfq_group *bfqg;
+	struct bfq_data *bfqd;
+	struct bfq_entity *entity;
+	struct bfq_group_data *d;
+
+	blkg = pd_to_blkg(pd);
+	BUG_ON(!blkg);
+	bfqg = blkg_to_bfqg(blkg);
+	bfqd = blkg->q->elevator->elevator_data;
+	BUG_ON(bfqg == bfqd->root_group);
+	entity = &bfqg->entity;
+	d = blkcg_to_bfqgd(blkg->blkcg);
+
+	entity->orig_weight = entity->weight = entity->new_weight = d->weight;
+	entity->my_sched_data = &bfqg->sched_data;
+	bfqg->my_entity = entity; /*
+				   * the root_group's will be set to NULL
+				   * in bfq_init_queue()
+				   */
+	bfqg->bfqd = bfqd;
+	bfqg->active_entities = 0;
+	bfqg->rq_pos_tree = RB_ROOT;
+}
+
+static void bfq_pd_free(struct blkg_policy_data *pd)
+{
+	struct bfq_group *bfqg = pd_to_bfqg(pd);
+
+	bfqg_stats_exit(&bfqg->stats);
+#ifdef BFQ_MQ
+	bfqg_put(bfqg);
+#else
+	kfree(bfqg);
+#endif
+}
+
+static void bfq_pd_reset_stats(struct blkg_policy_data *pd)
+{
+	struct bfq_group *bfqg = pd_to_bfqg(pd);
+
+	bfqg_stats_reset(&bfqg->stats);
+}
+
+static void bfq_group_set_parent(struct bfq_group *bfqg,
+					struct bfq_group *parent)
+{
+	struct bfq_entity *entity;
+
+	BUG_ON(!parent);
+	BUG_ON(!bfqg);
+	BUG_ON(bfqg == parent);
+
+	entity = &bfqg->entity;
+	entity->parent = parent->my_entity;
+	entity->sched_data = &parent->sched_data;
+}
+
+static struct bfq_group *bfq_lookup_bfqg(struct bfq_data *bfqd,
+					 struct blkcg *blkcg)
+{
+	struct blkcg_gq *blkg;
+
+	blkg = blkg_lookup(blkcg, bfqd->queue);
+	if (likely(blkg))
+		return blkg_to_bfqg(blkg);
+	return NULL;
+}
+
+static struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
+					    struct blkcg *blkcg)
+{
+	struct bfq_group *bfqg, *parent;
+	struct bfq_entity *entity;
+
+	bfqg = bfq_lookup_bfqg(bfqd, blkcg);
+
+	if (unlikely(!bfqg))
+		return NULL;
+
+	/*
+	 * Update chain of bfq_groups as we might be handling a leaf group
+	 * which, along with some of its relatives, has not been hooked yet
+	 * to the private hierarchy of BFQ.
+	 */
+	entity = &bfqg->entity;
+	for_each_entity(entity) {
+		bfqg = container_of(entity, struct bfq_group, entity);
+		BUG_ON(!bfqg);
+		if (bfqg != bfqd->root_group) {
+			parent = bfqg_parent(bfqg);
+			if (!parent)
+				parent = bfqd->root_group;
+			BUG_ON(!parent);
+			bfq_group_set_parent(bfqg, parent);
+		}
+	}
+
+	return bfqg;
+}
+
+static void bfq_pos_tree_add_move(struct bfq_data *bfqd,
+				  struct bfq_queue *bfqq);
+
+static void bfq_bfqq_expire(struct bfq_data *bfqd,
+			    struct bfq_queue *bfqq,
+			    bool compensate,
+			    enum bfqq_expiration reason);
+
+/**
+ * bfq_bfqq_move - migrate @bfqq to @bfqg.
+ * @bfqd: queue descriptor.
+ * @bfqq: the queue to move.
+ * @bfqg: the group to move to.
+ *
+ * Move @bfqq to @bfqg, deactivating it from its old group and reactivating
+ * it on the new one.  Avoid putting the entity on the old group idle tree.
+ *
+#ifdef BFQ_MQ
+ * Must be called under the scheduler lock, to make sure that the blkg
+ * owning @bfqg does not disappear (see comments in
+ * bfq_bic_update_cgroup on guaranteeing the consistency of blkg
+ * objects).
+#else
+ * Must be called under the queue lock; the cgroup owning @bfqg must
+ * not disappear (by now this just means that we are called under
+ * rcu_read_lock()).
+#endif
+ */
+static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+			  struct bfq_group *bfqg)
+{
+	struct bfq_entity *entity = &bfqq->entity;
+
+	BUG_ON(!bfq_bfqq_busy(bfqq) && !RB_EMPTY_ROOT(&bfqq->sort_list));
+	BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list) && !entity->on_st);
+	BUG_ON(bfq_bfqq_busy(bfqq) && RB_EMPTY_ROOT(&bfqq->sort_list)
+	       && entity->on_st &&
+	       bfqq != bfqd->in_service_queue);
+	BUG_ON(!bfq_bfqq_busy(bfqq) && bfqq == bfqd->in_service_queue);
+
+	/* If bfqq is empty, then bfq_bfqq_expire also invokes
+	 * bfq_del_bfqq_busy, thereby removing bfqq and its entity
+	 * from data structures related to current group. Otherwise we
+	 * need to remove bfqq explicitly with bfq_deactivate_bfqq, as
+	 * we do below.
+	 */
+	if (bfqq == bfqd->in_service_queue)
+		bfq_bfqq_expire(bfqd, bfqd->in_service_queue,
+				false, BFQ_BFQQ_PREEMPTED);
+
+	BUG_ON(entity->on_st && !bfq_bfqq_busy(bfqq)
+	    && &bfq_entity_service_tree(entity)->idle !=
+	       entity->tree);
+
+	BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_busy(bfqq));
+
+	if (bfq_bfqq_busy(bfqq))
+		bfq_deactivate_bfqq(bfqd, bfqq, false, false);
+	else if (entity->on_st) {
+		BUG_ON(&bfq_entity_service_tree(entity)->idle !=
+		       entity->tree);
+		bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);
+	}
+#ifdef BFQ_MQ
+	bfq_log_bfqq(bfqq->bfqd, bfqq, "putting blkg and bfqg %p\n", bfqg);
+
+	bfqg_and_blkg_put(bfqq_group(bfqq));
+#else
+	bfqg_put(bfqq_group(bfqq));
+#endif
+
+	entity->parent = bfqg->my_entity;
+	entity->sched_data = &bfqg->sched_data;
+#ifdef BFQ_MQ
+	bfq_log_bfqq(bfqq->bfqd, bfqq, "getting blkg and bfqg %p\n", bfqg);
+
+	/* pin down bfqg and its associated blkg  */
+	bfqg_and_blkg_get(bfqg);
+#else
+	bfqg_get(bfqg);
+#endif
+
+	BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_busy(bfqq));
+	if (bfq_bfqq_busy(bfqq)) {
+		bfq_pos_tree_add_move(bfqd, bfqq);
+		bfq_activate_bfqq(bfqd, bfqq);
+	}
+
+	if (!bfqd->in_service_queue && !bfqd->rq_in_driver)
+		bfq_schedule_dispatch(bfqd);
+	BUG_ON(entity->on_st && !bfq_bfqq_busy(bfqq)
+	       && &bfq_entity_service_tree(entity)->idle !=
+	       entity->tree);
+}
+
+/**
+ * __bfq_bic_change_cgroup - move @bic to @cgroup.
+ * @bfqd: the queue descriptor.
+ * @bic: the bic to move.
+ * @blkcg: the blk-cgroup to move to.
+ *
+#ifdef BFQ_MQ
+ * Move bic to blkcg, assuming that bfqd->lock is held; which makes
+ * sure that the reference to cgroup is valid across the call (see
+ * comments in bfq_bic_update_cgroup on this issue)
+#else
+ * Move bic to blkcg, assuming that bfqd->queue is locked; the caller
+ * has to make sure that the reference to cgroup is valid across the call.
+#endif
+ *
+ * NOTE: an alternative approach might have been to store the current
+ * cgroup in bfqq and getting a reference to it, reducing the lookup
+ * time here, at the price of slightly more complex code.
+ */
+static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
+						struct bfq_io_cq *bic,
+						struct blkcg *blkcg)
+{
+	struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0);
+	struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1);
+	struct bfq_group *bfqg;
+	struct bfq_entity *entity;
+
+	bfqg = bfq_find_set_group(bfqd, blkcg);
+
+	if (unlikely(!bfqg))
+		bfqg = bfqd->root_group;
+
+	if (async_bfqq) {
+		entity = &async_bfqq->entity;
+
+		if (entity->sched_data != &bfqg->sched_data) {
+			bic_set_bfqq(bic, NULL, 0);
+			bfq_log_bfqq(bfqd, async_bfqq,
+				     "%p %d",
+				     async_bfqq,
+				     async_bfqq->ref);
+			bfq_put_queue(async_bfqq);
+		}
+	}
+
+	if (sync_bfqq) {
+		entity = &sync_bfqq->entity;
+		if (entity->sched_data != &bfqg->sched_data)
+			bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
+	}
+
+	return bfqg;
+}
+
+static void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
+{
+	struct bfq_data *bfqd = bic_to_bfqd(bic);
+	struct bfq_group *bfqg = NULL;
+	uint64_t serial_nr;
+
+	rcu_read_lock();
+	serial_nr = bio_blkcg(bio)->css.serial_nr;
+
+	/*
+	 * Check whether blkcg has changed.  The condition may trigger
+	 * spuriously on a newly created cic but there's no harm.
+	 */
+	if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
+		goto out;
+
+	bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio));
+#ifdef BFQ_MQ
+	/*
+	 * Update blkg_path for bfq_log_* functions. We cache this
+	 * path, and update it here, for the following
+	 * reasons. Operations on blkg objects in blk-cgroup are
+	 * protected with the request_queue lock, and not with the
+	 * lock that protects the instances of this scheduler
+	 * (bfqd->lock). This exposes BFQ to the following sort of
+	 * race.
+	 *
+	 * The blkg_lookup performed in bfq_get_queue, protected
+	 * through rcu, may happen to return the address of a copy of
+	 * the original blkg. If this is the case, then the
+	 * bfqg_and_blkg_get performed in bfq_get_queue, to pin down
+	 * the blkg, is useless: it does not prevent blk-cgroup code
+	 * from destroying both the original blkg and all objects
+	 * directly or indirectly referred by the copy of the
+	 * blkg.
+	 *
+	 * On the bright side, destroy operations on a blkg invoke, as
+	 * a first step, hooks of the scheduler associated with the
+	 * blkg. And these hooks are executed with bfqd->lock held for
+	 * BFQ. As a consequence, for any blkg associated with the
+	 * request queue this instance of the scheduler is attached
+	 * to, we are guaranteed that such a blkg is not destroyed, and
+	 * that all the pointers it contains are consistent, while we
+	 * are holding bfqd->lock. A blkg_lookup performed with
+	 * bfqd->lock held then returns a fully consistent blkg, which
+	 * remains consistent until this lock is held.
+	 *
+	 * Thanks to the last fact, and to the fact that: (1) bfqg has
+	 * been obtained through a blkg_lookup in the above
+	 * assignment, and (2) bfqd->lock is being held, here we can
+	 * safely use the policy data for the involved blkg (i.e., the
+	 * field bfqg->pd) to get to the blkg associated with bfqg,
+	 * and then we can safely use any field of blkg. After we
+	 * release bfqd->lock, even just getting blkg through this
+	 * bfqg may cause dangling references to be traversed, as
+	 * bfqg->pd may not exist any more.
+	 *
+	 * In view of the above facts, here we cache, in the bfqg, any
+	 * blkg data we may need for this bic, and for its associated
+	 * bfq_queue. As of now, we need to cache only the path of the
+	 * blkg, which is used in the bfq_log_* functions.
+	 *
+	 * Finally, note that bfqg itself needs to be protected from
+	 * destruction on the blkg_free of the original blkg (which
+	 * invokes bfq_pd_free). We use an additional private
+	 * refcounter for bfqg, to let it disappear only after no
+	 * bfq_queue refers to it any longer.
+	 */
+	blkg_path(bfqg_to_blkg(bfqg), bfqg->blkg_path, sizeof(bfqg->blkg_path));
+#endif
+	bic->blkcg_serial_nr = serial_nr;
+out:
+	rcu_read_unlock();
+}
+
+/**
+ * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st.
+ * @st: the service tree being flushed.
+ */
+static void bfq_flush_idle_tree(struct bfq_service_tree *st)
+{
+	struct bfq_entity *entity = st->first_idle;
+
+	for (; entity ; entity = st->first_idle)
+		__bfq_deactivate_entity(entity, false);
+}
+
+/**
+ * bfq_reparent_leaf_entity - move leaf entity to the root_group.
+ * @bfqd: the device data structure with the root group.
+ * @entity: the entity to move.
+ */
+static void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
+				     struct bfq_entity *entity)
+{
+	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+
+	BUG_ON(!bfqq);
+	bfq_bfqq_move(bfqd, bfqq, bfqd->root_group);
+}
+
+/**
+ * bfq_reparent_active_entities - move to the root group all active
+ *                                entities.
+ * @bfqd: the device data structure with the root group.
+ * @bfqg: the group to move from.
+ * @st: the service tree with the entities.
+ */
+static void bfq_reparent_active_entities(struct bfq_data *bfqd,
+					 struct bfq_group *bfqg,
+					 struct bfq_service_tree *st)
+{
+	struct rb_root *active = &st->active;
+	struct bfq_entity *entity = NULL;
+
+	if (!RB_EMPTY_ROOT(&st->active))
+		entity = bfq_entity_of(rb_first(active));
+
+	for (; entity ; entity = bfq_entity_of(rb_first(active)))
+		bfq_reparent_leaf_entity(bfqd, entity);
+
+	if (bfqg->sched_data.in_service_entity)
+		bfq_reparent_leaf_entity(bfqd,
+			bfqg->sched_data.in_service_entity);
+}
+
+/**
+ * bfq_pd_offline - deactivate the entity associated with @pd,
+ *		    and reparent its children entities.
+ * @pd: descriptor of the policy going offline.
+ *
+ * blkio already grabs the queue_lock for us, so no need to use
+ * RCU-based magic
+ */
+static void bfq_pd_offline(struct blkg_policy_data *pd)
+{
+	struct bfq_service_tree *st;
+	struct bfq_group *bfqg;
+	struct bfq_data *bfqd;
+	struct bfq_entity *entity;
+#ifdef BFQ_MQ
+	unsigned long flags;
+#endif
+	int i;
+
+	BUG_ON(!pd);
+	bfqg = pd_to_bfqg(pd);
+	BUG_ON(!bfqg);
+	bfqd = bfqg->bfqd;
+	BUG_ON(bfqd && !bfqd->root_group);
+
+	entity = bfqg->my_entity;
+
+#ifdef BFQ_MQ
+	spin_lock_irqsave(&bfqd->lock, flags);
+#endif
+
+	if (!entity) /* root group */
+		goto put_async_queues;
+
+	/*
+	 * Empty all service_trees belonging to this group before
+	 * deactivating the group itself.
+	 */
+	for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) {
+		BUG_ON(!bfqg->sched_data.service_tree);
+		st = bfqg->sched_data.service_tree + i;
+		/*
+		 * The idle tree may still contain bfq_queues belonging
+		 * to exited task because they never migrated to a different
+		 * cgroup from the one being destroyed now.
+		 */
+		bfq_flush_idle_tree(st);
+
+		/*
+		 * It may happen that some queues are still active
+		 * (busy) upon group destruction (if the corresponding
+		 * processes have been forced to terminate). We move
+		 * all the leaf entities corresponding to these queues
+		 * to the root_group.
+		 * Also, it may happen that the group has an entity
+		 * in service, which is disconnected from the active
+		 * tree: it must be moved, too.
+		 * There is no need to put the sync queues, as the
+		 * scheduler has taken no reference.
+		 */
+		bfq_reparent_active_entities(bfqd, bfqg, st);
+		BUG_ON(!RB_EMPTY_ROOT(&st->active));
+		BUG_ON(!RB_EMPTY_ROOT(&st->idle));
+	}
+	BUG_ON(bfqg->sched_data.next_in_service);
+	BUG_ON(bfqg->sched_data.in_service_entity);
+
+	__bfq_deactivate_entity(entity, false);
+
+put_async_queues:
+	bfq_put_async_queues(bfqd, bfqg);
+
+#ifdef BFQ_MQ
+	spin_unlock_irqrestore(&bfqd->lock, flags);
+#endif
+	/*
+	 * @blkg is going offline and will be ignored by
+	 * blkg_[rw]stat_recursive_sum().  Transfer stats to the parent so
+	 * that they don't get lost.  If IOs complete after this point, the
+	 * stats for them will be lost.  Oh well...
+	 */
+	bfqg_stats_xfer_dead(bfqg);
+}
+
+static void bfq_end_wr_async(struct bfq_data *bfqd)
+{
+	struct blkcg_gq *blkg;
+
+	list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) {
+		struct bfq_group *bfqg = blkg_to_bfqg(blkg);
+		BUG_ON(!bfqg);
+
+		bfq_end_wr_async_queues(bfqd, bfqg);
+	}
+	bfq_end_wr_async_queues(bfqd, bfqd->root_group);
+}
+
+static int bfq_io_show_weight(struct seq_file *sf, void *v)
+{
+	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
+	struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
+	unsigned int val = 0;
+
+	if (bfqgd)
+		val = bfqgd->weight;
+
+	seq_printf(sf, "%u\n", val);
+
+	return 0;
+}
+
+static int bfq_io_set_weight_legacy(struct cgroup_subsys_state *css,
+				    struct cftype *cftype,
+				    u64 val)
+{
+	struct blkcg *blkcg = css_to_blkcg(css);
+	struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
+	struct blkcg_gq *blkg;
+	int ret = -ERANGE;
+
+	if (val < BFQ_MIN_WEIGHT || val > BFQ_MAX_WEIGHT)
+		return ret;
+
+	ret = 0;
+	spin_lock_irq(&blkcg->lock);
+	bfqgd->weight = (unsigned short)val;
+	hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
+		struct bfq_group *bfqg = blkg_to_bfqg(blkg);
+
+		if (!bfqg)
+			continue;
+		/*
+		 * Setting the prio_changed flag of the entity
+		 * to 1 with new_weight == weight would re-set
+		 * the value of the weight to its ioprio mapping.
+		 * Set the flag only if necessary.
+		 */
+		if ((unsigned short)val != bfqg->entity.new_weight) {
+			bfqg->entity.new_weight = (unsigned short)val;
+			/*
+			 * Make sure that the above new value has been
+			 * stored in bfqg->entity.new_weight before
+			 * setting the prio_changed flag. In fact,
+			 * this flag may be read asynchronously (in
+			 * critical sections protected by a different
+			 * lock than that held here), and finding this
+			 * flag set may cause the execution of the code
+			 * for updating parameters whose value may
+			 * depend also on bfqg->entity.new_weight (in
+			 * __bfq_entity_update_weight_prio).
+			 * This barrier makes sure that the new value
+			 * of bfqg->entity.new_weight is correctly
+			 * seen in that code.
+			 */
+			smp_wmb();
+			bfqg->entity.prio_changed = 1;
+		}
+	}
+	spin_unlock_irq(&blkcg->lock);
+
+	return ret;
+}
+
+static ssize_t bfq_io_set_weight(struct kernfs_open_file *of,
+				 char *buf, size_t nbytes,
+				 loff_t off)
+{
+	u64 weight;
+	/* First unsigned long found in the file is used */
+	int ret = kstrtoull(strim(buf), 0, &weight);
+
+	if (ret)
+		return ret;
+
+	return bfq_io_set_weight_legacy(of_css(of), NULL, weight);
+}
+
+#ifdef CONFIG_DEBUG_BLK_CGROUP
+static int bfqg_print_stat(struct seq_file *sf, void *v)
+{
+	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat,
+			  &blkcg_policy_bfq, seq_cft(sf)->private, false);
+	return 0;
+}
+
+static int bfqg_print_rwstat(struct seq_file *sf, void *v)
+{
+	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat,
+			  &blkcg_policy_bfq, seq_cft(sf)->private, true);
+	return 0;
+}
+
+static u64 bfqg_prfill_stat_recursive(struct seq_file *sf,
+				      struct blkg_policy_data *pd, int off)
+{
+	u64 sum = blkg_stat_recursive_sum(pd_to_blkg(pd),
+					  &blkcg_policy_bfq, off);
+	return __blkg_prfill_u64(sf, pd, sum);
+}
+
+static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf,
+					struct blkg_policy_data *pd, int off)
+{
+	struct blkg_rwstat sum = blkg_rwstat_recursive_sum(pd_to_blkg(pd),
+							   &blkcg_policy_bfq,
+							   off);
+	return __blkg_prfill_rwstat(sf, pd, &sum);
+}
+
+static int bfqg_print_stat_recursive(struct seq_file *sf, void *v)
+{
+	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
+			  bfqg_prfill_stat_recursive, &blkcg_policy_bfq,
+			  seq_cft(sf)->private, false);
+	return 0;
+}
+
+static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v)
+{
+	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
+			  bfqg_prfill_rwstat_recursive, &blkcg_policy_bfq,
+			  seq_cft(sf)->private, true);
+	return 0;
+}
+
+static u64 bfqg_prfill_sectors(struct seq_file *sf, struct blkg_policy_data *pd,
+			       int off)
+{
+	u64 sum = blkg_rwstat_total(&pd->blkg->stat_bytes);
+
+	return __blkg_prfill_u64(sf, pd, sum >> 9);
+}
+
+static int bfqg_print_stat_sectors(struct seq_file *sf, void *v)
+{
+	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
+			  bfqg_prfill_sectors, &blkcg_policy_bfq, 0, false);
+	return 0;
+}
+
+static u64 bfqg_prfill_sectors_recursive(struct seq_file *sf,
+					 struct blkg_policy_data *pd, int off)
+{
+	struct blkg_rwstat tmp = blkg_rwstat_recursive_sum(pd->blkg, NULL,
+					offsetof(struct blkcg_gq, stat_bytes));
+	u64 sum = atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) +
+		atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]);
+
+	return __blkg_prfill_u64(sf, pd, sum >> 9);
+}
+
+static int bfqg_print_stat_sectors_recursive(struct seq_file *sf, void *v)
+{
+	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
+			  bfqg_prfill_sectors_recursive, &blkcg_policy_bfq, 0,
+			  false);
+	return 0;
+}
+
+
+static u64 bfqg_prfill_avg_queue_size(struct seq_file *sf,
+				      struct blkg_policy_data *pd, int off)
+{
+	struct bfq_group *bfqg = pd_to_bfqg(pd);
+	u64 samples = blkg_stat_read(&bfqg->stats.avg_queue_size_samples);
+	u64 v = 0;
+
+	if (samples) {
+		v = blkg_stat_read(&bfqg->stats.avg_queue_size_sum);
+		v = div64_u64(v, samples);
+	}
+	__blkg_prfill_u64(sf, pd, v);
+	return 0;
+}
+
+/* print avg_queue_size */
+static int bfqg_print_avg_queue_size(struct seq_file *sf, void *v)
+{
+	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
+			  bfqg_prfill_avg_queue_size, &blkcg_policy_bfq,
+			  0, false);
+	return 0;
+}
+#endif /* CONFIG_DEBUG_BLK_CGROUP */
+
+static struct bfq_group *
+bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
+{
+	int ret;
+
+	ret = blkcg_activate_policy(bfqd->queue, &blkcg_policy_bfq);
+	if (ret)
+		return NULL;
+
+	return blkg_to_bfqg(bfqd->queue->root_blkg);
+}
+
+#ifdef BFQ_MQ
+#define BFQ_CGROUP_FNAME(param) "bfq-mq."#param
+#else
+#define BFQ_CGROUP_FNAME(param) "bfq-sq."#param
+#endif
+
+static struct cftype bfq_blkcg_legacy_files[] = {
+	{
+		.name = BFQ_CGROUP_FNAME(weight),
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = bfq_io_show_weight,
+		.write_u64 = bfq_io_set_weight_legacy,
+	},
+
+	/* statistics, covers only the tasks in the bfqg */
+	{
+		.name = BFQ_CGROUP_FNAME(io_service_bytes),
+		.private = (unsigned long)&blkcg_policy_bfq,
+		.seq_show = blkg_print_stat_bytes,
+	},
+	{
+		.name = BFQ_CGROUP_FNAME(io_serviced),
+		.private = (unsigned long)&blkcg_policy_bfq,
+		.seq_show = blkg_print_stat_ios,
+	},
+#ifdef CONFIG_DEBUG_BLK_CGROUP
+	{
+		.name = BFQ_CGROUP_FNAME(time),
+		.private = offsetof(struct bfq_group, stats.time),
+		.seq_show = bfqg_print_stat,
+	},
+	{
+		.name = BFQ_CGROUP_FNAME(sectors),
+		.seq_show = bfqg_print_stat_sectors,
+	},
+	{
+		.name = BFQ_CGROUP_FNAME(io_service_time),
+		.private = offsetof(struct bfq_group, stats.service_time),
+		.seq_show = bfqg_print_rwstat,
+	},
+	{
+		.name = BFQ_CGROUP_FNAME(io_wait_time),
+		.private = offsetof(struct bfq_group, stats.wait_time),
+		.seq_show = bfqg_print_rwstat,
+	},
+	{
+		.name = BFQ_CGROUP_FNAME(io_merged),
+		.private = offsetof(struct bfq_group, stats.merged),
+		.seq_show = bfqg_print_rwstat,
+	},
+	{
+		.name = BFQ_CGROUP_FNAME(io_queued),
+		.private = offsetof(struct bfq_group, stats.queued),
+		.seq_show = bfqg_print_rwstat,
+	},
+#endif /* CONFIG_DEBUG_BLK_CGROUP */
+
+	/* the same statictics which cover the bfqg and its descendants */
+	{
+		.name = BFQ_CGROUP_FNAME(io_service_bytes_recursive),
+		.private = (unsigned long)&blkcg_policy_bfq,
+		.seq_show = blkg_print_stat_bytes_recursive,
+	},
+	{
+		.name = BFQ_CGROUP_FNAME(io_serviced_recursive),
+		.private = (unsigned long)&blkcg_policy_bfq,
+		.seq_show = blkg_print_stat_ios_recursive,
+	},
+#ifdef CONFIG_DEBUG_BLK_CGROUP
+	{
+		.name = BFQ_CGROUP_FNAME(time_recursive),
+		.private = offsetof(struct bfq_group, stats.time),
+		.seq_show = bfqg_print_stat_recursive,
+	},
+	{
+		.name = BFQ_CGROUP_FNAME(sectors_recursive),
+		.seq_show = bfqg_print_stat_sectors_recursive,
+	},
+	{
+		.name = BFQ_CGROUP_FNAME(io_service_time_recursive),
+		.private = offsetof(struct bfq_group, stats.service_time),
+		.seq_show = bfqg_print_rwstat_recursive,
+	},
+	{
+		.name = BFQ_CGROUP_FNAME(io_wait_time_recursive),
+		.private = offsetof(struct bfq_group, stats.wait_time),
+		.seq_show = bfqg_print_rwstat_recursive,
+	},
+	{
+		.name = BFQ_CGROUP_FNAME(io_merged_recursive),
+		.private = offsetof(struct bfq_group, stats.merged),
+		.seq_show = bfqg_print_rwstat_recursive,
+	},
+	{
+		.name = BFQ_CGROUP_FNAME(io_queued_recursive),
+		.private = offsetof(struct bfq_group, stats.queued),
+		.seq_show = bfqg_print_rwstat_recursive,
+	},
+	{
+		.name = BFQ_CGROUP_FNAME(avg_queue_size),
+		.seq_show = bfqg_print_avg_queue_size,
+	},
+	{
+		.name = BFQ_CGROUP_FNAME(group_wait_time),
+		.private = offsetof(struct bfq_group, stats.group_wait_time),
+		.seq_show = bfqg_print_stat,
+	},
+	{
+		.name = BFQ_CGROUP_FNAME(idle_time),
+		.private = offsetof(struct bfq_group, stats.idle_time),
+		.seq_show = bfqg_print_stat,
+	},
+	{
+		.name = BFQ_CGROUP_FNAME(empty_time),
+		.private = offsetof(struct bfq_group, stats.empty_time),
+		.seq_show = bfqg_print_stat,
+	},
+	{
+		.name = BFQ_CGROUP_FNAME(dequeue),
+		.private = offsetof(struct bfq_group, stats.dequeue),
+		.seq_show = bfqg_print_stat,
+	},
+#endif	/* CONFIG_DEBUG_BLK_CGROUP */
+	{ }	/* terminate */
+};
+
+static struct cftype bfq_blkg_files[] = {
+	{
+		.name = BFQ_CGROUP_FNAME(weight),
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = bfq_io_show_weight,
+		.write = bfq_io_set_weight,
+	},
+	{} /* terminate */
+};
+
+#undef BFQ_CGROUP_FNAME
+
+#else /* BFQ_GROUP_IOSCHED_ENABLED */
+
+static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+			  struct bfq_group *bfqg) {}
+
+static void bfq_init_entity(struct bfq_entity *entity,
+			    struct bfq_group *bfqg)
+{
+	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+
+	entity->weight = entity->new_weight;
+	entity->orig_weight = entity->new_weight;
+	if (bfqq) {
+		bfqq->ioprio = bfqq->new_ioprio;
+		bfqq->ioprio_class = bfqq->new_ioprio_class;
+	}
+	entity->sched_data = &bfqg->sched_data;
+}
+
+static void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) {}
+
+static void bfq_end_wr_async(struct bfq_data *bfqd)
+{
+	bfq_end_wr_async_queues(bfqd, bfqd->root_group);
+}
+
+static struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
+					    struct blkcg *blkcg)
+{
+	return bfqd->root_group;
+}
+
+static struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
+{
+	return bfqq->bfqd->root_group;
+}
+
+static struct bfq_group *
+bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
+{
+	struct bfq_group *bfqg;
+	int i;
+
+	bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node);
+	if (!bfqg)
+		return NULL;
+
+	for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
+		bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;
+
+	return bfqg;
+}
+#endif
diff --git a/block/bfq-ioc.c b/block/bfq-ioc.c
new file mode 100644
index 000000000..fb7bb8f08
--- /dev/null
+++ b/block/bfq-ioc.c
@@ -0,0 +1,36 @@
+/*
+ * BFQ: I/O context handling.
+ *
+ * Based on ideas and code from CFQ:
+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
+ *
+ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
+ *		      Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it>
+ */
+
+/**
+ * icq_to_bic - convert iocontext queue structure to bfq_io_cq.
+ * @icq: the iocontext queue.
+ */
+static struct bfq_io_cq *icq_to_bic(struct io_cq *icq)
+{
+	/* bic->icq is the first member, %NULL will convert to %NULL */
+	return container_of(icq, struct bfq_io_cq, icq);
+}
+
+/**
+ * bfq_bic_lookup - search into @ioc a bic associated to @bfqd.
+ * @bfqd: the lookup key.
+ * @ioc: the io_context of the process doing I/O.
+ *
+ * Queue lock must be held.
+ */
+static struct bfq_io_cq *bfq_bic_lookup(struct bfq_data *bfqd,
+					struct io_context *ioc)
+{
+	if (ioc)
+		return icq_to_bic(ioc_lookup_icq(ioc, bfqd->queue));
+	return NULL;
+}
diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c
new file mode 100644
index 000000000..03efd90c5
--- /dev/null
+++ b/block/bfq-mq-iosched.c
@@ -0,0 +1,6187 @@
+/*
+ * Budget Fair Queueing (BFQ) I/O scheduler.
+ *
+ * Based on ideas and code from CFQ:
+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
+ *
+ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
+ *		      Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Copyright (C) 2017 Paolo Valente <paolo.valente@linaro.org>
+ *
+ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ
+ * file.
+ *
+ * BFQ is a proportional-share I/O scheduler, with some extra
+ * low-latency capabilities. BFQ also supports full hierarchical
+ * scheduling through cgroups. Next paragraphs provide an introduction
+ * on BFQ inner workings. Details on BFQ benefits and usage can be
+ * found in Documentation/block/bfq-iosched.txt.
+ *
+ * BFQ is a proportional-share storage-I/O scheduling algorithm based
+ * on the slice-by-slice service scheme of CFQ. But BFQ assigns
+ * budgets, measured in number of sectors, to processes instead of
+ * time slices. The device is not granted to the in-service process
+ * for a given time slice, but until it has exhausted its assigned
+ * budget. This change from the time to the service domain enables BFQ
+ * to distribute the device throughput among processes as desired,
+ * without any distortion due to throughput fluctuations, or to device
+ * internal queueing. BFQ uses an ad hoc internal scheduler, called
+ * B-WF2Q+, to schedule processes according to their budgets. More
+ * precisely, BFQ schedules queues associated with processes. Thanks to
+ * the accurate policy of B-WF2Q+, BFQ can afford to assign high
+ * budgets to I/O-bound processes issuing sequential requests (to
+ * boost the throughput), and yet guarantee a low latency to
+ * interactive and soft real-time applications.
+ *
+ * NOTE: if the main or only goal, with a given device, is to achieve
+ * the maximum-possible throughput at all times, then do switch off
+ * all low-latency heuristics for that device, by setting low_latency
+ * to 0.
+ *
+ * BFQ is described in [1], where also a reference to the initial, more
+ * theoretical paper on BFQ can be found. The interested reader can find
+ * in the latter paper full details on the main algorithm, as well as
+ * formulas of the guarantees and formal proofs of all the properties.
+ * With respect to the version of BFQ presented in these papers, this
+ * implementation adds a few more heuristics, such as the one that
+ * guarantees a low latency to soft real-time applications, and a
+ * hierarchical extension based on H-WF2Q+.
+ *
+ * B-WF2Q+ is based on WF2Q+, that is described in [2], together with
+ * H-WF2Q+, while the augmented tree used to implement B-WF2Q+ with O(log N)
+ * complexity derives from the one introduced with EEVDF in [3].
+ *
+ * [1] P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O
+ *   Scheduler", Proceedings of the First Workshop on Mobile System
+ *   Technologies (MST-2015), May 2015.
+ *   http://algogroup.unimore.it/people/paolo/disk_sched/mst-2015.pdf
+ *
+ * http://algogroup.unimo.it/people/paolo/disk_sched/bf1-v1-suite-results.pdf
+ *
+ * [2] Jon C.R. Bennett and H. Zhang, ``Hierarchical Packet Fair Queueing
+ *     Algorithms,'' IEEE/ACM Transactions on Networking, 5(5):675-689,
+ *     Oct 1997.
+ *
+ * http://www.cs.cmu.edu/~hzhang/papers/TON-97-Oct.ps.gz
+ *
+ * [3] I. Stoica and H. Abdel-Wahab, ``Earliest Eligible Virtual Deadline
+ *     First: A Flexible and Accurate Mechanism for Proportional Share
+ *     Resource Allocation,'' technical report.
+ *
+ * http://www.cs.berkeley.edu/~istoica/papers/eevdf-tr-95.pdf
+ */
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <linux/cgroup.h>
+#include <linux/elevator.h>
+#include <linux/jiffies.h>
+#include <linux/rbtree.h>
+#include <linux/ioprio.h>
+#include <linux/sbitmap.h>
+#include <linux/delay.h>
+
+#include "blk.h"
+#include "blk-mq.h"
+#include "blk-mq-tag.h"
+#include "blk-mq-sched.h"
+#include "bfq-mq.h"
+#include "blk-wbt.h"
+
+/* Expiration time of sync (0) and async (1) requests, in ns. */
+static const u64 bfq_fifo_expire[2] = { NSEC_PER_SEC / 4, NSEC_PER_SEC / 8 };
+
+/* Maximum backwards seek, in KiB. */
+static const int bfq_back_max = (16 * 1024);
+
+/* Penalty of a backwards seek, in number of sectors. */
+static const int bfq_back_penalty = 2;
+
+/* Idling period duration, in ns. */
+static u32 bfq_slice_idle = (NSEC_PER_SEC / 125);
+
+/* Minimum number of assigned budgets for which stats are safe to compute. */
+static const int bfq_stats_min_budgets = 194;
+
+/* Default maximum budget values, in sectors and number of requests. */
+static const int bfq_default_max_budget = (16 * 1024);
+
+/*
+ * Async to sync throughput distribution is controlled as follows:
+ * when an async request is served, the entity is charged the number
+ * of sectors of the request, multiplied by the factor below
+ */
+static const int bfq_async_charge_factor = 10;
+
+/* Default timeout values, in jiffies, approximating CFQ defaults. */
+static const int bfq_timeout = (HZ / 8);
+
+/*
+ * Time limit for merging (see comments in bfq_setup_cooperator). Set
+ * to the slowest value that, in our tests, proved to be effective in
+ * removing false positives, while not causing true positives to miss
+ * queue merging.
+ *
+ * As can be deduced from the low time limit below, queue merging, if
+ * successful, happens at the very beggining of the I/O of the involved
+ * cooperating processes, as a consequence of the arrival of the very
+ * first requests from each cooperator.  After that, there is very
+ * little chance to find cooperators.
+ */
+static const unsigned long bfq_merge_time_limit = HZ/10;
+
+#define MAX_LENGTH_REASON_NAME 25
+
+static const char reason_name[][MAX_LENGTH_REASON_NAME] = {"TOO_IDLE",
+"BUDGET_TIMEOUT", "BUDGET_EXHAUSTED", "NO_MORE_REQUESTS",
+"PREEMPTED"};
+
+static struct kmem_cache *bfq_pool;
+
+/* Below this threshold (in ns), we consider thinktime immediate. */
+#define BFQ_MIN_TT		(2 * NSEC_PER_MSEC)
+
+/* hw_tag detection: parallel requests threshold and min samples needed. */
+#define BFQ_HW_QUEUE_THRESHOLD	4
+#define BFQ_HW_QUEUE_SAMPLES	32
+
+#define BFQQ_SEEK_THR		(sector_t)(8 * 100)
+#define BFQQ_SECT_THR_NONROT	(sector_t)(2 * 32)
+#define BFQQ_CLOSE_THR		(sector_t)(8 * 1024)
+#define BFQQ_SEEKY(bfqq)	(hweight32(bfqq->seek_history) > 19)
+
+/* Min number of samples required to perform peak-rate update */
+#define BFQ_RATE_MIN_SAMPLES	32
+/* Min observation time interval required to perform a peak-rate update (ns) */
+#define BFQ_RATE_MIN_INTERVAL	(300*NSEC_PER_MSEC)
+/* Target observation time interval for a peak-rate update (ns) */
+#define BFQ_RATE_REF_INTERVAL	NSEC_PER_SEC
+
+/*
+ * Shift used for peak-rate fixed precision calculations.
+ * With
+ * - the current shift: 16 positions
+ * - the current type used to store rate: u32
+ * - the current unit of measure for rate: [sectors/usec], or, more precisely,
+ *   [(sectors/usec) / 2^BFQ_RATE_SHIFT] to take into account the shift,
+ * the range of rates that can be stored is
+ * [1 / 2^BFQ_RATE_SHIFT, 2^(32 - BFQ_RATE_SHIFT)] sectors/usec =
+ * [1 / 2^16, 2^16] sectors/usec = [15e-6, 65536] sectors/usec =
+ * [15, 65G] sectors/sec
+ * Which, assuming a sector size of 512B, corresponds to a range of
+ * [7.5K, 33T] B/sec
+ */
+#define BFQ_RATE_SHIFT		16
+
+/*
+ * By default, BFQ computes the duration of the weight raising for
+ * interactive applications automatically, using the following formula:
+ * duration = (R / r) * T, where r is the peak rate of the device, and
+ * R and T are two reference parameters.
+ * In particular, R is the peak rate of the reference device (see
+ * below), and T is a reference time: given the systems that are
+ * likely to be installed on the reference device according to its
+ * speed class, T is about the maximum time needed, under BFQ and
+ * while reading two files in parallel, to load typical large
+ * applications on these systems (see the comments on
+ * max_service_from_wr below, for more details on how T is obtained).
+ * In practice, the slower/faster the device at hand is, the more/less
+ * it takes to load applications with respect to the reference device.
+ * Accordingly, the longer/shorter BFQ grants weight raising to
+ * interactive applications.
+ *
+ * BFQ uses four different reference pairs (R, T), depending on:
+ * . whether the device is rotational or non-rotational;
+ * . whether the device is slow, such as old or portable HDDs, as well as
+ *   SD cards, or fast, such as newer HDDs and SSDs.
+ *
+ * The device's speed class is dynamically (re)detected in
+ * bfq_update_peak_rate() every time the estimated peak rate is updated.
+ *
+ * In the following definitions, R_slow[0]/R_fast[0] and
+ * T_slow[0]/T_fast[0] are the reference values for a slow/fast
+ * rotational device, whereas R_slow[1]/R_fast[1] and
+ * T_slow[1]/T_fast[1] are the reference values for a slow/fast
+ * non-rotational device. Finally, device_speed_thresh are the
+ * thresholds used to switch between speed classes. The reference
+ * rates are not the actual peak rates of the devices used as a
+ * reference, but slightly lower values. The reason for using these
+ * slightly lower values is that the peak-rate estimator tends to
+ * yield slightly lower values than the actual peak rate (it can yield
+ * the actual peak rate only if there is only one process doing I/O,
+ * and the process does sequential I/O).
+ *
+ * Both the reference peak rates and the thresholds are measured in
+ * sectors/usec, left-shifted by BFQ_RATE_SHIFT.
+ */
+static int R_slow[2] = {1000, 10700};
+static int R_fast[2] = {14000, 33000};
+/*
+ * To improve readability, a conversion function is used to initialize the
+ * following arrays, which entails that they can be initialized only in a
+ * function.
+ */
+static int T_slow[2];
+static int T_fast[2];
+static int device_speed_thresh[2];
+
+/*
+ * BFQ uses the above-detailed, time-based weight-raising mechanism to
+ * privilege interactive tasks. This mechanism is vulnerable to the
+ * following false positives: I/O-bound applications that will go on
+ * doing I/O for much longer than the duration of weight
+ * raising. These applications have basically no benefit from being
+ * weight-raised at the beginning of their I/O. On the opposite end,
+ * while being weight-raised, these applications
+ * a) unjustly steal throughput to applications that may actually need
+ * low latency;
+ * b) make BFQ uselessly perform device idling; device idling results
+ * in loss of device throughput with most flash-based storage, and may
+ * increase latencies when used purposelessly.
+ *
+ * BFQ tries to reduce these problems, by adopting the following
+ * countermeasure. To introduce this countermeasure, we need first to
+ * finish explaining how the duration of weight-raising for
+ * interactive tasks is computed.
+ *
+ * For a bfq_queue deemed as interactive, the duration of weight
+ * raising is dynamically adjusted, as a function of the estimated
+ * peak rate of the device, so as to be equal to the time needed to
+ * execute the 'largest' interactive task we benchmarked so far. By
+ * largest task, we mean the task for which each involved process has
+ * to do more I/O than for any of the other tasks we benchmarked. This
+ * reference interactive task is the start-up of LibreOffice Writer,
+ * and in this task each process/bfq_queue needs to have at most ~110K
+ * sectors transferred.
+ *
+ * This last piece of information enables BFQ to reduce the actual
+ * duration of weight-raising for at least one class of I/O-bound
+ * applications: those doing sequential or quasi-sequential I/O. An
+ * example is file copy. In fact, once started, the main I/O-bound
+ * processes of these applications usually consume the above 110K
+ * sectors in much less time than the processes of an application that
+ * is starting, because these I/O-bound processes will greedily devote
+ * almost all their CPU cycles only to their target,
+ * throughput-friendly I/O operations. This is even more true if BFQ
+ * happens to be underestimating the device peak rate, and thus
+ * overestimating the duration of weight raising. But, according to
+ * our measurements, once transferred 110K sectors, these processes
+ * have no right to be weight-raised any longer.
+ *
+ * Basing on the last consideration, BFQ ends weight-raising for a
+ * bfq_queue if the latter happens to have received an amount of
+ * service at least equal to the following constant. The constant is
+ * set to slightly more than 110K, to have a minimum safety margin.
+ *
+ * This early ending of weight-raising reduces the amount of time
+ * during which interactive false positives cause the two problems
+ * described at the beginning of these comments.
+ */
+static const unsigned long max_service_from_wr = 120000;
+
+#define BFQ_SERVICE_TREE_INIT	((struct bfq_service_tree)		\
+				{ RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 })
+
+#define RQ_BIC(rq)		icq_to_bic((rq)->elv.priv[0])
+#define RQ_BFQQ(rq)		((rq)->elv.priv[1])
+
+/**
+ * icq_to_bic - convert iocontext queue structure to bfq_io_cq.
+ * @icq: the iocontext queue.
+ */
+static struct bfq_io_cq *icq_to_bic(struct io_cq *icq)
+{
+	/* bic->icq is the first member, %NULL will convert to %NULL */
+	return container_of(icq, struct bfq_io_cq, icq);
+}
+
+/**
+ * bfq_bic_lookup - search into @ioc a bic associated to @bfqd.
+ * @bfqd: the lookup key.
+ * @ioc: the io_context of the process doing I/O.
+ * @q: the request queue.
+ */
+static struct bfq_io_cq *bfq_bic_lookup(struct bfq_data *bfqd,
+					struct io_context *ioc,
+					struct request_queue *q)
+{
+	if (ioc) {
+		unsigned long flags;
+		struct bfq_io_cq *icq;
+
+		spin_lock_irqsave(q->queue_lock, flags);
+		icq = icq_to_bic(ioc_lookup_icq(ioc, q));
+		spin_unlock_irqrestore(q->queue_lock, flags);
+
+		return icq;
+	}
+
+	return NULL;
+}
+
+/*
+ * Scheduler run of queue, if there are requests pending and no one in the
+ * driver that will restart queueing.
+ */
+static void bfq_schedule_dispatch(struct bfq_data *bfqd)
+{
+	if (bfqd->queued != 0) {
+		bfq_log(bfqd, "");
+		blk_mq_run_hw_queues(bfqd->queue, true);
+	}
+}
+
+#define BFQ_MQ
+#include "bfq-sched.c"
+#include "bfq-cgroup-included.c"
+
+#define bfq_class_idle(bfqq)	((bfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
+#define bfq_class_rt(bfqq)	((bfqq)->ioprio_class == IOPRIO_CLASS_RT)
+
+#define bfq_sample_valid(samples)	((samples) > 80)
+
+/*
+ * Lifted from AS - choose which of rq1 and rq2 that is best served now.
+ * We choose the request that is closesr to the head right now.  Distance
+ * behind the head is penalized and only allowed to a certain extent.
+ */
+static struct request *bfq_choose_req(struct bfq_data *bfqd,
+				      struct request *rq1,
+				      struct request *rq2,
+				      sector_t last)
+{
+	sector_t s1, s2, d1 = 0, d2 = 0;
+	unsigned long back_max;
+#define BFQ_RQ1_WRAP	0x01 /* request 1 wraps */
+#define BFQ_RQ2_WRAP	0x02 /* request 2 wraps */
+	unsigned int wrap = 0; /* bit mask: requests behind the disk head? */
+
+	if (!rq1 || rq1 == rq2)
+		return rq2;
+	if (!rq2)
+		return rq1;
+
+	if (rq_is_sync(rq1) && !rq_is_sync(rq2))
+		return rq1;
+	else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
+		return rq2;
+	if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META))
+		return rq1;
+	else if ((rq2->cmd_flags & REQ_META) && !(rq1->cmd_flags & REQ_META))
+		return rq2;
+
+	s1 = blk_rq_pos(rq1);
+	s2 = blk_rq_pos(rq2);
+
+	/*
+	 * By definition, 1KiB is 2 sectors.
+	 */
+	back_max = bfqd->bfq_back_max * 2;
+
+	/*
+	 * Strict one way elevator _except_ in the case where we allow
+	 * short backward seeks which are biased as twice the cost of a
+	 * similar forward seek.
+	 */
+	if (s1 >= last)
+		d1 = s1 - last;
+	else if (s1 + back_max >= last)
+		d1 = (last - s1) * bfqd->bfq_back_penalty;
+	else
+		wrap |= BFQ_RQ1_WRAP;
+
+	if (s2 >= last)
+		d2 = s2 - last;
+	else if (s2 + back_max >= last)
+		d2 = (last - s2) * bfqd->bfq_back_penalty;
+	else
+		wrap |= BFQ_RQ2_WRAP;
+
+	/* Found required data */
+
+	/*
+	 * By doing switch() on the bit mask "wrap" we avoid having to
+	 * check two variables for all permutations: --> faster!
+	 */
+	switch (wrap) {
+	case 0: /* common case for CFQ: rq1 and rq2 not wrapped */
+		if (d1 < d2)
+			return rq1;
+		else if (d2 < d1)
+			return rq2;
+
+		if (s1 >= s2)
+			return rq1;
+		else
+			return rq2;
+
+	case BFQ_RQ2_WRAP:
+		return rq1;
+	case BFQ_RQ1_WRAP:
+		return rq2;
+	case (BFQ_RQ1_WRAP|BFQ_RQ2_WRAP): /* both rqs wrapped */
+	default:
+		/*
+		 * Since both rqs are wrapped,
+		 * start with the one that's further behind head
+		 * (--> only *one* back seek required),
+		 * since back seek takes more time than forward.
+		 */
+		if (s1 <= s2)
+			return rq1;
+		else
+			return rq2;
+	}
+}
+
+/*
+ * See the comments on bfq_limit_depth for the purpose of
+ * the depths set in the function.
+ */
+static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt)
+{
+	bfqd->sb_shift = bt->sb.shift;
+
+	/*
+	 * In-word depths if no bfq_queue is being weight-raised:
+	 * leaving 25% of tags only for sync reads.
+	 *
+	 * In next formulas, right-shift the value
+	 * (1U<<bfqd->sb_shift), instead of computing directly
+	 * (1U<<(bfqd->sb_shift - something)), to be robust against
+	 * any possible value of bfqd->sb_shift, without having to
+	 * limit 'something'.
+	 */
+	/* no more than 50% of tags for async I/O */
+	bfqd->word_depths[0][0] = max((1U<<bfqd->sb_shift)>>1, 1U);
+	/*
+	 * no more than 75% of tags for sync writes (25% extra tags
+	 * w.r.t. async I/O, to prevent async I/O from starving sync
+	 * writes)
+	 */
+	bfqd->word_depths[0][1] = max(((1U<<bfqd->sb_shift) * 3)>>2, 1U);
+
+	/*
+	 * In-word depths in case some bfq_queue is being weight-
+	 * raised: leaving ~63% of tags for sync reads. This is the
+	 * highest percentage for which, in our tests, application
+	 * start-up times didn't suffer from any regression due to tag
+	 * shortage.
+	 */
+	/* no more than ~18% of tags for async I/O */
+	bfqd->word_depths[1][0] = max(((1U<<bfqd->sb_shift) * 3)>>4, 1U);
+	/* no more than ~37% of tags for sync writes (~20% extra tags) */
+	bfqd->word_depths[1][1] = max(((1U<<bfqd->sb_shift) * 6)>>4, 1U);
+}
+
+/*
+ * Async I/O can easily starve sync I/O (both sync reads and sync
+ * writes), by consuming all tags. Similarly, storms of sync writes,
+ * such as those that sync(2) may trigger, can starve sync reads.
+ * Limit depths of async I/O and sync writes so as to counter both
+ * problems.
+ */
+static void bfq_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
+{
+	struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
+	struct bfq_data *bfqd = data->q->elevator->elevator_data;
+	struct sbitmap_queue *bt;
+
+	if (op_is_sync(op) && !op_is_write(op))
+		return;
+
+	if (data->flags & BLK_MQ_REQ_RESERVED) {
+		if (unlikely(!tags->nr_reserved_tags)) {
+			WARN_ON_ONCE(1);
+			return;
+		}
+		bt = &tags->breserved_tags;
+	} else
+		bt = &tags->bitmap_tags;
+
+	if (unlikely(bfqd->sb_shift != bt->sb.shift))
+		bfq_update_depths(bfqd, bt);
+
+	data->shallow_depth =
+		bfqd->word_depths[!!bfqd->wr_busy_queues][op_is_sync(op)];
+
+	bfq_log(bfqd, "wr_busy %d sync %d depth %u",
+			bfqd->wr_busy_queues, op_is_sync(op),
+			data->shallow_depth);
+}
+
+static struct bfq_queue *
+bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root,
+		     sector_t sector, struct rb_node **ret_parent,
+		     struct rb_node ***rb_link)
+{
+	struct rb_node **p, *parent;
+	struct bfq_queue *bfqq = NULL;
+
+	parent = NULL;
+	p = &root->rb_node;
+	while (*p) {
+		struct rb_node **n;
+
+		parent = *p;
+		bfqq = rb_entry(parent, struct bfq_queue, pos_node);
+
+		/*
+		 * Sort strictly based on sector. Smallest to the left,
+		 * largest to the right.
+		 */
+		if (sector > blk_rq_pos(bfqq->next_rq))
+			n = &(*p)->rb_right;
+		else if (sector < blk_rq_pos(bfqq->next_rq))
+			n = &(*p)->rb_left;
+		else
+			break;
+		p = n;
+		bfqq = NULL;
+	}
+
+	*ret_parent = parent;
+	if (rb_link)
+		*rb_link = p;
+
+	bfq_log(bfqd, "%llu: returning %d",
+		(unsigned long long) sector,
+		bfqq ? bfqq->pid : 0);
+
+	return bfqq;
+}
+
+static bool bfq_too_late_for_merging(struct bfq_queue *bfqq)
+{
+	return bfqq->service_from_backlogged > 0 &&
+		time_is_before_jiffies(bfqq->first_IO_time +
+				       bfq_merge_time_limit);
+}
+
+static void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	struct rb_node **p, *parent;
+	struct bfq_queue *__bfqq;
+
+	if (bfqq->pos_root) {
+		rb_erase(&bfqq->pos_node, bfqq->pos_root);
+		bfqq->pos_root = NULL;
+	}
+
+	/*
+	 * bfqq cannot be merged any longer (see comments in
+	 * bfq_setup_cooperator): no point in adding bfqq into the
+	 * position tree.
+	 */
+	if (bfq_too_late_for_merging(bfqq))
+		return;
+
+	if (bfq_class_idle(bfqq))
+		return;
+	if (!bfqq->next_rq)
+		return;
+
+	bfqq->pos_root = &bfq_bfqq_to_bfqg(bfqq)->rq_pos_tree;
+	__bfqq = bfq_rq_pos_tree_lookup(bfqd, bfqq->pos_root,
+			blk_rq_pos(bfqq->next_rq), &parent, &p);
+	if (!__bfqq) {
+		rb_link_node(&bfqq->pos_node, parent, p);
+		rb_insert_color(&bfqq->pos_node, bfqq->pos_root);
+	} else
+		bfqq->pos_root = NULL;
+}
+
+/*
+ * Tell whether there are active queues or groups with differentiated weights.
+ */
+static bool bfq_differentiated_weights(struct bfq_data *bfqd)
+{
+	/*
+	 * For weights to differ, at least one of the trees must contain
+	 * at least two nodes.
+	 */
+	return (!RB_EMPTY_ROOT(&bfqd->queue_weights_tree) &&
+		(bfqd->queue_weights_tree.rb_node->rb_left ||
+		 bfqd->queue_weights_tree.rb_node->rb_right)
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	       ) ||
+	       (!RB_EMPTY_ROOT(&bfqd->group_weights_tree) &&
+		(bfqd->group_weights_tree.rb_node->rb_left ||
+		 bfqd->group_weights_tree.rb_node->rb_right)
+#endif
+	       );
+}
+
+/*
+ * The following function returns true if every queue must receive the
+ * same share of the throughput (this condition is used when deciding
+ * whether idling may be disabled, see the comments in the function
+ * bfq_bfqq_may_idle()).
+ *
+ * Such a scenario occurs when:
+ * 1) all active queues have the same weight,
+ * 2) all active groups at the same level in the groups tree have the same
+ *    weight,
+ * 3) all active groups at the same level in the groups tree have the same
+ *    number of children.
+ *
+ * Unfortunately, keeping the necessary state for evaluating exactly the
+ * above symmetry conditions would be quite complex and time-consuming.
+ * Therefore this function evaluates, instead, the following stronger
+ * sub-conditions, for which it is much easier to maintain the needed
+ * state:
+ * 1) all active queues have the same weight,
+ * 2) all active groups have the same weight,
+ * 3) all active groups have at most one active child each.
+ * In particular, the last two conditions are always true if hierarchical
+ * support and the cgroups interface are not enabled, thus no state needs
+ * to be maintained in this case.
+ */
+static bool bfq_symmetric_scenario(struct bfq_data *bfqd)
+{
+	return !bfq_differentiated_weights(bfqd);
+}
+
+/*
+ * If the weight-counter tree passed as input contains no counter for
+ * the weight of the input entity, then add that counter; otherwise just
+ * increment the existing counter.
+ *
+ * Note that weight-counter trees contain few nodes in mostly symmetric
+ * scenarios. For example, if all queues have the same weight, then the
+ * weight-counter tree for the queues may contain at most one node.
+ * This holds even if low_latency is on, because weight-raised queues
+ * are not inserted in the tree.
+ * In most scenarios, the rate at which nodes are created/destroyed
+ * should be low too.
+ */
+static void bfq_weights_tree_add(struct bfq_data *bfqd,
+				 struct bfq_entity *entity,
+				 struct rb_root *root)
+{
+	struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+	/*
+	 * Do not insert if the entity is already associated with a
+	 * counter, which happens if:
+	 *   1) the entity is associated with a queue,
+	 *   2) a request arrival has caused the queue to become both
+	 *      non-weight-raised, and hence change its weight, and
+	 *      backlogged; in this respect, each of the two events
+	 *      causes an invocation of this function,
+	 *   3) this is the invocation of this function caused by the
+	 *      second event. This second invocation is actually useless,
+	 *      and we handle this fact by exiting immediately. More
+	 *      efficient or clearer solutions might possibly be adopted.
+	 */
+	if (entity->weight_counter)
+		return;
+
+	while (*new) {
+		struct bfq_weight_counter *__counter = container_of(*new,
+						struct bfq_weight_counter,
+						weights_node);
+		parent = *new;
+
+		if (entity->weight == __counter->weight) {
+			entity->weight_counter = __counter;
+			goto inc_counter;
+		}
+		if (entity->weight < __counter->weight)
+			new = &((*new)->rb_left);
+		else
+			new = &((*new)->rb_right);
+	}
+
+	entity->weight_counter = kzalloc(sizeof(struct bfq_weight_counter),
+					 GFP_ATOMIC);
+
+	/*
+	 * In the unlucky event of an allocation failure, we just
+	 * exit. This will cause the weight of entity to not be
+	 * considered in bfq_differentiated_weights, which, in its
+	 * turn, causes the scenario to be deemed wrongly symmetric in
+	 * case entity's weight would have been the only weight making
+	 * the scenario asymmetric. On the bright side, no unbalance
+	 * will however occur when entity becomes inactive again (the
+	 * invocation of this function is triggered by an activation
+	 * of entity). In fact, bfq_weights_tree_remove does nothing
+	 * if !entity->weight_counter.
+	 */
+	if (unlikely(!entity->weight_counter))
+		return;
+
+	entity->weight_counter->weight = entity->weight;
+	rb_link_node(&entity->weight_counter->weights_node, parent, new);
+	rb_insert_color(&entity->weight_counter->weights_node, root);
+
+inc_counter:
+	entity->weight_counter->num_active++;
+}
+
+/*
+ * Decrement the weight counter associated with the entity, and, if the
+ * counter reaches 0, remove the counter from the tree.
+ * See the comments to the function bfq_weights_tree_add() for considerations
+ * about overhead.
+ */
+static void bfq_weights_tree_remove(struct bfq_data *bfqd,
+				    struct bfq_entity *entity,
+				    struct rb_root *root)
+{
+	if (!entity->weight_counter)
+		return;
+
+	BUG_ON(RB_EMPTY_ROOT(root));
+	BUG_ON(entity->weight_counter->weight != entity->weight);
+
+	BUG_ON(!entity->weight_counter->num_active);
+	entity->weight_counter->num_active--;
+	if (entity->weight_counter->num_active > 0)
+		goto reset_entity_pointer;
+
+	rb_erase(&entity->weight_counter->weights_node, root);
+	kfree(entity->weight_counter);
+
+reset_entity_pointer:
+	entity->weight_counter = NULL;
+}
+
+/*
+ * Return expired entry, or NULL to just start from scratch in rbtree.
+ */
+static struct request *bfq_check_fifo(struct bfq_queue *bfqq,
+				      struct request *last)
+{
+	struct request *rq;
+
+	if (bfq_bfqq_fifo_expire(bfqq))
+		return NULL;
+
+	bfq_mark_bfqq_fifo_expire(bfqq);
+
+	rq = rq_entry_fifo(bfqq->fifo.next);
+
+	if (rq == last || ktime_get_ns() < rq->fifo_time)
+		return NULL;
+
+	bfq_log_bfqq(bfqq->bfqd, bfqq, "returned %p", rq);
+	BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
+	return rq;
+}
+
+static struct request *bfq_find_next_rq(struct bfq_data *bfqd,
+					struct bfq_queue *bfqq,
+					struct request *last)
+{
+	struct rb_node *rbnext = rb_next(&last->rb_node);
+	struct rb_node *rbprev = rb_prev(&last->rb_node);
+	struct request *next, *prev = NULL;
+
+	BUG_ON(list_empty(&bfqq->fifo));
+
+	/* Follow expired path, else get first next available. */
+	next = bfq_check_fifo(bfqq, last);
+	if (next) {
+		BUG_ON(next == last);
+		return next;
+	}
+
+	BUG_ON(RB_EMPTY_NODE(&last->rb_node));
+
+	if (rbprev)
+		prev = rb_entry_rq(rbprev);
+
+	if (rbnext)
+		next = rb_entry_rq(rbnext);
+	else {
+		rbnext = rb_first(&bfqq->sort_list);
+		if (rbnext && rbnext != &last->rb_node)
+			next = rb_entry_rq(rbnext);
+	}
+
+	return bfq_choose_req(bfqd, next, prev, blk_rq_pos(last));
+}
+
+/* see the definition of bfq_async_charge_factor for details */
+static unsigned long bfq_serv_to_charge(struct request *rq,
+					struct bfq_queue *bfqq)
+{
+	if (bfq_bfqq_sync(bfqq) || bfqq->wr_coeff > 1)
+		return blk_rq_sectors(rq);
+
+	/*
+	 * If there are no weight-raised queues, then amplify service
+	 * by just the async charge factor; otherwise amplify service
+	 * by twice the async charge factor, to further reduce latency
+	 * for weight-raised queues.
+	 */
+	if (bfqq->bfqd->wr_busy_queues == 0)
+		return blk_rq_sectors(rq) * bfq_async_charge_factor;
+
+	return blk_rq_sectors(rq) * 2 * bfq_async_charge_factor;
+}
+
+/**
+ * bfq_updated_next_req - update the queue after a new next_rq selection.
+ * @bfqd: the device data the queue belongs to.
+ * @bfqq: the queue to update.
+ *
+ * If the first request of a queue changes we make sure that the queue
+ * has enough budget to serve at least its first request (if the
+ * request has grown).  We do this because if the queue has not enough
+ * budget for its first request, it has to go through two dispatch
+ * rounds to actually get it dispatched.
+ */
+static void bfq_updated_next_req(struct bfq_data *bfqd,
+				 struct bfq_queue *bfqq)
+{
+	struct bfq_entity *entity = &bfqq->entity;
+	struct bfq_service_tree *st = bfq_entity_service_tree(entity);
+	struct request *next_rq = bfqq->next_rq;
+	unsigned long new_budget;
+
+	if (!next_rq)
+		return;
+
+	if (bfqq == bfqd->in_service_queue)
+		/*
+		 * In order not to break guarantees, budgets cannot be
+		 * changed after an entity has been selected.
+		 */
+		return;
+
+	BUG_ON(entity->tree != &st->active);
+	BUG_ON(entity == entity->sched_data->in_service_entity);
+
+	new_budget = max_t(unsigned long, bfqq->max_budget,
+			   bfq_serv_to_charge(next_rq, bfqq));
+	if (entity->budget != new_budget) {
+		entity->budget = new_budget;
+		bfq_log_bfqq(bfqd, bfqq, "new budget %lu",
+					 new_budget);
+		bfq_requeue_bfqq(bfqd, bfqq, false);
+	}
+}
+
+static unsigned int bfq_wr_duration(struct bfq_data *bfqd)
+{
+	u64 dur;
+
+	if (bfqd->bfq_wr_max_time > 0)
+		return bfqd->bfq_wr_max_time;
+
+	dur = bfqd->RT_prod;
+	do_div(dur, bfqd->peak_rate);
+
+	/*
+	 * Limit duration between 3 and 13 seconds. Tests show that
+	 * higher values than 13 seconds often yield the opposite of
+	 * the desired result, i.e., worsen responsiveness by letting
+	 * non-interactive and non-soft-real-time applications
+	 * preserve weight raising for a too long time interval.
+	 *
+	 * On the other end, lower values than 3 seconds make it
+	 * difficult for most interactive tasks to complete their jobs
+	 * before weight-raising finishes.
+	 */
+	if (dur > msecs_to_jiffies(13000))
+		dur = msecs_to_jiffies(13000);
+	else if (dur < msecs_to_jiffies(3000))
+		dur = msecs_to_jiffies(3000);
+
+	return dur;
+}
+
+/* switch back from soft real-time to interactive weight raising */
+static void switch_back_to_interactive_wr(struct bfq_queue *bfqq,
+					  struct bfq_data *bfqd)
+{
+	bfqq->wr_coeff = bfqd->bfq_wr_coeff;
+	bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
+	bfqq->last_wr_start_finish = bfqq->wr_start_at_switch_to_srt;
+}
+
+static void
+bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd,
+		      struct bfq_io_cq *bic, bool bfq_already_existing)
+{
+	unsigned int old_wr_coeff;
+	bool busy = bfq_already_existing && bfq_bfqq_busy(bfqq);
+
+	if (bic->saved_has_short_ttime)
+		bfq_mark_bfqq_has_short_ttime(bfqq);
+	else
+		bfq_clear_bfqq_has_short_ttime(bfqq);
+
+	if (bic->saved_IO_bound)
+		bfq_mark_bfqq_IO_bound(bfqq);
+	else
+		bfq_clear_bfqq_IO_bound(bfqq);
+
+	if (unlikely(busy))
+		old_wr_coeff = bfqq->wr_coeff;
+
+	bfqq->ttime = bic->saved_ttime;
+	bfqq->wr_coeff = bic->saved_wr_coeff;
+	bfqq->wr_start_at_switch_to_srt = bic->saved_wr_start_at_switch_to_srt;
+	BUG_ON(time_is_after_jiffies(bfqq->wr_start_at_switch_to_srt));
+	bfqq->last_wr_start_finish = bic->saved_last_wr_start_finish;
+	bfqq->wr_cur_max_time = bic->saved_wr_cur_max_time;
+	BUG_ON(time_is_after_jiffies(bfqq->last_wr_start_finish));
+
+	bfq_log_bfqq(bfqq->bfqd, bfqq,
+		     "bic %p wr_coeff %d start_finish %lu max_time %lu",
+		     bic, bfqq->wr_coeff, bfqq->last_wr_start_finish,
+		     bfqq->wr_cur_max_time);
+
+	if (bfqq->wr_coeff > 1 && (bfq_bfqq_in_large_burst(bfqq) ||
+				   time_is_before_jiffies(bfqq->last_wr_start_finish +
+							  bfqq->wr_cur_max_time))) {
+		if (bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time &&
+		    !bfq_bfqq_in_large_burst(bfqq) &&
+		    time_is_after_eq_jiffies(bfqq->wr_start_at_switch_to_srt +
+					     bfq_wr_duration(bfqd))) {
+			switch_back_to_interactive_wr(bfqq, bfqd);
+			bfq_log_bfqq(bfqq->bfqd, bfqq,
+			     "switching back to interactive");
+		} else {
+			bfqq->wr_coeff = 1;
+			bfq_log_bfqq(bfqq->bfqd, bfqq,
+			     "switching off wr (%lu + %lu < %lu)",
+			     bfqq->last_wr_start_finish, bfqq->wr_cur_max_time,
+			     jiffies);
+		}
+	}
+
+	/* make sure weight will be updated, however we got here */
+	bfqq->entity.prio_changed = 1;
+
+	if (likely(!busy))
+		return;
+
+	if (old_wr_coeff == 1 && bfqq->wr_coeff > 1) {
+		bfqd->wr_busy_queues++;
+		BUG_ON(bfqd->wr_busy_queues > bfqd->busy_queues);
+	} else if (old_wr_coeff > 1 && bfqq->wr_coeff == 1) {
+		bfqd->wr_busy_queues--;
+		BUG_ON(bfqd->wr_busy_queues < 0);
+	}
+}
+
+static int bfqq_process_refs(struct bfq_queue *bfqq)
+{
+	int process_refs, io_refs;
+
+	lockdep_assert_held(&bfqq->bfqd->lock);
+
+	io_refs = bfqq->allocated;
+	process_refs = bfqq->ref - io_refs - bfqq->entity.on_st;
+	BUG_ON(process_refs < 0);
+	return process_refs;
+}
+
+/* Empty burst list and add just bfqq (see comments to bfq_handle_burst) */
+static void bfq_reset_burst_list(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	struct bfq_queue *item;
+	struct hlist_node *n;
+
+	hlist_for_each_entry_safe(item, n, &bfqd->burst_list, burst_list_node)
+		hlist_del_init(&item->burst_list_node);
+	hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list);
+	bfqd->burst_size = 1;
+	bfqd->burst_parent_entity = bfqq->entity.parent;
+}
+
+/* Add bfqq to the list of queues in current burst (see bfq_handle_burst) */
+static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	/* Increment burst size to take into account also bfqq */
+	bfqd->burst_size++;
+
+	bfq_log_bfqq(bfqd, bfqq, "%d", bfqd->burst_size);
+
+	BUG_ON(bfqd->burst_size > bfqd->bfq_large_burst_thresh);
+
+	if (bfqd->burst_size == bfqd->bfq_large_burst_thresh) {
+		struct bfq_queue *pos, *bfqq_item;
+		struct hlist_node *n;
+
+		/*
+		 * Enough queues have been activated shortly after each
+		 * other to consider this burst as large.
+		 */
+		bfqd->large_burst = true;
+		bfq_log_bfqq(bfqd, bfqq, "large burst started");
+
+		/*
+		 * We can now mark all queues in the burst list as
+		 * belonging to a large burst.
+		 */
+		hlist_for_each_entry(bfqq_item, &bfqd->burst_list,
+				     burst_list_node) {
+			bfq_mark_bfqq_in_large_burst(bfqq_item);
+			bfq_log_bfqq(bfqd, bfqq_item, "marked in large burst");
+		}
+		bfq_mark_bfqq_in_large_burst(bfqq);
+		bfq_log_bfqq(bfqd, bfqq, "marked in large burst");
+
+		/*
+		 * From now on, and until the current burst finishes, any
+		 * new queue being activated shortly after the last queue
+		 * was inserted in the burst can be immediately marked as
+		 * belonging to a large burst. So the burst list is not
+		 * needed any more. Remove it.
+		 */
+		hlist_for_each_entry_safe(pos, n, &bfqd->burst_list,
+					  burst_list_node)
+			hlist_del_init(&pos->burst_list_node);
+	} else /*
+		* Burst not yet large: add bfqq to the burst list. Do
+		* not increment the ref counter for bfqq, because bfqq
+		* is removed from the burst list before freeing bfqq
+		* in put_queue.
+		*/
+		hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list);
+}
+
+/*
+ * If many queues belonging to the same group happen to be created
+ * shortly after each other, then the processes associated with these
+ * queues have typically a common goal. In particular, bursts of queue
+ * creations are usually caused by services or applications that spawn
+ * many parallel threads/processes. Examples are systemd during boot,
+ * or git grep. To help these processes get their job done as soon as
+ * possible, it is usually better to not grant either weight-raising
+ * or device idling to their queues.
+ *
+ * In this comment we describe, firstly, the reasons why this fact
+ * holds, and, secondly, the next function, which implements the main
+ * steps needed to properly mark these queues so that they can then be
+ * treated in a different way.
+ *
+ * The above services or applications benefit mostly from a high
+ * throughput: the quicker the requests of the activated queues are
+ * cumulatively served, the sooner the target job of these queues gets
+ * completed. As a consequence, weight-raising any of these queues,
+ * which also implies idling the device for it, is almost always
+ * counterproductive. In most cases it just lowers throughput.
+ *
+ * On the other hand, a burst of queue creations may be caused also by
+ * the start of an application that does not consist of a lot of
+ * parallel I/O-bound threads. In fact, with a complex application,
+ * several short processes may need to be executed to start-up the
+ * application. In this respect, to start an application as quickly as
+ * possible, the best thing to do is in any case to privilege the I/O
+ * related to the application with respect to all other
+ * I/O. Therefore, the best strategy to start as quickly as possible
+ * an application that causes a burst of queue creations is to
+ * weight-raise all the queues created during the burst. This is the
+ * exact opposite of the best strategy for the other type of bursts.
+ *
+ * In the end, to take the best action for each of the two cases, the
+ * two types of bursts need to be distinguished. Fortunately, this
+ * seems relatively easy, by looking at the sizes of the bursts. In
+ * particular, we found a threshold such that only bursts with a
+ * larger size than that threshold are apparently caused by
+ * services or commands such as systemd or git grep. For brevity,
+ * hereafter we call just 'large' these bursts. BFQ *does not*
+ * weight-raise queues whose creation occurs in a large burst. In
+ * addition, for each of these queues BFQ performs or does not perform
+ * idling depending on which choice boosts the throughput more. The
+ * exact choice depends on the device and request pattern at
+ * hand.
+ *
+ * Unfortunately, false positives may occur while an interactive task
+ * is starting (e.g., an application is being started). The
+ * consequence is that the queues associated with the task do not
+ * enjoy weight raising as expected. Fortunately these false positives
+ * are very rare. They typically occur if some service happens to
+ * start doing I/O exactly when the interactive task starts.
+ *
+ * Turning back to the next function, it implements all the steps
+ * needed to detect the occurrence of a large burst and to properly
+ * mark all the queues belonging to it (so that they can then be
+ * treated in a different way). This goal is achieved by maintaining a
+ * "burst list" that holds, temporarily, the queues that belong to the
+ * burst in progress. The list is then used to mark these queues as
+ * belonging to a large burst if the burst does become large. The main
+ * steps are the following.
+ *
+ * . when the very first queue is created, the queue is inserted into the
+ *   list (as it could be the first queue in a possible burst)
+ *
+ * . if the current burst has not yet become large, and a queue Q that does
+ *   not yet belong to the burst is activated shortly after the last time
+ *   at which a new queue entered the burst list, then the function appends
+ *   Q to the burst list
+ *
+ * . if, as a consequence of the previous step, the burst size reaches
+ *   the large-burst threshold, then
+ *
+ *     . all the queues in the burst list are marked as belonging to a
+ *       large burst
+ *
+ *     . the burst list is deleted; in fact, the burst list already served
+ *       its purpose (keeping temporarily track of the queues in a burst,
+ *       so as to be able to mark them as belonging to a large burst in the
+ *       previous sub-step), and now is not needed any more
+ *
+ *     . the device enters a large-burst mode
+ *
+ * . if a queue Q that does not belong to the burst is created while
+ *   the device is in large-burst mode and shortly after the last time
+ *   at which a queue either entered the burst list or was marked as
+ *   belonging to the current large burst, then Q is immediately marked
+ *   as belonging to a large burst.
+ *
+ * . if a queue Q that does not belong to the burst is created a while
+ *   later, i.e., not shortly after, than the last time at which a queue
+ *   either entered the burst list or was marked as belonging to the
+ *   current large burst, then the current burst is deemed as finished and:
+ *
+ *        . the large-burst mode is reset if set
+ *
+ *        . the burst list is emptied
+ *
+ *        . Q is inserted in the burst list, as Q may be the first queue
+ *          in a possible new burst (then the burst list contains just Q
+ *          after this step).
+ */
+static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	/*
+	 * If bfqq is already in the burst list or is part of a large
+	 * burst, or finally has just been split, then there is
+	 * nothing else to do.
+	 */
+	if (!hlist_unhashed(&bfqq->burst_list_node) ||
+	    bfq_bfqq_in_large_burst(bfqq) ||
+	    time_is_after_eq_jiffies(bfqq->split_time +
+				     msecs_to_jiffies(10)))
+		return;
+
+	/*
+	 * If bfqq's creation happens late enough, or bfqq belongs to
+	 * a different group than the burst group, then the current
+	 * burst is finished, and related data structures must be
+	 * reset.
+	 *
+	 * In this respect, consider the special case where bfqq is
+	 * the very first queue created after BFQ is selected for this
+	 * device. In this case, last_ins_in_burst and
+	 * burst_parent_entity are not yet significant when we get
+	 * here. But it is easy to verify that, whether or not the
+	 * following condition is true, bfqq will end up being
+	 * inserted into the burst list. In particular the list will
+	 * happen to contain only bfqq. And this is exactly what has
+	 * to happen, as bfqq may be the first queue of the first
+	 * burst.
+	 */
+	if (time_is_before_jiffies(bfqd->last_ins_in_burst +
+	    bfqd->bfq_burst_interval) ||
+	    bfqq->entity.parent != bfqd->burst_parent_entity) {
+		bfqd->large_burst = false;
+		bfq_reset_burst_list(bfqd, bfqq);
+		bfq_log_bfqq(bfqd, bfqq,
+			"late activation or different group");
+		goto end;
+	}
+
+	/*
+	 * If we get here, then bfqq is being activated shortly after the
+	 * last queue. So, if the current burst is also large, we can mark
+	 * bfqq as belonging to this large burst immediately.
+	 */
+	if (bfqd->large_burst) {
+		bfq_log_bfqq(bfqd, bfqq, "marked in burst");
+		bfq_mark_bfqq_in_large_burst(bfqq);
+		goto end;
+	}
+
+	/*
+	 * If we get here, then a large-burst state has not yet been
+	 * reached, but bfqq is being activated shortly after the last
+	 * queue. Then we add bfqq to the burst.
+	 */
+	bfq_add_to_burst(bfqd, bfqq);
+end:
+	/*
+	 * At this point, bfqq either has been added to the current
+	 * burst or has caused the current burst to terminate and a
+	 * possible new burst to start. In particular, in the second
+	 * case, bfqq has become the first queue in the possible new
+	 * burst.  In both cases last_ins_in_burst needs to be moved
+	 * forward.
+	 */
+	bfqd->last_ins_in_burst = jiffies;
+
+}
+
+static int bfq_bfqq_budget_left(struct bfq_queue *bfqq)
+{
+	struct bfq_entity *entity = &bfqq->entity;
+
+	return entity->budget - entity->service;
+}
+
+/*
+ * If enough samples have been computed, return the current max budget
+ * stored in bfqd, which is dynamically updated according to the
+ * estimated disk peak rate; otherwise return the default max budget
+ */
+static int bfq_max_budget(struct bfq_data *bfqd)
+{
+	if (bfqd->budgets_assigned < bfq_stats_min_budgets)
+		return bfq_default_max_budget;
+	else
+		return bfqd->bfq_max_budget;
+}
+
+/*
+ * Return min budget, which is a fraction of the current or default
+ * max budget (trying with 1/32)
+ */
+static int bfq_min_budget(struct bfq_data *bfqd)
+{
+	if (bfqd->budgets_assigned < bfq_stats_min_budgets)
+		return bfq_default_max_budget / 32;
+	else
+		return bfqd->bfq_max_budget / 32;
+}
+
+static void bfq_bfqq_expire(struct bfq_data *bfqd,
+			    struct bfq_queue *bfqq,
+			    bool compensate,
+			    enum bfqq_expiration reason);
+
+/*
+ * The next function, invoked after the input queue bfqq switches from
+ * idle to busy, updates the budget of bfqq. The function also tells
+ * whether the in-service queue should be expired, by returning
+ * true. The purpose of expiring the in-service queue is to give bfqq
+ * the chance to possibly preempt the in-service queue, and the reason
+ * for preempting the in-service queue is to achieve one of the two
+ * goals below.
+ *
+ * 1. Guarantee to bfqq its reserved bandwidth even if bfqq has
+ * expired because it has remained idle. In particular, bfqq may have
+ * expired for one of the following two reasons:
+ *
+ * - BFQ_BFQQ_NO_MORE_REQUEST bfqq did not enjoy any device idling and
+ *   did not make it to issue a new request before its last request
+ *   was served;
+ *
+ * - BFQ_BFQQ_TOO_IDLE bfqq did enjoy device idling, but did not issue
+ *   a new request before the expiration of the idling-time.
+ *
+ * Even if bfqq has expired for one of the above reasons, the process
+ * associated with the queue may be however issuing requests greedily,
+ * and thus be sensitive to the bandwidth it receives (bfqq may have
+ * remained idle for other reasons: CPU high load, bfqq not enjoying
+ * idling, I/O throttling somewhere in the path from the process to
+ * the I/O scheduler, ...). But if, after every expiration for one of
+ * the above two reasons, bfqq has to wait for the service of at least
+ * one full budget of another queue before being served again, then
+ * bfqq is likely to get a much lower bandwidth or resource time than
+ * its reserved ones. To address this issue, two countermeasures need
+ * to be taken.
+ *
+ * First, the budget and the timestamps of bfqq need to be updated in
+ * a special way on bfqq reactivation: they need to be updated as if
+ * bfqq did not remain idle and did not expire. In fact, if they are
+ * computed as if bfqq expired and remained idle until reactivation,
+ * then the process associated with bfqq is treated as if, instead of
+ * being greedy, it stopped issuing requests when bfqq remained idle,
+ * and restarts issuing requests only on this reactivation. In other
+ * words, the scheduler does not help the process recover the "service
+ * hole" between bfqq expiration and reactivation. As a consequence,
+ * the process receives a lower bandwidth than its reserved one. In
+ * contrast, to recover this hole, the budget must be updated as if
+ * bfqq was not expired at all before this reactivation, i.e., it must
+ * be set to the value of the remaining budget when bfqq was
+ * expired. Along the same line, timestamps need to be assigned the
+ * value they had the last time bfqq was selected for service, i.e.,
+ * before last expiration. Thus timestamps need to be back-shifted
+ * with respect to their normal computation (see [1] for more details
+ * on this tricky aspect).
+ *
+ * Secondly, to allow the process to recover the hole, the in-service
+ * queue must be expired too, to give bfqq the chance to preempt it
+ * immediately. In fact, if bfqq has to wait for a full budget of the
+ * in-service queue to be completed, then it may become impossible to
+ * let the process recover the hole, even if the back-shifted
+ * timestamps of bfqq are lower than those of the in-service queue. If
+ * this happens for most or all of the holes, then the process may not
+ * receive its reserved bandwidth. In this respect, it is worth noting
+ * that, being the service of outstanding requests unpreemptible, a
+ * little fraction of the holes may however be unrecoverable, thereby
+ * causing a little loss of bandwidth.
+ *
+ * The last important point is detecting whether bfqq does need this
+ * bandwidth recovery. In this respect, the next function deems the
+ * process associated with bfqq greedy, and thus allows it to recover
+ * the hole, if: 1) the process is waiting for the arrival of a new
+ * request (which implies that bfqq expired for one of the above two
+ * reasons), and 2) such a request has arrived soon. The first
+ * condition is controlled through the flag non_blocking_wait_rq,
+ * while the second through the flag arrived_in_time. If both
+ * conditions hold, then the function computes the budget in the
+ * above-described special way, and signals that the in-service queue
+ * should be expired. Timestamp back-shifting is done later in
+ * __bfq_activate_entity.
+ *
+ * 2. Reduce latency. Even if timestamps are not backshifted to let
+ * the process associated with bfqq recover a service hole, bfqq may
+ * however happen to have, after being (re)activated, a lower finish
+ * timestamp than the in-service queue.  That is, the next budget of
+ * bfqq may have to be completed before the one of the in-service
+ * queue. If this is the case, then preempting the in-service queue
+ * allows this goal to be achieved, apart from the unpreemptible,
+ * outstanding requests mentioned above.
+ *
+ * Unfortunately, regardless of which of the above two goals one wants
+ * to achieve, service trees need first to be updated to know whether
+ * the in-service queue must be preempted. To have service trees
+ * correctly updated, the in-service queue must be expired and
+ * rescheduled, and bfqq must be scheduled too. This is one of the
+ * most costly operations (in future versions, the scheduling
+ * mechanism may be re-designed in such a way to make it possible to
+ * know whether preemption is needed without needing to update service
+ * trees). In addition, queue preemptions almost always cause random
+ * I/O, and thus loss of throughput. Because of these facts, the next
+ * function adopts the following simple scheme to avoid both costly
+ * operations and too frequent preemptions: it requests the expiration
+ * of the in-service queue (unconditionally) only for queues that need
+ * to recover a hole, or that either are weight-raised or deserve to
+ * be weight-raised.
+ */
+static bool bfq_bfqq_update_budg_for_activation(struct bfq_data *bfqd,
+						struct bfq_queue *bfqq,
+						bool arrived_in_time,
+						bool wr_or_deserves_wr)
+{
+	struct bfq_entity *entity = &bfqq->entity;
+
+	if (bfq_bfqq_non_blocking_wait_rq(bfqq) && arrived_in_time) {
+		/*
+		 * We do not clear the flag non_blocking_wait_rq here, as
+		 * the latter is used in bfq_activate_bfqq to signal
+		 * that timestamps need to be back-shifted (and is
+		 * cleared right after).
+		 */
+
+		/*
+		 * In next assignment we rely on that either
+		 * entity->service or entity->budget are not updated
+		 * on expiration if bfqq is empty (see
+		 * __bfq_bfqq_recalc_budget). Thus both quantities
+		 * remain unchanged after such an expiration, and the
+		 * following statement therefore assigns to
+		 * entity->budget the remaining budget on such an
+		 * expiration. For clarity, entity->service is not
+		 * updated on expiration in any case, and, in normal
+		 * operation, is reset only when bfqq is selected for
+		 * service (see bfq_get_next_queue).
+		 */
+		BUG_ON(bfqq->max_budget < 0);
+		entity->budget = min_t(unsigned long,
+				       bfq_bfqq_budget_left(bfqq),
+				       bfqq->max_budget);
+
+		BUG_ON(entity->budget < 0);
+		return true;
+	}
+
+	BUG_ON(bfqq->max_budget < 0);
+	entity->budget = max_t(unsigned long, bfqq->max_budget,
+			       bfq_serv_to_charge(bfqq->next_rq, bfqq));
+	BUG_ON(entity->budget < 0);
+
+	bfq_clear_bfqq_non_blocking_wait_rq(bfqq);
+	return wr_or_deserves_wr;
+}
+
+/*
+ * Return the farthest future time instant according to jiffies
+ * macros.
+ */
+static unsigned long bfq_greatest_from_now(void)
+{
+	return jiffies + MAX_JIFFY_OFFSET;
+}
+
+/*
+ * Return the farthest past time instant according to jiffies
+ * macros.
+ */
+static unsigned long bfq_smallest_from_now(void)
+{
+	return jiffies - MAX_JIFFY_OFFSET;
+}
+
+static void bfq_update_bfqq_wr_on_rq_arrival(struct bfq_data *bfqd,
+					     struct bfq_queue *bfqq,
+					     unsigned int old_wr_coeff,
+					     bool wr_or_deserves_wr,
+					     bool interactive,
+					     bool in_burst,
+					     bool soft_rt)
+{
+	if (old_wr_coeff == 1 && wr_or_deserves_wr) {
+		/* start a weight-raising period */
+		if (interactive) {
+			bfqq->service_from_wr = 0;
+			bfqq->wr_coeff = bfqd->bfq_wr_coeff;
+			bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
+		} else {
+			/*
+			 * No interactive weight raising in progress
+			 * here: assign minus infinity to
+			 * wr_start_at_switch_to_srt, to make sure
+			 * that, at the end of the soft-real-time
+			 * weight raising periods that is starting
+			 * now, no interactive weight-raising period
+			 * may be wrongly considered as still in
+			 * progress (and thus actually started by
+			 * mistake).
+			 */
+			bfqq->wr_start_at_switch_to_srt =
+				bfq_smallest_from_now();
+			bfqq->wr_coeff = bfqd->bfq_wr_coeff *
+				BFQ_SOFTRT_WEIGHT_FACTOR;
+			bfqq->wr_cur_max_time =
+				bfqd->bfq_wr_rt_max_time;
+		}
+		/*
+		 * If needed, further reduce budget to make sure it is
+		 * close to bfqq's backlog, so as to reduce the
+		 * scheduling-error component due to a too large
+		 * budget. Do not care about throughput consequences,
+		 * but only about latency. Finally, do not assign a
+		 * too small budget either, to avoid increasing
+		 * latency by causing too frequent expirations.
+		 */
+		bfqq->entity.budget = min_t(unsigned long,
+					    bfqq->entity.budget,
+					    2 * bfq_min_budget(bfqd));
+
+		bfq_log_bfqq(bfqd, bfqq,
+			     "wrais starting at %lu, rais_max_time %u",
+			     jiffies,
+			     jiffies_to_msecs(bfqq->wr_cur_max_time));
+	} else if (old_wr_coeff > 1) {
+		if (interactive) { /* update wr coeff and duration */
+			bfqq->wr_coeff = bfqd->bfq_wr_coeff;
+			bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
+		} else if (in_burst) {
+			bfqq->wr_coeff = 1;
+			bfq_log_bfqq(bfqd, bfqq,
+				     "wrais ending at %lu, rais_max_time %u",
+				     jiffies,
+				     jiffies_to_msecs(bfqq->
+						      wr_cur_max_time));
+		} else if (soft_rt) {
+			/*
+			 * The application is now or still meeting the
+			 * requirements for being deemed soft rt.  We
+			 * can then correctly and safely (re)charge
+			 * the weight-raising duration for the
+			 * application with the weight-raising
+			 * duration for soft rt applications.
+			 *
+			 * In particular, doing this recharge now, i.e.,
+			 * before the weight-raising period for the
+			 * application finishes, reduces the probability
+			 * of the following negative scenario:
+			 * 1) the weight of a soft rt application is
+			 *    raised at startup (as for any newly
+			 *    created application),
+			 * 2) since the application is not interactive,
+			 *    at a certain time weight-raising is
+			 *    stopped for the application,
+			 * 3) at that time the application happens to
+			 *    still have pending requests, and hence
+			 *    is destined to not have a chance to be
+			 *    deemed soft rt before these requests are
+			 *    completed (see the comments to the
+			 *    function bfq_bfqq_softrt_next_start()
+			 *    for details on soft rt detection),
+			 * 4) these pending requests experience a high
+			 *    latency because the application is not
+			 *    weight-raised while they are pending.
+			 */
+			if (bfqq->wr_cur_max_time !=
+				bfqd->bfq_wr_rt_max_time) {
+				bfqq->wr_start_at_switch_to_srt =
+					bfqq->last_wr_start_finish;
+                BUG_ON(time_is_after_jiffies(bfqq->last_wr_start_finish));
+
+				bfqq->wr_cur_max_time =
+					bfqd->bfq_wr_rt_max_time;
+				bfqq->wr_coeff = bfqd->bfq_wr_coeff *
+					BFQ_SOFTRT_WEIGHT_FACTOR;
+				bfq_log_bfqq(bfqd, bfqq,
+					     "switching to soft_rt wr");
+			} else
+				bfq_log_bfqq(bfqd, bfqq,
+					"moving forward soft_rt wr duration");
+			bfqq->last_wr_start_finish = jiffies;
+		}
+	}
+}
+
+static bool bfq_bfqq_idle_for_long_time(struct bfq_data *bfqd,
+					struct bfq_queue *bfqq)
+{
+	return bfqq->dispatched == 0 &&
+		time_is_before_jiffies(
+			bfqq->budget_timeout +
+			bfqd->bfq_wr_min_idle_time);
+}
+
+static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
+					     struct bfq_queue *bfqq,
+					     int old_wr_coeff,
+					     struct request *rq,
+					     bool *interactive)
+{
+	bool soft_rt, in_burst,	wr_or_deserves_wr,
+		bfqq_wants_to_preempt,
+		idle_for_long_time = bfq_bfqq_idle_for_long_time(bfqd, bfqq),
+		/*
+		 * See the comments on
+		 * bfq_bfqq_update_budg_for_activation for
+		 * details on the usage of the next variable.
+		 */
+		arrived_in_time =  ktime_get_ns() <=
+			bfqq->ttime.last_end_request +
+			bfqd->bfq_slice_idle * 3;
+
+	bfq_log_bfqq(bfqd, bfqq,
+		     "bfq_add_request non-busy: "
+		     "jiffies %lu, in_time %d, idle_long %d busyw %d "
+		     "wr_coeff %u",
+		     jiffies, arrived_in_time,
+		     idle_for_long_time,
+		     bfq_bfqq_non_blocking_wait_rq(bfqq),
+		     old_wr_coeff);
+
+	BUG_ON(bfqq->entity.budget < bfqq->entity.service);
+
+	BUG_ON(bfqq == bfqd->in_service_queue);
+
+	/*
+	 * bfqq deserves to be weight-raised if:
+	 * - it is sync,
+	 * - it does not belong to a large burst,
+	 * - it has been idle for enough time or is soft real-time,
+	 * - is linked to a bfq_io_cq (it is not shared in any sense)
+	 */
+	in_burst = bfq_bfqq_in_large_burst(bfqq);
+	soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 &&
+		!in_burst &&
+		time_is_before_jiffies(bfqq->soft_rt_next_start);
+	*interactive =
+		!in_burst &&
+		idle_for_long_time;
+	wr_or_deserves_wr = bfqd->low_latency &&
+		(bfqq->wr_coeff > 1 ||
+		 (bfq_bfqq_sync(bfqq) &&
+		  bfqq->bic && (*interactive || soft_rt)));
+
+	bfq_log_bfqq(bfqd, bfqq,
+		     "bfq_add_request: "
+		     "in_burst %d, "
+		     "soft_rt %d (next %lu), inter %d, bic %p",
+		     bfq_bfqq_in_large_burst(bfqq), soft_rt,
+		     bfqq->soft_rt_next_start,
+		     *interactive,
+		     bfqq->bic);
+
+	/*
+	 * Using the last flag, update budget and check whether bfqq
+	 * may want to preempt the in-service queue.
+	 */
+	bfqq_wants_to_preempt =
+		bfq_bfqq_update_budg_for_activation(bfqd, bfqq,
+						    arrived_in_time,
+						    wr_or_deserves_wr);
+
+	/*
+	 * If bfqq happened to be activated in a burst, but has been
+	 * idle for much more than an interactive queue, then we
+	 * assume that, in the overall I/O initiated in the burst, the
+	 * I/O associated with bfqq is finished. So bfqq does not need
+	 * to be treated as a queue belonging to a burst
+	 * anymore. Accordingly, we reset bfqq's in_large_burst flag
+	 * if set, and remove bfqq from the burst list if it's
+	 * there. We do not decrement burst_size, because the fact
+	 * that bfqq does not need to belong to the burst list any
+	 * more does not invalidate the fact that bfqq was created in
+	 * a burst.
+	 */
+	if (likely(!bfq_bfqq_just_created(bfqq)) &&
+	    idle_for_long_time &&
+	    time_is_before_jiffies(
+		    bfqq->budget_timeout +
+		    msecs_to_jiffies(10000))) {
+		hlist_del_init(&bfqq->burst_list_node);
+		bfq_clear_bfqq_in_large_burst(bfqq);
+	}
+
+	bfq_clear_bfqq_just_created(bfqq);
+
+	if (!bfq_bfqq_IO_bound(bfqq)) {
+		if (arrived_in_time) {
+			bfqq->requests_within_timer++;
+			if (bfqq->requests_within_timer >=
+			    bfqd->bfq_requests_within_timer)
+				bfq_mark_bfqq_IO_bound(bfqq);
+		} else
+			bfqq->requests_within_timer = 0;
+		bfq_log_bfqq(bfqd, bfqq, "requests in time %d",
+			     bfqq->requests_within_timer);
+	}
+
+	if (bfqd->low_latency) {
+		if (unlikely(time_is_after_jiffies(bfqq->split_time)))
+			/* wraparound */
+			bfqq->split_time =
+				jiffies - bfqd->bfq_wr_min_idle_time - 1;
+
+		if (time_is_before_jiffies(bfqq->split_time +
+					   bfqd->bfq_wr_min_idle_time)) {
+			bfq_update_bfqq_wr_on_rq_arrival(bfqd, bfqq,
+							 old_wr_coeff,
+							 wr_or_deserves_wr,
+							 *interactive,
+							 in_burst,
+							 soft_rt);
+
+			if (old_wr_coeff != bfqq->wr_coeff)
+				bfqq->entity.prio_changed = 1;
+		}
+	}
+
+	bfqq->last_idle_bklogged = jiffies;
+	bfqq->service_from_backlogged = 0;
+	bfq_clear_bfqq_softrt_update(bfqq);
+
+	bfq_add_bfqq_busy(bfqd, bfqq);
+
+	/*
+	 * Expire in-service queue only if preemption may be needed
+	 * for guarantees. In this respect, the function
+	 * next_queue_may_preempt just checks a simple, necessary
+	 * condition, and not a sufficient condition based on
+	 * timestamps. In fact, for the latter condition to be
+	 * evaluated, timestamps would need first to be updated, and
+	 * this operation is quite costly (see the comments on the
+	 * function bfq_bfqq_update_budg_for_activation).
+	 */
+	if (bfqd->in_service_queue && bfqq_wants_to_preempt &&
+	    bfqd->in_service_queue->wr_coeff < bfqq->wr_coeff &&
+	    next_queue_may_preempt(bfqd)) {
+		struct bfq_queue *in_serv =
+			bfqd->in_service_queue;
+		BUG_ON(in_serv == bfqq);
+
+		bfq_bfqq_expire(bfqd, bfqd->in_service_queue,
+				false, BFQ_BFQQ_PREEMPTED);
+	}
+}
+
+static void bfq_add_request(struct request *rq)
+{
+	struct bfq_queue *bfqq = RQ_BFQQ(rq);
+	struct bfq_data *bfqd = bfqq->bfqd;
+	struct request *next_rq, *prev;
+	unsigned int old_wr_coeff = bfqq->wr_coeff;
+	bool interactive = false;
+
+	bfq_log_bfqq(bfqd, bfqq, "size %u %s",
+		     blk_rq_sectors(rq), rq_is_sync(rq) ? "S" : "A");
+
+	if (bfqq->wr_coeff > 1) /* queue is being weight-raised */
+		bfq_log_bfqq(bfqd, bfqq,
+			"raising period dur %u/%u msec, old coeff %u, w %d(%d)",
+			jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish),
+			jiffies_to_msecs(bfqq->wr_cur_max_time),
+			bfqq->wr_coeff,
+			bfqq->entity.weight, bfqq->entity.orig_weight);
+
+	bfqq->queued[rq_is_sync(rq)]++;
+	bfqd->queued++;
+
+	BUG_ON(!RQ_BFQQ(rq));
+	BUG_ON(RQ_BFQQ(rq) != bfqq);
+	WARN_ON(blk_rq_sectors(rq) == 0);
+
+	elv_rb_add(&bfqq->sort_list, rq);
+
+	/*
+	 * Check if this request is a better next-to-serve candidate.
+	 */
+	prev = bfqq->next_rq;
+	next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position);
+	BUG_ON(!next_rq);
+	BUG_ON(!RQ_BFQQ(next_rq));
+	BUG_ON(RQ_BFQQ(next_rq) != bfqq);
+	bfqq->next_rq = next_rq;
+
+	/*
+	 * Adjust priority tree position, if next_rq changes.
+	 */
+	if (prev != bfqq->next_rq)
+		bfq_pos_tree_add_move(bfqd, bfqq);
+
+	if (!bfq_bfqq_busy(bfqq)) /* switching to busy ... */
+		bfq_bfqq_handle_idle_busy_switch(bfqd, bfqq, old_wr_coeff,
+						 rq, &interactive);
+	else {
+		if (bfqd->low_latency && old_wr_coeff == 1 && !rq_is_sync(rq) &&
+		    time_is_before_jiffies(
+				bfqq->last_wr_start_finish +
+				bfqd->bfq_wr_min_inter_arr_async)) {
+			bfqq->wr_coeff = bfqd->bfq_wr_coeff;
+			bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
+
+			bfqd->wr_busy_queues++;
+			BUG_ON(bfqd->wr_busy_queues > bfqd->busy_queues);
+			bfqq->entity.prio_changed = 1;
+			bfq_log_bfqq(bfqd, bfqq,
+				     "non-idle wrais starting, "
+				     "wr_max_time %u wr_busy %d",
+				     jiffies_to_msecs(bfqq->wr_cur_max_time),
+				     bfqd->wr_busy_queues);
+		}
+		if (prev != bfqq->next_rq)
+			bfq_updated_next_req(bfqd, bfqq);
+	}
+
+	/*
+	 * Assign jiffies to last_wr_start_finish in the following
+	 * cases:
+	 *
+	 * . if bfqq is not going to be weight-raised, because, for
+	 *   non weight-raised queues, last_wr_start_finish stores the
+	 *   arrival time of the last request; as of now, this piece
+	 *   of information is used only for deciding whether to
+	 *   weight-raise async queues
+	 *
+	 * . if bfqq is not weight-raised, because, if bfqq is now
+	 *   switching to weight-raised, then last_wr_start_finish
+	 *   stores the time when weight-raising starts
+	 *
+	 * . if bfqq is interactive, because, regardless of whether
+	 *   bfqq is currently weight-raised, the weight-raising
+	 *   period must start or restart (this case is considered
+	 *   separately because it is not detected by the above
+	 *   conditions, if bfqq is already weight-raised)
+	 *
+	 * last_wr_start_finish has to be updated also if bfqq is soft
+	 * real-time, because the weight-raising period is constantly
+	 * restarted on idle-to-busy transitions for these queues, but
+	 * this is already done in bfq_bfqq_handle_idle_busy_switch if
+	 * needed.
+	 */
+	if (bfqd->low_latency &&
+		(old_wr_coeff == 1 || bfqq->wr_coeff == 1 || interactive))
+		bfqq->last_wr_start_finish = jiffies;
+}
+
+static struct request *bfq_find_rq_fmerge(struct bfq_data *bfqd,
+					  struct bio *bio,
+					  struct request_queue *q)
+{
+	struct bfq_queue *bfqq = bfqd->bio_bfqq;
+
+	BUG_ON(!bfqd->bio_bfqq_set);
+
+	if (bfqq)
+		return elv_rb_find(&bfqq->sort_list, bio_end_sector(bio));
+
+	return NULL;
+}
+
+static sector_t get_sdist(sector_t last_pos, struct request *rq)
+{
+	sector_t sdist = 0;
+
+	if (last_pos) {
+		if (last_pos < blk_rq_pos(rq))
+			sdist = blk_rq_pos(rq) - last_pos;
+		else
+			sdist = last_pos - blk_rq_pos(rq);
+	}
+
+	return sdist;
+}
+
+#if 0 /* Still not clear if we can do without next two functions */
+static void bfq_activate_request(struct request_queue *q, struct request *rq)
+{
+	struct bfq_data *bfqd = q->elevator->elevator_data;
+	bfqd->rq_in_driver++;
+}
+
+static void bfq_deactivate_request(struct request_queue *q, struct request *rq)
+{
+	struct bfq_data *bfqd = q->elevator->elevator_data;
+
+	BUG_ON(bfqd->rq_in_driver == 0);
+	bfqd->rq_in_driver--;
+}
+#endif
+
+static void bfq_remove_request(struct request_queue *q,
+			       struct request *rq)
+{
+	struct bfq_queue *bfqq = RQ_BFQQ(rq);
+	struct bfq_data *bfqd = bfqq->bfqd;
+	const int sync = rq_is_sync(rq);
+
+	BUG_ON(bfqq->entity.service > bfqq->entity.budget &&
+	       bfqq == bfqd->in_service_queue);
+
+	if (bfqq->next_rq == rq) {
+		bfqq->next_rq = bfq_find_next_rq(bfqd, bfqq, rq);
+		if (bfqq->next_rq && !RQ_BFQQ(bfqq->next_rq)) {
+			pr_crit("no bfqq! for next rq %p bfqq %p\n",
+				bfqq->next_rq, bfqq);
+		}
+
+		BUG_ON(bfqq->next_rq && !RQ_BFQQ(bfqq->next_rq));
+		if (bfqq->next_rq && RQ_BFQQ(bfqq->next_rq) != bfqq) {
+			pr_crit(
+			"wrong bfqq! for next rq %p, rq_bfqq %p bfqq %p\n",
+			bfqq->next_rq, RQ_BFQQ(bfqq->next_rq), bfqq);
+		}
+		BUG_ON(bfqq->next_rq && RQ_BFQQ(bfqq->next_rq) != bfqq);
+
+		bfq_updated_next_req(bfqd, bfqq);
+	}
+
+	if (rq->queuelist.prev != &rq->queuelist)
+		list_del_init(&rq->queuelist);
+	BUG_ON(bfqq->queued[sync] == 0);
+	bfqq->queued[sync]--;
+	bfqd->queued--;
+	elv_rb_del(&bfqq->sort_list, rq);
+
+	elv_rqhash_del(q, rq);
+	if (q->last_merge == rq)
+		q->last_merge = NULL;
+
+	if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
+		bfqq->next_rq = NULL;
+
+		BUG_ON(bfqq->entity.budget < 0);
+
+		if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->in_service_queue) {
+			BUG_ON(bfqq->ref < 2); /* referred by rq and on tree */
+			bfq_del_bfqq_busy(bfqd, bfqq, false);
+			/*
+			 * bfqq emptied. In normal operation, when
+			 * bfqq is empty, bfqq->entity.service and
+			 * bfqq->entity.budget must contain,
+			 * respectively, the service received and the
+			 * budget used last time bfqq emptied. These
+			 * facts do not hold in this case, as at least
+			 * this last removal occurred while bfqq is
+			 * not in service. To avoid inconsistencies,
+			 * reset both bfqq->entity.service and
+			 * bfqq->entity.budget, if bfqq has still a
+			 * process that may issue I/O requests to it.
+			 */
+			bfqq->entity.budget = bfqq->entity.service = 0;
+		}
+
+		/*
+		 * Remove queue from request-position tree as it is empty.
+		 */
+		if (bfqq->pos_root) {
+			rb_erase(&bfqq->pos_node, bfqq->pos_root);
+			bfqq->pos_root = NULL;
+		}
+	} else {
+		BUG_ON(!bfqq->next_rq);
+		bfq_pos_tree_add_move(bfqd, bfqq);
+	}
+
+	if (rq->cmd_flags & REQ_META) {
+		BUG_ON(bfqq->meta_pending == 0);
+		bfqq->meta_pending--;
+	}
+}
+
+static bool bfq_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio)
+{
+	struct request_queue *q = hctx->queue;
+	struct bfq_data *bfqd = q->elevator->elevator_data;
+	struct request *free = NULL;
+	/*
+	 * bfq_bic_lookup grabs the queue_lock: invoke it now and
+	 * store its return value for later use, to avoid nesting
+	 * queue_lock inside the bfqd->lock. We assume that the bic
+	 * returned by bfq_bic_lookup does not go away before
+	 * bfqd->lock is taken.
+	 */
+	struct bfq_io_cq *bic = bfq_bic_lookup(bfqd, current->io_context, q);
+	bool ret;
+
+	spin_lock_irq(&bfqd->lock);
+
+	if (bic)
+		bfqd->bio_bfqq = bic_to_bfqq(bic, op_is_sync(bio->bi_opf));
+	else
+		bfqd->bio_bfqq = NULL;
+	bfqd->bio_bic = bic;
+	/* Set next flag just for testing purposes */
+	bfqd->bio_bfqq_set = true;
+
+	ret = blk_mq_sched_try_merge(q, bio, &free);
+
+	/*
+	 * XXX Not yet freeing without lock held, to avoid an
+	 * inconsistency with respect to the lock-protected invocation
+	 * of blk_mq_sched_try_insert_merge in bfq_bio_merge. Waiting
+	 * for clarifications from Jens.
+	 */
+	if (free)
+		blk_mq_free_request(free);
+	bfqd->bio_bfqq_set = false;
+	spin_unlock_irq(&bfqd->lock);
+
+	return ret;
+}
+
+static int bfq_request_merge(struct request_queue *q, struct request **req,
+			     struct bio *bio)
+{
+	struct bfq_data *bfqd = q->elevator->elevator_data;
+	struct request *__rq;
+
+	__rq = bfq_find_rq_fmerge(bfqd, bio, q);
+	if (__rq && elv_bio_merge_ok(__rq, bio)) {
+		*req = __rq;
+		bfq_log(bfqd, "req %p", __rq);
+
+		return ELEVATOR_FRONT_MERGE;
+	}
+
+	return ELEVATOR_NO_MERGE;
+}
+
+static void bfq_request_merged(struct request_queue *q, struct request *req,
+			       enum elv_merge type)
+{
+	BUG_ON(req->rq_flags & RQF_DISP_LIST);
+
+	if (type == ELEVATOR_FRONT_MERGE &&
+	    rb_prev(&req->rb_node) &&
+	    blk_rq_pos(req) <
+	    blk_rq_pos(container_of(rb_prev(&req->rb_node),
+				    struct request, rb_node))) {
+		struct bfq_queue *bfqq = RQ_BFQQ(req);
+		struct bfq_data *bfqd = bfqq->bfqd;
+		struct request *prev, *next_rq;
+
+		/* Reposition request in its sort_list */
+		elv_rb_del(&bfqq->sort_list, req);
+		BUG_ON(!RQ_BFQQ(req));
+		BUG_ON(RQ_BFQQ(req) != bfqq);
+		elv_rb_add(&bfqq->sort_list, req);
+
+		/* Choose next request to be served for bfqq */
+		prev = bfqq->next_rq;
+		next_rq = bfq_choose_req(bfqd, bfqq->next_rq, req,
+					 bfqd->last_position);
+		BUG_ON(!next_rq);
+
+		bfqq->next_rq = next_rq;
+
+		bfq_log_bfqq(bfqd, bfqq,
+			"req %p prev %p next_rq %p bfqq %p",
+			     req, prev, next_rq, bfqq);
+
+		/*
+		 * If next_rq changes, update both the queue's budget to
+		 * fit the new request and the queue's position in its
+		 * rq_pos_tree.
+		 */
+		if (prev != bfqq->next_rq) {
+			bfq_updated_next_req(bfqd, bfqq);
+			bfq_pos_tree_add_move(bfqd, bfqq);
+		}
+	}
+}
+
+static void bfq_requests_merged(struct request_queue *q, struct request *rq,
+				struct request *next)
+{
+	struct bfq_queue *bfqq = RQ_BFQQ(rq), *next_bfqq = RQ_BFQQ(next);
+
+	BUG_ON(!RQ_BFQQ(rq));
+	BUG_ON(!RQ_BFQQ(next));
+	BUG_ON(rq->rq_flags & RQF_DISP_LIST);
+	BUG_ON(next->rq_flags & RQF_DISP_LIST);
+
+	if (!RB_EMPTY_NODE(&rq->rb_node))
+		goto end;
+
+	bfq_log_bfqq(bfqq->bfqd, bfqq,
+		     "rq %p next %p bfqq %p next_bfqq %p",
+		     rq, next, bfqq, next_bfqq);
+
+	spin_lock_irq(&bfqq->bfqd->lock);
+
+	/*
+	 * If next and rq belong to the same bfq_queue and next is older
+	 * than rq, then reposition rq in the fifo (by substituting next
+	 * with rq). Otherwise, if next and rq belong to different
+	 * bfq_queues, never reposition rq: in fact, we would have to
+	 * reposition it with respect to next's position in its own fifo,
+	 * which would most certainly be too expensive with respect to
+	 * the benefits.
+	 */
+	if (bfqq == next_bfqq &&
+	    !list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
+	    next->fifo_time < rq->fifo_time) {
+		list_del_init(&rq->queuelist);
+		list_replace_init(&next->queuelist, &rq->queuelist);
+		rq->fifo_time = next->fifo_time;
+	}
+
+	if (bfqq->next_rq == next)
+		bfqq->next_rq = rq;
+
+	bfq_remove_request(q, next);
+	bfqg_stats_update_io_remove(bfqq_group(bfqq), next->cmd_flags);
+
+	spin_unlock_irq(&bfqq->bfqd->lock);
+end:
+	bfqg_stats_update_io_merged(bfqq_group(bfqq), next->cmd_flags);
+}
+
+/* Must be called with bfqq != NULL */
+static void bfq_bfqq_end_wr(struct bfq_queue *bfqq)
+{
+	BUG_ON(!bfqq);
+
+	if (bfq_bfqq_busy(bfqq)) {
+		bfqq->bfqd->wr_busy_queues--;
+		BUG_ON(bfqq->bfqd->wr_busy_queues < 0);
+	}
+	bfqq->wr_coeff = 1;
+	bfqq->wr_cur_max_time = 0;
+	bfqq->last_wr_start_finish = jiffies;
+	/*
+	 * Trigger a weight change on the next invocation of
+	 * __bfq_entity_update_weight_prio.
+	 */
+	bfqq->entity.prio_changed = 1;
+	bfq_log_bfqq(bfqq->bfqd, bfqq,
+		     "wrais ending at %lu, rais_max_time %u",
+		     bfqq->last_wr_start_finish,
+		     jiffies_to_msecs(bfqq->wr_cur_max_time));
+	bfq_log_bfqq(bfqq->bfqd, bfqq, "wr_busy %d",
+		     bfqq->bfqd->wr_busy_queues);
+}
+
+static void bfq_end_wr_async_queues(struct bfq_data *bfqd,
+				    struct bfq_group *bfqg)
+{
+	int i, j;
+
+	for (i = 0; i < 2; i++)
+		for (j = 0; j < IOPRIO_BE_NR; j++)
+			if (bfqg->async_bfqq[i][j])
+				bfq_bfqq_end_wr(bfqg->async_bfqq[i][j]);
+	if (bfqg->async_idle_bfqq)
+		bfq_bfqq_end_wr(bfqg->async_idle_bfqq);
+}
+
+static void bfq_end_wr(struct bfq_data *bfqd)
+{
+	struct bfq_queue *bfqq;
+
+	spin_lock_irq(&bfqd->lock);
+
+	list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list)
+		bfq_bfqq_end_wr(bfqq);
+	list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list)
+		bfq_bfqq_end_wr(bfqq);
+	bfq_end_wr_async(bfqd);
+
+	spin_unlock_irq(&bfqd->lock);
+}
+
+static sector_t bfq_io_struct_pos(void *io_struct, bool request)
+{
+	if (request)
+		return blk_rq_pos(io_struct);
+	else
+		return ((struct bio *)io_struct)->bi_iter.bi_sector;
+}
+
+static int bfq_rq_close_to_sector(void *io_struct, bool request,
+				  sector_t sector)
+{
+	return abs(bfq_io_struct_pos(io_struct, request) - sector) <=
+	       BFQQ_CLOSE_THR;
+}
+
+static struct bfq_queue *bfqq_find_close(struct bfq_data *bfqd,
+					 struct bfq_queue *bfqq,
+					 sector_t sector)
+{
+	struct rb_root *root = &bfq_bfqq_to_bfqg(bfqq)->rq_pos_tree;
+	struct rb_node *parent, *node;
+	struct bfq_queue *__bfqq;
+
+	if (RB_EMPTY_ROOT(root))
+		return NULL;
+
+	/*
+	 * First, if we find a request starting at the end of the last
+	 * request, choose it.
+	 */
+	__bfqq = bfq_rq_pos_tree_lookup(bfqd, root, sector, &parent, NULL);
+	if (__bfqq)
+		return __bfqq;
+
+	/*
+	 * If the exact sector wasn't found, the parent of the NULL leaf
+	 * will contain the closest sector (rq_pos_tree sorted by
+	 * next_request position).
+	 */
+	__bfqq = rb_entry(parent, struct bfq_queue, pos_node);
+	if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector))
+		return __bfqq;
+
+	if (blk_rq_pos(__bfqq->next_rq) < sector)
+		node = rb_next(&__bfqq->pos_node);
+	else
+		node = rb_prev(&__bfqq->pos_node);
+	if (!node)
+		return NULL;
+
+	__bfqq = rb_entry(node, struct bfq_queue, pos_node);
+	if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector))
+		return __bfqq;
+
+	return NULL;
+}
+
+static struct bfq_queue *bfq_find_close_cooperator(struct bfq_data *bfqd,
+						   struct bfq_queue *cur_bfqq,
+						   sector_t sector)
+{
+	struct bfq_queue *bfqq;
+
+	/*
+	 * We shall notice if some of the queues are cooperating,
+	 * e.g., working closely on the same area of the device. In
+	 * that case, we can group them together and: 1) don't waste
+	 * time idling, and 2) serve the union of their requests in
+	 * the best possible order for throughput.
+	 */
+	bfqq = bfqq_find_close(bfqd, cur_bfqq, sector);
+	if (!bfqq || bfqq == cur_bfqq)
+		return NULL;
+
+	return bfqq;
+}
+
+static struct bfq_queue *
+bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
+{
+	int process_refs, new_process_refs;
+	struct bfq_queue *__bfqq;
+
+	/*
+	 * If there are no process references on the new_bfqq, then it is
+	 * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain
+	 * may have dropped their last reference (not just their last process
+	 * reference).
+	 */
+	if (!bfqq_process_refs(new_bfqq))
+		return NULL;
+
+	/* Avoid a circular list and skip interim queue merges. */
+	while ((__bfqq = new_bfqq->new_bfqq)) {
+		if (__bfqq == bfqq)
+			return NULL;
+		new_bfqq = __bfqq;
+	}
+
+	process_refs = bfqq_process_refs(bfqq);
+	new_process_refs = bfqq_process_refs(new_bfqq);
+	/*
+	 * If the process for the bfqq has gone away, there is no
+	 * sense in merging the queues.
+	 */
+	if (process_refs == 0 || new_process_refs == 0)
+		return NULL;
+
+	bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
+		new_bfqq->pid);
+
+	/*
+	 * Merging is just a redirection: the requests of the process
+	 * owning one of the two queues are redirected to the other queue.
+	 * The latter queue, in its turn, is set as shared if this is the
+	 * first time that the requests of some process are redirected to
+	 * it.
+	 *
+	 * We redirect bfqq to new_bfqq and not the opposite, because
+	 * we are in the context of the process owning bfqq, thus we
+	 * have the io_cq of this process. So we can immediately
+	 * configure this io_cq to redirect the requests of the
+	 * process to new_bfqq. In contrast, the io_cq of new_bfqq is
+	 * not available any more (new_bfqq->bic == NULL).
+	 *
+	 * Anyway, even in case new_bfqq coincides with the in-service
+	 * queue, redirecting requests the in-service queue is the
+	 * best option, as we feed the in-service queue with new
+	 * requests close to the last request served and, by doing so,
+	 * are likely to increase the throughput.
+	 */
+	bfqq->new_bfqq = new_bfqq;
+	new_bfqq->ref += process_refs;
+	return new_bfqq;
+}
+
+static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq,
+					struct bfq_queue *new_bfqq)
+{
+	if (bfq_too_late_for_merging(new_bfqq)) {
+		bfq_log_bfqq(bfqq->bfqd, bfqq,
+			     "too late for bfq%d to be merged",
+				new_bfqq->pid);
+		return false;
+	}
+
+	if (bfq_class_idle(bfqq) || bfq_class_idle(new_bfqq) ||
+	    (bfqq->ioprio_class != new_bfqq->ioprio_class))
+		return false;
+
+	/*
+	 * If either of the queues has already been detected as seeky,
+	 * then merging it with the other queue is unlikely to lead to
+	 * sequential I/O.
+	 */
+	if (BFQQ_SEEKY(bfqq) || BFQQ_SEEKY(new_bfqq))
+		return false;
+
+	/*
+	 * Interleaved I/O is known to be done by (some) applications
+	 * only for reads, so it does not make sense to merge async
+	 * queues.
+	 */
+	if (!bfq_bfqq_sync(bfqq) || !bfq_bfqq_sync(new_bfqq))
+		return false;
+
+	return true;
+}
+
+/*
+ * Attempt to schedule a merge of bfqq with the currently in-service
+ * queue or with a close queue among the scheduled queues.  Return
+ * NULL if no merge was scheduled, a pointer to the shared bfq_queue
+ * structure otherwise.
+ *
+ * The OOM queue is not allowed to participate to cooperation: in fact, since
+ * the requests temporarily redirected to the OOM queue could be redirected
+ * again to dedicated queues at any time, the state needed to correctly
+ * handle merging with the OOM queue would be quite complex and expensive
+ * to maintain. Besides, in such a critical condition as an out of memory,
+ * the benefits of queue merging may be little relevant, or even negligible.
+ *
+ * WARNING: queue merging may impair fairness among non-weight raised
+ * queues, for at least two reasons: 1) the original weight of a
+ * merged queue may change during the merged state, 2) even being the
+ * weight the same, a merged queue may be bloated with many more
+ * requests than the ones produced by its originally-associated
+ * process.
+ */
+static struct bfq_queue *
+bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+		     void *io_struct, bool request)
+{
+	struct bfq_queue *in_service_bfqq, *new_bfqq;
+
+	/*
+	 * Prevent bfqq from being merged if it has been created too
+	 * long ago. The idea is that true cooperating processes, and
+	 * thus their associated bfq_queues, are supposed to be
+	 * created shortly after each other. This is the case, e.g.,
+	 * for KVM/QEMU and dump I/O threads. Basing on this
+	 * assumption, the following filtering greatly reduces the
+	 * probability that two non-cooperating processes, which just
+	 * happen to do close I/O for some short time interval, have
+	 * their queues merged by mistake.
+	 */
+	if (bfq_too_late_for_merging(bfqq)) {
+		bfq_log_bfqq(bfqd, bfqq,
+			     "would have looked for coop, but too late");
+		return NULL;
+	}
+
+	if (bfqq->new_bfqq)
+		return bfqq->new_bfqq;
+
+	if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq))
+		return NULL;
+
+	/* If there is only one backlogged queue, don't search. */
+	if (bfqd->busy_queues == 1)
+		return NULL;
+
+	in_service_bfqq = bfqd->in_service_queue;
+
+	if (in_service_bfqq && in_service_bfqq != bfqq &&
+	    likely(in_service_bfqq != &bfqd->oom_bfqq) &&
+	    bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) &&
+	    bfqq->entity.parent == in_service_bfqq->entity.parent &&
+	    bfq_may_be_close_cooperator(bfqq, in_service_bfqq)) {
+		new_bfqq = bfq_setup_merge(bfqq, in_service_bfqq);
+		if (new_bfqq)
+			return new_bfqq;
+	}
+	/*
+	 * Check whether there is a cooperator among currently scheduled
+	 * queues. The only thing we need is that the bio/request is not
+	 * NULL, as we need it to establish whether a cooperator exists.
+	 */
+	new_bfqq = bfq_find_close_cooperator(bfqd, bfqq,
+			bfq_io_struct_pos(io_struct, request));
+
+	BUG_ON(new_bfqq && bfqq->entity.parent != new_bfqq->entity.parent);
+
+	if (new_bfqq && likely(new_bfqq != &bfqd->oom_bfqq) &&
+	    bfq_may_be_close_cooperator(bfqq, new_bfqq))
+		return bfq_setup_merge(bfqq, new_bfqq);
+
+	return NULL;
+}
+
+static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
+{
+	struct bfq_io_cq *bic = bfqq->bic;
+
+	/*
+	 * If !bfqq->bic, the queue is already shared or its requests
+	 * have already been redirected to a shared queue; both idle window
+	 * and weight raising state have already been saved. Do nothing.
+	 */
+	if (!bic)
+		return;
+
+	bic->saved_ttime = bfqq->ttime;
+	bic->saved_has_short_ttime = bfq_bfqq_has_short_ttime(bfqq);
+	bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq);
+	bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq);
+	bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node);
+	if (unlikely(bfq_bfqq_just_created(bfqq) &&
+		     !bfq_bfqq_in_large_burst(bfqq) &&
+		     bfqq->bfqd->low_latency)) {
+		/*
+		 * bfqq being merged ritgh after being created: bfqq
+		 * would have deserved interactive weight raising, but
+		 * did not make it to be set in a weight-raised state,
+		 * because of this early merge.  Store directly the
+		 * weight-raising state that would have been assigned
+		 * to bfqq, so that to avoid that bfqq unjustly fails
+		 * to enjoy weight raising if split soon.
+		 */
+		bic->saved_wr_coeff = bfqq->bfqd->bfq_wr_coeff;
+		bic->saved_wr_cur_max_time = bfq_wr_duration(bfqq->bfqd);
+		bic->saved_last_wr_start_finish = jiffies;
+	} else {
+		bic->saved_wr_coeff = bfqq->wr_coeff;
+		bic->saved_wr_start_at_switch_to_srt =
+			bfqq->wr_start_at_switch_to_srt;
+		bic->saved_last_wr_start_finish = bfqq->last_wr_start_finish;
+		bic->saved_wr_cur_max_time = bfqq->wr_cur_max_time;
+	}
+	BUG_ON(time_is_after_jiffies(bfqq->last_wr_start_finish));
+	bfq_log_bfqq(bfqq->bfqd, bfqq,
+		     "bic %p wr_coeff %d start_finish %lu max_time %lu",
+		     bic, bfqq->wr_coeff, bfqq->last_wr_start_finish,
+		     bfqq->wr_cur_max_time);
+}
+
+static void
+bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
+		struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
+{
+	bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu",
+		     (unsigned long) new_bfqq->pid);
+	BUG_ON(bfqq->bic && bfqq->bic == new_bfqq->bic);
+	/* Save weight raising and idle window of the merged queues */
+	bfq_bfqq_save_state(bfqq);
+	bfq_bfqq_save_state(new_bfqq);
+
+	if (bfq_bfqq_IO_bound(bfqq))
+		bfq_mark_bfqq_IO_bound(new_bfqq);
+	bfq_clear_bfqq_IO_bound(bfqq);
+
+	/*
+	 * If bfqq is weight-raised, then let new_bfqq inherit
+	 * weight-raising. To reduce false positives, neglect the case
+	 * where bfqq has just been created, but has not yet made it
+	 * to be weight-raised (which may happen because EQM may merge
+	 * bfqq even before bfq_add_request is executed for the first
+	 * time for bfqq). Handling this case would however be very
+	 * easy, thanks to the flag just_created.
+	 */
+	if (new_bfqq->wr_coeff == 1 && bfqq->wr_coeff > 1) {
+		new_bfqq->wr_coeff = bfqq->wr_coeff;
+		new_bfqq->wr_cur_max_time = bfqq->wr_cur_max_time;
+		new_bfqq->last_wr_start_finish = bfqq->last_wr_start_finish;
+		new_bfqq->wr_start_at_switch_to_srt =
+			bfqq->wr_start_at_switch_to_srt;
+		if (bfq_bfqq_busy(new_bfqq)) {
+			bfqd->wr_busy_queues++;
+			BUG_ON(bfqd->wr_busy_queues > bfqd->busy_queues);
+		}
+
+		new_bfqq->entity.prio_changed = 1;
+		bfq_log_bfqq(bfqd, new_bfqq,
+			     "wr start after merge with %d, rais_max_time %u",
+			     bfqq->pid,
+			     jiffies_to_msecs(bfqq->wr_cur_max_time));
+	}
+
+	if (bfqq->wr_coeff > 1) { /* bfqq has given its wr to new_bfqq */
+		bfqq->wr_coeff = 1;
+		bfqq->entity.prio_changed = 1;
+		if (bfq_bfqq_busy(bfqq)) {
+			bfqd->wr_busy_queues--;
+			BUG_ON(bfqd->wr_busy_queues < 0);
+		}
+
+	}
+
+	bfq_log_bfqq(bfqd, new_bfqq, "wr_busy %d",
+		     bfqd->wr_busy_queues);
+
+	/*
+	 * Merge queues (that is, let bic redirect its requests to new_bfqq)
+	 */
+	bic_set_bfqq(bic, new_bfqq, 1);
+	bfq_mark_bfqq_coop(new_bfqq);
+	/*
+	 * new_bfqq now belongs to at least two bics (it is a shared queue):
+	 * set new_bfqq->bic to NULL. bfqq either:
+	 * - does not belong to any bic any more, and hence bfqq->bic must
+	 *   be set to NULL, or
+	 * - is a queue whose owning bics have already been redirected to a
+	 *   different queue, hence the queue is destined to not belong to
+	 *   any bic soon and bfqq->bic is already NULL (therefore the next
+	 *   assignment causes no harm).
+	 */
+	new_bfqq->bic = NULL;
+	bfqq->bic = NULL;
+	/* release process reference to bfqq */
+	bfq_put_queue(bfqq);
+}
+
+static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq,
+				struct bio *bio)
+{
+	struct bfq_data *bfqd = q->elevator->elevator_data;
+	bool is_sync = op_is_sync(bio->bi_opf);
+	struct bfq_queue *bfqq = bfqd->bio_bfqq, *new_bfqq;
+
+	assert_spin_locked(&bfqd->lock);
+	/*
+	 * Disallow merge of a sync bio into an async request.
+	 */
+	if (is_sync && !rq_is_sync(rq))
+		return false;
+
+	/*
+	 * Lookup the bfqq that this bio will be queued with. Allow
+	 * merge only if rq is queued there.
+	 */
+	BUG_ON(!bfqd->bio_bfqq_set);
+	if (!bfqq)
+		return false;
+
+	/*
+	 * We take advantage of this function to perform an early merge
+	 * of the queues of possible cooperating processes.
+	 */
+	new_bfqq = bfq_setup_cooperator(bfqd, bfqq, bio, false);
+	BUG_ON(new_bfqq == bfqq);
+	if (new_bfqq) {
+		/*
+		 * bic still points to bfqq, then it has not yet been
+		 * redirected to some other bfq_queue, and a queue
+		 * merge beween bfqq and new_bfqq can be safely
+		 * fulfillled, i.e., bic can be redirected to new_bfqq
+		 * and bfqq can be put.
+		 */
+		bfq_merge_bfqqs(bfqd, bfqd->bio_bic, bfqq,
+				new_bfqq);
+		/*
+		 * If we get here, bio will be queued into new_queue,
+		 * so use new_bfqq to decide whether bio and rq can be
+		 * merged.
+		 */
+		bfqq = new_bfqq;
+
+		/*
+		 * Change also bqfd->bio_bfqq, as
+		 * bfqd->bio_bic now points to new_bfqq, and
+		 * this function may be invoked again (and then may
+		 * use again bqfd->bio_bfqq).
+		 */
+		bfqd->bio_bfqq = bfqq;
+	}
+	return bfqq == RQ_BFQQ(rq);
+}
+
+/*
+ * Set the maximum time for the in-service queue to consume its
+ * budget. This prevents seeky processes from lowering the throughput.
+ * In practice, a time-slice service scheme is used with seeky
+ * processes.
+ */
+static void bfq_set_budget_timeout(struct bfq_data *bfqd,
+				   struct bfq_queue *bfqq)
+{
+	unsigned int timeout_coeff;
+
+	if (bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time)
+		timeout_coeff = 1;
+	else
+		timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight;
+
+	bfqd->last_budget_start = ktime_get();
+
+	bfqq->budget_timeout = jiffies +
+		bfqd->bfq_timeout * timeout_coeff;
+
+	bfq_log_bfqq(bfqd, bfqq, "%u",
+		jiffies_to_msecs(bfqd->bfq_timeout * timeout_coeff));
+}
+
+static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
+				       struct bfq_queue *bfqq)
+{
+	if (bfqq) {
+		bfq_clear_bfqq_fifo_expire(bfqq);
+
+		bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8;
+
+		BUG_ON(bfqq == bfqd->in_service_queue);
+		BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list));
+
+		if (time_is_before_jiffies(bfqq->last_wr_start_finish) &&
+		    bfqq->wr_coeff > 1 &&
+		    bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time &&
+		    time_is_before_jiffies(bfqq->budget_timeout)) {
+			/*
+			 * For soft real-time queues, move the start
+			 * of the weight-raising period forward by the
+			 * time the queue has not received any
+			 * service. Otherwise, a relatively long
+			 * service delay is likely to cause the
+			 * weight-raising period of the queue to end,
+			 * because of the short duration of the
+			 * weight-raising period of a soft real-time
+			 * queue.  It is worth noting that this move
+			 * is not so dangerous for the other queues,
+			 * because soft real-time queues are not
+			 * greedy.
+			 *
+			 * To not add a further variable, we use the
+			 * overloaded field budget_timeout to
+			 * determine for how long the queue has not
+			 * received service, i.e., how much time has
+			 * elapsed since the queue expired. However,
+			 * this is a little imprecise, because
+			 * budget_timeout is set to jiffies if bfqq
+			 * not only expires, but also remains with no
+			 * request.
+			 */
+			if (time_after(bfqq->budget_timeout,
+				       bfqq->last_wr_start_finish))
+				bfqq->last_wr_start_finish +=
+					jiffies - bfqq->budget_timeout;
+			else
+				bfqq->last_wr_start_finish = jiffies;
+
+			if (time_is_after_jiffies(bfqq->last_wr_start_finish)) {
+			       pr_crit(
+			       "BFQ WARNING:last %lu budget %lu jiffies %lu",
+			       bfqq->last_wr_start_finish,
+			       bfqq->budget_timeout,
+			       jiffies);
+			       pr_crit("diff %lu", jiffies -
+				       max_t(unsigned long,
+					     bfqq->last_wr_start_finish,
+					     bfqq->budget_timeout));
+			       bfqq->last_wr_start_finish = jiffies;
+			}
+		}
+
+		bfq_set_budget_timeout(bfqd, bfqq);
+		bfq_log_bfqq(bfqd, bfqq,
+			     "cur-budget = %d",
+			     bfqq->entity.budget);
+	} else
+		bfq_log(bfqd, "NULL");
+
+	bfqd->in_service_queue = bfqq;
+}
+
+/*
+ * Get and set a new queue for service.
+ */
+static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd)
+{
+	struct bfq_queue *bfqq = bfq_get_next_queue(bfqd);
+
+	__bfq_set_in_service_queue(bfqd, bfqq);
+	return bfqq;
+}
+
+static void bfq_arm_slice_timer(struct bfq_data *bfqd)
+{
+	struct bfq_queue *bfqq = bfqd->in_service_queue;
+	u32 sl;
+
+	BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list));
+
+	bfq_mark_bfqq_wait_request(bfqq);
+
+	/*
+	 * We don't want to idle for seeks, but we do want to allow
+	 * fair distribution of slice time for a process doing back-to-back
+	 * seeks. So allow a little bit of time for him to submit a new rq.
+	 *
+	 * To prevent processes with (partly) seeky workloads from
+	 * being too ill-treated, grant them a small fraction of the
+	 * assigned budget before reducing the waiting time to
+	 * BFQ_MIN_TT. This happened to help reduce latency.
+	 */
+	sl = bfqd->bfq_slice_idle;
+	/*
+	 * Unless the queue is being weight-raised or the scenario is
+	 * asymmetric, grant only minimum idle time if the queue
+	 * is seeky. A long idling is preserved for a weight-raised
+	 * queue, or, more in general, in an asymemtric scenario,
+	 * because a long idling is needed for guaranteeing to a queue
+	 * its reserved share of the throughput (in particular, it is
+	 * needed if the queue has a higher weight than some other
+	 * queue).
+	 */
+	if (BFQQ_SEEKY(bfqq) && bfqq->wr_coeff == 1 &&
+	    bfq_symmetric_scenario(bfqd))
+		sl = min_t(u32, sl, BFQ_MIN_TT);
+
+	bfqd->last_idling_start = ktime_get();
+	hrtimer_start(&bfqd->idle_slice_timer, ns_to_ktime(sl),
+		      HRTIMER_MODE_REL);
+	bfqg_stats_set_start_idle_time(bfqq_group(bfqq));
+	bfq_log(bfqd, "arm idle: %ld/%ld ms",
+		sl / NSEC_PER_MSEC, bfqd->bfq_slice_idle / NSEC_PER_MSEC);
+}
+
+/*
+ * In autotuning mode, max_budget is dynamically recomputed as the
+ * amount of sectors transferred in timeout at the estimated peak
+ * rate. This enables BFQ to utilize a full timeslice with a full
+ * budget, even if the in-service queue is served at peak rate. And
+ * this maximises throughput with sequential workloads.
+ */
+static unsigned long bfq_calc_max_budget(struct bfq_data *bfqd)
+{
+	return (u64)bfqd->peak_rate * USEC_PER_MSEC *
+		jiffies_to_msecs(bfqd->bfq_timeout)>>BFQ_RATE_SHIFT;
+}
+
+/*
+ * Update parameters related to throughput and responsiveness, as a
+ * function of the estimated peak rate. See comments on
+ * bfq_calc_max_budget(), and on T_slow and T_fast arrays.
+ */
+static void update_thr_responsiveness_params(struct bfq_data *bfqd)
+{
+	int dev_type = blk_queue_nonrot(bfqd->queue);
+
+	if (bfqd->bfq_user_max_budget == 0) {
+		bfqd->bfq_max_budget =
+			bfq_calc_max_budget(bfqd);
+		BUG_ON(bfqd->bfq_max_budget < 0);
+		bfq_log(bfqd, "new max_budget = %d",
+			bfqd->bfq_max_budget);
+	}
+
+	if (bfqd->device_speed == BFQ_BFQD_FAST &&
+	    bfqd->peak_rate < device_speed_thresh[dev_type]) {
+		bfqd->device_speed = BFQ_BFQD_SLOW;
+		bfqd->RT_prod = R_slow[dev_type] *
+			T_slow[dev_type];
+	} else if (bfqd->device_speed == BFQ_BFQD_SLOW &&
+		   bfqd->peak_rate > device_speed_thresh[dev_type]) {
+		bfqd->device_speed = BFQ_BFQD_FAST;
+		bfqd->RT_prod = R_fast[dev_type] *
+			T_fast[dev_type];
+	}
+
+	bfq_log(bfqd,
+"dev_type %s dev_speed_class = %s (%llu sects/sec), thresh %llu setcs/sec",
+		dev_type == 0 ? "ROT" : "NONROT",
+		bfqd->device_speed == BFQ_BFQD_FAST ? "FAST" : "SLOW",
+		bfqd->device_speed == BFQ_BFQD_FAST ?
+		(USEC_PER_SEC*(u64)R_fast[dev_type])>>BFQ_RATE_SHIFT :
+		(USEC_PER_SEC*(u64)R_slow[dev_type])>>BFQ_RATE_SHIFT,
+		(USEC_PER_SEC*(u64)device_speed_thresh[dev_type])>>
+		BFQ_RATE_SHIFT);
+}
+
+static void bfq_reset_rate_computation(struct bfq_data *bfqd, struct request *rq)
+{
+	if (rq != NULL) { /* new rq dispatch now, reset accordingly */
+		bfqd->last_dispatch = bfqd->first_dispatch = ktime_get_ns() ;
+		bfqd->peak_rate_samples = 1;
+		bfqd->sequential_samples = 0;
+		bfqd->tot_sectors_dispatched = bfqd->last_rq_max_size =
+			blk_rq_sectors(rq);
+	} else /* no new rq dispatched, just reset the number of samples */
+		bfqd->peak_rate_samples = 0; /* full re-init on next disp. */
+
+	bfq_log(bfqd,
+		"at end, sample %u/%u tot_sects %llu",
+		bfqd->peak_rate_samples, bfqd->sequential_samples,
+		bfqd->tot_sectors_dispatched);
+}
+
+static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq)
+{
+	u32 rate, weight, divisor;
+
+	/*
+	 * For the convergence property to hold (see comments on
+	 * bfq_update_peak_rate()) and for the assessment to be
+	 * reliable, a minimum number of samples must be present, and
+	 * a minimum amount of time must have elapsed. If not so, do
+	 * not compute new rate. Just reset parameters, to get ready
+	 * for a new evaluation attempt.
+	 */
+	if (bfqd->peak_rate_samples < BFQ_RATE_MIN_SAMPLES ||
+	    bfqd->delta_from_first < BFQ_RATE_MIN_INTERVAL) {
+		bfq_log(bfqd,
+	"only resetting, delta_first %lluus samples %d",
+			bfqd->delta_from_first>>10, bfqd->peak_rate_samples);
+		goto reset_computation;
+	}
+
+	/*
+	 * If a new request completion has occurred after last
+	 * dispatch, then, to approximate the rate at which requests
+	 * have been served by the device, it is more precise to
+	 * extend the observation interval to the last completion.
+	 */
+	bfqd->delta_from_first =
+		max_t(u64, bfqd->delta_from_first,
+		      bfqd->last_completion - bfqd->first_dispatch);
+
+	BUG_ON(bfqd->delta_from_first == 0);
+	/*
+	 * Rate computed in sects/usec, and not sects/nsec, for
+	 * precision issues.
+	 */
+	rate = div64_ul(bfqd->tot_sectors_dispatched<<BFQ_RATE_SHIFT,
+			div_u64(bfqd->delta_from_first, NSEC_PER_USEC));
+
+	bfq_log(bfqd,
+"tot_sects %llu delta_first %lluus rate %llu sects/s (%d)",
+		bfqd->tot_sectors_dispatched, bfqd->delta_from_first>>10,
+		((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT),
+		rate > 20<<BFQ_RATE_SHIFT);
+
+	/*
+	 * Peak rate not updated if:
+	 * - the percentage of sequential dispatches is below 3/4 of the
+	 *   total, and rate is below the current estimated peak rate
+	 * - rate is unreasonably high (> 20M sectors/sec)
+	 */
+	if ((bfqd->sequential_samples < (3 * bfqd->peak_rate_samples)>>2 &&
+	     rate <= bfqd->peak_rate) ||
+		rate > 20<<BFQ_RATE_SHIFT) {
+		bfq_log(bfqd,
+		"goto reset, samples %u/%u rate/peak %llu/%llu",
+		bfqd->peak_rate_samples, bfqd->sequential_samples,
+		((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT),
+		((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT));
+		goto reset_computation;
+	} else {
+		bfq_log(bfqd,
+		"do update, samples %u/%u rate/peak %llu/%llu",
+		bfqd->peak_rate_samples, bfqd->sequential_samples,
+		((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT),
+		((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT));
+	}
+
+	/*
+	 * We have to update the peak rate, at last! To this purpose,
+	 * we use a low-pass filter. We compute the smoothing constant
+	 * of the filter as a function of the 'weight' of the new
+	 * measured rate.
+	 *
+	 * As can be seen in next formulas, we define this weight as a
+	 * quantity proportional to how sequential the workload is,
+	 * and to how long the observation time interval is.
+	 *
+	 * The weight runs from 0 to 8. The maximum value of the
+	 * weight, 8, yields the minimum value for the smoothing
+	 * constant. At this minimum value for the smoothing constant,
+	 * the measured rate contributes for half of the next value of
+	 * the estimated peak rate.
+	 *
+	 * So, the first step is to compute the weight as a function
+	 * of how sequential the workload is. Note that the weight
+	 * cannot reach 9, because bfqd->sequential_samples cannot
+	 * become equal to bfqd->peak_rate_samples, which, in its
+	 * turn, holds true because bfqd->sequential_samples is not
+	 * incremented for the first sample.
+	 */
+	weight = (9 * bfqd->sequential_samples) / bfqd->peak_rate_samples;
+
+	/*
+	 * Second step: further refine the weight as a function of the
+	 * duration of the observation interval.
+	 */
+	weight = min_t(u32, 8,
+		       div_u64(weight * bfqd->delta_from_first,
+			       BFQ_RATE_REF_INTERVAL));
+
+	/*
+	 * Divisor ranging from 10, for minimum weight, to 2, for
+	 * maximum weight.
+	 */
+	divisor = 10 - weight;
+	BUG_ON(divisor == 0);
+
+	/*
+	 * Finally, update peak rate:
+	 *
+	 * peak_rate = peak_rate * (divisor-1) / divisor  +  rate / divisor
+	 */
+	bfqd->peak_rate *= divisor-1;
+	bfqd->peak_rate /= divisor;
+	rate /= divisor; /* smoothing constant alpha = 1/divisor */
+
+	bfq_log(bfqd,
+		"divisor %d tmp_peak_rate %llu tmp_rate %u",
+		divisor,
+		((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT),
+		(u32)((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT));
+
+	BUG_ON(bfqd->peak_rate == 0);
+	BUG_ON(bfqd->peak_rate > 20<<BFQ_RATE_SHIFT);
+
+	bfqd->peak_rate += rate;
+
+	/*
+	 * For a very slow device, bfqd->peak_rate can reach 0 (see
+	 * the minimum representable values reported in the comments
+	 * on BFQ_RATE_SHIFT). Push to 1 if this happens, to avoid
+	 * divisions by zero where bfqd->peak_rate is used as a
+	 * divisor.
+	 */
+	bfqd->peak_rate = max_t(u32, 1, bfqd->peak_rate);
+
+	update_thr_responsiveness_params(bfqd);
+	BUG_ON(bfqd->peak_rate > 20<<BFQ_RATE_SHIFT);
+
+reset_computation:
+	bfq_reset_rate_computation(bfqd, rq);
+}
+
+/*
+ * Update the read/write peak rate (the main quantity used for
+ * auto-tuning, see update_thr_responsiveness_params()).
+ *
+ * It is not trivial to estimate the peak rate (correctly): because of
+ * the presence of sw and hw queues between the scheduler and the
+ * device components that finally serve I/O requests, it is hard to
+ * say exactly when a given dispatched request is served inside the
+ * device, and for how long. As a consequence, it is hard to know
+ * precisely at what rate a given set of requests is actually served
+ * by the device.
+ *
+ * On the opposite end, the dispatch time of any request is trivially
+ * available, and, from this piece of information, the "dispatch rate"
+ * of requests can be immediately computed. So, the idea in the next
+ * function is to use what is known, namely request dispatch times
+ * (plus, when useful, request completion times), to estimate what is
+ * unknown, namely in-device request service rate.
+ *
+ * The main issue is that, because of the above facts, the rate at
+ * which a certain set of requests is dispatched over a certain time
+ * interval can vary greatly with respect to the rate at which the
+ * same requests are then served. But, since the size of any
+ * intermediate queue is limited, and the service scheme is lossless
+ * (no request is silently dropped), the following obvious convergence
+ * property holds: the number of requests dispatched MUST become
+ * closer and closer to the number of requests completed as the
+ * observation interval grows. This is the key property used in
+ * the next function to estimate the peak service rate as a function
+ * of the observed dispatch rate. The function assumes to be invoked
+ * on every request dispatch.
+ */
+static void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq)
+{
+	u64 now_ns = ktime_get_ns();
+
+	if (bfqd->peak_rate_samples == 0) { /* first dispatch */
+		bfq_log(bfqd,
+		"goto reset, samples %d",
+				bfqd->peak_rate_samples) ;
+		bfq_reset_rate_computation(bfqd, rq);
+		goto update_last_values; /* will add one sample */
+	}
+
+	/*
+	 * Device idle for very long: the observation interval lasting
+	 * up to this dispatch cannot be a valid observation interval
+	 * for computing a new peak rate (similarly to the late-
+	 * completion event in bfq_completed_request()). Go to
+	 * update_rate_and_reset to have the following three steps
+	 * taken:
+	 * - close the observation interval at the last (previous)
+	 *   request dispatch or completion
+	 * - compute rate, if possible, for that observation interval
+	 * - start a new observation interval with this dispatch
+	 */
+	if (now_ns - bfqd->last_dispatch > 100*NSEC_PER_MSEC &&
+	    bfqd->rq_in_driver == 0) {
+		bfq_log(bfqd,
+"jumping to updating&resetting delta_last %lluus samples %d",
+			(now_ns - bfqd->last_dispatch)>>10,
+			bfqd->peak_rate_samples) ;
+		goto update_rate_and_reset;
+	}
+
+	/* Update sampling information */
+	bfqd->peak_rate_samples++;
+
+	if ((bfqd->rq_in_driver > 0 ||
+		now_ns - bfqd->last_completion < BFQ_MIN_TT)
+	     && get_sdist(bfqd->last_position, rq) < BFQQ_SEEK_THR)
+		bfqd->sequential_samples++;
+
+	bfqd->tot_sectors_dispatched += blk_rq_sectors(rq);
+
+	/* Reset max observed rq size every 32 dispatches */
+	if (likely(bfqd->peak_rate_samples % 32))
+		bfqd->last_rq_max_size =
+			max_t(u32, blk_rq_sectors(rq), bfqd->last_rq_max_size);
+	else
+		bfqd->last_rq_max_size = blk_rq_sectors(rq);
+
+	bfqd->delta_from_first = now_ns - bfqd->first_dispatch;
+
+	bfq_log(bfqd,
+	"added samples %u/%u tot_sects %llu delta_first %lluus",
+		bfqd->peak_rate_samples, bfqd->sequential_samples,
+		bfqd->tot_sectors_dispatched,
+		bfqd->delta_from_first>>10);
+
+	/* Target observation interval not yet reached, go on sampling */
+	if (bfqd->delta_from_first < BFQ_RATE_REF_INTERVAL)
+		goto update_last_values;
+
+update_rate_and_reset:
+	bfq_update_rate_reset(bfqd, rq);
+update_last_values:
+	bfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq);
+	bfqd->last_dispatch = now_ns;
+
+	bfq_log(bfqd,
+	"delta_first %lluus last_pos %llu peak_rate %llu",
+		(now_ns - bfqd->first_dispatch)>>10,
+		(unsigned long long) bfqd->last_position,
+		((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT));
+	bfq_log(bfqd,
+	"samples at end %d", bfqd->peak_rate_samples);
+}
+
+/*
+ * Remove request from internal lists.
+ */
+static void bfq_dispatch_remove(struct request_queue *q, struct request *rq)
+{
+	struct bfq_queue *bfqq = RQ_BFQQ(rq);
+
+	/*
+	 * For consistency, the next instruction should have been
+	 * executed after removing the request from the queue and
+	 * dispatching it.  We execute instead this instruction before
+	 * bfq_remove_request() (and hence introduce a temporary
+	 * inconsistency), for efficiency.  In fact, should this
+	 * dispatch occur for a non in-service bfqq, this anticipated
+	 * increment prevents two counters related to bfqq->dispatched
+	 * from risking to be, first, uselessly decremented, and then
+	 * incremented again when the (new) value of bfqq->dispatched
+	 * happens to be taken into account.
+	 */
+	bfqq->dispatched++;
+	bfq_update_peak_rate(q->elevator->elevator_data, rq);
+
+	bfq_remove_request(q, rq);
+}
+
+static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	BUG_ON(bfqq != bfqd->in_service_queue);
+
+	/*
+	 * If this bfqq is shared between multiple processes, check
+	 * to make sure that those processes are still issuing I/Os
+	 * within the mean seek distance. If not, it may be time to
+	 * break the queues apart again.
+	 */
+	if (bfq_bfqq_coop(bfqq) && BFQQ_SEEKY(bfqq))
+		bfq_mark_bfqq_split_coop(bfqq);
+
+	if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
+		if (bfqq->dispatched == 0)
+			/*
+			 * Overloading budget_timeout field to store
+			 * the time at which the queue remains with no
+			 * backlog and no outstanding request; used by
+			 * the weight-raising mechanism.
+			 */
+			bfqq->budget_timeout = jiffies;
+
+		bfq_del_bfqq_busy(bfqd, bfqq, true);
+	} else {
+		bfq_requeue_bfqq(bfqd, bfqq, true);
+		/*
+		 * Resort priority tree of potential close cooperators.
+		 */
+		bfq_pos_tree_add_move(bfqd, bfqq);
+	}
+
+	/*
+	 * All in-service entities must have been properly deactivated
+	 * or requeued before executing the next function, which
+	 * resets all in-service entites as no more in service.
+	 */
+	__bfq_bfqd_reset_in_service(bfqd);
+}
+
+/**
+ * __bfq_bfqq_recalc_budget - try to adapt the budget to the @bfqq behavior.
+ * @bfqd: device data.
+ * @bfqq: queue to update.
+ * @reason: reason for expiration.
+ *
+ * Handle the feedback on @bfqq budget at queue expiration.
+ * See the body for detailed comments.
+ */
+static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
+				     struct bfq_queue *bfqq,
+				     enum bfqq_expiration reason)
+{
+	struct request *next_rq;
+	int budget, min_budget;
+
+	BUG_ON(bfqq != bfqd->in_service_queue);
+
+	min_budget = bfq_min_budget(bfqd);
+
+	if (bfqq->wr_coeff == 1)
+		budget = bfqq->max_budget;
+	else /*
+	      * Use a constant, low budget for weight-raised queues,
+	      * to help achieve a low latency. Keep it slightly higher
+	      * than the minimum possible budget, to cause a little
+	      * bit fewer expirations.
+	      */
+		budget = 2 * min_budget;
+
+	bfq_log_bfqq(bfqd, bfqq, "last budg %d, budg left %d",
+		bfqq->entity.budget, bfq_bfqq_budget_left(bfqq));
+	bfq_log_bfqq(bfqd, bfqq, "last max_budg %d, min budg %d",
+		budget, bfq_min_budget(bfqd));
+	bfq_log_bfqq(bfqd, bfqq, "sync %d, seeky %d",
+		bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->in_service_queue));
+
+	if (bfq_bfqq_sync(bfqq) && bfqq->wr_coeff == 1) {
+		switch (reason) {
+		/*
+		 * Caveat: in all the following cases we trade latency
+		 * for throughput.
+		 */
+		case BFQ_BFQQ_TOO_IDLE:
+			/*
+			 * This is the only case where we may reduce
+			 * the budget: if there is no request of the
+			 * process still waiting for completion, then
+			 * we assume (tentatively) that the timer has
+			 * expired because the batch of requests of
+			 * the process could have been served with a
+			 * smaller budget.  Hence, betting that
+			 * process will behave in the same way when it
+			 * becomes backlogged again, we reduce its
+			 * next budget.  As long as we guess right,
+			 * this budget cut reduces the latency
+			 * experienced by the process.
+			 *
+			 * However, if there are still outstanding
+			 * requests, then the process may have not yet
+			 * issued its next request just because it is
+			 * still waiting for the completion of some of
+			 * the still outstanding ones.  So in this
+			 * subcase we do not reduce its budget, on the
+			 * contrary we increase it to possibly boost
+			 * the throughput, as discussed in the
+			 * comments to the BUDGET_TIMEOUT case.
+			 */
+			if (bfqq->dispatched > 0) /* still outstanding reqs */
+				budget = min(budget * 2, bfqd->bfq_max_budget);
+			else {
+				if (budget > 5 * min_budget)
+					budget -= 4 * min_budget;
+				else
+					budget = min_budget;
+			}
+			break;
+		case BFQ_BFQQ_BUDGET_TIMEOUT:
+			/*
+			 * We double the budget here because it gives
+			 * the chance to boost the throughput if this
+			 * is not a seeky process (and has bumped into
+			 * this timeout because of, e.g., ZBR).
+			 */
+			budget = min(budget * 2, bfqd->bfq_max_budget);
+			break;
+		case BFQ_BFQQ_BUDGET_EXHAUSTED:
+			/*
+			 * The process still has backlog, and did not
+			 * let either the budget timeout or the disk
+			 * idling timeout expire. Hence it is not
+			 * seeky, has a short thinktime and may be
+			 * happy with a higher budget too. So
+			 * definitely increase the budget of this good
+			 * candidate to boost the disk throughput.
+			 */
+			budget = min(budget * 4, bfqd->bfq_max_budget);
+			break;
+		case BFQ_BFQQ_NO_MORE_REQUESTS:
+			/*
+			 * For queues that expire for this reason, it
+			 * is particularly important to keep the
+			 * budget close to the actual service they
+			 * need. Doing so reduces the timestamp
+			 * misalignment problem described in the
+			 * comments in the body of
+			 * __bfq_activate_entity. In fact, suppose
+			 * that a queue systematically expires for
+			 * BFQ_BFQQ_NO_MORE_REQUESTS and presents a
+			 * new request in time to enjoy timestamp
+			 * back-shifting. The larger the budget of the
+			 * queue is with respect to the service the
+			 * queue actually requests in each service
+			 * slot, the more times the queue can be
+			 * reactivated with the same virtual finish
+			 * time. It follows that, even if this finish
+			 * time is pushed to the system virtual time
+			 * to reduce the consequent timestamp
+			 * misalignment, the queue unjustly enjoys for
+			 * many re-activations a lower finish time
+			 * than all newly activated queues.
+			 *
+			 * The service needed by bfqq is measured
+			 * quite precisely by bfqq->entity.service.
+			 * Since bfqq does not enjoy device idling,
+			 * bfqq->entity.service is equal to the number
+			 * of sectors that the process associated with
+			 * bfqq requested to read/write before waiting
+			 * for request completions, or blocking for
+			 * other reasons.
+			 */
+			budget = max_t(int, bfqq->entity.service, min_budget);
+			break;
+		default:
+			return;
+		}
+	} else if (!bfq_bfqq_sync(bfqq))
+		/*
+		 * Async queues get always the maximum possible
+		 * budget, as for them we do not care about latency
+		 * (in addition, their ability to dispatch is limited
+		 * by the charging factor).
+		 */
+		budget = bfqd->bfq_max_budget;
+
+	bfqq->max_budget = budget;
+
+	if (bfqd->budgets_assigned >= bfq_stats_min_budgets &&
+	    !bfqd->bfq_user_max_budget)
+		bfqq->max_budget = min(bfqq->max_budget, bfqd->bfq_max_budget);
+
+	/*
+	 * If there is still backlog, then assign a new budget, making
+	 * sure that it is large enough for the next request.  Since
+	 * the finish time of bfqq must be kept in sync with the
+	 * budget, be sure to call __bfq_bfqq_expire() *after* this
+	 * update.
+	 *
+	 * If there is no backlog, then no need to update the budget;
+	 * it will be updated on the arrival of a new request.
+	 */
+	next_rq = bfqq->next_rq;
+	if (next_rq) {
+		BUG_ON(reason == BFQ_BFQQ_TOO_IDLE ||
+		       reason == BFQ_BFQQ_NO_MORE_REQUESTS);
+		bfqq->entity.budget = max_t(unsigned long, bfqq->max_budget,
+					    bfq_serv_to_charge(next_rq, bfqq));
+		BUG_ON(!bfq_bfqq_busy(bfqq));
+		BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list));
+	}
+
+	bfq_log_bfqq(bfqd, bfqq, "head sect: %u, new budget %d",
+			next_rq ? blk_rq_sectors(next_rq) : 0,
+			bfqq->entity.budget);
+}
+
+/*
+ * Return true if the process associated with bfqq is "slow". The slow
+ * flag is used, in addition to the budget timeout, to reduce the
+ * amount of service provided to seeky processes, and thus reduce
+ * their chances to lower the throughput. More details in the comments
+ * on the function bfq_bfqq_expire().
+ *
+ * An important observation is in order: as discussed in the comments
+ * on the function bfq_update_peak_rate(), with devices with internal
+ * queues, it is hard if ever possible to know when and for how long
+ * an I/O request is processed by the device (apart from the trivial
+ * I/O pattern where a new request is dispatched only after the
+ * previous one has been completed). This makes it hard to evaluate
+ * the real rate at which the I/O requests of each bfq_queue are
+ * served.  In fact, for an I/O scheduler like BFQ, serving a
+ * bfq_queue means just dispatching its requests during its service
+ * slot (i.e., until the budget of the queue is exhausted, or the
+ * queue remains idle, or, finally, a timeout fires). But, during the
+ * service slot of a bfq_queue, around 100 ms at most, the device may
+ * be even still processing requests of bfq_queues served in previous
+ * service slots. On the opposite end, the requests of the in-service
+ * bfq_queue may be completed after the service slot of the queue
+ * finishes.
+ *
+ * Anyway, unless more sophisticated solutions are used
+ * (where possible), the sum of the sizes of the requests dispatched
+ * during the service slot of a bfq_queue is probably the only
+ * approximation available for the service received by the bfq_queue
+ * during its service slot. And this sum is the quantity used in this
+ * function to evaluate the I/O speed of a process.
+ */
+static bool bfq_bfqq_is_slow(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+				 bool compensate, enum bfqq_expiration reason,
+				 unsigned long *delta_ms)
+{
+	ktime_t delta_ktime;
+	u32 delta_usecs;
+	bool slow = BFQQ_SEEKY(bfqq); /* if delta too short, use seekyness */
+
+	if (!bfq_bfqq_sync(bfqq))
+		return false;
+
+	if (compensate)
+		delta_ktime = bfqd->last_idling_start;
+	else
+		delta_ktime = ktime_get();
+	delta_ktime = ktime_sub(delta_ktime, bfqd->last_budget_start);
+	delta_usecs = ktime_to_us(delta_ktime);
+
+	/* don't use too short time intervals */
+	if (delta_usecs < 1000) {
+		if (blk_queue_nonrot(bfqd->queue))
+			 /*
+			  * give same worst-case guarantees as idling
+			  * for seeky
+			  */
+			*delta_ms = BFQ_MIN_TT / NSEC_PER_MSEC;
+		else /* charge at least one seek */
+			*delta_ms = bfq_slice_idle / NSEC_PER_MSEC;
+
+		bfq_log(bfqd, "too short %u", delta_usecs);
+
+		return slow;
+	}
+
+	*delta_ms = delta_usecs / USEC_PER_MSEC;
+
+	/*
+	 * Use only long (> 20ms) intervals to filter out excessive
+	 * spikes in service rate estimation.
+	 */
+	if (delta_usecs > 20000) {
+		/*
+		 * Caveat for rotational devices: processes doing I/O
+		 * in the slower disk zones tend to be slow(er) even
+		 * if not seeky. In this respect, the estimated peak
+		 * rate is likely to be an average over the disk
+		 * surface. Accordingly, to not be too harsh with
+		 * unlucky processes, a process is deemed slow only if
+		 * its rate has been lower than half of the estimated
+		 * peak rate.
+		 */
+		slow = bfqq->entity.service < bfqd->bfq_max_budget / 2;
+		bfq_log(bfqd, "relative rate %d/%d",
+			bfqq->entity.service, bfqd->bfq_max_budget);
+	}
+
+	bfq_log_bfqq(bfqd, bfqq, "slow %d", slow);
+
+	return slow;
+}
+
+/*
+ * To be deemed as soft real-time, an application must meet two
+ * requirements. First, the application must not require an average
+ * bandwidth higher than the approximate bandwidth required to playback or
+ * record a compressed high-definition video.
+ * The next function is invoked on the completion of the last request of a
+ * batch, to compute the next-start time instant, soft_rt_next_start, such
+ * that, if the next request of the application does not arrive before
+ * soft_rt_next_start, then the above requirement on the bandwidth is met.
+ *
+ * The second requirement is that the request pattern of the application is
+ * isochronous, i.e., that, after issuing a request or a batch of requests,
+ * the application stops issuing new requests until all its pending requests
+ * have been completed. After that, the application may issue a new batch,
+ * and so on.
+ * For this reason the next function is invoked to compute
+ * soft_rt_next_start only for applications that meet this requirement,
+ * whereas soft_rt_next_start is set to infinity for applications that do
+ * not.
+ *
+ * Unfortunately, even a greedy (i.e., I/O-bound) application may
+ * happen to meet, occasionally or systematically, both the above
+ * bandwidth and isochrony requirements. This may happen at least in
+ * the following circumstances. First, if the CPU load is high. The
+ * application may stop issuing requests while the CPUs are busy
+ * serving other processes, then restart, then stop again for a while,
+ * and so on. The other circumstances are related to the storage
+ * device: the storage device is highly loaded or reaches a low-enough
+ * throughput with the I/O of the application (e.g., because the I/O
+ * is random and/or the device is slow). In all these cases, the
+ * I/O of the application may be simply slowed down enough to meet
+ * the bandwidth and isochrony requirements. To reduce the probability
+ * that greedy applications are deemed as soft real-time in these
+ * corner cases, a further rule is used in the computation of
+ * soft_rt_next_start: the return value of this function is forced to
+ * be higher than the maximum between the following two quantities.
+ *
+ * (a) Current time plus: (1) the maximum time for which the arrival
+ *     of a request is waited for when a sync queue becomes idle,
+ *     namely bfqd->bfq_slice_idle, and (2) a few extra jiffies. We
+ *     postpone for a moment the reason for adding a few extra
+ *     jiffies; we get back to it after next item (b).  Lower-bounding
+ *     the return value of this function with the current time plus
+ *     bfqd->bfq_slice_idle tends to filter out greedy applications,
+ *     because the latter issue their next request as soon as possible
+ *     after the last one has been completed. In contrast, a soft
+ *     real-time application spends some time processing data, after a
+ *     batch of its requests has been completed.
+ *
+ * (b) Current value of bfqq->soft_rt_next_start. As pointed out
+ *     above, greedy applications may happen to meet both the
+ *     bandwidth and isochrony requirements under heavy CPU or
+ *     storage-device load. In more detail, in these scenarios, these
+ *     applications happen, only for limited time periods, to do I/O
+ *     slowly enough to meet all the requirements described so far,
+ *     including the filtering in above item (a). These slow-speed
+ *     time intervals are usually interspersed between other time
+ *     intervals during which these applications do I/O at a very high
+ *     speed. Fortunately, exactly because of the high speed of the
+ *     I/O in the high-speed intervals, the values returned by this
+ *     function happen to be so high, near the end of any such
+ *     high-speed interval, to be likely to fall *after* the end of
+ *     the low-speed time interval that follows. These high values are
+ *     stored in bfqq->soft_rt_next_start after each invocation of
+ *     this function. As a consequence, if the last value of
+ *     bfqq->soft_rt_next_start is constantly used to lower-bound the
+ *     next value that this function may return, then, from the very
+ *     beginning of a low-speed interval, bfqq->soft_rt_next_start is
+ *     likely to be constantly kept so high that any I/O request
+ *     issued during the low-speed interval is considered as arriving
+ *     to soon for the application to be deemed as soft
+ *     real-time. Then, in the high-speed interval that follows, the
+ *     application will not be deemed as soft real-time, just because
+ *     it will do I/O at a high speed. And so on.
+ *
+ * Getting back to the filtering in item (a), in the following two
+ * cases this filtering might be easily passed by a greedy
+ * application, if the reference quantity was just
+ * bfqd->bfq_slice_idle:
+ * 1) HZ is so low that the duration of a jiffy is comparable to or
+ *    higher than bfqd->bfq_slice_idle. This happens, e.g., on slow
+ *    devices with HZ=100. The time granularity may be so coarse
+ *    that the approximation, in jiffies, of bfqd->bfq_slice_idle
+ *    is rather lower than the exact value.
+ * 2) jiffies, instead of increasing at a constant rate, may stop increasing
+ *    for a while, then suddenly 'jump' by several units to recover the lost
+ *    increments. This seems to happen, e.g., inside virtual machines.
+ * To address this issue, in the filtering in (a) we do not use as a
+ * reference time interval just bfqd->bfq_slice_idle, but
+ * bfqd->bfq_slice_idle plus a few jiffies. In particular, we add the
+ * minimum number of jiffies for which the filter seems to be quite
+ * precise also in embedded systems and KVM/QEMU virtual machines.
+ */
+static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd,
+						struct bfq_queue *bfqq)
+{
+	bfq_log_bfqq(bfqd, bfqq,
+"service_blkg %lu soft_rate %u sects/sec interval %u",
+		     bfqq->service_from_backlogged,
+		     bfqd->bfq_wr_max_softrt_rate,
+		     jiffies_to_msecs(HZ * bfqq->service_from_backlogged /
+				      bfqd->bfq_wr_max_softrt_rate));
+
+	return max3(bfqq->soft_rt_next_start,
+		    bfqq->last_idle_bklogged +
+		    HZ * bfqq->service_from_backlogged /
+		    bfqd->bfq_wr_max_softrt_rate,
+		    jiffies + nsecs_to_jiffies(bfqq->bfqd->bfq_slice_idle) + 4);
+}
+
+/**
+ * bfq_bfqq_expire - expire a queue.
+ * @bfqd: device owning the queue.
+ * @bfqq: the queue to expire.
+ * @compensate: if true, compensate for the time spent idling.
+ * @reason: the reason causing the expiration.
+ *
+ * If the process associated with bfqq does slow I/O (e.g., because it
+ * issues random requests), we charge bfqq with the time it has been
+ * in service instead of the service it has received (see
+ * bfq_bfqq_charge_time for details on how this goal is achieved). As
+ * a consequence, bfqq will typically get higher timestamps upon
+ * reactivation, and hence it will be rescheduled as if it had
+ * received more service than what it has actually received. In the
+ * end, bfqq receives less service in proportion to how slowly its
+ * associated process consumes its budgets (and hence how seriously it
+ * tends to lower the throughput). In addition, this time-charging
+ * strategy guarantees time fairness among slow processes. In
+ * contrast, if the process associated with bfqq is not slow, we
+ * charge bfqq exactly with the service it has received.
+ *
+ * Charging time to the first type of queues and the exact service to
+ * the other has the effect of using the WF2Q+ policy to schedule the
+ * former on a timeslice basis, without violating service domain
+ * guarantees among the latter.
+ */
+static void bfq_bfqq_expire(struct bfq_data *bfqd,
+			    struct bfq_queue *bfqq,
+			    bool compensate,
+			    enum bfqq_expiration reason)
+{
+	bool slow;
+	unsigned long delta = 0;
+	struct bfq_entity *entity = &bfqq->entity;
+	int ref;
+
+	BUG_ON(bfqq != bfqd->in_service_queue);
+
+	/*
+	 * Check whether the process is slow (see bfq_bfqq_is_slow).
+	 */
+	slow = bfq_bfqq_is_slow(bfqd, bfqq, compensate, reason, &delta);
+
+	/*
+	 * As above explained, charge slow (typically seeky) and
+	 * timed-out queues with the time and not the service
+	 * received, to favor sequential workloads.
+	 *
+	 * Processes doing I/O in the slower disk zones will tend to
+	 * be slow(er) even if not seeky. Therefore, since the
+	 * estimated peak rate is actually an average over the disk
+	 * surface, these processes may timeout just for bad luck. To
+	 * avoid punishing them, do not charge time to processes that
+	 * succeeded in consuming at least 2/3 of their budget. This
+	 * allows BFQ to preserve enough elasticity to still perform
+	 * bandwidth, and not time, distribution with little unlucky
+	 * or quasi-sequential processes.
+	 */
+	if (bfqq->wr_coeff == 1 &&
+	    (slow ||
+	     (reason == BFQ_BFQQ_BUDGET_TIMEOUT &&
+	      bfq_bfqq_budget_left(bfqq) >=  entity->budget / 3)))
+		bfq_bfqq_charge_time(bfqd, bfqq, delta);
+
+	BUG_ON(bfqq->entity.budget < bfqq->entity.service);
+
+	if (reason == BFQ_BFQQ_TOO_IDLE &&
+	    entity->service <= 2 * entity->budget / 10)
+		bfq_clear_bfqq_IO_bound(bfqq);
+
+	if (bfqd->low_latency && bfqq->wr_coeff == 1)
+		bfqq->last_wr_start_finish = jiffies;
+
+	if (bfqd->low_latency && bfqd->bfq_wr_max_softrt_rate > 0 &&
+	    RB_EMPTY_ROOT(&bfqq->sort_list)) {
+		/*
+		 * If we get here, and there are no outstanding
+		 * requests, then the request pattern is isochronous
+		 * (see the comments on the function
+		 * bfq_bfqq_softrt_next_start()). Thus we can compute
+		 * soft_rt_next_start. If, instead, the queue still
+		 * has outstanding requests, then we have to wait for
+		 * the completion of all the outstanding requests to
+		 * discover whether the request pattern is actually
+		 * isochronous.
+		 */
+		BUG_ON(bfqd->busy_queues < 1);
+		if (bfqq->dispatched == 0) {
+			bfqq->soft_rt_next_start =
+				bfq_bfqq_softrt_next_start(bfqd, bfqq);
+			bfq_log_bfqq(bfqd, bfqq, "new soft_rt_next %lu",
+				     bfqq->soft_rt_next_start);
+		} else {
+			/*
+			 * The application is still waiting for the
+			 * completion of one or more requests:
+			 * prevent it from possibly being incorrectly
+			 * deemed as soft real-time by setting its
+			 * soft_rt_next_start to infinity. In fact,
+			 * without this assignment, the application
+			 * would be incorrectly deemed as soft
+			 * real-time if:
+			 * 1) it issued a new request before the
+			 *    completion of all its in-flight
+			 *    requests, and
+			 * 2) at that time, its soft_rt_next_start
+			 *    happened to be in the past.
+			 */
+			bfqq->soft_rt_next_start =
+				bfq_greatest_from_now();
+			/*
+			 * Schedule an update of soft_rt_next_start to when
+			 * the task may be discovered to be isochronous.
+			 */
+			bfq_mark_bfqq_softrt_update(bfqq);
+		}
+	}
+
+	bfq_log_bfqq(bfqd, bfqq,
+		"expire (%s, slow %d, num_disp %d, short_ttime %d, weight %d)",
+		     reason_name[reason], slow, bfqq->dispatched,
+		     bfq_bfqq_has_short_ttime(bfqq), entity->weight);
+
+	/*
+	 * Increase, decrease or leave budget unchanged according to
+	 * reason.
+	 */
+	BUG_ON(bfqq->entity.budget < bfqq->entity.service);
+	__bfq_bfqq_recalc_budget(bfqd, bfqq, reason);
+	BUG_ON(bfqq->next_rq == NULL &&
+	       bfqq->entity.budget < bfqq->entity.service);
+	ref = bfqq->ref;
+	__bfq_bfqq_expire(bfqd, bfqq);
+
+	BUG_ON(ref > 1 &&
+	       !bfq_bfqq_busy(bfqq) && reason == BFQ_BFQQ_BUDGET_EXHAUSTED &&
+		!bfq_class_idle(bfqq));
+
+	/* mark bfqq as waiting a request only if a bic still points to it */
+	if (ref > 1 && !bfq_bfqq_busy(bfqq) &&
+	    reason != BFQ_BFQQ_BUDGET_TIMEOUT &&
+	    reason != BFQ_BFQQ_BUDGET_EXHAUSTED)
+		bfq_mark_bfqq_non_blocking_wait_rq(bfqq);
+}
+
+/*
+ * Budget timeout is not implemented through a dedicated timer, but
+ * just checked on request arrivals and completions, as well as on
+ * idle timer expirations.
+ */
+static bool bfq_bfqq_budget_timeout(struct bfq_queue *bfqq)
+{
+	return time_is_before_eq_jiffies(bfqq->budget_timeout);
+}
+
+/*
+ * If we expire a queue that is actively waiting (i.e., with the
+ * device idled) for the arrival of a new request, then we may incur
+ * the timestamp misalignment problem described in the body of the
+ * function __bfq_activate_entity. Hence we return true only if this
+ * condition does not hold, or if the queue is slow enough to deserve
+ * only to be kicked off for preserving a high throughput.
+ */
+static bool bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq)
+{
+	bfq_log_bfqq(bfqq->bfqd, bfqq,
+		"wait_request %d left %d timeout %d",
+		bfq_bfqq_wait_request(bfqq),
+			bfq_bfqq_budget_left(bfqq) >=  bfqq->entity.budget / 3,
+		bfq_bfqq_budget_timeout(bfqq));
+
+	return (!bfq_bfqq_wait_request(bfqq) ||
+		bfq_bfqq_budget_left(bfqq) >=  bfqq->entity.budget / 3)
+		&&
+		bfq_bfqq_budget_timeout(bfqq);
+}
+
+/*
+ * For a queue that becomes empty, device idling is allowed only if
+ * this function returns true for that queue. As a consequence, since
+ * device idling plays a critical role for both throughput boosting
+ * and service guarantees, the return value of this function plays a
+ * critical role as well.
+ *
+ * In a nutshell, this function returns true only if idling is
+ * beneficial for throughput or, even if detrimental for throughput,
+ * idling is however necessary to preserve service guarantees (low
+ * latency, desired throughput distribution, ...). In particular, on
+ * NCQ-capable devices, this function tries to return false, so as to
+ * help keep the drives' internal queues full, whenever this helps the
+ * device boost the throughput without causing any service-guarantee
+ * issue.
+ *
+ * In more detail, the return value of this function is obtained by,
+ * first, computing a number of boolean variables that take into
+ * account throughput and service-guarantee issues, and, then,
+ * combining these variables in a logical expression. Most of the
+ * issues taken into account are not trivial. We discuss these issues
+ * while introducing the variables.
+ */
+static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+{
+	struct bfq_data *bfqd = bfqq->bfqd;
+	bool rot_without_queueing =
+		!blk_queue_nonrot(bfqd->queue) && !bfqd->hw_tag,
+		bfqq_sequential_and_IO_bound,
+		idling_boosts_thr, idling_boosts_thr_without_issues,
+		idling_needed_for_service_guarantees,
+		asymmetric_scenario;
+
+	if (bfqd->strict_guarantees)
+		return true;
+
+	/*
+	 * Idling is performed only if slice_idle > 0. In addition, we
+	 * do not idle if
+	 * (a) bfqq is async
+	 * (b) bfqq is in the idle io prio class: in this case we do
+	 * not idle because we want to minimize the bandwidth that
+	 * queues in this class can steal to higher-priority queues
+	 */
+	if (bfqd->bfq_slice_idle == 0 || !bfq_bfqq_sync(bfqq) ||
+	   bfq_class_idle(bfqq))
+		return false;
+
+	bfqq_sequential_and_IO_bound = !BFQQ_SEEKY(bfqq) &&
+		bfq_bfqq_IO_bound(bfqq) && bfq_bfqq_has_short_ttime(bfqq);
+	/*
+	 * The next variable takes into account the cases where idling
+	 * boosts the throughput.
+	 *
+	 * The value of the variable is computed considering, first, that
+	 * idling is virtually always beneficial for the throughput if:
+	 * (a) the device is not NCQ-capable and rotational, or
+	 * (b) regardless of the presence of NCQ, the device is rotational and
+	 *     the request pattern for bfqq is I/O-bound and sequential, or
+	 * (c) regardless of whether it is rotational, the device is
+	 *     not NCQ-capable and the request pattern for bfqq is
+	 *     I/O-bound and sequential.
+	 *
+	 * Secondly, and in contrast to the above item (b), idling an
+	 * NCQ-capable flash-based device would not boost the
+	 * throughput even with sequential I/O; rather it would lower
+	 * the throughput in proportion to how fast the device
+	 * is. Accordingly, the next variable is true if any of the
+	 * above conditions (a), (b) or (c) is true, and, in
+	 * particular, happens to be false if bfqd is an NCQ-capable
+	 * flash-based device.
+	 */
+	idling_boosts_thr = rot_without_queueing ||
+		((!blk_queue_nonrot(bfqd->queue) || !bfqd->hw_tag) &&
+		 bfqq_sequential_and_IO_bound);
+
+	/*
+	 * The value of the next variable,
+	 * idling_boosts_thr_without_issues, is equal to that of
+	 * idling_boosts_thr, unless a special case holds. In this
+	 * special case, described below, idling may cause problems to
+	 * weight-raised queues.
+	 *
+	 * When the request pool is saturated (e.g., in the presence
+	 * of write hogs), if the processes associated with
+	 * non-weight-raised queues ask for requests at a lower rate,
+	 * then processes associated with weight-raised queues have a
+	 * higher probability to get a request from the pool
+	 * immediately (or at least soon) when they need one. Thus
+	 * they have a higher probability to actually get a fraction
+	 * of the device throughput proportional to their high
+	 * weight. This is especially true with NCQ-capable drives,
+	 * which enqueue several requests in advance, and further
+	 * reorder internally-queued requests.
+	 *
+	 * For this reason, we force to false the value of
+	 * idling_boosts_thr_without_issues if there are weight-raised
+	 * busy queues. In this case, and if bfqq is not weight-raised,
+	 * this guarantees that the device is not idled for bfqq (if,
+	 * instead, bfqq is weight-raised, then idling will be
+	 * guaranteed by another variable, see below). Combined with
+	 * the timestamping rules of BFQ (see [1] for details), this
+	 * behavior causes bfqq, and hence any sync non-weight-raised
+	 * queue, to get a lower number of requests served, and thus
+	 * to ask for a lower number of requests from the request
+	 * pool, before the busy weight-raised queues get served
+	 * again. This often mitigates starvation problems in the
+	 * presence of heavy write workloads and NCQ, thereby
+	 * guaranteeing a higher application and system responsiveness
+	 * in these hostile scenarios.
+	 */
+	idling_boosts_thr_without_issues = idling_boosts_thr &&
+		bfqd->wr_busy_queues == 0;
+
+	/*
+	 * There is then a case where idling must be performed not
+	 * for throughput concerns, but to preserve service
+	 * guarantees.
+	 *
+	 * To introduce this case, we can note that allowing the drive
+	 * to enqueue more than one request at a time, and hence
+	 * delegating de facto final scheduling decisions to the
+	 * drive's internal scheduler, entails loss of control on the
+	 * actual request service order. In particular, the critical
+	 * situation is when requests from different processes happen
+	 * to be present, at the same time, in the internal queue(s)
+	 * of the drive. In such a situation, the drive, by deciding
+	 * the service order of the internally-queued requests, does
+	 * determine also the actual throughput distribution among
+	 * these processes. But the drive typically has no notion or
+	 * concern about per-process throughput distribution, and
+	 * makes its decisions only on a per-request basis. Therefore,
+	 * the service distribution enforced by the drive's internal
+	 * scheduler is likely to coincide with the desired
+	 * device-throughput distribution only in a completely
+	 * symmetric scenario where:
+	 * (i)  each of these processes must get the same throughput as
+	 *      the others;
+	 * (ii) all these processes have the same I/O pattern
+	 *      (either sequential or random).
+	 * In fact, in such a scenario, the drive will tend to treat
+	 * the requests of each of these processes in about the same
+	 * way as the requests of the others, and thus to provide
+	 * each of these processes with about the same throughput
+	 * (which is exactly the desired throughput distribution). In
+	 * contrast, in any asymmetric scenario, device idling is
+	 * certainly needed to guarantee that bfqq receives its
+	 * assigned fraction of the device throughput (see [1] for
+	 * details).
+	 *
+	 * We address this issue by controlling, actually, only the
+	 * symmetry sub-condition (i), i.e., provided that
+	 * sub-condition (i) holds, idling is not performed,
+	 * regardless of whether sub-condition (ii) holds. In other
+	 * words, only if sub-condition (i) holds, then idling is
+	 * allowed, and the device tends to be prevented from queueing
+	 * many requests, possibly of several processes. The reason
+	 * for not controlling also sub-condition (ii) is that we
+	 * exploit preemption to preserve guarantees in case of
+	 * symmetric scenarios, even if (ii) does not hold, as
+	 * explained in the next two paragraphs.
+	 *
+	 * Even if a queue, say Q, is expired when it remains idle, Q
+	 * can still preempt the new in-service queue if the next
+	 * request of Q arrives soon (see the comments on
+	 * bfq_bfqq_update_budg_for_activation). If all queues and
+	 * groups have the same weight, this form of preemption,
+	 * combined with the hole-recovery heuristic described in the
+	 * comments on function bfq_bfqq_update_budg_for_activation,
+	 * are enough to preserve a correct bandwidth distribution in
+	 * the mid term, even without idling. In fact, even if not
+	 * idling allows the internal queues of the device to contain
+	 * many requests, and thus to reorder requests, we can rather
+	 * safely assume that the internal scheduler still preserves a
+	 * minimum of mid-term fairness. The motivation for using
+	 * preemption instead of idling is that, by not idling,
+	 * service guarantees are preserved without minimally
+	 * sacrificing throughput. In other words, both a high
+	 * throughput and its desired distribution are obtained.
+	 *
+	 * More precisely, this preemption-based, idleless approach
+	 * provides fairness in terms of IOPS, and not sectors per
+	 * second. This can be seen with a simple example. Suppose
+	 * that there are two queues with the same weight, but that
+	 * the first queue receives requests of 8 sectors, while the
+	 * second queue receives requests of 1024 sectors. In
+	 * addition, suppose that each of the two queues contains at
+	 * most one request at a time, which implies that each queue
+	 * always remains idle after it is served. Finally, after
+	 * remaining idle, each queue receives very quickly a new
+	 * request. It follows that the two queues are served
+	 * alternatively, preempting each other if needed. This
+	 * implies that, although both queues have the same weight,
+	 * the queue with large requests receives a service that is
+	 * 1024/8 times as high as the service received by the other
+	 * queue.
+	 *
+	 * On the other hand, device idling is performed, and thus
+	 * pure sector-domain guarantees are provided, for the
+	 * following queues, which are likely to need stronger
+	 * throughput guarantees: weight-raised queues, and queues
+	 * with a higher weight than other queues. When such queues
+	 * are active, sub-condition (i) is false, which triggers
+	 * device idling.
+	 *
+	 * According to the above considerations, the next variable is
+	 * true (only) if sub-condition (i) holds. To compute the
+	 * value of this variable, we not only use the return value of
+	 * the function bfq_symmetric_scenario(), but also check
+	 * whether bfqq is being weight-raised, because
+	 * bfq_symmetric_scenario() does not take into account also
+	 * weight-raised queues (see comments on
+	 * bfq_weights_tree_add()).
+	 *
+	 * As a side note, it is worth considering that the above
+	 * device-idling countermeasures may however fail in the
+	 * following unlucky scenario: if idling is (correctly)
+	 * disabled in a time period during which all symmetry
+	 * sub-conditions hold, and hence the device is allowed to
+	 * enqueue many requests, but at some later point in time some
+	 * sub-condition stops to hold, then it may become impossible
+	 * to let requests be served in the desired order until all
+	 * the requests already queued in the device have been served.
+	 */
+	asymmetric_scenario = bfqq->wr_coeff > 1 ||
+		!bfq_symmetric_scenario(bfqd);
+
+	/*
+	 * Finally, there is a case where maximizing throughput is the
+	 * best choice even if it may cause unfairness toward
+	 * bfqq. Such a case is when bfqq became active in a burst of
+	 * queue activations. Queues that became active during a large
+	 * burst benefit only from throughput, as discussed in the
+	 * comments on bfq_handle_burst. Thus, if bfqq became active
+	 * in a burst and not idling the device maximizes throughput,
+	 * then the device must no be idled, because not idling the
+	 * device provides bfqq and all other queues in the burst with
+	 * maximum benefit. Combining this and the above case, we can
+	 * now establish when idling is actually needed to preserve
+	 * service guarantees.
+	 */
+	idling_needed_for_service_guarantees =
+		asymmetric_scenario && !bfq_bfqq_in_large_burst(bfqq);
+
+	/*
+	 * We have now all the components we need to compute the
+	 * return value of the function, which is true only if idling
+	 * either boosts the throughput (without issues), or is
+	 * necessary to preserve service guarantees.
+	 */
+	bfq_log_bfqq(bfqd, bfqq, "sync %d idling_boosts_thr %d",
+		     bfq_bfqq_sync(bfqq), idling_boosts_thr);
+
+	bfq_log_bfqq(bfqd, bfqq,
+		     "wr_busy %d boosts %d IO-bound %d guar %d",
+		     bfqd->wr_busy_queues,
+		     idling_boosts_thr_without_issues,
+		     bfq_bfqq_IO_bound(bfqq),
+		     idling_needed_for_service_guarantees);
+
+	return idling_boosts_thr_without_issues ||
+		idling_needed_for_service_guarantees;
+}
+
+/*
+ * If the in-service queue is empty but the function bfq_bfqq_may_idle
+ * returns true, then:
+ * 1) the queue must remain in service and cannot be expired, and
+ * 2) the device must be idled to wait for the possible arrival of a new
+ *    request for the queue.
+ * See the comments on the function bfq_bfqq_may_idle for the reasons
+ * why performing device idling is the best choice to boost the throughput
+ * and preserve service guarantees when bfq_bfqq_may_idle itself
+ * returns true.
+ */
+static bool bfq_bfqq_must_idle(struct bfq_queue *bfqq)
+{
+	return RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_may_idle(bfqq);
+}
+
+/*
+ * Select a queue for service.  If we have a current queue in service,
+ * check whether to continue servicing it, or retrieve and set a new one.
+ */
+static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
+{
+	struct bfq_queue *bfqq;
+	struct request *next_rq;
+	enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT;
+
+	bfqq = bfqd->in_service_queue;
+	if (!bfqq)
+		goto new_queue;
+
+	bfq_log_bfqq(bfqd, bfqq, "already in-service queue");
+
+	if (bfq_may_expire_for_budg_timeout(bfqq) &&
+	    !bfq_bfqq_wait_request(bfqq) &&
+	    !bfq_bfqq_must_idle(bfqq))
+		goto expire;
+
+check_queue:
+	/*
+	 * This loop is rarely executed more than once. Even when it
+	 * happens, it is much more convenient to re-execute this loop
+	 * than to return NULL and trigger a new dispatch to get a
+	 * request served.
+	 */
+	next_rq = bfqq->next_rq;
+	/*
+	 * If bfqq has requests queued and it has enough budget left to
+	 * serve them, keep the queue, otherwise expire it.
+	 */
+	if (next_rq) {
+		BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list));
+
+		if (bfq_serv_to_charge(next_rq, bfqq) >
+			bfq_bfqq_budget_left(bfqq)) {
+			/*
+			 * Expire the queue for budget exhaustion,
+			 * which makes sure that the next budget is
+			 * enough to serve the next request, even if
+			 * it comes from the fifo expired path.
+			 */
+			reason = BFQ_BFQQ_BUDGET_EXHAUSTED;
+			goto expire;
+		} else {
+			/*
+			 * The idle timer may be pending because we may
+			 * not disable disk idling even when a new request
+			 * arrives.
+			 */
+			if (bfq_bfqq_wait_request(bfqq)) {
+				/*
+				 * If we get here: 1) at least a new request
+				 * has arrived but we have not disabled the
+				 * timer because the request was too small,
+				 * 2) then the block layer has unplugged
+				 * the device, causing the dispatch to be
+				 * invoked.
+				 *
+				 * Since the device is unplugged, now the
+				 * requests are probably large enough to
+				 * provide a reasonable throughput.
+				 * So we disable idling.
+				 */
+				bfq_clear_bfqq_wait_request(bfqq);
+				hrtimer_try_to_cancel(&bfqd->idle_slice_timer);
+			}
+			goto keep_queue;
+		}
+	}
+
+	/*
+	 * No requests pending. However, if the in-service queue is idling
+	 * for a new request, or has requests waiting for a completion and
+	 * may idle after their completion, then keep it anyway.
+	 */
+	if (bfq_bfqq_wait_request(bfqq) ||
+	    (bfqq->dispatched != 0 && bfq_bfqq_may_idle(bfqq))) {
+		bfqq = NULL;
+		goto keep_queue;
+	}
+
+	reason = BFQ_BFQQ_NO_MORE_REQUESTS;
+expire:
+	bfq_bfqq_expire(bfqd, bfqq, false, reason);
+new_queue:
+	bfqq = bfq_set_in_service_queue(bfqd);
+	if (bfqq) {
+		bfq_log_bfqq(bfqd, bfqq, "checking new queue");
+		goto check_queue;
+	}
+keep_queue:
+	if (bfqq)
+		bfq_log_bfqq(bfqd, bfqq, "returned this queue");
+	else
+		bfq_log(bfqd, "no queue returned");
+
+	return bfqq;
+}
+
+static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	struct bfq_entity *entity = &bfqq->entity;
+
+	if (bfqq->wr_coeff > 1) { /* queue is being weight-raised */
+		BUG_ON(bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time &&
+		       time_is_after_jiffies(bfqq->last_wr_start_finish));
+
+		bfq_log_bfqq(bfqd, bfqq,
+			"raising period dur %u/%u msec, old coeff %u, w %d(%d)",
+			jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish),
+			jiffies_to_msecs(bfqq->wr_cur_max_time),
+			bfqq->wr_coeff,
+			bfqq->entity.weight, bfqq->entity.orig_weight);
+
+		BUG_ON(bfqq != bfqd->in_service_queue && entity->weight !=
+		       entity->orig_weight * bfqq->wr_coeff);
+		if (entity->prio_changed)
+			bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change");
+
+		/*
+		 * If the queue was activated in a burst, or too much
+		 * time has elapsed from the beginning of this
+		 * weight-raising period, then end weight raising.
+		 */
+		if (bfq_bfqq_in_large_burst(bfqq))
+			bfq_bfqq_end_wr(bfqq);
+		else if (time_is_before_jiffies(bfqq->last_wr_start_finish +
+					   bfqq->wr_cur_max_time)) {
+			if (bfqq->wr_cur_max_time != bfqd->bfq_wr_rt_max_time ||
+			time_is_before_jiffies(bfqq->wr_start_at_switch_to_srt +
+					bfq_wr_duration(bfqd)))
+				bfq_bfqq_end_wr(bfqq);
+			else {
+				switch_back_to_interactive_wr(bfqq, bfqd);
+				BUG_ON(time_is_after_jiffies(
+					       bfqq->last_wr_start_finish));
+				bfqq->entity.prio_changed = 1;
+				bfq_log_bfqq(bfqd, bfqq,
+					"back to interactive wr");
+			}
+		}
+		if (bfqq->wr_coeff > 1 &&
+		    bfqq->wr_cur_max_time != bfqd->bfq_wr_rt_max_time &&
+		    bfqq->service_from_wr > max_service_from_wr) {
+			/* see comments on max_service_from_wr */
+			bfq_bfqq_end_wr(bfqq);
+			bfq_log_bfqq(bfqd, bfqq,
+				     "too much service");
+		}
+	}
+	/*
+	 * To improve latency (for this or other queues), immediately
+	 * update weight both if it must be raised and if it must be
+	 * lowered. Since, entity may be on some active tree here, and
+	 * might have a pending change of its ioprio class, invoke
+	 * next function with the last parameter unset (see the
+	 * comments on the function).
+	 */
+	if ((entity->weight > entity->orig_weight) != (bfqq->wr_coeff > 1))
+		__bfq_entity_update_weight_prio(bfq_entity_service_tree(entity),
+						entity, false);
+}
+
+/*
+ * Dispatch next request from bfqq.
+ */
+static struct request *bfq_dispatch_rq_from_bfqq(struct bfq_data *bfqd,
+						 struct bfq_queue *bfqq)
+{
+	struct request *rq = bfqq->next_rq;
+	unsigned long service_to_charge;
+
+	BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list));
+	BUG_ON(!rq);
+	service_to_charge = bfq_serv_to_charge(rq, bfqq);
+
+	BUG_ON(service_to_charge > bfq_bfqq_budget_left(bfqq));
+
+	BUG_ON(bfqq->entity.budget < bfqq->entity.service);
+
+	bfq_bfqq_served(bfqq, service_to_charge);
+
+	BUG_ON(bfqq->entity.budget < bfqq->entity.service);
+
+	bfq_dispatch_remove(bfqd->queue, rq);
+
+	/*
+	 * If weight raising has to terminate for bfqq, then next
+	 * function causes an immediate update of bfqq's weight,
+	 * without waiting for next activation. As a consequence, on
+	 * expiration, bfqq will be timestamped as if has never been
+	 * weight-raised during this service slot, even if it has
+	 * received part or even most of the service as a
+	 * weight-raised queue. This inflates bfqq's timestamps, which
+	 * is beneficial, as bfqq is then more willing to leave the
+	 * device immediately to possible other weight-raised queues.
+	 */
+	bfq_update_wr_data(bfqd, bfqq);
+
+	bfq_log_bfqq(bfqd, bfqq,
+	     "dispatched %u sec req (%llu), budg left %d, new disp_nr %d",
+			blk_rq_sectors(rq),
+			(unsigned long long) blk_rq_pos(rq),
+		     bfq_bfqq_budget_left(bfqq),
+		     bfqq->dispatched);
+
+	/*
+	 * Expire bfqq, pretending that its budget expired, if bfqq
+	 * belongs to CLASS_IDLE and other queues are waiting for
+	 * service.
+	 */
+	if (bfqd->busy_queues > 1 && bfq_class_idle(bfqq))
+		goto expire;
+
+	return rq;
+
+expire:
+	bfq_bfqq_expire(bfqd, bfqq, false, BFQ_BFQQ_BUDGET_EXHAUSTED);
+	return rq;
+}
+
+static bool bfq_has_work(struct blk_mq_hw_ctx *hctx)
+{
+	struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
+
+	bfq_log(bfqd, "dispatch_non_empty %d busy_queues %d",
+		!list_empty_careful(&bfqd->dispatch), bfqd->busy_queues > 0);
+
+	/*
+	 * Avoiding lock: a race on bfqd->busy_queues should cause at
+	 * most a call to dispatch for nothing
+	 */
+	return !list_empty_careful(&bfqd->dispatch) ||
+		bfqd->busy_queues > 0;
+}
+
+static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
+{
+	struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
+	struct request *rq = NULL;
+	struct bfq_queue *bfqq = NULL;
+
+	if (!list_empty(&bfqd->dispatch)) {
+		rq = list_first_entry(&bfqd->dispatch, struct request,
+				      queuelist);
+		list_del_init(&rq->queuelist);
+		rq->rq_flags &= ~RQF_DISP_LIST;
+
+		bfq_log(bfqd,
+			"picked %p from dispatch list", rq);
+		bfqq = RQ_BFQQ(rq);
+
+		if (bfqq) {
+			/*
+			 * Increment counters here, because this
+			 * dispatch does not follow the standard
+			 * dispatch flow (where counters are
+			 * incremented)
+			 */
+			bfqq->dispatched++;
+
+			/*
+			 * TESTING: reset DISP_LIST flag, because: 1)
+			 * this rq this request has passed through
+			 * bfq_prepare_request, 2) then it will have
+			 * bfq_finish_requeue_request invoked on it, and 3) in
+			 * bfq_finish_requeue_request we use this flag to check
+			 * that bfq_finish_requeue_request is not invoked on
+			 * requests for which bfq_prepare_request has
+			 * been invoked.
+			 */
+			rq->rq_flags &= ~RQF_DISP_LIST;
+			goto inc_in_driver_start_rq;
+		}
+
+		/*
+		 * We exploit the bfq_finish_requeue_request hook to decrement
+		 * rq_in_driver, but bfq_finish_requeue_request will not be
+		 * invoked on this request. So, to avoid unbalance,
+		 * just start this request, without incrementing
+		 * rq_in_driver. As a negative consequence,
+		 * rq_in_driver is deceptively lower than it should be
+		 * while this request is in service. This may cause
+		 * bfq_schedule_dispatch to be invoked uselessly.
+		 *
+		 * As for implementing an exact solution, the
+		 * bfq_finish_requeue_request hook, if defined, is probably
+		 * invoked also on this request. So, by exploiting
+		 * this hook, we could 1) increment rq_in_driver here,
+		 * and 2) decrement it in bfq_finish_requeue_request. Such a
+		 * solution would let the value of the counter be
+		 * always accurate, but it would entail using an extra
+		 * interface function. This cost seems higher than the
+		 * benefit, being the frequency of non-elevator-private
+		 * requests very low.
+		 */
+		goto start_rq;
+	}
+
+	bfq_log(bfqd, "%d busy queues", bfqd->busy_queues);
+
+	if (bfqd->busy_queues == 0)
+		goto exit;
+
+	/*
+	 * Force device to serve one request at a time if
+	 * strict_guarantees is true. Forcing this service scheme is
+	 * currently the ONLY way to guarantee that the request
+	 * service order enforced by the scheduler is respected by a
+	 * queueing device. Otherwise the device is free even to make
+	 * some unlucky request wait for as long as the device
+	 * wishes.
+	 *
+	 * Of course, serving one request at at time may cause loss of
+	 * throughput.
+	 */
+	if (bfqd->strict_guarantees && bfqd->rq_in_driver > 0)
+		goto exit;
+
+	bfqq = bfq_select_queue(bfqd);
+	if (!bfqq)
+		goto exit;
+
+	BUG_ON(bfqq->entity.budget < bfqq->entity.service);
+
+	BUG_ON(bfq_bfqq_wait_request(bfqq));
+
+	rq = bfq_dispatch_rq_from_bfqq(bfqd, bfqq);
+
+	BUG_ON(bfqq->next_rq == NULL &&
+	       bfqq->entity.budget < bfqq->entity.service);
+
+	if (rq) {
+	inc_in_driver_start_rq:
+		bfqd->rq_in_driver++;
+	start_rq:
+		rq->rq_flags |= RQF_STARTED;
+		if (bfqq)
+			bfq_log_bfqq(bfqd, bfqq,
+				"%s request %p, rq_in_driver %d",
+				     bfq_bfqq_sync(bfqq) ? "sync" : "async",
+				     rq,
+				     bfqd->rq_in_driver);
+		else
+			bfq_log(bfqd,
+		"request %p from dispatch list, rq_in_driver %d",
+				rq, bfqd->rq_in_driver);
+	} else
+		bfq_log(bfqd,
+		"returned NULL request, rq_in_driver %d",
+			bfqd->rq_in_driver);
+
+exit:
+	return rq;
+}
+
+
+#if defined(BFQ_GROUP_IOSCHED_ENABLED) && defined(CONFIG_DEBUG_BLK_CGROUP)
+static void bfq_update_dispatch_stats(struct request_queue *q,
+				      struct request *rq,
+				      struct bfq_queue *in_serv_queue,
+				      bool idle_timer_disabled)
+{
+	struct bfq_queue *bfqq = rq ? RQ_BFQQ(rq) : NULL;
+
+	if (!idle_timer_disabled && !bfqq)
+		return;
+
+	/*
+	 * rq and bfqq are guaranteed to exist until this function
+	 * ends, for the following reasons. First, rq can be
+	 * dispatched to the device, and then can be completed and
+	 * freed, only after this function ends. Second, rq cannot be
+	 * merged (and thus freed because of a merge) any longer,
+	 * because it has already started. Thus rq cannot be freed
+	 * before this function ends, and, since rq has a reference to
+	 * bfqq, the same guarantee holds for bfqq too.
+	 *
+	 * In addition, the following queue lock guarantees that
+	 * bfqq_group(bfqq) exists as well.
+	 */
+	spin_lock_irq(q->queue_lock);
+	if (idle_timer_disabled)
+		/*
+		 * Since the idle timer has been disabled,
+		 * in_serv_queue contained some request when
+		 * __bfq_dispatch_request was invoked above, which
+		 * implies that rq was picked exactly from
+		 * in_serv_queue. Thus in_serv_queue == bfqq, and is
+		 * therefore guaranteed to exist because of the above
+		 * arguments.
+		 */
+		bfqg_stats_update_idle_time(bfqq_group(in_serv_queue));
+	if (bfqq) {
+		struct bfq_group *bfqg = bfqq_group(bfqq);
+
+		bfqg_stats_update_avg_queue_size(bfqg);
+		bfqg_stats_set_start_empty_time(bfqg);
+		bfqg_stats_update_io_remove(bfqg, rq->cmd_flags);
+	}
+	spin_unlock_irq(q->queue_lock);
+}
+#else
+static inline void bfq_update_dispatch_stats(struct request_queue *q,
+					     struct request *rq,
+					     struct bfq_queue *in_serv_queue,
+					     bool idle_timer_disabled) {}
+#endif
+static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
+{
+	struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
+	struct request *rq;
+	struct bfq_queue *in_serv_queue;
+	bool waiting_rq, idle_timer_disabled;
+
+	spin_lock_irq(&bfqd->lock);
+
+	in_serv_queue = bfqd->in_service_queue;
+	waiting_rq = in_serv_queue && bfq_bfqq_wait_request(in_serv_queue);
+
+	rq = __bfq_dispatch_request(hctx);
+
+	idle_timer_disabled =
+		waiting_rq && !bfq_bfqq_wait_request(in_serv_queue);
+
+	spin_unlock_irq(&bfqd->lock);
+
+	bfq_update_dispatch_stats(hctx->queue, rq, in_serv_queue,
+				  idle_timer_disabled);
+
+	return rq;
+}
+
+/*
+ * Task holds one reference to the queue, dropped when task exits.  Each rq
+ * in-flight on this queue also holds a reference, dropped when rq is freed.
+ *
+ * Scheduler lock must be held here. Recall not to use bfqq after calling
+ * this function on it.
+ */
+static void bfq_put_queue(struct bfq_queue *bfqq)
+{
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	struct bfq_group *bfqg = bfqq_group(bfqq);
+#endif
+
+	assert_spin_locked(&bfqq->bfqd->lock);
+
+	BUG_ON(bfqq->ref <= 0);
+
+	if (bfqq->bfqd)
+		bfq_log_bfqq(bfqq->bfqd, bfqq, "%p %d", bfqq, bfqq->ref);
+
+	bfqq->ref--;
+	if (bfqq->ref)
+		return;
+
+	BUG_ON(rb_first(&bfqq->sort_list));
+	BUG_ON(bfqq->allocated != 0);
+	BUG_ON(bfqq->entity.tree);
+	BUG_ON(bfq_bfqq_busy(bfqq));
+
+	if (!hlist_unhashed(&bfqq->burst_list_node)) {
+		hlist_del_init(&bfqq->burst_list_node);
+		/*
+		 * Decrement also burst size after the removal, if the
+		 * process associated with bfqq is exiting, and thus
+		 * does not contribute to the burst any longer. This
+		 * decrement helps filter out false positives of large
+		 * bursts, when some short-lived process (often due to
+		 * the execution of commands by some service) happens
+		 * to start and exit while a complex application is
+		 * starting, and thus spawning several processes that
+		 * do I/O (and that *must not* be treated as a large
+		 * burst, see comments on bfq_handle_burst).
+		 *
+		 * In particular, the decrement is performed only if:
+		 * 1) bfqq is not a merged queue, because, if it is,
+		 * then this free of bfqq is not triggered by the exit
+		 * of the process bfqq is associated with, but exactly
+		 * by the fact that bfqq has just been merged.
+		 * 2) burst_size is greater than 0, to handle
+		 * unbalanced decrements. Unbalanced decrements may
+		 * happen in te following case: bfqq is inserted into
+		 * the current burst list--without incrementing
+		 * bust_size--because of a split, but the current
+		 * burst list is not the burst list bfqq belonged to
+		 * (see comments on the case of a split in
+		 * bfq_set_request).
+		 */
+		if (bfqq->bic && bfqq->bfqd->burst_size > 0)
+			bfqq->bfqd->burst_size--;
+	}
+
+	if (bfqq->bfqd)
+		bfq_log_bfqq(bfqq->bfqd, bfqq, "%p freed", bfqq);
+
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	bfq_log_bfqq(bfqq->bfqd, bfqq, "putting blkg and bfqg %p\n", bfqg);
+	bfqg_and_blkg_put(bfqg);
+#endif
+	kmem_cache_free(bfq_pool, bfqq);
+}
+
+static void bfq_put_cooperator(struct bfq_queue *bfqq)
+{
+	struct bfq_queue *__bfqq, *next;
+
+	/*
+	 * If this queue was scheduled to merge with another queue, be
+	 * sure to drop the reference taken on that queue (and others in
+	 * the merge chain). See bfq_setup_merge and bfq_merge_bfqqs.
+	 */
+	__bfqq = bfqq->new_bfqq;
+	while (__bfqq) {
+		if (__bfqq == bfqq)
+			break;
+		next = __bfqq->new_bfqq;
+		bfq_put_queue(__bfqq);
+		__bfqq = next;
+	}
+}
+
+static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	if (bfqq == bfqd->in_service_queue) {
+		__bfq_bfqq_expire(bfqd, bfqq);
+		bfq_schedule_dispatch(bfqd);
+	}
+
+	bfq_log_bfqq(bfqd, bfqq, "%p, %d", bfqq, bfqq->ref);
+
+	bfq_put_cooperator(bfqq);
+
+	bfq_put_queue(bfqq); /* release process reference */
+}
+
+static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync)
+{
+	struct bfq_queue *bfqq = bic_to_bfqq(bic, is_sync);
+	struct bfq_data *bfqd;
+
+	if (bfqq)
+		bfqd = bfqq->bfqd; /* NULL if scheduler already exited */
+
+	if (bfqq && bfqd) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&bfqd->lock, flags);
+		bfq_exit_bfqq(bfqd, bfqq);
+		bic_set_bfqq(bic, NULL, is_sync);
+		spin_unlock_irqrestore(&bfqd->lock, flags);
+	}
+}
+
+static void bfq_exit_icq(struct io_cq *icq)
+{
+	struct bfq_io_cq *bic = icq_to_bic(icq);
+
+	BUG_ON(!bic);
+	bfq_exit_icq_bfqq(bic, true);
+	bfq_exit_icq_bfqq(bic, false);
+}
+
+/*
+ * Update the entity prio values; note that the new values will not
+ * be used until the next (re)activation.
+ */
+static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq,
+				     struct bfq_io_cq *bic)
+{
+	struct task_struct *tsk = current;
+	int ioprio_class;
+	struct bfq_data *bfqd = bfqq->bfqd;
+
+	WARN_ON(!bfqd);
+	if (!bfqd)
+		return;
+
+	ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
+	switch (ioprio_class) {
+	default:
+		dev_err(bfqq->bfqd->queue->backing_dev_info->dev,
+			"bfq: bad prio class %d\n", ioprio_class);
+	case IOPRIO_CLASS_NONE:
+		/*
+		 * No prio set, inherit CPU scheduling settings.
+		 */
+		bfqq->new_ioprio = task_nice_ioprio(tsk);
+		bfqq->new_ioprio_class = task_nice_ioclass(tsk);
+		break;
+	case IOPRIO_CLASS_RT:
+		bfqq->new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
+		bfqq->new_ioprio_class = IOPRIO_CLASS_RT;
+		break;
+	case IOPRIO_CLASS_BE:
+		bfqq->new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
+		bfqq->new_ioprio_class = IOPRIO_CLASS_BE;
+		break;
+	case IOPRIO_CLASS_IDLE:
+		bfqq->new_ioprio_class = IOPRIO_CLASS_IDLE;
+		bfqq->new_ioprio = 7;
+		break;
+	}
+
+	if (bfqq->new_ioprio >= IOPRIO_BE_NR) {
+		pr_crit("bfq_set_next_ioprio_data: new_ioprio %d\n",
+			bfqq->new_ioprio);
+		BUG();
+	}
+
+	bfqq->entity.new_weight = bfq_ioprio_to_weight(bfqq->new_ioprio);
+	bfqq->entity.prio_changed = 1;
+	bfq_log_bfqq(bfqq->bfqd, bfqq,
+		     "bic_class %d prio %d class %d",
+		     ioprio_class, bfqq->new_ioprio, bfqq->new_ioprio_class);
+}
+
+static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio)
+{
+	struct bfq_data *bfqd = bic_to_bfqd(bic);
+	struct bfq_queue *bfqq;
+	unsigned long uninitialized_var(flags);
+	int ioprio = bic->icq.ioc->ioprio;
+
+	/*
+	 * This condition may trigger on a newly created bic, be sure to
+	 * drop the lock before returning.
+	 */
+	if (unlikely(!bfqd) || likely(bic->ioprio == ioprio))
+		return;
+
+	bic->ioprio = ioprio;
+
+	bfqq = bic_to_bfqq(bic, false);
+	if (bfqq) {
+		/* release process reference on this queue */
+		bfq_put_queue(bfqq);
+		bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic);
+		bic_set_bfqq(bic, bfqq, false);
+		bfq_log_bfqq(bfqd, bfqq,
+			     "bfqq %p %d",
+			     bfqq, bfqq->ref);
+	}
+
+	bfqq = bic_to_bfqq(bic, true);
+	if (bfqq)
+		bfq_set_next_ioprio_data(bfqq, bic);
+}
+
+static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+			  struct bfq_io_cq *bic, pid_t pid, int is_sync)
+{
+	RB_CLEAR_NODE(&bfqq->entity.rb_node);
+	INIT_LIST_HEAD(&bfqq->fifo);
+	INIT_HLIST_NODE(&bfqq->burst_list_node);
+	BUG_ON(!hlist_unhashed(&bfqq->burst_list_node));
+
+	bfqq->ref = 0;
+	bfqq->bfqd = bfqd;
+
+	if (bic)
+		bfq_set_next_ioprio_data(bfqq, bic);
+
+	if (is_sync) {
+		/*
+		 * No need to mark as has_short_ttime if in
+		 * idle_class, because no device idling is performed
+		 * for queues in idle class
+		 */
+		if (!bfq_class_idle(bfqq))
+			/* tentatively mark as has_short_ttime */
+			bfq_mark_bfqq_has_short_ttime(bfqq);
+		bfq_mark_bfqq_sync(bfqq);
+		bfq_mark_bfqq_just_created(bfqq);
+	} else
+		bfq_clear_bfqq_sync(bfqq);
+
+	bfqq->ttime.last_end_request = ktime_get_ns() - (1ULL<<32);
+
+	bfq_mark_bfqq_IO_bound(bfqq);
+
+	/* Tentative initial value to trade off between thr and lat */
+	bfqq->max_budget = (2 * bfq_max_budget(bfqd)) / 3;
+	bfqq->pid = pid;
+
+	bfqq->wr_coeff = 1;
+	bfqq->last_wr_start_finish = jiffies;
+	bfqq->wr_start_at_switch_to_srt = bfq_smallest_from_now();
+	bfqq->budget_timeout = bfq_smallest_from_now();
+	bfqq->split_time = bfq_smallest_from_now();
+
+	/*
+	 * To not forget the possibly high bandwidth consumed by a
+	 * process/queue in the recent past,
+	 * bfq_bfqq_softrt_next_start() returns a value at least equal
+	 * to the current value of bfqq->soft_rt_next_start (see
+	 * comments on bfq_bfqq_softrt_next_start).  Set
+	 * soft_rt_next_start to now, to mean that bfqq has consumed
+	 * no bandwidth so far.
+	 */
+	bfqq->soft_rt_next_start = jiffies;
+
+	/* first request is almost certainly seeky */
+	bfqq->seek_history = 1;
+}
+
+static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd,
+					       struct bfq_group *bfqg,
+					       int ioprio_class, int ioprio)
+{
+	switch (ioprio_class) {
+	case IOPRIO_CLASS_RT:
+		return &bfqg->async_bfqq[0][ioprio];
+	case IOPRIO_CLASS_NONE:
+		ioprio = IOPRIO_NORM;
+		/* fall through */
+	case IOPRIO_CLASS_BE:
+		return &bfqg->async_bfqq[1][ioprio];
+	case IOPRIO_CLASS_IDLE:
+		return &bfqg->async_idle_bfqq;
+	default:
+		BUG();
+	}
+}
+
+static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
+				       struct bio *bio, bool is_sync,
+				       struct bfq_io_cq *bic)
+{
+	const int ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
+	const int ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
+	struct bfq_queue **async_bfqq = NULL;
+	struct bfq_queue *bfqq;
+	struct bfq_group *bfqg;
+
+	rcu_read_lock();
+
+	bfqg = bfq_find_set_group(bfqd, bio_blkcg(bio));
+	if (!bfqg) {
+		bfqq = &bfqd->oom_bfqq;
+		goto out;
+	}
+
+	if (!is_sync) {
+		async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class,
+						  ioprio);
+		bfqq = *async_bfqq;
+		if (bfqq)
+			goto out;
+	}
+
+	bfqq = kmem_cache_alloc_node(bfq_pool,
+				     GFP_NOWAIT | __GFP_ZERO | __GFP_NOWARN,
+				     bfqd->queue->node);
+
+	if (bfqq) {
+		bfq_init_bfqq(bfqd, bfqq, bic, current->pid,
+			      is_sync);
+		bfq_init_entity(&bfqq->entity, bfqg);
+		bfq_log_bfqq(bfqd, bfqq, "allocated");
+	} else {
+		bfqq = &bfqd->oom_bfqq;
+		bfq_log_bfqq(bfqd, bfqq, "using oom bfqq");
+		goto out;
+	}
+
+	/*
+	 * Pin the queue now that it's allocated, scheduler exit will
+	 * prune it.
+	 */
+	if (async_bfqq) {
+		bfqq->ref++; /*
+			      * Extra group reference, w.r.t. sync
+			      * queue. This extra reference is removed
+			      * only if bfqq->bfqg disappears, to
+			      * guarantee that this queue is not freed
+			      * until its group goes away.
+			      */
+		bfq_log_bfqq(bfqd, bfqq, "bfqq not in async: %p, %d",
+			     bfqq, bfqq->ref);
+		*async_bfqq = bfqq;
+	}
+
+out:
+	bfqq->ref++; /* get a process reference to this queue */
+	bfq_log_bfqq(bfqd, bfqq, "at end: %p, %d", bfqq, bfqq->ref);
+	rcu_read_unlock();
+	return bfqq;
+}
+
+static void bfq_update_io_thinktime(struct bfq_data *bfqd,
+				    struct bfq_queue *bfqq)
+{
+	struct bfq_ttime *ttime = &bfqq->ttime;
+	u64 elapsed = ktime_get_ns() - bfqq->ttime.last_end_request;
+
+	elapsed = min_t(u64, elapsed, 2 * bfqd->bfq_slice_idle);
+
+	ttime->ttime_samples = (7*bfqq->ttime.ttime_samples + 256) / 8;
+	ttime->ttime_total = div_u64(7*ttime->ttime_total + 256*elapsed,  8);
+	ttime->ttime_mean = div64_ul(ttime->ttime_total + 128,
+				     ttime->ttime_samples);
+}
+
+static void
+bfq_update_io_seektime(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+		       struct request *rq)
+{
+	bfqq->seek_history <<= 1;
+	bfqq->seek_history |=
+		get_sdist(bfqq->last_request_pos, rq) > BFQQ_SEEK_THR &&
+		(!blk_queue_nonrot(bfqd->queue) ||
+		 blk_rq_sectors(rq) < BFQQ_SECT_THR_NONROT);
+}
+
+static void bfq_update_has_short_ttime(struct bfq_data *bfqd,
+				       struct bfq_queue *bfqq,
+				       struct bfq_io_cq *bic)
+{
+	bool has_short_ttime = true;
+
+	/*
+	 * No need to update has_short_ttime if bfqq is async or in
+	 * idle io prio class, or if bfq_slice_idle is zero, because
+	 * no device idling is performed for bfqq in this case.
+	 */
+	if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq) ||
+	    bfqd->bfq_slice_idle == 0)
+		return;
+
+	/* Idle window just restored, statistics are meaningless. */
+	if (time_is_after_eq_jiffies(bfqq->split_time +
+				     bfqd->bfq_wr_min_idle_time))
+		return;
+
+	/* Think time is infinite if no process is linked to
+	 * bfqq. Otherwise check average think time to
+	 * decide whether to mark as has_short_ttime
+	 */
+	if (atomic_read(&bic->icq.ioc->active_ref) == 0 ||
+	    (bfq_sample_valid(bfqq->ttime.ttime_samples) &&
+	     bfqq->ttime.ttime_mean > bfqd->bfq_slice_idle))
+		has_short_ttime = false;
+
+	bfq_log_bfqq(bfqd, bfqq, "has_short_ttime %d",
+		has_short_ttime);
+
+	if (has_short_ttime)
+		bfq_mark_bfqq_has_short_ttime(bfqq);
+	else
+		bfq_clear_bfqq_has_short_ttime(bfqq);
+}
+
+/*
+ * Called when a new fs request (rq) is added to bfqq.  Check if there's
+ * something we should do about it.
+ */
+static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+			    struct request *rq)
+{
+	struct bfq_io_cq *bic = RQ_BIC(rq);
+
+	if (rq->cmd_flags & REQ_META)
+		bfqq->meta_pending++;
+
+	bfq_update_io_thinktime(bfqd, bfqq);
+	bfq_update_has_short_ttime(bfqd, bfqq, bic);
+	bfq_update_io_seektime(bfqd, bfqq, rq);
+
+	bfq_log_bfqq(bfqd, bfqq,
+		     "has_short_ttime=%d (seeky %d)",
+		     bfq_bfqq_has_short_ttime(bfqq), BFQQ_SEEKY(bfqq));
+
+	bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
+
+	if (bfqq == bfqd->in_service_queue && bfq_bfqq_wait_request(bfqq)) {
+		bool small_req = bfqq->queued[rq_is_sync(rq)] == 1 &&
+				 blk_rq_sectors(rq) < 32;
+		bool budget_timeout = bfq_bfqq_budget_timeout(bfqq);
+
+		/*
+		 * There is just this request queued: if the request
+		 * is small and the queue is not to be expired, then
+		 * just exit.
+		 *
+		 * In this way, if the device is being idled to wait
+		 * for a new request from the in-service queue, we
+		 * avoid unplugging the device and committing the
+		 * device to serve just a small request. On the
+		 * contrary, we wait for the block layer to decide
+		 * when to unplug the device: hopefully, new requests
+		 * will be merged to this one quickly, then the device
+		 * will be unplugged and larger requests will be
+		 * dispatched.
+		 */
+		if (small_req && !budget_timeout)
+			return;
+
+		/*
+		 * A large enough request arrived, or the queue is to
+		 * be expired: in both cases disk idling is to be
+		 * stopped, so clear wait_request flag and reset
+		 * timer.
+		 */
+		bfq_clear_bfqq_wait_request(bfqq);
+		hrtimer_try_to_cancel(&bfqd->idle_slice_timer);
+
+		/*
+		 * The queue is not empty, because a new request just
+		 * arrived. Hence we can safely expire the queue, in
+		 * case of budget timeout, without risking that the
+		 * timestamps of the queue are not updated correctly.
+		 * See [1] for more details.
+		 */
+		if (budget_timeout)
+			bfq_bfqq_expire(bfqd, bfqq, false,
+					BFQ_BFQQ_BUDGET_TIMEOUT);
+	}
+}
+
+/* returns true if it causes the idle timer to be disabled */
+static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
+{
+	struct bfq_queue *bfqq = RQ_BFQQ(rq), *new_bfqq;
+	bool waiting, idle_timer_disabled = false;
+	BUG_ON(!bfqq);
+
+	assert_spin_locked(&bfqd->lock);
+
+	bfq_log_bfqq(bfqd, bfqq, "rq %p bfqq %p", rq, bfqq);
+
+	/*
+	 * An unplug may trigger a requeue of a request from the device
+	 * driver: make sure we are in process context while trying to
+	 * merge two bfq_queues.
+	 */
+	if (!in_interrupt()) {
+		new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true);
+		if (new_bfqq) {
+			if (bic_to_bfqq(RQ_BIC(rq), 1) != bfqq)
+				new_bfqq = bic_to_bfqq(RQ_BIC(rq), 1);
+			/*
+			 * Release the request's reference to the old bfqq
+			 * and make sure one is taken to the shared queue.
+			 */
+			new_bfqq->allocated++;
+			bfqq->allocated--;
+			bfq_log_bfqq(bfqd, bfqq,
+		     "new allocated %d", bfqq->allocated);
+			bfq_log_bfqq(bfqd, new_bfqq,
+		     "new_bfqq new allocated %d",
+				     bfqq->allocated);
+
+			new_bfqq->ref++;
+			/*
+			 * If the bic associated with the process
+			 * issuing this request still points to bfqq
+			 * (and thus has not been already redirected
+			 * to new_bfqq or even some other bfq_queue),
+			 * then complete the merge and redirect it to
+			 * new_bfqq.
+			 */
+			if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq)
+				bfq_merge_bfqqs(bfqd, RQ_BIC(rq),
+						bfqq, new_bfqq);
+
+			bfq_clear_bfqq_just_created(bfqq);
+			/*
+			 * rq is about to be enqueued into new_bfqq,
+			 * release rq reference on bfqq
+			 */
+			bfq_put_queue(bfqq);
+			rq->elv.priv[1] = new_bfqq;
+			bfqq = new_bfqq;
+		}
+	}
+
+	waiting = bfqq && bfq_bfqq_wait_request(bfqq);
+	bfq_add_request(rq);
+	idle_timer_disabled = waiting && !bfq_bfqq_wait_request(bfqq);
+
+	rq->fifo_time = ktime_get_ns() + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
+	list_add_tail(&rq->queuelist, &bfqq->fifo);
+
+	bfq_rq_enqueued(bfqd, bfqq, rq);
+
+	return idle_timer_disabled;
+}
+
+#if defined(BFQ_GROUP_IOSCHED_ENABLED) && defined(CONFIG_DEBUG_BLK_CGROUP)
+static void bfq_update_insert_stats(struct request_queue *q,
+				    struct bfq_queue *bfqq,
+				    bool idle_timer_disabled,
+				    unsigned int cmd_flags)
+{
+	if (!bfqq)
+		return;
+
+	/*
+	 * bfqq still exists, because it can disappear only after
+	 * either it is merged with another queue, or the process it
+	 * is associated with exits. But both actions must be taken by
+	 * the same process currently executing this flow of
+	 * instructions.
+	 *
+	 * In addition, the following queue lock guarantees that
+	 * bfqq_group(bfqq) exists as well.
+	 */
+	spin_lock_irq(q->queue_lock);
+	bfqg_stats_update_io_add(bfqq_group(bfqq), bfqq, cmd_flags);
+	if (idle_timer_disabled)
+		bfqg_stats_update_idle_time(bfqq_group(bfqq));
+	spin_unlock_irq(q->queue_lock);
+}
+#else
+static inline void bfq_update_insert_stats(struct request_queue *q,
+					   struct bfq_queue *bfqq,
+					   bool idle_timer_disabled,
+					   unsigned int cmd_flags) {}
+#endif
+
+static void bfq_prepare_request(struct request *rq, struct bio *bio);
+
+static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
+			       bool at_head)
+{
+	struct request_queue *q = hctx->queue;
+	struct bfq_data *bfqd = q->elevator->elevator_data;
+	struct bfq_queue *bfqq = RQ_BFQQ(rq);
+	bool idle_timer_disabled = false;
+	unsigned int cmd_flags;
+
+	spin_lock_irq(&bfqd->lock);
+	if (blk_mq_sched_try_insert_merge(q, rq)) {
+		spin_unlock_irq(&bfqd->lock);
+		return;
+	}
+
+	spin_unlock_irq(&bfqd->lock);
+
+	blk_mq_sched_request_inserted(rq);
+
+	spin_lock_irq(&bfqd->lock);
+	if (at_head || blk_rq_is_passthrough(rq)) {
+		if (at_head)
+			list_add(&rq->queuelist, &bfqd->dispatch);
+		else
+			list_add_tail(&rq->queuelist, &bfqd->dispatch);
+
+		rq->rq_flags |= RQF_DISP_LIST;
+		if (bfqq)
+			bfq_log_bfqq(bfqd, bfqq,
+				     "%p in disp: at_head %d",
+				     rq, at_head);
+		else
+			bfq_log(bfqd,
+				"%p in disp: at_head %d",
+				rq, at_head);
+	} else {
+		BUG_ON(!(rq->rq_flags & RQF_GOT));
+		rq->rq_flags &= ~RQF_GOT;
+
+		if (!bfqq) {
+			/*
+			 * This should never happen. Most likely rq is
+			 * a requeued regular request, being
+			 * re-inserted without being first
+			 * re-prepared. Do a prepare, to avoid
+			 * failure.
+			 */
+			pr_warn("Regular request associated with no queue");
+			WARN_ON(1);
+			bfq_prepare_request(rq, rq->bio);
+			bfqq = RQ_BFQQ(rq);
+		}
+
+		idle_timer_disabled = __bfq_insert_request(bfqd, rq);
+		/*
+		 * Update bfqq, because, if a queue merge has occurred
+		 * in __bfq_insert_request, then rq has been
+		 * redirected into a new queue.
+		 */
+		bfqq = RQ_BFQQ(rq);
+
+		if (rq_mergeable(rq)) {
+			elv_rqhash_add(q, rq);
+			if (!q->last_merge)
+				q->last_merge = rq;
+		}
+	}
+
+	/*
+	 * Cache cmd_flags before releasing scheduler lock, because rq
+	 * may disappear afterwards (for example, because of a request
+	 * merge).
+	 */
+	cmd_flags = rq->cmd_flags;
+
+	spin_unlock_irq(&bfqd->lock);
+	bfq_update_insert_stats(q, bfqq, idle_timer_disabled,
+				cmd_flags);
+}
+
+static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx,
+				struct list_head *list, bool at_head)
+{
+	while (!list_empty(list)) {
+		struct request *rq;
+
+		rq = list_first_entry(list, struct request, queuelist);
+		list_del_init(&rq->queuelist);
+		bfq_insert_request(hctx, rq, at_head);
+	}
+}
+
+static void bfq_update_hw_tag(struct bfq_data *bfqd)
+{
+	bfqd->max_rq_in_driver = max_t(int, bfqd->max_rq_in_driver,
+				       bfqd->rq_in_driver);
+
+	if (bfqd->hw_tag == 1)
+		return;
+
+	/*
+	 * This sample is valid if the number of outstanding requests
+	 * is large enough to allow a queueing behavior.  Note that the
+	 * sum is not exact, as it's not taking into account deactivated
+	 * requests.
+	 */
+	if (bfqd->rq_in_driver + bfqd->queued < BFQ_HW_QUEUE_THRESHOLD)
+		return;
+
+	if (bfqd->hw_tag_samples++ < BFQ_HW_QUEUE_SAMPLES)
+		return;
+
+	bfqd->hw_tag = bfqd->max_rq_in_driver > BFQ_HW_QUEUE_THRESHOLD;
+	bfqd->max_rq_in_driver = 0;
+	bfqd->hw_tag_samples = 0;
+}
+
+static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
+{
+	u64 now_ns;
+	u32 delta_us;
+
+	bfq_update_hw_tag(bfqd);
+
+	BUG_ON(!bfqd->rq_in_driver);
+	BUG_ON(!bfqq->dispatched);
+	bfqd->rq_in_driver--;
+	bfqq->dispatched--;
+
+	bfq_log_bfqq(bfqd, bfqq,
+		     "new disp %d, new rq_in_driver %d",
+		     bfqq->dispatched, bfqd->rq_in_driver);
+
+	if (!bfqq->dispatched && !bfq_bfqq_busy(bfqq)) {
+		BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list));
+		/*
+		 * Set budget_timeout (which we overload to store the
+		 * time at which the queue remains with no backlog and
+		 * no outstanding request; used by the weight-raising
+		 * mechanism).
+		 */
+		bfqq->budget_timeout = jiffies;
+
+		bfq_weights_tree_remove(bfqd, &bfqq->entity,
+					&bfqd->queue_weights_tree);
+	}
+
+	now_ns = ktime_get_ns();
+
+	bfqq->ttime.last_end_request = now_ns;
+
+	/*
+	 * Using us instead of ns, to get a reasonable precision in
+	 * computing rate in next check.
+	 */
+	delta_us = div_u64(now_ns - bfqd->last_completion, NSEC_PER_USEC);
+
+	bfq_log_bfqq(bfqd, bfqq,
+		"delta %uus/%luus max_size %u rate %llu/%llu",
+		delta_us, BFQ_MIN_TT/NSEC_PER_USEC, bfqd->last_rq_max_size,
+		delta_us > 0 ?
+		(USEC_PER_SEC*
+		(u64)((bfqd->last_rq_max_size<<BFQ_RATE_SHIFT)/delta_us))
+			>>BFQ_RATE_SHIFT :
+		(USEC_PER_SEC*
+		(u64)(bfqd->last_rq_max_size<<BFQ_RATE_SHIFT))>>BFQ_RATE_SHIFT,
+		(USEC_PER_SEC*(u64)(1UL<<(BFQ_RATE_SHIFT-10)))>>BFQ_RATE_SHIFT);
+
+	/*
+	 * If the request took rather long to complete, and, according
+	 * to the maximum request size recorded, this completion latency
+	 * implies that the request was certainly served at a very low
+	 * rate (less than 1M sectors/sec), then the whole observation
+	 * interval that lasts up to this time instant cannot be a
+	 * valid time interval for computing a new peak rate.  Invoke
+	 * bfq_update_rate_reset to have the following three steps
+	 * taken:
+	 * - close the observation interval at the last (previous)
+	 *   request dispatch or completion
+	 * - compute rate, if possible, for that observation interval
+	 * - reset to zero samples, which will trigger a proper
+	 *   re-initialization of the observation interval on next
+	 *   dispatch
+	 */
+	if (delta_us > BFQ_MIN_TT/NSEC_PER_USEC &&
+	   (bfqd->last_rq_max_size<<BFQ_RATE_SHIFT)/delta_us <
+			1UL<<(BFQ_RATE_SHIFT - 10))
+		bfq_update_rate_reset(bfqd, NULL);
+	bfqd->last_completion = now_ns;
+
+	/*
+	 * If we are waiting to discover whether the request pattern
+	 * of the task associated with the queue is actually
+	 * isochronous, and both requisites for this condition to hold
+	 * are now satisfied, then compute soft_rt_next_start (see the
+	 * comments on the function bfq_bfqq_softrt_next_start()). We
+	 * schedule this delayed check when bfqq expires, if it still
+	 * has in-flight requests.
+	 */
+	if (bfq_bfqq_softrt_update(bfqq) && bfqq->dispatched == 0 &&
+	    RB_EMPTY_ROOT(&bfqq->sort_list))
+		bfqq->soft_rt_next_start =
+			bfq_bfqq_softrt_next_start(bfqd, bfqq);
+
+	/*
+	 * If this is the in-service queue, check if it needs to be expired,
+	 * or if we want to idle in case it has no pending requests.
+	 */
+	if (bfqd->in_service_queue == bfqq) {
+		if (bfqq->dispatched == 0 && bfq_bfqq_must_idle(bfqq)) {
+			bfq_arm_slice_timer(bfqd);
+			return;
+		} else if (bfq_may_expire_for_budg_timeout(bfqq))
+			bfq_bfqq_expire(bfqd, bfqq, false,
+					BFQ_BFQQ_BUDGET_TIMEOUT);
+		else if (RB_EMPTY_ROOT(&bfqq->sort_list) &&
+			 (bfqq->dispatched == 0 ||
+			  !bfq_bfqq_may_idle(bfqq)))
+			bfq_bfqq_expire(bfqd, bfqq, false,
+					BFQ_BFQQ_NO_MORE_REQUESTS);
+	}
+}
+
+static void bfq_finish_requeue_request_body(struct bfq_queue *bfqq)
+{
+	bfq_log_bfqq(bfqq->bfqd, bfqq,
+		     "allocated %d", bfqq->allocated);
+	BUG_ON(!bfqq->allocated);
+	bfqq->allocated--;
+
+	bfq_put_queue(bfqq);
+}
+
+/*
+ * Handle either a requeue or a finish for rq. The things to do are
+ * the same in both cases: all references to rq are to be dropped. In
+ * particular, rq is considered completed from the point of view of
+ * the scheduler.
+ */
+static void bfq_finish_requeue_request(struct request *rq)
+{
+	struct bfq_queue *bfqq;
+	struct bfq_data *bfqd;
+	struct bfq_io_cq *bic;
+
+	BUG_ON(!rq);
+
+	bfqq = RQ_BFQQ(rq);
+
+	/*
+	 * Requeue and finish hooks are invoked in blk-mq without
+	 * checking whether the involved request is actually still
+	 * referenced in the scheduler. To handle this fact, the
+	 * following two checks make this function exit in case of
+	 * spurious invocations, for which there is nothing to do.
+	 *
+	 * First, check whether rq has nothing to do with an elevator.
+	 */
+	if (unlikely(!(rq->rq_flags & RQF_ELVPRIV)))
+		return;
+
+	/*
+	 * rq either is not associated with any icq, or is an already
+	 * requeued request that has not (yet) been re-inserted into
+	 * a bfq_queue.
+	 */
+	if (!rq->elv.icq || !bfqq)
+		return;
+
+	bic = RQ_BIC(rq);
+	BUG_ON(!bic);
+
+	bfqd = bfqq->bfqd;
+	BUG_ON(!bfqd);
+
+	if (rq->rq_flags & RQF_DISP_LIST) {
+		pr_crit("putting disp rq %p for %d", rq, bfqq->pid);
+		BUG();
+	}
+	BUG_ON(rq->rq_flags & RQF_QUEUED);
+
+	bfq_log_bfqq(bfqd, bfqq,
+		     "putting rq %p with %u sects left, STARTED %d",
+		     rq, blk_rq_sectors(rq),
+		     rq->rq_flags & RQF_STARTED);
+
+	if (rq->rq_flags & RQF_STARTED)
+		bfqg_stats_update_completion(bfqq_group(bfqq),
+					     rq_start_time_ns(rq),
+					     rq_io_start_time_ns(rq),
+					     rq->cmd_flags);
+
+	WARN_ON(blk_rq_sectors(rq) == 0 && !(rq->rq_flags & RQF_STARTED));
+
+	if (likely(rq->rq_flags & RQF_STARTED)) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&bfqd->lock, flags);
+
+		bfq_completed_request(bfqq, bfqd);
+		bfq_finish_requeue_request_body(bfqq);
+
+		spin_unlock_irqrestore(&bfqd->lock, flags);
+	} else {
+		/*
+		 * Request rq may be still/already in the scheduler,
+		 * in which case we need to remove it (this should
+		 * never happen in case of requeue). And we cannot
+		 * defer such a check and removal, to avoid
+		 * inconsistencies in the time interval from the end
+		 * of this function to the start of the deferred work.
+		 * This situation seems to occur only in process
+		 * context, as a consequence of a merge. In the
+		 * current version of the code, this implies that the
+		 * lock is held.
+		 */
+		BUG_ON(in_interrupt());
+
+		assert_spin_locked(&bfqd->lock);
+		if (!RB_EMPTY_NODE(&rq->rb_node)) {
+			bfq_remove_request(rq->q, rq);
+			bfqg_stats_update_io_remove(bfqq_group(bfqq),
+						    rq->cmd_flags);
+		}
+		bfq_finish_requeue_request_body(bfqq);
+	}
+
+	/*
+	 * Reset private fields. In case of a requeue, this allows
+	 * this function to correctly do nothing if it is spuriously
+	 * invoked again on this same request (see the check at the
+	 * beginning of the function). Probably, a better general
+	 * design would be to prevent blk-mq from invoking the requeue
+	 * or finish hooks of an elevator, for a request that is not
+	 * referred by that elevator.
+	 *
+	 * Resetting the following fields would break the
+	 * request-insertion logic if rq is re-inserted into a bfq
+	 * internal queue, without a re-preparation. Here we assume
+	 * that re-insertions of requeued requests, without
+	 * re-preparation, can happen only for pass_through or at_head
+	 * requests (which are not re-inserted into bfq internal
+	 * queues).
+	 */
+	rq->elv.priv[0] = NULL;
+	rq->elv.priv[1] = NULL;
+}
+
+/*
+ * Returns NULL if a new bfqq should be allocated, or the old bfqq if this
+ * was the last process referring to that bfqq.
+ */
+static struct bfq_queue *
+bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
+{
+	bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue");
+
+	if (bfqq_process_refs(bfqq) == 1) {
+		bfqq->pid = current->pid;
+		bfq_clear_bfqq_coop(bfqq);
+		bfq_clear_bfqq_split_coop(bfqq);
+		return bfqq;
+	}
+
+	bic_set_bfqq(bic, NULL, 1);
+
+	bfq_put_cooperator(bfqq);
+
+	bfq_put_queue(bfqq);
+	return NULL;
+}
+
+static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd,
+						   struct bfq_io_cq *bic,
+						   struct bio *bio,
+						   bool split, bool is_sync,
+						   bool *new_queue)
+{
+	struct bfq_queue *bfqq = bic_to_bfqq(bic, is_sync);
+
+	if (likely(bfqq && bfqq != &bfqd->oom_bfqq))
+		return bfqq;
+
+	if (new_queue)
+		*new_queue = true;
+
+	if (bfqq)
+		bfq_put_queue(bfqq);
+	bfqq = bfq_get_queue(bfqd, bio, is_sync, bic);
+	BUG_ON(!hlist_unhashed(&bfqq->burst_list_node));
+
+	bic_set_bfqq(bic, bfqq, is_sync);
+	if (split && is_sync) {
+		bfq_log_bfqq(bfqd, bfqq,
+			     "get_request: was_in_list %d "
+			     "was_in_large_burst %d "
+			     "large burst in progress %d",
+			     bic->was_in_burst_list,
+			     bic->saved_in_large_burst,
+			     bfqd->large_burst);
+
+		if ((bic->was_in_burst_list && bfqd->large_burst) ||
+		    bic->saved_in_large_burst) {
+			bfq_log_bfqq(bfqd, bfqq,
+				     "get_request: marking in "
+				     "large burst");
+			bfq_mark_bfqq_in_large_burst(bfqq);
+		} else {
+			bfq_log_bfqq(bfqd, bfqq,
+				     "get_request: clearing in "
+				     "large burst");
+			bfq_clear_bfqq_in_large_burst(bfqq);
+			if (bic->was_in_burst_list)
+				/*
+				 * If bfqq was in the current
+				 * burst list before being
+				 * merged, then we have to add
+				 * it back. And we do not need
+				 * to increase burst_size, as
+				 * we did not decrement
+				 * burst_size when we removed
+				 * bfqq from the burst list as
+				 * a consequence of a merge
+				 * (see comments in
+				 * bfq_put_queue). In this
+				 * respect, it would be rather
+				 * costly to know whether the
+				 * current burst list is still
+				 * the same burst list from
+				 * which bfqq was removed on
+				 * the merge. To avoid this
+				 * cost, if bfqq was in a
+				 * burst list, then we add
+				 * bfqq to the current burst
+				 * list without any further
+				 * check. This can cause
+				 * inappropriate insertions,
+				 * but rarely enough to not
+				 * harm the detection of large
+				 * bursts significantly.
+				 */
+				hlist_add_head(&bfqq->burst_list_node,
+					       &bfqd->burst_list);
+		}
+		bfqq->split_time = jiffies;
+	}
+
+	return bfqq;
+}
+
+/*
+ * Allocate bfq data structures associated with this request.
+ */
+static void bfq_prepare_request(struct request *rq, struct bio *bio)
+{
+	struct request_queue *q = rq->q;
+	struct bfq_data *bfqd = q->elevator->elevator_data;
+	struct bfq_io_cq *bic;
+	const int is_sync = rq_is_sync(rq);
+	struct bfq_queue *bfqq;
+	bool bfqq_already_existing = false, split = false;
+	bool new_queue = false;
+
+	if (!rq->elv.icq)
+		return;
+	bic = icq_to_bic(rq->elv.icq);
+
+	spin_lock_irq(&bfqd->lock);
+
+	bfq_check_ioprio_change(bic, bio);
+
+	bfq_bic_update_cgroup(bic, bio);
+
+	bfqq = bfq_get_bfqq_handle_split(bfqd, bic, bio, false, is_sync,
+					 &new_queue);
+
+	if (likely(!new_queue)) {
+		/* If the queue was seeky for too long, break it apart. */
+		if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) {
+			BUG_ON(!is_sync);
+			bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq");
+
+			/* Update bic before losing reference to bfqq */
+			if (bfq_bfqq_in_large_burst(bfqq))
+				bic->saved_in_large_burst = true;
+
+			bfqq = bfq_split_bfqq(bic, bfqq);
+			split = true;
+
+			if (!bfqq)
+				bfqq = bfq_get_bfqq_handle_split(bfqd, bic, bio,
+								 true, is_sync,
+								 NULL);
+			else
+				bfqq_already_existing = true;
+
+			BUG_ON(!bfqq);
+			BUG_ON(bfqq == &bfqd->oom_bfqq);
+		}
+	}
+
+	bfqq->allocated++;
+	bfq_log_bfqq(bfqq->bfqd, bfqq,
+		     "new allocated %d", bfqq->allocated);
+
+	bfqq->ref++;
+	bfq_log_bfqq(bfqd, bfqq, "%p: bfqq %p, %d", rq, bfqq, bfqq->ref);
+
+	rq->elv.priv[0] = bic;
+	rq->elv.priv[1] = bfqq;
+	rq->rq_flags &= ~RQF_DISP_LIST;
+
+	/*
+	 * If a bfq_queue has only one process reference, it is owned
+	 * by only this bic: we can then set bfqq->bic = bic. in
+	 * addition, if the queue has also just been split, we have to
+	 * resume its state.
+	 */
+	if (likely(bfqq != &bfqd->oom_bfqq) && bfqq_process_refs(bfqq) == 1) {
+		bfqq->bic = bic;
+		if (split) {
+			/*
+			 * The queue has just been split from a shared
+			 * queue: restore the idle window and the
+			 * possible weight raising period.
+			 */
+			bfq_bfqq_resume_state(bfqq, bfqd, bic,
+					      bfqq_already_existing);
+		}
+	}
+
+	if (unlikely(bfq_bfqq_just_created(bfqq)))
+		bfq_handle_burst(bfqd, bfqq);
+
+	rq->rq_flags |= RQF_GOT;
+	spin_unlock_irq(&bfqd->lock);
+}
+
+static void bfq_idle_slice_timer_body(struct bfq_queue *bfqq)
+{
+	struct bfq_data *bfqd = bfqq->bfqd;
+	enum bfqq_expiration reason;
+	unsigned long flags;
+
+	BUG_ON(!bfqd);
+	spin_lock_irqsave(&bfqd->lock, flags);
+
+	bfq_log_bfqq(bfqd, bfqq, "handling slice_timer expiration");
+	bfq_clear_bfqq_wait_request(bfqq);
+
+	if (bfqq != bfqd->in_service_queue) {
+		spin_unlock_irqrestore(&bfqd->lock, flags);
+		return;
+	}
+
+	if (bfq_bfqq_budget_timeout(bfqq))
+		/*
+		 * Also here the queue can be safely expired
+		 * for budget timeout without wasting
+		 * guarantees
+		 */
+		reason = BFQ_BFQQ_BUDGET_TIMEOUT;
+	else if (bfqq->queued[0] == 0 && bfqq->queued[1] == 0)
+		/*
+		 * The queue may not be empty upon timer expiration,
+		 * because we may not disable the timer when the
+		 * first request of the in-service queue arrives
+		 * during disk idling.
+		 */
+		reason = BFQ_BFQQ_TOO_IDLE;
+	else
+		goto schedule_dispatch;
+
+	bfq_bfqq_expire(bfqd, bfqq, true, reason);
+
+schedule_dispatch:
+	spin_unlock_irqrestore(&bfqd->lock, flags);
+	bfq_schedule_dispatch(bfqd);
+}
+
+/*
+ * Handler of the expiration of the timer running if the in-service queue
+ * is idling inside its time slice.
+ */
+static enum hrtimer_restart bfq_idle_slice_timer(struct hrtimer *timer)
+{
+	struct bfq_data *bfqd = container_of(timer, struct bfq_data,
+					     idle_slice_timer);
+	struct bfq_queue *bfqq = bfqd->in_service_queue;
+
+	bfq_log(bfqd, "expired");
+
+	/*
+	 * Theoretical race here: the in-service queue can be NULL or
+	 * different from the queue that was idling if a new request
+	 * arrives for the current queue and there is a full dispatch
+	 * cycle that changes the in-service queue.  This can hardly
+	 * happen, but in the worst case we just expire a queue too
+	 * early.
+	 */
+	if (bfqq)
+		bfq_idle_slice_timer_body(bfqq);
+
+	return HRTIMER_NORESTART;
+}
+
+static void __bfq_put_async_bfqq(struct bfq_data *bfqd,
+				 struct bfq_queue **bfqq_ptr)
+{
+	struct bfq_group *root_group = bfqd->root_group;
+	struct bfq_queue *bfqq = *bfqq_ptr;
+
+	bfq_log(bfqd, "%p", bfqq);
+	if (bfqq) {
+		bfq_bfqq_move(bfqd, bfqq, root_group);
+		bfq_log_bfqq(bfqd, bfqq, "putting %p, %d",
+			     bfqq, bfqq->ref);
+		bfq_put_queue(bfqq);
+		*bfqq_ptr = NULL;
+	}
+}
+
+/*
+ * Release all the bfqg references to its async queues.  If we are
+ * deallocating the group these queues may still contain requests, so
+ * we reparent them to the root cgroup (i.e., the only one that will
+ * exist for sure until all the requests on a device are gone).
+ */
+static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg)
+{
+	int i, j;
+
+	for (i = 0; i < 2; i++)
+		for (j = 0; j < IOPRIO_BE_NR; j++)
+			__bfq_put_async_bfqq(bfqd, &bfqg->async_bfqq[i][j]);
+
+	__bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq);
+}
+
+static void bfq_exit_queue(struct elevator_queue *e)
+{
+	struct bfq_data *bfqd = e->elevator_data;
+	struct bfq_queue *bfqq, *n;
+
+	bfq_log(bfqd, "starting ...");
+
+	hrtimer_cancel(&bfqd->idle_slice_timer);
+
+	BUG_ON(bfqd->in_service_queue);
+	BUG_ON(!list_empty(&bfqd->active_list));
+
+	spin_lock_irq(&bfqd->lock);
+	list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list)
+		bfq_deactivate_bfqq(bfqd, bfqq, false, false);
+	spin_unlock_irq(&bfqd->lock);
+
+	hrtimer_cancel(&bfqd->idle_slice_timer);
+
+	BUG_ON(hrtimer_active(&bfqd->idle_slice_timer));
+
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	/* release oom-queue reference to root group */
+	bfqg_and_blkg_put(bfqd->root_group);
+
+	blkcg_deactivate_policy(bfqd->queue, &blkcg_policy_bfq);
+#else
+	spin_lock_irq(&bfqd->lock);
+	bfq_put_async_queues(bfqd, bfqd->root_group);
+	kfree(bfqd->root_group);
+	spin_unlock_irq(&bfqd->lock);
+#endif
+
+	bfq_log(bfqd, "finished ...");
+	kfree(bfqd);
+}
+
+static void bfq_init_root_group(struct bfq_group *root_group,
+				struct bfq_data *bfqd)
+{
+	int i;
+
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	root_group->entity.parent = NULL;
+	root_group->my_entity = NULL;
+	root_group->bfqd = bfqd;
+#endif
+	root_group->rq_pos_tree = RB_ROOT;
+	for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
+		root_group->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;
+	root_group->sched_data.bfq_class_idle_last_service = jiffies;
+}
+
+static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+{
+	struct bfq_data *bfqd;
+	struct elevator_queue *eq;
+
+	eq = elevator_alloc(q, e);
+	if (!eq)
+		return -ENOMEM;
+
+	bfqd = kzalloc_node(sizeof(*bfqd), GFP_KERNEL, q->node);
+	if (!bfqd) {
+		kobject_put(&eq->kobj);
+		return -ENOMEM;
+	}
+	eq->elevator_data = bfqd;
+
+	spin_lock_irq(q->queue_lock);
+	q->elevator = eq;
+	spin_unlock_irq(q->queue_lock);
+
+	/*
+	 * Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues.
+	 * Grab a permanent reference to it, so that the normal code flow
+	 * will not attempt to free it.
+	 */
+	bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, NULL, 1, 0);
+	bfqd->oom_bfqq.ref++;
+	bfqd->oom_bfqq.new_ioprio = BFQ_DEFAULT_QUEUE_IOPRIO;
+	bfqd->oom_bfqq.new_ioprio_class = IOPRIO_CLASS_BE;
+	bfqd->oom_bfqq.entity.new_weight =
+		bfq_ioprio_to_weight(bfqd->oom_bfqq.new_ioprio);
+
+	/* oom_bfqq does not participate to bursts */
+	bfq_clear_bfqq_just_created(&bfqd->oom_bfqq);
+	/*
+	 * Trigger weight initialization, according to ioprio, at the
+	 * oom_bfqq's first activation. The oom_bfqq's ioprio and ioprio
+	 * class won't be changed any more.
+	 */
+	bfqd->oom_bfqq.entity.prio_changed = 1;
+
+	bfqd->queue = q;
+	INIT_LIST_HEAD(&bfqd->dispatch);
+
+	hrtimer_init(&bfqd->idle_slice_timer, CLOCK_MONOTONIC,
+		     HRTIMER_MODE_REL);
+	bfqd->idle_slice_timer.function = bfq_idle_slice_timer;
+
+	bfqd->queue_weights_tree = RB_ROOT;
+	bfqd->group_weights_tree = RB_ROOT;
+
+	INIT_LIST_HEAD(&bfqd->active_list);
+	INIT_LIST_HEAD(&bfqd->idle_list);
+	INIT_HLIST_HEAD(&bfqd->burst_list);
+
+	bfqd->hw_tag = -1;
+
+	bfqd->bfq_max_budget = bfq_default_max_budget;
+
+	bfqd->bfq_fifo_expire[0] = bfq_fifo_expire[0];
+	bfqd->bfq_fifo_expire[1] = bfq_fifo_expire[1];
+	bfqd->bfq_back_max = bfq_back_max;
+	bfqd->bfq_back_penalty = bfq_back_penalty;
+	bfqd->bfq_slice_idle = bfq_slice_idle;
+	bfqd->bfq_timeout = bfq_timeout;
+
+	bfqd->bfq_requests_within_timer = 120;
+
+	bfqd->bfq_large_burst_thresh = 8;
+	bfqd->bfq_burst_interval = msecs_to_jiffies(180);
+
+	bfqd->low_latency = true;
+
+	/*
+	 * Trade-off between responsiveness and fairness.
+	 */
+	bfqd->bfq_wr_coeff = 30;
+	bfqd->bfq_wr_rt_max_time = msecs_to_jiffies(300);
+	bfqd->bfq_wr_max_time = 0;
+	bfqd->bfq_wr_min_idle_time = msecs_to_jiffies(2000);
+	bfqd->bfq_wr_min_inter_arr_async = msecs_to_jiffies(500);
+	bfqd->bfq_wr_max_softrt_rate = 7000; /*
+					      * Approximate rate required
+					      * to playback or record a
+					      * high-definition compressed
+					      * video.
+					      */
+	bfqd->wr_busy_queues = 0;
+
+	/*
+	 * Begin by assuming, optimistically, that the device is a
+	 * high-speed one, and that its peak rate is equal to 2/3 of
+	 * the highest reference rate.
+	 */
+	bfqd->RT_prod = R_fast[blk_queue_nonrot(bfqd->queue)] *
+			T_fast[blk_queue_nonrot(bfqd->queue)];
+	bfqd->peak_rate = R_fast[blk_queue_nonrot(bfqd->queue)] * 2 / 3;
+	bfqd->device_speed = BFQ_BFQD_FAST;
+
+	spin_lock_init(&bfqd->lock);
+
+	/*
+	 * The invocation of the next bfq_create_group_hierarchy
+	 * function is the head of a chain of function calls
+	 * (bfq_create_group_hierarchy->blkcg_activate_policy->
+	 * blk_mq_freeze_queue) that may lead to the invocation of the
+	 * has_work hook function. For this reason,
+	 * bfq_create_group_hierarchy is invoked only after all
+	 * scheduler data has been initialized, apart from the fields
+	 * that can be initialized only after invoking
+	 * bfq_create_group_hierarchy. This, in particular, enables
+	 * has_work to correctly return false. Of course, to avoid
+	 * other inconsistencies, the blk-mq stack must then refrain
+	 * from invoking further scheduler hooks before this init
+	 * function is finished.
+	*/
+	bfqd->root_group = bfq_create_group_hierarchy(bfqd, q->node);
+	if (!bfqd->root_group)
+		goto out_free;
+	bfq_init_root_group(bfqd->root_group, bfqd);
+	bfq_init_entity(&bfqd->oom_bfqq.entity, bfqd->root_group);
+
+	wbt_disable_default(q);
+	return 0;
+
+out_free:
+	kfree(bfqd);
+	kobject_put(&eq->kobj);
+	return -ENOMEM;
+}
+
+static void bfq_slab_kill(void)
+{
+	kmem_cache_destroy(bfq_pool);
+}
+
+static int __init bfq_slab_setup(void)
+{
+	bfq_pool = KMEM_CACHE(bfq_queue, 0);
+	if (!bfq_pool)
+		return -ENOMEM;
+	return 0;
+}
+
+static ssize_t bfq_var_show(unsigned int var, char *page)
+{
+	return sprintf(page, "%u\n", var);
+}
+
+static ssize_t bfq_var_store(unsigned long *var, const char *page,
+			     size_t count)
+{
+	unsigned long new_val;
+	int ret = kstrtoul(page, 10, &new_val);
+
+	if (ret == 0)
+		*var = new_val;
+
+	return count;
+}
+
+static ssize_t bfq_wr_max_time_show(struct elevator_queue *e, char *page)
+{
+	struct bfq_data *bfqd = e->elevator_data;
+
+	return sprintf(page, "%d\n", bfqd->bfq_wr_max_time > 0 ?
+		       jiffies_to_msecs(bfqd->bfq_wr_max_time) :
+		       jiffies_to_msecs(bfq_wr_duration(bfqd)));
+}
+
+static ssize_t bfq_weights_show(struct elevator_queue *e, char *page)
+{
+	struct bfq_queue *bfqq;
+	struct bfq_data *bfqd = e->elevator_data;
+	ssize_t num_char = 0;
+
+	num_char += sprintf(page + num_char, "Tot reqs queued %d\n\n",
+			    bfqd->queued);
+
+	spin_lock_irq(&bfqd->lock);
+
+	num_char += sprintf(page + num_char, "Active:\n");
+	list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) {
+		num_char += sprintf(page + num_char,
+				    "pid%d: weight %hu, nr_queued %d %d, ",
+				    bfqq->pid,
+				    bfqq->entity.weight,
+				    bfqq->queued[0],
+				    bfqq->queued[1]);
+		num_char += sprintf(page + num_char,
+				    "dur %d/%u\n",
+				    jiffies_to_msecs(
+					    jiffies -
+					    bfqq->last_wr_start_finish),
+				    jiffies_to_msecs(bfqq->wr_cur_max_time));
+	}
+
+	num_char += sprintf(page + num_char, "Idle:\n");
+	list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) {
+		num_char += sprintf(page + num_char,
+				    "pid%d: weight %hu, dur %d/%u\n",
+				    bfqq->pid,
+				    bfqq->entity.weight,
+				    jiffies_to_msecs(jiffies -
+						     bfqq->last_wr_start_finish),
+				    jiffies_to_msecs(bfqq->wr_cur_max_time));
+	}
+
+	spin_unlock_irq(&bfqd->lock);
+
+	return num_char;
+}
+
+#define SHOW_FUNCTION(__FUNC, __VAR, __CONV)				\
+static ssize_t __FUNC(struct elevator_queue *e, char *page)		\
+{									\
+	struct bfq_data *bfqd = e->elevator_data;			\
+	u64 __data = __VAR;						\
+	if (__CONV == 1)						\
+		__data = jiffies_to_msecs(__data);			\
+	else if (__CONV == 2)						\
+		__data = div_u64(__data, NSEC_PER_MSEC);		\
+	return bfq_var_show(__data, (page));				\
+}
+SHOW_FUNCTION(bfq_fifo_expire_sync_show, bfqd->bfq_fifo_expire[1], 2);
+SHOW_FUNCTION(bfq_fifo_expire_async_show, bfqd->bfq_fifo_expire[0], 2);
+SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0);
+SHOW_FUNCTION(bfq_back_seek_penalty_show, bfqd->bfq_back_penalty, 0);
+SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 2);
+SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0);
+SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout, 1);
+SHOW_FUNCTION(bfq_strict_guarantees_show, bfqd->strict_guarantees, 0);
+SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0);
+SHOW_FUNCTION(bfq_wr_coeff_show, bfqd->bfq_wr_coeff, 0);
+SHOW_FUNCTION(bfq_wr_rt_max_time_show, bfqd->bfq_wr_rt_max_time, 1);
+SHOW_FUNCTION(bfq_wr_min_idle_time_show, bfqd->bfq_wr_min_idle_time, 1);
+SHOW_FUNCTION(bfq_wr_min_inter_arr_async_show, bfqd->bfq_wr_min_inter_arr_async,
+	1);
+SHOW_FUNCTION(bfq_wr_max_softrt_rate_show, bfqd->bfq_wr_max_softrt_rate, 0);
+#undef SHOW_FUNCTION
+
+#define USEC_SHOW_FUNCTION(__FUNC, __VAR)				\
+static ssize_t __FUNC(struct elevator_queue *e, char *page)		\
+{									\
+	struct bfq_data *bfqd = e->elevator_data;			\
+	u64 __data = __VAR;						\
+	__data = div_u64(__data, NSEC_PER_USEC);			\
+	return bfq_var_show(__data, (page));				\
+}
+USEC_SHOW_FUNCTION(bfq_slice_idle_us_show, bfqd->bfq_slice_idle);
+#undef USEC_SHOW_FUNCTION
+
+#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
+static ssize_t								\
+__FUNC(struct elevator_queue *e, const char *page, size_t count)	\
+{									\
+	struct bfq_data *bfqd = e->elevator_data;			\
+	unsigned long uninitialized_var(__data);			\
+	int ret = bfq_var_store(&__data, (page), count);		\
+	if (__data < (MIN))						\
+		__data = (MIN);						\
+	else if (__data > (MAX))					\
+		__data = (MAX);						\
+	if (__CONV == 1)						\
+		*(__PTR) = msecs_to_jiffies(__data);			\
+	else if (__CONV == 2)						\
+		*(__PTR) = (u64)__data * NSEC_PER_MSEC;			\
+	else								\
+		*(__PTR) = __data;					\
+	return ret;							\
+}
+STORE_FUNCTION(bfq_fifo_expire_sync_store, &bfqd->bfq_fifo_expire[1], 1,
+		INT_MAX, 2);
+STORE_FUNCTION(bfq_fifo_expire_async_store, &bfqd->bfq_fifo_expire[0], 1,
+		INT_MAX, 2);
+STORE_FUNCTION(bfq_back_seek_max_store, &bfqd->bfq_back_max, 0, INT_MAX, 0);
+STORE_FUNCTION(bfq_back_seek_penalty_store, &bfqd->bfq_back_penalty, 1,
+		INT_MAX, 0);
+STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 2);
+STORE_FUNCTION(bfq_wr_coeff_store, &bfqd->bfq_wr_coeff, 1, INT_MAX, 0);
+STORE_FUNCTION(bfq_wr_max_time_store, &bfqd->bfq_wr_max_time, 0, INT_MAX, 1);
+STORE_FUNCTION(bfq_wr_rt_max_time_store, &bfqd->bfq_wr_rt_max_time, 0, INT_MAX,
+		1);
+STORE_FUNCTION(bfq_wr_min_idle_time_store, &bfqd->bfq_wr_min_idle_time, 0,
+		INT_MAX, 1);
+STORE_FUNCTION(bfq_wr_min_inter_arr_async_store,
+		&bfqd->bfq_wr_min_inter_arr_async, 0, INT_MAX, 1);
+STORE_FUNCTION(bfq_wr_max_softrt_rate_store, &bfqd->bfq_wr_max_softrt_rate, 0,
+		INT_MAX, 0);
+#undef STORE_FUNCTION
+
+#define USEC_STORE_FUNCTION(__FUNC, __PTR, MIN, MAX)			\
+static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)\
+{									\
+	struct bfq_data *bfqd = e->elevator_data;			\
+	unsigned long uninitialized_var(__data);			\
+	int ret = bfq_var_store(&__data, (page), count);		\
+	if (__data < (MIN))						\
+		__data = (MIN);						\
+	else if (__data > (MAX))					\
+		__data = (MAX);						\
+	*(__PTR) = (u64)__data * NSEC_PER_USEC;				\
+	return ret;							\
+}
+USEC_STORE_FUNCTION(bfq_slice_idle_us_store, &bfqd->bfq_slice_idle, 0,
+		    UINT_MAX);
+#undef USEC_STORE_FUNCTION
+
+/* do nothing for the moment */
+static ssize_t bfq_weights_store(struct elevator_queue *e,
+				    const char *page, size_t count)
+{
+	return count;
+}
+
+static ssize_t bfq_max_budget_store(struct elevator_queue *e,
+				    const char *page, size_t count)
+{
+	struct bfq_data *bfqd = e->elevator_data;
+	unsigned long uninitialized_var(__data);
+	int ret = bfq_var_store(&__data, (page), count);
+
+	if (__data == 0)
+		bfqd->bfq_max_budget = bfq_calc_max_budget(bfqd);
+	else {
+		if (__data > INT_MAX)
+			__data = INT_MAX;
+		bfqd->bfq_max_budget = __data;
+	}
+
+	bfqd->bfq_user_max_budget = __data;
+
+	return ret;
+}
+
+/*
+ * Leaving this name to preserve name compatibility with cfq
+ * parameters, but this timeout is used for both sync and async.
+ */
+static ssize_t bfq_timeout_sync_store(struct elevator_queue *e,
+				      const char *page, size_t count)
+{
+	struct bfq_data *bfqd = e->elevator_data;
+	unsigned long uninitialized_var(__data);
+	int ret = bfq_var_store(&__data, (page), count);
+
+	if (__data < 1)
+		__data = 1;
+	else if (__data > INT_MAX)
+		__data = INT_MAX;
+
+	bfqd->bfq_timeout = msecs_to_jiffies(__data);
+	if (bfqd->bfq_user_max_budget == 0)
+		bfqd->bfq_max_budget = bfq_calc_max_budget(bfqd);
+
+	return ret;
+}
+
+static ssize_t bfq_strict_guarantees_store(struct elevator_queue *e,
+				     const char *page, size_t count)
+{
+	struct bfq_data *bfqd = e->elevator_data;
+	unsigned long uninitialized_var(__data);
+	int ret = bfq_var_store(&__data, (page), count);
+
+	if (__data > 1)
+		__data = 1;
+	if (!bfqd->strict_guarantees && __data == 1
+	    && bfqd->bfq_slice_idle < 8 * NSEC_PER_MSEC)
+		bfqd->bfq_slice_idle = 8 * NSEC_PER_MSEC;
+
+	bfqd->strict_guarantees = __data;
+
+	return ret;
+}
+
+static ssize_t bfq_low_latency_store(struct elevator_queue *e,
+				     const char *page, size_t count)
+{
+	struct bfq_data *bfqd = e->elevator_data;
+	unsigned long uninitialized_var(__data);
+	int ret = bfq_var_store(&__data, (page), count);
+
+	if (__data > 1)
+		__data = 1;
+	if (__data == 0 && bfqd->low_latency != 0)
+		bfq_end_wr(bfqd);
+	bfqd->low_latency = __data;
+
+	return ret;
+}
+
+#define BFQ_ATTR(name) \
+	__ATTR(name, S_IRUGO|S_IWUSR, bfq_##name##_show, bfq_##name##_store)
+
+static struct elv_fs_entry bfq_attrs[] = {
+	BFQ_ATTR(fifo_expire_sync),
+	BFQ_ATTR(fifo_expire_async),
+	BFQ_ATTR(back_seek_max),
+	BFQ_ATTR(back_seek_penalty),
+	BFQ_ATTR(slice_idle),
+	BFQ_ATTR(slice_idle_us),
+	BFQ_ATTR(max_budget),
+	BFQ_ATTR(timeout_sync),
+	BFQ_ATTR(strict_guarantees),
+	BFQ_ATTR(low_latency),
+	BFQ_ATTR(wr_coeff),
+	BFQ_ATTR(wr_max_time),
+	BFQ_ATTR(wr_rt_max_time),
+	BFQ_ATTR(wr_min_idle_time),
+	BFQ_ATTR(wr_min_inter_arr_async),
+	BFQ_ATTR(wr_max_softrt_rate),
+	BFQ_ATTR(weights),
+	__ATTR_NULL
+};
+
+static struct elevator_type iosched_bfq_mq = {
+	.ops.mq = {
+		.limit_depth		= bfq_limit_depth,
+		.prepare_request        = bfq_prepare_request,
+		.requeue_request	= bfq_finish_requeue_request,
+		.finish_request         = bfq_finish_requeue_request,
+		.exit_icq		= bfq_exit_icq,
+		.insert_requests	= bfq_insert_requests,
+		.dispatch_request	= bfq_dispatch_request,
+		.next_request		= elv_rb_latter_request,
+		.former_request		= elv_rb_former_request,
+		.allow_merge		= bfq_allow_bio_merge,
+		.bio_merge		= bfq_bio_merge,
+		.request_merge		= bfq_request_merge,
+		.requests_merged	= bfq_requests_merged,
+		.request_merged		= bfq_request_merged,
+		.has_work		= bfq_has_work,
+		.init_sched		= bfq_init_queue,
+		.exit_sched		= bfq_exit_queue,
+	},
+
+	.uses_mq = 		true,
+	.icq_size =		sizeof(struct bfq_io_cq),
+	.icq_align =		__alignof__(struct bfq_io_cq),
+	.elevator_attrs =	bfq_attrs,
+	.elevator_name =	"bfq-mq",
+	.elevator_owner =	THIS_MODULE,
+};
+
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+static struct blkcg_policy blkcg_policy_bfq = {
+	.dfl_cftypes		= bfq_blkg_files,
+	.legacy_cftypes		= bfq_blkcg_legacy_files,
+
+	.cpd_alloc_fn		= bfq_cpd_alloc,
+	.cpd_init_fn		= bfq_cpd_init,
+	.cpd_bind_fn	        = bfq_cpd_init,
+	.cpd_free_fn		= bfq_cpd_free,
+
+	.pd_alloc_fn		= bfq_pd_alloc,
+	.pd_init_fn		= bfq_pd_init,
+	.pd_offline_fn		= bfq_pd_offline,
+	.pd_free_fn		= bfq_pd_free,
+	.pd_reset_stats_fn	= bfq_pd_reset_stats,
+};
+#endif
+
+static int __init bfq_init(void)
+{
+	int ret;
+	char msg[60] = "BFQ I/O-scheduler: v8r12";
+
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	ret = blkcg_policy_register(&blkcg_policy_bfq);
+	if (ret)
+		return ret;
+#endif
+
+	ret = -ENOMEM;
+	if (bfq_slab_setup())
+		goto err_pol_unreg;
+
+	/*
+	 * Times to load large popular applications for the typical
+	 * systems installed on the reference devices (see the
+	 * comments before the definitions of the next two
+	 * arrays). Actually, we use slightly slower values, as the
+	 * estimated peak rate tends to be smaller than the actual
+	 * peak rate.  The reason for this last fact is that estimates
+	 * are computed over much shorter time intervals than the long
+	 * intervals typically used for benchmarking. Why? First, to
+	 * adapt more quickly to variations. Second, because an I/O
+	 * scheduler cannot rely on a peak-rate-evaluation workload to
+	 * be run for a long time.
+	 */
+	T_slow[0] = msecs_to_jiffies(3500); /* actually 4 sec */
+	T_slow[1] = msecs_to_jiffies(6000); /* actually 6.5 sec */
+	T_fast[0] = msecs_to_jiffies(7000); /* actually 8 sec */
+	T_fast[1] = msecs_to_jiffies(2500); /* actually 3 sec */
+
+	/*
+	 * Thresholds that determine the switch between speed classes
+	 * (see the comments before the definition of the array
+	 * device_speed_thresh). These thresholds are biased towards
+	 * transitions to the fast class. This is safer than the
+	 * opposite bias. In fact, a wrong transition to the slow
+	 * class results in short weight-raising periods, because the
+	 * speed of the device then tends to be higher that the
+	 * reference peak rate. On the opposite end, a wrong
+	 * transition to the fast class tends to increase
+	 * weight-raising periods, because of the opposite reason.
+	 */
+	device_speed_thresh[0] = (4 * R_slow[0]) / 3;
+	device_speed_thresh[1] = (4 * R_slow[1]) / 3;
+
+	ret = elv_register(&iosched_bfq_mq);
+	if (ret)
+		goto slab_kill;
+
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	strcat(msg, " (with cgroups support)");
+#endif
+	pr_info("%s", msg);
+
+	return 0;
+
+slab_kill:
+	bfq_slab_kill();
+err_pol_unreg:
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	blkcg_policy_unregister(&blkcg_policy_bfq);
+#endif
+	return ret;
+}
+
+static void __exit bfq_exit(void)
+{
+	elv_unregister(&iosched_bfq_mq);
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	blkcg_policy_unregister(&blkcg_policy_bfq);
+#endif
+	bfq_slab_kill();
+}
+
+module_init(bfq_init);
+module_exit(bfq_exit);
+
+MODULE_AUTHOR("Paolo Valente");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("MQ Budget Fair Queueing I/O Scheduler");
diff --git a/block/bfq-mq.h b/block/bfq-mq.h
new file mode 100644
index 000000000..4a54e5076
--- /dev/null
+++ b/block/bfq-mq.h
@@ -0,0 +1,1011 @@
+/*
+ * BFQ v8r12 for 4.11.0: data structures and common functions prototypes.
+ *
+ * Based on ideas and code from CFQ:
+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
+ *
+ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
+ *		      Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Copyright (C) 2017 Paolo Valente <paolo.valente@linaro.org>
+ */
+
+#ifndef _BFQ_H
+#define _BFQ_H
+
+#include <linux/hrtimer.h>
+#include <linux/blk-cgroup.h>
+
+/* see comments on CONFIG_BFQ_GROUP_IOSCHED in bfq.h */
+#ifdef CONFIG_MQ_BFQ_GROUP_IOSCHED
+#define BFQ_GROUP_IOSCHED_ENABLED
+#endif
+
+#define BFQ_IOPRIO_CLASSES	3
+#define BFQ_CL_IDLE_TIMEOUT	(HZ/5)
+
+#define BFQ_MIN_WEIGHT			1
+#define BFQ_MAX_WEIGHT			1000
+#define BFQ_WEIGHT_CONVERSION_COEFF	10
+
+#define BFQ_DEFAULT_QUEUE_IOPRIO	4
+
+#define BFQ_WEIGHT_LEGACY_DFL	100
+#define BFQ_DEFAULT_GRP_IOPRIO	0
+#define BFQ_DEFAULT_GRP_CLASS	IOPRIO_CLASS_BE
+
+/*
+ * Soft real-time applications are extremely more latency sensitive
+ * than interactive ones. Over-raise the weight of the former to
+ * privilege them against the latter.
+ */
+#define BFQ_SOFTRT_WEIGHT_FACTOR	100
+
+struct bfq_entity;
+
+/**
+ * struct bfq_service_tree - per ioprio_class service tree.
+ *
+ * Each service tree represents a B-WF2Q+ scheduler on its own.  Each
+ * ioprio_class has its own independent scheduler, and so its own
+ * bfq_service_tree.  All the fields are protected by the queue lock
+ * of the containing bfqd.
+ */
+struct bfq_service_tree {
+	/* tree for active entities (i.e., those backlogged) */
+	struct rb_root active;
+	/* tree for idle entities (i.e., not backlogged, with V <= F_i)*/
+	struct rb_root idle;
+
+	struct bfq_entity *first_idle;	/* idle entity with minimum F_i */
+	struct bfq_entity *last_idle;	/* idle entity with maximum F_i */
+
+	u64 vtime; /* scheduler virtual time */
+	/* scheduler weight sum; active and idle entities contribute to it */
+	unsigned long wsum;
+};
+
+/**
+ * struct bfq_sched_data - multi-class scheduler.
+ *
+ * bfq_sched_data is the basic scheduler queue.  It supports three
+ * ioprio_classes, and can be used either as a toplevel queue or as an
+ * intermediate queue in a hierarchical setup.
+ *
+ * The supported ioprio_classes are the same as in CFQ, in descending
+ * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE.
+ * Requests from higher priority queues are served before all the
+ * requests from lower priority queues; among requests of the same
+ * queue requests are served according to B-WF2Q+.
+ *
+ * The schedule is implemented by the service trees, plus the field
+ * @next_in_service, which points to the entity on the active trees
+ * that will be served next, if 1) no changes in the schedule occurs
+ * before the current in-service entity is expired, 2) the in-service
+ * queue becomes idle when it expires, and 3) if the entity pointed by
+ * in_service_entity is not a queue, then the in-service child entity
+ * of the entity pointed by in_service_entity becomes idle on
+ * expiration. This peculiar definition allows for the following
+ * optimization, not yet exploited: while a given entity is still in
+ * service, we already know which is the best candidate for next
+ * service among the other active entitities in the same parent
+ * entity. We can then quickly compare the timestamps of the
+ * in-service entity with those of such best candidate.
+ *
+ * All the fields are protected by the queue lock of the containing
+ * bfqd.
+ */
+struct bfq_sched_data {
+	struct bfq_entity *in_service_entity;  /* entity in service */
+	/* head-of-the-line entity in the scheduler (see comments above) */
+	struct bfq_entity *next_in_service;
+	/* array of service trees, one per ioprio_class */
+	struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES];
+	/* last time CLASS_IDLE was served */
+	unsigned long bfq_class_idle_last_service;
+
+};
+
+/**
+ * struct bfq_weight_counter - counter of the number of all active entities
+ *                             with a given weight.
+ */
+struct bfq_weight_counter {
+	unsigned int weight; /* weight of the entities this counter refers to */
+	unsigned int num_active; /* nr of active entities with this weight */
+	/*
+	 * Weights tree member (see bfq_data's @queue_weights_tree and
+	 * @group_weights_tree)
+	 */
+	struct rb_node weights_node;
+};
+
+/**
+ * struct bfq_entity - schedulable entity.
+ *
+ * A bfq_entity is used to represent either a bfq_queue (leaf node in the
+ * cgroup hierarchy) or a bfq_group into the upper level scheduler.  Each
+ * entity belongs to the sched_data of the parent group in the cgroup
+ * hierarchy.  Non-leaf entities have also their own sched_data, stored
+ * in @my_sched_data.
+ *
+ * Each entity stores independently its priority values; this would
+ * allow different weights on different devices, but this
+ * functionality is not exported to userspace by now.  Priorities and
+ * weights are updated lazily, first storing the new values into the
+ * new_* fields, then setting the @prio_changed flag.  As soon as
+ * there is a transition in the entity state that allows the priority
+ * update to take place the effective and the requested priority
+ * values are synchronized.
+ *
+ * Unless cgroups are used, the weight value is calculated from the
+ * ioprio to export the same interface as CFQ.  When dealing with
+ * ``well-behaved'' queues (i.e., queues that do not spend too much
+ * time to consume their budget and have true sequential behavior, and
+ * when there are no external factors breaking anticipation) the
+ * relative weights at each level of the cgroups hierarchy should be
+ * guaranteed.  All the fields are protected by the queue lock of the
+ * containing bfqd.
+ */
+struct bfq_entity {
+	struct rb_node rb_node; /* service_tree member */
+	/* pointer to the weight counter associated with this entity */
+	struct bfq_weight_counter *weight_counter;
+
+	/*
+	 * Flag, true if the entity is on a tree (either the active or
+	 * the idle one of its service_tree) or is in service.
+	 */
+	bool on_st;
+
+	u64 finish; /* B-WF2Q+ finish timestamp (aka F_i) */
+	u64 start;  /* B-WF2Q+ start timestamp (aka S_i) */
+
+	/* tree the entity is enqueued into; %NULL if not on a tree */
+	struct rb_root *tree;
+
+	/*
+	 * minimum start time of the (active) subtree rooted at this
+	 * entity; used for O(log N) lookups into active trees
+	 */
+	u64 min_start;
+
+	/* amount of service received during the last service slot */
+	int service;
+
+	/* budget, used also to calculate F_i: F_i = S_i + @budget / @weight */
+	int budget;
+
+	unsigned int weight;	 /* weight of the queue */
+	unsigned int new_weight; /* next weight if a change is in progress */
+
+	/* original weight, used to implement weight boosting */
+	unsigned int orig_weight;
+
+	/* parent entity, for hierarchical scheduling */
+	struct bfq_entity *parent;
+
+	/*
+	 * For non-leaf nodes in the hierarchy, the associated
+	 * scheduler queue, %NULL on leaf nodes.
+	 */
+	struct bfq_sched_data *my_sched_data;
+	/* the scheduler queue this entity belongs to */
+	struct bfq_sched_data *sched_data;
+
+	/* flag, set to request a weight, ioprio or ioprio_class change  */
+	int prio_changed;
+};
+
+struct bfq_group;
+
+/**
+ * struct bfq_ttime - per process thinktime stats.
+ */
+struct bfq_ttime {
+	u64 last_end_request; /* completion time of last request */
+
+	u64 ttime_total; /* total process thinktime */
+	unsigned long ttime_samples; /* number of thinktime samples */
+	u64 ttime_mean; /* average process thinktime */
+
+};
+
+/**
+ * struct bfq_queue - leaf schedulable entity.
+ *
+ * A bfq_queue is a leaf request queue; it can be associated with an
+ * io_context or more, if it  is  async or shared  between  cooperating
+ * processes. @cgroup holds a reference to the cgroup, to be sure that it
+ * does not disappear while a bfqq still references it (mostly to avoid
+ * races between request issuing and task migration followed by cgroup
+ * destruction).
+ * All the fields are protected by the queue lock of the containing bfqd.
+ */
+struct bfq_queue {
+	/* reference counter */
+	int ref;
+	/* parent bfq_data */
+	struct bfq_data *bfqd;
+
+	/* current ioprio and ioprio class */
+	unsigned short ioprio, ioprio_class;
+	/* next ioprio and ioprio class if a change is in progress */
+	unsigned short new_ioprio, new_ioprio_class;
+
+	/*
+	 * Shared bfq_queue if queue is cooperating with one or more
+	 * other queues.
+	 */
+	struct bfq_queue *new_bfqq;
+	/* request-position tree member (see bfq_group's @rq_pos_tree) */
+	struct rb_node pos_node;
+	/* request-position tree root (see bfq_group's @rq_pos_tree) */
+	struct rb_root *pos_root;
+
+	/* sorted list of pending requests */
+	struct rb_root sort_list;
+	/* if fifo isn't expired, next request to serve */
+	struct request *next_rq;
+	/* number of sync and async requests queued */
+	int queued[2];
+	/* number of requests currently allocated */
+	int allocated;
+	/* number of pending metadata requests */
+	int meta_pending;
+	/* fifo list of requests in sort_list */
+	struct list_head fifo;
+
+	/* entity representing this queue in the scheduler */
+	struct bfq_entity entity;
+
+	/* maximum budget allowed from the feedback mechanism */
+	int max_budget;
+	/* budget expiration (in jiffies) */
+	unsigned long budget_timeout;
+
+	/* number of requests on the dispatch list or inside driver */
+	int dispatched;
+
+	unsigned int flags; /* status flags.*/
+
+	/* node for active/idle bfqq list inside parent bfqd */
+	struct list_head bfqq_list;
+
+	/* associated @bfq_ttime struct */
+	struct bfq_ttime ttime;
+
+	/* bit vector: a 1 for each seeky requests in history */
+	u32 seek_history;
+
+	/* node for the device's burst list */
+	struct hlist_node burst_list_node;
+
+	/* position of the last request enqueued */
+	sector_t last_request_pos;
+
+	/* Number of consecutive pairs of request completion and
+	 * arrival, such that the queue becomes idle after the
+	 * completion, but the next request arrives within an idle
+	 * time slice; used only if the queue's IO_bound flag has been
+	 * cleared.
+	 */
+	unsigned int requests_within_timer;
+
+	/* pid of the process owning the queue, used for logging purposes */
+	pid_t pid;
+
+	/*
+	 * Pointer to the bfq_io_cq owning the bfq_queue, set to %NULL
+	 * if the queue is shared.
+	 */
+	struct bfq_io_cq *bic;
+
+	/* current maximum weight-raising time for this queue */
+	unsigned long wr_cur_max_time;
+	/*
+	 * Minimum time instant such that, only if a new request is
+	 * enqueued after this time instant in an idle @bfq_queue with
+	 * no outstanding requests, then the task associated with the
+	 * queue it is deemed as soft real-time (see the comments on
+	 * the function bfq_bfqq_softrt_next_start())
+	 */
+	unsigned long soft_rt_next_start;
+	/*
+	 * Start time of the current weight-raising period if
+	 * the @bfq-queue is being weight-raised, otherwise
+	 * finish time of the last weight-raising period.
+	 */
+	unsigned long last_wr_start_finish;
+	/* factor by which the weight of this queue is multiplied */
+	unsigned int wr_coeff;
+	/*
+	 * Time of the last transition of the @bfq_queue from idle to
+	 * backlogged.
+	 */
+	unsigned long last_idle_bklogged;
+	/*
+	 * Cumulative service received from the @bfq_queue since the
+	 * last transition from idle to backlogged.
+	 */
+	unsigned long service_from_backlogged;
+	/*
+	 * Cumulative service received from the @bfq_queue since its
+	 * last transition to weight-raised state.
+	 */
+	unsigned long service_from_wr;
+	/*
+	 * Value of wr start time when switching to soft rt
+	 */
+	unsigned long wr_start_at_switch_to_srt;
+
+	unsigned long split_time; /* time of last split */
+	unsigned long first_IO_time; /* time of first I/O for this queue */
+};
+
+/**
+ * struct bfq_io_cq - per (request_queue, io_context) structure.
+ */
+struct bfq_io_cq {
+	/* associated io_cq structure */
+	struct io_cq icq; /* must be the first member */
+	/* array of two process queues, the sync and the async */
+	struct bfq_queue *bfqq[2];
+	/* per (request_queue, blkcg) ioprio */
+	int ioprio;
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	uint64_t blkcg_serial_nr; /* the current blkcg serial */
+#endif
+
+	/*
+	 * Snapshot of the has_short_time flag before merging; taken
+	 * to remember its value while the queue is merged, so as to
+	 * be able to restore it in case of split.
+	 */
+	bool saved_has_short_ttime;
+	/*
+	 * Same purpose as the previous two fields for the I/O bound
+	 * classification of a queue.
+	 */
+	bool saved_IO_bound;
+
+	/*
+	 * Same purpose as the previous fields for the value of the
+	 * field keeping the queue's belonging to a large burst
+	 */
+	bool saved_in_large_burst;
+	/*
+	 * True if the queue belonged to a burst list before its merge
+	 * with another cooperating queue.
+	 */
+	bool was_in_burst_list;
+
+	/*
+	 * Similar to previous fields: save wr information.
+	 */
+	unsigned long saved_wr_coeff;
+	unsigned long saved_last_wr_start_finish;
+	unsigned long saved_wr_start_at_switch_to_srt;
+	unsigned int saved_wr_cur_max_time;
+	struct bfq_ttime saved_ttime;
+};
+
+enum bfq_device_speed {
+	BFQ_BFQD_FAST,
+	BFQ_BFQD_SLOW,
+};
+
+/**
+ * struct bfq_data - per-device data structure.
+ *
+ * All the fields are protected by @lock.
+ */
+struct bfq_data {
+	/* device request queue */
+	struct request_queue *queue;
+	/* dispatch queue */
+	struct list_head dispatch;
+
+	/* root bfq_group for the device */
+	struct bfq_group *root_group;
+
+	/*
+	 * rbtree of weight counters of @bfq_queues, sorted by
+	 * weight. Used to keep track of whether all @bfq_queues have
+	 * the same weight. The tree contains one counter for each
+	 * distinct weight associated to some active and not
+	 * weight-raised @bfq_queue (see the comments to the functions
+	 * bfq_weights_tree_[add|remove] for further details).
+	 */
+	struct rb_root queue_weights_tree;
+	/*
+	 * rbtree of non-queue @bfq_entity weight counters, sorted by
+	 * weight. Used to keep track of whether all @bfq_groups have
+	 * the same weight. The tree contains one counter for each
+	 * distinct weight associated to some active @bfq_group (see
+	 * the comments to the functions bfq_weights_tree_[add|remove]
+	 * for further details).
+	 */
+	struct rb_root group_weights_tree;
+
+	/*
+	 * Number of bfq_queues containing requests (including the
+	 * queue in service, even if it is idling).
+	 */
+	int busy_queues;
+	/* number of weight-raised busy @bfq_queues */
+	int wr_busy_queues;
+	/* number of queued requests */
+	int queued;
+	/* number of requests dispatched and waiting for completion */
+	int rq_in_driver;
+
+	/*
+	 * Maximum number of requests in driver in the last
+	 * @hw_tag_samples completed requests.
+	 */
+	int max_rq_in_driver;
+	/* number of samples used to calculate hw_tag */
+	int hw_tag_samples;
+	/* flag set to one if the driver is showing a queueing behavior */
+	int hw_tag;
+
+	/* number of budgets assigned */
+	int budgets_assigned;
+
+	/*
+	 * Timer set when idling (waiting) for the next request from
+	 * the queue in service.
+	 */
+	struct hrtimer idle_slice_timer;
+
+	/* bfq_queue in service */
+	struct bfq_queue *in_service_queue;
+
+	/* on-disk position of the last served request */
+	sector_t last_position;
+
+	/* time of last request completion (ns) */
+	u64 last_completion;
+
+	/* time of first rq dispatch in current observation interval (ns) */
+	u64 first_dispatch;
+	/* time of last rq dispatch in current observation interval (ns) */
+	u64 last_dispatch;
+
+	/* beginning of the last budget */
+	ktime_t last_budget_start;
+	/* beginning of the last idle slice */
+	ktime_t last_idling_start;
+
+	/* number of samples in current observation interval */
+	int peak_rate_samples;
+	/* num of samples of seq dispatches in current observation interval */
+	u32 sequential_samples;
+	/* total num of sectors transferred in current observation interval */
+	u64 tot_sectors_dispatched;
+	/* max rq size seen during current observation interval (sectors) */
+	u32 last_rq_max_size;
+	/* time elapsed from first dispatch in current observ. interval (us) */
+	u64 delta_from_first;
+	/*
+	 * Current estimate of the device peak rate, measured in
+	 * [(sectors/usec) / 2^BFQ_RATE_SHIFT]. The left-shift by
+	 * BFQ_RATE_SHIFT is performed to increase precision in
+	 * fixed-point calculations.
+	 */
+	u32 peak_rate;
+
+	/* maximum budget allotted to a bfq_queue before rescheduling */
+	int bfq_max_budget;
+
+	/* list of all the bfq_queues active on the device */
+	struct list_head active_list;
+	/* list of all the bfq_queues idle on the device */
+	struct list_head idle_list;
+
+	/*
+	 * Timeout for async/sync requests; when it fires, requests
+	 * are served in fifo order.
+	 */
+	u64 bfq_fifo_expire[2];
+	/* weight of backward seeks wrt forward ones */
+	unsigned int bfq_back_penalty;
+	/* maximum allowed backward seek */
+	unsigned int bfq_back_max;
+	/* maximum idling time */
+	u32 bfq_slice_idle;
+
+	/* user-configured max budget value (0 for auto-tuning) */
+	int bfq_user_max_budget;
+	/*
+	 * Timeout for bfq_queues to consume their budget; used to
+	 * prevent seeky queues from imposing long latencies to
+	 * sequential or quasi-sequential ones (this also implies that
+	 * seeky queues cannot receive guarantees in the service
+	 * domain; after a timeout they are charged for the time they
+	 * have been in service, to preserve fairness among them, but
+	 * without service-domain guarantees).
+	 */
+	unsigned int bfq_timeout;
+
+	/*
+	 * Number of consecutive requests that must be issued within
+	 * the idle time slice to set again idling to a queue which
+	 * was marked as non-I/O-bound (see the definition of the
+	 * IO_bound flag for further details).
+	 */
+	unsigned int bfq_requests_within_timer;
+
+	/*
+	 * Force device idling whenever needed to provide accurate
+	 * service guarantees, without caring about throughput
+	 * issues. CAVEAT: this may even increase latencies, in case
+	 * of useless idling for processes that did stop doing I/O.
+	 */
+	bool strict_guarantees;
+
+	/*
+	 * Last time at which a queue entered the current burst of
+	 * queues being activated shortly after each other; for more
+	 * details about this and the following parameters related to
+	 * a burst of activations, see the comments on the function
+	 * bfq_handle_burst.
+	 */
+	unsigned long last_ins_in_burst;
+	/*
+	 * Reference time interval used to decide whether a queue has
+	 * been activated shortly after @last_ins_in_burst.
+	 */
+	unsigned long bfq_burst_interval;
+	/* number of queues in the current burst of queue activations */
+	int burst_size;
+
+	/* common parent entity for the queues in the burst */
+	struct bfq_entity *burst_parent_entity;
+	/* Maximum burst size above which the current queue-activation
+	 * burst is deemed as 'large'.
+	 */
+	unsigned long bfq_large_burst_thresh;
+	/* true if a large queue-activation burst is in progress */
+	bool large_burst;
+	/*
+	 * Head of the burst list (as for the above fields, more
+	 * details in the comments on the function bfq_handle_burst).
+	 */
+	struct hlist_head burst_list;
+
+	/* if set to true, low-latency heuristics are enabled */
+	bool low_latency;
+	/*
+	 * Maximum factor by which the weight of a weight-raised queue
+	 * is multiplied.
+	 */
+	unsigned int bfq_wr_coeff;
+	/* maximum duration of a weight-raising period (jiffies) */
+	unsigned int bfq_wr_max_time;
+
+	/* Maximum weight-raising duration for soft real-time processes */
+	unsigned int bfq_wr_rt_max_time;
+	/*
+	 * Minimum idle period after which weight-raising may be
+	 * reactivated for a queue (in jiffies).
+	 */
+	unsigned int bfq_wr_min_idle_time;
+	/*
+	 * Minimum period between request arrivals after which
+	 * weight-raising may be reactivated for an already busy async
+	 * queue (in jiffies).
+	 */
+	unsigned long bfq_wr_min_inter_arr_async;
+
+	/* Max service-rate for a soft real-time queue, in sectors/sec */
+	unsigned int bfq_wr_max_softrt_rate;
+	/*
+	 * Cached value of the product R*T, used for computing the
+	 * maximum duration of weight raising automatically.
+	 */
+	u64 RT_prod;
+	/* device-speed class for the low-latency heuristic */
+	enum bfq_device_speed device_speed;
+
+	/* fallback dummy bfqq for extreme OOM conditions */
+	struct bfq_queue oom_bfqq;
+
+	spinlock_t lock;
+
+	/*
+	 * bic associated with the task issuing current bio for
+	 * merging. This and the next field are used as a support to
+	 * be able to perform the bic lookup, needed by bio-merge
+	 * functions, before the scheduler lock is taken, and thus
+	 * avoid taking the request-queue lock while the scheduler
+	 * lock is being held.
+	 */
+	struct bfq_io_cq *bio_bic;
+	/* bfqq associated with the task issuing current bio for merging */
+	struct bfq_queue *bio_bfqq;
+	/* Extra flag used only for TESTING */
+	bool bio_bfqq_set;
+
+	/*
+	 * Cached sbitmap shift, used to compute depth limits in
+	 * bfq_update_depths.
+	 */
+	unsigned int sb_shift;
+
+	/*
+	 * Depth limits used in bfq_limit_depth (see comments on the
+	 * function)
+	 */
+	unsigned int word_depths[2][2];
+};
+
+enum bfqq_state_flags {
+	BFQ_BFQQ_FLAG_just_created = 0,	/* queue just allocated */
+	BFQ_BFQQ_FLAG_busy,		/* has requests or is in service */
+	BFQ_BFQQ_FLAG_wait_request,	/* waiting for a request */
+	BFQ_BFQQ_FLAG_non_blocking_wait_rq, /*
+					     * waiting for a request
+					     * without idling the device
+					     */
+	BFQ_BFQQ_FLAG_fifo_expire,	/* FIFO checked in this slice */
+	BFQ_BFQQ_FLAG_has_short_ttime,	/* queue has a short think time */
+	BFQ_BFQQ_FLAG_sync,		/* synchronous queue */
+	BFQ_BFQQ_FLAG_IO_bound,		/*
+					 * bfqq has timed-out at least once
+					 * having consumed at most 2/10 of
+					 * its budget
+					 */
+	BFQ_BFQQ_FLAG_in_large_burst,	/*
+					 * bfqq activated in a large burst,
+					 * see comments to bfq_handle_burst.
+					 */
+	BFQ_BFQQ_FLAG_softrt_update,	/*
+					 * may need softrt-next-start
+					 * update
+					 */
+	BFQ_BFQQ_FLAG_coop,		/* bfqq is shared */
+	BFQ_BFQQ_FLAG_split_coop	/* shared bfqq will be split */
+};
+
+#define BFQ_BFQQ_FNS(name)						\
+static void bfq_mark_bfqq_##name(struct bfq_queue *bfqq)		\
+{									\
+	(bfqq)->flags |= (1 << BFQ_BFQQ_FLAG_##name);			\
+}									\
+static void bfq_clear_bfqq_##name(struct bfq_queue *bfqq)		\
+{									\
+	(bfqq)->flags &= ~(1 << BFQ_BFQQ_FLAG_##name);			\
+}									\
+static int bfq_bfqq_##name(const struct bfq_queue *bfqq)		\
+{									\
+	return ((bfqq)->flags & (1 << BFQ_BFQQ_FLAG_##name)) != 0;	\
+}
+
+BFQ_BFQQ_FNS(just_created);
+BFQ_BFQQ_FNS(busy);
+BFQ_BFQQ_FNS(wait_request);
+BFQ_BFQQ_FNS(non_blocking_wait_rq);
+BFQ_BFQQ_FNS(fifo_expire);
+BFQ_BFQQ_FNS(has_short_ttime);
+BFQ_BFQQ_FNS(sync);
+BFQ_BFQQ_FNS(IO_bound);
+BFQ_BFQQ_FNS(in_large_burst);
+BFQ_BFQQ_FNS(coop);
+BFQ_BFQQ_FNS(split_coop);
+BFQ_BFQQ_FNS(softrt_update);
+#undef BFQ_BFQQ_FNS
+
+/* Logging facilities. */
+#ifdef CONFIG_BFQ_REDIRECT_TO_CONSOLE
+
+static const char *checked_dev_name(const struct device *dev)
+{
+	static const char nodev[] = "nodev";
+
+	if (dev)
+		return dev_name(dev);
+
+	return nodev;
+}
+
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+static struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
+static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
+
+#define bfq_log_bfqq(bfqd, bfqq, fmt, args...)	do {			\
+	pr_crit("%s bfq%d%c %s [%s] " fmt "\n",				\
+		checked_dev_name((bfqd)->queue->backing_dev_info->dev),	\
+		(bfqq)->pid,						\
+		bfq_bfqq_sync((bfqq)) ? 'S' : 'A',			\
+		bfqq_group(bfqq)->blkg_path, __func__, ##args);		\
+} while (0)
+
+#define bfq_log_bfqg(bfqd, bfqg, fmt, args...)	do {			\
+	pr_crit("%s %s [%s] " fmt "\n",					\
+	checked_dev_name((bfqd)->queue->backing_dev_info->dev),		\
+	bfqg->blkg_path, __func__, ##args);				\
+} while (0)
+
+#else /* BFQ_GROUP_IOSCHED_ENABLED */
+
+#define bfq_log_bfqq(bfqd, bfqq, fmt, args...)				\
+	pr_crit("%s bfq%d%c [%s] " fmt "\n",				\
+		checked_dev_name((bfqd)->queue->backing_dev_info->dev),	\
+		(bfqq)->pid, bfq_bfqq_sync((bfqq)) ? 'S' : 'A',		\
+		__func__, ##args)
+#define bfq_log_bfqg(bfqd, bfqg, fmt, args...)		do {} while (0)
+
+#endif /* BFQ_GROUP_IOSCHED_ENABLED */
+
+#define bfq_log(bfqd, fmt, args...) \
+	pr_crit("%s bfq [%s] " fmt "\n",				\
+		checked_dev_name((bfqd)->queue->backing_dev_info->dev),	\
+		__func__, ##args)
+
+#else /* CONFIG_BFQ_REDIRECT_TO_CONSOLE */
+
+#if !defined(CONFIG_BLK_DEV_IO_TRACE)
+
+/* Avoid possible "unused-variable" warning. See commit message. */
+
+#define bfq_log_bfqq(bfqd, bfqq, fmt, args...)	((void) (bfqq))
+
+#define bfq_log_bfqg(bfqd, bfqg, fmt, args...)	((void) (bfqg))
+
+#define bfq_log(bfqd, fmt, args...)		do {} while (0)
+
+#else /* CONFIG_BLK_DEV_IO_TRACE */
+
+#include <linux/blktrace_api.h>
+
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+static struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
+static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
+
+#define bfq_log_bfqq(bfqd, bfqq, fmt, args...)	do {			\
+	blk_add_trace_msg((bfqd)->queue, "bfq%d%c %s [%s] " fmt, \
+			  (bfqq)->pid,			  \
+			  bfq_bfqq_sync((bfqq)) ? 'S' : 'A',	\
+			  bfqq_group(bfqq)->blkg_path, __func__, ##args); \
+} while (0)
+
+#define bfq_log_bfqg(bfqd, bfqg, fmt, args...)	do {			\
+	blk_add_trace_msg((bfqd)->queue, "%s [%s] " fmt, bfqg->blkg_path, \
+	__func__, ##args);\
+} while (0)
+
+#else /* BFQ_GROUP_IOSCHED_ENABLED */
+
+#define bfq_log_bfqq(bfqd, bfqq, fmt, args...)	\
+	blk_add_trace_msg((bfqd)->queue, "bfq%d%c [%s] " fmt, (bfqq)->pid, \
+			bfq_bfqq_sync((bfqq)) ? 'S' : 'A',		\
+				__func__, ##args)
+#define bfq_log_bfqg(bfqd, bfqg, fmt, args...)		do {} while (0)
+
+#endif /* BFQ_GROUP_IOSCHED_ENABLED */
+
+#define bfq_log(bfqd, fmt, args...) \
+	blk_add_trace_msg((bfqd)->queue, "bfq [%s] " fmt, __func__, ##args)
+
+#endif /* CONFIG_BLK_DEV_IO_TRACE */
+#endif /* CONFIG_BFQ_REDIRECT_TO_CONSOLE */
+
+/* Expiration reasons. */
+enum bfqq_expiration {
+	BFQ_BFQQ_TOO_IDLE = 0,		/*
+					 * queue has been idling for
+					 * too long
+					 */
+	BFQ_BFQQ_BUDGET_TIMEOUT,	/* budget took too long to be used */
+	BFQ_BFQQ_BUDGET_EXHAUSTED,	/* budget consumed */
+	BFQ_BFQQ_NO_MORE_REQUESTS,	/* the queue has no more requests */
+	BFQ_BFQQ_PREEMPTED		/* preemption in progress */
+};
+
+
+struct bfqg_stats {
+#if defined(BFQ_GROUP_IOSCHED_ENABLED) &&  defined(CONFIG_DEBUG_BLK_CGROUP)
+	/* number of ios merged */
+	struct blkg_rwstat		merged;
+	/* total time spent on device in ns, may not be accurate w/ queueing */
+	struct blkg_rwstat		service_time;
+	/* total time spent waiting in scheduler queue in ns */
+	struct blkg_rwstat		wait_time;
+	/* number of IOs queued up */
+	struct blkg_rwstat		queued;
+	/* total disk time and nr sectors dispatched by this group */
+	struct blkg_stat		time;
+	/* sum of number of ios queued across all samples */
+	struct blkg_stat		avg_queue_size_sum;
+	/* count of samples taken for average */
+	struct blkg_stat		avg_queue_size_samples;
+	/* how many times this group has been removed from service tree */
+	struct blkg_stat		dequeue;
+	/* total time spent waiting for it to be assigned a timeslice. */
+	struct blkg_stat		group_wait_time;
+	/* time spent idling for this blkcg_gq */
+	struct blkg_stat		idle_time;
+	/* total time with empty current active q with other requests queued */
+	struct blkg_stat		empty_time;
+	/* fields after this shouldn't be cleared on stat reset */
+	uint64_t			start_group_wait_time;
+	uint64_t			start_idle_time;
+	uint64_t			start_empty_time;
+	uint16_t			flags;
+#endif /* BFQ_GROUP_IOSCHED_ENABLED && CONFIG_DEBUG_BLK_CGROUP */
+};
+
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+/*
+ * struct bfq_group_data - per-blkcg storage for the blkio subsystem.
+ *
+ * @ps: @blkcg_policy_storage that this structure inherits
+ * @weight: weight of the bfq_group
+ */
+struct bfq_group_data {
+	/* must be the first member */
+	struct blkcg_policy_data pd;
+
+	unsigned int weight;
+};
+
+/**
+ * struct bfq_group - per (device, cgroup) data structure.
+ * @entity: schedulable entity to insert into the parent group sched_data.
+ * @sched_data: own sched_data, to contain child entities (they may be
+ *              both bfq_queues and bfq_groups).
+ * @bfqd: the bfq_data for the device this group acts upon.
+ * @async_bfqq: array of async queues for all the tasks belonging to
+ *              the group, one queue per ioprio value per ioprio_class,
+ *              except for the idle class that has only one queue.
+ * @async_idle_bfqq: async queue for the idle class (ioprio is ignored).
+ * @my_entity: pointer to @entity, %NULL for the toplevel group; used
+ *             to avoid too many special cases during group creation/
+ *             migration.
+ * @active_entities: number of active entities belonging to the group;
+ *                   unused for the root group. Used to know whether there
+ *                   are groups with more than one active @bfq_entity
+ *                   (see the comments to the function
+ *                   bfq_bfqq_may_idle()).
+ * @rq_pos_tree: rbtree sorted by next_request position, used when
+ *               determining if two or more queues have interleaving
+ *               requests (see bfq_find_close_cooperator()).
+ *
+ * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup
+ * there is a set of bfq_groups, each one collecting the lower-level
+ * entities belonging to the group that are acting on the same device.
+ *
+ * Locking works as follows:
+ *    o @bfqd is protected by the queue lock, RCU is used to access it
+ *      from the readers.
+ *    o All the other fields are protected by the @bfqd queue lock.
+ */
+struct bfq_group {
+	/* must be the first member */
+	struct blkg_policy_data pd;
+
+	/* cached path for this blkg (see comments in bfq_bic_update_cgroup) */
+	char blkg_path[128];
+
+	/* reference counter (see comments in bfq_bic_update_cgroup) */
+	int ref;
+
+	struct bfq_entity entity;
+	struct bfq_sched_data sched_data;
+
+	void *bfqd;
+
+	struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
+	struct bfq_queue *async_idle_bfqq;
+
+	struct bfq_entity *my_entity;
+
+	int active_entities;
+
+	struct rb_root rq_pos_tree;
+
+	struct bfqg_stats stats;
+};
+
+#else
+struct bfq_group {
+	struct bfq_sched_data sched_data;
+
+	struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
+	struct bfq_queue *async_idle_bfqq;
+
+	struct rb_root rq_pos_tree;
+};
+#endif
+
+static struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity);
+
+static unsigned int bfq_class_idx(struct bfq_entity *entity)
+{
+	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+
+	return bfqq ? bfqq->ioprio_class - 1 :
+		BFQ_DEFAULT_GRP_CLASS - 1;
+}
+
+static struct bfq_service_tree *
+bfq_entity_service_tree(struct bfq_entity *entity)
+{
+	struct bfq_sched_data *sched_data = entity->sched_data;
+	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+	unsigned int idx = bfq_class_idx(entity);
+
+	BUG_ON(idx >= BFQ_IOPRIO_CLASSES);
+	BUG_ON(sched_data == NULL);
+
+	if (bfqq)
+		bfq_log_bfqq(bfqq->bfqd, bfqq,
+			     "%p %d",
+			     sched_data->service_tree + idx, idx);
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	else {
+		struct bfq_group *bfqg =
+			container_of(entity, struct bfq_group, entity);
+
+		bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
+			     "%p %d",
+			     sched_data->service_tree + idx, idx);
+	}
+#endif
+	return sched_data->service_tree + idx;
+}
+
+static struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync)
+{
+	return bic->bfqq[is_sync];
+}
+
+static void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq,
+			 bool is_sync)
+{
+	bic->bfqq[is_sync] = bfqq;
+}
+
+static struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic)
+{
+	return bic->icq.q->elevator->elevator_data;
+}
+
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+
+static struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq)
+{
+	struct bfq_entity *group_entity = bfqq->entity.parent;
+
+	if (!group_entity)
+		group_entity = &bfqq->bfqd->root_group->entity;
+
+	return container_of(group_entity, struct bfq_group, entity);
+}
+
+#else
+
+static struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq)
+{
+	return bfqq->bfqd->root_group;
+}
+
+#endif
+
+static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio);
+static void bfq_put_queue(struct bfq_queue *bfqq);
+static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
+				       struct bio *bio, bool is_sync,
+				       struct bfq_io_cq *bic);
+static void bfq_end_wr_async_queues(struct bfq_data *bfqd,
+				    struct bfq_group *bfqg);
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
+#endif
+static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq);
+
+#endif /* _BFQ_H */
diff --git a/block/bfq-sched.c b/block/bfq-sched.c
new file mode 100644
index 000000000..ead34c30a
--- /dev/null
+++ b/block/bfq-sched.c
@@ -0,0 +1,2066 @@
+/*
+ * BFQ: Hierarchical B-WF2Q+ scheduler.
+ *
+ * Based on ideas and code from CFQ:
+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
+ *
+ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
+ *		      Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Copyright (C) 2016 Paolo Valente <paolo.valente@linaro.org>
+ */
+
+static struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
+
+/**
+ * bfq_gt - compare two timestamps.
+ * @a: first ts.
+ * @b: second ts.
+ *
+ * Return @a > @b, dealing with wrapping correctly.
+ */
+static int bfq_gt(u64 a, u64 b)
+{
+	return (s64)(a - b) > 0;
+}
+
+static struct bfq_entity *bfq_root_active_entity(struct rb_root *tree)
+{
+	struct rb_node *node = tree->rb_node;
+
+	return rb_entry(node, struct bfq_entity, rb_node);
+}
+
+static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
+						 bool expiration);
+
+static bool bfq_update_parent_budget(struct bfq_entity *next_in_service);
+
+/**
+ * bfq_update_next_in_service - update sd->next_in_service
+ * @sd: sched_data for which to perform the update.
+ * @new_entity: if not NULL, pointer to the entity whose activation,
+ *		requeueing or repositionig triggered the invocation of
+ *		this function.
+ * @expiration: id true, this function is being invoked after the
+ *		expiration of the in-service entity
+ *
+ * This function is called to update sd->next_in_service, which, in
+ * its turn, may change as a consequence of the insertion or
+ * extraction of an entity into/from one of the active trees of
+ * sd. These insertions/extractions occur as a consequence of
+ * activations/deactivations of entities, with some activations being
+ * 'true' activations, and other activations being requeueings (i.e.,
+ * implementing the second, requeueing phase of the mechanism used to
+ * reposition an entity in its active tree; see comments on
+ * __bfq_activate_entity and __bfq_requeue_entity for details). In
+ * both the last two activation sub-cases, new_entity points to the
+ * just activated or requeued entity.
+ *
+ * Returns true if sd->next_in_service changes in such a way that
+ * entity->parent may become the next_in_service for its parent
+ * entity.
+ */
+static bool bfq_update_next_in_service(struct bfq_sched_data *sd,
+				       struct bfq_entity *new_entity,
+				       bool expiration)
+{
+	struct bfq_entity *next_in_service = sd->next_in_service;
+	struct bfq_queue *bfqq;
+	bool parent_sched_may_change = false;
+	bool change_without_lookup = false;
+
+	/*
+	 * If this update is triggered by the activation, requeueing
+	 * or repositiong of an entity that does not coincide with
+	 * sd->next_in_service, then a full lookup in the active tree
+	 * can be avoided. In fact, it is enough to check whether the
+	 * just-modified entity has the same priority as
+	 * sd->next_in_service, is eligible and has a lower virtual
+	 * finish time than sd->next_in_service. If this compound
+	 * condition holds, then the new entity becomes the new
+	 * next_in_service. Otherwise no change is needed.
+	 */
+	if (new_entity && new_entity != sd->next_in_service) {
+		/*
+		 * Flag used to decide whether to replace
+		 * sd->next_in_service with new_entity. Tentatively
+		 * set to true, and left as true if
+		 * sd->next_in_service is NULL.
+		 */
+		change_without_lookup = true;
+
+		/*
+		 * If there is already a next_in_service candidate
+		 * entity, then compare timestamps to decide whether
+		 * to replace sd->service_tree with new_entity.
+		 */
+		if (next_in_service) {
+			unsigned int new_entity_class_idx =
+				bfq_class_idx(new_entity);
+			struct bfq_service_tree *st =
+				sd->service_tree + new_entity_class_idx;
+
+			change_without_lookup =
+				(new_entity_class_idx ==
+				 bfq_class_idx(next_in_service)
+				 &&
+				 !bfq_gt(new_entity->start, st->vtime)
+				 &&
+				 bfq_gt(next_in_service->finish,
+					new_entity->finish));
+		}
+
+		if (change_without_lookup) {
+			next_in_service = new_entity;
+			bfqq = bfq_entity_to_bfqq(next_in_service);
+
+			if (bfqq)
+				bfq_log_bfqq(bfqq->bfqd, bfqq,
+				"chose without lookup");
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+			else {
+				struct bfq_group *bfqg =
+					container_of(next_in_service,
+						     struct bfq_group, entity);
+
+				bfq_log_bfqg((struct bfq_data*)bfqg->bfqd, bfqg,
+				"chose without lookup");
+			}
+#endif
+		}
+	}
+
+	if (!change_without_lookup) /* lookup needed */
+		next_in_service = bfq_lookup_next_entity(sd, expiration);
+
+	if (next_in_service)
+		parent_sched_may_change = !sd->next_in_service ||
+			bfq_update_parent_budget(next_in_service);
+
+	sd->next_in_service = next_in_service;
+
+	if (!next_in_service)
+		return parent_sched_may_change;
+
+	bfqq = bfq_entity_to_bfqq(next_in_service);
+	if (bfqq)
+		bfq_log_bfqq(bfqq->bfqd, bfqq,
+			     "chosen this queue");
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	else {
+		struct bfq_group *bfqg =
+			container_of(next_in_service,
+				     struct bfq_group, entity);
+
+		bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
+			     "chosen this entity");
+	}
+#endif
+	return parent_sched_may_change;
+}
+
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+/* both next loops stop at one of the child entities of the root group */
+#define for_each_entity(entity)				\
+	for (; entity ; entity = entity->parent)
+
+/*
+ * For each iteration, compute parent in advance, so as to be safe if
+ * entity is deallocated during the iteration. Such a deallocation may
+ * happen as a consequence of a bfq_put_queue that frees the bfq_queue
+ * containing entity.
+ */
+#define for_each_entity_safe(entity, parent)				\
+	for (; entity && ({ parent = entity->parent; 1; }); entity = parent)
+
+/*
+ * Returns true if this budget changes may let next_in_service->parent
+ * become the next_in_service entity for its parent entity.
+ */
+static bool bfq_update_parent_budget(struct bfq_entity *next_in_service)
+{
+	struct bfq_entity *bfqg_entity;
+	struct bfq_group *bfqg;
+	struct bfq_sched_data *group_sd;
+	bool ret = false;
+
+	BUG_ON(!next_in_service);
+
+	group_sd = next_in_service->sched_data;
+
+	bfqg = container_of(group_sd, struct bfq_group, sched_data);
+	/*
+	 * bfq_group's my_entity field is not NULL only if the group
+	 * is not the root group. We must not touch the root entity
+	 * as it must never become an in-service entity.
+	 */
+	bfqg_entity = bfqg->my_entity;
+	if (bfqg_entity) {
+		if (bfqg_entity->budget > next_in_service->budget)
+			ret = true;
+		bfqg_entity->budget = next_in_service->budget;
+	}
+
+	return ret;
+}
+
+/*
+ * This function tells whether entity stops being a candidate for next
+ * service, according to the restrictive definition of the field
+ * next_in_service. In particular, this function is invoked for an
+ * entity that is about to be set in service.
+ *
+ * If entity is a queue, then the entity is no longer a candidate for
+ * next service according to the that definition, because entity is
+ * about to become the in-service queue. This function then returns
+ * true if entity is a queue.
+ *
+ * In contrast, entity could still be a candidate for next service if
+ * it is not a queue, and has more than one active child. In fact,
+ * even if one of its children is about to be set in service, other
+ * active children may still be the next to serve, for the parent
+ * entity, even according to the above definition. As a consequence, a
+ * non-queue entity is not a candidate for next-service only if it has
+ * only one active child. And only if this condition holds, then this
+ * function returns true for a non-queue entity.
+ */
+static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
+{
+	struct bfq_group *bfqg;
+
+	if (bfq_entity_to_bfqq(entity))
+		return true;
+
+	bfqg = container_of(entity, struct bfq_group, entity);
+
+	BUG_ON(bfqg == ((struct bfq_data *)(bfqg->bfqd))->root_group);
+	BUG_ON(bfqg->active_entities == 0);
+	/*
+	 * The field active_entities does not always contain the
+	 * actual number of active children entities: it happens to
+	 * not account for the in-service entity in case the latter is
+	 * removed from its active tree (which may get done after
+	 * invoking the function bfq_no_longer_next_in_service in
+	 * bfq_get_next_queue). Fortunately, here, i.e., while
+	 * bfq_no_longer_next_in_service is not yet completed in
+	 * bfq_get_next_queue, bfq_active_extract has not yet been
+	 * invoked, and thus active_entities still coincides with the
+	 * actual number of active entities.
+	 */
+	if (bfqg->active_entities == 1)
+		return true;
+
+	return false;
+}
+
+#else /* BFQ_GROUP_IOSCHED_ENABLED */
+#define for_each_entity(entity)	\
+	for (; entity ; entity = NULL)
+
+#define for_each_entity_safe(entity, parent) \
+	for (parent = NULL; entity ; entity = parent)
+
+static bool bfq_update_parent_budget(struct bfq_entity *next_in_service)
+{
+	return false;
+}
+
+static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
+{
+	return true;
+}
+
+#endif /* BFQ_GROUP_IOSCHED_ENABLED */
+
+/*
+ * Shift for timestamp calculations.  This actually limits the maximum
+ * service allowed in one timestamp delta (small shift values increase it),
+ * the maximum total weight that can be used for the queues in the system
+ * (big shift values increase it), and the period of virtual time
+ * wraparounds.
+ */
+#define WFQ_SERVICE_SHIFT	22
+
+static struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity)
+{
+	struct bfq_queue *bfqq = NULL;
+
+	BUG_ON(!entity);
+
+	if (!entity->my_sched_data)
+		bfqq = container_of(entity, struct bfq_queue, entity);
+
+	return bfqq;
+}
+
+
+/**
+ * bfq_delta - map service into the virtual time domain.
+ * @service: amount of service.
+ * @weight: scale factor (weight of an entity or weight sum).
+ */
+static u64 bfq_delta(unsigned long service, unsigned long weight)
+{
+	u64 d = (u64)service << WFQ_SERVICE_SHIFT;
+
+	do_div(d, weight);
+	return d;
+}
+
+/**
+ * bfq_calc_finish - assign the finish time to an entity.
+ * @entity: the entity to act upon.
+ * @service: the service to be charged to the entity.
+ */
+static void bfq_calc_finish(struct bfq_entity *entity, unsigned long service)
+{
+	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+	unsigned long long start, finish, delta;
+
+	BUG_ON(entity->weight == 0);
+
+	entity->finish = entity->start +
+		bfq_delta(service, entity->weight);
+
+	start = ((entity->start>>10)*1000)>>12;
+	finish = ((entity->finish>>10)*1000)>>12;
+	delta = ((bfq_delta(service, entity->weight)>>10)*1000)>>12;
+
+	if (bfqq) {
+		bfq_log_bfqq(bfqq->bfqd, bfqq,
+			"serv %lu, w %d",
+			service, entity->weight);
+		bfq_log_bfqq(bfqq->bfqd, bfqq,
+			"start %llu, finish %llu, delta %llu",
+			start, finish, delta);
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	} else {
+		struct bfq_group *bfqg =
+			container_of(entity, struct bfq_group, entity);
+
+		bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
+			"group: serv %lu, w %d",
+			     service, entity->weight);
+		bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
+			"group: start %llu, finish %llu, delta %llu",
+			start, finish, delta);
+#endif
+	}
+}
+
+/**
+ * bfq_entity_of - get an entity from a node.
+ * @node: the node field of the entity.
+ *
+ * Convert a node pointer to the relative entity.  This is used only
+ * to simplify the logic of some functions and not as the generic
+ * conversion mechanism because, e.g., in the tree walking functions,
+ * the check for a %NULL value would be redundant.
+ */
+static struct bfq_entity *bfq_entity_of(struct rb_node *node)
+{
+	struct bfq_entity *entity = NULL;
+
+	if (node)
+		entity = rb_entry(node, struct bfq_entity, rb_node);
+
+	return entity;
+}
+
+/**
+ * bfq_extract - remove an entity from a tree.
+ * @root: the tree root.
+ * @entity: the entity to remove.
+ */
+static void bfq_extract(struct rb_root *root, struct bfq_entity *entity)
+{
+	BUG_ON(entity->tree != root);
+
+	entity->tree = NULL;
+	rb_erase(&entity->rb_node, root);
+}
+
+/**
+ * bfq_idle_extract - extract an entity from the idle tree.
+ * @st: the service tree of the owning @entity.
+ * @entity: the entity being removed.
+ */
+static void bfq_idle_extract(struct bfq_service_tree *st,
+			     struct bfq_entity *entity)
+{
+	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+	struct rb_node *next;
+
+	BUG_ON(entity->tree != &st->idle);
+
+	if (entity == st->first_idle) {
+		next = rb_next(&entity->rb_node);
+		st->first_idle = bfq_entity_of(next);
+	}
+
+	if (entity == st->last_idle) {
+		next = rb_prev(&entity->rb_node);
+		st->last_idle = bfq_entity_of(next);
+	}
+
+	bfq_extract(&st->idle, entity);
+
+	if (bfqq)
+		list_del(&bfqq->bfqq_list);
+}
+
+/**
+ * bfq_insert - generic tree insertion.
+ * @root: tree root.
+ * @entity: entity to insert.
+ *
+ * This is used for the idle and the active tree, since they are both
+ * ordered by finish time.
+ */
+static void bfq_insert(struct rb_root *root, struct bfq_entity *entity)
+{
+	struct bfq_entity *entry;
+	struct rb_node **node = &root->rb_node;
+	struct rb_node *parent = NULL;
+
+	BUG_ON(entity->tree);
+
+	while (*node) {
+		parent = *node;
+		entry = rb_entry(parent, struct bfq_entity, rb_node);
+
+		if (bfq_gt(entry->finish, entity->finish))
+			node = &parent->rb_left;
+		else
+			node = &parent->rb_right;
+	}
+
+	rb_link_node(&entity->rb_node, parent, node);
+	rb_insert_color(&entity->rb_node, root);
+
+	entity->tree = root;
+}
+
+/**
+ * bfq_update_min - update the min_start field of a entity.
+ * @entity: the entity to update.
+ * @node: one of its children.
+ *
+ * This function is called when @entity may store an invalid value for
+ * min_start due to updates to the active tree.  The function  assumes
+ * that the subtree rooted at @node (which may be its left or its right
+ * child) has a valid min_start value.
+ */
+static void bfq_update_min(struct bfq_entity *entity, struct rb_node *node)
+{
+	struct bfq_entity *child;
+
+	if (node) {
+		child = rb_entry(node, struct bfq_entity, rb_node);
+		if (bfq_gt(entity->min_start, child->min_start))
+			entity->min_start = child->min_start;
+	}
+}
+
+/**
+ * bfq_update_active_node - recalculate min_start.
+ * @node: the node to update.
+ *
+ * @node may have changed position or one of its children may have moved,
+ * this function updates its min_start value.  The left and right subtrees
+ * are assumed to hold a correct min_start value.
+ */
+static void bfq_update_active_node(struct rb_node *node)
+{
+	struct bfq_entity *entity = rb_entry(node, struct bfq_entity, rb_node);
+	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+
+	entity->min_start = entity->start;
+	bfq_update_min(entity, node->rb_right);
+	bfq_update_min(entity, node->rb_left);
+
+	if (bfqq) {
+		bfq_log_bfqq(bfqq->bfqd, bfqq,
+			     "new min_start %llu",
+			     ((entity->min_start>>10)*1000)>>12);
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	} else {
+		struct bfq_group *bfqg =
+			container_of(entity, struct bfq_group, entity);
+
+		bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
+			     "new min_start %llu",
+			     ((entity->min_start>>10)*1000)>>12);
+#endif
+	}
+}
+
+/**
+ * bfq_update_active_tree - update min_start for the whole active tree.
+ * @node: the starting node.
+ *
+ * @node must be the deepest modified node after an update.  This function
+ * updates its min_start using the values held by its children, assuming
+ * that they did not change, and then updates all the nodes that may have
+ * changed in the path to the root.  The only nodes that may have changed
+ * are the ones in the path or their siblings.
+ */
+static void bfq_update_active_tree(struct rb_node *node)
+{
+	struct rb_node *parent;
+
+up:
+	bfq_update_active_node(node);
+
+	parent = rb_parent(node);
+	if (!parent)
+		return;
+
+	if (node == parent->rb_left && parent->rb_right)
+		bfq_update_active_node(parent->rb_right);
+	else if (parent->rb_left)
+		bfq_update_active_node(parent->rb_left);
+
+	node = parent;
+	goto up;
+}
+
+static void bfq_weights_tree_add(struct bfq_data *bfqd,
+				 struct bfq_entity *entity,
+				 struct rb_root *root);
+
+static void bfq_weights_tree_remove(struct bfq_data *bfqd,
+				    struct bfq_entity *entity,
+				    struct rb_root *root);
+
+
+/**
+ * bfq_active_insert - insert an entity in the active tree of its
+ *                     group/device.
+ * @st: the service tree of the entity.
+ * @entity: the entity being inserted.
+ *
+ * The active tree is ordered by finish time, but an extra key is kept
+ * per each node, containing the minimum value for the start times of
+ * its children (and the node itself), so it's possible to search for
+ * the eligible node with the lowest finish time in logarithmic time.
+ */
+static void bfq_active_insert(struct bfq_service_tree *st,
+			      struct bfq_entity *entity)
+{
+	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+	struct rb_node *node = &entity->rb_node;
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	struct bfq_sched_data *sd = NULL;
+	struct bfq_group *bfqg = NULL;
+	struct bfq_data *bfqd = NULL;
+#endif
+
+	bfq_insert(&st->active, entity);
+
+	if (node->rb_left)
+		node = node->rb_left;
+	else if (node->rb_right)
+		node = node->rb_right;
+
+	bfq_update_active_tree(node);
+
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	sd = entity->sched_data;
+	bfqg = container_of(sd, struct bfq_group, sched_data);
+	BUG_ON(!bfqg);
+	bfqd = (struct bfq_data *)bfqg->bfqd;
+#endif
+	if (bfqq)
+		list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list);
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	else { /* bfq_group */
+		BUG_ON(!bfqd);
+		bfq_weights_tree_add(bfqd, entity, &bfqd->group_weights_tree);
+	}
+	if (bfqg != bfqd->root_group) {
+		BUG_ON(!bfqg);
+		BUG_ON(!bfqd);
+		bfqg->active_entities++;
+	}
+#endif
+}
+
+/**
+ * bfq_ioprio_to_weight - calc a weight from an ioprio.
+ * @ioprio: the ioprio value to convert.
+ */
+static unsigned short bfq_ioprio_to_weight(int ioprio)
+{
+	BUG_ON(ioprio < 0 || ioprio >= IOPRIO_BE_NR);
+	return (IOPRIO_BE_NR - ioprio) * BFQ_WEIGHT_CONVERSION_COEFF;
+}
+
+/**
+ * bfq_weight_to_ioprio - calc an ioprio from a weight.
+ * @weight: the weight value to convert.
+ *
+ * To preserve as much as possible the old only-ioprio user interface,
+ * 0 is used as an escape ioprio value for weights (numerically) equal or
+ * larger than IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF.
+ */
+static unsigned short bfq_weight_to_ioprio(int weight)
+{
+	BUG_ON(weight < BFQ_MIN_WEIGHT || weight > BFQ_MAX_WEIGHT);
+	return IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF - weight < 0 ?
+		0 : IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF - weight;
+}
+
+static void bfq_get_entity(struct bfq_entity *entity)
+{
+	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+
+	if (bfqq) {
+		bfqq->ref++;
+		bfq_log_bfqq(bfqq->bfqd, bfqq, "%p %d",
+			     bfqq, bfqq->ref);
+	}
+}
+
+/**
+ * bfq_find_deepest - find the deepest node that an extraction can modify.
+ * @node: the node being removed.
+ *
+ * Do the first step of an extraction in an rb tree, looking for the
+ * node that will replace @node, and returning the deepest node that
+ * the following modifications to the tree can touch.  If @node is the
+ * last node in the tree return %NULL.
+ */
+static struct rb_node *bfq_find_deepest(struct rb_node *node)
+{
+	struct rb_node *deepest;
+
+	if (!node->rb_right && !node->rb_left)
+		deepest = rb_parent(node);
+	else if (!node->rb_right)
+		deepest = node->rb_left;
+	else if (!node->rb_left)
+		deepest = node->rb_right;
+	else {
+		deepest = rb_next(node);
+		if (deepest->rb_right)
+			deepest = deepest->rb_right;
+		else if (rb_parent(deepest) != node)
+			deepest = rb_parent(deepest);
+	}
+
+	return deepest;
+}
+
+/**
+ * bfq_active_extract - remove an entity from the active tree.
+ * @st: the service_tree containing the tree.
+ * @entity: the entity being removed.
+ */
+static void bfq_active_extract(struct bfq_service_tree *st,
+			       struct bfq_entity *entity)
+{
+	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+	struct rb_node *node;
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	struct bfq_sched_data *sd = NULL;
+	struct bfq_group *bfqg = NULL;
+	struct bfq_data *bfqd = NULL;
+#endif
+
+	node = bfq_find_deepest(&entity->rb_node);
+	bfq_extract(&st->active, entity);
+
+	if (node)
+		bfq_update_active_tree(node);
+
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	sd = entity->sched_data;
+	bfqg = container_of(sd, struct bfq_group, sched_data);
+	BUG_ON(!bfqg);
+	bfqd = (struct bfq_data *)bfqg->bfqd;
+#endif
+	if (bfqq)
+		list_del(&bfqq->bfqq_list);
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	else { /* bfq_group */
+		BUG_ON(!bfqd);
+		bfq_weights_tree_remove(bfqd, entity,
+					&bfqd->group_weights_tree);
+	}
+	if (bfqg != bfqd->root_group) {
+		BUG_ON(!bfqg);
+		BUG_ON(!bfqd);
+		BUG_ON(!bfqg->active_entities);
+		bfqg->active_entities--;
+	}
+#endif
+}
+
+/**
+ * bfq_idle_insert - insert an entity into the idle tree.
+ * @st: the service tree containing the tree.
+ * @entity: the entity to insert.
+ */
+static void bfq_idle_insert(struct bfq_service_tree *st,
+			    struct bfq_entity *entity)
+{
+	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+	struct bfq_entity *first_idle = st->first_idle;
+	struct bfq_entity *last_idle = st->last_idle;
+
+	if (!first_idle || bfq_gt(first_idle->finish, entity->finish))
+		st->first_idle = entity;
+	if (!last_idle || bfq_gt(entity->finish, last_idle->finish))
+		st->last_idle = entity;
+
+	bfq_insert(&st->idle, entity);
+
+	if (bfqq)
+		list_add(&bfqq->bfqq_list, &bfqq->bfqd->idle_list);
+}
+
+/**
+ * bfq_forget_entity - do not consider entity any longer for scheduling
+ * @st: the service tree.
+ * @entity: the entity being removed.
+ * @is_in_service: true if entity is currently the in-service entity.
+ *
+ * Forget everything about @entity. In addition, if entity represents
+ * a queue, and the latter is not in service, then release the service
+ * reference to the queue (the one taken through bfq_get_entity). In
+ * fact, in this case, there is really no more service reference to
+ * the queue, as the latter is also outside any service tree. If,
+ * instead, the queue is in service, then __bfq_bfqd_reset_in_service
+ * will take care of putting the reference when the queue finally
+ * stops being served.
+ */
+static void bfq_forget_entity(struct bfq_service_tree *st,
+			      struct bfq_entity *entity,
+			      bool is_in_service)
+{
+	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+	BUG_ON(!entity->on_st);
+
+	entity->on_st = false;
+	st->wsum -= entity->weight;
+	if (bfqq && !is_in_service) {
+		bfq_log_bfqq(bfqq->bfqd, bfqq, "(before): %p %d",
+			     bfqq, bfqq->ref);
+		bfq_put_queue(bfqq);
+	}
+}
+
+/**
+ * bfq_put_idle_entity - release the idle tree ref of an entity.
+ * @st: service tree for the entity.
+ * @entity: the entity being released.
+ */
+static void bfq_put_idle_entity(struct bfq_service_tree *st,
+				struct bfq_entity *entity)
+{
+	bfq_idle_extract(st, entity);
+	bfq_forget_entity(st, entity,
+			  entity == entity->sched_data->in_service_entity);
+}
+
+/**
+ * bfq_forget_idle - update the idle tree if necessary.
+ * @st: the service tree to act upon.
+ *
+ * To preserve the global O(log N) complexity we only remove one entry here;
+ * as the idle tree will not grow indefinitely this can be done safely.
+ */
+static void bfq_forget_idle(struct bfq_service_tree *st)
+{
+	struct bfq_entity *first_idle = st->first_idle;
+	struct bfq_entity *last_idle = st->last_idle;
+
+	if (RB_EMPTY_ROOT(&st->active) && last_idle &&
+	    !bfq_gt(last_idle->finish, st->vtime)) {
+		/*
+		 * Forget the whole idle tree, increasing the vtime past
+		 * the last finish time of idle entities.
+		 */
+		st->vtime = last_idle->finish;
+	}
+
+	if (first_idle && !bfq_gt(first_idle->finish, st->vtime))
+		bfq_put_idle_entity(st, first_idle);
+}
+
+/*
+ * Update weight and priority of entity. If update_class_too is true,
+ * then update the ioprio_class of entity too.
+ *
+ * The reason why the update of ioprio_class is controlled through the
+ * last parameter is as follows. Changing the ioprio class of an
+ * entity implies changing the destination service trees for that
+ * entity. If such a change occurred when the entity is already on one
+ * of the service trees for its previous class, then the state of the
+ * entity would become more complex: none of the new possible service
+ * trees for the entity, according to bfq_entity_service_tree(), would
+ * match any of the possible service trees on which the entity
+ * is. Complex operations involving these trees, such as entity
+ * activations and deactivations, should take into account this
+ * additional complexity.  To avoid this issue, this function is
+ * invoked with update_class_too unset in the points in the code where
+ * entity may happen to be on some tree.
+ */
+static struct bfq_service_tree *
+__bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
+				struct bfq_entity *entity,
+				bool update_class_too)
+{
+	struct bfq_service_tree *new_st = old_st;
+
+	if (entity->prio_changed) {
+		struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+		unsigned int prev_weight, new_weight;
+		struct bfq_data *bfqd = NULL;
+		struct rb_root *root;
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+		struct bfq_sched_data *sd;
+		struct bfq_group *bfqg;
+#endif
+
+		if (bfqq)
+			bfqd = bfqq->bfqd;
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+		else {
+			sd = entity->my_sched_data;
+			bfqg = container_of(sd, struct bfq_group, sched_data);
+			BUG_ON(!bfqg);
+			bfqd = (struct bfq_data *)bfqg->bfqd;
+			BUG_ON(!bfqd);
+		}
+#endif
+
+		BUG_ON(entity->tree && update_class_too);
+		BUG_ON(old_st->wsum < entity->weight);
+		old_st->wsum -= entity->weight;
+
+		if (entity->new_weight != entity->orig_weight) {
+			if (entity->new_weight < BFQ_MIN_WEIGHT ||
+			    entity->new_weight > BFQ_MAX_WEIGHT) {
+				pr_crit("update_weight_prio: new_weight %d\n",
+					entity->new_weight);
+				if (entity->new_weight < BFQ_MIN_WEIGHT)
+					entity->new_weight = BFQ_MIN_WEIGHT;
+				else
+					entity->new_weight = BFQ_MAX_WEIGHT;
+			}
+			entity->orig_weight = entity->new_weight;
+			if (bfqq)
+				bfqq->ioprio =
+				  bfq_weight_to_ioprio(entity->orig_weight);
+		}
+
+		if (bfqq && update_class_too)
+			bfqq->ioprio_class = bfqq->new_ioprio_class;
+
+		/*
+		 * Reset prio_changed only if the ioprio_class change
+		 * is not pending any longer.
+		 */
+		if (!bfqq || bfqq->ioprio_class == bfqq->new_ioprio_class)
+			entity->prio_changed = 0;
+
+		/*
+		 * NOTE: here we may be changing the weight too early,
+		 * this will cause unfairness.  The correct approach
+		 * would have required additional complexity to defer
+		 * weight changes to the proper time instants (i.e.,
+		 * when entity->finish <= old_st->vtime).
+		 */
+		new_st = bfq_entity_service_tree(entity);
+
+		prev_weight = entity->weight;
+		new_weight = entity->orig_weight *
+			     (bfqq ? bfqq->wr_coeff : 1);
+		/*
+		 * If the weight of the entity changes, remove the entity
+		 * from its old weight counter (if there is a counter
+		 * associated with the entity), and add it to the counter
+		 * associated with its new weight.
+		 */
+		if (prev_weight != new_weight) {
+			if (bfqq)
+				bfq_log_bfqq(bfqq->bfqd, bfqq,
+					     "weight changed %d %d(%d %d)",
+					     prev_weight, new_weight,
+					     entity->orig_weight,
+					     bfqq->wr_coeff);
+
+			root = bfqq ? &bfqd->queue_weights_tree :
+				      &bfqd->group_weights_tree;
+			bfq_weights_tree_remove(bfqd, entity, root);
+		}
+		entity->weight = new_weight;
+		/*
+		 * Add the entity to its weights tree only if it is
+		 * not associated with a weight-raised queue.
+		 */
+		if (prev_weight != new_weight &&
+		    (bfqq ? bfqq->wr_coeff == 1 : 1))
+			/* If we get here, root has been initialized. */
+			bfq_weights_tree_add(bfqd, entity, root);
+
+		new_st->wsum += entity->weight;
+
+		if (new_st != old_st) {
+			BUG_ON(!update_class_too);
+			entity->start = new_st->vtime;
+		}
+	}
+
+	return new_st;
+}
+
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+static void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg);
+#endif
+
+/**
+ * bfq_bfqq_served - update the scheduler status after selection for
+ *                   service.
+ * @bfqq: the queue being served.
+ * @served: bytes to transfer.
+ *
+ * NOTE: this can be optimized, as the timestamps of upper level entities
+ * are synchronized every time a new bfqq is selected for service.  By now,
+ * we keep it to better check consistency.
+ */
+static void bfq_bfqq_served(struct bfq_queue *bfqq, int served)
+{
+	struct bfq_entity *entity = &bfqq->entity;
+	struct bfq_service_tree *st;
+
+	if (!bfqq->service_from_backlogged)
+		bfqq->first_IO_time = jiffies;
+
+	if (bfqq->wr_coeff > 1)
+		bfqq->service_from_wr += served;
+
+	bfqq->service_from_backlogged += served;
+	for_each_entity(entity) {
+		st = bfq_entity_service_tree(entity);
+
+		entity->service += served;
+
+		BUG_ON(st->wsum == 0);
+
+		st->vtime += bfq_delta(served, st->wsum);
+		bfq_forget_idle(st);
+	}
+#ifndef BFQ_MQ
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	bfqg_stats_set_start_empty_time(bfqq_group(bfqq));
+#endif
+#endif
+	st = bfq_entity_service_tree(&bfqq->entity);
+	bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %d secs, vtime %llu on %p",
+		     served,  ((st->vtime>>10)*1000)>>12, st);
+}
+
+/**
+ * bfq_bfqq_charge_time - charge an amount of service equivalent to the length
+ *			  of the time interval during which bfqq has been in
+ *			  service.
+ * @bfqd: the device
+ * @bfqq: the queue that needs a service update.
+ * @time_ms: the amount of time during which the queue has received service
+ *
+ * If a queue does not consume its budget fast enough, then providing
+ * the queue with service fairness may impair throughput, more or less
+ * severely. For this reason, queues that consume their budget slowly
+ * are provided with time fairness instead of service fairness. This
+ * goal is achieved through the BFQ scheduling engine, even if such an
+ * engine works in the service, and not in the time domain. The trick
+ * is charging these queues with an inflated amount of service, equal
+ * to the amount of service that they would have received during their
+ * service slot if they had been fast, i.e., if their requests had
+ * been dispatched at a rate equal to the estimated peak rate.
+ *
+ * It is worth noting that time fairness can cause important
+ * distortions in terms of bandwidth distribution, on devices with
+ * internal queueing. The reason is that I/O requests dispatched
+ * during the service slot of a queue may be served after that service
+ * slot is finished, and may have a total processing time loosely
+ * correlated with the duration of the service slot. This is
+ * especially true for short service slots.
+ */
+static void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+				 unsigned long time_ms)
+{
+	struct bfq_entity *entity = &bfqq->entity;
+	int tot_serv_to_charge = entity->service;
+	unsigned int timeout_ms = jiffies_to_msecs(bfq_timeout);
+
+	if (time_ms > 0 && time_ms < timeout_ms)
+		tot_serv_to_charge =
+			(bfqd->bfq_max_budget * time_ms) / timeout_ms;
+
+	if (tot_serv_to_charge < entity->service)
+		tot_serv_to_charge = entity->service;
+
+	bfq_log_bfqq(bfqq->bfqd, bfqq,
+		     "%lu/%u ms, %d/%d/%d sectors",
+		     time_ms, timeout_ms, entity->service,
+		     tot_serv_to_charge, entity->budget);
+
+	/* Increase budget to avoid inconsistencies */
+	if (tot_serv_to_charge > entity->budget)
+		entity->budget = tot_serv_to_charge;
+
+	bfq_bfqq_served(bfqq,
+			max_t(int, 0, tot_serv_to_charge - entity->service));
+}
+
+static void bfq_update_fin_time_enqueue(struct bfq_entity *entity,
+					struct bfq_service_tree *st,
+					bool backshifted)
+{
+	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+	struct bfq_sched_data *sd = entity->sched_data;
+
+	/*
+	 * When this function is invoked, entity is not in any service
+	 * tree, then it is safe to invoke next function with the last
+	 * parameter set (see the comments on the function).
+	 */
+	BUG_ON(entity->tree);
+	st = __bfq_entity_update_weight_prio(st, entity, true);
+	bfq_calc_finish(entity, entity->budget);
+
+	/*
+	 * If some queues enjoy backshifting for a while, then their
+	 * (virtual) finish timestamps may happen to become lower and
+	 * lower than the system virtual time.  In particular, if
+	 * these queues often happen to be idle for short time
+	 * periods, and during such time periods other queues with
+	 * higher timestamps happen to be busy, then the backshifted
+	 * timestamps of the former queues can become much lower than
+	 * the system virtual time. In fact, to serve the queues with
+	 * higher timestamps while the ones with lower timestamps are
+	 * idle, the system virtual time may be pushed-up to much
+	 * higher values than the finish timestamps of the idle
+	 * queues. As a consequence, the finish timestamps of all new
+	 * or newly activated queues may end up being much larger than
+	 * those of lucky queues with backshifted timestamps. The
+	 * latter queues may then monopolize the device for a lot of
+	 * time. This would simply break service guarantees.
+	 *
+	 * To reduce this problem, push up a little bit the
+	 * backshifted timestamps of the queue associated with this
+	 * entity (only a queue can happen to have the backshifted
+	 * flag set): just enough to let the finish timestamp of the
+	 * queue be equal to the current value of the system virtual
+	 * time. This may introduce a little unfairness among queues
+	 * with backshifted timestamps, but it does not break
+	 * worst-case fairness guarantees.
+	 *
+	 * As a special case, if bfqq is weight-raised, push up
+	 * timestamps much less, to keep very low the probability that
+	 * this push up causes the backshifted finish timestamps of
+	 * weight-raised queues to become higher than the backshifted
+	 * finish timestamps of non weight-raised queues.
+	 */
+	if (backshifted && bfq_gt(st->vtime, entity->finish)) {
+		unsigned long delta = st->vtime - entity->finish;
+
+		if (bfqq)
+			delta /= bfqq->wr_coeff;
+
+		entity->start += delta;
+		entity->finish += delta;
+
+		if (bfqq) {
+			bfq_log_bfqq(bfqq->bfqd, bfqq,
+				     "new queue finish %llu",
+				     ((entity->finish>>10)*1000)>>12);
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+		} else {
+			struct bfq_group *bfqg =
+				container_of(entity, struct bfq_group, entity);
+
+			bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
+				     "new group finish %llu",
+				     ((entity->finish>>10)*1000)>>12);
+#endif
+		}
+	}
+
+	bfq_active_insert(st, entity);
+
+	if (bfqq) {
+		bfq_log_bfqq(bfqq->bfqd, bfqq,
+			"queue %seligible in st %p",
+			     entity->start <= st->vtime ? "" : "non ", st);
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	} else {
+		struct bfq_group *bfqg =
+			container_of(entity, struct bfq_group, entity);
+
+		bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
+			"group %seligible in st %p",
+			     entity->start <= st->vtime ? "" : "non ", st);
+#endif
+	}
+	BUG_ON(RB_EMPTY_ROOT(&st->active));
+	BUG_ON(&st->active != &sd->service_tree->active &&
+	       &st->active != &(sd->service_tree+1)->active &&
+	       &st->active != &(sd->service_tree+2)->active);
+}
+
+/**
+ * __bfq_activate_entity - handle activation of entity.
+ * @entity: the entity being activated.
+ * @non_blocking_wait_rq: true if entity was waiting for a request
+ *
+ * Called for a 'true' activation, i.e., if entity is not active and
+ * one of its children receives a new request.
+ *
+ * Basically, this function updates the timestamps of entity and
+ * inserts entity into its active tree, ater possibly extracting it
+ * from its idle tree.
+ */
+static void __bfq_activate_entity(struct bfq_entity *entity,
+				  bool non_blocking_wait_rq)
+{
+	struct bfq_sched_data *sd = entity->sched_data;
+	struct bfq_service_tree *st = bfq_entity_service_tree(entity);
+	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+	bool backshifted = false;
+	unsigned long long min_vstart;
+
+	BUG_ON(!sd);
+	BUG_ON(!st);
+
+	/* See comments on bfq_fqq_update_budg_for_activation */
+	if (non_blocking_wait_rq && bfq_gt(st->vtime, entity->finish)) {
+		backshifted = true;
+		min_vstart = entity->finish;
+	} else
+		min_vstart = st->vtime;
+
+	if (entity->tree == &st->idle) {
+		/*
+		 * Must be on the idle tree, bfq_idle_extract() will
+		 * check for that.
+		 */
+		bfq_idle_extract(st, entity);
+		BUG_ON(entity->tree);
+		entity->start = bfq_gt(min_vstart, entity->finish) ?
+			min_vstart : entity->finish;
+	} else {
+		BUG_ON(entity->tree);
+		/*
+		 * The finish time of the entity may be invalid, and
+		 * it is in the past for sure, otherwise the queue
+		 * would have been on the idle tree.
+		 */
+		entity->start = min_vstart;
+		st->wsum += entity->weight;
+		/*
+		 * entity is about to be inserted into a service tree,
+		 * and then set in service: get a reference to make
+		 * sure entity does not disappear until it is no
+		 * longer in service or scheduled for service.
+		 */
+		bfq_get_entity(entity);
+
+		BUG_ON(entity->on_st && bfqq);
+
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+		if (entity->on_st && !bfqq) {
+			struct bfq_group *bfqg =
+				container_of(entity, struct bfq_group,
+					     entity);
+
+			bfq_log_bfqg((struct bfq_data *)bfqg->bfqd,
+				     bfqg,
+				     "activate bug, class %d in_service %p",
+				     bfq_class_idx(entity), sd->in_service_entity);
+		}
+#endif
+		BUG_ON(entity->on_st && !bfqq);
+		entity->on_st = true;
+	}
+
+	bfq_update_fin_time_enqueue(entity, st, backshifted);
+}
+
+/**
+ * __bfq_requeue_entity - handle requeueing or repositioning of an entity.
+ * @entity: the entity being requeued or repositioned.
+ *
+ * Requeueing is needed if this entity stops being served, which
+ * happens if a leaf descendant entity has expired. On the other hand,
+ * repositioning is needed if the next_inservice_entity for the child
+ * entity has changed. See the comments inside the function for
+ * details.
+ *
+ * Basically, this function: 1) removes entity from its active tree if
+ * present there, 2) updates the timestamps of entity and 3) inserts
+ * entity back into its active tree (in the new, right position for
+ * the new values of the timestamps).
+ */
+static void __bfq_requeue_entity(struct bfq_entity *entity)
+{
+	struct bfq_sched_data *sd = entity->sched_data;
+	struct bfq_service_tree *st = bfq_entity_service_tree(entity);
+
+	BUG_ON(!sd);
+	BUG_ON(!st);
+
+	BUG_ON(entity != sd->in_service_entity &&
+	       entity->tree != &st->active);
+
+	if (entity == sd->in_service_entity) {
+		/*
+		 * We are requeueing the current in-service entity,
+		 * which may have to be done for one of the following
+		 * reasons:
+		 * - entity represents the in-service queue, and the
+		 *   in-service queue is being requeued after an
+		 *   expiration;
+		 * - entity represents a group, and its budget has
+		 *   changed because one of its child entities has
+		 *   just been either activated or requeued for some
+		 *   reason; the timestamps of the entity need then to
+		 *   be updated, and the entity needs to be enqueued
+		 *   or repositioned accordingly.
+		 *
+		 * In particular, before requeueing, the start time of
+		 * the entity must be moved forward to account for the
+		 * service that the entity has received while in
+		 * service. This is done by the next instructions. The
+		 * finish time will then be updated according to this
+		 * new value of the start time, and to the budget of
+		 * the entity.
+		 */
+		bfq_calc_finish(entity, entity->service);
+		entity->start = entity->finish;
+		BUG_ON(entity->tree && entity->tree == &st->idle);
+		BUG_ON(entity->tree && entity->tree != &st->active);
+		/*
+		 * In addition, if the entity had more than one child
+		 * when set in service, then it was not extracted from
+		 * the active tree. This implies that the position of
+		 * the entity in the active tree may need to be
+		 * changed now, because we have just updated the start
+		 * time of the entity, and we will update its finish
+		 * time in a moment (the requeueing is then, more
+		 * precisely, a repositioning in this case). To
+		 * implement this repositioning, we: 1) dequeue the
+		 * entity here, 2) update the finish time and requeue
+		 * the entity according to the new timestamps below.
+		 */
+		if (entity->tree)
+			bfq_active_extract(st, entity);
+	} else { /* The entity is already active, and not in service */
+		/*
+		 * In this case, this function gets called only if the
+		 * next_in_service entity below this entity has
+		 * changed, and this change has caused the budget of
+		 * this entity to change, which, finally implies that
+		 * the finish time of this entity must be
+		 * updated. Such an update may cause the scheduling,
+		 * i.e., the position in the active tree, of this
+		 * entity to change. We handle this change by: 1)
+		 * dequeueing the entity here, 2) updating the finish
+		 * time and requeueing the entity according to the new
+		 * timestamps below. This is the same approach as the
+		 * non-extracted-entity sub-case above.
+		 */
+		bfq_active_extract(st, entity);
+	}
+
+	bfq_update_fin_time_enqueue(entity, st, false);
+}
+
+static void __bfq_activate_requeue_entity(struct bfq_entity *entity,
+					  struct bfq_sched_data *sd,
+					  bool non_blocking_wait_rq)
+{
+	struct bfq_service_tree *st = bfq_entity_service_tree(entity);
+
+	if (sd->in_service_entity == entity || entity->tree == &st->active)
+		 /*
+		  * in service or already queued on the active tree,
+		  * requeue or reposition
+		  */
+		__bfq_requeue_entity(entity);
+	else
+		/*
+		 * Not in service and not queued on its active tree:
+		 * the activity is idle and this is a true activation.
+		 */
+		__bfq_activate_entity(entity, non_blocking_wait_rq);
+}
+
+
+/**
+ * bfq_activate_requeue_entity - activate or requeue an entity representing a bfq_queue,
+ *			 	 and activate, requeue or reposition all ancestors
+ *			 	 for which such an update becomes necessary.
+ * @entity: the entity to activate.
+ * @non_blocking_wait_rq: true if this entity was waiting for a request
+ * @requeue: true if this is a requeue, which implies that bfqq is
+ *	     being expired; thus ALL its ancestors stop being served and must
+ *	     therefore be requeued
+ * @expiration: true if this function is being invoked in the expiration path
+ *		of the in-service queue
+ */
+static void bfq_activate_requeue_entity(struct bfq_entity *entity,
+					bool non_blocking_wait_rq,
+					bool requeue, bool expiration)
+{
+	struct bfq_sched_data *sd;
+
+	for_each_entity(entity) {
+		BUG_ON(!entity);
+		sd = entity->sched_data;
+		__bfq_activate_requeue_entity(entity, sd, non_blocking_wait_rq);
+
+		BUG_ON(RB_EMPTY_ROOT(&sd->service_tree->active) &&
+		       RB_EMPTY_ROOT(&(sd->service_tree+1)->active) &&
+		       RB_EMPTY_ROOT(&(sd->service_tree+2)->active));
+
+		if (!bfq_update_next_in_service(sd, entity, expiration) &&
+		    !requeue) {
+			BUG_ON(!sd->next_in_service);
+			break;
+		}
+		BUG_ON(!sd->next_in_service);
+	}
+}
+
+/**
+ * __bfq_deactivate_entity - deactivate an entity from its service tree.
+ * @entity: the entity to deactivate.
+ * @ins_into_idle_tree: if false, the entity will not be put into the
+ *			idle tree.
+ *
+ * Deactivates an entity, independently of its previous state.  Must
+ * be invoked only if entity is on a service tree. Extracts the entity
+ * from that tree, and if necessary and allowed, puts it into the idle
+ * tree.
+ */
+static bool __bfq_deactivate_entity(struct bfq_entity *entity,
+				    bool ins_into_idle_tree)
+{
+	struct bfq_sched_data *sd = entity->sched_data;
+	struct bfq_service_tree *st;
+	bool is_in_service;
+
+	if (!entity->on_st) { /* entity never activated, or already inactive */
+		BUG_ON(sd && entity == sd->in_service_entity);
+		return false;
+	}
+
+	/*
+	 * If we get here, then entity is active, which implies that
+	 * bfq_group_set_parent has already been invoked for the group
+	 * represented by entity. Therefore, the field
+	 * entity->sched_data has been set, and we can safely use it.
+	 */
+	st = bfq_entity_service_tree(entity);
+	is_in_service = entity == sd->in_service_entity;
+
+	BUG_ON(is_in_service && entity->tree && entity->tree != &st->active);
+
+	if (is_in_service) {
+		bfq_calc_finish(entity, entity->service);
+		sd->in_service_entity = NULL;
+	}
+
+	if (entity->tree == &st->active)
+		bfq_active_extract(st, entity);
+	else if (!is_in_service && entity->tree == &st->idle)
+		bfq_idle_extract(st, entity);
+	else if (entity->tree)
+		BUG();
+
+	if (!ins_into_idle_tree || !bfq_gt(entity->finish, st->vtime))
+		bfq_forget_entity(st, entity, is_in_service);
+	else
+		bfq_idle_insert(st, entity);
+
+	return true;
+}
+
+/**
+ * bfq_deactivate_entity - deactivate an entity representing a bfq_queue.
+ * @entity: the entity to deactivate.
+ * @ins_into_idle_tree: true if the entity can be put into the idle tree
+ * @expiration: true if this function is being invoked in the expiration path
+ *		of the in-service queue
+ */
+static void bfq_deactivate_entity(struct bfq_entity *entity,
+				  bool ins_into_idle_tree,
+				  bool expiration)
+{
+	struct bfq_sched_data *sd;
+	struct bfq_entity *parent = NULL;
+
+	for_each_entity_safe(entity, parent) {
+		sd = entity->sched_data;
+
+		BUG_ON(sd == NULL); /*
+				     * It would mean that this is the
+				     * root group.
+				     */
+
+		BUG_ON(expiration && entity != sd->in_service_entity);
+
+		BUG_ON(entity != sd->in_service_entity &&
+		       entity->tree ==
+		       &bfq_entity_service_tree(entity)->active &&
+		       !sd->next_in_service);
+
+		if (!__bfq_deactivate_entity(entity, ins_into_idle_tree)) {
+			/*
+			 * entity is not in any tree any more, so
+			 * this deactivation is a no-op, and there is
+			 * nothing to change for upper-level entities
+			 * (in case of expiration, this can never
+			 * happen).
+			 */
+			BUG_ON(expiration); /*
+					     * entity cannot be already out of
+					     * any tree
+					     */
+			return;
+		}
+
+		if (sd->next_in_service == entity)
+			/*
+			 * entity was the next_in_service entity,
+			 * then, since entity has just been
+			 * deactivated, a new one must be found.
+			 */
+			bfq_update_next_in_service(sd, NULL, expiration);
+
+		if (sd->next_in_service || sd->in_service_entity) {
+			/*
+			 * The parent entity is still active, because
+			 * either next_in_service or in_service_entity
+			 * is not NULL. So, no further upwards
+			 * deactivation must be performed.  Yet,
+			 * next_in_service has changed.  Then the
+			 * schedule does need to be updated upwards.
+			 *
+			 * NOTE If in_service_entity is not NULL, then
+			 * next_in_service may happen to be NULL,
+			 * although the parent entity is evidently
+			 * active. This happens if 1) the entity
+			 * pointed by in_service_entity is the only
+			 * active entity in the parent entity, and 2)
+			 * according to the definition of
+			 * next_in_service, the in_service_entity
+			 * cannot be considered as
+			 * next_in_service. See the comments on the
+			 * definition of next_in_service for details.
+			 */
+			BUG_ON(sd->next_in_service == entity);
+			BUG_ON(sd->in_service_entity == entity);
+			break;
+		}
+
+		/*
+		 * If we get here, then the parent is no more
+		 * backlogged and we need to propagate the
+		 * deactivation upwards. Thus let the loop go on.
+		 */
+
+		/*
+		 * Also let parent be queued into the idle tree on
+		 * deactivation, to preserve service guarantees, and
+		 * assuming that who invoked this function does not
+		 * need parent entities too to be removed completely.
+		 */
+		ins_into_idle_tree = true;
+	}
+
+	/*
+	 * If the deactivation loop is fully executed, then there are
+	 * no more entities to touch and next loop is not executed at
+	 * all. Otherwise, requeue remaining entities if they are
+	 * about to stop receiving service, or reposition them if this
+	 * is not the case.
+	 */
+	entity = parent;
+	for_each_entity(entity) {
+		struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+
+		/*
+		 * Invoke __bfq_requeue_entity on entity, even if
+		 * already active, to requeue/reposition it in the
+		 * active tree (because sd->next_in_service has
+		 * changed)
+		 */
+		__bfq_requeue_entity(entity);
+
+		sd = entity->sched_data;
+		BUG_ON(expiration && sd->in_service_entity != entity);
+
+		if (bfqq)
+			bfq_log_bfqq(bfqq->bfqd, bfqq,
+				     "invoking udpdate_next for this queue");
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+		else {
+			struct bfq_group *bfqg =
+				container_of(entity,
+					     struct bfq_group, entity);
+
+			bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
+				     "invoking udpdate_next for this entity");
+		}
+#endif
+		if (!bfq_update_next_in_service(sd, entity, expiration) &&
+		    !expiration)
+			/*
+			 * next_in_service unchanged or not causing
+			 * any change in entity->parent->sd, and no
+			 * requeueing needed for expiration: stop
+			 * here.
+			 */
+			break;
+	}
+}
+
+/**
+ * bfq_calc_vtime_jump - compute the value to which the vtime should jump,
+ *                       if needed, to have at least one entity eligible.
+ * @st: the service tree to act upon.
+ *
+ * Assumes that st is not empty.
+ */
+static u64 bfq_calc_vtime_jump(struct bfq_service_tree *st)
+{
+	struct bfq_entity *root_entity = bfq_root_active_entity(&st->active);
+
+	if (bfq_gt(root_entity->min_start, st->vtime)) {
+		struct bfq_queue *bfqq = bfq_entity_to_bfqq(root_entity);
+
+		if (bfqq)
+			bfq_log_bfqq(bfqq->bfqd, bfqq,
+				     "new value %llu",
+				     ((root_entity->min_start>>10)*1000)>>12);
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+		else {
+			struct bfq_group *bfqg =
+				container_of(root_entity, struct bfq_group,
+					     entity);
+
+			bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
+				     "new value %llu",
+				     ((root_entity->min_start>>10)*1000)>>12);
+		}
+#endif
+		return root_entity->min_start;
+	}
+	return st->vtime;
+}
+
+static void bfq_update_vtime(struct bfq_service_tree *st, u64 new_value)
+{
+	if (new_value > st->vtime) {
+		st->vtime = new_value;
+		bfq_forget_idle(st);
+	}
+}
+
+/**
+ * bfq_first_active_entity - find the eligible entity with
+ *                           the smallest finish time
+ * @st: the service tree to select from.
+ * @vtime: the system virtual to use as a reference for eligibility
+ *
+ * This function searches the first schedulable entity, starting from the
+ * root of the tree and going on the left every time on this side there is
+ * a subtree with at least one eligible (start >= vtime) entity. The path on
+ * the right is followed only if a) the left subtree contains no eligible
+ * entities and b) no eligible entity has been found yet.
+ */
+static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st,
+						  u64 vtime)
+{
+	struct bfq_entity *entry, *first = NULL;
+	struct rb_node *node = st->active.rb_node;
+
+	while (node) {
+		entry = rb_entry(node, struct bfq_entity, rb_node);
+left:
+		if (!bfq_gt(entry->start, vtime))
+			first = entry;
+
+		BUG_ON(bfq_gt(entry->min_start, vtime));
+
+		if (node->rb_left) {
+			entry = rb_entry(node->rb_left,
+					 struct bfq_entity, rb_node);
+			if (!bfq_gt(entry->min_start, vtime)) {
+				node = node->rb_left;
+				goto left;
+			}
+		}
+		if (first)
+			break;
+		node = node->rb_right;
+	}
+
+	BUG_ON(!first && !RB_EMPTY_ROOT(&st->active));
+	return first;
+}
+
+/**
+ * __bfq_lookup_next_entity - return the first eligible entity in @st.
+ * @st: the service tree.
+ *
+ * If there is no in-service entity for the sched_data st belongs to,
+ * then return the entity that will be set in service if:
+ * 1) the parent entity this st belongs to is set in service;
+ * 2) no entity belonging to such parent entity undergoes a state change
+ * that would influence the timestamps of the entity (e.g., becomes idle,
+ * becomes backlogged, changes its budget, ...).
+ *
+ * In this first case, update the virtual time in @st too (see the
+ * comments on this update inside the function).
+ *
+ * In constrast, if there is an in-service entity, then return the
+ * entity that would be set in service if not only the above
+ * conditions, but also the next one held true: the currently
+ * in-service entity, on expiration,
+ * 1) gets a finish time equal to the current one, or
+ * 2) is not eligible any more, or
+ * 3) is idle.
+ */
+static struct bfq_entity *
+__bfq_lookup_next_entity(struct bfq_service_tree *st, bool in_service)
+{
+	struct bfq_entity *entity;
+	u64 new_vtime;
+	struct bfq_queue *bfqq;
+
+	if (RB_EMPTY_ROOT(&st->active))
+		return NULL;
+
+	/*
+	 * Get the value of the system virtual time for which at
+	 * least one entity is eligible.
+	 */
+	new_vtime = bfq_calc_vtime_jump(st);
+
+	/*
+	 * If there is no in-service entity for the sched_data this
+	 * active tree belongs to, then push the system virtual time
+	 * up to the value that guarantees that at least one entity is
+	 * eligible. If, instead, there is an in-service entity, then
+	 * do not make any such update, because there is already an
+	 * eligible entity, namely the in-service one (even if the
+	 * entity is not on st, because it was extracted when set in
+	 * service).
+	 */
+	if (!in_service)
+		bfq_update_vtime(st, new_vtime);
+
+	entity = bfq_first_active_entity(st, new_vtime);
+	BUG_ON(bfq_gt(entity->start, new_vtime));
+
+	/* Log some information */
+	bfqq = bfq_entity_to_bfqq(entity);
+	if (bfqq)
+		bfq_log_bfqq(bfqq->bfqd, bfqq,
+			     "start %llu vtime %llu st %p",
+			     ((entity->start>>10)*1000)>>12,
+			     ((new_vtime>>10)*1000)>>12, st);
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	else {
+		struct bfq_group *bfqg =
+			container_of(entity, struct bfq_group, entity);
+
+		bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
+			     "start %llu vtime %llu (%llu) st %p",
+			     ((entity->start>>10)*1000)>>12,
+			     ((st->vtime>>10)*1000)>>12,
+			     ((new_vtime>>10)*1000)>>12, st);
+	}
+#endif
+
+	BUG_ON(!entity);
+
+	return entity;
+}
+
+/**
+ * bfq_lookup_next_entity - return the first eligible entity in @sd.
+ * @sd: the sched_data.
+ * @expiration: true if we are on the expiration path of the in-service queue
+ *
+ * This function is invoked when there has been a change in the trees
+ * for sd, and we need to know what is the new next entity to serve
+ * after this change.
+ */
+static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
+						 bool expiration)
+{
+	struct bfq_service_tree *st = sd->service_tree;
+	struct bfq_service_tree *idle_class_st = st + (BFQ_IOPRIO_CLASSES - 1);
+	struct bfq_entity *entity = NULL;
+	struct bfq_queue *bfqq;
+	int class_idx = 0;
+
+	BUG_ON(!sd);
+	BUG_ON(!st);
+	/*
+	 * Choose from idle class, if needed to guarantee a minimum
+	 * bandwidth to this class (and if there is some active entity
+	 * in idle class). This should also mitigate
+	 * priority-inversion problems in case a low priority task is
+	 * holding file system resources.
+	 */
+	if (time_is_before_jiffies(sd->bfq_class_idle_last_service +
+				   BFQ_CL_IDLE_TIMEOUT)) {
+		if (!RB_EMPTY_ROOT(&idle_class_st->active))
+			class_idx = BFQ_IOPRIO_CLASSES - 1;
+		/* About to be served if backlogged, or not yet backlogged */
+		sd->bfq_class_idle_last_service = jiffies;
+	}
+
+	/*
+	 * Find the next entity to serve for the highest-priority
+	 * class, unless the idle class needs to be served.
+	 */
+	for (; class_idx < BFQ_IOPRIO_CLASSES; class_idx++) {
+		/*
+		 * If expiration is true, then bfq_lookup_next_entity
+		 * is being invoked as a part of the expiration path
+		 * of the in-service queue. In this case, even if
+		 * sd->in_service_entity is not NULL,
+		 * sd->in_service_entiy at this point is actually not
+		 * in service any more, and, if needed, has already
+		 * been properly queued or requeued into the right
+		 * tree. The reason why sd->in_service_entity is still
+		 * not NULL here, even if expiration is true, is that
+		 * sd->in_service_entiy is reset as a last step in the
+		 * expiration path. So, if expiration is true, tell
+		 * __bfq_lookup_next_entity that there is no
+		 * sd->in_service_entity.
+		 */
+		entity = __bfq_lookup_next_entity(st + class_idx,
+						  sd->in_service_entity &&
+						  !expiration);
+
+		if (entity)
+			break;
+	}
+
+	BUG_ON(!entity &&
+	       (!RB_EMPTY_ROOT(&st->active) || !RB_EMPTY_ROOT(&(st+1)->active) ||
+		!RB_EMPTY_ROOT(&(st+2)->active)));
+
+	if (!entity)
+		return NULL;
+
+	/* Log some information */
+	bfqq = bfq_entity_to_bfqq(entity);
+	if (bfqq)
+		bfq_log_bfqq(bfqq->bfqd, bfqq, "chosen from st %p %d",
+			     st + class_idx, class_idx);
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	else {
+		struct bfq_group *bfqg =
+			container_of(entity, struct bfq_group, entity);
+
+		bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
+			     "chosen from st %p %d",
+			     st + class_idx, class_idx);
+	}
+#endif
+
+	return entity;
+}
+
+static bool next_queue_may_preempt(struct bfq_data *bfqd)
+{
+	struct bfq_sched_data *sd = &bfqd->root_group->sched_data;
+
+	return sd->next_in_service != sd->in_service_entity;
+}
+
+/*
+ * Get next queue for service.
+ */
+static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
+{
+	struct bfq_entity *entity = NULL;
+	struct bfq_sched_data *sd;
+	struct bfq_queue *bfqq;
+
+	BUG_ON(bfqd->in_service_queue);
+
+	if (bfqd->busy_queues == 0)
+		return NULL;
+
+	/*
+	 * Traverse the path from the root to the leaf entity to
+	 * serve. Set in service all the entities visited along the
+	 * way.
+	 */
+	sd = &bfqd->root_group->sched_data;
+	for (; sd ; sd = entity->my_sched_data) {
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+		if (entity) {
+			struct bfq_group *bfqg =
+				container_of(entity, struct bfq_group, entity);
+
+			bfq_log_bfqg(bfqd, bfqg,
+				     "lookup in this group");
+			if (!sd->next_in_service)
+				pr_crit("lookup in this group");
+		} else {
+			bfq_log_bfqg(bfqd, bfqd->root_group,
+				     "lookup in root group");
+			if (!sd->next_in_service)
+				pr_crit("lookup in root group");
+		}
+#endif
+
+		BUG_ON(!sd->next_in_service);
+
+		/*
+		 * WARNING. We are about to set the in-service entity
+		 * to sd->next_in_service, i.e., to the (cached) value
+		 * returned by bfq_lookup_next_entity(sd) the last
+		 * time it was invoked, i.e., the last time when the
+		 * service order in sd changed as a consequence of the
+		 * activation or deactivation of an entity. In this
+		 * respect, if we execute bfq_lookup_next_entity(sd)
+		 * in this very moment, it may, although with low
+		 * probability, yield a different entity than that
+		 * pointed to by sd->next_in_service. This rare event
+		 * happens in case there was no CLASS_IDLE entity to
+		 * serve for sd when bfq_lookup_next_entity(sd) was
+		 * invoked for the last time, while there is now one
+		 * such entity.
+		 *
+		 * If the above event happens, then the scheduling of
+		 * such entity in CLASS_IDLE is postponed until the
+		 * service of the sd->next_in_service entity
+		 * finishes. In fact, when the latter is expired,
+		 * bfq_lookup_next_entity(sd) gets called again,
+		 * exactly to update sd->next_in_service.
+		 */
+
+		/* Make next_in_service entity become in_service_entity */
+		entity = sd->next_in_service;
+		sd->in_service_entity = entity;
+
+		/*
+		 * Reset the accumulator of the amount of service that
+		 * the entity is about to receive.
+		 */
+		entity->service = 0;
+
+		/*
+		 * If entity is no longer a candidate for next
+		 * service, then it must be extracted from its active
+		 * tree, so as to make sure that it won't be
+		 * considered when computing next_in_service. See the
+		 * comments on the function
+		 * bfq_no_longer_next_in_service() for details.
+		 */
+		if (bfq_no_longer_next_in_service(entity))
+			bfq_active_extract(bfq_entity_service_tree(entity),
+					   entity);
+
+		/*
+		 * Even if entity is not to be extracted according to
+		 * the above check, a descendant entity may get
+		 * extracted in one of the next iterations of this
+		 * loop. Such an event could cause a change in
+		 * next_in_service for the level of the descendant
+		 * entity, and thus possibly back to this level.
+		 *
+		 * However, we cannot perform the resulting needed
+		 * update of next_in_service for this level before the
+		 * end of the whole loop, because, to know which is
+		 * the correct next-to-serve candidate entity for each
+		 * level, we need first to find the leaf entity to set
+		 * in service. In fact, only after we know which is
+		 * the next-to-serve leaf entity, we can discover
+		 * whether the parent entity of the leaf entity
+		 * becomes the next-to-serve, and so on.
+		 */
+
+		/* Log some information */
+		bfqq = bfq_entity_to_bfqq(entity);
+		if (bfqq)
+			bfq_log_bfqq(bfqd, bfqq,
+			     "this queue, finish %llu",
+				(((entity->finish>>10)*1000)>>10)>>2);
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+		else {
+			struct bfq_group *bfqg =
+				container_of(entity, struct bfq_group, entity);
+
+			bfq_log_bfqg(bfqd, bfqg,
+			     "this entity, finish %llu",
+				(((entity->finish>>10)*1000)>>10)>>2);
+		}
+#endif
+
+	}
+
+	BUG_ON(!entity);
+	bfqq = bfq_entity_to_bfqq(entity);
+	BUG_ON(!bfqq);
+
+	/*
+	 * We can finally update all next-to-serve entities along the
+	 * path from the leaf entity just set in service to the root.
+	 */
+	for_each_entity(entity) {
+		struct bfq_sched_data *sd = entity->sched_data;
+
+		if (!bfq_update_next_in_service(sd, NULL, false))
+			break;
+	}
+
+	return bfqq;
+}
+
+static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
+{
+	struct bfq_queue *in_serv_bfqq = bfqd->in_service_queue;
+	struct bfq_entity *in_serv_entity = &in_serv_bfqq->entity;
+	struct bfq_entity *entity = in_serv_entity;
+
+#ifndef BFQ_MQ
+	if (bfqd->in_service_bic) {
+		put_io_context(bfqd->in_service_bic->icq.ioc);
+		bfqd->in_service_bic = NULL;
+	}
+#endif
+
+	bfq_clear_bfqq_wait_request(in_serv_bfqq);
+	hrtimer_try_to_cancel(&bfqd->idle_slice_timer);
+	bfqd->in_service_queue = NULL;
+
+	/*
+	 * When this function is called, all in-service entities have
+	 * been properly deactivated or requeued, so we can safely
+	 * execute the final step: reset in_service_entity along the
+	 * path from entity to the root.
+	 */
+	for_each_entity(entity)
+		entity->sched_data->in_service_entity = NULL;
+
+	/*
+	 * in_serv_entity is no longer in service, so, if it is in no
+	 * service tree either, then release the service reference to
+	 * the queue it represents (taken with bfq_get_entity).
+	 */
+	if (!in_serv_entity->on_st)
+		bfq_put_queue(in_serv_bfqq);
+}
+
+static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+				bool ins_into_idle_tree, bool expiration)
+{
+	struct bfq_entity *entity = &bfqq->entity;
+
+	bfq_deactivate_entity(entity, ins_into_idle_tree, expiration);
+}
+
+static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	struct bfq_entity *entity = &bfqq->entity;
+	struct bfq_service_tree *st = bfq_entity_service_tree(entity);
+
+	BUG_ON(bfqq == bfqd->in_service_queue);
+	BUG_ON(entity->tree != &st->active && entity->tree != &st->idle &&
+	       entity->on_st);
+
+	bfq_activate_requeue_entity(entity, bfq_bfqq_non_blocking_wait_rq(bfqq),
+				    false, false);
+	bfq_clear_bfqq_non_blocking_wait_rq(bfqq);
+}
+
+static void bfq_requeue_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+			     bool expiration)
+{
+	struct bfq_entity *entity = &bfqq->entity;
+
+	bfq_activate_requeue_entity(entity, false,
+				    bfqq == bfqd->in_service_queue, expiration);
+}
+
+static void bfqg_stats_update_dequeue(struct bfq_group *bfqg);
+
+/*
+ * Called when the bfqq no longer has requests pending, remove it from
+ * the service tree. As a special case, it can be invoked during an
+ * expiration.
+ */
+static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+			      bool expiration)
+{
+	BUG_ON(!bfq_bfqq_busy(bfqq));
+	BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list));
+
+	bfq_log_bfqq(bfqd, bfqq, "del from busy");
+
+	bfq_clear_bfqq_busy(bfqq);
+
+	BUG_ON(bfqd->busy_queues == 0);
+	bfqd->busy_queues--;
+
+	if (!bfqq->dispatched)
+		bfq_weights_tree_remove(bfqd, &bfqq->entity,
+					&bfqd->queue_weights_tree);
+
+	if (bfqq->wr_coeff > 1) {
+		bfqd->wr_busy_queues--;
+		BUG_ON(bfqd->wr_busy_queues < 0);
+	}
+
+	bfqg_stats_update_dequeue(bfqq_group(bfqq));
+
+	BUG_ON(bfqq->entity.budget < 0);
+
+	bfq_deactivate_bfqq(bfqd, bfqq, true, expiration);
+}
+
+/*
+ * Called when an inactive queue receives a new request.
+ */
+static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	BUG_ON(bfq_bfqq_busy(bfqq));
+	BUG_ON(bfqq == bfqd->in_service_queue);
+
+	bfq_log_bfqq(bfqd, bfqq, "add to busy");
+
+	bfq_activate_bfqq(bfqd, bfqq);
+
+	bfq_mark_bfqq_busy(bfqq);
+	bfqd->busy_queues++;
+
+	if (!bfqq->dispatched)
+		if (bfqq->wr_coeff == 1)
+			bfq_weights_tree_add(bfqd, &bfqq->entity,
+					     &bfqd->queue_weights_tree);
+
+	if (bfqq->wr_coeff > 1) {
+		bfqd->wr_busy_queues++;
+		BUG_ON(bfqd->wr_busy_queues > bfqd->busy_queues);
+	}
+
+}
diff --git a/block/bfq-sq-iosched.c b/block/bfq-sq-iosched.c
new file mode 100644
index 000000000..e96213865
--- /dev/null
+++ b/block/bfq-sq-iosched.c
@@ -0,0 +1,5647 @@
+/*
+ * Budget Fair Queueing (BFQ) I/O scheduler.
+ *
+ * Based on ideas and code from CFQ:
+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
+ *
+ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
+ *		      Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Copyright (C) 2017 Paolo Valente <paolo.valente@linaro.org>
+ *
+ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ
+ * file.
+ *
+ * BFQ is a proportional-share I/O scheduler, with some extra
+ * low-latency capabilities. BFQ also supports full hierarchical
+ * scheduling through cgroups. Next paragraphs provide an introduction
+ * on BFQ inner workings. Details on BFQ benefits and usage can be
+ * found in Documentation/block/bfq-iosched.txt.
+ *
+ * BFQ is a proportional-share storage-I/O scheduling algorithm based
+ * on the slice-by-slice service scheme of CFQ. But BFQ assigns
+ * budgets, measured in number of sectors, to processes instead of
+ * time slices. The device is not granted to the in-service process
+ * for a given time slice, but until it has exhausted its assigned
+ * budget. This change from the time to the service domain enables BFQ
+ * to distribute the device throughput among processes as desired,
+ * without any distortion due to throughput fluctuations, or to device
+ * internal queueing. BFQ uses an ad hoc internal scheduler, called
+ * B-WF2Q+, to schedule processes according to their budgets. More
+ * precisely, BFQ schedules queues associated with processes. Thanks to
+ * the accurate policy of B-WF2Q+, BFQ can afford to assign high
+ * budgets to I/O-bound processes issuing sequential requests (to
+ * boost the throughput), and yet guarantee a low latency to
+ * interactive and soft real-time applications.
+ *
+ * NOTE: if the main or only goal, with a given device, is to achieve
+ * the maximum-possible throughput at all times, then do switch off
+ * all low-latency heuristics for that device, by setting low_latency
+ * to 0.
+ *
+ * BFQ is described in [1], where also a reference to the initial, more
+ * theoretical paper on BFQ can be found. The interested reader can find
+ * in the latter paper full details on the main algorithm, as well as
+ * formulas of the guarantees and formal proofs of all the properties.
+ * With respect to the version of BFQ presented in these papers, this
+ * implementation adds a few more heuristics, such as the one that
+ * guarantees a low latency to soft real-time applications, and a
+ * hierarchical extension based on H-WF2Q+.
+ *
+ * B-WF2Q+ is based on WF2Q+, that is described in [2], together with
+ * H-WF2Q+, while the augmented tree used to implement B-WF2Q+ with O(log N)
+ * complexity derives from the one introduced with EEVDF in [3].
+ *
+ * [1] P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O
+ *   Scheduler", Proceedings of the First Workshop on Mobile System
+ *   Technologies (MST-2015), May 2015.
+ *   http://algogroup.unimore.it/people/paolo/disk_sched/mst-2015.pdf
+ *
+ * http://algogroup.unimo.it/people/paolo/disk_sched/bf1-v1-suite-results.pdf
+ *
+ * [2] Jon C.R. Bennett and H. Zhang, ``Hierarchical Packet Fair Queueing
+ *     Algorithms,'' IEEE/ACM Transactions on Networking, 5(5):675-689,
+ *     Oct 1997.
+ *
+ * http://www.cs.cmu.edu/~hzhang/papers/TON-97-Oct.ps.gz
+ *
+ * [3] I. Stoica and H. Abdel-Wahab, ``Earliest Eligible Virtual Deadline
+ *     First: A Flexible and Accurate Mechanism for Proportional Share
+ *     Resource Allocation,'' technical report.
+ *
+ * http://www.cs.berkeley.edu/~istoica/papers/eevdf-tr-95.pdf
+ */
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <linux/cgroup.h>
+#include <linux/elevator.h>
+#include <linux/jiffies.h>
+#include <linux/rbtree.h>
+#include <linux/ioprio.h>
+#include "blk.h"
+#include "bfq.h"
+#include "blk-wbt.h"
+
+/* Expiration time of sync (0) and async (1) requests, in ns. */
+static const u64 bfq_fifo_expire[2] = { NSEC_PER_SEC / 4, NSEC_PER_SEC / 8 };
+
+/* Maximum backwards seek, in KiB. */
+static const int bfq_back_max = (16 * 1024);
+
+/* Penalty of a backwards seek, in number of sectors. */
+static const int bfq_back_penalty = 2;
+
+/* Idling period duration, in ns. */
+static u32 bfq_slice_idle = (NSEC_PER_SEC / 125);
+
+/* Minimum number of assigned budgets for which stats are safe to compute. */
+static const int bfq_stats_min_budgets = 194;
+
+/* Default maximum budget values, in sectors and number of requests. */
+static const int bfq_default_max_budget = (16 * 1024);
+
+/*
+ * Async to sync throughput distribution is controlled as follows:
+ * when an async request is served, the entity is charged the number
+ * of sectors of the request, multiplied by the factor below
+ */
+static const int bfq_async_charge_factor = 10;
+
+/* Default timeout values, in jiffies, approximating CFQ defaults. */
+static const int bfq_timeout = (HZ / 8);
+
+/*
+ * Time limit for merging (see comments in bfq_setup_cooperator). Set
+ * to the slowest value that, in our tests, proved to be effective in
+ * removing false positives, while not causing true positives to miss
+ * queue merging.
+ *
+ * As can be deduced from the low time limit below, queue merging, if
+ * successful, happens at the very beggining of the I/O of the involved
+ * cooperating processes, as a consequence of the arrival of the very
+ * first requests from each cooperator.  After that, there is very
+ * little chance to find cooperators.
+ */
+static const unsigned long bfq_merge_time_limit = HZ/10;
+
+#define MAX_LENGTH_REASON_NAME 25
+
+static const char reason_name[][MAX_LENGTH_REASON_NAME] = {"TOO_IDLE",
+"BUDGET_TIMEOUT", "BUDGET_EXHAUSTED", "NO_MORE_REQUESTS",
+"PREEMPTED"};
+
+static struct kmem_cache *bfq_pool;
+
+/* Below this threshold (in ns), we consider thinktime immediate. */
+#define BFQ_MIN_TT		(2 * NSEC_PER_MSEC)
+
+/* hw_tag detection: parallel requests threshold and min samples needed. */
+#define BFQ_HW_QUEUE_THRESHOLD	4
+#define BFQ_HW_QUEUE_SAMPLES	32
+
+#define BFQQ_SEEK_THR		(sector_t)(8 * 100)
+#define BFQQ_SECT_THR_NONROT	(sector_t)(2 * 32)
+#define BFQQ_CLOSE_THR		(sector_t)(8 * 1024)
+#define BFQQ_SEEKY(bfqq)	(hweight32(bfqq->seek_history) > 19)
+
+/* Min number of samples required to perform peak-rate update */
+#define BFQ_RATE_MIN_SAMPLES	32
+/* Min observation time interval required to perform a peak-rate update (ns) */
+#define BFQ_RATE_MIN_INTERVAL	(300*NSEC_PER_MSEC)
+/* Target observation time interval for a peak-rate update (ns) */
+#define BFQ_RATE_REF_INTERVAL	NSEC_PER_SEC
+
+/*
+ * Shift used for peak-rate fixed precision calculations.
+ * With
+ * - the current shift: 16 positions
+ * - the current type used to store rate: u32
+ * - the current unit of measure for rate: [sectors/usec], or, more precisely,
+ *   [(sectors/usec) / 2^BFQ_RATE_SHIFT] to take into account the shift,
+ * the range of rates that can be stored is
+ * [1 / 2^BFQ_RATE_SHIFT, 2^(32 - BFQ_RATE_SHIFT)] sectors/usec =
+ * [1 / 2^16, 2^16] sectors/usec = [15e-6, 65536] sectors/usec =
+ * [15, 65G] sectors/sec
+ * Which, assuming a sector size of 512B, corresponds to a range of
+ * [7.5K, 33T] B/sec
+ */
+#define BFQ_RATE_SHIFT		16
+
+/*
+ * By default, BFQ computes the duration of the weight raising for
+ * interactive applications automatically, using the following formula:
+ * duration = (R / r) * T, where r is the peak rate of the device, and
+ * R and T are two reference parameters.
+ * In particular, R is the peak rate of the reference device (see
+ * below), and T is a reference time: given the systems that are
+ * likely to be installed on the reference device according to its
+ * speed class, T is about the maximum time needed, under BFQ and
+ * while reading two files in parallel, to load typical large
+ * applications on these systems (see the comments on
+ * max_service_from_wr below, for more details on how T is obtained).
+ * In practice, the slower/faster the device at hand is, the more/less
+ * it takes to load applications with respect to the reference device.
+ * Accordingly, the longer/shorter BFQ grants weight raising to
+ * interactive applications.
+ *
+ * BFQ uses four different reference pairs (R, T), depending on:
+ * . whether the device is rotational or non-rotational;
+ * . whether the device is slow, such as old or portable HDDs, as well as
+ *   SD cards, or fast, such as newer HDDs and SSDs.
+ *
+ * The device's speed class is dynamically (re)detected in
+ * bfq_update_peak_rate() every time the estimated peak rate is updated.
+ *
+ * In the following definitions, R_slow[0]/R_fast[0] and
+ * T_slow[0]/T_fast[0] are the reference values for a slow/fast
+ * rotational device, whereas R_slow[1]/R_fast[1] and
+ * T_slow[1]/T_fast[1] are the reference values for a slow/fast
+ * non-rotational device. Finally, device_speed_thresh are the
+ * thresholds used to switch between speed classes. The reference
+ * rates are not the actual peak rates of the devices used as a
+ * reference, but slightly lower values. The reason for using these
+ * slightly lower values is that the peak-rate estimator tends to
+ * yield slightly lower values than the actual peak rate (it can yield
+ * the actual peak rate only if there is only one process doing I/O,
+ * and the process does sequential I/O).
+ *
+ * Both the reference peak rates and the thresholds are measured in
+ * sectors/usec, left-shifted by BFQ_RATE_SHIFT.
+ */
+static int R_slow[2] = {1000, 10700};
+static int R_fast[2] = {14000, 33000};
+/*
+ * To improve readability, a conversion function is used to initialize the
+ * following arrays, which entails that they can be initialized only in a
+ * function.
+ */
+static int T_slow[2];
+static int T_fast[2];
+static int device_speed_thresh[2];
+
+/*
+ * BFQ uses the above-detailed, time-based weight-raising mechanism to
+ * privilege interactive tasks. This mechanism is vulnerable to the
+ * following false positives: I/O-bound applications that will go on
+ * doing I/O for much longer than the duration of weight
+ * raising. These applications have basically no benefit from being
+ * weight-raised at the beginning of their I/O. On the opposite end,
+ * while being weight-raised, these applications
+ * a) unjustly steal throughput to applications that may actually need
+ * low latency;
+ * b) make BFQ uselessly perform device idling; device idling results
+ * in loss of device throughput with most flash-based storage, and may
+ * increase latencies when used purposelessly.
+ *
+ * BFQ tries to reduce these problems, by adopting the following
+ * countermeasure. To introduce this countermeasure, we need first to
+ * finish explaining how the duration of weight-raising for
+ * interactive tasks is computed.
+ *
+ * For a bfq_queue deemed as interactive, the duration of weight
+ * raising is dynamically adjusted, as a function of the estimated
+ * peak rate of the device, so as to be equal to the time needed to
+ * execute the 'largest' interactive task we benchmarked so far. By
+ * largest task, we mean the task for which each involved process has
+ * to do more I/O than for any of the other tasks we benchmarked. This
+ * reference interactive task is the start-up of LibreOffice Writer,
+ * and in this task each process/bfq_queue needs to have at most ~110K
+ * sectors transfered.
+ *
+ * This last piece of information enables BFQ to reduce the actual
+ * duration of weight-raising for at least one class of I/O-bound
+ * applications: those doing sequential or quasi-sequential I/O. An
+ * example is file copy. In fact, once started, the main I/O-bound
+ * processes of these applications usually consume the above 110K
+ * sectors in much less time than the processes of an application that
+ * is starting, because these I/O-bound processes will greedily devote
+ * almost all their CPU cycles only to their target,
+ * throughput-friendly I/O operations. This is even more true if BFQ
+ * happens to be underestimating the device peak rate, and thus
+ * overestimating the duration of weight raising. But, according to
+ * our measurements, once transferred 110K sectors, these processes
+ * have no right to be weight-raised any longer.
+ *
+ * Basing on the last consideration, BFQ ends weight-raising for a
+ * bfq_queue if the latter happens to have received an amount of
+ * service at least equal to the following constant. The constant is
+ * set to slightly more than 110K, to have a minimum safety margin.
+ *
+ * This early ending of weight-raising reduces the amount of time
+ * during which interactive false positives cause the two problems
+ * described at the beginning of these comments.
+ */
+static const unsigned long max_service_from_wr = 120000;
+
+#define BFQ_SERVICE_TREE_INIT	((struct bfq_service_tree)		\
+				{ RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 })
+
+#define RQ_BIC(rq)		icq_to_bic((rq)->elv.priv[0])
+#define RQ_BFQQ(rq)		((rq)->elv.priv[1])
+
+static void bfq_schedule_dispatch(struct bfq_data *bfqd);
+
+#include "bfq-ioc.c"
+#include "bfq-sched.c"
+#include "bfq-cgroup-included.c"
+
+#define bfq_class_idle(bfqq)	((bfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
+#define bfq_class_rt(bfqq)	((bfqq)->ioprio_class == IOPRIO_CLASS_RT)
+
+#define bfq_sample_valid(samples)	((samples) > 80)
+
+/*
+ * Scheduler run of queue, if there are requests pending and no one in the
+ * driver that will restart queueing.
+ */
+static void bfq_schedule_dispatch(struct bfq_data *bfqd)
+{
+	if (bfqd->queued != 0) {
+		bfq_log(bfqd, "");
+		kblockd_schedule_work(&bfqd->unplug_work);
+	}
+}
+
+/*
+ * Lifted from AS - choose which of rq1 and rq2 that is best served now.
+ * We choose the request that is closesr to the head right now.  Distance
+ * behind the head is penalized and only allowed to a certain extent.
+ */
+static struct request *bfq_choose_req(struct bfq_data *bfqd,
+				      struct request *rq1,
+				      struct request *rq2,
+				      sector_t last)
+{
+	sector_t s1, s2, d1 = 0, d2 = 0;
+	unsigned long back_max;
+#define BFQ_RQ1_WRAP	0x01 /* request 1 wraps */
+#define BFQ_RQ2_WRAP	0x02 /* request 2 wraps */
+	unsigned int wrap = 0; /* bit mask: requests behind the disk head? */
+
+	if (!rq1 || rq1 == rq2)
+		return rq2;
+	if (!rq2)
+		return rq1;
+
+	if (rq_is_sync(rq1) && !rq_is_sync(rq2))
+		return rq1;
+	else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
+		return rq2;
+	if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META))
+		return rq1;
+	else if ((rq2->cmd_flags & REQ_META) && !(rq1->cmd_flags & REQ_META))
+		return rq2;
+
+	s1 = blk_rq_pos(rq1);
+	s2 = blk_rq_pos(rq2);
+
+	/*
+	 * By definition, 1KiB is 2 sectors.
+	 */
+	back_max = bfqd->bfq_back_max * 2;
+
+	/*
+	 * Strict one way elevator _except_ in the case where we allow
+	 * short backward seeks which are biased as twice the cost of a
+	 * similar forward seek.
+	 */
+	if (s1 >= last)
+		d1 = s1 - last;
+	else if (s1 + back_max >= last)
+		d1 = (last - s1) * bfqd->bfq_back_penalty;
+	else
+		wrap |= BFQ_RQ1_WRAP;
+
+	if (s2 >= last)
+		d2 = s2 - last;
+	else if (s2 + back_max >= last)
+		d2 = (last - s2) * bfqd->bfq_back_penalty;
+	else
+		wrap |= BFQ_RQ2_WRAP;
+
+	/* Found required data */
+
+	/*
+	 * By doing switch() on the bit mask "wrap" we avoid having to
+	 * check two variables for all permutations: --> faster!
+	 */
+	switch (wrap) {
+	case 0: /* common case for CFQ: rq1 and rq2 not wrapped */
+		if (d1 < d2)
+			return rq1;
+		else if (d2 < d1)
+			return rq2;
+
+		if (s1 >= s2)
+			return rq1;
+		else
+			return rq2;
+
+	case BFQ_RQ2_WRAP:
+		return rq1;
+	case BFQ_RQ1_WRAP:
+		return rq2;
+	case (BFQ_RQ1_WRAP|BFQ_RQ2_WRAP): /* both rqs wrapped */
+	default:
+		/*
+		 * Since both rqs are wrapped,
+		 * start with the one that's further behind head
+		 * (--> only *one* back seek required),
+		 * since back seek takes more time than forward.
+		 */
+		if (s1 <= s2)
+			return rq1;
+		else
+			return rq2;
+	}
+}
+
+static struct bfq_queue *
+bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root,
+		     sector_t sector, struct rb_node **ret_parent,
+		     struct rb_node ***rb_link)
+{
+	struct rb_node **p, *parent;
+	struct bfq_queue *bfqq = NULL;
+
+	parent = NULL;
+	p = &root->rb_node;
+	while (*p) {
+		struct rb_node **n;
+
+		parent = *p;
+		bfqq = rb_entry(parent, struct bfq_queue, pos_node);
+
+		/*
+		 * Sort strictly based on sector. Smallest to the left,
+		 * largest to the right.
+		 */
+		if (sector > blk_rq_pos(bfqq->next_rq))
+			n = &(*p)->rb_right;
+		else if (sector < blk_rq_pos(bfqq->next_rq))
+			n = &(*p)->rb_left;
+		else
+			break;
+		p = n;
+		bfqq = NULL;
+	}
+
+	*ret_parent = parent;
+	if (rb_link)
+		*rb_link = p;
+
+	bfq_log(bfqd, "%llu: returning %d",
+		(unsigned long long) sector,
+		bfqq ? bfqq->pid : 0);
+
+	return bfqq;
+}
+
+static bool bfq_too_late_for_merging(struct bfq_queue *bfqq)
+{
+	return bfqq->service_from_backlogged > 0 &&
+		time_is_before_jiffies(bfqq->first_IO_time +
+				       bfq_merge_time_limit);
+}
+
+static void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	struct rb_node **p, *parent;
+	struct bfq_queue *__bfqq;
+
+	if (bfqq->pos_root) {
+		rb_erase(&bfqq->pos_node, bfqq->pos_root);
+		bfqq->pos_root = NULL;
+	}
+
+	/*
+	 * bfqq cannot be merged any longer (see comments in
+	 * bfq_setup_cooperator): no point in adding bfqq into the
+	 * position tree.
+	 */
+	if (bfq_too_late_for_merging(bfqq))
+		return;
+
+	if (bfq_class_idle(bfqq))
+		return;
+	if (!bfqq->next_rq)
+		return;
+
+	bfqq->pos_root = &bfq_bfqq_to_bfqg(bfqq)->rq_pos_tree;
+	__bfqq = bfq_rq_pos_tree_lookup(bfqd, bfqq->pos_root,
+			blk_rq_pos(bfqq->next_rq), &parent, &p);
+	if (!__bfqq) {
+		rb_link_node(&bfqq->pos_node, parent, p);
+		rb_insert_color(&bfqq->pos_node, bfqq->pos_root);
+	} else
+		bfqq->pos_root = NULL;
+}
+
+/*
+ * Tell whether there are active queues or groups with differentiated weights.
+ */
+static bool bfq_differentiated_weights(struct bfq_data *bfqd)
+{
+	/*
+	 * For weights to differ, at least one of the trees must contain
+	 * at least two nodes.
+	 */
+	return (!RB_EMPTY_ROOT(&bfqd->queue_weights_tree) &&
+		(bfqd->queue_weights_tree.rb_node->rb_left ||
+		 bfqd->queue_weights_tree.rb_node->rb_right)
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	       ) ||
+	       (!RB_EMPTY_ROOT(&bfqd->group_weights_tree) &&
+		(bfqd->group_weights_tree.rb_node->rb_left ||
+		 bfqd->group_weights_tree.rb_node->rb_right)
+#endif
+	       );
+}
+
+/*
+ * The following function returns true if every queue must receive the
+ * same share of the throughput (this condition is used when deciding
+ * whether idling may be disabled, see the comments in the function
+ * bfq_bfqq_may_idle()).
+ *
+ * Such a scenario occurs when:
+ * 1) all active queues have the same weight,
+ * 2) all active groups at the same level in the groups tree have the same
+ *    weight,
+ * 3) all active groups at the same level in the groups tree have the same
+ *    number of children.
+ *
+ * Unfortunately, keeping the necessary state for evaluating exactly the
+ * above symmetry conditions would be quite complex and time-consuming.
+ * Therefore this function evaluates, instead, the following stronger
+ * sub-conditions, for which it is much easier to maintain the needed
+ * state:
+ * 1) all active queues have the same weight,
+ * 2) all active groups have the same weight,
+ * 3) all active groups have at most one active child each.
+ * In particular, the last two conditions are always true if hierarchical
+ * support and the cgroups interface are not enabled, thus no state needs
+ * to be maintained in this case.
+ */
+static bool bfq_symmetric_scenario(struct bfq_data *bfqd)
+{
+	return !bfq_differentiated_weights(bfqd);
+}
+
+/*
+ * If the weight-counter tree passed as input contains no counter for
+ * the weight of the input entity, then add that counter; otherwise just
+ * increment the existing counter.
+ *
+ * Note that weight-counter trees contain few nodes in mostly symmetric
+ * scenarios. For example, if all queues have the same weight, then the
+ * weight-counter tree for the queues may contain at most one node.
+ * This holds even if low_latency is on, because weight-raised queues
+ * are not inserted in the tree.
+ * In most scenarios, the rate at which nodes are created/destroyed
+ * should be low too.
+ */
+static void bfq_weights_tree_add(struct bfq_data *bfqd,
+				 struct bfq_entity *entity,
+				 struct rb_root *root)
+{
+	struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+	/*
+	 * Do not insert if the entity is already associated with a
+	 * counter, which happens if:
+	 *   1) the entity is associated with a queue,
+	 *   2) a request arrival has caused the queue to become both
+	 *      non-weight-raised, and hence change its weight, and
+	 *      backlogged; in this respect, each of the two events
+	 *      causes an invocation of this function,
+	 *   3) this is the invocation of this function caused by the
+	 *      second event. This second invocation is actually useless,
+	 *      and we handle this fact by exiting immediately. More
+	 *      efficient or clearer solutions might possibly be adopted.
+	 */
+	if (entity->weight_counter)
+		return;
+
+	while (*new) {
+		struct bfq_weight_counter *__counter = container_of(*new,
+						struct bfq_weight_counter,
+						weights_node);
+		parent = *new;
+
+		if (entity->weight == __counter->weight) {
+			entity->weight_counter = __counter;
+			goto inc_counter;
+		}
+		if (entity->weight < __counter->weight)
+			new = &((*new)->rb_left);
+		else
+			new = &((*new)->rb_right);
+	}
+
+	entity->weight_counter = kzalloc(sizeof(struct bfq_weight_counter),
+					 GFP_ATOMIC);
+
+	/*
+	 * In the unlucky event of an allocation failure, we just
+	 * exit. This will cause the weight of entity to not be
+	 * considered in bfq_differentiated_weights, which, in its
+	 * turn, causes the scenario to be deemed wrongly symmetric in
+	 * case entity's weight would have been the only weight making
+	 * the scenario asymmetric. On the bright side, no unbalance
+	 * will however occur when entity becomes inactive again (the
+	 * invocation of this function is triggered by an activation
+	 * of entity). In fact, bfq_weights_tree_remove does nothing
+	 * if !entity->weight_counter.
+	 */
+	if (unlikely(!entity->weight_counter))
+		return;
+
+	entity->weight_counter->weight = entity->weight;
+	rb_link_node(&entity->weight_counter->weights_node, parent, new);
+	rb_insert_color(&entity->weight_counter->weights_node, root);
+
+inc_counter:
+	entity->weight_counter->num_active++;
+}
+
+/*
+ * Decrement the weight counter associated with the entity, and, if the
+ * counter reaches 0, remove the counter from the tree.
+ * See the comments to the function bfq_weights_tree_add() for considerations
+ * about overhead.
+ */
+static void bfq_weights_tree_remove(struct bfq_data *bfqd,
+				    struct bfq_entity *entity,
+				    struct rb_root *root)
+{
+	if (!entity->weight_counter)
+		return;
+
+	BUG_ON(RB_EMPTY_ROOT(root));
+	BUG_ON(entity->weight_counter->weight != entity->weight);
+
+	BUG_ON(!entity->weight_counter->num_active);
+	entity->weight_counter->num_active--;
+	if (entity->weight_counter->num_active > 0)
+		goto reset_entity_pointer;
+
+	rb_erase(&entity->weight_counter->weights_node, root);
+	kfree(entity->weight_counter);
+
+reset_entity_pointer:
+	entity->weight_counter = NULL;
+}
+
+/*
+ * Return expired entry, or NULL to just start from scratch in rbtree.
+ */
+static struct request *bfq_check_fifo(struct bfq_queue *bfqq,
+				      struct request *last)
+{
+	struct request *rq;
+
+	if (bfq_bfqq_fifo_expire(bfqq))
+		return NULL;
+
+	bfq_mark_bfqq_fifo_expire(bfqq);
+
+	rq = rq_entry_fifo(bfqq->fifo.next);
+
+	if (rq == last || ktime_get_ns() < rq->fifo_time)
+		return NULL;
+
+	bfq_log_bfqq(bfqq->bfqd, bfqq, "returned %p", rq);
+	BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
+	return rq;
+}
+
+static struct request *bfq_find_next_rq(struct bfq_data *bfqd,
+					struct bfq_queue *bfqq,
+					struct request *last)
+{
+	struct rb_node *rbnext = rb_next(&last->rb_node);
+	struct rb_node *rbprev = rb_prev(&last->rb_node);
+	struct request *next, *prev = NULL;
+
+	BUG_ON(list_empty(&bfqq->fifo));
+
+	/* Follow expired path, else get first next available. */
+	next = bfq_check_fifo(bfqq, last);
+	if (next) {
+		BUG_ON(next == last);
+		return next;
+	}
+
+	BUG_ON(RB_EMPTY_NODE(&last->rb_node));
+
+	if (rbprev)
+		prev = rb_entry_rq(rbprev);
+
+	if (rbnext)
+		next = rb_entry_rq(rbnext);
+	else {
+		rbnext = rb_first(&bfqq->sort_list);
+		if (rbnext && rbnext != &last->rb_node)
+			next = rb_entry_rq(rbnext);
+	}
+
+	return bfq_choose_req(bfqd, next, prev, blk_rq_pos(last));
+}
+
+/* see the definition of bfq_async_charge_factor for details */
+static unsigned long bfq_serv_to_charge(struct request *rq,
+					struct bfq_queue *bfqq)
+{
+	if (bfq_bfqq_sync(bfqq) || bfqq->wr_coeff > 1)
+		return blk_rq_sectors(rq);
+
+	/*
+	 * If there are no weight-raised queues, then amplify service
+	 * by just the async charge factor; otherwise amplify service
+	 * by twice the async charge factor, to further reduce latency
+	 * for weight-raised queues.
+	 */
+	if (bfqq->bfqd->wr_busy_queues == 0)
+		return blk_rq_sectors(rq) * bfq_async_charge_factor;
+
+	return blk_rq_sectors(rq) * 2 * bfq_async_charge_factor;
+}
+
+/**
+ * bfq_updated_next_req - update the queue after a new next_rq selection.
+ * @bfqd: the device data the queue belongs to.
+ * @bfqq: the queue to update.
+ *
+ * If the first request of a queue changes we make sure that the queue
+ * has enough budget to serve at least its first request (if the
+ * request has grown).  We do this because if the queue has not enough
+ * budget for its first request, it has to go through two dispatch
+ * rounds to actually get it dispatched.
+ */
+static void bfq_updated_next_req(struct bfq_data *bfqd,
+				 struct bfq_queue *bfqq)
+{
+	struct bfq_entity *entity = &bfqq->entity;
+	struct bfq_service_tree *st = bfq_entity_service_tree(entity);
+	struct request *next_rq = bfqq->next_rq;
+	unsigned long new_budget;
+
+	if (!next_rq)
+		return;
+
+	if (bfqq == bfqd->in_service_queue)
+		/*
+		 * In order not to break guarantees, budgets cannot be
+		 * changed after an entity has been selected.
+		 */
+		return;
+
+	BUG_ON(entity->tree != &st->active);
+	BUG_ON(entity == entity->sched_data->in_service_entity);
+
+	new_budget = max_t(unsigned long, bfqq->max_budget,
+			   bfq_serv_to_charge(next_rq, bfqq));
+	if (entity->budget != new_budget) {
+		entity->budget = new_budget;
+		bfq_log_bfqq(bfqd, bfqq, "new budget %lu",
+					 new_budget);
+		bfq_requeue_bfqq(bfqd, bfqq, false);
+	}
+}
+
+static unsigned int bfq_wr_duration(struct bfq_data *bfqd)
+{
+	u64 dur;
+
+	if (bfqd->bfq_wr_max_time > 0)
+		return bfqd->bfq_wr_max_time;
+
+	dur = bfqd->RT_prod;
+	do_div(dur, bfqd->peak_rate);
+
+	/*
+	 * Limit duration between 3 and 13 seconds. Tests show that
+	 * higher values than 13 seconds often yield the opposite of
+	 * the desired result, i.e., worsen responsiveness by letting
+	 * non-interactive and non-soft-real-time applications
+	 * preserve weight raising for a too long time interval.
+	 *
+	 * On the other end, lower values than 3 seconds make it
+	 * difficult for most interactive tasks to complete their jobs
+	 * before weight-raising finishes.
+	 */
+	if (dur > msecs_to_jiffies(13000))
+		dur = msecs_to_jiffies(13000);
+	else if (dur < msecs_to_jiffies(3000))
+		dur = msecs_to_jiffies(3000);
+
+	return dur;
+}
+
+/* switch back from soft real-time to interactive weight raising */
+static void switch_back_to_interactive_wr(struct bfq_queue *bfqq,
+					  struct bfq_data *bfqd)
+{
+	bfqq->wr_coeff = bfqd->bfq_wr_coeff;
+	bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
+	bfqq->last_wr_start_finish = bfqq->wr_start_at_switch_to_srt;
+}
+
+static void
+bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd,
+		      struct bfq_io_cq *bic, bool bfq_already_existing)
+{
+	unsigned int old_wr_coeff;
+	bool busy = bfq_already_existing && bfq_bfqq_busy(bfqq);
+
+	if (bic->saved_has_short_ttime)
+		bfq_mark_bfqq_has_short_ttime(bfqq);
+	else
+		bfq_clear_bfqq_has_short_ttime(bfqq);
+
+	if (bic->saved_IO_bound)
+		bfq_mark_bfqq_IO_bound(bfqq);
+	else
+		bfq_clear_bfqq_IO_bound(bfqq);
+
+	if (unlikely(busy))
+		old_wr_coeff = bfqq->wr_coeff;
+
+	bfqq->wr_coeff = bic->saved_wr_coeff;
+	bfqq->wr_start_at_switch_to_srt = bic->saved_wr_start_at_switch_to_srt;
+	BUG_ON(time_is_after_jiffies(bfqq->wr_start_at_switch_to_srt));
+	bfqq->last_wr_start_finish = bic->saved_last_wr_start_finish;
+	bfqq->wr_cur_max_time = bic->saved_wr_cur_max_time;
+	BUG_ON(time_is_after_jiffies(bfqq->last_wr_start_finish));
+
+	bfq_log_bfqq(bfqq->bfqd, bfqq,
+		     "bic %p wr_coeff %d start_finish %lu max_time %lu",
+		     bic, bfqq->wr_coeff, bfqq->last_wr_start_finish,
+		     bfqq->wr_cur_max_time);
+
+	if (bfqq->wr_coeff > 1 && (bfq_bfqq_in_large_burst(bfqq) ||
+				   time_is_before_jiffies(bfqq->last_wr_start_finish +
+							  bfqq->wr_cur_max_time))) {
+		if (bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time &&
+		    !bfq_bfqq_in_large_burst(bfqq) &&
+		    time_is_after_eq_jiffies(bfqq->wr_start_at_switch_to_srt +
+					     bfq_wr_duration(bfqd))) {
+			switch_back_to_interactive_wr(bfqq, bfqd);
+			bfq_log_bfqq(bfqq->bfqd, bfqq,
+			     "switching back to interactive");
+		} else {
+			bfqq->wr_coeff = 1;
+			bfq_log_bfqq(bfqq->bfqd, bfqq,
+			     "switching off wr (%lu + %lu < %lu)",
+			     bfqq->last_wr_start_finish, bfqq->wr_cur_max_time,
+			     jiffies);
+		}
+	}
+
+	/* make sure weight will be updated, however we got here */
+	bfqq->entity.prio_changed = 1;
+
+	if (likely(!busy))
+		return;
+
+	if (old_wr_coeff == 1 && bfqq->wr_coeff > 1) {
+		bfqd->wr_busy_queues++;
+		BUG_ON(bfqd->wr_busy_queues > bfqd->busy_queues);
+	} else if (old_wr_coeff > 1 && bfqq->wr_coeff == 1) {
+		bfqd->wr_busy_queues--;
+		BUG_ON(bfqd->wr_busy_queues < 0);
+	}
+}
+
+static int bfqq_process_refs(struct bfq_queue *bfqq)
+{
+	int process_refs, io_refs;
+
+	lockdep_assert_held(bfqq->bfqd->queue->queue_lock);
+
+	io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE];
+	process_refs = bfqq->ref - io_refs - bfqq->entity.on_st;
+	BUG_ON(process_refs < 0);
+	return process_refs;
+}
+
+/* Empty burst list and add just bfqq (see comments to bfq_handle_burst) */
+static void bfq_reset_burst_list(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	struct bfq_queue *item;
+	struct hlist_node *n;
+
+	hlist_for_each_entry_safe(item, n, &bfqd->burst_list, burst_list_node)
+		hlist_del_init(&item->burst_list_node);
+	hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list);
+	bfqd->burst_size = 1;
+	bfqd->burst_parent_entity = bfqq->entity.parent;
+}
+
+/* Add bfqq to the list of queues in current burst (see bfq_handle_burst) */
+static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	/* Increment burst size to take into account also bfqq */
+	bfqd->burst_size++;
+
+	bfq_log_bfqq(bfqd, bfqq, "%d", bfqd->burst_size);
+
+	BUG_ON(bfqd->burst_size > bfqd->bfq_large_burst_thresh);
+
+	if (bfqd->burst_size == bfqd->bfq_large_burst_thresh) {
+		struct bfq_queue *pos, *bfqq_item;
+		struct hlist_node *n;
+
+		/*
+		 * Enough queues have been activated shortly after each
+		 * other to consider this burst as large.
+		 */
+		bfqd->large_burst = true;
+		bfq_log_bfqq(bfqd, bfqq, "large burst started");
+
+		/*
+		 * We can now mark all queues in the burst list as
+		 * belonging to a large burst.
+		 */
+		hlist_for_each_entry(bfqq_item, &bfqd->burst_list,
+				     burst_list_node) {
+			bfq_mark_bfqq_in_large_burst(bfqq_item);
+			bfq_log_bfqq(bfqd, bfqq_item, "marked in large burst");
+		}
+		bfq_mark_bfqq_in_large_burst(bfqq);
+		bfq_log_bfqq(bfqd, bfqq, "marked in large burst");
+
+		/*
+		 * From now on, and until the current burst finishes, any
+		 * new queue being activated shortly after the last queue
+		 * was inserted in the burst can be immediately marked as
+		 * belonging to a large burst. So the burst list is not
+		 * needed any more. Remove it.
+		 */
+		hlist_for_each_entry_safe(pos, n, &bfqd->burst_list,
+					  burst_list_node)
+			hlist_del_init(&pos->burst_list_node);
+	} else /*
+		* Burst not yet large: add bfqq to the burst list. Do
+		* not increment the ref counter for bfqq, because bfqq
+		* is removed from the burst list before freeing bfqq
+		* in put_queue.
+		*/
+		hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list);
+}
+
+/*
+ * If many queues belonging to the same group happen to be created
+ * shortly after each other, then the processes associated with these
+ * queues have typically a common goal. In particular, bursts of queue
+ * creations are usually caused by services or applications that spawn
+ * many parallel threads/processes. Examples are systemd during boot,
+ * or git grep. To help these processes get their job done as soon as
+ * possible, it is usually better to not grant either weight-raising
+ * or device idling to their queues.
+ *
+ * In this comment we describe, firstly, the reasons why this fact
+ * holds, and, secondly, the next function, which implements the main
+ * steps needed to properly mark these queues so that they can then be
+ * treated in a different way.
+ *
+ * The above services or applications benefit mostly from a high
+ * throughput: the quicker the requests of the activated queues are
+ * cumulatively served, the sooner the target job of these queues gets
+ * completed. As a consequence, weight-raising any of these queues,
+ * which also implies idling the device for it, is almost always
+ * counterproductive. In most cases it just lowers throughput.
+ *
+ * On the other hand, a burst of queue creations may be caused also by
+ * the start of an application that does not consist of a lot of
+ * parallel I/O-bound threads. In fact, with a complex application,
+ * several short processes may need to be executed to start-up the
+ * application. In this respect, to start an application as quickly as
+ * possible, the best thing to do is in any case to privilege the I/O
+ * related to the application with respect to all other
+ * I/O. Therefore, the best strategy to start as quickly as possible
+ * an application that causes a burst of queue creations is to
+ * weight-raise all the queues created during the burst. This is the
+ * exact opposite of the best strategy for the other type of bursts.
+ *
+ * In the end, to take the best action for each of the two cases, the
+ * two types of bursts need to be distinguished. Fortunately, this
+ * seems relatively easy, by looking at the sizes of the bursts. In
+ * particular, we found a threshold such that only bursts with a
+ * larger size than that threshold are apparently caused by
+ * services or commands such as systemd or git grep. For brevity,
+ * hereafter we call just 'large' these bursts. BFQ *does not*
+ * weight-raise queues whose creation occurs in a large burst. In
+ * addition, for each of these queues BFQ performs or does not perform
+ * idling depending on which choice boosts the throughput more. The
+ * exact choice depends on the device and request pattern at
+ * hand.
+ *
+ * Unfortunately, false positives may occur while an interactive task
+ * is starting (e.g., an application is being started). The
+ * consequence is that the queues associated with the task do not
+ * enjoy weight raising as expected. Fortunately these false positives
+ * are very rare. They typically occur if some service happens to
+ * start doing I/O exactly when the interactive task starts.
+ *
+ * Turning back to the next function, it implements all the steps
+ * needed to detect the occurrence of a large burst and to properly
+ * mark all the queues belonging to it (so that they can then be
+ * treated in a different way). This goal is achieved by maintaining a
+ * "burst list" that holds, temporarily, the queues that belong to the
+ * burst in progress. The list is then used to mark these queues as
+ * belonging to a large burst if the burst does become large. The main
+ * steps are the following.
+ *
+ * . when the very first queue is created, the queue is inserted into the
+ *   list (as it could be the first queue in a possible burst)
+ *
+ * . if the current burst has not yet become large, and a queue Q that does
+ *   not yet belong to the burst is activated shortly after the last time
+ *   at which a new queue entered the burst list, then the function appends
+ *   Q to the burst list
+ *
+ * . if, as a consequence of the previous step, the burst size reaches
+ *   the large-burst threshold, then
+ *
+ *     . all the queues in the burst list are marked as belonging to a
+ *       large burst
+ *
+ *     . the burst list is deleted; in fact, the burst list already served
+ *       its purpose (keeping temporarily track of the queues in a burst,
+ *       so as to be able to mark them as belonging to a large burst in the
+ *       previous sub-step), and now is not needed any more
+ *
+ *     . the device enters a large-burst mode
+ *
+ * . if a queue Q that does not belong to the burst is created while
+ *   the device is in large-burst mode and shortly after the last time
+ *   at which a queue either entered the burst list or was marked as
+ *   belonging to the current large burst, then Q is immediately marked
+ *   as belonging to a large burst.
+ *
+ * . if a queue Q that does not belong to the burst is created a while
+ *   later, i.e., not shortly after, than the last time at which a queue
+ *   either entered the burst list or was marked as belonging to the
+ *   current large burst, then the current burst is deemed as finished and:
+ *
+ *        . the large-burst mode is reset if set
+ *
+ *        . the burst list is emptied
+ *
+ *        . Q is inserted in the burst list, as Q may be the first queue
+ *          in a possible new burst (then the burst list contains just Q
+ *          after this step).
+ */
+static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	/*
+	 * If bfqq is already in the burst list or is part of a large
+	 * burst, or finally has just been split, then there is
+	 * nothing else to do.
+	 */
+	if (!hlist_unhashed(&bfqq->burst_list_node) ||
+	    bfq_bfqq_in_large_burst(bfqq) ||
+	    time_is_after_eq_jiffies(bfqq->split_time +
+				     msecs_to_jiffies(10)))
+		return;
+
+	/*
+	 * If bfqq's creation happens late enough, or bfqq belongs to
+	 * a different group than the burst group, then the current
+	 * burst is finished, and related data structures must be
+	 * reset.
+	 *
+	 * In this respect, consider the special case where bfqq is
+	 * the very first queue created after BFQ is selected for this
+	 * device. In this case, last_ins_in_burst and
+	 * burst_parent_entity are not yet significant when we get
+	 * here. But it is easy to verify that, whether or not the
+	 * following condition is true, bfqq will end up being
+	 * inserted into the burst list. In particular the list will
+	 * happen to contain only bfqq. And this is exactly what has
+	 * to happen, as bfqq may be the first queue of the first
+	 * burst.
+	 */
+	if (time_is_before_jiffies(bfqd->last_ins_in_burst +
+	    bfqd->bfq_burst_interval) ||
+	    bfqq->entity.parent != bfqd->burst_parent_entity) {
+		bfqd->large_burst = false;
+		bfq_reset_burst_list(bfqd, bfqq);
+		bfq_log_bfqq(bfqd, bfqq,
+			"late activation or different group");
+		goto end;
+	}
+
+	/*
+	 * If we get here, then bfqq is being activated shortly after the
+	 * last queue. So, if the current burst is also large, we can mark
+	 * bfqq as belonging to this large burst immediately.
+	 */
+	if (bfqd->large_burst) {
+		bfq_log_bfqq(bfqd, bfqq, "marked in burst");
+		bfq_mark_bfqq_in_large_burst(bfqq);
+		goto end;
+	}
+
+	/*
+	 * If we get here, then a large-burst state has not yet been
+	 * reached, but bfqq is being activated shortly after the last
+	 * queue. Then we add bfqq to the burst.
+	 */
+	bfq_add_to_burst(bfqd, bfqq);
+end:
+	/*
+	 * At this point, bfqq either has been added to the current
+	 * burst or has caused the current burst to terminate and a
+	 * possible new burst to start. In particular, in the second
+	 * case, bfqq has become the first queue in the possible new
+	 * burst.  In both cases last_ins_in_burst needs to be moved
+	 * forward.
+	 */
+	bfqd->last_ins_in_burst = jiffies;
+
+}
+
+static int bfq_bfqq_budget_left(struct bfq_queue *bfqq)
+{
+	struct bfq_entity *entity = &bfqq->entity;
+
+	return entity->budget - entity->service;
+}
+
+/*
+ * If enough samples have been computed, return the current max budget
+ * stored in bfqd, which is dynamically updated according to the
+ * estimated disk peak rate; otherwise return the default max budget
+ */
+static int bfq_max_budget(struct bfq_data *bfqd)
+{
+	if (bfqd->budgets_assigned < bfq_stats_min_budgets)
+		return bfq_default_max_budget;
+	else
+		return bfqd->bfq_max_budget;
+}
+
+/*
+ * Return min budget, which is a fraction of the current or default
+ * max budget (trying with 1/32)
+ */
+static int bfq_min_budget(struct bfq_data *bfqd)
+{
+	if (bfqd->budgets_assigned < bfq_stats_min_budgets)
+		return bfq_default_max_budget / 32;
+	else
+		return bfqd->bfq_max_budget / 32;
+}
+
+static void bfq_bfqq_expire(struct bfq_data *bfqd,
+			    struct bfq_queue *bfqq,
+			    bool compensate,
+			    enum bfqq_expiration reason);
+
+/*
+ * The next function, invoked after the input queue bfqq switches from
+ * idle to busy, updates the budget of bfqq. The function also tells
+ * whether the in-service queue should be expired, by returning
+ * true. The purpose of expiring the in-service queue is to give bfqq
+ * the chance to possibly preempt the in-service queue, and the reason
+ * for preempting the in-service queue is to achieve one of the two
+ * goals below.
+ *
+ * 1. Guarantee to bfqq its reserved bandwidth even if bfqq has
+ * expired because it has remained idle. In particular, bfqq may have
+ * expired for one of the following two reasons:
+ *
+ * - BFQ_BFQQ_NO_MORE_REQUEST bfqq did not enjoy any device idling and
+ *   did not make it to issue a new request before its last request
+ *   was served;
+ *
+ * - BFQ_BFQQ_TOO_IDLE bfqq did enjoy device idling, but did not issue
+ *   a new request before the expiration of the idling-time.
+ *
+ * Even if bfqq has expired for one of the above reasons, the process
+ * associated with the queue may be however issuing requests greedily,
+ * and thus be sensitive to the bandwidth it receives (bfqq may have
+ * remained idle for other reasons: CPU high load, bfqq not enjoying
+ * idling, I/O throttling somewhere in the path from the process to
+ * the I/O scheduler, ...). But if, after every expiration for one of
+ * the above two reasons, bfqq has to wait for the service of at least
+ * one full budget of another queue before being served again, then
+ * bfqq is likely to get a much lower bandwidth or resource time than
+ * its reserved ones. To address this issue, two countermeasures need
+ * to be taken.
+ *
+ * First, the budget and the timestamps of bfqq need to be updated in
+ * a special way on bfqq reactivation: they need to be updated as if
+ * bfqq did not remain idle and did not expire. In fact, if they are
+ * computed as if bfqq expired and remained idle until reactivation,
+ * then the process associated with bfqq is treated as if, instead of
+ * being greedy, it stopped issuing requests when bfqq remained idle,
+ * and restarts issuing requests only on this reactivation. In other
+ * words, the scheduler does not help the process recover the "service
+ * hole" between bfqq expiration and reactivation. As a consequence,
+ * the process receives a lower bandwidth than its reserved one. In
+ * contrast, to recover this hole, the budget must be updated as if
+ * bfqq was not expired at all before this reactivation, i.e., it must
+ * be set to the value of the remaining budget when bfqq was
+ * expired. Along the same line, timestamps need to be assigned the
+ * value they had the last time bfqq was selected for service, i.e.,
+ * before last expiration. Thus timestamps need to be back-shifted
+ * with respect to their normal computation (see [1] for more details
+ * on this tricky aspect).
+ *
+ * Secondly, to allow the process to recover the hole, the in-service
+ * queue must be expired too, to give bfqq the chance to preempt it
+ * immediately. In fact, if bfqq has to wait for a full budget of the
+ * in-service queue to be completed, then it may become impossible to
+ * let the process recover the hole, even if the back-shifted
+ * timestamps of bfqq are lower than those of the in-service queue. If
+ * this happens for most or all of the holes, then the process may not
+ * receive its reserved bandwidth. In this respect, it is worth noting
+ * that, being the service of outstanding requests unpreemptible, a
+ * little fraction of the holes may however be unrecoverable, thereby
+ * causing a little loss of bandwidth.
+ *
+ * The last important point is detecting whether bfqq does need this
+ * bandwidth recovery. In this respect, the next function deems the
+ * process associated with bfqq greedy, and thus allows it to recover
+ * the hole, if: 1) the process is waiting for the arrival of a new
+ * request (which implies that bfqq expired for one of the above two
+ * reasons), and 2) such a request has arrived soon. The first
+ * condition is controlled through the flag non_blocking_wait_rq,
+ * while the second through the flag arrived_in_time. If both
+ * conditions hold, then the function computes the budget in the
+ * above-described special way, and signals that the in-service queue
+ * should be expired. Timestamp back-shifting is done later in
+ * __bfq_activate_entity.
+ *
+ * 2. Reduce latency. Even if timestamps are not backshifted to let
+ * the process associated with bfqq recover a service hole, bfqq may
+ * however happen to have, after being (re)activated, a lower finish
+ * timestamp than the in-service queue.  That is, the next budget of
+ * bfqq may have to be completed before the one of the in-service
+ * queue. If this is the case, then preempting the in-service queue
+ * allows this goal to be achieved, apart from the unpreemptible,
+ * outstanding requests mentioned above.
+ *
+ * Unfortunately, regardless of which of the above two goals one wants
+ * to achieve, service trees need first to be updated to know whether
+ * the in-service queue must be preempted. To have service trees
+ * correctly updated, the in-service queue must be expired and
+ * rescheduled, and bfqq must be scheduled too. This is one of the
+ * most costly operations (in future versions, the scheduling
+ * mechanism may be re-designed in such a way to make it possible to
+ * know whether preemption is needed without needing to update service
+ * trees). In addition, queue preemptions almost always cause random
+ * I/O, and thus loss of throughput. Because of these facts, the next
+ * function adopts the following simple scheme to avoid both costly
+ * operations and too frequent preemptions: it requests the expiration
+ * of the in-service queue (unconditionally) only for queues that need
+ * to recover a hole, or that either are weight-raised or deserve to
+ * be weight-raised.
+ */
+static bool bfq_bfqq_update_budg_for_activation(struct bfq_data *bfqd,
+						struct bfq_queue *bfqq,
+						bool arrived_in_time,
+						bool wr_or_deserves_wr)
+{
+	struct bfq_entity *entity = &bfqq->entity;
+
+	if (bfq_bfqq_non_blocking_wait_rq(bfqq) && arrived_in_time) {
+		/*
+		 * We do not clear the flag non_blocking_wait_rq here, as
+		 * the latter is used in bfq_activate_bfqq to signal
+		 * that timestamps need to be back-shifted (and is
+		 * cleared right after).
+		 */
+
+		/*
+		 * In next assignment we rely on that either
+		 * entity->service or entity->budget are not updated
+		 * on expiration if bfqq is empty (see
+		 * __bfq_bfqq_recalc_budget). Thus both quantities
+		 * remain unchanged after such an expiration, and the
+		 * following statement therefore assigns to
+		 * entity->budget the remaining budget on such an
+		 * expiration. For clarity, entity->service is not
+		 * updated on expiration in any case, and, in normal
+		 * operation, is reset only when bfqq is selected for
+		 * service (see bfq_get_next_queue).
+		 */
+		BUG_ON(bfqq->max_budget < 0);
+		entity->budget = min_t(unsigned long,
+				       bfq_bfqq_budget_left(bfqq),
+				       bfqq->max_budget);
+
+		BUG_ON(entity->budget < 0);
+		return true;
+	}
+
+	BUG_ON(bfqq->max_budget < 0);
+	entity->budget = max_t(unsigned long, bfqq->max_budget,
+			       bfq_serv_to_charge(bfqq->next_rq, bfqq));
+	BUG_ON(entity->budget < 0);
+
+	bfq_clear_bfqq_non_blocking_wait_rq(bfqq);
+	return wr_or_deserves_wr;
+}
+
+/*
+ * Return the farthest future time instant according to jiffies
+ * macros.
+ */
+static unsigned long bfq_greatest_from_now(void)
+{
+	return jiffies + MAX_JIFFY_OFFSET;
+}
+
+/*
+ * Return the farthest past time instant according to jiffies
+ * macros.
+ */
+static unsigned long bfq_smallest_from_now(void)
+{
+	return jiffies - MAX_JIFFY_OFFSET;
+}
+
+static void bfq_update_bfqq_wr_on_rq_arrival(struct bfq_data *bfqd,
+					     struct bfq_queue *bfqq,
+					     unsigned int old_wr_coeff,
+					     bool wr_or_deserves_wr,
+					     bool interactive,
+					     bool in_burst,
+					     bool soft_rt)
+{
+	if (old_wr_coeff == 1 && wr_or_deserves_wr) {
+		/* start a weight-raising period */
+		if (interactive) {
+			bfqq->service_from_wr = 0;
+			bfqq->wr_coeff = bfqd->bfq_wr_coeff;
+			bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
+		} else {
+			/*
+			 * No interactive weight raising in progress
+			 * here: assign minus infinity to
+			 * wr_start_at_switch_to_srt, to make sure
+			 * that, at the end of the soft-real-time
+			 * weight raising periods that is starting
+			 * now, no interactive weight-raising period
+			 * may be wrongly considered as still in
+			 * progress (and thus actually started by
+			 * mistake).
+			 */
+			bfqq->wr_start_at_switch_to_srt =
+				bfq_smallest_from_now();
+			bfqq->wr_coeff = bfqd->bfq_wr_coeff *
+				BFQ_SOFTRT_WEIGHT_FACTOR;
+			bfqq->wr_cur_max_time =
+				bfqd->bfq_wr_rt_max_time;
+		}
+		/*
+		 * If needed, further reduce budget to make sure it is
+		 * close to bfqq's backlog, so as to reduce the
+		 * scheduling-error component due to a too large
+		 * budget. Do not care about throughput consequences,
+		 * but only about latency. Finally, do not assign a
+		 * too small budget either, to avoid increasing
+		 * latency by causing too frequent expirations.
+		 */
+		bfqq->entity.budget = min_t(unsigned long,
+					    bfqq->entity.budget,
+					    2 * bfq_min_budget(bfqd));
+
+		bfq_log_bfqq(bfqd, bfqq,
+			     "wrais starting at %lu, rais_max_time %u",
+			     jiffies,
+			     jiffies_to_msecs(bfqq->wr_cur_max_time));
+	} else if (old_wr_coeff > 1) {
+		if (interactive) { /* update wr coeff and duration */
+			bfqq->wr_coeff = bfqd->bfq_wr_coeff;
+			bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
+		} else if (in_burst) {
+			bfqq->wr_coeff = 1;
+			bfq_log_bfqq(bfqd, bfqq,
+				     "wrais ending at %lu, rais_max_time %u",
+				     jiffies,
+				     jiffies_to_msecs(bfqq->
+						      wr_cur_max_time));
+		} else if (soft_rt) {
+			/*
+			 * The application is now or still meeting the
+			 * requirements for being deemed soft rt.  We
+			 * can then correctly and safely (re)charge
+			 * the weight-raising duration for the
+			 * application with the weight-raising
+			 * duration for soft rt applications.
+			 *
+			 * In particular, doing this recharge now, i.e.,
+			 * before the weight-raising period for the
+			 * application finishes, reduces the probability
+			 * of the following negative scenario:
+			 * 1) the weight of a soft rt application is
+			 *    raised at startup (as for any newly
+			 *    created application),
+			 * 2) since the application is not interactive,
+			 *    at a certain time weight-raising is
+			 *    stopped for the application,
+			 * 3) at that time the application happens to
+			 *    still have pending requests, and hence
+			 *    is destined to not have a chance to be
+			 *    deemed soft rt before these requests are
+			 *    completed (see the comments to the
+			 *    function bfq_bfqq_softrt_next_start()
+			 *    for details on soft rt detection),
+			 * 4) these pending requests experience a high
+			 *    latency because the application is not
+			 *    weight-raised while they are pending.
+			 */
+			if (bfqq->wr_cur_max_time !=
+				bfqd->bfq_wr_rt_max_time) {
+				bfqq->wr_start_at_switch_to_srt =
+					bfqq->last_wr_start_finish;
+                BUG_ON(time_is_after_jiffies(bfqq->last_wr_start_finish));
+
+				bfqq->wr_cur_max_time =
+					bfqd->bfq_wr_rt_max_time;
+				bfqq->wr_coeff = bfqd->bfq_wr_coeff *
+					BFQ_SOFTRT_WEIGHT_FACTOR;
+				bfq_log_bfqq(bfqd, bfqq,
+					     "switching to soft_rt wr");
+			} else
+				bfq_log_bfqq(bfqd, bfqq,
+					"moving forward soft_rt wr duration");
+			bfqq->last_wr_start_finish = jiffies;
+		}
+	}
+}
+
+static bool bfq_bfqq_idle_for_long_time(struct bfq_data *bfqd,
+					struct bfq_queue *bfqq)
+{
+	return bfqq->dispatched == 0 &&
+		time_is_before_jiffies(
+			bfqq->budget_timeout +
+			bfqd->bfq_wr_min_idle_time);
+}
+
+static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
+					     struct bfq_queue *bfqq,
+					     int old_wr_coeff,
+					     struct request *rq,
+					     bool *interactive)
+{
+	bool soft_rt, in_burst,	wr_or_deserves_wr,
+		bfqq_wants_to_preempt,
+		idle_for_long_time = bfq_bfqq_idle_for_long_time(bfqd, bfqq),
+		/*
+		 * See the comments on
+		 * bfq_bfqq_update_budg_for_activation for
+		 * details on the usage of the next variable.
+		 */
+		arrived_in_time =  ktime_get_ns() <=
+			RQ_BIC(rq)->ttime.last_end_request +
+			bfqd->bfq_slice_idle * 3;
+
+	bfq_log_bfqq(bfqd, bfqq,
+		     "bfq_add_request non-busy: "
+		     "jiffies %lu, in_time %d, idle_long %d busyw %d "
+		     "wr_coeff %u",
+		     jiffies, arrived_in_time,
+		     idle_for_long_time,
+		     bfq_bfqq_non_blocking_wait_rq(bfqq),
+		     old_wr_coeff);
+
+	BUG_ON(bfqq->entity.budget < bfqq->entity.service);
+
+	BUG_ON(bfqq == bfqd->in_service_queue);
+	bfqg_stats_update_io_add(bfqq_group(RQ_BFQQ(rq)), bfqq, rq->cmd_flags);
+
+	/*
+	 * bfqq deserves to be weight-raised if:
+	 * - it is sync,
+	 * - it does not belong to a large burst,
+	 * - it has been idle for enough time or is soft real-time,
+	 * - is linked to a bfq_io_cq (it is not shared in any sense)
+	 */
+	in_burst = bfq_bfqq_in_large_burst(bfqq);
+	soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 &&
+		!in_burst &&
+		time_is_before_jiffies(bfqq->soft_rt_next_start);
+	*interactive =
+		!in_burst &&
+		idle_for_long_time;
+	wr_or_deserves_wr = bfqd->low_latency &&
+		(bfqq->wr_coeff > 1 ||
+		 (bfq_bfqq_sync(bfqq) &&
+		  bfqq->bic && (*interactive || soft_rt)));
+
+	bfq_log_bfqq(bfqd, bfqq,
+		     "bfq_add_request: "
+		     "in_burst %d, "
+		     "soft_rt %d (next %lu), inter %d, bic %p",
+		     bfq_bfqq_in_large_burst(bfqq), soft_rt,
+		     bfqq->soft_rt_next_start,
+		     *interactive,
+		     bfqq->bic);
+
+	/*
+	 * Using the last flag, update budget and check whether bfqq
+	 * may want to preempt the in-service queue.
+	 */
+	bfqq_wants_to_preempt =
+		bfq_bfqq_update_budg_for_activation(bfqd, bfqq,
+						    arrived_in_time,
+						    wr_or_deserves_wr);
+
+	/*
+	 * If bfqq happened to be activated in a burst, but has been
+	 * idle for much more than an interactive queue, then we
+	 * assume that, in the overall I/O initiated in the burst, the
+	 * I/O associated with bfqq is finished. So bfqq does not need
+	 * to be treated as a queue belonging to a burst
+	 * anymore. Accordingly, we reset bfqq's in_large_burst flag
+	 * if set, and remove bfqq from the burst list if it's
+	 * there. We do not decrement burst_size, because the fact
+	 * that bfqq does not need to belong to the burst list any
+	 * more does not invalidate the fact that bfqq was created in
+	 * a burst.
+	 */
+	if (likely(!bfq_bfqq_just_created(bfqq)) &&
+	    idle_for_long_time &&
+	    time_is_before_jiffies(
+		    bfqq->budget_timeout +
+		    msecs_to_jiffies(10000))) {
+		hlist_del_init(&bfqq->burst_list_node);
+		bfq_clear_bfqq_in_large_burst(bfqq);
+	}
+
+	bfq_clear_bfqq_just_created(bfqq);
+
+	if (!bfq_bfqq_IO_bound(bfqq)) {
+		if (arrived_in_time) {
+			bfqq->requests_within_timer++;
+			if (bfqq->requests_within_timer >=
+			    bfqd->bfq_requests_within_timer)
+				bfq_mark_bfqq_IO_bound(bfqq);
+		} else
+			bfqq->requests_within_timer = 0;
+		bfq_log_bfqq(bfqd, bfqq, "requests in time %d",
+			     bfqq->requests_within_timer);
+	}
+
+	if (bfqd->low_latency) {
+		if (unlikely(time_is_after_jiffies(bfqq->split_time)))
+			/* wraparound */
+			bfqq->split_time =
+				jiffies - bfqd->bfq_wr_min_idle_time - 1;
+
+		if (time_is_before_jiffies(bfqq->split_time +
+					   bfqd->bfq_wr_min_idle_time)) {
+			bfq_update_bfqq_wr_on_rq_arrival(bfqd, bfqq,
+							 old_wr_coeff,
+							 wr_or_deserves_wr,
+							 *interactive,
+							 in_burst,
+							 soft_rt);
+
+			if (old_wr_coeff != bfqq->wr_coeff)
+				bfqq->entity.prio_changed = 1;
+		}
+	}
+
+	bfqq->last_idle_bklogged = jiffies;
+	bfqq->service_from_backlogged = 0;
+	bfq_clear_bfqq_softrt_update(bfqq);
+
+	bfq_add_bfqq_busy(bfqd, bfqq);
+
+	/*
+	 * Expire in-service queue only if preemption may be needed
+	 * for guarantees. In this respect, the function
+	 * next_queue_may_preempt just checks a simple, necessary
+	 * condition, and not a sufficient condition based on
+	 * timestamps. In fact, for the latter condition to be
+	 * evaluated, timestamps would need first to be updated, and
+	 * this operation is quite costly (see the comments on the
+	 * function bfq_bfqq_update_budg_for_activation).
+	 */
+	if (bfqd->in_service_queue && bfqq_wants_to_preempt &&
+	    bfqd->in_service_queue->wr_coeff < bfqq->wr_coeff &&
+	    next_queue_may_preempt(bfqd)) {
+		struct bfq_queue *in_serv =
+			bfqd->in_service_queue;
+		BUG_ON(in_serv == bfqq);
+
+		bfq_bfqq_expire(bfqd, bfqd->in_service_queue,
+				false, BFQ_BFQQ_PREEMPTED);
+	}
+}
+
+static void bfq_add_request(struct request *rq)
+{
+	struct bfq_queue *bfqq = RQ_BFQQ(rq);
+	struct bfq_data *bfqd = bfqq->bfqd;
+	struct request *next_rq, *prev;
+	unsigned int old_wr_coeff = bfqq->wr_coeff;
+	bool interactive = false;
+
+	bfq_log_bfqq(bfqd, bfqq, "size %u %s",
+		     blk_rq_sectors(rq), rq_is_sync(rq) ? "S" : "A");
+
+	if (bfqq->wr_coeff > 1) /* queue is being weight-raised */
+		bfq_log_bfqq(bfqd, bfqq,
+			"raising period dur %u/%u msec, old coeff %u, w %d(%d)",
+			jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish),
+			jiffies_to_msecs(bfqq->wr_cur_max_time),
+			bfqq->wr_coeff,
+			bfqq->entity.weight, bfqq->entity.orig_weight);
+
+	bfqq->queued[rq_is_sync(rq)]++;
+	bfqd->queued++;
+
+	elv_rb_add(&bfqq->sort_list, rq);
+
+	/*
+	 * Check if this request is a better next-to-serve candidate.
+	 */
+	prev = bfqq->next_rq;
+	next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position);
+	BUG_ON(!next_rq);
+	bfqq->next_rq = next_rq;
+
+	/*
+	 * Adjust priority tree position, if next_rq changes.
+	 */
+	if (prev != bfqq->next_rq)
+		bfq_pos_tree_add_move(bfqd, bfqq);
+
+	if (!bfq_bfqq_busy(bfqq)) /* switching to busy ... */
+		bfq_bfqq_handle_idle_busy_switch(bfqd, bfqq, old_wr_coeff,
+						 rq, &interactive);
+	else {
+		if (bfqd->low_latency && old_wr_coeff == 1 && !rq_is_sync(rq) &&
+		    time_is_before_jiffies(
+				bfqq->last_wr_start_finish +
+				bfqd->bfq_wr_min_inter_arr_async)) {
+			bfqq->wr_coeff = bfqd->bfq_wr_coeff;
+			bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
+
+			bfqd->wr_busy_queues++;
+			BUG_ON(bfqd->wr_busy_queues > bfqd->busy_queues);
+			bfqq->entity.prio_changed = 1;
+			bfq_log_bfqq(bfqd, bfqq,
+				     "non-idle wrais starting, "
+				     "wr_max_time %u wr_busy %d",
+				     jiffies_to_msecs(bfqq->wr_cur_max_time),
+				     bfqd->wr_busy_queues);
+		}
+		if (prev != bfqq->next_rq)
+			bfq_updated_next_req(bfqd, bfqq);
+	}
+
+	/*
+	 * Assign jiffies to last_wr_start_finish in the following
+	 * cases:
+	 *
+	 * . if bfqq is not going to be weight-raised, because, for
+	 *   non weight-raised queues, last_wr_start_finish stores the
+	 *   arrival time of the last request; as of now, this piece
+	 *   of information is used only for deciding whether to
+	 *   weight-raise async queues
+	 *
+	 * . if bfqq is not weight-raised, because, if bfqq is now
+	 *   switching to weight-raised, then last_wr_start_finish
+	 *   stores the time when weight-raising starts
+	 *
+	 * . if bfqq is interactive, because, regardless of whether
+	 *   bfqq is currently weight-raised, the weight-raising
+	 *   period must start or restart (this case is considered
+	 *   separately because it is not detected by the above
+	 *   conditions, if bfqq is already weight-raised)
+	 *
+	 * last_wr_start_finish has to be updated also if bfqq is soft
+	 * real-time, because the weight-raising period is constantly
+	 * restarted on idle-to-busy transitions for these queues, but
+	 * this is already done in bfq_bfqq_handle_idle_busy_switch if
+	 * needed.
+	 */
+	if (bfqd->low_latency &&
+		(old_wr_coeff == 1 || bfqq->wr_coeff == 1 || interactive))
+		bfqq->last_wr_start_finish = jiffies;
+}
+
+static struct request *bfq_find_rq_fmerge(struct bfq_data *bfqd,
+					  struct bio *bio)
+{
+	struct task_struct *tsk = current;
+	struct bfq_io_cq *bic;
+	struct bfq_queue *bfqq;
+
+	bic = bfq_bic_lookup(bfqd, tsk->io_context);
+	if (!bic)
+		return NULL;
+
+	bfqq = bic_to_bfqq(bic, op_is_sync(bio->bi_opf));
+	if (bfqq)
+		return elv_rb_find(&bfqq->sort_list, bio_end_sector(bio));
+
+	return NULL;
+}
+
+static sector_t get_sdist(sector_t last_pos, struct request *rq)
+{
+	sector_t sdist = 0;
+
+	if (last_pos) {
+		if (last_pos < blk_rq_pos(rq))
+			sdist = blk_rq_pos(rq) - last_pos;
+		else
+			sdist = last_pos - blk_rq_pos(rq);
+	}
+
+	return sdist;
+}
+
+static void bfq_activate_request(struct request_queue *q, struct request *rq)
+{
+	struct bfq_data *bfqd = q->elevator->elevator_data;
+	bfqd->rq_in_driver++;
+}
+
+static void bfq_deactivate_request(struct request_queue *q, struct request *rq)
+{
+	struct bfq_data *bfqd = q->elevator->elevator_data;
+
+	BUG_ON(bfqd->rq_in_driver == 0);
+	bfqd->rq_in_driver--;
+}
+
+static void bfq_remove_request(struct request *rq)
+{
+	struct bfq_queue *bfqq = RQ_BFQQ(rq);
+	struct bfq_data *bfqd = bfqq->bfqd;
+	const int sync = rq_is_sync(rq);
+
+	BUG_ON(bfqq->entity.service > bfqq->entity.budget &&
+	       bfqq == bfqd->in_service_queue);
+
+	if (bfqq->next_rq == rq) {
+		bfqq->next_rq = bfq_find_next_rq(bfqd, bfqq, rq);
+		bfq_updated_next_req(bfqd, bfqq);
+	}
+
+	if (rq->queuelist.prev != &rq->queuelist)
+		list_del_init(&rq->queuelist);
+	BUG_ON(bfqq->queued[sync] == 0);
+	bfqq->queued[sync]--;
+	bfqd->queued--;
+	elv_rb_del(&bfqq->sort_list, rq);
+
+	if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
+		bfqq->next_rq = NULL;
+
+		BUG_ON(bfqq->entity.budget < 0);
+
+		if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->in_service_queue) {
+			BUG_ON(bfqq->ref < 2); /* referred by rq and on tree */
+			bfq_del_bfqq_busy(bfqd, bfqq, false);
+			/*
+			 * bfqq emptied. In normal operation, when
+			 * bfqq is empty, bfqq->entity.service and
+			 * bfqq->entity.budget must contain,
+			 * respectively, the service received and the
+			 * budget used last time bfqq emptied. These
+			 * facts do not hold in this case, as at least
+			 * this last removal occurred while bfqq is
+			 * not in service. To avoid inconsistencies,
+			 * reset both bfqq->entity.service and
+			 * bfqq->entity.budget, if bfqq has still a
+			 * process that may issue I/O requests to it.
+			 */
+			bfqq->entity.budget = bfqq->entity.service = 0;
+		}
+
+		/*
+		 * Remove queue from request-position tree as it is empty.
+		 */
+		if (bfqq->pos_root) {
+			rb_erase(&bfqq->pos_node, bfqq->pos_root);
+			bfqq->pos_root = NULL;
+		}
+	} else {
+		BUG_ON(!bfqq->next_rq);
+		bfq_pos_tree_add_move(bfqd, bfqq);
+	}
+
+	if (rq->cmd_flags & REQ_META) {
+		BUG_ON(bfqq->meta_pending == 0);
+		bfqq->meta_pending--;
+	}
+	bfqg_stats_update_io_remove(bfqq_group(bfqq), rq->cmd_flags);
+}
+
+static enum elv_merge bfq_merge(struct request_queue *q, struct request **req,
+				struct bio *bio)
+{
+	struct bfq_data *bfqd = q->elevator->elevator_data;
+	struct request *__rq;
+
+	__rq = bfq_find_rq_fmerge(bfqd, bio);
+	if (__rq && elv_bio_merge_ok(__rq, bio)) {
+		*req = __rq;
+		return ELEVATOR_FRONT_MERGE;
+	}
+
+	return ELEVATOR_NO_MERGE;
+}
+
+static void bfq_merged_request(struct request_queue *q, struct request *req,
+			       enum elv_merge type)
+{
+	if (type == ELEVATOR_FRONT_MERGE &&
+	    rb_prev(&req->rb_node) &&
+	    blk_rq_pos(req) <
+	    blk_rq_pos(container_of(rb_prev(&req->rb_node),
+				    struct request, rb_node))) {
+		struct bfq_queue *bfqq = RQ_BFQQ(req);
+		struct bfq_data *bfqd = bfqq->bfqd;
+		struct request *prev, *next_rq;
+
+		/* Reposition request in its sort_list */
+		elv_rb_del(&bfqq->sort_list, req);
+		elv_rb_add(&bfqq->sort_list, req);
+		/* Choose next request to be served for bfqq */
+		prev = bfqq->next_rq;
+		next_rq = bfq_choose_req(bfqd, bfqq->next_rq, req,
+					 bfqd->last_position);
+		BUG_ON(!next_rq);
+		bfqq->next_rq = next_rq;
+		/*
+		 * If next_rq changes, update both the queue's budget to
+		 * fit the new request and the queue's position in its
+		 * rq_pos_tree.
+		 */
+		if (prev != bfqq->next_rq) {
+			bfq_updated_next_req(bfqd, bfqq);
+			bfq_pos_tree_add_move(bfqd, bfqq);
+		}
+	}
+}
+
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+static void bfq_bio_merged(struct request_queue *q, struct request *req,
+			   struct bio *bio)
+{
+	bfqg_stats_update_io_merged(bfqq_group(RQ_BFQQ(req)), bio->bi_opf);
+}
+#endif
+
+static void bfq_merged_requests(struct request_queue *q, struct request *rq,
+				struct request *next)
+{
+	struct bfq_queue *bfqq = RQ_BFQQ(rq), *next_bfqq = RQ_BFQQ(next);
+
+	/*
+	 * If next and rq belong to the same bfq_queue and next is older
+	 * than rq, then reposition rq in the fifo (by substituting next
+	 * with rq). Otherwise, if next and rq belong to different
+	 * bfq_queues, never reposition rq: in fact, we would have to
+	 * reposition it with respect to next's position in its own fifo,
+	 * which would most certainly be too expensive with respect to
+	 * the benefits.
+	 */
+	if (bfqq == next_bfqq &&
+	    !list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
+	    next->fifo_time < rq->fifo_time) {
+		list_del_init(&rq->queuelist);
+		list_replace_init(&next->queuelist, &rq->queuelist);
+		rq->fifo_time = next->fifo_time;
+	}
+
+	if (bfqq->next_rq == next)
+		bfqq->next_rq = rq;
+
+	bfq_remove_request(next);
+	bfqg_stats_update_io_merged(bfqq_group(bfqq), next->cmd_flags);
+}
+
+/* Must be called with bfqq != NULL */
+static void bfq_bfqq_end_wr(struct bfq_queue *bfqq)
+{
+	BUG_ON(!bfqq);
+
+	if (bfq_bfqq_busy(bfqq)) {
+		bfqq->bfqd->wr_busy_queues--;
+		BUG_ON(bfqq->bfqd->wr_busy_queues < 0);
+	}
+	bfqq->wr_coeff = 1;
+	bfqq->wr_cur_max_time = 0;
+	bfqq->last_wr_start_finish = jiffies;
+	/*
+	 * Trigger a weight change on the next invocation of
+	 * __bfq_entity_update_weight_prio.
+	 */
+	bfqq->entity.prio_changed = 1;
+	bfq_log_bfqq(bfqq->bfqd, bfqq,
+		     "wrais ending at %lu, rais_max_time %u",
+		     bfqq->last_wr_start_finish,
+		     jiffies_to_msecs(bfqq->wr_cur_max_time));
+	bfq_log_bfqq(bfqq->bfqd, bfqq, "wr_busy %d",
+		     bfqq->bfqd->wr_busy_queues);
+}
+
+static void bfq_end_wr_async_queues(struct bfq_data *bfqd,
+				    struct bfq_group *bfqg)
+{
+	int i, j;
+
+	for (i = 0; i < 2; i++)
+		for (j = 0; j < IOPRIO_BE_NR; j++)
+			if (bfqg->async_bfqq[i][j])
+				bfq_bfqq_end_wr(bfqg->async_bfqq[i][j]);
+	if (bfqg->async_idle_bfqq)
+		bfq_bfqq_end_wr(bfqg->async_idle_bfqq);
+}
+
+static void bfq_end_wr(struct bfq_data *bfqd)
+{
+	struct bfq_queue *bfqq;
+
+	spin_lock_irq(bfqd->queue->queue_lock);
+
+	list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list)
+		bfq_bfqq_end_wr(bfqq);
+	list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list)
+		bfq_bfqq_end_wr(bfqq);
+	bfq_end_wr_async(bfqd);
+
+	spin_unlock_irq(bfqd->queue->queue_lock);
+}
+
+static sector_t bfq_io_struct_pos(void *io_struct, bool request)
+{
+	if (request)
+		return blk_rq_pos(io_struct);
+	else
+		return ((struct bio *)io_struct)->bi_iter.bi_sector;
+}
+
+static int bfq_rq_close_to_sector(void *io_struct, bool request,
+				  sector_t sector)
+{
+	return abs(bfq_io_struct_pos(io_struct, request) - sector) <=
+	       BFQQ_CLOSE_THR;
+}
+
+static struct bfq_queue *bfqq_find_close(struct bfq_data *bfqd,
+					 struct bfq_queue *bfqq,
+					 sector_t sector)
+{
+	struct rb_root *root = &bfq_bfqq_to_bfqg(bfqq)->rq_pos_tree;
+	struct rb_node *parent, *node;
+	struct bfq_queue *__bfqq;
+
+	if (RB_EMPTY_ROOT(root))
+		return NULL;
+
+	/*
+	 * First, if we find a request starting at the end of the last
+	 * request, choose it.
+	 */
+	__bfqq = bfq_rq_pos_tree_lookup(bfqd, root, sector, &parent, NULL);
+	if (__bfqq)
+		return __bfqq;
+
+	/*
+	 * If the exact sector wasn't found, the parent of the NULL leaf
+	 * will contain the closest sector (rq_pos_tree sorted by
+	 * next_request position).
+	 */
+	__bfqq = rb_entry(parent, struct bfq_queue, pos_node);
+	if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector))
+		return __bfqq;
+
+	if (blk_rq_pos(__bfqq->next_rq) < sector)
+		node = rb_next(&__bfqq->pos_node);
+	else
+		node = rb_prev(&__bfqq->pos_node);
+	if (!node)
+		return NULL;
+
+	__bfqq = rb_entry(node, struct bfq_queue, pos_node);
+	if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector))
+		return __bfqq;
+
+	return NULL;
+}
+
+static struct bfq_queue *bfq_find_close_cooperator(struct bfq_data *bfqd,
+						   struct bfq_queue *cur_bfqq,
+						   sector_t sector)
+{
+	struct bfq_queue *bfqq;
+
+	/*
+	 * We shall notice if some of the queues are cooperating,
+	 * e.g., working closely on the same area of the device. In
+	 * that case, we can group them together and: 1) don't waste
+	 * time idling, and 2) serve the union of their requests in
+	 * the best possible order for throughput.
+	 */
+	bfqq = bfqq_find_close(bfqd, cur_bfqq, sector);
+	if (!bfqq || bfqq == cur_bfqq)
+		return NULL;
+
+	return bfqq;
+}
+
+static struct bfq_queue *
+bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
+{
+	int process_refs, new_process_refs;
+	struct bfq_queue *__bfqq;
+
+	/*
+	 * If there are no process references on the new_bfqq, then it is
+	 * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain
+	 * may have dropped their last reference (not just their last process
+	 * reference).
+	 */
+	if (!bfqq_process_refs(new_bfqq))
+		return NULL;
+
+	/* Avoid a circular list and skip interim queue merges. */
+	while ((__bfqq = new_bfqq->new_bfqq)) {
+		if (__bfqq == bfqq)
+			return NULL;
+		new_bfqq = __bfqq;
+	}
+
+	process_refs = bfqq_process_refs(bfqq);
+	new_process_refs = bfqq_process_refs(new_bfqq);
+	/*
+	 * If the process for the bfqq has gone away, there is no
+	 * sense in merging the queues.
+	 */
+	if (process_refs == 0 || new_process_refs == 0)
+		return NULL;
+
+	bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
+		new_bfqq->pid);
+
+	/*
+	 * Merging is just a redirection: the requests of the process
+	 * owning one of the two queues are redirected to the other queue.
+	 * The latter queue, in its turn, is set as shared if this is the
+	 * first time that the requests of some process are redirected to
+	 * it.
+	 *
+	 * We redirect bfqq to new_bfqq and not the opposite, because we
+	 * are in the context of the process owning bfqq, hence we have
+	 * the io_cq of this process. So we can immediately configure this
+	 * io_cq to redirect the requests of the process to new_bfqq.
+	 *
+	 * NOTE, even if new_bfqq coincides with the in-service queue, the
+	 * io_cq of new_bfqq is not available, because, if the in-service
+	 * queue is shared, bfqd->in_service_bic may not point to the
+	 * io_cq of the in-service queue.
+	 * Redirecting the requests of the process owning bfqq to the
+	 * currently in-service queue is in any case the best option, as
+	 * we feed the in-service queue with new requests close to the
+	 * last request served and, by doing so, hopefully increase the
+	 * throughput.
+	 */
+	bfqq->new_bfqq = new_bfqq;
+	new_bfqq->ref += process_refs;
+	return new_bfqq;
+}
+
+static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq,
+					struct bfq_queue *new_bfqq)
+{
+	if (bfq_too_late_for_merging(new_bfqq)) {
+		bfq_log_bfqq(bfqq->bfqd, bfqq,
+			     "too late for bfq%d to be merged",
+				new_bfqq->pid);
+		return false;
+	}
+
+	if (bfq_class_idle(bfqq) || bfq_class_idle(new_bfqq) ||
+	    (bfqq->ioprio_class != new_bfqq->ioprio_class))
+		return false;
+
+	/*
+	 * If either of the queues has already been detected as seeky,
+	 * then merging it with the other queue is unlikely to lead to
+	 * sequential I/O.
+	 */
+	if (BFQQ_SEEKY(bfqq) || BFQQ_SEEKY(new_bfqq))
+		return false;
+
+	/*
+	 * Interleaved I/O is known to be done by (some) applications
+	 * only for reads, so it does not make sense to merge async
+	 * queues.
+	 */
+	if (!bfq_bfqq_sync(bfqq) || !bfq_bfqq_sync(new_bfqq))
+		return false;
+
+	return true;
+}
+
+/*
+ * Attempt to schedule a merge of bfqq with the currently in-service
+ * queue or with a close queue among the scheduled queues.  Return
+ * NULL if no merge was scheduled, a pointer to the shared bfq_queue
+ * structure otherwise.
+ *
+ * The OOM queue is not allowed to participate to cooperation: in fact, since
+ * the requests temporarily redirected to the OOM queue could be redirected
+ * again to dedicated queues at any time, the state needed to correctly
+ * handle merging with the OOM queue would be quite complex and expensive
+ * to maintain. Besides, in such a critical condition as an out of memory,
+ * the benefits of queue merging may be little relevant, or even negligible.
+ *
+ * WARNING: queue merging may impair fairness among non-weight raised
+ * queues, for at least two reasons: 1) the original weight of a
+ * merged queue may change during the merged state, 2) even being the
+ * weight the same, a merged queue may be bloated with many more
+ * requests than the ones produced by its originally-associated
+ * process.
+ */
+static struct bfq_queue *
+bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+		     void *io_struct, bool request)
+{
+	struct bfq_queue *in_service_bfqq, *new_bfqq;
+
+	/*
+	 * Prevent bfqq from being merged if it has been created too
+	 * long ago. The idea is that true cooperating processes, and
+	 * thus their associated bfq_queues, are supposed to be
+	 * created shortly after each other. This is the case, e.g.,
+	 * for KVM/QEMU and dump I/O threads. Basing on this
+	 * assumption, the following filtering greatly reduces the
+	 * probability that two non-cooperating processes, which just
+	 * happen to do close I/O for some short time interval, have
+	 * their queues merged by mistake.
+	 */
+	if (bfq_too_late_for_merging(bfqq)) {
+		bfq_log_bfqq(bfqd, bfqq,
+			     "would have looked for coop, but too late");
+		return NULL;
+	}
+
+	if (bfqq->new_bfqq)
+		return bfqq->new_bfqq;
+
+	if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq))
+		return NULL;
+
+	/* If there is only one backlogged queue, don't search. */
+	if (bfqd->busy_queues == 1)
+		return NULL;
+
+	in_service_bfqq = bfqd->in_service_queue;
+
+	if (in_service_bfqq && in_service_bfqq != bfqq &&
+	    likely(in_service_bfqq != &bfqd->oom_bfqq) &&
+	    bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) &&
+	    bfqq->entity.parent == in_service_bfqq->entity.parent &&
+	    bfq_may_be_close_cooperator(bfqq, in_service_bfqq)) {
+		new_bfqq = bfq_setup_merge(bfqq, in_service_bfqq);
+		if (new_bfqq)
+			return new_bfqq;
+	}
+	/*
+	 * Check whether there is a cooperator among currently scheduled
+	 * queues. The only thing we need is that the bio/request is not
+	 * NULL, as we need it to establish whether a cooperator exists.
+	 */
+	new_bfqq = bfq_find_close_cooperator(bfqd, bfqq,
+			bfq_io_struct_pos(io_struct, request));
+
+	BUG_ON(new_bfqq && bfqq->entity.parent != new_bfqq->entity.parent);
+
+	if (new_bfqq && likely(new_bfqq != &bfqd->oom_bfqq) &&
+	    bfq_may_be_close_cooperator(bfqq, new_bfqq))
+		return bfq_setup_merge(bfqq, new_bfqq);
+
+	return NULL;
+}
+
+static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
+{
+	struct bfq_io_cq *bic = bfqq->bic;
+
+	/*
+	 * If !bfqq->bic, the queue is already shared or its requests
+	 * have already been redirected to a shared queue; both idle window
+	 * and weight raising state have already been saved. Do nothing.
+	 */
+	if (!bic)
+		return;
+
+	bic->saved_has_short_ttime = bfq_bfqq_has_short_ttime(bfqq);
+	bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq);
+	bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq);
+	bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node);
+	if (unlikely(bfq_bfqq_just_created(bfqq) &&
+		     !bfq_bfqq_in_large_burst(bfqq) &&
+		     bfqq->bfqd->low_latency)) {
+		/*
+		 * bfqq being merged ritgh after being created: bfqq
+		 * would have deserved interactive weight raising, but
+		 * did not make it to be set in a weight-raised state,
+		 * because of this early merge.	Store directly the
+		 * weight-raising state that would have been assigned
+		 * to bfqq, so that to avoid that bfqq unjustly fails
+		 * to enjoy weight raising if split soon.
+		 */
+		bic->saved_wr_coeff = bfqq->bfqd->bfq_wr_coeff;
+		bic->saved_wr_cur_max_time = bfq_wr_duration(bfqq->bfqd);
+		bic->saved_last_wr_start_finish = jiffies;
+	} else {
+		bic->saved_wr_coeff = bfqq->wr_coeff;
+		bic->saved_wr_start_at_switch_to_srt =
+			bfqq->wr_start_at_switch_to_srt;
+		bic->saved_last_wr_start_finish = bfqq->last_wr_start_finish;
+		bic->saved_wr_cur_max_time = bfqq->wr_cur_max_time;
+	}
+	BUG_ON(time_is_after_jiffies(bfqq->last_wr_start_finish));
+}
+
+static void bfq_get_bic_reference(struct bfq_queue *bfqq)
+{
+	/*
+	 * If bfqq->bic has a non-NULL value, the bic to which it belongs
+	 * is about to begin using a shared bfq_queue.
+	 */
+	if (bfqq->bic)
+		atomic_long_inc(&bfqq->bic->icq.ioc->refcount);
+}
+
+static void
+bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
+		struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
+{
+	bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu",
+		     (unsigned long) new_bfqq->pid);
+	/* Save weight raising and idle window of the merged queues */
+	bfq_bfqq_save_state(bfqq);
+	bfq_bfqq_save_state(new_bfqq);
+	if (bfq_bfqq_IO_bound(bfqq))
+		bfq_mark_bfqq_IO_bound(new_bfqq);
+	bfq_clear_bfqq_IO_bound(bfqq);
+
+	/*
+	 * If bfqq is weight-raised, then let new_bfqq inherit
+	 * weight-raising. To reduce false positives, neglect the case
+	 * where bfqq has just been created, but has not yet made it
+	 * to be weight-raised (which may happen because EQM may merge
+	 * bfqq even before bfq_add_request is executed for the first
+	 * time for bfqq). Handling this case would however be very
+	 * easy, thanks to the flag just_created.
+	 */
+	if (new_bfqq->wr_coeff == 1 && bfqq->wr_coeff > 1) {
+		new_bfqq->wr_coeff = bfqq->wr_coeff;
+		new_bfqq->wr_cur_max_time = bfqq->wr_cur_max_time;
+		new_bfqq->last_wr_start_finish = bfqq->last_wr_start_finish;
+		new_bfqq->wr_start_at_switch_to_srt =
+			bfqq->wr_start_at_switch_to_srt;
+		if (bfq_bfqq_busy(new_bfqq)) {
+			bfqd->wr_busy_queues++;
+			BUG_ON(bfqd->wr_busy_queues > bfqd->busy_queues);
+		}
+
+		new_bfqq->entity.prio_changed = 1;
+		bfq_log_bfqq(bfqd, new_bfqq,
+			     "wr start after merge with %d, rais_max_time %u",
+			     bfqq->pid,
+			     jiffies_to_msecs(bfqq->wr_cur_max_time));
+	}
+
+	if (bfqq->wr_coeff > 1) { /* bfqq has given its wr to new_bfqq */
+		bfqq->wr_coeff = 1;
+		bfqq->entity.prio_changed = 1;
+		if (bfq_bfqq_busy(bfqq)) {
+			bfqd->wr_busy_queues--;
+			BUG_ON(bfqd->wr_busy_queues < 0);
+		}
+
+	}
+
+	bfq_log_bfqq(bfqd, new_bfqq, "wr_busy %d",
+		     bfqd->wr_busy_queues);
+
+	/*
+	 * Grab a reference to the bic, to prevent it from being destroyed
+	 * before being possibly touched by a bfq_split_bfqq().
+	 */
+	bfq_get_bic_reference(bfqq);
+	bfq_get_bic_reference(new_bfqq);
+	/*
+	 * Merge queues (that is, let bic redirect its requests to new_bfqq)
+	 */
+	bic_set_bfqq(bic, new_bfqq, 1);
+	bfq_mark_bfqq_coop(new_bfqq);
+	/*
+	 * new_bfqq now belongs to at least two bics (it is a shared queue):
+	 * set new_bfqq->bic to NULL. bfqq either:
+	 * - does not belong to any bic any more, and hence bfqq->bic must
+	 *   be set to NULL, or
+	 * - is a queue whose owning bics have already been redirected to a
+	 *   different queue, hence the queue is destined to not belong to
+	 *   any bic soon and bfqq->bic is already NULL (therefore the next
+	 *   assignment causes no harm).
+	 */
+	new_bfqq->bic = NULL;
+	bfqq->bic = NULL;
+	/* release process reference to bfqq */
+	bfq_put_queue(bfqq);
+}
+
+static int bfq_allow_bio_merge(struct request_queue *q, struct request *rq,
+			       struct bio *bio)
+{
+	struct bfq_data *bfqd = q->elevator->elevator_data;
+	bool is_sync = op_is_sync(bio->bi_opf);
+	struct bfq_io_cq *bic;
+	struct bfq_queue *bfqq, *new_bfqq;
+
+	/*
+	 * Disallow merge of a sync bio into an async request.
+	 */
+	if (is_sync && !rq_is_sync(rq))
+		return false;
+
+	/*
+	 * Lookup the bfqq that this bio will be queued with. Allow
+	 * merge only if rq is queued there.
+	 * Queue lock is held here.
+	 */
+	bic = bfq_bic_lookup(bfqd, current->io_context);
+	if (!bic)
+		return false;
+
+	bfqq = bic_to_bfqq(bic, is_sync);
+	/*
+	 * We take advantage of this function to perform an early merge
+	 * of the queues of possible cooperating processes.
+	 */
+	if (bfqq) {
+		new_bfqq = bfq_setup_cooperator(bfqd, bfqq, bio, false);
+		if (new_bfqq) {
+			bfq_merge_bfqqs(bfqd, bic, bfqq, new_bfqq);
+			/*
+			 * If we get here, the bio will be queued in the
+			 * shared queue, i.e., new_bfqq, so use new_bfqq
+			 * to decide whether bio and rq can be merged.
+			 */
+			bfqq = new_bfqq;
+		}
+	}
+
+	return bfqq == RQ_BFQQ(rq);
+}
+
+static int bfq_allow_rq_merge(struct request_queue *q, struct request *rq,
+			      struct request *next)
+{
+	return RQ_BFQQ(rq) == RQ_BFQQ(next);
+}
+
+/*
+ * Set the maximum time for the in-service queue to consume its
+ * budget. This prevents seeky processes from lowering the throughput.
+ * In practice, a time-slice service scheme is used with seeky
+ * processes.
+ */
+static void bfq_set_budget_timeout(struct bfq_data *bfqd,
+				   struct bfq_queue *bfqq)
+{
+	unsigned int timeout_coeff;
+
+	if (bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time)
+		timeout_coeff = 1;
+	else
+		timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight;
+
+	bfqd->last_budget_start = ktime_get();
+
+	bfqq->budget_timeout = jiffies +
+		bfqd->bfq_timeout * timeout_coeff;
+
+	bfq_log_bfqq(bfqd, bfqq, "%u",
+		jiffies_to_msecs(bfqd->bfq_timeout * timeout_coeff));
+}
+
+static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
+				       struct bfq_queue *bfqq)
+{
+	if (bfqq) {
+		bfqg_stats_update_avg_queue_size(bfqq_group(bfqq));
+		bfq_mark_bfqq_must_alloc(bfqq);
+		bfq_clear_bfqq_fifo_expire(bfqq);
+
+		bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8;
+
+		BUG_ON(bfqq == bfqd->in_service_queue);
+		BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list));
+
+		if (time_is_before_jiffies(bfqq->last_wr_start_finish) &&
+		    bfqq->wr_coeff > 1 &&
+		    bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time &&
+		    time_is_before_jiffies(bfqq->budget_timeout)) {
+			/*
+			 * For soft real-time queues, move the start
+			 * of the weight-raising period forward by the
+			 * time the queue has not received any
+			 * service. Otherwise, a relatively long
+			 * service delay is likely to cause the
+			 * weight-raising period of the queue to end,
+			 * because of the short duration of the
+			 * weight-raising period of a soft real-time
+			 * queue.  It is worth noting that this move
+			 * is not so dangerous for the other queues,
+			 * because soft real-time queues are not
+			 * greedy.
+			 *
+			 * To not add a further variable, we use the
+			 * overloaded field budget_timeout to
+			 * determine for how long the queue has not
+			 * received service, i.e., how much time has
+			 * elapsed since the queue expired. However,
+			 * this is a little imprecise, because
+			 * budget_timeout is set to jiffies if bfqq
+			 * not only expires, but also remains with no
+			 * request.
+			 */
+			if (time_after(bfqq->budget_timeout,
+				       bfqq->last_wr_start_finish))
+				bfqq->last_wr_start_finish +=
+					jiffies - bfqq->budget_timeout;
+			else
+				bfqq->last_wr_start_finish = jiffies;
+
+			if (time_is_after_jiffies(bfqq->last_wr_start_finish)) {
+			       pr_crit(
+			       "BFQ WARNING:last %lu budget %lu jiffies %lu",
+			       bfqq->last_wr_start_finish,
+			       bfqq->budget_timeout,
+			       jiffies);
+			       pr_crit("diff %lu", jiffies -
+				       max_t(unsigned long,
+					     bfqq->last_wr_start_finish,
+					     bfqq->budget_timeout));
+			       bfqq->last_wr_start_finish = jiffies;
+			}
+		}
+
+		bfq_set_budget_timeout(bfqd, bfqq);
+		bfq_log_bfqq(bfqd, bfqq,
+			     "cur-budget = %d",
+			     bfqq->entity.budget);
+	} else
+		bfq_log(bfqd, "NULL");
+
+	bfqd->in_service_queue = bfqq;
+}
+
+/*
+ * Get and set a new queue for service.
+ */
+static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd)
+{
+	struct bfq_queue *bfqq = bfq_get_next_queue(bfqd);
+
+	__bfq_set_in_service_queue(bfqd, bfqq);
+	return bfqq;
+}
+
+static void bfq_arm_slice_timer(struct bfq_data *bfqd)
+{
+	struct bfq_queue *bfqq = bfqd->in_service_queue;
+	struct bfq_io_cq *bic;
+	u32 sl;
+
+	BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list));
+
+	/* Processes have exited, don't wait. */
+	bic = bfqd->in_service_bic;
+	if (!bic || atomic_read(&bic->icq.ioc->active_ref) == 0)
+		return;
+
+	bfq_mark_bfqq_wait_request(bfqq);
+
+	/*
+	 * We don't want to idle for seeks, but we do want to allow
+	 * fair distribution of slice time for a process doing back-to-back
+	 * seeks. So allow a little bit of time for him to submit a new rq.
+	 *
+	 * To prevent processes with (partly) seeky workloads from
+	 * being too ill-treated, grant them a small fraction of the
+	 * assigned budget before reducing the waiting time to
+	 * BFQ_MIN_TT. This happened to help reduce latency.
+	 */
+	sl = bfqd->bfq_slice_idle;
+	/*
+	 * Unless the queue is being weight-raised or the scenario is
+	 * asymmetric, grant only minimum idle time if the queue
+	 * is seeky. A long idling is preserved for a weight-raised
+	 * queue, or, more in general, in an asymemtric scenario,
+	 * because a long idling is needed for guaranteeing to a queue
+	 * its reserved share of the throughput (in particular, it is
+	 * needed if the queue has a higher weight than some other
+	 * queue).
+	 */
+	if (BFQQ_SEEKY(bfqq) && bfqq->wr_coeff == 1 &&
+	    bfq_symmetric_scenario(bfqd))
+		sl = min_t(u32, sl, BFQ_MIN_TT);
+
+	bfqd->last_idling_start = ktime_get();
+	hrtimer_start(&bfqd->idle_slice_timer, ns_to_ktime(sl),
+		      HRTIMER_MODE_REL);
+	bfqg_stats_set_start_idle_time(bfqq_group(bfqq));
+	bfq_log(bfqd, "arm idle: %ld/%ld ms",
+		sl / NSEC_PER_MSEC, bfqd->bfq_slice_idle / NSEC_PER_MSEC);
+}
+
+/*
+ * In autotuning mode, max_budget is dynamically recomputed as the
+ * amount of sectors transferred in timeout at the estimated peak
+ * rate. This enables BFQ to utilize a full timeslice with a full
+ * budget, even if the in-service queue is served at peak rate. And
+ * this maximises throughput with sequential workloads.
+ */
+static unsigned long bfq_calc_max_budget(struct bfq_data *bfqd)
+{
+	return (u64)bfqd->peak_rate * USEC_PER_MSEC *
+		jiffies_to_msecs(bfqd->bfq_timeout)>>BFQ_RATE_SHIFT;
+}
+
+/*
+ * Update parameters related to throughput and responsiveness, as a
+ * function of the estimated peak rate. See comments on
+ * bfq_calc_max_budget(), and on T_slow and T_fast arrays.
+ */
+static void update_thr_responsiveness_params(struct bfq_data *bfqd)
+{
+	int dev_type = blk_queue_nonrot(bfqd->queue);
+
+	if (bfqd->bfq_user_max_budget == 0) {
+		bfqd->bfq_max_budget =
+			bfq_calc_max_budget(bfqd);
+		BUG_ON(bfqd->bfq_max_budget < 0);
+		bfq_log(bfqd, "new max_budget = %d",
+			bfqd->bfq_max_budget);
+	}
+
+	if (bfqd->device_speed == BFQ_BFQD_FAST &&
+	    bfqd->peak_rate < device_speed_thresh[dev_type]) {
+		bfqd->device_speed = BFQ_BFQD_SLOW;
+		bfqd->RT_prod = R_slow[dev_type] *
+			T_slow[dev_type];
+	} else if (bfqd->device_speed == BFQ_BFQD_SLOW &&
+		   bfqd->peak_rate > device_speed_thresh[dev_type]) {
+		bfqd->device_speed = BFQ_BFQD_FAST;
+		bfqd->RT_prod = R_fast[dev_type] *
+			T_fast[dev_type];
+	}
+
+	bfq_log(bfqd,
+"dev_type %s dev_speed_class = %s (%llu sects/sec), thresh %llu setcs/sec",
+		dev_type == 0 ? "ROT" : "NONROT",
+		bfqd->device_speed == BFQ_BFQD_FAST ? "FAST" : "SLOW",
+		bfqd->device_speed == BFQ_BFQD_FAST ?
+		(USEC_PER_SEC*(u64)R_fast[dev_type])>>BFQ_RATE_SHIFT :
+		(USEC_PER_SEC*(u64)R_slow[dev_type])>>BFQ_RATE_SHIFT,
+		(USEC_PER_SEC*(u64)device_speed_thresh[dev_type])>>
+		BFQ_RATE_SHIFT);
+}
+
+static void bfq_reset_rate_computation(struct bfq_data *bfqd, struct request *rq)
+{
+	if (rq != NULL) { /* new rq dispatch now, reset accordingly */
+		bfqd->last_dispatch = bfqd->first_dispatch = ktime_get_ns() ;
+		bfqd->peak_rate_samples = 1;
+		bfqd->sequential_samples = 0;
+		bfqd->tot_sectors_dispatched = bfqd->last_rq_max_size =
+			blk_rq_sectors(rq);
+	} else /* no new rq dispatched, just reset the number of samples */
+		bfqd->peak_rate_samples = 0; /* full re-init on next disp. */
+
+	bfq_log(bfqd,
+		"at end, sample %u/%u tot_sects %llu",
+		bfqd->peak_rate_samples, bfqd->sequential_samples,
+		bfqd->tot_sectors_dispatched);
+}
+
+static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq)
+{
+	u32 rate, weight, divisor;
+
+	/*
+	 * For the convergence property to hold (see comments on
+	 * bfq_update_peak_rate()) and for the assessment to be
+	 * reliable, a minimum number of samples must be present, and
+	 * a minimum amount of time must have elapsed. If not so, do
+	 * not compute new rate. Just reset parameters, to get ready
+	 * for a new evaluation attempt.
+	 */
+	if (bfqd->peak_rate_samples < BFQ_RATE_MIN_SAMPLES ||
+	    bfqd->delta_from_first < BFQ_RATE_MIN_INTERVAL) {
+		bfq_log(bfqd,
+	"only resetting, delta_first %lluus samples %d",
+			bfqd->delta_from_first>>10, bfqd->peak_rate_samples);
+		goto reset_computation;
+	}
+
+	/*
+	 * If a new request completion has occurred after last
+	 * dispatch, then, to approximate the rate at which requests
+	 * have been served by the device, it is more precise to
+	 * extend the observation interval to the last completion.
+	 */
+	bfqd->delta_from_first =
+		max_t(u64, bfqd->delta_from_first,
+		      bfqd->last_completion - bfqd->first_dispatch);
+
+	BUG_ON(bfqd->delta_from_first == 0);
+	/*
+	 * Rate computed in sects/usec, and not sects/nsec, for
+	 * precision issues.
+	 */
+	rate = div64_ul(bfqd->tot_sectors_dispatched<<BFQ_RATE_SHIFT,
+			div_u64(bfqd->delta_from_first, NSEC_PER_USEC));
+
+	bfq_log(bfqd,
+"tot_sects %llu delta_first %lluus rate %llu sects/s (%d)",
+		bfqd->tot_sectors_dispatched, bfqd->delta_from_first>>10,
+		((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT),
+		rate > 20<<BFQ_RATE_SHIFT);
+
+	/*
+	 * Peak rate not updated if:
+	 * - the percentage of sequential dispatches is below 3/4 of the
+	 *   total, and rate is below the current estimated peak rate
+	 * - rate is unreasonably high (> 20M sectors/sec)
+	 */
+	if ((bfqd->sequential_samples < (3 * bfqd->peak_rate_samples)>>2 &&
+	     rate <= bfqd->peak_rate) ||
+		rate > 20<<BFQ_RATE_SHIFT) {
+		bfq_log(bfqd,
+		"goto reset, samples %u/%u rate/peak %llu/%llu",
+		bfqd->peak_rate_samples, bfqd->sequential_samples,
+		((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT),
+		((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT));
+		goto reset_computation;
+	} else {
+		bfq_log(bfqd,
+		"do update, samples %u/%u rate/peak %llu/%llu",
+		bfqd->peak_rate_samples, bfqd->sequential_samples,
+		((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT),
+		((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT));
+	}
+
+	/*
+	 * We have to update the peak rate, at last! To this purpose,
+	 * we use a low-pass filter. We compute the smoothing constant
+	 * of the filter as a function of the 'weight' of the new
+	 * measured rate.
+	 *
+	 * As can be seen in next formulas, we define this weight as a
+	 * quantity proportional to how sequential the workload is,
+	 * and to how long the observation time interval is.
+	 *
+	 * The weight runs from 0 to 8. The maximum value of the
+	 * weight, 8, yields the minimum value for the smoothing
+	 * constant. At this minimum value for the smoothing constant,
+	 * the measured rate contributes for half of the next value of
+	 * the estimated peak rate.
+	 *
+	 * So, the first step is to compute the weight as a function
+	 * of how sequential the workload is. Note that the weight
+	 * cannot reach 9, because bfqd->sequential_samples cannot
+	 * become equal to bfqd->peak_rate_samples, which, in its
+	 * turn, holds true because bfqd->sequential_samples is not
+	 * incremented for the first sample.
+	 */
+	weight = (9 * bfqd->sequential_samples) / bfqd->peak_rate_samples;
+
+	/*
+	 * Second step: further refine the weight as a function of the
+	 * duration of the observation interval.
+	 */
+	weight = min_t(u32, 8,
+		       div_u64(weight * bfqd->delta_from_first,
+			       BFQ_RATE_REF_INTERVAL));
+
+	/*
+	 * Divisor ranging from 10, for minimum weight, to 2, for
+	 * maximum weight.
+	 */
+	divisor = 10 - weight;
+	BUG_ON(divisor == 0);
+
+	/*
+	 * Finally, update peak rate:
+	 *
+	 * peak_rate = peak_rate * (divisor-1) / divisor  +  rate / divisor
+	 */
+	bfqd->peak_rate *= divisor-1;
+	bfqd->peak_rate /= divisor;
+	rate /= divisor; /* smoothing constant alpha = 1/divisor */
+
+	bfq_log(bfqd,
+		"divisor %d tmp_peak_rate %llu tmp_rate %u",
+		divisor,
+		((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT),
+		(u32)((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT));
+
+	BUG_ON(bfqd->peak_rate == 0);
+	BUG_ON(bfqd->peak_rate > 20<<BFQ_RATE_SHIFT);
+
+	bfqd->peak_rate += rate;
+
+	/*
+	 * For a very slow device, bfqd->peak_rate can reach 0 (see
+	 * the minimum representable values reported in the comments
+	 * on BFQ_RATE_SHIFT). Push to 1 if this happens, to avoid
+	 * divisions by zero where bfqd->peak_rate is used as a
+	 * divisor.
+	 */
+	bfqd->peak_rate = max_t(u32, 1, bfqd->peak_rate);
+
+	update_thr_responsiveness_params(bfqd);
+	BUG_ON(bfqd->peak_rate > 20<<BFQ_RATE_SHIFT);
+
+reset_computation:
+	bfq_reset_rate_computation(bfqd, rq);
+}
+
+/*
+ * Update the read/write peak rate (the main quantity used for
+ * auto-tuning, see update_thr_responsiveness_params()).
+ *
+ * It is not trivial to estimate the peak rate (correctly): because of
+ * the presence of sw and hw queues between the scheduler and the
+ * device components that finally serve I/O requests, it is hard to
+ * say exactly when a given dispatched request is served inside the
+ * device, and for how long. As a consequence, it is hard to know
+ * precisely at what rate a given set of requests is actually served
+ * by the device.
+ *
+ * On the opposite end, the dispatch time of any request is trivially
+ * available, and, from this piece of information, the "dispatch rate"
+ * of requests can be immediately computed. So, the idea in the next
+ * function is to use what is known, namely request dispatch times
+ * (plus, when useful, request completion times), to estimate what is
+ * unknown, namely in-device request service rate.
+ *
+ * The main issue is that, because of the above facts, the rate at
+ * which a certain set of requests is dispatched over a certain time
+ * interval can vary greatly with respect to the rate at which the
+ * same requests are then served. But, since the size of any
+ * intermediate queue is limited, and the service scheme is lossless
+ * (no request is silently dropped), the following obvious convergence
+ * property holds: the number of requests dispatched MUST become
+ * closer and closer to the number of requests completed as the
+ * observation interval grows. This is the key property used in
+ * the next function to estimate the peak service rate as a function
+ * of the observed dispatch rate. The function assumes to be invoked
+ * on every request dispatch.
+ */
+static void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq)
+{
+	u64 now_ns = ktime_get_ns();
+
+	if (bfqd->peak_rate_samples == 0) { /* first dispatch */
+		bfq_log(bfqd,
+		"goto reset, samples %d",
+				bfqd->peak_rate_samples) ;
+		bfq_reset_rate_computation(bfqd, rq);
+		goto update_last_values; /* will add one sample */
+	}
+
+	/*
+	 * Device idle for very long: the observation interval lasting
+	 * up to this dispatch cannot be a valid observation interval
+	 * for computing a new peak rate (similarly to the late-
+	 * completion event in bfq_completed_request()). Go to
+	 * update_rate_and_reset to have the following three steps
+	 * taken:
+	 * - close the observation interval at the last (previous)
+	 *   request dispatch or completion
+	 * - compute rate, if possible, for that observation interval
+	 * - start a new observation interval with this dispatch
+	 */
+	if (now_ns - bfqd->last_dispatch > 100*NSEC_PER_MSEC &&
+	    bfqd->rq_in_driver == 0) {
+		bfq_log(bfqd,
+"jumping to updating&resetting delta_last %lluus samples %d",
+			(now_ns - bfqd->last_dispatch)>>10,
+			bfqd->peak_rate_samples) ;
+		goto update_rate_and_reset;
+	}
+
+	/* Update sampling information */
+	bfqd->peak_rate_samples++;
+
+	if ((bfqd->rq_in_driver > 0 ||
+		now_ns - bfqd->last_completion < BFQ_MIN_TT)
+	     && get_sdist(bfqd->last_position, rq) < BFQQ_SEEK_THR)
+		bfqd->sequential_samples++;
+
+	bfqd->tot_sectors_dispatched += blk_rq_sectors(rq);
+
+	/* Reset max observed rq size every 32 dispatches */
+	if (likely(bfqd->peak_rate_samples % 32))
+		bfqd->last_rq_max_size =
+			max_t(u32, blk_rq_sectors(rq), bfqd->last_rq_max_size);
+	else
+		bfqd->last_rq_max_size = blk_rq_sectors(rq);
+
+	bfqd->delta_from_first = now_ns - bfqd->first_dispatch;
+
+	bfq_log(bfqd,
+	"added samples %u/%u tot_sects %llu delta_first %lluus",
+		bfqd->peak_rate_samples, bfqd->sequential_samples,
+		bfqd->tot_sectors_dispatched,
+		bfqd->delta_from_first>>10);
+
+	/* Target observation interval not yet reached, go on sampling */
+	if (bfqd->delta_from_first < BFQ_RATE_REF_INTERVAL)
+		goto update_last_values;
+
+update_rate_and_reset:
+	bfq_update_rate_reset(bfqd, rq);
+update_last_values:
+	bfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq);
+	bfqd->last_dispatch = now_ns;
+
+	bfq_log(bfqd,
+	"delta_first %lluus last_pos %llu peak_rate %llu",
+		(now_ns - bfqd->first_dispatch)>>10,
+		(unsigned long long) bfqd->last_position,
+		((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT));
+	bfq_log(bfqd,
+	"samples at end %d", bfqd->peak_rate_samples);
+}
+
+/*
+ * Move request from internal lists to the dispatch list of the request queue
+ */
+static void bfq_dispatch_insert(struct request_queue *q, struct request *rq)
+{
+	struct bfq_queue *bfqq = RQ_BFQQ(rq);
+
+	/*
+	 * For consistency, the next instruction should have been executed
+	 * after removing the request from the queue and dispatching it.
+	 * We execute instead this instruction before bfq_remove_request()
+	 * (and hence introduce a temporary inconsistency), for efficiency.
+	 * In fact, in a forced_dispatch, this prevents two counters related
+	 * to bfqq->dispatched to risk to be uselessly decremented if bfqq
+	 * is not in service, and then to be incremented again after
+	 * incrementing bfqq->dispatched.
+	 */
+	bfqq->dispatched++;
+	bfq_update_peak_rate(q->elevator->elevator_data, rq);
+
+	bfq_remove_request(rq);
+	elv_dispatch_sort(q, rq);
+}
+
+static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	BUG_ON(bfqq != bfqd->in_service_queue);
+
+	/*
+	 * If this bfqq is shared between multiple processes, check
+	 * to make sure that those processes are still issuing I/Os
+	 * within the mean seek distance. If not, it may be time to
+	 * break the queues apart again.
+	 */
+	if (bfq_bfqq_coop(bfqq) && BFQQ_SEEKY(bfqq))
+		bfq_mark_bfqq_split_coop(bfqq);
+
+	if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
+		if (bfqq->dispatched == 0)
+			/*
+			 * Overloading budget_timeout field to store
+			 * the time at which the queue remains with no
+			 * backlog and no outstanding request; used by
+			 * the weight-raising mechanism.
+			 */
+			bfqq->budget_timeout = jiffies;
+
+		bfq_del_bfqq_busy(bfqd, bfqq, true);
+	} else {
+		bfq_requeue_bfqq(bfqd, bfqq, true);
+		/*
+		 * Resort priority tree of potential close cooperators.
+		 */
+		bfq_pos_tree_add_move(bfqd, bfqq);
+	}
+
+	/*
+	 * All in-service entities must have been properly deactivated
+	 * or requeued before executing the next function, which
+	 * resets all in-service entites as no more in service.
+	 */
+	__bfq_bfqd_reset_in_service(bfqd);
+}
+
+/**
+ * __bfq_bfqq_recalc_budget - try to adapt the budget to the @bfqq behavior.
+ * @bfqd: device data.
+ * @bfqq: queue to update.
+ * @reason: reason for expiration.
+ *
+ * Handle the feedback on @bfqq budget at queue expiration.
+ * See the body for detailed comments.
+ */
+static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
+				     struct bfq_queue *bfqq,
+				     enum bfqq_expiration reason)
+{
+	struct request *next_rq;
+	int budget, min_budget;
+
+	BUG_ON(bfqq != bfqd->in_service_queue);
+
+	min_budget = bfq_min_budget(bfqd);
+
+	if (bfqq->wr_coeff == 1)
+		budget = bfqq->max_budget;
+	else /*
+	      * Use a constant, low budget for weight-raised queues,
+	      * to help achieve a low latency. Keep it slightly higher
+	      * than the minimum possible budget, to cause a little
+	      * bit fewer expirations.
+	      */
+		budget = 2 * min_budget;
+
+	bfq_log_bfqq(bfqd, bfqq, "last budg %d, budg left %d",
+		bfqq->entity.budget, bfq_bfqq_budget_left(bfqq));
+	bfq_log_bfqq(bfqd, bfqq, "last max_budg %d, min budg %d",
+		budget, bfq_min_budget(bfqd));
+	bfq_log_bfqq(bfqd, bfqq, "sync %d, seeky %d",
+		bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->in_service_queue));
+
+	if (bfq_bfqq_sync(bfqq) && bfqq->wr_coeff == 1) {
+		switch (reason) {
+		/*
+		 * Caveat: in all the following cases we trade latency
+		 * for throughput.
+		 */
+		case BFQ_BFQQ_TOO_IDLE:
+			/*
+			 * This is the only case where we may reduce
+			 * the budget: if there is no request of the
+			 * process still waiting for completion, then
+			 * we assume (tentatively) that the timer has
+			 * expired because the batch of requests of
+			 * the process could have been served with a
+			 * smaller budget.  Hence, betting that
+			 * process will behave in the same way when it
+			 * becomes backlogged again, we reduce its
+			 * next budget.  As long as we guess right,
+			 * this budget cut reduces the latency
+			 * experienced by the process.
+			 *
+			 * However, if there are still outstanding
+			 * requests, then the process may have not yet
+			 * issued its next request just because it is
+			 * still waiting for the completion of some of
+			 * the still outstanding ones.  So in this
+			 * subcase we do not reduce its budget, on the
+			 * contrary we increase it to possibly boost
+			 * the throughput, as discussed in the
+			 * comments to the BUDGET_TIMEOUT case.
+			 */
+			if (bfqq->dispatched > 0) /* still outstanding reqs */
+				budget = min(budget * 2, bfqd->bfq_max_budget);
+			else {
+				if (budget > 5 * min_budget)
+					budget -= 4 * min_budget;
+				else
+					budget = min_budget;
+			}
+			break;
+		case BFQ_BFQQ_BUDGET_TIMEOUT:
+			/*
+			 * We double the budget here because it gives
+			 * the chance to boost the throughput if this
+			 * is not a seeky process (and has bumped into
+			 * this timeout because of, e.g., ZBR).
+			 */
+			budget = min(budget * 2, bfqd->bfq_max_budget);
+			break;
+		case BFQ_BFQQ_BUDGET_EXHAUSTED:
+			/*
+			 * The process still has backlog, and did not
+			 * let either the budget timeout or the disk
+			 * idling timeout expire. Hence it is not
+			 * seeky, has a short thinktime and may be
+			 * happy with a higher budget too. So
+			 * definitely increase the budget of this good
+			 * candidate to boost the disk throughput.
+			 */
+			budget = min(budget * 4, bfqd->bfq_max_budget);
+			break;
+		case BFQ_BFQQ_NO_MORE_REQUESTS:
+			/*
+			 * For queues that expire for this reason, it
+			 * is particularly important to keep the
+			 * budget close to the actual service they
+			 * need. Doing so reduces the timestamp
+			 * misalignment problem described in the
+			 * comments in the body of
+			 * __bfq_activate_entity. In fact, suppose
+			 * that a queue systematically expires for
+			 * BFQ_BFQQ_NO_MORE_REQUESTS and presents a
+			 * new request in time to enjoy timestamp
+			 * back-shifting. The larger the budget of the
+			 * queue is with respect to the service the
+			 * queue actually requests in each service
+			 * slot, the more times the queue can be
+			 * reactivated with the same virtual finish
+			 * time. It follows that, even if this finish
+			 * time is pushed to the system virtual time
+			 * to reduce the consequent timestamp
+			 * misalignment, the queue unjustly enjoys for
+			 * many re-activations a lower finish time
+			 * than all newly activated queues.
+			 *
+			 * The service needed by bfqq is measured
+			 * quite precisely by bfqq->entity.service.
+			 * Since bfqq does not enjoy device idling,
+			 * bfqq->entity.service is equal to the number
+			 * of sectors that the process associated with
+			 * bfqq requested to read/write before waiting
+			 * for request completions, or blocking for
+			 * other reasons.
+			 */
+			budget = max_t(int, bfqq->entity.service, min_budget);
+			break;
+		default:
+			return;
+		}
+	} else if (!bfq_bfqq_sync(bfqq))
+		/*
+		 * Async queues get always the maximum possible
+		 * budget, as for them we do not care about latency
+		 * (in addition, their ability to dispatch is limited
+		 * by the charging factor).
+		 */
+		budget = bfqd->bfq_max_budget;
+
+	bfqq->max_budget = budget;
+
+	if (bfqd->budgets_assigned >= bfq_stats_min_budgets &&
+	    !bfqd->bfq_user_max_budget)
+		bfqq->max_budget = min(bfqq->max_budget, bfqd->bfq_max_budget);
+
+	/*
+	 * If there is still backlog, then assign a new budget, making
+	 * sure that it is large enough for the next request.  Since
+	 * the finish time of bfqq must be kept in sync with the
+	 * budget, be sure to call __bfq_bfqq_expire() *after* this
+	 * update.
+	 *
+	 * If there is no backlog, then no need to update the budget;
+	 * it will be updated on the arrival of a new request.
+	 */
+	next_rq = bfqq->next_rq;
+	if (next_rq) {
+		BUG_ON(reason == BFQ_BFQQ_TOO_IDLE ||
+		       reason == BFQ_BFQQ_NO_MORE_REQUESTS);
+		bfqq->entity.budget = max_t(unsigned long, bfqq->max_budget,
+					    bfq_serv_to_charge(next_rq, bfqq));
+		BUG_ON(!bfq_bfqq_busy(bfqq));
+		BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list));
+	}
+
+	bfq_log_bfqq(bfqd, bfqq, "head sect: %u, new budget %d",
+			next_rq ? blk_rq_sectors(next_rq) : 0,
+			bfqq->entity.budget);
+}
+
+/*
+ * Return true if the process associated with bfqq is "slow". The slow
+ * flag is used, in addition to the budget timeout, to reduce the
+ * amount of service provided to seeky processes, and thus reduce
+ * their chances to lower the throughput. More details in the comments
+ * on the function bfq_bfqq_expire().
+ *
+ * An important observation is in order: as discussed in the comments
+ * on the function bfq_update_peak_rate(), with devices with internal
+ * queues, it is hard if ever possible to know when and for how long
+ * an I/O request is processed by the device (apart from the trivial
+ * I/O pattern where a new request is dispatched only after the
+ * previous one has been completed). This makes it hard to evaluate
+ * the real rate at which the I/O requests of each bfq_queue are
+ * served.  In fact, for an I/O scheduler like BFQ, serving a
+ * bfq_queue means just dispatching its requests during its service
+ * slot (i.e., until the budget of the queue is exhausted, or the
+ * queue remains idle, or, finally, a timeout fires). But, during the
+ * service slot of a bfq_queue, around 100 ms at most, the device may
+ * be even still processing requests of bfq_queues served in previous
+ * service slots. On the opposite end, the requests of the in-service
+ * bfq_queue may be completed after the service slot of the queue
+ * finishes.
+ *
+ * Anyway, unless more sophisticated solutions are used
+ * (where possible), the sum of the sizes of the requests dispatched
+ * during the service slot of a bfq_queue is probably the only
+ * approximation available for the service received by the bfq_queue
+ * during its service slot. And this sum is the quantity used in this
+ * function to evaluate the I/O speed of a process.
+ */
+static bool bfq_bfqq_is_slow(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+				 bool compensate, enum bfqq_expiration reason,
+				 unsigned long *delta_ms)
+{
+	ktime_t delta_ktime;
+	u32 delta_usecs;
+	bool slow = BFQQ_SEEKY(bfqq); /* if delta too short, use seekyness */
+
+	if (!bfq_bfqq_sync(bfqq))
+		return false;
+
+	if (compensate)
+		delta_ktime = bfqd->last_idling_start;
+	else
+		delta_ktime = ktime_get();
+	delta_ktime = ktime_sub(delta_ktime, bfqd->last_budget_start);
+	delta_usecs = ktime_to_us(delta_ktime);
+
+	/* don't use too short time intervals */
+	if (delta_usecs < 1000) {
+		if (blk_queue_nonrot(bfqd->queue))
+			 /*
+			  * give same worst-case guarantees as idling
+			  * for seeky
+			  */
+			*delta_ms = BFQ_MIN_TT / NSEC_PER_MSEC;
+		else /* charge at least one seek */
+			*delta_ms = bfq_slice_idle / NSEC_PER_MSEC;
+
+		bfq_log(bfqd, "too short %u", delta_usecs);
+
+		return slow;
+	}
+
+	*delta_ms = delta_usecs / USEC_PER_MSEC;
+
+	/*
+	 * Use only long (> 20ms) intervals to filter out excessive
+	 * spikes in service rate estimation.
+	 */
+	if (delta_usecs > 20000) {
+		/*
+		 * Caveat for rotational devices: processes doing I/O
+		 * in the slower disk zones tend to be slow(er) even
+		 * if not seeky. In this respect, the estimated peak
+		 * rate is likely to be an average over the disk
+		 * surface. Accordingly, to not be too harsh with
+		 * unlucky processes, a process is deemed slow only if
+		 * its rate has been lower than half of the estimated
+		 * peak rate.
+		 */
+		slow = bfqq->entity.service < bfqd->bfq_max_budget / 2;
+		bfq_log(bfqd, "relative rate %d/%d",
+			bfqq->entity.service, bfqd->bfq_max_budget);
+	}
+
+	bfq_log_bfqq(bfqd, bfqq, "slow %d", slow);
+
+	return slow;
+}
+
+/*
+ * To be deemed as soft real-time, an application must meet two
+ * requirements. First, the application must not require an average
+ * bandwidth higher than the approximate bandwidth required to playback or
+ * record a compressed high-definition video.
+ * The next function is invoked on the completion of the last request of a
+ * batch, to compute the next-start time instant, soft_rt_next_start, such
+ * that, if the next request of the application does not arrive before
+ * soft_rt_next_start, then the above requirement on the bandwidth is met.
+ *
+ * The second requirement is that the request pattern of the application is
+ * isochronous, i.e., that, after issuing a request or a batch of requests,
+ * the application stops issuing new requests until all its pending requests
+ * have been completed. After that, the application may issue a new batch,
+ * and so on.
+ * For this reason the next function is invoked to compute
+ * soft_rt_next_start only for applications that meet this requirement,
+ * whereas soft_rt_next_start is set to infinity for applications that do
+ * not.
+ *
+ * Unfortunately, even a greedy (i.e., I/O-bound) application may
+ * happen to meet, occasionally or systematically, both the above
+ * bandwidth and isochrony requirements. This may happen at least in
+ * the following circumstances. First, if the CPU load is high. The
+ * application may stop issuing requests while the CPUs are busy
+ * serving other processes, then restart, then stop again for a while,
+ * and so on. The other circumstances are related to the storage
+ * device: the storage device is highly loaded or reaches a low-enough
+ * throughput with the I/O of the application (e.g., because the I/O
+ * is random and/or the device is slow). In all these cases, the
+ * I/O of the application may be simply slowed down enough to meet
+ * the bandwidth and isochrony requirements. To reduce the probability
+ * that greedy applications are deemed as soft real-time in these
+ * corner cases, a further rule is used in the computation of
+ * soft_rt_next_start: the return value of this function is forced to
+ * be higher than the maximum between the following two quantities.
+ *
+ * (a) Current time plus: (1) the maximum time for which the arrival
+ *     of a request is waited for when a sync queue becomes idle,
+ *     namely bfqd->bfq_slice_idle, and (2) a few extra jiffies. We
+ *     postpone for a moment the reason for adding a few extra
+ *     jiffies; we get back to it after next item (b).  Lower-bounding
+ *     the return value of this function with the current time plus
+ *     bfqd->bfq_slice_idle tends to filter out greedy applications,
+ *     because the latter issue their next request as soon as possible
+ *     after the last one has been completed. In contrast, a soft
+ *     real-time application spends some time processing data, after a
+ *     batch of its requests has been completed.
+ *
+ * (b) Current value of bfqq->soft_rt_next_start. As pointed out
+ *     above, greedy applications may happen to meet both the
+ *     bandwidth and isochrony requirements under heavy CPU or
+ *     storage-device load. In more detail, in these scenarios, these
+ *     applications happen, only for limited time periods, to do I/O
+ *     slowly enough to meet all the requirements described so far,
+ *     including the filtering in above item (a). These slow-speed
+ *     time intervals are usually interspersed between other time
+ *     intervals during which these applications do I/O at a very high
+ *     speed. Fortunately, exactly because of the high speed of the
+ *     I/O in the high-speed intervals, the values returned by this
+ *     function happen to be so high, near the end of any such
+ *     high-speed interval, to be likely to fall *after* the end of
+ *     the low-speed time interval that follows. These high values are
+ *     stored in bfqq->soft_rt_next_start after each invocation of
+ *     this function. As a consequence, if the last value of
+ *     bfqq->soft_rt_next_start is constantly used to lower-bound the
+ *     next value that this function may return, then, from the very
+ *     beginning of a low-speed interval, bfqq->soft_rt_next_start is
+ *     likely to be constantly kept so high that any I/O request
+ *     issued during the low-speed interval is considered as arriving
+ *     to soon for the application to be deemed as soft
+ *     real-time. Then, in the high-speed interval that follows, the
+ *     application will not be deemed as soft real-time, just because
+ *     it will do I/O at a high speed. And so on.
+ *
+ * Getting back to the filtering in item (a), in the following two
+ * cases this filtering might be easily passed by a greedy
+ * application, if the reference quantity was just
+ * bfqd->bfq_slice_idle:
+ * 1) HZ is so low that the duration of a jiffy is comparable to or
+ *    higher than bfqd->bfq_slice_idle. This happens, e.g., on slow
+ *    devices with HZ=100. The time granularity may be so coarse
+ *    that the approximation, in jiffies, of bfqd->bfq_slice_idle
+ *    is rather lower than the exact value.
+ * 2) jiffies, instead of increasing at a constant rate, may stop increasing
+ *    for a while, then suddenly 'jump' by several units to recover the lost
+ *    increments. This seems to happen, e.g., inside virtual machines.
+ * To address this issue, in the filtering in (a) we do not use as a
+ * reference time interval just bfqd->bfq_slice_idle, but
+ * bfqd->bfq_slice_idle plus a few jiffies. In particular, we add the
+ * minimum number of jiffies for which the filter seems to be quite
+ * precise also in embedded systems and KVM/QEMU virtual machines.
+ */
+static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd,
+						struct bfq_queue *bfqq)
+{
+	bfq_log_bfqq(bfqd, bfqq,
+"service_blkg %lu soft_rate %u sects/sec interval %u",
+		     bfqq->service_from_backlogged,
+		     bfqd->bfq_wr_max_softrt_rate,
+		     jiffies_to_msecs(HZ * bfqq->service_from_backlogged /
+				      bfqd->bfq_wr_max_softrt_rate));
+
+	return max3(bfqq->soft_rt_next_start,
+		    bfqq->last_idle_bklogged +
+		    HZ * bfqq->service_from_backlogged /
+		    bfqd->bfq_wr_max_softrt_rate,
+		    jiffies + nsecs_to_jiffies(bfqq->bfqd->bfq_slice_idle) + 4);
+}
+
+/**
+ * bfq_bfqq_expire - expire a queue.
+ * @bfqd: device owning the queue.
+ * @bfqq: the queue to expire.
+ * @compensate: if true, compensate for the time spent idling.
+ * @reason: the reason causing the expiration.
+ *
+ * If the process associated with bfqq does slow I/O (e.g., because it
+ * issues random requests), we charge bfqq with the time it has been
+ * in service instead of the service it has received (see
+ * bfq_bfqq_charge_time for details on how this goal is achieved). As
+ * a consequence, bfqq will typically get higher timestamps upon
+ * reactivation, and hence it will be rescheduled as if it had
+ * received more service than what it has actually received. In the
+ * end, bfqq receives less service in proportion to how slowly its
+ * associated process consumes its budgets (and hence how seriously it
+ * tends to lower the throughput). In addition, this time-charging
+ * strategy guarantees time fairness among slow processes. In
+ * contrast, if the process associated with bfqq is not slow, we
+ * charge bfqq exactly with the service it has received.
+ *
+ * Charging time to the first type of queues and the exact service to
+ * the other has the effect of using the WF2Q+ policy to schedule the
+ * former on a timeslice basis, without violating service domain
+ * guarantees among the latter.
+ */
+static void bfq_bfqq_expire(struct bfq_data *bfqd,
+			    struct bfq_queue *bfqq,
+			    bool compensate,
+			    enum bfqq_expiration reason)
+{
+	bool slow;
+	unsigned long delta = 0;
+	struct bfq_entity *entity = &bfqq->entity;
+	int ref;
+
+	BUG_ON(bfqq != bfqd->in_service_queue);
+
+	/*
+	 * Check whether the process is slow (see bfq_bfqq_is_slow).
+	 */
+	slow = bfq_bfqq_is_slow(bfqd, bfqq, compensate, reason, &delta);
+
+	/*
+	 * As above explained, charge slow (typically seeky) and
+	 * timed-out queues with the time and not the service
+	 * received, to favor sequential workloads.
+	 *
+	 * Processes doing I/O in the slower disk zones will tend to
+	 * be slow(er) even if not seeky. Therefore, since the
+	 * estimated peak rate is actually an average over the disk
+	 * surface, these processes may timeout just for bad luck. To
+	 * avoid punishing them, do not charge time to processes that
+	 * succeeded in consuming at least 2/3 of their budget. This
+	 * allows BFQ to preserve enough elasticity to still perform
+	 * bandwidth, and not time, distribution with little unlucky
+	 * or quasi-sequential processes.
+	 */
+	if (bfqq->wr_coeff == 1 &&
+	    (slow ||
+	     (reason == BFQ_BFQQ_BUDGET_TIMEOUT &&
+	      bfq_bfqq_budget_left(bfqq) >=  entity->budget / 3)))
+		bfq_bfqq_charge_time(bfqd, bfqq, delta);
+
+	BUG_ON(bfqq->entity.budget < bfqq->entity.service);
+
+	if (reason == BFQ_BFQQ_TOO_IDLE &&
+	    entity->service <= 2 * entity->budget / 10)
+		bfq_clear_bfqq_IO_bound(bfqq);
+
+	if (bfqd->low_latency && bfqq->wr_coeff == 1)
+		bfqq->last_wr_start_finish = jiffies;
+
+	if (bfqd->low_latency && bfqd->bfq_wr_max_softrt_rate > 0 &&
+	    RB_EMPTY_ROOT(&bfqq->sort_list)) {
+		/*
+		 * If we get here, and there are no outstanding
+		 * requests, then the request pattern is isochronous
+		 * (see the comments on the function
+		 * bfq_bfqq_softrt_next_start()). Thus we can compute
+		 * soft_rt_next_start. If, instead, the queue still
+		 * has outstanding requests, then we have to wait for
+		 * the completion of all the outstanding requests to
+		 * discover whether the request pattern is actually
+		 * isochronous.
+		 */
+		BUG_ON(bfqd->busy_queues < 1);
+		if (bfqq->dispatched == 0) {
+			bfqq->soft_rt_next_start =
+				bfq_bfqq_softrt_next_start(bfqd, bfqq);
+			bfq_log_bfqq(bfqd, bfqq, "new soft_rt_next %lu",
+				     bfqq->soft_rt_next_start);
+		} else {
+			/*
+			 * The application is still waiting for the
+			 * completion of one or more requests:
+			 * prevent it from possibly being incorrectly
+			 * deemed as soft real-time by setting its
+			 * soft_rt_next_start to infinity. In fact,
+			 * without this assignment, the application
+			 * would be incorrectly deemed as soft
+			 * real-time if:
+			 * 1) it issued a new request before the
+			 *    completion of all its in-flight
+			 *    requests, and
+			 * 2) at that time, its soft_rt_next_start
+			 *    happened to be in the past.
+			 */
+			bfqq->soft_rt_next_start =
+				bfq_greatest_from_now();
+			/*
+			 * Schedule an update of soft_rt_next_start to when
+			 * the task may be discovered to be isochronous.
+			 */
+			bfq_mark_bfqq_softrt_update(bfqq);
+		}
+	}
+
+	bfq_log_bfqq(bfqd, bfqq,
+		"expire (%s, slow %d, num_disp %d, short_ttime %d, weight %d)",
+		     reason_name[reason], slow, bfqq->dispatched,
+		     bfq_bfqq_has_short_ttime(bfqq), entity->weight);
+
+	/*
+	 * Increase, decrease or leave budget unchanged according to
+	 * reason.
+	 */
+	BUG_ON(bfqq->entity.budget < bfqq->entity.service);
+	__bfq_bfqq_recalc_budget(bfqd, bfqq, reason);
+	BUG_ON(bfqq->next_rq == NULL &&
+	       bfqq->entity.budget < bfqq->entity.service);
+	ref = bfqq->ref;
+	__bfq_bfqq_expire(bfqd, bfqq);
+
+	BUG_ON(ref > 1 &&
+	       !bfq_bfqq_busy(bfqq) && reason == BFQ_BFQQ_BUDGET_EXHAUSTED &&
+		!bfq_class_idle(bfqq));
+
+	/* mark bfqq as waiting a request only if a bic still points to it */
+	if (ref > 1 && !bfq_bfqq_busy(bfqq) &&
+	    reason != BFQ_BFQQ_BUDGET_TIMEOUT &&
+	    reason != BFQ_BFQQ_BUDGET_EXHAUSTED)
+		bfq_mark_bfqq_non_blocking_wait_rq(bfqq);
+}
+
+/*
+ * Budget timeout is not implemented through a dedicated timer, but
+ * just checked on request arrivals and completions, as well as on
+ * idle timer expirations.
+ */
+static bool bfq_bfqq_budget_timeout(struct bfq_queue *bfqq)
+{
+	return time_is_before_eq_jiffies(bfqq->budget_timeout);
+}
+
+/*
+ * If we expire a queue that is actively waiting (i.e., with the
+ * device idled) for the arrival of a new request, then we may incur
+ * the timestamp misalignment problem described in the body of the
+ * function __bfq_activate_entity. Hence we return true only if this
+ * condition does not hold, or if the queue is slow enough to deserve
+ * only to be kicked off for preserving a high throughput.
+ */
+static bool bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq)
+{
+	bfq_log_bfqq(bfqq->bfqd, bfqq,
+		"wait_request %d left %d timeout %d",
+		bfq_bfqq_wait_request(bfqq),
+			bfq_bfqq_budget_left(bfqq) >=  bfqq->entity.budget / 3,
+		bfq_bfqq_budget_timeout(bfqq));
+
+	return (!bfq_bfqq_wait_request(bfqq) ||
+		bfq_bfqq_budget_left(bfqq) >=  bfqq->entity.budget / 3)
+		&&
+		bfq_bfqq_budget_timeout(bfqq);
+}
+
+/*
+ * For a queue that becomes empty, device idling is allowed only if
+ * this function returns true for that queue. As a consequence, since
+ * device idling plays a critical role for both throughput boosting
+ * and service guarantees, the return value of this function plays a
+ * critical role as well.
+ *
+ * In a nutshell, this function returns true only if idling is
+ * beneficial for throughput or, even if detrimental for throughput,
+ * idling is however necessary to preserve service guarantees (low
+ * latency, desired throughput distribution, ...). In particular, on
+ * NCQ-capable devices, this function tries to return false, so as to
+ * help keep the drives' internal queues full, whenever this helps the
+ * device boost the throughput without causing any service-guarantee
+ * issue.
+ *
+ * In more detail, the return value of this function is obtained by,
+ * first, computing a number of boolean variables that take into
+ * account throughput and service-guarantee issues, and, then,
+ * combining these variables in a logical expression. Most of the
+ * issues taken into account are not trivial. We discuss these issues
+ * while introducing the variables.
+ */
+static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+{
+	struct bfq_data *bfqd = bfqq->bfqd;
+	bool rot_without_queueing =
+		!blk_queue_nonrot(bfqd->queue) && !bfqd->hw_tag,
+		bfqq_sequential_and_IO_bound,
+		idling_boosts_thr, idling_boosts_thr_without_issues,
+		idling_needed_for_service_guarantees,
+		asymmetric_scenario;
+
+	if (bfqd->strict_guarantees)
+		return true;
+
+	/*
+	 * Idling is performed only if slice_idle > 0. In addition, we
+	 * do not idle if
+	 * (a) bfqq is async
+	 * (b) bfqq is in the idle io prio class: in this case we do
+	 * not idle because we want to minimize the bandwidth that
+	 * queues in this class can steal to higher-priority queues
+	 */
+	if (bfqd->bfq_slice_idle == 0 || !bfq_bfqq_sync(bfqq) ||
+	   bfq_class_idle(bfqq))
+		return false;
+
+	bfqq_sequential_and_IO_bound = !BFQQ_SEEKY(bfqq) &&
+		bfq_bfqq_IO_bound(bfqq) && bfq_bfqq_has_short_ttime(bfqq);
+	/*
+	 * The next variable takes into account the cases where idling
+	 * boosts the throughput.
+	 *
+	 * The value of the variable is computed considering, first, that
+	 * idling is virtually always beneficial for the throughput if:
+	 * (a) the device is not NCQ-capable and rotational, or
+	 * (b) regardless of the presence of NCQ, the device is rotational and
+	 *     the request pattern for bfqq is I/O-bound and sequential, or
+	 * (c) regardless of whether it is rotational, the device is
+	 *     not NCQ-capable and the request pattern for bfqq is
+	 *     I/O-bound and sequential.
+	 *
+	 * Secondly, and in contrast to the above item (b), idling an
+	 * NCQ-capable flash-based device would not boost the
+	 * throughput even with sequential I/O; rather it would lower
+	 * the throughput in proportion to how fast the device
+	 * is. Accordingly, the next variable is true if any of the
+	 * above conditions (a), (b) or (c) is true, and, in
+	 * particular, happens to be false if bfqd is an NCQ-capable
+	 * flash-based device.
+	 */
+	idling_boosts_thr = rot_without_queueing ||
+		((!blk_queue_nonrot(bfqd->queue) || !bfqd->hw_tag) &&
+		 bfqq_sequential_and_IO_bound);
+
+	/*
+	 * The value of the next variable,
+	 * idling_boosts_thr_without_issues, is equal to that of
+	 * idling_boosts_thr, unless a special case holds. In this
+	 * special case, described below, idling may cause problems to
+	 * weight-raised queues.
+	 *
+	 * When the request pool is saturated (e.g., in the presence
+	 * of write hogs), if the processes associated with
+	 * non-weight-raised queues ask for requests at a lower rate,
+	 * then processes associated with weight-raised queues have a
+	 * higher probability to get a request from the pool
+	 * immediately (or at least soon) when they need one. Thus
+	 * they have a higher probability to actually get a fraction
+	 * of the device throughput proportional to their high
+	 * weight. This is especially true with NCQ-capable drives,
+	 * which enqueue several requests in advance, and further
+	 * reorder internally-queued requests.
+	 *
+	 * For this reason, we force to false the value of
+	 * idling_boosts_thr_without_issues if there are weight-raised
+	 * busy queues. In this case, and if bfqq is not weight-raised,
+	 * this guarantees that the device is not idled for bfqq (if,
+	 * instead, bfqq is weight-raised, then idling will be
+	 * guaranteed by another variable, see below). Combined with
+	 * the timestamping rules of BFQ (see [1] for details), this
+	 * behavior causes bfqq, and hence any sync non-weight-raised
+	 * queue, to get a lower number of requests served, and thus
+	 * to ask for a lower number of requests from the request
+	 * pool, before the busy weight-raised queues get served
+	 * again. This often mitigates starvation problems in the
+	 * presence of heavy write workloads and NCQ, thereby
+	 * guaranteeing a higher application and system responsiveness
+	 * in these hostile scenarios.
+	 */
+	idling_boosts_thr_without_issues = idling_boosts_thr &&
+		bfqd->wr_busy_queues == 0;
+
+	/*
+	 * There is then a case where idling must be performed not
+	 * for throughput concerns, but to preserve service
+	 * guarantees.
+	 *
+	 * To introduce this case, we can note that allowing the drive
+	 * to enqueue more than one request at a time, and hence
+	 * delegating de facto final scheduling decisions to the
+	 * drive's internal scheduler, entails loss of control on the
+	 * actual request service order. In particular, the critical
+	 * situation is when requests from different processes happen
+	 * to be present, at the same time, in the internal queue(s)
+	 * of the drive. In such a situation, the drive, by deciding
+	 * the service order of the internally-queued requests, does
+	 * determine also the actual throughput distribution among
+	 * these processes. But the drive typically has no notion or
+	 * concern about per-process throughput distribution, and
+	 * makes its decisions only on a per-request basis. Therefore,
+	 * the service distribution enforced by the drive's internal
+	 * scheduler is likely to coincide with the desired
+	 * device-throughput distribution only in a completely
+	 * symmetric scenario where:
+	 * (i)  each of these processes must get the same throughput as
+	 *      the others;
+	 * (ii) all these processes have the same I/O pattern
+	 *      (either sequential or random).
+	 * In fact, in such a scenario, the drive will tend to treat
+	 * the requests of each of these processes in about the same
+	 * way as the requests of the others, and thus to provide
+	 * each of these processes with about the same throughput
+	 * (which is exactly the desired throughput distribution). In
+	 * contrast, in any asymmetric scenario, device idling is
+	 * certainly needed to guarantee that bfqq receives its
+	 * assigned fraction of the device throughput (see [1] for
+	 * details).
+	 *
+	 * We address this issue by controlling, actually, only the
+	 * symmetry sub-condition (i), i.e., provided that
+	 * sub-condition (i) holds, idling is not performed,
+	 * regardless of whether sub-condition (ii) holds. In other
+	 * words, only if sub-condition (i) holds, then idling is
+	 * allowed, and the device tends to be prevented from queueing
+	 * many requests, possibly of several processes. The reason
+	 * for not controlling also sub-condition (ii) is that we
+	 * exploit preemption to preserve guarantees in case of
+	 * symmetric scenarios, even if (ii) does not hold, as
+	 * explained in the next two paragraphs.
+	 *
+	 * Even if a queue, say Q, is expired when it remains idle, Q
+	 * can still preempt the new in-service queue if the next
+	 * request of Q arrives soon (see the comments on
+	 * bfq_bfqq_update_budg_for_activation). If all queues and
+	 * groups have the same weight, this form of preemption,
+	 * combined with the hole-recovery heuristic described in the
+	 * comments on function bfq_bfqq_update_budg_for_activation,
+	 * are enough to preserve a correct bandwidth distribution in
+	 * the mid term, even without idling. In fact, even if not
+	 * idling allows the internal queues of the device to contain
+	 * many requests, and thus to reorder requests, we can rather
+	 * safely assume that the internal scheduler still preserves a
+	 * minimum of mid-term fairness. The motivation for using
+	 * preemption instead of idling is that, by not idling,
+	 * service guarantees are preserved without minimally
+	 * sacrificing throughput. In other words, both a high
+	 * throughput and its desired distribution are obtained.
+	 *
+	 * More precisely, this preemption-based, idleless approach
+	 * provides fairness in terms of IOPS, and not sectors per
+	 * second. This can be seen with a simple example. Suppose
+	 * that there are two queues with the same weight, but that
+	 * the first queue receives requests of 8 sectors, while the
+	 * second queue receives requests of 1024 sectors. In
+	 * addition, suppose that each of the two queues contains at
+	 * most one request at a time, which implies that each queue
+	 * always remains idle after it is served. Finally, after
+	 * remaining idle, each queue receives very quickly a new
+	 * request. It follows that the two queues are served
+	 * alternatively, preempting each other if needed. This
+	 * implies that, although both queues have the same weight,
+	 * the queue with large requests receives a service that is
+	 * 1024/8 times as high as the service received by the other
+	 * queue.
+	 *
+	 * On the other hand, device idling is performed, and thus
+	 * pure sector-domain guarantees are provided, for the
+	 * following queues, which are likely to need stronger
+	 * throughput guarantees: weight-raised queues, and queues
+	 * with a higher weight than other queues. When such queues
+	 * are active, sub-condition (i) is false, which triggers
+	 * device idling.
+	 *
+	 * According to the above considerations, the next variable is
+	 * true (only) if sub-condition (i) holds. To compute the
+	 * value of this variable, we not only use the return value of
+	 * the function bfq_symmetric_scenario(), but also check
+	 * whether bfqq is being weight-raised, because
+	 * bfq_symmetric_scenario() does not take into account also
+	 * weight-raised queues (see comments on
+	 * bfq_weights_tree_add()).
+	 *
+	 * As a side note, it is worth considering that the above
+	 * device-idling countermeasures may however fail in the
+	 * following unlucky scenario: if idling is (correctly)
+	 * disabled in a time period during which all symmetry
+	 * sub-conditions hold, and hence the device is allowed to
+	 * enqueue many requests, but at some later point in time some
+	 * sub-condition stops to hold, then it may become impossible
+	 * to let requests be served in the desired order until all
+	 * the requests already queued in the device have been served.
+	 */
+	asymmetric_scenario = bfqq->wr_coeff > 1 ||
+		!bfq_symmetric_scenario(bfqd);
+
+	/*
+	 * Finally, there is a case where maximizing throughput is the
+	 * best choice even if it may cause unfairness toward
+	 * bfqq. Such a case is when bfqq became active in a burst of
+	 * queue activations. Queues that became active during a large
+	 * burst benefit only from throughput, as discussed in the
+	 * comments on bfq_handle_burst. Thus, if bfqq became active
+	 * in a burst and not idling the device maximizes throughput,
+	 * then the device must no be idled, because not idling the
+	 * device provides bfqq and all other queues in the burst with
+	 * maximum benefit. Combining this and the above case, we can
+	 * now establish when idling is actually needed to preserve
+	 * service guarantees.
+	 */
+	idling_needed_for_service_guarantees =
+		asymmetric_scenario && !bfq_bfqq_in_large_burst(bfqq);
+
+	/*
+	 * We have now all the components we need to compute the
+	 * return value of the function, which is true only if idling
+	 * either boosts the throughput (without issues), or is
+	 * necessary to preserve service guarantees.
+	 */
+	bfq_log_bfqq(bfqd, bfqq, "sync %d idling_boosts_thr %d",
+		     bfq_bfqq_sync(bfqq), idling_boosts_thr);
+
+	bfq_log_bfqq(bfqd, bfqq,
+		     "wr_busy %d boosts %d IO-bound %d guar %d",
+		     bfqd->wr_busy_queues,
+		     idling_boosts_thr_without_issues,
+		     bfq_bfqq_IO_bound(bfqq),
+		     idling_needed_for_service_guarantees);
+
+	return idling_boosts_thr_without_issues ||
+		idling_needed_for_service_guarantees;
+}
+
+/*
+ * If the in-service queue is empty but the function bfq_bfqq_may_idle
+ * returns true, then:
+ * 1) the queue must remain in service and cannot be expired, and
+ * 2) the device must be idled to wait for the possible arrival of a new
+ *    request for the queue.
+ * See the comments on the function bfq_bfqq_may_idle for the reasons
+ * why performing device idling is the best choice to boost the throughput
+ * and preserve service guarantees when bfq_bfqq_may_idle itself
+ * returns true.
+ */
+static bool bfq_bfqq_must_idle(struct bfq_queue *bfqq)
+{
+	return RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_may_idle(bfqq);
+}
+
+/*
+ * Select a queue for service.  If we have a current queue in service,
+ * check whether to continue servicing it, or retrieve and set a new one.
+ */
+static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
+{
+	struct bfq_queue *bfqq;
+	struct request *next_rq;
+	enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT;
+
+	bfqq = bfqd->in_service_queue;
+	if (!bfqq)
+		goto new_queue;
+
+	bfq_log_bfqq(bfqd, bfqq, "already in-service queue");
+
+	if (bfq_may_expire_for_budg_timeout(bfqq) &&
+	    !hrtimer_active(&bfqd->idle_slice_timer) &&
+	    !bfq_bfqq_must_idle(bfqq))
+		goto expire;
+
+check_queue:
+	/*
+	 * This loop is rarely executed more than once. Even when it
+	 * happens, it is much more convenient to re-execute this loop
+	 * than to return NULL and trigger a new dispatch to get a
+	 * request served.
+	 */
+	next_rq = bfqq->next_rq;
+	/*
+	 * If bfqq has requests queued and it has enough budget left to
+	 * serve them, keep the queue, otherwise expire it.
+	 */
+	if (next_rq) {
+		BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list));
+
+		if (bfq_serv_to_charge(next_rq, bfqq) >
+			bfq_bfqq_budget_left(bfqq)) {
+			/*
+			 * Expire the queue for budget exhaustion,
+			 * which makes sure that the next budget is
+			 * enough to serve the next request, even if
+			 * it comes from the fifo expired path.
+			 */
+			reason = BFQ_BFQQ_BUDGET_EXHAUSTED;
+			goto expire;
+		} else {
+			/*
+			 * The idle timer may be pending because we may
+			 * not disable disk idling even when a new request
+			 * arrives.
+			 */
+			if (bfq_bfqq_wait_request(bfqq)) {
+				BUG_ON(!hrtimer_active(&bfqd->idle_slice_timer));
+				/*
+				 * If we get here: 1) at least a new request
+				 * has arrived but we have not disabled the
+				 * timer because the request was too small,
+				 * 2) then the block layer has unplugged
+				 * the device, causing the dispatch to be
+				 * invoked.
+				 *
+				 * Since the device is unplugged, now the
+				 * requests are probably large enough to
+				 * provide a reasonable throughput.
+				 * So we disable idling.
+				 */
+				bfq_clear_bfqq_wait_request(bfqq);
+				hrtimer_try_to_cancel(&bfqd->idle_slice_timer);
+				bfqg_stats_update_idle_time(bfqq_group(bfqq));
+			}
+			goto keep_queue;
+		}
+	}
+
+	/*
+	 * No requests pending. However, if the in-service queue is idling
+	 * for a new request, or has requests waiting for a completion and
+	 * may idle after their completion, then keep it anyway.
+	 */
+	if (hrtimer_active(&bfqd->idle_slice_timer) ||
+	    (bfqq->dispatched != 0 && bfq_bfqq_may_idle(bfqq))) {
+		bfqq = NULL;
+		goto keep_queue;
+	}
+
+	reason = BFQ_BFQQ_NO_MORE_REQUESTS;
+expire:
+	bfq_bfqq_expire(bfqd, bfqq, false, reason);
+new_queue:
+	bfqq = bfq_set_in_service_queue(bfqd);
+	if (bfqq) {
+		bfq_log_bfqq(bfqd, bfqq, "checking new queue");
+		goto check_queue;
+	}
+keep_queue:
+	if (bfqq)
+		bfq_log_bfqq(bfqd, bfqq, "returned this queue");
+	else
+		bfq_log(bfqd, "no queue returned");
+
+	return bfqq;
+}
+
+static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	struct bfq_entity *entity = &bfqq->entity;
+
+	if (bfqq->wr_coeff > 1) { /* queue is being weight-raised */
+		BUG_ON(bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time &&
+		       time_is_after_jiffies(bfqq->last_wr_start_finish));
+
+		bfq_log_bfqq(bfqd, bfqq,
+			"raising period dur %u/%u msec, old coeff %u, w %d(%d)",
+			jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish),
+			jiffies_to_msecs(bfqq->wr_cur_max_time),
+			bfqq->wr_coeff,
+			bfqq->entity.weight, bfqq->entity.orig_weight);
+
+		BUG_ON(bfqq != bfqd->in_service_queue && entity->weight !=
+		       entity->orig_weight * bfqq->wr_coeff);
+		if (entity->prio_changed)
+			bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change");
+
+		/*
+		 * If the queue was activated in a burst, or too much
+		 * time has elapsed from the beginning of this
+		 * weight-raising period, then end weight raising.
+		 */
+		if (bfq_bfqq_in_large_burst(bfqq))
+			bfq_bfqq_end_wr(bfqq);
+		else if (time_is_before_jiffies(bfqq->last_wr_start_finish +
+					   bfqq->wr_cur_max_time)) {
+			if (bfqq->wr_cur_max_time != bfqd->bfq_wr_rt_max_time ||
+			time_is_before_jiffies(bfqq->wr_start_at_switch_to_srt +
+					bfq_wr_duration(bfqd)))
+				bfq_bfqq_end_wr(bfqq);
+			else {
+				switch_back_to_interactive_wr(bfqq, bfqd);
+				BUG_ON(time_is_after_jiffies(
+					       bfqq->last_wr_start_finish));
+				bfqq->entity.prio_changed = 1;
+				bfq_log_bfqq(bfqd, bfqq,
+					"back to interactive wr");
+			}
+		}
+		if (bfqq->wr_coeff > 1 &&
+		       bfqq->wr_cur_max_time != bfqd->bfq_wr_rt_max_time &&
+		       bfqq->service_from_wr > max_service_from_wr) {
+			       /* see comments on max_service_from_wr */
+			       bfq_bfqq_end_wr(bfqq);
+			       bfq_log_bfqq(bfqd, bfqq,
+					       "too much service");
+	       }
+	}
+	/*
+	 * To improve latency (for this or other queues), immediately
+	 * update weight both if it must be raised and if it must be
+	 * lowered. Since, entity may be on some active tree here, and
+	 * might have a pending change of its ioprio class, invoke
+	 * next function with the last parameter unset (see the
+	 * comments on the function).
+	 */
+	if ((entity->weight > entity->orig_weight) != (bfqq->wr_coeff > 1))
+		__bfq_entity_update_weight_prio(bfq_entity_service_tree(entity),
+						entity, false);
+}
+
+/*
+ * Dispatch one request from bfqq, moving it to the request queue
+ * dispatch list.
+ */
+static int bfq_dispatch_request(struct bfq_data *bfqd,
+				struct bfq_queue *bfqq)
+{
+	int dispatched = 0;
+	struct request *rq = bfqq->next_rq;
+	unsigned long service_to_charge;
+
+	BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list));
+	BUG_ON(!rq);
+	service_to_charge = bfq_serv_to_charge(rq, bfqq);
+
+	BUG_ON(service_to_charge > bfq_bfqq_budget_left(bfqq));
+
+	BUG_ON(bfqq->entity.budget < bfqq->entity.service);
+
+	bfq_bfqq_served(bfqq, service_to_charge);
+
+	BUG_ON(bfqq->entity.budget < bfqq->entity.service);
+
+	bfq_dispatch_insert(bfqd->queue, rq);
+
+	/*
+	 * If weight raising has to terminate for bfqq, then next
+	 * function causes an immediate update of bfqq's weight,
+	 * without waiting for next activation. As a consequence, on
+	 * expiration, bfqq will be timestamped as if has never been
+	 * weight-raised during this service slot, even if it has
+	 * received part or even most of the service as a
+	 * weight-raised queue. This inflates bfqq's timestamps, which
+	 * is beneficial, as bfqq is then more willing to leave the
+	 * device immediately to possible other weight-raised queues.
+	 */
+	bfq_update_wr_data(bfqd, bfqq);
+
+	bfq_log_bfqq(bfqd, bfqq,
+			"dispatched %u sec req (%llu), budg left %d",
+			blk_rq_sectors(rq),
+			(unsigned long long) blk_rq_pos(rq),
+			bfq_bfqq_budget_left(bfqq));
+
+	dispatched++;
+
+	if (!bfqd->in_service_bic) {
+		atomic_long_inc(&RQ_BIC(rq)->icq.ioc->refcount);
+		bfqd->in_service_bic = RQ_BIC(rq);
+		BUG_ON(!bfqd->in_service_bic);
+	}
+
+	if (bfqd->busy_queues > 1 && bfq_class_idle(bfqq))
+		goto expire;
+
+	return dispatched;
+
+expire:
+	bfq_bfqq_expire(bfqd, bfqq, false, BFQ_BFQQ_BUDGET_EXHAUSTED);
+	return dispatched;
+}
+
+static int __bfq_forced_dispatch_bfqq(struct bfq_queue *bfqq)
+{
+	int dispatched = 0;
+
+	while (bfqq->next_rq) {
+		bfq_dispatch_insert(bfqq->bfqd->queue, bfqq->next_rq);
+		dispatched++;
+	}
+
+	BUG_ON(!list_empty(&bfqq->fifo));
+	return dispatched;
+}
+
+/*
+ * Drain our current requests.
+ * Used for barriers and when switching io schedulers on-the-fly.
+ */
+static int bfq_forced_dispatch(struct bfq_data *bfqd)
+{
+	struct bfq_queue *bfqq, *n;
+	struct bfq_service_tree *st;
+	int dispatched = 0;
+
+	bfqq = bfqd->in_service_queue;
+	if (bfqq)
+		__bfq_bfqq_expire(bfqd, bfqq);
+
+	/*
+	 * Loop through classes, and be careful to leave the scheduler
+	 * in a consistent state, as feedback mechanisms and vtime
+	 * updates cannot be disabled during the process.
+	 */
+	list_for_each_entry_safe(bfqq, n, &bfqd->active_list, bfqq_list) {
+		st = bfq_entity_service_tree(&bfqq->entity);
+
+		dispatched += __bfq_forced_dispatch_bfqq(bfqq);
+
+		bfqq->max_budget = bfq_max_budget(bfqd);
+		bfq_forget_idle(st);
+	}
+
+	BUG_ON(bfqd->busy_queues != 0);
+
+	return dispatched;
+}
+
+static int bfq_dispatch_requests(struct request_queue *q, int force)
+{
+	struct bfq_data *bfqd = q->elevator->elevator_data;
+	struct bfq_queue *bfqq;
+
+	bfq_log(bfqd, "%d busy queues", bfqd->busy_queues);
+
+	if (bfqd->busy_queues == 0)
+		return 0;
+
+	if (unlikely(force))
+		return bfq_forced_dispatch(bfqd);
+
+	/*
+	 * Force device to serve one request at a time if
+	 * strict_guarantees is true. Forcing this service scheme is
+	 * currently the ONLY way to guarantee that the request
+	 * service order enforced by the scheduler is respected by a
+	 * queueing device. Otherwise the device is free even to make
+	 * some unlucky request wait for as long as the device
+	 * wishes.
+	 *
+	 * Of course, serving one request at at time may cause loss of
+	 * throughput.
+	 */
+	if (bfqd->strict_guarantees && bfqd->rq_in_driver > 0)
+		return 0;
+
+	bfqq = bfq_select_queue(bfqd);
+	if (!bfqq)
+		return 0;
+
+	BUG_ON(bfqq->entity.budget < bfqq->entity.service);
+
+	BUG_ON(bfq_bfqq_wait_request(bfqq));
+
+	if (!bfq_dispatch_request(bfqd, bfqq))
+		return 0;
+
+	bfq_log_bfqq(bfqd, bfqq, "%s request",
+			bfq_bfqq_sync(bfqq) ? "sync" : "async");
+
+	BUG_ON(bfqq->next_rq == NULL &&
+	       bfqq->entity.budget < bfqq->entity.service);
+	return 1;
+}
+
+/*
+ * Task holds one reference to the queue, dropped when task exits.  Each rq
+ * in-flight on this queue also holds a reference, dropped when rq is freed.
+ *
+ * Queue lock must be held here. Recall not to use bfqq after calling
+ * this function on it.
+ */
+static void bfq_put_queue(struct bfq_queue *bfqq)
+{
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	struct bfq_group *bfqg = bfqq_group(bfqq);
+#endif
+
+	BUG_ON(bfqq->ref <= 0);
+
+	bfq_log_bfqq(bfqq->bfqd, bfqq, "%p %d", bfqq, bfqq->ref);
+	bfqq->ref--;
+	if (bfqq->ref)
+		return;
+
+	BUG_ON(rb_first(&bfqq->sort_list));
+	BUG_ON(bfqq->allocated[READ] + bfqq->allocated[WRITE] != 0);
+	BUG_ON(bfqq->entity.tree);
+	BUG_ON(bfq_bfqq_busy(bfqq));
+
+	if (!hlist_unhashed(&bfqq->burst_list_node)) {
+		hlist_del_init(&bfqq->burst_list_node);
+		/*
+		 * Decrement also burst size after the removal, if the
+		 * process associated with bfqq is exiting, and thus
+		 * does not contribute to the burst any longer. This
+		 * decrement helps filter out false positives of large
+		 * bursts, when some short-lived process (often due to
+		 * the execution of commands by some service) happens
+		 * to start and exit while a complex application is
+		 * starting, and thus spawning several processes that
+		 * do I/O (and that *must not* be treated as a large
+		 * burst, see comments on bfq_handle_burst).
+		 *
+		 * In particular, the decrement is performed only if:
+		 * 1) bfqq is not a merged queue, because, if it is,
+		 * then this free of bfqq is not triggered by the exit
+		 * of the process bfqq is associated with, but exactly
+		 * by the fact that bfqq has just been merged.
+		 * 2) burst_size is greater than 0, to handle
+		 * unbalanced decrements. Unbalanced decrements may
+		 * happen in te following case: bfqq is inserted into
+		 * the current burst list--without incrementing
+		 * bust_size--because of a split, but the current
+		 * burst list is not the burst list bfqq belonged to
+		 * (see comments on the case of a split in
+		 * bfq_set_request).
+		 */
+		if (bfqq->bic && bfqq->bfqd->burst_size > 0)
+			bfqq->bfqd->burst_size--;
+	}
+
+	bfq_log_bfqq(bfqq->bfqd, bfqq, "%p freed", bfqq);
+
+	kmem_cache_free(bfq_pool, bfqq);
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	bfqg_put(bfqg);
+#endif
+}
+
+static void bfq_put_cooperator(struct bfq_queue *bfqq)
+{
+	struct bfq_queue *__bfqq, *next;
+
+	/*
+	 * If this queue was scheduled to merge with another queue, be
+	 * sure to drop the reference taken on that queue (and others in
+	 * the merge chain). See bfq_setup_merge and bfq_merge_bfqqs.
+	 */
+	__bfqq = bfqq->new_bfqq;
+	while (__bfqq) {
+		if (__bfqq == bfqq)
+			break;
+		next = __bfqq->new_bfqq;
+		bfq_put_queue(__bfqq);
+		__bfqq = next;
+	}
+}
+
+static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	if (bfqq == bfqd->in_service_queue) {
+		__bfq_bfqq_expire(bfqd, bfqq);
+		bfq_schedule_dispatch(bfqd);
+	}
+
+	bfq_log_bfqq(bfqd, bfqq, "%p, %d", bfqq, bfqq->ref);
+
+	bfq_put_cooperator(bfqq);
+
+	bfq_put_queue(bfqq); /* release process reference */
+}
+
+static void bfq_init_icq(struct io_cq *icq)
+{
+	icq_to_bic(icq)->ttime.last_end_request = ktime_get_ns() - (1ULL<<32);
+}
+
+static void bfq_exit_icq(struct io_cq *icq)
+{
+	struct bfq_io_cq *bic = icq_to_bic(icq);
+	struct bfq_data *bfqd = bic_to_bfqd(bic);
+
+	if (bic_to_bfqq(bic, false)) {
+		bfq_exit_bfqq(bfqd, bic_to_bfqq(bic, false));
+		bic_set_bfqq(bic, NULL, false);
+	}
+
+	if (bic_to_bfqq(bic, true)) {
+		/*
+		 * If the bic is using a shared queue, put the reference
+		 * taken on the io_context when the bic started using a
+		 * shared bfq_queue.
+		 */
+		if (bfq_bfqq_coop(bic_to_bfqq(bic, true)))
+			put_io_context(icq->ioc);
+		bfq_exit_bfqq(bfqd, bic_to_bfqq(bic, true));
+		bic_set_bfqq(bic, NULL, true);
+	}
+}
+
+/*
+ * Update the entity prio values; note that the new values will not
+ * be used until the next (re)activation.
+ */
+static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq,
+				     struct bfq_io_cq *bic)
+{
+	struct task_struct *tsk = current;
+	int ioprio_class;
+
+	ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
+	switch (ioprio_class) {
+	default:
+		dev_err(bfqq->bfqd->queue->backing_dev_info->dev,
+			"bfq: bad prio class %d\n", ioprio_class);
+	case IOPRIO_CLASS_NONE:
+		/*
+		 * No prio set, inherit CPU scheduling settings.
+		 */
+		bfqq->new_ioprio = task_nice_ioprio(tsk);
+		bfqq->new_ioprio_class = task_nice_ioclass(tsk);
+		break;
+	case IOPRIO_CLASS_RT:
+		bfqq->new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
+		bfqq->new_ioprio_class = IOPRIO_CLASS_RT;
+		break;
+	case IOPRIO_CLASS_BE:
+		bfqq->new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
+		bfqq->new_ioprio_class = IOPRIO_CLASS_BE;
+		break;
+	case IOPRIO_CLASS_IDLE:
+		bfqq->new_ioprio_class = IOPRIO_CLASS_IDLE;
+		bfqq->new_ioprio = 7;
+		break;
+	}
+
+	if (bfqq->new_ioprio >= IOPRIO_BE_NR) {
+		pr_crit("bfq_set_next_ioprio_data: new_ioprio %d\n",
+			bfqq->new_ioprio);
+		BUG();
+	}
+
+	bfqq->entity.new_weight = bfq_ioprio_to_weight(bfqq->new_ioprio);
+	bfqq->entity.prio_changed = 1;
+	bfq_log_bfqq(bfqq->bfqd, bfqq,
+		     "bic_class %d prio %d class %d",
+		     ioprio_class, bfqq->new_ioprio, bfqq->new_ioprio_class);
+}
+
+static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio)
+{
+	struct bfq_data *bfqd = bic_to_bfqd(bic);
+	struct bfq_queue *bfqq;
+	unsigned long uninitialized_var(flags);
+	int ioprio = bic->icq.ioc->ioprio;
+
+	/*
+	 * This condition may trigger on a newly created bic, be sure to
+	 * drop the lock before returning.
+	 */
+	if (unlikely(!bfqd) || likely(bic->ioprio == ioprio))
+		return;
+
+	bic->ioprio = ioprio;
+
+	bfqq = bic_to_bfqq(bic, false);
+	if (bfqq) {
+		/* release process reference on this queue */
+		bfq_put_queue(bfqq);
+		bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic);
+		bic_set_bfqq(bic, bfqq, false);
+		bfq_log_bfqq(bfqd, bfqq,
+			     "bfqq %p %d",
+			     bfqq, bfqq->ref);
+	}
+
+	bfqq = bic_to_bfqq(bic, true);
+	if (bfqq)
+		bfq_set_next_ioprio_data(bfqq, bic);
+}
+
+static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+			  struct bfq_io_cq *bic, pid_t pid, int is_sync)
+{
+	RB_CLEAR_NODE(&bfqq->entity.rb_node);
+	INIT_LIST_HEAD(&bfqq->fifo);
+	INIT_HLIST_NODE(&bfqq->burst_list_node);
+	BUG_ON(!hlist_unhashed(&bfqq->burst_list_node));
+
+	bfqq->ref = 0;
+	bfqq->bfqd = bfqd;
+
+	if (bic)
+		bfq_set_next_ioprio_data(bfqq, bic);
+
+	if (is_sync) {
+		/*
+		 * No need to mark as has_short_ttime if in
+		 * idle_class, because no device idling is performed
+		 * for queues in idle class
+		 */
+		if (!bfq_class_idle(bfqq))
+			/* tentatively mark as has_short_ttime */
+			bfq_mark_bfqq_has_short_ttime(bfqq);
+		bfq_mark_bfqq_sync(bfqq);
+		bfq_mark_bfqq_just_created(bfqq);
+	} else
+		bfq_clear_bfqq_sync(bfqq);
+	bfq_mark_bfqq_IO_bound(bfqq);
+
+	/* Tentative initial value to trade off between thr and lat */
+	bfqq->max_budget = (2 * bfq_max_budget(bfqd)) / 3;
+	bfqq->pid = pid;
+
+	bfqq->wr_coeff = 1;
+	bfqq->last_wr_start_finish = jiffies;
+	bfqq->wr_start_at_switch_to_srt = bfq_smallest_from_now();
+	bfqq->budget_timeout = bfq_smallest_from_now();
+	bfqq->split_time = bfq_smallest_from_now();
+
+	/*
+	 * To not forget the possibly high bandwidth consumed by a
+	 * process/queue in the recent past,
+	 * bfq_bfqq_softrt_next_start() returns a value at least equal
+	 * to the current value of bfqq->soft_rt_next_start (see
+	 * comments on bfq_bfqq_softrt_next_start).  Set
+	 * soft_rt_next_start to now, to mean that bfqq has consumed
+	 * no bandwidth so far.
+	 */
+	bfqq->soft_rt_next_start = jiffies;
+
+	/* first request is almost certainly seeky */
+	bfqq->seek_history = 1;
+}
+
+static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd,
+					       struct bfq_group *bfqg,
+					       int ioprio_class, int ioprio)
+{
+	switch (ioprio_class) {
+	case IOPRIO_CLASS_RT:
+		return &bfqg->async_bfqq[0][ioprio];
+	case IOPRIO_CLASS_NONE:
+		ioprio = IOPRIO_NORM;
+		/* fall through */
+	case IOPRIO_CLASS_BE:
+		return &bfqg->async_bfqq[1][ioprio];
+	case IOPRIO_CLASS_IDLE:
+		return &bfqg->async_idle_bfqq;
+	default:
+		BUG();
+	}
+}
+
+static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
+				       struct bio *bio, bool is_sync,
+				       struct bfq_io_cq *bic)
+{
+	const int ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
+	const int ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
+	struct bfq_queue **async_bfqq = NULL;
+	struct bfq_queue *bfqq;
+	struct bfq_group *bfqg;
+
+	rcu_read_lock();
+
+	bfqg = bfq_find_set_group(bfqd, bio_blkcg(bio));
+	if (!bfqg) {
+		bfqq = &bfqd->oom_bfqq;
+		goto out;
+	}
+
+	if (!is_sync) {
+		async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class,
+						  ioprio);
+		bfqq = *async_bfqq;
+		if (bfqq)
+			goto out;
+	}
+
+	bfqq = kmem_cache_alloc_node(bfq_pool,
+				     GFP_NOWAIT | __GFP_ZERO | __GFP_NOWARN,
+				     bfqd->queue->node);
+
+	if (bfqq) {
+		bfq_init_bfqq(bfqd, bfqq, bic, current->pid,
+			      is_sync);
+		bfq_init_entity(&bfqq->entity, bfqg);
+		bfq_log_bfqq(bfqd, bfqq, "allocated");
+	} else {
+		bfqq = &bfqd->oom_bfqq;
+		bfq_log_bfqq(bfqd, bfqq, "using oom bfqq");
+		goto out;
+	}
+
+	/*
+	 * Pin the queue now that it's allocated, scheduler exit will
+	 * prune it.
+	 */
+	if (async_bfqq) {
+		bfqq->ref++; /*
+			      * Extra group reference, w.r.t. sync
+			      * queue. This extra reference is removed
+			      * only if bfqq->bfqg disappears, to
+			      * guarantee that this queue is not freed
+			      * until its group goes away.
+			      */
+		bfq_log_bfqq(bfqd, bfqq, "bfqq not in async: %p, %d",
+			     bfqq, bfqq->ref);
+		*async_bfqq = bfqq;
+	}
+
+out:
+	bfqq->ref++; /* get a process reference to this queue */
+	bfq_log_bfqq(bfqd, bfqq, "at end: %p, %d", bfqq, bfqq->ref);
+	rcu_read_unlock();
+	return bfqq;
+}
+
+static void bfq_update_io_thinktime(struct bfq_data *bfqd,
+				    struct bfq_io_cq *bic)
+{
+	struct bfq_ttime *ttime = &bic->ttime;
+	u64 elapsed = ktime_get_ns() - bic->ttime.last_end_request;
+
+	elapsed = min_t(u64, elapsed, 2 * bfqd->bfq_slice_idle);
+
+	ttime->ttime_samples = (7*bic->ttime.ttime_samples + 256) / 8;
+	ttime->ttime_total = div_u64(7*ttime->ttime_total + 256*elapsed,  8);
+	ttime->ttime_mean = div64_ul(ttime->ttime_total + 128,
+				     ttime->ttime_samples);
+}
+
+static void
+bfq_update_io_seektime(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+		       struct request *rq)
+{
+	bfqq->seek_history <<= 1;
+	bfqq->seek_history |=
+		get_sdist(bfqq->last_request_pos, rq) > BFQQ_SEEK_THR &&
+		(!blk_queue_nonrot(bfqd->queue) ||
+		 blk_rq_sectors(rq) < BFQQ_SECT_THR_NONROT);
+}
+
+static void bfq_update_has_short_ttime(struct bfq_data *bfqd,
+				       struct bfq_queue *bfqq,
+				       struct bfq_io_cq *bic)
+{
+	bool has_short_ttime = true;
+
+	/*
+	 * No need to update has_short_ttime if bfqq is async or in
+	 * idle io prio class, or if bfq_slice_idle is zero, because
+	 * no device idling is performed for bfqq in this case.
+	 */
+	if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq) ||
+	    bfqd->bfq_slice_idle == 0)
+		return;
+
+	/* Idle window just restored, statistics are meaningless. */
+	if (time_is_after_eq_jiffies(bfqq->split_time +
+				     bfqd->bfq_wr_min_idle_time))
+		return;
+
+	/* Think time is infinite if no process is linked to
+	 * bfqq. Otherwise check average think time to
+	 * decide whether to mark as has_short_ttime
+	 */
+	if (atomic_read(&bic->icq.ioc->active_ref) == 0 ||
+	    (bfq_sample_valid(bic->ttime.ttime_samples) &&
+	     bic->ttime.ttime_mean > bfqd->bfq_slice_idle))
+		has_short_ttime = false;
+
+	bfq_log_bfqq(bfqd, bfqq, "has_short_ttime %d",
+		has_short_ttime);
+
+	if (has_short_ttime)
+		bfq_mark_bfqq_has_short_ttime(bfqq);
+	else
+		bfq_clear_bfqq_has_short_ttime(bfqq);
+}
+
+/*
+ * Called when a new fs request (rq) is added to bfqq.  Check if there's
+ * something we should do about it.
+ */
+static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+			    struct request *rq)
+{
+	struct bfq_io_cq *bic = RQ_BIC(rq);
+
+	if (rq->cmd_flags & REQ_META)
+		bfqq->meta_pending++;
+
+	bfq_update_io_thinktime(bfqd, bic);
+	bfq_update_has_short_ttime(bfqd, bfqq, bic);
+	bfq_update_io_seektime(bfqd, bfqq, rq);
+
+	bfq_log_bfqq(bfqd, bfqq,
+		     "has_short_ttime=%d (seeky %d)",
+		     bfq_bfqq_has_short_ttime(bfqq), BFQQ_SEEKY(bfqq));
+
+	bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
+
+	if (bfqq == bfqd->in_service_queue && bfq_bfqq_wait_request(bfqq)) {
+		bool small_req = bfqq->queued[rq_is_sync(rq)] == 1 &&
+				 blk_rq_sectors(rq) < 32;
+		bool budget_timeout = bfq_bfqq_budget_timeout(bfqq);
+
+		/*
+		 * There is just this request queued: if the request
+		 * is small and the queue is not to be expired, then
+		 * just exit.
+		 *
+		 * In this way, if the device is being idled to wait
+		 * for a new request from the in-service queue, we
+		 * avoid unplugging the device and committing the
+		 * device to serve just a small request. On the
+		 * contrary, we wait for the block layer to decide
+		 * when to unplug the device: hopefully, new requests
+		 * will be merged to this one quickly, then the device
+		 * will be unplugged and larger requests will be
+		 * dispatched.
+		 */
+		if (small_req && !budget_timeout)
+			return;
+
+		/*
+		 * A large enough request arrived, or the queue is to
+		 * be expired: in both cases disk idling is to be
+		 * stopped, so clear wait_request flag and reset
+		 * timer.
+		 */
+		bfq_clear_bfqq_wait_request(bfqq);
+		hrtimer_try_to_cancel(&bfqd->idle_slice_timer);
+		bfqg_stats_update_idle_time(bfqq_group(bfqq));
+
+		/*
+		 * The queue is not empty, because a new request just
+		 * arrived. Hence we can safely expire the queue, in
+		 * case of budget timeout, without risking that the
+		 * timestamps of the queue are not updated correctly.
+		 * See [1] for more details.
+		 */
+		if (budget_timeout)
+			bfq_bfqq_expire(bfqd, bfqq, false,
+					BFQ_BFQQ_BUDGET_TIMEOUT);
+
+		/*
+		 * Let the request rip immediately, or let a new queue be
+		 * selected if bfqq has just been expired.
+		 */
+		__blk_run_queue(bfqd->queue);
+	}
+}
+
+static void bfq_insert_request(struct request_queue *q, struct request *rq)
+{
+	struct bfq_data *bfqd = q->elevator->elevator_data;
+	struct bfq_queue *bfqq = RQ_BFQQ(rq), *new_bfqq;
+
+	assert_spin_locked(bfqd->queue->queue_lock);
+
+	/*
+	 * An unplug may trigger a requeue of a request from the device
+	 * driver: make sure we are in process context while trying to
+	 * merge two bfq_queues.
+	 */
+	if (!in_interrupt()) {
+		new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true);
+		if (new_bfqq) {
+			if (bic_to_bfqq(RQ_BIC(rq), 1) != bfqq)
+				new_bfqq = bic_to_bfqq(RQ_BIC(rq), 1);
+			/*
+			 * Release the request's reference to the old bfqq
+			 * and make sure one is taken to the shared queue.
+			 */
+			new_bfqq->allocated[rq_data_dir(rq)]++;
+			bfqq->allocated[rq_data_dir(rq)]--;
+			new_bfqq->ref++;
+			if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq)
+				bfq_merge_bfqqs(bfqd, RQ_BIC(rq),
+						bfqq, new_bfqq);
+
+			bfq_clear_bfqq_just_created(bfqq);
+			/*
+			 * rq is about to be enqueued into new_bfqq,
+			 * release rq reference on bfqq
+			 */
+			bfq_put_queue(bfqq);
+			rq->elv.priv[1] = new_bfqq;
+			bfqq = new_bfqq;
+		}
+	}
+
+	bfq_add_request(rq);
+
+	rq->fifo_time = ktime_get_ns() + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
+	list_add_tail(&rq->queuelist, &bfqq->fifo);
+
+	bfq_rq_enqueued(bfqd, bfqq, rq);
+}
+
+static void bfq_update_hw_tag(struct bfq_data *bfqd)
+{
+	bfqd->max_rq_in_driver = max_t(int, bfqd->max_rq_in_driver,
+				       bfqd->rq_in_driver);
+
+	if (bfqd->hw_tag == 1)
+		return;
+
+	/*
+	 * This sample is valid if the number of outstanding requests
+	 * is large enough to allow a queueing behavior.  Note that the
+	 * sum is not exact, as it's not taking into account deactivated
+	 * requests.
+	 */
+	if (bfqd->rq_in_driver + bfqd->queued < BFQ_HW_QUEUE_THRESHOLD)
+		return;
+
+	if (bfqd->hw_tag_samples++ < BFQ_HW_QUEUE_SAMPLES)
+		return;
+
+	bfqd->hw_tag = bfqd->max_rq_in_driver > BFQ_HW_QUEUE_THRESHOLD;
+	bfqd->max_rq_in_driver = 0;
+	bfqd->hw_tag_samples = 0;
+}
+
+static void bfq_completed_request(struct request_queue *q, struct request *rq)
+{
+	struct bfq_queue *bfqq = RQ_BFQQ(rq);
+	struct bfq_data *bfqd = bfqq->bfqd;
+	u64 now_ns;
+	u32 delta_us;
+
+	bfq_log_bfqq(bfqd, bfqq, "completed one req with %u sects left",
+		     blk_rq_sectors(rq));
+
+	assert_spin_locked(bfqd->queue->queue_lock);
+	bfq_update_hw_tag(bfqd);
+
+	BUG_ON(!bfqd->rq_in_driver);
+	BUG_ON(!bfqq->dispatched);
+	bfqd->rq_in_driver--;
+	bfqq->dispatched--;
+	bfqg_stats_update_completion(bfqq_group(bfqq),
+				     rq_start_time_ns(rq),
+				     rq_io_start_time_ns(rq),
+				     rq->cmd_flags);
+
+	if (!bfqq->dispatched && !bfq_bfqq_busy(bfqq)) {
+		BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list));
+		/*
+		 * Set budget_timeout (which we overload to store the
+		 * time at which the queue remains with no backlog and
+		 * no outstanding request; used by the weight-raising
+		 * mechanism).
+		 */
+		bfqq->budget_timeout = jiffies;
+
+		bfq_weights_tree_remove(bfqd, &bfqq->entity,
+					&bfqd->queue_weights_tree);
+	}
+
+	now_ns = ktime_get_ns();
+
+	RQ_BIC(rq)->ttime.last_end_request = now_ns;
+
+	/*
+	 * Using us instead of ns, to get a reasonable precision in
+	 * computing rate in next check.
+	 */
+	delta_us = div_u64(now_ns - bfqd->last_completion, NSEC_PER_USEC);
+
+	bfq_log(bfqd, "delta %uus/%luus max_size %u rate %llu/%llu",
+		delta_us, BFQ_MIN_TT/NSEC_PER_USEC, bfqd->last_rq_max_size,
+		delta_us > 0 ?
+		(USEC_PER_SEC*
+		(u64)((bfqd->last_rq_max_size<<BFQ_RATE_SHIFT)/delta_us))
+			>>BFQ_RATE_SHIFT :
+		(USEC_PER_SEC*
+		(u64)(bfqd->last_rq_max_size<<BFQ_RATE_SHIFT))>>BFQ_RATE_SHIFT,
+		(USEC_PER_SEC*(u64)(1UL<<(BFQ_RATE_SHIFT-10)))>>BFQ_RATE_SHIFT);
+
+	/*
+	 * If the request took rather long to complete, and, according
+	 * to the maximum request size recorded, this completion latency
+	 * implies that the request was certainly served at a very low
+	 * rate (less than 1M sectors/sec), then the whole observation
+	 * interval that lasts up to this time instant cannot be a
+	 * valid time interval for computing a new peak rate.  Invoke
+	 * bfq_update_rate_reset to have the following three steps
+	 * taken:
+	 * - close the observation interval at the last (previous)
+	 *   request dispatch or completion
+	 * - compute rate, if possible, for that observation interval
+	 * - reset to zero samples, which will trigger a proper
+	 *   re-initialization of the observation interval on next
+	 *   dispatch
+	 */
+	if (delta_us > BFQ_MIN_TT/NSEC_PER_USEC &&
+	   (bfqd->last_rq_max_size<<BFQ_RATE_SHIFT)/delta_us <
+			1UL<<(BFQ_RATE_SHIFT - 10))
+		bfq_update_rate_reset(bfqd, NULL);
+	bfqd->last_completion = now_ns;
+
+	/*
+	 * If we are waiting to discover whether the request pattern
+	 * of the task associated with the queue is actually
+	 * isochronous, and both requisites for this condition to hold
+	 * are now satisfied, then compute soft_rt_next_start (see the
+	 * comments on the function bfq_bfqq_softrt_next_start()). We
+	 * schedule this delayed check when bfqq expires, if it still
+	 * has in-flight requests.
+	 */
+	if (bfq_bfqq_softrt_update(bfqq) && bfqq->dispatched == 0 &&
+	    RB_EMPTY_ROOT(&bfqq->sort_list))
+		bfqq->soft_rt_next_start =
+			bfq_bfqq_softrt_next_start(bfqd, bfqq);
+
+	/*
+	 * If this is the in-service queue, check if it needs to be expired,
+	 * or if we want to idle in case it has no pending requests.
+	 */
+	if (bfqd->in_service_queue == bfqq) {
+		if (bfqq->dispatched == 0 && bfq_bfqq_must_idle(bfqq)) {
+			bfq_arm_slice_timer(bfqd);
+			goto out;
+		} else if (bfq_may_expire_for_budg_timeout(bfqq))
+			bfq_bfqq_expire(bfqd, bfqq, false,
+					BFQ_BFQQ_BUDGET_TIMEOUT);
+		else if (RB_EMPTY_ROOT(&bfqq->sort_list) &&
+			 (bfqq->dispatched == 0 ||
+			  !bfq_bfqq_may_idle(bfqq)))
+			bfq_bfqq_expire(bfqd, bfqq, false,
+					BFQ_BFQQ_NO_MORE_REQUESTS);
+	}
+
+	if (!bfqd->rq_in_driver)
+		bfq_schedule_dispatch(bfqd);
+
+out:
+	return;
+}
+
+static int __bfq_may_queue(struct bfq_queue *bfqq)
+{
+	if (bfq_bfqq_wait_request(bfqq) && bfq_bfqq_must_alloc(bfqq)) {
+		bfq_clear_bfqq_must_alloc(bfqq);
+		return ELV_MQUEUE_MUST;
+	}
+
+	return ELV_MQUEUE_MAY;
+}
+
+static int bfq_may_queue(struct request_queue *q, unsigned int op)
+{
+	struct bfq_data *bfqd = q->elevator->elevator_data;
+	struct task_struct *tsk = current;
+	struct bfq_io_cq *bic;
+	struct bfq_queue *bfqq;
+
+	/*
+	 * Don't force setup of a queue from here, as a call to may_queue
+	 * does not necessarily imply that a request actually will be
+	 * queued. So just lookup a possibly existing queue, or return
+	 * 'may queue' if that fails.
+	 */
+	bic = bfq_bic_lookup(bfqd, tsk->io_context);
+	if (!bic)
+		return ELV_MQUEUE_MAY;
+
+	bfqq = bic_to_bfqq(bic, op_is_sync(op));
+	if (bfqq)
+		return __bfq_may_queue(bfqq);
+
+	return ELV_MQUEUE_MAY;
+}
+
+/*
+ * Queue lock held here.
+ */
+static void bfq_put_request(struct request *rq)
+{
+	struct bfq_queue *bfqq = RQ_BFQQ(rq);
+
+	if (bfqq) {
+		const int rw = rq_data_dir(rq);
+
+		BUG_ON(!bfqq->allocated[rw]);
+		bfqq->allocated[rw]--;
+
+		rq->elv.priv[0] = NULL;
+		rq->elv.priv[1] = NULL;
+
+		bfq_log_bfqq(bfqq->bfqd, bfqq, "%p, %d",
+			     bfqq, bfqq->ref);
+		bfq_put_queue(bfqq);
+	}
+}
+
+/*
+ * Returns NULL if a new bfqq should be allocated, or the old bfqq if this
+ * was the last process referring to that bfqq.
+ */
+static struct bfq_queue *
+bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
+{
+	bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue");
+
+	put_io_context(bic->icq.ioc);
+
+	if (bfqq_process_refs(bfqq) == 1) {
+		bfqq->pid = current->pid;
+		bfq_clear_bfqq_coop(bfqq);
+		bfq_clear_bfqq_split_coop(bfqq);
+		return bfqq;
+	}
+
+	bic_set_bfqq(bic, NULL, 1);
+
+	bfq_put_cooperator(bfqq);
+
+	bfq_put_queue(bfqq);
+	return NULL;
+}
+
+/*
+ * Allocate bfq data structures associated with this request.
+ */
+static int bfq_set_request(struct request_queue *q, struct request *rq,
+			   struct bio *bio, gfp_t gfp_mask)
+{
+	struct bfq_data *bfqd = q->elevator->elevator_data;
+	struct bfq_io_cq *bic = icq_to_bic(rq->elv.icq);
+	const int rw = rq_data_dir(rq);
+	const int is_sync = rq_is_sync(rq);
+	struct bfq_queue *bfqq;
+	unsigned long flags;
+	bool bfqq_already_existing = false, split = false;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+
+	if (!bic)
+		goto queue_fail;
+
+	bfq_check_ioprio_change(bic, bio);
+
+	bfq_bic_update_cgroup(bic, bio);
+
+new_queue:
+	bfqq = bic_to_bfqq(bic, is_sync);
+	if (!bfqq || bfqq == &bfqd->oom_bfqq) {
+		if (bfqq)
+			bfq_put_queue(bfqq);
+		bfqq = bfq_get_queue(bfqd, bio, is_sync, bic);
+		BUG_ON(!hlist_unhashed(&bfqq->burst_list_node));
+
+		bic_set_bfqq(bic, bfqq, is_sync);
+		if (split && is_sync) {
+			bfq_log_bfqq(bfqd, bfqq,
+				     "was_in_list %d "
+				     "was_in_large_burst %d "
+				     "large burst in progress %d",
+				     bic->was_in_burst_list,
+				     bic->saved_in_large_burst,
+				     bfqd->large_burst);
+
+			if ((bic->was_in_burst_list && bfqd->large_burst) ||
+			    bic->saved_in_large_burst) {
+				bfq_log_bfqq(bfqd, bfqq,
+					     "marking in "
+					     "large burst");
+				bfq_mark_bfqq_in_large_burst(bfqq);
+			} else {
+				bfq_log_bfqq(bfqd, bfqq,
+					     "clearing in "
+					     "large burst");
+				bfq_clear_bfqq_in_large_burst(bfqq);
+				if (bic->was_in_burst_list)
+					/*
+					 * If bfqq was in the current
+					 * burst list before being
+					 * merged, then we have to add
+					 * it back. And we do not need
+					 * to increase burst_size, as
+					 * we did not decrement
+					 * burst_size when we removed
+					 * bfqq from the burst list as
+					 * a consequence of a merge
+					 * (see comments in
+					 * bfq_put_queue). In this
+					 * respect, it would be rather
+					 * costly to know whether the
+					 * current burst list is still
+					 * the same burst list from
+					 * which bfqq was removed on
+					 * the merge. To avoid this
+					 * cost, if bfqq was in a
+					 * burst list, then we add
+					 * bfqq to the current burst
+					 * list without any further
+					 * check. This can cause
+					 * inappropriate insertions,
+					 * but rarely enough to not
+					 * harm the detection of large
+					 * bursts significantly.
+					 */
+					hlist_add_head(&bfqq->burst_list_node,
+						       &bfqd->burst_list);
+			}
+			bfqq->split_time = jiffies;
+		}
+	} else {
+		/* If the queue was seeky for too long, break it apart. */
+		if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) {
+			bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq");
+
+			/* Update bic before losing reference to bfqq */
+			if (bfq_bfqq_in_large_burst(bfqq))
+				bic->saved_in_large_burst = true;
+
+			bfqq = bfq_split_bfqq(bic, bfqq);
+			split = true;
+			if (!bfqq)
+				goto new_queue;
+			else
+				bfqq_already_existing = true;
+		}
+	}
+
+	bfqq->allocated[rw]++;
+	bfqq->ref++;
+	bfq_log_bfqq(bfqd, bfqq, "bfqq %p, %d", bfqq, bfqq->ref);
+
+	rq->elv.priv[0] = bic;
+	rq->elv.priv[1] = bfqq;
+
+	/*
+	 * If a bfq_queue has only one process reference, it is owned
+	 * by only one bfq_io_cq: we can set the bic field of the
+	 * bfq_queue to the address of that structure. Also, if the
+	 * queue has just been split, mark a flag so that the
+	 * information is available to the other scheduler hooks.
+	 */
+	if (likely(bfqq != &bfqd->oom_bfqq) && bfqq_process_refs(bfqq) == 1) {
+		bfqq->bic = bic;
+		if (split) {
+			/*
+			 * If the queue has just been split from a shared
+			 * queue, restore the idle window and the possible
+			 * weight raising period.
+			 */
+			bfq_bfqq_resume_state(bfqq, bfqd, bic,
+					      bfqq_already_existing);
+		}
+	}
+
+	if (unlikely(bfq_bfqq_just_created(bfqq)))
+		bfq_handle_burst(bfqd, bfqq);
+
+	spin_unlock_irqrestore(q->queue_lock, flags);
+
+	return 0;
+
+queue_fail:
+	bfq_schedule_dispatch(bfqd);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+
+	return 1;
+}
+
+static void bfq_kick_queue(struct work_struct *work)
+{
+	struct bfq_data *bfqd =
+		container_of(work, struct bfq_data, unplug_work);
+	struct request_queue *q = bfqd->queue;
+
+	spin_lock_irq(q->queue_lock);
+	__blk_run_queue(q);
+	spin_unlock_irq(q->queue_lock);
+}
+
+/*
+ * Handler of the expiration of the timer running if the in-service queue
+ * is idling inside its time slice.
+ */
+static enum hrtimer_restart bfq_idle_slice_timer(struct hrtimer *timer)
+{
+	struct bfq_data *bfqd = container_of(timer, struct bfq_data,
+					     idle_slice_timer);
+	struct bfq_queue *bfqq;
+	unsigned long flags;
+	enum bfqq_expiration reason;
+
+	spin_lock_irqsave(bfqd->queue->queue_lock, flags);
+
+	bfqq = bfqd->in_service_queue;
+	/*
+	 * Theoretical race here: the in-service queue can be NULL or
+	 * different from the queue that was idling if the timer handler
+	 * spins on the queue_lock and a new request arrives for the
+	 * current queue and there is a full dispatch cycle that changes
+	 * the in-service queue.  This can hardly happen, but in the worst
+	 * case we just expire a queue too early.
+	 */
+	if (bfqq) {
+		bfq_log_bfqq(bfqd, bfqq, "expired");
+		bfq_clear_bfqq_wait_request(bfqq);
+
+		if (bfq_bfqq_budget_timeout(bfqq))
+			/*
+			 * Also here the queue can be safely expired
+			 * for budget timeout without wasting
+			 * guarantees
+			 */
+			reason = BFQ_BFQQ_BUDGET_TIMEOUT;
+		else if (bfqq->queued[0] == 0 && bfqq->queued[1] == 0)
+			/*
+			 * The queue may not be empty upon timer expiration,
+			 * because we may not disable the timer when the
+			 * first request of the in-service queue arrives
+			 * during disk idling.
+			 */
+			reason = BFQ_BFQQ_TOO_IDLE;
+		else
+			goto schedule_dispatch;
+
+		bfq_bfqq_expire(bfqd, bfqq, true, reason);
+	}
+
+schedule_dispatch:
+	bfq_schedule_dispatch(bfqd);
+
+	spin_unlock_irqrestore(bfqd->queue->queue_lock, flags);
+	return HRTIMER_NORESTART;
+}
+
+static void bfq_shutdown_timer_wq(struct bfq_data *bfqd)
+{
+	hrtimer_cancel(&bfqd->idle_slice_timer);
+	cancel_work_sync(&bfqd->unplug_work);
+}
+
+static void __bfq_put_async_bfqq(struct bfq_data *bfqd,
+				 struct bfq_queue **bfqq_ptr)
+{
+	struct bfq_group *root_group = bfqd->root_group;
+	struct bfq_queue *bfqq = *bfqq_ptr;
+
+	bfq_log(bfqd, "%p", bfqq);
+	if (bfqq) {
+		bfq_bfqq_move(bfqd, bfqq, root_group);
+		bfq_log_bfqq(bfqd, bfqq, "putting %p, %d",
+			     bfqq, bfqq->ref);
+		bfq_put_queue(bfqq);
+		*bfqq_ptr = NULL;
+	}
+}
+
+/*
+ * Release all the bfqg references to its async queues.  If we are
+ * deallocating the group these queues may still contain requests, so
+ * we reparent them to the root cgroup (i.e., the only one that will
+ * exist for sure until all the requests on a device are gone).
+ */
+static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg)
+{
+	int i, j;
+
+	for (i = 0; i < 2; i++)
+		for (j = 0; j < IOPRIO_BE_NR; j++)
+			__bfq_put_async_bfqq(bfqd, &bfqg->async_bfqq[i][j]);
+
+	__bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq);
+}
+
+static void bfq_exit_queue(struct elevator_queue *e)
+{
+	struct bfq_data *bfqd = e->elevator_data;
+	struct request_queue *q = bfqd->queue;
+	struct bfq_queue *bfqq, *n;
+
+	bfq_shutdown_timer_wq(bfqd);
+
+	spin_lock_irq(q->queue_lock);
+
+	BUG_ON(bfqd->in_service_queue);
+	list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list)
+		bfq_deactivate_bfqq(bfqd, bfqq, false, false);
+
+	spin_unlock_irq(q->queue_lock);
+
+	bfq_shutdown_timer_wq(bfqd);
+
+	BUG_ON(hrtimer_active(&bfqd->idle_slice_timer));
+
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	/* release oom-queue reference to root group */
+	bfqg_put(bfqd->root_group);
+
+	blkcg_deactivate_policy(q, &blkcg_policy_bfq);
+#else
+	bfq_put_async_queues(bfqd, bfqd->root_group);
+	kfree(bfqd->root_group);
+#endif
+
+	kfree(bfqd);
+}
+
+static void bfq_init_root_group(struct bfq_group *root_group,
+				struct bfq_data *bfqd)
+{
+	int i;
+
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	root_group->entity.parent = NULL;
+	root_group->my_entity = NULL;
+	root_group->bfqd = bfqd;
+#endif
+	root_group->rq_pos_tree = RB_ROOT;
+	for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
+		root_group->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;
+	root_group->sched_data.bfq_class_idle_last_service = jiffies;
+}
+
+static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+{
+	struct bfq_data *bfqd;
+	struct elevator_queue *eq;
+
+	eq = elevator_alloc(q, e);
+	if (!eq)
+		return -ENOMEM;
+
+	bfqd = kzalloc_node(sizeof(*bfqd), GFP_KERNEL, q->node);
+	if (!bfqd) {
+		kobject_put(&eq->kobj);
+		return -ENOMEM;
+	}
+	eq->elevator_data = bfqd;
+
+	/*
+	 * Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues.
+	 * Grab a permanent reference to it, so that the normal code flow
+	 * will not attempt to free it.
+	 */
+	bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, NULL, 1, 0);
+	bfqd->oom_bfqq.ref++;
+	bfqd->oom_bfqq.new_ioprio = BFQ_DEFAULT_QUEUE_IOPRIO;
+	bfqd->oom_bfqq.new_ioprio_class = IOPRIO_CLASS_BE;
+	bfqd->oom_bfqq.entity.new_weight =
+		bfq_ioprio_to_weight(bfqd->oom_bfqq.new_ioprio);
+
+	/* oom_bfqq does not participate to bursts */
+	bfq_clear_bfqq_just_created(&bfqd->oom_bfqq);
+	/*
+	 * Trigger weight initialization, according to ioprio, at the
+	 * oom_bfqq's first activation. The oom_bfqq's ioprio and ioprio
+	 * class won't be changed any more.
+	 */
+	bfqd->oom_bfqq.entity.prio_changed = 1;
+
+	bfqd->queue = q;
+
+	spin_lock_irq(q->queue_lock);
+	q->elevator = eq;
+	spin_unlock_irq(q->queue_lock);
+
+	bfqd->root_group = bfq_create_group_hierarchy(bfqd, q->node);
+	if (!bfqd->root_group)
+		goto out_free;
+	bfq_init_root_group(bfqd->root_group, bfqd);
+	bfq_init_entity(&bfqd->oom_bfqq.entity, bfqd->root_group);
+
+	hrtimer_init(&bfqd->idle_slice_timer, CLOCK_MONOTONIC,
+		     HRTIMER_MODE_REL);
+	bfqd->idle_slice_timer.function = bfq_idle_slice_timer;
+
+	bfqd->queue_weights_tree = RB_ROOT;
+	bfqd->group_weights_tree = RB_ROOT;
+
+	INIT_WORK(&bfqd->unplug_work, bfq_kick_queue);
+
+	INIT_LIST_HEAD(&bfqd->active_list);
+	INIT_LIST_HEAD(&bfqd->idle_list);
+	INIT_HLIST_HEAD(&bfqd->burst_list);
+
+	bfqd->hw_tag = -1;
+
+	bfqd->bfq_max_budget = bfq_default_max_budget;
+
+	bfqd->bfq_fifo_expire[0] = bfq_fifo_expire[0];
+	bfqd->bfq_fifo_expire[1] = bfq_fifo_expire[1];
+	bfqd->bfq_back_max = bfq_back_max;
+	bfqd->bfq_back_penalty = bfq_back_penalty;
+	bfqd->bfq_slice_idle = bfq_slice_idle;
+	bfqd->bfq_timeout = bfq_timeout;
+
+	bfqd->bfq_requests_within_timer = 120;
+
+	bfqd->bfq_large_burst_thresh = 8;
+	bfqd->bfq_burst_interval = msecs_to_jiffies(180);
+
+	bfqd->low_latency = true;
+
+	/*
+	 * Trade-off between responsiveness and fairness.
+	 */
+	bfqd->bfq_wr_coeff = 30;
+	bfqd->bfq_wr_rt_max_time = msecs_to_jiffies(300);
+	bfqd->bfq_wr_max_time = 0;
+	bfqd->bfq_wr_min_idle_time = msecs_to_jiffies(2000);
+	bfqd->bfq_wr_min_inter_arr_async = msecs_to_jiffies(500);
+	bfqd->bfq_wr_max_softrt_rate = 7000; /*
+					      * Approximate rate required
+					      * to playback or record a
+					      * high-definition compressed
+					      * video.
+					      */
+	bfqd->wr_busy_queues = 0;
+
+	/*
+	 * Begin by assuming, optimistically, that the device is a
+	 * high-speed one, and that its peak rate is equal to 2/3 of
+	 * the highest reference rate.
+	 */
+	bfqd->RT_prod = R_fast[blk_queue_nonrot(bfqd->queue)] *
+			T_fast[blk_queue_nonrot(bfqd->queue)];
+	bfqd->peak_rate = R_fast[blk_queue_nonrot(bfqd->queue)] * 2 / 3;
+	bfqd->device_speed = BFQ_BFQD_FAST;
+
+	return 0;
+
+out_free:
+	kfree(bfqd);
+	kobject_put(&eq->kobj);
+	return -ENOMEM;
+}
+
+static void bfq_registered_queue(struct request_queue *q)
+{
+	wbt_disable_default(q);
+}
+
+static void bfq_slab_kill(void)
+{
+	kmem_cache_destroy(bfq_pool);
+}
+
+static int __init bfq_slab_setup(void)
+{
+	bfq_pool = KMEM_CACHE(bfq_queue, 0);
+	if (!bfq_pool)
+		return -ENOMEM;
+	return 0;
+}
+
+static ssize_t bfq_var_show(unsigned int var, char *page)
+{
+	return sprintf(page, "%u\n", var);
+}
+
+static ssize_t bfq_var_store(unsigned long *var, const char *page,
+			     size_t count)
+{
+	unsigned long new_val;
+	int ret = kstrtoul(page, 10, &new_val);
+
+	if (ret == 0)
+		*var = new_val;
+
+	return count;
+}
+
+static ssize_t bfq_wr_max_time_show(struct elevator_queue *e, char *page)
+{
+	struct bfq_data *bfqd = e->elevator_data;
+
+	return sprintf(page, "%d\n", bfqd->bfq_wr_max_time > 0 ?
+		       jiffies_to_msecs(bfqd->bfq_wr_max_time) :
+		       jiffies_to_msecs(bfq_wr_duration(bfqd)));
+}
+
+static ssize_t bfq_weights_show(struct elevator_queue *e, char *page)
+{
+	struct bfq_queue *bfqq;
+	struct bfq_data *bfqd = e->elevator_data;
+	ssize_t num_char = 0;
+
+	num_char += sprintf(page + num_char, "Tot reqs queued %d\n\n",
+			    bfqd->queued);
+
+	spin_lock_irq(bfqd->queue->queue_lock);
+
+	num_char += sprintf(page + num_char, "Active:\n");
+	list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) {
+		num_char += sprintf(page + num_char,
+				    "pid%d: weight %hu, nr_queued %d %d, ",
+				    bfqq->pid,
+				    bfqq->entity.weight,
+				    bfqq->queued[0],
+				    bfqq->queued[1]);
+		num_char += sprintf(page + num_char,
+				    "dur %d/%u\n",
+				    jiffies_to_msecs(
+					    jiffies -
+					    bfqq->last_wr_start_finish),
+				    jiffies_to_msecs(bfqq->wr_cur_max_time));
+	}
+
+	num_char += sprintf(page + num_char, "Idle:\n");
+	list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) {
+		num_char += sprintf(page + num_char,
+				    "pid%d: weight %hu, dur %d/%u\n",
+				    bfqq->pid,
+				    bfqq->entity.weight,
+				    jiffies_to_msecs(jiffies -
+						     bfqq->last_wr_start_finish),
+				    jiffies_to_msecs(bfqq->wr_cur_max_time));
+	}
+
+	spin_unlock_irq(bfqd->queue->queue_lock);
+
+	return num_char;
+}
+
+#define SHOW_FUNCTION(__FUNC, __VAR, __CONV)				\
+static ssize_t __FUNC(struct elevator_queue *e, char *page)		\
+{									\
+	struct bfq_data *bfqd = e->elevator_data;			\
+	u64 __data = __VAR;						\
+	if (__CONV == 1)						\
+		__data = jiffies_to_msecs(__data);			\
+	else if (__CONV == 2)						\
+		__data = div_u64(__data, NSEC_PER_MSEC);		\
+	return bfq_var_show(__data, (page));				\
+}
+SHOW_FUNCTION(bfq_fifo_expire_sync_show, bfqd->bfq_fifo_expire[1], 2);
+SHOW_FUNCTION(bfq_fifo_expire_async_show, bfqd->bfq_fifo_expire[0], 2);
+SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0);
+SHOW_FUNCTION(bfq_back_seek_penalty_show, bfqd->bfq_back_penalty, 0);
+SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 2);
+SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0);
+SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout, 1);
+SHOW_FUNCTION(bfq_strict_guarantees_show, bfqd->strict_guarantees, 0);
+SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0);
+SHOW_FUNCTION(bfq_wr_coeff_show, bfqd->bfq_wr_coeff, 0);
+SHOW_FUNCTION(bfq_wr_rt_max_time_show, bfqd->bfq_wr_rt_max_time, 1);
+SHOW_FUNCTION(bfq_wr_min_idle_time_show, bfqd->bfq_wr_min_idle_time, 1);
+SHOW_FUNCTION(bfq_wr_min_inter_arr_async_show, bfqd->bfq_wr_min_inter_arr_async,
+	1);
+SHOW_FUNCTION(bfq_wr_max_softrt_rate_show, bfqd->bfq_wr_max_softrt_rate, 0);
+#undef SHOW_FUNCTION
+
+#define USEC_SHOW_FUNCTION(__FUNC, __VAR)				\
+static ssize_t __FUNC(struct elevator_queue *e, char *page)		\
+{									\
+	struct bfq_data *bfqd = e->elevator_data;			\
+	u64 __data = __VAR;						\
+	__data = div_u64(__data, NSEC_PER_USEC);			\
+	return bfq_var_show(__data, (page));				\
+}
+USEC_SHOW_FUNCTION(bfq_slice_idle_us_show, bfqd->bfq_slice_idle);
+#undef USEC_SHOW_FUNCTION
+
+#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
+static ssize_t								\
+__FUNC(struct elevator_queue *e, const char *page, size_t count)	\
+{									\
+	struct bfq_data *bfqd = e->elevator_data;			\
+	unsigned long uninitialized_var(__data);			\
+	int ret = bfq_var_store(&__data, (page), count);		\
+	if (__data < (MIN))						\
+		__data = (MIN);						\
+	else if (__data > (MAX))					\
+		__data = (MAX);						\
+	if (__CONV == 1)						\
+		*(__PTR) = msecs_to_jiffies(__data);			\
+	else if (__CONV == 2)						\
+		*(__PTR) = (u64)__data * NSEC_PER_MSEC;			\
+	else								\
+		*(__PTR) = __data;					\
+	return ret;							\
+}
+STORE_FUNCTION(bfq_fifo_expire_sync_store, &bfqd->bfq_fifo_expire[1], 1,
+		INT_MAX, 2);
+STORE_FUNCTION(bfq_fifo_expire_async_store, &bfqd->bfq_fifo_expire[0], 1,
+		INT_MAX, 2);
+STORE_FUNCTION(bfq_back_seek_max_store, &bfqd->bfq_back_max, 0, INT_MAX, 0);
+STORE_FUNCTION(bfq_back_seek_penalty_store, &bfqd->bfq_back_penalty, 1,
+		INT_MAX, 0);
+STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 2);
+STORE_FUNCTION(bfq_wr_coeff_store, &bfqd->bfq_wr_coeff, 1, INT_MAX, 0);
+STORE_FUNCTION(bfq_wr_max_time_store, &bfqd->bfq_wr_max_time, 0, INT_MAX, 1);
+STORE_FUNCTION(bfq_wr_rt_max_time_store, &bfqd->bfq_wr_rt_max_time, 0, INT_MAX,
+		1);
+STORE_FUNCTION(bfq_wr_min_idle_time_store, &bfqd->bfq_wr_min_idle_time, 0,
+		INT_MAX, 1);
+STORE_FUNCTION(bfq_wr_min_inter_arr_async_store,
+		&bfqd->bfq_wr_min_inter_arr_async, 0, INT_MAX, 1);
+STORE_FUNCTION(bfq_wr_max_softrt_rate_store, &bfqd->bfq_wr_max_softrt_rate, 0,
+		INT_MAX, 0);
+#undef STORE_FUNCTION
+
+#define USEC_STORE_FUNCTION(__FUNC, __PTR, MIN, MAX)			\
+static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)\
+{									\
+	struct bfq_data *bfqd = e->elevator_data;			\
+	unsigned long uninitialized_var(__data);			\
+	int ret = bfq_var_store(&__data, (page), count);		\
+	if (__data < (MIN))						\
+		__data = (MIN);						\
+	else if (__data > (MAX))					\
+		__data = (MAX);						\
+	*(__PTR) = (u64)__data * NSEC_PER_USEC;				\
+	return ret;							\
+}
+USEC_STORE_FUNCTION(bfq_slice_idle_us_store, &bfqd->bfq_slice_idle, 0,
+		    UINT_MAX);
+#undef USEC_STORE_FUNCTION
+
+/* do nothing for the moment */
+static ssize_t bfq_weights_store(struct elevator_queue *e,
+				    const char *page, size_t count)
+{
+	return count;
+}
+
+static ssize_t bfq_max_budget_store(struct elevator_queue *e,
+				    const char *page, size_t count)
+{
+	struct bfq_data *bfqd = e->elevator_data;
+	unsigned long uninitialized_var(__data);
+	int ret = bfq_var_store(&__data, (page), count);
+
+	if (__data == 0)
+		bfqd->bfq_max_budget = bfq_calc_max_budget(bfqd);
+	else {
+		if (__data > INT_MAX)
+			__data = INT_MAX;
+		bfqd->bfq_max_budget = __data;
+	}
+
+	bfqd->bfq_user_max_budget = __data;
+
+	return ret;
+}
+
+/*
+ * Leaving this name to preserve name compatibility with cfq
+ * parameters, but this timeout is used for both sync and async.
+ */
+static ssize_t bfq_timeout_sync_store(struct elevator_queue *e,
+				      const char *page, size_t count)
+{
+	struct bfq_data *bfqd = e->elevator_data;
+	unsigned long uninitialized_var(__data);
+	int ret = bfq_var_store(&__data, (page), count);
+
+	if (__data < 1)
+		__data = 1;
+	else if (__data > INT_MAX)
+		__data = INT_MAX;
+
+	bfqd->bfq_timeout = msecs_to_jiffies(__data);
+	if (bfqd->bfq_user_max_budget == 0)
+		bfqd->bfq_max_budget = bfq_calc_max_budget(bfqd);
+
+	return ret;
+}
+
+static ssize_t bfq_strict_guarantees_store(struct elevator_queue *e,
+				     const char *page, size_t count)
+{
+	struct bfq_data *bfqd = e->elevator_data;
+	unsigned long uninitialized_var(__data);
+	int ret = bfq_var_store(&__data, (page), count);
+
+	if (__data > 1)
+		__data = 1;
+	if (!bfqd->strict_guarantees && __data == 1
+	    && bfqd->bfq_slice_idle < 8 * NSEC_PER_MSEC)
+		bfqd->bfq_slice_idle = 8 * NSEC_PER_MSEC;
+
+	bfqd->strict_guarantees = __data;
+
+	return ret;
+}
+
+static ssize_t bfq_low_latency_store(struct elevator_queue *e,
+				     const char *page, size_t count)
+{
+	struct bfq_data *bfqd = e->elevator_data;
+	unsigned long uninitialized_var(__data);
+	int ret = bfq_var_store(&__data, (page), count);
+
+	if (__data > 1)
+		__data = 1;
+	if (__data == 0 && bfqd->low_latency != 0)
+		bfq_end_wr(bfqd);
+	bfqd->low_latency = __data;
+
+	return ret;
+}
+
+#define BFQ_ATTR(name) \
+	__ATTR(name, S_IRUGO|S_IWUSR, bfq_##name##_show, bfq_##name##_store)
+
+static struct elv_fs_entry bfq_attrs[] = {
+	BFQ_ATTR(fifo_expire_sync),
+	BFQ_ATTR(fifo_expire_async),
+	BFQ_ATTR(back_seek_max),
+	BFQ_ATTR(back_seek_penalty),
+	BFQ_ATTR(slice_idle),
+	BFQ_ATTR(slice_idle_us),
+	BFQ_ATTR(max_budget),
+	BFQ_ATTR(timeout_sync),
+	BFQ_ATTR(strict_guarantees),
+	BFQ_ATTR(low_latency),
+	BFQ_ATTR(wr_coeff),
+	BFQ_ATTR(wr_max_time),
+	BFQ_ATTR(wr_rt_max_time),
+	BFQ_ATTR(wr_min_idle_time),
+	BFQ_ATTR(wr_min_inter_arr_async),
+	BFQ_ATTR(wr_max_softrt_rate),
+	BFQ_ATTR(weights),
+	__ATTR_NULL
+};
+
+static struct elevator_type iosched_bfq = {
+	.ops.sq = {
+		.elevator_merge_fn =		bfq_merge,
+		.elevator_merged_fn =		bfq_merged_request,
+		.elevator_merge_req_fn =	bfq_merged_requests,
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+		.elevator_bio_merged_fn =	bfq_bio_merged,
+#endif
+		.elevator_allow_bio_merge_fn =	bfq_allow_bio_merge,
+		.elevator_allow_rq_merge_fn =	bfq_allow_rq_merge,
+		.elevator_dispatch_fn =		bfq_dispatch_requests,
+		.elevator_add_req_fn =		bfq_insert_request,
+		.elevator_activate_req_fn =	bfq_activate_request,
+		.elevator_deactivate_req_fn =	bfq_deactivate_request,
+		.elevator_completed_req_fn =	bfq_completed_request,
+		.elevator_former_req_fn =	elv_rb_former_request,
+		.elevator_latter_req_fn =	elv_rb_latter_request,
+		.elevator_init_icq_fn =		bfq_init_icq,
+		.elevator_exit_icq_fn =		bfq_exit_icq,
+		.elevator_set_req_fn =		bfq_set_request,
+		.elevator_put_req_fn =		bfq_put_request,
+		.elevator_may_queue_fn =	bfq_may_queue,
+		.elevator_init_fn =		bfq_init_queue,
+		.elevator_exit_fn =		bfq_exit_queue,
+		.elevator_registered_fn =	bfq_registered_queue,
+	},
+	.icq_size =		sizeof(struct bfq_io_cq),
+	.icq_align =		__alignof__(struct bfq_io_cq),
+	.elevator_attrs =	bfq_attrs,
+	.elevator_name =	"bfq-sq",
+	.elevator_owner =	THIS_MODULE,
+};
+
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+static struct blkcg_policy blkcg_policy_bfq = {
+	.dfl_cftypes		= bfq_blkg_files,
+	.legacy_cftypes		= bfq_blkcg_legacy_files,
+
+	.cpd_alloc_fn		= bfq_cpd_alloc,
+	.cpd_init_fn		= bfq_cpd_init,
+	.cpd_bind_fn	        = bfq_cpd_init,
+	.cpd_free_fn		= bfq_cpd_free,
+
+	.pd_alloc_fn		= bfq_pd_alloc,
+	.pd_init_fn		= bfq_pd_init,
+	.pd_offline_fn		= bfq_pd_offline,
+	.pd_free_fn		= bfq_pd_free,
+	.pd_reset_stats_fn	= bfq_pd_reset_stats,
+};
+#endif
+
+static int __init bfq_init(void)
+{
+	int ret;
+	char msg[60] = "BFQ I/O-scheduler: v8r12";
+
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	ret = blkcg_policy_register(&blkcg_policy_bfq);
+	if (ret)
+		return ret;
+#endif
+
+	ret = -ENOMEM;
+	if (bfq_slab_setup())
+		goto err_pol_unreg;
+
+	/*
+	 * Times to load large popular applications for the typical
+	 * systems installed on the reference devices (see the
+	 * comments before the definitions of the next two
+	 * arrays). Actually, we use slightly slower values, as the
+	 * estimated peak rate tends to be smaller than the actual
+	 * peak rate.  The reason for this last fact is that estimates
+	 * are computed over much shorter time intervals than the long
+	 * intervals typically used for benchmarking. Why? First, to
+	 * adapt more quickly to variations. Second, because an I/O
+	 * scheduler cannot rely on a peak-rate-evaluation workload to
+	 * be run for a long time.
+	 */
+	T_slow[0] = msecs_to_jiffies(3500); /* actually 4 sec */
+	T_slow[1] = msecs_to_jiffies(6000); /* actually 6.5 sec */
+	T_fast[0] = msecs_to_jiffies(7000); /* actually 8 sec */
+	T_fast[1] = msecs_to_jiffies(2500); /* actually 3 sec */
+
+	/*
+	 * Thresholds that determine the switch between speed classes
+	 * (see the comments before the definition of the array
+	 * device_speed_thresh). These thresholds are biased towards
+	 * transitions to the fast class. This is safer than the
+	 * opposite bias. In fact, a wrong transition to the slow
+	 * class results in short weight-raising periods, because the
+	 * speed of the device then tends to be higher that the
+	 * reference peak rate. On the opposite end, a wrong
+	 * transition to the fast class tends to increase
+	 * weight-raising periods, because of the opposite reason.
+	 */
+	device_speed_thresh[0] = (4 * R_slow[0]) / 3;
+	device_speed_thresh[1] = (4 * R_slow[1]) / 3;
+
+	ret = elv_register(&iosched_bfq);
+	if (ret)
+		goto slab_kill;
+
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	strcat(msg, " (with cgroups support)");
+#endif
+	pr_info("%s", msg);
+
+	return 0;
+
+slab_kill:
+	bfq_slab_kill();
+err_pol_unreg:
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	blkcg_policy_unregister(&blkcg_policy_bfq);
+#endif
+	return ret;
+}
+
+static void __exit bfq_exit(void)
+{
+	elv_unregister(&iosched_bfq);
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	blkcg_policy_unregister(&blkcg_policy_bfq);
+#endif
+	bfq_slab_kill();
+}
+
+module_init(bfq_init);
+module_exit(bfq_exit);
+
+MODULE_AUTHOR("Arianna Avanzini, Fabio Checconi, Paolo Valente");
+MODULE_LICENSE("GPL");
diff --git a/block/bfq.h b/block/bfq.h
new file mode 100644
index 000000000..a25e76c90
--- /dev/null
+++ b/block/bfq.h
@@ -0,0 +1,1002 @@
+/*
+ * BFQ v8r12 for 4.11.0: data structures and common functions prototypes.
+ *
+ * Based on ideas and code from CFQ:
+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
+ *
+ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
+ *		      Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Copyright (C) 2017 Paolo Valente <paolo.valente@linaro.org>
+ */
+
+#ifndef _BFQ_H
+#define _BFQ_H
+
+#include <linux/hrtimer.h>
+#include <linux/blk-cgroup.h>
+
+/*
+ * Define an alternative macro to compile cgroups support. This is one
+ * of the steps needed to let bfq-mq share the files bfq-sched.c and
+ * bfq-cgroup.c with bfq-sq. For bfq-mq, the macro
+ * BFQ_GROUP_IOSCHED_ENABLED will be defined as a function of whether
+ * the configuration option CONFIG_BFQ_MQ_GROUP_IOSCHED, and not
+ * CONFIG_BFQ_GROUP_IOSCHED, is defined.
+ */
+#ifdef CONFIG_BFQ_SQ_GROUP_IOSCHED
+#define BFQ_GROUP_IOSCHED_ENABLED
+#endif
+
+#define BFQ_IOPRIO_CLASSES	3
+#define BFQ_CL_IDLE_TIMEOUT	(HZ/5)
+
+#define BFQ_MIN_WEIGHT			1
+#define BFQ_MAX_WEIGHT			1000
+#define BFQ_WEIGHT_CONVERSION_COEFF	10
+
+#define BFQ_DEFAULT_QUEUE_IOPRIO	4
+
+#define BFQ_WEIGHT_LEGACY_DFL	100
+#define BFQ_DEFAULT_GRP_IOPRIO	0
+#define BFQ_DEFAULT_GRP_CLASS	IOPRIO_CLASS_BE
+
+/*
+ * Soft real-time applications are extremely more latency sensitive
+ * than interactive ones. Over-raise the weight of the former to
+ * privilege them against the latter.
+ */
+#define BFQ_SOFTRT_WEIGHT_FACTOR	100
+
+struct bfq_entity;
+
+/**
+ * struct bfq_service_tree - per ioprio_class service tree.
+ *
+ * Each service tree represents a B-WF2Q+ scheduler on its own.  Each
+ * ioprio_class has its own independent scheduler, and so its own
+ * bfq_service_tree.  All the fields are protected by the queue lock
+ * of the containing bfqd.
+ */
+struct bfq_service_tree {
+	/* tree for active entities (i.e., those backlogged) */
+	struct rb_root active;
+	/* tree for idle entities (i.e., not backlogged, with V <= F_i)*/
+	struct rb_root idle;
+
+	struct bfq_entity *first_idle;	/* idle entity with minimum F_i */
+	struct bfq_entity *last_idle;	/* idle entity with maximum F_i */
+
+	u64 vtime; /* scheduler virtual time */
+	/* scheduler weight sum; active and idle entities contribute to it */
+	unsigned long wsum;
+};
+
+/**
+ * struct bfq_sched_data - multi-class scheduler.
+ *
+ * bfq_sched_data is the basic scheduler queue.  It supports three
+ * ioprio_classes, and can be used either as a toplevel queue or as an
+ * intermediate queue in a hierarchical setup.
+ *
+ * The supported ioprio_classes are the same as in CFQ, in descending
+ * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE.
+ * Requests from higher priority queues are served before all the
+ * requests from lower priority queues; among requests of the same
+ * queue requests are served according to B-WF2Q+.
+ *
+ * The schedule is implemented by the service trees, plus the field
+ * @next_in_service, which points to the entity on the active trees
+ * that will be served next, if 1) no changes in the schedule occurs
+ * before the current in-service entity is expired, 2) the in-service
+ * queue becomes idle when it expires, and 3) if the entity pointed by
+ * in_service_entity is not a queue, then the in-service child entity
+ * of the entity pointed by in_service_entity becomes idle on
+ * expiration. This peculiar definition allows for the following
+ * optimization, not yet exploited: while a given entity is still in
+ * service, we already know which is the best candidate for next
+ * service among the other active entitities in the same parent
+ * entity. We can then quickly compare the timestamps of the
+ * in-service entity with those of such best candidate.
+ *
+ * All the fields are protected by the queue lock of the containing
+ * bfqd.
+ */
+struct bfq_sched_data {
+	struct bfq_entity *in_service_entity;  /* entity in service */
+	/* head-of-the-line entity in the scheduler (see comments above) */
+	struct bfq_entity *next_in_service;
+	/* array of service trees, one per ioprio_class */
+	struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES];
+	/* last time CLASS_IDLE was served */
+	unsigned long bfq_class_idle_last_service;
+
+};
+
+/**
+ * struct bfq_weight_counter - counter of the number of all active entities
+ *                             with a given weight.
+ */
+struct bfq_weight_counter {
+	unsigned int weight; /* weight of the entities this counter refers to */
+	unsigned int num_active; /* nr of active entities with this weight */
+	/*
+	 * Weights tree member (see bfq_data's @queue_weights_tree and
+	 * @group_weights_tree)
+	 */
+	struct rb_node weights_node;
+};
+
+/**
+ * struct bfq_entity - schedulable entity.
+ *
+ * A bfq_entity is used to represent either a bfq_queue (leaf node in the
+ * cgroup hierarchy) or a bfq_group into the upper level scheduler.  Each
+ * entity belongs to the sched_data of the parent group in the cgroup
+ * hierarchy.  Non-leaf entities have also their own sched_data, stored
+ * in @my_sched_data.
+ *
+ * Each entity stores independently its priority values; this would
+ * allow different weights on different devices, but this
+ * functionality is not exported to userspace by now.  Priorities and
+ * weights are updated lazily, first storing the new values into the
+ * new_* fields, then setting the @prio_changed flag.  As soon as
+ * there is a transition in the entity state that allows the priority
+ * update to take place the effective and the requested priority
+ * values are synchronized.
+ *
+ * Unless cgroups are used, the weight value is calculated from the
+ * ioprio to export the same interface as CFQ.  When dealing with
+ * ``well-behaved'' queues (i.e., queues that do not spend too much
+ * time to consume their budget and have true sequential behavior, and
+ * when there are no external factors breaking anticipation) the
+ * relative weights at each level of the cgroups hierarchy should be
+ * guaranteed.  All the fields are protected by the queue lock of the
+ * containing bfqd.
+ */
+struct bfq_entity {
+	struct rb_node rb_node; /* service_tree member */
+	/* pointer to the weight counter associated with this entity */
+	struct bfq_weight_counter *weight_counter;
+
+	/*
+	 * Flag, true if the entity is on a tree (either the active or
+	 * the idle one of its service_tree) or is in service.
+	 */
+	bool on_st;
+
+	u64 finish; /* B-WF2Q+ finish timestamp (aka F_i) */
+	u64 start;  /* B-WF2Q+ start timestamp (aka S_i) */
+
+	/* tree the entity is enqueued into; %NULL if not on a tree */
+	struct rb_root *tree;
+
+	/*
+	 * minimum start time of the (active) subtree rooted at this
+	 * entity; used for O(log N) lookups into active trees
+	 */
+	u64 min_start;
+
+	/* amount of service received during the last service slot */
+	int service;
+
+	/* budget, used also to calculate F_i: F_i = S_i + @budget / @weight */
+	int budget;
+
+	unsigned int weight;	 /* weight of the queue */
+	unsigned int new_weight; /* next weight if a change is in progress */
+
+	/* original weight, used to implement weight boosting */
+	unsigned int orig_weight;
+
+	/* parent entity, for hierarchical scheduling */
+	struct bfq_entity *parent;
+
+	/*
+	 * For non-leaf nodes in the hierarchy, the associated
+	 * scheduler queue, %NULL on leaf nodes.
+	 */
+	struct bfq_sched_data *my_sched_data;
+	/* the scheduler queue this entity belongs to */
+	struct bfq_sched_data *sched_data;
+
+	/* flag, set to request a weight, ioprio or ioprio_class change  */
+	int prio_changed;
+};
+
+struct bfq_group;
+
+/**
+ * struct bfq_queue - leaf schedulable entity.
+ *
+ * A bfq_queue is a leaf request queue; it can be associated with an
+ * io_context or more, if it  is  async or shared  between  cooperating
+ * processes. @cgroup holds a reference to the cgroup, to be sure that it
+ * does not disappear while a bfqq still references it (mostly to avoid
+ * races between request issuing and task migration followed by cgroup
+ * destruction).
+ * All the fields are protected by the queue lock of the containing bfqd.
+ */
+struct bfq_queue {
+	/* reference counter */
+	int ref;
+	/* parent bfq_data */
+	struct bfq_data *bfqd;
+
+	/* current ioprio and ioprio class */
+	unsigned short ioprio, ioprio_class;
+	/* next ioprio and ioprio class if a change is in progress */
+	unsigned short new_ioprio, new_ioprio_class;
+
+	/*
+	 * Shared bfq_queue if queue is cooperating with one or more
+	 * other queues.
+	 */
+	struct bfq_queue *new_bfqq;
+	/* request-position tree member (see bfq_group's @rq_pos_tree) */
+	struct rb_node pos_node;
+	/* request-position tree root (see bfq_group's @rq_pos_tree) */
+	struct rb_root *pos_root;
+
+	/* sorted list of pending requests */
+	struct rb_root sort_list;
+	/* if fifo isn't expired, next request to serve */
+	struct request *next_rq;
+	/* number of sync and async requests queued */
+	int queued[2];
+	/* number of sync and async requests currently allocated */
+	int allocated[2];
+	/* number of pending metadata requests */
+	int meta_pending;
+	/* fifo list of requests in sort_list */
+	struct list_head fifo;
+
+	/* entity representing this queue in the scheduler */
+	struct bfq_entity entity;
+
+	/* maximum budget allowed from the feedback mechanism */
+	int max_budget;
+	/* budget expiration (in jiffies) */
+	unsigned long budget_timeout;
+
+	/* number of requests on the dispatch list or inside driver */
+	int dispatched;
+
+	unsigned int flags; /* status flags.*/
+
+	/* node for active/idle bfqq list inside parent bfqd */
+	struct list_head bfqq_list;
+
+	/* bit vector: a 1 for each seeky requests in history */
+	u32 seek_history;
+
+	/* node for the device's burst list */
+	struct hlist_node burst_list_node;
+
+	/* position of the last request enqueued */
+	sector_t last_request_pos;
+
+	/* Number of consecutive pairs of request completion and
+	 * arrival, such that the queue becomes idle after the
+	 * completion, but the next request arrives within an idle
+	 * time slice; used only if the queue's IO_bound flag has been
+	 * cleared.
+	 */
+	unsigned int requests_within_timer;
+
+	/* pid of the process owning the queue, used for logging purposes */
+	pid_t pid;
+
+	/*
+	 * Pointer to the bfq_io_cq owning the bfq_queue, set to %NULL
+	 * if the queue is shared.
+	 */
+	struct bfq_io_cq *bic;
+
+	/* current maximum weight-raising time for this queue */
+	unsigned long wr_cur_max_time;
+	/*
+	 * Minimum time instant such that, only if a new request is
+	 * enqueued after this time instant in an idle @bfq_queue with
+	 * no outstanding requests, then the task associated with the
+	 * queue it is deemed as soft real-time (see the comments on
+	 * the function bfq_bfqq_softrt_next_start())
+	 */
+	unsigned long soft_rt_next_start;
+	/*
+	 * Start time of the current weight-raising period if
+	 * the @bfq-queue is being weight-raised, otherwise
+	 * finish time of the last weight-raising period.
+	 */
+	unsigned long last_wr_start_finish;
+	/* factor by which the weight of this queue is multiplied */
+	unsigned int wr_coeff;
+	/*
+	 * Time of the last transition of the @bfq_queue from idle to
+	 * backlogged.
+	 */
+	unsigned long last_idle_bklogged;
+	/*
+	 * Cumulative service received from the @bfq_queue since the
+	 * last transition from idle to backlogged.
+	 */
+	unsigned long service_from_backlogged;
+	/*
+	 * Cumulative service received from the @bfq_queue since its
+	 * last transition to weight-raised state.
+	 */
+	unsigned long service_from_wr;
+	/*
+	 * Value of wr start time when switching to soft rt
+	 */
+	unsigned long wr_start_at_switch_to_srt;
+
+	unsigned long split_time; /* time of last split */
+
+	unsigned long first_IO_time; /* time of first I/O for this queue */
+};
+
+/**
+ * struct bfq_ttime - per process thinktime stats.
+ */
+struct bfq_ttime {
+	u64 last_end_request; /* completion time of last request */
+
+	u64 ttime_total; /* total process thinktime */
+	unsigned long ttime_samples; /* number of thinktime samples */
+	u64 ttime_mean; /* average process thinktime */
+
+};
+
+/**
+ * struct bfq_io_cq - per (request_queue, io_context) structure.
+ */
+struct bfq_io_cq {
+	/* associated io_cq structure */
+	struct io_cq icq; /* must be the first member */
+	/* array of two process queues, the sync and the async */
+	struct bfq_queue *bfqq[2];
+	/* associated @bfq_ttime struct */
+	struct bfq_ttime ttime;
+	/* per (request_queue, blkcg) ioprio */
+	int ioprio;
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	uint64_t blkcg_serial_nr; /* the current blkcg serial */
+#endif
+
+	/*
+	 * Snapshot of the has_short_time flag before merging; taken
+	 * to remember its value while the queue is merged, so as to
+	 * be able to restore it in case of split.
+	 */
+	bool saved_has_short_ttime;
+	/*
+	 * Same purpose as the previous two fields for the I/O bound
+	 * classification of a queue.
+	 */
+	bool saved_IO_bound;
+
+	/*
+	 * Same purpose as the previous fields for the value of the
+	 * field keeping the queue's belonging to a large burst
+	 */
+	bool saved_in_large_burst;
+	/*
+	 * True if the queue belonged to a burst list before its merge
+	 * with another cooperating queue.
+	 */
+	bool was_in_burst_list;
+
+	/*
+	 * Similar to previous fields: save wr information.
+	 */
+	unsigned long saved_wr_coeff;
+	unsigned long saved_last_wr_start_finish;
+	unsigned long saved_wr_start_at_switch_to_srt;
+	unsigned int saved_wr_cur_max_time;
+};
+
+enum bfq_device_speed {
+	BFQ_BFQD_FAST,
+	BFQ_BFQD_SLOW,
+};
+
+/**
+ * struct bfq_data - per-device data structure.
+ *
+ * All the fields are protected by the @queue lock.
+ */
+struct bfq_data {
+	/* request queue for the device */
+	struct request_queue *queue;
+
+	/* root bfq_group for the device */
+	struct bfq_group *root_group;
+
+	/*
+	 * rbtree of weight counters of @bfq_queues, sorted by
+	 * weight. Used to keep track of whether all @bfq_queues have
+	 * the same weight. The tree contains one counter for each
+	 * distinct weight associated to some active and not
+	 * weight-raised @bfq_queue (see the comments to the functions
+	 * bfq_weights_tree_[add|remove] for further details).
+	 */
+	struct rb_root queue_weights_tree;
+	/*
+	 * rbtree of non-queue @bfq_entity weight counters, sorted by
+	 * weight. Used to keep track of whether all @bfq_groups have
+	 * the same weight. The tree contains one counter for each
+	 * distinct weight associated to some active @bfq_group (see
+	 * the comments to the functions bfq_weights_tree_[add|remove]
+	 * for further details).
+	 */
+	struct rb_root group_weights_tree;
+
+	/*
+	 * Number of bfq_queues containing requests (including the
+	 * queue in service, even if it is idling).
+	 */
+	int busy_queues;
+	/* number of weight-raised busy @bfq_queues */
+	int wr_busy_queues;
+	/* number of queued requests */
+	int queued;
+	/* number of requests dispatched and waiting for completion */
+	int rq_in_driver;
+
+	/*
+	 * Maximum number of requests in driver in the last
+	 * @hw_tag_samples completed requests.
+	 */
+	int max_rq_in_driver;
+	/* number of samples used to calculate hw_tag */
+	int hw_tag_samples;
+	/* flag set to one if the driver is showing a queueing behavior */
+	int hw_tag;
+
+	/* number of budgets assigned */
+	int budgets_assigned;
+
+	/*
+	 * Timer set when idling (waiting) for the next request from
+	 * the queue in service.
+	 */
+	struct hrtimer idle_slice_timer;
+	/* delayed work to restart dispatching on the request queue */
+	struct work_struct unplug_work;
+
+	/* bfq_queue in service */
+	struct bfq_queue *in_service_queue;
+	/* bfq_io_cq (bic) associated with the @in_service_queue */
+	struct bfq_io_cq *in_service_bic;
+
+	/* on-disk position of the last served request */
+	sector_t last_position;
+
+	/* time of last request completion (ns) */
+	u64 last_completion;
+
+	/* time of first rq dispatch in current observation interval (ns) */
+	u64 first_dispatch;
+	/* time of last rq dispatch in current observation interval (ns) */
+	u64 last_dispatch;
+
+	/* beginning of the last budget */
+	ktime_t last_budget_start;
+	/* beginning of the last idle slice */
+	ktime_t last_idling_start;
+
+	/* number of samples in current observation interval */
+	int peak_rate_samples;
+	/* num of samples of seq dispatches in current observation interval */
+	u32 sequential_samples;
+	/* total num of sectors transferred in current observation interval */
+	u64 tot_sectors_dispatched;
+	/* max rq size seen during current observation interval (sectors) */
+	u32 last_rq_max_size;
+	/* time elapsed from first dispatch in current observ. interval (us) */
+	u64 delta_from_first;
+	/*
+	 * Current estimate of the device peak rate, measured in
+	 * [(sectors/usec) / 2^BFQ_RATE_SHIFT]. The left-shift by
+	 * BFQ_RATE_SHIFT is performed to increase precision in
+	 * fixed-point calculations.
+	 */
+	u32 peak_rate;
+
+	/* maximum budget allotted to a bfq_queue before rescheduling */
+	int bfq_max_budget;
+
+	/* list of all the bfq_queues active on the device */
+	struct list_head active_list;
+	/* list of all the bfq_queues idle on the device */
+	struct list_head idle_list;
+
+	/*
+	 * Timeout for async/sync requests; when it fires, requests
+	 * are served in fifo order.
+	 */
+	u64 bfq_fifo_expire[2];
+	/* weight of backward seeks wrt forward ones */
+	unsigned int bfq_back_penalty;
+	/* maximum allowed backward seek */
+	unsigned int bfq_back_max;
+	/* maximum idling time */
+	u32 bfq_slice_idle;
+
+	/* user-configured max budget value (0 for auto-tuning) */
+	int bfq_user_max_budget;
+	/*
+	 * Timeout for bfq_queues to consume their budget; used to
+	 * prevent seeky queues from imposing long latencies to
+	 * sequential or quasi-sequential ones (this also implies that
+	 * seeky queues cannot receive guarantees in the service
+	 * domain; after a timeout they are charged for the time they
+	 * have been in service, to preserve fairness among them, but
+	 * without service-domain guarantees).
+	 */
+	unsigned int bfq_timeout;
+
+	/*
+	 * Number of consecutive requests that must be issued within
+	 * the idle time slice to set again idling to a queue which
+	 * was marked as non-I/O-bound (see the definition of the
+	 * IO_bound flag for further details).
+	 */
+	unsigned int bfq_requests_within_timer;
+
+	/*
+	 * Force device idling whenever needed to provide accurate
+	 * service guarantees, without caring about throughput
+	 * issues. CAVEAT: this may even increase latencies, in case
+	 * of useless idling for processes that did stop doing I/O.
+	 */
+	bool strict_guarantees;
+
+	/*
+	 * Last time at which a queue entered the current burst of
+	 * queues being activated shortly after each other; for more
+	 * details about this and the following parameters related to
+	 * a burst of activations, see the comments on the function
+	 * bfq_handle_burst.
+	 */
+	unsigned long last_ins_in_burst;
+	/*
+	 * Reference time interval used to decide whether a queue has
+	 * been activated shortly after @last_ins_in_burst.
+	 */
+	unsigned long bfq_burst_interval;
+	/* number of queues in the current burst of queue activations */
+	int burst_size;
+
+	/* common parent entity for the queues in the burst */
+	struct bfq_entity *burst_parent_entity;
+	/* Maximum burst size above which the current queue-activation
+	 * burst is deemed as 'large'.
+	 */
+	unsigned long bfq_large_burst_thresh;
+	/* true if a large queue-activation burst is in progress */
+	bool large_burst;
+	/*
+	 * Head of the burst list (as for the above fields, more
+	 * details in the comments on the function bfq_handle_burst).
+	 */
+	struct hlist_head burst_list;
+
+	/* if set to true, low-latency heuristics are enabled */
+	bool low_latency;
+	/*
+	 * Maximum factor by which the weight of a weight-raised queue
+	 * is multiplied.
+	 */
+	unsigned int bfq_wr_coeff;
+	/* maximum duration of a weight-raising period (jiffies) */
+	unsigned int bfq_wr_max_time;
+
+	/* Maximum weight-raising duration for soft real-time processes */
+	unsigned int bfq_wr_rt_max_time;
+	/*
+	 * Minimum idle period after which weight-raising may be
+	 * reactivated for a queue (in jiffies).
+	 */
+	unsigned int bfq_wr_min_idle_time;
+	/*
+	 * Minimum period between request arrivals after which
+	 * weight-raising may be reactivated for an already busy async
+	 * queue (in jiffies).
+	 */
+	unsigned long bfq_wr_min_inter_arr_async;
+
+	/* Max service-rate for a soft real-time queue, in sectors/sec */
+	unsigned int bfq_wr_max_softrt_rate;
+	/*
+	 * Cached value of the product R*T, used for computing the
+	 * maximum duration of weight raising automatically.
+	 */
+	u64 RT_prod;
+	/* device-speed class for the low-latency heuristic */
+	enum bfq_device_speed device_speed;
+
+	/* fallback dummy bfqq for extreme OOM conditions */
+	struct bfq_queue oom_bfqq;
+};
+
+enum bfqq_state_flags {
+	BFQ_BFQQ_FLAG_just_created = 0,	/* queue just allocated */
+	BFQ_BFQQ_FLAG_busy,		/* has requests or is in service */
+	BFQ_BFQQ_FLAG_wait_request,	/* waiting for a request */
+	BFQ_BFQQ_FLAG_non_blocking_wait_rq, /*
+					     * waiting for a request
+					     * without idling the device
+					     */
+	BFQ_BFQQ_FLAG_must_alloc,	/* must be allowed rq alloc */
+	BFQ_BFQQ_FLAG_fifo_expire,	/* FIFO checked in this slice */
+	BFQ_BFQQ_FLAG_has_short_ttime,	/* queue has a short think time */
+	BFQ_BFQQ_FLAG_sync,		/* synchronous queue */
+	BFQ_BFQQ_FLAG_IO_bound,		/*
+					 * bfqq has timed-out at least once
+					 * having consumed at most 2/10 of
+					 * its budget
+					 */
+	BFQ_BFQQ_FLAG_in_large_burst,	/*
+					 * bfqq activated in a large burst,
+					 * see comments to bfq_handle_burst.
+					 */
+	BFQ_BFQQ_FLAG_softrt_update,	/*
+					 * may need softrt-next-start
+					 * update
+					 */
+	BFQ_BFQQ_FLAG_coop,		/* bfqq is shared */
+	BFQ_BFQQ_FLAG_split_coop	/* shared bfqq will be split */
+};
+
+#define BFQ_BFQQ_FNS(name)						\
+static void bfq_mark_bfqq_##name(struct bfq_queue *bfqq)		\
+{									\
+	(bfqq)->flags |= (1 << BFQ_BFQQ_FLAG_##name);			\
+}									\
+static void bfq_clear_bfqq_##name(struct bfq_queue *bfqq)		\
+{									\
+	(bfqq)->flags &= ~(1 << BFQ_BFQQ_FLAG_##name);			\
+}									\
+static int bfq_bfqq_##name(const struct bfq_queue *bfqq)		\
+{									\
+	return ((bfqq)->flags & (1 << BFQ_BFQQ_FLAG_##name)) != 0;	\
+}
+
+BFQ_BFQQ_FNS(just_created);
+BFQ_BFQQ_FNS(busy);
+BFQ_BFQQ_FNS(wait_request);
+BFQ_BFQQ_FNS(non_blocking_wait_rq);
+BFQ_BFQQ_FNS(must_alloc);
+BFQ_BFQQ_FNS(fifo_expire);
+BFQ_BFQQ_FNS(has_short_ttime);
+BFQ_BFQQ_FNS(sync);
+BFQ_BFQQ_FNS(IO_bound);
+BFQ_BFQQ_FNS(in_large_burst);
+BFQ_BFQQ_FNS(coop);
+BFQ_BFQQ_FNS(split_coop);
+BFQ_BFQQ_FNS(softrt_update);
+#undef BFQ_BFQQ_FNS
+
+/* Logging facilities. */
+#ifdef CONFIG_BFQ_REDIRECT_TO_CONSOLE
+
+static const char *checked_dev_name(const struct device *dev)
+{
+	static const char nodev[] = "nodev";
+
+	if (dev)
+		return dev_name(dev);
+
+	return nodev;
+}
+
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+static struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
+static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
+
+#define bfq_log_bfqq(bfqd, bfqq, fmt, args...)	do {			\
+	char __pbuf[128];						\
+									\
+	assert_spin_locked((bfqd)->queue->queue_lock);			\
+	blkg_path(bfqg_to_blkg(bfqq_group(bfqq)), __pbuf, sizeof(__pbuf)); \
+	pr_crit("%s bfq%d%c %s [%s] " fmt "\n",				\
+		checked_dev_name((bfqd)->queue->backing_dev_info->dev),	\
+		(bfqq)->pid,						\
+		bfq_bfqq_sync((bfqq)) ? 'S' : 'A',			\
+		__pbuf, __func__, ##args);				\
+} while (0)
+
+#define bfq_log_bfqg(bfqd, bfqg, fmt, args...)	do {			\
+	char __pbuf[128];						\
+									\
+	blkg_path(bfqg_to_blkg(bfqg), __pbuf, sizeof(__pbuf));		\
+	pr_crit("%s %s [%s] " fmt "\n",					\
+	checked_dev_name((bfqd)->queue->backing_dev_info->dev),		\
+	__pbuf, __func__, ##args);					\
+} while (0)
+
+#else /* BFQ_GROUP_IOSCHED_ENABLED */
+
+#define bfq_log_bfqq(bfqd, bfqq, fmt, args...)				\
+	pr_crit("%s bfq%d%c [%s] " fmt "\n",				\
+		checked_dev_name((bfqd)->queue->backing_dev_info->dev),	\
+		(bfqq)->pid, bfq_bfqq_sync((bfqq)) ? 'S' : 'A',		\
+		__func__, ##args)
+#define bfq_log_bfqg(bfqd, bfqg, fmt, args...)		do {} while (0)
+
+#endif /* BFQ_GROUP_IOSCHED_ENABLED */
+
+#define bfq_log(bfqd, fmt, args...) \
+	pr_crit("%s bfq [%s] " fmt "\n",				\
+		checked_dev_name((bfqd)->queue->backing_dev_info->dev),	\
+		__func__, ##args)
+
+#else /* CONFIG_BFQ_REDIRECT_TO_CONSOLE */
+
+#if !defined(CONFIG_BLK_DEV_IO_TRACE)
+
+/* Avoid possible "unused-variable" warning. See commit message. */
+
+#define bfq_log_bfqq(bfqd, bfqq, fmt, args...)	((void) (bfqq))
+
+#define bfq_log_bfqg(bfqd, bfqg, fmt, args...)	((void) (bfqg))
+
+#define bfq_log(bfqd, fmt, args...)		do {} while (0)
+
+#else /* CONFIG_BLK_DEV_IO_TRACE */
+
+#include <linux/blktrace_api.h>
+
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+static struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
+static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
+
+#define bfq_log_bfqq(bfqd, bfqq, fmt, args...)	do {			\
+	char __pbuf[128];						\
+									\
+	assert_spin_locked((bfqd)->queue->queue_lock);			\
+	blkg_path(bfqg_to_blkg(bfqq_group(bfqq)), __pbuf, sizeof(__pbuf)); \
+	blk_add_trace_msg((bfqd)->queue, "bfq%d%c %s [%s] " fmt, \
+			  (bfqq)->pid,			  \
+			  bfq_bfqq_sync((bfqq)) ? 'S' : 'A',	\
+			  __pbuf, __func__, ##args);			\
+} while (0)
+
+#define bfq_log_bfqg(bfqd, bfqg, fmt, args...)	do {			\
+	char __pbuf[128];						\
+									\
+	blkg_path(bfqg_to_blkg(bfqg), __pbuf, sizeof(__pbuf));		\
+	blk_add_trace_msg((bfqd)->queue, "%s [%s] " fmt, __pbuf, \
+	__func__, ##args);	\
+} while (0)
+
+#else /* BFQ_GROUP_IOSCHED_ENABLED */
+
+#define bfq_log_bfqq(bfqd, bfqq, fmt, args...)	\
+	blk_add_trace_msg((bfqd)->queue, "bfq%d%c [%s] " fmt, (bfqq)->pid, \
+			bfq_bfqq_sync((bfqq)) ? 'S' : 'A',		\
+				__func__, ##args)
+#define bfq_log_bfqg(bfqd, bfqg, fmt, args...)		do {} while (0)
+
+#endif /* BFQ_GROUP_IOSCHED_ENABLED */
+
+#define bfq_log(bfqd, fmt, args...) \
+	blk_add_trace_msg((bfqd)->queue, "bfq [%s] " fmt, __func__, ##args)
+
+#endif /* CONFIG_BLK_DEV_IO_TRACE */
+#endif /* CONFIG_BFQ_REDIRECT_TO_CONSOLE */
+
+/* Expiration reasons. */
+enum bfqq_expiration {
+	BFQ_BFQQ_TOO_IDLE = 0,		/*
+					 * queue has been idling for
+					 * too long
+					 */
+	BFQ_BFQQ_BUDGET_TIMEOUT,	/* budget took too long to be used */
+	BFQ_BFQQ_BUDGET_EXHAUSTED,	/* budget consumed */
+	BFQ_BFQQ_NO_MORE_REQUESTS,	/* the queue has no more requests */
+	BFQ_BFQQ_PREEMPTED		/* preemption in progress */
+};
+
+
+struct bfqg_stats {
+#if defined(BFQ_GROUP_IOSCHED_ENABLED) &&  defined(CONFIG_DEBUG_BLK_CGROUP)
+	/* number of ios merged */
+	struct blkg_rwstat		merged;
+	/* total time spent on device in ns, may not be accurate w/ queueing */
+	struct blkg_rwstat		service_time;
+	/* total time spent waiting in scheduler queue in ns */
+	struct blkg_rwstat		wait_time;
+	/* number of IOs queued up */
+	struct blkg_rwstat		queued;
+	/* total disk time and nr sectors dispatched by this group */
+	struct blkg_stat		time;
+	/* sum of number of ios queued across all samples */
+	struct blkg_stat		avg_queue_size_sum;
+	/* count of samples taken for average */
+	struct blkg_stat		avg_queue_size_samples;
+	/* how many times this group has been removed from service tree */
+	struct blkg_stat		dequeue;
+	/* total time spent waiting for it to be assigned a timeslice. */
+	struct blkg_stat		group_wait_time;
+	/* time spent idling for this blkcg_gq */
+	struct blkg_stat		idle_time;
+	/* total time with empty current active q with other requests queued */
+	struct blkg_stat		empty_time;
+	/* fields after this shouldn't be cleared on stat reset */
+	uint64_t			start_group_wait_time;
+	uint64_t			start_idle_time;
+	uint64_t			start_empty_time;
+	uint16_t			flags;
+#endif /* BFQ_GROUP_IOSCHED_ENABLED && CONFIG_DEBUG_BLK_CGROUP */
+};
+
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+/*
+ * struct bfq_group_data - per-blkcg storage for the blkio subsystem.
+ *
+ * @ps: @blkcg_policy_storage that this structure inherits
+ * @weight: weight of the bfq_group
+ */
+struct bfq_group_data {
+	/* must be the first member */
+	struct blkcg_policy_data pd;
+
+	unsigned int weight;
+};
+
+/**
+ * struct bfq_group - per (device, cgroup) data structure.
+ * @entity: schedulable entity to insert into the parent group sched_data.
+ * @sched_data: own sched_data, to contain child entities (they may be
+ *              both bfq_queues and bfq_groups).
+ * @bfqd: the bfq_data for the device this group acts upon.
+ * @async_bfqq: array of async queues for all the tasks belonging to
+ *              the group, one queue per ioprio value per ioprio_class,
+ *              except for the idle class that has only one queue.
+ * @async_idle_bfqq: async queue for the idle class (ioprio is ignored).
+ * @my_entity: pointer to @entity, %NULL for the toplevel group; used
+ *             to avoid too many special cases during group creation/
+ *             migration.
+ * @active_entities: number of active entities belonging to the group;
+ *                   unused for the root group. Used to know whether there
+ *                   are groups with more than one active @bfq_entity
+ *                   (see the comments to the function
+ *                   bfq_bfqq_may_idle()).
+ * @rq_pos_tree: rbtree sorted by next_request position, used when
+ *               determining if two or more queues have interleaving
+ *               requests (see bfq_find_close_cooperator()).
+ *
+ * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup
+ * there is a set of bfq_groups, each one collecting the lower-level
+ * entities belonging to the group that are acting on the same device.
+ *
+ * Locking works as follows:
+ *    o @bfqd is protected by the queue lock, RCU is used to access it
+ *      from the readers.
+ *    o All the other fields are protected by the @bfqd queue lock.
+ */
+struct bfq_group {
+	/* must be the first member */
+	struct blkg_policy_data pd;
+
+	struct bfq_entity entity;
+	struct bfq_sched_data sched_data;
+
+	void *bfqd;
+
+	struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
+	struct bfq_queue *async_idle_bfqq;
+
+	struct bfq_entity *my_entity;
+
+	int active_entities;
+
+	struct rb_root rq_pos_tree;
+
+	struct bfqg_stats stats;
+};
+
+#else
+struct bfq_group {
+	struct bfq_sched_data sched_data;
+
+	struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
+	struct bfq_queue *async_idle_bfqq;
+
+	struct rb_root rq_pos_tree;
+};
+#endif
+
+static struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity);
+
+static unsigned int bfq_class_idx(struct bfq_entity *entity)
+{
+	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+
+	return bfqq ? bfqq->ioprio_class - 1 :
+		BFQ_DEFAULT_GRP_CLASS - 1;
+}
+
+static struct bfq_service_tree *
+bfq_entity_service_tree(struct bfq_entity *entity)
+{
+	struct bfq_sched_data *sched_data = entity->sched_data;
+	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+	unsigned int idx = bfq_class_idx(entity);
+
+	BUG_ON(idx >= BFQ_IOPRIO_CLASSES);
+	BUG_ON(sched_data == NULL);
+
+	if (bfqq)
+		bfq_log_bfqq(bfqq->bfqd, bfqq,
+			     "%p %d",
+			     sched_data->service_tree + idx, idx);
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+	else {
+		struct bfq_group *bfqg =
+			container_of(entity, struct bfq_group, entity);
+
+		bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
+			     "%p %d",
+			     sched_data->service_tree + idx, idx);
+	}
+#endif
+	return sched_data->service_tree + idx;
+}
+
+static struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync)
+{
+	return bic->bfqq[is_sync];
+}
+
+static void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq,
+			 bool is_sync)
+{
+	bic->bfqq[is_sync] = bfqq;
+}
+
+static struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic)
+{
+	return bic->icq.q->elevator->elevator_data;
+}
+
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+
+static struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq)
+{
+	struct bfq_entity *group_entity = bfqq->entity.parent;
+
+	if (!group_entity)
+		group_entity = &bfqq->bfqd->root_group->entity;
+
+	return container_of(group_entity, struct bfq_group, entity);
+}
+
+#else
+
+static struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq)
+{
+	return bfqq->bfqd->root_group;
+}
+
+#endif
+
+static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio);
+static void bfq_put_queue(struct bfq_queue *bfqq);
+static void bfq_dispatch_insert(struct request_queue *q, struct request *rq);
+static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
+				       struct bio *bio, bool is_sync,
+				       struct bfq_io_cq *bic);
+static void bfq_end_wr_async_queues(struct bfq_data *bfqd,
+				    struct bfq_group *bfqg);
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
+#endif
+static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq);
+
+#endif /* _BFQ_H */
diff --git a/block/blk-core.c b/block/blk-core.c
index b459d277d..fc7840c3f 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -56,6 +56,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug);
 
 DEFINE_IDA(blk_queue_ida);
 
+int trap_non_toi_io;
+
 /*
  * For the allocated request tables
  */
@@ -2427,6 +2429,9 @@ EXPORT_SYMBOL_GPL(direct_make_request);
  */
 blk_qc_t submit_bio(struct bio *bio)
 {
+	if (unlikely(trap_non_toi_io))
+		BUG_ON(!bio_flagged(bio, BIO_TOI));
+
 	/*
 	 * If it's a regular read/write or a barrier with data attached,
 	 * go through the normal accounting stuff before submission.
diff --git a/block/elevator.c b/block/elevator.c
index e87e9b43a..a000255a1 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -246,7 +246,11 @@ int elevator_init(struct request_queue *q, char *name)
 		 */
 		if (q->mq_ops) {
 			if (q->nr_hw_queues == 1)
+#if defined(CONFIG_PCK_INTERACTIVE) && defined(CONFIG_IOSCHED_BFQ)
+				e = elevator_get(q, "bfq", false);
+#else
 				e = elevator_get(q, "mq-deadline", false);
+#endif
 			if (!e)
 				return 0;
 		} else
diff --git a/block/genhd.c b/block/genhd.c
index 9656f9e9f..7ced0d129 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -18,6 +18,8 @@
 #include <linux/kobj_map.h>
 #include <linux/mutex.h>
 #include <linux/idr.h>
+#include <linux/ctype.h>
+#include <linux/fs_uuid.h>
 #include <linux/log2.h>
 #include <linux/pm_runtime.h>
 #include <linux/badblocks.h>
@@ -1572,6 +1574,85 @@ int invalidate_partition(struct gendisk *disk, int partno)
 
 EXPORT_SYMBOL(invalidate_partition);
 
+dev_t blk_lookup_fs_info(struct fs_info *seek)
+{
+	dev_t devt = MKDEV(0, 0);
+	struct class_dev_iter iter;
+	struct device *dev;
+	int best_score = 0;
+
+	class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
+	while (best_score < 3 && (dev = class_dev_iter_next(&iter))) {
+		struct gendisk *disk = dev_to_disk(dev);
+		struct disk_part_iter piter;
+		struct hd_struct *part;
+
+		disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
+
+		while (best_score < 3 && (part = disk_part_iter_next(&piter))) {
+			int score = part_matches_fs_info(part, seek);
+			if (score > best_score) {
+				devt = part_devt(part);
+				best_score = score;
+			}
+		}
+		disk_part_iter_exit(&piter);
+	}
+	class_dev_iter_exit(&iter);
+	return devt;
+}
+
+/* Caller uses NULL, key to start. For each match found, we return a bdev on
+ * which we have done blkdev_get, and we do the blkdev_put on block devices
+ * that are passed to us. When no more matches are found, we return NULL.
+ */
+struct block_device *next_bdev_of_type(struct block_device *last,
+	const char *key)
+{
+	dev_t devt = MKDEV(0, 0);
+	struct class_dev_iter iter;
+	struct device *dev;
+	struct block_device *next = NULL, *bdev;
+	int got_last = 0;
+
+	if (!key)
+		goto out;
+
+	class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
+	while (!devt && (dev = class_dev_iter_next(&iter))) {
+		struct gendisk *disk = dev_to_disk(dev);
+		struct disk_part_iter piter;
+		struct hd_struct *part;
+
+		disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
+
+		while ((part = disk_part_iter_next(&piter))) {
+			bdev = bdget(part_devt(part));
+			if (last && !got_last) {
+				if (last == bdev)
+					got_last = 1;
+				continue;
+			}
+
+			if (blkdev_get(bdev, FMODE_READ, 0))
+				continue;
+
+			if (bdev_matches_key(bdev, key)) {
+				next = bdev;
+				break;
+			}
+
+			blkdev_put(bdev, FMODE_READ);
+		}
+		disk_part_iter_exit(&piter);
+	}
+	class_dev_iter_exit(&iter);
+out:
+	if (last)
+		blkdev_put(last, FMODE_READ);
+	return next;
+}
+
 /*
  * Disk events - monitor disk events like media change and eject request.
  */
diff --git a/block/uuid.c b/block/uuid.c
new file mode 100644
index 000000000..26cedebad
--- /dev/null
+++ b/block/uuid.c
@@ -0,0 +1,510 @@
+#include <linux/blkdev.h>
+#include <linux/ctype.h>
+#include <linux/fs_uuid.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+
+static int debug_enabled;
+
+#define PRINTK(fmt, args...) do {					\
+	if (debug_enabled)						\
+		printk(KERN_DEBUG fmt, ## args);			\
+	} while(0)
+
+#define PRINT_HEX_DUMP(v1, v2, v3, v4, v5, v6, v7, v8)			\
+	do {								\
+		if (debug_enabled)					\
+			print_hex_dump(v1, v2, v3, v4, v5, v6, v7, v8);	\
+	} while(0)
+
+/*
+ * Simple UUID translation
+ */
+
+struct uuid_info {
+	const char *key;
+	const char *name;
+	long bkoff;
+	unsigned sboff;
+	unsigned sig_len;
+	const char *magic;
+	int uuid_offset;
+	int last_mount_offset;
+	int last_mount_size;
+};
+
+/*
+ * Based on libuuid's blkid_magic array. Note that I don't
+ * have uuid offsets for all of these yet - mssing ones are 0x0.
+ * Further information welcome.
+ *
+ * Rearranged by page of fs signature for optimisation.
+ */
+static struct uuid_info uuid_list[] = {
+ { NULL, "oracleasm", 0, 32, 8, "ORCLDISK", 0x0, 0, 0 },
+ { "ntfs", "ntfs", 0, 3, 8, "NTFS    ", 0x0, 0, 0 },
+ { "vfat", "vfat", 0, 0x52, 5, "MSWIN", 0x0, 0, 0 },
+ { "vfat", "vfat", 0, 0x52, 8, "FAT32   ", 0x0, 0, 0 },
+ { "vfat", "vfat", 0, 0x36, 5, "MSDOS", 0x0, 0, 0 },
+ { "vfat", "vfat", 0, 0x36, 8, "FAT16   ", 0x0, 0, 0 },
+ { "vfat", "vfat", 0, 0x36, 8, "FAT12   ", 0x0, 0, 0 },
+ { "vfat", "vfat", 0, 0, 1, "\353", 0x0, 0, 0 },
+ { "vfat", "vfat", 0, 0, 1, "\351", 0x0, 0, 0 },
+ { "vfat", "vfat", 0, 0x1fe, 2, "\125\252", 0x0, 0, 0 },
+ { "xfs", "xfs", 0, 0, 4, "XFSB", 0x20, 0, 0 },
+ { "romfs", "romfs", 0, 0, 8, "-rom1fs-", 0x0, 0, 0 },
+ { "bfs", "bfs", 0, 0, 4, "\316\372\173\033", 0, 0, 0 },
+ { "cramfs", "cramfs", 0, 0, 4, "E=\315\050", 0x0, 0, 0 },
+ { "qnx4", "qnx4", 0, 4, 6, "QNX4FS", 0, 0, 0 },
+ { NULL, "crypt_LUKS", 0, 0, 6, "LUKS\xba\xbe", 0x0, 0, 0 },
+ { "squashfs", "squashfs", 0, 0, 4, "sqsh", 0, 0, 0 },
+ { "squashfs", "squashfs", 0, 0, 4, "hsqs", 0, 0, 0 },
+ { "ocfs", "ocfs", 0, 8, 9, "OracleCFS", 0x0, 0, 0 },
+ { "lvm2pv", "lvm2pv", 0, 0x018, 8, "LVM2 001", 0x0, 0, 0 },
+ { "sysv", "sysv", 0, 0x3f8, 4, "\020~\030\375", 0, 0, 0 },
+ { "ext", "ext", 1, 0x38, 2, "\123\357", 0x468, 0x42c, 4 },
+ { "minix", "minix", 1, 0x10, 2, "\177\023", 0, 0, 0 },
+ { "minix", "minix", 1, 0x10, 2, "\217\023", 0, 0, 0 },
+ { "minix", "minix", 1, 0x10, 2, "\150\044", 0, 0, 0 },
+ { "minix", "minix", 1, 0x10, 2, "\170\044", 0, 0, 0 },
+ { "lvm2pv", "lvm2pv", 1, 0x018, 8, "LVM2 001", 0x0, 0, 0 },
+ { "vxfs", "vxfs", 1, 0, 4, "\365\374\001\245", 0, 0, 0 },
+ { "hfsplus", "hfsplus", 1, 0, 2, "BD", 0x0, 0, 0 },
+ { "hfsplus", "hfsplus", 1, 0, 2, "H+", 0x0, 0, 0 },
+ { "hfsplus", "hfsplus", 1, 0, 2, "HX", 0x0, 0, 0 },
+ { "hfs", "hfs", 1, 0, 2, "BD", 0x0, 0, 0 },
+ { "ocfs2", "ocfs2", 1, 0, 6, "OCFSV2", 0x0, 0, 0 },
+ { "lvm2pv", "lvm2pv", 0, 0x218, 8, "LVM2 001", 0x0, 0, 0 },
+ { "lvm2pv", "lvm2pv", 1, 0x218, 8, "LVM2 001", 0x0, 0, 0 },
+ { "ocfs2", "ocfs2", 2, 0, 6, "OCFSV2", 0x0, 0, 0 },
+ { "swap", "swap", 0, 0xff6, 10, "SWAP-SPACE", 0x40c, 0, 0 },
+ { "swap", "swap", 0, 0xff6, 10, "SWAPSPACE2", 0x40c, 0, 0 },
+ { "swap", "swsuspend", 0, 0xff6, 9, "S1SUSPEND", 0x40c, 0, 0 },
+ { "swap", "swsuspend", 0, 0xff6, 9, "S2SUSPEND", 0x40c, 0, 0 },
+ { "swap", "swsuspend", 0, 0xff6, 9, "ULSUSPEND", 0x40c, 0, 0 },
+ { "ocfs2", "ocfs2", 4, 0, 6, "OCFSV2", 0x0, 0, 0 },
+ { "ocfs2", "ocfs2", 8, 0, 6, "OCFSV2", 0x0, 0, 0 },
+ { "hpfs", "hpfs", 8, 0, 4, "I\350\225\371", 0, 0, 0 },
+ { "reiserfs", "reiserfs", 8, 0x34, 8, "ReIsErFs", 0x10054, 0, 0 },
+ { "reiserfs", "reiserfs", 8, 20, 8, "ReIsErFs", 0x10054, 0, 0 },
+ { "zfs", "zfs", 8, 0, 8, "\0\0\x02\xf5\xb0\x07\xb1\x0c", 0x0, 0, 0 },
+ { "zfs", "zfs", 8, 0, 8, "\x0c\xb1\x07\xb0\xf5\x02\0\0", 0x0, 0, 0 },
+ { "ufs", "ufs", 8, 0x55c, 4, "T\031\001\000", 0, 0, 0 },
+ { "swap", "swap", 0, 0x1ff6, 10, "SWAP-SPACE", 0x40c, 0, 0 },
+ { "swap", "swap", 0, 0x1ff6, 10, "SWAPSPACE2", 0x40c, 0, 0 },
+ { "swap", "swsuspend", 0, 0x1ff6, 9, "S1SUSPEND", 0x40c, 0, 0 },
+ { "swap", "swsuspend", 0, 0x1ff6, 9, "S2SUSPEND", 0x40c, 0, 0 },
+ { "swap", "swsuspend", 0, 0x1ff6, 9, "ULSUSPEND", 0x40c, 0, 0 },
+ { "reiserfs", "reiserfs", 64, 0x34, 9, "ReIsEr2Fs", 0x10054, 0, 0 },
+ { "reiserfs", "reiserfs", 64, 0x34, 9, "ReIsEr3Fs", 0x10054, 0, 0 },
+ { "reiserfs", "reiserfs", 64, 0x34, 8, "ReIsErFs", 0x10054, 0, 0 },
+ { "reiser4", "reiser4", 64, 0, 7, "ReIsEr4", 0x100544, 0, 0 },
+ { "gfs2", "gfs2", 64, 0, 4, "\x01\x16\x19\x70", 0x0, 0, 0 },
+ { "gfs", "gfs", 64, 0, 4, "\x01\x16\x19\x70", 0x0, 0, 0 },
+ { "btrfs", "btrfs", 64, 0x40, 8, "_BHRfS_M", 0x0, 0, 0 },
+ { "swap", "swap", 0, 0x3ff6, 10, "SWAP-SPACE", 0x40c, 0, 0 },
+ { "swap", "swap", 0, 0x3ff6, 10, "SWAPSPACE2", 0x40c, 0, 0 },
+ { "swap", "swsuspend", 0, 0x3ff6, 9, "S1SUSPEND", 0x40c, 0, 0 },
+ { "swap", "swsuspend", 0, 0x3ff6, 9, "S2SUSPEND", 0x40c, 0, 0 },
+ { "swap", "swsuspend", 0, 0x3ff6, 9, "ULSUSPEND", 0x40c, 0, 0 },
+ { "udf", "udf", 32, 1, 5, "BEA01", 0x0, 0, 0 },
+ { "udf", "udf", 32, 1, 5, "BOOT2", 0x0, 0, 0 },
+ { "udf", "udf", 32, 1, 5, "CD001", 0x0, 0, 0 },
+ { "udf", "udf", 32, 1, 5, "CDW02", 0x0, 0, 0 },
+ { "udf", "udf", 32, 1, 5, "NSR02", 0x0, 0, 0 },
+ { "udf", "udf", 32, 1, 5, "NSR03", 0x0, 0, 0 },
+ { "udf", "udf", 32, 1, 5, "TEA01", 0x0, 0, 0 },
+ { "iso9660", "iso9660", 32, 1, 5, "CD001", 0x0, 0, 0 },
+ { "iso9660", "iso9660", 32, 9, 5, "CDROM", 0x0, 0, 0 },
+ { "jfs", "jfs", 32, 0, 4, "JFS1", 0x88, 0, 0 },
+ { "swap", "swap", 0, 0x7ff6, 10, "SWAP-SPACE", 0x40c, 0, 0 },
+ { "swap", "swap", 0, 0x7ff6, 10, "SWAPSPACE2", 0x40c, 0, 0 },
+ { "swap", "swsuspend", 0, 0x7ff6, 9, "S1SUSPEND", 0x40c, 0, 0 },
+ { "swap", "swsuspend", 0, 0x7ff6, 9, "S2SUSPEND", 0x40c, 0, 0 },
+ { "swap", "swsuspend", 0, 0x7ff6, 9, "ULSUSPEND", 0x40c, 0, 0 },
+ { "swap", "swap", 0, 0xfff6, 10, "SWAP-SPACE", 0x40c, 0, 0 },
+ { "swap", "swap", 0, 0xfff6, 10, "SWAPSPACE2", 0x40c, 0, 0 },
+ { "swap", "swsuspend", 0, 0xfff6, 9, "S1SUSPEND", 0x40c, 0, 0 },
+ { "swap", "swsuspend", 0, 0xfff6, 9, "S2SUSPEND", 0x40c, 0, 0 },
+ { "swap", "swsuspend", 0, 0xfff6, 9, "ULSUSPEND", 0x40c, 0, 0 },
+ { "zfs", "zfs", 264, 0, 8, "\0\0\x02\xf5\xb0\x07\xb1\x0c", 0x0, 0, 0 },
+ { "zfs", "zfs", 264, 0, 8, "\x0c\xb1\x07\xb0\xf5\x02\0\0", 0x0, 0, 0 },
+ { NULL, NULL, 0, 0, 0, NULL, 0x0, 0, 0 }
+};
+
+static int null_uuid(const char *uuid)
+{
+	int i;
+
+	for (i = 0; i < 16 && !uuid[i]; i++);
+
+	return (i == 16);
+}
+
+
+static void uuid_end_bio(struct bio *bio)
+{
+	struct page *page = bio->bi_io_vec[0].bv_page;
+
+        if (bio->bi_status == BLK_STS_IOERR)
+            SetPageError(page);
+
+	unlock_page(page);
+	bio_put(bio);
+}
+
+
+/**
+ * read_bdev_page - Read a page from a device.
+ * @dev: The block device we're using.
+ * @page_num: The page we're reading.
+ *
+ * Based on Patrick Mochell's pmdisk code from long ago: "Straight from the
+ * textbook - allocate and initialize the bio. If we're writing, make sure
+ * the page is marked as dirty. Then submit it and carry on."
+ **/
+static struct page *read_bdev_page(struct block_device *dev, int page_num)
+{
+	struct bio *bio = NULL;
+	struct page *page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+
+	if (!page) {
+		printk(KERN_ERR "Failed to allocate a page for reading data "
+				"in UUID checks.");
+		return NULL;
+	}
+
+	bio = bio_alloc(GFP_NOFS, 1);
+	bio_set_dev(bio, dev);
+	bio->bi_iter.bi_sector = page_num << 3;
+	bio->bi_end_io = uuid_end_bio;
+	bio->bi_flags |= (1 << BIO_TOI);
+
+	PRINTK("Submitting bio on device %lx, page %d using bio %p and page %p.\n",
+			(unsigned long) dev->bd_dev, page_num, bio, page);
+
+	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
+		printk(KERN_DEBUG "ERROR: adding page to bio at %d\n",
+				page_num);
+		bio_put(bio);
+		__free_page(page);
+		printk(KERN_DEBUG "read_bdev_page freed page %p (in error "
+				"path).\n", page);
+		return NULL;
+	}
+
+	lock_page(page);
+	bio_set_op_attrs(bio, REQ_OP_READ, REQ_SYNC);
+	submit_bio(bio);
+
+	wait_on_page_locked(page);
+	if (PageError(page)) {
+		__free_page(page);
+		page = NULL;
+	}
+	return page;
+}
+
+int bdev_matches_key(struct block_device *bdev, const char *key)
+{
+	unsigned char *data = NULL;
+	struct page *data_page = NULL;
+
+	int dev_offset, pg_num, pg_off, i;
+	int last_pg_num = -1;
+	int result = 0;
+	char buf[50];
+
+	if (null_uuid(key)) {
+		PRINTK("Refusing to find a NULL key.\n");
+		return 0;
+	}
+
+	if (!bdev->bd_disk) {
+		bdevname(bdev, buf);
+		PRINTK("bdev %s has no bd_disk.\n", buf);
+		return 0;
+	}
+
+	if (!bdev->bd_disk->queue) {
+		bdevname(bdev, buf);
+		PRINTK("bdev %s has no queue.\n", buf);
+		return 0;
+	}
+
+	for (i = 0; uuid_list[i].name; i++) {
+		struct uuid_info *dat = &uuid_list[i];
+
+		if (!dat->key || strcmp(dat->key, key))
+			continue;
+
+		dev_offset = (dat->bkoff << 10) + dat->sboff;
+		pg_num = dev_offset >> 12;
+		pg_off = dev_offset & 0xfff;
+
+		if ((((pg_num + 1) << 3) - 1) > bdev->bd_part->nr_sects >> 1)
+			continue;
+
+		if (pg_num != last_pg_num) {
+			if (data_page) {
+				kunmap(data_page);
+				__free_page(data_page);
+			}
+			data_page = read_bdev_page(bdev, pg_num);
+			if (!data_page)
+				continue;
+			data = kmap(data_page);
+		}
+
+		last_pg_num = pg_num;
+
+		if (strncmp(&data[pg_off], dat->magic, dat->sig_len))
+			continue;
+
+		result = 1;
+		break;
+	}
+
+	if (data_page) {
+		kunmap(data_page);
+		__free_page(data_page);
+	}
+
+	return result;
+}
+
+/* 
+ * part_matches_fs_info - Does the given partition match the details given?
+ *
+ * Returns a score saying how good the match is.
+ * 0 = no UUID match.
+ * 1 = UUID but last mount time differs.
+ * 2 = UUID, last mount time but not dev_t
+ * 3 = perfect match
+ *
+ * This lets us cope elegantly with probing resulting in dev_ts changing
+ * from boot to boot, and with the case where a user copies a partition
+ * (UUID is non unique), and we need to check the last mount time of the
+ * correct partition.
+ */
+int part_matches_fs_info(struct hd_struct *part, struct fs_info *seek)
+{
+	struct block_device *bdev;
+	struct fs_info *got;
+	int result = 0;
+	char buf[50];
+
+	if (null_uuid((char *) &seek->uuid)) {
+		PRINTK("Refusing to find a NULL uuid.\n");
+		return 0;
+	}
+
+	bdev = bdget(part_devt(part));
+
+	PRINTK("part_matches fs info considering %x.\n", part_devt(part));
+
+	if (blkdev_get(bdev, FMODE_READ, 0)) {
+		PRINTK("blkdev_get failed.\n");
+		return 0;
+	}
+
+	if (!bdev->bd_disk) {
+		bdevname(bdev, buf);
+		PRINTK("bdev %s has no bd_disk.\n", buf);
+		goto out;
+	}
+
+	if (!bdev->bd_disk->queue) {
+		bdevname(bdev, buf);
+		PRINTK("bdev %s has no queue.\n", buf);
+		goto out;
+	}
+
+	got = fs_info_from_block_dev(bdev);
+
+	if (got && !memcmp(got->uuid, seek->uuid, 16)) {
+		PRINTK(" Have matching UUID.\n");
+		PRINTK(" Got: LMS %d, LM %p.\n", got->last_mount_size, got->last_mount);
+		PRINTK(" Seek: LMS %d, LM %p.\n", seek->last_mount_size, seek->last_mount);
+		result = 1;
+
+		if (got->last_mount_size == seek->last_mount_size &&
+		    got->last_mount && seek->last_mount &&
+		    !memcmp(got->last_mount, seek->last_mount,
+			    got->last_mount_size)) {
+			result = 2;
+
+			PRINTK(" Matching last mount time.\n");
+
+			if (part_devt(part) == seek->dev_t) {
+				result = 3;
+				PRINTK(" Matching dev_t.\n");
+			} else
+				PRINTK("Dev_ts differ (%x vs %x).\n", part_devt(part), seek->dev_t);
+		}
+	}
+
+	PRINTK(" Score for %x is %d.\n", part_devt(part), result);
+	free_fs_info(got);
+out:
+	blkdev_put(bdev, FMODE_READ);
+	return result;
+}
+
+void free_fs_info(struct fs_info *fs_info)
+{
+	if (!fs_info || IS_ERR(fs_info))
+		return;
+
+	if (fs_info->last_mount)
+		kfree(fs_info->last_mount);
+
+	kfree(fs_info);
+}
+
+struct fs_info *fs_info_from_block_dev(struct block_device *bdev)
+{
+	unsigned char *data = NULL;
+	struct page *data_page = NULL;
+
+	int dev_offset, pg_num, pg_off;
+	int uuid_pg_num, uuid_pg_off, i;
+	unsigned char *uuid_data = NULL;
+	struct page *uuid_data_page = NULL;
+
+	int last_pg_num = -1, last_uuid_pg_num = 0;
+	char buf[50];
+	struct fs_info *fs_info = NULL;
+
+	bdevname(bdev, buf);
+
+	PRINTK("uuid_from_block_dev looking for partition type of %s.\n", buf);
+
+	for (i = 0; uuid_list[i].name; i++) {
+		struct uuid_info *dat = &uuid_list[i];
+		dev_offset = (dat->bkoff << 10) + dat->sboff;
+		pg_num = dev_offset >> 12;
+		pg_off = dev_offset & 0xfff;
+		uuid_pg_num = dat->uuid_offset >> 12;
+		uuid_pg_off = dat->uuid_offset & 0xfff;
+
+		if ((((pg_num + 1) << 3) - 1) > bdev->bd_part->nr_sects >> 1)
+			continue;
+
+		/* Ignore partition types with no UUID offset */
+		if (!dat->uuid_offset)
+			continue;
+
+		if (pg_num != last_pg_num) {
+			if (data_page) {
+				kunmap(data_page);
+				__free_page(data_page);
+			}
+			data_page = read_bdev_page(bdev, pg_num);
+			if (!data_page)
+				continue;
+			data = kmap(data_page);
+		}
+
+		last_pg_num = pg_num;
+
+		if (strncmp(&data[pg_off], dat->magic, dat->sig_len))
+			continue;
+
+		PRINTK("This partition looks like %s.\n", dat->name);
+
+		fs_info = kzalloc(sizeof(struct fs_info), GFP_KERNEL);
+
+		if (!fs_info) {
+			PRINTK("Failed to allocate fs_info struct.");
+			fs_info = ERR_PTR(-ENOMEM);
+			break;
+		}
+
+		/* UUID can't be off the end of the disk */
+		if ((uuid_pg_num > bdev->bd_part->nr_sects >> 3) ||
+				!dat->uuid_offset)
+			goto no_uuid;
+
+		if (!uuid_data || uuid_pg_num != last_uuid_pg_num) {
+			/* No need to reread the page from above */
+			if (uuid_pg_num == pg_num && uuid_data)
+				memcpy(uuid_data, data, PAGE_SIZE);
+			else {
+				if (uuid_data_page) {
+					kunmap(uuid_data_page);
+					__free_page(uuid_data_page);
+				}
+				uuid_data_page = read_bdev_page(bdev, uuid_pg_num);
+				if (!uuid_data_page)
+					continue;
+				uuid_data = kmap(uuid_data_page);
+			}
+		}
+
+		last_uuid_pg_num = uuid_pg_num;
+		memcpy(&fs_info->uuid, &uuid_data[uuid_pg_off], 16);
+		fs_info->dev_t = bdev->bd_dev;
+
+no_uuid:
+		PRINT_HEX_DUMP(KERN_EMERG, "fs_info_from_block_dev "
+				"returning uuid ", DUMP_PREFIX_NONE, 16, 1,
+				fs_info->uuid, 16, 0);
+
+		if (dat->last_mount_size) {
+			int pg = dat->last_mount_offset >> 12, sz;
+			int off = dat->last_mount_offset & 0xfff;
+			struct page *last_mount = read_bdev_page(bdev, pg);
+			unsigned char *last_mount_data;
+			char *ptr;
+
+			if (!last_mount) {
+				fs_info = ERR_PTR(-ENOMEM);
+				break;
+			}
+			last_mount_data = kmap(last_mount);
+			sz = dat->last_mount_size;
+			ptr = kmalloc(sz, GFP_KERNEL);
+
+			if (!ptr) {
+				printk(KERN_EMERG "fs_info_from_block_dev "
+					"failed to get memory for last mount "
+					"timestamp.");
+				free_fs_info(fs_info);
+				fs_info = ERR_PTR(-ENOMEM);
+			} else {
+				fs_info->last_mount = ptr;
+				fs_info->last_mount_size = sz;
+				memcpy(ptr, &last_mount_data[off], sz);
+			}
+
+			kunmap(last_mount);
+			__free_page(last_mount);
+		}
+		break;
+	}
+
+	if (data_page) {
+		kunmap(data_page);
+		__free_page(data_page);
+	}
+
+	if (uuid_data_page) {
+		kunmap(uuid_data_page);
+		__free_page(uuid_data_page);
+	}
+
+	return fs_info;
+}
+
+static int __init uuid_debug_setup(char *str)
+{
+	int value;
+
+	if (sscanf(str, "=%d", &value))
+		debug_enabled = value;
+
+	return 1;
+}
+
+__setup("uuid_debug", uuid_debug_setup);
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index fe92cb972..44017835e 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -691,6 +691,24 @@ static inline int is_loop_device(struct file *file)
 	return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR;
 }
 
+/*
+ * for AUFS
+ * no get/put for file.
+ */
+struct file *loop_backing_file(struct super_block *sb)
+{
+	struct file *ret;
+	struct loop_device *l;
+
+	ret = NULL;
+	if (MAJOR(sb->s_dev) == LOOP_MAJOR) {
+		l = sb->s_bdev->bd_disk->private_data;
+		ret = l->lo_backing_file;
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(loop_backing_file);
+
 /* loop sysfs attributes */
 
 static ssize_t loop_attr_show(struct device *dev, char *page,
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 6b423eebf..4b4932e22 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -21,10 +21,18 @@
 #include "cpufreq_ondemand.h"
 
 /* On-demand governor macros */
-#define DEF_FREQUENCY_UP_THRESHOLD		(80)
+#if defined(CONFIG_PCK_INTERACTIVE) && defined(CONFIG_SCHED_MUQSS)
+	#define DEF_FREQUENCY_UP_THRESHOLD	(45)
+	#define MICRO_FREQUENCY_UP_THRESHOLD	(45)
+#elif defined(CONFIG_PCK_INTERACTIVE)
+	#define DEF_FREQUENCY_UP_THRESHOLD	(80)
+	#define MICRO_FREQUENCY_UP_THRESHOLD	(85)
+#else
+	#define DEF_FREQUENCY_UP_THRESHOLD	(80)
+	#define MICRO_FREQUENCY_UP_THRESHOLD	(95)
+#endif
 #define DEF_SAMPLING_DOWN_FACTOR		(1)
 #define MAX_SAMPLING_DOWN_FACTOR		(100000)
-#define MICRO_FREQUENCY_UP_THRESHOLD		(95)
 #define MICRO_FREQUENCY_MIN_SAMPLE_RATE		(10000)
 #define MIN_FREQUENCY_UP_THRESHOLD		(1)
 #define MAX_FREQUENCY_UP_THRESHOLD		(100)
diff --git a/drivers/gpu/drm/amd/display/dc/basics/logger.c b/drivers/gpu/drm/amd/display/dc/basics/logger.c
index 180a9d69d..62126d512 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/logger.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/logger.c
@@ -345,7 +345,7 @@ void dm_logger_append_va(
 		if (size < LOG_MAX_LINE_SIZE - 1) {
 			append_entry(entry, buffer, size);
 		} else {
-			append_entry(entry, "LOG_ERROR, line too long\n", 27);
+			append_entry(entry, "LOG_ERROR, line too long\n", 25);
 		}
 	}
 }
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 79521da5d..837cbaa55 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -1597,7 +1597,8 @@ static bool intel_edp_compare_alt_mode(struct drm_display_mode *m1,
 			m1->vdisplay == m2->vdisplay &&
 			m1->vsync_start == m2->vsync_start &&
 			m1->vsync_end == m2->vsync_end &&
-			m1->vtotal == m2->vtotal);
+			m1->vtotal == m2->vtotal &&
+			m1->vrefresh == m2->vrefresh);
 	return bres;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_dp_link_training.c b/drivers/gpu/drm/i915/intel_dp_link_training.c
index cf8fef8b6..05907fa8a 100644
--- a/drivers/gpu/drm/i915/intel_dp_link_training.c
+++ b/drivers/gpu/drm/i915/intel_dp_link_training.c
@@ -328,22 +328,14 @@ intel_dp_start_link_train(struct intel_dp *intel_dp)
 	return;
 
  failure_handling:
-	/* Dont fallback and prune modes if its eDP */
-	if (!intel_dp_is_edp(intel_dp)) {
-		DRM_DEBUG_KMS("[CONNECTOR:%d:%s] Link Training failed at link rate = %d, lane count = %d",
-			      intel_connector->base.base.id,
-			      intel_connector->base.name,
-			      intel_dp->link_rate, intel_dp->lane_count);
-		if (!intel_dp_get_link_train_fallback_values(intel_dp,
-							     intel_dp->link_rate,
-							     intel_dp->lane_count))
-			/* Schedule a Hotplug Uevent to userspace to start modeset */
-			schedule_work(&intel_connector->modeset_retry_work);
-	} else {
-		DRM_ERROR("[CONNECTOR:%d:%s] Link Training failed at link rate = %d, lane count = %d",
-			  intel_connector->base.base.id,
-			  intel_connector->base.name,
-			  intel_dp->link_rate, intel_dp->lane_count);
-	}
+	DRM_DEBUG_KMS("[CONNECTOR:%d:%s] Link Training failed at link rate = %d, lane count = %d",
+		      intel_connector->base.base.id,
+		      intel_connector->base.name,
+		      intel_dp->link_rate, intel_dp->lane_count);
+	if (!intel_dp_get_link_train_fallback_values(intel_dp,
+						     intel_dp->link_rate,
+						     intel_dp->lane_count))
+		/* Schedule a Hotplug Uevent to userspace to start modeset */
+		schedule_work(&intel_connector->modeset_retry_work);
 	return;
 }
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index cb1d2ab13..d21700f57 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -779,6 +779,7 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
 		struct sockaddr     _sockaddr;
 		struct sockaddr_in  _sockaddr_in;
 		struct sockaddr_in6 _sockaddr_in6;
+		struct sockaddr_ib  _sockaddr_ib;
 	} sgid_addr, dgid_addr;
 	int ret;
 
diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c
index a246fc686..f16a75d71 100644
--- a/drivers/input/mouse/synaptics.c
+++ b/drivers/input/mouse/synaptics.c
@@ -1325,7 +1325,9 @@ static void set_input_params(struct psmouse *psmouse,
 		/* Clickpads report only left button */
 		__clear_bit(BTN_RIGHT, dev->keybit);
 		__clear_bit(BTN_MIDDLE, dev->keybit);
-	}
+	} else if (SYN_CAP_CLICKPAD2BTN(info->ext_cap_0c) ||
+		   SYN_CAP_CLICKPAD2BTN2(info->ext_cap_0c))
+		__set_bit(INPUT_PROP_BUTTONPAD, dev->propbit);
 }
 
 static ssize_t synaptics_show_disable_gesture(struct psmouse *psmouse,
diff --git a/drivers/input/mouse/synaptics.h b/drivers/input/mouse/synaptics.h
index fc00e005c..4cfbeec3a 100644
--- a/drivers/input/mouse/synaptics.h
+++ b/drivers/input/mouse/synaptics.h
@@ -86,6 +86,7 @@
  */
 #define SYN_CAP_CLICKPAD(ex0c)		((ex0c) & BIT(20)) /* 1-button ClickPad */
 #define SYN_CAP_CLICKPAD2BTN(ex0c)	((ex0c) & BIT(8))  /* 2-button ClickPad */
+#define SYN_CAP_CLICKPAD2BTN2(ex0c)	((ex0c) & BIT(21)) /* 2-button ClickPad */
 #define SYN_CAP_MAX_DIMENSIONS(ex0c)	((ex0c) & BIT(17))
 #define SYN_CAP_MIN_DIMENSIONS(ex0c)	((ex0c) & BIT(13))
 #define SYN_CAP_ADV_GESTURE(ex0c)	((ex0c) & BIT(19))
diff --git a/drivers/macintosh/Kconfig b/drivers/macintosh/Kconfig
index 97a420c11..c8621e9b2 100644
--- a/drivers/macintosh/Kconfig
+++ b/drivers/macintosh/Kconfig
@@ -159,6 +159,13 @@ config INPUT_ADBHID
 
 	  If unsure, say Y.
 
+config ADB_TRACKPAD_ABSOLUTE
+	bool "Enable absolute mode for adb trackpads"
+	depends on INPUT_ADBHID
+	help
+	  Enable absolute mode in adb-base trackpads. This feature adds
+	  compatibility with synaptics Xorg / Xfree drivers.
+
 config MAC_EMUMOUSEBTN
 	tristate "Support for mouse button 2+3 emulation"
 	depends on SYSCTL && INPUT
diff --git a/drivers/macintosh/adbhid.c b/drivers/macintosh/adbhid.c
index a261892c0..a85192de8 100644
--- a/drivers/macintosh/adbhid.c
+++ b/drivers/macintosh/adbhid.c
@@ -262,6 +262,15 @@ static struct adb_ids buttons_ids;
 #define ADBMOUSE_MS_A3		8	/* Mouse systems A3 trackball (handler 3) */
 #define ADBMOUSE_MACALLY2	9	/* MacAlly 2-button mouse */
 
+#ifdef CONFIG_ADB_TRACKPAD_ABSOLUTE
+#define	ABS_XMIN	310
+#define	ABS_XMAX	1700
+#define	ABS_YMIN	200
+#define	ABS_YMAX	1000
+#define	ABS_ZMIN	0
+#define	ABS_ZMAX	55
+#endif
+
 static void
 adbhid_keyboard_input(unsigned char *data, int nb, int apoll)
 {
@@ -405,6 +414,9 @@ static void
 adbhid_mouse_input(unsigned char *data, int nb, int autopoll)
 {
 	int id = (data[0] >> 4) & 0x0f;
+#ifdef CONFIG_ADB_TRACKPAD_ABSOLUTE
+	int btn = 0; int x_axis = 0; int y_axis = 0; int z_axis = 0;
+#endif
 
 	if (!adbhid[id]) {
 		pr_err("ADB HID on ID %d not yet registered\n", id);
@@ -436,6 +448,17 @@ adbhid_mouse_input(unsigned char *data, int nb, int autopoll)
 	      high bits of y-axis motion.  XY is additional
 	      high bits of x-axis motion.
 
+    For ADB Absolute motion protocol the data array will contain the
+    following values:
+
+		BITS    COMMENTS
+    data[0] = dddd 1100 ADB command: Talk, register 0, for device dddd.
+    data[1] = byyy yyyy Left button and y-axis motion.
+    data[2] = bxxx xxxx Second button and x-axis motion.
+    data[3] = 1yyy 1xxx Half bits of y-axis and x-axis motion.
+    data[4] = 1yyy 1xxx Higher bits of y-axis and x-axis motion.
+    data[5] = 1zzz 1zzz Higher and lower bits of z-pressure.
+
     MacAlly 2-button mouse protocol.
 
     For MacAlly 2-button mouse protocol the data array will contain the
@@ -458,8 +481,17 @@ adbhid_mouse_input(unsigned char *data, int nb, int autopoll)
 	switch (adbhid[id]->mouse_kind)
 	{
 	    case ADBMOUSE_TRACKPAD:
+#ifdef CONFIG_ADB_TRACKPAD_ABSOLUTE
+		x_axis = (data[2] & 0x7f) | ((data[3] & 0x07) << 7) |
+			((data[4] & 0x07) << 10);
+		y_axis = (data[1] & 0x7f) | ((data[3] & 0x70) << 3) |
+			((data[4] & 0x70) << 6);
+		z_axis = (data[5] & 0x07) | ((data[5] & 0x70) >> 1);
+		btn = (!(data[1] >> 7)) & 1;
+#else
 		data[1] = (data[1] & 0x7f) | ((data[1] & data[2]) & 0x80);
 		data[2] = data[2] | 0x80;
+#endif
 		break;
 	    case ADBMOUSE_MICROSPEED:
 		data[1] = (data[1] & 0x7f) | ((data[3] & 0x01) << 7);
@@ -485,17 +517,39 @@ adbhid_mouse_input(unsigned char *data, int nb, int autopoll)
                 break;
 	}
 
-	input_report_key(adbhid[id]->input, BTN_LEFT,   !((data[1] >> 7) & 1));
-	input_report_key(adbhid[id]->input, BTN_MIDDLE, !((data[2] >> 7) & 1));
+#ifdef CONFIG_ADB_TRACKPAD_ABSOLUTE
+	if ( adbhid[id]->mouse_kind == ADBMOUSE_TRACKPAD ) {
 
-	if (nb >= 4 && adbhid[id]->mouse_kind != ADBMOUSE_TRACKPAD)
-		input_report_key(adbhid[id]->input, BTN_RIGHT,  !((data[3] >> 7) & 1));
+		if(z_axis > 30) input_report_key(adbhid[id]->input, BTN_TOUCH, 1);
+		if(z_axis < 25) input_report_key(adbhid[id]->input, BTN_TOUCH, 0);
 
-	input_report_rel(adbhid[id]->input, REL_X,
-			 ((data[2]&0x7f) < 64 ? (data[2]&0x7f) : (data[2]&0x7f)-128 ));
-	input_report_rel(adbhid[id]->input, REL_Y,
-			 ((data[1]&0x7f) < 64 ? (data[1]&0x7f) : (data[1]&0x7f)-128 ));
+		if(z_axis > 0){
+			input_report_abs(adbhid[id]->input, ABS_X, x_axis);
+			input_report_abs(adbhid[id]->input, ABS_Y, y_axis);
+			input_report_key(adbhid[id]->input, BTN_TOOL_FINGER, 1);
+			input_report_key(adbhid[id]->input, ABS_TOOL_WIDTH, 5);
+		} else {
+			input_report_key(adbhid[id]->input, BTN_TOOL_FINGER, 0);
+			input_report_key(adbhid[id]->input, ABS_TOOL_WIDTH, 0);
+		}
+
+		input_report_abs(adbhid[id]->input, ABS_PRESSURE, z_axis);
+		input_report_key(adbhid[id]->input, BTN_LEFT, btn);
+	} else {
+#endif
+		input_report_key(adbhid[id]->input, BTN_LEFT,   !((data[1] >> 7) & 1));
+		input_report_key(adbhid[id]->input, BTN_MIDDLE, !((data[2] >> 7) & 1));
+
+		if (nb >= 4 && adbhid[id]->mouse_kind != ADBMOUSE_TRACKPAD)
+			input_report_key(adbhid[id]->input, BTN_RIGHT,  !((data[3] >> 7) & 1));
 
+		input_report_rel(adbhid[id]->input, REL_X,
+				((data[2]&0x7f) < 64 ? (data[2]&0x7f) : (data[2]&0x7f)-128 ));
+		input_report_rel(adbhid[id]->input, REL_Y,
+				((data[1]&0x7f) < 64 ? (data[1]&0x7f) : (data[1]&0x7f)-128 ));
+#ifdef CONFIG_ADB_TRACKPAD_ABSOLUTE
+	}
+#endif
 	input_sync(adbhid[id]->input);
 }
 
@@ -849,6 +903,15 @@ adbhid_input_register(int id, int default_id, int original_handler_id,
 		input_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
 			BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
 		input_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
+#ifdef CONFIG_ADB_TRACKPAD_ABSOLUTE
+                set_bit(EV_ABS, input_dev->evbit);
+		input_set_abs_params(input_dev, ABS_X, ABS_XMIN, ABS_XMAX, 0, 0);
+		input_set_abs_params(input_dev, ABS_Y, ABS_YMIN, ABS_YMAX, 0, 0);
+		input_set_abs_params(input_dev, ABS_PRESSURE, ABS_ZMIN, ABS_ZMAX, 0, 0);
+		set_bit(BTN_TOUCH, input_dev->keybit);
+		set_bit(BTN_TOOL_FINGER, input_dev->keybit);
+		set_bit(ABS_TOOL_WIDTH, input_dev->absbit);
+#endif
 		break;
 
 	case ADB_MISC:
@@ -1132,7 +1195,11 @@ init_trackpad(int id)
 	            r1_buffer[3],
 	            r1_buffer[4],
 	            r1_buffer[5],
+#ifdef CONFIG_ADB_TRACKPAD_ABSOLUTE
+		    0x00, /* Enable absolute mode */
+#else
 	            0x03, /*r1_buffer[6],*/
+#endif
 	            r1_buffer[7]);
 
 	    /* Without this flush, the trackpad may be locked up */
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 51a1b4976..e56c6b254 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -572,9 +572,28 @@ config THINKPAD_ACPI_HOTKEY_POLL
 	  If you are not sure, say Y here.  The driver enables polling only if
 	  it is strictly necessary to do so.
 
+config THINKPAD_EC
+	tristate
+	---help---
+	  This is a low-level driver for accessing the ThinkPad H8S embedded
+	  controller over the LPC bus (not to be confused with the ACPI Embedded
+	  Controller interface).
+
+config TP_SMAPI
+	tristate "ThinkPad SMAPI Support"
+	select THINKPAD_EC
+	default n
+	help
+	  This adds SMAPI support on Lenovo/IBM ThinkPads, for features such
+	  as battery charging control. For more information about this driver
+	  see <http://www.thinkwiki.org/wiki/tp_smapi>.
+
+	  If you have a Lenovo/IBM ThinkPad laptop, say Y or M here.
+
 config SENSORS_HDAPS
 	tristate "Thinkpad Hard Drive Active Protection System (hdaps)"
 	depends on INPUT
+	select THINKPAD_EC
 	select INPUT_POLLDEV
 	help
 	  This driver provides support for the IBM Hard Drive Active Protection
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index 2ba6cb795..399f8b886 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
@@ -35,6 +35,8 @@ obj-$(CONFIG_TC1100_WMI)	+= tc1100-wmi.o
 obj-$(CONFIG_SONY_LAPTOP)	+= sony-laptop.o
 obj-$(CONFIG_IDEAPAD_LAPTOP)	+= ideapad-laptop.o
 obj-$(CONFIG_THINKPAD_ACPI)	+= thinkpad_acpi.o
+obj-$(CONFIG_THINKPAD_EC)	+= thinkpad_ec.o
+obj-$(CONFIG_TP_SMAPI)		+= tp_smapi.o
 obj-$(CONFIG_SENSORS_HDAPS)	+= hdaps.o
 obj-$(CONFIG_FUJITSU_LAPTOP)	+= fujitsu-laptop.o
 obj-$(CONFIG_FUJITSU_TABLET)	+= fujitsu-tablet.o
diff --git a/drivers/platform/x86/hdaps.c b/drivers/platform/x86/hdaps.c
index c26baf779..1814614f2 100644
--- a/drivers/platform/x86/hdaps.c
+++ b/drivers/platform/x86/hdaps.c
@@ -2,7 +2,7 @@
  * hdaps.c - driver for IBM's Hard Drive Active Protection System
  *
  * Copyright (C) 2005 Robert Love <rml@novell.com>
- * Copyright (C) 2005 Jesper Juhl <jj@chaosbits.net>
+ * Copyright (C) 2005 Jesper Juhl <jesper.juhl@gmail.com>
  *
  * The HardDisk Active Protection System (hdaps) is present in IBM ThinkPads
  * starting with the R40, T41, and X40.  It provides a basic two-axis
@@ -30,266 +30,384 @@
 
 #include <linux/delay.h>
 #include <linux/platform_device.h>
-#include <linux/input-polldev.h>
+#include <linux/input.h>
 #include <linux/kernel.h>
-#include <linux/mutex.h>
 #include <linux/module.h>
 #include <linux/timer.h>
 #include <linux/dmi.h>
 #include <linux/jiffies.h>
-#include <linux/io.h>
-
-#define HDAPS_LOW_PORT		0x1600	/* first port used by hdaps */
-#define HDAPS_NR_PORTS		0x30	/* number of ports: 0x1600 - 0x162f */
-
-#define HDAPS_PORT_STATE	0x1611	/* device state */
-#define HDAPS_PORT_YPOS		0x1612	/* y-axis position */
-#define	HDAPS_PORT_XPOS		0x1614	/* x-axis position */
-#define HDAPS_PORT_TEMP1	0x1616	/* device temperature, in Celsius */
-#define HDAPS_PORT_YVAR		0x1617	/* y-axis variance (what is this?) */
-#define HDAPS_PORT_XVAR		0x1619	/* x-axis variance (what is this?) */
-#define HDAPS_PORT_TEMP2	0x161b	/* device temperature (again?) */
-#define HDAPS_PORT_UNKNOWN	0x161c	/* what is this? */
-#define HDAPS_PORT_KMACT	0x161d	/* keyboard or mouse activity */
-
-#define STATE_FRESH		0x50	/* accelerometer data is fresh */
+#include <linux/thinkpad_ec.h>
+#include <linux/pci_ids.h>
+#include <linux/version.h>
+
+/* Embedded controller accelerometer read command and its result: */
+static const struct thinkpad_ec_row ec_accel_args =
+	{ .mask = 0x0001, .val = {0x11} };
+#define EC_ACCEL_IDX_READOUTS	0x1	/* readouts included in this read */
+					/* First readout, if READOUTS>=1: */
+#define EC_ACCEL_IDX_YPOS1	0x2	/*   y-axis position word */
+#define EC_ACCEL_IDX_XPOS1	0x4	/*   x-axis position word */
+#define EC_ACCEL_IDX_TEMP1	0x6	/*   device temperature in Celsius */
+					/* Second readout, if READOUTS>=2: */
+#define EC_ACCEL_IDX_XPOS2	0x7	/*   y-axis position word */
+#define EC_ACCEL_IDX_YPOS2	0x9	/*   x-axis position word */
+#define EC_ACCEL_IDX_TEMP2	0xb	/*   device temperature in Celsius */
+#define EC_ACCEL_IDX_QUEUED	0xc	/* Number of queued readouts left */
+#define EC_ACCEL_IDX_KMACT	0xd	/* keyboard or mouse activity */
+#define EC_ACCEL_IDX_RETVAL	0xf	/* command return value, good=0x00 */
 
 #define KEYBD_MASK		0x20	/* set if keyboard activity */
 #define MOUSE_MASK		0x40	/* set if mouse activity */
-#define KEYBD_ISSET(n)		(!! (n & KEYBD_MASK))	/* keyboard used? */
-#define MOUSE_ISSET(n)		(!! (n & MOUSE_MASK))	/* mouse used? */
 
-#define INIT_TIMEOUT_MSECS	4000	/* wait up to 4s for device init ... */
-#define INIT_WAIT_MSECS		200	/* ... in 200ms increments */
+#define READ_TIMEOUT_MSECS	100	/* wait this long for device read */
+#define RETRY_MSECS		3	/* retry delay */
 
-#define HDAPS_POLL_INTERVAL	50	/* poll for input every 1/20s (50 ms)*/
 #define HDAPS_INPUT_FUZZ	4	/* input event threshold */
 #define HDAPS_INPUT_FLAT	4
-
-#define HDAPS_X_AXIS		(1 << 0)
-#define HDAPS_Y_AXIS		(1 << 1)
-#define HDAPS_BOTH_AXES		(HDAPS_X_AXIS | HDAPS_Y_AXIS)
-
+#define KMACT_REMEMBER_PERIOD   (HZ/10) /* keyboard/mouse persistence */
+
+/* Input IDs */
+#define HDAPS_INPUT_VENDOR	PCI_VENDOR_ID_IBM
+#define HDAPS_INPUT_PRODUCT	0x5054 /* "TP", shared with thinkpad_acpi */
+#define HDAPS_INPUT_JS_VERSION	0x6801 /* Joystick emulation input device */
+#define HDAPS_INPUT_RAW_VERSION	0x4801 /* Raw accelerometer input device */
+
+/* Axis orientation. */
+/* The unnatural bit-representation of inversions is for backward
+ * compatibility with the"invert=1" module parameter.             */
+#define HDAPS_ORIENT_INVERT_XY  0x01   /* Invert both X and Y axes.       */
+#define HDAPS_ORIENT_INVERT_X   0x02   /* Invert the X axis (uninvert if
+					* already inverted by INVERT_XY). */
+#define HDAPS_ORIENT_SWAP       0x04   /* Swap the axes. The swap occurs
+					* before inverting X or Y.        */
+#define HDAPS_ORIENT_MAX        0x07
+#define HDAPS_ORIENT_UNDEFINED  0xFF   /* Placeholder during initialization */
+#define HDAPS_ORIENT_INVERT_Y   (HDAPS_ORIENT_INVERT_XY | HDAPS_ORIENT_INVERT_X)
+
+static struct timer_list hdaps_timer;
 static struct platform_device *pdev;
-static struct input_polled_dev *hdaps_idev;
-static unsigned int hdaps_invert;
-static u8 km_activity;
-static int rest_x;
-static int rest_y;
-
-static DEFINE_MUTEX(hdaps_mtx);
-
-/*
- * __get_latch - Get the value from a given port.  Callers must hold hdaps_mtx.
- */
-static inline u8 __get_latch(u16 port)
+static struct input_dev *hdaps_idev;     /* joystick-like device with fuzz */
+static struct input_dev *hdaps_idev_raw; /* raw hdaps sensor readouts */
+static unsigned int hdaps_invert = HDAPS_ORIENT_UNDEFINED;
+static int needs_calibration;
+
+/* Configuration: */
+static int sampling_rate = 50;       /* Sampling rate  */
+static int oversampling_ratio = 5;   /* Ratio between our sampling rate and
+				      * EC accelerometer sampling rate      */
+static int running_avg_filter_order = 2; /* EC running average filter order */
+
+/* Latest state readout: */
+static int pos_x, pos_y;      /* position */
+static int temperature;       /* temperature */
+static int stale_readout = 1; /* last read invalid */
+static int rest_x, rest_y;    /* calibrated rest position */
+
+/* Last time we saw keyboard and mouse activity: */
+static u64 last_keyboard_jiffies = INITIAL_JIFFIES;
+static u64 last_mouse_jiffies = INITIAL_JIFFIES;
+static u64 last_update_jiffies = INITIAL_JIFFIES;
+
+/* input device use count */
+static int hdaps_users;
+static DEFINE_MUTEX(hdaps_users_mtx);
+
+/* Some models require an axis transformation to the standard representation */
+static void transform_axes(int *x, int *y)
 {
-	return inb(port) & 0xff;
+	if (hdaps_invert & HDAPS_ORIENT_SWAP) {
+		int z;
+		z = *x;
+		*x = *y;
+		*y = z;
+	}
+	if (hdaps_invert & HDAPS_ORIENT_INVERT_XY) {
+		*x = -*x;
+		*y = -*y;
+	}
+	if (hdaps_invert & HDAPS_ORIENT_INVERT_X)
+		*x = -*x;
 }
 
-/*
- * __check_latch - Check a port latch for a given value.  Returns zero if the
- * port contains the given value.  Callers must hold hdaps_mtx.
+/**
+ * __hdaps_update - query current state, with locks already acquired
+ * @fast: if nonzero, do one quick attempt without retries.
+ *
+ * Query current accelerometer state and update global state variables.
+ * Also prefetches the next query. Caller must hold controller lock.
  */
-static inline int __check_latch(u16 port, u8 val)
+static int __hdaps_update(int fast)
 {
-	if (__get_latch(port) == val)
-		return 0;
-	return -EINVAL;
-}
+	/* Read data: */
+	struct thinkpad_ec_row data;
+	int ret;
 
-/*
- * __wait_latch - Wait up to 100us for a port latch to get a certain value,
- * returning zero if the value is obtained.  Callers must hold hdaps_mtx.
- */
-static int __wait_latch(u16 port, u8 val)
-{
-	unsigned int i;
+	data.mask = (1 << EC_ACCEL_IDX_READOUTS) | (1 << EC_ACCEL_IDX_KMACT) |
+		    (3 << EC_ACCEL_IDX_YPOS1)    | (3 << EC_ACCEL_IDX_XPOS1) |
+		    (1 << EC_ACCEL_IDX_TEMP1)    | (1 << EC_ACCEL_IDX_RETVAL);
+	if (fast)
+		ret = thinkpad_ec_try_read_row(&ec_accel_args, &data);
+	else
+		ret = thinkpad_ec_read_row(&ec_accel_args, &data);
+	thinkpad_ec_prefetch_row(&ec_accel_args); /* Prefetch even if error */
+	if (ret)
+		return ret;
 
-	for (i = 0; i < 20; i++) {
-		if (!__check_latch(port, val))
-			return 0;
-		udelay(5);
+	/* Check status: */
+	if (data.val[EC_ACCEL_IDX_RETVAL] != 0x00) {
+		pr_warn("read RETVAL=0x%02x\n",
+		       data.val[EC_ACCEL_IDX_RETVAL]);
+		return -EIO;
 	}
 
-	return -EIO;
+	if (data.val[EC_ACCEL_IDX_READOUTS] < 1)
+		return -EBUSY; /* no pending readout, try again later */
+
+	/* Parse position data: */
+	pos_x = *(s16 *)(data.val+EC_ACCEL_IDX_XPOS1);
+	pos_y = *(s16 *)(data.val+EC_ACCEL_IDX_YPOS1);
+	transform_axes(&pos_x, &pos_y);
+
+	/* Keyboard and mouse activity status is cleared as soon as it's read,
+	 * so applications will eat each other's events. Thus we remember any
+	 * event for KMACT_REMEMBER_PERIOD jiffies.
+	 */
+	if (data.val[EC_ACCEL_IDX_KMACT] & KEYBD_MASK)
+		last_keyboard_jiffies = get_jiffies_64();
+	if (data.val[EC_ACCEL_IDX_KMACT] & MOUSE_MASK)
+		last_mouse_jiffies = get_jiffies_64();
+
+	temperature = data.val[EC_ACCEL_IDX_TEMP1];
+
+	last_update_jiffies = get_jiffies_64();
+	stale_readout = 0;
+	if (needs_calibration) {
+		rest_x = pos_x;
+		rest_y = pos_y;
+		needs_calibration = 0;
+	}
+
+	return 0;
 }
 
-/*
- * __device_refresh - request a refresh from the accelerometer.  Does not wait
- * for refresh to complete.  Callers must hold hdaps_mtx.
+/**
+ * hdaps_update - acquire locks and query current state
+ *
+ * Query current accelerometer state and update global state variables.
+ * Also prefetches the next query.
+ * Retries until timeout if the accelerometer is not in ready status (common).
+ * Does its own locking.
  */
-static void __device_refresh(void)
+static int hdaps_update(void)
 {
-	udelay(200);
-	if (inb(0x1604) != STATE_FRESH) {
-		outb(0x11, 0x1610);
-		outb(0x01, 0x161f);
+	u64 age = get_jiffies_64() - last_update_jiffies;
+	int total, ret;
+
+	if (!stale_readout && age < (9*HZ)/(10*sampling_rate))
+		return 0; /* already updated recently */
+	for (total = 0; total < READ_TIMEOUT_MSECS; total += RETRY_MSECS) {
+		ret = thinkpad_ec_lock();
+		if (ret)
+			return ret;
+		ret = __hdaps_update(0);
+		thinkpad_ec_unlock();
+
+		if (!ret)
+			return 0;
+		if (ret != -EBUSY)
+			break;
+		msleep(RETRY_MSECS);
 	}
+	return ret;
 }
 
-/*
- * __device_refresh_sync - request a synchronous refresh from the
- * accelerometer.  We wait for the refresh to complete.  Returns zero if
- * successful and nonzero on error.  Callers must hold hdaps_mtx.
+/**
+ * hdaps_set_power - enable or disable power to the accelerometer.
+ * Returns zero on success and negative error code on failure.  Can sleep.
  */
-static int __device_refresh_sync(void)
+static int hdaps_set_power(int on)
 {
-	__device_refresh();
-	return __wait_latch(0x1604, STATE_FRESH);
+	struct thinkpad_ec_row args =
+		{ .mask = 0x0003, .val = {0x14, on?0x01:0x00} };
+	struct thinkpad_ec_row data = { .mask = 0x8000 };
+	int ret = thinkpad_ec_read_row(&args, &data);
+	if (ret)
+		return ret;
+	if (data.val[0xF] != 0x00)
+		return -EIO;
+	return 0;
 }
 
-/*
- * __device_complete - indicate to the accelerometer that we are done reading
- * data, and then initiate an async refresh.  Callers must hold hdaps_mtx.
+/**
+ * hdaps_set_ec_config - set accelerometer parameters.
+ * @ec_rate: embedded controller sampling rate
+ * @order: embedded controller running average filter order
+ * (Normally we have @ec_rate = sampling_rate * oversampling_ratio.)
+ * Returns zero on success and negative error code on failure.  Can sleep.
  */
-static inline void __device_complete(void)
+static int hdaps_set_ec_config(int ec_rate, int order)
 {
-	inb(0x161f);
-	inb(0x1604);
-	__device_refresh();
+	struct thinkpad_ec_row args = { .mask = 0x000F,
+		.val = {0x10, (u8)ec_rate, (u8)(ec_rate>>8), order} };
+	struct thinkpad_ec_row data = { .mask = 0x8000 };
+	int ret = thinkpad_ec_read_row(&args, &data);
+	pr_debug("setting ec_rate=%d, filter_order=%d\n", ec_rate, order);
+	if (ret)
+		return ret;
+	if (data.val[0xF] == 0x03) {
+		pr_warn("config param out of range\n");
+		return -EINVAL;
+	}
+	if (data.val[0xF] == 0x06) {
+		pr_warn("config change already pending\n");
+		return -EBUSY;
+	}
+	if (data.val[0xF] != 0x00) {
+		pr_warn("config change error, ret=%d\n",
+		      data.val[0xF]);
+		return -EIO;
+	}
+	return 0;
 }
 
-/*
- * hdaps_readb_one - reads a byte from a single I/O port, placing the value in
- * the given pointer.  Returns zero on success or a negative error on failure.
- * Can sleep.
+/**
+ * hdaps_get_ec_config - get accelerometer parameters.
+ * @ec_rate: embedded controller sampling rate
+ * @order: embedded controller running average filter order
+ * Returns zero on success and negative error code on failure.  Can sleep.
  */
-static int hdaps_readb_one(unsigned int port, u8 *val)
+static int hdaps_get_ec_config(int *ec_rate, int *order)
 {
-	int ret;
-
-	mutex_lock(&hdaps_mtx);
-
-	/* do a sync refresh -- we need to be sure that we read fresh data */
-	ret = __device_refresh_sync();
+	const struct thinkpad_ec_row args =
+		{ .mask = 0x0003, .val = {0x17, 0x82} };
+	struct thinkpad_ec_row data = { .mask = 0x801F };
+	int ret = thinkpad_ec_read_row(&args, &data);
 	if (ret)
-		goto out;
-
-	*val = inb(port);
-	__device_complete();
-
-out:
-	mutex_unlock(&hdaps_mtx);
-	return ret;
+		return ret;
+	if (data.val[0xF] != 0x00)
+		return -EIO;
+	if (!(data.val[0x1] & 0x01))
+		return -ENXIO; /* accelerometer polling not enabled */
+	if (data.val[0x1] & 0x02)
+		return -EBUSY; /* config change in progress, retry later */
+	*ec_rate = data.val[0x2] | ((int)(data.val[0x3]) << 8);
+	*order = data.val[0x4];
+	return 0;
 }
 
-/* __hdaps_read_pair - internal lockless helper for hdaps_read_pair(). */
-static int __hdaps_read_pair(unsigned int port1, unsigned int port2,
-			     int *x, int *y)
+/**
+ * hdaps_get_ec_mode - get EC accelerometer mode
+ * Returns zero on success and negative error code on failure.  Can sleep.
+ */
+static int hdaps_get_ec_mode(u8 *mode)
 {
-	/* do a sync refresh -- we need to be sure that we read fresh data */
-	if (__device_refresh_sync())
+	const struct thinkpad_ec_row args =
+		{ .mask = 0x0001, .val = {0x13} };
+	struct thinkpad_ec_row data = { .mask = 0x8002 };
+	int ret = thinkpad_ec_read_row(&args, &data);
+	if (ret)
+		return ret;
+	if (data.val[0xF] != 0x00) {
+		pr_warn("accelerometer not implemented (0x%02x)\n",
+		       data.val[0xF]);
 		return -EIO;
-
-	*y = inw(port2);
-	*x = inw(port1);
-	km_activity = inb(HDAPS_PORT_KMACT);
-	__device_complete();
-
-	/* hdaps_invert is a bitvector to negate the axes */
-	if (hdaps_invert & HDAPS_X_AXIS)
-		*x = -*x;
-	if (hdaps_invert & HDAPS_Y_AXIS)
-		*y = -*y;
-
+	}
+	*mode = data.val[0x1];
 	return 0;
 }
 
-/*
- * hdaps_read_pair - reads the values from a pair of ports, placing the values
- * in the given pointers.  Returns zero on success.  Can sleep.
+/**
+ * hdaps_check_ec - checks something about the EC.
+ * Follows the clean-room spec for HDAPS; we don't know what it means.
+ * Returns zero on success and negative error code on failure.  Can sleep.
  */
-static int hdaps_read_pair(unsigned int port1, unsigned int port2,
-			   int *val1, int *val2)
+static int hdaps_check_ec(void)
 {
-	int ret;
-
-	mutex_lock(&hdaps_mtx);
-	ret = __hdaps_read_pair(port1, port2, val1, val2);
-	mutex_unlock(&hdaps_mtx);
-
-	return ret;
+	const struct thinkpad_ec_row args =
+		{ .mask = 0x0003, .val = {0x17, 0x81} };
+	struct thinkpad_ec_row data = { .mask = 0x800E };
+	int ret = thinkpad_ec_read_row(&args, &data);
+	if (ret)
+		return  ret;
+	if (!((data.val[0x1] == 0x00 && data.val[0x2] == 0x60) || /* cleanroom spec */
+	      (data.val[0x1] == 0x01 && data.val[0x2] == 0x00)) || /* seen on T61 */
+	    data.val[0x3] != 0x00 || data.val[0xF] != 0x00) {
+		pr_warn("hdaps_check_ec: bad response (0x%x,0x%x,0x%x,0x%x)\n",
+		       data.val[0x1], data.val[0x2],
+		       data.val[0x3], data.val[0xF]);
+		return -EIO;
+	}
+	return 0;
 }
 
-/*
- * hdaps_device_init - initialize the accelerometer.  Returns zero on success
- * and negative error code on failure.  Can sleep.
+/**
+ * hdaps_device_init - initialize the accelerometer.
+ *
+ * Call several embedded controller functions to test and initialize the
+ * accelerometer.
+ * Returns zero on success and negative error code on failure. Can sleep.
  */
+#define FAILED_INIT(msg) pr_err("init failed at: %s\n", msg)
 static int hdaps_device_init(void)
 {
-	int total, ret = -ENXIO;
+	int ret;
+	u8 mode;
 
-	mutex_lock(&hdaps_mtx);
+	ret = thinkpad_ec_lock();
+	if (ret)
+		return ret;
 
-	outb(0x13, 0x1610);
-	outb(0x01, 0x161f);
-	if (__wait_latch(0x161f, 0x00))
-		goto out;
+	if (hdaps_get_ec_mode(&mode))
+		{ FAILED_INIT("hdaps_get_ec_mode failed"); goto bad; }
 
-	/*
-	 * Most ThinkPads return 0x01.
-	 *
-	 * Others--namely the R50p, T41p, and T42p--return 0x03.  These laptops
-	 * have "inverted" axises.
-	 *
-	 * The 0x02 value occurs when the chip has been previously initialized.
-	 */
-	if (__check_latch(0x1611, 0x03) &&
-		     __check_latch(0x1611, 0x02) &&
-		     __check_latch(0x1611, 0x01))
-		goto out;
+	pr_debug("initial mode latch is 0x%02x\n", mode);
+	if (mode == 0x00)
+		{ FAILED_INIT("accelerometer not available"); goto bad; }
 
-	printk(KERN_DEBUG "hdaps: initial latch check good (0x%02x)\n",
-	       __get_latch(0x1611));
+	if (hdaps_check_ec())
+		{ FAILED_INIT("hdaps_check_ec failed"); goto bad; }
 
-	outb(0x17, 0x1610);
-	outb(0x81, 0x1611);
-	outb(0x01, 0x161f);
-	if (__wait_latch(0x161f, 0x00))
-		goto out;
-	if (__wait_latch(0x1611, 0x00))
-		goto out;
-	if (__wait_latch(0x1612, 0x60))
-		goto out;
-	if (__wait_latch(0x1613, 0x00))
-		goto out;
-	outb(0x14, 0x1610);
-	outb(0x01, 0x1611);
-	outb(0x01, 0x161f);
-	if (__wait_latch(0x161f, 0x00))
-		goto out;
-	outb(0x10, 0x1610);
-	outb(0xc8, 0x1611);
-	outb(0x00, 0x1612);
-	outb(0x02, 0x1613);
-	outb(0x01, 0x161f);
-	if (__wait_latch(0x161f, 0x00))
-		goto out;
-	if (__device_refresh_sync())
-		goto out;
-	if (__wait_latch(0x1611, 0x00))
-		goto out;
-
-	/* we have done our dance, now let's wait for the applause */
-	for (total = INIT_TIMEOUT_MSECS; total > 0; total -= INIT_WAIT_MSECS) {
-		int x, y;
+	if (hdaps_set_power(1))
+		{ FAILED_INIT("hdaps_set_power failed"); goto bad; }
 
-		/* a read of the device helps push it into action */
-		__hdaps_read_pair(HDAPS_PORT_XPOS, HDAPS_PORT_YPOS, &x, &y);
-		if (!__wait_latch(0x1611, 0x02)) {
-			ret = 0;
-			break;
-		}
+	if (hdaps_set_ec_config(sampling_rate*oversampling_ratio,
+				running_avg_filter_order))
+		{ FAILED_INIT("hdaps_set_ec_config failed"); goto bad; }
 
-		msleep(INIT_WAIT_MSECS);
-	}
+	thinkpad_ec_invalidate();
+	udelay(200);
 
-out:
-	mutex_unlock(&hdaps_mtx);
+	/* Just prefetch instead of reading, to avoid ~1sec delay on load */
+	ret = thinkpad_ec_prefetch_row(&ec_accel_args);
+	if (ret)
+		{ FAILED_INIT("initial prefetch failed"); goto bad; }
+	goto good;
+bad:
+	thinkpad_ec_invalidate();
+	ret = -ENXIO;
+good:
+	stale_readout = 1;
+	thinkpad_ec_unlock();
 	return ret;
 }
 
+/**
+ * hdaps_device_shutdown - power off the accelerometer
+ * Returns nonzero on failure. Can sleep.
+ */
+static int hdaps_device_shutdown(void)
+{
+	int ret;
+	ret = hdaps_set_power(0);
+	if (ret) {
+		pr_warn("cannot power off\n");
+		return ret;
+	}
+	ret = hdaps_set_ec_config(0, 1);
+	if (ret)
+		pr_warn("cannot stop EC sampling\n");
+	return ret;
+}
 
 /* Device model stuff */
 
@@ -306,13 +424,29 @@ static int hdaps_probe(struct platform_device *dev)
 }
 
 #ifdef CONFIG_PM_SLEEP
+static int hdaps_suspend(struct device *dev)
+{
+	/* Don't do hdaps polls until resume re-initializes the sensor. */
+	del_timer_sync(&hdaps_timer);
+	hdaps_device_shutdown(); /* ignore errors, effect is negligible */
+	return 0;
+}
+
 static int hdaps_resume(struct device *dev)
 {
-	return hdaps_device_init();
+	int ret = hdaps_device_init();
+	if (ret)
+		return ret;
+
+	mutex_lock(&hdaps_users_mtx);
+	if (hdaps_users)
+		mod_timer(&hdaps_timer, jiffies + HZ/sampling_rate);
+	mutex_unlock(&hdaps_users_mtx);
+	return 0;
 }
 #endif
 
-static SIMPLE_DEV_PM_OPS(hdaps_pm, NULL, hdaps_resume);
+static SIMPLE_DEV_PM_OPS(hdaps_pm, hdaps_suspend, hdaps_resume);
 
 static struct platform_driver hdaps_driver = {
 	.probe = hdaps_probe,
@@ -322,30 +456,51 @@ static struct platform_driver hdaps_driver = {
 	},
 };
 
-/*
- * hdaps_calibrate - Set our "resting" values.  Callers must hold hdaps_mtx.
+/**
+ * hdaps_calibrate - set our "resting" values.
+ * Does its own locking.
  */
 static void hdaps_calibrate(void)
 {
-	__hdaps_read_pair(HDAPS_PORT_XPOS, HDAPS_PORT_YPOS, &rest_x, &rest_y);
+	needs_calibration = 1;
+	hdaps_update();
+	/* If that fails, the mousedev poll will take care of things later. */
 }
 
-static void hdaps_mousedev_poll(struct input_polled_dev *dev)
+/* Timer handler for updating the input device. Runs in softirq context,
+ * so avoid lenghty or blocking operations.
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,15,0)
+static void hdaps_mousedev_poll(unsigned long unused)
+#else
+static void hdaps_mousedev_poll(struct timer_list *unused)
+#endif
 {
-	struct input_dev *input_dev = dev->input;
-	int x, y;
+	int ret;
 
-	mutex_lock(&hdaps_mtx);
+	stale_readout = 1;
 
-	if (__hdaps_read_pair(HDAPS_PORT_XPOS, HDAPS_PORT_YPOS, &x, &y))
-		goto out;
+	/* Cannot sleep.  Try nonblockingly.  If we fail, try again later. */
+	if (thinkpad_ec_try_lock())
+		goto keep_active;
 
-	input_report_abs(input_dev, ABS_X, x - rest_x);
-	input_report_abs(input_dev, ABS_Y, y - rest_y);
-	input_sync(input_dev);
+	ret = __hdaps_update(1); /* fast update, we're in softirq context */
+	thinkpad_ec_unlock();
+	/* Any of "successful", "not yet ready" and "not prefetched"? */
+	if (ret != 0 && ret != -EBUSY && ret != -ENODATA) {
+		pr_err("poll failed, disabling updates\n");
+		return;
+	}
 
-out:
-	mutex_unlock(&hdaps_mtx);
+keep_active:
+	/* Even if we failed now, pos_x,y may have been updated earlier: */
+	input_report_abs(hdaps_idev, ABS_X, pos_x - rest_x);
+	input_report_abs(hdaps_idev, ABS_Y, pos_y - rest_y);
+	input_sync(hdaps_idev);
+	input_report_abs(hdaps_idev_raw, ABS_X, pos_x);
+	input_report_abs(hdaps_idev_raw, ABS_Y, pos_y);
+	input_sync(hdaps_idev_raw);
+	mod_timer(&hdaps_timer, jiffies + HZ/sampling_rate);
 }
 
 
@@ -354,65 +509,41 @@ out:
 static ssize_t hdaps_position_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
-	int ret, x, y;
-
-	ret = hdaps_read_pair(HDAPS_PORT_XPOS, HDAPS_PORT_YPOS, &x, &y);
+	int ret = hdaps_update();
 	if (ret)
 		return ret;
-
-	return sprintf(buf, "(%d,%d)\n", x, y);
-}
-
-static ssize_t hdaps_variance_show(struct device *dev,
-				   struct device_attribute *attr, char *buf)
-{
-	int ret, x, y;
-
-	ret = hdaps_read_pair(HDAPS_PORT_XVAR, HDAPS_PORT_YVAR, &x, &y);
-	if (ret)
-		return ret;
-
-	return sprintf(buf, "(%d,%d)\n", x, y);
+	return sprintf(buf, "(%d,%d)\n", pos_x, pos_y);
 }
 
 static ssize_t hdaps_temp1_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
-	u8 uninitialized_var(temp);
-	int ret;
-
-	ret = hdaps_readb_one(HDAPS_PORT_TEMP1, &temp);
-	if (ret)
-		return ret;
-
-	return sprintf(buf, "%u\n", temp);
-}
-
-static ssize_t hdaps_temp2_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	u8 uninitialized_var(temp);
-	int ret;
-
-	ret = hdaps_readb_one(HDAPS_PORT_TEMP2, &temp);
+	int ret = hdaps_update();
 	if (ret)
 		return ret;
-
-	return sprintf(buf, "%u\n", temp);
+	return sprintf(buf, "%d\n", temperature);
 }
 
 static ssize_t hdaps_keyboard_activity_show(struct device *dev,
 					    struct device_attribute *attr,
 					    char *buf)
 {
-	return sprintf(buf, "%u\n", KEYBD_ISSET(km_activity));
+	int ret = hdaps_update();
+	if (ret)
+		return ret;
+	return sprintf(buf, "%u\n",
+	   get_jiffies_64() < last_keyboard_jiffies + KMACT_REMEMBER_PERIOD);
 }
 
 static ssize_t hdaps_mouse_activity_show(struct device *dev,
 					 struct device_attribute *attr,
 					 char *buf)
 {
-	return sprintf(buf, "%u\n", MOUSE_ISSET(km_activity));
+	int ret = hdaps_update();
+	if (ret)
+		return ret;
+	return sprintf(buf, "%u\n",
+	   get_jiffies_64() < last_mouse_jiffies + KMACT_REMEMBER_PERIOD);
 }
 
 static ssize_t hdaps_calibrate_show(struct device *dev,
@@ -425,10 +556,7 @@ static ssize_t hdaps_calibrate_store(struct device *dev,
 				     struct device_attribute *attr,
 				     const char *buf, size_t count)
 {
-	mutex_lock(&hdaps_mtx);
 	hdaps_calibrate();
-	mutex_unlock(&hdaps_mtx);
-
 	return count;
 }
 
@@ -445,7 +573,7 @@ static ssize_t hdaps_invert_store(struct device *dev,
 	int invert;
 
 	if (sscanf(buf, "%d", &invert) != 1 ||
-	    invert < 0 || invert > HDAPS_BOTH_AXES)
+	    invert < 0 || invert > HDAPS_ORIENT_MAX)
 		return -EINVAL;
 
 	hdaps_invert = invert;
@@ -454,24 +582,128 @@ static ssize_t hdaps_invert_store(struct device *dev,
 	return count;
 }
 
+static ssize_t hdaps_sampling_rate_show(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", sampling_rate);
+}
+
+static ssize_t hdaps_sampling_rate_store(
+	struct device *dev, struct device_attribute *attr,
+	const char *buf, size_t count)
+{
+	int rate, ret;
+	if (sscanf(buf, "%d", &rate) != 1 || rate > HZ || rate <= 0) {
+		pr_warn("must have 0<input_sampling_rate<=HZ=%d\n", HZ);
+		return -EINVAL;
+	}
+	ret = hdaps_set_ec_config(rate*oversampling_ratio,
+				  running_avg_filter_order);
+	if (ret)
+		return ret;
+	sampling_rate = rate;
+	return count;
+}
+
+static ssize_t hdaps_oversampling_ratio_show(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	int ec_rate, order;
+	int ret = hdaps_get_ec_config(&ec_rate, &order);
+	if (ret)
+		return ret;
+	return sprintf(buf, "%u\n", ec_rate / sampling_rate);
+}
+
+static ssize_t hdaps_oversampling_ratio_store(
+	struct device *dev, struct device_attribute *attr,
+	const char *buf, size_t count)
+{
+	int ratio, ret;
+	if (sscanf(buf, "%d", &ratio) != 1 || ratio < 1)
+		return -EINVAL;
+	ret = hdaps_set_ec_config(sampling_rate*ratio,
+				  running_avg_filter_order);
+	if (ret)
+		return ret;
+	oversampling_ratio = ratio;
+	return count;
+}
+
+static ssize_t hdaps_running_avg_filter_order_show(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	int rate, order;
+	int ret = hdaps_get_ec_config(&rate, &order);
+	if (ret)
+		return ret;
+	return sprintf(buf, "%u\n", order);
+}
+
+static ssize_t hdaps_running_avg_filter_order_store(
+	struct device *dev, struct device_attribute *attr,
+	const char *buf, size_t count)
+{
+	int order, ret;
+	if (sscanf(buf, "%d", &order) != 1)
+		return -EINVAL;
+	ret = hdaps_set_ec_config(sampling_rate*oversampling_ratio, order);
+	if (ret)
+		return ret;
+	running_avg_filter_order = order;
+	return count;
+}
+
+static int hdaps_mousedev_open(struct input_dev *dev)
+{
+	if (!try_module_get(THIS_MODULE))
+		return -ENODEV;
+
+	mutex_lock(&hdaps_users_mtx);
+	if (hdaps_users++ == 0) /* first input user */
+		mod_timer(&hdaps_timer, jiffies + HZ/sampling_rate);
+	mutex_unlock(&hdaps_users_mtx);
+	return 0;
+}
+
+static void hdaps_mousedev_close(struct input_dev *dev)
+{
+	mutex_lock(&hdaps_users_mtx);
+	if (--hdaps_users == 0) /* no input users left */
+		del_timer_sync(&hdaps_timer);
+	mutex_unlock(&hdaps_users_mtx);
+
+	module_put(THIS_MODULE);
+}
+
 static DEVICE_ATTR(position, 0444, hdaps_position_show, NULL);
-static DEVICE_ATTR(variance, 0444, hdaps_variance_show, NULL);
 static DEVICE_ATTR(temp1, 0444, hdaps_temp1_show, NULL);
-static DEVICE_ATTR(temp2, 0444, hdaps_temp2_show, NULL);
-static DEVICE_ATTR(keyboard_activity, 0444, hdaps_keyboard_activity_show, NULL);
+  /* "temp1" instead of "temperature" is hwmon convention */
+static DEVICE_ATTR(keyboard_activity, 0444,
+		   hdaps_keyboard_activity_show, NULL);
 static DEVICE_ATTR(mouse_activity, 0444, hdaps_mouse_activity_show, NULL);
-static DEVICE_ATTR(calibrate, 0644, hdaps_calibrate_show,hdaps_calibrate_store);
+static DEVICE_ATTR(calibrate, 0644,
+		   hdaps_calibrate_show, hdaps_calibrate_store);
 static DEVICE_ATTR(invert, 0644, hdaps_invert_show, hdaps_invert_store);
+static DEVICE_ATTR(sampling_rate, 0644,
+		   hdaps_sampling_rate_show, hdaps_sampling_rate_store);
+static DEVICE_ATTR(oversampling_ratio, 0644,
+		   hdaps_oversampling_ratio_show,
+		   hdaps_oversampling_ratio_store);
+static DEVICE_ATTR(running_avg_filter_order, 0644,
+		   hdaps_running_avg_filter_order_show,
+		   hdaps_running_avg_filter_order_store);
 
 static struct attribute *hdaps_attributes[] = {
 	&dev_attr_position.attr,
-	&dev_attr_variance.attr,
 	&dev_attr_temp1.attr,
-	&dev_attr_temp2.attr,
 	&dev_attr_keyboard_activity.attr,
 	&dev_attr_mouse_activity.attr,
 	&dev_attr_calibrate.attr,
 	&dev_attr_invert.attr,
+	&dev_attr_sampling_rate.attr,
+	&dev_attr_oversampling_ratio.attr,
+	&dev_attr_running_avg_filter_order.attr,
 	NULL,
 };
 
@@ -482,84 +714,82 @@ static struct attribute_group hdaps_attribute_group = {
 
 /* Module stuff */
 
-/* hdaps_dmi_match - found a match.  return one, short-circuiting the hunt. */
-static int __init hdaps_dmi_match(const struct dmi_system_id *id)
-{
-	pr_info("%s detected\n", id->ident);
-	return 1;
-}
-
 /* hdaps_dmi_match_invert - found an inverted match. */
 static int __init hdaps_dmi_match_invert(const struct dmi_system_id *id)
 {
-	hdaps_invert = (unsigned long)id->driver_data;
-	pr_info("inverting axis (%u) readings\n", hdaps_invert);
-	return hdaps_dmi_match(id);
+	unsigned int orient = (kernel_ulong_t) id->driver_data;
+	hdaps_invert = orient;
+	pr_info("%s detected, setting orientation %u\n", id->ident, orient);
+	return 1; /* stop enumeration */
 }
 
-#define HDAPS_DMI_MATCH_INVERT(vendor, model, axes) {	\
+#define HDAPS_DMI_MATCH_INVERT(vendor, model, orient) { \
 	.ident = vendor " " model,			\
 	.callback = hdaps_dmi_match_invert,		\
-	.driver_data = (void *)axes,			\
+	.driver_data = (void *)(orient),		\
 	.matches = {					\
 		DMI_MATCH(DMI_BOARD_VENDOR, vendor),	\
 		DMI_MATCH(DMI_PRODUCT_VERSION, model)	\
 	}						\
 }
 
-#define HDAPS_DMI_MATCH_NORMAL(vendor, model)		\
-	HDAPS_DMI_MATCH_INVERT(vendor, model, 0)
-
-/* Note that HDAPS_DMI_MATCH_NORMAL("ThinkPad T42") would match
-   "ThinkPad T42p", so the order of the entries matters.
-   If your ThinkPad is not recognized, please update to latest
-   BIOS. This is especially the case for some R52 ThinkPads. */
-static const struct dmi_system_id hdaps_whitelist[] __initconst = {
-	HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad R50p", HDAPS_BOTH_AXES),
-	HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad R50"),
-	HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad R51"),
-	HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad R52"),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad R61i", HDAPS_BOTH_AXES),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad R61", HDAPS_BOTH_AXES),
-	HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad T41p", HDAPS_BOTH_AXES),
-	HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad T41"),
-	HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad T42p", HDAPS_BOTH_AXES),
-	HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad T42"),
-	HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad T43"),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T400", HDAPS_BOTH_AXES),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T60", HDAPS_BOTH_AXES),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T61p", HDAPS_BOTH_AXES),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T61", HDAPS_BOTH_AXES),
-	HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad X40"),
-	HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad X41", HDAPS_Y_AXIS),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X60", HDAPS_BOTH_AXES),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X61s", HDAPS_BOTH_AXES),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X61", HDAPS_BOTH_AXES),
-	HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad Z60m"),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad Z61m", HDAPS_BOTH_AXES),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad Z61p", HDAPS_BOTH_AXES),
+/* List of models with abnormal axis configuration.
+   Note that HDAPS_DMI_MATCH_NORMAL("ThinkPad T42") would match
+   "ThinkPad T42p", and enumeration stops after first match,
+   so the order of the entries matters. */
+const struct dmi_system_id hdaps_whitelist[] __initconst = {
+	HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad R50p", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad R60", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad T41p", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad T42p", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad X40", HDAPS_ORIENT_INVERT_Y),
+	HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad X41", HDAPS_ORIENT_INVERT_Y),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad R60", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad R61", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad R400", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad R500", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T60", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T61", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X60 Tablet", HDAPS_ORIENT_INVERT_Y),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X60s", HDAPS_ORIENT_INVERT_Y),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X60", HDAPS_ORIENT_SWAP | HDAPS_ORIENT_INVERT_X),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X61", HDAPS_ORIENT_SWAP | HDAPS_ORIENT_INVERT_X),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T400s", HDAPS_ORIENT_INVERT_X),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T400", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T410s", HDAPS_ORIENT_SWAP),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T410", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T500", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T510", HDAPS_ORIENT_SWAP | HDAPS_ORIENT_INVERT_X | HDAPS_ORIENT_INVERT_Y),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad W510", HDAPS_ORIENT_MAX),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad W520", HDAPS_ORIENT_MAX),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X200s", HDAPS_ORIENT_SWAP | HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X200", HDAPS_ORIENT_SWAP | HDAPS_ORIENT_INVERT_X | HDAPS_ORIENT_INVERT_Y),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X201 Tablet", HDAPS_ORIENT_SWAP | HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X201s", HDAPS_ORIENT_SWAP | HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X201", HDAPS_ORIENT_SWAP | HDAPS_ORIENT_INVERT_X),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X220", HDAPS_ORIENT_SWAP),
 	{ .ident = NULL }
 };
 
 static int __init hdaps_init(void)
 {
-	struct input_dev *idev;
 	int ret;
 
-	if (!dmi_check_system(hdaps_whitelist)) {
-		pr_warn("supported laptop not found!\n");
-		ret = -ENODEV;
-		goto out;
-	}
-
-	if (!request_region(HDAPS_LOW_PORT, HDAPS_NR_PORTS, "hdaps")) {
-		ret = -ENXIO;
-		goto out;
-	}
-
+	/* Determine axis orientation orientation */
+	if (hdaps_invert == HDAPS_ORIENT_UNDEFINED) /* set by module param? */
+		if (dmi_check_system(hdaps_whitelist) < 1) /* in whitelist? */
+			hdaps_invert = 0; /* default */
+
+	/* Init timer before platform_driver_register, in case of suspend */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,15,0)
+	init_timer(&hdaps_timer);
+	hdaps_timer.function = hdaps_mousedev_poll;
+#else
+	timer_setup(&hdaps_timer, hdaps_mousedev_poll, 0);
+#endif
 	ret = platform_driver_register(&hdaps_driver);
 	if (ret)
-		goto out_region;
+		goto out;
 
 	pdev = platform_device_register_simple("hdaps", -1, NULL, 0);
 	if (IS_ERR(pdev)) {
@@ -571,47 +801,79 @@ static int __init hdaps_init(void)
 	if (ret)
 		goto out_device;
 
-	hdaps_idev = input_allocate_polled_device();
+	hdaps_idev = input_allocate_device();
 	if (!hdaps_idev) {
 		ret = -ENOMEM;
 		goto out_group;
 	}
 
-	hdaps_idev->poll = hdaps_mousedev_poll;
-	hdaps_idev->poll_interval = HDAPS_POLL_INTERVAL;
-
-	/* initial calibrate for the input device */
-	hdaps_calibrate();
+	hdaps_idev_raw = input_allocate_device();
+	if (!hdaps_idev_raw) {
+		ret = -ENOMEM;
+		goto out_idev_first;
+	}
 
-	/* initialize the input class */
-	idev = hdaps_idev->input;
-	idev->name = "hdaps";
-	idev->phys = "isa1600/input0";
-	idev->id.bustype = BUS_ISA;
-	idev->dev.parent = &pdev->dev;
-	idev->evbit[0] = BIT_MASK(EV_ABS);
-	input_set_abs_params(idev, ABS_X,
+	/* calibration for the input device (deferred to avoid delay) */
+	needs_calibration = 1;
+
+	/* initialize the joystick-like fuzzed input device */
+	hdaps_idev->name = "ThinkPad HDAPS joystick emulation";
+	hdaps_idev->phys = "hdaps/input0";
+	hdaps_idev->id.bustype = BUS_HOST;
+	hdaps_idev->id.vendor  = HDAPS_INPUT_VENDOR;
+	hdaps_idev->id.product = HDAPS_INPUT_PRODUCT;
+	hdaps_idev->id.version = HDAPS_INPUT_JS_VERSION;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
+	hdaps_idev->cdev.dev = &pdev->dev;
+#endif
+	hdaps_idev->evbit[0] = BIT(EV_ABS);
+	hdaps_idev->open = hdaps_mousedev_open;
+	hdaps_idev->close = hdaps_mousedev_close;
+	input_set_abs_params(hdaps_idev, ABS_X,
 			-256, 256, HDAPS_INPUT_FUZZ, HDAPS_INPUT_FLAT);
-	input_set_abs_params(idev, ABS_Y,
+	input_set_abs_params(hdaps_idev, ABS_Y,
 			-256, 256, HDAPS_INPUT_FUZZ, HDAPS_INPUT_FLAT);
 
-	ret = input_register_polled_device(hdaps_idev);
+	ret = input_register_device(hdaps_idev);
 	if (ret)
 		goto out_idev;
 
-	pr_info("driver successfully loaded\n");
+	/* initialize the raw data input device */
+	hdaps_idev_raw->name = "ThinkPad HDAPS accelerometer data";
+	hdaps_idev_raw->phys = "hdaps/input1";
+	hdaps_idev_raw->id.bustype = BUS_HOST;
+	hdaps_idev_raw->id.vendor  = HDAPS_INPUT_VENDOR;
+	hdaps_idev_raw->id.product = HDAPS_INPUT_PRODUCT;
+	hdaps_idev_raw->id.version = HDAPS_INPUT_RAW_VERSION;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
+	hdaps_idev_raw->cdev.dev = &pdev->dev;
+#endif
+	hdaps_idev_raw->evbit[0] = BIT(EV_ABS);
+	hdaps_idev_raw->open = hdaps_mousedev_open;
+	hdaps_idev_raw->close = hdaps_mousedev_close;
+	input_set_abs_params(hdaps_idev_raw, ABS_X, -32768, 32767, 0, 0);
+	input_set_abs_params(hdaps_idev_raw, ABS_Y, -32768, 32767, 0, 0);
+
+	ret = input_register_device(hdaps_idev_raw);
+	if (ret)
+		goto out_idev_reg_first;
+
+	pr_info("driver successfully loaded.\n");
 	return 0;
 
+out_idev_reg_first:
+	input_unregister_device(hdaps_idev);
 out_idev:
-	input_free_polled_device(hdaps_idev);
+	input_free_device(hdaps_idev_raw);
+out_idev_first:
+	input_free_device(hdaps_idev);
 out_group:
 	sysfs_remove_group(&pdev->dev.kobj, &hdaps_attribute_group);
 out_device:
 	platform_device_unregister(pdev);
 out_driver:
 	platform_driver_unregister(&hdaps_driver);
-out_region:
-	release_region(HDAPS_LOW_PORT, HDAPS_NR_PORTS);
+	hdaps_device_shutdown();
 out:
 	pr_warn("driver init failed (ret=%d)!\n", ret);
 	return ret;
@@ -619,12 +881,12 @@ out:
 
 static void __exit hdaps_exit(void)
 {
-	input_unregister_polled_device(hdaps_idev);
-	input_free_polled_device(hdaps_idev);
+	input_unregister_device(hdaps_idev_raw);
+	input_unregister_device(hdaps_idev);
+	hdaps_device_shutdown(); /* ignore errors, effect is negligible */
 	sysfs_remove_group(&pdev->dev.kobj, &hdaps_attribute_group);
 	platform_device_unregister(pdev);
 	platform_driver_unregister(&hdaps_driver);
-	release_region(HDAPS_LOW_PORT, HDAPS_NR_PORTS);
 
 	pr_info("driver unloaded\n");
 }
@@ -632,9 +894,8 @@ static void __exit hdaps_exit(void)
 module_init(hdaps_init);
 module_exit(hdaps_exit);
 
-module_param_named(invert, hdaps_invert, int, 0);
-MODULE_PARM_DESC(invert, "invert data along each axis. 1 invert x-axis, "
-		 "2 invert y-axis, 3 invert both axes.");
+module_param_named(invert, hdaps_invert, uint, 0);
+MODULE_PARM_DESC(invert, "axis orientation code");
 
 MODULE_AUTHOR("Robert Love");
 MODULE_DESCRIPTION("IBM Hard Drive Active Protection System (HDAPS) driver");
diff --git a/drivers/platform/x86/thinkpad_ec.c b/drivers/platform/x86/thinkpad_ec.c
new file mode 100644
index 000000000..597614bc1
--- /dev/null
+++ b/drivers/platform/x86/thinkpad_ec.c
@@ -0,0 +1,513 @@
+/*
+ *  thinkpad_ec.c - ThinkPad embedded controller LPC3 functions
+ *
+ *  The embedded controller on ThinkPad laptops has a non-standard interface,
+ *  where LPC channel 3 of the H8S EC chip is hooked up to IO ports
+ *  0x1600-0x161F and implements (a special case of) the H8S LPC protocol.
+ *  The EC LPC interface provides various system management services (currently
+ *  known: battery information and accelerometer readouts). This driver
+ *  provides access and mutual exclusion for the EC interface.
+*
+ *  The LPC protocol and terminology are documented here:
+ *  "H8S/2104B Group Hardware Manual",
+ *  http://documentation.renesas.com/eng/products/mpumcu/rej09b0300_2140bhm.pdf
+ *
+ *  Copyright (C) 2006-2007 Shem Multinymous <multinymous@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/dmi.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/thinkpad_ec.h>
+#include <linux/jiffies.h>
+#include <asm/io.h>
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)
+	#include <asm/semaphore.h>
+#else
+	#include <linux/semaphore.h>
+#endif
+
+#define TP_VERSION "0.42"
+
+MODULE_AUTHOR("Shem Multinymous");
+MODULE_DESCRIPTION("ThinkPad embedded controller hardware access");
+MODULE_VERSION(TP_VERSION);
+MODULE_LICENSE("GPL");
+
+/* IO ports used by embedded controller LPC channel 3: */
+#define TPC_BASE_PORT 0x1600
+#define TPC_NUM_PORTS 0x20
+#define TPC_STR3_PORT 0x1604  /* Reads H8S EC register STR3 */
+#define TPC_TWR0_PORT  0x1610 /* Mapped to H8S EC register TWR0MW/SW  */
+#define TPC_TWR15_PORT 0x161F /* Mapped to H8S EC register TWR15. */
+  /* (and port TPC_TWR0_PORT+i is mapped to H8S reg TWRi for 0<i<16) */
+
+/* H8S STR3 status flags (see "H8S/2104B Group Hardware Manual" p.549) */
+#define H8S_STR3_IBF3B 0x80  /* Bidi. Data Register Input Buffer Full */
+#define H8S_STR3_OBF3B 0x40  /* Bidi. Data Register Output Buffer Full */
+#define H8S_STR3_MWMF  0x20  /* Master Write Mode Flag */
+#define H8S_STR3_SWMF  0x10  /* Slave Write Mode Flag */
+#define H8S_STR3_MASK  0xF0  /* All bits we care about in STR3 */
+
+/* Timeouts and retries */
+#define TPC_READ_RETRIES     150
+#define TPC_READ_NDELAY      500
+#define TPC_REQUEST_RETRIES 1000
+#define TPC_REQUEST_NDELAY    10
+#define TPC_PREFETCH_TIMEOUT   (HZ/10)  /* invalidate prefetch after 0.1sec */
+
+/* A few macros for printk()ing: */
+#define MSG_FMT(fmt, args...) \
+  "thinkpad_ec: %s: " fmt "\n", __func__, ## args
+#define REQ_FMT(msg, code) \
+  MSG_FMT("%s: (0x%02x:0x%02x)->0x%02x", \
+	  msg, args->val[0x0], args->val[0xF], code)
+
+/* State of request prefetching: */
+static u8 prefetch_arg0, prefetch_argF;           /* Args of last prefetch */
+static u64 prefetch_jiffies;                      /* time of prefetch, or: */
+#define TPC_PREFETCH_NONE   INITIAL_JIFFIES       /*   No prefetch */
+#define TPC_PREFETCH_JUNK   (INITIAL_JIFFIES+1)   /*   Ignore prefetch */
+
+/* Locking: */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+static DECLARE_MUTEX(thinkpad_ec_mutex);
+#else
+static DEFINE_SEMAPHORE(thinkpad_ec_mutex);
+#endif
+
+/* Kludge in case the ACPI DSDT reserves the ports we need. */
+static bool force_io;    /* Willing to do IO to ports we couldn't reserve? */
+static int reserved_io; /* Successfully reserved the ports? */
+module_param_named(force_io, force_io, bool, 0600);
+MODULE_PARM_DESC(force_io, "Force IO even if region already reserved (0=off, 1=on)");
+
+/**
+ * thinkpad_ec_lock - get lock on the ThinkPad EC
+ *
+ * Get exclusive lock for accesing the ThinkPad embedded controller LPC3
+ * interface. Returns 0 iff lock acquired.
+ */
+int thinkpad_ec_lock(void)
+{
+	int ret;
+	ret = down_interruptible(&thinkpad_ec_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(thinkpad_ec_lock);
+
+/**
+ * thinkpad_ec_try_lock - try getting lock on the ThinkPad EC
+ *
+ * Try getting an exclusive lock for accesing the ThinkPad embedded
+ * controller LPC3. Returns immediately if lock is not available; neither
+ * blocks nor sleeps. Returns 0 iff lock acquired .
+ */
+int thinkpad_ec_try_lock(void)
+{
+	return down_trylock(&thinkpad_ec_mutex);
+}
+EXPORT_SYMBOL_GPL(thinkpad_ec_try_lock);
+
+/**
+ * thinkpad_ec_unlock - release lock on ThinkPad EC
+ *
+ * Release a previously acquired exclusive lock on the ThinkPad ebmedded
+ * controller LPC3 interface.
+ */
+void thinkpad_ec_unlock(void)
+{
+	up(&thinkpad_ec_mutex);
+}
+EXPORT_SYMBOL_GPL(thinkpad_ec_unlock);
+
+/**
+ * thinkpad_ec_request_row - tell embedded controller to prepare a row
+ * @args Input register arguments
+ *
+ * Requests a data row by writing to H8S LPC registers TRW0 through TWR15 (or
+ * a subset thereof) following the protocol prescribed by the "H8S/2104B Group
+ * Hardware Manual". Does sanity checks via status register STR3.
+ */
+static int thinkpad_ec_request_row(const struct thinkpad_ec_row *args)
+{
+	u8 str3;
+	int i;
+
+	/* EC protocol requires write to TWR0 (function code): */
+	if (!(args->mask & 0x0001)) {
+		printk(KERN_ERR MSG_FMT("bad args->mask=0x%02x", args->mask));
+		return -EINVAL;
+	}
+
+	/* Check initial STR3 status: */
+	str3 = inb(TPC_STR3_PORT) & H8S_STR3_MASK;
+	if (str3 & H8S_STR3_OBF3B) { /* data already pending */
+		inb(TPC_TWR15_PORT); /* marks end of previous transaction */
+		if (prefetch_jiffies == TPC_PREFETCH_NONE)
+			printk(KERN_WARNING REQ_FMT(
+			       "EC has result from unrequested transaction",
+			       str3));
+		return -EBUSY; /* EC will be ready in a few usecs */
+	} else if (str3 == H8S_STR3_SWMF) { /* busy with previous request */
+		if (prefetch_jiffies == TPC_PREFETCH_NONE)
+			printk(KERN_WARNING REQ_FMT(
+			       "EC is busy with unrequested transaction",
+			       str3));
+		return -EBUSY; /* data will be pending in a few usecs */
+	} else if (str3 != 0x00) { /* unexpected status? */
+		printk(KERN_WARNING REQ_FMT("unexpected initial STR3", str3));
+		return -EIO;
+	}
+
+	/* Send TWR0MW: */
+	outb(args->val[0], TPC_TWR0_PORT);
+	str3 = inb(TPC_STR3_PORT) & H8S_STR3_MASK;
+	if (str3 != H8S_STR3_MWMF) { /* not accepted? */
+		printk(KERN_WARNING REQ_FMT("arg0 rejected", str3));
+		return -EIO;
+	}
+
+	/* Send TWR1 through TWR14: */
+	for (i = 1; i < TP_CONTROLLER_ROW_LEN-1; i++)
+		if ((args->mask>>i)&1)
+			outb(args->val[i], TPC_TWR0_PORT+i);
+
+	/* Send TWR15 (default to 0x01). This marks end of command. */
+	outb((args->mask & 0x8000) ? args->val[0xF] : 0x01, TPC_TWR15_PORT);
+
+	/* Wait until EC starts writing its reply (~60ns on average).
+	 * Releasing locks before this happens may cause an EC hang
+	 * due to firmware bug!
+	 */
+	for (i = 0; i < TPC_REQUEST_RETRIES; i++) {
+		str3 = inb(TPC_STR3_PORT) & H8S_STR3_MASK;
+		if (str3 & H8S_STR3_SWMF) /* EC started replying */
+			return 0;
+		else if (!(str3 & ~(H8S_STR3_IBF3B|H8S_STR3_MWMF)))
+			/* Normal progress (the EC hasn't seen the request
+			 * yet, or is processing it). Wait it out. */
+			ndelay(TPC_REQUEST_NDELAY);
+		else { /* weird EC status */
+			printk(KERN_WARNING
+			       REQ_FMT("bad end STR3", str3));
+			return -EIO;
+		}
+	}
+	printk(KERN_WARNING REQ_FMT("EC is mysteriously silent", str3));
+	return -EIO;
+}
+
+/**
+ * thinkpad_ec_read_data - read pre-requested row-data from EC
+ * @args Input register arguments of pre-requested rows
+ * @data Output register values
+ *
+ * Reads current row data from the controller, assuming it's already
+ * requested. Follows the H8S spec for register access and status checks.
+ */
+static int thinkpad_ec_read_data(const struct thinkpad_ec_row *args,
+				 struct thinkpad_ec_row *data)
+{
+	int i;
+	u8 str3 = inb(TPC_STR3_PORT) & H8S_STR3_MASK;
+	/* Once we make a request, STR3 assumes the sequence of values listed
+	 * in the following 'if' as it reads the request and writes its data.
+	 * It takes about a few dozen nanosecs total, with very high variance.
+	 */
+	if (str3 == (H8S_STR3_IBF3B|H8S_STR3_MWMF) ||
+	    str3 == 0x00 ||  /* the 0x00 is indistinguishable from idle EC! */
+	    str3 == H8S_STR3_SWMF)
+		return -EBUSY; /* not ready yet */
+	/* Finally, the EC signals output buffer full: */
+	if (str3 != (H8S_STR3_OBF3B|H8S_STR3_SWMF)) {
+		printk(KERN_WARNING
+		       REQ_FMT("bad initial STR3", str3));
+		return -EIO;
+	}
+
+	/* Read first byte (signals start of read transactions): */
+	data->val[0] = inb(TPC_TWR0_PORT);
+	/* Optionally read 14 more bytes: */
+	for (i = 1; i < TP_CONTROLLER_ROW_LEN-1; i++)
+		if ((data->mask >> i)&1)
+			data->val[i] = inb(TPC_TWR0_PORT+i);
+	/* Read last byte from 0x161F (signals end of read transaction): */
+	data->val[0xF] = inb(TPC_TWR15_PORT);
+
+	/* Readout still pending? */
+	str3 = inb(TPC_STR3_PORT) & H8S_STR3_MASK;
+	if (str3 & H8S_STR3_OBF3B)
+		printk(KERN_WARNING
+		       REQ_FMT("OBF3B=1 after read", str3));
+	/* If port 0x161F returns 0x80 too often, the EC may lock up. Warn: */
+	if (data->val[0xF] == 0x80)
+		printk(KERN_WARNING
+		       REQ_FMT("0x161F reports error", data->val[0xF]));
+	return 0;
+}
+
+/**
+ * thinkpad_ec_is_row_fetched - is the given row currently prefetched?
+ *
+ * To keep things simple we compare only the first and last args;
+ * this suffices for all known cases.
+ */
+static int thinkpad_ec_is_row_fetched(const struct thinkpad_ec_row *args)
+{
+	return (prefetch_jiffies != TPC_PREFETCH_NONE) &&
+	       (prefetch_jiffies != TPC_PREFETCH_JUNK) &&
+	       (prefetch_arg0 == args->val[0]) &&
+	       (prefetch_argF == args->val[0xF]) &&
+	       (get_jiffies_64() < prefetch_jiffies + TPC_PREFETCH_TIMEOUT);
+}
+
+/**
+ * thinkpad_ec_read_row - request and read data from ThinkPad EC
+ * @args Input register arguments
+ * @data Output register values
+ *
+ * Read a data row from the ThinkPad embedded controller LPC3 interface.
+ * Does fetching and retrying if needed. The row is specified by an
+ * array of 16 bytes, some of which may be undefined (but the first is
+ * mandatory). These bytes are given in @args->val[], where @args->val[i] is
+ * used iff (@args->mask>>i)&1). The resulting row data is stored in
+ * @data->val[], but is only guaranteed to be valid for indices corresponding
+ * to set bit in @data->mask. That is, if @data->mask&(1<<i)==0 then
+ * @data->val[i] is undefined.
+ *
+ * Returns -EBUSY on transient error and -EIO on abnormal condition.
+ * Caller must hold controller lock.
+ */
+int thinkpad_ec_read_row(const struct thinkpad_ec_row *args,
+			 struct thinkpad_ec_row *data)
+{
+	int retries, ret;
+
+	if (thinkpad_ec_is_row_fetched(args))
+		goto read_row; /* already requested */
+
+	/* Request the row */
+	for (retries = 0; retries < TPC_READ_RETRIES; ++retries) {
+		ret = thinkpad_ec_request_row(args);
+		if (!ret)
+			goto read_row;
+		if (ret != -EBUSY)
+			break;
+		ndelay(TPC_READ_NDELAY);
+	}
+	printk(KERN_ERR REQ_FMT("failed requesting row", ret));
+	goto out;
+
+read_row:
+	/* Read the row's data */
+	for (retries = 0; retries < TPC_READ_RETRIES; ++retries) {
+		ret = thinkpad_ec_read_data(args, data);
+		if (!ret)
+			goto out;
+		if (ret != -EBUSY)
+			break;
+		ndelay(TPC_READ_NDELAY);
+	}
+
+	printk(KERN_ERR REQ_FMT("failed waiting for data", ret));
+
+out:
+	prefetch_jiffies = TPC_PREFETCH_JUNK;
+	return ret;
+}
+EXPORT_SYMBOL_GPL(thinkpad_ec_read_row);
+
+/**
+ * thinkpad_ec_try_read_row - try reading prefetched data from ThinkPad EC
+ * @args Input register arguments
+ * @data Output register values
+ *
+ * Try reading a data row from the ThinkPad embedded controller LPC3
+ * interface, if this raw was recently prefetched using
+ * thinkpad_ec_prefetch_row(). Does not fetch, retry or block.
+ * The parameters have the same meaning as in thinkpad_ec_read_row().
+ *
+ * Returns -EBUSY is data not ready and -ENODATA if row not prefetched.
+ * Caller must hold controller lock.
+ */
+int thinkpad_ec_try_read_row(const struct thinkpad_ec_row *args,
+			     struct thinkpad_ec_row *data)
+{
+	int ret;
+	if (!thinkpad_ec_is_row_fetched(args)) {
+		ret = -ENODATA;
+	} else {
+		ret = thinkpad_ec_read_data(args, data);
+		if (!ret)
+			prefetch_jiffies = TPC_PREFETCH_NONE; /* eaten up */
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(thinkpad_ec_try_read_row);
+
+/**
+ * thinkpad_ec_prefetch_row - prefetch data from ThinkPad EC
+ * @args Input register arguments
+ *
+ * Prefetch a data row from the ThinkPad embedded controller LCP3
+ * interface. A subsequent call to thinkpad_ec_read_row() with the
+ * same arguments will be faster, and a subsequent call to
+ * thinkpad_ec_try_read_row() stands a good chance of succeeding if
+ * done neither too soon nor too late. See
+ * thinkpad_ec_read_row() for the meaning of @args.
+ *
+ * Returns -EBUSY on transient error and -EIO on abnormal condition.
+ * Caller must hold controller lock.
+ */
+int thinkpad_ec_prefetch_row(const struct thinkpad_ec_row *args)
+{
+	int ret;
+	ret = thinkpad_ec_request_row(args);
+	if (ret) {
+		prefetch_jiffies = TPC_PREFETCH_JUNK;
+	} else {
+		prefetch_jiffies = get_jiffies_64();
+		prefetch_arg0 = args->val[0x0];
+		prefetch_argF = args->val[0xF];
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(thinkpad_ec_prefetch_row);
+
+/**
+ * thinkpad_ec_invalidate - invalidate prefetched ThinkPad EC data
+ *
+ * Invalidate the data prefetched via thinkpad_ec_prefetch_row() from the
+ * ThinkPad embedded controller LPC3 interface.
+ * Must be called before unlocking by any code that accesses the controller
+ * ports directly.
+ */
+void thinkpad_ec_invalidate(void)
+{
+	prefetch_jiffies = TPC_PREFETCH_JUNK;
+}
+EXPORT_SYMBOL_GPL(thinkpad_ec_invalidate);
+
+
+/*** Checking for EC hardware ***/
+
+/**
+ * thinkpad_ec_test - verify the EC is present and follows protocol
+ *
+ * Ensure the EC LPC3 channel really works on this machine by making
+ * an EC request and seeing if the EC follows the documented H8S protocol.
+ * The requested row just reads battery status, so it should be harmless to
+ * access it (on a correct EC).
+ * This test writes to IO ports, so execute only after checking DMI.
+ */
+static int __init thinkpad_ec_test(void)
+{
+	int ret;
+	const struct thinkpad_ec_row args = /* battery 0 basic status */
+	  { .mask = 0x8001, .val = {0x01,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x00} };
+	struct thinkpad_ec_row data = { .mask = 0x0000 };
+	ret = thinkpad_ec_lock();
+	if (ret)
+		return ret;
+	ret = thinkpad_ec_read_row(&args, &data);
+	thinkpad_ec_unlock();
+	return ret;
+}
+
+/* Search all DMI device names of a given type for a substring */
+static int __init dmi_find_substring(int type, const char *substr)
+{
+	const struct dmi_device *dev = NULL;
+	while ((dev = dmi_find_device(type, NULL, dev))) {
+		if (strstr(dev->name, substr))
+			return 1;
+	}
+	return 0;
+}
+
+#define TP_DMI_MATCH(vendor,model)	{		\
+	.ident = vendor " " model,			\
+	.matches = {					\
+		DMI_MATCH(DMI_BOARD_VENDOR, vendor),	\
+		DMI_MATCH(DMI_PRODUCT_VERSION, model)	\
+	}						\
+}
+
+/* Check DMI for existence of ThinkPad embedded controller */
+static int __init check_dmi_for_ec(void)
+{
+	/* A few old models that have a good EC but don't report it in DMI */
+	struct dmi_system_id tp_whitelist[] = {
+		TP_DMI_MATCH("IBM", "ThinkPad A30"),
+		TP_DMI_MATCH("IBM", "ThinkPad T23"),
+		TP_DMI_MATCH("IBM", "ThinkPad X24"),
+		TP_DMI_MATCH("LENOVO", "ThinkPad"),
+		{ .ident = NULL }
+	};
+	return dmi_find_substring(DMI_DEV_TYPE_OEM_STRING,
+				  "IBM ThinkPad Embedded Controller") ||
+	       dmi_check_system(tp_whitelist);
+}
+
+/*** Init and cleanup ***/
+
+static int __init thinkpad_ec_init(void)
+{
+	if (!check_dmi_for_ec()) {
+		printk(KERN_WARNING
+		       "thinkpad_ec: no ThinkPad embedded controller!\n");
+		return -ENODEV;
+	}
+
+	if (request_region(TPC_BASE_PORT, TPC_NUM_PORTS, "thinkpad_ec")) {
+		reserved_io = 1;
+	} else {
+		printk(KERN_ERR "thinkpad_ec: cannot claim IO ports %#x-%#x... ",
+		       TPC_BASE_PORT,
+		       TPC_BASE_PORT + TPC_NUM_PORTS - 1);
+		if (force_io) {
+			printk("forcing use of unreserved IO ports.\n");
+		} else {
+			printk("consider using force_io=1.\n");
+			return -ENXIO;
+		}
+	}
+	prefetch_jiffies = TPC_PREFETCH_JUNK;
+	if (thinkpad_ec_test()) {
+		printk(KERN_ERR "thinkpad_ec: initial ec test failed\n");
+		if (reserved_io)
+			release_region(TPC_BASE_PORT, TPC_NUM_PORTS);
+		return -ENXIO;
+	}
+	printk(KERN_INFO "thinkpad_ec: thinkpad_ec " TP_VERSION " loaded.\n");
+	return 0;
+}
+
+static void __exit thinkpad_ec_exit(void)
+{
+	if (reserved_io)
+		release_region(TPC_BASE_PORT, TPC_NUM_PORTS);
+	printk(KERN_INFO "thinkpad_ec: unloaded.\n");
+}
+
+module_init(thinkpad_ec_init);
+module_exit(thinkpad_ec_exit);
diff --git a/drivers/platform/x86/tp_smapi.c b/drivers/platform/x86/tp_smapi.c
new file mode 100644
index 000000000..209cb6487
--- /dev/null
+++ b/drivers/platform/x86/tp_smapi.c
@@ -0,0 +1,1493 @@
+/*
+ *  tp_smapi.c - ThinkPad SMAPI support
+ *
+ *  This driver exposes some features of the System Management Application
+ *  Program Interface (SMAPI) BIOS found on ThinkPad laptops. It works on
+ *  models in which the SMAPI BIOS runs in SMM and is invoked by writing
+ *  to the APM control port 0xB2.
+ *  It also exposes battery status information, obtained from the ThinkPad
+ *  embedded controller (via the thinkpad_ec module).
+ *  Ancient ThinkPad models use a different interface, supported by the
+ *  "thinkpad" module from "tpctl".
+ *
+ *  Many of the battery status values obtained from the EC simply mirror
+ *  values provided by the battery's Smart Battery System (SBS) interface, so
+ *  their meaning is defined by the Smart Battery Data Specification (see
+ *  http://sbs-forum.org/specs/sbdat110.pdf). References to this SBS spec
+ *  are given in the code where relevant.
+ *
+ *  Copyright (C) 2006 Shem Multinymous <multinymous@gmail.com>.
+ *  SMAPI access code based on the mwave driver by Mike Sullivan.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/proc_fs.h>
+#include <linux/mc146818rtc.h>	/* CMOS defines */
+#include <linux/delay.h>
+#include <linux/version.h>
+#include <linux/thinkpad_ec.h>
+#include <linux/platform_device.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+#define TP_VERSION "0.42"
+#define TP_DESC "ThinkPad SMAPI Support"
+#define TP_DIR "smapi"
+
+MODULE_AUTHOR("Shem Multinymous");
+MODULE_DESCRIPTION(TP_DESC);
+MODULE_VERSION(TP_VERSION);
+MODULE_LICENSE("GPL");
+
+static struct platform_device *pdev;
+
+static int tp_debug;
+module_param_named(debug, tp_debug, int, 0600);
+MODULE_PARM_DESC(debug, "Debug level (0=off, 1=on)");
+
+/* A few macros for printk()ing: */
+#define TPRINTK(level, fmt, args...) \
+  dev_printk(level, &(pdev->dev), "%s: " fmt "\n", __func__, ## args)
+#define DPRINTK(fmt, args...) \
+  do { if (tp_debug) TPRINTK(KERN_DEBUG, fmt, ## args); } while (0)
+
+/*********************************************************************
+ * SMAPI interface
+ */
+
+/* SMAPI functions (register BX when making the SMM call). */
+#define SMAPI_GET_INHIBIT_CHARGE                0x2114
+#define SMAPI_SET_INHIBIT_CHARGE                0x2115
+#define SMAPI_GET_THRESH_START                  0x2116
+#define SMAPI_SET_THRESH_START                  0x2117
+#define SMAPI_GET_FORCE_DISCHARGE               0x2118
+#define SMAPI_SET_FORCE_DISCHARGE               0x2119
+#define SMAPI_GET_THRESH_STOP                   0x211a
+#define SMAPI_SET_THRESH_STOP                   0x211b
+
+/* SMAPI error codes (see ThinkPad 770 Technical Reference Manual p.83 at
+ http://www-307.ibm.com/pc/support/site.wss/document.do?lndocid=PFAN-3TUQQD */
+#define SMAPI_RETCODE_EOF 0xff
+static struct { u8 rc; char *msg; int ret; } smapi_retcode[] =
+{
+	{0x00, "OK", 0},
+	{0x53, "SMAPI function is not available", -ENXIO},
+	{0x81, "Invalid parameter", -EINVAL},
+	{0x86, "Function is not supported by SMAPI BIOS", -EOPNOTSUPP},
+	{0x90, "System error", -EIO},
+	{0x91, "System is invalid", -EIO},
+	{0x92, "System is busy, -EBUSY"},
+	{0xa0, "Device error (disk read error)", -EIO},
+	{0xa1, "Device is busy", -EBUSY},
+	{0xa2, "Device is not attached", -ENXIO},
+	{0xa3, "Device is disbled", -EIO},
+	{0xa4, "Request parameter is out of range", -EINVAL},
+	{0xa5, "Request parameter is not accepted", -EINVAL},
+	{0xa6, "Transient error", -EBUSY}, /* ? */
+	{SMAPI_RETCODE_EOF, "Unknown error code", -EIO}
+};
+
+
+#define SMAPI_MAX_RETRIES 10
+#define SMAPI_PORT2 0x4F           /* fixed port, meaning unclear */
+static unsigned short smapi_port;  /* APM control port, normally 0xB2 */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+static DECLARE_MUTEX(smapi_mutex);
+#else
+static DEFINE_SEMAPHORE(smapi_mutex);
+#endif
+
+/**
+ * find_smapi_port - read SMAPI port from NVRAM
+ */
+static int __init find_smapi_port(void)
+{
+	u16 smapi_id = 0;
+	unsigned short port = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&rtc_lock, flags);
+	smapi_id = CMOS_READ(0x7C);
+	smapi_id |= (CMOS_READ(0x7D) << 8);
+	spin_unlock_irqrestore(&rtc_lock, flags);
+
+	if (smapi_id != 0x5349) {
+		printk(KERN_ERR "SMAPI not supported (ID=0x%x)\n", smapi_id);
+		return -ENXIO;
+	}
+	spin_lock_irqsave(&rtc_lock, flags);
+	port = CMOS_READ(0x7E);
+	port |= (CMOS_READ(0x7F) << 8);
+	spin_unlock_irqrestore(&rtc_lock, flags);
+	if (port == 0) {
+		printk(KERN_ERR "unable to read SMAPI port number\n");
+		return -ENXIO;
+	}
+	return port;
+}
+
+/**
+ * smapi_request - make a SMAPI call
+ * @inEBX, @inECX, @inEDI, @inESI: input registers
+ * @outEBX, @outECX, @outEDX, @outEDI, @outESI: outputs registers
+ * @msg: textual error message
+ * Invokes the SMAPI SMBIOS with the given input and outpu args.
+ * All outputs are optional (can be %NULL).
+ * Returns 0 when successful, and a negative errno constant
+ * (see smapi_retcode above) upon failure.
+ */
+static int smapi_request(u32 inEBX, u32 inECX,
+			 u32 inEDI, u32 inESI,
+			 u32 *outEBX, u32 *outECX, u32 *outEDX,
+			 u32 *outEDI, u32 *outESI, const char **msg)
+{
+	int ret = 0;
+	int i;
+	int retries;
+	u8 rc;
+	/* Must use local vars for output regs, due to reg pressure. */
+	u32 tmpEAX, tmpEBX, tmpECX, tmpEDX, tmpEDI, tmpESI;
+
+	for (retries = 0; retries < SMAPI_MAX_RETRIES; ++retries) {
+		DPRINTK("req_in: BX=%x CX=%x DI=%x SI=%x",
+			inEBX, inECX, inEDI, inESI);
+
+		/* SMAPI's SMBIOS call and thinkpad_ec end up using use
+		 * different interfaces to the same chip, so play it safe. */
+		ret = thinkpad_ec_lock();
+		if (ret)
+			return ret;
+
+		__asm__ __volatile__(
+			"movl  $0x00005380,%%eax\n\t"
+			"movl  %6,%%ebx\n\t"
+			"movl  %7,%%ecx\n\t"
+			"movl  %8,%%edi\n\t"
+			"movl  %9,%%esi\n\t"
+			"xorl  %%edx,%%edx\n\t"
+			"movw  %10,%%dx\n\t"
+			"out   %%al,%%dx\n\t"  /* trigger SMI to SMBIOS */
+			"out   %%al,$0x4F\n\t"
+			"movl  %%eax,%0\n\t"
+			"movl  %%ebx,%1\n\t"
+			"movl  %%ecx,%2\n\t"
+			"movl  %%edx,%3\n\t"
+			"movl  %%edi,%4\n\t"
+			"movl  %%esi,%5\n\t"
+			:"=m"(tmpEAX),
+			 "=m"(tmpEBX),
+			 "=m"(tmpECX),
+			 "=m"(tmpEDX),
+			 "=m"(tmpEDI),
+			 "=m"(tmpESI)
+			:"m"(inEBX), "m"(inECX), "m"(inEDI), "m"(inESI),
+			 "m"((u16)smapi_port)
+			:"%eax", "%ebx", "%ecx", "%edx", "%edi",
+			 "%esi");
+
+		thinkpad_ec_invalidate();
+		thinkpad_ec_unlock();
+
+		/* Don't let the next SMAPI access happen too quickly,
+		 * may case problems. (We're hold smapi_mutex).       */
+		msleep(50);
+
+		if (outEBX) *outEBX = tmpEBX;
+		if (outECX) *outECX = tmpECX;
+		if (outEDX) *outEDX = tmpEDX;
+		if (outESI) *outESI = tmpESI;
+		if (outEDI) *outEDI = tmpEDI;
+
+		/* Look up error code */
+		rc = (tmpEAX>>8)&0xFF;
+		for (i = 0; smapi_retcode[i].rc != SMAPI_RETCODE_EOF &&
+			    smapi_retcode[i].rc != rc; ++i) {}
+		ret = smapi_retcode[i].ret;
+		if (msg)
+			*msg = smapi_retcode[i].msg;
+
+		DPRINTK("req_out: AX=%x BX=%x CX=%x DX=%x DI=%x SI=%x r=%d",
+			 tmpEAX, tmpEBX, tmpECX, tmpEDX, tmpEDI, tmpESI, ret);
+		if (ret)
+			TPRINTK(KERN_NOTICE, "SMAPI error: %s (func=%x)",
+				smapi_retcode[i].msg, inEBX);
+
+		if (ret != -EBUSY)
+			return ret;
+	}
+	return ret;
+}
+
+/* Convenience wrapper: discard output arguments */
+static int smapi_write(u32 inEBX, u32 inECX,
+		       u32 inEDI, u32 inESI, const char **msg)
+{
+	return smapi_request(inEBX, inECX, inEDI, inESI,
+			     NULL, NULL, NULL, NULL, NULL, msg);
+}
+
+
+/*********************************************************************
+ * Specific SMAPI services
+ * All of these functions return 0 upon success, and a negative errno
+ * constant (see smapi_retcode) on failure.
+ */
+
+enum thresh_type {
+	THRESH_STOP  = 0, /* the code assumes this is 0 for brevity */
+	THRESH_START
+};
+#define THRESH_NAME(which) ((which == THRESH_START) ? "start" : "stop")
+
+/**
+ * __get_real_thresh - read battery charge start/stop threshold from SMAPI
+ * @bat:    battery number (0 or 1)
+ * @which:  THRESH_START or THRESH_STOP
+ * @thresh: 1..99, 0=default 1..99, 0=default (pass this as-is to SMAPI)
+ * @outEDI: some additional state that needs to be preserved, meaning unknown
+ * @outESI: some additional state that needs to be preserved, meaning unknown
+ */
+static int __get_real_thresh(int bat, enum thresh_type which, int *thresh,
+			     u32 *outEDI, u32 *outESI)
+{
+	u32 ebx = (which == THRESH_START) ? SMAPI_GET_THRESH_START
+					  : SMAPI_GET_THRESH_STOP;
+	u32 ecx = (bat+1)<<8;
+	const char *msg;
+	int ret = smapi_request(ebx, ecx, 0, 0, NULL,
+				&ecx, NULL, outEDI, outESI, &msg);
+	if (ret) {
+		TPRINTK(KERN_NOTICE, "cannot get %s_thresh of bat=%d: %s",
+			THRESH_NAME(which), bat, msg);
+		return ret;
+	}
+	if (!(ecx&0x00000100)) {
+		TPRINTK(KERN_NOTICE, "cannot get %s_thresh of bat=%d: ecx=0%x",
+			THRESH_NAME(which), bat, ecx);
+		return -EIO;
+	}
+	if (thresh)
+		*thresh = ecx&0xFF;
+	return 0;
+}
+
+/**
+ * get_real_thresh - read battery charge start/stop threshold from SMAPI
+ * @bat:    battery number (0 or 1)
+ * @which:  THRESH_START or THRESH_STOP
+ * @thresh: 1..99, 0=default (passes as-is to SMAPI)
+ */
+static int get_real_thresh(int bat, enum thresh_type which, int *thresh)
+{
+	return __get_real_thresh(bat, which, thresh, NULL, NULL);
+}
+
+/**
+ * set_real_thresh - write battery start/top charge threshold to SMAPI
+ * @bat:    battery number (0 or 1)
+ * @which:  THRESH_START or THRESH_STOP
+ * @thresh: 1..99, 0=default (passes as-is to SMAPI)
+ */
+static int set_real_thresh(int bat, enum thresh_type which, int thresh)
+{
+	u32 ebx = (which == THRESH_START) ? SMAPI_SET_THRESH_START
+					  : SMAPI_SET_THRESH_STOP;
+	u32 ecx = ((bat+1)<<8) + thresh;
+	u32 getDI, getSI;
+	const char *msg;
+	int ret;
+
+	/* verify read before writing */
+	ret = __get_real_thresh(bat, which, NULL, &getDI, &getSI);
+	if (ret)
+		return ret;
+
+	ret = smapi_write(ebx, ecx, getDI, getSI, &msg);
+	if (ret)
+		TPRINTK(KERN_NOTICE, "set %s to %d for bat=%d failed: %s",
+			THRESH_NAME(which), thresh, bat, msg);
+	else
+		TPRINTK(KERN_INFO, "set %s to %d for bat=%d",
+			THRESH_NAME(which), thresh, bat);
+	return ret;
+}
+
+/**
+ * __get_inhibit_charge_minutes - get inhibit charge period from SMAPI
+ * @bat:     battery number (0 or 1)
+ * @minutes: period in minutes (1..65535 minutes, 0=disabled)
+ * @outECX: some additional state that needs to be preserved, meaning unknown
+ * Note that @minutes is the originally set value, it does not count down.
+ */
+static int __get_inhibit_charge_minutes(int bat, int *minutes, u32 *outECX)
+{
+	u32 ecx = (bat+1)<<8;
+	u32 esi;
+	const char *msg;
+	int ret = smapi_request(SMAPI_GET_INHIBIT_CHARGE, ecx, 0, 0,
+				NULL, &ecx, NULL, NULL, &esi, &msg);
+	if (ret) {
+		TPRINTK(KERN_NOTICE, "failed for bat=%d: %s", bat, msg);
+		return ret;
+	}
+	if (!(ecx&0x0100)) {
+		TPRINTK(KERN_NOTICE, "bad ecx=0x%x for bat=%d", ecx, bat);
+		return -EIO;
+	}
+	if (minutes)
+		*minutes = (ecx&0x0001)?esi:0;
+	if (outECX)
+		*outECX = ecx;
+	return 0;
+}
+
+/**
+ * get_inhibit_charge_minutes - get inhibit charge period from SMAPI
+ * @bat:     battery number (0 or 1)
+ * @minutes: period in minutes (1..65535 minutes, 0=disabled)
+ * Note that @minutes is the originally set value, it does not count down.
+ */
+static int get_inhibit_charge_minutes(int bat, int *minutes)
+{
+	return __get_inhibit_charge_minutes(bat, minutes, NULL);
+}
+
+/**
+ * set_inhibit_charge_minutes - write inhibit charge period to SMAPI
+ * @bat:     battery number (0 or 1)
+ * @minutes: period in minutes (1..65535 minutes, 0=disabled)
+ */
+static int set_inhibit_charge_minutes(int bat, int minutes)
+{
+	u32 ecx;
+	const char *msg;
+	int ret;
+
+	/* verify read before writing */
+	ret = __get_inhibit_charge_minutes(bat, NULL, &ecx);
+	if (ret)
+		return ret;
+
+	ecx = ((bat+1)<<8) | (ecx&0x00FE) | (minutes > 0 ? 0x0001 : 0x0000);
+	if (minutes > 0xFFFF)
+		minutes = 0xFFFF;
+	ret = smapi_write(SMAPI_SET_INHIBIT_CHARGE, ecx, 0, minutes, &msg);
+	if (ret)
+		TPRINTK(KERN_NOTICE,
+			"set to %d failed for bat=%d: %s", minutes, bat, msg);
+	else
+		TPRINTK(KERN_INFO, "set to %d for bat=%d\n", minutes, bat);
+	return ret;
+}
+
+
+/**
+ * get_force_discharge - get status of forced discharging from SMAPI
+ * @bat:     battery number (0 or 1)
+ * @enabled: 1 if forced discharged is enabled, 0 if not
+ */
+static int get_force_discharge(int bat, int *enabled)
+{
+	u32 ecx = (bat+1)<<8;
+	const char *msg;
+	int ret = smapi_request(SMAPI_GET_FORCE_DISCHARGE, ecx, 0, 0,
+				NULL, &ecx, NULL, NULL, NULL, &msg);
+	if (ret) {
+		TPRINTK(KERN_NOTICE, "failed for bat=%d: %s", bat, msg);
+		return ret;
+	}
+	*enabled = (!(ecx&0x00000100) && (ecx&0x00000001))?1:0;
+	return 0;
+}
+
+/**
+ * set_force_discharge - write status of forced discharging to SMAPI
+ * @bat:     battery number (0 or 1)
+ * @enabled: 1 if forced discharged is enabled, 0 if not
+ */
+static int set_force_discharge(int bat, int enabled)
+{
+	u32 ecx = (bat+1)<<8;
+	const char *msg;
+	int ret = smapi_request(SMAPI_GET_FORCE_DISCHARGE, ecx, 0, 0,
+				NULL, &ecx, NULL, NULL, NULL, &msg);
+	if (ret) {
+		TPRINTK(KERN_NOTICE, "get failed for bat=%d: %s", bat, msg);
+		return ret;
+	}
+	if (ecx&0x00000100) {
+		TPRINTK(KERN_NOTICE, "cannot force discharge bat=%d", bat);
+		return -EIO;
+	}
+
+	ecx = ((bat+1)<<8) | (ecx&0x000000FA) | (enabled?0x00000001:0);
+	ret = smapi_write(SMAPI_SET_FORCE_DISCHARGE, ecx, 0, 0, &msg);
+	if (ret)
+		TPRINTK(KERN_NOTICE, "set to %d failed for bat=%d: %s",
+			enabled, bat, msg);
+	else
+		TPRINTK(KERN_INFO, "set to %d for bat=%d", enabled, bat);
+	return ret;
+}
+
+
+/*********************************************************************
+ * Wrappers to threshold-related SMAPI functions, which handle default
+ * thresholds and related quirks.
+ */
+
+/* Minimum, default and minimum difference for battery charging thresholds: */
+#define MIN_THRESH_DELTA      4  /* Min delta between start and stop thresh */
+#define MIN_THRESH_START      2
+#define MAX_THRESH_START      (100-MIN_THRESH_DELTA)
+#define MIN_THRESH_STOP       (MIN_THRESH_START + MIN_THRESH_DELTA)
+#define MAX_THRESH_STOP       100
+#define DEFAULT_THRESH_START  MAX_THRESH_START
+#define DEFAULT_THRESH_STOP   MAX_THRESH_STOP
+
+/* The GUI of IBM's Battery Maximizer seems to show a start threshold that
+ * is 1 more than the value we set/get via SMAPI. Since the threshold is
+ * maintained across reboot, this can be confusing. So we kludge our
+ * interface for interoperability: */
+#define BATMAX_FIX   1
+
+/* Get charge start/stop threshold (1..100),
+ * substituting default values if needed and applying BATMAT_FIX. */
+static int get_thresh(int bat, enum thresh_type which, int *thresh)
+{
+	int ret = get_real_thresh(bat, which, thresh);
+	if (ret)
+		return ret;
+	if (*thresh == 0)
+		*thresh = (which == THRESH_START) ? DEFAULT_THRESH_START
+						  : DEFAULT_THRESH_STOP;
+	else if (which == THRESH_START)
+		*thresh += BATMAX_FIX;
+	return 0;
+}
+
+
+/* Set charge start/stop threshold (1..100),
+ * substituting default values if needed and applying BATMAT_FIX. */
+static int set_thresh(int bat, enum thresh_type which, int thresh)
+{
+	if (which == THRESH_STOP && thresh == DEFAULT_THRESH_STOP)
+		thresh = 0; /* 100 is out of range, but default means 100 */
+	if (which == THRESH_START)
+		thresh -= BATMAX_FIX;
+	return set_real_thresh(bat, which, thresh);
+}
+
+/*********************************************************************
+ * ThinkPad embedded controller readout and basic functions
+ */
+
+/**
+ * read_tp_ec_row - read data row from the ThinkPad embedded controller
+ * @arg0: EC command code
+ * @bat: battery number, 0 or 1
+ * @j: the byte value to be used for "junk" (unused) input/outputs
+ * @dataval: result vector
+ */
+static int read_tp_ec_row(u8 arg0, int bat, u8 j, u8 *dataval)
+{
+	int ret;
+	const struct thinkpad_ec_row args = { .mask = 0xFFFF,
+		.val = {arg0, j,j,j,j,j,j,j,j,j,j,j,j,j,j, (u8)bat} };
+	struct thinkpad_ec_row data = { .mask = 0xFFFF };
+
+	ret = thinkpad_ec_lock();
+	if (ret)
+		return ret;
+	ret = thinkpad_ec_read_row(&args, &data);
+	thinkpad_ec_unlock();
+	memcpy(dataval, &data.val, TP_CONTROLLER_ROW_LEN);
+	return ret;
+}
+
+/**
+ * power_device_present - check for presence of battery or AC power
+ * @bat: 0 for battery 0, 1 for battery 1, otherwise AC power
+ * Returns 1 if present, 0 if not present, negative if error.
+ */
+static int power_device_present(int bat)
+{
+	u8 row[TP_CONTROLLER_ROW_LEN];
+	u8 test;
+	int ret = read_tp_ec_row(1, bat, 0, row);
+	if (ret)
+		return ret;
+	switch (bat) {
+	case 0:  test = 0x40; break; /* battery 0 */
+	case 1:  test = 0x20; break; /* battery 1 */
+	default: test = 0x80;        /* AC power */
+	}
+	return (row[0] & test) ? 1 : 0;
+}
+
+/**
+ * bat_has_status - check if battery can report detailed status
+ * @bat: 0 for battery 0, 1 for battery 1
+ * Returns 1 if yes, 0 if no, negative if error.
+ */
+static int bat_has_status(int bat)
+{
+	u8 row[TP_CONTROLLER_ROW_LEN];
+	int ret = read_tp_ec_row(1, bat, 0, row);
+	if (ret)
+		return ret;
+	if ((row[0] & (bat?0x20:0x40)) == 0) /* no battery */
+		return 0;
+	if ((row[1] & (0x60)) == 0) /* no status */
+		return 0;
+	return 1;
+}
+
+/**
+ * get_tp_ec_bat_16 - read a 16-bit value from EC battery status data
+ * @arg0: first argument to EC
+ * @off: offset in row returned from EC
+ * @bat: battery (0 or 1)
+ * @val: the 16-bit value obtained
+ * Returns nonzero on error.
+ */
+static int get_tp_ec_bat_16(u8 arg0, int offset, int bat, u16 *val)
+{
+	u8 row[TP_CONTROLLER_ROW_LEN];
+	int ret;
+	if (bat_has_status(bat) != 1)
+		return -ENXIO;
+	ret = read_tp_ec_row(arg0, bat, 0, row);
+	if (ret)
+		return ret;
+	*val = *(u16 *)(row+offset);
+	return 0;
+}
+
+/*********************************************************************
+ * sysfs attributes for batteries -
+ * definitions and helper functions
+ */
+
+/* A custom device attribute struct which holds a battery number */
+struct bat_device_attribute {
+	struct device_attribute dev_attr;
+	int bat;
+};
+
+/**
+ * attr_get_bat - get the battery to which the attribute belongs
+ */
+static int attr_get_bat(struct device_attribute *attr)
+{
+	return container_of(attr, struct bat_device_attribute, dev_attr)->bat;
+}
+
+/**
+ * show_tp_ec_bat_u16 - show an unsigned 16-bit battery attribute
+ * @arg0: specified 1st argument of EC raw to read
+ * @offset: byte offset in EC raw data
+ * @mul: correction factor to multiply by
+ * @na_msg: string to output is value not available (0xFFFFFFFF)
+ * @attr: battery attribute
+ * @buf: output buffer
+ * The 16-bit value is read from the EC, treated as unsigned,
+ * transformed as x->mul*x, and printed to the buffer.
+ * If the value is 0xFFFFFFFF and na_msg!=%NULL, na_msg is printed instead.
+ */
+static ssize_t show_tp_ec_bat_u16(u8 arg0, int offset, int mul,
+			      const char *na_msg,
+			      struct device_attribute *attr, char *buf)
+{
+	u16 val;
+	int ret = get_tp_ec_bat_16(arg0, offset, attr_get_bat(attr), &val);
+	if (ret)
+		return ret;
+	if (na_msg && val == 0xFFFF)
+		return sprintf(buf, "%s\n", na_msg);
+	else
+		return sprintf(buf, "%u\n", mul*(unsigned int)val);
+}
+
+/**
+ * show_tp_ec_bat_s16 - show an signed 16-bit battery attribute
+ * @arg0: specified 1st argument of EC raw to read
+ * @offset: byte offset in EC raw data
+ * @mul: correction factor to multiply by
+ * @add: correction term to add after multiplication
+ * @attr: battery attribute
+ * @buf: output buffer
+ * The 16-bit value is read from the EC, treated as signed,
+ * transformed as x->mul*x+add, and printed to the buffer.
+ */
+static ssize_t show_tp_ec_bat_s16(u8 arg0, int offset, int mul, int add,
+			      struct device_attribute *attr, char *buf)
+{
+	u16 val;
+	int ret = get_tp_ec_bat_16(arg0, offset, attr_get_bat(attr), &val);
+	if (ret)
+		return ret;
+	return sprintf(buf, "%d\n", mul*(s16)val+add);
+}
+
+/**
+ * show_tp_ec_bat_str - show a string from EC battery status data
+ * @arg0: specified 1st argument of EC raw to read
+ * @offset: byte offset in EC raw data
+ * @maxlen: maximum string length
+ * @attr: battery attribute
+ * @buf: output buffer
+ */
+static ssize_t show_tp_ec_bat_str(u8 arg0, int offset, int maxlen,
+			      struct device_attribute *attr, char *buf)
+{
+	int bat = attr_get_bat(attr);
+	u8 row[TP_CONTROLLER_ROW_LEN];
+	int ret;
+	if (bat_has_status(bat) != 1)
+		return -ENXIO;
+	ret = read_tp_ec_row(arg0, bat, 0, row);
+	if (ret)
+		return ret;
+	strncpy(buf, (char *)row+offset, maxlen);
+	buf[maxlen] = 0;
+	strcat(buf, "\n");
+	return strlen(buf);
+}
+
+/**
+ * show_tp_ec_bat_power - show a power readout from EC battery status data
+ * @arg0: specified 1st argument of EC raw to read
+ * @offV: byte offset of voltage in EC raw data
+ * @offI: byte offset of current in EC raw data
+ * @attr: battery attribute
+ * @buf: output buffer
+ * Computes the power as current*voltage from the two given readout offsets.
+ */
+static ssize_t show_tp_ec_bat_power(u8 arg0, int offV, int offI,
+				struct device_attribute *attr, char *buf)
+{
+	u8 row[TP_CONTROLLER_ROW_LEN];
+	int milliamp, millivolt, ret;
+	int bat = attr_get_bat(attr);
+	if (bat_has_status(bat) != 1)
+		return -ENXIO;
+	ret = read_tp_ec_row(1, bat, 0, row);
+	if (ret)
+		return ret;
+	millivolt = *(u16 *)(row+offV);
+	milliamp = *(s16 *)(row+offI);
+	return sprintf(buf, "%d\n", milliamp*millivolt/1000); /* units: mW */
+}
+
+/**
+ * show_tp_ec_bat_date - decode and show a date from EC battery status data
+ * @arg0: specified 1st argument of EC raw to read
+ * @offset: byte offset in EC raw data
+ * @attr: battery attribute
+ * @buf: output buffer
+ */
+static ssize_t show_tp_ec_bat_date(u8 arg0, int offset,
+			       struct device_attribute *attr, char *buf)
+{
+	u8 row[TP_CONTROLLER_ROW_LEN];
+	u16 v;
+	int ret;
+	int day, month, year;
+	int bat = attr_get_bat(attr);
+	if (bat_has_status(bat) != 1)
+		return -ENXIO;
+	ret = read_tp_ec_row(arg0, bat, 0, row);
+	if (ret)
+		return ret;
+
+	/* Decode bit-packed: v = day | (month<<5) | ((year-1980)<<9) */
+	v = *(u16 *)(row+offset);
+	day = v & 0x1F;
+	month = (v >> 5) & 0xF;
+	year = (v >> 9) + 1980;
+
+	return sprintf(buf, "%04d-%02d-%02d\n", year, month, day);
+}
+
+
+/*********************************************************************
+ * sysfs attribute I/O for batteries -
+ * the actual attribute show/store functions
+ */
+
+static ssize_t show_battery_start_charge_thresh(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+	int thresh;
+	int bat = attr_get_bat(attr);
+	int ret = get_thresh(bat, THRESH_START, &thresh);
+	if (ret)
+		return ret;
+	return sprintf(buf, "%d\n", thresh);  /* units: percent */
+}
+
+static ssize_t show_battery_stop_charge_thresh(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+	int thresh;
+	int bat = attr_get_bat(attr);
+	int ret = get_thresh(bat, THRESH_STOP, &thresh);
+	if (ret)
+		return ret;
+	return sprintf(buf, "%d\n", thresh);  /* units: percent */
+}
+
+/**
+ * store_battery_start_charge_thresh - store battery_start_charge_thresh attr
+ * Since this is a kernel<->user interface, we ensure a valid state for
+ * the hardware. We do this by clamping the requested threshold to the
+ * valid range and, if necessary, moving the other threshold so that
+ * it's MIN_THRESH_DELTA away from this one.
+ */
+static ssize_t store_battery_start_charge_thresh(struct device *dev,
+	struct device_attribute *attr, const char *buf, size_t count)
+{
+	int thresh, other_thresh, ret;
+	int bat = attr_get_bat(attr);
+
+	if (sscanf(buf, "%d", &thresh) != 1 || thresh < 1 || thresh > 100)
+		return -EINVAL;
+
+	if (thresh < MIN_THRESH_START) /* clamp up to MIN_THRESH_START */
+		thresh = MIN_THRESH_START;
+	if (thresh > MAX_THRESH_START) /* clamp down to MAX_THRESH_START */
+		thresh = MAX_THRESH_START;
+
+	down(&smapi_mutex);
+	ret = get_thresh(bat, THRESH_STOP, &other_thresh);
+	if (ret != -EOPNOTSUPP && ret != -ENXIO) {
+		if (ret) /* other threshold is set? */
+			goto out;
+		ret = get_real_thresh(bat, THRESH_START, NULL);
+		if (ret) /* this threshold is set? */
+			goto out;
+		if (other_thresh < thresh+MIN_THRESH_DELTA) {
+			/* move other thresh to keep it above this one */
+			ret = set_thresh(bat, THRESH_STOP,
+					 thresh+MIN_THRESH_DELTA);
+			if (ret)
+				goto out;
+		}
+	}
+	ret = set_thresh(bat, THRESH_START, thresh);
+out:
+	up(&smapi_mutex);
+	return count;
+
+}
+
+/**
+ * store_battery_stop_charge_thresh - store battery_stop_charge_thresh attr
+ * Since this is a kernel<->user interface, we ensure a valid state for
+ * the hardware. We do this by clamping the requested threshold to the
+ * valid range and, if necessary, moving the other threshold so that
+ * it's MIN_THRESH_DELTA away from this one.
+ */
+static ssize_t store_battery_stop_charge_thresh(struct device *dev,
+	struct device_attribute *attr, const char *buf, size_t count)
+{
+	int thresh, other_thresh, ret;
+	int bat = attr_get_bat(attr);
+
+	if (sscanf(buf, "%d", &thresh) != 1 || thresh < 1 || thresh > 100)
+		return -EINVAL;
+
+	if (thresh < MIN_THRESH_STOP) /* clamp up to MIN_THRESH_STOP */
+		thresh = MIN_THRESH_STOP;
+
+	down(&smapi_mutex);
+	ret = get_thresh(bat, THRESH_START, &other_thresh);
+	if (ret != -EOPNOTSUPP && ret != -ENXIO) { /* other threshold exists? */
+		if (ret)
+			goto out;
+		/* this threshold exists? */
+		ret = get_real_thresh(bat, THRESH_STOP, NULL);
+		if (ret)
+			goto out;
+		if (other_thresh >= thresh-MIN_THRESH_DELTA) {
+			 /* move other thresh to be below this one */
+			ret = set_thresh(bat, THRESH_START,
+					 thresh-MIN_THRESH_DELTA);
+			if (ret)
+				goto out;
+		}
+	}
+	ret = set_thresh(bat, THRESH_STOP, thresh);
+out:
+	up(&smapi_mutex);
+	return count;
+}
+
+static ssize_t show_battery_inhibit_charge_minutes(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+	int minutes;
+	int bat = attr_get_bat(attr);
+	int ret = get_inhibit_charge_minutes(bat, &minutes);
+	if (ret)
+		return ret;
+	return sprintf(buf, "%d\n", minutes);  /* units: minutes */
+}
+
+static ssize_t store_battery_inhibit_charge_minutes(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	int ret;
+	int minutes;
+	int bat = attr_get_bat(attr);
+	if (sscanf(buf, "%d", &minutes) != 1 || minutes < 0) {
+		TPRINTK(KERN_ERR, "inhibit_charge_minutes: "
+			      "must be a non-negative integer");
+		return -EINVAL;
+	}
+	ret = set_inhibit_charge_minutes(bat, minutes);
+	if (ret)
+		return ret;
+	return count;
+}
+
+static ssize_t show_battery_force_discharge(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+	int enabled;
+	int bat = attr_get_bat(attr);
+	int ret = get_force_discharge(bat, &enabled);
+	if (ret)
+		return ret;
+	return sprintf(buf, "%d\n", enabled);  /* type: boolean */
+}
+
+static ssize_t store_battery_force_discharge(struct device *dev,
+	struct device_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	int enabled;
+	int bat = attr_get_bat(attr);
+	if (sscanf(buf, "%d", &enabled) != 1 || enabled < 0 || enabled > 1)
+		return -EINVAL;
+	ret = set_force_discharge(bat, enabled);
+	if (ret)
+		return ret;
+	return count;
+}
+
+static ssize_t show_battery_installed(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	int bat = attr_get_bat(attr);
+	int ret = power_device_present(bat);
+	if (ret < 0)
+		return ret;
+	return sprintf(buf, "%d\n", ret); /* type: boolean */
+}
+
+static ssize_t show_battery_state(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	u8 row[TP_CONTROLLER_ROW_LEN];
+	const char *txt;
+	int ret;
+	int bat = attr_get_bat(attr);
+	if (bat_has_status(bat) != 1)
+		return sprintf(buf, "none\n");
+	ret = read_tp_ec_row(1, bat, 0, row);
+	if (ret)
+		return ret;
+	switch (row[1] & 0xf0) {
+	case 0xc0: txt = "idle"; break;
+	case 0xd0: txt = "discharging"; break;
+	case 0xe0: txt = "charging"; break;
+	default:   return sprintf(buf, "unknown (0x%x)\n", row[1]);
+	}
+	return sprintf(buf, "%s\n", txt);  /* type: string from fixed set */
+}
+
+static ssize_t show_battery_manufacturer(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* type: string. SBS spec v1.1 p34: ManufacturerName() */
+	return show_tp_ec_bat_str(4, 2, TP_CONTROLLER_ROW_LEN-2, attr, buf);
+}
+
+static ssize_t show_battery_model(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* type: string. SBS spec v1.1 p34: DeviceName() */
+	return show_tp_ec_bat_str(5, 2, TP_CONTROLLER_ROW_LEN-2, attr, buf);
+}
+
+static ssize_t show_battery_barcoding(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* type: string */
+	return show_tp_ec_bat_str(7, 2, TP_CONTROLLER_ROW_LEN-2, attr, buf);
+}
+
+static ssize_t show_battery_chemistry(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* type: string. SBS spec v1.1 p34-35: DeviceChemistry() */
+	return show_tp_ec_bat_str(6, 2, 5, attr, buf);
+}
+
+static ssize_t show_battery_voltage(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mV. SBS spec v1.1 p24: Voltage() */
+	return show_tp_ec_bat_u16(1, 6, 1, NULL, attr, buf);
+}
+
+static ssize_t show_battery_design_voltage(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mV. SBS spec v1.1 p32: DesignVoltage() */
+	return show_tp_ec_bat_u16(3, 4, 1, NULL, attr, buf);
+}
+
+static ssize_t show_battery_charging_max_voltage(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mV. SBS spec v1.1 p37,39: ChargingVoltage() */
+	return show_tp_ec_bat_u16(9, 8, 1, NULL, attr, buf);
+}
+
+static ssize_t show_battery_group0_voltage(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mV */
+	return show_tp_ec_bat_u16(0xA, 12, 1, NULL, attr, buf);
+}
+
+static ssize_t show_battery_group1_voltage(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mV */
+	return show_tp_ec_bat_u16(0xA, 10, 1, NULL, attr, buf);
+}
+
+static ssize_t show_battery_group2_voltage(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mV */
+	return show_tp_ec_bat_u16(0xA, 8, 1, NULL, attr, buf);
+}
+
+static ssize_t show_battery_group3_voltage(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mV */
+	return show_tp_ec_bat_u16(0xA, 6, 1, NULL, attr, buf);
+}
+
+static ssize_t show_battery_current_now(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mA. SBS spec v1.1 p24: Current() */
+	return show_tp_ec_bat_s16(1, 8, 1, 0, attr, buf);
+}
+
+static ssize_t show_battery_current_avg(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mA. SBS spec v1.1 p24: AverageCurrent() */
+	return show_tp_ec_bat_s16(1, 10, 1, 0, attr, buf);
+}
+
+static ssize_t show_battery_charging_max_current(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mA. SBS spec v1.1 p36,38: ChargingCurrent() */
+	return show_tp_ec_bat_s16(9, 6, 1, 0, attr, buf);
+}
+
+static ssize_t show_battery_power_now(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mW. SBS spec v1.1: Voltage()*Current() */
+	return show_tp_ec_bat_power(1, 6, 8, attr, buf);
+}
+
+static ssize_t show_battery_power_avg(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mW. SBS spec v1.1: Voltage()*AverageCurrent() */
+	return show_tp_ec_bat_power(1, 6, 10, attr, buf);
+}
+
+static ssize_t show_battery_remaining_percent(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: percent. SBS spec v1.1 p25: RelativeStateOfCharge() */
+	return show_tp_ec_bat_u16(1, 12, 1, NULL, attr, buf);
+}
+
+static ssize_t show_battery_remaining_percent_error(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: percent. SBS spec v1.1 p25: MaxError() */
+	return show_tp_ec_bat_u16(9, 4, 1, NULL, attr, buf);
+}
+
+static ssize_t show_battery_remaining_charging_time(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: minutes. SBS spec v1.1 p27: AverageTimeToFull() */
+	return show_tp_ec_bat_u16(2, 8, 1, "not_charging", attr, buf);
+}
+
+static ssize_t show_battery_remaining_running_time(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: minutes. SBS spec v1.1 p27: RunTimeToEmpty() */
+	return show_tp_ec_bat_u16(2, 6, 1, "not_discharging", attr, buf);
+}
+
+static ssize_t show_battery_remaining_running_time_now(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: minutes. SBS spec v1.1 p27: RunTimeToEmpty() */
+	return show_tp_ec_bat_u16(2, 4, 1, "not_discharging", attr, buf);
+}
+
+static ssize_t show_battery_remaining_capacity(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mWh. SBS spec v1.1 p26. */
+	return show_tp_ec_bat_u16(1, 14, 10, "", attr, buf);
+}
+
+static ssize_t show_battery_last_full_capacity(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mWh. SBS spec v1.1 p26: FullChargeCapacity() */
+	return show_tp_ec_bat_u16(2, 2, 10, "", attr, buf);
+}
+
+static ssize_t show_battery_design_capacity(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mWh. SBS spec v1.1 p32: DesignCapacity() */
+	return show_tp_ec_bat_u16(3, 2, 10, "", attr, buf);
+}
+
+static ssize_t show_battery_cycle_count(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: ordinal. SBS spec v1.1 p32: CycleCount() */
+	return show_tp_ec_bat_u16(2, 12, 1, "", attr, buf);
+}
+
+static ssize_t show_battery_temperature(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: millicelsius. SBS spec v1.1: Temperature()*10 */
+	return show_tp_ec_bat_s16(1, 4, 100, -273100, attr, buf);
+}
+
+static ssize_t show_battery_serial(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* type: int. SBS spec v1.1 p34: SerialNumber() */
+	return show_tp_ec_bat_u16(3, 10, 1, "", attr, buf);
+}
+
+static ssize_t show_battery_manufacture_date(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* type: YYYY-MM-DD. SBS spec v1.1 p34: ManufactureDate() */
+	return show_tp_ec_bat_date(3, 8, attr, buf);
+}
+
+static ssize_t show_battery_first_use_date(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* type: YYYY-MM-DD */
+	return show_tp_ec_bat_date(8, 2, attr, buf);
+}
+
+/**
+ * show_battery_dump - show the battery's dump attribute
+ * The dump attribute gives a hex dump of all EC readouts related to a
+ * battery. Some of the enumerated values don't really exist (i.e., the
+ * EC function just leaves them untouched); we use a kludge to detect and
+ * denote these.
+ */
+#define MIN_DUMP_ARG0 0x00
+#define MAX_DUMP_ARG0 0x0a /* 0x0b is useful too but hangs old EC firmware */
+static ssize_t show_battery_dump(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	int i;
+	char *p = buf;
+	int bat = attr_get_bat(attr);
+	u8 arg0; /* first argument to EC */
+	u8 rowa[TP_CONTROLLER_ROW_LEN],
+	   rowb[TP_CONTROLLER_ROW_LEN];
+	const u8 junka = 0xAA,
+		 junkb = 0x55; /* junk values for testing changes */
+	int ret;
+
+	for (arg0 = MIN_DUMP_ARG0; arg0 <= MAX_DUMP_ARG0; ++arg0) {
+		if ((p-buf) > PAGE_SIZE-TP_CONTROLLER_ROW_LEN*5)
+			return -ENOMEM; /* don't overflow sysfs buf */
+		/* Read raw twice with different junk values,
+		 * to detect unused output bytes which are left unchaged: */
+		ret = read_tp_ec_row(arg0, bat, junka, rowa);
+		if (ret)
+			return ret;
+		ret = read_tp_ec_row(arg0, bat, junkb, rowb);
+		if (ret)
+			return ret;
+		for (i = 0; i < TP_CONTROLLER_ROW_LEN; i++) {
+			if (rowa[i] == junka && rowb[i] == junkb)
+				p += sprintf(p, "-- "); /* unused by EC */
+			else
+				p += sprintf(p, "%02x ", rowa[i]);
+		}
+		p += sprintf(p, "\n");
+	}
+	return p-buf;
+}
+
+
+/*********************************************************************
+ * sysfs attribute I/O, other than batteries
+ */
+
+static ssize_t show_ac_connected(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	int ret = power_device_present(0xFF);
+	if (ret < 0)
+		return ret;
+	return sprintf(buf, "%d\n", ret);  /* type: boolean */
+}
+
+/*********************************************************************
+ * The the "smapi_request" sysfs attribute executes a raw SMAPI call.
+ * You write to make a request and read to get the result. The state
+ * is saved globally rather than per fd (sysfs limitation), so
+ * simultaenous requests may get each other's results! So this is for
+ * development and debugging only.
+ */
+#define MAX_SMAPI_ATTR_ANSWER_LEN   128
+static char smapi_attr_answer[MAX_SMAPI_ATTR_ANSWER_LEN] = "";
+
+static ssize_t show_smapi_request(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	int ret = snprintf(buf, PAGE_SIZE, "%s", smapi_attr_answer);
+	smapi_attr_answer[0] = '\0';
+	return ret;
+}
+
+static ssize_t store_smapi_request(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t count)
+{
+	unsigned int inEBX, inECX, inEDI, inESI;
+	u32 outEBX, outECX, outEDX, outEDI, outESI;
+	const char *msg;
+	int ret;
+	if (sscanf(buf, "%x %x %x %x", &inEBX, &inECX, &inEDI, &inESI) != 4) {
+		smapi_attr_answer[0] = '\0';
+		return -EINVAL;
+	}
+	ret = smapi_request(
+		   inEBX, inECX, inEDI, inESI,
+		   &outEBX, &outECX, &outEDX, &outEDI, &outESI, &msg);
+	snprintf(smapi_attr_answer, MAX_SMAPI_ATTR_ANSWER_LEN,
+		 "%x %x %x %x %x %d '%s'\n",
+		 (unsigned int)outEBX, (unsigned int)outECX,
+		 (unsigned int)outEDX, (unsigned int)outEDI,
+		 (unsigned int)outESI, ret, msg);
+	if (ret)
+		return ret;
+	else
+		return count;
+}
+
+/*********************************************************************
+ * Power management: the embedded controller forgets the battery
+ * thresholds when the system is suspended to disk and unplugged from
+ * AC and battery, so we restore it upon resume.
+ */
+
+static int saved_threshs[4] = {-1, -1, -1, -1};  /* -1 = don't know */
+
+static int tp_suspend(struct platform_device *dev, pm_message_t state)
+{
+	int restore = (state.event == PM_EVENT_HIBERNATE ||
+	               state.event == PM_EVENT_FREEZE);
+	if (!restore || get_real_thresh(0, THRESH_STOP , &saved_threshs[0]))
+		saved_threshs[0] = -1;
+	if (!restore || get_real_thresh(0, THRESH_START, &saved_threshs[1]))
+		saved_threshs[1] = -1;
+	if (!restore || get_real_thresh(1, THRESH_STOP , &saved_threshs[2]))
+		saved_threshs[2] = -1;
+	if (!restore || get_real_thresh(1, THRESH_START, &saved_threshs[3]))
+		saved_threshs[3] = -1;
+	DPRINTK("suspend saved: %d %d %d %d", saved_threshs[0],
+		saved_threshs[1], saved_threshs[2], saved_threshs[3]);
+	return 0;
+}
+
+static int tp_resume(struct platform_device *dev)
+{
+	DPRINTK("resume restoring: %d %d %d %d", saved_threshs[0],
+		saved_threshs[1], saved_threshs[2], saved_threshs[3]);
+	if (saved_threshs[0] >= 0)
+		set_real_thresh(0, THRESH_STOP , saved_threshs[0]);
+	if (saved_threshs[1] >= 0)
+		set_real_thresh(0, THRESH_START, saved_threshs[1]);
+	if (saved_threshs[2] >= 0)
+		set_real_thresh(1, THRESH_STOP , saved_threshs[2]);
+	if (saved_threshs[3] >= 0)
+		set_real_thresh(1, THRESH_START, saved_threshs[3]);
+	return 0;
+}
+
+
+/*********************************************************************
+ * Driver model
+ */
+
+static struct platform_driver tp_driver = {
+	.suspend = tp_suspend,
+	.resume = tp_resume,
+	.driver = {
+		.name = "smapi",
+		.owner = THIS_MODULE
+	},
+};
+
+
+/*********************************************************************
+ * Sysfs device model
+ */
+
+/* Attributes in /sys/devices/platform/smapi/ */
+
+static DEVICE_ATTR(ac_connected, 0444, show_ac_connected, NULL);
+static DEVICE_ATTR(smapi_request, 0600, show_smapi_request,
+					store_smapi_request);
+
+static struct attribute *tp_root_attributes[] = {
+	&dev_attr_ac_connected.attr,
+	&dev_attr_smapi_request.attr,
+	NULL
+};
+static struct attribute_group tp_root_attribute_group = {
+	.attrs = tp_root_attributes
+};
+
+/* Attributes under /sys/devices/platform/smapi/BAT{0,1}/ :
+ * Every attribute needs to be defined (i.e., statically allocated) for
+ * each battery, and then referenced in the attribute list of each battery.
+ * We use preprocessor voodoo to avoid duplicating the list of attributes 4
+ * times. The preprocessor output is just normal sysfs attributes code.
+ */
+
+/**
+ * FOREACH_BAT_ATTR - invoke the given macros on all our battery attributes
+ * @_BAT:     battery number (0 or 1)
+ * @_ATTR_RW: macro to invoke for each read/write attribute
+ * @_ATTR_R:  macro to invoke for each read-only  attribute
+ */
+#define FOREACH_BAT_ATTR(_BAT, _ATTR_RW, _ATTR_R) \
+	_ATTR_RW(_BAT, start_charge_thresh) \
+	_ATTR_RW(_BAT, stop_charge_thresh) \
+	_ATTR_RW(_BAT, inhibit_charge_minutes) \
+	_ATTR_RW(_BAT, force_discharge) \
+	_ATTR_R(_BAT, installed) \
+	_ATTR_R(_BAT, state) \
+	_ATTR_R(_BAT, manufacturer) \
+	_ATTR_R(_BAT, model) \
+	_ATTR_R(_BAT, barcoding) \
+	_ATTR_R(_BAT, chemistry) \
+	_ATTR_R(_BAT, voltage) \
+	_ATTR_R(_BAT, group0_voltage) \
+	_ATTR_R(_BAT, group1_voltage) \
+	_ATTR_R(_BAT, group2_voltage) \
+	_ATTR_R(_BAT, group3_voltage) \
+	_ATTR_R(_BAT, current_now) \
+	_ATTR_R(_BAT, current_avg) \
+	_ATTR_R(_BAT, charging_max_current) \
+	_ATTR_R(_BAT, power_now) \
+	_ATTR_R(_BAT, power_avg) \
+	_ATTR_R(_BAT, remaining_percent) \
+	_ATTR_R(_BAT, remaining_percent_error) \
+	_ATTR_R(_BAT, remaining_charging_time) \
+	_ATTR_R(_BAT, remaining_running_time) \
+	_ATTR_R(_BAT, remaining_running_time_now) \
+	_ATTR_R(_BAT, remaining_capacity) \
+	_ATTR_R(_BAT, last_full_capacity) \
+	_ATTR_R(_BAT, design_voltage) \
+	_ATTR_R(_BAT, charging_max_voltage) \
+	_ATTR_R(_BAT, design_capacity) \
+	_ATTR_R(_BAT, cycle_count) \
+	_ATTR_R(_BAT, temperature) \
+	_ATTR_R(_BAT, serial) \
+	_ATTR_R(_BAT, manufacture_date) \
+	_ATTR_R(_BAT, first_use_date) \
+	_ATTR_R(_BAT, dump)
+
+/* Define several macros we will feed into FOREACH_BAT_ATTR: */
+
+#define DEFINE_BAT_ATTR_RW(_BAT,_NAME) \
+	static struct bat_device_attribute dev_attr_##_NAME##_##_BAT = {  \
+		.dev_attr = __ATTR(_NAME, 0644, show_battery_##_NAME,   \
+						store_battery_##_NAME), \
+		.bat = _BAT \
+	};
+
+#define DEFINE_BAT_ATTR_R(_BAT,_NAME) \
+	static struct bat_device_attribute dev_attr_##_NAME##_##_BAT = {    \
+		.dev_attr = __ATTR(_NAME, 0644, show_battery_##_NAME, 0), \
+		.bat = _BAT \
+	};
+
+#define REF_BAT_ATTR(_BAT,_NAME) \
+	&dev_attr_##_NAME##_##_BAT.dev_attr.attr,
+
+/* This provide all attributes for one battery: */
+
+#define PROVIDE_BAT_ATTRS(_BAT) \
+	FOREACH_BAT_ATTR(_BAT, DEFINE_BAT_ATTR_RW, DEFINE_BAT_ATTR_R) \
+	static struct attribute *tp_bat##_BAT##_attributes[] = { \
+		FOREACH_BAT_ATTR(_BAT, REF_BAT_ATTR, REF_BAT_ATTR) \
+		NULL \
+	}; \
+	static struct attribute_group tp_bat##_BAT##_attribute_group = { \
+		.name  = "BAT" #_BAT, \
+		.attrs = tp_bat##_BAT##_attributes \
+	};
+
+/* Finally genereate the attributes: */
+
+PROVIDE_BAT_ATTRS(0)
+PROVIDE_BAT_ATTRS(1)
+
+/* List of attribute groups */
+
+static struct attribute_group *attr_groups[] = {
+	&tp_root_attribute_group,
+	&tp_bat0_attribute_group,
+	&tp_bat1_attribute_group,
+	NULL
+};
+
+
+/*********************************************************************
+ * Init and cleanup
+ */
+
+static struct attribute_group **next_attr_group; /* next to register */
+
+static int __init tp_init(void)
+{
+	int ret;
+	printk(KERN_INFO "tp_smapi " TP_VERSION " loading...\n");
+
+	ret = find_smapi_port();
+	if (ret < 0)
+		goto err;
+	else
+		smapi_port = ret;
+
+	if (!request_region(smapi_port, 1, "smapi")) {
+		printk(KERN_ERR "tp_smapi cannot claim port 0x%x\n",
+		       smapi_port);
+		ret = -ENXIO;
+		goto err;
+	}
+
+	if (!request_region(SMAPI_PORT2, 1, "smapi")) {
+		printk(KERN_ERR "tp_smapi cannot claim port 0x%x\n",
+		       SMAPI_PORT2);
+		ret = -ENXIO;
+		goto err_port1;
+	}
+
+	ret = platform_driver_register(&tp_driver);
+	if (ret)
+		goto err_port2;
+
+	pdev = platform_device_alloc("smapi", -1);
+	if (!pdev) {
+		ret = -ENOMEM;
+		goto err_driver;
+	}
+
+	ret = platform_device_add(pdev);
+	if (ret)
+		goto err_device_free;
+
+	for (next_attr_group = attr_groups; *next_attr_group;
+	     ++next_attr_group) {
+		ret = sysfs_create_group(&pdev->dev.kobj, *next_attr_group);
+		if (ret)
+			goto err_attr;
+	}
+
+	printk(KERN_INFO "tp_smapi successfully loaded (smapi_port=0x%x).\n",
+	       smapi_port);
+	return 0;
+
+err_attr:
+	while (--next_attr_group >= attr_groups)
+		sysfs_remove_group(&pdev->dev.kobj, *next_attr_group);
+	platform_device_unregister(pdev);
+err_device_free:
+	platform_device_put(pdev);
+err_driver:
+	platform_driver_unregister(&tp_driver);
+err_port2:
+	release_region(SMAPI_PORT2, 1);
+err_port1:
+	release_region(smapi_port, 1);
+err:
+	printk(KERN_ERR "tp_smapi init failed (ret=%d)!\n", ret);
+	return ret;
+}
+
+static void __exit tp_exit(void)
+{
+	while (next_attr_group && --next_attr_group >= attr_groups)
+		sysfs_remove_group(&pdev->dev.kobj, *next_attr_group);
+	platform_device_unregister(pdev);
+	platform_driver_unregister(&tp_driver);
+	release_region(SMAPI_PORT2, 1);
+	if (smapi_port)
+		release_region(smapi_port, 1);
+
+	printk(KERN_INFO "tp_smapi unloaded.\n");
+}
+
+module_init(tp_init);
+module_exit(tp_exit);
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 8a739b74c..58ec7834a 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -1614,4 +1614,6 @@ source "drivers/scsi/device_handler/Kconfig"
 
 source "drivers/scsi/osd/Kconfig"
 
+source "drivers/scsi/vhba/Kconfig"
+
 endmenu
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index de1b3fce9..89f9ff693 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -155,6 +155,7 @@ obj-$(CONFIG_SCSI_ENCLOSURE)	+= ses.o
 
 obj-$(CONFIG_SCSI_OSD_INITIATOR) += osd/
 obj-$(CONFIG_SCSI_HISI_SAS) += hisi_sas/
+obj-$(CONFIG_VHBA)		+= vhba/
 
 # This goes last, so that "real" scsi devices probe earlier
 obj-$(CONFIG_SCSI_DEBUG)	+= scsi_debug.o
diff --git a/drivers/scsi/vhba/Kconfig b/drivers/scsi/vhba/Kconfig
new file mode 100644
index 000000000..7ccb7d8dc
--- /dev/null
+++ b/drivers/scsi/vhba/Kconfig
@@ -0,0 +1,9 @@
+config VHBA
+	tristate "Virtual (SCSI) Host Bus Adapter"
+	depends on SCSI
+	---help---
+        This is the in-kernel part of CDEmu, a CD/DVD-ROM device
+        emulator.
+
+	This driver can also be built as a module. If so, the module
+	will be called vhba.
diff --git a/drivers/scsi/vhba/Makefile b/drivers/scsi/vhba/Makefile
new file mode 100644
index 000000000..a2a3f9d9c
--- /dev/null
+++ b/drivers/scsi/vhba/Makefile
@@ -0,0 +1,4 @@
+VHBA_VERSION := 20170610
+
+obj-$(CONFIG_VHBA)		+= vhba.o
+ccflags-y := -DVHBA_VERSION=\"$(VHBA_VERSION)\" -Werror
diff --git a/drivers/scsi/vhba/vhba.c b/drivers/scsi/vhba/vhba.c
new file mode 100644
index 000000000..ff30e4cb5
--- /dev/null
+++ b/drivers/scsi/vhba/vhba.c
@@ -0,0 +1,1076 @@
+/*
+ * vhba.c
+ *
+ * Copyright (C) 2007-2012 Chia-I Wu <b90201047 AT ntu DOT edu DOT tw>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/version.h>
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/highmem.h>
+#include <linux/fs.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
+#include <linux/sched/signal.h>
+#else
+#include <linux/sched.h>
+#endif
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+#endif
+#include <asm/uaccess.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_device.h>
+
+/* scatterlist.page_link and sg_page() were introduced in 2.6.24 */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24)
+#define USE_SG_PAGE
+#include <linux/scatterlist.h>
+#endif
+
+MODULE_AUTHOR("Chia-I Wu");
+MODULE_VERSION(VHBA_VERSION);
+MODULE_DESCRIPTION("Virtual SCSI HBA");
+MODULE_LICENSE("GPL");
+
+#ifdef DEBUG
+#define DPRINTK(fmt, args...) printk(KERN_DEBUG "%s: " fmt, __FUNCTION__, ## args)
+#else
+#define DPRINTK(fmt, args...)
+#endif
+
+/* scmd_dbg was introduced in 3.15 */
+#ifndef scmd_dbg
+#define scmd_dbg(scmd, fmt, a...)       \
+    dev_dbg(&(scmd)->device->sdev_gendev, fmt, ##a)
+#endif
+
+#ifndef scmd_warn
+#define scmd_warn(scmd, fmt, a...)      \
+    dev_warn(&(scmd)->device->sdev_gendev, fmt, ##a)
+#endif
+
+#define VHBA_MAX_SECTORS_PER_IO 256
+#define VHBA_MAX_ID 32
+#define VHBA_CAN_QUEUE 32
+#define VHBA_INVALID_ID VHBA_MAX_ID
+
+#define DATA_TO_DEVICE(dir) ((dir) == DMA_TO_DEVICE || (dir) == DMA_BIDIRECTIONAL)
+#define DATA_FROM_DEVICE(dir) ((dir) == DMA_FROM_DEVICE || (dir) == DMA_BIDIRECTIONAL)
+
+
+/* SCSI macros were introduced in 2.6.23 */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 23)
+#define scsi_sg_count(cmd) ((cmd)->use_sg)
+#define scsi_sglist(cmd) ((cmd)->request_buffer)
+#define scsi_bufflen(cmd) ((cmd)->request_bufflen)
+#define scsi_set_resid(cmd, to_read) {(cmd)->resid = (to_read);}
+#endif
+
+/* 1-argument form of k[un]map_atomic was introduced in 2.6.37-rc1;
+   2-argument form was deprecated in 3.4-rc1 */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37)
+#define vhba_kmap_atomic kmap_atomic
+#define vhba_kunmap_atomic kunmap_atomic
+#else
+#define vhba_kmap_atomic(page) kmap_atomic(page, KM_USER0)
+#define vhba_kunmap_atomic(page) kunmap_atomic(page, KM_USER0)
+#endif
+
+
+enum vhba_req_state {
+    VHBA_REQ_FREE,
+    VHBA_REQ_PENDING,
+    VHBA_REQ_READING,
+    VHBA_REQ_SENT,
+    VHBA_REQ_WRITING,
+};
+
+struct vhba_command {
+    struct scsi_cmnd *cmd;
+    int status;
+    struct list_head entry;
+};
+
+struct vhba_device {
+    uint id;
+    spinlock_t cmd_lock;
+    struct list_head cmd_list;
+    wait_queue_head_t cmd_wq;
+    atomic_t refcnt;
+};
+
+struct vhba_host {
+    struct Scsi_Host *shost;
+    spinlock_t cmd_lock;
+    int cmd_next;
+    struct vhba_command commands[VHBA_CAN_QUEUE];
+    spinlock_t dev_lock;
+    struct vhba_device *devices[VHBA_MAX_ID];
+    int num_devices;
+    DECLARE_BITMAP(chgmap, VHBA_MAX_ID);
+    int chgtype[VHBA_MAX_ID];
+    struct work_struct scan_devices;
+};
+
+#define MAX_COMMAND_SIZE 16
+
+struct vhba_request {
+    __u32 tag;
+    __u32 lun;
+    __u8 cdb[MAX_COMMAND_SIZE];
+    __u8 cdb_len;
+    __u32 data_len;
+};
+
+struct vhba_response {
+    __u32 tag;
+    __u32 status;
+    __u32 data_len;
+};
+
+static struct vhba_command *vhba_alloc_command (void);
+static void vhba_free_command (struct vhba_command *vcmd);
+
+static struct platform_device vhba_platform_device;
+
+static struct vhba_device *vhba_device_alloc (void)
+{
+    struct vhba_device *vdev;
+
+    vdev = kzalloc(sizeof(struct vhba_device), GFP_KERNEL);
+    if (!vdev) {
+        return NULL;
+    }
+
+    vdev->id = VHBA_INVALID_ID;
+    spin_lock_init(&vdev->cmd_lock);
+    INIT_LIST_HEAD(&vdev->cmd_list);
+    init_waitqueue_head(&vdev->cmd_wq);
+    atomic_set(&vdev->refcnt, 1);
+
+    return vdev;
+}
+
+static void vhba_device_put (struct vhba_device *vdev)
+{
+    if (atomic_dec_and_test(&vdev->refcnt)) {
+        kfree(vdev);
+    }
+}
+
+static struct vhba_device *vhba_device_get (struct vhba_device *vdev)
+{
+    atomic_inc(&vdev->refcnt);
+
+    return vdev;
+}
+
+static int vhba_device_queue (struct vhba_device *vdev, struct scsi_cmnd *cmd)
+{
+    struct vhba_command *vcmd;
+    unsigned long flags;
+
+    vcmd = vhba_alloc_command();
+    if (!vcmd) {
+        return SCSI_MLQUEUE_HOST_BUSY;
+    }
+
+    vcmd->cmd = cmd;
+
+    spin_lock_irqsave(&vdev->cmd_lock, flags);
+    list_add_tail(&vcmd->entry, &vdev->cmd_list);
+    spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+
+    wake_up_interruptible(&vdev->cmd_wq);
+
+    return 0;
+}
+
+static int vhba_device_dequeue (struct vhba_device *vdev, struct scsi_cmnd *cmd)
+{
+    struct vhba_command *vcmd;
+    int retval;
+    unsigned long flags;
+
+    spin_lock_irqsave(&vdev->cmd_lock, flags);
+    list_for_each_entry(vcmd, &vdev->cmd_list, entry) {
+        if (vcmd->cmd == cmd) {
+            list_del_init(&vcmd->entry);
+            break;
+        }
+    }
+
+    /* command not found */
+    if (&vcmd->entry == &vdev->cmd_list) {
+        spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+        return SUCCESS;
+    }
+
+    while (vcmd->status == VHBA_REQ_READING || vcmd->status == VHBA_REQ_WRITING) {
+        spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+        scmd_dbg(cmd, "wait for I/O before aborting\n");
+        schedule_timeout(1);
+        spin_lock_irqsave(&vdev->cmd_lock, flags);
+    }
+
+    retval = (vcmd->status == VHBA_REQ_SENT) ? FAILED : SUCCESS;
+
+    vhba_free_command(vcmd);
+
+    spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+
+    return retval;
+}
+
+static inline void vhba_scan_devices_add (struct vhba_host *vhost, int id)
+{
+    struct scsi_device *sdev;
+
+    sdev = scsi_device_lookup(vhost->shost, 0, id, 0);
+    if (!sdev) {
+        scsi_add_device(vhost->shost, 0, id, 0);
+    } else {
+        dev_warn(&vhost->shost->shost_gendev, "tried to add an already-existing device 0:%d:0!\n", id);
+        scsi_device_put(sdev);
+    }
+}
+
+static inline void vhba_scan_devices_remove (struct vhba_host *vhost, int id)
+{
+    struct scsi_device *sdev;
+
+    sdev = scsi_device_lookup(vhost->shost, 0, id, 0);
+    if (sdev) {
+        scsi_remove_device(sdev);
+        scsi_device_put(sdev);
+    } else {
+        dev_warn(&vhost->shost->shost_gendev, "tried to remove non-existing device 0:%d:0!\n", id);
+    }
+}
+
+static void vhba_scan_devices (struct work_struct *work)
+{
+    struct vhba_host *vhost = container_of(work, struct vhba_host, scan_devices);
+    unsigned long flags;
+    int id, change, exists;
+
+    while (1) {
+        spin_lock_irqsave(&vhost->dev_lock, flags);
+
+        id = find_first_bit(vhost->chgmap, vhost->shost->max_id);
+        if (id >= vhost->shost->max_id) {
+            spin_unlock_irqrestore(&vhost->dev_lock, flags);
+            break;
+        }
+        change = vhost->chgtype[id];
+        exists = vhost->devices[id] != NULL;
+
+        vhost->chgtype[id] = 0;
+        clear_bit(id, vhost->chgmap);
+
+        spin_unlock_irqrestore(&vhost->dev_lock, flags);
+
+        if (change < 0) {
+            dev_dbg(&vhost->shost->shost_gendev, "trying to remove target 0:%d:0\n", id);
+            vhba_scan_devices_remove(vhost, id);
+        } else if (change > 0) {
+            dev_dbg(&vhost->shost->shost_gendev, "trying to add target 0:%d:0\n", id);
+            vhba_scan_devices_add(vhost, id);
+        } else {
+            /* quick sequence of add/remove or remove/add; we determine
+               which one it was by checking if device structure exists */
+            if (exists) {
+                /* remove followed by add: remove and (re)add */
+                dev_dbg(&vhost->shost->shost_gendev, "trying to (re)add target 0:%d:0\n", id);
+                vhba_scan_devices_remove(vhost, id);
+                vhba_scan_devices_add(vhost, id);
+            } else {
+                /* add followed by remove: no-op */
+                dev_dbg(&vhost->shost->shost_gendev, "no-op for target 0:%d:0\n", id);
+            }
+        }
+    }
+}
+
+static int vhba_add_device (struct vhba_device *vdev)
+{
+    struct vhba_host *vhost;
+    int i;
+    unsigned long flags;
+
+    vhost = platform_get_drvdata(&vhba_platform_device);
+
+    vhba_device_get(vdev);
+
+    spin_lock_irqsave(&vhost->dev_lock, flags);
+    if (vhost->num_devices >= vhost->shost->max_id) {
+        spin_unlock_irqrestore(&vhost->dev_lock, flags);
+        vhba_device_put(vdev);
+        return -EBUSY;
+    }
+
+    for (i = 0; i < vhost->shost->max_id; i++) {
+        if (vhost->devices[i] == NULL) {
+            vdev->id = i;
+            vhost->devices[i] = vdev;
+            vhost->num_devices++;
+            set_bit(vdev->id, vhost->chgmap);
+            vhost->chgtype[vdev->id]++;
+            break;
+        }
+    }
+    spin_unlock_irqrestore(&vhost->dev_lock, flags);
+
+    schedule_work(&vhost->scan_devices);
+
+    return 0;
+}
+
+static int vhba_remove_device (struct vhba_device *vdev)
+{
+    struct vhba_host *vhost;
+    unsigned long flags;
+
+    vhost = platform_get_drvdata(&vhba_platform_device);
+
+    spin_lock_irqsave(&vhost->dev_lock, flags);
+    set_bit(vdev->id, vhost->chgmap);
+    vhost->chgtype[vdev->id]--;
+    vhost->devices[vdev->id] = NULL;
+    vhost->num_devices--;
+    vdev->id = VHBA_INVALID_ID;
+    spin_unlock_irqrestore(&vhost->dev_lock, flags);
+
+    vhba_device_put(vdev);
+
+    schedule_work(&vhost->scan_devices);
+
+    return 0;
+}
+
+static struct vhba_device *vhba_lookup_device (int id)
+{
+    struct vhba_host *vhost;
+    struct vhba_device *vdev = NULL;
+    unsigned long flags;
+
+    vhost = platform_get_drvdata(&vhba_platform_device);
+
+    if (likely(id < vhost->shost->max_id)) {
+        spin_lock_irqsave(&vhost->dev_lock, flags);
+        vdev = vhost->devices[id];
+        if (vdev) {
+            vdev = vhba_device_get(vdev);
+        }
+
+        spin_unlock_irqrestore(&vhost->dev_lock, flags);
+    }
+
+    return vdev;
+}
+
+static struct vhba_command *vhba_alloc_command (void)
+{
+    struct vhba_host *vhost;
+    struct vhba_command *vcmd;
+    unsigned long flags;
+    int i;
+
+    vhost = platform_get_drvdata(&vhba_platform_device);
+
+    spin_lock_irqsave(&vhost->cmd_lock, flags);
+
+    vcmd = vhost->commands + vhost->cmd_next++;
+    if (vcmd->status != VHBA_REQ_FREE) {
+        for (i = 0; i < vhost->shost->can_queue; i++) {
+            vcmd = vhost->commands + i;
+
+            if (vcmd->status == VHBA_REQ_FREE) {
+                vhost->cmd_next = i + 1;
+                break;
+            }
+        }
+
+        if (i == vhost->shost->can_queue) {
+            vcmd = NULL;
+        }
+    }
+
+    if (vcmd) {
+        vcmd->status = VHBA_REQ_PENDING;
+    }
+
+    vhost->cmd_next %= vhost->shost->can_queue;
+
+    spin_unlock_irqrestore(&vhost->cmd_lock, flags);
+
+    return vcmd;
+}
+
+static void vhba_free_command (struct vhba_command *vcmd)
+{
+    struct vhba_host *vhost;
+    unsigned long flags;
+
+    vhost = platform_get_drvdata(&vhba_platform_device);
+
+    spin_lock_irqsave(&vhost->cmd_lock, flags);
+    vcmd->status = VHBA_REQ_FREE;
+    spin_unlock_irqrestore(&vhost->cmd_lock, flags);
+}
+
+static int vhba_queuecommand_lck (struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
+{
+    struct vhba_device *vdev;
+    int retval;
+
+    scmd_dbg(cmd, "queue %lu\n", cmd->serial_number);
+
+    vdev = vhba_lookup_device(cmd->device->id);
+    if (!vdev) {
+        scmd_dbg(cmd, "no such device\n");
+
+        cmd->result = DID_NO_CONNECT << 16;
+        done(cmd);
+
+        return 0;
+    }
+
+    cmd->scsi_done = done;
+    retval = vhba_device_queue(vdev, cmd);
+
+    vhba_device_put(vdev);
+
+    return retval;
+}
+
+#ifdef DEF_SCSI_QCMD
+DEF_SCSI_QCMD(vhba_queuecommand)
+#else
+#define vhba_queuecommand vhba_queuecommand_lck
+#endif
+
+static int vhba_abort (struct scsi_cmnd *cmd)
+{
+    struct vhba_device *vdev;
+    int retval = SUCCESS;
+
+    scmd_warn(cmd, "abort %lu\n", cmd->serial_number);
+
+    vdev = vhba_lookup_device(cmd->device->id);
+    if (vdev) {
+        retval = vhba_device_dequeue(vdev, cmd);
+        vhba_device_put(vdev);
+    } else {
+        cmd->result = DID_NO_CONNECT << 16;
+    }
+
+    return retval;
+}
+
+static struct scsi_host_template vhba_template = {
+    .module = THIS_MODULE,
+    .name = "vhba",
+    .proc_name = "vhba",
+    .queuecommand = vhba_queuecommand,
+    .eh_abort_handler = vhba_abort,
+    .can_queue = VHBA_CAN_QUEUE,
+    .this_id = -1,
+    .cmd_per_lun = 1,
+    .max_sectors = VHBA_MAX_SECTORS_PER_IO,
+    .sg_tablesize = 256,
+};
+
+static ssize_t do_request (struct scsi_cmnd *cmd, char __user *buf, size_t buf_len)
+{
+    struct vhba_request vreq;
+    ssize_t ret;
+
+    scmd_dbg(cmd, "request %lu, cdb 0x%x, bufflen %d, use_sg %d\n",
+        cmd->serial_number, cmd->cmnd[0], scsi_bufflen(cmd), scsi_sg_count(cmd));
+
+    ret = sizeof(vreq);
+    if (DATA_TO_DEVICE(cmd->sc_data_direction)) {
+        ret += scsi_bufflen(cmd);
+    }
+
+    if (ret > buf_len) {
+        scmd_warn(cmd, "buffer too small (%zd < %zd) for a request\n", buf_len, ret);
+        return -EIO;
+    }
+
+    vreq.tag = cmd->serial_number;
+    vreq.lun = cmd->device->lun;
+    memcpy(vreq.cdb, cmd->cmnd, MAX_COMMAND_SIZE);
+    vreq.cdb_len = cmd->cmd_len;
+    vreq.data_len = scsi_bufflen(cmd);
+
+    if (copy_to_user(buf, &vreq, sizeof(vreq))) {
+        return -EFAULT;
+    }
+
+    if (DATA_TO_DEVICE(cmd->sc_data_direction) && vreq.data_len) {
+        buf += sizeof(vreq);
+
+        if (scsi_sg_count(cmd)) {
+            unsigned char buf_stack[64];
+            unsigned char *kaddr, *uaddr, *kbuf;
+            struct scatterlist *sg = scsi_sglist(cmd);
+            int i;
+
+            uaddr = (unsigned char *) buf;
+
+            if (vreq.data_len > 64) {
+                kbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+            } else {
+                kbuf = buf_stack;
+            }
+
+            for (i = 0; i < scsi_sg_count(cmd); i++) {
+                size_t len = sg[i].length;
+
+#ifdef USE_SG_PAGE
+                kaddr = vhba_kmap_atomic(sg_page(&sg[i]));
+#else
+                kaddr = vhba_kmap_atomic(sg[i].page);
+#endif
+                memcpy(kbuf, kaddr + sg[i].offset, len);
+                vhba_kunmap_atomic(kaddr);
+
+                if (copy_to_user(uaddr, kbuf, len)) {
+                    if (kbuf != buf_stack) {
+                        kfree(kbuf);
+                    }
+                    return -EFAULT;
+                }
+                uaddr += len;
+            }
+
+            if (kbuf != buf_stack) {
+                kfree(kbuf);
+            }
+        } else {
+            if (copy_to_user(buf, scsi_sglist(cmd), vreq.data_len)) {
+                return -EFAULT;
+            }
+        }
+    }
+
+    return ret;
+}
+
+static ssize_t do_response (struct scsi_cmnd *cmd, const char __user *buf, size_t buf_len, struct vhba_response *res)
+{
+    ssize_t ret = 0;
+
+    scmd_dbg(cmd, "response %lu, status %x, data len %d, use_sg %d\n",
+         cmd->serial_number, res->status, res->data_len, scsi_sg_count(cmd));
+
+    if (res->status) {
+        unsigned char sense_stack[SCSI_SENSE_BUFFERSIZE];
+
+        if (res->data_len > SCSI_SENSE_BUFFERSIZE) {
+            scmd_warn(cmd, "truncate sense (%d < %d)", SCSI_SENSE_BUFFERSIZE, res->data_len);
+            res->data_len = SCSI_SENSE_BUFFERSIZE;
+        }
+
+        /* Copy via temporary buffer on stack in order to avoid problems
+           with PAX on grsecurity-enabled kernels */
+        if (copy_from_user(sense_stack, buf, res->data_len)) {
+            return -EFAULT;
+        }
+        memcpy(cmd->sense_buffer, sense_stack, res->data_len);
+
+        cmd->result = res->status;
+
+        ret += res->data_len;
+    } else if (DATA_FROM_DEVICE(cmd->sc_data_direction) && scsi_bufflen(cmd)) {
+        size_t to_read;
+
+        if (res->data_len > scsi_bufflen(cmd)) {
+            scmd_warn(cmd, "truncate data (%d < %d)\n", scsi_bufflen(cmd), res->data_len);
+            res->data_len = scsi_bufflen(cmd);
+        }
+
+        to_read = res->data_len;
+
+        if (scsi_sg_count(cmd)) {
+            unsigned char buf_stack[64];
+            unsigned char *kaddr, *uaddr, *kbuf;
+            struct scatterlist *sg = scsi_sglist(cmd);
+            int i;
+
+            uaddr = (unsigned char *)buf;
+
+            if (res->data_len > 64) {
+                kbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+            } else {
+                kbuf = buf_stack;
+            }
+
+            for (i = 0; i < scsi_sg_count(cmd); i++) {
+                size_t len = (sg[i].length < to_read) ? sg[i].length : to_read;
+
+                if (copy_from_user(kbuf, uaddr, len)) {
+                    if (kbuf != buf_stack) {
+                        kfree(kbuf);
+                    }
+                    return -EFAULT;
+                }
+                uaddr += len;
+
+#ifdef USE_SG_PAGE
+                kaddr = vhba_kmap_atomic(sg_page(&sg[i]));
+#else
+                kaddr = vhba_kmap_atomic(sg[i].page);
+#endif
+                memcpy(kaddr + sg[i].offset, kbuf, len);
+                vhba_kunmap_atomic(kaddr);
+
+                to_read -= len;
+                if (to_read == 0) {
+                    break;
+                }
+            }
+
+            if (kbuf != buf_stack) {
+                kfree(kbuf);
+            }
+        } else {
+            if (copy_from_user(scsi_sglist(cmd), buf, res->data_len)) {
+                return -EFAULT;
+            }
+
+            to_read -= res->data_len;
+        }
+
+        scsi_set_resid(cmd, to_read);
+
+        ret += res->data_len - to_read;
+    }
+
+    return ret;
+}
+
+static inline struct vhba_command *next_command (struct vhba_device *vdev)
+{
+    struct vhba_command *vcmd;
+
+    list_for_each_entry(vcmd, &vdev->cmd_list, entry) {
+        if (vcmd->status == VHBA_REQ_PENDING) {
+            break;
+        }
+    }
+
+    if (&vcmd->entry == &vdev->cmd_list) {
+        vcmd = NULL;
+    }
+
+    return vcmd;
+}
+
+static inline struct vhba_command *match_command (struct vhba_device *vdev, u32 tag)
+{
+    struct vhba_command *vcmd;
+
+    list_for_each_entry(vcmd, &vdev->cmd_list, entry) {
+        if (vcmd->cmd->serial_number == tag) {
+            break;
+        }
+    }
+
+    if (&vcmd->entry == &vdev->cmd_list) {
+        vcmd = NULL;
+    }
+
+    return vcmd;
+}
+
+static struct vhba_command *wait_command (struct vhba_device *vdev, unsigned long flags)
+{
+    struct vhba_command *vcmd;
+    DEFINE_WAIT(wait);
+
+    while (!(vcmd = next_command(vdev))) {
+        if (signal_pending(current)) {
+            break;
+        }
+
+        prepare_to_wait(&vdev->cmd_wq, &wait, TASK_INTERRUPTIBLE);
+
+        spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+
+        schedule();
+
+        spin_lock_irqsave(&vdev->cmd_lock, flags);
+    }
+
+    finish_wait(&vdev->cmd_wq, &wait);
+    if (vcmd) {
+        vcmd->status = VHBA_REQ_READING;
+    }
+
+    return vcmd;
+}
+
+static ssize_t vhba_ctl_read (struct file *file, char __user *buf, size_t buf_len, loff_t *offset)
+{
+    struct vhba_device *vdev;
+    struct vhba_command *vcmd;
+    ssize_t ret;
+    unsigned long flags;
+
+    vdev = file->private_data;
+
+    /* Get next command */
+    if (file->f_flags & O_NONBLOCK) {
+        /* Non-blocking variant */
+        spin_lock_irqsave(&vdev->cmd_lock, flags);
+        vcmd = next_command(vdev);
+        spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+
+        if (!vcmd) {
+            return -EWOULDBLOCK;
+        }
+    } else {
+        /* Blocking variant */
+        spin_lock_irqsave(&vdev->cmd_lock, flags);
+        vcmd = wait_command(vdev, flags);
+        spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+
+        if (!vcmd) {
+            return -ERESTARTSYS;
+        }
+    }
+
+    ret = do_request(vcmd->cmd, buf, buf_len);
+
+    spin_lock_irqsave(&vdev->cmd_lock, flags);
+    if (ret >= 0) {
+        vcmd->status = VHBA_REQ_SENT;
+        *offset += ret;
+    } else {
+        vcmd->status = VHBA_REQ_PENDING;
+    }
+
+    spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+
+    return ret;
+}
+
+static ssize_t vhba_ctl_write (struct file *file, const char __user *buf, size_t buf_len, loff_t *offset)
+{
+    struct vhba_device *vdev;
+    struct vhba_command *vcmd;
+    struct vhba_response res;
+    ssize_t ret;
+    unsigned long flags;
+
+    if (buf_len < sizeof(res)) {
+        return -EIO;
+    }
+
+    if (copy_from_user(&res, buf, sizeof(res))) {
+        return -EFAULT;
+    }
+
+    vdev = file->private_data;
+
+    spin_lock_irqsave(&vdev->cmd_lock, flags);
+    vcmd = match_command(vdev, res.tag);
+    if (!vcmd || vcmd->status != VHBA_REQ_SENT) {
+        spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+        DPRINTK("not expecting response\n");
+        return -EIO;
+    }
+    vcmd->status = VHBA_REQ_WRITING;
+    spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+
+    ret = do_response(vcmd->cmd, buf + sizeof(res), buf_len - sizeof(res), &res);
+
+    spin_lock_irqsave(&vdev->cmd_lock, flags);
+    if (ret >= 0) {
+        vcmd->cmd->scsi_done(vcmd->cmd);
+        ret += sizeof(res);
+
+        /* don't compete with vhba_device_dequeue */
+        if (!list_empty(&vcmd->entry)) {
+            list_del_init(&vcmd->entry);
+            vhba_free_command(vcmd);
+        }
+    } else {
+        vcmd->status = VHBA_REQ_SENT;
+    }
+
+    spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+
+    return ret;
+}
+
+static long vhba_ctl_ioctl (struct file *file, unsigned int cmd, unsigned long arg)
+{
+    struct vhba_device *vdev = file->private_data;
+    struct vhba_host *vhost;
+    struct scsi_device *sdev;
+
+    switch (cmd) {
+        case 0xBEEF001: {
+            vhost = platform_get_drvdata(&vhba_platform_device);
+            sdev = scsi_device_lookup(vhost->shost, 0, vdev->id, 0);
+
+            if (sdev) {
+                int id[4] = {
+                    sdev->host->host_no,
+                    sdev->channel,
+                    sdev->id,
+                    sdev->lun
+                };
+
+                scsi_device_put(sdev);
+
+                if (copy_to_user((void *)arg, id, sizeof(id))) {
+                    return -EFAULT;
+                }
+
+                return 0;
+            } else {
+                return -ENODEV;
+            }
+        }
+    }
+
+    return -ENOTTY;
+}
+
+#ifdef CONFIG_COMPAT
+static long vhba_ctl_compat_ioctl (struct file *file, unsigned int cmd, unsigned long arg)
+{
+    unsigned long compat_arg = (unsigned long)compat_ptr(arg);
+    return vhba_ctl_ioctl(file, cmd, compat_arg);
+}
+#endif
+
+static unsigned int vhba_ctl_poll (struct file *file, poll_table *wait)
+{
+    struct vhba_device *vdev = file->private_data;
+    unsigned int mask = 0;
+    unsigned long flags;
+
+    poll_wait(file, &vdev->cmd_wq, wait);
+
+    spin_lock_irqsave(&vdev->cmd_lock, flags);
+    if (next_command(vdev)) {
+        mask |= POLLIN | POLLRDNORM;
+    }
+    spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+
+    return mask;
+}
+
+static int vhba_ctl_open (struct inode *inode, struct file *file)
+{
+    struct vhba_device *vdev;
+    int retval;
+
+    DPRINTK("open\n");
+
+    /* check if vhba is probed */
+    if (!platform_get_drvdata(&vhba_platform_device)) {
+        return -ENODEV;
+    }
+
+    vdev = vhba_device_alloc();
+    if (!vdev) {
+        return -ENOMEM;
+    }
+
+    if (!(retval = vhba_add_device(vdev))) {
+        file->private_data = vdev;
+    }
+
+    vhba_device_put(vdev);
+
+    return retval;
+}
+
+static int vhba_ctl_release (struct inode *inode, struct file *file)
+{
+    struct vhba_device *vdev;
+    struct vhba_command *vcmd;
+    unsigned long flags;
+
+    DPRINTK("release\n");
+
+    vdev = file->private_data;
+
+    vhba_device_get(vdev);
+    vhba_remove_device(vdev);
+
+    spin_lock_irqsave(&vdev->cmd_lock, flags);
+    list_for_each_entry(vcmd, &vdev->cmd_list, entry) {
+        WARN_ON(vcmd->status == VHBA_REQ_READING || vcmd->status == VHBA_REQ_WRITING);
+
+        scmd_warn(vcmd->cmd, "device released with command %lu\n", vcmd->cmd->serial_number);
+        vcmd->cmd->result = DID_NO_CONNECT << 16;
+        vcmd->cmd->scsi_done(vcmd->cmd);
+
+        vhba_free_command(vcmd);
+    }
+    INIT_LIST_HEAD(&vdev->cmd_list);
+    spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+
+    vhba_device_put(vdev);
+
+    return 0;
+}
+
+static struct file_operations vhba_ctl_fops = {
+    .owner = THIS_MODULE,
+    .open = vhba_ctl_open,
+    .release = vhba_ctl_release,
+    .read = vhba_ctl_read,
+    .write = vhba_ctl_write,
+    .poll = vhba_ctl_poll,
+    .unlocked_ioctl = vhba_ctl_ioctl,
+#ifdef CONFIG_COMPAT
+    .compat_ioctl = vhba_ctl_compat_ioctl,
+#endif
+};
+
+static struct miscdevice vhba_miscdev = {
+    .minor = MISC_DYNAMIC_MINOR,
+    .name = "vhba_ctl",
+    .fops = &vhba_ctl_fops,
+};
+
+static int vhba_probe (struct platform_device *pdev)
+{
+    struct Scsi_Host *shost;
+    struct vhba_host *vhost;
+    int i;
+
+    shost = scsi_host_alloc(&vhba_template, sizeof(struct vhba_host));
+    if (!shost) {
+        return -ENOMEM;
+    }
+
+    shost->max_id = VHBA_MAX_ID;
+    /* we don't support lun > 0 */
+    shost->max_lun = 1;
+    shost->max_cmd_len = MAX_COMMAND_SIZE;
+
+    vhost = (struct vhba_host *)shost->hostdata;
+    memset(vhost, 0, sizeof(*vhost));
+
+    vhost->shost = shost;
+    vhost->num_devices = 0;
+    spin_lock_init(&vhost->dev_lock);
+    spin_lock_init(&vhost->cmd_lock);
+    INIT_WORK(&vhost->scan_devices, vhba_scan_devices);
+    vhost->cmd_next = 0;
+    for (i = 0; i < vhost->shost->can_queue; i++) {
+        vhost->commands[i].status = VHBA_REQ_FREE;
+    }
+
+    platform_set_drvdata(pdev, vhost);
+
+    if (scsi_add_host(shost, &pdev->dev)) {
+        scsi_host_put(shost);
+        return -ENOMEM;
+    }
+
+    return 0;
+}
+
+static int vhba_remove (struct platform_device *pdev)
+{
+    struct vhba_host *vhost;
+    struct Scsi_Host *shost;
+
+    vhost = platform_get_drvdata(pdev);
+    shost = vhost->shost;
+
+    scsi_remove_host(shost);
+    scsi_host_put(shost);
+
+    return 0;
+}
+
+static void vhba_release (struct device * dev)
+{
+    return;
+}
+
+static struct platform_device vhba_platform_device = {
+    .name = "vhba",
+    .id = -1,
+    .dev = {
+        .release = vhba_release,
+    },
+};
+
+static struct platform_driver vhba_platform_driver = {
+    .driver = {
+        .owner = THIS_MODULE,
+        .name = "vhba",
+    },
+    .probe = vhba_probe,
+    .remove = vhba_remove,
+};
+
+static int __init vhba_init (void)
+{
+    int ret;
+
+    ret = platform_device_register(&vhba_platform_device);
+    if (ret < 0) {
+        return ret;
+    }
+
+    ret = platform_driver_register(&vhba_platform_driver);
+    if (ret < 0) {
+        platform_device_unregister(&vhba_platform_device);
+        return ret;
+    }
+
+    ret = misc_register(&vhba_miscdev);
+    if (ret < 0) {
+        platform_driver_unregister(&vhba_platform_driver);
+        platform_device_unregister(&vhba_platform_device);
+        return ret;
+    }
+
+    return 0;
+}
+
+static void __exit vhba_exit(void)
+{
+    misc_deregister(&vhba_miscdev);
+    platform_driver_unregister(&vhba_platform_driver);
+    platform_device_unregister(&vhba_platform_device);
+}
+
+module_init(vhba_init);
+module_exit(vhba_exit);
+
diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig
index b811442c5..61d8583f7 100644
--- a/drivers/tty/Kconfig
+++ b/drivers/tty/Kconfig
@@ -75,6 +75,19 @@ config VT_CONSOLE_SLEEP
 	def_bool y
 	depends on VT_CONSOLE && PM_SLEEP
 
+config NR_TTY_DEVICES
+        int "Maximum tty device number"
+        depends on VT
+        range 12 63
+        default 63
+        ---help---
+          This option is used to change the number of tty devices in /dev.
+          The default value is 63. The lowest number you can set is 12,
+          63 is also the upper limit so we don't overrun the serial
+          consoles.
+
+          If unsure, say 63.
+
 config HW_CONSOLE
 	bool
 	depends on VT && !UML
diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index 72ebbc908..32cd52ca8 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -354,7 +354,7 @@ int xhci_find_slot_id_by_port(struct usb_hcd *hcd, struct xhci_hcd *xhci,
 
 	slot_id = 0;
 	for (i = 0; i < MAX_HC_SLOTS; i++) {
-		if (!xhci->devs[i])
+		if (!xhci->devs[i] || !xhci->devs[i]->udev)
 			continue;
 		speed = xhci->devs[i]->udev->speed;
 		if (((speed >= USB_SPEED_SUPER) == (hcd->speed >= HCD_USB3))
diff --git a/drivers/video/logo/Kconfig b/drivers/video/logo/Kconfig
index 0037104d6..00944ea70 100644
--- a/drivers/video/logo/Kconfig
+++ b/drivers/video/logo/Kconfig
@@ -15,71 +15,138 @@ config FB_LOGO_EXTRA
 	depends on FB=y
 	default y if SPU_BASE
 
+config LOGO_RANDOM
+	bool "Select random available logo"
+	default y
+	help
+	  Enable this option to use any available logo randomly at bootup.
+
+comment "Available logos"
+
+config LOGO_PCK_CLUT224
+	bool "224-color Zen Kernel/Meditating Tux logo"
+	default y
+
 config LOGO_LINUX_MONO
 	bool "Standard black and white Linux logo"
-	default y
+	default n
 
 config LOGO_LINUX_VGA16
 	bool "Standard 16-color Linux logo"
-	default y
+	default n
 
 config LOGO_LINUX_CLUT224
 	bool "Standard 224-color Linux logo"
-	default y
+	default n
 
 config LOGO_BLACKFIN_VGA16
 	bool "16-colour Blackfin Processor Linux logo"
 	depends on BLACKFIN
-	default y
+	default n
 
 config LOGO_BLACKFIN_CLUT224
 	bool "224-colour Blackfin Processor Linux logo"
 	depends on BLACKFIN
-	default y
+	default n
+
+config LOGO_OLDPCK_CLUT224
+	bool "224-color Old Zen Kernel logo"
+	depends on LOGO
+	default n
+
+config LOGO_ARCH_CLUT224
+	bool "224-color Arch Linux logo"
+	depends on LOGO
+	default n
+
+config LOGO_GENTOO_CLUT224
+	bool "224-color Gentoo Linux logo"
+	depends on LOGO
+	default n
+
+config LOGO_EXHERBO_CLUT224
+	bool "224-color Exherbo Linux logo"
+	depends on LOGO
+	default n
+
+config LOGO_SLACKWARE_CLUT224
+	bool "224-color Slackware Linux logo"
+	depends on LOGO
+	default n
+
+config LOGO_DEBIAN_CLUT224
+	bool "224-color Debian Linux logo"
+	depends on LOGO
+	default n
+
+config LOGO_FEDORASIMPLE_CLUT224
+	bool "224-color Fedora Simple Linux logo"
+	depends on LOGO
+	default n
+
+config LOGO_FEDORAGLOSSY_CLUT224
+	bool "224-color Fedora Glossy Linux logo"
+	depends on LOGO
+	default n
+
+config LOGO_TITS_CLUT224
+	bool "224-color Tits logo"
+	depends on LOGO
+	default n
+
+config LOGO_BSD_CLUT224
+	bool "224-color BSD Devil logo"
+	depends on LOGO
+	default n
+
+config LOGO_FBSD_CLUT224
+	bool "224-color FreeBSD logo"
+	depends on LOGO
+	default n
 
 config LOGO_DEC_CLUT224
 	bool "224-color Digital Equipment Corporation Linux logo"
 	depends on MACH_DECSTATION || ALPHA
-	default y
+	default n
 
 config LOGO_MAC_CLUT224
 	bool "224-color Macintosh Linux logo"
 	depends on MAC
-	default y
+	default n
 
 config LOGO_PARISC_CLUT224
 	bool "224-color PA-RISC Linux logo"
 	depends on PARISC
-	default y
+	default n
 
 config LOGO_SGI_CLUT224
 	bool "224-color SGI Linux logo"
 	depends on SGI_IP22 || SGI_IP27 || SGI_IP32
-	default y
+	default n
 
 config LOGO_SUN_CLUT224
 	bool "224-color Sun Linux logo"
 	depends on SPARC
-	default y
+	default n
 
 config LOGO_SUPERH_MONO
 	bool "Black and white SuperH Linux logo"
 	depends on SUPERH
-	default y
+	default n
 
 config LOGO_SUPERH_VGA16
 	bool "16-color SuperH Linux logo"
 	depends on SUPERH
-	default y
+	default n
 
 config LOGO_SUPERH_CLUT224
 	bool "224-color SuperH Linux logo"
 	depends on SUPERH
-	default y
+	default n
 
 config LOGO_M32R_CLUT224
 	bool "224-color M32R Linux logo"
 	depends on M32R
-	default y
+	default n
 
 endif # LOGO
diff --git a/drivers/video/logo/Makefile b/drivers/video/logo/Makefile
index 6194373ee..501b363bb 100644
--- a/drivers/video/logo/Makefile
+++ b/drivers/video/logo/Makefile
@@ -7,6 +7,18 @@ obj-$(CONFIG_LOGO_LINUX_VGA16)		+= logo_linux_vga16.o
 obj-$(CONFIG_LOGO_LINUX_CLUT224)	+= logo_linux_clut224.o
 obj-$(CONFIG_LOGO_BLACKFIN_CLUT224)	+= logo_blackfin_clut224.o
 obj-$(CONFIG_LOGO_BLACKFIN_VGA16)	+= logo_blackfin_vga16.o
+obj-$(CONFIG_LOGO_PCK_CLUT224)		+= logo_zen_clut224.o
+obj-$(CONFIG_LOGO_OLDPCK_CLUT224)	+= logo_oldzen_clut224.o
+obj-$(CONFIG_LOGO_ARCH_CLUT224)		+= logo_arch_clut224.o
+obj-$(CONFIG_LOGO_GENTOO_CLUT224)	+= logo_gentoo_clut224.o
+obj-$(CONFIG_LOGO_EXHERBO_CLUT224)	+= logo_exherbo_clut224.o
+obj-$(CONFIG_LOGO_SLACKWARE_CLUT224)	+= logo_slackware_clut224.o
+obj-$(CONFIG_LOGO_DEBIAN_CLUT224)       += logo_debian_clut224.o
+obj-$(CONFIG_LOGO_FEDORASIMPLE_CLUT224) += logo_fedorasimple_clut224.o
+obj-$(CONFIG_LOGO_FEDORAGLOSSY_CLUT224) += logo_fedoraglossy_clut224.o
+obj-$(CONFIG_LOGO_TITS_CLUT224)		+= logo_tits_clut224.o
+obj-$(CONFIG_LOGO_BSD_CLUT224)		+= logo_bsd_clut224.o
+obj-$(CONFIG_LOGO_FBSD_CLUT224)		+= logo_fbsd_clut224.o
 obj-$(CONFIG_LOGO_DEC_CLUT224)		+= logo_dec_clut224.o
 obj-$(CONFIG_LOGO_MAC_CLUT224)		+= logo_mac_clut224.o
 obj-$(CONFIG_LOGO_PARISC_CLUT224)	+= logo_parisc_clut224.o
diff --git a/drivers/video/logo/logo.c b/drivers/video/logo/logo.c
index 4d50bfd13..d3a133518 100644
--- a/drivers/video/logo/logo.c
+++ b/drivers/video/logo/logo.c
@@ -1,26 +1,127 @@
 
 /*
- *  Linux logo to be displayed on boot
- *
- *  Copyright (C) 1996 Larry Ewing (lewing@isc.tamu.edu)
- *  Copyright (C) 1996,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
- *  Copyright (C) 2001 Greg Banks <gnb@alphalink.com.au>
- *  Copyright (C) 2001 Jan-Benedict Glaw <jbglaw@lug-owl.de>
- *  Copyright (C) 2003 Geert Uytterhoeven <geert@linux-m68k.org>
- */
+*  Linux logo to be displayed on boot
+*
+*  Copyright (C) 1996 Larry Ewing (lewing@isc.tamu.edu)
+*  Copyright (C) 1996,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+*  Copyright (C) 2001 Greg Banks <gnb@alphalink.com.au>
+*  Copyright (C) 2001 Jan-Benedict Glaw <jbglaw@lug-owl.de>
+*  Copyright (C) 2003 Geert Uytterhoeven <geert@linux-m68k.org>
+*/
 
 #include <linux/linux_logo.h>
 #include <linux/stddef.h>
 #include <linux/module.h>
 
+#ifdef CONFIG_LOGO_RANDOM
+#include <linux/random.h>
+#endif
+
 #ifdef CONFIG_M68K
 #include <asm/setup.h>
 #endif
 
+
 static bool nologo;
 module_param(nologo, bool, 0);
 MODULE_PARM_DESC(nologo, "Disables startup logo");
 
+/* Monochromatic logos */
+static const struct linux_logo *logo_mono[] = {
+#ifdef CONFIG_LOGO_LINUX_MONO
+      &logo_linux_mono,		/* Generic Linux logo */
+#endif
+#ifdef CONFIG_LOGO_SUPERH_MONO
+      &logo_superh_mono,		/* SuperH Linux logo */
+#endif
+};
+
+/* 16-colour logos */
+static const struct linux_logo *logo_vga16[] = {
+#ifdef CONFIG_LOGO_LINUX_VGA16
+      &logo_linux_vga16,		/* Generic Linux logo */
+#endif
+#ifdef CONFIG_LOGO_BLACKFIN_VGA16
+      &logo_blackfin_vga16,		/* Blackfin processor logo */
+#endif
+#ifdef CONFIG_LOGO_SUPERH_VGA16
+      &logo_superh_vga16,		/* SuperH Linux logo */
+#endif
+};
+
+/* 224-colour logos */
+static const struct linux_logo *logo_clut224[] = {
+#ifdef CONFIG_LOGO_LINUX_CLUT224
+      &logo_linux_clut224,		/* Generic Linux logo */
+#endif
+#ifdef CONFIG_LOGO_BLACKFIN_CLUT224
+      &logo_blackfin_clut224,		/* Blackfin Linux logo */
+#endif
+#ifdef CONFIG_LOGO_DEC_CLUT224
+      &logo_dec_clut224,		/* DEC Linux logo on MIPS/MIPS64 or ALPHA */
+#endif
+#ifdef CONFIG_LOGO_MAC_CLUT224
+      &logo_mac_clut224,		/* Macintosh Linux logo on m68k */
+#endif
+#ifdef CONFIG_LOGO_PARISC_CLUT224
+      &logo_parisc_clut224,		/* PA-RISC Linux logo */
+#endif
+#ifdef CONFIG_LOGO_SGI_CLUT224
+	&logo_sgi_clut224,		/* SGI Linux logo on MIPS/MIPS64 */
+#endif
+#ifdef CONFIG_LOGO_SUN_CLUT224
+	&logo_sun_clut224,		/* Sun Linux logo */
+#endif
+#ifdef CONFIG_LOGO_SUPERH_CLUT224
+	&logo_superh_clut224,		/* SuperH Linux logo */
+#endif
+#ifdef CONFIG_LOGO_M32R_CLUT224
+	&logo_m32r_clut224,		/* M32R Linux logo */
+#endif
+#ifdef CONFIG_LOGO_PCK_CLUT224
+	&logo_zen_clut224,		/* Zen-Kernel logo */
+#endif
+#ifdef CONFIG_LOGO_OLDPCK_CLUT224
+	&logo_oldzen_clut224,		/* Old Zen-Kernel logo */
+#endif
+#ifdef CONFIG_LOGO_ARCH_CLUT224
+	&logo_arch_clut224,		/* Arch Linux logo */
+#endif
+#ifdef CONFIG_LOGO_GENTOO_CLUT224
+	&logo_gentoo_clut224,		/* Gentoo Linux logo */
+#endif
+#ifdef CONFIG_LOGO_EXHERBO_CLUT224
+	&logo_exherbo_clut224,		/* Exherbo Linux logo */
+#endif
+#ifdef CONFIG_LOGO_SLACKWARE_CLUT224
+	&logo_slackware_clut224,	/* Slackware Linux logo */
+#endif
+#ifdef CONFIG_LOGO_DEBIAN_CLUT224
+	&logo_debian_clut224,		/* Debian Linux logo */
+#endif
+#ifdef CONFIG_LOGO_FEDORASIMPLE_CLUT224
+	&logo_fedorasimple_clut224,	/* Fedora Simple logo */
+#endif
+#ifdef CONFIG_LOGO_FEDORAGLOSSY_CLUT224
+	&logo_fedoraglossy_clut224,	/* Fedora Glossy logo */
+#endif
+#ifdef CONFIG_LOGO_TITS_CLUT224
+	&logo_tits_clut224,		/* Tits logo */
+#endif
+#ifdef CONFIG_LOGO_BSD_CLUT224
+	&logo_bsd_clut224,		/* BSD logo */
+#endif
+#ifdef CONFIG_LOGO_FBSD_CLUT224
+	&logo_fbsd_clut224,		/* Free BSD logo */
+#endif
+};
+
+#ifdef CONFIG_LOGO_RANDOM
+#define LOGO_INDEX(s)	(get_random_int() % s)
+#else
+#define LOGO_INDEX(s)	(s - 1)
+#endif
+
 /*
  * Logos are located in the initdata, and will be freed in kernel_init.
  * Use late_init to mark the logos as freed to prevent any further use.
@@ -43,75 +144,30 @@ late_initcall_sync(fb_logo_late_init);
 const struct linux_logo * __ref fb_find_logo(int depth)
 {
 	const struct linux_logo *logo = NULL;
+	const struct linux_logo **array = NULL;
+	unsigned int size;
 
 	if (nologo || logos_freed)
 		return NULL;
 
+	/* Select logo array */
 	if (depth >= 1) {
-#ifdef CONFIG_LOGO_LINUX_MONO
-		/* Generic Linux logo */
-		logo = &logo_linux_mono;
-#endif
-#ifdef CONFIG_LOGO_SUPERH_MONO
-		/* SuperH Linux logo */
-		logo = &logo_superh_mono;
-#endif
+		array = logo_mono;
+		size = ARRAY_SIZE(logo_mono);
 	}
-	
 	if (depth >= 4) {
-#ifdef CONFIG_LOGO_LINUX_VGA16
-		/* Generic Linux logo */
-		logo = &logo_linux_vga16;
-#endif
-#ifdef CONFIG_LOGO_BLACKFIN_VGA16
-		/* Blackfin processor logo */
-		logo = &logo_blackfin_vga16;
-#endif
-#ifdef CONFIG_LOGO_SUPERH_VGA16
-		/* SuperH Linux logo */
-		logo = &logo_superh_vga16;
-#endif
+		array = logo_vga16;
+		size = ARRAY_SIZE(logo_vga16);
 	}
-	
 	if (depth >= 8) {
-#ifdef CONFIG_LOGO_LINUX_CLUT224
-		/* Generic Linux logo */
-		logo = &logo_linux_clut224;
-#endif
-#ifdef CONFIG_LOGO_BLACKFIN_CLUT224
-		/* Blackfin Linux logo */
-		logo = &logo_blackfin_clut224;
-#endif
-#ifdef CONFIG_LOGO_DEC_CLUT224
-		/* DEC Linux logo on MIPS/MIPS64 or ALPHA */
-		logo = &logo_dec_clut224;
-#endif
-#ifdef CONFIG_LOGO_MAC_CLUT224
-		/* Macintosh Linux logo on m68k */
-		if (MACH_IS_MAC)
-			logo = &logo_mac_clut224;
-#endif
-#ifdef CONFIG_LOGO_PARISC_CLUT224
-		/* PA-RISC Linux logo */
-		logo = &logo_parisc_clut224;
-#endif
-#ifdef CONFIG_LOGO_SGI_CLUT224
-		/* SGI Linux logo on MIPS/MIPS64 */
-		logo = &logo_sgi_clut224;
-#endif
-#ifdef CONFIG_LOGO_SUN_CLUT224
-		/* Sun Linux logo */
-		logo = &logo_sun_clut224;
-#endif
-#ifdef CONFIG_LOGO_SUPERH_CLUT224
-		/* SuperH Linux logo */
-		logo = &logo_superh_clut224;
-#endif
-#ifdef CONFIG_LOGO_M32R_CLUT224
-		/* M32R Linux logo */
-		logo = &logo_m32r_clut224;
-#endif
+		array = logo_clut224;
+		size = ARRAY_SIZE(logo_clut224);
 	}
+
+	/* We've got some logos to display */
+	if (array && size)
+		logo = array[LOGO_INDEX(size)];
+
 	return logo;
 }
 EXPORT_SYMBOL_GPL(fb_find_logo);
diff --git a/drivers/video/logo/logo_arch_clut224.ppm b/drivers/video/logo/logo_arch_clut224.ppm
new file mode 100644
index 000000000..e4d8daa69
--- /dev/null
+++ b/drivers/video/logo/logo_arch_clut224.ppm
@@ -0,0 +1,43204 @@
+P3
+# CREATOR: GIMP PNM Filter Version 1.1
+120 120
+255
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+4
+7
+0
+4
+7
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+10
+110
+160
+33
+122
+166
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+7
+146
+208
+27
+151
+213
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+4
+45
+68
+13
+147
+209
+13
+147
+209
+17
+73
+101
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+24
+137
+199
+13
+147
+209
+13
+147
+209
+54
+155
+212
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+7
+10
+13
+147
+209
+13
+147
+209
+13
+147
+209
+48
+164
+219
+3
+23
+31
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+33
+133
+182
+13
+147
+209
+13
+147
+209
+13
+147
+209
+24
+150
+212
+40
+160
+215
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+65
+166
+216
+5
+11
+14
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+3
+76
+109
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+65
+166
+216
+54
+136
+181
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+42
+161
+216
+48
+164
+219
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+9
+18
+24
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+17
+148
+210
+48
+164
+219
+40
+90
+118
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+37
+132
+189
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+65
+166
+216
+48
+164
+219
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+4
+7
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+48
+164
+219
+48
+164
+219
+11
+35
+49
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+25
+87
+120
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+27
+151
+213
+62
+163
+214
+62
+163
+214
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+65
+166
+216
+48
+164
+219
+0
+4
+7
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+4
+7
+17
+148
+210
+17
+148
+210
+17
+148
+210
+17
+148
+210
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+62
+163
+214
+48
+164
+219
+36
+86
+115
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+10
+110
+160
+21
+149
+211
+21
+149
+211
+17
+148
+210
+17
+148
+210
+17
+148
+210
+17
+148
+210
+17
+148
+210
+13
+147
+209
+13
+147
+209
+13
+147
+209
+49
+151
+208
+48
+164
+219
+65
+166
+216
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+17
+148
+210
+24
+150
+212
+21
+149
+211
+13
+147
+209
+13
+147
+209
+21
+149
+211
+17
+148
+210
+17
+148
+210
+17
+148
+210
+17
+148
+210
+17
+148
+210
+13
+147
+209
+65
+166
+216
+65
+166
+216
+11
+35
+49
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+74
+106
+31
+155
+211
+24
+150
+212
+24
+150
+212
+27
+151
+213
+17
+148
+210
+21
+149
+211
+13
+147
+209
+13
+147
+209
+49
+151
+208
+21
+149
+211
+17
+148
+210
+17
+148
+210
+48
+164
+219
+65
+166
+216
+62
+163
+214
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+24
+150
+212
+17
+148
+210
+31
+155
+211
+17
+148
+210
+17
+148
+210
+2
+145
+206
+31
+155
+211
+31
+155
+211
+27
+151
+213
+21
+149
+211
+49
+151
+208
+21
+149
+211
+21
+149
+211
+49
+151
+208
+65
+166
+216
+65
+166
+216
+5
+18
+28
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+2
+27
+39
+27
+151
+213
+27
+151
+213
+49
+151
+208
+24
+150
+212
+24
+150
+212
+31
+155
+211
+24
+150
+212
+21
+149
+211
+24
+150
+212
+27
+151
+213
+27
+151
+213
+2
+145
+206
+21
+149
+211
+21
+149
+211
+72
+171
+221
+62
+163
+214
+48
+154
+203
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+143
+204
+31
+155
+211
+31
+155
+211
+24
+150
+212
+31
+155
+211
+27
+151
+213
+24
+150
+212
+24
+150
+212
+31
+155
+211
+31
+155
+211
+21
+149
+211
+31
+155
+211
+31
+155
+211
+21
+149
+211
+27
+151
+213
+62
+163
+214
+67
+167
+217
+67
+167
+217
+0
+4
+7
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+4
+7
+31
+155
+211
+31
+155
+211
+31
+155
+211
+31
+155
+211
+24
+150
+212
+24
+150
+212
+24
+150
+212
+21
+149
+211
+31
+155
+211
+29
+152
+214
+21
+149
+211
+17
+148
+210
+31
+155
+211
+17
+148
+210
+24
+150
+212
+49
+151
+208
+67
+167
+217
+67
+167
+217
+49
+132
+177
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+29
+108
+153
+51
+153
+210
+51
+153
+210
+31
+155
+211
+31
+155
+211
+24
+150
+212
+24
+150
+212
+31
+155
+211
+31
+155
+211
+24
+150
+212
+24
+150
+212
+27
+151
+213
+27
+151
+213
+49
+151
+208
+21
+149
+211
+21
+149
+211
+27
+151
+213
+67
+167
+217
+67
+167
+217
+67
+167
+217
+0
+4
+7
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+49
+151
+208
+31
+155
+211
+51
+153
+210
+29
+152
+214
+32
+153
+215
+32
+153
+215
+29
+152
+214
+31
+155
+211
+24
+150
+212
+24
+150
+212
+24
+150
+212
+31
+155
+211
+24
+150
+212
+31
+155
+211
+31
+155
+211
+27
+151
+213
+24
+150
+212
+62
+163
+214
+68
+168
+218
+68
+168
+218
+40
+90
+118
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+71
+103
+33
+156
+212
+31
+155
+211
+31
+155
+211
+31
+155
+211
+49
+151
+208
+29
+152
+214
+31
+155
+211
+31
+155
+211
+32
+153
+215
+24
+150
+212
+24
+150
+212
+31
+155
+211
+31
+155
+211
+24
+150
+212
+31
+155
+211
+31
+155
+211
+24
+150
+212
+29
+152
+214
+78
+167
+212
+68
+168
+218
+65
+166
+216
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+31
+155
+211
+31
+155
+211
+33
+156
+212
+32
+153
+215
+32
+153
+215
+51
+153
+210
+51
+153
+210
+31
+155
+211
+49
+151
+208
+29
+152
+214
+29
+152
+214
+31
+155
+211
+32
+153
+215
+31
+155
+211
+31
+155
+211
+24
+150
+212
+24
+150
+212
+24
+150
+212
+31
+155
+211
+68
+168
+218
+69
+169
+219
+71
+170
+220
+16
+56
+73
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+11
+35
+49
+33
+156
+212
+33
+156
+212
+53
+154
+211
+53
+154
+211
+31
+155
+211
+31
+155
+211
+31
+155
+211
+51
+153
+210
+31
+155
+211
+31
+155
+211
+31
+155
+211
+51
+153
+210
+51
+153
+210
+29
+152
+214
+51
+153
+210
+31
+155
+211
+31
+155
+211
+24
+150
+212
+31
+155
+211
+31
+155
+211
+69
+169
+219
+69
+169
+219
+65
+166
+216
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+49
+151
+208
+32
+153
+215
+33
+156
+212
+32
+153
+215
+54
+155
+212
+31
+155
+211
+32
+153
+215
+32
+153
+215
+33
+156
+212
+31
+155
+211
+32
+153
+215
+29
+152
+214
+51
+153
+210
+32
+153
+215
+31
+155
+211
+31
+155
+211
+29
+152
+214
+31
+155
+211
+51
+153
+210
+31
+155
+211
+31
+155
+211
+68
+168
+218
+82
+170
+215
+69
+169
+219
+12
+30
+39
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+18
+23
+54
+155
+212
+54
+155
+212
+33
+156
+212
+33
+156
+212
+33
+156
+212
+33
+156
+212
+38
+159
+214
+53
+154
+211
+53
+154
+211
+33
+156
+212
+31
+155
+211
+31
+155
+211
+33
+156
+212
+32
+153
+215
+32
+153
+215
+31
+155
+211
+31
+155
+211
+51
+153
+210
+31
+155
+211
+31
+155
+211
+31
+155
+211
+44
+162
+217
+82
+170
+215
+71
+170
+220
+62
+163
+214
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+41
+147
+203
+42
+161
+216
+38
+159
+214
+54
+155
+212
+38
+159
+214
+38
+159
+214
+32
+153
+215
+33
+156
+212
+32
+153
+215
+32
+153
+215
+54
+155
+212
+54
+155
+212
+32
+153
+215
+31
+155
+211
+31
+155
+211
+32
+153
+215
+32
+153
+215
+33
+156
+212
+31
+155
+211
+49
+151
+208
+31
+155
+211
+49
+151
+208
+32
+153
+215
+72
+171
+221
+71
+170
+220
+71
+170
+220
+8
+27
+35
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+2
+9
+12
+42
+161
+216
+42
+161
+216
+54
+155
+212
+54
+155
+212
+42
+161
+216
+54
+155
+212
+54
+155
+212
+38
+159
+214
+38
+159
+214
+38
+159
+214
+54
+155
+212
+38
+159
+214
+33
+156
+212
+33
+156
+212
+33
+156
+212
+53
+154
+211
+32
+153
+215
+32
+153
+215
+31
+155
+211
+33
+156
+212
+32
+153
+215
+32
+153
+215
+32
+153
+215
+62
+163
+214
+82
+170
+215
+72
+171
+221
+55
+159
+209
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+42
+139
+189
+44
+162
+217
+44
+162
+217
+54
+155
+212
+42
+161
+216
+42
+161
+216
+54
+155
+212
+54
+155
+212
+42
+161
+216
+38
+159
+214
+54
+155
+212
+54
+155
+212
+40
+160
+215
+54
+155
+212
+33
+156
+212
+38
+159
+214
+54
+155
+212
+54
+155
+212
+33
+156
+212
+33
+156
+212
+53
+154
+211
+33
+156
+212
+31
+155
+211
+31
+155
+211
+54
+155
+212
+72
+171
+221
+72
+171
+221
+71
+170
+220
+5
+11
+14
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+4
+7
+44
+162
+217
+55
+159
+209
+54
+155
+212
+54
+155
+212
+54
+155
+212
+54
+155
+212
+54
+155
+212
+42
+161
+216
+42
+161
+216
+42
+161
+216
+42
+161
+216
+42
+161
+216
+54
+155
+212
+38
+159
+214
+38
+159
+214
+38
+159
+214
+38
+159
+214
+38
+159
+214
+33
+156
+212
+54
+155
+212
+54
+155
+212
+33
+156
+212
+53
+154
+211
+32
+153
+215
+33
+156
+212
+68
+168
+218
+74
+172
+223
+72
+171
+221
+48
+154
+203
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+13
+51
+74
+40
+160
+215
+55
+159
+209
+42
+161
+216
+42
+161
+216
+54
+155
+212
+40
+160
+215
+62
+163
+214
+44
+162
+217
+42
+161
+216
+54
+155
+212
+54
+155
+212
+42
+161
+216
+54
+155
+212
+54
+155
+212
+54
+155
+212
+54
+155
+212
+54
+155
+212
+38
+159
+214
+54
+155
+212
+40
+160
+215
+32
+153
+215
+54
+155
+212
+33
+156
+212
+38
+159
+214
+33
+156
+212
+31
+155
+211
+83
+172
+217
+74
+172
+223
+82
+170
+215
+0
+7
+10
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+24
+70
+92
+48
+164
+219
+62
+163
+214
+54
+155
+212
+54
+155
+212
+55
+159
+209
+46
+163
+218
+54
+155
+212
+46
+163
+218
+55
+159
+209
+54
+155
+212
+54
+155
+212
+42
+161
+216
+42
+161
+216
+42
+161
+216
+42
+161
+216
+42
+161
+216
+54
+155
+212
+38
+159
+214
+54
+155
+212
+38
+159
+214
+40
+160
+215
+32
+153
+215
+54
+155
+212
+54
+155
+212
+33
+156
+212
+72
+171
+221
+74
+172
+223
+74
+172
+223
+53
+145
+195
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+23
+77
+105
+44
+162
+217
+48
+164
+219
+44
+162
+217
+54
+155
+212
+42
+161
+216
+46
+163
+218
+62
+163
+214
+40
+160
+215
+55
+159
+209
+44
+162
+217
+40
+160
+215
+33
+156
+212
+54
+155
+212
+54
+155
+212
+42
+161
+216
+54
+155
+212
+42
+161
+216
+42
+161
+216
+42
+161
+216
+54
+155
+212
+38
+159
+214
+38
+159
+214
+40
+160
+215
+40
+160
+215
+33
+156
+212
+74
+172
+223
+83
+172
+217
+74
+172
+223
+0
+7
+10
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+36
+86
+115
+48
+164
+219
+44
+162
+217
+44
+162
+217
+54
+155
+212
+62
+163
+214
+46
+163
+218
+40
+160
+215
+42
+161
+216
+44
+162
+217
+46
+163
+218
+55
+159
+209
+44
+162
+217
+33
+156
+212
+55
+159
+209
+42
+161
+216
+54
+155
+212
+42
+161
+216
+42
+161
+216
+54
+155
+212
+42
+161
+216
+42
+161
+216
+38
+159
+214
+54
+155
+212
+38
+159
+214
+74
+172
+223
+74
+172
+223
+86
+174
+219
+54
+136
+181
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+23
+88
+115
+5
+18
+28
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+24
+70
+92
+62
+163
+214
+54
+155
+212
+62
+163
+214
+44
+162
+217
+54
+155
+212
+48
+164
+219
+48
+164
+219
+54
+155
+212
+42
+161
+216
+48
+164
+219
+42
+161
+216
+54
+155
+212
+40
+160
+215
+54
+155
+212
+40
+160
+215
+40
+160
+215
+54
+155
+212
+42
+161
+216
+42
+161
+216
+42
+161
+216
+42
+161
+216
+42
+161
+216
+54
+155
+212
+42
+161
+216
+74
+172
+223
+86
+174
+219
+83
+172
+217
+0
+4
+7
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+62
+163
+214
+67
+167
+217
+36
+86
+115
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+16
+56
+73
+62
+163
+214
+46
+163
+218
+62
+163
+214
+44
+162
+217
+46
+163
+218
+44
+162
+217
+62
+163
+214
+44
+162
+217
+46
+163
+218
+54
+155
+212
+54
+155
+212
+42
+161
+216
+55
+159
+209
+54
+155
+212
+55
+159
+209
+54
+155
+212
+55
+159
+209
+54
+155
+212
+42
+161
+216
+54
+155
+212
+42
+161
+216
+54
+155
+212
+42
+161
+216
+74
+172
+223
+86
+174
+219
+86
+174
+219
+57
+132
+172
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+23
+88
+115
+62
+163
+214
+48
+164
+219
+48
+164
+219
+42
+151
+200
+0
+4
+7
+0
+2
+0
+0
+2
+0
+0
+2
+0
+18
+42
+55
+44
+162
+217
+62
+163
+214
+48
+164
+219
+62
+163
+214
+54
+155
+212
+44
+162
+217
+65
+166
+216
+46
+163
+218
+48
+164
+219
+44
+162
+217
+62
+163
+214
+42
+161
+216
+62
+163
+214
+46
+163
+218
+44
+162
+217
+54
+155
+212
+55
+159
+209
+54
+155
+212
+40
+160
+215
+40
+160
+215
+54
+155
+212
+42
+161
+216
+55
+159
+209
+86
+174
+219
+86
+174
+219
+85
+173
+218
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+62
+163
+214
+62
+163
+214
+62
+163
+214
+48
+164
+219
+60
+162
+212
+62
+163
+214
+16
+56
+73
+0
+2
+0
+0
+2
+0
+0
+2
+0
+7
+16
+23
+62
+163
+214
+44
+162
+217
+62
+163
+214
+46
+163
+218
+48
+164
+219
+62
+163
+214
+46
+163
+218
+62
+163
+214
+62
+163
+214
+44
+162
+217
+67
+167
+217
+44
+162
+217
+54
+155
+212
+54
+155
+212
+42
+161
+216
+48
+164
+219
+54
+155
+212
+54
+155
+212
+40
+160
+215
+42
+161
+216
+62
+163
+214
+40
+160
+215
+74
+172
+223
+90
+177
+222
+87
+175
+220
+28
+73
+96
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+19
+48
+67
+48
+164
+219
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+48
+164
+219
+60
+162
+212
+62
+163
+214
+8
+27
+35
+0
+2
+0
+0
+2
+0
+0
+1
+4
+54
+155
+212
+67
+167
+217
+68
+168
+218
+65
+166
+216
+62
+163
+214
+62
+163
+214
+65
+166
+216
+62
+163
+214
+62
+163
+214
+44
+162
+217
+48
+164
+219
+54
+155
+212
+62
+163
+214
+48
+164
+219
+62
+163
+214
+54
+155
+212
+42
+161
+216
+54
+155
+212
+46
+163
+218
+42
+161
+216
+42
+161
+216
+54
+155
+212
+87
+175
+220
+87
+175
+220
+85
+173
+218
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+67
+167
+217
+65
+166
+216
+67
+167
+217
+65
+166
+216
+62
+163
+214
+65
+166
+216
+48
+164
+219
+65
+166
+216
+62
+163
+214
+65
+166
+216
+53
+145
+195
+0
+7
+10
+0
+2
+0
+0
+4
+7
+58
+151
+195
+54
+155
+212
+48
+164
+219
+69
+169
+219
+48
+164
+219
+46
+163
+218
+48
+164
+219
+46
+163
+218
+54
+155
+212
+62
+163
+214
+54
+155
+212
+44
+162
+217
+42
+161
+216
+62
+163
+214
+48
+164
+219
+44
+162
+217
+54
+155
+212
+44
+162
+217
+54
+155
+212
+38
+159
+214
+48
+164
+219
+83
+172
+217
+90
+177
+222
+90
+177
+222
+30
+93
+120
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+22
+45
+59
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+72
+171
+221
+65
+166
+216
+48
+164
+219
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+33
+133
+182
+0
+4
+7
+0
+1
+4
+34
+104
+137
+62
+163
+214
+62
+163
+214
+62
+163
+214
+62
+163
+214
+67
+167
+217
+69
+169
+219
+62
+163
+214
+62
+163
+214
+48
+164
+219
+62
+163
+214
+62
+163
+214
+62
+163
+214
+48
+164
+219
+65
+166
+216
+42
+161
+216
+48
+164
+219
+48
+164
+219
+44
+162
+217
+55
+159
+209
+38
+159
+214
+88
+176
+221
+88
+176
+221
+85
+173
+218
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+65
+166
+216
+67
+167
+217
+67
+167
+217
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+67
+167
+217
+62
+163
+214
+65
+166
+216
+62
+163
+214
+48
+164
+219
+68
+168
+218
+36
+115
+153
+0
+4
+7
+17
+35
+44
+62
+163
+214
+65
+166
+216
+62
+163
+214
+48
+164
+219
+48
+164
+219
+62
+163
+214
+67
+167
+217
+68
+168
+218
+62
+163
+214
+65
+166
+216
+62
+163
+214
+44
+162
+217
+65
+166
+216
+62
+163
+214
+62
+163
+214
+48
+164
+219
+62
+163
+214
+44
+162
+217
+54
+155
+212
+72
+171
+221
+88
+176
+221
+88
+176
+221
+40
+90
+118
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+26
+49
+63
+68
+168
+218
+67
+167
+217
+67
+167
+217
+67
+167
+217
+67
+167
+217
+67
+167
+217
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+67
+167
+217
+48
+164
+219
+48
+164
+219
+65
+166
+216
+62
+163
+214
+31
+134
+178
+5
+11
+14
+48
+154
+203
+60
+162
+212
+48
+164
+219
+62
+163
+214
+48
+164
+219
+68
+168
+218
+48
+164
+219
+62
+163
+214
+62
+163
+214
+68
+168
+218
+62
+163
+214
+48
+164
+219
+65
+166
+216
+54
+155
+212
+62
+163
+214
+44
+162
+217
+48
+164
+219
+62
+163
+214
+42
+161
+216
+90
+177
+222
+88
+176
+221
+85
+173
+218
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+62
+163
+214
+68
+168
+218
+68
+168
+218
+68
+168
+218
+68
+168
+218
+67
+167
+217
+67
+167
+217
+67
+167
+217
+67
+167
+217
+67
+167
+217
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+67
+167
+217
+65
+166
+216
+62
+163
+214
+62
+163
+214
+48
+154
+203
+53
+127
+166
+48
+164
+219
+65
+166
+216
+62
+163
+214
+48
+164
+219
+62
+163
+214
+54
+155
+212
+69
+169
+219
+48
+164
+219
+67
+167
+217
+48
+164
+219
+62
+163
+214
+46
+163
+218
+62
+163
+214
+62
+163
+214
+46
+163
+218
+46
+163
+218
+46
+163
+218
+74
+172
+223
+90
+177
+222
+90
+177
+222
+25
+99
+131
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+22
+45
+59
+78
+167
+212
+78
+167
+212
+69
+169
+219
+68
+168
+218
+68
+168
+218
+68
+168
+218
+68
+168
+218
+68
+168
+218
+67
+167
+217
+67
+167
+217
+67
+167
+217
+67
+167
+217
+67
+167
+217
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+48
+164
+219
+48
+164
+219
+62
+163
+214
+48
+164
+219
+62
+163
+214
+65
+166
+216
+48
+164
+219
+48
+164
+219
+60
+162
+212
+48
+164
+219
+64
+165
+215
+62
+163
+214
+71
+170
+220
+48
+164
+219
+62
+163
+214
+48
+164
+219
+62
+163
+214
+62
+163
+214
+44
+162
+217
+62
+163
+214
+62
+163
+214
+90
+177
+222
+90
+177
+222
+74
+172
+223
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+78
+167
+212
+71
+170
+220
+69
+169
+219
+69
+169
+219
+69
+169
+219
+69
+169
+219
+69
+169
+219
+68
+168
+218
+68
+168
+218
+68
+168
+218
+68
+168
+218
+68
+168
+218
+67
+167
+217
+67
+167
+217
+67
+167
+217
+67
+167
+217
+67
+167
+217
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+68
+168
+218
+65
+166
+216
+62
+163
+214
+65
+166
+216
+65
+166
+216
+65
+166
+216
+48
+164
+219
+65
+166
+216
+48
+164
+219
+48
+164
+219
+62
+163
+214
+48
+164
+219
+62
+163
+214
+48
+164
+219
+54
+155
+212
+48
+164
+219
+69
+169
+219
+62
+163
+214
+78
+167
+212
+102
+181
+221
+90
+177
+222
+34
+104
+137
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+19
+48
+67
+71
+170
+220
+71
+170
+220
+69
+169
+219
+78
+167
+212
+71
+170
+220
+78
+167
+212
+71
+170
+220
+69
+169
+219
+69
+169
+219
+71
+170
+220
+68
+168
+218
+68
+168
+218
+68
+168
+218
+68
+168
+218
+68
+168
+218
+67
+167
+217
+67
+167
+217
+67
+167
+217
+67
+167
+217
+67
+167
+217
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+68
+168
+218
+65
+166
+216
+62
+163
+214
+65
+166
+216
+48
+164
+219
+48
+164
+219
+48
+164
+219
+48
+164
+219
+62
+163
+214
+48
+164
+219
+62
+163
+214
+62
+163
+214
+62
+163
+214
+65
+166
+216
+48
+164
+219
+62
+163
+214
+90
+177
+222
+88
+176
+221
+87
+175
+220
+0
+4
+7
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+69
+169
+219
+82
+170
+215
+82
+170
+215
+69
+169
+219
+71
+170
+220
+82
+170
+215
+71
+170
+220
+78
+167
+212
+78
+167
+212
+71
+170
+220
+69
+169
+219
+71
+170
+220
+78
+167
+212
+69
+169
+219
+68
+168
+218
+68
+168
+218
+68
+168
+218
+68
+168
+218
+68
+168
+218
+67
+167
+217
+67
+167
+217
+67
+167
+217
+67
+167
+217
+67
+167
+217
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+68
+168
+218
+62
+163
+214
+62
+163
+214
+65
+166
+216
+62
+163
+214
+65
+166
+216
+48
+164
+219
+64
+165
+215
+62
+163
+214
+48
+164
+219
+48
+164
+219
+62
+163
+214
+62
+163
+214
+65
+166
+216
+91
+178
+224
+91
+178
+224
+21
+105
+143
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+22
+61
+78
+74
+172
+223
+82
+170
+215
+82
+170
+215
+72
+171
+221
+72
+171
+221
+71
+170
+220
+71
+170
+220
+82
+170
+215
+69
+169
+219
+71
+170
+220
+69
+169
+219
+69
+169
+219
+69
+169
+219
+69
+169
+219
+69
+169
+219
+69
+169
+219
+69
+169
+219
+68
+168
+218
+68
+168
+218
+68
+168
+218
+68
+168
+218
+68
+168
+218
+67
+167
+217
+67
+167
+217
+67
+167
+217
+67
+167
+217
+67
+167
+217
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+62
+163
+214
+65
+166
+216
+62
+163
+214
+65
+166
+216
+48
+164
+219
+48
+164
+219
+65
+166
+216
+65
+166
+216
+60
+162
+212
+60
+162
+212
+48
+164
+219
+91
+178
+224
+91
+178
+224
+88
+176
+221
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+1
+4
+82
+170
+215
+74
+172
+223
+83
+172
+217
+83
+172
+217
+83
+172
+217
+82
+170
+215
+72
+171
+221
+72
+171
+221
+82
+170
+215
+71
+170
+220
+69
+169
+219
+71
+170
+220
+71
+170
+220
+82
+170
+215
+69
+169
+219
+71
+170
+220
+69
+169
+219
+71
+170
+220
+69
+169
+219
+78
+167
+212
+69
+169
+219
+68
+168
+218
+68
+168
+218
+68
+168
+218
+68
+168
+218
+68
+168
+218
+67
+167
+217
+67
+167
+217
+67
+167
+217
+67
+167
+217
+67
+167
+217
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+67
+167
+217
+65
+166
+216
+62
+163
+214
+62
+163
+214
+65
+166
+216
+48
+164
+219
+67
+167
+217
+65
+166
+216
+62
+163
+214
+90
+177
+222
+91
+178
+224
+48
+115
+149
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+45
+104
+131
+74
+172
+223
+83
+172
+217
+74
+172
+223
+83
+172
+217
+74
+172
+223
+83
+172
+217
+83
+172
+217
+83
+172
+217
+82
+170
+215
+82
+170
+215
+82
+170
+215
+82
+170
+215
+82
+170
+215
+71
+170
+220
+69
+169
+219
+71
+170
+220
+71
+170
+220
+71
+170
+220
+69
+169
+219
+69
+169
+219
+71
+170
+220
+69
+169
+219
+78
+167
+212
+69
+169
+219
+68
+168
+218
+68
+168
+218
+68
+168
+218
+68
+168
+218
+68
+168
+218
+67
+167
+217
+67
+167
+217
+67
+167
+217
+67
+167
+217
+67
+167
+217
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+68
+168
+218
+62
+163
+214
+48
+164
+219
+48
+164
+219
+48
+164
+219
+67
+167
+217
+71
+170
+220
+91
+178
+224
+91
+178
+224
+0
+4
+7
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+85
+173
+218
+74
+172
+223
+85
+173
+218
+85
+173
+218
+74
+172
+223
+74
+172
+223
+74
+172
+223
+83
+172
+217
+74
+172
+223
+83
+172
+217
+83
+172
+217
+83
+172
+217
+72
+171
+221
+72
+171
+221
+72
+171
+221
+72
+171
+221
+71
+170
+220
+82
+170
+215
+82
+170
+215
+71
+170
+220
+82
+170
+215
+69
+169
+219
+78
+167
+212
+69
+169
+219
+69
+169
+219
+78
+167
+212
+69
+169
+219
+69
+169
+219
+69
+169
+219
+68
+168
+218
+68
+168
+218
+68
+168
+218
+68
+168
+218
+67
+167
+217
+67
+167
+217
+67
+167
+217
+67
+167
+217
+67
+167
+217
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+68
+168
+218
+48
+164
+219
+62
+163
+214
+62
+163
+214
+90
+177
+222
+91
+178
+224
+50
+145
+189
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+48
+115
+149
+86
+174
+219
+88
+176
+221
+88
+176
+221
+86
+174
+219
+86
+174
+219
+85
+173
+218
+86
+174
+219
+85
+173
+218
+83
+172
+217
+83
+172
+217
+83
+172
+217
+74
+172
+223
+83
+172
+217
+83
+172
+217
+74
+172
+223
+72
+171
+221
+82
+170
+215
+82
+170
+215
+72
+171
+221
+71
+170
+220
+71
+170
+220
+82
+170
+215
+71
+170
+220
+82
+170
+215
+69
+169
+219
+69
+169
+219
+69
+169
+219
+68
+168
+218
+69
+169
+219
+68
+168
+218
+68
+168
+218
+68
+168
+218
+67
+167
+217
+67
+167
+217
+67
+167
+217
+68
+168
+218
+67
+167
+217
+67
+167
+217
+67
+167
+217
+67
+167
+217
+67
+167
+217
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+62
+163
+214
+102
+181
+221
+91
+178
+224
+0
+4
+7
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+88
+176
+221
+87
+175
+220
+88
+176
+221
+74
+172
+223
+86
+174
+219
+86
+174
+219
+88
+176
+221
+85
+173
+218
+86
+174
+219
+74
+172
+223
+85
+173
+218
+74
+172
+223
+74
+172
+223
+83
+172
+217
+83
+172
+217
+83
+172
+217
+74
+172
+223
+74
+172
+223
+74
+172
+223
+82
+170
+215
+82
+170
+215
+82
+170
+215
+78
+167
+212
+48
+164
+219
+29
+152
+214
+17
+148
+210
+7
+146
+208
+7
+146
+208
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+49
+151
+208
+33
+156
+212
+48
+164
+219
+65
+166
+216
+62
+163
+214
+67
+167
+217
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+88
+176
+221
+95
+181
+227
+50
+145
+189
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+64
+129
+163
+87
+175
+220
+87
+175
+220
+87
+175
+220
+87
+175
+220
+87
+175
+220
+74
+172
+223
+86
+174
+219
+86
+174
+219
+88
+176
+221
+86
+174
+219
+86
+174
+219
+85
+173
+218
+86
+174
+219
+85
+173
+218
+74
+172
+223
+74
+172
+223
+83
+172
+217
+83
+172
+217
+82
+170
+215
+55
+159
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+51
+153
+210
+62
+163
+214
+65
+166
+216
+65
+166
+216
+65
+166
+216
+65
+166
+216
+93
+180
+226
+91
+178
+224
+2
+9
+12
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+88
+176
+221
+88
+176
+221
+87
+175
+220
+87
+175
+220
+87
+175
+220
+90
+177
+222
+88
+176
+221
+74
+172
+223
+74
+172
+223
+86
+174
+219
+74
+172
+223
+86
+174
+219
+86
+174
+219
+74
+172
+223
+85
+173
+218
+74
+172
+223
+71
+170
+220
+24
+150
+212
+7
+146
+208
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+31
+155
+211
+62
+163
+214
+69
+169
+219
+102
+181
+221
+66
+157
+202
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+68
+139
+173
+88
+176
+221
+88
+176
+221
+88
+176
+221
+88
+176
+221
+88
+176
+221
+87
+175
+220
+87
+175
+220
+87
+175
+220
+87
+175
+220
+91
+178
+224
+87
+175
+220
+74
+172
+223
+86
+174
+219
+86
+174
+219
+29
+152
+214
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+65
+166
+216
+93
+180
+226
+5
+11
+14
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+4
+7
+88
+176
+221
+88
+176
+221
+88
+176
+221
+88
+176
+221
+88
+176
+221
+88
+176
+221
+88
+176
+221
+88
+176
+221
+88
+176
+221
+87
+175
+220
+87
+175
+220
+74
+172
+223
+82
+170
+215
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+62
+163
+214
+55
+159
+209
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+76
+157
+196
+88
+176
+221
+88
+176
+221
+88
+176
+221
+88
+176
+221
+88
+176
+221
+88
+176
+221
+88
+176
+221
+88
+176
+221
+88
+176
+221
+87
+175
+220
+62
+163
+214
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+24
+150
+212
+48
+164
+219
+5
+18
+28
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+5
+11
+14
+88
+176
+221
+88
+176
+221
+88
+176
+221
+88
+176
+221
+88
+176
+221
+88
+176
+221
+88
+176
+221
+88
+176
+221
+88
+176
+221
+68
+168
+218
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+48
+164
+219
+62
+163
+214
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+73
+163
+208
+88
+176
+221
+88
+176
+221
+88
+176
+221
+88
+176
+221
+88
+176
+221
+88
+176
+221
+88
+176
+221
+78
+167
+212
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+67
+167
+217
+0
+7
+10
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+1
+0
+88
+176
+221
+88
+176
+221
+88
+176
+221
+88
+176
+221
+88
+176
+221
+88
+176
+221
+83
+172
+217
+7
+146
+208
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+17
+148
+210
+42
+151
+200
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+73
+163
+208
+88
+176
+221
+88
+176
+221
+88
+176
+221
+88
+176
+221
+83
+172
+217
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+54
+155
+212
+2
+27
+39
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+7
+13
+16
+88
+176
+221
+88
+176
+221
+88
+176
+221
+86
+174
+219
+21
+149
+211
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+55
+159
+209
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+78
+167
+212
+88
+176
+221
+87
+175
+220
+33
+156
+212
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+17
+148
+210
+14
+38
+51
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+12
+30
+39
+88
+176
+221
+71
+170
+220
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+7
+146
+208
+7
+146
+208
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+31
+155
+211
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+102
+181
+221
+17
+148
+210
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+8
+135
+190
+3
+28
+41
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+4
+7
+5
+18
+28
+7
+123
+172
+7
+146
+208
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+19
+48
+67
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+10
+42
+60
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+0
+143
+204
+1
+4
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+1
+4
+16
+137
+192
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+7
+146
+208
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+7
+146
+208
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+0
+139
+199
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+23
+115
+165
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+60
+88
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+2
+65
+92
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+8
+135
+190
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+10
+110
+160
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+7
+146
+208
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+7
+146
+208
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+33
+120
+170
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+0
+85
+122
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+6
+88
+126
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+0
+7
+10
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+7
+146
+208
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+17
+148
+210
+0
+4
+7
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+7
+146
+208
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+90
+129
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+5
+18
+28
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+17
+102
+146
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+10
+110
+160
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+7
+146
+208
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+129
+183
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+17
+148
+210
+0
+4
+7
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+1
+11
+20
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+8
+33
+46
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+4
+7
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+0
+139
+199
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+16
+137
+192
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+24
+126
+182
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+17
+148
+210
+4
+14
+21
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+22
+30
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+90
+129
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+5
+11
+14
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+28
+141
+196
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+7
+146
+208
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+0
+4
+7
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+7
+146
+208
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+0
+24
+37
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+4
+45
+68
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+26
+97
+136
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+7
+146
+208
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+7
+146
+208
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+19
+113
+163
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+4
+14
+21
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+10
+42
+60
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+71
+103
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+6
+31
+44
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+4
+7
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+17
+148
+210
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+7
+146
+208
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+0
+4
+7
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+34
+143
+199
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+60
+88
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+9
+99
+143
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+7
+146
+208
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+27
+117
+167
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+7
+146
+208
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+7
+146
+208
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+10
+69
+97
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+7
+146
+208
+6
+39
+56
+23
+115
+165
+7
+146
+208
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+6
+39
+56
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+85
+122
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+28
+141
+196
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+2
+27
+39
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+24
+126
+182
+0
+4
+7
+0
+4
+7
+18
+92
+131
+7
+146
+208
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+7
+146
+208
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+4
+7
+7
+146
+208
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+31
+129
+185
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+2
+9
+12
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+7
+146
+208
+17
+102
+146
+0
+2
+0
+0
+2
+0
+2
+9
+12
+19
+113
+163
+7
+146
+208
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+11
+61
+83
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+33
+133
+182
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+27
+117
+167
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+4
+7
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+7
+146
+208
+0
+85
+122
+0
+2
+0
+0
+2
+0
+0
+2
+0
+5
+11
+14
+33
+133
+182
+13
+147
+209
+13
+147
+209
+13
+147
+209
+0
+4
+7
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+5
+11
+14
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+2
+111
+154
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+1
+4
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+24
+137
+199
+5
+11
+14
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+3
+6
+20
+127
+176
+13
+147
+209
+2
+111
+154
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+139
+199
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+0
+118
+167
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+4
+7
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+9
+99
+143
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+2
+9
+12
+37
+132
+189
+0
+4
+7
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+2
+27
+39
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+27
+117
+167
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+1
+4
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+7
+146
+208
+0
+4
+7
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+1
+0
+0
+4
+7
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+7
+146
+208
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+0
+139
+199
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+4
+7
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+7
+57
+84
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+60
+87
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+24
+137
+199
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+2
+9
+12
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+17
+102
+146
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+7
+146
+208
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+3
+36
+54
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+37
+132
+189
+0
+4
+7
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+96
+140
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+3
+76
+109
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+0
+143
+204
+5
+11
+14
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+4
+7
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+0
+4
+7
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+129
+183
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+51
+74
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+7
+123
+172
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+7
+146
+208
+4
+45
+68
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+7
+146
+208
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+2
+111
+154
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+7
+10
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+7
+146
+208
+17
+102
+146
+0
+24
+37
+0
+4
+7
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+5
+18
+28
+22
+104
+148
+0
+143
+204
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+0
+139
+199
+0
+4
+7
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+28
+141
+196
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+7
+146
+208
+24
+126
+182
+6
+39
+56
+0
+4
+7
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+1
+0
+0
+2
+0
+6
+39
+56
+24
+126
+182
+7
+146
+208
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+6
+39
+56
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+28
+141
+196
+5
+67
+94
+0
+4
+7
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+1
+4
+0
+4
+7
+0
+63
+90
+34
+143
+199
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+17
+102
+146
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+7
+146
+208
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+7
+146
+208
+3
+76
+109
+0
+4
+7
+0
+1
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+4
+7
+3
+76
+109
+24
+137
+199
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+21
+149
+211
+0
+4
+7
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+3
+76
+109
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+7
+123
+172
+0
+4
+7
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+7
+10
+33
+120
+170
+7
+146
+208
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+16
+123
+179
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+4
+7
+7
+146
+208
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+7
+146
+208
+13
+90
+129
+0
+4
+7
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+4
+7
+13
+90
+129
+7
+146
+208
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+0
+4
+7
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+29
+108
+153
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+0
+139
+199
+8
+27
+35
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+2
+27
+39
+34
+143
+199
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+37
+132
+189
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+4
+7
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+31
+129
+185
+5
+11
+14
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+2
+9
+12
+37
+132
+189
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+0
+20
+28
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+31
+129
+185
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+7
+146
+208
+23
+115
+165
+0
+1
+4
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+1
+4
+2
+111
+154
+7
+146
+208
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+28
+141
+196
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+4
+7
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+7
+146
+208
+13
+90
+129
+0
+1
+4
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+1
+0
+0
+85
+122
+7
+146
+208
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+2
+9
+12
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+127
+181
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+7
+146
+208
+0
+118
+167
+0
+1
+4
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+29
+108
+153
+7
+146
+208
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+24
+137
+199
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+2
+9
+12
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+7
+146
+208
+19
+113
+163
+0
+1
+4
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+4
+7
+10
+110
+160
+7
+146
+208
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+0
+24
+37
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+139
+199
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+7
+146
+208
+0
+118
+167
+0
+4
+7
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+4
+7
+29
+108
+153
+7
+146
+208
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+7
+146
+208
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+3
+23
+31
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+0
+129
+183
+0
+4
+7
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+4
+7
+33
+120
+170
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+10
+42
+60
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+7
+146
+208
+0
+118
+167
+0
+4
+7
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+4
+7
+0
+118
+167
+7
+146
+208
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+7
+146
+208
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+85
+122
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+24
+126
+182
+0
+7
+10
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+4
+7
+33
+120
+170
+13
+147
+209
+13
+147
+209
+13
+147
+209
+13
+147
+209
+0
+96
+140
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+4
+7
+7
+146
+208
+13
+147
+209
+13
+147
+209
+0
+143
+204
+0
+7
+10
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+5
+11
+14
+0
+143
+204
+13
+147
+209
+13
+147
+209
+13
+147
+209
+0
+4
+7
+0
+2
+0
+0
+2
+0
+29
+108
+153
+13
+147
+209
+7
+146
+208
+17
+54
+77
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+4
+7
+17
+54
+77
+7
+146
+208
+13
+147
+209
+0
+118
+167
+0
+2
+0
+0
+4
+7
+7
+146
+208
+17
+102
+146
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+4
+7
+17
+102
+146
+7
+146
+208
+0
+4
+7
+27
+117
+167
+0
+4
+7
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+0
+2
+0
+27
+117
+167
diff --git a/drivers/video/logo/logo_bsd_clut224.ppm b/drivers/video/logo/logo_bsd_clut224.ppm
new file mode 100644
index 000000000..408f0282a
--- /dev/null
+++ b/drivers/video/logo/logo_bsd_clut224.ppm
@@ -0,0 +1,2403 @@
+P3
+120 120
+255
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+4 0 1  7 1 3  1 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  3 1 1  28 8 14
+51 24 39  16 5 9  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  9 3 6  39 15 26  61 26 49  58 26 51
+18 7 11  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  57 17 29  89 29 45  73 26 48  61 26 49  22 7 12
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  20 6 10
+102 29 42  132 43 63  76 28 47  52 25 42  22 7 12  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  4 0 1  16 5 9  3 1 1
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  69 20 33  154 39 54
+159 46 62  89 29 45  49 25 40  18 7 11  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  16 5 9  102 29 42  41 13 21
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  97 28 42  185 38 47  145 39 54
+89 29 45  58 26 51  16 5 9  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  4 0 1  132 30 39  148 30 39
+28 8 14  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  41 13 21  132 30 39  185 38 47  152 40 56  105 35 55
+76 28 47  22 7 12  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  41 13 21  185 38 47
+124 30 42  36 11 20  1 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  7 2 4
+117 31 42  185 38 47  185 38 47  152 40 56  105 35 55  76 28 47
+59 25 44  4 0 1  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  4 0 1  121 30 42
+185 38 47  102 29 42  48 19 31  9 3 6  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  123 33 45
+185 38 47  185 38 47  159 38 55  105 35 55  76 28 47  73 26 48
+49 25 40  4 0 1  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  41 13 21
+185 38 47  132 30 39  71 23 37  41 14 25  4 0 1  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  4 0 1  117 31 42  185 38 47
+185 38 47  171 39 51  118 34 52  88 29 45  76 28 47  62 26 49
+52 24 38  18 7 11  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  10 4 6  16 5 9  16 5 9
+18 7 11  18 7 11  16 5 9  10 4 6  16 5 9  7 2 4
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  7 1 3
+148 30 39  152 32 42  67 22 35  54 21 35  35 12 21  4 0 1
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  69 20 33  185 38 47  185 38 47
+185 38 47  159 35 49  105 35 55  89 29 45  76 28 47  62 26 49
+60 25 40  42 17 30  3 1 1  0 0 0  0 0 0  0 0 0
+1 0 0  41 13 21  79 27 44  118 34 52  124 30 42  121 30 42
+142 34 47  121 30 42  86 29 44  68 23 35  60 22 36  52 24 38
+42 17 30  30 10 18  16 5 9  9 3 6  9 3 6  3 1 1
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+148 30 39  185 38 47  71 23 37  50 18 33  49 25 40  35 12 21
+7 1 3  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  10 4 6  185 38 47  185 38 47  185 38 47
+185 38 47  140 36 52  105 35 55  89 29 45  76 28 47  67 27 47
+56 25 45  52 24 38  48 19 31  39 15 26  30 10 18  31 10 18
+89 29 45  129 39 59  159 38 55  171 39 51  171 39 51  171 39 51
+159 38 55  138 37 53  119 34 51  105 35 55  88 29 45  76 28 47
+76 28 47  62 26 49  57 25 43  49 25 40  48 19 31  31 10 18
+10 4 6  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  1 0 0
+132 30 39  185 38 47  121 30 42  64 24 39  49 25 40  48 19 31
+10 4 6  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  102 29 42  185 38 47  185 38 47  185 38 47
+185 38 47  131 38 56  105 35 55  89 29 45  67 27 47  64 25 43
+64 25 43  105 35 55  121 37 56  79 28 45  65 27 44  105 35 55
+159 46 62  159 46 62  159 38 55  171 39 51  171 39 51  144 40 57
+135 40 58  121 37 56  105 35 55  105 35 55  105 35 55  89 29 45
+88 29 45  67 27 47  61 26 49  58 26 51  52 25 42  49 25 40
+48 19 31  35 12 21  7 2 4  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+97 28 42  185 38 47  148 30 39  71 23 37  52 24 38  42 17 30
+35 12 21  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  36 11 20  171 39 51  185 38 47  185 38 47  185 38 47
+185 38 47  140 40 59  105 35 55  76 28 47  58 20 33  102 29 42
+140 36 52  171 39 51  159 38 55  119 40 62  105 35 55  143 45 64
+166 48 64  159 46 62  159 46 62  144 40 57  100 28 46  89 29 45
+86 29 44  73 26 42  76 28 47  88 29 45  89 29 45  105 35 55
+105 35 55  88 29 45  67 27 47  61 26 49  56 25 45  52 25 42
+52 25 42  52 25 42  49 25 40  30 10 18  7 2 4  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  4 0 1
+97 28 42  185 38 47  142 31 41  69 20 33  57 25 43  50 18 33
+42 17 30  10 4 6  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  31 10 18  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  139 38 56  79 27 44  71 23 37  124 30 42  185 38 47
+185 38 47  185 38 47  185 38 47  166 48 64  152 49 69  166 48 64
+185 38 47  166 48 64  143 45 64  112 32 46  117 31 42  141 31 41
+144 31 43  123 33 48  79 27 44  60 26 44  76 28 47  76 28 47
+105 35 55  105 35 55  89 29 45  88 29 45  73 26 42  60 26 44
+56 25 45  52 25 42  49 25 40  49 25 40  40 15 29  16 5 9
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  35 12 21
+132 30 39  185 38 47  132 30 39  71 23 37  50 18 33  49 25 40
+42 17 30  30 10 18  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  36 11 20  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  110 32 46  71 23 37  142 31 41  185 38 47  185 38 47
+185 38 47  171 39 51  166 48 64  166 48 64  185 38 47  185 38 47
+185 38 47  185 38 47  171 39 51  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  140 36 52  86 29 44  59 25 44  67 27 47
+89 29 45  113 41 62  113 41 62  97 28 42  100 28 46  88 29 45
+65 27 44  58 26 51  56 25 45  49 25 40  52 25 42  42 17 30
+28 8 14  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  41 14 25  97 28 42
+148 30 39  162 38 49  117 31 42  67 23 37  49 25 40  42 17 30
+42 17 30  35 12 21  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  41 13 21  185 38 47  185 38 47  185 38 47  185 38 47
+159 35 49  71 23 37  141 31 41  185 38 47  185 38 47  141 31 41
+117 31 42  113 41 62  118 50 79  113 41 62  166 48 64  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  129 39 59  67 27 47  56 25 45
+76 28 47  105 35 55  133 35 50  112 32 46  110 32 46  100 28 46
+89 29 45  86 29 44  73 26 48  62 26 49  58 26 51  52 25 42
+49 25 40  39 15 26  9 3 6  0 0 0  0 0 0  3 1 1
+10 4 6  20 6 10  47 16 28  69 20 33  135 30 41  185 38 47
+185 38 47  152 32 42  97 28 42  63 22 36  50 18 33  48 19 31
+50 18 33  39 15 26  4 0 1  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  41 13 21  185 38 47  185 38 47  185 38 47  171 39 51
+97 28 42  132 31 43  185 38 47  171 39 51  102 29 42  118 50 79
+167 135 158  210 187 199  171 141 162  108 54 86  152 40 56  185 38 47
+185 38 47  185 38 47  171 39 51  121 30 42  132 30 39  152 32 42
+185 38 47  185 38 47  185 38 47  185 38 47  118 34 52  61 26 49
+67 27 47  89 29 45  128 33 49  128 33 49  123 33 48  120 32 46
+120 32 46  120 32 46  100 28 46  76 28 47  73 26 48  65 27 44
+68 23 35  66 24 37  65 27 44  47 16 28  28 8 14  69 20 33
+102 29 42  132 30 39  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  124 30 42  79 27 44  60 22 36  51 24 39  50 18 33
+48 19 31  40 15 29  9 3 6  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  36 11 20  185 38 47  185 38 47  185 38 47  102 29 42
+117 31 42  185 38 47  171 39 51  112 32 46  148 114 145  233 215 221
+253 249 249  238 222 225  148 114 145  113 41 62  185 38 47  185 38 47
+171 39 51  125 35 52  134 49 77  148 114 145  148 114 145  131 80 105
+113 41 62  154 38 52  185 38 47  185 38 47  171 39 51  88 29 45
+76 28 47  105 35 55  119 34 51  133 35 50  135 32 45  128 33 49
+128 33 49  133 35 50  118 34 52  100 28 46  89 29 45  86 29 44
+100 28 46  121 30 42  135 30 41  135 30 41  148 30 39  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+132 30 39  73 26 42  65 27 44  65 27 44  57 25 43  49 25 40
+42 17 30  50 18 33  18 7 11  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  7 1 3  141 31 41  185 38 47  145 31 41  97 28 42
+185 38 47  162 38 49  110 32 46  191 162 183  248 240 240  253 249 249
+243 232 234  210 187 199  131 80 105  119 40 62  185 38 47  185 38 47
+128 41 59  159 124 153  226 205 215  231 213 218  226 205 215  228 209 216
+171 141 162  118 50 79  138 32 45  185 38 47  185 38 47  119 34 51
+88 29 45  119 40 62  119 40 62  125 41 61  124 34 50  128 33 49
+128 33 49  128 33 49  124 34 50  118 34 52  110 32 46  119 34 51
+136 33 46  142 31 41  152 32 42  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  171 39 51  132 30 39
+97 28 42  71 23 37  66 25 40  57 25 43  49 25 40  42 17 30
+42 17 30  50 18 33  18 7 11  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  57 17 29  171 39 51  97 28 42  148 30 39
+185 38 47  97 28 42  167 135 158  253 249 249  253 249 249  243 232 234
+241 229 231  202 176 194  113 41 62  171 39 51  185 38 47  133 35 50
+167 135 158  253 249 249  245 236 237  234 219 224  231 213 218  228 209 216
+219 198 209  148 114 145  86 29 44  171 39 51  185 38 47  159 38 55
+119 34 51  122 42 63  132 43 63  122 42 63  121 37 56  128 33 49
+123 33 48  119 34 51  123 32 47  110 32 46  118 34 52  140 36 52
+145 31 41  152 32 42  162 38 49  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  152 32 42  132 30 39  97 28 42
+71 23 37  71 23 37  65 27 44  56 25 45  49 25 40  42 17 30
+42 17 30  42 17 30  18 6 10  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  22 7 12  113 41 62  102 29 42  185 38 47
+116 30 44  148 114 145  245 236 237  253 249 249  248 240 240  238 224 228
+243 232 234  167 135 158  141 31 41  185 38 47  162 38 49  141 92 121
+245 236 237  253 249 249  248 240 240  243 232 234  238 224 228  233 215 221
+226 205 215  171 141 162  108 54 86  135 30 41  185 38 47  185 38 47
+152 40 56  135 40 58  141 42 59  135 40 58  128 33 49  133 35 50
+133 35 50  128 33 49  123 32 47  105 35 55  105 35 55  131 38 56
+138 32 45  136 31 43  148 30 39  152 32 42  185 38 47  185 38 47
+185 38 47  171 39 51  142 31 41  117 31 42  97 28 42  86 29 44
+68 23 35  66 24 37  58 24 38  49 25 40  48 19 31  42 17 30
+49 25 40  42 17 30  10 4 6  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  7 1 3  73 26 42  145 39 54  144 31 43
+120 71 102  238 222 225  253 249 249  253 249 249  241 229 231  241 229 231
+228 209 216  131 80 105  171 39 51  185 38 47  131 80 105  238 224 228
+253 249 249  248 240 240  238 222 225  238 222 225  238 224 228  238 222 225
+228 209 216  180 150 172  108 66 98  102 29 42  185 38 47  185 38 47
+185 38 47  159 38 55  171 39 51  150 36 50  133 35 50  133 31 44
+129 33 45  120 32 46  128 33 49  128 33 49  121 37 56  128 33 49
+128 33 49  125 31 44  141 31 41  148 30 39  152 32 42  148 30 39
+135 30 41  132 30 39  124 30 42  102 29 42  86 29 44  71 23 37
+65 27 44  56 25 45  51 24 39  49 25 40  50 18 33  48 19 31
+42 17 30  39 15 26  7 2 4  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  97 28 42  162 38 49  126 41 60
+210 187 199  253 249 249  253 249 249  248 240 240  238 224 228  245 236 237
+196 169 185  136 33 46  185 38 47  143 45 64  196 169 185  253 249 249
+253 249 249  243 232 234  238 224 228  238 224 228  238 224 228  238 222 225
+231 213 218  180 150 172  138 88 121  89 29 45  171 39 51  185 38 47
+185 38 47  185 38 47  171 39 51  150 36 50  142 33 47  135 31 44
+125 31 44  117 31 44  112 32 46  138 34 49  117 31 44  120 32 46
+120 32 46  117 31 44  136 31 43  144 31 43  148 30 39  121 30 42
+117 31 42  102 29 42  97 28 42  73 26 42  65 27 44  60 26 44
+52 25 42  49 25 40  50 18 33  50 18 33  49 25 40  42 17 30
+38 12 21  28 8 14  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  18 7 11  117 31 42  132 30 39  167 135 158
+248 240 240  253 249 249  253 249 249  245 236 237  245 236 237  245 236 237
+148 114 145  159 38 55  159 38 55  171 141 162  253 249 249  253 249 249
+248 240 240  243 232 234  233 215 221  234 219 224  241 229 231  231 213 218
+238 222 225  191 162 183  137 96 131  105 35 55  162 38 49  185 38 47
+185 38 47  171 39 51  171 39 51  159 35 49  142 33 47  133 32 44
+123 33 45  117 31 42  132 30 39  123 32 47  118 34 52  118 34 52
+112 32 46  117 31 44  117 31 44  117 31 42  117 31 42  97 28 42
+97 28 42  86 29 44  71 23 37  60 25 40  52 25 42  56 25 45
+49 25 40  49 25 40  49 25 40  49 25 40  48 19 31  42 17 30
+47 16 28  7 2 4  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  41 13 21  140 36 52  110 32 46  226 205 215
+233 215 221  233 215 221  241 229 231  241 229 231  243 232 234  233 215 221
+152 49 69  159 35 49  131 80 105  241 229 231  253 249 249  248 240 240
+241 229 231  241 229 231  238 222 225  238 224 228  241 229 231  238 222 225
+233 215 221  202 176 194  148 114 145  108 54 86  152 32 42  185 38 47
+185 38 47  185 38 47  171 39 51  153 36 50  139 38 56  128 33 49
+117 31 44  125 31 44  131 32 43  117 31 44  113 41 62  110 32 46
+117 31 44  133 32 44  117 31 42  89 29 45  89 29 45  86 29 44
+71 23 37  64 25 43  60 26 44  49 25 40  49 25 40  49 25 40
+49 25 40  49 25 40  49 25 40  50 18 33  42 17 30  35 12 21
+16 5 9  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  57 17 29  135 30 41  159 124 153  196 169 185
+191 162 183  140 102 127  108 66 98  210 187 199  248 240 240  196 169 185
+143 45 64  159 46 63  180 150 172  245 236 237  253 249 249  248 240 240
+241 229 231  231 213 218  241 229 231  241 229 231  238 222 225  238 222 225
+228 209 216  191 162 183  159 124 153  108 54 86  141 31 41  185 38 47
+185 38 47  185 38 47  185 38 47  150 36 50  140 36 52  125 31 44
+123 33 45  121 30 42  116 30 44  102 29 42  110 32 46  112 32 46
+110 32 46  102 29 42  86 29 44  73 26 42  67 27 47  56 25 45
+52 25 42  52 25 42  52 25 42  49 25 40  49 25 40  49 25 40
+49 25 40  49 25 40  42 17 30  40 15 29  35 12 21  10 4 6
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  97 28 42  152 49 69  191 162 183  141 92 121
+228 209 216  108 66 98  50 18 33  140 102 127  243 232 234  131 80 105
+148 37 53  134 49 77  180 150 172  191 162 183  196 169 185  210 187 199
+241 229 231  233 215 221  231 213 218  238 224 228  238 222 225  238 222 225
+233 215 221  202 176 194  159 124 153  93 57 98  124 30 42  185 38 47
+185 38 47  185 38 47  185 38 47  146 36 50  133 35 50  124 30 42
+125 31 44  124 30 42  110 32 46  102 29 42  97 28 42  97 28 42
+97 28 42  71 23 37  57 25 43  56 25 45  56 25 45  52 25 42
+49 25 40  49 25 40  49 25 40  49 25 40  49 25 40  49 25 40
+49 25 40  49 25 40  49 25 40  31 10 18  7 2 4  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  4 0 1  102 29 42  141 92 121  131 80 105  180 150 172
+196 169 185  58 26 51  52 25 42  120 71 102  202 176 194  121 37 56
+135 40 58  71 23 37  108 54 86  210 187 199  108 54 86  89 29 45
+202 176 194  243 232 234  238 224 228  234 219 224  241 229 231  238 222 225
+233 215 221  202 176 194  159 124 153  108 54 86  124 30 42  185 38 47
+185 38 47  185 38 47  185 38 47  162 38 49  159 35 49  135 31 44
+125 31 44  121 30 42  117 31 42  97 28 42  86 29 44  67 23 37
+52 24 38  49 25 40  49 25 40  49 25 40  49 25 40  49 25 40
+49 25 40  49 25 40  49 25 40  49 25 40  49 25 40  49 25 40
+49 25 40  49 25 40  35 12 21  7 2 4  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  10 4 6  97 28 42  131 80 105  76 28 47  137 96 131
+93 57 98  54 21 35  50 18 33  108 54 86  159 124 153  125 35 52
+89 29 45  48 19 31  159 124 153  191 162 183  51 24 39  54 19 32
+131 80 105  243 232 234  245 236 237  243 232 234  241 229 231  238 224 228
+228 209 216  202 176 194  167 135 158  93 57 98  121 30 42  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  152 32 42  129 33 45
+121 30 42  117 31 42  97 28 42  71 23 37  67 22 35  54 21 35
+52 24 38  56 25 45  52 25 42  49 25 40  52 25 42  49 25 40
+49 25 40  49 25 40  49 25 40  48 19 31  50 18 33  42 17 30
+50 18 33  22 7 12  1 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  10 4 6  86 29 44  79 27 44  60 25 40  49 25 40
+57 25 43  56 25 45  52 20 33  108 54 86  134 49 77  129 39 59
+71 23 37  60 25 40  196 169 185  167 135 158  50 18 33  52 25 42
+76 28 47  219 198 209  248 240 240  241 229 231  245 236 237  238 224 228
+231 213 218  202 176 194  159 124 153  93 57 98  132 30 39  185 38 47
+185 38 47  185 38 47  185 38 47  171 39 51  152 32 42  132 30 39
+124 30 42  102 29 42  86 29 44  71 23 37  67 22 35  52 24 38
+52 25 42  49 25 40  52 25 42  49 25 40  49 25 40  49 25 40
+49 25 40  49 25 40  49 25 40  42 17 30  42 17 30  28 8 14
+22 7 12  3 1 1  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  57 17 29  138 37 53  65 27 44  61 26 49  56 25 45
+51 24 39  49 25 40  58 24 38  88 29 45  144 40 57  135 40 58
+63 22 36  52 25 42  167 135 158  120 71 102  49 25 40  56 25 45
+67 27 47  196 169 185  248 240 240  243 232 234  245 236 237  238 224 228
+219 198 209  196 169 185  137 96 131  76 28 47  159 35 49  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  152 32 42  142 31 41
+135 30 41  121 30 42  97 28 42  71 23 37  61 21 34  50 18 33
+52 25 42  49 25 40  49 25 40  49 25 40  49 25 40  49 25 40
+49 25 40  49 25 40  48 19 31  35 12 21  16 5 9  1 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+22 7 12  152 32 42  148 37 50  66 25 40  67 27 47  61 26 49
+52 25 42  52 25 42  54 21 35  79 27 44  154 39 54  113 41 62
+60 25 40  61 26 49  58 26 51  62 26 49  56 25 45  56 25 45
+61 21 34  167 135 158  248 240 240  238 222 225  243 232 234  238 222 225
+228 209 216  180 150 172  137 96 131  79 28 45  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  171 39 51  159 35 49  135 30 41
+132 30 39  124 30 42  102 29 42  71 23 37  67 22 35  48 16 29
+49 25 40  52 25 42  49 25 40  49 25 40  49 25 40  50 18 33
+42 17 30  41 14 25  22 7 12  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  3 1 1  57 17 29
+162 38 49  185 38 47  171 39 51  97 28 42  60 23 38  65 27 44
+60 26 44  60 26 44  57 17 29  131 32 43  159 38 55  89 29 45
+57 25 43  61 26 49  56 25 45  61 26 49  61 26 49  61 26 49
+65 27 44  171 141 162  243 232 234  243 232 234  245 236 237  233 215 221
+219 198 209  171 141 162  120 71 102  100 28 46  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  171 39 51  150 36 50  135 31 44
+121 30 42  121 30 42  102 29 42  86 29 44  61 21 34  56 22 36
+49 25 40  52 25 42  52 25 42  49 25 40  49 25 40  42 17 30
+48 19 31  22 7 12  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  10 4 6  97 28 42  185 38 47
+185 38 47  185 38 47  185 38 47  159 35 49  117 31 42  71 23 37
+71 23 37  71 23 37  124 30 42  185 38 47  171 39 51  97 28 42
+60 26 44  62 26 49  52 25 42  61 26 49  61 26 49  67 27 47
+73 26 48  171 141 162  243 232 234  245 236 237  243 232 234  231 213 218
+202 176 194  159 124 153  93 57 98  131 38 56  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  171 39 51  159 35 49  135 32 45
+124 30 42  117 31 42  102 29 42  71 23 37  61 21 34  52 24 38
+49 25 40  49 25 40  52 25 42  52 25 42  49 25 40  40 15 29
+36 11 20  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  7 2 4  102 29 42  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  133 32 44  71 23 37
+73 26 42  110 32 46  153 36 50  171 39 51  171 39 51  112 32 46
+67 27 47  61 26 49  61 26 49  61 26 49  56 25 45  57 25 43
+105 35 55  210 187 199  245 236 237  241 229 231  231 213 218  233 215 221
+210 187 199  146 108 143  76 28 47  159 35 49  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  152 32 42  138 32 45
+117 31 42  110 32 46  97 28 42  71 23 37  60 23 38  51 24 39
+51 24 39  49 25 40  52 25 42  52 25 42  49 25 40  49 25 40
+16 5 9  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  38 12 21  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  135 30 41  124 30 42  152 32 42
+171 39 51  171 39 51  153 36 50  136 31 43  171 39 51  171 39 51
+89 29 45  67 27 47  56 25 45  60 26 44  60 22 36  67 22 35
+159 124 153  243 232 234  243 232 234  233 215 221  231 213 218  226 205 215
+180 150 172  108 66 98  97 28 42  185 38 47  185 38 47  185 38 47
+185 38 47  171 39 51  171 39 51  171 39 51  153 36 50  135 32 45
+123 32 47  102 29 42  97 28 42  71 23 37  60 25 40  57 25 43
+49 25 40  49 25 40  52 25 42  52 25 42  49 25 40  49 25 40
+16 5 9  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  10 4 6  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  7 2 4  110 32 46  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  148 30 39  148 30 39  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  171 39 51  162 38 49  185 38 47
+120 32 46  71 23 37  56 25 45  57 25 43  61 21 34  108 54 86
+219 198 209  241 229 231  233 215 221  238 222 225  226 205 215  191 162 183
+137 96 131  105 35 55  171 39 51  185 38 47  185 38 47  185 38 47
+185 38 47  159 38 55  159 38 55  162 38 49  144 40 57  136 33 46
+112 32 46  110 32 46  86 29 44  73 26 42  64 24 39  57 25 43
+56 25 45  58 26 51  58 26 51  58 26 51  49 25 40  49 25 40
+9 3 6  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  3 1 1  108 66 98  120 71 102  140 102 127
+93 57 98  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  10 4 6  153 36 50  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  135 30 41  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  159 38 55  159 38 55
+150 39 55  105 35 55  66 25 40  60 23 38  76 28 47  167 135 158
+248 240 240  253 249 249  243 232 234  233 215 221  202 176 194  148 114 145
+108 66 98  121 37 56  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  171 39 51  171 39 51  171 39 51  150 39 55  138 34 49
+112 32 46  102 29 42  97 28 42  71 23 37  67 23 37  60 26 44
+61 26 49  58 26 51  58 26 51  52 25 42  49 25 40  49 25 40
+9 3 6  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  1 0 0  93 57 98  202 176 194
+167 135 158  108 66 98  42 17 30  0 0 0  0 0 0  0 0 0
+0 0 0  7 2 4  152 32 42  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  132 30 39  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  152 40 56  128 41 59
+121 37 56  119 40 62  105 35 55  79 27 44  105 35 55  148 114 145
+180 150 172  191 162 183  148 114 145  141 92 121  108 54 86  108 54 86
+86 29 44  124 37 57  159 35 49  171 39 51  185 38 47  185 38 47
+153 36 50  159 46 62  171 39 51  171 39 51  152 32 42  135 32 45
+110 32 46  102 29 42  86 29 44  71 23 37  52 24 38  60 26 44
+58 26 51  58 26 51  58 26 51  52 25 42  49 25 40  49 25 40
+9 3 6  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  138 88 121
+146 108 143  58 26 51  58 26 51  10 4 6  0 0 0  0 0 0
+0 0 0  7 2 4  141 31 41  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  135 30 41  148 30 39  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  171 39 51  143 45 64  123 43 68
+113 41 62  105 35 55  105 35 55  136 44 65  131 38 56  105 35 55
+105 35 55  116 30 44  116 30 44  102 29 42  117 31 42  124 30 42
+132 30 39  153 36 50  159 35 49  171 39 51  171 39 51  159 35 49
+153 36 50  144 40 57  150 39 55  154 39 54  159 38 55  123 33 45
+102 29 42  97 28 42  86 29 44  73 26 42  51 24 39  58 26 51
+58 26 51  58 26 51  52 25 42  52 25 42  49 25 40  39 15 26
+4 0 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  58 26 51
+196 169 185  93 57 98  58 26 51  42 17 30  3 1 1  0 0 0
+0 0 0  7 2 4  97 28 42  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  171 39 51  116 30 44  132 30 39  185 38 47
+185 38 47  171 39 51  159 46 63  143 45 66  134 49 77  108 54 86
+108 54 86  108 54 86  76 28 47  119 34 51  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+171 39 51  154 39 54  144 40 57  140 36 52  142 33 47  120 32 46
+102 29 42  89 29 45  86 29 44  65 27 44  56 25 45  58 26 51
+58 26 51  58 26 51  52 25 42  52 25 42  52 25 42  35 12 21
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  49 25 40
+171 141 162  120 71 102  93 57 98  93 57 98  18 7 11  0 0 0
+0 0 0  0 0 0  36 11 20  171 39 51  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  159 38 55  112 32 46  110 32 46
+117 31 44  119 40 62  113 41 62  89 29 45  88 29 45  76 28 47
+76 28 47  105 35 55  76 28 47  110 32 46  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  171 39 51  159 35 49  159 35 49  136 33 46  110 32 46
+102 29 42  86 29 44  73 26 42  59 25 44  61 26 49  58 26 51
+58 26 51  58 26 51  58 26 51  49 25 40  49 25 40  30 10 18
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+49 25 40  22 7 12  7 2 4  93 57 98  93 57 98  0 0 0
+0 0 0  0 0 0  18 7 11  133 35 50  185 38 47  185 38 47
+185 38 47  154 39 54  141 40 57  159 46 63  166 48 64  138 34 49
+116 30 44  105 35 55  113 41 62  105 35 55  105 35 55  121 37 56
+105 35 55  113 41 62  113 41 62  135 40 58  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  171 39 51  154 38 52  128 33 49  101 29 42
+86 29 44  79 28 45  67 27 47  61 26 49  56 25 45  52 25 42
+58 26 51  58 26 51  52 25 42  49 25 40  49 25 40  16 5 9
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  49 25 40
+138 88 121  148 114 145  140 102 127  148 114 145  30 10 18  0 0 0
+0 0 0  0 0 0  0 0 0  18 7 11  137 96 131  40 15 29
+0 0 0  0 0 0  0 0 0  58 24 38  159 46 62  185 38 47
+185 38 47  141 43 61  89 29 45  86 29 44  140 36 52  185 38 47
+171 39 51  171 39 51  159 38 55  159 38 55  166 48 64  166 48 64
+166 48 64  159 46 63  152 49 69  166 48 64  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  159 35 49  124 34 50  118 34 52  100 28 46
+86 29 44  67 27 47  67 27 47  58 26 51  58 26 51  58 26 51
+58 26 51  58 26 51  58 26 51  52 25 42  40 15 29  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  39 15 26
+171 141 162  146 108 143  146 108 143  120 71 102  30 10 18  0 0 0
+0 0 0  0 0 0  0 0 0  1 0 0  108 66 98  95 59 101
+0 0 0  0 0 0  0 0 0  0 0 0  35 12 21  117 31 44
+171 39 51  171 39 51  135 40 58  79 28 45  77 27 45  141 40 57
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  171 39 51
+171 39 51  159 38 55  133 35 50  121 37 56  105 35 55  105 35 55
+88 29 45  58 26 51  58 26 51  58 26 51  58 26 51  58 26 51
+58 26 51  58 26 51  49 25 40  39 15 26  7 2 4  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  7 2 4
+140 102 127  76 28 47  58 26 51  93 57 98  39 15 26  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  30 10 18  137 96 131
+39 15 26  0 0 0  0 0 0  0 0 0  0 0 0  4 0 1
+79 27 44  144 45 67  159 46 63  134 49 77  88 29 45  89 29 45
+152 40 56  171 39 51  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  166 48 64  159 46 63  159 38 55  159 38 55  150 39 55
+140 40 59  119 40 62  113 41 62  105 35 55  105 35 55  88 29 45
+67 27 47  58 26 51  58 26 51  58 26 51  58 26 51  58 26 51
+58 26 51  52 25 42  40 15 29  7 2 4  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+140 102 127  93 57 98  76 28 47  58 26 51  93 57 98  30 10 18
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  148 114 145
+58 26 51  3 1 1  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  28 8 14  61 21 34  105 35 55  118 50 79  88 29 45
+102 29 42  152 40 56  166 48 64  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  166 48 64  159 46 63
+141 43 61  125 41 61  105 35 55  105 35 55  105 35 55  105 35 55
+105 35 55  122 42 63  113 41 62  105 35 55  88 29 45  73 26 48
+67 27 47  58 26 51  58 26 51  58 26 51  58 26 51  58 26 51
+52 25 42  35 12 21  1 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+140 102 127  49 25 40  35 12 21  58 26 51  93 57 98  58 26 51
+49 25 40  30 10 18  16 5 9  0 0 0  52 25 42  138 88 121
+40 15 29  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  16 5 9  51 24 39  105 35 55
+76 28 47  105 35 55  143 45 64  166 48 64  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  171 39 51  166 48 64  159 46 63  134 49 77  118 50 79
+113 41 62  66 24 37  61 21 34  79 27 44  76 28 47  76 28 47
+88 29 45  105 35 55  105 35 55  76 28 47  58 26 51  58 26 51
+58 26 51  58 26 51  58 26 51  58 26 51  58 26 51  58 26 51
+40 15 29  1 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  42 17 30  93 57 98
+58 26 51  58 26 51  58 26 51  93 57 98  108 66 98  58 26 51
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  16 5 9
+42 17 30  77 27 45  105 35 55  128 41 59  166 48 64  166 48 64
+166 48 64  185 38 47  171 39 51  171 39 51  166 48 64  159 46 62
+159 38 55  159 46 62  139 44 66  118 50 79  108 54 86  76 28 47
+73 26 42  73 26 42  89 29 45  113 41 62  118 50 79  123 42 67
+122 42 63  113 41 62  105 35 55  73 26 48  58 26 51  58 26 51
+58 26 51  58 26 51  58 26 51  58 26 51  58 26 51  39 15 26
+3 1 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  10 4 6  140 102 127  159 124 153  120 71 102
+140 102 127  140 102 127  131 80 105  131 80 105  108 66 98  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  39 15 26
+58 26 51  58 26 51  58 26 51  58 26 51  58 26 51  42 17 30
+1 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  18 7 11  47 16 28  79 27 44  119 40 62  145 47 69
+152 49 69  152 49 69  152 49 69  145 47 69  145 47 69  136 44 65
+125 41 61  113 41 62  105 35 55  76 28 47  76 28 47  97 28 42
+117 31 44  128 41 59  128 42 60  128 42 60  124 42 65  113 41 62
+105 35 55  105 35 55  105 35 55  88 29 45  76 28 47  58 26 51
+58 26 51  58 26 51  58 26 51  49 25 40  30 10 18  4 0 1
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  39 15 26  202 176 194  226 205 215
+202 176 194  191 162 183  159 124 153  131 80 105  35 12 21  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+52 25 42  58 26 51  58 26 51  58 26 51  58 26 51  58 26 51
+22 7 12  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  9 3 6  1 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  18 6 10  73 26 42  105 35 55
+105 35 55  113 41 62  113 41 62  113 41 62  113 41 62  105 35 55
+105 35 55  76 28 47  79 27 44  101 29 42  129 33 45  150 39 55
+144 40 57  136 33 46  125 35 52  113 41 62  105 35 55  105 35 55
+105 35 55  105 35 55  88 29 45  76 28 47  58 26 51  58 26 51
+58 26 51  52 25 42  49 25 40  22 7 12  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  18 7 11  159 124 153
+146 108 143  146 108 143  148 114 145  108 66 98  10 4 6  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  7 2 4
+93 57 98  93 63 110  58 26 51  58 26 51  58 26 51  58 26 51
+56 25 45  18 7 11  7 2 4  3 1 1  39 15 26  124 30 42
+152 32 42  159 38 55  171 39 51  97 28 42  7 1 3  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  4 0 1  57 17 29
+121 37 56  105 35 55  100 28 46  97 28 42  86 29 44  86 29 44
+97 28 42  117 31 42  141 33 45  153 36 50  153 36 50  140 36 52
+128 33 49  105 35 55  105 35 55  105 35 55  89 29 45  105 35 55
+105 35 55  76 28 47  67 27 47  58 26 51  58 26 51  58 26 51
+52 25 42  49 25 40  16 5 9  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  7 2 4
+93 57 98  93 63 110  93 57 98  108 66 98  131 80 105  42 17 30
+49 25 40  76 28 47  67 27 47  42 17 30  76 28 47  140 102 127
+171 141 162  93 63 110  58 26 51  42 17 30  58 26 51  58 26 51
+58 26 51  58 26 51  76 28 47  65 27 44  152 32 42  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  112 32 46  39 15 26
+3 1 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+20 6 10  69 20 33  139 37 53  159 38 55  159 35 49  162 38 49
+159 38 55  162 38 49  150 36 50  142 32 45  124 34 50  105 35 55
+105 35 55  88 29 45  105 35 55  105 35 55  89 29 45  89 29 45
+105 35 55  88 29 45  76 28 47  58 26 51  58 26 51  52 25 42
+52 25 42  39 15 26  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  10 4 6  58 26 51  58 26 51  58 26 51  93 57 98
+108 66 98  131 80 105  137 96 131  159 124 153  167 135 158  137 96 131
+58 26 51  22 7 12  7 2 4  0 0 0  9 3 6  35 12 21
+49 25 40  58 26 51  58 26 51  112 32 46  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  153 36 50  105 35 55  58 26 51
+18 7 11  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  3 1 1  42 17 30  97 28 42  119 34 51  128 33 49
+131 38 56  122 40 63  105 35 55  105 35 55  88 29 45  76 28 47
+76 28 47  73 26 42  105 35 55  105 35 55  105 35 55  105 35 55
+105 35 55  105 35 55  89 29 45  76 28 47  58 26 51  52 25 42
+49 25 40  18 7 11  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  3 1 1  22 7 12  18 7 11  22 7 12
+30 10 18  30 10 18  40 15 29  42 17 30  30 10 18  9 3 6
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+28 8 14  58 26 51  58 26 51  118 34 52  185 38 47  185 38 47
+185 38 47  185 38 47  171 39 51  118 34 52  58 26 51  58 26 51
+49 25 40  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  3 1 1  57 17 29  79 27 44  71 23 37
+73 26 42  71 23 37  79 27 44  67 23 37  68 23 35  79 27 44
+86 29 44  105 35 55  124 35 54  128 33 49  135 32 45  138 32 45
+128 33 49  119 34 51  100 28 46  86 29 44  58 26 51  58 26 51
+48 19 31  16 5 9  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+1 0 0  39 15 26  58 26 51  76 28 47  152 32 42  185 38 47
+185 38 47  185 38 47  135 40 58  76 28 47  58 26 51  58 26 51
+30 10 18  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  57 17 29  139 44 68  139 38 56
+135 30 41  117 31 42  116 30 44  123 32 47  125 35 52  135 40 58
+135 40 58  140 36 52  152 32 42  162 38 49  162 38 49  185 38 47
+162 38 49  152 32 42  128 33 49  100 28 46  62 26 49  58 26 51
+52 25 42  22 7 12  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  30 10 18  76 28 47  119 40 62  102 29 42  142 34 47
+150 36 50  128 41 59  105 35 55  76 28 47  58 26 51  48 19 31
+1 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  7 2 4  118 34 52  159 46 63  171 39 51
+185 38 47  171 39 51  171 39 51  171 39 51  171 39 51  171 39 51
+171 39 51  162 38 49  171 39 51  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  171 39 51  118 34 52  76 28 47  56 25 45
+58 26 51  30 10 18  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  31 10 18  154 39 54  185 38 47  150 36 50  123 33 45
+112 32 46  105 35 55  105 35 55  76 28 47  61 26 49  67 27 47
+1 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  1 0 0  61 21 34  166 48 64  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  138 32 45  76 28 47  56 25 45
+49 25 40  42 17 30  1 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+1 0 0  117 31 42  185 38 47  185 38 47  185 38 47  185 38 47
+162 38 49  125 41 61  105 35 55  67 27 47  61 21 34  61 26 49
+39 15 26  22 7 12  18 7 11  7 2 4  3 1 1  3 1 1
+30 10 18  66 25 40  139 38 56  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  100 28 46  61 26 49
+49 25 40  49 25 40  20 6 10  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+28 8 14  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  139 44 68  108 54 86  73 26 48  57 25 43  58 26 51
+58 26 51  58 26 51  58 26 51  56 25 45  52 25 42  56 25 45
+73 26 48  113 41 62  152 49 69  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  171 39 51  162 38 49  162 38 49  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  132 30 39  76 28 47
+58 26 51  49 25 40  35 12 21  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+41 13 21  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+166 48 64  134 49 77  108 54 86  76 28 47  67 27 47  73 26 48
+58 26 51  58 26 51  58 26 51  58 26 51  61 26 49  61 26 49
+108 54 86  105 35 55  105 35 55  119 34 51  133 32 44  148 30 39
+152 32 42  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  159 35 49  159 35 49  159 35 49  159 35 49  171 39 51
+185 38 47  185 38 47  185 38 47  171 39 51  125 31 44  79 27 44
+58 26 51  49 25 40  40 15 29  10 4 6  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+36 11 20  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+152 40 56  118 50 79  108 54 86  76 28 47  66 25 40  76 28 47
+58 26 51  58 26 51  58 26 51  58 26 51  61 26 49  76 28 47
+113 41 62  133 35 50  150 39 55  162 38 49  162 38 49  148 30 39
+132 30 39  132 30 39  132 30 39  154 39 54  159 46 62  154 39 54
+146 36 50  138 34 49  138 37 53  135 40 58  141 40 57  150 36 50
+159 35 49  171 39 51  185 38 47  150 36 50  105 35 55  79 28 45
+58 26 51  52 25 42  49 25 40  30 10 18  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+7 2 4  117 31 44  171 39 51  185 38 47  166 48 64  145 47 69
+118 50 79  108 54 86  105 35 55  73 26 42  76 28 47  105 35 55
+76 28 47  58 26 51  58 26 51  58 26 51  52 24 38  64 24 39
+121 37 56  171 39 51  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  152 32 42  121 30 42  102 29 42  124 34 50
+124 35 54  113 41 62  113 41 62  113 41 62  113 41 62  118 34 52
+133 35 50  159 35 49  159 35 49  133 35 50  88 29 45  79 28 45
+58 26 51  52 25 42  49 25 40  42 17 30  10 4 6  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  16 5 9  105 35 55  113 41 62  100 28 46  89 29 45
+76 28 47  76 28 47  73 26 42  76 28 47  105 35 55  105 35 55
+58 26 51  58 26 51  58 26 51  58 26 51  56 25 45  60 22 36
+101 29 42  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  124 30 42  86 29 44
+105 35 55  105 35 55  105 35 55  76 28 47  76 28 47  105 35 55
+100 28 46  116 30 44  152 32 42  138 34 49  79 28 45  62 26 49
+52 25 42  52 25 42  49 25 40  49 25 40  28 8 14  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  7 2 4  60 26 44  110 32 46  102 29 42
+97 28 42  118 34 52  121 37 56  113 41 62  113 41 62  73 26 48
+58 26 51  58 26 51  58 26 51  61 26 49  62 26 49  52 25 42
+71 23 37  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  171 39 51  133 35 50
+97 28 42  86 29 44  76 28 47  73 26 48  76 28 47  88 29 45
+69 20 33  79 27 44  139 38 56  125 35 52  76 28 47  58 26 51
+58 26 51  52 25 42  49 25 40  49 25 40  30 10 18  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  1 0 0  57 17 29  152 32 42
+185 38 47  166 48 64  123 43 68  108 54 86  105 35 55  58 26 51
+58 26 51  58 24 38  58 26 51  58 26 51  56 25 45  54 19 32
+105 35 55  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+159 35 49  117 31 42  86 29 44  71 23 37  71 23 37  67 23 37
+71 23 37  116 30 44  138 37 53  105 35 55  76 28 47  58 26 51
+58 26 51  52 25 42  49 25 40  49 25 40  35 12 21  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  28 8 14
+102 29 42  110 32 46  105 35 55  76 28 47  65 27 44  61 26 49
+49 25 40  56 25 45  58 26 51  58 26 51  58 20 33  71 23 37
+159 38 55  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  171 39 51  150 36 50  128 33 49  118 34 52  89 29 45
+89 29 45  105 35 55  89 29 45  73 26 48  61 26 49  58 26 51
+52 25 42  52 25 42  49 25 40  49 25 40  39 15 26  1 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  3 1 1  10 4 6  35 12 21  58 26 51
+58 26 51  58 26 51  58 26 51  49 25 40  69 20 33  171 39 51
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  171 39 51
+171 39 51  151 38 54  133 35 50  121 37 56  105 35 55  105 35 55
+76 28 47  62 26 49  73 26 48  58 26 51  58 26 51  58 26 51
+52 25 42  52 25 42  49 25 40  49 25 40  40 15 29  9 3 6
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  10 4 6
+51 24 39  56 25 45  66 25 40  110 32 46  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  171 39 51  139 37 53
+125 35 52  118 34 52  105 35 55  105 35 55  88 29 45  76 28 47
+62 26 49  61 26 49  62 26 49  58 26 51  56 25 45  49 25 40
+52 25 42  52 25 42  49 25 40  42 17 30  42 17 30  22 7 12
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+10 4 6  71 23 37  119 34 51  159 38 55  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  171 39 51  152 32 42  138 34 46  118 34 52
+105 35 55  89 29 45  76 28 47  76 28 47  73 26 48  73 26 48
+62 26 49  58 26 51  61 26 49  52 25 42  52 25 42  58 26 51
+58 26 51  49 25 40  49 25 40  48 19 31  50 18 33  28 8 14
+1 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  48 16 29  125 41 61  128 42 60  145 39 54  171 39 51
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  159 38 55  141 40 57  124 34 50  100 28 46  89 29 45
+86 29 44  76 28 47  73 26 48  67 27 47  58 26 51  62 26 49
+58 26 51  56 25 45  56 25 45  58 26 51  58 26 51  58 26 51
+52 25 42  49 25 40  49 25 40  42 17 30  35 12 21  16 5 9
+1 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  22 7 12  89 29 45  113 41 62  118 50 79  122 42 63
+132 43 63  144 40 57  159 38 55  153 36 50  171 39 51  150 39 55
+135 40 58  128 41 59  113 41 62  105 35 55  105 35 55  76 28 47
+73 26 48  73 26 48  73 26 48  58 26 51  58 26 51  58 26 51
+58 26 51  58 26 51  58 26 51  58 26 51  52 25 42  49 25 40
+49 25 40  49 25 40  49 25 40  50 18 33  20 6 10  1 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  7 2 4  56 22 36  76 28 47  105 35 55  113 41 62
+118 50 79  113 41 62  123 43 68  113 41 62  122 40 63  119 40 62
+113 41 62  105 35 55  105 35 55  89 29 45  76 28 47  73 26 48
+73 26 48  73 26 48  58 26 51  58 26 51  58 26 51  58 26 51
+58 26 51  58 26 51  58 26 51  52 25 42  52 25 42  49 25 40
+49 25 40  48 19 31  42 17 30  42 17 30  22 7 12  1 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  31 10 18  69 20 33  86 29 44  89 29 45
+105 35 55  105 35 55  118 50 79  113 41 62  113 41 62  105 35 55
+105 35 55  105 35 55  105 35 55  76 28 47  73 26 48  67 27 47
+58 26 51  58 26 51  58 26 51  58 26 51  58 26 51  58 26 51
+58 26 51  58 26 51  52 25 42  49 25 40  49 25 40  49 25 40
+49 25 40  50 18 33  40 15 29  48 16 29  16 5 9  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  86 29 44  97 28 42  105 35 55  88 29 45
+88 29 45  105 35 55  113 41 62  113 41 62  108 54 86  108 54 86
+105 35 55  105 35 55  76 28 47  76 28 47  73 26 48  58 26 51
+58 26 51  58 26 51  58 26 51  58 26 51  58 26 51  52 25 42
+49 25 40  49 25 40  49 25 40  49 25 40  49 25 40  50 18 33
+42 17 30  39 15 26  38 12 21  28 8 14  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  28 8 14  185 38 47  162 38 49  86 29 44  79 28 45
+76 28 47  113 41 62  118 50 79  108 54 86  108 54 86  108 54 86
+105 35 55  76 28 47  76 28 47  62 26 49  62 26 49  58 26 51
+58 26 51  56 25 45  52 25 42  52 25 42  49 25 40  49 25 40
+49 25 40  49 25 40  49 25 40  48 19 31  42 17 30  39 15 26
+39 15 26  38 12 21  42 17 30  28 8 14  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  57 17 29  185 38 47  185 38 47  171 39 51  119 34 51
+89 29 45  105 35 55  108 54 86  108 54 86  108 54 86  108 54 86
+73 26 48  73 26 48  62 26 49  73 26 48  58 26 51  58 26 51
+58 26 51  58 26 51  52 25 42  49 25 40  49 25 40  49 25 40
+49 25 40  48 19 31  42 17 30  39 15 26  39 15 26  42 17 30
+42 17 30  49 25 40  52 25 42  35 12 21  4 0 1  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  69 20 33  185 38 47  185 38 47  185 38 47  185 38 47
+154 39 54  119 40 62  105 35 55  105 35 55  105 35 55  105 35 55
+76 28 47  73 26 48  62 26 49  58 26 51  58 26 51  56 25 45
+61 26 49  49 25 40  42 17 30  50 18 33  48 19 31  42 17 30
+42 17 30  42 17 30  42 17 30  49 25 40  49 25 40  58 26 51
+58 26 51  49 25 40  48 19 31  42 17 30  7 1 3  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  69 20 33  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  166 48 64  135 40 58  105 35 55  88 29 45  88 29 45
+76 28 47  62 26 49  60 26 44  52 24 38  49 25 40  49 25 40
+57 25 43  58 24 38  52 24 38  49 25 40  49 25 40  50 18 33
+49 25 40  50 18 33  48 19 31  52 25 42  58 26 51  58 26 51
+58 26 51  52 25 42  48 19 31  42 17 30  4 0 1  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  41 13 21  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  152 40 56  119 34 51  105 35 55
+88 29 45  76 28 47  65 27 44  64 25 43  65 27 44  73 26 48
+76 28 47  76 28 47  76 28 47  60 26 44  48 19 31  42 17 30
+48 19 31  49 25 40  49 25 40  42 17 30  52 25 42  52 25 42
+52 25 42  52 25 42  49 25 40  48 19 31  10 4 6  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  16 5 9  152 32 42  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  166 48 64  134 49 77
+119 40 62  113 41 62  105 35 55  105 35 55  105 35 55  105 35 55
+105 35 55  88 29 45  105 35 55  76 28 47  73 26 48  58 24 38
+42 17 30  42 17 30  42 17 30  42 17 30  42 17 30  42 17 30
+52 25 42  56 25 45  49 25 40  49 25 40  30 10 18  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  9 3 6  102 29 42  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  171 39 51
+152 49 69  152 40 56  141 42 59  131 42 64  126 41 60  105 35 55
+105 35 55  113 41 62  105 35 55  88 29 45  105 35 55  105 35 55
+61 26 49  48 19 31  49 25 40  42 17 30  42 17 30  42 17 30
+48 19 31  49 25 40  49 25 40  49 25 40  40 15 29  3 1 1
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  16 5 9  159 35 49  185 38 47  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+166 48 64  166 48 64  159 46 63  144 40 57  136 44 65  125 41 61
+113 41 62  113 41 62  105 35 55  105 35 55  105 35 55  105 35 55
+76 28 47  61 26 49  48 19 31  48 19 31  48 19 31  42 17 30
+49 25 40  49 25 40  50 18 33  49 25 40  42 17 30  22 7 12
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  36 11 20  171 39 51  185 38 47
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  185 38 47  171 39 51  154 39 54  144 40 57  128 42 60
+113 41 62  105 35 55  105 35 55  105 35 55  105 35 55  76 28 47
+73 26 48  58 26 51  58 26 51  49 25 40  42 17 30  42 17 30
+42 17 30  49 25 40  42 17 30  48 19 31  49 25 40  42 17 30
+7 2 4  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  28 8 14  152 32 42
+185 38 47  185 38 47  185 38 47  185 38 47  185 38 47  185 38 47
+185 38 47  171 39 51  159 46 62  144 40 57  144 40 57  129 39 59
+113 41 62  105 35 55  105 35 55  105 35 55  76 28 47  73 26 48
+58 26 51  58 26 51  58 26 51  58 26 51  52 25 42  42 17 30
+42 17 30  42 17 30  48 19 31  49 25 40  49 25 40  49 25 40
+35 12 21  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  36 11 20
+144 40 57  152 40 56  152 40 56  159 38 55  159 38 55  166 48 64
+166 48 64  159 46 63  145 47 69  130 43 63  119 40 62  105 35 55
+105 35 55  105 35 55  105 35 55  93 57 98  76 28 47  58 26 51
+58 26 51  58 26 51  58 26 51  58 26 51  52 25 42  49 25 40
+49 25 40  49 25 40  49 25 40  49 25 40  49 25 40  49 25 40
+48 19 31  16 5 9  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  7 2 4
+86 29 44  118 50 79  118 50 79  118 50 79  129 39 59  136 44 65
+126 41 60  123 42 67  118 50 79  108 54 86  108 54 86  105 35 55
+105 35 55  105 35 55  76 28 47  73 26 48  58 26 51  58 26 51
+58 26 51  58 26 51  58 26 51  49 25 40  49 25 40  52 25 42
+49 25 40  49 25 40  49 25 40  49 25 40  49 25 40  49 25 40
+48 19 31  36 11 20  9 3 6  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  30 10 18
+108 54 86  76 28 47  105 35 55  113 41 62  118 50 79  113 41 62
+113 41 62  108 54 86  105 35 55  105 35 55  105 35 55  105 35 55
+76 28 47  76 28 47  73 26 48  58 26 51  58 26 51  58 26 51
+58 26 51  58 26 51  52 25 42  52 25 42  52 25 42  49 25 40
+52 25 42  49 25 40  49 25 40  49 25 40  49 25 40  49 25 40
+42 17 30  50 18 33  35 12 21  4 0 1  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  39 15 26
+105 35 55  105 35 55  113 41 62  105 35 55  76 28 47  76 28 47
+76 28 47  88 29 45  76 28 47  73 26 48  67 27 47  65 27 44
+62 26 49  62 26 49  67 27 47  60 26 44  52 25 42  52 25 42
+49 25 40  56 25 45  52 25 42  50 18 33  49 25 40  49 25 40
+52 25 42  56 25 45  52 25 42  49 25 40  49 25 40  49 25 40
+49 25 40  49 25 40  42 17 30  28 8 14  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  41 13 21
+129 39 59  152 40 56  159 46 63  133 43 64  105 35 55  66 25 40
+67 27 47  61 25 44  65 27 44  76 28 47  79 28 45  86 29 44
+89 29 45  88 29 45  67 27 47  49 25 40  52 24 38  52 24 38
+51 24 39  49 25 40  49 25 40  52 25 42  56 25 45  58 26 51
+62 26 49  56 25 45  42 17 30  35 12 21  42 17 30  49 25 40
+49 25 40  42 17 30  48 19 31  42 17 30  28 8 14  4 0 1
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  7 2 4  67 23 37
+134 49 77  152 49 69  166 48 64  166 48 64  145 39 54  102 29 42
+100 28 46  77 27 45  60 26 44  73 26 48  113 41 62  138 37 53
+133 35 50  131 38 56  105 35 55  76 28 47  66 25 40  76 28 47
+88 29 45  105 35 55  76 28 47  73 26 48  73 26 48  73 26 48
+73 26 48  48 19 31  10 4 6  0 0 0  18 7 11  49 25 40
+52 25 42  49 25 40  49 25 40  49 25 40  49 25 40  35 12 21
+10 4 6  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  42 17 30  76 28 47
+141 92 121  167 135 158  167 135 158  141 92 121  141 92 121  131 80 105
+116 30 44  105 35 55  67 23 37  76 28 47  118 50 79  152 49 69
+159 46 62  148 37 53  124 35 54  110 32 46  79 27 44  105 35 55
+136 44 65  134 49 77  118 50 79  108 54 86  76 28 47  58 26 51
+76 28 47  30 10 18  0 0 0  0 0 0  0 0 0  28 8 14
+52 25 42  52 25 42  52 25 42  52 25 42  52 25 42  49 25 40
+49 25 40  31 10 18  10 4 6  1 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  1 0 0  18 7 11  108 54 86  89 29 45
+133 43 64  142 34 47  137 33 45  128 41 59  134 49 77  141 92 121
+143 45 64  121 37 56  73 26 42  118 50 79  131 80 105  141 92 121
+141 92 121  141 92 121  152 49 69  152 49 69  110 32 46  120 32 46
+138 37 53  139 44 68  119 40 62  88 29 45  93 57 98  93 57 98
+58 26 51  49 25 40  0 0 0  0 0 0  0 0 0  0 0 0
+22 7 12  50 18 33  49 25 40  58 26 51  58 26 51  58 26 51
+52 25 42  49 25 40  49 25 40  40 15 29  31 10 18  28 8 14
+18 7 11  1 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  49 25 40
+93 57 98  49 25 40  73 26 42  79 28 45  118 50 79  146 108 143
+167 135 158  159 124 153  141 92 121  166 48 64  152 49 69  135 40 58
+124 37 57  121 37 56  86 29 44  105 35 55  139 44 66  124 35 54
+110 32 46  116 30 44  123 34 49  128 42 60  121 37 56  117 31 44
+133 35 50  139 38 56  105 35 55  93 57 98  137 96 131  146 108 143
+93 57 98  56 25 45  16 5 9  0 0 0  0 0 0  0 0 0
+1 0 0  30 10 18  41 14 25  39 15 26  49 25 40  52 25 42
+52 25 42  58 26 51  58 26 51  52 25 42  52 25 42  42 17 30
+50 18 33  35 12 21  18 7 11  7 2 4  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  10 4 6
+18 7 11  18 7 11  10 4 6  10 4 6  30 10 18  39 15 26
+30 10 18  22 7 12  30 10 18  60 26 44  76 28 47  105 35 55
+134 49 77  141 92 121  141 92 121  141 92 121  166 48 64  131 80 105
+145 47 69  145 47 69  152 49 69  141 92 121  141 92 121  139 44 68
+105 35 55  86 29 44  86 29 44  131 80 105  141 92 121  148 114 145
+148 114 145  141 92 121  166 48 64  152 49 69  105 35 55  112 32 46
+133 35 50  128 41 59  105 35 55  88 61 125  146 108 143  146 108 143
+88 61 125  58 26 51  49 25 40  22 7 12  0 0 0  0 0 0
+0 0 0  0 0 0  3 1 1  10 4 6  31 10 18  42 17 30
+48 19 31  52 25 42  58 26 51  58 26 51  52 25 42  52 25 42
+58 26 51  56 25 45  48 19 31  42 17 30  30 10 18  22 7 12
+16 5 9  7 1 3  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  1 0 0
+10 4 6  35 12 21  58 26 51  108 66 98  140 102 127  171 141 162
+180 150 172  171 141 162  131 80 105  76 28 47  76 28 47  105 35 55
+113 41 62  125 35 52  166 48 64  141 92 121  141 92 121  166 48 64
+166 48 64  166 48 64  152 49 69  136 44 65  152 49 69  141 92 121
+141 92 121  152 49 69  123 34 49  113 41 62  119 40 62  89 29 45
+118 50 79  120 71 102  118 50 79  123 43 68  121 37 56  105 35 55
+113 41 62  123 43 68  134 49 77  152 49 69  128 42 60  118 34 52
+129 39 59  130 43 63  105 35 55  93 63 110  146 108 143  146 108 143
+93 57 98  58 26 51  58 26 51  48 19 31  1 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  1 0 0  7 2 4
+22 7 12  30 10 18  35 12 21  42 17 30  52 25 42  58 26 51
+58 26 51  58 26 51  56 25 45  52 25 42  52 25 42  49 25 40
+48 19 31  42 17 30  31 10 18  20 6 10  7 2 4  7 1 3
+7 2 4  1 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  10 4 6  35 12 21  49 25 40  120 71 102  167 135 158
+191 162 183  210 187 199  234 219 224  243 232 234  241 229 231  226 205 215
+233 215 221  233 215 221  219 198 209  159 124 153  108 66 98  105 35 55
+123 43 68  152 49 69  166 48 64  162 38 49  142 34 47  123 33 48
+119 34 51  126 41 60  136 44 65  116 30 44  102 29 42  110 32 46
+123 43 68  134 49 77  131 80 105  134 49 77  134 49 77  118 50 79
+118 50 79  134 49 77  134 49 77  134 49 77  134 49 77  118 34 52
+97 28 42  69 20 33  79 27 44  97 28 42  112 32 46  128 41 59
+132 43 63  123 43 68  118 50 79  76 28 47  93 57 98  93 57 98
+62 26 49  58 26 51  58 26 51  49 25 40  4 0 1  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  3 1 1  1 0 0  9 3 6  39 15 26  31 10 18
+30 10 18  48 19 31  60 25 40  52 25 42  52 25 42  52 25 42
+52 25 42  52 25 42  58 24 38  52 25 42  54 21 35  50 18 33
+40 15 29  31 10 18  22 7 12  10 4 6  10 4 6  7 2 4
+3 1 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+49 25 40  140 102 127  171 141 162  210 187 199  238 224 228  248 240 240
+243 232 234  245 236 237  245 236 237  243 232 234  234 219 224  231 213 218
+238 224 228  228 209 216  202 176 194  167 135 158  148 114 145  137 96 131
+108 54 86  113 41 62  121 37 56  130 43 63  134 49 77  138 88 121
+140 102 127  159 124 153  180 150 172  148 114 145  131 80 105  113 41 62
+105 35 55  113 41 62  134 49 77  130 43 63  145 47 69  141 92 121
+131 80 105  143 45 66  110 32 46  97 28 42  102 29 42  102 29 42
+110 32 46  119 34 51  125 35 52  128 41 59  126 41 60  128 42 60
+132 43 63  123 43 68  118 50 79  113 41 62  76 28 47  58 26 51
+58 26 51  58 26 51  58 26 51  73 26 48  30 10 18  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+1 0 0  7 2 4  16 5 9  22 7 12  35 12 21  42 17 30
+50 18 33  48 19 31  54 19 32  60 22 36  63 22 36  63 22 36
+58 24 38  52 24 38  58 20 33  48 16 29  47 16 28  36 11 20
+31 10 18  22 7 12  7 1 3  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  10 4 6  120 71 102
+191 162 183  210 187 199  226 205 215  238 224 228  233 215 221  234 219 224
+238 224 228  241 229 231  243 232 234  238 224 228  234 219 224  228 209 216
+233 215 221  219 198 209  191 162 183  167 135 158  148 114 145  108 66 98
+108 54 86  131 80 105  171 141 162  202 176 194  228 209 216  238 224 228
+241 229 231  245 236 237  248 240 240  248 240 240  238 224 228  219 198 209
+191 162 183  131 80 105  105 35 55  105 35 55  128 41 59  140 40 59
+144 40 57  152 40 56  139 38 56  141 40 57  141 43 61  141 42 59
+141 43 61  131 38 56  135 40 58  135 40 58  128 41 59  131 42 64
+134 49 77  118 50 79  108 54 86  108 54 86  93 57 98  93 57 98
+73 26 48  58 26 51  58 26 51  58 26 51  58 26 51  18 7 11
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  3 1 1
+7 1 3  7 1 3  10 4 6  16 5 9  20 6 10  41 14 25
+57 17 29  57 17 29  57 17 29  69 20 33  60 21 35  54 19 32
+57 17 29  48 19 31  41 14 25  28 8 14  16 5 9  3 1 1
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  62 26 49  167 135 158
+226 205 215  202 176 194  180 150 172  191 162 183  202 176 194  226 205 215
+231 213 218  231 213 218  231 213 218  219 198 209  210 187 199  202 176 194
+202 176 194  202 176 194  171 141 162  137 96 131  131 80 105  140 102 127
+210 187 199  238 222 225  243 232 234  248 240 240  248 240 240  248 240 240
+245 236 237  243 232 234  238 224 228  241 229 231  238 222 225  238 222 225
+233 215 221  210 187 199  171 141 162  131 80 105  105 35 55  131 42 64
+145 47 69  152 40 56  159 46 62  144 40 57  141 43 61  143 45 64
+140 40 59  135 40 58  130 43 63  131 42 64  131 42 64  123 43 68
+118 50 79  108 54 86  108 54 86  93 57 98  76 28 47  76 28 47
+76 28 47  58 26 51  58 26 51  58 26 51  58 26 51  30 10 18
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  3 1 1
+7 1 3  20 6 10  31 10 18  41 13 21  57 17 29  57 17 29
+69 20 33  69 20 33  79 27 44  71 23 37  71 23 37  57 17 29
+36 11 20  38 12 21  16 5 9  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  3 1 1  140 102 127
+210 187 199  226 205 215  210 187 199  210 187 199  210 187 199  210 187 199
+210 187 199  210 187 199  202 176 194  202 176 194  180 150 172  191 162 183
+180 150 172  171 141 162  146 108 143  95 59 101  196 169 185  245 236 237
+238 224 228  234 219 224  238 224 228  238 224 228  243 232 234  243 232 234
+245 236 237  245 236 237  238 224 228  231 213 218  238 224 228  226 205 215
+219 198 209  202 176 194  196 169 185  167 135 158  120 71 102  105 35 55
+134 49 77  152 49 69  159 46 62  159 46 63  152 40 56  152 40 56
+139 44 66  134 49 77  134 49 77  134 49 77  118 50 79  118 50 79
+108 54 86  93 57 98  73 26 48  93 57 98  58 26 51  58 26 51
+58 26 51  58 26 51  58 26 51  58 26 51  93 57 98  49 25 40
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  3 1 1  10 4 6  7 2 4
+10 4 6  28 8 14  57 17 29  97 28 42  102 29 42  97 28 42
+86 29 44  69 20 33  69 20 33  41 13 21  20 6 10  16 5 9
+9 3 6  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  49 25 40
+196 169 185  228 209 216  226 205 215  233 215 221  234 219 224  238 224 228
+238 224 228  234 219 224  233 215 221  231 213 218  202 176 194  180 150 172
+171 141 162  146 108 143  137 96 131  196 169 185  238 224 228  243 232 234
+241 229 231  234 219 224  233 215 221  234 219 224  241 229 231  234 219 224
+238 224 228  234 219 224  228 209 216  219 198 209  191 162 183  191 162 183
+202 176 194  191 162 183  159 124 153  148 114 145  137 96 131  93 57 98
+113 41 62  134 49 77  145 47 69  145 47 69  134 49 77  134 49 77
+134 49 77  118 50 79  108 54 86  108 54 86  108 54 86  76 28 47
+73 26 48  73 26 48  73 26 48  58 26 51  58 26 51  58 26 51
+58 26 51  93 57 98  93 57 98  93 57 98  88 61 125  58 26 51
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  4 0 1  36 11 20  69 20 33
+102 29 42  117 31 42  102 29 42  71 23 37  71 23 37  61 21 34
+41 13 21  9 3 6  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  3 1 1
+67 27 47  167 135 158  210 187 199  219 198 209  226 205 215  234 219 224
+226 205 215  233 215 221  234 219 224  226 205 215  219 198 209  180 150 172
+167 135 158  137 96 131  137 96 131  226 205 215  210 187 199  202 176 194
+219 198 209  226 205 215  226 205 215  219 198 209  228 209 216  226 205 215
+210 187 199  202 176 194  202 176 194  191 162 183  171 141 162  167 135 158
+159 124 153  159 124 153  137 96 131  137 96 131  88 61 125  93 63 110
+76 28 47  118 50 79  118 50 79  118 50 79  108 54 86  108 54 86
+108 54 86  108 54 86  76 28 47  73 26 48  76 28 47  73 26 48
+58 26 51  58 26 51  58 26 51  58 26 51  93 57 98  88 61 125
+88 61 125  88 61 125  88 61 125  93 57 98  58 26 51  49 25 40
+3 1 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+1 0 0  41 13 21  102 29 42  132 30 39  97 28 42  69 20 33
+71 23 37  47 16 28  1 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  18 7 11  93 57 98  108 54 86  140 102 127  167 135 158
+171 141 162  167 135 158  159 124 153  159 124 153  159 124 153  146 108 143
+137 96 131  93 63 110  93 57 98  219 198 209  231 213 218  219 198 209
+202 176 194  191 162 183  171 141 162  167 135 158  171 141 162  167 135 158
+159 124 153  159 124 153  148 114 145  146 108 143  146 108 143  146 108 143
+137 96 131  137 96 131  88 61 125  88 61 125  88 61 125  93 57 98
+73 26 48  105 35 55  105 35 55  76 28 47  73 26 48  73 26 48
+76 28 47  58 26 51  58 26 51  93 57 98  93 57 98  93 57 98
+88 61 125  88 61 125  88 61 125  88 61 125  88 61 125  88 61 125
+88 61 125  93 57 98  93 57 98  40 15 29  16 5 9  1 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  4 0 1  69 20 33  132 30 39  117 31 42
+86 29 44  69 20 33  9 3 6  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  10 4 6
+58 26 51  140 102 127  140 102 127  219 198 209  238 224 228  234 219 224
+226 205 215  226 205 215  219 198 209  202 176 194  196 169 185  171 141 162
+159 124 153  146 108 143  138 88 121  108 66 98  93 63 110  93 63 110
+93 63 110  93 63 110  88 61 125  88 61 125  88 61 125  88 61 125
+93 57 98  93 57 98  93 57 98  93 57 98  93 57 98  93 57 98
+93 57 98  88 61 125  88 61 125  88 61 125  88 61 125  88 61 125
+88 61 125  88 61 125  88 61 125  88 61 125  88 61 125  88 61 125
+58 26 51  40 15 29  7 2 4  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  102 29 42  102 29 42
+71 23 37  79 27 44  38 12 21  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  3 1 1  167 135 158  202 176 194  210 187 199
+228 209 216  228 209 216  228 209 216  228 209 216  226 205 215  219 198 209
+210 187 199  191 162 183  180 150 172  148 114 145  137 96 131  137 96 131
+88 61 125  88 61 125  88 61 125  88 61 125  88 61 125  88 61 125
+88 61 125  88 61 125  88 61 125  88 61 125  88 61 125  88 61 125
+88 61 125  88 61 125  88 61 125  88 61 125  88 61 125  88 61 125
+88 61 125  88 61 125  58 26 51  40 15 29  16 5 9  3 1 1
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  57 17 29  97 28 42
+97 28 42  86 29 44  41 14 25  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  7 2 4  52 25 42  131 80 105
+148 114 145  196 169 185  219 198 209  226 205 215  233 215 221  219 198 209
+202 176 194  202 176 194  191 162 183  171 141 162  148 114 145  146 108 143
+137 96 131  88 61 125  88 61 125  88 61 125  88 61 125  88 61 125
+88 61 125  88 61 125  88 61 125  88 61 125  88 61 125  88 61 125
+88 61 125  88 61 125  93 57 98  58 26 51  58 26 51  49 25 40
+18 7 11  9 3 6  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  16 5 9  69 20 33  28 8 14  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  7 1 3  97 28 42  97 28 42
+71 23 37  61 21 34  36 11 20  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  22 7 12  52 25 42  67 27 47  93 57 98  93 57 98
+93 57 98  93 57 98  58 26 51  93 57 98  58 26 51  58 26 51
+58 26 51  58 26 51  58 26 51  58 26 51  58 26 51  58 26 51
+49 25 40  40 15 29  40 15 29  16 5 9  9 3 6  3 1 1
+1 0 0  4 0 1  1 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  41 13 21  148 30 39  185 38 47  152 32 42  28 8 14
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  20 6 10  86 29 44  97 28 42  69 20 33
+47 16 28  40 15 29  30 10 18  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  28 8 14
+97 28 42  185 38 47  185 38 47  159 35 49  152 32 42  102 29 42
+28 8 14  7 2 4  0 0 0  1 0 0  7 1 3  28 8 14
+31 10 18  57 17 29  97 28 42  69 20 33  54 19 32  48 19 31
+28 8 14  9 3 6  1 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  16 5 9  69 20 33  121 30 42  152 32 42
+185 38 47  152 32 42  152 32 42  152 32 42  141 31 41  132 30 39
+97 28 42  69 20 33  57 17 29  69 20 33  69 20 33  71 23 37
+71 23 37  71 23 37  51 18 32  58 20 33  48 19 31  18 7 11
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  10 4 6  39 15 26
+54 19 32  97 28 42  148 30 39  185 38 47  185 38 47  185 38 47
+159 35 49  152 32 42  152 32 42  148 30 39  142 31 41  141 31 41
+132 30 39  117 31 42  97 28 42  73 26 42  67 22 35  58 20 33
+47 16 28  47 16 28  40 15 29  40 15 29  22 7 12  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  10 4 6  47 16 28  69 20 33  131 32 43
+152 32 42  185 38 47  185 38 47  185 38 47  185 38 47  159 35 49
+148 30 39  148 30 39  152 32 42  141 31 41  148 30 39  152 32 42
+138 32 45  102 29 42  71 23 37  52 24 38  48 19 31  47 16 28
+41 14 25  41 14 25  41 14 25  41 13 21  4 0 1  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+7 1 3  102 29 42  148 30 39  185 38 47  185 38 47  185 38 47
+162 38 49  152 32 42  152 32 42  148 30 39  148 30 39  132 30 39
+124 30 42  117 31 42  117 31 42  97 28 42  97 28 42  86 29 44
+71 23 37  47 16 28  36 11 20  41 13 21  28 8 14  28 8 14
+28 8 14  22 7 12  16 5 9  16 5 9  4 0 1  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+22 7 12  117 31 42  102 29 42  97 28 42  69 20 33  69 20 33
+57 17 29  47 16 28  47 16 28  41 13 21  41 13 21  41 13 21
+41 13 21  41 13 21  36 11 20  41 13 21  30 10 18  7 2 4
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
diff --git a/drivers/video/logo/logo_debian_clut224.ppm b/drivers/video/logo/logo_debian_clut224.ppm
new file mode 100644
index 000000000..0daf7736a
--- /dev/null
+++ b/drivers/video/logo/logo_debian_clut224.ppm
@@ -0,0 +1,883 @@
+P3
+64 80
+255
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  36 1 13  99 3 37  93 2 34  135 4 50  95 3 36
+41 1 15  21 0 7  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  2 0 0  64 2 24
+154 4 57  213 6 80  228 7 85  227 7 85  226 7 85  209 6 78
+217 7 81  194 6 73  98 3 37  95 3 36  126 4 47  159 4 59
+147 4 55  150 4 56  175 4 65  106 3 39  43 1 16  2 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  9 0 3  87 2 32  177 5 66  224 7 84
+225 7 85  218 7 82  215 7 81  215 7 81  214 6 80  217 7 82
+217 7 82  219 7 82  226 7 85  227 7 85  226 7 85  225 7 84
+225 7 85  224 7 84  223 7 84  227 7 85  219 7 82  182 5 68
+106 3 39  24 0 9  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+4 0 1  90 2 33  191 6 71  227 7 85  223 7 84  216 7 81
+214 7 80  214 7 81  215 7 81  215 7 81  215 7 81  215 7 81
+214 7 80  215 7 81  215 7 81  215 7 81  215 7 81  214 7 81
+215 7 81  215 7 81  214 7 80  215 7 81  217 7 81  222 7 83
+228 7 86  206 6 77  120 3 45  41 1 15  63 1 23  104 3 39
+49 1 18  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  62 1 23
+182 5 68  228 7 85  221 7 83  215 7 81  214 7 80  215 7 81
+215 7 81  215 7 81  215 7 81  215 7 81  215 7 81  215 7 81
+215 7 81  215 7 81  215 7 81  215 7 81  215 7 81  215 7 81
+215 7 81  215 7 81  215 7 81  215 7 81  215 7 81  214 7 80
+215 7 81  219 7 82  226 7 85  219 7 82  223 7 84  231 7 86
+215 7 80  40 1 15  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  9 0 3  133 4 50  225 7 84
+223 7 84  215 7 81  214 7 80  215 7 81  215 7 81  215 7 81
+215 7 81  215 7 81  215 7 81  214 7 80  215 7 81  216 7 81
+216 7 81  216 7 81  217 7 81  218 7 82  218 7 82  218 7 82
+217 7 82  216 7 81  215 7 81  214 7 81  214 7 81  215 7 81
+215 7 81  214 7 80  215 7 81  217 7 81  216 7 81  214 7 80
+221 7 83  204 6 76  81 2 30  57 1 21  2 0 1  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  1 0 0  99 3 37  171 4 64  229 7 86  216 7 81
+214 7 80  214 7 81  215 7 81  215 7 81  215 7 81  215 7 81
+214 7 81  214 7 80  217 7 81  224 7 84  227 7 85  222 7 83
+219 7 82  222 7 83  210 5 78  205 6 77  202 5 76  205 6 77
+209 6 78  219 7 82  225 7 84  227 7 85  224 7 84  219 7 82
+215 7 81  214 7 80  215 7 81  215 7 81  215 7 81  214 7 81
+214 7 80  220 7 82  221 7 83  217 7 81  166 5 62  26 0 9
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+6 0 2  148 4 55  232 7 87  222 7 83  214 7 80  214 7 80
+215 7 81  215 7 81  215 7 81  215 7 81  215 7 81  214 7 80
+215 7 81  225 7 84  214 6 80  153 4 56  95 3 36  54 1 20
+42 1 16  51 1 19  26 0 10  16 0 6  14 0 5  16 0 6
+24 0 9  41 1 15  68 1 25  105 2 39  155 4 58  199 6 74
+225 7 84  225 7 84  217 7 81  214 7 80  214 7 81  215 7 81
+215 7 81  214 7 80  216 7 81  217 7 81  227 7 85  201 6 75
+54 1 20  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  14 0 5
+166 5 62  229 7 86  215 7 81  214 7 81  214 7 80  215 7 81
+215 7 81  215 7 81  215 7 81  214 7 80  217 7 82  225 7 85
+225 7 84  160 4 59  42 1 15  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  12 0 4
+68 1 25  151 4 56  215 7 80  225 7 84  215 7 81  214 7 80
+215 7 81  215 7 81  215 7 81  214 7 80  214 7 80  222 7 83
+215 6 81  71 2 26  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  22 0 8  180 5 67
+228 7 85  214 7 80  214 7 80  215 7 81  215 7 81  215 7 81
+214 7 80  215 7 81  219 7 82  227 7 85  207 6 77  147 4 55
+64 2 24  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  43 1 16  159 5 60  227 7 85  218 7 82
+214 7 80  215 7 81  215 7 81  215 7 81  214 7 81  214 7 80
+220 7 82  222 7 83  74 2 28  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  22 0 8  183 5 68  227 7 85
+214 7 80  214 7 80  215 7 81  215 7 81  215 7 81  214 7 80
+217 7 81  225 7 85  195 5 73  163 5 61  90 2 33  10 0 4
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  97 3 36  215 5 80
+221 7 83  214 7 80  214 7 81  215 7 81  215 7 81  215 7 81
+214 7 80  220 7 82  216 7 80  55 1 20  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  10 0 3  175 4 65  227 7 85  214 7 80
+214 7 80  215 7 81  215 7 81  215 7 81  214 7 80  220 7 82
+214 5 80  105 3 39  13 0 5  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  57 1 21
+204 6 76  222 7 83  214 7 80  214 7 81  215 7 81  215 7 81
+215 7 81  214 7 80  222 7 83  201 6 75  29 0 10  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  124 4 46  230 7 86  214 7 80  214 7 80
+215 7 81  215 7 81  214 7 80  215 7 81  222 7 83  206 5 76
+58 1 21  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+41 1 15  201 6 75  222 7 83  214 7 80  215 7 81  215 7 81
+215 7 81  214 7 81  214 7 80  226 7 85  170 5 63  3 0 1
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  43 1 16  211 6 79  218 7 81  214 7 80  215 7 81
+214 7 81  214 7 80  217 7 81  226 7 85  206 6 77  105 3 39
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  42 1 15  206 6 77  220 7 82  214 7 80  215 7 81
+215 7 81  215 7 81  214 7 80  215 7 80  229 7 86  105 3 39
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+5 0 1  183 5 68  224 7 84  214 7 80  215 7 81  214 7 80
+214 7 80  222 7 83  214 5 80  115 3 43  19 0 7  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  62 1 23  219 6 82  217 7 81  214 7 80
+215 7 81  215 7 81  215 7 81  214 7 80  219 7 82  211 6 79
+30 1 11  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+69 1 26  226 7 85  215 7 81  214 7 81  214 7 80  214 7 80
+226 7 85  191 6 71  49 1 18  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  104 3 39  228 7 85  214 7 80
+214 7 81  215 7 81  215 7 81  215 7 81  214 7 80  227 7 85
+138 3 51  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  51 1 19  0 0 0  0 0 0  0 0 0
+162 4 60  224 7 84  214 7 80  214 7 80  214 7 80  227 7 85
+170 5 64  20 0 7  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  166 5 62  224 7 84
+214 7 80  215 7 81  215 7 81  214 7 80  215 7 81  220 7 82
+214 6 80  32 1 12  0 0 0  0 0 0
+0 0 0  5 0 2  49 1 18  0 0 0  0 0 0  40 1 15
+218 6 81  216 7 81  214 7 80  214 7 80  228 7 85  163 5 61
+8 0 3  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  37 1 14  214 6 80
+216 7 81  214 7 81  214 7 81  214 7 80  222 7 83  215 7 81
+228 7 85  106 3 39  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  32 1 12  174 5 65
+222 7 83  214 7 80  214 7 80  227 7 85  163 5 61  8 0 3
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  129 4 48
+226 7 85  214 7 80  215 7 81  222 7 83  115 3 43  44 1 16
+140 3 52  85 2 31  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  15 0 5  201 6 75  225 7 84
+214 7 80  214 7 80  225 7 84  178 4 66  12 0 4  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  26 0 9
+210 5 78  217 7 81  215 7 81  227 7 85  79 2 30  0 0 0
+36 1 13  82 2 31  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  97 3 36  229 7 86  215 7 80
+214 7 80  221 7 83  199 5 74  25 0 9  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+145 4 54  224 7 84  214 7 80  222 7 83  168 5 62  40 1 15
+0 0 0  20 0 7  0 0 0  0 0 0
+0 0 0  0 0 0  4 0 1  185 6 69  221 7 83  214 7 80
+216 7 81  213 6 80  54 1 20  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  40 1 15  110 3 41  163 5 61  188 6 70
+194 6 73  182 5 68  145 4 54  80 2 30  13 0 5  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+64 2 24  224 7 84  215 7 80  214 7 80  226 7 84  188 6 70
+3 0 1  0 0 0  0 0 0  0 0 0
+6 0 2  0 0 0  62 1 23  225 7 84  215 7 81  215 7 81
+223 7 83  126 4 47  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+43 1 16  160 4 59  230 7 86  233 7 87  193 6 72  147 4 55
+117 3 44  106 3 39  120 3 45  150 4 56  159 4 59  90 2 33
+9 0 3  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+12 0 4  201 6 75  218 7 81  214 7 80  218 7 81  213 6 80
+25 0 9  0 0 0  0 0 0  0 0 0
+33 1 12  0 0 0  145 4 54  226 7 84  214 7 80  214 7 80
+224 7 84  154 4 57  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  95 3 36
+225 7 84  226 7 85  140 3 52  50 1 18  4 0 1  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  8 0 3  56 1 21
+76 2 28  17 0 6  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  163 5 61  222 7 83  215 7 81  219 7 82  183 5 68
+51 1 19  0 0 0  0 0 0  0 0 0
+1 0 0  17 0 6  204 6 76  219 7 82  215 7 81  215 7 80
+224 7 84  71 2 26  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  102 2 37  239 7 89
+175 4 65  44 1 16  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  14 0 5  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  131 4 49  225 7 84  215 7 81  224 7 84  142 4 53
+8 0 3  0 0 0  0 0 0  0 0 0
+0 0 0  80 2 30  226 7 85  215 7 80  214 7 80  222 7 83
+172 4 64  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  50 1 19  235 7 88  148 4 55
+7 0 2  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  97 3 36  224 7 84  215 7 81  219 7 82  202 6 75
+20 0 7  0 0 0  0 0 0  0 0 0
+0 0 0  151 4 56  225 7 84  214 7 80  215 7 80  222 7 83
+62 1 23  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  25 0 9  195 5 73  162 4 60  2 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  53 1 20  220 7 82  215 7 81  215 7 80  227 7 85
+87 2 32  0 0 0  0 0 0  0 0 0
+15 0 6  202 5 76  219 7 82  214 7 81  222 7 83  166 5 62
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  3 0 1  175 4 65  207 6 77  15 0 5  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  56 1 21  221 7 83  214 7 81  216 7 80  223 7 83
+58 1 21  0 0 0  0 0 0  0 0 0
+64 2 24  224 7 84  215 7 80  215 7 81  223 7 84  77 2 29
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  71 1 26  239 7 89  80 2 30  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  50 1 18  219 7 82  215 7 81  215 7 81  225 7 84
+76 2 28  0 0 0  0 0 0  0 0 0
+52 1 19  221 7 82  216 7 80  215 7 80  212 6 80  32 1 12
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  160 4 59  193 6 72  1 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  43 1 16  216 7 81  216 7 81  214 7 81  220 7 82
+182 5 68  22 0 8  0 0 0  0 0 0
+33 1 12  212 6 80  217 7 81  217 7 81  204 6 76  19 0 7
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+20 0 7  220 5 82  111 3 41  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  43 1 16  216 7 81  216 7 81  214 7 81  221 7 83
+186 5 70  6 0 2  0 0 0  0 0 0
+32 1 12  212 6 80  217 7 81  218 7 82  194 6 73  7 0 2
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+117 3 44  232 7 87  47 1 17  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  41 1 15  36 1 13
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  48 1 18  218 7 82  215 7 81  214 7 80  231 7 86
+111 3 41  0 0 0  0 0 0  0 0 0
+41 1 15  218 7 82  216 7 80  221 7 83  177 5 66  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+180 5 67  211 6 79  16 0 6  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  15 0 5  16 0 6
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  69 1 26  223 7 83  214 6 80  220 7 82  188 6 70
+47 1 17  0 0 0  0 0 0  0 0 0
+41 1 15  217 7 81  217 7 81  223 7 83  157 5 59  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  3 0 1
+193 5 72  195 5 73  4 0 1  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  56 1 21  215 6 81  216 6 81  224 7 84  104 3 39
+0 0 0  0 0 0  0 0 0  0 0 0
+51 1 19  221 7 82  216 7 81  224 7 83  135 4 50  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  7 0 2
+199 5 74  193 6 72  1 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  33 0 12  212 6 80  218 7 82  217 7 81  36 1 13
+0 0 0  0 0 0  0 0 0  0 0 0
+53 1 20  222 7 83  215 7 81  225 7 84  115 3 43  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  6 0 2
+193 5 72  221 7 82  62 1 23  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  71 1 26  223 7 83  219 7 82  205 6 76  17 0 6
+0 0 0  0 0 0  0 0 0  0 0 0
+53 1 20  222 7 83  215 7 81  225 7 84  97 3 36  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+182 5 68  233 7 87  97 3 36  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  142 4 53  224 7 84  224 7 84  160 4 59  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+42 1 16  218 7 82  216 7 81  224 7 83  82 2 31  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+157 5 59  233 7 88  133 4 50  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+13 0 5  206 6 77  218 7 82  226 7 85  71 2 26  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+44 1 16  219 7 82  216 7 81  223 7 84  79 2 30  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+112 3 42  233 7 87  185 6 69  2 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  26 0 10  7 0 2  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  22 0 8
+140 3 52  222 7 83  222 7 83  177 5 66  3 0 1  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+54 1 20  222 7 83  215 7 81  223 7 83  79 2 30  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+48 1 18  223 7 84  223 7 83  53 1 20  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  2 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  159 5 60
+231 7 86  218 7 82  234 7 87  80 2 30  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+50 1 18  221 7 82  216 7 80  224 7 84  82 2 31  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+1 0 0  174 5 65  233 7 87  150 4 56  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  10 0 3  18 0 6  19 0 7  9 0 3  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  180 5 67
+225 7 84  199 6 74  110 3 41  20 0 7  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+41 1 15  217 7 81  217 7 81  225 7 84  106 3 39  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  68 1 25  228 7 85  222 7 83  56 1 21  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  21 0 8  87 2 32  117 3 44  19 0 7  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  43 1 16  207 6 77
+229 7 86  98 2 36  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+30 1 11  211 6 79  217 7 81  223 7 83  151 4 56  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  148 4 55  235 7 88  188 6 70  13 0 4
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  60 1 22  207 6 77  225 7 84
+221 7 83  34 0 12  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+15 0 6  201 6 75  219 7 82  216 7 81  209 6 78  36 1 13
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  53 1 20
+10 0 3  0 0 0  15 0 5  188 6 70  236 7 88  158 4 59
+4 0 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  80 2 30  215 7 80  226 7 84  215 5 80
+76 2 28  2 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+1 0 0  186 5 70  221 7 83  214 7 80  225 7 84  77 2 29
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  22 0 8
+55 1 20  7 0 2  0 0 0  33 0 12  202 6 75  237 7 89
+151 4 56  7 0 2  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  111 3 41  225 7 84  225 7 84  215 5 80  67 2 25
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  159 5 60  224 7 84  214 7 81  221 7 83  53 1 20
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+36 1 13  26 0 9  39 1 14  0 0 0  36 1 13  193 5 72
+238 7 89  174 5 65  36 1 13  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  16 0 6  75 2 28
+154 4 57  228 7 85  228 7 85  204 6 76  57 1 21  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  120 3 45  227 7 85  214 7 80  223 7 83  104 3 39
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  5 0 2  15 0 5  37 1 14  0 0 0  22 0 8
+172 4 64  238 7 89  211 6 79  106 3 39  14 0 5  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  50 1 18  182 5 68  230 7 86
+228 7 86  229 7 86  170 5 63  30 1 11  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  74 2 28  225 7 84  215 7 80  223 7 83  159 4 59
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  9 0 3  29 0 10  34 0 12
+3 0 1  135 4 50  230 7 86  232 7 87  199 6 74  126 4 47
+56 1 21  19 0 7  3 0 1  0 0 0  3 0 1  21 0 8
+67 2 25  131 4 49  170 5 63  221 7 83  228 7 86  228 7 86
+204 6 76  99 3 37  1 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  29 0 10  211 6 79  217 7 81  220 7 82  182 5 68
+2 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  75 2 28
+126 4 47  22 0 8  98 2 36  205 6 77  230 7 86  238 7 89
+232 7 87  209 6 78  191 6 71  188 6 70  192 6 72  209 6 78
+230 7 86  235 7 88  235 7 88  226 7 85  193 6 72  112 3 42
+21 0 8  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  177 5 66  222 7 83  215 7 80  221 7 83
+57 1 21  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+54 1 20  140 3 52  133 3 49  47 1 17  68 1 25  126 4 47
+171 4 64  201 6 75  216 7 81  221 7 83  221 7 83  212 6 80
+199 6 74  172 4 64  126 4 47  64 2 24  10 0 3  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  117 3 44  227 7 85  214 7 80  221 7 83
+170 5 63  8 0 3  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  1 0 0  71 2 26  117 3 44  53 1 20  9 0 3
+0 0 0  6 0 2  19 0 7  26 0 10  27 0 10  24 0 9
+12 0 4  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  49 1 18  222 7 83  216 7 81  214 7 80
+220 7 82  189 6 71  133 4 50  12 0 4  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  34 0 12  64 2 24
+76 2 28  71 1 26  60 1 22  47 1 17  40 1 15  8 0 3
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  2 0 0  181 5 67  222 7 83  214 7 80
+214 7 81  221 7 83  233 7 88  140 3 52  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  12 0 4  21 0 8  22 0 8  16 0 6  2 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  102 2 37  227 7 85  214 7 80
+215 7 81  214 7 80  216 7 81  199 6 74  47 1 17  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  22 0 8  207 6 77  218 7 82
+214 7 81  215 7 81  223 7 84  120 3 45  42 1 15  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  133 4 50  227 7 85
+214 7 80  215 7 81  219 7 82  188 6 70  3 0 1  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  36 1 13  216 6 81
+217 7 81  214 7 81  214 7 80  227 7 85  110 3 41  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  138 3 51
+227 7 85  214 7 80  214 7 80  217 7 81  210 5 78  26 0 10
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  29 0 10
+211 6 79  218 7 82  214 7 80  217 7 81  207 6 77  25 0 9
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+111 3 41  229 7 86  214 7 80  214 7 80  223 7 83  155 4 58
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+8 0 3  185 6 69  223 7 83  214 7 80  217 7 81  207 6 77
+14 0 5  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  56 1 21  223 7 84  217 7 81  214 7 80  225 7 84
+127 3 47  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  120 3 45  229 7 86  214 7 80  216 7 81
+223 7 83  62 1 23  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  4 0 1  171 4 64  226 7 84  214 7 80
+220 7 82  199 6 74  20 0 7  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  26 0 10  200 5 74  222 7 83
+214 7 80  226 7 85  157 5 59  12 0 4  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  54 1 20  216 5 80
+219 7 82  214 7 80  227 7 85  154 4 57  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  77 2 29
+224 7 84  218 7 82  216 7 81  223 7 84  60 1 22  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+87 2 32  224 7 84  218 7 82  220 7 82  193 6 72  17 0 6
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  87 2 32  224 7 84  219 7 82  221 7 83  188 5 69
+145 4 54  3 0 1  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  76 2 28  214 7 81  222 7 83  221 7 83
+231 7 86  145 4 54  3 0 1  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  53 1 20  199 6 74  226 7 85
+215 7 81  227 7 85  170 5 63  39 1 14  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  24 0 9  165 5 61
+229 7 86  218 7 82  230 7 86  95 3 36  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  1 0 0
+109 3 40  216 7 80  226 7 85  211 6 79  151 4 56  23 0 8
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  44 1 16  170 5 63  229 7 86  233 7 88  201 6 75
+99 3 37  51 1 19  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  81 2 30  185 6 69  232 7 87
+233 7 88  229 7 86  126 4 47  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  7 0 2  85 2 31
+177 5 66  224 7 84  238 7 89  143 4 53  12 0 4  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+1 0 0  54 1 20  131 4 49  201 6 75  191 6 71  104 3 39
+40 1 15  9 0 3  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  6 0 2  53 1 20  110 3 41
+131 4 49  119 3 44  97 3 36  71 1 26  32 1 12  1 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0
diff --git a/drivers/video/logo/logo_exherbo_clut224.ppm b/drivers/video/logo/logo_exherbo_clut224.ppm
new file mode 100644
index 000000000..e9cedd267
--- /dev/null
+++ b/drivers/video/logo/logo_exherbo_clut224.ppm
@@ -0,0 +1,963 @@
+P3
+71 80
+255
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  117 117 117  104 104 104
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  143 143 143  255 255 255
+179 179 179  19 19 19  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  34 34 34  104 104 104  16 16 16
+0 0 0  0 0 0  0 0 0  0 0 0  60 60 60  161 161 161
+98 98 98  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  112 106 107  255 255 255
+255 255 255  197 197 197  21 21 21  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  60 60 60  236 236 236  255 255 255  57 57 57
+0 0 0  0 0 0  1 1 1  117 117 117  251 251 251  255 255 255
+250 250 250  33 33 33  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  33 33 33  245 245 245
+255 255 255  255 255 255  141 140 140  0 0 0  0 0 0  0 0 0
+0 0 0  25 25 25  230 230 230  255 255 255  255 255 255  54 54 54
+0 0 0  0 0 0  34 34 34  254 254 254  255 255 255  255 255 255
+255 255 255  93 92 92  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  3 3 3  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  93 92 92
+255 255 255  222 222 222  30 30 30  10 10 10  45 45 45  0 0 0
+0 0 0  152 152 152  255 255 255  255 255 255  244 244 244  25 25 25
+0 0 0  0 0 0  45 45 45  254 254 254  255 255 255  255 255 255
+255 255 255  117 117 117  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  49 49 49  202 202 202  15 15 15  0 0 0  3 3 3
+54 54 54  61 61 61  18 17 17  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+88 88 88  54 51 52  0 0 0  168 168 168  156 156 156  0 0 0
+23 23 23  243 243 243  255 255 255  255 255 255  229 229 229  16 16 16
+0 0 0  0 0 0  38 38 38  250 250 250  255 255 255  255 255 255
+255 255 255  93 92 92  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  152 152 152  255 255 255  33 33 33  0 0 0  7 7 7
+164 164 164  255 255 255  229 229 229  82 82 82  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  112 106 107  255 255 255  191 191 191  0 0 0
+93 92 92  255 255 255  255 255 255  255 255 255  254 254 254  207 207 207
+57 57 57  0 0 0  18 17 17  236 236 236  255 255 255  255 255 255
+249 248 248  38 38 38  0 0 0  0 0 0  0 0 0  0 0 0
+49 47 48  246 246 246  251 251 251  30 30 30  0 0 0  0 0 0
+3 3 3  147 147 147  255 255 255  249 249 249  61 61 61  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  39 39 39  244 244 244  255 255 255  225 225 225  1 1 1
+161 161 161  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+249 248 248  117 117 117  1 1 1  205 205 205  255 255 255  255 255 255
+209 209 209  4 4 4  0 0 0  0 0 0  7 7 7  67 68 68
+227 227 227  255 255 255  253 253 253  88 88 88  0 0 0  0 0 0
+0 0 0  0 0 0  150 150 150  255 255 255  183 183 183  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  187 187 187  255 255 255  255 255 255  247 247 247  36 37 36
+201 201 201  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  181 181 181  141 140 140  251 251 251  255 255 255
+249 249 249  215 215 215  163 163 163  77 77 77  57 57 57  147 147 147
+245 245 245  255 255 255  255 255 255  247 247 247  182 182 182  36 37 36
+0 0 0  0 0 0  3 3 3  184 184 184  127 127 127  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+70 69 69  255 255 255  255 255 255  255 255 255  255 255 255  82 82 82
+221 221 221  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  249 249 249  253 253 253  255 255 255
+255 255 255  238 238 238  88 88 88  10 9 9  0 0 0  0 0 0
+39 39 39  216 216 216  255 255 255  255 255 255  176 176 176  12 12 12
+0 0 0  0 0 0  0 0 0  5 5 5  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+182 182 182  255 255 255  255 255 255  255 255 255  255 255 255  134 134 134
+234 234 234  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+244 244 244  54 54 54  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  54 54 54  249 248 248  143 143 143  1 1 1  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  28 26 26
+246 246 246  255 255 255  255 255 255  255 255 255  255 255 255  238 238 238
+252 252 252  255 255 255  255 255 255  184 184 184  98 98 98  67 68 68
+61 61 61  98 98 98  221 221 221  255 255 255  255 255 255  255 255 255
+156 156 156  0 0 0  0 0 0  20 20 22  97 100 116  26 27 30
+0 0 0  0 0 0  127 127 127  0 0 0  0 0 0  0 0 0
+22 22 22  30 27 28  12 13 12  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  88 88 88
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  161 161 161  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  42 42 42  243 243 243  255 255 255  255 255 255
+98 98 98  0 0 0  0 0 0  130 132 147  154 161 185  97 100 116
+0 0 0  0 0 0  67 68 68  30 31 31  8 8 8  184 184 184
+241 241 241  247 247 247  230 230 230  138 137 137  4 4 4  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  143 143 143
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  61 61 61  0 0 0  0 0 0  14 11 13
+4 5 9  0 0 0  0 0 0  204 204 204  255 255 255  255 255 255
+104 104 104  0 0 0  0 0 0  130 132 147  154 161 185  146 153 179
+4 5 9  0 0 0  70 69 69  238 238 238  225 225 225  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  150 150 150  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  179 179 179
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  82 82 82  0 0 0  0 0 0  76 72 85
+48 49 56  0 0 0  3 3 3  216 216 216  255 255 255  255 255 255
+159 159 159  0 0 0  0 0 0  48 49 56  154 161 185  130 132 147
+4 5 9  0 0 0  61 61 61  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  254 254 254  54 51 52
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  204 204 204
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  188 188 188  0 0 0  0 0 0  1 1 1
+0 0 0  0 0 0  49 47 48  250 250 250  255 255 255  255 255 255
+238 237 237  22 22 22  0 0 0  0 0 0  26 27 30  4 5 9
+0 0 0  0 0 0  127 127 127  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  156 156 156
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  3 3 3  216 216 216
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  117 117 117  3 3 3  0 0 0
+0 0 0  12 12 12  189 189 189  255 255 255  255 255 255  255 255 255
+255 255 255  174 174 174  2 2 2  0 0 0  0 0 0  0 0 0
+0 0 0  49 49 49  238 237 237  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  223 223 223
+8 8 8  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  5 5 5  219 219 219
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  212 212 212  49 49 49  0 0 0
+0 0 0  25 25 25  98 98 98  117 117 117  134 134 134  168 168 168
+201 201 201  238 238 238  179 179 179  70 69 69  39 39 39  60 60 60
+134 134 134  238 238 238  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  251 251 251
+36 37 36  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  1 1 1  211 211 211
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  194 194 194  67 68 68  0 0 0  0 0 0  0 0 0
+6 4 5  4 2 2  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  10 10 10  57 57 57  104 104 104  159 159 159  232 231 231
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+67 68 68  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  205 205 205
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+134 134 134  0 0 0  0 0 0  50 41 43  111 86 92  143 103 113
+156 121 129  163 123 133  152 115 124  143 103 113  122 96 102  111 86 92
+76 63 66  38 29 31  9 5 8  0 0 0  0 0 0  10 10 10
+75 75 75  166 165 165  238 237 237  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+117 117 117  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  187 187 187
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  134 134 134
+0 0 0  19 14 15  143 103 113  193 131 146  195 132 148  192 130 145
+191 129 144  191 129 144  191 129 145  192 130 145  193 131 146  195 132 148
+195 132 148  191 129 145  185 125 140  143 103 113  86 80 81  38 29 31
+0 0 0  0 0 0  19 19 19  117 117 117  244 244 244  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+164 164 164  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  125 125 125
+255 255 255  255 255 255  255 255 255  255 255 255  192 192 192  0 0 0
+19 19 19  163 123 133  195 132 148  186 126 141  189 128 143  192 130 145
+188 127 142  185 125 140  185 125 140  186 126 141  186 126 141  185 125 140
+185 125 140  186 126 141  188 127 143  192 130 145  195 132 148  191 129 144
+152 115 124  79 66 70  19 14 15  0 0 0  54 54 54  227 227 227
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+196 196 196  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  54 51 52
+253 253 253  255 255 255  255 255 255  251 251 251  54 54 54  0 0 0
+143 103 113  195 132 148  185 125 140  192 130 145  163 123 133  143 103 113
+185 125 140  189 128 143  185 125 140  186 126 141  186 126 141  186 126 141
+186 126 141  186 126 141  186 126 141  185 125 140  185 125 140  186 126 141
+191 129 145  196 133 148  185 125 140  65 56 58  0 0 0  39 39 39
+222 222 222  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+207 207 207  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  1 1 1
+205 205 205  255 255 255  255 255 255  188 188 188  0 0 0  60 53 55
+195 132 148  186 126 141  191 129 145  122 96 102  14 11 13  0 0 0
+38 29 31  163 123 133  189 128 143  186 126 141  186 126 141  186 126 141
+186 126 141  186 126 141  186 126 141  185 125 140  185 126 140  192 130 145
+192 130 145  191 129 144  196 133 148  193 131 146  79 66 70  0 0 0
+45 45 45  238 238 238  255 255 255  255 255 255  255 255 255  255 255 255
+206 206 206  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+98 98 98  255 255 255  255 255 255  98 98 98  0 0 0  143 103 113
+193 131 146  186 125 140  192 130 145  38 29 31  0 0 0  0 0 0
+0 0 0  111 86 92  195 132 148  186 126 141  186 126 141  186 126 141
+186 126 141  186 126 141  186 126 141  185 125 140  191 129 145  143 103 113
+50 41 43  38 35 39  122 96 102  193 131 146  196 133 148  79 66 70
+0 0 0  93 92 92  255 255 255  255 255 255  255 255 255  255 255 255
+187 187 187  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+7 7 7  217 217 217  255 255 255  42 42 42  10 7 8  185 125 140
+188 127 143  186 126 141  193 131 146  76 63 66  0 0 0  0 0 0
+0 0 0  143 103 113  192 130 145  186 126 141  186 126 141  186 126 141
+186 126 141  186 126 141  186 126 141  191 129 144  143 103 113  5 3 4
+0 0 0  0 0 0  4 2 2  143 103 113  193 131 146  188 127 142
+38 29 31  0 0 0  203 203 203  255 255 255  255 255 255  255 255 255
+147 147 147  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  82 82 82  255 255 255  42 42 42  10 7 8  185 125 140
+188 127 143  186 126 141  189 128 143  163 123 133  60 53 55  31 25 27
+79 66 70  188 127 142  186 126 141  186 126 141  186 126 141  186 126 141
+186 126 141  186 126 141  186 126 141  195 132 147  79 66 70  0 0 0
+0 0 0  0 0 0  0 0 0  86 80 81  193 131 146  193 131 146
+122 96 102  0 0 0  112 106 107  255 255 255  255 255 255  255 255 255
+82 82 82  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  187 187 187  104 104 104  0 0 0  143 103 113
+193 131 146  185 125 140  185 125 140  189 128 143  192 130 145  188 127 142
+193 131 146  186 126 141  185 125 140  186 126 141  186 126 141  186 126 141
+186 126 141  186 126 141  186 126 141  195 132 147  76 63 66  0 0 0
+0 0 0  0 0 0  0 0 0  79 66 70  195 132 148  189 128 143
+163 123 133  0 0 0  75 75 75  255 255 255  255 255 255  234 234 234
+16 16 16  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  34 34 34  163 163 163  0 0 0  60 53 55
+196 133 148  186 126 141  185 125 140  185 125 140  186 126 141  188 127 142
+186 126 141  185 125 140  186 126 141  186 126 141  186 126 141  186 126 141
+186 126 141  186 126 141  186 126 141  193 131 146  122 96 102  0 0 0
+0 0 0  0 0 0  0 0 0  122 96 102  193 131 146  192 130 145
+143 103 113  0 0 0  98 98 98  255 255 255  255 255 255  141 140 140
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  79 79 79  60 60 60  0 0 0
+122 96 102  196 133 148  188 127 142  185 125 140  185 125 140  186 126 141
+186 126 141  186 126 141  186 126 141  186 126 141  186 126 141  186 126 141
+186 126 141  186 126 141  186 126 141  186 126 141  189 128 143  76 63 66
+10 7 8  6 4 5  50 41 43  185 125 140  186 126 141  196 133 148
+76 63 66  0 0 0  168 168 168  255 255 255  238 238 238  23 23 23
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  79 79 79  19 19 19
+6 6 6  122 96 102  191 129 145  195 132 148  193 131 146  189 128 143
+188 127 142  186 126 141  186 126 141  185 125 140  186 126 141  186 126 141
+186 126 141  186 126 141  186 126 141  185 125 140  186 126 141  191 129 145
+163 123 133  152 115 124  186 126 141  189 128 143  196 133 148  143 103 113
+1 0 0  36 37 36  244 244 244  255 255 255  112 106 107  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  6 6 6  49 49 49
+12 13 12  0 0 0  31 25 27  79 66 70  143 103 113  163 123 133
+185 125 140  191 129 144  193 131 146  195 132 148  193 131 146  193 131 146
+192 130 145  191 129 144  189 128 143  188 127 143  188 127 142  188 127 142
+192 130 145  195 132 147  195 132 148  192 130 145  114 88 95  4 2 2
+4 4 4  192 192 192  255 255 255  187 187 187  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  1 1 1
+30 31 31  49 49 49  23 23 23  0 0 0  0 0 0  0 0 0
+14 11 13  38 29 31  60 53 55  79 66 70  114 88 95  143 103 113
+143 103 113  163 123 133  163 123 133  185 125 140  186 126 141  188 127 142
+186 126 141  163 123 133  122 96 102  50 41 43  0 0 0  12 13 12
+173 172 172  255 255 255  225 225 225  25 25 25  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  30 27 28  127 127 127  175 175 175  134 134 134  77 77 77
+42 42 42  15 15 15  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  5 3 4  19 14 15  27 23 24  31 25 27
+19 14 15  1 0 0  0 0 0  0 0 0  75 75 75  210 210 210
+255 255 255  236 236 236  54 54 54  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  61 61 61  187 187 187  255 255 255
+255 255 255  241 241 241  214 214 214  183 183 183  152 152 152  127 127 127
+104 104 104  79 79 79  57 57 57  39 39 39  30 31 31  25 25 25
+30 31 31  61 61 61  117 117 117  203 203 203  255 255 255  255 255 255
+227 227 227  57 57 57  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  67 68 68
+179 179 179  253 253 253  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  254 254 254  249 249 249  246 246 246
+250 250 250  255 255 255  255 255 255  255 255 255  255 255 255  185 185 185
+33 33 33  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  2 2 2  30 30 30  0 0 0  0 0 0  0 0 0
+0 0 0  36 37 36  117 117 117  188 188 188  236 236 236  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  199 199 199  88 88 88  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  19 19 19  232 231 231  136 136 136  19 19 19  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  18 17 17  54 54 54
+98 98 98  127 127 127  156 156 156  173 172 172  174 174 174  176 176 176
+163 163 163  117 117 117  49 47 48  0 0 0  0 0 0  0 0 0
+0 0 0  22 22 22  117 117 117  19 19 19  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  86 86 86  255 255 255  255 255 255  229 229 229  125 125 125
+25 25 25  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  10 9 9
+98 98 98  227 227 227  255 255 255  199 199 199  19 19 19  0 0 0
+3 3 3  28 26 26  8 8 8  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  2 2 2
+22 22 22  30 27 28  18 17 17  2 2 2  0 0 0  0 0 0
+0 0 0  197 197 197  255 255 255  255 255 255  255 255 255  255 255 255
+241 241 241  181 181 181  125 125 125  75 75 75  33 33 33  6 6 6
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  42 42 42  127 127 127  232 231 231
+255 255 255  255 255 255  255 255 255  255 255 255  204 204 204  22 22 22
+0 0 0  141 140 140  234 232 232  191 191 191  70 69 69  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  30 30 30  138 137 137  214 209 209
+236 236 236  241 239 239  232 231 231  216 216 216  98 98 98  0 0 0
+75 75 75  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  229 229 229
+188 188 188  147 147 147  112 106 107  86 86 86  77 77 77  77 77 77
+104 104 104  156 156 156  215 215 215  255 255 255  241 239 239  159 159 159
+75 75 75  104 104 104  255 255 255  255 255 255  255 255 255  205 205 205
+23 23 23  4 4 4  176 176 176  255 255 255  253 253 253  57 57 57
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  67 68 68  241 239 239  255 255 255  254 254 254
+255 250 250  255 250 250  255 255 255  243 243 243  38 38 38  8 8 8
+219 219 219  255 255 255  250 250 250  195 195 195  127 127 127  88 88 88
+82 82 82  117 117 117  134 134 134  152 152 152  176 176 176  202 202 202
+227 227 227  246 246 246  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  225 225 225  164 164 164  86 86 86  16 16 16  0 0 0
+0 0 0  125 125 125  255 255 255  255 255 255  255 255 255  255 255 255
+189 189 189  6 6 6  8 7 7  166 165 165  183 183 183  93 92 92
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  49 47 48  173 172 172  234 232 232  255 255 255
+255 255 255  255 255 255  255 255 255  90 84 86  0 0 0  138 137 137
+255 255 255  152 152 152  49 47 48  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+3 3 3  16 16 16  30 31 31  54 54 54  70 69 69  54 54 54
+30 27 28  2 2 2  0 0 0  0 0 0  0 0 0  0 0 0
+79 79 79  250 250 250  255 255 255  255 255 255  255 255 255  255 255 255
+230 230 230  141 140 140  2 2 2  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  22 22 22  82 82 82
+138 137 137  192 192 192  134 134 134  0 0 0  77 77 77  241 241 241
+79 79 79  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  54 51 52
+243 243 243  255 255 255  255 255 255  255 255 255  255 255 255  254 254 254
+205 205 205  210 210 210  117 117 117  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  38 38 38  212 212 212  57 57 57
+0 0 0  0 0 0  0 0 0  0 0 0  2 2 2  3 3 3
+1 1 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  23 23 23  217 217 217
+255 255 255  255 255 255  255 255 255  255 255 255  234 234 234  82 82 82
+104 104 104  206 206 206  208 208 208  79 79 79  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  10 10 10  159 159 159  45 45 45  0 0 0
+23 23 23  82 82 82  143 143 143  188 188 188  214 214 214  217 217 217
+213 213 213  205 205 205  189 189 189  174 174 174  152 152 152  127 127 127
+98 98 98  77 77 77  54 54 54  39 39 39  28 26 26  16 16 16
+10 10 10  15 15 15  33 33 33  98 98 98  217 217 217  255 255 255
+255 255 255  255 255 255  255 255 255  238 238 238  54 54 54  0 0 0
+30 30 30  199 199 199  203 203 203  195 195 195  34 34 34  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  82 82 82  49 49 49  60 60 60  168 168 168
+238 238 238  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  251 251 251  243 243 243  234 234 234
+225 225 225  234 234 234  247 247 247  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  232 231 231  61 61 61  0 0 0  0 0 0
+45 45 45  202 202 202  199 199 199  209 209 209  134 134 134  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  42 42 42  227 227 227  216 216 216  255 255 255  255 255 255
+255 255 255  249 248 248  205 205 205  171 170 170  176 176 176  219 219 219
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  175 175 175  30 30 30  0 0 0  0 0 0  0 0 0
+112 106 107  208 208 208  198 198 198  202 202 202  189 189 189  18 17 17
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  150 150 150  255 255 255  255 255 255  255 255 255  238 238 238
+134 134 134  36 37 36  0 0 0  0 0 0  0 0 0  7 7 7
+67 68 68  150 150 150  213 213 213  247 247 247  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  219 219 219
+93 92 92  0 0 0  0 0 0  0 0 0  0 0 0  30 30 30
+191 191 191  202 202 202  199 199 199  198 198 198  207 207 207  67 68 68
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+6 6 6  221 221 221  255 255 255  255 255 255  174 174 174  33 33 33
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  3 3 3  34 34 34  70 69 69  104 104 104
+134 134 134  152 152 152  164 164 164  177 177 177  187 187 187  192 192 192
+198 198 198  204 204 204  200 200 200  168 168 168  93 92 92  12 13 12
+0 0 0  0 0 0  0 0 0  0 0 0  16 16 16  163 163 163
+206 206 206  198 198 198  199 199 199  198 198 198  208 208 208  127 127 127
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+33 33 33  251 251 251  255 255 255  255 255 255  42 42 42  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  34 34 34  166 165 165  207 207 207
+198 198 198  198 198 198  199 199 199  199 199 199  203 203 203  179 179 179
+10 10 10  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+39 39 39  250 250 250  255 255 255  255 255 255  112 106 107  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  8 8 8
+15 15 15  2 2 2  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  12 12 12  93 92 92  192 192 192  206 206 206  198 198 198
+198 198 198  199 199 199  199 199 199  199 199 199  200 200 200  203 203 203
+49 49 49  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+2 2 2  39 39 39  117 117 117  213 213 213  255 255 255  195 195 195
+176 176 176  168 168 168  168 168 168  176 176 176  201 201 201  225 225 225
+234 234 234  216 216 216  168 168 168  93 92 92  30 27 28  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  15 15 15
+88 88 88  177 177 177  209 209 209  202 202 202  198 198 198  198 198 198
+199 199 199  199 199 199  199 199 199  198 198 198  199 199 199  215 215 215
+112 106 107  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  7 7 7  112 106 107  245 245 245
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  243 243 243  189 189 189
+127 127 127  79 79 79  60 60 60  45 45 45  33 33 33  28 26 26
+28 26 26  33 33 33  54 51 52  82 82 82  125 125 125  182 182 182
+209 209 209  204 204 204  199 199 199  198 198 198  198 198 198  199 199 199
+199 199 199  198 198 198  198 198 198  202 202 202  208 208 208  166 165 165
+61 61 61  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  36 37 36  86 86 86  138 137 137  141 140 140  117 117 117
+70 69 69  15 15 15  0 0 0  0 0 0  0 0 0  34 34 34
+182 182 182  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  254 254 254  247 247 247  244 244 244
+244 244 244  249 249 249  238 238 238  212 212 212  206 206 206  203 203 203
+199 199 199  198 198 198  199 199 199  198 198 198  200 200 200  208 208 208
+207 207 207  198 198 198  206 206 206  192 192 192  88 88 88  4 4 4
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  5 5 5
+2 2 2  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  7 7 7  98 98 98
+196 196 196  249 249 249  255 255 255  125 125 125  241 239 239  255 255 255
+255 255 255  229 229 229  134 134 134  18 17 17  0 0 0  0 0 0
+0 0 0  104 104 104  238 238 238  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  227 227 227  199 199 199  197 197 197  199 199 199  199 199 199
+199 199 199  198 198 198  198 198 198  203 203 203  201 201 201  117 117 117
+112 106 107  212 212 212  161 161 161  30 31 31  0 0 0  0 0 0
+0 0 0  30 31 31  112 106 107  174 174 174  211 211 211  223 223 223
+217 217 217  187 187 187  127 127 127  49 49 49  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  49 47 48  210 210 210  255 255 255
+255 255 255  255 255 255  188 188 188  0 0 0  147 147 147  255 255 255
+255 255 255  255 255 255  255 255 255  221 221 221  75 75 75  0 0 0
+0 0 0  0 0 0  39 39 39  191 191 191  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  241 241 241
+208 208 208  195 195 195  198 198 198  199 199 199  199 199 199  198 198 198
+198 198 198  199 199 199  208 208 208  194 194 194  67 68 68  0 0 0
+61 61 61  147 147 147  12 12 12  0 0 0  0 0 0  0 0 0
+0 0 0  86 86 86  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  253 253 253  183 183 183  61 61 61
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  4 4 4  163 163 163  247 247 247  255 255 255
+255 255 255  255 255 255  98 98 98  0 0 0  88 88 88  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  152 152 152
+16 16 16  0 0 0  0 0 0  5 5 5  141 140 140  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  249 249 249  222 222 222  198 198 198
+196 196 196  198 198 198  199 199 199  198 198 198  199 199 199  201 201 201
+207 207 207  206 206 206  159 159 159  45 45 45  0 0 0  23 23 23
+112 106 107  7 7 7  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  104 104 104  249 249 249  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  253 253 253
+134 134 134  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  30 31 31  196 196 196  205 205 205  249 248 248
+255 255 255  225 225 225  12 12 12  0 0 0  30 30 30  246 246 246
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+216 216 216  70 69 69  0 0 0  0 0 0  0 0 0  104 104 104
+252 252 252  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  252 252 252  230 230 230  203 203 203  195 195 195  198 198 198
+200 200 200  201 201 201  204 204 204  208 208 208  208 208 208  192 192 192
+141 140 140  60 60 60  2 2 2  0 0 0  15 15 15  125 125 125
+19 19 19  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  61 61 61  206 206 206  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  112 106 107  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  45 45 45  203 203 203  197 197 197  214 214 214
+255 255 255  117 117 117  0 0 0  0 0 0  0 0 0  209 209 209
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  253 253 253  127 127 127  0 0 0  0 0 0  0 0 0
+98 98 98  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+241 241 241  215 215 215  205 205 205  207 207 207  209 209 209  209 209 209
+205 205 205  195 195 195  171 170 170  134 134 134  75 75 75  23 23 23
+0 0 0  0 0 0  0 0 0  0 0 0  127 127 127  60 60 60
+0 0 0  0 0 0  98 98 98  192 192 192  34 34 34  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  7 7 7  112 106 107
+232 231 231  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  222 222 222  8 8 8  0 0 0  0 0 0
+0 0 0  0 0 0  34 34 34  199 199 199  201 201 201  203 203 203
+202 202 202  15 15 15  0 0 0  0 0 0  0 0 0  173 172 172
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  164 164 164  6 6 6  0 0 0
+0 0 0  117 117 117  206 206 206  206 206 206  189 189 189  159 159 159
+143 143 143  141 140 140  136 136 136  125 125 125  104 104 104  79 79 79
+54 51 52  25 25 25  2 2 2  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  77 77 77  143 143 143  0 0 0
+0 0 0  42 42 42  245 245 245  255 255 255  234 234 234  104 104 104
+7 7 7  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+23 23 23  147 147 147  250 250 250  255 255 255  255 255 255  255 255 255
+255 255 255  254 254 254  45 45 45  0 0 0  0 0 0
+0 0 0  0 0 0  8 8 8  177 177 177  205 205 205  209 209 209
+75 75 75  0 0 0  0 0 0  0 0 0  0 0 0  159 159 159
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  143 143 143
+179 179 179  255 255 255  255 255 255  255 255 255  168 168 168  2 2 2
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  25 25 25  189 189 189  54 54 54  0 0 0
+0 0 0  182 182 182  255 255 255  255 255 255  255 255 255  255 255 255
+211 211 211  98 98 98  8 8 8  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  49 49 49  161 161 161  232 231 231  255 255 255
+255 255 255  252 252 252  54 51 52  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  117 117 117  225 225 225  141 140 140
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  179 179 179
+255 255 255  255 255 255  255 255 255  255 255 255  179 179 179  0 0 0
+93 92 92  255 255 255  255 255 255  255 255 255  255 255 255  127 127 127
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  5 5 5  156 156 156  184 184 184  5 5 5  0 0 0
+45 45 45  253 253 253  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  211 211 211  88 88 88  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  18 17 17  93 92 92
+211 211 211  215 215 215  36 37 36  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  30 31 31  138 137 137  16 16 16
+0 0 0  0 0 0  0 0 0  0 0 0  6 6 6  227 227 227
+255 255 255  255 255 255  255 255 255  253 253 253  49 47 48  0 0 0
+104 104 104  255 255 255  255 255 255  255 255 255  255 255 255  251 251 251
+49 49 49  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+6 6 6  136 136 136  217 217 217  127 127 127  0 0 0  0 0 0
+127 127 127  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  197 197 197  75 75 75
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+12 12 12  147 147 147  23 23 23  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  134 134 134  255 255 255
+255 255 255  255 255 255  255 255 255  187 187 187  0 0 0  0 0 0
+134 134 134  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+181 181 181  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  5 5 5  23 23 23  54 54 54
+156 156 156  208 208 208  209 209 209  86 86 86  0 0 0  0 0 0
+75 75 75  252 252 252  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+188 188 188  75 75 75  8 8 8  0 0 0  0 0 0  0 0 0
+25 25 25  98 98 98  6 6 6  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  33 33 33  136 136 136  234 234 234  255 255 255
+255 255 255  255 255 255  252 252 252  57 57 57  0 0 0  0 0 0
+147 147 147  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+253 253 253  54 51 52  0 0 0  0 0 0  2 2 2  4 4 4
+12 13 12  22 22 22  33 33 33  45 45 45  60 60 60  82 82 82
+98 98 98  117 117 117  147 147 147  173 172 172  191 191 191  203 203 203
+205 205 205  199 199 199  205 205 205  57 57 57  0 0 0  0 0 0
+0 0 0  61 61 61  156 156 156  217 217 217  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  196 196 196  125 125 125  112 106 107  138 137 137
+199 199 199  112 106 107  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  61 61 61  196 196 196  209 209 209  197 197 197  230 230 230
+255 255 255  255 255 255  125 125 125  0 0 0  0 0 0  0 0 0
+161 161 161  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  156 156 156  0 0 0  0 0 0  70 69 69  179 179 179
+182 182 182  191 191 191  197 197 197  202 202 202  206 206 206  208 208 208
+209 209 209  208 208 208  206 206 206  204 204 204  201 201 201  199 199 199
+198 198 198  199 199 199  200 200 200  39 39 39  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  6 6 6  57 57 57  136 136 136
+217 217 217  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+244 244 244  215 215 215  202 202 202  206 206 206  207 207 207  207 207 207
+203 203 203  39 39 39  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  82 82 82  205 205 205  208 208 208  198 198 198  202 202 202
+244 244 244  152 152 152  0 0 0  0 0 0  0 0 0  0 0 0
+192 192 192  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  227 227 227  12 12 12  0 0 0  49 47 48  208 208 208
+203 203 203  201 201 201  200 200 200  200 200 200  199 199 199  199 199 199
+198 198 198  199 199 199  199 199 199  199 199 199  198 198 198  199 199 199
+202 202 202  207 207 207  208 208 208  36 37 36  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+8 8 8  77 77 77  210 210 210  255 255 255  246 246 246  219 219 219
+197 197 197  194 194 194  197 197 197  198 198 198  197 197 197  205 205 205
+156 156 156  1 1 1  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  45 45 45  166 165 165  212 212 212  192 192 192
+88 88 88  0 0 0  0 0 0  0 0 0  0 0 0  21 21 21
+241 241 241  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  253 253 253  42 42 42  0 0 0  19 19 19  196 196 196
+211 211 211  204 204 204  201 201 201  199 199 199  199 199 199  198 198 198
+199 199 199  200 200 200  202 202 202  205 205 205  209 209 209  207 207 207
+191 191 191  159 159 159  112 106 107  12 13 12  0 0 0  19 19 19
+86 86 86  19 19 19  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  16 16 16  150 150 150  207 207 207  205 205 205
+202 202 202  197 197 197  197 197 197  197 197 197  198 198 198  208 208 208
+67 68 68  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  5 5 5  67 68 68  30 27 28
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  112 106 107
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  54 54 54  0 0 0  1 1 1  60 60 60
+127 127 127  174 174 174  196 196 196  205 205 205  207 207 207  208 208 208
+207 207 207  203 203 203  191 191 191  161 161 161  117 117 117  61 61 61
+22 22 22  0 0 0  0 0 0  0 0 0  0 0 0  18 17 17
+249 248 248  238 237 237  173 172 172  88 88 88  16 16 16  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  57 57 57  127 127 127
+168 168 168  199 199 199  198 198 198  197 197 197  209 209 209  136 136 136
+1 1 1  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  38 38 38  238 237 237
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  249 249 249  30 31 31  0 0 0  0 0 0  0 0 0
+0 0 0  6 6 6  30 30 30  57 57 57  70 69 69  77 77 77
+70 69 69  49 49 49  22 22 22  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  1 1 1
+181 181 181  227 227 227  221 221 221  214 214 214  183 183 183  112 106 107
+36 37 36  6 6 6  0 0 0  0 0 0  0 0 0  0 0 0
+49 47 48  199 199 199  198 198 198  208 208 208  161 161 161  12 13 12
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  33 33 33  189 189 189  251 251 251
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  183 183 183  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+57 57 57  198 198 198  202 202 202  196 196 196  202 202 202  208 208 208
+197 197 197  175 175 175  159 159 159  156 156 156  152 152 152  147 147 147
+176 176 176  200 200 200  209 209 209  161 161 161  19 19 19  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  18 17 17  82 82 82  156 156 156
+225 225 225  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+234 234 234  39 39 39  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  60 60 60  185 185 185  208 208 208  199 199 199  197 197 197
+199 199 199  202 202 202  204 204 204  205 205 205  205 205 205  205 205 205
+205 205 205  208 208 208  141 140 140  15 15 15  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+42 42 42  104 104 104  184 184 184  232 231 231  247 247 247  216 216 216
+57 57 57  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  25 25 25  134 134 134  202 202 202  207 207 207
+201 201 201  198 198 198  198 198 198  200 200 200  203 203 203  209 209 209
+188 188 188  93 92 92  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  15 15 15  30 30 30  10 9 9
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  49 49 49  134 134 134
+187 187 187  201 201 201  202 202 202  194 194 194  168 168 168  104 104 104
+23 23 23  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+16 16 16  42 42 42  45 45 45  28 26 26  2 2 2  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
diff --git a/drivers/video/logo/logo_fbsd_clut224.ppm b/drivers/video/logo/logo_fbsd_clut224.ppm
new file mode 100644
index 000000000..4387aa045
--- /dev/null
+++ b/drivers/video/logo/logo_fbsd_clut224.ppm
@@ -0,0 +1,2403 @@
+P3
+120 120
+255
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  1 1 1  1 1 1  1 1 1  1 1 1  1 1 1
+1 1 1  2 2 2  2 2 2  2 2 2  2 2 2  2 2 2
+2 2 2  2 2 2  2 2 2  2 2 2  1 1 1  1 1 1
+1 1 1  1 1 1  1 1 1  1 1 1  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  1 1 1  1 1 1  1 1 1
+2 2 2  2 2 2  2 2 2  2 2 2  2 2 2  2 2 2
+2 2 2  2 2 2  2 2 2  2 2 2  2 2 2  2 2 2
+2 2 2  2 2 2  2 2 2  2 2 2  2 2 2  2 2 2
+2 2 2  2 2 2  2 2 2  2 2 2  2 2 2  1 1 1
+1 1 1  1 1 1  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  1 1 1  1 1 1
+1 1 1  2 2 2  2 2 2  2 2 2  2 2 2  2 2 2
+2 2 2  2 2 2  2 2 2  2 2 2  2 2 2  2 2 2
+2 2 2  2 2 2  2 2 2  2 2 2  2 2 2  2 2 2
+2 2 2  2 2 2  2 2 2  2 2 2  2 2 2  2 2 2
+2 2 2  2 2 2  2 2 2  2 2 2  2 2 2  2 2 2
+2 2 2  2 2 2  2 2 2  2 2 2  1 1 1  1 1 1
+1 1 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  1 1 1  1 1 1  2 2 2  2 2 2
+2 2 2  2 2 2  2 2 2  2 2 2  2 2 2  2 2 2
+2 2 2  2 2 2  2 2 2  2 2 2  2 2 2  38 6 5
+38 6 5  44 12 10  44 12 10  44 12 10  44 12 10  44 12 10
+44 12 10  44 12 10  44 12 10  44 12 10  38 6 5  38 6 5
+38 6 5  2 2 2  2 2 2  2 2 2  2 2 2  2 2 2
+2 2 2  2 2 2  2 2 2  2 2 2  2 2 2  2 2 2
+2 2 2  1 1 1  1 1 1  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  39 3 2
+39 3 2  39 3 2  39 3 2  39 3 2  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+1 1 1  2 2 2  2 2 2  2 2 2  2 2 2  2 2 2
+2 2 2  2 2 2  2 2 2  2 2 2  2 2 2  38 6 5
+44 12 10  44 12 10  44 12 10  44 12 10  44 12 10  44 12 10
+44 12 10  44 12 10  44 12 10  44 12 10  44 12 10  44 12 10
+44 12 10  44 12 10  44 12 10  44 12 10  44 12 10  44 12 10
+44 12 10  44 12 10  44 12 10  44 12 10  44 12 10  38 6 5
+2 2 2  2 2 2  2 2 2  2 2 2  2 2 2  2 2 2
+2 2 2  2 2 2  2 2 2  2 2 2  1 1 1  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  39 3 2  39 3 2  39 3 2  39 3 2  2 2 2
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  39 3 2  97 5 3  131 20 10
+153 32 16  176 59 34  176 59 34  174 34 14  166 21 8  124 2 0
+86 4 2  39 3 2  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  1 1 1  2 2 2
+2 2 2  2 2 2  2 2 2  2 2 2  2 2 2  2 2 2
+2 2 2  38 6 5  44 12 10  44 12 10  44 12 10  44 12 10
+44 12 10  44 12 10  44 12 10  44 12 10  44 12 10  44 12 10
+44 12 10  177 84 68  183 106 85  183 106 85  183 106 85  183 106 85
+183 106 85  183 106 85  183 106 85  177 84 68  44 12 10  44 12 10
+44 12 10  44 12 10  44 12 10  44 12 10  44 12 10  44 12 10
+44 12 10  44 12 10  44 12 10  38 6 5  2 2 2  2 2 2
+2 2 2  2 2 2  2 2 2  2 2 2  2 2 2  2 2 2
+1 1 1  1 1 1  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  39 3 2  86 4 2  141 11 5
+166 21 8  168 26 11  168 26 11  156 16 7  147 19 9  141 11 5
+97 5 3  39 3 2  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  39 3 2  124 2 0  147 19 9  176 59 34
+176 59 34  192 80 59  203 102 83  214 127 110  214 127 110  214 127 110
+203 99 81  209 61 24  153 32 16  86 4 2  39 3 2  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  1 1 1  2 2 2  2 2 2  2 2 2
+2 2 2  2 2 2  2 2 2  2 2 2  38 6 5  44 12 10
+44 12 10  44 12 10  44 12 10  44 12 10  44 12 10  44 12 10
+177 84 68  183 106 85  213 149 125  222 184 158  237 212 203  246 228 219
+253 252 251  253 253 252  253 253 252  254 254 253  254 254 253  255 255 255
+255 255 255  254 254 253  253 253 252  253 253 252  253 252 251  246 228 219
+237 212 203  222 184 158  213 149 125  183 106 85  177 84 68  44 12 10
+44 12 10  44 12 10  44 12 10  44 12 10  44 12 10  44 12 10
+38 6 5  2 2 2  2 2 2  2 2 2  2 2 2  2 2 2
+2 2 2  2 2 2  1 1 1  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+39 3 2  86 4 2  153 32 16  209 61 24  213 78 31  203 99 81
+203 99 81  203 99 81  192 80 59  193 53 28  164 15 5  149 3 1
+140 1 0  117 1 0  39 3 2  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  97 5 3  140 1 0  131 20 10  153 32 16
+174 34 14  176 59 34  191 90 72  214 127 110  236 173 149  236 173 149
+236 173 149  221 154 132  214 127 110  203 99 81  204 76 38  147 19 9
+39 3 2  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  2 2 2  2 2 2  2 2 2  2 2 2  2 2 2
+2 2 2  2 2 2  44 12 10  44 12 10  44 12 10  44 12 10
+44 12 10  44 12 10  44 12 10  183 106 85  222 184 158  237 212 203
+249 237 229  253 253 252  255 255 255  255 255 255  255 255 255  255 255 255
+254 254 253  253 253 252  253 253 252  253 253 252  254 254 253  254 254 253
+253 253 252  253 253 252  253 253 252  253 253 252  253 253 252  253 253 252
+255 255 255  255 255 255  255 255 255  254 254 253  249 237 229  237 212 203
+222 184 158  183 106 85  90 10 7  44 12 10  44 12 10  44 12 10
+44 12 10  44 12 10  44 12 10  38 6 5  2 2 2  2 2 2
+2 2 2  2 2 2  2 2 2  2 2 2  1 1 1  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  39 3 2  136 24 13
+204 76 38  214 127 110  221 143 118  221 143 118  221 143 118  221 154 132
+221 143 118  214 127 110  192 80 59  168 26 11  153 0 0  142 1 0
+131 0 0  140 1 0  86 4 2  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  2 2 2  147 0 0  141 0 0  131 0 0  147 19 9
+153 32 16  174 34 14  184 62 32  203 102 83  221 154 132  236 194 173
+241 196 185  241 196 185  236 173 149  239 175 143  231 168 130  221 143 118
+203 99 81  153 32 16  44 12 10  1 1 1  0 0 0  1 1 1
+2 2 2  2 2 2  2 2 2  2 2 2  2 2 2  2 2 2
+44 12 10  44 12 10  44 12 10  44 12 10  44 12 10  90 10 7
+183 106 85  222 184 158  249 237 229  253 253 252  255 255 255  255 255 255
+255 255 255  254 254 253  253 253 252  253 253 252  253 253 252  254 254 253
+254 254 253  254 254 253  254 254 253  254 254 253  254 255 253  255 255 255
+254 254 253  254 254 253  255 255 255  255 255 255  255 255 255  254 254 253
+254 254 253  253 253 252  253 253 252  253 253 252  255 255 255  255 255 255
+255 255 255  253 253 252  251 245 237  222 184 158  183 106 85  177 84 68
+44 12 10  44 12 10  44 12 10  44 12 10  44 12 10  38 6 5
+2 2 2  2 2 2  2 2 2  2 2 2  2 2 2  1 1 1
+0 0 0  1 1 1  86 4 2  176 59 34  203 102 83  239 175 143
+236 173 149  236 173 149  237 187 164  237 187 164  236 194 173  237 187 164
+236 173 149  206 120 99  193 53 28  168 14 5  153 2 0  140 1 0
+131 0 0  140 1 0  131 0 0  2 2 2  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  39 3 2  157 0 0  145 0 0  124 2 0  140 1 0
+156 16 7  168 26 11  186 41 14  192 80 59  206 120 99  233 177 153
+245 214 205  249 237 229  245 214 205  241 196 185  237 187 164  239 175 143
+239 175 143  231 168 130  214 127 110  176 59 34  86 4 2  0 0 0
+2 2 2  2 2 2  2 2 2  2 2 2  44 12 10  44 12 10
+44 12 10  44 12 10  44 12 10  177 84 68  222 184 158  243 220 211
+255 255 255  255 255 255  254 254 253  253 253 252  253 253 252  253 253 252
+254 254 253  254 254 253  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  253 255 254  253 253 252
+253 252 251  253 252 251  253 253 252  255 255 255  255 255 255  243 220 211
+222 184 158  183 106 85  44 12 10  44 12 10  44 12 10  44 12 10
+44 12 10  2 2 2  2 2 2  2 2 2  2 2 2  1 1 1
+86 4 2  184 62 32  221 143 118  237 187 164  243 208 194  241 196 185
+241 196 185  241 205 194  243 210 197  245 214 205  243 210 197  237 187 164
+221 143 118  192 80 59  174 34 14  164 15 5  149 3 1  140 1 0
+124 2 0  142 0 0  150 0 0  39 3 2  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  39 3 2  166 0 0  150 0 0  131 0 0  131 0 0
+149 3 1  156 16 7  174 34 14  185 36 13  204 76 38  214 127 110
+237 187 164  251 245 237  254 254 253  250 233 221  243 208 194  241 196 185
+237 187 164  236 173 149  239 175 143  239 175 143  214 127 110  176 59 34
+90 10 7  2 2 2  2 2 2  44 12 10  44 12 10  44 12 10
+44 12 10  183 106 85  222 184 158  251 245 237  255 255 255  255 255 255
+253 253 252  253 253 252  254 254 253  254 254 253  254 254 253  254 254 253
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  253 252 251  251 245 237  249 237 229  249 237 229
+249 237 229  249 237 229  252 243 235  251 245 237  253 249 244  253 252 250
+253 253 252  253 255 254  254 254 253  253 253 252  253 252 251  254 254 253
+254 254 253  251 245 237  238 198 189  183 106 85  44 12 10  44 12 10
+44 12 10  44 12 10  2 2 2  2 2 2  90 10 7  192 80 59
+231 168 130  245 214 205  248 226 214  245 214 205  245 214 205  245 214 205
+248 226 214  250 233 221  252 242 234  249 237 229  241 205 194  221 154 132
+203 99 81  193 53 28  174 34 14  156 16 7  146 0 0  131 0 0
+124 2 0  150 0 0  157 0 0  39 3 2  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  39 3 2  167 0 0  159 0 0  131 0 0  131 0 0
+141 0 0  153 2 0  168 14 5  179 30 10  196 46 20  204 76 38
+206 120 99  237 187 164  251 240 231  255 255 255  252 243 235  245 214 205
+241 196 185  241 196 185  241 196 185  241 190 156  241 190 156  241 190 156
+221 143 118  176 59 34  44 12 10  44 12 10  44 12 10  177 84 68
+222 184 158  253 252 250  255 255 255  254 254 253  253 253 252  253 253 252
+254 254 253  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  254 254 253  249 237 229  243 220 211  243 220 211  243 220 211
+243 220 211  243 220 211  243 220 211  243 220 211  248 226 214  246 228 219
+250 233 221  249 237 229  253 249 244  253 252 251  253 255 254  253 255 254
+253 253 252  253 253 252  254 254 253  253 249 244  222 184 158  183 106 85
+44 12 10  44 12 10  90 10 7  184 62 32  231 168 130  248 226 214
+252 240 230  249 231 218  248 226 214  248 226 214  250 233 221  250 233 221
+252 243 235  253 253 252  253 252 251  245 214 205  236 173 149  203 99 81
+193 53 28  193 53 28  174 34 14  157 8 2  142 0 0  131 0 0
+131 0 0  157 0 0  163 0 0  39 3 2  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  2 2 2  170 0 0  167 0 0  140 1 0  124 2 0
+140 1 0  147 0 0  159 3 1  173 17 6  193 28 10  196 46 20
+209 61 24  203 99 81  221 154 132  245 214 205  251 240 231  246 228 219
+245 214 205  242 201 184  242 201 184  243 208 194  242 201 184  242 201 184
+242 201 184  242 201 184  227 164 124  183 106 85  222 184 158  251 245 237
+255 255 255  254 254 253  253 253 252  253 253 252  254 254 253  254 254 253
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+254 255 253  255 255 255  255 255 255  255 255 255  254 254 253  254 254 253
+254 254 253  254 254 253  254 254 253  254 254 253  254 254 253  255 255 255
+255 255 255  254 254 253  249 237 229  243 220 211  243 220 211  243 220 211
+243 220 211  243 220 211  243 220 211  243 220 211  243 220 211  243 220 211
+243 220 211  243 220 211  243 220 211  248 226 214  250 233 221  252 242 234
+253 249 244  253 252 251  253 253 252  253 253 252  255 255 255  253 252 250
+213 149 125  177 84 68  227 164 124  248 226 214  253 249 240  252 240 230
+251 236 222  251 238 227  251 238 227  252 240 230  252 242 234  254 252 249
+255 255 255  251 245 237  243 210 197  221 154 132  203 102 83  204 76 38
+193 53 28  193 53 28  164 15 5  150 0 0  140 1 0  124 2 0
+140 1 0  165 0 0  150 0 0  39 3 2  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  158 0 0  176 0 0  146 0 0  124 2 0
+131 0 0  145 0 0  155 2 1  172 5 1  180 20 5  193 28 10
+196 46 20  209 61 24  204 76 38  214 127 110  236 173 149  241 196 185
+243 208 194  243 210 197  243 208 194  243 208 194  245 214 205  245 214 205
+242 201 184  243 208 194  249 231 218  252 242 234  255 255 255  255 255 255
+253 253 252  253 253 252  254 254 253  254 255 253  254 255 253  255 255 255
+254 255 253  254 255 253  254 255 253  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  254 254 253  254 254 253  253 253 252
+254 254 253  254 254 253  254 254 253  254 253 250  254 254 253  255 255 255
+255 255 255  255 255 255  253 252 251  249 237 229  243 220 211  243 220 211
+243 220 211  243 220 211  243 220 211  243 220 211  243 220 211  243 220 211
+243 220 211  248 226 214  248 226 214  248 226 214  248 226 214  243 220 211
+246 228 219  249 237 229  252 243 235  253 252 251  253 249 240  241 190 156
+231 168 130  243 210 197  253 251 245  253 249 244  252 240 230  252 240 230
+252 243 235  252 242 234  252 242 234  251 245 237  254 252 249  253 249 244
+250 233 221  241 196 185  221 154 132  203 102 83  192 80 59  192 80 59
+193 53 28  168 26 11  157 0 0  146 0 0  131 0 0  124 2 0
+145 0 0  177 0 0  140 1 0  2 2 2  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  117 1 0  186 0 0  155 0 0  131 0 0
+131 0 0  141 0 0  153 2 0  163 1 0  176 6 2  191 24 6
+193 28 10  205 44 13  209 61 24  213 78 31  203 99 81  221 143 118
+239 175 143  237 187 164  242 201 184  243 208 194  243 210 197  247 223 207
+249 231 218  249 237 229  253 252 250  253 255 254  253 253 252  253 253 252
+254 254 253  254 254 253  254 255 253  254 255 253  254 255 253  254 255 253
+254 255 253  254 255 253  254 255 253  254 255 253  254 255 253  254 255 253
+254 255 253  254 254 253  254 254 253  254 253 250  254 253 250  254 254 253
+254 253 250  254 253 250  253 253 252  253 253 252  254 254 253  253 253 252
+253 253 252  253 253 252  254 254 253  253 253 252  252 243 235  243 220 211
+243 220 211  243 220 211  243 220 211  243 220 211  243 220 211  243 220 211
+243 220 211  238 198 189  206 120 99  176 59 34  176 59 34  176 59 34
+191 90 72  214 127 110  237 187 164  236 194 173  227 164 124  242 201 184
+253 248 237  253 252 251  253 249 240  251 245 237  253 249 240  253 249 240
+251 245 237  252 243 235  251 245 237  252 243 235  250 233 221  245 214 205
+237 187 164  221 154 132  214 127 110  203 99 81  192 80 59  192 80 59
+179 30 10  163 1 0  153 2 0  142 1 0  131 0 0  131 0 0
+153 0 0  191 24 6  111 7 4  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  86 4 2  196 0 0  163 0 0  140 1 0
+124 2 0  140 1 0  149 3 1  159 3 1  172 5 1  179 7 2
+191 24 6  210 31 6  213 49 12  209 61 24  213 78 31  213 78 31
+203 102 83  221 143 118  231 168 130  237 187 164  243 210 197  246 228 219
+251 245 237  253 252 251  253 253 252  253 253 252  254 254 253  254 254 253
+254 254 253  255 255 255  255 255 255  254 255 253  254 254 251  254 254 251
+254 255 253  254 255 253  254 255 253  254 254 251  254 254 251  254 255 253
+254 254 253  254 253 250  254 252 249  254 253 250  254 253 250  254 253 250
+254 253 250  253 252 251  253 253 252  253 253 252  253 253 252  253 253 252
+253 253 252  253 253 252  253 253 252  254 254 253  254 254 253  253 249 244
+246 228 219  243 220 211  243 220 211  243 220 211  243 220 211  243 220 211
+241 205 194  192 80 59  169 0 0  172 5 1  168 14 5  173 17 6
+173 17 6  173 17 6  204 67 22  231 168 130  251 238 227  255 255 255
+254 252 249  253 249 244  253 249 244  253 249 244  253 249 244  253 249 244
+253 249 240  251 245 237  252 240 230  248 226 214  241 205 194  236 173 149
+221 154 132  214 127 110  206 120 99  203 99 81  192 80 59  185 36 13
+172 5 1  159 3 1  149 3 1  140 1 0  124 2 0  140 1 0
+168 14 5  185 36 13  86 4 2  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  39 3 2  184 9 3  174 0 0  147 0 0
+131 0 0  131 0 0  145 0 0  155 2 1  167 2 1  176 6 2
+184 9 3  195 25 6  210 31 6  213 49 12  209 61 24  213 78 31
+203 99 81  203 102 83  214 127 110  231 168 130  245 214 205  253 249 244
+253 253 252  253 253 252  253 253 252  254 254 253  254 254 253  254 254 253
+254 254 253  254 254 253  254 254 253  254 254 251  254 254 251  254 255 253
+254 255 253  254 255 253  254 255 253  254 254 251  254 254 251  254 253 250
+254 252 249  254 252 249  254 252 249  254 252 249  254 252 249  254 252 249
+253 252 251  253 252 251  253 253 252  253 253 252  253 253 252  253 253 252
+253 253 252  253 253 252  253 253 252  253 253 252  253 253 252  254 254 253
+253 249 244  246 228 219  243 220 211  243 220 211  243 220 211  243 220 211
+213 149 125  165 0 0  167 2 1  168 14 5  168 14 5  168 14 5
+180 23 7  203 99 81  243 208 194  254 254 253  254 254 253  253 251 245
+253 251 245  254 252 249  254 252 249  253 249 244  253 249 244  253 249 244
+251 245 237  250 233 221  245 214 205  241 196 185  237 187 164  236 173 149
+221 143 118  214 127 110  203 102 83  203 99 81  196 46 20  176 6 2
+167 2 1  159 3 1  145 0 0  131 0 0  124 2 0  149 3 1
+193 53 28  174 34 14  2 2 2  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  2 2 2  147 19 9  191 24 6  157 0 0
+131 0 0  124 2 0  140 1 0  149 3 1  159 3 1  172 5 1
+184 9 3  184 9 3  210 31 6  210 31 6  209 61 24  213 78 31
+213 78 31  203 99 81  221 143 118  243 208 194  253 252 250  253 253 252
+253 253 252  254 254 253  254 254 253  253 253 252  254 254 253  253 253 252
+253 252 251  253 252 250  254 252 249  254 253 250  254 254 251  254 254 253
+254 254 251  254 255 253  254 255 253  254 253 250  254 252 249  253 251 245
+253 251 245  254 252 249  254 252 249  253 251 245  253 251 245  254 252 249
+254 252 249  254 252 249  254 252 249  254 252 249  254 252 249  254 252 249
+254 252 249  254 252 249  254 252 249  253 252 250  253 252 250  253 252 250
+253 252 251  253 252 250  249 237 229  243 220 211  237 212 203  243 220 211
+191 90 72  163 0 0  167 2 1  168 14 5  168 14 5  185 36 13
+227 164 124  253 248 237  255 255 255  254 254 253  254 252 249  254 252 249
+254 253 250  254 253 250  254 252 249  253 249 244  253 249 244  252 243 235
+249 231 218  245 214 205  241 196 185  237 187 164  236 173 149  239 175 143
+221 143 118  214 127 110  203 99 81  196 46 20  183 2 0  172 5 1
+163 1 0  153 2 0  140 1 0  131 0 0  131 0 0  168 26 11
+192 80 59  147 19 9  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  90 10 7  193 53 28  167 2 1
+141 0 0  124 2 0  131 0 0  146 0 0  159 3 1  167 2 1
+179 7 2  184 9 3  195 25 6  210 31 6  213 49 12  213 78 31
+213 78 31  214 127 110  245 214 205  253 255 254  253 253 252  253 253 252
+253 253 252  253 253 252  253 252 251  253 253 252  254 253 250  254 252 249
+253 251 245  254 252 249  254 252 249  254 253 250  254 253 250  254 253 250
+254 254 253  254 254 251  254 254 251  253 251 245  253 250 244  253 250 244
+253 250 244  253 250 244  253 251 245  253 250 244  253 249 244  253 249 244
+254 252 249  254 252 249  254 252 249  254 252 249  253 252 250  254 252 249
+254 252 249  254 252 249  254 252 249  253 252 250  253 252 250  253 252 250
+253 252 250  254 252 249  253 252 250  249 237 229  243 220 211  243 220 211
+192 80 59  164 0 0  167 2 1  167 2 1  189 46 14  234 180 140
+254 254 251  254 254 253  254 253 250  254 253 250  254 253 250  254 254 253
+253 253 252  254 252 249  254 252 249  253 249 244  252 243 235  249 231 218
+245 214 205  243 208 194  241 196 185  237 187 164  236 173 149  221 154 132
+221 143 118  203 102 83  196 46 20  189 0 0  179 7 2  167 2 1
+159 3 1  147 0 0  140 1 0  124 2 0  141 11 5  176 59 34
+204 76 38  86 4 2  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  39 3 2  193 53 28  185 36 13
+150 0 0  131 0 0  131 0 0  142 1 0  149 3 1  167 2 1
+174 10 3  184 9 3  195 25 6  210 31 6  213 49 12  209 61 24
+206 120 99  243 220 211  253 255 254  253 253 252  253 253 252  254 254 253
+253 252 251  253 252 251  253 252 251  254 252 249  253 249 244  253 249 244
+253 249 244  253 251 245  254 252 249  254 252 249  254 252 249  254 253 250
+254 253 250  254 253 250  254 252 249  253 249 240  253 249 240  253 249 244
+253 249 240  253 249 240  253 249 240  253 249 244  253 249 244  253 249 244
+253 249 244  253 249 244  253 249 244  253 249 244  253 249 244  253 249 244
+253 249 244  253 249 244  253 249 244  253 249 244  253 249 244  253 249 244
+253 249 244  253 249 244  253 249 244  253 249 244  252 243 235  246 228 219
+191 90 72  163 1 0  168 14 5  174 10 3  206 120 99  255 255 255
+254 254 253  253 253 252  254 254 253  254 254 253  254 254 253  254 254 253
+253 253 252  254 252 249  254 252 249  251 245 237  250 233 221  248 226 214
+245 214 205  243 208 194  241 196 185  236 173 149  236 173 149  221 154 132
+206 120 99  203 40 12  196 0 0  184 9 3  176 6 2  163 1 0
+153 2 0  142 1 0  131 0 0  131 0 0  153 32 16  191 90 72
+176 59 34  39 3 2  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  153 32 16  192 80 59
+159 13 5  140 1 0  124 2 0  140 1 0  149 3 1  157 8 2
+174 10 3  180 20 5  193 28 10  203 40 12  205 44 13  203 99 81
+243 220 211  253 255 254  253 253 252  253 253 252  253 253 252  253 252 251
+253 252 251  253 252 251  253 249 244  253 249 244  253 249 244  253 249 244
+253 249 244  253 249 244  253 251 245  254 252 249  253 251 245  254 252 249
+254 252 249  253 251 245  253 249 244  253 248 237  253 248 237  253 248 237
+253 248 237  253 248 237  253 248 237  253 249 240  253 249 244  253 249 244
+253 249 244  253 249 244  253 249 244  253 249 244  253 249 244  253 249 244
+253 249 244  253 249 244  253 249 244  253 249 244  253 249 244  253 249 244
+253 249 244  253 249 244  253 249 244  253 249 244  253 249 244  253 249 244
+214 127 110  159 3 1  159 13 5  187 29 9  234 180 140  255 255 255
+254 253 250  254 254 253  254 254 253  255 255 255  254 254 253  254 254 253
+253 253 252  254 252 249  253 249 244  251 238 227  249 231 218  247 223 207
+245 214 205  241 196 185  237 187 164  236 173 149  236 173 149  203 102 83
+210 31 6  196 0 0  192 0 0  179 7 2  172 5 1  159 3 1
+149 3 1  140 1 0  124 2 0  142 30 13  192 80 59  203 102 83
+111 7 4  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  44 12 10  203 99 81
+193 53 28  150 0 0  131 0 0  131 0 0  142 1 0  157 8 2
+168 14 5  180 23 7  193 28 10  196 46 20  203 99 81  237 212 203
+253 255 254  253 253 252  253 253 252  253 253 252  253 252 250  253 253 252
+253 252 250  253 249 244  251 245 237  251 245 237  253 249 244  253 249 244
+253 249 244  253 249 244  253 249 244  253 249 244  253 249 244  253 251 245
+253 249 244  253 249 244  251 245 237  252 244 235  252 244 235  252 246 236
+252 246 236  252 244 235  252 244 235  251 245 237  251 245 237  253 249 244
+253 249 244  251 245 237  253 249 244  253 249 244  253 249 244  251 245 237
+253 249 244  251 245 237  251 245 237  251 245 237  251 245 237  251 245 237
+251 245 237  251 245 237  251 245 237  251 245 237  251 245 237  253 252 250
+238 198 189  176 59 34  176 59 34  196 60 25  241 190 156  255 255 255
+254 254 253  255 255 255  254 255 253  255 255 255  254 254 253  254 254 253
+254 253 250  253 249 244  252 242 234  250 233 221  248 226 214  245 214 205
+243 208 194  241 196 185  237 187 164  236 173 149  203 99 81  210 31 6
+196 0 0  196 0 0  184 9 3  176 6 2  163 1 0  153 2 0
+142 1 0  131 0 0  131 20 10  176 59 34  198 112 92  192 80 59
+39 3 2  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  2 2 2  176 59 34
+203 99 81  166 21 8  140 1 0  131 0 0  140 1 0  144 9 4
+166 21 8  174 34 14  185 36 13  192 80 59  238 198 189  253 252 251
+253 252 250  253 253 252  253 253 252  254 253 250  253 252 251  254 252 249
+251 245 237  252 243 235  251 245 237  251 245 237  253 249 244  253 249 244
+253 249 244  253 249 244  253 249 244  253 249 244  253 249 244  253 249 244
+251 245 237  252 243 235  252 243 235  252 240 230  252 240 230  252 240 230
+252 240 230  252 240 230  252 240 230  252 243 235  252 243 235  252 243 235
+251 245 237  251 245 237  251 245 237  251 245 237  251 245 237  251 245 237
+251 245 237  251 245 237  251 245 237  251 245 237  251 245 237  251 245 237
+251 245 237  251 245 237  252 243 235  252 243 235  252 243 235  251 245 237
+246 228 219  210 136 114  214 127 110  206 120 99  237 182 145  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  254 254 253  254 254 253
+253 249 244  252 243 235  252 240 230  250 233 221  248 226 214  245 214 205
+243 208 194  241 196 185  236 173 149  213 78 31  210 31 6  196 0 0
+196 0 0  192 0 0  179 7 2  172 5 1  159 3 1  150 0 0
+140 1 0  131 0 0  153 32 16  188 95 83  214 127 110  153 32 16
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  90 10 7
+203 102 83  184 62 32  149 3 1  131 0 0  141 11 5  147 19 9
+166 21 8  174 34 14  176 59 34  236 194 173  253 249 244  253 249 244
+253 253 252  254 253 250  254 253 250  253 253 252  254 252 249  252 243 235
+252 242 234  252 243 235  251 245 237  251 245 237  253 249 244  253 249 244
+253 249 244  253 249 244  253 249 244  253 249 244  253 249 244  252 243 235
+252 242 234  252 242 234  251 240 231  251 238 227  251 238 227  251 238 227
+251 238 227  251 238 227  251 238 227  252 242 234  252 242 234  252 242 234
+252 242 234  252 242 234  252 242 234  252 242 234  252 242 234  252 242 234
+252 242 234  252 242 234  252 242 234  251 240 231  251 240 231  250 239 228
+250 239 228  251 238 227  251 238 227  251 238 227  251 238 227  251 238 227
+250 239 228  237 187 164  210 136 114  221 143 118  227 164 124  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  254 254 253  254 252 249
+253 249 240  252 242 234  251 238 227  249 231 218  247 223 207  245 214 205
+241 196 185  221 154 132  213 78 31  210 31 6  196 0 0  196 0 0
+196 0 0  184 9 3  172 5 1  163 1 0  153 2 0  142 1 0
+131 0 0  136 24 13  177 84 68  214 127 110  203 99 81  86 4 2
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  2 2 2
+177 84 68  203 102 83  174 34 14  140 1 0  141 11 5  147 19 9
+153 32 16  174 34 14  221 154 132  253 249 244  251 245 237  253 252 251
+254 254 253  254 253 250  254 254 253  253 249 244  252 242 234  251 240 231
+252 243 235  251 245 237  251 245 237  251 245 237  251 245 237  251 245 237
+252 246 236  253 249 244  251 245 237  251 245 237  252 243 235  252 242 234
+252 242 234  249 237 229  251 238 227  251 236 222  251 236 222  251 236 222
+251 236 222  251 236 222  251 238 227  249 237 229  249 237 229  249 237 229
+249 237 229  249 237 229  249 237 229  249 237 229  249 237 229  249 237 229
+249 237 229  251 238 227  251 236 222  251 236 222  251 236 222  251 236 222
+251 236 222  251 236 222  251 236 222  251 236 222  251 236 222  251 236 222
+251 238 227  243 220 211  221 154 132  221 154 132  221 143 118  251 238 227
+255 255 255  255 255 255  255 255 255  254 254 253  254 252 249  253 249 244
+251 245 237  252 240 230  250 233 221  248 226 214  247 223 207  241 196 185
+221 143 118  209 61 24  210 31 6  210 31 6  196 0 0  196 0 0
+184 9 3  179 7 2  167 2 1  159 3 1  149 3 1  131 0 0
+131 20 10  176 59 34  198 112 92  221 154 132  174 34 14  2 2 2
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+111 7 4  214 127 110  192 80 59  156 16 7  141 11 5  136 24 13
+153 32 16  198 112 92  249 237 229  251 242 233  253 249 244  254 254 253
+254 253 250  254 253 250  253 249 244  251 240 231  251 240 231  252 242 234
+252 243 235  251 245 237  251 245 237  251 245 237  251 245 237  253 248 237
+252 246 236  251 245 237  251 245 237  252 243 235  252 242 234  251 240 231
+249 237 229  249 237 229  250 233 221  250 235 219  250 235 219  250 235 219
+250 235 219  250 235 219  250 233 221  249 237 229  249 237 229  249 237 229
+249 237 229  249 237 229  249 237 229  249 237 229  250 233 221  249 231 218
+250 235 219  250 235 219  250 235 219  250 235 219  250 235 219  250 235 219
+250 235 219  250 235 219  250 235 219  250 235 219  250 235 219  250 233 221
+249 237 229  249 237 229  238 198 189  237 187 164  231 168 130  241 190 156
+255 255 255  255 255 255  255 255 255  254 252 249  253 249 244  253 249 244
+252 243 235  252 240 230  250 233 221  243 208 194  236 173 149  203 99 81
+213 49 12  210 31 6  210 31 6  196 0 0  196 0 0  192 0 0
+183 2 0  172 5 1  163 1 0  153 2 0  141 0 0  131 0 0
+153 32 16  188 95 83  213 149 125  206 120 99  86 4 2  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+39 3 2  203 99 81  214 127 110  176 59 34  147 19 9  136 24 13
+177 84 68  243 220 211  249 237 229  251 245 237  253 253 252  254 253 250
+254 253 250  253 251 245  249 237 229  249 237 229  251 240 231  252 242 234
+252 243 235  251 245 237  251 245 237  251 245 237  253 249 244  253 248 237
+252 244 235  252 246 236  252 243 235  252 242 234  249 237 229  249 237 229
+249 237 229  249 237 229  249 231 218  248 226 214  248 226 214  248 226 214
+248 226 214  248 226 214  250 233 221  250 233 221  250 233 221  250 233 221
+246 228 219  250 233 221  249 231 218  248 226 214  248 226 214  248 226 214
+248 226 214  248 226 214  248 226 214  248 226 214  248 226 214  248 226 214
+248 226 214  248 226 214  248 226 214  246 228 219  246 228 219  246 228 219
+246 228 219  246 228 219  246 228 219  238 198 189  238 198 189  231 168 130
+252 240 230  253 249 244  253 249 240  253 251 245  253 249 244  252 242 234
+248 226 214  242 201 184  239 175 143  214 127 110  213 78 31  209 61 24
+213 43 8  210 31 6  210 31 6  196 0 0  196 0 0  184 9 3
+176 6 2  167 2 1  159 3 1  145 0 0  131 0 0  136 24 13
+177 84 68  202 130 106  236 173 149  176 59 34  2 2 2  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  131 20 10  221 143 118  203 99 81  153 32 16  153 32 16
+222 184 158  252 242 234  249 237 229  253 252 250  254 254 253  254 253 250
+254 252 249  249 237 229  250 233 221  249 237 229  251 240 231  252 242 234
+252 243 235  251 245 237  253 249 244  253 249 244  253 249 240  253 248 237
+252 246 236  252 244 235  252 242 234  251 240 231  249 237 229  249 237 229
+249 237 229  246 228 219  246 228 219  248 226 214  247 223 207  247 223 207
+247 223 207  247 223 207  246 228 219  246 228 219  246 228 219  246 228 219
+248 226 214  247 223 207  247 223 207  247 223 207  247 223 207  247 223 207
+247 223 207  247 223 207  247 223 207  247 223 207  247 223 207  247 223 207
+247 223 207  248 226 214  246 228 219  246 228 219  246 228 219  246 228 219
+246 228 219  246 228 219  246 228 219  237 212 203  222 184 158  206 120 99
+239 175 143  248 226 214  243 208 194  242 201 184  237 187 164  236 173 149
+231 168 130  221 143 118  203 102 83  213 78 31  213 78 31  213 49 12
+210 31 6  210 31 6  196 0 0  196 0 0  184 9 3  179 7 2
+172 5 1  163 1 0  150 0 0  140 1 0  131 20 10  176 59 34
+198 112 92  218 160 133  214 127 110  97 5 3  1 1 1  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+1 1 1  2 2 2  198 112 92  221 143 118  192 80 59  213 149 125
+251 242 233  246 228 219  251 245 237  254 254 253  253 253 252  253 253 252
+251 240 231  249 231 218  251 236 222  252 240 230  252 242 234  252 243 235
+251 245 237  253 249 244  253 249 244  253 249 244  253 249 240  253 248 237
+252 246 236  251 242 233  251 240 231  249 237 229  249 237 229  250 233 221
+246 228 219  246 228 219  248 226 214  248 226 214  247 223 207  247 223 207
+247 223 207  247 223 207  248 226 214  248 226 214  247 223 207  247 223 207
+247 223 207  247 223 207  247 223 207  247 223 207  247 223 207  247 223 207
+247 223 207  247 223 207  247 223 207  247 223 207  243 210 197  247 223 207
+243 220 211  243 220 211  243 220 211  243 220 211  243 220 211  243 220 211
+243 220 211  243 220 211  243 220 211  237 212 203  188 95 83  174 34 14
+213 78 31  242 201 184  242 201 184  241 190 156  239 175 143  231 168 130
+221 143 118  214 127 110  203 99 81  213 78 31  213 49 12  210 31 6
+210 31 6  196 0 0  196 0 0  192 0 0  183 2 0  172 5 1
+167 2 1  153 2 0  141 0 0  141 11 5  153 32 16  188 95 83
+213 149 125  237 187 164  176 59 34  2 2 2  2 2 2  1 1 1
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  1 1 1
+2 2 2  2 2 2  136 24 13  221 154 132  213 149 125  246 228 219
+246 228 219  246 228 219  253 252 250  254 254 253  254 254 253  252 243 235
+248 226 214  250 233 221  251 238 227  251 240 231  252 242 234  252 243 235
+251 245 237  253 249 244  254 252 249  254 252 249  253 250 244  253 249 240
+252 246 236  252 240 230  251 240 231  249 237 229  249 237 229  246 228 219
+246 228 219  248 226 214  243 220 211  243 220 211  245 214 205  243 210 197
+243 210 197  244 218 207  243 220 211  245 214 205  243 210 197  243 208 194
+243 210 197  243 208 194  243 208 194  243 208 194  243 208 194  243 208 194
+243 208 194  243 208 194  243 208 194  243 208 194  243 210 197  245 214 205
+243 220 211  245 214 205  245 214 205  245 214 205  243 216 205  243 216 205
+243 216 205  243 220 211  243 216 205  243 220 211  222 184 158  177 84 68
+193 53 28  206 120 99  237 187 164  239 175 143  231 168 130  221 143 118
+214 127 110  203 99 81  213 78 31  209 61 24  213 43 8  210 31 6
+196 0 0  196 0 0  196 0 0  184 9 3  179 7 2  167 2 1
+159 3 1  145 0 0  131 0 0  142 30 13  177 84 68  202 130 106
+222 184 158  214 127 110  86 4 2  2 2 2  2 2 2  1 1 1
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  1 1 1
+2 2 2  2 2 2  38 6 5  188 95 83  237 212 203  249 237 229
+243 220 211  252 243 235  254 254 253  254 254 253  253 249 244  248 226 214
+248 226 214  250 233 221  249 237 229  251 240 231  252 243 235  251 245 237
+253 249 244  253 252 251  254 254 253  255 255 255  254 254 253  253 250 244
+252 244 235  251 238 227  249 237 229  249 237 229  246 228 219  246 228 219
+248 226 214  245 214 205  245 214 205  245 214 205  245 214 205  245 214 205
+245 214 205  243 210 197  243 208 194  242 201 184  242 201 184  242 201 184
+242 201 184  242 201 184  242 201 184  242 201 184  242 201 184  242 201 184
+242 201 184  242 201 184  242 201 184  243 208 194  237 212 203  237 212 203
+237 212 203  237 212 203  237 212 203  237 212 203  237 212 203  237 212 203
+237 212 203  237 212 203  237 212 203  237 212 203  237 212 203  213 149 125
+176 59 34  196 46 20  221 143 118  239 175 143  221 143 118  214 127 110
+203 102 83  213 78 31  213 78 31  213 49 12  210 31 6  210 31 6
+196 0 0  196 0 0  184 9 3  179 7 2  172 5 1  159 3 1
+147 0 0  140 1 0  136 24 13  176 59 34  183 106 85  221 154 132
+237 187 164  174 34 14  38 6 5  2 2 2  2 2 2  2 2 2
+1 1 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  1 1 1  2 2 2
+2 2 2  2 2 2  2 2 2  177 84 68  251 245 237  243 220 211
+246 228 219  253 253 252  254 254 253  254 252 249  248 226 214  247 223 207
+249 231 218  250 233 221  249 237 229  251 240 231  252 243 235  253 249 244
+253 253 252  255 255 255  255 255 255  255 255 255  255 255 255  253 251 245
+252 244 235  251 236 222  250 233 221  250 233 221  246 228 219  243 220 211
+245 214 205  245 214 205  245 214 205  245 214 205  245 214 205  243 210 197
+242 201 184  242 201 184  242 201 184  242 201 184  242 201 184  242 201 184
+242 201 184  242 201 184  242 201 184  242 201 184  236 194 173  236 194 173
+236 194 173  236 194 173  241 196 185  238 198 189  238 198 189  238 198 189
+238 198 189  238 198 189  238 198 189  238 198 189  238 198 189  238 198 189
+238 198 189  241 205 194  238 198 189  238 198 189  238 198 189  238 198 189
+198 112 92  176 59 34  209 61 24  221 143 118  221 143 118  214 127 110
+203 99 81  213 78 31  213 49 12  210 31 6  210 31 6  196 0 0
+196 0 0  192 0 0  184 9 3  172 5 1  163 1 0  150 0 0
+141 0 0  141 11 5  153 32 16  188 95 83  213 149 125  241 196 185
+214 127 110  142 30 13  2 2 2  2 2 2  2 2 2  2 2 2
+1 1 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  1 1 1  2 2 2
+2 2 2  2 2 2  44 12 10  222 184 158  249 237 229  237 212 203
+252 242 234  255 255 255  254 254 253  251 238 227  245 214 205  247 223 207
+249 231 218  250 233 221  252 240 230  252 242 234  253 249 244  253 252 251
+255 255 255  255 255 255  255 255 255  255 255 255  254 254 253  253 249 244
+252 240 230  250 235 219  249 231 218  246 228 219  248 226 214  245 214 205
+245 214 205  241 205 194  241 205 194  241 205 194  242 201 184  242 201 184
+236 194 173  236 194 173  236 194 173  241 190 156  241 190 156  241 190 156
+241 190 156  241 190 156  241 190 156  241 190 156  241 190 156  241 190 156
+241 190 156  241 196 185  241 196 185  241 196 185  241 196 185  241 196 185
+241 196 185  241 196 185  241 196 185  241 196 185  241 196 185  238 198 189
+238 198 189  238 198 189  238 198 189  238 198 189  238 198 189  238 198 189
+222 184 158  183 106 85  176 59 34  213 49 12  203 99 81  203 99 81
+213 78 31  209 61 24  210 31 6  210 31 6  196 0 0  196 0 0
+196 0 0  183 2 0  172 5 1  163 1 0  150 0 0  142 0 0
+131 0 0  142 30 13  177 84 68  202 130 106  222 184 158  233 177 153
+203 99 81  222 184 158  44 12 10  2 2 2  2 2 2  2 2 2
+1 1 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  1 1 1  2 2 2
+2 2 2  2 2 2  183 106 85  253 249 244  243 220 211  243 216 205
+253 252 250  255 255 255  253 249 244  243 210 197  245 214 205  248 226 214
+249 231 218  250 233 221  251 238 227  252 243 235  253 251 245  254 254 253
+255 255 255  255 255 255  255 255 255  255 255 255  253 255 254  253 248 237
+251 236 222  249 231 218  248 226 214  243 220 211  245 214 205  245 214 205
+241 205 194  241 196 185  241 196 185  241 196 185  241 190 156  241 190 156
+241 190 156  241 190 156  241 190 156  241 190 156  241 190 156  241 190 156
+241 190 156  241 190 156  241 190 156  241 190 156  241 190 156  241 190 156
+237 187 164  237 187 164  237 187 164  237 187 164  237 187 164  237 187 164
+237 187 164  237 187 164  237 187 164  237 187 164  237 187 164  236 194 173
+236 194 173  236 194 173  236 194 173  236 194 173  236 194 173  222 184 158
+222 184 158  222 184 158  177 84 68  176 59 34  213 49 12  213 78 31
+213 78 31  213 43 8  210 31 6  196 0 0  196 0 0  192 0 0
+183 2 0  172 5 1  163 1 0  153 0 0  145 0 0  140 1 0
+136 24 13  176 59 34  198 112 92  221 154 132  238 198 189  203 99 81
+222 184 158  237 212 203  183 106 85  2 2 2  2 2 2  2 2 2
+2 2 2  1 1 1  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  2 2 2  2 2 2
+2 2 2  44 12 10  238 198 189  252 242 234  241 205 194  246 228 219
+254 254 253  254 254 253  247 223 207  243 208 194  245 214 205  247 223 207
+248 226 214  250 233 221  251 238 227  251 245 237  253 252 251  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  254 252 249  252 240 230
+249 231 218  247 223 207  247 223 207  245 214 205  243 210 197  241 205 194
+241 196 185  241 196 185  237 187 164  241 190 156  241 190 156  241 190 156
+241 190 156  239 175 143  239 175 143  239 175 143  239 175 143  239 175 143
+239 175 143  239 175 143  239 175 143  227 164 124  221 143 118  221 143 118
+214 127 110  214 127 110  206 120 99  203 99 81  203 99 81  203 99 81
+203 99 81  203 99 81  203 99 81  198 112 92  206 120 99  214 127 110
+213 149 125  221 154 132  218 160 133  222 184 158  222 184 158  222 184 158
+222 184 158  222 184 158  218 160 133  177 84 68  176 59 34  210 31 6
+213 43 8  210 31 6  196 0 0  196 0 0  189 0 0  181 0 0
+172 0 0  163 1 0  155 0 0  146 0 0  140 1 0  141 11 5
+153 32 16  188 95 83  213 149 125  238 198 189  221 154 132  198 112 92
+240 219 206  240 219 206  222 184 158  44 12 10  2 2 2  2 2 2
+2 2 2  1 1 1  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  2 2 2  2 2 2
+2 2 2  177 84 68  251 242 233  243 220 211  241 205 194  251 245 237
+255 255 255  251 240 231  242 201 184  243 208 194  245 214 205  247 223 207
+248 226 214  250 233 221  251 238 227  251 245 237  253 253 252  255 255 255
+255 255 255  255 255 255  255 255 255  254 253 250  252 244 235  250 235 219
+247 223 207  247 223 207  243 210 197  243 210 197  241 196 185  241 196 185
+241 196 185  241 190 156  237 182 145  239 175 143  239 175 143  239 175 143
+239 175 143  239 175 143  239 175 143  239 175 143  239 175 143  221 143 118
+214 127 110  203 99 81  213 78 31  213 78 31  209 61 24  204 76 38
+204 76 38  204 76 38  204 76 38  204 76 38  204 76 38  204 76 38
+193 53 28  193 53 28  193 53 28  184 62 32  192 80 59  184 62 32
+192 80 59  192 80 59  191 90 72  188 95 83  206 120 99  213 149 125
+218 160 133  218 160 133  218 160 133  213 149 125  177 84 68  176 59 34
+195 25 6  196 0 0  196 0 0  189 0 0  183 2 0  174 0 0
+165 0 0  157 0 0  147 0 0  140 1 0  131 0 0  142 30 13
+177 84 68  202 130 106  222 184 158  241 196 185  192 80 59  218 160 133
+237 212 203  240 219 206  237 212 203  177 84 68  2 2 2  2 2 2
+2 2 2  1 1 1  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  1 1 1  2 2 2  2 2 2
+38 6 5  222 184 158  251 245 237  238 198 189  237 212 203  254 254 253
+253 252 251  243 208 194  236 194 173  243 208 194  243 210 197  247 223 207
+248 226 214  250 233 221  252 240 230  251 245 237  254 252 249  254 254 253
+255 255 255  254 254 253  254 252 249  252 246 236  250 235 219  247 223 207
+247 223 207  243 208 194  242 201 184  241 196 185  241 196 185  237 187 164
+239 175 143  239 175 143  239 175 143  239 175 143  239 175 143  239 175 143
+239 175 143  227 164 124  221 143 118  203 102 83  213 78 31  213 49 12
+213 49 12  213 43 8  213 43 8  213 49 12  209 61 24  209 61 24
+209 61 24  193 53 28  193 53 28  193 53 28  193 53 28  193 53 28
+193 53 28  193 53 28  193 53 28  193 53 28  193 53 28  193 53 28
+176 59 34  176 59 34  176 59 34  176 59 34  176 59 34  177 84 68
+177 84 68  198 112 92  210 136 114  213 149 125  210 136 114  177 84 68
+176 59 34  180 23 7  192 0 0  183 2 0  174 0 0  167 2 1
+158 0 0  150 0 0  141 0 0  131 0 0  136 24 13  177 84 68
+198 112 92  218 160 133  237 212 203  203 102 83  176 59 34  218 160 133
+238 198 189  237 212 203  240 219 206  222 184 158  44 12 10  2 2 2
+2 2 2  1 1 1  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  1 1 1  2 2 2  0 0 0
+44 12 10  249 237 229  246 228 219  238 198 189  246 228 219  255 255 255
+249 237 229  237 187 164  236 194 173  242 201 184  243 210 197  247 223 207
+248 226 214  250 233 221  252 240 230  252 243 235  253 249 244  254 252 249
+254 252 249  253 249 244  252 240 230  250 235 219  247 223 207  243 210 197
+243 208 194  242 201 184  241 196 185  241 196 185  237 187 164  239 175 143
+231 168 130  231 168 130  231 168 130  231 168 130  231 168 130  221 143 118
+203 99 81  213 78 31  213 49 12  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  205 44 13  203 40 12
+203 40 12  203 40 12  202 38 11  202 38 11  193 28 10  193 28 10
+193 28 10  193 28 10  185 36 13  185 36 13  185 36 13  185 36 13
+174 34 14  174 34 14  174 34 14  176 59 34  176 59 34  176 59 34
+176 59 34  176 59 34  176 59 34  177 84 68  183 106 85  202 130 106
+177 84 68  176 59 34  153 32 16  172 5 1  169 0 0  158 0 0
+150 0 0  142 0 0  131 0 0  131 20 10  176 59 34  188 95 83
+213 149 125  238 198 189  221 154 132  174 34 14  177 84 68  202 130 106
+222 184 158  237 212 203  243 220 211  237 212 203  90 10 7  0 0 0
+2 2 2  1 1 1  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  1 1 1  2 2 2  0 0 0
+202 130 106  253 253 252  241 205 194  238 198 189  251 245 237  254 254 253
+242 201 184  233 177 153  236 194 173  242 201 184  243 208 194  247 223 207
+249 231 218  251 238 227  252 240 230  252 242 234  252 244 235  253 248 237
+252 244 235  251 238 227  250 235 219  247 223 207  243 208 194  242 201 184
+242 201 184  241 190 156  237 187 164  236 173 149  239 175 143  231 168 130
+231 168 130  231 168 130  221 143 118  214 127 110  213 78 31  213 49 12
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  195 25 6  195 25 6  195 25 6  193 28 10
+193 28 10  191 24 6  191 24 6  180 23 7  182 27 9  182 27 9
+182 27 9  174 34 14  174 34 14  174 34 14  174 34 14  174 34 14
+174 34 14  176 59 34  176 59 34  176 59 34  176 59 34  177 84 68
+183 106 85  183 106 85  177 84 68  153 32 16  147 19 9  150 0 0
+143 0 0  131 0 0  124 2 0  153 32 16  177 84 68  210 136 114
+237 187 164  221 154 132  196 46 20  177 84 68  177 84 68  183 106 85
+218 160 133  222 184 158  240 219 206  240 219 206  202 130 106  2 2 2
+2 2 2  2 2 2  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  1 1 1  2 2 2  38 6 5
+237 212 203  252 243 235  238 198 189  241 205 194  253 253 252  249 237 229
+236 173 149  236 173 149  237 187 164  242 201 184  243 208 194  247 223 207
+249 231 218  251 238 227  252 240 230  252 240 230  252 240 230  252 240 230
+251 236 222  250 235 219  247 223 207  243 208 194  242 201 184  241 190 156
+241 190 156  239 175 143  236 173 149  231 168 130  227 164 124  227 164 124
+221 143 118  203 102 83  213 78 31  213 49 12  213 43 8  213 43 8
+213 43 8  213 43 8  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+196 0 0  196 0 0  196 0 0  196 0 0  184 9 3  184 9 3
+184 9 3  184 9 3  184 9 3  184 9 3  184 9 3  175 12 4
+173 17 6  173 17 6  173 17 6  166 21 8  168 26 11  168 26 11
+153 32 16  153 32 16  153 32 16  153 32 16  153 32 16  153 32 16
+153 32 16  177 84 68  177 84 68  177 84 68  176 59 34  142 30 13
+131 20 10  124 2 0  124 2 0  153 32 16  176 59 34  203 99 81
+203 99 81  195 25 6  153 32 16  177 84 68  177 84 68  183 106 85
+213 149 125  222 184 158  237 212 203  240 219 206  222 184 158  44 12 10
+0 0 0  2 2 2  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  1 1 1  0 0 0  44 12 10
+249 237 229  243 220 211  237 187 164  243 220 211  254 255 253  242 201 184
+231 168 130  236 173 149  237 187 164  241 196 185  243 210 197  247 223 207
+249 231 218  251 238 227  252 240 230  252 240 230  252 240 230  251 238 227
+250 235 219  247 223 207  247 223 207  242 201 184  241 190 156  237 182 145
+239 175 143  231 168 130  227 164 124  221 143 118  221 143 118  203 102 83
+213 78 31  213 49 12  213 43 8  213 43 8  213 43 8  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  192 0 0
+184 9 3  184 9 3  184 9 3  179 7 2  179 7 2  176 6 2
+174 10 3  174 10 3  168 14 5  168 14 5  164 15 5  164 15 5
+156 16 7  156 16 7  153 32 16  153 32 16  153 32 16  153 32 16
+153 32 16  153 32 16  153 32 16  177 84 68  177 84 68  177 84 68
+142 30 13  136 24 13  131 20 10  144 9 4  159 3 1  176 6 2
+175 12 4  142 30 13  177 84 68  177 84 68  177 84 68  177 84 68
+202 130 106  218 160 133  238 198 189  243 220 211  237 212 203  177 84 68
+0 0 0  2 2 2  1 1 1  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  2 2 2  183 106 85
+254 254 253  238 198 189  236 194 173  246 228 219  253 249 244  239 175 143
+231 168 130  239 175 143  233 177 153  242 201 184  243 210 197  248 226 214
+250 235 219  251 238 227  252 244 235  252 246 236  252 240 230  251 236 222
+249 231 218  247 223 207  243 208 194  241 190 156  239 175 143  231 168 130
+227 164 124  221 143 118  221 143 118  214 127 110  213 78 31  213 49 12
+213 43 8  213 43 8  213 43 8  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  192 0 0
+188 0 0  183 2 0  183 2 0  179 0 0  176 6 2  172 5 1
+172 5 1  167 2 1  167 2 1  167 2 1  159 3 1  159 13 5
+159 13 5  156 16 7  156 16 7  147 19 9  147 19 9  142 30 13
+142 30 13  142 30 13  142 30 13  142 30 13  142 30 13  177 84 68
+177 84 68  177 84 68  142 30 13  142 30 13  142 30 13  136 24 13
+136 24 13  142 30 13  142 30 13  177 84 68  177 84 68  183 106 85
+183 106 85  213 149 125  222 184 158  237 212 203  237 212 203  202 130 106
+0 0 0  2 2 2  1 1 1  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  38 6 5  222 184 158
+253 249 244  236 194 173  236 194 173  251 245 237  244 218 207  221 143 118
+227 164 124  231 168 130  233 177 153  241 196 185  243 210 197  247 223 207
+251 236 222  253 249 240  253 252 251  254 252 249  251 242 233  250 235 219
+247 223 207  243 208 194  241 190 156  239 175 143  231 168 130  221 143 118
+221 143 118  214 127 110  213 78 31  213 49 12  213 43 8  213 43 8
+213 43 8  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  192 0 0  189 0 0
+186 0 0  184 0 0  181 0 0  178 0 0  175 0 0  173 0 0
+170 0 0  167 2 1  167 2 1  163 1 0  159 3 1  159 3 1
+157 8 2  157 8 2  144 9 4  144 9 4  147 19 9  147 19 9
+136 24 13  136 24 13  136 24 13  136 24 13  136 24 13  136 24 13
+176 59 34  177 84 68  177 84 68  142 30 13  142 30 13  136 24 13
+136 24 13  136 24 13  142 30 13  177 84 68  177 84 68  177 84 68
+183 106 85  202 130 106  218 160 133  237 212 203  240 219 206  222 184 158
+44 12 10  0 0 0  1 1 1  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  44 12 10  249 237 229
+249 237 229  222 184 158  236 194 173  253 253 252  236 173 149  221 143 118
+221 143 118  231 168 130  233 177 153  242 201 184  243 208 194  247 223 207
+252 244 235  254 254 253  255 255 255  255 255 255  252 244 235  248 226 214
+243 210 197  242 201 184  241 190 156  227 164 124  221 143 118  214 127 110
+203 102 83  213 78 31  213 43 8  213 43 8  213 43 8  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  192 0 0  189 0 0
+186 0 0  184 0 0  179 0 0  177 0 0  174 0 0  172 0 0
+169 0 0  166 0 0  164 0 0  160 0 0  159 0 0  157 0 0
+155 2 1  149 3 1  149 3 1  144 9 4  144 9 4  141 11 5
+141 11 5  136 24 13  136 24 13  136 24 13  136 24 13  136 24 13
+136 24 13  142 30 13  177 84 68  177 84 68  177 84 68  177 84 68
+142 30 13  142 30 13  177 84 68  177 84 68  177 84 68  177 84 68
+177 84 68  183 106 85  213 149 125  238 198 189  243 220 211  238 198 189
+44 12 10  0 0 0  1 1 1  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  177 84 68  253 252 251
+243 220 211  233 177 153  238 198 189  251 240 231  221 143 118  221 143 118
+221 143 118  227 164 124  237 182 145  241 190 156  243 208 194  250 235 219
+254 252 249  255 255 255  255 255 255  254 254 253  252 240 230  247 223 207
+242 201 184  241 190 156  231 168 130  214 127 110  206 120 99  213 78 31
+213 49 12  213 43 8  213 43 8  213 43 8  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  192 0 0  192 0 0  188 0 0
+186 0 0  181 0 0  179 0 0  176 0 0  174 0 0  171 0 0
+169 0 0  165 0 0  163 0 0  160 0 0  158 0 0  155 0 0
+153 0 0  150 0 0  149 3 1  145 0 0  144 9 4  144 9 4
+141 11 5  141 11 5  131 20 10  131 20 10  131 20 10  131 20 10
+136 24 13  131 20 10  136 24 13  177 84 68  177 84 68  177 84 68
+177 84 68  177 84 68  177 84 68  177 84 68  177 84 68  177 84 68
+177 84 68  177 84 68  213 149 125  222 184 158  240 219 206  237 212 203
+183 106 85  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  202 130 106  255 255 255
+238 198 189  233 177 153  237 212 203  241 196 185  206 120 99  214 127 110
+221 143 118  227 164 124  234 180 140  241 190 156  243 208 194  251 238 227
+254 254 253  255 255 255  255 255 255  254 253 250  249 231 218  243 208 194
+241 190 156  234 180 140  227 164 124  203 99 81  213 78 31  213 43 8
+213 43 8  213 43 8  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  192 0 0  189 0 0  186 0 0
+184 0 0  181 0 0  179 0 0  176 0 0  173 0 0  170 0 0
+167 0 0  165 0 0  162 0 0  160 0 0  157 0 0  153 0 0
+152 0 0  150 0 0  147 0 0  145 0 0  142 1 0  140 1 0
+140 1 0  141 11 5  141 11 5  131 20 10  131 20 10  131 20 10
+131 20 10  131 20 10  131 20 10  131 20 10  136 24 13  177 84 68
+177 84 68  177 84 68  177 84 68  177 84 68  177 84 68  177 84 68
+177 84 68  176 59 34  202 130 106  218 160 133  243 220 211  243 220 211
+202 130 106  2 2 2  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  2 2 2  222 184 158  255 255 255
+236 194 173  218 160 133  243 220 211  221 154 132  203 99 81  214 127 110
+221 143 118  227 164 124  234 180 140  241 190 156  243 208 194  251 238 227
+255 255 255  255 255 255  255 255 255  252 242 234  243 210 197  241 190 156
+237 182 145  227 164 124  203 102 83  213 49 12  213 43 8  213 43 8
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  192 0 0  189 0 0  186 0 0
+184 0 0  179 0 0  178 0 0  175 0 0  172 0 0  169 0 0
+166 0 0  164 0 0  160 0 0  159 0 0  157 0 0  153 0 0
+150 0 0  147 0 0  145 0 0  143 0 0  141 0 0  140 1 0
+131 0 0  131 0 0  131 0 0  124 2 0  124 2 0  131 20 10
+131 20 10  131 20 10  131 20 10  111 7 4  90 10 7  136 24 13
+177 84 68  177 84 68  177 84 68  177 84 68  177 84 68  177 84 68
+177 84 68  176 59 34  198 112 92  218 160 133  237 212 203  246 228 219
+222 184 158  44 12 10  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  44 12 10  237 212 203  253 253 252
+222 184 158  218 160 133  237 212 203  206 120 99  203 99 81  206 120 99
+221 143 118  227 164 124  234 180 140  241 190 156  242 201 184  249 231 218
+253 251 245  253 253 252  251 245 237  247 223 207  242 201 184  237 182 145
+227 164 124  203 102 83  213 78 31  213 43 8  213 43 8  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  192 0 0  189 0 0  186 0 0  186 0 0
+181 0 0  179 0 0  176 0 0  174 0 0  171 0 0  169 0 0
+166 0 0  163 0 0  160 0 0  158 0 0  157 0 0  153 0 0
+150 0 0  147 0 0  145 0 0  142 0 0  140 1 0  140 1 0
+131 0 0  131 0 0  131 0 0  124 2 0  124 2 0  124 2 0
+111 7 4  111 7 4  111 7 4  111 7 4  90 10 7  90 10 7
+90 10 7  142 30 13  142 30 13  142 30 13  142 30 13  142 30 13
+176 59 34  153 32 16  183 106 85  213 149 125  238 198 189  249 237 229
+238 198 189  44 12 10  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  44 12 10  246 228 219  251 245 237
+222 184 158  218 160 133  241 205 194  213 78 31  203 99 81  206 120 99
+206 120 99  227 164 124  231 168 130  241 190 156  242 201 184  243 210 197
+249 231 218  249 231 218  243 210 197  242 201 184  241 190 156  227 164 124
+206 120 99  213 78 31  213 78 31  213 43 8  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  192 0 0  189 0 0  186 0 0  184 0 0
+181 0 0  178 0 0  175 0 0  173 0 0  170 0 0  167 0 0
+165 0 0  162 0 0  159 0 0  158 0 0  155 0 0  152 0 0
+150 0 0  146 0 0  145 0 0  142 0 0  140 1 0  140 1 0
+131 0 0  131 0 0  131 0 0  124 2 0  124 2 0  124 2 0
+111 7 4  111 7 4  111 7 4  111 7 4  90 10 7  90 10 7
+90 10 7  90 10 7  136 24 13  142 30 13  142 30 13  142 30 13
+142 30 13  142 30 13  192 80 59  213 149 125  236 194 173  251 245 237
+237 212 203  44 12 10  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  177 84 68  253 249 244  249 237 229
+218 160 133  218 160 133  236 194 173  204 51 17  213 78 31  203 102 83
+206 120 99  227 164 124  231 168 130  237 182 145  241 190 156  242 201 184
+242 201 184  242 201 184  241 190 156  237 182 145  227 164 124  206 120 99
+213 78 31  213 78 31  213 78 31  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  192 0 0  189 0 0  186 0 0  184 0 0  181 0 0
+179 0 0  177 0 0  174 0 0  171 0 0  169 0 0  166 0 0
+164 0 0  160 0 0  159 0 0  157 0 0  153 0 0  150 0 0
+147 0 0  146 0 0  143 0 0  141 0 0  140 1 0  131 0 0
+131 0 0  131 0 0  131 0 0  124 2 0  124 2 0  117 1 0
+117 1 0  111 7 4  111 7 4  111 7 4  97 5 3  90 10 7
+90 10 7  90 10 7  90 10 7  136 24 13  136 24 13  142 30 13
+136 24 13  131 20 10  184 62 32  213 149 125  222 184 158  253 249 244
+237 212 203  177 84 68  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  183 106 85  255 255 255  246 228 219
+218 160 133  218 160 133  221 154 132  205 44 13  213 78 31  213 78 31
+203 102 83  221 143 118  227 164 124  231 168 130  237 182 145  241 190 156
+241 190 156  241 190 156  239 175 143  227 164 124  206 120 99  213 78 31
+213 78 31  213 78 31  213 78 31  210 31 6  210 31 6  210 31 6
+210 31 6  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+192 0 0  192 0 0  188 0 0  186 0 0  184 0 0  181 0 0
+178 0 0  176 0 0  173 0 0  170 0 0  169 0 0  165 0 0
+163 0 0  160 0 0  157 0 0  155 0 0  153 0 0  150 0 0
+147 0 0  145 0 0  142 0 0  140 1 0  140 1 0  131 0 0
+131 0 0  131 0 0  124 2 0  124 2 0  124 2 0  117 1 0
+117 1 0  111 7 4  111 7 4  111 7 4  111 7 4  97 5 3
+90 10 7  90 10 7  90 10 7  90 10 7  111 7 4  131 20 10
+111 7 4  124 2 0  176 59 34  202 130 106  222 184 158  253 249 244
+237 212 203  183 106 85  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  1 1 1  183 106 85  255 255 255  237 212 203
+213 149 125  222 184 158  206 120 99  206 42 10  213 78 31  213 78 31
+213 78 31  206 120 99  221 143 118  227 164 124  231 168 130  239 175 143
+239 175 143  227 164 124  221 143 118  203 102 83  213 78 31  213 78 31
+213 78 31  213 49 12  213 49 12  210 31 6  210 31 6  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+192 0 0  189 0 0  186 0 0  184 0 0  181 0 0  179 0 0
+177 0 0  175 0 0  172 0 0  170 0 0  167 0 0  164 0 0
+162 0 0  159 0 0  157 0 0  155 0 0  150 0 0  150 0 0
+147 0 0  145 0 0  141 0 0  140 1 0  140 1 0  131 0 0
+131 0 0  131 0 0  124 2 0  124 2 0  124 2 0  117 1 0
+117 1 0  111 7 4  117 1 0  140 1 0  124 2 0  97 5 3
+97 5 3  97 5 3  97 5 3  97 5 3  97 5 3  111 7 4
+111 7 4  124 2 0  176 59 34  202 130 106  238 198 189  253 251 245
+237 212 203  202 130 106  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  2 2 2  202 130 106  255 255 255  237 212 203
+213 149 125  218 160 133  192 80 59  213 43 8  213 49 12  213 78 31
+213 78 31  203 99 81  206 120 99  221 143 118  221 143 118  221 143 118
+221 143 118  206 120 99  203 102 83  213 78 31  213 78 31  213 49 12
+213 49 12  213 49 12  213 49 12  210 31 6  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  192 0 0  192 0 0
+189 0 0  186 0 0  186 0 0  181 0 0  179 0 0  178 0 0
+175 0 0  173 0 0  170 0 0  169 0 0  165 0 0  163 0 0
+160 0 0  158 0 0  157 0 0  153 0 0  150 0 0  147 0 0
+145 0 0  143 0 0  141 0 0  140 1 0  140 1 0  131 0 0
+131 0 0  131 0 0  124 2 0  124 2 0  117 1 0  117 1 0
+131 0 0  145 0 0  163 1 0  149 3 1  111 7 4  97 5 3
+97 5 3  111 7 4  97 5 3  111 7 4  111 7 4  111 7 4
+111 7 4  124 2 0  174 34 14  202 130 106  242 201 184  253 251 245
+243 220 211  218 160 133  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  2 2 2  218 160 133  255 255 255  237 212 203
+213 149 125  218 160 133  196 57 25  213 43 8  213 49 12  213 49 12
+213 78 31  213 78 31  213 78 31  203 99 81  203 99 81  203 99 81
+213 78 31  213 78 31  213 78 31  213 78 31  213 49 12  213 49 12
+213 49 12  213 49 12  213 49 12  210 31 6  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  192 0 0  192 0 0  189 0 0
+186 0 0  186 0 0  184 0 0  181 0 0  179 0 0  176 0 0
+174 0 0  172 0 0  169 0 0  167 0 0  164 0 0  162 0 0
+159 0 0  157 0 0  153 0 0  152 0 0  150 0 0  147 0 0
+145 0 0  142 0 0  140 1 0  140 1 0  131 0 0  131 0 0
+131 0 0  124 2 0  124 2 0  124 2 0  140 1 0  157 0 0
+167 0 0  171 0 0  163 1 0  117 1 0  97 5 3  97 5 3
+111 7 4  111 7 4  111 7 4  111 7 4  111 7 4  111 7 4
+117 1 0  131 0 0  174 34 14  202 130 106  243 208 194  253 249 244
+246 228 219  218 160 133  1 1 1  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  2 2 2  218 160 133  255 255 255  237 212 203
+213 149 125  202 130 106  196 46 20  213 43 8  213 49 12  213 49 12
+213 49 12  213 49 12  213 49 12  213 78 31  213 78 31  213 78 31
+213 78 31  213 78 31  213 49 12  213 49 12  213 49 12  213 49 12
+213 49 12  213 49 12  213 49 12  213 43 8  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  192 0 0  192 0 0  189 0 0  186 0 0
+186 0 0  184 0 0  181 0 0  179 0 0  176 0 0  174 0 0
+172 0 0  170 0 0  167 0 0  165 0 0  162 0 0  160 0 0
+158 0 0  155 0 0  153 0 0  150 0 0  147 0 0  146 0 0
+143 0 0  141 0 0  140 1 0  140 1 0  131 0 0  131 0 0
+124 2 0  131 0 0  146 0 0  164 0 0  174 0 0  173 0 0
+171 0 0  167 0 0  131 0 0  97 5 3  97 5 3  111 7 4
+111 7 4  111 7 4  111 7 4  111 7 4  117 1 0  117 1 0
+117 1 0  131 0 0  173 17 6  202 130 106  247 223 207  253 248 237
+246 228 219  222 184 158  44 12 10  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  2 2 2  222 184 158  255 255 255  237 212 203
+210 136 114  202 130 106  196 46 20  210 31 6  213 43 8  213 43 8
+213 49 12  213 49 12  213 49 12  213 49 12  213 49 12  213 49 12
+213 49 12  213 49 12  213 49 12  213 49 12  213 49 12  213 49 12
+213 49 12  213 49 12  213 49 12  213 43 8  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+192 0 0  192 0 0  189 0 0  189 0 0  186 0 0  186 0 0
+184 0 0  181 0 0  179 0 0  177 0 0  175 0 0  173 0 0
+170 0 0  169 0 0  166 0 0  164 0 0  160 0 0  159 0 0
+157 0 0  153 0 0  152 0 0  150 0 0  147 0 0  145 0 0
+142 0 0  141 0 0  140 1 0  131 0 0  131 0 0  131 0 0
+153 0 0  170 0 0  178 0 0  176 0 0  173 0 0  171 0 0
+171 0 0  150 0 0  111 7 4  97 5 3  97 5 3  111 7 4
+117 1 0  117 1 0  117 1 0  117 1 0  117 1 0  117 1 0
+124 2 0  131 0 0  173 17 6  221 143 118  250 235 219  252 244 235
+246 228 219  222 184 158  44 12 10  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  2 2 2  222 184 158  255 255 255  237 212 203
+210 136 114  198 112 92  185 36 13  210 31 6  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 49 12  213 43 8
+213 49 12  213 49 12  213 49 12  213 49 12  213 49 12  213 49 12
+213 49 12  213 49 12  213 49 12  213 43 8  210 31 6  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  192 0 0  192 0 0
+192 0 0  189 0 0  188 0 0  186 0 0  184 0 0  184 0 0
+181 0 0  179 0 0  177 0 0  175 0 0  172 0 0  171 0 0
+169 0 0  166 0 0  164 0 0  160 0 0  159 0 0  157 0 0
+155 0 0  152 0 0  150 0 0  147 0 0  145 0 0  143 0 0
+141 0 0  140 1 0  131 0 0  141 0 0  158 0 0  177 0 0
+179 0 0  179 0 0  176 0 0  174 0 0  172 0 0  171 0 0
+165 0 0  117 1 0  111 7 4  97 5 3  111 7 4  117 1 0
+117 1 0  117 1 0  117 1 0  117 1 0  124 2 0  124 2 0
+124 2 0  140 1 0  180 20 5  227 164 124  250 235 219  252 240 230
+246 228 219  222 184 158  44 12 10  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  2 2 2  222 184 158  255 255 255  237 212 203
+210 136 114  188 95 83  187 29 9  201 33 6  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  213 43 8
+213 49 12  213 49 12  213 49 12  213 49 12  210 31 6  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  189 0 0  189 0 0
+188 0 0  186 0 0  186 0 0  184 0 0  181 0 0  181 0 0
+179 0 0  177 0 0  175 0 0  173 0 0  171 0 0  169 0 0
+167 0 0  165 0 0  163 0 0  160 0 0  158 0 0  157 0 0
+153 0 0  150 0 0  150 0 0  147 0 0  145 0 0  141 0 0
+140 1 0  145 0 0  164 0 0  178 0 0  184 0 0  179 0 0
+179 0 0  176 0 0  175 0 0  173 0 0  171 0 0  172 0 0
+140 1 0  111 7 4  111 7 4  111 7 4  117 1 0  117 1 0
+117 1 0  117 1 0  124 2 0  124 2 0  124 2 0  131 0 0
+131 0 0  147 0 0  185 36 13  241 190 156  250 235 219  252 240 230
+249 237 229  218 160 133  44 12 10  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  2 2 2  218 160 133  255 255 255  243 220 211
+210 136 114  188 95 83  182 27 9  195 25 6  210 31 6  210 31 6
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 49 12  213 49 12  210 31 6  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  186 0 0  186 0 0
+186 0 0  184 0 0  184 0 0  181 0 0  179 0 0  178 0 0
+176 0 0  175 0 0  172 0 0  171 0 0  169 0 0  167 0 0
+164 0 0  162 0 0  160 0 0  158 0 0  157 0 0  153 0 0
+150 0 0  150 0 0  147 0 0  145 0 0  142 0 0  150 0 0
+166 0 0  184 0 0  186 0 0  184 0 0  181 0 0  179 0 0
+178 0 0  175 0 0  174 0 0  172 0 0  172 0 0  159 0 0
+117 1 0  111 7 4  111 7 4  117 1 0  117 1 0  117 1 0
+117 1 0  124 2 0  124 2 0  131 0 0  131 0 0  131 0 0
+140 1 0  157 0 0  196 46 20  242 201 184  250 235 219  252 240 230
+246 228 219  218 160 133  44 12 10  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  2 2 2  213 149 125  255 255 255  246 228 219
+210 136 114  188 95 83  180 23 7  195 25 6  210 31 6  210 31 6
+210 31 6  213 43 8  213 43 8  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 49 12  213 43 8  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  184 0 0  184 0 0
+184 0 0  181 0 0  179 0 0  179 0 0  177 0 0  176 0 0
+174 0 0  173 0 0  170 0 0  169 0 0  167 0 0  165 0 0
+163 0 0  160 0 0  158 0 0  157 0 0  155 0 0  152 0 0
+150 0 0  147 0 0  145 0 0  147 0 0  167 0 0  184 0 0
+188 0 0  186 0 0  184 0 0  181 0 0  179 0 0  178 0 0
+177 0 0  174 0 0  173 0 0  172 0 0  170 0 0  131 0 0
+111 7 4  111 7 4  111 7 4  117 1 0  117 1 0  117 1 0
+124 2 0  124 2 0  131 0 0  131 0 0  140 1 0  140 1 0
+142 0 0  167 0 0  204 76 38  243 208 194  247 223 207  251 238 227
+246 228 219  218 160 133  44 12 10  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  2 2 2  213 149 125  255 255 255  251 245 237
+210 136 114  188 95 83  172 22 7  191 24 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 49 12  210 31 6
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  181 0 0  181 0 0
+179 0 0  179 0 0  177 0 0  176 0 0  174 0 0  173 0 0
+172 0 0  170 0 0  169 0 0  167 0 0  165 0 0  163 0 0
+160 0 0  159 0 0  157 0 0  153 0 0  152 0 0  150 0 0
+147 0 0  150 0 0  164 0 0  184 0 0  189 0 0  186 0 0
+186 0 0  184 0 0  181 0 0  179 0 0  179 0 0  177 0 0
+176 0 0  174 0 0  172 0 0  172 0 0  160 0 0  117 1 0
+111 7 4  111 7 4  111 7 4  117 1 0  117 1 0  124 2 0
+124 2 0  131 0 0  131 0 0  140 1 0  141 0 0  145 0 0
+147 0 0  183 2 0  221 143 118  247 223 207  247 223 207  252 240 230
+246 228 219  213 149 125  2 2 2  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  2 2 2  202 130 106  254 254 253  253 253 252
+210 136 114  188 95 83  166 21 8  191 24 6  195 25 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 49 12  213 43 8
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  179 0 0  179 0 0
+177 0 0  176 0 0  175 0 0  174 0 0  172 0 0  171 0 0
+169 0 0  169 0 0  166 0 0  164 0 0  162 0 0  160 0 0
+158 0 0  157 0 0  155 0 0  153 0 0  150 0 0  147 0 0
+160 0 0  181 0 0  192 0 0  189 0 0  188 0 0  186 0 0
+184 0 0  181 0 0  181 0 0  179 0 0  178 0 0  176 0 0
+175 0 0  173 0 0  171 0 0  171 0 0  140 1 0  111 7 4
+111 7 4  111 7 4  117 1 0  117 1 0  117 1 0  124 2 0
+124 2 0  131 0 0  140 1 0  141 0 0  145 0 0  147 0 0
+153 0 0  193 28 10  237 182 145  247 223 207  247 223 207  252 240 230
+243 220 211  213 149 125  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  183 106 85  253 253 252  255 255 255
+213 149 125  188 95 83  166 21 8  180 20 5  191 24 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  213 43 8  213 43 8  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  213 43 8
+210 31 6  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  192 0 0  177 0 0  177 0 0
+175 0 0  174 0 0  173 0 0  172 0 0  170 0 0  169 0 0
+167 0 0  166 0 0  164 0 0  162 0 0  160 0 0  159 0 0
+157 0 0  155 0 0  153 0 0  150 0 0  153 0 0  172 0 0
+189 0 0  192 0 0  189 0 0  188 0 0  186 0 0  184 0 0
+184 0 0  181 0 0  179 0 0  178 0 0  177 0 0  175 0 0
+174 0 0  172 0 0  172 0 0  163 0 0  117 1 0  111 7 4
+111 7 4  111 7 4  117 1 0  117 1 0  124 2 0  124 2 0
+131 0 0  131 0 0  140 1 0  143 0 0  147 0 0  150 0 0
+167 0 0  213 78 31  241 190 156  247 223 207  247 223 207  252 240 230
+237 212 203  202 130 106  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  177 84 68  253 253 252  255 255 255
+222 184 158  188 95 83  156 18 6  175 12 4  180 20 5  195 25 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  213 43 8  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  213 43 8
+210 31 6  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  189 0 0  174 0 0  174 0 0
+173 0 0  172 0 0  171 0 0  170 0 0  169 0 0  167 0 0
+165 0 0  164 0 0  162 0 0  160 0 0  158 0 0  157 0 0
+155 0 0  153 0 0  150 0 0  157 0 0  184 0 0  196 0 0
+189 0 0  189 0 0  186 0 0  186 0 0  186 0 0  184 0 0
+181 0 0  179 0 0  179 0 0  178 0 0  176 0 0  174 0 0
+173 0 0  171 0 0  171 0 0  147 0 0  111 7 4  111 7 4
+111 7 4  117 1 0  117 1 0  117 1 0  124 2 0  131 0 0
+131 0 0  140 1 0  141 0 0  145 0 0  150 0 0  153 0 0
+184 9 3  227 164 124  241 190 156  247 223 207  247 223 207  252 240 230
+237 212 203  183 106 85  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  44 12 10  249 237 229  255 255 255
+238 198 189  188 95 83  156 16 7  168 14 5  180 20 5  191 24 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  213 43 8
+213 43 8  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  186 0 0  171 0 0  171 0 0
+170 0 0  169 0 0  169 0 0  167 0 0  165 0 0  164 0 0
+162 0 0  160 0 0  159 0 0  158 0 0  157 0 0  153 0 0
+152 0 0  150 0 0  165 0 0  189 0 0  192 0 0  192 0 0
+189 0 0  188 0 0  186 0 0  186 0 0  184 0 0  184 0 0
+181 0 0  179 0 0  178 0 0  177 0 0  175 0 0  173 0 0
+172 0 0  171 0 0  169 0 0  131 0 0  111 7 4  111 7 4
+111 7 4  117 1 0  117 1 0  124 2 0  124 2 0  131 0 0
+131 0 0  141 0 0  143 0 0  147 0 0  150 0 0  159 0 0
+213 78 31  234 180 140  242 201 184  247 223 207  247 223 207  252 240 230
+237 212 203  177 84 68  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  44 12 10  237 212 203  255 255 255
+246 228 219  183 106 85  153 32 16  157 8 2  180 20 5  180 20 5
+195 25 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  213 43 8
+213 43 8  210 31 6  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  186 0 0  169 0 0  169 0 0
+167 0 0  166 0 0  165 0 0  164 0 0  163 0 0  162 0 0
+160 0 0  159 0 0  157 0 0  157 0 0  153 0 0  150 0 0
+150 0 0  171 0 0  192 0 0  192 0 0  189 0 0  189 0 0
+188 0 0  186 0 0  186 0 0  184 0 0  184 0 0  181 0 0
+179 0 0  178 0 0  177 0 0  175 0 0  174 0 0  172 0 0
+171 0 0  170 0 0  164 0 0  117 1 0  111 7 4  111 7 4
+117 1 0  117 1 0  117 1 0  124 2 0  131 0 0  131 0 0
+140 1 0  142 0 0  146 0 0  150 0 0  153 0 0  182 27 9
+231 168 130  241 190 156  243 208 194  247 223 207  248 226 214  250 239 228
+238 198 189  44 12 10  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  44 12 10  222 184 158  255 255 255
+251 242 233  202 130 106  153 32 16  149 3 1  175 12 4  180 20 5
+180 20 5  195 25 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  184 0 0  165 0 0  166 0 0
+165 0 0  164 0 0  163 0 0  162 0 0  160 0 0  159 0 0
+158 0 0  157 0 0  155 0 0  153 0 0  150 0 0  150 0 0
+174 0 0  192 0 0  192 0 0  192 0 0  189 0 0  188 0 0
+186 0 0  186 0 0  184 0 0  184 0 0  181 0 0  179 0 0
+179 0 0  177 0 0  176 0 0  174 0 0  173 0 0  171 0 0
+170 0 0  169 0 0  150 0 0  111 7 4  97 5 3  111 7 4
+117 1 0  117 1 0  124 2 0  124 2 0  131 0 0  131 0 0
+140 1 0  145 0 0  147 0 0  152 0 0  159 0 0  203 102 83
+241 190 156  241 190 156  247 223 207  247 223 207  250 235 219  246 228 219
+222 184 158  44 12 10  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  2 2 2  213 149 125  255 255 255
+253 252 250  213 149 125  176 59 34  142 1 0  168 14 5  180 20 5
+180 20 5  180 20 5  195 25 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  179 0 0  162 0 0  163 0 0
+162 0 0  160 0 0  160 0 0  159 0 0  158 0 0  157 0 0
+155 0 0  153 0 0  152 0 0  150 0 0  150 0 0  175 0 0
+192 0 0  192 0 0  189 0 0  189 0 0  188 0 0  186 0 0
+186 0 0  184 0 0  181 0 0  181 0 0  179 0 0  179 0 0
+178 0 0  176 0 0  175 0 0  173 0 0  172 0 0  170 0 0
+169 0 0  170 0 0  131 0 0  97 5 3  111 7 4  117 1 0
+117 1 0  117 1 0  124 2 0  131 0 0  131 0 0  140 1 0
+142 0 0  147 0 0  150 0 0  153 0 0  193 53 28  234 180 140
+241 190 156  241 190 156  247 223 207  247 223 207  250 235 219  246 228 219
+213 149 125  44 12 10  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  183 106 85  253 249 244
+255 255 255  222 184 158  177 84 68  140 1 0  157 8 2  175 12 4
+180 20 5  180 20 5  180 20 5  195 25 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  213 43 8  213 43 8  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  210 31 6  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  178 0 0  160 0 0  160 0 0
+159 0 0  158 0 0  157 0 0  157 0 0  155 0 0  153 0 0
+153 0 0  152 0 0  150 0 0  150 0 0  177 0 0  196 0 0
+192 0 0  189 0 0  189 0 0  188 0 0  186 0 0  186 0 0
+184 0 0  184 0 0  181 0 0  179 0 0  179 0 0  178 0 0
+177 0 0  175 0 0  174 0 0  172 0 0  171 0 0  169 0 0
+169 0 0  166 0 0  124 2 0  97 5 3  117 1 0  117 1 0
+117 1 0  124 2 0  124 2 0  131 0 0  131 0 0  141 0 0
+145 0 0  150 0 0  150 0 0  173 17 6  227 164 124  241 190 156
+241 190 156  242 201 184  247 223 207  247 223 207  251 236 222  240 219 206
+183 106 85  2 2 2  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  90 10 7  246 228 219
+255 255 255  246 228 219  177 84 68  141 11 5  142 1 0  175 12 4
+175 12 4  180 20 5  180 20 5  180 20 5  191 24 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  213 43 8  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  176 0 0  157 0 0  157 0 0
+157 0 0  157 0 0  155 0 0  153 0 0  152 0 0  150 0 0
+150 0 0  150 0 0  147 0 0  173 0 0  192 0 0  192 0 0
+189 0 0  188 0 0  186 0 0  186 0 0  186 0 0  184 0 0
+181 0 0  181 0 0  179 0 0  179 0 0  178 0 0  176 0 0
+175 0 0  173 0 0  172 0 0  170 0 0  169 0 0  169 0 0
+167 0 0  159 0 0  117 1 0  111 7 4  117 1 0  117 1 0
+124 2 0  124 2 0  131 0 0  131 0 0  140 1 0  143 0 0
+147 0 0  150 0 0  163 1 0  206 120 99  241 190 156  241 190 156
+241 190 156  247 223 207  247 223 207  250 235 219  250 239 228  237 212 203
+177 84 68  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  44 12 10  237 212 203
+255 255 255  254 254 253  202 130 106  136 24 13  140 1 0  157 8 2
+175 12 4  175 12 4  180 20 5  180 20 5  180 20 5  195 25 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  173 0 0  153 0 0  155 0 0
+153 0 0  153 0 0  152 0 0  150 0 0  150 0 0  150 0 0
+147 0 0  146 0 0  169 0 0  192 0 0  189 0 0  189 0 0
+188 0 0  186 0 0  186 0 0  186 0 0  184 0 0  184 0 0
+181 0 0  179 0 0  179 0 0  178 0 0  177 0 0  175 0 0
+174 0 0  172 0 0  171 0 0  169 0 0  169 0 0  167 0 0
+167 0 0  150 0 0  117 1 0  117 1 0  117 1 0  117 1 0
+124 2 0  131 0 0  131 0 0  140 1 0  141 0 0  145 0 0
+147 0 0  160 0 0  204 76 38  241 190 156  241 190 156  241 190 156
+241 190 156  247 223 207  250 235 219  250 235 219  250 239 228  222 184 158
+44 12 10  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  2 2 2  213 149 125
+253 253 252  255 255 255  222 184 158  142 30 13  131 0 0  149 3 1
+174 10 3  175 12 4  175 12 4  180 20 5  180 20 5  191 24 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  210 31 6
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  170 0 0  150 0 0  152 0 0
+150 0 0  150 0 0  150 0 0  147 0 0  147 0 0  146 0 0
+145 0 0  164 0 0  192 0 0  189 0 0  188 0 0  186 0 0
+186 0 0  186 0 0  184 0 0  184 0 0  181 0 0  181 0 0
+179 0 0  178 0 0  178 0 0  176 0 0  175 0 0  173 0 0
+172 0 0  171 0 0  170 0 0  169 0 0  167 0 0  165 0 0
+167 0 0  146 0 0  117 1 0  117 1 0  117 1 0  124 2 0
+131 0 0  131 0 0  140 1 0  141 0 0  145 0 0  147 0 0
+160 0 0  209 61 24  234 180 140  241 190 156  241 190 156  241 190 156
+247 223 207  250 235 219  250 235 219  250 235 219  246 228 219  213 149 125
+38 6 5  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  1 1 1  177 84 68
+251 245 237  255 255 255  246 228 219  176 59 34  131 0 0  140 1 0
+168 14 5  174 10 3  175 12 4  175 12 4  180 20 5  195 25 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  213 43 8
+210 31 6  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  169 0 0  147 0 0  150 0 0
+150 0 0  147 0 0  147 0 0  146 0 0  145 0 0  142 0 0
+153 0 0  186 0 0  189 0 0  188 0 0  186 0 0  186 0 0
+186 0 0  184 0 0  184 0 0  181 0 0  181 0 0  179 0 0
+179 0 0  178 0 0  176 0 0  175 0 0  174 0 0  172 0 0
+171 0 0  170 0 0  169 0 0  167 0 0  166 0 0  164 0 0
+167 0 0  145 0 0  117 1 0  117 1 0  124 2 0  124 2 0
+131 0 0  131 0 0  140 1 0  142 0 0  145 0 0  167 0 0
+203 40 12  227 164 124  234 180 140  241 190 156  241 190 156  241 190 156
+247 223 207  250 235 219  250 235 219  250 239 228  237 212 203  183 106 85
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  44 12 10
+237 212 203  255 255 255  253 252 251  202 130 106  124 2 0  124 2 0
+149 3 1  168 14 5  174 10 3  175 12 4  175 12 4  191 24 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  213 43 8  213 43 8  213 43 8  213 43 8  213 43 8
+213 43 8  210 31 6  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  173 0 0  146 0 0  146 0 0
+146 0 0  145 0 0  145 0 0  143 0 0  142 0 0  143 0 0
+177 0 0  189 0 0  186 0 0  186 0 0  186 0 0  184 0 0
+184 0 0  181 0 0  181 0 0  179 0 0  179 0 0  178 0 0
+177 0 0  176 0 0  175 0 0  173 0 0  172 0 0  171 0 0
+170 0 0  169 0 0  167 0 0  166 0 0  165 0 0  165 0 0
+169 0 0  141 0 0  117 1 0  124 2 0  124 2 0  131 0 0
+131 0 0  140 1 0  141 0 0  146 0 0  173 0 0  210 31 6
+206 120 99  227 164 124  227 164 124  234 180 140  234 180 140  242 201 184
+250 235 219  250 235 219  250 235 219  250 239 228  222 184 158  44 12 10
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  2 2 2
+213 149 125  253 253 252  255 255 255  238 198 189  136 24 13  124 2 0
+131 0 0  157 8 2  168 14 5  168 14 5  175 12 4  191 24 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  213 43 8  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  210 31 6  196 0 0  192 0 0  192 0 0
+192 0 0  192 0 0  196 0 0  172 0 0  145 0 0  145 0 0
+143 0 0  142 0 0  142 0 0  141 0 0  140 1 0  159 0 0
+188 0 0  186 0 0  186 0 0  186 0 0  184 0 0  184 0 0
+181 0 0  181 0 0  179 0 0  179 0 0  178 0 0  177 0 0
+175 0 0  174 0 0  174 0 0  172 0 0  170 0 0  170 0 0
+169 0 0  167 0 0  166 0 0  165 0 0  164 0 0  166 0 0
+170 0 0  143 0 0  117 1 0  124 2 0  131 0 0  131 0 0
+140 1 0  141 0 0  143 0 0  165 0 0  195 25 6  213 78 31
+206 120 99  221 143 118  227 164 124  227 164 124  234 180 140  247 223 207
+250 235 219  250 235 219  251 236 222  246 228 219  213 149 125  44 12 10
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+177 84 68  246 228 219  255 255 255  253 252 250  177 84 68  124 2 0
+124 2 0  149 3 1  168 14 5  168 14 5  168 14 5  184 9 3
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  210 31 6  192 0 0  189 0 0
+192 0 0  192 0 0  192 0 0  176 0 0  142 0 0  142 0 0
+141 0 0  141 0 0  141 0 0  140 1 0  142 0 0  178 0 0
+186 0 0  186 0 0  184 0 0  184 0 0  181 0 0  181 0 0
+181 0 0  179 0 0  179 0 0  178 0 0  177 0 0  176 0 0
+174 0 0  173 0 0  172 0 0  171 0 0  169 0 0  169 0 0
+169 0 0  166 0 0  165 0 0  165 0 0  166 0 0  169 0 0
+171 0 0  142 0 0  124 2 0  131 0 0  131 0 0  131 0 0
+140 1 0  143 0 0  146 0 0  184 9 3  213 78 31  213 78 31
+203 102 83  206 120 99  221 143 118  227 164 124  241 190 156  247 223 207
+250 235 219  250 235 219  252 240 230  237 212 203  183 106 85  2 2 2
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+44 12 10  222 184 158  254 254 253  255 255 255  222 184 158  124 2 0
+124 2 0  131 0 0  157 8 2  168 14 5  168 14 5  184 9 3
+196 0 0  195 25 6  195 25 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  210 31 6  196 0 0
+188 0 0  189 0 0  189 0 0  179 0 0  140 1 0  140 1 0
+140 1 0  140 1 0  140 1 0  131 0 0  155 0 0  186 0 0
+184 0 0  184 0 0  181 0 0  181 0 0  179 0 0  179 0 0
+179 0 0  178 0 0  177 0 0  176 0 0  175 0 0  174 0 0
+172 0 0  171 0 0  171 0 0  169 0 0  169 0 0  167 0 0
+166 0 0  165 0 0  164 0 0  166 0 0  169 0 0  172 0 0
+173 0 0  141 0 0  124 2 0  131 0 0  131 0 0  140 1 0
+142 0 0  146 0 0  166 21 8  209 61 24  213 78 31  213 78 31
+213 78 31  203 102 83  206 120 99  227 164 124  242 201 184  247 223 207
+247 223 207  250 235 219  249 237 229  222 184 158  44 12 10  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+2 2 2  183 106 85  251 245 237  255 255 255  251 245 237  177 84 68
+117 1 0  117 1 0  142 1 0  157 8 2  164 15 5  184 9 3
+196 0 0  195 25 6  195 25 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  213 43 8
+195 25 6  186 0 0  186 0 0  181 0 0  140 1 0  140 1 0
+140 1 0  131 0 0  131 0 0  131 0 0  171 0 0  184 0 0
+181 0 0  181 0 0  181 0 0  179 0 0  179 0 0  178 0 0
+178 0 0  177 0 0  176 0 0  175 0 0  173 0 0  173 0 0
+171 0 0  170 0 0  169 0 0  169 0 0  167 0 0  166 0 0
+165 0 0  164 0 0  166 0 0  169 0 0  171 0 0  174 0 0
+175 0 0  145 0 0  131 0 0  131 0 0  140 1 0  141 0 0
+149 3 1  174 34 14  196 57 25  209 61 24  209 61 24  209 61 24
+213 78 31  213 78 31  203 99 81  227 164 124  243 210 197  243 210 197
+247 223 207  250 235 219  237 212 203  183 106 85  2 2 2  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  44 12 10  238 198 189  255 255 255  255 255 255  222 184 158
+111 7 4  117 1 0  124 2 0  153 2 0  157 8 2  179 7 2
+196 0 0  196 0 0  196 0 0  195 25 6  195 25 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  213 43 8  213 43 8  213 43 8  213 43 8  213 43 8
+213 43 8  210 31 6  189 0 0  179 0 0  141 0 0  131 0 0
+131 0 0  131 0 0  131 0 0  142 0 0  181 0 0  181 0 0
+179 0 0  179 0 0  179 0 0  178 0 0  177 0 0  176 0 0
+175 0 0  174 0 0  174 0 0  173 0 0  172 0 0  171 0 0
+169 0 0  169 0 0  169 0 0  167 0 0  166 0 0  164 0 0
+164 0 0  166 0 0  169 0 0  171 0 0  174 0 0  177 0 0
+178 0 0  147 0 0  131 0 0  140 1 0  141 0 0  159 13 5
+189 46 14  196 60 25  196 60 25  196 57 25  209 61 24  209 61 24
+209 61 24  213 78 31  203 99 81  237 187 164  243 208 194  243 208 194
+247 223 207  246 228 219  222 184 158  44 12 10  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  2 2 2  183 106 85  253 249 244  255 255 255  253 252 250
+177 84 68  117 1 0  117 1 0  131 0 0  157 8 2  172 5 1
+196 0 0  196 0 0  196 0 0  195 25 6  195 25 6  195 25 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  213 43 8  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  195 25 6  146 0 0  131 0 0
+131 0 0  131 0 0  124 2 0  155 0 0  181 0 0  179 0 0
+179 0 0  178 0 0  178 0 0  177 0 0  176 0 0  175 0 0
+174 0 0  173 0 0  172 0 0  171 0 0  170 0 0  170 0 0
+169 0 0  167 0 0  166 0 0  165 0 0  165 0 0  164 0 0
+167 0 0  169 0 0  172 0 0  174 0 0  177 0 0  179 0 0
+181 0 0  152 0 0  131 0 0  149 3 1  174 34 14  196 60 25
+204 67 22  209 61 24  209 61 24  209 61 24  209 61 24  209 61 24
+209 61 24  209 61 24  214 127 110  241 196 185  242 201 184  242 201 184
+247 223 207  237 212 203  202 130 106  38 6 5  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  44 12 10  238 198 189  254 254 253  255 255 255
+238 198 189  111 7 4  117 1 0  117 1 0  142 1 0  172 5 1
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  195 25 6
+195 25 6  195 25 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  187 29 9  156 18 6
+131 0 0  124 2 0  124 2 0  166 0 0  179 0 0  177 0 0
+177 0 0  176 0 0  175 0 0  174 0 0  174 0 0  173 0 0
+172 0 0  171 0 0  170 0 0  170 0 0  169 0 0  169 0 0
+167 0 0  166 0 0  165 0 0  164 0 0  165 0 0  167 0 0
+170 0 0  172 0 0  175 0 0  177 0 0  179 0 0  181 0 0
+184 0 0  172 5 1  168 26 11  194 55 23  204 67 22  204 67 22
+204 67 22  209 61 24  209 61 24  209 61 24  209 61 24  209 61 24
+209 61 24  204 76 38  236 173 149  236 194 173  236 194 173  242 201 184
+240 219 206  222 184 158  44 12 10  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  2 2 2  183 106 85  249 237 229  255 255 255
+253 252 250  183 106 85  111 7 4  117 1 0  117 1 0  149 3 1
+184 9 3  196 0 0  196 0 0  196 0 0  196 0 0  195 25 6
+195 25 6  195 25 6  195 25 6  195 25 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  200 41 9
+179 30 10  156 18 6  140 1 0  170 0 0  177 0 0  175 0 0
+176 0 0  175 0 0  174 0 0  173 0 0  173 0 0  172 0 0
+171 0 0  170 0 0  169 0 0  169 0 0  167 0 0  166 0 0
+166 0 0  165 0 0  164 0 0  166 0 0  169 0 0  170 0 0
+173 0 0  175 0 0  177 0 0  178 0 0  181 0 0  184 9 3
+204 37 8  213 49 12  204 67 22  204 67 22  204 67 22  204 67 22
+204 67 22  209 61 24  209 61 24  209 61 24  209 61 24  209 61 24
+209 61 24  221 143 118  237 187 164  237 187 164  237 187 164  238 198 189
+237 212 203  183 106 85  2 2 2  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  44 12 10  222 184 158  255 255 255
+255 255 255  237 212 203  136 24 13  117 1 0  117 1 0  124 2 0
+176 6 2  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  195 25 6  195 25 6  195 25 6  195 25 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  200 41 9
+200 41 9  189 46 14  200 41 9  195 25 6  184 9 3  179 7 2
+172 0 0  170 0 0  171 0 0  171 0 0  171 0 0  170 0 0
+169 0 0  169 0 0  167 0 0  166 0 0  166 0 0  164 0 0
+164 0 0  165 0 0  167 0 0  169 0 0  170 0 0  171 0 0
+174 0 0  178 0 0  184 9 3  195 25 6  213 49 12  213 49 12
+213 78 31  204 67 22  204 67 22  204 67 22  204 67 22  204 67 22
+209 61 24  209 61 24  209 61 24  209 61 24  209 61 24  209 61 24
+203 102 83  237 187 164  237 187 164  237 187 164  237 187 164  237 212 203
+222 184 158  44 12 10  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  2 2 2  177 84 68  246 228 219
+255 255 255  255 255 255  198 112 92  117 1 0  117 1 0  117 1 0
+142 1 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  195 25 6  195 25 6  195 25 6  195 25 6
+195 25 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  213 43 8  213 43 8  213 43 8  213 43 8  200 41 9
+189 46 14  189 46 14  200 41 9  213 43 8  213 43 8  213 43 8
+201 33 6  191 24 6  184 9 3  172 0 0  169 0 0  167 0 0
+167 0 0  166 0 0  164 0 0  163 0 0  163 0 0  163 0 0
+164 0 0  167 0 0  169 0 0  171 0 0  179 7 2  191 24 6
+204 37 8  213 49 12  213 49 12  213 78 31  213 78 31  213 78 31
+213 78 31  209 61 24  204 67 22  204 67 22  204 67 22  204 67 22
+209 61 24  209 61 24  209 61 24  209 61 24  209 61 24  213 78 31
+236 173 149  237 187 164  237 187 164  237 187 164  238 198 189  238 198 189
+183 106 85  2 2 2  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  44 12 10  213 149 125
+253 249 244  255 255 255  246 228 219  153 32 16  117 1 0  117 1 0
+117 1 0  167 2 1  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  195 25 6  195 25 6  195 25 6
+195 25 6  195 25 6  195 25 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  213 43 8  213 43 8  200 41 9
+186 41 14  189 46 14  200 41 9  213 43 8  213 43 8  213 49 12
+213 49 12  213 49 12  213 49 12  213 43 8  213 43 8  201 33 6
+201 33 6  191 24 6  191 24 6  191 24 6  191 24 6  191 24 6
+201 33 6  204 37 8  213 43 8  213 49 12  213 49 12  213 49 12
+213 78 31  213 78 31  213 78 31  213 78 31  213 78 31  213 78 31
+213 78 31  209 61 24  204 67 22  204 67 22  204 67 22  209 61 24
+209 61 24  209 61 24  209 61 24  209 61 24  209 61 24  221 143 118
+237 187 164  237 187 164  237 187 164  237 187 164  237 212 203  213 149 125
+44 12 10  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  44 12 10
+222 184 158  255 255 255  254 254 253  218 160 133  131 0 0  117 1 0
+117 1 0  124 2 0  172 5 1  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  195 25 6  195 25 6
+195 25 6  195 25 6  195 25 6  195 25 6  195 25 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  213 43 8  200 41 9
+186 41 14  186 41 14  206 42 10  213 43 8  213 43 8  213 43 8
+213 43 8  213 49 12  213 49 12  213 49 12  213 49 12  213 49 12
+213 49 12  213 49 12  213 49 12  213 49 12  213 49 12  213 49 12
+213 49 12  213 49 12  213 49 12  213 49 12  213 49 12  213 49 12
+213 49 12  213 78 31  213 78 31  213 78 31  213 78 31  213 78 31
+213 78 31  213 78 31  204 67 22  204 67 22  204 67 22  209 61 24
+209 61 24  209 61 24  209 61 24  209 61 24  214 127 110  237 187 164
+237 187 164  237 187 164  237 187 164  238 198 189  222 184 158  90 10 7
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  2 2 2
+177 84 68  240 219 206  254 254 253  251 245 237  188 95 83  124 2 0
+124 2 0  124 2 0  131 0 0  179 7 2  196 0 0  192 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  195 25 6
+195 25 6  195 25 6  195 25 6  195 25 6  195 25 6  195 25 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  201 33 6
+185 36 13  185 36 13  200 41 9  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 49 12  213 49 12  213 49 12  213 49 12
+213 49 12  213 49 12  213 49 12  213 49 12  213 49 12  213 49 12
+213 49 12  213 49 12  213 49 12  213 49 12  213 49 12  213 49 12
+213 49 12  213 49 12  213 78 31  213 78 31  213 78 31  213 78 31
+213 78 31  213 78 31  204 67 22  204 67 22  204 67 22  209 61 24
+209 61 24  209 61 24  213 49 12  203 102 83  236 173 149  233 177 153
+233 177 153  237 187 164  236 194 173  238 198 189  183 106 85  2 2 2
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+44 12 10  183 106 85  249 237 229  253 253 252  243 220 211  153 32 16
+131 0 0  124 2 0  124 2 0  140 1 0  179 7 2  196 0 0
+192 0 0  192 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+195 25 6  195 25 6  195 25 6  195 25 6  195 25 6  195 25 6
+195 25 6  195 25 6  210 31 6  201 33 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  201 33 6
+179 30 10  185 36 13  200 41 9  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 49 12  213 49 12  213 49 12
+213 49 12  213 49 12  213 49 12  213 49 12  213 49 12  213 49 12
+213 49 12  213 49 12  213 49 12  213 49 12  213 49 12  213 49 12
+213 49 12  213 49 12  213 49 12  213 78 31  213 78 31  213 78 31
+213 78 31  213 78 31  213 78 31  209 61 24  209 61 24  209 61 24
+209 61 24  213 49 12  213 78 31  236 173 149  233 177 153  233 177 153
+233 177 153  237 187 164  237 212 203  202 130 106  44 12 10  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  44 12 10  218 160 133  253 249 244  254 252 249  222 184 158
+156 16 7  131 0 0  131 0 0  131 0 0  142 1 0  179 7 2
+196 0 0  192 0 0  192 0 0  184 9 3  184 9 3  184 9 3
+184 9 3  195 25 6  195 25 6  195 25 6  195 25 6  195 25 6
+195 25 6  195 25 6  195 25 6  195 25 6  201 33 6  201 33 6
+201 33 6  201 33 6  210 31 6  210 31 6  210 31 6  201 33 6
+179 30 10  179 30 10  204 37 8  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 49 12  213 49 12
+213 49 12  213 49 12  213 49 12  213 49 12  213 49 12  213 49 12
+213 49 12  213 49 12  213 49 12  213 49 12  213 49 12  213 49 12
+213 49 12  213 49 12  213 49 12  213 78 31  213 78 31  213 78 31
+213 78 31  213 78 31  213 78 31  213 78 31  213 78 31  209 61 24
+213 49 12  213 78 31  239 175 143  236 173 149  236 173 149  236 173 149
+237 187 164  238 198 189  222 184 158  44 12 10  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  44 12 10  222 184 158  253 249 244  253 249 244
+213 149 125  150 0 0  140 1 0  131 0 0  140 1 0  142 1 0
+179 7 2  196 0 0  192 0 0  192 0 0  192 0 0  184 9 3
+184 9 3  184 9 3  195 25 6  195 25 6  195 25 6  195 25 6
+195 25 6  195 25 6  195 25 6  195 25 6  195 25 6  201 33 6
+201 33 6  201 33 6  201 33 6  201 33 6  201 33 6  201 33 6
+179 30 10  179 30 10  204 37 8  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  213 49 12
+213 49 12  213 49 12  213 49 12  213 49 12  213 49 12  213 49 12
+213 49 12  213 49 12  213 49 12  213 49 12  213 49 12  213 49 12
+213 49 12  213 49 12  213 49 12  213 49 12  213 49 12  213 78 31
+213 78 31  213 78 31  213 78 31  213 78 31  213 78 31  213 49 12
+213 78 31  239 175 143  236 173 149  236 173 149  236 173 149  241 190 156
+236 194 173  222 184 158  177 84 68  2 2 2  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  2 2 2  177 84 68  222 184 158  253 249 244
+249 237 229  198 112 92  152 0 0  141 0 0  141 0 0  142 0 0
+147 0 0  176 6 2  196 0 0  196 0 0  192 0 0  192 0 0
+184 9 3  184 9 3  184 9 3  191 24 6  195 25 6  195 25 6
+195 25 6  195 25 6  195 25 6  195 25 6  195 25 6  195 25 6
+201 33 6  201 33 6  201 33 6  201 33 6  201 33 6  201 33 6
+187 29 9  179 30 10  201 33 6  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  213 43 8
+213 49 12  213 49 12  213 49 12  213 49 12  213 49 12  213 49 12
+213 49 12  213 49 12  213 49 12  213 49 12  213 49 12  213 49 12
+213 49 12  213 49 12  213 49 12  213 49 12  213 49 12  213 78 31
+213 78 31  213 78 31  213 78 31  213 78 31  213 49 12  213 78 31
+231 168 130  239 175 143  239 175 143  236 173 149  239 175 143  236 194 173
+222 184 158  177 84 68  2 2 2  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  2 2 2  177 84 68  238 198 189
+253 249 244  246 228 219  191 90 72  155 0 0  146 0 0  146 0 0
+147 0 0  147 0 0  172 5 1  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  195 25 6
+195 25 6  195 25 6  195 25 6  195 25 6  195 25 6  195 25 6
+195 25 6  201 33 6  201 33 6  201 33 6  201 33 6  210 31 6
+187 29 9  179 30 10  201 33 6  210 31 6  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  213 43 8
+213 43 8  213 49 12  213 49 12  213 49 12  213 49 12  213 49 12
+213 49 12  213 49 12  213 49 12  213 49 12  213 49 12  213 49 12
+213 49 12  213 49 12  213 49 12  213 49 12  213 49 12  213 49 12
+213 49 12  213 78 31  213 49 12  213 49 12  213 78 31  231 168 130
+239 175 143  239 175 143  239 175 143  239 175 143  237 187 164  222 184 158
+183 106 85  38 6 5  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  38 6 5  183 106 85
+237 212 203  253 249 244  243 220 211  192 80 59  159 0 0  150 0 0
+150 0 0  152 0 0  150 0 0  167 2 1  192 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  195 25 6
+195 25 6  195 25 6  195 25 6  195 25 6  195 25 6  195 25 6
+195 25 6  195 25 6  210 31 6  210 31 6  210 31 6  210 31 6
+201 33 6  179 30 10  201 33 6  210 31 6  210 31 6  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 49 12  213 49 12  213 49 12  213 49 12
+213 49 12  213 49 12  213 49 12  213 49 12  213 49 12  213 49 12
+213 49 12  213 49 12  213 49 12  213 49 12  213 49 12  213 49 12
+213 49 12  213 49 12  213 49 12  203 99 81  231 168 130  239 175 143
+239 175 143  239 175 143  239 175 143  237 187 164  222 184 158  183 106 85
+44 12 10  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  38 6 5
+177 84 68  222 184 158  251 245 237  243 220 211  191 90 72  163 0 0
+159 3 1  157 0 0  158 0 0  157 0 0  163 1 0  183 2 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  195 25 6  195 25 6  195 25 6  195 25 6  195 25 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+201 33 6  182 27 9  201 33 6  210 31 6  210 31 6  210 31 6
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 49 12  213 49 12
+213 49 12  213 49 12  213 49 12  213 49 12  213 49 12  213 49 12
+213 49 12  213 49 12  213 49 12  213 49 12  213 49 12  213 49 12
+213 49 12  213 78 31  206 120 99  239 175 143  239 175 143  239 175 143
+239 175 143  239 175 143  237 187 164  222 184 158  183 106 85  38 6 5
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+38 6 5  177 84 68  222 184 158  252 243 235  243 220 211  191 90 72
+167 2 1  163 1 0  163 1 0  163 1 0  160 1 0  163 1 0
+176 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  195 25 6  195 25 6  195 25 6  195 25 6  195 25 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+201 33 6  182 27 9  195 25 6  210 31 6  210 31 6  210 31 6
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 49 12  213 49 12
+213 49 12  213 49 12  213 49 12  213 49 12  213 49 12  213 49 12
+213 49 12  213 49 12  213 49 12  213 49 12  213 49 12  213 49 12
+213 78 31  221 143 118  239 175 143  239 175 143  239 175 143  239 175 143
+239 175 143  237 187 164  222 184 158  183 106 85  44 12 10  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  2 2 2  44 12 10  222 184 158  249 237 229  243 220 211
+198 112 92  173 17 6  172 5 1  169 0 0  167 0 0  166 0 0
+166 0 0  170 0 0  183 2 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  195 25 6  195 25 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  191 24 6  187 29 9  210 31 6  210 31 6  210 31 6
+210 31 6  213 43 8  213 43 8  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  213 43 8
+213 49 12  213 49 12  213 49 12  213 49 12  213 49 12  213 49 12
+213 49 12  213 49 12  213 49 12  213 49 12  213 49 12  213 78 31
+227 164 124  231 168 130  239 175 143  239 175 143  239 175 143  239 175 143
+222 184 158  222 184 158  177 84 68  38 6 5  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  2 2 2  44 12 10  213 149 125  246 228 219
+246 228 219  210 136 114  179 30 10  176 6 2  179 0 0  174 0 0
+171 0 0  171 0 0  171 0 0  178 0 0  192 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  191 24 6  187 29 9  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  213 43 8  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  213 43 8
+213 49 12  213 49 12  213 49 12  213 49 12  213 49 12  213 49 12
+213 49 12  213 49 12  213 49 12  213 78 31  206 120 99  227 164 124
+227 164 124  231 168 130  239 175 143  239 175 143  239 175 143  222 184 158
+213 149 125  177 84 68  2 2 2  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  2 2 2  44 12 10  183 106 85
+238 198 189  246 228 219  236 173 149  184 62 32  180 23 7  184 9 3
+183 2 0  179 0 0  176 0 0  177 0 0  178 0 0  183 2 0
+196 0 0  196 0 0  196 0 0  196 0 0  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  191 24 6  191 24 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  213 43 8  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 49 12  213 49 12  213 49 12  213 49 12
+213 43 8  213 78 31  213 78 31  227 164 124  227 164 124  227 164 124
+227 164 124  231 168 130  239 175 143  239 175 143  222 184 158  202 130 106
+44 12 10  2 2 2  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  44 12 10
+177 84 68  222 184 158  246 228 219  241 196 185  203 99 81  185 36 13
+193 28 10  196 0 0  192 0 0  183 2 0  183 2 0  183 2 0
+183 2 0  186 0 0  196 0 0  196 0 0  196 0 0  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  191 24 6  191 24 6  195 25 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  213 49 12
+213 78 31  227 164 124  227 164 124  227 164 124  227 164 124  227 164 124
+231 168 130  231 168 130  236 173 149  218 160 133  183 106 85  44 12 10
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+2 2 2  44 12 10  183 106 85  238 198 189  237 212 203  221 143 118
+192 80 59  196 46 20  193 28 10  196 0 0  196 0 0  189 0 0
+186 0 0  188 0 0  189 0 0  192 0 0  196 0 0  196 0 0
+196 0 0  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  191 24 6  195 25 6  195 25 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 49 12  213 78 31  206 120 99
+227 164 124  227 164 124  227 164 124  227 164 124  227 164 124  227 164 124
+231 168 130  218 160 133  202 130 106  44 12 10  2 2 2  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  1 1 1  44 12 10  90 10 7  213 149 125  237 212 203
+241 196 185  203 102 83  193 53 28  193 53 28  203 40 12  210 31 6
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  210 31 6  210 31 6
+195 25 6  191 24 6  195 25 6  195 25 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  213 43 8  213 43 8
+213 43 8  213 43 8  213 43 8  213 43 8  213 43 8  210 31 6
+213 43 8  213 78 31  213 78 31  206 120 99  227 164 124  227 164 124
+227 164 124  227 164 124  227 164 124  227 164 124  227 164 124  218 160 133
+213 149 125  183 106 85  44 12 10  2 2 2  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  2 2 2  44 12 10  177 84 68
+213 149 125  238 198 189  233 177 153  203 99 81  193 53 28  193 53 28
+209 61 24  204 51 17  210 31 6  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+184 9 3  184 9 3  195 25 6  195 25 6  195 25 6  210 31 6
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  210 31 6
+210 31 6  210 31 6  210 31 6  213 43 8  213 49 12  213 78 31
+213 78 31  221 143 118  227 164 124  227 164 124  227 164 124  227 164 124
+227 164 124  227 164 124  227 164 124  227 164 124  213 149 125  183 106 85
+44 12 10  2 2 2  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  2 2 2
+44 12 10  177 84 68  213 149 125  238 198 189  221 154 132  203 99 81
+204 76 38  204 76 38  204 76 38  204 76 38  209 61 24  205 44 13
+210 31 6  195 25 6  195 25 6  196 0 0  196 0 0  196 0 0
+196 0 0  196 0 0  196 0 0  196 0 0  196 0 0  196 0 0
+210 31 6  210 31 6  210 31 6  210 31 6  210 31 6  213 49 12
+213 49 12  213 78 31  213 78 31  213 78 31  206 120 99  227 164 124
+227 164 124  227 164 124  227 164 124  227 164 124  227 164 124  227 164 124
+227 164 124  227 164 124  202 130 106  183 106 85  44 12 10  38 6 5
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+1 1 1  2 2 2  44 12 10  177 84 68  202 130 106  222 184 158
+221 154 132  206 120 99  192 80 59  204 76 38  204 76 38  192 80 59
+192 80 59  192 80 59  204 76 38  204 76 38  204 76 38  209 61 24
+209 61 24  209 61 24  196 46 20  204 51 17  204 51 17  209 61 24
+204 76 38  213 78 31  213 78 31  213 78 31  213 78 31  213 78 31
+206 120 99  206 120 99  206 120 99  221 143 118  221 143 118  227 164 124
+227 164 124  227 164 124  227 164 124  227 164 124  227 164 124  227 164 124
+202 130 106  183 106 85  44 12 10  44 12 10  2 2 2  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  2 2 2  44 12 10  44 12 10
+183 106 85  213 149 125  218 160 133  214 127 110  203 102 83  192 80 59
+204 76 38  192 80 59  192 80 59  192 80 59  203 99 81  203 99 81
+203 99 81  203 99 81  203 99 81  203 99 81  203 99 81  203 99 81
+203 102 83  203 102 83  203 102 83  206 120 99  206 120 99  206 120 99
+206 120 99  206 120 99  206 120 99  206 120 99  227 164 124  227 164 124
+227 164 124  227 164 124  227 164 124  202 130 106  183 106 85  177 84 68
+44 12 10  2 2 2  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  2 2 2
+2 2 2  44 12 10  44 12 10  183 106 85  183 106 85  202 130 106
+214 127 110  206 120 99  203 99 81  203 99 81  203 99 81  203 99 81
+203 99 81  203 99 81  203 99 81  203 99 81  203 99 81  203 99 81
+203 102 83  203 102 83  203 102 83  203 102 83  206 120 99  206 120 99
+206 120 99  206 120 99  206 120 99  221 143 118  206 120 99  202 130 106
+202 130 106  183 106 85  177 84 68  44 12 10  44 12 10  2 2 2
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  2 2 2  2 2 2  44 12 10  44 12 10
+44 12 10  177 84 68  183 106 85  183 106 85  183 106 85  188 95 83
+191 90 72  203 99 81  203 99 81  203 99 81  203 99 81  203 99 81
+203 102 83  203 102 83  203 102 83  203 102 83  198 112 92  198 112 92
+198 112 92  183 106 85  183 106 85  177 84 68  177 84 68  44 12 10
+44 12 10  38 6 5  2 2 2  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  1 1 1
+2 2 2  2 2 2  2 2 2  44 12 10  44 12 10  44 12 10
+44 12 10  44 12 10  90 10 7  90 10 7  90 10 7  136 24 13
+90 10 7  90 10 7  90 10 7  90 10 7  44 12 10  44 12 10
+44 12 10  44 12 10  44 12 10  2 2 2  2 2 2  1 1 1
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+1 1 1  1 1 1  2 2 2  1 1 1  1 1 1  1 1 1
+1 1 1  2 2 2  2 2 2  2 2 2  1 1 1  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
diff --git a/drivers/video/logo/logo_fedoraglossy_clut224.ppm b/drivers/video/logo/logo_fedoraglossy_clut224.ppm
new file mode 100644
index 000000000..276afb7ed
--- /dev/null
+++ b/drivers/video/logo/logo_fedoraglossy_clut224.ppm
@@ -0,0 +1,1123 @@
+P3
+80 80
+255
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 1  0 0 1  0 0 1  0 0 1  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 1  0 4 9  0 12 24  0 16 33  0 26 50  0 31 59
+0 33 61  0 31 59  0 30 56  0 28 53  0 22 43  0 16 33
+0 12 24  0 5 11  0 2 5  0 1 2  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 5 11  0 9 19  0 12 24  0 16 33
+0 25 48  2 43 79  36 91 133  66 107 134  66 107 134  66 107 134
+66 107 134  66 107 134  66 107 134  66 107 134  66 107 134  66 107 134
+9 67 112  2 43 79  0 31 57  0 22 43  0 20 39  0 14 29
+0 5 11  0 2 5  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 1
+0 9 19  0 22 43  0 38 70  9 67 112  66 107 134  105 133 154
+123 144 162  127 149 166  126 153 173  126 153 173  126 153 173  126 153 173
+126 153 173  126 153 173  126 153 173  126 153 173  126 153 173  126 153 173
+127 149 166  127 149 166  121 141 158  105 133 154  66 107 134  3 53 95
+1 36 66  0 25 48  0 14 29  0 5 11  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 2 5  0 9 19  0 28 53
+9 67 112  66 107 134  121 141 158  126 153 173  126 153 173  126 153 173
+126 153 173  127 149 166  127 149 166  127 149 166  127 149 166  127 149 166
+127 149 166  127 149 166  127 149 166  127 149 166  127 149 166  127 149 166
+127 149 166  127 149 166  127 149 166  127 149 166  127 149 166  127 149 166
+110 135 155  66 107 134  3 53 95  0 28 53  0 16 33  0 4 9
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 5 11  0 20 39  0 34 63  36 91 133  105 133 154
+127 149 166  127 149 166  127 149 166  127 149 166  127 149 166  127 149 166
+127 149 166  127 149 166  127 149 166  127 149 166  127 149 166  127 149 166
+127 149 166  127 149 166  127 149 166  127 149 166  127 149 166  127 149 166
+127 149 166  127 149 166  127 149 166  123 144 162  123 144 162  123 144 162
+127 149 166  127 149 166  121 141 158  105 133 154  9 67 112  0 24 47
+0 9 19  0 0 1  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 16 33  0 34 63  9 67 112  105 133 154  127 149 166  127 149 166
+127 149 166  123 144 162  123 144 162  127 149 166  123 144 162  123 144 162
+123 144 162  123 144 162  123 144 162  123 144 162  123 144 162  123 144 162
+123 144 162  123 144 162  123 144 162  123 144 162  123 144 162  123 144 162
+121 141 158  121 141 158  121 141 158  121 141 158  121 141 158  121 141 158
+121 141 158  121 141 158  121 141 158  121 141 158  121 141 158  86 123 154
+3 53 95  0 22 43  0 7 15  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 14 29
+2 43 79  66 107 134  115 143 164  120 146 166  123 144 162  123 144 162
+123 144 162  123 144 162  123 144 162  120 146 166  121 141 158  121 141 158
+121 141 158  121 141 158  121 141 158  121 141 158  121 141 158  121 141 158
+121 141 158  121 141 158  121 141 158  121 141 158  121 141 158  121 141 158
+121 141 158  121 141 158  121 141 158  121 141 158  121 141 158  121 141 158
+121 141 158  121 141 158  121 141 158  121 141 158  121 141 158  121 141 158
+110 135 155  66 107 134  0 27 51  0 14 29  0 0 2  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 2 5  0 22 43  3 53 95
+86 123 154  115 143 164  114 140 160  114 140 160  114 140 160  114 140 160
+114 140 160  114 140 160  114 140 160  114 140 160  114 140 160  114 140 160
+114 140 160  114 140 160  114 140 160  114 140 160  114 140 160  110 135 155
+110 135 155  114 140 160  110 135 155  110 135 155  110 135 155  110 135 155
+110 135 155  110 135 155  110 135 155  110 135 155  110 135 155  110 135 155
+110 135 155  110 135 155  110 135 155  110 135 155  110 135 155  110 135 155
+110 135 155  121 141 158  66 107 134  0 31 57  0 20 39  0 4 9
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 12 24  0 33 63  3 53 95  86 123 154
+114 140 160  109 137 159  109 137 159  109 137 159  109 137 159  109 137 159
+109 137 159  110 135 155  110 135 155  109 137 159  110 135 155  110 135 155
+110 135 155  110 135 155  109 137 159  110 135 155  110 135 155  110 135 155
+110 135 155  110 135 155  110 135 155  110 135 155  105 133 154  105 133 154
+105 133 154  105 133 154  110 135 155  121 141 158  127 149 166  127 149 166
+121 141 158  110 135 155  105 133 154  105 133 154  105 133 154  105 133 154
+105 133 154  105 133 154  110 135 155  66 107 134  0 33 61  0 20 39
+0 7 15  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 14 29  0 40 76  3 53 95  86 123 154  109 137 159
+104 134 157  104 134 157  104 134 157  104 134 157  104 134 157  104 134 157
+104 134 157  104 134 157  104 134 157  104 134 157  105 133 154  105 133 154
+105 133 154  105 133 154  105 133 154  105 133 154  105 133 154  105 133 154
+105 133 154  105 133 154  105 133 154  105 133 154  105 133 154  126 153 173
+158 185 204  198 215 225  227 236 241  240 245 247  240 245 247  240 245 247
+240 245 247  227 236 241  188 208 220  126 153 173  105 133 154  105 133 154
+105 133 154  105 133 154  105 133 154  105 133 154  66 107 134  0 29 54
+0 20 39  0 2 5  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 9 19  0 38 72  0 47 87  66 107 134  100 132 157  95 130 157
+95 130 157  95 130 157  100 132 157  100 132 157  100 132 157  100 132 157
+100 132 157  100 132 157  100 132 157  100 132 157  100 132 157  100 132 157
+100 132 157  100 132 157  100 132 157  100 132 157  100 132 157  95 130 157
+95 130 157  95 130 157  95 130 157  125 160 184  198 215 225  240 245 247
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  249 251 252  175 199 215  105 133 154
+105 133 154  86 123 154  86 123 154  86 123 154  105 133 154  36 91 133
+0 22 43  0 20 39  0 4 9  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 5 11
+0 38 71  0 45 86  9 67 112  86 123 154  95 130 157  95 130 157
+95 130 157  95 130 157  95 130 157  95 130 157  95 130 157  95 130 157
+95 130 157  95 130 157  95 130 157  95 130 157  95 130 157  95 130 157
+95 130 157  95 130 157  95 130 157  86 123 154  86 123 154  86 123 154
+86 123 154  114 140 160  188 208 220  249 251 252  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  151 182 203
+86 123 154  86 123 154  86 123 154  86 123 154  86 123 154  66 107 134
+1 36 66  0 22 43  0 16 33  0 2 5  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 5 11  0 37 70
+0 49 91  0 51 95  54 103 137  95 130 157  86 123 154  86 123 154
+86 123 154  86 123 154  86 123 154  86 123 154  86 123 154  86 123 154
+86 123 154  86 123 154  86 123 154  86 123 154  86 123 154  86 123 154
+86 123 154  86 123 154  86 123 154  86 123 154  86 123 154  86 123 154
+126 153 173  227 236 241  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  198 215 225
+86 123 154  86 123 154  86 123 154  86 123 154  66 107 134  86 123 154
+36 91 133  0 25 48  0 22 43  0 12 24  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 4 9  0 38 72  0 50 94
+0 51 95  1 56 100  70 116 150  86 123 154  86 123 154  86 123 154
+86 123 154  86 123 154  86 123 154  86 123 154  86 123 154  86 123 154
+86 123 154  86 123 154  86 123 154  86 123 154  86 123 154  86 123 154
+86 123 154  86 123 154  86 123 154  86 123 154  70 116 150  126 153 173
+240 245 247  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  198 215 225
+86 123 154  78 123 153  86 123 154  66 107 134  66 107 134  66 107 134
+66 107 134  0 31 57  0 24 47  0 22 43  0 12 24  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 2  0 39 73  0 51 95  0 53 98
+0 53 98  9 67 112  70 116 150  78 123 153  70 116 150  70 116 150
+70 116 150  70 116 150  78 123 153  78 123 153  78 123 153  78 123 153
+78 123 153  78 123 153  78 123 153  78 123 153  78 123 153  78 123 153
+78 123 153  70 116 150  78 123 153  65 110 142  115 143 164  227 236 241
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  158 185 204
+66 107 134  66 107 134  66 107 134  66 107 134  66 107 134  66 107 134
+66 107 134  1 36 66  0 26 50  0 25 48  0 22 43  0 9 19
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 22 43  0 53 98  0 54 101  0 54 98
+0 53 98  12 74 126  70 116 150  70 116 150  70 116 150  70 116 150
+70 116 150  70 116 150  70 116 150  70 116 150  70 116 150  70 116 150
+70 116 150  70 116 150  70 116 150  70 116 150  70 116 150  70 116 150
+70 116 150  70 116 150  70 116 150  86 123 154  213 228 238  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  188 208 220  78 123 153
+66 107 134  66 107 134  66 107 134  66 107 134  66 107 134  66 107 134
+66 107 134  1 36 66  0 27 51  0 27 51  0 24 47  0 20 39
+0 0 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 2 5  0 42 78  0 54 101  0 55 102  0 55 102
+0 55 102  9 67 112  65 110 142  70 116 150  65 110 142  70 116 150
+70 116 150  65 110 142  65 110 142  70 116 150  70 116 150  65 110 142
+70 116 150  65 110 142  65 110 142  70 116 150  65 110 142  65 110 142
+65 110 142  65 110 142  65 110 142  163 190 208  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  249 251 252  205 221 232  163 190 208  139 173 197  139 173 197
+151 182 203  163 190 208  163 190 208  125 160 184  70 116 150  66 107 134
+66 107 134  66 107 134  66 107 134  66 107 134  66 107 134  66 107 134
+66 107 134  1 36 66  0 28 53  0 28 53  0 27 51  0 22 43
+0 4 9  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 32 60  0 54 101  0 57 102  2 58 106  2 58 106
+2 58 106  9 67 112  65 110 142  65 110 142  65 110 142  65 110 142
+65 110 142  65 110 142  65 110 142  65 110 142  58 111 150  65 110 142
+65 110 142  65 110 142  65 110 142  65 110 142  65 110 142  65 110 142
+65 110 142  58 105 140  100 132 157  240 245 247  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+198 215 225  108 139 162  65 110 142  54 103 137  54 103 137  54 103 137
+54 103 137  54 103 137  54 103 137  66 107 134  66 107 134  66 107 134
+66 107 134  66 107 134  66 107 134  66 107 134  66 107 134  66 107 134
+66 107 134  1 36 66  0 29 55  0 29 54  0 28 53  0 26 50
+0 14 29  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 9 19  0 53 98  0 57 102  2 58 106  2 58 106  0 60 110
+0 60 110  1 61 111  44 99 139  56 105 142  56 105 142  56 105 142
+56 105 142  56 105 142  56 105 142  56 105 142  56 105 142  56 105 142
+56 105 142  56 105 142  56 105 142  56 105 142  56 105 142  56 105 142
+56 105 142  50 102 142  175 199 215  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  151 182 203
+54 103 137  54 103 137  54 103 137  54 103 137  54 103 137  54 103 137
+54 103 137  54 103 137  54 103 137  54 103 137  66 107 134  66 107 134
+66 107 134  66 107 134  66 107 134  66 107 134  66 107 134  66 107 134
+36 91 133  0 38 70  0 30 56  0 29 55  0 28 53  0 27 51
+0 24 47  0 14 29  0 0 2  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 35 67  2 58 106  2 58 106  2 58 106  0 60 110  0 60 110
+0 61 112  0 61 112  23 84 135  56 105 142  50 104 143  50 104 143
+50 104 143  50 104 143  50 104 143  50 104 143  50 104 143  50 104 143
+50 104 143  50 104 143  50 104 143  50 104 143  50 102 142  50 102 142
+50 102 142  70 116 150  227 236 241  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  158 185 204  44 99 139
+54 103 137  54 103 137  54 103 137  54 103 137  54 103 137  54 103 137
+54 103 137  54 103 137  36 91 133  36 91 133  36 91 133  54 103 137
+54 103 137  54 103 137  54 103 137  54 103 137  54 103 137  54 103 137
+9 67 112  0 41 75  0 32 60  0 30 56  0 29 54  0 28 53
+0 27 51  0 20 39  0 4 9  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 5 11
+0 50 95  2 58 106  0 60 110  0 60 110  0 61 112  0 61 112
+1 62 114  1 62 114  5 67 116  42 97 140  50 102 142  50 102 142
+50 102 142  50 102 142  50 102 142  50 102 142  50 102 142  50 102 142
+50 102 142  50 102 142  50 102 142  50 102 142  50 102 142  50 102 142
+38 94 135  117 151 174  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  205 221 232  54 103 137  36 91 133
+38 94 135  38 94 135  38 94 135  36 91 133  36 91 133  36 91 133
+36 91 133  36 91 133  36 91 133  36 91 133  36 91 133  36 91 133
+38 94 135  54 103 137  38 94 135  38 94 135  54 103 137  36 91 133
+0 47 84  0 44 81  1 36 66  0 30 57  0 29 55  0 28 53
+0 27 51  0 22 43  0 9 19  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 20 39
+0 57 102  0 60 110  0 60 110  0 61 112  0 63 115  0 63 115
+0 63 115  0 63 115  0 63 115  12 74 126  42 97 140  42 97 140
+42 97 140  42 97 140  42 97 140  42 97 140  42 97 140  42 97 140
+42 97 140  42 97 140  42 97 140  42 97 140  42 97 140  42 97 140
+37 92 135  151 182 203  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  117 151 174  33 89 133  36 91 133
+36 91 133  36 91 133  36 91 133  36 91 133  36 91 133  36 91 133
+36 91 133  36 91 133  36 91 133  36 91 133  36 91 133  36 91 133
+36 91 133  38 94 135  36 91 133  36 91 133  36 91 133  2 60 104
+0 47 84  0 47 84  0 38 70  0 31 58  0 30 56  0 29 54
+0 28 53  0 25 48  0 16 33  0 2 5  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 40 76
+0 60 110  0 61 112  0 63 115  0 63 115  0 64 119  0 64 119
+0 64 119  0 66 119  0 66 119  0 67 125  23 84 135  42 97 140
+42 97 140  39 95 138  42 97 140  37 101 144  37 101 144  42 97 140
+42 97 140  42 97 140  42 97 140  42 97 140  39 95 138  39 95 138
+42 97 140  188 208 220  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  227 236 241  58 105 140  33 89 133  36 91 133
+36 91 133  36 91 133  36 91 133  33 89 133  33 89 133  33 89 133
+33 89 133  33 89 133  33 89 133  33 89 133  33 89 133  33 89 133
+33 89 133  36 91 133  36 91 133  36 91 133  9 67 112  0 50 89
+0 50 89  0 47 84  0 41 75  0 32 60  0 31 58  0 30 56
+0 29 54  0 27 51  0 24 47  0 9 19  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 16 33  2 58 106
+0 61 112  0 63 115  0 63 115  0 64 119  0 64 119  0 66 119
+0 66 119  0 66 119  0 67 125  0 67 125  0 67 125  16 83 138
+39 95 138  39 95 138  37 101 144  37 101 144  37 101 144  39 95 138
+39 95 138  39 95 138  39 95 138  39 95 138  37 92 135  33 89 133
+37 101 144  205 221 232  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  188 208 220  36 91 133  33 89 133  33 89 133
+33 89 133  33 89 133  33 89 133  33 89 133  33 89 133  33 89 133
+33 89 133  33 89 133  33 89 133  9 67 112  9 67 112  9 67 112
+9 67 112  33 89 133  33 89 133  9 67 112  0 50 89  0 50 89
+0 50 89  0 47 84  0 43 79  0 33 61  0 31 59  0 30 56
+0 29 54  0 28 53  0 26 50  0 14 29  0 0 2  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 45 84  0 63 115
+0 63 115  0 64 119  0 64 119  0 66 119  0 67 125  0 67 125
+0 67 125  0 68 127  0 68 127  0 68 127  0 68 127  0 68 127
+13 81 137  23 87 136  23 87 136  23 87 136  23 87 136  23 87 136
+23 87 136  23 87 136  23 87 136  23 87 136  23 87 136  23 87 136
+37 101 144  209 226 237  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  175 199 215  33 89 133  23 84 135  33 89 133
+23 84 135  23 84 135  33 89 133  33 89 133  12 74 126  12 74 126
+12 74 126  9 67 112  9 67 112  9 67 112  9 67 112  9 67 112
+9 67 112  9 67 112  9 67 112  0 55 96  0 55 96  0 50 89
+0 50 89  0 50 89  0 44 81  0 34 63  0 32 60  0 31 58
+0 30 56  0 29 54  0 26 50  0 20 39  0 4 9  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 5 11  0 61 112  0 63 115
+0 63 115  0 64 119  0 66 119  1 68 121  0 67 125  0 67 125
+0 68 127  0 68 127  0 68 127  0 68 127  0 68 127  1 73 132
+1 73 132  6 78 137  16 83 138  19 86 138  19 86 138  19 86 138
+19 86 138  19 86 138  19 86 138  19 86 138  23 87 136  19 86 138
+37 101 144  213 228 238  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  163 190 208  12 74 126  12 74 126  12 74 126
+12 74 126  12 74 126  12 74 126  12 74 126  9 67 112  9 67 112
+9 67 112  9 67 112  9 67 112  9 67 112  9 67 112  9 67 112
+3 53 95  1 56 100  0 59 102  0 55 96  0 55 96  0 55 96
+0 50 89  0 50 89  0 44 81  0 34 63  0 32 60  0 31 58
+0 31 57  0 29 54  0 27 51  0 20 39  0 7 15  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 12 24  0 64 119  0 64 119
+0 64 119  0 66 119  0 67 125  0 67 125  0 68 127  0 68 127
+0 68 127  0 68 127  1 73 132  1 73 132  1 73 132  1 73 132
+1 75 135  1 73 132  0 74 137  8 80 139  16 83 138  19 86 138
+19 86 138  19 86 138  19 86 138  19 86 138  16 83 138  16 83 138
+37 101 144  213 228 238  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  158 185 204  12 74 126  12 74 126  12 74 126
+12 74 126  12 74 126  12 74 126  9 67 112  9 67 112  9 67 112
+9 67 112  9 67 112  9 67 112  9 67 112  4 62 107  3 53 95
+0 46 85  0 55 96  0 59 102  0 59 102  0 55 96  0 55 96
+0 50 89  0 50 89  0 44 81  0 35 64  0 33 61  0 31 59
+0 30 57  0 29 54  0 28 53  0 24 47  0 12 24  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 31 59  0 64 119  0 66 119
+0 66 119  0 67 125  0 67 125  0 68 127  0 68 127  0 68 127
+0 68 127  1 73 132  1 73 132  0 74 137  0 74 137  0 74 137
+0 74 137  0 74 137  1 75 137  0 74 137  1 75 137  6 79 140
+8 80 139  8 80 139  16 83 138  16 83 138  13 81 137  8 80 139
+23 87 136  213 228 238  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  158 185 204  12 74 126  9 72 124  9 72 124
+7 70 121  9 67 112  9 67 112  9 67 112  9 67 112  9 67 112
+9 67 112  4 62 107  2 60 104  3 53 95  0 49 89  0 45 86
+0 48 88  2 60 104  0 59 102  0 59 102  0 55 96  0 55 96
+0 50 89  0 50 89  0 43 79  0 35 64  0 34 63  0 32 60
+0 31 58  0 30 56  0 28 53  0 26 50  0 14 29  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 1  0 49 92  0 66 119  0 66 119
+0 67 125  0 67 125  0 68 127  0 68 127  1 73 132  1 73 132
+1 73 132  1 73 132  1 75 135  0 74 137  0 74 137  1 75 137
+1 75 137  1 75 137  1 75 137  1 75 137  1 75 137  1 75 137
+1 75 137  4 77 138  4 78 138  7 79 141  8 80 139  4 77 138
+19 86 138  213 228 238  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  158 185 204  6 69 123  4 69 121  5 67 116
+9 67 112  9 67 112  9 67 112  9 67 112  9 67 112  4 62 107
+2 60 104  0 54 98  0 50 94  0 49 92  0 48 90  0 46 85
+0 55 96  5 66 110  2 60 104  0 59 102  0 55 96  0 55 96
+0 55 96  0 50 89  0 41 75  0 34 65  0 33 63  0 33 61
+0 31 59  0 29 55  0 29 54  0 27 51  0 20 39  0 5 11
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 2 5  2 58 106  1 68 121  0 67 125
+0 67 125  0 68 127  0 68 127  1 73 132  1 73 132  1 75 135
+0 74 137  0 74 137  1 75 137  1 75 137  4 77 138  4 77 138
+6 78 141  6 78 141  6 78 141  6 78 141  6 78 141  6 78 141
+6 78 141  1 75 137  1 75 137  1 75 137  1 75 137  0 74 137
+16 83 138  213 228 238  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  158 185 204  1 68 121  1 65 118  1 65 118
+4 65 114  1 62 114  1 61 111  0 60 110  2 58 106  0 55 102
+0 54 101  0 53 98  0 51 95  0 50 93  0 48 90  0 52 94
+0 64 112  5 66 110  2 60 104  0 59 102  0 59 102  0 55 96
+0 55 96  0 50 89  0 39 73  0 35 64  0 34 63  0 33 63
+0 32 60  0 30 57  0 29 54  0 28 53  0 22 43  0 9 19
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 5 11  0 64 119  0 66 119  0 67 125
+0 68 127  0 68 127  0 68 127  1 73 132  1 75 135  0 74 137
+0 74 137  1 75 137  1 75 137  4 77 138  4 77 138  6 78 141
+6 78 141  6 78 141  6 78 141  6 78 141  6 78 141  6 78 141
+6 78 141  6 78 141  4 77 138  4 77 138  1 75 137  0 74 137
+16 83 138  209 226 237  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  151 182 203  0 66 119  0 63 115  0 63 115
+1 62 114  0 60 110  0 60 110  2 58 106  0 57 102  0 55 102
+0 54 101  0 54 98  0 51 95  0 49 93  0 52 94  0 64 112
+0 64 112  5 66 110  2 60 104  0 59 102  0 59 102  0 55 96
+0 55 96  0 50 89  0 37 69  0 35 67  0 35 64  0 33 63
+0 32 60  0 30 56  0 29 54  0 28 53  0 24 47  0 12 24
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 7 15  0 67 125  0 67 125  0 68 127
+0 68 127  0 68 127  1 73 132  1 73 132  0 74 137  0 74 137
+1 75 137  1 75 137  4 77 138  6 78 141  6 78 141  6 78 141
+6 78 141  6 78 141  6 78 141  6 78 141  6 78 141  6 78 141
+6 78 141  6 78 141  6 78 141  6 78 141  1 75 137  0 74 137
+7 79 141  209 226 237  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  151 182 203  0 64 119  0 63 115  1 62 114
+0 61 112  0 60 110  0 60 110  2 58 106  0 55 102  0 55 102
+0 54 101  0 53 98  0 51 95  0 57 102  0 64 112  5 67 116
+0 64 112  5 66 110  2 60 104  2 60 104  0 59 102  0 55 96
+0 55 96  0 43 79  0 36 67  0 36 67  0 35 64  0 34 63
+0 33 61  0 31 57  0 29 55  0 28 53  0 24 47  0 7 15
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 7 15  0 67 125  0 67 125  0 68 127
+0 68 127  1 73 132  1 73 132  1 75 135  1 75 137  1 75 137
+1 75 137  1 75 137  6 78 141  6 78 141  6 78 141  6 78 141
+6 78 141  6 78 141  6 78 141  6 78 141  6 78 141  7 79 141
+7 79 141  7 79 141  7 79 141  7 79 141  7 79 141  7 79 141
+37 101 144  213 228 238  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  198 215 225  23 87 136  12 74 126  12 74 126
+12 74 126  9 72 124  9 72 124  9 67 112  2 58 106  0 53 101
+0 57 102  0 60 110  0 64 112  1 68 121  1 68 121  5 67 116
+0 64 112  5 66 110  4 62 107  2 60 104  0 59 102  0 59 102
+0 50 89  0 40 74  0 37 69  0 35 67  0 34 65  0 34 63
+0 33 61  0 31 58  0 30 56  0 29 54  0 25 48  0 4 9
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 4 9  0 67 125  0 67 125  0 68 127
+0 68 127  1 73 132  1 73 132  0 74 137  1 75 137  1 75 137
+4 77 138  6 78 141  6 78 141  6 78 141  6 78 141  6 78 141
+7 79 141  7 79 141  39 124 175  39 124 175  118 157 183  175 199 215
+205 221 232  209 226 237  209 226 237  209 226 237  209 226 237  209 226 237
+209 226 237  249 251 252  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  227 236 241  205 221 232  205 221 232
+205 221 232  205 221 232  205 221 232  198 215 225  151 182 203  49 111 152
+2 73 127  7 77 127  2 73 127  2 73 127  1 68 121  5 67 116
+0 64 112  5 66 110  4 62 107  2 60 104  0 59 102  0 59 102
+0 43 79  0 38 72  0 38 70  0 36 67  0 35 64  0 34 63
+0 33 61  0 31 58  0 30 56  0 29 54  0 25 48  0 5 11
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 1 2  0 64 119  0 68 127  0 68 127
+1 73 132  1 73 132  1 75 135  0 74 137  1 75 137  1 75 137
+6 78 141  6 78 141  6 78 141  7 79 141  7 79 141  39 124 175
+39 124 175  39 124 175  39 124 175  132 167 191  249 251 252  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  227 236 241
+59 119 159  2 73 127  2 73 127  2 73 127  1 68 121  1 68 121
+5 67 116  5 66 110  5 66 110  2 60 104  0 59 102  0 50 89
+0 40 74  0 39 73  0 38 70  0 37 69  1 36 66  0 34 63
+0 32 60  0 31 59  0 30 56  0 29 54  0 25 48  0 7 15
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 1 2  0 64 119  0 68 127  0 68 127
+1 73 132  1 73 132  0 74 137  0 74 137  1 75 137  4 77 138
+6 78 141  6 78 141  7 79 141  39 124 175  39 124 175  39 124 175
+39 124 175  39 124 175  39 124 175  227 236 241  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+175 199 215  13 81 137  2 73 127  2 73 127  1 68 121  1 68 121
+5 67 116  5 66 110  5 66 110  2 60 104  0 52 94  0 42 78
+0 40 74  0 40 74  0 38 71  0 37 69  0 36 67  0 33 63
+0 33 61  0 31 59  0 30 56  0 29 54  0 27 51  0 12 24
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 1 2  0 64 119  0 68 127  1 73 132
+1 73 132  1 75 135  0 74 137  1 75 137  1 75 137  1 75 137
+7 79 141  39 124 175  39 124 175  39 124 175  39 124 175  39 124 175
+39 124 175  39 124 175  66 128 168  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+227 236 241  37 101 144  2 73 127  2 73 127  1 68 121  1 68 121
+5 67 116  0 64 112  5 66 110  0 55 96  0 43 79  0 42 78
+0 40 76  0 40 74  0 38 71  0 37 69  0 36 67  0 34 63
+0 34 63  0 31 59  0 30 56  0 29 54  0 28 53  0 12 24
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 1 2  0 64 119  0 68 127  1 73 132
+1 73 132  1 75 135  0 74 137  1 75 137  1 75 137  7 79 141
+39 124 175  39 124 175  39 124 175  39 124 175  39 124 175  39 124 175
+39 124 175  39 124 175  39 124 175  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+213 228 238  37 101 144  2 73 127  2 73 127  1 68 121  1 68 121
+5 67 116  0 64 112  0 52 94  0 44 82  0 42 78  0 42 78
+0 40 76  0 40 74  0 38 71  0 37 69  0 36 67  0 34 63
+0 34 63  0 31 59  0 30 56  0 29 54  0 27 51  0 12 24
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 1 2  0 64 119  0 68 127  1 73 132
+1 73 132  1 75 135  0 74 137  0 74 137  7 79 141  39 124 175
+39 124 175  39 124 175  39 124 175  39 124 175  39 124 175  39 124 175
+39 124 175  39 124 175  39 124 175  209 226 237  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+151 182 203  7 77 127  2 73 127  2 73 127  1 68 121  1 68 121
+2 60 104  0 50 89  0 45 84  0 44 82  0 43 79  0 42 78
+0 40 76  0 40 74  0 38 71  0 37 69  0 36 67  0 34 63
+0 34 63  0 31 59  0 30 56  0 29 54  0 28 53  0 14 29
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 1 2  0 64 119  0 68 127  1 73 132
+1 73 132  0 74 137  0 74 137  6 78 141  39 124 175  39 124 175
+39 124 175  39 124 175  39 124 175  39 124 175  39 124 175  39 124 175
+39 124 175  39 124 175  39 124 175  39 124 175  213 228 238  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  188 208 220
+37 101 144  2 73 127  2 73 127  1 68 121  0 64 112  0 54 98
+0 49 91  0 47 87  0 46 85  0 45 83  0 43 79  0 42 78
+0 40 76  0 40 74  0 38 71  0 37 69  0 36 67  0 34 63
+0 34 63  0 31 59  0 30 56  0 29 54  0 28 53  0 12 24
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 2  0 67 125  0 68 127  1 73 132
+1 73 132  1 73 132  0 74 137  7 79 141  39 124 175  39 124 175
+39 124 175  39 124 175  39 124 175  39 124 175  39 124 175  39 124 175
+39 124 175  39 124 175  39 124 175  39 124 175  39 124 175  118 157 183
+142 177 202  142 177 202  142 177 202  142 177 202  142 177 202  142 177 202
+188 208 220  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  205 221 232  139 173 197  139 173 197  139 173 197
+132 167 191  132 167 191  132 167 191  125 160 184  83 131 163  7 77 127
+0 64 112  0 64 112  0 57 102  0 53 98  0 49 93  0 48 90
+0 48 88  0 47 87  0 46 85  0 45 83  0 43 79  0 42 78
+0 40 74  0 40 74  0 38 70  0 37 69  0 35 64  0 34 63
+0 32 60  0 31 59  0 30 56  0 29 54  0 27 51  0 12 24
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 1 2  0 67 125  0 68 127  8 76 132
+16 83 138  13 81 137  7 79 141  39 124 175  39 124 175  39 124 175
+39 124 175  39 124 175  39 124 175  39 124 175  39 124 175  39 124 175
+39 124 175  7 79 141  7 79 141  7 79 141  6 78 141  6 78 141
+6 78 141  6 78 141  6 78 141  6 78 141  6 78 141  0 74 137
+19 86 138  227 236 241  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  151 182 203  0 64 119  0 64 119  0 63 115
+0 61 112  0 60 110  0 60 110  0 55 102  0 53 101  0 55 102
+0 55 102  0 54 98  0 53 98  0 51 95  0 50 94  0 49 91
+0 48 88  0 46 85  0 45 84  0 44 82  0 43 79  0 42 78
+0 40 74  0 40 74  0 37 69  0 36 67  0 35 67  0 34 63
+0 33 61  0 31 58  0 30 56  0 29 54  0 25 48  0 7 15
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 1 2  0 67 125  6 74 130  23 87 136
+37 101 144  37 101 144  39 124 175  39 124 175  39 124 175  39 124 175
+39 124 175  39 124 175  39 124 175  39 124 175  39 124 175  7 79 141
+6 78 141  6 78 141  6 78 141  7 79 141  7 79 141  6 78 141
+6 78 141  6 78 141  6 78 141  6 78 141  6 78 141  1 75 137
+7 79 141  209 226 237  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  151 182 203  0 67 125  0 64 119  0 64 119
+0 63 115  0 61 112  0 61 112  0 60 110  2 58 106  0 57 102
+0 55 102  0 56 101  0 53 98  0 50 95  0 49 93  0 49 91
+0 48 88  0 45 86  0 44 83  0 44 81  0 42 78  0 42 78
+0 40 74  0 38 72  0 38 70  0 35 67  0 34 65  0 34 63
+0 33 61  0 31 57  0 30 56  0 28 53  0 24 47  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 1 2  0 64 119  15 82 136  37 101 144
+49 111 152  48 117 162  39 124 175  39 124 175  39 124 175  39 124 175
+39 124 175  39 124 175  39 124 175  39 124 175  7 79 141  6 78 141
+6 78 141  6 78 141  6 78 141  6 78 141  6 78 141  6 78 141
+6 78 141  6 78 141  6 78 141  6 78 141  6 78 141  1 75 137
+16 83 138  213 228 238  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  151 182 203  0 67 125  0 64 119  0 64 119
+0 63 115  1 62 114  0 60 110  0 60 110  2 58 106  0 57 102
+0 55 102  0 54 101  0 53 98  0 51 95  0 50 93  0 48 90
+0 48 88  0 46 85  0 45 83  0 44 81  0 42 78  0 40 76
+0 40 74  0 38 72  0 37 69  0 36 67  0 35 64  0 34 63
+0 33 61  0 31 57  0 29 55  0 27 51  0 24 47  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 2  1 65 118  33 89 133  54 113 152
+72 126 163  66 128 168  66 128 168  39 124 175  39 124 175  39 124 175
+39 124 175  39 124 175  39 124 175  7 79 141  6 78 141  6 78 141
+6 78 141  6 78 141  6 78 141  6 78 141  6 78 141  6 78 141
+6 78 141  6 78 141  6 78 141  6 78 141  6 78 141  0 74 137
+16 83 138  213 228 238  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  158 185 204  0 67 125  0 64 119  0 64 119
+0 63 115  0 61 112  0 60 110  0 60 110  2 58 106  0 57 102
+0 55 102  0 54 101  0 53 98  0 51 95  0 50 93  0 48 90
+0 47 87  0 46 85  0 44 83  0 44 81  0 42 78  0 40 76
+0 40 74  0 38 72  0 37 69  0 36 67  0 35 64  0 33 63
+0 33 61  0 31 57  0 29 55  0 29 54  0 12 24  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 2 5  5 67 116  42 97 140  72 126 163
+83 131 163  118 157 183  83 131 163  66 128 168  39 124 175  39 124 175
+39 124 175  39 124 175  39 124 175  7 79 141  6 78 141  6 78 141
+6 78 141  6 78 141  6 78 141  6 78 141  6 78 141  6 78 141
+6 78 141  6 78 141  6 78 141  6 78 141  1 75 137  0 74 137
+16 83 138  209 226 237  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  151 182 203  0 64 119  0 63 115  0 63 115
+1 62 114  0 60 110  0 60 110  2 58 106  0 57 102  0 55 102
+0 54 101  0 53 98  0 51 95  0 50 93  0 49 91  0 48 88
+0 47 87  0 45 84  0 44 82  0 42 78  0 42 78  0 41 75
+0 38 72  0 38 71  0 37 69  0 35 67  0 34 63  0 33 61
+0 31 59  0 30 57  0 29 54  0 29 54  0 5 11  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 2 5  9 72 124  50 109 150  83 131 163
+117 151 174  118 157 183  118 157 183  66 128 168  39 124 175  39 124 175
+39 124 175  39 124 175  7 79 141  6 78 141  6 78 141  6 78 141
+6 78 141  6 78 141  6 78 141  6 78 141  6 78 141  6 78 141
+6 78 141  4 77 138  4 77 138  4 77 138  1 75 137  0 74 137
+16 83 138  213 228 238  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  151 182 203  0 64 119  0 63 115  0 63 115
+0 61 112  0 60 110  0 60 110  2 58 106  0 57 102  0 55 102
+0 54 101  0 53 98  0 51 95  0 50 93  0 49 91  0 48 88
+0 47 87  0 45 84  0 44 82  0 43 79  0 42 78  0 41 75
+0 39 73  0 38 71  0 36 67  0 35 67  0 34 63  0 33 61
+0 31 59  0 30 56  0 28 53  0 22 43  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 2  9 67 112  58 111 150  107 142 167
+118 157 183  142 177 202  132 167 191  118 157 183  66 128 168  39 124 175
+7 79 141  39 124 175  7 79 141  0 74 137  1 75 137  1 75 137
+1 75 137  1 75 137  1 75 137  1 75 137  4 77 138  1 75 137
+1 75 137  1 75 137  1 75 137  1 75 137  0 74 137  1 73 132
+13 81 137  209 226 237  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  151 182 203  0 64 119  1 62 114  0 61 112
+0 60 110  0 60 110  2 58 106  0 57 102  0 55 102  0 54 101
+0 53 98  0 53 98  0 50 93  0 49 91  0 48 88  0 47 87
+0 46 85  0 44 82  0 43 79  0 42 78  0 40 76  0 40 74
+0 38 71  0 37 69  0 35 67  0 35 64  0 34 63  0 32 60
+0 31 58  0 30 56  0 26 50  0 12 24  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  3 53 95  68 118 153  107 142 167
+132 167 191  151 182 203  142 177 202  132 167 191  66 128 168  39 124 175
+7 79 141  7 79 141  7 79 141  0 74 137  0 74 137  1 75 137
+1 75 137  1 75 137  1 75 137  1 75 137  1 75 137  1 75 137
+1 75 137  0 74 137  0 74 137  0 74 137  1 73 132  1 73 132
+13 81 137  209 226 237  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  151 182 203  0 63 115  0 61 112  0 60 110
+0 60 110  2 58 106  2 58 106  0 55 102  0 54 101  0 53 98
+0 53 98  0 51 95  0 49 92  0 48 90  0 47 87  0 46 85
+0 45 84  0 44 81  0 42 78  0 42 78  0 40 74  0 40 74
+0 38 70  0 37 69  1 36 66  0 34 63  0 33 61  0 31 59
+0 30 57  0 29 54  0 22 43  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  3 53 95  68 118 153  117 151 174
+151 182 203  175 199 215  163 190 208  142 177 202  118 157 183  39 124 175
+39 124 175  7 79 141  7 79 141  1 73 132  0 74 137  0 74 137
+0 74 137  0 74 137  1 75 137  1 75 137  0 74 137  0 74 137
+0 74 137  1 75 135  1 75 135  0 74 137  1 73 132  0 68 127
+16 83 138  227 236 241  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  142 177 202  1 62 114  0 61 112  0 60 110
+0 60 110  2 58 106  2 58 106  0 55 102  0 54 101  0 53 98
+0 53 98  0 51 95  0 50 93  0 48 90  0 47 87  0 46 85
+0 45 84  0 44 81  0 42 78  0 42 78  0 40 74  0 38 72
+0 38 70  0 37 69  0 35 64  0 34 63  0 33 61  0 31 59
+0 31 57  0 29 55  0 14 29  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 1  9 67 112  68 118 153  118 157 183
+158 185 204  188 208 220  188 208 220  151 182 203  118 157 183  66 128 168
+39 124 175  7 79 141  6 79 140  0 68 127  1 73 132  1 73 132
+1 73 132  1 73 132  1 75 135  1 75 135  0 74 137  1 73 132
+1 73 132  1 73 132  1 73 132  1 73 132  0 68 127  0 67 125
+19 86 138  240 245 247  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  132 167 191  2 58 106  0 60 110  0 60 110
+2 58 106  2 58 106  0 57 102  0 54 101  0 53 98  0 53 98
+0 51 95  0 50 93  0 48 90  0 47 87  0 46 85  0 45 84
+0 44 82  0 43 79  0 42 78  0 41 75  0 39 73  0 38 71
+0 37 69  0 36 67  0 34 63  0 33 61  0 32 60  0 31 58
+0 30 56  0 27 51  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 1 2  12 74 126  68 118 153  118 157 183
+163 190 208  205 221 232  205 221 232  175 199 215  139 173 197  66 128 168
+39 124 175  7 79 141  7 79 141  1 73 132  0 68 127  1 73 132
+1 73 132  1 73 132  1 73 132  1 73 132  1 73 132  1 73 132
+1 73 132  1 73 132  0 68 127  0 68 127  0 68 127  0 64 119
+77 129 164  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  107 142 167  0 53 101  0 60 110  2 58 106
+2 58 106  0 57 102  0 55 102  0 54 101  0 53 98  0 51 95
+0 51 95  0 50 93  0 48 90  0 47 87  0 46 85  0 45 83
+0 44 81  0 42 78  0 42 78  0 40 74  0 38 72  0 38 70
+0 37 69  0 36 67  0 34 63  0 33 61  0 31 59  0 30 56
+0 29 54  0 16 33  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 1  12 74 126  70 116 150  120 155 180
+163 190 208  209 226 237  213 228 238  188 208 220  142 177 202  83 131 163
+39 124 175  7 79 141  7 79 141  1 75 135  0 68 127  0 68 127
+0 68 127  0 68 127  0 68 127  0 68 127  0 68 127  0 68 127
+0 68 127  0 68 127  0 68 127  0 68 127  0 67 125  0 64 119
+175 199 215  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  249 251 252  68 118 153  0 55 102  2 58 106  2 58 106
+0 55 102  0 55 102  0 54 101  0 53 98  0 51 95  0 50 94
+0 49 92  0 48 90  0 47 87  0 46 85  0 45 83  0 44 81
+0 43 79  0 42 78  0 40 74  0 39 73  0 38 70  0 37 69
+0 36 67  0 35 64  0 33 63  0 32 60  0 31 58  0 29 54
+0 22 43  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 1  12 74 126  70 116 150  120 155 180
+163 190 208  213 228 238  240 245 247  198 215 225  142 177 202  118 157 183
+39 124 175  7 79 141  7 79 141  6 79 140  0 67 125  0 67 125
+0 67 125  0 67 125  0 68 127  0 68 127  0 68 127  0 67 125
+0 67 125  0 67 125  0 67 125  0 67 125  1 62 114  69 126 163
+240 245 247  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  227 236 241  12 74 126  2 58 106  2 58 106  0 57 102
+0 56 101  0 54 101  0 53 98  0 51 95  0 51 95  0 50 93
+0 48 90  0 48 88  0 46 85  0 45 84  0 44 82  0 43 79
+0 42 78  0 40 76  0 40 74  0 38 71  0 37 69  0 36 67
+0 35 67  0 35 64  0 33 61  0 31 59  0 30 56  0 27 51
+0 9 19  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 1  9 67 112  56 105 142  117 151 174
+158 185 204  209 226 237  240 245 247  198 215 225  151 182 203  118 157 183
+49 121 167  7 79 141  7 79 141  7 79 141  4 78 138  0 66 119
+0 66 119  1 68 121  0 67 125  0 67 125  0 67 125  0 67 125
+0 67 125  0 66 119  0 66 119  0 63 115  39 95 138  213 228 238
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  158 185 204  0 53 101  2 58 106  0 55 102  0 55 102
+0 54 101  0 53 98  0 53 98  0 50 94  0 50 93  0 49 91
+0 48 88  0 47 87  0 46 85  0 44 83  0 44 81  0 42 78
+0 42 78  0 40 74  0 39 73  0 38 70  0 36 67  0 35 67
+0 34 65  0 34 63  0 32 60  0 30 57  0 29 54  0 16 33
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 1  4 62 107  54 103 137  107 142 167
+151 182 203  198 215 225  227 236 241  198 215 225  158 185 204  118 157 183
+49 121 167  7 79 141  7 79 141  7 79 141  7 79 141  4 78 138
+0 66 119  0 63 115  0 64 119  0 64 119  0 64 119  0 64 119
+0 63 115  1 62 114  0 60 110  58 111 150  205 221 232  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+249 251 252  65 110 142  0 53 101  0 55 102  0 54 101  0 54 101
+0 53 98  0 53 98  0 51 95  0 50 93  0 48 90  0 48 88
+0 47 87  0 46 85  0 45 83  0 44 81  0 43 79  0 42 78
+0 41 75  0 39 73  0 38 70  0 37 69  0 36 67  0 35 64
+0 34 63  0 33 61  0 31 58  0 30 56  0 24 47  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 1  0 57 102  36 91 133  95 130 157
+132 167 191  175 199 215  205 221 232  198 215 225  158 185 204  118 157 183
+58 122 163  19 86 138  6 79 140  6 79 140  6 79 140  6 79 140
+4 78 138  13 81 137  7 73 126  1 65 118  0 63 115  0 66 119
+12 74 126  44 99 139  139 173 197  240 245 247  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+175 199 215  4 62 107  0 53 101  0 54 101  0 53 98  0 53 98
+0 51 95  0 50 94  0 50 93  0 48 90  0 47 87  0 47 87
+0 46 85  0 45 84  0 44 81  0 42 78  0 42 78  0 40 74
+0 40 74  0 38 71  0 37 69  0 36 67  0 35 64  0 34 63
+0 33 63  0 31 59  0 29 55  0 26 50  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 1 2  0 53 98  33 89 133  78 123 153
+120 155 180  158 185 204  188 208 220  175 199 215  151 182 203  118 157 183
+58 122 163  19 86 138  6 79 140  6 79 140  6 79 140  66 128 168
+163 190 208  198 215 225  188 208 220  175 199 215  163 190 208  175 199 215
+198 215 225  249 251 252  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  240 245 247
+44 99 139  0 53 98  0 54 101  0 53 98  0 53 98  0 51 95
+0 50 94  0 50 93  0 49 91  0 48 88  0 47 87  0 45 86
+0 45 84  0 44 82  0 43 79  0 42 78  0 40 76  0 40 74
+0 38 72  0 38 70  0 36 67  0 35 67  0 34 63  0 33 63
+0 32 60  0 31 58  0 27 51  0 7 15  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 2  0 51 95  9 67 112  65 110 142
+107 142 167  139 173 197  163 190 208  163 190 208  142 177 202  118 157 183
+53 120 163  19 86 138  4 78 138  4 78 138  77 129 164  240 245 247
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  107 142 167
+0 50 95  0 54 101  0 53 98  0 51 95  0 51 95  0 50 93
+0 49 92  0 48 90  0 48 88  0 47 87  0 46 85  0 45 83
+0 44 82  0 43 79  0 42 78  0 40 76  0 40 74  0 38 72
+0 38 70  0 36 67  1 36 66  0 35 64  0 34 63  0 33 61
+0 30 57  0 25 48  0 7 15  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 1 2  0 50 94  4 62 107  54 103 137
+95 130 157  126 153 173  139 173 197  139 173 197  125 160 184  107 142 167
+53 120 163  8 80 139  2 76 135  19 86 138  198 215 225  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  139 173 197  1 56 100
+0 51 95  0 53 98  0 51 95  0 51 95  0 50 93  0 49 91
+0 48 90  0 48 88  0 47 87  0 46 85  0 45 83  0 44 82
+0 43 79  0 42 78  0 40 76  0 40 74  0 39 73  0 38 71
+0 37 69  0 35 67  0 35 64  0 34 63  0 33 61  0 31 59
+0 25 48  0 5 11  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 2  0 47 87  2 54 96  33 89 133
+70 116 150  107 142 167  126 153 173  125 160 184  117 151 174  86 123 154
+49 111 152  8 80 139  2 73 127  37 101 144  227 236 241  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  151 182 203  4 62 107  0 50 95
+0 51 95  0 51 95  0 50 93  0 49 92  0 49 91  0 48 88
+0 47 87  0 46 85  0 46 85  0 44 83  0 44 81  0 43 79
+0 42 78  0 42 78  0 40 74  0 38 72  0 38 71  0 37 69
+0 37 69  0 35 64  0 34 63  0 33 63  0 31 58  0 26 50
+0 4 9  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 1  0 42 78  0 49 93  9 67 112
+54 103 137  86 123 154  107 142 167  115 143 164  104 134 157  70 116 150
+36 91 133  7 77 127  2 73 127  19 86 138  209 226 237  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  240 245 247  120 155 180  1 56 100  0 50 94  0 51 95
+0 51 95  0 50 94  0 49 91  0 48 90  0 48 88  0 47 87
+0 46 85  0 46 85  0 45 84  0 44 81  0 43 79  0 42 78
+0 42 78  0 41 75  0 39 73  0 38 71  0 38 70  0 37 69
+0 36 67  0 34 63  0 33 63  0 31 59  0 28 53  0 5 11
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 40 74  0 48 90  3 53 95
+36 91 133  66 107 134  86 123 154  95 130 157  86 123 154  66 107 134
+12 74 126  0 57 102  0 64 112  2 73 127  118 157 183  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+188 208 220  58 105 140  0 49 93  0 49 92  0 50 93  0 49 92
+0 49 91  0 48 90  0 47 87  0 47 87  0 46 85  0 45 84
+0 44 83  0 44 82  0 44 81  0 42 78  0 42 78  0 40 76
+0 40 74  0 38 72  0 38 70  0 37 69  0 36 67  0 35 67
+0 34 63  0 33 61  0 31 59  0 20 39  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 37 70  0 47 87  0 48 90
+9 67 112  38 94 135  66 107 134  70 116 150  65 110 142  54 103 137
+9 67 112  0 51 95  0 51 95  0 56 101  5 66 110  125 160 184
+227 236 241  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  240 245 247  188 208 220  86 123 154
+2 60 104  0 45 86  0 50 93  0 49 92  0 49 91  0 48 90
+0 48 88  0 47 87  0 46 85  0 46 85  0 45 84  0 44 82
+0 44 81  0 43 79  0 42 78  0 40 76  0 40 74  0 40 74
+0 38 72  0 38 70  0 36 67  1 36 66  0 35 64  0 34 63
+0 33 63  0 29 55  0 9 19  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 28 53  0 46 85  0 47 87
+3 53 95  9 67 112  36 91 133  54 103 137  54 103 137  33 89 133
+2 60 104  0 49 93  0 50 93  0 50 94  0 49 92  0 50 94
+36 91 133  108 139 162  151 182 203  188 208 220  198 215 225  198 215 225
+175 199 215  139 173 197  95 130 157  36 91 133  0 50 94  0 45 86
+0 48 90  0 49 91  0 49 91  0 48 90  0 48 88  0 47 87
+0 46 85  0 46 85  0 45 84  0 45 83  0 44 81  0 43 79
+0 42 78  0 42 78  0 42 78  0 40 74  0 39 73  0 38 72
+0 38 70  0 37 69  0 35 67  0 35 64  0 33 63  0 24 47
+0 9 19  0 0 1  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 4 9  0 34 65  0 44 83
+0 46 85  3 53 95  9 67 112  33 89 133  33 89 133  9 67 112
+0 49 91  0 48 90  0 49 91  0 49 92  0 50 93  0 49 92
+0 47 87  0 45 86  0 49 92  3 53 95  3 53 95  2 54 96
+0 52 94  0 48 90  0 44 83  0 45 86  0 48 90  0 48 90
+0 48 88  0 48 88  0 47 87  0 46 85  0 46 85  0 46 85
+0 45 83  0 45 83  0 44 81  0 43 79  0 42 78  0 42 78
+0 40 76  0 40 74  0 40 74  0 38 72  0 38 70  0 37 69
+0 36 67  0 36 67  0 35 67  0 31 59  0 14 29  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 5 11  0 33 61
+0 42 78  0 46 85  3 53 95  4 62 107  4 62 107  3 53 95
+0 47 87  0 48 88  0 48 88  0 48 90  0 48 90  0 48 90
+0 48 90  0 48 90  0 49 91  0 48 90  0 48 90  0 48 88
+0 48 90  0 48 90  0 48 88  0 48 90  0 47 87  0 47 87
+0 47 87  0 47 87  0 46 85  0 46 85  0 45 84  0 44 83
+0 44 81  0 44 81  0 43 79  0 42 78  0 42 78  0 40 76
+0 40 74  0 39 73  0 38 72  0 38 70  0 37 69  0 35 67
+0 34 65  0 29 55  0 14 29  0 2 5  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 2 5
+0 27 51  0 40 76  0 44 82  0 47 84  0 47 87  0 45 84
+0 46 85  0 46 85  0 46 85  0 47 87  0 47 87  0 47 87
+0 47 87  0 47 87  0 47 87  0 47 87  0 47 87  0 47 87
+0 46 85  0 45 86  0 45 86  0 46 85  0 46 85  0 45 84
+0 45 83  0 44 82  0 45 83  0 44 81  0 44 81  0 42 78
+0 42 78  0 42 78  0 40 76  0 40 74  0 40 74  0 39 73
+0 39 73  0 39 73  0 38 72  0 35 67  0 29 54  0 12 24
+0 0 2  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 5 11  0 14 29  0 22 43  0 27 51  0 26 50
+0 24 47  0 29 54  0 31 58  0 36 67  0 36 67  0 40 74
+0 40 74  0 40 74  0 44 81  0 46 85  0 47 87  0 47 87
+0 47 87  0 47 87  0 47 87  0 47 87  0 45 86  0 46 85
+0 45 84  0 45 84  0 45 84  0 44 83  0 44 81  0 43 79
+0 43 79  0 42 78  0 41 75  0 38 71  0 36 67  0 34 63
+0 16 33  0 7 15  0 5 11  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 1  0 0 0  0 0 1
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 1  0 0 0  0 0 0  0 0 1  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
diff --git a/drivers/video/logo/logo_fedorasimple_clut224.ppm b/drivers/video/logo/logo_fedorasimple_clut224.ppm
new file mode 100644
index 000000000..b9ba699fc
--- /dev/null
+++ b/drivers/video/logo/logo_fedorasimple_clut224.ppm
@@ -0,0 +1,1123 @@
+P3
+80 80
+255
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 1 2  0 6 11  0 11 21  0 16 30
+0 21 39  0 25 46  0 27 49  0 27 50  0 27 49  0 25 46
+0 21 39  0 16 30  0 11 21  0 6 11  0 1 2  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 1 2  0 9 18
+0 22 40  0 32 61  0 41 77  0 46 87  0 50 92  0 51 95
+0 52 95  0 52 95  0 52 95  0 52 95  0 52 95  0 52 95
+0 52 95  0 51 95  0 50 92  0 46 87  0 41 77  0 32 61
+0 22 40  0 9 18  0 1 2  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 10 18  0 26 49  0 41 77  0 49 90
+0 52 96  0 52 95  0 51 94  0 50 93  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 93  0 51 94  0 52 95
+0 52 96  0 49 90  0 41 75  0 26 49  0 10 18  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 1 3
+0 17 32  0 37 70  0 49 90  0 52 96  0 51 94  0 50 92
+0 50 91  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 91  0 50 92  0 51 94  0 52 96  0 49 90  0 37 70
+0 17 32  0 1 2  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 1 2  0 19 35  0 41 77
+0 51 94  0 51 95  0 50 92  0 49 91  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 49 91  0 50 92  0 51 95
+0 50 93  0 41 75  0 18 34  0 1 2  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 13 24  0 37 71  0 51 95  0 51 94
+0 50 92  0 49 91  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 49 91
+0 50 92  0 51 94  0 51 95  0 37 71  0 13 24  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 3 6  0 29 53  0 49 91  0 51 95  0 50 92  0 49 91
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 49 91  0 50 92  0 51 95  0 49 91  0 29 53
+0 3 6  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 12 22
+0 41 77  0 51 96  0 50 92  0 49 91  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 49 91
+0 47 90  0 47 89  0 46 89  0 47 89  0 48 90  0 49 91
+0 50 92  0 50 92  0 50 92  0 49 91  0 50 92  0 52 96
+0 41 77  0 12 22  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 20 37  0 47 89
+0 51 94  0 50 91  0 50 91  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 49 91  0 46 88  0 41 85  0 40 84  0 47 90
+7 56 98  13 61 101  13 61 101  11 59 100  0 50 93  0 41 85
+0 41 85  0 47 89  0 49 91  0 50 92  0 50 92  0 49 91
+0 51 95  0 47 89  0 20 37  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 1  0 26 50  0 50 93  0 50 93
+0 49 91  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 49 91
+0 43 86  0 43 86  17 64 103  62 116 154  144 169 189  190 205 217
+217 226 233  227 235 240  232 238 242  221 230 237  194 209 220  144 169 189
+62 116 154  1 57 102  0 45 88  0 47 88  0 49 91  0 50 92
+0 49 91  0 50 93  0 50 93  0 27 49  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 1 2  0 30 57  0 51 96  0 50 92  0 49 91
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 45 88  0 43 86
+48 88 122  156 179 196  232 238 242  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  189 213 227  24 107 158  0 65 113  0 50 92  0 47 89
+0 49 91  0 50 92  0 50 92  0 51 95  0 30 57  0 1 2
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 1  0 30 57  0 51 95  0 50 92  0 50 91  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 49 91  0 41 84  17 64 103  156 179 196
+247 250 251  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  196 216 229  0 110 172  0 93 151  0 63 110
+0 46 88  0 49 90  0 50 92  0 50 92  0 51 96  0 31 59
+0 0 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 29 53  0 51 95  0 50 92  0 50 91  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 49 91  0 40 84  59 105 139  221 230 237  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  81 154 196  0 104 168  0 107 168
+0 78 129  0 49 91  0 48 89  0 50 92  0 50 92  0 52 95
+0 29 53  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 23 43
+0 51 94  0 50 92  0 50 91  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 48 91  0 40 84  89 138 172  247 250 251  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  132 174 200  0 102 165  0 108 169
+0 110 172  0 88 143  0 50 93  0 48 90  0 50 92  0 50 92
+0 51 94  0 23 43  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 14 27  0 49 91
+0 50 92  0 49 91  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 49 91
+0 40 84  79 130 164  252 253 254  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  81 154 196  0 102 165  0 107 168
+0 108 169  0 110 172  0 89 144  0 49 92  0 48 90  0 50 92
+0 50 92  0 49 91  0 14 27  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 6 11  0 44 82  0 50 93
+0 49 91  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 49 91  0 40 84
+59 105 139  247 250 251  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  207 222 232  11 106 162  0 105 167  0 108 169
+0 108 168  0 108 169  0 110 172  0 86 140  0 47 88  0 49 91
+0 50 91  0 50 93  0 45 83  0 6 11  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 35 66  0 51 95  0 49 91
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 44 87  17 64 103
+221 230 237  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  246 249 251  241 246 248  247 250 251  255 255 255  255 255 255
+249 251 253  189 213 227  27 123 177  0 103 165  0 108 168  0 108 169
+0 108 169  0 107 168  0 108 169  0 108 170  0 70 118  0 46 88
+0 49 92  0 49 91  0 51 95  0 34 64  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 19 35  0 51 94  0 49 91  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 49 91  0 41 85  144 169 189
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  202 218 229  133 160 181
+59 105 139  42 85 121  22 79 121  42 85 121  59 105 139  73 134 171
+51 128 174  0 109 171  0 105 166  0 108 169  0 108 169  0 108 169
+0 108 169  0 108 169  0 107 168  0 109 170  0 100 158  0 53 96
+0 48 90  0 50 92  0 49 92  0 51 94  0 19 35  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 4 9  0 43 81  0 50 93  0 50 91  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 43 86  42 85 121  246 249 251
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  232 238 242  79 130 164  4 52 93  0 40 84
+0 42 85  0 44 87  0 45 88  0 44 87  0 42 85  0 41 85
+1 57 102  0 92 148  0 110 171  0 108 169  0 108 168  0 108 169
+0 108 169  0 108 169  0 108 169  0 107 168  0 110 171  0 78 129
+0 46 87  0 49 91  0 50 91  0 50 93  0 43 81  0 4 9
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 27 50  0 51 95  0 49 91  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 49 91  0 41 85  144 169 189  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  217 227 234  45 87 122  0 39 81  0 47 89  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 49 91
+0 47 88  0 49 91  0 86 140  0 109 171  0 107 169  0 108 169
+0 108 169  0 108 169  0 108 169  0 108 169  0 108 169  0 101 162
+0 52 96  0 48 90  0 50 92  0 49 91  0 51 95  0 27 50
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 6 11  0 47 86  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 45 88  17 64 103  227 235 240  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+236 240 243  45 87 122  0 40 84  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 48 89  0 48 90  0 88 143  0 109 171  0 107 168
+0 108 169  0 108 169  0 108 169  0 108 169  0 107 168  0 109 171
+0 70 118  0 47 88  0 50 92  0 50 92  0 50 93  0 47 86
+0 6 11  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 27 50  0 52 95  0 49 91  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 41 84  59 105 139  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+98 145 177  0 39 81  0 49 91  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 49 91  0 47 89  0 53 98  0 101 162  0 108 170
+0 108 169  0 108 169  0 108 169  0 108 169  0 108 169  0 110 171
+0 86 140  0 47 88  0 49 91  0 50 92  0 49 91  0 51 95
+0 26 50  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 3 7
+0 44 84  0 50 93  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 49 91  0 40 84  133 160 181  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  217 226 233
+7 56 98  0 46 88  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 49 91  0 46 87  0 78 129  0 110 171
+0 107 168  0 108 169  0 108 169  0 108 169  0 108 169  0 108 170
+0 98 155  0 50 93  0 49 90  0 50 92  0 50 92  0 50 93
+0 44 82  0 3 7  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 20 37
+0 51 95  0 49 91  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 48 90  0 44 87  174 198 214  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  133 160 181
+0 40 84  0 49 91  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 47 89  1 57 102  0 105 166
+0 108 169  0 108 169  0 108 169  0 108 169  0 108 169  0 107 169
+0 103 163  0 56 101  0 48 90  0 50 92  0 50 92  0 49 91
+0 52 95  0 20 37  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 37 70
+0 51 94  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 48 90  0 48 91  202 218 229  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  62 116 154
+0 40 84  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 48 90  0 51 93  0 98 155
+0 108 170  0 108 169  0 108 169  0 108 169  0 108 169  0 108 168
+0 107 168  0 62 109  0 48 89  0 50 92  0 50 92  0 50 92
+0 51 94  0 37 70  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 8 14  0 48 89
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 47 89  4 52 93  217 226 233  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  48 88 122
+0 42 85  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 49 91  0 48 90  0 92 148
+0 109 171  0 108 169  0 108 169  0 108 169  0 108 169  0 108 169
+0 108 169  0 64 111  0 47 88  0 50 92  0 50 92  0 50 92
+0 50 92  0 47 89  0 8 14  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 21 39  0 52 95
+0 49 91  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 46 90  7 56 98  220 228 234  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  48 88 122
+0 43 86  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 49 91  0 48 90  0 92 148
+0 109 171  0 108 169  0 108 169  0 108 169  0 108 169  0 108 169
+0 108 169  0 64 111  0 47 88  0 50 92  0 50 92  0 50 92
+0 49 91  0 52 95  0 21 39  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 34 64  0 51 94
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 47 90  7 56 98  220 228 234  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  48 88 122
+0 43 86  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 48 90  0 51 93  0 98 155
+0 108 170  0 108 169  0 108 169  0 108 169  0 108 169  0 108 168
+0 107 168  0 62 109  0 48 89  0 50 92  0 50 92  0 50 92
+0 50 92  0 51 94  0 34 62  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 2 4  0 43 81  0 50 93
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 47 90  7 56 98  220 228 234  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  48 88 122
+0 43 86  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 47 89  1 57 102  0 105 166
+0 108 169  0 108 169  0 108 169  0 108 169  0 108 169  0 107 169
+0 103 165  0 57 101  0 48 90  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 93  0 43 81  0 2 4  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 10 19  0 48 90  0 49 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 47 90  7 56 98  220 228 234  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  48 88 122
+0 43 86  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 49 91  0 46 87  0 76 127  0 109 171
+0 107 168  0 108 169  0 108 169  0 108 169  0 108 169  0 108 170
+0 99 157  0 51 93  0 49 90  0 50 92  0 50 92  0 50 92
+0 50 92  0 49 92  0 48 90  0 9 18  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 17 32  0 51 95  0 50 91
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 47 90  7 56 98  220 228 234  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  48 88 122
+0 43 86  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 49 91  0 48 89  0 53 96  0 100 158  0 108 170
+0 108 169  0 108 169  0 108 169  0 108 169  0 108 169  0 109 171
+0 86 140  0 47 88  0 49 91  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 91  0 51 95  0 17 32  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 25 47  0 52 95  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 47 90  7 56 98  220 228 234  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  48 88 122
+0 43 86  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 48 90  0 48 89  0 86 140  0 109 171  0 107 168
+0 108 169  0 108 169  0 108 169  0 108 169  0 107 168  0 109 171
+0 70 118  0 47 88  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 52 95  0 25 46  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 31 59  0 51 95  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 47 90  7 56 98  220 228 234  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  48 88 122
+0 43 86  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 49 91
+0 47 89  0 48 89  0 78 129  0 109 170  0 108 169  0 108 169
+0 108 169  0 108 169  0 108 169  0 108 169  0 108 169  0 101 162
+0 53 96  0 48 90  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 51 95  0 31 59  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 35 66  0 51 94  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 49 91  0 49 91  0 49 91
+0 48 90  0 47 89  0 47 89  0 47 89  0 47 89  0 47 89
+0 47 89  0 44 87  4 54 96  220 228 234  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  42 85 121
+0 40 84  0 47 89  0 47 89  0 47 89  0 47 89  0 47 89
+0 47 89  0 48 90  0 49 90  0 48 90  0 47 88  0 46 87
+0 57 101  0 89 144  0 109 171  0 108 169  0 107 168  0 108 169
+0 108 169  0 108 169  0 108 169  0 107 168  0 110 171  0 78 129
+0 47 88  0 49 91  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 51 94  0 35 66  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 41 75  0 50 93  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 49 91
+0 48 90  0 47 88  0 47 88  0 49 90  0 51 94  0 48 92
+0 49 93  10 58 99  13 61 101  13 61 101  13 61 101  13 61 101
+13 61 101  11 59 100  17 64 103  220 228 234  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  133 160 181
+13 56 97  13 61 101  13 61 101  13 61 101  13 61 101  13 61 101
+13 61 101  0 52 95  0 48 92  1 55 99  0 65 113  0 86 140
+0 103 163  0 110 171  0 108 169  0 107 168  0 108 169  0 108 169
+0 108 169  0 108 169  0 107 168  0 109 170  0 101 162  0 53 96
+0 48 89  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 93  0 41 75  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 42 79  0 49 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 49 91  0 48 89  0 47 89
+1 55 99  0 70 118  0 86 140  0 92 148  0 93 151  51 128 174
+174 198 214  221 230 237  229 236 240  229 235 240  229 235 240  229 235 240
+229 235 240  229 235 240  229 236 240  250 252 253  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+232 238 242  229 235 240  229 235 240  229 235 240  229 235 240  229 236 240
+227 235 240  189 213 227  81 154 196  0 101 162  0 108 169  0 110 172
+0 108 170  0 107 168  0 108 169  0 108 169  0 108 169  0 108 169
+0 108 169  0 108 168  0 108 169  0 108 170  0 70 118  0 46 87
+0 49 91  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 49 92  0 42 79  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 43 81  0 50 93  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 48 89  0 48 89  0 62 109  0 86 140
+0 103 163  0 110 171  0 110 172  0 105 167  51 128 174  239 246 249
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  132 174 200  0 102 165  0 107 168
+0 108 169  0 108 169  0 108 169  0 108 169  0 108 169  0 108 169
+0 108 168  0 107 169  0 110 172  0 86 140  0 47 89  0 49 91
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 93  0 43 81  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 44 82  0 50 93  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 49 91  0 46 88  0 56 101  0 86 140  0 107 168  0 110 172
+0 108 170  0 108 169  0 105 167  0 107 168  196 216 229  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  239 246 249  27 123 177  0 105 167
+0 108 169  0 108 169  0 108 169  0 108 169  0 108 169  0 107 168
+0 108 169  0 110 172  0 89 144  0 50 92  0 48 90  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 49 92  0 42 81  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 44 82  0 50 93  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 49 91
+0 47 88  0 70 118  0 103 163  0 110 172  0 108 169  0 107 168
+0 108 169  0 108 169  0 105 167  27 123 177  241 246 248  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  81 154 196  0 102 165
+0 108 169  0 108 169  0 108 169  0 108 169  0 107 168  0 108 169
+0 110 172  0 88 143  0 50 93  0 48 90  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 93  0 41 77  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 44 82  0 50 93  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 49 91  0 47 88
+0 76 127  0 108 169  0 109 170  0 107 168  0 108 169  0 108 169
+0 108 169  0 108 169  0 105 167  27 123 177  241 246 248  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  81 154 196  0 102 165
+0 108 169  0 108 169  0 107 168  0 108 169  0 109 171  0 107 168
+0 78 129  0 49 91  0 48 89  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 51 94  0 37 70  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 44 82  0 50 93  0 50 92
+0 50 92  0 50 92  0 50 92  0 49 91  0 47 88  0 76 127
+0 109 171  0 108 169  0 107 168  0 108 169  0 108 169  0 108 169
+0 108 169  0 108 169  0 107 168  0 107 168  196 216 229  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  239 246 249  27 123 177  0 105 167
+0 108 169  0 108 169  0 110 171  0 109 171  0 94 151  0 63 110
+0 46 88  0 49 90  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 51 94  0 34 62  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 44 82  0 50 93  0 50 92
+0 50 92  0 50 92  0 49 91  0 46 88  0 70 118  0 108 170
+0 108 169  0 107 168  0 108 169  0 108 169  0 108 169  0 108 169
+0 108 169  0 108 169  0 107 168  0 103 165  51 128 174  239 246 249
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  132 174 200  0 104 168  0 109 172
+0 110 172  0 105 167  0 92 148  0 70 118  0 49 92  0 47 89
+0 49 91  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 52 95  0 28 52  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 44 82  0 50 93  0 50 92
+0 50 92  0 50 92  0 47 89  0 57 101  0 103 163  0 108 170
+0 107 168  0 108 169  0 108 169  0 108 169  0 108 169  0 108 169
+0 107 168  0 108 169  0 110 171  0 109 171  0 101 162  51 128 174
+174 198 214  221 230 237  229 236 240  229 235 240  229 235 240  229 235 240
+229 235 240  229 235 240  250 252 253  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  232 238 242
+229 235 240  229 235 240  229 235 240  229 235 240  229 235 240  229 236 240
+227 235 240  189 213 227  81 154 196  0 98 155  0 93 151  0 86 140
+0 72 121  1 57 102  0 48 90  0 47 88  0 49 91  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 52 95  0 20 37  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 44 82  0 50 93  0 50 92
+0 50 92  0 49 91  0 48 89  0 88 143  0 110 171  0 107 168
+0 108 169  0 108 169  0 108 169  0 108 169  0 108 169  0 107 168
+0 109 171  0 108 169  0 89 144  0 70 118  1 57 102  0 49 93
+0 49 93  10 58 99  13 61 101  13 61 101  13 61 101  13 61 101
+13 61 101  13 56 97  59 105 139  243 247 249  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  59 105 139
+13 56 97  13 61 101  13 61 101  13 61 101  13 61 101  13 61 101
+13 61 101  0 52 95  0 48 92  0 51 94  0 49 91  0 47 88
+0 47 88  0 48 90  0 49 91  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 49 91  0 49 92  0 12 22  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 44 82  0 50 93  0 50 92
+0 50 92  0 47 88  0 64 111  0 108 169  0 108 169  0 108 169
+0 108 169  0 108 169  0 108 169  0 108 169  0 107 168  0 109 172
+0 98 155  0 64 111  0 47 89  0 47 88  0 48 89  0 49 90
+0 48 90  0 47 89  0 47 89  0 47 89  0 47 89  0 47 89
+0 47 89  0 44 87  0 51 94  220 228 234  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  45 87 122
+0 40 84  0 47 89  0 47 89  0 47 89  0 47 89  0 47 89
+0 47 89  0 48 90  0 49 91  0 49 91  0 49 91  0 49 91
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 45 85  0 4 9  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 44 82  0 50 93  0 50 92
+0 49 91  0 48 89  0 88 143  0 110 171  0 107 168  0 108 169
+0 108 169  0 108 169  0 108 169  0 107 168  0 109 171  0 93 151
+0 53 96  0 46 87  0 49 91  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 47 90  6 55 97  220 228 234  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  48 88 122
+0 43 86  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 51 94  0 37 70  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 44 82  0 50 93  0 50 92
+0 48 90  0 57 101  0 103 163  0 108 169  0 108 169  0 108 169
+0 108 169  0 108 169  0 107 168  0 109 170  0 99 157  0 53 96
+0 47 88  0 49 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 47 90  7 56 98  220 228 234  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  48 88 122
+0 43 86  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 91  0 52 95  0 26 49  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 44 82  0 50 93  0 50 92
+0 46 88  0 72 121  0 109 171  0 107 168  0 108 169  0 108 169
+0 108 169  0 108 169  0 108 168  0 108 169  0 65 113  0 46 87
+0 49 91  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 47 90  7 56 98  220 228 234  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  48 88 122
+0 43 86  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 49 92  0 50 93  0 12 22  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 44 82  0 50 93  0 49 91
+0 47 88  0 86 140  0 110 172  0 108 169  0 108 169  0 108 169
+0 108 169  0 108 169  0 109 171  0 92 148  0 48 90  0 48 90
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 47 90  7 56 98  220 228 234  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  48 88 122
+0 43 86  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 93  0 41 77  0 1 3  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 44 82  0 50 93  0 48 90
+0 49 91  0 94 151  0 109 171  0 108 169  0 108 169  0 108 169
+0 108 169  0 108 169  0 109 171  0 72 121  0 46 88  0 49 91
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 47 90  7 56 98  220 228 234  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  48 88 122
+0 43 86  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 49 91
+0 52 95  0 26 49  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 44 82  0 50 93  0 49 91
+0 51 94  0 100 158  0 109 170  0 108 169  0 108 169  0 108 169
+0 108 169  0 108 168  0 107 168  0 62 109  0 47 89  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 47 90  7 56 98  220 228 234  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  48 88 122
+0 43 86  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 48 89  0 8 14  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 44 82  0 50 93  0 49 91
+0 53 96  0 101 162  0 108 169  0 108 169  0 108 169  0 108 169
+0 108 169  0 108 169  0 105 166  1 55 99  0 48 90  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 47 90  6 55 97  220 228 234  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  48 88 122
+0 42 85  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 49 91  0 51 95
+0 34 62  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 44 82  0 50 93  0 49 91
+0 52 96  0 101 162  0 108 169  0 108 169  0 108 169  0 108 169
+0 108 169  0 108 169  0 103 165  1 55 99  0 48 90  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 46 89  10 58 99  221 230 237  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  252 253 254  42 85 121
+0 43 86  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 49 91
+0 12 22  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 44 82  0 50 93  0 49 91
+0 51 94  0 100 158  0 109 170  0 108 169  0 108 169  0 108 169
+0 108 169  0 108 168  0 105 167  0 62 109  0 47 89  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 44 87  17 74 116  238 243 246  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  243 247 249  42 85 121
+0 44 87  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 49 91  0 51 95  0 35 66
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 44 82  0 50 93  0 48 90
+0 49 91  0 94 151  0 109 171  0 108 169  0 108 169  0 108 169
+0 108 169  0 108 169  0 109 171  0 72 121  0 46 88  0 49 91
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 41 85  59 105 139  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  227 235 240  17 64 103
+0 45 88  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 47 89  0 10 19
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 44 82  0 50 93  0 49 91
+0 47 88  0 86 140  0 110 172  0 108 169  0 108 169  0 108 169
+0 108 169  0 108 169  0 109 171  0 92 148  0 48 90  0 48 90
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 49 91  0 41 85  156 179 196  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  194 209 220  0 48 90
+0 47 90  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 49 91  0 51 95  0 28 52  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 44 82  0 50 93  0 50 92
+0 46 88  0 72 121  0 109 171  0 107 168  0 108 169  0 108 169
+0 108 169  0 108 169  0 108 168  0 108 169  0 65 113  0 46 87
+0 49 91  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 41 84  45 87 122  243 247 249  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  133 160 181  0 40 84
+0 49 91  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 49 91  0 50 93  0 42 79  0 3 7  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 44 82  0 50 93  0 50 92
+0 48 90  0 57 101  0 103 165  0 108 169  0 108 169  0 108 169
+0 108 169  0 108 169  0 107 168  0 109 170  0 99 157  0 53 96
+0 47 88  0 49 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 44 87
+6 55 97  190 205 217  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  59 105 139  0 41 85
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 91  0 50 92  0 49 91  0 14 27  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 44 82  0 50 93  0 50 92
+0 49 91  0 48 89  0 89 144  0 110 171  0 107 168  0 108 169
+0 108 169  0 108 169  0 108 169  0 107 168  0 109 171  0 94 151
+0 53 96  0 46 87  0 49 91  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 49 91  0 41 84  13 56 97
+155 189 209  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  207 222 232  6 55 97  0 46 89
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 49 92  0 52 95  0 25 47  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 44 82  0 50 93  0 50 92
+0 50 92  0 47 88  0 64 111  0 108 169  0 108 168  0 108 169
+0 108 169  0 108 169  0 108 169  0 108 169  0 107 168  0 110 171
+0 99 157  0 64 111  0 43 86  0 42 85  0 44 87  0 46 88
+0 46 88  0 44 87  0 40 84  0 43 86  59 105 139  194 209 220
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  89 138 172  0 40 84  0 49 91
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 49 91
+0 51 95  0 34 64  0 0 1  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 44 82  0 50 93  0 50 92
+0 50 92  0 49 91  0 48 90  0 89 144  0 110 171  0 107 168
+0 108 169  0 108 169  0 108 169  0 108 169  0 108 169  0 105 167
+0 104 168  27 123 177  58 125 166  56 105 141  42 85 121  17 64 103
+17 64 103  45 87 122  79 130 164  174 198 214  250 252 253  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  207 222 232  10 58 99  0 47 89  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 49 91  0 51 95
+0 39 74  0 3 7  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 44 82  0 50 93  0 50 92
+0 50 92  0 50 92  0 47 89  1 57 102  0 103 163  0 108 170
+0 107 168  0 108 169  0 108 169  0 108 169  0 105 167  0 110 171
+156 192 214  239 246 249  255 255 255  255 255 255  247 250 251  238 243 246
+238 243 246  249 251 253  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  252 253 254  59 105 139  0 41 84  0 49 91  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 49 91  0 51 95  0 42 79
+0 6 11  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 44 82  0 50 93  0 50 92
+0 50 92  0 50 92  0 49 91  0 46 88  0 70 118  0 109 170
+0 108 169  0 107 168  0 108 169  0 107 168  0 103 165  156 192 214
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  133 160 181  0 41 85  0 49 91  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 49 91  0 51 95  0 41 77  0 7 14
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 44 82  0 50 93  0 50 92
+0 50 92  0 50 92  0 50 92  0 49 91  0 47 88  0 78 129
+0 109 171  0 108 169  0 107 168  0 105 166  27 123 177  241 246 248
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+156 179 196  0 48 91  0 47 89  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 49 91  0 50 92  0 51 95  0 38 73  0 6 11  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 44 82  0 49 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 49 91  0 47 89
+0 78 129  0 109 170  0 109 170  0 102 165  51 128 174  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  156 179 196
+4 52 93  0 46 88  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 49 91
+0 50 92  0 51 96  0 34 62  0 3 6  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 41 77  0 50 93  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 49 91
+0 47 88  0 70 118  0 103 163  0 107 169  27 123 177  243 247 249
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  247 250 251  133 160 181  0 47 90
+0 46 88  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 91  0 50 91  0 51 94
+0 49 91  0 25 47  0 0 1  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 34 62  0 51 94  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 49 91  0 46 88  1 57 102  0 88 143  0 105 167  156 192 214
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  190 205 217  59 105 139  0 41 84  0 47 90
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 49 91  0 50 93  0 51 96  0 41 77
+0 13 24  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 19 35  0 52 95  0 49 91
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 48 89  0 48 89  0 62 109  12 92 143
+156 192 214  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  249 251 253
+190 205 217  79 130 164  0 51 93  0 42 85  0 49 91  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 49 91  0 50 92  0 51 95  0 48 89  0 27 50  0 3 6
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 3 6  0 42 79  0 50 93
+0 49 91  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 49 91  0 48 89  0 45 88
+0 53 98  56 105 141  144 169 189  194 209 220  225 233 238  236 240 243
+236 240 243  227 235 240  202 218 229  155 189 209  98 145 177  42 85 121
+0 47 90  0 41 85  0 48 90  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 91  0 50 93
+0 51 95  0 49 90  0 34 62  0 9 18  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 16 30  0 49 92
+0 50 93  0 49 91  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 49 91
+0 48 90  0 42 85  0 41 85  0 50 92  11 59 100  17 64 103
+17 64 103  11 59 100  0 53 95  0 43 86  0 40 84  0 44 87
+0 48 91  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 49 91  0 50 92  0 51 94  0 52 96  0 47 88
+0 32 61  0 11 21  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 22 40
+0 49 92  0 51 94  0 50 92  0 50 91  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 49 91  0 48 90  0 47 89  0 46 89
+0 46 89  0 47 89  0 48 90  0 49 91  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 92
+0 50 92  0 50 92  0 50 92  0 50 92  0 50 92  0 50 93
+0 51 95  0 52 96  0 49 91  0 41 75  0 25 46  0 8 14
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 16 30  0 42 79  0 51 95  0 51 95  0 51 93  0 50 93
+0 50 93  0 50 93  0 50 93  0 50 93  0 50 93  0 50 93
+0 50 93  0 50 93  0 50 93  0 50 93  0 50 93  0 50 93
+0 50 93  0 50 93  0 50 93  0 50 93  0 50 93  0 50 93
+0 50 93  0 50 93  0 50 93  0 50 93  0 50 93  0 50 93
+0 50 93  0 50 93  0 50 93  0 50 93  0 50 93  0 51 93
+0 51 94  0 52 95  0 52 95  0 52 95  0 50 92  0 45 85
+0 37 70  0 25 46  0 10 19  0 0 1  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 3 6  0 18 34  0 32 61  0 41 77  0 44 82
+0 44 82  0 45 82  0 45 82  0 45 82  0 45 82  0 45 82
+0 45 82  0 45 82  0 45 82  0 45 82  0 45 82  0 45 82
+0 45 82  0 45 82  0 45 82  0 45 82  0 45 82  0 45 82
+0 45 82  0 45 82  0 45 82  0 45 83  0 45 83  0 45 83
+0 45 82  0 45 82  0 45 82  0 44 82  0 42 81  0 41 77
+0 37 70  0 32 61  0 27 50  0 19 35  0 11 21  0 3 7
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0
diff --git a/drivers/video/logo/logo_gentoo_clut224.ppm b/drivers/video/logo/logo_gentoo_clut224.ppm
new file mode 100644
index 000000000..6163d9fcc
--- /dev/null
+++ b/drivers/video/logo/logo_gentoo_clut224.ppm
@@ -0,0 +1,803 @@
+P3
+76 80
+255
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+152 135 250 152 139 239 151 142 225 151 143 219 154 147 213 154 147 213 154 147 213 154 147 213
+154 147 213 154 147 213 151 143 219 157 148 227 157 148 227 152 139 239 158 154 250 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 155 145 246 151 142 225 154 147 213
+156 149 204 156 149 199 156 149 199 156 149 199 156 149 199 156 149 199 156 149 204 156 149 204
+156 149 204 156 149 204 156 149 204 154 147 213 154 147 213 154 147 213 151 142 225 152 139 239
+152 139 239 158 154 250 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 157 148 227 151 143 219 156 149 204 156 149 199 165 162 202
+186 183 208 199 194 238 219 217 229 221 219 238 245 245 249 252 252 253 255 255 255 255 255 255
+248 248 253 235 234 251 212 211 246 199 194 238 162 161 213 151 142 225 151 143 219 151 143 219
+151 142 225 152 139 239 152 139 239 152 135 250 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 159 151 238 151 143 219 156 149 204 165 162 202 186 181 225 231 231 244 253 253 255
+254 254 255 253 253 254 254 254 254 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+253 253 254 249 249 254 245 245 249 242 242 252 241 240 252 223 222 249 185 182 243 151 142 225
+151 142 225 149 138 231 146 136 235 152 139 239 152 135 250 152 135 250 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 155 145 246
+151 142 225 156 149 204 156 149 204 206 201 229 244 244 253 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+254 254 254 251 251 254 248 248 253 244 244 253 241 240 252 237 237 251 233 232 250 225 225 249
+190 187 243 159 151 238 146 135 237 146 135 237 146 135 237 146 133 248 146 133 248 136 134 248
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 149 138 231 154 147 213
+156 149 204 171 166 233 239 238 247 254 254 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 252 252 254 249 249 254 245 245 253 242 242 252 238 238 252 235 234 251 231 231 250
+228 227 250 223 222 249 185 182 243 146 133 243 146 133 248 144 131 244 146 133 248 146 133 248
+146 133 248 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 152 139 239 157 148 227 154 147 213 154 147 213
+206 201 229 253 253 254 254 254 254 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 253 253 254 250 250 254 246 246 253 243 243 252 239 239 252 236 236 251 233 232 250
+229 228 250 225 225 249 222 222 249 213 212 247 176 171 244 146 133 248 146 133 248 146 133 248
+146 133 248 146 133 248 136 134 248 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 155 145 246 151 142 225 154 147 213 157 148 227 227 227 249
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 254 254 254 251 251 254 248 248 253 244 244 253 240 240 252 237 236 251 234 233 251
+230 229 251 226 225 249 223 222 249 219 218 248 216 216 248 203 200 248 167 159 246 152 135 250
+152 135 250 152 135 250 152 135 250 146 133 248 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 152 139 239 151 142 225 151 142 225 180 177 239 245 245 249 254 254 254
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 252 252 254 248 248 253 245 244 253 241 241 252 237 237 251 234 234 251
+231 230 250 227 227 249 224 224 249 219 218 248 216 216 248 213 212 247 210 209 246 189 186 247
+158 154 250 155 145 246 155 145 246 152 135 250 146 133 248 146 133 248 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 152 139 239 151 142 225 151 142 225 199 194 238 254 254 254 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 253 253 254 249 249 254 246 245 253 242 242 252 238 238 252 235 234 251
+231 231 250 228 227 250 224 224 249 221 220 248 218 217 247 213 213 247 210 209 246 206 205 245
+201 199 246 171 166 245 155 145 246 158 149 246 155 145 246 146 133 248 146 133 248 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+146 133 243 149 138 231 149 138 231 213 212 247 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 253 253 254 250 250 254 246 246 253 243 243 252 239 239 252 236 236 251
+233 232 250 229 228 250 225 224 250 221 221 248 218 217 247 214 214 247 210 209 246 207 206 246
+203 203 245 200 199 244 185 183 243 163 156 246 160 152 246 160 152 246 155 145 246 146 133 248
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 152 139 239
+146 135 237 146 136 235 215 214 247 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 254 254 254 251 251 254 248 248 253 243 243 252 240 240 252 236 236 251
+233 232 250 229 229 250 226 225 249 222 222 249 218 218 248 215 214 247 210 209 246 208 208 246
+204 203 245 200 199 244 197 196 244 189 187 243 169 163 245 164 155 247 163 156 246 158 149 246
+146 133 248 146 133 243 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 146 133 248 146 133 243
+146 135 237 203 200 248 254 254 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 251 251 254 248 248 253 244 244 253 241 240 252 237 237 251
+234 233 251 230 229 251 226 225 249 223 222 249 219 218 248 215 215 247 212 211 246 208 208 246
+204 203 245 201 200 245 197 196 244 194 193 243 190 188 243 176 171 244 167 159 246 168 162 245
+165 158 245 146 133 243 144 131 244 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 158 154 250 146 133 248 146 133 243
+184 179 247 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 252 252 254 248 248 253 245 244 253 241 241 252 237 237 251
+234 234 251 230 230 250 227 227 249 223 223 249 219 218 248 215 215 247 212 211 246 208 208 246
+204 204 245 201 200 245 197 196 244 194 193 243 190 188 243 188 186 243 181 177 244 171 166 245
+171 166 245 171 166 245 155 145 246 144 131 244 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 146 133 248 144 131 244 164 155 247
+253 253 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 244 244 253 230 230 250 221 220 248 220 219 250 234 233 251 238 238 252
+234 234 251 231 230 250 227 227 249 223 223 249 219 218 248 216 216 248 213 212 247 209 208 246
+204 204 245 201 201 245 198 197 244 194 193 243 191 190 243 188 187 243 186 184 243 182 180 243
+175 170 244 174 170 244 175 170 244 160 152 246 146 133 243 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 146 133 248 146 133 248 146 133 248 234 233 253
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 251 251 254 228 227 250 204 204 245 185 183 243 189 186 247
+212 210 249 231 231 250 227 227 249 224 224 249 219 218 248 216 216 248 213 212 247 209 208 246
+206 205 245 201 201 245 198 197 244 195 194 243 191 190 243 188 187 243 186 184 243 184 182 243
+181 177 244 178 174 244 178 174 244 178 175 244 168 162 245 146 135 237 145 140 230 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 146 133 248 146 133 248 189 186 247 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 244 244 253 218 217 247 191 190 243 176 171 244
+160 152 246 171 164 246 217 216 247 224 224 249 221 220 248 216 216 248 213 212 247 209 208 246
+206 205 245 201 201 245 199 198 244 195 194 243 191 190 243 188 187 243 186 185 243 184 182 243
+182 180 243 180 177 244 180 177 244 181 177 244 182 180 243 176 171 244 152 139 239 146 135 230
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 146 133 248 152 135 250 155 145 246 242 241 253 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 251 251 254 224 224 249 196 195 244 178 175 244
+163 156 246 152 135 250 149 138 231 206 201 229 221 220 248 216 216 248 213 213 247 210 209 246
+206 205 245 201 201 245 199 198 244 195 194 243 191 190 243 188 187 243 186 185 243 184 182 243
+183 180 243 181 177 244 178 175 244 182 180 243 185 183 243 185 183 243 180 177 244 152 139 239
+146 136 235 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 146 133 248 155 145 246 184 179 247 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 246 246 253 217 214 250 212 208 248 220 219 250 218 217 247 195 194 243 178 174 244
+163 156 246 146 133 248 151 142 225 156 149 199 165 162 202 220 219 250 213 213 247 210 209 246
+206 205 245 201 201 245 199 198 244 195 194 243 192 191 243 189 187 243 187 185 243 185 182 243
+183 180 243 181 177 244 178 175 244 177 172 244 183 180 243 188 187 243 189 187 243 185 183 243
+159 151 238 146 135 237 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 146 133 248 152 135 250 155 145 246 220 219 250 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+243 243 253 155 145 246 146 133 243 146 133 243 146 133 243 146 135 237 158 149 246 166 159 245
+159 150 246 146 133 248 151 143 219 151 143 191 105 99 142 186 183 208 213 213 247 210 209 246
+207 206 246 201 201 245 199 198 244 195 194 243 192 191 243 189 187 243 187 185 243 185 182 243
+183 180 243 181 177 244 178 175 244 176 172 244 174 170 244 185 183 243 193 191 243 194 193 243
+193 191 243 159 151 238 146 136 235 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 146 133 248 155 145 246 158 149 246 248 247 254 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 254 254 255 248 248 253 255 255 255 255 255 255 255 255 255
+189 186 247 146 133 243 146 135 237 146 133 243 146 135 237 146 133 243 146 135 237 146 135 237
+146 133 243 146 135 237 154 147 213 134 128 170 88 84 126 88 84 126 199 194 238 210 209 246
+207 206 246 201 201 245 199 198 244 195 194 243 192 191 243 189 187 243 187 185 243 185 182 243
+183 180 243 181 177 244 178 175 244 176 172 244 174 170 244 173 167 245 189 187 243 199 198 244
+201 200 245 199 198 244 159 151 238 145 140 230 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 146 133 248 160 152 246 165 158 245 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 246 246 253 219 218 248 234 233 251 243 243 252 248 248 253
+152 139 239 146 135 237 146 135 237 146 135 237 146 135 237 146 135 237 146 135 237 146 135 237
+146 135 237 146 136 235 156 149 199 114 106 151 82 76 118 82 76 118 142 137 174 210 209 246
+207 206 246 201 201 245 199 198 244 195 194 243 192 191 243 189 187 243 187 185 243 185 182 243
+183 180 243 181 177 244 178 175 244 176 172 244 174 170 244 172 167 245 171 166 245 193 191 243
+206 205 245 207 206 246 209 209 245 159 151 238 146 135 230 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 146 133 248 144 131 244 163 156 246 165 158 245 254 253 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 244 244 253 190 188 243 201 200 245 208 208 246 207 206 246
+146 135 237 146 135 237 146 135 237 146 135 237 146 135 237 146 135 237 146 135 237 146 135 237
+146 135 237 146 136 235 136 128 204 95 88 132 82 76 118 82 76 118 100 95 138 210 209 246
+207 206 246 201 201 245 199 198 244 195 194 243 192 191 243 189 187 243 187 185 243 185 182 243
+183 180 243 181 177 244 178 175 244 176 172 244 174 170 244 172 167 245 171 166 245 169 163 245
+199 198 244 212 211 246 213 212 247 209 209 245 157 148 227 151 137 225 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 144 131 244 146 133 248 167 159 246 167 159 246 234 233 253 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 184 179 247 177 172 244 181 177 244 182 180 243
+152 139 239 146 135 237 146 135 237 146 136 235 146 136 235 146 136 235 146 136 235 146 136 235
+146 136 235 146 136 235 146 136 235 82 76 118 82 76 118 82 76 118 88 84 126 210 209 246
+206 205 245 201 201 245 199 198 244 195 194 243 191 190 243 189 187 243 187 185 243 185 182 243
+183 180 243 181 177 244 178 175 244 176 172 244 174 170 244 172 167 245 171 166 245 168 162 245
+169 163 245 207 206 246 218 218 248 221 220 248 212 211 246 157 148 227 161 137 216 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 144 131 244 144 131 244 163 156 246 169 163 245 189 186 247 253 253 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 217 214 250 159 150 246 160 152 246 163 156 246
+158 149 246 146 135 230 146 136 235 146 136 235 146 136 235 149 138 231 146 136 235 149 138 231
+149 138 231 149 138 231 146 135 230 82 76 118 82 76 118 82 76 118 110 102 146 210 209 246
+206 205 245 201 201 245 199 198 244 195 194 243 191 190 243 188 187 243 186 185 243 184 182 243
+182 180 243 180 177 244 178 174 244 176 172 244 174 170 244 172 167 245 169 163 245 168 162 245
+166 159 245 171 164 246 216 216 248 224 224 249 226 225 249 215 214 247 157 148 227 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 146 133 243 146 133 243 155 145 246 172 167 245 173 167 245 199 196 246 251 251 254
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 254 254 255 195 192 247 144 131 244 146 133 248
+144 131 244 146 133 243 146 136 235 151 137 225 145 140 230 146 135 230 149 138 231 149 138 231
+149 138 231 146 135 230 113 109 162 82 76 118 82 76 118 82 76 118 165 162 202 209 208 246
+206 205 245 201 201 245 199 198 244 195 194 243 191 190 243 188 187 243 186 185 243 184 182 243
+182 180 243 180 177 244 178 174 244 176 172 244 174 170 244 172 167 245 169 163 245 168 162 245
+166 159 245 163 156 246 175 170 244 226 225 249 231 230 250 233 232 250 209 209 245 151 142 225
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 146 133 243 146 135 237 146 135 237 166 159 245 176 171 244 176 172 244 189 186 247
+242 241 253 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 254 254 255 206 201 229 154 147 213
+154 147 213 156 149 204 156 149 199 151 143 191 126 124 174 0 0 0 0 0 0 146 135 230
+149 138 231 126 124 174 82 76 118 82 76 118 82 76 118 131 126 164 212 211 246 209 208 246
+204 204 245 201 201 245 198 197 244 194 193 243 191 190 243 188 187 243 186 184 243 184 182 243
+182 180 243 180 177 244 178 174 244 176 172 244 174 170 244 172 167 245 169 163 245 168 162 245
+167 159 246 163 156 246 164 155 247 184 179 247 237 236 251 237 236 251 237 237 251 199 194 238
+151 143 219 0 0 0 0 0 0 0 0 0
+0 0 0 146 135 237 146 133 243 146 135 237 152 139 239 174 170 244 179 175 244 180 177 244
+182 180 243 220 219 250 254 254 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 219 217 229
+142 137 174 114 108 147 102 97 140 95 88 132 82 76 118 82 76 118 82 76 118 82 76 118
+95 88 132 82 76 118 82 76 118 82 76 118 142 137 174 212 211 246 213 212 247 209 208 246
+204 204 245 201 201 245 198 197 244 194 193 243 191 190 243 188 187 243 186 184 243 184 182 243
+182 180 243 180 177 244 178 174 244 176 171 244 174 170 244 171 166 245 169 163 245 168 162 245
+167 159 246 164 155 247 163 156 246 160 152 246 212 208 248 242 242 252 242 242 252 241 240 252
+171 166 233 151 143 219 0 0 0 0 0 0
+0 0 0 0 0 0 146 135 237 146 135 237 146 135 237 152 139 239 176 172 244 183 180 243
+183 180 243 184 182 243 196 194 246 234 234 251 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+252 252 253 186 183 208 122 118 142 82 76 118 82 76 118 82 76 118 82 76 118 82 76 118
+82 76 118 82 76 118 114 106 151 186 181 225 223 223 249 215 215 247 212 211 246 208 208 246
+204 204 245 201 200 245 197 196 244 194 193 243 190 188 243 188 186 243 186 184 243 184 182 243
+182 180 243 180 177 244 178 174 244 176 171 244 174 170 244 171 166 245 169 163 245 168 162 245
+167 159 246 163 156 246 163 156 246 159 150 246 164 155 247 242 241 253 248 248 253 251 250 252
+231 231 244 154 147 213 0 0 0 0 0 0
+0 0 0 0 0 0 146 135 237 146 136 235 146 136 235 146 136 235 149 138 231 174 170 244
+186 184 243 187 185 243 187 186 243 188 186 243 204 204 246 242 241 253 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 219 217 229 165 162 202 131 126 164 114 108 147 114 108 147
+142 137 174 206 201 229 227 227 249 224 224 249 219 218 248 215 214 247 212 211 246 208 208 246
+204 203 245 201 200 245 197 196 244 193 191 243 190 188 243 187 186 243 186 184 243 184 182 243
+181 177 244 180 177 244 178 174 244 176 171 244 173 167 245 171 166 245 169 163 245 168 162 245
+165 158 245 163 156 246 160 152 246 160 152 246 158 149 246 195 192 247 253 253 254 254 254 254
+254 253 255 199 194 238 154 147 213 0 0 0
+0 0 0 0 0 0 0 0 0 146 136 235 146 136 235 149 138 231 149 138 231 149 138 231
+171 166 233 187 185 243 190 188 243 192 191 243 193 191 243 194 193 243 215 215 247 246 246 253
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 254 254 254 252 252 253 251 250 252 245 245 249 240 240 252 236 236 251
+231 231 250 227 227 249 224 224 249 221 221 248 218 218 248 214 214 247 210 209 246 207 206 246
+204 203 245 200 199 244 196 195 244 193 191 243 189 187 243 187 186 243 185 183 243 183 181 243
+181 177 244 179 175 244 177 172 244 175 170 244 173 167 245 171 166 245 169 163 245 168 162 245
+165 158 245 163 156 246 160 152 246 159 150 246 158 149 246 159 150 246 248 247 254 255 255 255
+255 255 255 239 238 247 154 147 213 0 0 0
+0 0 0 0 0 0 0 0 0 146 135 230 149 138 231 146 136 235 149 138 231 149 138 231
+146 136 235 157 148 227 185 182 243 197 196 244 198 197 244 199 198 244 200 199 244 201 200 245
+219 218 248 245 245 253 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 253 253 254 250 250 254 246 246 253 243 243 252 239 239 252 235 234 251
+233 232 250 228 227 250 225 224 250 221 220 248 218 217 247 213 213 247 210 209 246 207 206 246
+203 203 245 199 198 244 196 195 244 192 191 243 189 187 243 187 186 243 185 183 243 183 180 243
+181 177 244 179 175 244 178 174 244 175 170 244 173 167 245 171 166 245 169 163 245 167 159 246
+165 158 245 163 156 246 160 152 246 159 150 246 158 149 246 155 145 246 220 219 250 255 255 255
+255 255 255 253 253 255 186 181 225 154 147 213
+0 0 0 0 0 0 0 0 0 0 0 0 149 138 231 149 138 231 149 138 231 149 138 231
+149 138 231 149 138 231 149 138 231 171 166 233 197 196 244 204 203 245 204 204 245 207 206 246
+207 206 246 209 208 246 223 222 249 244 244 253 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 252 252 254 249 249 254 245 245 253 242 242 252 238 238 252 235 234 251
+231 231 250 228 227 250 224 224 249 221 220 248 216 216 248 213 212 247 210 209 246 206 205 245
+201 201 245 199 198 244 195 194 243 192 191 243 189 187 243 187 185 243 185 183 243 183 180 243
+181 177 244 179 175 244 177 172 244 175 170 244 173 167 245 171 166 245 168 162 245 167 159 246
+165 158 245 163 156 246 160 152 246 159 150 246 158 149 246 155 145 246 203 200 248 254 254 255
+255 255 255 255 255 255 199 194 238 156 149 204
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 151 142 225 149 138 231 151 137 225
+149 138 231 151 137 225 151 137 225 151 142 225 151 142 225 180 177 239 207 206 246 212 211 246
+213 212 247 213 213 247 215 214 247 215 215 247 226 225 249 244 244 253 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 251 251 254 248 248 253 244 244 253 241 241 252 237 237 251 234 234 251
+230 230 250 227 227 249 223 223 249 219 218 248 216 216 248 213 212 247 209 208 246 204 204 245
+201 201 245 198 197 244 195 194 243 191 190 243 188 187 243 186 185 243 184 182 243 182 180 243
+181 177 244 178 175 244 176 172 244 174 170 244 172 167 245 171 166 245 168 162 245 167 159 246
+165 158 245 163 156 246 160 152 246 159 150 246 155 145 246 155 145 246 207 205 248 255 255 255
+255 255 255 255 255 255 206 201 229 156 149 204
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 151 137 225 151 142 225
+151 142 225 151 142 225 151 142 225 151 137 225 151 142 225 151 142 225 157 148 227 185 182 243
+214 214 247 219 218 248 219 218 248 221 220 248 222 222 249 223 223 249 230 230 250 245 245 253
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 254 254 254 250 250 254 248 248 253 243 243 252 240 240 252 236 236 251 234 233 251
+229 229 250 226 225 249 222 222 249 219 218 248 215 215 247 212 211 246 208 208 246 204 204 245
+201 200 245 197 196 244 194 193 243 190 188 243 188 187 243 186 184 243 184 182 243 182 180 243
+180 177 244 178 174 244 176 172 244 174 170 244 172 167 245 169 163 245 168 162 245 166 159 245
+163 156 246 163 156 246 160 152 246 158 149 246 155 145 246 155 145 246 224 222 251 255 255 255
+255 255 255 255 255 255 186 181 225 156 149 204
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 136 134 248
+151 142 225 151 142 225 151 142 225 151 142 225 151 142 225 151 137 225 151 143 219 151 143 219
+157 148 227 186 184 243 218 217 247 226 225 249 227 227 249 228 227 250 229 229 250 230 230 250
+235 234 251 251 250 254 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 253 253 254 249 249 254 246 246 253 242 242 252 239 239 252 235 234 251 233 232 250
+229 228 250 225 225 249 221 221 248 218 217 247 214 214 247 210 209 246 207 206 246 204 203 245
+200 199 244 197 196 244 193 191 243 190 188 243 188 186 243 186 184 243 184 182 243 182 180 243
+180 177 244 178 174 244 176 171 244 174 170 244 171 166 245 169 163 245 168 162 245 167 159 246
+164 155 247 160 152 246 160 152 246 158 149 246 155 145 246 171 164 246 253 253 255 255 255 255
+255 255 255 253 252 254 162 161 213 156 149 204
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 151 142 225 151 143 219 151 142 225 151 143 219 151 143 219 151 142 225 151 143 219
+151 143 219 151 143 219 151 143 219 186 181 225 220 219 250 234 233 251 234 234 251 235 234 251
+237 236 251 245 245 253 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 252 252 254 248 248 253 245 245 253 241 241 252 238 238 252 234 234 251 231 231 250
+228 227 250 224 224 249 221 220 248 218 217 247 213 213 247 210 209 246 207 206 246 203 203 245
+199 198 244 196 195 244 192 191 243 189 187 243 187 186 243 185 183 243 183 181 243 181 177 244
+179 175 244 177 172 244 175 170 244 173 167 245 171 166 245 169 163 245 168 162 245 167 159 246
+163 156 246 164 155 247 160 152 246 158 149 246 160 152 246 240 238 254 255 255 255 255 255 255
+255 255 255 221 219 238 156 149 204 156 149 204
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 151 143 219 151 143 219 151 143 219 151 143 219 151 143 219
+151 143 219 151 143 219 151 143 219 151 143 219 154 147 213 180 177 239 231 230 250 241 240 252
+243 243 252 253 253 254 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+254 254 254 250 250 254 248 248 253 243 243 252 240 240 252 237 237 251 234 233 251 230 229 251
+227 227 249 223 223 249 219 218 248 216 216 248 213 212 247 209 208 246 206 205 245 201 201 245
+199 198 244 195 194 243 192 191 243 189 187 243 187 185 243 185 183 243 183 180 243 181 177 244
+179 175 244 178 174 244 175 170 244 173 167 245 171 166 245 169 163 245 167 159 246 165 158 245
+163 156 246 160 152 246 159 150 246 159 150 246 230 229 251 255 255 255 255 255 255 255 255 255
+253 252 254 162 161 213 156 149 199 156 149 204
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 151 143 219 151 143 219 151 143 219
+151 143 219 151 143 219 154 147 213 151 143 219 151 143 219 199 194 238 245 245 249 246 245 253
+252 252 254 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+252 252 254 249 249 254 246 246 253 242 242 252 239 239 252 236 236 251 233 232 250 229 228 250
+226 225 249 222 222 249 218 218 248 215 215 247 212 211 246 208 208 246 204 204 245 201 200 245
+198 197 244 194 193 243 191 190 243 188 187 243 186 184 243 184 182 243 182 180 243 181 177 244
+178 175 244 176 172 244 175 170 244 173 167 245 171 166 245 168 162 245 167 159 246 165 158 245
+163 156 246 160 152 246 160 152 246 224 222 251 255 255 255 255 255 255 255 255 255 255 255 255
+206 201 229 156 149 199 156 149 199 156 149 199
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 136 134 248 154 147 213
+154 147 213 151 143 219 154 147 213 154 147 213 209 209 245 251 250 252 250 250 254 254 254 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+251 251 254 248 248 253 245 244 253 241 241 252 237 237 251 234 234 251 231 231 250 228 227 250
+224 224 249 221 220 248 218 217 247 214 214 247 210 209 246 207 206 246 204 203 245 200 199 244
+197 196 244 193 191 243 190 188 243 188 186 243 186 184 243 184 182 243 182 180 243 180 177 244
+178 174 244 176 172 244 174 170 244 172 167 245 169 163 245 168 162 245 166 159 245 163 156 246
+163 156 246 160 152 246 217 214 250 255 255 255 255 255 255 255 255 255 255 255 255 239 238 247
+165 162 202 156 149 199 165 162 202 156 149 199
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+161 137 216 161 137 216 162 161 213 226 225 249 252 252 253 254 254 254 254 254 254 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 253 253 254
+250 250 254 246 246 253 243 243 252 240 240 252 236 236 251 234 233 251 230 229 251 227 227 249
+223 223 249 219 218 248 216 216 248 213 212 247 210 209 246 206 205 245 203 203 245 199 198 244
+196 195 244 192 191 243 189 187 243 187 186 243 185 183 243 183 181 243 181 177 244 179 175 244
+178 174 244 176 171 244 174 170 244 171 166 245 169 163 245 168 162 245 167 159 246 163 156 246
+163 156 246 212 210 249 255 255 255 255 255 255 255 255 255 255 255 255 251 251 254 186 183 208
+156 149 199 156 149 199 156 149 199 156 149 199
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 186 181 225 241 240 252 254 254 254 254 254 254 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 251 251 254
+248 248 253 245 245 253 242 242 252 238 238 252 235 234 251 233 232 250 229 228 250 225 225 249
+222 222 249 218 218 248 215 215 247 212 211 246 208 208 246 204 204 245 201 201 245 198 197 244
+195 194 243 191 190 243 189 187 243 187 185 243 185 183 243 183 180 243 181 177 244 179 175 244
+177 172 244 175 170 244 173 167 245 171 166 245 169 163 245 168 162 245 165 158 245 163 156 246
+212 208 248 255 255 255 255 255 255 255 255 255 255 255 255 254 254 254 186 181 225 151 143 191
+156 149 199 156 149 199 156 149 199 151 143 191
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+186 181 225 251 250 252 254 254 254 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 253 253 254 250 250 254
+248 248 253 243 243 252 240 240 252 237 237 251 234 233 251 230 230 250 227 227 249 224 224 249
+221 220 248 218 217 247 213 213 247 210 209 246 207 206 246 204 203 245 201 200 245 197 196 244
+194 193 243 190 188 243 188 187 243 186 184 243 184 182 243 182 180 243 180 177 244 178 175 244
+176 172 244 175 170 244 173 167 245 171 166 245 168 162 245 167 159 246 165 158 245 217 214 250
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 219 217 229 156 149 199 156 149 199
+151 143 191 151 143 191 151 143 191 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 161 160 183 199 194 238
+253 252 254 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 254 254 254 251 251 254 248 248 253
+245 245 253 242 242 252 239 239 252 235 234 251 233 232 250 229 229 250 226 225 249 223 222 249
+219 218 248 215 215 247 213 212 247 209 208 246 206 205 245 201 201 245 199 198 244 196 195 244
+192 191 243 189 187 243 187 186 243 185 183 243 184 182 243 182 180 243 180 177 244 178 174 244
+176 171 244 174 170 244 172 167 245 169 163 245 168 162 245 167 159 246 220 219 250 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 221 219 238 151 143 191 151 143 191 151 143 191
+156 149 199 151 143 191 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 154 147 213 206 201 229 252 252 253
+254 254 254 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 252 252 254 249 249 254 246 246 253
+243 243 252 240 240 252 237 237 251 234 234 251 231 231 250 228 227 250 224 224 249 221 220 248
+218 217 247 215 214 247 212 211 246 208 208 246 204 204 245 201 200 245 198 197 244 195 194 243
+191 190 243 189 187 243 187 185 243 185 183 243 183 180 243 181 177 244 179 175 244 177 172 244
+175 170 244 173 167 245 171 166 245 169 163 245 169 163 245 225 224 250 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 231 231 244 151 143 191 151 143 191 151 143 191 151 143 191
+151 143 191 151 143 191 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 154 147 213 206 201 229 254 254 254 254 254 254
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 254 254 254 251 251 254 248 248 253 245 245 253
+242 242 252 238 238 252 235 234 251 233 232 250 229 229 250 226 225 249 223 222 249 219 218 248
+216 216 248 213 212 247 210 209 246 207 206 246 203 203 245 200 199 244 197 196 244 193 191 243
+190 188 243 188 186 243 186 184 243 184 182 243 182 180 243 181 177 244 178 175 244 177 172 244
+175 170 244 173 167 245 171 166 245 172 167 245 230 229 251 255 255 255 255 255 255 255 255 255
+255 255 255 254 254 254 231 231 244 151 143 191 151 143 191 151 143 191 143 137 181 151 143 191
+143 137 181 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 162 161 213 221 219 238 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 252 252 254 249 249 254 246 246 253 243 243 252
+240 240 252 237 237 251 234 234 251 231 230 250 228 227 250 225 224 250 221 221 248 218 218 248
+215 214 247 212 211 246 209 208 246 204 204 245 201 201 245 199 198 244 195 194 243 192 191 243
+189 187 243 187 186 243 185 183 243 183 181 243 182 180 243 180 177 244 178 174 244 176 172 244
+174 170 244 172 167 245 176 171 244 234 233 253 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 231 231 244 151 143 191 151 143 191 143 137 181 151 143 191 143 137 181 151 143 191
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 165 162 202 221 219 238 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 253 253 254 250 250 254 248 248 253 244 244 253 241 241 252
+238 238 252 235 234 251 233 232 250 229 229 250 226 225 249 223 222 249 219 218 248 216 216 248
+213 213 247 210 209 246 207 206 246 204 203 245 201 200 245 197 196 244 194 193 243 191 190 243
+188 187 243 187 185 243 185 183 243 183 180 243 181 177 244 179 175 244 177 172 244 175 170 244
+173 167 245 184 179 247 243 243 253 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+221 219 238 151 143 191 143 137 181 143 137 181 143 137 181 143 137 181 143 137 181 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 162 161 213 231 231 244 254 254 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 253 253 254 250 250 254 248 248 253 245 245 253 242 242 252 239 239 252
+236 236 251 234 233 251 230 230 250 227 227 249 224 224 249 221 221 248 218 218 248 215 214 247
+212 211 246 209 208 246 206 205 245 201 201 245 199 198 244 196 195 244 193 191 243 189 187 243
+187 186 243 186 184 243 184 182 243 182 180 243 180 177 244 178 175 244 177 172 244 175 170 244
+195 192 247 249 249 254 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 221 219 238
+151 143 191 143 137 181 143 137 181 143 137 181 143 137 181 143 137 181 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+186 183 208 231 231 244 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 254 254 254 251 251 254 248 248 253 246 245 253 243 243 252 240 240 252 237 237 251
+234 234 251 231 231 250 229 228 250 226 225 249 223 222 249 219 218 248 216 216 248 213 213 247
+210 209 246 207 206 246 204 203 245 201 200 245 197 196 244 194 193 243 191 190 243 189 187 243
+187 185 243 185 183 243 183 181 243 181 177 244 180 177 244 178 174 244 176 171 244 207 205 248
+254 254 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 206 201 229 143 137 181
+142 137 174 143 137 181 143 137 181 143 137 181 143 137 181 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 165 162 202
+231 231 244 254 253 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+254 254 254 251 251 254 249 249 254 246 246 253 243 243 252 241 241 252 238 238 252 235 234 251
+233 232 250 230 229 251 227 227 249 224 224 249 221 220 248 218 217 247 215 214 247 212 211 246
+209 208 246 206 205 245 201 201 245 199 198 244 196 195 244 193 191 243 190 188 243 188 186 243
+186 184 243 184 182 243 182 180 243 181 177 244 179 175 244 178 174 244 220 219 250 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 202 200 215 137 131 176 143 137 181
+137 131 176 137 131 176 137 131 176 137 131 176 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 165 162 202 231 231 244
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 254 254 254
+252 252 254 249 249 254 248 248 253 244 244 253 242 241 253 239 239 252 236 236 251 234 233 251
+230 230 250 228 227 250 225 224 250 221 221 248 219 218 248 215 215 247 213 212 247 210 209 246
+207 206 246 204 203 245 201 200 245 198 197 244 194 193 243 191 190 243 189 187 243 187 185 243
+185 183 243 183 181 243 182 180 243 180 177 244 183 180 243 233 232 252 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 248 248 253 186 183 208 137 131 176 137 131 176 137 131 176
+137 131 176 137 131 176 137 131 176 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 156 149 199 221 219 238 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 254 254 254 252 252 254
+249 249 254 248 248 253 245 244 253 242 242 252 239 239 252 237 236 251 234 234 251 231 231 250
+229 228 250 226 225 249 223 222 249 219 218 248 216 216 248 214 214 247 210 209 246 208 208 246
+204 204 245 201 201 245 199 198 244 196 195 244 193 191 243 190 188 243 188 186 243 186 184 243
+184 182 243 183 180 243 181 177 244 196 194 246 246 245 253 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 239 238 247 161 160 183 137 131 176 137 131 176 137 131 176 137 131 176
+137 131 176 134 128 170 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 156 149 204 219 217 229 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 254 254 254 252 252 254 249 249 254
+248 248 253 245 244 253 242 242 252 240 240 252 237 237 251 234 234 251 233 232 250 229 229 250
+226 225 249 224 224 249 221 220 248 218 217 247 215 215 247 212 211 246 209 208 246 207 206 246
+203 203 245 200 199 244 197 196 244 194 193 243 191 190 243 189 187 243 187 185 243 185 183 243
+183 181 243 183 180 243 217 214 250 254 254 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 221 219 238 143 137 181 137 131 176 137 131 176 134 128 170 134 128 170 134 128 170
+134 128 170 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 136 134 248 186 181 225 255 255 255 254 254 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 253 253 254 251 251 254 249 249 254 248 248 253
+245 244 253 242 242 252 240 240 252 237 237 251 235 234 251 233 232 250 230 229 251 227 227 249
+224 224 249 221 221 248 219 218 248 215 215 247 213 212 247 210 209 246 207 206 246 204 204 245
+201 201 245 199 198 244 196 195 244 193 191 243 190 188 243 188 186 243 186 184 243 184 182 243
+190 188 243 236 236 251 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+202 200 215 137 131 176 134 128 170 134 128 170 126 124 174 134 128 170 130 124 168 134 128 170
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 165 162 202 245 245 249 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 253 253 254 251 251 254 249 249 254 248 248 253 245 244 253
+242 242 252 240 240 252 237 237 251 235 234 251 233 232 250 230 230 250 228 227 250 225 224 250
+222 222 249 219 218 248 216 216 248 214 214 247 210 209 246 208 208 246 206 205 245 201 201 245
+200 199 244 197 196 244 194 193 243 191 190 243 188 187 243 187 185 243 185 183 243 204 203 245
+249 249 254 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 251 250 252 165 162 202
+134 128 170 130 124 168 130 124 168 130 124 168 130 124 168 130 124 168 130 124 168 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 151 143 191 231 231 244 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 254 254 254 252 252 254 250 250 254 248 248 253 246 246 253 244 244 253 242 242 252
+240 240 252 237 237 251 235 234 251 234 233 251 230 230 250 228 227 250 225 225 249 223 222 249
+219 218 248 218 217 247 215 214 247 212 211 246 209 208 246 207 206 246 203 203 245 201 200 245
+198 197 244 195 194 243 192 191 243 189 187 243 187 186 243 188 187 243 225 224 250 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 221 219 238 143 137 181 134 128 170
+130 124 168 130 124 168 130 124 168 130 124 168 130 124 168 125 119 163 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 122 118 142 186 183 208 254 254 254 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+253 253 254 251 251 254 249 249 254 248 248 253 246 245 253 243 243 252 242 241 253 239 239 252
+237 237 251 235 234 251 233 232 250 230 230 250 228 227 250 226 225 249 223 223 249 221 220 248
+218 217 247 215 215 247 213 212 247 210 209 246 207 206 246 204 204 245 201 201 245 199 198 244
+196 195 244 193 191 243 190 188 243 188 187 243 204 204 246 245 245 253 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 251 251 254 202 200 215 131 126 164 130 124 168 130 124 168
+125 119 163 130 124 168 125 119 163 125 119 163 126 124 174 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 143 137 181 221 219 238 254 254 254 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 253 253 254 252 252 254
+250 250 254 248 248 253 248 248 253 245 245 253 243 243 252 241 241 252 239 239 252 237 237 251
+235 234 251 233 232 250 230 230 250 228 227 250 226 225 249 223 223 249 221 220 248 218 218 248
+215 215 247 213 212 247 210 209 246 208 208 246 204 204 245 201 201 245 199 198 244 197 196 244
+194 193 243 191 190 243 193 191 243 228 227 250 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 239 238 247 161 160 183 125 119 163 125 119 163 125 119 163 125 119 163
+125 119 163 125 119 163 125 119 163 107 101 144 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+122 118 142 156 149 199 254 254 254 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 254 254 254 253 253 254 251 251 254 250 250 254 249 249 254
+248 248 253 246 245 253 244 244 253 242 242 252 240 240 252 238 238 252 236 236 251 234 234 251
+233 232 250 230 230 250 228 227 250 226 225 249 223 223 249 221 220 248 218 218 248 215 215 247
+213 213 247 210 209 246 208 208 246 206 205 245 203 203 245 200 199 244 197 196 244 195 194 243
+192 191 243 208 208 246 248 247 254 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+254 253 255 202 200 215 130 124 168 125 119 163 125 119 163 125 119 163 125 119 163 125 119 163
+125 119 163 121 115 159 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+143 137 181 186 183 208 254 254 254 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+254 254 254 253 253 254 252 252 254 251 251 254 250 250 254 249 249 254 248 248 253 246 246 253
+244 244 253 243 243 252 241 241 252 239 239 252 237 237 251 236 236 251 234 234 251 233 232 250
+230 229 251 228 227 250 225 225 249 223 223 249 221 220 248 218 218 248 216 216 248 213 213 247
+210 209 246 209 208 246 206 205 245 203 203 245 201 200 245 198 197 244 195 194 243 198 197 244
+233 232 250 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 239 238 247
+161 160 183 121 115 159 125 119 163 121 115 159 125 119 163 121 115 159 121 115 159 121 115 159
+121 115 159 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+143 137 181 202 200 215 255 255 255 255 255 255 254 254 254 253 253 254 252 252 254 251 251 254
+251 251 254 250 250 254 249 249 254 248 248 253 248 248 253 246 245 253 244 244 253 243 243 252
+242 241 253 240 240 252 238 238 252 237 236 251 235 234 251 234 233 251 231 231 250 229 229 250
+227 227 249 225 225 249 223 223 249 221 220 248 218 218 248 216 216 248 213 213 247 212 211 246
+209 208 246 207 206 246 204 203 245 201 200 245 199 198 244 196 195 244 219 218 248 250 250 254
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 250 250 254 202 200 215 125 119 163
+121 115 159 121 115 159 121 115 159 121 115 159 120 114 157 120 114 157 121 115 159 116 112 152
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+137 131 176 206 201 229 254 254 254 255 255 255 252 252 254 250 250 254 249 249 254 248 248 253
+248 248 253 248 248 253 246 246 253 245 245 253 244 244 253 243 243 252 242 241 253 240 240 252
+239 239 252 237 237 251 236 236 251 234 234 251 233 232 250 230 230 250 229 228 250 227 227 249
+225 224 250 223 222 249 221 220 248 218 218 248 215 215 247 213 213 247 212 211 246 209 208 246
+207 206 246 204 203 245 201 201 245 199 198 244 210 209 246 243 243 253 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 221 219 238 142 137 174 121 115 159 120 114 157
+121 115 159 120 114 157 120 114 157 120 114 157 120 114 157 120 114 157 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+137 131 176 202 200 215 255 255 255 255 255 255 252 252 254 246 246 253 246 246 253 245 245 253
+245 244 253 244 244 253 243 243 252 242 242 252 241 241 252 240 240 252 238 238 252 237 237 251
+236 236 251 234 234 251 234 233 251 231 231 250 229 229 250 228 227 250 226 225 249 224 224 249
+222 222 249 219 218 248 218 217 247 215 215 247 213 213 247 212 211 246 209 208 246 207 206 246
+204 203 245 201 201 245 208 208 246 236 236 251 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 254 254 254 245 245 249 165 162 202 120 114 157 120 114 157 120 114 157 116 112 152
+120 114 157 116 112 152 116 112 152 116 112 152 113 109 162 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+137 131 176 186 183 208 254 254 255 255 255 255 253 253 255 243 243 252 243 243 252 242 242 252
+242 241 253 241 241 252 240 240 252 239 239 252 238 238 252 237 237 251 236 236 251 234 234 251
+234 233 251 231 231 250 230 230 250 228 227 250 227 227 249 225 225 249 223 223 249 221 221 248
+219 218 248 218 217 247 215 215 247 213 212 247 210 209 246 209 208 246 207 206 246 204 203 245
+207 206 246 234 233 251 254 254 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+252 252 253 186 183 208 131 126 164 116 112 152 120 114 157 116 112 152 116 112 152 116 112 152
+114 106 151 114 106 151 116 112 152 114 108 147 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+134 128 170 151 143 191 255 255 255 255 255 255 255 255 255 243 243 253 240 240 252 239 239 252
+238 238 252 237 237 251 237 237 251 236 236 251 235 234 251 234 234 251 233 232 250 231 231 250
+230 230 250 229 228 250 227 227 249 226 225 249 224 224 249 222 222 249 221 220 248 219 218 248
+216 216 248 215 214 247 213 212 247 210 209 246 209 208 246 207 206 246 210 209 246 233 232 250
+253 253 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 202 200 215
+134 128 170 116 112 152 116 112 152 116 112 152 114 106 151 116 112 152 114 106 151 114 106 151
+114 106 151 114 106 151 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+130 124 168 131 126 164 239 238 247 255 255 255 255 255 255 251 251 254 237 236 251 236 236 251
+235 234 251 235 234 251 234 234 251 234 233 251 233 232 250 231 231 250 230 229 251 229 228 250
+227 227 249 226 225 249 224 224 249 223 222 249 221 221 248 219 218 248 218 217 247 215 215 247
+214 214 247 212 211 246 210 209 246 208 208 246 215 214 247 236 236 251 254 254 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 219 217 229 142 137 174 116 112 152
+116 112 152 114 106 151 114 106 151 114 106 151 114 106 151 114 106 151 114 106 151 114 106 151
+114 106 151 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+130 124 168 130 124 168 186 183 208 255 255 255 255 255 255 255 255 255 245 245 253 234 233 251
+233 232 250 231 231 250 231 231 250 230 230 250 229 229 250 228 227 250 227 227 249 226 225 249
+224 224 249 223 223 249 221 221 248 219 218 248 218 218 248 216 216 248 215 214 247 213 212 247
+212 211 246 212 211 246 225 225 249 244 244 253 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 219 217 229 142 137 174 114 106 151 114 106 151 114 106 151
+114 106 151 114 106 151 114 106 151 114 106 151 110 102 146 110 102 146 110 102 146 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+125 119 163 125 119 163 134 128 170 245 245 249 255 255 255 255 255 255 255 255 255 245 245 253
+230 230 250 229 228 250 228 227 250 227 227 249 226 225 249 225 225 249 224 224 249 223 222 249
+221 221 248 219 218 248 219 218 248 218 217 247 215 215 247 213 213 247 216 216 248 229 228 250
+242 241 253 253 253 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+254 254 254 219 217 229 142 137 174 114 108 147 114 106 151 114 106 151 110 102 146 110 102 146
+110 102 146 110 102 146 107 101 144 107 101 144 110 102 146 110 102 146 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+125 119 163 125 119 163 121 115 159 186 183 208 255 255 255 255 255 255 255 255 255 255 255 255
+252 252 254 240 240 252 230 230 250 225 225 249 223 223 249 222 222 249 221 220 248 219 218 248
+218 218 248 219 218 248 224 224 249 229 229 250 238 238 252 246 246 253 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 251 250 252 186 183 208
+131 126 164 110 102 146 110 102 146 110 102 146 107 101 144 110 102 146 110 102 146 110 102 146
+107 101 144 107 101 144 107 101 144 107 101 144 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+114 108 147 120 114 157 121 115 159 121 115 159 219 217 229 254 254 254 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 254 254 255 250 250 254 249 249 254 248 247 254 250 250 254
+253 252 254 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 231 231 244 161 160 183 120 114 157 114 108 147
+110 102 146 110 102 146 107 101 144 110 102 146 107 101 144 107 101 144 107 101 144 105 100 143
+107 101 144 105 99 142 88 84 126 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 120 114 157 120 114 157 120 114 157 121 115 159 219 217 229 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+254 254 254 231 231 244 186 183 208 131 126 164 107 101 144 110 102 146 107 101 144 107 101 144
+107 101 144 107 101 144 107 101 144 105 100 143 105 99 142 105 99 142 105 99 142 104 98 141
+104 97 144 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 113 109 162 116 112 152 116 112 152 116 112 152 120 114 157 186 183 208 251 250 252
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
+255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 245 245 249 206 201 229 165 162 202
+142 137 174 105 99 142 105 99 142 105 99 142 107 101 144 105 100 143 105 99 142 105 99 142
+104 98 141 104 98 141 104 98 141 102 97 140 102 97 140 102 97 140 100 95 138 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 114 106 151 114 106 151 114 106 151 114 106 151 114 108 147 121 115 159
+165 162 202 219 217 229 245 245 249 255 255 255 255 255 255 255 255 255 255 255 255 252 252 253
+231 231 244 219 217 229 186 183 208 161 160 183 134 128 170 114 108 147 107 101 144 107 101 144
+104 98 141 105 99 142 105 99 142 104 98 141 104 98 141 102 97 140 102 97 140 102 97 140
+102 97 140 102 97 140 100 95 138 100 95 138 102 95 138 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 114 106 151 114 106 151 110 102 146 114 108 147 110 102 146
+114 108 147 110 102 146 110 102 146 116 112 152 125 119 163 120 114 157 116 112 152 107 101 144
+107 101 144 105 99 142 107 101 144 107 101 144 105 99 142 104 98 141 104 98 141 104 98 141
+104 98 141 102 95 138 102 97 140 102 97 140 102 97 140 100 95 138 102 95 138 100 95 138
+100 95 138 100 95 138 100 95 138 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 110 102 146 110 102 146 110 102 146 107 101 144
+107 101 144 107 101 144 107 101 144 107 101 144 105 99 142 107 101 144 105 99 142 105 99 142
+104 98 141 104 98 141 104 98 141 102 97 140 102 97 140 102 95 138 102 95 138 102 95 138
+102 95 138 100 95 138 100 95 138 100 95 138 100 95 138 100 95 138 100 95 138 97 91 133
+100 95 138 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 104 97 144 104 97 144 104 97 144
+105 100 143 105 99 142 105 99 142 104 98 141 104 98 141 102 97 140 102 97 140 102 97 140
+102 97 140 102 97 140 100 95 138 102 95 138 100 95 138 100 95 138 100 95 138 97 94 132
+97 91 133 100 95 138 97 91 133 97 91 133 97 91 133 97 91 133 97 91 133 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 104 97 144 102 97 140
+102 97 140 102 97 140 102 97 140 102 97 140 102 97 140 100 95 138 100 95 138 100 95 138
+100 95 138 100 95 138 97 91 133 100 95 138 97 91 133 97 91 133 97 91 133 97 91 133
+97 91 133 97 91 133 97 91 133 95 88 132 100 85 132 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 88 84 126
+102 97 140 100 95 138 100 95 138 100 95 138 97 91 133 100 95 138 97 91 133 97 91 133
+97 91 133 97 91 133 97 91 133 97 91 133 97 91 133 97 91 133 95 88 132 95 88 132
+95 88 132 97 94 132 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 95 88 132 97 91 133 97 91 133 97 91 133 97 91 133 97 91 133
+95 88 132 95 88 132 95 88 132 95 88 132 95 88 132 95 88 132 72 70 123 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0
diff --git a/drivers/video/logo/logo_oldzen_clut224.ppm b/drivers/video/logo/logo_oldzen_clut224.ppm
new file mode 100644
index 000000000..d16b347e3
--- /dev/null
+++ b/drivers/video/logo/logo_oldzen_clut224.ppm
@@ -0,0 +1,882 @@
+P3
+80 80
+255
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 43 56 0 47 62 0 2 3 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 28 37 105 174 196 105 174 196 0 61 80 0 2 3
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 20 27 61 146 173 167 209 223 167 209 223 124 186 205 0 96 126
+0 5 6 0 2 3 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 31 41 79 158 183 152 201 217 174 213 226 167 209 223 159 205 220 157 204 219
+27 122 151 0 19 25 0 9 11 0 2 3 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 5 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 31 41 65 149 175 152 201 217 162 207 221 167 209 223 164 208 222 167 209 223 172 212 225
+145 197 214 11 108 138 0 17 22 0 11 14 0 4 5 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 1 2 0 19 25 0 1 1 0 0 0 0 1 1 0 0 0 0 0 0 0 8 10
+70 152 178 159 205 220 164 208 222 164 208 222 169 211 224 164 208 222 164 208 222 169 211 224
+162 207 221 118 182 202 0 61 80 0 23 31 0 18 24 0 4 5 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 1 2 0 47 62 0 16 21 0 2 3 0 4 6 0 3 4 0 0 0 0 3 4
+33 126 155 169 211 224 172 212 225 179 216 228 167 209 223 172 212 225 164 208 222 179 216 228
+162 207 221 50 138 166 0 45 59 0 84 111 0 51 67 0 16 21 0 2 3 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 67 88 14 111 141 0 10 13 0 11 14 0 13 17 0 2 3 0 0 0
+0 5 7 16 112 142 135 192 210 164 208 222 157 204 219 162 207 221 167 209 223 159 205 220
+89 164 188 0 57 75 19 115 145 85 162 186 14 111 141 0 41 54 0 11 15 0 0 0
+0 8 11 0 83 109 0 31 40 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 40 52 75 156 181 5 103 133 0 15 20 0 16 22 0 10 13 0 0 0
+0 0 0 0 9 12 13 110 140 133 191 209 167 209 223 169 211 224 172 212 225 103 173 195
+0 62 81 0 96 126 145 197 214 140 195 212 27 122 151 0 48 63 0 7 10 0 9 11
+5 103 133 140 195 212 61 146 173 0 21 28 0 1 1 0 1 2 0 1 2 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 10 13 51 139 167 131 189 208 0 90 118 0 22 29 0 13 17 0 5 6
+0 0 0 0 0 1 0 11 15 0 95 124 140 195 212 169 211 224 97 169 192 0 69 91
+0 84 111 159 205 220 179 216 228 95 168 191 0 75 98 0 21 28 0 26 34 19 115 145
+138 193 211 164 208 222 154 203 218 39 130 159 0 10 13 0 8 10 0 6 8 0 2 2
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 1 2 0 87 114 140 195 212 131 189 208 25 119 149 0 29 38 0 7 10
+0 1 2 0 0 1 0 2 3 0 7 9 0 98 128 53 141 168 0 64 84 0 69 91
+152 201 217 177 215 227 56 143 170 0 69 91 0 23 30 0 62 81 68 151 177 149 200 216
+164 208 222 164 208 222 169 211 224 147 199 215 7 105 135 0 19 25 0 20 26 0 7 10
+0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 13 18 81 159 184 167 209 223 172 212 225 107 175 197 0 89 116
+0 18 23 0 4 5 0 1 2 0 2 3 0 4 6 0 14 19 0 40 52 26 120 150
+16 112 142 0 63 83 0 40 52 0 67 88 68 151 177 149 200 216 167 209 223 164 208 222
+177 215 227 174 213 226 167 209 223 172 212 225 145 197 214 10 107 137 0 26 35 0 21 28
+0 5 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 2
+0 2 3 0 4 5 0 5 6 0 3 4 0 1 2 0 1 1 0 0 0 0 0 0
+0 0 0 0 0 0 0 1 1 0 55 72 138 193 211 159 205 220 167 209 223 182 218 229
+58 144 171 0 86 112 0 35 46 0 14 19 0 10 13 0 14 19 0 24 32 0 39 51
+0 52 69 0 92 120 61 146 173 129 188 207 167 209 223 167 209 223 169 211 224 167 209 223
+177 215 227 167 209 223 164 208 222 172 212 225 164 208 222 133 191 209 0 77 101 0 25 33
+0 20 26 0 3 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 1 2 0 8 11 0 26 35 0 56 73 0 82 107
+3 101 131 26 120 150 32 125 154 7 105 135 0 92 120 0 75 98 0 40 52 0 13 18
+0 1 1 0 0 0 0 0 0 0 3 4 17 113 143 159 205 220 167 209 223 174 213 226
+167 209 223 147 199 215 99 170 193 61 146 173 58 144 171 56 143 170 63 147 174 68 151 177
+99 170 193 140 195 212 162 207 221 164 208 222 169 211 224 167 209 223 174 213 226 167 209 223
+172 212 225 167 209 223 169 211 224 164 208 222 164 208 222 113 179 200 0 72 94 0 46 60
+0 52 69 0 15 20 0 3 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 1 2 0 21 28 0 86 112 66 150 176 120 183 203 129 188 207 140 195 212
+152 201 217 162 207 221 164 208 222 152 201 217 142 196 213 135 192 210 122 184 204 105 174 196
+0 83 109 0 12 15 0 0 0 0 1 1 0 14 18 46 136 164 177 215 227 157 204 219
+167 209 223 169 211 224 169 211 224 164 208 222 167 209 223 169 211 224 174 213 226 174 213 226
+174 213 226 172 212 225 164 208 222 167 209 223 172 212 225 174 213 226 172 212 225 164 208 222
+174 213 226 169 211 224 169 211 224 162 207 221 91 165 189 0 79 103 0 64 84 36 128 157
+9 106 136 0 42 55 0 10 13 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 5
+0 48 63 40 131 160 142 196 213 172 212 225 172 212 225 164 208 222 169 211 224 177 215 227
+169 211 224 169 211 224 169 211 224 167 209 223 167 209 223 167 209 223 162 207 221 157 204 219
+164 208 222 89 164 188 0 51 67 0 4 6 0 5 7 0 18 23 23 118 148 135 192 210
+167 209 223 162 207 221 169 211 224 174 213 226 167 209 223 174 213 226 164 208 222 167 209 223
+164 208 222 162 207 221 169 211 224 172 212 225 162 207 221 169 211 224 174 213 226 169 211 224
+162 207 221 167 209 223 140 195 212 27 122 151 0 49 64 0 87 114 120 183 203 149 200 216
+42 133 161 0 66 86 0 19 25 0 2 3 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 5 0 37 48 27 122 151
+129 188 207 172 212 225 164 208 222 169 211 224 172 212 225 167 209 223 169 211 224 172 212 225
+167 209 223 172 212 225 174 213 226 177 215 227 172 212 225 169 211 224 167 209 223 169 211 224
+167 209 223 164 208 222 105 174 196 0 51 67 0 11 14 0 10 13 0 17 22 0 93 122
+118 182 202 169 211 224 174 213 226 164 208 222 174 213 226 164 208 222 169 211 224 167 209 223
+169 211 224 169 211 224 167 209 223 167 209 223 169 211 224 169 211 224 162 207 221 174 213 226
+118 182 202 55 142 169 0 80 105 0 49 64 51 139 167 140 195 212 154 203 218 91 165 189
+16 112 142 0 49 64 0 13 18 0 1 2 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 8 10 0 95 124 103 173 195 154 203 218
+172 212 225 162 207 221 172 212 225 172 212 225 169 211 224 164 208 222 174 213 226 177 215 227
+172 212 225 172 212 225 174 213 226 174 213 226 172 212 225 174 213 226 172 212 225 169 211 224
+172 212 225 169 211 224 174 213 226 77 157 182 0 24 32 0 23 31 0 18 24 0 16 21
+0 69 91 61 146 173 149 200 216 169 211 224 164 208 222 169 211 224 164 208 222 164 208 222
+169 211 224 169 211 224 164 208 222 169 211 224 169 211 224 152 201 217 91 165 189 37 129 158
+0 73 96 0 57 75 2 100 130 75 156 181 184 219 230 124 186 205 66 150 176 13 110 140
+0 54 70 0 20 26 0 5 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 52 69 145 197 214 164 208 222 169 211 224
+174 213 226 164 208 222 172 212 225 172 212 225 174 213 226 169 211 224 172 212 225 167 209 223
+174 213 226 174 213 226 172 212 225 174 213 226 167 209 223 169 211 224 172 212 225 174 213 226
+177 215 227 174 213 226 174 213 226 164 208 222 0 89 116 0 30 39 0 41 54 0 22 29
+0 16 21 0 26 35 0 64 84 25 119 149 91 165 189 140 195 212 169 211 224 169 211 224
+172 212 225 159 205 220 124 186 205 70 152 178 21 116 146 0 75 98 0 50 66 0 50 66
+4 102 132 85 162 186 147 199 215 131 189 208 83 161 185 37 129 158 0 86 112 0 45 59
+0 14 18 0 5 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 14 18 60 145 172 167 209 223 167 209 223
+174 213 226 174 213 226 172 212 225 164 208 222 162 207 221 169 211 224 172 212 225 169 211 224
+172 212 225 162 207 221 169 211 224 169 211 224 174 213 226 174 213 226 172 212 225 157 204 219
+162 207 221 172 212 225 157 204 219 169 211 224 103 173 195 0 45 59 0 54 70 0 41 54
+0 20 26 0 14 19 0 14 19 0 19 25 0 28 37 0 47 62 0 58 76 0 63 83
+0 64 84 0 55 72 0 41 54 0 37 48 0 38 50 0 58 76 0 76 100 5 103 133
+58 144 171 70 152 178 48 137 165 27 122 151 0 86 112 0 44 58 0 25 33 0 9 12
+0 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 72 94 140 195 212 177 215 227
+164 208 222 174 213 226 172 212 225 169 211 224 169 211 224 169 211 224 172 212 225 167 209 223
+142 196 213 113 179 200 103 173 195 91 165 189 103 173 195 131 189 208 162 207 221 172 212 225
+164 208 222 167 209 223 164 208 222 162 207 221 131 189 208 0 63 83 0 66 86 0 68 89
+0 20 27 0 11 14 0 12 15 0 22 29 0 38 50 0 50 66 0 55 72 0 57 75
+0 73 96 0 69 91 0 54 70 0 44 58 0 50 66 0 56 73 0 57 75 0 59 78
+0 49 64 0 40 52 0 39 51 0 37 48 0 23 30 0 10 13 0 4 6 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 18 23 74 155 180 172 212 225
+169 211 224 164 208 222 172 212 225 174 213 226 157 204 219 174 213 226 138 193 211 63 147 174
+5 103 133 0 64 84 0 57 75 0 57 75 0 66 86 0 86 112 32 125 154 120 183 203
+169 211 224 162 207 221 167 209 223 169 211 224 126 187 206 0 76 100 0 77 101 5 103 133
+0 31 41 0 8 11 0 3 4 0 5 7 0 16 21 0 31 40 0 54 70 0 75 98
+0 64 84 0 49 64 0 67 88 17 113 143 66 150 176 87 163 187 103 173 195 109 177 198
+77 157 182 51 139 167 0 87 114 0 28 37 0 5 7 0 1 2 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 0 75 98 129 188 207
+169 211 224 164 208 222 167 209 223 164 208 222 159 205 220 46 136 164 0 72 94 0 44 58
+0 64 84 6 104 134 36 128 157 42 133 161 11 108 138 0 84 111 0 49 64 0 62 81
+93 167 190 174 213 226 167 209 223 167 209 223 138 193 211 0 77 101 0 87 114 39 130 159
+0 57 75 0 14 19 0 2 2 0 0 0 0 2 2 0 6 9 0 11 14 0 10 13
+0 20 27 4 102 132 109 177 198 154 203 218 159 205 220 169 211 224 167 209 223 172 212 225
+174 213 226 159 205 220 138 193 211 89 164 188 0 57 75 0 5 7 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 10 13 53 141 168
+164 208 222 162 207 221 135 192 210 45 135 163 0 56 73 0 54 70 9 106 136 81 159 184
+169 211 224 133 191 209 83 161 185 53 141 168 29 123 152 9 106 136 1 99 129 0 48 63
+0 48 63 87 163 187 167 209 223 169 211 224 138 193 211 0 89 116 0 95 124 66 150 176
+0 77 101 0 20 26 0 2 3 0 0 0 0 0 0 0 0 0 0 1 2 0 21 28
+63 147 174 145 197 214 167 209 223 167 209 223 164 208 222 174 213 226 169 211 224 169 211 224
+177 215 227 177 215 227 169 211 224 177 215 227 147 199 215 40 131 160 0 26 34 0 4 6
+0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 46 60
+147 199 215 66 150 176 0 83 109 0 49 64 4 102 132 103 173 195 162 207 221 120 183 203
+75 156 181 29 123 152 0 98 128 0 67 88 0 47 62 0 38 50 0 34 45 0 31 41
+0 12 16 16 112 142 164 208 222 172 212 225 133 191 209 0 80 105 0 95 124 89 164 188
+0 93 122 0 26 34 0 4 5 0 0 0 0 0 0 0 0 0 0 23 31 51 139 167
+164 208 222 167 209 223 167 209 223 169 211 224 169 211 224 169 211 224 167 209 223 172 212 225
+159 205 220 164 208 222 167 209 223 174 213 226 159 205 220 157 204 219 65 149 175 0 34 45
+0 5 6 0 3 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 6
+0 84 111 0 35 46 0 43 56 23 118 148 145 197 214 135 192 210 83 161 185 32 125 154
+0 87 114 0 52 69 0 32 42 0 18 24 0 10 13 0 6 8 0 6 9 0 5 7
+0 4 6 0 80 105 152 201 217 159 205 220 129 188 207 0 68 89 14 111 141 99 170 193
+0 95 124 0 29 38 0 5 7 0 0 0 0 0 0 0 16 21 39 130 159 154 203 218
+172 212 225 169 211 224 169 211 224 174 213 226 167 209 223 167 209 223 167 209 223 172 212 225
+164 208 222 107 175 197 81 159 184 83 161 185 122 184 204 164 208 222 169 211 224 66 150 176
+0 23 31 0 10 13 0 5 7 0 1 2 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 3 4 0 11 14 0 58 76 36 128 157 85 162 186 43 134 162 3 101 131 0 58 76
+0 28 37 0 14 19 0 5 7 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0
+0 1 1 0 82 107 154 203 218 167 209 223 91 165 189 0 47 62 61 146 173 113 179 200
+0 90 118 0 27 36 0 4 5 0 0 0 0 3 4 9 106 136 149 200 216 164 208 222
+164 208 222 172 212 225 174 213 226 169 211 224 169 211 224 177 215 227 169 211 224 97 169 192
+0 89 116 0 46 60 0 46 60 0 43 56 0 62 81 27 122 151 133 191 209 159 205 220
+22 117 147 0 22 29 0 17 22 0 6 8 0 0 1 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 7 9 0 30 39 0 69 91 0 93 122 0 57 75 0 26 35 0 10 13
+0 2 3 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 5 7 27 122 151 164 208 222 164 208 222 7 105 135 0 51 67 97 169 192 93 167 190
+0 87 114 0 23 30 0 3 4 0 0 0 0 72 94 140 195 212 162 207 221 172 212 225
+164 208 222 167 209 223 169 211 224 169 211 224 172 212 225 154 203 218 45 135 163 0 55 72
+0 80 105 29 123 152 61 146 173 48 137 165 0 98 128 0 43 56 0 84 111 113 179 200
+135 192 210 0 87 114 0 25 33 0 26 34 0 5 7 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 1 1 0 10 13 0 23 31 0 28 37 0 15 20 0 4 6 0 1 1
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2
+0 62 81 115 180 201 159 205 220 118 182 202 0 39 51 0 83 109 115 180 201 79 158 183
+0 69 91 0 18 24 0 1 2 0 12 16 87 163 187 167 209 223 169 211 224 167 209 223
+172 212 225 167 209 223 167 209 223 167 209 223 167 209 223 55 142 169 0 64 84 22 117 147
+157 204 219 157 204 219 101 172 194 53 141 168 19 115 145 0 75 98 0 29 38 14 111 141
+162 207 221 65 149 175 0 44 58 0 44 58 0 20 27 0 3 4 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 2 3 0 5 6 0 4 5 0 1 2 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 18 23 0 83 109
+109 177 198 167 209 223 145 197 214 13 110 140 0 24 32 29 123 152 115 180 201 56 143 170
+0 45 59 0 11 14 0 3 4 0 75 98 159 205 220 167 209 223 169 211 224 172 212 225
+169 211 224 169 211 224 172 212 225 174 213 226 85 162 186 0 64 84 19 115 145 164 208 222
+140 195 212 75 156 181 21 116 146 0 73 96 0 43 56 0 32 42 0 12 16 0 28 37
+124 186 205 131 189 208 0 87 114 0 38 50 0 51 67 0 14 18 0 2 2 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 14 19 0 26 34 0 8 10 0 1 2
+0 1 1 0 1 1 0 2 3 0 9 11 0 31 41 1 99 129 74 155 180 142 196 213
+164 208 222 169 211 224 63 147 174 0 28 37 0 57 75 66 150 176 95 168 191 19 115 145
+0 30 39 0 5 7 0 17 22 74 155 180 162 207 221 167 209 223 174 213 226 172 212 225
+172 212 225 167 209 223 169 211 224 169 211 224 0 87 114 0 86 112 140 195 212 159 205 220
+66 150 176 4 102 132 0 55 72 0 23 31 0 9 12 0 5 7 0 3 4 0 4 5
+26 120 150 164 208 222 43 134 162 0 28 37 0 76 100 0 32 42 0 7 9 0 0 1
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 21 28 87 163 187 111 178 199 61 146 173
+46 136 164 46 136 164 70 152 178 118 182 202 174 213 226 167 209 223 167 209 223 167 209 223
+159 205 220 74 155 180 0 40 52 0 37 48 33 126 155 85 162 186 43 134 162 0 69 91
+0 13 18 0 6 8 0 95 124 138 193 211 164 208 222 162 207 221 172 212 225 162 207 221
+169 211 224 169 211 224 162 207 221 51 139 167 0 46 60 91 165 189 162 207 221 95 168 191
+1 99 129 0 42 55 0 13 17 0 2 3 0 0 0 0 0 0 0 0 0 0 0 0
+0 47 62 126 187 206 111 178 199 0 51 67 0 63 83 0 71 93 0 20 26 0 2 2
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 4 6 25 119 149 159 205 220 169 211 224
+172 212 225 172 212 225 164 208 222 167 209 223 172 212 225 172 212 225 167 209 223 162 207 221
+63 147 174 0 54 70 0 47 62 19 115 145 81 159 184 61 146 173 0 95 124 0 34 45
+0 4 5 0 29 38 77 157 182 157 204 219 172 212 225 167 209 223 164 208 222 162 207 221
+174 213 226 164 208 222 113 179 200 0 61 80 18 114 144 184 219 230 122 184 204 29 123 152
+0 49 64 0 14 19 0 2 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 15 20 77 157 182 145 197 214 0 93 122 0 47 62 4 102 132 0 38 50 0 6 9
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 68 89 138 193 211 169 211 224
+179 216 228 172 212 225 164 208 222 174 213 226 169 211 224 174 213 226 124 186 205 39 130 159
+0 48 63 0 67 88 53 141 168 103 173 195 83 161 185 16 112 142 0 48 63 0 10 13
+0 5 7 12 109 139 149 200 216 169 211 224 174 213 226 169 211 224 172 212 225 174 213 226
+167 209 223 149 200 216 18 114 144 0 55 72 159 205 220 147 199 215 68 151 177 0 80 105
+0 21 28 0 3 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 6 8 46 136 164 162 207 221 39 130 159 0 33 44 11 108 138 0 61 80 0 14 18
+0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 25 33 95 168 191 164 208 222
+172 212 225 162 207 221 169 211 224 167 209 223 162 207 221 172 212 225 55 142 169 0 29 38
+0 44 58 46 136 164 129 188 207 93 167 190 16 112 142 0 55 72 0 14 19 0 2 2
+0 64 84 118 182 202 172 212 225 174 213 226 169 211 224 172 212 225 167 209 223 167 209 223
+159 205 220 85 162 186 0 57 75 42 133 161 190 222 232 111 178 199 19 115 145 0 43 56
+0 10 13 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 4 5 33 126 155 162 207 221 83 161 185 0 32 42 2 100 130 0 84 111 0 23 31
+0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 0 5 7 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 8 43 134 162 167 209 223
+174 213 226 169 211 224 164 208 222 169 211 224 177 215 227 164 208 222 126 187 206 48 137 165
+0 82 107 0 42 55 0 59 78 0 76 100 0 39 51 0 13 18 0 7 10 0 69 91
+138 193 211 167 209 223 164 208 222 169 211 224 179 216 228 167 209 223 177 215 227 167 209 223
+131 189 208 0 75 98 0 92 120 152 201 217 122 184 204 39 130 159 0 63 83 0 18 24
+0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 2 3 34 127 156 167 209 223 131 189 208 0 42 55 0 80 105 13 110 140 0 38 50
+0 5 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 8 0 18 24 0 0 0
+0 1 2 0 3 4 0 2 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 0 86 112 145 197 214
+167 209 223 167 209 223 169 211 224 169 211 224 167 209 223 164 208 222 164 208 222 164 208 222
+152 201 217 61 146 173 0 86 112 0 43 56 0 25 33 0 40 52 12 109 139 122 184 204
+167 209 223 167 209 223 174 213 226 162 207 221 174 213 226 169 211 224 167 209 223 149 200 216
+12 109 139 0 58 76 83 161 185 157 204 219 75 156 181 0 90 118 0 33 44 0 7 9
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 3 4 33 126 155 172 212 225 162 207 221 0 50 66 0 73 96 30 124 153 0 55 72
+0 9 12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 14 0 40 52 0 1 2
+0 6 8 0 9 12 0 7 10 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 36 47 126 187 206
+169 211 224 174 213 226 169 211 224 172 212 225 174 213 226 169 211 224 169 211 224 164 208 222
+174 213 226 169 211 224 135 192 210 83 161 185 75 156 181 101 172 194 142 196 213 172 212 225
+167 209 223 174 213 226 167 209 223 169 211 224 169 211 224 172 212 225 133 191 209 21 116 146
+0 52 69 56 143 170 164 208 222 120 183 203 19 115 145 0 48 63 0 13 18 0 2 2
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 4 5 37 129 158 167 209 223 162 207 221 0 61 80 0 79 103 46 136 164 0 71 93
+0 13 18 0 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 11 0 61 80 0 6 8
+0 10 13 0 20 26 0 14 19 0 3 4 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 11 65 149 175
+174 213 226 157 204 219 167 209 223 177 215 227 172 212 225 164 208 222 164 208 222 167 209 223
+162 207 221 169 211 224 159 205 220 179 216 228 159 205 220 157 204 219 149 200 216 164 208 222
+169 211 224 159 205 220 167 209 223 174 213 226 157 204 219 66 150 176 0 82 107 0 50 66
+66 150 176 164 208 222 142 196 213 60 145 172 0 75 98 0 22 29 0 5 6 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 6 9 51 139 167 167 209 223 172 212 225 0 84 111 0 64 84 51 139 167 0 84 111
+0 18 23 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 6 0 95 124 0 23 31
+0 10 13 0 29 38 0 24 32 0 6 8 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 64 84
+21 116 146 0 76 100 0 61 80 0 54 70 0 56 73 0 77 101 23 118 148 113 179 200
+157 204 219 172 212 225 169 211 224 167 209 223 167 209 223 91 165 189 0 61 80 0 55 72
+0 82 107 0 95 124 0 89 116 0 62 81 0 45 59 0 43 56 0 95 124 131 189 208
+167 209 223 118 182 202 45 135 163 0 83 109 0 27 36 0 5 7 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 23 30 95 168 191 167 209 223 174 213 226 3 101 131 0 45 59 46 136 164 2 100 130
+0 23 30 0 3 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 5 23 118 148 0 61 80
+0 9 11 0 36 47 0 32 42 0 6 9 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 4
+0 6 8 0 11 14 0 39 51 0 82 107 3 101 131 0 80 105 0 55 72 0 55 72
+34 127 156 131 189 208 169 211 224 169 211 224 167 209 223 105 174 196 0 61 80 0 33 44
+0 63 83 0 54 70 0 66 86 0 95 124 0 84 111 29 123 152 109 177 198 118 182 202
+70 152 178 25 119 149 0 76 100 0 33 44 0 7 10 0 1 1 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 61 80 152 201 217 169 211 224 164 208 222 3 101 131 0 48 63 53 141 168 10 107 137
+0 27 36 0 4 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 4 21 116 146 25 119 149
+0 9 12 0 37 48 0 41 54 0 13 18 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 1 1 0 16 21 0 62 81 23 118 148 45 135 163 36 128 157 0 98 128 0 58 76
+0 38 50 3 101 131 129 188 207 172 212 225 167 209 223 154 203 218 29 123 152 0 48 63
+74 155 180 46 136 164 36 128 157 30 124 153 0 48 63 0 55 72 12 109 139 13 110 140
+0 84 111 0 51 67 0 23 30 0 7 10 0 1 1 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2
+32 125 154 169 211 224 169 211 224 169 211 224 9 106 136 0 48 63 60 145 172 16 112 142
+0 31 40 0 5 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 82 107 85 162 186
+0 24 32 0 28 37 0 50 66 0 25 33 0 2 2 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 6 8 0 26 34 0 52 69 0 66 86 0 56 73 0 45 59 0 41 54
+0 34 45 0 23 31 5 103 133 152 201 217 174 213 226 172 212 225 111 178 199 0 50 66
+18 114 144 33 126 155 0 98 128 0 52 69 0 19 25 0 23 31 0 33 44 0 41 54
+0 25 33 0 14 18 0 5 6 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 20 27
+120 183 203 172 212 225 169 211 224 167 209 223 4 102 132 0 56 73 72 153 179 21 116 146
+0 34 45 0 6 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 48 63 126 187 206
+0 89 116 0 23 31 0 59 78 0 34 45 0 5 7 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 1 2 0 6 8 0 12 16 0 12 16 0 7 9 0 6 8 0 6 8
+0 6 8 0 7 9 0 20 27 93 167 190 172 212 225 172 212 225 167 209 223 7 105 135
+0 56 73 16 112 142 0 59 78 0 12 15 0 29 38 0 29 38 0 8 10 0 12 15
+0 9 12 0 4 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 8 19 115 145
+167 209 223 172 212 225 172 212 225 167 209 223 0 82 107 0 92 120 107 175 197 29 123 152
+0 38 50 0 7 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 23 31 105 174 196
+72 153 179 0 36 47 0 54 70 0 54 70 0 11 15 0 1 2 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 1 1 0 1 1 0 0 0 0 0 0 0 0 0
+0 0 0 0 1 1 0 3 4 16 112 142 162 207 221 172 212 225 169 211 224 53 141 168
+0 40 52 6 104 134 0 55 72 0 8 11 0 36 47 0 66 86 0 3 4 0 7 10
+0 9 11 0 5 6 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 0 63 83 131 189 208
+167 209 223 172 212 225 172 212 225 164 208 222 0 63 83 7 105 135 131 189 208 26 120 150
+0 37 48 0 7 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 10 13 77 157 182
+135 192 210 0 90 118 0 40 52 0 75 98 0 21 28 0 4 5 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 1 1 0 90 118 157 204 219 172 212 225 169 211 224 97 169 192
+0 41 54 3 101 131 0 73 96 0 11 15 0 46 60 45 135 163 0 15 20 0 10 13
+0 16 22 0 11 15 0 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 55 72 93 167 190 167 209 223
+172 212 225 164 208 222 167 209 223 135 192 210 0 48 63 32 125 154 126 187 206 17 113 143
+0 32 42 0 6 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 17 113 143
+154 203 218 79 158 183 0 47 62 0 59 78 0 39 51 0 7 10 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 86 112 162 207 221 167 209 223 169 211 224 149 200 216
+0 52 69 0 90 118 5 103 133 0 17 22 0 52 69 118 182 202 0 90 118 0 13 18
+0 31 41 0 20 26 0 4 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 1 0 62 81 97 169 192 167 209 223 174 213 226
+172 212 225 179 216 228 164 208 222 77 157 182 0 41 54 65 149 175 109 177 198 7 105 135
+0 27 36 0 5 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 43 56
+159 205 220 147 199 215 40 131 160 0 36 47 0 59 78 0 23 31 0 2 3 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 1 1 4 102 132 167 209 223 164 208 222 172 212 225 174 213 226
+0 68 89 0 75 98 26 120 150 0 26 35 0 49 64 135 192 210 126 187 206 10 107 137
+0 23 30 0 14 18 0 6 9 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 1 1 0 13 17 7 105 135 149 200 216 164 208 222 167 209 223 177 215 227
+167 209 223 164 208 222 162 207 221 17 113 143 0 61 80 138 193 211 87 163 187 0 89 116
+0 19 25 0 2 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 10 13
+95 168 191 172 212 225 135 192 210 0 98 128 0 31 40 0 35 46 0 11 15 0 0 1
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 7 9 58 144 171 167 209 223 174 213 226 169 211 224 172 212 225
+0 79 103 0 69 91 43 134 162 0 43 56 0 32 42 99 170 193 167 209 223 162 207 221
+45 135 163 0 69 91 0 21 28 0 7 10 0 3 4 0 4 5 0 6 8 0 8 11
+0 31 40 0 83 109 68 151 177 154 203 218 162 207 221 167 209 223 172 212 225 172 212 225
+172 212 225 167 209 223 142 196 213 0 72 94 9 106 136 179 216 228 65 149 175 0 72 94
+0 13 18 0 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+19 115 145 167 209 223 167 209 223 120 183 203 0 50 66 0 28 37 0 23 30 0 5 6
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 1 0 40 52 120 183 203 164 208 222 169 211 224 174 213 226 169 211 224
+0 82 107 0 77 101 61 146 173 0 67 88 0 18 24 58 144 171 167 209 223 167 209 223
+157 204 219 122 184 204 72 153 179 48 137 165 36 128 157 37 129 158 42 133 161 56 143 170
+99 170 193 147 199 215 174 213 226 172 212 225 169 211 224 169 211 224 172 212 225 164 208 222
+167 209 223 167 209 223 61 146 173 0 39 51 81 159 184 140 195 212 39 130 159 0 54 70
+0 8 11 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 47 62 133 191 209 172 212 225 169 211 224 91 165 189 0 44 58 0 23 31 0 12 16
+0 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 9 11 25 119 149 154 203 218 159 205 220 162 207 221 174 213 226 177 215 227
+0 76 100 0 77 101 75 156 181 1 99 129 0 13 17 19 115 145 174 213 226 162 207 221
+162 207 221 169 211 224 172 212 225 167 209 223 167 209 223 167 209 223 167 209 223 169 211 224
+167 209 223 167 209 223 169 211 224 167 209 223 172 212 225 167 209 223 167 209 223 172 212 225
+174 213 226 145 197 214 2 100 130 0 64 84 198 226 235 109 177 198 14 111 141 0 40 52
+0 4 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 6 8 37 129 158 167 209 223 164 208 222 172 212 225 99 170 193 0 55 72 0 20 26
+0 6 8 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 6 9 7 105 135 140 195 212 164 208 222 177 215 227 172 212 225 159 205 220 174 213 226
+0 72 94 3 101 131 105 174 196 6 104 134 0 15 20 0 55 72 113 179 200 172 212 225
+172 212 225 169 211 224 169 211 224 169 211 224 172 212 225 174 213 226 174 213 226 174 213 226
+174 213 226 174 213 226 164 208 222 169 211 224 172 212 225 174 213 226 164 208 222 169 211 224
+162 207 221 65 149 175 0 48 63 75 156 181 164 208 222 68 151 177 0 79 103 0 23 30
+0 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 1 0 47 62 107 175 197 169 211 224 164 208 222 177 215 227 99 170 193 0 76 100
+0 14 18 0 4 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 7 9
+5 103 133 120 183 203 174 213 226 167 209 223 164 208 222 167 209 223 172 212 225 157 204 219
+0 54 70 27 122 151 124 186 205 6 104 134 0 23 30 0 15 20 48 137 165 152 201 217
+172 212 225 172 212 225 172 212 225 169 211 224 172 212 225 177 215 227 174 213 226 174 213 226
+174 213 226 172 212 225 169 211 224 169 211 224 164 208 222 169 211 224 169 211 224 174 213 226
+113 179 200 0 75 98 9 106 136 179 216 228 113 179 200 30 124 153 0 48 63 0 11 15
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 6 9 22 117 147 162 207 221 157 204 219 169 211 224 172 212 225 122 184 204
+37 129 158 0 34 45 0 5 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 4 0 31 41 21 116 146
+142 196 213 172 212 225 172 212 225 177 215 227 164 208 222 169 211 224 164 208 222 115 180 201
+0 45 59 48 137 165 113 179 200 6 104 134 0 23 30 0 5 6 0 55 72 133 191 209
+162 207 221 159 205 220 169 211 224 169 211 224 172 212 225 174 213 226 172 212 225 172 212 225
+174 213 226 172 212 225 167 209 223 174 213 226 157 204 219 174 213 226 169 211 224 147 199 215
+0 93 122 0 75 98 118 182 202 169 211 224 66 150 176 0 87 114 0 27 36 0 5 7
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 32 42 99 170 193 174 213 226 169 211 224 164 208 222 164 208 222
+152 201 217 95 168 191 14 111 141 0 41 54 0 11 14 0 3 4 0 1 1 0 0 0
+0 0 0 0 0 1 0 2 3 0 8 11 0 47 62 29 123 152 103 173 195 152 201 217
+164 208 222 162 207 221 172 212 225 172 212 225 164 208 222 167 209 223 172 212 225 60 145 172
+0 46 60 97 169 192 99 170 193 0 96 126 0 21 28 0 2 3 0 7 10 21 116 146
+169 211 224 169 211 224 164 208 222 174 213 226 174 213 226 172 212 225 169 211 224 169 211 224
+169 211 224 169 211 224 164 208 222 164 208 222 179 216 228 164 208 222 145 197 214 18 114 144
+0 52 69 79 158 183 167 209 223 109 177 198 18 114 144 0 44 58 0 12 16 0 1 2
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 1 1 0 64 84 131 189 208 159 205 220 167 209 223 167 209 223
+172 212 225 169 211 224 174 213 226 177 215 227 111 178 199 68 151 177 40 131 160 14 111 141
+12 109 139 34 127 156 77 157 182 126 187 206 167 209 223 169 211 224 167 209 223 174 213 226
+172 212 225 164 208 222 174 213 226 169 211 224 172 212 225 172 212 225 157 204 219 0 84 111
+0 95 124 164 208 222 75 156 181 0 75 98 0 17 22 0 2 3 0 0 0 0 5 7
+32 125 154 145 197 214 164 208 222 169 211 224 172 212 225 174 213 226 172 212 225 169 211 224
+172 212 225 172 212 225 172 212 225 174 213 226 152 201 217 97 169 192 0 87 114 0 50 66
+77 157 182 174 213 226 111 178 199 36 128 157 0 54 70 0 16 21 0 2 3 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 4 5 4 102 132 135 192 210 169 211 224 169 211 224
+172 212 225 172 212 225 172 212 225 167 209 223 169 211 224 169 211 224 167 209 223 167 209 223
+174 213 226 167 209 223 164 208 222 169 211 224 174 213 226 174 213 226 172 212 225 167 209 223
+169 211 224 167 209 223 174 213 226 169 211 224 174 213 226 162 207 221 81 159 184 0 49 64
+79 158 183 167 209 223 48 137 165 0 61 80 0 10 13 0 1 1 0 0 0 0 0 0
+0 11 15 4 102 132 120 183 203 164 208 222 169 211 224 169 211 224 169 211 224 172 212 225
+169 211 224 169 211 224 172 212 225 118 182 202 53 141 168 0 67 88 0 79 103 79 158 183
+174 213 226 124 186 205 45 135 163 0 77 101 0 23 31 0 5 6 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 12 16 11 108 138 142 196 213 162 207 221
+167 209 223 167 209 223 169 211 224 172 212 225 164 208 222 162 207 221 172 212 225 174 213 226
+169 211 224 172 212 225 174 213 226 174 213 226 174 213 226 172 212 225 174 213 226 172 212 225
+172 212 225 169 211 224 169 211 224 167 209 223 169 211 224 131 189 208 0 92 120 0 76 100
+167 209 223 113 179 200 17 113 143 0 44 58 0 7 9 0 0 0 0 0 0 0 0 0
+0 0 1 0 11 14 0 61 80 34 127 156 95 168 191 129 188 207 133 191 209 135 192 210
+118 182 202 83 161 185 39 130 159 0 76 100 0 49 64 22 117 147 135 192 210 164 208 222
+101 172 194 39 130 159 0 80 105 0 31 41 0 6 8 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 15 3 101 131 142 196 213
+172 212 225 177 215 227 167 209 223 162 207 221 164 208 222 169 211 224 172 212 225 174 213 226
+174 213 226 172 212 225 169 211 224 174 213 226 174 213 226 169 211 224 164 208 222 164 208 222
+169 211 224 167 209 223 174 213 226 167 209 223 157 204 219 27 122 151 0 58 76 83 161 185
+177 215 227 70 152 178 0 84 111 0 26 35 0 3 4 0 0 0 0 0 0 0 0 0
+0 0 0 0 1 1 0 6 8 0 16 21 0 39 51 0 52 69 0 58 76 0 63 83
+0 51 67 0 43 56 0 43 56 0 86 112 97 169 192 164 208 222 142 196 213 95 168 191
+22 117 147 0 71 93 0 29 38 0 8 10 0 0 1 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 5 0 69 91
+115 180 201 145 197 214 164 208 222 169 211 224 172 212 225 164 208 222 154 203 218 167 209 223
+174 213 226 172 212 225 172 212 225 169 211 224 172 212 225 174 213 226 172 212 225 172 212 225
+174 213 226 167 209 223 167 209 223 154 203 218 26 120 150 0 54 70 68 151 177 187 220 231
+105 174 196 17 113 143 0 48 63 0 11 14 0 0 1 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 2 3 0 11 15 0 23 31 0 39 51 0 62 81 0 72 94
+1 99 129 46 136 164 111 178 199 124 186 205 97 169 192 58 144 171 29 123 152 0 87 114
+0 43 56 0 17 22 0 5 7 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 4 5
+0 42 55 40 131 160 133 191 209 162 207 221 162 207 221 167 209 223 169 211 224 169 211 224
+172 212 225 172 212 225 169 211 224 172 212 225 169 211 224 169 211 224 169 211 224 169 211 224
+172 212 225 172 212 225 124 186 205 22 117 147 0 56 73 29 123 152 162 207 221 135 192 210
+40 131 160 0 68 89 0 21 28 0 4 5 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 1 2 0 11 14 0 26 35 0 52 69 0 83 109
+4 102 132 17 113 143 19 115 145 10 107 137 0 98 128 0 73 96 0 48 63 0 25 33
+0 9 12 0 2 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1
+0 5 7 0 22 29 0 75 98 46 136 164 124 186 205 169 211 224 167 209 223 167 209 223
+172 212 225 169 211 224 169 211 224 167 209 223 169 211 224 169 211 224 167 209 223 167 209 223
+147 199 215 55 142 169 0 86 112 0 59 78 79 158 183 147 199 215 138 193 211 70 152 178
+0 86 112 0 33 44 0 7 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 5 6 0 13 17 0 22 29
+0 33 44 0 39 51 0 40 52 0 33 44 0 28 37 0 19 25 0 10 13 0 4 6
+0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 1 2 0 8 10 0 16 21 0 23 30 0 57 75 13 110 140 75 156 181 147 199 215
+177 215 227 167 209 223 164 208 222 167 209 223 174 213 226 147 199 215 85 162 186 30 124 153
+0 69 91 0 42 55 0 84 111 131 189 208 177 215 227 124 186 205 60 145 172 0 96 126
+0 38 50 0 9 11 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 0 3 4
+0 6 8 0 6 9 0 8 10 0 7 9 0 4 5 0 2 3 0 0 1 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 1 1 0 6 9 0 18 24 0 24 32 0 25 33 0 29 38 0 36 47
+0 56 73 0 72 94 0 77 101 0 72 94 0 57 75 0 44 58 0 44 58 0 57 75
+4 102 132 83 161 185 149 200 216 133 191 209 75 156 181 21 116 146 0 71 93 0 32 42
+0 7 9 0 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 1 0 3 4 0 13 18 0 27 36 0 42 55 0 49 64
+0 54 70 0 61 80 0 64 84 0 80 105 4 102 132 17 113 143 43 134 162 85 162 186
+118 182 202 95 168 191 61 146 173 26 120 150 0 87 114 0 49 64 0 23 30 0 8 10
+0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 0 5 7 0 14 18 0 29 38
+0 50 66 0 69 91 0 95 124 14 111 141 26 120 150 33 126 155 34 127 156 25 119 149
+9 106 136 0 93 122 0 73 96 0 49 64 0 25 33 0 11 15 0 4 6 0 1 1
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 0 5 6
+0 12 15 0 19 25 0 26 34 0 36 47 0 45 59 0 46 60 0 43 56 0 41 54
+0 33 44 0 24 32 0 16 22 0 10 13 0 5 6 0 1 1 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 1 1 0 2 3 0 4 5 0 5 7 0 7 9 0 7 10 0 6 9
+0 3 4 0 3 4 0 2 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
diff --git a/drivers/video/logo/logo_slackware_clut224.ppm b/drivers/video/logo/logo_slackware_clut224.ppm
new file mode 100644
index 000000000..fbf39203b
--- /dev/null
+++ b/drivers/video/logo/logo_slackware_clut224.ppm
@@ -0,0 +1,1123 @@
+P3
+79 80
+255
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  2 3 6  8 10 17  12 16 26  12 16 26
+21 23 31  23 26 35  23 26 35  19 21 29  19 21 29  12 16 26
+8 10 17  2 3 6  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  2 3 6  23 26 35
+43 50 70  53 65 105  53 65 105  68 84 132  72 90 145  72 90 145
+69 93 168  72 96 170  69 93 168  69 93 168  72 90 145  72 90 145
+68 84 132  53 65 105  53 65 105  43 50 70  23 26 35  4 5 10
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  8 10 17  31 38 62  53 65 105  72 90 145  72 95 165
+69 93 168  71 96 171  71 96 171  74 98 173  75 99 174  79 102 174
+87 109 178  87 109 178  87 109 178  87 109 178  79 102 174  77 101 175
+74 99 174  71 96 171  71 96 171  69 93 168  72 95 165  72 90 145
+53 65 105  31 38 62  8 10 17  0 0 1  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 1  27 32 46
+46 54 81  72 90 145  73 97 172  71 96 171  69 93 168  77 101 175
+108 128 187  132 148 198  144 158 201  159 170 206  166 177 212  176 187 218
+189 198 224  201 208 230  201 208 230  192 201 226  180 190 220  168 179 211
+160 171 208  147 161 203  132 148 198  108 128 187  79 102 174  69 93 168
+71 96 171  72 96 171  72 90 145  53 65 105  27 32 46  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  19 21 29  53 65 105  69 93 168
+73 97 172  74 98 173  90 112 180  124 141 194  164 175 209  191 199 224
+209 215 232  229 232 239  232 235 242  232 235 242  232 235 242  232 235 242
+232 235 242  229 232 239  229 232 239  232 235 242  232 235 242  232 235 242
+232 235 242  232 235 242  232 235 242  214 219 236  192 201 226  166 177 212
+126 145 198  91 113 180  75 99 174  73 97 172  69 93 168  53 65 105
+19 21 29  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  12 16 26  46 54 81  69 93 168  69 93 168  79 102 174
+117 135 190  176 187 218  214 219 236  221 226 239  221 226 239  229 232 239
+221 226 239  221 226 239  221 226 239  221 226 239  221 226 239  221 226 239
+221 226 239  221 226 239  221 226 239  221 226 239  221 226 239  221 226 239
+221 226 239  221 226 239  221 226 239  221 226 239  229 232 239  229 232 239
+221 226 239  218 222 237  184 193 222  124 141 194  87 109 178  71 96 171
+72 90 145  46 54 81  12 16 26  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  4 5 10
+43 50 70  68 84 132  71 96 171  79 102 174  126 145 198  180 190 220
+218 222 237  221 226 239  218 222 237  218 222 237  218 222 237  218 222 237
+218 222 237  218 222 237  218 222 237  218 222 237  218 222 237  218 222 237
+218 222 237  218 222 237  218 222 237  218 222 237  218 222 237  218 222 237
+218 222 237  218 222 237  218 222 237  218 222 237  218 222 237  218 222 237
+218 222 237  221 226 239  221 226 239  221 226 239  185 195 223  136 152 200
+87 109 178  71 96 171  68 84 132  46 54 81  8 10 17  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  27 32 46  68 84 132
+69 93 168  77 101 175  132 148 198  191 199 224  213 218 233  218 222 237
+214 219 236  213 218 233  214 219 236  214 219 236  213 218 233  213 218 233
+214 219 236  214 219 236  214 219 236  214 219 236  214 219 236  214 219 236
+214 219 236  214 219 236  214 219 236  214 219 236  214 219 236  214 219 236
+214 219 236  214 219 236  214 219 236  214 219 236  214 219 236  214 219 236
+214 219 236  214 219 236  214 219 236  214 219 236  218 222 237  214 219 236
+192 201 226  141 156 201  79 102 174  69 93 168  68 84 132  23 26 35
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  31 38 62  72 90 145  71 96 171
+96 117 181  168 179 211  213 218 233  213 218 233  209 215 234  209 215 232
+209 215 232  209 215 232  209 215 232  209 215 232  209 215 232  209 215 232
+209 215 232  209 215 232  209 215 232  209 215 232  209 215 232  209 215 234
+209 215 234  209 215 234  209 215 234  209 215 234  209 215 234  209 215 234
+209 215 234  209 215 234  209 215 234  209 215 234  209 215 234  209 215 234
+209 215 234  209 215 234  209 215 234  209 215 234  209 215 234  209 215 234
+214 219 236  214 219 236  176 187 218  108 128 187  71 96 171  72 90 145
+31 38 62  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  1 2 3  43 50 70  72 90 145  74 98 172  124 141 194
+191 199 224  209 215 232  205 212 231  205 212 231  205 212 231  205 212 231
+205 212 231  205 212 231  205 212 231  205 212 231  205 212 231  205 212 231
+205 212 231  205 212 231  205 212 231  205 212 231  205 212 231  205 212 231
+205 212 231  205 212 231  205 212 231  205 212 231  205 212 231  205 212 231
+205 212 231  205 212 231  205 212 231  205 212 231  205 212 231  205 212 231
+205 212 231  205 212 231  205 212 231  205 212 231  205 212 231  205 212 231
+205 212 231  205 212 231  209 215 234  197 204 227  136 152 200  75 99 174
+72 90 145  43 50 70  1 2 3  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+5 7 12  53 65 105  69 93 168  87 109 178  160 171 208  201 208 230
+201 208 230  201 208 230  201 208 230  201 208 230  201 208 230  201 208 230
+201 208 230  201 208 230  201 208 230  201 208 230  201 208 230  201 208 230
+201 208 230  201 208 230  201 208 230  201 208 230  201 208 230  201 208 230
+201 208 230  201 208 230  201 208 230  201 208 230  201 208 230  201 208 230
+201 208 230  201 208 230  201 208 230  201 208 230  201 208 230  201 208 230
+201 208 230  201 208 230  201 208 230  201 208 230  201 208 230  201 208 230
+201 208 230  201 208 230  201 208 230  205 211 230  205 212 231  166 177 212
+96 117 181  69 93 168  53 65 105  7 8 13  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  11 13 21
+53 65 105  69 93 168  87 109 178  166 177 212  201 208 230  197 204 227
+197 204 227  197 204 227  197 204 227  197 204 227  197 204 227  197 204 227
+197 204 227  197 204 227  197 204 227  197 204 227  197 204 227  197 204 227
+197 204 227  197 204 227  197 204 227  197 204 227  197 204 227  197 204 227
+197 204 227  197 204 227  197 204 227  197 204 227  197 204 227  197 204 227
+198 205 228  198 205 228  198 205 228  198 205 228  198 205 228  198 205 228
+198 205 228  198 205 228  198 205 228  198 205 228  198 205 228  198 205 228
+198 205 228  198 205 228  198 205 228  198 205 228  198 205 228  205 211 230
+176 187 218  91 113 180  69 93 168  53 65 105  11 13 21  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  8 10 17  53 65 105
+69 93 168  87 109 178  160 171 208  197 204 227  192 201 226  192 201 226
+192 201 226  192 201 226  192 201 226  192 201 226  192 201 226  192 201 226
+192 201 226  192 201 226  192 201 226  192 201 226  192 201 226  192 201 226
+192 201 226  192 201 226  192 201 226  192 201 226  192 201 226  192 201 226
+192 201 226  192 201 226  192 201 226  192 201 226  192 201 226  192 201 226
+192 201 226  192 201 226  192 201 226  192 201 226  192 201 226  192 201 226
+192 201 226  192 201 226  192 201 226  192 201 226  192 201 226  192 201 226
+192 201 226  192 201 226  192 201 226  192 201 226  192 201 226  194 201 224
+198 205 228  169 180 211  91 113 180  71 96 169  53 65 105  5 7 12
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  1 2 3  46 54 81  69 93 168
+87 109 178  160 171 208  192 201 226  189 197 224  189 197 224  189 197 224
+189 197 224  189 197 224  189 197 224  189 197 224  189 197 224  189 197 224
+189 198 224  189 198 224  189 198 224  189 198 224  189 198 224  189 198 224
+189 198 224  189 198 224  189 198 224  189 198 224  189 198 224  189 198 224
+188 198 224  188 198 224  189 198 224  189 197 224  189 198 224  189 198 224
+189 198 224  189 198 224  189 198 224  189 198 224  189 198 224  189 198 224
+189 198 224  189 198 224  189 198 224  189 198 224  189 198 224  189 198 224
+189 198 224  189 198 224  189 198 224  189 198 224  189 198 224  189 198 224
+189 198 224  194 201 224  166 177 212  91 113 180  69 93 168  46 54 81
+1 2 3  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  31 38 62  72 95 165  79 102 174
+153 166 206  189 197 224  184 193 222  184 193 222  184 193 222  184 193 222
+184 193 222  184 193 222  184 193 222  184 193 222  184 193 222  184 193 222
+184 193 220  184 193 222  185 195 223  185 195 223  185 195 223  184 193 222
+185 195 222  185 195 223  185 195 223  185 195 223  185 195 223  185 195 223
+185 195 223  185 195 223  185 195 223  185 195 223  185 195 223  185 195 223
+185 195 222  184 193 222  185 195 223  185 195 223  185 195 223  185 195 223
+185 195 222  185 195 223  185 195 223  185 195 223  185 195 222  185 195 223
+185 195 223  185 195 223  185 195 223  185 195 223  185 195 223  185 195 223
+185 195 223  185 195 223  189 198 224  160 171 208  84 105 171  69 93 168
+31 38 62  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  27 32 46  72 90 145  72 96 171  126 145 198
+184 193 220  180 190 220  180 190 220  180 190 220  180 190 220  180 190 220
+180 190 220  180 190 220  180 190 220  180 190 220  180 190 220  180 190 220
+180 190 220  180 190 220  180 190 220  180 190 220  180 190 220  184 193 222
+185 195 222  185 195 223  189 198 224  191 199 224  192 201 226  192 201 226
+192 201 226  194 201 224  192 201 226  191 199 224  189 198 224  189 197 224
+185 195 222  184 193 222  184 193 220  180 190 220  180 190 220  184 193 220
+185 195 222  189 197 224  189 198 224  189 197 224  185 195 223  184 193 222
+180 190 220  180 190 220  180 190 220  180 190 220  180 190 220  180 190 220
+180 190 220  180 190 220  180 190 220  185 195 223  137 153 200  74 98 173
+72 90 145  27 32 46  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  12 16 26  68 84 132  71 96 169  96 117 181  169 180 211
+176 187 218  176 187 218  176 187 218  176 187 218  176 187 218  176 187 218
+176 187 218  176 187 218  176 187 218  176 187 218  176 187 218  176 187 218
+176 187 218  176 187 218  180 190 220  180 190 220  185 195 223  189 198 224
+197 204 227  201 208 230  205 212 231  209 215 232  213 218 233  214 219 236
+218 222 237  214 219 236  213 218 233  209 215 232  205 212 231  201 208 230
+197 204 227  192 201 226  189 197 224  185 195 223  184 193 222  189 197 224
+192 201 226  201 208 230  205 212 231  205 211 230  197 204 227  185 195 223
+180 190 220  176 187 218  176 187 218  176 187 218  176 187 218  176 187 218
+176 187 218  176 187 218  176 187 218  180 190 220  176 187 218  108 128 187
+69 93 168  68 84 132  12 16 26  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+4 5 10  53 65 105  71 96 169  75 99 174  147 161 203  176 187 218
+175 184 213  175 184 213  175 184 213  175 184 213  175 184 213  175 184 213
+175 184 213  175 184 213  175 184 213  175 184 213  175 184 213  175 184 213
+176 187 218  180 190 220  185 195 222  194 201 224  205 211 230  218 222 237
+229 232 239  229 232 239  228 230 236  228 230 236  229 232 239  228 230 236
+225 227 235  228 230 236  229 232 239  228 230 236  228 230 236  232 235 242
+232 235 242  221 226 239  209 215 234  201 208 228  198 205 228  205 211 230
+218 222 237  232 235 242  228 230 236  229 232 239  221 226 239  199 206 227
+184 193 222  176 187 218  176 187 218  175 184 213  176 187 218  176 187 218
+176 187 218  176 187 218  176 187 218  176 187 218  176 187 218  160 171 208
+79 102 174  71 96 169  53 65 105  2 3 6  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+31 38 62  72 95 165  72 96 171  117 135 190  169 180 211  166 177 212
+166 177 212  166 177 212  166 177 212  166 177 212  166 177 212  168 179 211
+168 179 211  168 179 211  168 179 211  166 177 212  168 179 211  175 184 213
+180 190 220  191 199 224  205 212 231  229 232 239  232 235 242  209 213 223
+185 191 210  156 164 188  156 164 188  129 135 150  128 134 148  122 122 122
+122 122 122  122 122 122  128 134 148  129 135 150  129 135 150  156 164 188
+183 190 208  204 208 221  225 227 235  232 235 242  232 235 242  232 235 242
+219 222 229  168 177 206  129 135 150  156 164 188  219 222 229  221 226 239
+189 198 224  176 187 218  169 180 211  168 179 211  169 180 211  169 180 211
+169 180 211  169 180 211  169 180 211  169 180 211  169 180 211  175 184 213
+127 144 195  72 96 171  72 95 165  31 38 62  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  7 8 13
+53 65 105  72 96 170  87 109 178  152 165 205  166 177 212  166 177 212
+166 177 212  166 177 212  166 177 212  166 177 212  166 177 212  166 177 212
+166 177 212  164 175 209  166 177 212  166 177 212  169 180 211  180 190 220
+194 201 224  218 222 237  232 235 242  194 200 218  156 164 188  128 134 148
+122 122 122  122 122 122  122 122 122  122 122 122  122 122 122  122 122 122
+122 122 122  122 122 122  122 122 122  122 122 122  122 122 122  122 122 122
+122 122 122  122 122 122  129 135 150  156 164 188  183 190 208  156 164 188
+128 134 148  122 122 122  122 122 122  122 122 122  156 164 188  229 232 239
+197 204 227  176 187 218  166 177 212  166 177 212  166 177 212  166 177 212
+166 177 212  166 177 212  166 177 212  166 177 212  166 177 212  166 177 212
+160 171 208  90 112 180  71 96 171  53 65 105  8 10 17  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  31 38 62
+72 95 165  71 96 171  108 128 187  160 171 208  160 171 208  160 171 208
+160 171 208  160 171 208  160 171 208  160 171 208  160 171 208  160 171 208
+160 171 208  160 171 208  160 171 208  166 177 212  176 187 218  197 204 224
+232 235 242  209 213 223  129 135 150  122 122 122  122 122 122  122 122 122
+122 122 122  122 122 122  122 122 122  122 122 122  122 122 122  122 122 122
+122 122 122  122 122 122  122 122 122  122 122 122  122 122 122  122 122 122
+122 122 122  122 122 122  122 122 122  122 122 122  122 122 122  122 122 122
+122 122 122  122 122 122  122 122 122  122 122 122  122 122 122  228 230 236
+201 208 230  176 187 218  164 175 209  160 171 208  160 171 208  160 171 208
+164 175 209  164 175 209  164 175 209  164 175 209  164 175 209  164 175 209
+166 177 212  117 135 190  71 96 171  72 95 165  31 38 62  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  7 8 13  68 84 132
+73 97 171  74 98 172  132 148 198  160 171 208  159 170 206  159 170 206
+159 170 206  159 170 206  159 170 206  159 170 206  159 170 206  159 170 206
+153 166 206  153 166 206  160 171 208  169 180 211  190 198 223  229 232 239
+204 208 221  122 122 122  113 116 128  122 122 122  122 122 122  122 122 122
+122 122 122  113 116 128  113 116 128  113 116 128  113 116 128  122 122 122
+122 122 122  113 116 128  113 116 128  113 116 128  113 116 128  113 116 128
+122 122 122  122 122 122  122 122 122  122 122 122  122 122 122  122 122 122
+122 122 122  122 122 122  122 122 122  122 122 122  122 122 122  225 227 235
+205 211 230  175 184 213  160 171 208  159 170 206  160 171 208  160 171 208
+160 171 208  160 171 208  160 171 208  160 171 208  160 171 208  160 171 208
+160 171 208  137 153 200  79 102 174  72 96 170  68 84 132  8 10 17
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  31 38 62  72 95 165
+73 97 172  87 109 178  152 165 205  153 166 206  153 166 206  153 166 206
+153 166 206  153 166 206  153 166 206  153 166 206  153 166 206  153 166 206
+152 165 205  153 166 206  160 171 208  180 190 220  221 226 239  204 208 221
+122 122 122  113 116 128  113 116 128  113 116 128  113 116 128  113 116 128
+113 116 128  129 135 150  168 177 206  194 200 218  219 222 229  228 230 236
+225 227 235  219 222 229  209 213 223  183 190 208  129 135 150  122 122 122
+113 116 128  113 116 128  113 116 128  113 116 128  113 116 128  113 116 128
+113 116 128  113 116 128  113 116 128  113 116 128  113 116 128  219 222 229
+205 211 230  169 180 211  153 166 206  153 166 206  153 166 206  153 166 206
+153 166 206  153 166 206  153 166 206  153 166 206  153 166 206  153 166 206
+153 166 206  153 166 206  91 113 180  72 96 171  72 95 165  43 50 70
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  11 13 21  53 65 105  74 98 173
+72 96 170  111 130 189  149 162 202  147 161 203  147 161 203  147 161 203
+147 161 203  147 161 203  147 161 203  147 161 203  147 161 203  147 161 203
+147 161 203  152 165 205  166 177 212  199 206 227  228 230 236  122 122 122
+78 87 115  113 116 128  113 116 128  78 87 115  78 87 115  129 135 150
+194 200 218  232 235 242  221 226 239  213 217 231  205 211 230  197 204 227
+197 204 227  201 208 228  209 215 232  218 222 235  232 235 242  225 227 235
+183 190 208  122 122 122  78 87 115  113 116 128  113 116 128  113 116 128
+113 116 128  113 116 128  113 116 128  113 116 128  113 116 128  219 222 229
+201 208 230  166 177 212  152 165 205  147 161 203  147 161 203  152 165 205
+152 165 205  152 165 205  152 165 205  152 165 205  152 165 205  147 161 203
+152 165 205  152 165 205  117 135 190  72 96 170  74 98 173  68 84 132
+12 16 26  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  31 38 62  72 95 165  74 98 173
+72 96 170  127 144 195  144 158 201  144 158 201  144 158 201  144 158 201
+144 158 201  144 158 201  144 158 201  144 158 201  144 158 201  144 158 201
+144 158 201  153 166 206  175 184 213  221 226 239  156 164 188  78 87 115
+78 87 115  78 87 115  78 87 115  78 87 115  129 135 150  229 232 239
+218 222 235  190 198 223  176 187 218  169 180 211  166 177 212  166 177 212
+164 175 209  166 177 212  166 177 212  175 184 213  180 190 220  197 204 227
+221 226 239  219 222 229  156 164 188  78 87 115  78 87 115  78 87 115
+78 87 115  78 87 115  78 87 115  78 87 115  78 87 115  219 222 229
+201 208 228  166 177 212  147 161 203  144 158 201  147 161 203  147 161 203
+147 161 203  147 161 203  147 161 203  147 161 203  147 161 203  147 161 203
+147 161 203  147 161 203  132 148 198  73 97 171  73 98 172  72 95 165
+31 38 62  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  2 3 6  53 65 105  73 96 167  73 97 172
+73 97 171  127 144 195  141 156 201  137 153 200  137 153 200  137 153 200
+137 153 200  137 153 200  137 153 200  137 153 200  141 156 201  137 153 200
+141 156 201  153 166 206  185 195 222  228 230 236  122 122 122  69 78 104
+78 87 115  78 87 115  69 78 104  122 122 122  228 230 236  205 211 230
+175 184 213  160 171 208  152 165 205  147 161 203  144 158 201  144 158 201
+141 156 201  144 158 201  147 161 203  147 161 203  153 166 206  164 175 209
+175 184 213  198 205 228  232 235 242  183 190 208  113 116 128  78 87 115
+78 87 115  78 87 115  78 87 115  78 87 115  78 87 115  219 222 229
+197 204 227  160 171 208  144 158 201  141 156 201  141 156 201  141 156 201
+141 156 201  141 156 201  141 156 201  141 156 201  141 156 201  141 156 201
+141 156 201  141 156 201  136 152 200  74 98 172  73 97 172  73 97 170
+53 65 105  4 5 10  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  12 16 26  68 84 132  74 98 173  72 96 171
+79 102 174  127 144 195  136 152 200  136 152 200  136 152 200  136 152 200
+136 152 200  136 152 200  136 152 200  136 152 200  136 152 200  136 152 200
+137 153 200  153 166 206  197 204 227  204 208 221  78 87 115  69 78 104
+69 78 104  69 78 104  69 78 104  183 190 208  221 225 235  176 187 218
+152 165 205  141 156 201  136 152 200  136 152 200  136 152 200  136 152 200
+136 152 200  136 152 200  136 152 200  136 152 200  137 153 200  141 156 201
+152 165 205  166 177 212  184 193 220  225 227 235  185 191 210  78 87 115
+69 78 104  69 78 104  69 78 104  69 78 104  69 78 104  219 222 229
+194 201 224  160 171 208  141 156 201  136 152 200  137 153 200  137 153 200
+137 153 200  137 153 200  137 153 200  137 153 200  137 153 200  137 153 200
+137 153 200  137 153 200  132 148 198  79 102 174  72 96 171  74 98 173
+72 89 141  19 21 29  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  31 38 62  72 95 165  74 98 173  72 96 170
+87 109 178  127 144 195  132 148 198  132 148 198  132 148 198  132 148 198
+132 148 198  132 148 198  132 148 198  132 148 198  132 148 198  132 148 198
+136 152 200  153 166 206  201 208 230  183 190 208  69 78 104  69 78 104
+69 78 104  69 78 104  69 78 104  209 213 223  199 206 227  160 171 208
+137 153 200  132 148 198  132 148 198  132 148 198  132 148 198  132 148 198
+132 148 198  132 148 198  132 148 198  132 148 198  132 148 198  132 148 198
+132 148 198  141 156 201  153 166 206  176 187 218  221 225 235  183 190 208
+69 78 104  56 62 79  69 78 104  69 78 104  69 78 104  219 222 229
+185 195 222  152 165 205  136 152 200  132 148 198  132 148 198  132 148 198
+132 148 198  132 148 198  132 148 198  132 148 198  132 148 198  132 148 198
+132 148 198  132 148 198  132 148 198  87 109 178  72 96 170  74 98 173
+72 95 165  31 38 62  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 1  53 65 105  74 98 172  73 97 171  71 96 171
+90 112 180  124 141 194  127 144 195  127 144 195  127 144 195  127 144 195
+127 144 195  127 144 195  127 144 195  127 144 195  127 144 195  127 144 195
+132 148 198  153 166 206  205 212 231  183 190 208  56 60 74  56 62 79
+56 62 79  56 60 74  78 87 115  209 213 223  191 199 224  153 166 206
+132 148 198  127 144 195  127 144 195  127 144 195  127 144 195  127 144 195
+127 144 195  126 145 198  126 145 198  126 145 198  126 145 198  126 145 198
+127 144 195  132 148 198  136 152 200  152 165 205  176 187 218  225 227 235
+183 190 208  69 78 104  56 60 74  56 59 67  122 122 122  220 224 234
+175 184 213  144 158 201  126 145 198  127 144 195  126 145 198  126 145 198
+126 145 198  126 145 198  126 145 198  126 145 198  126 145 198  126 145 198
+126 145 198  126 145 198  127 144 195  91 113 180  71 96 171  73 97 172
+72 96 170  53 65 105  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  8 10 17  53 65 105  75 99 174  74 98 172  71 96 171
+90 112 180  120 138 192  124 141 194  124 141 194  124 141 194  124 141 194
+124 141 194  124 141 194  124 141 194  124 141 194  120 138 192  120 138 192
+126 145 198  147 161 203  198 205 228  183 190 208  56 60 74  56 59 67
+56 60 74  56 59 67  69 78 104  204 208 221  199 206 227  160 171 208
+137 153 200  126 145 198  127 144 195  127 144 195  124 141 194  124 141 194
+124 141 194  124 141 194  124 141 194  124 141 194  124 141 194  124 141 194
+124 141 194  124 141 194  124 141 194  132 148 198  149 162 202  175 184 213
+220 224 234  183 190 208  122 122 122  122 122 122  204 208 221  205 211 230
+159 170 206  132 148 198  124 141 194  124 141 194  124 141 194  124 141 194
+124 141 194  124 141 194  124 141 194  124 141 194  124 141 194  124 141 194
+124 141 194  124 141 194  124 141 194  96 117 181  71 96 171  74 98 171
+74 98 173  53 65 105  8 10 17  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  23 26 35  68 84 132  74 98 173  74 98 172  72 96 171
+87 109 178  117 135 190  120 138 192  120 138 192  120 138 192  120 138 192
+120 138 192  120 138 192  120 138 192  120 138 192  117 135 190  117 135 190
+124 141 194  141 156 201  189 198 224  183 190 208  56 62 79  56 59 67
+56 59 67  56 59 67  56 59 67  129 135 150  232 235 242  184 193 220
+160 171 208  147 161 203  141 156 201  136 152 200  132 148 198  132 148 198
+132 148 198  127 144 195  127 144 195  127 144 195  127 144 195  124 141 194
+124 141 194  124 141 194  124 141 194  124 141 194  132 148 198  144 158 201
+168 179 211  205 211 230  221 225 235  220 224 234  199 206 227  164 175 209
+136 152 200  124 141 194  120 138 192  120 138 192  120 138 192  120 138 192
+120 138 192  120 138 192  120 138 192  120 138 192  120 138 192  120 138 192
+120 138 192  120 138 192  120 138 192  87 109 178  72 96 171  74 98 172
+74 98 173  72 89 141  23 26 35  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  27 32 46  72 95 165  73 97 172  74 98 172  73 97 171
+79 102 174  111 130 189  117 135 190  117 135 190  117 135 190  117 135 190
+117 135 190  117 135 190  117 135 190  117 135 190  117 135 190  115 133 187
+117 135 190  132 148 198  176 187 218  219 222 229  69 78 104  56 59 67
+56 59 67  56 59 67  56 59 67  56 59 67  156 164 188  232 235 242
+209 215 232  185 195 222  176 187 218  168 179 211  164 175 209  159 170 206
+153 166 206  153 166 206  152 165 205  149 162 202  144 158 201  141 156 201
+141 156 201  136 152 200  132 148 198  132 148 198  132 148 198  132 148 198
+141 156 201  153 166 206  160 171 208  160 171 208  147 161 203  132 148 198
+120 138 192  117 135 190  117 135 190  117 135 190  117 135 190  117 135 190
+117 135 190  117 135 190  117 135 190  117 135 190  117 135 190  117 135 190
+117 135 190  117 135 190  117 135 190  79 102 174  73 97 171  74 98 172
+73 98 172  72 95 165  31 38 62  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 1  31 38 62  73 96 167  74 98 173  74 98 172  72 96 170
+74 98 173  108 128 187  111 130 189  111 130 189  111 130 189  111 130 189
+111 130 189  111 130 189  111 130 189  111 130 189  111 130 189  111 130 189
+111 130 189  124 141 194  153 166 206  221 225 235  122 122 122  38 41 51
+56 59 67  56 59 67  56 59 67  38 41 51  56 59 67  113 116 128
+156 164 188  209 213 223  209 213 223  209 213 223  213 217 231  213 217 231
+213 217 231  209 215 232  209 215 232  209 215 232  201 208 230  192 201 226
+185 195 222  180 190 220  175 184 213  164 175 209  153 166 206  147 161 203
+144 158 201  141 156 201  136 152 200  132 148 198  124 141 194  117 135 190
+111 130 189  111 130 189  111 130 189  111 130 189  111 130 189  111 130 189
+111 130 189  111 130 189  111 130 189  111 130 189  111 130 189  111 130 189
+111 130 189  111 130 189  111 130 189  75 99 174  73 97 171  74 98 172
+74 98 172  72 96 170  31 38 62  0 0 0  0 0 0  0 0 0
+0 0 0
+2 3 6  46 54 81  71 96 169  74 98 173  74 98 172  73 97 171
+72 96 171  96 117 181  108 128 187  108 128 187  108 128 187  108 128 187
+108 128 187  108 128 187  108 128 187  108 128 187  108 128 187  108 128 187
+108 128 187  111 130 189  136 152 200  184 193 220  204 208 221  56 59 67
+38 41 51  38 41 51  38 41 51  38 41 51  38 41 51  38 41 51
+38 41 51  38 41 51  56 59 67  78 87 115  122 122 122  128 134 148
+128 134 148  129 135 150  129 135 150  156 164 188  156 164 188  156 164 188
+183 190 208  194 200 218  209 213 223  220 224 234  221 226 239  209 215 232
+189 196 219  164 175 209  147 161 203  132 148 198  120 138 192  111 130 189
+108 128 187  108 128 187  108 128 187  108 128 187  108 128 187  108 128 187
+108 128 187  108 128 187  108 128 187  108 128 187  108 128 187  108 128 187
+108 128 187  108 128 187  108 128 187  73 97 171  73 97 171  74 98 172
+74 98 171  74 98 172  46 54 81  2 3 6  0 0 0  0 0 0
+0 0 0
+5 7 12  53 65 105  72 96 170  74 98 172  74 98 172  74 98 172
+73 97 170  91 113 180  108 128 187  108 128 187  108 128 187  108 128 187
+108 128 187  108 128 187  108 128 187  108 128 187  108 128 187  108 128 187
+108 128 187  108 128 187  120 138 192  152 165 205  213 218 233  156 164 188
+34 36 42  34 36 42  38 41 51  34 36 42  34 36 42  34 36 42
+34 36 42  34 36 42  34 36 42  34 36 42  34 36 42  29 31 36
+29 31 36  29 31 36  29 31 36  29 31 36  29 31 36  38 41 51
+56 59 67  56 59 67  69 78 104  78 87 115  113 116 128  129 135 150
+204 208 221  225 227 235  199 206 227  166 177 212  141 156 201  120 138 192
+111 130 189  108 128 187  108 128 187  108 128 187  108 128 187  108 128 187
+108 128 187  108 128 187  108 128 187  108 128 187  108 128 187  108 128 187
+108 128 187  108 128 187  96 117 181  73 97 171  73 97 171  74 98 172
+74 98 172  74 98 172  53 65 105  5 7 12  0 0 0  0 0 0
+0 0 0
+7 8 13  53 65 105  73 97 170  74 98 171  74 98 172  74 98 172
+73 97 171  79 102 174  96 117 181  96 117 181  96 117 181  96 117 181
+96 117 181  96 117 181  96 117 181  96 117 181  96 117 181  96 117 181
+96 117 181  96 117 181  108 128 187  127 144 195  160 171 208  218 222 235
+156 164 188  38 41 51  27 29 36  34 36 42  34 36 42  34 36 42
+34 36 42  34 36 42  34 36 42  34 36 42  34 36 42  34 36 42
+34 36 42  34 36 42  34 36 42  34 36 42  34 36 42  34 36 42
+29 31 36  29 31 36  29 31 36  29 31 36  27 29 36  29 31 36
+34 36 42  69 78 104  156 164 188  210 214 227  190 198 223  147 161 203
+120 138 192  108 128 187  96 117 181  96 117 181  96 117 181  96 117 181
+96 117 181  96 117 181  96 117 181  96 117 181  96 117 181  96 117 181
+96 117 181  96 117 181  87 109 178  72 96 170  72 96 170  74 98 172
+74 98 172  74 98 173  53 65 105  7 8 13  0 0 0  0 0 0
+0 0 0
+7 8 13  53 65 105  73 97 171  74 98 172  74 98 172  74 98 172
+72 96 170  74 98 172  91 113 180  96 117 181  96 117 181  96 117 181
+96 117 181  96 117 181  96 117 181  96 117 181  96 117 181  96 117 181
+96 117 181  96 117 181  96 117 181  108 128 187  127 144 195  160 171 208
+218 222 237  183 190 208  69 78 104  21 23 31  19 21 29  21 23 31
+27 29 36  29 31 36  29 31 36  29 31 36  29 31 36  29 31 36
+29 31 36  29 31 36  27 29 36  29 31 36  29 31 36  29 31 36
+29 31 36  29 31 36  29 31 36  29 31 36  29 31 36  29 31 36
+27 29 36  21 23 31  19 21 29  78 87 115  204 208 221  199 206 227
+144 158 201  117 135 190  96 117 181  96 117 181  96 117 181  96 117 181
+96 117 181  96 117 181  96 117 181  96 117 181  96 117 181  96 117 181
+96 117 181  96 117 181  75 99 174  72 96 170  73 97 171  74 98 172
+74 98 172  74 98 172  53 65 105  8 10 17  0 0 0  0 0 0
+0 0 0
+8 10 17  53 65 105  74 98 172  74 98 172  74 98 172  74 98 172
+73 97 171  73 97 171  79 102 174  91 113 180  91 113 180  91 113 180
+91 113 180  91 113 180  91 113 180  91 113 180  91 113 180  91 113 180
+91 113 180  90 112 180  90 112 180  91 113 180  96 117 181  120 138 192
+147 161 203  194 201 224  219 222 229  156 164 188  69 78 104  38 41 51
+19 21 29  8 10 17  11 13 21  17 19 24  17 19 24  17 19 24
+17 19 24  17 19 24  17 19 24  17 19 24  17 19 24  21 23 31
+21 23 31  21 23 31  21 23 31  21 23 31  21 23 31  21 23 31
+21 23 31  21 23 31  19 21 29  17 19 24  56 59 67  194 200 218
+189 197 224  132 148 198  108 128 187  91 113 180  91 113 180  91 113 180
+91 113 180  91 113 180  91 113 180  91 113 180  91 113 180  91 113 180
+91 113 180  87 109 178  73 97 171  73 97 171  74 98 172  74 98 172
+74 98 172  74 98 173  53 65 105  8 10 17  0 0 0  0 0 0
+0 0 0
+8 10 17  68 84 132  74 98 173  74 98 172  74 98 172  74 98 172
+74 98 172  72 96 170  77 101 175  87 109 178  87 109 178  87 109 178
+87 109 178  87 109 178  87 109 178  87 109 178  87 109 178  87 109 178
+87 109 178  87 109 178  87 109 178  87 109 178  87 109 178  96 117 181
+108 128 187  131 146 194  159 170 206  194 201 224  225 227 235  194 200 218
+129 135 150  122 122 122  78 87 115  56 59 67  38 41 51  34 36 42
+27 29 36  17 19 24  17 19 24  17 19 24  11 13 21  5 7 12
+4 5 10  7 8 13  7 8 13  11 13 21  17 19 24  17 19 24
+17 19 24  17 19 24  17 19 24  17 19 24  7 8 13  56 60 74
+219 222 229  160 171 208  117 135 190  91 113 180  87 109 178  87 109 178
+87 109 178  87 109 178  87 109 178  87 109 178  87 109 178  87 109 178
+87 109 178  79 102 174  72 96 170  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 173  53 65 105  8 10 17  0 0 0  0 0 0
+0 0 0
+7 8 13  53 65 105  74 98 173  74 98 172  74 98 172  74 98 172
+74 98 172  73 97 171  74 98 172  79 102 174  84 105 171  84 105 171
+84 105 171  84 105 171  84 105 171  84 105 171  84 105 171  84 105 171
+84 105 171  79 102 174  84 105 171  87 109 178  87 109 178  91 113 180
+96 117 181  96 117 181  111 130 189  124 141 194  144 158 201  168 179 211
+192 201 226  209 215 232  209 213 223  194 200 218  183 190 208  183 190 208
+183 190 208  183 190 208  183 190 208  183 190 208  183 190 208  183 190 208
+156 164 188  156 164 188  122 122 122  56 59 67  17 19 24  7 8 13
+8 10 17  11 13 21  11 13 21  11 13 21  8 10 17  5 7 12
+129 135 150  201 208 230  127 144 195  96 117 181  87 109 178  84 105 171
+87 109 178  87 109 178  87 109 178  87 109 178  87 109 178  87 109 178
+79 102 174  74 98 172  73 97 171  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  53 65 105  8 10 17  0 0 0  0 0 0
+0 0 0
+5 7 12  53 65 105  73 97 171  74 98 172  74 98 172  74 98 172
+74 98 172  73 97 171  72 96 170  74 98 172  79 102 174  79 102 174
+79 102 174  79 102 174  79 102 174  79 102 174  79 102 174  79 102 174
+79 102 174  79 102 174  87 109 178  96 117 181  111 130 189  117 135 190
+117 135 190  108 128 187  96 117 181  96 117 181  96 117 181  108 128 187
+117 135 190  127 144 195  137 153 200  152 165 205  160 171 208  160 171 208
+166 177 212  168 179 211  169 180 211  169 180 211  175 184 213  176 187 218
+184 193 220  189 196 219  202 207 223  219 222 229  204 208 221  113 116 128
+2 3 6  4 5 10  5 7 12  5 7 12  7 8 13  0 0 0
+56 59 67  204 208 221  147 161 203  96 117 181  84 105 171  79 102 174
+79 102 174  79 102 174  79 102 174  79 102 174  79 102 174  79 102 174
+74 98 172  72 96 170  73 97 171  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 173  53 65 105  7 8 13  0 0 0  0 0 0
+0 0 0
+4 5 10  53 65 105  72 96 170  74 98 171  74 98 172  74 98 172
+74 98 172  74 98 172  73 97 171  73 97 171  74 98 171  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  73 97 171
+74 98 172  79 102 174  108 128 187  132 148 198  175 184 213  194 201 224
+189 197 224  153 166 206  117 135 190  96 117 181  79 102 174  79 102 174
+87 108 173  87 109 178  91 113 180  96 117 181  96 117 181  108 128 187
+108 128 187  108 128 187  108 128 187  108 128 187  108 128 187  111 130 189
+111 130 189  120 137 191  127 144 195  144 158 201  178 186 211  220 224 234
+122 122 122  0 0 0  1 2 3  1 2 3  1 2 3  0 0 0
+7 8 13  183 190 208  166 177 212  108 128 187  79 102 174  74 98 172
+75 99 174  75 99 174  75 99 174  75 99 174  75 99 174  74 98 172
+73 97 171  73 97 171  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  53 65 105  5 7 12  0 0 0  0 0 0
+0 0 0
+2 3 6  46 54 81  73 96 167  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  73 97 172  73 97 171
+73 97 171  73 97 171  73 97 171  73 97 171  72 96 171  72 96 170
+75 99 174  91 113 180  132 148 198  198 204 221  183 190 208  122 122 122
+129 135 150  210 214 227  164 175 209  111 130 189  87 109 178  74 98 172
+73 97 171  74 98 171  74 98 172  75 99 174  79 102 174  79 102 174
+79 102 174  79 102 174  79 102 174  79 102 174  79 102 174  79 102 174
+79 102 174  87 109 178  87 109 178  96 117 181  124 141 194  168 179 211
+209 213 223  38 41 51  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  156 164 188  176 187 218  108 128 187  79 102 174  73 97 171
+73 97 172  73 97 172  73 97 172  73 97 172  73 97 172  73 97 171
+73 97 171  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 171  73 97 171  46 54 81  2 3 6  0 0 0  0 0 0
+0 0 0
+1 2 3  31 38 62  72 95 165  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  73 97 170  72 96 171
+79 102 174  108 128 187  175 184 213  183 190 208  17 19 24  0 0 0
+0 0 0  69 78 104  220 224 234  144 157 197  96 117 181  79 102 174
+73 97 171  72 96 170  73 97 171  73 97 171  74 98 171  74 98 171
+74 98 171  74 98 171  74 98 171  74 98 171  74 98 171  74 98 171
+73 97 172  73 97 171  74 98 171  79 102 174  96 117 181  136 152 200
+206 211 226  113 116 128  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  156 164 188  185 195 222  111 130 189  79 102 174  74 98 171
+72 96 171  73 97 171  73 97 171  73 97 171  73 97 171  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  73 97 170  31 38 62  0 0 1  0 0 0  0 0 0
+0 0 0
+0 0 0  27 32 46  72 95 165  74 98 173  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  73 97 171  73 97 171
+84 105 171  115 133 187  197 204 227  128 134 148  0 0 0  0 0 0
+0 0 0  2 3 6  156 164 188  189 198 224  120 137 191  90 112 180
+75 99 174  73 97 170  73 97 171  73 97 171  73 97 172  73 97 172
+73 97 172  73 97 172  73 97 172  73 97 172  73 97 172  73 97 172
+74 98 171  73 97 172  73 97 170  74 98 172  87 109 178  124 141 194
+201 208 228  128 134 148  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  129 135 150  190 198 223  115 133 187  79 102 174  74 98 171
+73 97 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 173  73 96 167  27 32 46  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  19 21 29  72 90 145  74 99 174  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  73 97 171  74 98 171
+87 109 178  120 137 191  199 206 227  122 122 122  0 0 0  0 0 0
+0 0 0  0 0 0  56 59 67  219 222 229  159 170 206  111 130 189
+87 109 178  77 101 175  73 98 172  73 97 170  73 97 171  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  73 97 171  73 97 171  74 98 172  91 113 180  131 146 194
+205 211 230  122 122 122  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  156 164 188  180 190 220  111 130 189  79 102 174  74 98 171
+72 96 171  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 173  72 90 145  23 26 35  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  11 13 21  68 84 132  75 99 174  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  73 97 171  74 98 171
+87 109 178  120 138 192  201 208 228  122 122 122  0 0 0  0 0 0
+0 0 0  0 0 0  1 2 3  128 134 148  216 220 231  149 162 202
+115 133 187  91 113 180  79 102 174  74 98 172  73 97 171  73 97 171
+73 97 171  73 97 171  73 97 171  73 97 171  73 97 171  73 97 171
+73 97 171  73 97 171  74 98 172  79 102 174  108 128 187  149 162 202
+209 213 223  56 59 67  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 1  183 190 208  169 180 211  108 128 187  79 102 174  73 97 171
+73 97 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+75 99 174  53 65 105  11 13 21  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  2 3 6  46 54 81  75 99 174  74 98 171  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  73 97 171  74 98 171
+87 109 178  120 138 192  201 208 228  122 122 122  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  5 7 12  129 135 150  218 222 235
+164 175 209  125 141 190  108 128 187  91 113 180  84 105 171  79 102 174
+75 99 174  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+75 99 174  79 102 174  87 109 178  108 128 187  135 149 193  198 205 228
+183 190 208  7 8 13  0 0 0  0 0 0  0 0 0  0 0 0
+38 41 51  194 200 218  152 165 205  96 117 181  77 101 175  72 96 171
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  73 97 171
+75 99 174  46 54 81  2 3 6  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  31 38 62  72 95 165  73 97 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  73 97 171  74 98 171
+87 109 178  120 138 192  201 208 228  122 122 122  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  2 3 6  122 122 122
+204 208 221  201 208 228  157 168 202  131 146 194  117 135 190  108 128 187
+96 117 181  91 113 180  87 109 178  87 109 178  87 109 178  91 113 180
+96 117 181  108 128 187  120 138 192  144 158 201  199 206 227  194 200 218
+34 36 42  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+113 116 128  205 212 231  127 144 195  90 112 180  74 98 172  73 97 171
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  73 98 172
+72 95 165  31 38 62  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  23 26 35  68 84 132  74 98 173  74 98 171
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  73 97 172  73 97 171  72 96 170  73 97 170
+87 109 178  120 138 192  201 208 228  122 122 122  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+38 41 51  129 135 150  219 222 229  205 209 224  184 193 220  160 171 208
+144 158 201  136 152 200  127 144 195  125 141 190  129 144 192  136 152 200
+147 161 203  160 171 208  190 198 223  219 222 229  183 190 208  38 41 51
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  17 19 24
+185 191 210  175 184 213  108 128 187  79 102 174  73 97 171  73 97 170
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 171  74 99 174
+68 84 132  23 26 35  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  4 5 10  53 65 105  75 99 174  73 97 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  73 97 171  73 97 171  72 97 171  72 96 170  73 97 170
+87 108 173  120 138 192  201 208 228  122 122 122  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  29 31 36  113 116 128  156 164 188  183 190 208
+194 200 218  204 208 221  206 211 226  209 215 232  209 215 232  204 208 221
+194 200 218  183 190 208  156 164 188  56 59 67  1 2 3  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  128 134 148
+205 212 231  131 146 194  91 113 180  77 100 169  73 97 171  72 96 171
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  73 97 172  74 98 173
+53 65 105  4 5 10  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  31 38 62  72 95 165  74 98 173
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+73 97 171  73 97 171  84 105 171  96 117 181  108 128 187  96 117 181
+91 113 180  120 138 192  201 208 228  122 122 122  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  21 23 31
+56 59 67  56 62 79  78 87 115  113 116 128  78 87 115  56 62 79
+56 59 67  27 29 36  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  113 116 128  220 224 234
+149 162 202  108 128 187  79 102 174  73 97 171  73 97 170  73 97 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 173  72 95 165
+31 38 62  0 0 1  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  11 13 21  68 84 132  75 99 174
+74 98 171  74 98 172  74 98 172  74 98 172  74 98 172  73 97 171
+73 97 170  77 101 175  127 144 195  164 173 204  156 164 188  168 177 206
+124 141 194  120 137 191  199 206 227  122 122 122  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  11 13 21  122 122 122  216 220 231  159 170 206
+111 130 189  87 109 178  74 98 172  73 97 171  73 97 171  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  75 99 174  68 84 132
+12 16 26  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  2 3 6  43 50 70  73 97 171
+73 97 172  74 98 172  74 98 172  74 98 172  74 98 172  72 96 170
+71 96 171  79 102 174  147 159 196  69 78 104  0 0 0  122 122 122
+144 158 201  117 135 190  191 199 224  129 135 150  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  2 3 6  19 21 29  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  56 62 79  183 190 208  213 218 233  152 165 205  111 130 189
+87 109 178  75 99 174  73 97 171  72 96 170  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  73 97 171  73 97 171  31 38 62
+2 3 6  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  12 16 26  68 84 132
+75 99 174  73 97 171  74 98 172  74 98 172  74 98 172  73 97 171
+72 96 171  79 102 174  144 157 197  69 78 104  0 0 0  113 116 128
+141 156 201  108 128 187  166 177 212  183 190 208  17 19 24  0 0 0
+0 0 0  0 0 0  56 59 67  156 164 188  185 191 210  183 190 208
+129 135 150  69 78 104  34 36 42  7 8 13  1 2 3  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  4 5 10  17 19 24  78 87 115
+172 181 208  209 213 223  184 193 220  132 148 198  108 128 187  87 109 178
+75 99 174  73 97 171  73 97 172  73 97 171  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  73 97 171  74 99 174  68 84 132  12 16 26
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 1  31 38 62
+72 95 165  74 98 172  74 98 172  74 98 172  74 98 172  73 97 171
+72 96 171  79 102 174  144 157 197  56 62 79  0 0 0  122 122 122
+136 152 200  96 117 181  131 146 194  205 211 230  156 164 188  56 60 74
+69 78 104  129 135 150  219 222 229  189 196 219  168 179 211  176 187 218
+194 201 221  205 209 224  219 222 229  204 208 221  156 164 188  129 135 150
+122 122 122  113 116 128  69 78 104  56 62 79  56 60 74  69 78 104
+78 87 115  113 116 128  129 135 150  183 190 208  219 222 229  205 209 224
+176 187 218  141 156 201  117 135 190  96 117 181  79 102 174  74 98 172
+73 97 170  73 97 171  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  72 95 165  31 38 62  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  8 10 17
+53 65 105  75 99 174  73 97 171  74 98 172  74 98 172  73 97 171
+72 97 171  79 102 174  147 159 196  56 60 74  0 0 0  122 122 122
+132 148 198  87 109 178  108 128 187  137 153 200  191 199 224  221 226 239
+218 222 237  191 199 224  152 165 205  124 141 194  111 130 189  111 130 189
+120 137 191  131 146 194  141 156 201  160 171 208  180 190 220  185 195 222
+194 201 224  201 208 228  214 219 236  221 226 239  221 226 239  221 226 239
+213 218 233  201 208 230  190 198 223  169 180 211  144 158 201  127 144 195
+111 130 189  96 117 181  87 109 178  77 101 175  74 98 172  73 97 171
+73 97 171  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  73 97 171  74 99 174  68 84 132  11 13 21  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+27 32 46  72 95 165  74 98 173  74 98 172  74 98 172  73 97 171
+72 96 171  84 105 171  147 159 196  56 59 67  0 0 0  122 122 122
+132 148 198  79 102 174  84 105 171  96 117 181  115 133 187  127 144 195
+127 144 195  115 133 187  96 117 181  90 112 180  79 102 174  79 102 174
+87 109 178  90 112 180  96 117 181  96 117 181  108 128 187  111 130 189
+117 135 190  120 138 192  125 141 190  129 144 192  131 146 194  127 144 195
+125 141 190  120 137 191  115 133 187  108 128 187  96 117 181  90 112 180
+79 102 174  77 101 175  74 98 172  73 97 171  73 97 171  73 97 171
+74 98 172  73 97 171  73 97 171  73 97 171  73 97 171  73 97 171
+73 97 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 173  72 95 165  31 38 62  0 0 1  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+4 5 10  53 65 105  77 101 175  73 98 172  74 98 171  73 97 171
+72 96 171  79 102 174  147 159 196  56 59 67  0 0 0  122 122 122
+126 145 198  75 99 174  72 96 171  75 99 174  79 102 174  87 109 178
+87 109 178  79 102 174  77 101 175  74 98 172  72 97 171  71 96 171
+72 96 171  73 97 171  74 98 172  75 99 174  79 102 174  79 102 174
+84 105 171  87 109 178  87 109 178  87 109 178  87 109 178  87 109 178
+87 109 178  87 108 173  79 102 174  79 102 174  74 98 172  74 98 172
+72 96 171  72 96 170  72 96 171  72 97 171  72 97 171  72 97 171
+72 97 171  72 97 171  72 97 171  72 97 171  72 97 171  72 97 171
+72 97 171  73 97 171  73 97 171  74 98 172  74 98 172  73 97 171
+73 97 171  75 99 174  53 65 105  7 8 13  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  23 26 35  72 90 145  75 99 174  73 97 171  72 96 170
+72 96 171  87 109 178  147 159 196  56 59 67  0 0 0  122 122 122
+144 158 201  96 117 181  90 112 180  91 113 180  91 113 180  91 113 180
+91 113 180  91 113 180  90 112 180  91 113 180  91 113 180  90 112 180
+90 112 180  90 112 180  91 113 180  91 113 180  90 112 180  91 113 180
+91 113 180  91 113 180  91 113 180  91 113 180  91 113 180  91 113 180
+91 113 180  91 113 180  91 113 180  90 112 180  91 113 180  90 112 180
+90 112 180  90 112 180  90 112 180  90 112 180  90 112 180  90 112 180
+90 112 180  90 112 180  90 112 180  90 112 180  90 112 180  90 112 180
+90 112 180  79 102 174  74 98 171  73 97 171  73 97 172  73 97 171
+75 99 174  72 89 141  23 26 35  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 1  31 38 62  72 90 145  74 98 173  73 97 171
+72 96 171  87 109 178  147 159 196  56 59 67  0 0 0  122 122 122
+180 188 209  156 166 198  156 166 198  156 166 198  156 166 198  156 166 198
+156 166 198  156 166 198  156 166 198  156 166 198  156 166 198  156 166 198
+156 166 198  156 166 198  156 166 198  156 166 198  156 166 198  156 166 198
+156 166 198  156 166 198  156 166 198  156 166 198  156 166 198  156 166 198
+156 166 198  156 166 198  156 166 198  156 166 198  156 166 198  156 166 198
+156 166 198  156 166 198  157 168 202  157 168 202  157 168 202  157 168 202
+157 168 202  157 168 202  157 168 202  157 168 202  157 168 202  157 168 202
+159 169 202  132 148 198  79 102 174  73 97 170  72 97 171  74 99 174
+72 90 145  27 32 46  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  2 3 6  31 38 62  73 96 167  74 98 173
+72 96 170  87 109 178  147 159 196  38 41 51  0 0 0  29 31 36
+56 59 67  56 59 67  56 59 67  56 59 67  56 59 67  56 59 67
+56 59 67  56 59 67  56 59 67  56 59 67  56 59 67  56 59 67
+56 59 67  56 59 67  56 59 67  56 59 67  56 59 67  56 59 67
+56 59 67  56 59 67  56 59 67  56 59 67  56 59 67  56 59 67
+56 59 67  56 59 67  56 59 67  56 59 67  56 59 67  56 59 67
+56 59 67  56 59 67  56 59 67  56 59 67  56 59 67  56 59 67
+56 59 67  56 59 67  56 59 67  56 59 67  56 59 67  56 59 67
+69 78 104  172 181 208  90 112 180  72 96 170  74 98 173  73 97 171
+43 50 70  1 2 3  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  4 5 10  46 54 81  73 97 172
+72 96 171  87 109 178  156 164 188  56 59 67  4 5 10  1 2 3
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+29 31 36  168 177 206  90 112 180  72 96 171  74 98 173  53 65 105
+4 5 10  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  8 10 17  53 65 105
+74 98 173  79 102 174  142 153 189  142 153 189  128 134 148  128 134 148
+128 134 148  128 134 148  128 134 148  128 134 148  128 134 148  128 134 148
+128 134 148  128 134 148  128 134 148  128 134 148  128 134 148  128 134 148
+128 134 148  128 134 148  128 134 148  128 134 148  128 134 148  128 134 148
+128 134 148  128 134 148  128 134 148  128 134 148  128 134 148  128 134 148
+128 134 148  128 134 148  128 134 148  128 134 148  128 134 148  128 134 148
+128 134 148  128 134 148  128 134 148  128 134 148  128 134 148  128 134 148
+128 134 148  128 134 148  128 134 148  128 134 148  128 134 148  128 134 148
+129 135 150  155 167 201  87 109 178  74 99 174  53 65 105  8 10 17
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  12 16 26
+68 84 132  77 101 175  96 117 181  126 144 198  126 145 198  126 144 198
+126 145 198  126 145 198  126 145 198  126 145 198  126 145 198  126 145 198
+126 145 198  126 145 198  126 145 198  126 144 198  126 144 198  126 144 198
+126 144 198  126 144 198  126 144 198  126 144 198  126 144 198  126 144 198
+126 144 198  126 144 198  126 144 198  126 144 198  126 144 198  126 144 198
+126 144 198  126 144 198  126 144 198  126 144 198  126 144 198  126 144 198
+126 144 198  126 144 198  126 144 198  126 144 198  126 144 198  126 144 198
+126 144 198  126 144 198  126 144 198  126 144 198  126 144 198  126 144 198
+124 141 194  96 117 181  77 101 175  68 84 132  12 16 26  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+11 13 21  53 65 105  73 97 171  74 98 173  73 97 171  73 97 172
+73 97 171  73 97 171  73 97 171  73 97 172  73 97 172  73 97 172
+73 97 172  73 97 172  73 97 172  73 97 172  73 97 172  73 97 172
+73 97 172  73 97 172  73 97 172  73 97 172  73 97 172  73 97 172
+73 97 172  73 97 172  73 97 172  73 97 172  73 97 172  73 97 172
+73 97 172  73 97 172  73 97 172  73 97 172  73 97 172  73 97 172
+73 97 172  73 97 172  73 97 172  73 97 172  73 97 172  73 97 172
+73 97 172  73 97 172  73 97 172  73 97 172  73 97 172  73 97 172
+74 98 173  73 98 172  53 65 105  11 13 21  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  5 7 12  46 54 81  73 96 167  74 99 174  73 97 171
+73 97 171  73 97 171  73 97 171  73 97 171  73 97 171  73 97 171
+73 97 171  73 97 171  73 97 171  73 97 171  73 97 171  73 97 171
+73 97 171  73 97 171  73 97 171  73 97 171  73 97 171  73 97 171
+73 97 171  73 97 171  73 97 171  73 97 171  73 97 171  73 97 171
+73 97 171  73 97 171  73 97 171  73 97 171  73 97 171  73 97 171
+73 97 171  73 97 171  73 97 171  73 97 171  73 97 171  73 97 171
+73 97 171  73 97 171  73 97 171  73 97 171  73 97 171  74 98 173
+72 96 170  46 54 81  5 7 12  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  2 3 6  31 38 62  72 90 145  75 99 174
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  73 97 171  75 99 174  72 95 165
+31 38 62  2 3 6  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  23 26 35  68 84 132
+77 101 175  74 98 173  74 98 172  73 97 171  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+73 97 171  74 98 172  74 98 173  77 101 175  72 90 145  27 32 46
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  12 16 26
+53 65 105  72 95 165  75 99 174  75 99 174  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  73 97 171
+74 99 174  75 99 174  72 95 165  53 65 105  12 16 26  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 1  19 21 29  53 65 105  72 95 165  77 101 175  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  75 99 174
+72 95 165  53 65 105  19 21 29  0 0 1  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 1  27 32 46  53 65 105  74 98 173
+75 99 174  75 99 174  74 98 172  74 98 171  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 171  74 98 172  75 99 174  75 99 174  75 99 174  68 84 132
+27 32 46  1 2 3  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  2 3 6  27 32 46
+53 65 105  72 95 165  74 99 174  77 101 175  75 99 174  74 98 172
+74 98 171  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 172  74 98 172  74 98 172
+74 98 172  74 98 172  74 98 172  74 98 171  74 98 172  75 99 174
+77 101 175  75 99 174  72 95 165  53 65 105  31 38 62  5 7 12
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  12 16 26  46 54 81  53 65 105  72 90 145  77 101 175
+77 101 175  75 99 174  75 99 174  74 98 173  74 98 173  74 98 173
+74 98 173  74 98 173  74 98 173  74 98 173  74 98 173  74 98 173
+74 98 173  75 99 174  75 99 174  77 101 175  77 101 175  72 90 145
+53 65 105  46 54 81  19 21 29  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  2 3 6  12 16 26  27 32 46
+46 54 81  68 84 132  72 90 145  72 95 165  73 96 167  73 97 170
+73 97 170  73 97 171  73 97 171  73 97 170  72 96 170  73 96 167
+72 95 165  72 90 145  53 65 105  46 54 81  31 38 62  12 16 26
+2 3 6  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 1  2 3 6  7 8 13  19 21 29  27 32 46
+27 32 46  31 38 62  31 38 62  27 32 46  23 26 35  19 21 29
+8 10 17  2 3 6  0 0 1  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0
diff --git a/drivers/video/logo/logo_tits_clut224.ppm b/drivers/video/logo/logo_tits_clut224.ppm
new file mode 100644
index 000000000..7504a3a2c
--- /dev/null
+++ b/drivers/video/logo/logo_tits_clut224.ppm
@@ -0,0 +1,1443 @@
+P3
+72 120
+255
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  32 15 7  99 70 33  83 51 28
+83 51 28  99 70 33  65 48 15  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  40 20 7  99 70 33  83 51 28  99 70 33
+9 2 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  116 86 23  200 156 55  255 234 0  255 232 0
+255 232 0  255 234 0  255 221 0  156 123 43  36 17 7  0 0 0
+4 0 2  123 94 35  255 221 0  255 234 0  255 232 0  255 232 0
+156 123 43  116 86 23  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  156 123 43  255 234 0  255 221 0  123 94 35  73 49 22
+99 70 33  73 49 22  116 86 23  255 221 0  200 156 55  22 13 4
+123 94 35  255 221 0  255 221 0  255 221 0  253 230 2  255 255 40
+255 255 40  253 230 2  116 86 23  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+156 123 43  255 234 0  156 123 43  111 90 31  200 156 55  255 255 42
+255 255 41  255 255 42  248 233 31  111 90 31  116 86 23  65 48 15
+123 94 35  116 86 23  116 86 23  156 123 43  249 230 10  255 255 42
+255 255 42  255 255 40  253 230 2  116 86 23  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  123 94 35
+255 234 0  156 123 43  156 123 43  255 255 42  255 255 41  255 255 40
+255 255 40  255 255 41  255 255 42  255 255 42  156 123 43  65 48 15
+255 221 0  255 234 0  255 234 0  73 49 22  249 230 10  255 255 42
+255 255 40  255 255 42  255 255 40  253 230 2  116 86 23  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  22 13 4  255 221 0
+200 156 55  123 94 35  255 255 42  255 255 41  255 255 41  255 255 40
+255 255 42  248 233 31  200 156 55  200 156 55  248 233 31  116 86 23
+255 234 0  255 221 0  255 222 0  255 221 0  111 90 31  255 255 42
+255 255 41  255 255 40  255 255 42  255 255 40  253 230 2  65 48 15
+4 0 2  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  32 15 7  200 156 55  253 230 2
+116 86 23  200 156 55  255 255 41  255 255 41  255 255 41  255 255 41
+200 156 55  116 86 23  116 86 23  116 86 23  111 90 31  116 86 23
+255 232 0  255 222 0  255 222 0  255 221 0  99 70 33  255 255 42
+255 255 40  255 255 41  255 255 40  255 255 42  255 255 40  156 123 43
+58 38 16  156 123 43  22 13 4  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  65 48 15  255 232 0  116 86 23
+200 156 55  255 255 42  255 255 40  255 255 41  255 255 41  248 233 31
+116 86 23  255 221 0  255 234 0  255 234 0  156 123 43  116 86 23
+255 234 0  255 232 0  255 232 0  255 221 0  111 90 31  255 255 40
+255 255 41  255 255 41  255 255 41  255 255 40  255 255 42  255 255 42
+116 86 23  255 234 0  200 156 55  20 10 4  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  4 0 2  255 221 0  116 86 23  200 156 55
+255 255 42  255 255 40  255 255 41  255 255 41  255 255 41  156 123 43
+156 123 43  255 234 0  255 232 0  255 234 0  255 221 0  116 86 23
+255 221 0  156 123 43  156 123 43  255 221 0  200 156 55  156 123 43
+255 255 41  255 255 41  255 255 41  255 255 41  255 255 41  255 255 42
+200 156 55  156 123 43  255 234 0  200 156 55  22 13 4  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  116 86 23  116 86 23  200 156 55  255 255 41
+255 255 40  255 255 41  255 255 41  255 255 40  255 255 41  73 49 22
+255 234 0  156 123 43  116 86 23  116 86 23  116 86 23  58 38 16
+116 86 23  131 100 59  119 92 52  116 86 23  65 48 15  156 123 43
+255 255 42  255 255 41  255 255 41  255 255 41  255 255 41  255 255 41
+248 233 31  123 94 35  248 233 31  249 230 10  200 156 55  22 13 4
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  4 0 2  65 48 15  248 233 31  255 255 42  255 255 40
+255 255 41  255 255 41  255 255 40  255 255 42  123 94 35  20 10 4
+65 48 15  133 97 51  186 149 103  186 149 103  143 108 61  83 51 28
+219 162 97  236 169 118  236 169 118  208 160 108  107 66 48  156 123 43
+255 255 42  255 255 41  255 255 41  255 255 41  255 255 41  255 255 40
+255 255 42  111 90 31  248 233 31  255 255 42  249 230 10  200 156 55
+22 13 4  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  58 38 16  248 233 31  255 255 42  255 255 40  255 255 41
+255 255 41  255 255 41  255 255 41  248 233 31  58 38 16  98 62 37
+219 162 97  236 169 118  236 169 118  236 169 118  230 163 113  219 162 97
+232 168 117  231 165 115  231 165 115  236 169 118  190 135 80  116 86 23
+248 233 31  255 255 41  255 255 41  255 255 41  255 255 41  255 255 41
+255 255 41  200 156 55  156 123 43  255 255 40  255 255 42  249 230 10
+200 156 55  40 20 7  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+40 20 7  248 233 31  255 255 41  255 255 40  255 255 41  255 255 41
+255 255 41  255 255 41  255 255 41  156 123 43  83 51 28  225 161 106
+232 168 117  231 165 115  219 162 97  219 162 97  232 168 117  232 168 117
+230 164 114  230 164 114  229 163 112  231 165 115  236 169 118  94 65 40
+248 233 31  255 255 41  255 255 41  255 255 41  255 255 41  255 255 41
+255 255 40  255 255 42  86 63 36  255 255 40  255 255 41  255 255 41
+249 230 10  99 70 33  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  40 20 7
+248 233 31  255 255 41  255 255 40  255 255 41  255 255 41  255 255 41
+255 255 41  255 255 41  248 233 31  83 51 28  226 164 99  231 165 115
+230 164 114  230 163 113  225 161 106  229 163 112  231 165 115  231 165 115
+231 165 115  225 161 106  200 154 86  232 168 117  236 169 118  131 100 59
+200 156 55  255 255 40  255 255 41  255 255 41  255 255 41  255 255 41
+255 255 41  255 255 40  86 63 36  255 255 41  255 255 40  255 255 41
+255 255 41  248 233 31  18 6 8  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  40 20 7  248 233 31
+255 255 42  255 255 41  255 255 41  255 255 41  255 255 41  255 255 41
+255 255 40  255 255 41  156 123 43  143 108 61  230 172 123  236 217 150
+232 170 121  227 181 122  236 217 150  230 172 123  229 163 112  230 163 113
+230 163 113  230 164 114  231 165 115  230 163 113  231 165 115  225 161 106
+73 49 22  255 255 40  255 255 41  255 255 40  255 255 41  255 255 41
+255 255 41  255 255 41  156 123 43  200 156 55  255 255 41  255 255 40
+255 255 41  255 255 41  123 94 35  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  18 6 8  248 233 31  255 255 42
+255 255 40  255 255 41  255 255 41  255 255 41  255 255 41  255 255 41
+255 255 41  248 233 31  83 51 28  225 161 106  236 217 150  249 228 179
+236 217 150  255 232 190  255 234 192  249 228 179  249 228 179  236 217 150
+233 170 120  232 170 121  232 170 121  232 170 121  230 164 114  232 168 117
+91 67 37  248 233 31  255 255 40  255 255 41  255 255 41  255 255 41
+255 255 41  255 255 42  255 221 0  111 90 31  255 255 42  255 255 41
+255 255 41  255 255 42  200 156 55  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  99 70 33  255 255 41  255 255 40
+255 255 41  255 255 41  255 255 41  255 255 41  255 255 41  255 255 40
+255 255 42  123 94 35  155 112 61  230 172 123  249 228 179  255 231 189
+255 231 189  255 231 189  255 230 188  255 232 190  255 232 190  255 232 190
+255 231 189  255 231 189  255 231 189  255 230 188  230 172 123  232 166 115
+168 130 70  116 86 23  249 230 10  255 255 41  255 255 41  255 255 41
+255 255 41  255 255 42  255 221 0  111 90 31  255 255 42  255 255 41
+255 255 41  255 255 41  248 233 31  58 38 16  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  86 63 36  248 233 31  255 255 41  255 255 41
+255 255 41  255 255 41  255 255 41  255 255 41  255 255 41  255 255 41
+248 233 31  104 73 33  219 162 97  230 172 123  249 228 179  255 232 190
+255 231 189  255 231 189  255 231 189  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  255 232 190  236 217 150  230 163 113
+225 161 106  104 73 33  200 156 55  249 230 10  255 255 41  255 255 41
+255 255 40  249 230 10  255 221 0  111 90 31  255 255 42  255 255 40
+255 255 41  255 255 40  255 255 42  156 123 43  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  9 2 0  156 123 43  255 255 42  255 255 40  255 255 41
+255 255 41  255 255 41  255 255 41  255 255 41  255 255 40  255 255 42
+156 123 43  155 112 61  236 169 118  236 169 118  236 217 150  255 234 192
+255 233 191  255 232 190  255 231 189  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  255 232 190  249 228 179  230 172 123
+236 169 118  164 128 83  156 123 43  255 232 0  255 222 0  255 222 0
+255 222 0  255 222 0  255 221 0  111 90 31  248 233 31  255 255 41
+255 255 41  255 255 41  255 255 42  123 94 35  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  65 48 15  255 255 42  255 255 40  255 255 41  255 255 41
+255 255 41  255 255 41  255 255 41  255 255 40  255 255 41  200 156 55
+98 62 37  115 82 46  119 92 52  119 92 52  131 100 59  161 127 82
+249 228 179  249 228 179  255 230 188  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 230 188  255 231 189  249 228 179  178 137 80
+115 82 46  115 82 46  40 20 7  156 123 43  253 230 2  255 221 0
+255 221 0  255 221 0  255 232 0  156 123 43  156 123 43  255 255 42
+255 255 41  255 255 41  255 255 41  123 94 35  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+9 2 0  200 156 55  255 255 41  255 255 41  255 255 41  255 255 41
+255 255 41  255 255 41  255 255 41  255 255 41  255 255 40  73 49 22
+83 51 28  155 112 61  164 128 83  146 115 67  164 128 83  133 97 51
+81 56 37  211 185 152  255 234 192  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  249 228 179  94 65 40  115 82 46
+164 128 83  153 118 66  133 97 51  65 48 15  255 221 0  255 222 0
+255 221 0  255 222 0  255 232 0  156 123 43  156 123 43  255 255 42
+255 255 41  255 255 41  255 255 42  249 230 10  40 20 7  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+4 0 2  248 233 31  255 255 40  255 255 41  255 255 41  255 255 41
+255 255 41  255 255 41  255 255 41  248 233 31  123 94 35  155 112 61
+230 163 113  236 169 118  200 154 86  94 65 40  138 104 59  236 217 150
+236 221 188  255 230 188  255 231 189  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  255 230 188  236 221 188  249 228 179
+180 145 91  89 60 38  168 130 70  166 125 65  116 86 23  255 221 0
+255 222 0  255 221 0  255 232 0  156 123 43  156 123 43  255 255 42
+255 255 41  255 255 41  255 255 42  249 230 10  65 48 15  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+116 86 23  248 233 31  255 255 41  255 255 41  255 255 41  255 255 41
+255 255 41  255 255 41  255 255 41  248 233 31  94 65 40  236 169 118
+232 166 115  143 108 61  104 74 41  104 74 41  32 15 7  115 82 46
+236 217 150  255 230 188  255 231 189  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 230 188  255 232 190  255 231 189  164 128 83
+22 13 4  98 62 37  106 76 45  190 135 80  166 125 65  116 86 23
+255 222 0  255 222 0  255 232 0  156 123 43  156 123 43  255 255 42
+255 255 40  255 255 41  255 255 42  249 230 10  65 48 15  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+156 123 43  255 255 40  255 255 41  255 255 41  255 255 41  255 255 41
+255 255 41  255 255 40  255 255 40  86 63 36  190 135 80  190 135 80
+42 24 20  42 24 20  73 53 28  81 56 37  82 57 39  42 24 20
+106 76 45  249 228 179  255 233 191  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  255 231 189  161 127 82  32 15 7
+82 57 39  82 57 39  73 53 28  73 53 28  99 70 33  133 97 51
+116 86 23  255 221 0  255 234 0  156 123 43  156 123 43  255 255 42
+255 255 42  255 255 40  248 233 31  253 230 2  65 48 15  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  40 20 7
+255 221 0  255 255 40  255 255 40  255 255 41  255 255 41  255 255 41
+255 255 40  255 255 41  200 156 55  115 82 46  190 135 80  32 15 7
+42 24 20  161 127 82  172 197 155  172 197 155  172 197 155  212 211 185
+46 29 24  210 175 116  255 234 192  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 232 190  249 228 179  32 15 7  173 198 156
+173 198 156  172 197 155  186 149 103  161 127 82  82 57 39  20 10 4
+98 62 37  116 86 23  156 123 43  116 86 23  156 123 43  248 233 31
+248 233 31  255 222 0  255 221 0  253 230 2  65 48 15  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  116 86 23
+255 234 0  249 230 10  255 255 42  255 255 40  255 255 41  255 255 41
+255 255 41  255 255 41  111 90 31  190 135 80  106 76 45  22 13 4
+161 127 82  51 35 26  5 0 27  186 149 103  161 127 82  161 127 82
+212 211 185  215 204 162  255 232 190  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  249 228 179  199 209 176  172 197 155
+5 0 27  73 53 28  172 197 155  161 127 82  134 113 63  51 35 26
+58 38 16  190 135 80  115 82 46  4 0 2  156 123 43  253 230 2
+255 221 0  255 221 0  255 222 0  253 230 2  65 48 15  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  116 86 23
+255 232 0  255 222 0  255 255 40  255 255 42  255 255 41  255 255 41
+255 255 41  200 156 55  115 82 46  133 97 51  10 1 0  172 197 155
+51 35 26  5 0 27  46 29 24  212 211 185  247 255 212  107 78 54
+212 211 185  247 255 212  255 230 188  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 230 188  255 234 192  247 255 212  107 78 54
+5 0 27  82 57 39  247 255 212  247 255 212  134 113 63  172 197 155
+22 13 4  98 62 37  200 154 86  9 4 10  156 123 43  255 232 0
+255 221 0  255 222 0  255 222 0  253 230 2  65 48 15  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  116 86 23
+255 232 0  255 221 0  253 230 2  248 233 31  255 255 41  255 255 41
+255 255 42  200 156 55  57 32 26  22 13 4  82 57 39  230 232 196
+42 24 20  5 0 27  5 0 27  51 35 26  199 209 176  199 209 176
+134 113 63  247 255 212  255 230 188  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 230 188  255 234 192  199 209 176  5 0 27
+5 0 27  5 0 27  82 57 39  247 255 212  186 149 103  173 198 156
+134 113 63  9 2 0  69 42 21  46 29 24  116 86 23  255 221 0
+255 222 0  255 222 0  255 221 0  255 232 0  65 48 15  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  116 86 23
+255 232 0  255 222 0  255 221 0  255 221 0  253 230 2  248 233 31
+248 233 31  99 70 33  133 97 51  32 15 7  161 127 82  247 255 212
+51 35 26  107 78 54  51 35 26  5 0 27  46 29 24  199 209 176
+134 113 63  255 234 192  255 230 188  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 230 188  255 234 192  200 194 140  51 35 26
+107 78 54  46 29 24  5 0 27  107 78 54  186 149 103  199 209 176
+212 211 185  10 1 0  104 73 33  166 125 65  77 45 20  255 221 0
+255 222 0  255 221 0  255 234 0  156 123 43  9 4 10  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  116 86 23
+255 234 0  255 221 0  255 222 0  255 221 0  255 221 0  255 222 0
+255 221 0  83 51 28  98 62 37  133 97 51  134 113 63  247 255 212
+107 78 54  107 78 54  172 197 155  173 198 156  107 78 54  134 113 63
+186 149 103  255 234 192  255 231 189  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  255 234 192  211 185 152  82 57 39
+172 197 155  199 209 176  172 197 155  107 78 54  134 113 63  230 232 196
+199 209 176  104 73 33  83 51 28  190 135 80  75 50 26  255 221 0
+255 222 0  255 222 0  255 221 0  65 48 15  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  40 20 7
+200 156 55  253 230 2  255 222 0  255 222 0  255 222 0  255 222 0
+255 221 0  83 51 28  83 51 28  166 125 65  119 92 52  199 209 176
+212 211 185  82 57 39  51 35 26  82 57 39  82 57 39  131 100 59
+255 230 188  255 231 189  255 231 189  255 230 188  255 232 190  255 232 190
+255 230 188  255 231 189  255 231 189  255 231 189  255 234 192  180 145 91
+51 35 26  82 57 39  82 57 39  51 35 26  186 149 103  230 232 196
+107 78 54  190 135 80  75 50 26  133 97 51  116 86 23  156 123 43
+255 222 0  255 232 0  200 156 55  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+156 123 43  255 232 0  255 221 0  255 222 0  255 222 0  255 222 0
+255 221 0  40 20 7  65 48 15  73 53 28  229 163 112  222 164 108
+233 169 121  219 162 97  223 184 122  215 204 162  215 204 162  249 228 179
+255 230 188  255 231 189  255 231 189  255 232 190  236 217 150  227 181 122
+255 234 192  255 231 189  255 231 189  255 231 189  255 230 188  249 228 179
+235 224 165  215 204 162  215 204 162  235 224 165  249 228 179  230 172 123
+225 161 106  131 100 59  40 20 7  116 86 23  255 221 0  73 49 22
+255 222 0  255 232 0  200 156 55  4 0 2  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+116 86 23  255 221 0  255 222 0  255 222 0  255 222 0  255 222 0
+255 221 0  116 86 23  255 221 0  73 49 22  225 161 106  231 165 115
+231 165 115  236 217 150  255 232 190  255 233 191  255 233 191  255 232 190
+255 231 189  255 231 189  255 232 190  255 234 192  236 217 150  227 181 122
+255 234 192  255 233 191  255 233 191  255 231 189  255 231 189  255 232 190
+255 233 191  255 233 191  255 233 191  255 233 191  255 232 190  236 217 150
+236 169 118  94 65 40  255 221 0  255 232 0  255 221 0  73 49 22
+255 222 0  255 222 0  255 221 0  65 48 15  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+4 0 2  255 221 0  255 232 0  255 221 0  255 222 0  255 221 0
+255 232 0  156 123 43  156 123 43  200 156 55  131 100 59  236 169 118
+230 172 123  255 231 189  255 231 189  255 231 189  255 231 189  255 231 189
+255 231 189  255 230 188  236 217 150  235 224 165  236 221 188  236 221 188
+236 221 188  235 224 165  236 217 150  249 228 179  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  255 231 189  255 231 189  249 228 179
+178 137 80  116 86 23  255 221 0  255 222 0  255 221 0  73 49 22
+255 222 0  255 222 0  255 234 0  116 86 23  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  58 38 16  255 221 0  255 222 0  255 222 0  255 222 0
+253 230 2  200 156 55  156 123 43  200 156 55  94 65 40  236 169 118
+227 181 122  255 234 192  255 230 188  255 231 189  255 231 189  255 231 189
+255 231 189  255 234 192  186 149 103  36 17 7  58 38 16  40 20 7
+58 38 16  40 20 7  131 100 59  255 233 191  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  255 230 188  255 233 191  236 217 150
+155 112 61  123 94 35  255 234 0  255 221 0  255 222 0  65 48 15
+248 233 31  255 255 42  255 255 40  116 86 23  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  116 86 23  255 234 0  255 221 0  255 222 0
+255 221 0  255 234 0  116 86 23  200 156 55  116 86 23  190 135 80
+230 172 123  249 228 179  255 231 189  255 231 189  255 231 189  255 231 189
+255 231 189  255 234 192  200 194 140  58 38 16  116 86 23  155 112 61
+116 86 23  77 45 20  134 113 63  255 234 192  255 230 188  255 231 189
+255 231 189  255 231 189  255 231 189  255 231 189  255 230 188  230 172 123
+99 70 33  255 221 0  255 222 0  253 230 2  156 123 43  156 123 43
+255 255 42  255 255 41  255 255 42  200 156 55  40 20 7  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  65 48 15  156 123 43  200 156 55  253 230 2  255 221 0
+255 221 0  255 232 0  116 86 23  255 221 0  156 123 43  115 82 46
+230 163 113  236 217 150  255 234 192  255 230 188  255 231 189  255 231 189
+255 231 189  255 234 192  200 194 140  40 20 7  155 112 61  190 135 80
+155 112 61  77 45 20  134 113 63  255 234 192  255 231 189  255 231 189
+255 231 189  255 231 189  255 230 188  255 234 192  236 217 150  155 112 61
+116 86 23  255 234 0  255 221 0  255 232 0  116 86 23  248 233 31
+255 255 40  255 255 41  255 255 40  255 255 42  65 48 15  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  65 48 15  156 123 43  200 156 55  253 230 2  253 230 2
+249 230 10  255 234 0  116 86 23  255 221 0  255 232 0  156 123 43
+115 82 46  236 169 118  249 228 179  255 232 190  255 231 189  255 230 188
+255 231 189  255 234 192  211 185 152  77 45 20  155 112 61  190 135 80
+190 135 80  77 45 20  186 149 103  255 234 192  255 231 189  255 231 189
+255 230 188  255 230 188  255 233 191  249 228 179  164 128 83  116 86 23
+200 156 55  255 222 0  255 221 0  255 232 0  116 86 23  248 233 31
+255 255 41  255 255 41  255 255 40  255 255 42  65 48 15  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  65 48 15  156 123 43  200 156 55  255 234 0  255 255 40
+255 255 42  255 255 40  116 86 23  255 221 0  255 222 0  255 232 0
+156 123 43  115 82 46  236 169 118  249 228 179  255 233 191  255 231 189
+255 231 189  255 231 189  255 234 192  134 113 63  77 45 20  190 135 80
+107 66 48  73 53 28  255 230 188  255 231 189  255 231 189  255 230 188
+255 232 190  255 234 192  249 228 179  164 128 83  116 86 23  255 221 0
+99 70 33  253 230 2  255 222 0  253 230 2  116 86 23  248 233 31
+255 255 41  255 255 41  255 255 41  255 255 41  65 48 15  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+4 0 2  116 86 23  156 123 43  200 156 55  255 255 40  255 255 41
+255 255 40  255 255 42  111 90 31  255 221 0  255 222 0  255 222 0
+255 232 0  156 123 43  115 82 46  186 149 103  249 228 179  255 234 192
+255 231 189  255 230 188  255 234 192  236 217 150  98 62 37  77 45 20
+77 45 20  212 179 122  255 234 192  255 230 188  255 230 188  255 232 190
+255 230 188  211 185 152  155 112 61  116 86 23  255 221 0  255 232 0
+156 123 43  156 123 43  255 234 0  156 123 43  156 123 43  255 255 41
+255 255 41  255 255 41  255 255 41  156 123 43  4 0 2  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+9 4 10  255 221 0  156 123 43  200 156 55  255 255 40  255 255 40
+255 255 41  255 255 42  111 90 31  255 221 0  255 222 0  255 221 0
+253 230 2  200 156 55  116 86 23  116 86 23  166 125 65  236 217 150
+255 230 188  255 234 192  255 231 189  255 234 192  236 217 150  146 115 67
+212 179 122  255 234 192  255 231 189  255 232 190  255 234 192  255 230 188
+208 160 108  116 86 23  65 48 15  255 221 0  255 222 0  255 232 0
+200 156 55  123 94 35  255 234 0  156 123 43  156 123 43  255 255 42
+255 255 41  255 255 40  255 255 42  123 94 35  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+22 13 4  255 221 0  156 123 43  200 156 55  255 255 40  255 255 41
+255 255 41  255 255 42  111 90 31  255 221 0  255 222 0  255 222 0
+255 232 0  116 86 23  255 221 0  253 230 2  116 86 23  86 63 36
+190 135 80  236 217 150  255 232 190  255 234 192  255 234 192  255 234 192
+255 234 192  255 234 192  255 234 192  255 230 188  212 179 122  133 97 51
+73 49 22  255 221 0  116 86 23  255 221 0  255 222 0  255 232 0
+200 156 55  123 94 35  255 234 0  156 123 43  200 156 55  255 255 41
+255 255 41  255 255 41  200 156 55  156 123 43  65 48 15  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  4 0 2  83 51 28  94 65 40  94 65 40
+89 60 38  22 13 4  83 51 28  89 60 38  9 2 0  0 0 0
+156 123 43  255 234 0  116 86 23  200 156 55  255 255 40  255 255 41
+255 255 41  255 255 41  156 123 43  156 123 43  255 222 0  255 222 0
+255 232 0  116 86 23  200 156 55  253 230 2  253 230 2  200 156 55
+40 20 7  104 73 33  146 115 67  210 175 116  249 228 179  255 230 188
+249 228 179  223 184 122  161 127 82  115 82 46  40 20 7  156 123 43
+253 230 2  255 234 0  116 86 23  255 221 0  255 222 0  255 232 0
+200 156 55  156 123 43  255 221 0  111 90 31  255 255 42  255 255 40
+255 255 41  255 255 41  156 123 43  200 156 55  65 48 15  0 0 0
+0 0 0  9 2 0  89 60 38  75 50 26  22 13 4  89 60 38
+94 65 40  94 65 40  83 51 28  4 0 2  0 0 0  0 0 0
+0 0 0  4 0 2  115 82 46  236 169 118  236 169 118  236 169 118
+232 166 115  75 50 26  190 135 80  232 166 115  133 97 51  4 0 2
+156 123 43  255 234 0  255 221 0  116 86 23  248 233 31  255 255 41
+255 255 41  255 255 40  255 255 42  65 48 15  253 230 2  255 221 0
+255 232 0  116 86 23  200 156 55  253 230 2  253 230 2  200 156 55
+98 62 37  133 97 51  104 73 33  83 51 28  89 60 38  107 78 54
+94 65 40  77 45 20  115 82 46  164 128 83  164 128 83  156 123 43
+255 232 0  255 232 0  116 86 23  255 221 0  255 222 0  255 232 0
+200 156 55  156 123 43  255 221 0  111 90 31  255 255 42  255 255 40
+255 255 41  248 233 31  73 49 22  255 234 0  65 48 15  0 0 0
+9 2 0  133 97 51  232 166 115  190 135 80  75 50 26  232 166 115
+236 169 118  236 169 118  236 169 118  115 82 46  4 0 2  0 0 0
+0 0 0  9 2 0  208 160 108  249 228 179  190 135 80  86 58 39
+81 56 37  73 53 28  89 60 38  84 59 38  155 112 61  107 66 48
+156 123 43  255 232 0  255 234 0  73 49 22  255 255 41  255 255 40
+255 255 41  255 255 41  255 255 41  116 86 23  255 221 0  255 222 0
+255 232 0  156 123 43  200 156 55  255 222 0  253 230 2  200 156 55
+83 51 28  190 135 80  182 141 64  166 125 65  155 112 61  155 112 61
+155 112 61  208 160 108  249 228 179  255 234 192  200 194 140  123 94 35
+255 234 0  156 123 43  156 123 43  253 230 2  255 222 0  255 232 0
+156 123 43  156 123 43  116 86 23  248 233 31  255 255 41  255 255 41
+248 233 31  116 86 23  255 221 0  255 232 0  65 48 15  0 0 0
+116 87 35  155 112 61  84 59 38  89 60 38  73 53 28  81 56 37
+86 58 39  190 135 80  249 228 179  208 160 108  4 0 2  0 0 0
+0 0 0  0 0 0  211 185 152  235 224 165  83 51 28  211 185 152
+230 172 123  219 162 97  131 100 59  115 82 46  40 20 7  57 32 26
+156 123 43  255 234 0  255 222 0  156 123 43  156 123 43  255 255 42
+255 255 41  255 255 41  255 255 41  248 233 31  116 86 23  255 234 0
+255 221 0  253 230 2  65 48 15  255 222 0  255 232 0  255 221 0
+83 51 28  182 141 64  166 125 65  166 125 65  190 135 80  227 165 115
+249 228 179  255 233 191  255 232 190  255 234 192  186 149 103  123 94 35
+255 234 0  156 123 43  156 123 43  255 232 0  255 221 0  255 222 0
+255 221 0  116 86 23  156 123 43  255 255 41  255 255 41  248 233 31
+116 86 23  255 221 0  253 230 2  253 230 2  65 48 15  0 0 0
+77 45 20  40 20 7  115 82 46  131 100 59  219 162 97  230 172 123
+211 185 152  83 51 28  249 228 179  211 185 152  0 0 0  0 0 0
+0 0 0  83 51 28  249 228 179  186 149 103  99 70 33  249 228 179
+227 181 122  98 62 37  133 97 51  164 128 83  161 127 82  143 108 61
+83 51 28  255 221 0  253 230 2  253 230 2  156 123 43  99 70 33
+248 233 31  248 233 31  248 233 31  200 156 55  116 86 23  255 234 0
+255 232 0  255 232 0  156 123 43  123 94 35  156 123 43  83 51 28
+155 112 61  190 135 80  225 161 106  211 185 152  249 228 179  255 231 189
+255 233 191  255 231 189  255 231 189  255 234 192  235 224 165  121 95 40
+116 86 23  156 123 43  156 123 43  255 234 0  255 232 0  255 234 0
+200 156 55  123 94 35  248 233 31  248 233 31  248 233 31  123 94 35
+255 221 0  253 230 2  255 221 0  255 232 0  58 38 16  40 20 7
+143 108 61  161 127 82  164 128 83  133 97 51  98 62 37  227 181 122
+249 228 179  98 62 37  186 149 103  249 228 179  83 51 28  0 0 0
+0 0 0  143 108 61  252 233 179  131 100 59  146 115 67  255 234 192
+146 115 67  133 97 51  230 172 123  190 135 80  83 51 28  94 65 40
+80 54 33  83 51 28  255 221 0  255 232 0  255 221 0  156 123 43
+83 51 28  84 59 38  86 63 36  86 63 36  69 42 21  156 123 43
+156 123 43  200 156 55  104 73 33  69 42 21  155 112 61  208 160 108
+236 217 150  249 228 179  255 231 189  255 234 192  255 233 191  255 231 189
+255 230 188  255 231 189  255 231 189  255 230 188  255 232 190  249 228 179
+186 149 103  94 65 40  83 51 28  156 123 43  156 123 43  156 123 43
+73 49 22  84 59 38  86 63 36  86 63 36  75 50 26  156 123 43
+255 221 0  253 230 2  253 230 2  255 221 0  75 50 26  82 57 39
+94 65 40  83 51 28  190 135 80  230 172 123  133 97 51  146 115 67
+255 234 192  146 115 67  131 100 59  252 233 179  143 108 61  0 0 0
+0 0 0  161 127 82  249 228 179  83 51 28  211 185 152  255 230 188
+104 73 33  180 145 91  249 228 179  107 78 54  166 125 65  236 217 150
+236 217 150  190 135 80  104 73 33  255 221 0  116 86 23  107 78 54
+200 154 86  236 169 118  236 217 150  235 224 165  236 217 150  186 149 103
+146 115 67  107 66 48  83 51 28  133 97 51  236 169 118  249 228 179
+255 234 192  255 233 191  255 231 189  255 231 189  255 230 188  255 231 189
+255 231 189  255 230 188  255 231 189  255 232 190  255 234 192  255 230 188
+236 217 150  190 135 80  98 62 37  98 62 37  131 100 59  161 127 82
+227 181 122  249 228 179  236 217 150  235 224 165  227 181 122  131 100 59
+99 70 33  255 221 0  255 221 0  99 70 33  190 135 80  236 169 118
+238 197 114  166 125 65  107 78 54  249 228 179  180 145 91  104 73 33
+255 230 188  212 179 122  83 51 28  249 228 179  161 127 82  0 0 0
+58 38 16  227 181 122  215 204 162  84 59 38  249 228 179  212 179 122
+83 51 28  236 217 150  211 185 152  98 62 37  227 181 122  255 233 191
+249 228 179  227 181 122  119 92 52  40 20 7  166 125 65  236 169 118
+236 217 150  249 228 179  255 232 190  255 234 192  255 234 192  255 234 192
+255 234 192  249 228 179  208 160 108  115 82 46  58 38 16  155 112 61
+230 172 123  249 228 179  255 233 191  255 232 190  255 230 188  255 231 189
+255 230 188  255 231 189  255 233 191  255 230 188  227 181 122  190 135 80
+77 45 20  98 62 37  190 135 80  236 217 150  255 231 189  255 234 192
+255 234 192  255 234 192  255 234 192  255 234 192  255 232 190  255 230 188
+208 160 108  83 51 28  22 13 4  131 100 59  229 163 112  229 163 112
+236 217 150  236 217 150  98 62 37  211 185 152  236 217 150  83 51 28
+212 179 122  249 228 179  84 59 38  215 204 162  227 181 122  58 38 16
+81 56 37  249 228 179  215 204 162  143 108 61  255 234 192  164 128 83
+107 78 54  253 234 183  180 145 91  131 100 59  255 232 190  249 228 179
+155 112 61  32 15 7  99 70 33  166 125 65  236 169 118  236 217 150
+255 230 188  255 232 190  255 231 189  255 230 188  255 231 189  255 231 189
+255 231 189  255 233 191  255 233 191  236 217 150  222 164 108  104 74 41
+77 45 20  133 97 51  227 165 115  249 228 179  255 232 190  255 231 189
+255 231 189  255 230 188  227 181 122  155 112 61  98 62 37  83 51 28
+190 135 80  236 217 150  255 230 188  255 234 192  255 232 190  255 231 189
+255 230 188  255 231 189  255 231 189  255 230 188  255 231 189  255 233 191
+255 230 188  222 164 108  58 38 16  69 42 21  40 20 7  143 108 61
+236 217 150  255 231 189  124 98 54  186 149 103  253 234 183  107 78 54
+164 128 83  255 234 192  143 108 61  215 204 162  249 228 179  81 56 37
+81 56 37  236 217 150  249 228 179  235 224 165  255 233 191  98 62 37
+186 149 103  245 232 191  98 62 37  208 160 108  255 234 192  249 228 179
+208 160 108  153 118 66  98 62 37  190 135 80  236 217 150  255 230 188
+255 231 189  255 231 189  255 231 189  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  255 233 191  255 230 188  249 228 179
+211 185 152  186 149 103  211 185 152  249 228 179  255 232 190  255 231 189
+255 231 189  255 230 188  236 217 150  186 149 103  210 175 116  236 217 150
+255 230 188  255 234 192  255 231 189  255 230 188  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  255 231 189  255 231 189  255 230 188
+255 232 190  255 230 188  166 125 65  69 42 21  153 118 66  178 137 80
+233 170 120  255 231 189  208 160 108  98 62 37  245 232 191  186 149 103
+98 62 37  255 233 191  235 224 165  249 228 179  236 217 150  81 56 37
+58 38 16  190 135 80  236 217 150  255 233 191  255 232 190  200 154 86
+236 217 150  236 221 188  98 62 37  236 217 150  255 233 191  255 231 189
+255 234 192  249 228 179  143 108 61  115 82 46  255 230 188  255 231 189
+255 230 188  255 231 189  255 231 189  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  255 230 188  255 231 189  255 233 191
+255 234 192  255 234 192  255 234 192  255 232 190  255 230 188  255 231 189
+255 230 188  255 231 189  255 234 192  255 234 192  255 234 192  255 234 192
+255 232 190  255 230 188  255 231 189  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  255 231 189  255 231 189  255 231 189
+255 231 189  255 230 188  115 82 46  143 108 61  236 169 118  236 169 118
+229 163 112  249 228 179  249 228 179  98 62 37  236 221 188  236 217 150
+200 154 86  255 233 191  255 232 190  236 217 150  190 135 80  42 24 20
+0 0 0  58 38 16  190 135 80  236 217 150  235 224 165  255 234 192
+255 234 192  255 230 188  227 181 122  255 230 188  255 231 189  255 230 188
+255 231 189  255 233 191  227 181 122  75 50 26  236 221 188  255 231 189
+255 231 189  255 231 189  255 231 189  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  255 231 189  255 231 189  255 231 189
+255 232 190  236 221 188  73 53 28  225 161 106  231 165 115  230 164 114
+229 163 112  227 181 122  249 228 179  227 181 122  255 230 188  255 234 192
+249 228 179  236 217 150  236 217 150  190 135 80  58 38 16  0 0 0
+0 0 0  0 0 0  40 20 7  104 73 33  200 154 86  230 172 123
+236 217 150  249 228 179  255 232 190  255 232 190  255 232 190  255 230 188
+255 231 189  255 232 190  249 228 179  94 65 40  236 221 188  255 231 189
+255 231 189  255 232 190  255 233 191  255 233 191  255 231 189  255 230 188
+255 231 189  255 231 189  255 231 189  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  255 231 189  255 231 189  255 230 188
+255 232 190  255 234 192  255 230 188  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  255 231 189  255 231 189  255 231 189
+255 230 188  255 231 189  255 233 191  255 233 191  255 231 189  255 230 188
+255 231 189  236 221 188  84 59 38  236 169 118  231 165 115  231 165 115
+230 164 114  230 163 113  230 172 123  227 181 122  230 172 123  227 181 122
+236 169 118  200 154 86  104 73 33  40 20 7  9 2 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  20 10 4  115 82 46
+190 135 80  236 169 118  233 169 121  236 217 150  249 228 179  255 232 190
+255 230 188  255 234 192  236 217 150  75 50 26  211 185 152  255 233 191
+255 230 188  249 228 179  235 224 165  235 224 165  255 230 188  255 233 191
+255 231 189  255 231 189  255 231 189  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  255 231 189  255 231 189  255 233 191
+255 230 188  236 217 150  255 231 189  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  255 231 189  255 231 189  255 230 188
+255 231 189  255 230 188  235 224 165  249 228 179  255 230 188  255 231 189
+255 233 191  211 185 152  69 42 21  200 154 86  232 168 117  231 165 115
+231 165 115  230 164 114  230 163 113  231 165 115  236 169 118  190 135 80
+115 82 46  20 10 4  9 2 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+36 17 7  104 74 41  200 154 86  231 165 115  230 172 123  249 228 179
+255 231 189  255 234 192  236 217 150  69 42 21  208 160 108  255 234 192
+255 233 191  227 181 122  219 162 97  232 166 115  230 172 123  236 217 150
+255 231 189  255 233 191  255 234 192  255 234 192  255 234 192  255 234 192
+255 234 192  255 234 192  255 232 190  255 231 189  255 231 189  236 217 150
+225 161 106  227 181 122  255 234 192  255 234 192  255 234 192  255 234 192
+255 234 192  255 234 192  255 234 192  255 232 190  255 231 189  255 231 189
+255 230 188  236 217 150  225 161 106  219 162 97  249 228 179  255 231 189
+255 234 192  208 160 108  73 49 22  229 163 112  231 165 115  231 165 115
+231 165 115  230 164 114  236 169 118  200 154 86  104 74 41  36 17 7
+0 0 0  4 0 2  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  58 38 16  200 154 86  231 165 115  227 181 122
+255 233 191  255 231 189  249 228 179  168 130 70  94 65 40  249 228 179
+255 233 191  255 231 189  131 100 59  155 112 61  230 172 123  255 230 188
+255 234 192  249 228 179  211 185 152  211 185 152  211 185 152  211 185 152
+211 185 152  235 224 165  255 230 188  255 234 192  236 217 150  200 154 86
+190 135 80  236 217 150  249 228 179  215 204 162  211 185 152  211 185 152
+211 185 152  211 185 152  236 217 150  255 230 188  255 234 192  255 234 192
+249 228 179  190 135 80  98 62 37  227 181 122  255 232 190  255 232 190
+249 228 179  94 65 40  166 125 65  236 169 118  230 164 114  230 164 114
+229 163 112  232 166 115  200 154 86  58 38 16  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  133 97 51  232 166 115  227 181 122
+255 234 192  255 230 188  255 234 192  227 181 122  69 42 21  227 181 122
+255 234 192  255 230 188  225 161 106  75 50 26  233 178 115  186 149 103
+131 100 59  104 73 33  82 57 39  107 78 54  107 78 54  107 78 54
+82 57 39  89 60 38  131 100 59  146 115 67  138 104 59  182 141 64
+123 94 35  119 92 52  115 82 46  82 57 39  107 78 54  107 78 54
+107 78 54  91 67 37  86 63 36  119 92 52  146 115 67  161 127 82
+236 169 118  107 78 54  166 125 65  249 228 179  255 232 190  255 234 192
+236 217 150  69 42 21  225 161 106  231 165 115  230 163 113  232 168 117
+227 181 122  230 172 123  119 92 52  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  36 17 7  200 154 86  236 217 150
+255 233 191  255 231 189  255 231 189  249 228 179  104 73 33  153 118 66
+236 217 150  236 217 150  200 154 86  42 24 20  73 49 22  107 78 54
+161 127 82  172 197 155  212 216 184  231 255 198  235 255 203  232 255 199
+231 255 198  172 197 155  186 149 103  132 119 61  107 78 54  69 42 21
+107 78 54  161 127 82  172 197 155  210 220 166  231 255 198  232 255 199
+235 255 203  231 255 198  189 206 156  172 197 155  161 127 82  134 113 63
+75 50 26  42 24 20  155 112 61  227 181 122  249 228 179  255 234 192
+164 128 83  104 73 33  230 164 114  230 163 113  231 165 115  236 217 150
+255 234 192  215 204 162  32 15 7  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  186 149 103  249 228 179
+255 231 189  255 231 189  255 230 188  255 233 191  161 127 82  115 82 46
+236 169 118  190 135 80  80 54 33  82 57 39  172 197 155  230 232 196
+247 255 212  237 255 203  235 255 203  232 255 199  231 255 198  232 255 198
+231 255 198  212 216 184  172 197 155  107 78 54  107 78 54  172 197 155
+212 216 184  237 255 203  231 255 198  212 216 184  191 208 160  212 216 184
+231 255 198  233 255 199  237 255 203  237 255 203  247 255 212  232 255 199
+173 198 156  125 102 55  51 35 26  155 112 61  236 217 150  255 230 188
+115 82 46  161 127 82  236 169 118  232 168 117  236 217 150  255 232 190
+255 234 192  212 203 153  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  4 0 2  186 149 103  255 234 192
+255 230 188  255 231 189  255 231 189  255 232 190  208 160 108  83 51 28
+166 125 65  91 67 37  172 197 155  212 216 184  212 216 184  161 127 82
+161 127 82  191 208 160  230 232 196  231 255 198  231 255 198  231 255 198
+231 255 198  232 255 199  235 255 203  232 255 199  212 216 184  210 220 166
+210 220 166  199 209 176  173 198 156  189 206 156  230 232 196  233 255 199
+231 255 198  231 255 198  230 232 196  199 209 176  186 149 103  134 113 63
+191 208 160  212 216 184  173 198 156  107 78 54  166 125 65  215 204 162
+83 51 28  200 154 86  232 166 115  236 217 150  255 232 190  255 231 189
+255 234 192  215 204 162  4 0 2  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  9 2 0  186 149 103  255 234 192
+255 231 189  255 231 189  255 231 189  255 232 190  249 228 179  104 74 41
+32 15 7  172 197 155  230 232 196  161 127 82  51 35 26  125 102 55
+172 197 155  212 216 184  231 255 198  231 255 198  231 255 198  231 255 198
+231 255 198  231 255 198  231 255 198  231 255 198  212 216 184  199 209 176
+172 197 155  172 197 155  191 208 160  230 232 196  232 255 198  231 255 198
+231 255 198  231 255 198  231 255 198  230 232 196  172 197 155  132 119 61
+51 35 26  125 102 55  212 216 184  191 208 160  82 57 39  186 149 103
+106 76 45  236 169 118  232 170 121  249 228 179  255 232 190  255 231 189
+255 234 192  215 204 162  4 0 2  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  9 2 0  186 149 103  255 234 192
+255 231 189  255 231 189  255 231 189  255 233 191  249 228 179  138 104 59
+42 24 20  191 208 160  161 127 82  91 67 37  172 197 155  212 216 184
+235 255 203  232 255 199  231 255 198  231 255 198  231 255 198  231 255 198
+231 255 198  231 255 198  231 255 198  199 209 176  212 216 184  173 198 156
+172 197 155  173 198 156  230 232 196  232 255 198  231 255 198  231 255 198
+231 255 198  231 255 198  231 255 198  232 255 198  233 255 199  230 232 196
+173 198 156  125 102 55  107 78 54  199 209 176  82 57 39  138 104 59
+107 78 54  236 169 118  227 181 122  255 234 192  255 230 188  255 231 189
+255 234 192  215 204 162  4 0 2  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  186 149 103  255 234 192
+255 231 189  255 231 189  255 231 189  255 232 190  249 228 179  236 169 118
+133 97 51  73 53 28  107 78 54  172 197 155  199 209 176  232 255 199
+231 255 198  231 255 198  231 255 198  231 255 198  231 255 198  231 255 198
+212 216 184  199 209 176  212 216 184  199 209 176  199 209 176  173 198 156
+199 209 176  199 209 176  173 198 156  231 255 198  231 255 198  231 255 198
+231 255 198  231 255 198  231 255 198  231 255 198  231 255 198  232 255 198
+230 232 196  191 208 160  134 113 63  73 53 28  106 76 45  83 51 28
+168 130 70  236 169 118  236 217 150  255 232 190  255 231 189  255 231 189
+255 234 192  211 185 152  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  22 13 4  208 160 108  255 233 191
+255 231 189  255 231 189  255 231 189  255 231 189  255 233 191  236 217 150
+225 161 106  83 51 28  161 127 82  191 208 160  230 232 196  232 255 198
+231 255 198  231 255 198  231 255 198  231 255 198  231 255 198  212 216 184
+199 209 176  199 209 176  199 209 176  199 209 176  199 209 176  199 209 176
+173 198 156  173 198 156  199 209 176  231 255 198  231 255 198  231 255 198
+231 255 198  231 255 198  231 255 198  231 255 198  231 255 198  231 255 198
+232 255 198  230 232 196  172 197 155  73 53 28  155 112 61  73 53 28
+236 169 118  231 165 115  255 230 188  255 230 188  255 231 189  255 231 189
+255 234 192  215 204 162  22 13 4  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  133 97 51  236 169 118  255 231 189
+255 231 189  255 231 189  255 231 189  255 230 188  255 234 192  236 217 150
+200 154 86  82 57 39  173 198 156  199 209 176  232 255 199  231 255 198
+231 255 198  231 255 198  231 255 198  231 255 198  231 255 198  231 255 198
+173 198 156  173 198 156  199 209 176  173 198 156  199 209 176  212 216 184
+199 209 176  173 198 156  199 209 176  231 255 198  231 255 198  231 255 198
+231 255 198  231 255 198  231 255 198  231 255 198  231 255 198  231 255 198
+231 255 198  232 255 199  230 232 196  132 119 61  104 73 33  80 54 33
+232 166 115  230 172 123  255 230 188  255 230 188  255 231 189  255 231 189
+255 232 190  249 228 179  115 82 46  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  138 104 59  236 169 118  255 230 188
+255 231 189  255 231 189  255 231 189  255 231 189  255 232 190  249 228 179
+166 125 65  107 78 54  189 206 156  230 232 196  231 255 198  231 255 198
+231 255 198  231 255 198  231 255 198  231 255 198  231 255 198  231 255 198
+230 232 196  212 216 184  199 209 176  173 198 156  173 198 156  199 209 176
+199 209 176  173 198 156  199 209 176  199 209 176  231 255 198  231 255 198
+231 255 198  231 255 198  231 255 198  231 255 198  231 255 198  231 255 198
+231 255 198  231 255 198  237 255 203  161 127 82  104 73 33  84 59 38
+232 166 115  235 224 165  255 232 190  255 231 189  255 231 189  255 231 189
+255 231 189  255 233 191  146 115 67  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  138 104 59  236 169 118  255 231 189
+255 231 189  255 231 189  255 231 189  255 231 189  255 230 188  255 234 192
+82 57 39  172 197 155  173 198 156  231 255 198  231 255 198  231 255 198
+231 255 198  231 255 198  231 255 198  231 255 198  231 255 198  231 255 198
+231 255 198  199 209 176  199 209 176  173 198 156  199 209 176  199 209 176
+173 198 156  230 232 196  231 255 198  173 198 156  230 232 196  231 255 198
+231 255 198  231 255 198  231 255 198  231 255 198  231 255 198  231 255 198
+231 255 198  231 255 198  233 255 199  212 216 184  82 57 39  166 125 65
+230 163 113  235 224 165  255 233 191  255 231 189  255 231 189  255 231 189
+255 230 188  255 234 192  161 127 82  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  138 104 59  236 169 118  255 231 189
+255 231 189  255 231 189  255 231 189  255 231 189  255 230 188  255 234 192
+82 57 39  172 197 155  172 197 155  231 255 198  231 255 198  231 255 198
+231 255 198  231 255 198  231 255 198  231 255 198  199 209 176  231 255 198
+231 255 198  212 216 184  173 198 156  199 209 176  173 198 156  173 198 156
+199 209 176  231 255 198  212 216 184  173 198 156  199 209 176  231 255 198
+231 255 198  231 255 198  231 255 198  231 255 198  231 255 198  231 255 198
+231 255 198  231 255 198  232 255 198  230 232 196  82 57 39  200 154 86
+233 170 120  249 228 179  255 232 190  255 231 189  255 231 189  255 231 189
+255 231 189  255 234 192  161 127 82  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  138 104 59  236 169 118  255 231 189
+255 231 189  255 231 189  255 231 189  255 231 189  255 231 189  255 233 191
+82 57 39  172 197 155  172 197 155  212 216 184  231 255 198  231 255 198
+231 255 198  231 255 198  231 255 198  231 255 198  199 209 176  199 209 176
+231 255 198  231 255 198  231 255 198  199 209 176  173 198 156  199 209 176
+231 255 198  231 255 198  199 209 176  173 198 156  199 209 176  231 255 198
+231 255 198  231 255 198  231 255 198  231 255 198  231 255 198  231 255 198
+231 255 198  231 255 198  233 255 199  231 255 198  82 57 39  190 135 80
+236 217 150  255 233 191  255 231 189  255 231 189  255 231 189  255 231 189
+255 231 189  255 234 192  161 127 82  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  138 104 59  236 169 118  255 231 189
+255 231 189  255 231 189  255 231 189  255 231 189  255 231 189  255 234 192
+119 92 52  132 119 61  173 198 156  172 197 155  212 216 184  231 255 198
+231 255 198  231 255 198  231 255 198  231 255 198  212 216 184  199 209 176
+230 232 196  231 255 198  231 255 198  199 209 176  173 198 156  173 198 156
+199 209 176  199 209 176  199 209 176  173 198 156  231 255 198  231 255 198
+231 255 198  231 255 198  231 255 198  231 255 198  231 255 198  231 255 198
+231 255 198  231 255 198  235 255 203  172 197 155  86 63 36  219 162 97
+236 217 150  255 234 192  255 230 188  255 231 189  255 231 189  255 231 189
+255 231 189  255 234 192  161 127 82  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  138 104 59  236 169 118  255 231 189
+255 230 188  255 231 189  255 231 189  255 231 189  255 231 189  255 234 192
+200 154 86  82 57 39  191 208 160  172 197 155  173 198 156  212 216 184
+231 255 198  232 255 199  231 255 198  231 255 198  231 255 198  199 209 176
+199 209 176  199 209 176  199 209 176  199 209 176  173 198 156  212 216 184
+199 209 176  173 198 156  173 198 156  199 209 176  232 255 199  231 255 198
+231 255 198  231 255 198  231 255 198  231 255 198  231 255 198  231 255 198
+231 255 198  231 255 198  235 255 203  132 119 61  133 97 51  236 169 118
+236 217 150  255 233 191  255 231 189  255 231 189  255 231 189  255 231 189
+255 231 189  255 234 192  161 127 82  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  138 104 59  236 169 118  255 230 188
+255 231 189  255 231 189  255 231 189  255 231 189  255 231 189  255 233 191
+211 185 152  82 57 39  172 197 155  173 198 156  172 197 155  172 197 155
+206 210 177  230 232 196  233 255 199  233 255 199  231 255 198  231 255 198
+199 209 176  199 209 176  173 198 156  173 198 156  199 209 176  191 208 160
+172 197 155  172 197 155  199 209 176  230 232 196  230 232 196  231 255 198
+233 255 199  233 255 199  233 255 199  231 255 198  231 255 198  231 255 198
+231 255 198  231 255 198  199 209 176  107 78 54  166 125 65  233 170 120
+249 228 179  255 231 189  255 231 189  255 231 189  255 231 189  255 231 189
+255 231 189  255 234 192  161 127 82  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  138 104 59  236 169 118  236 217 150
+255 233 191  255 231 189  255 231 189  255 231 189  255 231 189  255 231 189
+255 230 188  133 97 51  125 102 55  189 206 156  173 198 156  172 197 155
+172 197 155  173 198 156  199 209 176  212 216 184  231 255 198  233 255 199
+232 255 199  230 232 196  199 209 176  173 198 156  191 208 160  172 197 155
+172 197 155  172 197 155  172 197 155  172 197 155  173 198 156  199 209 176
+199 209 176  199 209 176  212 216 184  232 255 198  232 255 199  232 255 199
+231 255 198  212 216 184  186 149 103  83 51 28  230 163 113  230 172 123
+255 234 192  255 230 188  255 231 189  255 231 189  255 231 189  255 231 189
+255 230 188  255 234 192  161 127 82  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  138 104 59  236 169 118  227 181 122
+255 234 192  255 231 189  255 231 189  255 231 189  255 231 189  255 231 189
+255 231 189  227 165 115  86 63 36  161 127 82  189 206 156  173 198 156
+172 197 155  172 197 155  172 197 155  172 197 155  189 206 156  191 208 160
+191 208 160  191 208 160  173 198 156  172 197 155  172 197 155  173 198 156
+173 198 156  173 198 156  173 198 156  173 198 156  172 197 155  172 197 155
+172 197 155  172 197 155  173 198 156  189 206 156  189 206 156  191 208 160
+191 208 160  172 197 155  86 63 36  190 135 80  232 166 115  230 172 123
+255 233 191  255 231 189  255 231 189  255 231 189  255 231 189  255 231 189
+255 231 189  255 230 188  138 104 59  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  77 45 20  219 162 97  236 217 150
+255 234 192  255 230 188  255 231 189  255 231 189  255 231 189  255 230 188
+255 234 192  235 224 165  200 154 86  96 68 37  125 102 55  172 197 155
+191 208 160  173 198 156  173 198 156  172 197 155  172 197 155  172 197 155
+172 197 155  172 197 155  172 197 155  173 198 156  173 198 156  173 198 156
+173 198 156  173 198 156  173 198 156  173 198 156  172 197 155  172 197 155
+172 197 155  172 197 155  173 198 156  173 198 156  173 198 156  172 197 155
+132 119 61  82 57 39  115 82 46  236 169 118  229 163 112  230 172 123
+255 233 191  255 231 189  255 231 189  255 231 189  255 231 189  255 231 189
+255 234 192  235 224 165  69 42 21  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  190 135 80  227 181 122
+249 228 179  255 231 189  255 231 189  255 231 189  255 231 189  255 231 189
+255 234 192  236 217 150  200 154 86  96 68 37  107 78 54  82 57 39
+107 78 54  172 197 155  172 197 155  172 197 155  172 197 155  173 198 156
+173 198 156  173 198 156  173 198 156  172 197 155  173 198 156  173 198 156
+173 198 156  172 197 155  172 197 155  172 197 155  172 197 155  172 197 155
+172 197 155  173 198 156  172 197 155  107 78 54  82 57 39  82 57 39
+107 78 54  172 197 155  73 53 28  200 154 86  230 164 114  230 172 123
+255 234 192  255 230 188  255 231 189  255 231 189  255 231 189  255 230 188
+255 234 192  211 185 152  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  9 2 0  190 135 80  232 166 115
+236 217 150  255 234 192  255 231 189  255 231 189  255 231 189  255 231 189
+255 234 192  249 228 179  98 62 37  161 127 82  212 216 184  173 198 156
+161 127 82  82 57 39  172 197 155  191 208 160  173 198 156  172 197 155
+161 127 82  186 149 103  173 198 156  191 208 160  189 206 156  173 198 156
+172 197 155  172 197 155  173 198 156  191 208 160  212 216 184  231 255 198
+231 255 198  231 255 198  231 255 198  210 220 166  210 220 166  210 220 166
+231 255 198  230 232 196  161 127 82  104 74 41  236 169 118  230 172 123
+249 228 179  255 232 190  255 230 188  255 231 189  255 231 189  255 231 189
+255 231 189  186 149 103  9 2 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  4 0 2  99 70 33  232 166 115
+236 217 150  255 232 190  255 230 188  255 231 189  255 231 189  255 230 188
+255 232 190  236 217 150  89 60 38  172 197 155  212 216 184  233 255 199
+235 255 203  237 255 203  233 255 199  231 255 198  212 216 184  172 197 155
+125 102 55  107 78 54  107 78 54  107 78 54  125 102 55  172 197 155
+191 208 160  212 216 184  232 255 199  237 255 203  235 255 203  233 255 199
+233 255 199  233 255 199  232 255 199  233 255 199  233 255 199  233 255 199
+232 255 198  212 216 184  172 197 155  94 65 40  236 169 118  229 163 112
+227 181 122  255 230 188  255 231 189  255 231 189  255 230 188  255 233 191
+236 217 150  98 62 37  4 0 2  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  22 13 4  190 135 80
+233 169 121  249 228 179  255 234 192  255 231 189  255 231 189  255 234 192
+249 228 179  233 169 121  166 125 65  91 67 37  172 197 155  212 216 184
+237 255 203  237 255 203  237 255 203  235 255 203  212 216 184  173 198 156
+132 119 61  81 56 37  138 104 59  178 137 80  133 97 51  73 53 28
+82 57 39  107 78 54  86 63 36  161 127 82  173 198 156  211 215 171
+212 216 184  230 232 196  233 255 199  237 255 203  237 255 203  235 255 203
+212 216 184  172 197 155  82 57 39  168 130 70  236 169 118  230 164 114
+230 164 114  249 228 179  255 233 191  255 230 188  255 232 190  255 230 188
+208 160 108  22 13 4  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  96 68 37
+236 169 118  227 181 122  249 228 179  255 232 190  255 232 190  249 228 179
+227 181 122  230 164 114  230 163 113  104 73 33  51 35 26  161 127 82
+186 149 103  186 149 103  161 127 82  132 119 61  86 63 36  80 54 33
+133 97 51  219 162 97  236 169 118  236 169 118  232 166 115  225 161 106
+225 161 106  225 161 106  200 154 86  106 76 45  84 59 38  94 65 40
+86 58 39  86 63 36  132 119 61  161 127 82  186 149 103  161 127 82
+107 78 54  9 2 0  22 13 4  166 125 65  232 166 115  231 165 115
+229 163 112  227 181 122  249 228 179  255 233 191  249 228 179  227 181 122
+98 62 37  9 2 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  32 15 7
+166 125 65  232 166 115  233 170 120  227 181 122  227 181 122  232 168 117
+232 166 115  225 161 106  115 82 46  9 2 0  0 0 0  106 76 45
+138 104 59  131 100 59  131 100 59  131 100 59  190 135 80  236 169 118
+236 169 118  231 165 115  231 165 115  230 164 114  231 165 115  231 165 115
+231 165 115  231 165 115  232 166 115  236 169 118  236 169 118  236 169 118
+236 169 118  186 149 103  146 115 67  146 115 67  146 115 67  104 73 33
+10 1 0  0 0 0  0 0 0  32 15 7  166 125 65  236 169 118
+232 168 117  230 163 113  227 181 122  249 228 179  227 181 122  155 112 61
+32 15 7  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+20 10 4  155 112 61  225 161 106  236 169 118  236 169 118  236 169 118
+208 160 108  115 82 46  4 0 2  0 0 0  0 0 0  180 145 91
+236 169 118  236 169 118  236 169 118  236 169 118  236 169 118  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  230 163 113  231 165 115  230 172 123  235 224 165
+249 228 179  255 232 190  255 234 192  255 234 192  249 228 179  233 170 120
+22 13 4  0 0 0  0 0 0  0 0 0  20 10 4  155 112 61
+225 161 106  230 163 113  230 163 113  225 161 106  155 112 61  20 10 4
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  58 38 16  138 104 59  146 115 67  133 97 51
+22 13 4  0 0 0  0 0 0  0 0 0  0 0 0  166 125 65
+236 169 118  230 164 114  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  230 164 114  232 168 117  236 217 150  255 233 191  255 232 190
+255 232 190  255 231 189  255 231 189  255 231 189  255 234 192  227 165 115
+22 13 4  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+58 38 16  116 87 35  116 87 35  58 38 16  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  58 38 16
+229 163 112  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+230 164 114  229 163 112  227 181 122  255 232 190  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  255 230 188  255 234 192  119 92 52
+4 0 2  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  20 10 4
+229 163 112  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+230 163 113  232 168 117  249 228 179  255 232 190  255 230 188  255 231 189
+255 231 189  255 231 189  255 231 189  255 232 190  249 228 179  81 56 37
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  22 13 4
+200 154 86  232 166 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+229 163 112  227 181 122  255 233 191  255 230 188  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  255 234 192  236 217 150  69 42 21
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  4 0 2
+94 65 40  236 169 118  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+230 163 113  249 228 179  255 231 189  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  255 234 192  186 149 103  9 2 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+73 53 28  236 169 118  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+229 163 112  249 228 179  255 232 190  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  255 234 192  161 127 82  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+73 53 28  236 169 118  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  230 163 113
+230 172 123  255 230 188  255 231 189  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  255 234 192  161 127 82  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  20 10 4
+190 135 80  232 168 117  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  229 163 112
+236 217 150  255 233 191  255 230 188  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  255 234 192  227 181 122  58 38 16
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  20 10 4
+229 163 112  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  230 164 114  230 163 113  230 163 113  230 164 114  229 163 112
+236 217 150  255 234 192  255 231 189  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  255 232 190  249 228 179  73 53 28
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  115 82 46
+232 166 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+230 164 114  230 163 113  230 172 123  236 217 150  233 169 121  225 161 106
+236 217 150  255 233 191  255 230 188  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  255 231 189  255 234 192  164 128 83
+9 2 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  178 137 80
+236 169 118  230 164 114  231 165 115  231 165 115  231 165 115  230 164 114
+231 165 115  236 217 150  255 230 188  255 232 190  235 224 165  233 169 121
+230 172 123  255 230 188  255 231 189  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  255 230 188  255 234 192  227 181 122
+20 10 4  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  58 38 16  200 154 86
+232 168 117  231 165 115  231 165 115  231 165 115  231 165 115  229 163 112
+227 181 122  255 231 189  255 231 189  255 231 189  255 230 188  229 163 112
+219 162 97  249 228 179  255 232 190  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  255 231 189  255 231 189  249 228 179
+89 60 38  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  9 2 0  131 100 59  236 169 118
+230 164 114  231 165 115  231 165 115  231 165 115  230 164 114  232 168 117
+249 228 179  255 232 190  255 230 188  255 233 191  236 217 150  116 87 35
+166 125 65  255 230 188  255 232 190  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 231 189  255 231 189  255 230 188  255 234 192
+186 149 103  20 10 4  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  32 15 7  166 125 65  232 166 115
+231 165 115  231 165 115  231 165 115  231 165 115  229 163 112  227 181 122
+255 233 191  255 230 188  255 231 189  255 234 192  208 160 108  69 42 21
+225 161 106  255 230 188  255 231 189  255 231 189  255 231 189  255 231 189
+255 231 189  255 231 189  255 232 190  255 234 192  255 234 192  249 228 179
+208 160 108  58 38 16  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  20 10 4  98 62 37  32 15 7  190 135 80
+236 169 118  230 164 114  231 165 115  231 165 115  230 163 113  249 228 179
+255 231 189  255 231 189  255 230 188  255 234 192  227 181 122  155 112 61
+227 181 122  255 232 190  255 232 190  255 231 189  255 230 188  255 231 189
+255 231 189  255 230 188  249 228 179  227 181 122  227 181 122  230 172 123
+58 38 16  83 51 28  123 94 35  9 2 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  115 82 46  232 166 115  98 62 37  104 73 33
+229 163 112  231 165 115  231 165 115  231 165 115  230 163 113  249 228 179
+255 234 192  255 232 190  255 234 192  255 230 188  249 228 179  249 228 179
+235 224 165  249 228 179  255 230 188  236 217 150  230 172 123  236 217 150
+227 181 122  232 168 117  231 165 115  230 163 113  232 166 115  155 112 61
+58 38 16  219 162 97  225 161 106  9 2 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  40 20 7  190 135 80  236 169 118  178 137 80  22 13 4
+119 92 52  236 169 118  231 165 115  230 164 114  230 164 114  230 172 123
+236 217 150  249 228 179  236 217 150  227 181 122  230 172 123  230 163 113
+229 163 112  231 165 115  230 172 123  232 168 117  230 163 113  230 163 113
+230 163 113  230 164 114  230 164 114  232 166 115  155 112 61  22 13 4
+133 97 51  236 169 118  225 161 106  69 42 21  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+36 17 7  190 135 80  232 166 115  231 165 115  232 166 115  155 112 61
+58 38 16  219 162 97  232 168 117  230 164 114  231 165 115  229 163 112
+231 165 115  233 169 121  229 163 112  229 163 112  230 163 113  231 165 115
+231 165 115  230 164 114  230 163 113  230 164 114  231 165 115  231 165 115
+231 165 115  230 164 114  236 169 118  200 154 86  32 15 7  115 82 46
+230 163 113  231 165 115  232 166 115  200 154 86  58 38 16  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+166 125 65  236 169 118  231 165 115  230 164 114  232 166 115  230 164 114
+58 38 16  69 42 21  225 161 106  232 168 117  231 165 115  231 165 115
+230 164 114  230 163 113  230 164 114  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+230 164 114  232 166 115  225 161 106  104 73 33  89 60 38  232 166 115
+232 168 117  230 164 114  231 165 115  236 169 118  133 97 51  4 0 2
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  69 42 21
+225 161 106  232 168 117  230 164 114  231 165 115  230 164 114  232 166 115
+190 135 80  58 38 16  166 125 65  236 169 118  236 169 118  236 169 118
+236 169 118  236 169 118  236 169 118  236 169 118  232 168 117  236 169 118
+236 169 118  236 169 118  236 169 118  236 169 118  236 169 118  236 169 118
+236 169 118  225 161 106  115 82 46  22 13 4  168 130 70  236 169 118
+230 164 114  231 165 115  230 164 114  231 165 115  225 161 106  104 73 33
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  9 2 0  155 112 61
+236 169 118  230 164 114  231 165 115  231 165 115  231 165 115  231 165 115
+236 169 118  133 97 51  36 17 7  166 125 65  190 135 80  190 135 80
+190 135 80  190 135 80  190 135 80  190 135 80  225 161 106  190 135 80
+190 135 80  190 135 80  190 135 80  190 135 80  190 135 80  190 135 80
+190 135 80  104 73 33  32 15 7  166 125 65  232 166 115  230 164 114
+231 165 115  231 165 115  231 165 115  230 164 114  236 169 118  200 154 86
+22 13 4  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  69 42 21  225 161 106
+231 165 115  230 164 114  231 165 115  231 165 115  231 165 115  230 164 114
+232 166 115  225 161 106  98 62 37  46 29 24  132 119 61  132 119 61
+132 119 61  132 119 61  132 119 61  46 29 24  77 45 20  107 78 54
+132 119 61  132 119 61  132 119 61  132 119 61  132 119 61  132 119 61
+107 78 54  32 15 7  166 125 65  236 169 118  231 165 115  230 164 114
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  236 169 118
+115 82 46  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  81 56 37  236 169 118
+230 164 114  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+230 164 114  232 168 117  225 161 106  104 74 41  173 198 156  247 255 212
+247 255 212  247 255 212  237 255 203  199 209 176  172 197 155  232 255 199
+247 255 212  247 255 212  247 255 212  247 255 212  247 255 212  247 255 212
+119 92 52  166 125 65  232 166 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  230 164 114  236 169 118
+138 104 59  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  22 13 4  190 135 80  232 168 117
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  230 164 114  236 169 118  166 125 65  107 78 54  231 255 198
+231 255 198  231 255 198  231 255 198  235 255 203  237 255 203  232 255 198
+231 255 198  231 255 198  231 255 198  231 255 198  237 255 203  173 198 156
+86 63 36  232 166 115  231 165 115  230 164 114  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  232 168 117
+219 162 97  69 42 21  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  22 13 4  229 163 112  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  232 166 115  83 51 28  173 198 156
+235 255 203  231 255 198  231 255 198  231 255 198  231 255 198  231 255 198
+231 255 198  231 255 198  231 255 198  232 255 198  235 255 203  107 78 54
+166 125 65  236 169 118  230 164 114  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  230 164 114
+236 169 118  80 54 33  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  20 10 4  225 161 106  232 166 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  230 164 114  232 168 117  190 135 80  91 67 37
+235 255 203  231 255 198  231 255 198  231 255 198  231 255 198  231 255 198
+231 255 198  231 255 198  231 255 198  237 255 203  172 197 155  98 62 37
+225 161 106  232 166 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+236 169 118  80 54 33  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  69 42 21  229 163 112  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  230 164 114  232 166 115  96 68 37
+172 197 155  237 255 203  231 255 198  231 255 198  231 255 198  231 255 198
+231 255 198  231 255 198  231 255 198  231 255 198  119 92 52  155 112 61
+236 169 118  230 164 114  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+236 169 118  80 54 33  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  168 130 70  236 169 118  230 164 114
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  236 169 118  166 125 65
+134 113 63  247 255 212  231 255 198  231 255 198  231 255 198  231 255 198
+231 255 198  231 255 198  231 255 198  230 232 196  73 53 28  232 166 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+236 169 118  73 53 28  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  178 137 80  236 169 118  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  236 169 118  190 135 80
+107 78 54  230 232 196  233 255 199  231 255 198  231 255 198  231 255 198
+231 255 198  231 255 198  235 255 203  172 197 155  104 74 41  236 169 118
+230 164 114  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+236 169 118  96 68 37  9 2 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  178 137 80  236 169 118  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  236 169 118
+104 73 33  172 197 155  237 255 203  231 255 198  231 255 198  231 255 198
+231 255 198  232 255 198  231 255 198  82 57 39  190 135 80  232 166 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+232 166 115  200 154 86  22 13 4  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  178 137 80  236 169 118  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  230 164 114  236 169 118
+190 135 80  82 57 39  231 255 198  231 255 198  231 255 198  231 255 198
+231 255 198  237 255 203  186 149 103  115 82 46  232 166 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  229 163 112  22 13 4  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  178 137 80  236 169 118  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  230 164 114
+236 169 118  104 74 41  186 149 103  237 255 203  231 255 198  231 255 198
+233 255 199  230 232 196  107 78 54  190 135 80  236 169 118  230 164 114
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+232 166 115  225 161 106  22 13 4  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  178 137 80  236 169 118  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  230 164 114
+232 166 115  200 154 86  82 57 39  231 255 198  233 255 199  232 255 199
+235 255 203  172 197 155  98 62 37  236 169 118  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  225 161 106  22 13 4  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  178 137 80  236 169 118  230 164 114
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  232 166 115  116 87 35  132 119 61  232 255 199  233 255 199
+210 220 166  73 53 28  190 135 80  233 170 120  230 164 114  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  225 161 106  22 13 4  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  83 51 28  230 163 113  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+230 164 114  236 169 118  155 112 61  73 53 28  132 119 61  134 113 63
+125 102 55  51 35 26  219 162 97  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  230 164 114
+236 169 118  133 97 51  9 2 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  20 10 4  225 161 106  232 166 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  232 168 117  219 162 97  69 42 21  4 0 2  9 2 0
+9 2 0  133 97 51  236 169 118  230 164 114  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  231 165 115
+231 165 115  231 165 115  231 165 115  231 165 115  231 165 115  232 168 117
+219 162 97  58 38 16  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
diff --git a/drivers/video/logo/logo_zen_clut224.ppm b/drivers/video/logo/logo_zen_clut224.ppm
new file mode 100644
index 000000000..f4c78972f
--- /dev/null
+++ b/drivers/video/logo/logo_zen_clut224.ppm
@@ -0,0 +1,2043 @@
+P3
+102 120
+255
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 1 1  0 1 1  0 1 1  0 2 3  0 2 3  1 5 6
+1 7 9  1 8 11  2 10 13  2 11 15  2 13 18  3 16 22
+3 18 24  3 21 29  4 23 31  5 25 34  5 28 38  6 30 41
+6 30 41  6 33 45  7 36 49  7 36 49  7 36 49  7 36 49
+7 36 49  7 38 51  7 38 51  7 36 49  7 36 49  7 36 49
+7 36 49  6 33 45  6 33 45  6 30 41  5 28 38  5 27 37
+5 25 34  4 22 30  4 20 27  3 16 22  3 15 20  2 13 18
+2 11 15  2 9 12  1 7 9  1 7 9  1 4 5  0 3 4
+0 2 3  0 1 1  0 1 1  0 1 1  0 0 1  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 1 1
+0 1 1  0 2 3  0 3 4  1 4 5  1 5 6  1 7 9
+2 9 12  2 11 15  2 12 16  3 16 22  3 18 24  3 21 29
+5 25 34  5 27 37  6 30 41  6 33 45  7 36 49  7 38 51
+6 40 54  8 41 55  8 42 57  8 44 59  8 45 61  8 45 61
+8 45 61  8 45 61  8 45 61  8 45 61  8 45 61  8 44 59
+8 42 57  8 42 57  8 41 55  7 38 51  7 36 49  6 33 45
+6 33 45  5 29 40  5 25 34  4 23 31  4 20 27  3 18 24
+3 15 20  2 13 18  1 10 14  1 8 11  1 7 9  1 5 6
+1 4 5  0 2 3  0 2 3  0 1 1  0 1 1  0 0 1
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 1  0 1 1
+0 2 3  0 3 4  1 4 5  1 5 6  1 7 9  2 9 12
+2 12 16  3 15 20  3 16 22  4 20 27  4 23 31  5 27 37
+6 30 41  6 33 45  7 36 49  7 39 53  8 42 57  8 45 61
+9 47 64  9 47 64  9 49 66  9 50 69  9 50 69  10 53 71
+10 53 71  10 53 71  10 53 71  10 53 71  10 53 71  9 50 69
+9 49 66  9 49 66  9 47 64  8 45 61  8 42 57  8 42 57
+7 39 53  7 36 49  6 33 45  5 28 38  5 25 34  4 23 31
+4 20 27  3 16 22  3 15 20  2 11 15  2 9 12  1 7 9
+1 5 6  0 3 4  0 3 4  0 2 3  0 1 1  0 1 1
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 1  0 1 1  0 1 1
+0 3 4  1 4 5  1 5 6  1 7 9  2 9 12  2 12 16
+3 15 20  3 18 24  3 21 29  5 25 34  6 30 41  6 33 45
+7 38 51  8 41 55  8 44 59  9 47 64  9 49 66  10 53 71
+10 53 71  10 54 74  11 56 77  11 58 79  11 58 79  11 58 79
+11 58 79  11 58 79  11 58 79  11 58 79  11 58 79  11 58 79
+11 56 77  11 56 77  10 54 74  10 53 71  9 50 69  9 49 66
+8 45 61  8 42 57  6 40 54  7 36 49  6 33 45  5 29 40
+5 25 34  3 21 29  3 18 24  3 15 20  2 12 16  2 10 13
+1 7 9  1 5 6  0 3 4  0 3 4  0 2 3  0 2 3
+0 1 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 1 1  0 1 1  0 2 3
+0 3 4  1 5 6  1 7 9  1 8 11  2 12 16  3 15 20
+4 19 26  4 23 31  5 27 37  6 30 41  7 36 49  6 40 54
+8 44 59  9 47 64  9 50 69  10 53 71  11 56 77  11 58 79
+11 58 79  11 60 82  12 61 82  12 62 85  12 64 87  12 64 87
+12 64 87  12 64 87  12 64 87  12 64 87  12 64 87  12 64 87
+12 62 85  11 60 82  11 60 82  11 58 79  11 56 77  10 54 74
+10 53 71  9 49 66  8 45 61  8 42 57  7 39 53  7 36 49
+6 30 41  5 27 37  4 22 30  4 19 26  3 15 20  2 12 16
+2 10 13  1 7 9  1 5 6  0 3 4  0 2 3  0 2 3
+0 1 1  0 0 1  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 1  0 1 1  0 2 3  0 2 3
+1 4 5  1 5 6  1 8 11  2 10 13  2 13 18  3 18 24
+4 22 30  5 27 37  6 30 41  7 36 49  8 41 55  8 45 61
+9 49 66  10 53 71  10 54 74  11 58 79  12 61 82  12 61 82
+12 64 87  12 64 87  12 66 90  12 66 90  13 67 91  13 67 91
+13 67 91  13 67 92  13 67 92  13 67 91  13 67 91  12 66 90
+12 66 90  12 64 87  12 64 87  12 64 87  11 60 82  11 58 79
+11 58 79  10 54 74  9 50 69  9 47 64  8 44 59  8 41 55
+7 36 49  6 30 41  5 27 37  4 23 31  4 19 26  3 15 20
+2 12 16  2 9 12  1 7 9  1 4 5  0 3 4  0 2 3
+0 1 1  0 1 1  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 1 1  0 1 1  0 2 3  0 3 4
+1 5 6  1 7 9  2 9 12  2 12 16  3 16 22  3 21 29
+5 25 34  6 30 41  7 36 49  8 41 55  8 45 61  9 49 66
+10 54 74  11 58 79  11 60 82  12 62 85  12 64 87  12 66 90
+13 67 92  13 67 92  13 69 94  13 69 94  13 69 94  13 69 94
+13 69 94  13 71 96  13 71 96  13 69 94  13 69 94  13 69 94
+13 69 94  13 68 93  13 68 93  13 67 91  12 64 87  12 64 87
+12 62 85  11 58 79  11 56 77  10 53 71  9 50 69  8 45 61
+8 41 55  7 36 49  6 30 41  5 27 37  4 23 31  3 18 24
+3 15 20  2 11 15  1 8 11  1 7 9  1 5 6  0 3 4
+0 2 3  0 1 1  0 0 1  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 1  0 1 1  0 1 1  0 2 3  1 4 5
+1 7 9  2 9 12  2 12 16  3 15 20  4 19 26  4 24 33
+6 30 41  7 36 49  8 41 55  8 45 61  9 50 69  10 54 74
+11 58 79  12 62 85  12 64 87  13 67 91  13 68 93  13 69 94
+13 69 94  13 71 96  13 72 97  13 72 98  14 73 99  14 73 99
+14 73 99  13 72 98  13 72 98  14 73 99  13 72 98  13 72 98
+13 72 98  13 72 97  13 71 96  13 69 94  13 69 94  13 67 91
+12 66 90  12 64 87  12 62 85  11 58 79  10 54 74  9 50 69
+9 47 64  8 41 55  7 36 49  6 33 45  5 27 37  4 23 31
+3 18 24  2 13 18  1 10 14  1 8 11  1 7 9  1 4 5
+0 2 3  0 2 3  0 1 1  0 0 1  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 1  0 1 1  0 2 3  0 3 4  1 4 5
+1 7 9  1 10 14  3 15 20  3 16 22  4 22 30  5 28 38
+6 33 45  8 41 55  8 45 61  9 50 69  10 54 74  11 58 79
+12 62 85  12 66 90  13 67 92  13 69 94  13 71 96  13 72 98
+14 73 99  14 73 99  14 73 99  14 74 101  14 74 101  14 74 101
+14 74 101  14 74 101  14 74 101  14 74 101  14 74 101  14 74 101
+14 73 100  14 73 99  13 72 98  13 72 98  13 72 97  13 71 96
+13 69 94  13 67 92  12 66 90  12 62 85  11 60 82  11 56 77
+10 53 71  9 47 64  8 41 55  7 36 49  6 33 45  5 27 37
+3 21 29  3 16 22  2 13 18  2 10 13  1 7 9  1 5 6
+0 3 4  0 2 3  0 1 1  0 1 1  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 1 1  0 1 1  0 2 3  1 4 5  1 5 6
+1 8 11  2 12 16  3 15 20  4 19 26  5 25 34  6 30 41
+7 36 49  8 42 57  9 47 64  10 53 71  11 58 79  12 62 85
+12 66 90  13 68 93  13 69 94  13 72 98  14 73 99  14 73 100
+14 73 100  14 74 101  14 74 101  14 74 101  14 74 101  14 74 101
+14 74 101  14 74 101  14 74 101  14 74 101  14 74 101  14 74 101
+14 74 101  14 74 101  14 74 101  14 74 101  14 73 99  14 73 99
+13 72 97  13 69 94  13 68 93  12 66 90  12 64 87  11 60 82
+11 56 77  9 50 69  9 47 64  8 41 55  7 36 49  6 30 41
+5 25 34  4 19 26  3 15 20  2 12 16  2 9 12  1 7 9
+1 4 5  0 2 3  0 2 3  0 1 1  0 0 1  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 1 1  0 1 1  0 3 4  1 4 5  1 7 9
+2 9 12  2 13 18  3 16 22  3 21 29  5 27 37  6 33 45
+7 39 53  9 47 64  9 50 69  11 56 77  11 60 82  12 64 87
+13 67 92  13 69 94  13 72 97  13 72 98  14 74 101  14 74 101
+14 74 101  14 74 101  14 74 101  14 75 102  14 75 102  14 75 102
+14 75 102  14 75 102  14 75 102  14 75 102  14 75 102  14 75 102
+14 75 102  14 74 101  14 74 101  14 75 102  14 74 101  14 73 99
+13 72 98  13 72 98  13 69 94  13 68 93  12 66 90  12 64 87
+11 58 79  10 54 74  9 49 66  8 44 59  6 40 54  6 33 45
+5 27 37  4 22 30  3 16 22  2 13 18  2 10 13  1 7 9
+1 5 6  0 3 4  0 2 3  0 1 1  0 0 1  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 2 3  0 2 3  0 2 3  1 5 6  1 7 9
+2 10 13  3 15 20  4 19 26  4 23 31  5 29 40  7 36 49
+8 42 57  9 49 66  10 53 71  11 58 79  12 62 85  12 66 90
+13 69 94  13 72 97  13 72 98  14 74 101  14 75 102  14 74 101
+14 75 102  14 75 102  14 75 102  14 75 102  14 75 102  14 74 101
+14 73 99  14 75 102  14 75 102  14 74 101  14 74 101  14 73 100
+14 73 100  14 75 102  14 75 102  14 74 101  14 74 101  14 74 101
+14 74 101  14 73 99  13 72 98  13 69 94  13 69 94  12 66 90
+12 62 85  11 58 79  10 53 71  9 47 64  8 42 57  7 36 49
+6 30 41  5 25 34  4 19 26  3 15 20  2 12 16  1 8 11
+1 5 6  1 4 5  0 2 3  0 1 1  0 1 1  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 1  0 2 3  0 2 3  0 3 4  1 5 6  1 8 11
+2 11 15  3 16 22  4 20 27  5 25 34  6 30 41  7 38 51
+8 45 61  9 50 69  11 56 77  11 60 82  12 64 87  13 68 93
+13 71 96  13 72 98  14 73 99  14 74 101  14 74 101  14 75 102
+14 75 102  14 75 102  14 74 101  14 75 102  13 72 98  12 64 87
+9 49 66  8 41 55  6 30 41  6 30 41  7 38 51  9 47 64
+11 58 79  13 71 96  14 75 102  14 74 101  14 75 102  14 74 101
+14 74 101  14 74 101  14 73 99  13 72 98  13 69 94  13 68 93
+12 64 87  11 60 82  11 56 77  9 50 69  9 47 64  8 41 55
+6 33 45  5 27 37  4 22 30  3 18 24  2 13 18  2 10 13
+1 7 9  1 5 6  0 3 4  0 2 3  0 2 3  0 0 1
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 1 1  0 2 3  0 2 3  1 4 5  1 7 9  2 9 12
+2 12 16  3 16 22  3 21 29  5 27 37  6 33 45  7 39 53
+9 47 64  10 53 71  11 56 77  12 62 85  12 66 90  13 69 94
+13 72 97  14 73 99  14 74 101  14 74 101  14 75 102  14 75 102
+14 74 101  14 74 101  12 64 87  5 29 40  0 3 4  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  4 24 33  11 60 82  14 75 102  14 74 101
+14 75 102  14 74 101  14 74 101  13 72 98  13 72 97  13 69 94
+13 68 93  12 64 87  11 58 79  10 54 74  9 49 66  8 44 59
+7 36 49  6 30 41  4 24 33  4 19 26  3 15 20  2 11 15
+1 8 11  1 5 6  0 3 4  0 2 3  0 2 3  0 0 1
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 1 1  0 1 1  0 2 3  1 4 5  1 7 9  2 9 12
+2 12 16  3 16 22  4 22 30  5 27 37  6 33 45  8 41 55
+9 47 64  10 54 74  11 58 79  12 62 85  13 67 91  13 69 94
+13 72 98  14 73 99  14 74 101  14 74 101  14 75 102  14 75 102
+11 58 79  3 18 24  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  3 16 22  11 56 77
+14 74 101  14 74 101  14 74 101  14 74 99  13 72 98  13 71 96
+13 68 93  12 66 90  11 60 82  11 56 77  10 53 71  8 45 61
+8 41 55  6 33 45  5 25 34  3 21 29  3 16 22  2 12 16
+1 8 11  1 7 9  1 4 5  0 3 4  0 1 1  0 1 1
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 1 1  0 1 1  0 3 4  1 4 5  1 7 9  2 9 12
+2 13 18  3 18 24  4 23 31  5 28 38  7 36 49  8 42 57
+9 49 66  10 54 74  11 60 82  12 64 87  13 67 91  13 69 94
+13 72 98  14 74 101  14 74 101  14 75 102  14 74 101  8 45 61
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  9 8 8  21 20 20  3 3 3  0 0 0  0 0 0
+6 33 45  14 74 101  14 74 101  14 74 101  14 73 99  13 72 98
+13 69 94  13 67 91  12 62 85  11 58 79  10 54 74  9 47 64
+8 41 55  6 33 45  5 28 38  4 23 31  4 19 26  2 13 18
+2 9 12  1 7 9  1 4 5  0 3 4  0 2 3  0 1 1
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 1 1  0 2 3  0 3 4  1 4 5  1 7 9  2 9 12
+2 13 18  4 19 26  4 23 31  5 28 38  7 36 49  8 42 57
+9 50 69  11 56 77  11 60 82  12 64 87  13 68 93  13 71 96
+14 73 99  14 74 101  14 74 101  14 74 101  9 47 64  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  29 26 26  81 83 82  55 54 51  21 20 20  0 0 0
+0 0 0  5 25 34  14 73 99  14 74 101  14 73 99  13 72 98
+13 69 94  13 68 93  12 64 87  11 60 82  11 56 77  9 50 69
+8 44 59  7 36 49  6 30 41  5 25 34  4 19 26  3 15 20
+2 10 13  1 7 9  1 5 6  0 3 4  0 2 3  0 1 1
+0 0 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 1 1  0 2 3  0 3 4  1 4 5  1 7 9  2 9 12
+3 15 20  4 19 26  4 24 33  5 29 40  7 36 49  8 44 59
+9 50 69  11 56 77  11 60 82  12 64 87  13 68 93  13 71 96
+14 73 99  14 74 101  14 74 101  12 62 85  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  39 37 35  113 114 111  113 114 111  69 68 60  28 32 37
+3 3 3  0 0 0  5 27 37  14 74 101  14 74 101  14 73 99
+13 72 97  13 69 94  12 66 90  12 62 85  11 58 79  10 53 71
+8 45 61  7 38 51  6 30 41  5 25 34  3 21 29  3 15 20
+2 11 15  1 8 11  1 5 6  1 4 5  0 2 3  0 2 3
+0 0 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 1 1  0 2 3  0 3 4  1 5 6  1 7 9  2 10 13
+3 15 20  4 19 26  4 24 33  6 30 41  7 36 49  8 44 59
+9 50 69  11 56 77  11 60 82  12 64 87  13 68 93  13 71 96
+14 73 99  14 74 101  14 74 101  2 13 18  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 1  47 44 41  113 114 111  81 83 82  55 54 51  39 37 35
+9 8 8  0 0 0  0 0 0  9 49 66  14 73 99  14 73 99
+13 72 98  13 69 94  12 66 90  12 62 85  11 58 79  10 53 71
+9 47 64  8 41 55  6 33 45  5 27 37  4 22 30  3 16 22
+2 12 16  1 8 11  1 7 9  1 4 5  0 2 3  0 2 3
+0 1 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 1 1  0 2 3  0 2 3  1 4 5  1 7 9  1 10 14
+2 13 18  4 19 26  5 25 34  6 30 41  7 36 49  8 44 59
+9 50 69  11 56 77  11 60 82  12 64 87  13 69 94  13 72 97
+14 73 100  14 73 99  12 62 85  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  39 37 35  39 37 35  16 15 15  6 5 5  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 1  12 64 87  13 72 98
+13 72 98  13 69 94  13 67 91  12 64 87  11 58 79  10 54 74
+9 47 64  8 41 55  6 33 45  5 28 38  4 23 31  3 16 22
+2 12 16  2 9 12  1 7 9  1 4 5  0 2 3  0 2 3
+0 1 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 1 1  0 2 3  0 2 3  1 4 5  1 7 9  2 10 13
+3 15 20  4 19 26  5 25 34  6 30 41  7 36 49  8 44 59
+9 50 69  11 56 77  11 60 82  12 64 87  13 69 94  13 72 97
+14 73 99  14 73 99  8 41 55  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+1 1 1  7 7 7  0 0 1  0 0 1  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  4 20 27  14 73 99
+13 72 98  13 69 94  13 67 91  12 64 87  11 60 82  10 54 74
+9 49 66  8 42 57  7 36 49  5 29 40  4 23 31  3 18 24
+2 12 16  2 9 12  1 7 9  1 4 5  0 3 4  0 1 1
+0 1 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 1 1  0 2 3  0 3 4  1 4 5  1 7 9  1 10 14
+3 15 20  4 19 26  5 25 34  6 30 41  7 36 49  8 44 59
+9 50 69  11 56 77  11 60 82  12 64 87  13 69 94  13 72 97
+13 73 99  14 73 99  3 16 22  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  13 68 93
+13 72 97  13 69 94  13 68 93  12 64 87  11 60 82  10 54 74
+9 49 66  8 42 57  7 36 49  6 30 41  4 24 33  3 18 24
+2 12 16  2 9 12  1 7 9  1 4 5  0 3 4  0 1 1
+0 1 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 1 1  0 2 3  0 3 4  1 4 5  1 7 9  1 10 14
+2 13 18  4 19 26  5 25 34  6 30 41  7 36 49  8 44 59
+9 50 69  11 56 77  11 60 82  12 64 87  13 68 93  13 72 97
+14 73 99  12 66 90  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  8 42 57
+13 72 98  13 71 96  13 68 93  12 64 87  11 60 82  11 56 77
+9 50 69  8 42 57  7 36 49  6 30 41  4 24 33  4 19 26
+2 13 18  2 9 12  1 7 9  1 5 6  0 3 4  0 1 1
+0 1 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 1 1  0 2 3  0 2 3  1 4 5  1 7 9  2 10 13
+2 13 18  4 19 26  4 24 33  5 29 40  7 36 49  8 44 59
+9 50 69  11 56 77  11 60 82  12 64 87  13 68 93  13 71 96
+13 72 97  11 58 79  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  2 11 15
+13 72 98  13 71 96  13 68 93  12 64 87  11 60 82  11 56 77
+9 50 69  8 44 59  7 36 49  6 30 41  5 25 34  4 19 26
+3 15 20  2 10 13  1 7 9  1 5 6  0 3 4  0 1 1
+0 1 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 1 1  0 1 1  0 3 4  1 4 5  1 7 9  2 9 12
+2 13 18  4 19 26  4 24 33  5 29 40  7 36 49  8 44 59
+9 50 69  11 56 77  11 60 82  12 64 87  13 68 93  13 71 96
+13 71 96  9 50 69  0 0 0  0 0 0  0 0 0  0 0 0
+12 12 11  29 26 26  29 26 26  2 2 2  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  16 15 15  55 54 51  69 68 60  21 20 20  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+13 68 93  13 69 94  13 68 93  12 64 87  12 62 85  11 56 77
+9 50 69  8 44 59  7 36 49  6 30 41  5 25 34  4 19 26
+2 13 18  2 10 13  1 7 9  1 5 6  0 3 4  0 1 1
+0 1 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 1 1  0 1 1  0 3 4  1 4 5  1 7 9  2 9 12
+2 13 18  4 19 26  4 23 31  5 28 38  7 36 49  8 42 57
+9 50 69  11 56 77  11 60 82  12 64 87  13 68 93  13 72 97
+13 72 97  8 45 61  0 0 0  0 0 0  0 0 0  0 0 0
+9 8 8  39 37 35  81 83 82  69 68 60  7 7 7  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  3 3 3  16 15 15  69 68 60  105 98 84  29 26 26
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+11 60 82  13 69 94  13 69 94  12 64 87  12 61 82  11 58 79
+9 50 69  8 44 59  7 38 51  6 33 45  5 25 34  4 19 26
+2 13 18  2 10 13  1 7 9  1 5 6  0 3 4  0 1 1
+0 1 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 1 1  0 1 1  0 3 4  1 4 5  1 7 9  2 9 12
+2 13 18  3 18 24  4 23 31  5 28 38  7 36 49  8 42 57
+9 50 69  10 54 74  11 60 82  12 64 87  13 68 93  13 71 96
+13 72 97  8 42 57  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  1 4 5  47 44 41  39 37 35  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 1  0 2 3  28 32 37  55 54 51
+9 8 8  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+9 47 64  13 69 94  13 69 94  12 66 90  12 62 85  11 58 79
+10 53 71  8 45 61  7 38 51  6 33 45  5 27 37  4 20 27
+3 15 20  1 10 14  1 7 9  1 5 6  0 3 4  0 2 3
+0 1 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 1 1  0 1 1  0 3 4  1 4 5  1 7 9  2 9 12
+2 13 18  3 18 24  4 23 31  5 28 38  7 36 49  8 42 57
+9 49 66  10 54 74  11 60 82  12 64 87  13 67 92  13 69 94
+13 72 97  8 41 55  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 1  1 4 5  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  1 1 1
+4 4 4  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+6 30 41  13 72 97  13 69 94  12 66 90  12 64 87  11 58 79
+10 53 71  9 47 64  7 39 53  6 33 45  5 27 37  3 21 29
+3 16 22  2 11 15  1 8 11  1 7 9  1 4 5  0 2 3
+0 1 1  0 0 1  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 1 1  0 2 3  0 2 3  1 4 5  1 7 9  2 9 12
+2 12 16  3 16 22  4 23 31  5 28 38  7 36 49  8 42 57
+9 49 66  10 54 74  11 58 79  12 64 87  13 67 91  13 69 94
+13 72 97  8 42 57  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+3 16 22  13 72 97  13 69 94  12 66 90  12 64 87  11 58 79
+10 53 71  9 47 64  8 41 55  7 36 49  5 29 40  4 22 30
+3 16 22  2 12 16  1 8 11  1 7 9  1 5 6  0 2 3
+0 2 3  0 0 1  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 1  0 1 1  0 2 3  0 3 4  1 7 9  1 9 12
+2 12 16  3 16 22  4 22 30  5 27 37  6 33 45  8 42 57
+9 47 64  10 54 74  11 58 79  12 64 87  13 67 91  13 69 94
+13 72 97  8 42 57  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+3 16 22  13 72 97  13 69 94  13 68 93  12 64 87  11 60 82
+10 54 74  9 49 66  8 42 57  7 36 49  6 30 41  4 23 31
+4 19 26  2 13 18  2 10 13  1 7 9  1 5 6  0 3 4
+0 2 3  0 1 1  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 1 1  0 2 3  0 2 3  1 4 5  1 7 9  1 8 11
+2 11 15  3 16 22  3 21 29  5 27 37  6 33 45  8 41 55
+9 47 64  10 54 74  11 58 79  12 62 85  13 67 91  13 69 94
+13 71 96  8 44 59  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  4 4 4  4 4 4  0 0 0  0 0 0  9 8 8
+21 20 20  6 5 5  0 0 0  0 0 0  29 26 26  7 7 7
+0 0 0  0 0 0  12 12 11  1 1 1  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+3 21 29  13 72 97  13 71 96  13 68 93  12 66 90  11 60 82
+11 56 77  9 49 66  8 44 59  7 38 51  6 30 41  5 25 34
+4 19 26  2 13 18  1 10 14  1 8 11  1 5 6  0 3 4
+0 2 3  0 1 1  0 0 1  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 1  0 2 3  0 2 3  1 4 5  1 5 6  1 8 11
+2 12 16  3 16 22  3 21 29  5 25 34  6 33 45  8 41 55
+9 47 64  10 54 74  11 58 79  12 62 85  13 67 91  13 69 94
+13 69 94  8 45 61  0 0 0  9 8 8  148 147 146  39 37 35
+0 0 0  29 26 26  81 83 82  81 83 82  0 0 0  11 13 22
+69 68 60  28 32 37  7 7 7  148 147 146  201 196 193  39 37 35
+0 0 0  0 0 1  69 68 60  105 98 84  47 44 41  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+6 30 41  14 73 99  13 71 96  13 68 93  12 66 90  12 61 82
+11 56 77  9 50 69  8 45 61  6 40 54  6 33 45  5 27 37
+4 20 27  3 16 22  2 11 15  1 8 11  1 7 9  1 4 5
+0 2 3  0 1 1  0 1 1  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 1  0 2 3  0 2 3  1 4 5  1 5 6  1 8 11
+2 12 16  3 16 22  3 21 29  5 25 34  6 33 45  7 39 53
+9 47 64  10 53 71  11 58 79  12 62 85  13 67 91  13 69 94
+13 69 94  9 49 66  0 0 0  81 83 82  251 251 251  171 169 168
+0 2 3  0 0 0  113 114 111  241 241 241  47 44 41  35 25 1
+35 25 1  35 25 1  148 147 146  248 247 247  250 252 255  113 114 111
+7 7 7  3 3 3  69 68 60  224 226 233  224 223 222  55 54 51
+0 0 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+7 36 49  13 72 98  13 72 97  13 69 94  13 67 91  12 64 87
+11 58 79  10 53 71  9 47 64  8 42 57  7 36 49  5 28 38
+4 23 31  3 16 22  2 12 16  1 10 14  1 7 9  1 5 6
+0 3 4  0 2 3  0 1 1  0 0 1  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 2 3  0 2 3  1 4 5  1 5 6  1 8 11
+2 11 15  3 16 22  4 20 27  5 25 34  6 33 45  7 39 53
+9 47 64  10 53 71  11 58 79  12 62 85  12 66 90  13 69 94
+13 69 94  11 58 79  0 0 0  81 83 82  255 255 255  255 255 255
+191 183 178  171 169 168  201 196 193  235 193 64  234 181 0  234 181 0
+234 181 0  222 168 1  225 176 47  245 217 114  247 240 225  255 255 255
+213 210 208  208 204 201  241 241 241  255 255 255  255 255 255  191 183 178
+0 1 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+5 27 37  14 73 99  13 72 98  13 69 94  13 68 93  12 64 87
+11 60 82  10 54 74  9 49 66  8 44 59  7 38 51  6 30 41
+5 25 34  4 19 26  3 15 20  2 11 15  1 8 11  1 5 6
+1 4 5  0 2 3  0 1 1  0 1 1  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 1  0 2 3  0 2 3  0 3 4  1 5 6  1 8 11
+2 11 15  3 16 22  4 20 27  5 25 34  6 33 45  7 39 53
+9 47 64  10 53 71  11 58 79  12 62 85  12 66 90  13 69 94
+13 71 96  11 60 82  0 0 1  47 44 41  250 249 249  255 255 255
+252 254 255  225 202 147  209 152 1  194 135 4  249 197 0  255 210 1
+255 206 13  209 152 1  209 152 1  249 197 0  254 207 32  225 202 147
+247 240 225  250 252 255  252 254 255  255 255 255  252 254 255  191 183 178
+1 1 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+3 15 20  14 74 101  13 72 98  13 71 96  13 69 94  12 66 90
+12 61 82  11 58 79  9 50 69  9 47 64  8 41 55  6 33 45
+5 27 37  3 21 29  3 16 22  2 13 18  2 9 12  1 7 9
+1 5 6  0 3 4  0 2 3  0 1 1  0 0 1  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 2 3  0 2 3  0 3 4  1 5 6  1 8 11
+2 11 15  3 16 22  3 21 29  5 25 34  6 33 45  7 39 53
+9 47 64  10 53 71  11 58 79  12 62 85  12 66 90  13 69 94
+13 72 98  12 62 85  0 0 0  0 2 3  201 196 193  254 251 250
+176 158 88  209 152 1  249 197 0  243 191 0  255 205 1  254 207 32
+255 209 45  255 205 1  249 197 0  255 205 1  255 205 1  249 197 0
+243 191 0  235 193 64  225 202 147  245 245 244  242 244 252  81 83 82
+0 0 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+1 5 6  13 72 98  14 73 99  13 72 97  13 69 94  13 67 92
+12 62 85  11 58 79  10 53 71  9 49 66  8 42 57  7 36 49
+6 30 41  4 23 31  4 19 26  3 15 20  1 10 14  1 8 11
+1 5 6  0 3 4  0 2 3  0 1 1  0 1 1  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 1  0 1 1  0 2 3  1 4 5  1 5 6  1 8 11
+2 12 16  3 16 22  3 21 29  5 25 34  6 33 45  7 39 53
+9 47 64  10 53 71  11 58 79  12 62 85  12 66 90  13 69 94
+13 73 99  12 66 90  1 1 1  0 0 0  55 54 51  199 145 62
+222 168 1  255 205 1  255 205 1  255 205 1  255 205 1  255 209 45
+255 208 38  255 205 1  255 205 1  255 205 1  255 205 1  255 206 13
+255 206 22  255 205 1  249 198 10  255 208 38  167 119 72  1 5 6
+1 1 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  13 69 94  13 72 98  13 72 98  13 71 96  13 68 93
+12 64 87  11 60 82  11 56 77  9 50 69  8 45 61  7 39 53
+6 33 45  5 27 37  3 21 29  3 16 22  2 13 18  2 10 13
+1 7 9  1 4 5  0 3 4  0 2 3  0 1 1  0 1 1
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 1  0 2 3  0 3 4  0 3 4  1 7 9  1 8 11
+2 12 16  3 16 22  4 22 30  5 27 37  6 33 45  8 41 55
+9 47 64  10 53 71  11 58 79  12 62 85  13 67 91  13 69 94
+13 72 97  13 72 98  1 8 11  11 5 1  154 101 6  222 168 1
+255 205 1  255 205 1  255 205 1  255 205 1  255 206 13  255 209 45
+255 206 13  255 205 1  255 205 1  255 205 1  255 208 38  255 209 52
+255 209 49  255 206 22  255 205 1  255 210 1  243 191 0  109 74 3
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  11 58 79  13 72 98  13 72 98  13 72 97  13 69 94
+13 67 91  12 62 85  11 58 79  10 54 74  9 49 66  8 42 57
+7 36 49  6 30 41  4 24 33  4 20 27  3 15 20  2 11 15
+1 8 11  1 7 9  1 4 5  0 2 3  0 2 3  0 1 1
+0 0 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 1
+0 1 1  0 2 3  0 2 3  1 5 6  1 7 9  2 9 12
+2 13 18  4 19 26  4 23 31  5 29 40  7 36 49  8 42 57
+9 49 66  10 54 74  11 58 79  12 64 87  13 67 91  13 69 94
+13 72 97  14 73 100  0 8 19  72 47 3  209 152 1  255 205 1
+255 205 1  255 205 1  255 205 1  255 205 1  255 206 22  255 206 22
+255 205 1  255 205 1  255 205 1  254 207 32  255 209 55  255 209 49
+255 206 22  255 205 1  255 205 1  255 210 1  241 197 0  154 101 6
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  7 36 49  14 73 100  14 73 99  14 73 99  13 71 96
+13 68 93  12 64 87  11 60 82  11 56 77  10 53 71  9 47 64
+7 39 53  6 33 45  5 28 38  4 23 31  3 18 24  3 15 20
+2 10 13  1 7 9  1 7 9  0 3 4  0 2 3  0 1 1
+0 1 1  0 0 1  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 1  0 1 1
+0 1 1  0 3 4  1 4 5  1 5 6  1 8 11  2 11 15
+3 15 20  4 20 27  5 25 34  6 30 41  7 36 49  8 44 59
+9 50 69  10 54 74  11 60 82  12 64 87  13 68 93  13 71 96
+13 72 98  13 72 98  0 14 27  109 74 3  234 181 0  255 205 1
+255 205 1  255 205 1  255 205 1  255 205 1  255 206 13  255 205 1
+255 205 1  255 205 1  254 207 32  255 209 55  255 208 38  255 206 13
+255 205 1  255 210 1  243 191 0  222 168 1  234 181 0  109 74 3
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  2 9 12  14 73 100  14 74 101  14 73 99  13 72 98
+13 69 94  13 67 91  12 62 85  11 58 79  10 54 74  9 49 66
+8 42 57  7 36 49  6 30 41  5 25 34  3 21 29  3 16 22
+2 12 16  2 9 12  1 7 9  1 5 6  0 3 4  0 3 4
+0 1 1  0 1 1  0 0 1  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 1  0 1 1  0 1 1
+0 2 3  0 2 3  1 5 6  1 7 9  2 9 12  2 12 16
+3 16 22  4 22 30  5 27 37  6 33 45  7 38 51  8 45 61
+9 50 69  11 56 77  11 60 82  12 64 87  13 68 93  13 71 96
+13 72 98  14 73 99  4 24 33  72 47 3  209 152 1  255 205 1
+255 205 1  255 205 1  255 205 1  255 205 1  255 205 1  255 205 1
+255 205 1  255 206 22  255 209 49  254 207 32  255 210 1  249 197 0
+243 191 0  222 168 1  209 152 1  234 181 0  209 152 1  22 11 1
+0 0 0  0 0 0  12 12 11  47 44 41  12 12 11  0 0 0
+0 0 0  0 0 0  13 69 94  14 73 99  14 74 101  13 72 98
+13 71 96  13 67 91  12 64 87  12 61 82  11 58 79  10 53 71
+8 45 61  8 41 55  6 33 45  5 29 40  4 24 33  4 19 26
+3 15 20  2 11 15  1 8 11  1 7 9  1 5 6  0 3 4
+0 2 3  0 1 1  0 1 1  0 0 1  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 1  0 1 1  0 1 1  0 2 3
+0 3 4  1 4 5  1 7 9  1 8 11  2 11 15  3 15 20
+4 19 26  5 25 34  6 30 41  7 36 49  8 41 55  9 47 64
+10 53 71  11 58 79  12 64 87  12 66 90  13 69 94  13 72 97
+13 72 98  14 73 99  6 33 45  5 1 0  154 101 6  209 152 1
+243 191 0  255 205 1  255 210 1  255 205 1  255 205 1  255 205 1
+255 205 1  255 206 13  255 206 13  243 191 0  222 168 1  222 168 1
+234 181 0  241 197 0  255 205 1  234 181 0  139 105 59  16 15 15
+0 0 0  0 0 0  16 15 15  113 114 111  105 98 84  55 54 51
+9 8 8  0 0 0  9 49 66  14 73 99  14 74 101  13 72 98
+13 72 97  13 69 94  13 67 91  12 64 87  11 60 82  10 54 74
+9 49 66  8 44 59  7 38 51  6 33 45  5 27 37  4 22 30
+3 18 24  3 15 20  2 11 15  1 8 11  1 7 9  1 4 5
+0 3 4  0 2 3  0 1 1  0 1 1  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 1 1  0 1 1  0 2 3  0 3 4
+1 4 5  1 7 9  1 8 11  1 10 14  2 13 18  3 18 24
+4 22 30  5 28 38  6 33 45  7 38 51  8 45 61  9 50 69
+11 56 77  11 60 82  12 64 87  13 67 91  13 69 94  13 72 98
+14 73 99  13 74 101  6 40 54  4 0 5  105 98 84  174 129 27
+183 122 1  222 168 1  234 181 0  234 181 0  243 191 0  243 191 0
+234 181 0  222 168 1  209 152 1  222 168 1  241 197 0  255 205 1
+255 205 1  222 168 1  202 153 21  176 158 88  171 169 168  81 83 82
+0 0 0  0 0 0  0 2 3  69 68 60  113 114 111  113 114 111
+69 68 60  2 2 2  1 10 14  14 74 101  14 74 101  14 74 99
+14 73 99  13 71 96  13 69 94  12 66 90  12 64 87  11 58 79
+10 53 71  9 47 64  8 42 57  7 38 51  6 33 45  5 25 34
+3 21 29  3 16 22  2 13 18  1 10 14  1 8 11  1 5 6
+0 3 4  0 2 3  0 2 3  0 1 1  0 1 1  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 1 1  0 1 1  0 2 3  0 3 4  1 4 5
+1 5 6  1 8 11  1 10 14  2 13 18  3 16 22  3 21 29
+5 25 34  6 33 45  7 38 51  8 41 55  9 47 64  10 53 71
+11 58 79  12 64 87  12 66 90  13 69 94  13 72 97  13 72 98
+14 73 99  14 74 101  7 38 51  0 0 0  148 147 146  148 147 146
+154 101 6  194 135 4  209 152 1  209 152 1  209 152 1  209 152 1
+209 152 1  234 181 0  241 197 0  255 210 1  249 197 0  222 168 1
+202 153 21  176 158 88  208 204 201  224 226 233  213 210 208  148 147 146
+9 8 8  0 0 0  0 0 0  17 25 27  81 83 82  113 114 111
+113 114 111  12 12 11  0 0 0  11 56 77  14 73 100  14 74 101
+14 73 99  13 72 98  13 69 94  13 68 93  12 66 90  12 62 85
+11 58 79  10 53 71  9 47 64  8 42 57  7 36 49  6 30 41
+5 25 34  4 20 27  3 16 22  2 13 18  1 10 14  1 7 9
+1 5 6  0 3 4  0 3 4  0 2 3  0 1 1  0 1 1
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 1 1  0 1 1  0 2 3  0 3 4  0 3 4  1 5 6
+1 7 9  1 10 14  2 13 18  3 16 22  4 20 27  5 25 34
+6 30 41  7 36 49  8 41 55  9 47 64  9 50 69  11 56 77
+11 60 82  12 64 87  13 67 91  13 69 94  13 72 98  14 73 99
+14 74 101  14 74 101  3 16 22  5 1 0  148 147 146  191 183 178
+148 147 146  139 105 59  194 135 4  243 191 0  255 210 1  255 210 1
+255 210 1  255 205 1  243 191 0  222 168 1  194 135 4  176 158 88
+201 196 193  224 226 233  242 244 252  242 244 252  248 247 247  224 223 222
+69 68 60  0 0 0  0 0 0  0 0 0  12 12 11  69 68 60
+47 44 41  0 0 1  0 0 0  0 3 4  13 72 97  14 74 101
+14 74 101  14 73 99  13 72 97  13 69 94  13 67 92  12 64 87
+11 60 82  11 56 77  9 50 69  9 47 64  8 41 55  7 36 49
+6 30 41  5 25 34  4 20 27  3 16 22  2 13 18  2 10 13
+1 7 9  1 5 6  1 4 5  0 2 3  0 2 3  0 1 1
+0 1 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 1
+0 1 1  0 2 3  0 2 3  0 3 4  1 5 6  1 7 9
+2 9 12  2 12 16  3 15 20  4 19 26  4 23 31  5 28 38
+6 33 45  6 40 54  8 44 59  9 49 66  10 54 74  11 58 79
+12 64 87  13 67 91  13 69 94  13 71 96  14 73 99  14 73 99
+14 73 100  12 62 85  0 0 0  2 2 2  148 147 146  213 210 208
+171 169 168  148 147 146  139 105 59  194 135 4  209 152 1  222 168 1
+222 168 1  209 152 1  194 135 4  174 129 27  171 169 168  213 210 208
+224 226 233  250 252 255  255 255 255  255 255 255  254 254 254  252 254 255
+148 147 146  0 0 0  0 0 0  0 0 0  0 0 1  0 0 1
+0 0 1  0 0 0  0 0 0  0 0 0  9 47 64  14 73 99
+14 74 101  14 73 99  13 72 98  13 72 97  13 69 94  12 66 90
+12 64 87  11 58 79  10 54 74  9 50 69  8 45 61  7 39 53
+6 33 45  5 28 38  4 24 33  4 19 26  3 15 20  2 12 16
+1 9 12  1 7 9  1 4 5  0 3 4  0 2 3  0 1 1
+0 1 1  0 0 1  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 1  0 1 1
+0 1 1  0 2 3  1 4 5  1 5 6  1 7 9  2 9 12
+2 12 16  3 15 20  4 19 26  4 23 31  5 27 37  6 33 45
+7 38 51  8 44 59  9 49 66  10 53 71  11 58 79  12 62 85
+12 66 90  13 69 94  13 71 96  13 72 98  14 73 99  14 74 101
+14 73 100  4 24 33  0 0 0  0 0 1  171 169 168  242 244 252
+208 204 201  171 169 168  148 147 146  139 105 59  139 105 59  139 105 59
+167 119 72  167 119 72  148 147 146  191 183 178  218 217 217  241 241 241
+254 254 254  255 255 255  255 255 255  255 255 255  255 255 255  254 254 254
+237 237 236  55 54 51  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  1 5 6  14 72 97
+14 73 100  14 74 101  14 73 99  13 72 98  13 71 96  13 68 93
+12 66 90  12 62 85  11 58 79  10 54 74  9 49 66  8 44 59
+7 38 51  6 33 45  5 28 38  4 23 31  4 19 26  3 15 20
+2 11 15  2 9 12  1 7 9  1 5 6  0 3 4  0 2 3
+0 1 1  0 1 1  0 0 1  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 1  0 1 1  0 1 1
+0 2 3  0 3 4  1 5 6  1 7 9  1 8 11  2 12 16
+3 15 20  4 19 26  4 23 31  5 28 38  6 33 45  7 38 51
+8 44 59  9 49 66  10 54 74  11 58 79  12 62 85  12 64 87
+13 68 93  13 69 94  13 72 98  14 73 99  14 74 101  14 73 99
+11 60 82  0 0 0  0 0 0  16 15 15  218 217 217  252 254 255
+241 241 241  208 204 201  171 169 168  148 147 146  148 147 146  148 147 146
+148 147 146  171 169 168  201 196 193  218 217 217  245 245 244  254 254 254
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+252 254 255  171 169 168  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  6 33 45
+14 74 101  14 74 101  14 74 101  13 72 98  13 72 98  13 69 94
+13 68 93  12 64 87  12 62 85  11 58 79  10 53 71  9 47 64
+8 42 57  7 38 51  6 33 45  5 27 37  4 23 31  3 18 24
+3 15 20  2 12 16  2 9 12  1 7 9  1 4 5  0 3 4
+0 2 3  0 1 1  0 1 1  0 0 1  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 1  0 1 1  0 1 1  0 2 3
+0 3 4  1 5 6  1 7 9  2 9 12  2 11 15  3 15 20
+3 18 24  4 23 31  5 28 38  6 33 45  7 38 51  8 42 57
+9 47 64  10 53 71  11 58 79  11 60 82  12 64 87  13 67 92
+13 69 94  13 72 97  13 72 98  14 74 101  14 74 101  14 72 97
+1 7 9  0 0 0  0 0 0  113 114 111  255 255 255  255 255 255
+255 255 255  237 237 236  201 196 193  171 169 168  171 169 168  171 169 168
+201 196 193  208 204 201  224 223 222  250 249 249  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  248 247 247  69 68 60  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+11 58 79  14 73 99  14 74 101  14 74 101  14 73 99  13 72 98
+13 69 94  13 67 92  12 64 87  12 61 82  11 58 79  10 53 71
+9 47 64  8 42 57  7 38 51  6 33 45  5 27 37  4 22 30
+3 18 24  3 15 20  2 12 16  1 8 11  1 7 9  1 4 5
+0 3 4  0 2 3  0 1 1  0 1 1  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 1 1  0 2 3  0 2 3  0 3 4
+1 4 5  1 7 9  1 8 11  2 12 16  3 15 20  3 18 24
+4 23 31  5 27 37  6 33 45  7 38 51  8 42 57  9 47 64
+10 53 71  11 58 79  11 60 82  12 64 87  13 67 91  13 69 94
+13 72 97  13 72 98  14 74 101  14 74 101  14 73 100  4 23 31
+0 0 0  0 0 0  55 54 51  241 241 241  255 255 255  255 255 255
+255 255 255  252 254 255  232 232 232  208 204 201  208 204 201  208 204 201
+218 217 217  232 232 232  250 251 253  252 254 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  252 254 255  148 147 146  0 0 1  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+1 7 9  13 72 98  14 74 101  14 75 102  14 74 101  13 72 98
+13 72 97  13 69 94  13 67 91  12 64 87  11 60 82  11 58 79
+10 53 71  9 47 64  8 42 57  7 38 51  6 33 45  5 27 37
+3 21 29  3 18 24  3 15 20  2 11 15  1 8 11  1 7 9
+1 4 5  0 3 4  0 2 3  0 1 1  0 1 1  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 1  0 1 1  0 2 3  0 3 4  0 3 4
+1 5 6  1 8 11  1 10 14  3 15 20  3 16 22  3 21 29
+5 25 34  6 30 41  7 36 49  8 42 57  9 47 64  10 53 71
+11 56 77  11 60 82  12 64 87  12 66 90  13 69 94  13 72 97
+13 72 98  14 73 99  14 74 101  14 73 99  10 53 71  0 0 0
+0 0 0  6 5 5  171 169 168  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  250 251 253  224 226 233  218 217 217  224 226 233
+241 241 241  250 251 253  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  218 217 217  12 12 11  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  7 39 53  14 73 100  14 74 101  14 74 101  14 73 99
+13 72 98  13 71 96  13 69 94  13 67 91  12 64 87  11 60 82
+11 56 77  9 50 69  9 47 64  8 41 55  7 36 49  6 30 41
+5 25 34  3 21 29  3 18 24  2 13 18  2 10 13  1 8 11
+1 5 6  0 3 4  0 2 3  0 2 3  0 1 1  0 0 1
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 1  0 1 1  0 1 1  0 3 4  0 3 4  1 5 6
+1 7 9  2 10 13  2 13 18  3 16 22  4 20 27  5 25 34
+6 30 41  7 36 49  8 41 55  9 47 64  9 50 69  11 56 77
+11 60 82  12 64 87  13 67 91  13 69 94  13 71 96  13 72 98
+14 73 99  14 74 101  14 74 101  13 69 94  0 3 4  0 0 0
+0 0 0  113 114 111  251 251 251  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  251 251 251  250 249 249  251 251 251
+252 254 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  250 252 255  69 68 60  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  12 64 87  14 73 99  14 74 101  14 74 101
+14 73 99  13 72 98  13 71 96  13 69 94  12 66 90  12 64 87
+11 58 79  10 54 74  9 50 69  8 45 61  8 41 55  6 33 45
+5 29 40  5 25 34  3 21 29  3 16 22  2 12 16  2 9 12
+1 7 9  1 5 6  0 3 4  0 2 3  0 1 1  0 1 1
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 1 1  0 1 1  0 2 3  0 3 4  1 5 6  1 7 9
+2 9 12  2 12 16  3 16 22  4 20 27  4 24 33  5 29 40
+7 36 49  8 41 55  9 47 64  9 50 69  10 54 74  11 60 82
+12 64 87  13 67 91  13 69 94  13 71 96  13 72 98  14 74 99
+14 74 101  14 73 100  14 73 99  3 15 20  0 0 0  0 0 0
+39 37 35  224 223 222  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  148 147 146  0 0 1  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  2 9 12  13 69 94  14 74 101  14 74 101
+14 74 101  14 73 99  13 72 98  13 71 96  13 69 94  12 66 90
+12 64 87  11 58 79  10 54 74  9 50 69  8 45 61  7 39 53
+6 33 45  5 29 40  5 25 34  4 19 26  3 15 20  2 12 16
+2 9 12  1 7 9  1 5 6  0 3 4  0 2 3  0 1 1
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 1
+0 1 1  0 2 3  0 3 4  1 4 5  1 7 9  1 9 12
+2 11 15  3 15 20  4 19 26  5 25 34  5 28 38  6 33 45
+6 40 54  8 45 61  9 50 69  10 54 74  11 58 79  12 64 87
+12 66 90  13 69 94  13 71 96  13 72 98  14 73 99  14 74 101
+14 74 101  14 74 101  4 23 31  0 0 0  0 0 0  0 0 0
+113 114 111  252 254 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  224 223 222  21 20 20  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  1 10 14  14 74 101  14 75 102
+14 74 101  14 74 101  14 73 99  13 72 98  13 71 96  13 69 94
+12 66 90  12 62 85  11 60 82  10 54 74  9 50 69  8 45 61
+7 39 53  7 36 49  5 29 40  4 23 31  4 19 26  3 15 20
+2 12 16  2 9 12  1 7 9  1 4 5  0 3 4  0 1 1
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 1  0 1 1
+0 1 1  0 2 3  0 3 4  1 7 9  1 7 9  2 10 13
+3 15 20  3 18 24  4 23 31  5 28 38  6 33 45  7 38 51
+8 44 59  9 49 66  10 54 74  11 58 79  12 62 85  12 66 90
+13 69 94  13 71 96  13 72 98  13 72 98  14 74 101  14 74 101
+14 73 100  7 39 53  0 0 0  0 0 0  0 0 0  16 15 15
+148 147 146  253 253 253  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  253 253 253
+248 247 247  248 247 247  254 254 254  251 251 251  148 147 146  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  6 33 45  14 75 102
+14 75 102  14 74 101  14 74 101  14 73 99  13 72 98  13 69 94
+13 68 93  12 66 90  12 64 87  11 58 79  10 54 74  9 49 66
+8 44 59  7 38 51  6 33 45  5 28 38  4 23 31  3 18 24
+3 15 20  2 11 15  1 8 11  1 7 9  1 4 5  0 2 3
+0 0 0  0 0 0  0 0 0  0 0 0  0 1 1  0 1 1
+0 2 3  0 3 4  1 5 6  1 7 9  2 9 12  2 12 16
+3 16 22  4 20 27  5 25 34  6 30 41  7 36 49  8 41 55
+9 47 64  10 53 71  11 58 79  11 60 82  12 64 87  13 68 93
+13 69 94  13 72 97  14 73 99  14 74 101  14 74 101  14 74 101
+13 69 94  0 0 0  0 0 0  0 0 0  0 0 0  39 37 35
+171 169 168  232 232 232  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  248 247 247  253 253 253  254 254 254  250 249 249
+251 251 251  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  253 253 253  245 245 244  232 232 232  224 223 222  218 217 217
+208 204 201  208 204 201  224 223 222  255 255 255  218 217 217  7 7 7
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  12 66 90
+14 74 101  14 74 101  14 74 101  14 74 101  14 73 99  13 72 97
+13 69 94  13 67 91  12 64 87  12 62 85  11 58 79  10 53 71
+9 47 64  8 42 57  7 36 49  6 33 45  5 25 34  3 21 29
+3 16 22  2 13 18  2 10 13  1 7 9  1 5 6  0 3 4
+0 0 0  0 0 0  0 0 0  0 0 1  0 1 1  0 1 1
+0 2 3  0 3 4  1 5 6  1 8 11  2 11 15  3 15 20
+3 18 24  4 23 31  5 29 40  7 36 49  8 41 55  8 45 61
+9 50 69  11 56 77  11 60 82  12 64 87  13 67 91  13 69 94
+13 72 97  14 73 99  14 74 101  14 74 101  14 74 101  14 74 101
+5 28 38  0 0 0  0 0 0  0 0 0  0 0 0  69 68 60
+148 147 146  191 183 178  224 223 222  251 251 251  255 255 255  255 255 255
+255 255 255  255 255 255  237 237 236  208 204 201  208 204 201  224 223 222
+245 245 244  255 255 255  255 255 255  255 255 255  255 255 255  254 254 254
+245 245 244  218 217 217  201 196 193  171 169 168  171 169 168  148 147 146
+148 147 146  171 169 168  171 169 168  224 223 222  255 255 255  81 83 82
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  3 18 24
+14 74 101  14 75 102  14 74 101  14 74 101  14 74 101  14 73 99
+13 72 97  13 69 94  13 67 92  12 64 87  11 60 82  11 56 77
+9 50 69  9 47 64  8 42 57  7 36 49  5 29 40  4 24 33
+4 19 26  3 16 22  2 12 16  2 9 12  1 7 9  1 5 6
+0 0 0  0 0 0  0 0 0  0 1 1  0 2 3  0 2 3
+0 3 4  1 5 6  1 7 9  2 10 13  2 13 18  3 16 22
+4 22 30  5 27 37  6 33 45  7 39 53  8 44 59  9 50 69
+10 54 74  11 60 82  12 64 87  13 67 91  13 69 94  13 72 97
+14 73 100  14 74 101  14 74 101  14 74 101  14 74 101  12 62 85
+0 0 0  0 0 0  0 0 0  0 0 0  6 5 5  113 114 111
+148 147 146  171 169 168  213 210 208  241 241 241  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  232 232 232  229 228 227  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+253 253 253  241 241 241  224 223 222  201 196 193  191 183 178  171 169 168
+171 169 168  148 147 146  148 147 146  171 169 168  232 232 232  191 183 178
+1 4 5  0 0 0  21 20 20  29 26 26  3 3 3  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+9 49 66  14 74 101  14 75 102  14 74 101  14 74 101  14 74 99
+13 72 98  13 72 97  13 69 94  13 67 91  12 64 87  11 60 82
+10 54 74  9 50 69  9 47 64  8 41 55  6 33 45  5 28 38
+4 23 31  4 19 26  3 15 20  2 11 15  1 8 11  1 7 9
+0 0 0  0 0 0  0 0 1  0 1 1  0 1 1  0 3 4
+1 4 5  1 7 9  2 9 12  2 12 16  3 16 22  4 20 27
+5 25 34  6 30 41  7 36 49  8 42 57  9 47 64  10 53 71
+11 58 79  12 62 85  12 66 90  13 68 93  13 72 97  14 73 99
+14 73 99  14 74 101  14 74 101  14 75 102  14 74 101  4 19 26
+0 0 0  0 0 0  0 0 0  0 0 0  39 37 35  148 147 146
+213 210 208  241 241 241  250 249 249  254 254 254  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  252 254 255  252 254 255  242 244 252  232 232 232
+218 217 217  191 183 178  171 169 168  148 147 146  171 169 168  237 237 236
+81 83 82  0 0 0  9 8 8  39 37 35  55 54 51  16 15 15
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 2 3  14 72 97  14 75 102  14 75 102  14 74 101  14 74 101
+14 73 99  13 72 98  13 72 97  13 69 94  12 66 90  12 64 87
+11 58 79  10 54 74  9 50 69  8 44 59  7 38 51  6 33 45
+5 27 37  4 22 30  3 18 24  2 13 18  2 10 13  1 7 9
+0 0 0  0 0 0  0 1 1  0 2 3  0 2 3  0 3 4
+1 5 6  1 7 9  2 10 13  3 15 20  3 18 24  4 22 30
+5 28 38  7 36 49  8 41 55  9 47 64  9 50 69  11 56 77
+11 60 82  12 66 90  13 68 93  13 69 94  13 72 98  14 73 99
+14 74 101  14 74 101  14 74 101  14 74 101  12 64 87  0 0 0
+1 1 1  9 8 8  0 0 0  0 2 3  105 98 84  232 232 232
+255 255 255  252 254 255  252 254 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+252 254 255  242 244 252  224 223 222  171 169 168  148 147 146  201 196 193
+191 183 178  3 3 3  0 0 0  0 0 0  17 25 27  55 54 51
+16 15 15  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  7 36 49  14 74 101  14 75 102  14 74 101  14 74 101
+14 74 101  14 73 99  13 72 98  13 71 96  13 69 94  12 66 90
+12 62 85  11 58 79  10 53 71  9 47 64  8 42 57  7 36 49
+6 30 41  5 25 34  4 20 27  3 16 22  2 12 16  2 9 12
+0 0 0  0 0 0  0 1 1  0 2 3  0 2 3  1 4 5
+1 7 9  1 8 11  2 12 16  3 16 22  4 20 27  5 25 34
+6 30 41  7 36 49  8 44 59  9 49 66  10 54 74  11 58 79
+12 62 85  13 67 91  13 69 94  13 72 97  13 72 98  14 74 101
+14 74 101  14 74 101  14 75 102  14 74 101  6 33 45  0 0 0
+21 20 20  21 20 20  0 0 0  7 7 7  191 183 178  252 254 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  232 232 232  191 183 178  148 147 146
+218 217 217  55 54 51  0 0 1  0 0 0  0 0 0  21 20 20
+39 37 35  4 4 4  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  14 72 97  14 74 101  14 75 102  14 74 101
+14 74 101  14 74 101  13 72 98  13 72 98  13 69 94  13 67 91
+12 64 87  11 60 82  11 56 77  9 50 69  8 45 61  6 40 54
+6 33 45  5 28 38  4 23 31  4 19 26  3 15 20  2 10 13
+0 0 0  0 0 1  0 1 1  0 2 3  0 3 4  1 5 6
+1 7 9  2 10 13  2 13 18  3 18 24  4 23 31  5 28 38
+6 33 45  8 41 55  9 47 64  10 53 71  11 58 79  11 60 82
+12 66 90  13 68 93  13 71 96  13 72 98  14 73 99  14 75 102
+14 74 101  14 75 102  14 75 102  14 73 99  1 4 5  6 5 5
+47 44 41  12 12 11  0 0 0  81 83 82  242 244 252  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  252 254 255  241 241 241  201 196 193
+208 204 201  148 147 146  0 0 0  0 0 0  3 3 3  3 3 3
+12 12 11  12 12 11  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  6 33 45  14 74 101  14 75 102  14 75 102
+14 74 101  14 75 102  14 74 101  13 72 98  13 72 97  13 69 94
+12 66 90  12 64 87  11 60 82  10 54 74  9 49 66  8 42 57
+7 36 49  6 33 45  5 27 37  4 20 27  3 16 22  2 12 16
+0 0 0  0 1 1  0 2 3  0 2 3  1 4 5  1 5 6
+1 8 11  2 12 16  3 16 22  3 21 29  5 25 34  6 30 41
+7 36 49  8 44 59  9 49 66  10 54 74  11 60 82  12 64 87
+13 67 91  13 69 94  13 72 98  14 73 100  14 74 101  14 74 101
+14 75 102  14 75 102  14 74 101  11 56 77  0 0 0  29 26 26
+28 32 37  0 0 0  9 8 8  208 204 201  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  252 254 255  242 244 252
+218 217 217  218 217 217  29 26 26  0 0 0  16 15 15  39 37 35
+16 15 15  12 12 11  16 15 15  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  12 64 87  14 74 101  14 75 102
+14 75 102  14 74 101  14 74 101  14 74 99  13 72 98  13 71 96
+13 68 93  12 66 90  12 62 85  11 58 79  10 53 71  9 47 64
+8 41 55  7 36 49  6 30 41  4 24 33  4 19 26  3 15 20
+0 0 1  0 1 1  0 2 3  0 3 4  1 5 6  1 7 9
+2 10 13  2 13 18  4 19 26  4 23 31  5 29 40  6 33 45
+8 41 55  9 47 64  10 53 71  11 58 79  12 62 85  12 66 90
+13 69 94  13 72 97  14 73 99  14 74 99  14 75 102  14 74 101
+14 75 102  14 76 103  14 73 100  3 18 24  4 4 4  47 44 41
+12 12 11  0 0 0  113 114 111  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  253 253 253  254 254 254  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+254 254 254  250 249 249  113 114 111  0 0 0  17 25 27  47 44 41
+29 26 26  16 15 15  47 44 41  6 5 5  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  4 24 33  14 75 102  14 75 102
+14 75 102  14 74 101  14 74 101  14 74 101  14 73 99  13 72 98
+13 69 94  13 68 93  12 64 87  11 60 82  11 56 77  9 50 69
+8 45 61  6 40 54  6 33 45  5 27 37  3 21 29  3 16 22
+0 1 1  0 2 3  0 3 4  1 4 5  1 7 9  1 8 11
+2 12 16  3 16 22  4 20 27  5 25 34  6 33 45  7 36 49
+8 44 59  9 50 69  10 54 74  11 60 82  12 64 87  13 67 91
+13 69 94  13 72 98  14 73 99  14 74 101  14 74 101  14 75 102
+14 75 102  14 74 101  11 58 79  0 0 0  29 26 26  39 37 35
+0 0 0  47 44 41  241 241 241  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  253 253 253  248 247 247  250 249 249  254 254 254  255 255 255
+255 255 255  253 253 253  251 251 251  253 253 253  255 255 255  255 255 255
+255 255 255  255 255 255  171 169 168  0 0 0  12 12 11  39 37 35
+12 12 11  0 0 1  55 54 51  39 37 35  0 0 1  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  13 69 94  14 75 102
+14 75 102  14 75 102  14 74 101  14 74 101  14 74 99  13 72 98
+13 71 96  13 69 94  13 67 91  12 62 85  11 58 79  10 53 71
+9 47 64  8 42 57  7 36 49  6 30 41  4 24 33  3 18 24
+0 1 1  0 2 3  0 3 4  1 5 6  1 7 9  2 10 13
+2 13 18  3 18 24  4 23 31  5 28 38  6 33 45  7 39 53
+9 47 64  10 53 71  11 58 79  12 62 85  12 66 90  13 69 94
+13 71 96  13 72 98  14 74 101  14 74 101  14 74 101  14 75 102
+14 75 102  14 75 102  5 27 37  0 0 0  39 37 35  21 20 20
+0 0 1  148 147 146  251 251 251  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  245 245 244  237 237 236  241 241 241  251 251 251  255 255 255
+255 255 255  250 249 249  248 247 247  251 251 251  254 254 254  255 255 255
+255 255 255  255 255 255  218 217 217  7 7 7  0 0 0  6 5 5
+0 0 0  0 0 0  39 37 35  69 68 60  0 0 1  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  9 50 69  14 74 101
+14 74 101  14 75 102  14 75 102  14 74 101  14 74 101  14 73 99
+13 72 98  13 69 94  13 68 93  12 64 87  11 60 82  10 54 74
+9 49 66  8 45 61  7 39 53  6 33 45  5 27 37  3 21 29
+0 1 1  0 3 4  1 4 5  1 7 9  2 9 12  2 12 16
+3 16 22  3 21 29  5 25 34  6 33 45  7 38 51  8 42 57
+9 49 66  10 54 74  11 60 82  12 64 87  13 67 91  13 69 94
+13 72 98  14 73 99  14 74 101  14 74 101  14 75 102  14 75 102
+14 75 102  13 72 98  0 1 1  12 12 11  39 37 35  4 4 4
+16 15 15  218 217 217  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+253 253 253  237 237 236  213 210 208  224 223 222  245 245 244  251 251 251
+253 253 253  245 245 244  241 241 241  248 247 247  253 253 253  255 255 255
+255 255 255  252 254 255  250 249 249  47 44 41  0 0 0  0 0 0
+0 0 0  0 0 0  16 15 15  69 68 60  9 8 8  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  3 15 20  14 75 102
+14 75 102  14 75 102  14 75 102  14 74 101  14 74 101  14 74 101
+13 72 98  13 72 97  13 69 94  12 66 90  12 64 87  11 58 79
+10 53 71  9 49 66  8 42 57  7 36 49  5 29 40  4 23 31
+0 2 3  1 4 5  1 7 9  1 8 11  2 11 15  3 15 20
+3 18 24  4 23 31  5 29 40  7 36 49  8 41 55  9 47 64
+10 53 71  11 58 79  12 62 85  12 66 90  13 69 94  13 71 96
+13 72 98  14 74 99  14 74 101  14 74 101  14 75 102  14 75 102
+14 74 101  10 53 71  0 0 0  21 20 20  21 20 20  0 0 0
+81 83 82  254 254 254  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+254 251 250  224 223 222  201 196 193  208 204 201  229 228 227  245 245 244
+250 249 249  241 241 241  237 237 236  241 241 241  248 247 247  254 254 254
+255 255 255  255 255 255  255 255 255  81 83 82  0 0 0  0 0 0
+0 0 0  0 0 0  1 4 5  69 68 60  16 15 15  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  13 68 93
+14 75 102  14 75 102  14 75 102  14 75 102  14 74 101  14 74 101
+14 73 100  13 72 98  13 71 96  13 68 93  12 64 87  11 60 82
+11 56 77  9 50 69  8 45 61  7 39 53  6 33 45  5 25 34
+0 3 4  1 4 5  1 7 9  2 10 13  2 13 18  3 16 22
+3 21 29  5 27 37  6 33 45  7 39 53  8 45 61  9 50 69
+10 54 74  11 60 82  12 64 87  13 67 91  13 69 94  13 72 98
+14 73 99  14 75 102  14 74 101  14 75 102  14 75 102  14 75 102
+14 74 101  2 11 15  0 0 0  12 12 11  6 5 5  0 0 0
+148 147 146  253 253 253  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  248 247 247  245 245 244  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+250 249 249  218 217 217  191 183 178  191 183 178  213 210 208  237 237 236
+248 247 247  237 237 236  232 232 232  237 237 236  245 245 244  253 253 253
+255 255 255  255 255 255  255 255 255  113 114 111  2 2 2  0 0 0
+0 0 0  0 0 0  3 3 3  47 44 41  12 12 11  0 0 1
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  9 49 66
+14 74 101  14 75 102  14 75 102  14 75 102  14 74 101  14 74 101
+14 74 101  13 72 98  13 72 97  13 69 94  12 66 90  12 62 85
+11 58 79  10 53 71  9 47 64  8 42 57  7 36 49  5 27 37
+1 4 5  1 5 6  1 8 11  2 11 15  3 16 22  4 20 27
+4 24 33  6 30 41  7 36 49  8 42 57  9 47 64  10 53 71
+11 58 79  12 62 85  12 66 90  13 69 94  13 71 96  13 72 98
+14 74 99  14 74 101  14 74 101  14 75 102  14 75 102  14 74 101
+11 58 79  0 0 0  9 8 8  12 12 11  0 0 0  2 2 2
+201 196 193  252 254 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  229 228 227  224 223 222  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+248 247 247  213 210 208  191 183 178  191 183 178  201 196 193  229 228 227
+245 245 244  237 237 236  229 228 227  232 232 232  241 241 241  251 251 251
+255 255 255  255 255 255  255 255 255  171 169 168  0 0 0  0 0 0
+0 0 0  0 0 0  1 1 1  29 26 26  12 12 11  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  4 20 27
+14 75 102  14 75 102  14 75 102  14 76 103  14 75 102  14 74 101
+14 74 101  14 73 99  13 72 98  13 69 94  13 68 93  12 64 87
+11 60 82  11 56 77  9 50 69  8 44 59  7 36 49  6 30 41
+1 5 6  1 7 9  2 9 12  2 13 18  3 18 24  3 21 29
+5 27 37  6 33 45  7 39 53  8 45 61  9 50 69  10 54 74
+11 60 82  12 64 87  13 67 91  13 69 94  13 72 98  14 73 99
+14 74 101  14 75 102  14 76 103  14 75 102  14 76 103  14 75 102
+4 19 26  0 0 0  21 20 20  29 26 26  0 0 0  29 26 26
+232 232 232  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  252 254 255  250 252 255
+250 252 255  213 210 208  218 217 217  255 255 255  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  254 254 254
+248 247 247  208 204 201  171 169 168  191 183 178  201 196 193  224 223 222
+241 241 241  237 237 236  229 228 227  232 232 232  241 241 241  250 249 249
+255 255 255  255 255 255  252 254 255  208 204 201  4 4 4  0 0 0
+0 0 0  0 0 0  0 0 0  9 8 8  6 5 5  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  1 5 6
+14 75 102  14 76 103  14 76 103  14 76 103  14 76 103  14 75 102
+14 74 101  14 74 101  13 72 98  13 72 97  13 69 94  12 66 90
+12 62 85  11 56 77  10 53 71  8 45 61  7 39 53  6 30 41
+1 7 9  1 8 11  2 11 15  3 15 20  4 20 27  4 24 33
+6 30 41  7 36 49  8 42 57  9 47 64  10 53 71  11 58 79
+12 62 85  12 66 90  13 69 94  13 72 97  14 73 100  14 74 101
+14 75 102  14 76 103  14 76 103  14 76 103  14 75 102  12 62 85
+0 0 0  0 0 0  39 37 35  47 44 41  0 2 3  69 68 60
+253 253 253  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  245 245 244  213 210 208  176 158 88  176 158 88  199 145 62
+176 158 88  167 119 72  171 169 168  237 237 236  255 255 255  255 255 255
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+245 245 244  208 204 201  191 183 178  191 183 178  191 183 178  218 217 217
+237 237 236  232 232 232  232 232 232  232 232 232  241 241 241  248 247 247
+254 254 254  255 255 255  255 255 255  224 223 222  12 12 11  0 0 0
+0 0 0  0 0 0  7 7 7  12 12 11  1 1 1  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+13 69 94  14 76 103  14 76 103  14 76 103  14 76 103  14 76 103
+14 75 102  14 74 101  14 73 99  13 72 98  13 69 94  12 66 90
+12 64 87  11 58 79  10 53 71  9 47 64  8 41 55  6 33 45
+1 7 9  2 10 13  2 13 18  3 18 24  4 23 31  5 28 38
+6 33 45  7 39 53  9 47 64  10 53 71  11 56 77  11 60 82
+12 64 87  13 68 93  13 72 97  14 73 100  14 74 101  14 75 102
+14 76 103  14 78 106  14 78 106  14 78 106  14 76 103  3 16 22
+0 0 0  1 1 1  55 54 51  47 44 41  0 0 1  113 114 111
+254 254 254  255 255 255  255 255 255  255 255 255  255 255 255  250 252 255
+225 202 147  174 129 27  183 122 1  183 122 1  183 122 1  183 122 1
+183 122 1  183 122 1  183 122 1  174 129 27  225 202 147  248 247 247
+255 255 255  255 255 255  255 255 255  255 255 255  255 255 255  253 253 253
+237 237 236  208 204 201  191 183 178  191 183 178  191 183 178  208 204 201
+229 228 227  237 237 236  232 232 232  232 232 232  237 237 236  245 245 244
+253 253 253  255 255 255  255 255 255  229 228 227  16 15 15  0 0 0
+0 0 0  0 0 0  29 26 26  28 32 37  1 1 1  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+12 62 85  14 76 103  14 78 106  14 78 106  14 78 106  14 76 103
+14 75 102  14 74 101  14 74 101  13 72 98  13 69 94  13 68 93
+12 64 87  11 60 82  10 54 74  9 49 66  8 42 57  7 36 49
+1 8 11  2 12 16  3 15 20  4 20 27  5 25 34  6 30 41
+7 36 49  8 42 57  9 49 66  10 54 74  11 58 79  12 64 87
+13 67 91  13 69 94  13 72 98  14 74 101  14 76 103  14 78 106
+14 78 106  14 78 106  15 82 111  14 78 106  11 58 79  0 0 0
+0 0 0  2 2 2  55 54 51  55 54 51  0 0 0  148 147 146
+253 253 253  255 255 255  255 255 255  255 255 255  245 245 244  176 158 88
+183 122 1  183 122 1  183 122 1  183 122 1  194 135 4  194 135 4
+194 135 4  183 122 1  183 122 1  183 122 1  183 122 1  176 158 88
+229 228 227  255 255 255  255 255 255  255 255 255  255 255 255  251 251 251
+232 232 232  208 204 201  201 196 193  191 183 178  191 183 178  201 196 193
+224 223 222  232 232 232  232 232 232  232 232 232  232 232 232  241 241 241
+251 251 251  255 255 255  255 255 255  224 223 222  21 20 20  0 0 0
+0 0 0  0 0 0  47 44 41  47 44 41  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+11 60 82  14 78 106  15 82 111  14 78 106  14 78 106  14 78 106
+14 75 102  14 75 102  14 74 101  13 72 98  13 71 96  13 68 93
+12 64 87  11 60 82  11 56 77  9 50 69  8 44 59  7 36 49
+2 9 12  2 13 18  3 16 22  4 22 30  5 28 38  6 33 45
+7 39 53  8 45 61  10 53 71  11 56 77  12 62 85  12 66 90
+13 69 94  13 72 97  14 74 101  14 76 103  14 78 106  15 82 111
+15 82 111  15 82 111  15 86 117  15 82 111  4 20 27  0 0 0
+0 0 0  0 0 1  39 37 35  55 54 51  0 0 0  148 147 146
+255 255 255  255 255 255  255 255 255  255 255 255  176 158 88  183 122 1
+183 122 1  183 122 1  183 122 1  194 135 4  202 153 21  194 135 4
+202 153 21  194 135 4  194 135 4  183 122 1  183 122 1  183 122 1
+199 145 62  232 232 232  255 255 255  255 255 255  254 254 254  245 245 244
+224 223 222  208 204 201  208 204 201  191 183 178  191 183 178  201 196 193
+213 210 208  232 232 232  232 232 232  232 232 232  232 232 232  241 241 241
+248 247 247  254 254 254  255 255 255  224 223 222  29 26 26  0 0 0
+0 0 0  1 5 6  69 68 60  29 26 26  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+10 54 74  15 82 111  15 82 111  15 82 111  14 78 106  14 78 106
+14 76 103  14 75 102  14 74 101  14 73 99  13 71 96  13 68 93
+12 64 87  12 61 82  11 56 77  9 50 69  8 45 61  7 36 49
+2 10 13  2 13 18  4 19 26  4 24 33  6 30 41  7 36 49
+8 41 55  9 47 64  10 54 74  11 58 79  12 64 87  12 66 90
+13 69 94  13 72 98  14 75 102  14 78 106  15 82 111  15 82 111
+15 86 117  15 86 117  15 86 117  15 86 117  1 7 9  0 0 0
+0 0 0  0 0 1  21 20 20  69 68 60  2 2 2  148 147 146
+255 255 255  255 255 255  254 254 254  208 204 201  194 135 4  194 135 4
+183 122 1  183 122 1  194 135 4  194 135 4  202 153 21  202 153 21
+202 153 21  202 153 21  194 135 4  183 122 1  183 122 1  183 122 1
+183 122 1  176 158 88  255 255 255  255 255 255  254 254 254  237 237 236
+213 210 208  213 210 208  208 204 201  201 196 193  191 183 178  201 196 193
+208 204 201  229 228 227  232 232 232  232 232 232  232 232 232  241 241 241
+248 247 247  253 253 253  255 255 255  224 223 222  29 26 26  0 0 0
+0 0 0  16 15 15  81 83 82  7 7 7  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+11 56 77  15 86 117  15 86 117  15 82 111  15 82 111  14 78 106
+14 76 103  14 75 102  14 74 101  13 72 98  13 71 96  13 68 93
+12 64 87  12 61 82  11 56 77  9 50 69  8 45 61  7 36 49
+1 10 14  3 15 20  4 19 26  5 25 34  6 30 41  7 36 49
+8 44 59  9 50 69  10 54 74  11 60 82  12 64 87  13 68 93
+13 72 97  14 75 102  14 78 106  15 82 111  15 82 111  15 86 117
+16 90 122  16 90 122  16 90 122  16 90 122  2 13 18  0 0 0
+0 0 0  0 0 0  0 2 3  47 44 41  29 26 26  148 147 146
+252 254 255  255 255 255  251 251 251  199 145 62  194 135 4  183 122 1
+183 122 1  194 135 4  194 135 4  202 153 21  202 153 21  202 153 21
+202 153 21  202 153 21  202 153 21  194 135 4  183 122 1  183 122 1
+194 135 4  194 135 4  229 228 227  255 255 255  250 249 249  229 228 227
+208 204 201  213 210 208  213 210 208  201 196 193  191 183 178  201 196 193
+201 196 193  224 223 222  232 232 232  232 232 232  232 232 232  241 241 241
+248 247 247  253 253 253  255 255 255  224 223 222  29 26 26  0 0 0
+0 0 0  55 54 51  55 54 51  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+12 66 90  16 90 122  15 86 117  15 86 117  15 82 111  14 78 106
+14 78 106  14 75 102  14 74 101  13 72 98  13 71 96  13 68 93
+12 64 87  12 62 85  11 56 77  9 50 69  8 45 61  7 36 49
+2 11 15  3 15 20  4 20 27  5 25 34  6 33 45  7 38 51
+8 45 61  9 50 69  11 58 79  12 61 82  12 66 90  13 69 94
+14 73 99  14 76 103  15 82 111  15 86 117  15 86 117  16 90 122
+16 90 122  15 94 128  15 94 128  15 94 128  1 37 56  5 1 0
+72 47 3  109 74 3  35 25 1  2 11 15  47 44 41  148 147 146
+255 255 255  255 255 255  224 223 222  194 135 4  194 135 4  183 122 1
+183 122 1  183 122 1  194 135 4  202 153 21  202 153 21  225 176 47
+202 153 21  202 153 21  202 153 21  194 135 4  183 122 1  183 122 1
+194 135 4  183 122 1  225 202 147  254 251 250  245 245 244  213 210 208
+201 196 193  213 210 208  213 210 208  201 196 193  201 196 193  201 196 193
+201 196 193  224 223 222  232 232 232  232 232 232  237 237 236  245 245 244
+248 247 247  253 253 253  255 255 255  213 210 208  12 12 11  0 0 0
+4 4 4  69 68 60  16 15 15  0 0 0  0 0 0  3 1 3
+0 0 1  0 0 0  0 0 0  0 0 0  0 0 0  0 1 1
+15 82 111  16 90 122  16 90 122  15 86 117  15 82 111  15 82 111
+14 78 106  14 75 102  14 74 101  14 73 99  13 71 96  13 68 93
+12 64 87  12 61 82  11 56 77  9 50 69  8 45 61  7 36 49
+2 11 15  3 15 20  3 21 29  5 27 37  6 33 45  7 39 53
+8 45 61  10 53 71  11 58 79  12 64 87  13 67 92  13 71 96
+14 74 101  14 78 106  15 82 111  15 86 117  16 90 122  16 90 122
+15 94 128  15 94 128  15 94 128  15 94 128  57 82 86  209 152 1
+255 210 1  255 210 1  241 197 0  109 74 3  1 7 9  113 114 111
+255 255 255  253 253 253  225 202 147  183 122 1  194 135 4  183 122 1
+183 122 1  194 135 4  194 135 4  202 153 21  202 153 21  202 153 21
+202 153 21  202 153 21  194 135 4  194 135 4  183 122 1  183 122 1
+183 122 1  194 135 4  199 145 62  250 251 253  237 237 236  201 196 193
+191 183 178  213 210 208  213 210 208  201 196 193  201 196 193  201 196 193
+208 204 201  224 223 222  232 232 232  232 232 232  237 237 236  248 247 247
+250 252 255  255 255 255  250 252 255  113 114 111  0 0 0  0 0 0
+28 32 37  69 68 60  6 5 5  5 1 0  83 13 44  121 31 83
+121 31 83  29 26 26  0 0 0  0 0 0  0 0 0  4 22 30
+16 90 122  15 94 128  16 90 122  16 90 122  15 86 117  15 82 111
+14 78 106  14 76 103  14 74 101  14 73 99  13 71 96  13 68 93
+12 64 87  11 60 82  11 56 77  9 50 69  8 44 59  7 36 49
+2 11 15  3 15 20  3 21 29  5 27 37  6 33 45  7 39 53
+8 45 61  10 53 71  11 58 79  12 64 87  13 69 94  14 73 99
+14 76 103  15 82 111  15 86 117  16 90 122  16 90 122  15 94 128
+15 94 128  15 94 128  15 94 128  39 101 116  225 176 47  255 210 1
+255 206 13  255 206 13  255 210 1  255 210 1  154 101 6  11 13 22
+171 169 168  255 255 255  176 158 88  194 135 4  194 135 4  194 135 4
+183 122 1  183 122 1  194 135 4  194 135 4  202 153 21  202 153 21
+202 153 21  202 153 21  194 135 4  183 122 1  183 122 1  183 122 1
+194 135 4  194 135 4  199 145 62  237 237 236  229 228 227  191 183 178
+191 183 178  213 210 208  208 204 201  201 196 193  201 196 193  201 196 193
+208 204 201  224 223 222  232 232 232  232 232 232  245 245 244  247 240 225
+225 202 147  245 217 114  245 217 114  154 101 6  35 25 1  28 32 37
+55 54 51  29 26 26  9 8 8  47 44 41  161 10 114  161 10 114
+161 10 114  121 31 83  12 12 11  29 26 26  3 1 3  6 57 82
+15 94 128  15 94 128  15 94 128  16 90 122  15 86 117  15 82 111
+14 78 106  14 75 102  14 74 101  13 72 98  13 69 94  13 67 91
+12 64 87  11 60 82  10 54 74  9 49 66  8 42 57  7 36 49
+2 11 15  3 16 22  3 21 29  5 27 37  6 33 45  7 39 53
+9 47 64  10 53 71  11 58 79  12 64 87  13 69 94  14 73 99
+14 78 106  15 82 111  15 86 117  16 90 122  15 94 128  15 94 128
+15 94 128  15 94 128  15 94 128  176 158 88  241 197 0  255 206 13
+255 208 38  255 213 45  254 207 32  255 210 1  255 210 1  35 25 1
+2 9 12  171 169 168  176 158 88  174 129 27  194 135 4  194 135 4
+183 122 1  183 122 1  183 122 1  194 135 4  194 135 4  194 135 4
+194 135 4  194 135 4  194 135 4  183 122 1  183 122 1  194 135 4
+194 135 4  194 135 4  174 129 27  224 226 233  224 223 222  191 183 178
+191 183 178  208 204 201  208 204 201  208 204 201  201 196 193  201 196 193
+208 204 201  224 223 222  232 232 232  237 237 236  242 244 252  245 217 114
+255 206 13  255 213 23  255 213 23  255 234 21  154 101 6  20 41 44
+4 0 5  0 2 3  7 7 7  81 83 82  105 98 84  121 31 83
+121 31 83  55 54 51  55 54 51  39 37 35  22 11 1  61 109 99
+15 94 128  15 94 128  15 94 128  16 90 122  15 86 117  15 82 111
+14 78 106  14 76 103  14 74 101  13 72 98  13 69 94  12 66 90
+12 62 85  11 58 79  10 53 71  9 47 64  8 41 55  6 33 45
+2 11 15  3 16 22  3 21 29  5 28 38  6 33 45  6 40 54
+9 47 64  10 54 74  11 60 82  12 66 90  13 72 97  14 76 103
+15 82 111  15 86 117  16 90 122  15 94 128  15 94 128  15 94 128
+15 94 128  15 94 128  61 109 99  255 205 1  255 206 13  255 213 45
+255 209 49  255 209 52  255 209 52  254 207 32  255 210 1  234 181 0
+22 11 1  1 8 11  105 82 42  174 129 27  194 135 4  194 135 4
+194 135 4  183 122 1  183 122 1  183 122 1  194 135 4  194 135 4
+194 135 4  183 122 1  183 122 1  183 122 1  183 122 1  194 135 4
+194 135 4  183 122 1  167 119 72  218 217 217  213 210 208  191 183 178
+191 183 178  208 204 201  201 196 193  208 204 201  208 204 201  201 196 193
+208 204 201  224 223 222  232 232 232  237 237 236  241 241 241  235 193 64
+254 207 32  255 215 52  248 200 52  235 193 64  109 74 3  1 7 9
+0 0 0  0 0 0  0 0 0  9 8 8  29 26 26  17 25 27
+16 15 15  9 8 8  0 2 3  4 0 5  83 13 44  178 87 56
+61 109 99  15 94 128  15 94 128  16 90 122  15 86 117  15 82 111
+14 78 106  14 76 103  14 73 99  13 72 97  13 68 93  12 64 87
+11 60 82  11 56 77  9 50 69  8 45 61  7 39 53  6 33 45
+2 12 16  3 16 22  4 23 31  5 29 40  7 36 49  8 42 57
+9 49 66  11 56 77  12 62 85  13 69 94  14 74 101  14 78 106
+15 82 111  15 86 117  16 90 122  15 94 128  15 94 128  15 94 128
+15 94 128  39 101 116  234 181 0  255 210 1  254 207 32  255 209 52
+255 209 51  255 209 51  255 209 52  255 209 49  255 206 13  255 210 1
+194 135 4  0 0 0  0 0 0  35 25 1  154 101 6  194 135 4
+194 135 4  194 135 4  183 122 1  183 122 1  183 122 1  183 122 1
+183 122 1  183 122 1  183 122 1  194 135 4  183 122 1  183 122 1
+183 122 1  174 129 27  176 158 88  218 217 217  208 204 201  191 183 178
+191 183 178  201 196 193  201 196 193  208 204 201  213 210 208  201 196 193
+208 204 201  224 223 222  232 232 232  241 241 241  237 237 236  225 176 47
+255 213 23  225 176 47  172 59 77  161 10 114  83 13 44  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 1  0 0 0  0 0 0
+0 0 0  0 0 0  22 11 1  121 31 83  161 10 114  172 59 77
+176 158 88  15 94 128  15 94 128  16 90 122  15 86 117  15 82 111
+14 78 106  14 76 103  14 73 100  13 71 96  13 67 92  12 64 87
+11 58 79  10 54 74  9 49 66  8 42 57  7 36 49  5 29 40
+2 13 18  4 19 26  5 25 34  6 33 45  7 39 53  8 45 61
+10 53 71  11 60 82  12 66 90  13 72 98  14 78 106  15 82 111
+15 86 117  16 90 122  16 90 122  15 94 128  15 94 128  15 94 128
+39 101 116  174 129 27  255 205 1  255 205 1  255 208 38  255 209 52
+255 209 51  255 209 51  255 209 51  255 209 52  255 208 38  255 210 1
+255 210 1  109 74 3  0 0 0  7 5 1  16 15 15  154 101 6
+183 122 1  183 122 1  183 122 1  183 122 1  183 122 1  183 122 1
+183 122 1  183 122 1  183 122 1  183 122 1  183 122 1  194 135 4
+174 129 27  174 129 27  176 158 88  218 217 217  201 196 193  191 183 178
+191 183 178  191 183 178  191 183 178  208 204 201  213 210 208  208 204 201
+208 204 201  224 223 222  232 232 232  245 245 244  232 232 232  202 153 21
+255 205 1  178 87 56  150 20 84  161 10 114  83 13 44  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  72 47 3  183 122 1  154 101 6  172 59 77  199 145 62
+176 158 88  15 94 128  15 94 128  15 94 128  16 90 122  15 86 117
+15 82 111  14 78 106  14 73 99  13 69 94  12 66 90  12 62 85
+11 56 77  10 53 71  8 45 61  6 40 54  6 33 45  5 27 37
+3 15 20  3 21 29  5 28 38  7 36 49  8 44 59  9 49 66
+8 56 78  12 62 85  13 69 94  13 74 101  14 78 106  15 82 111
+15 86 117  16 90 122  15 94 128  16 90 122  39 101 116  81 83 82
+174 129 27  243 191 0  255 205 1  255 206 22  255 209 45  255 209 52
+255 209 51  255 209 51  255 209 51  255 209 52  255 209 51  255 206 22
+255 210 1  241 197 0  72 47 3  0 1 1  1 1 1  2 2 2
+109 74 3  194 135 4  194 135 4  183 122 1  183 122 1  183 122 1
+183 122 1  183 122 1  183 122 1  194 135 4  194 135 4  174 129 27
+174 129 27  174 129 27  191 183 178  213 210 208  201 196 193  191 183 178
+191 183 178  191 183 178  191 183 178  213 210 208  213 210 208  208 204 201
+213 210 208  218 217 217  232 232 232  224 226 233  191 183 178  202 153 21
+255 205 1  172 59 77  161 10 114  172 59 77  154 101 6  72 47 3
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+72 47 3  183 122 1  194 135 4  222 168 1  209 152 1  178 87 56
+167 119 72  15 94 128  15 94 128  15 94 128  16 90 122  15 86 117
+15 82 111  14 78 106  14 74 101  13 69 94  12 64 87  11 60 82
+10 54 74  9 50 69  8 44 59  7 36 49  6 30 41  4 24 33
+3 16 22  4 23 31  6 30 41  7 39 53  9 47 64  10 53 71
+8 56 78  9 63 87  13 71 96  12 75 102  15 82 111  15 82 111
+15 86 117  61 109 99  105 98 84  194 135 4  194 135 4  209 152 1
+234 181 0  255 205 1  255 205 1  255 206 22  255 209 52  255 209 52
+255 209 51  255 209 51  255 209 51  255 209 51  255 209 52  255 208 38
+255 205 1  255 210 1  222 168 1  7 5 1  0 0 0  0 0 0
+0 0 0  72 47 3  183 122 1  209 152 1  194 135 4  194 135 4
+194 135 4  194 135 4  194 135 4  174 129 27  174 129 27  174 129 27
+194 135 4  176 158 88  201 196 193  208 204 201  201 196 193  201 196 193
+191 183 178  171 169 168  191 183 178  213 210 208  213 210 208  208 204 201
+213 210 208  224 223 222  224 223 222  201 196 193  148 147 146  194 135 4
+255 205 1  178 87 56  178 87 56  183 122 1  183 122 1  154 101 6
+72 47 3  22 11 1  7 5 1  7 5 1  22 11 1  109 74 3
+194 135 4  209 152 1  222 168 1  255 205 1  178 87 56  161 10 114
+81 83 82  15 94 128  15 94 128  15 94 128  16 90 122  16 90 122
+15 86 117  14 78 106  14 75 102  13 69 94  12 64 87  11 58 79
+10 53 71  9 47 64  8 41 55  6 33 45  5 28 38  4 22 30
+3 16 22  4 24 33  6 33 45  6 40 54  7 48 67  10 53 71
+11 58 79  10 65 89  11 69 94  12 75 102  9 72 100  61 109 99
+202 153 21  249 197 0  255 210 1  255 205 1  255 205 1  255 205 1
+255 205 1  255 205 1  255 206 13  255 208 38  255 209 52  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 51  255 209 52  255 209 49
+255 206 22  255 210 1  255 210 1  109 74 3  0 0 0  0 0 0
+0 0 0  3 3 3  21 20 20  154 101 6  194 135 4  174 129 27
+174 129 27  174 129 27  174 129 27  174 129 27  194 135 4  194 135 4
+174 129 27  171 169 168  201 196 193  201 196 193  201 196 193  201 196 193
+191 183 178  191 183 178  191 183 178  208 204 201  213 210 208  213 210 208
+218 217 217  218 217 217  218 217 217  191 183 178  148 147 146  154 101 6
+178 87 56  154 101 6  241 197 0  234 181 0  209 152 1  194 135 4
+183 122 1  154 101 6  154 101 6  154 101 6  154 101 6  194 135 4
+222 168 1  243 191 0  255 206 13  255 206 22  172 59 77  161 10 114
+150 20 84  15 94 128  15 94 128  15 94 128  15 94 128  16 90 122
+15 86 117  15 82 111  14 76 103  13 69 94  12 64 87  11 56 77
+9 50 69  8 44 59  7 38 51  6 30 41  5 25 34  4 19 26
+3 16 22  4 24 33  6 33 45  6 40 54  7 48 67  10 53 71
+8 56 78  9 63 87  11 69 94  9 72 100  35 87 96  222 168 1
+255 210 1  255 205 1  255 205 1  255 205 1  255 205 1  255 205 1
+255 205 1  255 206 13  254 207 32  255 209 51  255 209 52  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 51  255 209 51  255 209 52
+255 208 38  255 206 13  255 210 1  249 197 0  35 25 1  0 0 0
+0 0 0  0 0 0  1 1 1  7 5 1  105 82 42  194 135 4
+194 135 4  194 135 4  194 135 4  194 135 4  183 122 1  167 119 72
+171 169 168  191 183 178  201 196 193  201 196 193  201 196 193  201 196 193
+191 183 178  171 169 168  191 183 178  201 196 193  213 210 208  218 217 217
+218 217 217  224 223 222  224 223 222  191 183 178  148 147 146  150 20 84
+161 10 114  178 87 56  255 210 1  255 205 1  234 181 0  222 168 1
+209 152 1  194 135 4  194 135 4  209 152 1  209 152 1  234 181 0
+249 197 0  255 208 38  253 211 50  255 214 50  172 59 77  161 10 114
+172 59 77  176 158 88  15 94 128  15 94 128  15 94 128  16 90 122
+15 86 117  15 82 111  14 78 106  13 72 97  12 64 87  11 56 77
+9 49 66  8 42 57  7 36 49  5 28 38  4 22 30  3 16 22
+3 18 24  4 24 33  6 33 45  6 40 54  7 48 67  9 50 69
+8 56 78  8 60 84  12 66 90  6 67 96  57 82 86  234 181 0
+255 205 1  255 206 22  255 208 38  255 208 38  255 208 38  255 208 38
+255 208 38  255 209 45  255 209 51  255 209 52  255 209 51  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 51  255 209 51  255 209 52
+255 209 51  255 206 22  255 205 1  255 210 1  194 135 4  0 1 1
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  154 101 6
+194 135 4  194 135 4  183 122 1  174 129 27  148 147 146  171 169 168
+171 169 168  191 183 178  201 196 193  201 196 193  201 196 193  201 196 193
+191 183 178  191 183 178  191 183 178  201 196 193  208 204 201  218 217 217
+224 223 222  224 223 222  224 223 222  201 196 193  148 147 146  161 10 114
+161 10 114  178 87 56  255 205 1  255 205 1  255 205 1  243 191 0
+234 181 0  234 181 0  234 181 0  234 181 0  243 191 0  255 205 1
+254 207 32  255 209 55  255 209 52  255 214 50  225 176 47  139 105 59
+225 176 47  255 213 23  199 145 62  15 94 128  16 90 122  16 90 122
+15 86 117  15 86 117  14 78 106  13 72 98  10 65 89  11 56 77
+9 47 64  8 41 55  6 33 45  5 25 34  4 19 26  3 15 20
+3 16 22  4 24 33  6 33 45  6 40 54  5 44 60  7 48 67
+10 54 74  11 58 79  9 63 87  8 66 93  35 87 96  222 168 1
+255 210 1  255 208 38  255 209 55  255 209 52  255 209 52  255 209 52
+255 209 52  255 209 52  255 209 51  255 209 51  255 209 51  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 51  255 209 51  255 209 51
+255 209 52  255 209 45  255 206 22  255 210 1  255 210 1  109 74 3
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  105 82 42
+176 158 88  167 119 72  148 147 146  148 147 146  171 169 168  171 169 168
+191 183 178  201 196 193  201 196 193  201 196 193  201 196 193  191 183 178
+191 183 178  191 183 178  191 183 178  191 183 178  208 204 201  224 223 222
+224 223 222  229 228 227  224 226 233  201 196 193  148 147 146  150 20 84
+150 20 84  202 153 21  255 210 1  255 205 1  255 205 1  255 205 1
+255 205 1  249 197 0  255 205 1  255 205 1  255 205 1  255 205 1
+255 209 45  255 209 52  255 209 51  255 218 49  225 176 47  150 20 84
+199 145 62  255 218 49  255 213 23  176 158 88  16 90 122  16 90 122
+15 86 117  15 86 117  14 78 106  13 73 99  10 65 89  11 56 77
+9 47 64  6 40 54  6 30 41  4 24 33  3 18 24  2 12 16
+3 15 20  4 23 31  5 29 40  7 38 51  5 44 60  7 48 67
+9 50 69  8 56 78  8 60 84  9 63 87  13 71 96  202 153 21
+255 210 1  255 206 22  255 209 52  255 209 51  255 209 51  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 51  255 209 51  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 51  255 209 51  255 209 51
+255 209 51  255 209 52  255 208 38  255 210 1  255 210 1  209 152 1
+5 1 0  0 0 0  0 0 0  0 0 0  3 1 3  81 83 82
+201 196 193  191 183 178  191 183 178  171 169 168  171 169 168  191 183 178
+191 183 178  201 196 193  208 204 201  208 204 201  191 183 178  191 183 178
+191 183 178  191 183 178  191 183 178  191 183 178  208 204 201  224 223 222
+229 228 227  229 228 227  232 232 232  208 204 201  148 147 146  109 74 3
+183 122 1  243 191 0  255 205 1  255 205 1  255 205 1  255 205 1
+255 205 1  255 205 1  255 205 1  255 205 1  255 205 1  255 206 13
+255 209 51  255 209 52  255 209 51  252 207 50  172 59 77  161 10 114
+172 59 77  253 211 50  255 218 49  255 213 23  176 158 88  16 90 122
+15 86 117  15 82 111  14 78 106  13 73 99  10 65 89  8 56 78
+7 48 67  6 40 54  6 30 41  4 22 30  3 16 22  2 11 15
+3 15 20  3 21 29  5 29 40  7 36 49  5 44 60  9 47 64
+7 48 67  6 54 76  11 58 79  9 63 87  13 67 91  174 129 27
+255 205 1  255 206 13  255 209 45  255 209 52  255 209 51  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 51  255 209 51  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 51  255 209 51  255 209 51
+255 209 51  255 209 52  255 209 52  254 207 32  255 210 1  255 210 1
+109 74 3  0 0 0  0 0 0  0 0 0  0 0 0  113 114 111
+208 204 201  201 196 193  191 183 178  191 183 178  201 196 193  201 196 193
+201 196 193  213 210 208  213 210 208  208 204 201  191 183 178  191 183 178
+191 183 178  201 196 193  191 183 178  191 183 178  208 204 201  224 223 222
+232 232 232  229 228 227  237 237 236  171 169 168  121 31 83  150 20 84
+174 129 27  255 205 1  255 205 1  255 205 1  255 205 1  255 206 13
+255 206 13  255 206 13  255 205 1  255 206 13  255 206 13  255 208 38
+255 209 52  255 209 51  255 211 51  248 200 52  150 20 84  161 10 114
+172 59 77  252 207 50  253 211 50  253 211 50  255 213 23  176 158 88
+39 101 116  14 78 106  14 78 106  11 72 98  10 65 89  8 56 78
+7 48 67  6 40 54  6 30 41  3 21 29  3 15 20  1 10 13
+2 13 18  3 20 27  3 27 37  6 33 45  6 40 54  5 44 60
+7 48 67  10 53 71  8 56 78  8 60 84  9 63 87  139 105 59
+255 210 1  255 205 1  255 208 38  255 209 52  255 209 51  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 51  255 209 51  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 51  255 209 51  255 209 51
+255 209 51  255 209 51  255 209 52  255 209 49  255 206 13  255 210 1
+241 197 0  35 25 1  0 0 1  7 7 7  55 54 51  171 169 168
+213 210 208  201 196 193  201 196 193  201 196 193  201 196 193  208 204 201
+213 210 208  218 217 217  218 217 217  208 204 201  191 183 178  191 183 178
+201 196 193  201 196 193  191 183 178  191 183 178  208 204 201  229 228 227
+232 232 232  237 237 236  213 210 208  81 83 82  150 20 84  161 10 114
+172 59 77  249 197 0  255 205 1  255 205 1  255 206 22  255 209 45
+255 209 45  255 209 45  255 208 38  255 208 38  255 208 38  255 209 51
+255 209 52  255 209 51  255 211 51  252 207 50  172 59 77  161 10 114
+199 145 62  252 207 50  255 209 51  255 209 51  253 211 50  255 222 40
+254 207 32  113 114 111  9 72 100  11 69 94  9 63 87  8 56 78
+9 47 64  7 39 53  5 29 40  3 20 27  2 13 18  1 8 11
+2 13 18  3 20 27  3 27 37  6 33 45  6 40 54  8 42 57
+9 47 64  7 48 67  10 54 74  8 56 78  8 60 84  139 105 59
+255 210 1  255 205 1  254 207 32  255 209 52  255 209 51  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 51  255 209 51  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 51  255 209 51  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 52  255 208 38  255 205 1
+255 210 1  209 152 1  113 114 111  201 196 193  224 223 222  224 226 233
+218 217 217  218 217 217  213 210 208  213 210 208  218 217 217  218 217 217
+218 217 217  224 226 233  229 228 227  208 204 201  191 183 178  191 183 178
+208 204 201  208 204 201  191 183 178  191 183 178  208 204 201  229 228 227
+245 245 244  237 237 236  81 83 82  22 11 1  150 20 84  161 10 114
+178 87 56  243 191 0  255 205 1  255 206 22  255 209 49  255 209 52
+255 209 52  255 209 52  255 209 52  255 209 52  255 209 52  255 209 51
+255 209 51  255 209 51  255 209 51  255 218 49  202 153 21  167 119 72
+255 209 45  255 211 51  255 209 51  255 209 51  255 209 51  255 209 49
+255 213 45  255 213 23  35 87 96  8 66 93  11 60 82  10 53 71
+5 44 60  7 38 51  5 27 37  3 18 24  2 12 16  1 7 9
+2 13 18  3 20 27  3 27 37  6 33 45  7 38 51  1 37 56
+5 44 60  7 48 67  7 48 67  6 54 76  8 60 84  139 105 59
+255 205 1  255 206 13  255 208 38  255 209 52  255 209 51  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 51  255 209 51  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 51  255 209 51  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 52  255 209 49  255 206 13
+255 205 1  255 210 1  225 176 47  224 226 233  250 251 253  232 232 232
+229 228 227  224 226 233  224 223 222  224 226 233  224 223 222  224 226 233
+232 232 232  241 241 241  232 232 232  201 196 193  191 183 178  191 183 178
+208 204 201  208 204 201  191 183 178  171 169 168  213 210 208  241 241 241
+232 232 232  81 83 82  0 0 0  1 1 1  83 13 44  150 20 84
+194 135 4  255 205 1  255 206 13  255 208 38  255 209 52  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 51  255 209 51  255 209 51
+255 209 51  255 209 51  255 218 49  199 145 62  172 59 77  199 145 62
+255 218 49  255 209 51  255 209 51  255 209 51  255 209 51  255 209 51
+253 211 50  255 208 38  35 87 96  8 60 84  8 56 78  7 48 67
+6 40 54  6 33 45  4 23 31  3 16 22  1 10 13  1 4 5
+2 13 18  3 21 29  3 27 37  6 33 45  7 38 51  6 40 54
+5 44 60  5 44 60  7 48 67  10 53 71  6 54 76  174 129 27
+255 210 1  255 206 13  255 209 45  255 209 52  255 209 51  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 51  255 209 51  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 51  255 209 51  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 52  255 209 49  255 206 22
+255 205 1  255 205 1  234 181 0  176 158 88  250 252 255  248 247 247
+241 241 241  237 237 236  237 237 236  232 232 232  237 237 236  241 241 241
+245 245 244  248 247 247  232 232 232  201 196 193  191 183 178  201 196 193
+213 210 208  213 210 208  171 169 168  191 183 178  224 223 222  213 210 208
+81 83 82  0 0 0  0 0 0  0 0 0  109 74 3  109 74 3
+209 152 1  255 205 1  255 206 13  255 209 45  255 209 52  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 51  255 209 51  255 209 51
+255 209 51  255 215 52  225 176 47  150 20 84  161 10 114  172 59 77
+251 208 45  255 211 51  255 209 51  255 209 51  255 209 51  255 209 51
+255 222 40  199 145 62  10 65 89  8 56 78  10 53 71  5 44 60
+7 36 49  5 29 40  3 20 27  2 13 18  1 8 11  1 4 5
+3 15 20  3 21 29  3 27 37  6 33 45  1 37 56  6 40 54
+5 44 60  8 45 61  7 48 67  7 48 67  10 53 71  202 153 21
+255 210 1  255 206 22  255 209 51  255 209 52  255 209 51  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 51  255 209 51  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 51  255 209 51  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 52  255 209 49  254 207 32
+255 205 1  255 205 1  255 210 1  194 135 4  171 169 168  252 254 255
+253 253 253  250 249 249  248 247 247  248 247 247  250 249 249  253 253 253
+254 254 254  250 249 249  224 223 222  191 183 178  191 183 178  201 196 193
+218 217 217  218 217 217  191 183 178  171 169 168  105 98 84  17 25 27
+0 0 0  0 0 0  0 0 0  7 5 1  109 74 3  150 20 84
+172 59 77  222 168 1  255 213 23  255 209 45  255 209 52  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 51  255 209 51  255 209 51
+255 209 51  255 218 49  178 87 56  161 10 114  161 10 114  178 87 56
+255 215 52  255 209 52  255 209 51  255 209 52  255 213 45  255 208 38
+176 158 88  23 75 89  6 57 82  10 53 71  8 45 61  7 38 51
+5 29 40  4 23 31  3 16 22  1 10 14  1 7 9  0 3 4
+3 16 22  4 23 31  3 27 37  6 33 45  1 37 56  6 40 54
+8 42 57  5 44 60  5 44 60  5 44 60  45 73 77  255 210 1
+255 210 1  255 206 22  255 209 49  255 209 52  255 209 52  255 209 52
+255 209 51  255 209 51  255 209 51  255 209 51  255 209 51  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 51  255 209 51  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 51  255 209 51  254 207 32
+255 205 1  255 205 1  255 205 1  243 191 0  154 101 6  191 183 178
+252 254 255  255 255 255  255 255 255  255 255 255  255 255 255  255 255 255
+255 255 255  248 247 247  218 217 217  191 183 178  201 196 193  208 204 201
+208 204 201  171 169 168  105 98 84  47 44 41  9 8 8  0 0 0
+0 0 0  0 0 0  0 0 0  35 25 1  154 101 6  161 10 114
+161 10 114  178 87 56  255 206 22  255 209 49  255 209 52  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 51  255 209 51  255 209 51
+255 218 49  255 218 49  178 87 56  161 10 114  172 59 77  235 193 64
+255 215 52  255 209 55  255 214 50  254 207 32  225 176 47  61 109 99
+6 54 76  6 54 76  10 53 71  5 44 60  7 38 51  5 29 40
+4 23 31  3 18 24  2 12 16  1 7 9  1 4 5  0 2 3
+3 16 22  4 24 33  5 29 40  6 33 45  7 36 49  6 40 54
+6 40 54  5 44 60  5 44 60  6 40 54  139 105 59  255 210 1
+255 205 1  255 205 1  255 206 13  254 207 32  255 209 45  255 209 45
+255 209 51  255 209 52  255 209 52  255 209 52  255 209 52  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 51  255 209 51  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 51  255 209 51  254 207 32
+255 205 1  255 205 1  255 205 1  241 197 0  194 135 4  72 47 3
+105 98 84  171 169 168  218 217 217  237 237 236  245 245 244  245 245 244
+232 232 232  208 204 201  171 169 168  148 147 146  113 114 111  105 98 84
+55 54 51  21 20 20  29 26 26  39 37 35  21 20 20  4 4 4
+0 0 0  0 0 0  5 1 0  35 25 1  154 101 6  150 20 84
+161 10 114  172 59 77  255 213 23  255 213 45  255 209 51  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 51  255 209 51  255 209 49
+235 193 64  178 87 56  105 82 42  225 176 47  252 207 50  255 214 50
+255 213 45  255 206 22  225 176 47  105 98 84  13 67 91  6 57 82
+6 54 76  9 50 69  5 44 60  7 36 49  3 27 37  3 21 29
+3 16 22  2 12 16  1 8 11  1 5 6  0 2 3  0 1 1
+3 16 22  4 24 33  3 27 37  6 33 45  1 37 56  6 40 54
+1 37 56  6 40 54  1 37 56  5 44 60  174 129 27  255 210 1
+255 205 1  255 205 1  255 205 1  255 205 1  255 206 13  255 206 22
+255 206 22  254 207 32  255 208 38  255 209 45  255 209 45  255 209 51
+255 209 52  255 209 52  255 209 51  255 209 51  255 209 51  255 209 51
+255 209 51  255 209 51  255 209 51  255 209 51  255 209 52  255 208 38
+255 205 1  255 205 1  255 205 1  243 191 0  209 152 1  109 74 3
+4 0 5  0 0 1  12 12 11  39 37 35  47 44 41  47 44 41
+47 44 41  55 54 51  69 68 60  69 68 60  47 44 41  39 37 35
+21 20 20  22 11 1  39 37 35  47 44 41  21 20 20  6 5 5
+0 0 0  0 0 0  1 1 1  72 47 3  183 122 1  194 135 4
+172 59 77  202 153 21  225 176 47  225 176 47  253 211 50  255 211 51
+255 209 51  255 211 51  255 211 51  255 211 51  255 211 51  225 176 47
+150 20 84  161 10 114  161 10 114  225 176 47  255 222 40  255 206 22
+222 168 1  105 82 42  23 75 89  8 60 84  8 60 84  10 53 71
+7 48 67  8 42 57  7 36 49  3 27 37  3 21 29  3 15 20
+2 11 15  1 8 11  1 5 6  0 3 4  0 1 1  0 1 1
+3 16 22  3 21 29  3 27 37  6 33 45  6 33 45  6 33 45
+7 36 49  7 36 49  7 36 49  6 33 45  139 105 59  255 210 1
+255 210 1  255 205 1  255 205 1  255 205 1  255 205 1  255 205 1
+255 205 1  255 205 1  255 205 1  255 206 13  255 206 13  255 206 22
+255 208 38  255 209 45  255 209 51  255 209 52  255 209 52  255 209 52
+255 209 51  255 209 51  255 209 52  255 209 52  255 209 52  255 208 38
+255 205 1  255 205 1  255 205 1  234 181 0  209 152 1  154 101 6
+7 5 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 1
+16 15 15  47 44 41  69 68 60  55 54 51  47 44 41  39 37 35
+21 20 20  16 15 15  29 26 26  39 37 35  29 26 26  7 7 7
+0 0 0  0 0 0  0 0 0  72 47 3  183 122 1  209 152 1
+222 168 1  154 101 6  161 10 114  150 20 84  178 87 56  255 215 52
+255 214 50  255 209 49  248 200 52  252 207 50  255 222 40  172 59 77
+161 10 114  161 10 114  172 59 77  249 198 10  249 197 0  174 129 27
+45 73 77  6 67 96  10 65 89  8 56 78  10 53 71  5 44 60
+6 40 54  6 33 45  5 28 38  3 21 29  3 16 22  2 11 15
+1 8 11  1 5 6  0 3 4  0 2 3  0 1 1  0 0 1
+2 13 18  3 20 27  4 24 33  3 27 37  5 29 40  6 33 45
+6 33 45  6 33 45  6 33 45  6 33 45  15 45 54  174 129 27
+241 197 0  249 197 0  249 197 0  243 191 0  249 197 0  249 197 0
+249 197 0  249 197 0  255 205 1  255 205 1  255 205 1  255 205 1
+255 205 1  255 206 13  255 206 22  254 207 32  255 209 45  255 209 49
+255 209 51  255 209 51  255 209 51  255 209 49  255 208 38  255 206 13
+255 205 1  255 205 1  255 205 1  234 181 0  209 152 1  154 101 6
+22 11 1  0 0 0  0 0 0  0 0 0  0 0 0  12 12 11
+39 37 35  69 68 60  55 54 51  69 68 60  47 44 41  29 26 26
+16 15 15  12 12 11  21 20 20  39 37 35  29 26 26  9 8 8
+1 1 1  0 0 0  1 1 1  109 74 3  183 122 1  209 152 1
+243 191 0  202 153 21  161 10 114  161 10 114  150 20 84  199 145 62
+252 207 50  172 59 77  150 20 84  172 59 77  199 145 62  178 87 56
+172 59 77  178 87 56  222 168 1  209 152 1  105 82 42  23 75 89
+4 60 87  11 60 82  6 54 76  9 50 69  8 45 61  7 39 53
+6 30 41  5 27 37  3 21 29  3 15 20  2 11 15  1 8 11
+1 5 6  1 4 5  0 2 3  0 2 3  0 1 1  0 0 0
+1 10 14  3 15 20  3 18 24  3 21 29  4 24 33  4 24 33
+3 27 37  3 27 37  3 27 37  3 27 37  3 27 37  6 33 45
+69 68 60  154 101 6  194 135 4  194 135 4  209 152 1  209 152 1
+209 152 1  222 168 1  222 168 1  234 181 0  234 181 0  243 191 0
+249 197 0  255 205 1  255 205 1  255 205 1  255 205 1  255 206 13
+255 206 22  255 206 22  255 206 22  255 206 13  255 205 1  255 205 1
+255 205 1  249 197 0  234 181 0  222 168 1  209 152 1  154 101 6
+22 11 1  0 0 0  0 0 0  0 0 0  5 1 0  29 26 26
+55 54 51  69 68 60  55 54 51  47 44 41  39 37 35  29 26 26
+22 11 1  11 5 1  22 11 1  29 26 26  29 26 26  12 12 11
+1 1 1  0 0 0  5 1 0  72 47 3  183 122 1  194 135 4
+222 168 1  243 191 0  178 87 56  161 10 114  161 10 114  178 87 56
+178 87 56  161 10 114  161 10 114  161 10 114  172 59 77  222 168 1
+209 152 1  209 152 1  154 101 6  45 73 77  4 60 87  8 60 84
+11 58 79  10 53 71  9 49 66  8 42 57  7 36 49  5 29 40
+4 24 33  3 20 27  3 15 20  2 11 15  1 8 11  1 5 6
+0 3 4  0 2 3  0 1 1  0 1 1  0 0 0  0 0 0
+1 7 9  1 9 14  2 12 16  3 15 20  3 16 22  3 16 22
+3 18 24  3 20 27  3 21 29  3 20 27  4 23 31  4 23 31
+3 20 27  5 28 38  20 41 44  47 44 41  105 82 42  154 101 6
+154 101 6  183 122 1  194 135 4  194 135 4  194 135 4  209 152 1
+222 168 1  234 181 0  234 181 0  249 197 0  255 205 1  255 205 1
+255 205 1  255 205 1  255 205 1  255 205 1  255 205 1  255 205 1
+249 197 0  234 181 0  222 168 1  209 152 1  183 122 1  154 101 6
+11 5 1  0 0 0  0 0 0  0 0 0  7 5 1  39 37 35
+69 68 60  105 82 42  55 54 51  55 54 51  39 37 35  29 26 26
+17 25 27  10 20 26  17 25 27  28 32 37  29 26 26  16 15 15
+3 3 3  0 0 0  5 1 0  72 47 3  183 122 1  194 135 4
+209 152 1  222 168 1  243 191 0  222 168 1  209 152 1  209 152 1
+194 135 4  178 87 56  172 59 77  178 87 56  209 152 1  209 152 1
+183 122 1  105 82 42  12 64 87  8 60 84  11 58 79  10 54 74
+9 49 66  8 45 61  7 39 53  7 36 49  5 28 38  4 23 31
+3 18 24  3 15 20  1 10 14  1 8 11  1 5 6  0 3 4
+0 2 3  0 1 1  0 1 1  0 0 1  0 0 0  0 0 0
+1 4 5  1 7 9  1 7 9  1 8 11  1 10 13  1 10 14
+2 12 16  2 13 18  2 13 18  3 15 20  3 15 20  3 16 22
+3 20 27  3 20 27  3 21 29  4 23 31  4 24 33  5 29 40
+20 41 44  47 44 41  69 68 60  109 74 3  154 101 6  183 122 1
+183 122 1  194 135 4  209 152 1  209 152 1  222 168 1  234 181 0
+243 191 0  249 197 0  249 197 0  249 197 0  243 191 0  243 191 0
+222 168 1  209 152 1  209 152 1  183 122 1  183 122 1  109 74 3
+1 7 9  6 33 45  9 47 64  9 50 69  15 53 69  45 73 77
+81 83 82  81 83 82  81 83 82  57 82 86  45 73 77  32 65 75
+23 57 72  15 53 69  15 53 69  23 57 72  32 65 75  24 54 62
+8 45 61  7 38 51  3 27 37  72 47 3  154 101 6  183 122 1
+194 135 4  209 152 1  222 168 1  234 181 0  234 181 0  234 181 0
+234 181 0  234 181 0  209 152 1  209 152 1  194 135 4  154 101 6
+69 68 60  4 60 87  6 57 82  11 56 77  10 53 71  9 47 64
+8 42 57  7 38 51  6 33 45  5 27 37  4 22 30  3 18 24
+2 13 18  1 10 14  1 8 11  1 7 9  1 4 5  0 2 3
+0 1 1  0 1 1  0 0 1  0 0 0  0 0 0  0 0 0
+0 1 1  0 3 4  1 4 5  1 5 6  1 5 6  1 7 9
+1 7 9  1 7 9  1 9 12  1 9 14  1 10 14  2 12 16
+2 13 18  3 15 20  3 18 24  3 20 27  3 21 29  3 27 37
+3 27 37  5 29 40  6 33 45  1 37 56  8 42 57  55 54 51
+105 82 42  154 101 6  183 122 1  194 135 4  194 135 4  209 152 1
+222 168 1  222 168 1  222 168 1  222 168 1  222 168 1  222 168 1
+209 152 1  194 135 4  183 122 1  183 122 1  109 74 3  20 41 44
+8 56 78  11 60 82  11 60 82  11 60 82  16 62 81  57 82 86
+81 83 82  61 109 99  81 83 82  57 82 86  32 65 75  32 65 75
+23 57 72  15 53 69  10 53 71  23 57 72  32 65 75  23 57 72
+10 53 71  10 54 74  6 54 76  24 54 62  154 101 6  183 122 1
+194 135 4  194 135 4  209 152 1  209 152 1  209 152 1  209 152 1
+209 152 1  209 152 1  194 135 4  183 122 1  183 122 1  69 68 60
+4 60 87  11 58 79  10 54 74  10 53 71  9 47 64  8 42 57
+7 38 51  6 33 45  5 28 38  4 23 31  3 18 24  2 13 18
+2 11 15  1 8 11  1 7 9  1 4 5  0 2 3  0 2 3
+0 1 1  0 1 1  0 0 0  0 0 0  0 0 0  0 0 0
+0 1 1  0 1 1  0 2 3  0 2 3  0 2 3  1 4 5
+0 3 4  1 4 5  1 4 5  1 5 6  1 7 9  1 7 9
+2 9 12  1 10 14  2 12 16  3 15 20  3 16 22  3 20 27
+4 23 31  3 27 37  6 30 41  6 33 45  7 36 49  7 38 51
+6 40 54  15 45 54  55 54 51  105 82 42  154 101 6  183 122 1
+194 135 4  194 135 4  194 135 4  209 152 1  209 152 1  209 152 1
+194 135 4  183 122 1  183 122 1  105 82 42  16 62 81  11 58 79
+11 58 79  12 61 82  11 58 79  8 56 78  11 56 77  23 75 89
+57 82 86  81 83 82  57 82 86  45 73 77  32 65 75  23 57 72
+15 53 69  7 48 67  7 48 67  10 53 71  23 57 72  15 53 69
+9 50 69  10 53 71  10 54 74  9 50 69  69 68 60  154 101 6
+183 122 1  183 122 1  194 135 4  194 135 4  194 135 4  194 135 4
+194 135 4  194 135 4  183 122 1  154 101 6  69 68 60  6 57 82
+8 56 78  10 54 74  10 53 71  9 47 64  8 44 59  7 38 51
+6 33 45  5 27 37  4 23 31  4 19 26  3 15 20  2 11 15
+1 8 11  1 7 9  1 5 6  0 3 4  0 2 3  0 1 1
+0 1 1  0 0 1  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 1  0 0 1  0 1 1  0 1 1  0 1 1
+0 1 1  0 2 3  0 2 3  0 2 3  0 3 4  1 4 5
+1 5 6  1 7 9  1 8 11  2 9 12  2 12 16  2 13 18
+3 16 22  3 21 29  4 24 33  5 28 38  6 33 45  7 36 49
+6 40 54  5 44 60  5 44 60  5 44 60  15 53 69  55 54 51
+105 82 42  154 101 6  183 122 1  183 122 1  183 122 1  183 122 1
+183 122 1  154 101 6  105 82 42  11 56 77  8 56 78  45 73 77
+139 105 59  139 105 59  139 105 59  139 105 59  105 98 84  105 98 84
+105 98 84  113 114 111  105 98 84  105 98 84  105 98 84  105 98 84
+105 98 84  81 83 82  81 83 82  105 98 84  105 98 84  105 98 84
+105 98 84  139 105 59  139 105 59  139 105 59  105 82 42  105 82 42
+154 101 6  154 101 6  183 122 1  183 122 1  183 122 1  183 122 1
+183 122 1  154 101 6  105 82 42  32 65 75  6 57 82  8 56 78
+10 54 74  9 50 69  9 47 64  8 42 57  7 38 51  6 33 45
+5 27 37  4 23 31  4 19 26  3 15 20  2 11 15  2 9 12
+1 7 9  1 4 5  0 3 4  0 2 3  0 1 1  0 1 1
+0 0 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 1  0 1 1  0 1 1  0 1 1  0 2 3
+0 2 3  0 3 4  1 4 5  1 7 9  1 8 11  2 10 13
+2 13 18  3 16 22  4 19 26  4 22 30  5 27 37  6 30 41
+7 36 49  7 39 53  8 42 57  5 44 60  7 48 67  7 48 67
+10 53 71  24 54 62  55 54 51  69 68 60  105 82 42  105 82 42
+69 68 60  32 65 75  6 54 76  8 56 78  6 54 76  57 82 86
+193 130 84  193 130 84  193 130 84  193 130 84  193 130 84  193 130 84
+193 130 84  193 130 84  193 130 84  193 130 84  193 130 84  193 130 84
+193 130 84  193 130 84  193 130 84  193 130 84  193 130 84  193 130 84
+193 130 84  193 130 84  193 130 84  193 130 84  105 98 84  6 54 76
+32 65 75  69 68 60  105 82 42  105 82 42  105 82 42  105 82 42
+69 68 60  32 65 75  11 58 79  8 56 78  11 56 77  10 54 74
+9 50 69  9 47 64  8 42 57  7 38 51  6 33 45  5 28 38
+4 23 31  4 19 26  3 15 20  2 12 16  2 9 12  1 7 9
+1 5 6  0 3 4  0 3 4  0 1 1  0 1 1  0 0 1
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 1 1
+0 1 1  0 2 3  0 3 4  1 4 5  1 5 6  1 7 9
+2 9 12  2 12 16  3 15 20  3 18 24  3 21 29  5 25 34
+6 30 41  6 33 45  7 38 51  8 41 55  8 44 59  9 47 64
+9 50 69  6 54 76  6 54 76  10 53 71  9 50 69  10 53 71
+10 53 71  6 54 76  11 56 77  11 56 77  6 54 76  32 65 75
+193 130 84  193 130 84  193 130 84  193 130 84  193 130 84  193 130 84
+167 119 72  167 119 72  167 119 72  167 119 72  167 119 72  167 119 72
+167 119 72  167 119 72  167 119 72  193 130 84  167 119 72  193 130 84
+193 130 84  193 130 84  193 130 84  193 130 84  69 68 60  6 54 76
+8 56 78  8 56 78  6 54 76  6 57 82  6 54 76  6 57 82
+6 57 82  8 56 78  11 58 79  11 56 77  10 54 74  9 50 69
+9 47 64  8 42 57  7 38 51  6 33 45  5 29 40  4 24 33
+4 19 26  3 16 22  2 13 18  2 10 13  1 7 9  1 5 6
+0 3 4  0 3 4  0 2 3  0 1 1  0 0 1  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 1 1  0 1 1  0 2 3  0 2 3  1 4 5  1 5 6
+1 7 9  2 9 12  2 12 16  3 15 20  3 18 24  4 22 30
+5 25 34  6 30 41  6 33 45  7 36 49  8 41 55  8 44 59
+9 47 64  9 49 66  9 50 69  9 50 69  10 53 71  10 53 71
+10 54 74  10 53 71  10 53 71  10 53 71  9 50 69  9 50 69
+167 119 72  193 130 84  193 130 84  193 130 84  167 119 72  167 119 72
+167 119 72  167 119 72  167 119 72  167 119 72  167 119 72  167 119 72
+167 119 72  167 119 72  167 119 72  167 119 72  167 119 72  167 119 72
+167 119 72  193 130 84  193 130 84  167 119 72  15 53 69  10 53 71
+11 56 77  11 56 77  8 56 78  8 56 78  8 56 78  8 56 78
+11 58 79  11 56 77  10 54 74  10 53 71  9 50 69  9 47 64
+8 42 57  7 38 51  6 33 45  6 30 41  5 25 34  3 21 29
+3 16 22  2 13 18  2 11 15  1 8 11  1 7 9  1 4 5
+0 3 4  0 1 1  0 2 3  0 1 1  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 1  0 1 1  0 1 1  0 1 1  0 2 3  1 4 5
+1 5 6  1 7 9  2 10 13  2 12 16  3 15 20  3 18 24
+4 22 30  5 25 34  6 30 41  6 33 45  7 36 49  8 41 55
+8 42 57  8 45 61  9 47 64  9 49 66  9 49 66  9 49 66
+9 49 66  9 50 69  9 49 66  9 49 66  9 47 64  9 47 64
+69 68 60  193 130 84  167 119 72  167 119 72  167 119 72  167 119 72
+139 105 59  139 105 59  139 105 59  139 105 59  139 105 59  139 105 59
+139 105 59  139 105 59  139 105 59  139 105 59  139 105 59  167 119 72
+167 119 72  167 119 72  193 130 84  69 68 60  8 45 61  9 50 69
+10 53 71  10 53 71  10 54 74  10 54 74  10 54 74  10 54 74
+10 54 74  10 53 71  9 50 69  9 49 66  8 45 61  8 42 57
+7 39 53  6 33 45  6 30 41  5 27 37  4 22 30  4 19 26
+3 15 20  2 11 15  2 9 12  1 7 9  1 5 6  0 3 4
+0 1 1  0 1 1  0 1 1  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 1  0 1 1  0 2 3  0 2 3  0 3 4
+1 4 5  1 5 6  1 7 9  2 9 12  2 11 15  3 15 20
+3 18 24  4 22 30  5 25 34  5 28 38  6 33 45  7 36 49
+7 38 51  6 40 54  8 42 57  8 44 59  8 44 59  8 45 61
+8 44 59  8 45 61  8 44 59  8 44 59  8 44 59  8 41 55
+8 42 57  105 82 42  167 119 72  167 119 72  139 105 59  139 105 59
+139 105 59  139 105 59  105 82 42  105 82 42  105 82 42  105 82 42
+105 82 42  105 82 42  139 105 59  139 105 59  139 105 59  139 105 59
+167 119 72  167 119 72  105 82 42  1 37 56  8 42 57  8 44 59
+8 45 61  9 47 64  9 47 64  9 49 66  9 49 66  9 49 66
+9 47 64  9 47 64  8 45 61  8 44 59  8 41 55  7 38 51
+6 33 45  5 29 40  5 27 37  4 23 31  3 18 24  3 15 20
+2 12 16  2 9 12  1 7 9  1 5 6  0 3 4  0 2 3
+0 1 1  0 1 1  0 0 1  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 1  0 1 1  0 1 1  0 2 3
+0 3 4  1 5 6  1 7 9  1 7 9  2 9 12  2 11 15
+3 15 20  3 18 24  4 20 27  4 23 31  5 25 34  6 30 41
+6 33 45  7 36 49  7 36 49  7 38 51  7 39 53  7 39 53
+7 39 53  7 39 53  7 39 53  7 38 51  7 38 51  7 36 49
+6 33 45  8 41 55  69 68 60  167 119 72  139 105 59  139 105 59
+105 82 42  105 82 42  105 82 42  105 82 42  105 82 42  105 82 42
+105 82 42  105 82 42  105 82 42  105 82 42  139 105 59  139 105 59
+139 105 59  69 68 60  6 33 45  6 30 41  7 36 49  7 36 49
+7 38 51  8 41 55  8 41 55  8 41 55  8 42 57  8 42 57
+8 41 55  8 41 55  7 39 53  7 36 49  7 36 49  6 33 45
+5 29 40  5 25 34  3 21 29  3 18 24  3 15 20  2 12 16
+2 9 12  1 7 9  1 5 6  0 3 4  0 2 3  0 1 1
+0 1 1  0 0 1  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 1  0 1 1  0 1 1
+0 2 3  0 3 4  1 4 5  1 5 6  1 7 9  2 9 12
+2 12 16  3 15 20  3 16 22  4 19 26  4 22 30  5 25 34
+5 27 37  5 29 40  6 30 41  6 33 45  6 33 45  6 33 45
+6 33 45  6 33 45  6 30 41  6 30 41  6 30 41  6 30 41
+5 27 37  5 25 34  5 25 34  28 32 37  105 82 42  105 82 42
+139 105 59  139 105 59  105 82 42  105 82 42  105 82 42  105 82 42
+105 82 42  105 82 42  105 82 42  105 82 42  105 82 42  55 54 51
+28 32 37  4 19 26  4 23 31  5 27 37  5 29 40  6 30 41
+6 33 45  6 33 45  6 33 45  6 33 45  6 33 45  7 36 49
+7 36 49  6 33 45  6 33 45  6 30 41  5 29 40  5 25 34
+4 23 31  4 20 27  3 16 22  3 15 20  2 12 16  2 9 12
+1 7 9  1 5 6  1 4 5  0 2 3  0 2 3  0 1 1
+0 0 1  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 1  0 1 1
+0 1 1  0 2 3  0 3 4  1 4 5  1 5 6  1 7 9
+2 9 12  2 11 15  2 13 18  3 15 20  3 18 24  4 20 27
+4 22 30  4 24 33  5 25 34  5 25 34  5 27 37  5 27 37
+5 27 37  5 25 34  5 25 34  5 25 34  4 24 33  4 23 31
+4 22 30  3 21 29  4 19 26  3 16 22  0 14 27  21 20 20
+39 37 35  39 37 35  47 44 41  47 44 41  39 37 35  39 37 35
+39 37 35  47 44 41  39 37 35  29 26 26  2 13 18  2 11 15
+2 13 18  3 18 24  4 20 27  3 21 29  4 23 31  4 24 33
+5 25 34  5 27 37  5 27 37  5 28 38  5 28 38  5 28 38
+5 27 37  5 27 37  5 25 34  5 25 34  4 23 31  3 21 29
+3 18 24  3 15 20  2 13 18  2 11 15  2 9 12  1 7 9
+1 5 6  1 4 5  0 3 4  0 2 3  0 1 1  0 1 1
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 1
+0 1 1  0 1 1  0 2 3  0 3 4  0 3 4  1 5 6
+1 7 9  2 9 12  2 10 13  2 11 15  2 13 18  3 16 22
+3 16 22  4 19 26  4 19 26  4 20 27  4 20 27  4 19 26
+4 19 26  4 20 27  4 19 26  4 19 26  3 18 24  3 18 24
+3 16 22  3 16 22  3 15 20  3 15 20  2 12 16  2 11 15
+1 8 11  1 7 9  9 8 8  7 7 7  1 5 6  7 7 7
+1 5 6  1 7 9  7 7 7  1 9 12  2 11 15  2 12 16
+2 13 18  2 13 18  3 15 20  3 16 22  3 16 22  3 18 24
+4 19 26  4 20 27  4 20 27  3 21 29  3 21 29  3 21 29
+3 21 29  3 21 29  4 19 26  4 19 26  3 18 24  3 16 22
+3 15 20  2 12 16  1 10 14  1 8 11  1 7 9  1 5 6
+1 4 5  0 3 4  0 2 3  0 1 1  0 1 1  0 0 1
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 1  0 1 1  0 1 1  0 2 3  0 2 3  0 3 4
+1 4 5  1 7 9  1 7 9  1 8 11  2 10 13  2 11 15
+2 12 16  2 13 18  2 13 18  3 15 20  3 15 20  3 15 20
+3 15 20  3 15 20  3 15 20  2 13 18  2 12 16  2 12 16
+2 12 16  2 12 16  2 11 15  1 10 14  2 10 13  2 9 12
+1 8 11  1 8 11  1 8 11  1 8 11  1 8 11  1 8 11
+1 7 9  1 7 9  1 8 11  1 8 11  1 8 11  1 8 11
+2 9 12  2 9 12  1 10 14  2 11 15  2 11 15  2 12 16
+2 12 16  2 13 18  2 13 18  3 15 20  3 15 20  3 15 20
+3 15 20  3 15 20  2 13 18  2 13 18  2 12 16  2 11 15
+1 10 14  1 8 11  1 7 9  1 7 9  1 5 6  0 3 4
+0 2 3  0 2 3  0 1 1  0 1 1  0 0 1  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 1  0 1 1  0 1 1  0 1 1  0 3 4
+0 3 4  1 4 5  1 5 6  1 7 9  1 7 9  1 8 11
+1 8 11  2 9 12  2 10 13  2 10 13  2 10 13  2 9 12
+2 10 13  2 10 13  2 9 12  1 8 11  1 8 11  1 8 11
+1 8 11  1 7 9  1 7 9  1 7 9  1 7 9  1 7 9
+1 7 9  1 5 6  1 5 6  1 5 6  1 5 6  1 5 6
+1 5 6  1 5 6  1 5 6  1 5 6  1 5 6  1 7 9
+1 7 9  1 7 9  1 7 9  1 7 9  1 8 11  1 8 11
+1 8 11  2 9 12  2 9 12  2 9 12  2 10 13  2 9 12
+2 10 13  2 10 13  2 10 13  2 9 12  1 8 11  1 8 11
+1 7 9  1 5 6  1 4 5  1 4 5  0 3 4  0 3 4
+0 1 1  0 1 1  0 1 1  0 0 1  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 1  0 1 1  0 1 1
+0 1 1  0 2 3  0 3 4  0 3 4  1 4 5  1 4 5
+1 7 9  1 7 9  1 7 9  1 7 9  1 7 9  1 7 9
+1 7 9  1 7 9  1 7 9  1 7 9  1 5 6  1 4 5
+1 5 6  1 5 6  1 5 6  1 4 5  1 4 5  1 4 5
+1 4 5  1 4 5  0 3 4  0 3 4  0 3 4  0 3 4
+0 3 4  0 3 4  0 3 4  0 3 4  1 4 5  1 4 5
+1 4 5  1 4 5  1 4 5  1 4 5  1 5 6  1 4 5
+1 5 6  1 7 9  1 7 9  1 7 9  1 7 9  1 7 9
+1 7 9  1 7 9  1 7 9  1 7 9  1 7 9  1 5 6
+1 4 5  0 3 4  0 3 4  0 2 3  0 2 3  0 1 1
+0 1 1  0 1 1  0 0 1  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
+0 0 0  0 0 0  0 0 0  0 0 0  0 0 0  0 0 0
diff --git a/fs/Kconfig b/fs/Kconfig
index bc821a86d..89451a264 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -132,6 +132,7 @@ if BLOCK
 menu "DOS/FAT/NT Filesystems"
 
 source "fs/fat/Kconfig"
+source "fs/exfat/Kconfig"
 source "fs/ntfs/Kconfig"
 
 endmenu
@@ -251,6 +252,7 @@ source "fs/pstore/Kconfig"
 source "fs/sysv/Kconfig"
 source "fs/ufs/Kconfig"
 source "fs/exofs/Kconfig"
+source "fs/aufs/Kconfig"
 
 endif # MISC_FILESYSTEMS
 
diff --git a/fs/Makefile b/fs/Makefile
index add789ea2..19cd5c6e7 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -79,6 +79,7 @@ obj-$(CONFIG_HUGETLBFS)		+= hugetlbfs/
 obj-$(CONFIG_CODA_FS)		+= coda/
 obj-$(CONFIG_MINIX_FS)		+= minix/
 obj-$(CONFIG_FAT_FS)		+= fat/
+obj-$(CONFIG_EXFAT_FS)		+= exfat/
 obj-$(CONFIG_BFS_FS)		+= bfs/
 obj-$(CONFIG_ISO9660_FS)	+= isofs/
 obj-$(CONFIG_HFSPLUS_FS)	+= hfsplus/ # Before hfs to find wrapped HFS+
@@ -128,3 +129,4 @@ obj-y				+= exofs/ # Multiple modules
 obj-$(CONFIG_CEPH_FS)		+= ceph/
 obj-$(CONFIG_PSTORE)		+= pstore/
 obj-$(CONFIG_EFIVAR_FS)		+= efivarfs/
+obj-$(CONFIG_AUFS_FS)           += aufs/
diff --git a/fs/aufs/Kconfig b/fs/aufs/Kconfig
new file mode 100644
index 000000000..9f4364257
--- /dev/null
+++ b/fs/aufs/Kconfig
@@ -0,0 +1,199 @@
+# SPDX-License-Identifier: GPL-2.0
+config AUFS_FS
+	tristate "Aufs (Advanced multi layered unification filesystem) support"
+	help
+	Aufs is a stackable unification filesystem such as Unionfs,
+	which unifies several directories and provides a merged single
+	directory.
+	In the early days, aufs was entirely re-designed and
+	re-implemented Unionfs Version 1.x series. Introducing many
+	original ideas, approaches and improvements, it becomes totally
+	different from Unionfs while keeping the basic features.
+
+if AUFS_FS
+choice
+	prompt "Maximum number of branches"
+	default AUFS_BRANCH_MAX_127
+	help
+	Specifies the maximum number of branches (or member directories)
+	in a single aufs. The larger value consumes more system
+	resources and has a minor impact to performance.
+config AUFS_BRANCH_MAX_127
+	bool "127"
+	help
+	Specifies the maximum number of branches (or member directories)
+	in a single aufs. The larger value consumes more system
+	resources and has a minor impact to performance.
+config AUFS_BRANCH_MAX_511
+	bool "511"
+	help
+	Specifies the maximum number of branches (or member directories)
+	in a single aufs. The larger value consumes more system
+	resources and has a minor impact to performance.
+config AUFS_BRANCH_MAX_1023
+	bool "1023"
+	help
+	Specifies the maximum number of branches (or member directories)
+	in a single aufs. The larger value consumes more system
+	resources and has a minor impact to performance.
+config AUFS_BRANCH_MAX_32767
+	bool "32767"
+	help
+	Specifies the maximum number of branches (or member directories)
+	in a single aufs. The larger value consumes more system
+	resources and has a minor impact to performance.
+endchoice
+
+config AUFS_SBILIST
+	bool
+	depends on AUFS_MAGIC_SYSRQ || PROC_FS
+	default y
+	help
+	Automatic configuration for internal use.
+	When aufs supports Magic SysRq or /proc, enabled automatically.
+
+config AUFS_HNOTIFY
+	bool "Detect direct branch access (bypassing aufs)"
+	help
+	If you want to modify files on branches directly, eg. bypassing aufs,
+	and want aufs to detect the changes of them fully, then enable this
+	option and use 'udba=notify' mount option.
+	Currently there is only one available configuration, "fsnotify".
+	It will have a negative impact to the performance.
+	See detail in aufs.5.
+
+choice
+	prompt "method" if AUFS_HNOTIFY
+	default AUFS_HFSNOTIFY
+config AUFS_HFSNOTIFY
+	bool "fsnotify"
+	select FSNOTIFY
+endchoice
+
+config AUFS_EXPORT
+	bool "NFS-exportable aufs"
+	depends on EXPORTFS
+	help
+	If you want to export your mounted aufs via NFS, then enable this
+	option. There are several requirements for this configuration.
+	See detail in aufs.5.
+
+config AUFS_INO_T_64
+	bool
+	depends on AUFS_EXPORT
+	depends on 64BIT && !(ALPHA || S390)
+	default y
+	help
+	Automatic configuration for internal use.
+	/* typedef unsigned long/int __kernel_ino_t */
+	/* alpha and s390x are int */
+
+config AUFS_XATTR
+	bool "support for XATTR/EA (including Security Labels)"
+	help
+	If your branch fs supports XATTR/EA and you want to make them
+	available in aufs too, then enable this opsion and specify the
+	branch attributes for EA.
+	See detail in aufs.5.
+
+config AUFS_FHSM
+	bool "File-based Hierarchical Storage Management"
+	help
+	Hierarchical Storage Management (or HSM) is a well-known feature
+	in the storage world. Aufs provides this feature as file-based.
+	with multiple branches.
+	These multiple branches are prioritized, ie. the topmost one
+	should be the fastest drive and be used heavily.
+
+config AUFS_RDU
+	bool "Readdir in userspace"
+	help
+	Aufs has two methods to provide a merged view for a directory,
+	by a user-space library and by kernel-space natively. The latter
+	is always enabled but sometimes large and slow.
+	If you enable this option, install the library in aufs2-util
+	package, and set some environment variables for your readdir(3),
+	then the work will be handled in user-space which generally
+	shows better performance in most cases.
+	See detail in aufs.5.
+
+config AUFS_DIRREN
+	bool "Workaround for rename(2)-ing a directory"
+	help
+	By default, aufs returns EXDEV error in renameing a dir who has
+	his child on the lower branch, since it is a bad idea to issue
+	rename(2) internally for every lower branch. But user may not
+	accept this behaviour. So here is a workaround to allow such
+	rename(2) and store some extra infromation on the writable
+	branch. Obviously this costs high (and I don't like it).
+	To use this feature, you need to enable this configuration AND
+	to specify the mount option `dirren.'
+	See details in aufs.5 and the design documents.
+
+config AUFS_SHWH
+	bool "Show whiteouts"
+	help
+	If you want to make the whiteouts in aufs visible, then enable
+	this option and specify 'shwh' mount option. Although it may
+	sounds like philosophy or something, but in technically it
+	simply shows the name of whiteout with keeping its behaviour.
+
+config AUFS_BR_RAMFS
+	bool "Ramfs (initramfs/rootfs) as an aufs branch"
+	help
+	If you want to use ramfs as an aufs branch fs, then enable this
+	option. Generally tmpfs is recommended.
+	Aufs prohibited them to be a branch fs by default, because
+	initramfs becomes unusable after switch_root or something
+	generally. If you sets initramfs as an aufs branch and boot your
+	system by switch_root, you will meet a problem easily since the
+	files in initramfs may be inaccessible.
+	Unless you are going to use ramfs as an aufs branch fs without
+	switch_root or something, leave it N.
+
+config AUFS_BR_FUSE
+	bool "Fuse fs as an aufs branch"
+	depends on FUSE_FS
+	select AUFS_POLL
+	help
+	If you want to use fuse-based userspace filesystem as an aufs
+	branch fs, then enable this option.
+	It implements the internal poll(2) operation which is
+	implemented by fuse only (curretnly).
+
+config AUFS_POLL
+	bool
+	help
+	Automatic configuration for internal use.
+
+config AUFS_BR_HFSPLUS
+	bool "Hfsplus as an aufs branch"
+	depends on HFSPLUS_FS
+	default y
+	help
+	If you want to use hfsplus fs as an aufs branch fs, then enable
+	this option. This option introduces a small overhead at
+	copying-up a file on hfsplus.
+
+config AUFS_BDEV_LOOP
+	bool
+	depends on BLK_DEV_LOOP
+	default y
+	help
+	Automatic configuration for internal use.
+	Convert =[ym] into =y.
+
+config AUFS_DEBUG
+	bool "Debug aufs"
+	help
+	Enable this to compile aufs internal debug code.
+	It will have a negative impact to the performance.
+
+config AUFS_MAGIC_SYSRQ
+	bool
+	depends on AUFS_DEBUG && MAGIC_SYSRQ
+	default y
+	help
+	Automatic configuration for internal use.
+	When aufs supports Magic SysRq, enabled automatically.
+endif
diff --git a/fs/aufs/Makefile b/fs/aufs/Makefile
new file mode 100644
index 000000000..2c819a649
--- /dev/null
+++ b/fs/aufs/Makefile
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: GPL-2.0
+
+include ${src}/magic.mk
+ifeq (${CONFIG_AUFS_FS},m)
+include ${src}/conf.mk
+endif
+-include ${src}/priv_def.mk
+
+# cf. include/linux/kernel.h
+# enable pr_debug
+ccflags-y += -DDEBUG
+# sparse requires the full pathname
+ifdef M
+ccflags-y += -include ${M}/../../include/uapi/linux/aufs_type.h
+else
+ccflags-y += -include ${srctree}/include/uapi/linux/aufs_type.h
+endif
+
+obj-$(CONFIG_AUFS_FS) += aufs.o
+aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o \
+	wkq.o vfsub.o dcsub.o \
+	cpup.o whout.o wbr_policy.o \
+	dinfo.o dentry.o \
+	dynop.o \
+	finfo.o file.o f_op.o \
+	dir.o vdir.o \
+	iinfo.o inode.o i_op.o i_op_add.o i_op_del.o i_op_ren.o \
+	mvdown.o ioctl.o
+
+# all are boolean
+aufs-$(CONFIG_PROC_FS) += procfs.o plink.o
+aufs-$(CONFIG_SYSFS) += sysfs.o
+aufs-$(CONFIG_DEBUG_FS) += dbgaufs.o
+aufs-$(CONFIG_AUFS_BDEV_LOOP) += loop.o
+aufs-$(CONFIG_AUFS_HNOTIFY) += hnotify.o
+aufs-$(CONFIG_AUFS_HFSNOTIFY) += hfsnotify.o
+aufs-$(CONFIG_AUFS_EXPORT) += export.o
+aufs-$(CONFIG_AUFS_XATTR) += xattr.o
+aufs-$(CONFIG_FS_POSIX_ACL) += posix_acl.o
+aufs-$(CONFIG_AUFS_DIRREN) += dirren.o
+aufs-$(CONFIG_AUFS_FHSM) += fhsm.o
+aufs-$(CONFIG_AUFS_POLL) += poll.o
+aufs-$(CONFIG_AUFS_RDU) += rdu.o
+aufs-$(CONFIG_AUFS_BR_HFSPLUS) += hfsplus.o
+aufs-$(CONFIG_AUFS_DEBUG) += debug.o
+aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o
diff --git a/fs/aufs/aufs.h b/fs/aufs/aufs.h
new file mode 100644
index 000000000..bcf0a2e40
--- /dev/null
+++ b/fs/aufs/aufs.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * all header files
+ */
+
+#ifndef __AUFS_H__
+#define __AUFS_H__
+
+#ifdef __KERNEL__
+
+#define AuStub(type, name, body, ...) \
+	static inline type name(__VA_ARGS__) { body; }
+
+#define AuStubVoid(name, ...) \
+	AuStub(void, name, , __VA_ARGS__)
+#define AuStubInt0(name, ...) \
+	AuStub(int, name, return 0, __VA_ARGS__)
+
+#include "debug.h"
+
+#include "branch.h"
+#include "cpup.h"
+#include "dcsub.h"
+#include "dbgaufs.h"
+#include "dentry.h"
+#include "dir.h"
+#include "dirren.h"
+#include "dynop.h"
+#include "file.h"
+#include "fstype.h"
+#include "hbl.h"
+#include "inode.h"
+#include "loop.h"
+#include "module.h"
+#include "opts.h"
+#include "rwsem.h"
+#include "super.h"
+#include "sysaufs.h"
+#include "vfsub.h"
+#include "whout.h"
+#include "wkq.h"
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_H__ */
diff --git a/fs/aufs/branch.c b/fs/aufs/branch.c
new file mode 100644
index 000000000..eeca66c00
--- /dev/null
+++ b/fs/aufs/branch.c
@@ -0,0 +1,1432 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * branch management
+ */
+
+#include <linux/compat.h>
+#include <linux/statfs.h>
+#include "aufs.h"
+
+/*
+ * free a single branch
+ */
+static void au_br_do_free(struct au_branch *br)
+{
+	int i;
+	struct au_wbr *wbr;
+	struct au_dykey **key;
+
+	au_hnotify_fin_br(br);
+	/* always, regardless the mount option */
+	au_dr_hino_free(&br->br_dirren);
+
+	if (br->br_xino.xi_file)
+		fput(br->br_xino.xi_file);
+	for (i = br->br_xino.xi_nondir.total - 1; i >= 0; i--)
+		AuDebugOn(br->br_xino.xi_nondir.array[i]);
+	kfree(br->br_xino.xi_nondir.array);
+
+	AuDebugOn(au_br_count(br));
+	au_br_count_fin(br);
+
+	wbr = br->br_wbr;
+	if (wbr) {
+		for (i = 0; i < AuBrWh_Last; i++)
+			dput(wbr->wbr_wh[i]);
+		AuDebugOn(atomic_read(&wbr->wbr_wh_running));
+		AuRwDestroy(&wbr->wbr_wh_rwsem);
+	}
+
+	if (br->br_fhsm) {
+		au_br_fhsm_fin(br->br_fhsm);
+		kfree(br->br_fhsm);
+	}
+
+	key = br->br_dykey;
+	for (i = 0; i < AuBrDynOp; i++, key++)
+		if (*key)
+			au_dy_put(*key);
+		else
+			break;
+
+	/* recursive lock, s_umount of branch's */
+	lockdep_off();
+	path_put(&br->br_path);
+	lockdep_on();
+	kfree(wbr);
+	kfree(br);
+}
+
+/*
+ * frees all branches
+ */
+void au_br_free(struct au_sbinfo *sbinfo)
+{
+	aufs_bindex_t bmax;
+	struct au_branch **br;
+
+	AuRwMustWriteLock(&sbinfo->si_rwsem);
+
+	bmax = sbinfo->si_bbot + 1;
+	br = sbinfo->si_branch;
+	while (bmax--)
+		au_br_do_free(*br++);
+}
+
+/*
+ * find the index of a branch which is specified by @br_id.
+ */
+int au_br_index(struct super_block *sb, aufs_bindex_t br_id)
+{
+	aufs_bindex_t bindex, bbot;
+
+	bbot = au_sbbot(sb);
+	for (bindex = 0; bindex <= bbot; bindex++)
+		if (au_sbr_id(sb, bindex) == br_id)
+			return bindex;
+	return -1;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * add a branch
+ */
+
+static int test_overlap(struct super_block *sb, struct dentry *h_adding,
+			struct dentry *h_root)
+{
+	if (unlikely(h_adding == h_root
+		     || au_test_loopback_overlap(sb, h_adding)))
+		return 1;
+	if (h_adding->d_sb != h_root->d_sb)
+		return 0;
+	return au_test_subdir(h_adding, h_root)
+		|| au_test_subdir(h_root, h_adding);
+}
+
+/*
+ * returns a newly allocated branch. @new_nbranch is a number of branches
+ * after adding a branch.
+ */
+static struct au_branch *au_br_alloc(struct super_block *sb, int new_nbranch,
+				     int perm)
+{
+	struct au_branch *add_branch;
+	struct dentry *root;
+	struct inode *inode;
+	int err;
+
+	err = -ENOMEM;
+	add_branch = kzalloc(sizeof(*add_branch), GFP_NOFS);
+	if (unlikely(!add_branch))
+		goto out;
+	add_branch->br_xino.xi_nondir.total = 8; /* initial size */
+	add_branch->br_xino.xi_nondir.array
+		= kcalloc(add_branch->br_xino.xi_nondir.total, sizeof(ino_t),
+			  GFP_NOFS);
+	if (unlikely(!add_branch->br_xino.xi_nondir.array))
+		goto out_br;
+
+	err = au_hnotify_init_br(add_branch, perm);
+	if (unlikely(err))
+		goto out_xinondir;
+
+	if (au_br_writable(perm)) {
+		/* may be freed separately at changing the branch permission */
+		add_branch->br_wbr = kzalloc(sizeof(*add_branch->br_wbr),
+					     GFP_NOFS);
+		if (unlikely(!add_branch->br_wbr))
+			goto out_hnotify;
+	}
+
+	if (au_br_fhsm(perm)) {
+		err = au_fhsm_br_alloc(add_branch);
+		if (unlikely(err))
+			goto out_wbr;
+	}
+
+	root = sb->s_root;
+	err = au_sbr_realloc(au_sbi(sb), new_nbranch, /*may_shrink*/0);
+	if (!err)
+		err = au_di_realloc(au_di(root), new_nbranch, /*may_shrink*/0);
+	if (!err) {
+		inode = d_inode(root);
+		err = au_hinode_realloc(au_ii(inode), new_nbranch,
+					/*may_shrink*/0);
+	}
+	if (!err)
+		return add_branch; /* success */
+
+out_wbr:
+	kfree(add_branch->br_wbr);
+out_hnotify:
+	au_hnotify_fin_br(add_branch);
+out_xinondir:
+	kfree(add_branch->br_xino.xi_nondir.array);
+out_br:
+	kfree(add_branch);
+out:
+	return ERR_PTR(err);
+}
+
+/*
+ * test if the branch permission is legal or not.
+ */
+static int test_br(struct inode *inode, int brperm, char *path)
+{
+	int err;
+
+	err = (au_br_writable(brperm) && IS_RDONLY(inode));
+	if (!err)
+		goto out;
+
+	err = -EINVAL;
+	pr_err("write permission for readonly mount or inode, %s\n", path);
+
+out:
+	return err;
+}
+
+/*
+ * returns:
+ * 0: success, the caller will add it
+ * plus: success, it is already unified, the caller should ignore it
+ * minus: error
+ */
+static int test_add(struct super_block *sb, struct au_opt_add *add, int remount)
+{
+	int err;
+	aufs_bindex_t bbot, bindex;
+	struct dentry *root, *h_dentry;
+	struct inode *inode, *h_inode;
+
+	root = sb->s_root;
+	bbot = au_sbbot(sb);
+	if (unlikely(bbot >= 0
+		     && au_find_dbindex(root, add->path.dentry) >= 0)) {
+		err = 1;
+		if (!remount) {
+			err = -EINVAL;
+			pr_err("%s duplicated\n", add->pathname);
+		}
+		goto out;
+	}
+
+	err = -ENOSPC; /* -E2BIG; */
+	if (unlikely(AUFS_BRANCH_MAX <= add->bindex
+		     || AUFS_BRANCH_MAX - 1 <= bbot)) {
+		pr_err("number of branches exceeded %s\n", add->pathname);
+		goto out;
+	}
+
+	err = -EDOM;
+	if (unlikely(add->bindex < 0 || bbot + 1 < add->bindex)) {
+		pr_err("bad index %d\n", add->bindex);
+		goto out;
+	}
+
+	inode = d_inode(add->path.dentry);
+	err = -ENOENT;
+	if (unlikely(!inode->i_nlink)) {
+		pr_err("no existence %s\n", add->pathname);
+		goto out;
+	}
+
+	err = -EINVAL;
+	if (unlikely(inode->i_sb == sb)) {
+		pr_err("%s must be outside\n", add->pathname);
+		goto out;
+	}
+
+	if (unlikely(au_test_fs_unsuppoted(inode->i_sb))) {
+		pr_err("unsupported filesystem, %s (%s)\n",
+		       add->pathname, au_sbtype(inode->i_sb));
+		goto out;
+	}
+
+	if (unlikely(inode->i_sb->s_stack_depth)) {
+		pr_err("already stacked, %s (%s)\n",
+		       add->pathname, au_sbtype(inode->i_sb));
+		goto out;
+	}
+
+	err = test_br(d_inode(add->path.dentry), add->perm, add->pathname);
+	if (unlikely(err))
+		goto out;
+
+	if (bbot < 0)
+		return 0; /* success */
+
+	err = -EINVAL;
+	for (bindex = 0; bindex <= bbot; bindex++)
+		if (unlikely(test_overlap(sb, add->path.dentry,
+					  au_h_dptr(root, bindex)))) {
+			pr_err("%s is overlapped\n", add->pathname);
+			goto out;
+		}
+
+	err = 0;
+	if (au_opt_test(au_mntflags(sb), WARN_PERM)) {
+		h_dentry = au_h_dptr(root, 0);
+		h_inode = d_inode(h_dentry);
+		if ((h_inode->i_mode & S_IALLUGO) != (inode->i_mode & S_IALLUGO)
+		    || !uid_eq(h_inode->i_uid, inode->i_uid)
+		    || !gid_eq(h_inode->i_gid, inode->i_gid))
+			pr_warn("uid/gid/perm %s %u/%u/0%o, %u/%u/0%o\n",
+				add->pathname,
+				i_uid_read(inode), i_gid_read(inode),
+				(inode->i_mode & S_IALLUGO),
+				i_uid_read(h_inode), i_gid_read(h_inode),
+				(h_inode->i_mode & S_IALLUGO));
+	}
+
+out:
+	return err;
+}
+
+/*
+ * initialize or clean the whiteouts for an adding branch
+ */
+static int au_br_init_wh(struct super_block *sb, struct au_branch *br,
+			 int new_perm)
+{
+	int err, old_perm;
+	aufs_bindex_t bindex;
+	struct inode *h_inode;
+	struct au_wbr *wbr;
+	struct au_hinode *hdir;
+	struct dentry *h_dentry;
+
+	err = vfsub_mnt_want_write(au_br_mnt(br));
+	if (unlikely(err))
+		goto out;
+
+	wbr = br->br_wbr;
+	old_perm = br->br_perm;
+	br->br_perm = new_perm;
+	hdir = NULL;
+	h_inode = NULL;
+	bindex = au_br_index(sb, br->br_id);
+	if (0 <= bindex) {
+		hdir = au_hi(d_inode(sb->s_root), bindex);
+		au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
+	} else {
+		h_dentry = au_br_dentry(br);
+		h_inode = d_inode(h_dentry);
+		inode_lock_nested(h_inode, AuLsc_I_PARENT);
+	}
+	if (!wbr)
+		err = au_wh_init(br, sb);
+	else {
+		wbr_wh_write_lock(wbr);
+		err = au_wh_init(br, sb);
+		wbr_wh_write_unlock(wbr);
+	}
+	if (hdir)
+		au_hn_inode_unlock(hdir);
+	else
+		inode_unlock(h_inode);
+	vfsub_mnt_drop_write(au_br_mnt(br));
+	br->br_perm = old_perm;
+
+	if (!err && wbr && !au_br_writable(new_perm)) {
+		kfree(wbr);
+		br->br_wbr = NULL;
+	}
+
+out:
+	return err;
+}
+
+static int au_wbr_init(struct au_branch *br, struct super_block *sb,
+		       int perm)
+{
+	int err;
+	struct kstatfs kst;
+	struct au_wbr *wbr;
+
+	wbr = br->br_wbr;
+	au_rw_init(&wbr->wbr_wh_rwsem);
+	atomic_set(&wbr->wbr_wh_running, 0);
+
+	/*
+	 * a limit for rmdir/rename a dir
+	 * cf. AUFS_MAX_NAMELEN in include/uapi/linux/aufs_type.h
+	 */
+	err = vfs_statfs(&br->br_path, &kst);
+	if (unlikely(err))
+		goto out;
+	err = -EINVAL;
+	if (kst.f_namelen >= NAME_MAX)
+		err = au_br_init_wh(sb, br, perm);
+	else
+		pr_err("%pd(%s), unsupported namelen %ld\n",
+		       au_br_dentry(br),
+		       au_sbtype(au_br_dentry(br)->d_sb), kst.f_namelen);
+
+out:
+	return err;
+}
+
+/* initialize a new branch */
+static int au_br_init(struct au_branch *br, struct super_block *sb,
+		      struct au_opt_add *add)
+{
+	int err;
+	struct inode *h_inode;
+
+	err = 0;
+	spin_lock_init(&br->br_xino.xi_nondir.spin);
+	init_waitqueue_head(&br->br_xino.xi_nondir.wqh);
+	br->br_perm = add->perm;
+	br->br_path = add->path; /* set first, path_get() later */
+	spin_lock_init(&br->br_dykey_lock);
+	au_br_count_init(br);
+	atomic_set(&br->br_xino_running, 0);
+	br->br_id = au_new_br_id(sb);
+	AuDebugOn(br->br_id < 0);
+
+	/* always, regardless the given option */
+	err = au_dr_br_init(sb, br, &add->path);
+	if (unlikely(err))
+		goto out_err;
+
+	if (au_br_writable(add->perm)) {
+		err = au_wbr_init(br, sb, add->perm);
+		if (unlikely(err))
+			goto out_err;
+	}
+
+	if (au_opt_test(au_mntflags(sb), XINO)) {
+		h_inode = d_inode(add->path.dentry);
+		err = au_xino_br(sb, br, h_inode->i_ino,
+				 au_sbr(sb, 0)->br_xino.xi_file, /*do_test*/1);
+		if (unlikely(err)) {
+			AuDebugOn(br->br_xino.xi_file);
+			goto out_err;
+		}
+	}
+
+	sysaufs_br_init(br);
+	path_get(&br->br_path);
+	goto out; /* success */
+
+out_err:
+	memset(&br->br_path, 0, sizeof(br->br_path));
+out:
+	return err;
+}
+
+static void au_br_do_add_brp(struct au_sbinfo *sbinfo, aufs_bindex_t bindex,
+			     struct au_branch *br, aufs_bindex_t bbot,
+			     aufs_bindex_t amount)
+{
+	struct au_branch **brp;
+
+	AuRwMustWriteLock(&sbinfo->si_rwsem);
+
+	brp = sbinfo->si_branch + bindex;
+	memmove(brp + 1, brp, sizeof(*brp) * amount);
+	*brp = br;
+	sbinfo->si_bbot++;
+	if (unlikely(bbot < 0))
+		sbinfo->si_bbot = 0;
+}
+
+static void au_br_do_add_hdp(struct au_dinfo *dinfo, aufs_bindex_t bindex,
+			     aufs_bindex_t bbot, aufs_bindex_t amount)
+{
+	struct au_hdentry *hdp;
+
+	AuRwMustWriteLock(&dinfo->di_rwsem);
+
+	hdp = au_hdentry(dinfo, bindex);
+	memmove(hdp + 1, hdp, sizeof(*hdp) * amount);
+	au_h_dentry_init(hdp);
+	dinfo->di_bbot++;
+	if (unlikely(bbot < 0))
+		dinfo->di_btop = 0;
+}
+
+static void au_br_do_add_hip(struct au_iinfo *iinfo, aufs_bindex_t bindex,
+			     aufs_bindex_t bbot, aufs_bindex_t amount)
+{
+	struct au_hinode *hip;
+
+	AuRwMustWriteLock(&iinfo->ii_rwsem);
+
+	hip = au_hinode(iinfo, bindex);
+	memmove(hip + 1, hip, sizeof(*hip) * amount);
+	au_hinode_init(hip);
+	iinfo->ii_bbot++;
+	if (unlikely(bbot < 0))
+		iinfo->ii_btop = 0;
+}
+
+static void au_br_do_add(struct super_block *sb, struct au_branch *br,
+			 aufs_bindex_t bindex)
+{
+	struct dentry *root, *h_dentry;
+	struct inode *root_inode, *h_inode;
+	aufs_bindex_t bbot, amount;
+
+	root = sb->s_root;
+	root_inode = d_inode(root);
+	bbot = au_sbbot(sb);
+	amount = bbot + 1 - bindex;
+	h_dentry = au_br_dentry(br);
+	au_sbilist_lock();
+	au_br_do_add_brp(au_sbi(sb), bindex, br, bbot, amount);
+	au_br_do_add_hdp(au_di(root), bindex, bbot, amount);
+	au_br_do_add_hip(au_ii(root_inode), bindex, bbot, amount);
+	au_set_h_dptr(root, bindex, dget(h_dentry));
+	h_inode = d_inode(h_dentry);
+	au_set_h_iptr(root_inode, bindex, au_igrab(h_inode), /*flags*/0);
+	au_sbilist_unlock();
+}
+
+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount)
+{
+	int err;
+	aufs_bindex_t bbot, add_bindex;
+	struct dentry *root, *h_dentry;
+	struct inode *root_inode;
+	struct au_branch *add_branch;
+
+	root = sb->s_root;
+	root_inode = d_inode(root);
+	IMustLock(root_inode);
+	IiMustWriteLock(root_inode);
+	err = test_add(sb, add, remount);
+	if (unlikely(err < 0))
+		goto out;
+	if (err) {
+		err = 0;
+		goto out; /* success */
+	}
+
+	bbot = au_sbbot(sb);
+	add_branch = au_br_alloc(sb, bbot + 2, add->perm);
+	err = PTR_ERR(add_branch);
+	if (IS_ERR(add_branch))
+		goto out;
+
+	err = au_br_init(add_branch, sb, add);
+	if (unlikely(err)) {
+		au_br_do_free(add_branch);
+		goto out;
+	}
+
+	add_bindex = add->bindex;
+	if (!remount)
+		au_br_do_add(sb, add_branch, add_bindex);
+	else {
+		sysaufs_brs_del(sb, add_bindex);
+		au_br_do_add(sb, add_branch, add_bindex);
+		sysaufs_brs_add(sb, add_bindex);
+	}
+
+	h_dentry = add->path.dentry;
+	if (!add_bindex) {
+		au_cpup_attr_all(root_inode, /*force*/1);
+		sb->s_maxbytes = h_dentry->d_sb->s_maxbytes;
+	} else
+		au_add_nlink(root_inode, d_inode(h_dentry));
+
+	/*
+	 * this test/set prevents aufs from handling unnecesary notify events
+	 * of xino files, in case of re-adding a writable branch which was
+	 * once detached from aufs.
+	 */
+	if (au_xino_brid(sb) < 0
+	    && au_br_writable(add_branch->br_perm)
+	    && !au_test_fs_bad_xino(h_dentry->d_sb)
+	    && add_branch->br_xino.xi_file
+	    && add_branch->br_xino.xi_file->f_path.dentry->d_parent == h_dentry)
+		au_xino_brid_set(sb, add_branch->br_id);
+
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static unsigned long long au_farray_cb(struct super_block *sb, void *a,
+				       unsigned long long max __maybe_unused,
+				       void *arg)
+{
+	unsigned long long n;
+	struct file **p, *f;
+	struct hlist_bl_head *files;
+	struct hlist_bl_node *pos;
+	struct au_finfo *finfo;
+
+	n = 0;
+	p = a;
+	files = &au_sbi(sb)->si_files;
+	hlist_bl_lock(files);
+	hlist_bl_for_each_entry(finfo, pos, files, fi_hlist) {
+		f = finfo->fi_file;
+		if (file_count(f)
+		    && !special_file(file_inode(f)->i_mode)) {
+			get_file(f);
+			*p++ = f;
+			n++;
+			AuDebugOn(n > max);
+		}
+	}
+	hlist_bl_unlock(files);
+
+	return n;
+}
+
+static struct file **au_farray_alloc(struct super_block *sb,
+				     unsigned long long *max)
+{
+	*max = au_nfiles(sb);
+	return au_array_alloc(max, au_farray_cb, sb, /*arg*/NULL);
+}
+
+static void au_farray_free(struct file **a, unsigned long long max)
+{
+	unsigned long long ull;
+
+	for (ull = 0; ull < max; ull++)
+		if (a[ull])
+			fput(a[ull]);
+	kvfree(a);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * delete a branch
+ */
+
+/* to show the line number, do not make it inlined function */
+#define AuVerbose(do_info, fmt, ...) do { \
+	if (do_info) \
+		pr_info(fmt, ##__VA_ARGS__); \
+} while (0)
+
+static int au_test_ibusy(struct inode *inode, aufs_bindex_t btop,
+			 aufs_bindex_t bbot)
+{
+	return (inode && !S_ISDIR(inode->i_mode)) || btop == bbot;
+}
+
+static int au_test_dbusy(struct dentry *dentry, aufs_bindex_t btop,
+			 aufs_bindex_t bbot)
+{
+	return au_test_ibusy(d_inode(dentry), btop, bbot);
+}
+
+/*
+ * test if the branch is deletable or not.
+ */
+static int test_dentry_busy(struct dentry *root, aufs_bindex_t bindex,
+			    unsigned int sigen, const unsigned int verbose)
+{
+	int err, i, j, ndentry;
+	aufs_bindex_t btop, bbot;
+	struct au_dcsub_pages dpages;
+	struct au_dpage *dpage;
+	struct dentry *d;
+
+	err = au_dpages_init(&dpages, GFP_NOFS);
+	if (unlikely(err))
+		goto out;
+	err = au_dcsub_pages(&dpages, root, NULL, NULL);
+	if (unlikely(err))
+		goto out_dpages;
+
+	for (i = 0; !err && i < dpages.ndpage; i++) {
+		dpage = dpages.dpages + i;
+		ndentry = dpage->ndentry;
+		for (j = 0; !err && j < ndentry; j++) {
+			d = dpage->dentries[j];
+			AuDebugOn(au_dcount(d) <= 0);
+			if (!au_digen_test(d, sigen)) {
+				di_read_lock_child(d, AuLock_IR);
+				if (unlikely(au_dbrange_test(d))) {
+					di_read_unlock(d, AuLock_IR);
+					continue;
+				}
+			} else {
+				di_write_lock_child(d);
+				if (unlikely(au_dbrange_test(d))) {
+					di_write_unlock(d);
+					continue;
+				}
+				err = au_reval_dpath(d, sigen);
+				if (!err)
+					di_downgrade_lock(d, AuLock_IR);
+				else {
+					di_write_unlock(d);
+					break;
+				}
+			}
+
+			/* AuDbgDentry(d); */
+			btop = au_dbtop(d);
+			bbot = au_dbbot(d);
+			if (btop <= bindex
+			    && bindex <= bbot
+			    && au_h_dptr(d, bindex)
+			    && au_test_dbusy(d, btop, bbot)) {
+				err = -EBUSY;
+				AuVerbose(verbose, "busy %pd\n", d);
+				AuDbgDentry(d);
+			}
+			di_read_unlock(d, AuLock_IR);
+		}
+	}
+
+out_dpages:
+	au_dpages_free(&dpages);
+out:
+	return err;
+}
+
+static int test_inode_busy(struct super_block *sb, aufs_bindex_t bindex,
+			   unsigned int sigen, const unsigned int verbose)
+{
+	int err;
+	unsigned long long max, ull;
+	struct inode *i, **array;
+	aufs_bindex_t btop, bbot;
+
+	array = au_iarray_alloc(sb, &max);
+	err = PTR_ERR(array);
+	if (IS_ERR(array))
+		goto out;
+
+	err = 0;
+	AuDbg("b%d\n", bindex);
+	for (ull = 0; !err && ull < max; ull++) {
+		i = array[ull];
+		if (unlikely(!i))
+			break;
+		if (i->i_ino == AUFS_ROOT_INO)
+			continue;
+
+		/* AuDbgInode(i); */
+		if (au_iigen(i, NULL) == sigen)
+			ii_read_lock_child(i);
+		else {
+			ii_write_lock_child(i);
+			err = au_refresh_hinode_self(i);
+			au_iigen_dec(i);
+			if (!err)
+				ii_downgrade_lock(i);
+			else {
+				ii_write_unlock(i);
+				break;
+			}
+		}
+
+		btop = au_ibtop(i);
+		bbot = au_ibbot(i);
+		if (btop <= bindex
+		    && bindex <= bbot
+		    && au_h_iptr(i, bindex)
+		    && au_test_ibusy(i, btop, bbot)) {
+			err = -EBUSY;
+			AuVerbose(verbose, "busy i%lu\n", i->i_ino);
+			AuDbgInode(i);
+		}
+		ii_read_unlock(i);
+	}
+	au_iarray_free(array, max);
+
+out:
+	return err;
+}
+
+static int test_children_busy(struct dentry *root, aufs_bindex_t bindex,
+			      const unsigned int verbose)
+{
+	int err;
+	unsigned int sigen;
+
+	sigen = au_sigen(root->d_sb);
+	DiMustNoWaiters(root);
+	IiMustNoWaiters(d_inode(root));
+	di_write_unlock(root);
+	err = test_dentry_busy(root, bindex, sigen, verbose);
+	if (!err)
+		err = test_inode_busy(root->d_sb, bindex, sigen, verbose);
+	di_write_lock_child(root); /* aufs_write_lock() calls ..._child() */
+
+	return err;
+}
+
+static int test_dir_busy(struct file *file, aufs_bindex_t br_id,
+			 struct file **to_free, int *idx)
+{
+	int err;
+	unsigned char matched, root;
+	aufs_bindex_t bindex, bbot;
+	struct au_fidir *fidir;
+	struct au_hfile *hfile;
+
+	err = 0;
+	root = IS_ROOT(file->f_path.dentry);
+	if (root) {
+		get_file(file);
+		to_free[*idx] = file;
+		(*idx)++;
+		goto out;
+	}
+
+	matched = 0;
+	fidir = au_fi(file)->fi_hdir;
+	AuDebugOn(!fidir);
+	bbot = au_fbbot_dir(file);
+	for (bindex = au_fbtop(file); bindex <= bbot; bindex++) {
+		hfile = fidir->fd_hfile + bindex;
+		if (!hfile->hf_file)
+			continue;
+
+		if (hfile->hf_br->br_id == br_id) {
+			matched = 1;
+			break;
+		}
+	}
+	if (matched)
+		err = -EBUSY;
+
+out:
+	return err;
+}
+
+static int test_file_busy(struct super_block *sb, aufs_bindex_t br_id,
+			  struct file **to_free, int opened)
+{
+	int err, idx;
+	unsigned long long ull, max;
+	aufs_bindex_t btop;
+	struct file *file, **array;
+	struct dentry *root;
+	struct au_hfile *hfile;
+
+	array = au_farray_alloc(sb, &max);
+	err = PTR_ERR(array);
+	if (IS_ERR(array))
+		goto out;
+
+	err = 0;
+	idx = 0;
+	root = sb->s_root;
+	di_write_unlock(root);
+	for (ull = 0; ull < max; ull++) {
+		file = array[ull];
+		if (unlikely(!file))
+			break;
+
+		/* AuDbg("%pD\n", file); */
+		fi_read_lock(file);
+		btop = au_fbtop(file);
+		if (!d_is_dir(file->f_path.dentry)) {
+			hfile = &au_fi(file)->fi_htop;
+			if (hfile->hf_br->br_id == br_id)
+				err = -EBUSY;
+		} else
+			err = test_dir_busy(file, br_id, to_free, &idx);
+		fi_read_unlock(file);
+		if (unlikely(err))
+			break;
+	}
+	di_write_lock_child(root);
+	au_farray_free(array, max);
+	AuDebugOn(idx > opened);
+
+out:
+	return err;
+}
+
+static void br_del_file(struct file **to_free, unsigned long long opened,
+			  aufs_bindex_t br_id)
+{
+	unsigned long long ull;
+	aufs_bindex_t bindex, btop, bbot, bfound;
+	struct file *file;
+	struct au_fidir *fidir;
+	struct au_hfile *hfile;
+
+	for (ull = 0; ull < opened; ull++) {
+		file = to_free[ull];
+		if (unlikely(!file))
+			break;
+
+		/* AuDbg("%pD\n", file); */
+		AuDebugOn(!d_is_dir(file->f_path.dentry));
+		bfound = -1;
+		fidir = au_fi(file)->fi_hdir;
+		AuDebugOn(!fidir);
+		fi_write_lock(file);
+		btop = au_fbtop(file);
+		bbot = au_fbbot_dir(file);
+		for (bindex = btop; bindex <= bbot; bindex++) {
+			hfile = fidir->fd_hfile + bindex;
+			if (!hfile->hf_file)
+				continue;
+
+			if (hfile->hf_br->br_id == br_id) {
+				bfound = bindex;
+				break;
+			}
+		}
+		AuDebugOn(bfound < 0);
+		au_set_h_fptr(file, bfound, NULL);
+		if (bfound == btop) {
+			for (btop++; btop <= bbot; btop++)
+				if (au_hf_dir(file, btop)) {
+					au_set_fbtop(file, btop);
+					break;
+				}
+		}
+		fi_write_unlock(file);
+	}
+}
+
+static void au_br_do_del_brp(struct au_sbinfo *sbinfo,
+			     const aufs_bindex_t bindex,
+			     const aufs_bindex_t bbot)
+{
+	struct au_branch **brp, **p;
+
+	AuRwMustWriteLock(&sbinfo->si_rwsem);
+
+	brp = sbinfo->si_branch + bindex;
+	if (bindex < bbot)
+		memmove(brp, brp + 1, sizeof(*brp) * (bbot - bindex));
+	sbinfo->si_branch[0 + bbot] = NULL;
+	sbinfo->si_bbot--;
+
+	p = au_krealloc(sbinfo->si_branch, sizeof(*p) * bbot, AuGFP_SBILIST,
+			/*may_shrink*/1);
+	if (p)
+		sbinfo->si_branch = p;
+	/* harmless error */
+}
+
+static void au_br_do_del_hdp(struct au_dinfo *dinfo, const aufs_bindex_t bindex,
+			     const aufs_bindex_t bbot)
+{
+	struct au_hdentry *hdp, *p;
+
+	AuRwMustWriteLock(&dinfo->di_rwsem);
+
+	hdp = au_hdentry(dinfo, bindex);
+	if (bindex < bbot)
+		memmove(hdp, hdp + 1, sizeof(*hdp) * (bbot - bindex));
+	/* au_h_dentry_init(au_hdentry(dinfo, bbot); */
+	dinfo->di_bbot--;
+
+	p = au_krealloc(dinfo->di_hdentry, sizeof(*p) * bbot, AuGFP_SBILIST,
+			/*may_shrink*/1);
+	if (p)
+		dinfo->di_hdentry = p;
+	/* harmless error */
+}
+
+static void au_br_do_del_hip(struct au_iinfo *iinfo, const aufs_bindex_t bindex,
+			     const aufs_bindex_t bbot)
+{
+	struct au_hinode *hip, *p;
+
+	AuRwMustWriteLock(&iinfo->ii_rwsem);
+
+	hip = au_hinode(iinfo, bindex);
+	if (bindex < bbot)
+		memmove(hip, hip + 1, sizeof(*hip) * (bbot - bindex));
+	/* au_hinode_init(au_hinode(iinfo, bbot)); */
+	iinfo->ii_bbot--;
+
+	p = au_krealloc(iinfo->ii_hinode, sizeof(*p) * bbot, AuGFP_SBILIST,
+			/*may_shrink*/1);
+	if (p)
+		iinfo->ii_hinode = p;
+	/* harmless error */
+}
+
+static void au_br_do_del(struct super_block *sb, aufs_bindex_t bindex,
+			 struct au_branch *br)
+{
+	aufs_bindex_t bbot;
+	struct au_sbinfo *sbinfo;
+	struct dentry *root, *h_root;
+	struct inode *inode, *h_inode;
+	struct au_hinode *hinode;
+
+	SiMustWriteLock(sb);
+
+	root = sb->s_root;
+	inode = d_inode(root);
+	sbinfo = au_sbi(sb);
+	bbot = sbinfo->si_bbot;
+
+	h_root = au_h_dptr(root, bindex);
+	hinode = au_hi(inode, bindex);
+	h_inode = au_igrab(hinode->hi_inode);
+	au_hiput(hinode);
+
+	au_sbilist_lock();
+	au_br_do_del_brp(sbinfo, bindex, bbot);
+	au_br_do_del_hdp(au_di(root), bindex, bbot);
+	au_br_do_del_hip(au_ii(inode), bindex, bbot);
+	au_sbilist_unlock();
+
+	/* ignore an error */
+	au_dr_br_fin(sb, br); /* always, regardless the mount option */
+
+	dput(h_root);
+	iput(h_inode);
+	au_br_do_free(br);
+}
+
+static unsigned long long empty_cb(struct super_block *sb, void *array,
+				   unsigned long long max, void *arg)
+{
+	return max;
+}
+
+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount)
+{
+	int err, rerr, i;
+	unsigned long long opened;
+	unsigned int mnt_flags;
+	aufs_bindex_t bindex, bbot, br_id;
+	unsigned char do_wh, verbose;
+	struct au_branch *br;
+	struct au_wbr *wbr;
+	struct dentry *root;
+	struct file **to_free;
+
+	err = 0;
+	opened = 0;
+	to_free = NULL;
+	root = sb->s_root;
+	bindex = au_find_dbindex(root, del->h_path.dentry);
+	if (bindex < 0) {
+		if (remount)
+			goto out; /* success */
+		err = -ENOENT;
+		pr_err("%s no such branch\n", del->pathname);
+		goto out;
+	}
+	AuDbg("bindex b%d\n", bindex);
+
+	err = -EBUSY;
+	mnt_flags = au_mntflags(sb);
+	verbose = !!au_opt_test(mnt_flags, VERBOSE);
+	bbot = au_sbbot(sb);
+	if (unlikely(!bbot)) {
+		AuVerbose(verbose, "no more branches left\n");
+		goto out;
+	}
+	br = au_sbr(sb, bindex);
+	AuDebugOn(!path_equal(&br->br_path, &del->h_path));
+
+	br_id = br->br_id;
+	opened = au_br_count(br);
+	if (unlikely(opened)) {
+		to_free = au_array_alloc(&opened, empty_cb, sb, NULL);
+		err = PTR_ERR(to_free);
+		if (IS_ERR(to_free))
+			goto out;
+
+		err = test_file_busy(sb, br_id, to_free, opened);
+		if (unlikely(err)) {
+			AuVerbose(verbose, "%llu file(s) opened\n", opened);
+			goto out;
+		}
+	}
+
+	wbr = br->br_wbr;
+	do_wh = wbr && (wbr->wbr_whbase || wbr->wbr_plink || wbr->wbr_orph);
+	if (do_wh) {
+		/* instead of WbrWhMustWriteLock(wbr) */
+		SiMustWriteLock(sb);
+		for (i = 0; i < AuBrWh_Last; i++) {
+			dput(wbr->wbr_wh[i]);
+			wbr->wbr_wh[i] = NULL;
+		}
+	}
+
+	err = test_children_busy(root, bindex, verbose);
+	if (unlikely(err)) {
+		if (do_wh)
+			goto out_wh;
+		goto out;
+	}
+
+	err = 0;
+	if (to_free) {
+		/*
+		 * now we confirmed the branch is deletable.
+		 * let's free the remaining opened dirs on the branch.
+		 */
+		di_write_unlock(root);
+		br_del_file(to_free, opened, br_id);
+		di_write_lock_child(root);
+	}
+
+	if (!remount)
+		au_br_do_del(sb, bindex, br);
+	else {
+		sysaufs_brs_del(sb, bindex);
+		au_br_do_del(sb, bindex, br);
+		sysaufs_brs_add(sb, bindex);
+	}
+
+	if (!bindex) {
+		au_cpup_attr_all(d_inode(root), /*force*/1);
+		sb->s_maxbytes = au_sbr_sb(sb, 0)->s_maxbytes;
+	} else
+		au_sub_nlink(d_inode(root), d_inode(del->h_path.dentry));
+	if (au_opt_test(mnt_flags, PLINK))
+		au_plink_half_refresh(sb, br_id);
+
+	if (au_xino_brid(sb) == br_id)
+		au_xino_brid_set(sb, -1);
+	goto out; /* success */
+
+out_wh:
+	/* revert */
+	rerr = au_br_init_wh(sb, br, br->br_perm);
+	if (rerr)
+		pr_warn("failed re-creating base whiteout, %s. (%d)\n",
+			del->pathname, rerr);
+out:
+	if (to_free)
+		au_farray_free(to_free, opened);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_ibusy(struct super_block *sb, struct aufs_ibusy __user *arg)
+{
+	int err;
+	aufs_bindex_t btop, bbot;
+	struct aufs_ibusy ibusy;
+	struct inode *inode, *h_inode;
+
+	err = -EPERM;
+	if (unlikely(!capable(CAP_SYS_ADMIN)))
+		goto out;
+
+	err = copy_from_user(&ibusy, arg, sizeof(ibusy));
+	if (!err)
+		err = !access_ok(VERIFY_WRITE, &arg->h_ino, sizeof(arg->h_ino));
+	if (unlikely(err)) {
+		err = -EFAULT;
+		AuTraceErr(err);
+		goto out;
+	}
+
+	err = -EINVAL;
+	si_read_lock(sb, AuLock_FLUSH);
+	if (unlikely(ibusy.bindex < 0 || ibusy.bindex > au_sbbot(sb)))
+		goto out_unlock;
+
+	err = 0;
+	ibusy.h_ino = 0; /* invalid */
+	inode = ilookup(sb, ibusy.ino);
+	if (!inode
+	    || inode->i_ino == AUFS_ROOT_INO
+	    || au_is_bad_inode(inode))
+		goto out_unlock;
+
+	ii_read_lock_child(inode);
+	btop = au_ibtop(inode);
+	bbot = au_ibbot(inode);
+	if (btop <= ibusy.bindex && ibusy.bindex <= bbot) {
+		h_inode = au_h_iptr(inode, ibusy.bindex);
+		if (h_inode && au_test_ibusy(inode, btop, bbot))
+			ibusy.h_ino = h_inode->i_ino;
+	}
+	ii_read_unlock(inode);
+	iput(inode);
+
+out_unlock:
+	si_read_unlock(sb);
+	if (!err) {
+		err = __put_user(ibusy.h_ino, &arg->h_ino);
+		if (unlikely(err)) {
+			err = -EFAULT;
+			AuTraceErr(err);
+		}
+	}
+out:
+	return err;
+}
+
+long au_ibusy_ioctl(struct file *file, unsigned long arg)
+{
+	return au_ibusy(file->f_path.dentry->d_sb, (void __user *)arg);
+}
+
+#ifdef CONFIG_COMPAT
+long au_ibusy_compat_ioctl(struct file *file, unsigned long arg)
+{
+	return au_ibusy(file->f_path.dentry->d_sb, compat_ptr(arg));
+}
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * change a branch permission
+ */
+
+static void au_warn_ima(void)
+{
+#ifdef CONFIG_IMA
+	/* since it doesn't support mark_files_ro() */
+	AuWarn1("RW -> RO makes IMA to produce wrong message\n");
+#endif
+}
+
+static int do_need_sigen_inc(int a, int b)
+{
+	return au_br_whable(a) && !au_br_whable(b);
+}
+
+static int need_sigen_inc(int old, int new)
+{
+	return do_need_sigen_inc(old, new)
+		|| do_need_sigen_inc(new, old);
+}
+
+static int au_br_mod_files_ro(struct super_block *sb, aufs_bindex_t bindex)
+{
+	int err, do_warn;
+	unsigned int mnt_flags;
+	unsigned long long ull, max;
+	aufs_bindex_t br_id;
+	unsigned char verbose, writer;
+	struct file *file, *hf, **array;
+	struct au_hfile *hfile;
+
+	mnt_flags = au_mntflags(sb);
+	verbose = !!au_opt_test(mnt_flags, VERBOSE);
+
+	array = au_farray_alloc(sb, &max);
+	err = PTR_ERR(array);
+	if (IS_ERR(array))
+		goto out;
+
+	do_warn = 0;
+	br_id = au_sbr_id(sb, bindex);
+	for (ull = 0; ull < max; ull++) {
+		file = array[ull];
+		if (unlikely(!file))
+			break;
+
+		/* AuDbg("%pD\n", file); */
+		fi_read_lock(file);
+		if (unlikely(au_test_mmapped(file))) {
+			err = -EBUSY;
+			AuVerbose(verbose, "mmapped %pD\n", file);
+			AuDbgFile(file);
+			FiMustNoWaiters(file);
+			fi_read_unlock(file);
+			goto out_array;
+		}
+
+		hfile = &au_fi(file)->fi_htop;
+		hf = hfile->hf_file;
+		if (!d_is_reg(file->f_path.dentry)
+		    || !(file->f_mode & FMODE_WRITE)
+		    || hfile->hf_br->br_id != br_id
+		    || !(hf->f_mode & FMODE_WRITE))
+			array[ull] = NULL;
+		else {
+			do_warn = 1;
+			get_file(file);
+		}
+
+		FiMustNoWaiters(file);
+		fi_read_unlock(file);
+		fput(file);
+	}
+
+	err = 0;
+	if (do_warn)
+		au_warn_ima();
+
+	for (ull = 0; ull < max; ull++) {
+		file = array[ull];
+		if (!file)
+			continue;
+
+		/* todo: already flushed? */
+		/*
+		 * fs/super.c:mark_files_ro() is gone, but aufs keeps its
+		 * approach which resets f_mode and calls mnt_drop_write() and
+		 * file_release_write() for each file, because the branch
+		 * attribute in aufs world is totally different from the native
+		 * fs rw/ro mode.
+		*/
+		/* fi_read_lock(file); */
+		hfile = &au_fi(file)->fi_htop;
+		hf = hfile->hf_file;
+		/* fi_read_unlock(file); */
+		spin_lock(&hf->f_lock);
+		writer = !!(hf->f_mode & FMODE_WRITER);
+		hf->f_mode &= ~(FMODE_WRITE | FMODE_WRITER);
+		spin_unlock(&hf->f_lock);
+		if (writer) {
+			put_write_access(file_inode(hf));
+			__mnt_drop_write(hf->f_path.mnt);
+		}
+	}
+
+out_array:
+	au_farray_free(array, max);
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
+	      int *do_refresh)
+{
+	int err, rerr;
+	aufs_bindex_t bindex;
+	struct dentry *root;
+	struct au_branch *br;
+	struct au_br_fhsm *bf;
+
+	root = sb->s_root;
+	bindex = au_find_dbindex(root, mod->h_root);
+	if (bindex < 0) {
+		if (remount)
+			return 0; /* success */
+		err = -ENOENT;
+		pr_err("%s no such branch\n", mod->path);
+		goto out;
+	}
+	AuDbg("bindex b%d\n", bindex);
+
+	err = test_br(d_inode(mod->h_root), mod->perm, mod->path);
+	if (unlikely(err))
+		goto out;
+
+	br = au_sbr(sb, bindex);
+	AuDebugOn(mod->h_root != au_br_dentry(br));
+	if (br->br_perm == mod->perm)
+		return 0; /* success */
+
+	/* pre-allocate for non-fhsm --> fhsm */
+	bf = NULL;
+	if (!au_br_fhsm(br->br_perm) && au_br_fhsm(mod->perm)) {
+		err = au_fhsm_br_alloc(br);
+		if (unlikely(err))
+			goto out;
+		bf = br->br_fhsm;
+		br->br_fhsm = NULL;
+	}
+
+	if (au_br_writable(br->br_perm)) {
+		/* remove whiteout base */
+		err = au_br_init_wh(sb, br, mod->perm);
+		if (unlikely(err))
+			goto out_bf;
+
+		if (!au_br_writable(mod->perm)) {
+			/* rw --> ro, file might be mmapped */
+			DiMustNoWaiters(root);
+			IiMustNoWaiters(d_inode(root));
+			di_write_unlock(root);
+			err = au_br_mod_files_ro(sb, bindex);
+			/* aufs_write_lock() calls ..._child() */
+			di_write_lock_child(root);
+
+			if (unlikely(err)) {
+				rerr = -ENOMEM;
+				br->br_wbr = kzalloc(sizeof(*br->br_wbr),
+						     GFP_NOFS);
+				if (br->br_wbr)
+					rerr = au_wbr_init(br, sb, br->br_perm);
+				if (unlikely(rerr)) {
+					AuIOErr("nested error %d (%d)\n",
+						rerr, err);
+					br->br_perm = mod->perm;
+				}
+			}
+		}
+	} else if (au_br_writable(mod->perm)) {
+		/* ro --> rw */
+		err = -ENOMEM;
+		br->br_wbr = kzalloc(sizeof(*br->br_wbr), GFP_NOFS);
+		if (br->br_wbr) {
+			err = au_wbr_init(br, sb, mod->perm);
+			if (unlikely(err)) {
+				kfree(br->br_wbr);
+				br->br_wbr = NULL;
+			}
+		}
+	}
+	if (unlikely(err))
+		goto out_bf;
+
+	if (au_br_fhsm(br->br_perm)) {
+		if (!au_br_fhsm(mod->perm)) {
+			/* fhsm --> non-fhsm */
+			au_br_fhsm_fin(br->br_fhsm);
+			kfree(br->br_fhsm);
+			br->br_fhsm = NULL;
+		}
+	} else if (au_br_fhsm(mod->perm))
+		/* non-fhsm --> fhsm */
+		br->br_fhsm = bf;
+
+	*do_refresh |= need_sigen_inc(br->br_perm, mod->perm);
+	br->br_perm = mod->perm;
+	goto out; /* success */
+
+out_bf:
+	kfree(bf);
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int au_br_stfs(struct au_branch *br, struct aufs_stfs *stfs)
+{
+	int err;
+	struct kstatfs kstfs;
+
+	err = vfs_statfs(&br->br_path, &kstfs);
+	if (!err) {
+		stfs->f_blocks = kstfs.f_blocks;
+		stfs->f_bavail = kstfs.f_bavail;
+		stfs->f_files = kstfs.f_files;
+		stfs->f_ffree = kstfs.f_ffree;
+	}
+
+	return err;
+}
diff --git a/fs/aufs/branch.h b/fs/aufs/branch.h
new file mode 100644
index 000000000..65fdd2aa6
--- /dev/null
+++ b/fs/aufs/branch.h
@@ -0,0 +1,333 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * branch filesystems and xino for them
+ */
+
+#ifndef __AUFS_BRANCH_H__
+#define __AUFS_BRANCH_H__
+
+#ifdef __KERNEL__
+
+#include <linux/mount.h>
+#include "dirren.h"
+#include "dynop.h"
+#include "rwsem.h"
+#include "super.h"
+
+/* ---------------------------------------------------------------------- */
+
+/* a xino file */
+struct au_xino_file {
+	struct file		*xi_file;
+	struct {
+		spinlock_t		spin;
+		ino_t			*array;
+		int			total;
+		/* reserved for future use */
+		/* unsigned long	*bitmap; */
+		wait_queue_head_t	wqh;
+	} xi_nondir;
+
+	/* todo: make xino files an array to support huge inode number */
+
+#ifdef CONFIG_DEBUG_FS
+	struct dentry		 *xi_dbgaufs;
+#endif
+};
+
+/* File-based Hierarchical Storage Management */
+struct au_br_fhsm {
+#ifdef CONFIG_AUFS_FHSM
+	struct mutex		bf_lock;
+	unsigned long		bf_jiffy;
+	struct aufs_stfs	bf_stfs;
+	int			bf_readable;
+#endif
+};
+
+/* members for writable branch only */
+enum {AuBrWh_BASE, AuBrWh_PLINK, AuBrWh_ORPH, AuBrWh_Last};
+struct au_wbr {
+	struct au_rwsem		wbr_wh_rwsem;
+	struct dentry		*wbr_wh[AuBrWh_Last];
+	atomic_t		wbr_wh_running;
+#define wbr_whbase		wbr_wh[AuBrWh_BASE]	/* whiteout base */
+#define wbr_plink		wbr_wh[AuBrWh_PLINK]	/* pseudo-link dir */
+#define wbr_orph		wbr_wh[AuBrWh_ORPH]	/* dir for orphans */
+
+	/* mfs mode */
+	unsigned long long	wbr_bytes;
+};
+
+/* ext2 has 3 types of operations at least, ext3 has 4 */
+#define AuBrDynOp (AuDyLast * 4)
+
+#ifdef CONFIG_AUFS_HFSNOTIFY
+/* support for asynchronous destruction */
+struct au_br_hfsnotify {
+	struct fsnotify_group	*hfsn_group;
+};
+#endif
+
+/* sysfs entries */
+struct au_brsysfs {
+	char			name[16];
+	struct attribute	attr;
+};
+
+enum {
+	AuBrSysfs_BR,
+	AuBrSysfs_BRID,
+	AuBrSysfs_Last
+};
+
+/* protected by superblock rwsem */
+struct au_branch {
+	struct au_xino_file	br_xino;
+
+	aufs_bindex_t		br_id;
+
+	int			br_perm;
+	struct path		br_path;
+	spinlock_t		br_dykey_lock;
+	struct au_dykey		*br_dykey[AuBrDynOp];
+	struct percpu_counter	br_count;
+
+	struct au_wbr		*br_wbr;
+	struct au_br_fhsm	*br_fhsm;
+
+	/* xino truncation */
+	atomic_t		br_xino_running;
+
+#ifdef CONFIG_AUFS_HFSNOTIFY
+	struct au_br_hfsnotify	*br_hfsn;
+#endif
+
+#ifdef CONFIG_SYSFS
+	/* entries under sysfs per mount-point */
+	struct au_brsysfs	br_sysfs[AuBrSysfs_Last];
+#endif
+
+	struct au_dr_br		br_dirren;
+};
+
+/* ---------------------------------------------------------------------- */
+
+static inline struct vfsmount *au_br_mnt(struct au_branch *br)
+{
+	return br->br_path.mnt;
+}
+
+static inline struct dentry *au_br_dentry(struct au_branch *br)
+{
+	return br->br_path.dentry;
+}
+
+static inline struct super_block *au_br_sb(struct au_branch *br)
+{
+	return au_br_mnt(br)->mnt_sb;
+}
+
+static inline void au_br_get(struct au_branch *br)
+{
+	percpu_counter_inc(&br->br_count);
+}
+
+static inline void au_br_put(struct au_branch *br)
+{
+	percpu_counter_dec(&br->br_count);
+}
+
+static inline s64 au_br_count(struct au_branch *br)
+{
+	return percpu_counter_sum(&br->br_count);
+}
+
+static inline void au_br_count_init(struct au_branch *br)
+{
+	percpu_counter_init(&br->br_count, 0, GFP_NOFS);
+}
+
+static inline void au_br_count_fin(struct au_branch *br)
+{
+	percpu_counter_destroy(&br->br_count);
+}
+
+static inline int au_br_rdonly(struct au_branch *br)
+{
+	return (sb_rdonly(au_br_sb(br))
+		|| !au_br_writable(br->br_perm))
+		? -EROFS : 0;
+}
+
+static inline int au_br_hnotifyable(int brperm __maybe_unused)
+{
+#ifdef CONFIG_AUFS_HNOTIFY
+	return !(brperm & AuBrPerm_RR);
+#else
+	return 0;
+#endif
+}
+
+static inline int au_br_test_oflag(int oflag, struct au_branch *br)
+{
+	int err, exec_flag;
+
+	err = 0;
+	exec_flag = oflag & __FMODE_EXEC;
+	if (unlikely(exec_flag && path_noexec(&br->br_path)))
+		err = -EACCES;
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* branch.c */
+struct au_sbinfo;
+void au_br_free(struct au_sbinfo *sinfo);
+int au_br_index(struct super_block *sb, aufs_bindex_t br_id);
+struct au_opt_add;
+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount);
+struct au_opt_del;
+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount);
+long au_ibusy_ioctl(struct file *file, unsigned long arg);
+#ifdef CONFIG_COMPAT
+long au_ibusy_compat_ioctl(struct file *file, unsigned long arg);
+#endif
+struct au_opt_mod;
+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
+	      int *do_refresh);
+struct aufs_stfs;
+int au_br_stfs(struct au_branch *br, struct aufs_stfs *stfs);
+
+/* xino.c */
+static const loff_t au_loff_max = LLONG_MAX;
+
+int au_xib_trunc(struct super_block *sb);
+ssize_t xino_fread(vfs_readf_t func, struct file *file, void *buf, size_t size,
+		   loff_t *pos);
+ssize_t xino_fwrite(vfs_writef_t func, struct file *file, void *buf,
+		    size_t size, loff_t *pos);
+struct file *au_xino_create2(struct file *base_file, struct file *copy_src);
+struct file *au_xino_create(struct super_block *sb, char *fname, int silent);
+ino_t au_xino_new_ino(struct super_block *sb);
+void au_xino_delete_inode(struct inode *inode, const int unlinked);
+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
+		  ino_t ino);
+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
+		 ino_t *ino);
+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t hino,
+	       struct file *base_file, int do_test);
+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex);
+
+struct au_opt_xino;
+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount);
+void au_xino_clr(struct super_block *sb);
+struct file *au_xino_def(struct super_block *sb);
+int au_xino_path(struct seq_file *seq, struct file *file);
+
+void au_xinondir_leave(struct super_block *sb, aufs_bindex_t bindex,
+		       ino_t h_ino, int idx);
+int au_xinondir_enter(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
+		      int *idx);
+
+/* ---------------------------------------------------------------------- */
+
+/* Superblock to branch */
+static inline
+aufs_bindex_t au_sbr_id(struct super_block *sb, aufs_bindex_t bindex)
+{
+	return au_sbr(sb, bindex)->br_id;
+}
+
+static inline
+struct vfsmount *au_sbr_mnt(struct super_block *sb, aufs_bindex_t bindex)
+{
+	return au_br_mnt(au_sbr(sb, bindex));
+}
+
+static inline
+struct super_block *au_sbr_sb(struct super_block *sb, aufs_bindex_t bindex)
+{
+	return au_br_sb(au_sbr(sb, bindex));
+}
+
+static inline void au_sbr_get(struct super_block *sb, aufs_bindex_t bindex)
+{
+	au_br_get(au_sbr(sb, bindex));
+}
+
+static inline void au_sbr_put(struct super_block *sb, aufs_bindex_t bindex)
+{
+	au_br_put(au_sbr(sb, bindex));
+}
+
+static inline int au_sbr_perm(struct super_block *sb, aufs_bindex_t bindex)
+{
+	return au_sbr(sb, bindex)->br_perm;
+}
+
+static inline int au_sbr_whable(struct super_block *sb, aufs_bindex_t bindex)
+{
+	return au_br_whable(au_sbr_perm(sb, bindex));
+}
+
+/* ---------------------------------------------------------------------- */
+
+#define wbr_wh_read_lock(wbr)	au_rw_read_lock(&(wbr)->wbr_wh_rwsem)
+#define wbr_wh_write_lock(wbr)	au_rw_write_lock(&(wbr)->wbr_wh_rwsem)
+#define wbr_wh_read_trylock(wbr)	au_rw_read_trylock(&(wbr)->wbr_wh_rwsem)
+#define wbr_wh_write_trylock(wbr) au_rw_write_trylock(&(wbr)->wbr_wh_rwsem)
+/*
+#define wbr_wh_read_trylock_nested(wbr) \
+	au_rw_read_trylock_nested(&(wbr)->wbr_wh_rwsem)
+#define wbr_wh_write_trylock_nested(wbr) \
+	au_rw_write_trylock_nested(&(wbr)->wbr_wh_rwsem)
+*/
+
+#define wbr_wh_read_unlock(wbr)	au_rw_read_unlock(&(wbr)->wbr_wh_rwsem)
+#define wbr_wh_write_unlock(wbr)	au_rw_write_unlock(&(wbr)->wbr_wh_rwsem)
+#define wbr_wh_downgrade_lock(wbr)	au_rw_dgrade_lock(&(wbr)->wbr_wh_rwsem)
+
+#define WbrWhMustNoWaiters(wbr)	AuRwMustNoWaiters(&(wbr)->wbr_wh_rwsem)
+#define WbrWhMustAnyLock(wbr)	AuRwMustAnyLock(&(wbr)->wbr_wh_rwsem)
+#define WbrWhMustWriteLock(wbr)	AuRwMustWriteLock(&(wbr)->wbr_wh_rwsem)
+
+/* ---------------------------------------------------------------------- */
+
+#ifdef CONFIG_AUFS_FHSM
+static inline void au_br_fhsm_init(struct au_br_fhsm *brfhsm)
+{
+	mutex_init(&brfhsm->bf_lock);
+	brfhsm->bf_jiffy = 0;
+	brfhsm->bf_readable = 0;
+}
+
+static inline void au_br_fhsm_fin(struct au_br_fhsm *brfhsm)
+{
+	mutex_destroy(&brfhsm->bf_lock);
+}
+#else
+AuStubVoid(au_br_fhsm_init, struct au_br_fhsm *brfhsm)
+AuStubVoid(au_br_fhsm_fin, struct au_br_fhsm *brfhsm)
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_BRANCH_H__ */
diff --git a/fs/aufs/conf.mk b/fs/aufs/conf.mk
new file mode 100644
index 000000000..12782f8e0
--- /dev/null
+++ b/fs/aufs/conf.mk
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: GPL-2.0
+
+AuConfStr = CONFIG_AUFS_FS=${CONFIG_AUFS_FS}
+
+define AuConf
+ifdef ${1}
+AuConfStr += ${1}=${${1}}
+endif
+endef
+
+AuConfAll = BRANCH_MAX_127 BRANCH_MAX_511 BRANCH_MAX_1023 BRANCH_MAX_32767 \
+	SBILIST \
+	HNOTIFY HFSNOTIFY \
+	EXPORT INO_T_64 \
+	XATTR \
+	FHSM \
+	RDU \
+	DIRREN \
+	SHWH \
+	BR_RAMFS \
+	BR_FUSE POLL \
+	BR_HFSPLUS \
+	BDEV_LOOP \
+	DEBUG MAGIC_SYSRQ
+$(foreach i, ${AuConfAll}, \
+	$(eval $(call AuConf,CONFIG_AUFS_${i})))
+
+AuConfName = ${obj}/conf.str
+${AuConfName}.tmp: FORCE
+	@echo ${AuConfStr} | tr ' ' '\n' | sed -e 's/^/"/' -e 's/$$/\\n"/' > $@
+${AuConfName}: ${AuConfName}.tmp
+	@diff -q $< $@ > /dev/null 2>&1 || { \
+	echo '  GEN    ' $@; \
+	cp -p $< $@; \
+	}
+FORCE:
+clean-files += ${AuConfName} ${AuConfName}.tmp
+${obj}/sysfs.o: ${AuConfName}
+
+-include ${srctree}/${src}/conf_priv.mk
diff --git a/fs/aufs/cpup.c b/fs/aufs/cpup.c
new file mode 100644
index 000000000..30c5fefe8
--- /dev/null
+++ b/fs/aufs/cpup.c
@@ -0,0 +1,1441 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * copy-up functions, see wbr_policy.c for copy-down
+ */
+
+#include <linux/fs_stack.h>
+#include <linux/mm.h>
+#include <linux/task_work.h>
+#include "aufs.h"
+
+void au_cpup_attr_flags(struct inode *dst, unsigned int iflags)
+{
+	const unsigned int mask = S_DEAD | S_SWAPFILE | S_PRIVATE
+		| S_NOATIME | S_NOCMTIME | S_AUTOMOUNT;
+
+	BUILD_BUG_ON(sizeof(iflags) != sizeof(dst->i_flags));
+
+	dst->i_flags |= iflags & ~mask;
+	if (au_test_fs_notime(dst->i_sb))
+		dst->i_flags |= S_NOATIME | S_NOCMTIME;
+}
+
+void au_cpup_attr_timesizes(struct inode *inode)
+{
+	struct inode *h_inode;
+
+	h_inode = au_h_iptr(inode, au_ibtop(inode));
+	fsstack_copy_attr_times(inode, h_inode);
+	fsstack_copy_inode_size(inode, h_inode);
+}
+
+void au_cpup_attr_nlink(struct inode *inode, int force)
+{
+	struct inode *h_inode;
+	struct super_block *sb;
+	aufs_bindex_t bindex, bbot;
+
+	sb = inode->i_sb;
+	bindex = au_ibtop(inode);
+	h_inode = au_h_iptr(inode, bindex);
+	if (!force
+	    && !S_ISDIR(h_inode->i_mode)
+	    && au_opt_test(au_mntflags(sb), PLINK)
+	    && au_plink_test(inode))
+		return;
+
+	/*
+	 * 0 can happen in revalidating.
+	 * h_inode->i_mutex may not be held here, but it is harmless since once
+	 * i_nlink reaches 0, it will never become positive except O_TMPFILE
+	 * case.
+	 * todo: O_TMPFILE+linkat(AT_SYMLINK_FOLLOW) bypassing aufs may cause
+	 *	 the incorrect link count.
+	 */
+	set_nlink(inode, h_inode->i_nlink);
+
+	/*
+	 * fewer nlink makes find(1) noisy, but larger nlink doesn't.
+	 * it may includes whplink directory.
+	 */
+	if (S_ISDIR(h_inode->i_mode)) {
+		bbot = au_ibbot(inode);
+		for (bindex++; bindex <= bbot; bindex++) {
+			h_inode = au_h_iptr(inode, bindex);
+			if (h_inode)
+				au_add_nlink(inode, h_inode);
+		}
+	}
+}
+
+void au_cpup_attr_changeable(struct inode *inode)
+{
+	struct inode *h_inode;
+
+	h_inode = au_h_iptr(inode, au_ibtop(inode));
+	inode->i_mode = h_inode->i_mode;
+	inode->i_uid = h_inode->i_uid;
+	inode->i_gid = h_inode->i_gid;
+	au_cpup_attr_timesizes(inode);
+	au_cpup_attr_flags(inode, h_inode->i_flags);
+}
+
+void au_cpup_igen(struct inode *inode, struct inode *h_inode)
+{
+	struct au_iinfo *iinfo = au_ii(inode);
+
+	IiMustWriteLock(inode);
+
+	iinfo->ii_higen = h_inode->i_generation;
+	iinfo->ii_hsb1 = h_inode->i_sb;
+}
+
+void au_cpup_attr_all(struct inode *inode, int force)
+{
+	struct inode *h_inode;
+
+	h_inode = au_h_iptr(inode, au_ibtop(inode));
+	au_cpup_attr_changeable(inode);
+	if (inode->i_nlink > 0)
+		au_cpup_attr_nlink(inode, force);
+	inode->i_rdev = h_inode->i_rdev;
+	inode->i_blkbits = h_inode->i_blkbits;
+	au_cpup_igen(inode, h_inode);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* Note: dt_dentry and dt_h_dentry are not dget/dput-ed */
+
+/* keep the timestamps of the parent dir when cpup */
+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
+		    struct path *h_path)
+{
+	struct inode *h_inode;
+
+	dt->dt_dentry = dentry;
+	dt->dt_h_path = *h_path;
+	h_inode = d_inode(h_path->dentry);
+	dt->dt_atime = h_inode->i_atime;
+	dt->dt_mtime = h_inode->i_mtime;
+	/* smp_mb(); */
+}
+
+void au_dtime_revert(struct au_dtime *dt)
+{
+	struct iattr attr;
+	int err;
+
+	attr.ia_atime = dt->dt_atime;
+	attr.ia_mtime = dt->dt_mtime;
+	attr.ia_valid = ATTR_FORCE | ATTR_MTIME | ATTR_MTIME_SET
+		| ATTR_ATIME | ATTR_ATIME_SET;
+
+	/* no delegation since this is a directory */
+	err = vfsub_notify_change(&dt->dt_h_path, &attr, /*delegated*/NULL);
+	if (unlikely(err))
+		pr_warn("restoring timestamps failed(%d). ignored\n", err);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* internal use only */
+struct au_cpup_reg_attr {
+	int		valid;
+	struct kstat	st;
+	unsigned int	iflags; /* inode->i_flags */
+};
+
+static noinline_for_stack
+int cpup_iattr(struct dentry *dst, aufs_bindex_t bindex, struct dentry *h_src,
+	       struct au_cpup_reg_attr *h_src_attr)
+{
+	int err, sbits, icex;
+	unsigned int mnt_flags;
+	unsigned char verbose;
+	struct iattr ia;
+	struct path h_path;
+	struct inode *h_isrc, *h_idst;
+	struct kstat *h_st;
+	struct au_branch *br;
+
+	h_path.dentry = au_h_dptr(dst, bindex);
+	h_idst = d_inode(h_path.dentry);
+	br = au_sbr(dst->d_sb, bindex);
+	h_path.mnt = au_br_mnt(br);
+	h_isrc = d_inode(h_src);
+	ia.ia_valid = ATTR_FORCE | ATTR_UID | ATTR_GID
+		| ATTR_ATIME | ATTR_MTIME
+		| ATTR_ATIME_SET | ATTR_MTIME_SET;
+	if (h_src_attr && h_src_attr->valid) {
+		h_st = &h_src_attr->st;
+		ia.ia_uid = h_st->uid;
+		ia.ia_gid = h_st->gid;
+		ia.ia_atime = h_st->atime;
+		ia.ia_mtime = h_st->mtime;
+		if (h_idst->i_mode != h_st->mode
+		    && !S_ISLNK(h_idst->i_mode)) {
+			ia.ia_valid |= ATTR_MODE;
+			ia.ia_mode = h_st->mode;
+		}
+		sbits = !!(h_st->mode & (S_ISUID | S_ISGID));
+		au_cpup_attr_flags(h_idst, h_src_attr->iflags);
+	} else {
+		ia.ia_uid = h_isrc->i_uid;
+		ia.ia_gid = h_isrc->i_gid;
+		ia.ia_atime = h_isrc->i_atime;
+		ia.ia_mtime = h_isrc->i_mtime;
+		if (h_idst->i_mode != h_isrc->i_mode
+		    && !S_ISLNK(h_idst->i_mode)) {
+			ia.ia_valid |= ATTR_MODE;
+			ia.ia_mode = h_isrc->i_mode;
+		}
+		sbits = !!(h_isrc->i_mode & (S_ISUID | S_ISGID));
+		au_cpup_attr_flags(h_idst, h_isrc->i_flags);
+	}
+	/* no delegation since it is just created */
+	err = vfsub_notify_change(&h_path, &ia, /*delegated*/NULL);
+
+	/* is this nfs only? */
+	if (!err && sbits && au_test_nfs(h_path.dentry->d_sb)) {
+		ia.ia_valid = ATTR_FORCE | ATTR_MODE;
+		ia.ia_mode = h_isrc->i_mode;
+		err = vfsub_notify_change(&h_path, &ia, /*delegated*/NULL);
+	}
+
+	icex = br->br_perm & AuBrAttr_ICEX;
+	if (!err) {
+		mnt_flags = au_mntflags(dst->d_sb);
+		verbose = !!au_opt_test(mnt_flags, VERBOSE);
+		err = au_cpup_xattr(h_path.dentry, h_src, icex, verbose);
+	}
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_do_copy_file(struct file *dst, struct file *src, loff_t len,
+			   char *buf, unsigned long blksize)
+{
+	int err;
+	size_t sz, rbytes, wbytes;
+	unsigned char all_zero;
+	char *p, *zp;
+	struct inode *h_inode;
+	/* reduce stack usage */
+	struct iattr *ia;
+
+	zp = page_address(ZERO_PAGE(0));
+	if (unlikely(!zp))
+		return -ENOMEM; /* possible? */
+
+	err = 0;
+	all_zero = 0;
+	while (len) {
+		AuDbg("len %lld\n", len);
+		sz = blksize;
+		if (len < blksize)
+			sz = len;
+
+		rbytes = 0;
+		/* todo: signal_pending? */
+		while (!rbytes || err == -EAGAIN || err == -EINTR) {
+			rbytes = vfsub_read_k(src, buf, sz, &src->f_pos);
+			err = rbytes;
+		}
+		if (unlikely(err < 0))
+			break;
+
+		all_zero = 0;
+		if (len >= rbytes && rbytes == blksize)
+			all_zero = !memcmp(buf, zp, rbytes);
+		if (!all_zero) {
+			wbytes = rbytes;
+			p = buf;
+			while (wbytes) {
+				size_t b;
+
+				b = vfsub_write_k(dst, p, wbytes, &dst->f_pos);
+				err = b;
+				/* todo: signal_pending? */
+				if (unlikely(err == -EAGAIN || err == -EINTR))
+					continue;
+				if (unlikely(err < 0))
+					break;
+				wbytes -= b;
+				p += b;
+			}
+			if (unlikely(err < 0))
+				break;
+		} else {
+			loff_t res;
+
+			AuLabel(hole);
+			res = vfsub_llseek(dst, rbytes, SEEK_CUR);
+			err = res;
+			if (unlikely(res < 0))
+				break;
+		}
+		len -= rbytes;
+		err = 0;
+	}
+
+	/* the last block may be a hole */
+	if (!err && all_zero) {
+		AuLabel(last hole);
+
+		err = 1;
+		if (au_test_nfs(dst->f_path.dentry->d_sb)) {
+			/* nfs requires this step to make last hole */
+			/* is this only nfs? */
+			do {
+				/* todo: signal_pending? */
+				err = vfsub_write_k(dst, "\0", 1, &dst->f_pos);
+			} while (err == -EAGAIN || err == -EINTR);
+			if (err == 1)
+				dst->f_pos--;
+		}
+
+		if (err == 1) {
+			ia = (void *)buf;
+			ia->ia_size = dst->f_pos;
+			ia->ia_valid = ATTR_SIZE | ATTR_FILE;
+			ia->ia_file = dst;
+			h_inode = file_inode(dst);
+			inode_lock_nested(h_inode, AuLsc_I_CHILD2);
+			/* no delegation since it is just created */
+			err = vfsub_notify_change(&dst->f_path, ia,
+						  /*delegated*/NULL);
+			inode_unlock(h_inode);
+		}
+	}
+
+	return err;
+}
+
+int au_copy_file(struct file *dst, struct file *src, loff_t len)
+{
+	int err;
+	unsigned long blksize;
+	unsigned char do_kfree;
+	char *buf;
+
+	err = -ENOMEM;
+	blksize = dst->f_path.dentry->d_sb->s_blocksize;
+	if (!blksize || PAGE_SIZE < blksize)
+		blksize = PAGE_SIZE;
+	AuDbg("blksize %lu\n", blksize);
+	do_kfree = (blksize != PAGE_SIZE && blksize >= sizeof(struct iattr *));
+	if (do_kfree)
+		buf = kmalloc(blksize, GFP_NOFS);
+	else
+		buf = (void *)__get_free_page(GFP_NOFS);
+	if (unlikely(!buf))
+		goto out;
+
+	if (len > (1 << 22))
+		AuDbg("copying a large file %lld\n", (long long)len);
+
+	src->f_pos = 0;
+	dst->f_pos = 0;
+	err = au_do_copy_file(dst, src, len, buf, blksize);
+	if (do_kfree)
+		kfree(buf);
+	else
+		free_page((unsigned long)buf);
+
+out:
+	return err;
+}
+
+static int au_do_copy(struct file *dst, struct file *src, loff_t len)
+{
+	int err;
+	struct super_block *h_src_sb;
+	struct inode *h_src_inode;
+
+	h_src_inode = file_inode(src);
+	h_src_sb = h_src_inode->i_sb;
+
+	/* XFS acquires inode_lock */
+	if (!au_test_xfs(h_src_sb))
+		err = au_copy_file(dst, src, len);
+	else {
+		inode_unlock_shared(h_src_inode);
+		err = au_copy_file(dst, src, len);
+		inode_lock_shared_nested(h_src_inode, AuLsc_I_CHILD);
+	}
+
+	return err;
+}
+
+static int au_clone_or_copy(struct file *dst, struct file *src, loff_t len)
+{
+	int err;
+	struct super_block *h_src_sb;
+	struct inode *h_src_inode;
+
+	h_src_inode = file_inode(src);
+	h_src_sb = h_src_inode->i_sb;
+	if (h_src_sb != file_inode(dst)->i_sb
+	    || !dst->f_op->clone_file_range) {
+		err = au_do_copy(dst, src, len);
+		goto out;
+	}
+
+	if (!au_test_nfs(h_src_sb)) {
+		inode_unlock_shared(h_src_inode);
+		err = vfsub_clone_file_range(src, dst, len);
+		inode_lock_shared_nested(h_src_inode, AuLsc_I_CHILD);
+	} else
+		err = vfsub_clone_file_range(src, dst, len);
+	/* older XFS has a condition in cloning */
+	if (unlikely(err != -EOPNOTSUPP))
+		goto out;
+
+	/* the backend fs on NFS may not support cloning */
+	err = au_do_copy(dst, src, len);
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/*
+ * to support a sparse file which is opened with O_APPEND,
+ * we need to close the file.
+ */
+static int au_cp_regular(struct au_cp_generic *cpg)
+{
+	int err, i;
+	enum { SRC, DST };
+	struct {
+		aufs_bindex_t bindex;
+		unsigned int flags;
+		struct dentry *dentry;
+		int force_wr;
+		struct file *file;
+		void *label;
+	} *f, file[] = {
+		{
+			.bindex = cpg->bsrc,
+			.flags = O_RDONLY | O_NOATIME | O_LARGEFILE,
+			.label = &&out
+		},
+		{
+			.bindex = cpg->bdst,
+			.flags = O_WRONLY | O_NOATIME | O_LARGEFILE,
+			.force_wr = !!au_ftest_cpup(cpg->flags, RWDST),
+			.label = &&out_src
+		}
+	};
+	struct super_block *sb, *h_src_sb;
+	struct inode *h_src_inode;
+	struct task_struct *tsk = current;
+
+	/* bsrc branch can be ro/rw. */
+	sb = cpg->dentry->d_sb;
+	f = file;
+	for (i = 0; i < 2; i++, f++) {
+		f->dentry = au_h_dptr(cpg->dentry, f->bindex);
+		f->file = au_h_open(cpg->dentry, f->bindex, f->flags,
+				    /*file*/NULL, f->force_wr);
+		err = PTR_ERR(f->file);
+		if (IS_ERR(f->file))
+			goto *f->label;
+	}
+
+	/* try stopping to update while we copyup */
+	h_src_inode = d_inode(file[SRC].dentry);
+	h_src_sb = h_src_inode->i_sb;
+	if (!au_test_nfs(h_src_sb))
+		IMustLock(h_src_inode);
+	err = au_clone_or_copy(file[DST].file, file[SRC].file, cpg->len);
+
+	/* i wonder if we had O_NO_DELAY_FPUT flag */
+	if (tsk->flags & PF_KTHREAD)
+		__fput_sync(file[DST].file);
+	else {
+		/* it happend actually */
+		fput(file[DST].file);
+		/*
+		 * too bad.
+		 * we have to call both since we don't know which place the file
+		 * was added to.
+		 */
+		task_work_run();
+		flush_delayed_fput();
+	}
+	au_sbr_put(sb, file[DST].bindex);
+
+out_src:
+	fput(file[SRC].file);
+	au_sbr_put(sb, file[SRC].bindex);
+out:
+	return err;
+}
+
+static int au_do_cpup_regular(struct au_cp_generic *cpg,
+			      struct au_cpup_reg_attr *h_src_attr)
+{
+	int err, rerr;
+	loff_t l;
+	struct path h_path;
+	struct inode *h_src_inode, *h_dst_inode;
+
+	err = 0;
+	h_src_inode = au_h_iptr(d_inode(cpg->dentry), cpg->bsrc);
+	l = i_size_read(h_src_inode);
+	if (cpg->len == -1 || l < cpg->len)
+		cpg->len = l;
+	if (cpg->len) {
+		/* try stopping to update while we are referencing */
+		inode_lock_shared_nested(h_src_inode, AuLsc_I_CHILD);
+		au_pin_hdir_unlock(cpg->pin);
+
+		h_path.dentry = au_h_dptr(cpg->dentry, cpg->bsrc);
+		h_path.mnt = au_sbr_mnt(cpg->dentry->d_sb, cpg->bsrc);
+		h_src_attr->iflags = h_src_inode->i_flags;
+		if (!au_test_nfs(h_src_inode->i_sb))
+			err = vfsub_getattr(&h_path, &h_src_attr->st);
+		else {
+			inode_unlock_shared(h_src_inode);
+			err = vfsub_getattr(&h_path, &h_src_attr->st);
+			inode_lock_shared_nested(h_src_inode, AuLsc_I_CHILD);
+		}
+		if (unlikely(err)) {
+			inode_unlock_shared(h_src_inode);
+			goto out;
+		}
+		h_src_attr->valid = 1;
+		if (!au_test_nfs(h_src_inode->i_sb)) {
+			err = au_cp_regular(cpg);
+			inode_unlock_shared(h_src_inode);
+		} else {
+			inode_unlock_shared(h_src_inode);
+			err = au_cp_regular(cpg);
+		}
+		rerr = au_pin_hdir_relock(cpg->pin);
+		if (!err && rerr)
+			err = rerr;
+	}
+	if (!err && (h_src_inode->i_state & I_LINKABLE)) {
+		h_path.dentry = au_h_dptr(cpg->dentry, cpg->bdst);
+		h_dst_inode = d_inode(h_path.dentry);
+		spin_lock(&h_dst_inode->i_lock);
+		h_dst_inode->i_state |= I_LINKABLE;
+		spin_unlock(&h_dst_inode->i_lock);
+	}
+
+out:
+	return err;
+}
+
+static int au_do_cpup_symlink(struct path *h_path, struct dentry *h_src,
+			      struct inode *h_dir)
+{
+	int err, symlen;
+	mm_segment_t old_fs;
+	union {
+		char *k;
+		char __user *u;
+	} sym;
+
+	err = -ENOMEM;
+	sym.k = (void *)__get_free_page(GFP_NOFS);
+	if (unlikely(!sym.k))
+		goto out;
+
+	/* unnecessary to support mmap_sem since symlink is not mmap-able */
+	old_fs = get_fs();
+	set_fs(KERNEL_DS);
+	symlen = vfs_readlink(h_src, sym.u, PATH_MAX);
+	err = symlen;
+	set_fs(old_fs);
+
+	if (symlen > 0) {
+		sym.k[symlen] = 0;
+		err = vfsub_symlink(h_dir, h_path, sym.k);
+	}
+	free_page((unsigned long)sym.k);
+
+out:
+	return err;
+}
+
+/*
+ * regardless 'acl' option, reset all ACL.
+ * All ACL will be copied up later from the original entry on the lower branch.
+ */
+static int au_reset_acl(struct inode *h_dir, struct path *h_path, umode_t mode)
+{
+	int err;
+	struct dentry *h_dentry;
+	struct inode *h_inode;
+
+	h_dentry = h_path->dentry;
+	h_inode = d_inode(h_dentry);
+	/* forget_all_cached_acls(h_inode)); */
+	err = vfsub_removexattr(h_dentry, XATTR_NAME_POSIX_ACL_ACCESS);
+	AuTraceErr(err);
+	if (err == -EOPNOTSUPP)
+		err = 0;
+	if (!err)
+		err = vfsub_acl_chmod(h_inode, mode);
+
+	AuTraceErr(err);
+	return err;
+}
+
+static int au_do_cpup_dir(struct au_cp_generic *cpg, struct dentry *dst_parent,
+			  struct inode *h_dir, struct path *h_path)
+{
+	int err;
+	struct inode *dir, *inode;
+
+	err = vfsub_removexattr(h_path->dentry, XATTR_NAME_POSIX_ACL_DEFAULT);
+	AuTraceErr(err);
+	if (err == -EOPNOTSUPP)
+		err = 0;
+	if (unlikely(err))
+		goto out;
+
+	/*
+	 * strange behaviour from the users view,
+	 * particularry setattr case
+	 */
+	dir = d_inode(dst_parent);
+	if (au_ibtop(dir) == cpg->bdst)
+		au_cpup_attr_nlink(dir, /*force*/1);
+	inode = d_inode(cpg->dentry);
+	au_cpup_attr_nlink(inode, /*force*/1);
+
+out:
+	return err;
+}
+
+static noinline_for_stack
+int cpup_entry(struct au_cp_generic *cpg, struct dentry *dst_parent,
+	       struct au_cpup_reg_attr *h_src_attr)
+{
+	int err;
+	umode_t mode;
+	unsigned int mnt_flags;
+	unsigned char isdir, isreg, force;
+	const unsigned char do_dt = !!au_ftest_cpup(cpg->flags, DTIME);
+	struct au_dtime dt;
+	struct path h_path;
+	struct dentry *h_src, *h_dst, *h_parent;
+	struct inode *h_inode, *h_dir;
+	struct super_block *sb;
+
+	/* bsrc branch can be ro/rw. */
+	h_src = au_h_dptr(cpg->dentry, cpg->bsrc);
+	h_inode = d_inode(h_src);
+	AuDebugOn(h_inode != au_h_iptr(d_inode(cpg->dentry), cpg->bsrc));
+
+	/* try stopping to be referenced while we are creating */
+	h_dst = au_h_dptr(cpg->dentry, cpg->bdst);
+	if (au_ftest_cpup(cpg->flags, RENAME))
+		AuDebugOn(strncmp(h_dst->d_name.name, AUFS_WH_PFX,
+				  AUFS_WH_PFX_LEN));
+	h_parent = h_dst->d_parent; /* dir inode is locked */
+	h_dir = d_inode(h_parent);
+	IMustLock(h_dir);
+	AuDebugOn(h_parent != h_dst->d_parent);
+
+	sb = cpg->dentry->d_sb;
+	h_path.mnt = au_sbr_mnt(sb, cpg->bdst);
+	if (do_dt) {
+		h_path.dentry = h_parent;
+		au_dtime_store(&dt, dst_parent, &h_path);
+	}
+	h_path.dentry = h_dst;
+
+	isreg = 0;
+	isdir = 0;
+	mode = h_inode->i_mode;
+	switch (mode & S_IFMT) {
+	case S_IFREG:
+		isreg = 1;
+		err = vfsub_create(h_dir, &h_path, S_IRUSR | S_IWUSR,
+				   /*want_excl*/true);
+		if (!err)
+			err = au_do_cpup_regular(cpg, h_src_attr);
+		break;
+	case S_IFDIR:
+		isdir = 1;
+		err = vfsub_mkdir(h_dir, &h_path, mode);
+		if (!err)
+			err = au_do_cpup_dir(cpg, dst_parent, h_dir, &h_path);
+		break;
+	case S_IFLNK:
+		err = au_do_cpup_symlink(&h_path, h_src, h_dir);
+		break;
+	case S_IFCHR:
+	case S_IFBLK:
+		AuDebugOn(!capable(CAP_MKNOD));
+		/*FALLTHROUGH*/
+	case S_IFIFO:
+	case S_IFSOCK:
+		err = vfsub_mknod(h_dir, &h_path, mode, h_inode->i_rdev);
+		break;
+	default:
+		AuIOErr("Unknown inode type 0%o\n", mode);
+		err = -EIO;
+	}
+	if (!err)
+		err = au_reset_acl(h_dir, &h_path, mode);
+
+	mnt_flags = au_mntflags(sb);
+	if (!au_opt_test(mnt_flags, UDBA_NONE)
+	    && !isdir
+	    && au_opt_test(mnt_flags, XINO)
+	    && (h_inode->i_nlink == 1
+		|| (h_inode->i_state & I_LINKABLE))
+	    /* todo: unnecessary? */
+	    /* && d_inode(cpg->dentry)->i_nlink == 1 */
+	    && cpg->bdst < cpg->bsrc
+	    && !au_ftest_cpup(cpg->flags, KEEPLINO))
+		au_xino_write(sb, cpg->bsrc, h_inode->i_ino, /*ino*/0);
+		/* ignore this error */
+
+	if (!err) {
+		force = 0;
+		if (isreg) {
+			force = !!cpg->len;
+			if (cpg->len == -1)
+				force = !!i_size_read(h_inode);
+		}
+		au_fhsm_wrote(sb, cpg->bdst, force);
+	}
+
+	if (do_dt)
+		au_dtime_revert(&dt);
+	return err;
+}
+
+static int au_do_ren_after_cpup(struct au_cp_generic *cpg, struct path *h_path)
+{
+	int err;
+	struct dentry *dentry, *h_dentry, *h_parent, *parent;
+	struct inode *h_dir;
+	aufs_bindex_t bdst;
+
+	dentry = cpg->dentry;
+	bdst = cpg->bdst;
+	h_dentry = au_h_dptr(dentry, bdst);
+	if (!au_ftest_cpup(cpg->flags, OVERWRITE)) {
+		dget(h_dentry);
+		au_set_h_dptr(dentry, bdst, NULL);
+		err = au_lkup_neg(dentry, bdst, /*wh*/0);
+		if (!err)
+			h_path->dentry = dget(au_h_dptr(dentry, bdst));
+		au_set_h_dptr(dentry, bdst, h_dentry);
+	} else {
+		err = 0;
+		parent = dget_parent(dentry);
+		h_parent = au_h_dptr(parent, bdst);
+		dput(parent);
+		h_path->dentry = vfsub_lkup_one(&dentry->d_name, h_parent);
+		if (IS_ERR(h_path->dentry))
+			err = PTR_ERR(h_path->dentry);
+	}
+	if (unlikely(err))
+		goto out;
+
+	h_parent = h_dentry->d_parent; /* dir inode is locked */
+	h_dir = d_inode(h_parent);
+	IMustLock(h_dir);
+	AuDbg("%pd %pd\n", h_dentry, h_path->dentry);
+	/* no delegation since it is just created */
+	err = vfsub_rename(h_dir, h_dentry, h_dir, h_path, /*delegated*/NULL,
+			   /*flags*/0);
+	dput(h_path->dentry);
+
+out:
+	return err;
+}
+
+/*
+ * copyup the @dentry from @bsrc to @bdst.
+ * the caller must set the both of lower dentries.
+ * @len is for truncating when it is -1 copyup the entire file.
+ * in link/rename cases, @dst_parent may be different from the real one.
+ * basic->bsrc can be larger than basic->bdst.
+ * aufs doesn't touch the credential so
+ * security_inode_copy_up{,_xattr}() are unnecrssary.
+ */
+static int au_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent)
+{
+	int err, rerr;
+	aufs_bindex_t old_ibtop;
+	unsigned char isdir, plink;
+	struct dentry *h_src, *h_dst, *h_parent;
+	struct inode *dst_inode, *h_dir, *inode, *delegated, *src_inode;
+	struct super_block *sb;
+	struct au_branch *br;
+	/* to reuduce stack size */
+	struct {
+		struct au_dtime dt;
+		struct path h_path;
+		struct au_cpup_reg_attr h_src_attr;
+	} *a;
+
+	err = -ENOMEM;
+	a = kmalloc(sizeof(*a), GFP_NOFS);
+	if (unlikely(!a))
+		goto out;
+	a->h_src_attr.valid = 0;
+
+	sb = cpg->dentry->d_sb;
+	br = au_sbr(sb, cpg->bdst);
+	a->h_path.mnt = au_br_mnt(br);
+	h_dst = au_h_dptr(cpg->dentry, cpg->bdst);
+	h_parent = h_dst->d_parent; /* dir inode is locked */
+	h_dir = d_inode(h_parent);
+	IMustLock(h_dir);
+
+	h_src = au_h_dptr(cpg->dentry, cpg->bsrc);
+	inode = d_inode(cpg->dentry);
+
+	if (!dst_parent)
+		dst_parent = dget_parent(cpg->dentry);
+	else
+		dget(dst_parent);
+
+	plink = !!au_opt_test(au_mntflags(sb), PLINK);
+	dst_inode = au_h_iptr(inode, cpg->bdst);
+	if (dst_inode) {
+		if (unlikely(!plink)) {
+			err = -EIO;
+			AuIOErr("hi%lu(i%lu) exists on b%d "
+				"but plink is disabled\n",
+				dst_inode->i_ino, inode->i_ino, cpg->bdst);
+			goto out_parent;
+		}
+
+		if (dst_inode->i_nlink) {
+			const int do_dt = au_ftest_cpup(cpg->flags, DTIME);
+
+			h_src = au_plink_lkup(inode, cpg->bdst);
+			err = PTR_ERR(h_src);
+			if (IS_ERR(h_src))
+				goto out_parent;
+			if (unlikely(d_is_negative(h_src))) {
+				err = -EIO;
+				AuIOErr("i%lu exists on b%d "
+					"but not pseudo-linked\n",
+					inode->i_ino, cpg->bdst);
+				dput(h_src);
+				goto out_parent;
+			}
+
+			if (do_dt) {
+				a->h_path.dentry = h_parent;
+				au_dtime_store(&a->dt, dst_parent, &a->h_path);
+			}
+
+			a->h_path.dentry = h_dst;
+			delegated = NULL;
+			err = vfsub_link(h_src, h_dir, &a->h_path, &delegated);
+			if (!err && au_ftest_cpup(cpg->flags, RENAME))
+				err = au_do_ren_after_cpup(cpg, &a->h_path);
+			if (do_dt)
+				au_dtime_revert(&a->dt);
+			if (unlikely(err == -EWOULDBLOCK)) {
+				pr_warn("cannot retry for NFSv4 delegation"
+					" for an internal link\n");
+				iput(delegated);
+			}
+			dput(h_src);
+			goto out_parent;
+		} else
+			/* todo: cpup_wh_file? */
+			/* udba work */
+			au_update_ibrange(inode, /*do_put_zero*/1);
+	}
+
+	isdir = S_ISDIR(inode->i_mode);
+	old_ibtop = au_ibtop(inode);
+	err = cpup_entry(cpg, dst_parent, &a->h_src_attr);
+	if (unlikely(err))
+		goto out_rev;
+	dst_inode = d_inode(h_dst);
+	inode_lock_nested(dst_inode, AuLsc_I_CHILD2);
+	/* todo: necessary? */
+	/* au_pin_hdir_unlock(cpg->pin); */
+
+	err = cpup_iattr(cpg->dentry, cpg->bdst, h_src, &a->h_src_attr);
+	if (unlikely(err)) {
+		/* todo: necessary? */
+		/* au_pin_hdir_relock(cpg->pin); */ /* ignore an error */
+		inode_unlock(dst_inode);
+		goto out_rev;
+	}
+
+	if (cpg->bdst < old_ibtop) {
+		if (S_ISREG(inode->i_mode)) {
+			err = au_dy_iaop(inode, cpg->bdst, dst_inode);
+			if (unlikely(err)) {
+				/* ignore an error */
+				/* au_pin_hdir_relock(cpg->pin); */
+				inode_unlock(dst_inode);
+				goto out_rev;
+			}
+		}
+		au_set_ibtop(inode, cpg->bdst);
+	} else
+		au_set_ibbot(inode, cpg->bdst);
+	au_set_h_iptr(inode, cpg->bdst, au_igrab(dst_inode),
+		      au_hi_flags(inode, isdir));
+
+	/* todo: necessary? */
+	/* err = au_pin_hdir_relock(cpg->pin); */
+	inode_unlock(dst_inode);
+	if (unlikely(err))
+		goto out_rev;
+
+	src_inode = d_inode(h_src);
+	if (!isdir
+	    && (src_inode->i_nlink > 1
+		|| src_inode->i_state & I_LINKABLE)
+	    && plink)
+		au_plink_append(inode, cpg->bdst, h_dst);
+
+	if (au_ftest_cpup(cpg->flags, RENAME)) {
+		a->h_path.dentry = h_dst;
+		err = au_do_ren_after_cpup(cpg, &a->h_path);
+	}
+	if (!err)
+		goto out_parent; /* success */
+
+	/* revert */
+out_rev:
+	a->h_path.dentry = h_parent;
+	au_dtime_store(&a->dt, dst_parent, &a->h_path);
+	a->h_path.dentry = h_dst;
+	rerr = 0;
+	if (d_is_positive(h_dst)) {
+		if (!isdir) {
+			/* no delegation since it is just created */
+			rerr = vfsub_unlink(h_dir, &a->h_path,
+					    /*delegated*/NULL, /*force*/0);
+		} else
+			rerr = vfsub_rmdir(h_dir, &a->h_path);
+	}
+	au_dtime_revert(&a->dt);
+	if (rerr) {
+		AuIOErr("failed removing broken entry(%d, %d)\n", err, rerr);
+		err = -EIO;
+	}
+out_parent:
+	dput(dst_parent);
+	kfree(a);
+out:
+	return err;
+}
+
+#if 0 /* reserved */
+struct au_cpup_single_args {
+	int *errp;
+	struct au_cp_generic *cpg;
+	struct dentry *dst_parent;
+};
+
+static void au_call_cpup_single(void *args)
+{
+	struct au_cpup_single_args *a = args;
+
+	au_pin_hdir_acquire_nest(a->cpg->pin);
+	*a->errp = au_cpup_single(a->cpg, a->dst_parent);
+	au_pin_hdir_release(a->cpg->pin);
+}
+#endif
+
+/*
+ * prevent SIGXFSZ in copy-up.
+ * testing CAP_MKNOD is for generic fs,
+ * but CAP_FSETID is for xfs only, currently.
+ */
+static int au_cpup_sio_test(struct au_pin *pin, umode_t mode)
+{
+	int do_sio;
+	struct super_block *sb;
+	struct inode *h_dir;
+
+	do_sio = 0;
+	sb = au_pinned_parent(pin)->d_sb;
+	if (!au_wkq_test()
+	    && (!au_sbi(sb)->si_plink_maint_pid
+		|| au_plink_maint(sb, AuLock_NOPLM))) {
+		switch (mode & S_IFMT) {
+		case S_IFREG:
+			/* no condition about RLIMIT_FSIZE and the file size */
+			do_sio = 1;
+			break;
+		case S_IFCHR:
+		case S_IFBLK:
+			do_sio = !capable(CAP_MKNOD);
+			break;
+		}
+		if (!do_sio)
+			do_sio = ((mode & (S_ISUID | S_ISGID))
+				  && !capable(CAP_FSETID));
+		/* this workaround may be removed in the future */
+		if (!do_sio) {
+			h_dir = au_pinned_h_dir(pin);
+			do_sio = h_dir->i_mode & S_ISVTX;
+		}
+	}
+
+	return do_sio;
+}
+
+#if 0 /* reserved */
+int au_sio_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent)
+{
+	int err, wkq_err;
+	struct dentry *h_dentry;
+
+	h_dentry = au_h_dptr(cpg->dentry, cpg->bsrc);
+	if (!au_cpup_sio_test(pin, d_inode(h_dentry)->i_mode))
+		err = au_cpup_single(cpg, dst_parent);
+	else {
+		struct au_cpup_single_args args = {
+			.errp		= &err,
+			.cpg		= cpg,
+			.dst_parent	= dst_parent
+		};
+		wkq_err = au_wkq_wait(au_call_cpup_single, &args);
+		if (unlikely(wkq_err))
+			err = wkq_err;
+	}
+
+	return err;
+}
+#endif
+
+/*
+ * copyup the @dentry from the first active lower branch to @bdst,
+ * using au_cpup_single().
+ */
+static int au_cpup_simple(struct au_cp_generic *cpg)
+{
+	int err;
+	unsigned int flags_orig;
+	struct dentry *dentry;
+
+	AuDebugOn(cpg->bsrc < 0);
+
+	dentry = cpg->dentry;
+	DiMustWriteLock(dentry);
+
+	err = au_lkup_neg(dentry, cpg->bdst, /*wh*/1);
+	if (!err) {
+		flags_orig = cpg->flags;
+		au_fset_cpup(cpg->flags, RENAME);
+		err = au_cpup_single(cpg, NULL);
+		cpg->flags = flags_orig;
+		if (!err)
+			return 0; /* success */
+
+		/* revert */
+		au_set_h_dptr(dentry, cpg->bdst, NULL);
+		au_set_dbtop(dentry, cpg->bsrc);
+	}
+
+	return err;
+}
+
+struct au_cpup_simple_args {
+	int *errp;
+	struct au_cp_generic *cpg;
+};
+
+static void au_call_cpup_simple(void *args)
+{
+	struct au_cpup_simple_args *a = args;
+
+	au_pin_hdir_acquire_nest(a->cpg->pin);
+	*a->errp = au_cpup_simple(a->cpg);
+	au_pin_hdir_release(a->cpg->pin);
+}
+
+static int au_do_sio_cpup_simple(struct au_cp_generic *cpg)
+{
+	int err, wkq_err;
+	struct dentry *dentry, *parent;
+	struct file *h_file;
+	struct inode *h_dir;
+
+	dentry = cpg->dentry;
+	h_file = NULL;
+	if (au_ftest_cpup(cpg->flags, HOPEN)) {
+		AuDebugOn(cpg->bsrc < 0);
+		h_file = au_h_open_pre(dentry, cpg->bsrc, /*force_wr*/0);
+		err = PTR_ERR(h_file);
+		if (IS_ERR(h_file))
+			goto out;
+	}
+
+	parent = dget_parent(dentry);
+	h_dir = au_h_iptr(d_inode(parent), cpg->bdst);
+	if (!au_test_h_perm_sio(h_dir, MAY_EXEC | MAY_WRITE)
+	    && !au_cpup_sio_test(cpg->pin, d_inode(dentry)->i_mode))
+		err = au_cpup_simple(cpg);
+	else {
+		struct au_cpup_simple_args args = {
+			.errp		= &err,
+			.cpg		= cpg
+		};
+		wkq_err = au_wkq_wait(au_call_cpup_simple, &args);
+		if (unlikely(wkq_err))
+			err = wkq_err;
+	}
+
+	dput(parent);
+	if (h_file)
+		au_h_open_post(dentry, cpg->bsrc, h_file);
+
+out:
+	return err;
+}
+
+int au_sio_cpup_simple(struct au_cp_generic *cpg)
+{
+	aufs_bindex_t bsrc, bbot;
+	struct dentry *dentry, *h_dentry;
+
+	if (cpg->bsrc < 0) {
+		dentry = cpg->dentry;
+		bbot = au_dbbot(dentry);
+		for (bsrc = cpg->bdst + 1; bsrc <= bbot; bsrc++) {
+			h_dentry = au_h_dptr(dentry, bsrc);
+			if (h_dentry) {
+				AuDebugOn(d_is_negative(h_dentry));
+				break;
+			}
+		}
+		AuDebugOn(bsrc > bbot);
+		cpg->bsrc = bsrc;
+	}
+	AuDebugOn(cpg->bsrc <= cpg->bdst);
+	return au_do_sio_cpup_simple(cpg);
+}
+
+int au_sio_cpdown_simple(struct au_cp_generic *cpg)
+{
+	AuDebugOn(cpg->bdst <= cpg->bsrc);
+	return au_do_sio_cpup_simple(cpg);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * copyup the deleted file for writing.
+ */
+static int au_do_cpup_wh(struct au_cp_generic *cpg, struct dentry *wh_dentry,
+			 struct file *file)
+{
+	int err;
+	unsigned int flags_orig;
+	aufs_bindex_t bsrc_orig;
+	struct au_dinfo *dinfo;
+	struct {
+		struct au_hdentry *hd;
+		struct dentry *h_dentry;
+	} hdst, hsrc;
+
+	dinfo = au_di(cpg->dentry);
+	AuRwMustWriteLock(&dinfo->di_rwsem);
+
+	bsrc_orig = cpg->bsrc;
+	cpg->bsrc = dinfo->di_btop;
+	hdst.hd = au_hdentry(dinfo, cpg->bdst);
+	hdst.h_dentry = hdst.hd->hd_dentry;
+	hdst.hd->hd_dentry = wh_dentry;
+	dinfo->di_btop = cpg->bdst;
+
+	hsrc.h_dentry = NULL;
+	if (file) {
+		hsrc.hd = au_hdentry(dinfo, cpg->bsrc);
+		hsrc.h_dentry = hsrc.hd->hd_dentry;
+		hsrc.hd->hd_dentry = au_hf_top(file)->f_path.dentry;
+	}
+	flags_orig = cpg->flags;
+	cpg->flags = !AuCpup_DTIME;
+	err = au_cpup_single(cpg, /*h_parent*/NULL);
+	cpg->flags = flags_orig;
+	if (file) {
+		if (!err)
+			err = au_reopen_nondir(file);
+		hsrc.hd->hd_dentry = hsrc.h_dentry;
+	}
+	hdst.hd->hd_dentry = hdst.h_dentry;
+	dinfo->di_btop = cpg->bsrc;
+	cpg->bsrc = bsrc_orig;
+
+	return err;
+}
+
+static int au_cpup_wh(struct au_cp_generic *cpg, struct file *file)
+{
+	int err;
+	aufs_bindex_t bdst;
+	struct au_dtime dt;
+	struct dentry *dentry, *parent, *h_parent, *wh_dentry;
+	struct au_branch *br;
+	struct path h_path;
+
+	dentry = cpg->dentry;
+	bdst = cpg->bdst;
+	br = au_sbr(dentry->d_sb, bdst);
+	parent = dget_parent(dentry);
+	h_parent = au_h_dptr(parent, bdst);
+	wh_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
+	err = PTR_ERR(wh_dentry);
+	if (IS_ERR(wh_dentry))
+		goto out;
+
+	h_path.dentry = h_parent;
+	h_path.mnt = au_br_mnt(br);
+	au_dtime_store(&dt, parent, &h_path);
+	err = au_do_cpup_wh(cpg, wh_dentry, file);
+	if (unlikely(err))
+		goto out_wh;
+
+	dget(wh_dentry);
+	h_path.dentry = wh_dentry;
+	if (!d_is_dir(wh_dentry)) {
+		/* no delegation since it is just created */
+		err = vfsub_unlink(d_inode(h_parent), &h_path,
+				   /*delegated*/NULL, /*force*/0);
+	} else
+		err = vfsub_rmdir(d_inode(h_parent), &h_path);
+	if (unlikely(err)) {
+		AuIOErr("failed remove copied-up tmp file %pd(%d)\n",
+			wh_dentry, err);
+		err = -EIO;
+	}
+	au_dtime_revert(&dt);
+	au_set_hi_wh(d_inode(dentry), bdst, wh_dentry);
+
+out_wh:
+	dput(wh_dentry);
+out:
+	dput(parent);
+	return err;
+}
+
+struct au_cpup_wh_args {
+	int *errp;
+	struct au_cp_generic *cpg;
+	struct file *file;
+};
+
+static void au_call_cpup_wh(void *args)
+{
+	struct au_cpup_wh_args *a = args;
+
+	au_pin_hdir_acquire_nest(a->cpg->pin);
+	*a->errp = au_cpup_wh(a->cpg, a->file);
+	au_pin_hdir_release(a->cpg->pin);
+}
+
+int au_sio_cpup_wh(struct au_cp_generic *cpg, struct file *file)
+{
+	int err, wkq_err;
+	aufs_bindex_t bdst;
+	struct dentry *dentry, *parent, *h_orph, *h_parent;
+	struct inode *dir, *h_dir, *h_tmpdir;
+	struct au_wbr *wbr;
+	struct au_pin wh_pin, *pin_orig;
+
+	dentry = cpg->dentry;
+	bdst = cpg->bdst;
+	parent = dget_parent(dentry);
+	dir = d_inode(parent);
+	h_orph = NULL;
+	h_parent = NULL;
+	h_dir = au_igrab(au_h_iptr(dir, bdst));
+	h_tmpdir = h_dir;
+	pin_orig = NULL;
+	if (!h_dir->i_nlink) {
+		wbr = au_sbr(dentry->d_sb, bdst)->br_wbr;
+		h_orph = wbr->wbr_orph;
+
+		h_parent = dget(au_h_dptr(parent, bdst));
+		au_set_h_dptr(parent, bdst, dget(h_orph));
+		h_tmpdir = d_inode(h_orph);
+		au_set_h_iptr(dir, bdst, au_igrab(h_tmpdir), /*flags*/0);
+
+		inode_lock_nested(h_tmpdir, AuLsc_I_PARENT3);
+		/* todo: au_h_open_pre()? */
+
+		pin_orig = cpg->pin;
+		au_pin_init(&wh_pin, dentry, bdst, AuLsc_DI_PARENT,
+			    AuLsc_I_PARENT3, cpg->pin->udba, AuPin_DI_LOCKED);
+		cpg->pin = &wh_pin;
+	}
+
+	if (!au_test_h_perm_sio(h_tmpdir, MAY_EXEC | MAY_WRITE)
+	    && !au_cpup_sio_test(cpg->pin, d_inode(dentry)->i_mode))
+		err = au_cpup_wh(cpg, file);
+	else {
+		struct au_cpup_wh_args args = {
+			.errp	= &err,
+			.cpg	= cpg,
+			.file	= file
+		};
+		wkq_err = au_wkq_wait(au_call_cpup_wh, &args);
+		if (unlikely(wkq_err))
+			err = wkq_err;
+	}
+
+	if (h_orph) {
+		inode_unlock(h_tmpdir);
+		/* todo: au_h_open_post()? */
+		au_set_h_iptr(dir, bdst, au_igrab(h_dir), /*flags*/0);
+		au_set_h_dptr(parent, bdst, h_parent);
+		AuDebugOn(!pin_orig);
+		cpg->pin = pin_orig;
+	}
+	iput(h_dir);
+	dput(parent);
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * generic routine for both of copy-up and copy-down.
+ */
+/* cf. revalidate function in file.c */
+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
+	       int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
+			 struct au_pin *pin,
+			 struct dentry *h_parent, void *arg),
+	       void *arg)
+{
+	int err;
+	struct au_pin pin;
+	struct dentry *d, *parent, *h_parent, *real_parent, *h_dentry;
+
+	err = 0;
+	parent = dget_parent(dentry);
+	if (IS_ROOT(parent))
+		goto out;
+
+	au_pin_init(&pin, dentry, bdst, AuLsc_DI_PARENT2, AuLsc_I_PARENT2,
+		    au_opt_udba(dentry->d_sb), AuPin_MNT_WRITE);
+
+	/* do not use au_dpage */
+	real_parent = parent;
+	while (1) {
+		dput(parent);
+		parent = dget_parent(dentry);
+		h_parent = au_h_dptr(parent, bdst);
+		if (h_parent)
+			goto out; /* success */
+
+		/* find top dir which is necessary to cpup */
+		do {
+			d = parent;
+			dput(parent);
+			parent = dget_parent(d);
+			di_read_lock_parent3(parent, !AuLock_IR);
+			h_parent = au_h_dptr(parent, bdst);
+			di_read_unlock(parent, !AuLock_IR);
+		} while (!h_parent);
+
+		if (d != real_parent)
+			di_write_lock_child3(d);
+
+		/* somebody else might create while we were sleeping */
+		h_dentry = au_h_dptr(d, bdst);
+		if (!h_dentry || d_is_negative(h_dentry)) {
+			if (h_dentry)
+				au_update_dbtop(d);
+
+			au_pin_set_dentry(&pin, d);
+			err = au_do_pin(&pin);
+			if (!err) {
+				err = cp(d, bdst, &pin, h_parent, arg);
+				au_unpin(&pin);
+			}
+		}
+
+		if (d != real_parent)
+			di_write_unlock(d);
+		if (unlikely(err))
+			break;
+	}
+
+out:
+	dput(parent);
+	return err;
+}
+
+static int au_cpup_dir(struct dentry *dentry, aufs_bindex_t bdst,
+		       struct au_pin *pin,
+		       struct dentry *h_parent __maybe_unused,
+		       void *arg __maybe_unused)
+{
+	struct au_cp_generic cpg = {
+		.dentry	= dentry,
+		.bdst	= bdst,
+		.bsrc	= -1,
+		.len	= 0,
+		.pin	= pin,
+		.flags	= AuCpup_DTIME
+	};
+	return au_sio_cpup_simple(&cpg);
+}
+
+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
+{
+	return au_cp_dirs(dentry, bdst, au_cpup_dir, NULL);
+}
+
+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
+{
+	int err;
+	struct dentry *parent;
+	struct inode *dir;
+
+	parent = dget_parent(dentry);
+	dir = d_inode(parent);
+	err = 0;
+	if (au_h_iptr(dir, bdst))
+		goto out;
+
+	di_read_unlock(parent, AuLock_IR);
+	di_write_lock_parent(parent);
+	/* someone else might change our inode while we were sleeping */
+	if (!au_h_iptr(dir, bdst))
+		err = au_cpup_dirs(dentry, bdst);
+	di_downgrade_lock(parent, AuLock_IR);
+
+out:
+	dput(parent);
+	return err;
+}
diff --git a/fs/aufs/cpup.h b/fs/aufs/cpup.h
new file mode 100644
index 000000000..99295266c
--- /dev/null
+++ b/fs/aufs/cpup.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * copy-up/down functions
+ */
+
+#ifndef __AUFS_CPUP_H__
+#define __AUFS_CPUP_H__
+
+#ifdef __KERNEL__
+
+#include <linux/path.h>
+
+struct inode;
+struct file;
+struct au_pin;
+
+void au_cpup_attr_flags(struct inode *dst, unsigned int iflags);
+void au_cpup_attr_timesizes(struct inode *inode);
+void au_cpup_attr_nlink(struct inode *inode, int force);
+void au_cpup_attr_changeable(struct inode *inode);
+void au_cpup_igen(struct inode *inode, struct inode *h_inode);
+void au_cpup_attr_all(struct inode *inode, int force);
+
+/* ---------------------------------------------------------------------- */
+
+struct au_cp_generic {
+	struct dentry	*dentry;
+	aufs_bindex_t	bdst, bsrc;
+	loff_t		len;
+	struct au_pin	*pin;
+	unsigned int	flags;
+};
+
+/* cpup flags */
+#define AuCpup_DTIME		1		/* do dtime_store/revert */
+#define AuCpup_KEEPLINO		(1 << 1)	/* do not clear the lower xino,
+						   for link(2) */
+#define AuCpup_RENAME		(1 << 2)	/* rename after cpup */
+#define AuCpup_HOPEN		(1 << 3)	/* call h_open_pre/post() in
+						   cpup */
+#define AuCpup_OVERWRITE	(1 << 4)	/* allow overwriting the
+						   existing entry */
+#define AuCpup_RWDST		(1 << 5)	/* force write target even if
+						   the branch is marked as RO */
+
+#ifndef CONFIG_AUFS_BR_HFSPLUS
+#undef AuCpup_HOPEN
+#define AuCpup_HOPEN		0
+#endif
+
+#define au_ftest_cpup(flags, name)	((flags) & AuCpup_##name)
+#define au_fset_cpup(flags, name) \
+	do { (flags) |= AuCpup_##name; } while (0)
+#define au_fclr_cpup(flags, name) \
+	do { (flags) &= ~AuCpup_##name; } while (0)
+
+int au_copy_file(struct file *dst, struct file *src, loff_t len);
+int au_sio_cpup_simple(struct au_cp_generic *cpg);
+int au_sio_cpdown_simple(struct au_cp_generic *cpg);
+int au_sio_cpup_wh(struct au_cp_generic *cpg, struct file *file);
+
+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
+	       int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
+			 struct au_pin *pin,
+			 struct dentry *h_parent, void *arg),
+	       void *arg);
+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
+
+/* ---------------------------------------------------------------------- */
+
+/* keep timestamps when copyup */
+struct au_dtime {
+	struct dentry *dt_dentry;
+	struct path dt_h_path;
+	struct timespec dt_atime, dt_mtime;
+};
+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
+		    struct path *h_path);
+void au_dtime_revert(struct au_dtime *dt);
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_CPUP_H__ */
diff --git a/fs/aufs/dbgaufs.c b/fs/aufs/dbgaufs.c
new file mode 100644
index 000000000..b85f0694b
--- /dev/null
+++ b/fs/aufs/dbgaufs.c
@@ -0,0 +1,437 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * debugfs interface
+ */
+
+#include <linux/debugfs.h>
+#include "aufs.h"
+
+#ifndef CONFIG_SYSFS
+#error DEBUG_FS depends upon SYSFS
+#endif
+
+static struct dentry *dbgaufs;
+static const mode_t dbgaufs_mode = S_IRUSR | S_IRGRP | S_IROTH;
+
+/* 20 is max digits length of ulong 64 */
+struct dbgaufs_arg {
+	int n;
+	char a[20 * 4];
+};
+
+/*
+ * common function for all XINO files
+ */
+static int dbgaufs_xi_release(struct inode *inode __maybe_unused,
+			      struct file *file)
+{
+	kfree(file->private_data);
+	return 0;
+}
+
+static int dbgaufs_xi_open(struct file *xf, struct file *file, int do_fcnt)
+{
+	int err;
+	struct kstat st;
+	struct dbgaufs_arg *p;
+
+	err = -ENOMEM;
+	p = kmalloc(sizeof(*p), GFP_NOFS);
+	if (unlikely(!p))
+		goto out;
+
+	err = 0;
+	p->n = 0;
+	file->private_data = p;
+	if (!xf)
+		goto out;
+
+	err = vfsub_getattr(&xf->f_path, &st);
+	if (!err) {
+		if (do_fcnt)
+			p->n = snprintf
+				(p->a, sizeof(p->a), "%ld, %llux%u %lld\n",
+				 (long)file_count(xf), st.blocks, st.blksize,
+				 (long long)st.size);
+		else
+			p->n = snprintf(p->a, sizeof(p->a), "%llux%u %lld\n",
+					st.blocks, st.blksize,
+					(long long)st.size);
+		AuDebugOn(p->n >= sizeof(p->a));
+	} else {
+		p->n = snprintf(p->a, sizeof(p->a), "err %d\n", err);
+		err = 0;
+	}
+
+out:
+	return err;
+
+}
+
+static ssize_t dbgaufs_xi_read(struct file *file, char __user *buf,
+			       size_t count, loff_t *ppos)
+{
+	struct dbgaufs_arg *p;
+
+	p = file->private_data;
+	return simple_read_from_buffer(buf, count, ppos, p->a, p->n);
+}
+
+/* ---------------------------------------------------------------------- */
+
+struct dbgaufs_plink_arg {
+	int n;
+	char a[];
+};
+
+static int dbgaufs_plink_release(struct inode *inode __maybe_unused,
+				 struct file *file)
+{
+	free_page((unsigned long)file->private_data);
+	return 0;
+}
+
+static int dbgaufs_plink_open(struct inode *inode, struct file *file)
+{
+	int err, i, limit;
+	unsigned long n, sum;
+	struct dbgaufs_plink_arg *p;
+	struct au_sbinfo *sbinfo;
+	struct super_block *sb;
+	struct hlist_bl_head *hbl;
+
+	err = -ENOMEM;
+	p = (void *)get_zeroed_page(GFP_NOFS);
+	if (unlikely(!p))
+		goto out;
+
+	err = -EFBIG;
+	sbinfo = inode->i_private;
+	sb = sbinfo->si_sb;
+	si_noflush_read_lock(sb);
+	if (au_opt_test(au_mntflags(sb), PLINK)) {
+		limit = PAGE_SIZE - sizeof(p->n);
+
+		/* the number of buckets */
+		n = snprintf(p->a + p->n, limit, "%d\n", AuPlink_NHASH);
+		p->n += n;
+		limit -= n;
+
+		sum = 0;
+		for (i = 0, hbl = sbinfo->si_plink; i < AuPlink_NHASH;
+		     i++, hbl++) {
+			n = au_hbl_count(hbl);
+			sum += n;
+
+			n = snprintf(p->a + p->n, limit, "%lu ", n);
+			p->n += n;
+			limit -= n;
+			if (unlikely(limit <= 0))
+				goto out_free;
+		}
+		p->a[p->n - 1] = '\n';
+
+		/* the sum of plinks */
+		n = snprintf(p->a + p->n, limit, "%lu\n", sum);
+		p->n += n;
+		limit -= n;
+		if (unlikely(limit <= 0))
+			goto out_free;
+	} else {
+#define str "1\n0\n0\n"
+		p->n = sizeof(str) - 1;
+		strcpy(p->a, str);
+#undef str
+	}
+	si_read_unlock(sb);
+
+	err = 0;
+	file->private_data = p;
+	goto out; /* success */
+
+out_free:
+	free_page((unsigned long)p);
+out:
+	return err;
+}
+
+static ssize_t dbgaufs_plink_read(struct file *file, char __user *buf,
+				  size_t count, loff_t *ppos)
+{
+	struct dbgaufs_plink_arg *p;
+
+	p = file->private_data;
+	return simple_read_from_buffer(buf, count, ppos, p->a, p->n);
+}
+
+static const struct file_operations dbgaufs_plink_fop = {
+	.owner		= THIS_MODULE,
+	.open		= dbgaufs_plink_open,
+	.release	= dbgaufs_plink_release,
+	.read		= dbgaufs_plink_read
+};
+
+/* ---------------------------------------------------------------------- */
+
+static int dbgaufs_xib_open(struct inode *inode, struct file *file)
+{
+	int err;
+	struct au_sbinfo *sbinfo;
+	struct super_block *sb;
+
+	sbinfo = inode->i_private;
+	sb = sbinfo->si_sb;
+	si_noflush_read_lock(sb);
+	err = dbgaufs_xi_open(sbinfo->si_xib, file, /*do_fcnt*/0);
+	si_read_unlock(sb);
+	return err;
+}
+
+static const struct file_operations dbgaufs_xib_fop = {
+	.owner		= THIS_MODULE,
+	.open		= dbgaufs_xib_open,
+	.release	= dbgaufs_xi_release,
+	.read		= dbgaufs_xi_read
+};
+
+/* ---------------------------------------------------------------------- */
+
+#define DbgaufsXi_PREFIX "xi"
+
+static int dbgaufs_xino_open(struct inode *inode, struct file *file)
+{
+	int err;
+	long l;
+	struct au_sbinfo *sbinfo;
+	struct super_block *sb;
+	struct file *xf;
+	struct qstr *name;
+
+	err = -ENOENT;
+	xf = NULL;
+	name = &file->f_path.dentry->d_name;
+	if (unlikely(name->len < sizeof(DbgaufsXi_PREFIX)
+		     || memcmp(name->name, DbgaufsXi_PREFIX,
+			       sizeof(DbgaufsXi_PREFIX) - 1)))
+		goto out;
+	err = kstrtol(name->name + sizeof(DbgaufsXi_PREFIX) - 1, 10, &l);
+	if (unlikely(err))
+		goto out;
+
+	sbinfo = inode->i_private;
+	sb = sbinfo->si_sb;
+	si_noflush_read_lock(sb);
+	if (l <= au_sbbot(sb)) {
+		xf = au_sbr(sb, (aufs_bindex_t)l)->br_xino.xi_file;
+		err = dbgaufs_xi_open(xf, file, /*do_fcnt*/1);
+	} else
+		err = -ENOENT;
+	si_read_unlock(sb);
+
+out:
+	return err;
+}
+
+static const struct file_operations dbgaufs_xino_fop = {
+	.owner		= THIS_MODULE,
+	.open		= dbgaufs_xino_open,
+	.release	= dbgaufs_xi_release,
+	.read		= dbgaufs_xi_read
+};
+
+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
+{
+	aufs_bindex_t bbot;
+	struct au_branch *br;
+	struct au_xino_file *xi;
+
+	if (!au_sbi(sb)->si_dbgaufs)
+		return;
+
+	bbot = au_sbbot(sb);
+	for (; bindex <= bbot; bindex++) {
+		br = au_sbr(sb, bindex);
+		xi = &br->br_xino;
+		/* debugfs acquires the parent i_mutex */
+		lockdep_off();
+		debugfs_remove(xi->xi_dbgaufs);
+		lockdep_on();
+		xi->xi_dbgaufs = NULL;
+	}
+}
+
+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
+{
+	struct au_sbinfo *sbinfo;
+	struct dentry *parent;
+	struct au_branch *br;
+	struct au_xino_file *xi;
+	aufs_bindex_t bbot;
+	char name[sizeof(DbgaufsXi_PREFIX) + 5]; /* "xi" bindex NULL */
+
+	sbinfo = au_sbi(sb);
+	parent = sbinfo->si_dbgaufs;
+	if (!parent)
+		return;
+
+	bbot = au_sbbot(sb);
+	for (; bindex <= bbot; bindex++) {
+		snprintf(name, sizeof(name), DbgaufsXi_PREFIX "%d", bindex);
+		br = au_sbr(sb, bindex);
+		xi = &br->br_xino;
+		AuDebugOn(xi->xi_dbgaufs);
+		/* debugfs acquires the parent i_mutex */
+		lockdep_off();
+		xi->xi_dbgaufs = debugfs_create_file(name, dbgaufs_mode, parent,
+						     sbinfo, &dbgaufs_xino_fop);
+		lockdep_on();
+		/* ignore an error */
+		if (unlikely(!xi->xi_dbgaufs))
+			AuWarn1("failed %s under debugfs\n", name);
+	}
+}
+
+/* ---------------------------------------------------------------------- */
+
+#ifdef CONFIG_AUFS_EXPORT
+static int dbgaufs_xigen_open(struct inode *inode, struct file *file)
+{
+	int err;
+	struct au_sbinfo *sbinfo;
+	struct super_block *sb;
+
+	sbinfo = inode->i_private;
+	sb = sbinfo->si_sb;
+	si_noflush_read_lock(sb);
+	err = dbgaufs_xi_open(sbinfo->si_xigen, file, /*do_fcnt*/0);
+	si_read_unlock(sb);
+	return err;
+}
+
+static const struct file_operations dbgaufs_xigen_fop = {
+	.owner		= THIS_MODULE,
+	.open		= dbgaufs_xigen_open,
+	.release	= dbgaufs_xi_release,
+	.read		= dbgaufs_xi_read
+};
+
+static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
+{
+	int err;
+
+	/*
+	 * This function is a dynamic '__init' function actually,
+	 * so the tiny check for si_rwsem is unnecessary.
+	 */
+	/* AuRwMustWriteLock(&sbinfo->si_rwsem); */
+
+	err = -EIO;
+	sbinfo->si_dbgaufs_xigen = debugfs_create_file
+		("xigen", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
+		 &dbgaufs_xigen_fop);
+	if (sbinfo->si_dbgaufs_xigen)
+		err = 0;
+
+	return err;
+}
+#else
+static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
+{
+	return 0;
+}
+#endif /* CONFIG_AUFS_EXPORT */
+
+/* ---------------------------------------------------------------------- */
+
+void dbgaufs_si_fin(struct au_sbinfo *sbinfo)
+{
+	/*
+	 * This function is a dynamic '__fin' function actually,
+	 * so the tiny check for si_rwsem is unnecessary.
+	 */
+	/* AuRwMustWriteLock(&sbinfo->si_rwsem); */
+
+	debugfs_remove_recursive(sbinfo->si_dbgaufs);
+	sbinfo->si_dbgaufs = NULL;
+	kobject_put(&sbinfo->si_kobj);
+}
+
+int dbgaufs_si_init(struct au_sbinfo *sbinfo)
+{
+	int err;
+	char name[SysaufsSiNameLen];
+
+	/*
+	 * This function is a dynamic '__init' function actually,
+	 * so the tiny check for si_rwsem is unnecessary.
+	 */
+	/* AuRwMustWriteLock(&sbinfo->si_rwsem); */
+
+	err = -ENOENT;
+	if (!dbgaufs) {
+		AuErr1("/debug/aufs is uninitialized\n");
+		goto out;
+	}
+
+	err = -EIO;
+	sysaufs_name(sbinfo, name);
+	sbinfo->si_dbgaufs = debugfs_create_dir(name, dbgaufs);
+	if (unlikely(!sbinfo->si_dbgaufs))
+		goto out;
+	kobject_get(&sbinfo->si_kobj);
+
+	sbinfo->si_dbgaufs_xib = debugfs_create_file
+		("xib", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
+		 &dbgaufs_xib_fop);
+	if (unlikely(!sbinfo->si_dbgaufs_xib))
+		goto out_dir;
+
+	sbinfo->si_dbgaufs_plink = debugfs_create_file
+		("plink", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
+		 &dbgaufs_plink_fop);
+	if (unlikely(!sbinfo->si_dbgaufs_plink))
+		goto out_dir;
+
+	err = dbgaufs_xigen_init(sbinfo);
+	if (!err)
+		goto out; /* success */
+
+out_dir:
+	dbgaufs_si_fin(sbinfo);
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void dbgaufs_fin(void)
+{
+	debugfs_remove(dbgaufs);
+}
+
+int __init dbgaufs_init(void)
+{
+	int err;
+
+	err = -EIO;
+	dbgaufs = debugfs_create_dir(AUFS_NAME, NULL);
+	if (dbgaufs)
+		err = 0;
+	return err;
+}
diff --git a/fs/aufs/dbgaufs.h b/fs/aufs/dbgaufs.h
new file mode 100644
index 000000000..f8d7b74e8
--- /dev/null
+++ b/fs/aufs/dbgaufs.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * debugfs interface
+ */
+
+#ifndef __DBGAUFS_H__
+#define __DBGAUFS_H__
+
+#ifdef __KERNEL__
+
+struct super_block;
+struct au_sbinfo;
+
+#ifdef CONFIG_DEBUG_FS
+/* dbgaufs.c */
+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
+void dbgaufs_si_fin(struct au_sbinfo *sbinfo);
+int dbgaufs_si_init(struct au_sbinfo *sbinfo);
+void dbgaufs_fin(void);
+int __init dbgaufs_init(void);
+#else
+AuStubVoid(dbgaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
+AuStubVoid(dbgaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
+AuStubVoid(dbgaufs_si_fin, struct au_sbinfo *sbinfo)
+AuStubInt0(dbgaufs_si_init, struct au_sbinfo *sbinfo)
+AuStubVoid(dbgaufs_fin, void)
+AuStubInt0(__init dbgaufs_init, void)
+#endif /* CONFIG_DEBUG_FS */
+
+#endif /* __KERNEL__ */
+#endif /* __DBGAUFS_H__ */
diff --git a/fs/aufs/dcsub.c b/fs/aufs/dcsub.c
new file mode 100644
index 000000000..61c1a2553
--- /dev/null
+++ b/fs/aufs/dcsub.c
@@ -0,0 +1,225 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * sub-routines for dentry cache
+ */
+
+#include "aufs.h"
+
+static void au_dpage_free(struct au_dpage *dpage)
+{
+	int i;
+	struct dentry **p;
+
+	p = dpage->dentries;
+	for (i = 0; i < dpage->ndentry; i++)
+		dput(*p++);
+	free_page((unsigned long)dpage->dentries);
+}
+
+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp)
+{
+	int err;
+	void *p;
+
+	err = -ENOMEM;
+	dpages->dpages = kmalloc(sizeof(*dpages->dpages), gfp);
+	if (unlikely(!dpages->dpages))
+		goto out;
+
+	p = (void *)__get_free_page(gfp);
+	if (unlikely(!p))
+		goto out_dpages;
+
+	dpages->dpages[0].ndentry = 0;
+	dpages->dpages[0].dentries = p;
+	dpages->ndpage = 1;
+	return 0; /* success */
+
+out_dpages:
+	kfree(dpages->dpages);
+out:
+	return err;
+}
+
+void au_dpages_free(struct au_dcsub_pages *dpages)
+{
+	int i;
+	struct au_dpage *p;
+
+	p = dpages->dpages;
+	for (i = 0; i < dpages->ndpage; i++)
+		au_dpage_free(p++);
+	kfree(dpages->dpages);
+}
+
+static int au_dpages_append(struct au_dcsub_pages *dpages,
+			    struct dentry *dentry, gfp_t gfp)
+{
+	int err, sz;
+	struct au_dpage *dpage;
+	void *p;
+
+	dpage = dpages->dpages + dpages->ndpage - 1;
+	sz = PAGE_SIZE / sizeof(dentry);
+	if (unlikely(dpage->ndentry >= sz)) {
+		AuLabel(new dpage);
+		err = -ENOMEM;
+		sz = dpages->ndpage * sizeof(*dpages->dpages);
+		p = au_kzrealloc(dpages->dpages, sz,
+				 sz + sizeof(*dpages->dpages), gfp,
+				 /*may_shrink*/0);
+		if (unlikely(!p))
+			goto out;
+
+		dpages->dpages = p;
+		dpage = dpages->dpages + dpages->ndpage;
+		p = (void *)__get_free_page(gfp);
+		if (unlikely(!p))
+			goto out;
+
+		dpage->ndentry = 0;
+		dpage->dentries = p;
+		dpages->ndpage++;
+	}
+
+	AuDebugOn(au_dcount(dentry) <= 0);
+	dpage->dentries[dpage->ndentry++] = dget_dlock(dentry);
+	return 0; /* success */
+
+out:
+	return err;
+}
+
+/* todo: BAD approach */
+/* copied from linux/fs/dcache.c */
+enum d_walk_ret {
+	D_WALK_CONTINUE,
+	D_WALK_QUIT,
+	D_WALK_NORETRY,
+	D_WALK_SKIP,
+};
+
+extern void d_walk(struct dentry *parent, void *data,
+		   enum d_walk_ret (*enter)(void *, struct dentry *),
+		   void (*finish)(void *));
+
+struct ac_dpages_arg {
+	int err;
+	struct au_dcsub_pages *dpages;
+	struct super_block *sb;
+	au_dpages_test test;
+	void *arg;
+};
+
+static enum d_walk_ret au_call_dpages_append(void *_arg, struct dentry *dentry)
+{
+	enum d_walk_ret ret;
+	struct ac_dpages_arg *arg = _arg;
+
+	ret = D_WALK_CONTINUE;
+	if (dentry->d_sb == arg->sb
+	    && !IS_ROOT(dentry)
+	    && au_dcount(dentry) > 0
+	    && au_di(dentry)
+	    && (!arg->test || arg->test(dentry, arg->arg))) {
+		arg->err = au_dpages_append(arg->dpages, dentry, GFP_ATOMIC);
+		if (unlikely(arg->err))
+			ret = D_WALK_QUIT;
+	}
+
+	return ret;
+}
+
+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
+		   au_dpages_test test, void *arg)
+{
+	struct ac_dpages_arg args = {
+		.err	= 0,
+		.dpages	= dpages,
+		.sb	= root->d_sb,
+		.test	= test,
+		.arg	= arg
+	};
+
+	d_walk(root, &args, au_call_dpages_append, NULL);
+
+	return args.err;
+}
+
+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
+		       int do_include, au_dpages_test test, void *arg)
+{
+	int err;
+
+	err = 0;
+	write_seqlock(&rename_lock);
+	spin_lock(&dentry->d_lock);
+	if (do_include
+	    && au_dcount(dentry) > 0
+	    && (!test || test(dentry, arg)))
+		err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
+	spin_unlock(&dentry->d_lock);
+	if (unlikely(err))
+		goto out;
+
+	/*
+	 * RCU for vfsmount is unnecessary since this is a traverse in a single
+	 * mount
+	 */
+	while (!IS_ROOT(dentry)) {
+		dentry = dentry->d_parent; /* rename_lock is locked */
+		spin_lock(&dentry->d_lock);
+		if (au_dcount(dentry) > 0
+		    && (!test || test(dentry, arg)))
+			err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
+		spin_unlock(&dentry->d_lock);
+		if (unlikely(err))
+			break;
+	}
+
+out:
+	write_sequnlock(&rename_lock);
+	return err;
+}
+
+static inline int au_dcsub_dpages_aufs(struct dentry *dentry, void *arg)
+{
+	return au_di(dentry) && dentry->d_sb == arg;
+}
+
+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
+			    struct dentry *dentry, int do_include)
+{
+	return au_dcsub_pages_rev(dpages, dentry, do_include,
+				  au_dcsub_dpages_aufs, dentry->d_sb);
+}
+
+int au_test_subdir(struct dentry *d1, struct dentry *d2)
+{
+	struct path path[2] = {
+		{
+			.dentry = d1
+		},
+		{
+			.dentry = d2
+		}
+	};
+
+	return path_is_under(path + 0, path + 1);
+}
diff --git a/fs/aufs/dcsub.h b/fs/aufs/dcsub.h
new file mode 100644
index 000000000..50e81bf68
--- /dev/null
+++ b/fs/aufs/dcsub.h
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * sub-routines for dentry cache
+ */
+
+#ifndef __AUFS_DCSUB_H__
+#define __AUFS_DCSUB_H__
+
+#ifdef __KERNEL__
+
+#include <linux/dcache.h>
+#include <linux/fs.h>
+
+struct au_dpage {
+	int ndentry;
+	struct dentry **dentries;
+};
+
+struct au_dcsub_pages {
+	int ndpage;
+	struct au_dpage *dpages;
+};
+
+/* ---------------------------------------------------------------------- */
+
+/* dcsub.c */
+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp);
+void au_dpages_free(struct au_dcsub_pages *dpages);
+typedef int (*au_dpages_test)(struct dentry *dentry, void *arg);
+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
+		   au_dpages_test test, void *arg);
+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
+		       int do_include, au_dpages_test test, void *arg);
+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
+			    struct dentry *dentry, int do_include);
+int au_test_subdir(struct dentry *d1, struct dentry *d2);
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * todo: in linux-3.13, several similar (but faster) helpers are added to
+ * include/linux/dcache.h. Try them (in the future).
+ */
+
+static inline int au_d_hashed_positive(struct dentry *d)
+{
+	int err;
+	struct inode *inode = d_inode(d);
+
+	err = 0;
+	if (unlikely(d_unhashed(d)
+		     || d_is_negative(d)
+		     || !inode->i_nlink))
+		err = -ENOENT;
+	return err;
+}
+
+static inline int au_d_linkable(struct dentry *d)
+{
+	int err;
+	struct inode *inode = d_inode(d);
+
+	err = au_d_hashed_positive(d);
+	if (err
+	    && d_is_positive(d)
+	    && (inode->i_state & I_LINKABLE))
+		err = 0;
+	return err;
+}
+
+static inline int au_d_alive(struct dentry *d)
+{
+	int err;
+	struct inode *inode;
+
+	err = 0;
+	if (!IS_ROOT(d))
+		err = au_d_hashed_positive(d);
+	else {
+		inode = d_inode(d);
+		if (unlikely(d_unlinked(d)
+			     || d_is_negative(d)
+			     || !inode->i_nlink))
+			err = -ENOENT;
+	}
+	return err;
+}
+
+static inline int au_alive_dir(struct dentry *d)
+{
+	int err;
+
+	err = au_d_alive(d);
+	if (unlikely(err || IS_DEADDIR(d_inode(d))))
+		err = -ENOENT;
+	return err;
+}
+
+static inline int au_qstreq(struct qstr *a, struct qstr *b)
+{
+	return a->len == b->len
+		&& !memcmp(a->name, b->name, a->len);
+}
+
+/*
+ * by the commit
+ * 360f547 2015-01-25 dcache: let the dentry count go down to zero without
+ *			taking d_lock
+ * the type of d_lockref.count became int, but the inlined function d_count()
+ * still returns unsigned int.
+ * I don't know why. Maybe it is for every d_count() users?
+ * Anyway au_dcount() lives on.
+ */
+static inline int au_dcount(struct dentry *d)
+{
+	return (int)d_count(d);
+}
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_DCSUB_H__ */
diff --git a/fs/aufs/debug.c b/fs/aufs/debug.c
new file mode 100644
index 000000000..9fe5cb0c4
--- /dev/null
+++ b/fs/aufs/debug.c
@@ -0,0 +1,440 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * debug print functions
+ */
+
+#include "aufs.h"
+
+/* Returns 0, or -errno.  arg is in kp->arg. */
+static int param_atomic_t_set(const char *val, const struct kernel_param *kp)
+{
+	int err, n;
+
+	err = kstrtoint(val, 0, &n);
+	if (!err) {
+		if (n > 0)
+			au_debug_on();
+		else
+			au_debug_off();
+	}
+	return err;
+}
+
+/* Returns length written or -errno.  Buffer is 4k (ie. be short!) */
+static int param_atomic_t_get(char *buffer, const struct kernel_param *kp)
+{
+	atomic_t *a;
+
+	a = kp->arg;
+	return sprintf(buffer, "%d", atomic_read(a));
+}
+
+static struct kernel_param_ops param_ops_atomic_t = {
+	.set = param_atomic_t_set,
+	.get = param_atomic_t_get
+	/* void (*free)(void *arg) */
+};
+
+atomic_t aufs_debug = ATOMIC_INIT(0);
+MODULE_PARM_DESC(debug, "debug print");
+module_param_named(debug, aufs_debug, atomic_t, S_IRUGO | S_IWUSR | S_IWGRP);
+
+DEFINE_MUTEX(au_dbg_mtx);	/* just to serialize the dbg msgs */
+char *au_plevel = KERN_DEBUG;
+#define dpri(fmt, ...) do {					\
+	if ((au_plevel						\
+	     && strcmp(au_plevel, KERN_DEBUG))			\
+	    || au_debug_test())					\
+		printk("%s" fmt, au_plevel, ##__VA_ARGS__);	\
+} while (0)
+
+/* ---------------------------------------------------------------------- */
+
+void au_dpri_whlist(struct au_nhash *whlist)
+{
+	unsigned long ul, n;
+	struct hlist_head *head;
+	struct au_vdir_wh *pos;
+
+	n = whlist->nh_num;
+	head = whlist->nh_head;
+	for (ul = 0; ul < n; ul++) {
+		hlist_for_each_entry(pos, head, wh_hash)
+			dpri("b%d, %.*s, %d\n",
+			     pos->wh_bindex,
+			     pos->wh_str.len, pos->wh_str.name,
+			     pos->wh_str.len);
+		head++;
+	}
+}
+
+void au_dpri_vdir(struct au_vdir *vdir)
+{
+	unsigned long ul;
+	union au_vdir_deblk_p p;
+	unsigned char *o;
+
+	if (!vdir || IS_ERR(vdir)) {
+		dpri("err %ld\n", PTR_ERR(vdir));
+		return;
+	}
+
+	dpri("deblk %u, nblk %lu, deblk %p, last{%lu, %p}, ver %llu\n",
+	     vdir->vd_deblk_sz, vdir->vd_nblk, vdir->vd_deblk,
+	     vdir->vd_last.ul, vdir->vd_last.p.deblk, vdir->vd_version);
+	for (ul = 0; ul < vdir->vd_nblk; ul++) {
+		p.deblk = vdir->vd_deblk[ul];
+		o = p.deblk;
+		dpri("[%lu]: %p\n", ul, o);
+	}
+}
+
+static int do_pri_inode(aufs_bindex_t bindex, struct inode *inode, int hn,
+			struct dentry *wh)
+{
+	char *n = NULL;
+	int l = 0;
+
+	if (!inode || IS_ERR(inode)) {
+		dpri("i%d: err %ld\n", bindex, PTR_ERR(inode));
+		return -1;
+	}
+
+	/* the type of i_blocks depends upon CONFIG_LBDAF */
+	BUILD_BUG_ON(sizeof(inode->i_blocks) != sizeof(unsigned long)
+		     && sizeof(inode->i_blocks) != sizeof(u64));
+	if (wh) {
+		n = (void *)wh->d_name.name;
+		l = wh->d_name.len;
+	}
+
+	dpri("i%d: %p, i%lu, %s, cnt %d, nl %u, 0%o, sz %llu, blk %llu,"
+	     " hn %d, ct %lld, np %lu, st 0x%lx, f 0x%x, v %llu, g %x%s%.*s\n",
+	     bindex, inode,
+	     inode->i_ino, inode->i_sb ? au_sbtype(inode->i_sb) : "??",
+	     atomic_read(&inode->i_count), inode->i_nlink, inode->i_mode,
+	     i_size_read(inode), (unsigned long long)inode->i_blocks,
+	     hn, (long long)timespec_to_ns(&inode->i_ctime) & 0x0ffff,
+	     inode->i_mapping ? inode->i_mapping->nrpages : 0,
+	     inode->i_state, inode->i_flags, inode_peek_iversion(inode),
+	     inode->i_generation,
+	     l ? ", wh " : "", l, n);
+	return 0;
+}
+
+void au_dpri_inode(struct inode *inode)
+{
+	struct au_iinfo *iinfo;
+	struct au_hinode *hi;
+	aufs_bindex_t bindex;
+	int err, hn;
+
+	err = do_pri_inode(-1, inode, -1, NULL);
+	if (err || !au_test_aufs(inode->i_sb) || au_is_bad_inode(inode))
+		return;
+
+	iinfo = au_ii(inode);
+	dpri("i-1: btop %d, bbot %d, gen %d\n",
+	     iinfo->ii_btop, iinfo->ii_bbot, au_iigen(inode, NULL));
+	if (iinfo->ii_btop < 0)
+		return;
+	hn = 0;
+	for (bindex = iinfo->ii_btop; bindex <= iinfo->ii_bbot; bindex++) {
+		hi = au_hinode(iinfo, bindex);
+		hn = !!au_hn(hi);
+		do_pri_inode(bindex, hi->hi_inode, hn, hi->hi_whdentry);
+	}
+}
+
+void au_dpri_dalias(struct inode *inode)
+{
+	struct dentry *d;
+
+	spin_lock(&inode->i_lock);
+	hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias)
+		au_dpri_dentry(d);
+	spin_unlock(&inode->i_lock);
+}
+
+static int do_pri_dentry(aufs_bindex_t bindex, struct dentry *dentry)
+{
+	struct dentry *wh = NULL;
+	int hn;
+	struct inode *inode;
+	struct au_iinfo *iinfo;
+	struct au_hinode *hi;
+
+	if (!dentry || IS_ERR(dentry)) {
+		dpri("d%d: err %ld\n", bindex, PTR_ERR(dentry));
+		return -1;
+	}
+	/* do not call dget_parent() here */
+	/* note: access d_xxx without d_lock */
+	dpri("d%d: %p, %pd2?, %s, cnt %d, flags 0x%x, %shashed\n",
+	     bindex, dentry, dentry,
+	     dentry->d_sb ? au_sbtype(dentry->d_sb) : "??",
+	     au_dcount(dentry), dentry->d_flags,
+	     d_unhashed(dentry) ? "un" : "");
+	hn = -1;
+	inode = NULL;
+	if (d_is_positive(dentry))
+		inode = d_inode(dentry);
+	if (inode
+	    && au_test_aufs(dentry->d_sb)
+	    && bindex >= 0
+	    && !au_is_bad_inode(inode)) {
+		iinfo = au_ii(inode);
+		hi = au_hinode(iinfo, bindex);
+		hn = !!au_hn(hi);
+		wh = hi->hi_whdentry;
+	}
+	do_pri_inode(bindex, inode, hn, wh);
+	return 0;
+}
+
+void au_dpri_dentry(struct dentry *dentry)
+{
+	struct au_dinfo *dinfo;
+	aufs_bindex_t bindex;
+	int err;
+
+	err = do_pri_dentry(-1, dentry);
+	if (err || !au_test_aufs(dentry->d_sb))
+		return;
+
+	dinfo = au_di(dentry);
+	if (!dinfo)
+		return;
+	dpri("d-1: btop %d, bbot %d, bwh %d, bdiropq %d, gen %d, tmp %d\n",
+	     dinfo->di_btop, dinfo->di_bbot,
+	     dinfo->di_bwh, dinfo->di_bdiropq, au_digen(dentry),
+	     dinfo->di_tmpfile);
+	if (dinfo->di_btop < 0)
+		return;
+	for (bindex = dinfo->di_btop; bindex <= dinfo->di_bbot; bindex++)
+		do_pri_dentry(bindex, au_hdentry(dinfo, bindex)->hd_dentry);
+}
+
+static int do_pri_file(aufs_bindex_t bindex, struct file *file)
+{
+	char a[32];
+
+	if (!file || IS_ERR(file)) {
+		dpri("f%d: err %ld\n", bindex, PTR_ERR(file));
+		return -1;
+	}
+	a[0] = 0;
+	if (bindex < 0
+	    && !IS_ERR_OR_NULL(file->f_path.dentry)
+	    && au_test_aufs(file->f_path.dentry->d_sb)
+	    && au_fi(file))
+		snprintf(a, sizeof(a), ", gen %d, mmapped %d",
+			 au_figen(file), atomic_read(&au_fi(file)->fi_mmapped));
+	dpri("f%d: mode 0x%x, flags 0%o, cnt %ld, v %llu, pos %llu%s\n",
+	     bindex, file->f_mode, file->f_flags, (long)file_count(file),
+	     file->f_version, file->f_pos, a);
+	if (!IS_ERR_OR_NULL(file->f_path.dentry))
+		do_pri_dentry(bindex, file->f_path.dentry);
+	return 0;
+}
+
+void au_dpri_file(struct file *file)
+{
+	struct au_finfo *finfo;
+	struct au_fidir *fidir;
+	struct au_hfile *hfile;
+	aufs_bindex_t bindex;
+	int err;
+
+	err = do_pri_file(-1, file);
+	if (err
+	    || IS_ERR_OR_NULL(file->f_path.dentry)
+	    || !au_test_aufs(file->f_path.dentry->d_sb))
+		return;
+
+	finfo = au_fi(file);
+	if (!finfo)
+		return;
+	if (finfo->fi_btop < 0)
+		return;
+	fidir = finfo->fi_hdir;
+	if (!fidir)
+		do_pri_file(finfo->fi_btop, finfo->fi_htop.hf_file);
+	else
+		for (bindex = finfo->fi_btop;
+		     bindex >= 0 && bindex <= fidir->fd_bbot;
+		     bindex++) {
+			hfile = fidir->fd_hfile + bindex;
+			do_pri_file(bindex, hfile ? hfile->hf_file : NULL);
+		}
+}
+
+static int do_pri_br(aufs_bindex_t bindex, struct au_branch *br)
+{
+	struct vfsmount *mnt;
+	struct super_block *sb;
+
+	if (!br || IS_ERR(br))
+		goto out;
+	mnt = au_br_mnt(br);
+	if (!mnt || IS_ERR(mnt))
+		goto out;
+	sb = mnt->mnt_sb;
+	if (!sb || IS_ERR(sb))
+		goto out;
+
+	dpri("s%d: {perm 0x%x, id %d, cnt %lld, wbr %p}, "
+	     "%s, dev 0x%02x%02x, flags 0x%lx, cnt %d, active %d, "
+	     "xino %d\n",
+	     bindex, br->br_perm, br->br_id, au_br_count(br),
+	     br->br_wbr, au_sbtype(sb), MAJOR(sb->s_dev), MINOR(sb->s_dev),
+	     sb->s_flags, sb->s_count,
+	     atomic_read(&sb->s_active), !!br->br_xino.xi_file);
+	return 0;
+
+out:
+	dpri("s%d: err %ld\n", bindex, PTR_ERR(br));
+	return -1;
+}
+
+void au_dpri_sb(struct super_block *sb)
+{
+	struct au_sbinfo *sbinfo;
+	aufs_bindex_t bindex;
+	int err;
+	/* to reuduce stack size */
+	struct {
+		struct vfsmount mnt;
+		struct au_branch fake;
+	} *a;
+
+	/* this function can be called from magic sysrq */
+	a = kzalloc(sizeof(*a), GFP_ATOMIC);
+	if (unlikely(!a)) {
+		dpri("no memory\n");
+		return;
+	}
+
+	a->mnt.mnt_sb = sb;
+	a->fake.br_path.mnt = &a->mnt;
+	au_br_count_init(&a->fake);
+	err = do_pri_br(-1, &a->fake);
+	au_br_count_fin(&a->fake);
+	kfree(a);
+	dpri("dev 0x%x\n", sb->s_dev);
+	if (err || !au_test_aufs(sb))
+		return;
+
+	sbinfo = au_sbi(sb);
+	if (!sbinfo)
+		return;
+	dpri("nw %d, gen %u, kobj %d\n",
+	     atomic_read(&sbinfo->si_nowait.nw_len), sbinfo->si_generation,
+	     kref_read(&sbinfo->si_kobj.kref));
+	for (bindex = 0; bindex <= sbinfo->si_bbot; bindex++)
+		do_pri_br(bindex, sbinfo->si_branch[0 + bindex]);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line)
+{
+	struct inode *h_inode, *inode = d_inode(dentry);
+	struct dentry *h_dentry;
+	aufs_bindex_t bindex, bbot, bi;
+
+	if (!inode /* || au_di(dentry)->di_lsc == AuLsc_DI_TMP */)
+		return;
+
+	bbot = au_dbbot(dentry);
+	bi = au_ibbot(inode);
+	if (bi < bbot)
+		bbot = bi;
+	bindex = au_dbtop(dentry);
+	bi = au_ibtop(inode);
+	if (bi > bindex)
+		bindex = bi;
+
+	for (; bindex <= bbot; bindex++) {
+		h_dentry = au_h_dptr(dentry, bindex);
+		if (!h_dentry)
+			continue;
+		h_inode = au_h_iptr(inode, bindex);
+		if (unlikely(h_inode != d_inode(h_dentry))) {
+			au_debug_on();
+			AuDbg("b%d, %s:%d\n", bindex, func, line);
+			AuDbgDentry(dentry);
+			AuDbgInode(inode);
+			au_debug_off();
+			BUG();
+		}
+	}
+}
+
+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen)
+{
+	int err, i, j;
+	struct au_dcsub_pages dpages;
+	struct au_dpage *dpage;
+	struct dentry **dentries;
+
+	err = au_dpages_init(&dpages, GFP_NOFS);
+	AuDebugOn(err);
+	err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/1);
+	AuDebugOn(err);
+	for (i = dpages.ndpage - 1; !err && i >= 0; i--) {
+		dpage = dpages.dpages + i;
+		dentries = dpage->dentries;
+		for (j = dpage->ndentry - 1; !err && j >= 0; j--)
+			AuDebugOn(au_digen_test(dentries[j], sigen));
+	}
+	au_dpages_free(&dpages);
+}
+
+void au_dbg_verify_kthread(void)
+{
+	if (au_wkq_test()) {
+		au_dbg_blocked();
+		/*
+		 * It may be recursive, but udba=notify between two aufs mounts,
+		 * where a single ro branch is shared, is not a problem.
+		 */
+		/* WARN_ON(1); */
+	}
+}
+
+/* ---------------------------------------------------------------------- */
+
+int __init au_debug_init(void)
+{
+	aufs_bindex_t bindex;
+	struct au_vdir_destr destr;
+
+	bindex = -1;
+	AuDebugOn(bindex >= 0);
+
+	destr.len = -1;
+	AuDebugOn(destr.len < NAME_MAX);
+
+#ifdef CONFIG_4KSTACKS
+	pr_warn("CONFIG_4KSTACKS is defined.\n");
+#endif
+
+	return 0;
+}
diff --git a/fs/aufs/debug.h b/fs/aufs/debug.h
new file mode 100644
index 000000000..c1325b139
--- /dev/null
+++ b/fs/aufs/debug.h
@@ -0,0 +1,225 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * debug print functions
+ */
+
+#ifndef __AUFS_DEBUG_H__
+#define __AUFS_DEBUG_H__
+
+#ifdef __KERNEL__
+
+#include <linux/atomic.h>
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/sysrq.h>
+
+#ifdef CONFIG_AUFS_DEBUG
+#define AuDebugOn(a)		BUG_ON(a)
+
+/* module parameter */
+extern atomic_t aufs_debug;
+static inline void au_debug_on(void)
+{
+	atomic_inc(&aufs_debug);
+}
+static inline void au_debug_off(void)
+{
+	atomic_dec_if_positive(&aufs_debug);
+}
+
+static inline int au_debug_test(void)
+{
+	return atomic_read(&aufs_debug) > 0;
+}
+#else
+#define AuDebugOn(a)		do {} while (0)
+AuStubVoid(au_debug_on, void)
+AuStubVoid(au_debug_off, void)
+AuStubInt0(au_debug_test, void)
+#endif /* CONFIG_AUFS_DEBUG */
+
+#define param_check_atomic_t(name, p) __param_check(name, p, atomic_t)
+
+/* ---------------------------------------------------------------------- */
+
+/* debug print */
+
+#define AuDbg(fmt, ...) do { \
+	if (au_debug_test()) \
+		pr_debug("DEBUG: " fmt, ##__VA_ARGS__); \
+} while (0)
+#define AuLabel(l)		AuDbg(#l "\n")
+#define AuIOErr(fmt, ...)	pr_err("I/O Error, " fmt, ##__VA_ARGS__)
+#define AuWarn1(fmt, ...) do { \
+	static unsigned char _c; \
+	if (!_c++) \
+		pr_warn(fmt, ##__VA_ARGS__); \
+} while (0)
+
+#define AuErr1(fmt, ...) do { \
+	static unsigned char _c; \
+	if (!_c++) \
+		pr_err(fmt, ##__VA_ARGS__); \
+} while (0)
+
+#define AuIOErr1(fmt, ...) do { \
+	static unsigned char _c; \
+	if (!_c++) \
+		AuIOErr(fmt, ##__VA_ARGS__); \
+} while (0)
+
+#define AuUnsupportMsg	"This operation is not supported." \
+			" Please report this application to aufs-users ML."
+#define AuUnsupport(fmt, ...) do { \
+	pr_err(AuUnsupportMsg "\n" fmt, ##__VA_ARGS__); \
+	dump_stack(); \
+} while (0)
+
+#define AuTraceErr(e) do { \
+	if (unlikely((e) < 0)) \
+		AuDbg("err %d\n", (int)(e)); \
+} while (0)
+
+#define AuTraceErrPtr(p) do { \
+	if (IS_ERR(p)) \
+		AuDbg("err %ld\n", PTR_ERR(p)); \
+} while (0)
+
+/* dirty macros for debug print, use with "%.*s" and caution */
+#define AuLNPair(qstr)		(qstr)->len, (qstr)->name
+
+/* ---------------------------------------------------------------------- */
+
+struct dentry;
+#ifdef CONFIG_AUFS_DEBUG
+extern struct mutex au_dbg_mtx;
+extern char *au_plevel;
+struct au_nhash;
+void au_dpri_whlist(struct au_nhash *whlist);
+struct au_vdir;
+void au_dpri_vdir(struct au_vdir *vdir);
+struct inode;
+void au_dpri_inode(struct inode *inode);
+void au_dpri_dalias(struct inode *inode);
+void au_dpri_dentry(struct dentry *dentry);
+struct file;
+void au_dpri_file(struct file *filp);
+struct super_block;
+void au_dpri_sb(struct super_block *sb);
+
+#define au_dbg_verify_dinode(d) __au_dbg_verify_dinode(d, __func__, __LINE__)
+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line);
+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen);
+void au_dbg_verify_kthread(void);
+
+int __init au_debug_init(void);
+
+#define AuDbgWhlist(w) do { \
+	mutex_lock(&au_dbg_mtx); \
+	AuDbg(#w "\n"); \
+	au_dpri_whlist(w); \
+	mutex_unlock(&au_dbg_mtx); \
+} while (0)
+
+#define AuDbgVdir(v) do { \
+	mutex_lock(&au_dbg_mtx); \
+	AuDbg(#v "\n"); \
+	au_dpri_vdir(v); \
+	mutex_unlock(&au_dbg_mtx); \
+} while (0)
+
+#define AuDbgInode(i) do { \
+	mutex_lock(&au_dbg_mtx); \
+	AuDbg(#i "\n"); \
+	au_dpri_inode(i); \
+	mutex_unlock(&au_dbg_mtx); \
+} while (0)
+
+#define AuDbgDAlias(i) do { \
+	mutex_lock(&au_dbg_mtx); \
+	AuDbg(#i "\n"); \
+	au_dpri_dalias(i); \
+	mutex_unlock(&au_dbg_mtx); \
+} while (0)
+
+#define AuDbgDentry(d) do { \
+	mutex_lock(&au_dbg_mtx); \
+	AuDbg(#d "\n"); \
+	au_dpri_dentry(d); \
+	mutex_unlock(&au_dbg_mtx); \
+} while (0)
+
+#define AuDbgFile(f) do { \
+	mutex_lock(&au_dbg_mtx); \
+	AuDbg(#f "\n"); \
+	au_dpri_file(f); \
+	mutex_unlock(&au_dbg_mtx); \
+} while (0)
+
+#define AuDbgSb(sb) do { \
+	mutex_lock(&au_dbg_mtx); \
+	AuDbg(#sb "\n"); \
+	au_dpri_sb(sb); \
+	mutex_unlock(&au_dbg_mtx); \
+} while (0)
+
+#define AuDbgSym(addr) do {				\
+	char sym[KSYM_SYMBOL_LEN];			\
+	sprint_symbol(sym, (unsigned long)addr);	\
+	AuDbg("%s\n", sym);				\
+} while (0)
+#else
+AuStubVoid(au_dbg_verify_dinode, struct dentry *dentry)
+AuStubVoid(au_dbg_verify_gen, struct dentry *parent, unsigned int sigen)
+AuStubVoid(au_dbg_verify_kthread, void)
+AuStubInt0(__init au_debug_init, void)
+
+#define AuDbgWhlist(w)		do {} while (0)
+#define AuDbgVdir(v)		do {} while (0)
+#define AuDbgInode(i)		do {} while (0)
+#define AuDbgDAlias(i)		do {} while (0)
+#define AuDbgDentry(d)		do {} while (0)
+#define AuDbgFile(f)		do {} while (0)
+#define AuDbgSb(sb)		do {} while (0)
+#define AuDbgSym(addr)		do {} while (0)
+#endif /* CONFIG_AUFS_DEBUG */
+
+/* ---------------------------------------------------------------------- */
+
+#ifdef CONFIG_AUFS_MAGIC_SYSRQ
+int __init au_sysrq_init(void);
+void au_sysrq_fin(void);
+
+#ifdef CONFIG_HW_CONSOLE
+#define au_dbg_blocked() do { \
+	WARN_ON(1); \
+	handle_sysrq('w'); \
+} while (0)
+#else
+AuStubVoid(au_dbg_blocked, void)
+#endif
+
+#else
+AuStubInt0(__init au_sysrq_init, void)
+AuStubVoid(au_sysrq_fin, void)
+AuStubVoid(au_dbg_blocked, void)
+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_DEBUG_H__ */
diff --git a/fs/aufs/dentry.c b/fs/aufs/dentry.c
new file mode 100644
index 000000000..548f4bab3
--- /dev/null
+++ b/fs/aufs/dentry.c
@@ -0,0 +1,1152 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * lookup and dentry operations
+ */
+
+#include <linux/namei.h>
+#include "aufs.h"
+
+/*
+ * returns positive/negative dentry, NULL or an error.
+ * NULL means whiteout-ed or not-found.
+ */
+static struct dentry*
+au_do_lookup(struct dentry *h_parent, struct dentry *dentry,
+	     aufs_bindex_t bindex, struct au_do_lookup_args *args)
+{
+	struct dentry *h_dentry;
+	struct inode *h_inode;
+	struct au_branch *br;
+	int wh_found, opq;
+	unsigned char wh_able;
+	const unsigned char allow_neg = !!au_ftest_lkup(args->flags, ALLOW_NEG);
+	const unsigned char ignore_perm = !!au_ftest_lkup(args->flags,
+							  IGNORE_PERM);
+
+	wh_found = 0;
+	br = au_sbr(dentry->d_sb, bindex);
+	wh_able = !!au_br_whable(br->br_perm);
+	if (wh_able)
+		wh_found = au_wh_test(h_parent, &args->whname, ignore_perm);
+	h_dentry = ERR_PTR(wh_found);
+	if (!wh_found)
+		goto real_lookup;
+	if (unlikely(wh_found < 0))
+		goto out;
+
+	/* We found a whiteout */
+	/* au_set_dbbot(dentry, bindex); */
+	au_set_dbwh(dentry, bindex);
+	if (!allow_neg)
+		return NULL; /* success */
+
+real_lookup:
+	if (!ignore_perm)
+		h_dentry = vfsub_lkup_one(args->name, h_parent);
+	else
+		h_dentry = au_sio_lkup_one(args->name, h_parent);
+	if (IS_ERR(h_dentry)) {
+		if (PTR_ERR(h_dentry) == -ENAMETOOLONG
+		    && !allow_neg)
+			h_dentry = NULL;
+		goto out;
+	}
+
+	h_inode = d_inode(h_dentry);
+	if (d_is_negative(h_dentry)) {
+		if (!allow_neg)
+			goto out_neg;
+	} else if (wh_found
+		   || (args->type && args->type != (h_inode->i_mode & S_IFMT)))
+		goto out_neg;
+	else if (au_ftest_lkup(args->flags, DIRREN)
+		 /* && h_inode */
+		 && !au_dr_lkup_h_ino(args, bindex, h_inode->i_ino)) {
+		AuDbg("b%d %pd ignored hi%llu\n", bindex, h_dentry,
+		      (unsigned long long)h_inode->i_ino);
+		goto out_neg;
+	}
+
+	if (au_dbbot(dentry) <= bindex)
+		au_set_dbbot(dentry, bindex);
+	if (au_dbtop(dentry) < 0 || bindex < au_dbtop(dentry))
+		au_set_dbtop(dentry, bindex);
+	au_set_h_dptr(dentry, bindex, h_dentry);
+
+	if (!d_is_dir(h_dentry)
+	    || !wh_able
+	    || (d_really_is_positive(dentry) && !d_is_dir(dentry)))
+		goto out; /* success */
+
+	inode_lock_shared_nested(h_inode, AuLsc_I_CHILD);
+	opq = au_diropq_test(h_dentry);
+	inode_unlock_shared(h_inode);
+	if (opq > 0)
+		au_set_dbdiropq(dentry, bindex);
+	else if (unlikely(opq < 0)) {
+		au_set_h_dptr(dentry, bindex, NULL);
+		h_dentry = ERR_PTR(opq);
+	}
+	goto out;
+
+out_neg:
+	dput(h_dentry);
+	h_dentry = NULL;
+out:
+	return h_dentry;
+}
+
+static int au_test_shwh(struct super_block *sb, const struct qstr *name)
+{
+	if (unlikely(!au_opt_test(au_mntflags(sb), SHWH)
+		     && !strncmp(name->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)))
+		return -EPERM;
+	return 0;
+}
+
+/*
+ * returns the number of lower positive dentries,
+ * otherwise an error.
+ * can be called at unlinking with @type is zero.
+ */
+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t btop,
+		   unsigned int flags)
+{
+	int npositive, err;
+	aufs_bindex_t bindex, btail, bdiropq;
+	unsigned char isdir, dirperm1, dirren;
+	struct au_do_lookup_args args = {
+		.flags		= flags,
+		.name		= &dentry->d_name
+	};
+	struct dentry *parent;
+	struct super_block *sb;
+
+	sb = dentry->d_sb;
+	err = au_test_shwh(sb, args.name);
+	if (unlikely(err))
+		goto out;
+
+	err = au_wh_name_alloc(&args.whname, args.name);
+	if (unlikely(err))
+		goto out;
+
+	isdir = !!d_is_dir(dentry);
+	dirperm1 = !!au_opt_test(au_mntflags(sb), DIRPERM1);
+	dirren = !!au_opt_test(au_mntflags(sb), DIRREN);
+	if (dirren)
+		au_fset_lkup(args.flags, DIRREN);
+
+	npositive = 0;
+	parent = dget_parent(dentry);
+	btail = au_dbtaildir(parent);
+	for (bindex = btop; bindex <= btail; bindex++) {
+		struct dentry *h_parent, *h_dentry;
+		struct inode *h_inode, *h_dir;
+		struct au_branch *br;
+
+		h_dentry = au_h_dptr(dentry, bindex);
+		if (h_dentry) {
+			if (d_is_positive(h_dentry))
+				npositive++;
+			break;
+		}
+		h_parent = au_h_dptr(parent, bindex);
+		if (!h_parent || !d_is_dir(h_parent))
+			continue;
+
+		if (dirren) {
+			/* if the inum matches, then use the prepared name */
+			err = au_dr_lkup_name(&args, bindex);
+			if (unlikely(err))
+				goto out_parent;
+		}
+
+		h_dir = d_inode(h_parent);
+		inode_lock_shared_nested(h_dir, AuLsc_I_PARENT);
+		h_dentry = au_do_lookup(h_parent, dentry, bindex, &args);
+		inode_unlock_shared(h_dir);
+		err = PTR_ERR(h_dentry);
+		if (IS_ERR(h_dentry))
+			goto out_parent;
+		if (h_dentry)
+			au_fclr_lkup(args.flags, ALLOW_NEG);
+		if (dirperm1)
+			au_fset_lkup(args.flags, IGNORE_PERM);
+
+		if (au_dbwh(dentry) == bindex)
+			break;
+		if (!h_dentry)
+			continue;
+		if (d_is_negative(h_dentry))
+			continue;
+		h_inode = d_inode(h_dentry);
+		npositive++;
+		if (!args.type)
+			args.type = h_inode->i_mode & S_IFMT;
+		if (args.type != S_IFDIR)
+			break;
+		else if (isdir) {
+			/* the type of lower may be different */
+			bdiropq = au_dbdiropq(dentry);
+			if (bdiropq >= 0 && bdiropq <= bindex)
+				break;
+		}
+		br = au_sbr(sb, bindex);
+		if (dirren
+		    && au_dr_hino_test_add(&br->br_dirren, h_inode->i_ino,
+					   /*add_ent*/NULL)) {
+			/* prepare next name to lookup */
+			err = au_dr_lkup(&args, dentry, bindex);
+			if (unlikely(err))
+				goto out_parent;
+		}
+	}
+
+	if (npositive) {
+		AuLabel(positive);
+		au_update_dbtop(dentry);
+	}
+	err = npositive;
+	if (unlikely(!au_opt_test(au_mntflags(sb), UDBA_NONE)
+		     && au_dbtop(dentry) < 0)) {
+		err = -EIO;
+		AuIOErr("both of real entry and whiteout found, %pd, err %d\n",
+			dentry, err);
+	}
+
+out_parent:
+	dput(parent);
+	kfree(args.whname.name);
+	if (dirren)
+		au_dr_lkup_fin(&args);
+out:
+	return err;
+}
+
+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent)
+{
+	struct dentry *dentry;
+	int wkq_err;
+
+	if (!au_test_h_perm_sio(d_inode(parent), MAY_EXEC))
+		dentry = vfsub_lkup_one(name, parent);
+	else {
+		struct vfsub_lkup_one_args args = {
+			.errp	= &dentry,
+			.name	= name,
+			.parent	= parent
+		};
+
+		wkq_err = au_wkq_wait(vfsub_call_lkup_one, &args);
+		if (unlikely(wkq_err))
+			dentry = ERR_PTR(wkq_err);
+	}
+
+	return dentry;
+}
+
+/*
+ * lookup @dentry on @bindex which should be negative.
+ */
+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh)
+{
+	int err;
+	struct dentry *parent, *h_parent, *h_dentry;
+	struct au_branch *br;
+
+	parent = dget_parent(dentry);
+	h_parent = au_h_dptr(parent, bindex);
+	br = au_sbr(dentry->d_sb, bindex);
+	if (wh)
+		h_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
+	else
+		h_dentry = au_sio_lkup_one(&dentry->d_name, h_parent);
+	err = PTR_ERR(h_dentry);
+	if (IS_ERR(h_dentry))
+		goto out;
+	if (unlikely(d_is_positive(h_dentry))) {
+		err = -EIO;
+		AuIOErr("%pd should be negative on b%d.\n", h_dentry, bindex);
+		dput(h_dentry);
+		goto out;
+	}
+
+	err = 0;
+	if (bindex < au_dbtop(dentry))
+		au_set_dbtop(dentry, bindex);
+	if (au_dbbot(dentry) < bindex)
+		au_set_dbbot(dentry, bindex);
+	au_set_h_dptr(dentry, bindex, h_dentry);
+
+out:
+	dput(parent);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* subset of struct inode */
+struct au_iattr {
+	unsigned long		i_ino;
+	/* unsigned int		i_nlink; */
+	kuid_t			i_uid;
+	kgid_t			i_gid;
+	u64			i_version;
+/*
+	loff_t			i_size;
+	blkcnt_t		i_blocks;
+*/
+	umode_t			i_mode;
+};
+
+static void au_iattr_save(struct au_iattr *ia, struct inode *h_inode)
+{
+	ia->i_ino = h_inode->i_ino;
+	/* ia->i_nlink = h_inode->i_nlink; */
+	ia->i_uid = h_inode->i_uid;
+	ia->i_gid = h_inode->i_gid;
+	ia->i_version = inode_query_iversion(h_inode);
+/*
+	ia->i_size = h_inode->i_size;
+	ia->i_blocks = h_inode->i_blocks;
+*/
+	ia->i_mode = (h_inode->i_mode & S_IFMT);
+}
+
+static int au_iattr_test(struct au_iattr *ia, struct inode *h_inode)
+{
+	return ia->i_ino != h_inode->i_ino
+		/* || ia->i_nlink != h_inode->i_nlink */
+		|| !uid_eq(ia->i_uid, h_inode->i_uid)
+		|| !gid_eq(ia->i_gid, h_inode->i_gid)
+		|| !inode_eq_iversion(h_inode, ia->i_version)
+/*
+		|| ia->i_size != h_inode->i_size
+		|| ia->i_blocks != h_inode->i_blocks
+*/
+		|| ia->i_mode != (h_inode->i_mode & S_IFMT);
+}
+
+static int au_h_verify_dentry(struct dentry *h_dentry, struct dentry *h_parent,
+			      struct au_branch *br)
+{
+	int err;
+	struct au_iattr ia;
+	struct inode *h_inode;
+	struct dentry *h_d;
+	struct super_block *h_sb;
+
+	err = 0;
+	memset(&ia, -1, sizeof(ia));
+	h_sb = h_dentry->d_sb;
+	h_inode = NULL;
+	if (d_is_positive(h_dentry)) {
+		h_inode = d_inode(h_dentry);
+		au_iattr_save(&ia, h_inode);
+	} else if (au_test_nfs(h_sb) || au_test_fuse(h_sb))
+		/* nfs d_revalidate may return 0 for negative dentry */
+		/* fuse d_revalidate always return 0 for negative dentry */
+		goto out;
+
+	/* main purpose is namei.c:cached_lookup() and d_revalidate */
+	h_d = vfsub_lkup_one(&h_dentry->d_name, h_parent);
+	err = PTR_ERR(h_d);
+	if (IS_ERR(h_d))
+		goto out;
+
+	err = 0;
+	if (unlikely(h_d != h_dentry
+		     || d_inode(h_d) != h_inode
+		     || (h_inode && au_iattr_test(&ia, h_inode))))
+		err = au_busy_or_stale();
+	dput(h_d);
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
+		struct dentry *h_parent, struct au_branch *br)
+{
+	int err;
+
+	err = 0;
+	if (udba == AuOpt_UDBA_REVAL
+	    && !au_test_fs_remote(h_dentry->d_sb)) {
+		IMustLock(h_dir);
+		err = (d_inode(h_dentry->d_parent) != h_dir);
+	} else if (udba != AuOpt_UDBA_NONE)
+		err = au_h_verify_dentry(h_dentry, h_parent, br);
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_do_refresh_hdentry(struct dentry *dentry, struct dentry *parent)
+{
+	int err;
+	aufs_bindex_t new_bindex, bindex, bbot, bwh, bdiropq;
+	struct au_hdentry tmp, *p, *q;
+	struct au_dinfo *dinfo;
+	struct super_block *sb;
+
+	DiMustWriteLock(dentry);
+
+	sb = dentry->d_sb;
+	dinfo = au_di(dentry);
+	bbot = dinfo->di_bbot;
+	bwh = dinfo->di_bwh;
+	bdiropq = dinfo->di_bdiropq;
+	bindex = dinfo->di_btop;
+	p = au_hdentry(dinfo, bindex);
+	for (; bindex <= bbot; bindex++, p++) {
+		if (!p->hd_dentry)
+			continue;
+
+		new_bindex = au_br_index(sb, p->hd_id);
+		if (new_bindex == bindex)
+			continue;
+
+		if (dinfo->di_bwh == bindex)
+			bwh = new_bindex;
+		if (dinfo->di_bdiropq == bindex)
+			bdiropq = new_bindex;
+		if (new_bindex < 0) {
+			au_hdput(p);
+			p->hd_dentry = NULL;
+			continue;
+		}
+
+		/* swap two lower dentries, and loop again */
+		q = au_hdentry(dinfo, new_bindex);
+		tmp = *q;
+		*q = *p;
+		*p = tmp;
+		if (tmp.hd_dentry) {
+			bindex--;
+			p--;
+		}
+	}
+
+	dinfo->di_bwh = -1;
+	if (bwh >= 0 && bwh <= au_sbbot(sb) && au_sbr_whable(sb, bwh))
+		dinfo->di_bwh = bwh;
+
+	dinfo->di_bdiropq = -1;
+	if (bdiropq >= 0
+	    && bdiropq <= au_sbbot(sb)
+	    && au_sbr_whable(sb, bdiropq))
+		dinfo->di_bdiropq = bdiropq;
+
+	err = -EIO;
+	dinfo->di_btop = -1;
+	dinfo->di_bbot = -1;
+	bbot = au_dbbot(parent);
+	bindex = 0;
+	p = au_hdentry(dinfo, bindex);
+	for (; bindex <= bbot; bindex++, p++)
+		if (p->hd_dentry) {
+			dinfo->di_btop = bindex;
+			break;
+		}
+
+	if (dinfo->di_btop >= 0) {
+		bindex = bbot;
+		p = au_hdentry(dinfo, bindex);
+		for (; bindex >= 0; bindex--, p--)
+			if (p->hd_dentry) {
+				dinfo->di_bbot = bindex;
+				err = 0;
+				break;
+			}
+	}
+
+	return err;
+}
+
+static void au_do_hide(struct dentry *dentry)
+{
+	struct inode *inode;
+
+	if (d_really_is_positive(dentry)) {
+		inode = d_inode(dentry);
+		if (!d_is_dir(dentry)) {
+			if (inode->i_nlink && !d_unhashed(dentry))
+				drop_nlink(inode);
+		} else {
+			clear_nlink(inode);
+			/* stop next lookup */
+			inode->i_flags |= S_DEAD;
+		}
+		smp_mb(); /* necessary? */
+	}
+	d_drop(dentry);
+}
+
+static int au_hide_children(struct dentry *parent)
+{
+	int err, i, j, ndentry;
+	struct au_dcsub_pages dpages;
+	struct au_dpage *dpage;
+	struct dentry *dentry;
+
+	err = au_dpages_init(&dpages, GFP_NOFS);
+	if (unlikely(err))
+		goto out;
+	err = au_dcsub_pages(&dpages, parent, NULL, NULL);
+	if (unlikely(err))
+		goto out_dpages;
+
+	/* in reverse order */
+	for (i = dpages.ndpage - 1; i >= 0; i--) {
+		dpage = dpages.dpages + i;
+		ndentry = dpage->ndentry;
+		for (j = ndentry - 1; j >= 0; j--) {
+			dentry = dpage->dentries[j];
+			if (dentry != parent)
+				au_do_hide(dentry);
+		}
+	}
+
+out_dpages:
+	au_dpages_free(&dpages);
+out:
+	return err;
+}
+
+static void au_hide(struct dentry *dentry)
+{
+	int err;
+
+	AuDbgDentry(dentry);
+	if (d_is_dir(dentry)) {
+		/* shrink_dcache_parent(dentry); */
+		err = au_hide_children(dentry);
+		if (unlikely(err))
+			AuIOErr("%pd, failed hiding children, ignored %d\n",
+				dentry, err);
+	}
+	au_do_hide(dentry);
+}
+
+/*
+ * By adding a dirty branch, a cached dentry may be affected in various ways.
+ *
+ * a dirty branch is added
+ * - on the top of layers
+ * - in the middle of layers
+ * - to the bottom of layers
+ *
+ * on the added branch there exists
+ * - a whiteout
+ * - a diropq
+ * - a same named entry
+ *   + exist
+ *     * negative --> positive
+ *     * positive --> positive
+ *	 - type is unchanged
+ *	 - type is changed
+ *   + doesn't exist
+ *     * negative --> negative
+ *     * positive --> negative (rejected by au_br_del() for non-dir case)
+ * - none
+ */
+static int au_refresh_by_dinfo(struct dentry *dentry, struct au_dinfo *dinfo,
+			       struct au_dinfo *tmp)
+{
+	int err;
+	aufs_bindex_t bindex, bbot;
+	struct {
+		struct dentry *dentry;
+		struct inode *inode;
+		mode_t mode;
+	} orig_h, tmp_h = {
+		.dentry = NULL
+	};
+	struct au_hdentry *hd;
+	struct inode *inode, *h_inode;
+	struct dentry *h_dentry;
+
+	err = 0;
+	AuDebugOn(dinfo->di_btop < 0);
+	orig_h.mode = 0;
+	orig_h.dentry = au_hdentry(dinfo, dinfo->di_btop)->hd_dentry;
+	orig_h.inode = NULL;
+	if (d_is_positive(orig_h.dentry)) {
+		orig_h.inode = d_inode(orig_h.dentry);
+		orig_h.mode = orig_h.inode->i_mode & S_IFMT;
+	}
+	if (tmp->di_btop >= 0) {
+		tmp_h.dentry = au_hdentry(tmp, tmp->di_btop)->hd_dentry;
+		if (d_is_positive(tmp_h.dentry)) {
+			tmp_h.inode = d_inode(tmp_h.dentry);
+			tmp_h.mode = tmp_h.inode->i_mode & S_IFMT;
+		}
+	}
+
+	inode = NULL;
+	if (d_really_is_positive(dentry))
+		inode = d_inode(dentry);
+	if (!orig_h.inode) {
+		AuDbg("nagative originally\n");
+		if (inode) {
+			au_hide(dentry);
+			goto out;
+		}
+		AuDebugOn(inode);
+		AuDebugOn(dinfo->di_btop != dinfo->di_bbot);
+		AuDebugOn(dinfo->di_bdiropq != -1);
+
+		if (!tmp_h.inode) {
+			AuDbg("negative --> negative\n");
+			/* should have only one negative lower */
+			if (tmp->di_btop >= 0
+			    && tmp->di_btop < dinfo->di_btop) {
+				AuDebugOn(tmp->di_btop != tmp->di_bbot);
+				AuDebugOn(dinfo->di_btop != dinfo->di_bbot);
+				au_set_h_dptr(dentry, dinfo->di_btop, NULL);
+				au_di_cp(dinfo, tmp);
+				hd = au_hdentry(tmp, tmp->di_btop);
+				au_set_h_dptr(dentry, tmp->di_btop,
+					      dget(hd->hd_dentry));
+			}
+			au_dbg_verify_dinode(dentry);
+		} else {
+			AuDbg("negative --> positive\n");
+			/*
+			 * similar to the behaviour of creating with bypassing
+			 * aufs.
+			 * unhash it in order to force an error in the
+			 * succeeding create operation.
+			 * we should not set S_DEAD here.
+			 */
+			d_drop(dentry);
+			/* au_di_swap(tmp, dinfo); */
+			au_dbg_verify_dinode(dentry);
+		}
+	} else {
+		AuDbg("positive originally\n");
+		/* inode may be NULL */
+		AuDebugOn(inode && (inode->i_mode & S_IFMT) != orig_h.mode);
+		if (!tmp_h.inode) {
+			AuDbg("positive --> negative\n");
+			/* or bypassing aufs */
+			au_hide(dentry);
+			if (tmp->di_bwh >= 0 && tmp->di_bwh <= dinfo->di_btop)
+				dinfo->di_bwh = tmp->di_bwh;
+			if (inode)
+				err = au_refresh_hinode_self(inode);
+			au_dbg_verify_dinode(dentry);
+		} else if (orig_h.mode == tmp_h.mode) {
+			AuDbg("positive --> positive, same type\n");
+			if (!S_ISDIR(orig_h.mode)
+			    && dinfo->di_btop > tmp->di_btop) {
+				/*
+				 * similar to the behaviour of removing and
+				 * creating.
+				 */
+				au_hide(dentry);
+				if (inode)
+					err = au_refresh_hinode_self(inode);
+				au_dbg_verify_dinode(dentry);
+			} else {
+				/* fill empty slots */
+				if (dinfo->di_btop > tmp->di_btop)
+					dinfo->di_btop = tmp->di_btop;
+				if (dinfo->di_bbot < tmp->di_bbot)
+					dinfo->di_bbot = tmp->di_bbot;
+				dinfo->di_bwh = tmp->di_bwh;
+				dinfo->di_bdiropq = tmp->di_bdiropq;
+				bbot = dinfo->di_bbot;
+				bindex = tmp->di_btop;
+				hd = au_hdentry(tmp, bindex);
+				for (; bindex <= bbot; bindex++, hd++) {
+					if (au_h_dptr(dentry, bindex))
+						continue;
+					h_dentry = hd->hd_dentry;
+					if (!h_dentry)
+						continue;
+					AuDebugOn(d_is_negative(h_dentry));
+					h_inode = d_inode(h_dentry);
+					AuDebugOn(orig_h.mode
+						  != (h_inode->i_mode
+						      & S_IFMT));
+					au_set_h_dptr(dentry, bindex,
+						      dget(h_dentry));
+				}
+				if (inode)
+					err = au_refresh_hinode(inode, dentry);
+				au_dbg_verify_dinode(dentry);
+			}
+		} else {
+			AuDbg("positive --> positive, different type\n");
+			/* similar to the behaviour of removing and creating */
+			au_hide(dentry);
+			if (inode)
+				err = au_refresh_hinode_self(inode);
+			au_dbg_verify_dinode(dentry);
+		}
+	}
+
+out:
+	return err;
+}
+
+void au_refresh_dop(struct dentry *dentry, int force_reval)
+{
+	const struct dentry_operations *dop
+		= force_reval ? &aufs_dop : dentry->d_sb->s_d_op;
+	static const unsigned int mask
+		= DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE;
+
+	BUILD_BUG_ON(sizeof(mask) != sizeof(dentry->d_flags));
+
+	if (dentry->d_op == dop)
+		return;
+
+	AuDbg("%pd\n", dentry);
+	spin_lock(&dentry->d_lock);
+	if (dop == &aufs_dop)
+		dentry->d_flags |= mask;
+	else
+		dentry->d_flags &= ~mask;
+	dentry->d_op = dop;
+	spin_unlock(&dentry->d_lock);
+}
+
+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent)
+{
+	int err, ebrange, nbr;
+	unsigned int sigen;
+	struct au_dinfo *dinfo, *tmp;
+	struct super_block *sb;
+	struct inode *inode;
+
+	DiMustWriteLock(dentry);
+	AuDebugOn(IS_ROOT(dentry));
+	AuDebugOn(d_really_is_negative(parent));
+
+	sb = dentry->d_sb;
+	sigen = au_sigen(sb);
+	err = au_digen_test(parent, sigen);
+	if (unlikely(err))
+		goto out;
+
+	nbr = au_sbbot(sb) + 1;
+	dinfo = au_di(dentry);
+	err = au_di_realloc(dinfo, nbr, /*may_shrink*/0);
+	if (unlikely(err))
+		goto out;
+	ebrange = au_dbrange_test(dentry);
+	if (!ebrange)
+		ebrange = au_do_refresh_hdentry(dentry, parent);
+
+	if (d_unhashed(dentry) || ebrange /* || dinfo->di_tmpfile */) {
+		AuDebugOn(au_dbtop(dentry) < 0 && au_dbbot(dentry) >= 0);
+		if (d_really_is_positive(dentry)) {
+			inode = d_inode(dentry);
+			err = au_refresh_hinode_self(inode);
+		}
+		au_dbg_verify_dinode(dentry);
+		if (!err)
+			goto out_dgen; /* success */
+		goto out;
+	}
+
+	/* temporary dinfo */
+	AuDbgDentry(dentry);
+	err = -ENOMEM;
+	tmp = au_di_alloc(sb, AuLsc_DI_TMP);
+	if (unlikely(!tmp))
+		goto out;
+	au_di_swap(tmp, dinfo);
+	/* returns the number of positive dentries */
+	/*
+	 * if current working dir is removed, it returns an error.
+	 * but the dentry is legal.
+	 */
+	err = au_lkup_dentry(dentry, /*btop*/0, AuLkup_ALLOW_NEG);
+	AuDbgDentry(dentry);
+	au_di_swap(tmp, dinfo);
+	if (err == -ENOENT)
+		err = 0;
+	if (err >= 0) {
+		/* compare/refresh by dinfo */
+		AuDbgDentry(dentry);
+		err = au_refresh_by_dinfo(dentry, dinfo, tmp);
+		au_dbg_verify_dinode(dentry);
+		AuTraceErr(err);
+	}
+	au_di_realloc(dinfo, nbr, /*may_shrink*/1); /* harmless if err */
+	au_rw_write_unlock(&tmp->di_rwsem);
+	au_di_free(tmp);
+	if (unlikely(err))
+		goto out;
+
+out_dgen:
+	au_update_digen(dentry);
+out:
+	if (unlikely(err && !(dentry->d_flags & DCACHE_NFSFS_RENAMED))) {
+		AuIOErr("failed refreshing %pd, %d\n", dentry, err);
+		AuDbgDentry(dentry);
+	}
+	AuTraceErr(err);
+	return err;
+}
+
+static int au_do_h_d_reval(struct dentry *h_dentry, unsigned int flags,
+			   struct dentry *dentry, aufs_bindex_t bindex)
+{
+	int err, valid;
+
+	err = 0;
+	if (!(h_dentry->d_flags & DCACHE_OP_REVALIDATE))
+		goto out;
+
+	AuDbg("b%d\n", bindex);
+	/*
+	 * gave up supporting LOOKUP_CREATE/OPEN for lower fs,
+	 * due to whiteout and branch permission.
+	 */
+	flags &= ~(/*LOOKUP_PARENT |*/ LOOKUP_OPEN | LOOKUP_CREATE
+		   | LOOKUP_FOLLOW | LOOKUP_EXCL);
+	/* it may return tri-state */
+	valid = h_dentry->d_op->d_revalidate(h_dentry, flags);
+
+	if (unlikely(valid < 0))
+		err = valid;
+	else if (!valid)
+		err = -EINVAL;
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/* todo: remove this */
+static int h_d_revalidate(struct dentry *dentry, struct inode *inode,
+			  unsigned int flags, int do_udba, int dirren)
+{
+	int err;
+	umode_t mode, h_mode;
+	aufs_bindex_t bindex, btail, btop, ibs, ibe;
+	unsigned char plus, unhashed, is_root, h_plus, h_nfs, tmpfile;
+	struct inode *h_inode, *h_cached_inode;
+	struct dentry *h_dentry;
+	struct qstr *name, *h_name;
+
+	err = 0;
+	plus = 0;
+	mode = 0;
+	ibs = -1;
+	ibe = -1;
+	unhashed = !!d_unhashed(dentry);
+	is_root = !!IS_ROOT(dentry);
+	name = &dentry->d_name;
+	tmpfile = au_di(dentry)->di_tmpfile;
+
+	/*
+	 * Theoretically, REVAL test should be unnecessary in case of
+	 * {FS,I}NOTIFY.
+	 * But {fs,i}notify doesn't fire some necessary events,
+	 *	IN_ATTRIB for atime/nlink/pageio
+	 * Let's do REVAL test too.
+	 */
+	if (do_udba && inode) {
+		mode = (inode->i_mode & S_IFMT);
+		plus = (inode->i_nlink > 0);
+		ibs = au_ibtop(inode);
+		ibe = au_ibbot(inode);
+	}
+
+	btop = au_dbtop(dentry);
+	btail = btop;
+	if (inode && S_ISDIR(inode->i_mode))
+		btail = au_dbtaildir(dentry);
+	for (bindex = btop; bindex <= btail; bindex++) {
+		h_dentry = au_h_dptr(dentry, bindex);
+		if (!h_dentry)
+			continue;
+
+		AuDbg("b%d, %pd\n", bindex, h_dentry);
+		h_nfs = !!au_test_nfs(h_dentry->d_sb);
+		spin_lock(&h_dentry->d_lock);
+		h_name = &h_dentry->d_name;
+		if (unlikely(do_udba
+			     && !is_root
+			     && ((!h_nfs
+				  && (unhashed != !!d_unhashed(h_dentry)
+				      || (!tmpfile && !dirren
+					  && !au_qstreq(name, h_name))
+					  ))
+				 || (h_nfs
+				     && !(flags & LOOKUP_OPEN)
+				     && (h_dentry->d_flags
+					 & DCACHE_NFSFS_RENAMED)))
+			    )) {
+			int h_unhashed;
+
+			h_unhashed = d_unhashed(h_dentry);
+			spin_unlock(&h_dentry->d_lock);
+			AuDbg("unhash 0x%x 0x%x, %pd %pd\n",
+			      unhashed, h_unhashed, dentry, h_dentry);
+			goto err;
+		}
+		spin_unlock(&h_dentry->d_lock);
+
+		err = au_do_h_d_reval(h_dentry, flags, dentry, bindex);
+		if (unlikely(err))
+			/* do not goto err, to keep the errno */
+			break;
+
+		/* todo: plink too? */
+		if (!do_udba)
+			continue;
+
+		/* UDBA tests */
+		if (unlikely(!!inode != d_is_positive(h_dentry)))
+			goto err;
+
+		h_inode = NULL;
+		if (d_is_positive(h_dentry))
+			h_inode = d_inode(h_dentry);
+		h_plus = plus;
+		h_mode = mode;
+		h_cached_inode = h_inode;
+		if (h_inode) {
+			h_mode = (h_inode->i_mode & S_IFMT);
+			h_plus = (h_inode->i_nlink > 0);
+		}
+		if (inode && ibs <= bindex && bindex <= ibe)
+			h_cached_inode = au_h_iptr(inode, bindex);
+
+		if (!h_nfs) {
+			if (unlikely(plus != h_plus && !tmpfile))
+				goto err;
+		} else {
+			if (unlikely(!(h_dentry->d_flags & DCACHE_NFSFS_RENAMED)
+				     && !is_root
+				     && !IS_ROOT(h_dentry)
+				     && unhashed != d_unhashed(h_dentry)))
+				goto err;
+		}
+		if (unlikely(mode != h_mode
+			     || h_cached_inode != h_inode))
+			goto err;
+		continue;
+
+err:
+		err = -EINVAL;
+		break;
+	}
+
+	AuTraceErr(err);
+	return err;
+}
+
+/* todo: consolidate with do_refresh() and au_reval_for_attr() */
+static int simple_reval_dpath(struct dentry *dentry, unsigned int sigen)
+{
+	int err;
+	struct dentry *parent;
+
+	if (!au_digen_test(dentry, sigen))
+		return 0;
+
+	parent = dget_parent(dentry);
+	di_read_lock_parent(parent, AuLock_IR);
+	AuDebugOn(au_digen_test(parent, sigen));
+	au_dbg_verify_gen(parent, sigen);
+	err = au_refresh_dentry(dentry, parent);
+	di_read_unlock(parent, AuLock_IR);
+	dput(parent);
+	AuTraceErr(err);
+	return err;
+}
+
+int au_reval_dpath(struct dentry *dentry, unsigned int sigen)
+{
+	int err;
+	struct dentry *d, *parent;
+
+	if (!au_ftest_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR))
+		return simple_reval_dpath(dentry, sigen);
+
+	/* slow loop, keep it simple and stupid */
+	/* cf: au_cpup_dirs() */
+	err = 0;
+	parent = NULL;
+	while (au_digen_test(dentry, sigen)) {
+		d = dentry;
+		while (1) {
+			dput(parent);
+			parent = dget_parent(d);
+			if (!au_digen_test(parent, sigen))
+				break;
+			d = parent;
+		}
+
+		if (d != dentry)
+			di_write_lock_child2(d);
+
+		/* someone might update our dentry while we were sleeping */
+		if (au_digen_test(d, sigen)) {
+			/*
+			 * todo: consolidate with simple_reval_dpath(),
+			 * do_refresh() and au_reval_for_attr().
+			 */
+			di_read_lock_parent(parent, AuLock_IR);
+			err = au_refresh_dentry(d, parent);
+			di_read_unlock(parent, AuLock_IR);
+		}
+
+		if (d != dentry)
+			di_write_unlock(d);
+		dput(parent);
+		if (unlikely(err))
+			break;
+	}
+
+	return err;
+}
+
+/*
+ * if valid returns 1, otherwise 0.
+ */
+static int aufs_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+	int valid, err;
+	unsigned int sigen;
+	unsigned char do_udba, dirren;
+	struct super_block *sb;
+	struct inode *inode;
+
+	/* todo: support rcu-walk? */
+	if (flags & LOOKUP_RCU)
+		return -ECHILD;
+
+	valid = 0;
+	if (unlikely(!au_di(dentry)))
+		goto out;
+
+	valid = 1;
+	sb = dentry->d_sb;
+	/*
+	 * todo: very ugly
+	 * i_mutex of parent dir may be held,
+	 * but we should not return 'invalid' due to busy.
+	 */
+	err = aufs_read_lock(dentry, AuLock_FLUSH | AuLock_DW | AuLock_NOPLM);
+	if (unlikely(err)) {
+		valid = err;
+		AuTraceErr(err);
+		goto out;
+	}
+	inode = NULL;
+	if (d_really_is_positive(dentry))
+		inode = d_inode(dentry);
+	if (unlikely(inode && au_is_bad_inode(inode))) {
+		err = -EINVAL;
+		AuTraceErr(err);
+		goto out_dgrade;
+	}
+	if (unlikely(au_dbrange_test(dentry))) {
+		err = -EINVAL;
+		AuTraceErr(err);
+		goto out_dgrade;
+	}
+
+	sigen = au_sigen(sb);
+	if (au_digen_test(dentry, sigen)) {
+		AuDebugOn(IS_ROOT(dentry));
+		err = au_reval_dpath(dentry, sigen);
+		if (unlikely(err)) {
+			AuTraceErr(err);
+			goto out_dgrade;
+		}
+	}
+	di_downgrade_lock(dentry, AuLock_IR);
+
+	err = -EINVAL;
+	if (!(flags & (LOOKUP_OPEN | LOOKUP_EMPTY))
+	    && inode
+	    && !(inode->i_state && I_LINKABLE)
+	    && (IS_DEADDIR(inode) || !inode->i_nlink)) {
+		AuTraceErr(err);
+		goto out_inval;
+	}
+
+	do_udba = !au_opt_test(au_mntflags(sb), UDBA_NONE);
+	if (do_udba && inode) {
+		aufs_bindex_t btop = au_ibtop(inode);
+		struct inode *h_inode;
+
+		if (btop >= 0) {
+			h_inode = au_h_iptr(inode, btop);
+			if (h_inode && au_test_higen(inode, h_inode)) {
+				AuTraceErr(err);
+				goto out_inval;
+			}
+		}
+	}
+
+	dirren = !!au_opt_test(au_mntflags(sb), DIRREN);
+	err = h_d_revalidate(dentry, inode, flags, do_udba, dirren);
+	if (unlikely(!err && do_udba && au_dbtop(dentry) < 0)) {
+		err = -EIO;
+		AuDbg("both of real entry and whiteout found, %p, err %d\n",
+		      dentry, err);
+	}
+	goto out_inval;
+
+out_dgrade:
+	di_downgrade_lock(dentry, AuLock_IR);
+out_inval:
+	aufs_read_unlock(dentry, AuLock_IR);
+	AuTraceErr(err);
+	valid = !err;
+out:
+	if (!valid) {
+		AuDbg("%pd invalid, %d\n", dentry, valid);
+		d_drop(dentry);
+	}
+	return valid;
+}
+
+static void aufs_d_release(struct dentry *dentry)
+{
+	if (au_di(dentry)) {
+		au_di_fin(dentry);
+		au_hn_di_reinit(dentry);
+	}
+}
+
+const struct dentry_operations aufs_dop = {
+	.d_revalidate		= aufs_d_revalidate,
+	.d_weak_revalidate	= aufs_d_revalidate,
+	.d_release		= aufs_d_release
+};
+
+/* aufs_dop without d_revalidate */
+const struct dentry_operations aufs_dop_noreval = {
+	.d_release		= aufs_d_release
+};
diff --git a/fs/aufs/dentry.h b/fs/aufs/dentry.h
new file mode 100644
index 000000000..8f20b51d5
--- /dev/null
+++ b/fs/aufs/dentry.h
@@ -0,0 +1,266 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * lookup and dentry operations
+ */
+
+#ifndef __AUFS_DENTRY_H__
+#define __AUFS_DENTRY_H__
+
+#ifdef __KERNEL__
+
+#include <linux/dcache.h>
+#include "dirren.h"
+#include "rwsem.h"
+
+struct au_hdentry {
+	struct dentry		*hd_dentry;
+	aufs_bindex_t		hd_id;
+};
+
+struct au_dinfo {
+	atomic_t		di_generation;
+
+	struct au_rwsem		di_rwsem;
+	aufs_bindex_t		di_btop, di_bbot, di_bwh, di_bdiropq;
+	unsigned char		di_tmpfile; /* to allow the different name */
+	struct au_hdentry	*di_hdentry;
+} ____cacheline_aligned_in_smp;
+
+/* ---------------------------------------------------------------------- */
+
+/* flags for au_lkup_dentry() */
+#define AuLkup_ALLOW_NEG	1
+#define AuLkup_IGNORE_PERM	(1 << 1)
+#define AuLkup_DIRREN		(1 << 2)
+#define au_ftest_lkup(flags, name)	((flags) & AuLkup_##name)
+#define au_fset_lkup(flags, name) \
+	do { (flags) |= AuLkup_##name; } while (0)
+#define au_fclr_lkup(flags, name) \
+	do { (flags) &= ~AuLkup_##name; } while (0)
+
+#ifndef CONFIG_AUFS_DIRREN
+#undef AuLkup_DIRREN
+#define AuLkup_DIRREN 0
+#endif
+
+struct au_do_lookup_args {
+	unsigned int		flags;
+	mode_t			type;
+	struct qstr		whname, *name;
+	struct au_dr_lookup	dirren;
+};
+
+/* ---------------------------------------------------------------------- */
+
+/* dentry.c */
+extern const struct dentry_operations aufs_dop, aufs_dop_noreval;
+struct au_branch;
+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent);
+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
+		struct dentry *h_parent, struct au_branch *br);
+
+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t btop,
+		   unsigned int flags);
+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh);
+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent);
+int au_reval_dpath(struct dentry *dentry, unsigned int sigen);
+void au_refresh_dop(struct dentry *dentry, int force_reval);
+
+/* dinfo.c */
+void au_di_init_once(void *_di);
+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc);
+void au_di_free(struct au_dinfo *dinfo);
+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b);
+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src);
+int au_di_init(struct dentry *dentry);
+void au_di_fin(struct dentry *dentry);
+int au_di_realloc(struct au_dinfo *dinfo, int nbr, int may_shrink);
+
+void di_read_lock(struct dentry *d, int flags, unsigned int lsc);
+void di_read_unlock(struct dentry *d, int flags);
+void di_downgrade_lock(struct dentry *d, int flags);
+void di_write_lock(struct dentry *d, unsigned int lsc);
+void di_write_unlock(struct dentry *d);
+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir);
+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir);
+void di_write_unlock2(struct dentry *d1, struct dentry *d2);
+
+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex);
+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex);
+aufs_bindex_t au_dbtail(struct dentry *dentry);
+aufs_bindex_t au_dbtaildir(struct dentry *dentry);
+
+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
+		   struct dentry *h_dentry);
+int au_digen_test(struct dentry *dentry, unsigned int sigen);
+int au_dbrange_test(struct dentry *dentry);
+void au_update_digen(struct dentry *dentry);
+void au_update_dbrange(struct dentry *dentry, int do_put_zero);
+void au_update_dbtop(struct dentry *dentry);
+void au_update_dbbot(struct dentry *dentry);
+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry);
+
+/* ---------------------------------------------------------------------- */
+
+static inline struct au_dinfo *au_di(struct dentry *dentry)
+{
+	return dentry->d_fsdata;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* lock subclass for dinfo */
+enum {
+	AuLsc_DI_CHILD,		/* child first */
+	AuLsc_DI_CHILD2,	/* rename(2), link(2), and cpup at hnotify */
+	AuLsc_DI_CHILD3,	/* copyup dirs */
+	AuLsc_DI_PARENT,
+	AuLsc_DI_PARENT2,
+	AuLsc_DI_PARENT3,
+	AuLsc_DI_TMP		/* temp for replacing dinfo */
+};
+
+/*
+ * di_read_lock_child, di_write_lock_child,
+ * di_read_lock_child2, di_write_lock_child2,
+ * di_read_lock_child3, di_write_lock_child3,
+ * di_read_lock_parent, di_write_lock_parent,
+ * di_read_lock_parent2, di_write_lock_parent2,
+ * di_read_lock_parent3, di_write_lock_parent3,
+ */
+#define AuReadLockFunc(name, lsc) \
+static inline void di_read_lock_##name(struct dentry *d, int flags) \
+{ di_read_lock(d, flags, AuLsc_DI_##lsc); }
+
+#define AuWriteLockFunc(name, lsc) \
+static inline void di_write_lock_##name(struct dentry *d) \
+{ di_write_lock(d, AuLsc_DI_##lsc); }
+
+#define AuRWLockFuncs(name, lsc) \
+	AuReadLockFunc(name, lsc) \
+	AuWriteLockFunc(name, lsc)
+
+AuRWLockFuncs(child, CHILD);
+AuRWLockFuncs(child2, CHILD2);
+AuRWLockFuncs(child3, CHILD3);
+AuRWLockFuncs(parent, PARENT);
+AuRWLockFuncs(parent2, PARENT2);
+AuRWLockFuncs(parent3, PARENT3);
+
+#undef AuReadLockFunc
+#undef AuWriteLockFunc
+#undef AuRWLockFuncs
+
+#define DiMustNoWaiters(d)	AuRwMustNoWaiters(&au_di(d)->di_rwsem)
+#define DiMustAnyLock(d)	AuRwMustAnyLock(&au_di(d)->di_rwsem)
+#define DiMustWriteLock(d)	AuRwMustWriteLock(&au_di(d)->di_rwsem)
+
+/* ---------------------------------------------------------------------- */
+
+/* todo: memory barrier? */
+static inline unsigned int au_digen(struct dentry *d)
+{
+	return atomic_read(&au_di(d)->di_generation);
+}
+
+static inline void au_h_dentry_init(struct au_hdentry *hdentry)
+{
+	hdentry->hd_dentry = NULL;
+}
+
+static inline struct au_hdentry *au_hdentry(struct au_dinfo *di,
+					    aufs_bindex_t bindex)
+{
+	return di->di_hdentry + bindex;
+}
+
+static inline void au_hdput(struct au_hdentry *hd)
+{
+	if (hd)
+		dput(hd->hd_dentry);
+}
+
+static inline aufs_bindex_t au_dbtop(struct dentry *dentry)
+{
+	DiMustAnyLock(dentry);
+	return au_di(dentry)->di_btop;
+}
+
+static inline aufs_bindex_t au_dbbot(struct dentry *dentry)
+{
+	DiMustAnyLock(dentry);
+	return au_di(dentry)->di_bbot;
+}
+
+static inline aufs_bindex_t au_dbwh(struct dentry *dentry)
+{
+	DiMustAnyLock(dentry);
+	return au_di(dentry)->di_bwh;
+}
+
+static inline aufs_bindex_t au_dbdiropq(struct dentry *dentry)
+{
+	DiMustAnyLock(dentry);
+	return au_di(dentry)->di_bdiropq;
+}
+
+/* todo: hard/soft set? */
+static inline void au_set_dbtop(struct dentry *dentry, aufs_bindex_t bindex)
+{
+	DiMustWriteLock(dentry);
+	au_di(dentry)->di_btop = bindex;
+}
+
+static inline void au_set_dbbot(struct dentry *dentry, aufs_bindex_t bindex)
+{
+	DiMustWriteLock(dentry);
+	au_di(dentry)->di_bbot = bindex;
+}
+
+static inline void au_set_dbwh(struct dentry *dentry, aufs_bindex_t bindex)
+{
+	DiMustWriteLock(dentry);
+	/* dbwh can be outside of btop - bbot range */
+	au_di(dentry)->di_bwh = bindex;
+}
+
+static inline void au_set_dbdiropq(struct dentry *dentry, aufs_bindex_t bindex)
+{
+	DiMustWriteLock(dentry);
+	au_di(dentry)->di_bdiropq = bindex;
+}
+
+/* ---------------------------------------------------------------------- */
+
+#ifdef CONFIG_AUFS_HNOTIFY
+static inline void au_digen_dec(struct dentry *d)
+{
+	atomic_dec(&au_di(d)->di_generation);
+}
+
+static inline void au_hn_di_reinit(struct dentry *dentry)
+{
+	dentry->d_fsdata = NULL;
+}
+#else
+AuStubVoid(au_hn_di_reinit, struct dentry *dentry __maybe_unused)
+#endif /* CONFIG_AUFS_HNOTIFY */
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_DENTRY_H__ */
diff --git a/fs/aufs/dinfo.c b/fs/aufs/dinfo.c
new file mode 100644
index 000000000..75b7ea4a5
--- /dev/null
+++ b/fs/aufs/dinfo.c
@@ -0,0 +1,553 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * dentry private data
+ */
+
+#include "aufs.h"
+
+void au_di_init_once(void *_dinfo)
+{
+	struct au_dinfo *dinfo = _dinfo;
+
+	au_rw_init(&dinfo->di_rwsem);
+}
+
+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc)
+{
+	struct au_dinfo *dinfo;
+	int nbr, i;
+
+	dinfo = au_cache_alloc_dinfo();
+	if (unlikely(!dinfo))
+		goto out;
+
+	nbr = au_sbbot(sb) + 1;
+	if (nbr <= 0)
+		nbr = 1;
+	dinfo->di_hdentry = kcalloc(nbr, sizeof(*dinfo->di_hdentry), GFP_NOFS);
+	if (dinfo->di_hdentry) {
+		au_rw_write_lock_nested(&dinfo->di_rwsem, lsc);
+		dinfo->di_btop = -1;
+		dinfo->di_bbot = -1;
+		dinfo->di_bwh = -1;
+		dinfo->di_bdiropq = -1;
+		dinfo->di_tmpfile = 0;
+		for (i = 0; i < nbr; i++)
+			dinfo->di_hdentry[i].hd_id = -1;
+		goto out;
+	}
+
+	au_cache_free_dinfo(dinfo);
+	dinfo = NULL;
+
+out:
+	return dinfo;
+}
+
+void au_di_free(struct au_dinfo *dinfo)
+{
+	struct au_hdentry *p;
+	aufs_bindex_t bbot, bindex;
+
+	/* dentry may not be revalidated */
+	bindex = dinfo->di_btop;
+	if (bindex >= 0) {
+		bbot = dinfo->di_bbot;
+		p = au_hdentry(dinfo, bindex);
+		while (bindex++ <= bbot)
+			au_hdput(p++);
+	}
+	kfree(dinfo->di_hdentry);
+	au_cache_free_dinfo(dinfo);
+}
+
+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b)
+{
+	struct au_hdentry *p;
+	aufs_bindex_t bi;
+
+	AuRwMustWriteLock(&a->di_rwsem);
+	AuRwMustWriteLock(&b->di_rwsem);
+
+#define DiSwap(v, name)				\
+	do {					\
+		v = a->di_##name;		\
+		a->di_##name = b->di_##name;	\
+		b->di_##name = v;		\
+	} while (0)
+
+	DiSwap(p, hdentry);
+	DiSwap(bi, btop);
+	DiSwap(bi, bbot);
+	DiSwap(bi, bwh);
+	DiSwap(bi, bdiropq);
+	/* smp_mb(); */
+
+#undef DiSwap
+}
+
+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src)
+{
+	AuRwMustWriteLock(&dst->di_rwsem);
+	AuRwMustWriteLock(&src->di_rwsem);
+
+	dst->di_btop = src->di_btop;
+	dst->di_bbot = src->di_bbot;
+	dst->di_bwh = src->di_bwh;
+	dst->di_bdiropq = src->di_bdiropq;
+	/* smp_mb(); */
+}
+
+int au_di_init(struct dentry *dentry)
+{
+	int err;
+	struct super_block *sb;
+	struct au_dinfo *dinfo;
+
+	err = 0;
+	sb = dentry->d_sb;
+	dinfo = au_di_alloc(sb, AuLsc_DI_CHILD);
+	if (dinfo) {
+		atomic_set(&dinfo->di_generation, au_sigen(sb));
+		/* smp_mb(); */ /* atomic_set */
+		dentry->d_fsdata = dinfo;
+	} else
+		err = -ENOMEM;
+
+	return err;
+}
+
+void au_di_fin(struct dentry *dentry)
+{
+	struct au_dinfo *dinfo;
+
+	dinfo = au_di(dentry);
+	AuRwDestroy(&dinfo->di_rwsem);
+	au_di_free(dinfo);
+}
+
+int au_di_realloc(struct au_dinfo *dinfo, int nbr, int may_shrink)
+{
+	int err, sz;
+	struct au_hdentry *hdp;
+
+	AuRwMustWriteLock(&dinfo->di_rwsem);
+
+	err = -ENOMEM;
+	sz = sizeof(*hdp) * (dinfo->di_bbot + 1);
+	if (!sz)
+		sz = sizeof(*hdp);
+	hdp = au_kzrealloc(dinfo->di_hdentry, sz, sizeof(*hdp) * nbr, GFP_NOFS,
+			   may_shrink);
+	if (hdp) {
+		dinfo->di_hdentry = hdp;
+		err = 0;
+	}
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static void do_ii_write_lock(struct inode *inode, unsigned int lsc)
+{
+	switch (lsc) {
+	case AuLsc_DI_CHILD:
+		ii_write_lock_child(inode);
+		break;
+	case AuLsc_DI_CHILD2:
+		ii_write_lock_child2(inode);
+		break;
+	case AuLsc_DI_CHILD3:
+		ii_write_lock_child3(inode);
+		break;
+	case AuLsc_DI_PARENT:
+		ii_write_lock_parent(inode);
+		break;
+	case AuLsc_DI_PARENT2:
+		ii_write_lock_parent2(inode);
+		break;
+	case AuLsc_DI_PARENT3:
+		ii_write_lock_parent3(inode);
+		break;
+	default:
+		BUG();
+	}
+}
+
+static void do_ii_read_lock(struct inode *inode, unsigned int lsc)
+{
+	switch (lsc) {
+	case AuLsc_DI_CHILD:
+		ii_read_lock_child(inode);
+		break;
+	case AuLsc_DI_CHILD2:
+		ii_read_lock_child2(inode);
+		break;
+	case AuLsc_DI_CHILD3:
+		ii_read_lock_child3(inode);
+		break;
+	case AuLsc_DI_PARENT:
+		ii_read_lock_parent(inode);
+		break;
+	case AuLsc_DI_PARENT2:
+		ii_read_lock_parent2(inode);
+		break;
+	case AuLsc_DI_PARENT3:
+		ii_read_lock_parent3(inode);
+		break;
+	default:
+		BUG();
+	}
+}
+
+void di_read_lock(struct dentry *d, int flags, unsigned int lsc)
+{
+	struct inode *inode;
+
+	au_rw_read_lock_nested(&au_di(d)->di_rwsem, lsc);
+	if (d_really_is_positive(d)) {
+		inode = d_inode(d);
+		if (au_ftest_lock(flags, IW))
+			do_ii_write_lock(inode, lsc);
+		else if (au_ftest_lock(flags, IR))
+			do_ii_read_lock(inode, lsc);
+	}
+}
+
+void di_read_unlock(struct dentry *d, int flags)
+{
+	struct inode *inode;
+
+	if (d_really_is_positive(d)) {
+		inode = d_inode(d);
+		if (au_ftest_lock(flags, IW)) {
+			au_dbg_verify_dinode(d);
+			ii_write_unlock(inode);
+		} else if (au_ftest_lock(flags, IR)) {
+			au_dbg_verify_dinode(d);
+			ii_read_unlock(inode);
+		}
+	}
+	au_rw_read_unlock(&au_di(d)->di_rwsem);
+}
+
+void di_downgrade_lock(struct dentry *d, int flags)
+{
+	if (d_really_is_positive(d) && au_ftest_lock(flags, IR))
+		ii_downgrade_lock(d_inode(d));
+	au_rw_dgrade_lock(&au_di(d)->di_rwsem);
+}
+
+void di_write_lock(struct dentry *d, unsigned int lsc)
+{
+	au_rw_write_lock_nested(&au_di(d)->di_rwsem, lsc);
+	if (d_really_is_positive(d))
+		do_ii_write_lock(d_inode(d), lsc);
+}
+
+void di_write_unlock(struct dentry *d)
+{
+	au_dbg_verify_dinode(d);
+	if (d_really_is_positive(d))
+		ii_write_unlock(d_inode(d));
+	au_rw_write_unlock(&au_di(d)->di_rwsem);
+}
+
+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir)
+{
+	AuDebugOn(d1 == d2
+		  || d_inode(d1) == d_inode(d2)
+		  || d1->d_sb != d2->d_sb);
+
+	if ((isdir && au_test_subdir(d1, d2))
+	    || d1 < d2) {
+		di_write_lock_child(d1);
+		di_write_lock_child2(d2);
+	} else {
+		di_write_lock_child(d2);
+		di_write_lock_child2(d1);
+	}
+}
+
+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir)
+{
+	AuDebugOn(d1 == d2
+		  || d_inode(d1) == d_inode(d2)
+		  || d1->d_sb != d2->d_sb);
+
+	if ((isdir && au_test_subdir(d1, d2))
+	    || d1 < d2) {
+		di_write_lock_parent(d1);
+		di_write_lock_parent2(d2);
+	} else {
+		di_write_lock_parent(d2);
+		di_write_lock_parent2(d1);
+	}
+}
+
+void di_write_unlock2(struct dentry *d1, struct dentry *d2)
+{
+	di_write_unlock(d1);
+	if (d_inode(d1) == d_inode(d2))
+		au_rw_write_unlock(&au_di(d2)->di_rwsem);
+	else
+		di_write_unlock(d2);
+}
+
+/* ---------------------------------------------------------------------- */
+
+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex)
+{
+	struct dentry *d;
+
+	DiMustAnyLock(dentry);
+
+	if (au_dbtop(dentry) < 0 || bindex < au_dbtop(dentry))
+		return NULL;
+	AuDebugOn(bindex < 0);
+	d = au_hdentry(au_di(dentry), bindex)->hd_dentry;
+	AuDebugOn(d && au_dcount(d) <= 0);
+	return d;
+}
+
+/*
+ * extended version of au_h_dptr().
+ * returns a hashed and positive (or linkable) h_dentry in bindex, NULL, or
+ * error.
+ */
+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex)
+{
+	struct dentry *h_dentry;
+	struct inode *inode, *h_inode;
+
+	AuDebugOn(d_really_is_negative(dentry));
+
+	h_dentry = NULL;
+	if (au_dbtop(dentry) <= bindex
+	    && bindex <= au_dbbot(dentry))
+		h_dentry = au_h_dptr(dentry, bindex);
+	if (h_dentry && !au_d_linkable(h_dentry)) {
+		dget(h_dentry);
+		goto out; /* success */
+	}
+
+	inode = d_inode(dentry);
+	AuDebugOn(bindex < au_ibtop(inode));
+	AuDebugOn(au_ibbot(inode) < bindex);
+	h_inode = au_h_iptr(inode, bindex);
+	h_dentry = d_find_alias(h_inode);
+	if (h_dentry) {
+		if (!IS_ERR(h_dentry)) {
+			if (!au_d_linkable(h_dentry))
+				goto out; /* success */
+			dput(h_dentry);
+		} else
+			goto out;
+	}
+
+	if (au_opt_test(au_mntflags(dentry->d_sb), PLINK)) {
+		h_dentry = au_plink_lkup(inode, bindex);
+		AuDebugOn(!h_dentry);
+		if (!IS_ERR(h_dentry)) {
+			if (!au_d_hashed_positive(h_dentry))
+				goto out; /* success */
+			dput(h_dentry);
+			h_dentry = NULL;
+		}
+	}
+
+out:
+	AuDbgDentry(h_dentry);
+	return h_dentry;
+}
+
+aufs_bindex_t au_dbtail(struct dentry *dentry)
+{
+	aufs_bindex_t bbot, bwh;
+
+	bbot = au_dbbot(dentry);
+	if (0 <= bbot) {
+		bwh = au_dbwh(dentry);
+		if (!bwh)
+			return bwh;
+		if (0 < bwh && bwh < bbot)
+			return bwh - 1;
+	}
+	return bbot;
+}
+
+aufs_bindex_t au_dbtaildir(struct dentry *dentry)
+{
+	aufs_bindex_t bbot, bopq;
+
+	bbot = au_dbtail(dentry);
+	if (0 <= bbot) {
+		bopq = au_dbdiropq(dentry);
+		if (0 <= bopq && bopq < bbot)
+			bbot = bopq;
+	}
+	return bbot;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
+		   struct dentry *h_dentry)
+{
+	struct au_dinfo *dinfo;
+	struct au_hdentry *hd;
+	struct au_branch *br;
+
+	DiMustWriteLock(dentry);
+
+	dinfo = au_di(dentry);
+	hd = au_hdentry(dinfo, bindex);
+	au_hdput(hd);
+	hd->hd_dentry = h_dentry;
+	if (h_dentry) {
+		br = au_sbr(dentry->d_sb, bindex);
+		hd->hd_id = br->br_id;
+	}
+}
+
+int au_dbrange_test(struct dentry *dentry)
+{
+	int err;
+	aufs_bindex_t btop, bbot;
+
+	err = 0;
+	btop = au_dbtop(dentry);
+	bbot = au_dbbot(dentry);
+	if (btop >= 0)
+		AuDebugOn(bbot < 0 && btop > bbot);
+	else {
+		err = -EIO;
+		AuDebugOn(bbot >= 0);
+	}
+
+	return err;
+}
+
+int au_digen_test(struct dentry *dentry, unsigned int sigen)
+{
+	int err;
+
+	err = 0;
+	if (unlikely(au_digen(dentry) != sigen
+		     || au_iigen_test(d_inode(dentry), sigen)))
+		err = -EIO;
+
+	return err;
+}
+
+void au_update_digen(struct dentry *dentry)
+{
+	atomic_set(&au_di(dentry)->di_generation, au_sigen(dentry->d_sb));
+	/* smp_mb(); */ /* atomic_set */
+}
+
+void au_update_dbrange(struct dentry *dentry, int do_put_zero)
+{
+	struct au_dinfo *dinfo;
+	struct dentry *h_d;
+	struct au_hdentry *hdp;
+	aufs_bindex_t bindex, bbot;
+
+	DiMustWriteLock(dentry);
+
+	dinfo = au_di(dentry);
+	if (!dinfo || dinfo->di_btop < 0)
+		return;
+
+	if (do_put_zero) {
+		bbot = dinfo->di_bbot;
+		bindex = dinfo->di_btop;
+		hdp = au_hdentry(dinfo, bindex);
+		for (; bindex <= bbot; bindex++, hdp++) {
+			h_d = hdp->hd_dentry;
+			if (h_d && d_is_negative(h_d))
+				au_set_h_dptr(dentry, bindex, NULL);
+		}
+	}
+
+	dinfo->di_btop = 0;
+	hdp = au_hdentry(dinfo, dinfo->di_btop);
+	for (; dinfo->di_btop <= dinfo->di_bbot; dinfo->di_btop++, hdp++)
+		if (hdp->hd_dentry)
+			break;
+	if (dinfo->di_btop > dinfo->di_bbot) {
+		dinfo->di_btop = -1;
+		dinfo->di_bbot = -1;
+		return;
+	}
+
+	hdp = au_hdentry(dinfo, dinfo->di_bbot);
+	for (; dinfo->di_bbot >= 0; dinfo->di_bbot--, hdp--)
+		if (hdp->hd_dentry)
+			break;
+	AuDebugOn(dinfo->di_btop > dinfo->di_bbot || dinfo->di_bbot < 0);
+}
+
+void au_update_dbtop(struct dentry *dentry)
+{
+	aufs_bindex_t bindex, bbot;
+	struct dentry *h_dentry;
+
+	bbot = au_dbbot(dentry);
+	for (bindex = au_dbtop(dentry); bindex <= bbot; bindex++) {
+		h_dentry = au_h_dptr(dentry, bindex);
+		if (!h_dentry)
+			continue;
+		if (d_is_positive(h_dentry)) {
+			au_set_dbtop(dentry, bindex);
+			return;
+		}
+		au_set_h_dptr(dentry, bindex, NULL);
+	}
+}
+
+void au_update_dbbot(struct dentry *dentry)
+{
+	aufs_bindex_t bindex, btop;
+	struct dentry *h_dentry;
+
+	btop = au_dbtop(dentry);
+	for (bindex = au_dbbot(dentry); bindex >= btop; bindex--) {
+		h_dentry = au_h_dptr(dentry, bindex);
+		if (!h_dentry)
+			continue;
+		if (d_is_positive(h_dentry)) {
+			au_set_dbbot(dentry, bindex);
+			return;
+		}
+		au_set_h_dptr(dentry, bindex, NULL);
+	}
+}
+
+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry)
+{
+	aufs_bindex_t bindex, bbot;
+
+	bbot = au_dbbot(dentry);
+	for (bindex = au_dbtop(dentry); bindex <= bbot; bindex++)
+		if (au_h_dptr(dentry, bindex) == h_dentry)
+			return bindex;
+	return -1;
+}
diff --git a/fs/aufs/dir.c b/fs/aufs/dir.c
new file mode 100644
index 000000000..4b4a6de78
--- /dev/null
+++ b/fs/aufs/dir.c
@@ -0,0 +1,759 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * directory operations
+ */
+
+#include <linux/fs_stack.h>
+#include "aufs.h"
+
+void au_add_nlink(struct inode *dir, struct inode *h_dir)
+{
+	unsigned int nlink;
+
+	AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
+
+	nlink = dir->i_nlink;
+	nlink += h_dir->i_nlink - 2;
+	if (h_dir->i_nlink < 2)
+		nlink += 2;
+	smp_mb(); /* for i_nlink */
+	/* 0 can happen in revaliding */
+	set_nlink(dir, nlink);
+}
+
+void au_sub_nlink(struct inode *dir, struct inode *h_dir)
+{
+	unsigned int nlink;
+
+	AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
+
+	nlink = dir->i_nlink;
+	nlink -= h_dir->i_nlink - 2;
+	if (h_dir->i_nlink < 2)
+		nlink -= 2;
+	smp_mb(); /* for i_nlink */
+	/* nlink == 0 means the branch-fs is broken */
+	set_nlink(dir, nlink);
+}
+
+loff_t au_dir_size(struct file *file, struct dentry *dentry)
+{
+	loff_t sz;
+	aufs_bindex_t bindex, bbot;
+	struct file *h_file;
+	struct dentry *h_dentry;
+
+	sz = 0;
+	if (file) {
+		AuDebugOn(!d_is_dir(file->f_path.dentry));
+
+		bbot = au_fbbot_dir(file);
+		for (bindex = au_fbtop(file);
+		     bindex <= bbot && sz < KMALLOC_MAX_SIZE;
+		     bindex++) {
+			h_file = au_hf_dir(file, bindex);
+			if (h_file && file_inode(h_file))
+				sz += vfsub_f_size_read(h_file);
+		}
+	} else {
+		AuDebugOn(!dentry);
+		AuDebugOn(!d_is_dir(dentry));
+
+		bbot = au_dbtaildir(dentry);
+		for (bindex = au_dbtop(dentry);
+		     bindex <= bbot && sz < KMALLOC_MAX_SIZE;
+		     bindex++) {
+			h_dentry = au_h_dptr(dentry, bindex);
+			if (h_dentry && d_is_positive(h_dentry))
+				sz += i_size_read(d_inode(h_dentry));
+		}
+	}
+	if (sz < KMALLOC_MAX_SIZE)
+		sz = roundup_pow_of_two(sz);
+	if (sz > KMALLOC_MAX_SIZE)
+		sz = KMALLOC_MAX_SIZE;
+	else if (sz < NAME_MAX) {
+		BUILD_BUG_ON(AUFS_RDBLK_DEF < NAME_MAX);
+		sz = AUFS_RDBLK_DEF;
+	}
+	return sz;
+}
+
+struct au_dir_ts_arg {
+	struct dentry *dentry;
+	aufs_bindex_t brid;
+};
+
+static void au_do_dir_ts(void *arg)
+{
+	struct au_dir_ts_arg *a = arg;
+	struct au_dtime dt;
+	struct path h_path;
+	struct inode *dir, *h_dir;
+	struct super_block *sb;
+	struct au_branch *br;
+	struct au_hinode *hdir;
+	int err;
+	aufs_bindex_t btop, bindex;
+
+	sb = a->dentry->d_sb;
+	if (d_really_is_negative(a->dentry))
+		goto out;
+	/* no dir->i_mutex lock */
+	aufs_read_lock(a->dentry, AuLock_DW); /* noflush */
+
+	dir = d_inode(a->dentry);
+	btop = au_ibtop(dir);
+	bindex = au_br_index(sb, a->brid);
+	if (bindex < btop)
+		goto out_unlock;
+
+	br = au_sbr(sb, bindex);
+	h_path.dentry = au_h_dptr(a->dentry, bindex);
+	if (!h_path.dentry)
+		goto out_unlock;
+	h_path.mnt = au_br_mnt(br);
+	au_dtime_store(&dt, a->dentry, &h_path);
+
+	br = au_sbr(sb, btop);
+	if (!au_br_writable(br->br_perm))
+		goto out_unlock;
+	h_path.dentry = au_h_dptr(a->dentry, btop);
+	h_path.mnt = au_br_mnt(br);
+	err = vfsub_mnt_want_write(h_path.mnt);
+	if (err)
+		goto out_unlock;
+	hdir = au_hi(dir, btop);
+	au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
+	h_dir = au_h_iptr(dir, btop);
+	if (h_dir->i_nlink
+	    && timespec_compare(&h_dir->i_mtime, &dt.dt_mtime) < 0) {
+		dt.dt_h_path = h_path;
+		au_dtime_revert(&dt);
+	}
+	au_hn_inode_unlock(hdir);
+	vfsub_mnt_drop_write(h_path.mnt);
+	au_cpup_attr_timesizes(dir);
+
+out_unlock:
+	aufs_read_unlock(a->dentry, AuLock_DW);
+out:
+	dput(a->dentry);
+	au_nwt_done(&au_sbi(sb)->si_nowait);
+	kfree(arg);
+}
+
+void au_dir_ts(struct inode *dir, aufs_bindex_t bindex)
+{
+	int perm, wkq_err;
+	aufs_bindex_t btop;
+	struct au_dir_ts_arg *arg;
+	struct dentry *dentry;
+	struct super_block *sb;
+
+	IMustLock(dir);
+
+	dentry = d_find_any_alias(dir);
+	AuDebugOn(!dentry);
+	sb = dentry->d_sb;
+	btop = au_ibtop(dir);
+	if (btop == bindex) {
+		au_cpup_attr_timesizes(dir);
+		goto out;
+	}
+
+	perm = au_sbr_perm(sb, btop);
+	if (!au_br_writable(perm))
+		goto out;
+
+	arg = kmalloc(sizeof(*arg), GFP_NOFS);
+	if (!arg)
+		goto out;
+
+	arg->dentry = dget(dentry); /* will be dput-ted by au_do_dir_ts() */
+	arg->brid = au_sbr_id(sb, bindex);
+	wkq_err = au_wkq_nowait(au_do_dir_ts, arg, sb, /*flags*/0);
+	if (unlikely(wkq_err)) {
+		pr_err("wkq %d\n", wkq_err);
+		dput(dentry);
+		kfree(arg);
+	}
+
+out:
+	dput(dentry);
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int reopen_dir(struct file *file)
+{
+	int err;
+	unsigned int flags;
+	aufs_bindex_t bindex, btail, btop;
+	struct dentry *dentry, *h_dentry;
+	struct file *h_file;
+
+	/* open all lower dirs */
+	dentry = file->f_path.dentry;
+	btop = au_dbtop(dentry);
+	for (bindex = au_fbtop(file); bindex < btop; bindex++)
+		au_set_h_fptr(file, bindex, NULL);
+	au_set_fbtop(file, btop);
+
+	btail = au_dbtaildir(dentry);
+	for (bindex = au_fbbot_dir(file); btail < bindex; bindex--)
+		au_set_h_fptr(file, bindex, NULL);
+	au_set_fbbot_dir(file, btail);
+
+	flags = vfsub_file_flags(file);
+	for (bindex = btop; bindex <= btail; bindex++) {
+		h_dentry = au_h_dptr(dentry, bindex);
+		if (!h_dentry)
+			continue;
+		h_file = au_hf_dir(file, bindex);
+		if (h_file)
+			continue;
+
+		h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
+		err = PTR_ERR(h_file);
+		if (IS_ERR(h_file))
+			goto out; /* close all? */
+		au_set_h_fptr(file, bindex, h_file);
+	}
+	au_update_figen(file);
+	/* todo: necessary? */
+	/* file->f_ra = h_file->f_ra; */
+	err = 0;
+
+out:
+	return err;
+}
+
+static int do_open_dir(struct file *file, int flags, struct file *h_file)
+{
+	int err;
+	aufs_bindex_t bindex, btail;
+	struct dentry *dentry, *h_dentry;
+	struct vfsmount *mnt;
+
+	FiMustWriteLock(file);
+	AuDebugOn(h_file);
+
+	err = 0;
+	mnt = file->f_path.mnt;
+	dentry = file->f_path.dentry;
+	file->f_version = inode_query_iversion(d_inode(dentry));
+	bindex = au_dbtop(dentry);
+	au_set_fbtop(file, bindex);
+	btail = au_dbtaildir(dentry);
+	au_set_fbbot_dir(file, btail);
+	for (; !err && bindex <= btail; bindex++) {
+		h_dentry = au_h_dptr(dentry, bindex);
+		if (!h_dentry)
+			continue;
+
+		err = vfsub_test_mntns(mnt, h_dentry->d_sb);
+		if (unlikely(err))
+			break;
+		h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
+		if (IS_ERR(h_file)) {
+			err = PTR_ERR(h_file);
+			break;
+		}
+		au_set_h_fptr(file, bindex, h_file);
+	}
+	au_update_figen(file);
+	/* todo: necessary? */
+	/* file->f_ra = h_file->f_ra; */
+	if (!err)
+		return 0; /* success */
+
+	/* close all */
+	for (bindex = au_fbtop(file); bindex <= btail; bindex++)
+		au_set_h_fptr(file, bindex, NULL);
+	au_set_fbtop(file, -1);
+	au_set_fbbot_dir(file, -1);
+
+	return err;
+}
+
+static int aufs_open_dir(struct inode *inode __maybe_unused,
+			 struct file *file)
+{
+	int err;
+	struct super_block *sb;
+	struct au_fidir *fidir;
+
+	err = -ENOMEM;
+	sb = file->f_path.dentry->d_sb;
+	si_read_lock(sb, AuLock_FLUSH);
+	fidir = au_fidir_alloc(sb);
+	if (fidir) {
+		struct au_do_open_args args = {
+			.open	= do_open_dir,
+			.fidir	= fidir
+		};
+		err = au_do_open(file, &args);
+		if (unlikely(err))
+			kfree(fidir);
+	}
+	si_read_unlock(sb);
+	return err;
+}
+
+static int aufs_release_dir(struct inode *inode __maybe_unused,
+			    struct file *file)
+{
+	struct au_vdir *vdir_cache;
+	struct au_finfo *finfo;
+	struct au_fidir *fidir;
+	struct au_hfile *hf;
+	aufs_bindex_t bindex, bbot;
+
+	finfo = au_fi(file);
+	fidir = finfo->fi_hdir;
+	if (fidir) {
+		au_hbl_del(&finfo->fi_hlist,
+			   &au_sbi(file->f_path.dentry->d_sb)->si_files);
+		vdir_cache = fidir->fd_vdir_cache; /* lock-free */
+		if (vdir_cache)
+			au_vdir_free(vdir_cache);
+
+		bindex = finfo->fi_btop;
+		if (bindex >= 0) {
+			hf = fidir->fd_hfile + bindex;
+			/*
+			 * calls fput() instead of filp_close(),
+			 * since no dnotify or lock for the lower file.
+			 */
+			bbot = fidir->fd_bbot;
+			for (; bindex <= bbot; bindex++, hf++)
+				if (hf->hf_file)
+					au_hfput(hf, /*execed*/0);
+		}
+		kfree(fidir);
+		finfo->fi_hdir = NULL;
+	}
+	au_finfo_fin(file);
+	return 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_do_flush_dir(struct file *file, fl_owner_t id)
+{
+	int err;
+	aufs_bindex_t bindex, bbot;
+	struct file *h_file;
+
+	err = 0;
+	bbot = au_fbbot_dir(file);
+	for (bindex = au_fbtop(file); !err && bindex <= bbot; bindex++) {
+		h_file = au_hf_dir(file, bindex);
+		if (h_file)
+			err = vfsub_flush(h_file, id);
+	}
+	return err;
+}
+
+static int aufs_flush_dir(struct file *file, fl_owner_t id)
+{
+	return au_do_flush(file, id, au_do_flush_dir);
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_do_fsync_dir_no_file(struct dentry *dentry, int datasync)
+{
+	int err;
+	aufs_bindex_t bbot, bindex;
+	struct inode *inode;
+	struct super_block *sb;
+
+	err = 0;
+	sb = dentry->d_sb;
+	inode = d_inode(dentry);
+	IMustLock(inode);
+	bbot = au_dbbot(dentry);
+	for (bindex = au_dbtop(dentry); !err && bindex <= bbot; bindex++) {
+		struct path h_path;
+
+		if (au_test_ro(sb, bindex, inode))
+			continue;
+		h_path.dentry = au_h_dptr(dentry, bindex);
+		if (!h_path.dentry)
+			continue;
+
+		h_path.mnt = au_sbr_mnt(sb, bindex);
+		err = vfsub_fsync(NULL, &h_path, datasync);
+	}
+
+	return err;
+}
+
+static int au_do_fsync_dir(struct file *file, int datasync)
+{
+	int err;
+	aufs_bindex_t bbot, bindex;
+	struct file *h_file;
+	struct super_block *sb;
+	struct inode *inode;
+
+	err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1, /*fi_lsc*/0);
+	if (unlikely(err))
+		goto out;
+
+	inode = file_inode(file);
+	sb = inode->i_sb;
+	bbot = au_fbbot_dir(file);
+	for (bindex = au_fbtop(file); !err && bindex <= bbot; bindex++) {
+		h_file = au_hf_dir(file, bindex);
+		if (!h_file || au_test_ro(sb, bindex, inode))
+			continue;
+
+		err = vfsub_fsync(h_file, &h_file->f_path, datasync);
+	}
+
+out:
+	return err;
+}
+
+/*
+ * @file may be NULL
+ */
+static int aufs_fsync_dir(struct file *file, loff_t start, loff_t end,
+			  int datasync)
+{
+	int err;
+	struct dentry *dentry;
+	struct inode *inode;
+	struct super_block *sb;
+
+	err = 0;
+	dentry = file->f_path.dentry;
+	inode = d_inode(dentry);
+	inode_lock(inode);
+	sb = dentry->d_sb;
+	si_noflush_read_lock(sb);
+	if (file)
+		err = au_do_fsync_dir(file, datasync);
+	else {
+		di_write_lock_child(dentry);
+		err = au_do_fsync_dir_no_file(dentry, datasync);
+	}
+	au_cpup_attr_timesizes(inode);
+	di_write_unlock(dentry);
+	if (file)
+		fi_write_unlock(file);
+
+	si_read_unlock(sb);
+	inode_unlock(inode);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int aufs_iterate_shared(struct file *file, struct dir_context *ctx)
+{
+	int err;
+	struct dentry *dentry;
+	struct inode *inode, *h_inode;
+	struct super_block *sb;
+
+	AuDbg("%pD, ctx{%pf, %llu}\n", file, ctx->actor, ctx->pos);
+
+	dentry = file->f_path.dentry;
+	inode = d_inode(dentry);
+	IMustLock(inode);
+
+	sb = dentry->d_sb;
+	si_read_lock(sb, AuLock_FLUSH);
+	err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1, /*fi_lsc*/0);
+	if (unlikely(err))
+		goto out;
+	err = au_alive_dir(dentry);
+	if (!err)
+		err = au_vdir_init(file);
+	di_downgrade_lock(dentry, AuLock_IR);
+	if (unlikely(err))
+		goto out_unlock;
+
+	h_inode = au_h_iptr(inode, au_ibtop(inode));
+	if (!au_test_nfsd()) {
+		err = au_vdir_fill_de(file, ctx);
+		fsstack_copy_attr_atime(inode, h_inode);
+	} else {
+		/*
+		 * nfsd filldir may call lookup_one_len(), vfs_getattr(),
+		 * encode_fh() and others.
+		 */
+		atomic_inc(&h_inode->i_count);
+		di_read_unlock(dentry, AuLock_IR);
+		si_read_unlock(sb);
+		err = au_vdir_fill_de(file, ctx);
+		fsstack_copy_attr_atime(inode, h_inode);
+		fi_write_unlock(file);
+		iput(h_inode);
+
+		AuTraceErr(err);
+		return err;
+	}
+
+out_unlock:
+	di_read_unlock(dentry, AuLock_IR);
+	fi_write_unlock(file);
+out:
+	si_read_unlock(sb);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+#define AuTestEmpty_WHONLY	1
+#define AuTestEmpty_CALLED	(1 << 1)
+#define AuTestEmpty_SHWH	(1 << 2)
+#define au_ftest_testempty(flags, name)	((flags) & AuTestEmpty_##name)
+#define au_fset_testempty(flags, name) \
+	do { (flags) |= AuTestEmpty_##name; } while (0)
+#define au_fclr_testempty(flags, name) \
+	do { (flags) &= ~AuTestEmpty_##name; } while (0)
+
+#ifndef CONFIG_AUFS_SHWH
+#undef AuTestEmpty_SHWH
+#define AuTestEmpty_SHWH	0
+#endif
+
+struct test_empty_arg {
+	struct dir_context ctx;
+	struct au_nhash *whlist;
+	unsigned int flags;
+	int err;
+	aufs_bindex_t bindex;
+};
+
+static int test_empty_cb(struct dir_context *ctx, const char *__name,
+			 int namelen, loff_t offset __maybe_unused, u64 ino,
+			 unsigned int d_type)
+{
+	struct test_empty_arg *arg = container_of(ctx, struct test_empty_arg,
+						  ctx);
+	char *name = (void *)__name;
+
+	arg->err = 0;
+	au_fset_testempty(arg->flags, CALLED);
+	/* smp_mb(); */
+	if (name[0] == '.'
+	    && (namelen == 1 || (name[1] == '.' && namelen == 2)))
+		goto out; /* success */
+
+	if (namelen <= AUFS_WH_PFX_LEN
+	    || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
+		if (au_ftest_testempty(arg->flags, WHONLY)
+		    && !au_nhash_test_known_wh(arg->whlist, name, namelen))
+			arg->err = -ENOTEMPTY;
+		goto out;
+	}
+
+	name += AUFS_WH_PFX_LEN;
+	namelen -= AUFS_WH_PFX_LEN;
+	if (!au_nhash_test_known_wh(arg->whlist, name, namelen))
+		arg->err = au_nhash_append_wh
+			(arg->whlist, name, namelen, ino, d_type, arg->bindex,
+			 au_ftest_testempty(arg->flags, SHWH));
+
+out:
+	/* smp_mb(); */
+	AuTraceErr(arg->err);
+	return arg->err;
+}
+
+static int do_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
+{
+	int err;
+	struct file *h_file;
+
+	h_file = au_h_open(dentry, arg->bindex,
+			   O_RDONLY | O_NONBLOCK | O_DIRECTORY | O_LARGEFILE,
+			   /*file*/NULL, /*force_wr*/0);
+	err = PTR_ERR(h_file);
+	if (IS_ERR(h_file))
+		goto out;
+
+	err = 0;
+	if (!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
+	    && !file_inode(h_file)->i_nlink)
+		goto out_put;
+
+	do {
+		arg->err = 0;
+		au_fclr_testempty(arg->flags, CALLED);
+		/* smp_mb(); */
+		err = vfsub_iterate_dir(h_file, &arg->ctx);
+		if (err >= 0)
+			err = arg->err;
+	} while (!err && au_ftest_testempty(arg->flags, CALLED));
+
+out_put:
+	fput(h_file);
+	au_sbr_put(dentry->d_sb, arg->bindex);
+out:
+	return err;
+}
+
+struct do_test_empty_args {
+	int *errp;
+	struct dentry *dentry;
+	struct test_empty_arg *arg;
+};
+
+static void call_do_test_empty(void *args)
+{
+	struct do_test_empty_args *a = args;
+	*a->errp = do_test_empty(a->dentry, a->arg);
+}
+
+static int sio_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
+{
+	int err, wkq_err;
+	struct dentry *h_dentry;
+	struct inode *h_inode;
+
+	h_dentry = au_h_dptr(dentry, arg->bindex);
+	h_inode = d_inode(h_dentry);
+	/* todo: i_mode changes anytime? */
+	inode_lock_shared_nested(h_inode, AuLsc_I_CHILD);
+	err = au_test_h_perm_sio(h_inode, MAY_EXEC | MAY_READ);
+	inode_unlock_shared(h_inode);
+	if (!err)
+		err = do_test_empty(dentry, arg);
+	else {
+		struct do_test_empty_args args = {
+			.errp	= &err,
+			.dentry	= dentry,
+			.arg	= arg
+		};
+		unsigned int flags = arg->flags;
+
+		wkq_err = au_wkq_wait(call_do_test_empty, &args);
+		if (unlikely(wkq_err))
+			err = wkq_err;
+		arg->flags = flags;
+	}
+
+	return err;
+}
+
+int au_test_empty_lower(struct dentry *dentry)
+{
+	int err;
+	unsigned int rdhash;
+	aufs_bindex_t bindex, btop, btail;
+	struct au_nhash whlist;
+	struct test_empty_arg arg = {
+		.ctx = {
+			.actor = test_empty_cb
+		}
+	};
+	int (*test_empty)(struct dentry *dentry, struct test_empty_arg *arg);
+
+	SiMustAnyLock(dentry->d_sb);
+
+	rdhash = au_sbi(dentry->d_sb)->si_rdhash;
+	if (!rdhash)
+		rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, dentry));
+	err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
+	if (unlikely(err))
+		goto out;
+
+	arg.flags = 0;
+	arg.whlist = &whlist;
+	btop = au_dbtop(dentry);
+	if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
+		au_fset_testempty(arg.flags, SHWH);
+	test_empty = do_test_empty;
+	if (au_opt_test(au_mntflags(dentry->d_sb), DIRPERM1))
+		test_empty = sio_test_empty;
+	arg.bindex = btop;
+	err = test_empty(dentry, &arg);
+	if (unlikely(err))
+		goto out_whlist;
+
+	au_fset_testempty(arg.flags, WHONLY);
+	btail = au_dbtaildir(dentry);
+	for (bindex = btop + 1; !err && bindex <= btail; bindex++) {
+		struct dentry *h_dentry;
+
+		h_dentry = au_h_dptr(dentry, bindex);
+		if (h_dentry && d_is_positive(h_dentry)) {
+			arg.bindex = bindex;
+			err = test_empty(dentry, &arg);
+		}
+	}
+
+out_whlist:
+	au_nhash_wh_free(&whlist);
+out:
+	return err;
+}
+
+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist)
+{
+	int err;
+	struct test_empty_arg arg = {
+		.ctx = {
+			.actor = test_empty_cb
+		}
+	};
+	aufs_bindex_t bindex, btail;
+
+	err = 0;
+	arg.whlist = whlist;
+	arg.flags = AuTestEmpty_WHONLY;
+	if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
+		au_fset_testempty(arg.flags, SHWH);
+	btail = au_dbtaildir(dentry);
+	for (bindex = au_dbtop(dentry); !err && bindex <= btail; bindex++) {
+		struct dentry *h_dentry;
+
+		h_dentry = au_h_dptr(dentry, bindex);
+		if (h_dentry && d_is_positive(h_dentry)) {
+			arg.bindex = bindex;
+			err = sio_test_empty(dentry, &arg);
+		}
+	}
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+const struct file_operations aufs_dir_fop = {
+	.owner		= THIS_MODULE,
+	.llseek		= default_llseek,
+	.read		= generic_read_dir,
+	.iterate_shared	= aufs_iterate_shared,
+	.unlocked_ioctl	= aufs_ioctl_dir,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= aufs_compat_ioctl_dir,
+#endif
+	.open		= aufs_open_dir,
+	.release	= aufs_release_dir,
+	.flush		= aufs_flush_dir,
+	.fsync		= aufs_fsync_dir
+};
diff --git a/fs/aufs/dir.h b/fs/aufs/dir.h
new file mode 100644
index 000000000..b3eb2f73c
--- /dev/null
+++ b/fs/aufs/dir.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * directory operations
+ */
+
+#ifndef __AUFS_DIR_H__
+#define __AUFS_DIR_H__
+
+#ifdef __KERNEL__
+
+#include <linux/fs.h>
+
+/* ---------------------------------------------------------------------- */
+
+/* need to be faster and smaller */
+
+struct au_nhash {
+	unsigned int		nh_num;
+	struct hlist_head	*nh_head;
+};
+
+struct au_vdir_destr {
+	unsigned char	len;
+	unsigned char	name[0];
+} __packed;
+
+struct au_vdir_dehstr {
+	struct hlist_node	hash;
+	struct au_vdir_destr	*str;
+} ____cacheline_aligned_in_smp;
+
+struct au_vdir_de {
+	ino_t			de_ino;
+	unsigned char		de_type;
+	/* caution: packed */
+	struct au_vdir_destr	de_str;
+} __packed;
+
+struct au_vdir_wh {
+	struct hlist_node	wh_hash;
+#ifdef CONFIG_AUFS_SHWH
+	ino_t			wh_ino;
+	aufs_bindex_t		wh_bindex;
+	unsigned char		wh_type;
+#else
+	aufs_bindex_t		wh_bindex;
+#endif
+	/* caution: packed */
+	struct au_vdir_destr	wh_str;
+} __packed;
+
+union au_vdir_deblk_p {
+	unsigned char		*deblk;
+	struct au_vdir_de	*de;
+};
+
+struct au_vdir {
+	unsigned char	**vd_deblk;
+	unsigned long	vd_nblk;
+	struct {
+		unsigned long		ul;
+		union au_vdir_deblk_p	p;
+	} vd_last;
+
+	u64		vd_version;
+	unsigned int	vd_deblk_sz;
+	unsigned long		vd_jiffy;
+} ____cacheline_aligned_in_smp;
+
+/* ---------------------------------------------------------------------- */
+
+/* dir.c */
+extern const struct file_operations aufs_dir_fop;
+void au_add_nlink(struct inode *dir, struct inode *h_dir);
+void au_sub_nlink(struct inode *dir, struct inode *h_dir);
+loff_t au_dir_size(struct file *file, struct dentry *dentry);
+void au_dir_ts(struct inode *dir, aufs_bindex_t bsrc);
+int au_test_empty_lower(struct dentry *dentry);
+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist);
+
+/* vdir.c */
+unsigned int au_rdhash_est(loff_t sz);
+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp);
+void au_nhash_wh_free(struct au_nhash *whlist);
+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
+			    int limit);
+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen);
+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
+		       unsigned int d_type, aufs_bindex_t bindex,
+		       unsigned char shwh);
+void au_vdir_free(struct au_vdir *vdir);
+int au_vdir_init(struct file *file);
+int au_vdir_fill_de(struct file *file, struct dir_context *ctx);
+
+/* ioctl.c */
+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg);
+
+#ifdef CONFIG_AUFS_RDU
+/* rdu.c */
+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+#ifdef CONFIG_COMPAT
+long au_rdu_compat_ioctl(struct file *file, unsigned int cmd,
+			 unsigned long arg);
+#endif
+#else
+AuStub(long, au_rdu_ioctl, return -EINVAL, struct file *file,
+       unsigned int cmd, unsigned long arg)
+#ifdef CONFIG_COMPAT
+AuStub(long, au_rdu_compat_ioctl, return -EINVAL, struct file *file,
+       unsigned int cmd, unsigned long arg)
+#endif
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_DIR_H__ */
diff --git a/fs/aufs/dirren.c b/fs/aufs/dirren.c
new file mode 100644
index 000000000..dd9dfaea3
--- /dev/null
+++ b/fs/aufs/dirren.c
@@ -0,0 +1,1315 @@
+/*
+ * Copyright (C) 2017-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * special handling in renaming a directoy
+ * in order to support looking-up the before-renamed name on the lower readonly
+ * branches
+ */
+
+#include <linux/byteorder/generic.h>
+#include "aufs.h"
+
+static void au_dr_hino_del(struct au_dr_br *dr, struct au_dr_hino *ent)
+{
+	int idx;
+
+	idx = au_dr_ihash(ent->dr_h_ino);
+	au_hbl_del(&ent->dr_hnode, dr->dr_h_ino + idx);
+}
+
+static int au_dr_hino_test_empty(struct au_dr_br *dr)
+{
+	int ret, i;
+	struct hlist_bl_head *hbl;
+
+	ret = 1;
+	for (i = 0; ret && i < AuDirren_NHASH; i++) {
+		hbl = dr->dr_h_ino + i;
+		hlist_bl_lock(hbl);
+		ret &= hlist_bl_empty(hbl);
+		hlist_bl_unlock(hbl);
+	}
+
+	return ret;
+}
+
+static struct au_dr_hino *au_dr_hino_find(struct au_dr_br *dr, ino_t ino)
+{
+	struct au_dr_hino *found, *ent;
+	struct hlist_bl_head *hbl;
+	struct hlist_bl_node *pos;
+	int idx;
+
+	found = NULL;
+	idx = au_dr_ihash(ino);
+	hbl = dr->dr_h_ino + idx;
+	hlist_bl_lock(hbl);
+	hlist_bl_for_each_entry(ent, pos, hbl, dr_hnode)
+		if (ent->dr_h_ino == ino) {
+			found = ent;
+			break;
+		}
+	hlist_bl_unlock(hbl);
+
+	return found;
+}
+
+int au_dr_hino_test_add(struct au_dr_br *dr, ino_t ino,
+			struct au_dr_hino *add_ent)
+{
+	int found, idx;
+	struct hlist_bl_head *hbl;
+	struct hlist_bl_node *pos;
+	struct au_dr_hino *ent;
+
+	found = 0;
+	idx = au_dr_ihash(ino);
+	hbl = dr->dr_h_ino + idx;
+#if 0
+	{
+		struct hlist_bl_node *tmp;
+
+		hlist_bl_for_each_entry_safe(ent, pos, tmp, hbl, dr_hnode)
+			AuDbg("hi%llu\n", (unsigned long long)ent->dr_h_ino);
+	}
+#endif
+	hlist_bl_lock(hbl);
+	hlist_bl_for_each_entry(ent, pos, hbl, dr_hnode)
+		if (ent->dr_h_ino == ino) {
+			found = 1;
+			break;
+		}
+	if (!found && add_ent)
+		hlist_bl_add_head(&add_ent->dr_hnode, hbl);
+	hlist_bl_unlock(hbl);
+
+	if (!found && add_ent)
+		AuDbg("i%llu added\n", (unsigned long long)add_ent->dr_h_ino);
+
+	return found;
+}
+
+void au_dr_hino_free(struct au_dr_br *dr)
+{
+	int i;
+	struct hlist_bl_head *hbl;
+	struct hlist_bl_node *pos, *tmp;
+	struct au_dr_hino *ent;
+
+	/* SiMustWriteLock(sb); */
+
+	for (i = 0; i < AuDirren_NHASH; i++) {
+		hbl = dr->dr_h_ino + i;
+		/* no spinlock since sbinfo must be write-locked */
+		hlist_bl_for_each_entry_safe(ent, pos, tmp, hbl, dr_hnode)
+			kfree(ent);
+		INIT_HLIST_BL_HEAD(hbl);
+	}
+}
+
+/* returns the number of inodes or an error */
+static int au_dr_hino_store(struct super_block *sb, struct au_branch *br,
+			    struct file *hinofile)
+{
+	int err, i;
+	ssize_t ssz;
+	loff_t pos, oldsize;
+	__be64 u64;
+	struct inode *hinoinode;
+	struct hlist_bl_head *hbl;
+	struct hlist_bl_node *n1, *n2;
+	struct au_dr_hino *ent;
+
+	SiMustWriteLock(sb);
+	AuDebugOn(!au_br_writable(br->br_perm));
+
+	hinoinode = file_inode(hinofile);
+	oldsize = i_size_read(hinoinode);
+
+	err = 0;
+	pos = 0;
+	hbl = br->br_dirren.dr_h_ino;
+	for (i = 0; !err && i < AuDirren_NHASH; i++, hbl++) {
+		/* no bit-lock since sbinfo must be write-locked */
+		hlist_bl_for_each_entry_safe(ent, n1, n2, hbl, dr_hnode) {
+			AuDbg("hi%llu, %pD2\n",
+			      (unsigned long long)ent->dr_h_ino, hinofile);
+			u64 = cpu_to_be64(ent->dr_h_ino);
+			ssz = vfsub_write_k(hinofile, &u64, sizeof(u64), &pos);
+			if (ssz == sizeof(u64))
+				continue;
+
+			/* write error */
+			pr_err("ssz %zd, %pD2\n", ssz, hinofile);
+			err = -ENOSPC;
+			if (ssz < 0)
+				err = ssz;
+			break;
+		}
+	}
+	/* regardless the error */
+	if (pos < oldsize) {
+		err = vfsub_trunc(&hinofile->f_path, pos, /*attr*/0, hinofile);
+		AuTraceErr(err);
+	}
+
+	AuTraceErr(err);
+	return err;
+}
+
+static int au_dr_hino_load(struct au_dr_br *dr, struct file *hinofile)
+{
+	int err, hidx;
+	ssize_t ssz;
+	size_t sz, n;
+	loff_t pos;
+	uint64_t u64;
+	struct au_dr_hino *ent;
+	struct inode *hinoinode;
+	struct hlist_bl_head *hbl;
+
+	err = 0;
+	pos = 0;
+	hbl = dr->dr_h_ino;
+	hinoinode = file_inode(hinofile);
+	sz = i_size_read(hinoinode);
+	AuDebugOn(sz % sizeof(u64));
+	n = sz / sizeof(u64);
+	while (n--) {
+		ssz = vfsub_read_k(hinofile, &u64, sizeof(u64), &pos);
+		if (unlikely(ssz != sizeof(u64))) {
+			pr_err("ssz %zd, %pD2\n", ssz, hinofile);
+			err = -EINVAL;
+			if (ssz < 0)
+				err = ssz;
+			goto out_free;
+		}
+
+		ent = kmalloc(sizeof(*ent), GFP_NOFS);
+		if (!ent) {
+			err = -ENOMEM;
+			AuTraceErr(err);
+			goto out_free;
+		}
+		ent->dr_h_ino = be64_to_cpu((__force __be64)u64);
+		AuDbg("hi%llu, %pD2\n",
+		      (unsigned long long)ent->dr_h_ino, hinofile);
+		hidx = au_dr_ihash(ent->dr_h_ino);
+		au_hbl_add(&ent->dr_hnode, hbl + hidx);
+	}
+	goto out; /* success */
+
+out_free:
+	au_dr_hino_free(dr);
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/*
+ * @bindex/@br is a switch to distinguish whether suspending hnotify or not.
+ * @path is a switch to distinguish load and store.
+ */
+static int au_dr_hino(struct super_block *sb, aufs_bindex_t bindex,
+		      struct au_branch *br, const struct path *path)
+{
+	int err, flags;
+	unsigned char load, suspend;
+	struct file *hinofile;
+	struct au_hinode *hdir;
+	struct inode *dir, *delegated;
+	struct path hinopath;
+	struct qstr hinoname = QSTR_INIT(AUFS_WH_DR_BRHINO,
+					 sizeof(AUFS_WH_DR_BRHINO) - 1);
+
+	AuDebugOn(bindex < 0 && !br);
+	AuDebugOn(bindex >= 0 && br);
+
+	err = -EINVAL;
+	suspend = !br;
+	if (suspend)
+		br = au_sbr(sb, bindex);
+	load = !!path;
+	if (!load) {
+		path = &br->br_path;
+		AuDebugOn(!au_br_writable(br->br_perm));
+		if (unlikely(!au_br_writable(br->br_perm)))
+			goto out;
+	}
+
+	hdir = NULL;
+	if (suspend) {
+		dir = d_inode(sb->s_root);
+		hdir = au_hinode(au_ii(dir), bindex);
+		dir = hdir->hi_inode;
+		au_hn_inode_lock_nested(hdir, AuLsc_I_CHILD);
+	} else {
+		dir = d_inode(path->dentry);
+		inode_lock_nested(dir, AuLsc_I_CHILD);
+	}
+	hinopath.dentry = vfsub_lkup_one(&hinoname, path->dentry);
+	err = PTR_ERR(hinopath.dentry);
+	if (IS_ERR(hinopath.dentry))
+		goto out_unlock;
+
+	err = 0;
+	flags = O_RDONLY;
+	if (load) {
+		if (d_is_negative(hinopath.dentry))
+			goto out_dput; /* success */
+	} else {
+		if (au_dr_hino_test_empty(&br->br_dirren)) {
+			if (d_is_positive(hinopath.dentry)) {
+				delegated = NULL;
+				err = vfsub_unlink(dir, &hinopath, &delegated,
+						   /*force*/0);
+				AuTraceErr(err);
+				if (unlikely(err))
+					pr_err("ignored err %d, %pd2\n",
+					       err, hinopath.dentry);
+				if (unlikely(err == -EWOULDBLOCK))
+					iput(delegated);
+				err = 0;
+			}
+			goto out_dput;
+		} else if (!d_is_positive(hinopath.dentry)) {
+			err = vfsub_create(dir, &hinopath, 0600,
+					   /*want_excl*/false);
+			AuTraceErr(err);
+			if (unlikely(err))
+				goto out_dput;
+		}
+		flags = O_WRONLY;
+	}
+	hinopath.mnt = path->mnt;
+	hinofile = vfsub_dentry_open(&hinopath, flags);
+	if (suspend)
+		au_hn_inode_unlock(hdir);
+	else
+		inode_unlock(dir);
+	dput(hinopath.dentry);
+	AuTraceErrPtr(hinofile);
+	if (IS_ERR(hinofile)) {
+		err = PTR_ERR(hinofile);
+		goto out;
+	}
+
+	if (load)
+		err = au_dr_hino_load(&br->br_dirren, hinofile);
+	else
+		err = au_dr_hino_store(sb, br, hinofile);
+	fput(hinofile);
+	goto out;
+
+out_dput:
+	dput(hinopath.dentry);
+out_unlock:
+	if (suspend)
+		au_hn_inode_unlock(hdir);
+	else
+		inode_unlock(dir);
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_dr_brid_init(struct au_dr_brid *brid, const struct path *path)
+{
+	int err;
+	struct kstatfs kstfs;
+	dev_t dev;
+	struct dentry *dentry;
+	struct super_block *sb;
+
+	err = vfs_statfs((void *)path, &kstfs);
+	AuTraceErr(err);
+	if (unlikely(err))
+		goto out;
+
+	/* todo: support for UUID */
+
+	if (kstfs.f_fsid.val[0] || kstfs.f_fsid.val[1]) {
+		brid->type = AuBrid_FSID;
+		brid->fsid = kstfs.f_fsid;
+	} else {
+		dentry = path->dentry;
+		sb = dentry->d_sb;
+		dev = sb->s_dev;
+		if (dev) {
+			brid->type = AuBrid_DEV;
+			brid->dev = dev;
+		}
+	}
+
+out:
+	return err;
+}
+
+int au_dr_br_init(struct super_block *sb, struct au_branch *br,
+		  const struct path *path)
+{
+	int err, i;
+	struct au_dr_br *dr;
+	struct hlist_bl_head *hbl;
+
+	dr = &br->br_dirren;
+	hbl = dr->dr_h_ino;
+	for (i = 0; i < AuDirren_NHASH; i++, hbl++)
+		INIT_HLIST_BL_HEAD(hbl);
+
+	err = au_dr_brid_init(&dr->dr_brid, path);
+	if (unlikely(err))
+		goto out;
+
+	if (au_opt_test(au_mntflags(sb), DIRREN))
+		err = au_dr_hino(sb, /*bindex*/-1, br, path);
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+int au_dr_br_fin(struct super_block *sb, struct au_branch *br)
+{
+	int err;
+
+	err = 0;
+	if (au_br_writable(br->br_perm))
+		err = au_dr_hino(sb, /*bindex*/-1, br, /*path*/NULL);
+	if (!err)
+		au_dr_hino_free(&br->br_dirren);
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_brid_str(struct au_dr_brid *brid, struct inode *h_inode,
+		       char *buf, size_t sz)
+{
+	int err;
+	unsigned int major, minor;
+	char *p;
+
+	p = buf;
+	err = snprintf(p, sz, "%d_", brid->type);
+	AuDebugOn(err > sz);
+	p += err;
+	sz -= err;
+	switch (brid->type) {
+	case AuBrid_Unset:
+		return -EINVAL;
+	case AuBrid_UUID:
+		err = snprintf(p, sz, "%pU", brid->uuid.b);
+		break;
+	case AuBrid_FSID:
+		err = snprintf(p, sz, "%08x-%08x",
+			       brid->fsid.val[0], brid->fsid.val[1]);
+		break;
+	case AuBrid_DEV:
+		major = MAJOR(brid->dev);
+		minor = MINOR(brid->dev);
+		if (major <= 0xff && minor <= 0xff)
+			err = snprintf(p, sz, "%02x%02x", major, minor);
+		else
+			err = snprintf(p, sz, "%03x:%05x", major, minor);
+		break;
+	}
+	AuDebugOn(err > sz);
+	p += err;
+	sz -= err;
+	err = snprintf(p, sz, "_%llu", (unsigned long long)h_inode->i_ino);
+	AuDebugOn(err > sz);
+	p += err;
+	sz -= err;
+
+	return p - buf;
+}
+
+static int au_drinfo_name(struct au_branch *br, char *name, int len)
+{
+	int rlen;
+	struct dentry *br_dentry;
+	struct inode *br_inode;
+
+	br_dentry = au_br_dentry(br);
+	br_inode = d_inode(br_dentry);
+	rlen = au_brid_str(&br->br_dirren.dr_brid, br_inode, name, len);
+	AuDebugOn(rlen >= AUFS_DIRREN_ENV_VAL_SZ);
+	AuDebugOn(rlen > len);
+
+	return rlen;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * from the given @h_dentry, construct drinfo at @*fdata.
+ * when the size of @*fdata is not enough, reallocate and return new @fdata and
+ * @allocated.
+ */
+static int au_drinfo_construct(struct au_drinfo_fdata **fdata,
+			       struct dentry *h_dentry,
+			       unsigned char *allocated)
+{
+	int err, v;
+	struct au_drinfo_fdata *f, *p;
+	struct au_drinfo *drinfo;
+	struct inode *h_inode;
+	struct qstr *qname;
+
+	err = 0;
+	f = *fdata;
+	h_inode = d_inode(h_dentry);
+	qname = &h_dentry->d_name;
+	drinfo = &f->drinfo;
+	drinfo->ino = (__force uint64_t)cpu_to_be64(h_inode->i_ino);
+	drinfo->oldnamelen = qname->len;
+	if (*allocated < sizeof(*f) + qname->len) {
+		v = roundup_pow_of_two(*allocated + qname->len);
+		p = au_krealloc(f, v, GFP_NOFS, /*may_shrink*/0);
+		if (unlikely(!p)) {
+			err = -ENOMEM;
+			AuTraceErr(err);
+			goto out;
+		}
+		f = p;
+		*fdata = f;
+		*allocated = v;
+		drinfo = &f->drinfo;
+	}
+	memcpy(drinfo->oldname, qname->name, qname->len);
+	AuDbg("i%llu, %.*s\n",
+	      be64_to_cpu((__force __be64)drinfo->ino), drinfo->oldnamelen,
+	      drinfo->oldname);
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/* callers have to free the return value */
+static struct au_drinfo *au_drinfo_read_k(struct file *file, ino_t h_ino)
+{
+	struct au_drinfo *ret, *drinfo;
+	struct au_drinfo_fdata fdata;
+	int len;
+	loff_t pos;
+	ssize_t ssz;
+
+	ret = ERR_PTR(-EIO);
+	pos = 0;
+	ssz = vfsub_read_k(file, &fdata, sizeof(fdata), &pos);
+	if (unlikely(ssz != sizeof(fdata))) {
+		AuIOErr("ssz %zd, %u, %pD2\n",
+			ssz, (unsigned int)sizeof(fdata), file);
+		goto out;
+	}
+
+	fdata.magic = ntohl((__force __be32)fdata.magic);
+	switch (fdata.magic) {
+	case AUFS_DRINFO_MAGIC_V1:
+		break;
+	default:
+		AuIOErr("magic-num 0x%x, 0x%x, %pD2\n",
+			fdata.magic, AUFS_DRINFO_MAGIC_V1, file);
+		goto out;
+	}
+
+	drinfo = &fdata.drinfo;
+	len = drinfo->oldnamelen;
+	if (!len) {
+		AuIOErr("broken drinfo %pD2\n", file);
+		goto out;
+	}
+
+	ret = NULL;
+	drinfo->ino = be64_to_cpu((__force __be64)drinfo->ino);
+	if (unlikely(h_ino && drinfo->ino != h_ino)) {
+		AuDbg("ignored i%llu, i%llu, %pD2\n",
+		      (unsigned long long)drinfo->ino,
+		      (unsigned long long)h_ino, file);
+		goto out; /* success */
+	}
+
+	ret = kmalloc(sizeof(*ret) + len, GFP_NOFS);
+	if (unlikely(!ret)) {
+		ret = ERR_PTR(-ENOMEM);
+		AuTraceErrPtr(ret);
+		goto out;
+	}
+
+	*ret = *drinfo;
+	ssz = vfsub_read_k(file, (void *)ret->oldname, len, &pos);
+	if (unlikely(ssz != len)) {
+		kfree(ret);
+		ret = ERR_PTR(-EIO);
+		AuIOErr("ssz %zd, %u, %pD2\n", ssz, len, file);
+		goto out;
+	}
+
+	AuDbg("oldname %.*s\n", ret->oldnamelen, ret->oldname);
+
+out:
+	return ret;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* in order to be revertible */
+struct au_drinfo_rev_elm {
+	int			created;
+	struct dentry		*info_dentry;
+	struct au_drinfo	*info_last;
+};
+
+struct au_drinfo_rev {
+	unsigned char			already;
+	aufs_bindex_t			nelm;
+	struct au_drinfo_rev_elm	elm[0];
+};
+
+/* todo: isn't it too large? */
+struct au_drinfo_store {
+	struct path h_ppath;
+	struct dentry *h_dentry;
+	struct au_drinfo_fdata *fdata;
+	char *infoname;			/* inside of whname, just after PFX */
+	char whname[sizeof(AUFS_WH_DR_INFO_PFX) + AUFS_DIRREN_ENV_VAL_SZ];
+	aufs_bindex_t btgt, btail;
+	unsigned char no_sio,
+		allocated,		/* current size of *fdata */
+		infonamelen,		/* room size for p */
+		whnamelen,		/* length of the genarated name */
+		renameback;		/* renamed back */
+};
+
+/* on rename(2) error, the caller should revert it using @elm */
+static int au_drinfo_do_store(struct au_drinfo_store *w,
+			      struct au_drinfo_rev_elm *elm)
+{
+	int err, len;
+	ssize_t ssz;
+	loff_t pos;
+	struct path infopath = {
+		.mnt = w->h_ppath.mnt
+	};
+	struct inode *h_dir, *h_inode, *delegated;
+	struct file *infofile;
+	struct qstr *qname;
+
+	AuDebugOn(elm
+		  && memcmp(elm, page_address(ZERO_PAGE(0)), sizeof(*elm)));
+
+	infopath.dentry = vfsub_lookup_one_len(w->whname, w->h_ppath.dentry,
+					       w->whnamelen);
+	AuTraceErrPtr(infopath.dentry);
+	if (IS_ERR(infopath.dentry)) {
+		err = PTR_ERR(infopath.dentry);
+		goto out;
+	}
+
+	err = 0;
+	h_dir = d_inode(w->h_ppath.dentry);
+	if (elm && d_is_negative(infopath.dentry)) {
+		err = vfsub_create(h_dir, &infopath, 0600, /*want_excl*/true);
+		AuTraceErr(err);
+		if (unlikely(err))
+			goto out_dput;
+		elm->created = 1;
+		elm->info_dentry = dget(infopath.dentry);
+	}
+
+	infofile = vfsub_dentry_open(&infopath, O_RDWR);
+	AuTraceErrPtr(infofile);
+	if (IS_ERR(infofile)) {
+		err = PTR_ERR(infofile);
+		goto out_dput;
+	}
+
+	h_inode = d_inode(infopath.dentry);
+	if (elm && i_size_read(h_inode)) {
+		h_inode = d_inode(w->h_dentry);
+		elm->info_last = au_drinfo_read_k(infofile, h_inode->i_ino);
+		AuTraceErrPtr(elm->info_last);
+		if (IS_ERR(elm->info_last)) {
+			err = PTR_ERR(elm->info_last);
+			elm->info_last = NULL;
+			AuDebugOn(elm->info_dentry);
+			goto out_fput;
+		}
+	}
+
+	if (elm && w->renameback) {
+		delegated = NULL;
+		err = vfsub_unlink(h_dir, &infopath, &delegated, /*force*/0);
+		AuTraceErr(err);
+		if (unlikely(err == -EWOULDBLOCK))
+			iput(delegated);
+		goto out_fput;
+	}
+
+	pos = 0;
+	qname = &w->h_dentry->d_name;
+	len = sizeof(*w->fdata) + qname->len;
+	if (!elm)
+		len = sizeof(*w->fdata) + w->fdata->drinfo.oldnamelen;
+	ssz = vfsub_write_k(infofile, w->fdata, len, &pos);
+	if (ssz == len) {
+		AuDbg("hi%llu, %.*s\n", w->fdata->drinfo.ino,
+		      w->fdata->drinfo.oldnamelen, w->fdata->drinfo.oldname);
+		goto out_fput; /* success */
+	} else {
+		err = -EIO;
+		if (ssz < 0)
+			err = ssz;
+		/* the caller should revert it using @elm */
+	}
+
+out_fput:
+	fput(infofile);
+out_dput:
+	dput(infopath.dentry);
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+struct au_call_drinfo_do_store_args {
+	int *errp;
+	struct au_drinfo_store *w;
+	struct au_drinfo_rev_elm *elm;
+};
+
+static void au_call_drinfo_do_store(void *args)
+{
+	struct au_call_drinfo_do_store_args *a = args;
+
+	*a->errp = au_drinfo_do_store(a->w, a->elm);
+}
+
+static int au_drinfo_store_sio(struct au_drinfo_store *w,
+			       struct au_drinfo_rev_elm *elm)
+{
+	int err, wkq_err;
+
+	if (w->no_sio)
+		err = au_drinfo_do_store(w, elm);
+	else {
+		struct au_call_drinfo_do_store_args a = {
+			.errp	= &err,
+			.w	= w,
+			.elm	= elm
+		};
+		wkq_err = au_wkq_wait(au_call_drinfo_do_store, &a);
+		if (unlikely(wkq_err))
+			err = wkq_err;
+	}
+	AuTraceErr(err);
+
+	return err;
+}
+
+static int au_drinfo_store_work_init(struct au_drinfo_store *w,
+				     aufs_bindex_t btgt)
+{
+	int err;
+
+	memset(w, 0, sizeof(*w));
+	w->allocated = roundup_pow_of_two(sizeof(*w->fdata) + 40);
+	strcpy(w->whname, AUFS_WH_DR_INFO_PFX);
+	w->infoname = w->whname + sizeof(AUFS_WH_DR_INFO_PFX) - 1;
+	w->infonamelen = sizeof(w->whname) - sizeof(AUFS_WH_DR_INFO_PFX);
+	w->btgt = btgt;
+	w->no_sio = !!uid_eq(current_fsuid(), GLOBAL_ROOT_UID);
+
+	err = -ENOMEM;
+	w->fdata = kcalloc(1, w->allocated, GFP_NOFS);
+	if (unlikely(!w->fdata)) {
+		AuTraceErr(err);
+		goto out;
+	}
+	w->fdata->magic = (__force uint32_t)htonl(AUFS_DRINFO_MAGIC_V1);
+	err = 0;
+
+out:
+	return err;
+}
+
+static void au_drinfo_store_work_fin(struct au_drinfo_store *w)
+{
+	kfree(w->fdata);
+}
+
+static void au_drinfo_store_rev(struct au_drinfo_rev *rev,
+				struct au_drinfo_store *w)
+{
+	struct au_drinfo_rev_elm *elm;
+	struct inode *h_dir, *delegated;
+	int err, nelm;
+	struct path infopath = {
+		.mnt = w->h_ppath.mnt
+	};
+
+	h_dir = d_inode(w->h_ppath.dentry);
+	IMustLock(h_dir);
+
+	err = 0;
+	elm = rev->elm;
+	for (nelm = rev->nelm; nelm > 0; nelm--, elm++) {
+		AuDebugOn(elm->created && elm->info_last);
+		if (elm->created) {
+			AuDbg("here\n");
+			delegated = NULL;
+			infopath.dentry = elm->info_dentry;
+			err = vfsub_unlink(h_dir, &infopath, &delegated,
+					   !w->no_sio);
+			AuTraceErr(err);
+			if (unlikely(err == -EWOULDBLOCK))
+				iput(delegated);
+			dput(elm->info_dentry);
+		} else if (elm->info_last) {
+			AuDbg("here\n");
+			w->fdata->drinfo = *elm->info_last;
+			memcpy(w->fdata->drinfo.oldname,
+			       elm->info_last->oldname,
+			       elm->info_last->oldnamelen);
+			err = au_drinfo_store_sio(w, /*elm*/NULL);
+			kfree(elm->info_last);
+		}
+		if (unlikely(err))
+			AuIOErr("%d, %s\n", err, w->whname);
+		/* go on even if err */
+	}
+}
+
+/* caller has to call au_dr_rename_fin() later */
+static int au_drinfo_store(struct dentry *dentry, aufs_bindex_t btgt,
+			   struct qstr *dst_name, void *_rev)
+{
+	int err, sz, nelm;
+	aufs_bindex_t bindex, btail;
+	struct au_drinfo_store work;
+	struct au_drinfo_rev *rev, **p;
+	struct au_drinfo_rev_elm *elm;
+	struct super_block *sb;
+	struct au_branch *br;
+	struct au_hinode *hdir;
+
+	err = au_drinfo_store_work_init(&work, btgt);
+	AuTraceErr(err);
+	if (unlikely(err))
+		goto out;
+
+	err = -ENOMEM;
+	btail = au_dbtaildir(dentry);
+	nelm = btail - btgt;
+	sz = sizeof(*rev) + sizeof(*elm) * nelm;
+	rev = kcalloc(1, sz, GFP_NOFS);
+	if (unlikely(!rev)) {
+		AuTraceErr(err);
+		goto out_args;
+	}
+	rev->nelm = nelm;
+	elm = rev->elm;
+	p = _rev;
+	*p = rev;
+
+	err = 0;
+	sb = dentry->d_sb;
+	work.h_ppath.dentry = au_h_dptr(dentry, btgt);
+	work.h_ppath.mnt = au_sbr_mnt(sb, btgt);
+	hdir = au_hi(d_inode(dentry), btgt);
+	au_hn_inode_lock_nested(hdir, AuLsc_I_CHILD);
+	for (bindex = btgt + 1; bindex <= btail; bindex++, elm++) {
+		work.h_dentry = au_h_dptr(dentry, bindex);
+		if (!work.h_dentry)
+			continue;
+
+		err = au_drinfo_construct(&work.fdata, work.h_dentry,
+					  &work.allocated);
+		AuTraceErr(err);
+		if (unlikely(err))
+			break;
+
+		work.renameback = au_qstreq(&work.h_dentry->d_name, dst_name);
+		br = au_sbr(sb, bindex);
+		work.whnamelen = sizeof(AUFS_WH_DR_INFO_PFX) - 1;
+		work.whnamelen += au_drinfo_name(br, work.infoname,
+						 work.infonamelen);
+		AuDbg("whname %.*s, i%llu, %.*s\n",
+		      work.whnamelen, work.whname,
+		      be64_to_cpu((__force __be64)work.fdata->drinfo.ino),
+		      work.fdata->drinfo.oldnamelen,
+		      work.fdata->drinfo.oldname);
+
+		err = au_drinfo_store_sio(&work, elm);
+		AuTraceErr(err);
+		if (unlikely(err))
+			break;
+	}
+	if (unlikely(err)) {
+		/* revert all drinfo */
+		au_drinfo_store_rev(rev, &work);
+		kfree(rev);
+		*p = NULL;
+	}
+	au_hn_inode_unlock(hdir);
+
+out_args:
+	au_drinfo_store_work_fin(&work);
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int au_dr_rename(struct dentry *src, aufs_bindex_t bindex,
+		 struct qstr *dst_name, void *_rev)
+{
+	int err, already;
+	ino_t ino;
+	struct super_block *sb;
+	struct au_branch *br;
+	struct au_dr_br *dr;
+	struct dentry *h_dentry;
+	struct inode *h_inode;
+	struct au_dr_hino *ent;
+	struct au_drinfo_rev *rev, **p;
+
+	AuDbg("bindex %d\n", bindex);
+
+	err = -ENOMEM;
+	ent = kmalloc(sizeof(*ent), GFP_NOFS);
+	if (unlikely(!ent))
+		goto out;
+
+	sb = src->d_sb;
+	br = au_sbr(sb, bindex);
+	dr = &br->br_dirren;
+	h_dentry = au_h_dptr(src, bindex);
+	h_inode = d_inode(h_dentry);
+	ino = h_inode->i_ino;
+	ent->dr_h_ino = ino;
+	already = au_dr_hino_test_add(dr, ino, ent);
+	AuDbg("b%d, hi%llu, already %d\n",
+	      bindex, (unsigned long long)ino, already);
+
+	err = au_drinfo_store(src, bindex, dst_name, _rev);
+	AuTraceErr(err);
+	if (!err) {
+		p = _rev;
+		rev = *p;
+		rev->already = already;
+		goto out; /* success */
+	}
+
+	/* revert */
+	if (!already)
+		au_dr_hino_del(dr, ent);
+	kfree(ent);
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+void au_dr_rename_fin(struct dentry *src, aufs_bindex_t btgt, void *_rev)
+{
+	struct au_drinfo_rev *rev;
+	struct au_drinfo_rev_elm *elm;
+	int nelm;
+
+	rev = _rev;
+	elm = rev->elm;
+	for (nelm = rev->nelm; nelm > 0; nelm--, elm++) {
+		dput(elm->info_dentry);
+		kfree(elm->info_last);
+	}
+	kfree(rev);
+}
+
+void au_dr_rename_rev(struct dentry *src, aufs_bindex_t btgt, void *_rev)
+{
+	int err;
+	struct au_drinfo_store work;
+	struct au_drinfo_rev *rev = _rev;
+	struct super_block *sb;
+	struct au_branch *br;
+	struct inode *h_inode;
+	struct au_dr_br *dr;
+	struct au_dr_hino *ent;
+
+	err = au_drinfo_store_work_init(&work, btgt);
+	if (unlikely(err))
+		goto out;
+
+	sb = src->d_sb;
+	br = au_sbr(sb, btgt);
+	work.h_ppath.dentry = au_h_dptr(src, btgt);
+	work.h_ppath.mnt = au_br_mnt(br);
+	au_drinfo_store_rev(rev, &work);
+	au_drinfo_store_work_fin(&work);
+	if (rev->already)
+		goto out;
+
+	dr = &br->br_dirren;
+	h_inode = d_inode(work.h_ppath.dentry);
+	ent = au_dr_hino_find(dr, h_inode->i_ino);
+	BUG_ON(!ent);
+	au_dr_hino_del(dr, ent);
+	kfree(ent);
+
+out:
+	kfree(rev);
+	if (unlikely(err))
+		pr_err("failed to remove dirren info\n");
+}
+
+/* ---------------------------------------------------------------------- */
+
+static struct au_drinfo *au_drinfo_do_load(struct path *h_ppath,
+					   char *whname, int whnamelen,
+					   struct dentry **info_dentry)
+{
+	struct au_drinfo *drinfo;
+	struct file *f;
+	struct inode *h_dir;
+	struct path infopath;
+	int unlocked;
+
+	AuDbg("%pd/%.*s\n", h_ppath->dentry, whnamelen, whname);
+
+	*info_dentry = NULL;
+	drinfo = NULL;
+	unlocked = 0;
+	h_dir = d_inode(h_ppath->dentry);
+	inode_lock_shared_nested(h_dir, AuLsc_I_PARENT);
+	infopath.dentry = vfsub_lookup_one_len(whname, h_ppath->dentry,
+					       whnamelen);
+	if (IS_ERR(infopath.dentry)) {
+		drinfo = (void *)infopath.dentry;
+		goto out;
+	}
+
+	if (d_is_negative(infopath.dentry))
+		goto out_dput; /* success */
+
+	infopath.mnt = h_ppath->mnt;
+	f = vfsub_dentry_open(&infopath, O_RDONLY);
+	inode_unlock_shared(h_dir);
+	unlocked = 1;
+	if (IS_ERR(f)) {
+		drinfo = (void *)f;
+		goto out_dput;
+	}
+
+	drinfo = au_drinfo_read_k(f, /*h_ino*/0);
+	if (IS_ERR_OR_NULL(drinfo))
+		goto out_fput;
+
+	AuDbg("oldname %.*s\n", drinfo->oldnamelen, drinfo->oldname);
+	*info_dentry = dget(infopath.dentry); /* keep it alive */
+
+out_fput:
+	fput(f);
+out_dput:
+	dput(infopath.dentry);
+out:
+	if (!unlocked)
+		inode_unlock_shared(h_dir);
+	AuTraceErrPtr(drinfo);
+	return drinfo;
+}
+
+struct au_drinfo_do_load_args {
+	struct au_drinfo **drinfop;
+	struct path *h_ppath;
+	char *whname;
+	int whnamelen;
+	struct dentry **info_dentry;
+};
+
+static void au_call_drinfo_do_load(void *args)
+{
+	struct au_drinfo_do_load_args *a = args;
+
+	*a->drinfop = au_drinfo_do_load(a->h_ppath, a->whname, a->whnamelen,
+					a->info_dentry);
+}
+
+struct au_drinfo_load {
+	struct path h_ppath;
+	struct qstr *qname;
+	unsigned char no_sio;
+
+	aufs_bindex_t ninfo;
+	struct au_drinfo **drinfo;
+};
+
+static int au_drinfo_load(struct au_drinfo_load *w, aufs_bindex_t bindex,
+			  struct au_branch *br)
+{
+	int err, wkq_err, whnamelen, e;
+	char whname[sizeof(AUFS_WH_DR_INFO_PFX) + AUFS_DIRREN_ENV_VAL_SZ]
+		= AUFS_WH_DR_INFO_PFX;
+	struct au_drinfo *drinfo;
+	struct qstr oldname;
+	struct inode *h_dir, *delegated;
+	struct dentry *info_dentry;
+	struct path infopath;
+
+	whnamelen = sizeof(AUFS_WH_DR_INFO_PFX) - 1;
+	whnamelen += au_drinfo_name(br, whname + whnamelen,
+				    sizeof(whname) - whnamelen);
+	if (w->no_sio)
+		drinfo = au_drinfo_do_load(&w->h_ppath, whname, whnamelen,
+					   &info_dentry);
+	else {
+		struct au_drinfo_do_load_args args = {
+			.drinfop	= &drinfo,
+			.h_ppath	= &w->h_ppath,
+			.whname		= whname,
+			.whnamelen	= whnamelen,
+			.info_dentry	= &info_dentry
+		};
+		wkq_err = au_wkq_wait(au_call_drinfo_do_load, &args);
+		if (unlikely(wkq_err))
+			drinfo = ERR_PTR(wkq_err);
+	}
+	err = PTR_ERR(drinfo);
+	if (IS_ERR_OR_NULL(drinfo))
+		goto out;
+
+	err = 0;
+	oldname.len = drinfo->oldnamelen;
+	oldname.name = drinfo->oldname;
+	if (au_qstreq(w->qname, &oldname)) {
+		/* the name is renamed back */
+		kfree(drinfo);
+		drinfo = NULL;
+
+		infopath.dentry = info_dentry;
+		infopath.mnt = w->h_ppath.mnt;
+		h_dir = d_inode(w->h_ppath.dentry);
+		delegated = NULL;
+		inode_lock_nested(h_dir, AuLsc_I_PARENT);
+		e = vfsub_unlink(h_dir, &infopath, &delegated, !w->no_sio);
+		inode_unlock(h_dir);
+		if (unlikely(e))
+			AuIOErr("ignored %d, %pd2\n", e, &infopath.dentry);
+		if (unlikely(e == -EWOULDBLOCK))
+			iput(delegated);
+	}
+	kfree(w->drinfo[bindex]);
+	w->drinfo[bindex] = drinfo;
+	dput(info_dentry);
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static void au_dr_lkup_free(struct au_drinfo **drinfo, int n)
+{
+	struct au_drinfo **p = drinfo;
+
+	while (n-- > 0)
+		kfree(*drinfo++);
+	kfree(p);
+}
+
+int au_dr_lkup(struct au_do_lookup_args *lkup, struct dentry *dentry,
+	       aufs_bindex_t btgt)
+{
+	int err, ninfo;
+	struct au_drinfo_load w;
+	aufs_bindex_t bindex, bbot;
+	struct au_branch *br;
+	struct inode *h_dir;
+	struct au_dr_hino *ent;
+	struct super_block *sb;
+
+	AuDbg("%.*s, name %.*s, whname %.*s, b%d\n",
+	      AuLNPair(&dentry->d_name), AuLNPair(&lkup->dirren.dr_name),
+	      AuLNPair(&lkup->whname), btgt);
+
+	sb = dentry->d_sb;
+	bbot = au_sbbot(sb);
+	w.ninfo = bbot + 1;
+	if (!lkup->dirren.drinfo) {
+		lkup->dirren.drinfo = kcalloc(w.ninfo,
+					      sizeof(*lkup->dirren.drinfo),
+					      GFP_NOFS);
+		if (unlikely(!lkup->dirren.drinfo)) {
+			err = -ENOMEM;
+			goto out;
+		}
+		lkup->dirren.ninfo = w.ninfo;
+	}
+	w.drinfo = lkup->dirren.drinfo;
+	w.no_sio = !!uid_eq(current_fsuid(), GLOBAL_ROOT_UID);
+	w.h_ppath.dentry = au_h_dptr(dentry, btgt);
+	AuDebugOn(!w.h_ppath.dentry);
+	w.h_ppath.mnt = au_sbr_mnt(sb, btgt);
+	w.qname = &dentry->d_name;
+
+	ninfo = 0;
+	for (bindex = btgt + 1; bindex <= bbot; bindex++) {
+		br = au_sbr(sb, bindex);
+		err = au_drinfo_load(&w, bindex, br);
+		if (unlikely(err))
+			goto out_free;
+		if (w.drinfo[bindex])
+			ninfo++;
+	}
+	if (!ninfo) {
+		br = au_sbr(sb, btgt);
+		h_dir = d_inode(w.h_ppath.dentry);
+		ent = au_dr_hino_find(&br->br_dirren, h_dir->i_ino);
+		AuDebugOn(!ent);
+		au_dr_hino_del(&br->br_dirren, ent);
+		kfree(ent);
+	}
+	goto out; /* success */
+
+out_free:
+	au_dr_lkup_free(lkup->dirren.drinfo, lkup->dirren.ninfo);
+	lkup->dirren.ninfo = 0;
+	lkup->dirren.drinfo = NULL;
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+void au_dr_lkup_fin(struct au_do_lookup_args *lkup)
+{
+	au_dr_lkup_free(lkup->dirren.drinfo, lkup->dirren.ninfo);
+}
+
+int au_dr_lkup_name(struct au_do_lookup_args *lkup, aufs_bindex_t btgt)
+{
+	int err;
+	struct au_drinfo *drinfo;
+
+	err = 0;
+	if (!lkup->dirren.drinfo)
+		goto out;
+	AuDebugOn(lkup->dirren.ninfo < btgt + 1);
+	drinfo = lkup->dirren.drinfo[btgt + 1];
+	if (!drinfo)
+		goto out;
+
+	kfree(lkup->whname.name);
+	lkup->whname.name = NULL;
+	lkup->dirren.dr_name.len = drinfo->oldnamelen;
+	lkup->dirren.dr_name.name = drinfo->oldname;
+	lkup->name = &lkup->dirren.dr_name;
+	err = au_wh_name_alloc(&lkup->whname, lkup->name);
+	if (!err)
+		AuDbg("name %.*s, whname %.*s, b%d\n",
+		      AuLNPair(lkup->name), AuLNPair(&lkup->whname),
+		      btgt);
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+int au_dr_lkup_h_ino(struct au_do_lookup_args *lkup, aufs_bindex_t bindex,
+		     ino_t h_ino)
+{
+	int match;
+	struct au_drinfo *drinfo;
+
+	match = 1;
+	if (!lkup->dirren.drinfo)
+		goto out;
+	AuDebugOn(lkup->dirren.ninfo < bindex + 1);
+	drinfo = lkup->dirren.drinfo[bindex + 1];
+	if (!drinfo)
+		goto out;
+
+	match = (drinfo->ino == h_ino);
+	AuDbg("match %d\n", match);
+
+out:
+	return match;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int au_dr_opt_set(struct super_block *sb)
+{
+	int err;
+	aufs_bindex_t bindex, bbot;
+	struct au_branch *br;
+
+	err = 0;
+	bbot = au_sbbot(sb);
+	for (bindex = 0; !err && bindex <= bbot; bindex++) {
+		br = au_sbr(sb, bindex);
+		err = au_dr_hino(sb, bindex, /*br*/NULL, &br->br_path);
+	}
+
+	return err;
+}
+
+int au_dr_opt_flush(struct super_block *sb)
+{
+	int err;
+	aufs_bindex_t bindex, bbot;
+	struct au_branch *br;
+
+	err = 0;
+	bbot = au_sbbot(sb);
+	for (bindex = 0; !err && bindex <= bbot; bindex++) {
+		br = au_sbr(sb, bindex);
+		if (au_br_writable(br->br_perm))
+			err = au_dr_hino(sb, bindex, /*br*/NULL, /*path*/NULL);
+	}
+
+	return err;
+}
+
+int au_dr_opt_clr(struct super_block *sb, int no_flush)
+{
+	int err;
+	aufs_bindex_t bindex, bbot;
+	struct au_branch *br;
+
+	err = 0;
+	if (!no_flush) {
+		err = au_dr_opt_flush(sb);
+		if (unlikely(err))
+			goto out;
+	}
+
+	bbot = au_sbbot(sb);
+	for (bindex = 0; bindex <= bbot; bindex++) {
+		br = au_sbr(sb, bindex);
+		au_dr_hino_free(&br->br_dirren);
+	}
+
+out:
+	return err;
+}
diff --git a/fs/aufs/dirren.h b/fs/aufs/dirren.h
new file mode 100644
index 000000000..847480eeb
--- /dev/null
+++ b/fs/aufs/dirren.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2017-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * renamed dir info
+ */
+
+#ifndef __AUFS_DIRREN_H__
+#define __AUFS_DIRREN_H__
+
+#ifdef __KERNEL__
+
+#include <linux/dcache.h>
+#include <linux/statfs.h>
+#include <linux/uuid.h>
+#include "hbl.h"
+
+#define AuDirren_NHASH 100
+
+#ifdef CONFIG_AUFS_DIRREN
+enum au_brid_type {
+	AuBrid_Unset,
+	AuBrid_UUID,
+	AuBrid_FSID,
+	AuBrid_DEV
+};
+
+struct au_dr_brid {
+	enum au_brid_type	type;
+	union {
+		uuid_t	uuid;	/* unimplemented yet */
+		fsid_t	fsid;
+		dev_t	dev;
+	};
+};
+
+/* 20 is the max digits length of ulong 64 */
+/* brid-type "_" uuid "_" inum */
+#define AUFS_DIRREN_FNAME_SZ	(1 + 1 + UUID_STRING_LEN + 20)
+#define AUFS_DIRREN_ENV_VAL_SZ	(AUFS_DIRREN_FNAME_SZ + 1 + 20)
+
+struct au_dr_hino {
+	struct hlist_bl_node	dr_hnode;
+	ino_t			dr_h_ino;
+};
+
+struct au_dr_br {
+	struct hlist_bl_head	dr_h_ino[AuDirren_NHASH];
+	struct au_dr_brid	dr_brid;
+};
+
+struct au_dr_lookup {
+	/* dr_name is pointed by struct au_do_lookup_args.name */
+	struct qstr		dr_name; /* subset of dr_info */
+	aufs_bindex_t		ninfo;
+	struct au_drinfo	**drinfo;
+};
+#else
+struct au_dr_hino;
+/* empty */
+struct au_dr_br { };
+struct au_dr_lookup { };
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+struct au_branch;
+struct au_do_lookup_args;
+struct au_hinode;
+#ifdef CONFIG_AUFS_DIRREN
+int au_dr_hino_test_add(struct au_dr_br *dr, ino_t h_ino,
+			struct au_dr_hino *add_ent);
+void au_dr_hino_free(struct au_dr_br *dr);
+int au_dr_br_init(struct super_block *sb, struct au_branch *br,
+		  const struct path *path);
+int au_dr_br_fin(struct super_block *sb, struct au_branch *br);
+int au_dr_rename(struct dentry *src, aufs_bindex_t bindex,
+		 struct qstr *dst_name, void *_rev);
+void au_dr_rename_fin(struct dentry *src, aufs_bindex_t btgt, void *rev);
+void au_dr_rename_rev(struct dentry *src, aufs_bindex_t bindex, void *rev);
+int au_dr_lkup(struct au_do_lookup_args *lkup, struct dentry *dentry,
+	       aufs_bindex_t bindex);
+int au_dr_lkup_name(struct au_do_lookup_args *lkup, aufs_bindex_t btgt);
+int au_dr_lkup_h_ino(struct au_do_lookup_args *lkup, aufs_bindex_t bindex,
+		     ino_t h_ino);
+void au_dr_lkup_fin(struct au_do_lookup_args *lkup);
+int au_dr_opt_set(struct super_block *sb);
+int au_dr_opt_flush(struct super_block *sb);
+int au_dr_opt_clr(struct super_block *sb, int no_flush);
+#else
+AuStubInt0(au_dr_hino_test_add, struct au_dr_br *dr, ino_t h_ino,
+	   struct au_dr_hino *add_ent);
+AuStubVoid(au_dr_hino_free, struct au_dr_br *dr);
+AuStubInt0(au_dr_br_init, struct super_block *sb, struct au_branch *br,
+	   const struct path *path);
+AuStubInt0(au_dr_br_fin, struct super_block *sb, struct au_branch *br);
+AuStubInt0(au_dr_rename, struct dentry *src, aufs_bindex_t bindex,
+	   struct qstr *dst_name, void *_rev);
+AuStubVoid(au_dr_rename_fin, struct dentry *src, aufs_bindex_t btgt, void *rev);
+AuStubVoid(au_dr_rename_rev, struct dentry *src, aufs_bindex_t bindex,
+	   void *rev);
+AuStubInt0(au_dr_lkup, struct au_do_lookup_args *lkup, struct dentry *dentry,
+	   aufs_bindex_t bindex);
+AuStubInt0(au_dr_lkup_name, struct au_do_lookup_args *lkup, aufs_bindex_t btgt);
+AuStubInt0(au_dr_lkup_h_ino, struct au_do_lookup_args *lkup,
+	   aufs_bindex_t bindex, ino_t h_ino);
+AuStubVoid(au_dr_lkup_fin, struct au_do_lookup_args *lkup);
+AuStubInt0(au_dr_opt_set, struct super_block *sb);
+AuStubInt0(au_dr_opt_flush, struct super_block *sb);
+AuStubInt0(au_dr_opt_clr, struct super_block *sb, int no_flush);
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+#ifdef CONFIG_AUFS_DIRREN
+static inline int au_dr_ihash(ino_t h_ino)
+{
+	return h_ino % AuDirren_NHASH;
+}
+#else
+AuStubInt0(au_dr_ihash, ino_t h_ino);
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_DIRREN_H__ */
diff --git a/fs/aufs/dynop.c b/fs/aufs/dynop.c
new file mode 100644
index 000000000..d38dd77a7
--- /dev/null
+++ b/fs/aufs/dynop.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright (C) 2010-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * dynamically customizable operations for regular files
+ */
+
+#include "aufs.h"
+
+#define DyPrSym(key)	AuDbgSym(key->dk_op.dy_hop)
+
+/*
+ * How large will these lists be?
+ * Usually just a few elements, 20-30 at most for each, I guess.
+ */
+static struct hlist_bl_head dynop[AuDyLast];
+
+static struct au_dykey *dy_gfind_get(struct hlist_bl_head *hbl,
+				     const void *h_op)
+{
+	struct au_dykey *key, *tmp;
+	struct hlist_bl_node *pos;
+
+	key = NULL;
+	hlist_bl_lock(hbl);
+	hlist_bl_for_each_entry(tmp, pos, hbl, dk_hnode)
+		if (tmp->dk_op.dy_hop == h_op) {
+			key = tmp;
+			kref_get(&key->dk_kref);
+			break;
+		}
+	hlist_bl_unlock(hbl);
+
+	return key;
+}
+
+static struct au_dykey *dy_bradd(struct au_branch *br, struct au_dykey *key)
+{
+	struct au_dykey **k, *found;
+	const void *h_op = key->dk_op.dy_hop;
+	int i;
+
+	found = NULL;
+	k = br->br_dykey;
+	for (i = 0; i < AuBrDynOp; i++)
+		if (k[i]) {
+			if (k[i]->dk_op.dy_hop == h_op) {
+				found = k[i];
+				break;
+			}
+		} else
+			break;
+	if (!found) {
+		spin_lock(&br->br_dykey_lock);
+		for (; i < AuBrDynOp; i++)
+			if (k[i]) {
+				if (k[i]->dk_op.dy_hop == h_op) {
+					found = k[i];
+					break;
+				}
+			} else {
+				k[i] = key;
+				break;
+			}
+		spin_unlock(&br->br_dykey_lock);
+		BUG_ON(i == AuBrDynOp); /* expand the array */
+	}
+
+	return found;
+}
+
+/* kref_get() if @key is already added */
+static struct au_dykey *dy_gadd(struct hlist_bl_head *hbl, struct au_dykey *key)
+{
+	struct au_dykey *tmp, *found;
+	struct hlist_bl_node *pos;
+	const void *h_op = key->dk_op.dy_hop;
+
+	found = NULL;
+	hlist_bl_lock(hbl);
+	hlist_bl_for_each_entry(tmp, pos, hbl, dk_hnode)
+		if (tmp->dk_op.dy_hop == h_op) {
+			kref_get(&tmp->dk_kref);
+			found = tmp;
+			break;
+		}
+	if (!found)
+		hlist_bl_add_head(&key->dk_hnode, hbl);
+	hlist_bl_unlock(hbl);
+
+	if (!found)
+		DyPrSym(key);
+	return found;
+}
+
+static void dy_free_rcu(struct rcu_head *rcu)
+{
+	struct au_dykey *key;
+
+	key = container_of(rcu, struct au_dykey, dk_rcu);
+	DyPrSym(key);
+	kfree(key);
+}
+
+static void dy_free(struct kref *kref)
+{
+	struct au_dykey *key;
+	struct hlist_bl_head *hbl;
+
+	key = container_of(kref, struct au_dykey, dk_kref);
+	hbl = dynop + key->dk_op.dy_type;
+	au_hbl_del(&key->dk_hnode, hbl);
+	call_rcu(&key->dk_rcu, dy_free_rcu);
+}
+
+void au_dy_put(struct au_dykey *key)
+{
+	kref_put(&key->dk_kref, dy_free);
+}
+
+/* ---------------------------------------------------------------------- */
+
+#define DyDbgSize(cnt, op)	AuDebugOn(cnt != sizeof(op)/sizeof(void *))
+
+#ifdef CONFIG_AUFS_DEBUG
+#define DyDbgDeclare(cnt)	unsigned int cnt = 0
+#define DyDbgInc(cnt)		do { cnt++; } while (0)
+#else
+#define DyDbgDeclare(cnt)	do {} while (0)
+#define DyDbgInc(cnt)		do {} while (0)
+#endif
+
+#define DySet(func, dst, src, h_op, h_sb) do {				\
+	DyDbgInc(cnt);							\
+	if (h_op->func) {						\
+		if (src.func)						\
+			dst.func = src.func;				\
+		else							\
+			AuDbg("%s %s\n", au_sbtype(h_sb), #func);	\
+	}								\
+} while (0)
+
+#define DySetForce(func, dst, src) do {		\
+	AuDebugOn(!src.func);			\
+	DyDbgInc(cnt);				\
+	dst.func = src.func;			\
+} while (0)
+
+#define DySetAop(func) \
+	DySet(func, dyaop->da_op, aufs_aop, h_aop, h_sb)
+#define DySetAopForce(func) \
+	DySetForce(func, dyaop->da_op, aufs_aop)
+
+static void dy_aop(struct au_dykey *key, const void *h_op,
+		   struct super_block *h_sb __maybe_unused)
+{
+	struct au_dyaop *dyaop = (void *)key;
+	const struct address_space_operations *h_aop = h_op;
+	DyDbgDeclare(cnt);
+
+	AuDbg("%s\n", au_sbtype(h_sb));
+
+	DySetAop(writepage);
+	DySetAopForce(readpage);	/* force */
+	DySetAop(writepages);
+	DySetAop(set_page_dirty);
+	DySetAop(readpages);
+	DySetAop(write_begin);
+	DySetAop(write_end);
+	DySetAop(bmap);
+	DySetAop(invalidatepage);
+	DySetAop(releasepage);
+	DySetAop(freepage);
+	/* this one will be changed according to an aufs mount option */
+	DySetAop(direct_IO);
+	DySetAop(migratepage);
+	DySetAop(isolate_page);
+	DySetAop(putback_page);
+	DySetAop(launder_page);
+	DySetAop(is_partially_uptodate);
+	DySetAop(is_dirty_writeback);
+	DySetAop(error_remove_page);
+	DySetAop(swap_activate);
+	DySetAop(swap_deactivate);
+
+	DyDbgSize(cnt, *h_aop);
+}
+
+/* ---------------------------------------------------------------------- */
+
+static void dy_bug(struct kref *kref)
+{
+	BUG();
+}
+
+static struct au_dykey *dy_get(struct au_dynop *op, struct au_branch *br)
+{
+	struct au_dykey *key, *old;
+	struct hlist_bl_head *hbl;
+	struct op {
+		unsigned int sz;
+		void (*set)(struct au_dykey *key, const void *h_op,
+			    struct super_block *h_sb __maybe_unused);
+	};
+	static const struct op a[] = {
+		[AuDy_AOP] = {
+			.sz	= sizeof(struct au_dyaop),
+			.set	= dy_aop
+		}
+	};
+	const struct op *p;
+
+	hbl = dynop + op->dy_type;
+	key = dy_gfind_get(hbl, op->dy_hop);
+	if (key)
+		goto out_add; /* success */
+
+	p = a + op->dy_type;
+	key = kzalloc(p->sz, GFP_NOFS);
+	if (unlikely(!key)) {
+		key = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	key->dk_op.dy_hop = op->dy_hop;
+	kref_init(&key->dk_kref);
+	p->set(key, op->dy_hop, au_br_sb(br));
+	old = dy_gadd(hbl, key);
+	if (old) {
+		kfree(key);
+		key = old;
+	}
+
+out_add:
+	old = dy_bradd(br, key);
+	if (old)
+		/* its ref-count should never be zero here */
+		kref_put(&key->dk_kref, dy_bug);
+out:
+	return key;
+}
+
+/* ---------------------------------------------------------------------- */
+/*
+ * Aufs prohibits O_DIRECT by defaut even if the branch supports it.
+ * This behaviour is necessary to return an error from open(O_DIRECT) instead
+ * of the succeeding I/O. The dio mount option enables O_DIRECT and makes
+ * open(O_DIRECT) always succeed, but the succeeding I/O may return an error.
+ * See the aufs manual in detail.
+ */
+static void dy_adx(struct au_dyaop *dyaop, int do_dx)
+{
+	if (!do_dx)
+		dyaop->da_op.direct_IO = NULL;
+	else
+		dyaop->da_op.direct_IO = aufs_aop.direct_IO;
+}
+
+static struct au_dyaop *dy_aget(struct au_branch *br,
+				const struct address_space_operations *h_aop,
+				int do_dx)
+{
+	struct au_dyaop *dyaop;
+	struct au_dynop op;
+
+	op.dy_type = AuDy_AOP;
+	op.dy_haop = h_aop;
+	dyaop = (void *)dy_get(&op, br);
+	if (IS_ERR(dyaop))
+		goto out;
+	dy_adx(dyaop, do_dx);
+
+out:
+	return dyaop;
+}
+
+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
+		struct inode *h_inode)
+{
+	int err, do_dx;
+	struct super_block *sb;
+	struct au_branch *br;
+	struct au_dyaop *dyaop;
+
+	AuDebugOn(!S_ISREG(h_inode->i_mode));
+	IiMustWriteLock(inode);
+
+	sb = inode->i_sb;
+	br = au_sbr(sb, bindex);
+	do_dx = !!au_opt_test(au_mntflags(sb), DIO);
+	dyaop = dy_aget(br, h_inode->i_mapping->a_ops, do_dx);
+	err = PTR_ERR(dyaop);
+	if (IS_ERR(dyaop))
+		/* unnecessary to call dy_fput() */
+		goto out;
+
+	err = 0;
+	inode->i_mapping->a_ops = &dyaop->da_op;
+
+out:
+	return err;
+}
+
+/*
+ * Is it safe to replace a_ops during the inode/file is in operation?
+ * Yes, I hope so.
+ */
+int au_dy_irefresh(struct inode *inode)
+{
+	int err;
+	aufs_bindex_t btop;
+	struct inode *h_inode;
+
+	err = 0;
+	if (S_ISREG(inode->i_mode)) {
+		btop = au_ibtop(inode);
+		h_inode = au_h_iptr(inode, btop);
+		err = au_dy_iaop(inode, btop, h_inode);
+	}
+	return err;
+}
+
+void au_dy_arefresh(int do_dx)
+{
+	struct hlist_bl_head *hbl;
+	struct hlist_bl_node *pos;
+	struct au_dykey *key;
+
+	hbl = dynop + AuDy_AOP;
+	hlist_bl_lock(hbl);
+	hlist_bl_for_each_entry(key, pos, hbl, dk_hnode)
+		dy_adx((void *)key, do_dx);
+	hlist_bl_unlock(hbl);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void __init au_dy_init(void)
+{
+	int i;
+
+	/* make sure that 'struct au_dykey *' can be any type */
+	BUILD_BUG_ON(offsetof(struct au_dyaop, da_key));
+
+	for (i = 0; i < AuDyLast; i++)
+		INIT_HLIST_BL_HEAD(dynop + i);
+}
+
+void au_dy_fin(void)
+{
+	int i;
+
+	for (i = 0; i < AuDyLast; i++)
+		WARN_ON(!hlist_bl_empty(dynop + i));
+}
diff --git a/fs/aufs/dynop.h b/fs/aufs/dynop.h
new file mode 100644
index 000000000..81a770505
--- /dev/null
+++ b/fs/aufs/dynop.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2010-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * dynamically customizable operations (for regular files only)
+ */
+
+#ifndef __AUFS_DYNOP_H__
+#define __AUFS_DYNOP_H__
+
+#ifdef __KERNEL__
+
+#include <linux/fs.h>
+#include <linux/kref.h>
+
+enum {AuDy_AOP, AuDyLast};
+
+struct au_dynop {
+	int						dy_type;
+	union {
+		const void				*dy_hop;
+		const struct address_space_operations	*dy_haop;
+	};
+};
+
+struct au_dykey {
+	union {
+		struct hlist_bl_node	dk_hnode;
+		struct rcu_head		dk_rcu;
+	};
+	struct au_dynop		dk_op;
+
+	/*
+	 * during I am in the branch local array, kref is gotten. when the
+	 * branch is removed, kref is put.
+	 */
+	struct kref		dk_kref;
+};
+
+/* stop unioning since their sizes are very different from each other */
+struct au_dyaop {
+	struct au_dykey			da_key;
+	struct address_space_operations	da_op; /* not const */
+};
+
+/* ---------------------------------------------------------------------- */
+
+/* dynop.c */
+struct au_branch;
+void au_dy_put(struct au_dykey *key);
+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
+		struct inode *h_inode);
+int au_dy_irefresh(struct inode *inode);
+void au_dy_arefresh(int do_dio);
+
+void __init au_dy_init(void);
+void au_dy_fin(void);
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_DYNOP_H__ */
diff --git a/fs/aufs/export.c b/fs/aufs/export.c
new file mode 100644
index 000000000..155e07a8e
--- /dev/null
+++ b/fs/aufs/export.c
@@ -0,0 +1,836 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * export via nfs
+ */
+
+#include <linux/exportfs.h>
+#include <linux/fs_struct.h>
+#include <linux/namei.h>
+#include <linux/nsproxy.h>
+#include <linux/random.h>
+#include <linux/writeback.h>
+#include "aufs.h"
+
+union conv {
+#ifdef CONFIG_AUFS_INO_T_64
+	__u32 a[2];
+#else
+	__u32 a[1];
+#endif
+	ino_t ino;
+};
+
+static ino_t decode_ino(__u32 *a)
+{
+	union conv u;
+
+	BUILD_BUG_ON(sizeof(u.ino) != sizeof(u.a));
+	u.a[0] = a[0];
+#ifdef CONFIG_AUFS_INO_T_64
+	u.a[1] = a[1];
+#endif
+	return u.ino;
+}
+
+static void encode_ino(__u32 *a, ino_t ino)
+{
+	union conv u;
+
+	u.ino = ino;
+	a[0] = u.a[0];
+#ifdef CONFIG_AUFS_INO_T_64
+	a[1] = u.a[1];
+#endif
+}
+
+/* NFS file handle */
+enum {
+	Fh_br_id,
+	Fh_sigen,
+#ifdef CONFIG_AUFS_INO_T_64
+	/* support 64bit inode number */
+	Fh_ino1,
+	Fh_ino2,
+	Fh_dir_ino1,
+	Fh_dir_ino2,
+#else
+	Fh_ino1,
+	Fh_dir_ino1,
+#endif
+	Fh_igen,
+	Fh_h_type,
+	Fh_tail,
+
+	Fh_ino = Fh_ino1,
+	Fh_dir_ino = Fh_dir_ino1
+};
+
+static int au_test_anon(struct dentry *dentry)
+{
+	/* note: read d_flags without d_lock */
+	return !!(dentry->d_flags & DCACHE_DISCONNECTED);
+}
+
+int au_test_nfsd(void)
+{
+	int ret;
+	struct task_struct *tsk = current;
+	char comm[sizeof(tsk->comm)];
+
+	ret = 0;
+	if (tsk->flags & PF_KTHREAD) {
+		get_task_comm(comm, tsk);
+		ret = !strcmp(comm, "nfsd");
+	}
+
+	return ret;
+}
+
+/* ---------------------------------------------------------------------- */
+/* inode generation external table */
+
+void au_xigen_inc(struct inode *inode)
+{
+	loff_t pos;
+	ssize_t sz;
+	__u32 igen;
+	struct super_block *sb;
+	struct au_sbinfo *sbinfo;
+
+	sb = inode->i_sb;
+	AuDebugOn(!au_opt_test(au_mntflags(sb), XINO));
+
+	sbinfo = au_sbi(sb);
+	pos = inode->i_ino;
+	pos *= sizeof(igen);
+	igen = inode->i_generation + 1;
+	sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xigen, &igen,
+			 sizeof(igen), &pos);
+	if (sz == sizeof(igen))
+		return; /* success */
+
+	if (unlikely(sz >= 0))
+		AuIOErr("xigen error (%zd)\n", sz);
+}
+
+int au_xigen_new(struct inode *inode)
+{
+	int err;
+	loff_t pos;
+	ssize_t sz;
+	struct super_block *sb;
+	struct au_sbinfo *sbinfo;
+	struct file *file;
+
+	err = 0;
+	/* todo: dirty, at mount time */
+	if (inode->i_ino == AUFS_ROOT_INO)
+		goto out;
+	sb = inode->i_sb;
+	SiMustAnyLock(sb);
+	if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
+		goto out;
+
+	err = -EFBIG;
+	pos = inode->i_ino;
+	if (unlikely(au_loff_max / sizeof(inode->i_generation) - 1 < pos)) {
+		AuIOErr1("too large i%lld\n", pos);
+		goto out;
+	}
+	pos *= sizeof(inode->i_generation);
+
+	err = 0;
+	sbinfo = au_sbi(sb);
+	file = sbinfo->si_xigen;
+	BUG_ON(!file);
+
+	if (vfsub_f_size_read(file)
+	    < pos + sizeof(inode->i_generation)) {
+		inode->i_generation = atomic_inc_return(&sbinfo->si_xigen_next);
+		sz = xino_fwrite(sbinfo->si_xwrite, file, &inode->i_generation,
+				 sizeof(inode->i_generation), &pos);
+	} else
+		sz = xino_fread(sbinfo->si_xread, file, &inode->i_generation,
+				sizeof(inode->i_generation), &pos);
+	if (sz == sizeof(inode->i_generation))
+		goto out; /* success */
+
+	err = sz;
+	if (unlikely(sz >= 0)) {
+		err = -EIO;
+		AuIOErr("xigen error (%zd)\n", sz);
+	}
+
+out:
+	return err;
+}
+
+int au_xigen_set(struct super_block *sb, struct file *base)
+{
+	int err;
+	struct au_sbinfo *sbinfo;
+	struct file *file;
+
+	SiMustWriteLock(sb);
+
+	sbinfo = au_sbi(sb);
+	file = au_xino_create2(base, sbinfo->si_xigen);
+	err = PTR_ERR(file);
+	if (IS_ERR(file))
+		goto out;
+	err = 0;
+	if (sbinfo->si_xigen)
+		fput(sbinfo->si_xigen);
+	sbinfo->si_xigen = file;
+
+out:
+	return err;
+}
+
+void au_xigen_clr(struct super_block *sb)
+{
+	struct au_sbinfo *sbinfo;
+
+	SiMustWriteLock(sb);
+
+	sbinfo = au_sbi(sb);
+	if (sbinfo->si_xigen) {
+		fput(sbinfo->si_xigen);
+		sbinfo->si_xigen = NULL;
+	}
+}
+
+/* ---------------------------------------------------------------------- */
+
+static struct dentry *decode_by_ino(struct super_block *sb, ino_t ino,
+				    ino_t dir_ino)
+{
+	struct dentry *dentry, *d;
+	struct inode *inode;
+	unsigned int sigen;
+
+	dentry = NULL;
+	inode = ilookup(sb, ino);
+	if (!inode)
+		goto out;
+
+	dentry = ERR_PTR(-ESTALE);
+	sigen = au_sigen(sb);
+	if (unlikely(au_is_bad_inode(inode)
+		     || IS_DEADDIR(inode)
+		     || sigen != au_iigen(inode, NULL)))
+		goto out_iput;
+
+	dentry = NULL;
+	if (!dir_ino || S_ISDIR(inode->i_mode))
+		dentry = d_find_alias(inode);
+	else {
+		spin_lock(&inode->i_lock);
+		hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias) {
+			spin_lock(&d->d_lock);
+			if (!au_test_anon(d)
+			    && d_inode(d->d_parent)->i_ino == dir_ino) {
+				dentry = dget_dlock(d);
+				spin_unlock(&d->d_lock);
+				break;
+			}
+			spin_unlock(&d->d_lock);
+		}
+		spin_unlock(&inode->i_lock);
+	}
+	if (unlikely(dentry && au_digen_test(dentry, sigen))) {
+		/* need to refresh */
+		dput(dentry);
+		dentry = NULL;
+	}
+
+out_iput:
+	iput(inode);
+out:
+	AuTraceErrPtr(dentry);
+	return dentry;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* todo: dirty? */
+/* if exportfs_decode_fh() passed vfsmount*, we could be happy */
+
+struct au_compare_mnt_args {
+	/* input */
+	struct super_block *sb;
+
+	/* output */
+	struct vfsmount *mnt;
+};
+
+static int au_compare_mnt(struct vfsmount *mnt, void *arg)
+{
+	struct au_compare_mnt_args *a = arg;
+
+	if (mnt->mnt_sb != a->sb)
+		return 0;
+	a->mnt = mntget(mnt);
+	return 1;
+}
+
+static struct vfsmount *au_mnt_get(struct super_block *sb)
+{
+	int err;
+	struct path root;
+	struct au_compare_mnt_args args = {
+		.sb = sb
+	};
+
+	get_fs_root(current->fs, &root);
+	rcu_read_lock();
+	err = iterate_mounts(au_compare_mnt, &args, root.mnt);
+	rcu_read_unlock();
+	path_put(&root);
+	AuDebugOn(!err);
+	AuDebugOn(!args.mnt);
+	return args.mnt;
+}
+
+struct au_nfsd_si_lock {
+	unsigned int sigen;
+	aufs_bindex_t bindex, br_id;
+	unsigned char force_lock;
+};
+
+static int si_nfsd_read_lock(struct super_block *sb,
+			     struct au_nfsd_si_lock *nsi_lock)
+{
+	int err;
+	aufs_bindex_t bindex;
+
+	si_read_lock(sb, AuLock_FLUSH);
+
+	/* branch id may be wrapped around */
+	err = 0;
+	bindex = au_br_index(sb, nsi_lock->br_id);
+	if (bindex >= 0 && nsi_lock->sigen + AUFS_BRANCH_MAX > au_sigen(sb))
+		goto out; /* success */
+
+	err = -ESTALE;
+	bindex = -1;
+	if (!nsi_lock->force_lock)
+		si_read_unlock(sb);
+
+out:
+	nsi_lock->bindex = bindex;
+	return err;
+}
+
+struct find_name_by_ino {
+	struct dir_context ctx;
+	int called, found;
+	ino_t ino;
+	char *name;
+	int namelen;
+};
+
+static int
+find_name_by_ino(struct dir_context *ctx, const char *name, int namelen,
+		 loff_t offset, u64 ino, unsigned int d_type)
+{
+	struct find_name_by_ino *a = container_of(ctx, struct find_name_by_ino,
+						  ctx);
+
+	a->called++;
+	if (a->ino != ino)
+		return 0;
+
+	memcpy(a->name, name, namelen);
+	a->namelen = namelen;
+	a->found = 1;
+	return 1;
+}
+
+static struct dentry *au_lkup_by_ino(struct path *path, ino_t ino,
+				     struct au_nfsd_si_lock *nsi_lock)
+{
+	struct dentry *dentry, *parent;
+	struct file *file;
+	struct inode *dir;
+	struct find_name_by_ino arg = {
+		.ctx = {
+			.actor = find_name_by_ino
+		}
+	};
+	int err;
+
+	parent = path->dentry;
+	if (nsi_lock)
+		si_read_unlock(parent->d_sb);
+	file = vfsub_dentry_open(path, au_dir_roflags);
+	dentry = (void *)file;
+	if (IS_ERR(file))
+		goto out;
+
+	dentry = ERR_PTR(-ENOMEM);
+	arg.name = (void *)__get_free_page(GFP_NOFS);
+	if (unlikely(!arg.name))
+		goto out_file;
+	arg.ino = ino;
+	arg.found = 0;
+	do {
+		arg.called = 0;
+		/* smp_mb(); */
+		err = vfsub_iterate_dir(file, &arg.ctx);
+	} while (!err && !arg.found && arg.called);
+	dentry = ERR_PTR(err);
+	if (unlikely(err))
+		goto out_name;
+	/* instead of ENOENT */
+	dentry = ERR_PTR(-ESTALE);
+	if (!arg.found)
+		goto out_name;
+
+	/* do not call vfsub_lkup_one() */
+	dir = d_inode(parent);
+	dentry = vfsub_lookup_one_len_unlocked(arg.name, parent, arg.namelen);
+	AuTraceErrPtr(dentry);
+	if (IS_ERR(dentry))
+		goto out_name;
+	AuDebugOn(au_test_anon(dentry));
+	if (unlikely(d_really_is_negative(dentry))) {
+		dput(dentry);
+		dentry = ERR_PTR(-ENOENT);
+	}
+
+out_name:
+	free_page((unsigned long)arg.name);
+out_file:
+	fput(file);
+out:
+	if (unlikely(nsi_lock
+		     && si_nfsd_read_lock(parent->d_sb, nsi_lock) < 0))
+		if (!IS_ERR(dentry)) {
+			dput(dentry);
+			dentry = ERR_PTR(-ESTALE);
+		}
+	AuTraceErrPtr(dentry);
+	return dentry;
+}
+
+static struct dentry *decode_by_dir_ino(struct super_block *sb, ino_t ino,
+					ino_t dir_ino,
+					struct au_nfsd_si_lock *nsi_lock)
+{
+	struct dentry *dentry;
+	struct path path;
+
+	if (dir_ino != AUFS_ROOT_INO) {
+		path.dentry = decode_by_ino(sb, dir_ino, 0);
+		dentry = path.dentry;
+		if (!path.dentry || IS_ERR(path.dentry))
+			goto out;
+		AuDebugOn(au_test_anon(path.dentry));
+	} else
+		path.dentry = dget(sb->s_root);
+
+	path.mnt = au_mnt_get(sb);
+	dentry = au_lkup_by_ino(&path, ino, nsi_lock);
+	path_put(&path);
+
+out:
+	AuTraceErrPtr(dentry);
+	return dentry;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int h_acceptable(void *expv, struct dentry *dentry)
+{
+	return 1;
+}
+
+static char *au_build_path(struct dentry *h_parent, struct path *h_rootpath,
+			   char *buf, int len, struct super_block *sb)
+{
+	char *p;
+	int n;
+	struct path path;
+
+	p = d_path(h_rootpath, buf, len);
+	if (IS_ERR(p))
+		goto out;
+	n = strlen(p);
+
+	path.mnt = h_rootpath->mnt;
+	path.dentry = h_parent;
+	p = d_path(&path, buf, len);
+	if (IS_ERR(p))
+		goto out;
+	if (n != 1)
+		p += n;
+
+	path.mnt = au_mnt_get(sb);
+	path.dentry = sb->s_root;
+	p = d_path(&path, buf, len - strlen(p));
+	mntput(path.mnt);
+	if (IS_ERR(p))
+		goto out;
+	if (n != 1)
+		p[strlen(p)] = '/';
+
+out:
+	AuTraceErrPtr(p);
+	return p;
+}
+
+static
+struct dentry *decode_by_path(struct super_block *sb, ino_t ino, __u32 *fh,
+			      int fh_len, struct au_nfsd_si_lock *nsi_lock)
+{
+	struct dentry *dentry, *h_parent, *root;
+	struct super_block *h_sb;
+	char *pathname, *p;
+	struct vfsmount *h_mnt;
+	struct au_branch *br;
+	int err;
+	struct path path;
+
+	br = au_sbr(sb, nsi_lock->bindex);
+	h_mnt = au_br_mnt(br);
+	h_sb = h_mnt->mnt_sb;
+	/* todo: call lower fh_to_dentry()? fh_to_parent()? */
+	lockdep_off();
+	h_parent = exportfs_decode_fh(h_mnt, (void *)(fh + Fh_tail),
+				      fh_len - Fh_tail, fh[Fh_h_type],
+				      h_acceptable, /*context*/NULL);
+	lockdep_on();
+	dentry = h_parent;
+	if (unlikely(!h_parent || IS_ERR(h_parent))) {
+		AuWarn1("%s decode_fh failed, %ld\n",
+			au_sbtype(h_sb), PTR_ERR(h_parent));
+		goto out;
+	}
+	dentry = NULL;
+	if (unlikely(au_test_anon(h_parent))) {
+		AuWarn1("%s decode_fh returned a disconnected dentry\n",
+			au_sbtype(h_sb));
+		goto out_h_parent;
+	}
+
+	dentry = ERR_PTR(-ENOMEM);
+	pathname = (void *)__get_free_page(GFP_NOFS);
+	if (unlikely(!pathname))
+		goto out_h_parent;
+
+	root = sb->s_root;
+	path.mnt = h_mnt;
+	di_read_lock_parent(root, !AuLock_IR);
+	path.dentry = au_h_dptr(root, nsi_lock->bindex);
+	di_read_unlock(root, !AuLock_IR);
+	p = au_build_path(h_parent, &path, pathname, PAGE_SIZE, sb);
+	dentry = (void *)p;
+	if (IS_ERR(p))
+		goto out_pathname;
+
+	si_read_unlock(sb);
+	err = vfsub_kern_path(p, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
+	dentry = ERR_PTR(err);
+	if (unlikely(err))
+		goto out_relock;
+
+	dentry = ERR_PTR(-ENOENT);
+	AuDebugOn(au_test_anon(path.dentry));
+	if (unlikely(d_really_is_negative(path.dentry)))
+		goto out_path;
+
+	if (ino != d_inode(path.dentry)->i_ino)
+		dentry = au_lkup_by_ino(&path, ino, /*nsi_lock*/NULL);
+	else
+		dentry = dget(path.dentry);
+
+out_path:
+	path_put(&path);
+out_relock:
+	if (unlikely(si_nfsd_read_lock(sb, nsi_lock) < 0))
+		if (!IS_ERR(dentry)) {
+			dput(dentry);
+			dentry = ERR_PTR(-ESTALE);
+		}
+out_pathname:
+	free_page((unsigned long)pathname);
+out_h_parent:
+	dput(h_parent);
+out:
+	AuTraceErrPtr(dentry);
+	return dentry;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static struct dentry *
+aufs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len,
+		  int fh_type)
+{
+	struct dentry *dentry;
+	__u32 *fh = fid->raw;
+	struct au_branch *br;
+	ino_t ino, dir_ino;
+	struct au_nfsd_si_lock nsi_lock = {
+		.force_lock	= 0
+	};
+
+	dentry = ERR_PTR(-ESTALE);
+	/* it should never happen, but the file handle is unreliable */
+	if (unlikely(fh_len < Fh_tail))
+		goto out;
+	nsi_lock.sigen = fh[Fh_sigen];
+	nsi_lock.br_id = fh[Fh_br_id];
+
+	/* branch id may be wrapped around */
+	br = NULL;
+	if (unlikely(si_nfsd_read_lock(sb, &nsi_lock)))
+		goto out;
+	nsi_lock.force_lock = 1;
+
+	/* is this inode still cached? */
+	ino = decode_ino(fh + Fh_ino);
+	/* it should never happen */
+	if (unlikely(ino == AUFS_ROOT_INO))
+		goto out_unlock;
+
+	dir_ino = decode_ino(fh + Fh_dir_ino);
+	dentry = decode_by_ino(sb, ino, dir_ino);
+	if (IS_ERR(dentry))
+		goto out_unlock;
+	if (dentry)
+		goto accept;
+
+	/* is the parent dir cached? */
+	br = au_sbr(sb, nsi_lock.bindex);
+	au_br_get(br);
+	dentry = decode_by_dir_ino(sb, ino, dir_ino, &nsi_lock);
+	if (IS_ERR(dentry))
+		goto out_unlock;
+	if (dentry)
+		goto accept;
+
+	/* lookup path */
+	dentry = decode_by_path(sb, ino, fh, fh_len, &nsi_lock);
+	if (IS_ERR(dentry))
+		goto out_unlock;
+	if (unlikely(!dentry))
+		/* todo?: make it ESTALE */
+		goto out_unlock;
+
+accept:
+	if (!au_digen_test(dentry, au_sigen(sb))
+	    && d_inode(dentry)->i_generation == fh[Fh_igen])
+		goto out_unlock; /* success */
+
+	dput(dentry);
+	dentry = ERR_PTR(-ESTALE);
+out_unlock:
+	if (br)
+		au_br_put(br);
+	si_read_unlock(sb);
+out:
+	AuTraceErrPtr(dentry);
+	return dentry;
+}
+
+#if 0 /* reserved for future use */
+/* support subtreecheck option */
+static struct dentry *aufs_fh_to_parent(struct super_block *sb, struct fid *fid,
+					int fh_len, int fh_type)
+{
+	struct dentry *parent;
+	__u32 *fh = fid->raw;
+	ino_t dir_ino;
+
+	dir_ino = decode_ino(fh + Fh_dir_ino);
+	parent = decode_by_ino(sb, dir_ino, 0);
+	if (IS_ERR(parent))
+		goto out;
+	if (!parent)
+		parent = decode_by_path(sb, au_br_index(sb, fh[Fh_br_id]),
+					dir_ino, fh, fh_len);
+
+out:
+	AuTraceErrPtr(parent);
+	return parent;
+}
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+static int aufs_encode_fh(struct inode *inode, __u32 *fh, int *max_len,
+			  struct inode *dir)
+{
+	int err;
+	aufs_bindex_t bindex;
+	struct super_block *sb, *h_sb;
+	struct dentry *dentry, *parent, *h_parent;
+	struct inode *h_dir;
+	struct au_branch *br;
+
+	err = -ENOSPC;
+	if (unlikely(*max_len <= Fh_tail)) {
+		AuWarn1("NFSv2 client (max_len %d)?\n", *max_len);
+		goto out;
+	}
+
+	err = FILEID_ROOT;
+	if (inode->i_ino == AUFS_ROOT_INO) {
+		AuDebugOn(inode->i_ino != AUFS_ROOT_INO);
+		goto out;
+	}
+
+	h_parent = NULL;
+	sb = inode->i_sb;
+	err = si_read_lock(sb, AuLock_FLUSH);
+	if (unlikely(err))
+		goto out;
+
+#ifdef CONFIG_AUFS_DEBUG
+	if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
+		AuWarn1("NFS-exporting requires xino\n");
+#endif
+	err = -EIO;
+	parent = NULL;
+	ii_read_lock_child(inode);
+	bindex = au_ibtop(inode);
+	if (!dir) {
+		dentry = d_find_any_alias(inode);
+		if (unlikely(!dentry))
+			goto out_unlock;
+		AuDebugOn(au_test_anon(dentry));
+		parent = dget_parent(dentry);
+		dput(dentry);
+		if (unlikely(!parent))
+			goto out_unlock;
+		if (d_really_is_positive(parent))
+			dir = d_inode(parent);
+	}
+
+	ii_read_lock_parent(dir);
+	h_dir = au_h_iptr(dir, bindex);
+	ii_read_unlock(dir);
+	if (unlikely(!h_dir))
+		goto out_parent;
+	h_parent = d_find_any_alias(h_dir);
+	if (unlikely(!h_parent))
+		goto out_hparent;
+
+	err = -EPERM;
+	br = au_sbr(sb, bindex);
+	h_sb = au_br_sb(br);
+	if (unlikely(!h_sb->s_export_op)) {
+		AuErr1("%s branch is not exportable\n", au_sbtype(h_sb));
+		goto out_hparent;
+	}
+
+	fh[Fh_br_id] = br->br_id;
+	fh[Fh_sigen] = au_sigen(sb);
+	encode_ino(fh + Fh_ino, inode->i_ino);
+	encode_ino(fh + Fh_dir_ino, dir->i_ino);
+	fh[Fh_igen] = inode->i_generation;
+
+	*max_len -= Fh_tail;
+	fh[Fh_h_type] = exportfs_encode_fh(h_parent, (void *)(fh + Fh_tail),
+					   max_len,
+					   /*connectable or subtreecheck*/0);
+	err = fh[Fh_h_type];
+	*max_len += Fh_tail;
+	/* todo: macros? */
+	if (err != FILEID_INVALID)
+		err = 99;
+	else
+		AuWarn1("%s encode_fh failed\n", au_sbtype(h_sb));
+
+out_hparent:
+	dput(h_parent);
+out_parent:
+	dput(parent);
+out_unlock:
+	ii_read_unlock(inode);
+	si_read_unlock(sb);
+out:
+	if (unlikely(err < 0))
+		err = FILEID_INVALID;
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int aufs_commit_metadata(struct inode *inode)
+{
+	int err;
+	aufs_bindex_t bindex;
+	struct super_block *sb;
+	struct inode *h_inode;
+	int (*f)(struct inode *inode);
+
+	sb = inode->i_sb;
+	si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
+	ii_write_lock_child(inode);
+	bindex = au_ibtop(inode);
+	AuDebugOn(bindex < 0);
+	h_inode = au_h_iptr(inode, bindex);
+
+	f = h_inode->i_sb->s_export_op->commit_metadata;
+	if (f)
+		err = f(h_inode);
+	else {
+		struct writeback_control wbc = {
+			.sync_mode	= WB_SYNC_ALL,
+			.nr_to_write	= 0 /* metadata only */
+		};
+
+		err = sync_inode(h_inode, &wbc);
+	}
+
+	au_cpup_attr_timesizes(inode);
+	ii_write_unlock(inode);
+	si_read_unlock(sb);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static struct export_operations aufs_export_op = {
+	.fh_to_dentry		= aufs_fh_to_dentry,
+	/* .fh_to_parent	= aufs_fh_to_parent, */
+	.encode_fh		= aufs_encode_fh,
+	.commit_metadata	= aufs_commit_metadata
+};
+
+void au_export_init(struct super_block *sb)
+{
+	struct au_sbinfo *sbinfo;
+	__u32 u;
+
+	BUILD_BUG_ON_MSG(IS_BUILTIN(CONFIG_AUFS_FS)
+			 && IS_MODULE(CONFIG_EXPORTFS),
+			 AUFS_NAME ": unsupported configuration "
+			 "CONFIG_EXPORTFS=m and CONFIG_AUFS_FS=y");
+
+	sb->s_export_op = &aufs_export_op;
+	sbinfo = au_sbi(sb);
+	sbinfo->si_xigen = NULL;
+	get_random_bytes(&u, sizeof(u));
+	BUILD_BUG_ON(sizeof(u) != sizeof(int));
+	atomic_set(&sbinfo->si_xigen_next, u);
+}
diff --git a/fs/aufs/f_op.c b/fs/aufs/f_op.c
new file mode 100644
index 000000000..00df9096a
--- /dev/null
+++ b/fs/aufs/f_op.c
@@ -0,0 +1,817 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * file and vm operations
+ */
+
+#include <linux/aio.h>
+#include <linux/fs_stack.h>
+#include <linux/mman.h>
+#include <linux/security.h>
+#include "aufs.h"
+
+int au_do_open_nondir(struct file *file, int flags, struct file *h_file)
+{
+	int err;
+	aufs_bindex_t bindex;
+	struct dentry *dentry, *h_dentry;
+	struct au_finfo *finfo;
+	struct inode *h_inode;
+
+	FiMustWriteLock(file);
+
+	err = 0;
+	dentry = file->f_path.dentry;
+	AuDebugOn(IS_ERR_OR_NULL(dentry));
+	finfo = au_fi(file);
+	memset(&finfo->fi_htop, 0, sizeof(finfo->fi_htop));
+	atomic_set(&finfo->fi_mmapped, 0);
+	bindex = au_dbtop(dentry);
+	if (!h_file) {
+		h_dentry = au_h_dptr(dentry, bindex);
+		err = vfsub_test_mntns(file->f_path.mnt, h_dentry->d_sb);
+		if (unlikely(err))
+			goto out;
+		h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
+	} else {
+		h_dentry = h_file->f_path.dentry;
+		err = vfsub_test_mntns(file->f_path.mnt, h_dentry->d_sb);
+		if (unlikely(err))
+			goto out;
+		get_file(h_file);
+	}
+	if (IS_ERR(h_file))
+		err = PTR_ERR(h_file);
+	else {
+		if ((flags & __O_TMPFILE)
+		    && !(flags & O_EXCL)) {
+			h_inode = file_inode(h_file);
+			spin_lock(&h_inode->i_lock);
+			h_inode->i_state |= I_LINKABLE;
+			spin_unlock(&h_inode->i_lock);
+		}
+		au_set_fbtop(file, bindex);
+		au_set_h_fptr(file, bindex, h_file);
+		au_update_figen(file);
+		/* todo: necessary? */
+		/* file->f_ra = h_file->f_ra; */
+	}
+
+out:
+	return err;
+}
+
+static int aufs_open_nondir(struct inode *inode __maybe_unused,
+			    struct file *file)
+{
+	int err;
+	struct super_block *sb;
+	struct au_do_open_args args = {
+		.open	= au_do_open_nondir
+	};
+
+	AuDbg("%pD, f_flags 0x%x, f_mode 0x%x\n",
+	      file, vfsub_file_flags(file), file->f_mode);
+
+	sb = file->f_path.dentry->d_sb;
+	si_read_lock(sb, AuLock_FLUSH);
+	err = au_do_open(file, &args);
+	si_read_unlock(sb);
+	return err;
+}
+
+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file)
+{
+	struct au_finfo *finfo;
+	aufs_bindex_t bindex;
+
+	finfo = au_fi(file);
+	au_hbl_del(&finfo->fi_hlist,
+		   &au_sbi(file->f_path.dentry->d_sb)->si_files);
+	bindex = finfo->fi_btop;
+	if (bindex >= 0)
+		au_set_h_fptr(file, bindex, NULL);
+
+	au_finfo_fin(file);
+	return 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_do_flush_nondir(struct file *file, fl_owner_t id)
+{
+	int err;
+	struct file *h_file;
+
+	err = 0;
+	h_file = au_hf_top(file);
+	if (h_file)
+		err = vfsub_flush(h_file, id);
+	return err;
+}
+
+static int aufs_flush_nondir(struct file *file, fl_owner_t id)
+{
+	return au_do_flush(file, id, au_do_flush_nondir);
+}
+
+/* ---------------------------------------------------------------------- */
+/*
+ * read and write functions acquire [fdi]_rwsem once, but release before
+ * mmap_sem. This is because to stop a race condition between mmap(2).
+ * Releasing these aufs-rwsem should be safe, no branch-mamagement (by keeping
+ * si_rwsem), no harmful copy-up should happen. Actually copy-up may happen in
+ * read functions after [fdi]_rwsem are released, but it should be harmless.
+ */
+
+/* Callers should call au_read_post() or fput() in the end */
+struct file *au_read_pre(struct file *file, int keep_fi, unsigned int lsc)
+{
+	struct file *h_file;
+	int err;
+
+	err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0, lsc);
+	if (!err) {
+		di_read_unlock(file->f_path.dentry, AuLock_IR);
+		h_file = au_hf_top(file);
+		get_file(h_file);
+		if (!keep_fi)
+			fi_read_unlock(file);
+	} else
+		h_file = ERR_PTR(err);
+
+	return h_file;
+}
+
+static void au_read_post(struct inode *inode, struct file *h_file)
+{
+	/* update without lock, I don't think it a problem */
+	fsstack_copy_attr_atime(inode, file_inode(h_file));
+	fput(h_file);
+}
+
+struct au_write_pre {
+	/* input */
+	unsigned int lsc;
+
+	/* output */
+	blkcnt_t blks;
+	aufs_bindex_t btop;
+};
+
+/*
+ * return with iinfo is write-locked
+ * callers should call au_write_post() or iinfo_write_unlock() + fput() in the
+ * end
+ */
+static struct file *au_write_pre(struct file *file, int do_ready,
+				 struct au_write_pre *wpre)
+{
+	struct file *h_file;
+	struct dentry *dentry;
+	int err;
+	unsigned int lsc;
+	struct au_pin pin;
+
+	lsc = 0;
+	if (wpre)
+		lsc = wpre->lsc;
+	err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1, lsc);
+	h_file = ERR_PTR(err);
+	if (unlikely(err))
+		goto out;
+
+	dentry = file->f_path.dentry;
+	if (do_ready) {
+		err = au_ready_to_write(file, -1, &pin);
+		if (unlikely(err)) {
+			h_file = ERR_PTR(err);
+			di_write_unlock(dentry);
+			goto out_fi;
+		}
+	}
+
+	di_downgrade_lock(dentry, /*flags*/0);
+	if (wpre)
+		wpre->btop = au_fbtop(file);
+	h_file = au_hf_top(file);
+	get_file(h_file);
+	if (wpre)
+		wpre->blks = file_inode(h_file)->i_blocks;
+	if (do_ready)
+		au_unpin(&pin);
+	di_read_unlock(dentry, /*flags*/0);
+
+out_fi:
+	fi_write_unlock(file);
+out:
+	return h_file;
+}
+
+static void au_write_post(struct inode *inode, struct file *h_file,
+			  struct au_write_pre *wpre, ssize_t written)
+{
+	struct inode *h_inode;
+
+	au_cpup_attr_timesizes(inode);
+	AuDebugOn(au_ibtop(inode) != wpre->btop);
+	h_inode = file_inode(h_file);
+	inode->i_mode = h_inode->i_mode;
+	ii_write_unlock(inode);
+	/* AuDbg("blks %llu, %llu\n", (u64)blks, (u64)h_inode->i_blocks); */
+	if (written > 0)
+		au_fhsm_wrote(inode->i_sb, wpre->btop,
+			      /*force*/h_inode->i_blocks > wpre->blks);
+	fput(h_file);
+}
+
+static ssize_t aufs_read(struct file *file, char __user *buf, size_t count,
+			 loff_t *ppos)
+{
+	ssize_t err;
+	struct inode *inode;
+	struct file *h_file;
+	struct super_block *sb;
+
+	inode = file_inode(file);
+	sb = inode->i_sb;
+	si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
+
+	h_file = au_read_pre(file, /*keep_fi*/0, /*lsc*/0);
+	err = PTR_ERR(h_file);
+	if (IS_ERR(h_file))
+		goto out;
+
+	/* filedata may be obsoleted by concurrent copyup, but no problem */
+	err = vfsub_read_u(h_file, buf, count, ppos);
+	/* todo: necessary? */
+	/* file->f_ra = h_file->f_ra; */
+	au_read_post(inode, h_file);
+
+out:
+	si_read_unlock(sb);
+	return err;
+}
+
+/*
+ * todo: very ugly
+ * it locks both of i_mutex and si_rwsem for read in safe.
+ * if the plink maintenance mode continues forever (that is the problem),
+ * may loop forever.
+ */
+static void au_mtx_and_read_lock(struct inode *inode)
+{
+	int err;
+	struct super_block *sb = inode->i_sb;
+
+	while (1) {
+		inode_lock(inode);
+		err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
+		if (!err)
+			break;
+		inode_unlock(inode);
+		si_read_lock(sb, AuLock_NOPLMW);
+		si_read_unlock(sb);
+	}
+}
+
+static ssize_t aufs_write(struct file *file, const char __user *ubuf,
+			  size_t count, loff_t *ppos)
+{
+	ssize_t err;
+	struct au_write_pre wpre;
+	struct inode *inode;
+	struct file *h_file;
+	char __user *buf = (char __user *)ubuf;
+
+	inode = file_inode(file);
+	au_mtx_and_read_lock(inode);
+
+	wpre.lsc = 0;
+	h_file = au_write_pre(file, /*do_ready*/1, &wpre);
+	err = PTR_ERR(h_file);
+	if (IS_ERR(h_file))
+		goto out;
+
+	err = vfsub_write_u(h_file, buf, count, ppos);
+	au_write_post(inode, h_file, &wpre, err);
+
+out:
+	si_read_unlock(inode->i_sb);
+	inode_unlock(inode);
+	return err;
+}
+
+static ssize_t au_do_iter(struct file *h_file, int rw, struct kiocb *kio,
+			  struct iov_iter *iov_iter)
+{
+	ssize_t err;
+	struct file *file;
+	ssize_t (*iter)(struct kiocb *, struct iov_iter *);
+
+	err = security_file_permission(h_file, rw);
+	if (unlikely(err))
+		goto out;
+
+	err = -ENOSYS;
+	iter = NULL;
+	if (rw == MAY_READ)
+		iter = h_file->f_op->read_iter;
+	else if (rw == MAY_WRITE)
+		iter = h_file->f_op->write_iter;
+
+	file = kio->ki_filp;
+	kio->ki_filp = h_file;
+	if (iter) {
+		lockdep_off();
+		err = iter(kio, iov_iter);
+		lockdep_on();
+	} else
+		/* currently there is no such fs */
+		WARN_ON_ONCE(1);
+	kio->ki_filp = file;
+
+out:
+	return err;
+}
+
+static ssize_t aufs_read_iter(struct kiocb *kio, struct iov_iter *iov_iter)
+{
+	ssize_t err;
+	struct file *file, *h_file;
+	struct inode *inode;
+	struct super_block *sb;
+
+	file = kio->ki_filp;
+	inode = file_inode(file);
+	sb = inode->i_sb;
+	si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
+
+	h_file = au_read_pre(file, /*keep_fi*/1, /*lsc*/0);
+	err = PTR_ERR(h_file);
+	if (IS_ERR(h_file))
+		goto out;
+
+	if (au_test_loopback_kthread()) {
+		au_warn_loopback(h_file->f_path.dentry->d_sb);
+		if (file->f_mapping != h_file->f_mapping) {
+			file->f_mapping = h_file->f_mapping;
+			smp_mb(); /* unnecessary? */
+		}
+	}
+	fi_read_unlock(file);
+
+	err = au_do_iter(h_file, MAY_READ, kio, iov_iter);
+	/* todo: necessary? */
+	/* file->f_ra = h_file->f_ra; */
+	au_read_post(inode, h_file);
+
+out:
+	si_read_unlock(sb);
+	return err;
+}
+
+static ssize_t aufs_write_iter(struct kiocb *kio, struct iov_iter *iov_iter)
+{
+	ssize_t err;
+	struct au_write_pre wpre;
+	struct inode *inode;
+	struct file *file, *h_file;
+
+	file = kio->ki_filp;
+	inode = file_inode(file);
+	au_mtx_and_read_lock(inode);
+
+	wpre.lsc = 0;
+	h_file = au_write_pre(file, /*do_ready*/1, &wpre);
+	err = PTR_ERR(h_file);
+	if (IS_ERR(h_file))
+		goto out;
+
+	err = au_do_iter(h_file, MAY_WRITE, kio, iov_iter);
+	au_write_post(inode, h_file, &wpre, err);
+
+out:
+	si_read_unlock(inode->i_sb);
+	inode_unlock(inode);
+	return err;
+}
+
+static ssize_t aufs_splice_read(struct file *file, loff_t *ppos,
+				struct pipe_inode_info *pipe, size_t len,
+				unsigned int flags)
+{
+	ssize_t err;
+	struct file *h_file;
+	struct inode *inode;
+	struct super_block *sb;
+
+	inode = file_inode(file);
+	sb = inode->i_sb;
+	si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
+
+	h_file = au_read_pre(file, /*keep_fi*/0, /*lsc*/0);
+	err = PTR_ERR(h_file);
+	if (IS_ERR(h_file))
+		goto out;
+
+	err = vfsub_splice_to(h_file, ppos, pipe, len, flags);
+	/* todo: necessasry? */
+	/* file->f_ra = h_file->f_ra; */
+	au_read_post(inode, h_file);
+
+out:
+	si_read_unlock(sb);
+	return err;
+}
+
+static ssize_t
+aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos,
+		  size_t len, unsigned int flags)
+{
+	ssize_t err;
+	struct au_write_pre wpre;
+	struct inode *inode;
+	struct file *h_file;
+
+	inode = file_inode(file);
+	au_mtx_and_read_lock(inode);
+
+	wpre.lsc = 0;
+	h_file = au_write_pre(file, /*do_ready*/1, &wpre);
+	err = PTR_ERR(h_file);
+	if (IS_ERR(h_file))
+		goto out;
+
+	err = vfsub_splice_from(pipe, h_file, ppos, len, flags);
+	au_write_post(inode, h_file, &wpre, err);
+
+out:
+	si_read_unlock(inode->i_sb);
+	inode_unlock(inode);
+	return err;
+}
+
+static long aufs_fallocate(struct file *file, int mode, loff_t offset,
+			   loff_t len)
+{
+	long err;
+	struct au_write_pre wpre;
+	struct inode *inode;
+	struct file *h_file;
+
+	inode = file_inode(file);
+	au_mtx_and_read_lock(inode);
+
+	wpre.lsc = 0;
+	h_file = au_write_pre(file, /*do_ready*/1, &wpre);
+	err = PTR_ERR(h_file);
+	if (IS_ERR(h_file))
+		goto out;
+
+	lockdep_off();
+	err = vfs_fallocate(h_file, mode, offset, len);
+	lockdep_on();
+	au_write_post(inode, h_file, &wpre, /*written*/1);
+
+out:
+	si_read_unlock(inode->i_sb);
+	inode_unlock(inode);
+	return err;
+}
+
+static ssize_t aufs_copy_file_range(struct file *src, loff_t src_pos,
+				    struct file *dst, loff_t dst_pos,
+				    size_t len, unsigned int flags)
+{
+	ssize_t err;
+	struct au_write_pre wpre;
+	enum { SRC, DST };
+	struct {
+		struct inode *inode;
+		struct file *h_file;
+		struct super_block *h_sb;
+	} a[2];
+#define a_src	a[SRC]
+#define a_dst	a[DST]
+
+	err = -EINVAL;
+	a_src.inode = file_inode(src);
+	if (unlikely(!S_ISREG(a_src.inode->i_mode)))
+		goto out;
+	a_dst.inode = file_inode(dst);
+	if (unlikely(!S_ISREG(a_dst.inode->i_mode)))
+		goto out;
+
+	au_mtx_and_read_lock(a_dst.inode);
+	/*
+	 * in order to match the order in di_write_lock2_{child,parent}(),
+	 * use f_path.dentry for this comparision.
+	 */
+	if (src->f_path.dentry < dst->f_path.dentry) {
+		a_src.h_file = au_read_pre(src, /*keep_fi*/1, AuLsc_FI_1);
+		err = PTR_ERR(a_src.h_file);
+		if (IS_ERR(a_src.h_file))
+			goto out_si;
+
+		wpre.lsc = AuLsc_FI_2;
+		a_dst.h_file = au_write_pre(dst, /*do_ready*/1, &wpre);
+		err = PTR_ERR(a_dst.h_file);
+		if (IS_ERR(a_dst.h_file)) {
+			au_read_post(a_src.inode, a_src.h_file);
+			goto out_si;
+		}
+	} else {
+		wpre.lsc = AuLsc_FI_1;
+		a_dst.h_file = au_write_pre(dst, /*do_ready*/1, &wpre);
+		err = PTR_ERR(a_dst.h_file);
+		if (IS_ERR(a_dst.h_file))
+			goto out_si;
+
+		a_src.h_file = au_read_pre(src, /*keep_fi*/1, AuLsc_FI_2);
+		err = PTR_ERR(a_src.h_file);
+		if (IS_ERR(a_src.h_file)) {
+			au_write_post(a_dst.inode, a_dst.h_file, &wpre,
+				      /*written*/0);
+			goto out_si;
+		}
+	}
+
+	err = -EXDEV;
+	a_src.h_sb = file_inode(a_src.h_file)->i_sb;
+	a_dst.h_sb = file_inode(a_dst.h_file)->i_sb;
+	if (unlikely(a_src.h_sb != a_dst.h_sb)) {
+		AuDbgFile(src);
+		AuDbgFile(dst);
+		goto out_file;
+	}
+
+	err = vfsub_copy_file_range(a_src.h_file, src_pos, a_dst.h_file,
+				    dst_pos, len, flags);
+
+out_file:
+	au_write_post(a_dst.inode, a_dst.h_file, &wpre, err);
+	fi_read_unlock(src);
+	au_read_post(a_src.inode, a_src.h_file);
+out_si:
+	si_read_unlock(a_dst.inode->i_sb);
+	inode_unlock(a_dst.inode);
+out:
+	return err;
+#undef a_src
+#undef a_dst
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * The locking order around current->mmap_sem.
+ * - in most and regular cases
+ *   file I/O syscall -- aufs_read() or something
+ *	-- si_rwsem for read -- mmap_sem
+ *	(Note that [fdi]i_rwsem are released before mmap_sem).
+ * - in mmap case
+ *   mmap(2) -- mmap_sem -- aufs_mmap() -- si_rwsem for read -- [fdi]i_rwsem
+ * This AB-BA order is definitly bad, but is not a problem since "si_rwsem for
+ * read" allows muliple processes to acquire it and [fdi]i_rwsem are not held in
+ * file I/O. Aufs needs to stop lockdep in aufs_mmap() though.
+ * It means that when aufs acquires si_rwsem for write, the process should never
+ * acquire mmap_sem.
+ *
+ * Actually aufs_iterate() holds [fdi]i_rwsem before mmap_sem, but this is not a
+ * problem either since any directory is not able to be mmap-ed.
+ * The similar scenario is applied to aufs_readlink() too.
+ */
+
+#if 0 /* stop calling security_file_mmap() */
+/* cf. linux/include/linux/mman.h: calc_vm_prot_bits() */
+#define AuConv_VM_PROT(f, b)	_calc_vm_trans(f, VM_##b, PROT_##b)
+
+static unsigned long au_arch_prot_conv(unsigned long flags)
+{
+	/* currently ppc64 only */
+#ifdef CONFIG_PPC64
+	/* cf. linux/arch/powerpc/include/asm/mman.h */
+	AuDebugOn(arch_calc_vm_prot_bits(-1) != VM_SAO);
+	return AuConv_VM_PROT(flags, SAO);
+#else
+	AuDebugOn(arch_calc_vm_prot_bits(-1));
+	return 0;
+#endif
+}
+
+static unsigned long au_prot_conv(unsigned long flags)
+{
+	return AuConv_VM_PROT(flags, READ)
+		| AuConv_VM_PROT(flags, WRITE)
+		| AuConv_VM_PROT(flags, EXEC)
+		| au_arch_prot_conv(flags);
+}
+
+/* cf. linux/include/linux/mman.h: calc_vm_flag_bits() */
+#define AuConv_VM_MAP(f, b)	_calc_vm_trans(f, VM_##b, MAP_##b)
+
+static unsigned long au_flag_conv(unsigned long flags)
+{
+	return AuConv_VM_MAP(flags, GROWSDOWN)
+		| AuConv_VM_MAP(flags, DENYWRITE)
+		| AuConv_VM_MAP(flags, LOCKED);
+}
+#endif
+
+static int aufs_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	int err;
+	const unsigned char wlock
+		= (file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED);
+	struct super_block *sb;
+	struct file *h_file;
+	struct inode *inode;
+
+	AuDbgVmRegion(file, vma);
+
+	inode = file_inode(file);
+	sb = inode->i_sb;
+	lockdep_off();
+	si_read_lock(sb, AuLock_NOPLMW);
+
+	h_file = au_write_pre(file, wlock, /*wpre*/NULL);
+	lockdep_on();
+	err = PTR_ERR(h_file);
+	if (IS_ERR(h_file))
+		goto out;
+
+	err = 0;
+	au_set_mmapped(file);
+	au_vm_file_reset(vma, h_file);
+	/*
+	 * we cannot call security_mmap_file() here since it may acquire
+	 * mmap_sem or i_mutex.
+	 *
+	 * err = security_mmap_file(h_file, au_prot_conv(vma->vm_flags),
+	 *			 au_flag_conv(vma->vm_flags));
+	 */
+	if (!err)
+		err = call_mmap(h_file, vma);
+	if (!err) {
+		au_vm_prfile_set(vma, file);
+		fsstack_copy_attr_atime(inode, file_inode(h_file));
+		goto out_fput; /* success */
+	}
+	au_unset_mmapped(file);
+	au_vm_file_reset(vma, file);
+
+out_fput:
+	lockdep_off();
+	ii_write_unlock(inode);
+	lockdep_on();
+	fput(h_file);
+out:
+	lockdep_off();
+	si_read_unlock(sb);
+	lockdep_on();
+	AuTraceErr(err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int aufs_fsync_nondir(struct file *file, loff_t start, loff_t end,
+			     int datasync)
+{
+	int err;
+	struct au_write_pre wpre;
+	struct inode *inode;
+	struct file *h_file;
+
+	err = 0; /* -EBADF; */ /* posix? */
+	if (unlikely(!(file->f_mode & FMODE_WRITE)))
+		goto out;
+
+	inode = file_inode(file);
+	au_mtx_and_read_lock(inode);
+
+	wpre.lsc = 0;
+	h_file = au_write_pre(file, /*do_ready*/1, &wpre);
+	err = PTR_ERR(h_file);
+	if (IS_ERR(h_file))
+		goto out_unlock;
+
+	err = vfsub_fsync(h_file, &h_file->f_path, datasync);
+	au_write_post(inode, h_file, &wpre, /*written*/0);
+
+out_unlock:
+	si_read_unlock(inode->i_sb);
+	inode_unlock(inode);
+out:
+	return err;
+}
+
+static int aufs_fasync(int fd, struct file *file, int flag)
+{
+	int err;
+	struct file *h_file;
+	struct super_block *sb;
+
+	sb = file->f_path.dentry->d_sb;
+	si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
+
+	h_file = au_read_pre(file, /*keep_fi*/0, /*lsc*/0);
+	err = PTR_ERR(h_file);
+	if (IS_ERR(h_file))
+		goto out;
+
+	if (h_file->f_op->fasync)
+		err = h_file->f_op->fasync(fd, h_file, flag);
+	fput(h_file); /* instead of au_read_post() */
+
+out:
+	si_read_unlock(sb);
+	return err;
+}
+
+static int aufs_setfl(struct file *file, unsigned long arg)
+{
+	int err;
+	struct file *h_file;
+	struct super_block *sb;
+
+	sb = file->f_path.dentry->d_sb;
+	si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
+
+	h_file = au_read_pre(file, /*keep_fi*/0, /*lsc*/0);
+	err = PTR_ERR(h_file);
+	if (IS_ERR(h_file))
+		goto out;
+
+	/* stop calling h_file->fasync */
+	arg |= vfsub_file_flags(file) & FASYNC;
+	err = setfl(/*unused fd*/-1, h_file, arg);
+	fput(h_file); /* instead of au_read_post() */
+
+out:
+	si_read_unlock(sb);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* no one supports this operation, currently */
+#if 0
+static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset,
+			     size_t len, loff_t *pos, int more)
+{
+}
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+const struct file_operations aufs_file_fop = {
+	.owner		= THIS_MODULE,
+
+	.llseek		= default_llseek,
+
+	.read		= aufs_read,
+	.write		= aufs_write,
+	.read_iter	= aufs_read_iter,
+	.write_iter	= aufs_write_iter,
+
+#ifdef CONFIG_AUFS_POLL
+	.poll		= aufs_poll,
+#endif
+	.unlocked_ioctl	= aufs_ioctl_nondir,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= aufs_compat_ioctl_nondir,
+#endif
+	.mmap		= aufs_mmap,
+	.open		= aufs_open_nondir,
+	.flush		= aufs_flush_nondir,
+	.release	= aufs_release_nondir,
+	.fsync		= aufs_fsync_nondir,
+	.fasync		= aufs_fasync,
+	/* .sendpage	= aufs_sendpage, */
+	.setfl		= aufs_setfl,
+	.splice_write	= aufs_splice_write,
+	.splice_read	= aufs_splice_read,
+#if 0
+	.aio_splice_write = aufs_aio_splice_write,
+	.aio_splice_read  = aufs_aio_splice_read,
+#endif
+	.fallocate	= aufs_fallocate,
+	.copy_file_range = aufs_copy_file_range
+};
diff --git a/fs/aufs/fhsm.c b/fs/aufs/fhsm.c
new file mode 100644
index 000000000..d35d726ac
--- /dev/null
+++ b/fs/aufs/fhsm.c
@@ -0,0 +1,426 @@
+/*
+ * Copyright (C) 2011-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+/*
+ * File-based Hierarchy Storage Management
+ */
+
+#include <linux/anon_inodes.h>
+#include <linux/poll.h>
+#include <linux/seq_file.h>
+#include <linux/statfs.h>
+#include "aufs.h"
+
+static aufs_bindex_t au_fhsm_bottom(struct super_block *sb)
+{
+	struct au_sbinfo *sbinfo;
+	struct au_fhsm *fhsm;
+
+	SiMustAnyLock(sb);
+
+	sbinfo = au_sbi(sb);
+	fhsm = &sbinfo->si_fhsm;
+	AuDebugOn(!fhsm);
+	return fhsm->fhsm_bottom;
+}
+
+void au_fhsm_set_bottom(struct super_block *sb, aufs_bindex_t bindex)
+{
+	struct au_sbinfo *sbinfo;
+	struct au_fhsm *fhsm;
+
+	SiMustWriteLock(sb);
+
+	sbinfo = au_sbi(sb);
+	fhsm = &sbinfo->si_fhsm;
+	AuDebugOn(!fhsm);
+	fhsm->fhsm_bottom = bindex;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_fhsm_test_jiffy(struct au_sbinfo *sbinfo, struct au_branch *br)
+{
+	struct au_br_fhsm *bf;
+
+	bf = br->br_fhsm;
+	MtxMustLock(&bf->bf_lock);
+
+	return !bf->bf_readable
+		|| time_after(jiffies,
+			      bf->bf_jiffy + sbinfo->si_fhsm.fhsm_expire);
+}
+
+/* ---------------------------------------------------------------------- */
+
+static void au_fhsm_notify(struct super_block *sb, int val)
+{
+	struct au_sbinfo *sbinfo;
+	struct au_fhsm *fhsm;
+
+	SiMustAnyLock(sb);
+
+	sbinfo = au_sbi(sb);
+	fhsm = &sbinfo->si_fhsm;
+	if (au_fhsm_pid(fhsm)
+	    && atomic_read(&fhsm->fhsm_readable) != -1) {
+		atomic_set(&fhsm->fhsm_readable, val);
+		if (val)
+			wake_up(&fhsm->fhsm_wqh);
+	}
+}
+
+static int au_fhsm_stfs(struct super_block *sb, aufs_bindex_t bindex,
+			struct aufs_stfs *rstfs, int do_lock, int do_notify)
+{
+	int err;
+	struct au_branch *br;
+	struct au_br_fhsm *bf;
+
+	br = au_sbr(sb, bindex);
+	AuDebugOn(au_br_rdonly(br));
+	bf = br->br_fhsm;
+	AuDebugOn(!bf);
+
+	if (do_lock)
+		mutex_lock(&bf->bf_lock);
+	else
+		MtxMustLock(&bf->bf_lock);
+
+	/* sb->s_root for NFS is unreliable */
+	err = au_br_stfs(br, &bf->bf_stfs);
+	if (unlikely(err)) {
+		AuErr1("FHSM failed (%d), b%d, ignored.\n", bindex, err);
+		goto out;
+	}
+
+	bf->bf_jiffy = jiffies;
+	bf->bf_readable = 1;
+	if (do_notify)
+		au_fhsm_notify(sb, /*val*/1);
+	if (rstfs)
+		*rstfs = bf->bf_stfs;
+
+out:
+	if (do_lock)
+		mutex_unlock(&bf->bf_lock);
+	au_fhsm_notify(sb, /*val*/1);
+
+	return err;
+}
+
+void au_fhsm_wrote(struct super_block *sb, aufs_bindex_t bindex, int force)
+{
+	int err;
+	struct au_sbinfo *sbinfo;
+	struct au_fhsm *fhsm;
+	struct au_branch *br;
+	struct au_br_fhsm *bf;
+
+	AuDbg("b%d, force %d\n", bindex, force);
+	SiMustAnyLock(sb);
+
+	sbinfo = au_sbi(sb);
+	fhsm = &sbinfo->si_fhsm;
+	if (!au_ftest_si(sbinfo, FHSM)
+	    || fhsm->fhsm_bottom == bindex)
+		return;
+
+	br = au_sbr(sb, bindex);
+	bf = br->br_fhsm;
+	AuDebugOn(!bf);
+	mutex_lock(&bf->bf_lock);
+	if (force
+	    || au_fhsm_pid(fhsm)
+	    || au_fhsm_test_jiffy(sbinfo, br))
+		err = au_fhsm_stfs(sb, bindex, /*rstfs*/NULL, /*do_lock*/0,
+				  /*do_notify*/1);
+	mutex_unlock(&bf->bf_lock);
+}
+
+void au_fhsm_wrote_all(struct super_block *sb, int force)
+{
+	aufs_bindex_t bindex, bbot;
+	struct au_branch *br;
+
+	/* exclude the bottom */
+	bbot = au_fhsm_bottom(sb);
+	for (bindex = 0; bindex < bbot; bindex++) {
+		br = au_sbr(sb, bindex);
+		if (au_br_fhsm(br->br_perm))
+			au_fhsm_wrote(sb, bindex, force);
+	}
+}
+
+/* ---------------------------------------------------------------------- */
+
+static __poll_t au_fhsm_poll(struct file *file, struct poll_table_struct *wait)
+{
+	__poll_t mask;
+	struct au_sbinfo *sbinfo;
+	struct au_fhsm *fhsm;
+
+	mask = 0;
+	sbinfo = file->private_data;
+	fhsm = &sbinfo->si_fhsm;
+	poll_wait(file, &fhsm->fhsm_wqh, wait);
+	if (atomic_read(&fhsm->fhsm_readable))
+		mask = EPOLLIN /* | EPOLLRDNORM */;
+
+	if (!mask)
+		AuDbg("mask 0x%x\n", mask);
+	return mask;
+}
+
+static int au_fhsm_do_read_one(struct aufs_stbr __user *stbr,
+			      struct aufs_stfs *stfs, __s16 brid)
+{
+	int err;
+
+	err = copy_to_user(&stbr->stfs, stfs, sizeof(*stfs));
+	if (!err)
+		err = __put_user(brid, &stbr->brid);
+	if (unlikely(err))
+		err = -EFAULT;
+
+	return err;
+}
+
+static ssize_t au_fhsm_do_read(struct super_block *sb,
+			       struct aufs_stbr __user *stbr, size_t count)
+{
+	ssize_t err;
+	int nstbr;
+	aufs_bindex_t bindex, bbot;
+	struct au_branch *br;
+	struct au_br_fhsm *bf;
+
+	/* except the bottom branch */
+	err = 0;
+	nstbr = 0;
+	bbot = au_fhsm_bottom(sb);
+	for (bindex = 0; !err && bindex < bbot; bindex++) {
+		br = au_sbr(sb, bindex);
+		if (!au_br_fhsm(br->br_perm))
+			continue;
+
+		bf = br->br_fhsm;
+		mutex_lock(&bf->bf_lock);
+		if (bf->bf_readable) {
+			err = -EFAULT;
+			if (count >= sizeof(*stbr))
+				err = au_fhsm_do_read_one(stbr++, &bf->bf_stfs,
+							  br->br_id);
+			if (!err) {
+				bf->bf_readable = 0;
+				count -= sizeof(*stbr);
+				nstbr++;
+			}
+		}
+		mutex_unlock(&bf->bf_lock);
+	}
+	if (!err)
+		err = sizeof(*stbr) * nstbr;
+
+	return err;
+}
+
+static ssize_t au_fhsm_read(struct file *file, char __user *buf, size_t count,
+			   loff_t *pos)
+{
+	ssize_t err;
+	int readable;
+	aufs_bindex_t nfhsm, bindex, bbot;
+	struct au_sbinfo *sbinfo;
+	struct au_fhsm *fhsm;
+	struct au_branch *br;
+	struct super_block *sb;
+
+	err = 0;
+	sbinfo = file->private_data;
+	fhsm = &sbinfo->si_fhsm;
+need_data:
+	spin_lock_irq(&fhsm->fhsm_wqh.lock);
+	if (!atomic_read(&fhsm->fhsm_readable)) {
+		if (vfsub_file_flags(file) & O_NONBLOCK)
+			err = -EAGAIN;
+		else
+			err = wait_event_interruptible_locked_irq
+				(fhsm->fhsm_wqh,
+				 atomic_read(&fhsm->fhsm_readable));
+	}
+	spin_unlock_irq(&fhsm->fhsm_wqh.lock);
+	if (unlikely(err))
+		goto out;
+
+	/* sb may already be dead */
+	au_rw_read_lock(&sbinfo->si_rwsem);
+	readable = atomic_read(&fhsm->fhsm_readable);
+	if (readable > 0) {
+		sb = sbinfo->si_sb;
+		AuDebugOn(!sb);
+		/* exclude the bottom branch */
+		nfhsm = 0;
+		bbot = au_fhsm_bottom(sb);
+		for (bindex = 0; bindex < bbot; bindex++) {
+			br = au_sbr(sb, bindex);
+			if (au_br_fhsm(br->br_perm))
+				nfhsm++;
+		}
+		err = -EMSGSIZE;
+		if (nfhsm * sizeof(struct aufs_stbr) <= count) {
+			atomic_set(&fhsm->fhsm_readable, 0);
+			err = au_fhsm_do_read(sbinfo->si_sb, (void __user *)buf,
+					     count);
+		}
+	}
+	au_rw_read_unlock(&sbinfo->si_rwsem);
+	if (!readable)
+		goto need_data;
+
+out:
+	return err;
+}
+
+static int au_fhsm_release(struct inode *inode, struct file *file)
+{
+	struct au_sbinfo *sbinfo;
+	struct au_fhsm *fhsm;
+
+	/* sb may already be dead */
+	sbinfo = file->private_data;
+	fhsm = &sbinfo->si_fhsm;
+	spin_lock(&fhsm->fhsm_spin);
+	fhsm->fhsm_pid = 0;
+	spin_unlock(&fhsm->fhsm_spin);
+	kobject_put(&sbinfo->si_kobj);
+
+	return 0;
+}
+
+static const struct file_operations au_fhsm_fops = {
+	.owner		= THIS_MODULE,
+	.llseek		= noop_llseek,
+	.read		= au_fhsm_read,
+	.poll		= au_fhsm_poll,
+	.release	= au_fhsm_release
+};
+
+int au_fhsm_fd(struct super_block *sb, int oflags)
+{
+	int err, fd;
+	struct au_sbinfo *sbinfo;
+	struct au_fhsm *fhsm;
+
+	err = -EPERM;
+	if (unlikely(!capable(CAP_SYS_ADMIN)))
+		goto out;
+
+	err = -EINVAL;
+	if (unlikely(oflags & ~(O_CLOEXEC | O_NONBLOCK)))
+		goto out;
+
+	err = 0;
+	sbinfo = au_sbi(sb);
+	fhsm = &sbinfo->si_fhsm;
+	spin_lock(&fhsm->fhsm_spin);
+	if (!fhsm->fhsm_pid)
+		fhsm->fhsm_pid = current->pid;
+	else
+		err = -EBUSY;
+	spin_unlock(&fhsm->fhsm_spin);
+	if (unlikely(err))
+		goto out;
+
+	oflags |= O_RDONLY;
+	/* oflags |= FMODE_NONOTIFY; */
+	fd = anon_inode_getfd("[aufs_fhsm]", &au_fhsm_fops, sbinfo, oflags);
+	err = fd;
+	if (unlikely(fd < 0))
+		goto out_pid;
+
+	/* succeed reglardless 'fhsm' status */
+	kobject_get(&sbinfo->si_kobj);
+	si_noflush_read_lock(sb);
+	if (au_ftest_si(sbinfo, FHSM))
+		au_fhsm_wrote_all(sb, /*force*/0);
+	si_read_unlock(sb);
+	goto out; /* success */
+
+out_pid:
+	spin_lock(&fhsm->fhsm_spin);
+	fhsm->fhsm_pid = 0;
+	spin_unlock(&fhsm->fhsm_spin);
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int au_fhsm_br_alloc(struct au_branch *br)
+{
+	int err;
+
+	err = 0;
+	br->br_fhsm = kmalloc(sizeof(*br->br_fhsm), GFP_NOFS);
+	if (br->br_fhsm)
+		au_br_fhsm_init(br->br_fhsm);
+	else
+		err = -ENOMEM;
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void au_fhsm_fin(struct super_block *sb)
+{
+	au_fhsm_notify(sb, /*val*/-1);
+}
+
+void au_fhsm_init(struct au_sbinfo *sbinfo)
+{
+	struct au_fhsm *fhsm;
+
+	fhsm = &sbinfo->si_fhsm;
+	spin_lock_init(&fhsm->fhsm_spin);
+	init_waitqueue_head(&fhsm->fhsm_wqh);
+	atomic_set(&fhsm->fhsm_readable, 0);
+	fhsm->fhsm_expire
+		= msecs_to_jiffies(AUFS_FHSM_CACHE_DEF_SEC * MSEC_PER_SEC);
+	fhsm->fhsm_bottom = -1;
+}
+
+void au_fhsm_set(struct au_sbinfo *sbinfo, unsigned int sec)
+{
+	sbinfo->si_fhsm.fhsm_expire
+		= msecs_to_jiffies(sec * MSEC_PER_SEC);
+}
+
+void au_fhsm_show(struct seq_file *seq, struct au_sbinfo *sbinfo)
+{
+	unsigned int u;
+
+	if (!au_ftest_si(sbinfo, FHSM))
+		return;
+
+	u = jiffies_to_msecs(sbinfo->si_fhsm.fhsm_expire) / MSEC_PER_SEC;
+	if (u != AUFS_FHSM_CACHE_DEF_SEC)
+		seq_printf(seq, ",fhsm_sec=%u", u);
+}
diff --git a/fs/aufs/file.c b/fs/aufs/file.c
new file mode 100644
index 000000000..4646b4b02
--- /dev/null
+++ b/fs/aufs/file.c
@@ -0,0 +1,856 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * handling file/dir, and address_space operation
+ */
+
+#ifdef CONFIG_AUFS_DEBUG
+#include <linux/migrate.h>
+#endif
+#include <linux/pagemap.h>
+#include "aufs.h"
+
+/* drop flags for writing */
+unsigned int au_file_roflags(unsigned int flags)
+{
+	flags &= ~(O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC);
+	flags |= O_RDONLY | O_NOATIME;
+	return flags;
+}
+
+/* common functions to regular file and dir */
+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
+		       struct file *file, int force_wr)
+{
+	struct file *h_file;
+	struct dentry *h_dentry;
+	struct inode *h_inode;
+	struct super_block *sb;
+	struct au_branch *br;
+	struct path h_path;
+	int err;
+
+	/* a race condition can happen between open and unlink/rmdir */
+	h_file = ERR_PTR(-ENOENT);
+	h_dentry = au_h_dptr(dentry, bindex);
+	if (au_test_nfsd() && (!h_dentry || d_is_negative(h_dentry)))
+		goto out;
+	h_inode = d_inode(h_dentry);
+	spin_lock(&h_dentry->d_lock);
+	err = (!d_unhashed(dentry) && d_unlinked(h_dentry))
+		/* || !d_inode(dentry)->i_nlink */
+		;
+	spin_unlock(&h_dentry->d_lock);
+	if (unlikely(err))
+		goto out;
+
+	sb = dentry->d_sb;
+	br = au_sbr(sb, bindex);
+	err = au_br_test_oflag(flags, br);
+	h_file = ERR_PTR(err);
+	if (unlikely(err))
+		goto out;
+
+	/* drop flags for writing */
+	if (au_test_ro(sb, bindex, d_inode(dentry))) {
+		if (force_wr && !(flags & O_WRONLY))
+			force_wr = 0;
+		flags = au_file_roflags(flags);
+		if (force_wr) {
+			h_file = ERR_PTR(-EROFS);
+			flags = au_file_roflags(flags);
+			if (unlikely(vfsub_native_ro(h_inode)
+				     || IS_APPEND(h_inode)))
+				goto out;
+			flags &= ~O_ACCMODE;
+			flags |= O_WRONLY;
+		}
+	}
+	flags &= ~O_CREAT;
+	au_br_get(br);
+	h_path.dentry = h_dentry;
+	h_path.mnt = au_br_mnt(br);
+	h_file = vfsub_dentry_open(&h_path, flags);
+	if (IS_ERR(h_file))
+		goto out_br;
+
+	if (flags & __FMODE_EXEC) {
+		err = deny_write_access(h_file);
+		if (unlikely(err)) {
+			fput(h_file);
+			h_file = ERR_PTR(err);
+			goto out_br;
+		}
+	}
+	fsnotify_open(h_file);
+	goto out; /* success */
+
+out_br:
+	au_br_put(br);
+out:
+	return h_file;
+}
+
+static int au_cmoo(struct dentry *dentry)
+{
+	int err, cmoo, matched;
+	unsigned int udba;
+	struct path h_path;
+	struct au_pin pin;
+	struct au_cp_generic cpg = {
+		.dentry	= dentry,
+		.bdst	= -1,
+		.bsrc	= -1,
+		.len	= -1,
+		.pin	= &pin,
+		.flags	= AuCpup_DTIME | AuCpup_HOPEN
+	};
+	struct inode *delegated;
+	struct super_block *sb;
+	struct au_sbinfo *sbinfo;
+	struct au_fhsm *fhsm;
+	pid_t pid;
+	struct au_branch *br;
+	struct dentry *parent;
+	struct au_hinode *hdir;
+
+	DiMustWriteLock(dentry);
+	IiMustWriteLock(d_inode(dentry));
+
+	err = 0;
+	if (IS_ROOT(dentry))
+		goto out;
+	cpg.bsrc = au_dbtop(dentry);
+	if (!cpg.bsrc)
+		goto out;
+
+	sb = dentry->d_sb;
+	sbinfo = au_sbi(sb);
+	fhsm = &sbinfo->si_fhsm;
+	pid = au_fhsm_pid(fhsm);
+	rcu_read_lock();
+	matched = (pid
+		   && (current->pid == pid
+		       || rcu_dereference(current->real_parent)->pid == pid));
+	rcu_read_unlock();
+	if (matched)
+		goto out;
+
+	br = au_sbr(sb, cpg.bsrc);
+	cmoo = au_br_cmoo(br->br_perm);
+	if (!cmoo)
+		goto out;
+	if (!d_is_reg(dentry))
+		cmoo &= AuBrAttr_COO_ALL;
+	if (!cmoo)
+		goto out;
+
+	parent = dget_parent(dentry);
+	di_write_lock_parent(parent);
+	err = au_wbr_do_copyup_bu(dentry, cpg.bsrc - 1);
+	cpg.bdst = err;
+	if (unlikely(err < 0)) {
+		err = 0;	/* there is no upper writable branch */
+		goto out_dgrade;
+	}
+	AuDbg("bsrc %d, bdst %d\n", cpg.bsrc, cpg.bdst);
+
+	/* do not respect the coo attrib for the target branch */
+	err = au_cpup_dirs(dentry, cpg.bdst);
+	if (unlikely(err))
+		goto out_dgrade;
+
+	di_downgrade_lock(parent, AuLock_IR);
+	udba = au_opt_udba(sb);
+	err = au_pin(&pin, dentry, cpg.bdst, udba,
+		     AuPin_DI_LOCKED | AuPin_MNT_WRITE);
+	if (unlikely(err))
+		goto out_parent;
+
+	err = au_sio_cpup_simple(&cpg);
+	au_unpin(&pin);
+	if (unlikely(err))
+		goto out_parent;
+	if (!(cmoo & AuBrWAttr_MOO))
+		goto out_parent; /* success */
+
+	err = au_pin(&pin, dentry, cpg.bsrc, udba,
+		     AuPin_DI_LOCKED | AuPin_MNT_WRITE);
+	if (unlikely(err))
+		goto out_parent;
+
+	h_path.mnt = au_br_mnt(br);
+	h_path.dentry = au_h_dptr(dentry, cpg.bsrc);
+	hdir = au_hi(d_inode(parent), cpg.bsrc);
+	delegated = NULL;
+	err = vfsub_unlink(hdir->hi_inode, &h_path, &delegated, /*force*/1);
+	au_unpin(&pin);
+	/* todo: keep h_dentry or not? */
+	if (unlikely(err == -EWOULDBLOCK)) {
+		pr_warn("cannot retry for NFSv4 delegation"
+			" for an internal unlink\n");
+		iput(delegated);
+	}
+	if (unlikely(err)) {
+		pr_err("unlink %pd after coo failed (%d), ignored\n",
+		       dentry, err);
+		err = 0;
+	}
+	goto out_parent; /* success */
+
+out_dgrade:
+	di_downgrade_lock(parent, AuLock_IR);
+out_parent:
+	di_read_unlock(parent, AuLock_IR);
+	dput(parent);
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+int au_do_open(struct file *file, struct au_do_open_args *args)
+{
+	int err, aopen = args->aopen;
+	struct dentry *dentry;
+	struct au_finfo *finfo;
+
+	if (!aopen)
+		err = au_finfo_init(file, args->fidir);
+	else {
+		lockdep_off();
+		err = au_finfo_init(file, args->fidir);
+		lockdep_on();
+	}
+	if (unlikely(err))
+		goto out;
+
+	dentry = file->f_path.dentry;
+	AuDebugOn(IS_ERR_OR_NULL(dentry));
+	di_write_lock_child(dentry);
+	err = au_cmoo(dentry);
+	di_downgrade_lock(dentry, AuLock_IR);
+	if (!err) {
+		if (!aopen)
+			err = args->open(file, vfsub_file_flags(file), NULL);
+		else {
+			lockdep_off();
+			err = args->open(file, vfsub_file_flags(file), NULL);
+			lockdep_on();
+		}
+	}
+	di_read_unlock(dentry, AuLock_IR);
+
+	finfo = au_fi(file);
+	if (!err) {
+		finfo->fi_file = file;
+		au_hbl_add(&finfo->fi_hlist,
+			   &au_sbi(file->f_path.dentry->d_sb)->si_files);
+	}
+	if (!aopen)
+		fi_write_unlock(file);
+	else {
+		lockdep_off();
+		fi_write_unlock(file);
+		lockdep_on();
+	}
+	if (unlikely(err)) {
+		finfo->fi_hdir = NULL;
+		au_finfo_fin(file);
+	}
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+int au_reopen_nondir(struct file *file)
+{
+	int err;
+	aufs_bindex_t btop;
+	struct dentry *dentry;
+	struct file *h_file, *h_file_tmp;
+
+	dentry = file->f_path.dentry;
+	btop = au_dbtop(dentry);
+	h_file_tmp = NULL;
+	if (au_fbtop(file) == btop) {
+		h_file = au_hf_top(file);
+		if (file->f_mode == h_file->f_mode)
+			return 0; /* success */
+		h_file_tmp = h_file;
+		get_file(h_file_tmp);
+		au_set_h_fptr(file, btop, NULL);
+	}
+	AuDebugOn(au_fi(file)->fi_hdir);
+	/*
+	 * it can happen
+	 * file exists on both of rw and ro
+	 * open --> dbtop and fbtop are both 0
+	 * prepend a branch as rw, "rw" become ro
+	 * remove rw/file
+	 * delete the top branch, "rw" becomes rw again
+	 *	--> dbtop is 1, fbtop is still 0
+	 * write --> fbtop is 0 but dbtop is 1
+	 */
+	/* AuDebugOn(au_fbtop(file) < btop); */
+
+	h_file = au_h_open(dentry, btop, vfsub_file_flags(file) & ~O_TRUNC,
+			   file, /*force_wr*/0);
+	err = PTR_ERR(h_file);
+	if (IS_ERR(h_file)) {
+		if (h_file_tmp) {
+			au_sbr_get(dentry->d_sb, btop);
+			au_set_h_fptr(file, btop, h_file_tmp);
+			h_file_tmp = NULL;
+		}
+		goto out; /* todo: close all? */
+	}
+
+	err = 0;
+	au_set_fbtop(file, btop);
+	au_set_h_fptr(file, btop, h_file);
+	au_update_figen(file);
+	/* todo: necessary? */
+	/* file->f_ra = h_file->f_ra; */
+
+out:
+	if (h_file_tmp)
+		fput(h_file_tmp);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_reopen_wh(struct file *file, aufs_bindex_t btgt,
+			struct dentry *hi_wh)
+{
+	int err;
+	aufs_bindex_t btop;
+	struct au_dinfo *dinfo;
+	struct dentry *h_dentry;
+	struct au_hdentry *hdp;
+
+	dinfo = au_di(file->f_path.dentry);
+	AuRwMustWriteLock(&dinfo->di_rwsem);
+
+	btop = dinfo->di_btop;
+	dinfo->di_btop = btgt;
+	hdp = au_hdentry(dinfo, btgt);
+	h_dentry = hdp->hd_dentry;
+	hdp->hd_dentry = hi_wh;
+	err = au_reopen_nondir(file);
+	hdp->hd_dentry = h_dentry;
+	dinfo->di_btop = btop;
+
+	return err;
+}
+
+static int au_ready_to_write_wh(struct file *file, loff_t len,
+				aufs_bindex_t bcpup, struct au_pin *pin)
+{
+	int err;
+	struct inode *inode, *h_inode;
+	struct dentry *h_dentry, *hi_wh;
+	struct au_cp_generic cpg = {
+		.dentry	= file->f_path.dentry,
+		.bdst	= bcpup,
+		.bsrc	= -1,
+		.len	= len,
+		.pin	= pin
+	};
+
+	au_update_dbtop(cpg.dentry);
+	inode = d_inode(cpg.dentry);
+	h_inode = NULL;
+	if (au_dbtop(cpg.dentry) <= bcpup
+	    && au_dbbot(cpg.dentry) >= bcpup) {
+		h_dentry = au_h_dptr(cpg.dentry, bcpup);
+		if (h_dentry && d_is_positive(h_dentry))
+			h_inode = d_inode(h_dentry);
+	}
+	hi_wh = au_hi_wh(inode, bcpup);
+	if (!hi_wh && !h_inode)
+		err = au_sio_cpup_wh(&cpg, file);
+	else
+		/* already copied-up after unlink */
+		err = au_reopen_wh(file, bcpup, hi_wh);
+
+	if (!err
+	    && (inode->i_nlink > 1
+		|| (inode->i_state & I_LINKABLE))
+	    && au_opt_test(au_mntflags(cpg.dentry->d_sb), PLINK))
+		au_plink_append(inode, bcpup, au_h_dptr(cpg.dentry, bcpup));
+
+	return err;
+}
+
+/*
+ * prepare the @file for writing.
+ */
+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin)
+{
+	int err;
+	aufs_bindex_t dbtop;
+	struct dentry *parent;
+	struct inode *inode;
+	struct super_block *sb;
+	struct file *h_file;
+	struct au_cp_generic cpg = {
+		.dentry	= file->f_path.dentry,
+		.bdst	= -1,
+		.bsrc	= -1,
+		.len	= len,
+		.pin	= pin,
+		.flags	= AuCpup_DTIME
+	};
+
+	sb = cpg.dentry->d_sb;
+	inode = d_inode(cpg.dentry);
+	cpg.bsrc = au_fbtop(file);
+	err = au_test_ro(sb, cpg.bsrc, inode);
+	if (!err && (au_hf_top(file)->f_mode & FMODE_WRITE)) {
+		err = au_pin(pin, cpg.dentry, cpg.bsrc, AuOpt_UDBA_NONE,
+			     /*flags*/0);
+		goto out;
+	}
+
+	/* need to cpup or reopen */
+	parent = dget_parent(cpg.dentry);
+	di_write_lock_parent(parent);
+	err = AuWbrCopyup(au_sbi(sb), cpg.dentry);
+	cpg.bdst = err;
+	if (unlikely(err < 0))
+		goto out_dgrade;
+	err = 0;
+
+	if (!d_unhashed(cpg.dentry) && !au_h_dptr(parent, cpg.bdst)) {
+		err = au_cpup_dirs(cpg.dentry, cpg.bdst);
+		if (unlikely(err))
+			goto out_dgrade;
+	}
+
+	err = au_pin(pin, cpg.dentry, cpg.bdst, AuOpt_UDBA_NONE,
+		     AuPin_DI_LOCKED | AuPin_MNT_WRITE);
+	if (unlikely(err))
+		goto out_dgrade;
+
+	dbtop = au_dbtop(cpg.dentry);
+	if (dbtop <= cpg.bdst)
+		cpg.bsrc = cpg.bdst;
+
+	if (dbtop <= cpg.bdst		/* just reopen */
+	    || !d_unhashed(cpg.dentry)	/* copyup and reopen */
+		) {
+		h_file = au_h_open_pre(cpg.dentry, cpg.bsrc, /*force_wr*/0);
+		if (IS_ERR(h_file))
+			err = PTR_ERR(h_file);
+		else {
+			di_downgrade_lock(parent, AuLock_IR);
+			if (dbtop > cpg.bdst)
+				err = au_sio_cpup_simple(&cpg);
+			if (!err)
+				err = au_reopen_nondir(file);
+			au_h_open_post(cpg.dentry, cpg.bsrc, h_file);
+		}
+	} else {			/* copyup as wh and reopen */
+		/*
+		 * since writable hfsplus branch is not supported,
+		 * h_open_pre/post() are unnecessary.
+		 */
+		err = au_ready_to_write_wh(file, len, cpg.bdst, pin);
+		di_downgrade_lock(parent, AuLock_IR);
+	}
+
+	if (!err) {
+		au_pin_set_parent_lflag(pin, /*lflag*/0);
+		goto out_dput; /* success */
+	}
+	au_unpin(pin);
+	goto out_unlock;
+
+out_dgrade:
+	di_downgrade_lock(parent, AuLock_IR);
+out_unlock:
+	di_read_unlock(parent, AuLock_IR);
+out_dput:
+	dput(parent);
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int au_do_flush(struct file *file, fl_owner_t id,
+		int (*flush)(struct file *file, fl_owner_t id))
+{
+	int err;
+	struct super_block *sb;
+	struct inode *inode;
+
+	inode = file_inode(file);
+	sb = inode->i_sb;
+	si_noflush_read_lock(sb);
+	fi_read_lock(file);
+	ii_read_lock_child(inode);
+
+	err = flush(file, id);
+	au_cpup_attr_timesizes(inode);
+
+	ii_read_unlock(inode);
+	fi_read_unlock(file);
+	si_read_unlock(sb);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_file_refresh_by_inode(struct file *file, int *need_reopen)
+{
+	int err;
+	struct au_pin pin;
+	struct au_finfo *finfo;
+	struct dentry *parent, *hi_wh;
+	struct inode *inode;
+	struct super_block *sb;
+	struct au_cp_generic cpg = {
+		.dentry	= file->f_path.dentry,
+		.bdst	= -1,
+		.bsrc	= -1,
+		.len	= -1,
+		.pin	= &pin,
+		.flags	= AuCpup_DTIME
+	};
+
+	FiMustWriteLock(file);
+
+	err = 0;
+	finfo = au_fi(file);
+	sb = cpg.dentry->d_sb;
+	inode = d_inode(cpg.dentry);
+	cpg.bdst = au_ibtop(inode);
+	if (cpg.bdst == finfo->fi_btop || IS_ROOT(cpg.dentry))
+		goto out;
+
+	parent = dget_parent(cpg.dentry);
+	if (au_test_ro(sb, cpg.bdst, inode)) {
+		di_read_lock_parent(parent, !AuLock_IR);
+		err = AuWbrCopyup(au_sbi(sb), cpg.dentry);
+		cpg.bdst = err;
+		di_read_unlock(parent, !AuLock_IR);
+		if (unlikely(err < 0))
+			goto out_parent;
+		err = 0;
+	}
+
+	di_read_lock_parent(parent, AuLock_IR);
+	hi_wh = au_hi_wh(inode, cpg.bdst);
+	if (!S_ISDIR(inode->i_mode)
+	    && au_opt_test(au_mntflags(sb), PLINK)
+	    && au_plink_test(inode)
+	    && !d_unhashed(cpg.dentry)
+	    && cpg.bdst < au_dbtop(cpg.dentry)) {
+		err = au_test_and_cpup_dirs(cpg.dentry, cpg.bdst);
+		if (unlikely(err))
+			goto out_unlock;
+
+		/* always superio. */
+		err = au_pin(&pin, cpg.dentry, cpg.bdst, AuOpt_UDBA_NONE,
+			     AuPin_DI_LOCKED | AuPin_MNT_WRITE);
+		if (!err) {
+			err = au_sio_cpup_simple(&cpg);
+			au_unpin(&pin);
+		}
+	} else if (hi_wh) {
+		/* already copied-up after unlink */
+		err = au_reopen_wh(file, cpg.bdst, hi_wh);
+		*need_reopen = 0;
+	}
+
+out_unlock:
+	di_read_unlock(parent, AuLock_IR);
+out_parent:
+	dput(parent);
+out:
+	return err;
+}
+
+static void au_do_refresh_dir(struct file *file)
+{
+	aufs_bindex_t bindex, bbot, new_bindex, brid;
+	struct au_hfile *p, tmp, *q;
+	struct au_finfo *finfo;
+	struct super_block *sb;
+	struct au_fidir *fidir;
+
+	FiMustWriteLock(file);
+
+	sb = file->f_path.dentry->d_sb;
+	finfo = au_fi(file);
+	fidir = finfo->fi_hdir;
+	AuDebugOn(!fidir);
+	p = fidir->fd_hfile + finfo->fi_btop;
+	brid = p->hf_br->br_id;
+	bbot = fidir->fd_bbot;
+	for (bindex = finfo->fi_btop; bindex <= bbot; bindex++, p++) {
+		if (!p->hf_file)
+			continue;
+
+		new_bindex = au_br_index(sb, p->hf_br->br_id);
+		if (new_bindex == bindex)
+			continue;
+		if (new_bindex < 0) {
+			au_set_h_fptr(file, bindex, NULL);
+			continue;
+		}
+
+		/* swap two lower inode, and loop again */
+		q = fidir->fd_hfile + new_bindex;
+		tmp = *q;
+		*q = *p;
+		*p = tmp;
+		if (tmp.hf_file) {
+			bindex--;
+			p--;
+		}
+	}
+
+	p = fidir->fd_hfile;
+	if (!au_test_mmapped(file) && !d_unlinked(file->f_path.dentry)) {
+		bbot = au_sbbot(sb);
+		for (finfo->fi_btop = 0; finfo->fi_btop <= bbot;
+		     finfo->fi_btop++, p++)
+			if (p->hf_file) {
+				if (file_inode(p->hf_file))
+					break;
+				au_hfput(p, /*execed*/0);
+			}
+	} else {
+		bbot = au_br_index(sb, brid);
+		for (finfo->fi_btop = 0; finfo->fi_btop < bbot;
+		     finfo->fi_btop++, p++)
+			if (p->hf_file)
+				au_hfput(p, /*execed*/0);
+		bbot = au_sbbot(sb);
+	}
+
+	p = fidir->fd_hfile + bbot;
+	for (fidir->fd_bbot = bbot; fidir->fd_bbot >= finfo->fi_btop;
+	     fidir->fd_bbot--, p--)
+		if (p->hf_file) {
+			if (file_inode(p->hf_file))
+				break;
+			au_hfput(p, /*execed*/0);
+		}
+	AuDebugOn(fidir->fd_bbot < finfo->fi_btop);
+}
+
+/*
+ * after branch manipulating, refresh the file.
+ */
+static int refresh_file(struct file *file, int (*reopen)(struct file *file))
+{
+	int err, need_reopen, nbr;
+	aufs_bindex_t bbot, bindex;
+	struct dentry *dentry;
+	struct super_block *sb;
+	struct au_finfo *finfo;
+	struct au_hfile *hfile;
+
+	dentry = file->f_path.dentry;
+	sb = dentry->d_sb;
+	nbr = au_sbbot(sb) + 1;
+	finfo = au_fi(file);
+	if (!finfo->fi_hdir) {
+		hfile = &finfo->fi_htop;
+		AuDebugOn(!hfile->hf_file);
+		bindex = au_br_index(sb, hfile->hf_br->br_id);
+		AuDebugOn(bindex < 0);
+		if (bindex != finfo->fi_btop)
+			au_set_fbtop(file, bindex);
+	} else {
+		err = au_fidir_realloc(finfo, nbr, /*may_shrink*/0);
+		if (unlikely(err))
+			goto out;
+		au_do_refresh_dir(file);
+	}
+
+	err = 0;
+	need_reopen = 1;
+	if (!au_test_mmapped(file))
+		err = au_file_refresh_by_inode(file, &need_reopen);
+	if (finfo->fi_hdir)
+		/* harmless if err */
+		au_fidir_realloc(finfo, nbr, /*may_shrink*/1);
+	if (!err && need_reopen && !d_unlinked(dentry))
+		err = reopen(file);
+	if (!err) {
+		au_update_figen(file);
+		goto out; /* success */
+	}
+
+	/* error, close all lower files */
+	if (finfo->fi_hdir) {
+		bbot = au_fbbot_dir(file);
+		for (bindex = au_fbtop(file); bindex <= bbot; bindex++)
+			au_set_h_fptr(file, bindex, NULL);
+	}
+
+out:
+	return err;
+}
+
+/* common function to regular file and dir */
+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
+			  int wlock, unsigned int fi_lsc)
+{
+	int err;
+	unsigned int sigen, figen;
+	aufs_bindex_t btop;
+	unsigned char pseudo_link;
+	struct dentry *dentry;
+	struct inode *inode;
+
+	err = 0;
+	dentry = file->f_path.dentry;
+	inode = d_inode(dentry);
+	sigen = au_sigen(dentry->d_sb);
+	fi_write_lock_nested(file, fi_lsc);
+	figen = au_figen(file);
+	if (!fi_lsc)
+		di_write_lock_child(dentry);
+	else
+		di_write_lock_child2(dentry);
+	btop = au_dbtop(dentry);
+	pseudo_link = (btop != au_ibtop(inode));
+	if (sigen == figen && !pseudo_link && au_fbtop(file) == btop) {
+		if (!wlock) {
+			di_downgrade_lock(dentry, AuLock_IR);
+			fi_downgrade_lock(file);
+		}
+		goto out; /* success */
+	}
+
+	AuDbg("sigen %d, figen %d\n", sigen, figen);
+	if (au_digen_test(dentry, sigen)) {
+		err = au_reval_dpath(dentry, sigen);
+		AuDebugOn(!err && au_digen_test(dentry, sigen));
+	}
+
+	if (!err)
+		err = refresh_file(file, reopen);
+	if (!err) {
+		if (!wlock) {
+			di_downgrade_lock(dentry, AuLock_IR);
+			fi_downgrade_lock(file);
+		}
+	} else {
+		di_write_unlock(dentry);
+		fi_write_unlock(file);
+	}
+
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* cf. aufs_nopage() */
+/* for madvise(2) */
+static int aufs_readpage(struct file *file __maybe_unused, struct page *page)
+{
+	unlock_page(page);
+	return 0;
+}
+
+/* it will never be called, but necessary to support O_DIRECT */
+static ssize_t aufs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+{ BUG(); return 0; }
+
+/* they will never be called. */
+#ifdef CONFIG_AUFS_DEBUG
+static int aufs_write_begin(struct file *file, struct address_space *mapping,
+			    loff_t pos, unsigned len, unsigned flags,
+			    struct page **pagep, void **fsdata)
+{ AuUnsupport(); return 0; }
+static int aufs_write_end(struct file *file, struct address_space *mapping,
+			  loff_t pos, unsigned len, unsigned copied,
+			  struct page *page, void *fsdata)
+{ AuUnsupport(); return 0; }
+static int aufs_writepage(struct page *page, struct writeback_control *wbc)
+{ AuUnsupport(); return 0; }
+
+static int aufs_set_page_dirty(struct page *page)
+{ AuUnsupport(); return 0; }
+static void aufs_invalidatepage(struct page *page, unsigned int offset,
+				unsigned int length)
+{ AuUnsupport(); }
+static int aufs_releasepage(struct page *page, gfp_t gfp)
+{ AuUnsupport(); return 0; }
+#if 0 /* called by memory compaction regardless file */
+static int aufs_migratepage(struct address_space *mapping, struct page *newpage,
+			    struct page *page, enum migrate_mode mode)
+{ AuUnsupport(); return 0; }
+#endif
+static bool aufs_isolate_page(struct page *page, isolate_mode_t mode)
+{ AuUnsupport(); return true; }
+static void aufs_putback_page(struct page *page)
+{ AuUnsupport(); }
+static int aufs_launder_page(struct page *page)
+{ AuUnsupport(); return 0; }
+static int aufs_is_partially_uptodate(struct page *page,
+				      unsigned long from,
+				      unsigned long count)
+{ AuUnsupport(); return 0; }
+static void aufs_is_dirty_writeback(struct page *page, bool *dirty,
+				    bool *writeback)
+{ AuUnsupport(); }
+static int aufs_error_remove_page(struct address_space *mapping,
+				  struct page *page)
+{ AuUnsupport(); return 0; }
+static int aufs_swap_activate(struct swap_info_struct *sis, struct file *file,
+			      sector_t *span)
+{ AuUnsupport(); return 0; }
+static void aufs_swap_deactivate(struct file *file)
+{ AuUnsupport(); }
+#endif /* CONFIG_AUFS_DEBUG */
+
+const struct address_space_operations aufs_aop = {
+	.readpage		= aufs_readpage,
+	.direct_IO		= aufs_direct_IO,
+#ifdef CONFIG_AUFS_DEBUG
+	.writepage		= aufs_writepage,
+	/* no writepages, because of writepage */
+	.set_page_dirty		= aufs_set_page_dirty,
+	/* no readpages, because of readpage */
+	.write_begin		= aufs_write_begin,
+	.write_end		= aufs_write_end,
+	/* no bmap, no block device */
+	.invalidatepage		= aufs_invalidatepage,
+	.releasepage		= aufs_releasepage,
+	/* is fallback_migrate_page ok? */
+	/* .migratepage		= aufs_migratepage, */
+	.isolate_page		= aufs_isolate_page,
+	.putback_page		= aufs_putback_page,
+	.launder_page		= aufs_launder_page,
+	.is_partially_uptodate	= aufs_is_partially_uptodate,
+	.is_dirty_writeback	= aufs_is_dirty_writeback,
+	.error_remove_page	= aufs_error_remove_page,
+	.swap_activate		= aufs_swap_activate,
+	.swap_deactivate	= aufs_swap_deactivate
+#endif /* CONFIG_AUFS_DEBUG */
+};
diff --git a/fs/aufs/file.h b/fs/aufs/file.h
new file mode 100644
index 000000000..d36674eb4
--- /dev/null
+++ b/fs/aufs/file.h
@@ -0,0 +1,340 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * file operations
+ */
+
+#ifndef __AUFS_FILE_H__
+#define __AUFS_FILE_H__
+
+#ifdef __KERNEL__
+
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/mm_types.h>
+#include <linux/poll.h>
+#include "rwsem.h"
+
+struct au_branch;
+struct au_hfile {
+	struct file		*hf_file;
+	struct au_branch	*hf_br;
+};
+
+struct au_vdir;
+struct au_fidir {
+	aufs_bindex_t		fd_bbot;
+	aufs_bindex_t		fd_nent;
+	struct au_vdir		*fd_vdir_cache;
+	struct au_hfile		fd_hfile[];
+};
+
+static inline int au_fidir_sz(int nent)
+{
+	AuDebugOn(nent < 0);
+	return sizeof(struct au_fidir) + sizeof(struct au_hfile) * nent;
+}
+
+struct au_finfo {
+	atomic_t		fi_generation;
+
+	struct au_rwsem		fi_rwsem;
+	aufs_bindex_t		fi_btop;
+
+	/* do not union them */
+	struct {				/* for non-dir */
+		struct au_hfile			fi_htop;
+		atomic_t			fi_mmapped;
+	};
+	struct au_fidir		*fi_hdir;	/* for dir only */
+
+	struct hlist_bl_node	fi_hlist;
+	struct file		*fi_file;	/* very ugly */
+} ____cacheline_aligned_in_smp;
+
+/* ---------------------------------------------------------------------- */
+
+/* file.c */
+extern const struct address_space_operations aufs_aop;
+unsigned int au_file_roflags(unsigned int flags);
+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
+		       struct file *file, int force_wr);
+struct au_do_open_args {
+	int		aopen;
+	int		(*open)(struct file *file, int flags,
+				struct file *h_file);
+	struct au_fidir	*fidir;
+	struct file	*h_file;
+};
+int au_do_open(struct file *file, struct au_do_open_args *args);
+int au_reopen_nondir(struct file *file);
+struct au_pin;
+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin);
+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
+			  int wlock, unsigned int fi_lsc);
+int au_do_flush(struct file *file, fl_owner_t id,
+		int (*flush)(struct file *file, fl_owner_t id));
+
+/* poll.c */
+#ifdef CONFIG_AUFS_POLL
+__poll_t aufs_poll(struct file *file, poll_table *wait);
+#endif
+
+#ifdef CONFIG_AUFS_BR_HFSPLUS
+/* hfsplus.c */
+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex,
+			   int force_wr);
+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
+		    struct file *h_file);
+#else
+AuStub(struct file *, au_h_open_pre, return NULL, struct dentry *dentry,
+       aufs_bindex_t bindex, int force_wr)
+AuStubVoid(au_h_open_post, struct dentry *dentry, aufs_bindex_t bindex,
+	   struct file *h_file);
+#endif
+
+/* f_op.c */
+extern const struct file_operations aufs_file_fop;
+int au_do_open_nondir(struct file *file, int flags, struct file *h_file);
+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file);
+struct file *au_read_pre(struct file *file, int keep_fi, unsigned int lsc);
+
+/* finfo.c */
+void au_hfput(struct au_hfile *hf, int execed);
+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex,
+		   struct file *h_file);
+
+void au_update_figen(struct file *file);
+struct au_fidir *au_fidir_alloc(struct super_block *sb);
+int au_fidir_realloc(struct au_finfo *finfo, int nbr, int may_shrink);
+
+void au_fi_init_once(void *_fi);
+void au_finfo_fin(struct file *file);
+int au_finfo_init(struct file *file, struct au_fidir *fidir);
+
+/* ioctl.c */
+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg);
+#ifdef CONFIG_COMPAT
+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
+			   unsigned long arg);
+long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
+			      unsigned long arg);
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+static inline struct au_finfo *au_fi(struct file *file)
+{
+	return file->private_data;
+}
+
+/* ---------------------------------------------------------------------- */
+
+#define fi_read_lock(f)	au_rw_read_lock(&au_fi(f)->fi_rwsem)
+#define fi_write_lock(f)	au_rw_write_lock(&au_fi(f)->fi_rwsem)
+#define fi_read_trylock(f)	au_rw_read_trylock(&au_fi(f)->fi_rwsem)
+#define fi_write_trylock(f)	au_rw_write_trylock(&au_fi(f)->fi_rwsem)
+/*
+#define fi_read_trylock_nested(f) \
+	au_rw_read_trylock_nested(&au_fi(f)->fi_rwsem)
+#define fi_write_trylock_nested(f) \
+	au_rw_write_trylock_nested(&au_fi(f)->fi_rwsem)
+*/
+
+#define fi_read_unlock(f)	au_rw_read_unlock(&au_fi(f)->fi_rwsem)
+#define fi_write_unlock(f)	au_rw_write_unlock(&au_fi(f)->fi_rwsem)
+#define fi_downgrade_lock(f)	au_rw_dgrade_lock(&au_fi(f)->fi_rwsem)
+
+/* lock subclass for finfo */
+enum {
+	AuLsc_FI_1,
+	AuLsc_FI_2
+};
+
+static inline void fi_read_lock_nested(struct file *f, unsigned int lsc)
+{
+	au_rw_read_lock_nested(&au_fi(f)->fi_rwsem, lsc);
+}
+
+static inline void fi_write_lock_nested(struct file *f, unsigned int lsc)
+{
+	au_rw_write_lock_nested(&au_fi(f)->fi_rwsem, lsc);
+}
+
+/*
+ * fi_read_lock_1, fi_write_lock_1,
+ * fi_read_lock_2, fi_write_lock_2
+ */
+#define AuReadLockFunc(name) \
+static inline void fi_read_lock_##name(struct file *f) \
+{ fi_read_lock_nested(f, AuLsc_FI_##name); }
+
+#define AuWriteLockFunc(name) \
+static inline void fi_write_lock_##name(struct file *f) \
+{ fi_write_lock_nested(f, AuLsc_FI_##name); }
+
+#define AuRWLockFuncs(name) \
+	AuReadLockFunc(name) \
+	AuWriteLockFunc(name)
+
+AuRWLockFuncs(1);
+AuRWLockFuncs(2);
+
+#undef AuReadLockFunc
+#undef AuWriteLockFunc
+#undef AuRWLockFuncs
+
+#define FiMustNoWaiters(f)	AuRwMustNoWaiters(&au_fi(f)->fi_rwsem)
+#define FiMustAnyLock(f)	AuRwMustAnyLock(&au_fi(f)->fi_rwsem)
+#define FiMustWriteLock(f)	AuRwMustWriteLock(&au_fi(f)->fi_rwsem)
+
+/* ---------------------------------------------------------------------- */
+
+/* todo: hard/soft set? */
+static inline aufs_bindex_t au_fbtop(struct file *file)
+{
+	FiMustAnyLock(file);
+	return au_fi(file)->fi_btop;
+}
+
+static inline aufs_bindex_t au_fbbot_dir(struct file *file)
+{
+	FiMustAnyLock(file);
+	AuDebugOn(!au_fi(file)->fi_hdir);
+	return au_fi(file)->fi_hdir->fd_bbot;
+}
+
+static inline struct au_vdir *au_fvdir_cache(struct file *file)
+{
+	FiMustAnyLock(file);
+	AuDebugOn(!au_fi(file)->fi_hdir);
+	return au_fi(file)->fi_hdir->fd_vdir_cache;
+}
+
+static inline void au_set_fbtop(struct file *file, aufs_bindex_t bindex)
+{
+	FiMustWriteLock(file);
+	au_fi(file)->fi_btop = bindex;
+}
+
+static inline void au_set_fbbot_dir(struct file *file, aufs_bindex_t bindex)
+{
+	FiMustWriteLock(file);
+	AuDebugOn(!au_fi(file)->fi_hdir);
+	au_fi(file)->fi_hdir->fd_bbot = bindex;
+}
+
+static inline void au_set_fvdir_cache(struct file *file,
+				      struct au_vdir *vdir_cache)
+{
+	FiMustWriteLock(file);
+	AuDebugOn(!au_fi(file)->fi_hdir);
+	au_fi(file)->fi_hdir->fd_vdir_cache = vdir_cache;
+}
+
+static inline struct file *au_hf_top(struct file *file)
+{
+	FiMustAnyLock(file);
+	AuDebugOn(au_fi(file)->fi_hdir);
+	return au_fi(file)->fi_htop.hf_file;
+}
+
+static inline struct file *au_hf_dir(struct file *file, aufs_bindex_t bindex)
+{
+	FiMustAnyLock(file);
+	AuDebugOn(!au_fi(file)->fi_hdir);
+	return au_fi(file)->fi_hdir->fd_hfile[0 + bindex].hf_file;
+}
+
+/* todo: memory barrier? */
+static inline unsigned int au_figen(struct file *f)
+{
+	return atomic_read(&au_fi(f)->fi_generation);
+}
+
+static inline void au_set_mmapped(struct file *f)
+{
+	if (atomic_inc_return(&au_fi(f)->fi_mmapped))
+		return;
+	pr_warn("fi_mmapped wrapped around\n");
+	while (!atomic_inc_return(&au_fi(f)->fi_mmapped))
+		;
+}
+
+static inline void au_unset_mmapped(struct file *f)
+{
+	atomic_dec(&au_fi(f)->fi_mmapped);
+}
+
+static inline int au_test_mmapped(struct file *f)
+{
+	return atomic_read(&au_fi(f)->fi_mmapped);
+}
+
+/* customize vma->vm_file */
+
+static inline void au_do_vm_file_reset(struct vm_area_struct *vma,
+				       struct file *file)
+{
+	struct file *f;
+
+	f = vma->vm_file;
+	get_file(file);
+	vma->vm_file = file;
+	fput(f);
+}
+
+#ifdef CONFIG_MMU
+#define AuDbgVmRegion(file, vma) do {} while (0)
+
+static inline void au_vm_file_reset(struct vm_area_struct *vma,
+				    struct file *file)
+{
+	au_do_vm_file_reset(vma, file);
+}
+#else
+#define AuDbgVmRegion(file, vma) \
+	AuDebugOn((vma)->vm_region && (vma)->vm_region->vm_file != (file))
+
+static inline void au_vm_file_reset(struct vm_area_struct *vma,
+				    struct file *file)
+{
+	struct file *f;
+
+	au_do_vm_file_reset(vma, file);
+	f = vma->vm_region->vm_file;
+	get_file(file);
+	vma->vm_region->vm_file = file;
+	fput(f);
+}
+#endif /* CONFIG_MMU */
+
+/* handle vma->vm_prfile */
+static inline void au_vm_prfile_set(struct vm_area_struct *vma,
+				    struct file *file)
+{
+	get_file(file);
+	vma->vm_prfile = file;
+#ifndef CONFIG_MMU
+	get_file(file);
+	vma->vm_region->vm_prfile = file;
+#endif
+}
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_FILE_H__ */
diff --git a/fs/aufs/finfo.c b/fs/aufs/finfo.c
new file mode 100644
index 000000000..3144f462a
--- /dev/null
+++ b/fs/aufs/finfo.c
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * file private data
+ */
+
+#include "aufs.h"
+
+void au_hfput(struct au_hfile *hf, int execed)
+{
+	if (execed)
+		allow_write_access(hf->hf_file);
+	fput(hf->hf_file);
+	hf->hf_file = NULL;
+	au_br_put(hf->hf_br);
+	hf->hf_br = NULL;
+}
+
+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, struct file *val)
+{
+	struct au_finfo *finfo = au_fi(file);
+	struct au_hfile *hf;
+	struct au_fidir *fidir;
+
+	fidir = finfo->fi_hdir;
+	if (!fidir) {
+		AuDebugOn(finfo->fi_btop != bindex);
+		hf = &finfo->fi_htop;
+	} else
+		hf = fidir->fd_hfile + bindex;
+
+	if (hf && hf->hf_file)
+		au_hfput(hf, vfsub_file_execed(file));
+	if (val) {
+		FiMustWriteLock(file);
+		AuDebugOn(IS_ERR_OR_NULL(file->f_path.dentry));
+		hf->hf_file = val;
+		hf->hf_br = au_sbr(file->f_path.dentry->d_sb, bindex);
+	}
+}
+
+void au_update_figen(struct file *file)
+{
+	atomic_set(&au_fi(file)->fi_generation, au_digen(file->f_path.dentry));
+	/* smp_mb(); */ /* atomic_set */
+}
+
+/* ---------------------------------------------------------------------- */
+
+struct au_fidir *au_fidir_alloc(struct super_block *sb)
+{
+	struct au_fidir *fidir;
+	int nbr;
+
+	nbr = au_sbbot(sb) + 1;
+	if (nbr < 2)
+		nbr = 2; /* initial allocate for 2 branches */
+	fidir = kzalloc(au_fidir_sz(nbr), GFP_NOFS);
+	if (fidir) {
+		fidir->fd_bbot = -1;
+		fidir->fd_nent = nbr;
+	}
+
+	return fidir;
+}
+
+int au_fidir_realloc(struct au_finfo *finfo, int nbr, int may_shrink)
+{
+	int err;
+	struct au_fidir *fidir, *p;
+
+	AuRwMustWriteLock(&finfo->fi_rwsem);
+	fidir = finfo->fi_hdir;
+	AuDebugOn(!fidir);
+
+	err = -ENOMEM;
+	p = au_kzrealloc(fidir, au_fidir_sz(fidir->fd_nent), au_fidir_sz(nbr),
+			 GFP_NOFS, may_shrink);
+	if (p) {
+		p->fd_nent = nbr;
+		finfo->fi_hdir = p;
+		err = 0;
+	}
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void au_finfo_fin(struct file *file)
+{
+	struct au_finfo *finfo;
+
+	au_nfiles_dec(file->f_path.dentry->d_sb);
+
+	finfo = au_fi(file);
+	AuDebugOn(finfo->fi_hdir);
+	AuRwDestroy(&finfo->fi_rwsem);
+	au_cache_free_finfo(finfo);
+}
+
+void au_fi_init_once(void *_finfo)
+{
+	struct au_finfo *finfo = _finfo;
+
+	au_rw_init(&finfo->fi_rwsem);
+}
+
+int au_finfo_init(struct file *file, struct au_fidir *fidir)
+{
+	int err;
+	struct au_finfo *finfo;
+	struct dentry *dentry;
+
+	err = -ENOMEM;
+	dentry = file->f_path.dentry;
+	finfo = au_cache_alloc_finfo();
+	if (unlikely(!finfo))
+		goto out;
+
+	err = 0;
+	au_nfiles_inc(dentry->d_sb);
+	au_rw_write_lock(&finfo->fi_rwsem);
+	finfo->fi_btop = -1;
+	finfo->fi_hdir = fidir;
+	atomic_set(&finfo->fi_generation, au_digen(dentry));
+	/* smp_mb(); */ /* atomic_set */
+
+	file->private_data = finfo;
+
+out:
+	return err;
+}
diff --git a/fs/aufs/fstype.h b/fs/aufs/fstype.h
new file mode 100644
index 000000000..7562506dc
--- /dev/null
+++ b/fs/aufs/fstype.h
@@ -0,0 +1,400 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * judging filesystem type
+ */
+
+#ifndef __AUFS_FSTYPE_H__
+#define __AUFS_FSTYPE_H__
+
+#ifdef __KERNEL__
+
+#include <linux/fs.h>
+#include <linux/magic.h>
+#include <linux/nfs_fs.h>
+#include <linux/romfs_fs.h>
+
+static inline int au_test_aufs(struct super_block *sb)
+{
+	return sb->s_magic == AUFS_SUPER_MAGIC;
+}
+
+static inline const char *au_sbtype(struct super_block *sb)
+{
+	return sb->s_type->name;
+}
+
+static inline int au_test_iso9660(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_ISO9660_FS)
+	return sb->s_magic == ISOFS_SUPER_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_romfs(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_ROMFS_FS)
+	return sb->s_magic == ROMFS_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_cramfs(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_CRAMFS)
+	return sb->s_magic == CRAMFS_MAGIC;
+#endif
+	return 0;
+}
+
+static inline int au_test_nfs(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_NFS_FS)
+	return sb->s_magic == NFS_SUPER_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_fuse(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_FUSE_FS)
+	return sb->s_magic == FUSE_SUPER_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_xfs(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_XFS_FS)
+	return sb->s_magic == XFS_SB_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_tmpfs(struct super_block *sb __maybe_unused)
+{
+#ifdef CONFIG_TMPFS
+	return sb->s_magic == TMPFS_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_ecryptfs(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_ECRYPT_FS)
+	return !strcmp(au_sbtype(sb), "ecryptfs");
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_ramfs(struct super_block *sb)
+{
+	return sb->s_magic == RAMFS_MAGIC;
+}
+
+static inline int au_test_ubifs(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_UBIFS_FS)
+	return sb->s_magic == UBIFS_SUPER_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_procfs(struct super_block *sb __maybe_unused)
+{
+#ifdef CONFIG_PROC_FS
+	return sb->s_magic == PROC_SUPER_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_sysfs(struct super_block *sb __maybe_unused)
+{
+#ifdef CONFIG_SYSFS
+	return sb->s_magic == SYSFS_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_configfs(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_CONFIGFS_FS)
+	return sb->s_magic == CONFIGFS_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_minix(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_MINIX_FS)
+	return sb->s_magic == MINIX3_SUPER_MAGIC
+		|| sb->s_magic == MINIX2_SUPER_MAGIC
+		|| sb->s_magic == MINIX2_SUPER_MAGIC2
+		|| sb->s_magic == MINIX_SUPER_MAGIC
+		|| sb->s_magic == MINIX_SUPER_MAGIC2;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_fat(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_FAT_FS)
+	return sb->s_magic == MSDOS_SUPER_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_msdos(struct super_block *sb)
+{
+	return au_test_fat(sb);
+}
+
+static inline int au_test_vfat(struct super_block *sb)
+{
+	return au_test_fat(sb);
+}
+
+static inline int au_test_securityfs(struct super_block *sb __maybe_unused)
+{
+#ifdef CONFIG_SECURITYFS
+	return sb->s_magic == SECURITYFS_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_squashfs(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_SQUASHFS)
+	return sb->s_magic == SQUASHFS_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_btrfs(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_BTRFS_FS)
+	return sb->s_magic == BTRFS_SUPER_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_xenfs(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_XENFS)
+	return sb->s_magic == XENFS_SUPER_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_debugfs(struct super_block *sb __maybe_unused)
+{
+#ifdef CONFIG_DEBUG_FS
+	return sb->s_magic == DEBUGFS_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_nilfs(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_NILFS)
+	return sb->s_magic == NILFS_SUPER_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_hfsplus(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_HFSPLUS_FS)
+	return sb->s_magic == HFSPLUS_SUPER_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+/* ---------------------------------------------------------------------- */
+/*
+ * they can't be an aufs branch.
+ */
+static inline int au_test_fs_unsuppoted(struct super_block *sb)
+{
+	return
+#ifndef CONFIG_AUFS_BR_RAMFS
+		au_test_ramfs(sb) ||
+#endif
+		au_test_procfs(sb)
+		|| au_test_sysfs(sb)
+		|| au_test_configfs(sb)
+		|| au_test_debugfs(sb)
+		|| au_test_securityfs(sb)
+		|| au_test_xenfs(sb)
+		|| au_test_ecryptfs(sb)
+		/* || !strcmp(au_sbtype(sb), "unionfs") */
+		|| au_test_aufs(sb); /* will be supported in next version */
+}
+
+static inline int au_test_fs_remote(struct super_block *sb)
+{
+	return !au_test_tmpfs(sb)
+#ifdef CONFIG_AUFS_BR_RAMFS
+		&& !au_test_ramfs(sb)
+#endif
+		&& !(sb->s_type->fs_flags & FS_REQUIRES_DEV);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * Note: these functions (below) are created after reading ->getattr() in all
+ * filesystems under linux/fs. it means we have to do so in every update...
+ */
+
+/*
+ * some filesystems require getattr to refresh the inode attributes before
+ * referencing.
+ * in most cases, we can rely on the inode attribute in NFS (or every remote fs)
+ * and leave the work for d_revalidate()
+ */
+static inline int au_test_fs_refresh_iattr(struct super_block *sb)
+{
+	return au_test_nfs(sb)
+		|| au_test_fuse(sb)
+		/* || au_test_btrfs(sb) */	/* untested */
+		;
+}
+
+/*
+ * filesystems which don't maintain i_size or i_blocks.
+ */
+static inline int au_test_fs_bad_iattr_size(struct super_block *sb)
+{
+	return au_test_xfs(sb)
+		|| au_test_btrfs(sb)
+		|| au_test_ubifs(sb)
+		|| au_test_hfsplus(sb)	/* maintained, but incorrect */
+		/* || au_test_minix(sb) */	/* untested */
+		;
+}
+
+/*
+ * filesystems which don't store the correct value in some of their inode
+ * attributes.
+ */
+static inline int au_test_fs_bad_iattr(struct super_block *sb)
+{
+	return au_test_fs_bad_iattr_size(sb)
+		|| au_test_fat(sb)
+		|| au_test_msdos(sb)
+		|| au_test_vfat(sb);
+}
+
+/* they don't check i_nlink in link(2) */
+static inline int au_test_fs_no_limit_nlink(struct super_block *sb)
+{
+	return au_test_tmpfs(sb)
+#ifdef CONFIG_AUFS_BR_RAMFS
+		|| au_test_ramfs(sb)
+#endif
+		|| au_test_ubifs(sb)
+		|| au_test_hfsplus(sb);
+}
+
+/*
+ * filesystems which sets S_NOATIME and S_NOCMTIME.
+ */
+static inline int au_test_fs_notime(struct super_block *sb)
+{
+	return au_test_nfs(sb)
+		|| au_test_fuse(sb)
+		|| au_test_ubifs(sb)
+		;
+}
+
+/* temporary support for i#1 in cramfs */
+static inline int au_test_fs_unique_ino(struct inode *inode)
+{
+	if (au_test_cramfs(inode->i_sb))
+		return inode->i_ino != 1;
+	return 1;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * the filesystem where the xino files placed must support i/o after unlink and
+ * maintain i_size and i_blocks.
+ */
+static inline int au_test_fs_bad_xino(struct super_block *sb)
+{
+	return au_test_fs_remote(sb)
+		|| au_test_fs_bad_iattr_size(sb)
+		/* don't want unnecessary work for xino */
+		|| au_test_aufs(sb)
+		|| au_test_ecryptfs(sb)
+		|| au_test_nilfs(sb);
+}
+
+static inline int au_test_fs_trunc_xino(struct super_block *sb)
+{
+	return au_test_tmpfs(sb)
+		|| au_test_ramfs(sb);
+}
+
+/*
+ * test if the @sb is real-readonly.
+ */
+static inline int au_test_fs_rr(struct super_block *sb)
+{
+	return au_test_squashfs(sb)
+		|| au_test_iso9660(sb)
+		|| au_test_cramfs(sb)
+		|| au_test_romfs(sb);
+}
+
+/*
+ * test if the @inode is nfs with 'noacl' option
+ * NFS always sets SB_POSIXACL regardless its mount option 'noacl.'
+ */
+static inline int au_test_nfs_noacl(struct inode *inode)
+{
+	return au_test_nfs(inode->i_sb)
+		/* && IS_POSIXACL(inode) */
+		&& !nfs_server_capable(inode, NFS_CAP_ACLS);
+}
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_FSTYPE_H__ */
diff --git a/fs/aufs/hbl.h b/fs/aufs/hbl.h
new file mode 100644
index 000000000..328586e37
--- /dev/null
+++ b/fs/aufs/hbl.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2017-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * helpers for hlist_bl.h
+ */
+
+#ifndef __AUFS_HBL_H__
+#define __AUFS_HBL_H__
+
+#ifdef __KERNEL__
+
+#include <linux/list_bl.h>
+
+static inline void au_hbl_add(struct hlist_bl_node *node,
+			      struct hlist_bl_head *hbl)
+{
+	hlist_bl_lock(hbl);
+	hlist_bl_add_head(node, hbl);
+	hlist_bl_unlock(hbl);
+}
+
+static inline void au_hbl_del(struct hlist_bl_node *node,
+			      struct hlist_bl_head *hbl)
+{
+	hlist_bl_lock(hbl);
+	hlist_bl_del(node);
+	hlist_bl_unlock(hbl);
+}
+
+#define au_hbl_for_each(pos, head)					\
+	for (pos = hlist_bl_first(head);				\
+	     pos;							\
+	     pos = pos->next)
+
+static inline unsigned long au_hbl_count(struct hlist_bl_head *hbl)
+{
+	unsigned long cnt;
+	struct hlist_bl_node *pos;
+
+	cnt = 0;
+	hlist_bl_lock(hbl);
+	au_hbl_for_each(pos, hbl)
+		cnt++;
+	hlist_bl_unlock(hbl);
+	return cnt;
+}
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_HBL_H__ */
diff --git a/fs/aufs/hfsnotify.c b/fs/aufs/hfsnotify.c
new file mode 100644
index 000000000..edd7f6385
--- /dev/null
+++ b/fs/aufs/hfsnotify.c
@@ -0,0 +1,289 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * fsnotify for the lower directories
+ */
+
+#include "aufs.h"
+
+/* FS_IN_IGNORED is unnecessary */
+static const __u32 AuHfsnMask = (FS_MOVED_TO | FS_MOVED_FROM | FS_DELETE
+				 | FS_CREATE | FS_EVENT_ON_CHILD);
+static DECLARE_WAIT_QUEUE_HEAD(au_hfsn_wq);
+static __cacheline_aligned_in_smp atomic64_t au_hfsn_ifree = ATOMIC64_INIT(0);
+
+static void au_hfsn_free_mark(struct fsnotify_mark *mark)
+{
+	struct au_hnotify *hn = container_of(mark, struct au_hnotify,
+					     hn_mark);
+	/* AuDbg("here\n"); */
+	au_cache_free_hnotify(hn);
+	smp_mb__before_atomic(); /* for atomic64_dec */
+	if (atomic64_dec_and_test(&au_hfsn_ifree))
+		wake_up(&au_hfsn_wq);
+}
+
+static int au_hfsn_alloc(struct au_hinode *hinode)
+{
+	int err;
+	struct au_hnotify *hn;
+	struct super_block *sb;
+	struct au_branch *br;
+	struct fsnotify_mark *mark;
+	aufs_bindex_t bindex;
+
+	hn = hinode->hi_notify;
+	sb = hn->hn_aufs_inode->i_sb;
+	bindex = au_br_index(sb, hinode->hi_id);
+	br = au_sbr(sb, bindex);
+	AuDebugOn(!br->br_hfsn);
+
+	mark = &hn->hn_mark;
+	fsnotify_init_mark(mark, br->br_hfsn->hfsn_group);
+	mark->mask = AuHfsnMask;
+	/*
+	 * by udba rename or rmdir, aufs assign a new inode to the known
+	 * h_inode, so specify 1 to allow dups.
+	 */
+	lockdep_off();
+	err = fsnotify_add_mark(mark, hinode->hi_inode, /*mnt*/NULL,
+				/*allow_dups*/1);
+	lockdep_on();
+
+	return err;
+}
+
+static int au_hfsn_free(struct au_hinode *hinode, struct au_hnotify *hn)
+{
+	struct fsnotify_mark *mark;
+	unsigned long long ull;
+	struct fsnotify_group *group;
+
+	ull = atomic64_inc_return(&au_hfsn_ifree);
+	BUG_ON(!ull);
+
+	mark = &hn->hn_mark;
+	spin_lock(&mark->lock);
+	group = mark->group;
+	fsnotify_get_group(group);
+	spin_unlock(&mark->lock);
+	lockdep_off();
+	fsnotify_destroy_mark(mark, group);
+	fsnotify_put_mark(mark);
+	fsnotify_put_group(group);
+	lockdep_on();
+
+	/* free hn by myself */
+	return 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static void au_hfsn_ctl(struct au_hinode *hinode, int do_set)
+{
+	struct fsnotify_mark *mark;
+
+	mark = &hinode->hi_notify->hn_mark;
+	spin_lock(&mark->lock);
+	if (do_set) {
+		AuDebugOn(mark->mask & AuHfsnMask);
+		mark->mask |= AuHfsnMask;
+	} else {
+		AuDebugOn(!(mark->mask & AuHfsnMask));
+		mark->mask &= ~AuHfsnMask;
+	}
+	spin_unlock(&mark->lock);
+	/* fsnotify_recalc_inode_mask(hinode->hi_inode); */
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* #define AuDbgHnotify */
+#ifdef AuDbgHnotify
+static char *au_hfsn_name(u32 mask)
+{
+#ifdef CONFIG_AUFS_DEBUG
+#define test_ret(flag)				\
+	do {					\
+		if (mask & flag)		\
+			return #flag;		\
+	} while (0)
+	test_ret(FS_ACCESS);
+	test_ret(FS_MODIFY);
+	test_ret(FS_ATTRIB);
+	test_ret(FS_CLOSE_WRITE);
+	test_ret(FS_CLOSE_NOWRITE);
+	test_ret(FS_OPEN);
+	test_ret(FS_MOVED_FROM);
+	test_ret(FS_MOVED_TO);
+	test_ret(FS_CREATE);
+	test_ret(FS_DELETE);
+	test_ret(FS_DELETE_SELF);
+	test_ret(FS_MOVE_SELF);
+	test_ret(FS_UNMOUNT);
+	test_ret(FS_Q_OVERFLOW);
+	test_ret(FS_IN_IGNORED);
+	test_ret(FS_ISDIR);
+	test_ret(FS_IN_ONESHOT);
+	test_ret(FS_EVENT_ON_CHILD);
+	return "";
+#undef test_ret
+#else
+	return "??";
+#endif
+}
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+static void au_hfsn_free_group(struct fsnotify_group *group)
+{
+	struct au_br_hfsnotify *hfsn = group->private;
+
+	/* AuDbg("here\n"); */
+	kfree(hfsn);
+}
+
+static int au_hfsn_handle_event(struct fsnotify_group *group,
+				struct inode *inode,
+				struct fsnotify_mark *inode_mark,
+				struct fsnotify_mark *vfsmount_mark,
+				u32 mask, const void *data, int data_type,
+				const unsigned char *file_name, u32 cookie,
+				struct fsnotify_iter_info *iter_info)
+{
+	int err;
+	struct au_hnotify *hnotify;
+	struct inode *h_dir, *h_inode;
+	struct qstr h_child_qstr = QSTR_INIT(file_name, strlen(file_name));
+
+	AuDebugOn(data_type != FSNOTIFY_EVENT_INODE);
+
+	err = 0;
+	/* if FS_UNMOUNT happens, there must be another bug */
+	AuDebugOn(mask & FS_UNMOUNT);
+	if (mask & (FS_IN_IGNORED | FS_UNMOUNT))
+		goto out;
+
+	h_dir = inode;
+	h_inode = NULL;
+#ifdef AuDbgHnotify
+	au_debug_on();
+	if (1 || h_child_qstr.len != sizeof(AUFS_XINO_FNAME) - 1
+	    || strncmp(h_child_qstr.name, AUFS_XINO_FNAME, h_child_qstr.len)) {
+		AuDbg("i%lu, mask 0x%x %s, hcname %.*s, hi%lu\n",
+		      h_dir->i_ino, mask, au_hfsn_name(mask),
+		      AuLNPair(&h_child_qstr), h_inode ? h_inode->i_ino : 0);
+		/* WARN_ON(1); */
+	}
+	au_debug_off();
+#endif
+
+	AuDebugOn(!inode_mark);
+	hnotify = container_of(inode_mark, struct au_hnotify, hn_mark);
+	err = au_hnotify(h_dir, hnotify, mask, &h_child_qstr, h_inode);
+
+out:
+	return err;
+}
+
+static struct fsnotify_ops au_hfsn_ops = {
+	.handle_event		= au_hfsn_handle_event,
+	.free_group_priv	= au_hfsn_free_group,
+	.free_mark		= au_hfsn_free_mark
+};
+
+/* ---------------------------------------------------------------------- */
+
+static void au_hfsn_fin_br(struct au_branch *br)
+{
+	struct au_br_hfsnotify *hfsn;
+
+	hfsn = br->br_hfsn;
+	if (hfsn) {
+		lockdep_off();
+		fsnotify_put_group(hfsn->hfsn_group);
+		lockdep_on();
+	}
+}
+
+static int au_hfsn_init_br(struct au_branch *br, int perm)
+{
+	int err;
+	struct fsnotify_group *group;
+	struct au_br_hfsnotify *hfsn;
+
+	err = 0;
+	br->br_hfsn = NULL;
+	if (!au_br_hnotifyable(perm))
+		goto out;
+
+	err = -ENOMEM;
+	hfsn = kmalloc(sizeof(*hfsn), GFP_NOFS);
+	if (unlikely(!hfsn))
+		goto out;
+
+	err = 0;
+	group = fsnotify_alloc_group(&au_hfsn_ops);
+	if (IS_ERR(group)) {
+		err = PTR_ERR(group);
+		pr_err("fsnotify_alloc_group() failed, %d\n", err);
+		goto out_hfsn;
+	}
+
+	group->private = hfsn;
+	hfsn->hfsn_group = group;
+	br->br_hfsn = hfsn;
+	goto out; /* success */
+
+out_hfsn:
+	kfree(hfsn);
+out:
+	return err;
+}
+
+static int au_hfsn_reset_br(unsigned int udba, struct au_branch *br, int perm)
+{
+	int err;
+
+	err = 0;
+	if (!br->br_hfsn)
+		err = au_hfsn_init_br(br, perm);
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static void au_hfsn_fin(void)
+{
+	AuDbg("au_hfsn_ifree %lld\n", (long long)atomic64_read(&au_hfsn_ifree));
+	wait_event(au_hfsn_wq, !atomic64_read(&au_hfsn_ifree));
+}
+
+const struct au_hnotify_op au_hnotify_op = {
+	.ctl		= au_hfsn_ctl,
+	.alloc		= au_hfsn_alloc,
+	.free		= au_hfsn_free,
+
+	.fin		= au_hfsn_fin,
+
+	.reset_br	= au_hfsn_reset_br,
+	.fin_br		= au_hfsn_fin_br,
+	.init_br	= au_hfsn_init_br
+};
diff --git a/fs/aufs/hfsplus.c b/fs/aufs/hfsplus.c
new file mode 100644
index 000000000..f4ed51678
--- /dev/null
+++ b/fs/aufs/hfsplus.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2010-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * special support for filesystems which aqucires an inode mutex
+ * at final closing a file, eg, hfsplus.
+ *
+ * This trick is very simple and stupid, just to open the file before really
+ * neceeary open to tell hfsplus that this is not the final closing.
+ * The caller should call au_h_open_pre() after acquiring the inode mutex,
+ * and au_h_open_post() after releasing it.
+ */
+
+#include "aufs.h"
+
+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex,
+			   int force_wr)
+{
+	struct file *h_file;
+	struct dentry *h_dentry;
+
+	h_dentry = au_h_dptr(dentry, bindex);
+	AuDebugOn(!h_dentry);
+	AuDebugOn(d_is_negative(h_dentry));
+
+	h_file = NULL;
+	if (au_test_hfsplus(h_dentry->d_sb)
+	    && d_is_reg(h_dentry))
+		h_file = au_h_open(dentry, bindex,
+				   O_RDONLY | O_NOATIME | O_LARGEFILE,
+				   /*file*/NULL, force_wr);
+	return h_file;
+}
+
+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
+		    struct file *h_file)
+{
+	if (h_file) {
+		fput(h_file);
+		au_sbr_put(dentry->d_sb, bindex);
+	}
+}
diff --git a/fs/aufs/hnotify.c b/fs/aufs/hnotify.c
new file mode 100644
index 000000000..2ea836223
--- /dev/null
+++ b/fs/aufs/hnotify.c
@@ -0,0 +1,719 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * abstraction to notify the direct changes on lower directories
+ */
+
+#include "aufs.h"
+
+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode)
+{
+	int err;
+	struct au_hnotify *hn;
+
+	err = -ENOMEM;
+	hn = au_cache_alloc_hnotify();
+	if (hn) {
+		hn->hn_aufs_inode = inode;
+		hinode->hi_notify = hn;
+		err = au_hnotify_op.alloc(hinode);
+		AuTraceErr(err);
+		if (unlikely(err)) {
+			hinode->hi_notify = NULL;
+			au_cache_free_hnotify(hn);
+			/*
+			 * The upper dir was removed by udba, but the same named
+			 * dir left. In this case, aufs assignes a new inode
+			 * number and set the monitor again.
+			 * For the lower dir, the old monitnor is still left.
+			 */
+			if (err == -EEXIST)
+				err = 0;
+		}
+	}
+
+	AuTraceErr(err);
+	return err;
+}
+
+void au_hn_free(struct au_hinode *hinode)
+{
+	struct au_hnotify *hn;
+
+	hn = hinode->hi_notify;
+	if (hn) {
+		hinode->hi_notify = NULL;
+		if (au_hnotify_op.free(hinode, hn))
+			au_cache_free_hnotify(hn);
+	}
+}
+
+/* ---------------------------------------------------------------------- */
+
+void au_hn_ctl(struct au_hinode *hinode, int do_set)
+{
+	if (hinode->hi_notify)
+		au_hnotify_op.ctl(hinode, do_set);
+}
+
+void au_hn_reset(struct inode *inode, unsigned int flags)
+{
+	aufs_bindex_t bindex, bbot;
+	struct inode *hi;
+	struct dentry *iwhdentry;
+
+	bbot = au_ibbot(inode);
+	for (bindex = au_ibtop(inode); bindex <= bbot; bindex++) {
+		hi = au_h_iptr(inode, bindex);
+		if (!hi)
+			continue;
+
+		/* inode_lock_nested(hi, AuLsc_I_CHILD); */
+		iwhdentry = au_hi_wh(inode, bindex);
+		if (iwhdentry)
+			dget(iwhdentry);
+		au_igrab(hi);
+		au_set_h_iptr(inode, bindex, NULL, 0);
+		au_set_h_iptr(inode, bindex, au_igrab(hi),
+			      flags & ~AuHi_XINO);
+		iput(hi);
+		dput(iwhdentry);
+		/* inode_unlock(hi); */
+	}
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int hn_xino(struct inode *inode, struct inode *h_inode)
+{
+	int err;
+	aufs_bindex_t bindex, bbot, bfound, btop;
+	struct inode *h_i;
+
+	err = 0;
+	if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
+		pr_warn("branch root dir was changed\n");
+		goto out;
+	}
+
+	bfound = -1;
+	bbot = au_ibbot(inode);
+	btop = au_ibtop(inode);
+#if 0 /* reserved for future use */
+	if (bindex == bbot) {
+		/* keep this ino in rename case */
+		goto out;
+	}
+#endif
+	for (bindex = btop; bindex <= bbot; bindex++)
+		if (au_h_iptr(inode, bindex) == h_inode) {
+			bfound = bindex;
+			break;
+		}
+	if (bfound < 0)
+		goto out;
+
+	for (bindex = btop; bindex <= bbot; bindex++) {
+		h_i = au_h_iptr(inode, bindex);
+		if (!h_i)
+			continue;
+
+		err = au_xino_write(inode->i_sb, bindex, h_i->i_ino, /*ino*/0);
+		/* ignore this error */
+		/* bad action? */
+	}
+
+	/* children inode number will be broken */
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+static int hn_gen_tree(struct dentry *dentry)
+{
+	int err, i, j, ndentry;
+	struct au_dcsub_pages dpages;
+	struct au_dpage *dpage;
+	struct dentry **dentries;
+
+	err = au_dpages_init(&dpages, GFP_NOFS);
+	if (unlikely(err))
+		goto out;
+	err = au_dcsub_pages(&dpages, dentry, NULL, NULL);
+	if (unlikely(err))
+		goto out_dpages;
+
+	for (i = 0; i < dpages.ndpage; i++) {
+		dpage = dpages.dpages + i;
+		dentries = dpage->dentries;
+		ndentry = dpage->ndentry;
+		for (j = 0; j < ndentry; j++) {
+			struct dentry *d;
+
+			d = dentries[j];
+			if (IS_ROOT(d))
+				continue;
+
+			au_digen_dec(d);
+			if (d_really_is_positive(d))
+				/* todo: reset children xino?
+				   cached children only? */
+				au_iigen_dec(d_inode(d));
+		}
+	}
+
+out_dpages:
+	au_dpages_free(&dpages);
+
+#if 0
+	/* discard children */
+	dentry_unhash(dentry);
+	dput(dentry);
+#endif
+out:
+	return err;
+}
+
+/*
+ * return 0 if processed.
+ */
+static int hn_gen_by_inode(char *name, unsigned int nlen, struct inode *inode,
+			   const unsigned int isdir)
+{
+	int err;
+	struct dentry *d;
+	struct qstr *dname;
+
+	err = 1;
+	if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
+		pr_warn("branch root dir was changed\n");
+		err = 0;
+		goto out;
+	}
+
+	if (!isdir) {
+		AuDebugOn(!name);
+		au_iigen_dec(inode);
+		spin_lock(&inode->i_lock);
+		hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias) {
+			spin_lock(&d->d_lock);
+			dname = &d->d_name;
+			if (dname->len != nlen
+			    && memcmp(dname->name, name, nlen)) {
+				spin_unlock(&d->d_lock);
+				continue;
+			}
+			err = 0;
+			au_digen_dec(d);
+			spin_unlock(&d->d_lock);
+			break;
+		}
+		spin_unlock(&inode->i_lock);
+	} else {
+		au_fset_si(au_sbi(inode->i_sb), FAILED_REFRESH_DIR);
+		d = d_find_any_alias(inode);
+		if (!d) {
+			au_iigen_dec(inode);
+			goto out;
+		}
+
+		spin_lock(&d->d_lock);
+		dname = &d->d_name;
+		if (dname->len == nlen && !memcmp(dname->name, name, nlen)) {
+			spin_unlock(&d->d_lock);
+			err = hn_gen_tree(d);
+			spin_lock(&d->d_lock);
+		}
+		spin_unlock(&d->d_lock);
+		dput(d);
+	}
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+static int hn_gen_by_name(struct dentry *dentry, const unsigned int isdir)
+{
+	int err;
+
+	if (IS_ROOT(dentry)) {
+		pr_warn("branch root dir was changed\n");
+		return 0;
+	}
+
+	err = 0;
+	if (!isdir) {
+		au_digen_dec(dentry);
+		if (d_really_is_positive(dentry))
+			au_iigen_dec(d_inode(dentry));
+	} else {
+		au_fset_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR);
+		if (d_really_is_positive(dentry))
+			err = hn_gen_tree(dentry);
+	}
+
+	AuTraceErr(err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* hnotify job flags */
+#define AuHnJob_XINO0		1
+#define AuHnJob_GEN		(1 << 1)
+#define AuHnJob_DIRENT		(1 << 2)
+#define AuHnJob_ISDIR		(1 << 3)
+#define AuHnJob_TRYXINO0	(1 << 4)
+#define AuHnJob_MNTPNT		(1 << 5)
+#define au_ftest_hnjob(flags, name)	((flags) & AuHnJob_##name)
+#define au_fset_hnjob(flags, name) \
+	do { (flags) |= AuHnJob_##name; } while (0)
+#define au_fclr_hnjob(flags, name) \
+	do { (flags) &= ~AuHnJob_##name; } while (0)
+
+enum {
+	AuHn_CHILD,
+	AuHn_PARENT,
+	AuHnLast
+};
+
+struct au_hnotify_args {
+	struct inode *h_dir, *dir, *h_child_inode;
+	u32 mask;
+	unsigned int flags[AuHnLast];
+	unsigned int h_child_nlen;
+	char h_child_name[];
+};
+
+struct hn_job_args {
+	unsigned int flags;
+	struct inode *inode, *h_inode, *dir, *h_dir;
+	struct dentry *dentry;
+	char *h_name;
+	int h_nlen;
+};
+
+static int hn_job(struct hn_job_args *a)
+{
+	const unsigned int isdir = au_ftest_hnjob(a->flags, ISDIR);
+	int e;
+
+	/* reset xino */
+	if (au_ftest_hnjob(a->flags, XINO0) && a->inode)
+		hn_xino(a->inode, a->h_inode); /* ignore this error */
+
+	if (au_ftest_hnjob(a->flags, TRYXINO0)
+	    && a->inode
+	    && a->h_inode) {
+		inode_lock_shared_nested(a->h_inode, AuLsc_I_CHILD);
+		if (!a->h_inode->i_nlink
+		    && !(a->h_inode->i_state & I_LINKABLE))
+			hn_xino(a->inode, a->h_inode); /* ignore this error */
+		inode_unlock_shared(a->h_inode);
+	}
+
+	/* make the generation obsolete */
+	if (au_ftest_hnjob(a->flags, GEN)) {
+		e = -1;
+		if (a->inode)
+			e = hn_gen_by_inode(a->h_name, a->h_nlen, a->inode,
+					      isdir);
+		if (e && a->dentry)
+			hn_gen_by_name(a->dentry, isdir);
+		/* ignore this error */
+	}
+
+	/* make dir entries obsolete */
+	if (au_ftest_hnjob(a->flags, DIRENT) && a->inode) {
+		struct au_vdir *vdir;
+
+		vdir = au_ivdir(a->inode);
+		if (vdir)
+			vdir->vd_jiffy = 0;
+		/* IMustLock(a->inode); */
+		/* inode_inc_iversion(a->inode); */
+	}
+
+	/* can do nothing but warn */
+	if (au_ftest_hnjob(a->flags, MNTPNT)
+	    && a->dentry
+	    && d_mountpoint(a->dentry))
+		pr_warn("mount-point %pd is removed or renamed\n", a->dentry);
+
+	return 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static struct dentry *lookup_wlock_by_name(char *name, unsigned int nlen,
+					   struct inode *dir)
+{
+	struct dentry *dentry, *d, *parent;
+	struct qstr *dname;
+
+	parent = d_find_any_alias(dir);
+	if (!parent)
+		return NULL;
+
+	dentry = NULL;
+	spin_lock(&parent->d_lock);
+	list_for_each_entry(d, &parent->d_subdirs, d_child) {
+		/* AuDbg("%pd\n", d); */
+		spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
+		dname = &d->d_name;
+		if (dname->len != nlen || memcmp(dname->name, name, nlen))
+			goto cont_unlock;
+		if (au_di(d))
+			au_digen_dec(d);
+		else
+			goto cont_unlock;
+		if (au_dcount(d) > 0) {
+			dentry = dget_dlock(d);
+			spin_unlock(&d->d_lock);
+			break;
+		}
+
+cont_unlock:
+		spin_unlock(&d->d_lock);
+	}
+	spin_unlock(&parent->d_lock);
+	dput(parent);
+
+	if (dentry)
+		di_write_lock_child(dentry);
+
+	return dentry;
+}
+
+static struct inode *lookup_wlock_by_ino(struct super_block *sb,
+					 aufs_bindex_t bindex, ino_t h_ino)
+{
+	struct inode *inode;
+	ino_t ino;
+	int err;
+
+	inode = NULL;
+	err = au_xino_read(sb, bindex, h_ino, &ino);
+	if (!err && ino)
+		inode = ilookup(sb, ino);
+	if (!inode)
+		goto out;
+
+	if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
+		pr_warn("wrong root branch\n");
+		iput(inode);
+		inode = NULL;
+		goto out;
+	}
+
+	ii_write_lock_child(inode);
+
+out:
+	return inode;
+}
+
+static void au_hn_bh(void *_args)
+{
+	struct au_hnotify_args *a = _args;
+	struct super_block *sb;
+	aufs_bindex_t bindex, bbot, bfound;
+	unsigned char xino, try_iput;
+	int err;
+	struct inode *inode;
+	ino_t h_ino;
+	struct hn_job_args args;
+	struct dentry *dentry;
+	struct au_sbinfo *sbinfo;
+
+	AuDebugOn(!_args);
+	AuDebugOn(!a->h_dir);
+	AuDebugOn(!a->dir);
+	AuDebugOn(!a->mask);
+	AuDbg("mask 0x%x, i%lu, hi%lu, hci%lu\n",
+	      a->mask, a->dir->i_ino, a->h_dir->i_ino,
+	      a->h_child_inode ? a->h_child_inode->i_ino : 0);
+
+	inode = NULL;
+	dentry = NULL;
+	/*
+	 * do not lock a->dir->i_mutex here
+	 * because of d_revalidate() may cause a deadlock.
+	 */
+	sb = a->dir->i_sb;
+	AuDebugOn(!sb);
+	sbinfo = au_sbi(sb);
+	AuDebugOn(!sbinfo);
+	si_write_lock(sb, AuLock_NOPLMW);
+
+	if (au_opt_test(sbinfo->si_mntflags, DIRREN))
+		switch (a->mask & FS_EVENTS_POSS_ON_CHILD) {
+		case FS_MOVED_FROM:
+		case FS_MOVED_TO:
+			AuWarn1("DIRREN with UDBA may not work correctly "
+				"for the direct rename(2)\n");
+		}
+
+	ii_read_lock_parent(a->dir);
+	bfound = -1;
+	bbot = au_ibbot(a->dir);
+	for (bindex = au_ibtop(a->dir); bindex <= bbot; bindex++)
+		if (au_h_iptr(a->dir, bindex) == a->h_dir) {
+			bfound = bindex;
+			break;
+		}
+	ii_read_unlock(a->dir);
+	if (unlikely(bfound < 0))
+		goto out;
+
+	xino = !!au_opt_test(au_mntflags(sb), XINO);
+	h_ino = 0;
+	if (a->h_child_inode)
+		h_ino = a->h_child_inode->i_ino;
+
+	if (a->h_child_nlen
+	    && (au_ftest_hnjob(a->flags[AuHn_CHILD], GEN)
+		|| au_ftest_hnjob(a->flags[AuHn_CHILD], MNTPNT)))
+		dentry = lookup_wlock_by_name(a->h_child_name, a->h_child_nlen,
+					      a->dir);
+	try_iput = 0;
+	if (dentry && d_really_is_positive(dentry))
+		inode = d_inode(dentry);
+	if (xino && !inode && h_ino
+	    && (au_ftest_hnjob(a->flags[AuHn_CHILD], XINO0)
+		|| au_ftest_hnjob(a->flags[AuHn_CHILD], TRYXINO0)
+		|| au_ftest_hnjob(a->flags[AuHn_CHILD], GEN))) {
+		inode = lookup_wlock_by_ino(sb, bfound, h_ino);
+		try_iput = 1;
+	}
+
+	args.flags = a->flags[AuHn_CHILD];
+	args.dentry = dentry;
+	args.inode = inode;
+	args.h_inode = a->h_child_inode;
+	args.dir = a->dir;
+	args.h_dir = a->h_dir;
+	args.h_name = a->h_child_name;
+	args.h_nlen = a->h_child_nlen;
+	err = hn_job(&args);
+	if (dentry) {
+		if (au_di(dentry))
+			di_write_unlock(dentry);
+		dput(dentry);
+	}
+	if (inode && try_iput) {
+		ii_write_unlock(inode);
+		iput(inode);
+	}
+
+	ii_write_lock_parent(a->dir);
+	args.flags = a->flags[AuHn_PARENT];
+	args.dentry = NULL;
+	args.inode = a->dir;
+	args.h_inode = a->h_dir;
+	args.dir = NULL;
+	args.h_dir = NULL;
+	args.h_name = NULL;
+	args.h_nlen = 0;
+	err = hn_job(&args);
+	ii_write_unlock(a->dir);
+
+out:
+	iput(a->h_child_inode);
+	iput(a->h_dir);
+	iput(a->dir);
+	si_write_unlock(sb);
+	au_nwt_done(&sbinfo->si_nowait);
+	kfree(a);
+}
+
+/* ---------------------------------------------------------------------- */
+
+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
+	       struct qstr *h_child_qstr, struct inode *h_child_inode)
+{
+	int err, len;
+	unsigned int flags[AuHnLast], f;
+	unsigned char isdir, isroot, wh;
+	struct inode *dir;
+	struct au_hnotify_args *args;
+	char *p, *h_child_name;
+
+	err = 0;
+	AuDebugOn(!hnotify || !hnotify->hn_aufs_inode);
+	dir = igrab(hnotify->hn_aufs_inode);
+	if (!dir)
+		goto out;
+
+	isroot = (dir->i_ino == AUFS_ROOT_INO);
+	wh = 0;
+	h_child_name = (void *)h_child_qstr->name;
+	len = h_child_qstr->len;
+	if (h_child_name) {
+		if (len > AUFS_WH_PFX_LEN
+		    && !memcmp(h_child_name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
+			h_child_name += AUFS_WH_PFX_LEN;
+			len -= AUFS_WH_PFX_LEN;
+			wh = 1;
+		}
+	}
+
+	isdir = 0;
+	if (h_child_inode)
+		isdir = !!S_ISDIR(h_child_inode->i_mode);
+	flags[AuHn_PARENT] = AuHnJob_ISDIR;
+	flags[AuHn_CHILD] = 0;
+	if (isdir)
+		flags[AuHn_CHILD] = AuHnJob_ISDIR;
+	au_fset_hnjob(flags[AuHn_PARENT], DIRENT);
+	au_fset_hnjob(flags[AuHn_CHILD], GEN);
+	switch (mask & FS_EVENTS_POSS_ON_CHILD) {
+	case FS_MOVED_FROM:
+	case FS_MOVED_TO:
+		au_fset_hnjob(flags[AuHn_CHILD], XINO0);
+		au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
+		/*FALLTHROUGH*/
+	case FS_CREATE:
+		AuDebugOn(!h_child_name);
+		break;
+
+	case FS_DELETE:
+		/*
+		 * aufs never be able to get this child inode.
+		 * revalidation should be in d_revalidate()
+		 * by checking i_nlink, i_generation or d_unhashed().
+		 */
+		AuDebugOn(!h_child_name);
+		au_fset_hnjob(flags[AuHn_CHILD], TRYXINO0);
+		au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
+		break;
+
+	default:
+		AuDebugOn(1);
+	}
+
+	if (wh)
+		h_child_inode = NULL;
+
+	err = -ENOMEM;
+	/* iput() and kfree() will be called in au_hnotify() */
+	args = kmalloc(sizeof(*args) + len + 1, GFP_NOFS);
+	if (unlikely(!args)) {
+		AuErr1("no memory\n");
+		iput(dir);
+		goto out;
+	}
+	args->flags[AuHn_PARENT] = flags[AuHn_PARENT];
+	args->flags[AuHn_CHILD] = flags[AuHn_CHILD];
+	args->mask = mask;
+	args->dir = dir;
+	args->h_dir = igrab(h_dir);
+	if (h_child_inode)
+		h_child_inode = igrab(h_child_inode); /* can be NULL */
+	args->h_child_inode = h_child_inode;
+	args->h_child_nlen = len;
+	if (len) {
+		p = (void *)args;
+		p += sizeof(*args);
+		memcpy(p, h_child_name, len);
+		p[len] = 0;
+	}
+
+	/* NFS fires the event for silly-renamed one from kworker */
+	f = 0;
+	if (!dir->i_nlink
+	    || (au_test_nfs(h_dir->i_sb) && (mask & FS_DELETE)))
+		f = AuWkq_NEST;
+	err = au_wkq_nowait(au_hn_bh, args, dir->i_sb, f);
+	if (unlikely(err)) {
+		pr_err("wkq %d\n", err);
+		iput(args->h_child_inode);
+		iput(args->h_dir);
+		iput(args->dir);
+		kfree(args);
+	}
+
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm)
+{
+	int err;
+
+	AuDebugOn(!(udba & AuOptMask_UDBA));
+
+	err = 0;
+	if (au_hnotify_op.reset_br)
+		err = au_hnotify_op.reset_br(udba, br, perm);
+
+	return err;
+}
+
+int au_hnotify_init_br(struct au_branch *br, int perm)
+{
+	int err;
+
+	err = 0;
+	if (au_hnotify_op.init_br)
+		err = au_hnotify_op.init_br(br, perm);
+
+	return err;
+}
+
+void au_hnotify_fin_br(struct au_branch *br)
+{
+	if (au_hnotify_op.fin_br)
+		au_hnotify_op.fin_br(br);
+}
+
+static void au_hn_destroy_cache(void)
+{
+	kmem_cache_destroy(au_cache[AuCache_HNOTIFY]);
+	au_cache[AuCache_HNOTIFY] = NULL;
+}
+
+int __init au_hnotify_init(void)
+{
+	int err;
+
+	err = -ENOMEM;
+	au_cache[AuCache_HNOTIFY] = AuCache(au_hnotify);
+	if (au_cache[AuCache_HNOTIFY]) {
+		err = 0;
+		if (au_hnotify_op.init)
+			err = au_hnotify_op.init();
+		if (unlikely(err))
+			au_hn_destroy_cache();
+	}
+	AuTraceErr(err);
+	return err;
+}
+
+void au_hnotify_fin(void)
+{
+	if (au_hnotify_op.fin)
+		au_hnotify_op.fin();
+
+	/* cf. au_cache_fin() */
+	if (au_cache[AuCache_HNOTIFY])
+		au_hn_destroy_cache();
+}
diff --git a/fs/aufs/i_op.c b/fs/aufs/i_op.c
new file mode 100644
index 000000000..e3a2829ec
--- /dev/null
+++ b/fs/aufs/i_op.c
@@ -0,0 +1,1459 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * inode operations (except add/del/rename)
+ */
+
+#include <linux/device_cgroup.h>
+#include <linux/fs_stack.h>
+#include <linux/namei.h>
+#include <linux/security.h>
+#include "aufs.h"
+
+static int h_permission(struct inode *h_inode, int mask,
+			struct path *h_path, int brperm)
+{
+	int err;
+	const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
+
+	err = -EPERM;
+	if (write_mask && IS_IMMUTABLE(h_inode))
+		goto out;
+
+	err = -EACCES;
+	if (((mask & MAY_EXEC)
+	     && S_ISREG(h_inode->i_mode)
+	     && (path_noexec(h_path)
+		 || !(h_inode->i_mode & S_IXUGO))))
+		goto out;
+
+	/*
+	 * - skip the lower fs test in the case of write to ro branch.
+	 * - nfs dir permission write check is optimized, but a policy for
+	 *   link/rename requires a real check.
+	 * - nfs always sets SB_POSIXACL regardless its mount option 'noacl.'
+	 *   in this case, generic_permission() returns -EOPNOTSUPP.
+	 */
+	if ((write_mask && !au_br_writable(brperm))
+	    || (au_test_nfs(h_inode->i_sb) && S_ISDIR(h_inode->i_mode)
+		&& write_mask && !(mask & MAY_READ))
+	    || !h_inode->i_op->permission) {
+		/* AuLabel(generic_permission); */
+		/* AuDbg("get_acl %pf\n", h_inode->i_op->get_acl); */
+		err = generic_permission(h_inode, mask);
+		if (err == -EOPNOTSUPP && au_test_nfs_noacl(h_inode))
+			err = h_inode->i_op->permission(h_inode, mask);
+		AuTraceErr(err);
+	} else {
+		/* AuLabel(h_inode->permission); */
+		err = h_inode->i_op->permission(h_inode, mask);
+		AuTraceErr(err);
+	}
+
+	if (!err)
+		err = devcgroup_inode_permission(h_inode, mask);
+	if (!err)
+		err = security_inode_permission(h_inode, mask);
+
+#if 0
+	if (!err) {
+		/* todo: do we need to call ima_path_check()? */
+		struct path h_path = {
+			.dentry	=
+			.mnt	= h_mnt
+		};
+		err = ima_path_check(&h_path,
+				     mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
+				     IMA_COUNT_LEAVE);
+	}
+#endif
+
+out:
+	return err;
+}
+
+static int aufs_permission(struct inode *inode, int mask)
+{
+	int err;
+	aufs_bindex_t bindex, bbot;
+	const unsigned char isdir = !!S_ISDIR(inode->i_mode),
+		write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
+	struct inode *h_inode;
+	struct super_block *sb;
+	struct au_branch *br;
+
+	/* todo: support rcu-walk? */
+	if (mask & MAY_NOT_BLOCK)
+		return -ECHILD;
+
+	sb = inode->i_sb;
+	si_read_lock(sb, AuLock_FLUSH);
+	ii_read_lock_child(inode);
+#if 0
+	err = au_iigen_test(inode, au_sigen(sb));
+	if (unlikely(err))
+		goto out;
+#endif
+
+	if (!isdir
+	    || write_mask
+	    || au_opt_test(au_mntflags(sb), DIRPERM1)) {
+		err = au_busy_or_stale();
+		h_inode = au_h_iptr(inode, au_ibtop(inode));
+		if (unlikely(!h_inode
+			     || (h_inode->i_mode & S_IFMT)
+			     != (inode->i_mode & S_IFMT)))
+			goto out;
+
+		err = 0;
+		bindex = au_ibtop(inode);
+		br = au_sbr(sb, bindex);
+		err = h_permission(h_inode, mask, &br->br_path, br->br_perm);
+		if (write_mask
+		    && !err
+		    && !special_file(h_inode->i_mode)) {
+			/* test whether the upper writable branch exists */
+			err = -EROFS;
+			for (; bindex >= 0; bindex--)
+				if (!au_br_rdonly(au_sbr(sb, bindex))) {
+					err = 0;
+					break;
+				}
+		}
+		goto out;
+	}
+
+	/* non-write to dir */
+	err = 0;
+	bbot = au_ibbot(inode);
+	for (bindex = au_ibtop(inode); !err && bindex <= bbot; bindex++) {
+		h_inode = au_h_iptr(inode, bindex);
+		if (h_inode) {
+			err = au_busy_or_stale();
+			if (unlikely(!S_ISDIR(h_inode->i_mode)))
+				break;
+
+			br = au_sbr(sb, bindex);
+			err = h_permission(h_inode, mask, &br->br_path,
+					   br->br_perm);
+		}
+	}
+
+out:
+	ii_read_unlock(inode);
+	si_read_unlock(sb);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry,
+				  unsigned int flags)
+{
+	struct dentry *ret, *parent;
+	struct inode *inode;
+	struct super_block *sb;
+	int err, npositive;
+
+	IMustLock(dir);
+
+	/* todo: support rcu-walk? */
+	ret = ERR_PTR(-ECHILD);
+	if (flags & LOOKUP_RCU)
+		goto out;
+
+	ret = ERR_PTR(-ENAMETOOLONG);
+	if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
+		goto out;
+
+	sb = dir->i_sb;
+	err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
+	ret = ERR_PTR(err);
+	if (unlikely(err))
+		goto out;
+
+	err = au_di_init(dentry);
+	ret = ERR_PTR(err);
+	if (unlikely(err))
+		goto out_si;
+
+	inode = NULL;
+	npositive = 0; /* suppress a warning */
+	parent = dentry->d_parent; /* dir inode is locked */
+	di_read_lock_parent(parent, AuLock_IR);
+	err = au_alive_dir(parent);
+	if (!err)
+		err = au_digen_test(parent, au_sigen(sb));
+	if (!err) {
+		/* regardless LOOKUP_CREATE, always ALLOW_NEG */
+		npositive = au_lkup_dentry(dentry, au_dbtop(parent),
+					   AuLkup_ALLOW_NEG);
+		err = npositive;
+	}
+	di_read_unlock(parent, AuLock_IR);
+	ret = ERR_PTR(err);
+	if (unlikely(err < 0))
+		goto out_unlock;
+
+	if (npositive) {
+		inode = au_new_inode(dentry, /*must_new*/0);
+		if (IS_ERR(inode)) {
+			ret = (void *)inode;
+			inode = NULL;
+			goto out_unlock;
+		}
+	}
+
+	if (inode)
+		atomic_inc(&inode->i_count);
+	ret = d_splice_alias(inode, dentry);
+#if 0
+	if (unlikely(d_need_lookup(dentry))) {
+		spin_lock(&dentry->d_lock);
+		dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
+		spin_unlock(&dentry->d_lock);
+	} else
+#endif
+	if (inode) {
+		if (!IS_ERR(ret)) {
+			iput(inode);
+			if (ret && ret != dentry)
+				ii_write_unlock(inode);
+		} else {
+			ii_write_unlock(inode);
+			iput(inode);
+			inode = NULL;
+		}
+	}
+
+out_unlock:
+	di_write_unlock(dentry);
+out_si:
+	si_read_unlock(sb);
+out:
+	return ret;
+}
+
+/* ---------------------------------------------------------------------- */
+
+struct aopen_node {
+	struct hlist_bl_node hblist;
+	struct file *file, *h_file;
+};
+
+static int au_do_aopen(struct inode *inode, struct file *file)
+{
+	struct hlist_bl_head *aopen;
+	struct hlist_bl_node *pos;
+	struct aopen_node *node;
+	struct au_do_open_args args = {
+		.aopen	= 1,
+		.open	= au_do_open_nondir
+	};
+
+	aopen = &au_sbi(inode->i_sb)->si_aopen;
+	hlist_bl_lock(aopen);
+	hlist_bl_for_each_entry(node, pos, aopen, hblist)
+		if (node->file == file) {
+			args.h_file = node->h_file;
+			break;
+		}
+	hlist_bl_unlock(aopen);
+	/* AuDebugOn(!args.h_file); */
+
+	return au_do_open(file, &args);
+}
+
+static int aufs_atomic_open(struct inode *dir, struct dentry *dentry,
+			    struct file *file, unsigned int open_flag,
+			    umode_t create_mode, int *opened)
+{
+	int err, unlocked, h_opened = *opened;
+	unsigned int lkup_flags;
+	struct dentry *parent, *d;
+	struct hlist_bl_head *aopen;
+	struct vfsub_aopen_args args = {
+		.open_flag	= open_flag,
+		.create_mode	= create_mode,
+		.opened		= &h_opened
+	};
+	struct aopen_node aopen_node = {
+		.file	= file
+	};
+
+	IMustLock(dir);
+	AuDbg("open_flag 0%o\n", open_flag);
+	AuDbgDentry(dentry);
+
+	err = 0;
+	if (!au_di(dentry)) {
+		lkup_flags = LOOKUP_OPEN;
+		if (open_flag & O_CREAT)
+			lkup_flags |= LOOKUP_CREATE;
+		d = aufs_lookup(dir, dentry, lkup_flags);
+		if (IS_ERR(d)) {
+			err = PTR_ERR(d);
+			AuTraceErr(err);
+			goto out;
+		} else if (d) {
+			/*
+			 * obsoleted dentry found.
+			 * another error will be returned later.
+			 */
+			d_drop(d);
+			AuDbgDentry(d);
+			dput(d);
+		}
+		AuDbgDentry(dentry);
+	}
+
+	if (d_is_positive(dentry)
+	    || d_unhashed(dentry)
+	    || d_unlinked(dentry)
+	    || !(open_flag & O_CREAT))
+		goto out_no_open;
+
+	unlocked = 0;
+	err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
+	if (unlikely(err))
+		goto out;
+
+	parent = dentry->d_parent;	/* dir is locked */
+	di_write_lock_parent(parent);
+	err = au_lkup_dentry(dentry, /*btop*/0, AuLkup_ALLOW_NEG);
+	if (unlikely(err))
+		goto out_unlock;
+
+	AuDbgDentry(dentry);
+	if (d_is_positive(dentry))
+		goto out_unlock;
+
+	args.file = get_empty_filp();
+	err = PTR_ERR(args.file);
+	if (IS_ERR(args.file))
+		goto out_unlock;
+
+	args.file->f_flags = file->f_flags;
+	err = au_aopen_or_create(dir, dentry, &args);
+	AuTraceErr(err);
+	AuDbgFile(args.file);
+	if (unlikely(err < 0)) {
+		if (h_opened & FILE_OPENED)
+			fput(args.file);
+		else
+			put_filp(args.file);
+		goto out_unlock;
+	}
+	di_write_unlock(parent);
+	di_write_unlock(dentry);
+	unlocked = 1;
+
+	/* some filesystems don't set FILE_CREATED while succeeded? */
+	*opened |= FILE_CREATED;
+	if (h_opened & FILE_OPENED)
+		aopen_node.h_file = args.file;
+	else {
+		put_filp(args.file);
+		args.file = NULL;
+	}
+	aopen = &au_sbi(dir->i_sb)->si_aopen;
+	au_hbl_add(&aopen_node.hblist, aopen);
+	err = finish_open(file, dentry, au_do_aopen, opened);
+	au_hbl_del(&aopen_node.hblist, aopen);
+	AuTraceErr(err);
+	AuDbgFile(file);
+	if (aopen_node.h_file)
+		fput(aopen_node.h_file);
+
+out_unlock:
+	if (unlocked)
+		si_read_unlock(dentry->d_sb);
+	else {
+		di_write_unlock(parent);
+		aufs_read_unlock(dentry, AuLock_DW);
+	}
+	AuDbgDentry(dentry);
+	if (unlikely(err < 0))
+		goto out;
+out_no_open:
+	if (err >= 0 && !(*opened & FILE_CREATED)) {
+		AuLabel(out_no_open);
+		dget(dentry);
+		err = finish_no_open(file, dentry);
+	}
+out:
+	AuDbg("%pd%s%s\n", dentry,
+	      (*opened & FILE_CREATED) ? " created" : "",
+	      (*opened & FILE_OPENED) ? " opened" : "");
+	AuTraceErr(err);
+	return err;
+}
+
+
+/* ---------------------------------------------------------------------- */
+
+static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent,
+			  const unsigned char add_entry, aufs_bindex_t bcpup,
+			  aufs_bindex_t btop)
+{
+	int err;
+	struct dentry *h_parent;
+	struct inode *h_dir;
+
+	if (add_entry)
+		IMustLock(d_inode(parent));
+	else
+		di_write_lock_parent(parent);
+
+	err = 0;
+	if (!au_h_dptr(parent, bcpup)) {
+		if (btop > bcpup)
+			err = au_cpup_dirs(dentry, bcpup);
+		else if (btop < bcpup)
+			err = au_cpdown_dirs(dentry, bcpup);
+		else
+			BUG();
+	}
+	if (!err && add_entry && !au_ftest_wrdir(add_entry, TMPFILE)) {
+		h_parent = au_h_dptr(parent, bcpup);
+		h_dir = d_inode(h_parent);
+		inode_lock_shared_nested(h_dir, AuLsc_I_PARENT);
+		err = au_lkup_neg(dentry, bcpup, /*wh*/0);
+		/* todo: no unlock here */
+		inode_unlock_shared(h_dir);
+
+		AuDbg("bcpup %d\n", bcpup);
+		if (!err) {
+			if (d_really_is_negative(dentry))
+				au_set_h_dptr(dentry, btop, NULL);
+			au_update_dbrange(dentry, /*do_put_zero*/0);
+		}
+	}
+
+	if (!add_entry)
+		di_write_unlock(parent);
+	if (!err)
+		err = bcpup; /* success */
+
+	AuTraceErr(err);
+	return err;
+}
+
+/*
+ * decide the branch and the parent dir where we will create a new entry.
+ * returns new bindex or an error.
+ * copyup the parent dir if needed.
+ */
+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
+	      struct au_wr_dir_args *args)
+{
+	int err;
+	unsigned int flags;
+	aufs_bindex_t bcpup, btop, src_btop;
+	const unsigned char add_entry
+		= au_ftest_wrdir(args->flags, ADD_ENTRY)
+		| au_ftest_wrdir(args->flags, TMPFILE);
+	struct super_block *sb;
+	struct dentry *parent;
+	struct au_sbinfo *sbinfo;
+
+	sb = dentry->d_sb;
+	sbinfo = au_sbi(sb);
+	parent = dget_parent(dentry);
+	btop = au_dbtop(dentry);
+	bcpup = btop;
+	if (args->force_btgt < 0) {
+		if (src_dentry) {
+			src_btop = au_dbtop(src_dentry);
+			if (src_btop < btop)
+				bcpup = src_btop;
+		} else if (add_entry) {
+			flags = 0;
+			if (au_ftest_wrdir(args->flags, ISDIR))
+				au_fset_wbr(flags, DIR);
+			err = AuWbrCreate(sbinfo, dentry, flags);
+			bcpup = err;
+		}
+
+		if (bcpup < 0 || au_test_ro(sb, bcpup, d_inode(dentry))) {
+			if (add_entry)
+				err = AuWbrCopyup(sbinfo, dentry);
+			else {
+				if (!IS_ROOT(dentry)) {
+					di_read_lock_parent(parent, !AuLock_IR);
+					err = AuWbrCopyup(sbinfo, dentry);
+					di_read_unlock(parent, !AuLock_IR);
+				} else
+					err = AuWbrCopyup(sbinfo, dentry);
+			}
+			bcpup = err;
+			if (unlikely(err < 0))
+				goto out;
+		}
+	} else {
+		bcpup = args->force_btgt;
+		AuDebugOn(au_test_ro(sb, bcpup, d_inode(dentry)));
+	}
+
+	AuDbg("btop %d, bcpup %d\n", btop, bcpup);
+	err = bcpup;
+	if (bcpup == btop)
+		goto out; /* success */
+
+	/* copyup the new parent into the branch we process */
+	err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, btop);
+	if (err >= 0) {
+		if (d_really_is_negative(dentry)) {
+			au_set_h_dptr(dentry, btop, NULL);
+			au_set_dbtop(dentry, bcpup);
+			au_set_dbbot(dentry, bcpup);
+		}
+		AuDebugOn(add_entry
+			  && !au_ftest_wrdir(args->flags, TMPFILE)
+			  && !au_h_dptr(dentry, bcpup));
+	}
+
+out:
+	dput(parent);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void au_pin_hdir_unlock(struct au_pin *p)
+{
+	if (p->hdir)
+		au_hn_inode_unlock(p->hdir);
+}
+
+int au_pin_hdir_lock(struct au_pin *p)
+{
+	int err;
+
+	err = 0;
+	if (!p->hdir)
+		goto out;
+
+	/* even if an error happens later, keep this lock */
+	au_hn_inode_lock_nested(p->hdir, p->lsc_hi);
+
+	err = -EBUSY;
+	if (unlikely(p->hdir->hi_inode != d_inode(p->h_parent)))
+		goto out;
+
+	err = 0;
+	if (p->h_dentry)
+		err = au_h_verify(p->h_dentry, p->udba, p->hdir->hi_inode,
+				  p->h_parent, p->br);
+
+out:
+	return err;
+}
+
+int au_pin_hdir_relock(struct au_pin *p)
+{
+	int err, i;
+	struct inode *h_i;
+	struct dentry *h_d[] = {
+		p->h_dentry,
+		p->h_parent
+	};
+
+	err = au_pin_hdir_lock(p);
+	if (unlikely(err))
+		goto out;
+
+	for (i = 0; !err && i < sizeof(h_d)/sizeof(*h_d); i++) {
+		if (!h_d[i])
+			continue;
+		if (d_is_positive(h_d[i])) {
+			h_i = d_inode(h_d[i]);
+			err = !h_i->i_nlink;
+		}
+	}
+
+out:
+	return err;
+}
+
+static void au_pin_hdir_set_owner(struct au_pin *p, struct task_struct *task)
+{
+#if !defined(CONFIG_RWSEM_GENERIC_SPINLOCK) && defined(CONFIG_RWSEM_SPIN_ON_OWNER)
+	p->hdir->hi_inode->i_rwsem.owner = task;
+#endif
+}
+
+void au_pin_hdir_acquire_nest(struct au_pin *p)
+{
+	if (p->hdir) {
+		rwsem_acquire_nest(&p->hdir->hi_inode->i_rwsem.dep_map,
+				   p->lsc_hi, 0, NULL, _RET_IP_);
+		au_pin_hdir_set_owner(p, current);
+	}
+}
+
+void au_pin_hdir_release(struct au_pin *p)
+{
+	if (p->hdir) {
+		au_pin_hdir_set_owner(p, p->task);
+		rwsem_release(&p->hdir->hi_inode->i_rwsem.dep_map, 1, _RET_IP_);
+	}
+}
+
+struct dentry *au_pinned_h_parent(struct au_pin *pin)
+{
+	if (pin && pin->parent)
+		return au_h_dptr(pin->parent, pin->bindex);
+	return NULL;
+}
+
+void au_unpin(struct au_pin *p)
+{
+	if (p->hdir)
+		au_pin_hdir_unlock(p);
+	if (p->h_mnt && au_ftest_pin(p->flags, MNT_WRITE))
+		vfsub_mnt_drop_write(p->h_mnt);
+	if (!p->hdir)
+		return;
+
+	if (!au_ftest_pin(p->flags, DI_LOCKED))
+		di_read_unlock(p->parent, AuLock_IR);
+	iput(p->hdir->hi_inode);
+	dput(p->parent);
+	p->parent = NULL;
+	p->hdir = NULL;
+	p->h_mnt = NULL;
+	/* do not clear p->task */
+}
+
+int au_do_pin(struct au_pin *p)
+{
+	int err;
+	struct super_block *sb;
+	struct inode *h_dir;
+
+	err = 0;
+	sb = p->dentry->d_sb;
+	p->br = au_sbr(sb, p->bindex);
+	if (IS_ROOT(p->dentry)) {
+		if (au_ftest_pin(p->flags, MNT_WRITE)) {
+			p->h_mnt = au_br_mnt(p->br);
+			err = vfsub_mnt_want_write(p->h_mnt);
+			if (unlikely(err)) {
+				au_fclr_pin(p->flags, MNT_WRITE);
+				goto out_err;
+			}
+		}
+		goto out;
+	}
+
+	p->h_dentry = NULL;
+	if (p->bindex <= au_dbbot(p->dentry))
+		p->h_dentry = au_h_dptr(p->dentry, p->bindex);
+
+	p->parent = dget_parent(p->dentry);
+	if (!au_ftest_pin(p->flags, DI_LOCKED))
+		di_read_lock(p->parent, AuLock_IR, p->lsc_di);
+
+	h_dir = NULL;
+	p->h_parent = au_h_dptr(p->parent, p->bindex);
+	p->hdir = au_hi(d_inode(p->parent), p->bindex);
+	if (p->hdir)
+		h_dir = p->hdir->hi_inode;
+
+	/*
+	 * udba case, or
+	 * if DI_LOCKED is not set, then p->parent may be different
+	 * and h_parent can be NULL.
+	 */
+	if (unlikely(!p->hdir || !h_dir || !p->h_parent)) {
+		err = -EBUSY;
+		if (!au_ftest_pin(p->flags, DI_LOCKED))
+			di_read_unlock(p->parent, AuLock_IR);
+		dput(p->parent);
+		p->parent = NULL;
+		goto out_err;
+	}
+
+	if (au_ftest_pin(p->flags, MNT_WRITE)) {
+		p->h_mnt = au_br_mnt(p->br);
+		err = vfsub_mnt_want_write(p->h_mnt);
+		if (unlikely(err)) {
+			au_fclr_pin(p->flags, MNT_WRITE);
+			if (!au_ftest_pin(p->flags, DI_LOCKED))
+				di_read_unlock(p->parent, AuLock_IR);
+			dput(p->parent);
+			p->parent = NULL;
+			goto out_err;
+		}
+	}
+
+	au_igrab(h_dir);
+	err = au_pin_hdir_lock(p);
+	if (!err)
+		goto out; /* success */
+
+	au_unpin(p);
+
+out_err:
+	pr_err("err %d\n", err);
+	err = au_busy_or_stale();
+out:
+	return err;
+}
+
+void au_pin_init(struct au_pin *p, struct dentry *dentry,
+		 aufs_bindex_t bindex, int lsc_di, int lsc_hi,
+		 unsigned int udba, unsigned char flags)
+{
+	p->dentry = dentry;
+	p->udba = udba;
+	p->lsc_di = lsc_di;
+	p->lsc_hi = lsc_hi;
+	p->flags = flags;
+	p->bindex = bindex;
+
+	p->parent = NULL;
+	p->hdir = NULL;
+	p->h_mnt = NULL;
+
+	p->h_dentry = NULL;
+	p->h_parent = NULL;
+	p->br = NULL;
+	p->task = current;
+}
+
+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
+	   unsigned int udba, unsigned char flags)
+{
+	au_pin_init(pin, dentry, bindex, AuLsc_DI_PARENT, AuLsc_I_PARENT2,
+		    udba, flags);
+	return au_do_pin(pin);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * ->setattr() and ->getattr() are called in various cases.
+ * chmod, stat: dentry is revalidated.
+ * fchmod, fstat: file and dentry are not revalidated, additionally they may be
+ *		  unhashed.
+ * for ->setattr(), ia->ia_file is passed from ftruncate only.
+ */
+/* todo: consolidate with do_refresh() and simple_reval_dpath() */
+int au_reval_for_attr(struct dentry *dentry, unsigned int sigen)
+{
+	int err;
+	struct dentry *parent;
+
+	err = 0;
+	if (au_digen_test(dentry, sigen)) {
+		parent = dget_parent(dentry);
+		di_read_lock_parent(parent, AuLock_IR);
+		err = au_refresh_dentry(dentry, parent);
+		di_read_unlock(parent, AuLock_IR);
+		dput(parent);
+	}
+
+	AuTraceErr(err);
+	return err;
+}
+
+int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
+		     struct au_icpup_args *a)
+{
+	int err;
+	loff_t sz;
+	aufs_bindex_t btop, ibtop;
+	struct dentry *hi_wh, *parent;
+	struct inode *inode;
+	struct au_wr_dir_args wr_dir_args = {
+		.force_btgt	= -1,
+		.flags		= 0
+	};
+
+	if (d_is_dir(dentry))
+		au_fset_wrdir(wr_dir_args.flags, ISDIR);
+	/* plink or hi_wh() case */
+	btop = au_dbtop(dentry);
+	inode = d_inode(dentry);
+	ibtop = au_ibtop(inode);
+	if (btop != ibtop && !au_test_ro(inode->i_sb, ibtop, inode))
+		wr_dir_args.force_btgt = ibtop;
+	err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
+	if (unlikely(err < 0))
+		goto out;
+	a->btgt = err;
+	if (err != btop)
+		au_fset_icpup(a->flags, DID_CPUP);
+
+	err = 0;
+	a->pin_flags = AuPin_MNT_WRITE;
+	parent = NULL;
+	if (!IS_ROOT(dentry)) {
+		au_fset_pin(a->pin_flags, DI_LOCKED);
+		parent = dget_parent(dentry);
+		di_write_lock_parent(parent);
+	}
+
+	err = au_pin(&a->pin, dentry, a->btgt, a->udba, a->pin_flags);
+	if (unlikely(err))
+		goto out_parent;
+
+	sz = -1;
+	a->h_path.dentry = au_h_dptr(dentry, btop);
+	a->h_inode = d_inode(a->h_path.dentry);
+	if (ia && (ia->ia_valid & ATTR_SIZE)) {
+		inode_lock_shared_nested(a->h_inode, AuLsc_I_CHILD);
+		if (ia->ia_size < i_size_read(a->h_inode))
+			sz = ia->ia_size;
+		inode_unlock_shared(a->h_inode);
+	}
+
+	hi_wh = NULL;
+	if (au_ftest_icpup(a->flags, DID_CPUP) && d_unlinked(dentry)) {
+		hi_wh = au_hi_wh(inode, a->btgt);
+		if (!hi_wh) {
+			struct au_cp_generic cpg = {
+				.dentry	= dentry,
+				.bdst	= a->btgt,
+				.bsrc	= -1,
+				.len	= sz,
+				.pin	= &a->pin
+			};
+			err = au_sio_cpup_wh(&cpg, /*file*/NULL);
+			if (unlikely(err))
+				goto out_unlock;
+			hi_wh = au_hi_wh(inode, a->btgt);
+			/* todo: revalidate hi_wh? */
+		}
+	}
+
+	if (parent) {
+		au_pin_set_parent_lflag(&a->pin, /*lflag*/0);
+		di_downgrade_lock(parent, AuLock_IR);
+		dput(parent);
+		parent = NULL;
+	}
+	if (!au_ftest_icpup(a->flags, DID_CPUP))
+		goto out; /* success */
+
+	if (!d_unhashed(dentry)) {
+		struct au_cp_generic cpg = {
+			.dentry	= dentry,
+			.bdst	= a->btgt,
+			.bsrc	= btop,
+			.len	= sz,
+			.pin	= &a->pin,
+			.flags	= AuCpup_DTIME | AuCpup_HOPEN
+		};
+		err = au_sio_cpup_simple(&cpg);
+		if (!err)
+			a->h_path.dentry = au_h_dptr(dentry, a->btgt);
+	} else if (!hi_wh)
+		a->h_path.dentry = au_h_dptr(dentry, a->btgt);
+	else
+		a->h_path.dentry = hi_wh; /* do not dget here */
+
+out_unlock:
+	a->h_inode = d_inode(a->h_path.dentry);
+	if (!err)
+		goto out; /* success */
+	au_unpin(&a->pin);
+out_parent:
+	if (parent) {
+		di_write_unlock(parent);
+		dput(parent);
+	}
+out:
+	if (!err)
+		inode_lock_nested(a->h_inode, AuLsc_I_CHILD);
+	return err;
+}
+
+static int aufs_setattr(struct dentry *dentry, struct iattr *ia)
+{
+	int err;
+	struct inode *inode, *delegated;
+	struct super_block *sb;
+	struct file *file;
+	struct au_icpup_args *a;
+
+	inode = d_inode(dentry);
+	IMustLock(inode);
+
+	err = setattr_prepare(dentry, ia);
+	if (unlikely(err))
+		goto out;
+
+	err = -ENOMEM;
+	a = kzalloc(sizeof(*a), GFP_NOFS);
+	if (unlikely(!a))
+		goto out;
+
+	if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
+		ia->ia_valid &= ~ATTR_MODE;
+
+	file = NULL;
+	sb = dentry->d_sb;
+	err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
+	if (unlikely(err))
+		goto out_kfree;
+
+	if (ia->ia_valid & ATTR_FILE) {
+		/* currently ftruncate(2) only */
+		AuDebugOn(!d_is_reg(dentry));
+		file = ia->ia_file;
+		err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1,
+					    /*fi_lsc*/0);
+		if (unlikely(err))
+			goto out_si;
+		ia->ia_file = au_hf_top(file);
+		a->udba = AuOpt_UDBA_NONE;
+	} else {
+		/* fchmod() doesn't pass ia_file */
+		a->udba = au_opt_udba(sb);
+		di_write_lock_child(dentry);
+		/* no d_unlinked(), to set UDBA_NONE for root */
+		if (d_unhashed(dentry))
+			a->udba = AuOpt_UDBA_NONE;
+		if (a->udba != AuOpt_UDBA_NONE) {
+			AuDebugOn(IS_ROOT(dentry));
+			err = au_reval_for_attr(dentry, au_sigen(sb));
+			if (unlikely(err))
+				goto out_dentry;
+		}
+	}
+
+	err = au_pin_and_icpup(dentry, ia, a);
+	if (unlikely(err < 0))
+		goto out_dentry;
+	if (au_ftest_icpup(a->flags, DID_CPUP)) {
+		ia->ia_file = NULL;
+		ia->ia_valid &= ~ATTR_FILE;
+	}
+
+	a->h_path.mnt = au_sbr_mnt(sb, a->btgt);
+	if ((ia->ia_valid & (ATTR_MODE | ATTR_CTIME))
+	    == (ATTR_MODE | ATTR_CTIME)) {
+		err = security_path_chmod(&a->h_path, ia->ia_mode);
+		if (unlikely(err))
+			goto out_unlock;
+	} else if ((ia->ia_valid & (ATTR_UID | ATTR_GID))
+		   && (ia->ia_valid & ATTR_CTIME)) {
+		err = security_path_chown(&a->h_path, ia->ia_uid, ia->ia_gid);
+		if (unlikely(err))
+			goto out_unlock;
+	}
+
+	if (ia->ia_valid & ATTR_SIZE) {
+		struct file *f;
+
+		if (ia->ia_size < i_size_read(inode))
+			/* unmap only */
+			truncate_setsize(inode, ia->ia_size);
+
+		f = NULL;
+		if (ia->ia_valid & ATTR_FILE)
+			f = ia->ia_file;
+		inode_unlock(a->h_inode);
+		err = vfsub_trunc(&a->h_path, ia->ia_size, ia->ia_valid, f);
+		inode_lock_nested(a->h_inode, AuLsc_I_CHILD);
+	} else {
+		delegated = NULL;
+		while (1) {
+			err = vfsub_notify_change(&a->h_path, ia, &delegated);
+			if (delegated) {
+				err = break_deleg_wait(&delegated);
+				if (!err)
+					continue;
+			}
+			break;
+		}
+	}
+	/*
+	 * regardless aufs 'acl' option setting.
+	 * why don't all acl-aware fs call this func from their ->setattr()?
+	 */
+	if (!err && (ia->ia_valid & ATTR_MODE))
+		err = vfsub_acl_chmod(a->h_inode, ia->ia_mode);
+	if (!err)
+		au_cpup_attr_changeable(inode);
+
+out_unlock:
+	inode_unlock(a->h_inode);
+	au_unpin(&a->pin);
+	if (unlikely(err))
+		au_update_dbtop(dentry);
+out_dentry:
+	di_write_unlock(dentry);
+	if (file) {
+		fi_write_unlock(file);
+		ia->ia_file = file;
+		ia->ia_valid |= ATTR_FILE;
+	}
+out_si:
+	si_read_unlock(sb);
+out_kfree:
+	kfree(a);
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+#if IS_ENABLED(CONFIG_AUFS_XATTR) || IS_ENABLED(CONFIG_FS_POSIX_ACL)
+static int au_h_path_to_set_attr(struct dentry *dentry,
+				 struct au_icpup_args *a, struct path *h_path)
+{
+	int err;
+	struct super_block *sb;
+
+	sb = dentry->d_sb;
+	a->udba = au_opt_udba(sb);
+	/* no d_unlinked(), to set UDBA_NONE for root */
+	if (d_unhashed(dentry))
+		a->udba = AuOpt_UDBA_NONE;
+	if (a->udba != AuOpt_UDBA_NONE) {
+		AuDebugOn(IS_ROOT(dentry));
+		err = au_reval_for_attr(dentry, au_sigen(sb));
+		if (unlikely(err))
+			goto out;
+	}
+	err = au_pin_and_icpup(dentry, /*ia*/NULL, a);
+	if (unlikely(err < 0))
+		goto out;
+
+	h_path->dentry = a->h_path.dentry;
+	h_path->mnt = au_sbr_mnt(sb, a->btgt);
+
+out:
+	return err;
+}
+
+ssize_t au_sxattr(struct dentry *dentry, struct inode *inode,
+		  struct au_sxattr *arg)
+{
+	int err;
+	struct path h_path;
+	struct super_block *sb;
+	struct au_icpup_args *a;
+	struct inode *h_inode;
+
+	IMustLock(inode);
+
+	err = -ENOMEM;
+	a = kzalloc(sizeof(*a), GFP_NOFS);
+	if (unlikely(!a))
+		goto out;
+
+	sb = dentry->d_sb;
+	err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
+	if (unlikely(err))
+		goto out_kfree;
+
+	h_path.dentry = NULL;	/* silence gcc */
+	di_write_lock_child(dentry);
+	err = au_h_path_to_set_attr(dentry, a, &h_path);
+	if (unlikely(err))
+		goto out_di;
+
+	inode_unlock(a->h_inode);
+	switch (arg->type) {
+	case AU_XATTR_SET:
+		AuDebugOn(d_is_negative(h_path.dentry));
+		err = vfsub_setxattr(h_path.dentry,
+				     arg->u.set.name, arg->u.set.value,
+				     arg->u.set.size, arg->u.set.flags);
+		break;
+	case AU_ACL_SET:
+		err = -EOPNOTSUPP;
+		h_inode = d_inode(h_path.dentry);
+		if (h_inode->i_op->set_acl)
+			/* this will call posix_acl_update_mode */
+			err = h_inode->i_op->set_acl(h_inode,
+						     arg->u.acl_set.acl,
+						     arg->u.acl_set.type);
+		break;
+	}
+	if (!err)
+		au_cpup_attr_timesizes(inode);
+
+	au_unpin(&a->pin);
+	if (unlikely(err))
+		au_update_dbtop(dentry);
+
+out_di:
+	di_write_unlock(dentry);
+	si_read_unlock(sb);
+out_kfree:
+	kfree(a);
+out:
+	AuTraceErr(err);
+	return err;
+}
+#endif
+
+static void au_refresh_iattr(struct inode *inode, struct kstat *st,
+			     unsigned int nlink)
+{
+	unsigned int n;
+
+	inode->i_mode = st->mode;
+	/* don't i_[ug]id_write() here */
+	inode->i_uid = st->uid;
+	inode->i_gid = st->gid;
+	inode->i_atime = st->atime;
+	inode->i_mtime = st->mtime;
+	inode->i_ctime = st->ctime;
+
+	au_cpup_attr_nlink(inode, /*force*/0);
+	if (S_ISDIR(inode->i_mode)) {
+		n = inode->i_nlink;
+		n -= nlink;
+		n += st->nlink;
+		smp_mb(); /* for i_nlink */
+		/* 0 can happen */
+		set_nlink(inode, n);
+	}
+
+	spin_lock(&inode->i_lock);
+	inode->i_blocks = st->blocks;
+	i_size_write(inode, st->size);
+	spin_unlock(&inode->i_lock);
+}
+
+/*
+ * common routine for aufs_getattr() and au_getxattr().
+ * returns zero or negative (an error).
+ * @dentry will be read-locked in success.
+ */
+int au_h_path_getattr(struct dentry *dentry, int force, struct path *h_path,
+		      int locked)
+{
+	int err;
+	unsigned int mnt_flags, sigen;
+	unsigned char udba_none;
+	aufs_bindex_t bindex;
+	struct super_block *sb, *h_sb;
+	struct inode *inode;
+
+	h_path->mnt = NULL;
+	h_path->dentry = NULL;
+
+	err = 0;
+	sb = dentry->d_sb;
+	mnt_flags = au_mntflags(sb);
+	udba_none = !!au_opt_test(mnt_flags, UDBA_NONE);
+
+	if (unlikely(locked))
+		goto body; /* skip locking dinfo */
+
+	/* support fstat(2) */
+	if (!d_unlinked(dentry) && !udba_none) {
+		sigen = au_sigen(sb);
+		err = au_digen_test(dentry, sigen);
+		if (!err) {
+			di_read_lock_child(dentry, AuLock_IR);
+			err = au_dbrange_test(dentry);
+			if (unlikely(err)) {
+				di_read_unlock(dentry, AuLock_IR);
+				goto out;
+			}
+		} else {
+			AuDebugOn(IS_ROOT(dentry));
+			di_write_lock_child(dentry);
+			err = au_dbrange_test(dentry);
+			if (!err)
+				err = au_reval_for_attr(dentry, sigen);
+			if (!err)
+				di_downgrade_lock(dentry, AuLock_IR);
+			else {
+				di_write_unlock(dentry);
+				goto out;
+			}
+		}
+	} else
+		di_read_lock_child(dentry, AuLock_IR);
+
+body:
+	inode = d_inode(dentry);
+	bindex = au_ibtop(inode);
+	h_path->mnt = au_sbr_mnt(sb, bindex);
+	h_sb = h_path->mnt->mnt_sb;
+	if (!force
+	    && !au_test_fs_bad_iattr(h_sb)
+	    && udba_none)
+		goto out; /* success */
+
+	if (au_dbtop(dentry) == bindex)
+		h_path->dentry = au_h_dptr(dentry, bindex);
+	else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) {
+		h_path->dentry = au_plink_lkup(inode, bindex);
+		if (IS_ERR(h_path->dentry))
+			/* pretending success */
+			h_path->dentry = NULL;
+		else
+			dput(h_path->dentry);
+	}
+
+out:
+	return err;
+}
+
+static int aufs_getattr(const struct path *path, struct kstat *st,
+			u32 request, unsigned int query)
+{
+	int err;
+	unsigned char positive;
+	struct path h_path;
+	struct dentry *dentry;
+	struct inode *inode;
+	struct super_block *sb;
+
+	dentry = path->dentry;
+	inode = d_inode(dentry);
+	sb = dentry->d_sb;
+	err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
+	if (unlikely(err))
+		goto out;
+	err = au_h_path_getattr(dentry, /*force*/0, &h_path, /*locked*/0);
+	if (unlikely(err))
+		goto out_si;
+	if (unlikely(!h_path.dentry))
+		/* illegally overlapped or something */
+		goto out_fill; /* pretending success */
+
+	positive = d_is_positive(h_path.dentry);
+	if (positive)
+		/* no vfsub version */
+		err = vfs_getattr(&h_path, st, request, query);
+	if (!err) {
+		if (positive)
+			au_refresh_iattr(inode, st,
+					 d_inode(h_path.dentry)->i_nlink);
+		goto out_fill; /* success */
+	}
+	AuTraceErr(err);
+	goto out_di;
+
+out_fill:
+	generic_fillattr(inode, st);
+out_di:
+	di_read_unlock(dentry, AuLock_IR);
+out_si:
+	si_read_unlock(sb);
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static const char *aufs_get_link(struct dentry *dentry, struct inode *inode,
+				 struct delayed_call *done)
+{
+	const char *ret;
+	struct dentry *h_dentry;
+	struct inode *h_inode;
+	int err;
+	aufs_bindex_t bindex;
+
+	ret = NULL; /* suppress a warning */
+	err = -ECHILD;
+	if (!dentry)
+		goto out;
+
+	err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN);
+	if (unlikely(err))
+		goto out;
+
+	err = au_d_hashed_positive(dentry);
+	if (unlikely(err))
+		goto out_unlock;
+
+	err = -EINVAL;
+	inode = d_inode(dentry);
+	bindex = au_ibtop(inode);
+	h_inode = au_h_iptr(inode, bindex);
+	if (unlikely(!h_inode->i_op->get_link))
+		goto out_unlock;
+
+	err = -EBUSY;
+	h_dentry = NULL;
+	if (au_dbtop(dentry) <= bindex) {
+		h_dentry = au_h_dptr(dentry, bindex);
+		if (h_dentry)
+			dget(h_dentry);
+	}
+	if (!h_dentry) {
+		h_dentry = d_find_any_alias(h_inode);
+		if (IS_ERR(h_dentry)) {
+			err = PTR_ERR(h_dentry);
+			goto out_unlock;
+		}
+	}
+	if (unlikely(!h_dentry))
+		goto out_unlock;
+
+	err = 0;
+	AuDbg("%pf\n", h_inode->i_op->get_link);
+	AuDbgDentry(h_dentry);
+	ret = vfs_get_link(h_dentry, done);
+	dput(h_dentry);
+	if (IS_ERR(ret))
+		err = PTR_ERR(ret);
+
+out_unlock:
+	aufs_read_unlock(dentry, AuLock_IR);
+out:
+	if (unlikely(err))
+		ret = ERR_PTR(err);
+	AuTraceErrPtr(ret);
+	return ret;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_is_special(struct inode *inode)
+{
+	return (inode->i_mode & (S_IFBLK | S_IFCHR | S_IFIFO | S_IFSOCK));
+}
+
+static int aufs_update_time(struct inode *inode, struct timespec *ts, int flags)
+{
+	int err;
+	aufs_bindex_t bindex;
+	struct super_block *sb;
+	struct inode *h_inode;
+	struct vfsmount *h_mnt;
+
+	sb = inode->i_sb;
+	WARN_ONCE((flags & S_ATIME) && !IS_NOATIME(inode),
+		  "unexpected s_flags 0x%lx", sb->s_flags);
+
+	/* mmap_sem might be acquired already, cf. aufs_mmap() */
+	lockdep_off();
+	si_read_lock(sb, AuLock_FLUSH);
+	ii_write_lock_child(inode);
+
+	err = 0;
+	bindex = au_ibtop(inode);
+	h_inode = au_h_iptr(inode, bindex);
+	if (!au_test_ro(sb, bindex, inode)) {
+		h_mnt = au_sbr_mnt(sb, bindex);
+		err = vfsub_mnt_want_write(h_mnt);
+		if (!err) {
+			err = vfsub_update_time(h_inode, ts, flags);
+			vfsub_mnt_drop_write(h_mnt);
+		}
+	} else if (au_is_special(h_inode)) {
+		/*
+		 * Never copy-up here.
+		 * These special files may already be opened and used for
+		 * communicating. If we copied it up, then the communication
+		 * would be corrupted.
+		 */
+		AuWarn1("timestamps for i%lu are ignored "
+			"since it is on readonly branch (hi%lu).\n",
+			inode->i_ino, h_inode->i_ino);
+	} else if (flags & ~S_ATIME) {
+		err = -EIO;
+		AuIOErr1("unexpected flags 0x%x\n", flags);
+		AuDebugOn(1);
+	}
+
+	if (!err)
+		au_cpup_attr_timesizes(inode);
+	ii_write_unlock(inode);
+	si_read_unlock(sb);
+	lockdep_on();
+
+	if (!err && (flags & S_VERSION))
+		inode_inc_iversion(inode);
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* no getattr version will be set by module.c:aufs_init() */
+struct inode_operations aufs_iop_nogetattr[AuIop_Last],
+	aufs_iop[] = {
+	[AuIop_SYMLINK] = {
+		.permission	= aufs_permission,
+#ifdef CONFIG_FS_POSIX_ACL
+		.get_acl	= aufs_get_acl,
+		.set_acl	= aufs_set_acl, /* unsupport for symlink? */
+#endif
+
+		.setattr	= aufs_setattr,
+		.getattr	= aufs_getattr,
+
+#ifdef CONFIG_AUFS_XATTR
+		.listxattr	= aufs_listxattr,
+#endif
+
+		.get_link	= aufs_get_link,
+
+		/* .update_time	= aufs_update_time */
+	},
+	[AuIop_DIR] = {
+		.create		= aufs_create,
+		.lookup		= aufs_lookup,
+		.link		= aufs_link,
+		.unlink		= aufs_unlink,
+		.symlink	= aufs_symlink,
+		.mkdir		= aufs_mkdir,
+		.rmdir		= aufs_rmdir,
+		.mknod		= aufs_mknod,
+		.rename		= aufs_rename,
+
+		.permission	= aufs_permission,
+#ifdef CONFIG_FS_POSIX_ACL
+		.get_acl	= aufs_get_acl,
+		.set_acl	= aufs_set_acl,
+#endif
+
+		.setattr	= aufs_setattr,
+		.getattr	= aufs_getattr,
+
+#ifdef CONFIG_AUFS_XATTR
+		.listxattr	= aufs_listxattr,
+#endif
+
+		.update_time	= aufs_update_time,
+		.atomic_open	= aufs_atomic_open,
+		.tmpfile	= aufs_tmpfile
+	},
+	[AuIop_OTHER] = {
+		.permission	= aufs_permission,
+#ifdef CONFIG_FS_POSIX_ACL
+		.get_acl	= aufs_get_acl,
+		.set_acl	= aufs_set_acl,
+#endif
+
+		.setattr	= aufs_setattr,
+		.getattr	= aufs_getattr,
+
+#ifdef CONFIG_AUFS_XATTR
+		.listxattr	= aufs_listxattr,
+#endif
+
+		.update_time	= aufs_update_time
+	}
+};
diff --git a/fs/aufs/i_op_add.c b/fs/aufs/i_op_add.c
new file mode 100644
index 000000000..a9e405292
--- /dev/null
+++ b/fs/aufs/i_op_add.c
@@ -0,0 +1,920 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * inode operations (add entry)
+ */
+
+#include "aufs.h"
+
+/*
+ * final procedure of adding a new entry, except link(2).
+ * remove whiteout, instantiate, copyup the parent dir's times and size
+ * and update version.
+ * if it failed, re-create the removed whiteout.
+ */
+static int epilog(struct inode *dir, aufs_bindex_t bindex,
+		  struct dentry *wh_dentry, struct dentry *dentry)
+{
+	int err, rerr;
+	aufs_bindex_t bwh;
+	struct path h_path;
+	struct super_block *sb;
+	struct inode *inode, *h_dir;
+	struct dentry *wh;
+
+	bwh = -1;
+	sb = dir->i_sb;
+	if (wh_dentry) {
+		h_dir = d_inode(wh_dentry->d_parent); /* dir inode is locked */
+		IMustLock(h_dir);
+		AuDebugOn(au_h_iptr(dir, bindex) != h_dir);
+		bwh = au_dbwh(dentry);
+		h_path.dentry = wh_dentry;
+		h_path.mnt = au_sbr_mnt(sb, bindex);
+		err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path,
+					  dentry);
+		if (unlikely(err))
+			goto out;
+	}
+
+	inode = au_new_inode(dentry, /*must_new*/1);
+	if (!IS_ERR(inode)) {
+		d_instantiate(dentry, inode);
+		dir = d_inode(dentry->d_parent); /* dir inode is locked */
+		IMustLock(dir);
+		au_dir_ts(dir, bindex);
+		inode_inc_iversion(dir);
+		au_fhsm_wrote(sb, bindex, /*force*/0);
+		return 0; /* success */
+	}
+
+	err = PTR_ERR(inode);
+	if (!wh_dentry)
+		goto out;
+
+	/* revert */
+	/* dir inode is locked */
+	wh = au_wh_create(dentry, bwh, wh_dentry->d_parent);
+	rerr = PTR_ERR(wh);
+	if (IS_ERR(wh)) {
+		AuIOErr("%pd reverting whiteout failed(%d, %d)\n",
+			dentry, err, rerr);
+		err = -EIO;
+	} else
+		dput(wh);
+
+out:
+	return err;
+}
+
+static int au_d_may_add(struct dentry *dentry)
+{
+	int err;
+
+	err = 0;
+	if (unlikely(d_unhashed(dentry)))
+		err = -ENOENT;
+	if (unlikely(d_really_is_positive(dentry)))
+		err = -EEXIST;
+	return err;
+}
+
+/*
+ * simple tests for the adding inode operations.
+ * following the checks in vfs, plus the parent-child relationship.
+ */
+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
+	       struct dentry *h_parent, int isdir)
+{
+	int err;
+	umode_t h_mode;
+	struct dentry *h_dentry;
+	struct inode *h_inode;
+
+	err = -ENAMETOOLONG;
+	if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
+		goto out;
+
+	h_dentry = au_h_dptr(dentry, bindex);
+	if (d_really_is_negative(dentry)) {
+		err = -EEXIST;
+		if (unlikely(d_is_positive(h_dentry)))
+			goto out;
+	} else {
+		/* rename(2) case */
+		err = -EIO;
+		if (unlikely(d_is_negative(h_dentry)))
+			goto out;
+		h_inode = d_inode(h_dentry);
+		if (unlikely(!h_inode->i_nlink))
+			goto out;
+
+		h_mode = h_inode->i_mode;
+		if (!isdir) {
+			err = -EISDIR;
+			if (unlikely(S_ISDIR(h_mode)))
+				goto out;
+		} else if (unlikely(!S_ISDIR(h_mode))) {
+			err = -ENOTDIR;
+			goto out;
+		}
+	}
+
+	err = 0;
+	/* expected parent dir is locked */
+	if (unlikely(h_parent != h_dentry->d_parent))
+		err = -EIO;
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/*
+ * initial procedure of adding a new entry.
+ * prepare writable branch and the parent dir, lock it,
+ * and lookup whiteout for the new entry.
+ */
+static struct dentry*
+lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt,
+		  struct dentry *src_dentry, struct au_pin *pin,
+		  struct au_wr_dir_args *wr_dir_args)
+{
+	struct dentry *wh_dentry, *h_parent;
+	struct super_block *sb;
+	struct au_branch *br;
+	int err;
+	unsigned int udba;
+	aufs_bindex_t bcpup;
+
+	AuDbg("%pd\n", dentry);
+
+	err = au_wr_dir(dentry, src_dentry, wr_dir_args);
+	bcpup = err;
+	wh_dentry = ERR_PTR(err);
+	if (unlikely(err < 0))
+		goto out;
+
+	sb = dentry->d_sb;
+	udba = au_opt_udba(sb);
+	err = au_pin(pin, dentry, bcpup, udba,
+		     AuPin_DI_LOCKED | AuPin_MNT_WRITE);
+	wh_dentry = ERR_PTR(err);
+	if (unlikely(err))
+		goto out;
+
+	h_parent = au_pinned_h_parent(pin);
+	if (udba != AuOpt_UDBA_NONE
+	    && au_dbtop(dentry) == bcpup)
+		err = au_may_add(dentry, bcpup, h_parent,
+				 au_ftest_wrdir(wr_dir_args->flags, ISDIR));
+	else if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
+		err = -ENAMETOOLONG;
+	wh_dentry = ERR_PTR(err);
+	if (unlikely(err))
+		goto out_unpin;
+
+	br = au_sbr(sb, bcpup);
+	if (dt) {
+		struct path tmp = {
+			.dentry	= h_parent,
+			.mnt	= au_br_mnt(br)
+		};
+		au_dtime_store(dt, au_pinned_parent(pin), &tmp);
+	}
+
+	wh_dentry = NULL;
+	if (bcpup != au_dbwh(dentry))
+		goto out; /* success */
+
+	/*
+	 * ENAMETOOLONG here means that if we allowed create such name, then it
+	 * would not be able to removed in the future. So we don't allow such
+	 * name here and we don't handle ENAMETOOLONG differently here.
+	 */
+	wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
+
+out_unpin:
+	if (IS_ERR(wh_dentry))
+		au_unpin(pin);
+out:
+	return wh_dentry;
+}
+
+/* ---------------------------------------------------------------------- */
+
+enum { Mknod, Symlink, Creat };
+struct simple_arg {
+	int type;
+	union {
+		struct {
+			umode_t			mode;
+			bool			want_excl;
+			bool			try_aopen;
+			struct vfsub_aopen_args	*aopen;
+		} c;
+		struct {
+			const char *symname;
+		} s;
+		struct {
+			umode_t mode;
+			dev_t dev;
+		} m;
+	} u;
+};
+
+static int add_simple(struct inode *dir, struct dentry *dentry,
+		      struct simple_arg *arg)
+{
+	int err, rerr;
+	aufs_bindex_t btop;
+	unsigned char created;
+	const unsigned char try_aopen
+		= (arg->type == Creat && arg->u.c.try_aopen);
+	struct dentry *wh_dentry, *parent;
+	struct inode *h_dir;
+	struct super_block *sb;
+	struct au_branch *br;
+	/* to reuduce stack size */
+	struct {
+		struct au_dtime dt;
+		struct au_pin pin;
+		struct path h_path;
+		struct au_wr_dir_args wr_dir_args;
+	} *a;
+
+	AuDbg("%pd\n", dentry);
+	IMustLock(dir);
+
+	err = -ENOMEM;
+	a = kmalloc(sizeof(*a), GFP_NOFS);
+	if (unlikely(!a))
+		goto out;
+	a->wr_dir_args.force_btgt = -1;
+	a->wr_dir_args.flags = AuWrDir_ADD_ENTRY;
+
+	parent = dentry->d_parent; /* dir inode is locked */
+	if (!try_aopen) {
+		err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
+		if (unlikely(err))
+			goto out_free;
+	}
+	err = au_d_may_add(dentry);
+	if (unlikely(err))
+		goto out_unlock;
+	if (!try_aopen)
+		di_write_lock_parent(parent);
+	wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
+				      &a->pin, &a->wr_dir_args);
+	err = PTR_ERR(wh_dentry);
+	if (IS_ERR(wh_dentry))
+		goto out_parent;
+
+	btop = au_dbtop(dentry);
+	sb = dentry->d_sb;
+	br = au_sbr(sb, btop);
+	a->h_path.dentry = au_h_dptr(dentry, btop);
+	a->h_path.mnt = au_br_mnt(br);
+	h_dir = au_pinned_h_dir(&a->pin);
+	switch (arg->type) {
+	case Creat:
+		err = 0;
+		if (!try_aopen || !h_dir->i_op->atomic_open)
+			err = vfsub_create(h_dir, &a->h_path, arg->u.c.mode,
+					   arg->u.c.want_excl);
+		else
+			err = vfsub_atomic_open(h_dir, a->h_path.dentry,
+						arg->u.c.aopen, br);
+		break;
+	case Symlink:
+		err = vfsub_symlink(h_dir, &a->h_path, arg->u.s.symname);
+		break;
+	case Mknod:
+		err = vfsub_mknod(h_dir, &a->h_path, arg->u.m.mode,
+				  arg->u.m.dev);
+		break;
+	default:
+		BUG();
+	}
+	created = !err;
+	if (!err)
+		err = epilog(dir, btop, wh_dentry, dentry);
+
+	/* revert */
+	if (unlikely(created && err && d_is_positive(a->h_path.dentry))) {
+		/* no delegation since it is just created */
+		rerr = vfsub_unlink(h_dir, &a->h_path, /*delegated*/NULL,
+				    /*force*/0);
+		if (rerr) {
+			AuIOErr("%pd revert failure(%d, %d)\n",
+				dentry, err, rerr);
+			err = -EIO;
+		}
+		au_dtime_revert(&a->dt);
+	}
+
+	if (!err && try_aopen && !h_dir->i_op->atomic_open)
+		*arg->u.c.aopen->opened |= FILE_CREATED;
+
+	au_unpin(&a->pin);
+	dput(wh_dentry);
+
+out_parent:
+	if (!try_aopen)
+		di_write_unlock(parent);
+out_unlock:
+	if (unlikely(err)) {
+		au_update_dbtop(dentry);
+		d_drop(dentry);
+	}
+	if (!try_aopen)
+		aufs_read_unlock(dentry, AuLock_DW);
+out_free:
+	kfree(a);
+out:
+	return err;
+}
+
+int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
+	       dev_t dev)
+{
+	struct simple_arg arg = {
+		.type = Mknod,
+		.u.m = {
+			.mode	= mode,
+			.dev	= dev
+		}
+	};
+	return add_simple(dir, dentry, &arg);
+}
+
+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+{
+	struct simple_arg arg = {
+		.type = Symlink,
+		.u.s.symname = symname
+	};
+	return add_simple(dir, dentry, &arg);
+}
+
+int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
+		bool want_excl)
+{
+	struct simple_arg arg = {
+		.type = Creat,
+		.u.c = {
+			.mode		= mode,
+			.want_excl	= want_excl
+		}
+	};
+	return add_simple(dir, dentry, &arg);
+}
+
+int au_aopen_or_create(struct inode *dir, struct dentry *dentry,
+		       struct vfsub_aopen_args *aopen_args)
+{
+	struct simple_arg arg = {
+		.type = Creat,
+		.u.c = {
+			.mode		= aopen_args->create_mode,
+			.want_excl	= aopen_args->open_flag & O_EXCL,
+			.try_aopen	= true,
+			.aopen		= aopen_args
+		}
+	};
+	return add_simple(dir, dentry, &arg);
+}
+
+int aufs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+	int err;
+	aufs_bindex_t bindex;
+	struct super_block *sb;
+	struct dentry *parent, *h_parent, *h_dentry;
+	struct inode *h_dir, *inode;
+	struct vfsmount *h_mnt;
+	struct au_wr_dir_args wr_dir_args = {
+		.force_btgt	= -1,
+		.flags		= AuWrDir_TMPFILE
+	};
+
+	/* copy-up may happen */
+	inode_lock(dir);
+
+	sb = dir->i_sb;
+	err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
+	if (unlikely(err))
+		goto out;
+
+	err = au_di_init(dentry);
+	if (unlikely(err))
+		goto out_si;
+
+	err = -EBUSY;
+	parent = d_find_any_alias(dir);
+	AuDebugOn(!parent);
+	di_write_lock_parent(parent);
+	if (unlikely(d_inode(parent) != dir))
+		goto out_parent;
+
+	err = au_digen_test(parent, au_sigen(sb));
+	if (unlikely(err))
+		goto out_parent;
+
+	bindex = au_dbtop(parent);
+	au_set_dbtop(dentry, bindex);
+	au_set_dbbot(dentry, bindex);
+	err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
+	bindex = err;
+	if (unlikely(err < 0))
+		goto out_parent;
+
+	err = -EOPNOTSUPP;
+	h_dir = au_h_iptr(dir, bindex);
+	if (unlikely(!h_dir->i_op->tmpfile))
+		goto out_parent;
+
+	h_mnt = au_sbr_mnt(sb, bindex);
+	err = vfsub_mnt_want_write(h_mnt);
+	if (unlikely(err))
+		goto out_parent;
+
+	h_parent = au_h_dptr(parent, bindex);
+	h_dentry = vfs_tmpfile(h_parent, mode, /*open_flag*/0);
+	if (IS_ERR(h_dentry)) {
+		err = PTR_ERR(h_dentry);
+		goto out_mnt;
+	}
+
+	au_set_dbtop(dentry, bindex);
+	au_set_dbbot(dentry, bindex);
+	au_set_h_dptr(dentry, bindex, dget(h_dentry));
+	inode = au_new_inode(dentry, /*must_new*/1);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		au_set_h_dptr(dentry, bindex, NULL);
+		au_set_dbtop(dentry, -1);
+		au_set_dbbot(dentry, -1);
+	} else {
+		if (!inode->i_nlink)
+			set_nlink(inode, 1);
+		d_tmpfile(dentry, inode);
+		au_di(dentry)->di_tmpfile = 1;
+
+		/* update without i_mutex */
+		if (au_ibtop(dir) == au_dbtop(dentry))
+			au_cpup_attr_timesizes(dir);
+	}
+	dput(h_dentry);
+
+out_mnt:
+	vfsub_mnt_drop_write(h_mnt);
+out_parent:
+	di_write_unlock(parent);
+	dput(parent);
+	di_write_unlock(dentry);
+	if (unlikely(err)) {
+		au_di_fin(dentry);
+		dentry->d_fsdata = NULL;
+	}
+out_si:
+	si_read_unlock(sb);
+out:
+	inode_unlock(dir);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+struct au_link_args {
+	aufs_bindex_t bdst, bsrc;
+	struct au_pin pin;
+	struct path h_path;
+	struct dentry *src_parent, *parent;
+};
+
+static int au_cpup_before_link(struct dentry *src_dentry,
+			       struct au_link_args *a)
+{
+	int err;
+	struct dentry *h_src_dentry;
+	struct au_cp_generic cpg = {
+		.dentry	= src_dentry,
+		.bdst	= a->bdst,
+		.bsrc	= a->bsrc,
+		.len	= -1,
+		.pin	= &a->pin,
+		.flags	= AuCpup_DTIME | AuCpup_HOPEN /* | AuCpup_KEEPLINO */
+	};
+
+	di_read_lock_parent(a->src_parent, AuLock_IR);
+	err = au_test_and_cpup_dirs(src_dentry, a->bdst);
+	if (unlikely(err))
+		goto out;
+
+	h_src_dentry = au_h_dptr(src_dentry, a->bsrc);
+	err = au_pin(&a->pin, src_dentry, a->bdst,
+		     au_opt_udba(src_dentry->d_sb),
+		     AuPin_DI_LOCKED | AuPin_MNT_WRITE);
+	if (unlikely(err))
+		goto out;
+
+	err = au_sio_cpup_simple(&cpg);
+	au_unpin(&a->pin);
+
+out:
+	di_read_unlock(a->src_parent, AuLock_IR);
+	return err;
+}
+
+static int au_cpup_or_link(struct dentry *src_dentry, struct dentry *dentry,
+			   struct au_link_args *a)
+{
+	int err;
+	unsigned char plink;
+	aufs_bindex_t bbot;
+	struct dentry *h_src_dentry;
+	struct inode *h_inode, *inode, *delegated;
+	struct super_block *sb;
+	struct file *h_file;
+
+	plink = 0;
+	h_inode = NULL;
+	sb = src_dentry->d_sb;
+	inode = d_inode(src_dentry);
+	if (au_ibtop(inode) <= a->bdst)
+		h_inode = au_h_iptr(inode, a->bdst);
+	if (!h_inode || !h_inode->i_nlink) {
+		/* copyup src_dentry as the name of dentry. */
+		bbot = au_dbbot(dentry);
+		if (bbot < a->bsrc)
+			au_set_dbbot(dentry, a->bsrc);
+		au_set_h_dptr(dentry, a->bsrc,
+			      dget(au_h_dptr(src_dentry, a->bsrc)));
+		dget(a->h_path.dentry);
+		au_set_h_dptr(dentry, a->bdst, NULL);
+		AuDbg("temporary d_inode...\n");
+		spin_lock(&dentry->d_lock);
+		dentry->d_inode = d_inode(src_dentry); /* tmp */
+		spin_unlock(&dentry->d_lock);
+		h_file = au_h_open_pre(dentry, a->bsrc, /*force_wr*/0);
+		if (IS_ERR(h_file))
+			err = PTR_ERR(h_file);
+		else {
+			struct au_cp_generic cpg = {
+				.dentry	= dentry,
+				.bdst	= a->bdst,
+				.bsrc	= -1,
+				.len	= -1,
+				.pin	= &a->pin,
+				.flags	= AuCpup_KEEPLINO
+			};
+			err = au_sio_cpup_simple(&cpg);
+			au_h_open_post(dentry, a->bsrc, h_file);
+			if (!err) {
+				dput(a->h_path.dentry);
+				a->h_path.dentry = au_h_dptr(dentry, a->bdst);
+			} else
+				au_set_h_dptr(dentry, a->bdst,
+					      a->h_path.dentry);
+		}
+		spin_lock(&dentry->d_lock);
+		dentry->d_inode = NULL; /* restore */
+		spin_unlock(&dentry->d_lock);
+		AuDbg("temporary d_inode...done\n");
+		au_set_h_dptr(dentry, a->bsrc, NULL);
+		au_set_dbbot(dentry, bbot);
+	} else {
+		/* the inode of src_dentry already exists on a.bdst branch */
+		h_src_dentry = d_find_alias(h_inode);
+		if (!h_src_dentry && au_plink_test(inode)) {
+			plink = 1;
+			h_src_dentry = au_plink_lkup(inode, a->bdst);
+			err = PTR_ERR(h_src_dentry);
+			if (IS_ERR(h_src_dentry))
+				goto out;
+
+			if (unlikely(d_is_negative(h_src_dentry))) {
+				dput(h_src_dentry);
+				h_src_dentry = NULL;
+			}
+
+		}
+		if (h_src_dentry) {
+			delegated = NULL;
+			err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
+					 &a->h_path, &delegated);
+			if (unlikely(err == -EWOULDBLOCK)) {
+				pr_warn("cannot retry for NFSv4 delegation"
+					" for an internal link\n");
+				iput(delegated);
+			}
+			dput(h_src_dentry);
+		} else {
+			AuIOErr("no dentry found for hi%lu on b%d\n",
+				h_inode->i_ino, a->bdst);
+			err = -EIO;
+		}
+	}
+
+	if (!err && !plink)
+		au_plink_append(inode, a->bdst, a->h_path.dentry);
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+int aufs_link(struct dentry *src_dentry, struct inode *dir,
+	      struct dentry *dentry)
+{
+	int err, rerr;
+	struct au_dtime dt;
+	struct au_link_args *a;
+	struct dentry *wh_dentry, *h_src_dentry;
+	struct inode *inode, *delegated;
+	struct super_block *sb;
+	struct au_wr_dir_args wr_dir_args = {
+		/* .force_btgt	= -1, */
+		.flags		= AuWrDir_ADD_ENTRY
+	};
+
+	IMustLock(dir);
+	inode = d_inode(src_dentry);
+	IMustLock(inode);
+
+	err = -ENOMEM;
+	a = kzalloc(sizeof(*a), GFP_NOFS);
+	if (unlikely(!a))
+		goto out;
+
+	a->parent = dentry->d_parent; /* dir inode is locked */
+	err = aufs_read_and_write_lock2(dentry, src_dentry,
+					AuLock_NOPLM | AuLock_GEN);
+	if (unlikely(err))
+		goto out_kfree;
+	err = au_d_linkable(src_dentry);
+	if (unlikely(err))
+		goto out_unlock;
+	err = au_d_may_add(dentry);
+	if (unlikely(err))
+		goto out_unlock;
+
+	a->src_parent = dget_parent(src_dentry);
+	wr_dir_args.force_btgt = au_ibtop(inode);
+
+	di_write_lock_parent(a->parent);
+	wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt);
+	wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin,
+				      &wr_dir_args);
+	err = PTR_ERR(wh_dentry);
+	if (IS_ERR(wh_dentry))
+		goto out_parent;
+
+	err = 0;
+	sb = dentry->d_sb;
+	a->bdst = au_dbtop(dentry);
+	a->h_path.dentry = au_h_dptr(dentry, a->bdst);
+	a->h_path.mnt = au_sbr_mnt(sb, a->bdst);
+	a->bsrc = au_ibtop(inode);
+	h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
+	if (!h_src_dentry && au_di(src_dentry)->di_tmpfile)
+		h_src_dentry = dget(au_hi_wh(inode, a->bsrc));
+	if (!h_src_dentry) {
+		a->bsrc = au_dbtop(src_dentry);
+		h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
+		AuDebugOn(!h_src_dentry);
+	} else if (IS_ERR(h_src_dentry)) {
+		err = PTR_ERR(h_src_dentry);
+		goto out_parent;
+	}
+
+	/*
+	 * aufs doesn't touch the credential so
+	 * security_dentry_create_files_as() is unnecrssary.
+	 */
+	if (au_opt_test(au_mntflags(sb), PLINK)) {
+		if (a->bdst < a->bsrc
+		    /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */)
+			err = au_cpup_or_link(src_dentry, dentry, a);
+		else {
+			delegated = NULL;
+			err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
+					 &a->h_path, &delegated);
+			if (unlikely(err == -EWOULDBLOCK)) {
+				pr_warn("cannot retry for NFSv4 delegation"
+					" for an internal link\n");
+				iput(delegated);
+			}
+		}
+		dput(h_src_dentry);
+	} else {
+		/*
+		 * copyup src_dentry to the branch we process,
+		 * and then link(2) to it.
+		 */
+		dput(h_src_dentry);
+		if (a->bdst < a->bsrc
+		    /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) {
+			au_unpin(&a->pin);
+			di_write_unlock(a->parent);
+			err = au_cpup_before_link(src_dentry, a);
+			di_write_lock_parent(a->parent);
+			if (!err)
+				err = au_pin(&a->pin, dentry, a->bdst,
+					     au_opt_udba(sb),
+					     AuPin_DI_LOCKED | AuPin_MNT_WRITE);
+			if (unlikely(err))
+				goto out_wh;
+		}
+		if (!err) {
+			h_src_dentry = au_h_dptr(src_dentry, a->bdst);
+			err = -ENOENT;
+			if (h_src_dentry && d_is_positive(h_src_dentry)) {
+				delegated = NULL;
+				err = vfsub_link(h_src_dentry,
+						 au_pinned_h_dir(&a->pin),
+						 &a->h_path, &delegated);
+				if (unlikely(err == -EWOULDBLOCK)) {
+					pr_warn("cannot retry"
+						" for NFSv4 delegation"
+						" for an internal link\n");
+					iput(delegated);
+				}
+			}
+		}
+	}
+	if (unlikely(err))
+		goto out_unpin;
+
+	if (wh_dentry) {
+		a->h_path.dentry = wh_dentry;
+		err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path,
+					  dentry);
+		if (unlikely(err))
+			goto out_revert;
+	}
+
+	au_dir_ts(dir, a->bdst);
+	inode_inc_iversion(dir);
+	inc_nlink(inode);
+	inode->i_ctime = dir->i_ctime;
+	d_instantiate(dentry, au_igrab(inode));
+	if (d_unhashed(a->h_path.dentry))
+		/* some filesystem calls d_drop() */
+		d_drop(dentry);
+	/* some filesystems consume an inode even hardlink */
+	au_fhsm_wrote(sb, a->bdst, /*force*/0);
+	goto out_unpin; /* success */
+
+out_revert:
+	/* no delegation since it is just created */
+	rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path,
+			    /*delegated*/NULL, /*force*/0);
+	if (unlikely(rerr)) {
+		AuIOErr("%pd reverting failed(%d, %d)\n", dentry, err, rerr);
+		err = -EIO;
+	}
+	au_dtime_revert(&dt);
+out_unpin:
+	au_unpin(&a->pin);
+out_wh:
+	dput(wh_dentry);
+out_parent:
+	di_write_unlock(a->parent);
+	dput(a->src_parent);
+out_unlock:
+	if (unlikely(err)) {
+		au_update_dbtop(dentry);
+		d_drop(dentry);
+	}
+	aufs_read_and_write_unlock2(dentry, src_dentry);
+out_kfree:
+	kfree(a);
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+	int err, rerr;
+	aufs_bindex_t bindex;
+	unsigned char diropq;
+	struct path h_path;
+	struct dentry *wh_dentry, *parent, *opq_dentry;
+	struct inode *h_inode;
+	struct super_block *sb;
+	struct {
+		struct au_pin pin;
+		struct au_dtime dt;
+	} *a; /* reduce the stack usage */
+	struct au_wr_dir_args wr_dir_args = {
+		.force_btgt	= -1,
+		.flags		= AuWrDir_ADD_ENTRY | AuWrDir_ISDIR
+	};
+
+	IMustLock(dir);
+
+	err = -ENOMEM;
+	a = kmalloc(sizeof(*a), GFP_NOFS);
+	if (unlikely(!a))
+		goto out;
+
+	err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
+	if (unlikely(err))
+		goto out_free;
+	err = au_d_may_add(dentry);
+	if (unlikely(err))
+		goto out_unlock;
+
+	parent = dentry->d_parent; /* dir inode is locked */
+	di_write_lock_parent(parent);
+	wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
+				      &a->pin, &wr_dir_args);
+	err = PTR_ERR(wh_dentry);
+	if (IS_ERR(wh_dentry))
+		goto out_parent;
+
+	sb = dentry->d_sb;
+	bindex = au_dbtop(dentry);
+	h_path.dentry = au_h_dptr(dentry, bindex);
+	h_path.mnt = au_sbr_mnt(sb, bindex);
+	err = vfsub_mkdir(au_pinned_h_dir(&a->pin), &h_path, mode);
+	if (unlikely(err))
+		goto out_unpin;
+
+	/* make the dir opaque */
+	diropq = 0;
+	h_inode = d_inode(h_path.dentry);
+	if (wh_dentry
+	    || au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) {
+		inode_lock_nested(h_inode, AuLsc_I_CHILD);
+		opq_dentry = au_diropq_create(dentry, bindex);
+		inode_unlock(h_inode);
+		err = PTR_ERR(opq_dentry);
+		if (IS_ERR(opq_dentry))
+			goto out_dir;
+		dput(opq_dentry);
+		diropq = 1;
+	}
+
+	err = epilog(dir, bindex, wh_dentry, dentry);
+	if (!err) {
+		inc_nlink(dir);
+		goto out_unpin; /* success */
+	}
+
+	/* revert */
+	if (diropq) {
+		AuLabel(revert opq);
+		inode_lock_nested(h_inode, AuLsc_I_CHILD);
+		rerr = au_diropq_remove(dentry, bindex);
+		inode_unlock(h_inode);
+		if (rerr) {
+			AuIOErr("%pd reverting diropq failed(%d, %d)\n",
+				dentry, err, rerr);
+			err = -EIO;
+		}
+	}
+
+out_dir:
+	AuLabel(revert dir);
+	rerr = vfsub_rmdir(au_pinned_h_dir(&a->pin), &h_path);
+	if (rerr) {
+		AuIOErr("%pd reverting dir failed(%d, %d)\n",
+			dentry, err, rerr);
+		err = -EIO;
+	}
+	au_dtime_revert(&a->dt);
+out_unpin:
+	au_unpin(&a->pin);
+	dput(wh_dentry);
+out_parent:
+	di_write_unlock(parent);
+out_unlock:
+	if (unlikely(err)) {
+		au_update_dbtop(dentry);
+		d_drop(dentry);
+	}
+	aufs_read_unlock(dentry, AuLock_DW);
+out_free:
+	kfree(a);
+out:
+	return err;
+}
diff --git a/fs/aufs/i_op_del.c b/fs/aufs/i_op_del.c
new file mode 100644
index 000000000..120ceaad2
--- /dev/null
+++ b/fs/aufs/i_op_del.c
@@ -0,0 +1,511 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * inode operations (del entry)
+ */
+
+#include "aufs.h"
+
+/*
+ * decide if a new whiteout for @dentry is necessary or not.
+ * when it is necessary, prepare the parent dir for the upper branch whose
+ * branch index is @bcpup for creation. the actual creation of the whiteout will
+ * be done by caller.
+ * return value:
+ * 0: wh is unnecessary
+ * plus: wh is necessary
+ * minus: error
+ */
+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup)
+{
+	int need_wh, err;
+	aufs_bindex_t btop;
+	struct super_block *sb;
+
+	sb = dentry->d_sb;
+	btop = au_dbtop(dentry);
+	if (*bcpup < 0) {
+		*bcpup = btop;
+		if (au_test_ro(sb, btop, d_inode(dentry))) {
+			err = AuWbrCopyup(au_sbi(sb), dentry);
+			*bcpup = err;
+			if (unlikely(err < 0))
+				goto out;
+		}
+	} else
+		AuDebugOn(btop < *bcpup
+			  || au_test_ro(sb, *bcpup, d_inode(dentry)));
+	AuDbg("bcpup %d, btop %d\n", *bcpup, btop);
+
+	if (*bcpup != btop) {
+		err = au_cpup_dirs(dentry, *bcpup);
+		if (unlikely(err))
+			goto out;
+		need_wh = 1;
+	} else {
+		struct au_dinfo *dinfo, *tmp;
+
+		need_wh = -ENOMEM;
+		dinfo = au_di(dentry);
+		tmp = au_di_alloc(sb, AuLsc_DI_TMP);
+		if (tmp) {
+			au_di_cp(tmp, dinfo);
+			au_di_swap(tmp, dinfo);
+			/* returns the number of positive dentries */
+			need_wh = au_lkup_dentry(dentry, btop + 1,
+						 /* AuLkup_IGNORE_PERM */ 0);
+			au_di_swap(tmp, dinfo);
+			au_rw_write_unlock(&tmp->di_rwsem);
+			au_di_free(tmp);
+		}
+	}
+	AuDbg("need_wh %d\n", need_wh);
+	err = need_wh;
+
+out:
+	return err;
+}
+
+/*
+ * simple tests for the del-entry operations.
+ * following the checks in vfs, plus the parent-child relationship.
+ */
+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
+	       struct dentry *h_parent, int isdir)
+{
+	int err;
+	umode_t h_mode;
+	struct dentry *h_dentry, *h_latest;
+	struct inode *h_inode;
+
+	h_dentry = au_h_dptr(dentry, bindex);
+	if (d_really_is_positive(dentry)) {
+		err = -ENOENT;
+		if (unlikely(d_is_negative(h_dentry)))
+			goto out;
+		h_inode = d_inode(h_dentry);
+		if (unlikely(!h_inode->i_nlink))
+			goto out;
+
+		h_mode = h_inode->i_mode;
+		if (!isdir) {
+			err = -EISDIR;
+			if (unlikely(S_ISDIR(h_mode)))
+				goto out;
+		} else if (unlikely(!S_ISDIR(h_mode))) {
+			err = -ENOTDIR;
+			goto out;
+		}
+	} else {
+		/* rename(2) case */
+		err = -EIO;
+		if (unlikely(d_is_positive(h_dentry)))
+			goto out;
+	}
+
+	err = -ENOENT;
+	/* expected parent dir is locked */
+	if (unlikely(h_parent != h_dentry->d_parent))
+		goto out;
+	err = 0;
+
+	/*
+	 * rmdir a dir may break the consistency on some filesystem.
+	 * let's try heavy test.
+	 */
+	err = -EACCES;
+	if (unlikely(!au_opt_test(au_mntflags(dentry->d_sb), DIRPERM1)
+		     && au_test_h_perm(d_inode(h_parent),
+				       MAY_EXEC | MAY_WRITE)))
+		goto out;
+
+	h_latest = au_sio_lkup_one(&dentry->d_name, h_parent);
+	err = -EIO;
+	if (IS_ERR(h_latest))
+		goto out;
+	if (h_latest == h_dentry)
+		err = 0;
+	dput(h_latest);
+
+out:
+	return err;
+}
+
+/*
+ * decide the branch where we operate for @dentry. the branch index will be set
+ * @rbcpup. after diciding it, 'pin' it and store the timestamps of the parent
+ * dir for reverting.
+ * when a new whiteout is necessary, create it.
+ */
+static struct dentry*
+lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *rbcpup,
+		    struct au_dtime *dt, struct au_pin *pin)
+{
+	struct dentry *wh_dentry;
+	struct super_block *sb;
+	struct path h_path;
+	int err, need_wh;
+	unsigned int udba;
+	aufs_bindex_t bcpup;
+
+	need_wh = au_wr_dir_need_wh(dentry, isdir, rbcpup);
+	wh_dentry = ERR_PTR(need_wh);
+	if (unlikely(need_wh < 0))
+		goto out;
+
+	sb = dentry->d_sb;
+	udba = au_opt_udba(sb);
+	bcpup = *rbcpup;
+	err = au_pin(pin, dentry, bcpup, udba,
+		     AuPin_DI_LOCKED | AuPin_MNT_WRITE);
+	wh_dentry = ERR_PTR(err);
+	if (unlikely(err))
+		goto out;
+
+	h_path.dentry = au_pinned_h_parent(pin);
+	if (udba != AuOpt_UDBA_NONE
+	    && au_dbtop(dentry) == bcpup) {
+		err = au_may_del(dentry, bcpup, h_path.dentry, isdir);
+		wh_dentry = ERR_PTR(err);
+		if (unlikely(err))
+			goto out_unpin;
+	}
+
+	h_path.mnt = au_sbr_mnt(sb, bcpup);
+	au_dtime_store(dt, au_pinned_parent(pin), &h_path);
+	wh_dentry = NULL;
+	if (!need_wh)
+		goto out; /* success, no need to create whiteout */
+
+	wh_dentry = au_wh_create(dentry, bcpup, h_path.dentry);
+	if (IS_ERR(wh_dentry))
+		goto out_unpin;
+
+	/* returns with the parent is locked and wh_dentry is dget-ed */
+	goto out; /* success */
+
+out_unpin:
+	au_unpin(pin);
+out:
+	return wh_dentry;
+}
+
+/*
+ * when removing a dir, rename it to a unique temporary whiteout-ed name first
+ * in order to be revertible and save time for removing many child whiteouts
+ * under the dir.
+ * returns 1 when there are too many child whiteout and caller should remove
+ * them asynchronously. returns 0 when the number of children is enough small to
+ * remove now or the branch fs is a remote fs.
+ * otherwise return an error.
+ */
+static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex,
+			   struct au_nhash *whlist, struct inode *dir)
+{
+	int rmdir_later, err, dirwh;
+	struct dentry *h_dentry;
+	struct super_block *sb;
+	struct inode *inode;
+
+	sb = dentry->d_sb;
+	SiMustAnyLock(sb);
+	h_dentry = au_h_dptr(dentry, bindex);
+	err = au_whtmp_ren(h_dentry, au_sbr(sb, bindex));
+	if (unlikely(err))
+		goto out;
+
+	/* stop monitoring */
+	inode = d_inode(dentry);
+	au_hn_free(au_hi(inode, bindex));
+
+	if (!au_test_fs_remote(h_dentry->d_sb)) {
+		dirwh = au_sbi(sb)->si_dirwh;
+		rmdir_later = (dirwh <= 1);
+		if (!rmdir_later)
+			rmdir_later = au_nhash_test_longer_wh(whlist, bindex,
+							      dirwh);
+		if (rmdir_later)
+			return rmdir_later;
+	}
+
+	err = au_whtmp_rmdir(dir, bindex, h_dentry, whlist);
+	if (unlikely(err)) {
+		AuIOErr("rmdir %pd, b%d failed, %d. ignored\n",
+			h_dentry, bindex, err);
+		err = 0;
+	}
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/*
+ * final procedure for deleting a entry.
+ * maintain dentry and iattr.
+ */
+static void epilog(struct inode *dir, struct dentry *dentry,
+		   aufs_bindex_t bindex)
+{
+	struct inode *inode;
+
+	inode = d_inode(dentry);
+	d_drop(dentry);
+	inode->i_ctime = dir->i_ctime;
+
+	au_dir_ts(dir, bindex);
+	inode_inc_iversion(dir);
+}
+
+/*
+ * when an error happened, remove the created whiteout and revert everything.
+ */
+static int do_revert(int err, struct inode *dir, aufs_bindex_t bindex,
+		     aufs_bindex_t bwh, struct dentry *wh_dentry,
+		     struct dentry *dentry, struct au_dtime *dt)
+{
+	int rerr;
+	struct path h_path = {
+		.dentry	= wh_dentry,
+		.mnt	= au_sbr_mnt(dir->i_sb, bindex)
+	};
+
+	rerr = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, dentry);
+	if (!rerr) {
+		au_set_dbwh(dentry, bwh);
+		au_dtime_revert(dt);
+		return 0;
+	}
+
+	AuIOErr("%pd reverting whiteout failed(%d, %d)\n", dentry, err, rerr);
+	return -EIO;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int aufs_unlink(struct inode *dir, struct dentry *dentry)
+{
+	int err;
+	aufs_bindex_t bwh, bindex, btop;
+	struct inode *inode, *h_dir, *delegated;
+	struct dentry *parent, *wh_dentry;
+	/* to reuduce stack size */
+	struct {
+		struct au_dtime dt;
+		struct au_pin pin;
+		struct path h_path;
+	} *a;
+
+	IMustLock(dir);
+
+	err = -ENOMEM;
+	a = kmalloc(sizeof(*a), GFP_NOFS);
+	if (unlikely(!a))
+		goto out;
+
+	err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
+	if (unlikely(err))
+		goto out_free;
+	err = au_d_hashed_positive(dentry);
+	if (unlikely(err))
+		goto out_unlock;
+	inode = d_inode(dentry);
+	IMustLock(inode);
+	err = -EISDIR;
+	if (unlikely(d_is_dir(dentry)))
+		goto out_unlock; /* possible? */
+
+	btop = au_dbtop(dentry);
+	bwh = au_dbwh(dentry);
+	bindex = -1;
+	parent = dentry->d_parent; /* dir inode is locked */
+	di_write_lock_parent(parent);
+	wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/0, &bindex, &a->dt,
+					&a->pin);
+	err = PTR_ERR(wh_dentry);
+	if (IS_ERR(wh_dentry))
+		goto out_parent;
+
+	a->h_path.mnt = au_sbr_mnt(dentry->d_sb, btop);
+	a->h_path.dentry = au_h_dptr(dentry, btop);
+	dget(a->h_path.dentry);
+	if (bindex == btop) {
+		h_dir = au_pinned_h_dir(&a->pin);
+		delegated = NULL;
+		err = vfsub_unlink(h_dir, &a->h_path, &delegated, /*force*/0);
+		if (unlikely(err == -EWOULDBLOCK)) {
+			pr_warn("cannot retry for NFSv4 delegation"
+				" for an internal unlink\n");
+			iput(delegated);
+		}
+	} else {
+		/* dir inode is locked */
+		h_dir = d_inode(wh_dentry->d_parent);
+		IMustLock(h_dir);
+		err = 0;
+	}
+
+	if (!err) {
+		vfsub_drop_nlink(inode);
+		epilog(dir, dentry, bindex);
+
+		/* update target timestamps */
+		if (bindex == btop) {
+			vfsub_update_h_iattr(&a->h_path, /*did*/NULL);
+			/*ignore*/
+			inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
+		} else
+			/* todo: this timestamp may be reverted later */
+			inode->i_ctime = h_dir->i_ctime;
+		goto out_unpin; /* success */
+	}
+
+	/* revert */
+	if (wh_dentry) {
+		int rerr;
+
+		rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry,
+				 &a->dt);
+		if (rerr)
+			err = rerr;
+	}
+
+out_unpin:
+	au_unpin(&a->pin);
+	dput(wh_dentry);
+	dput(a->h_path.dentry);
+out_parent:
+	di_write_unlock(parent);
+out_unlock:
+	aufs_read_unlock(dentry, AuLock_DW);
+out_free:
+	kfree(a);
+out:
+	return err;
+}
+
+int aufs_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	int err, rmdir_later;
+	aufs_bindex_t bwh, bindex, btop;
+	struct inode *inode;
+	struct dentry *parent, *wh_dentry, *h_dentry;
+	struct au_whtmp_rmdir *args;
+	/* to reuduce stack size */
+	struct {
+		struct au_dtime dt;
+		struct au_pin pin;
+	} *a;
+
+	IMustLock(dir);
+
+	err = -ENOMEM;
+	a = kmalloc(sizeof(*a), GFP_NOFS);
+	if (unlikely(!a))
+		goto out;
+
+	err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
+	if (unlikely(err))
+		goto out_free;
+	err = au_alive_dir(dentry);
+	if (unlikely(err))
+		goto out_unlock;
+	inode = d_inode(dentry);
+	IMustLock(inode);
+	err = -ENOTDIR;
+	if (unlikely(!d_is_dir(dentry)))
+		goto out_unlock; /* possible? */
+
+	err = -ENOMEM;
+	args = au_whtmp_rmdir_alloc(dir->i_sb, GFP_NOFS);
+	if (unlikely(!args))
+		goto out_unlock;
+
+	parent = dentry->d_parent; /* dir inode is locked */
+	di_write_lock_parent(parent);
+	err = au_test_empty(dentry, &args->whlist);
+	if (unlikely(err))
+		goto out_parent;
+
+	btop = au_dbtop(dentry);
+	bwh = au_dbwh(dentry);
+	bindex = -1;
+	wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &a->dt,
+					&a->pin);
+	err = PTR_ERR(wh_dentry);
+	if (IS_ERR(wh_dentry))
+		goto out_parent;
+
+	h_dentry = au_h_dptr(dentry, btop);
+	dget(h_dentry);
+	rmdir_later = 0;
+	if (bindex == btop) {
+		err = renwh_and_rmdir(dentry, btop, &args->whlist, dir);
+		if (err > 0) {
+			rmdir_later = err;
+			err = 0;
+		}
+	} else {
+		/* stop monitoring */
+		au_hn_free(au_hi(inode, btop));
+
+		/* dir inode is locked */
+		IMustLock(d_inode(wh_dentry->d_parent));
+		err = 0;
+	}
+
+	if (!err) {
+		vfsub_dead_dir(inode);
+		au_set_dbdiropq(dentry, -1);
+		epilog(dir, dentry, bindex);
+
+		if (rmdir_later) {
+			au_whtmp_kick_rmdir(dir, btop, h_dentry, args);
+			args = NULL;
+		}
+
+		goto out_unpin; /* success */
+	}
+
+	/* revert */
+	AuLabel(revert);
+	if (wh_dentry) {
+		int rerr;
+
+		rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry,
+				 &a->dt);
+		if (rerr)
+			err = rerr;
+	}
+
+out_unpin:
+	au_unpin(&a->pin);
+	dput(wh_dentry);
+	dput(h_dentry);
+out_parent:
+	di_write_unlock(parent);
+	if (args)
+		au_whtmp_rmdir_free(args);
+out_unlock:
+	aufs_read_unlock(dentry, AuLock_DW);
+out_free:
+	kfree(a);
+out:
+	AuTraceErr(err);
+	return err;
+}
diff --git a/fs/aufs/i_op_ren.c b/fs/aufs/i_op_ren.c
new file mode 100644
index 000000000..eb57654a8
--- /dev/null
+++ b/fs/aufs/i_op_ren.c
@@ -0,0 +1,1246 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * inode operation (rename entry)
+ * todo: this is crazy monster
+ */
+
+#include "aufs.h"
+
+enum { AuSRC, AuDST, AuSrcDst };
+enum { AuPARENT, AuCHILD, AuParentChild };
+
+#define AuRen_ISDIR_SRC		1
+#define AuRen_ISDIR_DST		(1 << 1)
+#define AuRen_ISSAMEDIR		(1 << 2)
+#define AuRen_WHSRC		(1 << 3)
+#define AuRen_WHDST		(1 << 4)
+#define AuRen_MNT_WRITE		(1 << 5)
+#define AuRen_DT_DSTDIR		(1 << 6)
+#define AuRen_DIROPQ_SRC	(1 << 7)
+#define AuRen_DIROPQ_DST	(1 << 8)
+#define AuRen_DIRREN		(1 << 9)
+#define AuRen_DROPPED_SRC	(1 << 10)
+#define AuRen_DROPPED_DST	(1 << 11)
+#define au_ftest_ren(flags, name)	((flags) & AuRen_##name)
+#define au_fset_ren(flags, name) \
+	do { (flags) |= AuRen_##name; } while (0)
+#define au_fclr_ren(flags, name) \
+	do { (flags) &= ~AuRen_##name; } while (0)
+
+#ifndef CONFIG_AUFS_DIRREN
+#undef AuRen_DIRREN
+#define AuRen_DIRREN		0
+#endif
+
+struct au_ren_args {
+	struct {
+		struct dentry *dentry, *h_dentry, *parent, *h_parent,
+			*wh_dentry;
+		struct inode *dir, *inode;
+		struct au_hinode *hdir, *hinode;
+		struct au_dtime dt[AuParentChild];
+		aufs_bindex_t btop, bdiropq;
+	} sd[AuSrcDst];
+
+#define src_dentry	sd[AuSRC].dentry
+#define src_dir		sd[AuSRC].dir
+#define src_inode	sd[AuSRC].inode
+#define src_h_dentry	sd[AuSRC].h_dentry
+#define src_parent	sd[AuSRC].parent
+#define src_h_parent	sd[AuSRC].h_parent
+#define src_wh_dentry	sd[AuSRC].wh_dentry
+#define src_hdir	sd[AuSRC].hdir
+#define src_hinode	sd[AuSRC].hinode
+#define src_h_dir	sd[AuSRC].hdir->hi_inode
+#define src_dt		sd[AuSRC].dt
+#define src_btop	sd[AuSRC].btop
+#define src_bdiropq	sd[AuSRC].bdiropq
+
+#define dst_dentry	sd[AuDST].dentry
+#define dst_dir		sd[AuDST].dir
+#define dst_inode	sd[AuDST].inode
+#define dst_h_dentry	sd[AuDST].h_dentry
+#define dst_parent	sd[AuDST].parent
+#define dst_h_parent	sd[AuDST].h_parent
+#define dst_wh_dentry	sd[AuDST].wh_dentry
+#define dst_hdir	sd[AuDST].hdir
+#define dst_hinode	sd[AuDST].hinode
+#define dst_h_dir	sd[AuDST].hdir->hi_inode
+#define dst_dt		sd[AuDST].dt
+#define dst_btop	sd[AuDST].btop
+#define dst_bdiropq	sd[AuDST].bdiropq
+
+	struct dentry *h_trap;
+	struct au_branch *br;
+	struct path h_path;
+	struct au_nhash whlist;
+	aufs_bindex_t btgt, src_bwh;
+
+	struct {
+		unsigned short auren_flags;
+		unsigned char flags;	/* syscall parameter */
+		unsigned char exchange;
+	} __packed;
+
+	struct au_whtmp_rmdir *thargs;
+	struct dentry *h_dst;
+	struct au_hinode *h_root;
+};
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * functions for reverting.
+ * when an error happened in a single rename systemcall, we should revert
+ * everything as if nothing happened.
+ * we don't need to revert the copied-up/down the parent dir since they are
+ * harmless.
+ */
+
+#define RevertFailure(fmt, ...) do { \
+	AuIOErr("revert failure: " fmt " (%d, %d)\n", \
+		##__VA_ARGS__, err, rerr); \
+	err = -EIO; \
+} while (0)
+
+static void au_ren_do_rev_diropq(int err, struct au_ren_args *a, int idx)
+{
+	int rerr;
+	struct dentry *d;
+#define src_or_dst(member) a->sd[idx].member
+
+	d = src_or_dst(dentry); /* {src,dst}_dentry */
+	au_hn_inode_lock_nested(src_or_dst(hinode), AuLsc_I_CHILD);
+	rerr = au_diropq_remove(d, a->btgt);
+	au_hn_inode_unlock(src_or_dst(hinode));
+	au_set_dbdiropq(d, src_or_dst(bdiropq));
+	if (rerr)
+		RevertFailure("remove diropq %pd", d);
+
+#undef src_or_dst_
+}
+
+static void au_ren_rev_diropq(int err, struct au_ren_args *a)
+{
+	if (au_ftest_ren(a->auren_flags, DIROPQ_SRC))
+		au_ren_do_rev_diropq(err, a, AuSRC);
+	if (au_ftest_ren(a->auren_flags, DIROPQ_DST))
+		au_ren_do_rev_diropq(err, a, AuDST);
+}
+
+static void au_ren_rev_rename(int err, struct au_ren_args *a)
+{
+	int rerr;
+	struct inode *delegated;
+
+	a->h_path.dentry = vfsub_lkup_one(&a->src_dentry->d_name,
+					  a->src_h_parent);
+	rerr = PTR_ERR(a->h_path.dentry);
+	if (IS_ERR(a->h_path.dentry)) {
+		RevertFailure("lkup one %pd", a->src_dentry);
+		return;
+	}
+
+	delegated = NULL;
+	rerr = vfsub_rename(a->dst_h_dir,
+			    au_h_dptr(a->src_dentry, a->btgt),
+			    a->src_h_dir, &a->h_path, &delegated, a->flags);
+	if (unlikely(rerr == -EWOULDBLOCK)) {
+		pr_warn("cannot retry for NFSv4 delegation"
+			" for an internal rename\n");
+		iput(delegated);
+	}
+	d_drop(a->h_path.dentry);
+	dput(a->h_path.dentry);
+	/* au_set_h_dptr(a->src_dentry, a->btgt, NULL); */
+	if (rerr)
+		RevertFailure("rename %pd", a->src_dentry);
+}
+
+static void au_ren_rev_whtmp(int err, struct au_ren_args *a)
+{
+	int rerr;
+	struct inode *delegated;
+
+	a->h_path.dentry = vfsub_lkup_one(&a->dst_dentry->d_name,
+					  a->dst_h_parent);
+	rerr = PTR_ERR(a->h_path.dentry);
+	if (IS_ERR(a->h_path.dentry)) {
+		RevertFailure("lkup one %pd", a->dst_dentry);
+		return;
+	}
+	if (d_is_positive(a->h_path.dentry)) {
+		d_drop(a->h_path.dentry);
+		dput(a->h_path.dentry);
+		return;
+	}
+
+	delegated = NULL;
+	rerr = vfsub_rename(a->dst_h_dir, a->h_dst, a->dst_h_dir, &a->h_path,
+			    &delegated, a->flags);
+	if (unlikely(rerr == -EWOULDBLOCK)) {
+		pr_warn("cannot retry for NFSv4 delegation"
+			" for an internal rename\n");
+		iput(delegated);
+	}
+	d_drop(a->h_path.dentry);
+	dput(a->h_path.dentry);
+	if (!rerr)
+		au_set_h_dptr(a->dst_dentry, a->btgt, dget(a->h_dst));
+	else
+		RevertFailure("rename %pd", a->h_dst);
+}
+
+static void au_ren_rev_whsrc(int err, struct au_ren_args *a)
+{
+	int rerr;
+
+	a->h_path.dentry = a->src_wh_dentry;
+	rerr = au_wh_unlink_dentry(a->src_h_dir, &a->h_path, a->src_dentry);
+	au_set_dbwh(a->src_dentry, a->src_bwh);
+	if (rerr)
+		RevertFailure("unlink %pd", a->src_wh_dentry);
+}
+#undef RevertFailure
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * when we have to copyup the renaming entry, do it with the rename-target name
+ * in order to minimize the cost (the later actual rename is unnecessary).
+ * otherwise rename it on the target branch.
+ */
+static int au_ren_or_cpup(struct au_ren_args *a)
+{
+	int err;
+	struct dentry *d;
+	struct inode *delegated;
+
+	d = a->src_dentry;
+	if (au_dbtop(d) == a->btgt) {
+		a->h_path.dentry = a->dst_h_dentry;
+		AuDebugOn(au_dbtop(d) != a->btgt);
+		delegated = NULL;
+		err = vfsub_rename(a->src_h_dir, au_h_dptr(d, a->btgt),
+				   a->dst_h_dir, &a->h_path, &delegated,
+				   a->flags);
+		if (unlikely(err == -EWOULDBLOCK)) {
+			pr_warn("cannot retry for NFSv4 delegation"
+				" for an internal rename\n");
+			iput(delegated);
+		}
+	} else
+		BUG();
+
+	if (!err && a->h_dst)
+		/* it will be set to dinfo later */
+		dget(a->h_dst);
+
+	return err;
+}
+
+/* cf. aufs_rmdir() */
+static int au_ren_del_whtmp(struct au_ren_args *a)
+{
+	int err;
+	struct inode *dir;
+
+	dir = a->dst_dir;
+	SiMustAnyLock(dir->i_sb);
+	if (!au_nhash_test_longer_wh(&a->whlist, a->btgt,
+				     au_sbi(dir->i_sb)->si_dirwh)
+	    || au_test_fs_remote(a->h_dst->d_sb)) {
+		err = au_whtmp_rmdir(dir, a->btgt, a->h_dst, &a->whlist);
+		if (unlikely(err))
+			pr_warn("failed removing whtmp dir %pd (%d), "
+				"ignored.\n", a->h_dst, err);
+	} else {
+		au_nhash_wh_free(&a->thargs->whlist);
+		a->thargs->whlist = a->whlist;
+		a->whlist.nh_num = 0;
+		au_whtmp_kick_rmdir(dir, a->btgt, a->h_dst, a->thargs);
+		dput(a->h_dst);
+		a->thargs = NULL;
+	}
+
+	return 0;
+}
+
+/* make it 'opaque' dir. */
+static int au_ren_do_diropq(struct au_ren_args *a, int idx)
+{
+	int err;
+	struct dentry *d, *diropq;
+#define src_or_dst(member) a->sd[idx].member
+
+	err = 0;
+	d = src_or_dst(dentry); /* {src,dst}_dentry */
+	src_or_dst(bdiropq) = au_dbdiropq(d);
+	src_or_dst(hinode) = au_hi(src_or_dst(inode), a->btgt);
+	au_hn_inode_lock_nested(src_or_dst(hinode), AuLsc_I_CHILD);
+	diropq = au_diropq_create(d, a->btgt);
+	au_hn_inode_unlock(src_or_dst(hinode));
+	if (IS_ERR(diropq))
+		err = PTR_ERR(diropq);
+	else
+		dput(diropq);
+
+#undef src_or_dst_
+	return err;
+}
+
+static int au_ren_diropq(struct au_ren_args *a)
+{
+	int err;
+	unsigned char always;
+	struct dentry *d;
+
+	err = 0;
+	d = a->dst_dentry; /* already renamed on the branch */
+	always = !!au_opt_test(au_mntflags(d->d_sb), ALWAYS_DIROPQ);
+	if (au_ftest_ren(a->auren_flags, ISDIR_SRC)
+	    && !au_ftest_ren(a->auren_flags, DIRREN)
+	    && a->btgt != au_dbdiropq(a->src_dentry)
+	    && (a->dst_wh_dentry
+		|| a->btgt <= au_dbdiropq(d)
+		/* hide the lower to keep xino */
+		/* the lowers may not be a dir, but we hide them anyway */
+		|| a->btgt < au_dbbot(d)
+		|| always)) {
+		AuDbg("here\n");
+		err = au_ren_do_diropq(a, AuSRC);
+		if (unlikely(err))
+			goto out;
+		au_fset_ren(a->auren_flags, DIROPQ_SRC);
+	}
+	if (!a->exchange)
+		goto out; /* success */
+
+	d = a->src_dentry; /* already renamed on the branch */
+	if (au_ftest_ren(a->auren_flags, ISDIR_DST)
+	    && a->btgt != au_dbdiropq(a->dst_dentry)
+	    && (a->btgt < au_dbdiropq(d)
+		|| a->btgt < au_dbbot(d)
+		|| always)) {
+		AuDbgDentry(a->src_dentry);
+		AuDbgDentry(a->dst_dentry);
+		err = au_ren_do_diropq(a, AuDST);
+		if (unlikely(err))
+			goto out_rev_src;
+		au_fset_ren(a->auren_flags, DIROPQ_DST);
+	}
+	goto out; /* success */
+
+out_rev_src:
+	AuDbg("err %d, reverting src\n", err);
+	au_ren_rev_diropq(err, a);
+out:
+	return err;
+}
+
+static int do_rename(struct au_ren_args *a)
+{
+	int err;
+	struct dentry *d, *h_d;
+
+	if (!a->exchange) {
+		/* prepare workqueue args for asynchronous rmdir */
+		h_d = a->dst_h_dentry;
+		if (au_ftest_ren(a->auren_flags, ISDIR_DST)
+		    /* && !au_ftest_ren(a->auren_flags, DIRREN) */
+		    && d_is_positive(h_d)) {
+			err = -ENOMEM;
+			a->thargs = au_whtmp_rmdir_alloc(a->src_dentry->d_sb,
+							 GFP_NOFS);
+			if (unlikely(!a->thargs))
+				goto out;
+			a->h_dst = dget(h_d);
+		}
+
+		/* create whiteout for src_dentry */
+		if (au_ftest_ren(a->auren_flags, WHSRC)) {
+			a->src_bwh = au_dbwh(a->src_dentry);
+			AuDebugOn(a->src_bwh >= 0);
+			a->src_wh_dentry = au_wh_create(a->src_dentry, a->btgt,
+							a->src_h_parent);
+			err = PTR_ERR(a->src_wh_dentry);
+			if (IS_ERR(a->src_wh_dentry))
+				goto out_thargs;
+		}
+
+		/* lookup whiteout for dentry */
+		if (au_ftest_ren(a->auren_flags, WHDST)) {
+			h_d = au_wh_lkup(a->dst_h_parent,
+					 &a->dst_dentry->d_name, a->br);
+			err = PTR_ERR(h_d);
+			if (IS_ERR(h_d))
+				goto out_whsrc;
+			if (d_is_negative(h_d))
+				dput(h_d);
+			else
+				a->dst_wh_dentry = h_d;
+		}
+
+		/* rename dentry to tmpwh */
+		if (a->thargs) {
+			err = au_whtmp_ren(a->dst_h_dentry, a->br);
+			if (unlikely(err))
+				goto out_whdst;
+
+			d = a->dst_dentry;
+			au_set_h_dptr(d, a->btgt, NULL);
+			err = au_lkup_neg(d, a->btgt, /*wh*/0);
+			if (unlikely(err))
+				goto out_whtmp;
+			a->dst_h_dentry = au_h_dptr(d, a->btgt);
+		}
+	}
+
+	BUG_ON(d_is_positive(a->dst_h_dentry) && a->src_btop != a->btgt);
+#if 0
+	BUG_ON(!au_ftest_ren(a->auren_flags, DIRREN)
+	       && d_is_positive(a->dst_h_dentry)
+	       && a->src_btop != a->btgt);
+#endif
+
+	/* rename by vfs_rename or cpup */
+	err = au_ren_or_cpup(a);
+	if (unlikely(err))
+		/* leave the copied-up one */
+		goto out_whtmp;
+
+	/* make dir opaque */
+	err = au_ren_diropq(a);
+	if (unlikely(err))
+		goto out_rename;
+
+	/* update target timestamps */
+	if (a->exchange) {
+		AuDebugOn(au_dbtop(a->dst_dentry) != a->btgt);
+		a->h_path.dentry = au_h_dptr(a->dst_dentry, a->btgt);
+		vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/
+		a->dst_inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
+	}
+	AuDebugOn(au_dbtop(a->src_dentry) != a->btgt);
+	a->h_path.dentry = au_h_dptr(a->src_dentry, a->btgt);
+	vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/
+	a->src_inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
+
+	if (!a->exchange) {
+		/* remove whiteout for dentry */
+		if (a->dst_wh_dentry) {
+			a->h_path.dentry = a->dst_wh_dentry;
+			err = au_wh_unlink_dentry(a->dst_h_dir, &a->h_path,
+						  a->dst_dentry);
+			if (unlikely(err))
+				goto out_diropq;
+		}
+
+		/* remove whtmp */
+		if (a->thargs)
+			au_ren_del_whtmp(a); /* ignore this error */
+
+		au_fhsm_wrote(a->src_dentry->d_sb, a->btgt, /*force*/0);
+	}
+	err = 0;
+	goto out_success;
+
+out_diropq:
+	au_ren_rev_diropq(err, a);
+out_rename:
+	au_ren_rev_rename(err, a);
+	dput(a->h_dst);
+out_whtmp:
+	if (a->thargs)
+		au_ren_rev_whtmp(err, a);
+out_whdst:
+	dput(a->dst_wh_dentry);
+	a->dst_wh_dentry = NULL;
+out_whsrc:
+	if (a->src_wh_dentry)
+		au_ren_rev_whsrc(err, a);
+out_success:
+	dput(a->src_wh_dentry);
+	dput(a->dst_wh_dentry);
+out_thargs:
+	if (a->thargs) {
+		dput(a->h_dst);
+		au_whtmp_rmdir_free(a->thargs);
+		a->thargs = NULL;
+	}
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * test if @dentry dir can be rename destination or not.
+ * success means, it is a logically empty dir.
+ */
+static int may_rename_dstdir(struct dentry *dentry, struct au_nhash *whlist)
+{
+	return au_test_empty(dentry, whlist);
+}
+
+/*
+ * test if @a->src_dentry dir can be rename source or not.
+ * if it can, return 0.
+ * success means,
+ * - it is a logically empty dir.
+ * - or, it exists on writable branch and has no children including whiteouts
+ *   on the lower branch unless DIRREN is on.
+ */
+static int may_rename_srcdir(struct au_ren_args *a)
+{
+	int err;
+	unsigned int rdhash;
+	aufs_bindex_t btop, btgt;
+	struct dentry *dentry;
+	struct super_block *sb;
+	struct au_sbinfo *sbinfo;
+
+	dentry = a->src_dentry;
+	sb = dentry->d_sb;
+	sbinfo = au_sbi(sb);
+	if (au_opt_test(sbinfo->si_mntflags, DIRREN))
+		au_fset_ren(a->auren_flags, DIRREN);
+
+	btgt = a->btgt;
+	btop = au_dbtop(dentry);
+	if (btop != btgt) {
+		struct au_nhash whlist;
+
+		SiMustAnyLock(sb);
+		rdhash = sbinfo->si_rdhash;
+		if (!rdhash)
+			rdhash = au_rdhash_est(au_dir_size(/*file*/NULL,
+							   dentry));
+		err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
+		if (unlikely(err))
+			goto out;
+		err = au_test_empty(dentry, &whlist);
+		au_nhash_wh_free(&whlist);
+		goto out;
+	}
+
+	if (btop == au_dbtaildir(dentry))
+		return 0; /* success */
+
+	err = au_test_empty_lower(dentry);
+
+out:
+	if (err == -ENOTEMPTY) {
+		if (au_ftest_ren(a->auren_flags, DIRREN)) {
+			err = 0;
+		} else {
+			AuWarn1("renaming dir who has child(ren) on multiple "
+				"branches, is not supported\n");
+			err = -EXDEV;
+		}
+	}
+	return err;
+}
+
+/* side effect: sets whlist and h_dentry */
+static int au_ren_may_dir(struct au_ren_args *a)
+{
+	int err;
+	unsigned int rdhash;
+	struct dentry *d;
+
+	d = a->dst_dentry;
+	SiMustAnyLock(d->d_sb);
+
+	err = 0;
+	if (au_ftest_ren(a->auren_flags, ISDIR_DST) && a->dst_inode) {
+		rdhash = au_sbi(d->d_sb)->si_rdhash;
+		if (!rdhash)
+			rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, d));
+		err = au_nhash_alloc(&a->whlist, rdhash, GFP_NOFS);
+		if (unlikely(err))
+			goto out;
+
+		if (!a->exchange) {
+			au_set_dbtop(d, a->dst_btop);
+			err = may_rename_dstdir(d, &a->whlist);
+			au_set_dbtop(d, a->btgt);
+		} else
+			err = may_rename_srcdir(a);
+	}
+	a->dst_h_dentry = au_h_dptr(d, au_dbtop(d));
+	if (unlikely(err))
+		goto out;
+
+	d = a->src_dentry;
+	a->src_h_dentry = au_h_dptr(d, au_dbtop(d));
+	if (au_ftest_ren(a->auren_flags, ISDIR_SRC)) {
+		err = may_rename_srcdir(a);
+		if (unlikely(err)) {
+			au_nhash_wh_free(&a->whlist);
+			a->whlist.nh_num = 0;
+		}
+	}
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * simple tests for rename.
+ * following the checks in vfs, plus the parent-child relationship.
+ */
+static int au_may_ren(struct au_ren_args *a)
+{
+	int err, isdir;
+	struct inode *h_inode;
+
+	if (a->src_btop == a->btgt) {
+		err = au_may_del(a->src_dentry, a->btgt, a->src_h_parent,
+				 au_ftest_ren(a->auren_flags, ISDIR_SRC));
+		if (unlikely(err))
+			goto out;
+		err = -EINVAL;
+		if (unlikely(a->src_h_dentry == a->h_trap))
+			goto out;
+	}
+
+	err = 0;
+	if (a->dst_btop != a->btgt)
+		goto out;
+
+	err = -ENOTEMPTY;
+	if (unlikely(a->dst_h_dentry == a->h_trap))
+		goto out;
+
+	err = -EIO;
+	isdir = !!au_ftest_ren(a->auren_flags, ISDIR_DST);
+	if (d_really_is_negative(a->dst_dentry)) {
+		if (d_is_negative(a->dst_h_dentry))
+			err = au_may_add(a->dst_dentry, a->btgt,
+					 a->dst_h_parent, isdir);
+	} else {
+		if (unlikely(d_is_negative(a->dst_h_dentry)))
+			goto out;
+		h_inode = d_inode(a->dst_h_dentry);
+		if (h_inode->i_nlink)
+			err = au_may_del(a->dst_dentry, a->btgt,
+					 a->dst_h_parent, isdir);
+	}
+
+out:
+	if (unlikely(err == -ENOENT || err == -EEXIST))
+		err = -EIO;
+	AuTraceErr(err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * locking order
+ * (VFS)
+ * - src_dir and dir by lock_rename()
+ * - inode if exitsts
+ * (aufs)
+ * - lock all
+ *   + src_dentry and dentry by aufs_read_and_write_lock2() which calls,
+ *     + si_read_lock
+ *     + di_write_lock2_child()
+ *       + di_write_lock_child()
+ *	   + ii_write_lock_child()
+ *       + di_write_lock_child2()
+ *	   + ii_write_lock_child2()
+ *     + src_parent and parent
+ *       + di_write_lock_parent()
+ *	   + ii_write_lock_parent()
+ *       + di_write_lock_parent2()
+ *	   + ii_write_lock_parent2()
+ *   + lower src_dir and dir by vfsub_lock_rename()
+ *   + verify the every relationships between child and parent. if any
+ *     of them failed, unlock all and return -EBUSY.
+ */
+static void au_ren_unlock(struct au_ren_args *a)
+{
+	vfsub_unlock_rename(a->src_h_parent, a->src_hdir,
+			    a->dst_h_parent, a->dst_hdir);
+	if (au_ftest_ren(a->auren_flags, DIRREN)
+	    && a->h_root)
+		au_hn_inode_unlock(a->h_root);
+	if (au_ftest_ren(a->auren_flags, MNT_WRITE))
+		vfsub_mnt_drop_write(au_br_mnt(a->br));
+}
+
+static int au_ren_lock(struct au_ren_args *a)
+{
+	int err;
+	unsigned int udba;
+
+	err = 0;
+	a->src_h_parent = au_h_dptr(a->src_parent, a->btgt);
+	a->src_hdir = au_hi(a->src_dir, a->btgt);
+	a->dst_h_parent = au_h_dptr(a->dst_parent, a->btgt);
+	a->dst_hdir = au_hi(a->dst_dir, a->btgt);
+
+	err = vfsub_mnt_want_write(au_br_mnt(a->br));
+	if (unlikely(err))
+		goto out;
+	au_fset_ren(a->auren_flags, MNT_WRITE);
+	if (au_ftest_ren(a->auren_flags, DIRREN)) {
+		struct dentry *root;
+		struct inode *dir;
+
+		/*
+		 * sbinfo is already locked, so this ii_read_lock is
+		 * unnecessary. but our debugging feature checks it.
+		 */
+		root = a->src_inode->i_sb->s_root;
+		if (root != a->src_parent && root != a->dst_parent) {
+			dir = d_inode(root);
+			ii_read_lock_parent3(dir);
+			a->h_root = au_hi(dir, a->btgt);
+			ii_read_unlock(dir);
+			au_hn_inode_lock_nested(a->h_root, AuLsc_I_PARENT3);
+		}
+	}
+	a->h_trap = vfsub_lock_rename(a->src_h_parent, a->src_hdir,
+				      a->dst_h_parent, a->dst_hdir);
+	udba = au_opt_udba(a->src_dentry->d_sb);
+	if (unlikely(a->src_hdir->hi_inode != d_inode(a->src_h_parent)
+		     || a->dst_hdir->hi_inode != d_inode(a->dst_h_parent)))
+		err = au_busy_or_stale();
+	if (!err && au_dbtop(a->src_dentry) == a->btgt)
+		err = au_h_verify(a->src_h_dentry, udba,
+				  d_inode(a->src_h_parent), a->src_h_parent,
+				  a->br);
+	if (!err && au_dbtop(a->dst_dentry) == a->btgt)
+		err = au_h_verify(a->dst_h_dentry, udba,
+				  d_inode(a->dst_h_parent), a->dst_h_parent,
+				  a->br);
+	if (!err)
+		goto out; /* success */
+
+	err = au_busy_or_stale();
+	au_ren_unlock(a);
+
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static void au_ren_refresh_dir(struct au_ren_args *a)
+{
+	struct inode *dir;
+
+	dir = a->dst_dir;
+	inode_inc_iversion(dir);
+	if (au_ftest_ren(a->auren_flags, ISDIR_SRC)) {
+		/* is this updating defined in POSIX? */
+		au_cpup_attr_timesizes(a->src_inode);
+		au_cpup_attr_nlink(dir, /*force*/1);
+	}
+	au_dir_ts(dir, a->btgt);
+
+	if (a->exchange) {
+		dir = a->src_dir;
+		inode_inc_iversion(dir);
+		if (au_ftest_ren(a->auren_flags, ISDIR_DST)) {
+			/* is this updating defined in POSIX? */
+			au_cpup_attr_timesizes(a->dst_inode);
+			au_cpup_attr_nlink(dir, /*force*/1);
+		}
+		au_dir_ts(dir, a->btgt);
+	}
+
+	if (au_ftest_ren(a->auren_flags, ISSAMEDIR))
+		return;
+
+	dir = a->src_dir;
+	inode_inc_iversion(dir);
+	if (au_ftest_ren(a->auren_flags, ISDIR_SRC))
+		au_cpup_attr_nlink(dir, /*force*/1);
+	au_dir_ts(dir, a->btgt);
+}
+
+static void au_ren_refresh(struct au_ren_args *a)
+{
+	aufs_bindex_t bbot, bindex;
+	struct dentry *d, *h_d;
+	struct inode *i, *h_i;
+	struct super_block *sb;
+
+	d = a->dst_dentry;
+	d_drop(d);
+	if (a->h_dst)
+		/* already dget-ed by au_ren_or_cpup() */
+		au_set_h_dptr(d, a->btgt, a->h_dst);
+
+	i = a->dst_inode;
+	if (i) {
+		if (!a->exchange) {
+			if (!au_ftest_ren(a->auren_flags, ISDIR_DST))
+				vfsub_drop_nlink(i);
+			else {
+				vfsub_dead_dir(i);
+				au_cpup_attr_timesizes(i);
+			}
+			au_update_dbrange(d, /*do_put_zero*/1);
+		} else
+			au_cpup_attr_nlink(i, /*force*/1);
+	} else {
+		bbot = a->btgt;
+		for (bindex = au_dbtop(d); bindex < bbot; bindex++)
+			au_set_h_dptr(d, bindex, NULL);
+		bbot = au_dbbot(d);
+		for (bindex = a->btgt + 1; bindex <= bbot; bindex++)
+			au_set_h_dptr(d, bindex, NULL);
+		au_update_dbrange(d, /*do_put_zero*/0);
+	}
+
+	if (a->exchange
+	    || au_ftest_ren(a->auren_flags, DIRREN)) {
+		d_drop(a->src_dentry);
+		if (au_ftest_ren(a->auren_flags, DIRREN))
+			au_set_dbwh(a->src_dentry, -1);
+		return;
+	}
+
+	d = a->src_dentry;
+	au_set_dbwh(d, -1);
+	bbot = au_dbbot(d);
+	for (bindex = a->btgt + 1; bindex <= bbot; bindex++) {
+		h_d = au_h_dptr(d, bindex);
+		if (h_d)
+			au_set_h_dptr(d, bindex, NULL);
+	}
+	au_set_dbbot(d, a->btgt);
+
+	sb = d->d_sb;
+	i = a->src_inode;
+	if (au_opt_test(au_mntflags(sb), PLINK) && au_plink_test(i))
+		return; /* success */
+
+	bbot = au_ibbot(i);
+	for (bindex = a->btgt + 1; bindex <= bbot; bindex++) {
+		h_i = au_h_iptr(i, bindex);
+		if (h_i) {
+			au_xino_write(sb, bindex, h_i->i_ino, /*ino*/0);
+			/* ignore this error */
+			au_set_h_iptr(i, bindex, NULL, 0);
+		}
+	}
+	au_set_ibbot(i, a->btgt);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* mainly for link(2) and rename(2) */
+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt)
+{
+	aufs_bindex_t bdiropq, bwh;
+	struct dentry *parent;
+	struct au_branch *br;
+
+	parent = dentry->d_parent;
+	IMustLock(d_inode(parent)); /* dir is locked */
+
+	bdiropq = au_dbdiropq(parent);
+	bwh = au_dbwh(dentry);
+	br = au_sbr(dentry->d_sb, btgt);
+	if (au_br_rdonly(br)
+	    || (0 <= bdiropq && bdiropq < btgt)
+	    || (0 <= bwh && bwh < btgt))
+		btgt = -1;
+
+	AuDbg("btgt %d\n", btgt);
+	return btgt;
+}
+
+/* sets src_btop, dst_btop and btgt */
+static int au_ren_wbr(struct au_ren_args *a)
+{
+	int err;
+	struct au_wr_dir_args wr_dir_args = {
+		/* .force_btgt	= -1, */
+		.flags		= AuWrDir_ADD_ENTRY
+	};
+
+	a->src_btop = au_dbtop(a->src_dentry);
+	a->dst_btop = au_dbtop(a->dst_dentry);
+	if (au_ftest_ren(a->auren_flags, ISDIR_SRC)
+	    || au_ftest_ren(a->auren_flags, ISDIR_DST))
+		au_fset_wrdir(wr_dir_args.flags, ISDIR);
+	wr_dir_args.force_btgt = a->src_btop;
+	if (a->dst_inode && a->dst_btop < a->src_btop)
+		wr_dir_args.force_btgt = a->dst_btop;
+	wr_dir_args.force_btgt = au_wbr(a->dst_dentry, wr_dir_args.force_btgt);
+	err = au_wr_dir(a->dst_dentry, a->src_dentry, &wr_dir_args);
+	a->btgt = err;
+	if (a->exchange)
+		au_update_dbtop(a->dst_dentry);
+
+	return err;
+}
+
+static void au_ren_dt(struct au_ren_args *a)
+{
+	a->h_path.dentry = a->src_h_parent;
+	au_dtime_store(a->src_dt + AuPARENT, a->src_parent, &a->h_path);
+	if (!au_ftest_ren(a->auren_flags, ISSAMEDIR)) {
+		a->h_path.dentry = a->dst_h_parent;
+		au_dtime_store(a->dst_dt + AuPARENT, a->dst_parent, &a->h_path);
+	}
+
+	au_fclr_ren(a->auren_flags, DT_DSTDIR);
+	if (!au_ftest_ren(a->auren_flags, ISDIR_SRC)
+	    && !a->exchange)
+		return;
+
+	a->h_path.dentry = a->src_h_dentry;
+	au_dtime_store(a->src_dt + AuCHILD, a->src_dentry, &a->h_path);
+	if (d_is_positive(a->dst_h_dentry)) {
+		au_fset_ren(a->auren_flags, DT_DSTDIR);
+		a->h_path.dentry = a->dst_h_dentry;
+		au_dtime_store(a->dst_dt + AuCHILD, a->dst_dentry, &a->h_path);
+	}
+}
+
+static void au_ren_rev_dt(int err, struct au_ren_args *a)
+{
+	struct dentry *h_d;
+	struct inode *h_inode;
+
+	au_dtime_revert(a->src_dt + AuPARENT);
+	if (!au_ftest_ren(a->auren_flags, ISSAMEDIR))
+		au_dtime_revert(a->dst_dt + AuPARENT);
+
+	if (au_ftest_ren(a->auren_flags, ISDIR_SRC) && err != -EIO) {
+		h_d = a->src_dt[AuCHILD].dt_h_path.dentry;
+		h_inode = d_inode(h_d);
+		inode_lock_nested(h_inode, AuLsc_I_CHILD);
+		au_dtime_revert(a->src_dt + AuCHILD);
+		inode_unlock(h_inode);
+
+		if (au_ftest_ren(a->auren_flags, DT_DSTDIR)) {
+			h_d = a->dst_dt[AuCHILD].dt_h_path.dentry;
+			h_inode = d_inode(h_d);
+			inode_lock_nested(h_inode, AuLsc_I_CHILD);
+			au_dtime_revert(a->dst_dt + AuCHILD);
+			inode_unlock(h_inode);
+		}
+	}
+}
+
+/* ---------------------------------------------------------------------- */
+
+int aufs_rename(struct inode *_src_dir, struct dentry *_src_dentry,
+		struct inode *_dst_dir, struct dentry *_dst_dentry,
+		unsigned int _flags)
+{
+	int err, lock_flags;
+	void *rev;
+	/* reduce stack space */
+	struct au_ren_args *a;
+	struct au_pin pin;
+
+	AuDbg("%pd, %pd, 0x%x\n", _src_dentry, _dst_dentry, _flags);
+	IMustLock(_src_dir);
+	IMustLock(_dst_dir);
+
+	err = -EINVAL;
+	if (unlikely(_flags & RENAME_WHITEOUT))
+		goto out;
+
+	err = -ENOMEM;
+	BUILD_BUG_ON(sizeof(*a) > PAGE_SIZE);
+	a = kzalloc(sizeof(*a), GFP_NOFS);
+	if (unlikely(!a))
+		goto out;
+
+	a->flags = _flags;
+	a->exchange = _flags & RENAME_EXCHANGE;
+	a->src_dir = _src_dir;
+	a->src_dentry = _src_dentry;
+	a->src_inode = NULL;
+	if (d_really_is_positive(a->src_dentry))
+		a->src_inode = d_inode(a->src_dentry);
+	a->src_parent = a->src_dentry->d_parent; /* dir inode is locked */
+	a->dst_dir = _dst_dir;
+	a->dst_dentry = _dst_dentry;
+	a->dst_inode = NULL;
+	if (d_really_is_positive(a->dst_dentry))
+		a->dst_inode = d_inode(a->dst_dentry);
+	a->dst_parent = a->dst_dentry->d_parent; /* dir inode is locked */
+	if (a->dst_inode) {
+		/*
+		 * if EXCHANGE && src is non-dir && dst is dir,
+		 * dst is not locked.
+		 */
+		/* IMustLock(a->dst_inode); */
+		au_igrab(a->dst_inode);
+	}
+
+	err = -ENOTDIR;
+	lock_flags = AuLock_FLUSH | AuLock_NOPLM | AuLock_GEN;
+	if (d_is_dir(a->src_dentry)) {
+		au_fset_ren(a->auren_flags, ISDIR_SRC);
+		if (unlikely(!a->exchange
+			     && d_really_is_positive(a->dst_dentry)
+			     && !d_is_dir(a->dst_dentry)))
+			goto out_free;
+		lock_flags |= AuLock_DIRS;
+	}
+	if (a->dst_inode && d_is_dir(a->dst_dentry)) {
+		au_fset_ren(a->auren_flags, ISDIR_DST);
+		if (unlikely(!a->exchange
+			     && d_really_is_positive(a->src_dentry)
+			     && !d_is_dir(a->src_dentry)))
+			goto out_free;
+		lock_flags |= AuLock_DIRS;
+	}
+	err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry,
+					lock_flags);
+	if (unlikely(err))
+		goto out_free;
+
+	err = au_d_hashed_positive(a->src_dentry);
+	if (unlikely(err))
+		goto out_unlock;
+	err = -ENOENT;
+	if (a->dst_inode) {
+		/*
+		 * If it is a dir, VFS unhash it before this
+		 * function. It means we cannot rely upon d_unhashed().
+		 */
+		if (unlikely(!a->dst_inode->i_nlink))
+			goto out_unlock;
+		if (!au_ftest_ren(a->auren_flags, ISDIR_DST)) {
+			err = au_d_hashed_positive(a->dst_dentry);
+			if (unlikely(err && !a->exchange))
+				goto out_unlock;
+		} else if (unlikely(IS_DEADDIR(a->dst_inode)))
+			goto out_unlock;
+	} else if (unlikely(d_unhashed(a->dst_dentry)))
+		goto out_unlock;
+
+	/*
+	 * is it possible?
+	 * yes, it happened (in linux-3.3-rcN) but I don't know why.
+	 * there may exist a problem somewhere else.
+	 */
+	err = -EINVAL;
+	if (unlikely(d_inode(a->dst_parent) == d_inode(a->src_dentry)))
+		goto out_unlock;
+
+	au_fset_ren(a->auren_flags, ISSAMEDIR); /* temporary */
+	di_write_lock_parent(a->dst_parent);
+
+	/* which branch we process */
+	err = au_ren_wbr(a);
+	if (unlikely(err < 0))
+		goto out_parent;
+	a->br = au_sbr(a->dst_dentry->d_sb, a->btgt);
+	a->h_path.mnt = au_br_mnt(a->br);
+
+	/* are they available to be renamed */
+	err = au_ren_may_dir(a);
+	if (unlikely(err))
+		goto out_children;
+
+	/* prepare the writable parent dir on the same branch */
+	if (a->dst_btop == a->btgt) {
+		au_fset_ren(a->auren_flags, WHDST);
+	} else {
+		err = au_cpup_dirs(a->dst_dentry, a->btgt);
+		if (unlikely(err))
+			goto out_children;
+	}
+
+	err = 0;
+	if (!a->exchange) {
+		if (a->src_dir != a->dst_dir) {
+			/*
+			 * this temporary unlock is safe,
+			 * because both dir->i_mutex are locked.
+			 */
+			di_write_unlock(a->dst_parent);
+			di_write_lock_parent(a->src_parent);
+			err = au_wr_dir_need_wh(a->src_dentry,
+						au_ftest_ren(a->auren_flags,
+							     ISDIR_SRC),
+						&a->btgt);
+			di_write_unlock(a->src_parent);
+			di_write_lock2_parent(a->src_parent, a->dst_parent,
+					      /*isdir*/1);
+			au_fclr_ren(a->auren_flags, ISSAMEDIR);
+		} else
+			err = au_wr_dir_need_wh(a->src_dentry,
+						au_ftest_ren(a->auren_flags,
+							     ISDIR_SRC),
+						&a->btgt);
+	}
+	if (unlikely(err < 0))
+		goto out_children;
+	if (err)
+		au_fset_ren(a->auren_flags, WHSRC);
+
+	/* cpup src */
+	if (a->src_btop != a->btgt) {
+		err = au_pin(&pin, a->src_dentry, a->btgt,
+			     au_opt_udba(a->src_dentry->d_sb),
+			     AuPin_DI_LOCKED | AuPin_MNT_WRITE);
+		if (!err) {
+			struct au_cp_generic cpg = {
+				.dentry	= a->src_dentry,
+				.bdst	= a->btgt,
+				.bsrc	= a->src_btop,
+				.len	= -1,
+				.pin	= &pin,
+				.flags	= AuCpup_DTIME | AuCpup_HOPEN
+			};
+			AuDebugOn(au_dbtop(a->src_dentry) != a->src_btop);
+			err = au_sio_cpup_simple(&cpg);
+			au_unpin(&pin);
+		}
+		if (unlikely(err))
+			goto out_children;
+		a->src_btop = a->btgt;
+		a->src_h_dentry = au_h_dptr(a->src_dentry, a->btgt);
+		if (!a->exchange)
+			au_fset_ren(a->auren_flags, WHSRC);
+	}
+
+	/* cpup dst */
+	if (a->exchange && a->dst_inode
+	    && a->dst_btop != a->btgt) {
+		err = au_pin(&pin, a->dst_dentry, a->btgt,
+			     au_opt_udba(a->dst_dentry->d_sb),
+			     AuPin_DI_LOCKED | AuPin_MNT_WRITE);
+		if (!err) {
+			struct au_cp_generic cpg = {
+				.dentry	= a->dst_dentry,
+				.bdst	= a->btgt,
+				.bsrc	= a->dst_btop,
+				.len	= -1,
+				.pin	= &pin,
+				.flags	= AuCpup_DTIME | AuCpup_HOPEN
+			};
+			err = au_sio_cpup_simple(&cpg);
+			au_unpin(&pin);
+		}
+		if (unlikely(err))
+			goto out_children;
+		a->dst_btop = a->btgt;
+		a->dst_h_dentry = au_h_dptr(a->dst_dentry, a->btgt);
+	}
+
+	/* lock them all */
+	err = au_ren_lock(a);
+	if (unlikely(err))
+		/* leave the copied-up one */
+		goto out_children;
+
+	if (!a->exchange) {
+		if (!au_opt_test(au_mntflags(a->dst_dir->i_sb), UDBA_NONE))
+			err = au_may_ren(a);
+		else if (unlikely(a->dst_dentry->d_name.len > AUFS_MAX_NAMELEN))
+			err = -ENAMETOOLONG;
+		if (unlikely(err))
+			goto out_hdir;
+	}
+
+	/* store timestamps to be revertible */
+	au_ren_dt(a);
+
+	/* store dirren info */
+	if (au_ftest_ren(a->auren_flags, DIRREN)) {
+		err = au_dr_rename(a->src_dentry, a->btgt,
+				   &a->dst_dentry->d_name, &rev);
+		AuTraceErr(err);
+		if (unlikely(err))
+			goto out_dt;
+	}
+
+	/* here we go */
+	err = do_rename(a);
+	if (unlikely(err))
+		goto out_dirren;
+
+	if (au_ftest_ren(a->auren_flags, DIRREN))
+		au_dr_rename_fin(a->src_dentry, a->btgt, rev);
+
+	/* update dir attributes */
+	au_ren_refresh_dir(a);
+
+	/* dput/iput all lower dentries */
+	au_ren_refresh(a);
+
+	goto out_hdir; /* success */
+
+out_dirren:
+	if (au_ftest_ren(a->auren_flags, DIRREN))
+		au_dr_rename_rev(a->src_dentry, a->btgt, rev);
+out_dt:
+	au_ren_rev_dt(err, a);
+out_hdir:
+	au_ren_unlock(a);
+out_children:
+	au_nhash_wh_free(&a->whlist);
+	if (err && a->dst_inode && a->dst_btop != a->btgt) {
+		AuDbg("btop %d, btgt %d\n", a->dst_btop, a->btgt);
+		au_set_h_dptr(a->dst_dentry, a->btgt, NULL);
+		au_set_dbtop(a->dst_dentry, a->dst_btop);
+	}
+out_parent:
+	if (!err) {
+		if (d_unhashed(a->src_dentry))
+			au_fset_ren(a->auren_flags, DROPPED_SRC);
+		if (d_unhashed(a->dst_dentry))
+			au_fset_ren(a->auren_flags, DROPPED_DST);
+		if (!a->exchange)
+			d_move(a->src_dentry, a->dst_dentry);
+		else {
+			d_exchange(a->src_dentry, a->dst_dentry);
+			if (au_ftest_ren(a->auren_flags, DROPPED_DST))
+				d_drop(a->dst_dentry);
+		}
+		if (au_ftest_ren(a->auren_flags, DROPPED_SRC))
+			d_drop(a->src_dentry);
+	} else {
+		au_update_dbtop(a->dst_dentry);
+		if (!a->dst_inode)
+			d_drop(a->dst_dentry);
+	}
+	if (au_ftest_ren(a->auren_flags, ISSAMEDIR))
+		di_write_unlock(a->dst_parent);
+	else
+		di_write_unlock2(a->src_parent, a->dst_parent);
+out_unlock:
+	aufs_read_and_write_unlock2(a->dst_dentry, a->src_dentry);
+out_free:
+	iput(a->dst_inode);
+	if (a->thargs)
+		au_whtmp_rmdir_free(a->thargs);
+	kfree(a);
+out:
+	AuTraceErr(err);
+	return err;
+}
diff --git a/fs/aufs/iinfo.c b/fs/aufs/iinfo.c
new file mode 100644
index 000000000..d5cba3b98
--- /dev/null
+++ b/fs/aufs/iinfo.c
@@ -0,0 +1,285 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * inode private data
+ */
+
+#include "aufs.h"
+
+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex)
+{
+	struct inode *h_inode;
+	struct au_hinode *hinode;
+
+	IiMustAnyLock(inode);
+
+	hinode = au_hinode(au_ii(inode), bindex);
+	h_inode = hinode->hi_inode;
+	AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
+	return h_inode;
+}
+
+/* todo: hard/soft set? */
+void au_hiput(struct au_hinode *hinode)
+{
+	au_hn_free(hinode);
+	dput(hinode->hi_whdentry);
+	iput(hinode->hi_inode);
+}
+
+unsigned int au_hi_flags(struct inode *inode, int isdir)
+{
+	unsigned int flags;
+	const unsigned int mnt_flags = au_mntflags(inode->i_sb);
+
+	flags = 0;
+	if (au_opt_test(mnt_flags, XINO))
+		au_fset_hi(flags, XINO);
+	if (isdir && au_opt_test(mnt_flags, UDBA_HNOTIFY))
+		au_fset_hi(flags, HNOTIFY);
+	return flags;
+}
+
+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
+		   struct inode *h_inode, unsigned int flags)
+{
+	struct au_hinode *hinode;
+	struct inode *hi;
+	struct au_iinfo *iinfo = au_ii(inode);
+
+	IiMustWriteLock(inode);
+
+	hinode = au_hinode(iinfo, bindex);
+	hi = hinode->hi_inode;
+	AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
+
+	if (hi)
+		au_hiput(hinode);
+	hinode->hi_inode = h_inode;
+	if (h_inode) {
+		int err;
+		struct super_block *sb = inode->i_sb;
+		struct au_branch *br;
+
+		AuDebugOn(inode->i_mode
+			  && (h_inode->i_mode & S_IFMT)
+			  != (inode->i_mode & S_IFMT));
+		if (bindex == iinfo->ii_btop)
+			au_cpup_igen(inode, h_inode);
+		br = au_sbr(sb, bindex);
+		hinode->hi_id = br->br_id;
+		if (au_ftest_hi(flags, XINO)) {
+			err = au_xino_write(sb, bindex, h_inode->i_ino,
+					    inode->i_ino);
+			if (unlikely(err))
+				AuIOErr1("failed au_xino_write() %d\n", err);
+		}
+
+		if (au_ftest_hi(flags, HNOTIFY)
+		    && au_br_hnotifyable(br->br_perm)) {
+			err = au_hn_alloc(hinode, inode);
+			if (unlikely(err))
+				AuIOErr1("au_hn_alloc() %d\n", err);
+		}
+	}
+}
+
+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
+		  struct dentry *h_wh)
+{
+	struct au_hinode *hinode;
+
+	IiMustWriteLock(inode);
+
+	hinode = au_hinode(au_ii(inode), bindex);
+	AuDebugOn(hinode->hi_whdentry);
+	hinode->hi_whdentry = h_wh;
+}
+
+void au_update_iigen(struct inode *inode, int half)
+{
+	struct au_iinfo *iinfo;
+	struct au_iigen *iigen;
+	unsigned int sigen;
+
+	sigen = au_sigen(inode->i_sb);
+	iinfo = au_ii(inode);
+	iigen = &iinfo->ii_generation;
+	spin_lock(&iigen->ig_spin);
+	iigen->ig_generation = sigen;
+	if (half)
+		au_ig_fset(iigen->ig_flags, HALF_REFRESHED);
+	else
+		au_ig_fclr(iigen->ig_flags, HALF_REFRESHED);
+	spin_unlock(&iigen->ig_spin);
+}
+
+/* it may be called at remount time, too */
+void au_update_ibrange(struct inode *inode, int do_put_zero)
+{
+	struct au_iinfo *iinfo;
+	aufs_bindex_t bindex, bbot;
+
+	AuDebugOn(au_is_bad_inode(inode));
+	IiMustWriteLock(inode);
+
+	iinfo = au_ii(inode);
+	if (do_put_zero && iinfo->ii_btop >= 0) {
+		for (bindex = iinfo->ii_btop; bindex <= iinfo->ii_bbot;
+		     bindex++) {
+			struct inode *h_i;
+
+			h_i = au_hinode(iinfo, bindex)->hi_inode;
+			if (h_i
+			    && !h_i->i_nlink
+			    && !(h_i->i_state & I_LINKABLE))
+				au_set_h_iptr(inode, bindex, NULL, 0);
+		}
+	}
+
+	iinfo->ii_btop = -1;
+	iinfo->ii_bbot = -1;
+	bbot = au_sbbot(inode->i_sb);
+	for (bindex = 0; bindex <= bbot; bindex++)
+		if (au_hinode(iinfo, bindex)->hi_inode) {
+			iinfo->ii_btop = bindex;
+			break;
+		}
+	if (iinfo->ii_btop >= 0)
+		for (bindex = bbot; bindex >= iinfo->ii_btop; bindex--)
+			if (au_hinode(iinfo, bindex)->hi_inode) {
+				iinfo->ii_bbot = bindex;
+				break;
+			}
+	AuDebugOn(iinfo->ii_btop > iinfo->ii_bbot);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void au_icntnr_init_once(void *_c)
+{
+	struct au_icntnr *c = _c;
+	struct au_iinfo *iinfo = &c->iinfo;
+
+	spin_lock_init(&iinfo->ii_generation.ig_spin);
+	au_rw_init(&iinfo->ii_rwsem);
+	inode_init_once(&c->vfs_inode);
+}
+
+void au_hinode_init(struct au_hinode *hinode)
+{
+	hinode->hi_inode = NULL;
+	hinode->hi_id = -1;
+	au_hn_init(hinode);
+	hinode->hi_whdentry = NULL;
+}
+
+int au_iinfo_init(struct inode *inode)
+{
+	struct au_iinfo *iinfo;
+	struct super_block *sb;
+	struct au_hinode *hi;
+	int nbr, i;
+
+	sb = inode->i_sb;
+	iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
+	nbr = au_sbbot(sb) + 1;
+	if (unlikely(nbr <= 0))
+		nbr = 1;
+	hi = kmalloc_array(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS);
+	if (hi) {
+		au_ninodes_inc(sb);
+
+		iinfo->ii_hinode = hi;
+		for (i = 0; i < nbr; i++, hi++)
+			au_hinode_init(hi);
+
+		iinfo->ii_generation.ig_generation = au_sigen(sb);
+		iinfo->ii_btop = -1;
+		iinfo->ii_bbot = -1;
+		iinfo->ii_vdir = NULL;
+		return 0;
+	}
+	return -ENOMEM;
+}
+
+int au_hinode_realloc(struct au_iinfo *iinfo, int nbr, int may_shrink)
+{
+	int err, i;
+	struct au_hinode *hip;
+
+	AuRwMustWriteLock(&iinfo->ii_rwsem);
+
+	err = -ENOMEM;
+	hip = au_krealloc(iinfo->ii_hinode, sizeof(*hip) * nbr, GFP_NOFS,
+			  may_shrink);
+	if (hip) {
+		iinfo->ii_hinode = hip;
+		i = iinfo->ii_bbot + 1;
+		hip += i;
+		for (; i < nbr; i++, hip++)
+			au_hinode_init(hip);
+		err = 0;
+	}
+
+	return err;
+}
+
+void au_iinfo_fin(struct inode *inode)
+{
+	struct au_iinfo *iinfo;
+	struct au_hinode *hi;
+	struct super_block *sb;
+	aufs_bindex_t bindex, bbot;
+	const unsigned char unlinked = !inode->i_nlink;
+
+	AuDebugOn(au_is_bad_inode(inode));
+
+	sb = inode->i_sb;
+	au_ninodes_dec(sb);
+	if (si_pid_test(sb))
+		au_xino_delete_inode(inode, unlinked);
+	else {
+		/*
+		 * it is safe to hide the dependency between sbinfo and
+		 * sb->s_umount.
+		 */
+		lockdep_off();
+		si_noflush_read_lock(sb);
+		au_xino_delete_inode(inode, unlinked);
+		si_read_unlock(sb);
+		lockdep_on();
+	}
+
+	iinfo = au_ii(inode);
+	if (iinfo->ii_vdir)
+		au_vdir_free(iinfo->ii_vdir);
+
+	bindex = iinfo->ii_btop;
+	if (bindex >= 0) {
+		hi = au_hinode(iinfo, bindex);
+		bbot = iinfo->ii_bbot;
+		while (bindex++ <= bbot) {
+			if (hi->hi_inode)
+				au_hiput(hi);
+			hi++;
+		}
+	}
+	kfree(iinfo->ii_hinode);
+	AuRwDestroy(&iinfo->ii_rwsem);
+}
diff --git a/fs/aufs/inode.c b/fs/aufs/inode.c
new file mode 100644
index 000000000..6dee92dd8
--- /dev/null
+++ b/fs/aufs/inode.c
@@ -0,0 +1,527 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * inode functions
+ */
+
+#include "aufs.h"
+
+struct inode *au_igrab(struct inode *inode)
+{
+	if (inode) {
+		AuDebugOn(!atomic_read(&inode->i_count));
+		ihold(inode);
+	}
+	return inode;
+}
+
+static void au_refresh_hinode_attr(struct inode *inode, int do_version)
+{
+	au_cpup_attr_all(inode, /*force*/0);
+	au_update_iigen(inode, /*half*/1);
+	if (do_version)
+		inode_inc_iversion(inode);
+}
+
+static int au_ii_refresh(struct inode *inode, int *update)
+{
+	int err, e, nbr;
+	umode_t type;
+	aufs_bindex_t bindex, new_bindex;
+	struct super_block *sb;
+	struct au_iinfo *iinfo;
+	struct au_hinode *p, *q, tmp;
+
+	AuDebugOn(au_is_bad_inode(inode));
+	IiMustWriteLock(inode);
+
+	*update = 0;
+	sb = inode->i_sb;
+	nbr = au_sbbot(sb) + 1;
+	type = inode->i_mode & S_IFMT;
+	iinfo = au_ii(inode);
+	err = au_hinode_realloc(iinfo, nbr, /*may_shrink*/0);
+	if (unlikely(err))
+		goto out;
+
+	AuDebugOn(iinfo->ii_btop < 0);
+	p = au_hinode(iinfo, iinfo->ii_btop);
+	for (bindex = iinfo->ii_btop; bindex <= iinfo->ii_bbot;
+	     bindex++, p++) {
+		if (!p->hi_inode)
+			continue;
+
+		AuDebugOn(type != (p->hi_inode->i_mode & S_IFMT));
+		new_bindex = au_br_index(sb, p->hi_id);
+		if (new_bindex == bindex)
+			continue;
+
+		if (new_bindex < 0) {
+			*update = 1;
+			au_hiput(p);
+			p->hi_inode = NULL;
+			continue;
+		}
+
+		if (new_bindex < iinfo->ii_btop)
+			iinfo->ii_btop = new_bindex;
+		if (iinfo->ii_bbot < new_bindex)
+			iinfo->ii_bbot = new_bindex;
+		/* swap two lower inode, and loop again */
+		q = au_hinode(iinfo, new_bindex);
+		tmp = *q;
+		*q = *p;
+		*p = tmp;
+		if (tmp.hi_inode) {
+			bindex--;
+			p--;
+		}
+	}
+	au_update_ibrange(inode, /*do_put_zero*/0);
+	au_hinode_realloc(iinfo, nbr, /*may_shrink*/1); /* harmless if err */
+	e = au_dy_irefresh(inode);
+	if (unlikely(e && !err))
+		err = e;
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+void au_refresh_iop(struct inode *inode, int force_getattr)
+{
+	int type;
+	struct au_sbinfo *sbi = au_sbi(inode->i_sb);
+	const struct inode_operations *iop
+		= force_getattr ? aufs_iop : sbi->si_iop_array;
+
+	if (inode->i_op == iop)
+		return;
+
+	switch (inode->i_mode & S_IFMT) {
+	case S_IFDIR:
+		type = AuIop_DIR;
+		break;
+	case S_IFLNK:
+		type = AuIop_SYMLINK;
+		break;
+	default:
+		type = AuIop_OTHER;
+		break;
+	}
+
+	inode->i_op = iop + type;
+	/* unnecessary smp_wmb() */
+}
+
+int au_refresh_hinode_self(struct inode *inode)
+{
+	int err, update;
+
+	err = au_ii_refresh(inode, &update);
+	if (!err)
+		au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode));
+
+	AuTraceErr(err);
+	return err;
+}
+
+int au_refresh_hinode(struct inode *inode, struct dentry *dentry)
+{
+	int err, e, update;
+	unsigned int flags;
+	umode_t mode;
+	aufs_bindex_t bindex, bbot;
+	unsigned char isdir;
+	struct au_hinode *p;
+	struct au_iinfo *iinfo;
+
+	err = au_ii_refresh(inode, &update);
+	if (unlikely(err))
+		goto out;
+
+	update = 0;
+	iinfo = au_ii(inode);
+	p = au_hinode(iinfo, iinfo->ii_btop);
+	mode = (inode->i_mode & S_IFMT);
+	isdir = S_ISDIR(mode);
+	flags = au_hi_flags(inode, isdir);
+	bbot = au_dbbot(dentry);
+	for (bindex = au_dbtop(dentry); bindex <= bbot; bindex++) {
+		struct inode *h_i, *h_inode;
+		struct dentry *h_d;
+
+		h_d = au_h_dptr(dentry, bindex);
+		if (!h_d || d_is_negative(h_d))
+			continue;
+
+		h_inode = d_inode(h_d);
+		AuDebugOn(mode != (h_inode->i_mode & S_IFMT));
+		if (iinfo->ii_btop <= bindex && bindex <= iinfo->ii_bbot) {
+			h_i = au_h_iptr(inode, bindex);
+			if (h_i) {
+				if (h_i == h_inode)
+					continue;
+				err = -EIO;
+				break;
+			}
+		}
+		if (bindex < iinfo->ii_btop)
+			iinfo->ii_btop = bindex;
+		if (iinfo->ii_bbot < bindex)
+			iinfo->ii_bbot = bindex;
+		au_set_h_iptr(inode, bindex, au_igrab(h_inode), flags);
+		update = 1;
+	}
+	au_update_ibrange(inode, /*do_put_zero*/0);
+	e = au_dy_irefresh(inode);
+	if (unlikely(e && !err))
+		err = e;
+	if (!err)
+		au_refresh_hinode_attr(inode, update && isdir);
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+static int set_inode(struct inode *inode, struct dentry *dentry)
+{
+	int err;
+	unsigned int flags;
+	umode_t mode;
+	aufs_bindex_t bindex, btop, btail;
+	unsigned char isdir;
+	struct dentry *h_dentry;
+	struct inode *h_inode;
+	struct au_iinfo *iinfo;
+	struct inode_operations *iop;
+
+	IiMustWriteLock(inode);
+
+	err = 0;
+	isdir = 0;
+	iop = au_sbi(inode->i_sb)->si_iop_array;
+	btop = au_dbtop(dentry);
+	h_dentry = au_h_dptr(dentry, btop);
+	h_inode = d_inode(h_dentry);
+	mode = h_inode->i_mode;
+	switch (mode & S_IFMT) {
+	case S_IFREG:
+		btail = au_dbtail(dentry);
+		inode->i_op = iop + AuIop_OTHER;
+		inode->i_fop = &aufs_file_fop;
+		err = au_dy_iaop(inode, btop, h_inode);
+		if (unlikely(err))
+			goto out;
+		break;
+	case S_IFDIR:
+		isdir = 1;
+		btail = au_dbtaildir(dentry);
+		inode->i_op = iop + AuIop_DIR;
+		inode->i_fop = &aufs_dir_fop;
+		break;
+	case S_IFLNK:
+		btail = au_dbtail(dentry);
+		inode->i_op = iop + AuIop_SYMLINK;
+		break;
+	case S_IFBLK:
+	case S_IFCHR:
+	case S_IFIFO:
+	case S_IFSOCK:
+		btail = au_dbtail(dentry);
+		inode->i_op = iop + AuIop_OTHER;
+		init_special_inode(inode, mode, h_inode->i_rdev);
+		break;
+	default:
+		AuIOErr("Unknown file type 0%o\n", mode);
+		err = -EIO;
+		goto out;
+	}
+
+	/* do not set hnotify for whiteouted dirs (SHWH mode) */
+	flags = au_hi_flags(inode, isdir);
+	if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)
+	    && au_ftest_hi(flags, HNOTIFY)
+	    && dentry->d_name.len > AUFS_WH_PFX_LEN
+	    && !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))
+		au_fclr_hi(flags, HNOTIFY);
+	iinfo = au_ii(inode);
+	iinfo->ii_btop = btop;
+	iinfo->ii_bbot = btail;
+	for (bindex = btop; bindex <= btail; bindex++) {
+		h_dentry = au_h_dptr(dentry, bindex);
+		if (h_dentry)
+			au_set_h_iptr(inode, bindex,
+				      au_igrab(d_inode(h_dentry)), flags);
+	}
+	au_cpup_attr_all(inode, /*force*/1);
+	/*
+	 * to force calling aufs_get_acl() every time,
+	 * do not call cache_no_acl() for aufs inode.
+	 */
+
+out:
+	return err;
+}
+
+/*
+ * successful returns with iinfo write_locked
+ * minus: errno
+ * zero: success, matched
+ * plus: no error, but unmatched
+ */
+static int reval_inode(struct inode *inode, struct dentry *dentry)
+{
+	int err;
+	unsigned int gen, igflags;
+	aufs_bindex_t bindex, bbot;
+	struct inode *h_inode, *h_dinode;
+	struct dentry *h_dentry;
+
+	/*
+	 * before this function, if aufs got any iinfo lock, it must be only
+	 * one, the parent dir.
+	 * it can happen by UDBA and the obsoleted inode number.
+	 */
+	err = -EIO;
+	if (unlikely(inode->i_ino == parent_ino(dentry)))
+		goto out;
+
+	err = 1;
+	ii_write_lock_new_child(inode);
+	h_dentry = au_h_dptr(dentry, au_dbtop(dentry));
+	h_dinode = d_inode(h_dentry);
+	bbot = au_ibbot(inode);
+	for (bindex = au_ibtop(inode); bindex <= bbot; bindex++) {
+		h_inode = au_h_iptr(inode, bindex);
+		if (!h_inode || h_inode != h_dinode)
+			continue;
+
+		err = 0;
+		gen = au_iigen(inode, &igflags);
+		if (gen == au_digen(dentry)
+		    && !au_ig_ftest(igflags, HALF_REFRESHED))
+			break;
+
+		/* fully refresh inode using dentry */
+		err = au_refresh_hinode(inode, dentry);
+		if (!err)
+			au_update_iigen(inode, /*half*/0);
+		break;
+	}
+
+	if (unlikely(err))
+		ii_write_unlock(inode);
+out:
+	return err;
+}
+
+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
+	   unsigned int d_type, ino_t *ino)
+{
+	int err, idx;
+	const int isnondir = d_type != DT_DIR;
+
+	/* prevent hardlinked inode number from race condition */
+	if (isnondir) {
+		err = au_xinondir_enter(sb, bindex, h_ino, &idx);
+		if (unlikely(err))
+			goto out;
+	}
+
+	err = au_xino_read(sb, bindex, h_ino, ino);
+	if (unlikely(err))
+		goto out_xinondir;
+
+	if (!*ino) {
+		err = -EIO;
+		*ino = au_xino_new_ino(sb);
+		if (unlikely(!*ino))
+			goto out_xinondir;
+		err = au_xino_write(sb, bindex, h_ino, *ino);
+		if (unlikely(err))
+			goto out_xinondir;
+	}
+
+out_xinondir:
+	if (isnondir && idx >= 0)
+		au_xinondir_leave(sb, bindex, h_ino, idx);
+out:
+	return err;
+}
+
+/* successful returns with iinfo write_locked */
+/* todo: return with unlocked? */
+struct inode *au_new_inode(struct dentry *dentry, int must_new)
+{
+	struct inode *inode, *h_inode;
+	struct dentry *h_dentry;
+	struct super_block *sb;
+	ino_t h_ino, ino;
+	int err, idx, hlinked;
+	aufs_bindex_t btop;
+
+	sb = dentry->d_sb;
+	btop = au_dbtop(dentry);
+	h_dentry = au_h_dptr(dentry, btop);
+	h_inode = d_inode(h_dentry);
+	h_ino = h_inode->i_ino;
+	hlinked = !d_is_dir(h_dentry) && h_inode->i_nlink > 1;
+
+new_ino:
+	/*
+	 * stop 'race'-ing between hardlinks under different
+	 * parents.
+	 */
+	if (hlinked) {
+		err = au_xinondir_enter(sb, btop, h_ino, &idx);
+		inode = ERR_PTR(err);
+		if (unlikely(err))
+			goto out;
+	}
+
+	err = au_xino_read(sb, btop, h_ino, &ino);
+	inode = ERR_PTR(err);
+	if (unlikely(err))
+		goto out_xinondir;
+
+	if (!ino) {
+		ino = au_xino_new_ino(sb);
+		if (unlikely(!ino)) {
+			inode = ERR_PTR(-EIO);
+			goto out_xinondir;
+		}
+	}
+
+	AuDbg("i%lu\n", (unsigned long)ino);
+	inode = au_iget_locked(sb, ino);
+	err = PTR_ERR(inode);
+	if (IS_ERR(inode))
+		goto out_xinondir;
+
+	AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW));
+	if (inode->i_state & I_NEW) {
+		ii_write_lock_new_child(inode);
+		err = set_inode(inode, dentry);
+		if (!err) {
+			unlock_new_inode(inode);
+			goto out_xinondir; /* success */
+		}
+
+		/*
+		 * iget_failed() calls iput(), but we need to call
+		 * ii_write_unlock() after iget_failed(). so dirty hack for
+		 * i_count.
+		 */
+		atomic_inc(&inode->i_count);
+		iget_failed(inode);
+		ii_write_unlock(inode);
+		au_xino_write(sb, btop, h_ino, /*ino*/0);
+		/* ignore this error */
+		goto out_iput;
+	} else if (!must_new && !IS_DEADDIR(inode) && inode->i_nlink) {
+		/*
+		 * horrible race condition between lookup, readdir and copyup
+		 * (or something).
+		 */
+		if (hlinked && idx >= 0)
+			au_xinondir_leave(sb, btop, h_ino, idx);
+		err = reval_inode(inode, dentry);
+		if (unlikely(err < 0)) {
+			hlinked = 0;
+			goto out_iput;
+		}
+		if (!err)
+			goto out; /* success */
+		else if (hlinked && idx >= 0) {
+			err = au_xinondir_enter(sb, btop, h_ino, &idx);
+			if (unlikely(err)) {
+				iput(inode);
+				inode = ERR_PTR(err);
+				goto out;
+			}
+		}
+	}
+
+	if (unlikely(au_test_fs_unique_ino(h_inode)))
+		AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir,"
+			" b%d, %s, %pd, hi%lu, i%lu.\n",
+			btop, au_sbtype(h_dentry->d_sb), dentry,
+			(unsigned long)h_ino, (unsigned long)ino);
+	ino = 0;
+	err = au_xino_write(sb, btop, h_ino, /*ino*/0);
+	if (!err) {
+		iput(inode);
+		if (hlinked && idx >= 0)
+			au_xinondir_leave(sb, btop, h_ino, idx);
+		goto new_ino;
+	}
+
+out_iput:
+	iput(inode);
+	inode = ERR_PTR(err);
+out_xinondir:
+	if (hlinked && idx >= 0)
+		au_xinondir_leave(sb, btop, h_ino, idx);
+out:
+	return inode;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
+	       struct inode *inode)
+{
+	int err;
+	struct inode *hi;
+
+	err = au_br_rdonly(au_sbr(sb, bindex));
+
+	/* pseudo-link after flushed may happen out of bounds */
+	if (!err
+	    && inode
+	    && au_ibtop(inode) <= bindex
+	    && bindex <= au_ibbot(inode)) {
+		/*
+		 * permission check is unnecessary since vfsub routine
+		 * will be called later
+		 */
+		hi = au_h_iptr(inode, bindex);
+		if (hi)
+			err = IS_IMMUTABLE(hi) ? -EROFS : 0;
+	}
+
+	return err;
+}
+
+int au_test_h_perm(struct inode *h_inode, int mask)
+{
+	if (uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
+		return 0;
+	return inode_permission(h_inode, mask);
+}
+
+int au_test_h_perm_sio(struct inode *h_inode, int mask)
+{
+	if (au_test_nfs(h_inode->i_sb)
+	    && (mask & MAY_WRITE)
+	    && S_ISDIR(h_inode->i_mode))
+		mask |= MAY_READ; /* force permission check */
+	return au_test_h_perm(h_inode, mask);
+}
diff --git a/fs/aufs/inode.h b/fs/aufs/inode.h
new file mode 100644
index 000000000..8af51ff5f
--- /dev/null
+++ b/fs/aufs/inode.h
@@ -0,0 +1,695 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * inode operations
+ */
+
+#ifndef __AUFS_INODE_H__
+#define __AUFS_INODE_H__
+
+#ifdef __KERNEL__
+
+#include <linux/fsnotify.h>
+#include "rwsem.h"
+
+struct vfsmount;
+
+struct au_hnotify {
+#ifdef CONFIG_AUFS_HNOTIFY
+#ifdef CONFIG_AUFS_HFSNOTIFY
+	/* never use fsnotify_add_vfsmount_mark() */
+	struct fsnotify_mark		hn_mark;
+#endif
+	struct inode		*hn_aufs_inode;	/* no get/put */
+#endif
+} ____cacheline_aligned_in_smp;
+
+struct au_hinode {
+	struct inode		*hi_inode;
+	aufs_bindex_t		hi_id;
+#ifdef CONFIG_AUFS_HNOTIFY
+	struct au_hnotify	*hi_notify;
+#endif
+
+	/* reference to the copied-up whiteout with get/put */
+	struct dentry		*hi_whdentry;
+};
+
+/* ig_flags */
+#define AuIG_HALF_REFRESHED		1
+#define au_ig_ftest(flags, name)	((flags) & AuIG_##name)
+#define au_ig_fset(flags, name) \
+	do { (flags) |= AuIG_##name; } while (0)
+#define au_ig_fclr(flags, name) \
+	do { (flags) &= ~AuIG_##name; } while (0)
+
+struct au_iigen {
+	spinlock_t	ig_spin;
+	__u32		ig_generation, ig_flags;
+};
+
+struct au_vdir;
+struct au_iinfo {
+	struct au_iigen		ii_generation;
+	struct super_block	*ii_hsb1;	/* no get/put */
+
+	struct au_rwsem		ii_rwsem;
+	aufs_bindex_t		ii_btop, ii_bbot;
+	__u32			ii_higen;
+	struct au_hinode	*ii_hinode;
+	struct au_vdir		*ii_vdir;
+};
+
+struct au_icntnr {
+	struct au_iinfo iinfo;
+	struct inode vfs_inode;
+	struct hlist_bl_node plink;
+} ____cacheline_aligned_in_smp;
+
+/* au_pin flags */
+#define AuPin_DI_LOCKED		1
+#define AuPin_MNT_WRITE		(1 << 1)
+#define au_ftest_pin(flags, name)	((flags) & AuPin_##name)
+#define au_fset_pin(flags, name) \
+	do { (flags) |= AuPin_##name; } while (0)
+#define au_fclr_pin(flags, name) \
+	do { (flags) &= ~AuPin_##name; } while (0)
+
+struct au_pin {
+	/* input */
+	struct dentry *dentry;
+	unsigned int udba;
+	unsigned char lsc_di, lsc_hi, flags;
+	aufs_bindex_t bindex;
+
+	/* output */
+	struct dentry *parent;
+	struct au_hinode *hdir;
+	struct vfsmount *h_mnt;
+
+	/* temporary unlock/relock for copyup */
+	struct dentry *h_dentry, *h_parent;
+	struct au_branch *br;
+	struct task_struct *task;
+};
+
+void au_pin_hdir_unlock(struct au_pin *p);
+int au_pin_hdir_lock(struct au_pin *p);
+int au_pin_hdir_relock(struct au_pin *p);
+void au_pin_hdir_acquire_nest(struct au_pin *p);
+void au_pin_hdir_release(struct au_pin *p);
+
+/* ---------------------------------------------------------------------- */
+
+static inline struct au_iinfo *au_ii(struct inode *inode)
+{
+	BUG_ON(is_bad_inode(inode));
+	return &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* inode.c */
+struct inode *au_igrab(struct inode *inode);
+void au_refresh_iop(struct inode *inode, int force_getattr);
+int au_refresh_hinode_self(struct inode *inode);
+int au_refresh_hinode(struct inode *inode, struct dentry *dentry);
+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
+	   unsigned int d_type, ino_t *ino);
+struct inode *au_new_inode(struct dentry *dentry, int must_new);
+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
+	       struct inode *inode);
+int au_test_h_perm(struct inode *h_inode, int mask);
+int au_test_h_perm_sio(struct inode *h_inode, int mask);
+
+static inline int au_wh_ino(struct super_block *sb, aufs_bindex_t bindex,
+			    ino_t h_ino, unsigned int d_type, ino_t *ino)
+{
+#ifdef CONFIG_AUFS_SHWH
+	return au_ino(sb, bindex, h_ino, d_type, ino);
+#else
+	return 0;
+#endif
+}
+
+/* i_op.c */
+enum {
+	AuIop_SYMLINK,
+	AuIop_DIR,
+	AuIop_OTHER,
+	AuIop_Last
+};
+extern struct inode_operations aufs_iop[AuIop_Last],
+	aufs_iop_nogetattr[AuIop_Last];
+
+/* au_wr_dir flags */
+#define AuWrDir_ADD_ENTRY	1
+#define AuWrDir_ISDIR		(1 << 1)
+#define AuWrDir_TMPFILE		(1 << 2)
+#define au_ftest_wrdir(flags, name)	((flags) & AuWrDir_##name)
+#define au_fset_wrdir(flags, name) \
+	do { (flags) |= AuWrDir_##name; } while (0)
+#define au_fclr_wrdir(flags, name) \
+	do { (flags) &= ~AuWrDir_##name; } while (0)
+
+struct au_wr_dir_args {
+	aufs_bindex_t force_btgt;
+	unsigned char flags;
+};
+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
+	      struct au_wr_dir_args *args);
+
+struct dentry *au_pinned_h_parent(struct au_pin *pin);
+void au_pin_init(struct au_pin *pin, struct dentry *dentry,
+		 aufs_bindex_t bindex, int lsc_di, int lsc_hi,
+		 unsigned int udba, unsigned char flags);
+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
+	   unsigned int udba, unsigned char flags) __must_check;
+int au_do_pin(struct au_pin *pin) __must_check;
+void au_unpin(struct au_pin *pin);
+int au_reval_for_attr(struct dentry *dentry, unsigned int sigen);
+
+#define AuIcpup_DID_CPUP	1
+#define au_ftest_icpup(flags, name)	((flags) & AuIcpup_##name)
+#define au_fset_icpup(flags, name) \
+	do { (flags) |= AuIcpup_##name; } while (0)
+#define au_fclr_icpup(flags, name) \
+	do { (flags) &= ~AuIcpup_##name; } while (0)
+
+struct au_icpup_args {
+	unsigned char flags;
+	unsigned char pin_flags;
+	aufs_bindex_t btgt;
+	unsigned int udba;
+	struct au_pin pin;
+	struct path h_path;
+	struct inode *h_inode;
+};
+
+int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
+		     struct au_icpup_args *a);
+
+int au_h_path_getattr(struct dentry *dentry, int force, struct path *h_path,
+		      int locked);
+
+/* i_op_add.c */
+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
+	       struct dentry *h_parent, int isdir);
+int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
+	       dev_t dev);
+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname);
+int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
+		bool want_excl);
+struct vfsub_aopen_args;
+int au_aopen_or_create(struct inode *dir, struct dentry *dentry,
+		       struct vfsub_aopen_args *args);
+int aufs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode);
+int aufs_link(struct dentry *src_dentry, struct inode *dir,
+	      struct dentry *dentry);
+int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
+
+/* i_op_del.c */
+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup);
+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
+	       struct dentry *h_parent, int isdir);
+int aufs_unlink(struct inode *dir, struct dentry *dentry);
+int aufs_rmdir(struct inode *dir, struct dentry *dentry);
+
+/* i_op_ren.c */
+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt);
+int aufs_rename(struct inode *src_dir, struct dentry *src_dentry,
+		struct inode *dir, struct dentry *dentry,
+		unsigned int flags);
+
+/* iinfo.c */
+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex);
+void au_hiput(struct au_hinode *hinode);
+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
+		  struct dentry *h_wh);
+unsigned int au_hi_flags(struct inode *inode, int isdir);
+
+/* hinode flags */
+#define AuHi_XINO	1
+#define AuHi_HNOTIFY	(1 << 1)
+#define au_ftest_hi(flags, name)	((flags) & AuHi_##name)
+#define au_fset_hi(flags, name) \
+	do { (flags) |= AuHi_##name; } while (0)
+#define au_fclr_hi(flags, name) \
+	do { (flags) &= ~AuHi_##name; } while (0)
+
+#ifndef CONFIG_AUFS_HNOTIFY
+#undef AuHi_HNOTIFY
+#define AuHi_HNOTIFY	0
+#endif
+
+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
+		   struct inode *h_inode, unsigned int flags);
+
+void au_update_iigen(struct inode *inode, int half);
+void au_update_ibrange(struct inode *inode, int do_put_zero);
+
+void au_icntnr_init_once(void *_c);
+void au_hinode_init(struct au_hinode *hinode);
+int au_iinfo_init(struct inode *inode);
+void au_iinfo_fin(struct inode *inode);
+int au_hinode_realloc(struct au_iinfo *iinfo, int nbr, int may_shrink);
+
+#ifdef CONFIG_PROC_FS
+/* plink.c */
+int au_plink_maint(struct super_block *sb, int flags);
+struct au_sbinfo;
+void au_plink_maint_leave(struct au_sbinfo *sbinfo);
+int au_plink_maint_enter(struct super_block *sb);
+#ifdef CONFIG_AUFS_DEBUG
+void au_plink_list(struct super_block *sb);
+#else
+AuStubVoid(au_plink_list, struct super_block *sb)
+#endif
+int au_plink_test(struct inode *inode);
+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex);
+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
+		     struct dentry *h_dentry);
+void au_plink_put(struct super_block *sb, int verbose);
+void au_plink_clean(struct super_block *sb, int verbose);
+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id);
+#else
+AuStubInt0(au_plink_maint, struct super_block *sb, int flags);
+AuStubVoid(au_plink_maint_leave, struct au_sbinfo *sbinfo);
+AuStubInt0(au_plink_maint_enter, struct super_block *sb);
+AuStubVoid(au_plink_list, struct super_block *sb);
+AuStubInt0(au_plink_test, struct inode *inode);
+AuStub(struct dentry *, au_plink_lkup, return NULL,
+       struct inode *inode, aufs_bindex_t bindex);
+AuStubVoid(au_plink_append, struct inode *inode, aufs_bindex_t bindex,
+	   struct dentry *h_dentry);
+AuStubVoid(au_plink_put, struct super_block *sb, int verbose);
+AuStubVoid(au_plink_clean, struct super_block *sb, int verbose);
+AuStubVoid(au_plink_half_refresh, struct super_block *sb, aufs_bindex_t br_id);
+#endif /* CONFIG_PROC_FS */
+
+#ifdef CONFIG_AUFS_XATTR
+/* xattr.c */
+int au_cpup_xattr(struct dentry *h_dst, struct dentry *h_src, int ignore_flags,
+		  unsigned int verbose);
+ssize_t aufs_listxattr(struct dentry *dentry, char *list, size_t size);
+void au_xattr_init(struct super_block *sb);
+#else
+AuStubInt0(au_cpup_xattr, struct dentry *h_dst, struct dentry *h_src,
+	   int ignore_flags, unsigned int verbose);
+AuStubVoid(au_xattr_init, struct super_block *sb);
+#endif
+
+#ifdef CONFIG_FS_POSIX_ACL
+struct posix_acl *aufs_get_acl(struct inode *inode, int type);
+int aufs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+#endif
+
+#if IS_ENABLED(CONFIG_AUFS_XATTR) || IS_ENABLED(CONFIG_FS_POSIX_ACL)
+enum {
+	AU_XATTR_SET,
+	AU_ACL_SET
+};
+
+struct au_sxattr {
+	int type;
+	union {
+		struct {
+			const char	*name;
+			const void	*value;
+			size_t		size;
+			int		flags;
+		} set;
+		struct {
+			struct posix_acl *acl;
+			int		type;
+		} acl_set;
+	} u;
+};
+ssize_t au_sxattr(struct dentry *dentry, struct inode *inode,
+		  struct au_sxattr *arg);
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+/* lock subclass for iinfo */
+enum {
+	AuLsc_II_CHILD,		/* child first */
+	AuLsc_II_CHILD2,	/* rename(2), link(2), and cpup at hnotify */
+	AuLsc_II_CHILD3,	/* copyup dirs */
+	AuLsc_II_PARENT,	/* see AuLsc_I_PARENT in vfsub.h */
+	AuLsc_II_PARENT2,
+	AuLsc_II_PARENT3,	/* copyup dirs */
+	AuLsc_II_NEW_CHILD
+};
+
+/*
+ * ii_read_lock_child, ii_write_lock_child,
+ * ii_read_lock_child2, ii_write_lock_child2,
+ * ii_read_lock_child3, ii_write_lock_child3,
+ * ii_read_lock_parent, ii_write_lock_parent,
+ * ii_read_lock_parent2, ii_write_lock_parent2,
+ * ii_read_lock_parent3, ii_write_lock_parent3,
+ * ii_read_lock_new_child, ii_write_lock_new_child,
+ */
+#define AuReadLockFunc(name, lsc) \
+static inline void ii_read_lock_##name(struct inode *i) \
+{ \
+	au_rw_read_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
+}
+
+#define AuWriteLockFunc(name, lsc) \
+static inline void ii_write_lock_##name(struct inode *i) \
+{ \
+	au_rw_write_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
+}
+
+#define AuRWLockFuncs(name, lsc) \
+	AuReadLockFunc(name, lsc) \
+	AuWriteLockFunc(name, lsc)
+
+AuRWLockFuncs(child, CHILD);
+AuRWLockFuncs(child2, CHILD2);
+AuRWLockFuncs(child3, CHILD3);
+AuRWLockFuncs(parent, PARENT);
+AuRWLockFuncs(parent2, PARENT2);
+AuRWLockFuncs(parent3, PARENT3);
+AuRWLockFuncs(new_child, NEW_CHILD);
+
+#undef AuReadLockFunc
+#undef AuWriteLockFunc
+#undef AuRWLockFuncs
+
+#define ii_read_unlock(i)	au_rw_read_unlock(&au_ii(i)->ii_rwsem)
+#define ii_write_unlock(i)	au_rw_write_unlock(&au_ii(i)->ii_rwsem)
+#define ii_downgrade_lock(i)	au_rw_dgrade_lock(&au_ii(i)->ii_rwsem)
+
+#define IiMustNoWaiters(i)	AuRwMustNoWaiters(&au_ii(i)->ii_rwsem)
+#define IiMustAnyLock(i)	AuRwMustAnyLock(&au_ii(i)->ii_rwsem)
+#define IiMustWriteLock(i)	AuRwMustWriteLock(&au_ii(i)->ii_rwsem)
+
+/* ---------------------------------------------------------------------- */
+
+static inline void au_icntnr_init(struct au_icntnr *c)
+{
+#ifdef CONFIG_AUFS_DEBUG
+	c->vfs_inode.i_mode = 0;
+#endif
+}
+
+static inline unsigned int au_iigen(struct inode *inode, unsigned int *igflags)
+{
+	unsigned int gen;
+	struct au_iinfo *iinfo;
+	struct au_iigen *iigen;
+
+	iinfo = au_ii(inode);
+	iigen = &iinfo->ii_generation;
+	spin_lock(&iigen->ig_spin);
+	if (igflags)
+		*igflags = iigen->ig_flags;
+	gen = iigen->ig_generation;
+	spin_unlock(&iigen->ig_spin);
+
+	return gen;
+}
+
+/* tiny test for inode number */
+/* tmpfs generation is too rough */
+static inline int au_test_higen(struct inode *inode, struct inode *h_inode)
+{
+	struct au_iinfo *iinfo;
+
+	iinfo = au_ii(inode);
+	AuRwMustAnyLock(&iinfo->ii_rwsem);
+	return !(iinfo->ii_hsb1 == h_inode->i_sb
+		 && iinfo->ii_higen == h_inode->i_generation);
+}
+
+static inline void au_iigen_dec(struct inode *inode)
+{
+	struct au_iinfo *iinfo;
+	struct au_iigen *iigen;
+
+	iinfo = au_ii(inode);
+	iigen = &iinfo->ii_generation;
+	spin_lock(&iigen->ig_spin);
+	iigen->ig_generation--;
+	spin_unlock(&iigen->ig_spin);
+}
+
+static inline int au_iigen_test(struct inode *inode, unsigned int sigen)
+{
+	int err;
+
+	err = 0;
+	if (unlikely(inode && au_iigen(inode, NULL) != sigen))
+		err = -EIO;
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static inline struct au_hinode *au_hinode(struct au_iinfo *iinfo,
+					  aufs_bindex_t bindex)
+{
+	return iinfo->ii_hinode + bindex;
+}
+
+static inline int au_is_bad_inode(struct inode *inode)
+{
+	return !!(is_bad_inode(inode) || !au_hinode(au_ii(inode), 0));
+}
+
+static inline aufs_bindex_t au_ii_br_id(struct inode *inode,
+					aufs_bindex_t bindex)
+{
+	IiMustAnyLock(inode);
+	return au_hinode(au_ii(inode), bindex)->hi_id;
+}
+
+static inline aufs_bindex_t au_ibtop(struct inode *inode)
+{
+	IiMustAnyLock(inode);
+	return au_ii(inode)->ii_btop;
+}
+
+static inline aufs_bindex_t au_ibbot(struct inode *inode)
+{
+	IiMustAnyLock(inode);
+	return au_ii(inode)->ii_bbot;
+}
+
+static inline struct au_vdir *au_ivdir(struct inode *inode)
+{
+	IiMustAnyLock(inode);
+	return au_ii(inode)->ii_vdir;
+}
+
+static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex)
+{
+	IiMustAnyLock(inode);
+	return au_hinode(au_ii(inode), bindex)->hi_whdentry;
+}
+
+static inline void au_set_ibtop(struct inode *inode, aufs_bindex_t bindex)
+{
+	IiMustWriteLock(inode);
+	au_ii(inode)->ii_btop = bindex;
+}
+
+static inline void au_set_ibbot(struct inode *inode, aufs_bindex_t bindex)
+{
+	IiMustWriteLock(inode);
+	au_ii(inode)->ii_bbot = bindex;
+}
+
+static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir)
+{
+	IiMustWriteLock(inode);
+	au_ii(inode)->ii_vdir = vdir;
+}
+
+static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex)
+{
+	IiMustAnyLock(inode);
+	return au_hinode(au_ii(inode), bindex);
+}
+
+/* ---------------------------------------------------------------------- */
+
+static inline struct dentry *au_pinned_parent(struct au_pin *pin)
+{
+	if (pin)
+		return pin->parent;
+	return NULL;
+}
+
+static inline struct inode *au_pinned_h_dir(struct au_pin *pin)
+{
+	if (pin && pin->hdir)
+		return pin->hdir->hi_inode;
+	return NULL;
+}
+
+static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin)
+{
+	if (pin)
+		return pin->hdir;
+	return NULL;
+}
+
+static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry)
+{
+	if (pin)
+		pin->dentry = dentry;
+}
+
+static inline void au_pin_set_parent_lflag(struct au_pin *pin,
+					   unsigned char lflag)
+{
+	if (pin) {
+		if (lflag)
+			au_fset_pin(pin->flags, DI_LOCKED);
+		else
+			au_fclr_pin(pin->flags, DI_LOCKED);
+	}
+}
+
+#if 0 /* reserved */
+static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent)
+{
+	if (pin) {
+		dput(pin->parent);
+		pin->parent = dget(parent);
+	}
+}
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+struct au_branch;
+#ifdef CONFIG_AUFS_HNOTIFY
+struct au_hnotify_op {
+	void (*ctl)(struct au_hinode *hinode, int do_set);
+	int (*alloc)(struct au_hinode *hinode);
+
+	/*
+	 * if it returns true, the the caller should free hinode->hi_notify,
+	 * otherwise ->free() frees it.
+	 */
+	int (*free)(struct au_hinode *hinode,
+		    struct au_hnotify *hn) __must_check;
+
+	void (*fin)(void);
+	int (*init)(void);
+
+	int (*reset_br)(unsigned int udba, struct au_branch *br, int perm);
+	void (*fin_br)(struct au_branch *br);
+	int (*init_br)(struct au_branch *br, int perm);
+};
+
+/* hnotify.c */
+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode);
+void au_hn_free(struct au_hinode *hinode);
+void au_hn_ctl(struct au_hinode *hinode, int do_set);
+void au_hn_reset(struct inode *inode, unsigned int flags);
+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
+	       struct qstr *h_child_qstr, struct inode *h_child_inode);
+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm);
+int au_hnotify_init_br(struct au_branch *br, int perm);
+void au_hnotify_fin_br(struct au_branch *br);
+int __init au_hnotify_init(void);
+void au_hnotify_fin(void);
+
+/* hfsnotify.c */
+extern const struct au_hnotify_op au_hnotify_op;
+
+static inline
+void au_hn_init(struct au_hinode *hinode)
+{
+	hinode->hi_notify = NULL;
+}
+
+static inline struct au_hnotify *au_hn(struct au_hinode *hinode)
+{
+	return hinode->hi_notify;
+}
+
+#else
+AuStub(int, au_hn_alloc, return -EOPNOTSUPP,
+       struct au_hinode *hinode __maybe_unused,
+       struct inode *inode __maybe_unused)
+AuStub(struct au_hnotify *, au_hn, return NULL, struct au_hinode *hinode)
+AuStubVoid(au_hn_free, struct au_hinode *hinode __maybe_unused)
+AuStubVoid(au_hn_ctl, struct au_hinode *hinode __maybe_unused,
+	   int do_set __maybe_unused)
+AuStubVoid(au_hn_reset, struct inode *inode __maybe_unused,
+	   unsigned int flags __maybe_unused)
+AuStubInt0(au_hnotify_reset_br, unsigned int udba __maybe_unused,
+	   struct au_branch *br __maybe_unused,
+	   int perm __maybe_unused)
+AuStubInt0(au_hnotify_init_br, struct au_branch *br __maybe_unused,
+	   int perm __maybe_unused)
+AuStubVoid(au_hnotify_fin_br, struct au_branch *br __maybe_unused)
+AuStubInt0(__init au_hnotify_init, void)
+AuStubVoid(au_hnotify_fin, void)
+AuStubVoid(au_hn_init, struct au_hinode *hinode __maybe_unused)
+#endif /* CONFIG_AUFS_HNOTIFY */
+
+static inline void au_hn_suspend(struct au_hinode *hdir)
+{
+	au_hn_ctl(hdir, /*do_set*/0);
+}
+
+static inline void au_hn_resume(struct au_hinode *hdir)
+{
+	au_hn_ctl(hdir, /*do_set*/1);
+}
+
+static inline void au_hn_inode_lock(struct au_hinode *hdir)
+{
+	inode_lock(hdir->hi_inode);
+	au_hn_suspend(hdir);
+}
+
+static inline void au_hn_inode_lock_nested(struct au_hinode *hdir,
+					  unsigned int sc __maybe_unused)
+{
+	inode_lock_nested(hdir->hi_inode, sc);
+	au_hn_suspend(hdir);
+}
+
+#if 0 /* unused */
+#include "vfsub.h"
+static inline void au_hn_inode_lock_shared_nested(struct au_hinode *hdir,
+						  unsigned int sc)
+{
+	inode_lock_shared_nested(hdir->hi_inode, sc);
+	au_hn_suspend(hdir);
+}
+#endif
+
+static inline void au_hn_inode_unlock(struct au_hinode *hdir)
+{
+	au_hn_resume(hdir);
+	inode_unlock(hdir->hi_inode);
+}
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_INODE_H__ */
diff --git a/fs/aufs/ioctl.c b/fs/aufs/ioctl.c
new file mode 100644
index 000000000..de188adb3
--- /dev/null
+++ b/fs/aufs/ioctl.c
@@ -0,0 +1,219 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * ioctl
+ * plink-management and readdir in userspace.
+ * assist the pathconf(3) wrapper library.
+ * move-down
+ * File-based Hierarchical Storage Management.
+ */
+
+#include <linux/compat.h>
+#include <linux/file.h>
+#include "aufs.h"
+
+static int au_wbr_fd(struct path *path, struct aufs_wbr_fd __user *arg)
+{
+	int err, fd;
+	aufs_bindex_t wbi, bindex, bbot;
+	struct file *h_file;
+	struct super_block *sb;
+	struct dentry *root;
+	struct au_branch *br;
+	struct aufs_wbr_fd wbrfd = {
+		.oflags	= au_dir_roflags,
+		.brid	= -1
+	};
+	const int valid = O_RDONLY | O_NONBLOCK | O_LARGEFILE | O_DIRECTORY
+		| O_NOATIME | O_CLOEXEC;
+
+	AuDebugOn(wbrfd.oflags & ~valid);
+
+	if (arg) {
+		err = copy_from_user(&wbrfd, arg, sizeof(wbrfd));
+		if (unlikely(err)) {
+			err = -EFAULT;
+			goto out;
+		}
+
+		err = -EINVAL;
+		AuDbg("wbrfd{0%o, %d}\n", wbrfd.oflags, wbrfd.brid);
+		wbrfd.oflags |= au_dir_roflags;
+		AuDbg("0%o\n", wbrfd.oflags);
+		if (unlikely(wbrfd.oflags & ~valid))
+			goto out;
+	}
+
+	fd = get_unused_fd_flags(0);
+	err = fd;
+	if (unlikely(fd < 0))
+		goto out;
+
+	h_file = ERR_PTR(-EINVAL);
+	wbi = 0;
+	br = NULL;
+	sb = path->dentry->d_sb;
+	root = sb->s_root;
+	aufs_read_lock(root, AuLock_IR);
+	bbot = au_sbbot(sb);
+	if (wbrfd.brid >= 0) {
+		wbi = au_br_index(sb, wbrfd.brid);
+		if (unlikely(wbi < 0 || wbi > bbot))
+			goto out_unlock;
+	}
+
+	h_file = ERR_PTR(-ENOENT);
+	br = au_sbr(sb, wbi);
+	if (!au_br_writable(br->br_perm)) {
+		if (arg)
+			goto out_unlock;
+
+		bindex = wbi + 1;
+		wbi = -1;
+		for (; bindex <= bbot; bindex++) {
+			br = au_sbr(sb, bindex);
+			if (au_br_writable(br->br_perm)) {
+				wbi = bindex;
+				br = au_sbr(sb, wbi);
+				break;
+			}
+		}
+	}
+	AuDbg("wbi %d\n", wbi);
+	if (wbi >= 0)
+		h_file = au_h_open(root, wbi, wbrfd.oflags, NULL,
+				   /*force_wr*/0);
+
+out_unlock:
+	aufs_read_unlock(root, AuLock_IR);
+	err = PTR_ERR(h_file);
+	if (IS_ERR(h_file))
+		goto out_fd;
+
+	au_br_put(br); /* cf. au_h_open() */
+	fd_install(fd, h_file);
+	err = fd;
+	goto out; /* success */
+
+out_fd:
+	put_unused_fd(fd);
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	long err;
+	struct dentry *dentry;
+
+	switch (cmd) {
+	case AUFS_CTL_RDU:
+	case AUFS_CTL_RDU_INO:
+		err = au_rdu_ioctl(file, cmd, arg);
+		break;
+
+	case AUFS_CTL_WBR_FD:
+		err = au_wbr_fd(&file->f_path, (void __user *)arg);
+		break;
+
+	case AUFS_CTL_IBUSY:
+		err = au_ibusy_ioctl(file, arg);
+		break;
+
+	case AUFS_CTL_BRINFO:
+		err = au_brinfo_ioctl(file, arg);
+		break;
+
+	case AUFS_CTL_FHSM_FD:
+		dentry = file->f_path.dentry;
+		if (IS_ROOT(dentry))
+			err = au_fhsm_fd(dentry->d_sb, arg);
+		else
+			err = -ENOTTY;
+		break;
+
+	default:
+		/* do not call the lower */
+		AuDbg("0x%x\n", cmd);
+		err = -ENOTTY;
+	}
+
+	AuTraceErr(err);
+	return err;
+}
+
+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	long err;
+
+	switch (cmd) {
+	case AUFS_CTL_MVDOWN:
+		err = au_mvdown(file->f_path.dentry, (void __user *)arg);
+		break;
+
+	case AUFS_CTL_WBR_FD:
+		err = au_wbr_fd(&file->f_path, (void __user *)arg);
+		break;
+
+	default:
+		/* do not call the lower */
+		AuDbg("0x%x\n", cmd);
+		err = -ENOTTY;
+	}
+
+	AuTraceErr(err);
+	return err;
+}
+
+#ifdef CONFIG_COMPAT
+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
+			   unsigned long arg)
+{
+	long err;
+
+	switch (cmd) {
+	case AUFS_CTL_RDU:
+	case AUFS_CTL_RDU_INO:
+		err = au_rdu_compat_ioctl(file, cmd, arg);
+		break;
+
+	case AUFS_CTL_IBUSY:
+		err = au_ibusy_compat_ioctl(file, arg);
+		break;
+
+	case AUFS_CTL_BRINFO:
+		err = au_brinfo_compat_ioctl(file, arg);
+		break;
+
+	default:
+		err = aufs_ioctl_dir(file, cmd, arg);
+	}
+
+	AuTraceErr(err);
+	return err;
+}
+
+long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
+			      unsigned long arg)
+{
+	return aufs_ioctl_nondir(file, cmd, (unsigned long)compat_ptr(arg));
+}
+#endif
diff --git a/fs/aufs/loop.c b/fs/aufs/loop.c
new file mode 100644
index 000000000..983ef9876
--- /dev/null
+++ b/fs/aufs/loop.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * support for loopback block device as a branch
+ */
+
+#include "aufs.h"
+
+/* added into drivers/block/loop.c */
+static struct file *(*backing_file_func)(struct super_block *sb);
+
+/*
+ * test if two lower dentries have overlapping branches.
+ */
+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding)
+{
+	struct super_block *h_sb;
+	struct file *backing_file;
+
+	if (unlikely(!backing_file_func)) {
+		/* don't load "loop" module here */
+		backing_file_func = symbol_get(loop_backing_file);
+		if (unlikely(!backing_file_func))
+			/* "loop" module is not loaded */
+			return 0;
+	}
+
+	h_sb = h_adding->d_sb;
+	backing_file = backing_file_func(h_sb);
+	if (!backing_file)
+		return 0;
+
+	h_adding = backing_file->f_path.dentry;
+	/*
+	 * h_adding can be local NFS.
+	 * in this case aufs cannot detect the loop.
+	 */
+	if (unlikely(h_adding->d_sb == sb))
+		return 1;
+	return !!au_test_subdir(h_adding, sb->s_root);
+}
+
+/* true if a kernel thread named 'loop[0-9].*' accesses a file */
+int au_test_loopback_kthread(void)
+{
+	int ret;
+	struct task_struct *tsk = current;
+	char c, comm[sizeof(tsk->comm)];
+
+	ret = 0;
+	if (tsk->flags & PF_KTHREAD) {
+		get_task_comm(comm, tsk);
+		c = comm[4];
+		ret = ('0' <= c && c <= '9'
+		       && !strncmp(comm, "loop", 4));
+	}
+
+	return ret;
+}
+
+/* ---------------------------------------------------------------------- */
+
+#define au_warn_loopback_step	16
+static int au_warn_loopback_nelem = au_warn_loopback_step;
+static unsigned long *au_warn_loopback_array;
+
+void au_warn_loopback(struct super_block *h_sb)
+{
+	int i, new_nelem;
+	unsigned long *a, magic;
+	static DEFINE_SPINLOCK(spin);
+
+	magic = h_sb->s_magic;
+	spin_lock(&spin);
+	a = au_warn_loopback_array;
+	for (i = 0; i < au_warn_loopback_nelem && *a; i++)
+		if (a[i] == magic) {
+			spin_unlock(&spin);
+			return;
+		}
+
+	/* h_sb is new to us, print it */
+	if (i < au_warn_loopback_nelem) {
+		a[i] = magic;
+		goto pr;
+	}
+
+	/* expand the array */
+	new_nelem = au_warn_loopback_nelem + au_warn_loopback_step;
+	a = au_kzrealloc(au_warn_loopback_array,
+			 au_warn_loopback_nelem * sizeof(unsigned long),
+			 new_nelem * sizeof(unsigned long), GFP_ATOMIC,
+			 /*may_shrink*/0);
+	if (a) {
+		au_warn_loopback_nelem = new_nelem;
+		au_warn_loopback_array = a;
+		a[i] = magic;
+		goto pr;
+	}
+
+	spin_unlock(&spin);
+	AuWarn1("realloc failed, ignored\n");
+	return;
+
+pr:
+	spin_unlock(&spin);
+	pr_warn("you may want to try another patch for loopback file "
+		"on %s(0x%lx) branch\n", au_sbtype(h_sb), magic);
+}
+
+int au_loopback_init(void)
+{
+	int err;
+	struct super_block *sb __maybe_unused;
+
+	BUILD_BUG_ON(sizeof(sb->s_magic) != sizeof(unsigned long));
+
+	err = 0;
+	au_warn_loopback_array = kcalloc(au_warn_loopback_step,
+					 sizeof(unsigned long), GFP_NOFS);
+	if (unlikely(!au_warn_loopback_array))
+		err = -ENOMEM;
+
+	return err;
+}
+
+void au_loopback_fin(void)
+{
+	if (backing_file_func)
+		symbol_put(loop_backing_file);
+	kfree(au_warn_loopback_array);
+}
diff --git a/fs/aufs/loop.h b/fs/aufs/loop.h
new file mode 100644
index 000000000..e6b76609a
--- /dev/null
+++ b/fs/aufs/loop.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * support for loopback mount as a branch
+ */
+
+#ifndef __AUFS_LOOP_H__
+#define __AUFS_LOOP_H__
+
+#ifdef __KERNEL__
+
+struct dentry;
+struct super_block;
+
+#ifdef CONFIG_AUFS_BDEV_LOOP
+/* drivers/block/loop.c */
+struct file *loop_backing_file(struct super_block *sb);
+
+/* loop.c */
+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding);
+int au_test_loopback_kthread(void);
+void au_warn_loopback(struct super_block *h_sb);
+
+int au_loopback_init(void);
+void au_loopback_fin(void);
+#else
+AuStubInt0(au_test_loopback_overlap, struct super_block *sb,
+	   struct dentry *h_adding)
+AuStubInt0(au_test_loopback_kthread, void)
+AuStubVoid(au_warn_loopback, struct super_block *h_sb)
+
+AuStubInt0(au_loopback_init, void)
+AuStubVoid(au_loopback_fin, void)
+#endif /* BLK_DEV_LOOP */
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_LOOP_H__ */
diff --git a/fs/aufs/magic.mk b/fs/aufs/magic.mk
new file mode 100644
index 000000000..7bc9eef3f
--- /dev/null
+++ b/fs/aufs/magic.mk
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# defined in ${srctree}/fs/fuse/inode.c
+# tristate
+ifdef CONFIG_FUSE_FS
+ccflags-y += -DFUSE_SUPER_MAGIC=0x65735546
+endif
+
+# defined in ${srctree}/fs/xfs/xfs_sb.h
+# tristate
+ifdef CONFIG_XFS_FS
+ccflags-y += -DXFS_SB_MAGIC=0x58465342
+endif
+
+# defined in ${srctree}/fs/configfs/mount.c
+# tristate
+ifdef CONFIG_CONFIGFS_FS
+ccflags-y += -DCONFIGFS_MAGIC=0x62656570
+endif
+
+# defined in ${srctree}/fs/ubifs/ubifs.h
+# tristate
+ifdef CONFIG_UBIFS_FS
+ccflags-y += -DUBIFS_SUPER_MAGIC=0x24051905
+endif
+
+# defined in ${srctree}/fs/hfsplus/hfsplus_raw.h
+# tristate
+ifdef CONFIG_HFSPLUS_FS
+ccflags-y += -DHFSPLUS_SUPER_MAGIC=0x482b
+endif
diff --git a/fs/aufs/module.c b/fs/aufs/module.c
new file mode 100644
index 000000000..9eeec710a
--- /dev/null
+++ b/fs/aufs/module.c
@@ -0,0 +1,266 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * module global variables and operations
+ */
+
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include "aufs.h"
+
+/* shrinkable realloc */
+void *au_krealloc(void *p, unsigned int new_sz, gfp_t gfp, int may_shrink)
+{
+	size_t sz;
+	int diff;
+
+	sz = 0;
+	diff = -1;
+	if (p) {
+#if 0 /* unused */
+		if (!new_sz) {
+			kfree(p);
+			p = NULL;
+			goto out;
+		}
+#else
+		AuDebugOn(!new_sz);
+#endif
+		sz = ksize(p);
+		diff = au_kmidx_sub(sz, new_sz);
+	}
+	if (sz && !diff)
+		goto out;
+
+	if (sz < new_sz)
+		/* expand or SLOB */
+		p = krealloc(p, new_sz, gfp);
+	else if (new_sz < sz && may_shrink) {
+		/* shrink */
+		void *q;
+
+		q = kmalloc(new_sz, gfp);
+		if (q) {
+			if (p) {
+				memcpy(q, p, new_sz);
+				kfree(p);
+			}
+			p = q;
+		} else
+			p = NULL;
+	}
+
+out:
+	return p;
+}
+
+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp,
+		   int may_shrink)
+{
+	p = au_krealloc(p, new_sz, gfp, may_shrink);
+	if (p && new_sz > nused)
+		memset(p + nused, 0, new_sz - nused);
+	return p;
+}
+
+/* ---------------------------------------------------------------------- */
+/*
+ * aufs caches
+ */
+struct kmem_cache *au_cache[AuCache_Last];
+
+static void au_cache_fin(void)
+{
+	int i;
+
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
+
+	/* excluding AuCache_HNOTIFY */
+	BUILD_BUG_ON(AuCache_HNOTIFY + 1 != AuCache_Last);
+	for (i = 0; i < AuCache_HNOTIFY; i++) {
+		kmem_cache_destroy(au_cache[i]);
+		au_cache[i] = NULL;
+	}
+}
+
+static int __init au_cache_init(void)
+{
+	au_cache[AuCache_DINFO] = AuCacheCtor(au_dinfo, au_di_init_once);
+	if (au_cache[AuCache_DINFO])
+		/* SLAB_DESTROY_BY_RCU */
+		au_cache[AuCache_ICNTNR] = AuCacheCtor(au_icntnr,
+						       au_icntnr_init_once);
+	if (au_cache[AuCache_ICNTNR])
+		au_cache[AuCache_FINFO] = AuCacheCtor(au_finfo,
+						      au_fi_init_once);
+	if (au_cache[AuCache_FINFO])
+		au_cache[AuCache_VDIR] = AuCache(au_vdir);
+	if (au_cache[AuCache_VDIR])
+		au_cache[AuCache_DEHSTR] = AuCache(au_vdir_dehstr);
+	if (au_cache[AuCache_DEHSTR])
+		return 0;
+
+	au_cache_fin();
+	return -ENOMEM;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int au_dir_roflags;
+
+#ifdef CONFIG_AUFS_SBILIST
+/*
+ * iterate_supers_type() doesn't protect us from
+ * remounting (branch management)
+ */
+struct hlist_bl_head au_sbilist;
+#endif
+
+/*
+ * functions for module interface.
+ */
+MODULE_LICENSE("GPL");
+/* MODULE_LICENSE("GPL v2"); */
+MODULE_AUTHOR("Junjiro R. Okajima <aufs-users@lists.sourceforge.net>");
+MODULE_DESCRIPTION(AUFS_NAME
+	" -- Advanced multi layered unification filesystem");
+MODULE_VERSION(AUFS_VERSION);
+MODULE_ALIAS_FS(AUFS_NAME);
+
+/* this module parameter has no meaning when SYSFS is disabled */
+int sysaufs_brs = 1;
+MODULE_PARM_DESC(brs, "use <sysfs>/fs/aufs/si_*/brN");
+module_param_named(brs, sysaufs_brs, int, S_IRUGO);
+
+/* this module parameter has no meaning when USER_NS is disabled */
+bool au_userns;
+MODULE_PARM_DESC(allow_userns, "allow unprivileged to mount under userns");
+module_param_named(allow_userns, au_userns, bool, S_IRUGO);
+
+/* ---------------------------------------------------------------------- */
+
+static char au_esc_chars[0x20 + 3]; /* 0x01-0x20, backslash, del, and NULL */
+
+int au_seq_path(struct seq_file *seq, struct path *path)
+{
+	int err;
+
+	err = seq_path(seq, path, au_esc_chars);
+	if (err >= 0)
+		err = 0;
+	else
+		err = -ENOMEM;
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int __init aufs_init(void)
+{
+	int err, i;
+	char *p;
+
+	p = au_esc_chars;
+	for (i = 1; i <= ' '; i++)
+		*p++ = i;
+	*p++ = '\\';
+	*p++ = '\x7f';
+	*p = 0;
+
+	au_dir_roflags = au_file_roflags(O_DIRECTORY | O_LARGEFILE);
+
+	memcpy(aufs_iop_nogetattr, aufs_iop, sizeof(aufs_iop));
+	for (i = 0; i < AuIop_Last; i++)
+		aufs_iop_nogetattr[i].getattr = NULL;
+
+	memset(au_cache, 0, sizeof(au_cache));	/* including hnotify */
+
+	au_sbilist_init();
+	sysaufs_brs_init();
+	au_debug_init();
+	au_dy_init();
+	err = sysaufs_init();
+	if (unlikely(err))
+		goto out;
+	err = au_procfs_init();
+	if (unlikely(err))
+		goto out_sysaufs;
+	err = au_wkq_init();
+	if (unlikely(err))
+		goto out_procfs;
+	err = au_loopback_init();
+	if (unlikely(err))
+		goto out_wkq;
+	err = au_hnotify_init();
+	if (unlikely(err))
+		goto out_loopback;
+	err = au_sysrq_init();
+	if (unlikely(err))
+		goto out_hin;
+	err = au_cache_init();
+	if (unlikely(err))
+		goto out_sysrq;
+
+	aufs_fs_type.fs_flags |= au_userns ? FS_USERNS_MOUNT : 0;
+	err = register_filesystem(&aufs_fs_type);
+	if (unlikely(err))
+		goto out_cache;
+
+	/* since we define pr_fmt, call printk directly */
+	printk(KERN_INFO AUFS_NAME " " AUFS_VERSION "\n");
+	goto out; /* success */
+
+out_cache:
+	au_cache_fin();
+out_sysrq:
+	au_sysrq_fin();
+out_hin:
+	au_hnotify_fin();
+out_loopback:
+	au_loopback_fin();
+out_wkq:
+	au_wkq_fin();
+out_procfs:
+	au_procfs_fin();
+out_sysaufs:
+	sysaufs_fin();
+	au_dy_fin();
+out:
+	return err;
+}
+
+static void __exit aufs_exit(void)
+{
+	unregister_filesystem(&aufs_fs_type);
+	au_cache_fin();
+	au_sysrq_fin();
+	au_hnotify_fin();
+	au_loopback_fin();
+	au_wkq_fin();
+	au_procfs_fin();
+	sysaufs_fin();
+	au_dy_fin();
+}
+
+module_init(aufs_init);
+module_exit(aufs_exit);
diff --git a/fs/aufs/module.h b/fs/aufs/module.h
new file mode 100644
index 000000000..6d222d1c6
--- /dev/null
+++ b/fs/aufs/module.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * module initialization and module-global
+ */
+
+#ifndef __AUFS_MODULE_H__
+#define __AUFS_MODULE_H__
+
+#ifdef __KERNEL__
+
+#include <linux/slab.h>
+
+struct path;
+struct seq_file;
+
+/* module parameters */
+extern int sysaufs_brs;
+extern bool au_userns;
+
+/* ---------------------------------------------------------------------- */
+
+extern int au_dir_roflags;
+
+void *au_krealloc(void *p, unsigned int new_sz, gfp_t gfp, int may_shrink);
+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp,
+		   int may_shrink);
+
+static inline int au_kmidx_sub(size_t sz, size_t new_sz)
+{
+#ifndef CONFIG_SLOB
+	return kmalloc_index(sz) - kmalloc_index(new_sz);
+#else
+	return -1; /* SLOB is untested */
+#endif
+}
+
+int au_seq_path(struct seq_file *seq, struct path *path);
+
+#ifdef CONFIG_PROC_FS
+/* procfs.c */
+int __init au_procfs_init(void);
+void au_procfs_fin(void);
+#else
+AuStubInt0(au_procfs_init, void);
+AuStubVoid(au_procfs_fin, void);
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+/* kmem cache */
+enum {
+	AuCache_DINFO,
+	AuCache_ICNTNR,
+	AuCache_FINFO,
+	AuCache_VDIR,
+	AuCache_DEHSTR,
+	AuCache_HNOTIFY, /* must be last */
+	AuCache_Last
+};
+
+extern struct kmem_cache *au_cache[AuCache_Last];
+
+#define AuCacheFlags		(SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD)
+#define AuCache(type)		KMEM_CACHE(type, AuCacheFlags)
+#define AuCacheCtor(type, ctor)	\
+	kmem_cache_create(#type, sizeof(struct type), \
+			  __alignof__(struct type), AuCacheFlags, ctor)
+
+#define AuCacheFuncs(name, index) \
+static inline struct au_##name *au_cache_alloc_##name(void) \
+{ return kmem_cache_alloc(au_cache[AuCache_##index], GFP_NOFS); } \
+static inline void au_cache_free_##name(struct au_##name *p) \
+{ kmem_cache_free(au_cache[AuCache_##index], p); }
+
+AuCacheFuncs(dinfo, DINFO);
+AuCacheFuncs(icntnr, ICNTNR);
+AuCacheFuncs(finfo, FINFO);
+AuCacheFuncs(vdir, VDIR);
+AuCacheFuncs(vdir_dehstr, DEHSTR);
+#ifdef CONFIG_AUFS_HNOTIFY
+AuCacheFuncs(hnotify, HNOTIFY);
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_MODULE_H__ */
diff --git a/fs/aufs/mvdown.c b/fs/aufs/mvdown.c
new file mode 100644
index 000000000..b51219658
--- /dev/null
+++ b/fs/aufs/mvdown.c
@@ -0,0 +1,704 @@
+/*
+ * Copyright (C) 2011-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * move-down, opposite of copy-up
+ */
+
+#include "aufs.h"
+
+struct au_mvd_args {
+	struct {
+		struct super_block *h_sb;
+		struct dentry *h_parent;
+		struct au_hinode *hdir;
+		struct inode *h_dir, *h_inode;
+		struct au_pin pin;
+	} info[AUFS_MVDOWN_NARRAY];
+
+	struct aufs_mvdown mvdown;
+	struct dentry *dentry, *parent;
+	struct inode *inode, *dir;
+	struct super_block *sb;
+	aufs_bindex_t bopq, bwh, bfound;
+	unsigned char rename_lock;
+};
+
+#define mvd_errno		mvdown.au_errno
+#define mvd_bsrc		mvdown.stbr[AUFS_MVDOWN_UPPER].bindex
+#define mvd_src_brid		mvdown.stbr[AUFS_MVDOWN_UPPER].brid
+#define mvd_bdst		mvdown.stbr[AUFS_MVDOWN_LOWER].bindex
+#define mvd_dst_brid		mvdown.stbr[AUFS_MVDOWN_LOWER].brid
+
+#define mvd_h_src_sb		info[AUFS_MVDOWN_UPPER].h_sb
+#define mvd_h_src_parent	info[AUFS_MVDOWN_UPPER].h_parent
+#define mvd_hdir_src		info[AUFS_MVDOWN_UPPER].hdir
+#define mvd_h_src_dir		info[AUFS_MVDOWN_UPPER].h_dir
+#define mvd_h_src_inode		info[AUFS_MVDOWN_UPPER].h_inode
+#define mvd_pin_src		info[AUFS_MVDOWN_UPPER].pin
+
+#define mvd_h_dst_sb		info[AUFS_MVDOWN_LOWER].h_sb
+#define mvd_h_dst_parent	info[AUFS_MVDOWN_LOWER].h_parent
+#define mvd_hdir_dst		info[AUFS_MVDOWN_LOWER].hdir
+#define mvd_h_dst_dir		info[AUFS_MVDOWN_LOWER].h_dir
+#define mvd_h_dst_inode		info[AUFS_MVDOWN_LOWER].h_inode
+#define mvd_pin_dst		info[AUFS_MVDOWN_LOWER].pin
+
+#define AU_MVD_PR(flag, ...) do {			\
+		if (flag)				\
+			pr_err(__VA_ARGS__);		\
+	} while (0)
+
+static int find_lower_writable(struct au_mvd_args *a)
+{
+	struct super_block *sb;
+	aufs_bindex_t bindex, bbot;
+	struct au_branch *br;
+
+	sb = a->sb;
+	bindex = a->mvd_bsrc;
+	bbot = au_sbbot(sb);
+	if (a->mvdown.flags & AUFS_MVDOWN_FHSM_LOWER)
+		for (bindex++; bindex <= bbot; bindex++) {
+			br = au_sbr(sb, bindex);
+			if (au_br_fhsm(br->br_perm)
+			    && !sb_rdonly(au_br_sb(br)))
+				return bindex;
+		}
+	else if (!(a->mvdown.flags & AUFS_MVDOWN_ROLOWER))
+		for (bindex++; bindex <= bbot; bindex++) {
+			br = au_sbr(sb, bindex);
+			if (!au_br_rdonly(br))
+				return bindex;
+		}
+	else
+		for (bindex++; bindex <= bbot; bindex++) {
+			br = au_sbr(sb, bindex);
+			if (!sb_rdonly(au_br_sb(br))) {
+				if (au_br_rdonly(br))
+					a->mvdown.flags
+						|= AUFS_MVDOWN_ROLOWER_R;
+				return bindex;
+			}
+		}
+
+	return -1;
+}
+
+/* make the parent dir on bdst */
+static int au_do_mkdir(const unsigned char dmsg, struct au_mvd_args *a)
+{
+	int err;
+
+	err = 0;
+	a->mvd_hdir_src = au_hi(a->dir, a->mvd_bsrc);
+	a->mvd_hdir_dst = au_hi(a->dir, a->mvd_bdst);
+	a->mvd_h_src_parent = au_h_dptr(a->parent, a->mvd_bsrc);
+	a->mvd_h_dst_parent = NULL;
+	if (au_dbbot(a->parent) >= a->mvd_bdst)
+		a->mvd_h_dst_parent = au_h_dptr(a->parent, a->mvd_bdst);
+	if (!a->mvd_h_dst_parent) {
+		err = au_cpdown_dirs(a->dentry, a->mvd_bdst);
+		if (unlikely(err)) {
+			AU_MVD_PR(dmsg, "cpdown_dirs failed\n");
+			goto out;
+		}
+		a->mvd_h_dst_parent = au_h_dptr(a->parent, a->mvd_bdst);
+	}
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/* lock them all */
+static int au_do_lock(const unsigned char dmsg, struct au_mvd_args *a)
+{
+	int err;
+	struct dentry *h_trap;
+
+	a->mvd_h_src_sb = au_sbr_sb(a->sb, a->mvd_bsrc);
+	a->mvd_h_dst_sb = au_sbr_sb(a->sb, a->mvd_bdst);
+	err = au_pin(&a->mvd_pin_dst, a->dentry, a->mvd_bdst,
+		     au_opt_udba(a->sb),
+		     AuPin_MNT_WRITE | AuPin_DI_LOCKED);
+	AuTraceErr(err);
+	if (unlikely(err)) {
+		AU_MVD_PR(dmsg, "pin_dst failed\n");
+		goto out;
+	}
+
+	if (a->mvd_h_src_sb != a->mvd_h_dst_sb) {
+		a->rename_lock = 0;
+		au_pin_init(&a->mvd_pin_src, a->dentry, a->mvd_bsrc,
+			    AuLsc_DI_PARENT, AuLsc_I_PARENT3,
+			    au_opt_udba(a->sb),
+			    AuPin_MNT_WRITE | AuPin_DI_LOCKED);
+		err = au_do_pin(&a->mvd_pin_src);
+		AuTraceErr(err);
+		a->mvd_h_src_dir = d_inode(a->mvd_h_src_parent);
+		if (unlikely(err)) {
+			AU_MVD_PR(dmsg, "pin_src failed\n");
+			goto out_dst;
+		}
+		goto out; /* success */
+	}
+
+	a->rename_lock = 1;
+	au_pin_hdir_unlock(&a->mvd_pin_dst);
+	err = au_pin(&a->mvd_pin_src, a->dentry, a->mvd_bsrc,
+		     au_opt_udba(a->sb),
+		     AuPin_MNT_WRITE | AuPin_DI_LOCKED);
+	AuTraceErr(err);
+	a->mvd_h_src_dir = d_inode(a->mvd_h_src_parent);
+	if (unlikely(err)) {
+		AU_MVD_PR(dmsg, "pin_src failed\n");
+		au_pin_hdir_lock(&a->mvd_pin_dst);
+		goto out_dst;
+	}
+	au_pin_hdir_unlock(&a->mvd_pin_src);
+	h_trap = vfsub_lock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
+				   a->mvd_h_dst_parent, a->mvd_hdir_dst);
+	if (h_trap) {
+		err = (h_trap != a->mvd_h_src_parent);
+		if (err)
+			err = (h_trap != a->mvd_h_dst_parent);
+	}
+	BUG_ON(err); /* it should never happen */
+	if (unlikely(a->mvd_h_src_dir != au_pinned_h_dir(&a->mvd_pin_src))) {
+		err = -EBUSY;
+		AuTraceErr(err);
+		vfsub_unlock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
+				    a->mvd_h_dst_parent, a->mvd_hdir_dst);
+		au_pin_hdir_lock(&a->mvd_pin_src);
+		au_unpin(&a->mvd_pin_src);
+		au_pin_hdir_lock(&a->mvd_pin_dst);
+		goto out_dst;
+	}
+	goto out; /* success */
+
+out_dst:
+	au_unpin(&a->mvd_pin_dst);
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+static void au_do_unlock(const unsigned char dmsg, struct au_mvd_args *a)
+{
+	if (!a->rename_lock)
+		au_unpin(&a->mvd_pin_src);
+	else {
+		vfsub_unlock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
+				    a->mvd_h_dst_parent, a->mvd_hdir_dst);
+		au_pin_hdir_lock(&a->mvd_pin_src);
+		au_unpin(&a->mvd_pin_src);
+		au_pin_hdir_lock(&a->mvd_pin_dst);
+	}
+	au_unpin(&a->mvd_pin_dst);
+}
+
+/* copy-down the file */
+static int au_do_cpdown(const unsigned char dmsg, struct au_mvd_args *a)
+{
+	int err;
+	struct au_cp_generic cpg = {
+		.dentry	= a->dentry,
+		.bdst	= a->mvd_bdst,
+		.bsrc	= a->mvd_bsrc,
+		.len	= -1,
+		.pin	= &a->mvd_pin_dst,
+		.flags	= AuCpup_DTIME | AuCpup_HOPEN
+	};
+
+	AuDbg("b%d, b%d\n", cpg.bsrc, cpg.bdst);
+	if (a->mvdown.flags & AUFS_MVDOWN_OWLOWER)
+		au_fset_cpup(cpg.flags, OVERWRITE);
+	if (a->mvdown.flags & AUFS_MVDOWN_ROLOWER)
+		au_fset_cpup(cpg.flags, RWDST);
+	err = au_sio_cpdown_simple(&cpg);
+	if (unlikely(err))
+		AU_MVD_PR(dmsg, "cpdown failed\n");
+
+	AuTraceErr(err);
+	return err;
+}
+
+/*
+ * unlink the whiteout on bdst if exist which may be created by UDBA while we
+ * were sleeping
+ */
+static int au_do_unlink_wh(const unsigned char dmsg, struct au_mvd_args *a)
+{
+	int err;
+	struct path h_path;
+	struct au_branch *br;
+	struct inode *delegated;
+
+	br = au_sbr(a->sb, a->mvd_bdst);
+	h_path.dentry = au_wh_lkup(a->mvd_h_dst_parent, &a->dentry->d_name, br);
+	err = PTR_ERR(h_path.dentry);
+	if (IS_ERR(h_path.dentry)) {
+		AU_MVD_PR(dmsg, "wh_lkup failed\n");
+		goto out;
+	}
+
+	err = 0;
+	if (d_is_positive(h_path.dentry)) {
+		h_path.mnt = au_br_mnt(br);
+		delegated = NULL;
+		err = vfsub_unlink(d_inode(a->mvd_h_dst_parent), &h_path,
+				   &delegated, /*force*/0);
+		if (unlikely(err == -EWOULDBLOCK)) {
+			pr_warn("cannot retry for NFSv4 delegation"
+				" for an internal unlink\n");
+			iput(delegated);
+		}
+		if (unlikely(err))
+			AU_MVD_PR(dmsg, "wh_unlink failed\n");
+	}
+	dput(h_path.dentry);
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/*
+ * unlink the topmost h_dentry
+ */
+static int au_do_unlink(const unsigned char dmsg, struct au_mvd_args *a)
+{
+	int err;
+	struct path h_path;
+	struct inode *delegated;
+
+	h_path.mnt = au_sbr_mnt(a->sb, a->mvd_bsrc);
+	h_path.dentry = au_h_dptr(a->dentry, a->mvd_bsrc);
+	delegated = NULL;
+	err = vfsub_unlink(a->mvd_h_src_dir, &h_path, &delegated, /*force*/0);
+	if (unlikely(err == -EWOULDBLOCK)) {
+		pr_warn("cannot retry for NFSv4 delegation"
+			" for an internal unlink\n");
+		iput(delegated);
+	}
+	if (unlikely(err))
+		AU_MVD_PR(dmsg, "unlink failed\n");
+
+	AuTraceErr(err);
+	return err;
+}
+
+/* Since mvdown succeeded, we ignore an error of this function */
+static void au_do_stfs(const unsigned char dmsg, struct au_mvd_args *a)
+{
+	int err;
+	struct au_branch *br;
+
+	a->mvdown.flags |= AUFS_MVDOWN_STFS_FAILED;
+	br = au_sbr(a->sb, a->mvd_bsrc);
+	err = au_br_stfs(br, &a->mvdown.stbr[AUFS_MVDOWN_UPPER].stfs);
+	if (!err) {
+		br = au_sbr(a->sb, a->mvd_bdst);
+		a->mvdown.stbr[AUFS_MVDOWN_LOWER].brid = br->br_id;
+		err = au_br_stfs(br, &a->mvdown.stbr[AUFS_MVDOWN_LOWER].stfs);
+	}
+	if (!err)
+		a->mvdown.flags &= ~AUFS_MVDOWN_STFS_FAILED;
+	else
+		AU_MVD_PR(dmsg, "statfs failed (%d), ignored\n", err);
+}
+
+/*
+ * copy-down the file and unlink the bsrc file.
+ * - unlink the bdst whout if exist
+ * - copy-down the file (with whtmp name and rename)
+ * - unlink the bsrc file
+ */
+static int au_do_mvdown(const unsigned char dmsg, struct au_mvd_args *a)
+{
+	int err;
+
+	err = au_do_mkdir(dmsg, a);
+	if (!err)
+		err = au_do_lock(dmsg, a);
+	if (unlikely(err))
+		goto out;
+
+	/*
+	 * do not revert the activities we made on bdst since they should be
+	 * harmless in aufs.
+	 */
+
+	err = au_do_cpdown(dmsg, a);
+	if (!err)
+		err = au_do_unlink_wh(dmsg, a);
+	if (!err && !(a->mvdown.flags & AUFS_MVDOWN_KUPPER))
+		err = au_do_unlink(dmsg, a);
+	if (unlikely(err))
+		goto out_unlock;
+
+	AuDbg("%pd2, 0x%x, %d --> %d\n",
+	      a->dentry, a->mvdown.flags, a->mvd_bsrc, a->mvd_bdst);
+	if (find_lower_writable(a) < 0)
+		a->mvdown.flags |= AUFS_MVDOWN_BOTTOM;
+
+	if (a->mvdown.flags & AUFS_MVDOWN_STFS)
+		au_do_stfs(dmsg, a);
+
+	/* maintain internal array */
+	if (!(a->mvdown.flags & AUFS_MVDOWN_KUPPER)) {
+		au_set_h_dptr(a->dentry, a->mvd_bsrc, NULL);
+		au_set_dbtop(a->dentry, a->mvd_bdst);
+		au_set_h_iptr(a->inode, a->mvd_bsrc, NULL, /*flags*/0);
+		au_set_ibtop(a->inode, a->mvd_bdst);
+	} else {
+		/* hide the lower */
+		au_set_h_dptr(a->dentry, a->mvd_bdst, NULL);
+		au_set_dbbot(a->dentry, a->mvd_bsrc);
+		au_set_h_iptr(a->inode, a->mvd_bdst, NULL, /*flags*/0);
+		au_set_ibbot(a->inode, a->mvd_bsrc);
+	}
+	if (au_dbbot(a->dentry) < a->mvd_bdst)
+		au_set_dbbot(a->dentry, a->mvd_bdst);
+	if (au_ibbot(a->inode) < a->mvd_bdst)
+		au_set_ibbot(a->inode, a->mvd_bdst);
+
+out_unlock:
+	au_do_unlock(dmsg, a);
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* make sure the file is idle */
+static int au_mvd_args_busy(const unsigned char dmsg, struct au_mvd_args *a)
+{
+	int err, plinked;
+
+	err = 0;
+	plinked = !!au_opt_test(au_mntflags(a->sb), PLINK);
+	if (au_dbtop(a->dentry) == a->mvd_bsrc
+	    && au_dcount(a->dentry) == 1
+	    && atomic_read(&a->inode->i_count) == 1
+	    /* && a->mvd_h_src_inode->i_nlink == 1 */
+	    && (!plinked || !au_plink_test(a->inode))
+	    && a->inode->i_nlink == 1)
+		goto out;
+
+	err = -EBUSY;
+	AU_MVD_PR(dmsg,
+		  "b%d, d{b%d, c%d?}, i{c%d?, l%u}, hi{l%u}, p{%d, %d}\n",
+		  a->mvd_bsrc, au_dbtop(a->dentry), au_dcount(a->dentry),
+		  atomic_read(&a->inode->i_count), a->inode->i_nlink,
+		  a->mvd_h_src_inode->i_nlink,
+		  plinked, plinked ? au_plink_test(a->inode) : 0);
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/* make sure the parent dir is fine */
+static int au_mvd_args_parent(const unsigned char dmsg,
+			      struct au_mvd_args *a)
+{
+	int err;
+	aufs_bindex_t bindex;
+
+	err = 0;
+	if (unlikely(au_alive_dir(a->parent))) {
+		err = -ENOENT;
+		AU_MVD_PR(dmsg, "parent dir is dead\n");
+		goto out;
+	}
+
+	a->bopq = au_dbdiropq(a->parent);
+	bindex = au_wbr_nonopq(a->dentry, a->mvd_bdst);
+	AuDbg("b%d\n", bindex);
+	if (unlikely((bindex >= 0 && bindex < a->mvd_bdst)
+		     || (a->bopq != -1 && a->bopq < a->mvd_bdst))) {
+		err = -EINVAL;
+		a->mvd_errno = EAU_MVDOWN_OPAQUE;
+		AU_MVD_PR(dmsg, "ancestor is opaque b%d, b%d\n",
+			  a->bopq, a->mvd_bdst);
+	}
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+static int au_mvd_args_intermediate(const unsigned char dmsg,
+				    struct au_mvd_args *a)
+{
+	int err;
+	struct au_dinfo *dinfo, *tmp;
+
+	/* lookup the next lower positive entry */
+	err = -ENOMEM;
+	tmp = au_di_alloc(a->sb, AuLsc_DI_TMP);
+	if (unlikely(!tmp))
+		goto out;
+
+	a->bfound = -1;
+	a->bwh = -1;
+	dinfo = au_di(a->dentry);
+	au_di_cp(tmp, dinfo);
+	au_di_swap(tmp, dinfo);
+
+	/* returns the number of positive dentries */
+	err = au_lkup_dentry(a->dentry, a->mvd_bsrc + 1,
+			     /* AuLkup_IGNORE_PERM */ 0);
+	if (!err)
+		a->bwh = au_dbwh(a->dentry);
+	else if (err > 0)
+		a->bfound = au_dbtop(a->dentry);
+
+	au_di_swap(tmp, dinfo);
+	au_rw_write_unlock(&tmp->di_rwsem);
+	au_di_free(tmp);
+	if (unlikely(err < 0))
+		AU_MVD_PR(dmsg, "failed look-up lower\n");
+
+	/*
+	 * here, we have these cases.
+	 * bfound == -1
+	 *	no positive dentry under bsrc. there are more sub-cases.
+	 *	bwh < 0
+	 *		there no whiteout, we can safely move-down.
+	 *	bwh <= bsrc
+	 *		impossible
+	 *	bsrc < bwh && bwh < bdst
+	 *		there is a whiteout on RO branch. cannot proceed.
+	 *	bwh == bdst
+	 *		there is a whiteout on the RW target branch. it should
+	 *		be removed.
+	 *	bdst < bwh
+	 *		there is a whiteout somewhere unrelated branch.
+	 * -1 < bfound && bfound <= bsrc
+	 *	impossible.
+	 * bfound < bdst
+	 *	found, but it is on RO branch between bsrc and bdst. cannot
+	 *	proceed.
+	 * bfound == bdst
+	 *	found, replace it if AUFS_MVDOWN_FORCE is set. otherwise return
+	 *	error.
+	 * bdst < bfound
+	 *	found, after we create the file on bdst, it will be hidden.
+	 */
+
+	AuDebugOn(a->bfound == -1
+		  && a->bwh != -1
+		  && a->bwh <= a->mvd_bsrc);
+	AuDebugOn(-1 < a->bfound
+		  && a->bfound <= a->mvd_bsrc);
+
+	err = -EINVAL;
+	if (a->bfound == -1
+	    && a->mvd_bsrc < a->bwh
+	    && a->bwh != -1
+	    && a->bwh < a->mvd_bdst) {
+		a->mvd_errno = EAU_MVDOWN_WHITEOUT;
+		AU_MVD_PR(dmsg, "bsrc %d, bdst %d, bfound %d, bwh %d\n",
+			  a->mvd_bsrc, a->mvd_bdst, a->bfound, a->bwh);
+		goto out;
+	} else if (a->bfound != -1 && a->bfound < a->mvd_bdst) {
+		a->mvd_errno = EAU_MVDOWN_UPPER;
+		AU_MVD_PR(dmsg, "bdst %d, bfound %d\n",
+			  a->mvd_bdst, a->bfound);
+		goto out;
+	}
+
+	err = 0; /* success */
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+static int au_mvd_args_exist(const unsigned char dmsg, struct au_mvd_args *a)
+{
+	int err;
+
+	err = 0;
+	if (!(a->mvdown.flags & AUFS_MVDOWN_OWLOWER)
+	    && a->bfound == a->mvd_bdst)
+		err = -EEXIST;
+	AuTraceErr(err);
+	return err;
+}
+
+static int au_mvd_args(const unsigned char dmsg, struct au_mvd_args *a)
+{
+	int err;
+	struct au_branch *br;
+
+	err = -EISDIR;
+	if (unlikely(S_ISDIR(a->inode->i_mode)))
+		goto out;
+
+	err = -EINVAL;
+	if (!(a->mvdown.flags & AUFS_MVDOWN_BRID_UPPER))
+		a->mvd_bsrc = au_ibtop(a->inode);
+	else {
+		a->mvd_bsrc = au_br_index(a->sb, a->mvd_src_brid);
+		if (unlikely(a->mvd_bsrc < 0
+			     || (a->mvd_bsrc < au_dbtop(a->dentry)
+				 || au_dbbot(a->dentry) < a->mvd_bsrc
+				 || !au_h_dptr(a->dentry, a->mvd_bsrc))
+			     || (a->mvd_bsrc < au_ibtop(a->inode)
+				 || au_ibbot(a->inode) < a->mvd_bsrc
+				 || !au_h_iptr(a->inode, a->mvd_bsrc)))) {
+			a->mvd_errno = EAU_MVDOWN_NOUPPER;
+			AU_MVD_PR(dmsg, "no upper\n");
+			goto out;
+		}
+	}
+	if (unlikely(a->mvd_bsrc == au_sbbot(a->sb))) {
+		a->mvd_errno = EAU_MVDOWN_BOTTOM;
+		AU_MVD_PR(dmsg, "on the bottom\n");
+		goto out;
+	}
+	a->mvd_h_src_inode = au_h_iptr(a->inode, a->mvd_bsrc);
+	br = au_sbr(a->sb, a->mvd_bsrc);
+	err = au_br_rdonly(br);
+	if (!(a->mvdown.flags & AUFS_MVDOWN_ROUPPER)) {
+		if (unlikely(err))
+			goto out;
+	} else if (!(vfsub_native_ro(a->mvd_h_src_inode)
+		     || IS_APPEND(a->mvd_h_src_inode))) {
+		if (err)
+			a->mvdown.flags |= AUFS_MVDOWN_ROUPPER_R;
+		/* go on */
+	} else
+		goto out;
+
+	err = -EINVAL;
+	if (!(a->mvdown.flags & AUFS_MVDOWN_BRID_LOWER)) {
+		a->mvd_bdst = find_lower_writable(a);
+		if (unlikely(a->mvd_bdst < 0)) {
+			a->mvd_errno = EAU_MVDOWN_BOTTOM;
+			AU_MVD_PR(dmsg, "no writable lower branch\n");
+			goto out;
+		}
+	} else {
+		a->mvd_bdst = au_br_index(a->sb, a->mvd_dst_brid);
+		if (unlikely(a->mvd_bdst < 0
+			     || au_sbbot(a->sb) < a->mvd_bdst)) {
+			a->mvd_errno = EAU_MVDOWN_NOLOWERBR;
+			AU_MVD_PR(dmsg, "no lower brid\n");
+			goto out;
+		}
+	}
+
+	err = au_mvd_args_busy(dmsg, a);
+	if (!err)
+		err = au_mvd_args_parent(dmsg, a);
+	if (!err)
+		err = au_mvd_args_intermediate(dmsg, a);
+	if (!err)
+		err = au_mvd_args_exist(dmsg, a);
+	if (!err)
+		AuDbg("b%d, b%d\n", a->mvd_bsrc, a->mvd_bdst);
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+int au_mvdown(struct dentry *dentry, struct aufs_mvdown __user *uarg)
+{
+	int err, e;
+	unsigned char dmsg;
+	struct au_mvd_args *args;
+	struct inode *inode;
+
+	inode = d_inode(dentry);
+	err = -EPERM;
+	if (unlikely(!capable(CAP_SYS_ADMIN)))
+		goto out;
+
+	err = -ENOMEM;
+	args = kmalloc(sizeof(*args), GFP_NOFS);
+	if (unlikely(!args))
+		goto out;
+
+	err = copy_from_user(&args->mvdown, uarg, sizeof(args->mvdown));
+	if (!err)
+		err = !access_ok(VERIFY_WRITE, uarg, sizeof(*uarg));
+	if (unlikely(err)) {
+		err = -EFAULT;
+		AuTraceErr(err);
+		goto out_free;
+	}
+	AuDbg("flags 0x%x\n", args->mvdown.flags);
+	args->mvdown.flags &= ~(AUFS_MVDOWN_ROLOWER_R | AUFS_MVDOWN_ROUPPER_R);
+	args->mvdown.au_errno = 0;
+	args->dentry = dentry;
+	args->inode = inode;
+	args->sb = dentry->d_sb;
+
+	err = -ENOENT;
+	dmsg = !!(args->mvdown.flags & AUFS_MVDOWN_DMSG);
+	args->parent = dget_parent(dentry);
+	args->dir = d_inode(args->parent);
+	inode_lock_nested(args->dir, I_MUTEX_PARENT);
+	dput(args->parent);
+	if (unlikely(args->parent != dentry->d_parent)) {
+		AU_MVD_PR(dmsg, "parent dir is moved\n");
+		goto out_dir;
+	}
+
+	inode_lock_nested(inode, I_MUTEX_CHILD);
+	err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_NOPLMW);
+	if (unlikely(err))
+		goto out_inode;
+
+	di_write_lock_parent(args->parent);
+	err = au_mvd_args(dmsg, args);
+	if (unlikely(err))
+		goto out_parent;
+
+	err = au_do_mvdown(dmsg, args);
+	if (unlikely(err))
+		goto out_parent;
+
+	au_cpup_attr_timesizes(args->dir);
+	au_cpup_attr_timesizes(inode);
+	if (!(args->mvdown.flags & AUFS_MVDOWN_KUPPER))
+		au_cpup_igen(inode, au_h_iptr(inode, args->mvd_bdst));
+	/* au_digen_dec(dentry); */
+
+out_parent:
+	di_write_unlock(args->parent);
+	aufs_read_unlock(dentry, AuLock_DW);
+out_inode:
+	inode_unlock(inode);
+out_dir:
+	inode_unlock(args->dir);
+out_free:
+	e = copy_to_user(uarg, &args->mvdown, sizeof(args->mvdown));
+	if (unlikely(e))
+		err = -EFAULT;
+	kfree(args);
+out:
+	AuTraceErr(err);
+	return err;
+}
diff --git a/fs/aufs/opts.c b/fs/aufs/opts.c
new file mode 100644
index 000000000..eb3fe3fb4
--- /dev/null
+++ b/fs/aufs/opts.c
@@ -0,0 +1,1891 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * mount options/flags
+ */
+
+#include <linux/namei.h>
+#include <linux/types.h> /* a distribution requires */
+#include <linux/parser.h>
+#include "aufs.h"
+
+/* ---------------------------------------------------------------------- */
+
+enum {
+	Opt_br,
+	Opt_add, Opt_del, Opt_mod, Opt_append, Opt_prepend,
+	Opt_idel, Opt_imod,
+	Opt_dirwh, Opt_rdcache, Opt_rdblk, Opt_rdhash,
+	Opt_rdblk_def, Opt_rdhash_def,
+	Opt_xino, Opt_noxino,
+	Opt_trunc_xino, Opt_trunc_xino_v, Opt_notrunc_xino,
+	Opt_trunc_xino_path, Opt_itrunc_xino,
+	Opt_trunc_xib, Opt_notrunc_xib,
+	Opt_shwh, Opt_noshwh,
+	Opt_plink, Opt_noplink, Opt_list_plink,
+	Opt_udba,
+	Opt_dio, Opt_nodio,
+	Opt_diropq_a, Opt_diropq_w,
+	Opt_warn_perm, Opt_nowarn_perm,
+	Opt_wbr_copyup, Opt_wbr_create,
+	Opt_fhsm_sec,
+	Opt_verbose, Opt_noverbose,
+	Opt_sum, Opt_nosum, Opt_wsum,
+	Opt_dirperm1, Opt_nodirperm1,
+	Opt_dirren, Opt_nodirren,
+	Opt_acl, Opt_noacl,
+	Opt_tail, Opt_ignore, Opt_ignore_silent, Opt_err
+};
+
+static match_table_t options = {
+	{Opt_br, "br=%s"},
+	{Opt_br, "br:%s"},
+
+	{Opt_add, "add=%d:%s"},
+	{Opt_add, "add:%d:%s"},
+	{Opt_add, "ins=%d:%s"},
+	{Opt_add, "ins:%d:%s"},
+	{Opt_append, "append=%s"},
+	{Opt_append, "append:%s"},
+	{Opt_prepend, "prepend=%s"},
+	{Opt_prepend, "prepend:%s"},
+
+	{Opt_del, "del=%s"},
+	{Opt_del, "del:%s"},
+	/* {Opt_idel, "idel:%d"}, */
+	{Opt_mod, "mod=%s"},
+	{Opt_mod, "mod:%s"},
+	/* {Opt_imod, "imod:%d:%s"}, */
+
+	{Opt_dirwh, "dirwh=%d"},
+
+	{Opt_xino, "xino=%s"},
+	{Opt_noxino, "noxino"},
+	{Opt_trunc_xino, "trunc_xino"},
+	{Opt_trunc_xino_v, "trunc_xino_v=%d:%d"},
+	{Opt_notrunc_xino, "notrunc_xino"},
+	{Opt_trunc_xino_path, "trunc_xino=%s"},
+	{Opt_itrunc_xino, "itrunc_xino=%d"},
+	/* {Opt_zxino, "zxino=%s"}, */
+	{Opt_trunc_xib, "trunc_xib"},
+	{Opt_notrunc_xib, "notrunc_xib"},
+
+#ifdef CONFIG_PROC_FS
+	{Opt_plink, "plink"},
+#else
+	{Opt_ignore_silent, "plink"},
+#endif
+
+	{Opt_noplink, "noplink"},
+
+#ifdef CONFIG_AUFS_DEBUG
+	{Opt_list_plink, "list_plink"},
+#endif
+
+	{Opt_udba, "udba=%s"},
+
+	{Opt_dio, "dio"},
+	{Opt_nodio, "nodio"},
+
+#ifdef CONFIG_AUFS_DIRREN
+	{Opt_dirren, "dirren"},
+	{Opt_nodirren, "nodirren"},
+#else
+	{Opt_ignore, "dirren"},
+	{Opt_ignore_silent, "nodirren"},
+#endif
+
+#ifdef CONFIG_AUFS_FHSM
+	{Opt_fhsm_sec, "fhsm_sec=%d"},
+#else
+	{Opt_ignore, "fhsm_sec=%d"},
+#endif
+
+	{Opt_diropq_a, "diropq=always"},
+	{Opt_diropq_a, "diropq=a"},
+	{Opt_diropq_w, "diropq=whiteouted"},
+	{Opt_diropq_w, "diropq=w"},
+
+	{Opt_warn_perm, "warn_perm"},
+	{Opt_nowarn_perm, "nowarn_perm"},
+
+	/* keep them temporary */
+	{Opt_ignore_silent, "nodlgt"},
+	{Opt_ignore, "clean_plink"},
+
+#ifdef CONFIG_AUFS_SHWH
+	{Opt_shwh, "shwh"},
+#endif
+	{Opt_noshwh, "noshwh"},
+
+	{Opt_dirperm1, "dirperm1"},
+	{Opt_nodirperm1, "nodirperm1"},
+
+	{Opt_verbose, "verbose"},
+	{Opt_verbose, "v"},
+	{Opt_noverbose, "noverbose"},
+	{Opt_noverbose, "quiet"},
+	{Opt_noverbose, "q"},
+	{Opt_noverbose, "silent"},
+
+	{Opt_sum, "sum"},
+	{Opt_nosum, "nosum"},
+	{Opt_wsum, "wsum"},
+
+	{Opt_rdcache, "rdcache=%d"},
+	{Opt_rdblk, "rdblk=%d"},
+	{Opt_rdblk_def, "rdblk=def"},
+	{Opt_rdhash, "rdhash=%d"},
+	{Opt_rdhash_def, "rdhash=def"},
+
+	{Opt_wbr_create, "create=%s"},
+	{Opt_wbr_create, "create_policy=%s"},
+	{Opt_wbr_copyup, "cpup=%s"},
+	{Opt_wbr_copyup, "copyup=%s"},
+	{Opt_wbr_copyup, "copyup_policy=%s"},
+
+	/* generic VFS flag */
+#ifdef CONFIG_FS_POSIX_ACL
+	{Opt_acl, "acl"},
+	{Opt_noacl, "noacl"},
+#else
+	{Opt_ignore, "acl"},
+	{Opt_ignore_silent, "noacl"},
+#endif
+
+	/* internal use for the scripts */
+	{Opt_ignore_silent, "si=%s"},
+
+	{Opt_br, "dirs=%s"},
+	{Opt_ignore, "debug=%d"},
+	{Opt_ignore, "delete=whiteout"},
+	{Opt_ignore, "delete=all"},
+	{Opt_ignore, "imap=%s"},
+
+	/* temporary workaround, due to old mount(8)? */
+	{Opt_ignore_silent, "relatime"},
+
+	{Opt_err, NULL}
+};
+
+/* ---------------------------------------------------------------------- */
+
+static const char *au_parser_pattern(int val, match_table_t tbl)
+{
+	struct match_token *p;
+
+	p = tbl;
+	while (p->pattern) {
+		if (p->token == val)
+			return p->pattern;
+		p++;
+	}
+	BUG();
+	return "??";
+}
+
+static const char *au_optstr(int *val, match_table_t tbl)
+{
+	struct match_token *p;
+	int v;
+
+	v = *val;
+	if (!v)
+		goto out;
+	p = tbl;
+	while (p->pattern) {
+		if (p->token
+		    && (v & p->token) == p->token) {
+			*val &= ~p->token;
+			return p->pattern;
+		}
+		p++;
+	}
+
+out:
+	return NULL;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static match_table_t brperm = {
+	{AuBrPerm_RO, AUFS_BRPERM_RO},
+	{AuBrPerm_RR, AUFS_BRPERM_RR},
+	{AuBrPerm_RW, AUFS_BRPERM_RW},
+	{0, NULL}
+};
+
+static match_table_t brattr = {
+	/* general */
+	{AuBrAttr_COO_REG, AUFS_BRATTR_COO_REG},
+	{AuBrAttr_COO_ALL, AUFS_BRATTR_COO_ALL},
+	/* 'unpin' attrib is meaningless since linux-3.18-rc1 */
+	{AuBrAttr_UNPIN, AUFS_BRATTR_UNPIN},
+#ifdef CONFIG_AUFS_FHSM
+	{AuBrAttr_FHSM, AUFS_BRATTR_FHSM},
+#endif
+#ifdef CONFIG_AUFS_XATTR
+	{AuBrAttr_ICEX, AUFS_BRATTR_ICEX},
+	{AuBrAttr_ICEX_SEC, AUFS_BRATTR_ICEX_SEC},
+	{AuBrAttr_ICEX_SYS, AUFS_BRATTR_ICEX_SYS},
+	{AuBrAttr_ICEX_TR, AUFS_BRATTR_ICEX_TR},
+	{AuBrAttr_ICEX_USR, AUFS_BRATTR_ICEX_USR},
+	{AuBrAttr_ICEX_OTH, AUFS_BRATTR_ICEX_OTH},
+#endif
+
+	/* ro/rr branch */
+	{AuBrRAttr_WH, AUFS_BRRATTR_WH},
+
+	/* rw branch */
+	{AuBrWAttr_MOO, AUFS_BRWATTR_MOO},
+	{AuBrWAttr_NoLinkWH, AUFS_BRWATTR_NLWH},
+
+	{0, NULL}
+};
+
+static int br_attr_val(char *str, match_table_t table, substring_t args[])
+{
+	int attr, v;
+	char *p;
+
+	attr = 0;
+	do {
+		p = strchr(str, '+');
+		if (p)
+			*p = 0;
+		v = match_token(str, table, args);
+		if (v) {
+			if (v & AuBrAttr_CMOO_Mask)
+				attr &= ~AuBrAttr_CMOO_Mask;
+			attr |= v;
+		} else {
+			if (p)
+				*p = '+';
+			pr_warn("ignored branch attribute %s\n", str);
+			break;
+		}
+		if (p)
+			str = p + 1;
+	} while (p);
+
+	return attr;
+}
+
+static int au_do_optstr_br_attr(au_br_perm_str_t *str, int perm)
+{
+	int sz;
+	const char *p;
+	char *q;
+
+	q = str->a;
+	*q = 0;
+	p = au_optstr(&perm, brattr);
+	if (p) {
+		sz = strlen(p);
+		memcpy(q, p, sz + 1);
+		q += sz;
+	} else
+		goto out;
+
+	do {
+		p = au_optstr(&perm, brattr);
+		if (p) {
+			*q++ = '+';
+			sz = strlen(p);
+			memcpy(q, p, sz + 1);
+			q += sz;
+		}
+	} while (p);
+
+out:
+	return q - str->a;
+}
+
+static int noinline_for_stack br_perm_val(char *perm)
+{
+	int val, bad, sz;
+	char *p;
+	substring_t args[MAX_OPT_ARGS];
+	au_br_perm_str_t attr;
+
+	p = strchr(perm, '+');
+	if (p)
+		*p = 0;
+	val = match_token(perm, brperm, args);
+	if (!val) {
+		if (p)
+			*p = '+';
+		pr_warn("ignored branch permission %s\n", perm);
+		val = AuBrPerm_RO;
+		goto out;
+	}
+	if (!p)
+		goto out;
+
+	val |= br_attr_val(p + 1, brattr, args);
+
+	bad = 0;
+	switch (val & AuBrPerm_Mask) {
+	case AuBrPerm_RO:
+	case AuBrPerm_RR:
+		bad = val & AuBrWAttr_Mask;
+		val &= ~AuBrWAttr_Mask;
+		break;
+	case AuBrPerm_RW:
+		bad = val & AuBrRAttr_Mask;
+		val &= ~AuBrRAttr_Mask;
+		break;
+	}
+
+	/*
+	 * 'unpin' attrib becomes meaningless since linux-3.18-rc1, but aufs
+	 * does not treat it as an error, just warning.
+	 * this is a tiny guard for the user operation.
+	 */
+	if (val & AuBrAttr_UNPIN) {
+		bad |= AuBrAttr_UNPIN;
+		val &= ~AuBrAttr_UNPIN;
+	}
+
+	if (unlikely(bad)) {
+		sz = au_do_optstr_br_attr(&attr, bad);
+		AuDebugOn(!sz);
+		pr_warn("ignored branch attribute %s\n", attr.a);
+	}
+
+out:
+	return val;
+}
+
+void au_optstr_br_perm(au_br_perm_str_t *str, int perm)
+{
+	au_br_perm_str_t attr;
+	const char *p;
+	char *q;
+	int sz;
+
+	q = str->a;
+	p = au_optstr(&perm, brperm);
+	AuDebugOn(!p || !*p);
+	sz = strlen(p);
+	memcpy(q, p, sz + 1);
+	q += sz;
+
+	sz = au_do_optstr_br_attr(&attr, perm);
+	if (sz) {
+		*q++ = '+';
+		memcpy(q, attr.a, sz + 1);
+	}
+
+	AuDebugOn(strlen(str->a) >= sizeof(str->a));
+}
+
+/* ---------------------------------------------------------------------- */
+
+static match_table_t udbalevel = {
+	{AuOpt_UDBA_REVAL, "reval"},
+	{AuOpt_UDBA_NONE, "none"},
+#ifdef CONFIG_AUFS_HNOTIFY
+	{AuOpt_UDBA_HNOTIFY, "notify"}, /* abstraction */
+#ifdef CONFIG_AUFS_HFSNOTIFY
+	{AuOpt_UDBA_HNOTIFY, "fsnotify"},
+#endif
+#endif
+	{-1, NULL}
+};
+
+static int noinline_for_stack udba_val(char *str)
+{
+	substring_t args[MAX_OPT_ARGS];
+
+	return match_token(str, udbalevel, args);
+}
+
+const char *au_optstr_udba(int udba)
+{
+	return au_parser_pattern(udba, udbalevel);
+}
+
+/* ---------------------------------------------------------------------- */
+
+static match_table_t au_wbr_create_policy = {
+	{AuWbrCreate_TDP, "tdp"},
+	{AuWbrCreate_TDP, "top-down-parent"},
+	{AuWbrCreate_RR, "rr"},
+	{AuWbrCreate_RR, "round-robin"},
+	{AuWbrCreate_MFS, "mfs"},
+	{AuWbrCreate_MFS, "most-free-space"},
+	{AuWbrCreate_MFSV, "mfs:%d"},
+	{AuWbrCreate_MFSV, "most-free-space:%d"},
+
+	/* top-down regardless the parent, and then mfs */
+	{AuWbrCreate_TDMFS, "tdmfs:%d"},
+	{AuWbrCreate_TDMFSV, "tdmfs:%d:%d"},
+
+	{AuWbrCreate_MFSRR, "mfsrr:%d"},
+	{AuWbrCreate_MFSRRV, "mfsrr:%d:%d"},
+	{AuWbrCreate_PMFS, "pmfs"},
+	{AuWbrCreate_PMFSV, "pmfs:%d"},
+	{AuWbrCreate_PMFSRR, "pmfsrr:%d"},
+	{AuWbrCreate_PMFSRRV, "pmfsrr:%d:%d"},
+
+	{-1, NULL}
+};
+
+static int au_wbr_mfs_wmark(substring_t *arg, char *str,
+			    struct au_opt_wbr_create *create)
+{
+	int err;
+	unsigned long long ull;
+
+	err = 0;
+	if (!match_u64(arg, &ull))
+		create->mfsrr_watermark = ull;
+	else {
+		pr_err("bad integer in %s\n", str);
+		err = -EINVAL;
+	}
+
+	return err;
+}
+
+static int au_wbr_mfs_sec(substring_t *arg, char *str,
+			  struct au_opt_wbr_create *create)
+{
+	int n, err;
+
+	err = 0;
+	if (!match_int(arg, &n) && 0 <= n && n <= AUFS_MFS_MAX_SEC)
+		create->mfs_second = n;
+	else {
+		pr_err("bad integer in %s\n", str);
+		err = -EINVAL;
+	}
+
+	return err;
+}
+
+static int noinline_for_stack
+au_wbr_create_val(char *str, struct au_opt_wbr_create *create)
+{
+	int err, e;
+	substring_t args[MAX_OPT_ARGS];
+
+	err = match_token(str, au_wbr_create_policy, args);
+	create->wbr_create = err;
+	switch (err) {
+	case AuWbrCreate_MFSRRV:
+	case AuWbrCreate_TDMFSV:
+	case AuWbrCreate_PMFSRRV:
+		e = au_wbr_mfs_wmark(&args[0], str, create);
+		if (!e)
+			e = au_wbr_mfs_sec(&args[1], str, create);
+		if (unlikely(e))
+			err = e;
+		break;
+	case AuWbrCreate_MFSRR:
+	case AuWbrCreate_TDMFS:
+	case AuWbrCreate_PMFSRR:
+		e = au_wbr_mfs_wmark(&args[0], str, create);
+		if (unlikely(e)) {
+			err = e;
+			break;
+		}
+		/*FALLTHROUGH*/
+	case AuWbrCreate_MFS:
+	case AuWbrCreate_PMFS:
+		create->mfs_second = AUFS_MFS_DEF_SEC;
+		break;
+	case AuWbrCreate_MFSV:
+	case AuWbrCreate_PMFSV:
+		e = au_wbr_mfs_sec(&args[0], str, create);
+		if (unlikely(e))
+			err = e;
+		break;
+	}
+
+	return err;
+}
+
+const char *au_optstr_wbr_create(int wbr_create)
+{
+	return au_parser_pattern(wbr_create, au_wbr_create_policy);
+}
+
+static match_table_t au_wbr_copyup_policy = {
+	{AuWbrCopyup_TDP, "tdp"},
+	{AuWbrCopyup_TDP, "top-down-parent"},
+	{AuWbrCopyup_BUP, "bup"},
+	{AuWbrCopyup_BUP, "bottom-up-parent"},
+	{AuWbrCopyup_BU, "bu"},
+	{AuWbrCopyup_BU, "bottom-up"},
+	{-1, NULL}
+};
+
+static int noinline_for_stack au_wbr_copyup_val(char *str)
+{
+	substring_t args[MAX_OPT_ARGS];
+
+	return match_token(str, au_wbr_copyup_policy, args);
+}
+
+const char *au_optstr_wbr_copyup(int wbr_copyup)
+{
+	return au_parser_pattern(wbr_copyup, au_wbr_copyup_policy);
+}
+
+/* ---------------------------------------------------------------------- */
+
+static const int lkup_dirflags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
+
+static void dump_opts(struct au_opts *opts)
+{
+#ifdef CONFIG_AUFS_DEBUG
+	/* reduce stack space */
+	union {
+		struct au_opt_add *add;
+		struct au_opt_del *del;
+		struct au_opt_mod *mod;
+		struct au_opt_xino *xino;
+		struct au_opt_xino_itrunc *xino_itrunc;
+		struct au_opt_wbr_create *create;
+	} u;
+	struct au_opt *opt;
+
+	opt = opts->opt;
+	while (opt->type != Opt_tail) {
+		switch (opt->type) {
+		case Opt_add:
+			u.add = &opt->add;
+			AuDbg("add {b%d, %s, 0x%x, %p}\n",
+				  u.add->bindex, u.add->pathname, u.add->perm,
+				  u.add->path.dentry);
+			break;
+		case Opt_del:
+		case Opt_idel:
+			u.del = &opt->del;
+			AuDbg("del {%s, %p}\n",
+			      u.del->pathname, u.del->h_path.dentry);
+			break;
+		case Opt_mod:
+		case Opt_imod:
+			u.mod = &opt->mod;
+			AuDbg("mod {%s, 0x%x, %p}\n",
+				  u.mod->path, u.mod->perm, u.mod->h_root);
+			break;
+		case Opt_append:
+			u.add = &opt->add;
+			AuDbg("append {b%d, %s, 0x%x, %p}\n",
+				  u.add->bindex, u.add->pathname, u.add->perm,
+				  u.add->path.dentry);
+			break;
+		case Opt_prepend:
+			u.add = &opt->add;
+			AuDbg("prepend {b%d, %s, 0x%x, %p}\n",
+				  u.add->bindex, u.add->pathname, u.add->perm,
+				  u.add->path.dentry);
+			break;
+		case Opt_dirwh:
+			AuDbg("dirwh %d\n", opt->dirwh);
+			break;
+		case Opt_rdcache:
+			AuDbg("rdcache %d\n", opt->rdcache);
+			break;
+		case Opt_rdblk:
+			AuDbg("rdblk %u\n", opt->rdblk);
+			break;
+		case Opt_rdblk_def:
+			AuDbg("rdblk_def\n");
+			break;
+		case Opt_rdhash:
+			AuDbg("rdhash %u\n", opt->rdhash);
+			break;
+		case Opt_rdhash_def:
+			AuDbg("rdhash_def\n");
+			break;
+		case Opt_xino:
+			u.xino = &opt->xino;
+			AuDbg("xino {%s %pD}\n", u.xino->path, u.xino->file);
+			break;
+		case Opt_trunc_xino:
+			AuLabel(trunc_xino);
+			break;
+		case Opt_notrunc_xino:
+			AuLabel(notrunc_xino);
+			break;
+		case Opt_trunc_xino_path:
+		case Opt_itrunc_xino:
+			u.xino_itrunc = &opt->xino_itrunc;
+			AuDbg("trunc_xino %d\n", u.xino_itrunc->bindex);
+			break;
+		case Opt_noxino:
+			AuLabel(noxino);
+			break;
+		case Opt_trunc_xib:
+			AuLabel(trunc_xib);
+			break;
+		case Opt_notrunc_xib:
+			AuLabel(notrunc_xib);
+			break;
+		case Opt_shwh:
+			AuLabel(shwh);
+			break;
+		case Opt_noshwh:
+			AuLabel(noshwh);
+			break;
+		case Opt_dirperm1:
+			AuLabel(dirperm1);
+			break;
+		case Opt_nodirperm1:
+			AuLabel(nodirperm1);
+			break;
+		case Opt_plink:
+			AuLabel(plink);
+			break;
+		case Opt_noplink:
+			AuLabel(noplink);
+			break;
+		case Opt_list_plink:
+			AuLabel(list_plink);
+			break;
+		case Opt_udba:
+			AuDbg("udba %d, %s\n",
+				  opt->udba, au_optstr_udba(opt->udba));
+			break;
+		case Opt_dio:
+			AuLabel(dio);
+			break;
+		case Opt_nodio:
+			AuLabel(nodio);
+			break;
+		case Opt_diropq_a:
+			AuLabel(diropq_a);
+			break;
+		case Opt_diropq_w:
+			AuLabel(diropq_w);
+			break;
+		case Opt_warn_perm:
+			AuLabel(warn_perm);
+			break;
+		case Opt_nowarn_perm:
+			AuLabel(nowarn_perm);
+			break;
+		case Opt_verbose:
+			AuLabel(verbose);
+			break;
+		case Opt_noverbose:
+			AuLabel(noverbose);
+			break;
+		case Opt_sum:
+			AuLabel(sum);
+			break;
+		case Opt_nosum:
+			AuLabel(nosum);
+			break;
+		case Opt_wsum:
+			AuLabel(wsum);
+			break;
+		case Opt_wbr_create:
+			u.create = &opt->wbr_create;
+			AuDbg("create %d, %s\n", u.create->wbr_create,
+				  au_optstr_wbr_create(u.create->wbr_create));
+			switch (u.create->wbr_create) {
+			case AuWbrCreate_MFSV:
+			case AuWbrCreate_PMFSV:
+				AuDbg("%d sec\n", u.create->mfs_second);
+				break;
+			case AuWbrCreate_MFSRR:
+			case AuWbrCreate_TDMFS:
+				AuDbg("%llu watermark\n",
+					  u.create->mfsrr_watermark);
+				break;
+			case AuWbrCreate_MFSRRV:
+			case AuWbrCreate_TDMFSV:
+			case AuWbrCreate_PMFSRRV:
+				AuDbg("%llu watermark, %d sec\n",
+					  u.create->mfsrr_watermark,
+					  u.create->mfs_second);
+				break;
+			}
+			break;
+		case Opt_wbr_copyup:
+			AuDbg("copyup %d, %s\n", opt->wbr_copyup,
+				  au_optstr_wbr_copyup(opt->wbr_copyup));
+			break;
+		case Opt_fhsm_sec:
+			AuDbg("fhsm_sec %u\n", opt->fhsm_second);
+			break;
+		case Opt_dirren:
+			AuLabel(dirren);
+			break;
+		case Opt_nodirren:
+			AuLabel(nodirren);
+			break;
+		case Opt_acl:
+			AuLabel(acl);
+			break;
+		case Opt_noacl:
+			AuLabel(noacl);
+			break;
+		default:
+			BUG();
+		}
+		opt++;
+	}
+#endif
+}
+
+void au_opts_free(struct au_opts *opts)
+{
+	struct au_opt *opt;
+
+	opt = opts->opt;
+	while (opt->type != Opt_tail) {
+		switch (opt->type) {
+		case Opt_add:
+		case Opt_append:
+		case Opt_prepend:
+			path_put(&opt->add.path);
+			break;
+		case Opt_del:
+		case Opt_idel:
+			path_put(&opt->del.h_path);
+			break;
+		case Opt_mod:
+		case Opt_imod:
+			dput(opt->mod.h_root);
+			break;
+		case Opt_xino:
+			fput(opt->xino.file);
+			break;
+		}
+		opt++;
+	}
+}
+
+static int opt_add(struct au_opt *opt, char *opt_str, unsigned long sb_flags,
+		   aufs_bindex_t bindex)
+{
+	int err;
+	struct au_opt_add *add = &opt->add;
+	char *p;
+
+	add->bindex = bindex;
+	add->perm = AuBrPerm_RO;
+	add->pathname = opt_str;
+	p = strchr(opt_str, '=');
+	if (p) {
+		*p++ = 0;
+		if (*p)
+			add->perm = br_perm_val(p);
+	}
+
+	err = vfsub_kern_path(add->pathname, lkup_dirflags, &add->path);
+	if (!err) {
+		if (!p) {
+			add->perm = AuBrPerm_RO;
+			if (au_test_fs_rr(add->path.dentry->d_sb))
+				add->perm = AuBrPerm_RR;
+			else if (!bindex && !(sb_flags & SB_RDONLY))
+				add->perm = AuBrPerm_RW;
+		}
+		opt->type = Opt_add;
+		goto out;
+	}
+	pr_err("lookup failed %s (%d)\n", add->pathname, err);
+	err = -EINVAL;
+
+out:
+	return err;
+}
+
+static int au_opts_parse_del(struct au_opt_del *del, substring_t args[])
+{
+	int err;
+
+	del->pathname = args[0].from;
+	AuDbg("del path %s\n", del->pathname);
+
+	err = vfsub_kern_path(del->pathname, lkup_dirflags, &del->h_path);
+	if (unlikely(err))
+		pr_err("lookup failed %s (%d)\n", del->pathname, err);
+
+	return err;
+}
+
+#if 0 /* reserved for future use */
+static int au_opts_parse_idel(struct super_block *sb, aufs_bindex_t bindex,
+			      struct au_opt_del *del, substring_t args[])
+{
+	int err;
+	struct dentry *root;
+
+	err = -EINVAL;
+	root = sb->s_root;
+	aufs_read_lock(root, AuLock_FLUSH);
+	if (bindex < 0 || au_sbbot(sb) < bindex) {
+		pr_err("out of bounds, %d\n", bindex);
+		goto out;
+	}
+
+	err = 0;
+	del->h_path.dentry = dget(au_h_dptr(root, bindex));
+	del->h_path.mnt = mntget(au_sbr_mnt(sb, bindex));
+
+out:
+	aufs_read_unlock(root, !AuLock_IR);
+	return err;
+}
+#endif
+
+static int noinline_for_stack
+au_opts_parse_mod(struct au_opt_mod *mod, substring_t args[])
+{
+	int err;
+	struct path path;
+	char *p;
+
+	err = -EINVAL;
+	mod->path = args[0].from;
+	p = strchr(mod->path, '=');
+	if (unlikely(!p)) {
+		pr_err("no permssion %s\n", args[0].from);
+		goto out;
+	}
+
+	*p++ = 0;
+	err = vfsub_kern_path(mod->path, lkup_dirflags, &path);
+	if (unlikely(err)) {
+		pr_err("lookup failed %s (%d)\n", mod->path, err);
+		goto out;
+	}
+
+	mod->perm = br_perm_val(p);
+	AuDbg("mod path %s, perm 0x%x, %s\n", mod->path, mod->perm, p);
+	mod->h_root = dget(path.dentry);
+	path_put(&path);
+
+out:
+	return err;
+}
+
+#if 0 /* reserved for future use */
+static int au_opts_parse_imod(struct super_block *sb, aufs_bindex_t bindex,
+			      struct au_opt_mod *mod, substring_t args[])
+{
+	int err;
+	struct dentry *root;
+
+	err = -EINVAL;
+	root = sb->s_root;
+	aufs_read_lock(root, AuLock_FLUSH);
+	if (bindex < 0 || au_sbbot(sb) < bindex) {
+		pr_err("out of bounds, %d\n", bindex);
+		goto out;
+	}
+
+	err = 0;
+	mod->perm = br_perm_val(args[1].from);
+	AuDbg("mod path %s, perm 0x%x, %s\n",
+	      mod->path, mod->perm, args[1].from);
+	mod->h_root = dget(au_h_dptr(root, bindex));
+
+out:
+	aufs_read_unlock(root, !AuLock_IR);
+	return err;
+}
+#endif
+
+static int au_opts_parse_xino(struct super_block *sb, struct au_opt_xino *xino,
+			      substring_t args[])
+{
+	int err;
+	struct file *file;
+
+	file = au_xino_create(sb, args[0].from, /*silent*/0);
+	err = PTR_ERR(file);
+	if (IS_ERR(file))
+		goto out;
+
+	err = -EINVAL;
+	if (unlikely(file->f_path.dentry->d_sb == sb)) {
+		fput(file);
+		pr_err("%s must be outside\n", args[0].from);
+		goto out;
+	}
+
+	err = 0;
+	xino->file = file;
+	xino->path = args[0].from;
+
+out:
+	return err;
+}
+
+static int noinline_for_stack
+au_opts_parse_xino_itrunc_path(struct super_block *sb,
+			       struct au_opt_xino_itrunc *xino_itrunc,
+			       substring_t args[])
+{
+	int err;
+	aufs_bindex_t bbot, bindex;
+	struct path path;
+	struct dentry *root;
+
+	err = vfsub_kern_path(args[0].from, lkup_dirflags, &path);
+	if (unlikely(err)) {
+		pr_err("lookup failed %s (%d)\n", args[0].from, err);
+		goto out;
+	}
+
+	xino_itrunc->bindex = -1;
+	root = sb->s_root;
+	aufs_read_lock(root, AuLock_FLUSH);
+	bbot = au_sbbot(sb);
+	for (bindex = 0; bindex <= bbot; bindex++) {
+		if (au_h_dptr(root, bindex) == path.dentry) {
+			xino_itrunc->bindex = bindex;
+			break;
+		}
+	}
+	aufs_read_unlock(root, !AuLock_IR);
+	path_put(&path);
+
+	if (unlikely(xino_itrunc->bindex < 0)) {
+		pr_err("no such branch %s\n", args[0].from);
+		err = -EINVAL;
+	}
+
+out:
+	return err;
+}
+
+/* called without aufs lock */
+int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts)
+{
+	int err, n, token;
+	aufs_bindex_t bindex;
+	unsigned char skipped;
+	struct dentry *root;
+	struct au_opt *opt, *opt_tail;
+	char *opt_str;
+	/* reduce the stack space */
+	union {
+		struct au_opt_xino_itrunc *xino_itrunc;
+		struct au_opt_wbr_create *create;
+	} u;
+	struct {
+		substring_t args[MAX_OPT_ARGS];
+	} *a;
+
+	err = -ENOMEM;
+	a = kmalloc(sizeof(*a), GFP_NOFS);
+	if (unlikely(!a))
+		goto out;
+
+	root = sb->s_root;
+	err = 0;
+	bindex = 0;
+	opt = opts->opt;
+	opt_tail = opt + opts->max_opt - 1;
+	opt->type = Opt_tail;
+	while (!err && (opt_str = strsep(&str, ",")) && *opt_str) {
+		err = -EINVAL;
+		skipped = 0;
+		token = match_token(opt_str, options, a->args);
+		switch (token) {
+		case Opt_br:
+			err = 0;
+			while (!err && (opt_str = strsep(&a->args[0].from, ":"))
+			       && *opt_str) {
+				err = opt_add(opt, opt_str, opts->sb_flags,
+					      bindex++);
+				if (unlikely(!err && ++opt > opt_tail)) {
+					err = -E2BIG;
+					break;
+				}
+				opt->type = Opt_tail;
+				skipped = 1;
+			}
+			break;
+		case Opt_add:
+			if (unlikely(match_int(&a->args[0], &n))) {
+				pr_err("bad integer in %s\n", opt_str);
+				break;
+			}
+			bindex = n;
+			err = opt_add(opt, a->args[1].from, opts->sb_flags,
+				      bindex);
+			if (!err)
+				opt->type = token;
+			break;
+		case Opt_append:
+			err = opt_add(opt, a->args[0].from, opts->sb_flags,
+				      /*dummy bindex*/1);
+			if (!err)
+				opt->type = token;
+			break;
+		case Opt_prepend:
+			err = opt_add(opt, a->args[0].from, opts->sb_flags,
+				      /*bindex*/0);
+			if (!err)
+				opt->type = token;
+			break;
+		case Opt_del:
+			err = au_opts_parse_del(&opt->del, a->args);
+			if (!err)
+				opt->type = token;
+			break;
+#if 0 /* reserved for future use */
+		case Opt_idel:
+			del->pathname = "(indexed)";
+			if (unlikely(match_int(&args[0], &n))) {
+				pr_err("bad integer in %s\n", opt_str);
+				break;
+			}
+			err = au_opts_parse_idel(sb, n, &opt->del, a->args);
+			if (!err)
+				opt->type = token;
+			break;
+#endif
+		case Opt_mod:
+			err = au_opts_parse_mod(&opt->mod, a->args);
+			if (!err)
+				opt->type = token;
+			break;
+#ifdef IMOD /* reserved for future use */
+		case Opt_imod:
+			u.mod->path = "(indexed)";
+			if (unlikely(match_int(&a->args[0], &n))) {
+				pr_err("bad integer in %s\n", opt_str);
+				break;
+			}
+			err = au_opts_parse_imod(sb, n, &opt->mod, a->args);
+			if (!err)
+				opt->type = token;
+			break;
+#endif
+		case Opt_xino:
+			err = au_opts_parse_xino(sb, &opt->xino, a->args);
+			if (!err)
+				opt->type = token;
+			break;
+
+		case Opt_trunc_xino_path:
+			err = au_opts_parse_xino_itrunc_path
+				(sb, &opt->xino_itrunc, a->args);
+			if (!err)
+				opt->type = token;
+			break;
+
+		case Opt_itrunc_xino:
+			u.xino_itrunc = &opt->xino_itrunc;
+			if (unlikely(match_int(&a->args[0], &n))) {
+				pr_err("bad integer in %s\n", opt_str);
+				break;
+			}
+			u.xino_itrunc->bindex = n;
+			aufs_read_lock(root, AuLock_FLUSH);
+			if (n < 0 || au_sbbot(sb) < n) {
+				pr_err("out of bounds, %d\n", n);
+				aufs_read_unlock(root, !AuLock_IR);
+				break;
+			}
+			aufs_read_unlock(root, !AuLock_IR);
+			err = 0;
+			opt->type = token;
+			break;
+
+		case Opt_dirwh:
+			if (unlikely(match_int(&a->args[0], &opt->dirwh)))
+				break;
+			err = 0;
+			opt->type = token;
+			break;
+
+		case Opt_rdcache:
+			if (unlikely(match_int(&a->args[0], &n))) {
+				pr_err("bad integer in %s\n", opt_str);
+				break;
+			}
+			if (unlikely(n > AUFS_RDCACHE_MAX)) {
+				pr_err("rdcache must be smaller than %d\n",
+				       AUFS_RDCACHE_MAX);
+				break;
+			}
+			opt->rdcache = n;
+			err = 0;
+			opt->type = token;
+			break;
+		case Opt_rdblk:
+			if (unlikely(match_int(&a->args[0], &n)
+				     || n < 0
+				     || n > KMALLOC_MAX_SIZE)) {
+				pr_err("bad integer in %s\n", opt_str);
+				break;
+			}
+			if (unlikely(n && n < NAME_MAX)) {
+				pr_err("rdblk must be larger than %d\n",
+				       NAME_MAX);
+				break;
+			}
+			opt->rdblk = n;
+			err = 0;
+			opt->type = token;
+			break;
+		case Opt_rdhash:
+			if (unlikely(match_int(&a->args[0], &n)
+				     || n < 0
+				     || n * sizeof(struct hlist_head)
+				     > KMALLOC_MAX_SIZE)) {
+				pr_err("bad integer in %s\n", opt_str);
+				break;
+			}
+			opt->rdhash = n;
+			err = 0;
+			opt->type = token;
+			break;
+
+		case Opt_trunc_xino:
+		case Opt_notrunc_xino:
+		case Opt_noxino:
+		case Opt_trunc_xib:
+		case Opt_notrunc_xib:
+		case Opt_shwh:
+		case Opt_noshwh:
+		case Opt_dirperm1:
+		case Opt_nodirperm1:
+		case Opt_plink:
+		case Opt_noplink:
+		case Opt_list_plink:
+		case Opt_dio:
+		case Opt_nodio:
+		case Opt_diropq_a:
+		case Opt_diropq_w:
+		case Opt_warn_perm:
+		case Opt_nowarn_perm:
+		case Opt_verbose:
+		case Opt_noverbose:
+		case Opt_sum:
+		case Opt_nosum:
+		case Opt_wsum:
+		case Opt_rdblk_def:
+		case Opt_rdhash_def:
+		case Opt_dirren:
+		case Opt_nodirren:
+		case Opt_acl:
+		case Opt_noacl:
+			err = 0;
+			opt->type = token;
+			break;
+
+		case Opt_udba:
+			opt->udba = udba_val(a->args[0].from);
+			if (opt->udba >= 0) {
+				err = 0;
+				opt->type = token;
+			} else
+				pr_err("wrong value, %s\n", opt_str);
+			break;
+
+		case Opt_wbr_create:
+			u.create = &opt->wbr_create;
+			u.create->wbr_create
+				= au_wbr_create_val(a->args[0].from, u.create);
+			if (u.create->wbr_create >= 0) {
+				err = 0;
+				opt->type = token;
+			} else
+				pr_err("wrong value, %s\n", opt_str);
+			break;
+		case Opt_wbr_copyup:
+			opt->wbr_copyup = au_wbr_copyup_val(a->args[0].from);
+			if (opt->wbr_copyup >= 0) {
+				err = 0;
+				opt->type = token;
+			} else
+				pr_err("wrong value, %s\n", opt_str);
+			break;
+
+		case Opt_fhsm_sec:
+			if (unlikely(match_int(&a->args[0], &n)
+				     || n < 0)) {
+				pr_err("bad integer in %s\n", opt_str);
+				break;
+			}
+			if (sysaufs_brs) {
+				opt->fhsm_second = n;
+				opt->type = token;
+			} else
+				pr_warn("ignored %s\n", opt_str);
+			err = 0;
+			break;
+
+		case Opt_ignore:
+			pr_warn("ignored %s\n", opt_str);
+			/*FALLTHROUGH*/
+		case Opt_ignore_silent:
+			skipped = 1;
+			err = 0;
+			break;
+		case Opt_err:
+			pr_err("unknown option %s\n", opt_str);
+			break;
+		}
+
+		if (!err && !skipped) {
+			if (unlikely(++opt > opt_tail)) {
+				err = -E2BIG;
+				opt--;
+				opt->type = Opt_tail;
+				break;
+			}
+			opt->type = Opt_tail;
+		}
+	}
+
+	kfree(a);
+	dump_opts(opts);
+	if (unlikely(err))
+		au_opts_free(opts);
+
+out:
+	return err;
+}
+
+static int au_opt_wbr_create(struct super_block *sb,
+			     struct au_opt_wbr_create *create)
+{
+	int err;
+	struct au_sbinfo *sbinfo;
+
+	SiMustWriteLock(sb);
+
+	err = 1; /* handled */
+	sbinfo = au_sbi(sb);
+	if (sbinfo->si_wbr_create_ops->fin) {
+		err = sbinfo->si_wbr_create_ops->fin(sb);
+		if (!err)
+			err = 1;
+	}
+
+	sbinfo->si_wbr_create = create->wbr_create;
+	sbinfo->si_wbr_create_ops = au_wbr_create_ops + create->wbr_create;
+	switch (create->wbr_create) {
+	case AuWbrCreate_MFSRRV:
+	case AuWbrCreate_MFSRR:
+	case AuWbrCreate_TDMFS:
+	case AuWbrCreate_TDMFSV:
+	case AuWbrCreate_PMFSRR:
+	case AuWbrCreate_PMFSRRV:
+		sbinfo->si_wbr_mfs.mfsrr_watermark = create->mfsrr_watermark;
+		/*FALLTHROUGH*/
+	case AuWbrCreate_MFS:
+	case AuWbrCreate_MFSV:
+	case AuWbrCreate_PMFS:
+	case AuWbrCreate_PMFSV:
+		sbinfo->si_wbr_mfs.mfs_expire
+			= msecs_to_jiffies(create->mfs_second * MSEC_PER_SEC);
+		break;
+	}
+
+	if (sbinfo->si_wbr_create_ops->init)
+		sbinfo->si_wbr_create_ops->init(sb); /* ignore */
+
+	return err;
+}
+
+/*
+ * returns,
+ * plus: processed without an error
+ * zero: unprocessed
+ */
+static int au_opt_simple(struct super_block *sb, struct au_opt *opt,
+			 struct au_opts *opts)
+{
+	int err;
+	struct au_sbinfo *sbinfo;
+
+	SiMustWriteLock(sb);
+
+	err = 1; /* handled */
+	sbinfo = au_sbi(sb);
+	switch (opt->type) {
+	case Opt_udba:
+		sbinfo->si_mntflags &= ~AuOptMask_UDBA;
+		sbinfo->si_mntflags |= opt->udba;
+		opts->given_udba |= opt->udba;
+		break;
+
+	case Opt_plink:
+		au_opt_set(sbinfo->si_mntflags, PLINK);
+		break;
+	case Opt_noplink:
+		if (au_opt_test(sbinfo->si_mntflags, PLINK))
+			au_plink_put(sb, /*verbose*/1);
+		au_opt_clr(sbinfo->si_mntflags, PLINK);
+		break;
+	case Opt_list_plink:
+		if (au_opt_test(sbinfo->si_mntflags, PLINK))
+			au_plink_list(sb);
+		break;
+
+	case Opt_dio:
+		au_opt_set(sbinfo->si_mntflags, DIO);
+		au_fset_opts(opts->flags, REFRESH_DYAOP);
+		break;
+	case Opt_nodio:
+		au_opt_clr(sbinfo->si_mntflags, DIO);
+		au_fset_opts(opts->flags, REFRESH_DYAOP);
+		break;
+
+	case Opt_fhsm_sec:
+		au_fhsm_set(sbinfo, opt->fhsm_second);
+		break;
+
+	case Opt_diropq_a:
+		au_opt_set(sbinfo->si_mntflags, ALWAYS_DIROPQ);
+		break;
+	case Opt_diropq_w:
+		au_opt_clr(sbinfo->si_mntflags, ALWAYS_DIROPQ);
+		break;
+
+	case Opt_warn_perm:
+		au_opt_set(sbinfo->si_mntflags, WARN_PERM);
+		break;
+	case Opt_nowarn_perm:
+		au_opt_clr(sbinfo->si_mntflags, WARN_PERM);
+		break;
+
+	case Opt_verbose:
+		au_opt_set(sbinfo->si_mntflags, VERBOSE);
+		break;
+	case Opt_noverbose:
+		au_opt_clr(sbinfo->si_mntflags, VERBOSE);
+		break;
+
+	case Opt_sum:
+		au_opt_set(sbinfo->si_mntflags, SUM);
+		break;
+	case Opt_wsum:
+		au_opt_clr(sbinfo->si_mntflags, SUM);
+		au_opt_set(sbinfo->si_mntflags, SUM_W);
+	case Opt_nosum:
+		au_opt_clr(sbinfo->si_mntflags, SUM);
+		au_opt_clr(sbinfo->si_mntflags, SUM_W);
+		break;
+
+	case Opt_wbr_create:
+		err = au_opt_wbr_create(sb, &opt->wbr_create);
+		break;
+	case Opt_wbr_copyup:
+		sbinfo->si_wbr_copyup = opt->wbr_copyup;
+		sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + opt->wbr_copyup;
+		break;
+
+	case Opt_dirwh:
+		sbinfo->si_dirwh = opt->dirwh;
+		break;
+
+	case Opt_rdcache:
+		sbinfo->si_rdcache
+			= msecs_to_jiffies(opt->rdcache * MSEC_PER_SEC);
+		break;
+	case Opt_rdblk:
+		sbinfo->si_rdblk = opt->rdblk;
+		break;
+	case Opt_rdblk_def:
+		sbinfo->si_rdblk = AUFS_RDBLK_DEF;
+		break;
+	case Opt_rdhash:
+		sbinfo->si_rdhash = opt->rdhash;
+		break;
+	case Opt_rdhash_def:
+		sbinfo->si_rdhash = AUFS_RDHASH_DEF;
+		break;
+
+	case Opt_shwh:
+		au_opt_set(sbinfo->si_mntflags, SHWH);
+		break;
+	case Opt_noshwh:
+		au_opt_clr(sbinfo->si_mntflags, SHWH);
+		break;
+
+	case Opt_dirperm1:
+		au_opt_set(sbinfo->si_mntflags, DIRPERM1);
+		break;
+	case Opt_nodirperm1:
+		au_opt_clr(sbinfo->si_mntflags, DIRPERM1);
+		break;
+
+	case Opt_trunc_xino:
+		au_opt_set(sbinfo->si_mntflags, TRUNC_XINO);
+		break;
+	case Opt_notrunc_xino:
+		au_opt_clr(sbinfo->si_mntflags, TRUNC_XINO);
+		break;
+
+	case Opt_trunc_xino_path:
+	case Opt_itrunc_xino:
+		err = au_xino_trunc(sb, opt->xino_itrunc.bindex);
+		if (!err)
+			err = 1;
+		break;
+
+	case Opt_trunc_xib:
+		au_fset_opts(opts->flags, TRUNC_XIB);
+		break;
+	case Opt_notrunc_xib:
+		au_fclr_opts(opts->flags, TRUNC_XIB);
+		break;
+
+	case Opt_dirren:
+		err = 1;
+		if (!au_opt_test(sbinfo->si_mntflags, DIRREN)) {
+			err = au_dr_opt_set(sb);
+			if (!err)
+				err = 1;
+		}
+		if (err == 1)
+			au_opt_set(sbinfo->si_mntflags, DIRREN);
+		break;
+	case Opt_nodirren:
+		err = 1;
+		if (au_opt_test(sbinfo->si_mntflags, DIRREN)) {
+			err = au_dr_opt_clr(sb, au_ftest_opts(opts->flags,
+							      DR_FLUSHED));
+			if (!err)
+				err = 1;
+		}
+		if (err == 1)
+			au_opt_clr(sbinfo->si_mntflags, DIRREN);
+		break;
+
+	case Opt_acl:
+		sb->s_flags |= SB_POSIXACL;
+		break;
+	case Opt_noacl:
+		sb->s_flags &= ~SB_POSIXACL;
+		break;
+
+	default:
+		err = 0;
+		break;
+	}
+
+	return err;
+}
+
+/*
+ * returns tri-state.
+ * plus: processed without an error
+ * zero: unprocessed
+ * minus: error
+ */
+static int au_opt_br(struct super_block *sb, struct au_opt *opt,
+		     struct au_opts *opts)
+{
+	int err, do_refresh;
+
+	err = 0;
+	switch (opt->type) {
+	case Opt_append:
+		opt->add.bindex = au_sbbot(sb) + 1;
+		if (opt->add.bindex < 0)
+			opt->add.bindex = 0;
+		goto add;
+	case Opt_prepend:
+		opt->add.bindex = 0;
+	add: /* indented label */
+	case Opt_add:
+		err = au_br_add(sb, &opt->add,
+				au_ftest_opts(opts->flags, REMOUNT));
+		if (!err) {
+			err = 1;
+			au_fset_opts(opts->flags, REFRESH);
+		}
+		break;
+
+	case Opt_del:
+	case Opt_idel:
+		err = au_br_del(sb, &opt->del,
+				au_ftest_opts(opts->flags, REMOUNT));
+		if (!err) {
+			err = 1;
+			au_fset_opts(opts->flags, TRUNC_XIB);
+			au_fset_opts(opts->flags, REFRESH);
+		}
+		break;
+
+	case Opt_mod:
+	case Opt_imod:
+		err = au_br_mod(sb, &opt->mod,
+				au_ftest_opts(opts->flags, REMOUNT),
+				&do_refresh);
+		if (!err) {
+			err = 1;
+			if (do_refresh)
+				au_fset_opts(opts->flags, REFRESH);
+		}
+		break;
+	}
+	return err;
+}
+
+static int au_opt_xino(struct super_block *sb, struct au_opt *opt,
+		       struct au_opt_xino **opt_xino,
+		       struct au_opts *opts)
+{
+	int err;
+	aufs_bindex_t bbot, bindex;
+	struct dentry *root, *parent, *h_root;
+
+	err = 0;
+	switch (opt->type) {
+	case Opt_xino:
+		err = au_xino_set(sb, &opt->xino,
+				  !!au_ftest_opts(opts->flags, REMOUNT));
+		if (unlikely(err))
+			break;
+
+		*opt_xino = &opt->xino;
+		au_xino_brid_set(sb, -1);
+
+		/* safe d_parent access */
+		parent = opt->xino.file->f_path.dentry->d_parent;
+		root = sb->s_root;
+		bbot = au_sbbot(sb);
+		for (bindex = 0; bindex <= bbot; bindex++) {
+			h_root = au_h_dptr(root, bindex);
+			if (h_root == parent) {
+				au_xino_brid_set(sb, au_sbr_id(sb, bindex));
+				break;
+			}
+		}
+		break;
+
+	case Opt_noxino:
+		au_xino_clr(sb);
+		au_xino_brid_set(sb, -1);
+		*opt_xino = (void *)-1;
+		break;
+	}
+
+	return err;
+}
+
+int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
+		   unsigned int pending)
+{
+	int err, fhsm;
+	aufs_bindex_t bindex, bbot;
+	unsigned char do_plink, skip, do_free, can_no_dreval;
+	struct au_branch *br;
+	struct au_wbr *wbr;
+	struct dentry *root, *dentry;
+	struct inode *dir, *h_dir;
+	struct au_sbinfo *sbinfo;
+	struct au_hinode *hdir;
+
+	SiMustAnyLock(sb);
+
+	sbinfo = au_sbi(sb);
+	AuDebugOn(!(sbinfo->si_mntflags & AuOptMask_UDBA));
+
+	if (!(sb_flags & SB_RDONLY)) {
+		if (unlikely(!au_br_writable(au_sbr_perm(sb, 0))))
+			pr_warn("first branch should be rw\n");
+		if (unlikely(au_opt_test(sbinfo->si_mntflags, SHWH)))
+			pr_warn_once("shwh should be used with ro\n");
+	}
+
+	if (au_opt_test((sbinfo->si_mntflags | pending), UDBA_HNOTIFY)
+	    && !au_opt_test(sbinfo->si_mntflags, XINO))
+		pr_warn_once("udba=*notify requires xino\n");
+
+	if (au_opt_test(sbinfo->si_mntflags, DIRPERM1))
+		pr_warn_once("dirperm1 breaks the protection"
+			     " by the permission bits on the lower branch\n");
+
+	err = 0;
+	fhsm = 0;
+	root = sb->s_root;
+	dir = d_inode(root);
+	do_plink = !!au_opt_test(sbinfo->si_mntflags, PLINK);
+	can_no_dreval = !!au_opt_test((sbinfo->si_mntflags | pending),
+				      UDBA_NONE);
+	bbot = au_sbbot(sb);
+	for (bindex = 0; !err && bindex <= bbot; bindex++) {
+		skip = 0;
+		h_dir = au_h_iptr(dir, bindex);
+		br = au_sbr(sb, bindex);
+
+		if ((br->br_perm & AuBrAttr_ICEX)
+		    && !h_dir->i_op->listxattr)
+			br->br_perm &= ~AuBrAttr_ICEX;
+#if 0
+		if ((br->br_perm & AuBrAttr_ICEX_SEC)
+		    && (au_br_sb(br)->s_flags & SB_NOSEC))
+			br->br_perm &= ~AuBrAttr_ICEX_SEC;
+#endif
+
+		do_free = 0;
+		wbr = br->br_wbr;
+		if (wbr)
+			wbr_wh_read_lock(wbr);
+
+		if (!au_br_writable(br->br_perm)) {
+			do_free = !!wbr;
+			skip = (!wbr
+				|| (!wbr->wbr_whbase
+				    && !wbr->wbr_plink
+				    && !wbr->wbr_orph));
+		} else if (!au_br_wh_linkable(br->br_perm)) {
+			/* skip = (!br->br_whbase && !br->br_orph); */
+			skip = (!wbr || !wbr->wbr_whbase);
+			if (skip && wbr) {
+				if (do_plink)
+					skip = !!wbr->wbr_plink;
+				else
+					skip = !wbr->wbr_plink;
+			}
+		} else {
+			/* skip = (br->br_whbase && br->br_ohph); */
+			skip = (wbr && wbr->wbr_whbase);
+			if (skip) {
+				if (do_plink)
+					skip = !!wbr->wbr_plink;
+				else
+					skip = !wbr->wbr_plink;
+			}
+		}
+		if (wbr)
+			wbr_wh_read_unlock(wbr);
+
+		if (can_no_dreval) {
+			dentry = br->br_path.dentry;
+			spin_lock(&dentry->d_lock);
+			if (dentry->d_flags &
+			    (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE))
+				can_no_dreval = 0;
+			spin_unlock(&dentry->d_lock);
+		}
+
+		if (au_br_fhsm(br->br_perm)) {
+			fhsm++;
+			AuDebugOn(!br->br_fhsm);
+		}
+
+		if (skip)
+			continue;
+
+		hdir = au_hi(dir, bindex);
+		au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
+		if (wbr)
+			wbr_wh_write_lock(wbr);
+		err = au_wh_init(br, sb);
+		if (wbr)
+			wbr_wh_write_unlock(wbr);
+		au_hn_inode_unlock(hdir);
+
+		if (!err && do_free) {
+			kfree(wbr);
+			br->br_wbr = NULL;
+		}
+	}
+
+	if (can_no_dreval)
+		au_fset_si(sbinfo, NO_DREVAL);
+	else
+		au_fclr_si(sbinfo, NO_DREVAL);
+
+	if (fhsm >= 2) {
+		au_fset_si(sbinfo, FHSM);
+		for (bindex = bbot; bindex >= 0; bindex--) {
+			br = au_sbr(sb, bindex);
+			if (au_br_fhsm(br->br_perm)) {
+				au_fhsm_set_bottom(sb, bindex);
+				break;
+			}
+		}
+	} else {
+		au_fclr_si(sbinfo, FHSM);
+		au_fhsm_set_bottom(sb, -1);
+	}
+
+	return err;
+}
+
+int au_opts_mount(struct super_block *sb, struct au_opts *opts)
+{
+	int err;
+	unsigned int tmp;
+	aufs_bindex_t bindex, bbot;
+	struct au_opt *opt;
+	struct au_opt_xino *opt_xino, xino;
+	struct au_sbinfo *sbinfo;
+	struct au_branch *br;
+	struct inode *dir;
+
+	SiMustWriteLock(sb);
+
+	err = 0;
+	opt_xino = NULL;
+	opt = opts->opt;
+	while (err >= 0 && opt->type != Opt_tail)
+		err = au_opt_simple(sb, opt++, opts);
+	if (err > 0)
+		err = 0;
+	else if (unlikely(err < 0))
+		goto out;
+
+	/* disable xino and udba temporary */
+	sbinfo = au_sbi(sb);
+	tmp = sbinfo->si_mntflags;
+	au_opt_clr(sbinfo->si_mntflags, XINO);
+	au_opt_set_udba(sbinfo->si_mntflags, UDBA_REVAL);
+
+	opt = opts->opt;
+	while (err >= 0 && opt->type != Opt_tail)
+		err = au_opt_br(sb, opt++, opts);
+	if (err > 0)
+		err = 0;
+	else if (unlikely(err < 0))
+		goto out;
+
+	bbot = au_sbbot(sb);
+	if (unlikely(bbot < 0)) {
+		err = -EINVAL;
+		pr_err("no branches\n");
+		goto out;
+	}
+
+	if (au_opt_test(tmp, XINO))
+		au_opt_set(sbinfo->si_mntflags, XINO);
+	opt = opts->opt;
+	while (!err && opt->type != Opt_tail)
+		err = au_opt_xino(sb, opt++, &opt_xino, opts);
+	if (unlikely(err))
+		goto out;
+
+	err = au_opts_verify(sb, sb->s_flags, tmp);
+	if (unlikely(err))
+		goto out;
+
+	/* restore xino */
+	if (au_opt_test(tmp, XINO) && !opt_xino) {
+		xino.file = au_xino_def(sb);
+		err = PTR_ERR(xino.file);
+		if (IS_ERR(xino.file))
+			goto out;
+
+		err = au_xino_set(sb, &xino, /*remount*/0);
+		fput(xino.file);
+		if (unlikely(err))
+			goto out;
+	}
+
+	/* restore udba */
+	tmp &= AuOptMask_UDBA;
+	sbinfo->si_mntflags &= ~AuOptMask_UDBA;
+	sbinfo->si_mntflags |= tmp;
+	bbot = au_sbbot(sb);
+	for (bindex = 0; bindex <= bbot; bindex++) {
+		br = au_sbr(sb, bindex);
+		err = au_hnotify_reset_br(tmp, br, br->br_perm);
+		if (unlikely(err))
+			AuIOErr("hnotify failed on br %d, %d, ignored\n",
+				bindex, err);
+		/* go on even if err */
+	}
+	if (au_opt_test(tmp, UDBA_HNOTIFY)) {
+		dir = d_inode(sb->s_root);
+		au_hn_reset(dir, au_hi_flags(dir, /*isdir*/1) & ~AuHi_XINO);
+	}
+
+out:
+	return err;
+}
+
+int au_opts_remount(struct super_block *sb, struct au_opts *opts)
+{
+	int err, rerr;
+	unsigned char no_dreval;
+	struct inode *dir;
+	struct au_opt_xino *opt_xino;
+	struct au_opt *opt;
+	struct au_sbinfo *sbinfo;
+
+	SiMustWriteLock(sb);
+
+	err = au_dr_opt_flush(sb);
+	if (unlikely(err))
+		goto out;
+	au_fset_opts(opts->flags, DR_FLUSHED);
+
+	dir = d_inode(sb->s_root);
+	sbinfo = au_sbi(sb);
+	opt_xino = NULL;
+	opt = opts->opt;
+	while (err >= 0 && opt->type != Opt_tail) {
+		err = au_opt_simple(sb, opt, opts);
+		if (!err)
+			err = au_opt_br(sb, opt, opts);
+		if (!err)
+			err = au_opt_xino(sb, opt, &opt_xino, opts);
+		opt++;
+	}
+	if (err > 0)
+		err = 0;
+	AuTraceErr(err);
+	/* go on even err */
+
+	no_dreval = !!au_ftest_si(sbinfo, NO_DREVAL);
+	rerr = au_opts_verify(sb, opts->sb_flags, /*pending*/0);
+	if (unlikely(rerr && !err))
+		err = rerr;
+
+	if (no_dreval != !!au_ftest_si(sbinfo, NO_DREVAL))
+		au_fset_opts(opts->flags, REFRESH_IDOP);
+
+	if (au_ftest_opts(opts->flags, TRUNC_XIB)) {
+		rerr = au_xib_trunc(sb);
+		if (unlikely(rerr && !err))
+			err = rerr;
+	}
+
+	/* will be handled by the caller */
+	if (!au_ftest_opts(opts->flags, REFRESH)
+	    && (opts->given_udba
+		|| au_opt_test(sbinfo->si_mntflags, XINO)
+		|| au_ftest_opts(opts->flags, REFRESH_IDOP)
+		    ))
+		au_fset_opts(opts->flags, REFRESH);
+
+	AuDbg("status 0x%x\n", opts->flags);
+
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+unsigned int au_opt_udba(struct super_block *sb)
+{
+	return au_mntflags(sb) & AuOptMask_UDBA;
+}
diff --git a/fs/aufs/opts.h b/fs/aufs/opts.h
new file mode 100644
index 000000000..30bda60f7
--- /dev/null
+++ b/fs/aufs/opts.h
@@ -0,0 +1,224 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * mount options/flags
+ */
+
+#ifndef __AUFS_OPTS_H__
+#define __AUFS_OPTS_H__
+
+#ifdef __KERNEL__
+
+#include <linux/path.h>
+
+struct file;
+
+/* ---------------------------------------------------------------------- */
+
+/* mount flags */
+#define AuOpt_XINO		1		/* external inode number bitmap
+						   and translation table */
+#define AuOpt_TRUNC_XINO	(1 << 1)	/* truncate xino files */
+#define AuOpt_UDBA_NONE		(1 << 2)	/* users direct branch access */
+#define AuOpt_UDBA_REVAL	(1 << 3)
+#define AuOpt_UDBA_HNOTIFY	(1 << 4)
+#define AuOpt_SHWH		(1 << 5)	/* show whiteout */
+#define AuOpt_PLINK		(1 << 6)	/* pseudo-link */
+#define AuOpt_DIRPERM1		(1 << 7)	/* ignore the lower dir's perm
+						   bits */
+#define AuOpt_ALWAYS_DIROPQ	(1 << 9)	/* policy to creating diropq */
+#define AuOpt_SUM		(1 << 10)	/* summation for statfs(2) */
+#define AuOpt_SUM_W		(1 << 11)	/* unimplemented */
+#define AuOpt_WARN_PERM		(1 << 12)	/* warn when add-branch */
+#define AuOpt_VERBOSE		(1 << 13)	/* busy inode when del-branch */
+#define AuOpt_DIO		(1 << 14)	/* direct io */
+#define AuOpt_DIRREN		(1 << 15)	/* directory rename */
+
+#ifndef CONFIG_AUFS_HNOTIFY
+#undef AuOpt_UDBA_HNOTIFY
+#define AuOpt_UDBA_HNOTIFY	0
+#endif
+#ifndef CONFIG_AUFS_DIRREN
+#undef AuOpt_DIRREN
+#define AuOpt_DIRREN		0
+#endif
+#ifndef CONFIG_AUFS_SHWH
+#undef AuOpt_SHWH
+#define AuOpt_SHWH		0
+#endif
+
+#define AuOpt_Def	(AuOpt_XINO \
+			 | AuOpt_UDBA_REVAL \
+			 | AuOpt_PLINK \
+			 /* | AuOpt_DIRPERM1 */ \
+			 | AuOpt_WARN_PERM)
+#define AuOptMask_UDBA	(AuOpt_UDBA_NONE \
+			 | AuOpt_UDBA_REVAL \
+			 | AuOpt_UDBA_HNOTIFY)
+
+#define au_opt_test(flags, name)	(flags & AuOpt_##name)
+#define au_opt_set(flags, name) do { \
+	BUILD_BUG_ON(AuOpt_##name & AuOptMask_UDBA); \
+	((flags) |= AuOpt_##name); \
+} while (0)
+#define au_opt_set_udba(flags, name) do { \
+	(flags) &= ~AuOptMask_UDBA; \
+	((flags) |= AuOpt_##name); \
+} while (0)
+#define au_opt_clr(flags, name) do { \
+	((flags) &= ~AuOpt_##name); \
+} while (0)
+
+static inline unsigned int au_opts_plink(unsigned int mntflags)
+{
+#ifdef CONFIG_PROC_FS
+	return mntflags;
+#else
+	return mntflags & ~AuOpt_PLINK;
+#endif
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* policies to select one among multiple writable branches */
+enum {
+	AuWbrCreate_TDP,	/* top down parent */
+	AuWbrCreate_RR,		/* round robin */
+	AuWbrCreate_MFS,	/* most free space */
+	AuWbrCreate_MFSV,	/* mfs with seconds */
+	AuWbrCreate_MFSRR,	/* mfs then rr */
+	AuWbrCreate_MFSRRV,	/* mfs then rr with seconds */
+	AuWbrCreate_TDMFS,	/* top down regardless parent and mfs */
+	AuWbrCreate_TDMFSV,	/* top down regardless parent and mfs */
+	AuWbrCreate_PMFS,	/* parent and mfs */
+	AuWbrCreate_PMFSV,	/* parent and mfs with seconds */
+	AuWbrCreate_PMFSRR,	/* parent, mfs and round-robin */
+	AuWbrCreate_PMFSRRV,	/* plus seconds */
+
+	AuWbrCreate_Def = AuWbrCreate_TDP
+};
+
+enum {
+	AuWbrCopyup_TDP,	/* top down parent */
+	AuWbrCopyup_BUP,	/* bottom up parent */
+	AuWbrCopyup_BU,		/* bottom up */
+
+	AuWbrCopyup_Def = AuWbrCopyup_TDP
+};
+
+/* ---------------------------------------------------------------------- */
+
+struct au_opt_add {
+	aufs_bindex_t	bindex;
+	char		*pathname;
+	int		perm;
+	struct path	path;
+};
+
+struct au_opt_del {
+	char		*pathname;
+	struct path	h_path;
+};
+
+struct au_opt_mod {
+	char		*path;
+	int		perm;
+	struct dentry	*h_root;
+};
+
+struct au_opt_xino {
+	char		*path;
+	struct file	*file;
+};
+
+struct au_opt_xino_itrunc {
+	aufs_bindex_t	bindex;
+};
+
+struct au_opt_wbr_create {
+	int			wbr_create;
+	int			mfs_second;
+	unsigned long long	mfsrr_watermark;
+};
+
+struct au_opt {
+	int type;
+	union {
+		struct au_opt_xino	xino;
+		struct au_opt_xino_itrunc xino_itrunc;
+		struct au_opt_add	add;
+		struct au_opt_del	del;
+		struct au_opt_mod	mod;
+		int			dirwh;
+		int			rdcache;
+		unsigned int		rdblk;
+		unsigned int		rdhash;
+		int			udba;
+		struct au_opt_wbr_create wbr_create;
+		int			wbr_copyup;
+		unsigned int		fhsm_second;
+	};
+};
+
+/* opts flags */
+#define AuOpts_REMOUNT		1
+#define AuOpts_REFRESH		(1 << 1)
+#define AuOpts_TRUNC_XIB	(1 << 2)
+#define AuOpts_REFRESH_DYAOP	(1 << 3)
+#define AuOpts_REFRESH_IDOP	(1 << 4)
+#define AuOpts_DR_FLUSHED	(1 << 5)
+#define au_ftest_opts(flags, name)	((flags) & AuOpts_##name)
+#define au_fset_opts(flags, name) \
+	do { (flags) |= AuOpts_##name; } while (0)
+#define au_fclr_opts(flags, name) \
+	do { (flags) &= ~AuOpts_##name; } while (0)
+
+#ifndef CONFIG_AUFS_DIRREN
+#undef AuOpts_DR_FLUSHED
+#define AuOpts_DR_FLUSHED	0
+#endif
+
+struct au_opts {
+	struct au_opt	*opt;
+	int		max_opt;
+
+	unsigned int	given_udba;
+	unsigned int	flags;
+	unsigned long	sb_flags;
+};
+
+/* ---------------------------------------------------------------------- */
+
+/* opts.c */
+void au_optstr_br_perm(au_br_perm_str_t *str, int perm);
+const char *au_optstr_udba(int udba);
+const char *au_optstr_wbr_copyup(int wbr_copyup);
+const char *au_optstr_wbr_create(int wbr_create);
+
+void au_opts_free(struct au_opts *opts);
+struct super_block;
+int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts);
+int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
+		   unsigned int pending);
+int au_opts_mount(struct super_block *sb, struct au_opts *opts);
+int au_opts_remount(struct super_block *sb, struct au_opts *opts);
+
+unsigned int au_opt_udba(struct super_block *sb);
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_OPTS_H__ */
diff --git a/fs/aufs/plink.c b/fs/aufs/plink.c
new file mode 100644
index 000000000..f1a569f23
--- /dev/null
+++ b/fs/aufs/plink.c
@@ -0,0 +1,515 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * pseudo-link
+ */
+
+#include "aufs.h"
+
+/*
+ * the pseudo-link maintenance mode.
+ * during a user process maintains the pseudo-links,
+ * prohibit adding a new plink and branch manipulation.
+ *
+ * Flags
+ * NOPLM:
+ *	For entry functions which will handle plink, and i_mutex is already held
+ *	in VFS.
+ *	They cannot wait and should return an error at once.
+ *	Callers has to check the error.
+ * NOPLMW:
+ *	For entry functions which will handle plink, but i_mutex is not held
+ *	in VFS.
+ *	They can wait the plink maintenance mode to finish.
+ *
+ * They behave like F_SETLK and F_SETLKW.
+ * If the caller never handle plink, then both flags are unnecessary.
+ */
+
+int au_plink_maint(struct super_block *sb, int flags)
+{
+	int err;
+	pid_t pid, ppid;
+	struct task_struct *parent, *prev;
+	struct au_sbinfo *sbi;
+
+	SiMustAnyLock(sb);
+
+	err = 0;
+	if (!au_opt_test(au_mntflags(sb), PLINK))
+		goto out;
+
+	sbi = au_sbi(sb);
+	pid = sbi->si_plink_maint_pid;
+	if (!pid || pid == current->pid)
+		goto out;
+
+	/* todo: it highly depends upon /sbin/mount.aufs */
+	prev = NULL;
+	parent = current;
+	ppid = 0;
+	rcu_read_lock();
+	while (1) {
+		parent = rcu_dereference(parent->real_parent);
+		if (parent == prev)
+			break;
+		ppid = task_pid_vnr(parent);
+		if (pid == ppid) {
+			rcu_read_unlock();
+			goto out;
+		}
+		prev = parent;
+	}
+	rcu_read_unlock();
+
+	if (au_ftest_lock(flags, NOPLMW)) {
+		/* if there is no i_mutex lock in VFS, we don't need to wait */
+		/* AuDebugOn(!lockdep_depth(current)); */
+		while (sbi->si_plink_maint_pid) {
+			si_read_unlock(sb);
+			/* gave up wake_up_bit() */
+			wait_event(sbi->si_plink_wq, !sbi->si_plink_maint_pid);
+
+			if (au_ftest_lock(flags, FLUSH))
+				au_nwt_flush(&sbi->si_nowait);
+			si_noflush_read_lock(sb);
+		}
+	} else if (au_ftest_lock(flags, NOPLM)) {
+		AuDbg("ppid %d, pid %d\n", ppid, pid);
+		err = -EAGAIN;
+	}
+
+out:
+	return err;
+}
+
+void au_plink_maint_leave(struct au_sbinfo *sbinfo)
+{
+	spin_lock(&sbinfo->si_plink_maint_lock);
+	sbinfo->si_plink_maint_pid = 0;
+	spin_unlock(&sbinfo->si_plink_maint_lock);
+	wake_up_all(&sbinfo->si_plink_wq);
+}
+
+int au_plink_maint_enter(struct super_block *sb)
+{
+	int err;
+	struct au_sbinfo *sbinfo;
+
+	err = 0;
+	sbinfo = au_sbi(sb);
+	/* make sure i am the only one in this fs */
+	si_write_lock(sb, AuLock_FLUSH);
+	if (au_opt_test(au_mntflags(sb), PLINK)) {
+		spin_lock(&sbinfo->si_plink_maint_lock);
+		if (!sbinfo->si_plink_maint_pid)
+			sbinfo->si_plink_maint_pid = current->pid;
+		else
+			err = -EBUSY;
+		spin_unlock(&sbinfo->si_plink_maint_lock);
+	}
+	si_write_unlock(sb);
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+#ifdef CONFIG_AUFS_DEBUG
+void au_plink_list(struct super_block *sb)
+{
+	int i;
+	struct au_sbinfo *sbinfo;
+	struct hlist_bl_head *hbl;
+	struct hlist_bl_node *pos;
+	struct au_icntnr *icntnr;
+
+	SiMustAnyLock(sb);
+
+	sbinfo = au_sbi(sb);
+	AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
+	AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
+
+	for (i = 0; i < AuPlink_NHASH; i++) {
+		hbl = sbinfo->si_plink + i;
+		hlist_bl_lock(hbl);
+		hlist_bl_for_each_entry(icntnr, pos, hbl, plink)
+			AuDbg("%lu\n", icntnr->vfs_inode.i_ino);
+		hlist_bl_unlock(hbl);
+	}
+}
+#endif
+
+/* is the inode pseudo-linked? */
+int au_plink_test(struct inode *inode)
+{
+	int found, i;
+	struct au_sbinfo *sbinfo;
+	struct hlist_bl_head *hbl;
+	struct hlist_bl_node *pos;
+	struct au_icntnr *icntnr;
+
+	sbinfo = au_sbi(inode->i_sb);
+	AuRwMustAnyLock(&sbinfo->si_rwsem);
+	AuDebugOn(!au_opt_test(au_mntflags(inode->i_sb), PLINK));
+	AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
+
+	found = 0;
+	i = au_plink_hash(inode->i_ino);
+	hbl =  sbinfo->si_plink + i;
+	hlist_bl_lock(hbl);
+	hlist_bl_for_each_entry(icntnr, pos, hbl, plink)
+		if (&icntnr->vfs_inode == inode) {
+			found = 1;
+			break;
+		}
+	hlist_bl_unlock(hbl);
+	return found;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * generate a name for plink.
+ * the file will be stored under AUFS_WH_PLINKDIR.
+ */
+/* 20 is max digits length of ulong 64 */
+#define PLINK_NAME_LEN	((20 + 1) * 2)
+
+static int plink_name(char *name, int len, struct inode *inode,
+		      aufs_bindex_t bindex)
+{
+	int rlen;
+	struct inode *h_inode;
+
+	h_inode = au_h_iptr(inode, bindex);
+	rlen = snprintf(name, len, "%lu.%lu", inode->i_ino, h_inode->i_ino);
+	return rlen;
+}
+
+struct au_do_plink_lkup_args {
+	struct dentry **errp;
+	struct qstr *tgtname;
+	struct dentry *h_parent;
+	struct au_branch *br;
+};
+
+static struct dentry *au_do_plink_lkup(struct qstr *tgtname,
+				       struct dentry *h_parent,
+				       struct au_branch *br)
+{
+	struct dentry *h_dentry;
+	struct inode *h_inode;
+
+	h_inode = d_inode(h_parent);
+	inode_lock_shared_nested(h_inode, AuLsc_I_CHILD2);
+	h_dentry = vfsub_lkup_one(tgtname, h_parent);
+	inode_unlock_shared(h_inode);
+	return h_dentry;
+}
+
+static void au_call_do_plink_lkup(void *args)
+{
+	struct au_do_plink_lkup_args *a = args;
+	*a->errp = au_do_plink_lkup(a->tgtname, a->h_parent, a->br);
+}
+
+/* lookup the plink-ed @inode under the branch at @bindex */
+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex)
+{
+	struct dentry *h_dentry, *h_parent;
+	struct au_branch *br;
+	int wkq_err;
+	char a[PLINK_NAME_LEN];
+	struct qstr tgtname = QSTR_INIT(a, 0);
+
+	AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
+
+	br = au_sbr(inode->i_sb, bindex);
+	h_parent = br->br_wbr->wbr_plink;
+	tgtname.len = plink_name(a, sizeof(a), inode, bindex);
+
+	if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) {
+		struct au_do_plink_lkup_args args = {
+			.errp		= &h_dentry,
+			.tgtname	= &tgtname,
+			.h_parent	= h_parent,
+			.br		= br
+		};
+
+		wkq_err = au_wkq_wait(au_call_do_plink_lkup, &args);
+		if (unlikely(wkq_err))
+			h_dentry = ERR_PTR(wkq_err);
+	} else
+		h_dentry = au_do_plink_lkup(&tgtname, h_parent, br);
+
+	return h_dentry;
+}
+
+/* create a pseudo-link */
+static int do_whplink(struct qstr *tgt, struct dentry *h_parent,
+		      struct dentry *h_dentry, struct au_branch *br)
+{
+	int err;
+	struct path h_path = {
+		.mnt = au_br_mnt(br)
+	};
+	struct inode *h_dir, *delegated;
+
+	h_dir = d_inode(h_parent);
+	inode_lock_nested(h_dir, AuLsc_I_CHILD2);
+again:
+	h_path.dentry = vfsub_lkup_one(tgt, h_parent);
+	err = PTR_ERR(h_path.dentry);
+	if (IS_ERR(h_path.dentry))
+		goto out;
+
+	err = 0;
+	/* wh.plink dir is not monitored */
+	/* todo: is it really safe? */
+	if (d_is_positive(h_path.dentry)
+	    && d_inode(h_path.dentry) != d_inode(h_dentry)) {
+		delegated = NULL;
+		err = vfsub_unlink(h_dir, &h_path, &delegated, /*force*/0);
+		if (unlikely(err == -EWOULDBLOCK)) {
+			pr_warn("cannot retry for NFSv4 delegation"
+				" for an internal unlink\n");
+			iput(delegated);
+		}
+		dput(h_path.dentry);
+		h_path.dentry = NULL;
+		if (!err)
+			goto again;
+	}
+	if (!err && d_is_negative(h_path.dentry)) {
+		delegated = NULL;
+		err = vfsub_link(h_dentry, h_dir, &h_path, &delegated);
+		if (unlikely(err == -EWOULDBLOCK)) {
+			pr_warn("cannot retry for NFSv4 delegation"
+				" for an internal link\n");
+			iput(delegated);
+		}
+	}
+	dput(h_path.dentry);
+
+out:
+	inode_unlock(h_dir);
+	return err;
+}
+
+struct do_whplink_args {
+	int *errp;
+	struct qstr *tgt;
+	struct dentry *h_parent;
+	struct dentry *h_dentry;
+	struct au_branch *br;
+};
+
+static void call_do_whplink(void *args)
+{
+	struct do_whplink_args *a = args;
+	*a->errp = do_whplink(a->tgt, a->h_parent, a->h_dentry, a->br);
+}
+
+static int whplink(struct dentry *h_dentry, struct inode *inode,
+		   aufs_bindex_t bindex, struct au_branch *br)
+{
+	int err, wkq_err;
+	struct au_wbr *wbr;
+	struct dentry *h_parent;
+	char a[PLINK_NAME_LEN];
+	struct qstr tgtname = QSTR_INIT(a, 0);
+
+	wbr = au_sbr(inode->i_sb, bindex)->br_wbr;
+	h_parent = wbr->wbr_plink;
+	tgtname.len = plink_name(a, sizeof(a), inode, bindex);
+
+	/* always superio. */
+	if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) {
+		struct do_whplink_args args = {
+			.errp		= &err,
+			.tgt		= &tgtname,
+			.h_parent	= h_parent,
+			.h_dentry	= h_dentry,
+			.br		= br
+		};
+		wkq_err = au_wkq_wait(call_do_whplink, &args);
+		if (unlikely(wkq_err))
+			err = wkq_err;
+	} else
+		err = do_whplink(&tgtname, h_parent, h_dentry, br);
+
+	return err;
+}
+
+/*
+ * create a new pseudo-link for @h_dentry on @bindex.
+ * the linked inode is held in aufs @inode.
+ */
+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
+		     struct dentry *h_dentry)
+{
+	struct super_block *sb;
+	struct au_sbinfo *sbinfo;
+	struct hlist_bl_head *hbl;
+	struct hlist_bl_node *pos;
+	struct au_icntnr *icntnr;
+	int found, err, cnt, i;
+
+	sb = inode->i_sb;
+	sbinfo = au_sbi(sb);
+	AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
+	AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
+
+	found = au_plink_test(inode);
+	if (found)
+		return;
+
+	i = au_plink_hash(inode->i_ino);
+	hbl = sbinfo->si_plink + i;
+	au_igrab(inode);
+
+	hlist_bl_lock(hbl);
+	hlist_bl_for_each_entry(icntnr, pos, hbl, plink) {
+		if (&icntnr->vfs_inode == inode) {
+			found = 1;
+			break;
+		}
+	}
+	if (!found) {
+		icntnr = container_of(inode, struct au_icntnr, vfs_inode);
+		hlist_bl_add_head(&icntnr->plink, hbl);
+	}
+	hlist_bl_unlock(hbl);
+	if (!found) {
+		cnt = au_hbl_count(hbl);
+#define msg "unexpectedly unblanced or too many pseudo-links"
+		if (cnt > AUFS_PLINK_WARN)
+			AuWarn1(msg ", %d\n", cnt);
+#undef msg
+		err = whplink(h_dentry, inode, bindex, au_sbr(sb, bindex));
+		if (unlikely(err)) {
+			pr_warn("err %d, damaged pseudo link.\n", err);
+			au_hbl_del(&icntnr->plink, hbl);
+			iput(&icntnr->vfs_inode);
+		}
+	} else
+		iput(&icntnr->vfs_inode);
+}
+
+/* free all plinks */
+void au_plink_put(struct super_block *sb, int verbose)
+{
+	int i, warned;
+	struct au_sbinfo *sbinfo;
+	struct hlist_bl_head *hbl;
+	struct hlist_bl_node *pos, *tmp;
+	struct au_icntnr *icntnr;
+
+	SiMustWriteLock(sb);
+
+	sbinfo = au_sbi(sb);
+	AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
+	AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
+
+	/* no spin_lock since sbinfo is write-locked */
+	warned = 0;
+	for (i = 0; i < AuPlink_NHASH; i++) {
+		hbl = sbinfo->si_plink + i;
+		if (!warned && verbose && !hlist_bl_empty(hbl)) {
+			pr_warn("pseudo-link is not flushed");
+			warned = 1;
+		}
+		hlist_bl_for_each_entry_safe(icntnr, pos, tmp, hbl, plink)
+			iput(&icntnr->vfs_inode);
+		INIT_HLIST_BL_HEAD(hbl);
+	}
+}
+
+void au_plink_clean(struct super_block *sb, int verbose)
+{
+	struct dentry *root;
+
+	root = sb->s_root;
+	aufs_write_lock(root);
+	if (au_opt_test(au_mntflags(sb), PLINK))
+		au_plink_put(sb, verbose);
+	aufs_write_unlock(root);
+}
+
+static int au_plink_do_half_refresh(struct inode *inode, aufs_bindex_t br_id)
+{
+	int do_put;
+	aufs_bindex_t btop, bbot, bindex;
+
+	do_put = 0;
+	btop = au_ibtop(inode);
+	bbot = au_ibbot(inode);
+	if (btop >= 0) {
+		for (bindex = btop; bindex <= bbot; bindex++) {
+			if (!au_h_iptr(inode, bindex)
+			    || au_ii_br_id(inode, bindex) != br_id)
+				continue;
+			au_set_h_iptr(inode, bindex, NULL, 0);
+			do_put = 1;
+			break;
+		}
+		if (do_put)
+			for (bindex = btop; bindex <= bbot; bindex++)
+				if (au_h_iptr(inode, bindex)) {
+					do_put = 0;
+					break;
+				}
+	} else
+		do_put = 1;
+
+	return do_put;
+}
+
+/* free the plinks on a branch specified by @br_id */
+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id)
+{
+	struct au_sbinfo *sbinfo;
+	struct hlist_bl_head *hbl;
+	struct hlist_bl_node *pos, *tmp;
+	struct au_icntnr *icntnr;
+	struct inode *inode;
+	int i, do_put;
+
+	SiMustWriteLock(sb);
+
+	sbinfo = au_sbi(sb);
+	AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
+	AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
+
+	/* no bit_lock since sbinfo is write-locked */
+	for (i = 0; i < AuPlink_NHASH; i++) {
+		hbl = sbinfo->si_plink + i;
+		hlist_bl_for_each_entry_safe(icntnr, pos, tmp, hbl, plink) {
+			inode = au_igrab(&icntnr->vfs_inode);
+			ii_write_lock_child(inode);
+			do_put = au_plink_do_half_refresh(inode, br_id);
+			if (do_put) {
+				hlist_bl_del(&icntnr->plink);
+				iput(inode);
+			}
+			ii_write_unlock(inode);
+			iput(inode);
+		}
+	}
+}
diff --git a/fs/aufs/poll.c b/fs/aufs/poll.c
new file mode 100644
index 000000000..374b46db7
--- /dev/null
+++ b/fs/aufs/poll.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * poll operation
+ * There is only one filesystem which implements ->poll operation, currently.
+ */
+
+#include "aufs.h"
+
+__poll_t aufs_poll(struct file *file, poll_table *wait)
+{
+	__poll_t mask;
+	int err;
+	struct file *h_file;
+	struct super_block *sb;
+
+	/* We should pretend an error happened. */
+	mask = EPOLLERR /* | EPOLLIN | EPOLLOUT */;
+	sb = file->f_path.dentry->d_sb;
+	si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
+
+	h_file = au_read_pre(file, /*keep_fi*/0, /*lsc*/0);
+	err = PTR_ERR(h_file);
+	if (IS_ERR(h_file))
+		goto out;
+
+	/* it is not an error if h_file has no operation */
+	mask = DEFAULT_POLLMASK;
+	if (h_file->f_op->poll)
+		mask = h_file->f_op->poll(h_file, wait);
+	fput(h_file); /* instead of au_read_post() */
+
+out:
+	si_read_unlock(sb);
+	if (mask & POLLERR)
+		AuDbg("mask 0x%x\n", mask);
+	return mask;
+}
diff --git a/fs/aufs/posix_acl.c b/fs/aufs/posix_acl.c
new file mode 100644
index 000000000..bbd7aa2ec
--- /dev/null
+++ b/fs/aufs/posix_acl.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2014-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * posix acl operations
+ */
+
+#include <linux/fs.h>
+#include "aufs.h"
+
+struct posix_acl *aufs_get_acl(struct inode *inode, int type)
+{
+	struct posix_acl *acl;
+	int err;
+	aufs_bindex_t bindex;
+	struct inode *h_inode;
+	struct super_block *sb;
+
+	acl = NULL;
+	sb = inode->i_sb;
+	si_read_lock(sb, AuLock_FLUSH);
+	ii_read_lock_child(inode);
+	if (!(sb->s_flags & SB_POSIXACL))
+		goto out;
+
+	bindex = au_ibtop(inode);
+	h_inode = au_h_iptr(inode, bindex);
+	if (unlikely(!h_inode
+		     || ((h_inode->i_mode & S_IFMT)
+			 != (inode->i_mode & S_IFMT)))) {
+		err = au_busy_or_stale();
+		acl = ERR_PTR(err);
+		goto out;
+	}
+
+	/* always topmost only */
+	acl = get_acl(h_inode, type);
+	if (!IS_ERR_OR_NULL(acl))
+		set_cached_acl(inode, type, acl);
+
+out:
+	ii_read_unlock(inode);
+	si_read_unlock(sb);
+
+	AuTraceErrPtr(acl);
+	return acl;
+}
+
+int aufs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+{
+	int err;
+	ssize_t ssz;
+	struct dentry *dentry;
+	struct au_sxattr arg = {
+		.type = AU_ACL_SET,
+		.u.acl_set = {
+			.acl	= acl,
+			.type	= type
+		},
+	};
+
+	IMustLock(inode);
+
+	if (inode->i_ino == AUFS_ROOT_INO)
+		dentry = dget(inode->i_sb->s_root);
+	else {
+		dentry = d_find_alias(inode);
+		if (!dentry)
+			dentry = d_find_any_alias(inode);
+		if (!dentry) {
+			pr_warn("cannot handle this inode, "
+				"please report to aufs-users ML\n");
+			err = -ENOENT;
+			goto out;
+		}
+	}
+
+	ssz = au_sxattr(dentry, inode, &arg);
+	dput(dentry);
+	err = ssz;
+	if (ssz >= 0) {
+		err = 0;
+		set_cached_acl(inode, type, acl);
+	}
+
+out:
+	return err;
+}
diff --git a/fs/aufs/procfs.c b/fs/aufs/procfs.c
new file mode 100644
index 000000000..6201739c6
--- /dev/null
+++ b/fs/aufs/procfs.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright (C) 2010-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * procfs interfaces
+ */
+
+#include <linux/proc_fs.h>
+#include "aufs.h"
+
+static int au_procfs_plm_release(struct inode *inode, struct file *file)
+{
+	struct au_sbinfo *sbinfo;
+
+	sbinfo = file->private_data;
+	if (sbinfo) {
+		au_plink_maint_leave(sbinfo);
+		kobject_put(&sbinfo->si_kobj);
+	}
+
+	return 0;
+}
+
+static void au_procfs_plm_write_clean(struct file *file)
+{
+	struct au_sbinfo *sbinfo;
+
+	sbinfo = file->private_data;
+	if (sbinfo)
+		au_plink_clean(sbinfo->si_sb, /*verbose*/0);
+}
+
+static int au_procfs_plm_write_si(struct file *file, unsigned long id)
+{
+	int err;
+	struct super_block *sb;
+	struct au_sbinfo *sbinfo;
+	struct hlist_bl_node *pos;
+
+	err = -EBUSY;
+	if (unlikely(file->private_data))
+		goto out;
+
+	sb = NULL;
+	/* don't use au_sbilist_lock() here */
+	hlist_bl_lock(&au_sbilist);
+	hlist_bl_for_each_entry(sbinfo, pos, &au_sbilist, si_list)
+		if (id == sysaufs_si_id(sbinfo)) {
+			kobject_get(&sbinfo->si_kobj);
+			sb = sbinfo->si_sb;
+			break;
+		}
+	hlist_bl_unlock(&au_sbilist);
+
+	err = -EINVAL;
+	if (unlikely(!sb))
+		goto out;
+
+	err = au_plink_maint_enter(sb);
+	if (!err)
+		/* keep kobject_get() */
+		file->private_data = sbinfo;
+	else
+		kobject_put(&sbinfo->si_kobj);
+out:
+	return err;
+}
+
+/*
+ * Accept a valid "si=xxxx" only.
+ * Once it is accepted successfully, accept "clean" too.
+ */
+static ssize_t au_procfs_plm_write(struct file *file, const char __user *ubuf,
+				   size_t count, loff_t *ppos)
+{
+	ssize_t err;
+	unsigned long id;
+	/* last newline is allowed */
+	char buf[3 + sizeof(unsigned long) * 2 + 1];
+
+	err = -EACCES;
+	if (unlikely(!capable(CAP_SYS_ADMIN)))
+		goto out;
+
+	err = -EINVAL;
+	if (unlikely(count > sizeof(buf)))
+		goto out;
+
+	err = copy_from_user(buf, ubuf, count);
+	if (unlikely(err)) {
+		err = -EFAULT;
+		goto out;
+	}
+	buf[count] = 0;
+
+	err = -EINVAL;
+	if (!strcmp("clean", buf)) {
+		au_procfs_plm_write_clean(file);
+		goto out_success;
+	} else if (unlikely(strncmp("si=", buf, 3)))
+		goto out;
+
+	err = kstrtoul(buf + 3, 16, &id);
+	if (unlikely(err))
+		goto out;
+
+	err = au_procfs_plm_write_si(file, id);
+	if (unlikely(err))
+		goto out;
+
+out_success:
+	err = count; /* success */
+out:
+	return err;
+}
+
+static const struct file_operations au_procfs_plm_fop = {
+	.write		= au_procfs_plm_write,
+	.release	= au_procfs_plm_release,
+	.owner		= THIS_MODULE
+};
+
+/* ---------------------------------------------------------------------- */
+
+static struct proc_dir_entry *au_procfs_dir;
+
+void au_procfs_fin(void)
+{
+	remove_proc_entry(AUFS_PLINK_MAINT_NAME, au_procfs_dir);
+	remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
+}
+
+int __init au_procfs_init(void)
+{
+	int err;
+	struct proc_dir_entry *entry;
+
+	err = -ENOMEM;
+	au_procfs_dir = proc_mkdir(AUFS_PLINK_MAINT_DIR, NULL);
+	if (unlikely(!au_procfs_dir))
+		goto out;
+
+	entry = proc_create(AUFS_PLINK_MAINT_NAME, S_IFREG | S_IWUSR,
+			    au_procfs_dir, &au_procfs_plm_fop);
+	if (unlikely(!entry))
+		goto out_dir;
+
+	err = 0;
+	goto out; /* success */
+
+
+out_dir:
+	remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
+out:
+	return err;
+}
diff --git a/fs/aufs/rdu.c b/fs/aufs/rdu.c
new file mode 100644
index 000000000..f6a10f553
--- /dev/null
+++ b/fs/aufs/rdu.c
@@ -0,0 +1,381 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * readdir in userspace.
+ */
+
+#include <linux/compat.h>
+#include <linux/fs_stack.h>
+#include <linux/security.h>
+#include "aufs.h"
+
+/* bits for struct aufs_rdu.flags */
+#define	AuRdu_CALLED	1
+#define	AuRdu_CONT	(1 << 1)
+#define	AuRdu_FULL	(1 << 2)
+#define au_ftest_rdu(flags, name)	((flags) & AuRdu_##name)
+#define au_fset_rdu(flags, name) \
+	do { (flags) |= AuRdu_##name; } while (0)
+#define au_fclr_rdu(flags, name) \
+	do { (flags) &= ~AuRdu_##name; } while (0)
+
+struct au_rdu_arg {
+	struct dir_context		ctx;
+	struct aufs_rdu			*rdu;
+	union au_rdu_ent_ul		ent;
+	unsigned long			end;
+
+	struct super_block		*sb;
+	int				err;
+};
+
+static int au_rdu_fill(struct dir_context *ctx, const char *name, int nlen,
+		       loff_t offset, u64 h_ino, unsigned int d_type)
+{
+	int err, len;
+	struct au_rdu_arg *arg = container_of(ctx, struct au_rdu_arg, ctx);
+	struct aufs_rdu *rdu = arg->rdu;
+	struct au_rdu_ent ent;
+
+	err = 0;
+	arg->err = 0;
+	au_fset_rdu(rdu->cookie.flags, CALLED);
+	len = au_rdu_len(nlen);
+	if (arg->ent.ul + len  < arg->end) {
+		ent.ino = h_ino;
+		ent.bindex = rdu->cookie.bindex;
+		ent.type = d_type;
+		ent.nlen = nlen;
+		if (unlikely(nlen > AUFS_MAX_NAMELEN))
+			ent.type = DT_UNKNOWN;
+
+		/* unnecessary to support mmap_sem since this is a dir */
+		err = -EFAULT;
+		if (copy_to_user(arg->ent.e, &ent, sizeof(ent)))
+			goto out;
+		if (copy_to_user(arg->ent.e->name, name, nlen))
+			goto out;
+		/* the terminating NULL */
+		if (__put_user(0, arg->ent.e->name + nlen))
+			goto out;
+		err = 0;
+		/* AuDbg("%p, %.*s\n", arg->ent.p, nlen, name); */
+		arg->ent.ul += len;
+		rdu->rent++;
+	} else {
+		err = -EFAULT;
+		au_fset_rdu(rdu->cookie.flags, FULL);
+		rdu->full = 1;
+		rdu->tail = arg->ent;
+	}
+
+out:
+	/* AuTraceErr(err); */
+	return err;
+}
+
+static int au_rdu_do(struct file *h_file, struct au_rdu_arg *arg)
+{
+	int err;
+	loff_t offset;
+	struct au_rdu_cookie *cookie = &arg->rdu->cookie;
+
+	/* we don't have to care (FMODE_32BITHASH | FMODE_64BITHASH) for ext4 */
+	offset = vfsub_llseek(h_file, cookie->h_pos, SEEK_SET);
+	err = offset;
+	if (unlikely(offset != cookie->h_pos))
+		goto out;
+
+	err = 0;
+	do {
+		arg->err = 0;
+		au_fclr_rdu(cookie->flags, CALLED);
+		/* smp_mb(); */
+		err = vfsub_iterate_dir(h_file, &arg->ctx);
+		if (err >= 0)
+			err = arg->err;
+	} while (!err
+		 && au_ftest_rdu(cookie->flags, CALLED)
+		 && !au_ftest_rdu(cookie->flags, FULL));
+	cookie->h_pos = h_file->f_pos;
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+static int au_rdu(struct file *file, struct aufs_rdu *rdu)
+{
+	int err;
+	aufs_bindex_t bbot;
+	struct au_rdu_arg arg = {
+		.ctx = {
+			.actor = au_rdu_fill
+		}
+	};
+	struct dentry *dentry;
+	struct inode *inode;
+	struct file *h_file;
+	struct au_rdu_cookie *cookie = &rdu->cookie;
+
+	err = !access_ok(VERIFY_WRITE, rdu->ent.e, rdu->sz);
+	if (unlikely(err)) {
+		err = -EFAULT;
+		AuTraceErr(err);
+		goto out;
+	}
+	rdu->rent = 0;
+	rdu->tail = rdu->ent;
+	rdu->full = 0;
+	arg.rdu = rdu;
+	arg.ent = rdu->ent;
+	arg.end = arg.ent.ul;
+	arg.end += rdu->sz;
+
+	err = -ENOTDIR;
+	if (unlikely(!file->f_op->iterate && !file->f_op->iterate_shared))
+		goto out;
+
+	err = security_file_permission(file, MAY_READ);
+	AuTraceErr(err);
+	if (unlikely(err))
+		goto out;
+
+	dentry = file->f_path.dentry;
+	inode = d_inode(dentry);
+	inode_lock_shared(inode);
+
+	arg.sb = inode->i_sb;
+	err = si_read_lock(arg.sb, AuLock_FLUSH | AuLock_NOPLM);
+	if (unlikely(err))
+		goto out_mtx;
+	err = au_alive_dir(dentry);
+	if (unlikely(err))
+		goto out_si;
+	/* todo: reval? */
+	fi_read_lock(file);
+
+	err = -EAGAIN;
+	if (unlikely(au_ftest_rdu(cookie->flags, CONT)
+		     && cookie->generation != au_figen(file)))
+		goto out_unlock;
+
+	err = 0;
+	if (!rdu->blk) {
+		rdu->blk = au_sbi(arg.sb)->si_rdblk;
+		if (!rdu->blk)
+			rdu->blk = au_dir_size(file, /*dentry*/NULL);
+	}
+	bbot = au_fbtop(file);
+	if (cookie->bindex < bbot)
+		cookie->bindex = bbot;
+	bbot = au_fbbot_dir(file);
+	/* AuDbg("b%d, b%d\n", cookie->bindex, bbot); */
+	for (; !err && cookie->bindex <= bbot;
+	     cookie->bindex++, cookie->h_pos = 0) {
+		h_file = au_hf_dir(file, cookie->bindex);
+		if (!h_file)
+			continue;
+
+		au_fclr_rdu(cookie->flags, FULL);
+		err = au_rdu_do(h_file, &arg);
+		AuTraceErr(err);
+		if (unlikely(au_ftest_rdu(cookie->flags, FULL) || err))
+			break;
+	}
+	AuDbg("rent %llu\n", rdu->rent);
+
+	if (!err && !au_ftest_rdu(cookie->flags, CONT)) {
+		rdu->shwh = !!au_opt_test(au_sbi(arg.sb)->si_mntflags, SHWH);
+		au_fset_rdu(cookie->flags, CONT);
+		cookie->generation = au_figen(file);
+	}
+
+	ii_read_lock_child(inode);
+	fsstack_copy_attr_atime(inode, au_h_iptr(inode, au_ibtop(inode)));
+	ii_read_unlock(inode);
+
+out_unlock:
+	fi_read_unlock(file);
+out_si:
+	si_read_unlock(arg.sb);
+out_mtx:
+	inode_unlock_shared(inode);
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+static int au_rdu_ino(struct file *file, struct aufs_rdu *rdu)
+{
+	int err;
+	ino_t ino;
+	unsigned long long nent;
+	union au_rdu_ent_ul *u;
+	struct au_rdu_ent ent;
+	struct super_block *sb;
+
+	err = 0;
+	nent = rdu->nent;
+	u = &rdu->ent;
+	sb = file->f_path.dentry->d_sb;
+	si_read_lock(sb, AuLock_FLUSH);
+	while (nent-- > 0) {
+		/* unnecessary to support mmap_sem since this is a dir */
+		err = copy_from_user(&ent, u->e, sizeof(ent));
+		if (!err)
+			err = !access_ok(VERIFY_WRITE, &u->e->ino, sizeof(ino));
+		if (unlikely(err)) {
+			err = -EFAULT;
+			AuTraceErr(err);
+			break;
+		}
+
+		/* AuDbg("b%d, i%llu\n", ent.bindex, ent.ino); */
+		if (!ent.wh)
+			err = au_ino(sb, ent.bindex, ent.ino, ent.type, &ino);
+		else
+			err = au_wh_ino(sb, ent.bindex, ent.ino, ent.type,
+					&ino);
+		if (unlikely(err)) {
+			AuTraceErr(err);
+			break;
+		}
+
+		err = __put_user(ino, &u->e->ino);
+		if (unlikely(err)) {
+			err = -EFAULT;
+			AuTraceErr(err);
+			break;
+		}
+		u->ul += au_rdu_len(ent.nlen);
+	}
+	si_read_unlock(sb);
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_rdu_verify(struct aufs_rdu *rdu)
+{
+	AuDbg("rdu{%llu, %p, %u | %u | %llu, %u, %u | "
+	      "%llu, b%d, 0x%x, g%u}\n",
+	      rdu->sz, rdu->ent.e, rdu->verify[AufsCtlRduV_SZ],
+	      rdu->blk,
+	      rdu->rent, rdu->shwh, rdu->full,
+	      rdu->cookie.h_pos, rdu->cookie.bindex, rdu->cookie.flags,
+	      rdu->cookie.generation);
+
+	if (rdu->verify[AufsCtlRduV_SZ] == sizeof(*rdu))
+		return 0;
+
+	AuDbg("%u:%u\n",
+	      rdu->verify[AufsCtlRduV_SZ], (unsigned int)sizeof(*rdu));
+	return -EINVAL;
+}
+
+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	long err, e;
+	struct aufs_rdu rdu;
+	void __user *p = (void __user *)arg;
+
+	err = copy_from_user(&rdu, p, sizeof(rdu));
+	if (unlikely(err)) {
+		err = -EFAULT;
+		AuTraceErr(err);
+		goto out;
+	}
+	err = au_rdu_verify(&rdu);
+	if (unlikely(err))
+		goto out;
+
+	switch (cmd) {
+	case AUFS_CTL_RDU:
+		err = au_rdu(file, &rdu);
+		if (unlikely(err))
+			break;
+
+		e = copy_to_user(p, &rdu, sizeof(rdu));
+		if (unlikely(e)) {
+			err = -EFAULT;
+			AuTraceErr(err);
+		}
+		break;
+	case AUFS_CTL_RDU_INO:
+		err = au_rdu_ino(file, &rdu);
+		break;
+
+	default:
+		/* err = -ENOTTY; */
+		err = -EINVAL;
+	}
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+#ifdef CONFIG_COMPAT
+long au_rdu_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	long err, e;
+	struct aufs_rdu rdu;
+	void __user *p = compat_ptr(arg);
+
+	/* todo: get_user()? */
+	err = copy_from_user(&rdu, p, sizeof(rdu));
+	if (unlikely(err)) {
+		err = -EFAULT;
+		AuTraceErr(err);
+		goto out;
+	}
+	rdu.ent.e = compat_ptr(rdu.ent.ul);
+	err = au_rdu_verify(&rdu);
+	if (unlikely(err))
+		goto out;
+
+	switch (cmd) {
+	case AUFS_CTL_RDU:
+		err = au_rdu(file, &rdu);
+		if (unlikely(err))
+			break;
+
+		rdu.ent.ul = ptr_to_compat(rdu.ent.e);
+		rdu.tail.ul = ptr_to_compat(rdu.tail.e);
+		e = copy_to_user(p, &rdu, sizeof(rdu));
+		if (unlikely(e)) {
+			err = -EFAULT;
+			AuTraceErr(err);
+		}
+		break;
+	case AUFS_CTL_RDU_INO:
+		err = au_rdu_ino(file, &rdu);
+		break;
+
+	default:
+		/* err = -ENOTTY; */
+		err = -EINVAL;
+	}
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+#endif
diff --git a/fs/aufs/rwsem.h b/fs/aufs/rwsem.h
new file mode 100644
index 000000000..e4711f6f7
--- /dev/null
+++ b/fs/aufs/rwsem.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * simple read-write semaphore wrappers
+ */
+
+#ifndef __AUFS_RWSEM_H__
+#define __AUFS_RWSEM_H__
+
+#ifdef __KERNEL__
+
+#include "debug.h"
+
+/* in the futre, the name 'au_rwsem' will be totally gone */
+#define au_rwsem	rw_semaphore
+
+/* to debug easier, do not make them inlined functions */
+#define AuRwMustNoWaiters(rw)	AuDebugOn(rwsem_is_contended(rw))
+/* rwsem_is_locked() is unusable */
+#define AuRwMustReadLock(rw)	AuDebugOn(!lockdep_recursing(current) \
+					  && debug_locks \
+					  && !lockdep_is_held_type(rw, 1))
+#define AuRwMustWriteLock(rw)	AuDebugOn(!lockdep_recursing(current) \
+					  && debug_locks \
+					  && !lockdep_is_held_type(rw, 0))
+#define AuRwMustAnyLock(rw)	AuDebugOn(!lockdep_recursing(current) \
+					  && debug_locks \
+					  && !lockdep_is_held(rw))
+#define AuRwDestroy(rw)		AuDebugOn(!lockdep_recursing(current) \
+					  && debug_locks \
+					  && lockdep_is_held(rw))
+
+#define au_rw_init(rw)	init_rwsem(rw)
+
+#define au_rw_init_wlock(rw) do {		\
+		au_rw_init(rw);			\
+		down_write(rw);			\
+	} while (0)
+
+#define au_rw_init_wlock_nested(rw, lsc) do {	\
+		au_rw_init(rw);			\
+		down_write_nested(rw, lsc);	\
+	} while (0)
+
+#define au_rw_read_lock(rw)		down_read(rw)
+#define au_rw_read_lock_nested(rw, lsc)	down_read_nested(rw, lsc)
+#define au_rw_read_unlock(rw)		up_read(rw)
+#define au_rw_dgrade_lock(rw)		downgrade_write(rw)
+#define au_rw_write_lock(rw)		down_write(rw)
+#define au_rw_write_lock_nested(rw, lsc) down_write_nested(rw, lsc)
+#define au_rw_write_unlock(rw)		up_write(rw)
+/* why is not _nested version defined? */
+#define au_rw_read_trylock(rw)		down_read_trylock(rw)
+#define au_rw_write_trylock(rw)		down_write_trylock(rw)
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_RWSEM_H__ */
diff --git a/fs/aufs/sbinfo.c b/fs/aufs/sbinfo.c
new file mode 100644
index 000000000..bf8877f33
--- /dev/null
+++ b/fs/aufs/sbinfo.c
@@ -0,0 +1,304 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * superblock private data
+ */
+
+#include "aufs.h"
+
+/*
+ * they are necessary regardless sysfs is disabled.
+ */
+void au_si_free(struct kobject *kobj)
+{
+	int i;
+	struct au_sbinfo *sbinfo;
+	char *locked __maybe_unused; /* debug only */
+
+	sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
+	for (i = 0; i < AuPlink_NHASH; i++)
+		AuDebugOn(!hlist_bl_empty(sbinfo->si_plink + i));
+	AuDebugOn(atomic_read(&sbinfo->si_nowait.nw_len));
+
+	AuDebugOn(percpu_counter_sum(&sbinfo->si_ninodes));
+	percpu_counter_destroy(&sbinfo->si_ninodes);
+	AuDebugOn(percpu_counter_sum(&sbinfo->si_nfiles));
+	percpu_counter_destroy(&sbinfo->si_nfiles);
+
+	au_rw_write_lock(&sbinfo->si_rwsem);
+	au_br_free(sbinfo);
+	au_rw_write_unlock(&sbinfo->si_rwsem);
+
+	kfree(sbinfo->si_branch);
+	mutex_destroy(&sbinfo->si_xib_mtx);
+	AuRwDestroy(&sbinfo->si_rwsem);
+
+	kfree(sbinfo);
+}
+
+int au_si_alloc(struct super_block *sb)
+{
+	int err, i;
+	struct au_sbinfo *sbinfo;
+
+	err = -ENOMEM;
+	sbinfo = kzalloc(sizeof(*sbinfo), GFP_NOFS);
+	if (unlikely(!sbinfo))
+		goto out;
+
+	/* will be reallocated separately */
+	sbinfo->si_branch = kzalloc(sizeof(*sbinfo->si_branch), GFP_NOFS);
+	if (unlikely(!sbinfo->si_branch))
+		goto out_sbinfo;
+
+	err = sysaufs_si_init(sbinfo);
+	if (unlikely(err))
+		goto out_br;
+
+	au_nwt_init(&sbinfo->si_nowait);
+	au_rw_init_wlock(&sbinfo->si_rwsem);
+
+	percpu_counter_init(&sbinfo->si_ninodes, 0, GFP_NOFS);
+	percpu_counter_init(&sbinfo->si_nfiles, 0, GFP_NOFS);
+
+	sbinfo->si_bbot = -1;
+	sbinfo->si_last_br_id = AUFS_BRANCH_MAX / 2;
+
+	sbinfo->si_wbr_copyup = AuWbrCopyup_Def;
+	sbinfo->si_wbr_create = AuWbrCreate_Def;
+	sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + sbinfo->si_wbr_copyup;
+	sbinfo->si_wbr_create_ops = au_wbr_create_ops + sbinfo->si_wbr_create;
+
+	au_fhsm_init(sbinfo);
+
+	sbinfo->si_mntflags = au_opts_plink(AuOpt_Def);
+
+	sbinfo->si_xino_jiffy = jiffies;
+	sbinfo->si_xino_expire
+		= msecs_to_jiffies(AUFS_XINO_DEF_SEC * MSEC_PER_SEC);
+	mutex_init(&sbinfo->si_xib_mtx);
+	sbinfo->si_xino_brid = -1;
+	/* leave si_xib_last_pindex and si_xib_next_bit */
+
+	INIT_HLIST_BL_HEAD(&sbinfo->si_aopen);
+
+	sbinfo->si_rdcache = msecs_to_jiffies(AUFS_RDCACHE_DEF * MSEC_PER_SEC);
+	sbinfo->si_rdblk = AUFS_RDBLK_DEF;
+	sbinfo->si_rdhash = AUFS_RDHASH_DEF;
+	sbinfo->si_dirwh = AUFS_DIRWH_DEF;
+
+	for (i = 0; i < AuPlink_NHASH; i++)
+		INIT_HLIST_BL_HEAD(sbinfo->si_plink + i);
+	init_waitqueue_head(&sbinfo->si_plink_wq);
+	spin_lock_init(&sbinfo->si_plink_maint_lock);
+
+	INIT_HLIST_BL_HEAD(&sbinfo->si_files);
+
+	/* with getattr by default */
+	sbinfo->si_iop_array = aufs_iop;
+
+	/* leave other members for sysaufs and si_mnt. */
+	sbinfo->si_sb = sb;
+	sb->s_fs_info = sbinfo;
+	si_pid_set(sb);
+	return 0; /* success */
+
+out_br:
+	kfree(sbinfo->si_branch);
+out_sbinfo:
+	kfree(sbinfo);
+out:
+	return err;
+}
+
+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr, int may_shrink)
+{
+	int err, sz;
+	struct au_branch **brp;
+
+	AuRwMustWriteLock(&sbinfo->si_rwsem);
+
+	err = -ENOMEM;
+	sz = sizeof(*brp) * (sbinfo->si_bbot + 1);
+	if (unlikely(!sz))
+		sz = sizeof(*brp);
+	brp = au_kzrealloc(sbinfo->si_branch, sz, sizeof(*brp) * nbr, GFP_NOFS,
+			   may_shrink);
+	if (brp) {
+		sbinfo->si_branch = brp;
+		err = 0;
+	}
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+unsigned int au_sigen_inc(struct super_block *sb)
+{
+	unsigned int gen;
+	struct inode *inode;
+
+	SiMustWriteLock(sb);
+
+	gen = ++au_sbi(sb)->si_generation;
+	au_update_digen(sb->s_root);
+	inode = d_inode(sb->s_root);
+	au_update_iigen(inode, /*half*/0);
+	inode_inc_iversion(inode);
+	return gen;
+}
+
+aufs_bindex_t au_new_br_id(struct super_block *sb)
+{
+	aufs_bindex_t br_id;
+	int i;
+	struct au_sbinfo *sbinfo;
+
+	SiMustWriteLock(sb);
+
+	sbinfo = au_sbi(sb);
+	for (i = 0; i <= AUFS_BRANCH_MAX; i++) {
+		br_id = ++sbinfo->si_last_br_id;
+		AuDebugOn(br_id < 0);
+		if (br_id && au_br_index(sb, br_id) < 0)
+			return br_id;
+	}
+
+	return -1;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* it is ok that new 'nwt' tasks are appended while we are sleeping */
+int si_read_lock(struct super_block *sb, int flags)
+{
+	int err;
+
+	err = 0;
+	if (au_ftest_lock(flags, FLUSH))
+		au_nwt_flush(&au_sbi(sb)->si_nowait);
+
+	si_noflush_read_lock(sb);
+	err = au_plink_maint(sb, flags);
+	if (unlikely(err))
+		si_read_unlock(sb);
+
+	return err;
+}
+
+int si_write_lock(struct super_block *sb, int flags)
+{
+	int err;
+
+	if (au_ftest_lock(flags, FLUSH))
+		au_nwt_flush(&au_sbi(sb)->si_nowait);
+
+	si_noflush_write_lock(sb);
+	err = au_plink_maint(sb, flags);
+	if (unlikely(err))
+		si_write_unlock(sb);
+
+	return err;
+}
+
+/* dentry and super_block lock. call at entry point */
+int aufs_read_lock(struct dentry *dentry, int flags)
+{
+	int err;
+	struct super_block *sb;
+
+	sb = dentry->d_sb;
+	err = si_read_lock(sb, flags);
+	if (unlikely(err))
+		goto out;
+
+	if (au_ftest_lock(flags, DW))
+		di_write_lock_child(dentry);
+	else
+		di_read_lock_child(dentry, flags);
+
+	if (au_ftest_lock(flags, GEN)) {
+		err = au_digen_test(dentry, au_sigen(sb));
+		if (!au_opt_test(au_mntflags(sb), UDBA_NONE))
+			AuDebugOn(!err && au_dbrange_test(dentry));
+		else if (!err)
+			err = au_dbrange_test(dentry);
+		if (unlikely(err))
+			aufs_read_unlock(dentry, flags);
+	}
+
+out:
+	return err;
+}
+
+void aufs_read_unlock(struct dentry *dentry, int flags)
+{
+	if (au_ftest_lock(flags, DW))
+		di_write_unlock(dentry);
+	else
+		di_read_unlock(dentry, flags);
+	si_read_unlock(dentry->d_sb);
+}
+
+void aufs_write_lock(struct dentry *dentry)
+{
+	si_write_lock(dentry->d_sb, AuLock_FLUSH | AuLock_NOPLMW);
+	di_write_lock_child(dentry);
+}
+
+void aufs_write_unlock(struct dentry *dentry)
+{
+	di_write_unlock(dentry);
+	si_write_unlock(dentry->d_sb);
+}
+
+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags)
+{
+	int err;
+	unsigned int sigen;
+	struct super_block *sb;
+
+	sb = d1->d_sb;
+	err = si_read_lock(sb, flags);
+	if (unlikely(err))
+		goto out;
+
+	di_write_lock2_child(d1, d2, au_ftest_lock(flags, DIRS));
+
+	if (au_ftest_lock(flags, GEN)) {
+		sigen = au_sigen(sb);
+		err = au_digen_test(d1, sigen);
+		AuDebugOn(!err && au_dbrange_test(d1));
+		if (!err) {
+			err = au_digen_test(d2, sigen);
+			AuDebugOn(!err && au_dbrange_test(d2));
+		}
+		if (unlikely(err))
+			aufs_read_and_write_unlock2(d1, d2);
+	}
+
+out:
+	return err;
+}
+
+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2)
+{
+	di_write_unlock2(d1, d2);
+	si_read_unlock(d1->d_sb);
+}
diff --git a/fs/aufs/super.c b/fs/aufs/super.c
new file mode 100644
index 000000000..56313e759
--- /dev/null
+++ b/fs/aufs/super.c
@@ -0,0 +1,1051 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * mount and super_block operations
+ */
+
+#include <linux/mm.h>
+#include <linux/seq_file.h>
+#include <linux/statfs.h>
+#include <linux/vmalloc.h>
+#include "aufs.h"
+
+/*
+ * super_operations
+ */
+static struct inode *aufs_alloc_inode(struct super_block *sb __maybe_unused)
+{
+	struct au_icntnr *c;
+
+	c = au_cache_alloc_icntnr();
+	if (c) {
+		au_icntnr_init(c);
+		inode_set_iversion(&c->vfs_inode, 1); /* sigen(sb); */
+		c->iinfo.ii_hinode = NULL;
+		return &c->vfs_inode;
+	}
+	return NULL;
+}
+
+static void aufs_destroy_inode_cb(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+
+	au_cache_free_icntnr(container_of(inode, struct au_icntnr, vfs_inode));
+}
+
+static void aufs_destroy_inode(struct inode *inode)
+{
+	if (!au_is_bad_inode(inode))
+		au_iinfo_fin(inode);
+	call_rcu(&inode->i_rcu, aufs_destroy_inode_cb);
+}
+
+struct inode *au_iget_locked(struct super_block *sb, ino_t ino)
+{
+	struct inode *inode;
+	int err;
+
+	inode = iget_locked(sb, ino);
+	if (unlikely(!inode)) {
+		inode = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+	if (!(inode->i_state & I_NEW))
+		goto out;
+
+	err = au_xigen_new(inode);
+	if (!err)
+		err = au_iinfo_init(inode);
+	if (!err)
+		inode_inc_iversion(inode);
+	else {
+		iget_failed(inode);
+		inode = ERR_PTR(err);
+	}
+
+out:
+	/* never return NULL */
+	AuDebugOn(!inode);
+	AuTraceErrPtr(inode);
+	return inode;
+}
+
+/* lock free root dinfo */
+static int au_show_brs(struct seq_file *seq, struct super_block *sb)
+{
+	int err;
+	aufs_bindex_t bindex, bbot;
+	struct path path;
+	struct au_hdentry *hdp;
+	struct au_branch *br;
+	au_br_perm_str_t perm;
+
+	err = 0;
+	bbot = au_sbbot(sb);
+	bindex = 0;
+	hdp = au_hdentry(au_di(sb->s_root), bindex);
+	for (; !err && bindex <= bbot; bindex++, hdp++) {
+		br = au_sbr(sb, bindex);
+		path.mnt = au_br_mnt(br);
+		path.dentry = hdp->hd_dentry;
+		err = au_seq_path(seq, &path);
+		if (!err) {
+			au_optstr_br_perm(&perm, br->br_perm);
+			seq_printf(seq, "=%s", perm.a);
+			if (bindex != bbot)
+				seq_putc(seq, ':');
+		}
+	}
+	if (unlikely(err || seq_has_overflowed(seq)))
+		err = -E2BIG;
+
+	return err;
+}
+
+static void au_gen_fmt(char *fmt, int len __maybe_unused, const char *pat,
+		       const char *append)
+{
+	char *p;
+
+	p = fmt;
+	while (*pat != ':')
+		*p++ = *pat++;
+	*p++ = *pat++;
+	strcpy(p, append);
+	AuDebugOn(strlen(fmt) >= len);
+}
+
+static void au_show_wbr_create(struct seq_file *m, int v,
+			       struct au_sbinfo *sbinfo)
+{
+	const char *pat;
+	char fmt[32];
+	struct au_wbr_mfs *mfs;
+
+	AuRwMustAnyLock(&sbinfo->si_rwsem);
+
+	seq_puts(m, ",create=");
+	pat = au_optstr_wbr_create(v);
+	mfs = &sbinfo->si_wbr_mfs;
+	switch (v) {
+	case AuWbrCreate_TDP:
+	case AuWbrCreate_RR:
+	case AuWbrCreate_MFS:
+	case AuWbrCreate_PMFS:
+		seq_puts(m, pat);
+		break;
+	case AuWbrCreate_MFSRR:
+	case AuWbrCreate_TDMFS:
+	case AuWbrCreate_PMFSRR:
+		au_gen_fmt(fmt, sizeof(fmt), pat, "%llu");
+		seq_printf(m, fmt, mfs->mfsrr_watermark);
+		break;
+	case AuWbrCreate_MFSV:
+	case AuWbrCreate_PMFSV:
+		au_gen_fmt(fmt, sizeof(fmt), pat, "%lu");
+		seq_printf(m, fmt,
+			   jiffies_to_msecs(mfs->mfs_expire)
+			   / MSEC_PER_SEC);
+		break;
+	case AuWbrCreate_MFSRRV:
+	case AuWbrCreate_TDMFSV:
+	case AuWbrCreate_PMFSRRV:
+		au_gen_fmt(fmt, sizeof(fmt), pat, "%llu:%lu");
+		seq_printf(m, fmt, mfs->mfsrr_watermark,
+			   jiffies_to_msecs(mfs->mfs_expire) / MSEC_PER_SEC);
+		break;
+	default:
+		BUG();
+	}
+}
+
+static int au_show_xino(struct seq_file *seq, struct super_block *sb)
+{
+#ifdef CONFIG_SYSFS
+	return 0;
+#else
+	int err;
+	const int len = sizeof(AUFS_XINO_FNAME) - 1;
+	aufs_bindex_t bindex, brid;
+	struct qstr *name;
+	struct file *f;
+	struct dentry *d, *h_root;
+
+	AuRwMustAnyLock(&sbinfo->si_rwsem);
+
+	err = 0;
+	f = au_sbi(sb)->si_xib;
+	if (!f)
+		goto out;
+
+	/* stop printing the default xino path on the first writable branch */
+	h_root = NULL;
+	brid = au_xino_brid(sb);
+	if (brid >= 0) {
+		bindex = au_br_index(sb, brid);
+		h_root = au_hdentry(au_di(sb->s_root), bindex)->hd_dentry;
+	}
+	d = f->f_path.dentry;
+	name = &d->d_name;
+	/* safe ->d_parent because the file is unlinked */
+	if (d->d_parent == h_root
+	    && name->len == len
+	    && !memcmp(name->name, AUFS_XINO_FNAME, len))
+		goto out;
+
+	seq_puts(seq, ",xino=");
+	err = au_xino_path(seq, f);
+
+out:
+	return err;
+#endif
+}
+
+/* seq_file will re-call me in case of too long string */
+static int aufs_show_options(struct seq_file *m, struct dentry *dentry)
+{
+	int err;
+	unsigned int mnt_flags, v;
+	struct super_block *sb;
+	struct au_sbinfo *sbinfo;
+
+#define AuBool(name, str) do { \
+	v = au_opt_test(mnt_flags, name); \
+	if (v != au_opt_test(AuOpt_Def, name)) \
+		seq_printf(m, ",%s" #str, v ? "" : "no"); \
+} while (0)
+
+#define AuStr(name, str) do { \
+	v = mnt_flags & AuOptMask_##name; \
+	if (v != (AuOpt_Def & AuOptMask_##name)) \
+		seq_printf(m, "," #str "=%s", au_optstr_##str(v)); \
+} while (0)
+
+#define AuUInt(name, str, val) do { \
+	if (val != AUFS_##name##_DEF) \
+		seq_printf(m, "," #str "=%u", val); \
+} while (0)
+
+	sb = dentry->d_sb;
+	if (sb->s_flags & SB_POSIXACL)
+		seq_puts(m, ",acl");
+#if 0
+	if (sb->s_flags & SB_I_VERSION)
+		seq_puts(m, ",i_version");
+#endif
+
+	/* lock free root dinfo */
+	si_noflush_read_lock(sb);
+	sbinfo = au_sbi(sb);
+	seq_printf(m, ",si=%lx", sysaufs_si_id(sbinfo));
+
+	mnt_flags = au_mntflags(sb);
+	if (au_opt_test(mnt_flags, XINO)) {
+		err = au_show_xino(m, sb);
+		if (unlikely(err))
+			goto out;
+	} else
+		seq_puts(m, ",noxino");
+
+	AuBool(TRUNC_XINO, trunc_xino);
+	AuStr(UDBA, udba);
+	AuBool(SHWH, shwh);
+	AuBool(PLINK, plink);
+	AuBool(DIO, dio);
+	AuBool(DIRPERM1, dirperm1);
+
+	v = sbinfo->si_wbr_create;
+	if (v != AuWbrCreate_Def)
+		au_show_wbr_create(m, v, sbinfo);
+
+	v = sbinfo->si_wbr_copyup;
+	if (v != AuWbrCopyup_Def)
+		seq_printf(m, ",cpup=%s", au_optstr_wbr_copyup(v));
+
+	v = au_opt_test(mnt_flags, ALWAYS_DIROPQ);
+	if (v != au_opt_test(AuOpt_Def, ALWAYS_DIROPQ))
+		seq_printf(m, ",diropq=%c", v ? 'a' : 'w');
+
+	AuUInt(DIRWH, dirwh, sbinfo->si_dirwh);
+
+	v = jiffies_to_msecs(sbinfo->si_rdcache) / MSEC_PER_SEC;
+	AuUInt(RDCACHE, rdcache, v);
+
+	AuUInt(RDBLK, rdblk, sbinfo->si_rdblk);
+	AuUInt(RDHASH, rdhash, sbinfo->si_rdhash);
+
+	au_fhsm_show(m, sbinfo);
+
+	AuBool(DIRREN, dirren);
+	AuBool(SUM, sum);
+	/* AuBool(SUM_W, wsum); */
+	AuBool(WARN_PERM, warn_perm);
+	AuBool(VERBOSE, verbose);
+
+out:
+	/* be sure to print "br:" last */
+	if (!sysaufs_brs) {
+		seq_puts(m, ",br:");
+		au_show_brs(m, sb);
+	}
+	si_read_unlock(sb);
+	return 0;
+
+#undef AuBool
+#undef AuStr
+#undef AuUInt
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* sum mode which returns the summation for statfs(2) */
+
+static u64 au_add_till_max(u64 a, u64 b)
+{
+	u64 old;
+
+	old = a;
+	a += b;
+	if (old <= a)
+		return a;
+	return ULLONG_MAX;
+}
+
+static u64 au_mul_till_max(u64 a, long mul)
+{
+	u64 old;
+
+	old = a;
+	a *= mul;
+	if (old <= a)
+		return a;
+	return ULLONG_MAX;
+}
+
+static int au_statfs_sum(struct super_block *sb, struct kstatfs *buf)
+{
+	int err;
+	long bsize, factor;
+	u64 blocks, bfree, bavail, files, ffree;
+	aufs_bindex_t bbot, bindex, i;
+	unsigned char shared;
+	struct path h_path;
+	struct super_block *h_sb;
+
+	err = 0;
+	bsize = LONG_MAX;
+	files = 0;
+	ffree = 0;
+	blocks = 0;
+	bfree = 0;
+	bavail = 0;
+	bbot = au_sbbot(sb);
+	for (bindex = 0; bindex <= bbot; bindex++) {
+		h_path.mnt = au_sbr_mnt(sb, bindex);
+		h_sb = h_path.mnt->mnt_sb;
+		shared = 0;
+		for (i = 0; !shared && i < bindex; i++)
+			shared = (au_sbr_sb(sb, i) == h_sb);
+		if (shared)
+			continue;
+
+		/* sb->s_root for NFS is unreliable */
+		h_path.dentry = h_path.mnt->mnt_root;
+		err = vfs_statfs(&h_path, buf);
+		if (unlikely(err))
+			goto out;
+
+		if (bsize > buf->f_bsize) {
+			/*
+			 * we will reduce bsize, so we have to expand blocks
+			 * etc. to match them again
+			 */
+			factor = (bsize / buf->f_bsize);
+			blocks = au_mul_till_max(blocks, factor);
+			bfree = au_mul_till_max(bfree, factor);
+			bavail = au_mul_till_max(bavail, factor);
+			bsize = buf->f_bsize;
+		}
+
+		factor = (buf->f_bsize / bsize);
+		blocks = au_add_till_max(blocks,
+				au_mul_till_max(buf->f_blocks, factor));
+		bfree = au_add_till_max(bfree,
+				au_mul_till_max(buf->f_bfree, factor));
+		bavail = au_add_till_max(bavail,
+				au_mul_till_max(buf->f_bavail, factor));
+		files = au_add_till_max(files, buf->f_files);
+		ffree = au_add_till_max(ffree, buf->f_ffree);
+	}
+
+	buf->f_bsize = bsize;
+	buf->f_blocks = blocks;
+	buf->f_bfree = bfree;
+	buf->f_bavail = bavail;
+	buf->f_files = files;
+	buf->f_ffree = ffree;
+	buf->f_frsize = 0;
+
+out:
+	return err;
+}
+
+static int aufs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	int err;
+	struct path h_path;
+	struct super_block *sb;
+
+	/* lock free root dinfo */
+	sb = dentry->d_sb;
+	si_noflush_read_lock(sb);
+	if (!au_opt_test(au_mntflags(sb), SUM)) {
+		/* sb->s_root for NFS is unreliable */
+		h_path.mnt = au_sbr_mnt(sb, 0);
+		h_path.dentry = h_path.mnt->mnt_root;
+		err = vfs_statfs(&h_path, buf);
+	} else
+		err = au_statfs_sum(sb, buf);
+	si_read_unlock(sb);
+
+	if (!err) {
+		buf->f_type = AUFS_SUPER_MAGIC;
+		buf->f_namelen = AUFS_MAX_NAMELEN;
+		memset(&buf->f_fsid, 0, sizeof(buf->f_fsid));
+	}
+	/* buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; */
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int aufs_sync_fs(struct super_block *sb, int wait)
+{
+	int err, e;
+	aufs_bindex_t bbot, bindex;
+	struct au_branch *br;
+	struct super_block *h_sb;
+
+	err = 0;
+	si_noflush_read_lock(sb);
+	bbot = au_sbbot(sb);
+	for (bindex = 0; bindex <= bbot; bindex++) {
+		br = au_sbr(sb, bindex);
+		if (!au_br_writable(br->br_perm))
+			continue;
+
+		h_sb = au_sbr_sb(sb, bindex);
+		e = vfsub_sync_filesystem(h_sb, wait);
+		if (unlikely(e && !err))
+			err = e;
+		/* go on even if an error happens */
+	}
+	si_read_unlock(sb);
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* final actions when unmounting a file system */
+static void aufs_put_super(struct super_block *sb)
+{
+	struct au_sbinfo *sbinfo;
+
+	sbinfo = au_sbi(sb);
+	if (!sbinfo)
+		return;
+
+	dbgaufs_si_fin(sbinfo);
+	kobject_put(&sbinfo->si_kobj);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb,
+		     struct super_block *sb, void *arg)
+{
+	void *array;
+	unsigned long long n, sz;
+
+	array = NULL;
+	n = 0;
+	if (!*hint)
+		goto out;
+
+	if (*hint > ULLONG_MAX / sizeof(array)) {
+		array = ERR_PTR(-EMFILE);
+		pr_err("hint %llu\n", *hint);
+		goto out;
+	}
+
+	sz = sizeof(array) * *hint;
+	array = kzalloc(sz, GFP_NOFS);
+	if (unlikely(!array))
+		array = vzalloc(sz);
+	if (unlikely(!array)) {
+		array = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	n = cb(sb, array, *hint, arg);
+	AuDebugOn(n > *hint);
+
+out:
+	*hint = n;
+	return array;
+}
+
+static unsigned long long au_iarray_cb(struct super_block *sb, void *a,
+				       unsigned long long max __maybe_unused,
+				       void *arg)
+{
+	unsigned long long n;
+	struct inode **p, *inode;
+	struct list_head *head;
+
+	n = 0;
+	p = a;
+	head = arg;
+	spin_lock(&sb->s_inode_list_lock);
+	list_for_each_entry(inode, head, i_sb_list) {
+		if (!au_is_bad_inode(inode)
+		    && au_ii(inode)->ii_btop >= 0) {
+			spin_lock(&inode->i_lock);
+			if (atomic_read(&inode->i_count)) {
+				au_igrab(inode);
+				*p++ = inode;
+				n++;
+				AuDebugOn(n > max);
+			}
+			spin_unlock(&inode->i_lock);
+		}
+	}
+	spin_unlock(&sb->s_inode_list_lock);
+
+	return n;
+}
+
+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max)
+{
+	*max = au_ninodes(sb);
+	return au_array_alloc(max, au_iarray_cb, sb, &sb->s_inodes);
+}
+
+void au_iarray_free(struct inode **a, unsigned long long max)
+{
+	unsigned long long ull;
+
+	for (ull = 0; ull < max; ull++)
+		iput(a[ull]);
+	kvfree(a);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * refresh dentry and inode at remount time.
+ */
+/* todo: consolidate with simple_reval_dpath() and au_reval_for_attr() */
+static int au_do_refresh(struct dentry *dentry, unsigned int dir_flags,
+		      struct dentry *parent)
+{
+	int err;
+
+	di_write_lock_child(dentry);
+	di_read_lock_parent(parent, AuLock_IR);
+	err = au_refresh_dentry(dentry, parent);
+	if (!err && dir_flags)
+		au_hn_reset(d_inode(dentry), dir_flags);
+	di_read_unlock(parent, AuLock_IR);
+	di_write_unlock(dentry);
+
+	return err;
+}
+
+static int au_do_refresh_d(struct dentry *dentry, unsigned int sigen,
+			   struct au_sbinfo *sbinfo,
+			   const unsigned int dir_flags, unsigned int do_idop)
+{
+	int err;
+	struct dentry *parent;
+
+	err = 0;
+	parent = dget_parent(dentry);
+	if (!au_digen_test(parent, sigen) && au_digen_test(dentry, sigen)) {
+		if (d_really_is_positive(dentry)) {
+			if (!d_is_dir(dentry))
+				err = au_do_refresh(dentry, /*dir_flags*/0,
+						 parent);
+			else {
+				err = au_do_refresh(dentry, dir_flags, parent);
+				if (unlikely(err))
+					au_fset_si(sbinfo, FAILED_REFRESH_DIR);
+			}
+		} else
+			err = au_do_refresh(dentry, /*dir_flags*/0, parent);
+		AuDbgDentry(dentry);
+	}
+	dput(parent);
+
+	if (!err) {
+		if (do_idop)
+			au_refresh_dop(dentry, /*force_reval*/0);
+	} else
+		au_refresh_dop(dentry, /*force_reval*/1);
+
+	AuTraceErr(err);
+	return err;
+}
+
+static int au_refresh_d(struct super_block *sb, unsigned int do_idop)
+{
+	int err, i, j, ndentry, e;
+	unsigned int sigen;
+	struct au_dcsub_pages dpages;
+	struct au_dpage *dpage;
+	struct dentry **dentries, *d;
+	struct au_sbinfo *sbinfo;
+	struct dentry *root = sb->s_root;
+	const unsigned int dir_flags = au_hi_flags(d_inode(root), /*isdir*/1);
+
+	if (do_idop)
+		au_refresh_dop(root, /*force_reval*/0);
+
+	err = au_dpages_init(&dpages, GFP_NOFS);
+	if (unlikely(err))
+		goto out;
+	err = au_dcsub_pages(&dpages, root, NULL, NULL);
+	if (unlikely(err))
+		goto out_dpages;
+
+	sigen = au_sigen(sb);
+	sbinfo = au_sbi(sb);
+	for (i = 0; i < dpages.ndpage; i++) {
+		dpage = dpages.dpages + i;
+		dentries = dpage->dentries;
+		ndentry = dpage->ndentry;
+		for (j = 0; j < ndentry; j++) {
+			d = dentries[j];
+			e = au_do_refresh_d(d, sigen, sbinfo, dir_flags,
+					    do_idop);
+			if (unlikely(e && !err))
+				err = e;
+			/* go on even err */
+		}
+	}
+
+out_dpages:
+	au_dpages_free(&dpages);
+out:
+	return err;
+}
+
+static int au_refresh_i(struct super_block *sb, unsigned int do_idop)
+{
+	int err, e;
+	unsigned int sigen;
+	unsigned long long max, ull;
+	struct inode *inode, **array;
+
+	array = au_iarray_alloc(sb, &max);
+	err = PTR_ERR(array);
+	if (IS_ERR(array))
+		goto out;
+
+	err = 0;
+	sigen = au_sigen(sb);
+	for (ull = 0; ull < max; ull++) {
+		inode = array[ull];
+		if (unlikely(!inode))
+			break;
+
+		e = 0;
+		ii_write_lock_child(inode);
+		if (au_iigen(inode, NULL) != sigen) {
+			e = au_refresh_hinode_self(inode);
+			if (unlikely(e)) {
+				au_refresh_iop(inode, /*force_getattr*/1);
+				pr_err("error %d, i%lu\n", e, inode->i_ino);
+				if (!err)
+					err = e;
+				/* go on even if err */
+			}
+		}
+		if (!e && do_idop)
+			au_refresh_iop(inode, /*force_getattr*/0);
+		ii_write_unlock(inode);
+	}
+
+	au_iarray_free(array, max);
+
+out:
+	return err;
+}
+
+static void au_remount_refresh(struct super_block *sb, unsigned int do_idop)
+{
+	int err, e;
+	unsigned int udba;
+	aufs_bindex_t bindex, bbot;
+	struct dentry *root;
+	struct inode *inode;
+	struct au_branch *br;
+	struct au_sbinfo *sbi;
+
+	au_sigen_inc(sb);
+	sbi = au_sbi(sb);
+	au_fclr_si(sbi, FAILED_REFRESH_DIR);
+
+	root = sb->s_root;
+	DiMustNoWaiters(root);
+	inode = d_inode(root);
+	IiMustNoWaiters(inode);
+
+	udba = au_opt_udba(sb);
+	bbot = au_sbbot(sb);
+	for (bindex = 0; bindex <= bbot; bindex++) {
+		br = au_sbr(sb, bindex);
+		err = au_hnotify_reset_br(udba, br, br->br_perm);
+		if (unlikely(err))
+			AuIOErr("hnotify failed on br %d, %d, ignored\n",
+				bindex, err);
+		/* go on even if err */
+	}
+	au_hn_reset(inode, au_hi_flags(inode, /*isdir*/1));
+
+	if (do_idop) {
+		if (au_ftest_si(sbi, NO_DREVAL)) {
+			AuDebugOn(sb->s_d_op == &aufs_dop_noreval);
+			sb->s_d_op = &aufs_dop_noreval;
+			AuDebugOn(sbi->si_iop_array == aufs_iop_nogetattr);
+			sbi->si_iop_array = aufs_iop_nogetattr;
+		} else {
+			AuDebugOn(sb->s_d_op == &aufs_dop);
+			sb->s_d_op = &aufs_dop;
+			AuDebugOn(sbi->si_iop_array == aufs_iop);
+			sbi->si_iop_array = aufs_iop;
+		}
+		pr_info("reset to %pf and %pf\n",
+			sb->s_d_op, sbi->si_iop_array);
+	}
+
+	di_write_unlock(root);
+	err = au_refresh_d(sb, do_idop);
+	e = au_refresh_i(sb, do_idop);
+	if (unlikely(e && !err))
+		err = e;
+	/* aufs_write_lock() calls ..._child() */
+	di_write_lock_child(root);
+
+	au_cpup_attr_all(inode, /*force*/1);
+
+	if (unlikely(err))
+		AuIOErr("refresh failed, ignored, %d\n", err);
+}
+
+/* stop extra interpretation of errno in mount(8), and strange error messages */
+static int cvt_err(int err)
+{
+	AuTraceErr(err);
+
+	switch (err) {
+	case -ENOENT:
+	case -ENOTDIR:
+	case -EEXIST:
+	case -EIO:
+		err = -EINVAL;
+	}
+	return err;
+}
+
+static int aufs_remount_fs(struct super_block *sb, int *flags, char *data)
+{
+	int err, do_dx;
+	unsigned int mntflags;
+	struct au_opts opts = {
+		.opt = NULL
+	};
+	struct dentry *root;
+	struct inode *inode;
+	struct au_sbinfo *sbinfo;
+
+	err = 0;
+	root = sb->s_root;
+	if (!data || !*data) {
+		err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
+		if (!err) {
+			di_write_lock_child(root);
+			err = au_opts_verify(sb, *flags, /*pending*/0);
+			aufs_write_unlock(root);
+		}
+		goto out;
+	}
+
+	err = -ENOMEM;
+	opts.opt = (void *)__get_free_page(GFP_NOFS);
+	if (unlikely(!opts.opt))
+		goto out;
+	opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
+	opts.flags = AuOpts_REMOUNT;
+	opts.sb_flags = *flags;
+
+	/* parse it before aufs lock */
+	err = au_opts_parse(sb, data, &opts);
+	if (unlikely(err))
+		goto out_opts;
+
+	sbinfo = au_sbi(sb);
+	inode = d_inode(root);
+	inode_lock(inode);
+	err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
+	if (unlikely(err))
+		goto out_mtx;
+	di_write_lock_child(root);
+
+	/* au_opts_remount() may return an error */
+	err = au_opts_remount(sb, &opts);
+	au_opts_free(&opts);
+
+	if (au_ftest_opts(opts.flags, REFRESH))
+		au_remount_refresh(sb, au_ftest_opts(opts.flags, REFRESH_IDOP));
+
+	if (au_ftest_opts(opts.flags, REFRESH_DYAOP)) {
+		mntflags = au_mntflags(sb);
+		do_dx = !!au_opt_test(mntflags, DIO);
+		au_dy_arefresh(do_dx);
+	}
+
+	au_fhsm_wrote_all(sb, /*force*/1); /* ?? */
+	aufs_write_unlock(root);
+
+out_mtx:
+	inode_unlock(inode);
+out_opts:
+	free_page((unsigned long)opts.opt);
+out:
+	err = cvt_err(err);
+	AuTraceErr(err);
+	return err;
+}
+
+static const struct super_operations aufs_sop = {
+	.alloc_inode	= aufs_alloc_inode,
+	.destroy_inode	= aufs_destroy_inode,
+	/* always deleting, no clearing */
+	.drop_inode	= generic_delete_inode,
+	.show_options	= aufs_show_options,
+	.statfs		= aufs_statfs,
+	.put_super	= aufs_put_super,
+	.sync_fs	= aufs_sync_fs,
+	.remount_fs	= aufs_remount_fs
+};
+
+/* ---------------------------------------------------------------------- */
+
+static int alloc_root(struct super_block *sb)
+{
+	int err;
+	struct inode *inode;
+	struct dentry *root;
+
+	err = -ENOMEM;
+	inode = au_iget_locked(sb, AUFS_ROOT_INO);
+	err = PTR_ERR(inode);
+	if (IS_ERR(inode))
+		goto out;
+
+	inode->i_op = aufs_iop + AuIop_DIR; /* with getattr by default */
+	inode->i_fop = &aufs_dir_fop;
+	inode->i_mode = S_IFDIR;
+	set_nlink(inode, 2);
+	unlock_new_inode(inode);
+
+	root = d_make_root(inode);
+	if (unlikely(!root))
+		goto out;
+	err = PTR_ERR(root);
+	if (IS_ERR(root))
+		goto out;
+
+	err = au_di_init(root);
+	if (!err) {
+		sb->s_root = root;
+		return 0; /* success */
+	}
+	dput(root);
+
+out:
+	return err;
+}
+
+static int aufs_fill_super(struct super_block *sb, void *raw_data,
+			   int silent __maybe_unused)
+{
+	int err;
+	struct au_opts opts = {
+		.opt = NULL
+	};
+	struct au_sbinfo *sbinfo;
+	struct dentry *root;
+	struct inode *inode;
+	char *arg = raw_data;
+
+	if (unlikely(!arg || !*arg)) {
+		err = -EINVAL;
+		pr_err("no arg\n");
+		goto out;
+	}
+
+	err = -ENOMEM;
+	opts.opt = (void *)__get_free_page(GFP_NOFS);
+	if (unlikely(!opts.opt))
+		goto out;
+	opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
+	opts.sb_flags = sb->s_flags;
+
+	err = au_si_alloc(sb);
+	if (unlikely(err))
+		goto out_opts;
+	sbinfo = au_sbi(sb);
+
+	/* all timestamps always follow the ones on the branch */
+	sb->s_flags |= SB_NOATIME | SB_NODIRATIME;
+	sb->s_flags |= SB_I_VERSION; /* do we really need this? */
+	sb->s_op = &aufs_sop;
+	sb->s_d_op = &aufs_dop;
+	sb->s_magic = AUFS_SUPER_MAGIC;
+	sb->s_maxbytes = 0;
+	sb->s_stack_depth = 1;
+	au_export_init(sb);
+	au_xattr_init(sb);
+
+	err = alloc_root(sb);
+	if (unlikely(err)) {
+		si_write_unlock(sb);
+		goto out_info;
+	}
+	root = sb->s_root;
+	inode = d_inode(root);
+
+	/*
+	 * actually we can parse options regardless aufs lock here.
+	 * but at remount time, parsing must be done before aufs lock.
+	 * so we follow the same rule.
+	 */
+	ii_write_lock_parent(inode);
+	aufs_write_unlock(root);
+	err = au_opts_parse(sb, arg, &opts);
+	if (unlikely(err))
+		goto out_root;
+
+	/* lock vfs_inode first, then aufs. */
+	inode_lock(inode);
+	aufs_write_lock(root);
+	err = au_opts_mount(sb, &opts);
+	au_opts_free(&opts);
+	if (!err && au_ftest_si(sbinfo, NO_DREVAL)) {
+		sb->s_d_op = &aufs_dop_noreval;
+		pr_info("%pf\n", sb->s_d_op);
+		au_refresh_dop(root, /*force_reval*/0);
+		sbinfo->si_iop_array = aufs_iop_nogetattr;
+		au_refresh_iop(inode, /*force_getattr*/0);
+	}
+	aufs_write_unlock(root);
+	inode_unlock(inode);
+	if (!err)
+		goto out_opts; /* success */
+
+out_root:
+	dput(root);
+	sb->s_root = NULL;
+out_info:
+	dbgaufs_si_fin(sbinfo);
+	kobject_put(&sbinfo->si_kobj);
+	sb->s_fs_info = NULL;
+out_opts:
+	free_page((unsigned long)opts.opt);
+out:
+	AuTraceErr(err);
+	err = cvt_err(err);
+	AuTraceErr(err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static struct dentry *aufs_mount(struct file_system_type *fs_type, int flags,
+				 const char *dev_name __maybe_unused,
+				 void *raw_data)
+{
+	struct dentry *root;
+	struct super_block *sb;
+
+	/* all timestamps always follow the ones on the branch */
+	/* mnt->mnt_flags |= MNT_NOATIME | MNT_NODIRATIME; */
+	root = mount_nodev(fs_type, flags, raw_data, aufs_fill_super);
+	if (IS_ERR(root))
+		goto out;
+
+	sb = root->d_sb;
+	si_write_lock(sb, !AuLock_FLUSH);
+	sysaufs_brs_add(sb, 0);
+	si_write_unlock(sb);
+	au_sbilist_add(sb);
+
+out:
+	return root;
+}
+
+static void aufs_kill_sb(struct super_block *sb)
+{
+	struct au_sbinfo *sbinfo;
+
+	sbinfo = au_sbi(sb);
+	if (sbinfo) {
+		au_sbilist_del(sb);
+		aufs_write_lock(sb->s_root);
+		au_fhsm_fin(sb);
+		if (sbinfo->si_wbr_create_ops->fin)
+			sbinfo->si_wbr_create_ops->fin(sb);
+		if (au_opt_test(sbinfo->si_mntflags, UDBA_HNOTIFY)) {
+			au_opt_set_udba(sbinfo->si_mntflags, UDBA_NONE);
+			au_remount_refresh(sb, /*do_idop*/0);
+		}
+		if (au_opt_test(sbinfo->si_mntflags, PLINK))
+			au_plink_put(sb, /*verbose*/1);
+		au_xino_clr(sb);
+		au_dr_opt_flush(sb);
+		sbinfo->si_sb = NULL;
+		aufs_write_unlock(sb->s_root);
+		au_nwt_flush(&sbinfo->si_nowait);
+	}
+	kill_anon_super(sb);
+}
+
+struct file_system_type aufs_fs_type = {
+	.name		= AUFS_FSTYPE,
+	/* a race between rename and others */
+	.fs_flags	= FS_RENAME_DOES_D_MOVE,
+	.mount		= aufs_mount,
+	.kill_sb	= aufs_kill_sb,
+	/* no need to __module_get() and module_put(). */
+	.owner		= THIS_MODULE,
+};
diff --git a/fs/aufs/super.h b/fs/aufs/super.h
new file mode 100644
index 000000000..e6da60317
--- /dev/null
+++ b/fs/aufs/super.h
@@ -0,0 +1,626 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * super_block operations
+ */
+
+#ifndef __AUFS_SUPER_H__
+#define __AUFS_SUPER_H__
+
+#ifdef __KERNEL__
+
+#include <linux/fs.h>
+#include <linux/kobject.h>
+#include "hbl.h"
+#include "rwsem.h"
+#include "wkq.h"
+
+/* policies to select one among multiple writable branches */
+struct au_wbr_copyup_operations {
+	int (*copyup)(struct dentry *dentry);
+};
+
+#define AuWbr_DIR	1		/* target is a dir */
+#define AuWbr_PARENT	(1 << 1)	/* always require a parent */
+
+#define au_ftest_wbr(flags, name)	((flags) & AuWbr_##name)
+#define au_fset_wbr(flags, name)	{ (flags) |= AuWbr_##name; }
+#define au_fclr_wbr(flags, name)	{ (flags) &= ~AuWbr_##name; }
+
+struct au_wbr_create_operations {
+	int (*create)(struct dentry *dentry, unsigned int flags);
+	int (*init)(struct super_block *sb);
+	int (*fin)(struct super_block *sb);
+};
+
+struct au_wbr_mfs {
+	struct mutex	mfs_lock; /* protect this structure */
+	unsigned long	mfs_jiffy;
+	unsigned long	mfs_expire;
+	aufs_bindex_t	mfs_bindex;
+
+	unsigned long long	mfsrr_bytes;
+	unsigned long long	mfsrr_watermark;
+};
+
+#define AuPlink_NHASH 100
+static inline int au_plink_hash(ino_t ino)
+{
+	return ino % AuPlink_NHASH;
+}
+
+/* File-based Hierarchical Storage Management */
+struct au_fhsm {
+#ifdef CONFIG_AUFS_FHSM
+	/* allow only one process who can receive the notification */
+	spinlock_t		fhsm_spin;
+	pid_t			fhsm_pid;
+	wait_queue_head_t	fhsm_wqh;
+	atomic_t		fhsm_readable;
+
+	/* these are protected by si_rwsem */
+	unsigned long		fhsm_expire;
+	aufs_bindex_t		fhsm_bottom;
+#endif
+};
+
+struct au_branch;
+struct au_sbinfo {
+	/* nowait tasks in the system-wide workqueue */
+	struct au_nowait_tasks	si_nowait;
+
+	/*
+	 * tried sb->s_umount, but failed due to the dependecy between i_mutex.
+	 * rwsem for au_sbinfo is necessary.
+	 */
+	struct au_rwsem		si_rwsem;
+
+	/*
+	 * dirty approach to protect sb->sb_inodes and ->s_files (gone) from
+	 * remount.
+	 */
+	struct percpu_counter	si_ninodes, si_nfiles;
+
+	/* branch management */
+	unsigned int		si_generation;
+
+	/* see AuSi_ flags */
+	unsigned char		au_si_status;
+
+	aufs_bindex_t		si_bbot;
+
+	/* dirty trick to keep br_id plus */
+	unsigned int		si_last_br_id :
+				sizeof(aufs_bindex_t) * BITS_PER_BYTE - 1;
+	struct au_branch	**si_branch;
+
+	/* policy to select a writable branch */
+	unsigned char		si_wbr_copyup;
+	unsigned char		si_wbr_create;
+	struct au_wbr_copyup_operations *si_wbr_copyup_ops;
+	struct au_wbr_create_operations *si_wbr_create_ops;
+
+	/* round robin */
+	atomic_t		si_wbr_rr_next;
+
+	/* most free space */
+	struct au_wbr_mfs	si_wbr_mfs;
+
+	/* File-based Hierarchical Storage Management */
+	struct au_fhsm		si_fhsm;
+
+	/* mount flags */
+	/* include/asm-ia64/siginfo.h defines a macro named si_flags */
+	unsigned int		si_mntflags;
+
+	/* external inode number (bitmap and translation table) */
+	vfs_readf_t		si_xread;
+	vfs_writef_t		si_xwrite;
+	struct file		*si_xib;
+	struct mutex		si_xib_mtx; /* protect xib members */
+	unsigned long		*si_xib_buf;
+	unsigned long		si_xib_last_pindex;
+	int			si_xib_next_bit;
+	aufs_bindex_t		si_xino_brid;
+	unsigned long		si_xino_jiffy;
+	unsigned long		si_xino_expire;
+	/* reserved for future use */
+	/* unsigned long long	si_xib_limit; */	/* Max xib file size */
+
+#ifdef CONFIG_AUFS_EXPORT
+	/* i_generation */
+	struct file		*si_xigen;
+	atomic_t		si_xigen_next;
+#endif
+
+	/* dirty trick to suppoer atomic_open */
+	struct hlist_bl_head	si_aopen;
+
+	/* vdir parameters */
+	unsigned long		si_rdcache;	/* max cache time in jiffies */
+	unsigned int		si_rdblk;	/* deblk size */
+	unsigned int		si_rdhash;	/* hash size */
+
+	/*
+	 * If the number of whiteouts are larger than si_dirwh, leave all of
+	 * them after au_whtmp_ren to reduce the cost of rmdir(2).
+	 * future fsck.aufs or kernel thread will remove them later.
+	 * Otherwise, remove all whiteouts and the dir in rmdir(2).
+	 */
+	unsigned int		si_dirwh;
+
+	/* pseudo_link list */
+	struct hlist_bl_head	si_plink[AuPlink_NHASH];
+	wait_queue_head_t	si_plink_wq;
+	spinlock_t		si_plink_maint_lock;
+	pid_t			si_plink_maint_pid;
+
+	/* file list */
+	struct hlist_bl_head	si_files;
+
+	/* with/without getattr, brother of sb->s_d_op */
+	struct inode_operations *si_iop_array;
+
+	/*
+	 * sysfs and lifetime management.
+	 * this is not a small structure and it may be a waste of memory in case
+	 * of sysfs is disabled, particulary when many aufs-es are mounted.
+	 * but using sysfs is majority.
+	 */
+	struct kobject		si_kobj;
+#ifdef CONFIG_DEBUG_FS
+	struct dentry		 *si_dbgaufs;
+	struct dentry		 *si_dbgaufs_plink;
+	struct dentry		 *si_dbgaufs_xib;
+#ifdef CONFIG_AUFS_EXPORT
+	struct dentry		 *si_dbgaufs_xigen;
+#endif
+#endif
+
+#ifdef CONFIG_AUFS_SBILIST
+	struct hlist_bl_node	si_list;
+#endif
+
+	/* dirty, necessary for unmounting, sysfs and sysrq */
+	struct super_block	*si_sb;
+};
+
+/* sbinfo status flags */
+/*
+ * set true when refresh_dirs() failed at remount time.
+ * then try refreshing dirs at access time again.
+ * if it is false, refreshing dirs at access time is unnecesary
+ */
+#define AuSi_FAILED_REFRESH_DIR	1
+#define AuSi_FHSM		(1 << 1)	/* fhsm is active now */
+#define AuSi_NO_DREVAL		(1 << 2)	/* disable all d_revalidate */
+
+#ifndef CONFIG_AUFS_FHSM
+#undef AuSi_FHSM
+#define AuSi_FHSM		0
+#endif
+
+static inline unsigned char au_do_ftest_si(struct au_sbinfo *sbi,
+					   unsigned int flag)
+{
+	AuRwMustAnyLock(&sbi->si_rwsem);
+	return sbi->au_si_status & flag;
+}
+#define au_ftest_si(sbinfo, name)	au_do_ftest_si(sbinfo, AuSi_##name)
+#define au_fset_si(sbinfo, name) do { \
+	AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
+	(sbinfo)->au_si_status |= AuSi_##name; \
+} while (0)
+#define au_fclr_si(sbinfo, name) do { \
+	AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
+	(sbinfo)->au_si_status &= ~AuSi_##name; \
+} while (0)
+
+/* ---------------------------------------------------------------------- */
+
+/* policy to select one among writable branches */
+#define AuWbrCopyup(sbinfo, ...) \
+	((sbinfo)->si_wbr_copyup_ops->copyup(__VA_ARGS__))
+#define AuWbrCreate(sbinfo, ...) \
+	((sbinfo)->si_wbr_create_ops->create(__VA_ARGS__))
+
+/* flags for si_read_lock()/aufs_read_lock()/di_read_lock() */
+#define AuLock_DW		1		/* write-lock dentry */
+#define AuLock_IR		(1 << 1)	/* read-lock inode */
+#define AuLock_IW		(1 << 2)	/* write-lock inode */
+#define AuLock_FLUSH		(1 << 3)	/* wait for 'nowait' tasks */
+#define AuLock_DIRS		(1 << 4)	/* target is a pair of dirs */
+						/* except RENAME_EXCHANGE */
+#define AuLock_NOPLM		(1 << 5)	/* return err in plm mode */
+#define AuLock_NOPLMW		(1 << 6)	/* wait for plm mode ends */
+#define AuLock_GEN		(1 << 7)	/* test digen/iigen */
+#define au_ftest_lock(flags, name)	((flags) & AuLock_##name)
+#define au_fset_lock(flags, name) \
+	do { (flags) |= AuLock_##name; } while (0)
+#define au_fclr_lock(flags, name) \
+	do { (flags) &= ~AuLock_##name; } while (0)
+
+/* ---------------------------------------------------------------------- */
+
+/* super.c */
+extern struct file_system_type aufs_fs_type;
+struct inode *au_iget_locked(struct super_block *sb, ino_t ino);
+typedef unsigned long long (*au_arraycb_t)(struct super_block *sb, void *array,
+					   unsigned long long max, void *arg);
+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb,
+		     struct super_block *sb, void *arg);
+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max);
+void au_iarray_free(struct inode **a, unsigned long long max);
+
+/* sbinfo.c */
+void au_si_free(struct kobject *kobj);
+int au_si_alloc(struct super_block *sb);
+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr, int may_shrink);
+
+unsigned int au_sigen_inc(struct super_block *sb);
+aufs_bindex_t au_new_br_id(struct super_block *sb);
+
+int si_read_lock(struct super_block *sb, int flags);
+int si_write_lock(struct super_block *sb, int flags);
+int aufs_read_lock(struct dentry *dentry, int flags);
+void aufs_read_unlock(struct dentry *dentry, int flags);
+void aufs_write_lock(struct dentry *dentry);
+void aufs_write_unlock(struct dentry *dentry);
+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags);
+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2);
+
+/* wbr_policy.c */
+extern struct au_wbr_copyup_operations au_wbr_copyup_ops[];
+extern struct au_wbr_create_operations au_wbr_create_ops[];
+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst);
+int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex);
+int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t btop);
+
+/* mvdown.c */
+int au_mvdown(struct dentry *dentry, struct aufs_mvdown __user *arg);
+
+#ifdef CONFIG_AUFS_FHSM
+/* fhsm.c */
+
+static inline pid_t au_fhsm_pid(struct au_fhsm *fhsm)
+{
+	pid_t pid;
+
+	spin_lock(&fhsm->fhsm_spin);
+	pid = fhsm->fhsm_pid;
+	spin_unlock(&fhsm->fhsm_spin);
+
+	return pid;
+}
+
+void au_fhsm_wrote(struct super_block *sb, aufs_bindex_t bindex, int force);
+void au_fhsm_wrote_all(struct super_block *sb, int force);
+int au_fhsm_fd(struct super_block *sb, int oflags);
+int au_fhsm_br_alloc(struct au_branch *br);
+void au_fhsm_set_bottom(struct super_block *sb, aufs_bindex_t bindex);
+void au_fhsm_fin(struct super_block *sb);
+void au_fhsm_init(struct au_sbinfo *sbinfo);
+void au_fhsm_set(struct au_sbinfo *sbinfo, unsigned int sec);
+void au_fhsm_show(struct seq_file *seq, struct au_sbinfo *sbinfo);
+#else
+AuStubVoid(au_fhsm_wrote, struct super_block *sb, aufs_bindex_t bindex,
+	   int force)
+AuStubVoid(au_fhsm_wrote_all, struct super_block *sb, int force)
+AuStub(int, au_fhsm_fd, return -EOPNOTSUPP, struct super_block *sb, int oflags)
+AuStub(pid_t, au_fhsm_pid, return 0, struct au_fhsm *fhsm)
+AuStubInt0(au_fhsm_br_alloc, struct au_branch *br)
+AuStubVoid(au_fhsm_set_bottom, struct super_block *sb, aufs_bindex_t bindex)
+AuStubVoid(au_fhsm_fin, struct super_block *sb)
+AuStubVoid(au_fhsm_init, struct au_sbinfo *sbinfo)
+AuStubVoid(au_fhsm_set, struct au_sbinfo *sbinfo, unsigned int sec)
+AuStubVoid(au_fhsm_show, struct seq_file *seq, struct au_sbinfo *sbinfo)
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+static inline struct au_sbinfo *au_sbi(struct super_block *sb)
+{
+	return sb->s_fs_info;
+}
+
+/* ---------------------------------------------------------------------- */
+
+#ifdef CONFIG_AUFS_EXPORT
+int au_test_nfsd(void);
+void au_export_init(struct super_block *sb);
+void au_xigen_inc(struct inode *inode);
+int au_xigen_new(struct inode *inode);
+int au_xigen_set(struct super_block *sb, struct file *base);
+void au_xigen_clr(struct super_block *sb);
+
+static inline int au_busy_or_stale(void)
+{
+	if (!au_test_nfsd())
+		return -EBUSY;
+	return -ESTALE;
+}
+#else
+AuStubInt0(au_test_nfsd, void)
+AuStubVoid(au_export_init, struct super_block *sb)
+AuStubVoid(au_xigen_inc, struct inode *inode)
+AuStubInt0(au_xigen_new, struct inode *inode)
+AuStubInt0(au_xigen_set, struct super_block *sb, struct file *base)
+AuStubVoid(au_xigen_clr, struct super_block *sb)
+AuStub(int, au_busy_or_stale, return -EBUSY, void)
+#endif /* CONFIG_AUFS_EXPORT */
+
+/* ---------------------------------------------------------------------- */
+
+#ifdef CONFIG_AUFS_SBILIST
+/* module.c */
+extern struct hlist_bl_head au_sbilist;
+
+static inline void au_sbilist_init(void)
+{
+	INIT_HLIST_BL_HEAD(&au_sbilist);
+}
+
+static inline void au_sbilist_add(struct super_block *sb)
+{
+	au_hbl_add(&au_sbi(sb)->si_list, &au_sbilist);
+}
+
+static inline void au_sbilist_del(struct super_block *sb)
+{
+	au_hbl_del(&au_sbi(sb)->si_list, &au_sbilist);
+}
+
+#ifdef CONFIG_AUFS_MAGIC_SYSRQ
+static inline void au_sbilist_lock(void)
+{
+	hlist_bl_lock(&au_sbilist);
+}
+
+static inline void au_sbilist_unlock(void)
+{
+	hlist_bl_unlock(&au_sbilist);
+}
+#define AuGFP_SBILIST	GFP_ATOMIC
+#else
+AuStubVoid(au_sbilist_lock, void)
+AuStubVoid(au_sbilist_unlock, void)
+#define AuGFP_SBILIST	GFP_NOFS
+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
+#else
+AuStubVoid(au_sbilist_init, void)
+AuStubVoid(au_sbilist_add, struct super_block *sb)
+AuStubVoid(au_sbilist_del, struct super_block *sb)
+AuStubVoid(au_sbilist_lock, void)
+AuStubVoid(au_sbilist_unlock, void)
+#define AuGFP_SBILIST	GFP_NOFS
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+static inline void dbgaufs_si_null(struct au_sbinfo *sbinfo)
+{
+	/*
+	 * This function is a dynamic '__init' function actually,
+	 * so the tiny check for si_rwsem is unnecessary.
+	 */
+	/* AuRwMustWriteLock(&sbinfo->si_rwsem); */
+#ifdef CONFIG_DEBUG_FS
+	sbinfo->si_dbgaufs = NULL;
+	sbinfo->si_dbgaufs_plink = NULL;
+	sbinfo->si_dbgaufs_xib = NULL;
+#ifdef CONFIG_AUFS_EXPORT
+	sbinfo->si_dbgaufs_xigen = NULL;
+#endif
+#endif
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* current->atomic_flags */
+/* this value should never corrupt the ones defined in linux/sched.h */
+#define PFA_AUFS	7
+
+TASK_PFA_TEST(AUFS, test_aufs)	/* task_test_aufs */
+TASK_PFA_SET(AUFS, aufs)	/* task_set_aufs */
+TASK_PFA_CLEAR(AUFS, aufs)	/* task_clear_aufs */
+
+static inline int si_pid_test(struct super_block *sb)
+{
+	return !!task_test_aufs(current);
+}
+
+static inline void si_pid_clr(struct super_block *sb)
+{
+	AuDebugOn(!task_test_aufs(current));
+	task_clear_aufs(current);
+}
+
+static inline void si_pid_set(struct super_block *sb)
+{
+	AuDebugOn(task_test_aufs(current));
+	task_set_aufs(current);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* lock superblock. mainly for entry point functions */
+#define __si_read_lock(sb)	au_rw_read_lock(&au_sbi(sb)->si_rwsem)
+#define __si_write_lock(sb)	au_rw_write_lock(&au_sbi(sb)->si_rwsem)
+#define __si_read_trylock(sb)	au_rw_read_trylock(&au_sbi(sb)->si_rwsem)
+#define __si_write_trylock(sb)	au_rw_write_trylock(&au_sbi(sb)->si_rwsem)
+/*
+#define __si_read_trylock_nested(sb) \
+	au_rw_read_trylock_nested(&au_sbi(sb)->si_rwsem)
+#define __si_write_trylock_nested(sb) \
+	au_rw_write_trylock_nested(&au_sbi(sb)->si_rwsem)
+*/
+
+#define __si_read_unlock(sb)	au_rw_read_unlock(&au_sbi(sb)->si_rwsem)
+#define __si_write_unlock(sb)	au_rw_write_unlock(&au_sbi(sb)->si_rwsem)
+#define __si_downgrade_lock(sb)	au_rw_dgrade_lock(&au_sbi(sb)->si_rwsem)
+
+#define SiMustNoWaiters(sb)	AuRwMustNoWaiters(&au_sbi(sb)->si_rwsem)
+#define SiMustAnyLock(sb)	AuRwMustAnyLock(&au_sbi(sb)->si_rwsem)
+#define SiMustWriteLock(sb)	AuRwMustWriteLock(&au_sbi(sb)->si_rwsem)
+
+static inline void si_noflush_read_lock(struct super_block *sb)
+{
+	__si_read_lock(sb);
+	si_pid_set(sb);
+}
+
+static inline int si_noflush_read_trylock(struct super_block *sb)
+{
+	int locked;
+
+	locked = __si_read_trylock(sb);
+	if (locked)
+		si_pid_set(sb);
+	return locked;
+}
+
+static inline void si_noflush_write_lock(struct super_block *sb)
+{
+	__si_write_lock(sb);
+	si_pid_set(sb);
+}
+
+static inline int si_noflush_write_trylock(struct super_block *sb)
+{
+	int locked;
+
+	locked = __si_write_trylock(sb);
+	if (locked)
+		si_pid_set(sb);
+	return locked;
+}
+
+#if 0 /* reserved */
+static inline int si_read_trylock(struct super_block *sb, int flags)
+{
+	if (au_ftest_lock(flags, FLUSH))
+		au_nwt_flush(&au_sbi(sb)->si_nowait);
+	return si_noflush_read_trylock(sb);
+}
+#endif
+
+static inline void si_read_unlock(struct super_block *sb)
+{
+	si_pid_clr(sb);
+	__si_read_unlock(sb);
+}
+
+#if 0 /* reserved */
+static inline int si_write_trylock(struct super_block *sb, int flags)
+{
+	if (au_ftest_lock(flags, FLUSH))
+		au_nwt_flush(&au_sbi(sb)->si_nowait);
+	return si_noflush_write_trylock(sb);
+}
+#endif
+
+static inline void si_write_unlock(struct super_block *sb)
+{
+	si_pid_clr(sb);
+	__si_write_unlock(sb);
+}
+
+#if 0 /* reserved */
+static inline void si_downgrade_lock(struct super_block *sb)
+{
+	__si_downgrade_lock(sb);
+}
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+static inline aufs_bindex_t au_sbbot(struct super_block *sb)
+{
+	SiMustAnyLock(sb);
+	return au_sbi(sb)->si_bbot;
+}
+
+static inline unsigned int au_mntflags(struct super_block *sb)
+{
+	SiMustAnyLock(sb);
+	return au_sbi(sb)->si_mntflags;
+}
+
+static inline unsigned int au_sigen(struct super_block *sb)
+{
+	SiMustAnyLock(sb);
+	return au_sbi(sb)->si_generation;
+}
+
+static inline unsigned long long au_ninodes(struct super_block *sb)
+{
+	s64 n = percpu_counter_sum(&au_sbi(sb)->si_ninodes);
+
+	BUG_ON(n < 0);
+	return n;
+}
+
+static inline void au_ninodes_inc(struct super_block *sb)
+{
+	percpu_counter_inc(&au_sbi(sb)->si_ninodes);
+}
+
+static inline void au_ninodes_dec(struct super_block *sb)
+{
+	percpu_counter_dec(&au_sbi(sb)->si_ninodes);
+}
+
+static inline unsigned long long au_nfiles(struct super_block *sb)
+{
+	s64 n = percpu_counter_sum(&au_sbi(sb)->si_nfiles);
+
+	BUG_ON(n < 0);
+	return n;
+}
+
+static inline void au_nfiles_inc(struct super_block *sb)
+{
+	percpu_counter_inc(&au_sbi(sb)->si_nfiles);
+}
+
+static inline void au_nfiles_dec(struct super_block *sb)
+{
+	percpu_counter_dec(&au_sbi(sb)->si_nfiles);
+}
+
+static inline struct au_branch *au_sbr(struct super_block *sb,
+				       aufs_bindex_t bindex)
+{
+	SiMustAnyLock(sb);
+	return au_sbi(sb)->si_branch[0 + bindex];
+}
+
+static inline void au_xino_brid_set(struct super_block *sb, aufs_bindex_t brid)
+{
+	SiMustWriteLock(sb);
+	au_sbi(sb)->si_xino_brid = brid;
+}
+
+static inline aufs_bindex_t au_xino_brid(struct super_block *sb)
+{
+	SiMustAnyLock(sb);
+	return au_sbi(sb)->si_xino_brid;
+}
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_SUPER_H__ */
diff --git a/fs/aufs/sysaufs.c b/fs/aufs/sysaufs.c
new file mode 100644
index 000000000..d87a20d4d
--- /dev/null
+++ b/fs/aufs/sysaufs.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * sysfs interface and lifetime management
+ * they are necessary regardless sysfs is disabled.
+ */
+
+#include <linux/random.h>
+#include "aufs.h"
+
+unsigned long sysaufs_si_mask;
+struct kset *sysaufs_kset;
+
+#define AuSiAttr(_name) { \
+	.attr   = { .name = __stringify(_name), .mode = 0444 },	\
+	.show   = sysaufs_si_##_name,				\
+}
+
+static struct sysaufs_si_attr sysaufs_si_attr_xi_path = AuSiAttr(xi_path);
+struct attribute *sysaufs_si_attrs[] = {
+	&sysaufs_si_attr_xi_path.attr,
+	NULL,
+};
+
+static const struct sysfs_ops au_sbi_ops = {
+	.show   = sysaufs_si_show
+};
+
+static struct kobj_type au_sbi_ktype = {
+	.release	= au_si_free,
+	.sysfs_ops	= &au_sbi_ops,
+	.default_attrs	= sysaufs_si_attrs
+};
+
+/* ---------------------------------------------------------------------- */
+
+int sysaufs_si_init(struct au_sbinfo *sbinfo)
+{
+	int err;
+
+	sbinfo->si_kobj.kset = sysaufs_kset;
+	/* cf. sysaufs_name() */
+	err = kobject_init_and_add
+		(&sbinfo->si_kobj, &au_sbi_ktype, /*&sysaufs_kset->kobj*/NULL,
+		 SysaufsSiNamePrefix "%lx", sysaufs_si_id(sbinfo));
+
+	dbgaufs_si_null(sbinfo);
+	if (!err) {
+		err = dbgaufs_si_init(sbinfo);
+		if (unlikely(err))
+			kobject_put(&sbinfo->si_kobj);
+	}
+	return err;
+}
+
+void sysaufs_fin(void)
+{
+	dbgaufs_fin();
+	sysfs_remove_group(&sysaufs_kset->kobj, sysaufs_attr_group);
+	kset_unregister(sysaufs_kset);
+}
+
+int __init sysaufs_init(void)
+{
+	int err;
+
+	do {
+		get_random_bytes(&sysaufs_si_mask, sizeof(sysaufs_si_mask));
+	} while (!sysaufs_si_mask);
+
+	err = -EINVAL;
+	sysaufs_kset = kset_create_and_add(AUFS_NAME, NULL, fs_kobj);
+	if (unlikely(!sysaufs_kset))
+		goto out;
+	err = PTR_ERR(sysaufs_kset);
+	if (IS_ERR(sysaufs_kset))
+		goto out;
+	err = sysfs_create_group(&sysaufs_kset->kobj, sysaufs_attr_group);
+	if (unlikely(err)) {
+		kset_unregister(sysaufs_kset);
+		goto out;
+	}
+
+	err = dbgaufs_init();
+	if (unlikely(err))
+		sysaufs_fin();
+out:
+	return err;
+}
diff --git a/fs/aufs/sysaufs.h b/fs/aufs/sysaufs.h
new file mode 100644
index 000000000..50aca1dc8
--- /dev/null
+++ b/fs/aufs/sysaufs.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * sysfs interface and mount lifetime management
+ */
+
+#ifndef __SYSAUFS_H__
+#define __SYSAUFS_H__
+
+#ifdef __KERNEL__
+
+#include <linux/sysfs.h>
+#include "module.h"
+
+struct super_block;
+struct au_sbinfo;
+
+struct sysaufs_si_attr {
+	struct attribute attr;
+	int (*show)(struct seq_file *seq, struct super_block *sb);
+};
+
+/* ---------------------------------------------------------------------- */
+
+/* sysaufs.c */
+extern unsigned long sysaufs_si_mask;
+extern struct kset *sysaufs_kset;
+extern struct attribute *sysaufs_si_attrs[];
+int sysaufs_si_init(struct au_sbinfo *sbinfo);
+int __init sysaufs_init(void);
+void sysaufs_fin(void);
+
+/* ---------------------------------------------------------------------- */
+
+/* some people doesn't like to show a pointer in kernel */
+static inline unsigned long sysaufs_si_id(struct au_sbinfo *sbinfo)
+{
+	return sysaufs_si_mask ^ (unsigned long)sbinfo;
+}
+
+#define SysaufsSiNamePrefix	"si_"
+#define SysaufsSiNameLen	(sizeof(SysaufsSiNamePrefix) + 16)
+static inline void sysaufs_name(struct au_sbinfo *sbinfo, char *name)
+{
+	snprintf(name, SysaufsSiNameLen, SysaufsSiNamePrefix "%lx",
+		 sysaufs_si_id(sbinfo));
+}
+
+struct au_branch;
+#ifdef CONFIG_SYSFS
+/* sysfs.c */
+extern struct attribute_group *sysaufs_attr_group;
+
+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb);
+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
+			 char *buf);
+long au_brinfo_ioctl(struct file *file, unsigned long arg);
+#ifdef CONFIG_COMPAT
+long au_brinfo_compat_ioctl(struct file *file, unsigned long arg);
+#endif
+
+void sysaufs_br_init(struct au_branch *br);
+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
+
+#define sysaufs_brs_init()	do {} while (0)
+
+#else
+#define sysaufs_attr_group	NULL
+
+AuStubInt0(sysaufs_si_xi_path, struct seq_file *seq, struct super_block *sb)
+AuStub(ssize_t, sysaufs_si_show, return 0, struct kobject *kobj,
+       struct attribute *attr, char *buf)
+AuStubVoid(sysaufs_br_init, struct au_branch *br)
+AuStubVoid(sysaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
+AuStubVoid(sysaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
+
+static inline void sysaufs_brs_init(void)
+{
+	sysaufs_brs = 0;
+}
+
+#endif /* CONFIG_SYSFS */
+
+#endif /* __KERNEL__ */
+#endif /* __SYSAUFS_H__ */
diff --git a/fs/aufs/sysfs.c b/fs/aufs/sysfs.c
new file mode 100644
index 000000000..58e148602
--- /dev/null
+++ b/fs/aufs/sysfs.c
@@ -0,0 +1,376 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * sysfs interface
+ */
+
+#include <linux/compat.h>
+#include <linux/seq_file.h>
+#include "aufs.h"
+
+#ifdef CONFIG_AUFS_FS_MODULE
+/* this entry violates the "one line per file" policy of sysfs */
+static ssize_t config_show(struct kobject *kobj, struct kobj_attribute *attr,
+			   char *buf)
+{
+	ssize_t err;
+	static char *conf =
+/* this file is generated at compiling */
+#include "conf.str"
+		;
+
+	err = snprintf(buf, PAGE_SIZE, conf);
+	if (unlikely(err >= PAGE_SIZE))
+		err = -EFBIG;
+	return err;
+}
+
+static struct kobj_attribute au_config_attr = __ATTR_RO(config);
+#endif
+
+static struct attribute *au_attr[] = {
+#ifdef CONFIG_AUFS_FS_MODULE
+	&au_config_attr.attr,
+#endif
+	NULL,	/* need to NULL terminate the list of attributes */
+};
+
+static struct attribute_group sysaufs_attr_group_body = {
+	.attrs = au_attr
+};
+
+struct attribute_group *sysaufs_attr_group = &sysaufs_attr_group_body;
+
+/* ---------------------------------------------------------------------- */
+
+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb)
+{
+	int err;
+
+	SiMustAnyLock(sb);
+
+	err = 0;
+	if (au_opt_test(au_mntflags(sb), XINO)) {
+		err = au_xino_path(seq, au_sbi(sb)->si_xib);
+		seq_putc(seq, '\n');
+	}
+	return err;
+}
+
+/*
+ * the lifetime of branch is independent from the entry under sysfs.
+ * sysfs handles the lifetime of the entry, and never call ->show() after it is
+ * unlinked.
+ */
+static int sysaufs_si_br(struct seq_file *seq, struct super_block *sb,
+			 aufs_bindex_t bindex, int idx)
+{
+	int err;
+	struct path path;
+	struct dentry *root;
+	struct au_branch *br;
+	au_br_perm_str_t perm;
+
+	AuDbg("b%d\n", bindex);
+
+	err = 0;
+	root = sb->s_root;
+	di_read_lock_parent(root, !AuLock_IR);
+	br = au_sbr(sb, bindex);
+
+	switch (idx) {
+	case AuBrSysfs_BR:
+		path.mnt = au_br_mnt(br);
+		path.dentry = au_h_dptr(root, bindex);
+		err = au_seq_path(seq, &path);
+		if (!err) {
+			au_optstr_br_perm(&perm, br->br_perm);
+			seq_printf(seq, "=%s\n", perm.a);
+		}
+		break;
+	case AuBrSysfs_BRID:
+		seq_printf(seq, "%d\n", br->br_id);
+		break;
+	}
+	di_read_unlock(root, !AuLock_IR);
+	if (unlikely(err || seq_has_overflowed(seq)))
+		err = -E2BIG;
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static struct seq_file *au_seq(char *p, ssize_t len)
+{
+	struct seq_file *seq;
+
+	seq = kzalloc(sizeof(*seq), GFP_NOFS);
+	if (seq) {
+		/* mutex_init(&seq.lock); */
+		seq->buf = p;
+		seq->size = len;
+		return seq; /* success */
+	}
+
+	seq = ERR_PTR(-ENOMEM);
+	return seq;
+}
+
+#define SysaufsBr_PREFIX	"br"
+#define SysaufsBrid_PREFIX	"brid"
+
+/* todo: file size may exceed PAGE_SIZE */
+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
+			char *buf)
+{
+	ssize_t err;
+	int idx;
+	long l;
+	aufs_bindex_t bbot;
+	struct au_sbinfo *sbinfo;
+	struct super_block *sb;
+	struct seq_file *seq;
+	char *name;
+	struct attribute **cattr;
+
+	sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
+	sb = sbinfo->si_sb;
+
+	/*
+	 * prevent a race condition between sysfs and aufs.
+	 * for instance, sysfs_file_read() calls sysfs_get_active_two() which
+	 * prohibits maintaining the sysfs entries.
+	 * hew we acquire read lock after sysfs_get_active_two().
+	 * on the other hand, the remount process may maintain the sysfs/aufs
+	 * entries after acquiring write lock.
+	 * it can cause a deadlock.
+	 * simply we gave up processing read here.
+	 */
+	err = -EBUSY;
+	if (unlikely(!si_noflush_read_trylock(sb)))
+		goto out;
+
+	seq = au_seq(buf, PAGE_SIZE);
+	err = PTR_ERR(seq);
+	if (IS_ERR(seq))
+		goto out_unlock;
+
+	name = (void *)attr->name;
+	cattr = sysaufs_si_attrs;
+	while (*cattr) {
+		if (!strcmp(name, (*cattr)->name)) {
+			err = container_of(*cattr, struct sysaufs_si_attr, attr)
+				->show(seq, sb);
+			goto out_seq;
+		}
+		cattr++;
+	}
+
+	if (!strncmp(name, SysaufsBrid_PREFIX,
+		     sizeof(SysaufsBrid_PREFIX) - 1)) {
+		idx = AuBrSysfs_BRID;
+		name += sizeof(SysaufsBrid_PREFIX) - 1;
+	} else if (!strncmp(name, SysaufsBr_PREFIX,
+			    sizeof(SysaufsBr_PREFIX) - 1)) {
+		idx = AuBrSysfs_BR;
+		name += sizeof(SysaufsBr_PREFIX) - 1;
+	} else
+		  BUG();
+
+	err = kstrtol(name, 10, &l);
+	if (!err) {
+		bbot = au_sbbot(sb);
+		if (l <= bbot)
+			err = sysaufs_si_br(seq, sb, (aufs_bindex_t)l, idx);
+		else
+			err = -ENOENT;
+	}
+
+out_seq:
+	if (!err) {
+		err = seq->count;
+		/* sysfs limit */
+		if (unlikely(err == PAGE_SIZE))
+			err = -EFBIG;
+	}
+	kfree(seq);
+out_unlock:
+	si_read_unlock(sb);
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_brinfo(struct super_block *sb, union aufs_brinfo __user *arg)
+{
+	int err;
+	int16_t brid;
+	aufs_bindex_t bindex, bbot;
+	size_t sz;
+	char *buf;
+	struct seq_file *seq;
+	struct au_branch *br;
+
+	si_read_lock(sb, AuLock_FLUSH);
+	bbot = au_sbbot(sb);
+	err = bbot + 1;
+	if (!arg)
+		goto out;
+
+	err = -ENOMEM;
+	buf = (void *)__get_free_page(GFP_NOFS);
+	if (unlikely(!buf))
+		goto out;
+
+	seq = au_seq(buf, PAGE_SIZE);
+	err = PTR_ERR(seq);
+	if (IS_ERR(seq))
+		goto out_buf;
+
+	sz = sizeof(*arg) - offsetof(union aufs_brinfo, path);
+	for (bindex = 0; bindex <= bbot; bindex++, arg++) {
+		err = !access_ok(VERIFY_WRITE, arg, sizeof(*arg));
+		if (unlikely(err))
+			break;
+
+		br = au_sbr(sb, bindex);
+		brid = br->br_id;
+		BUILD_BUG_ON(sizeof(brid) != sizeof(arg->id));
+		err = __put_user(brid, &arg->id);
+		if (unlikely(err))
+			break;
+
+		BUILD_BUG_ON(sizeof(br->br_perm) != sizeof(arg->perm));
+		err = __put_user(br->br_perm, &arg->perm);
+		if (unlikely(err))
+			break;
+
+		err = au_seq_path(seq, &br->br_path);
+		if (unlikely(err))
+			break;
+		seq_putc(seq, '\0');
+		if (!seq_has_overflowed(seq)) {
+			err = copy_to_user(arg->path, seq->buf, seq->count);
+			seq->count = 0;
+			if (unlikely(err))
+				break;
+		} else {
+			err = -E2BIG;
+			goto out_seq;
+		}
+	}
+	if (unlikely(err))
+		err = -EFAULT;
+
+out_seq:
+	kfree(seq);
+out_buf:
+	free_page((unsigned long)buf);
+out:
+	si_read_unlock(sb);
+	return err;
+}
+
+long au_brinfo_ioctl(struct file *file, unsigned long arg)
+{
+	return au_brinfo(file->f_path.dentry->d_sb, (void __user *)arg);
+}
+
+#ifdef CONFIG_COMPAT
+long au_brinfo_compat_ioctl(struct file *file, unsigned long arg)
+{
+	return au_brinfo(file->f_path.dentry->d_sb, compat_ptr(arg));
+}
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+void sysaufs_br_init(struct au_branch *br)
+{
+	int i;
+	struct au_brsysfs *br_sysfs;
+	struct attribute *attr;
+
+	br_sysfs = br->br_sysfs;
+	for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
+		attr = &br_sysfs->attr;
+		sysfs_attr_init(attr);
+		attr->name = br_sysfs->name;
+		attr->mode = S_IRUGO;
+		br_sysfs++;
+	}
+}
+
+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
+{
+	struct au_branch *br;
+	struct kobject *kobj;
+	struct au_brsysfs *br_sysfs;
+	int i;
+	aufs_bindex_t bbot;
+
+	dbgaufs_brs_del(sb, bindex);
+
+	if (!sysaufs_brs)
+		return;
+
+	kobj = &au_sbi(sb)->si_kobj;
+	bbot = au_sbbot(sb);
+	for (; bindex <= bbot; bindex++) {
+		br = au_sbr(sb, bindex);
+		br_sysfs = br->br_sysfs;
+		for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
+			sysfs_remove_file(kobj, &br_sysfs->attr);
+			br_sysfs++;
+		}
+	}
+}
+
+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
+{
+	int err, i;
+	aufs_bindex_t bbot;
+	struct kobject *kobj;
+	struct au_branch *br;
+	struct au_brsysfs *br_sysfs;
+
+	dbgaufs_brs_add(sb, bindex);
+
+	if (!sysaufs_brs)
+		return;
+
+	kobj = &au_sbi(sb)->si_kobj;
+	bbot = au_sbbot(sb);
+	for (; bindex <= bbot; bindex++) {
+		br = au_sbr(sb, bindex);
+		br_sysfs = br->br_sysfs;
+		snprintf(br_sysfs[AuBrSysfs_BR].name, sizeof(br_sysfs->name),
+			 SysaufsBr_PREFIX "%d", bindex);
+		snprintf(br_sysfs[AuBrSysfs_BRID].name, sizeof(br_sysfs->name),
+			 SysaufsBrid_PREFIX "%d", bindex);
+		for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
+			err = sysfs_create_file(kobj, &br_sysfs->attr);
+			if (unlikely(err))
+				pr_warn("failed %s under sysfs(%d)\n",
+					br_sysfs->name, err);
+			br_sysfs++;
+		}
+	}
+}
diff --git a/fs/aufs/sysrq.c b/fs/aufs/sysrq.c
new file mode 100644
index 000000000..401c1e8ec
--- /dev/null
+++ b/fs/aufs/sysrq.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * magic sysrq hanlder
+ */
+
+/* #include <linux/sysrq.h> */
+#include <linux/writeback.h>
+#include "aufs.h"
+
+/* ---------------------------------------------------------------------- */
+
+static void sysrq_sb(struct super_block *sb)
+{
+	char *plevel;
+	struct au_sbinfo *sbinfo;
+	struct file *file;
+	struct hlist_bl_head *files;
+	struct hlist_bl_node *pos;
+	struct au_finfo *finfo;
+
+	plevel = au_plevel;
+	au_plevel = KERN_WARNING;
+
+	/* since we define pr_fmt, call printk directly */
+#define pr(str) printk(KERN_WARNING AUFS_NAME ": " str)
+
+	sbinfo = au_sbi(sb);
+	printk(KERN_WARNING "si=%lx\n", sysaufs_si_id(sbinfo));
+	pr("superblock\n");
+	au_dpri_sb(sb);
+
+#if 0
+	pr("root dentry\n");
+	au_dpri_dentry(sb->s_root);
+	pr("root inode\n");
+	au_dpri_inode(d_inode(sb->s_root));
+#endif
+
+#if 0
+	do {
+		int err, i, j, ndentry;
+		struct au_dcsub_pages dpages;
+		struct au_dpage *dpage;
+
+		err = au_dpages_init(&dpages, GFP_ATOMIC);
+		if (unlikely(err))
+			break;
+		err = au_dcsub_pages(&dpages, sb->s_root, NULL, NULL);
+		if (!err)
+			for (i = 0; i < dpages.ndpage; i++) {
+				dpage = dpages.dpages + i;
+				ndentry = dpage->ndentry;
+				for (j = 0; j < ndentry; j++)
+					au_dpri_dentry(dpage->dentries[j]);
+			}
+		au_dpages_free(&dpages);
+	} while (0);
+#endif
+
+#if 1
+	{
+		struct inode *i;
+
+		pr("isolated inode\n");
+		spin_lock(&sb->s_inode_list_lock);
+		list_for_each_entry(i, &sb->s_inodes, i_sb_list) {
+			spin_lock(&i->i_lock);
+			if (1 || hlist_empty(&i->i_dentry))
+				au_dpri_inode(i);
+			spin_unlock(&i->i_lock);
+		}
+		spin_unlock(&sb->s_inode_list_lock);
+	}
+#endif
+	pr("files\n");
+	files = &au_sbi(sb)->si_files;
+	hlist_bl_lock(files);
+	hlist_bl_for_each_entry(finfo, pos, files, fi_hlist) {
+		umode_t mode;
+
+		file = finfo->fi_file;
+		mode = file_inode(file)->i_mode;
+		if (!special_file(mode))
+			au_dpri_file(file);
+	}
+	hlist_bl_unlock(files);
+	pr("done\n");
+
+#undef pr
+	au_plevel = plevel;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* module parameter */
+static char *aufs_sysrq_key = "a";
+module_param_named(sysrq, aufs_sysrq_key, charp, S_IRUGO);
+MODULE_PARM_DESC(sysrq, "MagicSysRq key for " AUFS_NAME);
+
+static void au_sysrq(int key __maybe_unused)
+{
+	struct au_sbinfo *sbinfo;
+	struct hlist_bl_node *pos;
+
+	lockdep_off();
+	au_sbilist_lock();
+	hlist_bl_for_each_entry(sbinfo, pos, &au_sbilist, si_list)
+		sysrq_sb(sbinfo->si_sb);
+	au_sbilist_unlock();
+	lockdep_on();
+}
+
+static struct sysrq_key_op au_sysrq_op = {
+	.handler	= au_sysrq,
+	.help_msg	= "Aufs",
+	.action_msg	= "Aufs",
+	.enable_mask	= SYSRQ_ENABLE_DUMP
+};
+
+/* ---------------------------------------------------------------------- */
+
+int __init au_sysrq_init(void)
+{
+	int err;
+	char key;
+
+	err = -1;
+	key = *aufs_sysrq_key;
+	if ('a' <= key && key <= 'z')
+		err = register_sysrq_key(key, &au_sysrq_op);
+	if (unlikely(err))
+		pr_err("err %d, sysrq=%c\n", err, key);
+	return err;
+}
+
+void au_sysrq_fin(void)
+{
+	int err;
+
+	err = unregister_sysrq_key(*aufs_sysrq_key, &au_sysrq_op);
+	if (unlikely(err))
+		pr_err("err %d (ignored)\n", err);
+}
diff --git a/fs/aufs/vdir.c b/fs/aufs/vdir.c
new file mode 100644
index 000000000..8d0a23c51
--- /dev/null
+++ b/fs/aufs/vdir.c
@@ -0,0 +1,893 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * virtual or vertical directory
+ */
+
+#include "aufs.h"
+
+static unsigned int calc_size(int nlen)
+{
+	return ALIGN(sizeof(struct au_vdir_de) + nlen, sizeof(ino_t));
+}
+
+static int set_deblk_end(union au_vdir_deblk_p *p,
+			 union au_vdir_deblk_p *deblk_end)
+{
+	if (calc_size(0) <= deblk_end->deblk - p->deblk) {
+		p->de->de_str.len = 0;
+		/* smp_mb(); */
+		return 0;
+	}
+	return -1; /* error */
+}
+
+/* returns true or false */
+static int is_deblk_end(union au_vdir_deblk_p *p,
+			union au_vdir_deblk_p *deblk_end)
+{
+	if (calc_size(0) <= deblk_end->deblk - p->deblk)
+		return !p->de->de_str.len;
+	return 1;
+}
+
+static unsigned char *last_deblk(struct au_vdir *vdir)
+{
+	return vdir->vd_deblk[vdir->vd_nblk - 1];
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* estimate the appropriate size for name hash table */
+unsigned int au_rdhash_est(loff_t sz)
+{
+	unsigned int n;
+
+	n = UINT_MAX;
+	sz >>= 10;
+	if (sz < n)
+		n = sz;
+	if (sz < AUFS_RDHASH_DEF)
+		n = AUFS_RDHASH_DEF;
+	/* pr_info("n %u\n", n); */
+	return n;
+}
+
+/*
+ * the allocated memory has to be freed by
+ * au_nhash_wh_free() or au_nhash_de_free().
+ */
+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp)
+{
+	struct hlist_head *head;
+	unsigned int u;
+	size_t sz;
+
+	sz = sizeof(*nhash->nh_head) * num_hash;
+	head = kmalloc(sz, gfp);
+	if (head) {
+		nhash->nh_num = num_hash;
+		nhash->nh_head = head;
+		for (u = 0; u < num_hash; u++)
+			INIT_HLIST_HEAD(head++);
+		return 0; /* success */
+	}
+
+	return -ENOMEM;
+}
+
+static void nhash_count(struct hlist_head *head)
+{
+#if 0
+	unsigned long n;
+	struct hlist_node *pos;
+
+	n = 0;
+	hlist_for_each(pos, head)
+		n++;
+	pr_info("%lu\n", n);
+#endif
+}
+
+static void au_nhash_wh_do_free(struct hlist_head *head)
+{
+	struct au_vdir_wh *pos;
+	struct hlist_node *node;
+
+	hlist_for_each_entry_safe(pos, node, head, wh_hash)
+		kfree(pos);
+}
+
+static void au_nhash_de_do_free(struct hlist_head *head)
+{
+	struct au_vdir_dehstr *pos;
+	struct hlist_node *node;
+
+	hlist_for_each_entry_safe(pos, node, head, hash)
+		au_cache_free_vdir_dehstr(pos);
+}
+
+static void au_nhash_do_free(struct au_nhash *nhash,
+			     void (*free)(struct hlist_head *head))
+{
+	unsigned int n;
+	struct hlist_head *head;
+
+	n = nhash->nh_num;
+	if (!n)
+		return;
+
+	head = nhash->nh_head;
+	while (n-- > 0) {
+		nhash_count(head);
+		free(head++);
+	}
+	kfree(nhash->nh_head);
+}
+
+void au_nhash_wh_free(struct au_nhash *whlist)
+{
+	au_nhash_do_free(whlist, au_nhash_wh_do_free);
+}
+
+static void au_nhash_de_free(struct au_nhash *delist)
+{
+	au_nhash_do_free(delist, au_nhash_de_do_free);
+}
+
+/* ---------------------------------------------------------------------- */
+
+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
+			    int limit)
+{
+	int num;
+	unsigned int u, n;
+	struct hlist_head *head;
+	struct au_vdir_wh *pos;
+
+	num = 0;
+	n = whlist->nh_num;
+	head = whlist->nh_head;
+	for (u = 0; u < n; u++, head++)
+		hlist_for_each_entry(pos, head, wh_hash)
+			if (pos->wh_bindex == btgt && ++num > limit)
+				return 1;
+	return 0;
+}
+
+static struct hlist_head *au_name_hash(struct au_nhash *nhash,
+				       unsigned char *name,
+				       unsigned int len)
+{
+	unsigned int v;
+	/* const unsigned int magic_bit = 12; */
+
+	AuDebugOn(!nhash->nh_num || !nhash->nh_head);
+
+	v = 0;
+	if (len > 8)
+		len = 8;
+	while (len--)
+		v += *name++;
+	/* v = hash_long(v, magic_bit); */
+	v %= nhash->nh_num;
+	return nhash->nh_head + v;
+}
+
+static int au_nhash_test_name(struct au_vdir_destr *str, const char *name,
+			      int nlen)
+{
+	return str->len == nlen && !memcmp(str->name, name, nlen);
+}
+
+/* returns found or not */
+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen)
+{
+	struct hlist_head *head;
+	struct au_vdir_wh *pos;
+	struct au_vdir_destr *str;
+
+	head = au_name_hash(whlist, name, nlen);
+	hlist_for_each_entry(pos, head, wh_hash) {
+		str = &pos->wh_str;
+		AuDbg("%.*s\n", str->len, str->name);
+		if (au_nhash_test_name(str, name, nlen))
+			return 1;
+	}
+	return 0;
+}
+
+/* returns found(true) or not */
+static int test_known(struct au_nhash *delist, char *name, int nlen)
+{
+	struct hlist_head *head;
+	struct au_vdir_dehstr *pos;
+	struct au_vdir_destr *str;
+
+	head = au_name_hash(delist, name, nlen);
+	hlist_for_each_entry(pos, head, hash) {
+		str = pos->str;
+		AuDbg("%.*s\n", str->len, str->name);
+		if (au_nhash_test_name(str, name, nlen))
+			return 1;
+	}
+	return 0;
+}
+
+static void au_shwh_init_wh(struct au_vdir_wh *wh, ino_t ino,
+			    unsigned char d_type)
+{
+#ifdef CONFIG_AUFS_SHWH
+	wh->wh_ino = ino;
+	wh->wh_type = d_type;
+#endif
+}
+
+/* ---------------------------------------------------------------------- */
+
+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
+		       unsigned int d_type, aufs_bindex_t bindex,
+		       unsigned char shwh)
+{
+	int err;
+	struct au_vdir_destr *str;
+	struct au_vdir_wh *wh;
+
+	AuDbg("%.*s\n", nlen, name);
+	AuDebugOn(!whlist->nh_num || !whlist->nh_head);
+
+	err = -ENOMEM;
+	wh = kmalloc(sizeof(*wh) + nlen, GFP_NOFS);
+	if (unlikely(!wh))
+		goto out;
+
+	err = 0;
+	wh->wh_bindex = bindex;
+	if (shwh)
+		au_shwh_init_wh(wh, ino, d_type);
+	str = &wh->wh_str;
+	str->len = nlen;
+	memcpy(str->name, name, nlen);
+	hlist_add_head(&wh->wh_hash, au_name_hash(whlist, name, nlen));
+	/* smp_mb(); */
+
+out:
+	return err;
+}
+
+static int append_deblk(struct au_vdir *vdir)
+{
+	int err;
+	unsigned long ul;
+	const unsigned int deblk_sz = vdir->vd_deblk_sz;
+	union au_vdir_deblk_p p, deblk_end;
+	unsigned char **o;
+
+	err = -ENOMEM;
+	o = au_krealloc(vdir->vd_deblk, sizeof(*o) * (vdir->vd_nblk + 1),
+			GFP_NOFS, /*may_shrink*/0);
+	if (unlikely(!o))
+		goto out;
+
+	vdir->vd_deblk = o;
+	p.deblk = kmalloc(deblk_sz, GFP_NOFS);
+	if (p.deblk) {
+		ul = vdir->vd_nblk++;
+		vdir->vd_deblk[ul] = p.deblk;
+		vdir->vd_last.ul = ul;
+		vdir->vd_last.p.deblk = p.deblk;
+		deblk_end.deblk = p.deblk + deblk_sz;
+		err = set_deblk_end(&p, &deblk_end);
+	}
+
+out:
+	return err;
+}
+
+static int append_de(struct au_vdir *vdir, char *name, int nlen, ino_t ino,
+		     unsigned int d_type, struct au_nhash *delist)
+{
+	int err;
+	unsigned int sz;
+	const unsigned int deblk_sz = vdir->vd_deblk_sz;
+	union au_vdir_deblk_p p, *room, deblk_end;
+	struct au_vdir_dehstr *dehstr;
+
+	p.deblk = last_deblk(vdir);
+	deblk_end.deblk = p.deblk + deblk_sz;
+	room = &vdir->vd_last.p;
+	AuDebugOn(room->deblk < p.deblk || deblk_end.deblk <= room->deblk
+		  || !is_deblk_end(room, &deblk_end));
+
+	sz = calc_size(nlen);
+	if (unlikely(sz > deblk_end.deblk - room->deblk)) {
+		err = append_deblk(vdir);
+		if (unlikely(err))
+			goto out;
+
+		p.deblk = last_deblk(vdir);
+		deblk_end.deblk = p.deblk + deblk_sz;
+		/* smp_mb(); */
+		AuDebugOn(room->deblk != p.deblk);
+	}
+
+	err = -ENOMEM;
+	dehstr = au_cache_alloc_vdir_dehstr();
+	if (unlikely(!dehstr))
+		goto out;
+
+	dehstr->str = &room->de->de_str;
+	hlist_add_head(&dehstr->hash, au_name_hash(delist, name, nlen));
+	room->de->de_ino = ino;
+	room->de->de_type = d_type;
+	room->de->de_str.len = nlen;
+	memcpy(room->de->de_str.name, name, nlen);
+
+	err = 0;
+	room->deblk += sz;
+	if (unlikely(set_deblk_end(room, &deblk_end)))
+		err = append_deblk(vdir);
+	/* smp_mb(); */
+
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void au_vdir_free(struct au_vdir *vdir)
+{
+	unsigned char **deblk;
+
+	deblk = vdir->vd_deblk;
+	while (vdir->vd_nblk--)
+		kfree(*deblk++);
+	kfree(vdir->vd_deblk);
+	au_cache_free_vdir(vdir);
+}
+
+static struct au_vdir *alloc_vdir(struct file *file)
+{
+	struct au_vdir *vdir;
+	struct super_block *sb;
+	int err;
+
+	sb = file->f_path.dentry->d_sb;
+	SiMustAnyLock(sb);
+
+	err = -ENOMEM;
+	vdir = au_cache_alloc_vdir();
+	if (unlikely(!vdir))
+		goto out;
+
+	vdir->vd_deblk = kzalloc(sizeof(*vdir->vd_deblk), GFP_NOFS);
+	if (unlikely(!vdir->vd_deblk))
+		goto out_free;
+
+	vdir->vd_deblk_sz = au_sbi(sb)->si_rdblk;
+	if (!vdir->vd_deblk_sz) {
+		/* estimate the appropriate size for deblk */
+		vdir->vd_deblk_sz = au_dir_size(file, /*dentry*/NULL);
+		/* pr_info("vd_deblk_sz %u\n", vdir->vd_deblk_sz); */
+	}
+	vdir->vd_nblk = 0;
+	vdir->vd_version = 0;
+	vdir->vd_jiffy = 0;
+	err = append_deblk(vdir);
+	if (!err)
+		return vdir; /* success */
+
+	kfree(vdir->vd_deblk);
+
+out_free:
+	au_cache_free_vdir(vdir);
+out:
+	vdir = ERR_PTR(err);
+	return vdir;
+}
+
+static int reinit_vdir(struct au_vdir *vdir)
+{
+	int err;
+	union au_vdir_deblk_p p, deblk_end;
+
+	while (vdir->vd_nblk > 1) {
+		kfree(vdir->vd_deblk[vdir->vd_nblk - 1]);
+		/* vdir->vd_deblk[vdir->vd_nblk - 1] = NULL; */
+		vdir->vd_nblk--;
+	}
+	p.deblk = vdir->vd_deblk[0];
+	deblk_end.deblk = p.deblk + vdir->vd_deblk_sz;
+	err = set_deblk_end(&p, &deblk_end);
+	/* keep vd_dblk_sz */
+	vdir->vd_last.ul = 0;
+	vdir->vd_last.p.deblk = vdir->vd_deblk[0];
+	vdir->vd_version = 0;
+	vdir->vd_jiffy = 0;
+	/* smp_mb(); */
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+#define AuFillVdir_CALLED	1
+#define AuFillVdir_WHABLE	(1 << 1)
+#define AuFillVdir_SHWH		(1 << 2)
+#define au_ftest_fillvdir(flags, name)	((flags) & AuFillVdir_##name)
+#define au_fset_fillvdir(flags, name) \
+	do { (flags) |= AuFillVdir_##name; } while (0)
+#define au_fclr_fillvdir(flags, name) \
+	do { (flags) &= ~AuFillVdir_##name; } while (0)
+
+#ifndef CONFIG_AUFS_SHWH
+#undef AuFillVdir_SHWH
+#define AuFillVdir_SHWH		0
+#endif
+
+struct fillvdir_arg {
+	struct dir_context	ctx;
+	struct file		*file;
+	struct au_vdir		*vdir;
+	struct au_nhash		delist;
+	struct au_nhash		whlist;
+	aufs_bindex_t		bindex;
+	unsigned int		flags;
+	int			err;
+};
+
+static int fillvdir(struct dir_context *ctx, const char *__name, int nlen,
+		    loff_t offset __maybe_unused, u64 h_ino,
+		    unsigned int d_type)
+{
+	struct fillvdir_arg *arg = container_of(ctx, struct fillvdir_arg, ctx);
+	char *name = (void *)__name;
+	struct super_block *sb;
+	ino_t ino;
+	const unsigned char shwh = !!au_ftest_fillvdir(arg->flags, SHWH);
+
+	arg->err = 0;
+	sb = arg->file->f_path.dentry->d_sb;
+	au_fset_fillvdir(arg->flags, CALLED);
+	/* smp_mb(); */
+	if (nlen <= AUFS_WH_PFX_LEN
+	    || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
+		if (test_known(&arg->delist, name, nlen)
+		    || au_nhash_test_known_wh(&arg->whlist, name, nlen))
+			goto out; /* already exists or whiteouted */
+
+		arg->err = au_ino(sb, arg->bindex, h_ino, d_type, &ino);
+		if (!arg->err) {
+			if (unlikely(nlen > AUFS_MAX_NAMELEN))
+				d_type = DT_UNKNOWN;
+			arg->err = append_de(arg->vdir, name, nlen, ino,
+					     d_type, &arg->delist);
+		}
+	} else if (au_ftest_fillvdir(arg->flags, WHABLE)) {
+		name += AUFS_WH_PFX_LEN;
+		nlen -= AUFS_WH_PFX_LEN;
+		if (au_nhash_test_known_wh(&arg->whlist, name, nlen))
+			goto out; /* already whiteouted */
+
+		if (shwh)
+			arg->err = au_wh_ino(sb, arg->bindex, h_ino, d_type,
+					     &ino);
+		if (!arg->err) {
+			if (nlen <= AUFS_MAX_NAMELEN + AUFS_WH_PFX_LEN)
+				d_type = DT_UNKNOWN;
+			arg->err = au_nhash_append_wh
+				(&arg->whlist, name, nlen, ino, d_type,
+				 arg->bindex, shwh);
+		}
+	}
+
+out:
+	if (!arg->err)
+		arg->vdir->vd_jiffy = jiffies;
+	/* smp_mb(); */
+	AuTraceErr(arg->err);
+	return arg->err;
+}
+
+static int au_handle_shwh(struct super_block *sb, struct au_vdir *vdir,
+			  struct au_nhash *whlist, struct au_nhash *delist)
+{
+#ifdef CONFIG_AUFS_SHWH
+	int err;
+	unsigned int nh, u;
+	struct hlist_head *head;
+	struct au_vdir_wh *pos;
+	struct hlist_node *n;
+	char *p, *o;
+	struct au_vdir_destr *destr;
+
+	AuDebugOn(!au_opt_test(au_mntflags(sb), SHWH));
+
+	err = -ENOMEM;
+	o = p = (void *)__get_free_page(GFP_NOFS);
+	if (unlikely(!p))
+		goto out;
+
+	err = 0;
+	nh = whlist->nh_num;
+	memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
+	p += AUFS_WH_PFX_LEN;
+	for (u = 0; u < nh; u++) {
+		head = whlist->nh_head + u;
+		hlist_for_each_entry_safe(pos, n, head, wh_hash) {
+			destr = &pos->wh_str;
+			memcpy(p, destr->name, destr->len);
+			err = append_de(vdir, o, destr->len + AUFS_WH_PFX_LEN,
+					pos->wh_ino, pos->wh_type, delist);
+			if (unlikely(err))
+				break;
+		}
+	}
+
+	free_page((unsigned long)o);
+
+out:
+	AuTraceErr(err);
+	return err;
+#else
+	return 0;
+#endif
+}
+
+static int au_do_read_vdir(struct fillvdir_arg *arg)
+{
+	int err;
+	unsigned int rdhash;
+	loff_t offset;
+	aufs_bindex_t bbot, bindex, btop;
+	unsigned char shwh;
+	struct file *hf, *file;
+	struct super_block *sb;
+
+	file = arg->file;
+	sb = file->f_path.dentry->d_sb;
+	SiMustAnyLock(sb);
+
+	rdhash = au_sbi(sb)->si_rdhash;
+	if (!rdhash)
+		rdhash = au_rdhash_est(au_dir_size(file, /*dentry*/NULL));
+	err = au_nhash_alloc(&arg->delist, rdhash, GFP_NOFS);
+	if (unlikely(err))
+		goto out;
+	err = au_nhash_alloc(&arg->whlist, rdhash, GFP_NOFS);
+	if (unlikely(err))
+		goto out_delist;
+
+	err = 0;
+	arg->flags = 0;
+	shwh = 0;
+	if (au_opt_test(au_mntflags(sb), SHWH)) {
+		shwh = 1;
+		au_fset_fillvdir(arg->flags, SHWH);
+	}
+	btop = au_fbtop(file);
+	bbot = au_fbbot_dir(file);
+	for (bindex = btop; !err && bindex <= bbot; bindex++) {
+		hf = au_hf_dir(file, bindex);
+		if (!hf)
+			continue;
+
+		offset = vfsub_llseek(hf, 0, SEEK_SET);
+		err = offset;
+		if (unlikely(offset))
+			break;
+
+		arg->bindex = bindex;
+		au_fclr_fillvdir(arg->flags, WHABLE);
+		if (shwh
+		    || (bindex != bbot
+			&& au_br_whable(au_sbr_perm(sb, bindex))))
+			au_fset_fillvdir(arg->flags, WHABLE);
+		do {
+			arg->err = 0;
+			au_fclr_fillvdir(arg->flags, CALLED);
+			/* smp_mb(); */
+			err = vfsub_iterate_dir(hf, &arg->ctx);
+			if (err >= 0)
+				err = arg->err;
+		} while (!err && au_ftest_fillvdir(arg->flags, CALLED));
+
+		/*
+		 * dir_relax() may be good for concurrency, but aufs should not
+		 * use it since it will cause a lockdep problem.
+		 */
+	}
+
+	if (!err && shwh)
+		err = au_handle_shwh(sb, arg->vdir, &arg->whlist, &arg->delist);
+
+	au_nhash_wh_free(&arg->whlist);
+
+out_delist:
+	au_nhash_de_free(&arg->delist);
+out:
+	return err;
+}
+
+static int read_vdir(struct file *file, int may_read)
+{
+	int err;
+	unsigned long expire;
+	unsigned char do_read;
+	struct fillvdir_arg arg = {
+		.ctx = {
+			.actor = fillvdir
+		}
+	};
+	struct inode *inode;
+	struct au_vdir *vdir, *allocated;
+
+	err = 0;
+	inode = file_inode(file);
+	IMustLock(inode);
+	IiMustWriteLock(inode);
+	SiMustAnyLock(inode->i_sb);
+
+	allocated = NULL;
+	do_read = 0;
+	expire = au_sbi(inode->i_sb)->si_rdcache;
+	vdir = au_ivdir(inode);
+	if (!vdir) {
+		do_read = 1;
+		vdir = alloc_vdir(file);
+		err = PTR_ERR(vdir);
+		if (IS_ERR(vdir))
+			goto out;
+		err = 0;
+		allocated = vdir;
+	} else if (may_read
+		   && (!inode_eq_iversion(inode, vdir->vd_version)
+		       || time_after(jiffies, vdir->vd_jiffy + expire))) {
+		do_read = 1;
+		err = reinit_vdir(vdir);
+		if (unlikely(err))
+			goto out;
+	}
+
+	if (!do_read)
+		return 0; /* success */
+
+	arg.file = file;
+	arg.vdir = vdir;
+	err = au_do_read_vdir(&arg);
+	if (!err) {
+		/* file->f_pos = 0; */ /* todo: ctx->pos? */
+		vdir->vd_version = inode_query_iversion(inode);
+		vdir->vd_last.ul = 0;
+		vdir->vd_last.p.deblk = vdir->vd_deblk[0];
+		if (allocated)
+			au_set_ivdir(inode, allocated);
+	} else if (allocated)
+		au_vdir_free(allocated);
+
+out:
+	return err;
+}
+
+static int copy_vdir(struct au_vdir *tgt, struct au_vdir *src)
+{
+	int err, rerr;
+	unsigned long ul, n;
+	const unsigned int deblk_sz = src->vd_deblk_sz;
+
+	AuDebugOn(tgt->vd_nblk != 1);
+
+	err = -ENOMEM;
+	if (tgt->vd_nblk < src->vd_nblk) {
+		unsigned char **p;
+
+		p = au_krealloc(tgt->vd_deblk, sizeof(*p) * src->vd_nblk,
+				GFP_NOFS, /*may_shrink*/0);
+		if (unlikely(!p))
+			goto out;
+		tgt->vd_deblk = p;
+	}
+
+	if (tgt->vd_deblk_sz != deblk_sz) {
+		unsigned char *p;
+
+		tgt->vd_deblk_sz = deblk_sz;
+		p = au_krealloc(tgt->vd_deblk[0], deblk_sz, GFP_NOFS,
+				/*may_shrink*/1);
+		if (unlikely(!p))
+			goto out;
+		tgt->vd_deblk[0] = p;
+	}
+	memcpy(tgt->vd_deblk[0], src->vd_deblk[0], deblk_sz);
+	tgt->vd_version = src->vd_version;
+	tgt->vd_jiffy = src->vd_jiffy;
+
+	n = src->vd_nblk;
+	for (ul = 1; ul < n; ul++) {
+		tgt->vd_deblk[ul] = kmemdup(src->vd_deblk[ul], deblk_sz,
+					    GFP_NOFS);
+		if (unlikely(!tgt->vd_deblk[ul]))
+			goto out;
+		tgt->vd_nblk++;
+	}
+	tgt->vd_nblk = n;
+	tgt->vd_last.ul = tgt->vd_last.ul;
+	tgt->vd_last.p.deblk = tgt->vd_deblk[tgt->vd_last.ul];
+	tgt->vd_last.p.deblk += src->vd_last.p.deblk
+		- src->vd_deblk[src->vd_last.ul];
+	/* smp_mb(); */
+	return 0; /* success */
+
+out:
+	rerr = reinit_vdir(tgt);
+	BUG_ON(rerr);
+	return err;
+}
+
+int au_vdir_init(struct file *file)
+{
+	int err;
+	struct inode *inode;
+	struct au_vdir *vdir_cache, *allocated;
+
+	/* test file->f_pos here instead of ctx->pos */
+	err = read_vdir(file, !file->f_pos);
+	if (unlikely(err))
+		goto out;
+
+	allocated = NULL;
+	vdir_cache = au_fvdir_cache(file);
+	if (!vdir_cache) {
+		vdir_cache = alloc_vdir(file);
+		err = PTR_ERR(vdir_cache);
+		if (IS_ERR(vdir_cache))
+			goto out;
+		allocated = vdir_cache;
+	} else if (!file->f_pos && vdir_cache->vd_version != file->f_version) {
+		/* test file->f_pos here instead of ctx->pos */
+		err = reinit_vdir(vdir_cache);
+		if (unlikely(err))
+			goto out;
+	} else
+		return 0; /* success */
+
+	inode = file_inode(file);
+	err = copy_vdir(vdir_cache, au_ivdir(inode));
+	if (!err) {
+		file->f_version = inode_query_iversion(inode);
+		if (allocated)
+			au_set_fvdir_cache(file, allocated);
+	} else if (allocated)
+		au_vdir_free(allocated);
+
+out:
+	return err;
+}
+
+static loff_t calc_offset(struct au_vdir *vdir)
+{
+	loff_t offset;
+	union au_vdir_deblk_p p;
+
+	p.deblk = vdir->vd_deblk[vdir->vd_last.ul];
+	offset = vdir->vd_last.p.deblk - p.deblk;
+	offset += vdir->vd_deblk_sz * vdir->vd_last.ul;
+	return offset;
+}
+
+/* returns true or false */
+static int seek_vdir(struct file *file, struct dir_context *ctx)
+{
+	int valid;
+	unsigned int deblk_sz;
+	unsigned long ul, n;
+	loff_t offset;
+	union au_vdir_deblk_p p, deblk_end;
+	struct au_vdir *vdir_cache;
+
+	valid = 1;
+	vdir_cache = au_fvdir_cache(file);
+	offset = calc_offset(vdir_cache);
+	AuDbg("offset %lld\n", offset);
+	if (ctx->pos == offset)
+		goto out;
+
+	vdir_cache->vd_last.ul = 0;
+	vdir_cache->vd_last.p.deblk = vdir_cache->vd_deblk[0];
+	if (!ctx->pos)
+		goto out;
+
+	valid = 0;
+	deblk_sz = vdir_cache->vd_deblk_sz;
+	ul = div64_u64(ctx->pos, deblk_sz);
+	AuDbg("ul %lu\n", ul);
+	if (ul >= vdir_cache->vd_nblk)
+		goto out;
+
+	n = vdir_cache->vd_nblk;
+	for (; ul < n; ul++) {
+		p.deblk = vdir_cache->vd_deblk[ul];
+		deblk_end.deblk = p.deblk + deblk_sz;
+		offset = ul;
+		offset *= deblk_sz;
+		while (!is_deblk_end(&p, &deblk_end) && offset < ctx->pos) {
+			unsigned int l;
+
+			l = calc_size(p.de->de_str.len);
+			offset += l;
+			p.deblk += l;
+		}
+		if (!is_deblk_end(&p, &deblk_end)) {
+			valid = 1;
+			vdir_cache->vd_last.ul = ul;
+			vdir_cache->vd_last.p = p;
+			break;
+		}
+	}
+
+out:
+	/* smp_mb(); */
+	if (!valid)
+		AuDbg("valid %d\n", !valid);
+	return valid;
+}
+
+int au_vdir_fill_de(struct file *file, struct dir_context *ctx)
+{
+	unsigned int l, deblk_sz;
+	union au_vdir_deblk_p deblk_end;
+	struct au_vdir *vdir_cache;
+	struct au_vdir_de *de;
+
+	vdir_cache = au_fvdir_cache(file);
+	if (!seek_vdir(file, ctx))
+		return 0;
+
+	deblk_sz = vdir_cache->vd_deblk_sz;
+	while (1) {
+		deblk_end.deblk = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
+		deblk_end.deblk += deblk_sz;
+		while (!is_deblk_end(&vdir_cache->vd_last.p, &deblk_end)) {
+			de = vdir_cache->vd_last.p.de;
+			AuDbg("%.*s, off%lld, i%lu, dt%d\n",
+			      de->de_str.len, de->de_str.name, ctx->pos,
+			      (unsigned long)de->de_ino, de->de_type);
+			if (unlikely(!dir_emit(ctx, de->de_str.name,
+					       de->de_str.len, de->de_ino,
+					       de->de_type))) {
+				/* todo: ignore the error caused by udba? */
+				/* return err; */
+				return 0;
+			}
+
+			l = calc_size(de->de_str.len);
+			vdir_cache->vd_last.p.deblk += l;
+			ctx->pos += l;
+		}
+		if (vdir_cache->vd_last.ul < vdir_cache->vd_nblk - 1) {
+			vdir_cache->vd_last.ul++;
+			vdir_cache->vd_last.p.deblk
+				= vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
+			ctx->pos = deblk_sz * vdir_cache->vd_last.ul;
+			continue;
+		}
+		break;
+	}
+
+	/* smp_mb(); */
+	return 0;
+}
diff --git a/fs/aufs/vfsub.c b/fs/aufs/vfsub.c
new file mode 100644
index 000000000..732d81cb4
--- /dev/null
+++ b/fs/aufs/vfsub.c
@@ -0,0 +1,894 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * sub-routines for VFS
+ */
+
+#include <linux/mnt_namespace.h>
+#include <linux/namei.h>
+#include <linux/nsproxy.h>
+#include <linux/security.h>
+#include <linux/splice.h>
+#include "aufs.h"
+
+#ifdef CONFIG_AUFS_BR_FUSE
+int vfsub_test_mntns(struct vfsmount *mnt, struct super_block *h_sb)
+{
+	if (!au_test_fuse(h_sb) || !au_userns)
+		return 0;
+
+	return is_current_mnt_ns(mnt) ? 0 : -EACCES;
+}
+#endif
+
+int vfsub_sync_filesystem(struct super_block *h_sb, int wait)
+{
+	int err;
+
+	lockdep_off();
+	down_read(&h_sb->s_umount);
+	err = __sync_filesystem(h_sb, wait);
+	up_read(&h_sb->s_umount);
+	lockdep_on();
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int vfsub_update_h_iattr(struct path *h_path, int *did)
+{
+	int err;
+	struct kstat st;
+	struct super_block *h_sb;
+
+	/* for remote fs, leave work for its getattr or d_revalidate */
+	/* for bad i_attr fs, handle them in aufs_getattr() */
+	/* still some fs may acquire i_mutex. we need to skip them */
+	err = 0;
+	if (!did)
+		did = &err;
+	h_sb = h_path->dentry->d_sb;
+	*did = (!au_test_fs_remote(h_sb) && au_test_fs_refresh_iattr(h_sb));
+	if (*did)
+		err = vfsub_getattr(h_path, &st);
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+struct file *vfsub_dentry_open(struct path *path, int flags)
+{
+	struct file *file;
+
+	file = dentry_open(path, flags /* | __FMODE_NONOTIFY */,
+			   current_cred());
+	if (!IS_ERR_OR_NULL(file)
+	    && (file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
+		i_readcount_inc(d_inode(path->dentry));
+
+	return file;
+}
+
+struct file *vfsub_filp_open(const char *path, int oflags, int mode)
+{
+	struct file *file;
+
+	lockdep_off();
+	file = filp_open(path,
+			 oflags /* | __FMODE_NONOTIFY */,
+			 mode);
+	lockdep_on();
+	if (IS_ERR(file))
+		goto out;
+	vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
+
+out:
+	return file;
+}
+
+/*
+ * Ideally this function should call VFS:do_last() in order to keep all its
+ * checkings. But it is very hard for aufs to regenerate several VFS internal
+ * structure such as nameidata. This is a second (or third) best approach.
+ * cf. linux/fs/namei.c:do_last(), lookup_open() and atomic_open().
+ */
+int vfsub_atomic_open(struct inode *dir, struct dentry *dentry,
+		      struct vfsub_aopen_args *args, struct au_branch *br)
+{
+	int err;
+	struct file *file = args->file;
+	/* copied from linux/fs/namei.c:atomic_open() */
+	struct dentry *const DENTRY_NOT_SET = (void *)-1UL;
+
+	IMustLock(dir);
+	AuDebugOn(!dir->i_op->atomic_open);
+
+	err = au_br_test_oflag(args->open_flag, br);
+	if (unlikely(err))
+		goto out;
+
+	args->file->f_path.dentry = DENTRY_NOT_SET;
+	args->file->f_path.mnt = au_br_mnt(br);
+	err = dir->i_op->atomic_open(dir, dentry, file, args->open_flag,
+				     args->create_mode, args->opened);
+	if (err >= 0) {
+		/* some filesystems don't set FILE_CREATED while succeeded? */
+		if (*args->opened & FILE_CREATED)
+			fsnotify_create(dir, dentry);
+	} else
+		goto out;
+
+
+	if (!err) {
+		/* todo: call VFS:may_open() here */
+		err = open_check_o_direct(file);
+		/* todo: ima_file_check() too? */
+		if (!err && (args->open_flag & __FMODE_EXEC))
+			err = deny_write_access(file);
+		if (unlikely(err))
+			/* note that the file is created and still opened */
+			goto out;
+	}
+
+	au_br_get(br);
+	fsnotify_open(file);
+
+out:
+	return err;
+}
+
+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path)
+{
+	int err;
+
+	err = kern_path(name, flags, path);
+	if (!err && d_is_positive(path->dentry))
+		vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
+	return err;
+}
+
+struct dentry *vfsub_lookup_one_len_unlocked(const char *name,
+					     struct dentry *parent, int len)
+{
+	struct path path = {
+		.mnt = NULL
+	};
+
+	path.dentry = lookup_one_len_unlocked(name, parent, len);
+	if (IS_ERR(path.dentry))
+		goto out;
+	if (d_is_positive(path.dentry))
+		vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
+
+out:
+	AuTraceErrPtr(path.dentry);
+	return path.dentry;
+}
+
+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
+				    int len)
+{
+	struct path path = {
+		.mnt = NULL
+	};
+
+	/* VFS checks it too, but by WARN_ON_ONCE() */
+	IMustLock(d_inode(parent));
+
+	path.dentry = lookup_one_len(name, parent, len);
+	if (IS_ERR(path.dentry))
+		goto out;
+	if (d_is_positive(path.dentry))
+		vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
+
+out:
+	AuTraceErrPtr(path.dentry);
+	return path.dentry;
+}
+
+void vfsub_call_lkup_one(void *args)
+{
+	struct vfsub_lkup_one_args *a = args;
+	*a->errp = vfsub_lkup_one(a->name, a->parent);
+}
+
+/* ---------------------------------------------------------------------- */
+
+struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
+				 struct dentry *d2, struct au_hinode *hdir2)
+{
+	struct dentry *d;
+
+	lockdep_off();
+	d = lock_rename(d1, d2);
+	lockdep_on();
+	au_hn_suspend(hdir1);
+	if (hdir1 != hdir2)
+		au_hn_suspend(hdir2);
+
+	return d;
+}
+
+void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
+			 struct dentry *d2, struct au_hinode *hdir2)
+{
+	au_hn_resume(hdir1);
+	if (hdir1 != hdir2)
+		au_hn_resume(hdir2);
+	lockdep_off();
+	unlock_rename(d1, d2);
+	lockdep_on();
+}
+
+/* ---------------------------------------------------------------------- */
+
+int vfsub_create(struct inode *dir, struct path *path, int mode, bool want_excl)
+{
+	int err;
+	struct dentry *d;
+
+	IMustLock(dir);
+
+	d = path->dentry;
+	path->dentry = d->d_parent;
+	err = security_path_mknod(path, d, mode, 0);
+	path->dentry = d;
+	if (unlikely(err))
+		goto out;
+
+	lockdep_off();
+	err = vfs_create(dir, path->dentry, mode, want_excl);
+	lockdep_on();
+	if (!err) {
+		struct path tmp = *path;
+		int did;
+
+		vfsub_update_h_iattr(&tmp, &did);
+		if (did) {
+			tmp.dentry = path->dentry->d_parent;
+			vfsub_update_h_iattr(&tmp, /*did*/NULL);
+		}
+		/*ignore*/
+	}
+
+out:
+	return err;
+}
+
+int vfsub_symlink(struct inode *dir, struct path *path, const char *symname)
+{
+	int err;
+	struct dentry *d;
+
+	IMustLock(dir);
+
+	d = path->dentry;
+	path->dentry = d->d_parent;
+	err = security_path_symlink(path, d, symname);
+	path->dentry = d;
+	if (unlikely(err))
+		goto out;
+
+	lockdep_off();
+	err = vfs_symlink(dir, path->dentry, symname);
+	lockdep_on();
+	if (!err) {
+		struct path tmp = *path;
+		int did;
+
+		vfsub_update_h_iattr(&tmp, &did);
+		if (did) {
+			tmp.dentry = path->dentry->d_parent;
+			vfsub_update_h_iattr(&tmp, /*did*/NULL);
+		}
+		/*ignore*/
+	}
+
+out:
+	return err;
+}
+
+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev)
+{
+	int err;
+	struct dentry *d;
+
+	IMustLock(dir);
+
+	d = path->dentry;
+	path->dentry = d->d_parent;
+	err = security_path_mknod(path, d, mode, new_encode_dev(dev));
+	path->dentry = d;
+	if (unlikely(err))
+		goto out;
+
+	lockdep_off();
+	err = vfs_mknod(dir, path->dentry, mode, dev);
+	lockdep_on();
+	if (!err) {
+		struct path tmp = *path;
+		int did;
+
+		vfsub_update_h_iattr(&tmp, &did);
+		if (did) {
+			tmp.dentry = path->dentry->d_parent;
+			vfsub_update_h_iattr(&tmp, /*did*/NULL);
+		}
+		/*ignore*/
+	}
+
+out:
+	return err;
+}
+
+static int au_test_nlink(struct inode *inode)
+{
+	const unsigned int link_max = UINT_MAX >> 1; /* rough margin */
+
+	if (!au_test_fs_no_limit_nlink(inode->i_sb)
+	    || inode->i_nlink < link_max)
+		return 0;
+	return -EMLINK;
+}
+
+int vfsub_link(struct dentry *src_dentry, struct inode *dir, struct path *path,
+	       struct inode **delegated_inode)
+{
+	int err;
+	struct dentry *d;
+
+	IMustLock(dir);
+
+	err = au_test_nlink(d_inode(src_dentry));
+	if (unlikely(err))
+		return err;
+
+	/* we don't call may_linkat() */
+	d = path->dentry;
+	path->dentry = d->d_parent;
+	err = security_path_link(src_dentry, path, d);
+	path->dentry = d;
+	if (unlikely(err))
+		goto out;
+
+	lockdep_off();
+	err = vfs_link(src_dentry, dir, path->dentry, delegated_inode);
+	lockdep_on();
+	if (!err) {
+		struct path tmp = *path;
+		int did;
+
+		/* fuse has different memory inode for the same inumber */
+		vfsub_update_h_iattr(&tmp, &did);
+		if (did) {
+			tmp.dentry = path->dentry->d_parent;
+			vfsub_update_h_iattr(&tmp, /*did*/NULL);
+			tmp.dentry = src_dentry;
+			vfsub_update_h_iattr(&tmp, /*did*/NULL);
+		}
+		/*ignore*/
+	}
+
+out:
+	return err;
+}
+
+int vfsub_rename(struct inode *src_dir, struct dentry *src_dentry,
+		 struct inode *dir, struct path *path,
+		 struct inode **delegated_inode, unsigned int flags)
+{
+	int err;
+	struct path tmp = {
+		.mnt	= path->mnt
+	};
+	struct dentry *d;
+
+	IMustLock(dir);
+	IMustLock(src_dir);
+
+	d = path->dentry;
+	path->dentry = d->d_parent;
+	tmp.dentry = src_dentry->d_parent;
+	err = security_path_rename(&tmp, src_dentry, path, d, /*flags*/0);
+	path->dentry = d;
+	if (unlikely(err))
+		goto out;
+
+	lockdep_off();
+	err = vfs_rename(src_dir, src_dentry, dir, path->dentry,
+			 delegated_inode, flags);
+	lockdep_on();
+	if (!err) {
+		int did;
+
+		tmp.dentry = d->d_parent;
+		vfsub_update_h_iattr(&tmp, &did);
+		if (did) {
+			tmp.dentry = src_dentry;
+			vfsub_update_h_iattr(&tmp, /*did*/NULL);
+			tmp.dentry = src_dentry->d_parent;
+			vfsub_update_h_iattr(&tmp, /*did*/NULL);
+		}
+		/*ignore*/
+	}
+
+out:
+	return err;
+}
+
+int vfsub_mkdir(struct inode *dir, struct path *path, int mode)
+{
+	int err;
+	struct dentry *d;
+
+	IMustLock(dir);
+
+	d = path->dentry;
+	path->dentry = d->d_parent;
+	err = security_path_mkdir(path, d, mode);
+	path->dentry = d;
+	if (unlikely(err))
+		goto out;
+
+	lockdep_off();
+	err = vfs_mkdir(dir, path->dentry, mode);
+	lockdep_on();
+	if (!err) {
+		struct path tmp = *path;
+		int did;
+
+		vfsub_update_h_iattr(&tmp, &did);
+		if (did) {
+			tmp.dentry = path->dentry->d_parent;
+			vfsub_update_h_iattr(&tmp, /*did*/NULL);
+		}
+		/*ignore*/
+	}
+
+out:
+	return err;
+}
+
+int vfsub_rmdir(struct inode *dir, struct path *path)
+{
+	int err;
+	struct dentry *d;
+
+	IMustLock(dir);
+
+	d = path->dentry;
+	path->dentry = d->d_parent;
+	err = security_path_rmdir(path, d);
+	path->dentry = d;
+	if (unlikely(err))
+		goto out;
+
+	lockdep_off();
+	err = vfs_rmdir(dir, path->dentry);
+	lockdep_on();
+	if (!err) {
+		struct path tmp = {
+			.dentry	= path->dentry->d_parent,
+			.mnt	= path->mnt
+		};
+
+		vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
+	}
+
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* todo: support mmap_sem? */
+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
+		     loff_t *ppos)
+{
+	ssize_t err;
+
+	lockdep_off();
+	err = vfs_read(file, ubuf, count, ppos);
+	lockdep_on();
+	if (err >= 0)
+		vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
+	return err;
+}
+
+/* todo: kernel_read()? */
+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
+		     loff_t *ppos)
+{
+	ssize_t err;
+	mm_segment_t oldfs;
+	union {
+		void *k;
+		char __user *u;
+	} buf;
+
+	buf.k = kbuf;
+	oldfs = get_fs();
+	set_fs(KERNEL_DS);
+	err = vfsub_read_u(file, buf.u, count, ppos);
+	set_fs(oldfs);
+	return err;
+}
+
+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
+		      loff_t *ppos)
+{
+	ssize_t err;
+
+	lockdep_off();
+	err = vfs_write(file, ubuf, count, ppos);
+	lockdep_on();
+	if (err >= 0)
+		vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
+	return err;
+}
+
+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count, loff_t *ppos)
+{
+	ssize_t err;
+	mm_segment_t oldfs;
+	union {
+		void *k;
+		const char __user *u;
+	} buf;
+
+	buf.k = kbuf;
+	oldfs = get_fs();
+	set_fs(KERNEL_DS);
+	err = vfsub_write_u(file, buf.u, count, ppos);
+	set_fs(oldfs);
+	return err;
+}
+
+int vfsub_flush(struct file *file, fl_owner_t id)
+{
+	int err;
+
+	err = 0;
+	if (file->f_op->flush) {
+		if (!au_test_nfs(file->f_path.dentry->d_sb))
+			err = file->f_op->flush(file, id);
+		else {
+			lockdep_off();
+			err = file->f_op->flush(file, id);
+			lockdep_on();
+		}
+		if (!err)
+			vfsub_update_h_iattr(&file->f_path, /*did*/NULL);
+		/*ignore*/
+	}
+	return err;
+}
+
+int vfsub_iterate_dir(struct file *file, struct dir_context *ctx)
+{
+	int err;
+
+	AuDbg("%pD, ctx{%pf, %llu}\n", file, ctx->actor, ctx->pos);
+
+	lockdep_off();
+	err = iterate_dir(file, ctx);
+	lockdep_on();
+	if (err >= 0)
+		vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
+
+	return err;
+}
+
+long vfsub_splice_to(struct file *in, loff_t *ppos,
+		     struct pipe_inode_info *pipe, size_t len,
+		     unsigned int flags)
+{
+	long err;
+
+	lockdep_off();
+	err = do_splice_to(in, ppos, pipe, len, flags);
+	lockdep_on();
+	file_accessed(in);
+	if (err >= 0)
+		vfsub_update_h_iattr(&in->f_path, /*did*/NULL); /*ignore*/
+	return err;
+}
+
+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
+		       loff_t *ppos, size_t len, unsigned int flags)
+{
+	long err;
+
+	lockdep_off();
+	err = do_splice_from(pipe, out, ppos, len, flags);
+	lockdep_on();
+	if (err >= 0)
+		vfsub_update_h_iattr(&out->f_path, /*did*/NULL); /*ignore*/
+	return err;
+}
+
+int vfsub_fsync(struct file *file, struct path *path, int datasync)
+{
+	int err;
+
+	/* file can be NULL */
+	lockdep_off();
+	err = vfs_fsync(file, datasync);
+	lockdep_on();
+	if (!err) {
+		if (!path) {
+			AuDebugOn(!file);
+			path = &file->f_path;
+		}
+		vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
+	}
+	return err;
+}
+
+/* cf. open.c:do_sys_truncate() and do_sys_ftruncate() */
+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
+		struct file *h_file)
+{
+	int err;
+	struct inode *h_inode;
+	struct super_block *h_sb;
+
+	if (!h_file) {
+		err = vfsub_truncate(h_path, length);
+		goto out;
+	}
+
+	h_inode = d_inode(h_path->dentry);
+	h_sb = h_inode->i_sb;
+	lockdep_off();
+	sb_start_write(h_sb);
+	lockdep_on();
+	err = locks_verify_truncate(h_inode, h_file, length);
+	if (!err)
+		err = security_path_truncate(h_path);
+	if (!err) {
+		lockdep_off();
+		err = do_truncate(h_path->dentry, length, attr, h_file);
+		lockdep_on();
+	}
+	lockdep_off();
+	sb_end_write(h_sb);
+	lockdep_on();
+
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+struct au_vfsub_mkdir_args {
+	int *errp;
+	struct inode *dir;
+	struct path *path;
+	int mode;
+};
+
+static void au_call_vfsub_mkdir(void *args)
+{
+	struct au_vfsub_mkdir_args *a = args;
+	*a->errp = vfsub_mkdir(a->dir, a->path, a->mode);
+}
+
+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode)
+{
+	int err, do_sio, wkq_err;
+
+	do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
+	if (!do_sio) {
+		lockdep_off();
+		err = vfsub_mkdir(dir, path, mode);
+		lockdep_on();
+	} else {
+		struct au_vfsub_mkdir_args args = {
+			.errp	= &err,
+			.dir	= dir,
+			.path	= path,
+			.mode	= mode
+		};
+		wkq_err = au_wkq_wait(au_call_vfsub_mkdir, &args);
+		if (unlikely(wkq_err))
+			err = wkq_err;
+	}
+
+	return err;
+}
+
+struct au_vfsub_rmdir_args {
+	int *errp;
+	struct inode *dir;
+	struct path *path;
+};
+
+static void au_call_vfsub_rmdir(void *args)
+{
+	struct au_vfsub_rmdir_args *a = args;
+	*a->errp = vfsub_rmdir(a->dir, a->path);
+}
+
+int vfsub_sio_rmdir(struct inode *dir, struct path *path)
+{
+	int err, do_sio, wkq_err;
+
+	do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
+	if (!do_sio) {
+		lockdep_off();
+		err = vfsub_rmdir(dir, path);
+		lockdep_on();
+	} else {
+		struct au_vfsub_rmdir_args args = {
+			.errp	= &err,
+			.dir	= dir,
+			.path	= path
+		};
+		wkq_err = au_wkq_wait(au_call_vfsub_rmdir, &args);
+		if (unlikely(wkq_err))
+			err = wkq_err;
+	}
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+struct notify_change_args {
+	int *errp;
+	struct path *path;
+	struct iattr *ia;
+	struct inode **delegated_inode;
+};
+
+static void call_notify_change(void *args)
+{
+	struct notify_change_args *a = args;
+	struct inode *h_inode;
+
+	h_inode = d_inode(a->path->dentry);
+	IMustLock(h_inode);
+
+	*a->errp = -EPERM;
+	if (!IS_IMMUTABLE(h_inode) && !IS_APPEND(h_inode)) {
+		lockdep_off();
+		*a->errp = notify_change(a->path->dentry, a->ia,
+					 a->delegated_inode);
+		lockdep_on();
+		if (!*a->errp)
+			vfsub_update_h_iattr(a->path, /*did*/NULL); /*ignore*/
+	}
+	AuTraceErr(*a->errp);
+}
+
+int vfsub_notify_change(struct path *path, struct iattr *ia,
+			struct inode **delegated_inode)
+{
+	int err;
+	struct notify_change_args args = {
+		.errp			= &err,
+		.path			= path,
+		.ia			= ia,
+		.delegated_inode	= delegated_inode
+	};
+
+	call_notify_change(&args);
+
+	return err;
+}
+
+int vfsub_sio_notify_change(struct path *path, struct iattr *ia,
+			    struct inode **delegated_inode)
+{
+	int err, wkq_err;
+	struct notify_change_args args = {
+		.errp			= &err,
+		.path			= path,
+		.ia			= ia,
+		.delegated_inode	= delegated_inode
+	};
+
+	wkq_err = au_wkq_wait(call_notify_change, &args);
+	if (unlikely(wkq_err))
+		err = wkq_err;
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+struct unlink_args {
+	int *errp;
+	struct inode *dir;
+	struct path *path;
+	struct inode **delegated_inode;
+};
+
+static void call_unlink(void *args)
+{
+	struct unlink_args *a = args;
+	struct dentry *d = a->path->dentry;
+	struct inode *h_inode;
+	const int stop_sillyrename = (au_test_nfs(d->d_sb)
+				      && au_dcount(d) == 1);
+
+	IMustLock(a->dir);
+
+	a->path->dentry = d->d_parent;
+	*a->errp = security_path_unlink(a->path, d);
+	a->path->dentry = d;
+	if (unlikely(*a->errp))
+		return;
+
+	if (!stop_sillyrename)
+		dget(d);
+	h_inode = NULL;
+	if (d_is_positive(d)) {
+		h_inode = d_inode(d);
+		ihold(h_inode);
+	}
+
+	lockdep_off();
+	*a->errp = vfs_unlink(a->dir, d, a->delegated_inode);
+	lockdep_on();
+	if (!*a->errp) {
+		struct path tmp = {
+			.dentry = d->d_parent,
+			.mnt	= a->path->mnt
+		};
+		vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
+	}
+
+	if (!stop_sillyrename)
+		dput(d);
+	if (h_inode)
+		iput(h_inode);
+
+	AuTraceErr(*a->errp);
+}
+
+/*
+ * @dir: must be locked.
+ * @dentry: target dentry.
+ */
+int vfsub_unlink(struct inode *dir, struct path *path,
+		 struct inode **delegated_inode, int force)
+{
+	int err;
+	struct unlink_args args = {
+		.errp			= &err,
+		.dir			= dir,
+		.path			= path,
+		.delegated_inode	= delegated_inode
+	};
+
+	if (!force)
+		call_unlink(&args);
+	else {
+		int wkq_err;
+
+		wkq_err = au_wkq_wait(call_unlink, &args);
+		if (unlikely(wkq_err))
+			err = wkq_err;
+	}
+
+	return err;
+}
diff --git a/fs/aufs/vfsub.h b/fs/aufs/vfsub.h
new file mode 100644
index 000000000..0a4012d11
--- /dev/null
+++ b/fs/aufs/vfsub.h
@@ -0,0 +1,354 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * sub-routines for VFS
+ */
+
+#ifndef __AUFS_VFSUB_H__
+#define __AUFS_VFSUB_H__
+
+#ifdef __KERNEL__
+
+#include <linux/fs.h>
+#include <linux/iversion.h>
+#include <linux/mount.h>
+#include <linux/posix_acl.h>
+#include <linux/xattr.h>
+#include "debug.h"
+
+/* copied from linux/fs/internal.h */
+/* todo: BAD approach!! */
+extern void __mnt_drop_write(struct vfsmount *);
+extern int open_check_o_direct(struct file *f);
+
+/* ---------------------------------------------------------------------- */
+
+/* lock subclass for lower inode */
+/* default MAX_LOCKDEP_SUBCLASSES(8) is not enough */
+/* reduce? gave up. */
+enum {
+	AuLsc_I_Begin = I_MUTEX_PARENT2, /* 5 */
+	AuLsc_I_PARENT,		/* lower inode, parent first */
+	AuLsc_I_PARENT2,	/* copyup dirs */
+	AuLsc_I_PARENT3,	/* copyup wh */
+	AuLsc_I_CHILD,
+	AuLsc_I_CHILD2,
+	AuLsc_I_End
+};
+
+/* to debug easier, do not make them inlined functions */
+#define MtxMustLock(mtx)	AuDebugOn(!mutex_is_locked(mtx))
+#define IMustLock(i)		AuDebugOn(!inode_is_locked(i))
+
+/* ---------------------------------------------------------------------- */
+
+static inline void vfsub_drop_nlink(struct inode *inode)
+{
+	AuDebugOn(!inode->i_nlink);
+	drop_nlink(inode);
+}
+
+static inline void vfsub_dead_dir(struct inode *inode)
+{
+	AuDebugOn(!S_ISDIR(inode->i_mode));
+	inode->i_flags |= S_DEAD;
+	clear_nlink(inode);
+}
+
+static inline int vfsub_native_ro(struct inode *inode)
+{
+	return sb_rdonly(inode->i_sb)
+		|| IS_RDONLY(inode)
+		/* || IS_APPEND(inode) */
+		|| IS_IMMUTABLE(inode);
+}
+
+#ifdef CONFIG_AUFS_BR_FUSE
+int vfsub_test_mntns(struct vfsmount *mnt, struct super_block *h_sb);
+#else
+AuStubInt0(vfsub_test_mntns, struct vfsmount *mnt, struct super_block *h_sb);
+#endif
+
+int vfsub_sync_filesystem(struct super_block *h_sb, int wait);
+
+/* ---------------------------------------------------------------------- */
+
+int vfsub_update_h_iattr(struct path *h_path, int *did);
+struct file *vfsub_dentry_open(struct path *path, int flags);
+struct file *vfsub_filp_open(const char *path, int oflags, int mode);
+struct vfsub_aopen_args {
+	struct file	*file;
+	unsigned int	open_flag;
+	umode_t		create_mode;
+	int		*opened;
+};
+struct au_branch;
+int vfsub_atomic_open(struct inode *dir, struct dentry *dentry,
+		      struct vfsub_aopen_args *args, struct au_branch *br);
+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path);
+
+struct dentry *vfsub_lookup_one_len_unlocked(const char *name,
+					     struct dentry *parent, int len);
+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
+				    int len);
+
+struct vfsub_lkup_one_args {
+	struct dentry **errp;
+	struct qstr *name;
+	struct dentry *parent;
+};
+
+static inline struct dentry *vfsub_lkup_one(struct qstr *name,
+					    struct dentry *parent)
+{
+	return vfsub_lookup_one_len(name->name, parent, name->len);
+}
+
+void vfsub_call_lkup_one(void *args);
+
+/* ---------------------------------------------------------------------- */
+
+static inline int vfsub_mnt_want_write(struct vfsmount *mnt)
+{
+	int err;
+
+	lockdep_off();
+	err = mnt_want_write(mnt);
+	lockdep_on();
+	return err;
+}
+
+static inline void vfsub_mnt_drop_write(struct vfsmount *mnt)
+{
+	lockdep_off();
+	mnt_drop_write(mnt);
+	lockdep_on();
+}
+
+#if 0 /* reserved */
+static inline void vfsub_mnt_drop_write_file(struct file *file)
+{
+	lockdep_off();
+	mnt_drop_write_file(file);
+	lockdep_on();
+}
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+struct au_hinode;
+struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
+				 struct dentry *d2, struct au_hinode *hdir2);
+void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
+			 struct dentry *d2, struct au_hinode *hdir2);
+
+int vfsub_create(struct inode *dir, struct path *path, int mode,
+		 bool want_excl);
+int vfsub_symlink(struct inode *dir, struct path *path,
+		  const char *symname);
+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev);
+int vfsub_link(struct dentry *src_dentry, struct inode *dir,
+	       struct path *path, struct inode **delegated_inode);
+int vfsub_rename(struct inode *src_hdir, struct dentry *src_dentry,
+		 struct inode *hdir, struct path *path,
+		 struct inode **delegated_inode, unsigned int flags);
+int vfsub_mkdir(struct inode *dir, struct path *path, int mode);
+int vfsub_rmdir(struct inode *dir, struct path *path);
+
+/* ---------------------------------------------------------------------- */
+
+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
+		     loff_t *ppos);
+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
+			loff_t *ppos);
+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
+		      loff_t *ppos);
+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count,
+		      loff_t *ppos);
+int vfsub_flush(struct file *file, fl_owner_t id);
+int vfsub_iterate_dir(struct file *file, struct dir_context *ctx);
+
+static inline loff_t vfsub_f_size_read(struct file *file)
+{
+	return i_size_read(file_inode(file));
+}
+
+static inline unsigned int vfsub_file_flags(struct file *file)
+{
+	unsigned int flags;
+
+	spin_lock(&file->f_lock);
+	flags = file->f_flags;
+	spin_unlock(&file->f_lock);
+
+	return flags;
+}
+
+static inline int vfsub_file_execed(struct file *file)
+{
+	/* todo: direct access f_flags */
+	return !!(vfsub_file_flags(file) & __FMODE_EXEC);
+}
+
+#if 0 /* reserved */
+static inline void vfsub_file_accessed(struct file *h_file)
+{
+	file_accessed(h_file);
+	vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); /*ignore*/
+}
+#endif
+
+#if 0 /* reserved */
+static inline void vfsub_touch_atime(struct vfsmount *h_mnt,
+				     struct dentry *h_dentry)
+{
+	struct path h_path = {
+		.dentry	= h_dentry,
+		.mnt	= h_mnt
+	};
+	touch_atime(&h_path);
+	vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
+}
+#endif
+
+static inline int vfsub_update_time(struct inode *h_inode, struct timespec *ts,
+				    int flags)
+{
+	return update_time(h_inode, ts, flags);
+	/* no vfsub_update_h_iattr() since we don't have struct path */
+}
+
+#ifdef CONFIG_FS_POSIX_ACL
+static inline int vfsub_acl_chmod(struct inode *h_inode, umode_t h_mode)
+{
+	int err;
+
+	err = posix_acl_chmod(h_inode, h_mode);
+	if (err == -EOPNOTSUPP)
+		err = 0;
+	return err;
+}
+#else
+AuStubInt0(vfsub_acl_chmod, struct inode *h_inode, umode_t h_mode);
+#endif
+
+long vfsub_splice_to(struct file *in, loff_t *ppos,
+		     struct pipe_inode_info *pipe, size_t len,
+		     unsigned int flags);
+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
+		       loff_t *ppos, size_t len, unsigned int flags);
+
+static inline long vfsub_truncate(struct path *path, loff_t length)
+{
+	long err;
+
+	lockdep_off();
+	err = vfs_truncate(path, length);
+	lockdep_on();
+	return err;
+}
+
+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
+		struct file *h_file);
+int vfsub_fsync(struct file *file, struct path *path, int datasync);
+
+/*
+ * re-use branch fs's ioctl(FICLONE) while aufs itself doesn't support such
+ * ioctl.
+ */
+static inline int vfsub_clone_file_range(struct file *src, struct file *dst,
+					 u64 len)
+{
+	int err;
+
+	lockdep_off();
+	err = vfs_clone_file_range(src, 0, dst, 0, len);
+	lockdep_on();
+
+	return err;
+}
+
+/* copy_file_range(2) is a systemcall */
+static inline ssize_t vfsub_copy_file_range(struct file *src, loff_t src_pos,
+					    struct file *dst, loff_t dst_pos,
+					    size_t len, unsigned int flags)
+{
+	ssize_t ssz;
+
+	lockdep_off();
+	ssz = vfs_copy_file_range(src, src_pos, dst, dst_pos, len, flags);
+	lockdep_on();
+
+	return ssz;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static inline loff_t vfsub_llseek(struct file *file, loff_t offset, int origin)
+{
+	loff_t err;
+
+	lockdep_off();
+	err = vfs_llseek(file, offset, origin);
+	lockdep_on();
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode);
+int vfsub_sio_rmdir(struct inode *dir, struct path *path);
+int vfsub_sio_notify_change(struct path *path, struct iattr *ia,
+			    struct inode **delegated_inode);
+int vfsub_notify_change(struct path *path, struct iattr *ia,
+			struct inode **delegated_inode);
+int vfsub_unlink(struct inode *dir, struct path *path,
+		 struct inode **delegated_inode, int force);
+
+static inline int vfsub_getattr(const struct path *path, struct kstat *st)
+{
+	return vfs_getattr(path, st, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
+}
+
+/* ---------------------------------------------------------------------- */
+
+static inline int vfsub_setxattr(struct dentry *dentry, const char *name,
+				 const void *value, size_t size, int flags)
+{
+	int err;
+
+	lockdep_off();
+	err = vfs_setxattr(dentry, name, value, size, flags);
+	lockdep_on();
+
+	return err;
+}
+
+static inline int vfsub_removexattr(struct dentry *dentry, const char *name)
+{
+	int err;
+
+	lockdep_off();
+	err = vfs_removexattr(dentry, name);
+	lockdep_on();
+
+	return err;
+}
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_VFSUB_H__ */
diff --git a/fs/aufs/wbr_policy.c b/fs/aufs/wbr_policy.c
new file mode 100644
index 000000000..1d365a2e7
--- /dev/null
+++ b/fs/aufs/wbr_policy.c
@@ -0,0 +1,830 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * policies for selecting one among multiple writable branches
+ */
+
+#include <linux/statfs.h>
+#include "aufs.h"
+
+/* subset of cpup_attr() */
+static noinline_for_stack
+int au_cpdown_attr(struct path *h_path, struct dentry *h_src)
+{
+	int err, sbits;
+	struct iattr ia;
+	struct inode *h_isrc;
+
+	h_isrc = d_inode(h_src);
+	ia.ia_valid = ATTR_FORCE | ATTR_MODE | ATTR_UID | ATTR_GID;
+	ia.ia_mode = h_isrc->i_mode;
+	ia.ia_uid = h_isrc->i_uid;
+	ia.ia_gid = h_isrc->i_gid;
+	sbits = !!(ia.ia_mode & (S_ISUID | S_ISGID));
+	au_cpup_attr_flags(d_inode(h_path->dentry), h_isrc->i_flags);
+	/* no delegation since it is just created */
+	err = vfsub_sio_notify_change(h_path, &ia, /*delegated*/NULL);
+
+	/* is this nfs only? */
+	if (!err && sbits && au_test_nfs(h_path->dentry->d_sb)) {
+		ia.ia_valid = ATTR_FORCE | ATTR_MODE;
+		ia.ia_mode = h_isrc->i_mode;
+		err = vfsub_sio_notify_change(h_path, &ia, /*delegated*/NULL);
+	}
+
+	return err;
+}
+
+#define AuCpdown_PARENT_OPQ	1
+#define AuCpdown_WHED		(1 << 1)
+#define AuCpdown_MADE_DIR	(1 << 2)
+#define AuCpdown_DIROPQ		(1 << 3)
+#define au_ftest_cpdown(flags, name)	((flags) & AuCpdown_##name)
+#define au_fset_cpdown(flags, name) \
+	do { (flags) |= AuCpdown_##name; } while (0)
+#define au_fclr_cpdown(flags, name) \
+	do { (flags) &= ~AuCpdown_##name; } while (0)
+
+static int au_cpdown_dir_opq(struct dentry *dentry, aufs_bindex_t bdst,
+			     unsigned int *flags)
+{
+	int err;
+	struct dentry *opq_dentry;
+
+	opq_dentry = au_diropq_create(dentry, bdst);
+	err = PTR_ERR(opq_dentry);
+	if (IS_ERR(opq_dentry))
+		goto out;
+	dput(opq_dentry);
+	au_fset_cpdown(*flags, DIROPQ);
+
+out:
+	return err;
+}
+
+static int au_cpdown_dir_wh(struct dentry *dentry, struct dentry *h_parent,
+			    struct inode *dir, aufs_bindex_t bdst)
+{
+	int err;
+	struct path h_path;
+	struct au_branch *br;
+
+	br = au_sbr(dentry->d_sb, bdst);
+	h_path.dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
+	err = PTR_ERR(h_path.dentry);
+	if (IS_ERR(h_path.dentry))
+		goto out;
+
+	err = 0;
+	if (d_is_positive(h_path.dentry)) {
+		h_path.mnt = au_br_mnt(br);
+		err = au_wh_unlink_dentry(au_h_iptr(dir, bdst), &h_path,
+					  dentry);
+	}
+	dput(h_path.dentry);
+
+out:
+	return err;
+}
+
+static int au_cpdown_dir(struct dentry *dentry, aufs_bindex_t bdst,
+			 struct au_pin *pin,
+			 struct dentry *h_parent, void *arg)
+{
+	int err, rerr;
+	aufs_bindex_t bopq, btop;
+	struct path h_path;
+	struct dentry *parent;
+	struct inode *h_dir, *h_inode, *inode, *dir;
+	unsigned int *flags = arg;
+
+	btop = au_dbtop(dentry);
+	/* dentry is di-locked */
+	parent = dget_parent(dentry);
+	dir = d_inode(parent);
+	h_dir = d_inode(h_parent);
+	AuDebugOn(h_dir != au_h_iptr(dir, bdst));
+	IMustLock(h_dir);
+
+	err = au_lkup_neg(dentry, bdst, /*wh*/0);
+	if (unlikely(err < 0))
+		goto out;
+	h_path.dentry = au_h_dptr(dentry, bdst);
+	h_path.mnt = au_sbr_mnt(dentry->d_sb, bdst);
+	err = vfsub_sio_mkdir(au_h_iptr(dir, bdst), &h_path,
+			      S_IRWXU | S_IRUGO | S_IXUGO);
+	if (unlikely(err))
+		goto out_put;
+	au_fset_cpdown(*flags, MADE_DIR);
+
+	bopq = au_dbdiropq(dentry);
+	au_fclr_cpdown(*flags, WHED);
+	au_fclr_cpdown(*flags, DIROPQ);
+	if (au_dbwh(dentry) == bdst)
+		au_fset_cpdown(*flags, WHED);
+	if (!au_ftest_cpdown(*flags, PARENT_OPQ) && bopq <= bdst)
+		au_fset_cpdown(*flags, PARENT_OPQ);
+	h_inode = d_inode(h_path.dentry);
+	inode_lock_nested(h_inode, AuLsc_I_CHILD);
+	if (au_ftest_cpdown(*flags, WHED)) {
+		err = au_cpdown_dir_opq(dentry, bdst, flags);
+		if (unlikely(err)) {
+			inode_unlock(h_inode);
+			goto out_dir;
+		}
+	}
+
+	err = au_cpdown_attr(&h_path, au_h_dptr(dentry, btop));
+	inode_unlock(h_inode);
+	if (unlikely(err))
+		goto out_opq;
+
+	if (au_ftest_cpdown(*flags, WHED)) {
+		err = au_cpdown_dir_wh(dentry, h_parent, dir, bdst);
+		if (unlikely(err))
+			goto out_opq;
+	}
+
+	inode = d_inode(dentry);
+	if (au_ibbot(inode) < bdst)
+		au_set_ibbot(inode, bdst);
+	au_set_h_iptr(inode, bdst, au_igrab(h_inode),
+		      au_hi_flags(inode, /*isdir*/1));
+	au_fhsm_wrote(dentry->d_sb, bdst, /*force*/0);
+	goto out; /* success */
+
+	/* revert */
+out_opq:
+	if (au_ftest_cpdown(*flags, DIROPQ)) {
+		inode_lock_nested(h_inode, AuLsc_I_CHILD);
+		rerr = au_diropq_remove(dentry, bdst);
+		inode_unlock(h_inode);
+		if (unlikely(rerr)) {
+			AuIOErr("failed removing diropq for %pd b%d (%d)\n",
+				dentry, bdst, rerr);
+			err = -EIO;
+			goto out;
+		}
+	}
+out_dir:
+	if (au_ftest_cpdown(*flags, MADE_DIR)) {
+		rerr = vfsub_sio_rmdir(au_h_iptr(dir, bdst), &h_path);
+		if (unlikely(rerr)) {
+			AuIOErr("failed removing %pd b%d (%d)\n",
+				dentry, bdst, rerr);
+			err = -EIO;
+		}
+	}
+out_put:
+	au_set_h_dptr(dentry, bdst, NULL);
+	if (au_dbbot(dentry) == bdst)
+		au_update_dbbot(dentry);
+out:
+	dput(parent);
+	return err;
+}
+
+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst)
+{
+	int err;
+	unsigned int flags;
+
+	flags = 0;
+	err = au_cp_dirs(dentry, bdst, au_cpdown_dir, &flags);
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* policies for create */
+
+int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex)
+{
+	int err, i, j, ndentry;
+	aufs_bindex_t bopq;
+	struct au_dcsub_pages dpages;
+	struct au_dpage *dpage;
+	struct dentry **dentries, *parent, *d;
+
+	err = au_dpages_init(&dpages, GFP_NOFS);
+	if (unlikely(err))
+		goto out;
+	parent = dget_parent(dentry);
+	err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/0);
+	if (unlikely(err))
+		goto out_free;
+
+	err = bindex;
+	for (i = 0; i < dpages.ndpage; i++) {
+		dpage = dpages.dpages + i;
+		dentries = dpage->dentries;
+		ndentry = dpage->ndentry;
+		for (j = 0; j < ndentry; j++) {
+			d = dentries[j];
+			di_read_lock_parent2(d, !AuLock_IR);
+			bopq = au_dbdiropq(d);
+			di_read_unlock(d, !AuLock_IR);
+			if (bopq >= 0 && bopq < err)
+				err = bopq;
+		}
+	}
+
+out_free:
+	dput(parent);
+	au_dpages_free(&dpages);
+out:
+	return err;
+}
+
+static int au_wbr_bu(struct super_block *sb, aufs_bindex_t bindex)
+{
+	for (; bindex >= 0; bindex--)
+		if (!au_br_rdonly(au_sbr(sb, bindex)))
+			return bindex;
+	return -EROFS;
+}
+
+/* top down parent */
+static int au_wbr_create_tdp(struct dentry *dentry,
+			     unsigned int flags __maybe_unused)
+{
+	int err;
+	aufs_bindex_t btop, bindex;
+	struct super_block *sb;
+	struct dentry *parent, *h_parent;
+
+	sb = dentry->d_sb;
+	btop = au_dbtop(dentry);
+	err = btop;
+	if (!au_br_rdonly(au_sbr(sb, btop)))
+		goto out;
+
+	err = -EROFS;
+	parent = dget_parent(dentry);
+	for (bindex = au_dbtop(parent); bindex < btop; bindex++) {
+		h_parent = au_h_dptr(parent, bindex);
+		if (!h_parent || d_is_negative(h_parent))
+			continue;
+
+		if (!au_br_rdonly(au_sbr(sb, bindex))) {
+			err = bindex;
+			break;
+		}
+	}
+	dput(parent);
+
+	/* bottom up here */
+	if (unlikely(err < 0)) {
+		err = au_wbr_bu(sb, btop - 1);
+		if (err >= 0)
+			err = au_wbr_nonopq(dentry, err);
+	}
+
+out:
+	AuDbg("b%d\n", err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* an exception for the policy other than tdp */
+static int au_wbr_create_exp(struct dentry *dentry)
+{
+	int err;
+	aufs_bindex_t bwh, bdiropq;
+	struct dentry *parent;
+
+	err = -1;
+	bwh = au_dbwh(dentry);
+	parent = dget_parent(dentry);
+	bdiropq = au_dbdiropq(parent);
+	if (bwh >= 0) {
+		if (bdiropq >= 0)
+			err = min(bdiropq, bwh);
+		else
+			err = bwh;
+		AuDbg("%d\n", err);
+	} else if (bdiropq >= 0) {
+		err = bdiropq;
+		AuDbg("%d\n", err);
+	}
+	dput(parent);
+
+	if (err >= 0)
+		err = au_wbr_nonopq(dentry, err);
+
+	if (err >= 0 && au_br_rdonly(au_sbr(dentry->d_sb, err)))
+		err = -1;
+
+	AuDbg("%d\n", err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* round robin */
+static int au_wbr_create_init_rr(struct super_block *sb)
+{
+	int err;
+
+	err = au_wbr_bu(sb, au_sbbot(sb));
+	atomic_set(&au_sbi(sb)->si_wbr_rr_next, -err); /* less important */
+	/* smp_mb(); */
+
+	AuDbg("b%d\n", err);
+	return err;
+}
+
+static int au_wbr_create_rr(struct dentry *dentry, unsigned int flags)
+{
+	int err, nbr;
+	unsigned int u;
+	aufs_bindex_t bindex, bbot;
+	struct super_block *sb;
+	atomic_t *next;
+
+	err = au_wbr_create_exp(dentry);
+	if (err >= 0)
+		goto out;
+
+	sb = dentry->d_sb;
+	next = &au_sbi(sb)->si_wbr_rr_next;
+	bbot = au_sbbot(sb);
+	nbr = bbot + 1;
+	for (bindex = 0; bindex <= bbot; bindex++) {
+		if (!au_ftest_wbr(flags, DIR)) {
+			err = atomic_dec_return(next) + 1;
+			/* modulo for 0 is meaningless */
+			if (unlikely(!err))
+				err = atomic_dec_return(next) + 1;
+		} else
+			err = atomic_read(next);
+		AuDbg("%d\n", err);
+		u = err;
+		err = u % nbr;
+		AuDbg("%d\n", err);
+		if (!au_br_rdonly(au_sbr(sb, err)))
+			break;
+		err = -EROFS;
+	}
+
+	if (err >= 0)
+		err = au_wbr_nonopq(dentry, err);
+
+out:
+	AuDbg("%d\n", err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* most free space */
+static void au_mfs(struct dentry *dentry, struct dentry *parent)
+{
+	struct super_block *sb;
+	struct au_branch *br;
+	struct au_wbr_mfs *mfs;
+	struct dentry *h_parent;
+	aufs_bindex_t bindex, bbot;
+	int err;
+	unsigned long long b, bavail;
+	struct path h_path;
+	/* reduce the stack usage */
+	struct kstatfs *st;
+
+	st = kmalloc(sizeof(*st), GFP_NOFS);
+	if (unlikely(!st)) {
+		AuWarn1("failed updating mfs(%d), ignored\n", -ENOMEM);
+		return;
+	}
+
+	bavail = 0;
+	sb = dentry->d_sb;
+	mfs = &au_sbi(sb)->si_wbr_mfs;
+	MtxMustLock(&mfs->mfs_lock);
+	mfs->mfs_bindex = -EROFS;
+	mfs->mfsrr_bytes = 0;
+	if (!parent) {
+		bindex = 0;
+		bbot = au_sbbot(sb);
+	} else {
+		bindex = au_dbtop(parent);
+		bbot = au_dbtaildir(parent);
+	}
+
+	for (; bindex <= bbot; bindex++) {
+		if (parent) {
+			h_parent = au_h_dptr(parent, bindex);
+			if (!h_parent || d_is_negative(h_parent))
+				continue;
+		}
+		br = au_sbr(sb, bindex);
+		if (au_br_rdonly(br))
+			continue;
+
+		/* sb->s_root for NFS is unreliable */
+		h_path.mnt = au_br_mnt(br);
+		h_path.dentry = h_path.mnt->mnt_root;
+		err = vfs_statfs(&h_path, st);
+		if (unlikely(err)) {
+			AuWarn1("failed statfs, b%d, %d\n", bindex, err);
+			continue;
+		}
+
+		/* when the available size is equal, select the lower one */
+		BUILD_BUG_ON(sizeof(b) < sizeof(st->f_bavail)
+			     || sizeof(b) < sizeof(st->f_bsize));
+		b = st->f_bavail * st->f_bsize;
+		br->br_wbr->wbr_bytes = b;
+		if (b >= bavail) {
+			bavail = b;
+			mfs->mfs_bindex = bindex;
+			mfs->mfs_jiffy = jiffies;
+		}
+	}
+
+	mfs->mfsrr_bytes = bavail;
+	AuDbg("b%d\n", mfs->mfs_bindex);
+	kfree(st);
+}
+
+static int au_wbr_create_mfs(struct dentry *dentry, unsigned int flags)
+{
+	int err;
+	struct dentry *parent;
+	struct super_block *sb;
+	struct au_wbr_mfs *mfs;
+
+	err = au_wbr_create_exp(dentry);
+	if (err >= 0)
+		goto out;
+
+	sb = dentry->d_sb;
+	parent = NULL;
+	if (au_ftest_wbr(flags, PARENT))
+		parent = dget_parent(dentry);
+	mfs = &au_sbi(sb)->si_wbr_mfs;
+	mutex_lock(&mfs->mfs_lock);
+	if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire)
+	    || mfs->mfs_bindex < 0
+	    || au_br_rdonly(au_sbr(sb, mfs->mfs_bindex)))
+		au_mfs(dentry, parent);
+	mutex_unlock(&mfs->mfs_lock);
+	err = mfs->mfs_bindex;
+	dput(parent);
+
+	if (err >= 0)
+		err = au_wbr_nonopq(dentry, err);
+
+out:
+	AuDbg("b%d\n", err);
+	return err;
+}
+
+static int au_wbr_create_init_mfs(struct super_block *sb)
+{
+	struct au_wbr_mfs *mfs;
+
+	mfs = &au_sbi(sb)->si_wbr_mfs;
+	mutex_init(&mfs->mfs_lock);
+	mfs->mfs_jiffy = 0;
+	mfs->mfs_bindex = -EROFS;
+
+	return 0;
+}
+
+static int au_wbr_create_fin_mfs(struct super_block *sb __maybe_unused)
+{
+	mutex_destroy(&au_sbi(sb)->si_wbr_mfs.mfs_lock);
+	return 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* top down regardless parent, and then mfs */
+static int au_wbr_create_tdmfs(struct dentry *dentry,
+			       unsigned int flags __maybe_unused)
+{
+	int err;
+	aufs_bindex_t bwh, btail, bindex, bfound, bmfs;
+	unsigned long long watermark;
+	struct super_block *sb;
+	struct au_wbr_mfs *mfs;
+	struct au_branch *br;
+	struct dentry *parent;
+
+	sb = dentry->d_sb;
+	mfs = &au_sbi(sb)->si_wbr_mfs;
+	mutex_lock(&mfs->mfs_lock);
+	if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire)
+	    || mfs->mfs_bindex < 0)
+		au_mfs(dentry, /*parent*/NULL);
+	watermark = mfs->mfsrr_watermark;
+	bmfs = mfs->mfs_bindex;
+	mutex_unlock(&mfs->mfs_lock);
+
+	/* another style of au_wbr_create_exp() */
+	bwh = au_dbwh(dentry);
+	parent = dget_parent(dentry);
+	btail = au_dbtaildir(parent);
+	if (bwh >= 0 && bwh < btail)
+		btail = bwh;
+
+	err = au_wbr_nonopq(dentry, btail);
+	if (unlikely(err < 0))
+		goto out;
+	btail = err;
+	bfound = -1;
+	for (bindex = 0; bindex <= btail; bindex++) {
+		br = au_sbr(sb, bindex);
+		if (au_br_rdonly(br))
+			continue;
+		if (br->br_wbr->wbr_bytes > watermark) {
+			bfound = bindex;
+			break;
+		}
+	}
+	err = bfound;
+	if (err < 0)
+		err = bmfs;
+
+out:
+	dput(parent);
+	AuDbg("b%d\n", err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* most free space and then round robin */
+static int au_wbr_create_mfsrr(struct dentry *dentry, unsigned int flags)
+{
+	int err;
+	struct au_wbr_mfs *mfs;
+
+	err = au_wbr_create_mfs(dentry, flags);
+	if (err >= 0) {
+		mfs = &au_sbi(dentry->d_sb)->si_wbr_mfs;
+		mutex_lock(&mfs->mfs_lock);
+		if (mfs->mfsrr_bytes < mfs->mfsrr_watermark)
+			err = au_wbr_create_rr(dentry, flags);
+		mutex_unlock(&mfs->mfs_lock);
+	}
+
+	AuDbg("b%d\n", err);
+	return err;
+}
+
+static int au_wbr_create_init_mfsrr(struct super_block *sb)
+{
+	int err;
+
+	au_wbr_create_init_mfs(sb); /* ignore */
+	err = au_wbr_create_init_rr(sb);
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* top down parent and most free space */
+static int au_wbr_create_pmfs(struct dentry *dentry, unsigned int flags)
+{
+	int err, e2;
+	unsigned long long b;
+	aufs_bindex_t bindex, btop, bbot;
+	struct super_block *sb;
+	struct dentry *parent, *h_parent;
+	struct au_branch *br;
+
+	err = au_wbr_create_tdp(dentry, flags);
+	if (unlikely(err < 0))
+		goto out;
+	parent = dget_parent(dentry);
+	btop = au_dbtop(parent);
+	bbot = au_dbtaildir(parent);
+	if (btop == bbot)
+		goto out_parent; /* success */
+
+	e2 = au_wbr_create_mfs(dentry, flags);
+	if (e2 < 0)
+		goto out_parent; /* success */
+
+	/* when the available size is equal, select upper one */
+	sb = dentry->d_sb;
+	br = au_sbr(sb, err);
+	b = br->br_wbr->wbr_bytes;
+	AuDbg("b%d, %llu\n", err, b);
+
+	for (bindex = btop; bindex <= bbot; bindex++) {
+		h_parent = au_h_dptr(parent, bindex);
+		if (!h_parent || d_is_negative(h_parent))
+			continue;
+
+		br = au_sbr(sb, bindex);
+		if (!au_br_rdonly(br) && br->br_wbr->wbr_bytes > b) {
+			b = br->br_wbr->wbr_bytes;
+			err = bindex;
+			AuDbg("b%d, %llu\n", err, b);
+		}
+	}
+
+	if (err >= 0)
+		err = au_wbr_nonopq(dentry, err);
+
+out_parent:
+	dput(parent);
+out:
+	AuDbg("b%d\n", err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * - top down parent
+ * - most free space with parent
+ * - most free space round-robin regardless parent
+ */
+static int au_wbr_create_pmfsrr(struct dentry *dentry, unsigned int flags)
+{
+	int err;
+	unsigned long long watermark;
+	struct super_block *sb;
+	struct au_branch *br;
+	struct au_wbr_mfs *mfs;
+
+	err = au_wbr_create_pmfs(dentry, flags | AuWbr_PARENT);
+	if (unlikely(err < 0))
+		goto out;
+
+	sb = dentry->d_sb;
+	br = au_sbr(sb, err);
+	mfs = &au_sbi(sb)->si_wbr_mfs;
+	mutex_lock(&mfs->mfs_lock);
+	watermark = mfs->mfsrr_watermark;
+	mutex_unlock(&mfs->mfs_lock);
+	if (br->br_wbr->wbr_bytes < watermark)
+		/* regardless the parent dir */
+		err = au_wbr_create_mfsrr(dentry, flags);
+
+out:
+	AuDbg("b%d\n", err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* policies for copyup */
+
+/* top down parent */
+static int au_wbr_copyup_tdp(struct dentry *dentry)
+{
+	return au_wbr_create_tdp(dentry, /*flags, anything is ok*/0);
+}
+
+/* bottom up parent */
+static int au_wbr_copyup_bup(struct dentry *dentry)
+{
+	int err;
+	aufs_bindex_t bindex, btop;
+	struct dentry *parent, *h_parent;
+	struct super_block *sb;
+
+	err = -EROFS;
+	sb = dentry->d_sb;
+	parent = dget_parent(dentry);
+	btop = au_dbtop(parent);
+	for (bindex = au_dbtop(dentry); bindex >= btop; bindex--) {
+		h_parent = au_h_dptr(parent, bindex);
+		if (!h_parent || d_is_negative(h_parent))
+			continue;
+
+		if (!au_br_rdonly(au_sbr(sb, bindex))) {
+			err = bindex;
+			break;
+		}
+	}
+	dput(parent);
+
+	/* bottom up here */
+	if (unlikely(err < 0))
+		err = au_wbr_bu(sb, btop - 1);
+
+	AuDbg("b%d\n", err);
+	return err;
+}
+
+/* bottom up */
+int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t btop)
+{
+	int err;
+
+	err = au_wbr_bu(dentry->d_sb, btop);
+	AuDbg("b%d\n", err);
+	if (err > btop)
+		err = au_wbr_nonopq(dentry, err);
+
+	AuDbg("b%d\n", err);
+	return err;
+}
+
+static int au_wbr_copyup_bu(struct dentry *dentry)
+{
+	int err;
+	aufs_bindex_t btop;
+
+	btop = au_dbtop(dentry);
+	err = au_wbr_do_copyup_bu(dentry, btop);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+struct au_wbr_copyup_operations au_wbr_copyup_ops[] = {
+	[AuWbrCopyup_TDP] = {
+		.copyup	= au_wbr_copyup_tdp
+	},
+	[AuWbrCopyup_BUP] = {
+		.copyup	= au_wbr_copyup_bup
+	},
+	[AuWbrCopyup_BU] = {
+		.copyup	= au_wbr_copyup_bu
+	}
+};
+
+struct au_wbr_create_operations au_wbr_create_ops[] = {
+	[AuWbrCreate_TDP] = {
+		.create	= au_wbr_create_tdp
+	},
+	[AuWbrCreate_RR] = {
+		.create	= au_wbr_create_rr,
+		.init	= au_wbr_create_init_rr
+	},
+	[AuWbrCreate_MFS] = {
+		.create	= au_wbr_create_mfs,
+		.init	= au_wbr_create_init_mfs,
+		.fin	= au_wbr_create_fin_mfs
+	},
+	[AuWbrCreate_MFSV] = {
+		.create	= au_wbr_create_mfs,
+		.init	= au_wbr_create_init_mfs,
+		.fin	= au_wbr_create_fin_mfs
+	},
+	[AuWbrCreate_MFSRR] = {
+		.create	= au_wbr_create_mfsrr,
+		.init	= au_wbr_create_init_mfsrr,
+		.fin	= au_wbr_create_fin_mfs
+	},
+	[AuWbrCreate_MFSRRV] = {
+		.create	= au_wbr_create_mfsrr,
+		.init	= au_wbr_create_init_mfsrr,
+		.fin	= au_wbr_create_fin_mfs
+	},
+	[AuWbrCreate_TDMFS] = {
+		.create	= au_wbr_create_tdmfs,
+		.init	= au_wbr_create_init_mfs,
+		.fin	= au_wbr_create_fin_mfs
+	},
+	[AuWbrCreate_TDMFSV] = {
+		.create	= au_wbr_create_tdmfs,
+		.init	= au_wbr_create_init_mfs,
+		.fin	= au_wbr_create_fin_mfs
+	},
+	[AuWbrCreate_PMFS] = {
+		.create	= au_wbr_create_pmfs,
+		.init	= au_wbr_create_init_mfs,
+		.fin	= au_wbr_create_fin_mfs
+	},
+	[AuWbrCreate_PMFSV] = {
+		.create	= au_wbr_create_pmfs,
+		.init	= au_wbr_create_init_mfs,
+		.fin	= au_wbr_create_fin_mfs
+	},
+	[AuWbrCreate_PMFSRR] = {
+		.create	= au_wbr_create_pmfsrr,
+		.init	= au_wbr_create_init_mfsrr,
+		.fin	= au_wbr_create_fin_mfs
+	},
+	[AuWbrCreate_PMFSRRV] = {
+		.create	= au_wbr_create_pmfsrr,
+		.init	= au_wbr_create_init_mfsrr,
+		.fin	= au_wbr_create_fin_mfs
+	}
+};
diff --git a/fs/aufs/whout.c b/fs/aufs/whout.c
new file mode 100644
index 000000000..c5cf34e2b
--- /dev/null
+++ b/fs/aufs/whout.c
@@ -0,0 +1,1061 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * whiteout for logical deletion and opaque directory
+ */
+
+#include "aufs.h"
+
+#define WH_MASK			S_IRUGO
+
+/*
+ * If a directory contains this file, then it is opaque.  We start with the
+ * .wh. flag so that it is blocked by lookup.
+ */
+static struct qstr diropq_name = QSTR_INIT(AUFS_WH_DIROPQ,
+					   sizeof(AUFS_WH_DIROPQ) - 1);
+
+/*
+ * generate whiteout name, which is NOT terminated by NULL.
+ * @name: original d_name.name
+ * @len: original d_name.len
+ * @wh: whiteout qstr
+ * returns zero when succeeds, otherwise error.
+ * succeeded value as wh->name should be freed by kfree().
+ */
+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name)
+{
+	char *p;
+
+	if (unlikely(name->len > PATH_MAX - AUFS_WH_PFX_LEN))
+		return -ENAMETOOLONG;
+
+	wh->len = name->len + AUFS_WH_PFX_LEN;
+	p = kmalloc(wh->len, GFP_NOFS);
+	wh->name = p;
+	if (p) {
+		memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
+		memcpy(p + AUFS_WH_PFX_LEN, name->name, name->len);
+		/* smp_mb(); */
+		return 0;
+	}
+	return -ENOMEM;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * test if the @wh_name exists under @h_parent.
+ * @try_sio specifies the necessary of super-io.
+ */
+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name, int try_sio)
+{
+	int err;
+	struct dentry *wh_dentry;
+
+	if (!try_sio)
+		wh_dentry = vfsub_lkup_one(wh_name, h_parent);
+	else
+		wh_dentry = au_sio_lkup_one(wh_name, h_parent);
+	err = PTR_ERR(wh_dentry);
+	if (IS_ERR(wh_dentry)) {
+		if (err == -ENAMETOOLONG)
+			err = 0;
+		goto out;
+	}
+
+	err = 0;
+	if (d_is_negative(wh_dentry))
+		goto out_wh; /* success */
+
+	err = 1;
+	if (d_is_reg(wh_dentry))
+		goto out_wh; /* success */
+
+	err = -EIO;
+	AuIOErr("%pd Invalid whiteout entry type 0%o.\n",
+		wh_dentry, d_inode(wh_dentry)->i_mode);
+
+out_wh:
+	dput(wh_dentry);
+out:
+	return err;
+}
+
+/*
+ * test if the @h_dentry sets opaque or not.
+ */
+int au_diropq_test(struct dentry *h_dentry)
+{
+	int err;
+	struct inode *h_dir;
+
+	h_dir = d_inode(h_dentry);
+	err = au_wh_test(h_dentry, &diropq_name,
+			 au_test_h_perm_sio(h_dir, MAY_EXEC));
+	return err;
+}
+
+/*
+ * returns a negative dentry whose name is unique and temporary.
+ */
+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
+			     struct qstr *prefix)
+{
+	struct dentry *dentry;
+	int i;
+	char defname[NAME_MAX - AUFS_MAX_NAMELEN + DNAME_INLINE_LEN + 1],
+		*name, *p;
+	/* strict atomic_t is unnecessary here */
+	static unsigned short cnt;
+	struct qstr qs;
+
+	BUILD_BUG_ON(sizeof(cnt) * 2 > AUFS_WH_TMP_LEN);
+
+	name = defname;
+	qs.len = sizeof(defname) - DNAME_INLINE_LEN + prefix->len - 1;
+	if (unlikely(prefix->len > DNAME_INLINE_LEN)) {
+		dentry = ERR_PTR(-ENAMETOOLONG);
+		if (unlikely(qs.len > NAME_MAX))
+			goto out;
+		dentry = ERR_PTR(-ENOMEM);
+		name = kmalloc(qs.len + 1, GFP_NOFS);
+		if (unlikely(!name))
+			goto out;
+	}
+
+	/* doubly whiteout-ed */
+	memcpy(name, AUFS_WH_PFX AUFS_WH_PFX, AUFS_WH_PFX_LEN * 2);
+	p = name + AUFS_WH_PFX_LEN * 2;
+	memcpy(p, prefix->name, prefix->len);
+	p += prefix->len;
+	*p++ = '.';
+	AuDebugOn(name + qs.len + 1 - p <= AUFS_WH_TMP_LEN);
+
+	qs.name = name;
+	for (i = 0; i < 3; i++) {
+		sprintf(p, "%.*x", AUFS_WH_TMP_LEN, cnt++);
+		dentry = au_sio_lkup_one(&qs, h_parent);
+		if (IS_ERR(dentry) || d_is_negative(dentry))
+			goto out_name;
+		dput(dentry);
+	}
+	/* pr_warn("could not get random name\n"); */
+	dentry = ERR_PTR(-EEXIST);
+	AuDbg("%.*s\n", AuLNPair(&qs));
+	BUG();
+
+out_name:
+	if (name != defname)
+		kfree(name);
+out:
+	AuTraceErrPtr(dentry);
+	return dentry;
+}
+
+/*
+ * rename the @h_dentry on @br to the whiteouted temporary name.
+ */
+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br)
+{
+	int err;
+	struct path h_path = {
+		.mnt = au_br_mnt(br)
+	};
+	struct inode *h_dir, *delegated;
+	struct dentry *h_parent;
+
+	h_parent = h_dentry->d_parent; /* dir inode is locked */
+	h_dir = d_inode(h_parent);
+	IMustLock(h_dir);
+
+	h_path.dentry = au_whtmp_lkup(h_parent, br, &h_dentry->d_name);
+	err = PTR_ERR(h_path.dentry);
+	if (IS_ERR(h_path.dentry))
+		goto out;
+
+	/* under the same dir, no need to lock_rename() */
+	delegated = NULL;
+	err = vfsub_rename(h_dir, h_dentry, h_dir, &h_path, &delegated,
+			   /*flags*/0);
+	AuTraceErr(err);
+	if (unlikely(err == -EWOULDBLOCK)) {
+		pr_warn("cannot retry for NFSv4 delegation"
+			" for an internal rename\n");
+		iput(delegated);
+	}
+	dput(h_path.dentry);
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+/*
+ * functions for removing a whiteout
+ */
+
+static int do_unlink_wh(struct inode *h_dir, struct path *h_path)
+{
+	int err, force;
+	struct inode *delegated;
+
+	/*
+	 * forces superio when the dir has a sticky bit.
+	 * this may be a violation of unix fs semantics.
+	 */
+	force = (h_dir->i_mode & S_ISVTX)
+		&& !uid_eq(current_fsuid(), d_inode(h_path->dentry)->i_uid);
+	delegated = NULL;
+	err = vfsub_unlink(h_dir, h_path, &delegated, force);
+	if (unlikely(err == -EWOULDBLOCK)) {
+		pr_warn("cannot retry for NFSv4 delegation"
+			" for an internal unlink\n");
+		iput(delegated);
+	}
+	return err;
+}
+
+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
+			struct dentry *dentry)
+{
+	int err;
+
+	err = do_unlink_wh(h_dir, h_path);
+	if (!err && dentry)
+		au_set_dbwh(dentry, -1);
+
+	return err;
+}
+
+static int unlink_wh_name(struct dentry *h_parent, struct qstr *wh,
+			  struct au_branch *br)
+{
+	int err;
+	struct path h_path = {
+		.mnt = au_br_mnt(br)
+	};
+
+	err = 0;
+	h_path.dentry = vfsub_lkup_one(wh, h_parent);
+	if (IS_ERR(h_path.dentry))
+		err = PTR_ERR(h_path.dentry);
+	else {
+		if (d_is_reg(h_path.dentry))
+			err = do_unlink_wh(d_inode(h_parent), &h_path);
+		dput(h_path.dentry);
+	}
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+/*
+ * initialize/clean whiteout for a branch
+ */
+
+static void au_wh_clean(struct inode *h_dir, struct path *whpath,
+			const int isdir)
+{
+	int err;
+	struct inode *delegated;
+
+	if (d_is_negative(whpath->dentry))
+		return;
+
+	if (isdir)
+		err = vfsub_rmdir(h_dir, whpath);
+	else {
+		delegated = NULL;
+		err = vfsub_unlink(h_dir, whpath, &delegated, /*force*/0);
+		if (unlikely(err == -EWOULDBLOCK)) {
+			pr_warn("cannot retry for NFSv4 delegation"
+				" for an internal unlink\n");
+			iput(delegated);
+		}
+	}
+	if (unlikely(err))
+		pr_warn("failed removing %pd (%d), ignored.\n",
+			whpath->dentry, err);
+}
+
+static int test_linkable(struct dentry *h_root)
+{
+	struct inode *h_dir = d_inode(h_root);
+
+	if (h_dir->i_op->link)
+		return 0;
+
+	pr_err("%pd (%s) doesn't support link(2), use noplink and rw+nolwh\n",
+	       h_root, au_sbtype(h_root->d_sb));
+	return -ENOSYS;
+}
+
+/* todo: should this mkdir be done in /sbin/mount.aufs helper? */
+static int au_whdir(struct inode *h_dir, struct path *path)
+{
+	int err;
+
+	err = -EEXIST;
+	if (d_is_negative(path->dentry)) {
+		int mode = S_IRWXU;
+
+		if (au_test_nfs(path->dentry->d_sb))
+			mode |= S_IXUGO;
+		err = vfsub_mkdir(h_dir, path, mode);
+	} else if (d_is_dir(path->dentry))
+		err = 0;
+	else
+		pr_err("unknown %pd exists\n", path->dentry);
+
+	return err;
+}
+
+struct au_wh_base {
+	const struct qstr *name;
+	struct dentry *dentry;
+};
+
+static void au_wh_init_ro(struct inode *h_dir, struct au_wh_base base[],
+			  struct path *h_path)
+{
+	h_path->dentry = base[AuBrWh_BASE].dentry;
+	au_wh_clean(h_dir, h_path, /*isdir*/0);
+	h_path->dentry = base[AuBrWh_PLINK].dentry;
+	au_wh_clean(h_dir, h_path, /*isdir*/1);
+	h_path->dentry = base[AuBrWh_ORPH].dentry;
+	au_wh_clean(h_dir, h_path, /*isdir*/1);
+}
+
+/*
+ * returns tri-state,
+ * minus: error, caller should print the message
+ * zero: succuess
+ * plus: error, caller should NOT print the message
+ */
+static int au_wh_init_rw_nolink(struct dentry *h_root, struct au_wbr *wbr,
+				int do_plink, struct au_wh_base base[],
+				struct path *h_path)
+{
+	int err;
+	struct inode *h_dir;
+
+	h_dir = d_inode(h_root);
+	h_path->dentry = base[AuBrWh_BASE].dentry;
+	au_wh_clean(h_dir, h_path, /*isdir*/0);
+	h_path->dentry = base[AuBrWh_PLINK].dentry;
+	if (do_plink) {
+		err = test_linkable(h_root);
+		if (unlikely(err)) {
+			err = 1;
+			goto out;
+		}
+
+		err = au_whdir(h_dir, h_path);
+		if (unlikely(err))
+			goto out;
+		wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
+	} else
+		au_wh_clean(h_dir, h_path, /*isdir*/1);
+	h_path->dentry = base[AuBrWh_ORPH].dentry;
+	err = au_whdir(h_dir, h_path);
+	if (unlikely(err))
+		goto out;
+	wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
+
+out:
+	return err;
+}
+
+/*
+ * for the moment, aufs supports the branch filesystem which does not support
+ * link(2). testing on FAT which does not support i_op->setattr() fully either,
+ * copyup failed. finally, such filesystem will not be used as the writable
+ * branch.
+ *
+ * returns tri-state, see above.
+ */
+static int au_wh_init_rw(struct dentry *h_root, struct au_wbr *wbr,
+			 int do_plink, struct au_wh_base base[],
+			 struct path *h_path)
+{
+	int err;
+	struct inode *h_dir;
+
+	WbrWhMustWriteLock(wbr);
+
+	err = test_linkable(h_root);
+	if (unlikely(err)) {
+		err = 1;
+		goto out;
+	}
+
+	/*
+	 * todo: should this create be done in /sbin/mount.aufs helper?
+	 */
+	err = -EEXIST;
+	h_dir = d_inode(h_root);
+	if (d_is_negative(base[AuBrWh_BASE].dentry)) {
+		h_path->dentry = base[AuBrWh_BASE].dentry;
+		err = vfsub_create(h_dir, h_path, WH_MASK, /*want_excl*/true);
+	} else if (d_is_reg(base[AuBrWh_BASE].dentry))
+		err = 0;
+	else
+		pr_err("unknown %pd2 exists\n", base[AuBrWh_BASE].dentry);
+	if (unlikely(err))
+		goto out;
+
+	h_path->dentry = base[AuBrWh_PLINK].dentry;
+	if (do_plink) {
+		err = au_whdir(h_dir, h_path);
+		if (unlikely(err))
+			goto out;
+		wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
+	} else
+		au_wh_clean(h_dir, h_path, /*isdir*/1);
+	wbr->wbr_whbase = dget(base[AuBrWh_BASE].dentry);
+
+	h_path->dentry = base[AuBrWh_ORPH].dentry;
+	err = au_whdir(h_dir, h_path);
+	if (unlikely(err))
+		goto out;
+	wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
+
+out:
+	return err;
+}
+
+/*
+ * initialize the whiteout base file/dir for @br.
+ */
+int au_wh_init(struct au_branch *br, struct super_block *sb)
+{
+	int err, i;
+	const unsigned char do_plink
+		= !!au_opt_test(au_mntflags(sb), PLINK);
+	struct inode *h_dir;
+	struct path path = br->br_path;
+	struct dentry *h_root = path.dentry;
+	struct au_wbr *wbr = br->br_wbr;
+	static const struct qstr base_name[] = {
+		[AuBrWh_BASE] = QSTR_INIT(AUFS_BASE_NAME,
+					  sizeof(AUFS_BASE_NAME) - 1),
+		[AuBrWh_PLINK] = QSTR_INIT(AUFS_PLINKDIR_NAME,
+					   sizeof(AUFS_PLINKDIR_NAME) - 1),
+		[AuBrWh_ORPH] = QSTR_INIT(AUFS_ORPHDIR_NAME,
+					  sizeof(AUFS_ORPHDIR_NAME) - 1)
+	};
+	struct au_wh_base base[] = {
+		[AuBrWh_BASE] = {
+			.name	= base_name + AuBrWh_BASE,
+			.dentry	= NULL
+		},
+		[AuBrWh_PLINK] = {
+			.name	= base_name + AuBrWh_PLINK,
+			.dentry	= NULL
+		},
+		[AuBrWh_ORPH] = {
+			.name	= base_name + AuBrWh_ORPH,
+			.dentry	= NULL
+		}
+	};
+
+	if (wbr)
+		WbrWhMustWriteLock(wbr);
+
+	for (i = 0; i < AuBrWh_Last; i++) {
+		/* doubly whiteouted */
+		struct dentry *d;
+
+		d = au_wh_lkup(h_root, (void *)base[i].name, br);
+		err = PTR_ERR(d);
+		if (IS_ERR(d))
+			goto out;
+
+		base[i].dentry = d;
+		AuDebugOn(wbr
+			  && wbr->wbr_wh[i]
+			  && wbr->wbr_wh[i] != base[i].dentry);
+	}
+
+	if (wbr)
+		for (i = 0; i < AuBrWh_Last; i++) {
+			dput(wbr->wbr_wh[i]);
+			wbr->wbr_wh[i] = NULL;
+		}
+
+	err = 0;
+	if (!au_br_writable(br->br_perm)) {
+		h_dir = d_inode(h_root);
+		au_wh_init_ro(h_dir, base, &path);
+	} else if (!au_br_wh_linkable(br->br_perm)) {
+		err = au_wh_init_rw_nolink(h_root, wbr, do_plink, base, &path);
+		if (err > 0)
+			goto out;
+		else if (err)
+			goto out_err;
+	} else {
+		err = au_wh_init_rw(h_root, wbr, do_plink, base, &path);
+		if (err > 0)
+			goto out;
+		else if (err)
+			goto out_err;
+	}
+	goto out; /* success */
+
+out_err:
+	pr_err("an error(%d) on the writable branch %pd(%s)\n",
+	       err, h_root, au_sbtype(h_root->d_sb));
+out:
+	for (i = 0; i < AuBrWh_Last; i++)
+		dput(base[i].dentry);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+/*
+ * whiteouts are all hard-linked usually.
+ * when its link count reaches a ceiling, we create a new whiteout base
+ * asynchronously.
+ */
+
+struct reinit_br_wh {
+	struct super_block *sb;
+	struct au_branch *br;
+};
+
+static void reinit_br_wh(void *arg)
+{
+	int err;
+	aufs_bindex_t bindex;
+	struct path h_path;
+	struct reinit_br_wh *a = arg;
+	struct au_wbr *wbr;
+	struct inode *dir, *delegated;
+	struct dentry *h_root;
+	struct au_hinode *hdir;
+
+	err = 0;
+	wbr = a->br->br_wbr;
+	/* big aufs lock */
+	si_noflush_write_lock(a->sb);
+	if (!au_br_writable(a->br->br_perm))
+		goto out;
+	bindex = au_br_index(a->sb, a->br->br_id);
+	if (unlikely(bindex < 0))
+		goto out;
+
+	di_read_lock_parent(a->sb->s_root, AuLock_IR);
+	dir = d_inode(a->sb->s_root);
+	hdir = au_hi(dir, bindex);
+	h_root = au_h_dptr(a->sb->s_root, bindex);
+	AuDebugOn(h_root != au_br_dentry(a->br));
+
+	au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
+	wbr_wh_write_lock(wbr);
+	err = au_h_verify(wbr->wbr_whbase, au_opt_udba(a->sb), hdir->hi_inode,
+			  h_root, a->br);
+	if (!err) {
+		h_path.dentry = wbr->wbr_whbase;
+		h_path.mnt = au_br_mnt(a->br);
+		delegated = NULL;
+		err = vfsub_unlink(hdir->hi_inode, &h_path, &delegated,
+				   /*force*/0);
+		if (unlikely(err == -EWOULDBLOCK)) {
+			pr_warn("cannot retry for NFSv4 delegation"
+				" for an internal unlink\n");
+			iput(delegated);
+		}
+	} else {
+		pr_warn("%pd is moved, ignored\n", wbr->wbr_whbase);
+		err = 0;
+	}
+	dput(wbr->wbr_whbase);
+	wbr->wbr_whbase = NULL;
+	if (!err)
+		err = au_wh_init(a->br, a->sb);
+	wbr_wh_write_unlock(wbr);
+	au_hn_inode_unlock(hdir);
+	di_read_unlock(a->sb->s_root, AuLock_IR);
+	if (!err)
+		au_fhsm_wrote(a->sb, bindex, /*force*/0);
+
+out:
+	if (wbr)
+		atomic_dec(&wbr->wbr_wh_running);
+	au_br_put(a->br);
+	si_write_unlock(a->sb);
+	au_nwt_done(&au_sbi(a->sb)->si_nowait);
+	kfree(arg);
+	if (unlikely(err))
+		AuIOErr("err %d\n", err);
+}
+
+static void kick_reinit_br_wh(struct super_block *sb, struct au_branch *br)
+{
+	int do_dec, wkq_err;
+	struct reinit_br_wh *arg;
+
+	do_dec = 1;
+	if (atomic_inc_return(&br->br_wbr->wbr_wh_running) != 1)
+		goto out;
+
+	/* ignore ENOMEM */
+	arg = kmalloc(sizeof(*arg), GFP_NOFS);
+	if (arg) {
+		/*
+		 * dec(wh_running), kfree(arg) and dec(br_count)
+		 * in reinit function
+		 */
+		arg->sb = sb;
+		arg->br = br;
+		au_br_get(br);
+		wkq_err = au_wkq_nowait(reinit_br_wh, arg, sb, /*flags*/0);
+		if (unlikely(wkq_err)) {
+			atomic_dec(&br->br_wbr->wbr_wh_running);
+			au_br_put(br);
+			kfree(arg);
+		}
+		do_dec = 0;
+	}
+
+out:
+	if (do_dec)
+		atomic_dec(&br->br_wbr->wbr_wh_running);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * create the whiteout @wh.
+ */
+static int link_or_create_wh(struct super_block *sb, aufs_bindex_t bindex,
+			     struct dentry *wh)
+{
+	int err;
+	struct path h_path = {
+		.dentry = wh
+	};
+	struct au_branch *br;
+	struct au_wbr *wbr;
+	struct dentry *h_parent;
+	struct inode *h_dir, *delegated;
+
+	h_parent = wh->d_parent; /* dir inode is locked */
+	h_dir = d_inode(h_parent);
+	IMustLock(h_dir);
+
+	br = au_sbr(sb, bindex);
+	h_path.mnt = au_br_mnt(br);
+	wbr = br->br_wbr;
+	wbr_wh_read_lock(wbr);
+	if (wbr->wbr_whbase) {
+		delegated = NULL;
+		err = vfsub_link(wbr->wbr_whbase, h_dir, &h_path, &delegated);
+		if (unlikely(err == -EWOULDBLOCK)) {
+			pr_warn("cannot retry for NFSv4 delegation"
+				" for an internal link\n");
+			iput(delegated);
+		}
+		if (!err || err != -EMLINK)
+			goto out;
+
+		/* link count full. re-initialize br_whbase. */
+		kick_reinit_br_wh(sb, br);
+	}
+
+	/* return this error in this context */
+	err = vfsub_create(h_dir, &h_path, WH_MASK, /*want_excl*/true);
+	if (!err)
+		au_fhsm_wrote(sb, bindex, /*force*/0);
+
+out:
+	wbr_wh_read_unlock(wbr);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * create or remove the diropq.
+ */
+static struct dentry *do_diropq(struct dentry *dentry, aufs_bindex_t bindex,
+				unsigned int flags)
+{
+	struct dentry *opq_dentry, *h_dentry;
+	struct super_block *sb;
+	struct au_branch *br;
+	int err;
+
+	sb = dentry->d_sb;
+	br = au_sbr(sb, bindex);
+	h_dentry = au_h_dptr(dentry, bindex);
+	opq_dentry = vfsub_lkup_one(&diropq_name, h_dentry);
+	if (IS_ERR(opq_dentry))
+		goto out;
+
+	if (au_ftest_diropq(flags, CREATE)) {
+		err = link_or_create_wh(sb, bindex, opq_dentry);
+		if (!err) {
+			au_set_dbdiropq(dentry, bindex);
+			goto out; /* success */
+		}
+	} else {
+		struct path tmp = {
+			.dentry = opq_dentry,
+			.mnt	= au_br_mnt(br)
+		};
+		err = do_unlink_wh(au_h_iptr(d_inode(dentry), bindex), &tmp);
+		if (!err)
+			au_set_dbdiropq(dentry, -1);
+	}
+	dput(opq_dentry);
+	opq_dentry = ERR_PTR(err);
+
+out:
+	return opq_dentry;
+}
+
+struct do_diropq_args {
+	struct dentry **errp;
+	struct dentry *dentry;
+	aufs_bindex_t bindex;
+	unsigned int flags;
+};
+
+static void call_do_diropq(void *args)
+{
+	struct do_diropq_args *a = args;
+	*a->errp = do_diropq(a->dentry, a->bindex, a->flags);
+}
+
+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
+			     unsigned int flags)
+{
+	struct dentry *diropq, *h_dentry;
+
+	h_dentry = au_h_dptr(dentry, bindex);
+	if (!au_test_h_perm_sio(d_inode(h_dentry), MAY_EXEC | MAY_WRITE))
+		diropq = do_diropq(dentry, bindex, flags);
+	else {
+		int wkq_err;
+		struct do_diropq_args args = {
+			.errp		= &diropq,
+			.dentry		= dentry,
+			.bindex		= bindex,
+			.flags		= flags
+		};
+
+		wkq_err = au_wkq_wait(call_do_diropq, &args);
+		if (unlikely(wkq_err))
+			diropq = ERR_PTR(wkq_err);
+	}
+
+	return diropq;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * lookup whiteout dentry.
+ * @h_parent: lower parent dentry which must exist and be locked
+ * @base_name: name of dentry which will be whiteouted
+ * returns dentry for whiteout.
+ */
+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
+			  struct au_branch *br)
+{
+	int err;
+	struct qstr wh_name;
+	struct dentry *wh_dentry;
+
+	err = au_wh_name_alloc(&wh_name, base_name);
+	wh_dentry = ERR_PTR(err);
+	if (!err) {
+		wh_dentry = vfsub_lkup_one(&wh_name, h_parent);
+		kfree(wh_name.name);
+	}
+	return wh_dentry;
+}
+
+/*
+ * link/create a whiteout for @dentry on @bindex.
+ */
+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
+			    struct dentry *h_parent)
+{
+	struct dentry *wh_dentry;
+	struct super_block *sb;
+	int err;
+
+	sb = dentry->d_sb;
+	wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, au_sbr(sb, bindex));
+	if (!IS_ERR(wh_dentry) && d_is_negative(wh_dentry)) {
+		err = link_or_create_wh(sb, bindex, wh_dentry);
+		if (!err) {
+			au_set_dbwh(dentry, bindex);
+			au_fhsm_wrote(sb, bindex, /*force*/0);
+		} else {
+			dput(wh_dentry);
+			wh_dentry = ERR_PTR(err);
+		}
+	}
+
+	return wh_dentry;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* Delete all whiteouts in this directory on branch bindex. */
+static int del_wh_children(struct dentry *h_dentry, struct au_nhash *whlist,
+			   aufs_bindex_t bindex, struct au_branch *br)
+{
+	int err;
+	unsigned long ul, n;
+	struct qstr wh_name;
+	char *p;
+	struct hlist_head *head;
+	struct au_vdir_wh *pos;
+	struct au_vdir_destr *str;
+
+	err = -ENOMEM;
+	p = (void *)__get_free_page(GFP_NOFS);
+	wh_name.name = p;
+	if (unlikely(!wh_name.name))
+		goto out;
+
+	err = 0;
+	memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
+	p += AUFS_WH_PFX_LEN;
+	n = whlist->nh_num;
+	head = whlist->nh_head;
+	for (ul = 0; !err && ul < n; ul++, head++) {
+		hlist_for_each_entry(pos, head, wh_hash) {
+			if (pos->wh_bindex != bindex)
+				continue;
+
+			str = &pos->wh_str;
+			if (str->len + AUFS_WH_PFX_LEN <= PATH_MAX) {
+				memcpy(p, str->name, str->len);
+				wh_name.len = AUFS_WH_PFX_LEN + str->len;
+				err = unlink_wh_name(h_dentry, &wh_name, br);
+				if (!err)
+					continue;
+				break;
+			}
+			AuIOErr("whiteout name too long %.*s\n",
+				str->len, str->name);
+			err = -EIO;
+			break;
+		}
+	}
+	free_page((unsigned long)wh_name.name);
+
+out:
+	return err;
+}
+
+struct del_wh_children_args {
+	int *errp;
+	struct dentry *h_dentry;
+	struct au_nhash *whlist;
+	aufs_bindex_t bindex;
+	struct au_branch *br;
+};
+
+static void call_del_wh_children(void *args)
+{
+	struct del_wh_children_args *a = args;
+	*a->errp = del_wh_children(a->h_dentry, a->whlist, a->bindex, a->br);
+}
+
+/* ---------------------------------------------------------------------- */
+
+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp)
+{
+	struct au_whtmp_rmdir *whtmp;
+	int err;
+	unsigned int rdhash;
+
+	SiMustAnyLock(sb);
+
+	whtmp = kzalloc(sizeof(*whtmp), gfp);
+	if (unlikely(!whtmp)) {
+		whtmp = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	/* no estimation for dir size */
+	rdhash = au_sbi(sb)->si_rdhash;
+	if (!rdhash)
+		rdhash = AUFS_RDHASH_DEF;
+	err = au_nhash_alloc(&whtmp->whlist, rdhash, gfp);
+	if (unlikely(err)) {
+		kfree(whtmp);
+		whtmp = ERR_PTR(err);
+	}
+
+out:
+	return whtmp;
+}
+
+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp)
+{
+	if (whtmp->br)
+		au_br_put(whtmp->br);
+	dput(whtmp->wh_dentry);
+	iput(whtmp->dir);
+	au_nhash_wh_free(&whtmp->whlist);
+	kfree(whtmp);
+}
+
+/*
+ * rmdir the whiteouted temporary named dir @h_dentry.
+ * @whlist: whiteouted children.
+ */
+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
+		   struct dentry *wh_dentry, struct au_nhash *whlist)
+{
+	int err;
+	unsigned int h_nlink;
+	struct path h_tmp;
+	struct inode *wh_inode, *h_dir;
+	struct au_branch *br;
+
+	h_dir = d_inode(wh_dentry->d_parent); /* dir inode is locked */
+	IMustLock(h_dir);
+
+	br = au_sbr(dir->i_sb, bindex);
+	wh_inode = d_inode(wh_dentry);
+	inode_lock_nested(wh_inode, AuLsc_I_CHILD);
+
+	/*
+	 * someone else might change some whiteouts while we were sleeping.
+	 * it means this whlist may have an obsoleted entry.
+	 */
+	if (!au_test_h_perm_sio(wh_inode, MAY_EXEC | MAY_WRITE))
+		err = del_wh_children(wh_dentry, whlist, bindex, br);
+	else {
+		int wkq_err;
+		struct del_wh_children_args args = {
+			.errp		= &err,
+			.h_dentry	= wh_dentry,
+			.whlist		= whlist,
+			.bindex		= bindex,
+			.br		= br
+		};
+
+		wkq_err = au_wkq_wait(call_del_wh_children, &args);
+		if (unlikely(wkq_err))
+			err = wkq_err;
+	}
+	inode_unlock(wh_inode);
+
+	if (!err) {
+		h_tmp.dentry = wh_dentry;
+		h_tmp.mnt = au_br_mnt(br);
+		h_nlink = h_dir->i_nlink;
+		err = vfsub_rmdir(h_dir, &h_tmp);
+		/* some fs doesn't change the parent nlink in some cases */
+		h_nlink -= h_dir->i_nlink;
+	}
+
+	if (!err) {
+		if (au_ibtop(dir) == bindex) {
+			/* todo: dir->i_mutex is necessary */
+			au_cpup_attr_timesizes(dir);
+			if (h_nlink)
+				vfsub_drop_nlink(dir);
+		}
+		return 0; /* success */
+	}
+
+	pr_warn("failed removing %pd(%d), ignored\n", wh_dentry, err);
+	return err;
+}
+
+static void call_rmdir_whtmp(void *args)
+{
+	int err;
+	aufs_bindex_t bindex;
+	struct au_whtmp_rmdir *a = args;
+	struct super_block *sb;
+	struct dentry *h_parent;
+	struct inode *h_dir;
+	struct au_hinode *hdir;
+
+	/* rmdir by nfsd may cause deadlock with this i_mutex */
+	/* inode_lock(a->dir); */
+	err = -EROFS;
+	sb = a->dir->i_sb;
+	si_read_lock(sb, !AuLock_FLUSH);
+	if (!au_br_writable(a->br->br_perm))
+		goto out;
+	bindex = au_br_index(sb, a->br->br_id);
+	if (unlikely(bindex < 0))
+		goto out;
+
+	err = -EIO;
+	ii_write_lock_parent(a->dir);
+	h_parent = dget_parent(a->wh_dentry);
+	h_dir = d_inode(h_parent);
+	hdir = au_hi(a->dir, bindex);
+	err = vfsub_mnt_want_write(au_br_mnt(a->br));
+	if (unlikely(err))
+		goto out_mnt;
+	au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
+	err = au_h_verify(a->wh_dentry, au_opt_udba(sb), h_dir, h_parent,
+			  a->br);
+	if (!err)
+		err = au_whtmp_rmdir(a->dir, bindex, a->wh_dentry, &a->whlist);
+	au_hn_inode_unlock(hdir);
+	vfsub_mnt_drop_write(au_br_mnt(a->br));
+
+out_mnt:
+	dput(h_parent);
+	ii_write_unlock(a->dir);
+out:
+	/* inode_unlock(a->dir); */
+	au_whtmp_rmdir_free(a);
+	si_read_unlock(sb);
+	au_nwt_done(&au_sbi(sb)->si_nowait);
+	if (unlikely(err))
+		AuIOErr("err %d\n", err);
+}
+
+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
+			 struct dentry *wh_dentry, struct au_whtmp_rmdir *args)
+{
+	int wkq_err;
+	struct super_block *sb;
+
+	IMustLock(dir);
+
+	/* all post-process will be done in do_rmdir_whtmp(). */
+	sb = dir->i_sb;
+	args->dir = au_igrab(dir);
+	args->br = au_sbr(sb, bindex);
+	au_br_get(args->br);
+	args->wh_dentry = dget(wh_dentry);
+	wkq_err = au_wkq_nowait(call_rmdir_whtmp, args, sb, /*flags*/0);
+	if (unlikely(wkq_err)) {
+		pr_warn("rmdir error %pd (%d), ignored\n", wh_dentry, wkq_err);
+		au_whtmp_rmdir_free(args);
+	}
+}
diff --git a/fs/aufs/whout.h b/fs/aufs/whout.h
new file mode 100644
index 000000000..766a1d930
--- /dev/null
+++ b/fs/aufs/whout.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * whiteout for logical deletion and opaque directory
+ */
+
+#ifndef __AUFS_WHOUT_H__
+#define __AUFS_WHOUT_H__
+
+#ifdef __KERNEL__
+
+#include "dir.h"
+
+/* whout.c */
+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name);
+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name, int try_sio);
+int au_diropq_test(struct dentry *h_dentry);
+struct au_branch;
+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
+			     struct qstr *prefix);
+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br);
+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
+			struct dentry *dentry);
+int au_wh_init(struct au_branch *br, struct super_block *sb);
+
+/* diropq flags */
+#define AuDiropq_CREATE	1
+#define au_ftest_diropq(flags, name)	((flags) & AuDiropq_##name)
+#define au_fset_diropq(flags, name) \
+	do { (flags) |= AuDiropq_##name; } while (0)
+#define au_fclr_diropq(flags, name) \
+	do { (flags) &= ~AuDiropq_##name; } while (0)
+
+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
+			     unsigned int flags);
+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
+			  struct au_branch *br);
+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
+			    struct dentry *h_parent);
+
+/* real rmdir for the whiteout-ed dir */
+struct au_whtmp_rmdir {
+	struct inode *dir;
+	struct au_branch *br;
+	struct dentry *wh_dentry;
+	struct au_nhash whlist;
+};
+
+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp);
+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp);
+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
+		   struct dentry *wh_dentry, struct au_nhash *whlist);
+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
+			 struct dentry *wh_dentry, struct au_whtmp_rmdir *args);
+
+/* ---------------------------------------------------------------------- */
+
+static inline struct dentry *au_diropq_create(struct dentry *dentry,
+					      aufs_bindex_t bindex)
+{
+	return au_diropq_sio(dentry, bindex, AuDiropq_CREATE);
+}
+
+static inline int au_diropq_remove(struct dentry *dentry, aufs_bindex_t bindex)
+{
+	return PTR_ERR(au_diropq_sio(dentry, bindex, !AuDiropq_CREATE));
+}
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_WHOUT_H__ */
diff --git a/fs/aufs/wkq.c b/fs/aufs/wkq.c
new file mode 100644
index 000000000..a3316e7e5
--- /dev/null
+++ b/fs/aufs/wkq.c
@@ -0,0 +1,390 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * workqueue for asynchronous/super-io operations
+ * todo: try new dredential scheme
+ */
+
+#include <linux/module.h>
+#include "aufs.h"
+
+/* internal workqueue named AUFS_WKQ_NAME */
+
+static struct workqueue_struct *au_wkq;
+
+struct au_wkinfo {
+	struct work_struct wk;
+	struct kobject *kobj;
+
+	unsigned int flags; /* see wkq.h */
+
+	au_wkq_func_t func;
+	void *args;
+
+#ifdef CONFIG_LOCKDEP
+	int dont_check;
+	struct held_lock **hlock;
+#endif
+
+	struct completion *comp;
+};
+
+/* ---------------------------------------------------------------------- */
+/*
+ * Aufs passes some operations to the workqueue such as the internal copyup.
+ * This scheme looks rather unnatural for LOCKDEP debugging feature, since the
+ * job run by workqueue depends upon the locks acquired in the other task.
+ * Delegating a small operation to the workqueue, aufs passes its lockdep
+ * information too. And the job in the workqueue restores the info in order to
+ * pretend as if it acquired those locks. This is just to make LOCKDEP work
+ * correctly and expectedly.
+ */
+
+#ifndef CONFIG_LOCKDEP
+AuStubInt0(au_wkq_lockdep_alloc, struct au_wkinfo *wkinfo);
+AuStubVoid(au_wkq_lockdep_free, struct au_wkinfo *wkinfo);
+AuStubVoid(au_wkq_lockdep_pre, struct au_wkinfo *wkinfo);
+AuStubVoid(au_wkq_lockdep_post, struct au_wkinfo *wkinfo);
+AuStubVoid(au_wkq_lockdep_init, struct au_wkinfo *wkinfo);
+#else
+static void au_wkq_lockdep_init(struct au_wkinfo *wkinfo)
+{
+	wkinfo->hlock = NULL;
+	wkinfo->dont_check = 0;
+}
+
+/*
+ * 1: matched
+ * 0: unmatched
+ */
+static int au_wkq_lockdep_test(struct lock_class_key *key, const char *name)
+{
+	static DEFINE_SPINLOCK(spin);
+	static struct {
+		char *name;
+		struct lock_class_key *key;
+	} a[] = {
+		{ .name = "&sbinfo->si_rwsem" },
+		{ .name = "&finfo->fi_rwsem" },
+		{ .name = "&dinfo->di_rwsem" },
+		{ .name = "&iinfo->ii_rwsem" }
+	};
+	static int set;
+	int i;
+
+	/* lockless read from 'set.' see below */
+	if (set == ARRAY_SIZE(a)) {
+		for (i = 0; i < ARRAY_SIZE(a); i++)
+			if (a[i].key == key)
+				goto match;
+		goto unmatch;
+	}
+
+	spin_lock(&spin);
+	if (set)
+		for (i = 0; i < ARRAY_SIZE(a); i++)
+			if (a[i].key == key) {
+				spin_unlock(&spin);
+				goto match;
+			}
+	for (i = 0; i < ARRAY_SIZE(a); i++) {
+		if (a[i].key) {
+			if (unlikely(a[i].key == key)) { /* rare but possible */
+				spin_unlock(&spin);
+				goto match;
+			} else
+				continue;
+		}
+		if (strstr(a[i].name, name)) {
+			/*
+			 * the order of these three lines is important for the
+			 * lockless read above.
+			 */
+			a[i].key = key;
+			spin_unlock(&spin);
+			set++;
+			/* AuDbg("%d, %s\n", set, name); */
+			goto match;
+		}
+	}
+	spin_unlock(&spin);
+	goto unmatch;
+
+match:
+	return 1;
+unmatch:
+	return 0;
+}
+
+static int au_wkq_lockdep_alloc(struct au_wkinfo *wkinfo)
+{
+	int err, n;
+	struct task_struct *curr;
+	struct held_lock **hl, *held_locks, *p;
+
+	err = 0;
+	curr = current;
+	wkinfo->dont_check = lockdep_recursing(curr);
+	if (wkinfo->dont_check)
+		goto out;
+	n = curr->lockdep_depth;
+	if (!n)
+		goto out;
+
+	err = -ENOMEM;
+	wkinfo->hlock = kmalloc_array(n + 1, sizeof(*wkinfo->hlock), GFP_NOFS);
+	if (unlikely(!wkinfo->hlock))
+		goto out;
+
+	err = 0;
+#if 0
+	if (0 && au_debug_test()) /* left for debugging */
+		lockdep_print_held_locks(curr);
+#endif
+	held_locks = curr->held_locks;
+	hl = wkinfo->hlock;
+	while (n--) {
+		p = held_locks++;
+		if (au_wkq_lockdep_test(p->instance->key, p->instance->name))
+			*hl++ = p;
+	}
+	*hl = NULL;
+
+out:
+	return err;
+}
+
+static void au_wkq_lockdep_free(struct au_wkinfo *wkinfo)
+{
+	kfree(wkinfo->hlock);
+}
+
+static void au_wkq_lockdep_pre(struct au_wkinfo *wkinfo)
+{
+	struct held_lock *p, **hl = wkinfo->hlock;
+	int subclass;
+
+	if (wkinfo->dont_check)
+		lockdep_off();
+	if (!hl)
+		return;
+	while ((p = *hl++)) { /* assignment */
+		subclass = lockdep_hlock_class(p)->subclass;
+		/* AuDbg("%s, %d\n", p->instance->name, subclass); */
+		if (p->read)
+			rwsem_acquire_read(p->instance, subclass, 0,
+					   /*p->acquire_ip*/_RET_IP_);
+		else
+			rwsem_acquire(p->instance, subclass, 0,
+				      /*p->acquire_ip*/_RET_IP_);
+	}
+}
+
+static void au_wkq_lockdep_post(struct au_wkinfo *wkinfo)
+{
+	struct held_lock *p, **hl = wkinfo->hlock;
+
+	if (wkinfo->dont_check)
+		lockdep_on();
+	if (!hl)
+		return;
+	while ((p = *hl++)) /* assignment */
+		rwsem_release(p->instance, 0, /*p->acquire_ip*/_RET_IP_);
+}
+#endif
+
+static void wkq_func(struct work_struct *wk)
+{
+	struct au_wkinfo *wkinfo = container_of(wk, struct au_wkinfo, wk);
+
+	AuDebugOn(!uid_eq(current_fsuid(), GLOBAL_ROOT_UID));
+	AuDebugOn(rlimit(RLIMIT_FSIZE) != RLIM_INFINITY);
+
+	au_wkq_lockdep_pre(wkinfo);
+	wkinfo->func(wkinfo->args);
+	au_wkq_lockdep_post(wkinfo);
+	if (au_ftest_wkq(wkinfo->flags, WAIT))
+		complete(wkinfo->comp);
+	else {
+		kobject_put(wkinfo->kobj);
+		module_put(THIS_MODULE); /* todo: ?? */
+		kfree(wkinfo);
+	}
+}
+
+/*
+ * Since struct completion is large, try allocating it dynamically.
+ */
+#if 1 /* defined(CONFIG_4KSTACKS) || defined(AuTest4KSTACKS) */
+#define AuWkqCompDeclare(name)	struct completion *comp = NULL
+
+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
+{
+	*comp = kmalloc(sizeof(**comp), GFP_NOFS);
+	if (*comp) {
+		init_completion(*comp);
+		wkinfo->comp = *comp;
+		return 0;
+	}
+	return -ENOMEM;
+}
+
+static void au_wkq_comp_free(struct completion *comp)
+{
+	kfree(comp);
+}
+
+#else
+
+/* no braces */
+#define AuWkqCompDeclare(name) \
+	DECLARE_COMPLETION_ONSTACK(_ ## name); \
+	struct completion *comp = &_ ## name
+
+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
+{
+	wkinfo->comp = *comp;
+	return 0;
+}
+
+static void au_wkq_comp_free(struct completion *comp __maybe_unused)
+{
+	/* empty */
+}
+#endif /* 4KSTACKS */
+
+static void au_wkq_run(struct au_wkinfo *wkinfo)
+{
+	if (au_ftest_wkq(wkinfo->flags, NEST)) {
+		if (au_wkq_test()) {
+			AuWarn1("wkq from wkq, unless silly-rename on NFS,"
+				" due to a dead dir by UDBA?\n");
+			AuDebugOn(au_ftest_wkq(wkinfo->flags, WAIT));
+		}
+	} else
+		au_dbg_verify_kthread();
+
+	if (au_ftest_wkq(wkinfo->flags, WAIT)) {
+		INIT_WORK_ONSTACK(&wkinfo->wk, wkq_func);
+		queue_work(au_wkq, &wkinfo->wk);
+	} else {
+		INIT_WORK(&wkinfo->wk, wkq_func);
+		schedule_work(&wkinfo->wk);
+	}
+}
+
+/*
+ * Be careful. It is easy to make deadlock happen.
+ * processA: lock, wkq and wait
+ * processB: wkq and wait, lock in wkq
+ * --> deadlock
+ */
+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args)
+{
+	int err;
+	AuWkqCompDeclare(comp);
+	struct au_wkinfo wkinfo = {
+		.flags	= flags,
+		.func	= func,
+		.args	= args
+	};
+
+	err = au_wkq_comp_alloc(&wkinfo, &comp);
+	if (unlikely(err))
+		goto out;
+	err = au_wkq_lockdep_alloc(&wkinfo);
+	if (unlikely(err))
+		goto out_comp;
+	if (!err) {
+		au_wkq_run(&wkinfo);
+		/* no timeout, no interrupt */
+		wait_for_completion(wkinfo.comp);
+	}
+	au_wkq_lockdep_free(&wkinfo);
+
+out_comp:
+	au_wkq_comp_free(comp);
+out:
+	destroy_work_on_stack(&wkinfo.wk);
+	return err;
+}
+
+/*
+ * Note: dget/dput() in func for aufs dentries are not supported. It will be a
+ * problem in a concurrent umounting.
+ */
+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
+		  unsigned int flags)
+{
+	int err;
+	struct au_wkinfo *wkinfo;
+
+	atomic_inc(&au_sbi(sb)->si_nowait.nw_len);
+
+	/*
+	 * wkq_func() must free this wkinfo.
+	 * it highly depends upon the implementation of workqueue.
+	 */
+	err = 0;
+	wkinfo = kmalloc(sizeof(*wkinfo), GFP_NOFS);
+	if (wkinfo) {
+		wkinfo->kobj = &au_sbi(sb)->si_kobj;
+		wkinfo->flags = flags & ~AuWkq_WAIT;
+		wkinfo->func = func;
+		wkinfo->args = args;
+		wkinfo->comp = NULL;
+		au_wkq_lockdep_init(wkinfo);
+		kobject_get(wkinfo->kobj);
+		__module_get(THIS_MODULE); /* todo: ?? */
+
+		au_wkq_run(wkinfo);
+	} else {
+		err = -ENOMEM;
+		au_nwt_done(&au_sbi(sb)->si_nowait);
+	}
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void au_nwt_init(struct au_nowait_tasks *nwt)
+{
+	atomic_set(&nwt->nw_len, 0);
+	/* smp_mb(); */ /* atomic_set */
+	init_waitqueue_head(&nwt->nw_wq);
+}
+
+void au_wkq_fin(void)
+{
+	destroy_workqueue(au_wkq);
+}
+
+int __init au_wkq_init(void)
+{
+	int err;
+
+	err = 0;
+	au_wkq = alloc_workqueue(AUFS_WKQ_NAME, 0, WQ_DFL_ACTIVE);
+	if (IS_ERR(au_wkq))
+		err = PTR_ERR(au_wkq);
+	else if (!au_wkq)
+		err = -ENOMEM;
+
+	return err;
+}
diff --git a/fs/aufs/wkq.h b/fs/aufs/wkq.h
new file mode 100644
index 000000000..9fc86c11a
--- /dev/null
+++ b/fs/aufs/wkq.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * workqueue for asynchronous/super-io operations
+ * todo: try new credentials management scheme
+ */
+
+#ifndef __AUFS_WKQ_H__
+#define __AUFS_WKQ_H__
+
+#ifdef __KERNEL__
+
+#include <linux/wait.h>
+
+struct super_block;
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * in the next operation, wait for the 'nowait' tasks in system-wide workqueue
+ */
+struct au_nowait_tasks {
+	atomic_t		nw_len;
+	wait_queue_head_t	nw_wq;
+};
+
+/* ---------------------------------------------------------------------- */
+
+typedef void (*au_wkq_func_t)(void *args);
+
+/* wkq flags */
+#define AuWkq_WAIT	1
+#define AuWkq_NEST	(1 << 1)
+#define au_ftest_wkq(flags, name)	((flags) & AuWkq_##name)
+#define au_fset_wkq(flags, name) \
+	do { (flags) |= AuWkq_##name; } while (0)
+#define au_fclr_wkq(flags, name) \
+	do { (flags) &= ~AuWkq_##name; } while (0)
+
+#ifndef CONFIG_AUFS_HNOTIFY
+#undef AuWkq_NEST
+#define AuWkq_NEST	0
+#endif
+
+/* wkq.c */
+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args);
+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
+		  unsigned int flags);
+void au_nwt_init(struct au_nowait_tasks *nwt);
+int __init au_wkq_init(void);
+void au_wkq_fin(void);
+
+/* ---------------------------------------------------------------------- */
+
+static inline int au_wkq_test(void)
+{
+	return current->flags & PF_WQ_WORKER;
+}
+
+static inline int au_wkq_wait(au_wkq_func_t func, void *args)
+{
+	return au_wkq_do_wait(AuWkq_WAIT, func, args);
+}
+
+static inline void au_nwt_done(struct au_nowait_tasks *nwt)
+{
+	if (atomic_dec_and_test(&nwt->nw_len))
+		wake_up_all(&nwt->nw_wq);
+}
+
+static inline int au_nwt_flush(struct au_nowait_tasks *nwt)
+{
+	wait_event(nwt->nw_wq, !atomic_read(&nwt->nw_len));
+	return 0;
+}
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_WKQ_H__ */
diff --git a/fs/aufs/xattr.c b/fs/aufs/xattr.c
new file mode 100644
index 000000000..792317c7f
--- /dev/null
+++ b/fs/aufs/xattr.c
@@ -0,0 +1,355 @@
+/*
+ * Copyright (C) 2014-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * handling xattr functions
+ */
+
+#include <linux/fs.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/xattr.h>
+#include "aufs.h"
+
+static int au_xattr_ignore(int err, char *name, unsigned int ignore_flags)
+{
+	if (!ignore_flags)
+		goto out;
+	switch (err) {
+	case -ENOMEM:
+	case -EDQUOT:
+		goto out;
+	}
+
+	if ((ignore_flags & AuBrAttr_ICEX) == AuBrAttr_ICEX) {
+		err = 0;
+		goto out;
+	}
+
+#define cmp(brattr, prefix) do {					\
+		if (!strncmp(name, XATTR_##prefix##_PREFIX,		\
+			     XATTR_##prefix##_PREFIX_LEN)) {		\
+			if (ignore_flags & AuBrAttr_ICEX_##brattr)	\
+				err = 0;				\
+			goto out;					\
+		}							\
+	} while (0)
+
+	cmp(SEC, SECURITY);
+	cmp(SYS, SYSTEM);
+	cmp(TR, TRUSTED);
+	cmp(USR, USER);
+#undef cmp
+
+	if (ignore_flags & AuBrAttr_ICEX_OTH)
+		err = 0;
+
+out:
+	return err;
+}
+
+static const int au_xattr_out_of_list = AuBrAttr_ICEX_OTH << 1;
+
+static int au_do_cpup_xattr(struct dentry *h_dst, struct dentry *h_src,
+			    char *name, char **buf, unsigned int ignore_flags,
+			    unsigned int verbose)
+{
+	int err;
+	ssize_t ssz;
+	struct inode *h_idst;
+
+	ssz = vfs_getxattr_alloc(h_src, name, buf, 0, GFP_NOFS);
+	err = ssz;
+	if (unlikely(err <= 0)) {
+		if (err == -ENODATA
+		    || (err == -EOPNOTSUPP
+			&& ((ignore_flags & au_xattr_out_of_list)
+			    || (au_test_nfs_noacl(d_inode(h_src))
+				&& (!strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS)
+				    || !strcmp(name,
+					       XATTR_NAME_POSIX_ACL_DEFAULT))))
+			    ))
+			err = 0;
+		if (err && (verbose || au_debug_test()))
+			pr_err("%s, err %d\n", name, err);
+		goto out;
+	}
+
+	/* unlock it temporary */
+	h_idst = d_inode(h_dst);
+	inode_unlock(h_idst);
+	err = vfsub_setxattr(h_dst, name, *buf, ssz, /*flags*/0);
+	inode_lock_nested(h_idst, AuLsc_I_CHILD2);
+	if (unlikely(err)) {
+		if (verbose || au_debug_test())
+			pr_err("%s, err %d\n", name, err);
+		err = au_xattr_ignore(err, name, ignore_flags);
+	}
+
+out:
+	return err;
+}
+
+int au_cpup_xattr(struct dentry *h_dst, struct dentry *h_src, int ignore_flags,
+		  unsigned int verbose)
+{
+	int err, unlocked, acl_access, acl_default;
+	ssize_t ssz;
+	struct inode *h_isrc, *h_idst;
+	char *value, *p, *o, *e;
+
+	/* try stopping to update the source inode while we are referencing */
+	/* there should not be the parent-child relationship between them */
+	h_isrc = d_inode(h_src);
+	h_idst = d_inode(h_dst);
+	inode_unlock(h_idst);
+	inode_lock_shared_nested(h_isrc, AuLsc_I_CHILD);
+	inode_lock_nested(h_idst, AuLsc_I_CHILD2);
+	unlocked = 0;
+
+	/* some filesystems don't list POSIX ACL, for example tmpfs */
+	ssz = vfs_listxattr(h_src, NULL, 0);
+	err = ssz;
+	if (unlikely(err < 0)) {
+		AuTraceErr(err);
+		if (err == -ENODATA
+		    || err == -EOPNOTSUPP)
+			err = 0;	/* ignore */
+		goto out;
+	}
+
+	err = 0;
+	p = NULL;
+	o = NULL;
+	if (ssz) {
+		err = -ENOMEM;
+		p = kmalloc(ssz, GFP_NOFS);
+		o = p;
+		if (unlikely(!p))
+			goto out;
+		err = vfs_listxattr(h_src, p, ssz);
+	}
+	inode_unlock_shared(h_isrc);
+	unlocked = 1;
+	AuDbg("err %d, ssz %zd\n", err, ssz);
+	if (unlikely(err < 0))
+		goto out_free;
+
+	err = 0;
+	e = p + ssz;
+	value = NULL;
+	acl_access = 0;
+	acl_default = 0;
+	while (!err && p < e) {
+		acl_access |= !strncmp(p, XATTR_NAME_POSIX_ACL_ACCESS,
+				       sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1);
+		acl_default |= !strncmp(p, XATTR_NAME_POSIX_ACL_DEFAULT,
+					sizeof(XATTR_NAME_POSIX_ACL_DEFAULT)
+					- 1);
+		err = au_do_cpup_xattr(h_dst, h_src, p, &value, ignore_flags,
+				       verbose);
+		p += strlen(p) + 1;
+	}
+	AuTraceErr(err);
+	ignore_flags |= au_xattr_out_of_list;
+	if (!err && !acl_access) {
+		err = au_do_cpup_xattr(h_dst, h_src,
+				       XATTR_NAME_POSIX_ACL_ACCESS, &value,
+				       ignore_flags, verbose);
+		AuTraceErr(err);
+	}
+	if (!err && !acl_default) {
+		err = au_do_cpup_xattr(h_dst, h_src,
+				       XATTR_NAME_POSIX_ACL_DEFAULT, &value,
+				       ignore_flags, verbose);
+		AuTraceErr(err);
+	}
+
+	kfree(value);
+
+out_free:
+	kfree(o);
+out:
+	if (!unlocked)
+		inode_unlock_shared(h_isrc);
+	AuTraceErr(err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_smack_reentering(struct super_block *sb)
+{
+#if IS_ENABLED(CONFIG_SECURITY_SMACK)
+	/*
+	 * as a part of lookup, smack_d_instantiate() is called, and it calls
+	 * i_op->getxattr(). ouch.
+	 */
+	return si_pid_test(sb);
+#else
+	return 0;
+#endif
+}
+
+enum {
+	AU_XATTR_LIST,
+	AU_XATTR_GET
+};
+
+struct au_lgxattr {
+	int type;
+	union {
+		struct {
+			char	*list;
+			size_t	size;
+		} list;
+		struct {
+			const char	*name;
+			void		*value;
+			size_t		size;
+		} get;
+	} u;
+};
+
+static ssize_t au_lgxattr(struct dentry *dentry, struct au_lgxattr *arg)
+{
+	ssize_t err;
+	int reenter;
+	struct path h_path;
+	struct super_block *sb;
+
+	sb = dentry->d_sb;
+	reenter = au_smack_reentering(sb);
+	if (!reenter) {
+		err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
+		if (unlikely(err))
+			goto out;
+	}
+	err = au_h_path_getattr(dentry, /*force*/1, &h_path, reenter);
+	if (unlikely(err))
+		goto out_si;
+	if (unlikely(!h_path.dentry))
+		/* illegally overlapped or something */
+		goto out_di; /* pretending success */
+
+	/* always topmost entry only */
+	switch (arg->type) {
+	case AU_XATTR_LIST:
+		err = vfs_listxattr(h_path.dentry,
+				    arg->u.list.list, arg->u.list.size);
+		break;
+	case AU_XATTR_GET:
+		AuDebugOn(d_is_negative(h_path.dentry));
+		err = vfs_getxattr(h_path.dentry,
+				   arg->u.get.name, arg->u.get.value,
+				   arg->u.get.size);
+		break;
+	}
+
+out_di:
+	if (!reenter)
+		di_read_unlock(dentry, AuLock_IR);
+out_si:
+	if (!reenter)
+		si_read_unlock(sb);
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+ssize_t aufs_listxattr(struct dentry *dentry, char *list, size_t size)
+{
+	struct au_lgxattr arg = {
+		.type = AU_XATTR_LIST,
+		.u.list = {
+			.list	= list,
+			.size	= size
+		},
+	};
+
+	return au_lgxattr(dentry, &arg);
+}
+
+static ssize_t au_getxattr(struct dentry *dentry,
+			   struct inode *inode __maybe_unused,
+			   const char *name, void *value, size_t size)
+{
+	struct au_lgxattr arg = {
+		.type = AU_XATTR_GET,
+		.u.get = {
+			.name	= name,
+			.value	= value,
+			.size	= size
+		},
+	};
+
+	return au_lgxattr(dentry, &arg);
+}
+
+static int au_setxattr(struct dentry *dentry, struct inode *inode,
+		       const char *name, const void *value, size_t size,
+		       int flags)
+{
+	struct au_sxattr arg = {
+		.type = AU_XATTR_SET,
+		.u.set = {
+			.name	= name,
+			.value	= value,
+			.size	= size,
+			.flags	= flags
+		},
+	};
+
+	return au_sxattr(dentry, inode, &arg);
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_xattr_get(const struct xattr_handler *handler,
+			struct dentry *dentry, struct inode *inode,
+			const char *name, void *buffer, size_t size)
+{
+	return au_getxattr(dentry, inode, name, buffer, size);
+}
+
+static int au_xattr_set(const struct xattr_handler *handler,
+			struct dentry *dentry, struct inode *inode,
+			const char *name, const void *value, size_t size,
+			int flags)
+{
+	return au_setxattr(dentry, inode, name, value, size, flags);
+}
+
+static const struct xattr_handler au_xattr_handler = {
+	.name	= "",
+	.prefix	= "",
+	.get	= au_xattr_get,
+	.set	= au_xattr_set
+};
+
+static const struct xattr_handler *au_xattr_handlers[] = {
+#ifdef CONFIG_FS_POSIX_ACL
+	&posix_acl_access_xattr_handler,
+	&posix_acl_default_xattr_handler,
+#endif
+	&au_xattr_handler, /* must be last */
+	NULL
+};
+
+void au_xattr_init(struct super_block *sb)
+{
+	sb->s_xattr = au_xattr_handlers;
+}
diff --git a/fs/aufs/xino.c b/fs/aufs/xino.c
new file mode 100644
index 000000000..29f53f9e5
--- /dev/null
+++ b/fs/aufs/xino.c
@@ -0,0 +1,1469 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * external inode number translation table and bitmap
+ */
+
+#include <linux/seq_file.h>
+#include <linux/statfs.h>
+#include "aufs.h"
+
+static ssize_t xino_fread_wkq(vfs_readf_t func, struct file *file, void *buf,
+			      size_t size, loff_t *pos);
+
+/* todo: unnecessary to support mmap_sem since kernel-space? */
+ssize_t xino_fread(vfs_readf_t func, struct file *file, void *kbuf, size_t size,
+		   loff_t *pos)
+{
+	ssize_t err;
+	mm_segment_t oldfs;
+	union {
+		void *k;
+		char __user *u;
+	} buf;
+	int i;
+	const int prevent_endless = 10;
+
+	i = 0;
+	buf.k = kbuf;
+	oldfs = get_fs();
+	set_fs(KERNEL_DS);
+	do {
+		err = func(file, buf.u, size, pos);
+		if (err == -EINTR
+		    && !au_wkq_test()
+		    && fatal_signal_pending(current)) {
+			set_fs(oldfs);
+			err = xino_fread_wkq(func, file, kbuf, size, pos);
+			BUG_ON(err == -EINTR);
+			oldfs = get_fs();
+			set_fs(KERNEL_DS);
+		}
+	} while (i++ < prevent_endless
+		 && (err == -EAGAIN || err == -EINTR));
+	set_fs(oldfs);
+
+#if 0 /* reserved for future use */
+	if (err > 0)
+		fsnotify_access(file->f_path.dentry);
+#endif
+
+	return err;
+}
+
+struct xino_fread_args {
+	ssize_t *errp;
+	vfs_readf_t func;
+	struct file *file;
+	void *buf;
+	size_t size;
+	loff_t *pos;
+};
+
+static void call_xino_fread(void *args)
+{
+	struct xino_fread_args *a = args;
+	*a->errp = xino_fread(a->func, a->file, a->buf, a->size, a->pos);
+}
+
+static ssize_t xino_fread_wkq(vfs_readf_t func, struct file *file, void *buf,
+			       size_t size, loff_t *pos)
+{
+	ssize_t err;
+	int wkq_err;
+	struct xino_fread_args args = {
+		.errp	= &err,
+		.func	= func,
+		.file	= file,
+		.buf	= buf,
+		.size	= size,
+		.pos	= pos
+	};
+
+	wkq_err = au_wkq_wait(call_xino_fread, &args);
+	if (unlikely(wkq_err))
+		err = wkq_err;
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static ssize_t xino_fwrite_wkq(vfs_writef_t func, struct file *file, void *buf,
+			       size_t size, loff_t *pos);
+
+static ssize_t do_xino_fwrite(vfs_writef_t func, struct file *file, void *kbuf,
+			      size_t size, loff_t *pos)
+{
+	ssize_t err;
+	mm_segment_t oldfs;
+	union {
+		void *k;
+		const char __user *u;
+	} buf;
+	int i;
+	const int prevent_endless = 10;
+
+	i = 0;
+	buf.k = kbuf;
+	oldfs = get_fs();
+	set_fs(KERNEL_DS);
+	do {
+		err = func(file, buf.u, size, pos);
+		if (err == -EINTR
+		    && !au_wkq_test()
+		    && fatal_signal_pending(current)) {
+			set_fs(oldfs);
+			err = xino_fwrite_wkq(func, file, kbuf, size, pos);
+			BUG_ON(err == -EINTR);
+			oldfs = get_fs();
+			set_fs(KERNEL_DS);
+		}
+	} while (i++ < prevent_endless
+		 && (err == -EAGAIN || err == -EINTR));
+	set_fs(oldfs);
+
+#if 0 /* reserved for future use */
+	if (err > 0)
+		fsnotify_modify(file->f_path.dentry);
+#endif
+
+	return err;
+}
+
+struct do_xino_fwrite_args {
+	ssize_t *errp;
+	vfs_writef_t func;
+	struct file *file;
+	void *buf;
+	size_t size;
+	loff_t *pos;
+};
+
+static void call_do_xino_fwrite(void *args)
+{
+	struct do_xino_fwrite_args *a = args;
+	*a->errp = do_xino_fwrite(a->func, a->file, a->buf, a->size, a->pos);
+}
+
+static ssize_t xino_fwrite_wkq(vfs_writef_t func, struct file *file, void *buf,
+			       size_t size, loff_t *pos)
+{
+	ssize_t err;
+	int wkq_err;
+	struct do_xino_fwrite_args args = {
+		.errp	= &err,
+		.func	= func,
+		.file	= file,
+		.buf	= buf,
+		.size	= size,
+		.pos	= pos
+	};
+
+	/*
+	 * it breaks RLIMIT_FSIZE and normal user's limit,
+	 * users should care about quota and real 'filesystem full.'
+	 */
+	wkq_err = au_wkq_wait(call_do_xino_fwrite, &args);
+	if (unlikely(wkq_err))
+		err = wkq_err;
+
+	return err;
+}
+
+ssize_t xino_fwrite(vfs_writef_t func, struct file *file, void *buf,
+		    size_t size, loff_t *pos)
+{
+	ssize_t err;
+
+	if (rlimit(RLIMIT_FSIZE) == RLIM_INFINITY) {
+		lockdep_off();
+		err = do_xino_fwrite(func, file, buf, size, pos);
+		lockdep_on();
+	} else {
+		lockdep_off();
+		err = xino_fwrite_wkq(func, file, buf, size, pos);
+		lockdep_on();
+	}
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * create a new xinofile at the same place/path as @base_file.
+ */
+struct file *au_xino_create2(struct file *base_file, struct file *copy_src)
+{
+	struct file *file;
+	struct dentry *base, *parent;
+	struct inode *dir, *delegated;
+	struct qstr *name;
+	struct path path;
+	int err;
+
+	base = base_file->f_path.dentry;
+	parent = base->d_parent; /* dir inode is locked */
+	dir = d_inode(parent);
+	IMustLock(dir);
+
+	file = ERR_PTR(-EINVAL);
+	name = &base->d_name;
+	path.dentry = vfsub_lookup_one_len(name->name, parent, name->len);
+	if (IS_ERR(path.dentry)) {
+		file = (void *)path.dentry;
+		pr_err("%pd lookup err %ld\n",
+		       base, PTR_ERR(path.dentry));
+		goto out;
+	}
+
+	/* no need to mnt_want_write() since we call dentry_open() later */
+	err = vfs_create(dir, path.dentry, S_IRUGO | S_IWUGO, NULL);
+	if (unlikely(err)) {
+		file = ERR_PTR(err);
+		pr_err("%pd create err %d\n", base, err);
+		goto out_dput;
+	}
+
+	path.mnt = base_file->f_path.mnt;
+	file = vfsub_dentry_open(&path,
+				 O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
+				 /* | __FMODE_NONOTIFY */);
+	if (IS_ERR(file)) {
+		pr_err("%pd open err %ld\n", base, PTR_ERR(file));
+		goto out_dput;
+	}
+
+	delegated = NULL;
+	err = vfsub_unlink(dir, &file->f_path, &delegated, /*force*/0);
+	if (unlikely(err == -EWOULDBLOCK)) {
+		pr_warn("cannot retry for NFSv4 delegation"
+			" for an internal unlink\n");
+		iput(delegated);
+	}
+	if (unlikely(err)) {
+		pr_err("%pd unlink err %d\n", base, err);
+		goto out_fput;
+	}
+
+	if (copy_src) {
+		/* no one can touch copy_src xino */
+		err = au_copy_file(file, copy_src, vfsub_f_size_read(copy_src));
+		if (unlikely(err)) {
+			pr_err("%pd copy err %d\n", base, err);
+			goto out_fput;
+		}
+	}
+	goto out_dput; /* success */
+
+out_fput:
+	fput(file);
+	file = ERR_PTR(err);
+out_dput:
+	dput(path.dentry);
+out:
+	return file;
+}
+
+struct au_xino_lock_dir {
+	struct au_hinode *hdir;
+	struct dentry *parent;
+	struct inode *dir;
+};
+
+static void au_xino_lock_dir(struct super_block *sb, struct file *xino,
+			     struct au_xino_lock_dir *ldir)
+{
+	aufs_bindex_t brid, bindex;
+
+	ldir->hdir = NULL;
+	bindex = -1;
+	brid = au_xino_brid(sb);
+	if (brid >= 0)
+		bindex = au_br_index(sb, brid);
+	if (bindex >= 0) {
+		ldir->hdir = au_hi(d_inode(sb->s_root), bindex);
+		au_hn_inode_lock_nested(ldir->hdir, AuLsc_I_PARENT);
+	} else {
+		ldir->parent = dget_parent(xino->f_path.dentry);
+		ldir->dir = d_inode(ldir->parent);
+		inode_lock_nested(ldir->dir, AuLsc_I_PARENT);
+	}
+}
+
+static void au_xino_unlock_dir(struct au_xino_lock_dir *ldir)
+{
+	if (ldir->hdir)
+		au_hn_inode_unlock(ldir->hdir);
+	else {
+		inode_unlock(ldir->dir);
+		dput(ldir->parent);
+	}
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* trucate xino files asynchronously */
+
+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex)
+{
+	int err;
+	unsigned long jiffy;
+	blkcnt_t blocks;
+	aufs_bindex_t bi, bbot;
+	struct kstatfs *st;
+	struct au_branch *br;
+	struct file *new_xino, *file;
+	struct super_block *h_sb;
+	struct au_xino_lock_dir ldir;
+
+	err = -ENOMEM;
+	st = kmalloc(sizeof(*st), GFP_NOFS);
+	if (unlikely(!st))
+		goto out;
+
+	err = -EINVAL;
+	bbot = au_sbbot(sb);
+	if (unlikely(bindex < 0 || bbot < bindex))
+		goto out_st;
+	br = au_sbr(sb, bindex);
+	file = br->br_xino.xi_file;
+	if (!file)
+		goto out_st;
+
+	err = vfs_statfs(&file->f_path, st);
+	if (unlikely(err))
+		AuErr1("statfs err %d, ignored\n", err);
+	jiffy = jiffies;
+	blocks = file_inode(file)->i_blocks;
+	pr_info("begin truncating xino(b%d), ib%llu, %llu/%llu free blks\n",
+		bindex, (u64)blocks, st->f_bfree, st->f_blocks);
+
+	au_xino_lock_dir(sb, file, &ldir);
+	/* mnt_want_write() is unnecessary here */
+	new_xino = au_xino_create2(file, file);
+	au_xino_unlock_dir(&ldir);
+	err = PTR_ERR(new_xino);
+	if (IS_ERR(new_xino)) {
+		pr_err("err %d, ignored\n", err);
+		goto out_st;
+	}
+	err = 0;
+	fput(file);
+	br->br_xino.xi_file = new_xino;
+
+	h_sb = au_br_sb(br);
+	for (bi = 0; bi <= bbot; bi++) {
+		if (unlikely(bi == bindex))
+			continue;
+		br = au_sbr(sb, bi);
+		if (au_br_sb(br) != h_sb)
+			continue;
+
+		fput(br->br_xino.xi_file);
+		br->br_xino.xi_file = new_xino;
+		get_file(new_xino);
+	}
+
+	err = vfs_statfs(&new_xino->f_path, st);
+	if (!err) {
+		pr_info("end truncating xino(b%d), ib%llu, %llu/%llu free blks\n",
+			bindex, (u64)file_inode(new_xino)->i_blocks,
+			st->f_bfree, st->f_blocks);
+		if (file_inode(new_xino)->i_blocks < blocks)
+			au_sbi(sb)->si_xino_jiffy = jiffy;
+	} else
+		AuErr1("statfs err %d, ignored\n", err);
+
+out_st:
+	kfree(st);
+out:
+	return err;
+}
+
+struct xino_do_trunc_args {
+	struct super_block *sb;
+	struct au_branch *br;
+};
+
+static void xino_do_trunc(void *_args)
+{
+	struct xino_do_trunc_args *args = _args;
+	struct super_block *sb;
+	struct au_branch *br;
+	struct inode *dir;
+	int err;
+	aufs_bindex_t bindex;
+
+	err = 0;
+	sb = args->sb;
+	dir = d_inode(sb->s_root);
+	br = args->br;
+
+	si_noflush_write_lock(sb);
+	ii_read_lock_parent(dir);
+	bindex = au_br_index(sb, br->br_id);
+	err = au_xino_trunc(sb, bindex);
+	ii_read_unlock(dir);
+	if (unlikely(err))
+		pr_warn("err b%d, (%d)\n", bindex, err);
+	atomic_dec(&br->br_xino_running);
+	au_br_put(br);
+	si_write_unlock(sb);
+	au_nwt_done(&au_sbi(sb)->si_nowait);
+	kfree(args);
+}
+
+static int xino_trunc_test(struct super_block *sb, struct au_branch *br)
+{
+	int err;
+	struct kstatfs st;
+	struct au_sbinfo *sbinfo;
+
+	/* todo: si_xino_expire and the ratio should be customizable */
+	sbinfo = au_sbi(sb);
+	if (time_before(jiffies,
+			sbinfo->si_xino_jiffy + sbinfo->si_xino_expire))
+		return 0;
+
+	/* truncation border */
+	err = vfs_statfs(&br->br_xino.xi_file->f_path, &st);
+	if (unlikely(err)) {
+		AuErr1("statfs err %d, ignored\n", err);
+		return 0;
+	}
+	if (div64_u64(st.f_bfree * 100, st.f_blocks) >= AUFS_XINO_DEF_TRUNC)
+		return 0;
+
+	return 1;
+}
+
+static void xino_try_trunc(struct super_block *sb, struct au_branch *br)
+{
+	struct xino_do_trunc_args *args;
+	int wkq_err;
+
+	if (!xino_trunc_test(sb, br))
+		return;
+
+	if (atomic_inc_return(&br->br_xino_running) > 1)
+		goto out;
+
+	/* lock and kfree() will be called in trunc_xino() */
+	args = kmalloc(sizeof(*args), GFP_NOFS);
+	if (unlikely(!args)) {
+		AuErr1("no memory\n");
+		goto out;
+	}
+
+	au_br_get(br);
+	args->sb = sb;
+	args->br = br;
+	wkq_err = au_wkq_nowait(xino_do_trunc, args, sb, /*flags*/0);
+	if (!wkq_err)
+		return; /* success */
+
+	pr_err("wkq %d\n", wkq_err);
+	au_br_put(br);
+	kfree(args);
+
+out:
+	atomic_dec(&br->br_xino_running);
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_xino_do_write(vfs_writef_t write, struct file *file,
+			    ino_t h_ino, ino_t ino)
+{
+	loff_t pos;
+	ssize_t sz;
+
+	pos = h_ino;
+	if (unlikely(au_loff_max / sizeof(ino) - 1 < pos)) {
+		AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
+		return -EFBIG;
+	}
+	pos *= sizeof(ino);
+	sz = xino_fwrite(write, file, &ino, sizeof(ino), &pos);
+	if (sz == sizeof(ino))
+		return 0; /* success */
+
+	AuIOErr("write failed (%zd)\n", sz);
+	return -EIO;
+}
+
+/*
+ * write @ino to the xinofile for the specified branch{@sb, @bindex}
+ * at the position of @h_ino.
+ * even if @ino is zero, it is written to the xinofile and means no entry.
+ * if the size of the xino file on a specific filesystem exceeds the watermark,
+ * try truncating it.
+ */
+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
+		  ino_t ino)
+{
+	int err;
+	unsigned int mnt_flags;
+	struct au_branch *br;
+
+	BUILD_BUG_ON(sizeof(long long) != sizeof(au_loff_max)
+		     || ((loff_t)-1) > 0);
+	SiMustAnyLock(sb);
+
+	mnt_flags = au_mntflags(sb);
+	if (!au_opt_test(mnt_flags, XINO))
+		return 0;
+
+	br = au_sbr(sb, bindex);
+	err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
+			       h_ino, ino);
+	if (!err) {
+		if (au_opt_test(mnt_flags, TRUNC_XINO)
+		    && au_test_fs_trunc_xino(au_br_sb(br)))
+			xino_try_trunc(sb, br);
+		return 0; /* success */
+	}
+
+	AuIOErr("write failed (%d)\n", err);
+	return -EIO;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* aufs inode number bitmap */
+
+static const int page_bits = (int)PAGE_SIZE * BITS_PER_BYTE;
+static ino_t xib_calc_ino(unsigned long pindex, int bit)
+{
+	ino_t ino;
+
+	AuDebugOn(bit < 0 || page_bits <= bit);
+	ino = AUFS_FIRST_INO + pindex * page_bits + bit;
+	return ino;
+}
+
+static void xib_calc_bit(ino_t ino, unsigned long *pindex, int *bit)
+{
+	AuDebugOn(ino < AUFS_FIRST_INO);
+	ino -= AUFS_FIRST_INO;
+	*pindex = ino / page_bits;
+	*bit = ino % page_bits;
+}
+
+static int xib_pindex(struct super_block *sb, unsigned long pindex)
+{
+	int err;
+	loff_t pos;
+	ssize_t sz;
+	struct au_sbinfo *sbinfo;
+	struct file *xib;
+	unsigned long *p;
+
+	sbinfo = au_sbi(sb);
+	MtxMustLock(&sbinfo->si_xib_mtx);
+	AuDebugOn(pindex > ULONG_MAX / PAGE_SIZE
+		  || !au_opt_test(sbinfo->si_mntflags, XINO));
+
+	if (pindex == sbinfo->si_xib_last_pindex)
+		return 0;
+
+	xib = sbinfo->si_xib;
+	p = sbinfo->si_xib_buf;
+	pos = sbinfo->si_xib_last_pindex;
+	pos *= PAGE_SIZE;
+	sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
+	if (unlikely(sz != PAGE_SIZE))
+		goto out;
+
+	pos = pindex;
+	pos *= PAGE_SIZE;
+	if (vfsub_f_size_read(xib) >= pos + PAGE_SIZE)
+		sz = xino_fread(sbinfo->si_xread, xib, p, PAGE_SIZE, &pos);
+	else {
+		memset(p, 0, PAGE_SIZE);
+		sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
+	}
+	if (sz == PAGE_SIZE) {
+		sbinfo->si_xib_last_pindex = pindex;
+		return 0; /* success */
+	}
+
+out:
+	AuIOErr1("write failed (%zd)\n", sz);
+	err = sz;
+	if (sz >= 0)
+		err = -EIO;
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static void au_xib_clear_bit(struct inode *inode)
+{
+	int err, bit;
+	unsigned long pindex;
+	struct super_block *sb;
+	struct au_sbinfo *sbinfo;
+
+	AuDebugOn(inode->i_nlink);
+
+	sb = inode->i_sb;
+	xib_calc_bit(inode->i_ino, &pindex, &bit);
+	AuDebugOn(page_bits <= bit);
+	sbinfo = au_sbi(sb);
+	mutex_lock(&sbinfo->si_xib_mtx);
+	err = xib_pindex(sb, pindex);
+	if (!err) {
+		clear_bit(bit, sbinfo->si_xib_buf);
+		sbinfo->si_xib_next_bit = bit;
+	}
+	mutex_unlock(&sbinfo->si_xib_mtx);
+}
+
+/* for s_op->delete_inode() */
+void au_xino_delete_inode(struct inode *inode, const int unlinked)
+{
+	int err;
+	unsigned int mnt_flags;
+	aufs_bindex_t bindex, bbot, bi;
+	unsigned char try_trunc;
+	struct au_iinfo *iinfo;
+	struct super_block *sb;
+	struct au_hinode *hi;
+	struct inode *h_inode;
+	struct au_branch *br;
+	vfs_writef_t xwrite;
+
+	AuDebugOn(au_is_bad_inode(inode));
+
+	sb = inode->i_sb;
+	mnt_flags = au_mntflags(sb);
+	if (!au_opt_test(mnt_flags, XINO)
+	    || inode->i_ino == AUFS_ROOT_INO)
+		return;
+
+	if (unlinked) {
+		au_xigen_inc(inode);
+		au_xib_clear_bit(inode);
+	}
+
+	iinfo = au_ii(inode);
+	bindex = iinfo->ii_btop;
+	if (bindex < 0)
+		return;
+
+	xwrite = au_sbi(sb)->si_xwrite;
+	try_trunc = !!au_opt_test(mnt_flags, TRUNC_XINO);
+	hi = au_hinode(iinfo, bindex);
+	bbot = iinfo->ii_bbot;
+	for (; bindex <= bbot; bindex++, hi++) {
+		h_inode = hi->hi_inode;
+		if (!h_inode
+		    || (!unlinked && h_inode->i_nlink))
+			continue;
+
+		/* inode may not be revalidated */
+		bi = au_br_index(sb, hi->hi_id);
+		if (bi < 0)
+			continue;
+
+		br = au_sbr(sb, bi);
+		err = au_xino_do_write(xwrite, br->br_xino.xi_file,
+				       h_inode->i_ino, /*ino*/0);
+		if (!err && try_trunc
+		    && au_test_fs_trunc_xino(au_br_sb(br)))
+			xino_try_trunc(sb, br);
+	}
+}
+
+/* get an unused inode number from bitmap */
+ino_t au_xino_new_ino(struct super_block *sb)
+{
+	ino_t ino;
+	unsigned long *p, pindex, ul, pend;
+	struct au_sbinfo *sbinfo;
+	struct file *file;
+	int free_bit, err;
+
+	if (!au_opt_test(au_mntflags(sb), XINO))
+		return iunique(sb, AUFS_FIRST_INO);
+
+	sbinfo = au_sbi(sb);
+	mutex_lock(&sbinfo->si_xib_mtx);
+	p = sbinfo->si_xib_buf;
+	free_bit = sbinfo->si_xib_next_bit;
+	if (free_bit < page_bits && !test_bit(free_bit, p))
+		goto out; /* success */
+	free_bit = find_first_zero_bit(p, page_bits);
+	if (free_bit < page_bits)
+		goto out; /* success */
+
+	pindex = sbinfo->si_xib_last_pindex;
+	for (ul = pindex - 1; ul < ULONG_MAX; ul--) {
+		err = xib_pindex(sb, ul);
+		if (unlikely(err))
+			goto out_err;
+		free_bit = find_first_zero_bit(p, page_bits);
+		if (free_bit < page_bits)
+			goto out; /* success */
+	}
+
+	file = sbinfo->si_xib;
+	pend = vfsub_f_size_read(file) / PAGE_SIZE;
+	for (ul = pindex + 1; ul <= pend; ul++) {
+		err = xib_pindex(sb, ul);
+		if (unlikely(err))
+			goto out_err;
+		free_bit = find_first_zero_bit(p, page_bits);
+		if (free_bit < page_bits)
+			goto out; /* success */
+	}
+	BUG();
+
+out:
+	set_bit(free_bit, p);
+	sbinfo->si_xib_next_bit = free_bit + 1;
+	pindex = sbinfo->si_xib_last_pindex;
+	mutex_unlock(&sbinfo->si_xib_mtx);
+	ino = xib_calc_ino(pindex, free_bit);
+	AuDbg("i%lu\n", (unsigned long)ino);
+	return ino;
+out_err:
+	mutex_unlock(&sbinfo->si_xib_mtx);
+	AuDbg("i0\n");
+	return 0;
+}
+
+/*
+ * read @ino from xinofile for the specified branch{@sb, @bindex}
+ * at the position of @h_ino.
+ * if @ino does not exist and @do_new is true, get new one.
+ */
+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
+		 ino_t *ino)
+{
+	int err;
+	ssize_t sz;
+	loff_t pos;
+	struct file *file;
+	struct au_sbinfo *sbinfo;
+
+	*ino = 0;
+	if (!au_opt_test(au_mntflags(sb), XINO))
+		return 0; /* no xino */
+
+	err = 0;
+	sbinfo = au_sbi(sb);
+	pos = h_ino;
+	if (unlikely(au_loff_max / sizeof(*ino) - 1 < pos)) {
+		AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
+		return -EFBIG;
+	}
+	pos *= sizeof(*ino);
+
+	file = au_sbr(sb, bindex)->br_xino.xi_file;
+	if (vfsub_f_size_read(file) < pos + sizeof(*ino))
+		return 0; /* no ino */
+
+	sz = xino_fread(sbinfo->si_xread, file, ino, sizeof(*ino), &pos);
+	if (sz == sizeof(*ino))
+		return 0; /* success */
+
+	err = sz;
+	if (unlikely(sz >= 0)) {
+		err = -EIO;
+		AuIOErr("xino read error (%zd)\n", sz);
+	}
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* create and set a new xino file */
+
+struct file *au_xino_create(struct super_block *sb, char *fname, int silent)
+{
+	struct file *file;
+	struct dentry *h_parent, *d;
+	struct inode *h_dir, *inode;
+	int err;
+
+	/*
+	 * at mount-time, and the xino file is the default path,
+	 * hnotify is disabled so we have no notify events to ignore.
+	 * when a user specified the xino, we cannot get au_hdir to be ignored.
+	 */
+	file = vfsub_filp_open(fname, O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
+			       /* | __FMODE_NONOTIFY */,
+			       S_IRUGO | S_IWUGO);
+	if (IS_ERR(file)) {
+		if (!silent)
+			pr_err("open %s(%ld)\n", fname, PTR_ERR(file));
+		return file;
+	}
+
+	/* keep file count */
+	err = 0;
+	inode = file_inode(file);
+	h_parent = dget_parent(file->f_path.dentry);
+	h_dir = d_inode(h_parent);
+	inode_lock_nested(h_dir, AuLsc_I_PARENT);
+	/* mnt_want_write() is unnecessary here */
+	/* no delegation since it is just created */
+	if (inode->i_nlink)
+		err = vfsub_unlink(h_dir, &file->f_path, /*delegated*/NULL,
+				   /*force*/0);
+	inode_unlock(h_dir);
+	dput(h_parent);
+	if (unlikely(err)) {
+		if (!silent)
+			pr_err("unlink %s(%d)\n", fname, err);
+		goto out;
+	}
+
+	err = -EINVAL;
+	d = file->f_path.dentry;
+	if (unlikely(sb == d->d_sb)) {
+		if (!silent)
+			pr_err("%s must be outside\n", fname);
+		goto out;
+	}
+	if (unlikely(au_test_fs_bad_xino(d->d_sb))) {
+		if (!silent)
+			pr_err("xino doesn't support %s(%s)\n",
+			       fname, au_sbtype(d->d_sb));
+		goto out;
+	}
+	return file; /* success */
+
+out:
+	fput(file);
+	file = ERR_PTR(err);
+	return file;
+}
+
+/*
+ * find another branch who is on the same filesystem of the specified
+ * branch{@btgt}. search until @bbot.
+ */
+static int is_sb_shared(struct super_block *sb, aufs_bindex_t btgt,
+			aufs_bindex_t bbot)
+{
+	aufs_bindex_t bindex;
+	struct super_block *tgt_sb = au_sbr_sb(sb, btgt);
+
+	for (bindex = 0; bindex < btgt; bindex++)
+		if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
+			return bindex;
+	for (bindex++; bindex <= bbot; bindex++)
+		if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
+			return bindex;
+	return -1;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * initialize the xinofile for the specified branch @br
+ * at the place/path where @base_file indicates.
+ * test whether another branch is on the same filesystem or not,
+ * if @do_test is true.
+ */
+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t h_ino,
+	       struct file *base_file, int do_test)
+{
+	int err;
+	ino_t ino;
+	aufs_bindex_t bbot, bindex;
+	struct au_branch *shared_br, *b;
+	struct file *file;
+	struct super_block *tgt_sb;
+
+	shared_br = NULL;
+	bbot = au_sbbot(sb);
+	if (do_test) {
+		tgt_sb = au_br_sb(br);
+		for (bindex = 0; bindex <= bbot; bindex++) {
+			b = au_sbr(sb, bindex);
+			if (tgt_sb == au_br_sb(b)) {
+				shared_br = b;
+				break;
+			}
+		}
+	}
+
+	if (!shared_br || !shared_br->br_xino.xi_file) {
+		struct au_xino_lock_dir ldir;
+
+		au_xino_lock_dir(sb, base_file, &ldir);
+		/* mnt_want_write() is unnecessary here */
+		file = au_xino_create2(base_file, NULL);
+		au_xino_unlock_dir(&ldir);
+		err = PTR_ERR(file);
+		if (IS_ERR(file))
+			goto out;
+		br->br_xino.xi_file = file;
+	} else {
+		br->br_xino.xi_file = shared_br->br_xino.xi_file;
+		get_file(br->br_xino.xi_file);
+	}
+
+	ino = AUFS_ROOT_INO;
+	err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
+			       h_ino, ino);
+	if (unlikely(err)) {
+		fput(br->br_xino.xi_file);
+		br->br_xino.xi_file = NULL;
+	}
+
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* trucate a xino bitmap file */
+
+/* todo: slow */
+static int do_xib_restore(struct super_block *sb, struct file *file, void *page)
+{
+	int err, bit;
+	ssize_t sz;
+	unsigned long pindex;
+	loff_t pos, pend;
+	struct au_sbinfo *sbinfo;
+	vfs_readf_t func;
+	ino_t *ino;
+	unsigned long *p;
+
+	err = 0;
+	sbinfo = au_sbi(sb);
+	MtxMustLock(&sbinfo->si_xib_mtx);
+	p = sbinfo->si_xib_buf;
+	func = sbinfo->si_xread;
+	pend = vfsub_f_size_read(file);
+	pos = 0;
+	while (pos < pend) {
+		sz = xino_fread(func, file, page, PAGE_SIZE, &pos);
+		err = sz;
+		if (unlikely(sz <= 0))
+			goto out;
+
+		err = 0;
+		for (ino = page; sz > 0; ino++, sz -= sizeof(ino)) {
+			if (unlikely(*ino < AUFS_FIRST_INO))
+				continue;
+
+			xib_calc_bit(*ino, &pindex, &bit);
+			AuDebugOn(page_bits <= bit);
+			err = xib_pindex(sb, pindex);
+			if (!err)
+				set_bit(bit, p);
+			else
+				goto out;
+		}
+	}
+
+out:
+	return err;
+}
+
+static int xib_restore(struct super_block *sb)
+{
+	int err;
+	aufs_bindex_t bindex, bbot;
+	void *page;
+
+	err = -ENOMEM;
+	page = (void *)__get_free_page(GFP_NOFS);
+	if (unlikely(!page))
+		goto out;
+
+	err = 0;
+	bbot = au_sbbot(sb);
+	for (bindex = 0; !err && bindex <= bbot; bindex++)
+		if (!bindex || is_sb_shared(sb, bindex, bindex - 1) < 0)
+			err = do_xib_restore
+				(sb, au_sbr(sb, bindex)->br_xino.xi_file, page);
+		else
+			AuDbg("b%d\n", bindex);
+	free_page((unsigned long)page);
+
+out:
+	return err;
+}
+
+int au_xib_trunc(struct super_block *sb)
+{
+	int err;
+	ssize_t sz;
+	loff_t pos;
+	struct au_xino_lock_dir ldir;
+	struct au_sbinfo *sbinfo;
+	unsigned long *p;
+	struct file *file;
+
+	SiMustWriteLock(sb);
+
+	err = 0;
+	sbinfo = au_sbi(sb);
+	if (!au_opt_test(sbinfo->si_mntflags, XINO))
+		goto out;
+
+	file = sbinfo->si_xib;
+	if (vfsub_f_size_read(file) <= PAGE_SIZE)
+		goto out;
+
+	au_xino_lock_dir(sb, file, &ldir);
+	/* mnt_want_write() is unnecessary here */
+	file = au_xino_create2(sbinfo->si_xib, NULL);
+	au_xino_unlock_dir(&ldir);
+	err = PTR_ERR(file);
+	if (IS_ERR(file))
+		goto out;
+	fput(sbinfo->si_xib);
+	sbinfo->si_xib = file;
+
+	p = sbinfo->si_xib_buf;
+	memset(p, 0, PAGE_SIZE);
+	pos = 0;
+	sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xib, p, PAGE_SIZE, &pos);
+	if (unlikely(sz != PAGE_SIZE)) {
+		err = sz;
+		AuIOErr("err %d\n", err);
+		if (sz >= 0)
+			err = -EIO;
+		goto out;
+	}
+
+	mutex_lock(&sbinfo->si_xib_mtx);
+	/* mnt_want_write() is unnecessary here */
+	err = xib_restore(sb);
+	mutex_unlock(&sbinfo->si_xib_mtx);
+
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * xino mount option handlers
+ */
+
+/* xino bitmap */
+static void xino_clear_xib(struct super_block *sb)
+{
+	struct au_sbinfo *sbinfo;
+
+	SiMustWriteLock(sb);
+
+	sbinfo = au_sbi(sb);
+	sbinfo->si_xread = NULL;
+	sbinfo->si_xwrite = NULL;
+	if (sbinfo->si_xib)
+		fput(sbinfo->si_xib);
+	sbinfo->si_xib = NULL;
+	if (sbinfo->si_xib_buf)
+		free_page((unsigned long)sbinfo->si_xib_buf);
+	sbinfo->si_xib_buf = NULL;
+}
+
+static int au_xino_set_xib(struct super_block *sb, struct file *base)
+{
+	int err;
+	loff_t pos;
+	struct au_sbinfo *sbinfo;
+	struct file *file;
+
+	SiMustWriteLock(sb);
+
+	sbinfo = au_sbi(sb);
+	file = au_xino_create2(base, sbinfo->si_xib);
+	err = PTR_ERR(file);
+	if (IS_ERR(file))
+		goto out;
+	if (sbinfo->si_xib)
+		fput(sbinfo->si_xib);
+	sbinfo->si_xib = file;
+	sbinfo->si_xread = vfs_readf(file);
+	sbinfo->si_xwrite = vfs_writef(file);
+
+	err = -ENOMEM;
+	if (!sbinfo->si_xib_buf)
+		sbinfo->si_xib_buf = (void *)get_zeroed_page(GFP_NOFS);
+	if (unlikely(!sbinfo->si_xib_buf))
+		goto out_unset;
+
+	sbinfo->si_xib_last_pindex = 0;
+	sbinfo->si_xib_next_bit = 0;
+	if (vfsub_f_size_read(file) < PAGE_SIZE) {
+		pos = 0;
+		err = xino_fwrite(sbinfo->si_xwrite, file, sbinfo->si_xib_buf,
+				  PAGE_SIZE, &pos);
+		if (unlikely(err != PAGE_SIZE))
+			goto out_free;
+	}
+	err = 0;
+	goto out; /* success */
+
+out_free:
+	if (sbinfo->si_xib_buf)
+		free_page((unsigned long)sbinfo->si_xib_buf);
+	sbinfo->si_xib_buf = NULL;
+	if (err >= 0)
+		err = -EIO;
+out_unset:
+	fput(sbinfo->si_xib);
+	sbinfo->si_xib = NULL;
+	sbinfo->si_xread = NULL;
+	sbinfo->si_xwrite = NULL;
+out:
+	return err;
+}
+
+/* xino for each branch */
+static void xino_clear_br(struct super_block *sb)
+{
+	aufs_bindex_t bindex, bbot;
+	struct au_branch *br;
+
+	bbot = au_sbbot(sb);
+	for (bindex = 0; bindex <= bbot; bindex++) {
+		br = au_sbr(sb, bindex);
+		if (!br || !br->br_xino.xi_file)
+			continue;
+
+		fput(br->br_xino.xi_file);
+		br->br_xino.xi_file = NULL;
+	}
+}
+
+static int au_xino_set_br(struct super_block *sb, struct file *base)
+{
+	int err;
+	ino_t ino;
+	aufs_bindex_t bindex, bbot, bshared;
+	struct {
+		struct file *old, *new;
+	} *fpair, *p;
+	struct au_branch *br;
+	struct inode *inode;
+	vfs_writef_t writef;
+
+	SiMustWriteLock(sb);
+
+	err = -ENOMEM;
+	bbot = au_sbbot(sb);
+	fpair = kcalloc(bbot + 1, sizeof(*fpair), GFP_NOFS);
+	if (unlikely(!fpair))
+		goto out;
+
+	inode = d_inode(sb->s_root);
+	ino = AUFS_ROOT_INO;
+	writef = au_sbi(sb)->si_xwrite;
+	for (bindex = 0, p = fpair; bindex <= bbot; bindex++, p++) {
+		bshared = is_sb_shared(sb, bindex, bindex - 1);
+		if (bshared >= 0) {
+			/* shared xino */
+			*p = fpair[bshared];
+			get_file(p->new);
+		}
+
+		if (!p->new) {
+			/* new xino */
+			br = au_sbr(sb, bindex);
+			p->old = br->br_xino.xi_file;
+			p->new = au_xino_create2(base, br->br_xino.xi_file);
+			err = PTR_ERR(p->new);
+			if (IS_ERR(p->new)) {
+				p->new = NULL;
+				goto out_pair;
+			}
+		}
+
+		err = au_xino_do_write(writef, p->new,
+				       au_h_iptr(inode, bindex)->i_ino, ino);
+		if (unlikely(err))
+			goto out_pair;
+	}
+
+	for (bindex = 0, p = fpair; bindex <= bbot; bindex++, p++) {
+		br = au_sbr(sb, bindex);
+		if (br->br_xino.xi_file)
+			fput(br->br_xino.xi_file);
+		get_file(p->new);
+		br->br_xino.xi_file = p->new;
+	}
+
+out_pair:
+	for (bindex = 0, p = fpair; bindex <= bbot; bindex++, p++)
+		if (p->new)
+			fput(p->new);
+		else
+			break;
+	kfree(fpair);
+out:
+	return err;
+}
+
+void au_xino_clr(struct super_block *sb)
+{
+	struct au_sbinfo *sbinfo;
+
+	au_xigen_clr(sb);
+	xino_clear_xib(sb);
+	xino_clear_br(sb);
+	sbinfo = au_sbi(sb);
+	/* lvalue, do not call au_mntflags() */
+	au_opt_clr(sbinfo->si_mntflags, XINO);
+}
+
+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount)
+{
+	int err, skip;
+	struct dentry *parent, *cur_parent;
+	struct qstr *dname, *cur_name;
+	struct file *cur_xino;
+	struct inode *dir;
+	struct au_sbinfo *sbinfo;
+
+	SiMustWriteLock(sb);
+
+	err = 0;
+	sbinfo = au_sbi(sb);
+	parent = dget_parent(xino->file->f_path.dentry);
+	if (remount) {
+		skip = 0;
+		dname = &xino->file->f_path.dentry->d_name;
+		cur_xino = sbinfo->si_xib;
+		if (cur_xino) {
+			cur_parent = dget_parent(cur_xino->f_path.dentry);
+			cur_name = &cur_xino->f_path.dentry->d_name;
+			skip = (cur_parent == parent
+				&& au_qstreq(dname, cur_name));
+			dput(cur_parent);
+		}
+		if (skip)
+			goto out;
+	}
+
+	au_opt_set(sbinfo->si_mntflags, XINO);
+	dir = d_inode(parent);
+	inode_lock_nested(dir, AuLsc_I_PARENT);
+	/* mnt_want_write() is unnecessary here */
+	err = au_xino_set_xib(sb, xino->file);
+	if (!err)
+		err = au_xigen_set(sb, xino->file);
+	if (!err)
+		err = au_xino_set_br(sb, xino->file);
+	inode_unlock(dir);
+	if (!err)
+		goto out; /* success */
+
+	/* reset all */
+	AuIOErr("failed creating xino(%d).\n", err);
+	au_xigen_clr(sb);
+	xino_clear_xib(sb);
+
+out:
+	dput(parent);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * create a xinofile at the default place/path.
+ */
+struct file *au_xino_def(struct super_block *sb)
+{
+	struct file *file;
+	char *page, *p;
+	struct au_branch *br;
+	struct super_block *h_sb;
+	struct path path;
+	aufs_bindex_t bbot, bindex, bwr;
+
+	br = NULL;
+	bbot = au_sbbot(sb);
+	bwr = -1;
+	for (bindex = 0; bindex <= bbot; bindex++) {
+		br = au_sbr(sb, bindex);
+		if (au_br_writable(br->br_perm)
+		    && !au_test_fs_bad_xino(au_br_sb(br))) {
+			bwr = bindex;
+			break;
+		}
+	}
+
+	if (bwr >= 0) {
+		file = ERR_PTR(-ENOMEM);
+		page = (void *)__get_free_page(GFP_NOFS);
+		if (unlikely(!page))
+			goto out;
+		path.mnt = au_br_mnt(br);
+		path.dentry = au_h_dptr(sb->s_root, bwr);
+		p = d_path(&path, page, PATH_MAX - sizeof(AUFS_XINO_FNAME));
+		file = (void *)p;
+		if (!IS_ERR(p)) {
+			strcat(p, "/" AUFS_XINO_FNAME);
+			AuDbg("%s\n", p);
+			file = au_xino_create(sb, p, /*silent*/0);
+			if (!IS_ERR(file))
+				au_xino_brid_set(sb, br->br_id);
+		}
+		free_page((unsigned long)page);
+	} else {
+		file = au_xino_create(sb, AUFS_XINO_DEFPATH, /*silent*/0);
+		if (IS_ERR(file))
+			goto out;
+		h_sb = file->f_path.dentry->d_sb;
+		if (unlikely(au_test_fs_bad_xino(h_sb))) {
+			pr_err("xino doesn't support %s(%s)\n",
+			       AUFS_XINO_DEFPATH, au_sbtype(h_sb));
+			fput(file);
+			file = ERR_PTR(-EINVAL);
+		}
+		if (!IS_ERR(file))
+			au_xino_brid_set(sb, -1);
+	}
+
+out:
+	return file;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int au_xino_path(struct seq_file *seq, struct file *file)
+{
+	int err;
+
+	err = au_seq_path(seq, &file->f_path);
+	if (unlikely(err))
+		goto out;
+
+#define Deleted "\\040(deleted)"
+	seq->count -= sizeof(Deleted) - 1;
+	AuDebugOn(memcmp(seq->buf + seq->count, Deleted,
+			 sizeof(Deleted) - 1));
+#undef Deleted
+
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void au_xinondir_leave(struct super_block *sb, aufs_bindex_t bindex,
+		       ino_t h_ino, int idx)
+{
+	struct au_xino_file *xino;
+
+	AuDebugOn(!au_opt_test(au_mntflags(sb), XINO));
+	xino = &au_sbr(sb, bindex)->br_xino;
+	AuDebugOn(idx < 0 || xino->xi_nondir.total <= idx);
+
+	spin_lock(&xino->xi_nondir.spin);
+	AuDebugOn(xino->xi_nondir.array[idx] != h_ino);
+	xino->xi_nondir.array[idx] = 0;
+	spin_unlock(&xino->xi_nondir.spin);
+	wake_up_all(&xino->xi_nondir.wqh);
+}
+
+static int au_xinondir_find(struct au_xino_file *xino, ino_t h_ino)
+{
+	int found, total, i;
+
+	found = -1;
+	total = xino->xi_nondir.total;
+	for (i = 0; i < total; i++) {
+		if (xino->xi_nondir.array[i] != h_ino)
+			continue;
+		found = i;
+		break;
+	}
+
+	return found;
+}
+
+static int au_xinondir_expand(struct au_xino_file *xino)
+{
+	int err, sz;
+	ino_t *p;
+
+	BUILD_BUG_ON(KMALLOC_MAX_SIZE > INT_MAX);
+
+	err = -ENOMEM;
+	sz = xino->xi_nondir.total * sizeof(ino_t);
+	if (unlikely(sz > KMALLOC_MAX_SIZE / 2))
+		goto out;
+	p = au_kzrealloc(xino->xi_nondir.array, sz, sz << 1, GFP_ATOMIC,
+			 /*may_shrink*/0);
+	if (p) {
+		xino->xi_nondir.array = p;
+		xino->xi_nondir.total <<= 1;
+		AuDbg("xi_nondir.total %d\n", xino->xi_nondir.total);
+		err = 0;
+	}
+
+out:
+	return err;
+}
+
+int au_xinondir_enter(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
+		      int *idx)
+{
+	int err, found, empty;
+	struct au_xino_file *xino;
+
+	err = 0;
+	*idx = -1;
+	if (!au_opt_test(au_mntflags(sb), XINO))
+		goto out; /* no xino */
+
+	xino = &au_sbr(sb, bindex)->br_xino;
+
+again:
+	spin_lock(&xino->xi_nondir.spin);
+	found = au_xinondir_find(xino, h_ino);
+	if (found == -1) {
+		empty = au_xinondir_find(xino, /*h_ino*/0);
+		if (empty == -1) {
+			empty = xino->xi_nondir.total;
+			err = au_xinondir_expand(xino);
+			if (unlikely(err))
+				goto out_unlock;
+		}
+		xino->xi_nondir.array[empty] = h_ino;
+		*idx = empty;
+	} else {
+		spin_unlock(&xino->xi_nondir.spin);
+		wait_event(xino->xi_nondir.wqh,
+			   xino->xi_nondir.array[found] != h_ino);
+		goto again;
+	}
+
+out_unlock:
+	spin_unlock(&xino->xi_nondir.spin);
+out:
+	return err;
+}
diff --git a/fs/dcache.c b/fs/dcache.c
index 8945e6cab..b66fb04ec 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1205,7 +1205,7 @@ enum d_walk_ret {
  *
  * The @enter() and @finish() callbacks are called with d_lock held.
  */
-static void d_walk(struct dentry *parent, void *data,
+void d_walk(struct dentry *parent, void *data,
 		   enum d_walk_ret (*enter)(void *, struct dentry *),
 		   void (*finish)(void *))
 {
@@ -1313,6 +1313,7 @@ rename_retry:
 	seq = 1;
 	goto again;
 }
+EXPORT_SYMBOL_GPL(d_walk);
 
 struct check_mount {
 	struct vfsmount *mnt;
@@ -2931,6 +2932,7 @@ void d_exchange(struct dentry *dentry1, struct dentry *dentry2)
 
 	write_sequnlock(&rename_lock);
 }
+EXPORT_SYMBOL_GPL(d_exchange);
 
 /**
  * d_ancestor - search for an ancestor
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 823770171..4a613c146 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -9,6 +9,7 @@
 #include <linux/writeback.h>
 #include <linux/sysctl.h>
 #include <linux/gfp.h>
+#include <linux/export.h>
 #include "internal.h"
 
 /* A global variable is a bit ugly, but it keeps the code simple */
@@ -40,6 +41,12 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
 	iput(toput_inode);
 }
 
+/* For TuxOnIce */
+void drop_pagecache(void)
+{
+	iterate_supers(drop_pagecache_sb, NULL);
+}
+
 int drop_caches_sysctl_handler(struct ctl_table *table, int write,
 	void __user *buffer, size_t *length, loff_t *ppos)
 {
diff --git a/fs/exec.c b/fs/exec.c
index 7eb8d21bc..182e48bca 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -62,6 +62,9 @@
 #include <linux/oom.h>
 #include <linux/compat.h>
 #include <linux/vmalloc.h>
+#include <linux/ksm.h>
+
+#include <trace/events/fs.h>
 
 #include <linux/uaccess.h>
 #include <asm/mmu_context.h>
@@ -109,6 +112,7 @@ bool path_noexec(const struct path *path)
 	return (path->mnt->mnt_flags & MNT_NOEXEC) ||
 	       (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC);
 }
+EXPORT_SYMBOL_GPL(path_noexec);
 
 #ifdef CONFIG_USELIB
 /*
@@ -861,8 +865,10 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags)
 	if (err)
 		goto exit;
 
-	if (name->name[0] != '\0')
+	if (name->name[0] != '\0') {
 		fsnotify_open(file);
+		trace_open_exec(name->name);
+	}
 
 out:
 	return file;
@@ -1374,6 +1380,7 @@ void setup_new_exec(struct linux_binprm * bprm)
 	/* An exec changes our domain. We are no longer part of the thread
 	   group */
 	current->self_exec_id++;
+
 	flush_signal_handlers(current, 0);
 }
 EXPORT_SYMBOL(setup_new_exec);
diff --git a/fs/exfat/Kconfig b/fs/exfat/Kconfig
new file mode 100644
index 000000000..78b32aa2c
--- /dev/null
+++ b/fs/exfat/Kconfig
@@ -0,0 +1,39 @@
+config EXFAT_FS
+	tristate "exFAT fs support"
+	select NLS
+	help
+	  This adds support for the exFAT file system.
+
+config EXFAT_DISCARD
+	bool "enable discard support"
+	depends on EXFAT_FS
+	default y
+
+config EXFAT_DELAYED_SYNC
+	bool "enable delayed sync"
+	depends on EXFAT_FS
+	default n
+
+config EXFAT_KERNEL_DEBUG
+	bool "enable kernel debug features via ioctl"
+	depends on EXFAT_FS
+	default n
+
+config EXFAT_DEBUG_MSG
+	bool "print debug messages"
+	depends on EXFAT_FS
+	default n
+
+config EXFAT_DEFAULT_CODEPAGE
+	int "Default codepage for exFAT"
+	default 437
+	depends on EXFAT_FS
+	help
+	  This option should be set to the codepage of your exFAT filesystems.
+
+config EXFAT_DEFAULT_IOCHARSET
+	string "Default iocharset for exFAT"
+	default "utf8"
+	depends on EXFAT_FS
+	help
+	  Set this to the default input/output character set you'd like exFAT to use.
diff --git a/fs/exfat/LICENSE b/fs/exfat/LICENSE
new file mode 100644
index 000000000..d159169d1
--- /dev/null
+++ b/fs/exfat/LICENSE
@@ -0,0 +1,339 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+                            NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/fs/exfat/Makefile b/fs/exfat/Makefile
new file mode 100644
index 000000000..711ed87f9
--- /dev/null
+++ b/fs/exfat/Makefile
@@ -0,0 +1,54 @@
+#
+# Makefile for Linux FAT12/FAT16/FAT32(VFAT)/FAT64(ExFAT) filesystem driver.
+#
+
+ifneq ($(KERNELRELEASE),)
+# call from kernel build system
+
+obj-$(CONFIG_EXFAT_FS) += exfat.o
+
+exfat-objs := exfat_core.o exfat_super.o exfat_api.o exfat_blkdev.o exfat_cache.o \
+			   exfat_data.o exfat_bitmap.o exfat_nls.o exfat_oal.o exfat_upcase.o
+
+else
+# external module build
+
+EXTRA_FLAGS += -I$(PWD)
+
+#
+# KDIR is a path to a directory containing kernel source.
+# It can be specified on the command line passed to make to enable the module to
+# be built and installed for a kernel other than the one currently running.
+# By default it is the path to the symbolic link created when
+# the current kernel's modules were installed, but
+# any valid path to the directory in which the target kernel's source is located
+# can be provided on the command line.
+#
+KDIR	?= /lib/modules/$(shell uname -r)/build
+MDIR	?= /lib/modules/$(shell uname -r)
+PWD	:= $(shell pwd)
+PWD	:= $(shell pwd)
+
+export CONFIG_EXFAT_FS := m
+
+all:
+	$(MAKE) -C $(KDIR) M=$(PWD) modules
+
+clean:
+	$(MAKE) -C $(KDIR) M=$(PWD) clean
+
+help:
+	$(MAKE) -C $(KDIR) M=$(PWD) help
+
+install: exfat.ko
+	rm -f ${MDIR}/kernel/fs/exfat/exfat.ko
+	install -m644 -b -D exfat.ko ${MDIR}/kernel/fs/exfat/exfat.ko
+	depmod -aq
+
+uninstall:
+	rm -rf ${MDIR}/kernel/fs/exfat
+	depmod -aq
+
+endif
+
+.PHONY : all clean install uninstall
diff --git a/fs/exfat/README.md b/fs/exfat/README.md
new file mode 100644
index 000000000..feab40038
--- /dev/null
+++ b/fs/exfat/README.md
@@ -0,0 +1,98 @@
+exfat-nofuse
+============
+
+Linux non-fuse read/write kernel driver for the exFAT, FAT12, FAT16 and vfat (FAT32) file systems.<br />
+Originally ported from Android kernel v3.0.
+
+Kudos to ksv1986 for the mutex patch!<br />
+Thanks to JackNorris for being awesome and providing the clear_inode() patch.<br />
+<br />
+Big thanks to lqs for completing the driver!<br />
+Big thanks to benpicco for fixing 3.11.y compatibility!
+
+
+Special thanks to github user AndreiLux for spreading the word about the leak!<br />
+
+
+Installing as a stand-alone module:
+====================================
+
+    make
+    sudo make install
+
+To load the driver manually, run this as root:
+
+    modprobe exfat
+
+You may also specify custom toolchains by using CROSS_COMPILE flag, in my case:
+>CROSS_COMPILE=../dorimanx-SG2-I9100-Kernel/android-toolchain/bin/arm-eabi-
+
+Installing as a part of the kernel:
+======================================
+
+Let's take [linux] as the path to your kernel source dir...
+
+	cd [linux]
+	cp -rvf exfat-nofuse [linux]/fs/exfat
+
+edit [linux]/fs/Kconfig
+```
+ menu "DOS/FAT/NT Filesystems"
+
+  source "fs/fat/Kconfig"
+ +source "fs/exfat/Kconfig"
+  source "fs/ntfs/Kconfig"
+  endmenu
+```
+  
+
+edit [linux]/fs/Makefile
+```
+  obj-$(CONFIG_FAT_FS)    += fat/
+ +obj-$(CONFIG_EXFAT_FS)  += exfat/
+  obj-$(CONFIG_BFS_FS)    += bfs/
+```
+
+	cd [linux]
+	make menuconfig
+
+Go to:
+> File systems > DOS/FAT/NT
+>   check exfat as MODULE (M)
+>   (437) Default codepage for exFAT
+>   (utf8) Default iocharset for exFAT
+
+> ESC to main menu
+> Save an Alternate Configuration File
+> ESC ESC
+
+build your kernel
+
+Have fun.
+
+
+Installing as a DKMS module:
+=================================
+
+You can have even more fun with exfat-nofuse by installing it as a DKMS module has the main advantage of being auto-compiled (and thus, possibly surviving) between kernel upgrades.
+
+First, get dkms. On Ubuntu this should be:
+
+	sudo apt install dkms
+
+Then copy the root of this repository to /usr/share:
+
+	sudo cp -R . /usr/src/exfat-1.2.8 (or whatever version number declared on dkms.conf is)
+	sudo dkms add -m exfat -v 1.2.8
+
+Build and load the module:
+
+	sudo dkms build -m exfat -v 1.2.8
+	sudo dkms install -m exfat -v 1.2.8
+
+Now you have a proper dkms module that will work for a long time... hopefully.
+
+
+
+Free Software for the Free Minds!
+=================================
diff --git a/fs/exfat/dkms.conf b/fs/exfat/dkms.conf
new file mode 100644
index 000000000..d873c0aef
--- /dev/null
+++ b/fs/exfat/dkms.conf
@@ -0,0 +1,7 @@
+PACKAGE_NAME="exfat"
+PACKAGE_VERSION="1.2.8"
+MAKE="KDIR=/lib/modules/$kernelver/build MDIR=/lib/modules/$kernelver make"
+CLEAN="make clean"
+BUILT_MODULE_NAME[0]="exfat"
+AUTOINSTALL="yes"
+DEST_MODULE_LOCATION="/extra"
diff --git a/fs/exfat/exfat-km.mk b/fs/exfat/exfat-km.mk
new file mode 100644
index 000000000..4e3ef07b3
--- /dev/null
+++ b/fs/exfat/exfat-km.mk
@@ -0,0 +1,11 @@
+EXFAT_FOLDER ?= external/exfat-nofuse
+
+EXFAT_MODULE:
+	make clean -C $(EXFAT_FOLDER) KDIR=$(KERNEL_OUT)
+	make -j8 -C $(EXFAT_FOLDER) ARCH=arm KDIR=$(KERNEL_OUT) \
+		$(if $(ARM_CROSS_COMPILE),$(ARM_CROSS_COMPILE),$(KERNEL_CROSS_COMPILE))
+	mv $(EXFAT_FOLDER)/exfat.ko $(KERNEL_MODULES_OUT)
+	$(if $(ARM_EABI_TOOLCHAIN),$(ARM_EABI_TOOLCHAIN)/arm-eabi-strip, \
+		$(KERNEL_TOOLCHAIN_PATH)strip) --strip-unneeded $(KERNEL_MODULES_OUT)/exfat.ko
+
+TARGET_KERNEL_MODULES += EXFAT_MODULE
diff --git a/fs/exfat/exfat_api.c b/fs/exfat/exfat_api.c
new file mode 100644
index 000000000..32b29f0dd
--- /dev/null
+++ b/fs/exfat/exfat_api.c
@@ -0,0 +1,528 @@
+/*
+ *  Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/************************************************************************/
+/*                                                                      */
+/*  PROJECT : exFAT & FAT12/16/32 File System                           */
+/*  FILE    : exfat_api.c                                               */
+/*  PURPOSE : exFAT API Glue Layer                                      */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  NOTES                                                               */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  REVISION HISTORY (Ver 0.9)                                          */
+/*                                                                      */
+/*  - 2010.11.15 [Joosun Hahn] : first writing                          */
+/*                                                                      */
+/************************************************************************/
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include "exfat_version.h"
+#include "exfat_config.h"
+#include "exfat_data.h"
+#include "exfat_oal.h"
+
+#include "exfat_nls.h"
+#include "exfat_api.h"
+#include "exfat_super.h"
+#include "exfat_core.h"
+
+/*----------------------------------------------------------------------*/
+/*  Constant & Macro Definitions                                        */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/*  Global Variable Definitions                                         */
+/*----------------------------------------------------------------------*/
+
+extern struct semaphore z_sem;
+
+/*----------------------------------------------------------------------*/
+/*  Local Variable Definitions                                          */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/*  Local Function Declarations                                         */
+/*----------------------------------------------------------------------*/
+
+/*======================================================================*/
+/*  Global Function Definitions                                         */
+/*    - All functions for global use have same return value format,     */
+/*      that is, FFS_SUCCESS on success and several FS error code on    */
+/*      various error condition.                                        */
+/*======================================================================*/
+
+/*----------------------------------------------------------------------*/
+/*  exFAT Filesystem Init & Exit Functions                              */
+/*----------------------------------------------------------------------*/
+
+int FsInit(void)
+{
+	return ffsInit();
+}
+
+int FsShutdown(void)
+{
+	return ffsShutdown();
+}
+
+/*----------------------------------------------------------------------*/
+/*  Volume Management Functions                                         */
+/*----------------------------------------------------------------------*/
+
+/* FsMountVol : mount the file system volume */
+int FsMountVol(struct super_block *sb)
+{
+	int err;
+
+	sm_P(&z_sem);
+
+	err = buf_init(sb);
+	if (!err)
+		err = ffsMountVol(sb);
+	else
+		buf_shutdown(sb);
+
+	sm_V(&z_sem);
+
+	return err;
+} /* end of FsMountVol */
+
+/* FsUmountVol : unmount the file system volume */
+int FsUmountVol(struct super_block *sb)
+{
+	int err;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	sm_P(&z_sem);
+
+	/* acquire the lock for file system critical section */
+	sm_P(&p_fs->v_sem);
+
+	err = ffsUmountVol(sb);
+	buf_shutdown(sb);
+
+	/* release the lock for file system critical section */
+	sm_V(&p_fs->v_sem);
+
+	sm_V(&z_sem);
+
+	return err;
+} /* end of FsUmountVol */
+
+/* FsGetVolInfo : get the information of a file system volume */
+int FsGetVolInfo(struct super_block *sb, VOL_INFO_T *info)
+{
+	int err;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	/* check the validity of pointer parameters */
+	if (info == NULL)
+		return FFS_ERROR;
+
+	/* acquire the lock for file system critical section */
+	sm_P(&p_fs->v_sem);
+
+	err = ffsGetVolInfo(sb, info);
+
+	/* release the lock for file system critical section */
+	sm_V(&p_fs->v_sem);
+
+	return err;
+} /* end of FsGetVolInfo */
+
+/* FsSyncVol : synchronize a file system volume */
+int FsSyncVol(struct super_block *sb, int do_sync)
+{
+	int err;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	/* acquire the lock for file system critical section */
+	sm_P(&p_fs->v_sem);
+
+	err = ffsSyncVol(sb, do_sync);
+
+	/* release the lock for file system critical section */
+	sm_V(&p_fs->v_sem);
+
+	return err;
+} /* end of FsSyncVol */
+
+
+/*----------------------------------------------------------------------*/
+/*  File Operation Functions                                            */
+/*----------------------------------------------------------------------*/
+
+/* FsCreateFile : create a file */
+int FsLookupFile(struct inode *inode, char *path, FILE_ID_T *fid)
+{
+	int err;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	/* check the validity of pointer parameters */
+	if ((fid == NULL) || (path == NULL) || (*path == '\0'))
+		return FFS_ERROR;
+
+	/* acquire the lock for file system critical section */
+	sm_P(&p_fs->v_sem);
+
+	err = ffsLookupFile(inode, path, fid);
+
+	/* release the lock for file system critical section */
+	sm_V(&p_fs->v_sem);
+
+	return err;
+} /* end of FsLookupFile */
+
+/* FsCreateFile : create a file */
+int FsCreateFile(struct inode *inode, char *path, u8 mode, FILE_ID_T *fid)
+{
+	int err;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	/* check the validity of pointer parameters */
+	if ((fid == NULL) || (path == NULL) || (*path == '\0'))
+		return FFS_ERROR;
+
+	/* acquire the lock for file system critical section */
+	sm_P(&p_fs->v_sem);
+
+	err = ffsCreateFile(inode, path, mode, fid);
+
+	/* release the lock for file system critical section */
+	sm_V(&p_fs->v_sem);
+
+	return err;
+} /* end of FsCreateFile */
+
+int FsReadFile(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *rcount)
+{
+	int err;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	/* check the validity of the given file id */
+	if (fid == NULL)
+		return FFS_INVALIDFID;
+
+	/* check the validity of pointer parameters */
+	if (buffer == NULL)
+		return FFS_ERROR;
+
+	/* acquire the lock for file system critical section */
+	sm_P(&p_fs->v_sem);
+
+	err = ffsReadFile(inode, fid, buffer, count, rcount);
+
+	/* release the lock for file system critical section */
+	sm_V(&p_fs->v_sem);
+
+	return err;
+} /* end of FsReadFile */
+
+int FsWriteFile(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *wcount)
+{
+	int err;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	/* check the validity of the given file id */
+	if (fid == NULL)
+		return FFS_INVALIDFID;
+
+	/* check the validity of pointer parameters */
+	if (buffer == NULL)
+		return FFS_ERROR;
+
+	/* acquire the lock for file system critical section */
+	sm_P(&p_fs->v_sem);
+
+	err = ffsWriteFile(inode, fid, buffer, count, wcount);
+
+	/* release the lock for file system critical section */
+	sm_V(&p_fs->v_sem);
+
+	return err;
+} /* end of FsWriteFile */
+
+/* FsTruncateFile : resize the file length */
+int FsTruncateFile(struct inode *inode, u64 old_size, u64 new_size)
+{
+	int err;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	/* acquire the lock for file system critical section */
+	sm_P(&p_fs->v_sem);
+
+	DPRINTK("FsTruncateFile entered (inode %p size %llu)\n", inode, new_size);
+
+	err = ffsTruncateFile(inode, old_size, new_size);
+
+	DPRINTK("FsTruncateFile exitted (%d)\n", err);
+
+	/* release the lock for file system critical section */
+	sm_V(&p_fs->v_sem);
+
+	return err;
+} /* end of FsTruncateFile */
+
+/* FsMoveFile : move(rename) a old file into a new file */
+int FsMoveFile(struct inode *old_parent_inode, FILE_ID_T *fid, struct inode *new_parent_inode, struct dentry *new_dentry)
+{
+	int err;
+	struct super_block *sb = old_parent_inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	/* check the validity of the given file id */
+	if (fid == NULL)
+		return FFS_INVALIDFID;
+
+	/* acquire the lock for file system critical section */
+	sm_P(&p_fs->v_sem);
+
+	err = ffsMoveFile(old_parent_inode, fid, new_parent_inode, new_dentry);
+
+	/* release the lock for file system critical section */
+	sm_V(&p_fs->v_sem);
+
+	return err;
+} /* end of FsMoveFile */
+
+/* FsRemoveFile : remove a file */
+int FsRemoveFile(struct inode *inode, FILE_ID_T *fid)
+{
+	int err;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	/* check the validity of the given file id */
+	if (fid == NULL)
+		return FFS_INVALIDFID;
+
+	/* acquire the lock for file system critical section */
+	sm_P(&p_fs->v_sem);
+
+	err = ffsRemoveFile(inode, fid);
+
+	/* release the lock for file system critical section */
+	sm_V(&p_fs->v_sem);
+
+	return err;
+} /* end of FsRemoveFile */
+
+/* FsSetAttr : set the attribute of a given file */
+int FsSetAttr(struct inode *inode, u32 attr)
+{
+	int err;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	/* acquire the lock for file system critical section */
+	sm_P(&p_fs->v_sem);
+
+	err = ffsSetAttr(inode, attr);
+
+	/* release the lock for file system critical section */
+	sm_V(&p_fs->v_sem);
+
+	return err;
+} /* end of FsSetAttr */
+
+/* FsReadStat : get the information of a given file */
+int FsReadStat(struct inode *inode, DIR_ENTRY_T *info)
+{
+	int err;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	/* acquire the lock for file system critical section */
+	sm_P(&p_fs->v_sem);
+
+	err = ffsGetStat(inode, info);
+
+	/* release the lock for file system critical section */
+	sm_V(&p_fs->v_sem);
+
+	return err;
+} /* end of FsReadStat */
+
+/* FsWriteStat : set the information of a given file */
+int FsWriteStat(struct inode *inode, DIR_ENTRY_T *info)
+{
+	int err;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	/* acquire the lock for file system critical section */
+	sm_P(&p_fs->v_sem);
+
+	DPRINTK("FsWriteStat entered (inode %p info %p\n", inode, info);
+
+	err = ffsSetStat(inode, info);
+
+	/* release the lock for file system critical section */
+	sm_V(&p_fs->v_sem);
+
+	DPRINTK("FsWriteStat exited (%d)\n", err);
+
+	return err;
+} /* end of FsWriteStat */
+
+/* FsMapCluster : return the cluster number in the given cluster offset */
+int FsMapCluster(struct inode *inode, s32 clu_offset, u32 *clu)
+{
+	int err;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	/* check the validity of pointer parameters */
+	if (clu == NULL)
+		return FFS_ERROR;
+
+	/* acquire the lock for file system critical section */
+	sm_P(&p_fs->v_sem);
+
+	err = ffsMapCluster(inode, clu_offset, clu);
+
+	/* release the lock for file system critical section */
+	sm_V(&p_fs->v_sem);
+
+	return err;
+} /* end of FsMapCluster */
+
+/*----------------------------------------------------------------------*/
+/*  Directory Operation Functions                                       */
+/*----------------------------------------------------------------------*/
+
+/* FsCreateDir : create(make) a directory */
+int FsCreateDir(struct inode *inode, char *path, FILE_ID_T *fid)
+{
+	int err;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	/* check the validity of pointer parameters */
+	if ((fid == NULL) || (path == NULL) || (*path == '\0'))
+		return FFS_ERROR;
+
+	/* acquire the lock for file system critical section */
+	sm_P(&p_fs->v_sem);
+
+	err = ffsCreateDir(inode, path, fid);
+
+	/* release the lock for file system critical section */
+	sm_V(&p_fs->v_sem);
+
+	return err;
+} /* end of FsCreateDir */
+
+/* FsReadDir : read a directory entry from the opened directory */
+int FsReadDir(struct inode *inode, DIR_ENTRY_T *dir_entry)
+{
+	int err;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	/* check the validity of pointer parameters */
+	if (dir_entry == NULL)
+		return FFS_ERROR;
+
+	/* acquire the lock for file system critical section */
+	sm_P(&p_fs->v_sem);
+
+	err = ffsReadDir(inode, dir_entry);
+
+	/* release the lock for file system critical section */
+	sm_V(&p_fs->v_sem);
+
+	return err;
+} /* end of FsReadDir */
+
+/* FsRemoveDir : remove a directory */
+int FsRemoveDir(struct inode *inode, FILE_ID_T *fid)
+{
+	int err;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	/* check the validity of the given file id */
+	if (fid == NULL)
+		return FFS_INVALIDFID;
+
+	/* acquire the lock for file system critical section */
+	sm_P(&p_fs->v_sem);
+
+	err = ffsRemoveDir(inode, fid);
+
+	/* release the lock for file system critical section */
+	sm_V(&p_fs->v_sem);
+
+	return err;
+} /* end of FsRemoveDir */
+
+EXPORT_SYMBOL(FsMountVol);
+EXPORT_SYMBOL(FsUmountVol);
+EXPORT_SYMBOL(FsGetVolInfo);
+EXPORT_SYMBOL(FsSyncVol);
+EXPORT_SYMBOL(FsLookupFile);
+EXPORT_SYMBOL(FsCreateFile);
+EXPORT_SYMBOL(FsReadFile);
+EXPORT_SYMBOL(FsWriteFile);
+EXPORT_SYMBOL(FsTruncateFile);
+EXPORT_SYMBOL(FsMoveFile);
+EXPORT_SYMBOL(FsRemoveFile);
+EXPORT_SYMBOL(FsSetAttr);
+EXPORT_SYMBOL(FsReadStat);
+EXPORT_SYMBOL(FsWriteStat);
+EXPORT_SYMBOL(FsMapCluster);
+EXPORT_SYMBOL(FsCreateDir);
+EXPORT_SYMBOL(FsReadDir);
+EXPORT_SYMBOL(FsRemoveDir);
+
+#ifdef CONFIG_EXFAT_KERNEL_DEBUG
+/* FsReleaseCache: Release FAT & buf cache */
+int FsReleaseCache(struct super_block *sb)
+{
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	/* acquire the lock for file system critical section */
+	sm_P(&p_fs->v_sem);
+
+	FAT_release_all(sb);
+	buf_release_all(sb);
+
+	/* release the lock for file system critical section */
+	sm_V(&p_fs->v_sem);
+
+	return 0;
+}
+/* FsReleaseCache */
+
+EXPORT_SYMBOL(FsReleaseCache);
+#endif /* CONFIG_EXFAT_KERNEL_DEBUG */
+
+/*======================================================================*/
+/*  Local Function Definitions                                          */
+/*======================================================================*/
diff --git a/fs/exfat/exfat_api.h b/fs/exfat/exfat_api.h
new file mode 100644
index 000000000..84bdf612a
--- /dev/null
+++ b/fs/exfat/exfat_api.h
@@ -0,0 +1,206 @@
+/*
+ *  Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/************************************************************************/
+/*                                                                      */
+/*  PROJECT : exFAT & FAT12/16/32 File System                           */
+/*  FILE    : exfat_api.h                                               */
+/*  PURPOSE : Header File for exFAT API Glue Layer                      */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  NOTES                                                               */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  REVISION HISTORY (Ver 0.9)                                          */
+/*                                                                      */
+/*  - 2010.11.15 [Joosun Hahn] : first writing                          */
+/*                                                                      */
+/************************************************************************/
+
+#ifndef _EXFAT_API_H
+#define _EXFAT_API_H
+
+#include <linux/fs.h>
+#include "exfat_config.h"
+
+/*----------------------------------------------------------------------*/
+/*  Constant & Macro Definitions                                        */
+/*----------------------------------------------------------------------*/
+
+#define EXFAT_SUPER_MAGIC       (0x2011BAB0L)
+#define EXFAT_ROOT_INO          1
+
+/* FAT types */
+#define FAT12                   0x01    /* FAT12 */
+#define FAT16                   0x0E    /* Win95 FAT16 (LBA) */
+#define FAT32                   0x0C    /* Win95 FAT32 (LBA) */
+#define EXFAT                   0x07    /* exFAT */
+
+/* file name lengths */
+#define MAX_CHARSET_SIZE        3       /* max size of multi-byte character	*/
+#define MAX_PATH_DEPTH          15      /* max depth of path name */
+#define MAX_NAME_LENGTH         256     /* max len of file name including NULL */
+#define MAX_PATH_LENGTH         260     /* max len of path name including NULL */
+#define DOS_NAME_LENGTH         11      /* DOS file name length excluding NULL */
+#define DOS_PATH_LENGTH         80      /* DOS path name length excluding NULL */
+
+/* file attributes */
+#define ATTR_NORMAL             0x0000
+#define ATTR_READONLY           0x0001
+#define ATTR_HIDDEN             0x0002
+#define ATTR_SYSTEM             0x0004
+#define ATTR_VOLUME             0x0008
+#define ATTR_SUBDIR             0x0010
+#define ATTR_ARCHIVE            0x0020
+#define ATTR_SYMLINK            0x0040
+#define ATTR_EXTEND             0x000F
+#define ATTR_RWMASK             0x007E
+
+/* file creation modes */
+#define FM_REGULAR              0x00
+#define FM_SYMLINK              0x40
+
+/* return values */
+#define FFS_SUCCESS             0
+#define FFS_MEDIAERR            1
+#define FFS_FORMATERR           2
+#define FFS_MOUNTED             3
+#define FFS_NOTMOUNTED          4
+#define FFS_ALIGNMENTERR        5
+#define FFS_SEMAPHOREERR        6
+#define FFS_INVALIDPATH         7
+#define FFS_INVALIDFID          8
+#define FFS_NOTFOUND            9
+#define FFS_FILEEXIST           10
+#define FFS_PERMISSIONERR       11
+#define FFS_NOTOPENED           12
+#define FFS_MAXOPENED           13
+#define FFS_FULL                14
+#define FFS_EOF                 15
+#define FFS_DIRBUSY             16
+#define FFS_MEMORYERR           17
+#define FFS_NAMETOOLONG		18
+#define FFS_ERROR               19
+
+/*----------------------------------------------------------------------*/
+/*  Type Definitions                                                    */
+/*----------------------------------------------------------------------*/
+
+typedef struct {
+	u16      Year;
+	u16      Month;
+	u16      Day;
+	u16      Hour;
+	u16      Minute;
+	u16      Second;
+	u16      MilliSecond;
+} DATE_TIME_T;
+
+typedef struct {
+	u32      Offset;    /* start sector number of the partition */
+	u32      Size;      /* in sectors */
+} PART_INFO_T;
+
+typedef struct {
+	u32      SecSize;    /* sector size in bytes */
+	u32      DevSize;    /* block device size in sectors */
+} DEV_INFO_T;
+
+typedef struct {
+	u32      FatType;
+	u32      ClusterSize;
+	u32      NumClusters;
+	u32      FreeClusters;
+	u32      UsedClusters;
+} VOL_INFO_T;
+
+/* directory structure */
+typedef struct {
+	u32      dir;
+	s32       size;
+	u8       flags;
+} CHAIN_T;
+
+/* file id structure */
+typedef struct {
+	CHAIN_T     dir;
+	s32       entry;
+	u32      type;
+	u32      attr;
+	u32      start_clu;
+	u64      size;
+	u8       flags;
+	s64       rwoffset;
+	s32       hint_last_off;
+	u32      hint_last_clu;
+} FILE_ID_T;
+
+typedef struct {
+	char        Name[MAX_NAME_LENGTH * MAX_CHARSET_SIZE];
+	char        ShortName[DOS_NAME_LENGTH + 2];     /* used only for FAT12/16/32, not used for exFAT */
+	u32      Attr;
+	u64      Size;
+	u32      NumSubdirs;
+	DATE_TIME_T CreateTimestamp;
+	DATE_TIME_T ModifyTimestamp;
+	DATE_TIME_T AccessTimestamp;
+} DIR_ENTRY_T;
+
+/*======================================================================*/
+/*                                                                      */
+/*                     API FUNCTION DECLARATIONS                        */
+/*                  (CHANGE THIS PART IF REQUIRED)                      */
+/*                                                                      */
+/*======================================================================*/
+
+/*----------------------------------------------------------------------*/
+/*  External Function Declarations                                      */
+/*----------------------------------------------------------------------*/
+
+/* file system initialization & shutdown functions */
+	int FsInit(void);
+	int FsShutdown(void);
+
+/* volume management functions */
+	int FsMountVol(struct super_block *sb);
+	int FsUmountVol(struct super_block *sb);
+	int FsGetVolInfo(struct super_block *sb, VOL_INFO_T *info);
+	int FsSyncVol(struct super_block *sb, int do_sync);
+
+/* file management functions */
+	int FsLookupFile(struct inode *inode, char *path, FILE_ID_T *fid);
+	int FsCreateFile(struct inode *inode, char *path, u8 mode, FILE_ID_T *fid);
+	int FsReadFile(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *rcount);
+	int FsWriteFile(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *wcount);
+	int FsTruncateFile(struct inode *inode, u64 old_size, u64 new_size);
+	int FsMoveFile(struct inode *old_parent_inode, FILE_ID_T *fid, struct inode *new_parent_inode, struct dentry *new_dentry);
+	int FsRemoveFile(struct inode *inode, FILE_ID_T *fid);
+	int FsSetAttr(struct inode *inode, u32 attr);
+	int FsReadStat(struct inode *inode, DIR_ENTRY_T *info);
+	int FsWriteStat(struct inode *inode, DIR_ENTRY_T *info);
+	int FsMapCluster(struct inode *inode, s32 clu_offset, u32 *clu);
+
+/* directory management functions */
+	int FsCreateDir(struct inode *inode, char *path, FILE_ID_T *fid);
+	int FsReadDir(struct inode *inode, DIR_ENTRY_T *dir_entry);
+	int FsRemoveDir(struct inode *inode, FILE_ID_T *fid);
+
+/* debug functions */
+s32 FsReleaseCache(struct super_block *sb);
+
+#endif /* _EXFAT_API_H */
diff --git a/fs/exfat/exfat_bitmap.c b/fs/exfat/exfat_bitmap.c
new file mode 100644
index 000000000..b0672dd07
--- /dev/null
+++ b/fs/exfat/exfat_bitmap.c
@@ -0,0 +1,63 @@
+/*
+ *  Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/************************************************************************/
+/*                                                                      */
+/*  PROJECT : exFAT & FAT12/16/32 File System                           */
+/*  FILE    : exfat_global.c                                            */
+/*  PURPOSE : exFAT Miscellaneous Functions                             */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  NOTES                                                               */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  REVISION HISTORY (Ver 0.9)                                          */
+/*                                                                      */
+/*  - 2010.11.15 [Joosun Hahn] : first writing                          */
+/*                                                                      */
+/************************************************************************/
+
+#include "exfat_config.h"
+#include "exfat_bitmap.h"
+
+/*----------------------------------------------------------------------*/
+/*  Bitmap Manipulation Functions                                       */
+/*----------------------------------------------------------------------*/
+
+#define BITMAP_LOC(v)           ((v) >> 3)
+#define BITMAP_SHIFT(v)         ((v) & 0x07)
+
+s32 exfat_bitmap_test(u8 *bitmap, int i)
+{
+	u8 data;
+
+	data = bitmap[BITMAP_LOC(i)];
+	if ((data >> BITMAP_SHIFT(i)) & 0x01)
+		return 1;
+	return 0;
+} /* end of Bitmap_test */
+
+void exfat_bitmap_set(u8 *bitmap, int i)
+{
+	bitmap[BITMAP_LOC(i)] |= (0x01 << BITMAP_SHIFT(i));
+} /* end of Bitmap_set */
+
+void exfat_bitmap_clear(u8 *bitmap, int i)
+{
+	bitmap[BITMAP_LOC(i)] &= ~(0x01 << BITMAP_SHIFT(i));
+} /* end of Bitmap_clear */
diff --git a/fs/exfat/exfat_bitmap.h b/fs/exfat/exfat_bitmap.h
new file mode 100644
index 000000000..4f482c7b2
--- /dev/null
+++ b/fs/exfat/exfat_bitmap.h
@@ -0,0 +1,55 @@
+/*
+ *  Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/************************************************************************/
+/*                                                                      */
+/*  PROJECT : exFAT & FAT12/16/32 File System                           */
+/*  FILE    : exfat_global.h                                            */
+/*  PURPOSE : Header File for exFAT Global Definitions & Misc Functions */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  NOTES                                                               */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  REVISION HISTORY (Ver 0.9)                                          */
+/*                                                                      */
+/*  - 2010.11.15 [Joosun Hahn] : first writing                          */
+/*                                                                      */
+/************************************************************************/
+
+#ifndef _EXFAT_BITMAP_H
+#define _EXFAT_BITMAP_H
+
+#include <linux/types.h>
+
+/*======================================================================*/
+/*                                                                      */
+/*       LIBRARY FUNCTION DECLARATIONS -- OTHER UTILITY FUNCTIONS       */
+/*                    (DO NOT CHANGE THIS PART !!)                      */
+/*                                                                      */
+/*======================================================================*/
+
+/*----------------------------------------------------------------------*/
+/*  Bitmap Manipulation Functions                                       */
+/*----------------------------------------------------------------------*/
+
+s32	exfat_bitmap_test(u8 *bitmap, int i);
+void	exfat_bitmap_set(u8 *bitmap, int i);
+void	exfat_bitmap_clear(u8 *bitmpa, int i);
+
+#endif /* _EXFAT_BITMAP_H */
diff --git a/fs/exfat/exfat_blkdev.c b/fs/exfat/exfat_blkdev.c
new file mode 100644
index 000000000..eaccfd84e
--- /dev/null
+++ b/fs/exfat/exfat_blkdev.c
@@ -0,0 +1,197 @@
+/*
+ *  Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/************************************************************************/
+/*                                                                      */
+/*  PROJECT : exFAT & FAT12/16/32 File System                           */
+/*  FILE    : exfat_blkdev.c                                            */
+/*  PURPOSE : exFAT Block Device Driver Glue Layer                      */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  NOTES                                                               */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  REVISION HISTORY (Ver 0.9)                                          */
+/*                                                                      */
+/*  - 2010.11.15 [Joosun Hahn] : first writing                          */
+/*                                                                      */
+/************************************************************************/
+
+#include <linux/blkdev.h>
+#include <linux/log2.h>
+#include "exfat_config.h"
+#include "exfat_blkdev.h"
+#include "exfat_data.h"
+#include "exfat_api.h"
+#include "exfat_super.h"
+
+/*----------------------------------------------------------------------*/
+/*  Constant & Macro Definitions                                        */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/*  Global Variable Definitions                                         */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/*  Local Variable Definitions                                          */
+/*----------------------------------------------------------------------*/
+
+/*======================================================================*/
+/*  Function Definitions                                                */
+/*======================================================================*/
+
+s32 bdev_init(void)
+{
+	return FFS_SUCCESS;
+}
+
+s32 bdev_shutdown(void)
+{
+	return FFS_SUCCESS;
+}
+
+s32 bdev_open(struct super_block *sb)
+{
+	BD_INFO_T *p_bd = &(EXFAT_SB(sb)->bd_info);
+
+	if (p_bd->opened)
+		return FFS_SUCCESS;
+
+	p_bd->sector_size      = bdev_logical_block_size(sb->s_bdev);
+	p_bd->sector_size_bits = ilog2(p_bd->sector_size);
+	p_bd->sector_size_mask = p_bd->sector_size - 1;
+	p_bd->num_sectors      = i_size_read(sb->s_bdev->bd_inode) >> p_bd->sector_size_bits;
+
+	p_bd->opened = TRUE;
+
+	return FFS_SUCCESS;
+}
+
+s32 bdev_close(struct super_block *sb)
+{
+	BD_INFO_T *p_bd = &(EXFAT_SB(sb)->bd_info);
+
+	if (!p_bd->opened)
+		return FFS_SUCCESS;
+
+	p_bd->opened = FALSE;
+	return FFS_SUCCESS;
+}
+
+s32 bdev_read(struct super_block *sb, sector_t secno, struct buffer_head **bh, u32 num_secs, s32 read)
+{
+	BD_INFO_T *p_bd = &(EXFAT_SB(sb)->bd_info);
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+#ifdef CONFIG_EXFAT_KERNEL_DEBUG
+	struct exfat_sb_info *sbi = EXFAT_SB(sb);
+	long flags = sbi->debug_flags;
+
+	if (flags & EXFAT_DEBUGFLAGS_ERROR_RW)
+		return FFS_MEDIAERR;
+#endif /* CONFIG_EXFAT_KERNEL_DEBUG */
+
+	if (!p_bd->opened)
+		return FFS_MEDIAERR;
+
+	if (*bh)
+		__brelse(*bh);
+
+	if (read)
+		*bh = __bread(sb->s_bdev, secno, num_secs << p_bd->sector_size_bits);
+	else
+		*bh = __getblk(sb->s_bdev, secno, num_secs << p_bd->sector_size_bits);
+
+	if (*bh)
+		return FFS_SUCCESS;
+
+	WARN(!p_fs->dev_ejected,
+		"[EXFAT] No bh, device seems wrong or to be ejected.\n");
+
+	return FFS_MEDIAERR;
+}
+
+s32 bdev_write(struct super_block *sb, sector_t secno, struct buffer_head *bh, u32 num_secs, s32 sync)
+{
+	s32 count;
+	struct buffer_head *bh2;
+	BD_INFO_T *p_bd = &(EXFAT_SB(sb)->bd_info);
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+#ifdef CONFIG_EXFAT_KERNEL_DEBUG
+	struct exfat_sb_info *sbi = EXFAT_SB(sb);
+	long flags = sbi->debug_flags;
+
+	if (flags & EXFAT_DEBUGFLAGS_ERROR_RW)
+		return FFS_MEDIAERR;
+#endif /* CONFIG_EXFAT_KERNEL_DEBUG */
+
+	if (!p_bd->opened)
+		return FFS_MEDIAERR;
+
+	if (secno == bh->b_blocknr) {
+		lock_buffer(bh);
+		set_buffer_uptodate(bh);
+		mark_buffer_dirty(bh);
+		unlock_buffer(bh);
+		if (sync && (sync_dirty_buffer(bh) != 0))
+			return FFS_MEDIAERR;
+	} else {
+		count = num_secs << p_bd->sector_size_bits;
+
+		bh2 = __getblk(sb->s_bdev, secno, count);
+
+		if (bh2 == NULL)
+			goto no_bh;
+
+		lock_buffer(bh2);
+		memcpy(bh2->b_data, bh->b_data, count);
+		set_buffer_uptodate(bh2);
+		mark_buffer_dirty(bh2);
+		unlock_buffer(bh2);
+		if (sync && (sync_dirty_buffer(bh2) != 0)) {
+			__brelse(bh2);
+			goto no_bh;
+		}
+		__brelse(bh2);
+	}
+
+	return FFS_SUCCESS;
+
+no_bh:
+	WARN(!p_fs->dev_ejected,
+		"[EXFAT] No bh, device seems wrong or to be ejected.\n");
+
+	return FFS_MEDIAERR;
+}
+
+s32 bdev_sync(struct super_block *sb)
+{
+	BD_INFO_T *p_bd = &(EXFAT_SB(sb)->bd_info);
+#ifdef CONFIG_EXFAT_KERNEL_DEBUG
+	struct exfat_sb_info *sbi = EXFAT_SB(sb);
+	long flags = sbi->debug_flags;
+
+	if (flags & EXFAT_DEBUGFLAGS_ERROR_RW)
+		return FFS_MEDIAERR;
+#endif /* CONFIG_EXFAT_KERNEL_DEBUG */
+
+	if (!p_bd->opened)
+		return FFS_MEDIAERR;
+
+	return sync_blockdev(sb->s_bdev);
+}
diff --git a/fs/exfat/exfat_blkdev.h b/fs/exfat/exfat_blkdev.h
new file mode 100644
index 000000000..3363b591c
--- /dev/null
+++ b/fs/exfat/exfat_blkdev.h
@@ -0,0 +1,73 @@
+/*
+ *  Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/************************************************************************/
+/*                                                                      */
+/*  PROJECT : exFAT & FAT12/16/32 File System                           */
+/*  FILE    : exfat_blkdev.h                                            */
+/*  PURPOSE : Header File for exFAT Block Device Driver Glue Layer      */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  NOTES                                                               */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  REVISION HISTORY (Ver 0.9)                                          */
+/*                                                                      */
+/*  - 2010.11.15 [Joosun Hahn] : first writing                          */
+/*                                                                      */
+/************************************************************************/
+
+#ifndef _EXFAT_BLKDEV_H
+#define _EXFAT_BLKDEV_H
+
+#include <linux/fs.h>
+#include "exfat_config.h"
+
+/*----------------------------------------------------------------------*/
+/*  Constant & Macro Definitions (Non-Configurable)                     */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/*  Type Definitions                                                    */
+/*----------------------------------------------------------------------*/
+
+typedef struct __BD_INFO_T {
+	s32 sector_size;      /* in bytes */
+	s32 sector_size_bits;
+	s32 sector_size_mask;
+	s32 num_sectors;      /* total number of sectors in this block device */
+	bool  opened;           /* opened or not */
+} BD_INFO_T;
+
+/*----------------------------------------------------------------------*/
+/*  External Variable Declarations                                      */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/*  External Function Declarations                                      */
+/*----------------------------------------------------------------------*/
+
+s32 bdev_init(void);
+s32 bdev_shutdown(void);
+s32 bdev_open(struct super_block *sb);
+s32 bdev_close(struct super_block *sb);
+s32 bdev_read(struct super_block *sb, sector_t secno, struct buffer_head **bh, u32 num_secs, s32 read);
+s32 bdev_write(struct super_block *sb, sector_t secno, struct buffer_head *bh, u32 num_secs, s32 sync);
+s32 bdev_sync(struct super_block *sb);
+
+#endif /* _EXFAT_BLKDEV_H */
diff --git a/fs/exfat/exfat_cache.c b/fs/exfat/exfat_cache.c
new file mode 100644
index 000000000..4130102e3
--- /dev/null
+++ b/fs/exfat/exfat_cache.c
@@ -0,0 +1,784 @@
+/*
+ *  Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/************************************************************************/
+/*                                                                      */
+/*  PROJECT : exFAT & FAT12/16/32 File System                           */
+/*  FILE    : exfat_cache.c                                             */
+/*  PURPOSE : exFAT Cache Manager                                       */
+/*            (FAT Cache & Buffer Cache)                                */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  NOTES                                                               */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  REVISION HISTORY (Ver 0.9)                                          */
+/*                                                                      */
+/*  - 2010.11.15 [Sung-Kwan Kim] : first writing                        */
+/*                                                                      */
+/************************************************************************/
+
+#include "exfat_config.h"
+#include "exfat_data.h"
+
+#include "exfat_cache.h"
+#include "exfat_super.h"
+#include "exfat_core.h"
+
+/*----------------------------------------------------------------------*/
+/*  Global Variable Definitions                                         */
+/*----------------------------------------------------------------------*/
+
+#define sm_P(s)
+#define sm_V(s)
+
+static s32 __FAT_read(struct super_block *sb, u32 loc, u32 *content);
+static s32 __FAT_write(struct super_block *sb, u32 loc, u32 content);
+
+static BUF_CACHE_T *FAT_cache_find(struct super_block *sb, sector_t sec);
+static BUF_CACHE_T *FAT_cache_get(struct super_block *sb, sector_t sec);
+static void FAT_cache_insert_hash(struct super_block *sb, BUF_CACHE_T *bp);
+static void FAT_cache_remove_hash(BUF_CACHE_T *bp);
+
+static u8 *__buf_getblk(struct super_block *sb, sector_t sec);
+
+static BUF_CACHE_T *buf_cache_find(struct super_block *sb, sector_t sec);
+static BUF_CACHE_T *buf_cache_get(struct super_block *sb, sector_t sec);
+static void buf_cache_insert_hash(struct super_block *sb, BUF_CACHE_T *bp);
+static void buf_cache_remove_hash(BUF_CACHE_T *bp);
+
+static void push_to_mru(BUF_CACHE_T *bp, BUF_CACHE_T *list);
+static void push_to_lru(BUF_CACHE_T *bp, BUF_CACHE_T *list);
+static void move_to_mru(BUF_CACHE_T *bp, BUF_CACHE_T *list);
+static void move_to_lru(BUF_CACHE_T *bp, BUF_CACHE_T *list);
+
+/*======================================================================*/
+/*  Cache Initialization Functions                                      */
+/*======================================================================*/
+
+s32 buf_init(struct super_block *sb)
+{
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	int i;
+
+	/* LRU list */
+	p_fs->FAT_cache_lru_list.next = p_fs->FAT_cache_lru_list.prev = &p_fs->FAT_cache_lru_list;
+
+	for (i = 0; i < FAT_CACHE_SIZE; i++) {
+		p_fs->FAT_cache_array[i].drv = -1;
+		p_fs->FAT_cache_array[i].sec = ~0;
+		p_fs->FAT_cache_array[i].flag = 0;
+		p_fs->FAT_cache_array[i].buf_bh = NULL;
+		p_fs->FAT_cache_array[i].prev = p_fs->FAT_cache_array[i].next = NULL;
+		push_to_mru(&(p_fs->FAT_cache_array[i]), &p_fs->FAT_cache_lru_list);
+	}
+
+	p_fs->buf_cache_lru_list.next = p_fs->buf_cache_lru_list.prev = &p_fs->buf_cache_lru_list;
+
+	for (i = 0; i < BUF_CACHE_SIZE; i++) {
+		p_fs->buf_cache_array[i].drv = -1;
+		p_fs->buf_cache_array[i].sec = ~0;
+		p_fs->buf_cache_array[i].flag = 0;
+		p_fs->buf_cache_array[i].buf_bh = NULL;
+		p_fs->buf_cache_array[i].prev = p_fs->buf_cache_array[i].next = NULL;
+		push_to_mru(&(p_fs->buf_cache_array[i]), &p_fs->buf_cache_lru_list);
+	}
+
+	/* HASH list */
+	for (i = 0; i < FAT_CACHE_HASH_SIZE; i++) {
+		p_fs->FAT_cache_hash_list[i].drv = -1;
+		p_fs->FAT_cache_hash_list[i].sec = ~0;
+		p_fs->FAT_cache_hash_list[i].hash_next = p_fs->FAT_cache_hash_list[i].hash_prev = &(p_fs->FAT_cache_hash_list[i]);
+	}
+
+	for (i = 0; i < FAT_CACHE_SIZE; i++)
+		FAT_cache_insert_hash(sb, &(p_fs->FAT_cache_array[i]));
+
+	for (i = 0; i < BUF_CACHE_HASH_SIZE; i++) {
+		p_fs->buf_cache_hash_list[i].drv = -1;
+		p_fs->buf_cache_hash_list[i].sec = ~0;
+		p_fs->buf_cache_hash_list[i].hash_next = p_fs->buf_cache_hash_list[i].hash_prev = &(p_fs->buf_cache_hash_list[i]);
+	}
+
+	for (i = 0; i < BUF_CACHE_SIZE; i++)
+		buf_cache_insert_hash(sb, &(p_fs->buf_cache_array[i]));
+
+	return FFS_SUCCESS;
+} /* end of buf_init */
+
+s32 buf_shutdown(struct super_block *sb)
+{
+	return FFS_SUCCESS;
+} /* end of buf_shutdown */
+
+/*======================================================================*/
+/*  FAT Read/Write Functions                                            */
+/*======================================================================*/
+
+/* in : sb, loc
+  * out: content
+  * returns 0 on success
+  *            -1 on error
+  */
+s32 FAT_read(struct super_block *sb, u32 loc, u32 *content)
+{
+	s32 ret;
+
+	sm_P(&f_sem);
+
+	ret = __FAT_read(sb, loc, content);
+
+	sm_V(&f_sem);
+
+	return ret;
+} /* end of FAT_read */
+
+s32 FAT_write(struct super_block *sb, u32 loc, u32 content)
+{
+	s32 ret;
+
+	sm_P(&f_sem);
+
+	ret = __FAT_write(sb, loc, content);
+
+	sm_V(&f_sem);
+
+	return ret;
+} /* end of FAT_write */
+
+static s32 __FAT_read(struct super_block *sb, u32 loc, u32 *content)
+{
+	s32 off;
+	u32 _content;
+	sector_t sec;
+	u8 *fat_sector, *fat_entry;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	BD_INFO_T *p_bd = &(EXFAT_SB(sb)->bd_info);
+
+	if (p_fs->vol_type == FAT12) {
+		sec = p_fs->FAT1_start_sector + ((loc + (loc >> 1)) >> p_bd->sector_size_bits);
+		off = (loc + (loc >> 1)) & p_bd->sector_size_mask;
+
+		if (off == (p_bd->sector_size-1)) {
+			fat_sector = FAT_getblk(sb, sec);
+			if (!fat_sector)
+				return -1;
+
+			_content  = (u32) fat_sector[off];
+
+			fat_sector = FAT_getblk(sb, ++sec);
+			if (!fat_sector)
+				return -1;
+
+			_content |= (u32) fat_sector[0] << 8;
+		} else {
+			fat_sector = FAT_getblk(sb, sec);
+			if (!fat_sector)
+				return -1;
+
+			fat_entry = &(fat_sector[off]);
+			_content = GET16(fat_entry);
+		}
+
+		if (loc & 1)
+			_content >>= 4;
+
+		_content &= 0x00000FFF;
+
+		if (_content >= CLUSTER_16(0x0FF8)) {
+			*content = CLUSTER_32(~0);
+			return 0;
+		} else {
+			*content = CLUSTER_32(_content);
+			return 0;
+		}
+	} else if (p_fs->vol_type == FAT16) {
+		sec = p_fs->FAT1_start_sector + (loc >> (p_bd->sector_size_bits-1));
+		off = (loc << 1) & p_bd->sector_size_mask;
+
+		fat_sector = FAT_getblk(sb, sec);
+		if (!fat_sector)
+			return -1;
+
+		fat_entry = &(fat_sector[off]);
+
+		_content = GET16_A(fat_entry);
+
+		_content &= 0x0000FFFF;
+
+		if (_content >= CLUSTER_16(0xFFF8)) {
+			*content = CLUSTER_32(~0);
+			return 0;
+		} else {
+			*content = CLUSTER_32(_content);
+			return 0;
+		}
+	} else if (p_fs->vol_type == FAT32) {
+		sec = p_fs->FAT1_start_sector + (loc >> (p_bd->sector_size_bits-2));
+		off = (loc << 2) & p_bd->sector_size_mask;
+
+		fat_sector = FAT_getblk(sb, sec);
+		if (!fat_sector)
+			return -1;
+
+		fat_entry = &(fat_sector[off]);
+
+		_content = GET32_A(fat_entry);
+
+		_content &= 0x0FFFFFFF;
+
+		if (_content >= CLUSTER_32(0x0FFFFFF8)) {
+			*content = CLUSTER_32(~0);
+			return 0;
+		} else {
+			*content = CLUSTER_32(_content);
+			return 0;
+		}
+	} else {
+		sec = p_fs->FAT1_start_sector + (loc >> (p_bd->sector_size_bits-2));
+		off = (loc << 2) & p_bd->sector_size_mask;
+
+		fat_sector = FAT_getblk(sb, sec);
+		if (!fat_sector)
+			return -1;
+
+		fat_entry = &(fat_sector[off]);
+		_content = GET32_A(fat_entry);
+
+		if (_content >= CLUSTER_32(0xFFFFFFF8)) {
+			*content = CLUSTER_32(~0);
+			return 0;
+		} else {
+			*content = CLUSTER_32(_content);
+			return 0;
+		}
+	}
+
+	*content = CLUSTER_32(~0);
+	return 0;
+} /* end of __FAT_read */
+
+static s32 __FAT_write(struct super_block *sb, u32 loc, u32 content)
+{
+	s32 off;
+	sector_t sec;
+	u8 *fat_sector, *fat_entry;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	BD_INFO_T *p_bd = &(EXFAT_SB(sb)->bd_info);
+
+	if (p_fs->vol_type == FAT12) {
+
+		content &= 0x00000FFF;
+
+		sec = p_fs->FAT1_start_sector + ((loc + (loc >> 1)) >> p_bd->sector_size_bits);
+		off = (loc + (loc >> 1)) & p_bd->sector_size_mask;
+
+		fat_sector = FAT_getblk(sb, sec);
+		if (!fat_sector)
+			return -1;
+
+		if (loc & 1) { /* odd */
+
+			content <<= 4;
+
+			if (off == (p_bd->sector_size-1)) {
+				fat_sector[off] = (u8)(content | (fat_sector[off] & 0x0F));
+				FAT_modify(sb, sec);
+
+				fat_sector = FAT_getblk(sb, ++sec);
+				if (!fat_sector)
+					return -1;
+
+				fat_sector[0] = (u8)(content >> 8);
+			} else {
+				fat_entry = &(fat_sector[off]);
+				content |= GET16(fat_entry) & 0x000F;
+
+				SET16(fat_entry, content);
+			}
+		} else { /* even */
+			fat_sector[off] = (u8)(content);
+
+			if (off == (p_bd->sector_size-1)) {
+				fat_sector[off] = (u8)(content);
+				FAT_modify(sb, sec);
+
+				fat_sector = FAT_getblk(sb, ++sec);
+				fat_sector[0] = (u8)((fat_sector[0] & 0xF0) | (content >> 8));
+			} else {
+				fat_entry = &(fat_sector[off]);
+				content |= GET16(fat_entry) & 0xF000;
+
+				SET16(fat_entry, content);
+			}
+		}
+	}
+
+	else if (p_fs->vol_type == FAT16) {
+
+		content &= 0x0000FFFF;
+
+		sec = p_fs->FAT1_start_sector + (loc >> (p_bd->sector_size_bits-1));
+		off = (loc << 1) & p_bd->sector_size_mask;
+
+		fat_sector = FAT_getblk(sb, sec);
+		if (!fat_sector)
+			return -1;
+
+		fat_entry = &(fat_sector[off]);
+
+		SET16_A(fat_entry, content);
+	}
+
+	else if (p_fs->vol_type == FAT32) {
+
+		content &= 0x0FFFFFFF;
+
+		sec = p_fs->FAT1_start_sector + (loc >> (p_bd->sector_size_bits-2));
+		off = (loc << 2) & p_bd->sector_size_mask;
+
+		fat_sector = FAT_getblk(sb, sec);
+		if (!fat_sector)
+			return -1;
+
+		fat_entry = &(fat_sector[off]);
+
+		content |= GET32_A(fat_entry) & 0xF0000000;
+
+		SET32_A(fat_entry, content);
+	}
+
+	else { /* p_fs->vol_type == EXFAT */
+
+		sec = p_fs->FAT1_start_sector + (loc >> (p_bd->sector_size_bits-2));
+		off = (loc << 2) & p_bd->sector_size_mask;
+
+		fat_sector = FAT_getblk(sb, sec);
+		if (!fat_sector)
+			return -1;
+
+		fat_entry = &(fat_sector[off]);
+
+		SET32_A(fat_entry, content);
+	}
+
+	FAT_modify(sb, sec);
+	return 0;
+} /* end of __FAT_write */
+
+u8 *FAT_getblk(struct super_block *sb, sector_t sec)
+{
+	BUF_CACHE_T *bp;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	bp = FAT_cache_find(sb, sec);
+	if (bp != NULL) {
+		move_to_mru(bp, &p_fs->FAT_cache_lru_list);
+		return bp->buf_bh->b_data;
+	}
+
+	bp = FAT_cache_get(sb, sec);
+
+	FAT_cache_remove_hash(bp);
+
+	bp->drv = p_fs->drv;
+	bp->sec = sec;
+	bp->flag = 0;
+
+	FAT_cache_insert_hash(sb, bp);
+
+	if (sector_read(sb, sec, &(bp->buf_bh), 1) != FFS_SUCCESS) {
+		FAT_cache_remove_hash(bp);
+		bp->drv = -1;
+		bp->sec = ~0;
+		bp->flag = 0;
+		bp->buf_bh = NULL;
+
+		move_to_lru(bp, &p_fs->FAT_cache_lru_list);
+		return NULL;
+	}
+
+	return bp->buf_bh->b_data;
+} /* end of FAT_getblk */
+
+void FAT_modify(struct super_block *sb, sector_t sec)
+{
+	BUF_CACHE_T *bp;
+
+	bp = FAT_cache_find(sb, sec);
+	if (bp != NULL)
+		sector_write(sb, sec, bp->buf_bh, 0);
+} /* end of FAT_modify */
+
+void FAT_release_all(struct super_block *sb)
+{
+	BUF_CACHE_T *bp;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	sm_P(&f_sem);
+
+	bp = p_fs->FAT_cache_lru_list.next;
+	while (bp != &p_fs->FAT_cache_lru_list) {
+		if (bp->drv == p_fs->drv) {
+			bp->drv = -1;
+			bp->sec = ~0;
+			bp->flag = 0;
+
+			if (bp->buf_bh) {
+				__brelse(bp->buf_bh);
+				bp->buf_bh = NULL;
+			}
+		}
+		bp = bp->next;
+	}
+
+	sm_V(&f_sem);
+} /* end of FAT_release_all */
+
+void FAT_sync(struct super_block *sb)
+{
+	BUF_CACHE_T *bp;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	sm_P(&f_sem);
+
+	bp = p_fs->FAT_cache_lru_list.next;
+	while (bp != &p_fs->FAT_cache_lru_list) {
+		if ((bp->drv == p_fs->drv) && (bp->flag & DIRTYBIT)) {
+			sync_dirty_buffer(bp->buf_bh);
+			bp->flag &= ~(DIRTYBIT);
+		}
+		bp = bp->next;
+	}
+
+	sm_V(&f_sem);
+} /* end of FAT_sync */
+
+static BUF_CACHE_T *FAT_cache_find(struct super_block *sb, sector_t sec)
+{
+	s32 off;
+	BUF_CACHE_T *bp, *hp;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	off = (sec + (sec >> p_fs->sectors_per_clu_bits)) & (FAT_CACHE_HASH_SIZE - 1);
+
+	hp = &(p_fs->FAT_cache_hash_list[off]);
+	for (bp = hp->hash_next; bp != hp; bp = bp->hash_next) {
+		if ((bp->drv == p_fs->drv) && (bp->sec == sec)) {
+
+			WARN(!bp->buf_bh, "[EXFAT] FAT_cache has no bh. "
+					  "It will make system panic.\n");
+
+			touch_buffer(bp->buf_bh);
+			return bp;
+		}
+	}
+	return NULL;
+} /* end of FAT_cache_find */
+
+static BUF_CACHE_T *FAT_cache_get(struct super_block *sb, sector_t sec)
+{
+	BUF_CACHE_T *bp;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	bp = p_fs->FAT_cache_lru_list.prev;
+
+
+	move_to_mru(bp, &p_fs->FAT_cache_lru_list);
+	return bp;
+} /* end of FAT_cache_get */
+
+static void FAT_cache_insert_hash(struct super_block *sb, BUF_CACHE_T *bp)
+{
+	s32 off;
+	BUF_CACHE_T *hp;
+	FS_INFO_T *p_fs;
+
+	p_fs = &(EXFAT_SB(sb)->fs_info);
+	off = (bp->sec + (bp->sec >> p_fs->sectors_per_clu_bits)) & (FAT_CACHE_HASH_SIZE-1);
+
+	hp = &(p_fs->FAT_cache_hash_list[off]);
+	bp->hash_next = hp->hash_next;
+	bp->hash_prev = hp;
+	hp->hash_next->hash_prev = bp;
+	hp->hash_next = bp;
+} /* end of FAT_cache_insert_hash */
+
+static void FAT_cache_remove_hash(BUF_CACHE_T *bp)
+{
+	(bp->hash_prev)->hash_next = bp->hash_next;
+	(bp->hash_next)->hash_prev = bp->hash_prev;
+} /* end of FAT_cache_remove_hash */
+
+/*======================================================================*/
+/*  Buffer Read/Write Functions                                         */
+/*======================================================================*/
+
+u8 *buf_getblk(struct super_block *sb, sector_t sec)
+{
+	u8 *buf;
+
+	sm_P(&b_sem);
+
+	buf = __buf_getblk(sb, sec);
+
+	sm_V(&b_sem);
+
+	return buf;
+} /* end of buf_getblk */
+
+static u8 *__buf_getblk(struct super_block *sb, sector_t sec)
+{
+	BUF_CACHE_T *bp;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	bp = buf_cache_find(sb, sec);
+	if (bp != NULL) {
+		move_to_mru(bp, &p_fs->buf_cache_lru_list);
+		return bp->buf_bh->b_data;
+	}
+
+	bp = buf_cache_get(sb, sec);
+
+	buf_cache_remove_hash(bp);
+
+	bp->drv = p_fs->drv;
+	bp->sec = sec;
+	bp->flag = 0;
+
+	buf_cache_insert_hash(sb, bp);
+
+	if (sector_read(sb, sec, &(bp->buf_bh), 1) != FFS_SUCCESS) {
+		buf_cache_remove_hash(bp);
+		bp->drv = -1;
+		bp->sec = ~0;
+		bp->flag = 0;
+		bp->buf_bh = NULL;
+
+		move_to_lru(bp, &p_fs->buf_cache_lru_list);
+		return NULL;
+	}
+
+	return bp->buf_bh->b_data;
+
+} /* end of __buf_getblk */
+
+void buf_modify(struct super_block *sb, sector_t sec)
+{
+	BUF_CACHE_T *bp;
+
+	sm_P(&b_sem);
+
+	bp = buf_cache_find(sb, sec);
+	if (likely(bp != NULL))
+		sector_write(sb, sec, bp->buf_bh, 0);
+
+	WARN(!bp, "[EXFAT] failed to find buffer_cache(sector:%llu).\n",
+	     (unsigned long long)sec);
+
+	sm_V(&b_sem);
+} /* end of buf_modify */
+
+void buf_lock(struct super_block *sb, sector_t sec)
+{
+	BUF_CACHE_T *bp;
+
+	sm_P(&b_sem);
+
+	bp = buf_cache_find(sb, sec);
+	if (likely(bp != NULL))
+		bp->flag |= LOCKBIT;
+
+	WARN(!bp, "[EXFAT] failed to find buffer_cache(sector:%llu).\n",
+	     (unsigned long long)sec);
+
+	sm_V(&b_sem);
+} /* end of buf_lock */
+
+void buf_unlock(struct super_block *sb, sector_t sec)
+{
+	BUF_CACHE_T *bp;
+
+	sm_P(&b_sem);
+
+	bp = buf_cache_find(sb, sec);
+	if (likely(bp != NULL))
+		bp->flag &= ~(LOCKBIT);
+
+	WARN(!bp, "[EXFAT] failed to find buffer_cache(sector:%llu).\n",
+	     (unsigned long long)sec);
+
+	sm_V(&b_sem);
+} /* end of buf_unlock */
+
+void buf_release(struct super_block *sb, sector_t sec)
+{
+	BUF_CACHE_T *bp;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	sm_P(&b_sem);
+
+	bp = buf_cache_find(sb, sec);
+	if (likely(bp != NULL)) {
+		bp->drv = -1;
+		bp->sec = ~0;
+		bp->flag = 0;
+
+		if (bp->buf_bh) {
+			__brelse(bp->buf_bh);
+			bp->buf_bh = NULL;
+		}
+
+		move_to_lru(bp, &p_fs->buf_cache_lru_list);
+	}
+
+	sm_V(&b_sem);
+} /* end of buf_release */
+
+void buf_release_all(struct super_block *sb)
+{
+	BUF_CACHE_T *bp;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	sm_P(&b_sem);
+
+	bp = p_fs->buf_cache_lru_list.next;
+	while (bp != &p_fs->buf_cache_lru_list) {
+		if (bp->drv == p_fs->drv) {
+			bp->drv = -1;
+			bp->sec = ~0;
+			bp->flag = 0;
+
+			if (bp->buf_bh) {
+				__brelse(bp->buf_bh);
+				bp->buf_bh = NULL;
+			}
+		}
+		bp = bp->next;
+	}
+
+	sm_V(&b_sem);
+} /* end of buf_release_all */
+
+void buf_sync(struct super_block *sb)
+{
+	BUF_CACHE_T *bp;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	sm_P(&b_sem);
+
+	bp = p_fs->buf_cache_lru_list.next;
+	while (bp != &p_fs->buf_cache_lru_list) {
+		if ((bp->drv == p_fs->drv) && (bp->flag & DIRTYBIT)) {
+			sync_dirty_buffer(bp->buf_bh);
+			bp->flag &= ~(DIRTYBIT);
+		}
+		bp = bp->next;
+	}
+
+	sm_V(&b_sem);
+} /* end of buf_sync */
+
+static BUF_CACHE_T *buf_cache_find(struct super_block *sb, sector_t sec)
+{
+	s32 off;
+	BUF_CACHE_T *bp, *hp;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	off = (sec + (sec >> p_fs->sectors_per_clu_bits)) & (BUF_CACHE_HASH_SIZE - 1);
+
+	hp = &(p_fs->buf_cache_hash_list[off]);
+	for (bp = hp->hash_next; bp != hp; bp = bp->hash_next) {
+		if ((bp->drv == p_fs->drv) && (bp->sec == sec)) {
+			touch_buffer(bp->buf_bh);
+			return bp;
+		}
+	}
+	return NULL;
+} /* end of buf_cache_find */
+
+static BUF_CACHE_T *buf_cache_get(struct super_block *sb, sector_t sec)
+{
+	BUF_CACHE_T *bp;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	bp = p_fs->buf_cache_lru_list.prev;
+	while (bp->flag & LOCKBIT)
+		bp = bp->prev;
+
+
+	move_to_mru(bp, &p_fs->buf_cache_lru_list);
+	return bp;
+} /* end of buf_cache_get */
+
+static void buf_cache_insert_hash(struct super_block *sb, BUF_CACHE_T *bp)
+{
+	s32 off;
+	BUF_CACHE_T *hp;
+	FS_INFO_T *p_fs;
+
+	p_fs = &(EXFAT_SB(sb)->fs_info);
+	off = (bp->sec + (bp->sec >> p_fs->sectors_per_clu_bits)) & (BUF_CACHE_HASH_SIZE-1);
+
+	hp = &(p_fs->buf_cache_hash_list[off]);
+	bp->hash_next = hp->hash_next;
+	bp->hash_prev = hp;
+	hp->hash_next->hash_prev = bp;
+	hp->hash_next = bp;
+} /* end of buf_cache_insert_hash */
+
+static void buf_cache_remove_hash(BUF_CACHE_T *bp)
+{
+	(bp->hash_prev)->hash_next = bp->hash_next;
+	(bp->hash_next)->hash_prev = bp->hash_prev;
+} /* end of buf_cache_remove_hash */
+
+/*======================================================================*/
+/*  Local Function Definitions                                          */
+/*======================================================================*/
+
+static void push_to_mru(BUF_CACHE_T *bp, BUF_CACHE_T *list)
+{
+	bp->next = list->next;
+	bp->prev = list;
+	list->next->prev = bp;
+	list->next = bp;
+} /* end of buf_cache_push_to_mru */
+
+static void push_to_lru(BUF_CACHE_T *bp, BUF_CACHE_T *list)
+{
+	bp->prev = list->prev;
+	bp->next = list;
+	list->prev->next = bp;
+	list->prev = bp;
+} /* end of buf_cache_push_to_lru */
+
+static void move_to_mru(BUF_CACHE_T *bp, BUF_CACHE_T *list)
+{
+	bp->prev->next = bp->next;
+	bp->next->prev = bp->prev;
+	push_to_mru(bp, list);
+} /* end of buf_cache_move_to_mru */
+
+static void move_to_lru(BUF_CACHE_T *bp, BUF_CACHE_T *list)
+{
+	bp->prev->next = bp->next;
+	bp->next->prev = bp->prev;
+	push_to_lru(bp, list);
+} /* end of buf_cache_move_to_lru */
diff --git a/fs/exfat/exfat_cache.h b/fs/exfat/exfat_cache.h
new file mode 100644
index 000000000..540e31681
--- /dev/null
+++ b/fs/exfat/exfat_cache.h
@@ -0,0 +1,85 @@
+/*
+ *  Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/************************************************************************/
+/*                                                                      */
+/*  PROJECT : exFAT & FAT12/16/32 File System                           */
+/*  FILE    : exfat_cache.h                                             */
+/*  PURPOSE : Header File for exFAT Cache Manager                       */
+/*            (FAT Cache & Buffer Cache)                                */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  NOTES                                                               */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  REVISION HISTORY (Ver 0.9)                                          */
+/*                                                                      */
+/*  - 2010.11.15 [Sung-Kwan Kim] : first writing                        */
+/*                                                                      */
+/************************************************************************/
+
+#ifndef _EXFAT_CACHE_H
+#define _EXFAT_CACHE_H
+
+#include <linux/fs.h>
+#include <linux/types.h>
+#include "exfat_config.h"
+
+/*----------------------------------------------------------------------*/
+/*  Constant & Macro Definitions                                        */
+/*----------------------------------------------------------------------*/
+
+#define LOCKBIT                 0x01
+#define DIRTYBIT                0x02
+
+/*----------------------------------------------------------------------*/
+/*  Type Definitions                                                    */
+/*----------------------------------------------------------------------*/
+
+typedef struct __BUF_CACHE_T {
+	struct __BUF_CACHE_T *next;
+	struct __BUF_CACHE_T *prev;
+	struct __BUF_CACHE_T *hash_next;
+	struct __BUF_CACHE_T *hash_prev;
+	s32                drv;
+	sector_t          sec;
+	u32               flag;
+	struct buffer_head   *buf_bh;
+} BUF_CACHE_T;
+
+/*----------------------------------------------------------------------*/
+/*  External Function Declarations                                      */
+/*----------------------------------------------------------------------*/
+
+s32  buf_init(struct super_block *sb);
+s32  buf_shutdown(struct super_block *sb);
+s32  FAT_read(struct super_block *sb, u32 loc, u32 *content);
+s32  FAT_write(struct super_block *sb, u32 loc, u32 content);
+u8 *FAT_getblk(struct super_block *sb, sector_t sec);
+void   FAT_modify(struct super_block *sb, sector_t sec);
+void   FAT_release_all(struct super_block *sb);
+void   FAT_sync(struct super_block *sb);
+u8 *buf_getblk(struct super_block *sb, sector_t sec);
+void   buf_modify(struct super_block *sb, sector_t sec);
+void   buf_lock(struct super_block *sb, sector_t sec);
+void   buf_unlock(struct super_block *sb, sector_t sec);
+void   buf_release(struct super_block *sb, sector_t sec);
+void   buf_release_all(struct super_block *sb);
+void   buf_sync(struct super_block *sb);
+
+#endif /* _EXFAT_CACHE_H */
diff --git a/fs/exfat/exfat_config.h b/fs/exfat/exfat_config.h
new file mode 100644
index 000000000..33c6525e4
--- /dev/null
+++ b/fs/exfat/exfat_config.h
@@ -0,0 +1,69 @@
+/*
+ *  Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/************************************************************************/
+/*                                                                      */
+/*  PROJECT : exFAT & FAT12/16/32 File System                           */
+/*  FILE    : exfat_config.h                                            */
+/*  PURPOSE : Header File for exFAT Configuable Policies                */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  NOTES                                                               */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  REVISION HISTORY (Ver 0.9)                                          */
+/*                                                                      */
+/*  - 2010.11.15 [Joosun Hahn] : first writing                          */
+/*                                                                      */
+/************************************************************************/
+
+#ifndef _EXFAT_CONFIG_H
+#define _EXFAT_CONFIG_H
+
+/*======================================================================*/
+/*                                                                      */
+/*                        FFS CONFIGURATIONS                            */
+/*                  (CHANGE THIS PART IF REQUIRED)                      */
+/*                                                                      */
+/*======================================================================*/
+
+/*----------------------------------------------------------------------*/
+/* Feature Config                                                       */
+/*----------------------------------------------------------------------*/
+#ifndef CONFIG_EXFAT_DISCARD
+#define CONFIG_EXFAT_DISCARD		1	/* mount option -o discard support */
+#endif
+
+#ifndef CONFIG_EXFAT_DELAYED_SYNC
+#define CONFIG_EXFAT_DELAYED_SYNC 0
+#endif
+
+#ifndef CONFIG_EXFAT_KERNEL_DEBUG
+#define CONFIG_EXFAT_KERNEL_DEBUG	1	/* kernel debug features via ioctl */
+#endif
+
+#ifndef CONFIG_EXFAT_DEBUG_MSG
+#define CONFIG_EXFAT_DEBUG_MSG		0	/* debugging message on/off */
+#endif
+
+#ifndef CONFIG_EXFAT_DEFAULT_CODEPAGE
+#define CONFIG_EXFAT_DEFAULT_CODEPAGE	437
+#define CONFIG_EXFAT_DEFAULT_IOCHARSET	"utf8"
+#endif
+
+#endif /* _EXFAT_CONFIG_H */
diff --git a/fs/exfat/exfat_core.c b/fs/exfat/exfat_core.c
new file mode 100644
index 000000000..143b72155
--- /dev/null
+++ b/fs/exfat/exfat_core.c
@@ -0,0 +1,5138 @@
+/* Some of the source code in this file came from "linux/fs/fat/misc.c".  */
+/*
+ *  linux/fs/fat/misc.c
+ *
+ *  Written 1992,1993 by Werner Almesberger
+ *  22/11/2000 - Fixed fat_date_unix2dos for dates earlier than 01/01/1980
+ *         and date_dos2unix for date==0 by Igor Zhbanov(bsg@uniyar.ac.ru)
+ */
+
+/*
+ *  Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/************************************************************************/
+/*                                                                      */
+/*  PROJECT : exFAT & FAT12/16/32 File System                           */
+/*  FILE    : exfat_core.c                                              */
+/*  PURPOSE : exFAT File Manager                                        */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  NOTES                                                               */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  REVISION HISTORY (Ver 0.9)                                          */
+/*                                                                      */
+/*  - 2010.11.15 [Joosun Hahn] : first writing                          */
+/*                                                                      */
+/************************************************************************/
+
+#include <linux/version.h>
+#include <linux/param.h>
+#include <linux/log2.h>
+
+#include "exfat_bitmap.h"
+#include "exfat_config.h"
+#include "exfat_data.h"
+#include "exfat_oal.h"
+#include "exfat_blkdev.h"
+#include "exfat_cache.h"
+#include "exfat_nls.h"
+#include "exfat_api.h"
+#include "exfat_super.h"
+#include "exfat_core.h"
+
+#include <linux/blkdev.h>
+#include <linux/slab.h>
+
+static void __set_sb_dirty(struct super_block *sb)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0)
+	sb->s_dirt = 1;
+#else
+	struct exfat_sb_info *sbi = EXFAT_SB(sb);
+	sbi->s_dirt = 1;
+#endif
+}
+
+/*----------------------------------------------------------------------*/
+/*  Global Variable Definitions                                         */
+/*----------------------------------------------------------------------*/
+
+extern u8 uni_upcase[];
+
+/*----------------------------------------------------------------------*/
+/*  Local Variable Definitions                                          */
+/*----------------------------------------------------------------------*/
+
+static u8 name_buf[MAX_PATH_LENGTH * MAX_CHARSET_SIZE];
+
+static char *reserved_names[] = {
+	"AUX     ", "CON     ", "NUL     ", "PRN     ",
+	"COM1    ", "COM2    ", "COM3    ", "COM4    ",
+	"COM5    ", "COM6    ", "COM7    ", "COM8    ", "COM9    ",
+	"LPT1    ", "LPT2    ", "LPT3    ", "LPT4    ",
+	"LPT5    ", "LPT6    ", "LPT7    ", "LPT8    ", "LPT9    ",
+	NULL
+};
+
+static u8 free_bit[] = {
+	0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, /*   0 ~  19 */
+	0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, /*  20 ~  39 */
+	0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, /*  40 ~  59 */
+	0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, /*  60 ~  79 */
+	0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, /*  80 ~  99 */
+	0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, /* 100 ~ 119 */
+	0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, /* 120 ~ 139 */
+	0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, /* 140 ~ 159 */
+	0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, /* 160 ~ 179 */
+	0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, /* 180 ~ 199 */
+	0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, /* 200 ~ 219 */
+	0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, /* 220 ~ 239 */
+	0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0                 /* 240 ~ 254 */
+};
+
+static u8 used_bit[] = {
+	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, /*   0 ~  19 */
+	2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, /*  20 ~  39 */
+	2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, /*  40 ~  59 */
+	4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, /*  60 ~  79 */
+	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, /*  80 ~  99 */
+	3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, /* 100 ~ 119 */
+	4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, /* 120 ~ 139 */
+	3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, /* 140 ~ 159 */
+	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, /* 160 ~ 179 */
+	4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, /* 180 ~ 199 */
+	3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, /* 200 ~ 219 */
+	5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, /* 220 ~ 239 */
+	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8              /* 240 ~ 255 */
+};
+
+/*======================================================================*/
+/*  Global Function Definitions                                         */
+/*======================================================================*/
+
+/* ffsInit : roll back to the initial state of the file system */
+s32 ffsInit(void)
+{
+	s32 ret;
+
+	ret = bdev_init();
+	if (ret)
+		return ret;
+
+	ret = fs_init();
+	if (ret)
+		return ret;
+
+	return FFS_SUCCESS;
+} /* end of ffsInit */
+
+/* ffsShutdown : make free all memory-alloced global buffers */
+s32 ffsShutdown(void)
+{
+	s32 ret;
+	ret = fs_shutdown();
+	if (ret)
+		return ret;
+
+	ret = bdev_shutdown();
+	if (ret)
+		return ret;
+
+	return FFS_SUCCESS;
+} /* end of ffsShutdown */
+
+/* ffsMountVol : mount the file system volume */
+s32 ffsMountVol(struct super_block *sb)
+{
+	int i, ret;
+	PBR_SECTOR_T *p_pbr;
+	struct buffer_head *tmp_bh = NULL;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	BD_INFO_T *p_bd = &(EXFAT_SB(sb)->bd_info);
+
+	printk("[EXFAT] trying to mount...\n");
+
+	sm_init(&p_fs->v_sem);
+	p_fs->dev_ejected = FALSE;
+
+	/* open the block device */
+	if (bdev_open(sb))
+		return FFS_MEDIAERR;
+
+	if (p_bd->sector_size < sb->s_blocksize)
+		return FFS_MEDIAERR;
+	if (p_bd->sector_size > sb->s_blocksize)
+		sb_set_blocksize(sb, p_bd->sector_size);
+
+	/* read Sector 0 */
+	if (sector_read(sb, 0, &tmp_bh, 1) != FFS_SUCCESS)
+		return FFS_MEDIAERR;
+
+	p_fs->PBR_sector = 0;
+
+	p_pbr = (PBR_SECTOR_T *) tmp_bh->b_data;
+
+	/* check the validity of PBR */
+	if (GET16_A(p_pbr->signature) != PBR_SIGNATURE) {
+		brelse(tmp_bh);
+		bdev_close(sb);
+		return FFS_FORMATERR;
+	}
+
+	/* fill fs_stuct */
+	for (i = 0; i < 53; i++)
+		if (p_pbr->bpb[i])
+			break;
+
+	if (i < 53) {
+		if (GET16(p_pbr->bpb+11)) /* num_fat_sectors */
+			ret = fat16_mount(sb, p_pbr);
+		else
+			ret = fat32_mount(sb, p_pbr);
+	} else {
+		ret = exfat_mount(sb, p_pbr);
+	}
+
+	brelse(tmp_bh);
+
+	if (ret) {
+		bdev_close(sb);
+		return ret;
+	}
+
+	if (p_fs->vol_type == EXFAT) {
+		ret = load_alloc_bitmap(sb);
+		if (ret) {
+			bdev_close(sb);
+			return ret;
+		}
+		ret = load_upcase_table(sb);
+		if (ret) {
+			free_alloc_bitmap(sb);
+			bdev_close(sb);
+			return ret;
+		}
+	}
+
+	if (p_fs->dev_ejected) {
+		if (p_fs->vol_type == EXFAT) {
+			free_upcase_table(sb);
+			free_alloc_bitmap(sb);
+		}
+		bdev_close(sb);
+		return FFS_MEDIAERR;
+	}
+
+	printk("[EXFAT] mounted successfully\n");
+
+	return FFS_SUCCESS;
+} /* end of ffsMountVol */
+
+/* ffsUmountVol : umount the file system volume */
+s32 ffsUmountVol(struct super_block *sb)
+{
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	printk("[EXFAT] trying to unmount...\n");
+
+	fs_sync(sb, 0);
+	fs_set_vol_flags(sb, VOL_CLEAN);
+
+	if (p_fs->vol_type == EXFAT) {
+		free_upcase_table(sb);
+		free_alloc_bitmap(sb);
+	}
+
+	FAT_release_all(sb);
+	buf_release_all(sb);
+
+	/* close the block device */
+	bdev_close(sb);
+
+	if (p_fs->dev_ejected) {
+		printk("[EXFAT] unmounted with media errors. "
+			"device's already ejected.\n");
+		return FFS_MEDIAERR;
+	}
+
+	printk("[EXFAT] unmounted successfully\n");
+
+	return FFS_SUCCESS;
+} /* end of ffsUmountVol */
+
+/* ffsGetVolInfo : get the information of a file system volume */
+s32 ffsGetVolInfo(struct super_block *sb, VOL_INFO_T *info)
+{
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	if (p_fs->used_clusters == (u32) ~0)
+		p_fs->used_clusters = p_fs->fs_func->count_used_clusters(sb);
+
+	info->FatType = p_fs->vol_type;
+	info->ClusterSize = p_fs->cluster_size;
+	info->NumClusters = p_fs->num_clusters - 2; /* clu 0 & 1 */
+	info->UsedClusters = p_fs->used_clusters;
+	info->FreeClusters = info->NumClusters - info->UsedClusters;
+
+	if (p_fs->dev_ejected)
+		return FFS_MEDIAERR;
+
+	return FFS_SUCCESS;
+} /* end of ffsGetVolInfo */
+
+/* ffsSyncVol : synchronize all file system volumes */
+s32 ffsSyncVol(struct super_block *sb, s32 do_sync)
+{
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	/* synchronize the file system */
+	fs_sync(sb, do_sync);
+	fs_set_vol_flags(sb, VOL_CLEAN);
+
+	if (p_fs->dev_ejected)
+		return FFS_MEDIAERR;
+
+	return FFS_SUCCESS;
+} /* end of ffsSyncVol */
+
+/*----------------------------------------------------------------------*/
+/*  File Operation Functions                                            */
+/*----------------------------------------------------------------------*/
+
+/* ffsLookupFile : lookup a file */
+s32 ffsLookupFile(struct inode *inode, char *path, FILE_ID_T *fid)
+{
+	s32 ret, dentry, num_entries;
+	CHAIN_T dir;
+	UNI_NAME_T uni_name;
+	DOS_NAME_T dos_name;
+	DENTRY_T *ep, *ep2;
+	ENTRY_SET_CACHE_T *es = NULL;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	DPRINTK("ffsLookupFile entered\n");
+
+	/* check the validity of directory name in the given pathname */
+	ret = resolve_path(inode, path, &dir, &uni_name);
+	if (ret)
+		return ret;
+
+	ret = get_num_entries_and_dos_name(sb, &dir, &uni_name, &num_entries, &dos_name);
+	if (ret)
+		return ret;
+
+	/* search the file name for directories */
+	dentry = p_fs->fs_func->find_dir_entry(sb, &dir, &uni_name, num_entries, &dos_name, TYPE_ALL);
+	if (dentry < -1)
+		return FFS_NOTFOUND;
+
+	fid->dir.dir = dir.dir;
+	fid->dir.size = dir.size;
+	fid->dir.flags = dir.flags;
+	fid->entry = dentry;
+
+	if (dentry == -1) {
+		fid->type = TYPE_DIR;
+		fid->rwoffset = 0;
+		fid->hint_last_off = -1;
+
+		fid->attr = ATTR_SUBDIR;
+		fid->flags = 0x01;
+		fid->size = 0;
+		fid->start_clu = p_fs->root_dir;
+	} else {
+		if (p_fs->vol_type == EXFAT) {
+			es = get_entry_set_in_dir(sb, &dir, dentry, ES_2_ENTRIES, &ep);
+			if (!es)
+				return FFS_MEDIAERR;
+			ep2 = ep+1;
+		} else {
+			ep = get_entry_in_dir(sb, &dir, dentry, NULL);
+			if (!ep)
+				return FFS_MEDIAERR;
+			ep2 = ep;
+		}
+
+		fid->type = p_fs->fs_func->get_entry_type(ep);
+		fid->rwoffset = 0;
+		fid->hint_last_off = -1;
+		fid->attr = p_fs->fs_func->get_entry_attr(ep);
+
+		fid->size = p_fs->fs_func->get_entry_size(ep2);
+		if ((fid->type == TYPE_FILE) && (fid->size == 0)) {
+			fid->flags = (p_fs->vol_type == EXFAT) ? 0x03 : 0x01;
+			fid->start_clu = CLUSTER_32(~0);
+		} else {
+			fid->flags = p_fs->fs_func->get_entry_flag(ep2);
+			fid->start_clu = p_fs->fs_func->get_entry_clu0(ep2);
+		}
+
+		if (p_fs->vol_type == EXFAT)
+			release_entry_set(es);
+	}
+
+	if (p_fs->dev_ejected)
+		return FFS_MEDIAERR;
+
+	DPRINTK("ffsLookupFile exited successfully\n");
+
+	return FFS_SUCCESS;
+} /* end of ffsLookupFile */
+
+/* ffsCreateFile : create a file */
+s32 ffsCreateFile(struct inode *inode, char *path, u8 mode, FILE_ID_T *fid)
+{
+	s32 ret/*, dentry*/;
+	CHAIN_T dir;
+	UNI_NAME_T uni_name;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	/* check the validity of directory name in the given pathname */
+	ret = resolve_path(inode, path, &dir, &uni_name);
+	if (ret)
+		return ret;
+
+	fs_set_vol_flags(sb, VOL_DIRTY);
+
+	/* create a new file */
+	ret = create_file(inode, &dir, &uni_name, mode, fid);
+
+#ifdef CONFIG_EXFAT_DELAYED_SYNC
+	fs_sync(sb, 0);
+	fs_set_vol_flags(sb, VOL_CLEAN);
+#endif
+
+	if (p_fs->dev_ejected)
+		return FFS_MEDIAERR;
+
+	return ret;
+} /* end of ffsCreateFile */
+
+/* ffsReadFile : read data from a opened file */
+s32 ffsReadFile(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *rcount)
+{
+	s32 offset, sec_offset, clu_offset;
+	u32 clu;
+	sector_t LogSector;
+	u64 oneblkread, read_bytes;
+	struct buffer_head *tmp_bh = NULL;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	BD_INFO_T *p_bd = &(EXFAT_SB(sb)->bd_info);
+
+	/* check if the given file ID is opened */
+	if (fid->type != TYPE_FILE)
+		return FFS_PERMISSIONERR;
+
+	if (fid->rwoffset > fid->size)
+		fid->rwoffset = fid->size;
+
+	if (count > (fid->size - fid->rwoffset))
+		count = fid->size - fid->rwoffset;
+
+	if (count == 0) {
+		if (rcount != NULL)
+			*rcount = 0;
+		return FFS_EOF;
+	}
+
+	read_bytes = 0;
+
+	while (count > 0) {
+		clu_offset = (s32)(fid->rwoffset >> p_fs->cluster_size_bits);
+		clu = fid->start_clu;
+
+		if (fid->flags == 0x03) {
+			clu += clu_offset;
+		} else {
+			/* hint information */
+			if ((clu_offset > 0) && (fid->hint_last_off > 0) &&
+				(clu_offset >= fid->hint_last_off)) {
+				clu_offset -= fid->hint_last_off;
+				clu = fid->hint_last_clu;
+			}
+
+			while (clu_offset > 0) {
+				/* clu = FAT_read(sb, clu); */
+				if (FAT_read(sb, clu, &clu) == -1)
+					return FFS_MEDIAERR;
+
+				clu_offset--;
+			}
+		}
+
+		/* hint information */
+		fid->hint_last_off = (s32)(fid->rwoffset >> p_fs->cluster_size_bits);
+		fid->hint_last_clu = clu;
+
+		offset = (s32)(fid->rwoffset & (p_fs->cluster_size-1)); /* byte offset in cluster   */
+		sec_offset = offset >> p_bd->sector_size_bits;            /* sector offset in cluster */
+		offset &= p_bd->sector_size_mask;                         /* byte offset in sector    */
+
+		LogSector = START_SECTOR(clu) + sec_offset;
+
+		oneblkread = (u64)(p_bd->sector_size - offset);
+		if (oneblkread > count)
+			oneblkread = count;
+
+		if ((offset == 0) && (oneblkread == p_bd->sector_size)) {
+			if (sector_read(sb, LogSector, &tmp_bh, 1) != FFS_SUCCESS)
+				goto err_out;
+			memcpy(((char *) buffer)+read_bytes, ((char *) tmp_bh->b_data), (s32) oneblkread);
+		} else {
+			if (sector_read(sb, LogSector, &tmp_bh, 1) != FFS_SUCCESS)
+				goto err_out;
+			memcpy(((char *) buffer)+read_bytes, ((char *) tmp_bh->b_data)+offset, (s32) oneblkread);
+		}
+		count -= oneblkread;
+		read_bytes += oneblkread;
+		fid->rwoffset += oneblkread;
+	}
+	brelse(tmp_bh);
+
+err_out:
+	/* set the size of read bytes */
+	if (rcount != NULL)
+		*rcount = read_bytes;
+
+	if (p_fs->dev_ejected)
+		return FFS_MEDIAERR;
+
+	return FFS_SUCCESS;
+} /* end of ffsReadFile */
+
+/* ffsWriteFile : write data into a opened file */
+s32 ffsWriteFile(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *wcount)
+{
+	s32 modified = FALSE, offset, sec_offset, clu_offset;
+	s32 num_clusters, num_alloc, num_alloced = (s32) ~0;
+	u32 clu, last_clu;
+	sector_t LogSector, sector = 0;
+	u64 oneblkwrite, write_bytes;
+	CHAIN_T new_clu;
+	TIMESTAMP_T tm;
+	DENTRY_T *ep, *ep2;
+	ENTRY_SET_CACHE_T *es = NULL;
+	struct buffer_head *tmp_bh = NULL;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	BD_INFO_T *p_bd = &(EXFAT_SB(sb)->bd_info);
+
+	/* check if the given file ID is opened */
+	if (fid->type != TYPE_FILE)
+		return FFS_PERMISSIONERR;
+
+	if (fid->rwoffset > fid->size)
+		fid->rwoffset = fid->size;
+
+	if (count == 0) {
+		if (wcount != NULL)
+			*wcount = 0;
+		return FFS_SUCCESS;
+	}
+
+	fs_set_vol_flags(sb, VOL_DIRTY);
+
+	if (fid->size == 0)
+		num_clusters = 0;
+	else
+		num_clusters = (s32)((fid->size-1) >> p_fs->cluster_size_bits) + 1;
+
+	write_bytes = 0;
+
+	while (count > 0) {
+		clu_offset = (s32)(fid->rwoffset >> p_fs->cluster_size_bits);
+		clu = last_clu = fid->start_clu;
+
+		if (fid->flags == 0x03) {
+			if ((clu_offset > 0) && (clu != CLUSTER_32(~0))) {
+				last_clu += clu_offset - 1;
+
+				if (clu_offset == num_clusters)
+					clu = CLUSTER_32(~0);
+				else
+					clu += clu_offset;
+			}
+		} else {
+			/* hint information */
+			if ((clu_offset > 0) && (fid->hint_last_off > 0) &&
+				(clu_offset >= fid->hint_last_off)) {
+				clu_offset -= fid->hint_last_off;
+				clu = fid->hint_last_clu;
+			}
+
+			while ((clu_offset > 0) && (clu != CLUSTER_32(~0))) {
+				last_clu = clu;
+				/* clu = FAT_read(sb, clu); */
+				if (FAT_read(sb, clu, &clu) == -1)
+					return FFS_MEDIAERR;
+
+				clu_offset--;
+			}
+		}
+
+		if (clu == CLUSTER_32(~0)) {
+			num_alloc = (s32)((count-1) >> p_fs->cluster_size_bits) + 1;
+			new_clu.dir = (last_clu == CLUSTER_32(~0)) ? CLUSTER_32(~0) : last_clu+1;
+			new_clu.size = 0;
+			new_clu.flags = fid->flags;
+
+			/* (1) allocate a chain of clusters */
+			num_alloced = p_fs->fs_func->alloc_cluster(sb, num_alloc, &new_clu);
+			if (num_alloced == 0)
+				break;
+			else if (num_alloced < 0)
+				return FFS_MEDIAERR;
+
+			/* (2) append to the FAT chain */
+			if (last_clu == CLUSTER_32(~0)) {
+				if (new_clu.flags == 0x01)
+					fid->flags = 0x01;
+				fid->start_clu = new_clu.dir;
+				modified = TRUE;
+			} else {
+				if (new_clu.flags != fid->flags) {
+					exfat_chain_cont_cluster(sb, fid->start_clu, num_clusters);
+					fid->flags = 0x01;
+					modified = TRUE;
+				}
+				if (new_clu.flags == 0x01)
+					FAT_write(sb, last_clu, new_clu.dir);
+			}
+
+			num_clusters += num_alloced;
+			clu = new_clu.dir;
+		}
+
+		/* hint information */
+		fid->hint_last_off = (s32)(fid->rwoffset >> p_fs->cluster_size_bits);
+		fid->hint_last_clu = clu;
+
+		offset = (s32)(fid->rwoffset & (p_fs->cluster_size-1)); /* byte offset in cluster   */
+		sec_offset = offset >> p_bd->sector_size_bits;            /* sector offset in cluster */
+		offset &= p_bd->sector_size_mask;                         /* byte offset in sector    */
+
+		LogSector = START_SECTOR(clu) + sec_offset;
+
+		oneblkwrite = (u64)(p_bd->sector_size - offset);
+		if (oneblkwrite > count)
+			oneblkwrite = count;
+
+		if ((offset == 0) && (oneblkwrite == p_bd->sector_size)) {
+			if (sector_read(sb, LogSector, &tmp_bh, 0) != FFS_SUCCESS)
+				goto err_out;
+			memcpy(((char *) tmp_bh->b_data), ((char *) buffer)+write_bytes, (s32) oneblkwrite);
+			if (sector_write(sb, LogSector, tmp_bh, 0) != FFS_SUCCESS) {
+				brelse(tmp_bh);
+				goto err_out;
+			}
+		} else {
+			if ((offset > 0) || ((fid->rwoffset+oneblkwrite) < fid->size)) {
+				if (sector_read(sb, LogSector, &tmp_bh, 1) != FFS_SUCCESS)
+					goto err_out;
+			} else {
+				if (sector_read(sb, LogSector, &tmp_bh, 0) != FFS_SUCCESS)
+					goto err_out;
+			}
+
+			memcpy(((char *) tmp_bh->b_data)+offset, ((char *) buffer)+write_bytes, (s32) oneblkwrite);
+			if (sector_write(sb, LogSector, tmp_bh, 0) != FFS_SUCCESS) {
+				brelse(tmp_bh);
+				goto err_out;
+			}
+		}
+
+		count -= oneblkwrite;
+		write_bytes += oneblkwrite;
+		fid->rwoffset += oneblkwrite;
+
+		fid->attr |= ATTR_ARCHIVE;
+
+		if (fid->size < fid->rwoffset) {
+			fid->size = fid->rwoffset;
+			modified = TRUE;
+		}
+	}
+
+	brelse(tmp_bh);
+
+	/* (3) update the direcoty entry */
+	if (p_fs->vol_type == EXFAT) {
+		es = get_entry_set_in_dir(sb, &(fid->dir), fid->entry, ES_ALL_ENTRIES, &ep);
+		if (es == NULL)
+			goto err_out;
+		ep2 = ep+1;
+	} else {
+		ep = get_entry_in_dir(sb, &(fid->dir), fid->entry, &sector);
+		if (!ep)
+			goto err_out;
+		ep2 = ep;
+	}
+
+	p_fs->fs_func->set_entry_time(ep, tm_current(&tm), TM_MODIFY);
+	p_fs->fs_func->set_entry_attr(ep, fid->attr);
+
+	if (p_fs->vol_type != EXFAT)
+		buf_modify(sb, sector);
+
+	if (modified) {
+		if (p_fs->fs_func->get_entry_flag(ep2) != fid->flags)
+			p_fs->fs_func->set_entry_flag(ep2, fid->flags);
+
+		if (p_fs->fs_func->get_entry_size(ep2) != fid->size)
+			p_fs->fs_func->set_entry_size(ep2, fid->size);
+
+		if (p_fs->fs_func->get_entry_clu0(ep2) != fid->start_clu)
+			p_fs->fs_func->set_entry_clu0(ep2, fid->start_clu);
+
+		if (p_fs->vol_type != EXFAT)
+			buf_modify(sb, sector);
+	}
+
+	if (p_fs->vol_type == EXFAT) {
+		update_dir_checksum_with_entry_set(sb, es);
+		release_entry_set(es);
+	}
+
+#ifdef CONFIG_EXFAT_DELAYED_SYNC
+	fs_sync(sb, 0);
+	fs_set_vol_flags(sb, VOL_CLEAN);
+#endif
+
+err_out:
+	/* set the size of written bytes */
+	if (wcount != NULL)
+		*wcount = write_bytes;
+
+	if (num_alloced == 0)
+		return FFS_FULL;
+
+	if (p_fs->dev_ejected)
+		return FFS_MEDIAERR;
+
+	return FFS_SUCCESS;
+} /* end of ffsWriteFile */
+
+/* ffsTruncateFile : resize the file length */
+s32 ffsTruncateFile(struct inode *inode, u64 old_size, u64 new_size)
+{
+	s32 num_clusters;
+	u32 last_clu = CLUSTER_32(0);
+	sector_t sector = 0;
+	CHAIN_T clu;
+	TIMESTAMP_T tm;
+	DENTRY_T *ep, *ep2;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	FILE_ID_T *fid = &(EXFAT_I(inode)->fid);
+	ENTRY_SET_CACHE_T *es = NULL;
+
+	/* check if the given file ID is opened */
+	if (fid->type != TYPE_FILE)
+		return FFS_PERMISSIONERR;
+
+	if (fid->size != old_size) {
+		printk(KERN_ERR "[EXFAT] truncate : can't skip it because of "
+				"size-mismatch(old:%lld->fid:%lld).\n"
+				,old_size, fid->size);
+	}
+
+	if (old_size <= new_size)
+		return FFS_SUCCESS;
+
+	fs_set_vol_flags(sb, VOL_DIRTY);
+
+	clu.dir = fid->start_clu;
+	clu.size = (s32)((old_size-1) >> p_fs->cluster_size_bits) + 1;
+	clu.flags = fid->flags;
+
+	if (new_size > 0) {
+		num_clusters = (s32)((new_size-1) >> p_fs->cluster_size_bits) + 1;
+
+		if (clu.flags == 0x03) {
+			clu.dir += num_clusters;
+		} else {
+			while (num_clusters > 0) {
+				last_clu = clu.dir;
+				if (FAT_read(sb, clu.dir, &(clu.dir)) == -1)
+					return FFS_MEDIAERR;
+				num_clusters--;
+			}
+		}
+
+		clu.size -= num_clusters;
+	}
+
+	fid->size = new_size;
+	fid->attr |= ATTR_ARCHIVE;
+	if (new_size == 0) {
+		fid->flags = (p_fs->vol_type == EXFAT) ? 0x03 : 0x01;
+		fid->start_clu = CLUSTER_32(~0);
+	}
+
+	/* (1) update the directory entry */
+	if (p_fs->vol_type == EXFAT) {
+		es = get_entry_set_in_dir(sb, &(fid->dir), fid->entry, ES_ALL_ENTRIES, &ep);
+		if (es == NULL)
+			return FFS_MEDIAERR;
+		ep2 = ep+1;
+	} else {
+		ep = get_entry_in_dir(sb, &(fid->dir), fid->entry, &sector);
+		if (!ep)
+			return FFS_MEDIAERR;
+		ep2 = ep;
+	}
+
+	p_fs->fs_func->set_entry_time(ep, tm_current(&tm), TM_MODIFY);
+	p_fs->fs_func->set_entry_attr(ep, fid->attr);
+
+	p_fs->fs_func->set_entry_size(ep2, new_size);
+	if (new_size == 0) {
+		p_fs->fs_func->set_entry_flag(ep2, 0x01);
+		p_fs->fs_func->set_entry_clu0(ep2, CLUSTER_32(0));
+	}
+
+	if (p_fs->vol_type != EXFAT)
+		buf_modify(sb, sector);
+	else {
+		update_dir_checksum_with_entry_set(sb, es);
+		release_entry_set(es);
+	}
+
+	/* (2) cut off from the FAT chain */
+	if (last_clu != CLUSTER_32(0)) {
+		if (fid->flags == 0x01)
+			FAT_write(sb, last_clu, CLUSTER_32(~0));
+	}
+
+	/* (3) free the clusters */
+	p_fs->fs_func->free_cluster(sb, &clu, 0);
+
+	/* hint information */
+	fid->hint_last_off = -1;
+	if (fid->rwoffset > fid->size)
+		fid->rwoffset = fid->size;
+
+#ifdef CONFIG_EXFAT_DELAYED_SYNC
+	fs_sync(sb, 0);
+	fs_set_vol_flags(sb, VOL_CLEAN);
+#endif
+
+	if (p_fs->dev_ejected)
+		return FFS_MEDIAERR;
+
+	return FFS_SUCCESS;
+} /* end of ffsTruncateFile */
+
+static void update_parent_info(FILE_ID_T *fid, struct inode *parent_inode)
+{
+	FS_INFO_T *p_fs = &(EXFAT_SB(parent_inode->i_sb)->fs_info);
+	FILE_ID_T *parent_fid = &(EXFAT_I(parent_inode)->fid);
+
+	if (unlikely((parent_fid->flags != fid->dir.flags)
+		|| (parent_fid->size != (fid->dir.size<<p_fs->cluster_size_bits))
+		|| (parent_fid->start_clu != fid->dir.dir))) {
+
+		fid->dir.dir = parent_fid->start_clu;
+		fid->dir.flags = parent_fid->flags;
+		fid->dir.size = ((parent_fid->size + (p_fs->cluster_size-1))
+						>> p_fs->cluster_size_bits);
+	}
+}
+
+/* ffsMoveFile : move(rename) a old file into a new file */
+s32 ffsMoveFile(struct inode *old_parent_inode, FILE_ID_T *fid, struct inode *new_parent_inode, struct dentry *new_dentry)
+{
+	s32 ret;
+	s32 dentry;
+	CHAIN_T olddir, newdir;
+	CHAIN_T *p_dir = NULL;
+	UNI_NAME_T uni_name;
+	DENTRY_T *ep;
+	struct super_block *sb = old_parent_inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	u8 *new_path = (u8 *) new_dentry->d_name.name;
+	struct inode *new_inode = new_dentry->d_inode;
+	int num_entries;
+	FILE_ID_T *new_fid = NULL;
+	s32 new_entry = 0;
+
+	/* check the validity of pointer parameters */
+	if ((new_path == NULL) || (*new_path == '\0'))
+		return FFS_ERROR;
+
+	update_parent_info(fid, old_parent_inode);
+
+	olddir.dir = fid->dir.dir;
+	olddir.size = fid->dir.size;
+	olddir.flags = fid->dir.flags;
+
+	dentry = fid->entry;
+
+	/* check if the old file is "." or ".." */
+	if (p_fs->vol_type != EXFAT) {
+		if ((olddir.dir != p_fs->root_dir) && (dentry < 2))
+			return FFS_PERMISSIONERR;
+	}
+
+	ep = get_entry_in_dir(sb, &olddir, dentry, NULL);
+	if (!ep)
+		return FFS_MEDIAERR;
+
+	if (p_fs->fs_func->get_entry_attr(ep) & ATTR_READONLY)
+		return FFS_PERMISSIONERR;
+
+	/* check whether new dir is existing directory and empty */
+	if (new_inode) {
+		u32 entry_type;
+
+		ret = FFS_MEDIAERR;
+		new_fid = &EXFAT_I(new_inode)->fid;
+
+		update_parent_info(new_fid, new_parent_inode);
+
+		p_dir = &(new_fid->dir);
+		new_entry = new_fid->entry;
+		ep = get_entry_in_dir(sb, p_dir, new_entry, NULL);
+		if (!ep)
+			goto out;
+
+		entry_type = p_fs->fs_func->get_entry_type(ep);
+
+		if (entry_type == TYPE_DIR) {
+			CHAIN_T new_clu;
+			new_clu.dir = new_fid->start_clu;
+			new_clu.size = (s32)((new_fid->size-1) >> p_fs->cluster_size_bits) + 1;
+			new_clu.flags = new_fid->flags;
+
+			if (!is_dir_empty(sb, &new_clu))
+				return FFS_FILEEXIST;
+		}
+	}
+
+	/* check the validity of directory name in the given new pathname */
+	ret = resolve_path(new_parent_inode, new_path, &newdir, &uni_name);
+	if (ret)
+		return ret;
+
+	fs_set_vol_flags(sb, VOL_DIRTY);
+
+	if (olddir.dir == newdir.dir)
+		ret = rename_file(new_parent_inode, &olddir, dentry, &uni_name, fid);
+	else
+		ret = move_file(new_parent_inode, &olddir, dentry, &newdir, &uni_name, fid);
+
+	if ((ret == FFS_SUCCESS) && new_inode) {
+		/* delete entries of new_dir */
+		ep = get_entry_in_dir(sb, p_dir, new_entry, NULL);
+		if (!ep)
+			goto out;
+
+		num_entries = p_fs->fs_func->count_ext_entries(sb, p_dir, new_entry, ep);
+		if (num_entries < 0)
+			goto out;
+		p_fs->fs_func->delete_dir_entry(sb, p_dir, new_entry, 0, num_entries+1);
+	}
+out:
+#ifdef CONFIG_EXFAT_DELAYED_SYNC
+	fs_sync(sb, 0);
+	fs_set_vol_flags(sb, VOL_CLEAN);
+#endif
+
+	if (p_fs->dev_ejected)
+		return FFS_MEDIAERR;
+
+	return ret;
+} /* end of ffsMoveFile */
+
+/* ffsRemoveFile : remove a file */
+s32 ffsRemoveFile(struct inode *inode, FILE_ID_T *fid)
+{
+	s32 dentry;
+	CHAIN_T dir, clu_to_free;
+	DENTRY_T *ep;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	dir.dir = fid->dir.dir;
+	dir.size = fid->dir.size;
+	dir.flags = fid->dir.flags;
+
+	dentry = fid->entry;
+
+	ep = get_entry_in_dir(sb, &dir, dentry, NULL);
+	if (!ep)
+		return FFS_MEDIAERR;
+
+	if (p_fs->fs_func->get_entry_attr(ep) & ATTR_READONLY)
+		return FFS_PERMISSIONERR;
+
+	fs_set_vol_flags(sb, VOL_DIRTY);
+
+	/* (1) update the directory entry */
+	remove_file(inode, &dir, dentry);
+
+	clu_to_free.dir = fid->start_clu;
+	clu_to_free.size = (s32)((fid->size-1) >> p_fs->cluster_size_bits) + 1;
+	clu_to_free.flags = fid->flags;
+
+	/* (2) free the clusters */
+	p_fs->fs_func->free_cluster(sb, &clu_to_free, 0);
+
+	fid->size = 0;
+	fid->start_clu = CLUSTER_32(~0);
+	fid->flags = (p_fs->vol_type == EXFAT) ? 0x03 : 0x01;
+
+#ifdef CONFIG_EXFAT_DELAYED_SYNC
+	fs_sync(sb, 0);
+	fs_set_vol_flags(sb, VOL_CLEAN);
+#endif
+
+	if (p_fs->dev_ejected)
+		return FFS_MEDIAERR;
+
+	return FFS_SUCCESS;
+} /* end of ffsRemoveFile */
+
+/* ffsSetAttr : set the attribute of a given file */
+s32 ffsSetAttr(struct inode *inode, u32 attr)
+{
+	u32 type;
+	sector_t sector = 0;
+	DENTRY_T *ep;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	FILE_ID_T *fid = &(EXFAT_I(inode)->fid);
+	u8 is_dir = (fid->type == TYPE_DIR) ? 1 : 0;
+	ENTRY_SET_CACHE_T *es = NULL;
+
+	if (fid->attr == attr) {
+		if (p_fs->dev_ejected)
+			return FFS_MEDIAERR;
+		return FFS_SUCCESS;
+	}
+
+	if (is_dir) {
+		if ((fid->dir.dir == p_fs->root_dir) &&
+			(fid->entry == -1)) {
+			if (p_fs->dev_ejected)
+				return FFS_MEDIAERR;
+			return FFS_SUCCESS;
+		}
+	}
+
+	/* get the directory entry of given file */
+	if (p_fs->vol_type == EXFAT) {
+		es = get_entry_set_in_dir(sb, &(fid->dir), fid->entry, ES_ALL_ENTRIES, &ep);
+		if (es == NULL)
+			return FFS_MEDIAERR;
+	} else {
+		ep = get_entry_in_dir(sb, &(fid->dir), fid->entry, &sector);
+		if (!ep)
+			return FFS_MEDIAERR;
+	}
+
+	type = p_fs->fs_func->get_entry_type(ep);
+
+	if (((type == TYPE_FILE) && (attr & ATTR_SUBDIR)) ||
+		((type == TYPE_DIR) && (!(attr & ATTR_SUBDIR)))) {
+		s32 err;
+		if (p_fs->dev_ejected)
+			err = FFS_MEDIAERR;
+		else
+			err = FFS_ERROR;
+
+		if (p_fs->vol_type == EXFAT)
+			release_entry_set(es);
+		return err;
+	}
+
+	fs_set_vol_flags(sb, VOL_DIRTY);
+
+	/* set the file attribute */
+	fid->attr = attr;
+	p_fs->fs_func->set_entry_attr(ep, attr);
+
+	if (p_fs->vol_type != EXFAT)
+		buf_modify(sb, sector);
+	else {
+		update_dir_checksum_with_entry_set(sb, es);
+		release_entry_set(es);
+	}
+
+#ifdef CONFIG_EXFAT_DELAYED_SYNC
+	fs_sync(sb, 0);
+	fs_set_vol_flags(sb, VOL_CLEAN);
+#endif
+
+	if (p_fs->dev_ejected)
+		return FFS_MEDIAERR;
+
+	return FFS_SUCCESS;
+} /* end of ffsSetAttr */
+
+/* ffsGetStat : get the information of a given file */
+s32 ffsGetStat(struct inode *inode, DIR_ENTRY_T *info)
+{
+	sector_t sector = 0;
+	s32 count;
+	CHAIN_T dir;
+	UNI_NAME_T uni_name;
+	TIMESTAMP_T tm;
+	DENTRY_T *ep, *ep2;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	FILE_ID_T *fid = &(EXFAT_I(inode)->fid);
+	ENTRY_SET_CACHE_T *es = NULL;
+	u8 is_dir = (fid->type == TYPE_DIR) ? 1 : 0;
+
+	DPRINTK("ffsGetStat entered\n");
+
+	if (is_dir) {
+		if ((fid->dir.dir == p_fs->root_dir) &&
+			(fid->entry == -1)) {
+			info->Attr = ATTR_SUBDIR;
+			memset((char *) &info->CreateTimestamp, 0, sizeof(DATE_TIME_T));
+			memset((char *) &info->ModifyTimestamp, 0, sizeof(DATE_TIME_T));
+			memset((char *) &info->AccessTimestamp, 0, sizeof(DATE_TIME_T));
+			strcpy(info->ShortName, ".");
+			strcpy(info->Name, ".");
+
+			dir.dir = p_fs->root_dir;
+			dir.flags = 0x01;
+
+			if (p_fs->root_dir == CLUSTER_32(0)) /* FAT16 root_dir */
+				info->Size = p_fs->dentries_in_root << DENTRY_SIZE_BITS;
+			else
+				info->Size = count_num_clusters(sb, &dir) << p_fs->cluster_size_bits;
+
+			count = count_dos_name_entries(sb, &dir, TYPE_DIR);
+			if (count < 0)
+				return FFS_MEDIAERR;
+			info->NumSubdirs = count;
+
+			if (p_fs->dev_ejected)
+				return FFS_MEDIAERR;
+			return FFS_SUCCESS;
+		}
+	}
+
+	/* get the directory entry of given file or directory */
+	if (p_fs->vol_type == EXFAT) {
+		es = get_entry_set_in_dir(sb, &(fid->dir), fid->entry, ES_2_ENTRIES, &ep);
+		if (es == NULL)
+			return FFS_MEDIAERR;
+		ep2 = ep+1;
+	} else {
+		ep = get_entry_in_dir(sb, &(fid->dir), fid->entry, &sector);
+		if (!ep)
+			return FFS_MEDIAERR;
+		ep2 = ep;
+		buf_lock(sb, sector);
+	}
+
+	/* set FILE_INFO structure using the acquired DENTRY_T */
+	info->Attr = p_fs->fs_func->get_entry_attr(ep);
+
+	p_fs->fs_func->get_entry_time(ep, &tm, TM_CREATE);
+	info->CreateTimestamp.Year = tm.year;
+	info->CreateTimestamp.Month = tm.mon;
+	info->CreateTimestamp.Day = tm.day;
+	info->CreateTimestamp.Hour = tm.hour;
+	info->CreateTimestamp.Minute = tm.min;
+	info->CreateTimestamp.Second = tm.sec;
+	info->CreateTimestamp.MilliSecond = 0;
+
+	p_fs->fs_func->get_entry_time(ep, &tm, TM_MODIFY);
+	info->ModifyTimestamp.Year = tm.year;
+	info->ModifyTimestamp.Month = tm.mon;
+	info->ModifyTimestamp.Day = tm.day;
+	info->ModifyTimestamp.Hour = tm.hour;
+	info->ModifyTimestamp.Minute = tm.min;
+	info->ModifyTimestamp.Second = tm.sec;
+	info->ModifyTimestamp.MilliSecond = 0;
+
+	memset((char *) &info->AccessTimestamp, 0, sizeof(DATE_TIME_T));
+
+	*(uni_name.name) = 0x0;
+	/* XXX this is very bad for exfat cuz name is already included in es.
+	 API should be revised */
+	p_fs->fs_func->get_uni_name_from_ext_entry(sb, &(fid->dir), fid->entry, uni_name.name);
+	if (*(uni_name.name) == 0x0 && p_fs->vol_type != EXFAT)
+		get_uni_name_from_dos_entry(sb, (DOS_DENTRY_T *) ep, &uni_name, 0x1);
+	nls_uniname_to_cstring(sb, info->Name, &uni_name);
+
+	if (p_fs->vol_type == EXFAT) {
+		info->NumSubdirs = 2;
+	} else {
+		buf_unlock(sb, sector);
+		get_uni_name_from_dos_entry(sb, (DOS_DENTRY_T *) ep, &uni_name, 0x0);
+		nls_uniname_to_cstring(sb, info->ShortName, &uni_name);
+		info->NumSubdirs = 0;
+	}
+
+	info->Size = p_fs->fs_func->get_entry_size(ep2);
+
+	if (p_fs->vol_type == EXFAT)
+		release_entry_set(es);
+
+	if (is_dir) {
+		dir.dir = fid->start_clu;
+		dir.flags = 0x01;
+
+		if (info->Size == 0)
+			info->Size = (u64) count_num_clusters(sb, &dir) << p_fs->cluster_size_bits;
+
+		count = count_dos_name_entries(sb, &dir, TYPE_DIR);
+		if (count < 0)
+			return FFS_MEDIAERR;
+		info->NumSubdirs += count;
+	}
+
+	if (p_fs->dev_ejected)
+		return FFS_MEDIAERR;
+
+	DPRINTK("ffsGetStat exited successfully\n");
+	return FFS_SUCCESS;
+} /* end of ffsGetStat */
+
+/* ffsSetStat : set the information of a given file */
+s32 ffsSetStat(struct inode *inode, DIR_ENTRY_T *info)
+{
+	sector_t sector = 0;
+	TIMESTAMP_T tm;
+	DENTRY_T *ep, *ep2;
+	ENTRY_SET_CACHE_T *es = NULL;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	FILE_ID_T *fid = &(EXFAT_I(inode)->fid);
+	u8 is_dir = (fid->type == TYPE_DIR) ? 1 : 0;
+
+	if (is_dir) {
+		if ((fid->dir.dir == p_fs->root_dir) &&
+			(fid->entry == -1)) {
+			if (p_fs->dev_ejected)
+				return FFS_MEDIAERR;
+			return FFS_SUCCESS;
+		}
+	}
+
+	fs_set_vol_flags(sb, VOL_DIRTY);
+
+	/* get the directory entry of given file or directory */
+	if (p_fs->vol_type == EXFAT) {
+		es = get_entry_set_in_dir(sb, &(fid->dir), fid->entry, ES_ALL_ENTRIES, &ep);
+		if (es == NULL)
+			return FFS_MEDIAERR;
+		ep2 = ep+1;
+	} else {
+		/* for other than exfat */
+		ep = get_entry_in_dir(sb, &(fid->dir), fid->entry, &sector);
+		if (!ep)
+			return FFS_MEDIAERR;
+		ep2 = ep;
+	}
+
+
+	p_fs->fs_func->set_entry_attr(ep, info->Attr);
+
+	/* set FILE_INFO structure using the acquired DENTRY_T */
+	tm.sec  = info->CreateTimestamp.Second;
+	tm.min  = info->CreateTimestamp.Minute;
+	tm.hour = info->CreateTimestamp.Hour;
+	tm.day  = info->CreateTimestamp.Day;
+	tm.mon  = info->CreateTimestamp.Month;
+	tm.year = info->CreateTimestamp.Year;
+	p_fs->fs_func->set_entry_time(ep, &tm, TM_CREATE);
+
+	tm.sec  = info->ModifyTimestamp.Second;
+	tm.min  = info->ModifyTimestamp.Minute;
+	tm.hour = info->ModifyTimestamp.Hour;
+	tm.day  = info->ModifyTimestamp.Day;
+	tm.mon  = info->ModifyTimestamp.Month;
+	tm.year = info->ModifyTimestamp.Year;
+	p_fs->fs_func->set_entry_time(ep, &tm, TM_MODIFY);
+
+
+	p_fs->fs_func->set_entry_size(ep2, info->Size);
+
+	if (p_fs->vol_type != EXFAT) {
+		buf_modify(sb, sector);
+	} else {
+		update_dir_checksum_with_entry_set(sb, es);
+		release_entry_set(es);
+	}
+
+	if (p_fs->dev_ejected)
+		return FFS_MEDIAERR;
+
+	return FFS_SUCCESS;
+} /* end of ffsSetStat */
+
+s32 ffsMapCluster(struct inode *inode, s32 clu_offset, u32 *clu)
+{
+	s32 num_clusters, num_alloced, modified = FALSE;
+	u32 last_clu;
+	sector_t sector = 0;
+	CHAIN_T new_clu;
+	DENTRY_T *ep;
+	ENTRY_SET_CACHE_T *es = NULL;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	FILE_ID_T *fid = &(EXFAT_I(inode)->fid);
+
+	fid->rwoffset = (s64)(clu_offset) << p_fs->cluster_size_bits;
+
+	if (EXFAT_I(inode)->mmu_private == 0)
+		num_clusters = 0;
+	else
+		num_clusters = (s32)((EXFAT_I(inode)->mmu_private-1) >> p_fs->cluster_size_bits) + 1;
+
+	*clu = last_clu = fid->start_clu;
+
+	if (fid->flags == 0x03) {
+		if ((clu_offset > 0) && (*clu != CLUSTER_32(~0))) {
+			last_clu += clu_offset - 1;
+
+			if (clu_offset == num_clusters)
+				*clu = CLUSTER_32(~0);
+			else
+				*clu += clu_offset;
+		}
+	} else {
+		/* hint information */
+		if ((clu_offset > 0) && (fid->hint_last_off > 0) &&
+			(clu_offset >= fid->hint_last_off)) {
+			clu_offset -= fid->hint_last_off;
+			*clu = fid->hint_last_clu;
+		}
+
+		while ((clu_offset > 0) && (*clu != CLUSTER_32(~0))) {
+			last_clu = *clu;
+			if (FAT_read(sb, *clu, clu) == -1)
+				return FFS_MEDIAERR;
+			clu_offset--;
+		}
+	}
+
+	if (*clu == CLUSTER_32(~0)) {
+		fs_set_vol_flags(sb, VOL_DIRTY);
+
+		new_clu.dir = (last_clu == CLUSTER_32(~0)) ? CLUSTER_32(~0) : last_clu+1;
+		new_clu.size = 0;
+		new_clu.flags = fid->flags;
+
+		/* (1) allocate a cluster */
+		num_alloced = p_fs->fs_func->alloc_cluster(sb, 1, &new_clu);
+		if (num_alloced < 0)
+			return FFS_MEDIAERR;
+		else if (num_alloced == 0)
+			return FFS_FULL;
+
+		/* (2) append to the FAT chain */
+		if (last_clu == CLUSTER_32(~0)) {
+			if (new_clu.flags == 0x01)
+				fid->flags = 0x01;
+			fid->start_clu = new_clu.dir;
+			modified = TRUE;
+		} else {
+			if (new_clu.flags != fid->flags) {
+				exfat_chain_cont_cluster(sb, fid->start_clu, num_clusters);
+				fid->flags = 0x01;
+				modified = TRUE;
+			}
+			if (new_clu.flags == 0x01)
+				FAT_write(sb, last_clu, new_clu.dir);
+		}
+
+		num_clusters += num_alloced;
+		*clu = new_clu.dir;
+
+		if (p_fs->vol_type == EXFAT) {
+			es = get_entry_set_in_dir(sb, &(fid->dir), fid->entry, ES_ALL_ENTRIES, &ep);
+			if (es == NULL)
+				return FFS_MEDIAERR;
+			/* get stream entry */
+			ep++;
+		}
+
+		/* (3) update directory entry */
+		if (modified) {
+			if (p_fs->vol_type != EXFAT) {
+				ep = get_entry_in_dir(sb, &(fid->dir), fid->entry, &sector);
+				if (!ep)
+					return FFS_MEDIAERR;
+			}
+
+			if (p_fs->fs_func->get_entry_flag(ep) != fid->flags)
+				p_fs->fs_func->set_entry_flag(ep, fid->flags);
+
+			if (p_fs->fs_func->get_entry_clu0(ep) != fid->start_clu)
+				p_fs->fs_func->set_entry_clu0(ep, fid->start_clu);
+
+			if (p_fs->vol_type != EXFAT)
+				buf_modify(sb, sector);
+		}
+
+		if (p_fs->vol_type == EXFAT) {
+			update_dir_checksum_with_entry_set(sb, es);
+			release_entry_set(es);
+		}
+
+		/* add number of new blocks to inode */
+		inode->i_blocks += num_alloced << (p_fs->cluster_size_bits - 9);
+	}
+
+	/* hint information */
+	fid->hint_last_off = (s32)(fid->rwoffset >> p_fs->cluster_size_bits);
+	fid->hint_last_clu = *clu;
+
+	if (p_fs->dev_ejected)
+		return FFS_MEDIAERR;
+
+	return FFS_SUCCESS;
+} /* end of ffsMapCluster */
+
+/*----------------------------------------------------------------------*/
+/*  Directory Operation Functions                                       */
+/*----------------------------------------------------------------------*/
+
+/* ffsCreateDir : create(make) a directory */
+s32 ffsCreateDir(struct inode *inode, char *path, FILE_ID_T *fid)
+{
+	s32 ret/*, dentry*/;
+	CHAIN_T dir;
+	UNI_NAME_T uni_name;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	DPRINTK("ffsCreateDir entered\n");
+
+	/* check the validity of directory name in the given old pathname */
+	ret = resolve_path(inode, path, &dir, &uni_name);
+	if (ret)
+		return ret;
+
+	fs_set_vol_flags(sb, VOL_DIRTY);
+
+	ret = create_dir(inode, &dir, &uni_name, fid);
+
+#ifdef CONFIG_EXFAT_DELAYED_SYNC
+	fs_sync(sb, 0);
+	fs_set_vol_flags(sb, VOL_CLEAN);
+#endif
+
+	if (p_fs->dev_ejected)
+		return FFS_MEDIAERR;
+
+	return ret;
+} /* end of ffsCreateDir */
+
+/* ffsReadDir : read a directory entry from the opened directory */
+s32 ffsReadDir(struct inode *inode, DIR_ENTRY_T *dir_entry)
+{
+	int i, dentry, clu_offset;
+	s32 dentries_per_clu, dentries_per_clu_bits = 0;
+	u32 type;
+	sector_t sector;
+	CHAIN_T dir, clu;
+	UNI_NAME_T uni_name;
+	TIMESTAMP_T tm;
+	DENTRY_T *ep;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	FILE_ID_T *fid = &(EXFAT_I(inode)->fid);
+
+	/* check if the given file ID is opened */
+	if (fid->type != TYPE_DIR)
+		return FFS_PERMISSIONERR;
+
+	if (fid->entry == -1) {
+		dir.dir = p_fs->root_dir;
+		dir.flags = 0x01;
+	} else {
+		dir.dir = fid->start_clu;
+		dir.size = (s32)(fid->size >> p_fs->cluster_size_bits);
+		dir.flags = fid->flags;
+	}
+
+	dentry = (s32) fid->rwoffset;
+
+	if (dir.dir == CLUSTER_32(0)) { /* FAT16 root_dir */
+		dentries_per_clu = p_fs->dentries_in_root;
+
+		if (dentry == dentries_per_clu) {
+			clu.dir = CLUSTER_32(~0);
+		} else {
+			clu.dir = dir.dir;
+			clu.size = dir.size;
+			clu.flags = dir.flags;
+		}
+	} else {
+		dentries_per_clu = p_fs->dentries_per_clu;
+		dentries_per_clu_bits = ilog2(dentries_per_clu);
+
+		clu_offset = dentry >> dentries_per_clu_bits;
+		clu.dir = dir.dir;
+		clu.size = dir.size;
+		clu.flags = dir.flags;
+
+		if (clu.flags == 0x03) {
+			clu.dir += clu_offset;
+			clu.size -= clu_offset;
+		} else {
+			/* hint_information */
+			if ((clu_offset > 0) && (fid->hint_last_off > 0) &&
+				(clu_offset >= fid->hint_last_off)) {
+				clu_offset -= fid->hint_last_off;
+				clu.dir = fid->hint_last_clu;
+			}
+
+			while (clu_offset > 0) {
+				/* clu.dir = FAT_read(sb, clu.dir); */
+				if (FAT_read(sb, clu.dir, &(clu.dir)) == -1)
+					return FFS_MEDIAERR;
+
+				clu_offset--;
+			}
+		}
+	}
+
+	while (clu.dir != CLUSTER_32(~0)) {
+		if (p_fs->dev_ejected)
+			break;
+
+		if (dir.dir == CLUSTER_32(0)) /* FAT16 root_dir */
+			i = dentry % dentries_per_clu;
+		else
+			i = dentry & (dentries_per_clu-1);
+
+		for ( ; i < dentries_per_clu; i++, dentry++) {
+			ep = get_entry_in_dir(sb, &clu, i, &sector);
+			if (!ep)
+				return FFS_MEDIAERR;
+
+			type = p_fs->fs_func->get_entry_type(ep);
+
+			if (type == TYPE_UNUSED)
+				break;
+
+			if ((type != TYPE_FILE) && (type != TYPE_DIR))
+				continue;
+
+			buf_lock(sb, sector);
+			dir_entry->Attr = p_fs->fs_func->get_entry_attr(ep);
+
+			p_fs->fs_func->get_entry_time(ep, &tm, TM_CREATE);
+			dir_entry->CreateTimestamp.Year = tm.year;
+			dir_entry->CreateTimestamp.Month = tm.mon;
+			dir_entry->CreateTimestamp.Day = tm.day;
+			dir_entry->CreateTimestamp.Hour = tm.hour;
+			dir_entry->CreateTimestamp.Minute = tm.min;
+			dir_entry->CreateTimestamp.Second = tm.sec;
+			dir_entry->CreateTimestamp.MilliSecond = 0;
+
+			p_fs->fs_func->get_entry_time(ep, &tm, TM_MODIFY);
+			dir_entry->ModifyTimestamp.Year = tm.year;
+			dir_entry->ModifyTimestamp.Month = tm.mon;
+			dir_entry->ModifyTimestamp.Day = tm.day;
+			dir_entry->ModifyTimestamp.Hour = tm.hour;
+			dir_entry->ModifyTimestamp.Minute = tm.min;
+			dir_entry->ModifyTimestamp.Second = tm.sec;
+			dir_entry->ModifyTimestamp.MilliSecond = 0;
+
+			memset((char *) &dir_entry->AccessTimestamp, 0, sizeof(DATE_TIME_T));
+
+			*(uni_name.name) = 0x0;
+			p_fs->fs_func->get_uni_name_from_ext_entry(sb, &dir, dentry, uni_name.name);
+			if (*(uni_name.name) == 0x0 && p_fs->vol_type != EXFAT)
+				get_uni_name_from_dos_entry(sb, (DOS_DENTRY_T *) ep, &uni_name, 0x1);
+			nls_uniname_to_cstring(sb, dir_entry->Name, &uni_name);
+			buf_unlock(sb, sector);
+
+			if (p_fs->vol_type == EXFAT) {
+				ep = get_entry_in_dir(sb, &clu, i+1, NULL);
+				if (!ep)
+					return FFS_MEDIAERR;
+			} else {
+				get_uni_name_from_dos_entry(sb, (DOS_DENTRY_T *) ep, &uni_name, 0x0);
+				nls_uniname_to_cstring(sb, dir_entry->ShortName, &uni_name);
+			}
+
+			dir_entry->Size = p_fs->fs_func->get_entry_size(ep);
+
+			/* hint information */
+			if (dir.dir == CLUSTER_32(0)) { /* FAT16 root_dir */
+			} else {
+				fid->hint_last_off = dentry >> dentries_per_clu_bits;
+				fid->hint_last_clu = clu.dir;
+			}
+
+			fid->rwoffset = (s64) ++dentry;
+
+			if (p_fs->dev_ejected)
+				return FFS_MEDIAERR;
+
+			return FFS_SUCCESS;
+		}
+
+		if (dir.dir == CLUSTER_32(0))
+			break; /* FAT16 root_dir */
+
+		if (clu.flags == 0x03) {
+			if ((--clu.size) > 0)
+				clu.dir++;
+			else
+				clu.dir = CLUSTER_32(~0);
+		} else {
+			/* clu.dir = FAT_read(sb, clu.dir); */
+			if (FAT_read(sb, clu.dir, &(clu.dir)) == -1)
+				return FFS_MEDIAERR;
+		}
+	}
+
+	*(dir_entry->Name) = '\0';
+
+	fid->rwoffset = (s64) ++dentry;
+
+	if (p_fs->dev_ejected)
+		return FFS_MEDIAERR;
+
+	return FFS_SUCCESS;
+} /* end of ffsReadDir */
+
+/* ffsRemoveDir : remove a directory */
+s32 ffsRemoveDir(struct inode *inode, FILE_ID_T *fid)
+{
+	s32 dentry;
+	CHAIN_T dir, clu_to_free;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	dir.dir = fid->dir.dir;
+	dir.size = fid->dir.size;
+	dir.flags = fid->dir.flags;
+
+	dentry = fid->entry;
+
+	/* check if the file is "." or ".." */
+	if (p_fs->vol_type != EXFAT) {
+		if ((dir.dir != p_fs->root_dir) && (dentry < 2))
+			return FFS_PERMISSIONERR;
+	}
+
+	clu_to_free.dir = fid->start_clu;
+	clu_to_free.size = (s32)((fid->size-1) >> p_fs->cluster_size_bits) + 1;
+	clu_to_free.flags = fid->flags;
+
+	if (!is_dir_empty(sb, &clu_to_free))
+		return FFS_FILEEXIST;
+
+	fs_set_vol_flags(sb, VOL_DIRTY);
+
+	/* (1) update the directory entry */
+	remove_file(inode, &dir, dentry);
+
+	/* (2) free the clusters */
+	p_fs->fs_func->free_cluster(sb, &clu_to_free, 1);
+
+	fid->size = 0;
+	fid->start_clu = CLUSTER_32(~0);
+	fid->flags = (p_fs->vol_type == EXFAT)? 0x03: 0x01;
+
+#ifdef CONFIG_EXFAT_DELAYED_SYNC
+	fs_sync(sb, 0);
+	fs_set_vol_flags(sb, VOL_CLEAN);
+#endif
+
+	if (p_fs->dev_ejected)
+		return FFS_MEDIAERR;
+
+	return FFS_SUCCESS;
+} /* end of ffsRemoveDir */
+
+/*======================================================================*/
+/*  Local Function Definitions                                          */
+/*======================================================================*/
+
+/*
+ *  File System Management Functions
+ */
+
+s32 fs_init(void)
+{
+	/* critical check for system requirement on size of DENTRY_T structure */
+	if (sizeof(DENTRY_T) != DENTRY_SIZE)
+		return FFS_ALIGNMENTERR;
+
+	if (sizeof(DOS_DENTRY_T) != DENTRY_SIZE)
+		return FFS_ALIGNMENTERR;
+
+	if (sizeof(EXT_DENTRY_T) != DENTRY_SIZE)
+		return FFS_ALIGNMENTERR;
+
+	if (sizeof(FILE_DENTRY_T) != DENTRY_SIZE)
+		return FFS_ALIGNMENTERR;
+
+	if (sizeof(STRM_DENTRY_T) != DENTRY_SIZE)
+		return FFS_ALIGNMENTERR;
+
+	if (sizeof(NAME_DENTRY_T) != DENTRY_SIZE)
+		return FFS_ALIGNMENTERR;
+
+	if (sizeof(BMAP_DENTRY_T) != DENTRY_SIZE)
+		return FFS_ALIGNMENTERR;
+
+	if (sizeof(CASE_DENTRY_T) != DENTRY_SIZE)
+		return FFS_ALIGNMENTERR;
+
+	if (sizeof(VOLM_DENTRY_T) != DENTRY_SIZE)
+		return FFS_ALIGNMENTERR;
+
+	return FFS_SUCCESS;
+} /* end of fs_init */
+
+s32 fs_shutdown(void)
+{
+	return FFS_SUCCESS;
+} /* end of fs_shutdown */
+
+void fs_set_vol_flags(struct super_block *sb, u32 new_flag)
+{
+	PBR_SECTOR_T *p_pbr;
+	BPBEX_T *p_bpb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	if (p_fs->vol_flag == new_flag)
+		return;
+
+	p_fs->vol_flag = new_flag;
+
+	if (p_fs->vol_type == EXFAT) {
+		if (p_fs->pbr_bh == NULL) {
+			if (sector_read(sb, p_fs->PBR_sector, &(p_fs->pbr_bh), 1) != FFS_SUCCESS)
+				return;
+		}
+
+		p_pbr = (PBR_SECTOR_T *) p_fs->pbr_bh->b_data;
+		p_bpb = (BPBEX_T *) p_pbr->bpb;
+		SET16(p_bpb->vol_flags, (u16) new_flag);
+
+		/* XXX duyoung
+		 what can we do here? (cuz fs_set_vol_flags() is void) */
+		if ((new_flag == VOL_DIRTY) && (!buffer_dirty(p_fs->pbr_bh)))
+			sector_write(sb, p_fs->PBR_sector, p_fs->pbr_bh, 1);
+		else
+			sector_write(sb, p_fs->PBR_sector, p_fs->pbr_bh, 0);
+	}
+} /* end of fs_set_vol_flags */
+
+void fs_sync(struct super_block *sb, s32 do_sync)
+{
+	if (do_sync)
+		bdev_sync(sb);
+} /* end of fs_sync */
+
+void fs_error(struct super_block *sb)
+{
+	struct exfat_mount_options *opts = &EXFAT_SB(sb)->options;
+
+	if (opts->errors == EXFAT_ERRORS_PANIC)
+		panic("[EXFAT] Filesystem panic from previous error\n");
+	else if ((opts->errors == EXFAT_ERRORS_RO) && !(sb->s_flags & MS_RDONLY)) {
+		sb->s_flags |= MS_RDONLY;
+		printk(KERN_ERR "[EXFAT] Filesystem has been set read-only\n");
+	}
+}
+
+/*
+ *  Cluster Management Functions
+ */
+
+s32 clear_cluster(struct super_block *sb, u32 clu)
+{
+	sector_t s, n;
+	s32 ret = FFS_SUCCESS;
+	struct buffer_head *tmp_bh = NULL;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	BD_INFO_T *p_bd = &(EXFAT_SB(sb)->bd_info);
+
+	if (clu == CLUSTER_32(0)) { /* FAT16 root_dir */
+		s = p_fs->root_start_sector;
+		n = p_fs->data_start_sector;
+	} else {
+		s = START_SECTOR(clu);
+		n = s + p_fs->sectors_per_clu;
+	}
+
+	for (; s < n; s++) {
+		ret = sector_read(sb, s, &tmp_bh, 0);
+		if (ret != FFS_SUCCESS)
+			return ret;
+
+		memset((char *) tmp_bh->b_data, 0x0, p_bd->sector_size);
+		ret = sector_write(sb, s, tmp_bh, 0);
+		if (ret != FFS_SUCCESS)
+			break;
+	}
+
+	brelse(tmp_bh);
+	return ret;
+} /* end of clear_cluster */
+
+s32 fat_alloc_cluster(struct super_block *sb, s32 num_alloc, CHAIN_T *p_chain)
+{
+	int i, num_clusters = 0;
+	u32 new_clu, last_clu = CLUSTER_32(~0), read_clu;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	new_clu = p_chain->dir;
+	if (new_clu == CLUSTER_32(~0))
+		new_clu = p_fs->clu_srch_ptr;
+	else if (new_clu >= p_fs->num_clusters)
+		new_clu = 2;
+
+	__set_sb_dirty(sb);
+
+	p_chain->dir = CLUSTER_32(~0);
+
+	for (i = 2; i < p_fs->num_clusters; i++) {
+		if (FAT_read(sb, new_clu, &read_clu) != 0)
+			return -1;
+
+		if (read_clu == CLUSTER_32(0)) {
+			if (FAT_write(sb, new_clu, CLUSTER_32(~0)) < 0)
+				return -1;
+			num_clusters++;
+
+			if (p_chain->dir == CLUSTER_32(~0))
+				p_chain->dir = new_clu;
+			else {
+				if (FAT_write(sb, last_clu, new_clu) < 0)
+					return -1;
+			}
+
+			last_clu = new_clu;
+
+			if ((--num_alloc) == 0) {
+				p_fs->clu_srch_ptr = new_clu;
+				if (p_fs->used_clusters != (u32) ~0)
+					p_fs->used_clusters += num_clusters;
+
+				return num_clusters;
+			}
+		}
+		if ((++new_clu) >= p_fs->num_clusters)
+			new_clu = 2;
+	}
+
+	p_fs->clu_srch_ptr = new_clu;
+	if (p_fs->used_clusters != (u32) ~0)
+		p_fs->used_clusters += num_clusters;
+
+	return num_clusters;
+} /* end of fat_alloc_cluster */
+
+s32 exfat_alloc_cluster(struct super_block *sb, s32 num_alloc, CHAIN_T *p_chain)
+{
+	s32 num_clusters = 0;
+	u32 hint_clu, new_clu, last_clu = CLUSTER_32(~0);
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	hint_clu = p_chain->dir;
+	if (hint_clu == CLUSTER_32(~0)) {
+		hint_clu = test_alloc_bitmap(sb, p_fs->clu_srch_ptr-2);
+		if (hint_clu == CLUSTER_32(~0))
+			return 0;
+	} else if (hint_clu >= p_fs->num_clusters) {
+		hint_clu = 2;
+		p_chain->flags = 0x01;
+	}
+
+	__set_sb_dirty(sb);
+
+	p_chain->dir = CLUSTER_32(~0);
+
+	while ((new_clu = test_alloc_bitmap(sb, hint_clu-2)) != CLUSTER_32(~0)) {
+		if (new_clu != hint_clu) {
+			if (p_chain->flags == 0x03) {
+				exfat_chain_cont_cluster(sb, p_chain->dir, num_clusters);
+				p_chain->flags = 0x01;
+			}
+		}
+
+		if (set_alloc_bitmap(sb, new_clu-2) != FFS_SUCCESS)
+			return -1;
+
+		num_clusters++;
+
+		if (p_chain->flags == 0x01) {
+			if (FAT_write(sb, new_clu, CLUSTER_32(~0)) < 0)
+				return -1;
+		}
+
+		if (p_chain->dir == CLUSTER_32(~0)) {
+			p_chain->dir = new_clu;
+		} else {
+			if (p_chain->flags == 0x01) {
+				if (FAT_write(sb, last_clu, new_clu) < 0)
+					return -1;
+			}
+		}
+		last_clu = new_clu;
+
+		if ((--num_alloc) == 0) {
+			p_fs->clu_srch_ptr = hint_clu;
+			if (p_fs->used_clusters != (u32) ~0)
+				p_fs->used_clusters += num_clusters;
+
+			p_chain->size += num_clusters;
+			return num_clusters;
+		}
+
+		hint_clu = new_clu + 1;
+		if (hint_clu >= p_fs->num_clusters) {
+			hint_clu = 2;
+
+			if (p_chain->flags == 0x03) {
+				exfat_chain_cont_cluster(sb, p_chain->dir, num_clusters);
+				p_chain->flags = 0x01;
+			}
+		}
+	}
+
+	p_fs->clu_srch_ptr = hint_clu;
+	if (p_fs->used_clusters != (u32) ~0)
+		p_fs->used_clusters += num_clusters;
+
+	p_chain->size += num_clusters;
+	return num_clusters;
+} /* end of exfat_alloc_cluster */
+
+void fat_free_cluster(struct super_block *sb, CHAIN_T *p_chain, s32 do_relse)
+{
+	s32 num_clusters = 0;
+	u32 clu, prev;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	int i;
+	sector_t sector;
+
+	if ((p_chain->dir == CLUSTER_32(0)) || (p_chain->dir == CLUSTER_32(~0)))
+		return;
+	__set_sb_dirty(sb);
+	clu = p_chain->dir;
+
+	if (p_chain->size <= 0)
+		return;
+
+	do {
+		if (p_fs->dev_ejected)
+			break;
+
+		if (do_relse) {
+			sector = START_SECTOR(clu);
+			for (i = 0; i < p_fs->sectors_per_clu; i++)
+				buf_release(sb, sector+i);
+		}
+
+		prev = clu;
+		if (FAT_read(sb, clu, &clu) == -1)
+			break;
+
+		if (FAT_write(sb, prev, CLUSTER_32(0)) < 0)
+			break;
+		num_clusters++;
+
+	} while (clu != CLUSTER_32(~0));
+
+	if (p_fs->used_clusters != (u32) ~0)
+		p_fs->used_clusters -= num_clusters;
+} /* end of fat_free_cluster */
+
+void exfat_free_cluster(struct super_block *sb, CHAIN_T *p_chain, s32 do_relse)
+{
+	s32 num_clusters = 0;
+	u32 clu;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	int i;
+	sector_t sector;
+
+	if ((p_chain->dir == CLUSTER_32(0)) || (p_chain->dir == CLUSTER_32(~0)))
+		return;
+
+	if (p_chain->size <= 0) {
+		printk(KERN_ERR "[EXFAT] free_cluster : skip free-req clu:%u, "
+				"because of zero-size truncation\n"
+				,p_chain->dir);
+		return;
+	}
+
+	__set_sb_dirty(sb);
+	clu = p_chain->dir;
+
+	if (p_chain->flags == 0x03) {
+		do {
+			if (do_relse) {
+				sector = START_SECTOR(clu);
+				for (i = 0; i < p_fs->sectors_per_clu; i++)
+					buf_release(sb, sector+i);
+			}
+
+			if (clr_alloc_bitmap(sb, clu-2) != FFS_SUCCESS)
+				break;
+			clu++;
+
+			num_clusters++;
+		} while (num_clusters < p_chain->size);
+	} else {
+		do {
+			if (p_fs->dev_ejected)
+				break;
+
+			if (do_relse) {
+				sector = START_SECTOR(clu);
+				for (i = 0; i < p_fs->sectors_per_clu; i++)
+					buf_release(sb, sector+i);
+			}
+
+			if (clr_alloc_bitmap(sb, clu-2) != FFS_SUCCESS)
+				break;
+
+			if (FAT_read(sb, clu, &clu) == -1)
+				break;
+			num_clusters++;
+		} while ((clu != CLUSTER_32(0)) && (clu != CLUSTER_32(~0)));
+	}
+
+	if (p_fs->used_clusters != (u32) ~0)
+		p_fs->used_clusters -= num_clusters;
+} /* end of exfat_free_cluster */
+
+u32 find_last_cluster(struct super_block *sb, CHAIN_T *p_chain)
+{
+	u32 clu, next;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	clu = p_chain->dir;
+
+	if (p_chain->flags == 0x03) {
+		clu += p_chain->size - 1;
+	} else {
+		while ((FAT_read(sb, clu, &next) == 0) && (next != CLUSTER_32(~0))) {
+			if (p_fs->dev_ejected)
+				break;
+			clu = next;
+		}
+	}
+
+	return clu;
+} /* end of find_last_cluster */
+
+s32 count_num_clusters(struct super_block *sb, CHAIN_T *p_chain)
+{
+	int i, count = 0;
+	u32 clu;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	if ((p_chain->dir == CLUSTER_32(0)) || (p_chain->dir == CLUSTER_32(~0)))
+		return 0;
+
+	clu = p_chain->dir;
+
+	if (p_chain->flags == 0x03) {
+		count = p_chain->size;
+	} else {
+		for (i = 2; i < p_fs->num_clusters; i++) {
+			count++;
+			if (FAT_read(sb, clu, &clu) != 0)
+				return 0;
+			if (clu == CLUSTER_32(~0))
+				break;
+		}
+	}
+
+	return count;
+} /* end of count_num_clusters */
+
+s32 fat_count_used_clusters(struct super_block *sb)
+{
+	int i, count = 0;
+	u32 clu;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	for (i = 2; i < p_fs->num_clusters; i++) {
+		if (FAT_read(sb, i, &clu) != 0)
+			break;
+		if (clu != CLUSTER_32(0))
+			count++;
+	}
+
+	return count;
+} /* end of fat_count_used_clusters */
+
+s32 exfat_count_used_clusters(struct super_block *sb)
+{
+	int i, map_i, map_b, count = 0;
+	u8 k;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	BD_INFO_T *p_bd = &(EXFAT_SB(sb)->bd_info);
+
+	map_i = map_b = 0;
+
+	for (i = 2; i < p_fs->num_clusters; i += 8) {
+		k = *(((u8 *) p_fs->vol_amap[map_i]->b_data) + map_b);
+		count += used_bit[k];
+
+		if ((++map_b) >= p_bd->sector_size) {
+			map_i++;
+			map_b = 0;
+		}
+	}
+
+	return count;
+} /* end of exfat_count_used_clusters */
+
+void exfat_chain_cont_cluster(struct super_block *sb, u32 chain, s32 len)
+{
+	if (len == 0)
+		return;
+
+	while (len > 1) {
+		if (FAT_write(sb, chain, chain+1) < 0)
+			break;
+		chain++;
+		len--;
+	}
+	FAT_write(sb, chain, CLUSTER_32(~0));
+} /* end of exfat_chain_cont_cluster */
+
+/*
+ *  Allocation Bitmap Management Functions
+ */
+
+s32 load_alloc_bitmap(struct super_block *sb)
+{
+	int i, j, ret;
+	u32 map_size;
+	u32 type;
+	sector_t sector;
+	CHAIN_T clu;
+	BMAP_DENTRY_T *ep;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	BD_INFO_T *p_bd = &(EXFAT_SB(sb)->bd_info);
+
+	clu.dir = p_fs->root_dir;
+	clu.flags = 0x01;
+
+	while (clu.dir != CLUSTER_32(~0)) {
+		if (p_fs->dev_ejected)
+			break;
+
+		for (i = 0; i < p_fs->dentries_per_clu; i++) {
+			ep = (BMAP_DENTRY_T *) get_entry_in_dir(sb, &clu, i, NULL);
+			if (!ep)
+				return FFS_MEDIAERR;
+
+			type = p_fs->fs_func->get_entry_type((DENTRY_T *) ep);
+
+			if (type == TYPE_UNUSED)
+				break;
+			if (type != TYPE_BITMAP)
+				continue;
+
+			if (ep->flags == 0x0) {
+				p_fs->map_clu  = GET32_A(ep->start_clu);
+				map_size = (u32) GET64_A(ep->size);
+
+				p_fs->map_sectors = ((map_size-1) >> p_bd->sector_size_bits) + 1;
+
+				p_fs->vol_amap = (struct buffer_head **) kmalloc(sizeof(struct buffer_head *) * p_fs->map_sectors, GFP_KERNEL);
+				if (p_fs->vol_amap == NULL)
+					return FFS_MEMORYERR;
+
+				sector = START_SECTOR(p_fs->map_clu);
+
+				for (j = 0; j < p_fs->map_sectors; j++) {
+					p_fs->vol_amap[j] = NULL;
+					ret = sector_read(sb, sector+j, &(p_fs->vol_amap[j]), 1);
+					if (ret != FFS_SUCCESS) {
+						/*  release all buffers and free vol_amap */
+						i = 0;
+						while (i < j)
+							brelse(p_fs->vol_amap[i++]);
+
+						if (p_fs->vol_amap)
+							kfree(p_fs->vol_amap);
+						p_fs->vol_amap = NULL;
+						return ret;
+					}
+				}
+
+				p_fs->pbr_bh = NULL;
+				return FFS_SUCCESS;
+			}
+		}
+
+		if (FAT_read(sb, clu.dir, &(clu.dir)) != 0)
+			return FFS_MEDIAERR;
+	}
+
+	return FFS_FORMATERR;
+} /* end of load_alloc_bitmap */
+
+void free_alloc_bitmap(struct super_block *sb)
+{
+	int i;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	brelse(p_fs->pbr_bh);
+
+	for (i = 0; i < p_fs->map_sectors; i++)
+		__brelse(p_fs->vol_amap[i]);
+
+	if (p_fs->vol_amap)
+		kfree(p_fs->vol_amap);
+	p_fs->vol_amap = NULL;
+} /* end of free_alloc_bitmap */
+
+s32 set_alloc_bitmap(struct super_block *sb, u32 clu)
+{
+	int i, b;
+	sector_t sector;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	BD_INFO_T *p_bd = &(EXFAT_SB(sb)->bd_info);
+
+	i = clu >> (p_bd->sector_size_bits + 3);
+	b = clu & ((p_bd->sector_size << 3) - 1);
+
+	sector = START_SECTOR(p_fs->map_clu) + i;
+
+	exfat_bitmap_set((u8 *) p_fs->vol_amap[i]->b_data, b);
+
+	return sector_write(sb, sector, p_fs->vol_amap[i], 0);
+} /* end of set_alloc_bitmap */
+
+s32 clr_alloc_bitmap(struct super_block *sb, u32 clu)
+{
+	int i, b;
+	sector_t sector;
+#ifdef CONFIG_EXFAT_DISCARD
+	struct exfat_sb_info *sbi = EXFAT_SB(sb);
+	struct exfat_mount_options *opts = &sbi->options;
+	int ret;
+#endif /* CONFIG_EXFAT_DISCARD */
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	BD_INFO_T *p_bd = &(EXFAT_SB(sb)->bd_info);
+
+	i = clu >> (p_bd->sector_size_bits + 3);
+	b = clu & ((p_bd->sector_size << 3) - 1);
+
+	sector = START_SECTOR(p_fs->map_clu) + i;
+
+	exfat_bitmap_clear((u8 *) p_fs->vol_amap[i]->b_data, b);
+
+	return sector_write(sb, sector, p_fs->vol_amap[i], 0);
+
+#ifdef CONFIG_EXFAT_DISCARD
+	if (opts->discard) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+		ret = sb_issue_discard(sb, START_SECTOR(clu), (1 << p_fs->sectors_per_clu_bits));
+#else
+		ret = sb_issue_discard(sb, START_SECTOR(clu), (1 << p_fs->sectors_per_clu_bits), GFP_NOFS, 0);
+#endif
+		if (ret == -EOPNOTSUPP) {
+			printk(KERN_WARNING "discard not supported by device, disabling");
+			opts->discard = 0;
+		}
+	}
+#endif /* CONFIG_EXFAT_DISCARD */
+} /* end of clr_alloc_bitmap */
+
+u32 test_alloc_bitmap(struct super_block *sb, u32 clu)
+{
+	int i, map_i, map_b;
+	u32 clu_base, clu_free;
+	u8 k, clu_mask;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	BD_INFO_T *p_bd = &(EXFAT_SB(sb)->bd_info);
+
+	clu_base = (clu & ~(0x7)) + 2;
+	clu_mask = (1 << (clu - clu_base + 2)) - 1;
+
+	map_i = clu >> (p_bd->sector_size_bits + 3);
+	map_b = (clu >> 3) & p_bd->sector_size_mask;
+
+	for (i = 2; i < p_fs->num_clusters; i += 8) {
+		k = *(((u8 *) p_fs->vol_amap[map_i]->b_data) + map_b);
+		if (clu_mask > 0) {
+			k |= clu_mask;
+			clu_mask = 0;
+		}
+		if (k < 0xFF) {
+			clu_free = clu_base + free_bit[k];
+			if (clu_free < p_fs->num_clusters)
+				return clu_free;
+		}
+		clu_base += 8;
+
+		if (((++map_b) >= p_bd->sector_size) || (clu_base >= p_fs->num_clusters)) {
+			if ((++map_i) >= p_fs->map_sectors) {
+				clu_base = 2;
+				map_i = 0;
+			}
+			map_b = 0;
+		}
+	}
+
+	return CLUSTER_32(~0);
+} /* end of test_alloc_bitmap */
+
+void sync_alloc_bitmap(struct super_block *sb)
+{
+	int i;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	if (p_fs->vol_amap == NULL)
+		return;
+
+	for (i = 0; i < p_fs->map_sectors; i++)
+		sync_dirty_buffer(p_fs->vol_amap[i]);
+} /* end of sync_alloc_bitmap */
+
+/*
+ *  Upcase table Management Functions
+ */
+s32 __load_upcase_table(struct super_block *sb, sector_t sector, u32 num_sectors, u32 utbl_checksum)
+{
+	int i, ret = FFS_ERROR;
+	u32 j;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	BD_INFO_T *p_bd = &(EXFAT_SB(sb)->bd_info);
+	struct buffer_head *tmp_bh = NULL;
+	sector_t end_sector = num_sectors + sector;
+
+	u8	skip = FALSE;
+	u32	index = 0;
+	u16	uni = 0;
+	u16 **upcase_table;
+
+	u32 checksum = 0;
+
+	upcase_table = p_fs->vol_utbl = (u16 **) kmalloc(UTBL_COL_COUNT * sizeof(u16 *), GFP_KERNEL);
+	if (upcase_table == NULL)
+		return FFS_MEMORYERR;
+	memset(upcase_table, 0, UTBL_COL_COUNT * sizeof(u16 *));
+
+	while (sector < end_sector) {
+		ret = sector_read(sb, sector, &tmp_bh, 1);
+		if (ret != FFS_SUCCESS) {
+			DPRINTK("sector read (0x%llX)fail\n", (unsigned long long)sector);
+			goto error;
+		}
+		sector++;
+
+		for (i = 0; i < p_bd->sector_size && index <= 0xFFFF; i += 2) {
+			uni = GET16(((u8 *) tmp_bh->b_data)+i);
+
+			checksum = ((checksum & 1) ? 0x80000000 : 0) + (checksum >> 1) + *(((u8 *) tmp_bh->b_data)+i);
+			checksum = ((checksum & 1) ? 0x80000000 : 0) + (checksum >> 1) + *(((u8 *) tmp_bh->b_data)+(i+1));
+
+			if (skip) {
+				DPRINTK("skip from 0x%X ", index);
+				index += uni;
+				DPRINTK("to 0x%X (amount of 0x%X)\n", index, uni);
+				skip = FALSE;
+			} else if (uni == index)
+				index++;
+			else if (uni == 0xFFFF)
+				skip = TRUE;
+			else { /* uni != index , uni != 0xFFFF */
+				u16 col_index = get_col_index(index);
+
+				if (upcase_table[col_index] == NULL) {
+					DPRINTK("alloc = 0x%X\n", col_index);
+					upcase_table[col_index] = (u16 *) kmalloc(UTBL_ROW_COUNT * sizeof(u16), GFP_KERNEL);
+					if (upcase_table[col_index] == NULL) {
+						ret = FFS_MEMORYERR;
+						goto error;
+					}
+
+					for (j = 0; j < UTBL_ROW_COUNT; j++)
+						upcase_table[col_index][j] = (col_index << LOW_INDEX_BIT) | j;
+				}
+
+				upcase_table[col_index][get_row_index(index)] = uni;
+				index++;
+			}
+		}
+	}
+	if (index >= 0xFFFF && utbl_checksum == checksum) {
+		if (tmp_bh)
+			brelse(tmp_bh);
+		return FFS_SUCCESS;
+	}
+	ret = FFS_ERROR;
+error:
+	if (tmp_bh)
+		brelse(tmp_bh);
+	free_upcase_table(sb);
+	return ret;
+}
+
+s32 __load_default_upcase_table(struct super_block *sb)
+{
+	int i, ret = FFS_ERROR;
+	u32 j;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	u8	skip = FALSE;
+	u32	index = 0;
+	u16	uni = 0;
+	u16 **upcase_table;
+
+	upcase_table = p_fs->vol_utbl = (u16 **) kmalloc(UTBL_COL_COUNT * sizeof(u16 *), GFP_KERNEL);
+	if (upcase_table == NULL)
+		return FFS_MEMORYERR;
+	memset(upcase_table, 0, UTBL_COL_COUNT * sizeof(u16 *));
+
+	for (i = 0; index <= 0xFFFF && i < NUM_UPCASE*2; i += 2) {
+		uni = GET16(uni_upcase + i);
+		if (skip) {
+			DPRINTK("skip from 0x%X ", index);
+			index += uni;
+			DPRINTK("to 0x%X (amount of 0x%X)\n", index, uni);
+			skip = FALSE;
+		} else if (uni == index)
+			index++;
+		else if (uni == 0xFFFF)
+			skip = TRUE;
+		else { /* uni != index , uni != 0xFFFF */
+			u16 col_index = get_col_index(index);
+
+			if (upcase_table[col_index] == NULL) {
+				DPRINTK("alloc = 0x%X\n", col_index);
+				upcase_table[col_index] = (u16 *) kmalloc(UTBL_ROW_COUNT * sizeof(u16), GFP_KERNEL);
+				if (upcase_table[col_index] == NULL) {
+					ret = FFS_MEMORYERR;
+					goto error;
+				}
+
+				for (j = 0; j < UTBL_ROW_COUNT; j++)
+					upcase_table[col_index][j] = (col_index << LOW_INDEX_BIT) | j;
+			}
+
+			upcase_table[col_index][get_row_index(index)] = uni;
+			index++;
+		}
+	}
+
+	if (index >= 0xFFFF)
+		return FFS_SUCCESS;
+
+error:
+	/* FATAL error: default upcase table has error */
+	free_upcase_table(sb);
+	return ret;
+}
+
+s32 load_upcase_table(struct super_block *sb)
+{
+	int i;
+	u32 tbl_clu, tbl_size;
+	sector_t sector;
+	u32 type, num_sectors;
+	CHAIN_T clu;
+	CASE_DENTRY_T *ep;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	BD_INFO_T *p_bd = &(EXFAT_SB(sb)->bd_info);
+
+	clu.dir = p_fs->root_dir;
+	clu.flags = 0x01;
+
+	if (p_fs->dev_ejected)
+		return FFS_MEDIAERR;
+
+	while (clu.dir != CLUSTER_32(~0)) {
+		for (i = 0; i < p_fs->dentries_per_clu; i++) {
+			ep = (CASE_DENTRY_T *) get_entry_in_dir(sb, &clu, i, NULL);
+			if (!ep)
+				return FFS_MEDIAERR;
+
+			type = p_fs->fs_func->get_entry_type((DENTRY_T *) ep);
+
+			if (type == TYPE_UNUSED)
+				break;
+			if (type != TYPE_UPCASE)
+				continue;
+
+			tbl_clu  = GET32_A(ep->start_clu);
+			tbl_size = (u32) GET64_A(ep->size);
+
+			sector = START_SECTOR(tbl_clu);
+			num_sectors = ((tbl_size-1) >> p_bd->sector_size_bits) + 1;
+			if (__load_upcase_table(sb, sector, num_sectors, GET32_A(ep->checksum)) != FFS_SUCCESS)
+				break;
+			else
+				return FFS_SUCCESS;
+		}
+		if (FAT_read(sb, clu.dir, &(clu.dir)) != 0)
+			return FFS_MEDIAERR;
+	}
+	/* load default upcase table */
+	return __load_default_upcase_table(sb);
+} /* end of load_upcase_table */
+
+void free_upcase_table(struct super_block *sb)
+{
+	u32 i;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	u16 **upcase_table;
+
+	upcase_table = p_fs->vol_utbl;
+	for (i = 0; i < UTBL_COL_COUNT; i++) {
+		if (upcase_table[i])
+			kfree(upcase_table[i]);
+	}
+
+	if (p_fs->vol_utbl)
+		kfree(p_fs->vol_utbl);
+	p_fs->vol_utbl = NULL;
+} /* end of free_upcase_table */
+
+/*
+ *  Directory Entry Management Functions
+ */
+
+u32 fat_get_entry_type(DENTRY_T *p_entry)
+{
+	DOS_DENTRY_T *ep = (DOS_DENTRY_T *) p_entry;
+
+	if (*(ep->name) == 0x0)
+		return TYPE_UNUSED;
+
+	else if (*(ep->name) == 0xE5)
+		return TYPE_DELETED;
+
+	else if (ep->attr == ATTR_EXTEND)
+		return TYPE_EXTEND;
+
+	else if ((ep->attr & (ATTR_SUBDIR|ATTR_VOLUME)) == ATTR_VOLUME)
+		return TYPE_VOLUME;
+
+	else if ((ep->attr & (ATTR_SUBDIR|ATTR_VOLUME)) == ATTR_SUBDIR)
+		return TYPE_DIR;
+
+	return TYPE_FILE;
+} /* end of fat_get_entry_type */
+
+u32 exfat_get_entry_type(DENTRY_T *p_entry)
+{
+	FILE_DENTRY_T *ep = (FILE_DENTRY_T *) p_entry;
+
+	if (ep->type == 0x0) {
+		return TYPE_UNUSED;
+	} else if (ep->type < 0x80) {
+		return TYPE_DELETED;
+	} else if (ep->type == 0x80) {
+		return TYPE_INVALID;
+	} else if (ep->type < 0xA0) {
+		if (ep->type == 0x81) {
+			return TYPE_BITMAP;
+		} else if (ep->type == 0x82) {
+			return TYPE_UPCASE;
+		} else if (ep->type == 0x83) {
+			return TYPE_VOLUME;
+		} else if (ep->type == 0x85) {
+			if (GET16_A(ep->attr) & ATTR_SUBDIR)
+				return TYPE_DIR;
+			else
+				return TYPE_FILE;
+		}
+		return TYPE_CRITICAL_PRI;
+	} else if (ep->type < 0xC0) {
+		if (ep->type == 0xA0)
+			return TYPE_GUID;
+		else if (ep->type == 0xA1)
+			return TYPE_PADDING;
+		else if (ep->type == 0xA2)
+			return TYPE_ACLTAB;
+		return TYPE_BENIGN_PRI;
+	} else if (ep->type < 0xE0) {
+		if (ep->type == 0xC0)
+			return TYPE_STREAM;
+		else if (ep->type == 0xC1)
+			return TYPE_EXTEND;
+		else if (ep->type == 0xC2)
+			return TYPE_ACL;
+		return TYPE_CRITICAL_SEC;
+	}
+
+	return TYPE_BENIGN_SEC;
+} /* end of exfat_get_entry_type */
+
+void fat_set_entry_type(DENTRY_T *p_entry, u32 type)
+{
+	DOS_DENTRY_T *ep = (DOS_DENTRY_T *) p_entry;
+
+	if (type == TYPE_UNUSED)
+		*(ep->name) = 0x0;
+
+	else if (type == TYPE_DELETED)
+		*(ep->name) = 0xE5;
+
+	else if (type == TYPE_EXTEND)
+		ep->attr = ATTR_EXTEND;
+
+	else if (type == TYPE_DIR)
+		ep->attr = ATTR_SUBDIR;
+
+	else if (type == TYPE_FILE)
+		ep->attr = ATTR_ARCHIVE;
+
+	else if (type == TYPE_SYMLINK)
+		ep->attr = ATTR_ARCHIVE | ATTR_SYMLINK;
+} /* end of fat_set_entry_type */
+
+void exfat_set_entry_type(DENTRY_T *p_entry, u32 type)
+{
+	FILE_DENTRY_T *ep = (FILE_DENTRY_T *) p_entry;
+
+	if (type == TYPE_UNUSED) {
+		ep->type = 0x0;
+	} else if (type == TYPE_DELETED) {
+		ep->type &= ~0x80;
+	} else if (type == TYPE_STREAM) {
+		ep->type = 0xC0;
+	} else if (type == TYPE_EXTEND) {
+		ep->type = 0xC1;
+	} else if (type == TYPE_BITMAP) {
+		ep->type = 0x81;
+	} else if (type == TYPE_UPCASE) {
+		ep->type = 0x82;
+	} else if (type == TYPE_VOLUME) {
+		ep->type = 0x83;
+	} else if (type == TYPE_DIR) {
+		ep->type = 0x85;
+		SET16_A(ep->attr, ATTR_SUBDIR);
+	} else if (type == TYPE_FILE) {
+		ep->type = 0x85;
+		SET16_A(ep->attr, ATTR_ARCHIVE);
+	} else if (type == TYPE_SYMLINK) {
+		ep->type = 0x85;
+		SET16_A(ep->attr, ATTR_ARCHIVE | ATTR_SYMLINK);
+	}
+} /* end of exfat_set_entry_type */
+
+u32 fat_get_entry_attr(DENTRY_T *p_entry)
+{
+	DOS_DENTRY_T *ep = (DOS_DENTRY_T *) p_entry;
+	return (u32) ep->attr;
+} /* end of fat_get_entry_attr */
+
+u32 exfat_get_entry_attr(DENTRY_T *p_entry)
+{
+	FILE_DENTRY_T *ep = (FILE_DENTRY_T *) p_entry;
+	return (u32) GET16_A(ep->attr);
+} /* end of exfat_get_entry_attr */
+
+void fat_set_entry_attr(DENTRY_T *p_entry, u32 attr)
+{
+	DOS_DENTRY_T *ep = (DOS_DENTRY_T *) p_entry;
+	ep->attr = (u8) attr;
+} /* end of fat_set_entry_attr */
+
+void exfat_set_entry_attr(DENTRY_T *p_entry, u32 attr)
+{
+	FILE_DENTRY_T *ep = (FILE_DENTRY_T *) p_entry;
+	SET16_A(ep->attr, (u16) attr);
+} /* end of exfat_set_entry_attr */
+
+u8 fat_get_entry_flag(DENTRY_T *p_entry)
+{
+	return 0x01;
+} /* end of fat_get_entry_flag */
+
+u8 exfat_get_entry_flag(DENTRY_T *p_entry)
+{
+	STRM_DENTRY_T *ep = (STRM_DENTRY_T *) p_entry;
+	return ep->flags;
+} /* end of exfat_get_entry_flag */
+
+void fat_set_entry_flag(DENTRY_T *p_entry, u8 flags)
+{
+} /* end of fat_set_entry_flag */
+
+void exfat_set_entry_flag(DENTRY_T *p_entry, u8 flags)
+{
+	STRM_DENTRY_T *ep = (STRM_DENTRY_T *) p_entry;
+	ep->flags = flags;
+} /* end of exfat_set_entry_flag */
+
+u32 fat_get_entry_clu0(DENTRY_T *p_entry)
+{
+	DOS_DENTRY_T *ep = (DOS_DENTRY_T *) p_entry;
+	return ((u32) GET16_A(ep->start_clu_hi) << 16) | GET16_A(ep->start_clu_lo);
+} /* end of fat_get_entry_clu0 */
+
+u32 exfat_get_entry_clu0(DENTRY_T *p_entry)
+{
+	STRM_DENTRY_T *ep = (STRM_DENTRY_T *) p_entry;
+	return GET32_A(ep->start_clu);
+} /* end of exfat_get_entry_clu0 */
+
+void fat_set_entry_clu0(DENTRY_T *p_entry, u32 start_clu)
+{
+	DOS_DENTRY_T *ep = (DOS_DENTRY_T *) p_entry;
+	SET16_A(ep->start_clu_lo, CLUSTER_16(start_clu));
+	SET16_A(ep->start_clu_hi, CLUSTER_16(start_clu >> 16));
+} /* end of fat_set_entry_clu0 */
+
+void exfat_set_entry_clu0(DENTRY_T *p_entry, u32 start_clu)
+{
+	STRM_DENTRY_T *ep = (STRM_DENTRY_T *) p_entry;
+	SET32_A(ep->start_clu, start_clu);
+} /* end of exfat_set_entry_clu0 */
+
+u64 fat_get_entry_size(DENTRY_T *p_entry)
+{
+	DOS_DENTRY_T *ep = (DOS_DENTRY_T *) p_entry;
+	return (u64) GET32_A(ep->size);
+} /* end of fat_get_entry_size */
+
+u64 exfat_get_entry_size(DENTRY_T *p_entry)
+{
+	STRM_DENTRY_T *ep = (STRM_DENTRY_T *) p_entry;
+	return GET64_A(ep->valid_size);
+} /* end of exfat_get_entry_size */
+
+void fat_set_entry_size(DENTRY_T *p_entry, u64 size)
+{
+	DOS_DENTRY_T *ep = (DOS_DENTRY_T *) p_entry;
+	SET32_A(ep->size, (u32) size);
+} /* end of fat_set_entry_size */
+
+void exfat_set_entry_size(DENTRY_T *p_entry, u64 size)
+{
+	STRM_DENTRY_T *ep = (STRM_DENTRY_T *) p_entry;
+	SET64_A(ep->valid_size, size);
+	SET64_A(ep->size, size);
+} /* end of exfat_set_entry_size */
+
+void fat_get_entry_time(DENTRY_T *p_entry, TIMESTAMP_T *tp, u8 mode)
+{
+	u16 t = 0x00, d = 0x21;
+	DOS_DENTRY_T *ep = (DOS_DENTRY_T *) p_entry;
+
+	switch (mode) {
+	case TM_CREATE:
+		t = GET16_A(ep->create_time);
+		d = GET16_A(ep->create_date);
+		break;
+	case TM_MODIFY:
+		t = GET16_A(ep->modify_time);
+		d = GET16_A(ep->modify_date);
+		break;
+	}
+
+	tp->sec  = (t & 0x001F) << 1;
+	tp->min  = (t >> 5) & 0x003F;
+	tp->hour = (t >> 11);
+	tp->day  = (d & 0x001F);
+	tp->mon  = (d >> 5) & 0x000F;
+	tp->year = (d >> 9);
+} /* end of fat_get_entry_time */
+
+void exfat_get_entry_time(DENTRY_T *p_entry, TIMESTAMP_T *tp, u8 mode)
+{
+	u16 t = 0x00, d = 0x21;
+	FILE_DENTRY_T *ep = (FILE_DENTRY_T *) p_entry;
+
+	switch (mode) {
+	case TM_CREATE:
+		t = GET16_A(ep->create_time);
+		d = GET16_A(ep->create_date);
+		break;
+	case TM_MODIFY:
+		t = GET16_A(ep->modify_time);
+		d = GET16_A(ep->modify_date);
+		break;
+	case TM_ACCESS:
+		t = GET16_A(ep->access_time);
+		d = GET16_A(ep->access_date);
+		break;
+	}
+
+	tp->sec  = (t & 0x001F) << 1;
+	tp->min  = (t >> 5) & 0x003F;
+	tp->hour = (t >> 11);
+	tp->day  = (d & 0x001F);
+	tp->mon  = (d >> 5) & 0x000F;
+	tp->year = (d >> 9);
+} /* end of exfat_get_entry_time */
+
+void fat_set_entry_time(DENTRY_T *p_entry, TIMESTAMP_T *tp, u8 mode)
+{
+	u16 t, d;
+	DOS_DENTRY_T *ep = (DOS_DENTRY_T *) p_entry;
+
+	t = (tp->hour << 11) | (tp->min << 5) | (tp->sec >> 1);
+	d = (tp->year <<  9) | (tp->mon << 5) |  tp->day;
+
+	switch (mode) {
+	case TM_CREATE:
+		SET16_A(ep->create_time, t);
+		SET16_A(ep->create_date, d);
+		break;
+	case TM_MODIFY:
+		SET16_A(ep->modify_time, t);
+		SET16_A(ep->modify_date, d);
+		break;
+	}
+} /* end of fat_set_entry_time */
+
+void exfat_set_entry_time(DENTRY_T *p_entry, TIMESTAMP_T *tp, u8 mode)
+{
+	u16 t, d;
+	FILE_DENTRY_T *ep = (FILE_DENTRY_T *) p_entry;
+
+	t = (tp->hour << 11) | (tp->min << 5) | (tp->sec >> 1);
+	d = (tp->year <<  9) | (tp->mon << 5) |  tp->day;
+
+	switch (mode) {
+	case TM_CREATE:
+		SET16_A(ep->create_time, t);
+		SET16_A(ep->create_date, d);
+		break;
+	case TM_MODIFY:
+		SET16_A(ep->modify_time, t);
+		SET16_A(ep->modify_date, d);
+		break;
+	case TM_ACCESS:
+		SET16_A(ep->access_time, t);
+		SET16_A(ep->access_date, d);
+		break;
+	}
+} /* end of exfat_set_entry_time */
+
+s32 fat_init_dir_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u32 type,
+						 u32 start_clu, u64 size)
+{
+	sector_t sector;
+	DOS_DENTRY_T *dos_ep;
+
+	dos_ep = (DOS_DENTRY_T *) get_entry_in_dir(sb, p_dir, entry, &sector);
+	if (!dos_ep)
+		return FFS_MEDIAERR;
+
+	init_dos_entry(dos_ep, type, start_clu);
+	buf_modify(sb, sector);
+
+	return FFS_SUCCESS;
+} /* end of fat_init_dir_entry */
+
+s32 exfat_init_dir_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u32 type,
+						   u32 start_clu, u64 size)
+{
+	sector_t sector;
+	u8 flags;
+	FILE_DENTRY_T *file_ep;
+	STRM_DENTRY_T *strm_ep;
+
+	flags = (type == TYPE_FILE) ? 0x01 : 0x03;
+
+	/* we cannot use get_entry_set_in_dir here because file ep is not initialized yet */
+	file_ep = (FILE_DENTRY_T *) get_entry_in_dir(sb, p_dir, entry, &sector);
+	if (!file_ep)
+		return FFS_MEDIAERR;
+
+	strm_ep = (STRM_DENTRY_T *) get_entry_in_dir(sb, p_dir, entry+1, &sector);
+	if (!strm_ep)
+		return FFS_MEDIAERR;
+
+	init_file_entry(file_ep, type);
+	buf_modify(sb, sector);
+
+	init_strm_entry(strm_ep, flags, start_clu, size);
+	buf_modify(sb, sector);
+
+	return FFS_SUCCESS;
+} /* end of exfat_init_dir_entry */
+
+s32 fat_init_ext_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, s32 num_entries,
+						 UNI_NAME_T *p_uniname, DOS_NAME_T *p_dosname)
+{
+	int i;
+	sector_t sector;
+	u8 chksum;
+	u16 *uniname = p_uniname->name;
+	DOS_DENTRY_T *dos_ep;
+	EXT_DENTRY_T *ext_ep;
+
+	dos_ep = (DOS_DENTRY_T *) get_entry_in_dir(sb, p_dir, entry, &sector);
+	if (!dos_ep)
+		return FFS_MEDIAERR;
+
+	dos_ep->lcase = p_dosname->name_case;
+	memcpy(dos_ep->name, p_dosname->name, DOS_NAME_LENGTH);
+	buf_modify(sb, sector);
+
+	if ((--num_entries) > 0) {
+		chksum = calc_checksum_1byte((void *) dos_ep->name, DOS_NAME_LENGTH, 0);
+
+		for (i = 1; i < num_entries; i++) {
+			ext_ep = (EXT_DENTRY_T *) get_entry_in_dir(sb, p_dir, entry-i, &sector);
+			if (!ext_ep)
+				return FFS_MEDIAERR;
+
+			init_ext_entry(ext_ep, i, chksum, uniname);
+			buf_modify(sb, sector);
+			uniname += 13;
+		}
+
+		ext_ep = (EXT_DENTRY_T *) get_entry_in_dir(sb, p_dir, entry-i, &sector);
+		if (!ext_ep)
+			return FFS_MEDIAERR;
+
+		init_ext_entry(ext_ep, i+0x40, chksum, uniname);
+		buf_modify(sb, sector);
+	}
+
+	return FFS_SUCCESS;
+} /* end of fat_init_ext_entry */
+
+s32 exfat_init_ext_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, s32 num_entries,
+						   UNI_NAME_T *p_uniname, DOS_NAME_T *p_dosname)
+{
+	int i;
+	sector_t sector;
+	u16 *uniname = p_uniname->name;
+	FILE_DENTRY_T *file_ep;
+	STRM_DENTRY_T *strm_ep;
+	NAME_DENTRY_T *name_ep;
+
+	file_ep = (FILE_DENTRY_T *) get_entry_in_dir(sb, p_dir, entry, &sector);
+	if (!file_ep)
+		return FFS_MEDIAERR;
+
+	file_ep->num_ext = (u8)(num_entries - 1);
+	buf_modify(sb, sector);
+
+	strm_ep = (STRM_DENTRY_T *) get_entry_in_dir(sb, p_dir, entry+1, &sector);
+	if (!strm_ep)
+		return FFS_MEDIAERR;
+
+	strm_ep->name_len = p_uniname->name_len;
+	SET16_A(strm_ep->name_hash, p_uniname->name_hash);
+	buf_modify(sb, sector);
+
+	for (i = 2; i < num_entries; i++) {
+		name_ep = (NAME_DENTRY_T *) get_entry_in_dir(sb, p_dir, entry+i, &sector);
+		if (!name_ep)
+			return FFS_MEDIAERR;
+
+		init_name_entry(name_ep, uniname);
+		buf_modify(sb, sector);
+		uniname += 15;
+	}
+
+	update_dir_checksum(sb, p_dir, entry);
+
+	return FFS_SUCCESS;
+} /* end of exfat_init_ext_entry */
+
+void init_dos_entry(DOS_DENTRY_T *ep, u32 type, u32 start_clu)
+{
+	TIMESTAMP_T tm, *tp;
+
+	fat_set_entry_type((DENTRY_T *) ep, type);
+	SET16_A(ep->start_clu_lo, CLUSTER_16(start_clu));
+	SET16_A(ep->start_clu_hi, CLUSTER_16(start_clu >> 16));
+	SET32_A(ep->size, 0);
+
+	tp = tm_current(&tm);
+	fat_set_entry_time((DENTRY_T *) ep, tp, TM_CREATE);
+	fat_set_entry_time((DENTRY_T *) ep, tp, TM_MODIFY);
+	SET16_A(ep->access_date, 0);
+	ep->create_time_ms = 0;
+} /* end of init_dos_entry */
+
+void init_ext_entry(EXT_DENTRY_T *ep, s32 order, u8 chksum, u16 *uniname)
+{
+	int i;
+	u8 end = FALSE;
+
+	fat_set_entry_type((DENTRY_T *) ep, TYPE_EXTEND);
+	ep->order = (u8) order;
+	ep->sysid = 0;
+	ep->checksum = chksum;
+	SET16_A(ep->start_clu, 0);
+
+	for (i = 0; i < 10; i += 2) {
+		if (!end) {
+			SET16(ep->unicode_0_4+i, *uniname);
+			if (*uniname == 0x0)
+				end = TRUE;
+			else
+				uniname++;
+		} else {
+			SET16(ep->unicode_0_4+i, 0xFFFF);
+		}
+	}
+
+	for (i = 0; i < 12; i += 2) {
+		if (!end) {
+			SET16_A(ep->unicode_5_10+i, *uniname);
+			if (*uniname == 0x0)
+				end = TRUE;
+			else
+				uniname++;
+		} else {
+			SET16_A(ep->unicode_5_10+i, 0xFFFF);
+		}
+	}
+
+	for (i = 0; i < 4; i += 2) {
+		if (!end) {
+			SET16_A(ep->unicode_11_12+i, *uniname);
+			if (*uniname == 0x0)
+				end = TRUE;
+			else
+				uniname++;
+		} else {
+			SET16_A(ep->unicode_11_12+i, 0xFFFF);
+		}
+	}
+} /* end of init_ext_entry */
+
+void init_file_entry(FILE_DENTRY_T *ep, u32 type)
+{
+	TIMESTAMP_T tm, *tp;
+
+	exfat_set_entry_type((DENTRY_T *) ep, type);
+
+	tp = tm_current(&tm);
+	exfat_set_entry_time((DENTRY_T *) ep, tp, TM_CREATE);
+	exfat_set_entry_time((DENTRY_T *) ep, tp, TM_MODIFY);
+	exfat_set_entry_time((DENTRY_T *) ep, tp, TM_ACCESS);
+	ep->create_time_ms = 0;
+	ep->modify_time_ms = 0;
+	ep->access_time_ms = 0;
+} /* end of init_file_entry */
+
+void init_strm_entry(STRM_DENTRY_T *ep, u8 flags, u32 start_clu, u64 size)
+{
+	exfat_set_entry_type((DENTRY_T *) ep, TYPE_STREAM);
+	ep->flags = flags;
+	SET32_A(ep->start_clu, start_clu);
+	SET64_A(ep->valid_size, size);
+	SET64_A(ep->size, size);
+} /* end of init_strm_entry */
+
+void init_name_entry(NAME_DENTRY_T *ep, u16 *uniname)
+{
+	int i;
+
+	exfat_set_entry_type((DENTRY_T *) ep, TYPE_EXTEND);
+	ep->flags = 0x0;
+
+	for (i = 0; i < 30; i++, i++) {
+		SET16_A(ep->unicode_0_14+i, *uniname);
+		if (*uniname == 0x0)
+			break;
+		uniname++;
+	}
+} /* end of init_name_entry */
+
+void fat_delete_dir_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, s32 order, s32 num_entries)
+{
+	int i;
+	sector_t sector;
+	DENTRY_T *ep;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	for (i = num_entries-1; i >= order; i--) {
+		ep = get_entry_in_dir(sb, p_dir, entry-i, &sector);
+		if (!ep)
+			return;
+
+		p_fs->fs_func->set_entry_type(ep, TYPE_DELETED);
+		buf_modify(sb, sector);
+	}
+} /* end of fat_delete_dir_entry */
+
+void exfat_delete_dir_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, s32 order, s32 num_entries)
+{
+	int i;
+	sector_t sector;
+	DENTRY_T *ep;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	for (i = order; i < num_entries; i++) {
+		ep = get_entry_in_dir(sb, p_dir, entry+i, &sector);
+		if (!ep)
+			return;
+
+		p_fs->fs_func->set_entry_type(ep, TYPE_DELETED);
+		buf_modify(sb, sector);
+	}
+} /* end of exfat_delete_dir_entry */
+
+void update_dir_checksum(struct super_block *sb, CHAIN_T *p_dir, s32 entry)
+{
+	int i, num_entries;
+	sector_t sector;
+	u16 chksum;
+	FILE_DENTRY_T *file_ep;
+	DENTRY_T *ep;
+
+	file_ep = (FILE_DENTRY_T *) get_entry_in_dir(sb, p_dir, entry, &sector);
+	if (!file_ep)
+		return;
+
+	buf_lock(sb, sector);
+
+	num_entries = (s32) file_ep->num_ext + 1;
+	chksum = calc_checksum_2byte((void *) file_ep, DENTRY_SIZE, 0, CS_DIR_ENTRY);
+
+	for (i = 1; i < num_entries; i++) {
+		ep = get_entry_in_dir(sb, p_dir, entry+i, NULL);
+		if (!ep) {
+			buf_unlock(sb, sector);
+			return;
+		}
+
+		chksum = calc_checksum_2byte((void *) ep, DENTRY_SIZE, chksum, CS_DEFAULT);
+	}
+
+	SET16_A(file_ep->checksum, chksum);
+	buf_modify(sb, sector);
+	buf_unlock(sb, sector);
+} /* end of update_dir_checksum */
+
+void update_dir_checksum_with_entry_set(struct super_block *sb, ENTRY_SET_CACHE_T *es)
+{
+	DENTRY_T *ep;
+	u16 chksum = 0;
+	s32 chksum_type = CS_DIR_ENTRY, i;
+
+	ep = (DENTRY_T *)&(es->__buf);
+	for (i = 0; i < es->num_entries; i++) {
+		DPRINTK("update_dir_checksum_with_entry_set ep %p\n", ep);
+		chksum = calc_checksum_2byte((void *) ep, DENTRY_SIZE, chksum, chksum_type);
+		ep++;
+		chksum_type = CS_DEFAULT;
+	}
+
+	ep = (DENTRY_T *)&(es->__buf);
+	SET16_A(((FILE_DENTRY_T *)ep)->checksum, chksum);
+	write_whole_entry_set(sb, es);
+}
+
+static s32 _walk_fat_chain(struct super_block *sb, CHAIN_T *p_dir, s32 byte_offset, u32 *clu)
+{
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	s32 clu_offset;
+	u32 cur_clu;
+
+	clu_offset = byte_offset >> p_fs->cluster_size_bits;
+	cur_clu = p_dir->dir;
+
+	if (p_dir->flags == 0x03) {
+		cur_clu += clu_offset;
+	} else {
+		while (clu_offset > 0) {
+			if (FAT_read(sb, cur_clu, &cur_clu) == -1)
+				return FFS_MEDIAERR;
+			clu_offset--;
+		}
+	}
+
+	if (clu)
+		*clu = cur_clu;
+	return FFS_SUCCESS;
+}
+s32 find_location(struct super_block *sb, CHAIN_T *p_dir, s32 entry, sector_t *sector, s32 *offset)
+{
+	s32 off, ret;
+	u32 clu = 0;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	BD_INFO_T *p_bd = &(EXFAT_SB(sb)->bd_info);
+
+	off = entry << DENTRY_SIZE_BITS;
+
+	if (p_dir->dir == CLUSTER_32(0)) { /* FAT16 root_dir */
+		*offset = off & p_bd->sector_size_mask;
+		*sector = off >> p_bd->sector_size_bits;
+		*sector += p_fs->root_start_sector;
+	} else {
+		ret = _walk_fat_chain(sb, p_dir, off, &clu);
+		if (ret != FFS_SUCCESS)
+			return ret;
+
+		off &= p_fs->cluster_size - 1;	/* byte offset in cluster */
+
+		*offset = off & p_bd->sector_size_mask;	/* byte offset in sector    */
+		*sector = off >> p_bd->sector_size_bits;	/* sector offset in cluster */
+		*sector += START_SECTOR(clu);
+	}
+	return FFS_SUCCESS;
+} /* end of find_location */
+
+DENTRY_T *get_entry_with_sector(struct super_block *sb, sector_t sector, s32 offset)
+{
+	u8 *buf;
+
+	buf = buf_getblk(sb, sector);
+
+	if (buf == NULL)
+		return NULL;
+
+	return (DENTRY_T *)(buf + offset);
+} /* end of get_entry_with_sector */
+
+DENTRY_T *get_entry_in_dir(struct super_block *sb, CHAIN_T *p_dir, s32 entry, sector_t *sector)
+{
+	s32 off;
+	sector_t sec;
+	u8 *buf;
+
+	if (find_location(sb, p_dir, entry, &sec, &off) != FFS_SUCCESS)
+		return NULL;
+
+	buf = buf_getblk(sb, sec);
+
+	if (buf == NULL)
+		return NULL;
+
+	if (sector != NULL)
+		*sector = sec;
+	return (DENTRY_T *)(buf + off);
+} /* end of get_entry_in_dir */
+
+
+/* returns a set of dentries for a file or dir.
+ * Note that this is a copy (dump) of dentries so that user should call write_entry_set()
+ * to apply changes made in this entry set to the real device.
+ * in:
+ *   sb+p_dir+entry: indicates a file/dir
+ *   type:  specifies how many dentries should be included.
+ * out:
+ *   file_ep: will point the first dentry(= file dentry) on success
+ * return:
+ *   pointer of entry set on success,
+ *   NULL on failure.
+ */
+
+#define ES_MODE_STARTED				0
+#define ES_MODE_GET_FILE_ENTRY			1
+#define ES_MODE_GET_STRM_ENTRY			2
+#define ES_MODE_GET_NAME_ENTRY			3
+#define ES_MODE_GET_CRITICAL_SEC_ENTRY		4
+ENTRY_SET_CACHE_T *get_entry_set_in_dir(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u32 type, DENTRY_T **file_ep)
+{
+	s32 off, ret, byte_offset;
+	u32 clu = 0;
+	sector_t sec;
+	u32 entry_type;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	BD_INFO_T *p_bd = &(EXFAT_SB(sb)->bd_info);
+	ENTRY_SET_CACHE_T *es = NULL;
+	DENTRY_T *ep, *pos;
+	u8 *buf;
+	u8 num_entries;
+	s32 mode = ES_MODE_STARTED;
+
+	DPRINTK("get_entry_set_in_dir entered\n");
+	DPRINTK("p_dir dir %u flags %x size %d\n", p_dir->dir, p_dir->flags, p_dir->size);
+
+	byte_offset = entry << DENTRY_SIZE_BITS;
+	ret = _walk_fat_chain(sb, p_dir, byte_offset, &clu);
+	if (ret != FFS_SUCCESS)
+		return NULL;
+
+
+	byte_offset &= p_fs->cluster_size - 1;	/* byte offset in cluster */
+
+	off = byte_offset & p_bd->sector_size_mask;	/* byte offset in sector    */
+	sec = byte_offset >> p_bd->sector_size_bits;	/* sector offset in cluster */
+	sec += START_SECTOR(clu);
+
+	buf = buf_getblk(sb, sec);
+	if (buf == NULL)
+		goto err_out;
+
+
+	ep = (DENTRY_T *)(buf + off);
+	entry_type = p_fs->fs_func->get_entry_type(ep);
+
+	if ((entry_type != TYPE_FILE)
+		&& (entry_type != TYPE_DIR))
+		goto err_out;
+
+	if (type == ES_ALL_ENTRIES)
+		num_entries = ((FILE_DENTRY_T *)ep)->num_ext+1;
+	else
+		num_entries = type;
+
+	DPRINTK("trying to kmalloc %zx bytes for %d entries\n", offsetof(ENTRY_SET_CACHE_T, __buf) + (num_entries)  * sizeof(DENTRY_T), num_entries);
+	es = kmalloc(offsetof(ENTRY_SET_CACHE_T, __buf) + (num_entries)  * sizeof(DENTRY_T), GFP_KERNEL);
+	if (es == NULL)
+		goto err_out;
+
+	es->num_entries = num_entries;
+	es->sector = sec;
+	es->offset = off;
+	es->alloc_flag = p_dir->flags;
+
+	pos = (DENTRY_T *) &(es->__buf);
+
+	while(num_entries) {
+		/* instead of copying whole sector, we will check every entry.
+		 * this will provide minimum stablity and consistancy.
+		 */
+
+		entry_type = p_fs->fs_func->get_entry_type(ep);
+
+		if ((entry_type == TYPE_UNUSED) || (entry_type == TYPE_DELETED))
+			goto err_out;
+
+		switch (mode) {
+		case ES_MODE_STARTED:
+			if  ((entry_type == TYPE_FILE) || (entry_type == TYPE_DIR))
+				mode = ES_MODE_GET_FILE_ENTRY;
+			else
+				goto err_out;
+			break;
+		case ES_MODE_GET_FILE_ENTRY:
+			if (entry_type == TYPE_STREAM)
+				mode = ES_MODE_GET_STRM_ENTRY;
+			else
+				goto err_out;
+			break;
+		case ES_MODE_GET_STRM_ENTRY:
+			if (entry_type == TYPE_EXTEND)
+				mode = ES_MODE_GET_NAME_ENTRY;
+			else
+				goto err_out;
+			break;
+		case ES_MODE_GET_NAME_ENTRY:
+			if (entry_type == TYPE_EXTEND)
+				break;
+			else if (entry_type == TYPE_STREAM)
+				goto err_out;
+			else if (entry_type & TYPE_CRITICAL_SEC)
+				mode = ES_MODE_GET_CRITICAL_SEC_ENTRY;
+			else
+				goto err_out;
+			break;
+		case ES_MODE_GET_CRITICAL_SEC_ENTRY:
+			if ((entry_type == TYPE_EXTEND) || (entry_type == TYPE_STREAM))
+				goto err_out;
+			else if ((entry_type & TYPE_CRITICAL_SEC) != TYPE_CRITICAL_SEC)
+				goto err_out;
+			break;
+		}
+
+		memcpy(pos, ep, sizeof(DENTRY_T));
+
+		if (--num_entries == 0)
+			break;
+
+		if (((off + DENTRY_SIZE) & p_bd->sector_size_mask) < (off &  p_bd->sector_size_mask)) {
+			/* get the next sector */
+			if (IS_LAST_SECTOR_IN_CLUSTER(sec)) {
+				if (es->alloc_flag == 0x03) {
+					clu++;
+				} else {
+					if (FAT_read(sb, clu, &clu) == -1)
+						goto err_out;
+				}
+				sec = START_SECTOR(clu);
+			} else {
+				sec++;
+			}
+			buf = buf_getblk(sb, sec);
+			if (buf == NULL)
+				goto err_out;
+			off = 0;
+			ep = (DENTRY_T *)(buf);
+		} else {
+			ep++;
+			off += DENTRY_SIZE;
+		}
+		pos++;
+	}
+
+	if (file_ep)
+		*file_ep = (DENTRY_T *)&(es->__buf);
+
+	DPRINTK("es sec %llu offset %d flags %d, num_entries %u buf ptr %p\n",
+		   (unsigned long long)es->sector, es->offset, es->alloc_flag,
+		   es->num_entries, &(es->__buf));
+	DPRINTK("get_entry_set_in_dir exited %p\n", es);
+	return es;
+err_out:
+	DPRINTK("get_entry_set_in_dir exited NULL (es %p)\n", es);
+	if (es)
+		kfree(es);
+	return NULL;
+}
+
+void release_entry_set(ENTRY_SET_CACHE_T *es)
+{
+	DPRINTK("release_entry_set %p\n", es);
+	if (es)
+		kfree(es);
+}
+
+
+static s32 __write_partial_entries_in_entry_set(struct super_block *sb, ENTRY_SET_CACHE_T *es, sector_t sec, s32 off, u32 count)
+{
+	s32 num_entries, buf_off = (off - es->offset);
+	u32 remaining_byte_in_sector, copy_entries;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	BD_INFO_T *p_bd = &(EXFAT_SB(sb)->bd_info);
+	u32 clu;
+	u8 *buf, *esbuf = (u8 *)&(es->__buf);
+
+	DPRINTK("__write_partial_entries_in_entry_set entered\n");
+	DPRINTK("es %p sec %llu off %d count %d\n", es, (unsigned long long)sec, off, count);
+	num_entries = count;
+
+	while (num_entries) {
+		/* white per sector base */
+		remaining_byte_in_sector = (1 << p_bd->sector_size_bits) - off;
+		copy_entries = MIN(remaining_byte_in_sector >> DENTRY_SIZE_BITS , num_entries);
+		buf = buf_getblk(sb, sec);
+		if (buf == NULL)
+			goto err_out;
+		DPRINTK("es->buf %p buf_off %u\n", esbuf, buf_off);
+		DPRINTK("copying %d entries from %p to sector %llu\n", copy_entries, (esbuf + buf_off), (unsigned long long)sec);
+		memcpy(buf + off, esbuf + buf_off, copy_entries << DENTRY_SIZE_BITS);
+		buf_modify(sb, sec);
+		num_entries -= copy_entries;
+
+		if (num_entries) {
+			/* get next sector */
+			if (IS_LAST_SECTOR_IN_CLUSTER(sec)) {
+				clu = GET_CLUSTER_FROM_SECTOR(sec);
+				if (es->alloc_flag == 0x03) {
+					clu++;
+				} else {
+					if (FAT_read(sb, clu, &clu) == -1)
+						goto err_out;
+				}
+				sec = START_SECTOR(clu);
+			} else {
+				sec++;
+			}
+			off = 0;
+			buf_off += copy_entries << DENTRY_SIZE_BITS;
+		}
+	}
+
+	DPRINTK("__write_partial_entries_in_entry_set exited successfully\n");
+	return FFS_SUCCESS;
+err_out:
+	DPRINTK("__write_partial_entries_in_entry_set failed\n");
+	return FFS_ERROR;
+}
+
+/* write back all entries in entry set */
+s32 write_whole_entry_set(struct super_block *sb, ENTRY_SET_CACHE_T *es)
+{
+	return __write_partial_entries_in_entry_set(sb, es, es->sector, es->offset, es->num_entries);
+}
+
+/* write back some entries in entry set */
+s32 write_partial_entries_in_entry_set (struct super_block *sb, ENTRY_SET_CACHE_T *es, DENTRY_T *ep, u32 count)
+{
+	s32 ret, byte_offset, off;
+	u32 clu=0;
+	sector_t sec;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	BD_INFO_T *p_bd = &(EXFAT_SB(sb)->bd_info);
+	CHAIN_T dir;
+
+	/* vaidity check */
+	if (ep + count  > ((DENTRY_T *)&(es->__buf)) + es->num_entries)
+		return FFS_ERROR;
+
+	dir.dir = GET_CLUSTER_FROM_SECTOR(es->sector);
+	dir.flags = es->alloc_flag;
+	dir.size = 0xffffffff;		/* XXX */
+
+	byte_offset = (es->sector - START_SECTOR(dir.dir)) << p_bd->sector_size_bits;
+	byte_offset += ((void **)ep - &(es->__buf)) + es->offset;
+
+	ret =_walk_fat_chain(sb, &dir, byte_offset, &clu);
+	if (ret != FFS_SUCCESS)
+		return ret;
+	byte_offset &= p_fs->cluster_size - 1;	/* byte offset in cluster */
+	off = byte_offset & p_bd->sector_size_mask;	/* byte offset in sector    */
+	sec = byte_offset >> p_bd->sector_size_bits;	/* sector offset in cluster */
+	sec += START_SECTOR(clu);
+	return __write_partial_entries_in_entry_set(sb, es, sec, off, count);
+}
+
+/* search EMPTY CONTINUOUS "num_entries" entries */
+s32 search_deleted_or_unused_entry(struct super_block *sb, CHAIN_T *p_dir, s32 num_entries)
+{
+	int i, dentry, num_empty = 0;
+	s32 dentries_per_clu;
+	u32 type;
+	CHAIN_T clu;
+	DENTRY_T *ep;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	if (p_dir->dir == CLUSTER_32(0)) /* FAT16 root_dir */
+		dentries_per_clu = p_fs->dentries_in_root;
+	else
+		dentries_per_clu = p_fs->dentries_per_clu;
+
+	if (p_fs->hint_uentry.dir == p_dir->dir) {
+		if (p_fs->hint_uentry.entry == -1)
+			return -1;
+
+		clu.dir = p_fs->hint_uentry.clu.dir;
+		clu.size = p_fs->hint_uentry.clu.size;
+		clu.flags = p_fs->hint_uentry.clu.flags;
+
+		dentry = p_fs->hint_uentry.entry;
+	} else {
+		p_fs->hint_uentry.entry = -1;
+
+		clu.dir = p_dir->dir;
+		clu.size = p_dir->size;
+		clu.flags = p_dir->flags;
+
+		dentry = 0;
+	}
+
+	while (clu.dir != CLUSTER_32(~0)) {
+		if (p_fs->dev_ejected)
+			break;
+
+		if (p_dir->dir == CLUSTER_32(0)) /* FAT16 root_dir */
+			i = dentry % dentries_per_clu;
+		else
+			i = dentry & (dentries_per_clu-1);
+
+		for (; i < dentries_per_clu; i++, dentry++) {
+			ep = get_entry_in_dir(sb, &clu, i, NULL);
+			if (!ep)
+				return -1;
+
+			type = p_fs->fs_func->get_entry_type(ep);
+
+			if (type == TYPE_UNUSED) {
+				num_empty++;
+				if (p_fs->hint_uentry.entry == -1) {
+					p_fs->hint_uentry.dir = p_dir->dir;
+					p_fs->hint_uentry.entry = dentry;
+
+					p_fs->hint_uentry.clu.dir = clu.dir;
+					p_fs->hint_uentry.clu.size = clu.size;
+					p_fs->hint_uentry.clu.flags = clu.flags;
+				}
+			} else if (type == TYPE_DELETED) {
+				num_empty++;
+			} else {
+				num_empty = 0;
+			}
+
+			if (num_empty >= num_entries) {
+				p_fs->hint_uentry.dir = CLUSTER_32(~0);
+				p_fs->hint_uentry.entry = -1;
+
+				if (p_fs->vol_type == EXFAT)
+					return dentry - (num_entries-1);
+				else
+					return dentry;
+			}
+		}
+
+		if (p_dir->dir == CLUSTER_32(0))
+			break; /* FAT16 root_dir */
+
+		if (clu.flags == 0x03) {
+			if ((--clu.size) > 0)
+				clu.dir++;
+			else
+				clu.dir = CLUSTER_32(~0);
+		} else {
+			if (FAT_read(sb, clu.dir, &(clu.dir)) != 0)
+				return -1;
+		}
+	}
+
+	return -1;
+} /* end of search_deleted_or_unused_entry */
+
+s32 find_empty_entry(struct inode *inode, CHAIN_T *p_dir, s32 num_entries)
+{
+	s32 ret, dentry;
+	u32 last_clu;
+	sector_t sector;
+	u64 size = 0;
+	CHAIN_T clu;
+	DENTRY_T *ep = NULL;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	FILE_ID_T *fid = &(EXFAT_I(inode)->fid);
+
+	if (p_dir->dir == CLUSTER_32(0)) /* FAT16 root_dir */
+		return search_deleted_or_unused_entry(sb, p_dir, num_entries);
+
+	while ((dentry = search_deleted_or_unused_entry(sb, p_dir, num_entries)) < 0) {
+		if (p_fs->dev_ejected)
+			break;
+
+		if (p_fs->vol_type == EXFAT) {
+			if (p_dir->dir != p_fs->root_dir)
+				size = i_size_read(inode);
+		}
+
+		last_clu = find_last_cluster(sb, p_dir);
+		clu.dir = last_clu + 1;
+		clu.size = 0;
+		clu.flags = p_dir->flags;
+
+		/* (1) allocate a cluster */
+		ret = p_fs->fs_func->alloc_cluster(sb, 1, &clu);
+		if (ret < 1)
+			return -1;
+
+		if (clear_cluster(sb, clu.dir) != FFS_SUCCESS)
+			return -1;
+
+		/* (2) append to the FAT chain */
+		if (clu.flags != p_dir->flags) {
+			exfat_chain_cont_cluster(sb, p_dir->dir, p_dir->size);
+			p_dir->flags = 0x01;
+			p_fs->hint_uentry.clu.flags = 0x01;
+		}
+		if (clu.flags == 0x01)
+			if (FAT_write(sb, last_clu, clu.dir) < 0)
+				return -1;
+
+		if (p_fs->hint_uentry.entry == -1) {
+			p_fs->hint_uentry.dir = p_dir->dir;
+			p_fs->hint_uentry.entry = p_dir->size << (p_fs->cluster_size_bits - DENTRY_SIZE_BITS);
+
+			p_fs->hint_uentry.clu.dir = clu.dir;
+			p_fs->hint_uentry.clu.size = 0;
+			p_fs->hint_uentry.clu.flags = clu.flags;
+		}
+		p_fs->hint_uentry.clu.size++;
+		p_dir->size++;
+
+		/* (3) update the directory entry */
+		if (p_fs->vol_type == EXFAT) {
+			if (p_dir->dir != p_fs->root_dir) {
+				size += p_fs->cluster_size;
+
+				ep = get_entry_in_dir(sb, &(fid->dir), fid->entry+1, &sector);
+				if (!ep)
+					return -1;
+				p_fs->fs_func->set_entry_size(ep, size);
+				p_fs->fs_func->set_entry_flag(ep, p_dir->flags);
+				buf_modify(sb, sector);
+
+				update_dir_checksum(sb, &(fid->dir), fid->entry);
+			}
+		}
+
+		i_size_write(inode, i_size_read(inode)+p_fs->cluster_size);
+		EXFAT_I(inode)->mmu_private += p_fs->cluster_size;
+		EXFAT_I(inode)->fid.size += p_fs->cluster_size;
+		EXFAT_I(inode)->fid.flags = p_dir->flags;
+		inode->i_blocks += 1 << (p_fs->cluster_size_bits - 9);
+	}
+
+	return dentry;
+} /* end of find_empty_entry */
+
+/* return values of fat_find_dir_entry()
+   >= 0 : return dir entiry position with the name in dir
+   -1 : (root dir, ".") it is the root dir itself
+   -2 : entry with the name does not exist */
+s32 fat_find_dir_entry(struct super_block *sb, CHAIN_T *p_dir, UNI_NAME_T *p_uniname, s32 num_entries, DOS_NAME_T *p_dosname, u32 type)
+{
+	int i, dentry = 0, lossy = FALSE, len;
+	s32 order = 0, is_feasible_entry = TRUE, has_ext_entry = FALSE;
+	s32 dentries_per_clu;
+	u32 entry_type;
+	u16 entry_uniname[14], *uniname = NULL, unichar;
+	CHAIN_T clu;
+	DENTRY_T *ep;
+	DOS_DENTRY_T *dos_ep;
+	EXT_DENTRY_T *ext_ep;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	if (p_dir->dir == p_fs->root_dir) {
+		if ((!nls_uniname_cmp(sb, p_uniname->name, (u16 *) UNI_CUR_DIR_NAME)) ||
+			(!nls_uniname_cmp(sb, p_uniname->name, (u16 *) UNI_PAR_DIR_NAME)))
+			return -1; // special case, root directory itself
+	}
+
+	if (p_dir->dir == CLUSTER_32(0)) /* FAT16 root_dir */
+		dentries_per_clu = p_fs->dentries_in_root;
+	else
+		dentries_per_clu = p_fs->dentries_per_clu;
+
+	clu.dir = p_dir->dir;
+	clu.flags = p_dir->flags;
+
+	while (clu.dir != CLUSTER_32(~0)) {
+		if (p_fs->dev_ejected)
+			break;
+
+		for (i = 0; i < dentries_per_clu; i++, dentry++) {
+			ep = get_entry_in_dir(sb, &clu, i, NULL);
+			if (!ep)
+				return -2;
+
+			entry_type = p_fs->fs_func->get_entry_type(ep);
+
+			if ((entry_type == TYPE_FILE) || (entry_type == TYPE_DIR)) {
+				if ((type == TYPE_ALL) || (type == entry_type)) {
+					if (is_feasible_entry && has_ext_entry)
+						return dentry;
+
+					dos_ep = (DOS_DENTRY_T *) ep;
+					if ((!lossy) && (!nls_dosname_cmp(sb, p_dosname->name, dos_ep->name)))
+						return dentry;
+				}
+				is_feasible_entry = TRUE;
+				has_ext_entry = FALSE;
+			} else if (entry_type == TYPE_EXTEND) {
+				if (is_feasible_entry) {
+					ext_ep = (EXT_DENTRY_T *) ep;
+					if (ext_ep->order > 0x40) {
+						order = (s32)(ext_ep->order - 0x40);
+						uniname = p_uniname->name + 13 * (order-1);
+					} else {
+						order = (s32) ext_ep->order;
+						uniname -= 13;
+					}
+
+					len = extract_uni_name_from_ext_entry(ext_ep, entry_uniname, order);
+
+					unichar = *(uniname+len);
+					*(uniname+len) = 0x0;
+
+					if (nls_uniname_cmp(sb, uniname, entry_uniname))
+						is_feasible_entry = FALSE;
+
+					*(uniname+len) = unichar;
+				}
+				has_ext_entry = TRUE;
+			} else if (entry_type == TYPE_UNUSED) {
+				return -2;
+			} else {
+				is_feasible_entry = TRUE;
+				has_ext_entry = FALSE;
+			}
+		}
+
+		if (p_dir->dir == CLUSTER_32(0))
+			break; /* FAT16 root_dir */
+
+		if (FAT_read(sb, clu.dir, &(clu.dir)) != 0)
+			return -2;
+	}
+
+	return -2;
+} /* end of fat_find_dir_entry */
+
+/* return values of exfat_find_dir_entry()
+   >= 0 : return dir entiry position with the name in dir
+   -1 : (root dir, ".") it is the root dir itself
+   -2 : entry with the name does not exist */
+s32 exfat_find_dir_entry(struct super_block *sb, CHAIN_T *p_dir, UNI_NAME_T *p_uniname, s32 num_entries, DOS_NAME_T *p_dosname, u32 type)
+{
+	int i = 0, dentry = 0, num_ext_entries = 0, len, step;
+	s32 order = 0, is_feasible_entry = FALSE;
+	s32 dentries_per_clu, num_empty = 0;
+	u32 entry_type;
+	u16 entry_uniname[16], *uniname = NULL, unichar;
+	CHAIN_T clu;
+	DENTRY_T *ep;
+	FILE_DENTRY_T *file_ep;
+	STRM_DENTRY_T *strm_ep;
+	NAME_DENTRY_T *name_ep;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	if (p_dir->dir == p_fs->root_dir) {
+		if ((!nls_uniname_cmp(sb, p_uniname->name, (u16 *) UNI_CUR_DIR_NAME)) ||
+			(!nls_uniname_cmp(sb, p_uniname->name, (u16 *) UNI_PAR_DIR_NAME)))
+			return -1; // special case, root directory itself
+	}
+
+	if (p_dir->dir == CLUSTER_32(0)) /* FAT16 root_dir */
+		dentries_per_clu = p_fs->dentries_in_root;
+	else
+		dentries_per_clu = p_fs->dentries_per_clu;
+
+	clu.dir = p_dir->dir;
+	clu.size = p_dir->size;
+	clu.flags = p_dir->flags;
+
+	p_fs->hint_uentry.dir = p_dir->dir;
+	p_fs->hint_uentry.entry = -1;
+
+	while (clu.dir != CLUSTER_32(~0)) {
+		if (p_fs->dev_ejected)
+			break;
+
+		while (i < dentries_per_clu) {
+			ep = get_entry_in_dir(sb, &clu, i, NULL);
+			if (!ep)
+				return -2;
+
+			entry_type = p_fs->fs_func->get_entry_type(ep);
+			step = 1;
+
+			if ((entry_type == TYPE_UNUSED) || (entry_type == TYPE_DELETED)) {
+				is_feasible_entry = FALSE;
+
+				if (p_fs->hint_uentry.entry == -1) {
+					num_empty++;
+
+					if (num_empty == 1) {
+						p_fs->hint_uentry.clu.dir = clu.dir;
+						p_fs->hint_uentry.clu.size = clu.size;
+						p_fs->hint_uentry.clu.flags = clu.flags;
+					}
+					if ((num_empty >= num_entries) || (entry_type == TYPE_UNUSED))
+						p_fs->hint_uentry.entry = dentry - (num_empty-1);
+				}
+
+				if (entry_type == TYPE_UNUSED)
+					return -2;
+			} else {
+				num_empty = 0;
+
+				if ((entry_type == TYPE_FILE) || (entry_type == TYPE_DIR)) {
+					file_ep = (FILE_DENTRY_T *) ep;
+					if ((type == TYPE_ALL) || (type == entry_type)) {
+						num_ext_entries = file_ep->num_ext;
+						is_feasible_entry = TRUE;
+					} else {
+						is_feasible_entry = FALSE;
+						step = file_ep->num_ext + 1;
+					}
+				} else if (entry_type == TYPE_STREAM) {
+					if (is_feasible_entry) {
+						strm_ep = (STRM_DENTRY_T *) ep;
+						if (p_uniname->name_hash == GET16_A(strm_ep->name_hash) &&
+						    p_uniname->name_len == strm_ep->name_len) {
+							order = 1;
+						} else {
+							is_feasible_entry = FALSE;
+							step = num_ext_entries;
+						}
+					}
+				} else if (entry_type == TYPE_EXTEND) {
+					if (is_feasible_entry) {
+						name_ep = (NAME_DENTRY_T *) ep;
+
+						if ((++order) == 2)
+							uniname = p_uniname->name;
+						else
+							uniname += 15;
+
+						len = extract_uni_name_from_name_entry(name_ep, entry_uniname, order);
+
+						unichar = *(uniname+len);
+						*(uniname+len) = 0x0;
+
+						if (nls_uniname_cmp(sb, uniname, entry_uniname)) {
+							is_feasible_entry = FALSE;
+							step = num_ext_entries - order + 1;
+						} else if (order == num_ext_entries) {
+							p_fs->hint_uentry.dir = CLUSTER_32(~0);
+							p_fs->hint_uentry.entry = -1;
+							return dentry - (num_ext_entries);
+						}
+
+						*(uniname+len) = unichar;
+					}
+				} else {
+					is_feasible_entry = FALSE;
+				}
+			}
+
+			i += step;
+			dentry += step;
+		}
+
+		i -= dentries_per_clu;
+
+		if (p_dir->dir == CLUSTER_32(0))
+			break; /* FAT16 root_dir */
+
+		if (clu.flags == 0x03) {
+			if ((--clu.size) > 0)
+				clu.dir++;
+			else
+				clu.dir = CLUSTER_32(~0);
+		} else {
+			if (FAT_read(sb, clu.dir, &(clu.dir)) != 0)
+				return -2;
+		}
+	}
+
+	return -2;
+} /* end of exfat_find_dir_entry */
+
+/* returns -1 on error */
+s32 fat_count_ext_entries(struct super_block *sb, CHAIN_T *p_dir, s32 entry, DENTRY_T *p_entry)
+{
+	s32 count = 0;
+	u8 chksum;
+	DOS_DENTRY_T *dos_ep = (DOS_DENTRY_T *) p_entry;
+	EXT_DENTRY_T *ext_ep;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	chksum = calc_checksum_1byte((void *) dos_ep->name, DOS_NAME_LENGTH, 0);
+
+	for (entry--; entry >= 0; entry--) {
+		ext_ep = (EXT_DENTRY_T *) get_entry_in_dir(sb, p_dir, entry, NULL);
+		if (!ext_ep)
+			return -1;
+
+		if ((p_fs->fs_func->get_entry_type((DENTRY_T *) ext_ep) == TYPE_EXTEND) &&
+			(ext_ep->checksum == chksum)) {
+			count++;
+			if (ext_ep->order > 0x40)
+				return count;
+		} else {
+			return count;
+		}
+	}
+
+	return count;
+} /* end of fat_count_ext_entries */
+
+/* returns -1 on error */
+s32 exfat_count_ext_entries(struct super_block *sb, CHAIN_T *p_dir, s32 entry, DENTRY_T *p_entry)
+{
+	int i, count = 0;
+	u32 type;
+	FILE_DENTRY_T *file_ep = (FILE_DENTRY_T *) p_entry;
+	DENTRY_T *ext_ep;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	for (i = 0, entry++; i < file_ep->num_ext; i++, entry++) {
+		ext_ep = get_entry_in_dir(sb, p_dir, entry, NULL);
+		if (!ext_ep)
+			return -1;
+
+		type = p_fs->fs_func->get_entry_type(ext_ep);
+		if ((type == TYPE_EXTEND) || (type == TYPE_STREAM))
+			count++;
+		else
+			return count;
+	}
+
+	return count;
+} /* end of exfat_count_ext_entries */
+
+/* returns -1 on error */
+s32 count_dos_name_entries(struct super_block *sb, CHAIN_T *p_dir, u32 type)
+{
+	int i, count = 0;
+	s32 dentries_per_clu;
+	u32 entry_type;
+	CHAIN_T clu;
+	DENTRY_T *ep;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	if (p_dir->dir == CLUSTER_32(0)) /* FAT16 root_dir */
+		dentries_per_clu = p_fs->dentries_in_root;
+	else
+		dentries_per_clu = p_fs->dentries_per_clu;
+
+	clu.dir = p_dir->dir;
+	clu.size = p_dir->size;
+	clu.flags = p_dir->flags;
+
+	while (clu.dir != CLUSTER_32(~0)) {
+		if (p_fs->dev_ejected)
+			break;
+
+		for (i = 0; i < dentries_per_clu; i++) {
+			ep = get_entry_in_dir(sb, &clu, i, NULL);
+			if (!ep)
+				return -1;
+
+			entry_type = p_fs->fs_func->get_entry_type(ep);
+
+			if (entry_type == TYPE_UNUSED)
+				return count;
+			if (!(type & TYPE_CRITICAL_PRI) && !(type & TYPE_BENIGN_PRI))
+				continue;
+
+			if ((type == TYPE_ALL) || (type == entry_type))
+				count++;
+		}
+
+		if (p_dir->dir == CLUSTER_32(0))
+			break; /* FAT16 root_dir */
+
+		if (clu.flags == 0x03) {
+			if ((--clu.size) > 0)
+				clu.dir++;
+			else
+				clu.dir = CLUSTER_32(~0);
+		} else {
+			if (FAT_read(sb, clu.dir, &(clu.dir)) != 0)
+				return -1;
+		}
+	}
+
+	return count;
+} /* end of count_dos_name_entries */
+
+bool is_dir_empty(struct super_block *sb, CHAIN_T *p_dir)
+{
+	int i, count = 0;
+	s32 dentries_per_clu;
+	u32 type;
+	CHAIN_T clu;
+	DENTRY_T *ep;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	if (p_dir->dir == CLUSTER_32(0)) /* FAT16 root_dir */
+		dentries_per_clu = p_fs->dentries_in_root;
+	else
+		dentries_per_clu = p_fs->dentries_per_clu;
+
+	clu.dir = p_dir->dir;
+	clu.size = p_dir->size;
+	clu.flags = p_dir->flags;
+
+	while (clu.dir != CLUSTER_32(~0)) {
+		if (p_fs->dev_ejected)
+			break;
+
+		for (i = 0; i < dentries_per_clu; i++) {
+			ep = get_entry_in_dir(sb, &clu, i, NULL);
+			if (!ep)
+				break;
+
+			type = p_fs->fs_func->get_entry_type(ep);
+
+			if (type == TYPE_UNUSED)
+				return TRUE;
+			if ((type != TYPE_FILE) && (type != TYPE_DIR))
+				continue;
+
+			if (p_dir->dir == CLUSTER_32(0)) { /* FAT16 root_dir */
+				return FALSE;
+			} else {
+				if (p_fs->vol_type == EXFAT)
+					return FALSE;
+				if ((p_dir->dir == p_fs->root_dir) || ((++count) > 2))
+					return FALSE;
+			}
+		}
+
+		if (p_dir->dir == CLUSTER_32(0))
+			break; /* FAT16 root_dir */
+
+		if (clu.flags == 0x03) {
+			if ((--clu.size) > 0)
+				clu.dir++;
+			else
+				clu.dir = CLUSTER_32(~0);
+		} else {
+			if (FAT_read(sb, clu.dir, &(clu.dir)) != 0)
+				break;
+		}
+	}
+
+	return TRUE;
+} /* end of is_dir_empty */
+
+/*
+ *  Name Conversion Functions
+ */
+
+/* input  : dir, uni_name
+   output : num_of_entry, dos_name(format : aaaaaa~1.bbb) */
+s32 get_num_entries_and_dos_name(struct super_block *sb, CHAIN_T *p_dir, UNI_NAME_T *p_uniname, s32 *entries, DOS_NAME_T *p_dosname)
+{
+	s32 ret, num_entries, lossy = FALSE;
+	char **r;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	num_entries = p_fs->fs_func->calc_num_entries(p_uniname);
+	if (num_entries == 0)
+		return FFS_INVALIDPATH;
+
+	if (p_fs->vol_type != EXFAT) {
+		nls_uniname_to_dosname(sb, p_dosname, p_uniname, &lossy);
+
+		if (lossy) {
+			ret = fat_generate_dos_name(sb, p_dir, p_dosname);
+			if (ret)
+				return ret;
+		} else {
+			for (r = reserved_names; *r; r++) {
+				if (!strncmp((void *) p_dosname->name, *r, 8))
+					return FFS_INVALIDPATH;
+			}
+
+			if (p_dosname->name_case != 0xFF)
+				num_entries = 1;
+		}
+
+		if (num_entries > 1)
+			p_dosname->name_case = 0x0;
+	}
+
+	*entries = num_entries;
+
+	return FFS_SUCCESS;
+} /* end of get_num_entries_and_dos_name */
+
+void get_uni_name_from_dos_entry(struct super_block *sb, DOS_DENTRY_T *ep, UNI_NAME_T *p_uniname, u8 mode)
+{
+	DOS_NAME_T dos_name;
+
+	if (mode == 0x0)
+		dos_name.name_case = 0x0;
+	else
+		dos_name.name_case = ep->lcase;
+
+	memcpy(dos_name.name, ep->name, DOS_NAME_LENGTH);
+	nls_dosname_to_uniname(sb, p_uniname, &dos_name);
+} /* end of get_uni_name_from_dos_entry */
+
+void fat_get_uni_name_from_ext_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u16 *uniname)
+{
+	int i;
+	EXT_DENTRY_T *ep;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	for (entry--, i = 1; entry >= 0; entry--, i++) {
+		ep = (EXT_DENTRY_T *) get_entry_in_dir(sb, p_dir, entry, NULL);
+		if (!ep)
+			return;
+
+		if (p_fs->fs_func->get_entry_type((DENTRY_T *) ep) == TYPE_EXTEND) {
+			extract_uni_name_from_ext_entry(ep, uniname, i);
+			if (ep->order > 0x40)
+				return;
+		} else {
+			return;
+		}
+
+		uniname += 13;
+	}
+} /* end of fat_get_uni_name_from_ext_entry */
+
+void exfat_get_uni_name_from_ext_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u16 *uniname)
+{
+	int i;
+	DENTRY_T *ep;
+	ENTRY_SET_CACHE_T *es;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	es = get_entry_set_in_dir(sb, p_dir, entry, ES_ALL_ENTRIES, &ep);
+	if (es == NULL || es->num_entries < 3) {
+		if (es)
+			release_entry_set(es);
+		return;
+	}
+
+	ep += 2;
+
+	/*
+	* First entry  : file entry
+	* Second entry : stream-extension entry
+	* Third entry  : first file-name entry
+	* So, the index of first file-name dentry should start from 2.
+	*/
+	for (i = 2; i < es->num_entries; i++, ep++) {
+		if (p_fs->fs_func->get_entry_type(ep) == TYPE_EXTEND)
+			extract_uni_name_from_name_entry((NAME_DENTRY_T *)ep, uniname, i);
+		else
+			goto out;
+		uniname += 15;
+	}
+
+out:
+	release_entry_set(es);
+} /* end of exfat_get_uni_name_from_ext_entry */
+
+s32 extract_uni_name_from_ext_entry(EXT_DENTRY_T *ep, u16 *uniname, s32 order)
+{
+	int i, len = 0;
+
+	for (i = 0; i < 10; i += 2) {
+		*uniname = GET16(ep->unicode_0_4+i);
+		if (*uniname == 0x0)
+			return len;
+		uniname++;
+		len++;
+	}
+
+	if (order < 20) {
+		for (i = 0; i < 12; i += 2) {
+			*uniname = GET16_A(ep->unicode_5_10+i);
+			if (*uniname == 0x0)
+				return len;
+			uniname++;
+			len++;
+		}
+	} else {
+		for (i = 0; i < 8; i += 2) {
+			*uniname = GET16_A(ep->unicode_5_10+i);
+			if (*uniname == 0x0)
+				return len;
+			uniname++;
+			len++;
+		}
+		*uniname = 0x0; /* uniname[MAX_NAME_LENGTH-1] */
+		return len;
+	}
+
+	for (i = 0; i < 4; i += 2) {
+		*uniname = GET16_A(ep->unicode_11_12+i);
+		if (*uniname == 0x0)
+			return len;
+		uniname++;
+		len++;
+	}
+
+	*uniname = 0x0;
+	return len;
+
+} /* end of extract_uni_name_from_ext_entry */
+
+s32 extract_uni_name_from_name_entry(NAME_DENTRY_T *ep, u16 *uniname, s32 order)
+{
+	int i, len = 0;
+
+	for (i = 0; i < 30; i += 2) {
+		*uniname = GET16_A(ep->unicode_0_14+i);
+		if (*uniname == 0x0)
+			return len;
+		uniname++;
+		len++;
+	}
+
+	*uniname = 0x0;
+	return len;
+
+} /* end of extract_uni_name_from_name_entry */
+
+s32 fat_generate_dos_name(struct super_block *sb, CHAIN_T *p_dir, DOS_NAME_T *p_dosname)
+{
+	int i, j, count = 0, count_begin = FALSE;
+	s32 dentries_per_clu;
+	u32 type;
+	u8 bmap[128/* 1 ~ 1023 */];
+	CHAIN_T clu;
+	DOS_DENTRY_T *ep;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	memset(bmap, 0, sizeof bmap);
+	exfat_bitmap_set(bmap, 0);
+
+	if (p_dir->dir == CLUSTER_32(0)) /* FAT16 root_dir */
+		dentries_per_clu = p_fs->dentries_in_root;
+	else
+		dentries_per_clu = p_fs->dentries_per_clu;
+
+	clu.dir = p_dir->dir;
+	clu.flags = p_dir->flags;
+
+	while (clu.dir != CLUSTER_32(~0)) {
+		if (p_fs->dev_ejected)
+			break;
+
+		for (i = 0; i < dentries_per_clu; i++) {
+			ep = (DOS_DENTRY_T *) get_entry_in_dir(sb, &clu, i, NULL);
+			if (!ep)
+				return FFS_MEDIAERR;
+
+			type = p_fs->fs_func->get_entry_type((DENTRY_T *) ep);
+
+			if (type == TYPE_UNUSED)
+				break;
+			if ((type != TYPE_FILE) && (type != TYPE_DIR))
+				continue;
+
+			count = 0;
+			count_begin = FALSE;
+
+			for (j = 0; j < 8; j++) {
+				if (ep->name[j] == ' ')
+					break;
+
+				if (ep->name[j] == '~') {
+					count_begin = TRUE;
+				} else if (count_begin) {
+					if ((ep->name[j] >= '0') && (ep->name[j] <= '9')) {
+						count = count * 10 + (ep->name[j] - '0');
+					} else {
+						count = 0;
+						count_begin = FALSE;
+					}
+				}
+			}
+
+			if ((count > 0) && (count < 1024))
+				exfat_bitmap_set(bmap, count);
+		}
+
+		if (p_dir->dir == CLUSTER_32(0))
+			break; /* FAT16 root_dir */
+
+		if (FAT_read(sb, clu.dir, &(clu.dir)) != 0)
+			return FFS_MEDIAERR;
+	}
+
+	count = 0;
+	for (i = 0; i < 128; i++) {
+		if (bmap[i] != 0xFF) {
+			for (j = 0; j < 8; j++) {
+				if (exfat_bitmap_test(&(bmap[i]), j) == 0) {
+					count = (i << 3) + j;
+					break;
+				}
+			}
+			if (count != 0)
+				break;
+		}
+	}
+
+	if ((count == 0) || (count >= 1024))
+		return FFS_FILEEXIST;
+	else
+		fat_attach_count_to_dos_name(p_dosname->name, count);
+
+	/* Now dos_name has DOS~????.EXT */
+	return FFS_SUCCESS;
+} /* end of generate_dos_name */
+
+void fat_attach_count_to_dos_name(u8 *dosname, s32 count)
+{
+	int i, j, length;
+	char str_count[6];
+
+	snprintf(str_count, sizeof str_count, "~%d", count);
+	length = strlen(str_count);
+
+	i = j = 0;
+	while (j <= (8 - length)) {
+		i = j;
+		if (dosname[j] == ' ')
+			break;
+		if (dosname[j] & 0x80)
+			j += 2;
+		else
+			j++;
+	}
+
+	for (j = 0; j < length; i++, j++)
+		dosname[i] = (u8) str_count[j];
+
+	if (i == 7)
+		dosname[7] = ' ';
+
+} /* end of attach_count_to_dos_name */
+
+s32 fat_calc_num_entries(UNI_NAME_T *p_uniname)
+{
+	s32 len;
+
+	len = p_uniname->name_len;
+	if (len == 0)
+		return 0;
+
+	/* 1 dos name entry + extended entries */
+	return (len-1) / 13 + 2;
+
+} /* end of calc_num_enties */
+
+s32 exfat_calc_num_entries(UNI_NAME_T *p_uniname)
+{
+	s32 len;
+
+	len = p_uniname->name_len;
+	if (len == 0)
+		return 0;
+
+	/* 1 file entry + 1 stream entry + name entries */
+	return (len-1) / 15 + 3;
+
+} /* end of exfat_calc_num_enties */
+
+u8 calc_checksum_1byte(void *data, s32 len, u8 chksum)
+{
+	int i;
+	u8 *c = (u8 *) data;
+
+	for (i = 0; i < len; i++, c++)
+		chksum = (((chksum & 1) << 7) | ((chksum & 0xFE) >> 1)) + *c;
+
+	return chksum;
+} /* end of calc_checksum_1byte */
+
+u16 calc_checksum_2byte(void *data, s32 len, u16 chksum, s32 type)
+{
+	int i;
+	u8 *c = (u8 *) data;
+
+	switch (type) {
+	case CS_DIR_ENTRY:
+		for (i = 0; i < len; i++, c++) {
+			if ((i == 2) || (i == 3))
+				continue;
+			chksum = (((chksum & 1) << 15) | ((chksum & 0xFFFE) >> 1)) + (u16) *c;
+		}
+		break;
+	default
+			:
+		for (i = 0; i < len; i++, c++)
+			chksum = (((chksum & 1) << 15) | ((chksum & 0xFFFE) >> 1)) + (u16) *c;
+	}
+
+	return chksum;
+} /* end of calc_checksum_2byte */
+
+u32 calc_checksum_4byte(void *data, s32 len, u32 chksum, s32 type)
+{
+	int i;
+	u8 *c = (u8 *) data;
+
+	switch (type) {
+	case CS_PBR_SECTOR:
+		for (i = 0; i < len; i++, c++) {
+			if ((i == 106) || (i == 107) || (i == 112))
+				continue;
+			chksum = (((chksum & 1) << 31) | ((chksum & 0xFFFFFFFE) >> 1)) + (u32) *c;
+		}
+		break;
+	default
+			:
+		for (i = 0; i < len; i++, c++)
+			chksum = (((chksum & 1) << 31) | ((chksum & 0xFFFFFFFE) >> 1)) + (u32) *c;
+	}
+
+	return chksum;
+} /* end of calc_checksum_4byte */
+
+/*
+ *  Name Resolution Functions
+ */
+
+/* return values of resolve_path()
+   > 0 : return the length of the path
+   < 0 : return error */
+s32 resolve_path(struct inode *inode, char *path, CHAIN_T *p_dir, UNI_NAME_T *p_uniname)
+{
+	s32 lossy = FALSE;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	FILE_ID_T *fid = &(EXFAT_I(inode)->fid);
+
+	if (strlen(path) >= (MAX_NAME_LENGTH * MAX_CHARSET_SIZE))
+		return FFS_INVALIDPATH;
+
+	strcpy(name_buf, path);
+
+	nls_cstring_to_uniname(sb, p_uniname, name_buf, &lossy);
+	if (lossy)
+		return FFS_INVALIDPATH;
+
+	fid->size = i_size_read(inode);
+
+	p_dir->dir = fid->start_clu;
+	p_dir->size = (s32)(fid->size >> p_fs->cluster_size_bits);
+	p_dir->flags = fid->flags;
+
+	return FFS_SUCCESS;
+}
+
+/*
+ *  File Operation Functions
+ */
+static FS_FUNC_T fat_fs_func = {
+	.alloc_cluster = fat_alloc_cluster,
+	.free_cluster = fat_free_cluster,
+	.count_used_clusters = fat_count_used_clusters,
+
+	.init_dir_entry = fat_init_dir_entry,
+	.init_ext_entry = fat_init_ext_entry,
+	.find_dir_entry = fat_find_dir_entry,
+	.delete_dir_entry = fat_delete_dir_entry,
+	.get_uni_name_from_ext_entry = fat_get_uni_name_from_ext_entry,
+	.count_ext_entries = fat_count_ext_entries,
+	.calc_num_entries = fat_calc_num_entries,
+
+	.get_entry_type = fat_get_entry_type,
+	.set_entry_type = fat_set_entry_type,
+	.get_entry_attr = fat_get_entry_attr,
+	.set_entry_attr = fat_set_entry_attr,
+	.get_entry_flag = fat_get_entry_flag,
+	.set_entry_flag = fat_set_entry_flag,
+	.get_entry_clu0 = fat_get_entry_clu0,
+	.set_entry_clu0 = fat_set_entry_clu0,
+	.get_entry_size = fat_get_entry_size,
+	.set_entry_size = fat_set_entry_size,
+	.get_entry_time = fat_get_entry_time,
+	.set_entry_time = fat_set_entry_time,
+};
+
+
+s32 fat16_mount(struct super_block *sb, PBR_SECTOR_T *p_pbr)
+{
+	s32 num_reserved, num_root_sectors;
+	BPB16_T *p_bpb = (BPB16_T *) p_pbr->bpb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	BD_INFO_T *p_bd = &(EXFAT_SB(sb)->bd_info);
+
+	if (p_bpb->num_fats == 0)
+		return FFS_FORMATERR;
+
+	num_root_sectors = GET16(p_bpb->num_root_entries) << DENTRY_SIZE_BITS;
+	num_root_sectors = ((num_root_sectors-1) >> p_bd->sector_size_bits) + 1;
+
+	p_fs->sectors_per_clu = p_bpb->sectors_per_clu;
+	p_fs->sectors_per_clu_bits = ilog2(p_bpb->sectors_per_clu);
+	p_fs->cluster_size_bits = p_fs->sectors_per_clu_bits + p_bd->sector_size_bits;
+	p_fs->cluster_size = 1 << p_fs->cluster_size_bits;
+
+	p_fs->num_FAT_sectors = GET16(p_bpb->num_fat_sectors);
+
+	p_fs->FAT1_start_sector = p_fs->PBR_sector + GET16(p_bpb->num_reserved);
+	if (p_bpb->num_fats == 1)
+		p_fs->FAT2_start_sector = p_fs->FAT1_start_sector;
+	else
+		p_fs->FAT2_start_sector = p_fs->FAT1_start_sector + p_fs->num_FAT_sectors;
+
+	p_fs->root_start_sector = p_fs->FAT2_start_sector + p_fs->num_FAT_sectors;
+	p_fs->data_start_sector = p_fs->root_start_sector + num_root_sectors;
+
+	p_fs->num_sectors = GET16(p_bpb->num_sectors);
+	if (p_fs->num_sectors == 0)
+		p_fs->num_sectors = GET32(p_bpb->num_huge_sectors);
+
+	num_reserved = p_fs->data_start_sector - p_fs->PBR_sector;
+	p_fs->num_clusters = ((p_fs->num_sectors - num_reserved) >> p_fs->sectors_per_clu_bits) + 2;
+	/* because the cluster index starts with 2 */
+
+	if (p_fs->num_clusters < FAT12_THRESHOLD)
+		p_fs->vol_type = FAT12;
+	else
+		p_fs->vol_type = FAT16;
+	p_fs->vol_id = GET32(p_bpb->vol_serial);
+
+	p_fs->root_dir = 0;
+	p_fs->dentries_in_root = GET16(p_bpb->num_root_entries);
+	p_fs->dentries_per_clu = 1 << (p_fs->cluster_size_bits - DENTRY_SIZE_BITS);
+
+	p_fs->vol_flag = VOL_CLEAN;
+	p_fs->clu_srch_ptr = 2;
+	p_fs->used_clusters = (u32) ~0;
+
+	p_fs->fs_func = &fat_fs_func;
+
+	return FFS_SUCCESS;
+} /* end of fat16_mount */
+
+s32 fat32_mount(struct super_block *sb, PBR_SECTOR_T *p_pbr)
+{
+	s32 num_reserved;
+	BPB32_T *p_bpb = (BPB32_T *) p_pbr->bpb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	BD_INFO_T *p_bd = &(EXFAT_SB(sb)->bd_info);
+
+	if (p_bpb->num_fats == 0)
+		return FFS_FORMATERR;
+
+	p_fs->sectors_per_clu = p_bpb->sectors_per_clu;
+	p_fs->sectors_per_clu_bits = ilog2(p_bpb->sectors_per_clu);
+	p_fs->cluster_size_bits = p_fs->sectors_per_clu_bits + p_bd->sector_size_bits;
+	p_fs->cluster_size = 1 << p_fs->cluster_size_bits;
+
+	p_fs->num_FAT_sectors = GET32(p_bpb->num_fat32_sectors);
+
+	p_fs->FAT1_start_sector = p_fs->PBR_sector + GET16(p_bpb->num_reserved);
+	if (p_bpb->num_fats == 1)
+		p_fs->FAT2_start_sector = p_fs->FAT1_start_sector;
+	else
+		p_fs->FAT2_start_sector = p_fs->FAT1_start_sector + p_fs->num_FAT_sectors;
+
+	p_fs->root_start_sector = p_fs->FAT2_start_sector + p_fs->num_FAT_sectors;
+	p_fs->data_start_sector = p_fs->root_start_sector;
+
+	p_fs->num_sectors = GET32(p_bpb->num_huge_sectors);
+	num_reserved = p_fs->data_start_sector - p_fs->PBR_sector;
+
+	p_fs->num_clusters = ((p_fs->num_sectors-num_reserved) >> p_fs->sectors_per_clu_bits) + 2;
+	/* because the cluster index starts with 2 */
+
+	p_fs->vol_type = FAT32;
+	p_fs->vol_id = GET32(p_bpb->vol_serial);
+
+	p_fs->root_dir = GET32(p_bpb->root_cluster);
+	p_fs->dentries_in_root = 0;
+	p_fs->dentries_per_clu = 1 << (p_fs->cluster_size_bits - DENTRY_SIZE_BITS);
+
+	p_fs->vol_flag = VOL_CLEAN;
+	p_fs->clu_srch_ptr = 2;
+	p_fs->used_clusters = (u32) ~0;
+
+	p_fs->fs_func = &fat_fs_func;
+
+	return FFS_SUCCESS;
+} /* end of fat32_mount */
+
+static FS_FUNC_T exfat_fs_func = {
+	.alloc_cluster = exfat_alloc_cluster,
+	.free_cluster = exfat_free_cluster,
+	.count_used_clusters = exfat_count_used_clusters,
+
+	.init_dir_entry = exfat_init_dir_entry,
+	.init_ext_entry = exfat_init_ext_entry,
+	.find_dir_entry = exfat_find_dir_entry,
+	.delete_dir_entry = exfat_delete_dir_entry,
+	.get_uni_name_from_ext_entry = exfat_get_uni_name_from_ext_entry,
+	.count_ext_entries = exfat_count_ext_entries,
+	.calc_num_entries = exfat_calc_num_entries,
+
+	.get_entry_type = exfat_get_entry_type,
+	.set_entry_type = exfat_set_entry_type,
+	.get_entry_attr = exfat_get_entry_attr,
+	.set_entry_attr = exfat_set_entry_attr,
+	.get_entry_flag = exfat_get_entry_flag,
+	.set_entry_flag = exfat_set_entry_flag,
+	.get_entry_clu0 = exfat_get_entry_clu0,
+	.set_entry_clu0 = exfat_set_entry_clu0,
+	.get_entry_size = exfat_get_entry_size,
+	.set_entry_size = exfat_set_entry_size,
+	.get_entry_time = exfat_get_entry_time,
+	.set_entry_time = exfat_set_entry_time,
+};
+
+s32 exfat_mount(struct super_block *sb, PBR_SECTOR_T *p_pbr)
+{
+	BPBEX_T *p_bpb = (BPBEX_T *) p_pbr->bpb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	BD_INFO_T *p_bd = &(EXFAT_SB(sb)->bd_info);
+
+	if (p_bpb->num_fats == 0)
+		return FFS_FORMATERR;
+
+	p_fs->sectors_per_clu = 1 << p_bpb->sectors_per_clu_bits;
+	p_fs->sectors_per_clu_bits = p_bpb->sectors_per_clu_bits;
+	p_fs->cluster_size_bits = p_fs->sectors_per_clu_bits + p_bd->sector_size_bits;
+	p_fs->cluster_size = 1 << p_fs->cluster_size_bits;
+
+	p_fs->num_FAT_sectors = GET32(p_bpb->fat_length);
+
+	p_fs->FAT1_start_sector = p_fs->PBR_sector + GET32(p_bpb->fat_offset);
+	if (p_bpb->num_fats == 1)
+		p_fs->FAT2_start_sector = p_fs->FAT1_start_sector;
+	else
+		p_fs->FAT2_start_sector = p_fs->FAT1_start_sector + p_fs->num_FAT_sectors;
+
+	p_fs->root_start_sector = p_fs->PBR_sector + GET32(p_bpb->clu_offset);
+	p_fs->data_start_sector = p_fs->root_start_sector;
+
+	p_fs->num_sectors = GET64(p_bpb->vol_length);
+	p_fs->num_clusters = GET32(p_bpb->clu_count) + 2;
+	/* because the cluster index starts with 2 */
+
+	p_fs->vol_type = EXFAT;
+	p_fs->vol_id = GET32(p_bpb->vol_serial);
+
+	p_fs->root_dir = GET32(p_bpb->root_cluster);
+	p_fs->dentries_in_root = 0;
+	p_fs->dentries_per_clu = 1 << (p_fs->cluster_size_bits - DENTRY_SIZE_BITS);
+
+	p_fs->vol_flag = (u32) GET16(p_bpb->vol_flags);
+	p_fs->clu_srch_ptr = 2;
+	p_fs->used_clusters = (u32) ~0;
+
+	p_fs->fs_func = &exfat_fs_func;
+
+	return FFS_SUCCESS;
+} /* end of exfat_mount */
+
+s32 create_dir(struct inode *inode, CHAIN_T *p_dir, UNI_NAME_T *p_uniname, FILE_ID_T *fid)
+{
+	s32 ret, dentry, num_entries;
+	u64 size;
+	CHAIN_T clu;
+	DOS_NAME_T dos_name, dot_name;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	ret = get_num_entries_and_dos_name(sb, p_dir, p_uniname, &num_entries, &dos_name);
+	if (ret)
+		return ret;
+
+	/* find_empty_entry must be called before alloc_cluster */
+	dentry = find_empty_entry(inode, p_dir, num_entries);
+	if (dentry < 0)
+		return FFS_FULL;
+
+	clu.dir = CLUSTER_32(~0);
+	clu.size = 0;
+	clu.flags = (p_fs->vol_type == EXFAT) ? 0x03 : 0x01;
+
+	/* (1) allocate a cluster */
+	ret = p_fs->fs_func->alloc_cluster(sb, 1, &clu);
+	if (ret < 0)
+		return FFS_MEDIAERR;
+	else if (ret == 0)
+		return FFS_FULL;
+
+	ret = clear_cluster(sb, clu.dir);
+	if (ret != FFS_SUCCESS)
+		return ret;
+
+	if (p_fs->vol_type == EXFAT) {
+		size = p_fs->cluster_size;
+	} else {
+		size = 0;
+
+		/* initialize the . and .. entry
+		   Information for . points to itself
+		   Information for .. points to parent dir */
+
+		dot_name.name_case = 0x0;
+		memcpy(dot_name.name, DOS_CUR_DIR_NAME, DOS_NAME_LENGTH);
+
+		ret = p_fs->fs_func->init_dir_entry(sb, &clu, 0, TYPE_DIR, clu.dir, 0);
+		if (ret != FFS_SUCCESS)
+			return ret;
+
+		ret = p_fs->fs_func->init_ext_entry(sb, &clu, 0, 1, NULL, &dot_name);
+		if (ret != FFS_SUCCESS)
+			return ret;
+
+		memcpy(dot_name.name, DOS_PAR_DIR_NAME, DOS_NAME_LENGTH);
+
+		if (p_dir->dir == p_fs->root_dir)
+			ret = p_fs->fs_func->init_dir_entry(sb, &clu, 1, TYPE_DIR, CLUSTER_32(0), 0);
+		else
+			ret = p_fs->fs_func->init_dir_entry(sb, &clu, 1, TYPE_DIR, p_dir->dir, 0);
+
+		if (ret != FFS_SUCCESS)
+			return ret;
+
+		ret = p_fs->fs_func->init_ext_entry(sb, &clu, 1, 1, NULL, &dot_name);
+		if (ret != FFS_SUCCESS)
+			return ret;
+	}
+
+	/* (2) update the directory entry */
+	/* make sub-dir entry in parent directory */
+	ret = p_fs->fs_func->init_dir_entry(sb, p_dir, dentry, TYPE_DIR, clu.dir, size);
+	if (ret != FFS_SUCCESS)
+		return ret;
+
+	ret = p_fs->fs_func->init_ext_entry(sb, p_dir, dentry, num_entries, p_uniname, &dos_name);
+	if (ret != FFS_SUCCESS)
+		return ret;
+
+	fid->dir.dir = p_dir->dir;
+	fid->dir.size = p_dir->size;
+	fid->dir.flags = p_dir->flags;
+	fid->entry = dentry;
+
+	fid->attr = ATTR_SUBDIR;
+	fid->flags = (p_fs->vol_type == EXFAT) ? 0x03 : 0x01;
+	fid->size = size;
+	fid->start_clu = clu.dir;
+
+	fid->type = TYPE_DIR;
+	fid->rwoffset = 0;
+	fid->hint_last_off = -1;
+
+	return FFS_SUCCESS;
+} /* end of create_dir */
+
+s32 create_file(struct inode *inode, CHAIN_T *p_dir, UNI_NAME_T *p_uniname, u8 mode, FILE_ID_T *fid)
+{
+	s32 ret, dentry, num_entries;
+	DOS_NAME_T dos_name;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	ret = get_num_entries_and_dos_name(sb, p_dir, p_uniname, &num_entries, &dos_name);
+	if (ret)
+		return ret;
+
+	/* find_empty_entry must be called before alloc_cluster() */
+	dentry = find_empty_entry(inode, p_dir, num_entries);
+	if (dentry < 0)
+		return FFS_FULL;
+
+	/* (1) update the directory entry */
+	/* fill the dos name directory entry information of the created file.
+	   the first cluster is not determined yet. (0) */
+	ret = p_fs->fs_func->init_dir_entry(sb, p_dir, dentry, TYPE_FILE | mode, CLUSTER_32(0), 0);
+	if (ret != FFS_SUCCESS)
+		return ret;
+
+	ret = p_fs->fs_func->init_ext_entry(sb, p_dir, dentry, num_entries, p_uniname, &dos_name);
+	if (ret != FFS_SUCCESS)
+		return ret;
+
+	fid->dir.dir = p_dir->dir;
+	fid->dir.size = p_dir->size;
+	fid->dir.flags = p_dir->flags;
+	fid->entry = dentry;
+
+	fid->attr = ATTR_ARCHIVE | mode;
+	fid->flags = (p_fs->vol_type == EXFAT) ? 0x03 : 0x01;
+	fid->size = 0;
+	fid->start_clu = CLUSTER_32(~0);
+
+	fid->type = TYPE_FILE;
+	fid->rwoffset = 0;
+	fid->hint_last_off = -1;
+
+	return FFS_SUCCESS;
+} /* end of create_file */
+
+void remove_file(struct inode *inode, CHAIN_T *p_dir, s32 entry)
+{
+	s32 num_entries;
+	sector_t sector;
+	DENTRY_T *ep;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	ep = get_entry_in_dir(sb, p_dir, entry, &sector);
+	if (!ep)
+		return;
+
+	buf_lock(sb, sector);
+
+	/* buf_lock() before call count_ext_entries() */
+	num_entries = p_fs->fs_func->count_ext_entries(sb, p_dir, entry, ep);
+	if (num_entries < 0) {
+		buf_unlock(sb, sector);
+		return;
+	}
+	num_entries++;
+
+	buf_unlock(sb, sector);
+
+	/* (1) update the directory entry */
+	p_fs->fs_func->delete_dir_entry(sb, p_dir, entry, 0, num_entries);
+} /* end of remove_file */
+
+s32 rename_file(struct inode *inode, CHAIN_T *p_dir, s32 oldentry, UNI_NAME_T *p_uniname, FILE_ID_T *fid)
+{
+	s32 ret, newentry = -1, num_old_entries, num_new_entries;
+	sector_t sector_old, sector_new;
+	DOS_NAME_T dos_name;
+	DENTRY_T *epold, *epnew;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	epold = get_entry_in_dir(sb, p_dir, oldentry, &sector_old);
+	if (!epold)
+		return FFS_MEDIAERR;
+
+	buf_lock(sb, sector_old);
+
+	/* buf_lock() before call count_ext_entries() */
+	num_old_entries = p_fs->fs_func->count_ext_entries(sb, p_dir, oldentry, epold);
+	if (num_old_entries < 0) {
+		buf_unlock(sb, sector_old);
+		return FFS_MEDIAERR;
+	}
+	num_old_entries++;
+
+	ret = get_num_entries_and_dos_name(sb, p_dir, p_uniname, &num_new_entries, &dos_name);
+	if (ret) {
+		buf_unlock(sb, sector_old);
+		return ret;
+	}
+
+	if (num_old_entries < num_new_entries) {
+		newentry = find_empty_entry(inode, p_dir, num_new_entries);
+		if (newentry < 0) {
+			buf_unlock(sb, sector_old);
+			return FFS_FULL;
+		}
+
+		epnew = get_entry_in_dir(sb, p_dir, newentry, &sector_new);
+		if (!epnew) {
+			buf_unlock(sb, sector_old);
+			return FFS_MEDIAERR;
+		}
+
+		memcpy((void *) epnew, (void *) epold, DENTRY_SIZE);
+		if (p_fs->fs_func->get_entry_type(epnew) == TYPE_FILE) {
+			p_fs->fs_func->set_entry_attr(epnew, p_fs->fs_func->get_entry_attr(epnew) | ATTR_ARCHIVE);
+			fid->attr |= ATTR_ARCHIVE;
+		}
+		buf_modify(sb, sector_new);
+		buf_unlock(sb, sector_old);
+
+		if (p_fs->vol_type == EXFAT) {
+			epold = get_entry_in_dir(sb, p_dir, oldentry+1, &sector_old);
+			buf_lock(sb, sector_old);
+			epnew = get_entry_in_dir(sb, p_dir, newentry+1, &sector_new);
+
+			if (!epold || !epnew) {
+				buf_unlock(sb, sector_old);
+				return FFS_MEDIAERR;
+			}
+
+			memcpy((void *) epnew, (void *) epold, DENTRY_SIZE);
+			buf_modify(sb, sector_new);
+			buf_unlock(sb, sector_old);
+		}
+
+		ret = p_fs->fs_func->init_ext_entry(sb, p_dir, newentry, num_new_entries, p_uniname, &dos_name);
+		if (ret != FFS_SUCCESS)
+			return ret;
+
+		p_fs->fs_func->delete_dir_entry(sb, p_dir, oldentry, 0, num_old_entries);
+		fid->entry = newentry;
+	} else {
+		if (p_fs->fs_func->get_entry_type(epold) == TYPE_FILE) {
+			p_fs->fs_func->set_entry_attr(epold, p_fs->fs_func->get_entry_attr(epold) | ATTR_ARCHIVE);
+			fid->attr |= ATTR_ARCHIVE;
+		}
+		buf_modify(sb, sector_old);
+		buf_unlock(sb, sector_old);
+
+		ret = p_fs->fs_func->init_ext_entry(sb, p_dir, oldentry, num_new_entries, p_uniname, &dos_name);
+		if (ret != FFS_SUCCESS)
+			return ret;
+
+		p_fs->fs_func->delete_dir_entry(sb, p_dir, oldentry, num_new_entries, num_old_entries);
+	}
+
+	return FFS_SUCCESS;
+} /* end of rename_file */
+
+s32 move_file(struct inode *inode, CHAIN_T *p_olddir, s32 oldentry, CHAIN_T *p_newdir, UNI_NAME_T *p_uniname, FILE_ID_T *fid)
+{
+	s32 ret, newentry, num_new_entries, num_old_entries;
+	sector_t sector_mov, sector_new;
+	CHAIN_T clu;
+	DOS_NAME_T dos_name;
+	DENTRY_T *epmov, *epnew;
+	struct super_block *sb = inode->i_sb;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	epmov = get_entry_in_dir(sb, p_olddir, oldentry, &sector_mov);
+	if (!epmov)
+		return FFS_MEDIAERR;
+
+	/* check if the source and target directory is the same */
+	if (p_fs->fs_func->get_entry_type(epmov) == TYPE_DIR &&
+		p_fs->fs_func->get_entry_clu0(epmov) == p_newdir->dir)
+		return FFS_INVALIDPATH;
+
+	buf_lock(sb, sector_mov);
+
+	/* buf_lock() before call count_ext_entries() */
+	num_old_entries = p_fs->fs_func->count_ext_entries(sb, p_olddir, oldentry, epmov);
+	if (num_old_entries < 0) {
+		buf_unlock(sb, sector_mov);
+		return FFS_MEDIAERR;
+	}
+	num_old_entries++;
+
+	ret = get_num_entries_and_dos_name(sb, p_newdir, p_uniname, &num_new_entries, &dos_name);
+	if (ret) {
+		buf_unlock(sb, sector_mov);
+		return ret;
+	}
+
+	newentry = find_empty_entry(inode, p_newdir, num_new_entries);
+	if (newentry < 0) {
+		buf_unlock(sb, sector_mov);
+		return FFS_FULL;
+	}
+
+	epnew = get_entry_in_dir(sb, p_newdir, newentry, &sector_new);
+	if (!epnew) {
+		buf_unlock(sb, sector_mov);
+		return FFS_MEDIAERR;
+	}
+
+	memcpy((void *) epnew, (void *) epmov, DENTRY_SIZE);
+	if (p_fs->fs_func->get_entry_type(epnew) == TYPE_FILE) {
+		p_fs->fs_func->set_entry_attr(epnew, p_fs->fs_func->get_entry_attr(epnew) | ATTR_ARCHIVE);
+		fid->attr |= ATTR_ARCHIVE;
+	}
+	buf_modify(sb, sector_new);
+	buf_unlock(sb, sector_mov);
+
+	if (p_fs->vol_type == EXFAT) {
+		epmov = get_entry_in_dir(sb, p_olddir, oldentry+1, &sector_mov);
+		buf_lock(sb, sector_mov);
+		epnew = get_entry_in_dir(sb, p_newdir, newentry+1, &sector_new);
+		if (!epmov || !epnew) {
+			buf_unlock(sb, sector_mov);
+			return FFS_MEDIAERR;
+		}
+
+		memcpy((void *) epnew, (void *) epmov, DENTRY_SIZE);
+		buf_modify(sb, sector_new);
+		buf_unlock(sb, sector_mov);
+	} else if (p_fs->fs_func->get_entry_type(epnew) == TYPE_DIR) {
+		/* change ".." pointer to new parent dir */
+		clu.dir = p_fs->fs_func->get_entry_clu0(epnew);
+		clu.flags = 0x01;
+
+		epnew = get_entry_in_dir(sb, &clu, 1, &sector_new);
+		if (!epnew)
+			return FFS_MEDIAERR;
+
+		if (p_newdir->dir == p_fs->root_dir)
+			p_fs->fs_func->set_entry_clu0(epnew, CLUSTER_32(0));
+		else
+			p_fs->fs_func->set_entry_clu0(epnew, p_newdir->dir);
+		buf_modify(sb, sector_new);
+	}
+
+	ret = p_fs->fs_func->init_ext_entry(sb, p_newdir, newentry, num_new_entries, p_uniname, &dos_name);
+	if (ret != FFS_SUCCESS)
+		return ret;
+
+	p_fs->fs_func->delete_dir_entry(sb, p_olddir, oldentry, 0, num_old_entries);
+
+	fid->dir.dir = p_newdir->dir;
+	fid->dir.size = p_newdir->size;
+	fid->dir.flags = p_newdir->flags;
+
+	fid->entry = newentry;
+
+	return FFS_SUCCESS;
+} /* end of move_file */
+
+/*
+ *  Sector Read/Write Functions
+ */
+
+s32 sector_read(struct super_block *sb, sector_t sec, struct buffer_head **bh, s32 read)
+{
+	s32 ret = FFS_MEDIAERR;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	if ((sec >= (p_fs->PBR_sector+p_fs->num_sectors)) && (p_fs->num_sectors > 0)) {
+		printk("[EXFAT] sector_read: out of range error! (sec = %llu)\n", (unsigned long long)sec);
+		fs_error(sb);
+		return ret;
+	}
+
+	if (!p_fs->dev_ejected) {
+		ret = bdev_read(sb, sec, bh, 1, read);
+		if (ret != FFS_SUCCESS)
+			p_fs->dev_ejected = TRUE;
+	}
+
+	return ret;
+} /* end of sector_read */
+
+s32 sector_write(struct super_block *sb, sector_t sec, struct buffer_head *bh, s32 sync)
+{
+	s32 ret = FFS_MEDIAERR;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	if (sec >= (p_fs->PBR_sector+p_fs->num_sectors) && (p_fs->num_sectors > 0)) {
+		printk("[EXFAT] sector_write: out of range error! (sec = %llu)\n", (unsigned long long)sec);
+		fs_error(sb);
+		return ret;
+	}
+
+	if (bh == NULL) {
+		printk("[EXFAT] sector_write: bh is NULL!\n");
+		fs_error(sb);
+		return ret;
+	}
+
+	if (!p_fs->dev_ejected) {
+		ret = bdev_write(sb, sec, bh, 1, sync);
+		if (ret != FFS_SUCCESS)
+			p_fs->dev_ejected = TRUE;
+	}
+
+	return ret;
+} /* end of sector_write */
+
+s32 multi_sector_read(struct super_block *sb, sector_t sec, struct buffer_head **bh, s32 num_secs, s32 read)
+{
+	s32 ret = FFS_MEDIAERR;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	if (((sec+num_secs) > (p_fs->PBR_sector+p_fs->num_sectors)) && (p_fs->num_sectors > 0)) {
+		printk("[EXFAT] multi_sector_read: out of range error! (sec = %llu, num_secs = %d)\n",
+		       (unsigned long long)sec, num_secs);
+		fs_error(sb);
+		return ret;
+	}
+
+	if (!p_fs->dev_ejected) {
+		ret = bdev_read(sb, sec, bh, num_secs, read);
+		if (ret != FFS_SUCCESS)
+			p_fs->dev_ejected = TRUE;
+	}
+
+	return ret;
+} /* end of multi_sector_read */
+
+s32 multi_sector_write(struct super_block *sb, sector_t sec, struct buffer_head *bh, s32 num_secs, s32 sync)
+{
+	s32 ret = FFS_MEDIAERR;
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	if ((sec+num_secs) > (p_fs->PBR_sector+p_fs->num_sectors) && (p_fs->num_sectors > 0)) {
+		printk("[EXFAT] multi_sector_write: out of range error! (sec = %llu, num_secs = %d)\n",
+		       (unsigned long long)sec, num_secs);
+		fs_error(sb);
+		return ret;
+	}
+	if (bh == NULL) {
+		printk("[EXFAT] multi_sector_write: bh is NULL!\n");
+		fs_error(sb);
+		return ret;
+	}
+
+	if (!p_fs->dev_ejected) {
+		ret = bdev_write(sb, sec, bh, num_secs, sync);
+		if (ret != FFS_SUCCESS)
+			p_fs->dev_ejected = TRUE;
+	}
+
+	return ret;
+} /* end of multi_sector_write */
diff --git a/fs/exfat/exfat_core.h b/fs/exfat/exfat_core.h
new file mode 100644
index 000000000..52d05c700
--- /dev/null
+++ b/fs/exfat/exfat_core.h
@@ -0,0 +1,671 @@
+/*
+ *  Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/************************************************************************/
+/*                                                                      */
+/*  PROJECT : exFAT & FAT12/16/32 File System                           */
+/*  FILE    : exfat_core.h                                              */
+/*  PURPOSE : Header File for exFAT File Manager                        */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  NOTES                                                               */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  REVISION HISTORY (Ver 0.9)                                          */
+/*                                                                      */
+/*  - 2010.11.15 [Joosun Hahn] : first writing                          */
+/*                                                                      */
+/************************************************************************/
+
+#ifndef _EXFAT_H
+#define _EXFAT_H
+
+#include "exfat_config.h"
+#include "exfat_data.h"
+#include "exfat_oal.h"
+
+#include "exfat_blkdev.h"
+#include "exfat_cache.h"
+#include "exfat_nls.h"
+#include "exfat_api.h"
+#include "exfat_cache.h"
+
+#ifdef CONFIG_EXFAT_KERNEL_DEBUG
+  /* For Debugging Purpose */
+	/* IOCTL code 'f' used by
+	 *   - file systems typically #0~0x1F
+	 *   - embedded terminal devices #128~
+	 *   - exts for debugging purpose #99
+	 * number 100 and 101 is availble now but has possible conflicts
+	 */
+#define EXFAT_IOC_GET_DEBUGFLAGS       _IOR('f', 100, long)
+#define EXFAT_IOC_SET_DEBUGFLAGS       _IOW('f', 101, long)
+
+#define EXFAT_DEBUGFLAGS_INVALID_UMOUNT        0x01
+#define EXFAT_DEBUGFLAGS_ERROR_RW              0x02
+#endif /* CONFIG_EXFAT_KERNEL_DEBUG */
+
+	/*----------------------------------------------------------------------*/
+	/*  Constant & Macro Definitions                                        */
+	/*----------------------------------------------------------------------*/
+
+#define DENTRY_SIZE             32          /* dir entry size */
+#define DENTRY_SIZE_BITS        5
+
+/* PBR entries */
+#define PBR_SIGNATURE           0xAA55
+#define EXT_SIGNATURE           0xAA550000
+#define VOL_LABEL               "NO NAME    " /* size should be 11 */
+#define OEM_NAME                "MSWIN4.1"  /* size should be 8 */
+#define STR_FAT12               "FAT12   "  /* size should be 8 */
+#define STR_FAT16               "FAT16   "  /* size should be 8 */
+#define STR_FAT32               "FAT32   "  /* size should be 8 */
+#define STR_EXFAT               "EXFAT   "  /* size should be 8 */
+#define VOL_CLEAN               0x0000
+#define VOL_DIRTY               0x0002
+
+/* max number of clusters */
+#define FAT12_THRESHOLD         4087        /* 2^12 - 1 + 2 (clu 0 & 1) */
+#define FAT16_THRESHOLD         65527       /* 2^16 - 1 + 2 */
+#define FAT32_THRESHOLD         268435457   /* 2^28 - 1 + 2 */
+#define EXFAT_THRESHOLD         268435457   /* 2^28 - 1 + 2 */
+
+/* file types */
+#define TYPE_UNUSED             0x0000
+#define TYPE_DELETED            0x0001
+#define TYPE_INVALID            0x0002
+#define TYPE_CRITICAL_PRI       0x0100
+#define TYPE_BITMAP             0x0101
+#define TYPE_UPCASE             0x0102
+#define TYPE_VOLUME             0x0103
+#define TYPE_DIR                0x0104
+#define TYPE_FILE               0x011F
+#define TYPE_SYMLINK            0x015F
+#define TYPE_CRITICAL_SEC       0x0200
+#define TYPE_STREAM             0x0201
+#define TYPE_EXTEND             0x0202
+#define TYPE_ACL                0x0203
+#define TYPE_BENIGN_PRI         0x0400
+#define TYPE_GUID               0x0401
+#define TYPE_PADDING            0x0402
+#define TYPE_ACLTAB             0x0403
+#define TYPE_BENIGN_SEC         0x0800
+#define TYPE_ALL                0x0FFF
+
+/* time modes */
+#define TM_CREATE               0
+#define TM_MODIFY               1
+#define TM_ACCESS               2
+
+/* checksum types */
+#define CS_DIR_ENTRY            0
+#define CS_PBR_SECTOR           1
+#define CS_DEFAULT              2
+
+#define CLUSTER_16(x)           ((u16)(x))
+#define CLUSTER_32(x)           ((u32)(x))
+
+#define FALSE			0
+#define TRUE			1
+
+#define MIN(a, b)		(((a) < (b)) ? (a) : (b))
+#define MAX(a, b)		(((a) > (b)) ? (a) : (b))
+
+#define START_SECTOR(x) \
+	((((sector_t)((x) - 2)) << p_fs->sectors_per_clu_bits) + p_fs->data_start_sector)
+
+#define IS_LAST_SECTOR_IN_CLUSTER(sec) \
+		((((sec) - p_fs->data_start_sector + 1) & ((1 <<  p_fs->sectors_per_clu_bits) - 1)) == 0)
+
+#define GET_CLUSTER_FROM_SECTOR(sec)			\
+		((u32)((((sec) - p_fs->data_start_sector) >> p_fs->sectors_per_clu_bits) + 2))
+
+#define GET16(p_src) \
+	(((u16)(p_src)[0]) | (((u16)(p_src)[1]) << 8))
+#define GET32(p_src) \
+	(((u32)(p_src)[0]) | (((u32)(p_src)[1]) << 8) | \
+	(((u32)(p_src)[2]) << 16) | (((u32)(p_src)[3]) << 24))
+#define GET64(p_src) \
+	(((u64)(p_src)[0]) | (((u64)(p_src)[1]) << 8) | \
+	(((u64)(p_src)[2]) << 16) | (((u64)(p_src)[3]) << 24) | \
+	(((u64)(p_src)[4]) << 32) | (((u64)(p_src)[5]) << 40) | \
+	(((u64)(p_src)[6]) << 48) | (((u64)(p_src)[7]) << 56))
+
+
+#define SET16(p_dst, src)                                  \
+	do {                                              \
+		(p_dst)[0] = (u8)(src);                     \
+		(p_dst)[1] = (u8)(((u16)(src)) >> 8);       \
+	} while (0)
+#define SET32(p_dst, src)                                  \
+	do {                                              \
+		(p_dst)[0] = (u8)(src);                     \
+		(p_dst)[1] = (u8)(((u32)(src)) >> 8);       \
+		(p_dst)[2] = (u8)(((u32)(src)) >> 16);      \
+		(p_dst)[3] = (u8)(((u32)(src)) >> 24);      \
+	} while (0)
+#define SET64(p_dst, src)                                  \
+	do {                                              \
+		(p_dst)[0] = (u8)(src);                   \
+		(p_dst)[1] = (u8)(((u64)(src)) >> 8);     \
+		(p_dst)[2] = (u8)(((u64)(src)) >> 16);    \
+		(p_dst)[3] = (u8)(((u64)(src)) >> 24);    \
+		(p_dst)[4] = (u8)(((u64)(src)) >> 32);    \
+		(p_dst)[5] = (u8)(((u64)(src)) >> 40);    \
+		(p_dst)[6] = (u8)(((u64)(src)) >> 48);    \
+		(p_dst)[7] = (u8)(((u64)(src)) >> 56);    \
+	} while (0)
+
+#ifdef __LITTLE_ENDIAN
+#define GET16_A(p_src)		(*((u16 *)(p_src)))
+#define GET32_A(p_src)		(*((u32 *)(p_src)))
+#define GET64_A(p_src)		(*((u64 *)(p_src)))
+#define SET16_A(p_dst, src)	(*((u16 *)(p_dst)) = (u16)(src))
+#define SET32_A(p_dst, src)	(*((u32 *)(p_dst)) = (u32)(src))
+#define SET64_A(p_dst, src)	(*((u64 *)(p_dst)) = (u64)(src))
+#else /* BIG_ENDIAN */
+#define GET16_A(p_src)		GET16(p_src)
+#define GET32_A(p_src)		GET32(p_src)
+#define GET64_A(p_src)		GET64(p_src)
+#define SET16_A(p_dst, src)	SET16(p_dst, src)
+#define SET32_A(p_dst, src)	SET32(p_dst, src)
+#define SET64_A(p_dst, src)	SET64(p_dst, src)
+#endif
+
+/* Upcase tabel mecro */
+#define HIGH_INDEX_BIT (8)
+#define HIGH_INDEX_MASK (0xFF00)
+#define LOW_INDEX_BIT (16-HIGH_INDEX_BIT)
+#define UTBL_ROW_COUNT (1<<LOW_INDEX_BIT)
+#define UTBL_COL_COUNT (1<<HIGH_INDEX_BIT)
+
+#if CONFIG_EXFAT_DEBUG_MSG
+#define DPRINTK(...)			\
+	do {								\
+		printk("[EXFAT] " __VA_ARGS__);	\
+	} while (0)
+#else
+#define DPRINTK(...)
+#endif
+
+static inline u16 get_col_index(u16 i)
+{
+	return i >> LOW_INDEX_BIT;
+}
+static inline u16 get_row_index(u16 i)
+{
+	return i & ~HIGH_INDEX_MASK;
+}
+/*----------------------------------------------------------------------*/
+/*  Type Definitions                                                    */
+/*----------------------------------------------------------------------*/
+
+/* MS_DOS FAT partition boot record (512 bytes) */
+typedef struct {
+	u8       jmp_boot[3];
+	u8       oem_name[8];
+	u8       bpb[109];
+	u8       boot_code[390];
+	u8       signature[2];
+} PBR_SECTOR_T;
+
+/* MS-DOS FAT12/16 BIOS parameter block (51 bytes) */
+typedef struct {
+	u8       sector_size[2];
+	u8       sectors_per_clu;
+	u8       num_reserved[2];
+	u8       num_fats;
+	u8       num_root_entries[2];
+	u8       num_sectors[2];
+	u8       media_type;
+	u8       num_fat_sectors[2];
+	u8       sectors_in_track[2];
+	u8       num_heads[2];
+	u8       num_hid_sectors[4];
+	u8       num_huge_sectors[4];
+
+	u8       phy_drv_no;
+	u8       reserved;
+	u8       ext_signature;
+	u8       vol_serial[4];
+	u8       vol_label[11];
+	u8       vol_type[8];
+} BPB16_T;
+
+/* MS-DOS FAT32 BIOS parameter block (79 bytes) */
+typedef struct {
+	u8       sector_size[2];
+	u8       sectors_per_clu;
+	u8       num_reserved[2];
+	u8       num_fats;
+	u8       num_root_entries[2];
+	u8       num_sectors[2];
+	u8       media_type;
+	u8       num_fat_sectors[2];
+	u8       sectors_in_track[2];
+	u8       num_heads[2];
+	u8       num_hid_sectors[4];
+	u8       num_huge_sectors[4];
+	u8       num_fat32_sectors[4];
+	u8       ext_flags[2];
+	u8       fs_version[2];
+	u8       root_cluster[4];
+	u8       fsinfo_sector[2];
+	u8       backup_sector[2];
+	u8       reserved[12];
+
+	u8       phy_drv_no;
+	u8       ext_reserved;
+	u8       ext_signature;
+	u8       vol_serial[4];
+	u8       vol_label[11];
+	u8       vol_type[8];
+} BPB32_T;
+
+/* MS-DOS EXFAT BIOS parameter block (109 bytes) */
+typedef struct {
+	u8       reserved1[53];
+	u8       vol_offset[8];
+	u8       vol_length[8];
+	u8       fat_offset[4];
+	u8       fat_length[4];
+	u8       clu_offset[4];
+	u8       clu_count[4];
+	u8       root_cluster[4];
+	u8       vol_serial[4];
+	u8       fs_version[2];
+	u8       vol_flags[2];
+	u8       sector_size_bits;
+	u8       sectors_per_clu_bits;
+	u8       num_fats;
+	u8       phy_drv_no;
+	u8       perc_in_use;
+	u8       reserved2[7];
+} BPBEX_T;
+
+/* MS-DOS FAT file system information sector (512 bytes) */
+typedef struct {
+	u8       signature1[4];
+	u8       reserved1[480];
+	u8       signature2[4];
+	u8       free_cluster[4];
+	u8       next_cluster[4];
+	u8       reserved2[14];
+	u8       signature3[2];
+} FSI_SECTOR_T;
+
+/* MS-DOS FAT directory entry (32 bytes) */
+typedef struct {
+	u8       dummy[32];
+} DENTRY_T;
+
+typedef struct {
+	u8       name[DOS_NAME_LENGTH];
+	u8       attr;
+	u8       lcase;
+	u8       create_time_ms;
+	u8       create_time[2];
+	u8       create_date[2];
+	u8       access_date[2];
+	u8       start_clu_hi[2];
+	u8       modify_time[2];
+	u8       modify_date[2];
+	u8       start_clu_lo[2];
+	u8       size[4];
+} DOS_DENTRY_T;
+
+/* MS-DOS FAT extended directory entry (32 bytes) */
+typedef struct {
+	u8       order;
+	u8       unicode_0_4[10];
+	u8       attr;
+	u8       sysid;
+	u8       checksum;
+	u8       unicode_5_10[12];
+	u8       start_clu[2];
+	u8       unicode_11_12[4];
+} EXT_DENTRY_T;
+
+/* MS-DOS EXFAT file directory entry (32 bytes) */
+typedef struct {
+	u8       type;
+	u8       num_ext;
+	u8       checksum[2];
+	u8       attr[2];
+	u8       reserved1[2];
+	u8       create_time[2];
+	u8       create_date[2];
+	u8       modify_time[2];
+	u8       modify_date[2];
+	u8       access_time[2];
+	u8       access_date[2];
+	u8       create_time_ms;
+	u8       modify_time_ms;
+	u8       access_time_ms;
+	u8       reserved2[9];
+} FILE_DENTRY_T;
+
+/* MS-DOS EXFAT stream extension directory entry (32 bytes) */
+typedef struct {
+	u8       type;
+	u8       flags;
+	u8       reserved1;
+	u8       name_len;
+	u8       name_hash[2];
+	u8       reserved2[2];
+	u8       valid_size[8];
+	u8       reserved3[4];
+	u8       start_clu[4];
+	u8       size[8];
+} STRM_DENTRY_T;
+
+/* MS-DOS EXFAT file name directory entry (32 bytes) */
+typedef struct {
+	u8       type;
+	u8       flags;
+	u8       unicode_0_14[30];
+} NAME_DENTRY_T;
+
+/* MS-DOS EXFAT allocation bitmap directory entry (32 bytes) */
+typedef struct {
+	u8       type;
+	u8       flags;
+	u8       reserved[18];
+	u8       start_clu[4];
+	u8       size[8];
+} BMAP_DENTRY_T;
+
+/* MS-DOS EXFAT up-case table directory entry (32 bytes) */
+typedef struct {
+	u8       type;
+	u8       reserved1[3];
+	u8       checksum[4];
+	u8       reserved2[12];
+	u8       start_clu[4];
+	u8       size[8];
+} CASE_DENTRY_T;
+
+/* MS-DOS EXFAT volume label directory entry (32 bytes) */
+typedef struct {
+	u8       type;
+	u8       label_len;
+	u8       unicode_0_10[22];
+	u8       reserved[8];
+} VOLM_DENTRY_T;
+
+/* unused entry hint information */
+typedef struct {
+	u32      dir;
+	s32       entry;
+	CHAIN_T     clu;
+} UENTRY_T;
+
+typedef struct {
+	s32       (*alloc_cluster)(struct super_block *sb, s32 num_alloc, CHAIN_T *p_chain);
+	void        (*free_cluster)(struct super_block *sb, CHAIN_T *p_chain, s32 do_relse);
+	s32       (*count_used_clusters)(struct super_block *sb);
+
+	s32      (*init_dir_entry)(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u32 type,
+								 u32 start_clu, u64 size);
+	s32      (*init_ext_entry)(struct super_block *sb, CHAIN_T *p_dir, s32 entry, s32 num_entries,
+								 UNI_NAME_T *p_uniname, DOS_NAME_T *p_dosname);
+	s32       (*find_dir_entry)(struct super_block *sb, CHAIN_T *p_dir, UNI_NAME_T *p_uniname, s32 num_entries, DOS_NAME_T *p_dosname, u32 type);
+	void        (*delete_dir_entry)(struct super_block *sb, CHAIN_T *p_dir, s32 entry, s32 offset, s32 num_entries);
+	void        (*get_uni_name_from_ext_entry)(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u16 *uniname);
+	s32       (*count_ext_entries)(struct super_block *sb, CHAIN_T *p_dir, s32 entry, DENTRY_T *p_entry);
+	s32       (*calc_num_entries)(UNI_NAME_T *p_uniname);
+
+	u32      (*get_entry_type)(DENTRY_T *p_entry);
+	void        (*set_entry_type)(DENTRY_T *p_entry, u32 type);
+	u32      (*get_entry_attr)(DENTRY_T *p_entry);
+	void        (*set_entry_attr)(DENTRY_T *p_entry, u32 attr);
+	u8       (*get_entry_flag)(DENTRY_T *p_entry);
+	void        (*set_entry_flag)(DENTRY_T *p_entry, u8 flag);
+	u32      (*get_entry_clu0)(DENTRY_T *p_entry);
+	void        (*set_entry_clu0)(DENTRY_T *p_entry, u32 clu0);
+	u64      (*get_entry_size)(DENTRY_T *p_entry);
+	void        (*set_entry_size)(DENTRY_T *p_entry, u64 size);
+	void        (*get_entry_time)(DENTRY_T *p_entry, TIMESTAMP_T *tp, u8 mode);
+	void        (*set_entry_time)(DENTRY_T *p_entry, TIMESTAMP_T *tp, u8 mode);
+} FS_FUNC_T;
+
+typedef struct __FS_INFO_T {
+	u32      drv;                    /* drive ID */
+	u32      vol_type;               /* volume FAT type */
+	u32      vol_id;                 /* volume serial number */
+
+	u64      num_sectors;            /* num of sectors in volume */
+	u32      num_clusters;           /* num of clusters in volume */
+	u32      cluster_size;           /* cluster size in bytes */
+	u32      cluster_size_bits;
+	u32      sectors_per_clu;        /* cluster size in sectors */
+	u32      sectors_per_clu_bits;
+
+	u32      PBR_sector;             /* PBR sector */
+	u32      FAT1_start_sector;      /* FAT1 start sector */
+	u32      FAT2_start_sector;      /* FAT2 start sector */
+	u32      root_start_sector;      /* root dir start sector */
+	u32      data_start_sector;      /* data area start sector */
+	u32      num_FAT_sectors;        /* num of FAT sectors */
+
+	u32      root_dir;               /* root dir cluster */
+	u32      dentries_in_root;       /* num of dentries in root dir */
+	u32      dentries_per_clu;       /* num of dentries per cluster */
+
+	u32      vol_flag;               /* volume dirty flag */
+	struct buffer_head *pbr_bh;         /* PBR sector */
+
+	u32      map_clu;                /* allocation bitmap start cluster */
+	u32      map_sectors;            /* num of allocation bitmap sectors */
+	struct buffer_head **vol_amap;      /* allocation bitmap */
+
+	u16      **vol_utbl;               /* upcase table */
+
+	u32      clu_srch_ptr;           /* cluster search pointer */
+	u32      used_clusters;          /* number of used clusters */
+	UENTRY_T    hint_uentry;         /* unused entry hint information */
+
+	u32      dev_ejected;            /* block device operation error flag */
+
+	FS_FUNC_T	*fs_func;
+	struct semaphore v_sem;
+
+	/* FAT cache */
+	BUF_CACHE_T FAT_cache_array[FAT_CACHE_SIZE];
+	BUF_CACHE_T FAT_cache_lru_list;
+	BUF_CACHE_T FAT_cache_hash_list[FAT_CACHE_HASH_SIZE];
+
+	/* buf cache */
+	BUF_CACHE_T buf_cache_array[BUF_CACHE_SIZE];
+	BUF_CACHE_T buf_cache_lru_list;
+	BUF_CACHE_T buf_cache_hash_list[BUF_CACHE_HASH_SIZE];
+} FS_INFO_T;
+
+#define ES_2_ENTRIES		2
+#define ES_3_ENTRIES		3
+#define ES_ALL_ENTRIES	0
+
+typedef struct {
+	sector_t	sector;	/* sector number that contains file_entry */
+	s32	offset;		/* byte offset in the sector */
+	s32	alloc_flag;	/* flag in stream entry. 01 for cluster chain, 03 for contig. clusteres. */
+	u32 num_entries;
+
+	/* __buf should be the last member */
+	void *__buf;
+} ENTRY_SET_CACHE_T;
+
+/*----------------------------------------------------------------------*/
+/*  External Function Declarations                                      */
+/*----------------------------------------------------------------------*/
+
+/* file system initialization & shutdown functions */
+s32 ffsInit(void);
+s32 ffsShutdown(void);
+
+/* volume management functions */
+s32 ffsMountVol(struct super_block *sb);
+s32 ffsUmountVol(struct super_block *sb);
+s32 ffsCheckVol(struct super_block *sb);
+s32 ffsGetVolInfo(struct super_block *sb, VOL_INFO_T *info);
+s32 ffsSyncVol(struct super_block *sb, s32 do_sync);
+
+/* file management functions */
+s32 ffsLookupFile(struct inode *inode, char *path, FILE_ID_T *fid);
+s32 ffsCreateFile(struct inode *inode, char *path, u8 mode, FILE_ID_T *fid);
+s32 ffsReadFile(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *rcount);
+s32 ffsWriteFile(struct inode *inode, FILE_ID_T *fid, void *buffer, u64 count, u64 *wcount);
+s32 ffsTruncateFile(struct inode *inode, u64 old_size, u64 new_size);
+s32 ffsMoveFile(struct inode *old_parent_inode, FILE_ID_T *fid, struct inode *new_parent_inode, struct dentry *new_dentry);
+s32 ffsRemoveFile(struct inode *inode, FILE_ID_T *fid);
+s32 ffsSetAttr(struct inode *inode, u32 attr);
+s32 ffsGetStat(struct inode *inode, DIR_ENTRY_T *info);
+s32 ffsSetStat(struct inode *inode, DIR_ENTRY_T *info);
+s32 ffsMapCluster(struct inode *inode, s32 clu_offset, u32 *clu);
+
+/* directory management functions */
+s32 ffsCreateDir(struct inode *inode, char *path, FILE_ID_T *fid);
+s32 ffsReadDir(struct inode *inode, DIR_ENTRY_T *dir_ent);
+s32 ffsRemoveDir(struct inode *inode, FILE_ID_T *fid);
+
+/*----------------------------------------------------------------------*/
+/*  External Function Declarations (NOT TO UPPER LAYER)                 */
+/*----------------------------------------------------------------------*/
+
+/* fs management functions */
+s32  fs_init(void);
+s32  fs_shutdown(void);
+void   fs_set_vol_flags(struct super_block *sb, u32 new_flag);
+void   fs_sync(struct super_block *sb, s32 do_sync);
+void   fs_error(struct super_block *sb);
+
+/* cluster management functions */
+s32   clear_cluster(struct super_block *sb, u32 clu);
+s32  fat_alloc_cluster(struct super_block *sb, s32 num_alloc, CHAIN_T *p_chain);
+s32  exfat_alloc_cluster(struct super_block *sb, s32 num_alloc, CHAIN_T *p_chain);
+void   fat_free_cluster(struct super_block *sb, CHAIN_T *p_chain, s32 do_relse);
+void   exfat_free_cluster(struct super_block *sb, CHAIN_T *p_chain, s32 do_relse);
+u32 find_last_cluster(struct super_block *sb, CHAIN_T *p_chain);
+s32  count_num_clusters(struct super_block *sb, CHAIN_T *dir);
+s32  fat_count_used_clusters(struct super_block *sb);
+s32  exfat_count_used_clusters(struct super_block *sb);
+void   exfat_chain_cont_cluster(struct super_block *sb, u32 chain, s32 len);
+
+/* allocation bitmap management functions */
+s32  load_alloc_bitmap(struct super_block *sb);
+void   free_alloc_bitmap(struct super_block *sb);
+s32   set_alloc_bitmap(struct super_block *sb, u32 clu);
+s32   clr_alloc_bitmap(struct super_block *sb, u32 clu);
+u32 test_alloc_bitmap(struct super_block *sb, u32 clu);
+void   sync_alloc_bitmap(struct super_block *sb);
+
+/* upcase table management functions */
+s32  load_upcase_table(struct super_block *sb);
+void   free_upcase_table(struct super_block *sb);
+
+/* dir entry management functions */
+u32 fat_get_entry_type(DENTRY_T *p_entry);
+u32 exfat_get_entry_type(DENTRY_T *p_entry);
+void   fat_set_entry_type(DENTRY_T *p_entry, u32 type);
+void   exfat_set_entry_type(DENTRY_T *p_entry, u32 type);
+u32 fat_get_entry_attr(DENTRY_T *p_entry);
+u32 exfat_get_entry_attr(DENTRY_T *p_entry);
+void   fat_set_entry_attr(DENTRY_T *p_entry, u32 attr);
+void   exfat_set_entry_attr(DENTRY_T *p_entry, u32 attr);
+u8  fat_get_entry_flag(DENTRY_T *p_entry);
+u8  exfat_get_entry_flag(DENTRY_T *p_entry);
+void   fat_set_entry_flag(DENTRY_T *p_entry, u8 flag);
+void   exfat_set_entry_flag(DENTRY_T *p_entry, u8 flag);
+u32 fat_get_entry_clu0(DENTRY_T *p_entry);
+u32 exfat_get_entry_clu0(DENTRY_T *p_entry);
+void   fat_set_entry_clu0(DENTRY_T *p_entry, u32 start_clu);
+void   exfat_set_entry_clu0(DENTRY_T *p_entry, u32 start_clu);
+u64 fat_get_entry_size(DENTRY_T *p_entry);
+u64 exfat_get_entry_size(DENTRY_T *p_entry);
+void   fat_set_entry_size(DENTRY_T *p_entry, u64 size);
+void   exfat_set_entry_size(DENTRY_T *p_entry, u64 size);
+void   fat_get_entry_time(DENTRY_T *p_entry, TIMESTAMP_T *tp, u8 mode);
+void   exfat_get_entry_time(DENTRY_T *p_entry, TIMESTAMP_T *tp, u8 mode);
+void   fat_set_entry_time(DENTRY_T *p_entry, TIMESTAMP_T *tp, u8 mode);
+void   exfat_set_entry_time(DENTRY_T *p_entry, TIMESTAMP_T *tp, u8 mode);
+s32   fat_init_dir_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u32 type, u32 start_clu, u64 size);
+s32   exfat_init_dir_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u32 type, u32 start_clu, u64 size);
+s32   fat_init_ext_dir_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, s32 num_entries, UNI_NAME_T *p_uniname, DOS_NAME_T *p_dosname);
+s32   exfat_init_ext_dir_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, s32 num_entries, UNI_NAME_T *p_uniname, DOS_NAME_T *p_dosname);
+void   init_dos_entry(DOS_DENTRY_T *ep, u32 type, u32 start_clu);
+void   init_ext_entry(EXT_DENTRY_T *ep, s32 order, u8 chksum, u16 *uniname);
+void   init_file_entry(FILE_DENTRY_T *ep, u32 type);
+void   init_strm_entry(STRM_DENTRY_T *ep, u8 flags, u32 start_clu, u64 size);
+void   init_name_entry(NAME_DENTRY_T *ep, u16 *uniname);
+void   fat_delete_dir_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, s32 order, s32 num_entries);
+void   exfat_delete_dir_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, s32 order, s32 num_entries);
+
+s32   find_location(struct super_block *sb, CHAIN_T *p_dir, s32 entry, sector_t *sector, s32 *offset);
+DENTRY_T *get_entry_with_sector(struct super_block *sb, sector_t sector, s32 offset);
+DENTRY_T *get_entry_in_dir(struct super_block *sb, CHAIN_T *p_dir, s32 entry, sector_t *sector);
+ENTRY_SET_CACHE_T *get_entry_set_in_dir(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u32 type, DENTRY_T **file_ep);
+void release_entry_set(ENTRY_SET_CACHE_T *es);
+s32 write_whole_entry_set(struct super_block *sb, ENTRY_SET_CACHE_T *es);
+s32 write_partial_entries_in_entry_set(struct super_block *sb, ENTRY_SET_CACHE_T *es, DENTRY_T *ep, u32 count);
+s32  search_deleted_or_unused_entry(struct super_block *sb, CHAIN_T *p_dir, s32 num_entries);
+s32  find_empty_entry(struct inode *inode, CHAIN_T *p_dir, s32 num_entries);
+s32  fat_find_dir_entry(struct super_block *sb, CHAIN_T *p_dir, UNI_NAME_T *p_uniname, s32 num_entries, DOS_NAME_T *p_dosname, u32 type);
+s32  exfat_find_dir_entry(struct super_block *sb, CHAIN_T *p_dir, UNI_NAME_T *p_uniname, s32 num_entries, DOS_NAME_T *p_dosname, u32 type);
+s32  fat_count_ext_entries(struct super_block *sb, CHAIN_T *p_dir, s32 entry, DENTRY_T *p_entry);
+s32  exfat_count_ext_entries(struct super_block *sb, CHAIN_T *p_dir, s32 entry, DENTRY_T *p_entry);
+s32  count_dos_name_entries(struct super_block *sb, CHAIN_T *p_dir, u32 type);
+void   update_dir_checksum(struct super_block *sb, CHAIN_T *p_dir, s32 entry);
+void update_dir_checksum_with_entry_set(struct super_block *sb, ENTRY_SET_CACHE_T *es);
+bool   is_dir_empty(struct super_block *sb, CHAIN_T *p_dir);
+
+/* name conversion functions */
+s32  get_num_entries_and_dos_name(struct super_block *sb, CHAIN_T *p_dir, UNI_NAME_T *p_uniname, s32 *entries, DOS_NAME_T *p_dosname);
+void   get_uni_name_from_dos_entry(struct super_block *sb, DOS_DENTRY_T *ep, UNI_NAME_T *p_uniname, u8 mode);
+void   fat_get_uni_name_from_ext_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u16 *uniname);
+void   exfat_get_uni_name_from_ext_entry(struct super_block *sb, CHAIN_T *p_dir, s32 entry, u16 *uniname);
+s32  extract_uni_name_from_ext_entry(EXT_DENTRY_T *ep, u16 *uniname, s32 order);
+s32  extract_uni_name_from_name_entry(NAME_DENTRY_T *ep, u16 *uniname, s32 order);
+s32  fat_generate_dos_name(struct super_block *sb, CHAIN_T *p_dir, DOS_NAME_T *p_dosname);
+void   fat_attach_count_to_dos_name(u8 *dosname, s32 count);
+s32  fat_calc_num_entries(UNI_NAME_T *p_uniname);
+s32  exfat_calc_num_entries(UNI_NAME_T *p_uniname);
+u8  calc_checksum_1byte(void *data, s32 len, u8 chksum);
+u16 calc_checksum_2byte(void *data, s32 len, u16 chksum, s32 type);
+u32 calc_checksum_4byte(void *data, s32 len, u32 chksum, s32 type);
+
+/* name resolution functions */
+s32  resolve_path(struct inode *inode, char *path, CHAIN_T *p_dir, UNI_NAME_T *p_uniname);
+s32  resolve_name(u8 *name, u8 **arg);
+
+/* file operation functions */
+s32  fat16_mount(struct super_block *sb, PBR_SECTOR_T *p_pbr);
+s32  fat32_mount(struct super_block *sb, PBR_SECTOR_T *p_pbr);
+s32  exfat_mount(struct super_block *sb, PBR_SECTOR_T *p_pbr);
+s32  create_dir(struct inode *inode, CHAIN_T *p_dir, UNI_NAME_T *p_uniname, FILE_ID_T *fid);
+s32  create_file(struct inode *inode, CHAIN_T *p_dir, UNI_NAME_T *p_uniname, u8 mode, FILE_ID_T *fid);
+void   remove_file(struct inode *inode, CHAIN_T *p_dir, s32 entry);
+s32  rename_file(struct inode *inode, CHAIN_T *p_dir, s32 old_entry, UNI_NAME_T *p_uniname, FILE_ID_T *fid);
+s32  move_file(struct inode *inode, CHAIN_T *p_olddir, s32 oldentry, CHAIN_T *p_newdir, UNI_NAME_T *p_uniname, FILE_ID_T *fid);
+
+/* sector read/write functions */
+s32   sector_read(struct super_block *sb, sector_t sec, struct buffer_head **bh, s32 read);
+s32   sector_write(struct super_block *sb, sector_t sec, struct buffer_head *bh, s32 sync);
+s32   multi_sector_read(struct super_block *sb, sector_t sec, struct buffer_head **bh, s32 num_secs, s32 read);
+s32   multi_sector_write(struct super_block *sb, sector_t sec, struct buffer_head *bh, s32 num_secs, s32 sync);
+
+#endif /* _EXFAT_H */
diff --git a/fs/exfat/exfat_data.c b/fs/exfat/exfat_data.c
new file mode 100644
index 000000000..65da07aff
--- /dev/null
+++ b/fs/exfat/exfat_data.c
@@ -0,0 +1,77 @@
+/*
+ *  Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/************************************************************************/
+/*                                                                      */
+/*  PROJECT : exFAT & FAT12/16/32 File System                           */
+/*  FILE    : exfat_data.c                                              */
+/*  PURPOSE : exFAT Configuable Data Definitions                        */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  NOTES                                                               */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  REVISION HISTORY (Ver 0.9)                                          */
+/*                                                                      */
+/*  - 2010.11.15 [Joosun Hahn] : first writing                          */
+/*                                                                      */
+/************************************************************************/
+
+#include "exfat_config.h"
+#include "exfat_data.h"
+#include "exfat_oal.h"
+
+#include "exfat_blkdev.h"
+#include "exfat_cache.h"
+#include "exfat_nls.h"
+#include "exfat_super.h"
+#include "exfat_core.h"
+
+/*======================================================================*/
+/*                                                                      */
+/*                    GLOBAL VARIABLE DEFINITIONS                       */
+/*                                                                      */
+/*======================================================================*/
+
+/*----------------------------------------------------------------------*/
+/*  File Manager                                                        */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/*  Buffer Manager                                                      */
+/*----------------------------------------------------------------------*/
+
+/* FAT cache */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36)
+DECLARE_MUTEX(f_sem);
+#else
+DEFINE_SEMAPHORE(f_sem);
+#endif
+BUF_CACHE_T FAT_cache_array[FAT_CACHE_SIZE];
+BUF_CACHE_T FAT_cache_lru_list;
+BUF_CACHE_T FAT_cache_hash_list[FAT_CACHE_HASH_SIZE];
+
+/* buf cache */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36)
+DECLARE_MUTEX(b_sem);
+#else
+DEFINE_SEMAPHORE(b_sem);
+#endif
+BUF_CACHE_T buf_cache_array[BUF_CACHE_SIZE];
+BUF_CACHE_T buf_cache_lru_list;
+BUF_CACHE_T buf_cache_hash_list[BUF_CACHE_HASH_SIZE];
diff --git a/fs/exfat/exfat_data.h b/fs/exfat/exfat_data.h
new file mode 100644
index 000000000..53b0e3939
--- /dev/null
+++ b/fs/exfat/exfat_data.h
@@ -0,0 +1,58 @@
+/*
+ *  Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/************************************************************************/
+/*                                                                      */
+/*  PROJECT : exFAT & FAT12/16/32 File System                           */
+/*  FILE    : exfat_data.h                                              */
+/*  PURPOSE : Header File for exFAT Configuable Constants               */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  NOTES                                                               */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  REVISION HISTORY (Ver 0.9)                                          */
+/*                                                                      */
+/*  - 2010.11.15 [Joosun Hahn] : first writing                          */
+/*                                                                      */
+/************************************************************************/
+
+#ifndef _EXFAT_DATA_H
+#define _EXFAT_DATA_H
+
+#include "exfat_config.h"
+
+/*======================================================================*/
+/*                                                                      */
+/*                        FFS CONFIGURATIONS                            */
+/*                  (CHANGE THIS PART IF REQUIRED)                      */
+/*                                                                      */
+/*======================================================================*/
+
+/* max number of root directory entries in FAT12/16 */
+/* (should be an exponential value of 2)            */
+#define MAX_DENTRY              512
+
+/* cache size (in number of sectors)                */
+/* (should be an exponential value of 2)            */
+#define FAT_CACHE_SIZE          128
+#define FAT_CACHE_HASH_SIZE     64
+#define BUF_CACHE_SIZE          256
+#define BUF_CACHE_HASH_SIZE     64
+
+#endif /* _EXFAT_DATA_H */
diff --git a/fs/exfat/exfat_nls.c b/fs/exfat/exfat_nls.c
new file mode 100644
index 000000000..a48b3d05a
--- /dev/null
+++ b/fs/exfat/exfat_nls.c
@@ -0,0 +1,448 @@
+/*
+ *  Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/************************************************************************/
+/*                                                                      */
+/*  PROJECT : exFAT & FAT12/16/32 File System                           */
+/*  FILE    : exfat_nls.c                                               */
+/*  PURPOSE : exFAT NLS Manager                                         */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  NOTES                                                               */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  REVISION HISTORY (Ver 0.9)                                          */
+/*                                                                      */
+/*  - 2010.11.15 [Joosun Hahn] : first writing                          */
+/*                                                                      */
+/************************************************************************/
+
+#include "exfat_config.h"
+#include "exfat_data.h"
+
+#include "exfat_nls.h"
+#include "exfat_api.h"
+#include "exfat_super.h"
+#include "exfat_core.h"
+
+#include <linux/nls.h>
+
+/*----------------------------------------------------------------------*/
+/*  Global Variable Definitions                                         */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/*  Local Variable Definitions                                          */
+/*----------------------------------------------------------------------*/
+
+static u16 bad_dos_chars[] = {
+	/* + , ; = [ ] */
+	0x002B, 0x002C, 0x003B, 0x003D, 0x005B, 0x005D,
+	0xFF0B, 0xFF0C, 0xFF1B, 0xFF1D, 0xFF3B, 0xFF3D,
+	0
+};
+
+static u16 bad_uni_chars[] = {
+	/* " * / : < > ? \ | */
+	0x0022,         0x002A, 0x002F, 0x003A,
+	0x003C, 0x003E, 0x003F, 0x005C, 0x007C,
+	0
+};
+
+/*----------------------------------------------------------------------*/
+/*  Local Function Declarations                                         */
+/*----------------------------------------------------------------------*/
+
+static s32  convert_uni_to_ch(struct nls_table *nls, u8 *ch, u16 uni, s32 *lossy);
+static s32  convert_ch_to_uni(struct nls_table *nls, u16 *uni, u8 *ch, s32 *lossy);
+
+/*======================================================================*/
+/*  Global Function Definitions                                         */
+/*======================================================================*/
+
+u16 nls_upper(struct super_block *sb, u16 a)
+{
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+
+	if (EXFAT_SB(sb)->options.casesensitive)
+		return a;
+	if (p_fs->vol_utbl != NULL && (p_fs->vol_utbl)[get_col_index(a)] != NULL)
+		return (p_fs->vol_utbl)[get_col_index(a)][get_row_index(a)];
+	else
+		return a;
+}
+
+u16 *nls_wstrchr(u16 *str, u16 wchar)
+{
+	while (*str) {
+		if (*(str++) == wchar)
+			return str;
+	}
+
+	return 0;
+}
+
+s32 nls_dosname_cmp(struct super_block *sb, u8 *a, u8 *b)
+{
+	return strncmp((void *) a, (void *) b, DOS_NAME_LENGTH);
+} /* end of nls_dosname_cmp */
+
+s32 nls_uniname_cmp(struct super_block *sb, u16 *a, u16 *b)
+{
+	int i;
+
+	for (i = 0; i < MAX_NAME_LENGTH; i++, a++, b++) {
+		if (nls_upper(sb, *a) != nls_upper(sb, *b))
+			return 1;
+		if (*a == 0x0)
+			return 0;
+	}
+	return 0;
+} /* end of nls_uniname_cmp */
+
+void nls_uniname_to_dosname(struct super_block *sb, DOS_NAME_T *p_dosname, UNI_NAME_T *p_uniname, s32 *p_lossy)
+{
+	int i, j, len, lossy = FALSE;
+	u8 buf[MAX_CHARSET_SIZE];
+	u8 lower = 0, upper = 0;
+	u8 *dosname = p_dosname->name;
+	u16 *uniname = p_uniname->name;
+	u16 *p, *last_period;
+	struct nls_table *nls = EXFAT_SB(sb)->nls_disk;
+
+	for (i = 0; i < DOS_NAME_LENGTH; i++)
+		*(dosname+i) = ' ';
+
+	if (!nls_uniname_cmp(sb, uniname, (u16 *) UNI_CUR_DIR_NAME)) {
+		*(dosname) = '.';
+		p_dosname->name_case = 0x0;
+		if (p_lossy != NULL)
+			*p_lossy = FALSE;
+		return;
+	}
+
+	if (!nls_uniname_cmp(sb, uniname, (u16 *) UNI_PAR_DIR_NAME)) {
+		*(dosname) = '.';
+		*(dosname+1) = '.';
+		p_dosname->name_case = 0x0;
+		if (p_lossy != NULL)
+			*p_lossy = FALSE;
+		return;
+	}
+
+	/* search for the last embedded period */
+	last_period = NULL;
+	for (p = uniname; *p; p++) {
+		if (*p == (u16) '.')
+			last_period = p;
+	}
+
+	i = 0;
+	while (i < DOS_NAME_LENGTH) {
+		if (i == 8) {
+			if (last_period == NULL)
+				break;
+
+			if (uniname <= last_period) {
+				if (uniname < last_period)
+					lossy = TRUE;
+				uniname = last_period + 1;
+			}
+		}
+
+		if (*uniname == (u16) '\0') {
+			break;
+		} else if (*uniname == (u16) ' ') {
+			lossy = TRUE;
+		} else if (*uniname == (u16) '.') {
+			if (uniname < last_period)
+				lossy = TRUE;
+			else
+				i = 8;
+		} else if (nls_wstrchr(bad_dos_chars, *uniname)) {
+			lossy = TRUE;
+			*(dosname+i) = '_';
+			i++;
+		} else {
+			len = convert_uni_to_ch(nls, buf, *uniname, &lossy);
+
+			if (len > 1) {
+				if ((i >= 8) && ((i+len) > DOS_NAME_LENGTH))
+					break;
+
+				if ((i <  8) && ((i+len) > 8)) {
+					i = 8;
+					continue;
+				}
+
+				lower = 0xFF;
+
+				for (j = 0; j < len; j++, i++)
+					*(dosname+i) = *(buf+j);
+			} else { /* len == 1 */
+				if ((*buf >= 'a') && (*buf <= 'z')) {
+					*(dosname+i) = *buf - ('a' - 'A');
+
+					if (i < 8)
+						lower |= 0x08;
+					else
+						lower |= 0x10;
+				} else if ((*buf >= 'A') && (*buf <= 'Z')) {
+					*(dosname+i) = *buf;
+
+					if (i < 8)
+						upper |= 0x08;
+					else
+						upper |= 0x10;
+				} else {
+					*(dosname+i) = *buf;
+				}
+				i++;
+			}
+		}
+
+		uniname++;
+	}
+
+	if (*dosname == 0xE5)
+		*dosname = 0x05;
+
+	if (*uniname != 0x0)
+		lossy = TRUE;
+
+	if (upper & lower)
+		p_dosname->name_case = 0xFF;
+	else
+		p_dosname->name_case = lower;
+
+	if (p_lossy != NULL)
+		*p_lossy = lossy;
+} /* end of nls_uniname_to_dosname */
+
+void nls_dosname_to_uniname(struct super_block *sb, UNI_NAME_T *p_uniname, DOS_NAME_T *p_dosname)
+{
+	int i = 0, j, n = 0;
+	u8 buf[DOS_NAME_LENGTH+2];
+	u8 *dosname = p_dosname->name;
+	u16 *uniname = p_uniname->name;
+	struct nls_table *nls = EXFAT_SB(sb)->nls_disk;
+
+	if (*dosname == 0x05) {
+		*buf = 0xE5;
+		i++;
+		n++;
+	}
+
+	for (; i < 8; i++, n++) {
+		if (*(dosname+i) == ' ')
+			break;
+
+		if ((*(dosname+i) >= 'A') && (*(dosname+i) <= 'Z') && (p_dosname->name_case & 0x08))
+			*(buf+n) = *(dosname+i) + ('a' - 'A');
+		else
+			*(buf+n) = *(dosname+i);
+	}
+	if (*(dosname+8) != ' ') {
+		*(buf+n) = '.';
+		n++;
+	}
+
+	for (i = 8; i < DOS_NAME_LENGTH; i++, n++) {
+		if (*(dosname+i) == ' ')
+			break;
+
+		if ((*(dosname+i) >= 'A') && (*(dosname+i) <= 'Z') && (p_dosname->name_case & 0x10))
+			*(buf+n) = *(dosname+i) + ('a' - 'A');
+		else
+			*(buf+n) = *(dosname+i);
+	}
+	*(buf+n) = '\0';
+
+	i = j = 0;
+	while (j < (MAX_NAME_LENGTH-1)) {
+		if (*(buf+i) == '\0')
+			break;
+
+		i += convert_ch_to_uni(nls, uniname, (buf+i), NULL);
+
+		uniname++;
+		j++;
+	}
+
+	*uniname = (u16) '\0';
+} /* end of nls_dosname_to_uniname */
+
+void nls_uniname_to_cstring(struct super_block *sb, u8 *p_cstring, UNI_NAME_T *p_uniname)
+{
+	int i, j, len;
+	u8 buf[MAX_CHARSET_SIZE];
+	u16 *uniname = p_uniname->name;
+	struct nls_table *nls = EXFAT_SB(sb)->nls_io;
+
+	if (nls == NULL) {
+		len = utf16s_to_utf8s(uniname, MAX_NAME_LENGTH, UTF16_HOST_ENDIAN, p_cstring, MAX_NAME_LENGTH);
+		p_cstring[len] = 0;
+		return;
+	}
+
+	i = 0;
+	while (i < (MAX_NAME_LENGTH-1)) {
+		if (*uniname == (u16) '\0')
+			break;
+
+		len = convert_uni_to_ch(nls, buf, *uniname, NULL);
+
+		if (len > 1) {
+			for (j = 0; j < len; j++)
+				*p_cstring++ = (char) *(buf+j);
+		} else { /* len == 1 */
+			*p_cstring++ = (char) *buf;
+		}
+
+		uniname++;
+		i++;
+	}
+
+	*p_cstring = '\0';
+} /* end of nls_uniname_to_cstring */
+
+void nls_cstring_to_uniname(struct super_block *sb, UNI_NAME_T *p_uniname, u8 *p_cstring, s32 *p_lossy)
+{
+	int i, j, lossy = FALSE;
+	u8 *end_of_name;
+	u8 upname[MAX_NAME_LENGTH * 2];
+	u16 *uniname = p_uniname->name;
+	struct nls_table *nls = EXFAT_SB(sb)->nls_io;
+
+
+	/* strip all trailing spaces */
+	end_of_name = p_cstring + strlen((char *) p_cstring);
+
+	while (*(--end_of_name) == ' ') {
+		if (end_of_name < p_cstring)
+			break;
+	}
+	*(++end_of_name) = '\0';
+
+	if (strcmp((char *) p_cstring, ".") && strcmp((char *) p_cstring, "..")) {
+
+		/* strip all trailing periods */
+		while (*(--end_of_name) == '.') {
+			if (end_of_name < p_cstring)
+				break;
+		}
+		*(++end_of_name) = '\0';
+	}
+
+	if (*p_cstring == '\0')
+		lossy = TRUE;
+
+	if (nls == NULL) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,0,101)
+		i = utf8s_to_utf16s(p_cstring, MAX_NAME_LENGTH, uniname);
+#else
+		i = utf8s_to_utf16s(p_cstring, MAX_NAME_LENGTH, UTF16_HOST_ENDIAN, uniname, MAX_NAME_LENGTH);
+#endif
+		for (j = 0; j < i; j++)
+			SET16_A(upname + j * 2, nls_upper(sb, uniname[j]));
+		uniname[i] = '\0';
+	}
+	else {
+		i = j = 0;
+		while (j < (MAX_NAME_LENGTH-1)) {
+			if (*(p_cstring+i) == '\0')
+				break;
+
+			i += convert_ch_to_uni(nls, uniname, (u8 *)(p_cstring+i), &lossy);
+
+			if ((*uniname < 0x0020) || nls_wstrchr(bad_uni_chars, *uniname))
+				lossy = TRUE;
+
+			SET16_A(upname + j * 2, nls_upper(sb, *uniname));
+
+			uniname++;
+			j++;
+		}
+
+		if (*(p_cstring+i) != '\0')
+			lossy = TRUE;
+		*uniname = (u16) '\0';
+	}
+
+	p_uniname->name_len = j;
+	p_uniname->name_hash = calc_checksum_2byte((void *) upname, j<<1, 0, CS_DEFAULT);
+
+	if (p_lossy != NULL)
+		*p_lossy = lossy;
+} /* end of nls_cstring_to_uniname */
+
+/*======================================================================*/
+/*  Local Function Definitions                                          */
+/*======================================================================*/
+
+static s32 convert_ch_to_uni(struct nls_table *nls, u16 *uni, u8 *ch, s32 *lossy)
+{
+	int len;
+
+	*uni = 0x0;
+
+	if (ch[0] < 0x80) {
+		*uni = (u16) ch[0];
+		return 1;
+	}
+
+	len = nls->char2uni(ch, NLS_MAX_CHARSET_SIZE, uni);
+	if (len < 0) {
+		/* conversion failed */
+		printk("%s: fail to use nls\n", __func__);
+		if (lossy != NULL)
+			*lossy = TRUE;
+		*uni = (u16) '_';
+		if (!strcmp(nls->charset, "utf8"))
+			return 1;
+		else
+			return 2;
+	}
+
+	return len;
+} /* end of convert_ch_to_uni */
+
+static s32 convert_uni_to_ch(struct nls_table *nls, u8 *ch, u16 uni, s32 *lossy)
+{
+	int len;
+
+	ch[0] = 0x0;
+
+	if (uni < 0x0080) {
+		ch[0] = (u8) uni;
+		return 1;
+	}
+
+	len = nls->uni2char(uni, ch, NLS_MAX_CHARSET_SIZE);
+	if (len < 0) {
+		/* conversion failed */
+		printk("%s: fail to use nls\n", __func__);
+		if (lossy != NULL)
+			*lossy = TRUE;
+		ch[0] = '_';
+		return 1;
+	}
+
+	return len;
+
+} /* end of convert_uni_to_ch */
diff --git a/fs/exfat/exfat_nls.h b/fs/exfat/exfat_nls.h
new file mode 100644
index 000000000..bc516d762
--- /dev/null
+++ b/fs/exfat/exfat_nls.h
@@ -0,0 +1,91 @@
+/*
+ *  Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/************************************************************************/
+/*                                                                      */
+/*  PROJECT : exFAT & FAT12/16/32 File System                           */
+/*  FILE    : exfat_nls.h                                               */
+/*  PURPOSE : Header File for exFAT NLS Manager                         */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  NOTES                                                               */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  REVISION HISTORY (Ver 0.9)                                          */
+/*                                                                      */
+/*  - 2010.11.15 [Joosun Hahn] : first writing                          */
+/*                                                                      */
+/************************************************************************/
+
+#ifndef _EXFAT_NLS_H
+#define _EXFAT_NLS_H
+
+#include <linux/types.h>
+#include <linux/nls.h>
+
+#include "exfat_config.h"
+#include "exfat_api.h"
+
+/*----------------------------------------------------------------------*/
+/*  Constant & Macro Definitions                                        */
+/*----------------------------------------------------------------------*/
+
+#define NUM_UPCASE              2918
+
+#define DOS_CUR_DIR_NAME        ".          "
+#define DOS_PAR_DIR_NAME        "..         "
+
+#ifdef __LITTLE_ENDIAN
+#define UNI_CUR_DIR_NAME        ".\0"
+#define UNI_PAR_DIR_NAME        ".\0.\0"
+#else
+#define UNI_CUR_DIR_NAME        "\0."
+#define UNI_PAR_DIR_NAME        "\0.\0."
+#endif
+
+/*----------------------------------------------------------------------*/
+/*  Type Definitions                                                    */
+/*----------------------------------------------------------------------*/
+
+/* DOS name stucture */
+typedef struct {
+	u8       name[DOS_NAME_LENGTH];
+	u8       name_case;
+} DOS_NAME_T;
+
+/* unicode name stucture */
+typedef struct {
+	u16      name[MAX_NAME_LENGTH];
+	u16      name_hash;
+	u8       name_len;
+} UNI_NAME_T;
+
+/*----------------------------------------------------------------------*/
+/*  External Function Declarations                                      */
+/*----------------------------------------------------------------------*/
+
+/* NLS management function */
+u16 nls_upper(struct super_block *sb, u16 a);
+s32  nls_dosname_cmp(struct super_block *sb, u8 *a, u8 *b);
+s32  nls_uniname_cmp(struct super_block *sb, u16 *a, u16 *b);
+void   nls_uniname_to_dosname(struct super_block *sb, DOS_NAME_T *p_dosname, UNI_NAME_T *p_uniname, s32 *p_lossy);
+void   nls_dosname_to_uniname(struct super_block *sb, UNI_NAME_T *p_uniname, DOS_NAME_T *p_dosname);
+void   nls_uniname_to_cstring(struct super_block *sb, u8 *p_cstring, UNI_NAME_T *p_uniname);
+void   nls_cstring_to_uniname(struct super_block *sb, UNI_NAME_T *p_uniname, u8 *p_cstring, s32 *p_lossy);
+
+#endif /* _EXFAT_NLS_H */
diff --git a/fs/exfat/exfat_oal.c b/fs/exfat/exfat_oal.c
new file mode 100644
index 000000000..743544244
--- /dev/null
+++ b/fs/exfat/exfat_oal.c
@@ -0,0 +1,196 @@
+/* Some of the source code in this file came from "linux/fs/fat/misc.c".  */
+/*
+ *  linux/fs/fat/misc.c
+ *
+ *  Written 1992,1993 by Werner Almesberger
+ *  22/11/2000 - Fixed fat_date_unix2dos for dates earlier than 01/01/1980
+ *         and date_dos2unix for date==0 by Igor Zhbanov(bsg@uniyar.ac.ru)
+ */
+
+/*
+ *  Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/************************************************************************/
+/*                                                                      */
+/*  PROJECT : exFAT & FAT12/16/32 File System                           */
+/*  FILE    : exfat_oal.c                                               */
+/*  PURPOSE : exFAT OS Adaptation Layer                                 */
+/*            (Semaphore Functions & Real-Time Clock Functions)         */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  NOTES                                                               */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  REVISION HISTORY (Ver 0.9)                                          */
+/*                                                                      */
+/*  - 2010.11.15 [Joosun Hahn] : first writing                          */
+/*                                                                      */
+/************************************************************************/
+
+#include <linux/semaphore.h>
+#include <linux/time.h>
+
+#include "exfat_config.h"
+#include "exfat_api.h"
+#include "exfat_oal.h"
+
+/*======================================================================*/
+/*                                                                      */
+/*            SEMAPHORE FUNCTIONS                                       */
+/*                                                                      */
+/*======================================================================*/
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36)
+DECLARE_MUTEX(z_sem);
+#else
+DEFINE_SEMAPHORE(z_sem);
+#endif
+
+s32 sm_init(struct semaphore *sm)
+{
+	sema_init(sm, 1);
+	return 0;
+} /* end of sm_init */
+
+s32 sm_P(struct semaphore *sm)
+{
+	down(sm);
+	return 0;
+} /* end of sm_P */
+
+void sm_V(struct semaphore *sm)
+{
+	up(sm);
+} /* end of sm_V */
+
+
+/*======================================================================*/
+/*                                                                      */
+/*            REAL-TIME CLOCK FUNCTIONS                                 */
+/*                                                                      */
+/*======================================================================*/
+
+extern struct timezone sys_tz;
+
+/*
+ * The epoch of FAT timestamp is 1980.
+ *     :  bits  : value
+ * date:  0 -  4: day    (1 -  31)
+ * date:  5 -  8: month  (1 -  12)
+ * date:  9 - 15: year   (0 - 127) from 1980
+ * time:  0 -  4: sec    (0 -  29) 2sec counts
+ * time:  5 - 10: min    (0 -  59)
+ * time: 11 - 15: hour   (0 -  23)
+ */
+#define UNIX_SECS_1980   315532800L
+
+#if BITS_PER_LONG == 64
+#define UNIX_SECS_2108   4354819200L
+#endif
+/* days between 1.1.70 and 1.1.80 (2 leap days) */
+#define DAYS_DELTA_DECADE	(365 * 10 + 2)
+/* 120 (2100 - 1980) isn't leap year */
+#define NO_LEAP_YEAR_2100    (120)
+#define IS_LEAP_YEAR(y)  (!((y) & 3) && (y) != NO_LEAP_YEAR_2100)
+
+#define SECS_PER_MIN     (60)
+#define SECS_PER_HOUR    (60 * SECS_PER_MIN)
+#define SECS_PER_DAY     (24 * SECS_PER_HOUR)
+
+#define MAKE_LEAP_YEAR(leap_year, year)                         \
+	do {                                                    \
+		if (unlikely(year > NO_LEAP_YEAR_2100))         \
+			leap_year = ((year + 3) / 4) - 1;       \
+		else                                            \
+			leap_year = ((year + 3) / 4);           \
+	} while (0)
+
+/* Linear day numbers of the respective 1sts in non-leap years. */
+static time_t accum_days_in_year[] = {
+	/* Jan  Feb  Mar  Apr  May  Jun  Jul  Aug  Sep  Oct  Nov  Dec */
+	0,   0,  31,  59,  90, 120, 151, 181, 212, 243, 273, 304, 334, 0, 0, 0,
+};
+
+TIMESTAMP_T *tm_current(TIMESTAMP_T *tp)
+{
+	struct timespec ts;
+	time_t second, day, leap_day, month, year;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0)
+	ts = CURRENT_TIME_SEC;
+#else
+	ktime_get_real_ts(&ts);
+#endif
+
+	second = ts.tv_sec;
+	second -= sys_tz.tz_minuteswest * SECS_PER_MIN;
+
+	/* Jan 1 GMT 00:00:00 1980. But what about another time zone? */
+	if (second < UNIX_SECS_1980) {
+		tp->sec  = 0;
+		tp->min  = 0;
+		tp->hour = 0;
+		tp->day  = 1;
+		tp->mon  = 1;
+		tp->year = 0;
+		return tp;
+	}
+#if BITS_PER_LONG == 64
+	if (second >= UNIX_SECS_2108) {
+		tp->sec  = 59;
+		tp->min  = 59;
+		tp->hour = 23;
+		tp->day  = 31;
+		tp->mon  = 12;
+		tp->year = 127;
+		return tp;
+	}
+#endif
+
+	day = second / SECS_PER_DAY - DAYS_DELTA_DECADE;
+	year = day / 365;
+
+	MAKE_LEAP_YEAR(leap_day, year);
+	if (year * 365 + leap_day > day)
+		year--;
+
+	MAKE_LEAP_YEAR(leap_day, year);
+
+	day -= year * 365 + leap_day;
+
+	if (IS_LEAP_YEAR(year) && day == accum_days_in_year[3]) {
+		month = 2;
+	} else {
+		if (IS_LEAP_YEAR(year) && day > accum_days_in_year[3])
+			day--;
+		for (month = 1; month < 12; month++) {
+			if (accum_days_in_year[month + 1] > day)
+				break;
+		}
+	}
+	day -= accum_days_in_year[month];
+
+	tp->sec  = second % SECS_PER_MIN;
+	tp->min  = (second / SECS_PER_MIN) % 60;
+	tp->hour = (second / SECS_PER_HOUR) % 24;
+	tp->day  = day + 1;
+	tp->mon  = month;
+	tp->year = year;
+
+	return tp;
+} /* end of tm_current */
diff --git a/fs/exfat/exfat_oal.h b/fs/exfat/exfat_oal.h
new file mode 100644
index 000000000..b6dd7897a
--- /dev/null
+++ b/fs/exfat/exfat_oal.h
@@ -0,0 +1,74 @@
+/*
+ *  Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/************************************************************************/
+/*                                                                      */
+/*  PROJECT : exFAT & FAT12/16/32 File System                           */
+/*  FILE    : exfat_oal.h                                               */
+/*  PURPOSE : Header File for exFAT OS Adaptation Layer                 */
+/*            (Semaphore Functions & Real-Time Clock Functions)         */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  NOTES                                                               */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  REVISION HISTORY (Ver 0.9)                                          */
+/*                                                                      */
+/*  - 2010.11.15 [Joosun Hahn] : first writing                          */
+/*                                                                      */
+/************************************************************************/
+
+#ifndef _EXFAT_OAL_H
+#define _EXFAT_OAL_H
+
+#include <linux/semaphore.h>
+#include "exfat_config.h"
+#include <linux/version.h>
+
+/*----------------------------------------------------------------------*/
+/*  Constant & Macro Definitions (Configurable)                         */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/*  Constant & Macro Definitions (Non-Configurable)                     */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/*  Type Definitions                                                    */
+/*----------------------------------------------------------------------*/
+
+typedef struct {
+	u16      sec;        /* 0 ~ 59               */
+	u16      min;        /* 0 ~ 59               */
+	u16      hour;       /* 0 ~ 23               */
+	u16      day;        /* 1 ~ 31               */
+	u16      mon;        /* 1 ~ 12               */
+	u16      year;       /* 0 ~ 127 (since 1980) */
+} TIMESTAMP_T;
+
+/*----------------------------------------------------------------------*/
+/*  External Function Declarations                                      */
+/*----------------------------------------------------------------------*/
+
+s32 sm_init(struct semaphore *sm);
+s32 sm_P(struct semaphore *sm);
+void  sm_V(struct semaphore *sm);
+
+TIMESTAMP_T *tm_current(TIMESTAMP_T *tm);
+
+#endif /* _EXFAT_OAL_H */
diff --git a/fs/exfat/exfat_super.c b/fs/exfat/exfat_super.c
new file mode 100644
index 000000000..656991645
--- /dev/null
+++ b/fs/exfat/exfat_super.c
@@ -0,0 +1,2711 @@
+/* Some of the source code in this file came from "linux/fs/fat/file.c","linux/fs/fat/inode.c" and "linux/fs/fat/misc.c".  */
+/*
+ *  linux/fs/fat/file.c
+ *
+ *  Written 1992,1993 by Werner Almesberger
+ *
+ *  regular file handling primitives for fat-based filesystems
+ */
+
+/*
+ *  linux/fs/fat/inode.c
+ *
+ *  Written 1992,1993 by Werner Almesberger
+ *  VFAT extensions by Gordon Chaffee, merged with msdos fs by Henrik Storner
+ *  Rewritten for the constant inumbers support by Al Viro
+ *
+ *  Fixes:
+ *
+ *    Max Cohan: Fixed invalid FSINFO offset when info_sector is 0
+ */
+
+/*
+ *  linux/fs/fat/misc.c
+ *
+ *  Written 1992,1993 by Werner Almesberger
+ *  22/11/2000 - Fixed fat_date_unix2dos for dates earlier than 01/01/1980
+ *         and date_dos2unix for date==0 by Igor Zhbanov(bsg@uniyar.ac.ru)
+ */
+
+/*
+ *  Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/time.h>
+#include <linux/slab.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+#include <linux/smp_lock.h>
+#endif
+#include <linux/seq_file.h>
+#include <linux/pagemap.h>
+#include <linux/mpage.h>
+#include <linux/buffer_head.h>
+#include <linux/exportfs.h>
+#include <linux/mount.h>
+#include <linux/vfs.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)
+#include <linux/aio.h>
+#endif
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0)
+#include <linux/iversion.h>
+#endif
+#include <linux/parser.h>
+#include <linux/uio.h>
+#include <linux/writeback.h>
+#include <linux/log2.h>
+#include <linux/hash.h>
+#include <linux/backing-dev.h>
+#include <linux/sched.h>
+#include <linux/fs_struct.h>
+#include <linux/namei.h>
+#include <asm/current.h>
+#include <asm/unaligned.h>
+
+#include "exfat_version.h"
+#include "exfat_config.h"
+#include "exfat_data.h"
+#include "exfat_oal.h"
+
+#include "exfat_blkdev.h"
+#include "exfat_cache.h"
+#include "exfat_nls.h"
+#include "exfat_api.h"
+#include "exfat_core.h"
+
+#include "exfat_super.h"
+
+static struct kmem_cache *exfat_inode_cachep;
+
+static int exfat_default_codepage = CONFIG_EXFAT_DEFAULT_CODEPAGE;
+static char exfat_default_iocharset[] = CONFIG_EXFAT_DEFAULT_IOCHARSET;
+
+extern struct timezone sys_tz;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0)
+#define current_time(x)	(CURRENT_TIME_SEC)
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0)
+#define USE_NEW_IVERSION_API
+#define INC_IVERSION(x) (inode_inc_iversion(x))
+#define GET_IVERSION(x) (inode_peek_iversion_raw(x))
+#define SET_IVERSION(x,y) (inode_set_iversion(x, y))
+#else
+#define INC_IVERSION(x) (x->i_version++)
+#define GET_IVERSION(x) (x->i_version)
+#define SET_IVERSION(x,y) (x->i_version = y)
+#endif
+
+#define CHECK_ERR(x)	BUG_ON(x)
+
+#define UNIX_SECS_1980    315532800L
+
+#if BITS_PER_LONG == 64
+#define UNIX_SECS_2108    4354819200L
+#endif
+/* days between 1.1.70 and 1.1.80 (2 leap days) */
+#define DAYS_DELTA_DECADE    (365 * 10 + 2)
+/* 120 (2100 - 1980) isn't leap year */
+#define NO_LEAP_YEAR_2100    (120)
+#define IS_LEAP_YEAR(y)    (!((y) & 0x3) && (y) != NO_LEAP_YEAR_2100)
+
+#define SECS_PER_MIN    (60)
+#define SECS_PER_HOUR   (60 * SECS_PER_MIN)
+#define SECS_PER_DAY    (24 * SECS_PER_HOUR)
+
+#define MAKE_LEAP_YEAR(leap_year, year)                         \
+	do {                                                    \
+		if (unlikely(year > NO_LEAP_YEAR_2100))         \
+			leap_year = ((year + 3) / 4) - 1;       \
+		else                                            \
+			leap_year = ((year + 3) / 4);           \
+	} while (0)
+
+/* Linear day numbers of the respective 1sts in non-leap years. */
+static time_t accum_days_in_year[] = {
+	/* Jan  Feb  Mar  Apr  May  Jun  Jul  Aug  Sep  Oct  Nov  Dec */
+	0,   0,  31,  59,  90, 120, 151, 181, 212, 243, 273, 304, 334, 0, 0, 0,
+};
+
+static void _exfat_truncate(struct inode *inode, loff_t old_size);
+
+/* Convert a FAT time/date pair to a UNIX date (seconds since 1 1 70). */
+void exfat_time_fat2unix(struct exfat_sb_info *sbi, struct timespec *ts,
+						 DATE_TIME_T *tp)
+{
+	time_t year = tp->Year;
+	time_t ld;
+
+	MAKE_LEAP_YEAR(ld, year);
+
+	if (IS_LEAP_YEAR(year) && (tp->Month) > 2)
+		ld++;
+
+	ts->tv_sec =  tp->Second  + tp->Minute * SECS_PER_MIN
+				  + tp->Hour * SECS_PER_HOUR
+				  + (year * 365 + ld + accum_days_in_year[(tp->Month)] + (tp->Day - 1) + DAYS_DELTA_DECADE) * SECS_PER_DAY
+				  + sys_tz.tz_minuteswest * SECS_PER_MIN;
+	ts->tv_nsec = 0;
+}
+
+/* Convert linear UNIX date to a FAT time/date pair. */
+void exfat_time_unix2fat(struct exfat_sb_info *sbi, struct timespec *ts,
+						 DATE_TIME_T *tp)
+{
+	time_t second = ts->tv_sec;
+	time_t day, month, year;
+	time_t ld;
+
+	second -= sys_tz.tz_minuteswest * SECS_PER_MIN;
+
+	/* Jan 1 GMT 00:00:00 1980. But what about another time zone? */
+	if (second < UNIX_SECS_1980) {
+		tp->Second  = 0;
+		tp->Minute  = 0;
+		tp->Hour = 0;
+		tp->Day  = 1;
+		tp->Month  = 1;
+		tp->Year = 0;
+		return;
+	}
+#if (BITS_PER_LONG == 64)
+	if (second >= UNIX_SECS_2108) {
+		tp->Second  = 59;
+		tp->Minute  = 59;
+		tp->Hour = 23;
+		tp->Day  = 31;
+		tp->Month  = 12;
+		tp->Year = 127;
+		return;
+	}
+#endif
+	day = second / SECS_PER_DAY - DAYS_DELTA_DECADE;
+	year = day / 365;
+	MAKE_LEAP_YEAR(ld, year);
+	if (year * 365 + ld > day)
+		year--;
+
+	MAKE_LEAP_YEAR(ld, year);
+	day -= year * 365 + ld;
+
+	if (IS_LEAP_YEAR(year) && day == accum_days_in_year[3]) {
+		month = 2;
+	} else {
+		if (IS_LEAP_YEAR(year) && day > accum_days_in_year[3])
+			day--;
+		for (month = 1; month < 12; month++) {
+			if (accum_days_in_year[month + 1] > day)
+				break;
+		}
+	}
+	day -= accum_days_in_year[month];
+
+	tp->Second  = second % SECS_PER_MIN;
+	tp->Minute  = (second / SECS_PER_MIN) % 60;
+	tp->Hour = (second / SECS_PER_HOUR) % 24;
+	tp->Day  = day + 1;
+	tp->Month  = month;
+	tp->Year = year;
+}
+
+static struct inode *exfat_iget(struct super_block *sb, loff_t i_pos);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36)
+static int exfat_generic_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+#else
+static long exfat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+#endif
+static int exfat_sync_inode(struct inode *inode);
+static struct inode *exfat_build_inode(struct super_block *sb, FILE_ID_T *fid, loff_t i_pos);
+static void exfat_detach(struct inode *inode);
+static void exfat_attach(struct inode *inode, loff_t i_pos);
+static inline unsigned long exfat_hash(loff_t i_pos);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34)
+static int exfat_write_inode(struct inode *inode, int wait);
+#else
+static int exfat_write_inode(struct inode *inode, struct writeback_control *wbc);
+#endif
+static void exfat_write_super(struct super_block *sb);
+
+static void __lock_super(struct super_block *sb)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0)
+	lock_super(sb);
+#else
+	struct exfat_sb_info *sbi = EXFAT_SB(sb);
+	mutex_lock(&sbi->s_lock);
+#endif
+}
+
+static void __unlock_super(struct super_block *sb)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0)
+	unlock_super(sb);
+#else
+	struct exfat_sb_info *sbi = EXFAT_SB(sb);
+	mutex_unlock(&sbi->s_lock);
+#endif
+}
+
+static int __is_sb_dirty(struct super_block *sb)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0)
+	return sb->s_dirt;
+#else
+	struct exfat_sb_info *sbi = EXFAT_SB(sb);
+	return sbi->s_dirt;
+#endif
+}
+
+static void __set_sb_clean(struct super_block *sb)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0)
+	sb->s_dirt = 0;
+#else
+	struct exfat_sb_info *sbi = EXFAT_SB(sb);
+	sbi->s_dirt = 0;
+#endif
+}
+
+static int __exfat_revalidate(struct dentry *dentry)
+{
+	return 0;
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,00)
+static int exfat_revalidate(struct dentry *dentry, unsigned int flags)
+#else
+static int exfat_revalidate(struct dentry *dentry, struct nameidata *nd)
+#endif
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,00)
+	if (flags & LOOKUP_RCU)
+		return -ECHILD;
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,0,00)
+	if (nd && nd->flags & LOOKUP_RCU)
+		return -ECHILD;
+#endif
+
+	if (dentry->d_inode)
+		return 1;
+	return __exfat_revalidate(dentry);
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,00)
+static int exfat_revalidate_ci(struct dentry *dentry, unsigned int flags)
+#else
+static int exfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
+#endif
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,00)
+	if (flags & LOOKUP_RCU)
+		return -ECHILD;
+#else
+	unsigned int flags;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,0,00)
+	if (nd && nd->flags & LOOKUP_RCU)
+		return -ECHILD;
+#endif
+
+	flags = nd ? nd->flags : 0;
+#endif
+
+	if (dentry->d_inode)
+		return 1;
+
+	if (!flags)
+		return 0;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,0,00)
+	if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
+		return 0;
+#else
+	if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) {
+		if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
+			return 0;
+	}
+#endif
+
+	return __exfat_revalidate(dentry);
+}
+
+static unsigned int __exfat_striptail_len(unsigned int len, const char *name)
+{
+	while (len && name[len - 1] == '.')
+		len--;
+	return len;
+}
+
+static unsigned int exfat_striptail_len(const struct qstr *qstr)
+{
+	return __exfat_striptail_len(qstr->len, qstr->name);
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,11,0)
+static int exfat_d_hash(const struct dentry *dentry, struct qstr *qstr)
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38)
+static int exfat_d_hash(struct dentry *dentry, struct qstr *qstr)
+#else
+static int exfat_d_hash(const struct dentry *dentry, const struct inode *inode,
+		struct qstr *qstr)
+#endif
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,8,0)
+	qstr->hash = full_name_hash(dentry, qstr->name, exfat_striptail_len(qstr));
+#else
+	qstr->hash = full_name_hash(qstr->name, exfat_striptail_len(qstr));
+#endif
+	return 0;
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,11,0)
+static int exfat_d_hashi(const struct dentry *dentry, struct qstr *qstr)
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38)
+static int exfat_d_hashi(struct dentry *dentry, struct qstr *qstr)
+#else
+static int exfat_d_hashi(const struct dentry *dentry, const struct inode *inode,
+		struct qstr *qstr)
+#endif
+{
+	struct super_block *sb = dentry->d_sb;
+	const unsigned char *name;
+	unsigned int len;
+	unsigned long hash;
+
+	name = qstr->name;
+	len = exfat_striptail_len(qstr);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,8,0)
+	hash = init_name_hash(dentry);
+#else
+	hash = init_name_hash();
+#endif
+	while (len--)
+		hash = partial_name_hash(nls_upper(sb, *name++), hash);
+	qstr->hash = end_name_hash(hash);
+
+	return 0;
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,8,0)
+static int exfat_cmpi(const struct dentry *dentry,
+		unsigned int len, const char *str, const struct qstr *name)
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,11,0)
+static int exfat_cmpi(const struct dentry *parent, const struct dentry *dentry,
+		unsigned int len, const char *str, const struct qstr *name)
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38)
+static int exfat_cmpi(struct dentry *parent, struct qstr *a, struct qstr *b)
+#else
+static int exfat_cmpi(const struct dentry *parent, const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name)
+#endif
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,8,0)
+	struct nls_table *t = EXFAT_SB(dentry->d_sb)->nls_io;
+#else
+	struct nls_table *t = EXFAT_SB(parent->d_sb)->nls_io;
+#endif
+	unsigned int alen, blen;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38)
+	alen = exfat_striptail_len(a);
+	blen = exfat_striptail_len(b);
+#else
+	alen = exfat_striptail_len(name);
+	blen = __exfat_striptail_len(len, str);
+#endif
+	if (alen == blen) {
+		if (t == NULL) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38)
+			if (strncasecmp(a->name, b->name, alen) == 0)
+#else
+			if (strncasecmp(name->name, str, alen) == 0)
+#endif
+				return 0;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38)
+		} else if (nls_strnicmp(t, a->name, b->name, alen) == 0)
+#else
+		} else if (nls_strnicmp(t, name->name, str, alen) == 0)
+#endif
+			return 0;
+	}
+	return 1;
+}
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,8,0)
+static int exfat_cmp(const struct dentry *dentry,
+		unsigned int len, const char *str, const struct qstr *name)
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,11,0)
+static int exfat_cmp(const struct dentry *parent, const struct dentry *dentry,
+		unsigned int len, const char *str, const struct qstr *name)
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38)
+static int exfat_cmp(struct dentry *parent, struct qstr *a,
+			struct qstr *b)
+#else
+static int exfat_cmp(const struct dentry *parent, const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name)
+#endif
+{
+	unsigned int alen, blen;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38)
+	alen = exfat_striptail_len(a);
+	blen = exfat_striptail_len(b);
+#else
+	alen = exfat_striptail_len(name);
+	blen = __exfat_striptail_len(len, str);
+#endif
+	if (alen == blen) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38)
+		if (strncmp(a->name, b->name, alen) == 0)
+#else
+		if (strncmp(name->name, str, alen) == 0)
+#endif
+			return 0;
+	}
+	return 1;
+}
+
+static const struct dentry_operations exfat_ci_dentry_ops = {
+	.d_revalidate   = exfat_revalidate_ci,
+	.d_hash         = exfat_d_hashi,
+	.d_compare      = exfat_cmpi,
+};
+
+static const struct dentry_operations exfat_dentry_ops = {
+	.d_revalidate   = exfat_revalidate,
+	.d_hash         = exfat_d_hash,
+	.d_compare      = exfat_cmp,
+};
+
+/*======================================================================*/
+/*  Directory Entry Operations                                          */
+/*======================================================================*/
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,11,0)
+static int exfat_readdir(struct file *filp, struct dir_context *ctx)
+#else
+static int exfat_readdir(struct file *filp, void *dirent, filldir_t filldir)
+#endif
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,9,0)
+	struct inode *inode = file_inode(filp);
+#else
+	struct inode *inode = filp->f_path.dentry->d_inode;
+#endif
+	struct super_block *sb = inode->i_sb;
+	struct exfat_sb_info *sbi = EXFAT_SB(sb);
+	FS_INFO_T *p_fs = &(sbi->fs_info);
+	BD_INFO_T *p_bd = &(EXFAT_SB(sb)->bd_info);
+	DIR_ENTRY_T de;
+	unsigned long inum;
+	loff_t cpos;
+	int err = 0;
+
+	__lock_super(sb);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,11,0)
+	cpos = ctx->pos;
+#else
+	cpos = filp->f_pos;
+#endif
+	/* Fake . and .. for the root directory. */
+	if ((p_fs->vol_type == EXFAT) || (inode->i_ino == EXFAT_ROOT_INO)) {
+		while (cpos < 2) {
+			if (inode->i_ino == EXFAT_ROOT_INO)
+				inum = EXFAT_ROOT_INO;
+			else if (cpos == 0)
+				inum = inode->i_ino;
+			else /* (cpos == 1) */
+				inum = parent_ino(filp->f_path.dentry);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,11,0)
+			if (!dir_emit_dots(filp, ctx))
+#else
+			if (filldir(dirent, "..", cpos+1, cpos, inum, DT_DIR) < 0)
+#endif
+				goto out;
+			cpos++;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,11,0)
+			ctx->pos++;
+#else
+			filp->f_pos++;
+#endif
+		}
+		if (cpos == 2)
+			cpos = 0;
+	}
+	if (cpos & (DENTRY_SIZE - 1)) {
+		err = -ENOENT;
+		goto out;
+	}
+
+get_new:
+	EXFAT_I(inode)->fid.size = i_size_read(inode);
+	EXFAT_I(inode)->fid.rwoffset = cpos >> DENTRY_SIZE_BITS;
+
+	err = FsReadDir(inode, &de);
+	if (err) {
+		/* at least we tried to read a sector
+		 * move cpos to next sector position (should be aligned)
+		 */
+		if (err == FFS_MEDIAERR) {
+			cpos += 1 << p_bd->sector_size_bits;
+			cpos &= ~((1 << p_bd->sector_size_bits)-1);
+		}
+
+		err = -EIO;
+		goto end_of_dir;
+	}
+
+	cpos = EXFAT_I(inode)->fid.rwoffset << DENTRY_SIZE_BITS;
+
+	if (!de.Name[0])
+		goto end_of_dir;
+
+	if (!memcmp(de.ShortName, DOS_CUR_DIR_NAME, DOS_NAME_LENGTH)) {
+		inum = inode->i_ino;
+	} else if (!memcmp(de.ShortName, DOS_PAR_DIR_NAME, DOS_NAME_LENGTH)) {
+		inum = parent_ino(filp->f_path.dentry);
+	} else {
+		loff_t i_pos = ((loff_t) EXFAT_I(inode)->fid.start_clu << 32) |
+					   ((EXFAT_I(inode)->fid.rwoffset-1) & 0xffffffff);
+
+		struct inode *tmp = exfat_iget(sb, i_pos);
+		if (tmp) {
+			inum = tmp->i_ino;
+			iput(tmp);
+		} else {
+			inum = iunique(sb, EXFAT_ROOT_INO);
+		}
+	}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,11,0)
+	if (!dir_emit(ctx, de.Name, strlen(de.Name), inum,
+		(de.Attr & ATTR_SUBDIR) ? DT_DIR : DT_REG))
+#else
+	if (filldir(dirent, de.Name, strlen(de.Name), cpos-1, inum,
+				(de.Attr & ATTR_SUBDIR) ? DT_DIR : DT_REG) < 0)
+#endif
+		goto out;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,11,0)
+	ctx->pos = cpos;
+#else
+	filp->f_pos = cpos;
+#endif
+	goto get_new;
+
+end_of_dir:
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,11,0)
+	ctx->pos = cpos;
+#else
+	filp->f_pos = cpos;
+#endif
+out:
+	__unlock_super(sb);
+	return err;
+}
+
+static int exfat_ioctl_volume_id(struct inode *dir)
+{
+	struct super_block *sb = dir->i_sb;
+	struct exfat_sb_info *sbi = EXFAT_SB(sb);
+	FS_INFO_T *p_fs = &(sbi->fs_info);
+
+	return p_fs->vol_id;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36)
+static int exfat_generic_ioctl(struct inode *inode, struct file *filp,
+							   unsigned int cmd, unsigned long arg)
+#else
+static long exfat_generic_ioctl(struct file *filp,
+								unsigned int cmd, unsigned long arg)
+#endif
+{
+#if !(LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36))
+    #if !(LINUX_VERSION_CODE < KERNEL_VERSION(3,18,3))
+		  struct inode *inode = filp->f_path.dentry->d_inode;
+    #else
+		  struct inode *inode = filp->f_dentry->d_inode;
+	#endif
+#endif
+#ifdef CONFIG_EXFAT_KERNEL_DEBUG
+	unsigned int flags;
+#endif /* CONFIG_EXFAT_KERNEL_DEBUG */
+
+	switch (cmd) {
+	case EXFAT_IOCTL_GET_VOLUME_ID:
+		return exfat_ioctl_volume_id(inode);
+#ifdef CONFIG_EXFAT_KERNEL_DEBUG
+	case EXFAT_IOC_GET_DEBUGFLAGS: {
+		struct super_block *sb = inode->i_sb;
+		struct exfat_sb_info *sbi = EXFAT_SB(sb);
+
+		flags = sbi->debug_flags;
+		return put_user(flags, (int __user *)arg);
+	}
+	case EXFAT_IOC_SET_DEBUGFLAGS: {
+		struct super_block *sb = inode->i_sb;
+		struct exfat_sb_info *sbi = EXFAT_SB(sb);
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (get_user(flags, (int __user *) arg))
+			return -EFAULT;
+
+		__lock_super(sb);
+		sbi->debug_flags = flags;
+		__unlock_super(sb);
+
+		return 0;
+	}
+#endif /* CONFIG_EXFAT_KERNEL_DEBUG */
+	default:
+		return -ENOTTY; /* Inappropriate ioctl for device */
+	}
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35)
+static int exfat_file_fsync(struct file *filp, struct dentry *dentry,
+				int datasync)
+#else
+static int exfat_file_fsync(struct file *filp, int datasync)
+#endif
+{
+	struct inode *inode = filp->f_mapping->host;
+	struct super_block *sb = inode->i_sb;
+	int res, err;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35)
+	res = simple_fsync(filp, dentry, datasync);
+#else
+	res = generic_file_fsync(filp, datasync);
+#endif
+	err = FsSyncVol(sb, 1);
+
+	return res ? res : err;
+}
+#endif
+
+const struct file_operations exfat_dir_operations = {
+	.llseek     = generic_file_llseek,
+	.read       = generic_read_dir,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,11,0)
+	.iterate    = exfat_readdir,
+#else
+	.readdir    = exfat_readdir,
+#endif
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36)
+	.ioctl      = exfat_generic_ioctl,
+	.fsync      = exfat_file_fsync,
+#else
+	.unlocked_ioctl = exfat_generic_ioctl,
+	.fsync      = generic_file_fsync,
+#endif
+};
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,00)
+static int exfat_create(struct inode *dir, struct dentry *dentry, umode_t mode,
+						bool excl)
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,3,0)
+static int exfat_create(struct inode *dir, struct dentry *dentry, umode_t mode,
+						struct nameidata *nd)
+#else
+static int exfat_create(struct inode *dir, struct dentry *dentry, int mode,
+						struct nameidata *nd)
+#endif
+{
+	struct super_block *sb = dir->i_sb;
+	struct inode *inode;
+	FILE_ID_T fid;
+	loff_t i_pos;
+	int err;
+
+	__lock_super(sb);
+
+	DPRINTK("exfat_create entered\n");
+
+	err = FsCreateFile(dir, (u8 *) dentry->d_name.name, FM_REGULAR, &fid);
+	if (err) {
+		if (err == FFS_INVALIDPATH)
+			err = -EINVAL;
+		else if (err == FFS_FILEEXIST)
+			err = -EEXIST;
+		else if (err == FFS_FULL)
+			err = -ENOSPC;
+		else if (err == FFS_NAMETOOLONG)
+			err = -ENAMETOOLONG;
+		else
+			err = -EIO;
+		goto out;
+	}
+	INC_IVERSION(dir);
+	dir->i_ctime = dir->i_mtime = dir->i_atime = current_time(dir);
+	if (IS_DIRSYNC(dir))
+		(void) exfat_sync_inode(dir);
+	else
+		mark_inode_dirty(dir);
+
+	i_pos = ((loff_t) fid.dir.dir << 32) | (fid.entry & 0xffffffff);
+
+	inode = exfat_build_inode(sb, &fid, i_pos);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		goto out;
+	}
+	INC_IVERSION(inode);
+	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+	/* timestamp is already written, so mark_inode_dirty() is unnecessary. */
+
+	dentry->d_time = GET_IVERSION(dentry->d_parent->d_inode);
+	d_instantiate(dentry, inode);
+
+out:
+	__unlock_super(sb);
+	DPRINTK("exfat_create exited\n");
+	return err;
+}
+
+static int exfat_find(struct inode *dir, struct qstr *qname,
+					  FILE_ID_T *fid)
+{
+	int err;
+
+	if (qname->len == 0)
+		return -ENOENT;
+
+	err = FsLookupFile(dir, (u8 *) qname->name, fid);
+	if (err)
+		return -ENOENT;
+
+	return 0;
+}
+
+static int exfat_d_anon_disconn(struct dentry *dentry)
+{
+	return IS_ROOT(dentry) && (dentry->d_flags & DCACHE_DISCONNECTED);
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,00)
+static struct dentry *exfat_lookup(struct inode *dir, struct dentry *dentry,
+						   unsigned int flags)
+#else
+static struct dentry *exfat_lookup(struct inode *dir, struct dentry *dentry,
+						   struct nameidata *nd)
+#endif
+{
+	struct super_block *sb = dir->i_sb;
+	struct inode *inode;
+	struct dentry *alias;
+	int err;
+	FILE_ID_T fid;
+	loff_t i_pos;
+	u64 ret;
+	mode_t i_mode;
+
+	__lock_super(sb);
+	DPRINTK("exfat_lookup entered\n");
+	err = exfat_find(dir, &dentry->d_name, &fid);
+	if (err) {
+		if (err == -ENOENT) {
+			inode = NULL;
+			goto out;
+		}
+		goto error;
+	}
+
+	i_pos = ((loff_t) fid.dir.dir << 32) | (fid.entry & 0xffffffff);
+	inode = exfat_build_inode(sb, &fid, i_pos);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		goto error;
+	}
+
+	i_mode = inode->i_mode;
+	if (S_ISLNK(i_mode) && !EXFAT_I(inode)->target) {
+		EXFAT_I(inode)->target = kmalloc(i_size_read(inode)+1, GFP_KERNEL);
+		if (!EXFAT_I(inode)->target) {
+			err = -ENOMEM;
+			goto error;
+		}
+		FsReadFile(dir, &fid, EXFAT_I(inode)->target, i_size_read(inode), &ret);
+		*(EXFAT_I(inode)->target + i_size_read(inode)) = '\0';
+	}
+
+	alias = d_find_alias(inode);
+	if (alias && !exfat_d_anon_disconn(alias)) {
+		CHECK_ERR(d_unhashed(alias));
+		if (!S_ISDIR(i_mode))
+			d_move(alias, dentry);
+		iput(inode);
+		__unlock_super(sb);
+		DPRINTK("exfat_lookup exited 1\n");
+		return alias;
+	} else {
+		dput(alias);
+	}
+out:
+	__unlock_super(sb);
+	dentry->d_time = GET_IVERSION(dentry->d_parent->d_inode);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38)
+	dentry->d_op = sb->s_root->d_op;
+	dentry = d_splice_alias(inode, dentry);
+	if (dentry) {
+		dentry->d_op = sb->s_root->d_op;
+		dentry->d_time = GET_IVERSION(dentry->d_parent->d_inode);
+	}
+#else
+	dentry = d_splice_alias(inode, dentry);
+	if (dentry)
+		dentry->d_time = GET_IVERSION(dentry->d_parent->d_inode);
+#endif
+	DPRINTK("exfat_lookup exited 2\n");
+	return dentry;
+
+error:
+	__unlock_super(sb);
+	DPRINTK("exfat_lookup exited 3\n");
+	return ERR_PTR(err);
+}
+
+static int exfat_unlink(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+	struct super_block *sb = dir->i_sb;
+	int err;
+
+	__lock_super(sb);
+
+	DPRINTK("exfat_unlink entered\n");
+
+	EXFAT_I(inode)->fid.size = i_size_read(inode);
+
+	err = FsRemoveFile(dir, &(EXFAT_I(inode)->fid));
+	if (err) {
+		if (err == FFS_PERMISSIONERR)
+			err = -EPERM;
+		else
+			err = -EIO;
+		goto out;
+	}
+	INC_IVERSION(dir);
+	dir->i_mtime = dir->i_atime = current_time(dir);
+	if (IS_DIRSYNC(dir))
+		(void) exfat_sync_inode(dir);
+	else
+		mark_inode_dirty(dir);
+
+	clear_nlink(inode);
+	inode->i_mtime = inode->i_atime = current_time(inode);
+	exfat_detach(inode);
+	remove_inode_hash(inode);
+
+out:
+	__unlock_super(sb);
+	DPRINTK("exfat_unlink exited\n");
+	return err;
+}
+
+static int exfat_symlink(struct inode *dir, struct dentry *dentry, const char *target)
+{
+	struct super_block *sb = dir->i_sb;
+	struct inode *inode;
+	FILE_ID_T fid;
+	loff_t i_pos;
+	int err;
+	u64 len = (u64) strlen(target);
+	u64 ret;
+
+	__lock_super(sb);
+
+	DPRINTK("exfat_symlink entered\n");
+
+	err = FsCreateFile(dir, (u8 *) dentry->d_name.name, FM_SYMLINK, &fid);
+	if (err) {
+		if (err == FFS_INVALIDPATH)
+			err = -EINVAL;
+		else if (err == FFS_FILEEXIST)
+			err = -EEXIST;
+		else if (err == FFS_FULL)
+			err = -ENOSPC;
+		else
+			err = -EIO;
+		goto out;
+	}
+
+	err = FsWriteFile(dir, &fid, (char *) target, len, &ret);
+
+	if (err) {
+		FsRemoveFile(dir, &fid);
+
+		if (err == FFS_FULL)
+			err = -ENOSPC;
+		else
+			err = -EIO;
+		goto out;
+	}
+
+	INC_IVERSION(dir);
+	dir->i_ctime = dir->i_mtime = dir->i_atime = current_time(dir);
+	if (IS_DIRSYNC(dir))
+		(void) exfat_sync_inode(dir);
+	else
+		mark_inode_dirty(dir);
+
+	i_pos = ((loff_t) fid.dir.dir << 32) | (fid.entry & 0xffffffff);
+
+	inode = exfat_build_inode(sb, &fid, i_pos);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		goto out;
+	}
+	INC_IVERSION(inode);
+	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+	/* timestamp is already written, so mark_inode_dirty() is unneeded. */
+
+	EXFAT_I(inode)->target = kmalloc(len+1, GFP_KERNEL);
+	if (!EXFAT_I(inode)->target) {
+		err = -ENOMEM;
+		goto out;
+	}
+	memcpy(EXFAT_I(inode)->target, target, len+1);
+
+	dentry->d_time = GET_IVERSION(dentry->d_parent->d_inode);
+	d_instantiate(dentry, inode);
+
+out:
+	__unlock_super(sb);
+	DPRINTK("exfat_symlink exited\n");
+	return err;
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,3,0)
+static int exfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+#else
+static int exfat_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+#endif
+{
+	struct super_block *sb = dir->i_sb;
+	struct inode *inode;
+	FILE_ID_T fid;
+	loff_t i_pos;
+	int err;
+
+	__lock_super(sb);
+
+	DPRINTK("exfat_mkdir entered\n");
+
+	err = FsCreateDir(dir, (u8 *) dentry->d_name.name, &fid);
+	if (err) {
+		if (err == FFS_INVALIDPATH)
+			err = -EINVAL;
+		else if (err == FFS_FILEEXIST)
+			err = -EEXIST;
+		else if (err == FFS_FULL)
+			err = -ENOSPC;
+		else if (err == FFS_NAMETOOLONG)
+			err = -ENAMETOOLONG;
+		else
+			err = -EIO;
+		goto out;
+	}
+	INC_IVERSION(dir);
+	dir->i_ctime = dir->i_mtime = dir->i_atime = current_time(dir);
+	if (IS_DIRSYNC(dir))
+		(void) exfat_sync_inode(dir);
+	else
+		mark_inode_dirty(dir);
+	inc_nlink(dir);
+
+	i_pos = ((loff_t) fid.dir.dir << 32) | (fid.entry & 0xffffffff);
+
+	inode = exfat_build_inode(sb, &fid, i_pos);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		goto out;
+	}
+	INC_IVERSION(inode);
+	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+	/* timestamp is already written, so mark_inode_dirty() is unneeded. */
+
+	dentry->d_time = GET_IVERSION(dentry->d_parent->d_inode);
+	d_instantiate(dentry, inode);
+
+out:
+	__unlock_super(sb);
+	DPRINTK("exfat_mkdir exited\n");
+	return err;
+}
+
+static int exfat_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+	struct super_block *sb = dir->i_sb;
+	int err;
+
+	__lock_super(sb);
+
+	DPRINTK("exfat_rmdir entered\n");
+
+	EXFAT_I(inode)->fid.size = i_size_read(inode);
+
+	err = FsRemoveDir(dir, &(EXFAT_I(inode)->fid));
+	if (err) {
+		if (err == FFS_INVALIDPATH)
+			err = -EINVAL;
+		else if (err == FFS_FILEEXIST)
+			err = -ENOTEMPTY;
+		else if (err == FFS_NOTFOUND)
+			err = -ENOENT;
+		else if (err == FFS_DIRBUSY)
+			err = -EBUSY;
+		else
+			err = -EIO;
+		goto out;
+	}
+	INC_IVERSION(dir);
+	dir->i_mtime = dir->i_atime = current_time(dir);
+	if (IS_DIRSYNC(dir))
+		(void) exfat_sync_inode(dir);
+	else
+		mark_inode_dirty(dir);
+	drop_nlink(dir);
+
+	clear_nlink(inode);
+	inode->i_mtime = inode->i_atime = current_time(inode);
+	exfat_detach(inode);
+	remove_inode_hash(inode);
+
+out:
+	__unlock_super(sb);
+	DPRINTK("exfat_rmdir exited\n");
+	return err;
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,9,0)
+static int exfat_rename(struct inode *old_dir, struct dentry *old_dentry,
+						struct inode *new_dir, struct dentry *new_dentry,
+						unsigned int flags)
+#else
+static int exfat_rename(struct inode *old_dir, struct dentry *old_dentry,
+						struct inode *new_dir, struct dentry *new_dentry)
+#endif
+{
+	struct inode *old_inode, *new_inode;
+	struct super_block *sb = old_dir->i_sb;
+	loff_t i_pos;
+	int err;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,9,0)
+	if (flags)
+		return -EINVAL;
+#endif
+
+	__lock_super(sb);
+
+	DPRINTK("exfat_rename entered\n");
+
+	old_inode = old_dentry->d_inode;
+	new_inode = new_dentry->d_inode;
+
+	EXFAT_I(old_inode)->fid.size = i_size_read(old_inode);
+
+	err = FsMoveFile(old_dir, &(EXFAT_I(old_inode)->fid), new_dir, new_dentry);
+	if (err) {
+		if (err == FFS_PERMISSIONERR)
+			err = -EPERM;
+		else if (err == FFS_INVALIDPATH)
+			err = -EINVAL;
+		else if (err == FFS_FILEEXIST)
+			err = -EEXIST;
+		else if (err == FFS_NOTFOUND)
+			err = -ENOENT;
+		else if (err == FFS_FULL)
+			err = -ENOSPC;
+		else
+			err = -EIO;
+		goto out;
+	}
+	INC_IVERSION(new_dir);
+	new_dir->i_ctime = new_dir->i_mtime = new_dir->i_atime = current_time(new_dir);
+	if (IS_DIRSYNC(new_dir))
+		(void) exfat_sync_inode(new_dir);
+	else
+		mark_inode_dirty(new_dir);
+
+	i_pos = ((loff_t) EXFAT_I(old_inode)->fid.dir.dir << 32) |
+			(EXFAT_I(old_inode)->fid.entry & 0xffffffff);
+
+	exfat_detach(old_inode);
+	exfat_attach(old_inode, i_pos);
+	if (IS_DIRSYNC(new_dir))
+		(void) exfat_sync_inode(old_inode);
+	else
+		mark_inode_dirty(old_inode);
+
+	if ((S_ISDIR(old_inode->i_mode)) && (old_dir != new_dir)) {
+		drop_nlink(old_dir);
+		if (!new_inode)
+			inc_nlink(new_dir);
+	}
+	INC_IVERSION(old_dir);
+	old_dir->i_ctime = old_dir->i_mtime = current_time(old_dir);
+	if (IS_DIRSYNC(old_dir))
+		(void) exfat_sync_inode(old_dir);
+	else
+		mark_inode_dirty(old_dir);
+
+	if (new_inode) {
+		exfat_detach(new_inode);
+		drop_nlink(new_inode);
+		if (S_ISDIR(new_inode->i_mode))
+			drop_nlink(new_inode);
+		new_inode->i_ctime = current_time(new_inode);
+	}
+
+out:
+	__unlock_super(sb);
+	DPRINTK("exfat_rename exited\n");
+	return err;
+}
+
+static int exfat_cont_expand(struct inode *inode, loff_t size)
+{
+	struct address_space *mapping = inode->i_mapping;
+	loff_t start = i_size_read(inode), count = size - i_size_read(inode);
+	int err, err2;
+
+	err = generic_cont_expand_simple(inode, size);
+	if (err != 0)
+		return err;
+
+	inode->i_ctime = inode->i_mtime = current_time(inode);
+	mark_inode_dirty(inode);
+
+	if (IS_SYNC(inode)) {
+		err = filemap_fdatawrite_range(mapping, start, start + count - 1);
+		err2 = sync_mapping_buffers(mapping);
+		err = (err) ? (err) : (err2);
+		err2 = write_inode_now(inode, 1);
+		err = (err) ? (err) : (err2);
+		if (!err)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32)
+			err =  wait_on_page_writeback_range(mapping,
+					start >> PAGE_CACHE_SHIFT,
+					(start + count - 1) >> PAGE_CACHE_SHIFT);
+#else
+			err =  filemap_fdatawait_range(mapping, start, start + count - 1);
+#endif
+	}
+	return err;
+}
+
+static int exfat_allow_set_time(struct exfat_sb_info *sbi, struct inode *inode)
+{
+	mode_t allow_utime = sbi->options.allow_utime;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0)
+	if (!uid_eq(current_fsuid(), inode->i_uid))
+#else
+	if (current_fsuid() != inode->i_uid)
+#endif
+	{
+		if (in_group_p(inode->i_gid))
+			allow_utime >>= 3;
+		if (allow_utime & MAY_WRITE)
+			return 1;
+	}
+
+	/* use a default check */
+	return 0;
+}
+
+static int exfat_sanitize_mode(const struct exfat_sb_info *sbi,
+							   struct inode *inode, umode_t *mode_ptr)
+{
+	mode_t i_mode, mask, perm;
+
+	i_mode = inode->i_mode;
+
+	if (S_ISREG(i_mode) || S_ISLNK(i_mode))
+		mask = sbi->options.fs_fmask;
+	else
+		mask = sbi->options.fs_dmask;
+
+	perm = *mode_ptr & ~(S_IFMT | mask);
+
+	/* Of the r and x bits, all (subject to umask) must be present.*/
+	if ((perm & (S_IRUGO | S_IXUGO)) != (i_mode & (S_IRUGO|S_IXUGO)))
+		return -EPERM;
+
+	if (exfat_mode_can_hold_ro(inode)) {
+		/* Of the w bits, either all (subject to umask) or none must be present. */
+		if ((perm & S_IWUGO) && ((perm & S_IWUGO) != (S_IWUGO & ~mask)))
+			return -EPERM;
+	} else {
+		/* If exfat_mode_can_hold_ro(inode) is false, can't change w bits. */
+		if ((perm & S_IWUGO) != (S_IWUGO & ~mask))
+			return -EPERM;
+	}
+
+	*mode_ptr &= S_IFMT | perm;
+
+	return 0;
+}
+
+static int exfat_setattr(struct dentry *dentry, struct iattr *attr)
+{
+
+	struct exfat_sb_info *sbi = EXFAT_SB(dentry->d_sb);
+	struct inode *inode = dentry->d_inode;
+	unsigned int ia_valid;
+	int error;
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,35)
+	loff_t old_size;
+#endif
+
+	DPRINTK("exfat_setattr entered\n");
+
+	if ((attr->ia_valid & ATTR_SIZE)
+		&& (attr->ia_size > i_size_read(inode))) {
+		error = exfat_cont_expand(inode, attr->ia_size);
+		if (error || attr->ia_valid == ATTR_SIZE)
+			return error;
+		attr->ia_valid &= ~ATTR_SIZE;
+	}
+
+	ia_valid = attr->ia_valid;
+
+	if ((ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET))
+		&& exfat_allow_set_time(sbi, inode)) {
+		attr->ia_valid &= ~(ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET);
+	}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,9,0)
+	error = setattr_prepare(dentry, attr);
+#else
+	error = inode_change_ok(inode, attr);
+#endif
+	attr->ia_valid = ia_valid;
+	if (error)
+		return error;
+
+	if (((attr->ia_valid & ATTR_UID) &&
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0)
+		 (!uid_eq(attr->ia_uid, sbi->options.fs_uid))) ||
+		((attr->ia_valid & ATTR_GID) &&
+		 (!gid_eq(attr->ia_gid, sbi->options.fs_gid))) ||
+#else
+		 (attr->ia_uid != sbi->options.fs_uid)) ||
+		((attr->ia_valid & ATTR_GID) &&
+		 (attr->ia_gid != sbi->options.fs_gid)) ||
+#endif
+		((attr->ia_valid & ATTR_MODE) &&
+		 (attr->ia_mode & ~(S_IFREG | S_IFLNK | S_IFDIR | S_IRWXUGO)))) {
+		return -EPERM;
+	}
+
+	/*
+	 * We don't return -EPERM here. Yes, strange, but this is too
+	 * old behavior.
+	 */
+	if (attr->ia_valid & ATTR_MODE) {
+		if (exfat_sanitize_mode(sbi, inode, &attr->ia_mode) < 0)
+			attr->ia_valid &= ~ATTR_MODE;
+	}
+
+	EXFAT_I(inode)->fid.size = i_size_read(inode);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36)
+	if (attr->ia_valid)
+		error = inode_setattr(inode, attr);
+#else
+	if (attr->ia_valid & ATTR_SIZE) {
+		old_size = i_size_read(inode);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,4,00)
+		down_write(&EXFAT_I(inode)->truncate_lock);
+		truncate_setsize(inode, attr->ia_size);
+		_exfat_truncate(inode, old_size);
+		up_write(&EXFAT_I(inode)->truncate_lock);
+#else
+		truncate_setsize(inode, attr->ia_size);
+		_exfat_truncate(inode, old_size);
+#endif
+	}
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+#endif
+
+	DPRINTK("exfat_setattr exited\n");
+	return error;
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
+static int exfat_getattr(const struct path *path, struct kstat *stat,
+			 u32 request_mask, unsigned int flags)
+{
+	struct inode *inode = path->dentry->d_inode;
+#else
+static int exfat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+{
+	struct inode *inode = dentry->d_inode;
+#endif
+
+	DPRINTK("exfat_getattr entered\n");
+
+	generic_fillattr(inode, stat);
+	stat->blksize = EXFAT_SB(inode->i_sb)->fs_info.cluster_size;
+
+	DPRINTK("exfat_getattr exited\n");
+	return 0;
+}
+
+const struct inode_operations exfat_dir_inode_operations = {
+	.create        = exfat_create,
+	.lookup        = exfat_lookup,
+	.unlink        = exfat_unlink,
+	.symlink       = exfat_symlink,
+	.mkdir         = exfat_mkdir,
+	.rmdir         = exfat_rmdir,
+	.rename        = exfat_rename,
+	.setattr       = exfat_setattr,
+	.getattr       = exfat_getattr,
+};
+
+/*======================================================================*/
+/*  File Operations                                                     */
+/*======================================================================*/
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0)
+static const char *exfat_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done)
+{
+	struct exfat_inode_info *ei = EXFAT_I(inode);
+	if (ei->target != NULL) {
+		char *cookie = ei->target;
+		if (cookie != NULL) {
+			return (char *)(ei->target);
+		}
+	}
+	return NULL;
+}
+#elif LINUX_VERSION_CODE > KERNEL_VERSION(4,1,0)
+static const char *exfat_follow_link(struct dentry *dentry, void **cookie)
+{
+	struct exfat_inode_info *ei = EXFAT_I(dentry->d_inode);
+	return *cookie = (char *)(ei->target);
+}
+#else
+static void *exfat_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+	struct exfat_inode_info *ei = EXFAT_I(dentry->d_inode);
+	nd_set_link(nd, (char *)(ei->target));
+	return NULL;
+}
+#endif
+
+const struct inode_operations exfat_symlink_inode_operations = {
+	#if LINUX_VERSION_CODE < KERNEL_VERSION(4,10,0)
+		.readlink    = generic_readlink,
+	#endif
+	#if LINUX_VERSION_CODE < KERNEL_VERSION(4,5,0)
+		.follow_link = exfat_follow_link,
+	#endif
+	#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0)
+		.get_link = exfat_get_link,
+	#endif
+};
+
+static int exfat_file_release(struct inode *inode, struct file *filp)
+{
+	struct super_block *sb = inode->i_sb;
+
+	EXFAT_I(inode)->fid.size = i_size_read(inode);
+	FsSyncVol(sb, 0);
+	return 0;
+}
+
+const struct file_operations exfat_file_operations = {
+	.llseek      = generic_file_llseek,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0)
+	.read        = do_sync_read,
+	.write       = do_sync_write,
+	.aio_read    = generic_file_aio_read,
+	.aio_write   = generic_file_aio_write,
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4,1,0)
+	.read        = new_sync_read,
+	.write       = new_sync_write,
+#endif
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
+	.read_iter   = generic_file_read_iter,
+	.write_iter  = generic_file_write_iter,
+#endif
+	.mmap        = generic_file_mmap,
+	.release     = exfat_file_release,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36)
+	.ioctl       = exfat_generic_ioctl,
+	.fsync       = exfat_file_fsync,
+#else
+	.unlocked_ioctl  = exfat_generic_ioctl,
+	.fsync       = generic_file_fsync,
+#endif
+	.splice_read = generic_file_splice_read,
+};
+
+static void _exfat_truncate(struct inode *inode, loff_t old_size)
+{
+	struct super_block *sb = inode->i_sb;
+	struct exfat_sb_info *sbi = EXFAT_SB(sb);
+	FS_INFO_T *p_fs = &(sbi->fs_info);
+	int err;
+
+	__lock_super(sb);
+
+	/*
+	 * This protects against truncating a file bigger than it was then
+	 * trying to write into the hole.
+	 */
+	if (EXFAT_I(inode)->mmu_private > i_size_read(inode))
+		EXFAT_I(inode)->mmu_private = i_size_read(inode);
+
+	if (EXFAT_I(inode)->fid.start_clu == 0)
+		goto out;
+
+	err = FsTruncateFile(inode, old_size, i_size_read(inode));
+	if (err)
+		goto out;
+
+	inode->i_ctime = inode->i_mtime = current_time(inode);
+	if (IS_DIRSYNC(inode))
+		(void) exfat_sync_inode(inode);
+	else
+		mark_inode_dirty(inode);
+
+	inode->i_blocks = ((i_size_read(inode) + (p_fs->cluster_size - 1))
+					   & ~((loff_t)p_fs->cluster_size - 1)) >> 9;
+out:
+	__unlock_super(sb);
+}
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,36)
+static void exfat_truncate(struct inode *inode)
+{
+	_exfat_truncate(inode, i_size_read(inode));
+}
+#endif
+
+const struct inode_operations exfat_file_inode_operations = {
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,36)
+	.truncate    = exfat_truncate,
+#endif
+	.setattr     = exfat_setattr,
+	.getattr     = exfat_getattr,
+};
+
+/*======================================================================*/
+/*  Address Space Operations                                            */
+/*======================================================================*/
+
+static int exfat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
+					  unsigned long *mapped_blocks, int *create)
+{
+	struct super_block *sb = inode->i_sb;
+	struct exfat_sb_info *sbi = EXFAT_SB(sb);
+	FS_INFO_T *p_fs = &(sbi->fs_info);
+	BD_INFO_T *p_bd = &(sbi->bd_info);
+	const unsigned long blocksize = sb->s_blocksize;
+	const unsigned char blocksize_bits = sb->s_blocksize_bits;
+	sector_t last_block;
+	int err, clu_offset, sec_offset;
+	unsigned int cluster;
+
+	*phys = 0;
+	*mapped_blocks = 0;
+
+	if ((p_fs->vol_type == FAT12) || (p_fs->vol_type == FAT16)) {
+		if (inode->i_ino == EXFAT_ROOT_INO) {
+			if (sector < (p_fs->dentries_in_root >> (p_bd->sector_size_bits-DENTRY_SIZE_BITS))) {
+				*phys = sector + p_fs->root_start_sector;
+				*mapped_blocks = 1;
+			}
+			return 0;
+		}
+	}
+
+	last_block = (i_size_read(inode) + (blocksize - 1)) >> blocksize_bits;
+	if (sector >= last_block) {
+		if (*create == 0)
+			return 0;
+	} else {
+		*create = 0;
+	}
+
+	clu_offset = sector >> p_fs->sectors_per_clu_bits;  /* cluster offset */
+	sec_offset = sector & (p_fs->sectors_per_clu - 1);  /* sector offset in cluster */
+
+	EXFAT_I(inode)->fid.size = i_size_read(inode);
+
+	err = FsMapCluster(inode, clu_offset, &cluster);
+
+	if (err) {
+		if (err == FFS_FULL)
+			return -ENOSPC;
+		else
+			return -EIO;
+	} else if (cluster != CLUSTER_32(~0)) {
+		*phys = START_SECTOR(cluster) + sec_offset;
+		*mapped_blocks = p_fs->sectors_per_clu - sec_offset;
+	}
+
+	return 0;
+}
+
+static int exfat_get_block(struct inode *inode, sector_t iblock,
+						   struct buffer_head *bh_result, int create)
+{
+	struct super_block *sb = inode->i_sb;
+	unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
+	int err;
+	unsigned long mapped_blocks;
+	sector_t phys;
+
+	__lock_super(sb);
+
+	err = exfat_bmap(inode, iblock, &phys, &mapped_blocks, &create);
+	if (err) {
+		__unlock_super(sb);
+		return err;
+	}
+
+	if (phys) {
+		max_blocks = min(mapped_blocks, max_blocks);
+		if (create) {
+			EXFAT_I(inode)->mmu_private += max_blocks << sb->s_blocksize_bits;
+			set_buffer_new(bh_result);
+		}
+		map_bh(bh_result, sb, phys);
+	}
+
+	bh_result->b_size = max_blocks << sb->s_blocksize_bits;
+	__unlock_super(sb);
+
+	return 0;
+}
+
+static int exfat_readpage(struct file *file, struct page *page)
+{
+	int ret;
+	ret =  mpage_readpage(page, exfat_get_block);
+	return ret;
+}
+
+static int exfat_readpages(struct file *file, struct address_space *mapping,
+				   struct list_head *pages, unsigned nr_pages)
+{
+	int ret;
+	ret =  mpage_readpages(mapping, pages, nr_pages, exfat_get_block);
+	return ret;
+}
+
+static int exfat_writepage(struct page *page, struct writeback_control *wbc)
+{
+	int ret;
+	ret = block_write_full_page(page, exfat_get_block, wbc);
+	return ret;
+}
+
+static int exfat_writepages(struct address_space *mapping,
+						struct writeback_control *wbc)
+{
+	int ret;
+	ret = mpage_writepages(mapping, wbc, exfat_get_block);
+	return ret;
+}
+
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,34)
+static void exfat_write_failed(struct address_space *mapping, loff_t to)
+{
+	struct inode *inode = mapping->host;
+	if (to > i_size_read(inode)) {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,12,0)
+		truncate_pagecache(inode, i_size_read(inode));
+#else
+		truncate_pagecache(inode, to, i_size_read(inode));
+#endif
+		EXFAT_I(inode)->fid.size = i_size_read(inode);
+		_exfat_truncate(inode, i_size_read(inode));
+	}
+}
+#endif
+
+static int exfat_write_begin(struct file *file, struct address_space *mapping,
+				 loff_t pos, unsigned len, unsigned flags,
+					 struct page **pagep, void **fsdata)
+{
+	int ret;
+	*pagep = NULL;
+	ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+				   exfat_get_block,
+				   &EXFAT_I(mapping->host)->mmu_private);
+
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,34)
+	if (ret < 0)
+		exfat_write_failed(mapping, pos+len);
+#endif
+	return ret;
+}
+
+static int exfat_write_end(struct file *file, struct address_space *mapping,
+				   loff_t pos, unsigned len, unsigned copied,
+					   struct page *pagep, void *fsdata)
+{
+	struct inode *inode = mapping->host;
+	FILE_ID_T *fid = &(EXFAT_I(inode)->fid);
+	int err;
+
+	err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata);
+
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,34)
+	if (err < len)
+		exfat_write_failed(mapping, pos+len);
+#endif
+
+	if (!(err < 0) && !(fid->attr & ATTR_ARCHIVE)) {
+		inode->i_mtime = inode->i_ctime = current_time(inode);
+		fid->attr |= ATTR_ARCHIVE;
+		mark_inode_dirty(inode);
+	}
+	return err;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0)
+#ifdef CONFIG_AIO_OPTIMIZATION
+static ssize_t exfat_direct_IO(int rw, struct kiocb *iocb,
+						struct iov_iter *iter, loff_t offset)
+#else
+static ssize_t exfat_direct_IO(int rw, struct kiocb *iocb,
+					   const struct iovec *iov,
+					   loff_t offset, unsigned long nr_segs)
+#endif
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4,2,0)
+static ssize_t exfat_direct_IO(int rw, struct kiocb *iocb,
+					   struct iov_iter *iter, loff_t offset)
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4,7,0)
+static ssize_t exfat_direct_IO(struct kiocb *iocb,
+					   struct iov_iter *iter, loff_t offset)
+#else /* >= 4.7.x */
+static ssize_t exfat_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+#endif
+{
+	struct inode *inode = iocb->ki_filp->f_mapping->host;
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,34)
+	struct address_space *mapping = iocb->ki_filp->f_mapping;
+#endif
+	ssize_t ret;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,2,0)
+	int rw;
+
+	rw = iov_iter_rw(iter);
+#endif
+
+	if (rw == WRITE) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0)
+#ifdef CONFIG_AIO_OPTIMIZATION
+		if (EXFAT_I(inode)->mmu_private <
+					(offset + iov_iter_count(iter)))
+#else
+		if (EXFAT_I(inode)->mmu_private < (offset + iov_length(iov, nr_segs)))
+#endif
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4,7,0)
+		if (EXFAT_I(inode)->mmu_private < (offset + iov_iter_count(iter)))
+#else
+		if (EXFAT_I(inode)->mmu_private < iov_iter_count(iter))
+#endif
+			return 0;
+	}
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,7,0)
+	ret = blockdev_direct_IO(iocb, inode, iter, exfat_get_block);
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0)
+	ret = blockdev_direct_IO(iocb, inode, iter,
+					offset, exfat_get_block);
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
+	ret = blockdev_direct_IO(rw, iocb, inode, iter,
+					offset, exfat_get_block);
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,1,0)
+#ifdef CONFIG_AIO_OPTIMIZATION
+	ret = blockdev_direct_IO(rw, iocb, inode, iter,
+					offset, exfat_get_block);
+#else
+	ret = blockdev_direct_IO(rw, iocb, inode, iov,
+					offset, nr_segs, exfat_get_block);
+#endif
+#else
+        ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
+					offset, nr_segs, exfat_get_block, NULL);
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,7,0)
+	if ((ret < 0) && (rw & WRITE))
+		exfat_write_failed(mapping, iov_iter_count(iter));
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
+	if ((ret < 0) && (rw & WRITE))
+		exfat_write_failed(mapping, offset+iov_iter_count(iter));
+#elif LINUX_VERSION_CODE > KERNEL_VERSION(2,6,34)
+	if ((ret < 0) && (rw & WRITE))
+#ifdef CONFIG_AIO_OPTIMIZATION
+		exfat_write_failed(mapping, offset+iov_iter_count(iter));
+#else
+		exfat_write_failed(mapping, offset+iov_length(iov, nr_segs));
+#endif
+#endif
+	return ret;
+}
+
+static sector_t _exfat_bmap(struct address_space *mapping, sector_t block)
+{
+	sector_t blocknr;
+
+	/* exfat_get_cluster() assumes the requested blocknr isn't truncated. */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,4,00)
+	down_read(&EXFAT_I(mapping->host)->truncate_lock);
+	blocknr = generic_block_bmap(mapping, block, exfat_get_block);
+	up_read(&EXFAT_I(mapping->host)->truncate_lock);
+#else
+	down_read(&EXFAT_I(mapping->host)->i_alloc_sem);
+	blocknr = generic_block_bmap(mapping, block, exfat_get_block);
+	up_read(&EXFAT_I(mapping->host)->i_alloc_sem);
+#endif
+
+	return blocknr;
+}
+
+const struct address_space_operations exfat_aops = {
+	.readpage    = exfat_readpage,
+	.readpages   = exfat_readpages,
+	.writepage   = exfat_writepage,
+	.writepages  = exfat_writepages,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39)
+	.sync_page   = block_sync_page,
+#endif
+	.write_begin = exfat_write_begin,
+	.write_end   = exfat_write_end,
+	.direct_IO   = exfat_direct_IO,
+	.bmap        = _exfat_bmap
+};
+
+/*======================================================================*/
+/*  Super Operations                                                    */
+/*======================================================================*/
+
+static inline unsigned long exfat_hash(loff_t i_pos)
+{
+	return hash_32(i_pos, EXFAT_HASH_BITS);
+}
+
+static struct inode *exfat_iget(struct super_block *sb, loff_t i_pos)
+{
+	struct exfat_sb_info *sbi = EXFAT_SB(sb);
+	struct exfat_inode_info *info;
+	struct hlist_head *head = sbi->inode_hashtable + exfat_hash(i_pos);
+	struct inode *inode = NULL;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0)
+	struct hlist_node *node;
+
+	spin_lock(&sbi->inode_hash_lock);
+	hlist_for_each_entry(info, node, head, i_hash_fat) {
+#else
+	spin_lock(&sbi->inode_hash_lock);
+	hlist_for_each_entry(info, head, i_hash_fat) {
+#endif
+		CHECK_ERR(info->vfs_inode.i_sb != sb);
+
+		if (i_pos != info->i_pos)
+			continue;
+		inode = igrab(&info->vfs_inode);
+		if (inode)
+			break;
+	}
+	spin_unlock(&sbi->inode_hash_lock);
+	return inode;
+}
+
+static void exfat_attach(struct inode *inode, loff_t i_pos)
+{
+	struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb);
+	struct hlist_head *head = sbi->inode_hashtable + exfat_hash(i_pos);
+
+	spin_lock(&sbi->inode_hash_lock);
+	EXFAT_I(inode)->i_pos = i_pos;
+	hlist_add_head(&EXFAT_I(inode)->i_hash_fat, head);
+	spin_unlock(&sbi->inode_hash_lock);
+}
+
+static void exfat_detach(struct inode *inode)
+{
+	struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb);
+
+	spin_lock(&sbi->inode_hash_lock);
+	hlist_del_init(&EXFAT_I(inode)->i_hash_fat);
+	EXFAT_I(inode)->i_pos = 0;
+	spin_unlock(&sbi->inode_hash_lock);
+}
+
+/* doesn't deal with root inode */
+static int exfat_fill_inode(struct inode *inode, FILE_ID_T *fid)
+{
+	struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb);
+	FS_INFO_T *p_fs = &(sbi->fs_info);
+	DIR_ENTRY_T info;
+
+	memcpy(&(EXFAT_I(inode)->fid), fid, sizeof(FILE_ID_T));
+
+	FsReadStat(inode, &info);
+
+	EXFAT_I(inode)->i_pos = 0;
+	EXFAT_I(inode)->target = NULL;
+	inode->i_uid = sbi->options.fs_uid;
+	inode->i_gid = sbi->options.fs_gid;
+	INC_IVERSION(inode);
+	inode->i_generation = get_seconds();
+
+	if (info.Attr & ATTR_SUBDIR) { /* directory */
+		inode->i_generation &= ~1;
+		inode->i_mode = exfat_make_mode(sbi, info.Attr, S_IRWXUGO);
+		inode->i_op = &exfat_dir_inode_operations;
+		inode->i_fop = &exfat_dir_operations;
+
+		i_size_write(inode, info.Size);
+		EXFAT_I(inode)->mmu_private = i_size_read(inode);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,2,00)
+		set_nlink(inode, info.NumSubdirs);
+#else
+		inode->i_nlink = info.NumSubdirs;
+#endif
+	} else if (info.Attr & ATTR_SYMLINK) { /* symbolic link */
+		inode->i_generation |= 1;
+		inode->i_mode = exfat_make_mode(sbi, info.Attr, S_IRWXUGO);
+		inode->i_op = &exfat_symlink_inode_operations;
+
+		i_size_write(inode, info.Size);
+		EXFAT_I(inode)->mmu_private = i_size_read(inode);
+	} else { /* regular file */
+		inode->i_generation |= 1;
+		inode->i_mode = exfat_make_mode(sbi, info.Attr, S_IRWXUGO);
+		inode->i_op = &exfat_file_inode_operations;
+		inode->i_fop = &exfat_file_operations;
+		inode->i_mapping->a_ops = &exfat_aops;
+		inode->i_mapping->nrpages = 0;
+
+		i_size_write(inode, info.Size);
+		EXFAT_I(inode)->mmu_private = i_size_read(inode);
+	}
+	exfat_save_attr(inode, info.Attr);
+
+	inode->i_blocks = ((i_size_read(inode) + (p_fs->cluster_size - 1))
+					   & ~((loff_t)p_fs->cluster_size - 1)) >> 9;
+
+	exfat_time_fat2unix(sbi, &inode->i_mtime, &info.ModifyTimestamp);
+	exfat_time_fat2unix(sbi, &inode->i_ctime, &info.CreateTimestamp);
+	exfat_time_fat2unix(sbi, &inode->i_atime, &info.AccessTimestamp);
+
+	return 0;
+}
+
+static struct inode *exfat_build_inode(struct super_block *sb,
+									   FILE_ID_T *fid, loff_t i_pos) {
+	struct inode *inode;
+	int err;
+
+	inode = exfat_iget(sb, i_pos);
+	if (inode)
+		goto out;
+	inode = new_inode(sb);
+	if (!inode) {
+		inode = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+	inode->i_ino = iunique(sb, EXFAT_ROOT_INO);
+	SET_IVERSION(inode, 1);
+	err = exfat_fill_inode(inode, fid);
+	if (err) {
+		iput(inode);
+		inode = ERR_PTR(err);
+		goto out;
+	}
+	exfat_attach(inode, i_pos);
+	insert_inode_hash(inode);
+out:
+	return inode;
+}
+
+static int exfat_sync_inode(struct inode *inode)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34)
+	return exfat_write_inode(inode, 0);
+#else
+	return exfat_write_inode(inode, NULL);
+#endif
+}
+
+static struct inode *exfat_alloc_inode(struct super_block *sb)
+{
+	struct exfat_inode_info *ei;
+
+	ei = kmem_cache_alloc(exfat_inode_cachep, GFP_NOFS);
+	if (!ei)
+		return NULL;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,4,00)
+	init_rwsem(&ei->truncate_lock);
+#endif
+
+	return &ei->vfs_inode;
+}
+
+static void exfat_destroy_inode(struct inode *inode)
+{
+	if (EXFAT_I(inode)->target)
+		kfree(EXFAT_I(inode)->target);
+	EXFAT_I(inode)->target = NULL;
+
+	kmem_cache_free(exfat_inode_cachep, EXFAT_I(inode));
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34)
+static int exfat_write_inode(struct inode *inode, int wait)
+#else
+static int exfat_write_inode(struct inode *inode, struct writeback_control *wbc)
+#endif
+{
+	struct super_block *sb = inode->i_sb;
+	struct exfat_sb_info *sbi = EXFAT_SB(sb);
+	DIR_ENTRY_T info;
+
+	if (inode->i_ino == EXFAT_ROOT_INO)
+		return 0;
+
+	info.Attr = exfat_make_attr(inode);
+	info.Size = i_size_read(inode);
+
+	exfat_time_unix2fat(sbi, &inode->i_mtime, &info.ModifyTimestamp);
+	exfat_time_unix2fat(sbi, &inode->i_ctime, &info.CreateTimestamp);
+	exfat_time_unix2fat(sbi, &inode->i_atime, &info.AccessTimestamp);
+
+	FsWriteStat(inode, &info);
+
+	return 0;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36)
+static void exfat_delete_inode(struct inode *inode)
+{
+	truncate_inode_pages(&inode->i_data, 0);
+	clear_inode(inode);
+}
+
+static void exfat_clear_inode(struct inode *inode)
+{
+	exfat_detach(inode);
+	remove_inode_hash(inode);
+}
+#else
+static void exfat_evict_inode(struct inode *inode)
+{
+	truncate_inode_pages(&inode->i_data, 0);
+
+	if (!inode->i_nlink)
+		i_size_write(inode, 0);
+	invalidate_inode_buffers(inode);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
+	end_writeback(inode);
+#else
+	clear_inode(inode);
+#endif
+	exfat_detach(inode);
+
+	remove_inode_hash(inode);
+}
+#endif
+
+static void exfat_free_super(struct exfat_sb_info *sbi)
+{
+	if (sbi->nls_disk)
+		unload_nls(sbi->nls_disk);
+	if (sbi->nls_io)
+		unload_nls(sbi->nls_io);
+	if (sbi->options.iocharset != exfat_default_iocharset)
+		kfree(sbi->options.iocharset);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,0)
+	/* mutex_init is in exfat_fill_super function. only for 3.7+ */
+	mutex_destroy(&sbi->s_lock);
+#endif
+	kfree(sbi);
+}
+
+static void exfat_put_super(struct super_block *sb)
+{
+	struct exfat_sb_info *sbi = EXFAT_SB(sb);
+	if (__is_sb_dirty(sb))
+		exfat_write_super(sb);
+
+	FsUmountVol(sb);
+
+	sb->s_fs_info = NULL;
+	exfat_free_super(sbi);
+}
+
+static void exfat_write_super(struct super_block *sb)
+{
+	__lock_super(sb);
+
+	__set_sb_clean(sb);
+
+	if (!(sb->s_flags & MS_RDONLY))
+		FsSyncVol(sb, 1);
+
+	__unlock_super(sb);
+}
+
+static int exfat_sync_fs(struct super_block *sb, int wait)
+{
+	int err = 0;
+
+	if (__is_sb_dirty(sb)) {
+		__lock_super(sb);
+		__set_sb_clean(sb);
+		err = FsSyncVol(sb, 1);
+		__unlock_super(sb);
+	}
+
+	return err;
+}
+
+static int exfat_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	struct super_block *sb = dentry->d_sb;
+	u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
+	FS_INFO_T *p_fs = &(EXFAT_SB(sb)->fs_info);
+	VOL_INFO_T info;
+
+	if (p_fs->used_clusters == (u32) ~0) {
+		if (FFS_MEDIAERR == FsGetVolInfo(sb, &info))
+			return -EIO;
+
+	} else {
+		info.FatType = p_fs->vol_type;
+		info.ClusterSize = p_fs->cluster_size;
+		info.NumClusters = p_fs->num_clusters - 2;
+		info.UsedClusters = p_fs->used_clusters;
+		info.FreeClusters = info.NumClusters - info.UsedClusters;
+
+		if (p_fs->dev_ejected)
+			printk("[EXFAT] statfs on device is ejected\n");
+	}
+
+	buf->f_type = sb->s_magic;
+	buf->f_bsize = info.ClusterSize;
+	buf->f_blocks = info.NumClusters;
+	buf->f_bfree = info.FreeClusters;
+	buf->f_bavail = info.FreeClusters;
+	buf->f_fsid.val[0] = (u32)id;
+	buf->f_fsid.val[1] = (u32)(id >> 32);
+	buf->f_namelen = 260;
+
+	return 0;
+}
+
+static int exfat_remount(struct super_block *sb, int *flags, char *data)
+{
+	*flags |= MS_NODIRATIME;
+	return 0;
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,3,0)
+static int exfat_show_options(struct seq_file *m, struct dentry *root)
+{
+	struct exfat_sb_info *sbi = EXFAT_SB(root->d_sb);
+#else
+static int exfat_show_options(struct seq_file *m, struct vfsmount *mnt)
+{
+	struct exfat_sb_info *sbi = EXFAT_SB(mnt->mnt_sb);
+#endif
+	struct exfat_mount_options *opts = &sbi->options;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0)
+	if (__kuid_val(opts->fs_uid))
+		seq_printf(m, ",uid=%u", __kuid_val(opts->fs_uid));
+	if (__kgid_val(opts->fs_gid))
+		seq_printf(m, ",gid=%u", __kgid_val(opts->fs_gid));
+#else
+	if (opts->fs_uid != 0)
+		seq_printf(m, ",uid=%u", opts->fs_uid);
+	if (opts->fs_gid != 0)
+		seq_printf(m, ",gid=%u", opts->fs_gid);
+#endif
+	seq_printf(m, ",fmask=%04o", opts->fs_fmask);
+	seq_printf(m, ",dmask=%04o", opts->fs_dmask);
+	if (opts->allow_utime)
+		seq_printf(m, ",allow_utime=%04o", opts->allow_utime);
+	if (sbi->nls_disk)
+		seq_printf(m, ",codepage=%s", sbi->nls_disk->charset);
+	if (sbi->nls_io)
+		seq_printf(m, ",iocharset=%s", sbi->nls_io->charset);
+	seq_printf(m, ",namecase=%u", opts->casesensitive);
+	if (opts->errors == EXFAT_ERRORS_CONT)
+		seq_puts(m, ",errors=continue");
+	else if (opts->errors == EXFAT_ERRORS_PANIC)
+		seq_puts(m, ",errors=panic");
+	else
+		seq_puts(m, ",errors=remount-ro");
+#ifdef CONFIG_EXFAT_DISCARD
+	if (opts->discard)
+		seq_printf(m, ",discard");
+#endif
+	return 0;
+}
+
+const struct super_operations exfat_sops = {
+	.alloc_inode   = exfat_alloc_inode,
+	.destroy_inode = exfat_destroy_inode,
+	.write_inode   = exfat_write_inode,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36)
+	.delete_inode  = exfat_delete_inode,
+	.clear_inode   = exfat_clear_inode,
+#else
+	.evict_inode  = exfat_evict_inode,
+#endif
+	.put_super     = exfat_put_super,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0)
+	.write_super   = exfat_write_super,
+#endif
+	.sync_fs       = exfat_sync_fs,
+	.statfs        = exfat_statfs,
+	.remount_fs    = exfat_remount,
+	.show_options  = exfat_show_options,
+};
+
+/*======================================================================*/
+/*  Export Operations                                                   */
+/*======================================================================*/
+
+static struct inode *exfat_nfs_get_inode(struct super_block *sb,
+				       u64 ino, u32 generation)
+{
+	struct inode *inode = NULL;
+	if (ino < EXFAT_ROOT_INO)
+		return inode;
+	inode = ilookup(sb, ino);
+
+	if (inode && generation && (inode->i_generation != generation)) {
+		iput(inode);
+		inode = NULL;
+	}
+
+	return inode;
+}
+
+static struct dentry *exfat_fh_to_dentry(struct super_block *sb, struct fid *fid,
+				int fh_len, int fh_type)
+{
+	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
+				    exfat_nfs_get_inode);
+}
+
+static struct dentry *exfat_fh_to_parent(struct super_block *sb, struct fid *fid,
+				int fh_len, int fh_type)
+{
+	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
+				    exfat_nfs_get_inode);
+}
+
+const struct export_operations exfat_export_ops = {
+	.fh_to_dentry   = exfat_fh_to_dentry,
+	.fh_to_parent   = exfat_fh_to_parent,
+};
+
+/*======================================================================*/
+/*  Super Block Read Operations                                         */
+/*======================================================================*/
+
+enum {
+	Opt_uid,
+	Opt_gid,
+	Opt_umask,
+	Opt_dmask,
+	Opt_fmask,
+	Opt_allow_utime,
+	Opt_codepage,
+	Opt_charset,
+	Opt_namecase,
+	Opt_debug,
+	Opt_err_cont,
+	Opt_err_panic,
+	Opt_err_ro,
+	Opt_utf8_hack,
+	Opt_err,
+#ifdef CONFIG_EXFAT_DISCARD
+	Opt_discard,
+#endif /* EXFAT_CONFIG_DISCARD */
+};
+
+static const match_table_t exfat_tokens = {
+	{Opt_uid, "uid=%u"},
+	{Opt_gid, "gid=%u"},
+	{Opt_umask, "umask=%o"},
+	{Opt_dmask, "dmask=%o"},
+	{Opt_fmask, "fmask=%o"},
+	{Opt_allow_utime, "allow_utime=%o"},
+	{Opt_codepage, "codepage=%u"},
+	{Opt_charset, "iocharset=%s"},
+	{Opt_namecase, "namecase=%u"},
+	{Opt_debug, "debug"},
+	{Opt_err_cont, "errors=continue"},
+	{Opt_err_panic, "errors=panic"},
+	{Opt_err_ro, "errors=remount-ro"},
+	{Opt_utf8_hack, "utf8"},
+#ifdef CONFIG_EXFAT_DISCARD
+	{Opt_discard, "discard"},
+#endif /* CONFIG_EXFAT_DISCARD */
+	{Opt_err, NULL}
+};
+
+static int parse_options(char *options, int silent, int *debug,
+						 struct exfat_mount_options *opts)
+{
+	char *p;
+	substring_t args[MAX_OPT_ARGS];
+	int option;
+	char *iocharset;
+
+	opts->fs_uid = current_uid();
+	opts->fs_gid = current_gid();
+	opts->fs_fmask = opts->fs_dmask = current->fs->umask;
+	opts->allow_utime = (unsigned short) -1;
+	opts->codepage = exfat_default_codepage;
+	opts->iocharset = exfat_default_iocharset;
+	opts->casesensitive = 0;
+	opts->errors = EXFAT_ERRORS_RO;
+#ifdef CONFIG_EXFAT_DISCARD
+	opts->discard = 0;
+#endif
+	*debug = 0;
+
+	if (!options)
+		goto out;
+
+	while ((p = strsep(&options, ",")) != NULL) {
+		int token;
+		if (!*p)
+			continue;
+
+		token = match_token(p, exfat_tokens, args);
+		switch (token) {
+		case Opt_uid:
+			if (match_int(&args[0], &option))
+				return 0;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0)
+			opts->fs_uid = KUIDT_INIT(option);
+#else
+			opts->fs_uid = option;
+#endif
+			break;
+		case Opt_gid:
+			if (match_int(&args[0], &option))
+				return 0;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0)
+			opts->fs_gid = KGIDT_INIT(option);
+#else
+			opts->fs_gid = option;
+#endif
+			break;
+		case Opt_umask:
+		case Opt_dmask:
+		case Opt_fmask:
+			if (match_octal(&args[0], &option))
+				return 0;
+			if (token != Opt_dmask)
+				opts->fs_fmask = option;
+			if (token != Opt_fmask)
+				opts->fs_dmask = option;
+			break;
+		case Opt_allow_utime:
+			if (match_octal(&args[0], &option))
+				return 0;
+			opts->allow_utime = option & (S_IWGRP | S_IWOTH);
+			break;
+		case Opt_codepage:
+			if (match_int(&args[0], &option))
+				return 0;
+			opts->codepage = option;
+			break;
+		case Opt_charset:
+			if (opts->iocharset != exfat_default_iocharset)
+				kfree(opts->iocharset);
+			iocharset = match_strdup(&args[0]);
+			if (!iocharset)
+				return -ENOMEM;
+			opts->iocharset = iocharset;
+			break;
+		case Opt_namecase:
+			if (match_int(&args[0], &option))
+				return 0;
+			opts->casesensitive = option;
+			break;
+		case Opt_err_cont:
+			opts->errors = EXFAT_ERRORS_CONT;
+			break;
+		case Opt_err_panic:
+			opts->errors = EXFAT_ERRORS_PANIC;
+			break;
+		case Opt_err_ro:
+			opts->errors = EXFAT_ERRORS_RO;
+			break;
+		case Opt_debug:
+			*debug = 1;
+			break;
+#ifdef CONFIG_EXFAT_DISCARD
+		case Opt_discard:
+			opts->discard = 1;
+			break;
+#endif /* CONFIG_EXFAT_DISCARD */
+		case Opt_utf8_hack:
+			break;
+		default:
+			if (!silent)
+				printk(KERN_ERR "[EXFAT] Unrecognized mount option %s or missing value\n", p);
+			return -EINVAL;
+		}
+	}
+
+out:
+	if (opts->allow_utime == (unsigned short) -1)
+		opts->allow_utime = ~opts->fs_dmask & (S_IWGRP | S_IWOTH);
+
+	return 0;
+}
+
+static void exfat_hash_init(struct super_block *sb)
+{
+	struct exfat_sb_info *sbi = EXFAT_SB(sb);
+	int i;
+
+	spin_lock_init(&sbi->inode_hash_lock);
+	for (i = 0; i < EXFAT_HASH_SIZE; i++)
+		INIT_HLIST_HEAD(&sbi->inode_hashtable[i]);
+}
+
+static int exfat_read_root(struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+	struct exfat_sb_info *sbi = EXFAT_SB(sb);
+	FS_INFO_T *p_fs = &(sbi->fs_info);
+	DIR_ENTRY_T info;
+
+	EXFAT_I(inode)->fid.dir.dir = p_fs->root_dir;
+	EXFAT_I(inode)->fid.dir.flags = 0x01;
+	EXFAT_I(inode)->fid.entry = -1;
+	EXFAT_I(inode)->fid.start_clu = p_fs->root_dir;
+	EXFAT_I(inode)->fid.flags = 0x01;
+	EXFAT_I(inode)->fid.type = TYPE_DIR;
+	EXFAT_I(inode)->fid.rwoffset = 0;
+	EXFAT_I(inode)->fid.hint_last_off = -1;
+
+	EXFAT_I(inode)->target = NULL;
+
+	FsReadStat(inode, &info);
+
+	inode->i_uid = sbi->options.fs_uid;
+	inode->i_gid = sbi->options.fs_gid;
+	INC_IVERSION(inode);
+	inode->i_generation = 0;
+	inode->i_mode = exfat_make_mode(sbi, ATTR_SUBDIR, S_IRWXUGO);
+	inode->i_op = &exfat_dir_inode_operations;
+	inode->i_fop = &exfat_dir_operations;
+
+	i_size_write(inode, info.Size);
+	inode->i_blocks = ((i_size_read(inode) + (p_fs->cluster_size - 1))
+					   & ~((loff_t)p_fs->cluster_size - 1)) >> 9;
+	EXFAT_I(inode)->i_pos = ((loff_t) p_fs->root_dir << 32) | 0xffffffff;
+	EXFAT_I(inode)->mmu_private = i_size_read(inode);
+
+	exfat_save_attr(inode, ATTR_SUBDIR);
+	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,2,00)
+	set_nlink(inode, info.NumSubdirs + 2);
+#else
+	inode->i_nlink = info.NumSubdirs + 2;
+#endif
+
+	return 0;
+}
+
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,37)
+static void setup_dops(struct super_block *sb)
+{
+	if (EXFAT_SB(sb)->options.casesensitive == 0)
+		sb->s_d_op = &exfat_ci_dentry_ops;
+	else
+		sb->s_d_op = &exfat_dentry_ops;
+}
+#endif
+
+static int exfat_fill_super(struct super_block *sb, void *data, int silent)
+{
+	struct inode *root_inode = NULL;
+	struct exfat_sb_info *sbi;
+	int debug, ret;
+	long error;
+	char buf[50];
+
+	/*
+	 * GFP_KERNEL is ok here, because while we do hold the
+	 * supeblock lock, memory pressure can't call back into
+	 * the filesystem, since we're only just about to mount
+	 * it and have no inodes etc active!
+	 */
+	sbi = kzalloc(sizeof(struct exfat_sb_info), GFP_KERNEL);
+	if (!sbi)
+		return -ENOMEM;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,0)
+	mutex_init(&sbi->s_lock);
+#endif
+	sb->s_fs_info = sbi;
+	sb->s_flags |= MS_NODIRATIME;
+	sb->s_magic = EXFAT_SUPER_MAGIC;
+	sb->s_op = &exfat_sops;
+	sb->s_export_op = &exfat_export_ops;
+
+	error = parse_options(data, silent, &debug, &sbi->options);
+	if (error)
+		goto out_fail;
+
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,37)
+	setup_dops(sb);
+#endif
+
+	error = -EIO;
+	sb_min_blocksize(sb, 512);
+	sb->s_maxbytes = 0x7fffffffffffffffLL;    /* maximum file size */
+
+	ret = FsMountVol(sb);
+	if (ret) {
+		if (!silent)
+			printk(KERN_ERR "[EXFAT] FsMountVol failed\n");
+
+		goto out_fail;
+	}
+
+	/* set up enough so that it can read an inode */
+	exfat_hash_init(sb);
+
+	/*
+	 * The low byte of FAT's first entry must have same value with
+	 * media-field.  But in real world, too many devices is
+	 * writing wrong value.  So, removed that validity check.
+	 *
+	 * if (FAT_FIRST_ENT(sb, media) != first)
+	 */
+
+	/* codepage is not meaningful in exfat */
+	if (sbi->fs_info.vol_type != EXFAT) {
+		error = -EINVAL;
+		sprintf(buf, "cp%d", sbi->options.codepage);
+		sbi->nls_disk = load_nls(buf);
+		if (!sbi->nls_disk) {
+			printk(KERN_ERR "[EXFAT] Codepage %s not found\n", buf);
+			goto out_fail2;
+		}
+	}
+
+	sbi->nls_io = load_nls(sbi->options.iocharset);
+
+	error = -ENOMEM;
+	root_inode = new_inode(sb);
+	if (!root_inode)
+		goto out_fail2;
+	root_inode->i_ino = EXFAT_ROOT_INO;
+	SET_IVERSION(root_inode, 1);
+
+	error = exfat_read_root(root_inode);
+	if (error < 0)
+		goto out_fail2;
+	error = -ENOMEM;
+	exfat_attach(root_inode, EXFAT_I(root_inode)->i_pos);
+	insert_inode_hash(root_inode);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,4,00)
+	sb->s_root = d_make_root(root_inode);
+#else
+	sb->s_root = d_alloc_root(root_inode);
+#endif
+	if (!sb->s_root) {
+		printk(KERN_ERR "[EXFAT] Getting the root inode failed\n");
+		goto out_fail2;
+	}
+
+	return 0;
+
+out_fail2:
+	FsUmountVol(sb);
+out_fail:
+	if (root_inode)
+		iput(root_inode);
+	sb->s_fs_info = NULL;
+	exfat_free_super(sbi);
+	return error;
+}
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+static int exfat_get_sb(struct file_system_type *fs_type,
+						int flags, const char *dev_name,
+						void *data, struct vfsmount *mnt)
+{
+	return get_sb_bdev(fs_type, flags, dev_name, data, exfat_fill_super, mnt);
+}
+#else
+static struct dentry *exfat_fs_mount(struct file_system_type *fs_type,
+									 int flags, const char *dev_name,
+									 void *data) {
+	return mount_bdev(fs_type, flags, dev_name, data, exfat_fill_super);
+}
+#endif
+
+static void init_once(void *foo)
+{
+	struct exfat_inode_info *ei = (struct exfat_inode_info *)foo;
+
+	INIT_HLIST_NODE(&ei->i_hash_fat);
+	inode_init_once(&ei->vfs_inode);
+}
+
+static int __init exfat_init_inodecache(void)
+{
+	exfat_inode_cachep = kmem_cache_create("exfat_inode_cache",
+										   sizeof(struct exfat_inode_info),
+										   0, (SLAB_RECLAIM_ACCOUNT|
+												   SLAB_MEM_SPREAD),
+										   init_once);
+	if (exfat_inode_cachep == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+static void __exit exfat_destroy_inodecache(void)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,6,0)
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
+#endif
+	kmem_cache_destroy(exfat_inode_cachep);
+}
+
+#ifdef CONFIG_EXFAT_KERNEL_DEBUG
+static void exfat_debug_kill_sb(struct super_block *sb)
+{
+	struct exfat_sb_info *sbi = EXFAT_SB(sb);
+	struct block_device *bdev = sb->s_bdev;
+
+	long flags;
+
+	if (sbi) {
+		flags = sbi->debug_flags;
+
+		if (flags & EXFAT_DEBUGFLAGS_INVALID_UMOUNT) {
+			/* invalidate_bdev drops all device cache include dirty.
+			   we use this to simulate device removal */
+			FsReleaseCache(sb);
+			invalidate_bdev(bdev);
+		}
+	}
+
+	kill_block_super(sb);
+}
+#endif /* CONFIG_EXFAT_KERNEL_DEBUG */
+
+static struct file_system_type exfat_fs_type = {
+	.owner       = THIS_MODULE,
+#if defined(CONFIG_MACH_LGE) || defined(CONFIG_HTC_BATT_CORE)
+	.name        = "texfat",
+#else
+	.name        = "exfat",
+#endif
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+	.get_sb      = exfat_get_sb,
+#else
+	.mount       = exfat_fs_mount,
+#endif
+#ifdef CONFIG_EXFAT_KERNEL_DEBUG
+	.kill_sb    = exfat_debug_kill_sb,
+#else
+	.kill_sb    = kill_block_super,
+#endif /* CONFIG_EXFAT_KERNEL_DEBUG */
+	.fs_flags    = FS_REQUIRES_DEV,
+};
+
+static int __init init_exfat(void)
+{
+	int err;
+
+	err = FsInit();
+	if (err) {
+		if (err == FFS_MEMORYERR)
+			return -ENOMEM;
+		else
+			return -EIO;
+	}
+
+	printk(KERN_INFO "exFAT: Version %s\n", EXFAT_VERSION);
+
+	err = exfat_init_inodecache();
+	if (err)
+		goto out;
+
+	err = register_filesystem(&exfat_fs_type);
+	if (err)
+		goto out;
+
+	return 0;
+out:
+	FsShutdown();
+	return err;
+}
+
+static void __exit exit_exfat(void)
+{
+	exfat_destroy_inodecache();
+	unregister_filesystem(&exfat_fs_type);
+	FsShutdown();
+}
+
+module_init(init_exfat);
+module_exit(exit_exfat);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("exFAT Filesystem Driver");
+#ifdef MODULE_ALIAS_FS
+#if defined(CONFIG_MACH_LGE) || defined(CONFIG_HTC_BATT_CORE)
+MODULE_ALIAS_FS("texfat");
+#else
+MODULE_ALIAS_FS("exfat");
+#endif
+#endif
\ No newline at end of file
diff --git a/fs/exfat/exfat_super.h b/fs/exfat/exfat_super.h
new file mode 100644
index 000000000..916811e3d
--- /dev/null
+++ b/fs/exfat/exfat_super.h
@@ -0,0 +1,171 @@
+/* Some of the source code in this file came from "linux/fs/fat/fat.h".  */
+
+/*
+ *  Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef _EXFAT_LINUX_H
+#define _EXFAT_LINUX_H
+
+#include <linux/buffer_head.h>
+#include <linux/string.h>
+#include <linux/nls.h>
+#include <linux/fs.h>
+#include <linux/mutex.h>
+#include <linux/swap.h>
+
+#include "exfat_config.h"
+#include "exfat_data.h"
+#include "exfat_oal.h"
+
+#include "exfat_blkdev.h"
+#include "exfat_cache.h"
+#include "exfat_nls.h"
+#include "exfat_api.h"
+#include "exfat_core.h"
+
+#define EXFAT_ERRORS_CONT  1    /* ignore error and continue */
+#define EXFAT_ERRORS_PANIC 2    /* panic on error */
+#define EXFAT_ERRORS_RO    3    /* remount r/o on error */
+
+/* ioctl command */
+#define EXFAT_IOCTL_GET_VOLUME_ID _IOR('r', 0x12, __u32)
+
+struct exfat_mount_options {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0)
+	kuid_t fs_uid;
+	kgid_t fs_gid;
+#else
+	uid_t fs_uid;
+	gid_t fs_gid;
+#endif
+	unsigned short fs_fmask;
+	unsigned short fs_dmask;
+	unsigned short allow_utime; /* permission for setting the [am]time */
+	unsigned short codepage;    /* codepage for shortname conversions */
+	char *iocharset;            /* charset for filename input/display */
+	unsigned char casesensitive;
+	unsigned char errors;       /* on error: continue, panic, remount-ro */
+#ifdef CONFIG_EXFAT_DISCARD
+	unsigned char discard;      /* flag on if -o dicard specified and device support discard() */
+#endif /* CONFIG_EXFAT_DISCARD */
+};
+
+#define EXFAT_HASH_BITS    8
+#define EXFAT_HASH_SIZE    (1UL << EXFAT_HASH_BITS)
+
+/*
+ * EXFAT file system in-core superblock data
+ */
+struct exfat_sb_info {
+	FS_INFO_T fs_info;
+	BD_INFO_T bd_info;
+
+	struct exfat_mount_options options;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,00)
+	int s_dirt;
+	struct mutex s_lock;
+#endif
+	struct nls_table *nls_disk; /* Codepage used on disk */
+	struct nls_table *nls_io;   /* Charset used for input and display */
+
+	struct inode *fat_inode;
+
+	spinlock_t inode_hash_lock;
+	struct hlist_head inode_hashtable[EXFAT_HASH_SIZE];
+#ifdef CONFIG_EXFAT_KERNEL_DEBUG
+	long debug_flags;
+#endif /* CONFIG_EXFAT_KERNEL_DEBUG */
+};
+
+/*
+ * EXFAT file system inode data in memory
+ */
+struct exfat_inode_info {
+	FILE_ID_T fid;
+	char  *target;
+	/* NOTE: mmu_private is 64bits, so must hold ->i_mutex to access */
+	loff_t mmu_private;         /* physically allocated size */
+	loff_t i_pos;               /* on-disk position of directory entry or 0 */
+	struct hlist_node i_hash_fat;	/* hash by i_location */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,4,00)
+	struct rw_semaphore truncate_lock;
+#endif
+	struct inode vfs_inode;
+	struct rw_semaphore i_alloc_sem; /* protect bmap against truncate */
+};
+
+#define EXFAT_SB(sb)		((struct exfat_sb_info *)((sb)->s_fs_info))
+
+static inline struct exfat_inode_info *EXFAT_I(struct inode *inode)
+{
+	return container_of(inode, struct exfat_inode_info, vfs_inode);
+}
+
+/*
+ * If ->i_mode can't hold S_IWUGO (i.e. ATTR_RO), we use ->i_attrs to
+ * save ATTR_RO instead of ->i_mode.
+ *
+ * If it's directory and !sbi->options.rodir, ATTR_RO isn't read-only
+ * bit, it's just used as flag for app.
+ */
+static inline int exfat_mode_can_hold_ro(struct inode *inode)
+{
+	struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb);
+
+	if (S_ISDIR(inode->i_mode))
+		return 0;
+
+	if ((~sbi->options.fs_fmask) & S_IWUGO)
+		return 1;
+	return 0;
+}
+
+/* Convert attribute bits and a mask to the UNIX mode. */
+static inline mode_t exfat_make_mode(struct exfat_sb_info *sbi,
+									 u32 attr, mode_t mode)
+{
+	if ((attr & ATTR_READONLY) && !(attr & ATTR_SUBDIR))
+		mode &= ~S_IWUGO;
+
+	if (attr & ATTR_SUBDIR)
+		return (mode & ~sbi->options.fs_dmask) | S_IFDIR;
+	else if (attr & ATTR_SYMLINK)
+		return (mode & ~sbi->options.fs_dmask) | S_IFLNK;
+	else
+		return (mode & ~sbi->options.fs_fmask) | S_IFREG;
+}
+
+/* Return the FAT attribute byte for this inode */
+static inline u32 exfat_make_attr(struct inode *inode)
+{
+	if (exfat_mode_can_hold_ro(inode) && !(inode->i_mode & S_IWUGO))
+		return (EXFAT_I(inode)->fid.attr) | ATTR_READONLY;
+	else
+		return EXFAT_I(inode)->fid.attr;
+}
+
+static inline void exfat_save_attr(struct inode *inode, u32 attr)
+{
+	if (exfat_mode_can_hold_ro(inode))
+		EXFAT_I(inode)->fid.attr = attr & ATTR_RWMASK;
+	else
+		EXFAT_I(inode)->fid.attr = attr & (ATTR_RWMASK | ATTR_READONLY);
+}
+
+#endif /* _EXFAT_LINUX_H */
diff --git a/fs/exfat/exfat_upcase.c b/fs/exfat/exfat_upcase.c
new file mode 100644
index 000000000..3807f37ca
--- /dev/null
+++ b/fs/exfat/exfat_upcase.c
@@ -0,0 +1,405 @@
+/*
+ *  Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+/************************************************************************/
+/*                                                                      */
+/*  PROJECT : exFAT & FAT12/16/32 File System                           */
+/*  FILE    : exfat_upcase.c                                            */
+/*  PURPOSE : exFAT Up-case Table                                       */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  NOTES                                                               */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  REVISION HISTORY (Ver 0.9)                                          */
+/*                                                                      */
+/*  - 2010.11.15 [Joosun Hahn] : first writing                          */
+/*                                                                      */
+/************************************************************************/
+
+#include "exfat_config.h"
+
+#include "exfat_nls.h"
+
+const u8 uni_upcase[NUM_UPCASE<<1] = {
+	0x00, 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00,
+	0x08, 0x00, 0x09, 0x00, 0x0A, 0x00, 0x0B, 0x00, 0x0C, 0x00, 0x0D, 0x00, 0x0E, 0x00, 0x0F, 0x00,
+	0x10, 0x00, 0x11, 0x00, 0x12, 0x00, 0x13, 0x00, 0x14, 0x00, 0x15, 0x00, 0x16, 0x00, 0x17, 0x00,
+	0x18, 0x00, 0x19, 0x00, 0x1A, 0x00, 0x1B, 0x00, 0x1C, 0x00, 0x1D, 0x00, 0x1E, 0x00, 0x1F, 0x00,
+	0x20, 0x00, 0x21, 0x00, 0x22, 0x00, 0x23, 0x00, 0x24, 0x00, 0x25, 0x00, 0x26, 0x00, 0x27, 0x00,
+	0x28, 0x00, 0x29, 0x00, 0x2A, 0x00, 0x2B, 0x00, 0x2C, 0x00, 0x2D, 0x00, 0x2E, 0x00, 0x2F, 0x00,
+	0x30, 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x34, 0x00, 0x35, 0x00, 0x36, 0x00, 0x37, 0x00,
+	0x38, 0x00, 0x39, 0x00, 0x3A, 0x00, 0x3B, 0x00, 0x3C, 0x00, 0x3D, 0x00, 0x3E, 0x00, 0x3F, 0x00,
+	0x40, 0x00, 0x41, 0x00, 0x42, 0x00, 0x43, 0x00, 0x44, 0x00, 0x45, 0x00, 0x46, 0x00, 0x47, 0x00,
+	0x48, 0x00, 0x49, 0x00, 0x4A, 0x00, 0x4B, 0x00, 0x4C, 0x00, 0x4D, 0x00, 0x4E, 0x00, 0x4F, 0x00,
+	0x50, 0x00, 0x51, 0x00, 0x52, 0x00, 0x53, 0x00, 0x54, 0x00, 0x55, 0x00, 0x56, 0x00, 0x57, 0x00,
+	0x58, 0x00, 0x59, 0x00, 0x5A, 0x00, 0x5B, 0x00, 0x5C, 0x00, 0x5D, 0x00, 0x5E, 0x00, 0x5F, 0x00,
+	0x60, 0x00, 0x41, 0x00, 0x42, 0x00, 0x43, 0x00, 0x44, 0x00, 0x45, 0x00, 0x46, 0x00, 0x47, 0x00,
+	0x48, 0x00, 0x49, 0x00, 0x4A, 0x00, 0x4B, 0x00, 0x4C, 0x00, 0x4D, 0x00, 0x4E, 0x00, 0x4F, 0x00,
+	0x50, 0x00, 0x51, 0x00, 0x52, 0x00, 0x53, 0x00, 0x54, 0x00, 0x55, 0x00, 0x56, 0x00, 0x57, 0x00,
+	0x58, 0x00, 0x59, 0x00, 0x5A, 0x00, 0x7B, 0x00, 0x7C, 0x00, 0x7D, 0x00, 0x7E, 0x00, 0x7F, 0x00,
+	0x80, 0x00, 0x81, 0x00, 0x82, 0x00, 0x83, 0x00, 0x84, 0x00, 0x85, 0x00, 0x86, 0x00, 0x87, 0x00,
+	0x88, 0x00, 0x89, 0x00, 0x8A, 0x00, 0x8B, 0x00, 0x8C, 0x00, 0x8D, 0x00, 0x8E, 0x00, 0x8F, 0x00,
+	0x90, 0x00, 0x91, 0x00, 0x92, 0x00, 0x93, 0x00, 0x94, 0x00, 0x95, 0x00, 0x96, 0x00, 0x97, 0x00,
+	0x98, 0x00, 0x99, 0x00, 0x9A, 0x00, 0x9B, 0x00, 0x9C, 0x00, 0x9D, 0x00, 0x9E, 0x00, 0x9F, 0x00,
+	0xA0, 0x00, 0xA1, 0x00, 0xA2, 0x00, 0xA3, 0x00, 0xA4, 0x00, 0xA5, 0x00, 0xA6, 0x00, 0xA7, 0x00,
+	0xA8, 0x00, 0xA9, 0x00, 0xAA, 0x00, 0xAB, 0x00, 0xAC, 0x00, 0xAD, 0x00, 0xAE, 0x00, 0xAF, 0x00,
+	0xB0, 0x00, 0xB1, 0x00, 0xB2, 0x00, 0xB3, 0x00, 0xB4, 0x00, 0xB5, 0x00, 0xB6, 0x00, 0xB7, 0x00,
+	0xB8, 0x00, 0xB9, 0x00, 0xBA, 0x00, 0xBB, 0x00, 0xBC, 0x00, 0xBD, 0x00, 0xBE, 0x00, 0xBF, 0x00,
+	0xC0, 0x00, 0xC1, 0x00, 0xC2, 0x00, 0xC3, 0x00, 0xC4, 0x00, 0xC5, 0x00, 0xC6, 0x00, 0xC7, 0x00,
+	0xC8, 0x00, 0xC9, 0x00, 0xCA, 0x00, 0xCB, 0x00, 0xCC, 0x00, 0xCD, 0x00, 0xCE, 0x00, 0xCF, 0x00,
+	0xD0, 0x00, 0xD1, 0x00, 0xD2, 0x00, 0xD3, 0x00, 0xD4, 0x00, 0xD5, 0x00, 0xD6, 0x00, 0xD7, 0x00,
+	0xD8, 0x00, 0xD9, 0x00, 0xDA, 0x00, 0xDB, 0x00, 0xDC, 0x00, 0xDD, 0x00, 0xDE, 0x00, 0xDF, 0x00,
+	0xC0, 0x00, 0xC1, 0x00, 0xC2, 0x00, 0xC3, 0x00, 0xC4, 0x00, 0xC5, 0x00, 0xC6, 0x00, 0xC7, 0x00,
+	0xC8, 0x00, 0xC9, 0x00, 0xCA, 0x00, 0xCB, 0x00, 0xCC, 0x00, 0xCD, 0x00, 0xCE, 0x00, 0xCF, 0x00,
+	0xD0, 0x00, 0xD1, 0x00, 0xD2, 0x00, 0xD3, 0x00, 0xD4, 0x00, 0xD5, 0x00, 0xD6, 0x00, 0xF7, 0x00,
+	0xD8, 0x00, 0xD9, 0x00, 0xDA, 0x00, 0xDB, 0x00, 0xDC, 0x00, 0xDD, 0x00, 0xDE, 0x00, 0x78, 0x01,
+	0x00, 0x01, 0x00, 0x01, 0x02, 0x01, 0x02, 0x01, 0x04, 0x01, 0x04, 0x01, 0x06, 0x01, 0x06, 0x01,
+	0x08, 0x01, 0x08, 0x01, 0x0A, 0x01, 0x0A, 0x01, 0x0C, 0x01, 0x0C, 0x01, 0x0E, 0x01, 0x0E, 0x01,
+	0x10, 0x01, 0x10, 0x01, 0x12, 0x01, 0x12, 0x01, 0x14, 0x01, 0x14, 0x01, 0x16, 0x01, 0x16, 0x01,
+	0x18, 0x01, 0x18, 0x01, 0x1A, 0x01, 0x1A, 0x01, 0x1C, 0x01, 0x1C, 0x01, 0x1E, 0x01, 0x1E, 0x01,
+	0x20, 0x01, 0x20, 0x01, 0x22, 0x01, 0x22, 0x01, 0x24, 0x01, 0x24, 0x01, 0x26, 0x01, 0x26, 0x01,
+	0x28, 0x01, 0x28, 0x01, 0x2A, 0x01, 0x2A, 0x01, 0x2C, 0x01, 0x2C, 0x01, 0x2E, 0x01, 0x2E, 0x01,
+	0x30, 0x01, 0x31, 0x01, 0x32, 0x01, 0x32, 0x01, 0x34, 0x01, 0x34, 0x01, 0x36, 0x01, 0x36, 0x01,
+	0x38, 0x01, 0x39, 0x01, 0x39, 0x01, 0x3B, 0x01, 0x3B, 0x01, 0x3D, 0x01, 0x3D, 0x01, 0x3F, 0x01,
+	0x3F, 0x01, 0x41, 0x01, 0x41, 0x01, 0x43, 0x01, 0x43, 0x01, 0x45, 0x01, 0x45, 0x01, 0x47, 0x01,
+	0x47, 0x01, 0x49, 0x01, 0x4A, 0x01, 0x4A, 0x01, 0x4C, 0x01, 0x4C, 0x01, 0x4E, 0x01, 0x4E, 0x01,
+	0x50, 0x01, 0x50, 0x01, 0x52, 0x01, 0x52, 0x01, 0x54, 0x01, 0x54, 0x01, 0x56, 0x01, 0x56, 0x01,
+	0x58, 0x01, 0x58, 0x01, 0x5A, 0x01, 0x5A, 0x01, 0x5C, 0x01, 0x5C, 0x01, 0x5E, 0x01, 0x5E, 0x01,
+	0x60, 0x01, 0x60, 0x01, 0x62, 0x01, 0x62, 0x01, 0x64, 0x01, 0x64, 0x01, 0x66, 0x01, 0x66, 0x01,
+	0x68, 0x01, 0x68, 0x01, 0x6A, 0x01, 0x6A, 0x01, 0x6C, 0x01, 0x6C, 0x01, 0x6E, 0x01, 0x6E, 0x01,
+	0x70, 0x01, 0x70, 0x01, 0x72, 0x01, 0x72, 0x01, 0x74, 0x01, 0x74, 0x01, 0x76, 0x01, 0x76, 0x01,
+	0x78, 0x01, 0x79, 0x01, 0x79, 0x01, 0x7B, 0x01, 0x7B, 0x01, 0x7D, 0x01, 0x7D, 0x01, 0x7F, 0x01,
+	0x43, 0x02, 0x81, 0x01, 0x82, 0x01, 0x82, 0x01, 0x84, 0x01, 0x84, 0x01, 0x86, 0x01, 0x87, 0x01,
+	0x87, 0x01, 0x89, 0x01, 0x8A, 0x01, 0x8B, 0x01, 0x8B, 0x01, 0x8D, 0x01, 0x8E, 0x01, 0x8F, 0x01,
+	0x90, 0x01, 0x91, 0x01, 0x91, 0x01, 0x93, 0x01, 0x94, 0x01, 0xF6, 0x01, 0x96, 0x01, 0x97, 0x01,
+	0x98, 0x01, 0x98, 0x01, 0x3D, 0x02, 0x9B, 0x01, 0x9C, 0x01, 0x9D, 0x01, 0x20, 0x02, 0x9F, 0x01,
+	0xA0, 0x01, 0xA0, 0x01, 0xA2, 0x01, 0xA2, 0x01, 0xA4, 0x01, 0xA4, 0x01, 0xA6, 0x01, 0xA7, 0x01,
+	0xA7, 0x01, 0xA9, 0x01, 0xAA, 0x01, 0xAB, 0x01, 0xAC, 0x01, 0xAC, 0x01, 0xAE, 0x01, 0xAF, 0x01,
+	0xAF, 0x01, 0xB1, 0x01, 0xB2, 0x01, 0xB3, 0x01, 0xB3, 0x01, 0xB5, 0x01, 0xB5, 0x01, 0xB7, 0x01,
+	0xB8, 0x01, 0xB8, 0x01, 0xBA, 0x01, 0xBB, 0x01, 0xBC, 0x01, 0xBC, 0x01, 0xBE, 0x01, 0xF7, 0x01,
+	0xC0, 0x01, 0xC1, 0x01, 0xC2, 0x01, 0xC3, 0x01, 0xC4, 0x01, 0xC5, 0x01, 0xC4, 0x01, 0xC7, 0x01,
+	0xC8, 0x01, 0xC7, 0x01, 0xCA, 0x01, 0xCB, 0x01, 0xCA, 0x01, 0xCD, 0x01, 0xCD, 0x01, 0xCF, 0x01,
+	0xCF, 0x01, 0xD1, 0x01, 0xD1, 0x01, 0xD3, 0x01, 0xD3, 0x01, 0xD5, 0x01, 0xD5, 0x01, 0xD7, 0x01,
+	0xD7, 0x01, 0xD9, 0x01, 0xD9, 0x01, 0xDB, 0x01, 0xDB, 0x01, 0x8E, 0x01, 0xDE, 0x01, 0xDE, 0x01,
+	0xE0, 0x01, 0xE0, 0x01, 0xE2, 0x01, 0xE2, 0x01, 0xE4, 0x01, 0xE4, 0x01, 0xE6, 0x01, 0xE6, 0x01,
+	0xE8, 0x01, 0xE8, 0x01, 0xEA, 0x01, 0xEA, 0x01, 0xEC, 0x01, 0xEC, 0x01, 0xEE, 0x01, 0xEE, 0x01,
+	0xF0, 0x01, 0xF1, 0x01, 0xF2, 0x01, 0xF1, 0x01, 0xF4, 0x01, 0xF4, 0x01, 0xF6, 0x01, 0xF7, 0x01,
+	0xF8, 0x01, 0xF8, 0x01, 0xFA, 0x01, 0xFA, 0x01, 0xFC, 0x01, 0xFC, 0x01, 0xFE, 0x01, 0xFE, 0x01,
+	0x00, 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x04, 0x02, 0x04, 0x02, 0x06, 0x02, 0x06, 0x02,
+	0x08, 0x02, 0x08, 0x02, 0x0A, 0x02, 0x0A, 0x02, 0x0C, 0x02, 0x0C, 0x02, 0x0E, 0x02, 0x0E, 0x02,
+	0x10, 0x02, 0x10, 0x02, 0x12, 0x02, 0x12, 0x02, 0x14, 0x02, 0x14, 0x02, 0x16, 0x02, 0x16, 0x02,
+	0x18, 0x02, 0x18, 0x02, 0x1A, 0x02, 0x1A, 0x02, 0x1C, 0x02, 0x1C, 0x02, 0x1E, 0x02, 0x1E, 0x02,
+	0x20, 0x02, 0x21, 0x02, 0x22, 0x02, 0x22, 0x02, 0x24, 0x02, 0x24, 0x02, 0x26, 0x02, 0x26, 0x02,
+	0x28, 0x02, 0x28, 0x02, 0x2A, 0x02, 0x2A, 0x02, 0x2C, 0x02, 0x2C, 0x02, 0x2E, 0x02, 0x2E, 0x02,
+	0x30, 0x02, 0x30, 0x02, 0x32, 0x02, 0x32, 0x02, 0x34, 0x02, 0x35, 0x02, 0x36, 0x02, 0x37, 0x02,
+	0x38, 0x02, 0x39, 0x02, 0x65, 0x2C, 0x3B, 0x02, 0x3B, 0x02, 0x3D, 0x02, 0x66, 0x2C, 0x3F, 0x02,
+	0x40, 0x02, 0x41, 0x02, 0x41, 0x02, 0x43, 0x02, 0x44, 0x02, 0x45, 0x02, 0x46, 0x02, 0x46, 0x02,
+	0x48, 0x02, 0x48, 0x02, 0x4A, 0x02, 0x4A, 0x02, 0x4C, 0x02, 0x4C, 0x02, 0x4E, 0x02, 0x4E, 0x02,
+	0x50, 0x02, 0x51, 0x02, 0x52, 0x02, 0x81, 0x01, 0x86, 0x01, 0x55, 0x02, 0x89, 0x01, 0x8A, 0x01,
+	0x58, 0x02, 0x8F, 0x01, 0x5A, 0x02, 0x90, 0x01, 0x5C, 0x02, 0x5D, 0x02, 0x5E, 0x02, 0x5F, 0x02,
+	0x93, 0x01, 0x61, 0x02, 0x62, 0x02, 0x94, 0x01, 0x64, 0x02, 0x65, 0x02, 0x66, 0x02, 0x67, 0x02,
+	0x97, 0x01, 0x96, 0x01, 0x6A, 0x02, 0x62, 0x2C, 0x6C, 0x02, 0x6D, 0x02, 0x6E, 0x02, 0x9C, 0x01,
+	0x70, 0x02, 0x71, 0x02, 0x9D, 0x01, 0x73, 0x02, 0x74, 0x02, 0x9F, 0x01, 0x76, 0x02, 0x77, 0x02,
+	0x78, 0x02, 0x79, 0x02, 0x7A, 0x02, 0x7B, 0x02, 0x7C, 0x02, 0x64, 0x2C, 0x7E, 0x02, 0x7F, 0x02,
+	0xA6, 0x01, 0x81, 0x02, 0x82, 0x02, 0xA9, 0x01, 0x84, 0x02, 0x85, 0x02, 0x86, 0x02, 0x87, 0x02,
+	0xAE, 0x01, 0x44, 0x02, 0xB1, 0x01, 0xB2, 0x01, 0x45, 0x02, 0x8D, 0x02, 0x8E, 0x02, 0x8F, 0x02,
+	0x90, 0x02, 0x91, 0x02, 0xB7, 0x01, 0x93, 0x02, 0x94, 0x02, 0x95, 0x02, 0x96, 0x02, 0x97, 0x02,
+	0x98, 0x02, 0x99, 0x02, 0x9A, 0x02, 0x9B, 0x02, 0x9C, 0x02, 0x9D, 0x02, 0x9E, 0x02, 0x9F, 0x02,
+	0xA0, 0x02, 0xA1, 0x02, 0xA2, 0x02, 0xA3, 0x02, 0xA4, 0x02, 0xA5, 0x02, 0xA6, 0x02, 0xA7, 0x02,
+	0xA8, 0x02, 0xA9, 0x02, 0xAA, 0x02, 0xAB, 0x02, 0xAC, 0x02, 0xAD, 0x02, 0xAE, 0x02, 0xAF, 0x02,
+	0xB0, 0x02, 0xB1, 0x02, 0xB2, 0x02, 0xB3, 0x02, 0xB4, 0x02, 0xB5, 0x02, 0xB6, 0x02, 0xB7, 0x02,
+	0xB8, 0x02, 0xB9, 0x02, 0xBA, 0x02, 0xBB, 0x02, 0xBC, 0x02, 0xBD, 0x02, 0xBE, 0x02, 0xBF, 0x02,
+	0xC0, 0x02, 0xC1, 0x02, 0xC2, 0x02, 0xC3, 0x02, 0xC4, 0x02, 0xC5, 0x02, 0xC6, 0x02, 0xC7, 0x02,
+	0xC8, 0x02, 0xC9, 0x02, 0xCA, 0x02, 0xCB, 0x02, 0xCC, 0x02, 0xCD, 0x02, 0xCE, 0x02, 0xCF, 0x02,
+	0xD0, 0x02, 0xD1, 0x02, 0xD2, 0x02, 0xD3, 0x02, 0xD4, 0x02, 0xD5, 0x02, 0xD6, 0x02, 0xD7, 0x02,
+	0xD8, 0x02, 0xD9, 0x02, 0xDA, 0x02, 0xDB, 0x02, 0xDC, 0x02, 0xDD, 0x02, 0xDE, 0x02, 0xDF, 0x02,
+	0xE0, 0x02, 0xE1, 0x02, 0xE2, 0x02, 0xE3, 0x02, 0xE4, 0x02, 0xE5, 0x02, 0xE6, 0x02, 0xE7, 0x02,
+	0xE8, 0x02, 0xE9, 0x02, 0xEA, 0x02, 0xEB, 0x02, 0xEC, 0x02, 0xED, 0x02, 0xEE, 0x02, 0xEF, 0x02,
+	0xF0, 0x02, 0xF1, 0x02, 0xF2, 0x02, 0xF3, 0x02, 0xF4, 0x02, 0xF5, 0x02, 0xF6, 0x02, 0xF7, 0x02,
+	0xF8, 0x02, 0xF9, 0x02, 0xFA, 0x02, 0xFB, 0x02, 0xFC, 0x02, 0xFD, 0x02, 0xFE, 0x02, 0xFF, 0x02,
+	0x00, 0x03, 0x01, 0x03, 0x02, 0x03, 0x03, 0x03, 0x04, 0x03, 0x05, 0x03, 0x06, 0x03, 0x07, 0x03,
+	0x08, 0x03, 0x09, 0x03, 0x0A, 0x03, 0x0B, 0x03, 0x0C, 0x03, 0x0D, 0x03, 0x0E, 0x03, 0x0F, 0x03,
+	0x10, 0x03, 0x11, 0x03, 0x12, 0x03, 0x13, 0x03, 0x14, 0x03, 0x15, 0x03, 0x16, 0x03, 0x17, 0x03,
+	0x18, 0x03, 0x19, 0x03, 0x1A, 0x03, 0x1B, 0x03, 0x1C, 0x03, 0x1D, 0x03, 0x1E, 0x03, 0x1F, 0x03,
+	0x20, 0x03, 0x21, 0x03, 0x22, 0x03, 0x23, 0x03, 0x24, 0x03, 0x25, 0x03, 0x26, 0x03, 0x27, 0x03,
+	0x28, 0x03, 0x29, 0x03, 0x2A, 0x03, 0x2B, 0x03, 0x2C, 0x03, 0x2D, 0x03, 0x2E, 0x03, 0x2F, 0x03,
+	0x30, 0x03, 0x31, 0x03, 0x32, 0x03, 0x33, 0x03, 0x34, 0x03, 0x35, 0x03, 0x36, 0x03, 0x37, 0x03,
+	0x38, 0x03, 0x39, 0x03, 0x3A, 0x03, 0x3B, 0x03, 0x3C, 0x03, 0x3D, 0x03, 0x3E, 0x03, 0x3F, 0x03,
+	0x40, 0x03, 0x41, 0x03, 0x42, 0x03, 0x43, 0x03, 0x44, 0x03, 0x45, 0x03, 0x46, 0x03, 0x47, 0x03,
+	0x48, 0x03, 0x49, 0x03, 0x4A, 0x03, 0x4B, 0x03, 0x4C, 0x03, 0x4D, 0x03, 0x4E, 0x03, 0x4F, 0x03,
+	0x50, 0x03, 0x51, 0x03, 0x52, 0x03, 0x53, 0x03, 0x54, 0x03, 0x55, 0x03, 0x56, 0x03, 0x57, 0x03,
+	0x58, 0x03, 0x59, 0x03, 0x5A, 0x03, 0x5B, 0x03, 0x5C, 0x03, 0x5D, 0x03, 0x5E, 0x03, 0x5F, 0x03,
+	0x60, 0x03, 0x61, 0x03, 0x62, 0x03, 0x63, 0x03, 0x64, 0x03, 0x65, 0x03, 0x66, 0x03, 0x67, 0x03,
+	0x68, 0x03, 0x69, 0x03, 0x6A, 0x03, 0x6B, 0x03, 0x6C, 0x03, 0x6D, 0x03, 0x6E, 0x03, 0x6F, 0x03,
+	0x70, 0x03, 0x71, 0x03, 0x72, 0x03, 0x73, 0x03, 0x74, 0x03, 0x75, 0x03, 0x76, 0x03, 0x77, 0x03,
+	0x78, 0x03, 0x79, 0x03, 0x7A, 0x03, 0xFD, 0x03, 0xFE, 0x03, 0xFF, 0x03, 0x7E, 0x03, 0x7F, 0x03,
+	0x80, 0x03, 0x81, 0x03, 0x82, 0x03, 0x83, 0x03, 0x84, 0x03, 0x85, 0x03, 0x86, 0x03, 0x87, 0x03,
+	0x88, 0x03, 0x89, 0x03, 0x8A, 0x03, 0x8B, 0x03, 0x8C, 0x03, 0x8D, 0x03, 0x8E, 0x03, 0x8F, 0x03,
+	0x90, 0x03, 0x91, 0x03, 0x92, 0x03, 0x93, 0x03, 0x94, 0x03, 0x95, 0x03, 0x96, 0x03, 0x97, 0x03,
+	0x98, 0x03, 0x99, 0x03, 0x9A, 0x03, 0x9B, 0x03, 0x9C, 0x03, 0x9D, 0x03, 0x9E, 0x03, 0x9F, 0x03,
+	0xA0, 0x03, 0xA1, 0x03, 0xA2, 0x03, 0xA3, 0x03, 0xA4, 0x03, 0xA5, 0x03, 0xA6, 0x03, 0xA7, 0x03,
+	0xA8, 0x03, 0xA9, 0x03, 0xAA, 0x03, 0xAB, 0x03, 0x86, 0x03, 0x88, 0x03, 0x89, 0x03, 0x8A, 0x03,
+	0xB0, 0x03, 0x91, 0x03, 0x92, 0x03, 0x93, 0x03, 0x94, 0x03, 0x95, 0x03, 0x96, 0x03, 0x97, 0x03,
+	0x98, 0x03, 0x99, 0x03, 0x9A, 0x03, 0x9B, 0x03, 0x9C, 0x03, 0x9D, 0x03, 0x9E, 0x03, 0x9F, 0x03,
+	0xA0, 0x03, 0xA1, 0x03, 0xA3, 0x03, 0xA3, 0x03, 0xA4, 0x03, 0xA5, 0x03, 0xA6, 0x03, 0xA7, 0x03,
+	0xA8, 0x03, 0xA9, 0x03, 0xAA, 0x03, 0xAB, 0x03, 0x8C, 0x03, 0x8E, 0x03, 0x8F, 0x03, 0xCF, 0x03,
+	0xD0, 0x03, 0xD1, 0x03, 0xD2, 0x03, 0xD3, 0x03, 0xD4, 0x03, 0xD5, 0x03, 0xD6, 0x03, 0xD7, 0x03,
+	0xD8, 0x03, 0xD8, 0x03, 0xDA, 0x03, 0xDA, 0x03, 0xDC, 0x03, 0xDC, 0x03, 0xDE, 0x03, 0xDE, 0x03,
+	0xE0, 0x03, 0xE0, 0x03, 0xE2, 0x03, 0xE2, 0x03, 0xE4, 0x03, 0xE4, 0x03, 0xE6, 0x03, 0xE6, 0x03,
+	0xE8, 0x03, 0xE8, 0x03, 0xEA, 0x03, 0xEA, 0x03, 0xEC, 0x03, 0xEC, 0x03, 0xEE, 0x03, 0xEE, 0x03,
+	0xF0, 0x03, 0xF1, 0x03, 0xF9, 0x03, 0xF3, 0x03, 0xF4, 0x03, 0xF5, 0x03, 0xF6, 0x03, 0xF7, 0x03,
+	0xF7, 0x03, 0xF9, 0x03, 0xFA, 0x03, 0xFA, 0x03, 0xFC, 0x03, 0xFD, 0x03, 0xFE, 0x03, 0xFF, 0x03,
+	0x00, 0x04, 0x01, 0x04, 0x02, 0x04, 0x03, 0x04, 0x04, 0x04, 0x05, 0x04, 0x06, 0x04, 0x07, 0x04,
+	0x08, 0x04, 0x09, 0x04, 0x0A, 0x04, 0x0B, 0x04, 0x0C, 0x04, 0x0D, 0x04, 0x0E, 0x04, 0x0F, 0x04,
+	0x10, 0x04, 0x11, 0x04, 0x12, 0x04, 0x13, 0x04, 0x14, 0x04, 0x15, 0x04, 0x16, 0x04, 0x17, 0x04,
+	0x18, 0x04, 0x19, 0x04, 0x1A, 0x04, 0x1B, 0x04, 0x1C, 0x04, 0x1D, 0x04, 0x1E, 0x04, 0x1F, 0x04,
+	0x20, 0x04, 0x21, 0x04, 0x22, 0x04, 0x23, 0x04, 0x24, 0x04, 0x25, 0x04, 0x26, 0x04, 0x27, 0x04,
+	0x28, 0x04, 0x29, 0x04, 0x2A, 0x04, 0x2B, 0x04, 0x2C, 0x04, 0x2D, 0x04, 0x2E, 0x04, 0x2F, 0x04,
+	0x10, 0x04, 0x11, 0x04, 0x12, 0x04, 0x13, 0x04, 0x14, 0x04, 0x15, 0x04, 0x16, 0x04, 0x17, 0x04,
+	0x18, 0x04, 0x19, 0x04, 0x1A, 0x04, 0x1B, 0x04, 0x1C, 0x04, 0x1D, 0x04, 0x1E, 0x04, 0x1F, 0x04,
+	0x20, 0x04, 0x21, 0x04, 0x22, 0x04, 0x23, 0x04, 0x24, 0x04, 0x25, 0x04, 0x26, 0x04, 0x27, 0x04,
+	0x28, 0x04, 0x29, 0x04, 0x2A, 0x04, 0x2B, 0x04, 0x2C, 0x04, 0x2D, 0x04, 0x2E, 0x04, 0x2F, 0x04,
+	0x00, 0x04, 0x01, 0x04, 0x02, 0x04, 0x03, 0x04, 0x04, 0x04, 0x05, 0x04, 0x06, 0x04, 0x07, 0x04,
+	0x08, 0x04, 0x09, 0x04, 0x0A, 0x04, 0x0B, 0x04, 0x0C, 0x04, 0x0D, 0x04, 0x0E, 0x04, 0x0F, 0x04,
+	0x60, 0x04, 0x60, 0x04, 0x62, 0x04, 0x62, 0x04, 0x64, 0x04, 0x64, 0x04, 0x66, 0x04, 0x66, 0x04,
+	0x68, 0x04, 0x68, 0x04, 0x6A, 0x04, 0x6A, 0x04, 0x6C, 0x04, 0x6C, 0x04, 0x6E, 0x04, 0x6E, 0x04,
+	0x70, 0x04, 0x70, 0x04, 0x72, 0x04, 0x72, 0x04, 0x74, 0x04, 0x74, 0x04, 0x76, 0x04, 0x76, 0x04,
+	0x78, 0x04, 0x78, 0x04, 0x7A, 0x04, 0x7A, 0x04, 0x7C, 0x04, 0x7C, 0x04, 0x7E, 0x04, 0x7E, 0x04,
+	0x80, 0x04, 0x80, 0x04, 0x82, 0x04, 0x83, 0x04, 0x84, 0x04, 0x85, 0x04, 0x86, 0x04, 0x87, 0x04,
+	0x88, 0x04, 0x89, 0x04, 0x8A, 0x04, 0x8A, 0x04, 0x8C, 0x04, 0x8C, 0x04, 0x8E, 0x04, 0x8E, 0x04,
+	0x90, 0x04, 0x90, 0x04, 0x92, 0x04, 0x92, 0x04, 0x94, 0x04, 0x94, 0x04, 0x96, 0x04, 0x96, 0x04,
+	0x98, 0x04, 0x98, 0x04, 0x9A, 0x04, 0x9A, 0x04, 0x9C, 0x04, 0x9C, 0x04, 0x9E, 0x04, 0x9E, 0x04,
+	0xA0, 0x04, 0xA0, 0x04, 0xA2, 0x04, 0xA2, 0x04, 0xA4, 0x04, 0xA4, 0x04, 0xA6, 0x04, 0xA6, 0x04,
+	0xA8, 0x04, 0xA8, 0x04, 0xAA, 0x04, 0xAA, 0x04, 0xAC, 0x04, 0xAC, 0x04, 0xAE, 0x04, 0xAE, 0x04,
+	0xB0, 0x04, 0xB0, 0x04, 0xB2, 0x04, 0xB2, 0x04, 0xB4, 0x04, 0xB4, 0x04, 0xB6, 0x04, 0xB6, 0x04,
+	0xB8, 0x04, 0xB8, 0x04, 0xBA, 0x04, 0xBA, 0x04, 0xBC, 0x04, 0xBC, 0x04, 0xBE, 0x04, 0xBE, 0x04,
+	0xC0, 0x04, 0xC1, 0x04, 0xC1, 0x04, 0xC3, 0x04, 0xC3, 0x04, 0xC5, 0x04, 0xC5, 0x04, 0xC7, 0x04,
+	0xC7, 0x04, 0xC9, 0x04, 0xC9, 0x04, 0xCB, 0x04, 0xCB, 0x04, 0xCD, 0x04, 0xCD, 0x04, 0xC0, 0x04,
+	0xD0, 0x04, 0xD0, 0x04, 0xD2, 0x04, 0xD2, 0x04, 0xD4, 0x04, 0xD4, 0x04, 0xD6, 0x04, 0xD6, 0x04,
+	0xD8, 0x04, 0xD8, 0x04, 0xDA, 0x04, 0xDA, 0x04, 0xDC, 0x04, 0xDC, 0x04, 0xDE, 0x04, 0xDE, 0x04,
+	0xE0, 0x04, 0xE0, 0x04, 0xE2, 0x04, 0xE2, 0x04, 0xE4, 0x04, 0xE4, 0x04, 0xE6, 0x04, 0xE6, 0x04,
+	0xE8, 0x04, 0xE8, 0x04, 0xEA, 0x04, 0xEA, 0x04, 0xEC, 0x04, 0xEC, 0x04, 0xEE, 0x04, 0xEE, 0x04,
+	0xF0, 0x04, 0xF0, 0x04, 0xF2, 0x04, 0xF2, 0x04, 0xF4, 0x04, 0xF4, 0x04, 0xF6, 0x04, 0xF6, 0x04,
+	0xF8, 0x04, 0xF8, 0x04, 0xFA, 0x04, 0xFA, 0x04, 0xFC, 0x04, 0xFC, 0x04, 0xFE, 0x04, 0xFE, 0x04,
+	0x00, 0x05, 0x00, 0x05, 0x02, 0x05, 0x02, 0x05, 0x04, 0x05, 0x04, 0x05, 0x06, 0x05, 0x06, 0x05,
+	0x08, 0x05, 0x08, 0x05, 0x0A, 0x05, 0x0A, 0x05, 0x0C, 0x05, 0x0C, 0x05, 0x0E, 0x05, 0x0E, 0x05,
+	0x10, 0x05, 0x10, 0x05, 0x12, 0x05, 0x12, 0x05, 0x14, 0x05, 0x15, 0x05, 0x16, 0x05, 0x17, 0x05,
+	0x18, 0x05, 0x19, 0x05, 0x1A, 0x05, 0x1B, 0x05, 0x1C, 0x05, 0x1D, 0x05, 0x1E, 0x05, 0x1F, 0x05,
+	0x20, 0x05, 0x21, 0x05, 0x22, 0x05, 0x23, 0x05, 0x24, 0x05, 0x25, 0x05, 0x26, 0x05, 0x27, 0x05,
+	0x28, 0x05, 0x29, 0x05, 0x2A, 0x05, 0x2B, 0x05, 0x2C, 0x05, 0x2D, 0x05, 0x2E, 0x05, 0x2F, 0x05,
+	0x30, 0x05, 0x31, 0x05, 0x32, 0x05, 0x33, 0x05, 0x34, 0x05, 0x35, 0x05, 0x36, 0x05, 0x37, 0x05,
+	0x38, 0x05, 0x39, 0x05, 0x3A, 0x05, 0x3B, 0x05, 0x3C, 0x05, 0x3D, 0x05, 0x3E, 0x05, 0x3F, 0x05,
+	0x40, 0x05, 0x41, 0x05, 0x42, 0x05, 0x43, 0x05, 0x44, 0x05, 0x45, 0x05, 0x46, 0x05, 0x47, 0x05,
+	0x48, 0x05, 0x49, 0x05, 0x4A, 0x05, 0x4B, 0x05, 0x4C, 0x05, 0x4D, 0x05, 0x4E, 0x05, 0x4F, 0x05,
+	0x50, 0x05, 0x51, 0x05, 0x52, 0x05, 0x53, 0x05, 0x54, 0x05, 0x55, 0x05, 0x56, 0x05, 0x57, 0x05,
+	0x58, 0x05, 0x59, 0x05, 0x5A, 0x05, 0x5B, 0x05, 0x5C, 0x05, 0x5D, 0x05, 0x5E, 0x05, 0x5F, 0x05,
+	0x60, 0x05, 0x31, 0x05, 0x32, 0x05, 0x33, 0x05, 0x34, 0x05, 0x35, 0x05, 0x36, 0x05, 0x37, 0x05,
+	0x38, 0x05, 0x39, 0x05, 0x3A, 0x05, 0x3B, 0x05, 0x3C, 0x05, 0x3D, 0x05, 0x3E, 0x05, 0x3F, 0x05,
+	0x40, 0x05, 0x41, 0x05, 0x42, 0x05, 0x43, 0x05, 0x44, 0x05, 0x45, 0x05, 0x46, 0x05, 0x47, 0x05,
+	0x48, 0x05, 0x49, 0x05, 0x4A, 0x05, 0x4B, 0x05, 0x4C, 0x05, 0x4D, 0x05, 0x4E, 0x05, 0x4F, 0x05,
+	0x50, 0x05, 0x51, 0x05, 0x52, 0x05, 0x53, 0x05, 0x54, 0x05, 0x55, 0x05, 0x56, 0x05, 0xFF, 0xFF,
+	0xF6, 0x17, 0x63, 0x2C, 0x7E, 0x1D, 0x7F, 0x1D, 0x80, 0x1D, 0x81, 0x1D, 0x82, 0x1D, 0x83, 0x1D,
+	0x84, 0x1D, 0x85, 0x1D, 0x86, 0x1D, 0x87, 0x1D, 0x88, 0x1D, 0x89, 0x1D, 0x8A, 0x1D, 0x8B, 0x1D,
+	0x8C, 0x1D, 0x8D, 0x1D, 0x8E, 0x1D, 0x8F, 0x1D, 0x90, 0x1D, 0x91, 0x1D, 0x92, 0x1D, 0x93, 0x1D,
+	0x94, 0x1D, 0x95, 0x1D, 0x96, 0x1D, 0x97, 0x1D, 0x98, 0x1D, 0x99, 0x1D, 0x9A, 0x1D, 0x9B, 0x1D,
+	0x9C, 0x1D, 0x9D, 0x1D, 0x9E, 0x1D, 0x9F, 0x1D, 0xA0, 0x1D, 0xA1, 0x1D, 0xA2, 0x1D, 0xA3, 0x1D,
+	0xA4, 0x1D, 0xA5, 0x1D, 0xA6, 0x1D, 0xA7, 0x1D, 0xA8, 0x1D, 0xA9, 0x1D, 0xAA, 0x1D, 0xAB, 0x1D,
+	0xAC, 0x1D, 0xAD, 0x1D, 0xAE, 0x1D, 0xAF, 0x1D, 0xB0, 0x1D, 0xB1, 0x1D, 0xB2, 0x1D, 0xB3, 0x1D,
+	0xB4, 0x1D, 0xB5, 0x1D, 0xB6, 0x1D, 0xB7, 0x1D, 0xB8, 0x1D, 0xB9, 0x1D, 0xBA, 0x1D, 0xBB, 0x1D,
+	0xBC, 0x1D, 0xBD, 0x1D, 0xBE, 0x1D, 0xBF, 0x1D, 0xC0, 0x1D, 0xC1, 0x1D, 0xC2, 0x1D, 0xC3, 0x1D,
+	0xC4, 0x1D, 0xC5, 0x1D, 0xC6, 0x1D, 0xC7, 0x1D, 0xC8, 0x1D, 0xC9, 0x1D, 0xCA, 0x1D, 0xCB, 0x1D,
+	0xCC, 0x1D, 0xCD, 0x1D, 0xCE, 0x1D, 0xCF, 0x1D, 0xD0, 0x1D, 0xD1, 0x1D, 0xD2, 0x1D, 0xD3, 0x1D,
+	0xD4, 0x1D, 0xD5, 0x1D, 0xD6, 0x1D, 0xD7, 0x1D, 0xD8, 0x1D, 0xD9, 0x1D, 0xDA, 0x1D, 0xDB, 0x1D,
+	0xDC, 0x1D, 0xDD, 0x1D, 0xDE, 0x1D, 0xDF, 0x1D, 0xE0, 0x1D, 0xE1, 0x1D, 0xE2, 0x1D, 0xE3, 0x1D,
+	0xE4, 0x1D, 0xE5, 0x1D, 0xE6, 0x1D, 0xE7, 0x1D, 0xE8, 0x1D, 0xE9, 0x1D, 0xEA, 0x1D, 0xEB, 0x1D,
+	0xEC, 0x1D, 0xED, 0x1D, 0xEE, 0x1D, 0xEF, 0x1D, 0xF0, 0x1D, 0xF1, 0x1D, 0xF2, 0x1D, 0xF3, 0x1D,
+	0xF4, 0x1D, 0xF5, 0x1D, 0xF6, 0x1D, 0xF7, 0x1D, 0xF8, 0x1D, 0xF9, 0x1D, 0xFA, 0x1D, 0xFB, 0x1D,
+	0xFC, 0x1D, 0xFD, 0x1D, 0xFE, 0x1D, 0xFF, 0x1D, 0x00, 0x1E, 0x00, 0x1E, 0x02, 0x1E, 0x02, 0x1E,
+	0x04, 0x1E, 0x04, 0x1E, 0x06, 0x1E, 0x06, 0x1E, 0x08, 0x1E, 0x08, 0x1E, 0x0A, 0x1E, 0x0A, 0x1E,
+	0x0C, 0x1E, 0x0C, 0x1E, 0x0E, 0x1E, 0x0E, 0x1E, 0x10, 0x1E, 0x10, 0x1E, 0x12, 0x1E, 0x12, 0x1E,
+	0x14, 0x1E, 0x14, 0x1E, 0x16, 0x1E, 0x16, 0x1E, 0x18, 0x1E, 0x18, 0x1E, 0x1A, 0x1E, 0x1A, 0x1E,
+	0x1C, 0x1E, 0x1C, 0x1E, 0x1E, 0x1E, 0x1E, 0x1E, 0x20, 0x1E, 0x20, 0x1E, 0x22, 0x1E, 0x22, 0x1E,
+	0x24, 0x1E, 0x24, 0x1E, 0x26, 0x1E, 0x26, 0x1E, 0x28, 0x1E, 0x28, 0x1E, 0x2A, 0x1E, 0x2A, 0x1E,
+	0x2C, 0x1E, 0x2C, 0x1E, 0x2E, 0x1E, 0x2E, 0x1E, 0x30, 0x1E, 0x30, 0x1E, 0x32, 0x1E, 0x32, 0x1E,
+	0x34, 0x1E, 0x34, 0x1E, 0x36, 0x1E, 0x36, 0x1E, 0x38, 0x1E, 0x38, 0x1E, 0x3A, 0x1E, 0x3A, 0x1E,
+	0x3C, 0x1E, 0x3C, 0x1E, 0x3E, 0x1E, 0x3E, 0x1E, 0x40, 0x1E, 0x40, 0x1E, 0x42, 0x1E, 0x42, 0x1E,
+	0x44, 0x1E, 0x44, 0x1E, 0x46, 0x1E, 0x46, 0x1E, 0x48, 0x1E, 0x48, 0x1E, 0x4A, 0x1E, 0x4A, 0x1E,
+	0x4C, 0x1E, 0x4C, 0x1E, 0x4E, 0x1E, 0x4E, 0x1E, 0x50, 0x1E, 0x50, 0x1E, 0x52, 0x1E, 0x52, 0x1E,
+	0x54, 0x1E, 0x54, 0x1E, 0x56, 0x1E, 0x56, 0x1E, 0x58, 0x1E, 0x58, 0x1E, 0x5A, 0x1E, 0x5A, 0x1E,
+	0x5C, 0x1E, 0x5C, 0x1E, 0x5E, 0x1E, 0x5E, 0x1E, 0x60, 0x1E, 0x60, 0x1E, 0x62, 0x1E, 0x62, 0x1E,
+	0x64, 0x1E, 0x64, 0x1E, 0x66, 0x1E, 0x66, 0x1E, 0x68, 0x1E, 0x68, 0x1E, 0x6A, 0x1E, 0x6A, 0x1E,
+	0x6C, 0x1E, 0x6C, 0x1E, 0x6E, 0x1E, 0x6E, 0x1E, 0x70, 0x1E, 0x70, 0x1E, 0x72, 0x1E, 0x72, 0x1E,
+	0x74, 0x1E, 0x74, 0x1E, 0x76, 0x1E, 0x76, 0x1E, 0x78, 0x1E, 0x78, 0x1E, 0x7A, 0x1E, 0x7A, 0x1E,
+	0x7C, 0x1E, 0x7C, 0x1E, 0x7E, 0x1E, 0x7E, 0x1E, 0x80, 0x1E, 0x80, 0x1E, 0x82, 0x1E, 0x82, 0x1E,
+	0x84, 0x1E, 0x84, 0x1E, 0x86, 0x1E, 0x86, 0x1E, 0x88, 0x1E, 0x88, 0x1E, 0x8A, 0x1E, 0x8A, 0x1E,
+	0x8C, 0x1E, 0x8C, 0x1E, 0x8E, 0x1E, 0x8E, 0x1E, 0x90, 0x1E, 0x90, 0x1E, 0x92, 0x1E, 0x92, 0x1E,
+	0x94, 0x1E, 0x94, 0x1E, 0x96, 0x1E, 0x97, 0x1E, 0x98, 0x1E, 0x99, 0x1E, 0x9A, 0x1E, 0x9B, 0x1E,
+	0x9C, 0x1E, 0x9D, 0x1E, 0x9E, 0x1E, 0x9F, 0x1E, 0xA0, 0x1E, 0xA0, 0x1E, 0xA2, 0x1E, 0xA2, 0x1E,
+	0xA4, 0x1E, 0xA4, 0x1E, 0xA6, 0x1E, 0xA6, 0x1E, 0xA8, 0x1E, 0xA8, 0x1E, 0xAA, 0x1E, 0xAA, 0x1E,
+	0xAC, 0x1E, 0xAC, 0x1E, 0xAE, 0x1E, 0xAE, 0x1E, 0xB0, 0x1E, 0xB0, 0x1E, 0xB2, 0x1E, 0xB2, 0x1E,
+	0xB4, 0x1E, 0xB4, 0x1E, 0xB6, 0x1E, 0xB6, 0x1E, 0xB8, 0x1E, 0xB8, 0x1E, 0xBA, 0x1E, 0xBA, 0x1E,
+	0xBC, 0x1E, 0xBC, 0x1E, 0xBE, 0x1E, 0xBE, 0x1E, 0xC0, 0x1E, 0xC0, 0x1E, 0xC2, 0x1E, 0xC2, 0x1E,
+	0xC4, 0x1E, 0xC4, 0x1E, 0xC6, 0x1E, 0xC6, 0x1E, 0xC8, 0x1E, 0xC8, 0x1E, 0xCA, 0x1E, 0xCA, 0x1E,
+	0xCC, 0x1E, 0xCC, 0x1E, 0xCE, 0x1E, 0xCE, 0x1E, 0xD0, 0x1E, 0xD0, 0x1E, 0xD2, 0x1E, 0xD2, 0x1E,
+	0xD4, 0x1E, 0xD4, 0x1E, 0xD6, 0x1E, 0xD6, 0x1E, 0xD8, 0x1E, 0xD8, 0x1E, 0xDA, 0x1E, 0xDA, 0x1E,
+	0xDC, 0x1E, 0xDC, 0x1E, 0xDE, 0x1E, 0xDE, 0x1E, 0xE0, 0x1E, 0xE0, 0x1E, 0xE2, 0x1E, 0xE2, 0x1E,
+	0xE4, 0x1E, 0xE4, 0x1E, 0xE6, 0x1E, 0xE6, 0x1E, 0xE8, 0x1E, 0xE8, 0x1E, 0xEA, 0x1E, 0xEA, 0x1E,
+	0xEC, 0x1E, 0xEC, 0x1E, 0xEE, 0x1E, 0xEE, 0x1E, 0xF0, 0x1E, 0xF0, 0x1E, 0xF2, 0x1E, 0xF2, 0x1E,
+	0xF4, 0x1E, 0xF4, 0x1E, 0xF6, 0x1E, 0xF6, 0x1E, 0xF8, 0x1E, 0xF8, 0x1E, 0xFA, 0x1E, 0xFB, 0x1E,
+	0xFC, 0x1E, 0xFD, 0x1E, 0xFE, 0x1E, 0xFF, 0x1E, 0x08, 0x1F, 0x09, 0x1F, 0x0A, 0x1F, 0x0B, 0x1F,
+	0x0C, 0x1F, 0x0D, 0x1F, 0x0E, 0x1F, 0x0F, 0x1F, 0x08, 0x1F, 0x09, 0x1F, 0x0A, 0x1F, 0x0B, 0x1F,
+	0x0C, 0x1F, 0x0D, 0x1F, 0x0E, 0x1F, 0x0F, 0x1F, 0x18, 0x1F, 0x19, 0x1F, 0x1A, 0x1F, 0x1B, 0x1F,
+	0x1C, 0x1F, 0x1D, 0x1F, 0x16, 0x1F, 0x17, 0x1F, 0x18, 0x1F, 0x19, 0x1F, 0x1A, 0x1F, 0x1B, 0x1F,
+	0x1C, 0x1F, 0x1D, 0x1F, 0x1E, 0x1F, 0x1F, 0x1F, 0x28, 0x1F, 0x29, 0x1F, 0x2A, 0x1F, 0x2B, 0x1F,
+	0x2C, 0x1F, 0x2D, 0x1F, 0x2E, 0x1F, 0x2F, 0x1F, 0x28, 0x1F, 0x29, 0x1F, 0x2A, 0x1F, 0x2B, 0x1F,
+	0x2C, 0x1F, 0x2D, 0x1F, 0x2E, 0x1F, 0x2F, 0x1F, 0x38, 0x1F, 0x39, 0x1F, 0x3A, 0x1F, 0x3B, 0x1F,
+	0x3C, 0x1F, 0x3D, 0x1F, 0x3E, 0x1F, 0x3F, 0x1F, 0x38, 0x1F, 0x39, 0x1F, 0x3A, 0x1F, 0x3B, 0x1F,
+	0x3C, 0x1F, 0x3D, 0x1F, 0x3E, 0x1F, 0x3F, 0x1F, 0x48, 0x1F, 0x49, 0x1F, 0x4A, 0x1F, 0x4B, 0x1F,
+	0x4C, 0x1F, 0x4D, 0x1F, 0x46, 0x1F, 0x47, 0x1F, 0x48, 0x1F, 0x49, 0x1F, 0x4A, 0x1F, 0x4B, 0x1F,
+	0x4C, 0x1F, 0x4D, 0x1F, 0x4E, 0x1F, 0x4F, 0x1F, 0x50, 0x1F, 0x59, 0x1F, 0x52, 0x1F, 0x5B, 0x1F,
+	0x54, 0x1F, 0x5D, 0x1F, 0x56, 0x1F, 0x5F, 0x1F, 0x58, 0x1F, 0x59, 0x1F, 0x5A, 0x1F, 0x5B, 0x1F,
+	0x5C, 0x1F, 0x5D, 0x1F, 0x5E, 0x1F, 0x5F, 0x1F, 0x68, 0x1F, 0x69, 0x1F, 0x6A, 0x1F, 0x6B, 0x1F,
+	0x6C, 0x1F, 0x6D, 0x1F, 0x6E, 0x1F, 0x6F, 0x1F, 0x68, 0x1F, 0x69, 0x1F, 0x6A, 0x1F, 0x6B, 0x1F,
+	0x6C, 0x1F, 0x6D, 0x1F, 0x6E, 0x1F, 0x6F, 0x1F, 0xBA, 0x1F, 0xBB, 0x1F, 0xC8, 0x1F, 0xC9, 0x1F,
+	0xCA, 0x1F, 0xCB, 0x1F, 0xDA, 0x1F, 0xDB, 0x1F, 0xF8, 0x1F, 0xF9, 0x1F, 0xEA, 0x1F, 0xEB, 0x1F,
+	0xFA, 0x1F, 0xFB, 0x1F, 0x7E, 0x1F, 0x7F, 0x1F, 0x88, 0x1F, 0x89, 0x1F, 0x8A, 0x1F, 0x8B, 0x1F,
+	0x8C, 0x1F, 0x8D, 0x1F, 0x8E, 0x1F, 0x8F, 0x1F, 0x88, 0x1F, 0x89, 0x1F, 0x8A, 0x1F, 0x8B, 0x1F,
+	0x8C, 0x1F, 0x8D, 0x1F, 0x8E, 0x1F, 0x8F, 0x1F, 0x98, 0x1F, 0x99, 0x1F, 0x9A, 0x1F, 0x9B, 0x1F,
+	0x9C, 0x1F, 0x9D, 0x1F, 0x9E, 0x1F, 0x9F, 0x1F, 0x98, 0x1F, 0x99, 0x1F, 0x9A, 0x1F, 0x9B, 0x1F,
+	0x9C, 0x1F, 0x9D, 0x1F, 0x9E, 0x1F, 0x9F, 0x1F, 0xA8, 0x1F, 0xA9, 0x1F, 0xAA, 0x1F, 0xAB, 0x1F,
+	0xAC, 0x1F, 0xAD, 0x1F, 0xAE, 0x1F, 0xAF, 0x1F, 0xA8, 0x1F, 0xA9, 0x1F, 0xAA, 0x1F, 0xAB, 0x1F,
+	0xAC, 0x1F, 0xAD, 0x1F, 0xAE, 0x1F, 0xAF, 0x1F, 0xB8, 0x1F, 0xB9, 0x1F, 0xB2, 0x1F, 0xBC, 0x1F,
+	0xB4, 0x1F, 0xB5, 0x1F, 0xB6, 0x1F, 0xB7, 0x1F, 0xB8, 0x1F, 0xB9, 0x1F, 0xBA, 0x1F, 0xBB, 0x1F,
+	0xBC, 0x1F, 0xBD, 0x1F, 0xBE, 0x1F, 0xBF, 0x1F, 0xC0, 0x1F, 0xC1, 0x1F, 0xC2, 0x1F, 0xC3, 0x1F,
+	0xC4, 0x1F, 0xC5, 0x1F, 0xC6, 0x1F, 0xC7, 0x1F, 0xC8, 0x1F, 0xC9, 0x1F, 0xCA, 0x1F, 0xCB, 0x1F,
+	0xC3, 0x1F, 0xCD, 0x1F, 0xCE, 0x1F, 0xCF, 0x1F, 0xD8, 0x1F, 0xD9, 0x1F, 0xD2, 0x1F, 0xD3, 0x1F,
+	0xD4, 0x1F, 0xD5, 0x1F, 0xD6, 0x1F, 0xD7, 0x1F, 0xD8, 0x1F, 0xD9, 0x1F, 0xDA, 0x1F, 0xDB, 0x1F,
+	0xDC, 0x1F, 0xDD, 0x1F, 0xDE, 0x1F, 0xDF, 0x1F, 0xE8, 0x1F, 0xE9, 0x1F, 0xE2, 0x1F, 0xE3, 0x1F,
+	0xE4, 0x1F, 0xEC, 0x1F, 0xE6, 0x1F, 0xE7, 0x1F, 0xE8, 0x1F, 0xE9, 0x1F, 0xEA, 0x1F, 0xEB, 0x1F,
+	0xEC, 0x1F, 0xED, 0x1F, 0xEE, 0x1F, 0xEF, 0x1F, 0xF0, 0x1F, 0xF1, 0x1F, 0xF2, 0x1F, 0xF3, 0x1F,
+	0xF4, 0x1F, 0xF5, 0x1F, 0xF6, 0x1F, 0xF7, 0x1F, 0xF8, 0x1F, 0xF9, 0x1F, 0xFA, 0x1F, 0xFB, 0x1F,
+	0xF3, 0x1F, 0xFD, 0x1F, 0xFE, 0x1F, 0xFF, 0x1F, 0x00, 0x20, 0x01, 0x20, 0x02, 0x20, 0x03, 0x20,
+	0x04, 0x20, 0x05, 0x20, 0x06, 0x20, 0x07, 0x20, 0x08, 0x20, 0x09, 0x20, 0x0A, 0x20, 0x0B, 0x20,
+	0x0C, 0x20, 0x0D, 0x20, 0x0E, 0x20, 0x0F, 0x20, 0x10, 0x20, 0x11, 0x20, 0x12, 0x20, 0x13, 0x20,
+	0x14, 0x20, 0x15, 0x20, 0x16, 0x20, 0x17, 0x20, 0x18, 0x20, 0x19, 0x20, 0x1A, 0x20, 0x1B, 0x20,
+	0x1C, 0x20, 0x1D, 0x20, 0x1E, 0x20, 0x1F, 0x20, 0x20, 0x20, 0x21, 0x20, 0x22, 0x20, 0x23, 0x20,
+	0x24, 0x20, 0x25, 0x20, 0x26, 0x20, 0x27, 0x20, 0x28, 0x20, 0x29, 0x20, 0x2A, 0x20, 0x2B, 0x20,
+	0x2C, 0x20, 0x2D, 0x20, 0x2E, 0x20, 0x2F, 0x20, 0x30, 0x20, 0x31, 0x20, 0x32, 0x20, 0x33, 0x20,
+	0x34, 0x20, 0x35, 0x20, 0x36, 0x20, 0x37, 0x20, 0x38, 0x20, 0x39, 0x20, 0x3A, 0x20, 0x3B, 0x20,
+	0x3C, 0x20, 0x3D, 0x20, 0x3E, 0x20, 0x3F, 0x20, 0x40, 0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20,
+	0x44, 0x20, 0x45, 0x20, 0x46, 0x20, 0x47, 0x20, 0x48, 0x20, 0x49, 0x20, 0x4A, 0x20, 0x4B, 0x20,
+	0x4C, 0x20, 0x4D, 0x20, 0x4E, 0x20, 0x4F, 0x20, 0x50, 0x20, 0x51, 0x20, 0x52, 0x20, 0x53, 0x20,
+	0x54, 0x20, 0x55, 0x20, 0x56, 0x20, 0x57, 0x20, 0x58, 0x20, 0x59, 0x20, 0x5A, 0x20, 0x5B, 0x20,
+	0x5C, 0x20, 0x5D, 0x20, 0x5E, 0x20, 0x5F, 0x20, 0x60, 0x20, 0x61, 0x20, 0x62, 0x20, 0x63, 0x20,
+	0x64, 0x20, 0x65, 0x20, 0x66, 0x20, 0x67, 0x20, 0x68, 0x20, 0x69, 0x20, 0x6A, 0x20, 0x6B, 0x20,
+	0x6C, 0x20, 0x6D, 0x20, 0x6E, 0x20, 0x6F, 0x20, 0x70, 0x20, 0x71, 0x20, 0x72, 0x20, 0x73, 0x20,
+	0x74, 0x20, 0x75, 0x20, 0x76, 0x20, 0x77, 0x20, 0x78, 0x20, 0x79, 0x20, 0x7A, 0x20, 0x7B, 0x20,
+	0x7C, 0x20, 0x7D, 0x20, 0x7E, 0x20, 0x7F, 0x20, 0x80, 0x20, 0x81, 0x20, 0x82, 0x20, 0x83, 0x20,
+	0x84, 0x20, 0x85, 0x20, 0x86, 0x20, 0x87, 0x20, 0x88, 0x20, 0x89, 0x20, 0x8A, 0x20, 0x8B, 0x20,
+	0x8C, 0x20, 0x8D, 0x20, 0x8E, 0x20, 0x8F, 0x20, 0x90, 0x20, 0x91, 0x20, 0x92, 0x20, 0x93, 0x20,
+	0x94, 0x20, 0x95, 0x20, 0x96, 0x20, 0x97, 0x20, 0x98, 0x20, 0x99, 0x20, 0x9A, 0x20, 0x9B, 0x20,
+	0x9C, 0x20, 0x9D, 0x20, 0x9E, 0x20, 0x9F, 0x20, 0xA0, 0x20, 0xA1, 0x20, 0xA2, 0x20, 0xA3, 0x20,
+	0xA4, 0x20, 0xA5, 0x20, 0xA6, 0x20, 0xA7, 0x20, 0xA8, 0x20, 0xA9, 0x20, 0xAA, 0x20, 0xAB, 0x20,
+	0xAC, 0x20, 0xAD, 0x20, 0xAE, 0x20, 0xAF, 0x20, 0xB0, 0x20, 0xB1, 0x20, 0xB2, 0x20, 0xB3, 0x20,
+	0xB4, 0x20, 0xB5, 0x20, 0xB6, 0x20, 0xB7, 0x20, 0xB8, 0x20, 0xB9, 0x20, 0xBA, 0x20, 0xBB, 0x20,
+	0xBC, 0x20, 0xBD, 0x20, 0xBE, 0x20, 0xBF, 0x20, 0xC0, 0x20, 0xC1, 0x20, 0xC2, 0x20, 0xC3, 0x20,
+	0xC4, 0x20, 0xC5, 0x20, 0xC6, 0x20, 0xC7, 0x20, 0xC8, 0x20, 0xC9, 0x20, 0xCA, 0x20, 0xCB, 0x20,
+	0xCC, 0x20, 0xCD, 0x20, 0xCE, 0x20, 0xCF, 0x20, 0xD0, 0x20, 0xD1, 0x20, 0xD2, 0x20, 0xD3, 0x20,
+	0xD4, 0x20, 0xD5, 0x20, 0xD6, 0x20, 0xD7, 0x20, 0xD8, 0x20, 0xD9, 0x20, 0xDA, 0x20, 0xDB, 0x20,
+	0xDC, 0x20, 0xDD, 0x20, 0xDE, 0x20, 0xDF, 0x20, 0xE0, 0x20, 0xE1, 0x20, 0xE2, 0x20, 0xE3, 0x20,
+	0xE4, 0x20, 0xE5, 0x20, 0xE6, 0x20, 0xE7, 0x20, 0xE8, 0x20, 0xE9, 0x20, 0xEA, 0x20, 0xEB, 0x20,
+	0xEC, 0x20, 0xED, 0x20, 0xEE, 0x20, 0xEF, 0x20, 0xF0, 0x20, 0xF1, 0x20, 0xF2, 0x20, 0xF3, 0x20,
+	0xF4, 0x20, 0xF5, 0x20, 0xF6, 0x20, 0xF7, 0x20, 0xF8, 0x20, 0xF9, 0x20, 0xFA, 0x20, 0xFB, 0x20,
+	0xFC, 0x20, 0xFD, 0x20, 0xFE, 0x20, 0xFF, 0x20, 0x00, 0x21, 0x01, 0x21, 0x02, 0x21, 0x03, 0x21,
+	0x04, 0x21, 0x05, 0x21, 0x06, 0x21, 0x07, 0x21, 0x08, 0x21, 0x09, 0x21, 0x0A, 0x21, 0x0B, 0x21,
+	0x0C, 0x21, 0x0D, 0x21, 0x0E, 0x21, 0x0F, 0x21, 0x10, 0x21, 0x11, 0x21, 0x12, 0x21, 0x13, 0x21,
+	0x14, 0x21, 0x15, 0x21, 0x16, 0x21, 0x17, 0x21, 0x18, 0x21, 0x19, 0x21, 0x1A, 0x21, 0x1B, 0x21,
+	0x1C, 0x21, 0x1D, 0x21, 0x1E, 0x21, 0x1F, 0x21, 0x20, 0x21, 0x21, 0x21, 0x22, 0x21, 0x23, 0x21,
+	0x24, 0x21, 0x25, 0x21, 0x26, 0x21, 0x27, 0x21, 0x28, 0x21, 0x29, 0x21, 0x2A, 0x21, 0x2B, 0x21,
+	0x2C, 0x21, 0x2D, 0x21, 0x2E, 0x21, 0x2F, 0x21, 0x30, 0x21, 0x31, 0x21, 0x32, 0x21, 0x33, 0x21,
+	0x34, 0x21, 0x35, 0x21, 0x36, 0x21, 0x37, 0x21, 0x38, 0x21, 0x39, 0x21, 0x3A, 0x21, 0x3B, 0x21,
+	0x3C, 0x21, 0x3D, 0x21, 0x3E, 0x21, 0x3F, 0x21, 0x40, 0x21, 0x41, 0x21, 0x42, 0x21, 0x43, 0x21,
+	0x44, 0x21, 0x45, 0x21, 0x46, 0x21, 0x47, 0x21, 0x48, 0x21, 0x49, 0x21, 0x4A, 0x21, 0x4B, 0x21,
+	0x4C, 0x21, 0x4D, 0x21, 0x32, 0x21, 0x4F, 0x21, 0x50, 0x21, 0x51, 0x21, 0x52, 0x21, 0x53, 0x21,
+	0x54, 0x21, 0x55, 0x21, 0x56, 0x21, 0x57, 0x21, 0x58, 0x21, 0x59, 0x21, 0x5A, 0x21, 0x5B, 0x21,
+	0x5C, 0x21, 0x5D, 0x21, 0x5E, 0x21, 0x5F, 0x21, 0x60, 0x21, 0x61, 0x21, 0x62, 0x21, 0x63, 0x21,
+	0x64, 0x21, 0x65, 0x21, 0x66, 0x21, 0x67, 0x21, 0x68, 0x21, 0x69, 0x21, 0x6A, 0x21, 0x6B, 0x21,
+	0x6C, 0x21, 0x6D, 0x21, 0x6E, 0x21, 0x6F, 0x21, 0x60, 0x21, 0x61, 0x21, 0x62, 0x21, 0x63, 0x21,
+	0x64, 0x21, 0x65, 0x21, 0x66, 0x21, 0x67, 0x21, 0x68, 0x21, 0x69, 0x21, 0x6A, 0x21, 0x6B, 0x21,
+	0x6C, 0x21, 0x6D, 0x21, 0x6E, 0x21, 0x6F, 0x21, 0x80, 0x21, 0x81, 0x21, 0x82, 0x21, 0x83, 0x21,
+	0x83, 0x21, 0xFF, 0xFF, 0x4B, 0x03, 0xB6, 0x24, 0xB7, 0x24, 0xB8, 0x24, 0xB9, 0x24, 0xBA, 0x24,
+	0xBB, 0x24, 0xBC, 0x24, 0xBD, 0x24, 0xBE, 0x24, 0xBF, 0x24, 0xC0, 0x24, 0xC1, 0x24, 0xC2, 0x24,
+	0xC3, 0x24, 0xC4, 0x24, 0xC5, 0x24, 0xC6, 0x24, 0xC7, 0x24, 0xC8, 0x24, 0xC9, 0x24, 0xCA, 0x24,
+	0xCB, 0x24, 0xCC, 0x24, 0xCD, 0x24, 0xCE, 0x24, 0xCF, 0x24, 0xFF, 0xFF, 0x46, 0x07, 0x00, 0x2C,
+	0x01, 0x2C, 0x02, 0x2C, 0x03, 0x2C, 0x04, 0x2C, 0x05, 0x2C, 0x06, 0x2C, 0x07, 0x2C, 0x08, 0x2C,
+	0x09, 0x2C, 0x0A, 0x2C, 0x0B, 0x2C, 0x0C, 0x2C, 0x0D, 0x2C, 0x0E, 0x2C, 0x0F, 0x2C, 0x10, 0x2C,
+	0x11, 0x2C, 0x12, 0x2C, 0x13, 0x2C, 0x14, 0x2C, 0x15, 0x2C, 0x16, 0x2C, 0x17, 0x2C, 0x18, 0x2C,
+	0x19, 0x2C, 0x1A, 0x2C, 0x1B, 0x2C, 0x1C, 0x2C, 0x1D, 0x2C, 0x1E, 0x2C, 0x1F, 0x2C, 0x20, 0x2C,
+	0x21, 0x2C, 0x22, 0x2C, 0x23, 0x2C, 0x24, 0x2C, 0x25, 0x2C, 0x26, 0x2C, 0x27, 0x2C, 0x28, 0x2C,
+	0x29, 0x2C, 0x2A, 0x2C, 0x2B, 0x2C, 0x2C, 0x2C, 0x2D, 0x2C, 0x2E, 0x2C, 0x5F, 0x2C, 0x60, 0x2C,
+	0x60, 0x2C, 0x62, 0x2C, 0x63, 0x2C, 0x64, 0x2C, 0x65, 0x2C, 0x66, 0x2C, 0x67, 0x2C, 0x67, 0x2C,
+	0x69, 0x2C, 0x69, 0x2C, 0x6B, 0x2C, 0x6B, 0x2C, 0x6D, 0x2C, 0x6E, 0x2C, 0x6F, 0x2C, 0x70, 0x2C,
+	0x71, 0x2C, 0x72, 0x2C, 0x73, 0x2C, 0x74, 0x2C, 0x75, 0x2C, 0x75, 0x2C, 0x77, 0x2C, 0x78, 0x2C,
+	0x79, 0x2C, 0x7A, 0x2C, 0x7B, 0x2C, 0x7C, 0x2C, 0x7D, 0x2C, 0x7E, 0x2C, 0x7F, 0x2C, 0x80, 0x2C,
+	0x80, 0x2C, 0x82, 0x2C, 0x82, 0x2C, 0x84, 0x2C, 0x84, 0x2C, 0x86, 0x2C, 0x86, 0x2C, 0x88, 0x2C,
+	0x88, 0x2C, 0x8A, 0x2C, 0x8A, 0x2C, 0x8C, 0x2C, 0x8C, 0x2C, 0x8E, 0x2C, 0x8E, 0x2C, 0x90, 0x2C,
+	0x90, 0x2C, 0x92, 0x2C, 0x92, 0x2C, 0x94, 0x2C, 0x94, 0x2C, 0x96, 0x2C, 0x96, 0x2C, 0x98, 0x2C,
+	0x98, 0x2C, 0x9A, 0x2C, 0x9A, 0x2C, 0x9C, 0x2C, 0x9C, 0x2C, 0x9E, 0x2C, 0x9E, 0x2C, 0xA0, 0x2C,
+	0xA0, 0x2C, 0xA2, 0x2C, 0xA2, 0x2C, 0xA4, 0x2C, 0xA4, 0x2C, 0xA6, 0x2C, 0xA6, 0x2C, 0xA8, 0x2C,
+	0xA8, 0x2C, 0xAA, 0x2C, 0xAA, 0x2C, 0xAC, 0x2C, 0xAC, 0x2C, 0xAE, 0x2C, 0xAE, 0x2C, 0xB0, 0x2C,
+	0xB0, 0x2C, 0xB2, 0x2C, 0xB2, 0x2C, 0xB4, 0x2C, 0xB4, 0x2C, 0xB6, 0x2C, 0xB6, 0x2C, 0xB8, 0x2C,
+	0xB8, 0x2C, 0xBA, 0x2C, 0xBA, 0x2C, 0xBC, 0x2C, 0xBC, 0x2C, 0xBE, 0x2C, 0xBE, 0x2C, 0xC0, 0x2C,
+	0xC0, 0x2C, 0xC2, 0x2C, 0xC2, 0x2C, 0xC4, 0x2C, 0xC4, 0x2C, 0xC6, 0x2C, 0xC6, 0x2C, 0xC8, 0x2C,
+	0xC8, 0x2C, 0xCA, 0x2C, 0xCA, 0x2C, 0xCC, 0x2C, 0xCC, 0x2C, 0xCE, 0x2C, 0xCE, 0x2C, 0xD0, 0x2C,
+	0xD0, 0x2C, 0xD2, 0x2C, 0xD2, 0x2C, 0xD4, 0x2C, 0xD4, 0x2C, 0xD6, 0x2C, 0xD6, 0x2C, 0xD8, 0x2C,
+	0xD8, 0x2C, 0xDA, 0x2C, 0xDA, 0x2C, 0xDC, 0x2C, 0xDC, 0x2C, 0xDE, 0x2C, 0xDE, 0x2C, 0xE0, 0x2C,
+	0xE0, 0x2C, 0xE2, 0x2C, 0xE2, 0x2C, 0xE4, 0x2C, 0xE5, 0x2C, 0xE6, 0x2C, 0xE7, 0x2C, 0xE8, 0x2C,
+	0xE9, 0x2C, 0xEA, 0x2C, 0xEB, 0x2C, 0xEC, 0x2C, 0xED, 0x2C, 0xEE, 0x2C, 0xEF, 0x2C, 0xF0, 0x2C,
+	0xF1, 0x2C, 0xF2, 0x2C, 0xF3, 0x2C, 0xF4, 0x2C, 0xF5, 0x2C, 0xF6, 0x2C, 0xF7, 0x2C, 0xF8, 0x2C,
+	0xF9, 0x2C, 0xFA, 0x2C, 0xFB, 0x2C, 0xFC, 0x2C, 0xFD, 0x2C, 0xFE, 0x2C, 0xFF, 0x2C, 0xA0, 0x10,
+	0xA1, 0x10, 0xA2, 0x10, 0xA3, 0x10, 0xA4, 0x10, 0xA5, 0x10, 0xA6, 0x10, 0xA7, 0x10, 0xA8, 0x10,
+	0xA9, 0x10, 0xAA, 0x10, 0xAB, 0x10, 0xAC, 0x10, 0xAD, 0x10, 0xAE, 0x10, 0xAF, 0x10, 0xB0, 0x10,
+	0xB1, 0x10, 0xB2, 0x10, 0xB3, 0x10, 0xB4, 0x10, 0xB5, 0x10, 0xB6, 0x10, 0xB7, 0x10, 0xB8, 0x10,
+	0xB9, 0x10, 0xBA, 0x10, 0xBB, 0x10, 0xBC, 0x10, 0xBD, 0x10, 0xBE, 0x10, 0xBF, 0x10, 0xC0, 0x10,
+	0xC1, 0x10, 0xC2, 0x10, 0xC3, 0x10, 0xC4, 0x10, 0xC5, 0x10, 0xFF, 0xFF, 0x1B, 0xD2, 0x21, 0xFF,
+	0x22, 0xFF, 0x23, 0xFF, 0x24, 0xFF, 0x25, 0xFF, 0x26, 0xFF, 0x27, 0xFF, 0x28, 0xFF, 0x29, 0xFF,
+	0x2A, 0xFF, 0x2B, 0xFF, 0x2C, 0xFF, 0x2D, 0xFF, 0x2E, 0xFF, 0x2F, 0xFF, 0x30, 0xFF, 0x31, 0xFF,
+	0x32, 0xFF, 0x33, 0xFF, 0x34, 0xFF, 0x35, 0xFF, 0x36, 0xFF, 0x37, 0xFF, 0x38, 0xFF, 0x39, 0xFF,
+	0x3A, 0xFF, 0x5B, 0xFF, 0x5C, 0xFF, 0x5D, 0xFF, 0x5E, 0xFF, 0x5F, 0xFF, 0x60, 0xFF, 0x61, 0xFF,
+	0x62, 0xFF, 0x63, 0xFF, 0x64, 0xFF, 0x65, 0xFF, 0x66, 0xFF, 0x67, 0xFF, 0x68, 0xFF, 0x69, 0xFF,
+	0x6A, 0xFF, 0x6B, 0xFF, 0x6C, 0xFF, 0x6D, 0xFF, 0x6E, 0xFF, 0x6F, 0xFF, 0x70, 0xFF, 0x71, 0xFF,
+	0x72, 0xFF, 0x73, 0xFF, 0x74, 0xFF, 0x75, 0xFF, 0x76, 0xFF, 0x77, 0xFF, 0x78, 0xFF, 0x79, 0xFF,
+	0x7A, 0xFF, 0x7B, 0xFF, 0x7C, 0xFF, 0x7D, 0xFF, 0x7E, 0xFF, 0x7F, 0xFF, 0x80, 0xFF, 0x81, 0xFF,
+	0x82, 0xFF, 0x83, 0xFF, 0x84, 0xFF, 0x85, 0xFF, 0x86, 0xFF, 0x87, 0xFF, 0x88, 0xFF, 0x89, 0xFF,
+	0x8A, 0xFF, 0x8B, 0xFF, 0x8C, 0xFF, 0x8D, 0xFF, 0x8E, 0xFF, 0x8F, 0xFF, 0x90, 0xFF, 0x91, 0xFF,
+	0x92, 0xFF, 0x93, 0xFF, 0x94, 0xFF, 0x95, 0xFF, 0x96, 0xFF, 0x97, 0xFF, 0x98, 0xFF, 0x99, 0xFF,
+	0x9A, 0xFF, 0x9B, 0xFF, 0x9C, 0xFF, 0x9D, 0xFF, 0x9E, 0xFF, 0x9F, 0xFF, 0xA0, 0xFF, 0xA1, 0xFF,
+	0xA2, 0xFF, 0xA3, 0xFF, 0xA4, 0xFF, 0xA5, 0xFF, 0xA6, 0xFF, 0xA7, 0xFF, 0xA8, 0xFF, 0xA9, 0xFF,
+	0xAA, 0xFF, 0xAB, 0xFF, 0xAC, 0xFF, 0xAD, 0xFF, 0xAE, 0xFF, 0xAF, 0xFF, 0xB0, 0xFF, 0xB1, 0xFF,
+	0xB2, 0xFF, 0xB3, 0xFF, 0xB4, 0xFF, 0xB5, 0xFF, 0xB6, 0xFF, 0xB7, 0xFF, 0xB8, 0xFF, 0xB9, 0xFF,
+	0xBA, 0xFF, 0xBB, 0xFF, 0xBC, 0xFF, 0xBD, 0xFF, 0xBE, 0xFF, 0xBF, 0xFF, 0xC0, 0xFF, 0xC1, 0xFF,
+	0xC2, 0xFF, 0xC3, 0xFF, 0xC4, 0xFF, 0xC5, 0xFF, 0xC6, 0xFF, 0xC7, 0xFF, 0xC8, 0xFF, 0xC9, 0xFF,
+	0xCA, 0xFF, 0xCB, 0xFF, 0xCC, 0xFF, 0xCD, 0xFF, 0xCE, 0xFF, 0xCF, 0xFF, 0xD0, 0xFF, 0xD1, 0xFF,
+	0xD2, 0xFF, 0xD3, 0xFF, 0xD4, 0xFF, 0xD5, 0xFF, 0xD6, 0xFF, 0xD7, 0xFF, 0xD8, 0xFF, 0xD9, 0xFF,
+	0xDA, 0xFF, 0xDB, 0xFF, 0xDC, 0xFF, 0xDD, 0xFF, 0xDE, 0xFF, 0xDF, 0xFF, 0xE0, 0xFF, 0xE1, 0xFF,
+	0xE2, 0xFF, 0xE3, 0xFF, 0xE4, 0xFF, 0xE5, 0xFF, 0xE6, 0xFF, 0xE7, 0xFF, 0xE8, 0xFF, 0xE9, 0xFF,
+	0xEA, 0xFF, 0xEB, 0xFF, 0xEC, 0xFF, 0xED, 0xFF, 0xEE, 0xFF, 0xEF, 0xFF, 0xF0, 0xFF, 0xF1, 0xFF,
+	0xF2, 0xFF, 0xF3, 0xFF, 0xF4, 0xFF, 0xF5, 0xFF, 0xF6, 0xFF, 0xF7, 0xFF, 0xF8, 0xFF, 0xF9, 0xFF,
+	0xFA, 0xFF, 0xFB, 0xFF, 0xFC, 0xFF, 0xFD, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF
+};
diff --git a/fs/exfat/exfat_version.h b/fs/exfat/exfat_version.h
new file mode 100644
index 000000000..a93fa46be
--- /dev/null
+++ b/fs/exfat/exfat_version.h
@@ -0,0 +1,19 @@
+/************************************************************************/
+/*                                                                      */
+/*  PROJECT : exFAT & FAT12/16/32 File System                           */
+/*  FILE    : exfat_version.h                                           */
+/*  PURPOSE : exFAT File Manager                                        */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  NOTES                                                               */
+/*                                                                      */
+/*----------------------------------------------------------------------*/
+/*  REVISION HISTORY                                                    */
+/*                                                                      */
+/*  - 2012.02.10 : Release Version 1.1.0                                */
+/*  - 2012.04.02 : P1 : Change Module License to Samsung Proprietary    */
+/*  - 2012.06.07 : P2 : Fixed incorrect filename problem                */
+/*                                                                      */
+/************************************************************************/
+
+#define EXFAT_VERSION  "1.2.9"
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 1e97f1fda..bdd1c6cff 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -32,7 +32,7 @@
 
 #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
 
-static int setfl(int fd, struct file * filp, unsigned long arg)
+int setfl(int fd, struct file * filp, unsigned long arg)
 {
 	struct inode * inode = file_inode(filp);
 	int error = 0;
@@ -63,6 +63,8 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
 
 	if (filp->f_op->check_flags)
 		error = filp->f_op->check_flags(arg);
+	if (!error && filp->f_op->setfl)
+		error = filp->f_op->setfl(filp, arg);
 	if (error)
 		return error;
 
@@ -83,6 +85,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
  out:
 	return error;
 }
+EXPORT_SYMBOL_GPL(setfl);
 
 static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
                      int force)
diff --git a/fs/file_table.c b/fs/file_table.c
index 7ec0b3e5f..819ee0705 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -147,6 +147,7 @@ over:
 	}
 	return ERR_PTR(-ENFILE);
 }
+EXPORT_SYMBOL_GPL(get_empty_filp);
 
 /**
  * alloc_file - allocate and initialize a 'struct file'
@@ -257,6 +258,7 @@ void flush_delayed_fput(void)
 {
 	delayed_fput(NULL);
 }
+EXPORT_SYMBOL_GPL(flush_delayed_fput);
 
 static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
 
@@ -299,6 +301,7 @@ void __fput_sync(struct file *file)
 }
 
 EXPORT_SYMBOL(fput);
+EXPORT_SYMBOL_GPL(__fput_sync);
 
 void put_filp(struct file *file)
 {
@@ -307,6 +310,7 @@ void put_filp(struct file *file)
 		file_free(file);
 	}
 }
+EXPORT_SYMBOL_GPL(put_filp);
 
 void __init files_init(void)
 {
diff --git a/fs/inode.c b/fs/inode.c
index ef362364d..d93653e6c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1659,7 +1659,7 @@ EXPORT_SYMBOL(generic_update_time);
  * This does the actual work of updating an inodes time or version.  Must have
  * had called mnt_want_write() before calling this.
  */
-static int update_time(struct inode *inode, struct timespec *time, int flags)
+int update_time(struct inode *inode, struct timespec *time, int flags)
 {
 	int (*update_time)(struct inode *, struct timespec *, int);
 
@@ -1668,6 +1668,7 @@ static int update_time(struct inode *inode, struct timespec *time, int flags)
 
 	return update_time(inode, time, flags);
 }
+EXPORT_SYMBOL_GPL(update_time);
 
 /**
  *	touch_atime	-	update the access time
diff --git a/fs/namespace.c b/fs/namespace.c
index c3ed9dc78..86c7d018a 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -517,6 +517,7 @@ void __mnt_drop_write(struct vfsmount *mnt)
 	mnt_dec_writers(real_mount(mnt));
 	preempt_enable();
 }
+EXPORT_SYMBOL_GPL(__mnt_drop_write);
 
 /**
  * mnt_drop_write - give up write access to a mount
@@ -846,6 +847,13 @@ static inline int check_mnt(struct mount *mnt)
 	return mnt->mnt_ns == current->nsproxy->mnt_ns;
 }
 
+/* for aufs, CONFIG_AUFS_BR_FUSE */
+int is_current_mnt_ns(struct vfsmount *mnt)
+{
+	return check_mnt(real_mount(mnt));
+}
+EXPORT_SYMBOL_GPL(is_current_mnt_ns);
+
 /*
  * vfsmount lock must be held for write
  */
@@ -1882,6 +1890,7 @@ int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
 	}
 	return 0;
 }
+EXPORT_SYMBOL_GPL(iterate_mounts);
 
 static void cleanup_group_ids(struct mount *mnt, struct mount *end)
 {
diff --git a/fs/notify/group.c b/fs/notify/group.c
index b7a4b6a69..5a69d6024 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -22,6 +22,7 @@
 #include <linux/srcu.h>
 #include <linux/rculist.h>
 #include <linux/wait.h>
+#include <linux/module.h>
 
 #include <linux/fsnotify_backend.h>
 #include "fsnotify.h"
@@ -109,6 +110,7 @@ void fsnotify_get_group(struct fsnotify_group *group)
 {
 	refcount_inc(&group->refcnt);
 }
+EXPORT_SYMBOL_GPL(fsnotify_get_group);
 
 /*
  * Drop a reference to a group.  Free it if it's through.
@@ -118,6 +120,7 @@ void fsnotify_put_group(struct fsnotify_group *group)
 	if (refcount_dec_and_test(&group->refcnt))
 		fsnotify_final_destroy_group(group);
 }
+EXPORT_SYMBOL_GPL(fsnotify_put_group);
 
 /*
  * Create a new fsnotify_group and hold a reference for the group returned.
@@ -147,6 +150,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
 
 	return group;
 }
+EXPORT_SYMBOL_GPL(fsnotify_alloc_group);
 
 int fsnotify_fasync(int fd, struct file *file, int on)
 {
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index e9191b416..1f8ccfadd 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -108,6 +108,7 @@ void fsnotify_get_mark(struct fsnotify_mark *mark)
 	WARN_ON_ONCE(!refcount_read(&mark->refcnt));
 	refcount_inc(&mark->refcnt);
 }
+EXPORT_SYMBOL_GPL(fsnotify_put_mark);
 
 static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
 {
@@ -392,6 +393,7 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark,
 	mutex_unlock(&group->mark_mutex);
 	fsnotify_free_mark(mark);
 }
+EXPORT_SYMBOL_GPL(fsnotify_destroy_mark);
 
 /*
  * Sorting function for lists of fsnotify marks.
@@ -606,6 +608,7 @@ err:
 	fsnotify_put_mark(mark);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(fsnotify_add_mark);
 
 int fsnotify_add_mark(struct fsnotify_mark *mark, struct inode *inode,
 		      struct vfsmount *mnt, int allow_dups)
@@ -741,6 +744,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
 	fsnotify_get_group(group);
 	mark->group = group;
 }
+EXPORT_SYMBOL_GPL(fsnotify_init_mark);
 
 /*
  * Destroy all marks in destroy_list, waits for SRCU period to finish before
diff --git a/fs/open.c b/fs/open.c
index 7ea118471..861efbefa 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -34,6 +34,9 @@
 
 #include "internal.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/fs.h>
+
 int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
 	struct file *filp)
 {
@@ -64,6 +67,7 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
 	inode_unlock(dentry->d_inode);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(do_truncate);
 
 long vfs_truncate(const struct path *path, loff_t length)
 {
@@ -691,6 +695,7 @@ int open_check_o_direct(struct file *f)
 	}
 	return 0;
 }
+EXPORT_SYMBOL_GPL(open_check_o_direct);
 
 static int do_dentry_open(struct file *f,
 			  struct inode *inode,
@@ -1063,6 +1068,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
 		} else {
 			fsnotify_open(f);
 			fd_install(fd, f);
+			trace_do_sys_open(tmp->name, flags, mode);
 		}
 	}
 	putname(tmp);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 929832432..e5dabc018 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -467,7 +467,7 @@ static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns,
 		seq_printf(m, "0 0 0\n");
 	else
 		seq_printf(m, "%llu %llu %lu\n",
-		   (unsigned long long)task->se.sum_exec_runtime,
+		   (unsigned long long)tsk_seruntime(task),
 		   (unsigned long long)task->sched_info.run_delay,
 		   task->sched_info.pcount);
 
@@ -2014,7 +2014,7 @@ static int map_files_get_link(struct dentry *dentry, struct path *path)
 	down_read(&mm->mmap_sem);
 	vma = find_exact_vma(mm, vm_start, vm_end);
 	if (vma && vma->vm_file) {
-		*path = vma->vm_file->f_path;
+		*path = vma_pr_or_file(vma)->f_path;
 		path_get(path);
 		rc = 0;
 	}
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 6bb20f864..3208972b4 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -118,6 +118,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		   global_zone_page_state(NR_KERNEL_STACK_KB));
 	show_val_kb(m, "PageTables:     ",
 		    global_zone_page_state(NR_PAGETABLE));
+#ifdef CONFIG_UKSM
+	show_val_kb(m, "KsmZeroPages:     ",
+		    global_zone_page_state(NR_UKSM_ZERO_PAGES));
+#endif
 #ifdef CONFIG_QUICKLIST
 	show_val_kb(m, "Quicklists:     ", quicklist_total_size());
 #endif
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 75634379f..7c0dc0ff4 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -45,7 +45,10 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region)
 	file = region->vm_file;
 
 	if (file) {
-		struct inode *inode = file_inode(region->vm_file);
+		struct inode *inode;
+
+		file = vmr_pr_or_file(region);
+		inode = file_inode(file);
 		dev = inode->i_sb->s_dev;
 		ino = inode->i_ino;
 	}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index ec6d2983a..34c71934f 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -311,7 +311,10 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
 	const char *name = NULL;
 
 	if (file) {
-		struct inode *inode = file_inode(vma->vm_file);
+		struct inode *inode;
+
+		file = vma_pr_or_file(vma);
+		inode = file_inode(file);
 		dev = inode->i_sb->s_dev;
 		ino = inode->i_ino;
 		pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
@@ -1741,7 +1744,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
 	struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
 	struct vm_area_struct *vma = v;
 	struct numa_maps *md = &numa_priv->md;
-	struct file *file = vma->vm_file;
+	struct file *file = vma_pr_or_file(vma);
 	struct mm_struct *mm = vma->vm_mm;
 	struct mm_walk walk = {
 		.hugetlb_entry = gather_hugetlb_stats,
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 5b62f57bd..dfb4a3bd0 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -156,7 +156,10 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
 	file = vma->vm_file;
 
 	if (file) {
-		struct inode *inode = file_inode(vma->vm_file);
+		struct inode *inode;
+
+		file = vma_pr_or_file(vma);
+		inode = file_inode(file);
 		dev = inode->i_sb->s_dev;
 		ino = inode->i_ino;
 		pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
diff --git a/fs/read_write.c b/fs/read_write.c
index f8547b82d..d423a5fa3 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -454,6 +454,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(vfs_read);
 
 static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
 {
@@ -484,6 +485,30 @@ ssize_t __vfs_write(struct file *file, const char __user *p, size_t count,
 		return -EINVAL;
 }
 
+vfs_readf_t vfs_readf(struct file *file)
+{
+	const struct file_operations *fop = file->f_op;
+
+	if (fop->read)
+		return fop->read;
+	if (fop->read_iter)
+		return new_sync_read;
+	return ERR_PTR(-ENOSYS);
+}
+EXPORT_SYMBOL_GPL(vfs_readf);
+
+vfs_writef_t vfs_writef(struct file *file)
+{
+	const struct file_operations *fop = file->f_op;
+
+	if (fop->write)
+		return fop->write;
+	if (fop->write_iter)
+		return new_sync_write;
+	return ERR_PTR(-ENOSYS);
+}
+EXPORT_SYMBOL_GPL(vfs_writef);
+
 ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos)
 {
 	mm_segment_t old_fs;
@@ -552,6 +577,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(vfs_write);
 
 static inline loff_t file_pos_read(struct file *file)
 {
diff --git a/fs/splice.c b/fs/splice.c
index 39e2dc01a..ce01a7429 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -837,8 +837,8 @@ EXPORT_SYMBOL(generic_splice_sendpage);
 /*
  * Attempt to initiate a splice from pipe to file.
  */
-static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
-			   loff_t *ppos, size_t len, unsigned int flags)
+long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
+		    loff_t *ppos, size_t len, unsigned int flags)
 {
 	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
 				loff_t *, size_t, unsigned int);
@@ -850,13 +850,14 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
 
 	return splice_write(pipe, out, ppos, len, flags);
 }
+EXPORT_SYMBOL_GPL(do_splice_from);
 
 /*
  * Attempt to initiate a splice from a file to a pipe.
  */
-static long do_splice_to(struct file *in, loff_t *ppos,
-			 struct pipe_inode_info *pipe, size_t len,
-			 unsigned int flags)
+long do_splice_to(struct file *in, loff_t *ppos,
+		  struct pipe_inode_info *pipe, size_t len,
+		  unsigned int flags)
 {
 	ssize_t (*splice_read)(struct file *, loff_t *,
 			       struct pipe_inode_info *, size_t, unsigned int);
@@ -879,6 +880,7 @@ static long do_splice_to(struct file *in, loff_t *ppos,
 
 	return splice_read(in, ppos, pipe, len, flags);
 }
+EXPORT_SYMBOL_GPL(do_splice_to);
 
 /**
  * splice_direct_to_actor - splices data directly between two non-pipes
diff --git a/fs/super.c b/fs/super.c
index afbf4d220..5cc644b5c 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -38,7 +38,7 @@
 #include "internal.h"
 
 
-static LIST_HEAD(super_blocks);
+LIST_HEAD(super_blocks);
 static DEFINE_SPINLOCK(sb_lock);
 
 static char *sb_writers_name[SB_FREEZE_LEVELS] = {
diff --git a/fs/sync.c b/fs/sync.c
index 6e0a2cbaf..47a78bdd7 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -28,7 +28,7 @@
  * wait == 1 case since in that case write_inode() functions do
  * sync_dirty_buffer() and thus effectively write one block at a time.
  */
-static int __sync_filesystem(struct super_block *sb, int wait)
+int __sync_filesystem(struct super_block *sb, int wait)
 {
 	if (wait)
 		sync_inodes_sb(sb);
@@ -39,6 +39,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
 		sb->s_op->sync_fs(sb, wait);
 	return __sync_blockdev(sb->s_bdev, wait);
 }
+EXPORT_SYMBOL_GPL(__sync_filesystem);
 
 /*
  * Write out and wait upon all dirty data associated with this
diff --git a/fs/xattr.c b/fs/xattr.c
index 61cd28ba2..35570cd84 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -297,6 +297,7 @@ vfs_getxattr_alloc(struct dentry *dentry, const char *name, char **xattr_value,
 	*xattr_value = value;
 	return error;
 }
+EXPORT_SYMBOL_GPL(vfs_getxattr_alloc);
 
 ssize_t
 __vfs_getxattr(struct dentry *dentry, struct inode *inode, const char *name,
diff --git a/include/Documentation/ABI/testing/debugfs-aufs b/include/Documentation/ABI/testing/debugfs-aufs
new file mode 100644
index 000000000..99642d105
--- /dev/null
+++ b/include/Documentation/ABI/testing/debugfs-aufs
@@ -0,0 +1,50 @@
+What:		/debug/aufs/si_<id>/
+Date:		March 2009
+Contact:	J. R. Okajima <hooanon05g@gmail.com>
+Description:
+		Under /debug/aufs, a directory named si_<id> is created
+		per aufs mount, where <id> is a unique id generated
+		internally.
+
+What:		/debug/aufs/si_<id>/plink
+Date:		Apr 2013
+Contact:	J. R. Okajima <hooanon05g@gmail.com>
+Description:
+		It has three lines and shows the information about the
+		pseudo-link. The first line is a single number
+		representing a number of buckets. The second line is a
+		number of pseudo-links per buckets (separated by a
+		blank). The last line is a single number representing a
+		total number of psedo-links.
+		When the aufs mount option 'noplink' is specified, it
+		will show "1\n0\n0\n".
+
+What:		/debug/aufs/si_<id>/xib
+Date:		March 2009
+Contact:	J. R. Okajima <hooanon05g@gmail.com>
+Description:
+		It shows the consumed blocks by xib (External Inode Number
+		Bitmap), its block size and file size.
+		When the aufs mount option 'noxino' is specified, it
+		will be empty. About XINO files, see the aufs manual.
+
+What:		/debug/aufs/si_<id>/xino0, xino1 ... xinoN
+Date:		March 2009
+Contact:	J. R. Okajima <hooanon05g@gmail.com>
+Description:
+		It shows the consumed blocks by xino (External Inode Number
+		Translation Table), its link count, block size and file
+		size.
+		When the aufs mount option 'noxino' is specified, it
+		will be empty. About XINO files, see the aufs manual.
+
+What:		/debug/aufs/si_<id>/xigen
+Date:		March 2009
+Contact:	J. R. Okajima <hooanon05g@gmail.com>
+Description:
+		It shows the consumed blocks by xigen (External Inode
+		Generation Table), its block size and file size.
+		If CONFIG_AUFS_EXPORT is disabled, this entry will not
+		be created.
+		When the aufs mount option 'noxino' is specified, it
+		will be empty. About XINO files, see the aufs manual.
diff --git a/include/Documentation/ABI/testing/sysfs-aufs b/include/Documentation/ABI/testing/sysfs-aufs
new file mode 100644
index 000000000..82f951849
--- /dev/null
+++ b/include/Documentation/ABI/testing/sysfs-aufs
@@ -0,0 +1,31 @@
+What:		/sys/fs/aufs/si_<id>/
+Date:		March 2009
+Contact:	J. R. Okajima <hooanon05g@gmail.com>
+Description:
+		Under /sys/fs/aufs, a directory named si_<id> is created
+		per aufs mount, where <id> is a unique id generated
+		internally.
+
+What:		/sys/fs/aufs/si_<id>/br0, br1 ... brN
+Date:		March 2009
+Contact:	J. R. Okajima <hooanon05g@gmail.com>
+Description:
+		It shows the abolute path of a member directory (which
+		is called branch) in aufs, and its permission.
+
+What:		/sys/fs/aufs/si_<id>/brid0, brid1 ... bridN
+Date:		July 2013
+Contact:	J. R. Okajima <hooanon05g@gmail.com>
+Description:
+		It shows the id of a member directory (which is called
+		branch) in aufs.
+
+What:		/sys/fs/aufs/si_<id>/xi_path
+Date:		March 2009
+Contact:	J. R. Okajima <hooanon05g@gmail.com>
+Description:
+		It shows the abolute path of XINO (External Inode Number
+		Bitmap, Translation Table and Generation Table) file
+		even if it is the default path.
+		When the aufs mount option 'noxino' is specified, it
+		will be empty. About XINO files, see the aufs manual.
diff --git a/include/Documentation/filesystems/aufs/README b/include/Documentation/filesystems/aufs/README
new file mode 100644
index 000000000..fa82b6394
--- /dev/null
+++ b/include/Documentation/filesystems/aufs/README
@@ -0,0 +1,393 @@
+
+Aufs4 -- advanced multi layered unification filesystem version 4.x
+http://aufs.sf.net
+Junjiro R. Okajima
+
+
+0. Introduction
+----------------------------------------
+In the early days, aufs was entirely re-designed and re-implemented
+Unionfs Version 1.x series. Adding many original ideas, approaches,
+improvements and implementations, it becomes totally different from
+Unionfs while keeping the basic features.
+Recently, Unionfs Version 2.x series begin taking some of the same
+approaches to aufs1's.
+Unionfs is being developed by Professor Erez Zadok at Stony Brook
+University and his team.
+
+Aufs4 supports linux-4.0 and later, and for linux-3.x series try aufs3.
+If you want older kernel version support, try aufs2-2.6.git or
+aufs2-standalone.git repository, aufs1 from CVS on SourceForge.
+
+Note: it becomes clear that "Aufs was rejected. Let's give it up."
+      According to Christoph Hellwig, linux rejects all union-type
+      filesystems but UnionMount.
+<http://marc.info/?l=linux-kernel&m=123938533724484&w=2>
+
+PS. Al Viro seems have a plan to merge aufs as well as overlayfs and
+    UnionMount, and he pointed out an issue around a directory mutex
+    lock and aufs addressed it. But it is still unsure whether aufs will
+    be merged (or any other union solution).
+<http://marc.info/?l=linux-kernel&m=136312705029295&w=1>
+
+
+1. Features
+----------------------------------------
+- unite several directories into a single virtual filesystem. The member
+  directory is called as a branch.
+- you can specify the permission flags to the branch, which are 'readonly',
+  'readwrite' and 'whiteout-able.'
+- by upper writable branch, internal copyup and whiteout, files/dirs on
+  readonly branch are modifiable logically.
+- dynamic branch manipulation, add, del.
+- etc...
+
+Also there are many enhancements in aufs, such as:
+- test only the highest one for the directory permission (dirperm1)
+- copyup on open (coo=)
+- 'move' policy for copy-up between two writable branches, after
+  checking free space.
+- xattr, acl
+- readdir(3) in userspace.
+- keep inode number by external inode number table
+- keep the timestamps of file/dir in internal copyup operation
+- seekable directory, supporting NFS readdir.
+- whiteout is hardlinked in order to reduce the consumption of inodes
+  on branch
+- do not copyup, nor create a whiteout when it is unnecessary
+- revert a single systemcall when an error occurs in aufs
+- remount interface instead of ioctl
+- maintain /etc/mtab by an external command, /sbin/mount.aufs.
+- loopback mounted filesystem as a branch
+- kernel thread for removing the dir who has a plenty of whiteouts
+- support copyup sparse file (a file which has a 'hole' in it)
+- default permission flags for branches
+- selectable permission flags for ro branch, whether whiteout can
+  exist or not
+- export via NFS.
+- support <sysfs>/fs/aufs and <debugfs>/aufs.
+- support multiple writable branches, some policies to select one
+  among multiple writable branches.
+- a new semantics for link(2) and rename(2) to support multiple
+  writable branches.
+- no glibc changes are required.
+- pseudo hardlink (hardlink over branches)
+- allow a direct access manually to a file on branch, e.g. bypassing aufs.
+  including NFS or remote filesystem branch.
+- userspace wrapper for pathconf(3)/fpathconf(3) with _PC_LINK_MAX.
+- and more...
+
+Currently these features are dropped temporary from aufs4.
+See design/08plan.txt in detail.
+- nested mount, i.e. aufs as readonly no-whiteout branch of another aufs
+  (robr)
+- statistics of aufs thread (/sys/fs/aufs/stat)
+
+Features or just an idea in the future (see also design/*.txt),
+- reorder the branch index without del/re-add.
+- permanent xino files for NFSD
+- an option for refreshing the opened files after add/del branches
+- light version, without branch manipulation. (unnecessary?)
+- copyup in userspace
+- inotify in userspace
+- readv/writev
+
+
+2. Download
+----------------------------------------
+There are three GIT trees for aufs4, aufs4-linux.git,
+aufs4-standalone.git, and aufs-util.git. Note that there is no "4" in
+"aufs-util.git."
+While the aufs-util is always necessary, you need either of aufs4-linux
+or aufs4-standalone.
+
+The aufs4-linux tree includes the whole linux mainline GIT tree,
+git://git.kernel.org/.../torvalds/linux.git.
+And you cannot select CONFIG_AUFS_FS=m for this version, eg. you cannot
+build aufs4 as an external kernel module.
+Several extra patches are not included in this tree. Only
+aufs4-standalone tree contains them. They are described in the later
+section "Configuration and Compilation."
+
+On the other hand, the aufs4-standalone tree has only aufs source files
+and necessary patches, and you can select CONFIG_AUFS_FS=m.
+But you need to apply all aufs patches manually.
+
+You will find GIT branches whose name is in form of "aufs4.x" where "x"
+represents the linux kernel version, "linux-4.x". For instance,
+"aufs4.0" is for linux-4.0. For latest "linux-4.x-rcN", use
+"aufs4.x-rcN" branch.
+
+o aufs4-linux tree
+$ git clone --reference /your/linux/git/tree \
+	git://github.com/sfjro/aufs4-linux.git aufs4-linux.git
+- if you don't have linux GIT tree, then remove "--reference ..."
+$ cd aufs4-linux.git
+$ git checkout origin/aufs4.0
+
+Or You may want to directly git-pull aufs into your linux GIT tree, and
+leave the patch-work to GIT.
+$ cd /your/linux/git/tree
+$ git remote add aufs4 git://github.com/sfjro/aufs4-linux.git
+$ git fetch aufs4
+$ git checkout -b my4.0 v4.0
+$ (add your local change...)
+$ git pull aufs4 aufs4.0
+- now you have v4.0 + your_changes + aufs4.0 in you my4.0 branch.
+- you may need to solve some conflicts between your_changes and
+  aufs4.0. in this case, git-rerere is recommended so that you can
+  solve the similar conflicts automatically when you upgrade to 4.1 or
+  later in the future.
+
+o aufs4-standalone tree
+$ git clone git://github.com/sfjro/aufs4-standalone.git aufs4-standalone.git
+$ cd aufs4-standalone.git
+$ git checkout origin/aufs4.0
+
+o aufs-util tree
+$ git clone git://git.code.sf.net/p/aufs/aufs-util aufs-util.git
+- note that the public aufs-util.git is on SourceForge instead of
+  GitHUB.
+$ cd aufs-util.git
+$ git checkout origin/aufs4.0
+
+Note: The 4.x-rcN branch is to be used with `rc' kernel versions ONLY.
+The minor version number, 'x' in '4.x', of aufs may not always
+follow the minor version number of the kernel.
+Because changes in the kernel that cause the use of a new
+minor version number do not always require changes to aufs-util.
+
+Since aufs-util has its own minor version number, you may not be
+able to find a GIT branch in aufs-util for your kernel's
+exact minor version number.
+In this case, you should git-checkout the branch for the
+nearest lower number.
+
+For (an unreleased) example:
+If you are using "linux-4.10" and the "aufs4.10" branch
+does not exist in aufs-util repository, then "aufs4.9", "aufs4.8"
+or something numerically smaller is the branch for your kernel.
+
+Also you can view all branches by
+	$ git branch -a
+
+
+3. Configuration and Compilation
+----------------------------------------
+Make sure you have git-checkout'ed the correct branch.
+
+For aufs4-linux tree,
+- enable CONFIG_AUFS_FS.
+- set other aufs configurations if necessary.
+
+For aufs4-standalone tree,
+There are several ways to build.
+
+1.
+- apply ./aufs4-kbuild.patch to your kernel source files.
+- apply ./aufs4-base.patch too.
+- apply ./aufs4-mmap.patch too.
+- apply ./aufs4-standalone.patch too, if you have a plan to set
+  CONFIG_AUFS_FS=m. otherwise you don't need ./aufs4-standalone.patch.
+- copy ./{Documentation,fs,include/uapi/linux/aufs_type.h} files to your
+  kernel source tree. Never copy $PWD/include/uapi/linux/Kbuild.
+- enable CONFIG_AUFS_FS, you can select either
+  =m or =y.
+- and build your kernel as usual.
+- install the built kernel.
+  Note: Since linux-3.9, every filesystem module requires an alias
+  "fs-<fsname>". You should make sure that "fs-aufs" is listed in your
+  modules.aliases file if you set CONFIG_AUFS_FS=m.
+- install the header files too by "make headers_install" to the
+  directory where you specify. By default, it is $PWD/usr.
+  "make help" shows a brief note for headers_install.
+- and reboot your system.
+
+2.
+- module only (CONFIG_AUFS_FS=m).
+- apply ./aufs4-base.patch to your kernel source files.
+- apply ./aufs4-mmap.patch too.
+- apply ./aufs4-standalone.patch too.
+- build your kernel, don't forget "make headers_install", and reboot.
+- edit ./config.mk and set other aufs configurations if necessary.
+  Note: You should read $PWD/fs/aufs/Kconfig carefully which describes
+  every aufs configurations.
+- build the module by simple "make".
+  Note: Since linux-3.9, every filesystem module requires an alias
+  "fs-<fsname>". You should make sure that "fs-aufs" is listed in your
+  modules.aliases file.
+- you can specify ${KDIR} make variable which points to your kernel
+  source tree.
+- install the files
+  + run "make install" to install the aufs module, or copy the built
+    $PWD/aufs.ko to /lib/modules/... and run depmod -a (or reboot simply).
+  + run "make install_headers" (instead of headers_install) to install
+    the modified aufs header file (you can specify DESTDIR which is
+    available in aufs standalone version's Makefile only), or copy
+    $PWD/usr/include/linux/aufs_type.h to /usr/include/linux or wherever
+    you like manually. By default, the target directory is $PWD/usr.
+- no need to apply aufs4-kbuild.patch, nor copying source files to your
+  kernel source tree.
+
+Note: The header file aufs_type.h is necessary to build aufs-util
+      as well as "make headers_install" in the kernel source tree.
+      headers_install is subject to be forgotten, but it is essentially
+      necessary, not only for building aufs-util.
+      You may not meet problems without headers_install in some older
+      version though.
+
+And then,
+- read README in aufs-util, build and install it
+- note that your distribution may contain an obsoleted version of
+  aufs_type.h in /usr/include/linux or something. When you build aufs
+  utilities, make sure that your compiler refers the correct aufs header
+  file which is built by "make headers_install."
+- if you want to use readdir(3) in userspace or pathconf(3) wrapper,
+  then run "make install_ulib" too. And refer to the aufs manual in
+  detail.
+
+There several other patches in aufs4-standalone.git. They are all
+optional. When you meet some problems, they will help you.
+- aufs4-loopback.patch
+  Supports a nested loopback mount in a branch-fs. This patch is
+  unnecessary until aufs produces a message like "you may want to try
+  another patch for loopback file".
+- vfs-ino.patch
+  Modifies a system global kernel internal function get_next_ino() in
+  order to stop assigning 0 for an inode-number. Not directly related to
+  aufs, but recommended generally.
+- tmpfs-idr.patch
+  Keeps the tmpfs inode number as the lowest value. Effective to reduce
+  the size of aufs XINO files for tmpfs branch. Also it prevents the
+  duplication of inode number, which is important for backup tools and
+  other utilities. When you find aufs XINO files for tmpfs branch
+  growing too much, try this patch.
+- lockdep-debug.patch
+  Because aufs is not only an ordinary filesystem (callee of VFS), but
+  also a caller of VFS functions for branch filesystems, subclassing of
+  the internal locks for LOCKDEP is necessary. LOCKDEP is a debugging
+  feature of linux kernel. If you enable CONFIG_LOCKDEP, then you will
+  need to apply this debug patch to expand several constant values.
+  If don't know what LOCKDEP, then you don't have apply this patch.
+
+
+4. Usage
+----------------------------------------
+At first, make sure aufs-util are installed, and please read the aufs
+manual, aufs.5 in aufs-util.git tree.
+$ man -l aufs.5
+
+And then,
+$ mkdir /tmp/rw /tmp/aufs
+# mount -t aufs -o br=/tmp/rw:${HOME} none /tmp/aufs
+
+Here is another example. The result is equivalent.
+# mount -t aufs -o br=/tmp/rw=rw:${HOME}=ro none /tmp/aufs
+  Or
+# mount -t aufs -o br:/tmp/rw none /tmp/aufs
+# mount -o remount,append:${HOME} /tmp/aufs
+
+Then, you can see whole tree of your home dir through /tmp/aufs. If
+you modify a file under /tmp/aufs, the one on your home directory is
+not affected, instead the same named file will be newly created under
+/tmp/rw. And all of your modification to a file will be applied to
+the one under /tmp/rw. This is called the file based Copy on Write
+(COW) method.
+Aufs mount options are described in aufs.5.
+If you run chroot or something and make your aufs as a root directory,
+then you need to customize the shutdown script. See the aufs manual in
+detail.
+
+Additionally, there are some sample usages of aufs which are a
+diskless system with network booting, and LiveCD over NFS.
+See sample dir in CVS tree on SourceForge.
+
+
+5. Contact
+----------------------------------------
+When you have any problems or strange behaviour in aufs, please let me
+know with:
+- /proc/mounts (instead of the output of mount(8))
+- /sys/module/aufs/*
+- /sys/fs/aufs/* (if you have them)
+- /debug/aufs/* (if you have them)
+- linux kernel version
+  if your kernel is not plain, for example modified by distributor,
+  the url where i can download its source is necessary too.
+- aufs version which was printed at loading the module or booting the
+  system, instead of the date you downloaded.
+- configuration (define/undefine CONFIG_AUFS_xxx)
+- kernel configuration or /proc/config.gz (if you have it)
+- behaviour which you think to be incorrect
+- actual operation, reproducible one is better
+- mailto: aufs-users at lists.sourceforge.net
+
+Usually, I don't watch the Public Areas(Bugs, Support Requests, Patches,
+and Feature Requests) on SourceForge. Please join and write to
+aufs-users ML.
+
+
+6. Acknowledgements
+----------------------------------------
+Thanks to everyone who have tried and are using aufs, whoever
+have reported a bug or any feedback.
+
+Especially donators:
+Tomas Matejicek(slax.org) made a donation (much more than once).
+	Since Apr 2010, Tomas M (the author of Slax and Linux Live
+	scripts) is making "doubling" donations.
+	Unfortunately I cannot list all of the donators, but I really
+	appreciate.
+	It ends Aug 2010, but the ordinary donation URL is still available.
+	<http://sourceforge.net/donate/index.php?group_id=167503>
+Dai Itasaka made a donation (2007/8).
+Chuck Smith made a donation (2008/4, 10 and 12).
+Henk Schoneveld made a donation (2008/9).
+Chih-Wei Huang, ASUS, CTC donated Eee PC 4G (2008/10).
+Francois Dupoux made a donation (2008/11).
+Bruno Cesar Ribas and Luis Carlos Erpen de Bona, C3SL serves public
+	aufs2 GIT tree (2009/2).
+William Grant made a donation (2009/3).
+Patrick Lane made a donation (2009/4).
+The Mail Archive (mail-archive.com) made donations (2009/5).
+Nippy Networks (Ed Wildgoose) made a donation (2009/7).
+New Dream Network, LLC (www.dreamhost.com) made a donation (2009/11).
+Pavel Pronskiy made a donation (2011/2).
+Iridium and Inmarsat satellite phone retailer (www.mailasail.com), Nippy
+	Networks (Ed Wildgoose) made a donation for hardware (2011/3).
+Max Lekomcev (DOM-TV project) made a donation (2011/7, 12, 2012/3, 6 and
+11).
+Sam Liddicott made a donation (2011/9).
+Era Scarecrow made a donation (2013/4).
+Bor Ratajc made a donation (2013/4).
+Alessandro Gorreta made a donation (2013/4).
+POIRETTE Marc made a donation (2013/4).
+Alessandro Gorreta made a donation (2013/4).
+lauri kasvandik made a donation (2013/5).
+"pemasu from Finland" made a donation (2013/7).
+The Parted Magic Project made a donation (2013/9 and 11).
+Pavel Barta made a donation (2013/10).
+Nikolay Pertsev made a donation (2014/5).
+James B made a donation (2014/7 and 2015/7).
+Stefano Di Biase made a donation (2014/8).
+Daniel Epellei made a donation (2015/1).
+OmegaPhil made a donation (2016/1).
+Tomasz Szewczyk made a donation (2016/4).
+James Burry made a donation (2016/12).
+
+Thank you very much.
+Donations are always, including future donations, very important and
+helpful for me to keep on developing aufs.
+
+
+7.
+----------------------------------------
+If you are an experienced user, no explanation is needed. Aufs is
+just a linux filesystem.
+
+
+Enjoy!
+
+# Local variables: ;
+# mode: text;
+# End: ;
diff --git a/include/Documentation/filesystems/aufs/design/01intro.txt b/include/Documentation/filesystems/aufs/design/01intro.txt
new file mode 100644
index 000000000..aa1052982
--- /dev/null
+++ b/include/Documentation/filesystems/aufs/design/01intro.txt
@@ -0,0 +1,171 @@
+
+# Copyright (C) 2005-2018 Junjiro R. Okajima
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Introduction
+----------------------------------------
+
+aufs [ei ju: ef es] | /ey-yoo-ef-es/ | [a u f s]
+1. abbrev. for "advanced multi-layered unification filesystem".
+2. abbrev. for "another unionfs".
+3. abbrev. for "auf das" in German which means "on the" in English.
+   Ex. "Butter aufs Brot"(G) means "butter onto bread"(E).
+   But "Filesystem aufs Filesystem" is hard to understand.
+4. abbrev. for "African Urban Fashion Show".
+
+AUFS is a filesystem with features:
+- multi layered stackable unification filesystem, the member directory
+  is called as a branch.
+- branch permission and attribute, 'readonly', 'real-readonly',
+  'readwrite', 'whiteout-able', 'link-able whiteout', etc. and their
+  combination.
+- internal "file copy-on-write".
+- logical deletion, whiteout.
+- dynamic branch manipulation, adding, deleting and changing permission.
+- allow bypassing aufs, user's direct branch access.
+- external inode number translation table and bitmap which maintains the
+  persistent aufs inode number.
+- seekable directory, including NFS readdir.
+- file mapping, mmap and sharing pages.
+- pseudo-link, hardlink over branches.
+- loopback mounted filesystem as a branch.
+- several policies to select one among multiple writable branches.
+- revert a single systemcall when an error occurs in aufs.
+- and more...
+
+
+Multi Layered Stackable Unification Filesystem
+----------------------------------------------------------------------
+Most people already knows what it is.
+It is a filesystem which unifies several directories and provides a
+merged single directory. When users access a file, the access will be
+passed/re-directed/converted (sorry, I am not sure which English word is
+correct) to the real file on the member filesystem. The member
+filesystem is called 'lower filesystem' or 'branch' and has a mode
+'readonly' and 'readwrite.' And the deletion for a file on the lower
+readonly branch is handled by creating 'whiteout' on the upper writable
+branch.
+
+On LKML, there have been discussions about UnionMount (Jan Blunck,
+Bharata B Rao and Valerie Aurora) and Unionfs (Erez Zadok). They took
+different approaches to implement the merged-view.
+The former tries putting it into VFS, and the latter implements as a
+separate filesystem.
+(If I misunderstand about these implementations, please let me know and
+I shall correct it. Because it is a long time ago when I read their
+source files last time).
+
+UnionMount's approach will be able to small, but may be hard to share
+branches between several UnionMount since the whiteout in it is
+implemented in the inode on branch filesystem and always
+shared. According to Bharata's post, readdir does not seems to be
+finished yet.
+There are several missing features known in this implementations such as
+- for users, the inode number may change silently. eg. copy-up.
+- link(2) may break by copy-up.
+- read(2) may get an obsoleted filedata (fstat(2) too).
+- fcntl(F_SETLK) may be broken by copy-up.
+- unnecessary copy-up may happen, for example mmap(MAP_PRIVATE) after
+  open(O_RDWR).
+
+In linux-3.18, "overlay" filesystem (formerly known as "overlayfs") was
+merged into mainline. This is another implementation of UnionMount as a
+separated filesystem. All the limitations and known problems which
+UnionMount are equally inherited to "overlay" filesystem.
+
+Unionfs has a longer history. When I started implementing a stackable
+filesystem (Aug 2005), it already existed. It has virtual super_block,
+inode, dentry and file objects and they have an array pointing lower
+same kind objects. After contributing many patches for Unionfs, I
+re-started my project AUFS (Jun 2006).
+
+In AUFS, the structure of filesystem resembles to Unionfs, but I
+implemented my own ideas, approaches and enhancements and it became
+totally different one.
+
+Comparing DM snapshot and fs based implementation
+- the number of bytes to be copied between devices is much smaller.
+- the type of filesystem must be one and only.
+- the fs must be writable, no readonly fs, even for the lower original
+  device. so the compression fs will not be usable. but if we use
+  loopback mount, we may address this issue.
+  for instance,
+	mount /cdrom/squashfs.img /sq
+	losetup /sq/ext2.img
+	losetup /somewhere/cow
+	dmsetup "snapshot /dev/loop0 /dev/loop1 ..."
+- it will be difficult (or needs more operations) to extract the
+  difference between the original device and COW.
+- DM snapshot-merge may help a lot when users try merging. in the
+  fs-layer union, users will use rsync(1).
+
+You may want to read my old paper "Filesystems in LiveCD"
+(http://aufs.sourceforge.net/aufs2/report/sq/sq.pdf).
+
+
+Several characters/aspects/persona of aufs
+----------------------------------------------------------------------
+
+Aufs has several characters, aspects or persona.
+1. a filesystem, callee of VFS helper
+2. sub-VFS, caller of VFS helper for branches
+3. a virtual filesystem which maintains persistent inode number
+4. reader/writer of files on branches such like an application
+
+1. Callee of VFS Helper
+As an ordinary linux filesystem, aufs is a callee of VFS. For instance,
+unlink(2) from an application reaches sys_unlink() kernel function and
+then vfs_unlink() is called. vfs_unlink() is one of VFS helper and it
+calls filesystem specific unlink operation. Actually aufs implements the
+unlink operation but it behaves like a redirector.
+
+2. Caller of VFS Helper for Branches
+aufs_unlink() passes the unlink request to the branch filesystem as if
+it were called from VFS. So the called unlink operation of the branch
+filesystem acts as usual. As a caller of VFS helper, aufs should handle
+every necessary pre/post operation for the branch filesystem.
+- acquire the lock for the parent dir on a branch
+- lookup in a branch
+- revalidate dentry on a branch
+- mnt_want_write() for a branch
+- vfs_unlink() for a branch
+- mnt_drop_write() for a branch
+- release the lock on a branch
+
+3. Persistent Inode Number
+One of the most important issue for a filesystem is to maintain inode
+numbers. This is particularly important to support exporting a
+filesystem via NFS. Aufs is a virtual filesystem which doesn't have a
+backend block device for its own. But some storage is necessary to
+keep and maintain the inode numbers. It may be a large space and may not
+suit to keep in memory. Aufs rents some space from its first writable
+branch filesystem (by default) and creates file(s) on it. These files
+are created by aufs internally and removed soon (currently) keeping
+opened.
+Note: Because these files are removed, they are totally gone after
+      unmounting aufs. It means the inode numbers are not persistent
+      across unmount or reboot. I have a plan to make them really
+      persistent which will be important for aufs on NFS server.
+
+4. Read/Write Files Internally (copy-on-write)
+Because a branch can be readonly, when you write a file on it, aufs will
+"copy-up" it to the upper writable branch internally. And then write the
+originally requested thing to the file. Generally kernel doesn't
+open/read/write file actively. In aufs, even a single write may cause a
+internal "file copy". This behaviour is very similar to cp(1) command.
+
+Some people may think it is better to pass such work to user space
+helper, instead of doing in kernel space. Actually I am still thinking
+about it. But currently I have implemented it in kernel space.
diff --git a/include/Documentation/filesystems/aufs/design/02struct.txt b/include/Documentation/filesystems/aufs/design/02struct.txt
new file mode 100644
index 000000000..f5fb6a8ad
--- /dev/null
+++ b/include/Documentation/filesystems/aufs/design/02struct.txt
@@ -0,0 +1,258 @@
+
+# Copyright (C) 2005-2018 Junjiro R. Okajima
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Basic Aufs Internal Structure
+
+Superblock/Inode/Dentry/File Objects
+----------------------------------------------------------------------
+As like an ordinary filesystem, aufs has its own
+superblock/inode/dentry/file objects. All these objects have a
+dynamically allocated array and store the same kind of pointers to the
+lower filesystem, branch.
+For example, when you build a union with one readwrite branch and one
+readonly, mounted /au, /rw and /ro respectively.
+- /au = /rw + /ro
+- /ro/fileA exists but /rw/fileA
+
+Aufs lookup operation finds /ro/fileA and gets dentry for that. These
+pointers are stored in a aufs dentry. The array in aufs dentry will be,
+- [0] = NULL (because /rw/fileA doesn't exist)
+- [1] = /ro/fileA
+
+This style of an array is essentially same to the aufs
+superblock/inode/dentry/file objects.
+
+Because aufs supports manipulating branches, ie. add/delete/change
+branches dynamically, these objects has its own generation. When
+branches are changed, the generation in aufs superblock is
+incremented. And a generation in other object are compared when it is
+accessed. When a generation in other objects are obsoleted, aufs
+refreshes the internal array.
+
+
+Superblock
+----------------------------------------------------------------------
+Additionally aufs superblock has some data for policies to select one
+among multiple writable branches, XIB files, pseudo-links and kobject.
+See below in detail.
+About the policies which supports copy-down a directory, see
+wbr_policy.txt too.
+
+
+Branch and XINO(External Inode Number Translation Table)
+----------------------------------------------------------------------
+Every branch has its own xino (external inode number translation table)
+file. The xino file is created and unlinked by aufs internally. When two
+members of a union exist on the same filesystem, they share the single
+xino file.
+The struct of a xino file is simple, just a sequence of aufs inode
+numbers which is indexed by the lower inode number.
+In the above sample, assume the inode number of /ro/fileA is i111 and
+aufs assigns the inode number i999 for fileA. Then aufs writes 999 as
+4(8) bytes at 111 * 4(8) bytes offset in the xino file.
+
+When the inode numbers are not contiguous, the xino file will be sparse
+which has a hole in it and doesn't consume as much disk space as it
+might appear. If your branch filesystem consumes disk space for such
+holes, then you should specify 'xino=' option at mounting aufs.
+
+Aufs has a mount option to free the disk blocks for such holes in XINO
+files on tmpfs or ramdisk. But it is not so effective actually. If you
+meet a problem of disk shortage due to XINO files, then you should try
+"tmpfs-ino.patch" (and "vfs-ino.patch" too) in aufs4-standalone.git.
+The patch localizes the assignment inumbers per tmpfs-mount and avoid
+the holes in XINO files.
+
+Also a writable branch has three kinds of "whiteout bases". All these
+are existed when the branch is joined to aufs, and their names are
+whiteout-ed doubly, so that users will never see their names in aufs
+hierarchy.
+1. a regular file which will be hardlinked to all whiteouts.
+2. a directory to store a pseudo-link.
+3. a directory to store an "orphan"-ed file temporary.
+
+1. Whiteout Base
+   When you remove a file on a readonly branch, aufs handles it as a
+   logical deletion and creates a whiteout on the upper writable branch
+   as a hardlink of this file in order not to consume inode on the
+   writable branch.
+2. Pseudo-link Dir
+   See below, Pseudo-link.
+3. Step-Parent Dir
+   When "fileC" exists on the lower readonly branch only and it is
+   opened and removed with its parent dir, and then user writes
+   something into it, then aufs copies-up fileC to this
+   directory. Because there is no other dir to store fileC. After
+   creating a file under this dir, the file is unlinked.
+
+Because aufs supports manipulating branches, ie. add/delete/change
+dynamically, a branch has its own id. When the branch order changes,
+aufs finds the new index by searching the branch id.
+
+
+Pseudo-link
+----------------------------------------------------------------------
+Assume "fileA" exists on the lower readonly branch only and it is
+hardlinked to "fileB" on the branch. When you write something to fileA,
+aufs copies-up it to the upper writable branch. Additionally aufs
+creates a hardlink under the Pseudo-link Directory of the writable
+branch. The inode of a pseudo-link is kept in aufs super_block as a
+simple list. If fileB is read after unlinking fileA, aufs returns
+filedata from the pseudo-link instead of the lower readonly
+branch. Because the pseudo-link is based upon the inode, to keep the
+inode number by xino (see above) is essentially necessary.
+
+All the hardlinks under the Pseudo-link Directory of the writable branch
+should be restored in a proper location later. Aufs provides a utility
+to do this. The userspace helpers executed at remounting and unmounting
+aufs by default.
+During this utility is running, it puts aufs into the pseudo-link
+maintenance mode. In this mode, only the process which began the
+maintenance mode (and its child processes) is allowed to operate in
+aufs. Some other processes which are not related to the pseudo-link will
+be allowed to run too, but the rest have to return an error or wait
+until the maintenance mode ends. If a process already acquires an inode
+mutex (in VFS), it has to return an error.
+
+
+XIB(external inode number bitmap)
+----------------------------------------------------------------------
+Addition to the xino file per a branch, aufs has an external inode number
+bitmap in a superblock object. It is also an internal file such like a
+xino file.
+It is a simple bitmap to mark whether the aufs inode number is in-use or
+not.
+To reduce the file I/O, aufs prepares a single memory page to cache xib.
+
+As well as XINO files, aufs has a feature to truncate/refresh XIB to
+reduce the number of consumed disk blocks for these files.
+
+
+Virtual or Vertical Dir, and Readdir in Userspace
+----------------------------------------------------------------------
+In order to support multiple layers (branches), aufs readdir operation
+constructs a virtual dir block on memory. For readdir, aufs calls
+vfs_readdir() internally for each dir on branches, merges their entries
+with eliminating the whiteout-ed ones, and sets it to file (dir)
+object. So the file object has its entry list until it is closed. The
+entry list will be updated when the file position is zero and becomes
+obsoleted. This decision is made in aufs automatically.
+
+The dynamically allocated memory block for the name of entries has a
+unit of 512 bytes (by default) and stores the names contiguously (no
+padding). Another block for each entry is handled by kmem_cache too.
+During building dir blocks, aufs creates hash list and judging whether
+the entry is whiteouted by its upper branch or already listed.
+The merged result is cached in the corresponding inode object and
+maintained by a customizable life-time option.
+
+Some people may call it can be a security hole or invite DoS attack
+since the opened and once readdir-ed dir (file object) holds its entry
+list and becomes a pressure for system memory. But I'd say it is similar
+to files under /proc or /sys. The virtual files in them also holds a
+memory page (generally) while they are opened. When an idea to reduce
+memory for them is introduced, it will be applied to aufs too.
+For those who really hate this situation, I've developed readdir(3)
+library which operates this merging in userspace. You just need to set
+LD_PRELOAD environment variable, and aufs will not consume no memory in
+kernel space for readdir(3).
+
+
+Workqueue
+----------------------------------------------------------------------
+Aufs sometimes requires privilege access to a branch. For instance,
+in copy-up/down operation. When a user process is going to make changes
+to a file which exists in the lower readonly branch only, and the mode
+of one of ancestor directories may not be writable by a user
+process. Here aufs copy-up the file with its ancestors and they may
+require privilege to set its owner/group/mode/etc.
+This is a typical case of a application character of aufs (see
+Introduction).
+
+Aufs uses workqueue synchronously for this case. It creates its own
+workqueue. The workqueue is a kernel thread and has privilege. Aufs
+passes the request to call mkdir or write (for example), and wait for
+its completion. This approach solves a problem of a signal handler
+simply.
+If aufs didn't adopt the workqueue and changed the privilege of the
+process, then the process may receive the unexpected SIGXFSZ or other
+signals.
+
+Also aufs uses the system global workqueue ("events" kernel thread) too
+for asynchronous tasks, such like handling inotify/fsnotify, re-creating a
+whiteout base and etc. This is unrelated to a privilege.
+Most of aufs operation tries acquiring a rw_semaphore for aufs
+superblock at the beginning, at the same time waits for the completion
+of all queued asynchronous tasks.
+
+
+Whiteout
+----------------------------------------------------------------------
+The whiteout in aufs is very similar to Unionfs's. That is represented
+by its filename. UnionMount takes an approach of a file mode, but I am
+afraid several utilities (find(1) or something) will have to support it.
+
+Basically the whiteout represents "logical deletion" which stops aufs to
+lookup further, but also it represents "dir is opaque" which also stop
+further lookup.
+
+In aufs, rmdir(2) and rename(2) for dir uses whiteout alternatively.
+In order to make several functions in a single systemcall to be
+revertible, aufs adopts an approach to rename a directory to a temporary
+unique whiteouted name.
+For example, in rename(2) dir where the target dir already existed, aufs
+renames the target dir to a temporary unique whiteouted name before the
+actual rename on a branch, and then handles other actions (make it opaque,
+update the attributes, etc). If an error happens in these actions, aufs
+simply renames the whiteouted name back and returns an error. If all are
+succeeded, aufs registers a function to remove the whiteouted unique
+temporary name completely and asynchronously to the system global
+workqueue.
+
+
+Copy-up
+----------------------------------------------------------------------
+It is a well-known feature or concept.
+When user modifies a file on a readonly branch, aufs operate "copy-up"
+internally and makes change to the new file on the upper writable branch.
+When the trigger systemcall does not update the timestamps of the parent
+dir, aufs reverts it after copy-up.
+
+
+Move-down (aufs3.9 and later)
+----------------------------------------------------------------------
+"Copy-up" is one of the essential feature in aufs. It copies a file from
+the lower readonly branch to the upper writable branch when a user
+changes something about the file.
+"Move-down" is an opposite action of copy-up. Basically this action is
+ran manually instead of automatically and internally.
+For desgin and implementation, aufs has to consider these issues.
+- whiteout for the file may exist on the lower branch.
+- ancestor directories may not exist on the lower branch.
+- diropq for the ancestor directories may exist on the upper branch.
+- free space on the lower branch will reduce.
+- another access to the file may happen during moving-down, including
+  UDBA (see "Revalidate Dentry and UDBA").
+- the file should not be hard-linked nor pseudo-linked. they should be
+  handled by auplink utility later.
+
+Sometimes users want to move-down a file from the upper writable branch
+to the lower readonly or writable branch. For instance,
+- the free space of the upper writable branch is going to run out.
+- create a new intermediate branch between the upper and lower branch.
+- etc.
+
+For this purpose, use "aumvdown" command in aufs-util.git.
diff --git a/include/Documentation/filesystems/aufs/design/03atomic_open.txt b/include/Documentation/filesystems/aufs/design/03atomic_open.txt
new file mode 100644
index 000000000..1b0699f8b
--- /dev/null
+++ b/include/Documentation/filesystems/aufs/design/03atomic_open.txt
@@ -0,0 +1,85 @@
+
+# Copyright (C) 2015-2018 Junjiro R. Okajima
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Support for a branch who has its ->atomic_open()
+----------------------------------------------------------------------
+The filesystems who implement its ->atomic_open() are not majority. For
+example NFSv4 does, and aufs should call NFSv4 ->atomic_open,
+particularly for open(O_CREAT|O_EXCL, 0400) case. Other than
+->atomic_open(), NFSv4 returns an error for this open(2). While I am not
+sure whether all filesystems who have ->atomic_open() behave like this,
+but NFSv4 surely returns the error.
+
+In order to support ->atomic_open() for aufs, there are a few
+approaches.
+
+A. Introduce aufs_atomic_open()
+   - calls one of VFS:do_last(), lookup_open() or atomic_open() for
+     branch fs.
+B. Introduce aufs_atomic_open() calling create, open and chmod. this is
+   an aufs user Pip Cet's approach
+   - calls aufs_create(), VFS finish_open() and notify_change().
+   - pass fake-mode to finish_open(), and then correct the mode by
+     notify_change().
+C. Extend aufs_open() to call branch fs's ->atomic_open()
+   - no aufs_atomic_open().
+   - aufs_lookup() registers the TID to an aufs internal object.
+   - aufs_create() does nothing when the matching TID is registered, but
+     registers the mode.
+   - aufs_open() calls branch fs's ->atomic_open() when the matching
+     TID is registered.
+D. Extend aufs_open() to re-try branch fs's ->open() with superuser's
+   credential
+   - no aufs_atomic_open().
+   - aufs_create() registers the TID to an internal object. this info
+     represents "this process created this file just now."
+   - when aufs gets EACCES from branch fs's ->open(), then confirm the
+     registered TID and re-try open() with superuser's credential.
+
+Pros and cons for each approach.
+
+A.
+   - straightforward but highly depends upon VFS internal.
+   - the atomic behavaiour is kept.
+   - some of parameters such as nameidata are hard to reproduce for
+     branch fs.
+   - large overhead.
+B.
+   - easy to implement.
+   - the atomic behavaiour is lost.
+C.
+   - the atomic behavaiour is kept.
+   - dirty and tricky.
+   - VFS checks whether the file is created correctly after calling
+     ->create(), which means this approach doesn't work.
+D.
+   - easy to implement.
+   - the atomic behavaiour is lost.
+   - to open a file with superuser's credential and give it to a user
+     process is a bad idea, since the file object keeps the credential
+     in it. It may affect LSM or something. This approach doesn't work
+     either.
+
+The approach A is ideal, but it hard to implement. So here is a
+variation of A, which is to be implemented.
+
+A-1. Introduce aufs_atomic_open()
+     - calls branch fs ->atomic_open() if exists. otherwise calls
+       vfs_create() and finish_open().
+     - the demerit is that the several checks after branch fs
+       ->atomic_open() are lost. in the ordinary case, the checks are
+       done by VFS:do_last(), lookup_open() and atomic_open(). some can
+       be implemented in aufs, but not all I am afraid.
diff --git a/include/Documentation/filesystems/aufs/design/03lookup.txt b/include/Documentation/filesystems/aufs/design/03lookup.txt
new file mode 100644
index 000000000..80ae63bce
--- /dev/null
+++ b/include/Documentation/filesystems/aufs/design/03lookup.txt
@@ -0,0 +1,113 @@
+
+# Copyright (C) 2005-2018 Junjiro R. Okajima
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Lookup in a Branch
+----------------------------------------------------------------------
+Since aufs has a character of sub-VFS (see Introduction), it operates
+lookup for branches as VFS does. It may be a heavy work. But almost all
+lookup operation in aufs is the simplest case, ie. lookup only an entry
+directly connected to its parent. Digging down the directory hierarchy
+is unnecessary. VFS has a function lookup_one_len() for that use, and
+aufs calls it.
+
+When a branch is a remote filesystem, aufs basically relies upon its
+->d_revalidate(), also aufs forces the hardest revalidate tests for
+them.
+For d_revalidate, aufs implements three levels of revalidate tests. See
+"Revalidate Dentry and UDBA" in detail.
+
+
+Test Only the Highest One for the Directory Permission (dirperm1 option)
+----------------------------------------------------------------------
+Let's try case study.
+- aufs has two branches, upper readwrite and lower readonly.
+  /au = /rw + /ro
+- "dirA" exists under /ro, but /rw. and its mode is 0700.
+- user invoked "chmod a+rx /au/dirA"
+- the internal copy-up is activated and "/rw/dirA" is created and its
+  permission bits are set to world readable.
+- then "/au/dirA" becomes world readable?
+
+In this case, /ro/dirA is still 0700 since it exists in readonly branch,
+or it may be a natively readonly filesystem. If aufs respects the lower
+branch, it should not respond readdir request from other users. But user
+allowed it by chmod. Should really aufs rejects showing the entries
+under /ro/dirA?
+
+To be honest, I don't have a good solution for this case. So aufs
+implements 'dirperm1' and 'nodirperm1' mount options, and leave it to
+users.
+When dirperm1 is specified, aufs checks only the highest one for the
+directory permission, and shows the entries. Otherwise, as usual, checks
+every dir existing on all branches and rejects the request.
+
+As a side effect, dirperm1 option improves the performance of aufs
+because the number of permission check is reduced when the number of
+branch is many.
+
+
+Revalidate Dentry and UDBA (User's Direct Branch Access)
+----------------------------------------------------------------------
+Generally VFS helpers re-validate a dentry as a part of lookup.
+0. digging down the directory hierarchy.
+1. lock the parent dir by its i_mutex.
+2. lookup the final (child) entry.
+3. revalidate it.
+4. call the actual operation (create, unlink, etc.)
+5. unlock the parent dir
+
+If the filesystem implements its ->d_revalidate() (step 3), then it is
+called. Actually aufs implements it and checks the dentry on a branch is
+still valid.
+But it is not enough. Because aufs has to release the lock for the
+parent dir on a branch at the end of ->lookup() (step 2) and
+->d_revalidate() (step 3) while the i_mutex of the aufs dir is still
+held by VFS.
+If the file on a branch is changed directly, eg. bypassing aufs, after
+aufs released the lock, then the subsequent operation may cause
+something unpleasant result.
+
+This situation is a result of VFS architecture, ->lookup() and
+->d_revalidate() is separated. But I never say it is wrong. It is a good
+design from VFS's point of view. It is just not suitable for sub-VFS
+character in aufs.
+
+Aufs supports such case by three level of revalidation which is
+selectable by user.
+1. Simple Revalidate
+   Addition to the native flow in VFS's, confirm the child-parent
+   relationship on the branch just after locking the parent dir on the
+   branch in the "actual operation" (step 4). When this validation
+   fails, aufs returns EBUSY. ->d_revalidate() (step 3) in aufs still
+   checks the validation of the dentry on branches.
+2. Monitor Changes Internally by Inotify/Fsnotify
+   Addition to above, in the "actual operation" (step 4) aufs re-lookup
+   the dentry on the branch, and returns EBUSY if it finds different
+   dentry.
+   Additionally, aufs sets the inotify/fsnotify watch for every dir on branches
+   during it is in cache. When the event is notified, aufs registers a
+   function to kernel 'events' thread by schedule_work(). And the
+   function sets some special status to the cached aufs dentry and inode
+   private data. If they are not cached, then aufs has nothing to
+   do. When the same file is accessed through aufs (step 0-3) later,
+   aufs will detect the status and refresh all necessary data.
+   In this mode, aufs has to ignore the event which is fired by aufs
+   itself.
+3. No Extra Validation
+   This is the simplest test and doesn't add any additional revalidation
+   test, and skip the revalidation in step 4. It is useful and improves
+   aufs performance when system surely hide the aufs branches from user,
+   by over-mounting something (or another method).
diff --git a/include/Documentation/filesystems/aufs/design/04branch.txt b/include/Documentation/filesystems/aufs/design/04branch.txt
new file mode 100644
index 000000000..0c1289737
--- /dev/null
+++ b/include/Documentation/filesystems/aufs/design/04branch.txt
@@ -0,0 +1,74 @@
+
+# Copyright (C) 2005-2018 Junjiro R. Okajima
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Branch Manipulation
+
+Since aufs supports dynamic branch manipulation, ie. add/remove a branch
+and changing its permission/attribute, there are a lot of works to do.
+
+
+Add a Branch
+----------------------------------------------------------------------
+o Confirm the adding dir exists outside of aufs, including loopback
+  mount, and its various attributes.
+o Initialize the xino file and whiteout bases if necessary.
+  See struct.txt.
+
+o Check the owner/group/mode of the directory
+  When the owner/group/mode of the adding directory differs from the
+  existing branch, aufs issues a warning because it may impose a
+  security risk.
+  For example, when a upper writable branch has a world writable empty
+  top directory, a malicious user can create any files on the writable
+  branch directly, like copy-up and modify manually. If something like
+  /etc/{passwd,shadow} exists on the lower readonly branch but the upper
+  writable branch, and the writable branch is world-writable, then a
+  malicious guy may create /etc/passwd on the writable branch directly
+  and the infected file will be valid in aufs.
+  I am afraid it can be a security issue, but aufs can do nothing except
+  producing a warning.
+
+
+Delete a Branch
+----------------------------------------------------------------------
+o Confirm the deleting branch is not busy
+  To be general, there is one merit to adopt "remount" interface to
+  manipulate branches. It is to discard caches. At deleting a branch,
+  aufs checks the still cached (and connected) dentries and inodes. If
+  there are any, then they are all in-use. An inode without its
+  corresponding dentry can be alive alone (for example, inotify/fsnotify case).
+
+  For the cached one, aufs checks whether the same named entry exists on
+  other branches.
+  If the cached one is a directory, because aufs provides a merged view
+  to users, as long as one dir is left on any branch aufs can show the
+  dir to users. In this case, the branch can be removed from aufs.
+  Otherwise aufs rejects deleting the branch.
+
+  If any file on the deleting branch is opened by aufs, then aufs
+  rejects deleting.
+
+
+Modify the Permission of a Branch
+----------------------------------------------------------------------
+o Re-initialize or remove the xino file and whiteout bases if necessary.
+  See struct.txt.
+
+o rw --> ro: Confirm the modifying branch is not busy
+  Aufs rejects the request if any of these conditions are true.
+  - a file on the branch is mmap-ed.
+  - a regular file on the branch is opened for write and there is no
+    same named entry on the upper branch.
diff --git a/include/Documentation/filesystems/aufs/design/05wbr_policy.txt b/include/Documentation/filesystems/aufs/design/05wbr_policy.txt
new file mode 100644
index 000000000..cc5b7979c
--- /dev/null
+++ b/include/Documentation/filesystems/aufs/design/05wbr_policy.txt
@@ -0,0 +1,64 @@
+
+# Copyright (C) 2005-2018 Junjiro R. Okajima
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Policies to Select One among Multiple Writable Branches
+----------------------------------------------------------------------
+When the number of writable branch is more than one, aufs has to decide
+the target branch for file creation or copy-up. By default, the highest
+writable branch which has the parent (or ancestor) dir of the target
+file is chosen (top-down-parent policy).
+By user's request, aufs implements some other policies to select the
+writable branch, for file creation several policies, round-robin,
+most-free-space, and other policies. For copy-up, top-down-parent,
+bottom-up-parent, bottom-up and others.
+
+As expected, the round-robin policy selects the branch in circular. When
+you have two writable branches and creates 10 new files, 5 files will be
+created for each branch. mkdir(2) systemcall is an exception. When you
+create 10 new directories, all will be created on the same branch.
+And the most-free-space policy selects the one which has most free
+space among the writable branches. The amount of free space will be
+checked by aufs internally, and users can specify its time interval.
+
+The policies for copy-up is more simple,
+top-down-parent is equivalent to the same named on in create policy,
+bottom-up-parent selects the writable branch where the parent dir
+exists and the nearest upper one from the copyup-source,
+bottom-up selects the nearest upper writable branch from the
+copyup-source, regardless the existence of the parent dir.
+
+There are some rules or exceptions to apply these policies.
+- If there is a readonly branch above the policy-selected branch and
+  the parent dir is marked as opaque (a variation of whiteout), or the
+  target (creating) file is whiteout-ed on the upper readonly branch,
+  then the result of the policy is ignored and the target file will be
+  created on the nearest upper writable branch than the readonly branch.
+- If there is a writable branch above the policy-selected branch and
+  the parent dir is marked as opaque or the target file is whiteouted
+  on the branch, then the result of the policy is ignored and the target
+  file will be created on the highest one among the upper writable
+  branches who has diropq or whiteout. In case of whiteout, aufs removes
+  it as usual.
+- link(2) and rename(2) systemcalls are exceptions in every policy.
+  They try selecting the branch where the source exists as possible
+  since copyup a large file will take long time. If it can't be,
+  ie. the branch where the source exists is readonly, then they will
+  follow the copyup policy.
+- There is an exception for rename(2) when the target exists.
+  If the rename target exists, aufs compares the index of the branches
+  where the source and the target exists and selects the higher
+  one. If the selected branch is readonly, then aufs follows the
+  copyup policy.
diff --git a/include/Documentation/filesystems/aufs/design/06dirren.dot b/include/Documentation/filesystems/aufs/design/06dirren.dot
new file mode 100644
index 000000000..2d62bb6dd
--- /dev/null
+++ b/include/Documentation/filesystems/aufs/design/06dirren.dot
@@ -0,0 +1,31 @@
+
+// to view this graph, run dot(1) command in GRAPHVIZ.
+
+digraph G {
+node [shape=box];
+whinfo [label="detailed info file\n(lower_brid_root-hinum, h_inum, namelen, old name)"];
+
+node [shape=oval];
+
+aufs_rename -> whinfo [label="store/remove"];
+
+node [shape=oval];
+inode_list [label="h_inum list in branch\ncache"];
+
+node [shape=box];
+whinode [label="h_inum list file"];
+
+node [shape=oval];
+brmgmt [label="br_add/del/mod/umount"];
+
+brmgmt -> inode_list [label="create/remove"];
+brmgmt -> whinode [label="load/store"];
+
+inode_list -> whinode [style=dashed,dir=both];
+
+aufs_rename -> inode_list [label="add/del"];
+
+aufs_lookup -> inode_list [label="search"];
+
+aufs_lookup -> whinfo [label="load/remove"];
+}
diff --git a/include/Documentation/filesystems/aufs/design/06dirren.txt b/include/Documentation/filesystems/aufs/design/06dirren.txt
new file mode 100644
index 000000000..1f3cb86d9
--- /dev/null
+++ b/include/Documentation/filesystems/aufs/design/06dirren.txt
@@ -0,0 +1,102 @@
+
+# Copyright (C) 2017-2018 Junjiro R. Okajima
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Special handling for renaming a directory (DIRREN)
+----------------------------------------------------------------------
+First, let's assume we have a simple usecase.
+
+- /u = /rw + /ro
+- /rw/dirA exists
+- /ro/dirA and /ro/dirA/file exist too
+- there is no dirB on both branches
+- a user issues rename("dirA", "dirB")
+
+Now, what should aufs behave against this rename(2)?
+There are a few possible cases.
+
+A. returns EROFS.
+   since dirA exists on a readonly branch which cannot be renamed.
+B. returns EXDEV.
+   it is possible to copy-up dirA (only the dir itself), but the child
+   entries ("file" in this case) should not be. it must be a bad
+   approach to copy-up recursively.
+C. returns a success.
+   even the branch /ro is readonly, aufs tries renaming it. Obviously it
+   is a violation of aufs' policy.
+D. construct an extra information which indicates that /ro/dirA should
+   be handled as the name of dirB.
+   overlayfs has a similar feature called REDIRECT.
+
+Until now, aufs implements the case B only which returns EXDEV, and
+expects the userspace application behaves like mv(1) which tries
+issueing rename(2) recursively.
+
+A new aufs feature called DIRREN is introduced which implements the case
+D. There are several "extra information" added.
+
+1. detailed info per renamed directory
+   path: /rw/dirB/$AUFS_WH_DR_INFO_PFX.<lower branch-id>
+2. the inode-number list of directories on a branch
+   path: /rw/dirB/$AUFS_WH_DR_BRHINO
+
+The filename of "detailed info per directory" represents the lower
+branch, and its format is
+- a type of the branch id
+  one of these.
+  + uuid (not implemented yet)
+  + fsid
+  + dev
+- the inode-number of the branch root dir
+
+And it contains these info in a single regular file.
+- magic number
+- branch's inode-number of the logically renamed dir
+- the name of the before-renamed dir
+
+The "detailed info per directory" file is created in aufs rename(2), and
+loaded in any lookup.
+The info is considered in lookup for the matching case only. Here
+"matching" means that the root of branch (in the info filename) is same
+to the current looking-up branch. After looking-up the before-renamed
+name, the inode-number is compared. And the matched dentry is used.
+
+The "inode-number list of directories" is a regular file which contains
+simply the inode-numbers on the branch. The file is created or updated
+in removing the branch, and loaded in adding the branch. Its lifetime is
+equal to the branch.
+The list is refered in lookup, and when the current target inode is
+found in the list, the aufs tries loading the "detailed info per
+directory" and get the changed and valid name of the dir.
+
+Theoretically these "extra informaiton" may be able to be put into XATTR
+in the dir inode. But aufs doesn't choose this way because
+1. XATTR may not be supported by the branch (or its configuration)
+2. XATTR may have its size limit.
+3. XATTR may be less easy to convert than a regular file, when the
+   format of the info is changed in the future.
+At the same time, I agree that the regular file approach is much slower
+than XATTR approach. So, in the future, aufs may take the XATTR or other
+better approach.
+
+This DIRREN feature is enabled by aufs configuration, and is activated
+by a new mount option.
+
+For the more complicated case, there is a work with UDBA option, which
+is to dected the direct access to the branches (by-passing aufs) and to
+maintain the cashes in aufs. Since a single cached aufs dentry may
+contains two names, before- and after-rename, the name comparision in
+UDBA handler may not work correctly. In this case, the behaviour will be
+equivalen to udba=reval case.
diff --git a/include/Documentation/filesystems/aufs/design/06fhsm.txt b/include/Documentation/filesystems/aufs/design/06fhsm.txt
new file mode 100644
index 000000000..ddfebecdf
--- /dev/null
+++ b/include/Documentation/filesystems/aufs/design/06fhsm.txt
@@ -0,0 +1,120 @@
+
+# Copyright (C) 2011-2018 Junjiro R. Okajima
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+
+File-based Hierarchical Storage Management (FHSM)
+----------------------------------------------------------------------
+Hierarchical Storage Management (or HSM) is a well-known feature in the
+storage world. Aufs provides this feature as file-based with multiple
+writable branches, based upon the principle of "Colder, the Lower".
+Here the word "colder" means that the less used files, and "lower" means
+that the position in the order of the stacked branches vertically.
+These multiple writable branches are prioritized, ie. the topmost one
+should be the fastest drive and be used heavily.
+
+o Characters in aufs FHSM story
+- aufs itself and a new branch attribute.
+- a new ioctl interface to move-down and to establish a connection with
+  the daemon ("move-down" is a converse of "copy-up").
+- userspace tool and daemon.
+
+The userspace daemon establishes a connection with aufs and waits for
+the notification. The notified information is very similar to struct
+statfs containing the number of consumed blocks and inodes.
+When the consumed blocks/inodes of a branch exceeds the user-specified
+upper watermark, the daemon activates its move-down process until the
+consumed blocks/inodes reaches the user-specified lower watermark.
+
+The actual move-down is done by aufs based upon the request from
+user-space since we need to maintain the inode number and the internal
+pointer arrays in aufs.
+
+Currently aufs FHSM handles the regular files only. Additionally they
+must not be hard-linked nor pseudo-linked.
+
+
+o Cowork of aufs and the user-space daemon
+  During the userspace daemon established the connection, aufs sends a
+  small notification to it whenever aufs writes something into the
+  writable branch. But it may cost high since aufs issues statfs(2)
+  internally. So user can specify a new option to cache the
+  info. Actually the notification is controlled by these factors.
+  + the specified cache time.
+  + classified as "force" by aufs internally.
+  Until the specified time expires, aufs doesn't send the info
+  except the forced cases. When aufs decide forcing, the info is always
+  notified to userspace.
+  For example, the number of free inodes is generally large enough and
+  the shortage of it happens rarely. So aufs doesn't force the
+  notification when creating a new file, directory and others. This is
+  the typical case which aufs doesn't force.
+  When aufs writes the actual filedata and the files consumes any of new
+  blocks, the aufs forces notifying.
+
+
+o Interfaces in aufs
+- New branch attribute.
+  + fhsm
+    Specifies that the branch is managed by FHSM feature. In other word,
+    participant in the FHSM.
+    When nofhsm is set to the branch, it will not be the source/target
+    branch of the move-down operation. This attribute is set
+    independently from coo and moo attributes, and if you want full
+    FHSM, you should specify them as well.
+- New mount option.
+  + fhsm_sec
+    Specifies a second to suppress many less important info to be
+    notified.
+- New ioctl.
+  + AUFS_CTL_FHSM_FD
+    create a new file descriptor which userspace can read the notification
+    (a subset of struct statfs) from aufs.
+- Module parameter 'brs'
+  It has to be set to 1. Otherwise the new mount option 'fhsm' will not
+  be set.
+- mount helpers /sbin/mount.aufs and /sbin/umount.aufs
+  When there are two or more branches with fhsm attributes,
+  /sbin/mount.aufs invokes the user-space daemon and /sbin/umount.aufs
+  terminates it. As a result of remounting and branch-manipulation, the
+  number of branches with fhsm attribute can be one. In this case,
+  /sbin/mount.aufs will terminate the user-space daemon.
+
+
+Finally the operation is done as these steps in kernel-space.
+- make sure that,
+  + no one else is using the file.
+  + the file is not hard-linked.
+  + the file is not pseudo-linked.
+  + the file is a regular file.
+  + the parent dir is not opaqued.
+- find the target writable branch.
+- make sure the file is not whiteout-ed by the upper (than the target)
+  branch.
+- make the parent dir on the target branch.
+- mutex lock the inode on the branch.
+- unlink the whiteout on the target branch (if exists).
+- lookup and create the whiteout-ed temporary name on the target branch.
+- copy the file as the whiteout-ed temporary name on the target branch.
+- rename the whiteout-ed temporary name to the original name.
+- unlink the file on the source branch.
+- maintain the internal pointer array and the external inode number
+  table (XINO).
+- maintain the timestamps and other attributes of the parent dir and the
+  file.
+
+And of course, in every step, an error may happen. So the operation
+should restore the original file state after an error happens.
diff --git a/include/Documentation/filesystems/aufs/design/06mmap.txt b/include/Documentation/filesystems/aufs/design/06mmap.txt
new file mode 100644
index 000000000..9a0a096b1
--- /dev/null
+++ b/include/Documentation/filesystems/aufs/design/06mmap.txt
@@ -0,0 +1,72 @@
+
+# Copyright (C) 2005-2018 Junjiro R. Okajima
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+mmap(2) -- File Memory Mapping
+----------------------------------------------------------------------
+In aufs, the file-mapped pages are handled by a branch fs directly, no
+interaction with aufs. It means aufs_mmap() calls the branch fs's
+->mmap().
+This approach is simple and good, but there is one problem.
+Under /proc, several entries show the mmapped files by its path (with
+device and inode number), and the printed path will be the path on the
+branch fs's instead of virtual aufs's.
+This is not a problem in most cases, but some utilities lsof(1) (and its
+user) may expect the path on aufs.
+
+To address this issue, aufs adds a new member called vm_prfile in struct
+vm_area_struct (and struct vm_region). The original vm_file points to
+the file on the branch fs in order to handle everything correctly as
+usual. The new vm_prfile points to a virtual file in aufs, and the
+show-functions in procfs refers to vm_prfile if it is set.
+Also we need to maintain several other places where touching vm_file
+such like
+- fork()/clone() copies vma and the reference count of vm_file is
+  incremented.
+- merging vma maintains the ref count too.
+
+This is not a good approach. It just fakes the printed path. But it
+leaves all behaviour around f_mapping unchanged. This is surely an
+advantage.
+Actually aufs had adopted another complicated approach which calls
+generic_file_mmap() and handles struct vm_operations_struct. In this
+approach, aufs met a hard problem and I could not solve it without
+switching the approach.
+
+There may be one more another approach which is
+- bind-mount the branch-root onto the aufs-root internally
+- grab the new vfsmount (ie. struct mount)
+- lazy-umount the branch-root internally
+- in open(2) the aufs-file, open the branch-file with the hidden
+  vfsmount (instead of the original branch's vfsmount)
+- ideally this "bind-mount and lazy-umount" should be done atomically,
+  but it may be possible from userspace by the mount helper.
+
+Adding the internal hidden vfsmount and using it in opening a file, the
+file path under /proc will be printed correctly. This approach looks
+smarter, but is not possible I am afraid.
+- aufs-root may be bind-mount later. when it happens, another hidden
+  vfsmount will be required.
+- it is hard to get the chance to bind-mount and lazy-umount
+  + in kernel-space, FS can have vfsmount in open(2) via
+    file->f_path, and aufs can know its vfsmount. But several locks are
+    already acquired, and if aufs tries to bind-mount and lazy-umount
+    here, then it may cause a deadlock.
+  + in user-space, bind-mount doesn't invoke the mount helper.
+- since /proc shows dev and ino, aufs has to give vma these info. it
+  means a new member vm_prinode will be necessary. this is essentially
+  equivalent to vm_prfile described above.
+
+I have to give up this "looks-smater" approach.
diff --git a/include/Documentation/filesystems/aufs/design/06xattr.txt b/include/Documentation/filesystems/aufs/design/06xattr.txt
new file mode 100644
index 000000000..be441e8d3
--- /dev/null
+++ b/include/Documentation/filesystems/aufs/design/06xattr.txt
@@ -0,0 +1,96 @@
+
+# Copyright (C) 2014-2018 Junjiro R. Okajima
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+
+Listing XATTR/EA and getting the value
+----------------------------------------------------------------------
+For the inode standard attributes (owner, group, timestamps, etc.), aufs
+shows the values from the topmost existing file. This behaviour is good
+for the non-dir entries since the bahaviour exactly matches the shown
+information. But for the directories, aufs considers all the same named
+entries on the lower branches. Which means, if one of the lower entry
+rejects readdir call, then aufs returns an error even if the topmost
+entry allows it. This behaviour is necessary to respect the branch fs's
+security, but can make users confused since the user-visible standard
+attributes don't match the behaviour.
+To address this issue, aufs has a mount option called dirperm1 which
+checks the permission for the topmost entry only, and ignores the lower
+entry's permission.
+
+A similar issue can happen around XATTR.
+getxattr(2) and listxattr(2) families behave as if dirperm1 option is
+always set. Otherwise these very unpleasant situation would happen.
+- listxattr(2) may return the duplicated entries.
+- users may not be able to remove or reset the XATTR forever,
+
+
+XATTR/EA support in the internal (copy,move)-(up,down)
+----------------------------------------------------------------------
+Generally the extended attributes of inode are categorized as these.
+- "security" for LSM and capability.
+- "system" for posix ACL, 'acl' mount option is required for the branch
+  fs generally.
+- "trusted" for userspace, CAP_SYS_ADMIN is required.
+- "user" for userspace, 'user_xattr' mount option is required for the
+  branch fs generally.
+
+Moreover there are some other categories. Aufs handles these rather
+unpopular categories as the ordinary ones, ie. there is no special
+condition nor exception.
+
+In copy-up, the support for XATTR on the dst branch may differ from the
+src branch. In this case, the copy-up operation will get an error and
+the original user operation which triggered the copy-up will fail. It
+can happen that even all copy-up will fail.
+When both of src and dst branches support XATTR and if an error occurs
+during copying XATTR, then the copy-up should fail obviously. That is a
+good reason and aufs should return an error to userspace. But when only
+the src branch support that XATTR, aufs should not return an error.
+For example, the src branch supports ACL but the dst branch doesn't
+because the dst branch may natively un-support it or temporary
+un-support it due to "noacl" mount option. Of course, the dst branch fs
+may NOT return an error even if the XATTR is not supported. It is
+totally up to the branch fs.
+
+Anyway when the aufs internal copy-up gets an error from the dst branch
+fs, then aufs tries removing the just copied entry and returns the error
+to the userspace. The worst case of this situation will be all copy-up
+will fail.
+
+For the copy-up operation, there two basic approaches.
+- copy the specified XATTR only (by category above), and return the
+  error unconditionally if it happens.
+- copy all XATTR, and ignore the error on the specified category only.
+
+In order to support XATTR and to implement the correct behaviour, aufs
+chooses the latter approach and introduces some new branch attributes,
+"icexsec", "icexsys", "icextr", "icexusr", and "icexoth".
+They correspond to the XATTR namespaces (see above). Additionally, to be
+convenient, "icex" is also provided which means all "icex*" attributes
+are set (here the word "icex" stands for "ignore copy-error on XATTR").
+
+The meaning of these attributes is to ignore the error from setting
+XATTR on that branch.
+Note that aufs tries copying all XATTR unconditionally, and ignores the
+error from the dst branch according to the specified attributes.
+
+Some XATTR may have its default value. The default value may come from
+the parent dir or the environment. If the default value is set at the
+file creating-time, it will be overwritten by copy-up.
+Some contradiction may happen I am afraid.
+Do we need another attribute to stop copying XATTR? I am unsure. For
+now, aufs implements the branch attributes to ignore the error.
diff --git a/include/Documentation/filesystems/aufs/design/07export.txt b/include/Documentation/filesystems/aufs/design/07export.txt
new file mode 100644
index 000000000..bb700cb18
--- /dev/null
+++ b/include/Documentation/filesystems/aufs/design/07export.txt
@@ -0,0 +1,58 @@
+
+# Copyright (C) 2005-2018 Junjiro R. Okajima
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Export Aufs via NFS
+----------------------------------------------------------------------
+Here is an approach.
+- like xino/xib, add a new file 'xigen' which stores aufs inode
+  generation.
+- iget_locked(): initialize aufs inode generation for a new inode, and
+  store it in xigen file.
+- destroy_inode(): increment aufs inode generation and store it in xigen
+  file. it is necessary even if it is not unlinked, because any data of
+  inode may be changed by UDBA.
+- encode_fh(): for a root dir, simply return FILEID_ROOT. otherwise
+  build file handle by
+  + branch id (4 bytes)
+  + superblock generation (4 bytes)
+  + inode number (4 or 8 bytes)
+  + parent dir inode number (4 or 8 bytes)
+  + inode generation (4 bytes))
+  + return value of exportfs_encode_fh() for the parent on a branch (4
+    bytes)
+  + file handle for a branch (by exportfs_encode_fh())
+- fh_to_dentry():
+  + find the index of a branch from its id in handle, and check it is
+    still exist in aufs.
+  + 1st level: get the inode number from handle and search it in cache.
+  + 2nd level: if not found in cache, get the parent inode number from
+    the handle and search it in cache. and then open the found parent
+    dir, find the matching inode number by vfs_readdir() and get its
+    name, and call lookup_one_len() for the target dentry.
+  + 3rd level: if the parent dir is not cached, call
+    exportfs_decode_fh() for a branch and get the parent on a branch,
+    build a pathname of it, convert it a pathname in aufs, call
+    path_lookup(). now aufs gets a parent dir dentry, then handle it as
+    the 2nd level.
+  + to open the dir, aufs needs struct vfsmount. aufs keeps vfsmount
+    for every branch, but not itself. to get this, (currently) aufs
+    searches in current->nsproxy->mnt_ns list. it may not be a good
+    idea, but I didn't get other approach.
+  + test the generation of the gotten inode.
+- every inode operation: they may get EBUSY due to UDBA. in this case,
+  convert it into ESTALE for NFSD.
+- readdir(): call lockdep_on/off() because filldir in NFSD calls
+  lookup_one_len(), vfs_getattr(), encode_fh() and others.
diff --git a/include/Documentation/filesystems/aufs/design/08shwh.txt b/include/Documentation/filesystems/aufs/design/08shwh.txt
new file mode 100644
index 000000000..1dad573f6
--- /dev/null
+++ b/include/Documentation/filesystems/aufs/design/08shwh.txt
@@ -0,0 +1,52 @@
+
+# Copyright (C) 2005-2018 Junjiro R. Okajima
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Show Whiteout Mode (shwh)
+----------------------------------------------------------------------
+Generally aufs hides the name of whiteouts. But in some cases, to show
+them is very useful for users. For instance, creating a new middle layer
+(branch) by merging existing layers.
+
+(borrowing aufs1 HOW-TO from a user, Michael Towers)
+When you have three branches,
+- Bottom: 'system', squashfs (underlying base system), read-only
+- Middle: 'mods', squashfs, read-only
+- Top: 'overlay', ram (tmpfs), read-write
+
+The top layer is loaded at boot time and saved at shutdown, to preserve
+the changes made to the system during the session.
+When larger changes have been made, or smaller changes have accumulated,
+the size of the saved top layer data grows. At this point, it would be
+nice to be able to merge the two overlay branches ('mods' and 'overlay')
+and rewrite the 'mods' squashfs, clearing the top layer and thus
+restoring save and load speed.
+
+This merging is simplified by the use of another aufs mount, of just the
+two overlay branches using the 'shwh' option.
+# mount -t aufs -o ro,shwh,br:/livesys/overlay=ro+wh:/livesys/mods=rr+wh \
+	aufs /livesys/merge_union
+
+A merged view of these two branches is then available at
+/livesys/merge_union, and the new feature is that the whiteouts are
+visible!
+Note that in 'shwh' mode the aufs mount must be 'ro', which will disable
+writing to all branches. Also the default mode for all branches is 'ro'.
+It is now possible to save the combined contents of the two overlay
+branches to a new squashfs, e.g.:
+# mksquashfs /livesys/merge_union /path/to/newmods.squash
+
+This new squashfs archive can be stored on the boot device and the
+initramfs will use it to replace the old one at the next boot.
diff --git a/include/Documentation/filesystems/aufs/design/10dynop.txt b/include/Documentation/filesystems/aufs/design/10dynop.txt
new file mode 100644
index 000000000..710313c08
--- /dev/null
+++ b/include/Documentation/filesystems/aufs/design/10dynop.txt
@@ -0,0 +1,47 @@
+
+# Copyright (C) 2010-2018 Junjiro R. Okajima
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Dynamically customizable FS operations
+----------------------------------------------------------------------
+Generally FS operations (struct inode_operations, struct
+address_space_operations, struct file_operations, etc.) are defined as
+"static const", but it never means that FS have only one set of
+operation. Some FS have multiple sets of them. For instance, ext2 has
+three sets, one for XIP, for NOBH, and for normal.
+Since aufs overrides and redirects these operations, sometimes aufs has
+to change its behaviour according to the branch FS type. More importantly
+VFS acts differently if a function (member in the struct) is set or
+not. It means aufs should have several sets of operations and select one
+among them according to the branch FS definition.
+
+In order to solve this problem and not to affect the behaviour of VFS,
+aufs defines these operations dynamically. For instance, aufs defines
+dummy direct_IO function for struct address_space_operations, but it may
+not be set to the address_space_operations actually. When the branch FS
+doesn't have it, aufs doesn't set it to its address_space_operations
+while the function definition itself is still alive. So the behaviour
+itself will not change, and it will return an error when direct_IO is
+not set.
+
+The lifetime of these dynamically generated operation object is
+maintained by aufs branch object. When the branch is removed from aufs,
+the reference counter of the object is decremented. When it reaches
+zero, the dynamically generated operation object will be freed.
+
+This approach is designed to support AIO (io_submit), Direct I/O and
+XIP (DAX) mainly.
+Currently this approach is applied to address_space_operations for
+regular files only.
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index bfbb44a5a..8dcd7f167 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -781,12 +781,25 @@ extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
 extern void untrack_pfn_moved(struct vm_area_struct *vma);
 #endif
 
+#ifdef CONFIG_UKSM
+static inline int is_uksm_zero_pfn(unsigned long pfn)
+{
+	extern unsigned long uksm_zero_pfn;
+	return pfn == uksm_zero_pfn;
+}
+#else
+static inline int is_uksm_zero_pfn(unsigned long pfn)
+{
+	return 0;
+}
+#endif
+
 #ifdef __HAVE_COLOR_ZERO_PAGE
 static inline int is_zero_pfn(unsigned long pfn)
 {
 	extern unsigned long zero_pfn;
 	unsigned long offset_from_zero_pfn = pfn - zero_pfn;
-	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
+	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT) || is_uksm_zero_pfn(pfn);
 }
 
 #define my_zero_pfn(addr)	page_to_pfn(ZERO_PAGE(addr))
@@ -795,7 +808,7 @@ static inline int is_zero_pfn(unsigned long pfn)
 static inline int is_zero_pfn(unsigned long pfn)
 {
 	extern unsigned long zero_pfn;
-	return pfn == zero_pfn;
+	return (pfn == zero_pfn) || (is_uksm_zero_pfn(pfn));
 }
 
 static inline unsigned long my_zero_pfn(unsigned long addr)
diff --git a/include/fs/aufs/Kconfig b/include/fs/aufs/Kconfig
new file mode 100644
index 000000000..9f4364257
--- /dev/null
+++ b/include/fs/aufs/Kconfig
@@ -0,0 +1,199 @@
+# SPDX-License-Identifier: GPL-2.0
+config AUFS_FS
+	tristate "Aufs (Advanced multi layered unification filesystem) support"
+	help
+	Aufs is a stackable unification filesystem such as Unionfs,
+	which unifies several directories and provides a merged single
+	directory.
+	In the early days, aufs was entirely re-designed and
+	re-implemented Unionfs Version 1.x series. Introducing many
+	original ideas, approaches and improvements, it becomes totally
+	different from Unionfs while keeping the basic features.
+
+if AUFS_FS
+choice
+	prompt "Maximum number of branches"
+	default AUFS_BRANCH_MAX_127
+	help
+	Specifies the maximum number of branches (or member directories)
+	in a single aufs. The larger value consumes more system
+	resources and has a minor impact to performance.
+config AUFS_BRANCH_MAX_127
+	bool "127"
+	help
+	Specifies the maximum number of branches (or member directories)
+	in a single aufs. The larger value consumes more system
+	resources and has a minor impact to performance.
+config AUFS_BRANCH_MAX_511
+	bool "511"
+	help
+	Specifies the maximum number of branches (or member directories)
+	in a single aufs. The larger value consumes more system
+	resources and has a minor impact to performance.
+config AUFS_BRANCH_MAX_1023
+	bool "1023"
+	help
+	Specifies the maximum number of branches (or member directories)
+	in a single aufs. The larger value consumes more system
+	resources and has a minor impact to performance.
+config AUFS_BRANCH_MAX_32767
+	bool "32767"
+	help
+	Specifies the maximum number of branches (or member directories)
+	in a single aufs. The larger value consumes more system
+	resources and has a minor impact to performance.
+endchoice
+
+config AUFS_SBILIST
+	bool
+	depends on AUFS_MAGIC_SYSRQ || PROC_FS
+	default y
+	help
+	Automatic configuration for internal use.
+	When aufs supports Magic SysRq or /proc, enabled automatically.
+
+config AUFS_HNOTIFY
+	bool "Detect direct branch access (bypassing aufs)"
+	help
+	If you want to modify files on branches directly, eg. bypassing aufs,
+	and want aufs to detect the changes of them fully, then enable this
+	option and use 'udba=notify' mount option.
+	Currently there is only one available configuration, "fsnotify".
+	It will have a negative impact to the performance.
+	See detail in aufs.5.
+
+choice
+	prompt "method" if AUFS_HNOTIFY
+	default AUFS_HFSNOTIFY
+config AUFS_HFSNOTIFY
+	bool "fsnotify"
+	select FSNOTIFY
+endchoice
+
+config AUFS_EXPORT
+	bool "NFS-exportable aufs"
+	depends on EXPORTFS
+	help
+	If you want to export your mounted aufs via NFS, then enable this
+	option. There are several requirements for this configuration.
+	See detail in aufs.5.
+
+config AUFS_INO_T_64
+	bool
+	depends on AUFS_EXPORT
+	depends on 64BIT && !(ALPHA || S390)
+	default y
+	help
+	Automatic configuration for internal use.
+	/* typedef unsigned long/int __kernel_ino_t */
+	/* alpha and s390x are int */
+
+config AUFS_XATTR
+	bool "support for XATTR/EA (including Security Labels)"
+	help
+	If your branch fs supports XATTR/EA and you want to make them
+	available in aufs too, then enable this opsion and specify the
+	branch attributes for EA.
+	See detail in aufs.5.
+
+config AUFS_FHSM
+	bool "File-based Hierarchical Storage Management"
+	help
+	Hierarchical Storage Management (or HSM) is a well-known feature
+	in the storage world. Aufs provides this feature as file-based.
+	with multiple branches.
+	These multiple branches are prioritized, ie. the topmost one
+	should be the fastest drive and be used heavily.
+
+config AUFS_RDU
+	bool "Readdir in userspace"
+	help
+	Aufs has two methods to provide a merged view for a directory,
+	by a user-space library and by kernel-space natively. The latter
+	is always enabled but sometimes large and slow.
+	If you enable this option, install the library in aufs2-util
+	package, and set some environment variables for your readdir(3),
+	then the work will be handled in user-space which generally
+	shows better performance in most cases.
+	See detail in aufs.5.
+
+config AUFS_DIRREN
+	bool "Workaround for rename(2)-ing a directory"
+	help
+	By default, aufs returns EXDEV error in renameing a dir who has
+	his child on the lower branch, since it is a bad idea to issue
+	rename(2) internally for every lower branch. But user may not
+	accept this behaviour. So here is a workaround to allow such
+	rename(2) and store some extra infromation on the writable
+	branch. Obviously this costs high (and I don't like it).
+	To use this feature, you need to enable this configuration AND
+	to specify the mount option `dirren.'
+	See details in aufs.5 and the design documents.
+
+config AUFS_SHWH
+	bool "Show whiteouts"
+	help
+	If you want to make the whiteouts in aufs visible, then enable
+	this option and specify 'shwh' mount option. Although it may
+	sounds like philosophy or something, but in technically it
+	simply shows the name of whiteout with keeping its behaviour.
+
+config AUFS_BR_RAMFS
+	bool "Ramfs (initramfs/rootfs) as an aufs branch"
+	help
+	If you want to use ramfs as an aufs branch fs, then enable this
+	option. Generally tmpfs is recommended.
+	Aufs prohibited them to be a branch fs by default, because
+	initramfs becomes unusable after switch_root or something
+	generally. If you sets initramfs as an aufs branch and boot your
+	system by switch_root, you will meet a problem easily since the
+	files in initramfs may be inaccessible.
+	Unless you are going to use ramfs as an aufs branch fs without
+	switch_root or something, leave it N.
+
+config AUFS_BR_FUSE
+	bool "Fuse fs as an aufs branch"
+	depends on FUSE_FS
+	select AUFS_POLL
+	help
+	If you want to use fuse-based userspace filesystem as an aufs
+	branch fs, then enable this option.
+	It implements the internal poll(2) operation which is
+	implemented by fuse only (curretnly).
+
+config AUFS_POLL
+	bool
+	help
+	Automatic configuration for internal use.
+
+config AUFS_BR_HFSPLUS
+	bool "Hfsplus as an aufs branch"
+	depends on HFSPLUS_FS
+	default y
+	help
+	If you want to use hfsplus fs as an aufs branch fs, then enable
+	this option. This option introduces a small overhead at
+	copying-up a file on hfsplus.
+
+config AUFS_BDEV_LOOP
+	bool
+	depends on BLK_DEV_LOOP
+	default y
+	help
+	Automatic configuration for internal use.
+	Convert =[ym] into =y.
+
+config AUFS_DEBUG
+	bool "Debug aufs"
+	help
+	Enable this to compile aufs internal debug code.
+	It will have a negative impact to the performance.
+
+config AUFS_MAGIC_SYSRQ
+	bool
+	depends on AUFS_DEBUG && MAGIC_SYSRQ
+	default y
+	help
+	Automatic configuration for internal use.
+	When aufs supports Magic SysRq, enabled automatically.
+endif
diff --git a/include/fs/aufs/Makefile b/include/fs/aufs/Makefile
new file mode 100644
index 000000000..2c819a649
--- /dev/null
+++ b/include/fs/aufs/Makefile
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: GPL-2.0
+
+include ${src}/magic.mk
+ifeq (${CONFIG_AUFS_FS},m)
+include ${src}/conf.mk
+endif
+-include ${src}/priv_def.mk
+
+# cf. include/linux/kernel.h
+# enable pr_debug
+ccflags-y += -DDEBUG
+# sparse requires the full pathname
+ifdef M
+ccflags-y += -include ${M}/../../include/uapi/linux/aufs_type.h
+else
+ccflags-y += -include ${srctree}/include/uapi/linux/aufs_type.h
+endif
+
+obj-$(CONFIG_AUFS_FS) += aufs.o
+aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o \
+	wkq.o vfsub.o dcsub.o \
+	cpup.o whout.o wbr_policy.o \
+	dinfo.o dentry.o \
+	dynop.o \
+	finfo.o file.o f_op.o \
+	dir.o vdir.o \
+	iinfo.o inode.o i_op.o i_op_add.o i_op_del.o i_op_ren.o \
+	mvdown.o ioctl.o
+
+# all are boolean
+aufs-$(CONFIG_PROC_FS) += procfs.o plink.o
+aufs-$(CONFIG_SYSFS) += sysfs.o
+aufs-$(CONFIG_DEBUG_FS) += dbgaufs.o
+aufs-$(CONFIG_AUFS_BDEV_LOOP) += loop.o
+aufs-$(CONFIG_AUFS_HNOTIFY) += hnotify.o
+aufs-$(CONFIG_AUFS_HFSNOTIFY) += hfsnotify.o
+aufs-$(CONFIG_AUFS_EXPORT) += export.o
+aufs-$(CONFIG_AUFS_XATTR) += xattr.o
+aufs-$(CONFIG_FS_POSIX_ACL) += posix_acl.o
+aufs-$(CONFIG_AUFS_DIRREN) += dirren.o
+aufs-$(CONFIG_AUFS_FHSM) += fhsm.o
+aufs-$(CONFIG_AUFS_POLL) += poll.o
+aufs-$(CONFIG_AUFS_RDU) += rdu.o
+aufs-$(CONFIG_AUFS_BR_HFSPLUS) += hfsplus.o
+aufs-$(CONFIG_AUFS_DEBUG) += debug.o
+aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o
diff --git a/include/fs/aufs/aufs.h b/include/fs/aufs/aufs.h
new file mode 100644
index 000000000..bcf0a2e40
--- /dev/null
+++ b/include/fs/aufs/aufs.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * all header files
+ */
+
+#ifndef __AUFS_H__
+#define __AUFS_H__
+
+#ifdef __KERNEL__
+
+#define AuStub(type, name, body, ...) \
+	static inline type name(__VA_ARGS__) { body; }
+
+#define AuStubVoid(name, ...) \
+	AuStub(void, name, , __VA_ARGS__)
+#define AuStubInt0(name, ...) \
+	AuStub(int, name, return 0, __VA_ARGS__)
+
+#include "debug.h"
+
+#include "branch.h"
+#include "cpup.h"
+#include "dcsub.h"
+#include "dbgaufs.h"
+#include "dentry.h"
+#include "dir.h"
+#include "dirren.h"
+#include "dynop.h"
+#include "file.h"
+#include "fstype.h"
+#include "hbl.h"
+#include "inode.h"
+#include "loop.h"
+#include "module.h"
+#include "opts.h"
+#include "rwsem.h"
+#include "super.h"
+#include "sysaufs.h"
+#include "vfsub.h"
+#include "whout.h"
+#include "wkq.h"
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_H__ */
diff --git a/include/fs/aufs/branch.c b/include/fs/aufs/branch.c
new file mode 100644
index 000000000..eeca66c00
--- /dev/null
+++ b/include/fs/aufs/branch.c
@@ -0,0 +1,1432 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * branch management
+ */
+
+#include <linux/compat.h>
+#include <linux/statfs.h>
+#include "aufs.h"
+
+/*
+ * free a single branch
+ */
+static void au_br_do_free(struct au_branch *br)
+{
+	int i;
+	struct au_wbr *wbr;
+	struct au_dykey **key;
+
+	au_hnotify_fin_br(br);
+	/* always, regardless the mount option */
+	au_dr_hino_free(&br->br_dirren);
+
+	if (br->br_xino.xi_file)
+		fput(br->br_xino.xi_file);
+	for (i = br->br_xino.xi_nondir.total - 1; i >= 0; i--)
+		AuDebugOn(br->br_xino.xi_nondir.array[i]);
+	kfree(br->br_xino.xi_nondir.array);
+
+	AuDebugOn(au_br_count(br));
+	au_br_count_fin(br);
+
+	wbr = br->br_wbr;
+	if (wbr) {
+		for (i = 0; i < AuBrWh_Last; i++)
+			dput(wbr->wbr_wh[i]);
+		AuDebugOn(atomic_read(&wbr->wbr_wh_running));
+		AuRwDestroy(&wbr->wbr_wh_rwsem);
+	}
+
+	if (br->br_fhsm) {
+		au_br_fhsm_fin(br->br_fhsm);
+		kfree(br->br_fhsm);
+	}
+
+	key = br->br_dykey;
+	for (i = 0; i < AuBrDynOp; i++, key++)
+		if (*key)
+			au_dy_put(*key);
+		else
+			break;
+
+	/* recursive lock, s_umount of branch's */
+	lockdep_off();
+	path_put(&br->br_path);
+	lockdep_on();
+	kfree(wbr);
+	kfree(br);
+}
+
+/*
+ * frees all branches
+ */
+void au_br_free(struct au_sbinfo *sbinfo)
+{
+	aufs_bindex_t bmax;
+	struct au_branch **br;
+
+	AuRwMustWriteLock(&sbinfo->si_rwsem);
+
+	bmax = sbinfo->si_bbot + 1;
+	br = sbinfo->si_branch;
+	while (bmax--)
+		au_br_do_free(*br++);
+}
+
+/*
+ * find the index of a branch which is specified by @br_id.
+ */
+int au_br_index(struct super_block *sb, aufs_bindex_t br_id)
+{
+	aufs_bindex_t bindex, bbot;
+
+	bbot = au_sbbot(sb);
+	for (bindex = 0; bindex <= bbot; bindex++)
+		if (au_sbr_id(sb, bindex) == br_id)
+			return bindex;
+	return -1;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * add a branch
+ */
+
+static int test_overlap(struct super_block *sb, struct dentry *h_adding,
+			struct dentry *h_root)
+{
+	if (unlikely(h_adding == h_root
+		     || au_test_loopback_overlap(sb, h_adding)))
+		return 1;
+	if (h_adding->d_sb != h_root->d_sb)
+		return 0;
+	return au_test_subdir(h_adding, h_root)
+		|| au_test_subdir(h_root, h_adding);
+}
+
+/*
+ * returns a newly allocated branch. @new_nbranch is a number of branches
+ * after adding a branch.
+ */
+static struct au_branch *au_br_alloc(struct super_block *sb, int new_nbranch,
+				     int perm)
+{
+	struct au_branch *add_branch;
+	struct dentry *root;
+	struct inode *inode;
+	int err;
+
+	err = -ENOMEM;
+	add_branch = kzalloc(sizeof(*add_branch), GFP_NOFS);
+	if (unlikely(!add_branch))
+		goto out;
+	add_branch->br_xino.xi_nondir.total = 8; /* initial size */
+	add_branch->br_xino.xi_nondir.array
+		= kcalloc(add_branch->br_xino.xi_nondir.total, sizeof(ino_t),
+			  GFP_NOFS);
+	if (unlikely(!add_branch->br_xino.xi_nondir.array))
+		goto out_br;
+
+	err = au_hnotify_init_br(add_branch, perm);
+	if (unlikely(err))
+		goto out_xinondir;
+
+	if (au_br_writable(perm)) {
+		/* may be freed separately at changing the branch permission */
+		add_branch->br_wbr = kzalloc(sizeof(*add_branch->br_wbr),
+					     GFP_NOFS);
+		if (unlikely(!add_branch->br_wbr))
+			goto out_hnotify;
+	}
+
+	if (au_br_fhsm(perm)) {
+		err = au_fhsm_br_alloc(add_branch);
+		if (unlikely(err))
+			goto out_wbr;
+	}
+
+	root = sb->s_root;
+	err = au_sbr_realloc(au_sbi(sb), new_nbranch, /*may_shrink*/0);
+	if (!err)
+		err = au_di_realloc(au_di(root), new_nbranch, /*may_shrink*/0);
+	if (!err) {
+		inode = d_inode(root);
+		err = au_hinode_realloc(au_ii(inode), new_nbranch,
+					/*may_shrink*/0);
+	}
+	if (!err)
+		return add_branch; /* success */
+
+out_wbr:
+	kfree(add_branch->br_wbr);
+out_hnotify:
+	au_hnotify_fin_br(add_branch);
+out_xinondir:
+	kfree(add_branch->br_xino.xi_nondir.array);
+out_br:
+	kfree(add_branch);
+out:
+	return ERR_PTR(err);
+}
+
+/*
+ * test if the branch permission is legal or not.
+ */
+static int test_br(struct inode *inode, int brperm, char *path)
+{
+	int err;
+
+	err = (au_br_writable(brperm) && IS_RDONLY(inode));
+	if (!err)
+		goto out;
+
+	err = -EINVAL;
+	pr_err("write permission for readonly mount or inode, %s\n", path);
+
+out:
+	return err;
+}
+
+/*
+ * returns:
+ * 0: success, the caller will add it
+ * plus: success, it is already unified, the caller should ignore it
+ * minus: error
+ */
+static int test_add(struct super_block *sb, struct au_opt_add *add, int remount)
+{
+	int err;
+	aufs_bindex_t bbot, bindex;
+	struct dentry *root, *h_dentry;
+	struct inode *inode, *h_inode;
+
+	root = sb->s_root;
+	bbot = au_sbbot(sb);
+	if (unlikely(bbot >= 0
+		     && au_find_dbindex(root, add->path.dentry) >= 0)) {
+		err = 1;
+		if (!remount) {
+			err = -EINVAL;
+			pr_err("%s duplicated\n", add->pathname);
+		}
+		goto out;
+	}
+
+	err = -ENOSPC; /* -E2BIG; */
+	if (unlikely(AUFS_BRANCH_MAX <= add->bindex
+		     || AUFS_BRANCH_MAX - 1 <= bbot)) {
+		pr_err("number of branches exceeded %s\n", add->pathname);
+		goto out;
+	}
+
+	err = -EDOM;
+	if (unlikely(add->bindex < 0 || bbot + 1 < add->bindex)) {
+		pr_err("bad index %d\n", add->bindex);
+		goto out;
+	}
+
+	inode = d_inode(add->path.dentry);
+	err = -ENOENT;
+	if (unlikely(!inode->i_nlink)) {
+		pr_err("no existence %s\n", add->pathname);
+		goto out;
+	}
+
+	err = -EINVAL;
+	if (unlikely(inode->i_sb == sb)) {
+		pr_err("%s must be outside\n", add->pathname);
+		goto out;
+	}
+
+	if (unlikely(au_test_fs_unsuppoted(inode->i_sb))) {
+		pr_err("unsupported filesystem, %s (%s)\n",
+		       add->pathname, au_sbtype(inode->i_sb));
+		goto out;
+	}
+
+	if (unlikely(inode->i_sb->s_stack_depth)) {
+		pr_err("already stacked, %s (%s)\n",
+		       add->pathname, au_sbtype(inode->i_sb));
+		goto out;
+	}
+
+	err = test_br(d_inode(add->path.dentry), add->perm, add->pathname);
+	if (unlikely(err))
+		goto out;
+
+	if (bbot < 0)
+		return 0; /* success */
+
+	err = -EINVAL;
+	for (bindex = 0; bindex <= bbot; bindex++)
+		if (unlikely(test_overlap(sb, add->path.dentry,
+					  au_h_dptr(root, bindex)))) {
+			pr_err("%s is overlapped\n", add->pathname);
+			goto out;
+		}
+
+	err = 0;
+	if (au_opt_test(au_mntflags(sb), WARN_PERM)) {
+		h_dentry = au_h_dptr(root, 0);
+		h_inode = d_inode(h_dentry);
+		if ((h_inode->i_mode & S_IALLUGO) != (inode->i_mode & S_IALLUGO)
+		    || !uid_eq(h_inode->i_uid, inode->i_uid)
+		    || !gid_eq(h_inode->i_gid, inode->i_gid))
+			pr_warn("uid/gid/perm %s %u/%u/0%o, %u/%u/0%o\n",
+				add->pathname,
+				i_uid_read(inode), i_gid_read(inode),
+				(inode->i_mode & S_IALLUGO),
+				i_uid_read(h_inode), i_gid_read(h_inode),
+				(h_inode->i_mode & S_IALLUGO));
+	}
+
+out:
+	return err;
+}
+
+/*
+ * initialize or clean the whiteouts for an adding branch
+ */
+static int au_br_init_wh(struct super_block *sb, struct au_branch *br,
+			 int new_perm)
+{
+	int err, old_perm;
+	aufs_bindex_t bindex;
+	struct inode *h_inode;
+	struct au_wbr *wbr;
+	struct au_hinode *hdir;
+	struct dentry *h_dentry;
+
+	err = vfsub_mnt_want_write(au_br_mnt(br));
+	if (unlikely(err))
+		goto out;
+
+	wbr = br->br_wbr;
+	old_perm = br->br_perm;
+	br->br_perm = new_perm;
+	hdir = NULL;
+	h_inode = NULL;
+	bindex = au_br_index(sb, br->br_id);
+	if (0 <= bindex) {
+		hdir = au_hi(d_inode(sb->s_root), bindex);
+		au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
+	} else {
+		h_dentry = au_br_dentry(br);
+		h_inode = d_inode(h_dentry);
+		inode_lock_nested(h_inode, AuLsc_I_PARENT);
+	}
+	if (!wbr)
+		err = au_wh_init(br, sb);
+	else {
+		wbr_wh_write_lock(wbr);
+		err = au_wh_init(br, sb);
+		wbr_wh_write_unlock(wbr);
+	}
+	if (hdir)
+		au_hn_inode_unlock(hdir);
+	else
+		inode_unlock(h_inode);
+	vfsub_mnt_drop_write(au_br_mnt(br));
+	br->br_perm = old_perm;
+
+	if (!err && wbr && !au_br_writable(new_perm)) {
+		kfree(wbr);
+		br->br_wbr = NULL;
+	}
+
+out:
+	return err;
+}
+
+static int au_wbr_init(struct au_branch *br, struct super_block *sb,
+		       int perm)
+{
+	int err;
+	struct kstatfs kst;
+	struct au_wbr *wbr;
+
+	wbr = br->br_wbr;
+	au_rw_init(&wbr->wbr_wh_rwsem);
+	atomic_set(&wbr->wbr_wh_running, 0);
+
+	/*
+	 * a limit for rmdir/rename a dir
+	 * cf. AUFS_MAX_NAMELEN in include/uapi/linux/aufs_type.h
+	 */
+	err = vfs_statfs(&br->br_path, &kst);
+	if (unlikely(err))
+		goto out;
+	err = -EINVAL;
+	if (kst.f_namelen >= NAME_MAX)
+		err = au_br_init_wh(sb, br, perm);
+	else
+		pr_err("%pd(%s), unsupported namelen %ld\n",
+		       au_br_dentry(br),
+		       au_sbtype(au_br_dentry(br)->d_sb), kst.f_namelen);
+
+out:
+	return err;
+}
+
+/* initialize a new branch */
+static int au_br_init(struct au_branch *br, struct super_block *sb,
+		      struct au_opt_add *add)
+{
+	int err;
+	struct inode *h_inode;
+
+	err = 0;
+	spin_lock_init(&br->br_xino.xi_nondir.spin);
+	init_waitqueue_head(&br->br_xino.xi_nondir.wqh);
+	br->br_perm = add->perm;
+	br->br_path = add->path; /* set first, path_get() later */
+	spin_lock_init(&br->br_dykey_lock);
+	au_br_count_init(br);
+	atomic_set(&br->br_xino_running, 0);
+	br->br_id = au_new_br_id(sb);
+	AuDebugOn(br->br_id < 0);
+
+	/* always, regardless the given option */
+	err = au_dr_br_init(sb, br, &add->path);
+	if (unlikely(err))
+		goto out_err;
+
+	if (au_br_writable(add->perm)) {
+		err = au_wbr_init(br, sb, add->perm);
+		if (unlikely(err))
+			goto out_err;
+	}
+
+	if (au_opt_test(au_mntflags(sb), XINO)) {
+		h_inode = d_inode(add->path.dentry);
+		err = au_xino_br(sb, br, h_inode->i_ino,
+				 au_sbr(sb, 0)->br_xino.xi_file, /*do_test*/1);
+		if (unlikely(err)) {
+			AuDebugOn(br->br_xino.xi_file);
+			goto out_err;
+		}
+	}
+
+	sysaufs_br_init(br);
+	path_get(&br->br_path);
+	goto out; /* success */
+
+out_err:
+	memset(&br->br_path, 0, sizeof(br->br_path));
+out:
+	return err;
+}
+
+static void au_br_do_add_brp(struct au_sbinfo *sbinfo, aufs_bindex_t bindex,
+			     struct au_branch *br, aufs_bindex_t bbot,
+			     aufs_bindex_t amount)
+{
+	struct au_branch **brp;
+
+	AuRwMustWriteLock(&sbinfo->si_rwsem);
+
+	brp = sbinfo->si_branch + bindex;
+	memmove(brp + 1, brp, sizeof(*brp) * amount);
+	*brp = br;
+	sbinfo->si_bbot++;
+	if (unlikely(bbot < 0))
+		sbinfo->si_bbot = 0;
+}
+
+static void au_br_do_add_hdp(struct au_dinfo *dinfo, aufs_bindex_t bindex,
+			     aufs_bindex_t bbot, aufs_bindex_t amount)
+{
+	struct au_hdentry *hdp;
+
+	AuRwMustWriteLock(&dinfo->di_rwsem);
+
+	hdp = au_hdentry(dinfo, bindex);
+	memmove(hdp + 1, hdp, sizeof(*hdp) * amount);
+	au_h_dentry_init(hdp);
+	dinfo->di_bbot++;
+	if (unlikely(bbot < 0))
+		dinfo->di_btop = 0;
+}
+
+static void au_br_do_add_hip(struct au_iinfo *iinfo, aufs_bindex_t bindex,
+			     aufs_bindex_t bbot, aufs_bindex_t amount)
+{
+	struct au_hinode *hip;
+
+	AuRwMustWriteLock(&iinfo->ii_rwsem);
+
+	hip = au_hinode(iinfo, bindex);
+	memmove(hip + 1, hip, sizeof(*hip) * amount);
+	au_hinode_init(hip);
+	iinfo->ii_bbot++;
+	if (unlikely(bbot < 0))
+		iinfo->ii_btop = 0;
+}
+
+static void au_br_do_add(struct super_block *sb, struct au_branch *br,
+			 aufs_bindex_t bindex)
+{
+	struct dentry *root, *h_dentry;
+	struct inode *root_inode, *h_inode;
+	aufs_bindex_t bbot, amount;
+
+	root = sb->s_root;
+	root_inode = d_inode(root);
+	bbot = au_sbbot(sb);
+	amount = bbot + 1 - bindex;
+	h_dentry = au_br_dentry(br);
+	au_sbilist_lock();
+	au_br_do_add_brp(au_sbi(sb), bindex, br, bbot, amount);
+	au_br_do_add_hdp(au_di(root), bindex, bbot, amount);
+	au_br_do_add_hip(au_ii(root_inode), bindex, bbot, amount);
+	au_set_h_dptr(root, bindex, dget(h_dentry));
+	h_inode = d_inode(h_dentry);
+	au_set_h_iptr(root_inode, bindex, au_igrab(h_inode), /*flags*/0);
+	au_sbilist_unlock();
+}
+
+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount)
+{
+	int err;
+	aufs_bindex_t bbot, add_bindex;
+	struct dentry *root, *h_dentry;
+	struct inode *root_inode;
+	struct au_branch *add_branch;
+
+	root = sb->s_root;
+	root_inode = d_inode(root);
+	IMustLock(root_inode);
+	IiMustWriteLock(root_inode);
+	err = test_add(sb, add, remount);
+	if (unlikely(err < 0))
+		goto out;
+	if (err) {
+		err = 0;
+		goto out; /* success */
+	}
+
+	bbot = au_sbbot(sb);
+	add_branch = au_br_alloc(sb, bbot + 2, add->perm);
+	err = PTR_ERR(add_branch);
+	if (IS_ERR(add_branch))
+		goto out;
+
+	err = au_br_init(add_branch, sb, add);
+	if (unlikely(err)) {
+		au_br_do_free(add_branch);
+		goto out;
+	}
+
+	add_bindex = add->bindex;
+	if (!remount)
+		au_br_do_add(sb, add_branch, add_bindex);
+	else {
+		sysaufs_brs_del(sb, add_bindex);
+		au_br_do_add(sb, add_branch, add_bindex);
+		sysaufs_brs_add(sb, add_bindex);
+	}
+
+	h_dentry = add->path.dentry;
+	if (!add_bindex) {
+		au_cpup_attr_all(root_inode, /*force*/1);
+		sb->s_maxbytes = h_dentry->d_sb->s_maxbytes;
+	} else
+		au_add_nlink(root_inode, d_inode(h_dentry));
+
+	/*
+	 * this test/set prevents aufs from handling unnecesary notify events
+	 * of xino files, in case of re-adding a writable branch which was
+	 * once detached from aufs.
+	 */
+	if (au_xino_brid(sb) < 0
+	    && au_br_writable(add_branch->br_perm)
+	    && !au_test_fs_bad_xino(h_dentry->d_sb)
+	    && add_branch->br_xino.xi_file
+	    && add_branch->br_xino.xi_file->f_path.dentry->d_parent == h_dentry)
+		au_xino_brid_set(sb, add_branch->br_id);
+
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static unsigned long long au_farray_cb(struct super_block *sb, void *a,
+				       unsigned long long max __maybe_unused,
+				       void *arg)
+{
+	unsigned long long n;
+	struct file **p, *f;
+	struct hlist_bl_head *files;
+	struct hlist_bl_node *pos;
+	struct au_finfo *finfo;
+
+	n = 0;
+	p = a;
+	files = &au_sbi(sb)->si_files;
+	hlist_bl_lock(files);
+	hlist_bl_for_each_entry(finfo, pos, files, fi_hlist) {
+		f = finfo->fi_file;
+		if (file_count(f)
+		    && !special_file(file_inode(f)->i_mode)) {
+			get_file(f);
+			*p++ = f;
+			n++;
+			AuDebugOn(n > max);
+		}
+	}
+	hlist_bl_unlock(files);
+
+	return n;
+}
+
+static struct file **au_farray_alloc(struct super_block *sb,
+				     unsigned long long *max)
+{
+	*max = au_nfiles(sb);
+	return au_array_alloc(max, au_farray_cb, sb, /*arg*/NULL);
+}
+
+static void au_farray_free(struct file **a, unsigned long long max)
+{
+	unsigned long long ull;
+
+	for (ull = 0; ull < max; ull++)
+		if (a[ull])
+			fput(a[ull]);
+	kvfree(a);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * delete a branch
+ */
+
+/* to show the line number, do not make it inlined function */
+#define AuVerbose(do_info, fmt, ...) do { \
+	if (do_info) \
+		pr_info(fmt, ##__VA_ARGS__); \
+} while (0)
+
+static int au_test_ibusy(struct inode *inode, aufs_bindex_t btop,
+			 aufs_bindex_t bbot)
+{
+	return (inode && !S_ISDIR(inode->i_mode)) || btop == bbot;
+}
+
+static int au_test_dbusy(struct dentry *dentry, aufs_bindex_t btop,
+			 aufs_bindex_t bbot)
+{
+	return au_test_ibusy(d_inode(dentry), btop, bbot);
+}
+
+/*
+ * test if the branch is deletable or not.
+ */
+static int test_dentry_busy(struct dentry *root, aufs_bindex_t bindex,
+			    unsigned int sigen, const unsigned int verbose)
+{
+	int err, i, j, ndentry;
+	aufs_bindex_t btop, bbot;
+	struct au_dcsub_pages dpages;
+	struct au_dpage *dpage;
+	struct dentry *d;
+
+	err = au_dpages_init(&dpages, GFP_NOFS);
+	if (unlikely(err))
+		goto out;
+	err = au_dcsub_pages(&dpages, root, NULL, NULL);
+	if (unlikely(err))
+		goto out_dpages;
+
+	for (i = 0; !err && i < dpages.ndpage; i++) {
+		dpage = dpages.dpages + i;
+		ndentry = dpage->ndentry;
+		for (j = 0; !err && j < ndentry; j++) {
+			d = dpage->dentries[j];
+			AuDebugOn(au_dcount(d) <= 0);
+			if (!au_digen_test(d, sigen)) {
+				di_read_lock_child(d, AuLock_IR);
+				if (unlikely(au_dbrange_test(d))) {
+					di_read_unlock(d, AuLock_IR);
+					continue;
+				}
+			} else {
+				di_write_lock_child(d);
+				if (unlikely(au_dbrange_test(d))) {
+					di_write_unlock(d);
+					continue;
+				}
+				err = au_reval_dpath(d, sigen);
+				if (!err)
+					di_downgrade_lock(d, AuLock_IR);
+				else {
+					di_write_unlock(d);
+					break;
+				}
+			}
+
+			/* AuDbgDentry(d); */
+			btop = au_dbtop(d);
+			bbot = au_dbbot(d);
+			if (btop <= bindex
+			    && bindex <= bbot
+			    && au_h_dptr(d, bindex)
+			    && au_test_dbusy(d, btop, bbot)) {
+				err = -EBUSY;
+				AuVerbose(verbose, "busy %pd\n", d);
+				AuDbgDentry(d);
+			}
+			di_read_unlock(d, AuLock_IR);
+		}
+	}
+
+out_dpages:
+	au_dpages_free(&dpages);
+out:
+	return err;
+}
+
+static int test_inode_busy(struct super_block *sb, aufs_bindex_t bindex,
+			   unsigned int sigen, const unsigned int verbose)
+{
+	int err;
+	unsigned long long max, ull;
+	struct inode *i, **array;
+	aufs_bindex_t btop, bbot;
+
+	array = au_iarray_alloc(sb, &max);
+	err = PTR_ERR(array);
+	if (IS_ERR(array))
+		goto out;
+
+	err = 0;
+	AuDbg("b%d\n", bindex);
+	for (ull = 0; !err && ull < max; ull++) {
+		i = array[ull];
+		if (unlikely(!i))
+			break;
+		if (i->i_ino == AUFS_ROOT_INO)
+			continue;
+
+		/* AuDbgInode(i); */
+		if (au_iigen(i, NULL) == sigen)
+			ii_read_lock_child(i);
+		else {
+			ii_write_lock_child(i);
+			err = au_refresh_hinode_self(i);
+			au_iigen_dec(i);
+			if (!err)
+				ii_downgrade_lock(i);
+			else {
+				ii_write_unlock(i);
+				break;
+			}
+		}
+
+		btop = au_ibtop(i);
+		bbot = au_ibbot(i);
+		if (btop <= bindex
+		    && bindex <= bbot
+		    && au_h_iptr(i, bindex)
+		    && au_test_ibusy(i, btop, bbot)) {
+			err = -EBUSY;
+			AuVerbose(verbose, "busy i%lu\n", i->i_ino);
+			AuDbgInode(i);
+		}
+		ii_read_unlock(i);
+	}
+	au_iarray_free(array, max);
+
+out:
+	return err;
+}
+
+static int test_children_busy(struct dentry *root, aufs_bindex_t bindex,
+			      const unsigned int verbose)
+{
+	int err;
+	unsigned int sigen;
+
+	sigen = au_sigen(root->d_sb);
+	DiMustNoWaiters(root);
+	IiMustNoWaiters(d_inode(root));
+	di_write_unlock(root);
+	err = test_dentry_busy(root, bindex, sigen, verbose);
+	if (!err)
+		err = test_inode_busy(root->d_sb, bindex, sigen, verbose);
+	di_write_lock_child(root); /* aufs_write_lock() calls ..._child() */
+
+	return err;
+}
+
+static int test_dir_busy(struct file *file, aufs_bindex_t br_id,
+			 struct file **to_free, int *idx)
+{
+	int err;
+	unsigned char matched, root;
+	aufs_bindex_t bindex, bbot;
+	struct au_fidir *fidir;
+	struct au_hfile *hfile;
+
+	err = 0;
+	root = IS_ROOT(file->f_path.dentry);
+	if (root) {
+		get_file(file);
+		to_free[*idx] = file;
+		(*idx)++;
+		goto out;
+	}
+
+	matched = 0;
+	fidir = au_fi(file)->fi_hdir;
+	AuDebugOn(!fidir);
+	bbot = au_fbbot_dir(file);
+	for (bindex = au_fbtop(file); bindex <= bbot; bindex++) {
+		hfile = fidir->fd_hfile + bindex;
+		if (!hfile->hf_file)
+			continue;
+
+		if (hfile->hf_br->br_id == br_id) {
+			matched = 1;
+			break;
+		}
+	}
+	if (matched)
+		err = -EBUSY;
+
+out:
+	return err;
+}
+
+static int test_file_busy(struct super_block *sb, aufs_bindex_t br_id,
+			  struct file **to_free, int opened)
+{
+	int err, idx;
+	unsigned long long ull, max;
+	aufs_bindex_t btop;
+	struct file *file, **array;
+	struct dentry *root;
+	struct au_hfile *hfile;
+
+	array = au_farray_alloc(sb, &max);
+	err = PTR_ERR(array);
+	if (IS_ERR(array))
+		goto out;
+
+	err = 0;
+	idx = 0;
+	root = sb->s_root;
+	di_write_unlock(root);
+	for (ull = 0; ull < max; ull++) {
+		file = array[ull];
+		if (unlikely(!file))
+			break;
+
+		/* AuDbg("%pD\n", file); */
+		fi_read_lock(file);
+		btop = au_fbtop(file);
+		if (!d_is_dir(file->f_path.dentry)) {
+			hfile = &au_fi(file)->fi_htop;
+			if (hfile->hf_br->br_id == br_id)
+				err = -EBUSY;
+		} else
+			err = test_dir_busy(file, br_id, to_free, &idx);
+		fi_read_unlock(file);
+		if (unlikely(err))
+			break;
+	}
+	di_write_lock_child(root);
+	au_farray_free(array, max);
+	AuDebugOn(idx > opened);
+
+out:
+	return err;
+}
+
+static void br_del_file(struct file **to_free, unsigned long long opened,
+			  aufs_bindex_t br_id)
+{
+	unsigned long long ull;
+	aufs_bindex_t bindex, btop, bbot, bfound;
+	struct file *file;
+	struct au_fidir *fidir;
+	struct au_hfile *hfile;
+
+	for (ull = 0; ull < opened; ull++) {
+		file = to_free[ull];
+		if (unlikely(!file))
+			break;
+
+		/* AuDbg("%pD\n", file); */
+		AuDebugOn(!d_is_dir(file->f_path.dentry));
+		bfound = -1;
+		fidir = au_fi(file)->fi_hdir;
+		AuDebugOn(!fidir);
+		fi_write_lock(file);
+		btop = au_fbtop(file);
+		bbot = au_fbbot_dir(file);
+		for (bindex = btop; bindex <= bbot; bindex++) {
+			hfile = fidir->fd_hfile + bindex;
+			if (!hfile->hf_file)
+				continue;
+
+			if (hfile->hf_br->br_id == br_id) {
+				bfound = bindex;
+				break;
+			}
+		}
+		AuDebugOn(bfound < 0);
+		au_set_h_fptr(file, bfound, NULL);
+		if (bfound == btop) {
+			for (btop++; btop <= bbot; btop++)
+				if (au_hf_dir(file, btop)) {
+					au_set_fbtop(file, btop);
+					break;
+				}
+		}
+		fi_write_unlock(file);
+	}
+}
+
+static void au_br_do_del_brp(struct au_sbinfo *sbinfo,
+			     const aufs_bindex_t bindex,
+			     const aufs_bindex_t bbot)
+{
+	struct au_branch **brp, **p;
+
+	AuRwMustWriteLock(&sbinfo->si_rwsem);
+
+	brp = sbinfo->si_branch + bindex;
+	if (bindex < bbot)
+		memmove(brp, brp + 1, sizeof(*brp) * (bbot - bindex));
+	sbinfo->si_branch[0 + bbot] = NULL;
+	sbinfo->si_bbot--;
+
+	p = au_krealloc(sbinfo->si_branch, sizeof(*p) * bbot, AuGFP_SBILIST,
+			/*may_shrink*/1);
+	if (p)
+		sbinfo->si_branch = p;
+	/* harmless error */
+}
+
+static void au_br_do_del_hdp(struct au_dinfo *dinfo, const aufs_bindex_t bindex,
+			     const aufs_bindex_t bbot)
+{
+	struct au_hdentry *hdp, *p;
+
+	AuRwMustWriteLock(&dinfo->di_rwsem);
+
+	hdp = au_hdentry(dinfo, bindex);
+	if (bindex < bbot)
+		memmove(hdp, hdp + 1, sizeof(*hdp) * (bbot - bindex));
+	/* au_h_dentry_init(au_hdentry(dinfo, bbot); */
+	dinfo->di_bbot--;
+
+	p = au_krealloc(dinfo->di_hdentry, sizeof(*p) * bbot, AuGFP_SBILIST,
+			/*may_shrink*/1);
+	if (p)
+		dinfo->di_hdentry = p;
+	/* harmless error */
+}
+
+static void au_br_do_del_hip(struct au_iinfo *iinfo, const aufs_bindex_t bindex,
+			     const aufs_bindex_t bbot)
+{
+	struct au_hinode *hip, *p;
+
+	AuRwMustWriteLock(&iinfo->ii_rwsem);
+
+	hip = au_hinode(iinfo, bindex);
+	if (bindex < bbot)
+		memmove(hip, hip + 1, sizeof(*hip) * (bbot - bindex));
+	/* au_hinode_init(au_hinode(iinfo, bbot)); */
+	iinfo->ii_bbot--;
+
+	p = au_krealloc(iinfo->ii_hinode, sizeof(*p) * bbot, AuGFP_SBILIST,
+			/*may_shrink*/1);
+	if (p)
+		iinfo->ii_hinode = p;
+	/* harmless error */
+}
+
+static void au_br_do_del(struct super_block *sb, aufs_bindex_t bindex,
+			 struct au_branch *br)
+{
+	aufs_bindex_t bbot;
+	struct au_sbinfo *sbinfo;
+	struct dentry *root, *h_root;
+	struct inode *inode, *h_inode;
+	struct au_hinode *hinode;
+
+	SiMustWriteLock(sb);
+
+	root = sb->s_root;
+	inode = d_inode(root);
+	sbinfo = au_sbi(sb);
+	bbot = sbinfo->si_bbot;
+
+	h_root = au_h_dptr(root, bindex);
+	hinode = au_hi(inode, bindex);
+	h_inode = au_igrab(hinode->hi_inode);
+	au_hiput(hinode);
+
+	au_sbilist_lock();
+	au_br_do_del_brp(sbinfo, bindex, bbot);
+	au_br_do_del_hdp(au_di(root), bindex, bbot);
+	au_br_do_del_hip(au_ii(inode), bindex, bbot);
+	au_sbilist_unlock();
+
+	/* ignore an error */
+	au_dr_br_fin(sb, br); /* always, regardless the mount option */
+
+	dput(h_root);
+	iput(h_inode);
+	au_br_do_free(br);
+}
+
+static unsigned long long empty_cb(struct super_block *sb, void *array,
+				   unsigned long long max, void *arg)
+{
+	return max;
+}
+
+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount)
+{
+	int err, rerr, i;
+	unsigned long long opened;
+	unsigned int mnt_flags;
+	aufs_bindex_t bindex, bbot, br_id;
+	unsigned char do_wh, verbose;
+	struct au_branch *br;
+	struct au_wbr *wbr;
+	struct dentry *root;
+	struct file **to_free;
+
+	err = 0;
+	opened = 0;
+	to_free = NULL;
+	root = sb->s_root;
+	bindex = au_find_dbindex(root, del->h_path.dentry);
+	if (bindex < 0) {
+		if (remount)
+			goto out; /* success */
+		err = -ENOENT;
+		pr_err("%s no such branch\n", del->pathname);
+		goto out;
+	}
+	AuDbg("bindex b%d\n", bindex);
+
+	err = -EBUSY;
+	mnt_flags = au_mntflags(sb);
+	verbose = !!au_opt_test(mnt_flags, VERBOSE);
+	bbot = au_sbbot(sb);
+	if (unlikely(!bbot)) {
+		AuVerbose(verbose, "no more branches left\n");
+		goto out;
+	}
+	br = au_sbr(sb, bindex);
+	AuDebugOn(!path_equal(&br->br_path, &del->h_path));
+
+	br_id = br->br_id;
+	opened = au_br_count(br);
+	if (unlikely(opened)) {
+		to_free = au_array_alloc(&opened, empty_cb, sb, NULL);
+		err = PTR_ERR(to_free);
+		if (IS_ERR(to_free))
+			goto out;
+
+		err = test_file_busy(sb, br_id, to_free, opened);
+		if (unlikely(err)) {
+			AuVerbose(verbose, "%llu file(s) opened\n", opened);
+			goto out;
+		}
+	}
+
+	wbr = br->br_wbr;
+	do_wh = wbr && (wbr->wbr_whbase || wbr->wbr_plink || wbr->wbr_orph);
+	if (do_wh) {
+		/* instead of WbrWhMustWriteLock(wbr) */
+		SiMustWriteLock(sb);
+		for (i = 0; i < AuBrWh_Last; i++) {
+			dput(wbr->wbr_wh[i]);
+			wbr->wbr_wh[i] = NULL;
+		}
+	}
+
+	err = test_children_busy(root, bindex, verbose);
+	if (unlikely(err)) {
+		if (do_wh)
+			goto out_wh;
+		goto out;
+	}
+
+	err = 0;
+	if (to_free) {
+		/*
+		 * now we confirmed the branch is deletable.
+		 * let's free the remaining opened dirs on the branch.
+		 */
+		di_write_unlock(root);
+		br_del_file(to_free, opened, br_id);
+		di_write_lock_child(root);
+	}
+
+	if (!remount)
+		au_br_do_del(sb, bindex, br);
+	else {
+		sysaufs_brs_del(sb, bindex);
+		au_br_do_del(sb, bindex, br);
+		sysaufs_brs_add(sb, bindex);
+	}
+
+	if (!bindex) {
+		au_cpup_attr_all(d_inode(root), /*force*/1);
+		sb->s_maxbytes = au_sbr_sb(sb, 0)->s_maxbytes;
+	} else
+		au_sub_nlink(d_inode(root), d_inode(del->h_path.dentry));
+	if (au_opt_test(mnt_flags, PLINK))
+		au_plink_half_refresh(sb, br_id);
+
+	if (au_xino_brid(sb) == br_id)
+		au_xino_brid_set(sb, -1);
+	goto out; /* success */
+
+out_wh:
+	/* revert */
+	rerr = au_br_init_wh(sb, br, br->br_perm);
+	if (rerr)
+		pr_warn("failed re-creating base whiteout, %s. (%d)\n",
+			del->pathname, rerr);
+out:
+	if (to_free)
+		au_farray_free(to_free, opened);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_ibusy(struct super_block *sb, struct aufs_ibusy __user *arg)
+{
+	int err;
+	aufs_bindex_t btop, bbot;
+	struct aufs_ibusy ibusy;
+	struct inode *inode, *h_inode;
+
+	err = -EPERM;
+	if (unlikely(!capable(CAP_SYS_ADMIN)))
+		goto out;
+
+	err = copy_from_user(&ibusy, arg, sizeof(ibusy));
+	if (!err)
+		err = !access_ok(VERIFY_WRITE, &arg->h_ino, sizeof(arg->h_ino));
+	if (unlikely(err)) {
+		err = -EFAULT;
+		AuTraceErr(err);
+		goto out;
+	}
+
+	err = -EINVAL;
+	si_read_lock(sb, AuLock_FLUSH);
+	if (unlikely(ibusy.bindex < 0 || ibusy.bindex > au_sbbot(sb)))
+		goto out_unlock;
+
+	err = 0;
+	ibusy.h_ino = 0; /* invalid */
+	inode = ilookup(sb, ibusy.ino);
+	if (!inode
+	    || inode->i_ino == AUFS_ROOT_INO
+	    || au_is_bad_inode(inode))
+		goto out_unlock;
+
+	ii_read_lock_child(inode);
+	btop = au_ibtop(inode);
+	bbot = au_ibbot(inode);
+	if (btop <= ibusy.bindex && ibusy.bindex <= bbot) {
+		h_inode = au_h_iptr(inode, ibusy.bindex);
+		if (h_inode && au_test_ibusy(inode, btop, bbot))
+			ibusy.h_ino = h_inode->i_ino;
+	}
+	ii_read_unlock(inode);
+	iput(inode);
+
+out_unlock:
+	si_read_unlock(sb);
+	if (!err) {
+		err = __put_user(ibusy.h_ino, &arg->h_ino);
+		if (unlikely(err)) {
+			err = -EFAULT;
+			AuTraceErr(err);
+		}
+	}
+out:
+	return err;
+}
+
+long au_ibusy_ioctl(struct file *file, unsigned long arg)
+{
+	return au_ibusy(file->f_path.dentry->d_sb, (void __user *)arg);
+}
+
+#ifdef CONFIG_COMPAT
+long au_ibusy_compat_ioctl(struct file *file, unsigned long arg)
+{
+	return au_ibusy(file->f_path.dentry->d_sb, compat_ptr(arg));
+}
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * change a branch permission
+ */
+
+static void au_warn_ima(void)
+{
+#ifdef CONFIG_IMA
+	/* since it doesn't support mark_files_ro() */
+	AuWarn1("RW -> RO makes IMA to produce wrong message\n");
+#endif
+}
+
+static int do_need_sigen_inc(int a, int b)
+{
+	return au_br_whable(a) && !au_br_whable(b);
+}
+
+static int need_sigen_inc(int old, int new)
+{
+	return do_need_sigen_inc(old, new)
+		|| do_need_sigen_inc(new, old);
+}
+
+static int au_br_mod_files_ro(struct super_block *sb, aufs_bindex_t bindex)
+{
+	int err, do_warn;
+	unsigned int mnt_flags;
+	unsigned long long ull, max;
+	aufs_bindex_t br_id;
+	unsigned char verbose, writer;
+	struct file *file, *hf, **array;
+	struct au_hfile *hfile;
+
+	mnt_flags = au_mntflags(sb);
+	verbose = !!au_opt_test(mnt_flags, VERBOSE);
+
+	array = au_farray_alloc(sb, &max);
+	err = PTR_ERR(array);
+	if (IS_ERR(array))
+		goto out;
+
+	do_warn = 0;
+	br_id = au_sbr_id(sb, bindex);
+	for (ull = 0; ull < max; ull++) {
+		file = array[ull];
+		if (unlikely(!file))
+			break;
+
+		/* AuDbg("%pD\n", file); */
+		fi_read_lock(file);
+		if (unlikely(au_test_mmapped(file))) {
+			err = -EBUSY;
+			AuVerbose(verbose, "mmapped %pD\n", file);
+			AuDbgFile(file);
+			FiMustNoWaiters(file);
+			fi_read_unlock(file);
+			goto out_array;
+		}
+
+		hfile = &au_fi(file)->fi_htop;
+		hf = hfile->hf_file;
+		if (!d_is_reg(file->f_path.dentry)
+		    || !(file->f_mode & FMODE_WRITE)
+		    || hfile->hf_br->br_id != br_id
+		    || !(hf->f_mode & FMODE_WRITE))
+			array[ull] = NULL;
+		else {
+			do_warn = 1;
+			get_file(file);
+		}
+
+		FiMustNoWaiters(file);
+		fi_read_unlock(file);
+		fput(file);
+	}
+
+	err = 0;
+	if (do_warn)
+		au_warn_ima();
+
+	for (ull = 0; ull < max; ull++) {
+		file = array[ull];
+		if (!file)
+			continue;
+
+		/* todo: already flushed? */
+		/*
+		 * fs/super.c:mark_files_ro() is gone, but aufs keeps its
+		 * approach which resets f_mode and calls mnt_drop_write() and
+		 * file_release_write() for each file, because the branch
+		 * attribute in aufs world is totally different from the native
+		 * fs rw/ro mode.
+		*/
+		/* fi_read_lock(file); */
+		hfile = &au_fi(file)->fi_htop;
+		hf = hfile->hf_file;
+		/* fi_read_unlock(file); */
+		spin_lock(&hf->f_lock);
+		writer = !!(hf->f_mode & FMODE_WRITER);
+		hf->f_mode &= ~(FMODE_WRITE | FMODE_WRITER);
+		spin_unlock(&hf->f_lock);
+		if (writer) {
+			put_write_access(file_inode(hf));
+			__mnt_drop_write(hf->f_path.mnt);
+		}
+	}
+
+out_array:
+	au_farray_free(array, max);
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
+	      int *do_refresh)
+{
+	int err, rerr;
+	aufs_bindex_t bindex;
+	struct dentry *root;
+	struct au_branch *br;
+	struct au_br_fhsm *bf;
+
+	root = sb->s_root;
+	bindex = au_find_dbindex(root, mod->h_root);
+	if (bindex < 0) {
+		if (remount)
+			return 0; /* success */
+		err = -ENOENT;
+		pr_err("%s no such branch\n", mod->path);
+		goto out;
+	}
+	AuDbg("bindex b%d\n", bindex);
+
+	err = test_br(d_inode(mod->h_root), mod->perm, mod->path);
+	if (unlikely(err))
+		goto out;
+
+	br = au_sbr(sb, bindex);
+	AuDebugOn(mod->h_root != au_br_dentry(br));
+	if (br->br_perm == mod->perm)
+		return 0; /* success */
+
+	/* pre-allocate for non-fhsm --> fhsm */
+	bf = NULL;
+	if (!au_br_fhsm(br->br_perm) && au_br_fhsm(mod->perm)) {
+		err = au_fhsm_br_alloc(br);
+		if (unlikely(err))
+			goto out;
+		bf = br->br_fhsm;
+		br->br_fhsm = NULL;
+	}
+
+	if (au_br_writable(br->br_perm)) {
+		/* remove whiteout base */
+		err = au_br_init_wh(sb, br, mod->perm);
+		if (unlikely(err))
+			goto out_bf;
+
+		if (!au_br_writable(mod->perm)) {
+			/* rw --> ro, file might be mmapped */
+			DiMustNoWaiters(root);
+			IiMustNoWaiters(d_inode(root));
+			di_write_unlock(root);
+			err = au_br_mod_files_ro(sb, bindex);
+			/* aufs_write_lock() calls ..._child() */
+			di_write_lock_child(root);
+
+			if (unlikely(err)) {
+				rerr = -ENOMEM;
+				br->br_wbr = kzalloc(sizeof(*br->br_wbr),
+						     GFP_NOFS);
+				if (br->br_wbr)
+					rerr = au_wbr_init(br, sb, br->br_perm);
+				if (unlikely(rerr)) {
+					AuIOErr("nested error %d (%d)\n",
+						rerr, err);
+					br->br_perm = mod->perm;
+				}
+			}
+		}
+	} else if (au_br_writable(mod->perm)) {
+		/* ro --> rw */
+		err = -ENOMEM;
+		br->br_wbr = kzalloc(sizeof(*br->br_wbr), GFP_NOFS);
+		if (br->br_wbr) {
+			err = au_wbr_init(br, sb, mod->perm);
+			if (unlikely(err)) {
+				kfree(br->br_wbr);
+				br->br_wbr = NULL;
+			}
+		}
+	}
+	if (unlikely(err))
+		goto out_bf;
+
+	if (au_br_fhsm(br->br_perm)) {
+		if (!au_br_fhsm(mod->perm)) {
+			/* fhsm --> non-fhsm */
+			au_br_fhsm_fin(br->br_fhsm);
+			kfree(br->br_fhsm);
+			br->br_fhsm = NULL;
+		}
+	} else if (au_br_fhsm(mod->perm))
+		/* non-fhsm --> fhsm */
+		br->br_fhsm = bf;
+
+	*do_refresh |= need_sigen_inc(br->br_perm, mod->perm);
+	br->br_perm = mod->perm;
+	goto out; /* success */
+
+out_bf:
+	kfree(bf);
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int au_br_stfs(struct au_branch *br, struct aufs_stfs *stfs)
+{
+	int err;
+	struct kstatfs kstfs;
+
+	err = vfs_statfs(&br->br_path, &kstfs);
+	if (!err) {
+		stfs->f_blocks = kstfs.f_blocks;
+		stfs->f_bavail = kstfs.f_bavail;
+		stfs->f_files = kstfs.f_files;
+		stfs->f_ffree = kstfs.f_ffree;
+	}
+
+	return err;
+}
diff --git a/include/fs/aufs/branch.h b/include/fs/aufs/branch.h
new file mode 100644
index 000000000..65fdd2aa6
--- /dev/null
+++ b/include/fs/aufs/branch.h
@@ -0,0 +1,333 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * branch filesystems and xino for them
+ */
+
+#ifndef __AUFS_BRANCH_H__
+#define __AUFS_BRANCH_H__
+
+#ifdef __KERNEL__
+
+#include <linux/mount.h>
+#include "dirren.h"
+#include "dynop.h"
+#include "rwsem.h"
+#include "super.h"
+
+/* ---------------------------------------------------------------------- */
+
+/* a xino file */
+struct au_xino_file {
+	struct file		*xi_file;
+	struct {
+		spinlock_t		spin;
+		ino_t			*array;
+		int			total;
+		/* reserved for future use */
+		/* unsigned long	*bitmap; */
+		wait_queue_head_t	wqh;
+	} xi_nondir;
+
+	/* todo: make xino files an array to support huge inode number */
+
+#ifdef CONFIG_DEBUG_FS
+	struct dentry		 *xi_dbgaufs;
+#endif
+};
+
+/* File-based Hierarchical Storage Management */
+struct au_br_fhsm {
+#ifdef CONFIG_AUFS_FHSM
+	struct mutex		bf_lock;
+	unsigned long		bf_jiffy;
+	struct aufs_stfs	bf_stfs;
+	int			bf_readable;
+#endif
+};
+
+/* members for writable branch only */
+enum {AuBrWh_BASE, AuBrWh_PLINK, AuBrWh_ORPH, AuBrWh_Last};
+struct au_wbr {
+	struct au_rwsem		wbr_wh_rwsem;
+	struct dentry		*wbr_wh[AuBrWh_Last];
+	atomic_t		wbr_wh_running;
+#define wbr_whbase		wbr_wh[AuBrWh_BASE]	/* whiteout base */
+#define wbr_plink		wbr_wh[AuBrWh_PLINK]	/* pseudo-link dir */
+#define wbr_orph		wbr_wh[AuBrWh_ORPH]	/* dir for orphans */
+
+	/* mfs mode */
+	unsigned long long	wbr_bytes;
+};
+
+/* ext2 has 3 types of operations at least, ext3 has 4 */
+#define AuBrDynOp (AuDyLast * 4)
+
+#ifdef CONFIG_AUFS_HFSNOTIFY
+/* support for asynchronous destruction */
+struct au_br_hfsnotify {
+	struct fsnotify_group	*hfsn_group;
+};
+#endif
+
+/* sysfs entries */
+struct au_brsysfs {
+	char			name[16];
+	struct attribute	attr;
+};
+
+enum {
+	AuBrSysfs_BR,
+	AuBrSysfs_BRID,
+	AuBrSysfs_Last
+};
+
+/* protected by superblock rwsem */
+struct au_branch {
+	struct au_xino_file	br_xino;
+
+	aufs_bindex_t		br_id;
+
+	int			br_perm;
+	struct path		br_path;
+	spinlock_t		br_dykey_lock;
+	struct au_dykey		*br_dykey[AuBrDynOp];
+	struct percpu_counter	br_count;
+
+	struct au_wbr		*br_wbr;
+	struct au_br_fhsm	*br_fhsm;
+
+	/* xino truncation */
+	atomic_t		br_xino_running;
+
+#ifdef CONFIG_AUFS_HFSNOTIFY
+	struct au_br_hfsnotify	*br_hfsn;
+#endif
+
+#ifdef CONFIG_SYSFS
+	/* entries under sysfs per mount-point */
+	struct au_brsysfs	br_sysfs[AuBrSysfs_Last];
+#endif
+
+	struct au_dr_br		br_dirren;
+};
+
+/* ---------------------------------------------------------------------- */
+
+static inline struct vfsmount *au_br_mnt(struct au_branch *br)
+{
+	return br->br_path.mnt;
+}
+
+static inline struct dentry *au_br_dentry(struct au_branch *br)
+{
+	return br->br_path.dentry;
+}
+
+static inline struct super_block *au_br_sb(struct au_branch *br)
+{
+	return au_br_mnt(br)->mnt_sb;
+}
+
+static inline void au_br_get(struct au_branch *br)
+{
+	percpu_counter_inc(&br->br_count);
+}
+
+static inline void au_br_put(struct au_branch *br)
+{
+	percpu_counter_dec(&br->br_count);
+}
+
+static inline s64 au_br_count(struct au_branch *br)
+{
+	return percpu_counter_sum(&br->br_count);
+}
+
+static inline void au_br_count_init(struct au_branch *br)
+{
+	percpu_counter_init(&br->br_count, 0, GFP_NOFS);
+}
+
+static inline void au_br_count_fin(struct au_branch *br)
+{
+	percpu_counter_destroy(&br->br_count);
+}
+
+static inline int au_br_rdonly(struct au_branch *br)
+{
+	return (sb_rdonly(au_br_sb(br))
+		|| !au_br_writable(br->br_perm))
+		? -EROFS : 0;
+}
+
+static inline int au_br_hnotifyable(int brperm __maybe_unused)
+{
+#ifdef CONFIG_AUFS_HNOTIFY
+	return !(brperm & AuBrPerm_RR);
+#else
+	return 0;
+#endif
+}
+
+static inline int au_br_test_oflag(int oflag, struct au_branch *br)
+{
+	int err, exec_flag;
+
+	err = 0;
+	exec_flag = oflag & __FMODE_EXEC;
+	if (unlikely(exec_flag && path_noexec(&br->br_path)))
+		err = -EACCES;
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* branch.c */
+struct au_sbinfo;
+void au_br_free(struct au_sbinfo *sinfo);
+int au_br_index(struct super_block *sb, aufs_bindex_t br_id);
+struct au_opt_add;
+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount);
+struct au_opt_del;
+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount);
+long au_ibusy_ioctl(struct file *file, unsigned long arg);
+#ifdef CONFIG_COMPAT
+long au_ibusy_compat_ioctl(struct file *file, unsigned long arg);
+#endif
+struct au_opt_mod;
+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
+	      int *do_refresh);
+struct aufs_stfs;
+int au_br_stfs(struct au_branch *br, struct aufs_stfs *stfs);
+
+/* xino.c */
+static const loff_t au_loff_max = LLONG_MAX;
+
+int au_xib_trunc(struct super_block *sb);
+ssize_t xino_fread(vfs_readf_t func, struct file *file, void *buf, size_t size,
+		   loff_t *pos);
+ssize_t xino_fwrite(vfs_writef_t func, struct file *file, void *buf,
+		    size_t size, loff_t *pos);
+struct file *au_xino_create2(struct file *base_file, struct file *copy_src);
+struct file *au_xino_create(struct super_block *sb, char *fname, int silent);
+ino_t au_xino_new_ino(struct super_block *sb);
+void au_xino_delete_inode(struct inode *inode, const int unlinked);
+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
+		  ino_t ino);
+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
+		 ino_t *ino);
+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t hino,
+	       struct file *base_file, int do_test);
+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex);
+
+struct au_opt_xino;
+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount);
+void au_xino_clr(struct super_block *sb);
+struct file *au_xino_def(struct super_block *sb);
+int au_xino_path(struct seq_file *seq, struct file *file);
+
+void au_xinondir_leave(struct super_block *sb, aufs_bindex_t bindex,
+		       ino_t h_ino, int idx);
+int au_xinondir_enter(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
+		      int *idx);
+
+/* ---------------------------------------------------------------------- */
+
+/* Superblock to branch */
+static inline
+aufs_bindex_t au_sbr_id(struct super_block *sb, aufs_bindex_t bindex)
+{
+	return au_sbr(sb, bindex)->br_id;
+}
+
+static inline
+struct vfsmount *au_sbr_mnt(struct super_block *sb, aufs_bindex_t bindex)
+{
+	return au_br_mnt(au_sbr(sb, bindex));
+}
+
+static inline
+struct super_block *au_sbr_sb(struct super_block *sb, aufs_bindex_t bindex)
+{
+	return au_br_sb(au_sbr(sb, bindex));
+}
+
+static inline void au_sbr_get(struct super_block *sb, aufs_bindex_t bindex)
+{
+	au_br_get(au_sbr(sb, bindex));
+}
+
+static inline void au_sbr_put(struct super_block *sb, aufs_bindex_t bindex)
+{
+	au_br_put(au_sbr(sb, bindex));
+}
+
+static inline int au_sbr_perm(struct super_block *sb, aufs_bindex_t bindex)
+{
+	return au_sbr(sb, bindex)->br_perm;
+}
+
+static inline int au_sbr_whable(struct super_block *sb, aufs_bindex_t bindex)
+{
+	return au_br_whable(au_sbr_perm(sb, bindex));
+}
+
+/* ---------------------------------------------------------------------- */
+
+#define wbr_wh_read_lock(wbr)	au_rw_read_lock(&(wbr)->wbr_wh_rwsem)
+#define wbr_wh_write_lock(wbr)	au_rw_write_lock(&(wbr)->wbr_wh_rwsem)
+#define wbr_wh_read_trylock(wbr)	au_rw_read_trylock(&(wbr)->wbr_wh_rwsem)
+#define wbr_wh_write_trylock(wbr) au_rw_write_trylock(&(wbr)->wbr_wh_rwsem)
+/*
+#define wbr_wh_read_trylock_nested(wbr) \
+	au_rw_read_trylock_nested(&(wbr)->wbr_wh_rwsem)
+#define wbr_wh_write_trylock_nested(wbr) \
+	au_rw_write_trylock_nested(&(wbr)->wbr_wh_rwsem)
+*/
+
+#define wbr_wh_read_unlock(wbr)	au_rw_read_unlock(&(wbr)->wbr_wh_rwsem)
+#define wbr_wh_write_unlock(wbr)	au_rw_write_unlock(&(wbr)->wbr_wh_rwsem)
+#define wbr_wh_downgrade_lock(wbr)	au_rw_dgrade_lock(&(wbr)->wbr_wh_rwsem)
+
+#define WbrWhMustNoWaiters(wbr)	AuRwMustNoWaiters(&(wbr)->wbr_wh_rwsem)
+#define WbrWhMustAnyLock(wbr)	AuRwMustAnyLock(&(wbr)->wbr_wh_rwsem)
+#define WbrWhMustWriteLock(wbr)	AuRwMustWriteLock(&(wbr)->wbr_wh_rwsem)
+
+/* ---------------------------------------------------------------------- */
+
+#ifdef CONFIG_AUFS_FHSM
+static inline void au_br_fhsm_init(struct au_br_fhsm *brfhsm)
+{
+	mutex_init(&brfhsm->bf_lock);
+	brfhsm->bf_jiffy = 0;
+	brfhsm->bf_readable = 0;
+}
+
+static inline void au_br_fhsm_fin(struct au_br_fhsm *brfhsm)
+{
+	mutex_destroy(&brfhsm->bf_lock);
+}
+#else
+AuStubVoid(au_br_fhsm_init, struct au_br_fhsm *brfhsm)
+AuStubVoid(au_br_fhsm_fin, struct au_br_fhsm *brfhsm)
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_BRANCH_H__ */
diff --git a/include/fs/aufs/conf.mk b/include/fs/aufs/conf.mk
new file mode 100644
index 000000000..12782f8e0
--- /dev/null
+++ b/include/fs/aufs/conf.mk
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: GPL-2.0
+
+AuConfStr = CONFIG_AUFS_FS=${CONFIG_AUFS_FS}
+
+define AuConf
+ifdef ${1}
+AuConfStr += ${1}=${${1}}
+endif
+endef
+
+AuConfAll = BRANCH_MAX_127 BRANCH_MAX_511 BRANCH_MAX_1023 BRANCH_MAX_32767 \
+	SBILIST \
+	HNOTIFY HFSNOTIFY \
+	EXPORT INO_T_64 \
+	XATTR \
+	FHSM \
+	RDU \
+	DIRREN \
+	SHWH \
+	BR_RAMFS \
+	BR_FUSE POLL \
+	BR_HFSPLUS \
+	BDEV_LOOP \
+	DEBUG MAGIC_SYSRQ
+$(foreach i, ${AuConfAll}, \
+	$(eval $(call AuConf,CONFIG_AUFS_${i})))
+
+AuConfName = ${obj}/conf.str
+${AuConfName}.tmp: FORCE
+	@echo ${AuConfStr} | tr ' ' '\n' | sed -e 's/^/"/' -e 's/$$/\\n"/' > $@
+${AuConfName}: ${AuConfName}.tmp
+	@diff -q $< $@ > /dev/null 2>&1 || { \
+	echo '  GEN    ' $@; \
+	cp -p $< $@; \
+	}
+FORCE:
+clean-files += ${AuConfName} ${AuConfName}.tmp
+${obj}/sysfs.o: ${AuConfName}
+
+-include ${srctree}/${src}/conf_priv.mk
diff --git a/include/fs/aufs/cpup.c b/include/fs/aufs/cpup.c
new file mode 100644
index 000000000..1289cd965
--- /dev/null
+++ b/include/fs/aufs/cpup.c
@@ -0,0 +1,1442 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * copy-up functions, see wbr_policy.c for copy-down
+ */
+
+#include <linux/fs_stack.h>
+#include <linux/mm.h>
+#include <linux/task_work.h>
+#include "aufs.h"
+
+void au_cpup_attr_flags(struct inode *dst, unsigned int iflags)
+{
+	const unsigned int mask = S_DEAD | S_SWAPFILE | S_PRIVATE
+		| S_NOATIME | S_NOCMTIME | S_AUTOMOUNT;
+
+	BUILD_BUG_ON(sizeof(iflags) != sizeof(dst->i_flags));
+
+	dst->i_flags |= iflags & ~mask;
+	if (au_test_fs_notime(dst->i_sb))
+		dst->i_flags |= S_NOATIME | S_NOCMTIME;
+}
+
+void au_cpup_attr_timesizes(struct inode *inode)
+{
+	struct inode *h_inode;
+
+	h_inode = au_h_iptr(inode, au_ibtop(inode));
+	fsstack_copy_attr_times(inode, h_inode);
+	fsstack_copy_inode_size(inode, h_inode);
+}
+
+void au_cpup_attr_nlink(struct inode *inode, int force)
+{
+	struct inode *h_inode;
+	struct super_block *sb;
+	aufs_bindex_t bindex, bbot;
+
+	sb = inode->i_sb;
+	bindex = au_ibtop(inode);
+	h_inode = au_h_iptr(inode, bindex);
+	if (!force
+	    && !S_ISDIR(h_inode->i_mode)
+	    && au_opt_test(au_mntflags(sb), PLINK)
+	    && au_plink_test(inode))
+		return;
+
+	/*
+	 * 0 can happen in revalidating.
+	 * h_inode->i_mutex may not be held here, but it is harmless since once
+	 * i_nlink reaches 0, it will never become positive except O_TMPFILE
+	 * case.
+	 * todo: O_TMPFILE+linkat(AT_SYMLINK_FOLLOW) bypassing aufs may cause
+	 *	 the incorrect link count.
+	 */
+	set_nlink(inode, h_inode->i_nlink);
+
+	/*
+	 * fewer nlink makes find(1) noisy, but larger nlink doesn't.
+	 * it may includes whplink directory.
+	 */
+	if (S_ISDIR(h_inode->i_mode)) {
+		bbot = au_ibbot(inode);
+		for (bindex++; bindex <= bbot; bindex++) {
+			h_inode = au_h_iptr(inode, bindex);
+			if (h_inode)
+				au_add_nlink(inode, h_inode);
+		}
+	}
+}
+
+void au_cpup_attr_changeable(struct inode *inode)
+{
+	struct inode *h_inode;
+
+	h_inode = au_h_iptr(inode, au_ibtop(inode));
+	inode->i_mode = h_inode->i_mode;
+	inode->i_uid = h_inode->i_uid;
+	inode->i_gid = h_inode->i_gid;
+	au_cpup_attr_timesizes(inode);
+	au_cpup_attr_flags(inode, h_inode->i_flags);
+}
+
+void au_cpup_igen(struct inode *inode, struct inode *h_inode)
+{
+	struct au_iinfo *iinfo = au_ii(inode);
+
+	IiMustWriteLock(inode);
+
+	iinfo->ii_higen = h_inode->i_generation;
+	iinfo->ii_hsb1 = h_inode->i_sb;
+}
+
+void au_cpup_attr_all(struct inode *inode, int force)
+{
+	struct inode *h_inode;
+
+	h_inode = au_h_iptr(inode, au_ibtop(inode));
+	au_cpup_attr_changeable(inode);
+	if (inode->i_nlink > 0)
+		au_cpup_attr_nlink(inode, force);
+	inode->i_rdev = h_inode->i_rdev;
+	inode->i_blkbits = h_inode->i_blkbits;
+	au_cpup_igen(inode, h_inode);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* Note: dt_dentry and dt_h_dentry are not dget/dput-ed */
+
+/* keep the timestamps of the parent dir when cpup */
+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
+		    struct path *h_path)
+{
+	struct inode *h_inode;
+
+	dt->dt_dentry = dentry;
+	dt->dt_h_path = *h_path;
+	h_inode = d_inode(h_path->dentry);
+	dt->dt_atime = h_inode->i_atime;
+	dt->dt_mtime = h_inode->i_mtime;
+	/* smp_mb(); */
+}
+
+void au_dtime_revert(struct au_dtime *dt)
+{
+	struct iattr attr;
+	int err;
+
+	attr.ia_atime = dt->dt_atime;
+	attr.ia_mtime = dt->dt_mtime;
+	attr.ia_valid = ATTR_FORCE | ATTR_MTIME | ATTR_MTIME_SET
+		| ATTR_ATIME | ATTR_ATIME_SET;
+
+	/* no delegation since this is a directory */
+	err = vfsub_notify_change(&dt->dt_h_path, &attr, /*delegated*/NULL);
+	if (unlikely(err))
+		pr_warn("restoring timestamps failed(%d). ignored\n", err);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* internal use only */
+struct au_cpup_reg_attr {
+	int		valid;
+	struct kstat	st;
+	unsigned int	iflags; /* inode->i_flags */
+};
+
+static noinline_for_stack
+int cpup_iattr(struct dentry *dst, aufs_bindex_t bindex, struct dentry *h_src,
+	       struct au_cpup_reg_attr *h_src_attr)
+{
+	int err, sbits, icex;
+	unsigned int mnt_flags;
+	unsigned char verbose;
+	struct iattr ia;
+	struct path h_path;
+	struct inode *h_isrc, *h_idst;
+	struct kstat *h_st;
+	struct au_branch *br;
+
+	h_path.dentry = au_h_dptr(dst, bindex);
+	h_idst = d_inode(h_path.dentry);
+	br = au_sbr(dst->d_sb, bindex);
+	h_path.mnt = au_br_mnt(br);
+	h_isrc = d_inode(h_src);
+	ia.ia_valid = ATTR_FORCE | ATTR_UID | ATTR_GID
+		| ATTR_ATIME | ATTR_MTIME
+		| ATTR_ATIME_SET | ATTR_MTIME_SET;
+	if (h_src_attr && h_src_attr->valid) {
+		h_st = &h_src_attr->st;
+		ia.ia_uid = h_st->uid;
+		ia.ia_gid = h_st->gid;
+		ia.ia_atime = h_st->atime;
+		ia.ia_mtime = h_st->mtime;
+		if (h_idst->i_mode != h_st->mode
+		    && !S_ISLNK(h_idst->i_mode)) {
+			ia.ia_valid |= ATTR_MODE;
+			ia.ia_mode = h_st->mode;
+		}
+		sbits = !!(h_st->mode & (S_ISUID | S_ISGID));
+		au_cpup_attr_flags(h_idst, h_src_attr->iflags);
+	} else {
+		ia.ia_uid = h_isrc->i_uid;
+		ia.ia_gid = h_isrc->i_gid;
+		ia.ia_atime = h_isrc->i_atime;
+		ia.ia_mtime = h_isrc->i_mtime;
+		if (h_idst->i_mode != h_isrc->i_mode
+		    && !S_ISLNK(h_idst->i_mode)) {
+			ia.ia_valid |= ATTR_MODE;
+			ia.ia_mode = h_isrc->i_mode;
+		}
+		sbits = !!(h_isrc->i_mode & (S_ISUID | S_ISGID));
+		au_cpup_attr_flags(h_idst, h_isrc->i_flags);
+	}
+	/* no delegation since it is just created */
+	err = vfsub_notify_change(&h_path, &ia, /*delegated*/NULL);
+
+	/* is this nfs only? */
+	if (!err && sbits && au_test_nfs(h_path.dentry->d_sb)) {
+		ia.ia_valid = ATTR_FORCE | ATTR_MODE;
+		ia.ia_mode = h_isrc->i_mode;
+		err = vfsub_notify_change(&h_path, &ia, /*delegated*/NULL);
+	}
+
+	icex = br->br_perm & AuBrAttr_ICEX;
+	if (!err) {
+		mnt_flags = au_mntflags(dst->d_sb);
+		verbose = !!au_opt_test(mnt_flags, VERBOSE);
+		err = au_cpup_xattr(h_path.dentry, h_src, icex, verbose);
+	}
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_do_copy_file(struct file *dst, struct file *src, loff_t len,
+			   char *buf, unsigned long blksize)
+{
+	int err;
+	size_t sz, rbytes, wbytes;
+	unsigned char all_zero;
+	char *p, *zp;
+	struct inode *h_inode;
+	/* reduce stack usage */
+	struct iattr *ia;
+
+	zp = page_address(ZERO_PAGE(0));
+	if (unlikely(!zp))
+		return -ENOMEM; /* possible? */
+
+	err = 0;
+	all_zero = 0;
+	while (len) {
+		AuDbg("len %lld\n", len);
+		sz = blksize;
+		if (len < blksize)
+			sz = len;
+
+		rbytes = 0;
+		/* todo: signal_pending? */
+		while (!rbytes || err == -EAGAIN || err == -EINTR) {
+			rbytes = vfsub_read_k(src, buf, sz, &src->f_pos);
+			err = rbytes;
+		}
+		if (unlikely(err < 0))
+			break;
+
+		all_zero = 0;
+		if (len >= rbytes && rbytes == blksize)
+			all_zero = !memcmp(buf, zp, rbytes);
+		if (!all_zero) {
+			wbytes = rbytes;
+			p = buf;
+			while (wbytes) {
+				size_t b;
+
+				b = vfsub_write_k(dst, p, wbytes, &dst->f_pos);
+				err = b;
+				/* todo: signal_pending? */
+				if (unlikely(err == -EAGAIN || err == -EINTR))
+					continue;
+				if (unlikely(err < 0))
+					break;
+				wbytes -= b;
+				p += b;
+			}
+			if (unlikely(err < 0))
+				break;
+		} else {
+			loff_t res;
+
+			AuLabel(hole);
+			res = vfsub_llseek(dst, rbytes, SEEK_CUR);
+			err = res;
+			if (unlikely(res < 0))
+				break;
+		}
+		len -= rbytes;
+		err = 0;
+	}
+
+	/* the last block may be a hole */
+	if (!err && all_zero) {
+		AuLabel(last hole);
+
+		err = 1;
+		if (au_test_nfs(dst->f_path.dentry->d_sb)) {
+			/* nfs requires this step to make last hole */
+			/* is this only nfs? */
+			do {
+				/* todo: signal_pending? */
+				err = vfsub_write_k(dst, "\0", 1, &dst->f_pos);
+			} while (err == -EAGAIN || err == -EINTR);
+			if (err == 1)
+				dst->f_pos--;
+		}
+
+		if (err == 1) {
+			ia = (void *)buf;
+			ia->ia_size = dst->f_pos;
+			ia->ia_valid = ATTR_SIZE | ATTR_FILE;
+			ia->ia_file = dst;
+			h_inode = file_inode(dst);
+			inode_lock_nested(h_inode, AuLsc_I_CHILD2);
+			/* no delegation since it is just created */
+			err = vfsub_notify_change(&dst->f_path, ia,
+						  /*delegated*/NULL);
+			inode_unlock(h_inode);
+		}
+	}
+
+	return err;
+}
+
+int au_copy_file(struct file *dst, struct file *src, loff_t len)
+{
+	int err;
+	unsigned long blksize;
+	unsigned char do_kfree;
+	char *buf;
+
+	err = -ENOMEM;
+	blksize = dst->f_path.dentry->d_sb->s_blocksize;
+	if (!blksize || PAGE_SIZE < blksize)
+		blksize = PAGE_SIZE;
+	AuDbg("blksize %lu\n", blksize);
+	do_kfree = (blksize != PAGE_SIZE && blksize >= sizeof(struct iattr *));
+	if (do_kfree)
+		buf = kmalloc(blksize, GFP_NOFS);
+	else
+		buf = (void *)__get_free_page(GFP_NOFS);
+	if (unlikely(!buf))
+		goto out;
+
+	if (len > (1 << 22))
+		AuDbg("copying a large file %lld\n", (long long)len);
+
+	src->f_pos = 0;
+	dst->f_pos = 0;
+	err = au_do_copy_file(dst, src, len, buf, blksize);
+	if (do_kfree)
+		kfree(buf);
+	else
+		free_page((unsigned long)buf);
+
+out:
+	return err;
+}
+
+static int au_do_copy(struct file *dst, struct file *src, loff_t len)
+{
+	int err;
+	struct super_block *h_src_sb;
+	struct inode *h_src_inode;
+
+	h_src_inode = file_inode(src);
+	h_src_sb = h_src_inode->i_sb;
+
+	/* XFS acquires inode_lock */
+	if (!au_test_xfs(h_src_sb))
+		err = au_copy_file(dst, src, len);
+	else {
+		inode_unlock_shared(h_src_inode);
+		err = au_copy_file(dst, src, len);
+		vfsub_inode_lock_shared_nested(h_src_inode, AuLsc_I_CHILD);
+	}
+
+	return err;
+}
+
+static int au_clone_or_copy(struct file *dst, struct file *src, loff_t len)
+{
+	int err;
+	struct super_block *h_src_sb;
+	struct inode *h_src_inode;
+
+	h_src_inode = file_inode(src);
+	h_src_sb = h_src_inode->i_sb;
+	if (h_src_sb != file_inode(dst)->i_sb
+	    || !dst->f_op->clone_file_range) {
+		err = au_do_copy(dst, src, len);
+		goto out;
+	}
+
+	if (!au_test_nfs(h_src_sb)) {
+		inode_unlock_shared(h_src_inode);
+		err = vfsub_clone_file_range(src, dst, len);
+		vfsub_inode_lock_shared_nested(h_src_inode, AuLsc_I_CHILD);
+	} else
+		err = vfsub_clone_file_range(src, dst, len);
+	/* older XFS has a condition in cloning */
+	if (unlikely(err != -EOPNOTSUPP))
+		goto out;
+
+	/* the backend fs on NFS may not support cloning */
+	err = au_do_copy(dst, src, len);
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/*
+ * to support a sparse file which is opened with O_APPEND,
+ * we need to close the file.
+ */
+static int au_cp_regular(struct au_cp_generic *cpg)
+{
+	int err, i;
+	enum { SRC, DST };
+	struct {
+		aufs_bindex_t bindex;
+		unsigned int flags;
+		struct dentry *dentry;
+		int force_wr;
+		struct file *file;
+		void *label;
+	} *f, file[] = {
+		{
+			.bindex = cpg->bsrc,
+			.flags = O_RDONLY | O_NOATIME | O_LARGEFILE,
+			.label = &&out
+		},
+		{
+			.bindex = cpg->bdst,
+			.flags = O_WRONLY | O_NOATIME | O_LARGEFILE,
+			.force_wr = !!au_ftest_cpup(cpg->flags, RWDST),
+			.label = &&out_src
+		}
+	};
+	struct super_block *sb, *h_src_sb;
+	struct inode *h_src_inode;
+	struct task_struct *tsk = current;
+
+	/* bsrc branch can be ro/rw. */
+	sb = cpg->dentry->d_sb;
+	f = file;
+	for (i = 0; i < 2; i++, f++) {
+		f->dentry = au_h_dptr(cpg->dentry, f->bindex);
+		f->file = au_h_open(cpg->dentry, f->bindex, f->flags,
+				    /*file*/NULL, f->force_wr);
+		err = PTR_ERR(f->file);
+		if (IS_ERR(f->file))
+			goto *f->label;
+	}
+
+	/* try stopping to update while we copyup */
+	h_src_inode = d_inode(file[SRC].dentry);
+	h_src_sb = h_src_inode->i_sb;
+	if (!au_test_nfs(h_src_sb))
+		IMustLock(h_src_inode);
+	err = au_clone_or_copy(file[DST].file, file[SRC].file, cpg->len);
+
+	/* i wonder if we had O_NO_DELAY_FPUT flag */
+	if (tsk->flags & PF_KTHREAD)
+		__fput_sync(file[DST].file);
+	else {
+		/* it happend actually */
+		fput(file[DST].file);
+		/*
+		 * too bad.
+		 * we have to call both since we don't know which place the file
+		 * was added to.
+		 */
+		task_work_run();
+		flush_delayed_fput();
+	}
+	au_sbr_put(sb, file[DST].bindex);
+
+out_src:
+	fput(file[SRC].file);
+	au_sbr_put(sb, file[SRC].bindex);
+out:
+	return err;
+}
+
+static int au_do_cpup_regular(struct au_cp_generic *cpg,
+			      struct au_cpup_reg_attr *h_src_attr)
+{
+	int err, rerr;
+	loff_t l;
+	struct path h_path;
+	struct inode *h_src_inode, *h_dst_inode;
+
+	err = 0;
+	h_src_inode = au_h_iptr(d_inode(cpg->dentry), cpg->bsrc);
+	l = i_size_read(h_src_inode);
+	if (cpg->len == -1 || l < cpg->len)
+		cpg->len = l;
+	if (cpg->len) {
+		/* try stopping to update while we are referencing */
+		vfsub_inode_lock_shared_nested(h_src_inode, AuLsc_I_CHILD);
+		au_pin_hdir_unlock(cpg->pin);
+
+		h_path.dentry = au_h_dptr(cpg->dentry, cpg->bsrc);
+		h_path.mnt = au_sbr_mnt(cpg->dentry->d_sb, cpg->bsrc);
+		h_src_attr->iflags = h_src_inode->i_flags;
+		if (!au_test_nfs(h_src_inode->i_sb))
+			err = vfsub_getattr(&h_path, &h_src_attr->st);
+		else {
+			inode_unlock_shared(h_src_inode);
+			err = vfsub_getattr(&h_path, &h_src_attr->st);
+			vfsub_inode_lock_shared_nested(h_src_inode,
+						       AuLsc_I_CHILD);
+		}
+		if (unlikely(err)) {
+			inode_unlock_shared(h_src_inode);
+			goto out;
+		}
+		h_src_attr->valid = 1;
+		if (!au_test_nfs(h_src_inode->i_sb)) {
+			err = au_cp_regular(cpg);
+			inode_unlock_shared(h_src_inode);
+		} else {
+			inode_unlock_shared(h_src_inode);
+			err = au_cp_regular(cpg);
+		}
+		rerr = au_pin_hdir_relock(cpg->pin);
+		if (!err && rerr)
+			err = rerr;
+	}
+	if (!err && (h_src_inode->i_state & I_LINKABLE)) {
+		h_path.dentry = au_h_dptr(cpg->dentry, cpg->bdst);
+		h_dst_inode = d_inode(h_path.dentry);
+		spin_lock(&h_dst_inode->i_lock);
+		h_dst_inode->i_state |= I_LINKABLE;
+		spin_unlock(&h_dst_inode->i_lock);
+	}
+
+out:
+	return err;
+}
+
+static int au_do_cpup_symlink(struct path *h_path, struct dentry *h_src,
+			      struct inode *h_dir)
+{
+	int err, symlen;
+	mm_segment_t old_fs;
+	union {
+		char *k;
+		char __user *u;
+	} sym;
+
+	err = -ENOMEM;
+	sym.k = (void *)__get_free_page(GFP_NOFS);
+	if (unlikely(!sym.k))
+		goto out;
+
+	/* unnecessary to support mmap_sem since symlink is not mmap-able */
+	old_fs = get_fs();
+	set_fs(KERNEL_DS);
+	symlen = vfs_readlink(h_src, sym.u, PATH_MAX);
+	err = symlen;
+	set_fs(old_fs);
+
+	if (symlen > 0) {
+		sym.k[symlen] = 0;
+		err = vfsub_symlink(h_dir, h_path, sym.k);
+	}
+	free_page((unsigned long)sym.k);
+
+out:
+	return err;
+}
+
+/*
+ * regardless 'acl' option, reset all ACL.
+ * All ACL will be copied up later from the original entry on the lower branch.
+ */
+static int au_reset_acl(struct inode *h_dir, struct path *h_path, umode_t mode)
+{
+	int err;
+	struct dentry *h_dentry;
+	struct inode *h_inode;
+
+	h_dentry = h_path->dentry;
+	h_inode = d_inode(h_dentry);
+	/* forget_all_cached_acls(h_inode)); */
+	err = vfsub_removexattr(h_dentry, XATTR_NAME_POSIX_ACL_ACCESS);
+	AuTraceErr(err);
+	if (err == -EOPNOTSUPP)
+		err = 0;
+	if (!err)
+		err = vfsub_acl_chmod(h_inode, mode);
+
+	AuTraceErr(err);
+	return err;
+}
+
+static int au_do_cpup_dir(struct au_cp_generic *cpg, struct dentry *dst_parent,
+			  struct inode *h_dir, struct path *h_path)
+{
+	int err;
+	struct inode *dir, *inode;
+
+	err = vfsub_removexattr(h_path->dentry, XATTR_NAME_POSIX_ACL_DEFAULT);
+	AuTraceErr(err);
+	if (err == -EOPNOTSUPP)
+		err = 0;
+	if (unlikely(err))
+		goto out;
+
+	/*
+	 * strange behaviour from the users view,
+	 * particularry setattr case
+	 */
+	dir = d_inode(dst_parent);
+	if (au_ibtop(dir) == cpg->bdst)
+		au_cpup_attr_nlink(dir, /*force*/1);
+	inode = d_inode(cpg->dentry);
+	au_cpup_attr_nlink(inode, /*force*/1);
+
+out:
+	return err;
+}
+
+static noinline_for_stack
+int cpup_entry(struct au_cp_generic *cpg, struct dentry *dst_parent,
+	       struct au_cpup_reg_attr *h_src_attr)
+{
+	int err;
+	umode_t mode;
+	unsigned int mnt_flags;
+	unsigned char isdir, isreg, force;
+	const unsigned char do_dt = !!au_ftest_cpup(cpg->flags, DTIME);
+	struct au_dtime dt;
+	struct path h_path;
+	struct dentry *h_src, *h_dst, *h_parent;
+	struct inode *h_inode, *h_dir;
+	struct super_block *sb;
+
+	/* bsrc branch can be ro/rw. */
+	h_src = au_h_dptr(cpg->dentry, cpg->bsrc);
+	h_inode = d_inode(h_src);
+	AuDebugOn(h_inode != au_h_iptr(d_inode(cpg->dentry), cpg->bsrc));
+
+	/* try stopping to be referenced while we are creating */
+	h_dst = au_h_dptr(cpg->dentry, cpg->bdst);
+	if (au_ftest_cpup(cpg->flags, RENAME))
+		AuDebugOn(strncmp(h_dst->d_name.name, AUFS_WH_PFX,
+				  AUFS_WH_PFX_LEN));
+	h_parent = h_dst->d_parent; /* dir inode is locked */
+	h_dir = d_inode(h_parent);
+	IMustLock(h_dir);
+	AuDebugOn(h_parent != h_dst->d_parent);
+
+	sb = cpg->dentry->d_sb;
+	h_path.mnt = au_sbr_mnt(sb, cpg->bdst);
+	if (do_dt) {
+		h_path.dentry = h_parent;
+		au_dtime_store(&dt, dst_parent, &h_path);
+	}
+	h_path.dentry = h_dst;
+
+	isreg = 0;
+	isdir = 0;
+	mode = h_inode->i_mode;
+	switch (mode & S_IFMT) {
+	case S_IFREG:
+		isreg = 1;
+		err = vfsub_create(h_dir, &h_path, S_IRUSR | S_IWUSR,
+				   /*want_excl*/true);
+		if (!err)
+			err = au_do_cpup_regular(cpg, h_src_attr);
+		break;
+	case S_IFDIR:
+		isdir = 1;
+		err = vfsub_mkdir(h_dir, &h_path, mode);
+		if (!err)
+			err = au_do_cpup_dir(cpg, dst_parent, h_dir, &h_path);
+		break;
+	case S_IFLNK:
+		err = au_do_cpup_symlink(&h_path, h_src, h_dir);
+		break;
+	case S_IFCHR:
+	case S_IFBLK:
+		AuDebugOn(!capable(CAP_MKNOD));
+		/*FALLTHROUGH*/
+	case S_IFIFO:
+	case S_IFSOCK:
+		err = vfsub_mknod(h_dir, &h_path, mode, h_inode->i_rdev);
+		break;
+	default:
+		AuIOErr("Unknown inode type 0%o\n", mode);
+		err = -EIO;
+	}
+	if (!err)
+		err = au_reset_acl(h_dir, &h_path, mode);
+
+	mnt_flags = au_mntflags(sb);
+	if (!au_opt_test(mnt_flags, UDBA_NONE)
+	    && !isdir
+	    && au_opt_test(mnt_flags, XINO)
+	    && (h_inode->i_nlink == 1
+		|| (h_inode->i_state & I_LINKABLE))
+	    /* todo: unnecessary? */
+	    /* && d_inode(cpg->dentry)->i_nlink == 1 */
+	    && cpg->bdst < cpg->bsrc
+	    && !au_ftest_cpup(cpg->flags, KEEPLINO))
+		au_xino_write(sb, cpg->bsrc, h_inode->i_ino, /*ino*/0);
+		/* ignore this error */
+
+	if (!err) {
+		force = 0;
+		if (isreg) {
+			force = !!cpg->len;
+			if (cpg->len == -1)
+				force = !!i_size_read(h_inode);
+		}
+		au_fhsm_wrote(sb, cpg->bdst, force);
+	}
+
+	if (do_dt)
+		au_dtime_revert(&dt);
+	return err;
+}
+
+static int au_do_ren_after_cpup(struct au_cp_generic *cpg, struct path *h_path)
+{
+	int err;
+	struct dentry *dentry, *h_dentry, *h_parent, *parent;
+	struct inode *h_dir;
+	aufs_bindex_t bdst;
+
+	dentry = cpg->dentry;
+	bdst = cpg->bdst;
+	h_dentry = au_h_dptr(dentry, bdst);
+	if (!au_ftest_cpup(cpg->flags, OVERWRITE)) {
+		dget(h_dentry);
+		au_set_h_dptr(dentry, bdst, NULL);
+		err = au_lkup_neg(dentry, bdst, /*wh*/0);
+		if (!err)
+			h_path->dentry = dget(au_h_dptr(dentry, bdst));
+		au_set_h_dptr(dentry, bdst, h_dentry);
+	} else {
+		err = 0;
+		parent = dget_parent(dentry);
+		h_parent = au_h_dptr(parent, bdst);
+		dput(parent);
+		h_path->dentry = vfsub_lkup_one(&dentry->d_name, h_parent);
+		if (IS_ERR(h_path->dentry))
+			err = PTR_ERR(h_path->dentry);
+	}
+	if (unlikely(err))
+		goto out;
+
+	h_parent = h_dentry->d_parent; /* dir inode is locked */
+	h_dir = d_inode(h_parent);
+	IMustLock(h_dir);
+	AuDbg("%pd %pd\n", h_dentry, h_path->dentry);
+	/* no delegation since it is just created */
+	err = vfsub_rename(h_dir, h_dentry, h_dir, h_path, /*delegated*/NULL,
+			   /*flags*/0);
+	dput(h_path->dentry);
+
+out:
+	return err;
+}
+
+/*
+ * copyup the @dentry from @bsrc to @bdst.
+ * the caller must set the both of lower dentries.
+ * @len is for truncating when it is -1 copyup the entire file.
+ * in link/rename cases, @dst_parent may be different from the real one.
+ * basic->bsrc can be larger than basic->bdst.
+ * aufs doesn't touch the credential so
+ * security_inode_copy_up{,_xattr}() are unnecrssary.
+ */
+static int au_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent)
+{
+	int err, rerr;
+	aufs_bindex_t old_ibtop;
+	unsigned char isdir, plink;
+	struct dentry *h_src, *h_dst, *h_parent;
+	struct inode *dst_inode, *h_dir, *inode, *delegated, *src_inode;
+	struct super_block *sb;
+	struct au_branch *br;
+	/* to reuduce stack size */
+	struct {
+		struct au_dtime dt;
+		struct path h_path;
+		struct au_cpup_reg_attr h_src_attr;
+	} *a;
+
+	err = -ENOMEM;
+	a = kmalloc(sizeof(*a), GFP_NOFS);
+	if (unlikely(!a))
+		goto out;
+	a->h_src_attr.valid = 0;
+
+	sb = cpg->dentry->d_sb;
+	br = au_sbr(sb, cpg->bdst);
+	a->h_path.mnt = au_br_mnt(br);
+	h_dst = au_h_dptr(cpg->dentry, cpg->bdst);
+	h_parent = h_dst->d_parent; /* dir inode is locked */
+	h_dir = d_inode(h_parent);
+	IMustLock(h_dir);
+
+	h_src = au_h_dptr(cpg->dentry, cpg->bsrc);
+	inode = d_inode(cpg->dentry);
+
+	if (!dst_parent)
+		dst_parent = dget_parent(cpg->dentry);
+	else
+		dget(dst_parent);
+
+	plink = !!au_opt_test(au_mntflags(sb), PLINK);
+	dst_inode = au_h_iptr(inode, cpg->bdst);
+	if (dst_inode) {
+		if (unlikely(!plink)) {
+			err = -EIO;
+			AuIOErr("hi%lu(i%lu) exists on b%d "
+				"but plink is disabled\n",
+				dst_inode->i_ino, inode->i_ino, cpg->bdst);
+			goto out_parent;
+		}
+
+		if (dst_inode->i_nlink) {
+			const int do_dt = au_ftest_cpup(cpg->flags, DTIME);
+
+			h_src = au_plink_lkup(inode, cpg->bdst);
+			err = PTR_ERR(h_src);
+			if (IS_ERR(h_src))
+				goto out_parent;
+			if (unlikely(d_is_negative(h_src))) {
+				err = -EIO;
+				AuIOErr("i%lu exists on b%d "
+					"but not pseudo-linked\n",
+					inode->i_ino, cpg->bdst);
+				dput(h_src);
+				goto out_parent;
+			}
+
+			if (do_dt) {
+				a->h_path.dentry = h_parent;
+				au_dtime_store(&a->dt, dst_parent, &a->h_path);
+			}
+
+			a->h_path.dentry = h_dst;
+			delegated = NULL;
+			err = vfsub_link(h_src, h_dir, &a->h_path, &delegated);
+			if (!err && au_ftest_cpup(cpg->flags, RENAME))
+				err = au_do_ren_after_cpup(cpg, &a->h_path);
+			if (do_dt)
+				au_dtime_revert(&a->dt);
+			if (unlikely(err == -EWOULDBLOCK)) {
+				pr_warn("cannot retry for NFSv4 delegation"
+					" for an internal link\n");
+				iput(delegated);
+			}
+			dput(h_src);
+			goto out_parent;
+		} else
+			/* todo: cpup_wh_file? */
+			/* udba work */
+			au_update_ibrange(inode, /*do_put_zero*/1);
+	}
+
+	isdir = S_ISDIR(inode->i_mode);
+	old_ibtop = au_ibtop(inode);
+	err = cpup_entry(cpg, dst_parent, &a->h_src_attr);
+	if (unlikely(err))
+		goto out_rev;
+	dst_inode = d_inode(h_dst);
+	inode_lock_nested(dst_inode, AuLsc_I_CHILD2);
+	/* todo: necessary? */
+	/* au_pin_hdir_unlock(cpg->pin); */
+
+	err = cpup_iattr(cpg->dentry, cpg->bdst, h_src, &a->h_src_attr);
+	if (unlikely(err)) {
+		/* todo: necessary? */
+		/* au_pin_hdir_relock(cpg->pin); */ /* ignore an error */
+		inode_unlock(dst_inode);
+		goto out_rev;
+	}
+
+	if (cpg->bdst < old_ibtop) {
+		if (S_ISREG(inode->i_mode)) {
+			err = au_dy_iaop(inode, cpg->bdst, dst_inode);
+			if (unlikely(err)) {
+				/* ignore an error */
+				/* au_pin_hdir_relock(cpg->pin); */
+				inode_unlock(dst_inode);
+				goto out_rev;
+			}
+		}
+		au_set_ibtop(inode, cpg->bdst);
+	} else
+		au_set_ibbot(inode, cpg->bdst);
+	au_set_h_iptr(inode, cpg->bdst, au_igrab(dst_inode),
+		      au_hi_flags(inode, isdir));
+
+	/* todo: necessary? */
+	/* err = au_pin_hdir_relock(cpg->pin); */
+	inode_unlock(dst_inode);
+	if (unlikely(err))
+		goto out_rev;
+
+	src_inode = d_inode(h_src);
+	if (!isdir
+	    && (src_inode->i_nlink > 1
+		|| src_inode->i_state & I_LINKABLE)
+	    && plink)
+		au_plink_append(inode, cpg->bdst, h_dst);
+
+	if (au_ftest_cpup(cpg->flags, RENAME)) {
+		a->h_path.dentry = h_dst;
+		err = au_do_ren_after_cpup(cpg, &a->h_path);
+	}
+	if (!err)
+		goto out_parent; /* success */
+
+	/* revert */
+out_rev:
+	a->h_path.dentry = h_parent;
+	au_dtime_store(&a->dt, dst_parent, &a->h_path);
+	a->h_path.dentry = h_dst;
+	rerr = 0;
+	if (d_is_positive(h_dst)) {
+		if (!isdir) {
+			/* no delegation since it is just created */
+			rerr = vfsub_unlink(h_dir, &a->h_path,
+					    /*delegated*/NULL, /*force*/0);
+		} else
+			rerr = vfsub_rmdir(h_dir, &a->h_path);
+	}
+	au_dtime_revert(&a->dt);
+	if (rerr) {
+		AuIOErr("failed removing broken entry(%d, %d)\n", err, rerr);
+		err = -EIO;
+	}
+out_parent:
+	dput(dst_parent);
+	kfree(a);
+out:
+	return err;
+}
+
+#if 0 /* reserved */
+struct au_cpup_single_args {
+	int *errp;
+	struct au_cp_generic *cpg;
+	struct dentry *dst_parent;
+};
+
+static void au_call_cpup_single(void *args)
+{
+	struct au_cpup_single_args *a = args;
+
+	au_pin_hdir_acquire_nest(a->cpg->pin);
+	*a->errp = au_cpup_single(a->cpg, a->dst_parent);
+	au_pin_hdir_release(a->cpg->pin);
+}
+#endif
+
+/*
+ * prevent SIGXFSZ in copy-up.
+ * testing CAP_MKNOD is for generic fs,
+ * but CAP_FSETID is for xfs only, currently.
+ */
+static int au_cpup_sio_test(struct au_pin *pin, umode_t mode)
+{
+	int do_sio;
+	struct super_block *sb;
+	struct inode *h_dir;
+
+	do_sio = 0;
+	sb = au_pinned_parent(pin)->d_sb;
+	if (!au_wkq_test()
+	    && (!au_sbi(sb)->si_plink_maint_pid
+		|| au_plink_maint(sb, AuLock_NOPLM))) {
+		switch (mode & S_IFMT) {
+		case S_IFREG:
+			/* no condition about RLIMIT_FSIZE and the file size */
+			do_sio = 1;
+			break;
+		case S_IFCHR:
+		case S_IFBLK:
+			do_sio = !capable(CAP_MKNOD);
+			break;
+		}
+		if (!do_sio)
+			do_sio = ((mode & (S_ISUID | S_ISGID))
+				  && !capable(CAP_FSETID));
+		/* this workaround may be removed in the future */
+		if (!do_sio) {
+			h_dir = au_pinned_h_dir(pin);
+			do_sio = h_dir->i_mode & S_ISVTX;
+		}
+	}
+
+	return do_sio;
+}
+
+#if 0 /* reserved */
+int au_sio_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent)
+{
+	int err, wkq_err;
+	struct dentry *h_dentry;
+
+	h_dentry = au_h_dptr(cpg->dentry, cpg->bsrc);
+	if (!au_cpup_sio_test(pin, d_inode(h_dentry)->i_mode))
+		err = au_cpup_single(cpg, dst_parent);
+	else {
+		struct au_cpup_single_args args = {
+			.errp		= &err,
+			.cpg		= cpg,
+			.dst_parent	= dst_parent
+		};
+		wkq_err = au_wkq_wait(au_call_cpup_single, &args);
+		if (unlikely(wkq_err))
+			err = wkq_err;
+	}
+
+	return err;
+}
+#endif
+
+/*
+ * copyup the @dentry from the first active lower branch to @bdst,
+ * using au_cpup_single().
+ */
+static int au_cpup_simple(struct au_cp_generic *cpg)
+{
+	int err;
+	unsigned int flags_orig;
+	struct dentry *dentry;
+
+	AuDebugOn(cpg->bsrc < 0);
+
+	dentry = cpg->dentry;
+	DiMustWriteLock(dentry);
+
+	err = au_lkup_neg(dentry, cpg->bdst, /*wh*/1);
+	if (!err) {
+		flags_orig = cpg->flags;
+		au_fset_cpup(cpg->flags, RENAME);
+		err = au_cpup_single(cpg, NULL);
+		cpg->flags = flags_orig;
+		if (!err)
+			return 0; /* success */
+
+		/* revert */
+		au_set_h_dptr(dentry, cpg->bdst, NULL);
+		au_set_dbtop(dentry, cpg->bsrc);
+	}
+
+	return err;
+}
+
+struct au_cpup_simple_args {
+	int *errp;
+	struct au_cp_generic *cpg;
+};
+
+static void au_call_cpup_simple(void *args)
+{
+	struct au_cpup_simple_args *a = args;
+
+	au_pin_hdir_acquire_nest(a->cpg->pin);
+	*a->errp = au_cpup_simple(a->cpg);
+	au_pin_hdir_release(a->cpg->pin);
+}
+
+static int au_do_sio_cpup_simple(struct au_cp_generic *cpg)
+{
+	int err, wkq_err;
+	struct dentry *dentry, *parent;
+	struct file *h_file;
+	struct inode *h_dir;
+
+	dentry = cpg->dentry;
+	h_file = NULL;
+	if (au_ftest_cpup(cpg->flags, HOPEN)) {
+		AuDebugOn(cpg->bsrc < 0);
+		h_file = au_h_open_pre(dentry, cpg->bsrc, /*force_wr*/0);
+		err = PTR_ERR(h_file);
+		if (IS_ERR(h_file))
+			goto out;
+	}
+
+	parent = dget_parent(dentry);
+	h_dir = au_h_iptr(d_inode(parent), cpg->bdst);
+	if (!au_test_h_perm_sio(h_dir, MAY_EXEC | MAY_WRITE)
+	    && !au_cpup_sio_test(cpg->pin, d_inode(dentry)->i_mode))
+		err = au_cpup_simple(cpg);
+	else {
+		struct au_cpup_simple_args args = {
+			.errp		= &err,
+			.cpg		= cpg
+		};
+		wkq_err = au_wkq_wait(au_call_cpup_simple, &args);
+		if (unlikely(wkq_err))
+			err = wkq_err;
+	}
+
+	dput(parent);
+	if (h_file)
+		au_h_open_post(dentry, cpg->bsrc, h_file);
+
+out:
+	return err;
+}
+
+int au_sio_cpup_simple(struct au_cp_generic *cpg)
+{
+	aufs_bindex_t bsrc, bbot;
+	struct dentry *dentry, *h_dentry;
+
+	if (cpg->bsrc < 0) {
+		dentry = cpg->dentry;
+		bbot = au_dbbot(dentry);
+		for (bsrc = cpg->bdst + 1; bsrc <= bbot; bsrc++) {
+			h_dentry = au_h_dptr(dentry, bsrc);
+			if (h_dentry) {
+				AuDebugOn(d_is_negative(h_dentry));
+				break;
+			}
+		}
+		AuDebugOn(bsrc > bbot);
+		cpg->bsrc = bsrc;
+	}
+	AuDebugOn(cpg->bsrc <= cpg->bdst);
+	return au_do_sio_cpup_simple(cpg);
+}
+
+int au_sio_cpdown_simple(struct au_cp_generic *cpg)
+{
+	AuDebugOn(cpg->bdst <= cpg->bsrc);
+	return au_do_sio_cpup_simple(cpg);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * copyup the deleted file for writing.
+ */
+static int au_do_cpup_wh(struct au_cp_generic *cpg, struct dentry *wh_dentry,
+			 struct file *file)
+{
+	int err;
+	unsigned int flags_orig;
+	aufs_bindex_t bsrc_orig;
+	struct au_dinfo *dinfo;
+	struct {
+		struct au_hdentry *hd;
+		struct dentry *h_dentry;
+	} hdst, hsrc;
+
+	dinfo = au_di(cpg->dentry);
+	AuRwMustWriteLock(&dinfo->di_rwsem);
+
+	bsrc_orig = cpg->bsrc;
+	cpg->bsrc = dinfo->di_btop;
+	hdst.hd = au_hdentry(dinfo, cpg->bdst);
+	hdst.h_dentry = hdst.hd->hd_dentry;
+	hdst.hd->hd_dentry = wh_dentry;
+	dinfo->di_btop = cpg->bdst;
+
+	hsrc.h_dentry = NULL;
+	if (file) {
+		hsrc.hd = au_hdentry(dinfo, cpg->bsrc);
+		hsrc.h_dentry = hsrc.hd->hd_dentry;
+		hsrc.hd->hd_dentry = au_hf_top(file)->f_path.dentry;
+	}
+	flags_orig = cpg->flags;
+	cpg->flags = !AuCpup_DTIME;
+	err = au_cpup_single(cpg, /*h_parent*/NULL);
+	cpg->flags = flags_orig;
+	if (file) {
+		if (!err)
+			err = au_reopen_nondir(file);
+		hsrc.hd->hd_dentry = hsrc.h_dentry;
+	}
+	hdst.hd->hd_dentry = hdst.h_dentry;
+	dinfo->di_btop = cpg->bsrc;
+	cpg->bsrc = bsrc_orig;
+
+	return err;
+}
+
+static int au_cpup_wh(struct au_cp_generic *cpg, struct file *file)
+{
+	int err;
+	aufs_bindex_t bdst;
+	struct au_dtime dt;
+	struct dentry *dentry, *parent, *h_parent, *wh_dentry;
+	struct au_branch *br;
+	struct path h_path;
+
+	dentry = cpg->dentry;
+	bdst = cpg->bdst;
+	br = au_sbr(dentry->d_sb, bdst);
+	parent = dget_parent(dentry);
+	h_parent = au_h_dptr(parent, bdst);
+	wh_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
+	err = PTR_ERR(wh_dentry);
+	if (IS_ERR(wh_dentry))
+		goto out;
+
+	h_path.dentry = h_parent;
+	h_path.mnt = au_br_mnt(br);
+	au_dtime_store(&dt, parent, &h_path);
+	err = au_do_cpup_wh(cpg, wh_dentry, file);
+	if (unlikely(err))
+		goto out_wh;
+
+	dget(wh_dentry);
+	h_path.dentry = wh_dentry;
+	if (!d_is_dir(wh_dentry)) {
+		/* no delegation since it is just created */
+		err = vfsub_unlink(d_inode(h_parent), &h_path,
+				   /*delegated*/NULL, /*force*/0);
+	} else
+		err = vfsub_rmdir(d_inode(h_parent), &h_path);
+	if (unlikely(err)) {
+		AuIOErr("failed remove copied-up tmp file %pd(%d)\n",
+			wh_dentry, err);
+		err = -EIO;
+	}
+	au_dtime_revert(&dt);
+	au_set_hi_wh(d_inode(dentry), bdst, wh_dentry);
+
+out_wh:
+	dput(wh_dentry);
+out:
+	dput(parent);
+	return err;
+}
+
+struct au_cpup_wh_args {
+	int *errp;
+	struct au_cp_generic *cpg;
+	struct file *file;
+};
+
+static void au_call_cpup_wh(void *args)
+{
+	struct au_cpup_wh_args *a = args;
+
+	au_pin_hdir_acquire_nest(a->cpg->pin);
+	*a->errp = au_cpup_wh(a->cpg, a->file);
+	au_pin_hdir_release(a->cpg->pin);
+}
+
+int au_sio_cpup_wh(struct au_cp_generic *cpg, struct file *file)
+{
+	int err, wkq_err;
+	aufs_bindex_t bdst;
+	struct dentry *dentry, *parent, *h_orph, *h_parent;
+	struct inode *dir, *h_dir, *h_tmpdir;
+	struct au_wbr *wbr;
+	struct au_pin wh_pin, *pin_orig;
+
+	dentry = cpg->dentry;
+	bdst = cpg->bdst;
+	parent = dget_parent(dentry);
+	dir = d_inode(parent);
+	h_orph = NULL;
+	h_parent = NULL;
+	h_dir = au_igrab(au_h_iptr(dir, bdst));
+	h_tmpdir = h_dir;
+	pin_orig = NULL;
+	if (!h_dir->i_nlink) {
+		wbr = au_sbr(dentry->d_sb, bdst)->br_wbr;
+		h_orph = wbr->wbr_orph;
+
+		h_parent = dget(au_h_dptr(parent, bdst));
+		au_set_h_dptr(parent, bdst, dget(h_orph));
+		h_tmpdir = d_inode(h_orph);
+		au_set_h_iptr(dir, bdst, au_igrab(h_tmpdir), /*flags*/0);
+
+		inode_lock_nested(h_tmpdir, AuLsc_I_PARENT3);
+		/* todo: au_h_open_pre()? */
+
+		pin_orig = cpg->pin;
+		au_pin_init(&wh_pin, dentry, bdst, AuLsc_DI_PARENT,
+			    AuLsc_I_PARENT3, cpg->pin->udba, AuPin_DI_LOCKED);
+		cpg->pin = &wh_pin;
+	}
+
+	if (!au_test_h_perm_sio(h_tmpdir, MAY_EXEC | MAY_WRITE)
+	    && !au_cpup_sio_test(cpg->pin, d_inode(dentry)->i_mode))
+		err = au_cpup_wh(cpg, file);
+	else {
+		struct au_cpup_wh_args args = {
+			.errp	= &err,
+			.cpg	= cpg,
+			.file	= file
+		};
+		wkq_err = au_wkq_wait(au_call_cpup_wh, &args);
+		if (unlikely(wkq_err))
+			err = wkq_err;
+	}
+
+	if (h_orph) {
+		inode_unlock(h_tmpdir);
+		/* todo: au_h_open_post()? */
+		au_set_h_iptr(dir, bdst, au_igrab(h_dir), /*flags*/0);
+		au_set_h_dptr(parent, bdst, h_parent);
+		AuDebugOn(!pin_orig);
+		cpg->pin = pin_orig;
+	}
+	iput(h_dir);
+	dput(parent);
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * generic routine for both of copy-up and copy-down.
+ */
+/* cf. revalidate function in file.c */
+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
+	       int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
+			 struct au_pin *pin,
+			 struct dentry *h_parent, void *arg),
+	       void *arg)
+{
+	int err;
+	struct au_pin pin;
+	struct dentry *d, *parent, *h_parent, *real_parent, *h_dentry;
+
+	err = 0;
+	parent = dget_parent(dentry);
+	if (IS_ROOT(parent))
+		goto out;
+
+	au_pin_init(&pin, dentry, bdst, AuLsc_DI_PARENT2, AuLsc_I_PARENT2,
+		    au_opt_udba(dentry->d_sb), AuPin_MNT_WRITE);
+
+	/* do not use au_dpage */
+	real_parent = parent;
+	while (1) {
+		dput(parent);
+		parent = dget_parent(dentry);
+		h_parent = au_h_dptr(parent, bdst);
+		if (h_parent)
+			goto out; /* success */
+
+		/* find top dir which is necessary to cpup */
+		do {
+			d = parent;
+			dput(parent);
+			parent = dget_parent(d);
+			di_read_lock_parent3(parent, !AuLock_IR);
+			h_parent = au_h_dptr(parent, bdst);
+			di_read_unlock(parent, !AuLock_IR);
+		} while (!h_parent);
+
+		if (d != real_parent)
+			di_write_lock_child3(d);
+
+		/* somebody else might create while we were sleeping */
+		h_dentry = au_h_dptr(d, bdst);
+		if (!h_dentry || d_is_negative(h_dentry)) {
+			if (h_dentry)
+				au_update_dbtop(d);
+
+			au_pin_set_dentry(&pin, d);
+			err = au_do_pin(&pin);
+			if (!err) {
+				err = cp(d, bdst, &pin, h_parent, arg);
+				au_unpin(&pin);
+			}
+		}
+
+		if (d != real_parent)
+			di_write_unlock(d);
+		if (unlikely(err))
+			break;
+	}
+
+out:
+	dput(parent);
+	return err;
+}
+
+static int au_cpup_dir(struct dentry *dentry, aufs_bindex_t bdst,
+		       struct au_pin *pin,
+		       struct dentry *h_parent __maybe_unused,
+		       void *arg __maybe_unused)
+{
+	struct au_cp_generic cpg = {
+		.dentry	= dentry,
+		.bdst	= bdst,
+		.bsrc	= -1,
+		.len	= 0,
+		.pin	= pin,
+		.flags	= AuCpup_DTIME
+	};
+	return au_sio_cpup_simple(&cpg);
+}
+
+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
+{
+	return au_cp_dirs(dentry, bdst, au_cpup_dir, NULL);
+}
+
+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
+{
+	int err;
+	struct dentry *parent;
+	struct inode *dir;
+
+	parent = dget_parent(dentry);
+	dir = d_inode(parent);
+	err = 0;
+	if (au_h_iptr(dir, bdst))
+		goto out;
+
+	di_read_unlock(parent, AuLock_IR);
+	di_write_lock_parent(parent);
+	/* someone else might change our inode while we were sleeping */
+	if (!au_h_iptr(dir, bdst))
+		err = au_cpup_dirs(dentry, bdst);
+	di_downgrade_lock(parent, AuLock_IR);
+
+out:
+	dput(parent);
+	return err;
+}
diff --git a/include/fs/aufs/cpup.h b/include/fs/aufs/cpup.h
new file mode 100644
index 000000000..99295266c
--- /dev/null
+++ b/include/fs/aufs/cpup.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * copy-up/down functions
+ */
+
+#ifndef __AUFS_CPUP_H__
+#define __AUFS_CPUP_H__
+
+#ifdef __KERNEL__
+
+#include <linux/path.h>
+
+struct inode;
+struct file;
+struct au_pin;
+
+void au_cpup_attr_flags(struct inode *dst, unsigned int iflags);
+void au_cpup_attr_timesizes(struct inode *inode);
+void au_cpup_attr_nlink(struct inode *inode, int force);
+void au_cpup_attr_changeable(struct inode *inode);
+void au_cpup_igen(struct inode *inode, struct inode *h_inode);
+void au_cpup_attr_all(struct inode *inode, int force);
+
+/* ---------------------------------------------------------------------- */
+
+struct au_cp_generic {
+	struct dentry	*dentry;
+	aufs_bindex_t	bdst, bsrc;
+	loff_t		len;
+	struct au_pin	*pin;
+	unsigned int	flags;
+};
+
+/* cpup flags */
+#define AuCpup_DTIME		1		/* do dtime_store/revert */
+#define AuCpup_KEEPLINO		(1 << 1)	/* do not clear the lower xino,
+						   for link(2) */
+#define AuCpup_RENAME		(1 << 2)	/* rename after cpup */
+#define AuCpup_HOPEN		(1 << 3)	/* call h_open_pre/post() in
+						   cpup */
+#define AuCpup_OVERWRITE	(1 << 4)	/* allow overwriting the
+						   existing entry */
+#define AuCpup_RWDST		(1 << 5)	/* force write target even if
+						   the branch is marked as RO */
+
+#ifndef CONFIG_AUFS_BR_HFSPLUS
+#undef AuCpup_HOPEN
+#define AuCpup_HOPEN		0
+#endif
+
+#define au_ftest_cpup(flags, name)	((flags) & AuCpup_##name)
+#define au_fset_cpup(flags, name) \
+	do { (flags) |= AuCpup_##name; } while (0)
+#define au_fclr_cpup(flags, name) \
+	do { (flags) &= ~AuCpup_##name; } while (0)
+
+int au_copy_file(struct file *dst, struct file *src, loff_t len);
+int au_sio_cpup_simple(struct au_cp_generic *cpg);
+int au_sio_cpdown_simple(struct au_cp_generic *cpg);
+int au_sio_cpup_wh(struct au_cp_generic *cpg, struct file *file);
+
+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
+	       int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
+			 struct au_pin *pin,
+			 struct dentry *h_parent, void *arg),
+	       void *arg);
+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
+
+/* ---------------------------------------------------------------------- */
+
+/* keep timestamps when copyup */
+struct au_dtime {
+	struct dentry *dt_dentry;
+	struct path dt_h_path;
+	struct timespec dt_atime, dt_mtime;
+};
+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
+		    struct path *h_path);
+void au_dtime_revert(struct au_dtime *dt);
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_CPUP_H__ */
diff --git a/include/fs/aufs/dbgaufs.c b/include/fs/aufs/dbgaufs.c
new file mode 100644
index 000000000..b85f0694b
--- /dev/null
+++ b/include/fs/aufs/dbgaufs.c
@@ -0,0 +1,437 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * debugfs interface
+ */
+
+#include <linux/debugfs.h>
+#include "aufs.h"
+
+#ifndef CONFIG_SYSFS
+#error DEBUG_FS depends upon SYSFS
+#endif
+
+static struct dentry *dbgaufs;
+static const mode_t dbgaufs_mode = S_IRUSR | S_IRGRP | S_IROTH;
+
+/* 20 is max digits length of ulong 64 */
+struct dbgaufs_arg {
+	int n;
+	char a[20 * 4];
+};
+
+/*
+ * common function for all XINO files
+ */
+static int dbgaufs_xi_release(struct inode *inode __maybe_unused,
+			      struct file *file)
+{
+	kfree(file->private_data);
+	return 0;
+}
+
+static int dbgaufs_xi_open(struct file *xf, struct file *file, int do_fcnt)
+{
+	int err;
+	struct kstat st;
+	struct dbgaufs_arg *p;
+
+	err = -ENOMEM;
+	p = kmalloc(sizeof(*p), GFP_NOFS);
+	if (unlikely(!p))
+		goto out;
+
+	err = 0;
+	p->n = 0;
+	file->private_data = p;
+	if (!xf)
+		goto out;
+
+	err = vfsub_getattr(&xf->f_path, &st);
+	if (!err) {
+		if (do_fcnt)
+			p->n = snprintf
+				(p->a, sizeof(p->a), "%ld, %llux%u %lld\n",
+				 (long)file_count(xf), st.blocks, st.blksize,
+				 (long long)st.size);
+		else
+			p->n = snprintf(p->a, sizeof(p->a), "%llux%u %lld\n",
+					st.blocks, st.blksize,
+					(long long)st.size);
+		AuDebugOn(p->n >= sizeof(p->a));
+	} else {
+		p->n = snprintf(p->a, sizeof(p->a), "err %d\n", err);
+		err = 0;
+	}
+
+out:
+	return err;
+
+}
+
+static ssize_t dbgaufs_xi_read(struct file *file, char __user *buf,
+			       size_t count, loff_t *ppos)
+{
+	struct dbgaufs_arg *p;
+
+	p = file->private_data;
+	return simple_read_from_buffer(buf, count, ppos, p->a, p->n);
+}
+
+/* ---------------------------------------------------------------------- */
+
+struct dbgaufs_plink_arg {
+	int n;
+	char a[];
+};
+
+static int dbgaufs_plink_release(struct inode *inode __maybe_unused,
+				 struct file *file)
+{
+	free_page((unsigned long)file->private_data);
+	return 0;
+}
+
+static int dbgaufs_plink_open(struct inode *inode, struct file *file)
+{
+	int err, i, limit;
+	unsigned long n, sum;
+	struct dbgaufs_plink_arg *p;
+	struct au_sbinfo *sbinfo;
+	struct super_block *sb;
+	struct hlist_bl_head *hbl;
+
+	err = -ENOMEM;
+	p = (void *)get_zeroed_page(GFP_NOFS);
+	if (unlikely(!p))
+		goto out;
+
+	err = -EFBIG;
+	sbinfo = inode->i_private;
+	sb = sbinfo->si_sb;
+	si_noflush_read_lock(sb);
+	if (au_opt_test(au_mntflags(sb), PLINK)) {
+		limit = PAGE_SIZE - sizeof(p->n);
+
+		/* the number of buckets */
+		n = snprintf(p->a + p->n, limit, "%d\n", AuPlink_NHASH);
+		p->n += n;
+		limit -= n;
+
+		sum = 0;
+		for (i = 0, hbl = sbinfo->si_plink; i < AuPlink_NHASH;
+		     i++, hbl++) {
+			n = au_hbl_count(hbl);
+			sum += n;
+
+			n = snprintf(p->a + p->n, limit, "%lu ", n);
+			p->n += n;
+			limit -= n;
+			if (unlikely(limit <= 0))
+				goto out_free;
+		}
+		p->a[p->n - 1] = '\n';
+
+		/* the sum of plinks */
+		n = snprintf(p->a + p->n, limit, "%lu\n", sum);
+		p->n += n;
+		limit -= n;
+		if (unlikely(limit <= 0))
+			goto out_free;
+	} else {
+#define str "1\n0\n0\n"
+		p->n = sizeof(str) - 1;
+		strcpy(p->a, str);
+#undef str
+	}
+	si_read_unlock(sb);
+
+	err = 0;
+	file->private_data = p;
+	goto out; /* success */
+
+out_free:
+	free_page((unsigned long)p);
+out:
+	return err;
+}
+
+static ssize_t dbgaufs_plink_read(struct file *file, char __user *buf,
+				  size_t count, loff_t *ppos)
+{
+	struct dbgaufs_plink_arg *p;
+
+	p = file->private_data;
+	return simple_read_from_buffer(buf, count, ppos, p->a, p->n);
+}
+
+static const struct file_operations dbgaufs_plink_fop = {
+	.owner		= THIS_MODULE,
+	.open		= dbgaufs_plink_open,
+	.release	= dbgaufs_plink_release,
+	.read		= dbgaufs_plink_read
+};
+
+/* ---------------------------------------------------------------------- */
+
+static int dbgaufs_xib_open(struct inode *inode, struct file *file)
+{
+	int err;
+	struct au_sbinfo *sbinfo;
+	struct super_block *sb;
+
+	sbinfo = inode->i_private;
+	sb = sbinfo->si_sb;
+	si_noflush_read_lock(sb);
+	err = dbgaufs_xi_open(sbinfo->si_xib, file, /*do_fcnt*/0);
+	si_read_unlock(sb);
+	return err;
+}
+
+static const struct file_operations dbgaufs_xib_fop = {
+	.owner		= THIS_MODULE,
+	.open		= dbgaufs_xib_open,
+	.release	= dbgaufs_xi_release,
+	.read		= dbgaufs_xi_read
+};
+
+/* ---------------------------------------------------------------------- */
+
+#define DbgaufsXi_PREFIX "xi"
+
+static int dbgaufs_xino_open(struct inode *inode, struct file *file)
+{
+	int err;
+	long l;
+	struct au_sbinfo *sbinfo;
+	struct super_block *sb;
+	struct file *xf;
+	struct qstr *name;
+
+	err = -ENOENT;
+	xf = NULL;
+	name = &file->f_path.dentry->d_name;
+	if (unlikely(name->len < sizeof(DbgaufsXi_PREFIX)
+		     || memcmp(name->name, DbgaufsXi_PREFIX,
+			       sizeof(DbgaufsXi_PREFIX) - 1)))
+		goto out;
+	err = kstrtol(name->name + sizeof(DbgaufsXi_PREFIX) - 1, 10, &l);
+	if (unlikely(err))
+		goto out;
+
+	sbinfo = inode->i_private;
+	sb = sbinfo->si_sb;
+	si_noflush_read_lock(sb);
+	if (l <= au_sbbot(sb)) {
+		xf = au_sbr(sb, (aufs_bindex_t)l)->br_xino.xi_file;
+		err = dbgaufs_xi_open(xf, file, /*do_fcnt*/1);
+	} else
+		err = -ENOENT;
+	si_read_unlock(sb);
+
+out:
+	return err;
+}
+
+static const struct file_operations dbgaufs_xino_fop = {
+	.owner		= THIS_MODULE,
+	.open		= dbgaufs_xino_open,
+	.release	= dbgaufs_xi_release,
+	.read		= dbgaufs_xi_read
+};
+
+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
+{
+	aufs_bindex_t bbot;
+	struct au_branch *br;
+	struct au_xino_file *xi;
+
+	if (!au_sbi(sb)->si_dbgaufs)
+		return;
+
+	bbot = au_sbbot(sb);
+	for (; bindex <= bbot; bindex++) {
+		br = au_sbr(sb, bindex);
+		xi = &br->br_xino;
+		/* debugfs acquires the parent i_mutex */
+		lockdep_off();
+		debugfs_remove(xi->xi_dbgaufs);
+		lockdep_on();
+		xi->xi_dbgaufs = NULL;
+	}
+}
+
+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
+{
+	struct au_sbinfo *sbinfo;
+	struct dentry *parent;
+	struct au_branch *br;
+	struct au_xino_file *xi;
+	aufs_bindex_t bbot;
+	char name[sizeof(DbgaufsXi_PREFIX) + 5]; /* "xi" bindex NULL */
+
+	sbinfo = au_sbi(sb);
+	parent = sbinfo->si_dbgaufs;
+	if (!parent)
+		return;
+
+	bbot = au_sbbot(sb);
+	for (; bindex <= bbot; bindex++) {
+		snprintf(name, sizeof(name), DbgaufsXi_PREFIX "%d", bindex);
+		br = au_sbr(sb, bindex);
+		xi = &br->br_xino;
+		AuDebugOn(xi->xi_dbgaufs);
+		/* debugfs acquires the parent i_mutex */
+		lockdep_off();
+		xi->xi_dbgaufs = debugfs_create_file(name, dbgaufs_mode, parent,
+						     sbinfo, &dbgaufs_xino_fop);
+		lockdep_on();
+		/* ignore an error */
+		if (unlikely(!xi->xi_dbgaufs))
+			AuWarn1("failed %s under debugfs\n", name);
+	}
+}
+
+/* ---------------------------------------------------------------------- */
+
+#ifdef CONFIG_AUFS_EXPORT
+static int dbgaufs_xigen_open(struct inode *inode, struct file *file)
+{
+	int err;
+	struct au_sbinfo *sbinfo;
+	struct super_block *sb;
+
+	sbinfo = inode->i_private;
+	sb = sbinfo->si_sb;
+	si_noflush_read_lock(sb);
+	err = dbgaufs_xi_open(sbinfo->si_xigen, file, /*do_fcnt*/0);
+	si_read_unlock(sb);
+	return err;
+}
+
+static const struct file_operations dbgaufs_xigen_fop = {
+	.owner		= THIS_MODULE,
+	.open		= dbgaufs_xigen_open,
+	.release	= dbgaufs_xi_release,
+	.read		= dbgaufs_xi_read
+};
+
+static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
+{
+	int err;
+
+	/*
+	 * This function is a dynamic '__init' function actually,
+	 * so the tiny check for si_rwsem is unnecessary.
+	 */
+	/* AuRwMustWriteLock(&sbinfo->si_rwsem); */
+
+	err = -EIO;
+	sbinfo->si_dbgaufs_xigen = debugfs_create_file
+		("xigen", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
+		 &dbgaufs_xigen_fop);
+	if (sbinfo->si_dbgaufs_xigen)
+		err = 0;
+
+	return err;
+}
+#else
+static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
+{
+	return 0;
+}
+#endif /* CONFIG_AUFS_EXPORT */
+
+/* ---------------------------------------------------------------------- */
+
+void dbgaufs_si_fin(struct au_sbinfo *sbinfo)
+{
+	/*
+	 * This function is a dynamic '__fin' function actually,
+	 * so the tiny check for si_rwsem is unnecessary.
+	 */
+	/* AuRwMustWriteLock(&sbinfo->si_rwsem); */
+
+	debugfs_remove_recursive(sbinfo->si_dbgaufs);
+	sbinfo->si_dbgaufs = NULL;
+	kobject_put(&sbinfo->si_kobj);
+}
+
+int dbgaufs_si_init(struct au_sbinfo *sbinfo)
+{
+	int err;
+	char name[SysaufsSiNameLen];
+
+	/*
+	 * This function is a dynamic '__init' function actually,
+	 * so the tiny check for si_rwsem is unnecessary.
+	 */
+	/* AuRwMustWriteLock(&sbinfo->si_rwsem); */
+
+	err = -ENOENT;
+	if (!dbgaufs) {
+		AuErr1("/debug/aufs is uninitialized\n");
+		goto out;
+	}
+
+	err = -EIO;
+	sysaufs_name(sbinfo, name);
+	sbinfo->si_dbgaufs = debugfs_create_dir(name, dbgaufs);
+	if (unlikely(!sbinfo->si_dbgaufs))
+		goto out;
+	kobject_get(&sbinfo->si_kobj);
+
+	sbinfo->si_dbgaufs_xib = debugfs_create_file
+		("xib", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
+		 &dbgaufs_xib_fop);
+	if (unlikely(!sbinfo->si_dbgaufs_xib))
+		goto out_dir;
+
+	sbinfo->si_dbgaufs_plink = debugfs_create_file
+		("plink", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
+		 &dbgaufs_plink_fop);
+	if (unlikely(!sbinfo->si_dbgaufs_plink))
+		goto out_dir;
+
+	err = dbgaufs_xigen_init(sbinfo);
+	if (!err)
+		goto out; /* success */
+
+out_dir:
+	dbgaufs_si_fin(sbinfo);
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void dbgaufs_fin(void)
+{
+	debugfs_remove(dbgaufs);
+}
+
+int __init dbgaufs_init(void)
+{
+	int err;
+
+	err = -EIO;
+	dbgaufs = debugfs_create_dir(AUFS_NAME, NULL);
+	if (dbgaufs)
+		err = 0;
+	return err;
+}
diff --git a/include/fs/aufs/dbgaufs.h b/include/fs/aufs/dbgaufs.h
new file mode 100644
index 000000000..f8d7b74e8
--- /dev/null
+++ b/include/fs/aufs/dbgaufs.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * debugfs interface
+ */
+
+#ifndef __DBGAUFS_H__
+#define __DBGAUFS_H__
+
+#ifdef __KERNEL__
+
+struct super_block;
+struct au_sbinfo;
+
+#ifdef CONFIG_DEBUG_FS
+/* dbgaufs.c */
+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
+void dbgaufs_si_fin(struct au_sbinfo *sbinfo);
+int dbgaufs_si_init(struct au_sbinfo *sbinfo);
+void dbgaufs_fin(void);
+int __init dbgaufs_init(void);
+#else
+AuStubVoid(dbgaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
+AuStubVoid(dbgaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
+AuStubVoid(dbgaufs_si_fin, struct au_sbinfo *sbinfo)
+AuStubInt0(dbgaufs_si_init, struct au_sbinfo *sbinfo)
+AuStubVoid(dbgaufs_fin, void)
+AuStubInt0(__init dbgaufs_init, void)
+#endif /* CONFIG_DEBUG_FS */
+
+#endif /* __KERNEL__ */
+#endif /* __DBGAUFS_H__ */
diff --git a/include/fs/aufs/dcsub.c b/include/fs/aufs/dcsub.c
new file mode 100644
index 000000000..61c1a2553
--- /dev/null
+++ b/include/fs/aufs/dcsub.c
@@ -0,0 +1,225 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * sub-routines for dentry cache
+ */
+
+#include "aufs.h"
+
+static void au_dpage_free(struct au_dpage *dpage)
+{
+	int i;
+	struct dentry **p;
+
+	p = dpage->dentries;
+	for (i = 0; i < dpage->ndentry; i++)
+		dput(*p++);
+	free_page((unsigned long)dpage->dentries);
+}
+
+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp)
+{
+	int err;
+	void *p;
+
+	err = -ENOMEM;
+	dpages->dpages = kmalloc(sizeof(*dpages->dpages), gfp);
+	if (unlikely(!dpages->dpages))
+		goto out;
+
+	p = (void *)__get_free_page(gfp);
+	if (unlikely(!p))
+		goto out_dpages;
+
+	dpages->dpages[0].ndentry = 0;
+	dpages->dpages[0].dentries = p;
+	dpages->ndpage = 1;
+	return 0; /* success */
+
+out_dpages:
+	kfree(dpages->dpages);
+out:
+	return err;
+}
+
+void au_dpages_free(struct au_dcsub_pages *dpages)
+{
+	int i;
+	struct au_dpage *p;
+
+	p = dpages->dpages;
+	for (i = 0; i < dpages->ndpage; i++)
+		au_dpage_free(p++);
+	kfree(dpages->dpages);
+}
+
+static int au_dpages_append(struct au_dcsub_pages *dpages,
+			    struct dentry *dentry, gfp_t gfp)
+{
+	int err, sz;
+	struct au_dpage *dpage;
+	void *p;
+
+	dpage = dpages->dpages + dpages->ndpage - 1;
+	sz = PAGE_SIZE / sizeof(dentry);
+	if (unlikely(dpage->ndentry >= sz)) {
+		AuLabel(new dpage);
+		err = -ENOMEM;
+		sz = dpages->ndpage * sizeof(*dpages->dpages);
+		p = au_kzrealloc(dpages->dpages, sz,
+				 sz + sizeof(*dpages->dpages), gfp,
+				 /*may_shrink*/0);
+		if (unlikely(!p))
+			goto out;
+
+		dpages->dpages = p;
+		dpage = dpages->dpages + dpages->ndpage;
+		p = (void *)__get_free_page(gfp);
+		if (unlikely(!p))
+			goto out;
+
+		dpage->ndentry = 0;
+		dpage->dentries = p;
+		dpages->ndpage++;
+	}
+
+	AuDebugOn(au_dcount(dentry) <= 0);
+	dpage->dentries[dpage->ndentry++] = dget_dlock(dentry);
+	return 0; /* success */
+
+out:
+	return err;
+}
+
+/* todo: BAD approach */
+/* copied from linux/fs/dcache.c */
+enum d_walk_ret {
+	D_WALK_CONTINUE,
+	D_WALK_QUIT,
+	D_WALK_NORETRY,
+	D_WALK_SKIP,
+};
+
+extern void d_walk(struct dentry *parent, void *data,
+		   enum d_walk_ret (*enter)(void *, struct dentry *),
+		   void (*finish)(void *));
+
+struct ac_dpages_arg {
+	int err;
+	struct au_dcsub_pages *dpages;
+	struct super_block *sb;
+	au_dpages_test test;
+	void *arg;
+};
+
+static enum d_walk_ret au_call_dpages_append(void *_arg, struct dentry *dentry)
+{
+	enum d_walk_ret ret;
+	struct ac_dpages_arg *arg = _arg;
+
+	ret = D_WALK_CONTINUE;
+	if (dentry->d_sb == arg->sb
+	    && !IS_ROOT(dentry)
+	    && au_dcount(dentry) > 0
+	    && au_di(dentry)
+	    && (!arg->test || arg->test(dentry, arg->arg))) {
+		arg->err = au_dpages_append(arg->dpages, dentry, GFP_ATOMIC);
+		if (unlikely(arg->err))
+			ret = D_WALK_QUIT;
+	}
+
+	return ret;
+}
+
+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
+		   au_dpages_test test, void *arg)
+{
+	struct ac_dpages_arg args = {
+		.err	= 0,
+		.dpages	= dpages,
+		.sb	= root->d_sb,
+		.test	= test,
+		.arg	= arg
+	};
+
+	d_walk(root, &args, au_call_dpages_append, NULL);
+
+	return args.err;
+}
+
+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
+		       int do_include, au_dpages_test test, void *arg)
+{
+	int err;
+
+	err = 0;
+	write_seqlock(&rename_lock);
+	spin_lock(&dentry->d_lock);
+	if (do_include
+	    && au_dcount(dentry) > 0
+	    && (!test || test(dentry, arg)))
+		err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
+	spin_unlock(&dentry->d_lock);
+	if (unlikely(err))
+		goto out;
+
+	/*
+	 * RCU for vfsmount is unnecessary since this is a traverse in a single
+	 * mount
+	 */
+	while (!IS_ROOT(dentry)) {
+		dentry = dentry->d_parent; /* rename_lock is locked */
+		spin_lock(&dentry->d_lock);
+		if (au_dcount(dentry) > 0
+		    && (!test || test(dentry, arg)))
+			err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
+		spin_unlock(&dentry->d_lock);
+		if (unlikely(err))
+			break;
+	}
+
+out:
+	write_sequnlock(&rename_lock);
+	return err;
+}
+
+static inline int au_dcsub_dpages_aufs(struct dentry *dentry, void *arg)
+{
+	return au_di(dentry) && dentry->d_sb == arg;
+}
+
+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
+			    struct dentry *dentry, int do_include)
+{
+	return au_dcsub_pages_rev(dpages, dentry, do_include,
+				  au_dcsub_dpages_aufs, dentry->d_sb);
+}
+
+int au_test_subdir(struct dentry *d1, struct dentry *d2)
+{
+	struct path path[2] = {
+		{
+			.dentry = d1
+		},
+		{
+			.dentry = d2
+		}
+	};
+
+	return path_is_under(path + 0, path + 1);
+}
diff --git a/include/fs/aufs/dcsub.h b/include/fs/aufs/dcsub.h
new file mode 100644
index 000000000..50e81bf68
--- /dev/null
+++ b/include/fs/aufs/dcsub.h
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * sub-routines for dentry cache
+ */
+
+#ifndef __AUFS_DCSUB_H__
+#define __AUFS_DCSUB_H__
+
+#ifdef __KERNEL__
+
+#include <linux/dcache.h>
+#include <linux/fs.h>
+
+struct au_dpage {
+	int ndentry;
+	struct dentry **dentries;
+};
+
+struct au_dcsub_pages {
+	int ndpage;
+	struct au_dpage *dpages;
+};
+
+/* ---------------------------------------------------------------------- */
+
+/* dcsub.c */
+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp);
+void au_dpages_free(struct au_dcsub_pages *dpages);
+typedef int (*au_dpages_test)(struct dentry *dentry, void *arg);
+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
+		   au_dpages_test test, void *arg);
+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
+		       int do_include, au_dpages_test test, void *arg);
+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
+			    struct dentry *dentry, int do_include);
+int au_test_subdir(struct dentry *d1, struct dentry *d2);
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * todo: in linux-3.13, several similar (but faster) helpers are added to
+ * include/linux/dcache.h. Try them (in the future).
+ */
+
+static inline int au_d_hashed_positive(struct dentry *d)
+{
+	int err;
+	struct inode *inode = d_inode(d);
+
+	err = 0;
+	if (unlikely(d_unhashed(d)
+		     || d_is_negative(d)
+		     || !inode->i_nlink))
+		err = -ENOENT;
+	return err;
+}
+
+static inline int au_d_linkable(struct dentry *d)
+{
+	int err;
+	struct inode *inode = d_inode(d);
+
+	err = au_d_hashed_positive(d);
+	if (err
+	    && d_is_positive(d)
+	    && (inode->i_state & I_LINKABLE))
+		err = 0;
+	return err;
+}
+
+static inline int au_d_alive(struct dentry *d)
+{
+	int err;
+	struct inode *inode;
+
+	err = 0;
+	if (!IS_ROOT(d))
+		err = au_d_hashed_positive(d);
+	else {
+		inode = d_inode(d);
+		if (unlikely(d_unlinked(d)
+			     || d_is_negative(d)
+			     || !inode->i_nlink))
+			err = -ENOENT;
+	}
+	return err;
+}
+
+static inline int au_alive_dir(struct dentry *d)
+{
+	int err;
+
+	err = au_d_alive(d);
+	if (unlikely(err || IS_DEADDIR(d_inode(d))))
+		err = -ENOENT;
+	return err;
+}
+
+static inline int au_qstreq(struct qstr *a, struct qstr *b)
+{
+	return a->len == b->len
+		&& !memcmp(a->name, b->name, a->len);
+}
+
+/*
+ * by the commit
+ * 360f547 2015-01-25 dcache: let the dentry count go down to zero without
+ *			taking d_lock
+ * the type of d_lockref.count became int, but the inlined function d_count()
+ * still returns unsigned int.
+ * I don't know why. Maybe it is for every d_count() users?
+ * Anyway au_dcount() lives on.
+ */
+static inline int au_dcount(struct dentry *d)
+{
+	return (int)d_count(d);
+}
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_DCSUB_H__ */
diff --git a/include/fs/aufs/debug.c b/include/fs/aufs/debug.c
new file mode 100644
index 000000000..87a1d2283
--- /dev/null
+++ b/include/fs/aufs/debug.c
@@ -0,0 +1,440 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * debug print functions
+ */
+
+#include "aufs.h"
+
+/* Returns 0, or -errno.  arg is in kp->arg. */
+static int param_atomic_t_set(const char *val, const struct kernel_param *kp)
+{
+	int err, n;
+
+	err = kstrtoint(val, 0, &n);
+	if (!err) {
+		if (n > 0)
+			au_debug_on();
+		else
+			au_debug_off();
+	}
+	return err;
+}
+
+/* Returns length written or -errno.  Buffer is 4k (ie. be short!) */
+static int param_atomic_t_get(char *buffer, const struct kernel_param *kp)
+{
+	atomic_t *a;
+
+	a = kp->arg;
+	return sprintf(buffer, "%d", atomic_read(a));
+}
+
+static struct kernel_param_ops param_ops_atomic_t = {
+	.set = param_atomic_t_set,
+	.get = param_atomic_t_get
+	/* void (*free)(void *arg) */
+};
+
+atomic_t aufs_debug = ATOMIC_INIT(0);
+MODULE_PARM_DESC(debug, "debug print");
+module_param_named(debug, aufs_debug, atomic_t, S_IRUGO | S_IWUSR | S_IWGRP);
+
+DEFINE_MUTEX(au_dbg_mtx);	/* just to serialize the dbg msgs */
+char *au_plevel = KERN_DEBUG;
+#define dpri(fmt, ...) do {					\
+	if ((au_plevel						\
+	     && strcmp(au_plevel, KERN_DEBUG))			\
+	    || au_debug_test())					\
+		printk("%s" fmt, au_plevel, ##__VA_ARGS__);	\
+} while (0)
+
+/* ---------------------------------------------------------------------- */
+
+void au_dpri_whlist(struct au_nhash *whlist)
+{
+	unsigned long ul, n;
+	struct hlist_head *head;
+	struct au_vdir_wh *pos;
+
+	n = whlist->nh_num;
+	head = whlist->nh_head;
+	for (ul = 0; ul < n; ul++) {
+		hlist_for_each_entry(pos, head, wh_hash)
+			dpri("b%d, %.*s, %d\n",
+			     pos->wh_bindex,
+			     pos->wh_str.len, pos->wh_str.name,
+			     pos->wh_str.len);
+		head++;
+	}
+}
+
+void au_dpri_vdir(struct au_vdir *vdir)
+{
+	unsigned long ul;
+	union au_vdir_deblk_p p;
+	unsigned char *o;
+
+	if (!vdir || IS_ERR(vdir)) {
+		dpri("err %ld\n", PTR_ERR(vdir));
+		return;
+	}
+
+	dpri("deblk %u, nblk %lu, deblk %p, last{%lu, %p}, ver %lu\n",
+	     vdir->vd_deblk_sz, vdir->vd_nblk, vdir->vd_deblk,
+	     vdir->vd_last.ul, vdir->vd_last.p.deblk, vdir->vd_version);
+	for (ul = 0; ul < vdir->vd_nblk; ul++) {
+		p.deblk = vdir->vd_deblk[ul];
+		o = p.deblk;
+		dpri("[%lu]: %p\n", ul, o);
+	}
+}
+
+static int do_pri_inode(aufs_bindex_t bindex, struct inode *inode, int hn,
+			struct dentry *wh)
+{
+	char *n = NULL;
+	int l = 0;
+
+	if (!inode || IS_ERR(inode)) {
+		dpri("i%d: err %ld\n", bindex, PTR_ERR(inode));
+		return -1;
+	}
+
+	/* the type of i_blocks depends upon CONFIG_LBDAF */
+	BUILD_BUG_ON(sizeof(inode->i_blocks) != sizeof(unsigned long)
+		     && sizeof(inode->i_blocks) != sizeof(u64));
+	if (wh) {
+		n = (void *)wh->d_name.name;
+		l = wh->d_name.len;
+	}
+
+	dpri("i%d: %p, i%lu, %s, cnt %d, nl %u, 0%o, sz %llu, blk %llu,"
+	     " hn %d, ct %lld, np %lu, st 0x%lx, f 0x%x, v %llu, g %x%s%.*s\n",
+	     bindex, inode,
+	     inode->i_ino, inode->i_sb ? au_sbtype(inode->i_sb) : "??",
+	     atomic_read(&inode->i_count), inode->i_nlink, inode->i_mode,
+	     i_size_read(inode), (unsigned long long)inode->i_blocks,
+	     hn, (long long)timespec_to_ns(&inode->i_ctime) & 0x0ffff,
+	     inode->i_mapping ? inode->i_mapping->nrpages : 0,
+	     inode->i_state, inode->i_flags, inode->i_version,
+	     inode->i_generation,
+	     l ? ", wh " : "", l, n);
+	return 0;
+}
+
+void au_dpri_inode(struct inode *inode)
+{
+	struct au_iinfo *iinfo;
+	struct au_hinode *hi;
+	aufs_bindex_t bindex;
+	int err, hn;
+
+	err = do_pri_inode(-1, inode, -1, NULL);
+	if (err || !au_test_aufs(inode->i_sb) || au_is_bad_inode(inode))
+		return;
+
+	iinfo = au_ii(inode);
+	dpri("i-1: btop %d, bbot %d, gen %d\n",
+	     iinfo->ii_btop, iinfo->ii_bbot, au_iigen(inode, NULL));
+	if (iinfo->ii_btop < 0)
+		return;
+	hn = 0;
+	for (bindex = iinfo->ii_btop; bindex <= iinfo->ii_bbot; bindex++) {
+		hi = au_hinode(iinfo, bindex);
+		hn = !!au_hn(hi);
+		do_pri_inode(bindex, hi->hi_inode, hn, hi->hi_whdentry);
+	}
+}
+
+void au_dpri_dalias(struct inode *inode)
+{
+	struct dentry *d;
+
+	spin_lock(&inode->i_lock);
+	hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias)
+		au_dpri_dentry(d);
+	spin_unlock(&inode->i_lock);
+}
+
+static int do_pri_dentry(aufs_bindex_t bindex, struct dentry *dentry)
+{
+	struct dentry *wh = NULL;
+	int hn;
+	struct inode *inode;
+	struct au_iinfo *iinfo;
+	struct au_hinode *hi;
+
+	if (!dentry || IS_ERR(dentry)) {
+		dpri("d%d: err %ld\n", bindex, PTR_ERR(dentry));
+		return -1;
+	}
+	/* do not call dget_parent() here */
+	/* note: access d_xxx without d_lock */
+	dpri("d%d: %p, %pd2?, %s, cnt %d, flags 0x%x, %shashed\n",
+	     bindex, dentry, dentry,
+	     dentry->d_sb ? au_sbtype(dentry->d_sb) : "??",
+	     au_dcount(dentry), dentry->d_flags,
+	     d_unhashed(dentry) ? "un" : "");
+	hn = -1;
+	inode = NULL;
+	if (d_is_positive(dentry))
+		inode = d_inode(dentry);
+	if (inode
+	    && au_test_aufs(dentry->d_sb)
+	    && bindex >= 0
+	    && !au_is_bad_inode(inode)) {
+		iinfo = au_ii(inode);
+		hi = au_hinode(iinfo, bindex);
+		hn = !!au_hn(hi);
+		wh = hi->hi_whdentry;
+	}
+	do_pri_inode(bindex, inode, hn, wh);
+	return 0;
+}
+
+void au_dpri_dentry(struct dentry *dentry)
+{
+	struct au_dinfo *dinfo;
+	aufs_bindex_t bindex;
+	int err;
+
+	err = do_pri_dentry(-1, dentry);
+	if (err || !au_test_aufs(dentry->d_sb))
+		return;
+
+	dinfo = au_di(dentry);
+	if (!dinfo)
+		return;
+	dpri("d-1: btop %d, bbot %d, bwh %d, bdiropq %d, gen %d, tmp %d\n",
+	     dinfo->di_btop, dinfo->di_bbot,
+	     dinfo->di_bwh, dinfo->di_bdiropq, au_digen(dentry),
+	     dinfo->di_tmpfile);
+	if (dinfo->di_btop < 0)
+		return;
+	for (bindex = dinfo->di_btop; bindex <= dinfo->di_bbot; bindex++)
+		do_pri_dentry(bindex, au_hdentry(dinfo, bindex)->hd_dentry);
+}
+
+static int do_pri_file(aufs_bindex_t bindex, struct file *file)
+{
+	char a[32];
+
+	if (!file || IS_ERR(file)) {
+		dpri("f%d: err %ld\n", bindex, PTR_ERR(file));
+		return -1;
+	}
+	a[0] = 0;
+	if (bindex < 0
+	    && !IS_ERR_OR_NULL(file->f_path.dentry)
+	    && au_test_aufs(file->f_path.dentry->d_sb)
+	    && au_fi(file))
+		snprintf(a, sizeof(a), ", gen %d, mmapped %d",
+			 au_figen(file), atomic_read(&au_fi(file)->fi_mmapped));
+	dpri("f%d: mode 0x%x, flags 0%o, cnt %ld, v %llu, pos %llu%s\n",
+	     bindex, file->f_mode, file->f_flags, (long)file_count(file),
+	     file->f_version, file->f_pos, a);
+	if (!IS_ERR_OR_NULL(file->f_path.dentry))
+		do_pri_dentry(bindex, file->f_path.dentry);
+	return 0;
+}
+
+void au_dpri_file(struct file *file)
+{
+	struct au_finfo *finfo;
+	struct au_fidir *fidir;
+	struct au_hfile *hfile;
+	aufs_bindex_t bindex;
+	int err;
+
+	err = do_pri_file(-1, file);
+	if (err
+	    || IS_ERR_OR_NULL(file->f_path.dentry)
+	    || !au_test_aufs(file->f_path.dentry->d_sb))
+		return;
+
+	finfo = au_fi(file);
+	if (!finfo)
+		return;
+	if (finfo->fi_btop < 0)
+		return;
+	fidir = finfo->fi_hdir;
+	if (!fidir)
+		do_pri_file(finfo->fi_btop, finfo->fi_htop.hf_file);
+	else
+		for (bindex = finfo->fi_btop;
+		     bindex >= 0 && bindex <= fidir->fd_bbot;
+		     bindex++) {
+			hfile = fidir->fd_hfile + bindex;
+			do_pri_file(bindex, hfile ? hfile->hf_file : NULL);
+		}
+}
+
+static int do_pri_br(aufs_bindex_t bindex, struct au_branch *br)
+{
+	struct vfsmount *mnt;
+	struct super_block *sb;
+
+	if (!br || IS_ERR(br))
+		goto out;
+	mnt = au_br_mnt(br);
+	if (!mnt || IS_ERR(mnt))
+		goto out;
+	sb = mnt->mnt_sb;
+	if (!sb || IS_ERR(sb))
+		goto out;
+
+	dpri("s%d: {perm 0x%x, id %d, cnt %lld, wbr %p}, "
+	     "%s, dev 0x%02x%02x, flags 0x%lx, cnt %d, active %d, "
+	     "xino %d\n",
+	     bindex, br->br_perm, br->br_id, au_br_count(br),
+	     br->br_wbr, au_sbtype(sb), MAJOR(sb->s_dev), MINOR(sb->s_dev),
+	     sb->s_flags, sb->s_count,
+	     atomic_read(&sb->s_active), !!br->br_xino.xi_file);
+	return 0;
+
+out:
+	dpri("s%d: err %ld\n", bindex, PTR_ERR(br));
+	return -1;
+}
+
+void au_dpri_sb(struct super_block *sb)
+{
+	struct au_sbinfo *sbinfo;
+	aufs_bindex_t bindex;
+	int err;
+	/* to reuduce stack size */
+	struct {
+		struct vfsmount mnt;
+		struct au_branch fake;
+	} *a;
+
+	/* this function can be called from magic sysrq */
+	a = kzalloc(sizeof(*a), GFP_ATOMIC);
+	if (unlikely(!a)) {
+		dpri("no memory\n");
+		return;
+	}
+
+	a->mnt.mnt_sb = sb;
+	a->fake.br_path.mnt = &a->mnt;
+	au_br_count_init(&a->fake);
+	err = do_pri_br(-1, &a->fake);
+	au_br_count_fin(&a->fake);
+	kfree(a);
+	dpri("dev 0x%x\n", sb->s_dev);
+	if (err || !au_test_aufs(sb))
+		return;
+
+	sbinfo = au_sbi(sb);
+	if (!sbinfo)
+		return;
+	dpri("nw %d, gen %u, kobj %d\n",
+	     atomic_read(&sbinfo->si_nowait.nw_len), sbinfo->si_generation,
+	     kref_read(&sbinfo->si_kobj.kref));
+	for (bindex = 0; bindex <= sbinfo->si_bbot; bindex++)
+		do_pri_br(bindex, sbinfo->si_branch[0 + bindex]);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line)
+{
+	struct inode *h_inode, *inode = d_inode(dentry);
+	struct dentry *h_dentry;
+	aufs_bindex_t bindex, bbot, bi;
+
+	if (!inode /* || au_di(dentry)->di_lsc == AuLsc_DI_TMP */)
+		return;
+
+	bbot = au_dbbot(dentry);
+	bi = au_ibbot(inode);
+	if (bi < bbot)
+		bbot = bi;
+	bindex = au_dbtop(dentry);
+	bi = au_ibtop(inode);
+	if (bi > bindex)
+		bindex = bi;
+
+	for (; bindex <= bbot; bindex++) {
+		h_dentry = au_h_dptr(dentry, bindex);
+		if (!h_dentry)
+			continue;
+		h_inode = au_h_iptr(inode, bindex);
+		if (unlikely(h_inode != d_inode(h_dentry))) {
+			au_debug_on();
+			AuDbg("b%d, %s:%d\n", bindex, func, line);
+			AuDbgDentry(dentry);
+			AuDbgInode(inode);
+			au_debug_off();
+			BUG();
+		}
+	}
+}
+
+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen)
+{
+	int err, i, j;
+	struct au_dcsub_pages dpages;
+	struct au_dpage *dpage;
+	struct dentry **dentries;
+
+	err = au_dpages_init(&dpages, GFP_NOFS);
+	AuDebugOn(err);
+	err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/1);
+	AuDebugOn(err);
+	for (i = dpages.ndpage - 1; !err && i >= 0; i--) {
+		dpage = dpages.dpages + i;
+		dentries = dpage->dentries;
+		for (j = dpage->ndentry - 1; !err && j >= 0; j--)
+			AuDebugOn(au_digen_test(dentries[j], sigen));
+	}
+	au_dpages_free(&dpages);
+}
+
+void au_dbg_verify_kthread(void)
+{
+	if (au_wkq_test()) {
+		au_dbg_blocked();
+		/*
+		 * It may be recursive, but udba=notify between two aufs mounts,
+		 * where a single ro branch is shared, is not a problem.
+		 */
+		/* WARN_ON(1); */
+	}
+}
+
+/* ---------------------------------------------------------------------- */
+
+int __init au_debug_init(void)
+{
+	aufs_bindex_t bindex;
+	struct au_vdir_destr destr;
+
+	bindex = -1;
+	AuDebugOn(bindex >= 0);
+
+	destr.len = -1;
+	AuDebugOn(destr.len < NAME_MAX);
+
+#ifdef CONFIG_4KSTACKS
+	pr_warn("CONFIG_4KSTACKS is defined.\n");
+#endif
+
+	return 0;
+}
diff --git a/include/fs/aufs/debug.h b/include/fs/aufs/debug.h
new file mode 100644
index 000000000..c1325b139
--- /dev/null
+++ b/include/fs/aufs/debug.h
@@ -0,0 +1,225 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * debug print functions
+ */
+
+#ifndef __AUFS_DEBUG_H__
+#define __AUFS_DEBUG_H__
+
+#ifdef __KERNEL__
+
+#include <linux/atomic.h>
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/sysrq.h>
+
+#ifdef CONFIG_AUFS_DEBUG
+#define AuDebugOn(a)		BUG_ON(a)
+
+/* module parameter */
+extern atomic_t aufs_debug;
+static inline void au_debug_on(void)
+{
+	atomic_inc(&aufs_debug);
+}
+static inline void au_debug_off(void)
+{
+	atomic_dec_if_positive(&aufs_debug);
+}
+
+static inline int au_debug_test(void)
+{
+	return atomic_read(&aufs_debug) > 0;
+}
+#else
+#define AuDebugOn(a)		do {} while (0)
+AuStubVoid(au_debug_on, void)
+AuStubVoid(au_debug_off, void)
+AuStubInt0(au_debug_test, void)
+#endif /* CONFIG_AUFS_DEBUG */
+
+#define param_check_atomic_t(name, p) __param_check(name, p, atomic_t)
+
+/* ---------------------------------------------------------------------- */
+
+/* debug print */
+
+#define AuDbg(fmt, ...) do { \
+	if (au_debug_test()) \
+		pr_debug("DEBUG: " fmt, ##__VA_ARGS__); \
+} while (0)
+#define AuLabel(l)		AuDbg(#l "\n")
+#define AuIOErr(fmt, ...)	pr_err("I/O Error, " fmt, ##__VA_ARGS__)
+#define AuWarn1(fmt, ...) do { \
+	static unsigned char _c; \
+	if (!_c++) \
+		pr_warn(fmt, ##__VA_ARGS__); \
+} while (0)
+
+#define AuErr1(fmt, ...) do { \
+	static unsigned char _c; \
+	if (!_c++) \
+		pr_err(fmt, ##__VA_ARGS__); \
+} while (0)
+
+#define AuIOErr1(fmt, ...) do { \
+	static unsigned char _c; \
+	if (!_c++) \
+		AuIOErr(fmt, ##__VA_ARGS__); \
+} while (0)
+
+#define AuUnsupportMsg	"This operation is not supported." \
+			" Please report this application to aufs-users ML."
+#define AuUnsupport(fmt, ...) do { \
+	pr_err(AuUnsupportMsg "\n" fmt, ##__VA_ARGS__); \
+	dump_stack(); \
+} while (0)
+
+#define AuTraceErr(e) do { \
+	if (unlikely((e) < 0)) \
+		AuDbg("err %d\n", (int)(e)); \
+} while (0)
+
+#define AuTraceErrPtr(p) do { \
+	if (IS_ERR(p)) \
+		AuDbg("err %ld\n", PTR_ERR(p)); \
+} while (0)
+
+/* dirty macros for debug print, use with "%.*s" and caution */
+#define AuLNPair(qstr)		(qstr)->len, (qstr)->name
+
+/* ---------------------------------------------------------------------- */
+
+struct dentry;
+#ifdef CONFIG_AUFS_DEBUG
+extern struct mutex au_dbg_mtx;
+extern char *au_plevel;
+struct au_nhash;
+void au_dpri_whlist(struct au_nhash *whlist);
+struct au_vdir;
+void au_dpri_vdir(struct au_vdir *vdir);
+struct inode;
+void au_dpri_inode(struct inode *inode);
+void au_dpri_dalias(struct inode *inode);
+void au_dpri_dentry(struct dentry *dentry);
+struct file;
+void au_dpri_file(struct file *filp);
+struct super_block;
+void au_dpri_sb(struct super_block *sb);
+
+#define au_dbg_verify_dinode(d) __au_dbg_verify_dinode(d, __func__, __LINE__)
+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line);
+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen);
+void au_dbg_verify_kthread(void);
+
+int __init au_debug_init(void);
+
+#define AuDbgWhlist(w) do { \
+	mutex_lock(&au_dbg_mtx); \
+	AuDbg(#w "\n"); \
+	au_dpri_whlist(w); \
+	mutex_unlock(&au_dbg_mtx); \
+} while (0)
+
+#define AuDbgVdir(v) do { \
+	mutex_lock(&au_dbg_mtx); \
+	AuDbg(#v "\n"); \
+	au_dpri_vdir(v); \
+	mutex_unlock(&au_dbg_mtx); \
+} while (0)
+
+#define AuDbgInode(i) do { \
+	mutex_lock(&au_dbg_mtx); \
+	AuDbg(#i "\n"); \
+	au_dpri_inode(i); \
+	mutex_unlock(&au_dbg_mtx); \
+} while (0)
+
+#define AuDbgDAlias(i) do { \
+	mutex_lock(&au_dbg_mtx); \
+	AuDbg(#i "\n"); \
+	au_dpri_dalias(i); \
+	mutex_unlock(&au_dbg_mtx); \
+} while (0)
+
+#define AuDbgDentry(d) do { \
+	mutex_lock(&au_dbg_mtx); \
+	AuDbg(#d "\n"); \
+	au_dpri_dentry(d); \
+	mutex_unlock(&au_dbg_mtx); \
+} while (0)
+
+#define AuDbgFile(f) do { \
+	mutex_lock(&au_dbg_mtx); \
+	AuDbg(#f "\n"); \
+	au_dpri_file(f); \
+	mutex_unlock(&au_dbg_mtx); \
+} while (0)
+
+#define AuDbgSb(sb) do { \
+	mutex_lock(&au_dbg_mtx); \
+	AuDbg(#sb "\n"); \
+	au_dpri_sb(sb); \
+	mutex_unlock(&au_dbg_mtx); \
+} while (0)
+
+#define AuDbgSym(addr) do {				\
+	char sym[KSYM_SYMBOL_LEN];			\
+	sprint_symbol(sym, (unsigned long)addr);	\
+	AuDbg("%s\n", sym);				\
+} while (0)
+#else
+AuStubVoid(au_dbg_verify_dinode, struct dentry *dentry)
+AuStubVoid(au_dbg_verify_gen, struct dentry *parent, unsigned int sigen)
+AuStubVoid(au_dbg_verify_kthread, void)
+AuStubInt0(__init au_debug_init, void)
+
+#define AuDbgWhlist(w)		do {} while (0)
+#define AuDbgVdir(v)		do {} while (0)
+#define AuDbgInode(i)		do {} while (0)
+#define AuDbgDAlias(i)		do {} while (0)
+#define AuDbgDentry(d)		do {} while (0)
+#define AuDbgFile(f)		do {} while (0)
+#define AuDbgSb(sb)		do {} while (0)
+#define AuDbgSym(addr)		do {} while (0)
+#endif /* CONFIG_AUFS_DEBUG */
+
+/* ---------------------------------------------------------------------- */
+
+#ifdef CONFIG_AUFS_MAGIC_SYSRQ
+int __init au_sysrq_init(void);
+void au_sysrq_fin(void);
+
+#ifdef CONFIG_HW_CONSOLE
+#define au_dbg_blocked() do { \
+	WARN_ON(1); \
+	handle_sysrq('w'); \
+} while (0)
+#else
+AuStubVoid(au_dbg_blocked, void)
+#endif
+
+#else
+AuStubInt0(__init au_sysrq_init, void)
+AuStubVoid(au_sysrq_fin, void)
+AuStubVoid(au_dbg_blocked, void)
+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_DEBUG_H__ */
diff --git a/include/fs/aufs/dentry.c b/include/fs/aufs/dentry.c
new file mode 100644
index 000000000..51b846324
--- /dev/null
+++ b/include/fs/aufs/dentry.c
@@ -0,0 +1,1152 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * lookup and dentry operations
+ */
+
+#include <linux/namei.h>
+#include "aufs.h"
+
+/*
+ * returns positive/negative dentry, NULL or an error.
+ * NULL means whiteout-ed or not-found.
+ */
+static struct dentry*
+au_do_lookup(struct dentry *h_parent, struct dentry *dentry,
+	     aufs_bindex_t bindex, struct au_do_lookup_args *args)
+{
+	struct dentry *h_dentry;
+	struct inode *h_inode;
+	struct au_branch *br;
+	int wh_found, opq;
+	unsigned char wh_able;
+	const unsigned char allow_neg = !!au_ftest_lkup(args->flags, ALLOW_NEG);
+	const unsigned char ignore_perm = !!au_ftest_lkup(args->flags,
+							  IGNORE_PERM);
+
+	wh_found = 0;
+	br = au_sbr(dentry->d_sb, bindex);
+	wh_able = !!au_br_whable(br->br_perm);
+	if (wh_able)
+		wh_found = au_wh_test(h_parent, &args->whname, ignore_perm);
+	h_dentry = ERR_PTR(wh_found);
+	if (!wh_found)
+		goto real_lookup;
+	if (unlikely(wh_found < 0))
+		goto out;
+
+	/* We found a whiteout */
+	/* au_set_dbbot(dentry, bindex); */
+	au_set_dbwh(dentry, bindex);
+	if (!allow_neg)
+		return NULL; /* success */
+
+real_lookup:
+	if (!ignore_perm)
+		h_dentry = vfsub_lkup_one(args->name, h_parent);
+	else
+		h_dentry = au_sio_lkup_one(args->name, h_parent);
+	if (IS_ERR(h_dentry)) {
+		if (PTR_ERR(h_dentry) == -ENAMETOOLONG
+		    && !allow_neg)
+			h_dentry = NULL;
+		goto out;
+	}
+
+	h_inode = d_inode(h_dentry);
+	if (d_is_negative(h_dentry)) {
+		if (!allow_neg)
+			goto out_neg;
+	} else if (wh_found
+		   || (args->type && args->type != (h_inode->i_mode & S_IFMT)))
+		goto out_neg;
+	else if (au_ftest_lkup(args->flags, DIRREN)
+		 /* && h_inode */
+		 && !au_dr_lkup_h_ino(args, bindex, h_inode->i_ino)) {
+		AuDbg("b%d %pd ignored hi%llu\n", bindex, h_dentry,
+		      (unsigned long long)h_inode->i_ino);
+		goto out_neg;
+	}
+
+	if (au_dbbot(dentry) <= bindex)
+		au_set_dbbot(dentry, bindex);
+	if (au_dbtop(dentry) < 0 || bindex < au_dbtop(dentry))
+		au_set_dbtop(dentry, bindex);
+	au_set_h_dptr(dentry, bindex, h_dentry);
+
+	if (!d_is_dir(h_dentry)
+	    || !wh_able
+	    || (d_really_is_positive(dentry) && !d_is_dir(dentry)))
+		goto out; /* success */
+
+	vfsub_inode_lock_shared_nested(h_inode, AuLsc_I_CHILD);
+	opq = au_diropq_test(h_dentry);
+	inode_unlock_shared(h_inode);
+	if (opq > 0)
+		au_set_dbdiropq(dentry, bindex);
+	else if (unlikely(opq < 0)) {
+		au_set_h_dptr(dentry, bindex, NULL);
+		h_dentry = ERR_PTR(opq);
+	}
+	goto out;
+
+out_neg:
+	dput(h_dentry);
+	h_dentry = NULL;
+out:
+	return h_dentry;
+}
+
+static int au_test_shwh(struct super_block *sb, const struct qstr *name)
+{
+	if (unlikely(!au_opt_test(au_mntflags(sb), SHWH)
+		     && !strncmp(name->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)))
+		return -EPERM;
+	return 0;
+}
+
+/*
+ * returns the number of lower positive dentries,
+ * otherwise an error.
+ * can be called at unlinking with @type is zero.
+ */
+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t btop,
+		   unsigned int flags)
+{
+	int npositive, err;
+	aufs_bindex_t bindex, btail, bdiropq;
+	unsigned char isdir, dirperm1, dirren;
+	struct au_do_lookup_args args = {
+		.flags		= flags,
+		.name		= &dentry->d_name
+	};
+	struct dentry *parent;
+	struct super_block *sb;
+
+	sb = dentry->d_sb;
+	err = au_test_shwh(sb, args.name);
+	if (unlikely(err))
+		goto out;
+
+	err = au_wh_name_alloc(&args.whname, args.name);
+	if (unlikely(err))
+		goto out;
+
+	isdir = !!d_is_dir(dentry);
+	dirperm1 = !!au_opt_test(au_mntflags(sb), DIRPERM1);
+	dirren = !!au_opt_test(au_mntflags(sb), DIRREN);
+	if (dirren)
+		au_fset_lkup(args.flags, DIRREN);
+
+	npositive = 0;
+	parent = dget_parent(dentry);
+	btail = au_dbtaildir(parent);
+	for (bindex = btop; bindex <= btail; bindex++) {
+		struct dentry *h_parent, *h_dentry;
+		struct inode *h_inode, *h_dir;
+		struct au_branch *br;
+
+		h_dentry = au_h_dptr(dentry, bindex);
+		if (h_dentry) {
+			if (d_is_positive(h_dentry))
+				npositive++;
+			break;
+		}
+		h_parent = au_h_dptr(parent, bindex);
+		if (!h_parent || !d_is_dir(h_parent))
+			continue;
+
+		if (dirren) {
+			/* if the inum matches, then use the prepared name */
+			err = au_dr_lkup_name(&args, bindex);
+			if (unlikely(err))
+				goto out_parent;
+		}
+
+		h_dir = d_inode(h_parent);
+		vfsub_inode_lock_shared_nested(h_dir, AuLsc_I_PARENT);
+		h_dentry = au_do_lookup(h_parent, dentry, bindex, &args);
+		inode_unlock_shared(h_dir);
+		err = PTR_ERR(h_dentry);
+		if (IS_ERR(h_dentry))
+			goto out_parent;
+		if (h_dentry)
+			au_fclr_lkup(args.flags, ALLOW_NEG);
+		if (dirperm1)
+			au_fset_lkup(args.flags, IGNORE_PERM);
+
+		if (au_dbwh(dentry) == bindex)
+			break;
+		if (!h_dentry)
+			continue;
+		if (d_is_negative(h_dentry))
+			continue;
+		h_inode = d_inode(h_dentry);
+		npositive++;
+		if (!args.type)
+			args.type = h_inode->i_mode & S_IFMT;
+		if (args.type != S_IFDIR)
+			break;
+		else if (isdir) {
+			/* the type of lower may be different */
+			bdiropq = au_dbdiropq(dentry);
+			if (bdiropq >= 0 && bdiropq <= bindex)
+				break;
+		}
+		br = au_sbr(sb, bindex);
+		if (dirren
+		    && au_dr_hino_test_add(&br->br_dirren, h_inode->i_ino,
+					   /*add_ent*/NULL)) {
+			/* prepare next name to lookup */
+			err = au_dr_lkup(&args, dentry, bindex);
+			if (unlikely(err))
+				goto out_parent;
+		}
+	}
+
+	if (npositive) {
+		AuLabel(positive);
+		au_update_dbtop(dentry);
+	}
+	err = npositive;
+	if (unlikely(!au_opt_test(au_mntflags(sb), UDBA_NONE)
+		     && au_dbtop(dentry) < 0)) {
+		err = -EIO;
+		AuIOErr("both of real entry and whiteout found, %pd, err %d\n",
+			dentry, err);
+	}
+
+out_parent:
+	dput(parent);
+	kfree(args.whname.name);
+	if (dirren)
+		au_dr_lkup_fin(&args);
+out:
+	return err;
+}
+
+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent)
+{
+	struct dentry *dentry;
+	int wkq_err;
+
+	if (!au_test_h_perm_sio(d_inode(parent), MAY_EXEC))
+		dentry = vfsub_lkup_one(name, parent);
+	else {
+		struct vfsub_lkup_one_args args = {
+			.errp	= &dentry,
+			.name	= name,
+			.parent	= parent
+		};
+
+		wkq_err = au_wkq_wait(vfsub_call_lkup_one, &args);
+		if (unlikely(wkq_err))
+			dentry = ERR_PTR(wkq_err);
+	}
+
+	return dentry;
+}
+
+/*
+ * lookup @dentry on @bindex which should be negative.
+ */
+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh)
+{
+	int err;
+	struct dentry *parent, *h_parent, *h_dentry;
+	struct au_branch *br;
+
+	parent = dget_parent(dentry);
+	h_parent = au_h_dptr(parent, bindex);
+	br = au_sbr(dentry->d_sb, bindex);
+	if (wh)
+		h_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
+	else
+		h_dentry = au_sio_lkup_one(&dentry->d_name, h_parent);
+	err = PTR_ERR(h_dentry);
+	if (IS_ERR(h_dentry))
+		goto out;
+	if (unlikely(d_is_positive(h_dentry))) {
+		err = -EIO;
+		AuIOErr("%pd should be negative on b%d.\n", h_dentry, bindex);
+		dput(h_dentry);
+		goto out;
+	}
+
+	err = 0;
+	if (bindex < au_dbtop(dentry))
+		au_set_dbtop(dentry, bindex);
+	if (au_dbbot(dentry) < bindex)
+		au_set_dbbot(dentry, bindex);
+	au_set_h_dptr(dentry, bindex, h_dentry);
+
+out:
+	dput(parent);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* subset of struct inode */
+struct au_iattr {
+	unsigned long		i_ino;
+	/* unsigned int		i_nlink; */
+	kuid_t			i_uid;
+	kgid_t			i_gid;
+	u64			i_version;
+/*
+	loff_t			i_size;
+	blkcnt_t		i_blocks;
+*/
+	umode_t			i_mode;
+};
+
+static void au_iattr_save(struct au_iattr *ia, struct inode *h_inode)
+{
+	ia->i_ino = h_inode->i_ino;
+	/* ia->i_nlink = h_inode->i_nlink; */
+	ia->i_uid = h_inode->i_uid;
+	ia->i_gid = h_inode->i_gid;
+	ia->i_version = h_inode->i_version;
+/*
+	ia->i_size = h_inode->i_size;
+	ia->i_blocks = h_inode->i_blocks;
+*/
+	ia->i_mode = (h_inode->i_mode & S_IFMT);
+}
+
+static int au_iattr_test(struct au_iattr *ia, struct inode *h_inode)
+{
+	return ia->i_ino != h_inode->i_ino
+		/* || ia->i_nlink != h_inode->i_nlink */
+		|| !uid_eq(ia->i_uid, h_inode->i_uid)
+		|| !gid_eq(ia->i_gid, h_inode->i_gid)
+		|| ia->i_version != h_inode->i_version
+/*
+		|| ia->i_size != h_inode->i_size
+		|| ia->i_blocks != h_inode->i_blocks
+*/
+		|| ia->i_mode != (h_inode->i_mode & S_IFMT);
+}
+
+static int au_h_verify_dentry(struct dentry *h_dentry, struct dentry *h_parent,
+			      struct au_branch *br)
+{
+	int err;
+	struct au_iattr ia;
+	struct inode *h_inode;
+	struct dentry *h_d;
+	struct super_block *h_sb;
+
+	err = 0;
+	memset(&ia, -1, sizeof(ia));
+	h_sb = h_dentry->d_sb;
+	h_inode = NULL;
+	if (d_is_positive(h_dentry)) {
+		h_inode = d_inode(h_dentry);
+		au_iattr_save(&ia, h_inode);
+	} else if (au_test_nfs(h_sb) || au_test_fuse(h_sb))
+		/* nfs d_revalidate may return 0 for negative dentry */
+		/* fuse d_revalidate always return 0 for negative dentry */
+		goto out;
+
+	/* main purpose is namei.c:cached_lookup() and d_revalidate */
+	h_d = vfsub_lkup_one(&h_dentry->d_name, h_parent);
+	err = PTR_ERR(h_d);
+	if (IS_ERR(h_d))
+		goto out;
+
+	err = 0;
+	if (unlikely(h_d != h_dentry
+		     || d_inode(h_d) != h_inode
+		     || (h_inode && au_iattr_test(&ia, h_inode))))
+		err = au_busy_or_stale();
+	dput(h_d);
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
+		struct dentry *h_parent, struct au_branch *br)
+{
+	int err;
+
+	err = 0;
+	if (udba == AuOpt_UDBA_REVAL
+	    && !au_test_fs_remote(h_dentry->d_sb)) {
+		IMustLock(h_dir);
+		err = (d_inode(h_dentry->d_parent) != h_dir);
+	} else if (udba != AuOpt_UDBA_NONE)
+		err = au_h_verify_dentry(h_dentry, h_parent, br);
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_do_refresh_hdentry(struct dentry *dentry, struct dentry *parent)
+{
+	int err;
+	aufs_bindex_t new_bindex, bindex, bbot, bwh, bdiropq;
+	struct au_hdentry tmp, *p, *q;
+	struct au_dinfo *dinfo;
+	struct super_block *sb;
+
+	DiMustWriteLock(dentry);
+
+	sb = dentry->d_sb;
+	dinfo = au_di(dentry);
+	bbot = dinfo->di_bbot;
+	bwh = dinfo->di_bwh;
+	bdiropq = dinfo->di_bdiropq;
+	bindex = dinfo->di_btop;
+	p = au_hdentry(dinfo, bindex);
+	for (; bindex <= bbot; bindex++, p++) {
+		if (!p->hd_dentry)
+			continue;
+
+		new_bindex = au_br_index(sb, p->hd_id);
+		if (new_bindex == bindex)
+			continue;
+
+		if (dinfo->di_bwh == bindex)
+			bwh = new_bindex;
+		if (dinfo->di_bdiropq == bindex)
+			bdiropq = new_bindex;
+		if (new_bindex < 0) {
+			au_hdput(p);
+			p->hd_dentry = NULL;
+			continue;
+		}
+
+		/* swap two lower dentries, and loop again */
+		q = au_hdentry(dinfo, new_bindex);
+		tmp = *q;
+		*q = *p;
+		*p = tmp;
+		if (tmp.hd_dentry) {
+			bindex--;
+			p--;
+		}
+	}
+
+	dinfo->di_bwh = -1;
+	if (bwh >= 0 && bwh <= au_sbbot(sb) && au_sbr_whable(sb, bwh))
+		dinfo->di_bwh = bwh;
+
+	dinfo->di_bdiropq = -1;
+	if (bdiropq >= 0
+	    && bdiropq <= au_sbbot(sb)
+	    && au_sbr_whable(sb, bdiropq))
+		dinfo->di_bdiropq = bdiropq;
+
+	err = -EIO;
+	dinfo->di_btop = -1;
+	dinfo->di_bbot = -1;
+	bbot = au_dbbot(parent);
+	bindex = 0;
+	p = au_hdentry(dinfo, bindex);
+	for (; bindex <= bbot; bindex++, p++)
+		if (p->hd_dentry) {
+			dinfo->di_btop = bindex;
+			break;
+		}
+
+	if (dinfo->di_btop >= 0) {
+		bindex = bbot;
+		p = au_hdentry(dinfo, bindex);
+		for (; bindex >= 0; bindex--, p--)
+			if (p->hd_dentry) {
+				dinfo->di_bbot = bindex;
+				err = 0;
+				break;
+			}
+	}
+
+	return err;
+}
+
+static void au_do_hide(struct dentry *dentry)
+{
+	struct inode *inode;
+
+	if (d_really_is_positive(dentry)) {
+		inode = d_inode(dentry);
+		if (!d_is_dir(dentry)) {
+			if (inode->i_nlink && !d_unhashed(dentry))
+				drop_nlink(inode);
+		} else {
+			clear_nlink(inode);
+			/* stop next lookup */
+			inode->i_flags |= S_DEAD;
+		}
+		smp_mb(); /* necessary? */
+	}
+	d_drop(dentry);
+}
+
+static int au_hide_children(struct dentry *parent)
+{
+	int err, i, j, ndentry;
+	struct au_dcsub_pages dpages;
+	struct au_dpage *dpage;
+	struct dentry *dentry;
+
+	err = au_dpages_init(&dpages, GFP_NOFS);
+	if (unlikely(err))
+		goto out;
+	err = au_dcsub_pages(&dpages, parent, NULL, NULL);
+	if (unlikely(err))
+		goto out_dpages;
+
+	/* in reverse order */
+	for (i = dpages.ndpage - 1; i >= 0; i--) {
+		dpage = dpages.dpages + i;
+		ndentry = dpage->ndentry;
+		for (j = ndentry - 1; j >= 0; j--) {
+			dentry = dpage->dentries[j];
+			if (dentry != parent)
+				au_do_hide(dentry);
+		}
+	}
+
+out_dpages:
+	au_dpages_free(&dpages);
+out:
+	return err;
+}
+
+static void au_hide(struct dentry *dentry)
+{
+	int err;
+
+	AuDbgDentry(dentry);
+	if (d_is_dir(dentry)) {
+		/* shrink_dcache_parent(dentry); */
+		err = au_hide_children(dentry);
+		if (unlikely(err))
+			AuIOErr("%pd, failed hiding children, ignored %d\n",
+				dentry, err);
+	}
+	au_do_hide(dentry);
+}
+
+/*
+ * By adding a dirty branch, a cached dentry may be affected in various ways.
+ *
+ * a dirty branch is added
+ * - on the top of layers
+ * - in the middle of layers
+ * - to the bottom of layers
+ *
+ * on the added branch there exists
+ * - a whiteout
+ * - a diropq
+ * - a same named entry
+ *   + exist
+ *     * negative --> positive
+ *     * positive --> positive
+ *	 - type is unchanged
+ *	 - type is changed
+ *   + doesn't exist
+ *     * negative --> negative
+ *     * positive --> negative (rejected by au_br_del() for non-dir case)
+ * - none
+ */
+static int au_refresh_by_dinfo(struct dentry *dentry, struct au_dinfo *dinfo,
+			       struct au_dinfo *tmp)
+{
+	int err;
+	aufs_bindex_t bindex, bbot;
+	struct {
+		struct dentry *dentry;
+		struct inode *inode;
+		mode_t mode;
+	} orig_h, tmp_h = {
+		.dentry = NULL
+	};
+	struct au_hdentry *hd;
+	struct inode *inode, *h_inode;
+	struct dentry *h_dentry;
+
+	err = 0;
+	AuDebugOn(dinfo->di_btop < 0);
+	orig_h.mode = 0;
+	orig_h.dentry = au_hdentry(dinfo, dinfo->di_btop)->hd_dentry;
+	orig_h.inode = NULL;
+	if (d_is_positive(orig_h.dentry)) {
+		orig_h.inode = d_inode(orig_h.dentry);
+		orig_h.mode = orig_h.inode->i_mode & S_IFMT;
+	}
+	if (tmp->di_btop >= 0) {
+		tmp_h.dentry = au_hdentry(tmp, tmp->di_btop)->hd_dentry;
+		if (d_is_positive(tmp_h.dentry)) {
+			tmp_h.inode = d_inode(tmp_h.dentry);
+			tmp_h.mode = tmp_h.inode->i_mode & S_IFMT;
+		}
+	}
+
+	inode = NULL;
+	if (d_really_is_positive(dentry))
+		inode = d_inode(dentry);
+	if (!orig_h.inode) {
+		AuDbg("nagative originally\n");
+		if (inode) {
+			au_hide(dentry);
+			goto out;
+		}
+		AuDebugOn(inode);
+		AuDebugOn(dinfo->di_btop != dinfo->di_bbot);
+		AuDebugOn(dinfo->di_bdiropq != -1);
+
+		if (!tmp_h.inode) {
+			AuDbg("negative --> negative\n");
+			/* should have only one negative lower */
+			if (tmp->di_btop >= 0
+			    && tmp->di_btop < dinfo->di_btop) {
+				AuDebugOn(tmp->di_btop != tmp->di_bbot);
+				AuDebugOn(dinfo->di_btop != dinfo->di_bbot);
+				au_set_h_dptr(dentry, dinfo->di_btop, NULL);
+				au_di_cp(dinfo, tmp);
+				hd = au_hdentry(tmp, tmp->di_btop);
+				au_set_h_dptr(dentry, tmp->di_btop,
+					      dget(hd->hd_dentry));
+			}
+			au_dbg_verify_dinode(dentry);
+		} else {
+			AuDbg("negative --> positive\n");
+			/*
+			 * similar to the behaviour of creating with bypassing
+			 * aufs.
+			 * unhash it in order to force an error in the
+			 * succeeding create operation.
+			 * we should not set S_DEAD here.
+			 */
+			d_drop(dentry);
+			/* au_di_swap(tmp, dinfo); */
+			au_dbg_verify_dinode(dentry);
+		}
+	} else {
+		AuDbg("positive originally\n");
+		/* inode may be NULL */
+		AuDebugOn(inode && (inode->i_mode & S_IFMT) != orig_h.mode);
+		if (!tmp_h.inode) {
+			AuDbg("positive --> negative\n");
+			/* or bypassing aufs */
+			au_hide(dentry);
+			if (tmp->di_bwh >= 0 && tmp->di_bwh <= dinfo->di_btop)
+				dinfo->di_bwh = tmp->di_bwh;
+			if (inode)
+				err = au_refresh_hinode_self(inode);
+			au_dbg_verify_dinode(dentry);
+		} else if (orig_h.mode == tmp_h.mode) {
+			AuDbg("positive --> positive, same type\n");
+			if (!S_ISDIR(orig_h.mode)
+			    && dinfo->di_btop > tmp->di_btop) {
+				/*
+				 * similar to the behaviour of removing and
+				 * creating.
+				 */
+				au_hide(dentry);
+				if (inode)
+					err = au_refresh_hinode_self(inode);
+				au_dbg_verify_dinode(dentry);
+			} else {
+				/* fill empty slots */
+				if (dinfo->di_btop > tmp->di_btop)
+					dinfo->di_btop = tmp->di_btop;
+				if (dinfo->di_bbot < tmp->di_bbot)
+					dinfo->di_bbot = tmp->di_bbot;
+				dinfo->di_bwh = tmp->di_bwh;
+				dinfo->di_bdiropq = tmp->di_bdiropq;
+				bbot = dinfo->di_bbot;
+				bindex = tmp->di_btop;
+				hd = au_hdentry(tmp, bindex);
+				for (; bindex <= bbot; bindex++, hd++) {
+					if (au_h_dptr(dentry, bindex))
+						continue;
+					h_dentry = hd->hd_dentry;
+					if (!h_dentry)
+						continue;
+					AuDebugOn(d_is_negative(h_dentry));
+					h_inode = d_inode(h_dentry);
+					AuDebugOn(orig_h.mode
+						  != (h_inode->i_mode
+						      & S_IFMT));
+					au_set_h_dptr(dentry, bindex,
+						      dget(h_dentry));
+				}
+				if (inode)
+					err = au_refresh_hinode(inode, dentry);
+				au_dbg_verify_dinode(dentry);
+			}
+		} else {
+			AuDbg("positive --> positive, different type\n");
+			/* similar to the behaviour of removing and creating */
+			au_hide(dentry);
+			if (inode)
+				err = au_refresh_hinode_self(inode);
+			au_dbg_verify_dinode(dentry);
+		}
+	}
+
+out:
+	return err;
+}
+
+void au_refresh_dop(struct dentry *dentry, int force_reval)
+{
+	const struct dentry_operations *dop
+		= force_reval ? &aufs_dop : dentry->d_sb->s_d_op;
+	static const unsigned int mask
+		= DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE;
+
+	BUILD_BUG_ON(sizeof(mask) != sizeof(dentry->d_flags));
+
+	if (dentry->d_op == dop)
+		return;
+
+	AuDbg("%pd\n", dentry);
+	spin_lock(&dentry->d_lock);
+	if (dop == &aufs_dop)
+		dentry->d_flags |= mask;
+	else
+		dentry->d_flags &= ~mask;
+	dentry->d_op = dop;
+	spin_unlock(&dentry->d_lock);
+}
+
+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent)
+{
+	int err, ebrange, nbr;
+	unsigned int sigen;
+	struct au_dinfo *dinfo, *tmp;
+	struct super_block *sb;
+	struct inode *inode;
+
+	DiMustWriteLock(dentry);
+	AuDebugOn(IS_ROOT(dentry));
+	AuDebugOn(d_really_is_negative(parent));
+
+	sb = dentry->d_sb;
+	sigen = au_sigen(sb);
+	err = au_digen_test(parent, sigen);
+	if (unlikely(err))
+		goto out;
+
+	nbr = au_sbbot(sb) + 1;
+	dinfo = au_di(dentry);
+	err = au_di_realloc(dinfo, nbr, /*may_shrink*/0);
+	if (unlikely(err))
+		goto out;
+	ebrange = au_dbrange_test(dentry);
+	if (!ebrange)
+		ebrange = au_do_refresh_hdentry(dentry, parent);
+
+	if (d_unhashed(dentry) || ebrange /* || dinfo->di_tmpfile */) {
+		AuDebugOn(au_dbtop(dentry) < 0 && au_dbbot(dentry) >= 0);
+		if (d_really_is_positive(dentry)) {
+			inode = d_inode(dentry);
+			err = au_refresh_hinode_self(inode);
+		}
+		au_dbg_verify_dinode(dentry);
+		if (!err)
+			goto out_dgen; /* success */
+		goto out;
+	}
+
+	/* temporary dinfo */
+	AuDbgDentry(dentry);
+	err = -ENOMEM;
+	tmp = au_di_alloc(sb, AuLsc_DI_TMP);
+	if (unlikely(!tmp))
+		goto out;
+	au_di_swap(tmp, dinfo);
+	/* returns the number of positive dentries */
+	/*
+	 * if current working dir is removed, it returns an error.
+	 * but the dentry is legal.
+	 */
+	err = au_lkup_dentry(dentry, /*btop*/0, AuLkup_ALLOW_NEG);
+	AuDbgDentry(dentry);
+	au_di_swap(tmp, dinfo);
+	if (err == -ENOENT)
+		err = 0;
+	if (err >= 0) {
+		/* compare/refresh by dinfo */
+		AuDbgDentry(dentry);
+		err = au_refresh_by_dinfo(dentry, dinfo, tmp);
+		au_dbg_verify_dinode(dentry);
+		AuTraceErr(err);
+	}
+	au_di_realloc(dinfo, nbr, /*may_shrink*/1); /* harmless if err */
+	au_rw_write_unlock(&tmp->di_rwsem);
+	au_di_free(tmp);
+	if (unlikely(err))
+		goto out;
+
+out_dgen:
+	au_update_digen(dentry);
+out:
+	if (unlikely(err && !(dentry->d_flags & DCACHE_NFSFS_RENAMED))) {
+		AuIOErr("failed refreshing %pd, %d\n", dentry, err);
+		AuDbgDentry(dentry);
+	}
+	AuTraceErr(err);
+	return err;
+}
+
+static int au_do_h_d_reval(struct dentry *h_dentry, unsigned int flags,
+			   struct dentry *dentry, aufs_bindex_t bindex)
+{
+	int err, valid;
+
+	err = 0;
+	if (!(h_dentry->d_flags & DCACHE_OP_REVALIDATE))
+		goto out;
+
+	AuDbg("b%d\n", bindex);
+	/*
+	 * gave up supporting LOOKUP_CREATE/OPEN for lower fs,
+	 * due to whiteout and branch permission.
+	 */
+	flags &= ~(/*LOOKUP_PARENT |*/ LOOKUP_OPEN | LOOKUP_CREATE
+		   | LOOKUP_FOLLOW | LOOKUP_EXCL);
+	/* it may return tri-state */
+	valid = h_dentry->d_op->d_revalidate(h_dentry, flags);
+
+	if (unlikely(valid < 0))
+		err = valid;
+	else if (!valid)
+		err = -EINVAL;
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/* todo: remove this */
+static int h_d_revalidate(struct dentry *dentry, struct inode *inode,
+			  unsigned int flags, int do_udba, int dirren)
+{
+	int err;
+	umode_t mode, h_mode;
+	aufs_bindex_t bindex, btail, btop, ibs, ibe;
+	unsigned char plus, unhashed, is_root, h_plus, h_nfs, tmpfile;
+	struct inode *h_inode, *h_cached_inode;
+	struct dentry *h_dentry;
+	struct qstr *name, *h_name;
+
+	err = 0;
+	plus = 0;
+	mode = 0;
+	ibs = -1;
+	ibe = -1;
+	unhashed = !!d_unhashed(dentry);
+	is_root = !!IS_ROOT(dentry);
+	name = &dentry->d_name;
+	tmpfile = au_di(dentry)->di_tmpfile;
+
+	/*
+	 * Theoretically, REVAL test should be unnecessary in case of
+	 * {FS,I}NOTIFY.
+	 * But {fs,i}notify doesn't fire some necessary events,
+	 *	IN_ATTRIB for atime/nlink/pageio
+	 * Let's do REVAL test too.
+	 */
+	if (do_udba && inode) {
+		mode = (inode->i_mode & S_IFMT);
+		plus = (inode->i_nlink > 0);
+		ibs = au_ibtop(inode);
+		ibe = au_ibbot(inode);
+	}
+
+	btop = au_dbtop(dentry);
+	btail = btop;
+	if (inode && S_ISDIR(inode->i_mode))
+		btail = au_dbtaildir(dentry);
+	for (bindex = btop; bindex <= btail; bindex++) {
+		h_dentry = au_h_dptr(dentry, bindex);
+		if (!h_dentry)
+			continue;
+
+		AuDbg("b%d, %pd\n", bindex, h_dentry);
+		h_nfs = !!au_test_nfs(h_dentry->d_sb);
+		spin_lock(&h_dentry->d_lock);
+		h_name = &h_dentry->d_name;
+		if (unlikely(do_udba
+			     && !is_root
+			     && ((!h_nfs
+				  && (unhashed != !!d_unhashed(h_dentry)
+				      || (!tmpfile && !dirren
+					  && !au_qstreq(name, h_name))
+					  ))
+				 || (h_nfs
+				     && !(flags & LOOKUP_OPEN)
+				     && (h_dentry->d_flags
+					 & DCACHE_NFSFS_RENAMED)))
+			    )) {
+			int h_unhashed;
+
+			h_unhashed = d_unhashed(h_dentry);
+			spin_unlock(&h_dentry->d_lock);
+			AuDbg("unhash 0x%x 0x%x, %pd %pd\n",
+			      unhashed, h_unhashed, dentry, h_dentry);
+			goto err;
+		}
+		spin_unlock(&h_dentry->d_lock);
+
+		err = au_do_h_d_reval(h_dentry, flags, dentry, bindex);
+		if (unlikely(err))
+			/* do not goto err, to keep the errno */
+			break;
+
+		/* todo: plink too? */
+		if (!do_udba)
+			continue;
+
+		/* UDBA tests */
+		if (unlikely(!!inode != d_is_positive(h_dentry)))
+			goto err;
+
+		h_inode = NULL;
+		if (d_is_positive(h_dentry))
+			h_inode = d_inode(h_dentry);
+		h_plus = plus;
+		h_mode = mode;
+		h_cached_inode = h_inode;
+		if (h_inode) {
+			h_mode = (h_inode->i_mode & S_IFMT);
+			h_plus = (h_inode->i_nlink > 0);
+		}
+		if (inode && ibs <= bindex && bindex <= ibe)
+			h_cached_inode = au_h_iptr(inode, bindex);
+
+		if (!h_nfs) {
+			if (unlikely(plus != h_plus && !tmpfile))
+				goto err;
+		} else {
+			if (unlikely(!(h_dentry->d_flags & DCACHE_NFSFS_RENAMED)
+				     && !is_root
+				     && !IS_ROOT(h_dentry)
+				     && unhashed != d_unhashed(h_dentry)))
+				goto err;
+		}
+		if (unlikely(mode != h_mode
+			     || h_cached_inode != h_inode))
+			goto err;
+		continue;
+
+err:
+		err = -EINVAL;
+		break;
+	}
+
+	AuTraceErr(err);
+	return err;
+}
+
+/* todo: consolidate with do_refresh() and au_reval_for_attr() */
+static int simple_reval_dpath(struct dentry *dentry, unsigned int sigen)
+{
+	int err;
+	struct dentry *parent;
+
+	if (!au_digen_test(dentry, sigen))
+		return 0;
+
+	parent = dget_parent(dentry);
+	di_read_lock_parent(parent, AuLock_IR);
+	AuDebugOn(au_digen_test(parent, sigen));
+	au_dbg_verify_gen(parent, sigen);
+	err = au_refresh_dentry(dentry, parent);
+	di_read_unlock(parent, AuLock_IR);
+	dput(parent);
+	AuTraceErr(err);
+	return err;
+}
+
+int au_reval_dpath(struct dentry *dentry, unsigned int sigen)
+{
+	int err;
+	struct dentry *d, *parent;
+
+	if (!au_ftest_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR))
+		return simple_reval_dpath(dentry, sigen);
+
+	/* slow loop, keep it simple and stupid */
+	/* cf: au_cpup_dirs() */
+	err = 0;
+	parent = NULL;
+	while (au_digen_test(dentry, sigen)) {
+		d = dentry;
+		while (1) {
+			dput(parent);
+			parent = dget_parent(d);
+			if (!au_digen_test(parent, sigen))
+				break;
+			d = parent;
+		}
+
+		if (d != dentry)
+			di_write_lock_child2(d);
+
+		/* someone might update our dentry while we were sleeping */
+		if (au_digen_test(d, sigen)) {
+			/*
+			 * todo: consolidate with simple_reval_dpath(),
+			 * do_refresh() and au_reval_for_attr().
+			 */
+			di_read_lock_parent(parent, AuLock_IR);
+			err = au_refresh_dentry(d, parent);
+			di_read_unlock(parent, AuLock_IR);
+		}
+
+		if (d != dentry)
+			di_write_unlock(d);
+		dput(parent);
+		if (unlikely(err))
+			break;
+	}
+
+	return err;
+}
+
+/*
+ * if valid returns 1, otherwise 0.
+ */
+static int aufs_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+	int valid, err;
+	unsigned int sigen;
+	unsigned char do_udba, dirren;
+	struct super_block *sb;
+	struct inode *inode;
+
+	/* todo: support rcu-walk? */
+	if (flags & LOOKUP_RCU)
+		return -ECHILD;
+
+	valid = 0;
+	if (unlikely(!au_di(dentry)))
+		goto out;
+
+	valid = 1;
+	sb = dentry->d_sb;
+	/*
+	 * todo: very ugly
+	 * i_mutex of parent dir may be held,
+	 * but we should not return 'invalid' due to busy.
+	 */
+	err = aufs_read_lock(dentry, AuLock_FLUSH | AuLock_DW | AuLock_NOPLM);
+	if (unlikely(err)) {
+		valid = err;
+		AuTraceErr(err);
+		goto out;
+	}
+	inode = NULL;
+	if (d_really_is_positive(dentry))
+		inode = d_inode(dentry);
+	if (unlikely(inode && au_is_bad_inode(inode))) {
+		err = -EINVAL;
+		AuTraceErr(err);
+		goto out_dgrade;
+	}
+	if (unlikely(au_dbrange_test(dentry))) {
+		err = -EINVAL;
+		AuTraceErr(err);
+		goto out_dgrade;
+	}
+
+	sigen = au_sigen(sb);
+	if (au_digen_test(dentry, sigen)) {
+		AuDebugOn(IS_ROOT(dentry));
+		err = au_reval_dpath(dentry, sigen);
+		if (unlikely(err)) {
+			AuTraceErr(err);
+			goto out_dgrade;
+		}
+	}
+	di_downgrade_lock(dentry, AuLock_IR);
+
+	err = -EINVAL;
+	if (!(flags & (LOOKUP_OPEN | LOOKUP_EMPTY))
+	    && inode
+	    && !(inode->i_state && I_LINKABLE)
+	    && (IS_DEADDIR(inode) || !inode->i_nlink)) {
+		AuTraceErr(err);
+		goto out_inval;
+	}
+
+	do_udba = !au_opt_test(au_mntflags(sb), UDBA_NONE);
+	if (do_udba && inode) {
+		aufs_bindex_t btop = au_ibtop(inode);
+		struct inode *h_inode;
+
+		if (btop >= 0) {
+			h_inode = au_h_iptr(inode, btop);
+			if (h_inode && au_test_higen(inode, h_inode)) {
+				AuTraceErr(err);
+				goto out_inval;
+			}
+		}
+	}
+
+	dirren = !!au_opt_test(au_mntflags(sb), DIRREN);
+	err = h_d_revalidate(dentry, inode, flags, do_udba, dirren);
+	if (unlikely(!err && do_udba && au_dbtop(dentry) < 0)) {
+		err = -EIO;
+		AuDbg("both of real entry and whiteout found, %p, err %d\n",
+		      dentry, err);
+	}
+	goto out_inval;
+
+out_dgrade:
+	di_downgrade_lock(dentry, AuLock_IR);
+out_inval:
+	aufs_read_unlock(dentry, AuLock_IR);
+	AuTraceErr(err);
+	valid = !err;
+out:
+	if (!valid) {
+		AuDbg("%pd invalid, %d\n", dentry, valid);
+		d_drop(dentry);
+	}
+	return valid;
+}
+
+static void aufs_d_release(struct dentry *dentry)
+{
+	if (au_di(dentry)) {
+		au_di_fin(dentry);
+		au_hn_di_reinit(dentry);
+	}
+}
+
+const struct dentry_operations aufs_dop = {
+	.d_revalidate		= aufs_d_revalidate,
+	.d_weak_revalidate	= aufs_d_revalidate,
+	.d_release		= aufs_d_release
+};
+
+/* aufs_dop without d_revalidate */
+const struct dentry_operations aufs_dop_noreval = {
+	.d_release		= aufs_d_release
+};
diff --git a/include/fs/aufs/dentry.h b/include/fs/aufs/dentry.h
new file mode 100644
index 000000000..8f20b51d5
--- /dev/null
+++ b/include/fs/aufs/dentry.h
@@ -0,0 +1,266 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * lookup and dentry operations
+ */
+
+#ifndef __AUFS_DENTRY_H__
+#define __AUFS_DENTRY_H__
+
+#ifdef __KERNEL__
+
+#include <linux/dcache.h>
+#include "dirren.h"
+#include "rwsem.h"
+
+struct au_hdentry {
+	struct dentry		*hd_dentry;
+	aufs_bindex_t		hd_id;
+};
+
+struct au_dinfo {
+	atomic_t		di_generation;
+
+	struct au_rwsem		di_rwsem;
+	aufs_bindex_t		di_btop, di_bbot, di_bwh, di_bdiropq;
+	unsigned char		di_tmpfile; /* to allow the different name */
+	struct au_hdentry	*di_hdentry;
+} ____cacheline_aligned_in_smp;
+
+/* ---------------------------------------------------------------------- */
+
+/* flags for au_lkup_dentry() */
+#define AuLkup_ALLOW_NEG	1
+#define AuLkup_IGNORE_PERM	(1 << 1)
+#define AuLkup_DIRREN		(1 << 2)
+#define au_ftest_lkup(flags, name)	((flags) & AuLkup_##name)
+#define au_fset_lkup(flags, name) \
+	do { (flags) |= AuLkup_##name; } while (0)
+#define au_fclr_lkup(flags, name) \
+	do { (flags) &= ~AuLkup_##name; } while (0)
+
+#ifndef CONFIG_AUFS_DIRREN
+#undef AuLkup_DIRREN
+#define AuLkup_DIRREN 0
+#endif
+
+struct au_do_lookup_args {
+	unsigned int		flags;
+	mode_t			type;
+	struct qstr		whname, *name;
+	struct au_dr_lookup	dirren;
+};
+
+/* ---------------------------------------------------------------------- */
+
+/* dentry.c */
+extern const struct dentry_operations aufs_dop, aufs_dop_noreval;
+struct au_branch;
+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent);
+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
+		struct dentry *h_parent, struct au_branch *br);
+
+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t btop,
+		   unsigned int flags);
+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh);
+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent);
+int au_reval_dpath(struct dentry *dentry, unsigned int sigen);
+void au_refresh_dop(struct dentry *dentry, int force_reval);
+
+/* dinfo.c */
+void au_di_init_once(void *_di);
+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc);
+void au_di_free(struct au_dinfo *dinfo);
+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b);
+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src);
+int au_di_init(struct dentry *dentry);
+void au_di_fin(struct dentry *dentry);
+int au_di_realloc(struct au_dinfo *dinfo, int nbr, int may_shrink);
+
+void di_read_lock(struct dentry *d, int flags, unsigned int lsc);
+void di_read_unlock(struct dentry *d, int flags);
+void di_downgrade_lock(struct dentry *d, int flags);
+void di_write_lock(struct dentry *d, unsigned int lsc);
+void di_write_unlock(struct dentry *d);
+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir);
+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir);
+void di_write_unlock2(struct dentry *d1, struct dentry *d2);
+
+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex);
+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex);
+aufs_bindex_t au_dbtail(struct dentry *dentry);
+aufs_bindex_t au_dbtaildir(struct dentry *dentry);
+
+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
+		   struct dentry *h_dentry);
+int au_digen_test(struct dentry *dentry, unsigned int sigen);
+int au_dbrange_test(struct dentry *dentry);
+void au_update_digen(struct dentry *dentry);
+void au_update_dbrange(struct dentry *dentry, int do_put_zero);
+void au_update_dbtop(struct dentry *dentry);
+void au_update_dbbot(struct dentry *dentry);
+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry);
+
+/* ---------------------------------------------------------------------- */
+
+static inline struct au_dinfo *au_di(struct dentry *dentry)
+{
+	return dentry->d_fsdata;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* lock subclass for dinfo */
+enum {
+	AuLsc_DI_CHILD,		/* child first */
+	AuLsc_DI_CHILD2,	/* rename(2), link(2), and cpup at hnotify */
+	AuLsc_DI_CHILD3,	/* copyup dirs */
+	AuLsc_DI_PARENT,
+	AuLsc_DI_PARENT2,
+	AuLsc_DI_PARENT3,
+	AuLsc_DI_TMP		/* temp for replacing dinfo */
+};
+
+/*
+ * di_read_lock_child, di_write_lock_child,
+ * di_read_lock_child2, di_write_lock_child2,
+ * di_read_lock_child3, di_write_lock_child3,
+ * di_read_lock_parent, di_write_lock_parent,
+ * di_read_lock_parent2, di_write_lock_parent2,
+ * di_read_lock_parent3, di_write_lock_parent3,
+ */
+#define AuReadLockFunc(name, lsc) \
+static inline void di_read_lock_##name(struct dentry *d, int flags) \
+{ di_read_lock(d, flags, AuLsc_DI_##lsc); }
+
+#define AuWriteLockFunc(name, lsc) \
+static inline void di_write_lock_##name(struct dentry *d) \
+{ di_write_lock(d, AuLsc_DI_##lsc); }
+
+#define AuRWLockFuncs(name, lsc) \
+	AuReadLockFunc(name, lsc) \
+	AuWriteLockFunc(name, lsc)
+
+AuRWLockFuncs(child, CHILD);
+AuRWLockFuncs(child2, CHILD2);
+AuRWLockFuncs(child3, CHILD3);
+AuRWLockFuncs(parent, PARENT);
+AuRWLockFuncs(parent2, PARENT2);
+AuRWLockFuncs(parent3, PARENT3);
+
+#undef AuReadLockFunc
+#undef AuWriteLockFunc
+#undef AuRWLockFuncs
+
+#define DiMustNoWaiters(d)	AuRwMustNoWaiters(&au_di(d)->di_rwsem)
+#define DiMustAnyLock(d)	AuRwMustAnyLock(&au_di(d)->di_rwsem)
+#define DiMustWriteLock(d)	AuRwMustWriteLock(&au_di(d)->di_rwsem)
+
+/* ---------------------------------------------------------------------- */
+
+/* todo: memory barrier? */
+static inline unsigned int au_digen(struct dentry *d)
+{
+	return atomic_read(&au_di(d)->di_generation);
+}
+
+static inline void au_h_dentry_init(struct au_hdentry *hdentry)
+{
+	hdentry->hd_dentry = NULL;
+}
+
+static inline struct au_hdentry *au_hdentry(struct au_dinfo *di,
+					    aufs_bindex_t bindex)
+{
+	return di->di_hdentry + bindex;
+}
+
+static inline void au_hdput(struct au_hdentry *hd)
+{
+	if (hd)
+		dput(hd->hd_dentry);
+}
+
+static inline aufs_bindex_t au_dbtop(struct dentry *dentry)
+{
+	DiMustAnyLock(dentry);
+	return au_di(dentry)->di_btop;
+}
+
+static inline aufs_bindex_t au_dbbot(struct dentry *dentry)
+{
+	DiMustAnyLock(dentry);
+	return au_di(dentry)->di_bbot;
+}
+
+static inline aufs_bindex_t au_dbwh(struct dentry *dentry)
+{
+	DiMustAnyLock(dentry);
+	return au_di(dentry)->di_bwh;
+}
+
+static inline aufs_bindex_t au_dbdiropq(struct dentry *dentry)
+{
+	DiMustAnyLock(dentry);
+	return au_di(dentry)->di_bdiropq;
+}
+
+/* todo: hard/soft set? */
+static inline void au_set_dbtop(struct dentry *dentry, aufs_bindex_t bindex)
+{
+	DiMustWriteLock(dentry);
+	au_di(dentry)->di_btop = bindex;
+}
+
+static inline void au_set_dbbot(struct dentry *dentry, aufs_bindex_t bindex)
+{
+	DiMustWriteLock(dentry);
+	au_di(dentry)->di_bbot = bindex;
+}
+
+static inline void au_set_dbwh(struct dentry *dentry, aufs_bindex_t bindex)
+{
+	DiMustWriteLock(dentry);
+	/* dbwh can be outside of btop - bbot range */
+	au_di(dentry)->di_bwh = bindex;
+}
+
+static inline void au_set_dbdiropq(struct dentry *dentry, aufs_bindex_t bindex)
+{
+	DiMustWriteLock(dentry);
+	au_di(dentry)->di_bdiropq = bindex;
+}
+
+/* ---------------------------------------------------------------------- */
+
+#ifdef CONFIG_AUFS_HNOTIFY
+static inline void au_digen_dec(struct dentry *d)
+{
+	atomic_dec(&au_di(d)->di_generation);
+}
+
+static inline void au_hn_di_reinit(struct dentry *dentry)
+{
+	dentry->d_fsdata = NULL;
+}
+#else
+AuStubVoid(au_hn_di_reinit, struct dentry *dentry __maybe_unused)
+#endif /* CONFIG_AUFS_HNOTIFY */
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_DENTRY_H__ */
diff --git a/include/fs/aufs/dinfo.c b/include/fs/aufs/dinfo.c
new file mode 100644
index 000000000..75b7ea4a5
--- /dev/null
+++ b/include/fs/aufs/dinfo.c
@@ -0,0 +1,553 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * dentry private data
+ */
+
+#include "aufs.h"
+
+void au_di_init_once(void *_dinfo)
+{
+	struct au_dinfo *dinfo = _dinfo;
+
+	au_rw_init(&dinfo->di_rwsem);
+}
+
+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc)
+{
+	struct au_dinfo *dinfo;
+	int nbr, i;
+
+	dinfo = au_cache_alloc_dinfo();
+	if (unlikely(!dinfo))
+		goto out;
+
+	nbr = au_sbbot(sb) + 1;
+	if (nbr <= 0)
+		nbr = 1;
+	dinfo->di_hdentry = kcalloc(nbr, sizeof(*dinfo->di_hdentry), GFP_NOFS);
+	if (dinfo->di_hdentry) {
+		au_rw_write_lock_nested(&dinfo->di_rwsem, lsc);
+		dinfo->di_btop = -1;
+		dinfo->di_bbot = -1;
+		dinfo->di_bwh = -1;
+		dinfo->di_bdiropq = -1;
+		dinfo->di_tmpfile = 0;
+		for (i = 0; i < nbr; i++)
+			dinfo->di_hdentry[i].hd_id = -1;
+		goto out;
+	}
+
+	au_cache_free_dinfo(dinfo);
+	dinfo = NULL;
+
+out:
+	return dinfo;
+}
+
+void au_di_free(struct au_dinfo *dinfo)
+{
+	struct au_hdentry *p;
+	aufs_bindex_t bbot, bindex;
+
+	/* dentry may not be revalidated */
+	bindex = dinfo->di_btop;
+	if (bindex >= 0) {
+		bbot = dinfo->di_bbot;
+		p = au_hdentry(dinfo, bindex);
+		while (bindex++ <= bbot)
+			au_hdput(p++);
+	}
+	kfree(dinfo->di_hdentry);
+	au_cache_free_dinfo(dinfo);
+}
+
+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b)
+{
+	struct au_hdentry *p;
+	aufs_bindex_t bi;
+
+	AuRwMustWriteLock(&a->di_rwsem);
+	AuRwMustWriteLock(&b->di_rwsem);
+
+#define DiSwap(v, name)				\
+	do {					\
+		v = a->di_##name;		\
+		a->di_##name = b->di_##name;	\
+		b->di_##name = v;		\
+	} while (0)
+
+	DiSwap(p, hdentry);
+	DiSwap(bi, btop);
+	DiSwap(bi, bbot);
+	DiSwap(bi, bwh);
+	DiSwap(bi, bdiropq);
+	/* smp_mb(); */
+
+#undef DiSwap
+}
+
+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src)
+{
+	AuRwMustWriteLock(&dst->di_rwsem);
+	AuRwMustWriteLock(&src->di_rwsem);
+
+	dst->di_btop = src->di_btop;
+	dst->di_bbot = src->di_bbot;
+	dst->di_bwh = src->di_bwh;
+	dst->di_bdiropq = src->di_bdiropq;
+	/* smp_mb(); */
+}
+
+int au_di_init(struct dentry *dentry)
+{
+	int err;
+	struct super_block *sb;
+	struct au_dinfo *dinfo;
+
+	err = 0;
+	sb = dentry->d_sb;
+	dinfo = au_di_alloc(sb, AuLsc_DI_CHILD);
+	if (dinfo) {
+		atomic_set(&dinfo->di_generation, au_sigen(sb));
+		/* smp_mb(); */ /* atomic_set */
+		dentry->d_fsdata = dinfo;
+	} else
+		err = -ENOMEM;
+
+	return err;
+}
+
+void au_di_fin(struct dentry *dentry)
+{
+	struct au_dinfo *dinfo;
+
+	dinfo = au_di(dentry);
+	AuRwDestroy(&dinfo->di_rwsem);
+	au_di_free(dinfo);
+}
+
+int au_di_realloc(struct au_dinfo *dinfo, int nbr, int may_shrink)
+{
+	int err, sz;
+	struct au_hdentry *hdp;
+
+	AuRwMustWriteLock(&dinfo->di_rwsem);
+
+	err = -ENOMEM;
+	sz = sizeof(*hdp) * (dinfo->di_bbot + 1);
+	if (!sz)
+		sz = sizeof(*hdp);
+	hdp = au_kzrealloc(dinfo->di_hdentry, sz, sizeof(*hdp) * nbr, GFP_NOFS,
+			   may_shrink);
+	if (hdp) {
+		dinfo->di_hdentry = hdp;
+		err = 0;
+	}
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static void do_ii_write_lock(struct inode *inode, unsigned int lsc)
+{
+	switch (lsc) {
+	case AuLsc_DI_CHILD:
+		ii_write_lock_child(inode);
+		break;
+	case AuLsc_DI_CHILD2:
+		ii_write_lock_child2(inode);
+		break;
+	case AuLsc_DI_CHILD3:
+		ii_write_lock_child3(inode);
+		break;
+	case AuLsc_DI_PARENT:
+		ii_write_lock_parent(inode);
+		break;
+	case AuLsc_DI_PARENT2:
+		ii_write_lock_parent2(inode);
+		break;
+	case AuLsc_DI_PARENT3:
+		ii_write_lock_parent3(inode);
+		break;
+	default:
+		BUG();
+	}
+}
+
+static void do_ii_read_lock(struct inode *inode, unsigned int lsc)
+{
+	switch (lsc) {
+	case AuLsc_DI_CHILD:
+		ii_read_lock_child(inode);
+		break;
+	case AuLsc_DI_CHILD2:
+		ii_read_lock_child2(inode);
+		break;
+	case AuLsc_DI_CHILD3:
+		ii_read_lock_child3(inode);
+		break;
+	case AuLsc_DI_PARENT:
+		ii_read_lock_parent(inode);
+		break;
+	case AuLsc_DI_PARENT2:
+		ii_read_lock_parent2(inode);
+		break;
+	case AuLsc_DI_PARENT3:
+		ii_read_lock_parent3(inode);
+		break;
+	default:
+		BUG();
+	}
+}
+
+void di_read_lock(struct dentry *d, int flags, unsigned int lsc)
+{
+	struct inode *inode;
+
+	au_rw_read_lock_nested(&au_di(d)->di_rwsem, lsc);
+	if (d_really_is_positive(d)) {
+		inode = d_inode(d);
+		if (au_ftest_lock(flags, IW))
+			do_ii_write_lock(inode, lsc);
+		else if (au_ftest_lock(flags, IR))
+			do_ii_read_lock(inode, lsc);
+	}
+}
+
+void di_read_unlock(struct dentry *d, int flags)
+{
+	struct inode *inode;
+
+	if (d_really_is_positive(d)) {
+		inode = d_inode(d);
+		if (au_ftest_lock(flags, IW)) {
+			au_dbg_verify_dinode(d);
+			ii_write_unlock(inode);
+		} else if (au_ftest_lock(flags, IR)) {
+			au_dbg_verify_dinode(d);
+			ii_read_unlock(inode);
+		}
+	}
+	au_rw_read_unlock(&au_di(d)->di_rwsem);
+}
+
+void di_downgrade_lock(struct dentry *d, int flags)
+{
+	if (d_really_is_positive(d) && au_ftest_lock(flags, IR))
+		ii_downgrade_lock(d_inode(d));
+	au_rw_dgrade_lock(&au_di(d)->di_rwsem);
+}
+
+void di_write_lock(struct dentry *d, unsigned int lsc)
+{
+	au_rw_write_lock_nested(&au_di(d)->di_rwsem, lsc);
+	if (d_really_is_positive(d))
+		do_ii_write_lock(d_inode(d), lsc);
+}
+
+void di_write_unlock(struct dentry *d)
+{
+	au_dbg_verify_dinode(d);
+	if (d_really_is_positive(d))
+		ii_write_unlock(d_inode(d));
+	au_rw_write_unlock(&au_di(d)->di_rwsem);
+}
+
+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir)
+{
+	AuDebugOn(d1 == d2
+		  || d_inode(d1) == d_inode(d2)
+		  || d1->d_sb != d2->d_sb);
+
+	if ((isdir && au_test_subdir(d1, d2))
+	    || d1 < d2) {
+		di_write_lock_child(d1);
+		di_write_lock_child2(d2);
+	} else {
+		di_write_lock_child(d2);
+		di_write_lock_child2(d1);
+	}
+}
+
+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir)
+{
+	AuDebugOn(d1 == d2
+		  || d_inode(d1) == d_inode(d2)
+		  || d1->d_sb != d2->d_sb);
+
+	if ((isdir && au_test_subdir(d1, d2))
+	    || d1 < d2) {
+		di_write_lock_parent(d1);
+		di_write_lock_parent2(d2);
+	} else {
+		di_write_lock_parent(d2);
+		di_write_lock_parent2(d1);
+	}
+}
+
+void di_write_unlock2(struct dentry *d1, struct dentry *d2)
+{
+	di_write_unlock(d1);
+	if (d_inode(d1) == d_inode(d2))
+		au_rw_write_unlock(&au_di(d2)->di_rwsem);
+	else
+		di_write_unlock(d2);
+}
+
+/* ---------------------------------------------------------------------- */
+
+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex)
+{
+	struct dentry *d;
+
+	DiMustAnyLock(dentry);
+
+	if (au_dbtop(dentry) < 0 || bindex < au_dbtop(dentry))
+		return NULL;
+	AuDebugOn(bindex < 0);
+	d = au_hdentry(au_di(dentry), bindex)->hd_dentry;
+	AuDebugOn(d && au_dcount(d) <= 0);
+	return d;
+}
+
+/*
+ * extended version of au_h_dptr().
+ * returns a hashed and positive (or linkable) h_dentry in bindex, NULL, or
+ * error.
+ */
+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex)
+{
+	struct dentry *h_dentry;
+	struct inode *inode, *h_inode;
+
+	AuDebugOn(d_really_is_negative(dentry));
+
+	h_dentry = NULL;
+	if (au_dbtop(dentry) <= bindex
+	    && bindex <= au_dbbot(dentry))
+		h_dentry = au_h_dptr(dentry, bindex);
+	if (h_dentry && !au_d_linkable(h_dentry)) {
+		dget(h_dentry);
+		goto out; /* success */
+	}
+
+	inode = d_inode(dentry);
+	AuDebugOn(bindex < au_ibtop(inode));
+	AuDebugOn(au_ibbot(inode) < bindex);
+	h_inode = au_h_iptr(inode, bindex);
+	h_dentry = d_find_alias(h_inode);
+	if (h_dentry) {
+		if (!IS_ERR(h_dentry)) {
+			if (!au_d_linkable(h_dentry))
+				goto out; /* success */
+			dput(h_dentry);
+		} else
+			goto out;
+	}
+
+	if (au_opt_test(au_mntflags(dentry->d_sb), PLINK)) {
+		h_dentry = au_plink_lkup(inode, bindex);
+		AuDebugOn(!h_dentry);
+		if (!IS_ERR(h_dentry)) {
+			if (!au_d_hashed_positive(h_dentry))
+				goto out; /* success */
+			dput(h_dentry);
+			h_dentry = NULL;
+		}
+	}
+
+out:
+	AuDbgDentry(h_dentry);
+	return h_dentry;
+}
+
+aufs_bindex_t au_dbtail(struct dentry *dentry)
+{
+	aufs_bindex_t bbot, bwh;
+
+	bbot = au_dbbot(dentry);
+	if (0 <= bbot) {
+		bwh = au_dbwh(dentry);
+		if (!bwh)
+			return bwh;
+		if (0 < bwh && bwh < bbot)
+			return bwh - 1;
+	}
+	return bbot;
+}
+
+aufs_bindex_t au_dbtaildir(struct dentry *dentry)
+{
+	aufs_bindex_t bbot, bopq;
+
+	bbot = au_dbtail(dentry);
+	if (0 <= bbot) {
+		bopq = au_dbdiropq(dentry);
+		if (0 <= bopq && bopq < bbot)
+			bbot = bopq;
+	}
+	return bbot;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
+		   struct dentry *h_dentry)
+{
+	struct au_dinfo *dinfo;
+	struct au_hdentry *hd;
+	struct au_branch *br;
+
+	DiMustWriteLock(dentry);
+
+	dinfo = au_di(dentry);
+	hd = au_hdentry(dinfo, bindex);
+	au_hdput(hd);
+	hd->hd_dentry = h_dentry;
+	if (h_dentry) {
+		br = au_sbr(dentry->d_sb, bindex);
+		hd->hd_id = br->br_id;
+	}
+}
+
+int au_dbrange_test(struct dentry *dentry)
+{
+	int err;
+	aufs_bindex_t btop, bbot;
+
+	err = 0;
+	btop = au_dbtop(dentry);
+	bbot = au_dbbot(dentry);
+	if (btop >= 0)
+		AuDebugOn(bbot < 0 && btop > bbot);
+	else {
+		err = -EIO;
+		AuDebugOn(bbot >= 0);
+	}
+
+	return err;
+}
+
+int au_digen_test(struct dentry *dentry, unsigned int sigen)
+{
+	int err;
+
+	err = 0;
+	if (unlikely(au_digen(dentry) != sigen
+		     || au_iigen_test(d_inode(dentry), sigen)))
+		err = -EIO;
+
+	return err;
+}
+
+void au_update_digen(struct dentry *dentry)
+{
+	atomic_set(&au_di(dentry)->di_generation, au_sigen(dentry->d_sb));
+	/* smp_mb(); */ /* atomic_set */
+}
+
+void au_update_dbrange(struct dentry *dentry, int do_put_zero)
+{
+	struct au_dinfo *dinfo;
+	struct dentry *h_d;
+	struct au_hdentry *hdp;
+	aufs_bindex_t bindex, bbot;
+
+	DiMustWriteLock(dentry);
+
+	dinfo = au_di(dentry);
+	if (!dinfo || dinfo->di_btop < 0)
+		return;
+
+	if (do_put_zero) {
+		bbot = dinfo->di_bbot;
+		bindex = dinfo->di_btop;
+		hdp = au_hdentry(dinfo, bindex);
+		for (; bindex <= bbot; bindex++, hdp++) {
+			h_d = hdp->hd_dentry;
+			if (h_d && d_is_negative(h_d))
+				au_set_h_dptr(dentry, bindex, NULL);
+		}
+	}
+
+	dinfo->di_btop = 0;
+	hdp = au_hdentry(dinfo, dinfo->di_btop);
+	for (; dinfo->di_btop <= dinfo->di_bbot; dinfo->di_btop++, hdp++)
+		if (hdp->hd_dentry)
+			break;
+	if (dinfo->di_btop > dinfo->di_bbot) {
+		dinfo->di_btop = -1;
+		dinfo->di_bbot = -1;
+		return;
+	}
+
+	hdp = au_hdentry(dinfo, dinfo->di_bbot);
+	for (; dinfo->di_bbot >= 0; dinfo->di_bbot--, hdp--)
+		if (hdp->hd_dentry)
+			break;
+	AuDebugOn(dinfo->di_btop > dinfo->di_bbot || dinfo->di_bbot < 0);
+}
+
+void au_update_dbtop(struct dentry *dentry)
+{
+	aufs_bindex_t bindex, bbot;
+	struct dentry *h_dentry;
+
+	bbot = au_dbbot(dentry);
+	for (bindex = au_dbtop(dentry); bindex <= bbot; bindex++) {
+		h_dentry = au_h_dptr(dentry, bindex);
+		if (!h_dentry)
+			continue;
+		if (d_is_positive(h_dentry)) {
+			au_set_dbtop(dentry, bindex);
+			return;
+		}
+		au_set_h_dptr(dentry, bindex, NULL);
+	}
+}
+
+void au_update_dbbot(struct dentry *dentry)
+{
+	aufs_bindex_t bindex, btop;
+	struct dentry *h_dentry;
+
+	btop = au_dbtop(dentry);
+	for (bindex = au_dbbot(dentry); bindex >= btop; bindex--) {
+		h_dentry = au_h_dptr(dentry, bindex);
+		if (!h_dentry)
+			continue;
+		if (d_is_positive(h_dentry)) {
+			au_set_dbbot(dentry, bindex);
+			return;
+		}
+		au_set_h_dptr(dentry, bindex, NULL);
+	}
+}
+
+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry)
+{
+	aufs_bindex_t bindex, bbot;
+
+	bbot = au_dbbot(dentry);
+	for (bindex = au_dbtop(dentry); bindex <= bbot; bindex++)
+		if (au_h_dptr(dentry, bindex) == h_dentry)
+			return bindex;
+	return -1;
+}
diff --git a/include/fs/aufs/dir.c b/include/fs/aufs/dir.c
new file mode 100644
index 000000000..cd9e4aaf6
--- /dev/null
+++ b/include/fs/aufs/dir.c
@@ -0,0 +1,759 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * directory operations
+ */
+
+#include <linux/fs_stack.h>
+#include "aufs.h"
+
+void au_add_nlink(struct inode *dir, struct inode *h_dir)
+{
+	unsigned int nlink;
+
+	AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
+
+	nlink = dir->i_nlink;
+	nlink += h_dir->i_nlink - 2;
+	if (h_dir->i_nlink < 2)
+		nlink += 2;
+	smp_mb(); /* for i_nlink */
+	/* 0 can happen in revaliding */
+	set_nlink(dir, nlink);
+}
+
+void au_sub_nlink(struct inode *dir, struct inode *h_dir)
+{
+	unsigned int nlink;
+
+	AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
+
+	nlink = dir->i_nlink;
+	nlink -= h_dir->i_nlink - 2;
+	if (h_dir->i_nlink < 2)
+		nlink -= 2;
+	smp_mb(); /* for i_nlink */
+	/* nlink == 0 means the branch-fs is broken */
+	set_nlink(dir, nlink);
+}
+
+loff_t au_dir_size(struct file *file, struct dentry *dentry)
+{
+	loff_t sz;
+	aufs_bindex_t bindex, bbot;
+	struct file *h_file;
+	struct dentry *h_dentry;
+
+	sz = 0;
+	if (file) {
+		AuDebugOn(!d_is_dir(file->f_path.dentry));
+
+		bbot = au_fbbot_dir(file);
+		for (bindex = au_fbtop(file);
+		     bindex <= bbot && sz < KMALLOC_MAX_SIZE;
+		     bindex++) {
+			h_file = au_hf_dir(file, bindex);
+			if (h_file && file_inode(h_file))
+				sz += vfsub_f_size_read(h_file);
+		}
+	} else {
+		AuDebugOn(!dentry);
+		AuDebugOn(!d_is_dir(dentry));
+
+		bbot = au_dbtaildir(dentry);
+		for (bindex = au_dbtop(dentry);
+		     bindex <= bbot && sz < KMALLOC_MAX_SIZE;
+		     bindex++) {
+			h_dentry = au_h_dptr(dentry, bindex);
+			if (h_dentry && d_is_positive(h_dentry))
+				sz += i_size_read(d_inode(h_dentry));
+		}
+	}
+	if (sz < KMALLOC_MAX_SIZE)
+		sz = roundup_pow_of_two(sz);
+	if (sz > KMALLOC_MAX_SIZE)
+		sz = KMALLOC_MAX_SIZE;
+	else if (sz < NAME_MAX) {
+		BUILD_BUG_ON(AUFS_RDBLK_DEF < NAME_MAX);
+		sz = AUFS_RDBLK_DEF;
+	}
+	return sz;
+}
+
+struct au_dir_ts_arg {
+	struct dentry *dentry;
+	aufs_bindex_t brid;
+};
+
+static void au_do_dir_ts(void *arg)
+{
+	struct au_dir_ts_arg *a = arg;
+	struct au_dtime dt;
+	struct path h_path;
+	struct inode *dir, *h_dir;
+	struct super_block *sb;
+	struct au_branch *br;
+	struct au_hinode *hdir;
+	int err;
+	aufs_bindex_t btop, bindex;
+
+	sb = a->dentry->d_sb;
+	if (d_really_is_negative(a->dentry))
+		goto out;
+	/* no dir->i_mutex lock */
+	aufs_read_lock(a->dentry, AuLock_DW); /* noflush */
+
+	dir = d_inode(a->dentry);
+	btop = au_ibtop(dir);
+	bindex = au_br_index(sb, a->brid);
+	if (bindex < btop)
+		goto out_unlock;
+
+	br = au_sbr(sb, bindex);
+	h_path.dentry = au_h_dptr(a->dentry, bindex);
+	if (!h_path.dentry)
+		goto out_unlock;
+	h_path.mnt = au_br_mnt(br);
+	au_dtime_store(&dt, a->dentry, &h_path);
+
+	br = au_sbr(sb, btop);
+	if (!au_br_writable(br->br_perm))
+		goto out_unlock;
+	h_path.dentry = au_h_dptr(a->dentry, btop);
+	h_path.mnt = au_br_mnt(br);
+	err = vfsub_mnt_want_write(h_path.mnt);
+	if (err)
+		goto out_unlock;
+	hdir = au_hi(dir, btop);
+	au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
+	h_dir = au_h_iptr(dir, btop);
+	if (h_dir->i_nlink
+	    && timespec_compare(&h_dir->i_mtime, &dt.dt_mtime) < 0) {
+		dt.dt_h_path = h_path;
+		au_dtime_revert(&dt);
+	}
+	au_hn_inode_unlock(hdir);
+	vfsub_mnt_drop_write(h_path.mnt);
+	au_cpup_attr_timesizes(dir);
+
+out_unlock:
+	aufs_read_unlock(a->dentry, AuLock_DW);
+out:
+	dput(a->dentry);
+	au_nwt_done(&au_sbi(sb)->si_nowait);
+	kfree(arg);
+}
+
+void au_dir_ts(struct inode *dir, aufs_bindex_t bindex)
+{
+	int perm, wkq_err;
+	aufs_bindex_t btop;
+	struct au_dir_ts_arg *arg;
+	struct dentry *dentry;
+	struct super_block *sb;
+
+	IMustLock(dir);
+
+	dentry = d_find_any_alias(dir);
+	AuDebugOn(!dentry);
+	sb = dentry->d_sb;
+	btop = au_ibtop(dir);
+	if (btop == bindex) {
+		au_cpup_attr_timesizes(dir);
+		goto out;
+	}
+
+	perm = au_sbr_perm(sb, btop);
+	if (!au_br_writable(perm))
+		goto out;
+
+	arg = kmalloc(sizeof(*arg), GFP_NOFS);
+	if (!arg)
+		goto out;
+
+	arg->dentry = dget(dentry); /* will be dput-ted by au_do_dir_ts() */
+	arg->brid = au_sbr_id(sb, bindex);
+	wkq_err = au_wkq_nowait(au_do_dir_ts, arg, sb, /*flags*/0);
+	if (unlikely(wkq_err)) {
+		pr_err("wkq %d\n", wkq_err);
+		dput(dentry);
+		kfree(arg);
+	}
+
+out:
+	dput(dentry);
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int reopen_dir(struct file *file)
+{
+	int err;
+	unsigned int flags;
+	aufs_bindex_t bindex, btail, btop;
+	struct dentry *dentry, *h_dentry;
+	struct file *h_file;
+
+	/* open all lower dirs */
+	dentry = file->f_path.dentry;
+	btop = au_dbtop(dentry);
+	for (bindex = au_fbtop(file); bindex < btop; bindex++)
+		au_set_h_fptr(file, bindex, NULL);
+	au_set_fbtop(file, btop);
+
+	btail = au_dbtaildir(dentry);
+	for (bindex = au_fbbot_dir(file); btail < bindex; bindex--)
+		au_set_h_fptr(file, bindex, NULL);
+	au_set_fbbot_dir(file, btail);
+
+	flags = vfsub_file_flags(file);
+	for (bindex = btop; bindex <= btail; bindex++) {
+		h_dentry = au_h_dptr(dentry, bindex);
+		if (!h_dentry)
+			continue;
+		h_file = au_hf_dir(file, bindex);
+		if (h_file)
+			continue;
+
+		h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
+		err = PTR_ERR(h_file);
+		if (IS_ERR(h_file))
+			goto out; /* close all? */
+		au_set_h_fptr(file, bindex, h_file);
+	}
+	au_update_figen(file);
+	/* todo: necessary? */
+	/* file->f_ra = h_file->f_ra; */
+	err = 0;
+
+out:
+	return err;
+}
+
+static int do_open_dir(struct file *file, int flags, struct file *h_file)
+{
+	int err;
+	aufs_bindex_t bindex, btail;
+	struct dentry *dentry, *h_dentry;
+	struct vfsmount *mnt;
+
+	FiMustWriteLock(file);
+	AuDebugOn(h_file);
+
+	err = 0;
+	mnt = file->f_path.mnt;
+	dentry = file->f_path.dentry;
+	file->f_version = d_inode(dentry)->i_version;
+	bindex = au_dbtop(dentry);
+	au_set_fbtop(file, bindex);
+	btail = au_dbtaildir(dentry);
+	au_set_fbbot_dir(file, btail);
+	for (; !err && bindex <= btail; bindex++) {
+		h_dentry = au_h_dptr(dentry, bindex);
+		if (!h_dentry)
+			continue;
+
+		err = vfsub_test_mntns(mnt, h_dentry->d_sb);
+		if (unlikely(err))
+			break;
+		h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
+		if (IS_ERR(h_file)) {
+			err = PTR_ERR(h_file);
+			break;
+		}
+		au_set_h_fptr(file, bindex, h_file);
+	}
+	au_update_figen(file);
+	/* todo: necessary? */
+	/* file->f_ra = h_file->f_ra; */
+	if (!err)
+		return 0; /* success */
+
+	/* close all */
+	for (bindex = au_fbtop(file); bindex <= btail; bindex++)
+		au_set_h_fptr(file, bindex, NULL);
+	au_set_fbtop(file, -1);
+	au_set_fbbot_dir(file, -1);
+
+	return err;
+}
+
+static int aufs_open_dir(struct inode *inode __maybe_unused,
+			 struct file *file)
+{
+	int err;
+	struct super_block *sb;
+	struct au_fidir *fidir;
+
+	err = -ENOMEM;
+	sb = file->f_path.dentry->d_sb;
+	si_read_lock(sb, AuLock_FLUSH);
+	fidir = au_fidir_alloc(sb);
+	if (fidir) {
+		struct au_do_open_args args = {
+			.open	= do_open_dir,
+			.fidir	= fidir
+		};
+		err = au_do_open(file, &args);
+		if (unlikely(err))
+			kfree(fidir);
+	}
+	si_read_unlock(sb);
+	return err;
+}
+
+static int aufs_release_dir(struct inode *inode __maybe_unused,
+			    struct file *file)
+{
+	struct au_vdir *vdir_cache;
+	struct au_finfo *finfo;
+	struct au_fidir *fidir;
+	struct au_hfile *hf;
+	aufs_bindex_t bindex, bbot;
+
+	finfo = au_fi(file);
+	fidir = finfo->fi_hdir;
+	if (fidir) {
+		au_hbl_del(&finfo->fi_hlist,
+			   &au_sbi(file->f_path.dentry->d_sb)->si_files);
+		vdir_cache = fidir->fd_vdir_cache; /* lock-free */
+		if (vdir_cache)
+			au_vdir_free(vdir_cache);
+
+		bindex = finfo->fi_btop;
+		if (bindex >= 0) {
+			hf = fidir->fd_hfile + bindex;
+			/*
+			 * calls fput() instead of filp_close(),
+			 * since no dnotify or lock for the lower file.
+			 */
+			bbot = fidir->fd_bbot;
+			for (; bindex <= bbot; bindex++, hf++)
+				if (hf->hf_file)
+					au_hfput(hf, /*execed*/0);
+		}
+		kfree(fidir);
+		finfo->fi_hdir = NULL;
+	}
+	au_finfo_fin(file);
+	return 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_do_flush_dir(struct file *file, fl_owner_t id)
+{
+	int err;
+	aufs_bindex_t bindex, bbot;
+	struct file *h_file;
+
+	err = 0;
+	bbot = au_fbbot_dir(file);
+	for (bindex = au_fbtop(file); !err && bindex <= bbot; bindex++) {
+		h_file = au_hf_dir(file, bindex);
+		if (h_file)
+			err = vfsub_flush(h_file, id);
+	}
+	return err;
+}
+
+static int aufs_flush_dir(struct file *file, fl_owner_t id)
+{
+	return au_do_flush(file, id, au_do_flush_dir);
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_do_fsync_dir_no_file(struct dentry *dentry, int datasync)
+{
+	int err;
+	aufs_bindex_t bbot, bindex;
+	struct inode *inode;
+	struct super_block *sb;
+
+	err = 0;
+	sb = dentry->d_sb;
+	inode = d_inode(dentry);
+	IMustLock(inode);
+	bbot = au_dbbot(dentry);
+	for (bindex = au_dbtop(dentry); !err && bindex <= bbot; bindex++) {
+		struct path h_path;
+
+		if (au_test_ro(sb, bindex, inode))
+			continue;
+		h_path.dentry = au_h_dptr(dentry, bindex);
+		if (!h_path.dentry)
+			continue;
+
+		h_path.mnt = au_sbr_mnt(sb, bindex);
+		err = vfsub_fsync(NULL, &h_path, datasync);
+	}
+
+	return err;
+}
+
+static int au_do_fsync_dir(struct file *file, int datasync)
+{
+	int err;
+	aufs_bindex_t bbot, bindex;
+	struct file *h_file;
+	struct super_block *sb;
+	struct inode *inode;
+
+	err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1, /*fi_lsc*/0);
+	if (unlikely(err))
+		goto out;
+
+	inode = file_inode(file);
+	sb = inode->i_sb;
+	bbot = au_fbbot_dir(file);
+	for (bindex = au_fbtop(file); !err && bindex <= bbot; bindex++) {
+		h_file = au_hf_dir(file, bindex);
+		if (!h_file || au_test_ro(sb, bindex, inode))
+			continue;
+
+		err = vfsub_fsync(h_file, &h_file->f_path, datasync);
+	}
+
+out:
+	return err;
+}
+
+/*
+ * @file may be NULL
+ */
+static int aufs_fsync_dir(struct file *file, loff_t start, loff_t end,
+			  int datasync)
+{
+	int err;
+	struct dentry *dentry;
+	struct inode *inode;
+	struct super_block *sb;
+
+	err = 0;
+	dentry = file->f_path.dentry;
+	inode = d_inode(dentry);
+	inode_lock(inode);
+	sb = dentry->d_sb;
+	si_noflush_read_lock(sb);
+	if (file)
+		err = au_do_fsync_dir(file, datasync);
+	else {
+		di_write_lock_child(dentry);
+		err = au_do_fsync_dir_no_file(dentry, datasync);
+	}
+	au_cpup_attr_timesizes(inode);
+	di_write_unlock(dentry);
+	if (file)
+		fi_write_unlock(file);
+
+	si_read_unlock(sb);
+	inode_unlock(inode);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int aufs_iterate_shared(struct file *file, struct dir_context *ctx)
+{
+	int err;
+	struct dentry *dentry;
+	struct inode *inode, *h_inode;
+	struct super_block *sb;
+
+	AuDbg("%pD, ctx{%pf, %llu}\n", file, ctx->actor, ctx->pos);
+
+	dentry = file->f_path.dentry;
+	inode = d_inode(dentry);
+	IMustLock(inode);
+
+	sb = dentry->d_sb;
+	si_read_lock(sb, AuLock_FLUSH);
+	err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1, /*fi_lsc*/0);
+	if (unlikely(err))
+		goto out;
+	err = au_alive_dir(dentry);
+	if (!err)
+		err = au_vdir_init(file);
+	di_downgrade_lock(dentry, AuLock_IR);
+	if (unlikely(err))
+		goto out_unlock;
+
+	h_inode = au_h_iptr(inode, au_ibtop(inode));
+	if (!au_test_nfsd()) {
+		err = au_vdir_fill_de(file, ctx);
+		fsstack_copy_attr_atime(inode, h_inode);
+	} else {
+		/*
+		 * nfsd filldir may call lookup_one_len(), vfs_getattr(),
+		 * encode_fh() and others.
+		 */
+		atomic_inc(&h_inode->i_count);
+		di_read_unlock(dentry, AuLock_IR);
+		si_read_unlock(sb);
+		err = au_vdir_fill_de(file, ctx);
+		fsstack_copy_attr_atime(inode, h_inode);
+		fi_write_unlock(file);
+		iput(h_inode);
+
+		AuTraceErr(err);
+		return err;
+	}
+
+out_unlock:
+	di_read_unlock(dentry, AuLock_IR);
+	fi_write_unlock(file);
+out:
+	si_read_unlock(sb);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+#define AuTestEmpty_WHONLY	1
+#define AuTestEmpty_CALLED	(1 << 1)
+#define AuTestEmpty_SHWH	(1 << 2)
+#define au_ftest_testempty(flags, name)	((flags) & AuTestEmpty_##name)
+#define au_fset_testempty(flags, name) \
+	do { (flags) |= AuTestEmpty_##name; } while (0)
+#define au_fclr_testempty(flags, name) \
+	do { (flags) &= ~AuTestEmpty_##name; } while (0)
+
+#ifndef CONFIG_AUFS_SHWH
+#undef AuTestEmpty_SHWH
+#define AuTestEmpty_SHWH	0
+#endif
+
+struct test_empty_arg {
+	struct dir_context ctx;
+	struct au_nhash *whlist;
+	unsigned int flags;
+	int err;
+	aufs_bindex_t bindex;
+};
+
+static int test_empty_cb(struct dir_context *ctx, const char *__name,
+			 int namelen, loff_t offset __maybe_unused, u64 ino,
+			 unsigned int d_type)
+{
+	struct test_empty_arg *arg = container_of(ctx, struct test_empty_arg,
+						  ctx);
+	char *name = (void *)__name;
+
+	arg->err = 0;
+	au_fset_testempty(arg->flags, CALLED);
+	/* smp_mb(); */
+	if (name[0] == '.'
+	    && (namelen == 1 || (name[1] == '.' && namelen == 2)))
+		goto out; /* success */
+
+	if (namelen <= AUFS_WH_PFX_LEN
+	    || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
+		if (au_ftest_testempty(arg->flags, WHONLY)
+		    && !au_nhash_test_known_wh(arg->whlist, name, namelen))
+			arg->err = -ENOTEMPTY;
+		goto out;
+	}
+
+	name += AUFS_WH_PFX_LEN;
+	namelen -= AUFS_WH_PFX_LEN;
+	if (!au_nhash_test_known_wh(arg->whlist, name, namelen))
+		arg->err = au_nhash_append_wh
+			(arg->whlist, name, namelen, ino, d_type, arg->bindex,
+			 au_ftest_testempty(arg->flags, SHWH));
+
+out:
+	/* smp_mb(); */
+	AuTraceErr(arg->err);
+	return arg->err;
+}
+
+static int do_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
+{
+	int err;
+	struct file *h_file;
+
+	h_file = au_h_open(dentry, arg->bindex,
+			   O_RDONLY | O_NONBLOCK | O_DIRECTORY | O_LARGEFILE,
+			   /*file*/NULL, /*force_wr*/0);
+	err = PTR_ERR(h_file);
+	if (IS_ERR(h_file))
+		goto out;
+
+	err = 0;
+	if (!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
+	    && !file_inode(h_file)->i_nlink)
+		goto out_put;
+
+	do {
+		arg->err = 0;
+		au_fclr_testempty(arg->flags, CALLED);
+		/* smp_mb(); */
+		err = vfsub_iterate_dir(h_file, &arg->ctx);
+		if (err >= 0)
+			err = arg->err;
+	} while (!err && au_ftest_testempty(arg->flags, CALLED));
+
+out_put:
+	fput(h_file);
+	au_sbr_put(dentry->d_sb, arg->bindex);
+out:
+	return err;
+}
+
+struct do_test_empty_args {
+	int *errp;
+	struct dentry *dentry;
+	struct test_empty_arg *arg;
+};
+
+static void call_do_test_empty(void *args)
+{
+	struct do_test_empty_args *a = args;
+	*a->errp = do_test_empty(a->dentry, a->arg);
+}
+
+static int sio_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
+{
+	int err, wkq_err;
+	struct dentry *h_dentry;
+	struct inode *h_inode;
+
+	h_dentry = au_h_dptr(dentry, arg->bindex);
+	h_inode = d_inode(h_dentry);
+	/* todo: i_mode changes anytime? */
+	vfsub_inode_lock_shared_nested(h_inode, AuLsc_I_CHILD);
+	err = au_test_h_perm_sio(h_inode, MAY_EXEC | MAY_READ);
+	inode_unlock_shared(h_inode);
+	if (!err)
+		err = do_test_empty(dentry, arg);
+	else {
+		struct do_test_empty_args args = {
+			.errp	= &err,
+			.dentry	= dentry,
+			.arg	= arg
+		};
+		unsigned int flags = arg->flags;
+
+		wkq_err = au_wkq_wait(call_do_test_empty, &args);
+		if (unlikely(wkq_err))
+			err = wkq_err;
+		arg->flags = flags;
+	}
+
+	return err;
+}
+
+int au_test_empty_lower(struct dentry *dentry)
+{
+	int err;
+	unsigned int rdhash;
+	aufs_bindex_t bindex, btop, btail;
+	struct au_nhash whlist;
+	struct test_empty_arg arg = {
+		.ctx = {
+			.actor = test_empty_cb
+		}
+	};
+	int (*test_empty)(struct dentry *dentry, struct test_empty_arg *arg);
+
+	SiMustAnyLock(dentry->d_sb);
+
+	rdhash = au_sbi(dentry->d_sb)->si_rdhash;
+	if (!rdhash)
+		rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, dentry));
+	err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
+	if (unlikely(err))
+		goto out;
+
+	arg.flags = 0;
+	arg.whlist = &whlist;
+	btop = au_dbtop(dentry);
+	if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
+		au_fset_testempty(arg.flags, SHWH);
+	test_empty = do_test_empty;
+	if (au_opt_test(au_mntflags(dentry->d_sb), DIRPERM1))
+		test_empty = sio_test_empty;
+	arg.bindex = btop;
+	err = test_empty(dentry, &arg);
+	if (unlikely(err))
+		goto out_whlist;
+
+	au_fset_testempty(arg.flags, WHONLY);
+	btail = au_dbtaildir(dentry);
+	for (bindex = btop + 1; !err && bindex <= btail; bindex++) {
+		struct dentry *h_dentry;
+
+		h_dentry = au_h_dptr(dentry, bindex);
+		if (h_dentry && d_is_positive(h_dentry)) {
+			arg.bindex = bindex;
+			err = test_empty(dentry, &arg);
+		}
+	}
+
+out_whlist:
+	au_nhash_wh_free(&whlist);
+out:
+	return err;
+}
+
+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist)
+{
+	int err;
+	struct test_empty_arg arg = {
+		.ctx = {
+			.actor = test_empty_cb
+		}
+	};
+	aufs_bindex_t bindex, btail;
+
+	err = 0;
+	arg.whlist = whlist;
+	arg.flags = AuTestEmpty_WHONLY;
+	if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
+		au_fset_testempty(arg.flags, SHWH);
+	btail = au_dbtaildir(dentry);
+	for (bindex = au_dbtop(dentry); !err && bindex <= btail; bindex++) {
+		struct dentry *h_dentry;
+
+		h_dentry = au_h_dptr(dentry, bindex);
+		if (h_dentry && d_is_positive(h_dentry)) {
+			arg.bindex = bindex;
+			err = sio_test_empty(dentry, &arg);
+		}
+	}
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+const struct file_operations aufs_dir_fop = {
+	.owner		= THIS_MODULE,
+	.llseek		= default_llseek,
+	.read		= generic_read_dir,
+	.iterate_shared	= aufs_iterate_shared,
+	.unlocked_ioctl	= aufs_ioctl_dir,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= aufs_compat_ioctl_dir,
+#endif
+	.open		= aufs_open_dir,
+	.release	= aufs_release_dir,
+	.flush		= aufs_flush_dir,
+	.fsync		= aufs_fsync_dir
+};
diff --git a/include/fs/aufs/dir.h b/include/fs/aufs/dir.h
new file mode 100644
index 000000000..ece11af43
--- /dev/null
+++ b/include/fs/aufs/dir.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * directory operations
+ */
+
+#ifndef __AUFS_DIR_H__
+#define __AUFS_DIR_H__
+
+#ifdef __KERNEL__
+
+#include <linux/fs.h>
+
+/* ---------------------------------------------------------------------- */
+
+/* need to be faster and smaller */
+
+struct au_nhash {
+	unsigned int		nh_num;
+	struct hlist_head	*nh_head;
+};
+
+struct au_vdir_destr {
+	unsigned char	len;
+	unsigned char	name[0];
+} __packed;
+
+struct au_vdir_dehstr {
+	struct hlist_node	hash;
+	struct au_vdir_destr	*str;
+} ____cacheline_aligned_in_smp;
+
+struct au_vdir_de {
+	ino_t			de_ino;
+	unsigned char		de_type;
+	/* caution: packed */
+	struct au_vdir_destr	de_str;
+} __packed;
+
+struct au_vdir_wh {
+	struct hlist_node	wh_hash;
+#ifdef CONFIG_AUFS_SHWH
+	ino_t			wh_ino;
+	aufs_bindex_t		wh_bindex;
+	unsigned char		wh_type;
+#else
+	aufs_bindex_t		wh_bindex;
+#endif
+	/* caution: packed */
+	struct au_vdir_destr	wh_str;
+} __packed;
+
+union au_vdir_deblk_p {
+	unsigned char		*deblk;
+	struct au_vdir_de	*de;
+};
+
+struct au_vdir {
+	unsigned char	**vd_deblk;
+	unsigned long	vd_nblk;
+	struct {
+		unsigned long		ul;
+		union au_vdir_deblk_p	p;
+	} vd_last;
+
+	unsigned long	vd_version;
+	unsigned int	vd_deblk_sz;
+	unsigned long		vd_jiffy;
+} ____cacheline_aligned_in_smp;
+
+/* ---------------------------------------------------------------------- */
+
+/* dir.c */
+extern const struct file_operations aufs_dir_fop;
+void au_add_nlink(struct inode *dir, struct inode *h_dir);
+void au_sub_nlink(struct inode *dir, struct inode *h_dir);
+loff_t au_dir_size(struct file *file, struct dentry *dentry);
+void au_dir_ts(struct inode *dir, aufs_bindex_t bsrc);
+int au_test_empty_lower(struct dentry *dentry);
+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist);
+
+/* vdir.c */
+unsigned int au_rdhash_est(loff_t sz);
+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp);
+void au_nhash_wh_free(struct au_nhash *whlist);
+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
+			    int limit);
+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen);
+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
+		       unsigned int d_type, aufs_bindex_t bindex,
+		       unsigned char shwh);
+void au_vdir_free(struct au_vdir *vdir);
+int au_vdir_init(struct file *file);
+int au_vdir_fill_de(struct file *file, struct dir_context *ctx);
+
+/* ioctl.c */
+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg);
+
+#ifdef CONFIG_AUFS_RDU
+/* rdu.c */
+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+#ifdef CONFIG_COMPAT
+long au_rdu_compat_ioctl(struct file *file, unsigned int cmd,
+			 unsigned long arg);
+#endif
+#else
+AuStub(long, au_rdu_ioctl, return -EINVAL, struct file *file,
+       unsigned int cmd, unsigned long arg)
+#ifdef CONFIG_COMPAT
+AuStub(long, au_rdu_compat_ioctl, return -EINVAL, struct file *file,
+       unsigned int cmd, unsigned long arg)
+#endif
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_DIR_H__ */
diff --git a/include/fs/aufs/dirren.c b/include/fs/aufs/dirren.c
new file mode 100644
index 000000000..14a3621dc
--- /dev/null
+++ b/include/fs/aufs/dirren.c
@@ -0,0 +1,1315 @@
+/*
+ * Copyright (C) 2017-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * special handling in renaming a directoy
+ * in order to support looking-up the before-renamed name on the lower readonly
+ * branches
+ */
+
+#include <linux/byteorder/generic.h>
+#include "aufs.h"
+
+static void au_dr_hino_del(struct au_dr_br *dr, struct au_dr_hino *ent)
+{
+	int idx;
+
+	idx = au_dr_ihash(ent->dr_h_ino);
+	au_hbl_del(&ent->dr_hnode, dr->dr_h_ino + idx);
+}
+
+static int au_dr_hino_test_empty(struct au_dr_br *dr)
+{
+	int ret, i;
+	struct hlist_bl_head *hbl;
+
+	ret = 1;
+	for (i = 0; ret && i < AuDirren_NHASH; i++) {
+		hbl = dr->dr_h_ino + i;
+		hlist_bl_lock(hbl);
+		ret &= hlist_bl_empty(hbl);
+		hlist_bl_unlock(hbl);
+	}
+
+	return ret;
+}
+
+static struct au_dr_hino *au_dr_hino_find(struct au_dr_br *dr, ino_t ino)
+{
+	struct au_dr_hino *found, *ent;
+	struct hlist_bl_head *hbl;
+	struct hlist_bl_node *pos;
+	int idx;
+
+	found = NULL;
+	idx = au_dr_ihash(ino);
+	hbl = dr->dr_h_ino + idx;
+	hlist_bl_lock(hbl);
+	hlist_bl_for_each_entry(ent, pos, hbl, dr_hnode)
+		if (ent->dr_h_ino == ino) {
+			found = ent;
+			break;
+		}
+	hlist_bl_unlock(hbl);
+
+	return found;
+}
+
+int au_dr_hino_test_add(struct au_dr_br *dr, ino_t ino,
+			struct au_dr_hino *add_ent)
+{
+	int found, idx;
+	struct hlist_bl_head *hbl;
+	struct hlist_bl_node *pos;
+	struct au_dr_hino *ent;
+
+	found = 0;
+	idx = au_dr_ihash(ino);
+	hbl = dr->dr_h_ino + idx;
+#if 0
+	{
+		struct hlist_bl_node *tmp;
+
+		hlist_bl_for_each_entry_safe(ent, pos, tmp, hbl, dr_hnode)
+			AuDbg("hi%llu\n", (unsigned long long)ent->dr_h_ino);
+	}
+#endif
+	hlist_bl_lock(hbl);
+	hlist_bl_for_each_entry(ent, pos, hbl, dr_hnode)
+		if (ent->dr_h_ino == ino) {
+			found = 1;
+			break;
+		}
+	if (!found && add_ent)
+		hlist_bl_add_head(&add_ent->dr_hnode, hbl);
+	hlist_bl_unlock(hbl);
+
+	if (!found && add_ent)
+		AuDbg("i%llu added\n", (unsigned long long)add_ent->dr_h_ino);
+
+	return found;
+}
+
+void au_dr_hino_free(struct au_dr_br *dr)
+{
+	int i;
+	struct hlist_bl_head *hbl;
+	struct hlist_bl_node *pos, *tmp;
+	struct au_dr_hino *ent;
+
+	/* SiMustWriteLock(sb); */
+
+	for (i = 0; i < AuDirren_NHASH; i++) {
+		hbl = dr->dr_h_ino + i;
+		/* no spinlock since sbinfo must be write-locked */
+		hlist_bl_for_each_entry_safe(ent, pos, tmp, hbl, dr_hnode)
+			kfree(ent);
+		INIT_HLIST_BL_HEAD(hbl);
+	}
+}
+
+/* returns the number of inodes or an error */
+static int au_dr_hino_store(struct super_block *sb, struct au_branch *br,
+			    struct file *hinofile)
+{
+	int err, i;
+	ssize_t ssz;
+	loff_t pos, oldsize;
+	__be64 u64;
+	struct inode *hinoinode;
+	struct hlist_bl_head *hbl;
+	struct hlist_bl_node *n1, *n2;
+	struct au_dr_hino *ent;
+
+	SiMustWriteLock(sb);
+	AuDebugOn(!au_br_writable(br->br_perm));
+
+	hinoinode = file_inode(hinofile);
+	oldsize = i_size_read(hinoinode);
+
+	err = 0;
+	pos = 0;
+	hbl = br->br_dirren.dr_h_ino;
+	for (i = 0; !err && i < AuDirren_NHASH; i++, hbl++) {
+		/* no bit-lock since sbinfo must be write-locked */
+		hlist_bl_for_each_entry_safe(ent, n1, n2, hbl, dr_hnode) {
+			AuDbg("hi%llu, %pD2\n",
+			      (unsigned long long)ent->dr_h_ino, hinofile);
+			u64 = cpu_to_be64(ent->dr_h_ino);
+			ssz = vfsub_write_k(hinofile, &u64, sizeof(u64), &pos);
+			if (ssz == sizeof(u64))
+				continue;
+
+			/* write error */
+			pr_err("ssz %zd, %pD2\n", ssz, hinofile);
+			err = -ENOSPC;
+			if (ssz < 0)
+				err = ssz;
+			break;
+		}
+	}
+	/* regardless the error */
+	if (pos < oldsize) {
+		err = vfsub_trunc(&hinofile->f_path, pos, /*attr*/0, hinofile);
+		AuTraceErr(err);
+	}
+
+	AuTraceErr(err);
+	return err;
+}
+
+static int au_dr_hino_load(struct au_dr_br *dr, struct file *hinofile)
+{
+	int err, hidx;
+	ssize_t ssz;
+	size_t sz, n;
+	loff_t pos;
+	uint64_t u64;
+	struct au_dr_hino *ent;
+	struct inode *hinoinode;
+	struct hlist_bl_head *hbl;
+
+	err = 0;
+	pos = 0;
+	hbl = dr->dr_h_ino;
+	hinoinode = file_inode(hinofile);
+	sz = i_size_read(hinoinode);
+	AuDebugOn(sz % sizeof(u64));
+	n = sz / sizeof(u64);
+	while (n--) {
+		ssz = vfsub_read_k(hinofile, &u64, sizeof(u64), &pos);
+		if (unlikely(ssz != sizeof(u64))) {
+			pr_err("ssz %zd, %pD2\n", ssz, hinofile);
+			err = -EINVAL;
+			if (ssz < 0)
+				err = ssz;
+			goto out_free;
+		}
+
+		ent = kmalloc(sizeof(*ent), GFP_NOFS);
+		if (!ent) {
+			err = -ENOMEM;
+			AuTraceErr(err);
+			goto out_free;
+		}
+		ent->dr_h_ino = be64_to_cpu((__force __be64)u64);
+		AuDbg("hi%llu, %pD2\n",
+		      (unsigned long long)ent->dr_h_ino, hinofile);
+		hidx = au_dr_ihash(ent->dr_h_ino);
+		au_hbl_add(&ent->dr_hnode, hbl + hidx);
+	}
+	goto out; /* success */
+
+out_free:
+	au_dr_hino_free(dr);
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/*
+ * @bindex/@br is a switch to distinguish whether suspending hnotify or not.
+ * @path is a switch to distinguish load and store.
+ */
+static int au_dr_hino(struct super_block *sb, aufs_bindex_t bindex,
+		      struct au_branch *br, const struct path *path)
+{
+	int err, flags;
+	unsigned char load, suspend;
+	struct file *hinofile;
+	struct au_hinode *hdir;
+	struct inode *dir, *delegated;
+	struct path hinopath;
+	struct qstr hinoname = QSTR_INIT(AUFS_WH_DR_BRHINO,
+					 sizeof(AUFS_WH_DR_BRHINO) - 1);
+
+	AuDebugOn(bindex < 0 && !br);
+	AuDebugOn(bindex >= 0 && br);
+
+	err = -EINVAL;
+	suspend = !br;
+	if (suspend)
+		br = au_sbr(sb, bindex);
+	load = !!path;
+	if (!load) {
+		path = &br->br_path;
+		AuDebugOn(!au_br_writable(br->br_perm));
+		if (unlikely(!au_br_writable(br->br_perm)))
+			goto out;
+	}
+
+	hdir = NULL;
+	if (suspend) {
+		dir = d_inode(sb->s_root);
+		hdir = au_hinode(au_ii(dir), bindex);
+		dir = hdir->hi_inode;
+		au_hn_inode_lock_nested(hdir, AuLsc_I_CHILD);
+	} else {
+		dir = d_inode(path->dentry);
+		inode_lock_nested(dir, AuLsc_I_CHILD);
+	}
+	hinopath.dentry = vfsub_lkup_one(&hinoname, path->dentry);
+	err = PTR_ERR(hinopath.dentry);
+	if (IS_ERR(hinopath.dentry))
+		goto out_unlock;
+
+	err = 0;
+	flags = O_RDONLY;
+	if (load) {
+		if (d_is_negative(hinopath.dentry))
+			goto out_dput; /* success */
+	} else {
+		if (au_dr_hino_test_empty(&br->br_dirren)) {
+			if (d_is_positive(hinopath.dentry)) {
+				delegated = NULL;
+				err = vfsub_unlink(dir, &hinopath, &delegated,
+						   /*force*/0);
+				AuTraceErr(err);
+				if (unlikely(err))
+					pr_err("ignored err %d, %pd2\n",
+					       err, hinopath.dentry);
+				if (unlikely(err == -EWOULDBLOCK))
+					iput(delegated);
+				err = 0;
+			}
+			goto out_dput;
+		} else if (!d_is_positive(hinopath.dentry)) {
+			err = vfsub_create(dir, &hinopath, 0600,
+					   /*want_excl*/false);
+			AuTraceErr(err);
+			if (unlikely(err))
+				goto out_dput;
+		}
+		flags = O_WRONLY;
+	}
+	hinopath.mnt = path->mnt;
+	hinofile = vfsub_dentry_open(&hinopath, flags);
+	if (suspend)
+		au_hn_inode_unlock(hdir);
+	else
+		inode_unlock(dir);
+	dput(hinopath.dentry);
+	AuTraceErrPtr(hinofile);
+	if (IS_ERR(hinofile)) {
+		err = PTR_ERR(hinofile);
+		goto out;
+	}
+
+	if (load)
+		err = au_dr_hino_load(&br->br_dirren, hinofile);
+	else
+		err = au_dr_hino_store(sb, br, hinofile);
+	fput(hinofile);
+	goto out;
+
+out_dput:
+	dput(hinopath.dentry);
+out_unlock:
+	if (suspend)
+		au_hn_inode_unlock(hdir);
+	else
+		inode_unlock(dir);
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_dr_brid_init(struct au_dr_brid *brid, const struct path *path)
+{
+	int err;
+	struct kstatfs kstfs;
+	dev_t dev;
+	struct dentry *dentry;
+	struct super_block *sb;
+
+	err = vfs_statfs((void *)path, &kstfs);
+	AuTraceErr(err);
+	if (unlikely(err))
+		goto out;
+
+	/* todo: support for UUID */
+
+	if (kstfs.f_fsid.val[0] || kstfs.f_fsid.val[1]) {
+		brid->type = AuBrid_FSID;
+		brid->fsid = kstfs.f_fsid;
+	} else {
+		dentry = path->dentry;
+		sb = dentry->d_sb;
+		dev = sb->s_dev;
+		if (dev) {
+			brid->type = AuBrid_DEV;
+			brid->dev = dev;
+		}
+	}
+
+out:
+	return err;
+}
+
+int au_dr_br_init(struct super_block *sb, struct au_branch *br,
+		  const struct path *path)
+{
+	int err, i;
+	struct au_dr_br *dr;
+	struct hlist_bl_head *hbl;
+
+	dr = &br->br_dirren;
+	hbl = dr->dr_h_ino;
+	for (i = 0; i < AuDirren_NHASH; i++, hbl++)
+		INIT_HLIST_BL_HEAD(hbl);
+
+	err = au_dr_brid_init(&dr->dr_brid, path);
+	if (unlikely(err))
+		goto out;
+
+	if (au_opt_test(au_mntflags(sb), DIRREN))
+		err = au_dr_hino(sb, /*bindex*/-1, br, path);
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+int au_dr_br_fin(struct super_block *sb, struct au_branch *br)
+{
+	int err;
+
+	err = 0;
+	if (au_br_writable(br->br_perm))
+		err = au_dr_hino(sb, /*bindex*/-1, br, /*path*/NULL);
+	if (!err)
+		au_dr_hino_free(&br->br_dirren);
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_brid_str(struct au_dr_brid *brid, struct inode *h_inode,
+		       char *buf, size_t sz)
+{
+	int err;
+	unsigned int major, minor;
+	char *p;
+
+	p = buf;
+	err = snprintf(p, sz, "%d_", brid->type);
+	AuDebugOn(err > sz);
+	p += err;
+	sz -= err;
+	switch (brid->type) {
+	case AuBrid_Unset:
+		return -EINVAL;
+	case AuBrid_UUID:
+		err = snprintf(p, sz, "%pU", brid->uuid.b);
+		break;
+	case AuBrid_FSID:
+		err = snprintf(p, sz, "%08x-%08x",
+			       brid->fsid.val[0], brid->fsid.val[1]);
+		break;
+	case AuBrid_DEV:
+		major = MAJOR(brid->dev);
+		minor = MINOR(brid->dev);
+		if (major <= 0xff && minor <= 0xff)
+			err = snprintf(p, sz, "%02x%02x", major, minor);
+		else
+			err = snprintf(p, sz, "%03x:%05x", major, minor);
+		break;
+	}
+	AuDebugOn(err > sz);
+	p += err;
+	sz -= err;
+	err = snprintf(p, sz, "_%llu", (unsigned long long)h_inode->i_ino);
+	AuDebugOn(err > sz);
+	p += err;
+	sz -= err;
+
+	return p - buf;
+}
+
+static int au_drinfo_name(struct au_branch *br, char *name, int len)
+{
+	int rlen;
+	struct dentry *br_dentry;
+	struct inode *br_inode;
+
+	br_dentry = au_br_dentry(br);
+	br_inode = d_inode(br_dentry);
+	rlen = au_brid_str(&br->br_dirren.dr_brid, br_inode, name, len);
+	AuDebugOn(rlen >= AUFS_DIRREN_ENV_VAL_SZ);
+	AuDebugOn(rlen > len);
+
+	return rlen;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * from the given @h_dentry, construct drinfo at @*fdata.
+ * when the size of @*fdata is not enough, reallocate and return new @fdata and
+ * @allocated.
+ */
+static int au_drinfo_construct(struct au_drinfo_fdata **fdata,
+			       struct dentry *h_dentry,
+			       unsigned char *allocated)
+{
+	int err, v;
+	struct au_drinfo_fdata *f, *p;
+	struct au_drinfo *drinfo;
+	struct inode *h_inode;
+	struct qstr *qname;
+
+	err = 0;
+	f = *fdata;
+	h_inode = d_inode(h_dentry);
+	qname = &h_dentry->d_name;
+	drinfo = &f->drinfo;
+	drinfo->ino = (__force uint64_t)cpu_to_be64(h_inode->i_ino);
+	drinfo->oldnamelen = qname->len;
+	if (*allocated < sizeof(*f) + qname->len) {
+		v = roundup_pow_of_two(*allocated + qname->len);
+		p = au_krealloc(f, v, GFP_NOFS, /*may_shrink*/0);
+		if (unlikely(!p)) {
+			err = -ENOMEM;
+			AuTraceErr(err);
+			goto out;
+		}
+		f = p;
+		*fdata = f;
+		*allocated = v;
+		drinfo = &f->drinfo;
+	}
+	memcpy(drinfo->oldname, qname->name, qname->len);
+	AuDbg("i%llu, %.*s\n",
+	      be64_to_cpu((__force __be64)drinfo->ino), drinfo->oldnamelen,
+	      drinfo->oldname);
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/* callers have to free the return value */
+static struct au_drinfo *au_drinfo_read_k(struct file *file, ino_t h_ino)
+{
+	struct au_drinfo *ret, *drinfo;
+	struct au_drinfo_fdata fdata;
+	int len;
+	loff_t pos;
+	ssize_t ssz;
+
+	ret = ERR_PTR(-EIO);
+	pos = 0;
+	ssz = vfsub_read_k(file, &fdata, sizeof(fdata), &pos);
+	if (unlikely(ssz != sizeof(fdata))) {
+		AuIOErr("ssz %zd, %u, %pD2\n",
+			ssz, (unsigned int)sizeof(fdata), file);
+		goto out;
+	}
+
+	fdata.magic = ntohl((__force __be32)fdata.magic);
+	switch (fdata.magic) {
+	case AUFS_DRINFO_MAGIC_V1:
+		break;
+	default:
+		AuIOErr("magic-num 0x%x, 0x%x, %pD2\n",
+			fdata.magic, AUFS_DRINFO_MAGIC_V1, file);
+		goto out;
+	}
+
+	drinfo = &fdata.drinfo;
+	len = drinfo->oldnamelen;
+	if (!len) {
+		AuIOErr("broken drinfo %pD2\n", file);
+		goto out;
+	}
+
+	ret = NULL;
+	drinfo->ino = be64_to_cpu((__force __be64)drinfo->ino);
+	if (unlikely(h_ino && drinfo->ino != h_ino)) {
+		AuDbg("ignored i%llu, i%llu, %pD2\n",
+		      (unsigned long long)drinfo->ino,
+		      (unsigned long long)h_ino, file);
+		goto out; /* success */
+	}
+
+	ret = kmalloc(sizeof(*ret) + len, GFP_NOFS);
+	if (unlikely(!ret)) {
+		ret = ERR_PTR(-ENOMEM);
+		AuTraceErrPtr(ret);
+		goto out;
+	}
+
+	*ret = *drinfo;
+	ssz = vfsub_read_k(file, (void *)ret->oldname, len, &pos);
+	if (unlikely(ssz != len)) {
+		kfree(ret);
+		ret = ERR_PTR(-EIO);
+		AuIOErr("ssz %zd, %u, %pD2\n", ssz, len, file);
+		goto out;
+	}
+
+	AuDbg("oldname %.*s\n", ret->oldnamelen, ret->oldname);
+
+out:
+	return ret;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* in order to be revertible */
+struct au_drinfo_rev_elm {
+	int			created;
+	struct dentry		*info_dentry;
+	struct au_drinfo	*info_last;
+};
+
+struct au_drinfo_rev {
+	unsigned char			already;
+	aufs_bindex_t			nelm;
+	struct au_drinfo_rev_elm	elm[0];
+};
+
+/* todo: isn't it too large? */
+struct au_drinfo_store {
+	struct path h_ppath;
+	struct dentry *h_dentry;
+	struct au_drinfo_fdata *fdata;
+	char *infoname;			/* inside of whname, just after PFX */
+	char whname[sizeof(AUFS_WH_DR_INFO_PFX) + AUFS_DIRREN_ENV_VAL_SZ];
+	aufs_bindex_t btgt, btail;
+	unsigned char no_sio,
+		allocated,		/* current size of *fdata */
+		infonamelen,		/* room size for p */
+		whnamelen,		/* length of the genarated name */
+		renameback;		/* renamed back */
+};
+
+/* on rename(2) error, the caller should revert it using @elm */
+static int au_drinfo_do_store(struct au_drinfo_store *w,
+			      struct au_drinfo_rev_elm *elm)
+{
+	int err, len;
+	ssize_t ssz;
+	loff_t pos;
+	struct path infopath = {
+		.mnt = w->h_ppath.mnt
+	};
+	struct inode *h_dir, *h_inode, *delegated;
+	struct file *infofile;
+	struct qstr *qname;
+
+	AuDebugOn(elm
+		  && memcmp(elm, page_address(ZERO_PAGE(0)), sizeof(*elm)));
+
+	infopath.dentry = vfsub_lookup_one_len(w->whname, w->h_ppath.dentry,
+					       w->whnamelen);
+	AuTraceErrPtr(infopath.dentry);
+	if (IS_ERR(infopath.dentry)) {
+		err = PTR_ERR(infopath.dentry);
+		goto out;
+	}
+
+	err = 0;
+	h_dir = d_inode(w->h_ppath.dentry);
+	if (elm && d_is_negative(infopath.dentry)) {
+		err = vfsub_create(h_dir, &infopath, 0600, /*want_excl*/true);
+		AuTraceErr(err);
+		if (unlikely(err))
+			goto out_dput;
+		elm->created = 1;
+		elm->info_dentry = dget(infopath.dentry);
+	}
+
+	infofile = vfsub_dentry_open(&infopath, O_RDWR);
+	AuTraceErrPtr(infofile);
+	if (IS_ERR(infofile)) {
+		err = PTR_ERR(infofile);
+		goto out_dput;
+	}
+
+	h_inode = d_inode(infopath.dentry);
+	if (elm && i_size_read(h_inode)) {
+		h_inode = d_inode(w->h_dentry);
+		elm->info_last = au_drinfo_read_k(infofile, h_inode->i_ino);
+		AuTraceErrPtr(elm->info_last);
+		if (IS_ERR(elm->info_last)) {
+			err = PTR_ERR(elm->info_last);
+			elm->info_last = NULL;
+			AuDebugOn(elm->info_dentry);
+			goto out_fput;
+		}
+	}
+
+	if (elm && w->renameback) {
+		delegated = NULL;
+		err = vfsub_unlink(h_dir, &infopath, &delegated, /*force*/0);
+		AuTraceErr(err);
+		if (unlikely(err == -EWOULDBLOCK))
+			iput(delegated);
+		goto out_fput;
+	}
+
+	pos = 0;
+	qname = &w->h_dentry->d_name;
+	len = sizeof(*w->fdata) + qname->len;
+	if (!elm)
+		len = sizeof(*w->fdata) + w->fdata->drinfo.oldnamelen;
+	ssz = vfsub_write_k(infofile, w->fdata, len, &pos);
+	if (ssz == len) {
+		AuDbg("hi%llu, %.*s\n", w->fdata->drinfo.ino,
+		      w->fdata->drinfo.oldnamelen, w->fdata->drinfo.oldname);
+		goto out_fput; /* success */
+	} else {
+		err = -EIO;
+		if (ssz < 0)
+			err = ssz;
+		/* the caller should revert it using @elm */
+	}
+
+out_fput:
+	fput(infofile);
+out_dput:
+	dput(infopath.dentry);
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+struct au_call_drinfo_do_store_args {
+	int *errp;
+	struct au_drinfo_store *w;
+	struct au_drinfo_rev_elm *elm;
+};
+
+static void au_call_drinfo_do_store(void *args)
+{
+	struct au_call_drinfo_do_store_args *a = args;
+
+	*a->errp = au_drinfo_do_store(a->w, a->elm);
+}
+
+static int au_drinfo_store_sio(struct au_drinfo_store *w,
+			       struct au_drinfo_rev_elm *elm)
+{
+	int err, wkq_err;
+
+	if (w->no_sio)
+		err = au_drinfo_do_store(w, elm);
+	else {
+		struct au_call_drinfo_do_store_args a = {
+			.errp	= &err,
+			.w	= w,
+			.elm	= elm
+		};
+		wkq_err = au_wkq_wait(au_call_drinfo_do_store, &a);
+		if (unlikely(wkq_err))
+			err = wkq_err;
+	}
+	AuTraceErr(err);
+
+	return err;
+}
+
+static int au_drinfo_store_work_init(struct au_drinfo_store *w,
+				     aufs_bindex_t btgt)
+{
+	int err;
+
+	memset(w, 0, sizeof(*w));
+	w->allocated = roundup_pow_of_two(sizeof(*w->fdata) + 40);
+	strcpy(w->whname, AUFS_WH_DR_INFO_PFX);
+	w->infoname = w->whname + sizeof(AUFS_WH_DR_INFO_PFX) - 1;
+	w->infonamelen = sizeof(w->whname) - sizeof(AUFS_WH_DR_INFO_PFX);
+	w->btgt = btgt;
+	w->no_sio = !!uid_eq(current_fsuid(), GLOBAL_ROOT_UID);
+
+	err = -ENOMEM;
+	w->fdata = kcalloc(1, w->allocated, GFP_NOFS);
+	if (unlikely(!w->fdata)) {
+		AuTraceErr(err);
+		goto out;
+	}
+	w->fdata->magic = (__force uint32_t)htonl(AUFS_DRINFO_MAGIC_V1);
+	err = 0;
+
+out:
+	return err;
+}
+
+static void au_drinfo_store_work_fin(struct au_drinfo_store *w)
+{
+	kfree(w->fdata);
+}
+
+static void au_drinfo_store_rev(struct au_drinfo_rev *rev,
+				struct au_drinfo_store *w)
+{
+	struct au_drinfo_rev_elm *elm;
+	struct inode *h_dir, *delegated;
+	int err, nelm;
+	struct path infopath = {
+		.mnt = w->h_ppath.mnt
+	};
+
+	h_dir = d_inode(w->h_ppath.dentry);
+	IMustLock(h_dir);
+
+	err = 0;
+	elm = rev->elm;
+	for (nelm = rev->nelm; nelm > 0; nelm--, elm++) {
+		AuDebugOn(elm->created && elm->info_last);
+		if (elm->created) {
+			AuDbg("here\n");
+			delegated = NULL;
+			infopath.dentry = elm->info_dentry;
+			err = vfsub_unlink(h_dir, &infopath, &delegated,
+					   !w->no_sio);
+			AuTraceErr(err);
+			if (unlikely(err == -EWOULDBLOCK))
+				iput(delegated);
+			dput(elm->info_dentry);
+		} else if (elm->info_last) {
+			AuDbg("here\n");
+			w->fdata->drinfo = *elm->info_last;
+			memcpy(w->fdata->drinfo.oldname,
+			       elm->info_last->oldname,
+			       elm->info_last->oldnamelen);
+			err = au_drinfo_store_sio(w, /*elm*/NULL);
+			kfree(elm->info_last);
+		}
+		if (unlikely(err))
+			AuIOErr("%d, %s\n", err, w->whname);
+		/* go on even if err */
+	}
+}
+
+/* caller has to call au_dr_rename_fin() later */
+static int au_drinfo_store(struct dentry *dentry, aufs_bindex_t btgt,
+			   struct qstr *dst_name, void *_rev)
+{
+	int err, sz, nelm;
+	aufs_bindex_t bindex, btail;
+	struct au_drinfo_store work;
+	struct au_drinfo_rev *rev, **p;
+	struct au_drinfo_rev_elm *elm;
+	struct super_block *sb;
+	struct au_branch *br;
+	struct au_hinode *hdir;
+
+	err = au_drinfo_store_work_init(&work, btgt);
+	AuTraceErr(err);
+	if (unlikely(err))
+		goto out;
+
+	err = -ENOMEM;
+	btail = au_dbtaildir(dentry);
+	nelm = btail - btgt;
+	sz = sizeof(*rev) + sizeof(*elm) * nelm;
+	rev = kcalloc(1, sz, GFP_NOFS);
+	if (unlikely(!rev)) {
+		AuTraceErr(err);
+		goto out_args;
+	}
+	rev->nelm = nelm;
+	elm = rev->elm;
+	p = _rev;
+	*p = rev;
+
+	err = 0;
+	sb = dentry->d_sb;
+	work.h_ppath.dentry = au_h_dptr(dentry, btgt);
+	work.h_ppath.mnt = au_sbr_mnt(sb, btgt);
+	hdir = au_hi(d_inode(dentry), btgt);
+	au_hn_inode_lock_nested(hdir, AuLsc_I_CHILD);
+	for (bindex = btgt + 1; bindex <= btail; bindex++, elm++) {
+		work.h_dentry = au_h_dptr(dentry, bindex);
+		if (!work.h_dentry)
+			continue;
+
+		err = au_drinfo_construct(&work.fdata, work.h_dentry,
+					  &work.allocated);
+		AuTraceErr(err);
+		if (unlikely(err))
+			break;
+
+		work.renameback = au_qstreq(&work.h_dentry->d_name, dst_name);
+		br = au_sbr(sb, bindex);
+		work.whnamelen = sizeof(AUFS_WH_DR_INFO_PFX) - 1;
+		work.whnamelen += au_drinfo_name(br, work.infoname,
+						 work.infonamelen);
+		AuDbg("whname %.*s, i%llu, %.*s\n",
+		      work.whnamelen, work.whname,
+		      be64_to_cpu((__force __be64)work.fdata->drinfo.ino),
+		      work.fdata->drinfo.oldnamelen,
+		      work.fdata->drinfo.oldname);
+
+		err = au_drinfo_store_sio(&work, elm);
+		AuTraceErr(err);
+		if (unlikely(err))
+			break;
+	}
+	if (unlikely(err)) {
+		/* revert all drinfo */
+		au_drinfo_store_rev(rev, &work);
+		kfree(rev);
+		*p = NULL;
+	}
+	au_hn_inode_unlock(hdir);
+
+out_args:
+	au_drinfo_store_work_fin(&work);
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int au_dr_rename(struct dentry *src, aufs_bindex_t bindex,
+		 struct qstr *dst_name, void *_rev)
+{
+	int err, already;
+	ino_t ino;
+	struct super_block *sb;
+	struct au_branch *br;
+	struct au_dr_br *dr;
+	struct dentry *h_dentry;
+	struct inode *h_inode;
+	struct au_dr_hino *ent;
+	struct au_drinfo_rev *rev, **p;
+
+	AuDbg("bindex %d\n", bindex);
+
+	err = -ENOMEM;
+	ent = kmalloc(sizeof(*ent), GFP_NOFS);
+	if (unlikely(!ent))
+		goto out;
+
+	sb = src->d_sb;
+	br = au_sbr(sb, bindex);
+	dr = &br->br_dirren;
+	h_dentry = au_h_dptr(src, bindex);
+	h_inode = d_inode(h_dentry);
+	ino = h_inode->i_ino;
+	ent->dr_h_ino = ino;
+	already = au_dr_hino_test_add(dr, ino, ent);
+	AuDbg("b%d, hi%llu, already %d\n",
+	      bindex, (unsigned long long)ino, already);
+
+	err = au_drinfo_store(src, bindex, dst_name, _rev);
+	AuTraceErr(err);
+	if (!err) {
+		p = _rev;
+		rev = *p;
+		rev->already = already;
+		goto out; /* success */
+	}
+
+	/* revert */
+	if (!already)
+		au_dr_hino_del(dr, ent);
+	kfree(ent);
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+void au_dr_rename_fin(struct dentry *src, aufs_bindex_t btgt, void *_rev)
+{
+	struct au_drinfo_rev *rev;
+	struct au_drinfo_rev_elm *elm;
+	int nelm;
+
+	rev = _rev;
+	elm = rev->elm;
+	for (nelm = rev->nelm; nelm > 0; nelm--, elm++) {
+		dput(elm->info_dentry);
+		kfree(elm->info_last);
+	}
+	kfree(rev);
+}
+
+void au_dr_rename_rev(struct dentry *src, aufs_bindex_t btgt, void *_rev)
+{
+	int err;
+	struct au_drinfo_store work;
+	struct au_drinfo_rev *rev = _rev;
+	struct super_block *sb;
+	struct au_branch *br;
+	struct inode *h_inode;
+	struct au_dr_br *dr;
+	struct au_dr_hino *ent;
+
+	err = au_drinfo_store_work_init(&work, btgt);
+	if (unlikely(err))
+		goto out;
+
+	sb = src->d_sb;
+	br = au_sbr(sb, btgt);
+	work.h_ppath.dentry = au_h_dptr(src, btgt);
+	work.h_ppath.mnt = au_br_mnt(br);
+	au_drinfo_store_rev(rev, &work);
+	au_drinfo_store_work_fin(&work);
+	if (rev->already)
+		goto out;
+
+	dr = &br->br_dirren;
+	h_inode = d_inode(work.h_ppath.dentry);
+	ent = au_dr_hino_find(dr, h_inode->i_ino);
+	BUG_ON(!ent);
+	au_dr_hino_del(dr, ent);
+	kfree(ent);
+
+out:
+	kfree(rev);
+	if (unlikely(err))
+		pr_err("failed to remove dirren info\n");
+}
+
+/* ---------------------------------------------------------------------- */
+
+static struct au_drinfo *au_drinfo_do_load(struct path *h_ppath,
+					   char *whname, int whnamelen,
+					   struct dentry **info_dentry)
+{
+	struct au_drinfo *drinfo;
+	struct file *f;
+	struct inode *h_dir;
+	struct path infopath;
+	int unlocked;
+
+	AuDbg("%pd/%.*s\n", h_ppath->dentry, whnamelen, whname);
+
+	*info_dentry = NULL;
+	drinfo = NULL;
+	unlocked = 0;
+	h_dir = d_inode(h_ppath->dentry);
+	vfsub_inode_lock_shared_nested(h_dir, AuLsc_I_PARENT);
+	infopath.dentry = vfsub_lookup_one_len(whname, h_ppath->dentry,
+					       whnamelen);
+	if (IS_ERR(infopath.dentry)) {
+		drinfo = (void *)infopath.dentry;
+		goto out;
+	}
+
+	if (d_is_negative(infopath.dentry))
+		goto out_dput; /* success */
+
+	infopath.mnt = h_ppath->mnt;
+	f = vfsub_dentry_open(&infopath, O_RDONLY);
+	inode_unlock_shared(h_dir);
+	unlocked = 1;
+	if (IS_ERR(f)) {
+		drinfo = (void *)f;
+		goto out_dput;
+	}
+
+	drinfo = au_drinfo_read_k(f, /*h_ino*/0);
+	if (IS_ERR_OR_NULL(drinfo))
+		goto out_fput;
+
+	AuDbg("oldname %.*s\n", drinfo->oldnamelen, drinfo->oldname);
+	*info_dentry = dget(infopath.dentry); /* keep it alive */
+
+out_fput:
+	fput(f);
+out_dput:
+	dput(infopath.dentry);
+out:
+	if (!unlocked)
+		inode_unlock_shared(h_dir);
+	AuTraceErrPtr(drinfo);
+	return drinfo;
+}
+
+struct au_drinfo_do_load_args {
+	struct au_drinfo **drinfop;
+	struct path *h_ppath;
+	char *whname;
+	int whnamelen;
+	struct dentry **info_dentry;
+};
+
+static void au_call_drinfo_do_load(void *args)
+{
+	struct au_drinfo_do_load_args *a = args;
+
+	*a->drinfop = au_drinfo_do_load(a->h_ppath, a->whname, a->whnamelen,
+					a->info_dentry);
+}
+
+struct au_drinfo_load {
+	struct path h_ppath;
+	struct qstr *qname;
+	unsigned char no_sio;
+
+	aufs_bindex_t ninfo;
+	struct au_drinfo **drinfo;
+};
+
+static int au_drinfo_load(struct au_drinfo_load *w, aufs_bindex_t bindex,
+			  struct au_branch *br)
+{
+	int err, wkq_err, whnamelen, e;
+	char whname[sizeof(AUFS_WH_DR_INFO_PFX) + AUFS_DIRREN_ENV_VAL_SZ]
+		= AUFS_WH_DR_INFO_PFX;
+	struct au_drinfo *drinfo;
+	struct qstr oldname;
+	struct inode *h_dir, *delegated;
+	struct dentry *info_dentry;
+	struct path infopath;
+
+	whnamelen = sizeof(AUFS_WH_DR_INFO_PFX) - 1;
+	whnamelen += au_drinfo_name(br, whname + whnamelen,
+				    sizeof(whname) - whnamelen);
+	if (w->no_sio)
+		drinfo = au_drinfo_do_load(&w->h_ppath, whname, whnamelen,
+					   &info_dentry);
+	else {
+		struct au_drinfo_do_load_args args = {
+			.drinfop	= &drinfo,
+			.h_ppath	= &w->h_ppath,
+			.whname		= whname,
+			.whnamelen	= whnamelen,
+			.info_dentry	= &info_dentry
+		};
+		wkq_err = au_wkq_wait(au_call_drinfo_do_load, &args);
+		if (unlikely(wkq_err))
+			drinfo = ERR_PTR(wkq_err);
+	}
+	err = PTR_ERR(drinfo);
+	if (IS_ERR_OR_NULL(drinfo))
+		goto out;
+
+	err = 0;
+	oldname.len = drinfo->oldnamelen;
+	oldname.name = drinfo->oldname;
+	if (au_qstreq(w->qname, &oldname)) {
+		/* the name is renamed back */
+		kfree(drinfo);
+		drinfo = NULL;
+
+		infopath.dentry = info_dentry;
+		infopath.mnt = w->h_ppath.mnt;
+		h_dir = d_inode(w->h_ppath.dentry);
+		delegated = NULL;
+		inode_lock_nested(h_dir, AuLsc_I_PARENT);
+		e = vfsub_unlink(h_dir, &infopath, &delegated, !w->no_sio);
+		inode_unlock(h_dir);
+		if (unlikely(e))
+			AuIOErr("ignored %d, %pd2\n", e, &infopath.dentry);
+		if (unlikely(e == -EWOULDBLOCK))
+			iput(delegated);
+	}
+	kfree(w->drinfo[bindex]);
+	w->drinfo[bindex] = drinfo;
+	dput(info_dentry);
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static void au_dr_lkup_free(struct au_drinfo **drinfo, int n)
+{
+	struct au_drinfo **p = drinfo;
+
+	while (n-- > 0)
+		kfree(*drinfo++);
+	kfree(p);
+}
+
+int au_dr_lkup(struct au_do_lookup_args *lkup, struct dentry *dentry,
+	       aufs_bindex_t btgt)
+{
+	int err, ninfo;
+	struct au_drinfo_load w;
+	aufs_bindex_t bindex, bbot;
+	struct au_branch *br;
+	struct inode *h_dir;
+	struct au_dr_hino *ent;
+	struct super_block *sb;
+
+	AuDbg("%.*s, name %.*s, whname %.*s, b%d\n",
+	      AuLNPair(&dentry->d_name), AuLNPair(&lkup->dirren.dr_name),
+	      AuLNPair(&lkup->whname), btgt);
+
+	sb = dentry->d_sb;
+	bbot = au_sbbot(sb);
+	w.ninfo = bbot + 1;
+	if (!lkup->dirren.drinfo) {
+		lkup->dirren.drinfo = kcalloc(w.ninfo,
+					      sizeof(*lkup->dirren.drinfo),
+					      GFP_NOFS);
+		if (unlikely(!lkup->dirren.drinfo)) {
+			err = -ENOMEM;
+			goto out;
+		}
+		lkup->dirren.ninfo = w.ninfo;
+	}
+	w.drinfo = lkup->dirren.drinfo;
+	w.no_sio = !!uid_eq(current_fsuid(), GLOBAL_ROOT_UID);
+	w.h_ppath.dentry = au_h_dptr(dentry, btgt);
+	AuDebugOn(!w.h_ppath.dentry);
+	w.h_ppath.mnt = au_sbr_mnt(sb, btgt);
+	w.qname = &dentry->d_name;
+
+	ninfo = 0;
+	for (bindex = btgt + 1; bindex <= bbot; bindex++) {
+		br = au_sbr(sb, bindex);
+		err = au_drinfo_load(&w, bindex, br);
+		if (unlikely(err))
+			goto out_free;
+		if (w.drinfo[bindex])
+			ninfo++;
+	}
+	if (!ninfo) {
+		br = au_sbr(sb, btgt);
+		h_dir = d_inode(w.h_ppath.dentry);
+		ent = au_dr_hino_find(&br->br_dirren, h_dir->i_ino);
+		AuDebugOn(!ent);
+		au_dr_hino_del(&br->br_dirren, ent);
+		kfree(ent);
+	}
+	goto out; /* success */
+
+out_free:
+	au_dr_lkup_free(lkup->dirren.drinfo, lkup->dirren.ninfo);
+	lkup->dirren.ninfo = 0;
+	lkup->dirren.drinfo = NULL;
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+void au_dr_lkup_fin(struct au_do_lookup_args *lkup)
+{
+	au_dr_lkup_free(lkup->dirren.drinfo, lkup->dirren.ninfo);
+}
+
+int au_dr_lkup_name(struct au_do_lookup_args *lkup, aufs_bindex_t btgt)
+{
+	int err;
+	struct au_drinfo *drinfo;
+
+	err = 0;
+	if (!lkup->dirren.drinfo)
+		goto out;
+	AuDebugOn(lkup->dirren.ninfo < btgt + 1);
+	drinfo = lkup->dirren.drinfo[btgt + 1];
+	if (!drinfo)
+		goto out;
+
+	kfree(lkup->whname.name);
+	lkup->whname.name = NULL;
+	lkup->dirren.dr_name.len = drinfo->oldnamelen;
+	lkup->dirren.dr_name.name = drinfo->oldname;
+	lkup->name = &lkup->dirren.dr_name;
+	err = au_wh_name_alloc(&lkup->whname, lkup->name);
+	if (!err)
+		AuDbg("name %.*s, whname %.*s, b%d\n",
+		      AuLNPair(lkup->name), AuLNPair(&lkup->whname),
+		      btgt);
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+int au_dr_lkup_h_ino(struct au_do_lookup_args *lkup, aufs_bindex_t bindex,
+		     ino_t h_ino)
+{
+	int match;
+	struct au_drinfo *drinfo;
+
+	match = 1;
+	if (!lkup->dirren.drinfo)
+		goto out;
+	AuDebugOn(lkup->dirren.ninfo < bindex + 1);
+	drinfo = lkup->dirren.drinfo[bindex + 1];
+	if (!drinfo)
+		goto out;
+
+	match = (drinfo->ino == h_ino);
+	AuDbg("match %d\n", match);
+
+out:
+	return match;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int au_dr_opt_set(struct super_block *sb)
+{
+	int err;
+	aufs_bindex_t bindex, bbot;
+	struct au_branch *br;
+
+	err = 0;
+	bbot = au_sbbot(sb);
+	for (bindex = 0; !err && bindex <= bbot; bindex++) {
+		br = au_sbr(sb, bindex);
+		err = au_dr_hino(sb, bindex, /*br*/NULL, &br->br_path);
+	}
+
+	return err;
+}
+
+int au_dr_opt_flush(struct super_block *sb)
+{
+	int err;
+	aufs_bindex_t bindex, bbot;
+	struct au_branch *br;
+
+	err = 0;
+	bbot = au_sbbot(sb);
+	for (bindex = 0; !err && bindex <= bbot; bindex++) {
+		br = au_sbr(sb, bindex);
+		if (au_br_writable(br->br_perm))
+			err = au_dr_hino(sb, bindex, /*br*/NULL, /*path*/NULL);
+	}
+
+	return err;
+}
+
+int au_dr_opt_clr(struct super_block *sb, int no_flush)
+{
+	int err;
+	aufs_bindex_t bindex, bbot;
+	struct au_branch *br;
+
+	err = 0;
+	if (!no_flush) {
+		err = au_dr_opt_flush(sb);
+		if (unlikely(err))
+			goto out;
+	}
+
+	bbot = au_sbbot(sb);
+	for (bindex = 0; bindex <= bbot; bindex++) {
+		br = au_sbr(sb, bindex);
+		au_dr_hino_free(&br->br_dirren);
+	}
+
+out:
+	return err;
+}
diff --git a/include/fs/aufs/dirren.h b/include/fs/aufs/dirren.h
new file mode 100644
index 000000000..847480eeb
--- /dev/null
+++ b/include/fs/aufs/dirren.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2017-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * renamed dir info
+ */
+
+#ifndef __AUFS_DIRREN_H__
+#define __AUFS_DIRREN_H__
+
+#ifdef __KERNEL__
+
+#include <linux/dcache.h>
+#include <linux/statfs.h>
+#include <linux/uuid.h>
+#include "hbl.h"
+
+#define AuDirren_NHASH 100
+
+#ifdef CONFIG_AUFS_DIRREN
+enum au_brid_type {
+	AuBrid_Unset,
+	AuBrid_UUID,
+	AuBrid_FSID,
+	AuBrid_DEV
+};
+
+struct au_dr_brid {
+	enum au_brid_type	type;
+	union {
+		uuid_t	uuid;	/* unimplemented yet */
+		fsid_t	fsid;
+		dev_t	dev;
+	};
+};
+
+/* 20 is the max digits length of ulong 64 */
+/* brid-type "_" uuid "_" inum */
+#define AUFS_DIRREN_FNAME_SZ	(1 + 1 + UUID_STRING_LEN + 20)
+#define AUFS_DIRREN_ENV_VAL_SZ	(AUFS_DIRREN_FNAME_SZ + 1 + 20)
+
+struct au_dr_hino {
+	struct hlist_bl_node	dr_hnode;
+	ino_t			dr_h_ino;
+};
+
+struct au_dr_br {
+	struct hlist_bl_head	dr_h_ino[AuDirren_NHASH];
+	struct au_dr_brid	dr_brid;
+};
+
+struct au_dr_lookup {
+	/* dr_name is pointed by struct au_do_lookup_args.name */
+	struct qstr		dr_name; /* subset of dr_info */
+	aufs_bindex_t		ninfo;
+	struct au_drinfo	**drinfo;
+};
+#else
+struct au_dr_hino;
+/* empty */
+struct au_dr_br { };
+struct au_dr_lookup { };
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+struct au_branch;
+struct au_do_lookup_args;
+struct au_hinode;
+#ifdef CONFIG_AUFS_DIRREN
+int au_dr_hino_test_add(struct au_dr_br *dr, ino_t h_ino,
+			struct au_dr_hino *add_ent);
+void au_dr_hino_free(struct au_dr_br *dr);
+int au_dr_br_init(struct super_block *sb, struct au_branch *br,
+		  const struct path *path);
+int au_dr_br_fin(struct super_block *sb, struct au_branch *br);
+int au_dr_rename(struct dentry *src, aufs_bindex_t bindex,
+		 struct qstr *dst_name, void *_rev);
+void au_dr_rename_fin(struct dentry *src, aufs_bindex_t btgt, void *rev);
+void au_dr_rename_rev(struct dentry *src, aufs_bindex_t bindex, void *rev);
+int au_dr_lkup(struct au_do_lookup_args *lkup, struct dentry *dentry,
+	       aufs_bindex_t bindex);
+int au_dr_lkup_name(struct au_do_lookup_args *lkup, aufs_bindex_t btgt);
+int au_dr_lkup_h_ino(struct au_do_lookup_args *lkup, aufs_bindex_t bindex,
+		     ino_t h_ino);
+void au_dr_lkup_fin(struct au_do_lookup_args *lkup);
+int au_dr_opt_set(struct super_block *sb);
+int au_dr_opt_flush(struct super_block *sb);
+int au_dr_opt_clr(struct super_block *sb, int no_flush);
+#else
+AuStubInt0(au_dr_hino_test_add, struct au_dr_br *dr, ino_t h_ino,
+	   struct au_dr_hino *add_ent);
+AuStubVoid(au_dr_hino_free, struct au_dr_br *dr);
+AuStubInt0(au_dr_br_init, struct super_block *sb, struct au_branch *br,
+	   const struct path *path);
+AuStubInt0(au_dr_br_fin, struct super_block *sb, struct au_branch *br);
+AuStubInt0(au_dr_rename, struct dentry *src, aufs_bindex_t bindex,
+	   struct qstr *dst_name, void *_rev);
+AuStubVoid(au_dr_rename_fin, struct dentry *src, aufs_bindex_t btgt, void *rev);
+AuStubVoid(au_dr_rename_rev, struct dentry *src, aufs_bindex_t bindex,
+	   void *rev);
+AuStubInt0(au_dr_lkup, struct au_do_lookup_args *lkup, struct dentry *dentry,
+	   aufs_bindex_t bindex);
+AuStubInt0(au_dr_lkup_name, struct au_do_lookup_args *lkup, aufs_bindex_t btgt);
+AuStubInt0(au_dr_lkup_h_ino, struct au_do_lookup_args *lkup,
+	   aufs_bindex_t bindex, ino_t h_ino);
+AuStubVoid(au_dr_lkup_fin, struct au_do_lookup_args *lkup);
+AuStubInt0(au_dr_opt_set, struct super_block *sb);
+AuStubInt0(au_dr_opt_flush, struct super_block *sb);
+AuStubInt0(au_dr_opt_clr, struct super_block *sb, int no_flush);
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+#ifdef CONFIG_AUFS_DIRREN
+static inline int au_dr_ihash(ino_t h_ino)
+{
+	return h_ino % AuDirren_NHASH;
+}
+#else
+AuStubInt0(au_dr_ihash, ino_t h_ino);
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_DIRREN_H__ */
diff --git a/include/fs/aufs/dynop.c b/include/fs/aufs/dynop.c
new file mode 100644
index 000000000..d38dd77a7
--- /dev/null
+++ b/include/fs/aufs/dynop.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright (C) 2010-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * dynamically customizable operations for regular files
+ */
+
+#include "aufs.h"
+
+#define DyPrSym(key)	AuDbgSym(key->dk_op.dy_hop)
+
+/*
+ * How large will these lists be?
+ * Usually just a few elements, 20-30 at most for each, I guess.
+ */
+static struct hlist_bl_head dynop[AuDyLast];
+
+static struct au_dykey *dy_gfind_get(struct hlist_bl_head *hbl,
+				     const void *h_op)
+{
+	struct au_dykey *key, *tmp;
+	struct hlist_bl_node *pos;
+
+	key = NULL;
+	hlist_bl_lock(hbl);
+	hlist_bl_for_each_entry(tmp, pos, hbl, dk_hnode)
+		if (tmp->dk_op.dy_hop == h_op) {
+			key = tmp;
+			kref_get(&key->dk_kref);
+			break;
+		}
+	hlist_bl_unlock(hbl);
+
+	return key;
+}
+
+static struct au_dykey *dy_bradd(struct au_branch *br, struct au_dykey *key)
+{
+	struct au_dykey **k, *found;
+	const void *h_op = key->dk_op.dy_hop;
+	int i;
+
+	found = NULL;
+	k = br->br_dykey;
+	for (i = 0; i < AuBrDynOp; i++)
+		if (k[i]) {
+			if (k[i]->dk_op.dy_hop == h_op) {
+				found = k[i];
+				break;
+			}
+		} else
+			break;
+	if (!found) {
+		spin_lock(&br->br_dykey_lock);
+		for (; i < AuBrDynOp; i++)
+			if (k[i]) {
+				if (k[i]->dk_op.dy_hop == h_op) {
+					found = k[i];
+					break;
+				}
+			} else {
+				k[i] = key;
+				break;
+			}
+		spin_unlock(&br->br_dykey_lock);
+		BUG_ON(i == AuBrDynOp); /* expand the array */
+	}
+
+	return found;
+}
+
+/* kref_get() if @key is already added */
+static struct au_dykey *dy_gadd(struct hlist_bl_head *hbl, struct au_dykey *key)
+{
+	struct au_dykey *tmp, *found;
+	struct hlist_bl_node *pos;
+	const void *h_op = key->dk_op.dy_hop;
+
+	found = NULL;
+	hlist_bl_lock(hbl);
+	hlist_bl_for_each_entry(tmp, pos, hbl, dk_hnode)
+		if (tmp->dk_op.dy_hop == h_op) {
+			kref_get(&tmp->dk_kref);
+			found = tmp;
+			break;
+		}
+	if (!found)
+		hlist_bl_add_head(&key->dk_hnode, hbl);
+	hlist_bl_unlock(hbl);
+
+	if (!found)
+		DyPrSym(key);
+	return found;
+}
+
+static void dy_free_rcu(struct rcu_head *rcu)
+{
+	struct au_dykey *key;
+
+	key = container_of(rcu, struct au_dykey, dk_rcu);
+	DyPrSym(key);
+	kfree(key);
+}
+
+static void dy_free(struct kref *kref)
+{
+	struct au_dykey *key;
+	struct hlist_bl_head *hbl;
+
+	key = container_of(kref, struct au_dykey, dk_kref);
+	hbl = dynop + key->dk_op.dy_type;
+	au_hbl_del(&key->dk_hnode, hbl);
+	call_rcu(&key->dk_rcu, dy_free_rcu);
+}
+
+void au_dy_put(struct au_dykey *key)
+{
+	kref_put(&key->dk_kref, dy_free);
+}
+
+/* ---------------------------------------------------------------------- */
+
+#define DyDbgSize(cnt, op)	AuDebugOn(cnt != sizeof(op)/sizeof(void *))
+
+#ifdef CONFIG_AUFS_DEBUG
+#define DyDbgDeclare(cnt)	unsigned int cnt = 0
+#define DyDbgInc(cnt)		do { cnt++; } while (0)
+#else
+#define DyDbgDeclare(cnt)	do {} while (0)
+#define DyDbgInc(cnt)		do {} while (0)
+#endif
+
+#define DySet(func, dst, src, h_op, h_sb) do {				\
+	DyDbgInc(cnt);							\
+	if (h_op->func) {						\
+		if (src.func)						\
+			dst.func = src.func;				\
+		else							\
+			AuDbg("%s %s\n", au_sbtype(h_sb), #func);	\
+	}								\
+} while (0)
+
+#define DySetForce(func, dst, src) do {		\
+	AuDebugOn(!src.func);			\
+	DyDbgInc(cnt);				\
+	dst.func = src.func;			\
+} while (0)
+
+#define DySetAop(func) \
+	DySet(func, dyaop->da_op, aufs_aop, h_aop, h_sb)
+#define DySetAopForce(func) \
+	DySetForce(func, dyaop->da_op, aufs_aop)
+
+static void dy_aop(struct au_dykey *key, const void *h_op,
+		   struct super_block *h_sb __maybe_unused)
+{
+	struct au_dyaop *dyaop = (void *)key;
+	const struct address_space_operations *h_aop = h_op;
+	DyDbgDeclare(cnt);
+
+	AuDbg("%s\n", au_sbtype(h_sb));
+
+	DySetAop(writepage);
+	DySetAopForce(readpage);	/* force */
+	DySetAop(writepages);
+	DySetAop(set_page_dirty);
+	DySetAop(readpages);
+	DySetAop(write_begin);
+	DySetAop(write_end);
+	DySetAop(bmap);
+	DySetAop(invalidatepage);
+	DySetAop(releasepage);
+	DySetAop(freepage);
+	/* this one will be changed according to an aufs mount option */
+	DySetAop(direct_IO);
+	DySetAop(migratepage);
+	DySetAop(isolate_page);
+	DySetAop(putback_page);
+	DySetAop(launder_page);
+	DySetAop(is_partially_uptodate);
+	DySetAop(is_dirty_writeback);
+	DySetAop(error_remove_page);
+	DySetAop(swap_activate);
+	DySetAop(swap_deactivate);
+
+	DyDbgSize(cnt, *h_aop);
+}
+
+/* ---------------------------------------------------------------------- */
+
+static void dy_bug(struct kref *kref)
+{
+	BUG();
+}
+
+static struct au_dykey *dy_get(struct au_dynop *op, struct au_branch *br)
+{
+	struct au_dykey *key, *old;
+	struct hlist_bl_head *hbl;
+	struct op {
+		unsigned int sz;
+		void (*set)(struct au_dykey *key, const void *h_op,
+			    struct super_block *h_sb __maybe_unused);
+	};
+	static const struct op a[] = {
+		[AuDy_AOP] = {
+			.sz	= sizeof(struct au_dyaop),
+			.set	= dy_aop
+		}
+	};
+	const struct op *p;
+
+	hbl = dynop + op->dy_type;
+	key = dy_gfind_get(hbl, op->dy_hop);
+	if (key)
+		goto out_add; /* success */
+
+	p = a + op->dy_type;
+	key = kzalloc(p->sz, GFP_NOFS);
+	if (unlikely(!key)) {
+		key = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	key->dk_op.dy_hop = op->dy_hop;
+	kref_init(&key->dk_kref);
+	p->set(key, op->dy_hop, au_br_sb(br));
+	old = dy_gadd(hbl, key);
+	if (old) {
+		kfree(key);
+		key = old;
+	}
+
+out_add:
+	old = dy_bradd(br, key);
+	if (old)
+		/* its ref-count should never be zero here */
+		kref_put(&key->dk_kref, dy_bug);
+out:
+	return key;
+}
+
+/* ---------------------------------------------------------------------- */
+/*
+ * Aufs prohibits O_DIRECT by defaut even if the branch supports it.
+ * This behaviour is necessary to return an error from open(O_DIRECT) instead
+ * of the succeeding I/O. The dio mount option enables O_DIRECT and makes
+ * open(O_DIRECT) always succeed, but the succeeding I/O may return an error.
+ * See the aufs manual in detail.
+ */
+static void dy_adx(struct au_dyaop *dyaop, int do_dx)
+{
+	if (!do_dx)
+		dyaop->da_op.direct_IO = NULL;
+	else
+		dyaop->da_op.direct_IO = aufs_aop.direct_IO;
+}
+
+static struct au_dyaop *dy_aget(struct au_branch *br,
+				const struct address_space_operations *h_aop,
+				int do_dx)
+{
+	struct au_dyaop *dyaop;
+	struct au_dynop op;
+
+	op.dy_type = AuDy_AOP;
+	op.dy_haop = h_aop;
+	dyaop = (void *)dy_get(&op, br);
+	if (IS_ERR(dyaop))
+		goto out;
+	dy_adx(dyaop, do_dx);
+
+out:
+	return dyaop;
+}
+
+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
+		struct inode *h_inode)
+{
+	int err, do_dx;
+	struct super_block *sb;
+	struct au_branch *br;
+	struct au_dyaop *dyaop;
+
+	AuDebugOn(!S_ISREG(h_inode->i_mode));
+	IiMustWriteLock(inode);
+
+	sb = inode->i_sb;
+	br = au_sbr(sb, bindex);
+	do_dx = !!au_opt_test(au_mntflags(sb), DIO);
+	dyaop = dy_aget(br, h_inode->i_mapping->a_ops, do_dx);
+	err = PTR_ERR(dyaop);
+	if (IS_ERR(dyaop))
+		/* unnecessary to call dy_fput() */
+		goto out;
+
+	err = 0;
+	inode->i_mapping->a_ops = &dyaop->da_op;
+
+out:
+	return err;
+}
+
+/*
+ * Is it safe to replace a_ops during the inode/file is in operation?
+ * Yes, I hope so.
+ */
+int au_dy_irefresh(struct inode *inode)
+{
+	int err;
+	aufs_bindex_t btop;
+	struct inode *h_inode;
+
+	err = 0;
+	if (S_ISREG(inode->i_mode)) {
+		btop = au_ibtop(inode);
+		h_inode = au_h_iptr(inode, btop);
+		err = au_dy_iaop(inode, btop, h_inode);
+	}
+	return err;
+}
+
+void au_dy_arefresh(int do_dx)
+{
+	struct hlist_bl_head *hbl;
+	struct hlist_bl_node *pos;
+	struct au_dykey *key;
+
+	hbl = dynop + AuDy_AOP;
+	hlist_bl_lock(hbl);
+	hlist_bl_for_each_entry(key, pos, hbl, dk_hnode)
+		dy_adx((void *)key, do_dx);
+	hlist_bl_unlock(hbl);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void __init au_dy_init(void)
+{
+	int i;
+
+	/* make sure that 'struct au_dykey *' can be any type */
+	BUILD_BUG_ON(offsetof(struct au_dyaop, da_key));
+
+	for (i = 0; i < AuDyLast; i++)
+		INIT_HLIST_BL_HEAD(dynop + i);
+}
+
+void au_dy_fin(void)
+{
+	int i;
+
+	for (i = 0; i < AuDyLast; i++)
+		WARN_ON(!hlist_bl_empty(dynop + i));
+}
diff --git a/include/fs/aufs/dynop.h b/include/fs/aufs/dynop.h
new file mode 100644
index 000000000..81a770505
--- /dev/null
+++ b/include/fs/aufs/dynop.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2010-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * dynamically customizable operations (for regular files only)
+ */
+
+#ifndef __AUFS_DYNOP_H__
+#define __AUFS_DYNOP_H__
+
+#ifdef __KERNEL__
+
+#include <linux/fs.h>
+#include <linux/kref.h>
+
+enum {AuDy_AOP, AuDyLast};
+
+struct au_dynop {
+	int						dy_type;
+	union {
+		const void				*dy_hop;
+		const struct address_space_operations	*dy_haop;
+	};
+};
+
+struct au_dykey {
+	union {
+		struct hlist_bl_node	dk_hnode;
+		struct rcu_head		dk_rcu;
+	};
+	struct au_dynop		dk_op;
+
+	/*
+	 * during I am in the branch local array, kref is gotten. when the
+	 * branch is removed, kref is put.
+	 */
+	struct kref		dk_kref;
+};
+
+/* stop unioning since their sizes are very different from each other */
+struct au_dyaop {
+	struct au_dykey			da_key;
+	struct address_space_operations	da_op; /* not const */
+};
+
+/* ---------------------------------------------------------------------- */
+
+/* dynop.c */
+struct au_branch;
+void au_dy_put(struct au_dykey *key);
+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
+		struct inode *h_inode);
+int au_dy_irefresh(struct inode *inode);
+void au_dy_arefresh(int do_dio);
+
+void __init au_dy_init(void);
+void au_dy_fin(void);
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_DYNOP_H__ */
diff --git a/include/fs/aufs/export.c b/include/fs/aufs/export.c
new file mode 100644
index 000000000..155e07a8e
--- /dev/null
+++ b/include/fs/aufs/export.c
@@ -0,0 +1,836 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * export via nfs
+ */
+
+#include <linux/exportfs.h>
+#include <linux/fs_struct.h>
+#include <linux/namei.h>
+#include <linux/nsproxy.h>
+#include <linux/random.h>
+#include <linux/writeback.h>
+#include "aufs.h"
+
+union conv {
+#ifdef CONFIG_AUFS_INO_T_64
+	__u32 a[2];
+#else
+	__u32 a[1];
+#endif
+	ino_t ino;
+};
+
+static ino_t decode_ino(__u32 *a)
+{
+	union conv u;
+
+	BUILD_BUG_ON(sizeof(u.ino) != sizeof(u.a));
+	u.a[0] = a[0];
+#ifdef CONFIG_AUFS_INO_T_64
+	u.a[1] = a[1];
+#endif
+	return u.ino;
+}
+
+static void encode_ino(__u32 *a, ino_t ino)
+{
+	union conv u;
+
+	u.ino = ino;
+	a[0] = u.a[0];
+#ifdef CONFIG_AUFS_INO_T_64
+	a[1] = u.a[1];
+#endif
+}
+
+/* NFS file handle */
+enum {
+	Fh_br_id,
+	Fh_sigen,
+#ifdef CONFIG_AUFS_INO_T_64
+	/* support 64bit inode number */
+	Fh_ino1,
+	Fh_ino2,
+	Fh_dir_ino1,
+	Fh_dir_ino2,
+#else
+	Fh_ino1,
+	Fh_dir_ino1,
+#endif
+	Fh_igen,
+	Fh_h_type,
+	Fh_tail,
+
+	Fh_ino = Fh_ino1,
+	Fh_dir_ino = Fh_dir_ino1
+};
+
+static int au_test_anon(struct dentry *dentry)
+{
+	/* note: read d_flags without d_lock */
+	return !!(dentry->d_flags & DCACHE_DISCONNECTED);
+}
+
+int au_test_nfsd(void)
+{
+	int ret;
+	struct task_struct *tsk = current;
+	char comm[sizeof(tsk->comm)];
+
+	ret = 0;
+	if (tsk->flags & PF_KTHREAD) {
+		get_task_comm(comm, tsk);
+		ret = !strcmp(comm, "nfsd");
+	}
+
+	return ret;
+}
+
+/* ---------------------------------------------------------------------- */
+/* inode generation external table */
+
+void au_xigen_inc(struct inode *inode)
+{
+	loff_t pos;
+	ssize_t sz;
+	__u32 igen;
+	struct super_block *sb;
+	struct au_sbinfo *sbinfo;
+
+	sb = inode->i_sb;
+	AuDebugOn(!au_opt_test(au_mntflags(sb), XINO));
+
+	sbinfo = au_sbi(sb);
+	pos = inode->i_ino;
+	pos *= sizeof(igen);
+	igen = inode->i_generation + 1;
+	sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xigen, &igen,
+			 sizeof(igen), &pos);
+	if (sz == sizeof(igen))
+		return; /* success */
+
+	if (unlikely(sz >= 0))
+		AuIOErr("xigen error (%zd)\n", sz);
+}
+
+int au_xigen_new(struct inode *inode)
+{
+	int err;
+	loff_t pos;
+	ssize_t sz;
+	struct super_block *sb;
+	struct au_sbinfo *sbinfo;
+	struct file *file;
+
+	err = 0;
+	/* todo: dirty, at mount time */
+	if (inode->i_ino == AUFS_ROOT_INO)
+		goto out;
+	sb = inode->i_sb;
+	SiMustAnyLock(sb);
+	if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
+		goto out;
+
+	err = -EFBIG;
+	pos = inode->i_ino;
+	if (unlikely(au_loff_max / sizeof(inode->i_generation) - 1 < pos)) {
+		AuIOErr1("too large i%lld\n", pos);
+		goto out;
+	}
+	pos *= sizeof(inode->i_generation);
+
+	err = 0;
+	sbinfo = au_sbi(sb);
+	file = sbinfo->si_xigen;
+	BUG_ON(!file);
+
+	if (vfsub_f_size_read(file)
+	    < pos + sizeof(inode->i_generation)) {
+		inode->i_generation = atomic_inc_return(&sbinfo->si_xigen_next);
+		sz = xino_fwrite(sbinfo->si_xwrite, file, &inode->i_generation,
+				 sizeof(inode->i_generation), &pos);
+	} else
+		sz = xino_fread(sbinfo->si_xread, file, &inode->i_generation,
+				sizeof(inode->i_generation), &pos);
+	if (sz == sizeof(inode->i_generation))
+		goto out; /* success */
+
+	err = sz;
+	if (unlikely(sz >= 0)) {
+		err = -EIO;
+		AuIOErr("xigen error (%zd)\n", sz);
+	}
+
+out:
+	return err;
+}
+
+int au_xigen_set(struct super_block *sb, struct file *base)
+{
+	int err;
+	struct au_sbinfo *sbinfo;
+	struct file *file;
+
+	SiMustWriteLock(sb);
+
+	sbinfo = au_sbi(sb);
+	file = au_xino_create2(base, sbinfo->si_xigen);
+	err = PTR_ERR(file);
+	if (IS_ERR(file))
+		goto out;
+	err = 0;
+	if (sbinfo->si_xigen)
+		fput(sbinfo->si_xigen);
+	sbinfo->si_xigen = file;
+
+out:
+	return err;
+}
+
+void au_xigen_clr(struct super_block *sb)
+{
+	struct au_sbinfo *sbinfo;
+
+	SiMustWriteLock(sb);
+
+	sbinfo = au_sbi(sb);
+	if (sbinfo->si_xigen) {
+		fput(sbinfo->si_xigen);
+		sbinfo->si_xigen = NULL;
+	}
+}
+
+/* ---------------------------------------------------------------------- */
+
+static struct dentry *decode_by_ino(struct super_block *sb, ino_t ino,
+				    ino_t dir_ino)
+{
+	struct dentry *dentry, *d;
+	struct inode *inode;
+	unsigned int sigen;
+
+	dentry = NULL;
+	inode = ilookup(sb, ino);
+	if (!inode)
+		goto out;
+
+	dentry = ERR_PTR(-ESTALE);
+	sigen = au_sigen(sb);
+	if (unlikely(au_is_bad_inode(inode)
+		     || IS_DEADDIR(inode)
+		     || sigen != au_iigen(inode, NULL)))
+		goto out_iput;
+
+	dentry = NULL;
+	if (!dir_ino || S_ISDIR(inode->i_mode))
+		dentry = d_find_alias(inode);
+	else {
+		spin_lock(&inode->i_lock);
+		hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias) {
+			spin_lock(&d->d_lock);
+			if (!au_test_anon(d)
+			    && d_inode(d->d_parent)->i_ino == dir_ino) {
+				dentry = dget_dlock(d);
+				spin_unlock(&d->d_lock);
+				break;
+			}
+			spin_unlock(&d->d_lock);
+		}
+		spin_unlock(&inode->i_lock);
+	}
+	if (unlikely(dentry && au_digen_test(dentry, sigen))) {
+		/* need to refresh */
+		dput(dentry);
+		dentry = NULL;
+	}
+
+out_iput:
+	iput(inode);
+out:
+	AuTraceErrPtr(dentry);
+	return dentry;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* todo: dirty? */
+/* if exportfs_decode_fh() passed vfsmount*, we could be happy */
+
+struct au_compare_mnt_args {
+	/* input */
+	struct super_block *sb;
+
+	/* output */
+	struct vfsmount *mnt;
+};
+
+static int au_compare_mnt(struct vfsmount *mnt, void *arg)
+{
+	struct au_compare_mnt_args *a = arg;
+
+	if (mnt->mnt_sb != a->sb)
+		return 0;
+	a->mnt = mntget(mnt);
+	return 1;
+}
+
+static struct vfsmount *au_mnt_get(struct super_block *sb)
+{
+	int err;
+	struct path root;
+	struct au_compare_mnt_args args = {
+		.sb = sb
+	};
+
+	get_fs_root(current->fs, &root);
+	rcu_read_lock();
+	err = iterate_mounts(au_compare_mnt, &args, root.mnt);
+	rcu_read_unlock();
+	path_put(&root);
+	AuDebugOn(!err);
+	AuDebugOn(!args.mnt);
+	return args.mnt;
+}
+
+struct au_nfsd_si_lock {
+	unsigned int sigen;
+	aufs_bindex_t bindex, br_id;
+	unsigned char force_lock;
+};
+
+static int si_nfsd_read_lock(struct super_block *sb,
+			     struct au_nfsd_si_lock *nsi_lock)
+{
+	int err;
+	aufs_bindex_t bindex;
+
+	si_read_lock(sb, AuLock_FLUSH);
+
+	/* branch id may be wrapped around */
+	err = 0;
+	bindex = au_br_index(sb, nsi_lock->br_id);
+	if (bindex >= 0 && nsi_lock->sigen + AUFS_BRANCH_MAX > au_sigen(sb))
+		goto out; /* success */
+
+	err = -ESTALE;
+	bindex = -1;
+	if (!nsi_lock->force_lock)
+		si_read_unlock(sb);
+
+out:
+	nsi_lock->bindex = bindex;
+	return err;
+}
+
+struct find_name_by_ino {
+	struct dir_context ctx;
+	int called, found;
+	ino_t ino;
+	char *name;
+	int namelen;
+};
+
+static int
+find_name_by_ino(struct dir_context *ctx, const char *name, int namelen,
+		 loff_t offset, u64 ino, unsigned int d_type)
+{
+	struct find_name_by_ino *a = container_of(ctx, struct find_name_by_ino,
+						  ctx);
+
+	a->called++;
+	if (a->ino != ino)
+		return 0;
+
+	memcpy(a->name, name, namelen);
+	a->namelen = namelen;
+	a->found = 1;
+	return 1;
+}
+
+static struct dentry *au_lkup_by_ino(struct path *path, ino_t ino,
+				     struct au_nfsd_si_lock *nsi_lock)
+{
+	struct dentry *dentry, *parent;
+	struct file *file;
+	struct inode *dir;
+	struct find_name_by_ino arg = {
+		.ctx = {
+			.actor = find_name_by_ino
+		}
+	};
+	int err;
+
+	parent = path->dentry;
+	if (nsi_lock)
+		si_read_unlock(parent->d_sb);
+	file = vfsub_dentry_open(path, au_dir_roflags);
+	dentry = (void *)file;
+	if (IS_ERR(file))
+		goto out;
+
+	dentry = ERR_PTR(-ENOMEM);
+	arg.name = (void *)__get_free_page(GFP_NOFS);
+	if (unlikely(!arg.name))
+		goto out_file;
+	arg.ino = ino;
+	arg.found = 0;
+	do {
+		arg.called = 0;
+		/* smp_mb(); */
+		err = vfsub_iterate_dir(file, &arg.ctx);
+	} while (!err && !arg.found && arg.called);
+	dentry = ERR_PTR(err);
+	if (unlikely(err))
+		goto out_name;
+	/* instead of ENOENT */
+	dentry = ERR_PTR(-ESTALE);
+	if (!arg.found)
+		goto out_name;
+
+	/* do not call vfsub_lkup_one() */
+	dir = d_inode(parent);
+	dentry = vfsub_lookup_one_len_unlocked(arg.name, parent, arg.namelen);
+	AuTraceErrPtr(dentry);
+	if (IS_ERR(dentry))
+		goto out_name;
+	AuDebugOn(au_test_anon(dentry));
+	if (unlikely(d_really_is_negative(dentry))) {
+		dput(dentry);
+		dentry = ERR_PTR(-ENOENT);
+	}
+
+out_name:
+	free_page((unsigned long)arg.name);
+out_file:
+	fput(file);
+out:
+	if (unlikely(nsi_lock
+		     && si_nfsd_read_lock(parent->d_sb, nsi_lock) < 0))
+		if (!IS_ERR(dentry)) {
+			dput(dentry);
+			dentry = ERR_PTR(-ESTALE);
+		}
+	AuTraceErrPtr(dentry);
+	return dentry;
+}
+
+static struct dentry *decode_by_dir_ino(struct super_block *sb, ino_t ino,
+					ino_t dir_ino,
+					struct au_nfsd_si_lock *nsi_lock)
+{
+	struct dentry *dentry;
+	struct path path;
+
+	if (dir_ino != AUFS_ROOT_INO) {
+		path.dentry = decode_by_ino(sb, dir_ino, 0);
+		dentry = path.dentry;
+		if (!path.dentry || IS_ERR(path.dentry))
+			goto out;
+		AuDebugOn(au_test_anon(path.dentry));
+	} else
+		path.dentry = dget(sb->s_root);
+
+	path.mnt = au_mnt_get(sb);
+	dentry = au_lkup_by_ino(&path, ino, nsi_lock);
+	path_put(&path);
+
+out:
+	AuTraceErrPtr(dentry);
+	return dentry;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int h_acceptable(void *expv, struct dentry *dentry)
+{
+	return 1;
+}
+
+static char *au_build_path(struct dentry *h_parent, struct path *h_rootpath,
+			   char *buf, int len, struct super_block *sb)
+{
+	char *p;
+	int n;
+	struct path path;
+
+	p = d_path(h_rootpath, buf, len);
+	if (IS_ERR(p))
+		goto out;
+	n = strlen(p);
+
+	path.mnt = h_rootpath->mnt;
+	path.dentry = h_parent;
+	p = d_path(&path, buf, len);
+	if (IS_ERR(p))
+		goto out;
+	if (n != 1)
+		p += n;
+
+	path.mnt = au_mnt_get(sb);
+	path.dentry = sb->s_root;
+	p = d_path(&path, buf, len - strlen(p));
+	mntput(path.mnt);
+	if (IS_ERR(p))
+		goto out;
+	if (n != 1)
+		p[strlen(p)] = '/';
+
+out:
+	AuTraceErrPtr(p);
+	return p;
+}
+
+static
+struct dentry *decode_by_path(struct super_block *sb, ino_t ino, __u32 *fh,
+			      int fh_len, struct au_nfsd_si_lock *nsi_lock)
+{
+	struct dentry *dentry, *h_parent, *root;
+	struct super_block *h_sb;
+	char *pathname, *p;
+	struct vfsmount *h_mnt;
+	struct au_branch *br;
+	int err;
+	struct path path;
+
+	br = au_sbr(sb, nsi_lock->bindex);
+	h_mnt = au_br_mnt(br);
+	h_sb = h_mnt->mnt_sb;
+	/* todo: call lower fh_to_dentry()? fh_to_parent()? */
+	lockdep_off();
+	h_parent = exportfs_decode_fh(h_mnt, (void *)(fh + Fh_tail),
+				      fh_len - Fh_tail, fh[Fh_h_type],
+				      h_acceptable, /*context*/NULL);
+	lockdep_on();
+	dentry = h_parent;
+	if (unlikely(!h_parent || IS_ERR(h_parent))) {
+		AuWarn1("%s decode_fh failed, %ld\n",
+			au_sbtype(h_sb), PTR_ERR(h_parent));
+		goto out;
+	}
+	dentry = NULL;
+	if (unlikely(au_test_anon(h_parent))) {
+		AuWarn1("%s decode_fh returned a disconnected dentry\n",
+			au_sbtype(h_sb));
+		goto out_h_parent;
+	}
+
+	dentry = ERR_PTR(-ENOMEM);
+	pathname = (void *)__get_free_page(GFP_NOFS);
+	if (unlikely(!pathname))
+		goto out_h_parent;
+
+	root = sb->s_root;
+	path.mnt = h_mnt;
+	di_read_lock_parent(root, !AuLock_IR);
+	path.dentry = au_h_dptr(root, nsi_lock->bindex);
+	di_read_unlock(root, !AuLock_IR);
+	p = au_build_path(h_parent, &path, pathname, PAGE_SIZE, sb);
+	dentry = (void *)p;
+	if (IS_ERR(p))
+		goto out_pathname;
+
+	si_read_unlock(sb);
+	err = vfsub_kern_path(p, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
+	dentry = ERR_PTR(err);
+	if (unlikely(err))
+		goto out_relock;
+
+	dentry = ERR_PTR(-ENOENT);
+	AuDebugOn(au_test_anon(path.dentry));
+	if (unlikely(d_really_is_negative(path.dentry)))
+		goto out_path;
+
+	if (ino != d_inode(path.dentry)->i_ino)
+		dentry = au_lkup_by_ino(&path, ino, /*nsi_lock*/NULL);
+	else
+		dentry = dget(path.dentry);
+
+out_path:
+	path_put(&path);
+out_relock:
+	if (unlikely(si_nfsd_read_lock(sb, nsi_lock) < 0))
+		if (!IS_ERR(dentry)) {
+			dput(dentry);
+			dentry = ERR_PTR(-ESTALE);
+		}
+out_pathname:
+	free_page((unsigned long)pathname);
+out_h_parent:
+	dput(h_parent);
+out:
+	AuTraceErrPtr(dentry);
+	return dentry;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static struct dentry *
+aufs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len,
+		  int fh_type)
+{
+	struct dentry *dentry;
+	__u32 *fh = fid->raw;
+	struct au_branch *br;
+	ino_t ino, dir_ino;
+	struct au_nfsd_si_lock nsi_lock = {
+		.force_lock	= 0
+	};
+
+	dentry = ERR_PTR(-ESTALE);
+	/* it should never happen, but the file handle is unreliable */
+	if (unlikely(fh_len < Fh_tail))
+		goto out;
+	nsi_lock.sigen = fh[Fh_sigen];
+	nsi_lock.br_id = fh[Fh_br_id];
+
+	/* branch id may be wrapped around */
+	br = NULL;
+	if (unlikely(si_nfsd_read_lock(sb, &nsi_lock)))
+		goto out;
+	nsi_lock.force_lock = 1;
+
+	/* is this inode still cached? */
+	ino = decode_ino(fh + Fh_ino);
+	/* it should never happen */
+	if (unlikely(ino == AUFS_ROOT_INO))
+		goto out_unlock;
+
+	dir_ino = decode_ino(fh + Fh_dir_ino);
+	dentry = decode_by_ino(sb, ino, dir_ino);
+	if (IS_ERR(dentry))
+		goto out_unlock;
+	if (dentry)
+		goto accept;
+
+	/* is the parent dir cached? */
+	br = au_sbr(sb, nsi_lock.bindex);
+	au_br_get(br);
+	dentry = decode_by_dir_ino(sb, ino, dir_ino, &nsi_lock);
+	if (IS_ERR(dentry))
+		goto out_unlock;
+	if (dentry)
+		goto accept;
+
+	/* lookup path */
+	dentry = decode_by_path(sb, ino, fh, fh_len, &nsi_lock);
+	if (IS_ERR(dentry))
+		goto out_unlock;
+	if (unlikely(!dentry))
+		/* todo?: make it ESTALE */
+		goto out_unlock;
+
+accept:
+	if (!au_digen_test(dentry, au_sigen(sb))
+	    && d_inode(dentry)->i_generation == fh[Fh_igen])
+		goto out_unlock; /* success */
+
+	dput(dentry);
+	dentry = ERR_PTR(-ESTALE);
+out_unlock:
+	if (br)
+		au_br_put(br);
+	si_read_unlock(sb);
+out:
+	AuTraceErrPtr(dentry);
+	return dentry;
+}
+
+#if 0 /* reserved for future use */
+/* support subtreecheck option */
+static struct dentry *aufs_fh_to_parent(struct super_block *sb, struct fid *fid,
+					int fh_len, int fh_type)
+{
+	struct dentry *parent;
+	__u32 *fh = fid->raw;
+	ino_t dir_ino;
+
+	dir_ino = decode_ino(fh + Fh_dir_ino);
+	parent = decode_by_ino(sb, dir_ino, 0);
+	if (IS_ERR(parent))
+		goto out;
+	if (!parent)
+		parent = decode_by_path(sb, au_br_index(sb, fh[Fh_br_id]),
+					dir_ino, fh, fh_len);
+
+out:
+	AuTraceErrPtr(parent);
+	return parent;
+}
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+static int aufs_encode_fh(struct inode *inode, __u32 *fh, int *max_len,
+			  struct inode *dir)
+{
+	int err;
+	aufs_bindex_t bindex;
+	struct super_block *sb, *h_sb;
+	struct dentry *dentry, *parent, *h_parent;
+	struct inode *h_dir;
+	struct au_branch *br;
+
+	err = -ENOSPC;
+	if (unlikely(*max_len <= Fh_tail)) {
+		AuWarn1("NFSv2 client (max_len %d)?\n", *max_len);
+		goto out;
+	}
+
+	err = FILEID_ROOT;
+	if (inode->i_ino == AUFS_ROOT_INO) {
+		AuDebugOn(inode->i_ino != AUFS_ROOT_INO);
+		goto out;
+	}
+
+	h_parent = NULL;
+	sb = inode->i_sb;
+	err = si_read_lock(sb, AuLock_FLUSH);
+	if (unlikely(err))
+		goto out;
+
+#ifdef CONFIG_AUFS_DEBUG
+	if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
+		AuWarn1("NFS-exporting requires xino\n");
+#endif
+	err = -EIO;
+	parent = NULL;
+	ii_read_lock_child(inode);
+	bindex = au_ibtop(inode);
+	if (!dir) {
+		dentry = d_find_any_alias(inode);
+		if (unlikely(!dentry))
+			goto out_unlock;
+		AuDebugOn(au_test_anon(dentry));
+		parent = dget_parent(dentry);
+		dput(dentry);
+		if (unlikely(!parent))
+			goto out_unlock;
+		if (d_really_is_positive(parent))
+			dir = d_inode(parent);
+	}
+
+	ii_read_lock_parent(dir);
+	h_dir = au_h_iptr(dir, bindex);
+	ii_read_unlock(dir);
+	if (unlikely(!h_dir))
+		goto out_parent;
+	h_parent = d_find_any_alias(h_dir);
+	if (unlikely(!h_parent))
+		goto out_hparent;
+
+	err = -EPERM;
+	br = au_sbr(sb, bindex);
+	h_sb = au_br_sb(br);
+	if (unlikely(!h_sb->s_export_op)) {
+		AuErr1("%s branch is not exportable\n", au_sbtype(h_sb));
+		goto out_hparent;
+	}
+
+	fh[Fh_br_id] = br->br_id;
+	fh[Fh_sigen] = au_sigen(sb);
+	encode_ino(fh + Fh_ino, inode->i_ino);
+	encode_ino(fh + Fh_dir_ino, dir->i_ino);
+	fh[Fh_igen] = inode->i_generation;
+
+	*max_len -= Fh_tail;
+	fh[Fh_h_type] = exportfs_encode_fh(h_parent, (void *)(fh + Fh_tail),
+					   max_len,
+					   /*connectable or subtreecheck*/0);
+	err = fh[Fh_h_type];
+	*max_len += Fh_tail;
+	/* todo: macros? */
+	if (err != FILEID_INVALID)
+		err = 99;
+	else
+		AuWarn1("%s encode_fh failed\n", au_sbtype(h_sb));
+
+out_hparent:
+	dput(h_parent);
+out_parent:
+	dput(parent);
+out_unlock:
+	ii_read_unlock(inode);
+	si_read_unlock(sb);
+out:
+	if (unlikely(err < 0))
+		err = FILEID_INVALID;
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int aufs_commit_metadata(struct inode *inode)
+{
+	int err;
+	aufs_bindex_t bindex;
+	struct super_block *sb;
+	struct inode *h_inode;
+	int (*f)(struct inode *inode);
+
+	sb = inode->i_sb;
+	si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
+	ii_write_lock_child(inode);
+	bindex = au_ibtop(inode);
+	AuDebugOn(bindex < 0);
+	h_inode = au_h_iptr(inode, bindex);
+
+	f = h_inode->i_sb->s_export_op->commit_metadata;
+	if (f)
+		err = f(h_inode);
+	else {
+		struct writeback_control wbc = {
+			.sync_mode	= WB_SYNC_ALL,
+			.nr_to_write	= 0 /* metadata only */
+		};
+
+		err = sync_inode(h_inode, &wbc);
+	}
+
+	au_cpup_attr_timesizes(inode);
+	ii_write_unlock(inode);
+	si_read_unlock(sb);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static struct export_operations aufs_export_op = {
+	.fh_to_dentry		= aufs_fh_to_dentry,
+	/* .fh_to_parent	= aufs_fh_to_parent, */
+	.encode_fh		= aufs_encode_fh,
+	.commit_metadata	= aufs_commit_metadata
+};
+
+void au_export_init(struct super_block *sb)
+{
+	struct au_sbinfo *sbinfo;
+	__u32 u;
+
+	BUILD_BUG_ON_MSG(IS_BUILTIN(CONFIG_AUFS_FS)
+			 && IS_MODULE(CONFIG_EXPORTFS),
+			 AUFS_NAME ": unsupported configuration "
+			 "CONFIG_EXPORTFS=m and CONFIG_AUFS_FS=y");
+
+	sb->s_export_op = &aufs_export_op;
+	sbinfo = au_sbi(sb);
+	sbinfo->si_xigen = NULL;
+	get_random_bytes(&u, sizeof(u));
+	BUILD_BUG_ON(sizeof(u) != sizeof(int));
+	atomic_set(&sbinfo->si_xigen_next, u);
+}
diff --git a/include/fs/aufs/f_op.c b/include/fs/aufs/f_op.c
new file mode 100644
index 000000000..00df9096a
--- /dev/null
+++ b/include/fs/aufs/f_op.c
@@ -0,0 +1,817 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * file and vm operations
+ */
+
+#include <linux/aio.h>
+#include <linux/fs_stack.h>
+#include <linux/mman.h>
+#include <linux/security.h>
+#include "aufs.h"
+
+int au_do_open_nondir(struct file *file, int flags, struct file *h_file)
+{
+	int err;
+	aufs_bindex_t bindex;
+	struct dentry *dentry, *h_dentry;
+	struct au_finfo *finfo;
+	struct inode *h_inode;
+
+	FiMustWriteLock(file);
+
+	err = 0;
+	dentry = file->f_path.dentry;
+	AuDebugOn(IS_ERR_OR_NULL(dentry));
+	finfo = au_fi(file);
+	memset(&finfo->fi_htop, 0, sizeof(finfo->fi_htop));
+	atomic_set(&finfo->fi_mmapped, 0);
+	bindex = au_dbtop(dentry);
+	if (!h_file) {
+		h_dentry = au_h_dptr(dentry, bindex);
+		err = vfsub_test_mntns(file->f_path.mnt, h_dentry->d_sb);
+		if (unlikely(err))
+			goto out;
+		h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
+	} else {
+		h_dentry = h_file->f_path.dentry;
+		err = vfsub_test_mntns(file->f_path.mnt, h_dentry->d_sb);
+		if (unlikely(err))
+			goto out;
+		get_file(h_file);
+	}
+	if (IS_ERR(h_file))
+		err = PTR_ERR(h_file);
+	else {
+		if ((flags & __O_TMPFILE)
+		    && !(flags & O_EXCL)) {
+			h_inode = file_inode(h_file);
+			spin_lock(&h_inode->i_lock);
+			h_inode->i_state |= I_LINKABLE;
+			spin_unlock(&h_inode->i_lock);
+		}
+		au_set_fbtop(file, bindex);
+		au_set_h_fptr(file, bindex, h_file);
+		au_update_figen(file);
+		/* todo: necessary? */
+		/* file->f_ra = h_file->f_ra; */
+	}
+
+out:
+	return err;
+}
+
+static int aufs_open_nondir(struct inode *inode __maybe_unused,
+			    struct file *file)
+{
+	int err;
+	struct super_block *sb;
+	struct au_do_open_args args = {
+		.open	= au_do_open_nondir
+	};
+
+	AuDbg("%pD, f_flags 0x%x, f_mode 0x%x\n",
+	      file, vfsub_file_flags(file), file->f_mode);
+
+	sb = file->f_path.dentry->d_sb;
+	si_read_lock(sb, AuLock_FLUSH);
+	err = au_do_open(file, &args);
+	si_read_unlock(sb);
+	return err;
+}
+
+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file)
+{
+	struct au_finfo *finfo;
+	aufs_bindex_t bindex;
+
+	finfo = au_fi(file);
+	au_hbl_del(&finfo->fi_hlist,
+		   &au_sbi(file->f_path.dentry->d_sb)->si_files);
+	bindex = finfo->fi_btop;
+	if (bindex >= 0)
+		au_set_h_fptr(file, bindex, NULL);
+
+	au_finfo_fin(file);
+	return 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_do_flush_nondir(struct file *file, fl_owner_t id)
+{
+	int err;
+	struct file *h_file;
+
+	err = 0;
+	h_file = au_hf_top(file);
+	if (h_file)
+		err = vfsub_flush(h_file, id);
+	return err;
+}
+
+static int aufs_flush_nondir(struct file *file, fl_owner_t id)
+{
+	return au_do_flush(file, id, au_do_flush_nondir);
+}
+
+/* ---------------------------------------------------------------------- */
+/*
+ * read and write functions acquire [fdi]_rwsem once, but release before
+ * mmap_sem. This is because to stop a race condition between mmap(2).
+ * Releasing these aufs-rwsem should be safe, no branch-mamagement (by keeping
+ * si_rwsem), no harmful copy-up should happen. Actually copy-up may happen in
+ * read functions after [fdi]_rwsem are released, but it should be harmless.
+ */
+
+/* Callers should call au_read_post() or fput() in the end */
+struct file *au_read_pre(struct file *file, int keep_fi, unsigned int lsc)
+{
+	struct file *h_file;
+	int err;
+
+	err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0, lsc);
+	if (!err) {
+		di_read_unlock(file->f_path.dentry, AuLock_IR);
+		h_file = au_hf_top(file);
+		get_file(h_file);
+		if (!keep_fi)
+			fi_read_unlock(file);
+	} else
+		h_file = ERR_PTR(err);
+
+	return h_file;
+}
+
+static void au_read_post(struct inode *inode, struct file *h_file)
+{
+	/* update without lock, I don't think it a problem */
+	fsstack_copy_attr_atime(inode, file_inode(h_file));
+	fput(h_file);
+}
+
+struct au_write_pre {
+	/* input */
+	unsigned int lsc;
+
+	/* output */
+	blkcnt_t blks;
+	aufs_bindex_t btop;
+};
+
+/*
+ * return with iinfo is write-locked
+ * callers should call au_write_post() or iinfo_write_unlock() + fput() in the
+ * end
+ */
+static struct file *au_write_pre(struct file *file, int do_ready,
+				 struct au_write_pre *wpre)
+{
+	struct file *h_file;
+	struct dentry *dentry;
+	int err;
+	unsigned int lsc;
+	struct au_pin pin;
+
+	lsc = 0;
+	if (wpre)
+		lsc = wpre->lsc;
+	err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1, lsc);
+	h_file = ERR_PTR(err);
+	if (unlikely(err))
+		goto out;
+
+	dentry = file->f_path.dentry;
+	if (do_ready) {
+		err = au_ready_to_write(file, -1, &pin);
+		if (unlikely(err)) {
+			h_file = ERR_PTR(err);
+			di_write_unlock(dentry);
+			goto out_fi;
+		}
+	}
+
+	di_downgrade_lock(dentry, /*flags*/0);
+	if (wpre)
+		wpre->btop = au_fbtop(file);
+	h_file = au_hf_top(file);
+	get_file(h_file);
+	if (wpre)
+		wpre->blks = file_inode(h_file)->i_blocks;
+	if (do_ready)
+		au_unpin(&pin);
+	di_read_unlock(dentry, /*flags*/0);
+
+out_fi:
+	fi_write_unlock(file);
+out:
+	return h_file;
+}
+
+static void au_write_post(struct inode *inode, struct file *h_file,
+			  struct au_write_pre *wpre, ssize_t written)
+{
+	struct inode *h_inode;
+
+	au_cpup_attr_timesizes(inode);
+	AuDebugOn(au_ibtop(inode) != wpre->btop);
+	h_inode = file_inode(h_file);
+	inode->i_mode = h_inode->i_mode;
+	ii_write_unlock(inode);
+	/* AuDbg("blks %llu, %llu\n", (u64)blks, (u64)h_inode->i_blocks); */
+	if (written > 0)
+		au_fhsm_wrote(inode->i_sb, wpre->btop,
+			      /*force*/h_inode->i_blocks > wpre->blks);
+	fput(h_file);
+}
+
+static ssize_t aufs_read(struct file *file, char __user *buf, size_t count,
+			 loff_t *ppos)
+{
+	ssize_t err;
+	struct inode *inode;
+	struct file *h_file;
+	struct super_block *sb;
+
+	inode = file_inode(file);
+	sb = inode->i_sb;
+	si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
+
+	h_file = au_read_pre(file, /*keep_fi*/0, /*lsc*/0);
+	err = PTR_ERR(h_file);
+	if (IS_ERR(h_file))
+		goto out;
+
+	/* filedata may be obsoleted by concurrent copyup, but no problem */
+	err = vfsub_read_u(h_file, buf, count, ppos);
+	/* todo: necessary? */
+	/* file->f_ra = h_file->f_ra; */
+	au_read_post(inode, h_file);
+
+out:
+	si_read_unlock(sb);
+	return err;
+}
+
+/*
+ * todo: very ugly
+ * it locks both of i_mutex and si_rwsem for read in safe.
+ * if the plink maintenance mode continues forever (that is the problem),
+ * may loop forever.
+ */
+static void au_mtx_and_read_lock(struct inode *inode)
+{
+	int err;
+	struct super_block *sb = inode->i_sb;
+
+	while (1) {
+		inode_lock(inode);
+		err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
+		if (!err)
+			break;
+		inode_unlock(inode);
+		si_read_lock(sb, AuLock_NOPLMW);
+		si_read_unlock(sb);
+	}
+}
+
+static ssize_t aufs_write(struct file *file, const char __user *ubuf,
+			  size_t count, loff_t *ppos)
+{
+	ssize_t err;
+	struct au_write_pre wpre;
+	struct inode *inode;
+	struct file *h_file;
+	char __user *buf = (char __user *)ubuf;
+
+	inode = file_inode(file);
+	au_mtx_and_read_lock(inode);
+
+	wpre.lsc = 0;
+	h_file = au_write_pre(file, /*do_ready*/1, &wpre);
+	err = PTR_ERR(h_file);
+	if (IS_ERR(h_file))
+		goto out;
+
+	err = vfsub_write_u(h_file, buf, count, ppos);
+	au_write_post(inode, h_file, &wpre, err);
+
+out:
+	si_read_unlock(inode->i_sb);
+	inode_unlock(inode);
+	return err;
+}
+
+static ssize_t au_do_iter(struct file *h_file, int rw, struct kiocb *kio,
+			  struct iov_iter *iov_iter)
+{
+	ssize_t err;
+	struct file *file;
+	ssize_t (*iter)(struct kiocb *, struct iov_iter *);
+
+	err = security_file_permission(h_file, rw);
+	if (unlikely(err))
+		goto out;
+
+	err = -ENOSYS;
+	iter = NULL;
+	if (rw == MAY_READ)
+		iter = h_file->f_op->read_iter;
+	else if (rw == MAY_WRITE)
+		iter = h_file->f_op->write_iter;
+
+	file = kio->ki_filp;
+	kio->ki_filp = h_file;
+	if (iter) {
+		lockdep_off();
+		err = iter(kio, iov_iter);
+		lockdep_on();
+	} else
+		/* currently there is no such fs */
+		WARN_ON_ONCE(1);
+	kio->ki_filp = file;
+
+out:
+	return err;
+}
+
+static ssize_t aufs_read_iter(struct kiocb *kio, struct iov_iter *iov_iter)
+{
+	ssize_t err;
+	struct file *file, *h_file;
+	struct inode *inode;
+	struct super_block *sb;
+
+	file = kio->ki_filp;
+	inode = file_inode(file);
+	sb = inode->i_sb;
+	si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
+
+	h_file = au_read_pre(file, /*keep_fi*/1, /*lsc*/0);
+	err = PTR_ERR(h_file);
+	if (IS_ERR(h_file))
+		goto out;
+
+	if (au_test_loopback_kthread()) {
+		au_warn_loopback(h_file->f_path.dentry->d_sb);
+		if (file->f_mapping != h_file->f_mapping) {
+			file->f_mapping = h_file->f_mapping;
+			smp_mb(); /* unnecessary? */
+		}
+	}
+	fi_read_unlock(file);
+
+	err = au_do_iter(h_file, MAY_READ, kio, iov_iter);
+	/* todo: necessary? */
+	/* file->f_ra = h_file->f_ra; */
+	au_read_post(inode, h_file);
+
+out:
+	si_read_unlock(sb);
+	return err;
+}
+
+static ssize_t aufs_write_iter(struct kiocb *kio, struct iov_iter *iov_iter)
+{
+	ssize_t err;
+	struct au_write_pre wpre;
+	struct inode *inode;
+	struct file *file, *h_file;
+
+	file = kio->ki_filp;
+	inode = file_inode(file);
+	au_mtx_and_read_lock(inode);
+
+	wpre.lsc = 0;
+	h_file = au_write_pre(file, /*do_ready*/1, &wpre);
+	err = PTR_ERR(h_file);
+	if (IS_ERR(h_file))
+		goto out;
+
+	err = au_do_iter(h_file, MAY_WRITE, kio, iov_iter);
+	au_write_post(inode, h_file, &wpre, err);
+
+out:
+	si_read_unlock(inode->i_sb);
+	inode_unlock(inode);
+	return err;
+}
+
+static ssize_t aufs_splice_read(struct file *file, loff_t *ppos,
+				struct pipe_inode_info *pipe, size_t len,
+				unsigned int flags)
+{
+	ssize_t err;
+	struct file *h_file;
+	struct inode *inode;
+	struct super_block *sb;
+
+	inode = file_inode(file);
+	sb = inode->i_sb;
+	si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
+
+	h_file = au_read_pre(file, /*keep_fi*/0, /*lsc*/0);
+	err = PTR_ERR(h_file);
+	if (IS_ERR(h_file))
+		goto out;
+
+	err = vfsub_splice_to(h_file, ppos, pipe, len, flags);
+	/* todo: necessasry? */
+	/* file->f_ra = h_file->f_ra; */
+	au_read_post(inode, h_file);
+
+out:
+	si_read_unlock(sb);
+	return err;
+}
+
+static ssize_t
+aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos,
+		  size_t len, unsigned int flags)
+{
+	ssize_t err;
+	struct au_write_pre wpre;
+	struct inode *inode;
+	struct file *h_file;
+
+	inode = file_inode(file);
+	au_mtx_and_read_lock(inode);
+
+	wpre.lsc = 0;
+	h_file = au_write_pre(file, /*do_ready*/1, &wpre);
+	err = PTR_ERR(h_file);
+	if (IS_ERR(h_file))
+		goto out;
+
+	err = vfsub_splice_from(pipe, h_file, ppos, len, flags);
+	au_write_post(inode, h_file, &wpre, err);
+
+out:
+	si_read_unlock(inode->i_sb);
+	inode_unlock(inode);
+	return err;
+}
+
+static long aufs_fallocate(struct file *file, int mode, loff_t offset,
+			   loff_t len)
+{
+	long err;
+	struct au_write_pre wpre;
+	struct inode *inode;
+	struct file *h_file;
+
+	inode = file_inode(file);
+	au_mtx_and_read_lock(inode);
+
+	wpre.lsc = 0;
+	h_file = au_write_pre(file, /*do_ready*/1, &wpre);
+	err = PTR_ERR(h_file);
+	if (IS_ERR(h_file))
+		goto out;
+
+	lockdep_off();
+	err = vfs_fallocate(h_file, mode, offset, len);
+	lockdep_on();
+	au_write_post(inode, h_file, &wpre, /*written*/1);
+
+out:
+	si_read_unlock(inode->i_sb);
+	inode_unlock(inode);
+	return err;
+}
+
+static ssize_t aufs_copy_file_range(struct file *src, loff_t src_pos,
+				    struct file *dst, loff_t dst_pos,
+				    size_t len, unsigned int flags)
+{
+	ssize_t err;
+	struct au_write_pre wpre;
+	enum { SRC, DST };
+	struct {
+		struct inode *inode;
+		struct file *h_file;
+		struct super_block *h_sb;
+	} a[2];
+#define a_src	a[SRC]
+#define a_dst	a[DST]
+
+	err = -EINVAL;
+	a_src.inode = file_inode(src);
+	if (unlikely(!S_ISREG(a_src.inode->i_mode)))
+		goto out;
+	a_dst.inode = file_inode(dst);
+	if (unlikely(!S_ISREG(a_dst.inode->i_mode)))
+		goto out;
+
+	au_mtx_and_read_lock(a_dst.inode);
+	/*
+	 * in order to match the order in di_write_lock2_{child,parent}(),
+	 * use f_path.dentry for this comparision.
+	 */
+	if (src->f_path.dentry < dst->f_path.dentry) {
+		a_src.h_file = au_read_pre(src, /*keep_fi*/1, AuLsc_FI_1);
+		err = PTR_ERR(a_src.h_file);
+		if (IS_ERR(a_src.h_file))
+			goto out_si;
+
+		wpre.lsc = AuLsc_FI_2;
+		a_dst.h_file = au_write_pre(dst, /*do_ready*/1, &wpre);
+		err = PTR_ERR(a_dst.h_file);
+		if (IS_ERR(a_dst.h_file)) {
+			au_read_post(a_src.inode, a_src.h_file);
+			goto out_si;
+		}
+	} else {
+		wpre.lsc = AuLsc_FI_1;
+		a_dst.h_file = au_write_pre(dst, /*do_ready*/1, &wpre);
+		err = PTR_ERR(a_dst.h_file);
+		if (IS_ERR(a_dst.h_file))
+			goto out_si;
+
+		a_src.h_file = au_read_pre(src, /*keep_fi*/1, AuLsc_FI_2);
+		err = PTR_ERR(a_src.h_file);
+		if (IS_ERR(a_src.h_file)) {
+			au_write_post(a_dst.inode, a_dst.h_file, &wpre,
+				      /*written*/0);
+			goto out_si;
+		}
+	}
+
+	err = -EXDEV;
+	a_src.h_sb = file_inode(a_src.h_file)->i_sb;
+	a_dst.h_sb = file_inode(a_dst.h_file)->i_sb;
+	if (unlikely(a_src.h_sb != a_dst.h_sb)) {
+		AuDbgFile(src);
+		AuDbgFile(dst);
+		goto out_file;
+	}
+
+	err = vfsub_copy_file_range(a_src.h_file, src_pos, a_dst.h_file,
+				    dst_pos, len, flags);
+
+out_file:
+	au_write_post(a_dst.inode, a_dst.h_file, &wpre, err);
+	fi_read_unlock(src);
+	au_read_post(a_src.inode, a_src.h_file);
+out_si:
+	si_read_unlock(a_dst.inode->i_sb);
+	inode_unlock(a_dst.inode);
+out:
+	return err;
+#undef a_src
+#undef a_dst
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * The locking order around current->mmap_sem.
+ * - in most and regular cases
+ *   file I/O syscall -- aufs_read() or something
+ *	-- si_rwsem for read -- mmap_sem
+ *	(Note that [fdi]i_rwsem are released before mmap_sem).
+ * - in mmap case
+ *   mmap(2) -- mmap_sem -- aufs_mmap() -- si_rwsem for read -- [fdi]i_rwsem
+ * This AB-BA order is definitly bad, but is not a problem since "si_rwsem for
+ * read" allows muliple processes to acquire it and [fdi]i_rwsem are not held in
+ * file I/O. Aufs needs to stop lockdep in aufs_mmap() though.
+ * It means that when aufs acquires si_rwsem for write, the process should never
+ * acquire mmap_sem.
+ *
+ * Actually aufs_iterate() holds [fdi]i_rwsem before mmap_sem, but this is not a
+ * problem either since any directory is not able to be mmap-ed.
+ * The similar scenario is applied to aufs_readlink() too.
+ */
+
+#if 0 /* stop calling security_file_mmap() */
+/* cf. linux/include/linux/mman.h: calc_vm_prot_bits() */
+#define AuConv_VM_PROT(f, b)	_calc_vm_trans(f, VM_##b, PROT_##b)
+
+static unsigned long au_arch_prot_conv(unsigned long flags)
+{
+	/* currently ppc64 only */
+#ifdef CONFIG_PPC64
+	/* cf. linux/arch/powerpc/include/asm/mman.h */
+	AuDebugOn(arch_calc_vm_prot_bits(-1) != VM_SAO);
+	return AuConv_VM_PROT(flags, SAO);
+#else
+	AuDebugOn(arch_calc_vm_prot_bits(-1));
+	return 0;
+#endif
+}
+
+static unsigned long au_prot_conv(unsigned long flags)
+{
+	return AuConv_VM_PROT(flags, READ)
+		| AuConv_VM_PROT(flags, WRITE)
+		| AuConv_VM_PROT(flags, EXEC)
+		| au_arch_prot_conv(flags);
+}
+
+/* cf. linux/include/linux/mman.h: calc_vm_flag_bits() */
+#define AuConv_VM_MAP(f, b)	_calc_vm_trans(f, VM_##b, MAP_##b)
+
+static unsigned long au_flag_conv(unsigned long flags)
+{
+	return AuConv_VM_MAP(flags, GROWSDOWN)
+		| AuConv_VM_MAP(flags, DENYWRITE)
+		| AuConv_VM_MAP(flags, LOCKED);
+}
+#endif
+
+static int aufs_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	int err;
+	const unsigned char wlock
+		= (file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED);
+	struct super_block *sb;
+	struct file *h_file;
+	struct inode *inode;
+
+	AuDbgVmRegion(file, vma);
+
+	inode = file_inode(file);
+	sb = inode->i_sb;
+	lockdep_off();
+	si_read_lock(sb, AuLock_NOPLMW);
+
+	h_file = au_write_pre(file, wlock, /*wpre*/NULL);
+	lockdep_on();
+	err = PTR_ERR(h_file);
+	if (IS_ERR(h_file))
+		goto out;
+
+	err = 0;
+	au_set_mmapped(file);
+	au_vm_file_reset(vma, h_file);
+	/*
+	 * we cannot call security_mmap_file() here since it may acquire
+	 * mmap_sem or i_mutex.
+	 *
+	 * err = security_mmap_file(h_file, au_prot_conv(vma->vm_flags),
+	 *			 au_flag_conv(vma->vm_flags));
+	 */
+	if (!err)
+		err = call_mmap(h_file, vma);
+	if (!err) {
+		au_vm_prfile_set(vma, file);
+		fsstack_copy_attr_atime(inode, file_inode(h_file));
+		goto out_fput; /* success */
+	}
+	au_unset_mmapped(file);
+	au_vm_file_reset(vma, file);
+
+out_fput:
+	lockdep_off();
+	ii_write_unlock(inode);
+	lockdep_on();
+	fput(h_file);
+out:
+	lockdep_off();
+	si_read_unlock(sb);
+	lockdep_on();
+	AuTraceErr(err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int aufs_fsync_nondir(struct file *file, loff_t start, loff_t end,
+			     int datasync)
+{
+	int err;
+	struct au_write_pre wpre;
+	struct inode *inode;
+	struct file *h_file;
+
+	err = 0; /* -EBADF; */ /* posix? */
+	if (unlikely(!(file->f_mode & FMODE_WRITE)))
+		goto out;
+
+	inode = file_inode(file);
+	au_mtx_and_read_lock(inode);
+
+	wpre.lsc = 0;
+	h_file = au_write_pre(file, /*do_ready*/1, &wpre);
+	err = PTR_ERR(h_file);
+	if (IS_ERR(h_file))
+		goto out_unlock;
+
+	err = vfsub_fsync(h_file, &h_file->f_path, datasync);
+	au_write_post(inode, h_file, &wpre, /*written*/0);
+
+out_unlock:
+	si_read_unlock(inode->i_sb);
+	inode_unlock(inode);
+out:
+	return err;
+}
+
+static int aufs_fasync(int fd, struct file *file, int flag)
+{
+	int err;
+	struct file *h_file;
+	struct super_block *sb;
+
+	sb = file->f_path.dentry->d_sb;
+	si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
+
+	h_file = au_read_pre(file, /*keep_fi*/0, /*lsc*/0);
+	err = PTR_ERR(h_file);
+	if (IS_ERR(h_file))
+		goto out;
+
+	if (h_file->f_op->fasync)
+		err = h_file->f_op->fasync(fd, h_file, flag);
+	fput(h_file); /* instead of au_read_post() */
+
+out:
+	si_read_unlock(sb);
+	return err;
+}
+
+static int aufs_setfl(struct file *file, unsigned long arg)
+{
+	int err;
+	struct file *h_file;
+	struct super_block *sb;
+
+	sb = file->f_path.dentry->d_sb;
+	si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
+
+	h_file = au_read_pre(file, /*keep_fi*/0, /*lsc*/0);
+	err = PTR_ERR(h_file);
+	if (IS_ERR(h_file))
+		goto out;
+
+	/* stop calling h_file->fasync */
+	arg |= vfsub_file_flags(file) & FASYNC;
+	err = setfl(/*unused fd*/-1, h_file, arg);
+	fput(h_file); /* instead of au_read_post() */
+
+out:
+	si_read_unlock(sb);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* no one supports this operation, currently */
+#if 0
+static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset,
+			     size_t len, loff_t *pos, int more)
+{
+}
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+const struct file_operations aufs_file_fop = {
+	.owner		= THIS_MODULE,
+
+	.llseek		= default_llseek,
+
+	.read		= aufs_read,
+	.write		= aufs_write,
+	.read_iter	= aufs_read_iter,
+	.write_iter	= aufs_write_iter,
+
+#ifdef CONFIG_AUFS_POLL
+	.poll		= aufs_poll,
+#endif
+	.unlocked_ioctl	= aufs_ioctl_nondir,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= aufs_compat_ioctl_nondir,
+#endif
+	.mmap		= aufs_mmap,
+	.open		= aufs_open_nondir,
+	.flush		= aufs_flush_nondir,
+	.release	= aufs_release_nondir,
+	.fsync		= aufs_fsync_nondir,
+	.fasync		= aufs_fasync,
+	/* .sendpage	= aufs_sendpage, */
+	.setfl		= aufs_setfl,
+	.splice_write	= aufs_splice_write,
+	.splice_read	= aufs_splice_read,
+#if 0
+	.aio_splice_write = aufs_aio_splice_write,
+	.aio_splice_read  = aufs_aio_splice_read,
+#endif
+	.fallocate	= aufs_fallocate,
+	.copy_file_range = aufs_copy_file_range
+};
diff --git a/include/fs/aufs/fhsm.c b/include/fs/aufs/fhsm.c
new file mode 100644
index 000000000..cf0503baa
--- /dev/null
+++ b/include/fs/aufs/fhsm.c
@@ -0,0 +1,427 @@
+/*
+ * Copyright (C) 2011-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+/*
+ * File-based Hierarchy Storage Management
+ */
+
+#include <linux/anon_inodes.h>
+#include <linux/poll.h>
+#include <linux/seq_file.h>
+#include <linux/statfs.h>
+#include "aufs.h"
+
+static aufs_bindex_t au_fhsm_bottom(struct super_block *sb)
+{
+	struct au_sbinfo *sbinfo;
+	struct au_fhsm *fhsm;
+
+	SiMustAnyLock(sb);
+
+	sbinfo = au_sbi(sb);
+	fhsm = &sbinfo->si_fhsm;
+	AuDebugOn(!fhsm);
+	return fhsm->fhsm_bottom;
+}
+
+void au_fhsm_set_bottom(struct super_block *sb, aufs_bindex_t bindex)
+{
+	struct au_sbinfo *sbinfo;
+	struct au_fhsm *fhsm;
+
+	SiMustWriteLock(sb);
+
+	sbinfo = au_sbi(sb);
+	fhsm = &sbinfo->si_fhsm;
+	AuDebugOn(!fhsm);
+	fhsm->fhsm_bottom = bindex;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_fhsm_test_jiffy(struct au_sbinfo *sbinfo, struct au_branch *br)
+{
+	struct au_br_fhsm *bf;
+
+	bf = br->br_fhsm;
+	MtxMustLock(&bf->bf_lock);
+
+	return !bf->bf_readable
+		|| time_after(jiffies,
+			      bf->bf_jiffy + sbinfo->si_fhsm.fhsm_expire);
+}
+
+/* ---------------------------------------------------------------------- */
+
+static void au_fhsm_notify(struct super_block *sb, int val)
+{
+	struct au_sbinfo *sbinfo;
+	struct au_fhsm *fhsm;
+
+	SiMustAnyLock(sb);
+
+	sbinfo = au_sbi(sb);
+	fhsm = &sbinfo->si_fhsm;
+	if (au_fhsm_pid(fhsm)
+	    && atomic_read(&fhsm->fhsm_readable) != -1) {
+		atomic_set(&fhsm->fhsm_readable, val);
+		if (val)
+			wake_up(&fhsm->fhsm_wqh);
+	}
+}
+
+static int au_fhsm_stfs(struct super_block *sb, aufs_bindex_t bindex,
+			struct aufs_stfs *rstfs, int do_lock, int do_notify)
+{
+	int err;
+	struct au_branch *br;
+	struct au_br_fhsm *bf;
+
+	br = au_sbr(sb, bindex);
+	AuDebugOn(au_br_rdonly(br));
+	bf = br->br_fhsm;
+	AuDebugOn(!bf);
+
+	if (do_lock)
+		mutex_lock(&bf->bf_lock);
+	else
+		MtxMustLock(&bf->bf_lock);
+
+	/* sb->s_root for NFS is unreliable */
+	err = au_br_stfs(br, &bf->bf_stfs);
+	if (unlikely(err)) {
+		AuErr1("FHSM failed (%d), b%d, ignored.\n", bindex, err);
+		goto out;
+	}
+
+	bf->bf_jiffy = jiffies;
+	bf->bf_readable = 1;
+	if (do_notify)
+		au_fhsm_notify(sb, /*val*/1);
+	if (rstfs)
+		*rstfs = bf->bf_stfs;
+
+out:
+	if (do_lock)
+		mutex_unlock(&bf->bf_lock);
+	au_fhsm_notify(sb, /*val*/1);
+
+	return err;
+}
+
+void au_fhsm_wrote(struct super_block *sb, aufs_bindex_t bindex, int force)
+{
+	int err;
+	struct au_sbinfo *sbinfo;
+	struct au_fhsm *fhsm;
+	struct au_branch *br;
+	struct au_br_fhsm *bf;
+
+	AuDbg("b%d, force %d\n", bindex, force);
+	SiMustAnyLock(sb);
+
+	sbinfo = au_sbi(sb);
+	fhsm = &sbinfo->si_fhsm;
+	if (!au_ftest_si(sbinfo, FHSM)
+	    || fhsm->fhsm_bottom == bindex)
+		return;
+
+	br = au_sbr(sb, bindex);
+	bf = br->br_fhsm;
+	AuDebugOn(!bf);
+	mutex_lock(&bf->bf_lock);
+	if (force
+	    || au_fhsm_pid(fhsm)
+	    || au_fhsm_test_jiffy(sbinfo, br))
+		err = au_fhsm_stfs(sb, bindex, /*rstfs*/NULL, /*do_lock*/0,
+				  /*do_notify*/1);
+	mutex_unlock(&bf->bf_lock);
+}
+
+void au_fhsm_wrote_all(struct super_block *sb, int force)
+{
+	aufs_bindex_t bindex, bbot;
+	struct au_branch *br;
+
+	/* exclude the bottom */
+	bbot = au_fhsm_bottom(sb);
+	for (bindex = 0; bindex < bbot; bindex++) {
+		br = au_sbr(sb, bindex);
+		if (au_br_fhsm(br->br_perm))
+			au_fhsm_wrote(sb, bindex, force);
+	}
+}
+
+/* ---------------------------------------------------------------------- */
+
+static unsigned int au_fhsm_poll(struct file *file,
+				 struct poll_table_struct *wait)
+{
+	unsigned int mask;
+	struct au_sbinfo *sbinfo;
+	struct au_fhsm *fhsm;
+
+	mask = 0;
+	sbinfo = file->private_data;
+	fhsm = &sbinfo->si_fhsm;
+	poll_wait(file, &fhsm->fhsm_wqh, wait);
+	if (atomic_read(&fhsm->fhsm_readable))
+		mask = POLLIN /* | POLLRDNORM */;
+
+	if (!mask)
+		AuDbg("mask 0x%x\n", mask);
+	return mask;
+}
+
+static int au_fhsm_do_read_one(struct aufs_stbr __user *stbr,
+			      struct aufs_stfs *stfs, __s16 brid)
+{
+	int err;
+
+	err = copy_to_user(&stbr->stfs, stfs, sizeof(*stfs));
+	if (!err)
+		err = __put_user(brid, &stbr->brid);
+	if (unlikely(err))
+		err = -EFAULT;
+
+	return err;
+}
+
+static ssize_t au_fhsm_do_read(struct super_block *sb,
+			       struct aufs_stbr __user *stbr, size_t count)
+{
+	ssize_t err;
+	int nstbr;
+	aufs_bindex_t bindex, bbot;
+	struct au_branch *br;
+	struct au_br_fhsm *bf;
+
+	/* except the bottom branch */
+	err = 0;
+	nstbr = 0;
+	bbot = au_fhsm_bottom(sb);
+	for (bindex = 0; !err && bindex < bbot; bindex++) {
+		br = au_sbr(sb, bindex);
+		if (!au_br_fhsm(br->br_perm))
+			continue;
+
+		bf = br->br_fhsm;
+		mutex_lock(&bf->bf_lock);
+		if (bf->bf_readable) {
+			err = -EFAULT;
+			if (count >= sizeof(*stbr))
+				err = au_fhsm_do_read_one(stbr++, &bf->bf_stfs,
+							  br->br_id);
+			if (!err) {
+				bf->bf_readable = 0;
+				count -= sizeof(*stbr);
+				nstbr++;
+			}
+		}
+		mutex_unlock(&bf->bf_lock);
+	}
+	if (!err)
+		err = sizeof(*stbr) * nstbr;
+
+	return err;
+}
+
+static ssize_t au_fhsm_read(struct file *file, char __user *buf, size_t count,
+			   loff_t *pos)
+{
+	ssize_t err;
+	int readable;
+	aufs_bindex_t nfhsm, bindex, bbot;
+	struct au_sbinfo *sbinfo;
+	struct au_fhsm *fhsm;
+	struct au_branch *br;
+	struct super_block *sb;
+
+	err = 0;
+	sbinfo = file->private_data;
+	fhsm = &sbinfo->si_fhsm;
+need_data:
+	spin_lock_irq(&fhsm->fhsm_wqh.lock);
+	if (!atomic_read(&fhsm->fhsm_readable)) {
+		if (vfsub_file_flags(file) & O_NONBLOCK)
+			err = -EAGAIN;
+		else
+			err = wait_event_interruptible_locked_irq
+				(fhsm->fhsm_wqh,
+				 atomic_read(&fhsm->fhsm_readable));
+	}
+	spin_unlock_irq(&fhsm->fhsm_wqh.lock);
+	if (unlikely(err))
+		goto out;
+
+	/* sb may already be dead */
+	au_rw_read_lock(&sbinfo->si_rwsem);
+	readable = atomic_read(&fhsm->fhsm_readable);
+	if (readable > 0) {
+		sb = sbinfo->si_sb;
+		AuDebugOn(!sb);
+		/* exclude the bottom branch */
+		nfhsm = 0;
+		bbot = au_fhsm_bottom(sb);
+		for (bindex = 0; bindex < bbot; bindex++) {
+			br = au_sbr(sb, bindex);
+			if (au_br_fhsm(br->br_perm))
+				nfhsm++;
+		}
+		err = -EMSGSIZE;
+		if (nfhsm * sizeof(struct aufs_stbr) <= count) {
+			atomic_set(&fhsm->fhsm_readable, 0);
+			err = au_fhsm_do_read(sbinfo->si_sb, (void __user *)buf,
+					     count);
+		}
+	}
+	au_rw_read_unlock(&sbinfo->si_rwsem);
+	if (!readable)
+		goto need_data;
+
+out:
+	return err;
+}
+
+static int au_fhsm_release(struct inode *inode, struct file *file)
+{
+	struct au_sbinfo *sbinfo;
+	struct au_fhsm *fhsm;
+
+	/* sb may already be dead */
+	sbinfo = file->private_data;
+	fhsm = &sbinfo->si_fhsm;
+	spin_lock(&fhsm->fhsm_spin);
+	fhsm->fhsm_pid = 0;
+	spin_unlock(&fhsm->fhsm_spin);
+	kobject_put(&sbinfo->si_kobj);
+
+	return 0;
+}
+
+static const struct file_operations au_fhsm_fops = {
+	.owner		= THIS_MODULE,
+	.llseek		= noop_llseek,
+	.read		= au_fhsm_read,
+	.poll		= au_fhsm_poll,
+	.release	= au_fhsm_release
+};
+
+int au_fhsm_fd(struct super_block *sb, int oflags)
+{
+	int err, fd;
+	struct au_sbinfo *sbinfo;
+	struct au_fhsm *fhsm;
+
+	err = -EPERM;
+	if (unlikely(!capable(CAP_SYS_ADMIN)))
+		goto out;
+
+	err = -EINVAL;
+	if (unlikely(oflags & ~(O_CLOEXEC | O_NONBLOCK)))
+		goto out;
+
+	err = 0;
+	sbinfo = au_sbi(sb);
+	fhsm = &sbinfo->si_fhsm;
+	spin_lock(&fhsm->fhsm_spin);
+	if (!fhsm->fhsm_pid)
+		fhsm->fhsm_pid = current->pid;
+	else
+		err = -EBUSY;
+	spin_unlock(&fhsm->fhsm_spin);
+	if (unlikely(err))
+		goto out;
+
+	oflags |= O_RDONLY;
+	/* oflags |= FMODE_NONOTIFY; */
+	fd = anon_inode_getfd("[aufs_fhsm]", &au_fhsm_fops, sbinfo, oflags);
+	err = fd;
+	if (unlikely(fd < 0))
+		goto out_pid;
+
+	/* succeed reglardless 'fhsm' status */
+	kobject_get(&sbinfo->si_kobj);
+	si_noflush_read_lock(sb);
+	if (au_ftest_si(sbinfo, FHSM))
+		au_fhsm_wrote_all(sb, /*force*/0);
+	si_read_unlock(sb);
+	goto out; /* success */
+
+out_pid:
+	spin_lock(&fhsm->fhsm_spin);
+	fhsm->fhsm_pid = 0;
+	spin_unlock(&fhsm->fhsm_spin);
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int au_fhsm_br_alloc(struct au_branch *br)
+{
+	int err;
+
+	err = 0;
+	br->br_fhsm = kmalloc(sizeof(*br->br_fhsm), GFP_NOFS);
+	if (br->br_fhsm)
+		au_br_fhsm_init(br->br_fhsm);
+	else
+		err = -ENOMEM;
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void au_fhsm_fin(struct super_block *sb)
+{
+	au_fhsm_notify(sb, /*val*/-1);
+}
+
+void au_fhsm_init(struct au_sbinfo *sbinfo)
+{
+	struct au_fhsm *fhsm;
+
+	fhsm = &sbinfo->si_fhsm;
+	spin_lock_init(&fhsm->fhsm_spin);
+	init_waitqueue_head(&fhsm->fhsm_wqh);
+	atomic_set(&fhsm->fhsm_readable, 0);
+	fhsm->fhsm_expire
+		= msecs_to_jiffies(AUFS_FHSM_CACHE_DEF_SEC * MSEC_PER_SEC);
+	fhsm->fhsm_bottom = -1;
+}
+
+void au_fhsm_set(struct au_sbinfo *sbinfo, unsigned int sec)
+{
+	sbinfo->si_fhsm.fhsm_expire
+		= msecs_to_jiffies(sec * MSEC_PER_SEC);
+}
+
+void au_fhsm_show(struct seq_file *seq, struct au_sbinfo *sbinfo)
+{
+	unsigned int u;
+
+	if (!au_ftest_si(sbinfo, FHSM))
+		return;
+
+	u = jiffies_to_msecs(sbinfo->si_fhsm.fhsm_expire) / MSEC_PER_SEC;
+	if (u != AUFS_FHSM_CACHE_DEF_SEC)
+		seq_printf(seq, ",fhsm_sec=%u", u);
+}
diff --git a/include/fs/aufs/file.c b/include/fs/aufs/file.c
new file mode 100644
index 000000000..4646b4b02
--- /dev/null
+++ b/include/fs/aufs/file.c
@@ -0,0 +1,856 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * handling file/dir, and address_space operation
+ */
+
+#ifdef CONFIG_AUFS_DEBUG
+#include <linux/migrate.h>
+#endif
+#include <linux/pagemap.h>
+#include "aufs.h"
+
+/* drop flags for writing */
+unsigned int au_file_roflags(unsigned int flags)
+{
+	flags &= ~(O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC);
+	flags |= O_RDONLY | O_NOATIME;
+	return flags;
+}
+
+/* common functions to regular file and dir */
+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
+		       struct file *file, int force_wr)
+{
+	struct file *h_file;
+	struct dentry *h_dentry;
+	struct inode *h_inode;
+	struct super_block *sb;
+	struct au_branch *br;
+	struct path h_path;
+	int err;
+
+	/* a race condition can happen between open and unlink/rmdir */
+	h_file = ERR_PTR(-ENOENT);
+	h_dentry = au_h_dptr(dentry, bindex);
+	if (au_test_nfsd() && (!h_dentry || d_is_negative(h_dentry)))
+		goto out;
+	h_inode = d_inode(h_dentry);
+	spin_lock(&h_dentry->d_lock);
+	err = (!d_unhashed(dentry) && d_unlinked(h_dentry))
+		/* || !d_inode(dentry)->i_nlink */
+		;
+	spin_unlock(&h_dentry->d_lock);
+	if (unlikely(err))
+		goto out;
+
+	sb = dentry->d_sb;
+	br = au_sbr(sb, bindex);
+	err = au_br_test_oflag(flags, br);
+	h_file = ERR_PTR(err);
+	if (unlikely(err))
+		goto out;
+
+	/* drop flags for writing */
+	if (au_test_ro(sb, bindex, d_inode(dentry))) {
+		if (force_wr && !(flags & O_WRONLY))
+			force_wr = 0;
+		flags = au_file_roflags(flags);
+		if (force_wr) {
+			h_file = ERR_PTR(-EROFS);
+			flags = au_file_roflags(flags);
+			if (unlikely(vfsub_native_ro(h_inode)
+				     || IS_APPEND(h_inode)))
+				goto out;
+			flags &= ~O_ACCMODE;
+			flags |= O_WRONLY;
+		}
+	}
+	flags &= ~O_CREAT;
+	au_br_get(br);
+	h_path.dentry = h_dentry;
+	h_path.mnt = au_br_mnt(br);
+	h_file = vfsub_dentry_open(&h_path, flags);
+	if (IS_ERR(h_file))
+		goto out_br;
+
+	if (flags & __FMODE_EXEC) {
+		err = deny_write_access(h_file);
+		if (unlikely(err)) {
+			fput(h_file);
+			h_file = ERR_PTR(err);
+			goto out_br;
+		}
+	}
+	fsnotify_open(h_file);
+	goto out; /* success */
+
+out_br:
+	au_br_put(br);
+out:
+	return h_file;
+}
+
+static int au_cmoo(struct dentry *dentry)
+{
+	int err, cmoo, matched;
+	unsigned int udba;
+	struct path h_path;
+	struct au_pin pin;
+	struct au_cp_generic cpg = {
+		.dentry	= dentry,
+		.bdst	= -1,
+		.bsrc	= -1,
+		.len	= -1,
+		.pin	= &pin,
+		.flags	= AuCpup_DTIME | AuCpup_HOPEN
+	};
+	struct inode *delegated;
+	struct super_block *sb;
+	struct au_sbinfo *sbinfo;
+	struct au_fhsm *fhsm;
+	pid_t pid;
+	struct au_branch *br;
+	struct dentry *parent;
+	struct au_hinode *hdir;
+
+	DiMustWriteLock(dentry);
+	IiMustWriteLock(d_inode(dentry));
+
+	err = 0;
+	if (IS_ROOT(dentry))
+		goto out;
+	cpg.bsrc = au_dbtop(dentry);
+	if (!cpg.bsrc)
+		goto out;
+
+	sb = dentry->d_sb;
+	sbinfo = au_sbi(sb);
+	fhsm = &sbinfo->si_fhsm;
+	pid = au_fhsm_pid(fhsm);
+	rcu_read_lock();
+	matched = (pid
+		   && (current->pid == pid
+		       || rcu_dereference(current->real_parent)->pid == pid));
+	rcu_read_unlock();
+	if (matched)
+		goto out;
+
+	br = au_sbr(sb, cpg.bsrc);
+	cmoo = au_br_cmoo(br->br_perm);
+	if (!cmoo)
+		goto out;
+	if (!d_is_reg(dentry))
+		cmoo &= AuBrAttr_COO_ALL;
+	if (!cmoo)
+		goto out;
+
+	parent = dget_parent(dentry);
+	di_write_lock_parent(parent);
+	err = au_wbr_do_copyup_bu(dentry, cpg.bsrc - 1);
+	cpg.bdst = err;
+	if (unlikely(err < 0)) {
+		err = 0;	/* there is no upper writable branch */
+		goto out_dgrade;
+	}
+	AuDbg("bsrc %d, bdst %d\n", cpg.bsrc, cpg.bdst);
+
+	/* do not respect the coo attrib for the target branch */
+	err = au_cpup_dirs(dentry, cpg.bdst);
+	if (unlikely(err))
+		goto out_dgrade;
+
+	di_downgrade_lock(parent, AuLock_IR);
+	udba = au_opt_udba(sb);
+	err = au_pin(&pin, dentry, cpg.bdst, udba,
+		     AuPin_DI_LOCKED | AuPin_MNT_WRITE);
+	if (unlikely(err))
+		goto out_parent;
+
+	err = au_sio_cpup_simple(&cpg);
+	au_unpin(&pin);
+	if (unlikely(err))
+		goto out_parent;
+	if (!(cmoo & AuBrWAttr_MOO))
+		goto out_parent; /* success */
+
+	err = au_pin(&pin, dentry, cpg.bsrc, udba,
+		     AuPin_DI_LOCKED | AuPin_MNT_WRITE);
+	if (unlikely(err))
+		goto out_parent;
+
+	h_path.mnt = au_br_mnt(br);
+	h_path.dentry = au_h_dptr(dentry, cpg.bsrc);
+	hdir = au_hi(d_inode(parent), cpg.bsrc);
+	delegated = NULL;
+	err = vfsub_unlink(hdir->hi_inode, &h_path, &delegated, /*force*/1);
+	au_unpin(&pin);
+	/* todo: keep h_dentry or not? */
+	if (unlikely(err == -EWOULDBLOCK)) {
+		pr_warn("cannot retry for NFSv4 delegation"
+			" for an internal unlink\n");
+		iput(delegated);
+	}
+	if (unlikely(err)) {
+		pr_err("unlink %pd after coo failed (%d), ignored\n",
+		       dentry, err);
+		err = 0;
+	}
+	goto out_parent; /* success */
+
+out_dgrade:
+	di_downgrade_lock(parent, AuLock_IR);
+out_parent:
+	di_read_unlock(parent, AuLock_IR);
+	dput(parent);
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+int au_do_open(struct file *file, struct au_do_open_args *args)
+{
+	int err, aopen = args->aopen;
+	struct dentry *dentry;
+	struct au_finfo *finfo;
+
+	if (!aopen)
+		err = au_finfo_init(file, args->fidir);
+	else {
+		lockdep_off();
+		err = au_finfo_init(file, args->fidir);
+		lockdep_on();
+	}
+	if (unlikely(err))
+		goto out;
+
+	dentry = file->f_path.dentry;
+	AuDebugOn(IS_ERR_OR_NULL(dentry));
+	di_write_lock_child(dentry);
+	err = au_cmoo(dentry);
+	di_downgrade_lock(dentry, AuLock_IR);
+	if (!err) {
+		if (!aopen)
+			err = args->open(file, vfsub_file_flags(file), NULL);
+		else {
+			lockdep_off();
+			err = args->open(file, vfsub_file_flags(file), NULL);
+			lockdep_on();
+		}
+	}
+	di_read_unlock(dentry, AuLock_IR);
+
+	finfo = au_fi(file);
+	if (!err) {
+		finfo->fi_file = file;
+		au_hbl_add(&finfo->fi_hlist,
+			   &au_sbi(file->f_path.dentry->d_sb)->si_files);
+	}
+	if (!aopen)
+		fi_write_unlock(file);
+	else {
+		lockdep_off();
+		fi_write_unlock(file);
+		lockdep_on();
+	}
+	if (unlikely(err)) {
+		finfo->fi_hdir = NULL;
+		au_finfo_fin(file);
+	}
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+int au_reopen_nondir(struct file *file)
+{
+	int err;
+	aufs_bindex_t btop;
+	struct dentry *dentry;
+	struct file *h_file, *h_file_tmp;
+
+	dentry = file->f_path.dentry;
+	btop = au_dbtop(dentry);
+	h_file_tmp = NULL;
+	if (au_fbtop(file) == btop) {
+		h_file = au_hf_top(file);
+		if (file->f_mode == h_file->f_mode)
+			return 0; /* success */
+		h_file_tmp = h_file;
+		get_file(h_file_tmp);
+		au_set_h_fptr(file, btop, NULL);
+	}
+	AuDebugOn(au_fi(file)->fi_hdir);
+	/*
+	 * it can happen
+	 * file exists on both of rw and ro
+	 * open --> dbtop and fbtop are both 0
+	 * prepend a branch as rw, "rw" become ro
+	 * remove rw/file
+	 * delete the top branch, "rw" becomes rw again
+	 *	--> dbtop is 1, fbtop is still 0
+	 * write --> fbtop is 0 but dbtop is 1
+	 */
+	/* AuDebugOn(au_fbtop(file) < btop); */
+
+	h_file = au_h_open(dentry, btop, vfsub_file_flags(file) & ~O_TRUNC,
+			   file, /*force_wr*/0);
+	err = PTR_ERR(h_file);
+	if (IS_ERR(h_file)) {
+		if (h_file_tmp) {
+			au_sbr_get(dentry->d_sb, btop);
+			au_set_h_fptr(file, btop, h_file_tmp);
+			h_file_tmp = NULL;
+		}
+		goto out; /* todo: close all? */
+	}
+
+	err = 0;
+	au_set_fbtop(file, btop);
+	au_set_h_fptr(file, btop, h_file);
+	au_update_figen(file);
+	/* todo: necessary? */
+	/* file->f_ra = h_file->f_ra; */
+
+out:
+	if (h_file_tmp)
+		fput(h_file_tmp);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_reopen_wh(struct file *file, aufs_bindex_t btgt,
+			struct dentry *hi_wh)
+{
+	int err;
+	aufs_bindex_t btop;
+	struct au_dinfo *dinfo;
+	struct dentry *h_dentry;
+	struct au_hdentry *hdp;
+
+	dinfo = au_di(file->f_path.dentry);
+	AuRwMustWriteLock(&dinfo->di_rwsem);
+
+	btop = dinfo->di_btop;
+	dinfo->di_btop = btgt;
+	hdp = au_hdentry(dinfo, btgt);
+	h_dentry = hdp->hd_dentry;
+	hdp->hd_dentry = hi_wh;
+	err = au_reopen_nondir(file);
+	hdp->hd_dentry = h_dentry;
+	dinfo->di_btop = btop;
+
+	return err;
+}
+
+static int au_ready_to_write_wh(struct file *file, loff_t len,
+				aufs_bindex_t bcpup, struct au_pin *pin)
+{
+	int err;
+	struct inode *inode, *h_inode;
+	struct dentry *h_dentry, *hi_wh;
+	struct au_cp_generic cpg = {
+		.dentry	= file->f_path.dentry,
+		.bdst	= bcpup,
+		.bsrc	= -1,
+		.len	= len,
+		.pin	= pin
+	};
+
+	au_update_dbtop(cpg.dentry);
+	inode = d_inode(cpg.dentry);
+	h_inode = NULL;
+	if (au_dbtop(cpg.dentry) <= bcpup
+	    && au_dbbot(cpg.dentry) >= bcpup) {
+		h_dentry = au_h_dptr(cpg.dentry, bcpup);
+		if (h_dentry && d_is_positive(h_dentry))
+			h_inode = d_inode(h_dentry);
+	}
+	hi_wh = au_hi_wh(inode, bcpup);
+	if (!hi_wh && !h_inode)
+		err = au_sio_cpup_wh(&cpg, file);
+	else
+		/* already copied-up after unlink */
+		err = au_reopen_wh(file, bcpup, hi_wh);
+
+	if (!err
+	    && (inode->i_nlink > 1
+		|| (inode->i_state & I_LINKABLE))
+	    && au_opt_test(au_mntflags(cpg.dentry->d_sb), PLINK))
+		au_plink_append(inode, bcpup, au_h_dptr(cpg.dentry, bcpup));
+
+	return err;
+}
+
+/*
+ * prepare the @file for writing.
+ */
+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin)
+{
+	int err;
+	aufs_bindex_t dbtop;
+	struct dentry *parent;
+	struct inode *inode;
+	struct super_block *sb;
+	struct file *h_file;
+	struct au_cp_generic cpg = {
+		.dentry	= file->f_path.dentry,
+		.bdst	= -1,
+		.bsrc	= -1,
+		.len	= len,
+		.pin	= pin,
+		.flags	= AuCpup_DTIME
+	};
+
+	sb = cpg.dentry->d_sb;
+	inode = d_inode(cpg.dentry);
+	cpg.bsrc = au_fbtop(file);
+	err = au_test_ro(sb, cpg.bsrc, inode);
+	if (!err && (au_hf_top(file)->f_mode & FMODE_WRITE)) {
+		err = au_pin(pin, cpg.dentry, cpg.bsrc, AuOpt_UDBA_NONE,
+			     /*flags*/0);
+		goto out;
+	}
+
+	/* need to cpup or reopen */
+	parent = dget_parent(cpg.dentry);
+	di_write_lock_parent(parent);
+	err = AuWbrCopyup(au_sbi(sb), cpg.dentry);
+	cpg.bdst = err;
+	if (unlikely(err < 0))
+		goto out_dgrade;
+	err = 0;
+
+	if (!d_unhashed(cpg.dentry) && !au_h_dptr(parent, cpg.bdst)) {
+		err = au_cpup_dirs(cpg.dentry, cpg.bdst);
+		if (unlikely(err))
+			goto out_dgrade;
+	}
+
+	err = au_pin(pin, cpg.dentry, cpg.bdst, AuOpt_UDBA_NONE,
+		     AuPin_DI_LOCKED | AuPin_MNT_WRITE);
+	if (unlikely(err))
+		goto out_dgrade;
+
+	dbtop = au_dbtop(cpg.dentry);
+	if (dbtop <= cpg.bdst)
+		cpg.bsrc = cpg.bdst;
+
+	if (dbtop <= cpg.bdst		/* just reopen */
+	    || !d_unhashed(cpg.dentry)	/* copyup and reopen */
+		) {
+		h_file = au_h_open_pre(cpg.dentry, cpg.bsrc, /*force_wr*/0);
+		if (IS_ERR(h_file))
+			err = PTR_ERR(h_file);
+		else {
+			di_downgrade_lock(parent, AuLock_IR);
+			if (dbtop > cpg.bdst)
+				err = au_sio_cpup_simple(&cpg);
+			if (!err)
+				err = au_reopen_nondir(file);
+			au_h_open_post(cpg.dentry, cpg.bsrc, h_file);
+		}
+	} else {			/* copyup as wh and reopen */
+		/*
+		 * since writable hfsplus branch is not supported,
+		 * h_open_pre/post() are unnecessary.
+		 */
+		err = au_ready_to_write_wh(file, len, cpg.bdst, pin);
+		di_downgrade_lock(parent, AuLock_IR);
+	}
+
+	if (!err) {
+		au_pin_set_parent_lflag(pin, /*lflag*/0);
+		goto out_dput; /* success */
+	}
+	au_unpin(pin);
+	goto out_unlock;
+
+out_dgrade:
+	di_downgrade_lock(parent, AuLock_IR);
+out_unlock:
+	di_read_unlock(parent, AuLock_IR);
+out_dput:
+	dput(parent);
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int au_do_flush(struct file *file, fl_owner_t id,
+		int (*flush)(struct file *file, fl_owner_t id))
+{
+	int err;
+	struct super_block *sb;
+	struct inode *inode;
+
+	inode = file_inode(file);
+	sb = inode->i_sb;
+	si_noflush_read_lock(sb);
+	fi_read_lock(file);
+	ii_read_lock_child(inode);
+
+	err = flush(file, id);
+	au_cpup_attr_timesizes(inode);
+
+	ii_read_unlock(inode);
+	fi_read_unlock(file);
+	si_read_unlock(sb);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_file_refresh_by_inode(struct file *file, int *need_reopen)
+{
+	int err;
+	struct au_pin pin;
+	struct au_finfo *finfo;
+	struct dentry *parent, *hi_wh;
+	struct inode *inode;
+	struct super_block *sb;
+	struct au_cp_generic cpg = {
+		.dentry	= file->f_path.dentry,
+		.bdst	= -1,
+		.bsrc	= -1,
+		.len	= -1,
+		.pin	= &pin,
+		.flags	= AuCpup_DTIME
+	};
+
+	FiMustWriteLock(file);
+
+	err = 0;
+	finfo = au_fi(file);
+	sb = cpg.dentry->d_sb;
+	inode = d_inode(cpg.dentry);
+	cpg.bdst = au_ibtop(inode);
+	if (cpg.bdst == finfo->fi_btop || IS_ROOT(cpg.dentry))
+		goto out;
+
+	parent = dget_parent(cpg.dentry);
+	if (au_test_ro(sb, cpg.bdst, inode)) {
+		di_read_lock_parent(parent, !AuLock_IR);
+		err = AuWbrCopyup(au_sbi(sb), cpg.dentry);
+		cpg.bdst = err;
+		di_read_unlock(parent, !AuLock_IR);
+		if (unlikely(err < 0))
+			goto out_parent;
+		err = 0;
+	}
+
+	di_read_lock_parent(parent, AuLock_IR);
+	hi_wh = au_hi_wh(inode, cpg.bdst);
+	if (!S_ISDIR(inode->i_mode)
+	    && au_opt_test(au_mntflags(sb), PLINK)
+	    && au_plink_test(inode)
+	    && !d_unhashed(cpg.dentry)
+	    && cpg.bdst < au_dbtop(cpg.dentry)) {
+		err = au_test_and_cpup_dirs(cpg.dentry, cpg.bdst);
+		if (unlikely(err))
+			goto out_unlock;
+
+		/* always superio. */
+		err = au_pin(&pin, cpg.dentry, cpg.bdst, AuOpt_UDBA_NONE,
+			     AuPin_DI_LOCKED | AuPin_MNT_WRITE);
+		if (!err) {
+			err = au_sio_cpup_simple(&cpg);
+			au_unpin(&pin);
+		}
+	} else if (hi_wh) {
+		/* already copied-up after unlink */
+		err = au_reopen_wh(file, cpg.bdst, hi_wh);
+		*need_reopen = 0;
+	}
+
+out_unlock:
+	di_read_unlock(parent, AuLock_IR);
+out_parent:
+	dput(parent);
+out:
+	return err;
+}
+
+static void au_do_refresh_dir(struct file *file)
+{
+	aufs_bindex_t bindex, bbot, new_bindex, brid;
+	struct au_hfile *p, tmp, *q;
+	struct au_finfo *finfo;
+	struct super_block *sb;
+	struct au_fidir *fidir;
+
+	FiMustWriteLock(file);
+
+	sb = file->f_path.dentry->d_sb;
+	finfo = au_fi(file);
+	fidir = finfo->fi_hdir;
+	AuDebugOn(!fidir);
+	p = fidir->fd_hfile + finfo->fi_btop;
+	brid = p->hf_br->br_id;
+	bbot = fidir->fd_bbot;
+	for (bindex = finfo->fi_btop; bindex <= bbot; bindex++, p++) {
+		if (!p->hf_file)
+			continue;
+
+		new_bindex = au_br_index(sb, p->hf_br->br_id);
+		if (new_bindex == bindex)
+			continue;
+		if (new_bindex < 0) {
+			au_set_h_fptr(file, bindex, NULL);
+			continue;
+		}
+
+		/* swap two lower inode, and loop again */
+		q = fidir->fd_hfile + new_bindex;
+		tmp = *q;
+		*q = *p;
+		*p = tmp;
+		if (tmp.hf_file) {
+			bindex--;
+			p--;
+		}
+	}
+
+	p = fidir->fd_hfile;
+	if (!au_test_mmapped(file) && !d_unlinked(file->f_path.dentry)) {
+		bbot = au_sbbot(sb);
+		for (finfo->fi_btop = 0; finfo->fi_btop <= bbot;
+		     finfo->fi_btop++, p++)
+			if (p->hf_file) {
+				if (file_inode(p->hf_file))
+					break;
+				au_hfput(p, /*execed*/0);
+			}
+	} else {
+		bbot = au_br_index(sb, brid);
+		for (finfo->fi_btop = 0; finfo->fi_btop < bbot;
+		     finfo->fi_btop++, p++)
+			if (p->hf_file)
+				au_hfput(p, /*execed*/0);
+		bbot = au_sbbot(sb);
+	}
+
+	p = fidir->fd_hfile + bbot;
+	for (fidir->fd_bbot = bbot; fidir->fd_bbot >= finfo->fi_btop;
+	     fidir->fd_bbot--, p--)
+		if (p->hf_file) {
+			if (file_inode(p->hf_file))
+				break;
+			au_hfput(p, /*execed*/0);
+		}
+	AuDebugOn(fidir->fd_bbot < finfo->fi_btop);
+}
+
+/*
+ * after branch manipulating, refresh the file.
+ */
+static int refresh_file(struct file *file, int (*reopen)(struct file *file))
+{
+	int err, need_reopen, nbr;
+	aufs_bindex_t bbot, bindex;
+	struct dentry *dentry;
+	struct super_block *sb;
+	struct au_finfo *finfo;
+	struct au_hfile *hfile;
+
+	dentry = file->f_path.dentry;
+	sb = dentry->d_sb;
+	nbr = au_sbbot(sb) + 1;
+	finfo = au_fi(file);
+	if (!finfo->fi_hdir) {
+		hfile = &finfo->fi_htop;
+		AuDebugOn(!hfile->hf_file);
+		bindex = au_br_index(sb, hfile->hf_br->br_id);
+		AuDebugOn(bindex < 0);
+		if (bindex != finfo->fi_btop)
+			au_set_fbtop(file, bindex);
+	} else {
+		err = au_fidir_realloc(finfo, nbr, /*may_shrink*/0);
+		if (unlikely(err))
+			goto out;
+		au_do_refresh_dir(file);
+	}
+
+	err = 0;
+	need_reopen = 1;
+	if (!au_test_mmapped(file))
+		err = au_file_refresh_by_inode(file, &need_reopen);
+	if (finfo->fi_hdir)
+		/* harmless if err */
+		au_fidir_realloc(finfo, nbr, /*may_shrink*/1);
+	if (!err && need_reopen && !d_unlinked(dentry))
+		err = reopen(file);
+	if (!err) {
+		au_update_figen(file);
+		goto out; /* success */
+	}
+
+	/* error, close all lower files */
+	if (finfo->fi_hdir) {
+		bbot = au_fbbot_dir(file);
+		for (bindex = au_fbtop(file); bindex <= bbot; bindex++)
+			au_set_h_fptr(file, bindex, NULL);
+	}
+
+out:
+	return err;
+}
+
+/* common function to regular file and dir */
+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
+			  int wlock, unsigned int fi_lsc)
+{
+	int err;
+	unsigned int sigen, figen;
+	aufs_bindex_t btop;
+	unsigned char pseudo_link;
+	struct dentry *dentry;
+	struct inode *inode;
+
+	err = 0;
+	dentry = file->f_path.dentry;
+	inode = d_inode(dentry);
+	sigen = au_sigen(dentry->d_sb);
+	fi_write_lock_nested(file, fi_lsc);
+	figen = au_figen(file);
+	if (!fi_lsc)
+		di_write_lock_child(dentry);
+	else
+		di_write_lock_child2(dentry);
+	btop = au_dbtop(dentry);
+	pseudo_link = (btop != au_ibtop(inode));
+	if (sigen == figen && !pseudo_link && au_fbtop(file) == btop) {
+		if (!wlock) {
+			di_downgrade_lock(dentry, AuLock_IR);
+			fi_downgrade_lock(file);
+		}
+		goto out; /* success */
+	}
+
+	AuDbg("sigen %d, figen %d\n", sigen, figen);
+	if (au_digen_test(dentry, sigen)) {
+		err = au_reval_dpath(dentry, sigen);
+		AuDebugOn(!err && au_digen_test(dentry, sigen));
+	}
+
+	if (!err)
+		err = refresh_file(file, reopen);
+	if (!err) {
+		if (!wlock) {
+			di_downgrade_lock(dentry, AuLock_IR);
+			fi_downgrade_lock(file);
+		}
+	} else {
+		di_write_unlock(dentry);
+		fi_write_unlock(file);
+	}
+
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* cf. aufs_nopage() */
+/* for madvise(2) */
+static int aufs_readpage(struct file *file __maybe_unused, struct page *page)
+{
+	unlock_page(page);
+	return 0;
+}
+
+/* it will never be called, but necessary to support O_DIRECT */
+static ssize_t aufs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+{ BUG(); return 0; }
+
+/* they will never be called. */
+#ifdef CONFIG_AUFS_DEBUG
+static int aufs_write_begin(struct file *file, struct address_space *mapping,
+			    loff_t pos, unsigned len, unsigned flags,
+			    struct page **pagep, void **fsdata)
+{ AuUnsupport(); return 0; }
+static int aufs_write_end(struct file *file, struct address_space *mapping,
+			  loff_t pos, unsigned len, unsigned copied,
+			  struct page *page, void *fsdata)
+{ AuUnsupport(); return 0; }
+static int aufs_writepage(struct page *page, struct writeback_control *wbc)
+{ AuUnsupport(); return 0; }
+
+static int aufs_set_page_dirty(struct page *page)
+{ AuUnsupport(); return 0; }
+static void aufs_invalidatepage(struct page *page, unsigned int offset,
+				unsigned int length)
+{ AuUnsupport(); }
+static int aufs_releasepage(struct page *page, gfp_t gfp)
+{ AuUnsupport(); return 0; }
+#if 0 /* called by memory compaction regardless file */
+static int aufs_migratepage(struct address_space *mapping, struct page *newpage,
+			    struct page *page, enum migrate_mode mode)
+{ AuUnsupport(); return 0; }
+#endif
+static bool aufs_isolate_page(struct page *page, isolate_mode_t mode)
+{ AuUnsupport(); return true; }
+static void aufs_putback_page(struct page *page)
+{ AuUnsupport(); }
+static int aufs_launder_page(struct page *page)
+{ AuUnsupport(); return 0; }
+static int aufs_is_partially_uptodate(struct page *page,
+				      unsigned long from,
+				      unsigned long count)
+{ AuUnsupport(); return 0; }
+static void aufs_is_dirty_writeback(struct page *page, bool *dirty,
+				    bool *writeback)
+{ AuUnsupport(); }
+static int aufs_error_remove_page(struct address_space *mapping,
+				  struct page *page)
+{ AuUnsupport(); return 0; }
+static int aufs_swap_activate(struct swap_info_struct *sis, struct file *file,
+			      sector_t *span)
+{ AuUnsupport(); return 0; }
+static void aufs_swap_deactivate(struct file *file)
+{ AuUnsupport(); }
+#endif /* CONFIG_AUFS_DEBUG */
+
+const struct address_space_operations aufs_aop = {
+	.readpage		= aufs_readpage,
+	.direct_IO		= aufs_direct_IO,
+#ifdef CONFIG_AUFS_DEBUG
+	.writepage		= aufs_writepage,
+	/* no writepages, because of writepage */
+	.set_page_dirty		= aufs_set_page_dirty,
+	/* no readpages, because of readpage */
+	.write_begin		= aufs_write_begin,
+	.write_end		= aufs_write_end,
+	/* no bmap, no block device */
+	.invalidatepage		= aufs_invalidatepage,
+	.releasepage		= aufs_releasepage,
+	/* is fallback_migrate_page ok? */
+	/* .migratepage		= aufs_migratepage, */
+	.isolate_page		= aufs_isolate_page,
+	.putback_page		= aufs_putback_page,
+	.launder_page		= aufs_launder_page,
+	.is_partially_uptodate	= aufs_is_partially_uptodate,
+	.is_dirty_writeback	= aufs_is_dirty_writeback,
+	.error_remove_page	= aufs_error_remove_page,
+	.swap_activate		= aufs_swap_activate,
+	.swap_deactivate	= aufs_swap_deactivate
+#endif /* CONFIG_AUFS_DEBUG */
+};
diff --git a/include/fs/aufs/file.h b/include/fs/aufs/file.h
new file mode 100644
index 000000000..ed79fa1df
--- /dev/null
+++ b/include/fs/aufs/file.h
@@ -0,0 +1,340 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * file operations
+ */
+
+#ifndef __AUFS_FILE_H__
+#define __AUFS_FILE_H__
+
+#ifdef __KERNEL__
+
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/mm_types.h>
+#include <linux/poll.h>
+#include "rwsem.h"
+
+struct au_branch;
+struct au_hfile {
+	struct file		*hf_file;
+	struct au_branch	*hf_br;
+};
+
+struct au_vdir;
+struct au_fidir {
+	aufs_bindex_t		fd_bbot;
+	aufs_bindex_t		fd_nent;
+	struct au_vdir		*fd_vdir_cache;
+	struct au_hfile		fd_hfile[];
+};
+
+static inline int au_fidir_sz(int nent)
+{
+	AuDebugOn(nent < 0);
+	return sizeof(struct au_fidir) + sizeof(struct au_hfile) * nent;
+}
+
+struct au_finfo {
+	atomic_t		fi_generation;
+
+	struct au_rwsem		fi_rwsem;
+	aufs_bindex_t		fi_btop;
+
+	/* do not union them */
+	struct {				/* for non-dir */
+		struct au_hfile			fi_htop;
+		atomic_t			fi_mmapped;
+	};
+	struct au_fidir		*fi_hdir;	/* for dir only */
+
+	struct hlist_bl_node	fi_hlist;
+	struct file		*fi_file;	/* very ugly */
+} ____cacheline_aligned_in_smp;
+
+/* ---------------------------------------------------------------------- */
+
+/* file.c */
+extern const struct address_space_operations aufs_aop;
+unsigned int au_file_roflags(unsigned int flags);
+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
+		       struct file *file, int force_wr);
+struct au_do_open_args {
+	int		aopen;
+	int		(*open)(struct file *file, int flags,
+				struct file *h_file);
+	struct au_fidir	*fidir;
+	struct file	*h_file;
+};
+int au_do_open(struct file *file, struct au_do_open_args *args);
+int au_reopen_nondir(struct file *file);
+struct au_pin;
+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin);
+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
+			  int wlock, unsigned int fi_lsc);
+int au_do_flush(struct file *file, fl_owner_t id,
+		int (*flush)(struct file *file, fl_owner_t id));
+
+/* poll.c */
+#ifdef CONFIG_AUFS_POLL
+unsigned int aufs_poll(struct file *file, poll_table *wait);
+#endif
+
+#ifdef CONFIG_AUFS_BR_HFSPLUS
+/* hfsplus.c */
+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex,
+			   int force_wr);
+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
+		    struct file *h_file);
+#else
+AuStub(struct file *, au_h_open_pre, return NULL, struct dentry *dentry,
+       aufs_bindex_t bindex, int force_wr)
+AuStubVoid(au_h_open_post, struct dentry *dentry, aufs_bindex_t bindex,
+	   struct file *h_file);
+#endif
+
+/* f_op.c */
+extern const struct file_operations aufs_file_fop;
+int au_do_open_nondir(struct file *file, int flags, struct file *h_file);
+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file);
+struct file *au_read_pre(struct file *file, int keep_fi, unsigned int lsc);
+
+/* finfo.c */
+void au_hfput(struct au_hfile *hf, int execed);
+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex,
+		   struct file *h_file);
+
+void au_update_figen(struct file *file);
+struct au_fidir *au_fidir_alloc(struct super_block *sb);
+int au_fidir_realloc(struct au_finfo *finfo, int nbr, int may_shrink);
+
+void au_fi_init_once(void *_fi);
+void au_finfo_fin(struct file *file);
+int au_finfo_init(struct file *file, struct au_fidir *fidir);
+
+/* ioctl.c */
+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg);
+#ifdef CONFIG_COMPAT
+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
+			   unsigned long arg);
+long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
+			      unsigned long arg);
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+static inline struct au_finfo *au_fi(struct file *file)
+{
+	return file->private_data;
+}
+
+/* ---------------------------------------------------------------------- */
+
+#define fi_read_lock(f)	au_rw_read_lock(&au_fi(f)->fi_rwsem)
+#define fi_write_lock(f)	au_rw_write_lock(&au_fi(f)->fi_rwsem)
+#define fi_read_trylock(f)	au_rw_read_trylock(&au_fi(f)->fi_rwsem)
+#define fi_write_trylock(f)	au_rw_write_trylock(&au_fi(f)->fi_rwsem)
+/*
+#define fi_read_trylock_nested(f) \
+	au_rw_read_trylock_nested(&au_fi(f)->fi_rwsem)
+#define fi_write_trylock_nested(f) \
+	au_rw_write_trylock_nested(&au_fi(f)->fi_rwsem)
+*/
+
+#define fi_read_unlock(f)	au_rw_read_unlock(&au_fi(f)->fi_rwsem)
+#define fi_write_unlock(f)	au_rw_write_unlock(&au_fi(f)->fi_rwsem)
+#define fi_downgrade_lock(f)	au_rw_dgrade_lock(&au_fi(f)->fi_rwsem)
+
+/* lock subclass for finfo */
+enum {
+	AuLsc_FI_1,
+	AuLsc_FI_2
+};
+
+static inline void fi_read_lock_nested(struct file *f, unsigned int lsc)
+{
+	au_rw_read_lock_nested(&au_fi(f)->fi_rwsem, lsc);
+}
+
+static inline void fi_write_lock_nested(struct file *f, unsigned int lsc)
+{
+	au_rw_write_lock_nested(&au_fi(f)->fi_rwsem, lsc);
+}
+
+/*
+ * fi_read_lock_1, fi_write_lock_1,
+ * fi_read_lock_2, fi_write_lock_2
+ */
+#define AuReadLockFunc(name) \
+static inline void fi_read_lock_##name(struct file *f) \
+{ fi_read_lock_nested(f, AuLsc_FI_##name); }
+
+#define AuWriteLockFunc(name) \
+static inline void fi_write_lock_##name(struct file *f) \
+{ fi_write_lock_nested(f, AuLsc_FI_##name); }
+
+#define AuRWLockFuncs(name) \
+	AuReadLockFunc(name) \
+	AuWriteLockFunc(name)
+
+AuRWLockFuncs(1);
+AuRWLockFuncs(2);
+
+#undef AuReadLockFunc
+#undef AuWriteLockFunc
+#undef AuRWLockFuncs
+
+#define FiMustNoWaiters(f)	AuRwMustNoWaiters(&au_fi(f)->fi_rwsem)
+#define FiMustAnyLock(f)	AuRwMustAnyLock(&au_fi(f)->fi_rwsem)
+#define FiMustWriteLock(f)	AuRwMustWriteLock(&au_fi(f)->fi_rwsem)
+
+/* ---------------------------------------------------------------------- */
+
+/* todo: hard/soft set? */
+static inline aufs_bindex_t au_fbtop(struct file *file)
+{
+	FiMustAnyLock(file);
+	return au_fi(file)->fi_btop;
+}
+
+static inline aufs_bindex_t au_fbbot_dir(struct file *file)
+{
+	FiMustAnyLock(file);
+	AuDebugOn(!au_fi(file)->fi_hdir);
+	return au_fi(file)->fi_hdir->fd_bbot;
+}
+
+static inline struct au_vdir *au_fvdir_cache(struct file *file)
+{
+	FiMustAnyLock(file);
+	AuDebugOn(!au_fi(file)->fi_hdir);
+	return au_fi(file)->fi_hdir->fd_vdir_cache;
+}
+
+static inline void au_set_fbtop(struct file *file, aufs_bindex_t bindex)
+{
+	FiMustWriteLock(file);
+	au_fi(file)->fi_btop = bindex;
+}
+
+static inline void au_set_fbbot_dir(struct file *file, aufs_bindex_t bindex)
+{
+	FiMustWriteLock(file);
+	AuDebugOn(!au_fi(file)->fi_hdir);
+	au_fi(file)->fi_hdir->fd_bbot = bindex;
+}
+
+static inline void au_set_fvdir_cache(struct file *file,
+				      struct au_vdir *vdir_cache)
+{
+	FiMustWriteLock(file);
+	AuDebugOn(!au_fi(file)->fi_hdir);
+	au_fi(file)->fi_hdir->fd_vdir_cache = vdir_cache;
+}
+
+static inline struct file *au_hf_top(struct file *file)
+{
+	FiMustAnyLock(file);
+	AuDebugOn(au_fi(file)->fi_hdir);
+	return au_fi(file)->fi_htop.hf_file;
+}
+
+static inline struct file *au_hf_dir(struct file *file, aufs_bindex_t bindex)
+{
+	FiMustAnyLock(file);
+	AuDebugOn(!au_fi(file)->fi_hdir);
+	return au_fi(file)->fi_hdir->fd_hfile[0 + bindex].hf_file;
+}
+
+/* todo: memory barrier? */
+static inline unsigned int au_figen(struct file *f)
+{
+	return atomic_read(&au_fi(f)->fi_generation);
+}
+
+static inline void au_set_mmapped(struct file *f)
+{
+	if (atomic_inc_return(&au_fi(f)->fi_mmapped))
+		return;
+	pr_warn("fi_mmapped wrapped around\n");
+	while (!atomic_inc_return(&au_fi(f)->fi_mmapped))
+		;
+}
+
+static inline void au_unset_mmapped(struct file *f)
+{
+	atomic_dec(&au_fi(f)->fi_mmapped);
+}
+
+static inline int au_test_mmapped(struct file *f)
+{
+	return atomic_read(&au_fi(f)->fi_mmapped);
+}
+
+/* customize vma->vm_file */
+
+static inline void au_do_vm_file_reset(struct vm_area_struct *vma,
+				       struct file *file)
+{
+	struct file *f;
+
+	f = vma->vm_file;
+	get_file(file);
+	vma->vm_file = file;
+	fput(f);
+}
+
+#ifdef CONFIG_MMU
+#define AuDbgVmRegion(file, vma) do {} while (0)
+
+static inline void au_vm_file_reset(struct vm_area_struct *vma,
+				    struct file *file)
+{
+	au_do_vm_file_reset(vma, file);
+}
+#else
+#define AuDbgVmRegion(file, vma) \
+	AuDebugOn((vma)->vm_region && (vma)->vm_region->vm_file != (file))
+
+static inline void au_vm_file_reset(struct vm_area_struct *vma,
+				    struct file *file)
+{
+	struct file *f;
+
+	au_do_vm_file_reset(vma, file);
+	f = vma->vm_region->vm_file;
+	get_file(file);
+	vma->vm_region->vm_file = file;
+	fput(f);
+}
+#endif /* CONFIG_MMU */
+
+/* handle vma->vm_prfile */
+static inline void au_vm_prfile_set(struct vm_area_struct *vma,
+				    struct file *file)
+{
+	get_file(file);
+	vma->vm_prfile = file;
+#ifndef CONFIG_MMU
+	get_file(file);
+	vma->vm_region->vm_prfile = file;
+#endif
+}
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_FILE_H__ */
diff --git a/include/fs/aufs/finfo.c b/include/fs/aufs/finfo.c
new file mode 100644
index 000000000..3144f462a
--- /dev/null
+++ b/include/fs/aufs/finfo.c
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * file private data
+ */
+
+#include "aufs.h"
+
+void au_hfput(struct au_hfile *hf, int execed)
+{
+	if (execed)
+		allow_write_access(hf->hf_file);
+	fput(hf->hf_file);
+	hf->hf_file = NULL;
+	au_br_put(hf->hf_br);
+	hf->hf_br = NULL;
+}
+
+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, struct file *val)
+{
+	struct au_finfo *finfo = au_fi(file);
+	struct au_hfile *hf;
+	struct au_fidir *fidir;
+
+	fidir = finfo->fi_hdir;
+	if (!fidir) {
+		AuDebugOn(finfo->fi_btop != bindex);
+		hf = &finfo->fi_htop;
+	} else
+		hf = fidir->fd_hfile + bindex;
+
+	if (hf && hf->hf_file)
+		au_hfput(hf, vfsub_file_execed(file));
+	if (val) {
+		FiMustWriteLock(file);
+		AuDebugOn(IS_ERR_OR_NULL(file->f_path.dentry));
+		hf->hf_file = val;
+		hf->hf_br = au_sbr(file->f_path.dentry->d_sb, bindex);
+	}
+}
+
+void au_update_figen(struct file *file)
+{
+	atomic_set(&au_fi(file)->fi_generation, au_digen(file->f_path.dentry));
+	/* smp_mb(); */ /* atomic_set */
+}
+
+/* ---------------------------------------------------------------------- */
+
+struct au_fidir *au_fidir_alloc(struct super_block *sb)
+{
+	struct au_fidir *fidir;
+	int nbr;
+
+	nbr = au_sbbot(sb) + 1;
+	if (nbr < 2)
+		nbr = 2; /* initial allocate for 2 branches */
+	fidir = kzalloc(au_fidir_sz(nbr), GFP_NOFS);
+	if (fidir) {
+		fidir->fd_bbot = -1;
+		fidir->fd_nent = nbr;
+	}
+
+	return fidir;
+}
+
+int au_fidir_realloc(struct au_finfo *finfo, int nbr, int may_shrink)
+{
+	int err;
+	struct au_fidir *fidir, *p;
+
+	AuRwMustWriteLock(&finfo->fi_rwsem);
+	fidir = finfo->fi_hdir;
+	AuDebugOn(!fidir);
+
+	err = -ENOMEM;
+	p = au_kzrealloc(fidir, au_fidir_sz(fidir->fd_nent), au_fidir_sz(nbr),
+			 GFP_NOFS, may_shrink);
+	if (p) {
+		p->fd_nent = nbr;
+		finfo->fi_hdir = p;
+		err = 0;
+	}
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void au_finfo_fin(struct file *file)
+{
+	struct au_finfo *finfo;
+
+	au_nfiles_dec(file->f_path.dentry->d_sb);
+
+	finfo = au_fi(file);
+	AuDebugOn(finfo->fi_hdir);
+	AuRwDestroy(&finfo->fi_rwsem);
+	au_cache_free_finfo(finfo);
+}
+
+void au_fi_init_once(void *_finfo)
+{
+	struct au_finfo *finfo = _finfo;
+
+	au_rw_init(&finfo->fi_rwsem);
+}
+
+int au_finfo_init(struct file *file, struct au_fidir *fidir)
+{
+	int err;
+	struct au_finfo *finfo;
+	struct dentry *dentry;
+
+	err = -ENOMEM;
+	dentry = file->f_path.dentry;
+	finfo = au_cache_alloc_finfo();
+	if (unlikely(!finfo))
+		goto out;
+
+	err = 0;
+	au_nfiles_inc(dentry->d_sb);
+	au_rw_write_lock(&finfo->fi_rwsem);
+	finfo->fi_btop = -1;
+	finfo->fi_hdir = fidir;
+	atomic_set(&finfo->fi_generation, au_digen(dentry));
+	/* smp_mb(); */ /* atomic_set */
+
+	file->private_data = finfo;
+
+out:
+	return err;
+}
diff --git a/include/fs/aufs/fstype.h b/include/fs/aufs/fstype.h
new file mode 100644
index 000000000..7562506dc
--- /dev/null
+++ b/include/fs/aufs/fstype.h
@@ -0,0 +1,400 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * judging filesystem type
+ */
+
+#ifndef __AUFS_FSTYPE_H__
+#define __AUFS_FSTYPE_H__
+
+#ifdef __KERNEL__
+
+#include <linux/fs.h>
+#include <linux/magic.h>
+#include <linux/nfs_fs.h>
+#include <linux/romfs_fs.h>
+
+static inline int au_test_aufs(struct super_block *sb)
+{
+	return sb->s_magic == AUFS_SUPER_MAGIC;
+}
+
+static inline const char *au_sbtype(struct super_block *sb)
+{
+	return sb->s_type->name;
+}
+
+static inline int au_test_iso9660(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_ISO9660_FS)
+	return sb->s_magic == ISOFS_SUPER_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_romfs(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_ROMFS_FS)
+	return sb->s_magic == ROMFS_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_cramfs(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_CRAMFS)
+	return sb->s_magic == CRAMFS_MAGIC;
+#endif
+	return 0;
+}
+
+static inline int au_test_nfs(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_NFS_FS)
+	return sb->s_magic == NFS_SUPER_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_fuse(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_FUSE_FS)
+	return sb->s_magic == FUSE_SUPER_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_xfs(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_XFS_FS)
+	return sb->s_magic == XFS_SB_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_tmpfs(struct super_block *sb __maybe_unused)
+{
+#ifdef CONFIG_TMPFS
+	return sb->s_magic == TMPFS_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_ecryptfs(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_ECRYPT_FS)
+	return !strcmp(au_sbtype(sb), "ecryptfs");
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_ramfs(struct super_block *sb)
+{
+	return sb->s_magic == RAMFS_MAGIC;
+}
+
+static inline int au_test_ubifs(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_UBIFS_FS)
+	return sb->s_magic == UBIFS_SUPER_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_procfs(struct super_block *sb __maybe_unused)
+{
+#ifdef CONFIG_PROC_FS
+	return sb->s_magic == PROC_SUPER_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_sysfs(struct super_block *sb __maybe_unused)
+{
+#ifdef CONFIG_SYSFS
+	return sb->s_magic == SYSFS_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_configfs(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_CONFIGFS_FS)
+	return sb->s_magic == CONFIGFS_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_minix(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_MINIX_FS)
+	return sb->s_magic == MINIX3_SUPER_MAGIC
+		|| sb->s_magic == MINIX2_SUPER_MAGIC
+		|| sb->s_magic == MINIX2_SUPER_MAGIC2
+		|| sb->s_magic == MINIX_SUPER_MAGIC
+		|| sb->s_magic == MINIX_SUPER_MAGIC2;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_fat(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_FAT_FS)
+	return sb->s_magic == MSDOS_SUPER_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_msdos(struct super_block *sb)
+{
+	return au_test_fat(sb);
+}
+
+static inline int au_test_vfat(struct super_block *sb)
+{
+	return au_test_fat(sb);
+}
+
+static inline int au_test_securityfs(struct super_block *sb __maybe_unused)
+{
+#ifdef CONFIG_SECURITYFS
+	return sb->s_magic == SECURITYFS_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_squashfs(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_SQUASHFS)
+	return sb->s_magic == SQUASHFS_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_btrfs(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_BTRFS_FS)
+	return sb->s_magic == BTRFS_SUPER_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_xenfs(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_XENFS)
+	return sb->s_magic == XENFS_SUPER_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_debugfs(struct super_block *sb __maybe_unused)
+{
+#ifdef CONFIG_DEBUG_FS
+	return sb->s_magic == DEBUGFS_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_nilfs(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_NILFS)
+	return sb->s_magic == NILFS_SUPER_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+static inline int au_test_hfsplus(struct super_block *sb __maybe_unused)
+{
+#if IS_ENABLED(CONFIG_HFSPLUS_FS)
+	return sb->s_magic == HFSPLUS_SUPER_MAGIC;
+#else
+	return 0;
+#endif
+}
+
+/* ---------------------------------------------------------------------- */
+/*
+ * they can't be an aufs branch.
+ */
+static inline int au_test_fs_unsuppoted(struct super_block *sb)
+{
+	return
+#ifndef CONFIG_AUFS_BR_RAMFS
+		au_test_ramfs(sb) ||
+#endif
+		au_test_procfs(sb)
+		|| au_test_sysfs(sb)
+		|| au_test_configfs(sb)
+		|| au_test_debugfs(sb)
+		|| au_test_securityfs(sb)
+		|| au_test_xenfs(sb)
+		|| au_test_ecryptfs(sb)
+		/* || !strcmp(au_sbtype(sb), "unionfs") */
+		|| au_test_aufs(sb); /* will be supported in next version */
+}
+
+static inline int au_test_fs_remote(struct super_block *sb)
+{
+	return !au_test_tmpfs(sb)
+#ifdef CONFIG_AUFS_BR_RAMFS
+		&& !au_test_ramfs(sb)
+#endif
+		&& !(sb->s_type->fs_flags & FS_REQUIRES_DEV);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * Note: these functions (below) are created after reading ->getattr() in all
+ * filesystems under linux/fs. it means we have to do so in every update...
+ */
+
+/*
+ * some filesystems require getattr to refresh the inode attributes before
+ * referencing.
+ * in most cases, we can rely on the inode attribute in NFS (or every remote fs)
+ * and leave the work for d_revalidate()
+ */
+static inline int au_test_fs_refresh_iattr(struct super_block *sb)
+{
+	return au_test_nfs(sb)
+		|| au_test_fuse(sb)
+		/* || au_test_btrfs(sb) */	/* untested */
+		;
+}
+
+/*
+ * filesystems which don't maintain i_size or i_blocks.
+ */
+static inline int au_test_fs_bad_iattr_size(struct super_block *sb)
+{
+	return au_test_xfs(sb)
+		|| au_test_btrfs(sb)
+		|| au_test_ubifs(sb)
+		|| au_test_hfsplus(sb)	/* maintained, but incorrect */
+		/* || au_test_minix(sb) */	/* untested */
+		;
+}
+
+/*
+ * filesystems which don't store the correct value in some of their inode
+ * attributes.
+ */
+static inline int au_test_fs_bad_iattr(struct super_block *sb)
+{
+	return au_test_fs_bad_iattr_size(sb)
+		|| au_test_fat(sb)
+		|| au_test_msdos(sb)
+		|| au_test_vfat(sb);
+}
+
+/* they don't check i_nlink in link(2) */
+static inline int au_test_fs_no_limit_nlink(struct super_block *sb)
+{
+	return au_test_tmpfs(sb)
+#ifdef CONFIG_AUFS_BR_RAMFS
+		|| au_test_ramfs(sb)
+#endif
+		|| au_test_ubifs(sb)
+		|| au_test_hfsplus(sb);
+}
+
+/*
+ * filesystems which sets S_NOATIME and S_NOCMTIME.
+ */
+static inline int au_test_fs_notime(struct super_block *sb)
+{
+	return au_test_nfs(sb)
+		|| au_test_fuse(sb)
+		|| au_test_ubifs(sb)
+		;
+}
+
+/* temporary support for i#1 in cramfs */
+static inline int au_test_fs_unique_ino(struct inode *inode)
+{
+	if (au_test_cramfs(inode->i_sb))
+		return inode->i_ino != 1;
+	return 1;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * the filesystem where the xino files placed must support i/o after unlink and
+ * maintain i_size and i_blocks.
+ */
+static inline int au_test_fs_bad_xino(struct super_block *sb)
+{
+	return au_test_fs_remote(sb)
+		|| au_test_fs_bad_iattr_size(sb)
+		/* don't want unnecessary work for xino */
+		|| au_test_aufs(sb)
+		|| au_test_ecryptfs(sb)
+		|| au_test_nilfs(sb);
+}
+
+static inline int au_test_fs_trunc_xino(struct super_block *sb)
+{
+	return au_test_tmpfs(sb)
+		|| au_test_ramfs(sb);
+}
+
+/*
+ * test if the @sb is real-readonly.
+ */
+static inline int au_test_fs_rr(struct super_block *sb)
+{
+	return au_test_squashfs(sb)
+		|| au_test_iso9660(sb)
+		|| au_test_cramfs(sb)
+		|| au_test_romfs(sb);
+}
+
+/*
+ * test if the @inode is nfs with 'noacl' option
+ * NFS always sets SB_POSIXACL regardless its mount option 'noacl.'
+ */
+static inline int au_test_nfs_noacl(struct inode *inode)
+{
+	return au_test_nfs(inode->i_sb)
+		/* && IS_POSIXACL(inode) */
+		&& !nfs_server_capable(inode, NFS_CAP_ACLS);
+}
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_FSTYPE_H__ */
diff --git a/include/fs/aufs/hbl.h b/include/fs/aufs/hbl.h
new file mode 100644
index 000000000..328586e37
--- /dev/null
+++ b/include/fs/aufs/hbl.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2017-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * helpers for hlist_bl.h
+ */
+
+#ifndef __AUFS_HBL_H__
+#define __AUFS_HBL_H__
+
+#ifdef __KERNEL__
+
+#include <linux/list_bl.h>
+
+static inline void au_hbl_add(struct hlist_bl_node *node,
+			      struct hlist_bl_head *hbl)
+{
+	hlist_bl_lock(hbl);
+	hlist_bl_add_head(node, hbl);
+	hlist_bl_unlock(hbl);
+}
+
+static inline void au_hbl_del(struct hlist_bl_node *node,
+			      struct hlist_bl_head *hbl)
+{
+	hlist_bl_lock(hbl);
+	hlist_bl_del(node);
+	hlist_bl_unlock(hbl);
+}
+
+#define au_hbl_for_each(pos, head)					\
+	for (pos = hlist_bl_first(head);				\
+	     pos;							\
+	     pos = pos->next)
+
+static inline unsigned long au_hbl_count(struct hlist_bl_head *hbl)
+{
+	unsigned long cnt;
+	struct hlist_bl_node *pos;
+
+	cnt = 0;
+	hlist_bl_lock(hbl);
+	au_hbl_for_each(pos, hbl)
+		cnt++;
+	hlist_bl_unlock(hbl);
+	return cnt;
+}
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_HBL_H__ */
diff --git a/include/fs/aufs/hfsnotify.c b/include/fs/aufs/hfsnotify.c
new file mode 100644
index 000000000..edd7f6385
--- /dev/null
+++ b/include/fs/aufs/hfsnotify.c
@@ -0,0 +1,289 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * fsnotify for the lower directories
+ */
+
+#include "aufs.h"
+
+/* FS_IN_IGNORED is unnecessary */
+static const __u32 AuHfsnMask = (FS_MOVED_TO | FS_MOVED_FROM | FS_DELETE
+				 | FS_CREATE | FS_EVENT_ON_CHILD);
+static DECLARE_WAIT_QUEUE_HEAD(au_hfsn_wq);
+static __cacheline_aligned_in_smp atomic64_t au_hfsn_ifree = ATOMIC64_INIT(0);
+
+static void au_hfsn_free_mark(struct fsnotify_mark *mark)
+{
+	struct au_hnotify *hn = container_of(mark, struct au_hnotify,
+					     hn_mark);
+	/* AuDbg("here\n"); */
+	au_cache_free_hnotify(hn);
+	smp_mb__before_atomic(); /* for atomic64_dec */
+	if (atomic64_dec_and_test(&au_hfsn_ifree))
+		wake_up(&au_hfsn_wq);
+}
+
+static int au_hfsn_alloc(struct au_hinode *hinode)
+{
+	int err;
+	struct au_hnotify *hn;
+	struct super_block *sb;
+	struct au_branch *br;
+	struct fsnotify_mark *mark;
+	aufs_bindex_t bindex;
+
+	hn = hinode->hi_notify;
+	sb = hn->hn_aufs_inode->i_sb;
+	bindex = au_br_index(sb, hinode->hi_id);
+	br = au_sbr(sb, bindex);
+	AuDebugOn(!br->br_hfsn);
+
+	mark = &hn->hn_mark;
+	fsnotify_init_mark(mark, br->br_hfsn->hfsn_group);
+	mark->mask = AuHfsnMask;
+	/*
+	 * by udba rename or rmdir, aufs assign a new inode to the known
+	 * h_inode, so specify 1 to allow dups.
+	 */
+	lockdep_off();
+	err = fsnotify_add_mark(mark, hinode->hi_inode, /*mnt*/NULL,
+				/*allow_dups*/1);
+	lockdep_on();
+
+	return err;
+}
+
+static int au_hfsn_free(struct au_hinode *hinode, struct au_hnotify *hn)
+{
+	struct fsnotify_mark *mark;
+	unsigned long long ull;
+	struct fsnotify_group *group;
+
+	ull = atomic64_inc_return(&au_hfsn_ifree);
+	BUG_ON(!ull);
+
+	mark = &hn->hn_mark;
+	spin_lock(&mark->lock);
+	group = mark->group;
+	fsnotify_get_group(group);
+	spin_unlock(&mark->lock);
+	lockdep_off();
+	fsnotify_destroy_mark(mark, group);
+	fsnotify_put_mark(mark);
+	fsnotify_put_group(group);
+	lockdep_on();
+
+	/* free hn by myself */
+	return 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static void au_hfsn_ctl(struct au_hinode *hinode, int do_set)
+{
+	struct fsnotify_mark *mark;
+
+	mark = &hinode->hi_notify->hn_mark;
+	spin_lock(&mark->lock);
+	if (do_set) {
+		AuDebugOn(mark->mask & AuHfsnMask);
+		mark->mask |= AuHfsnMask;
+	} else {
+		AuDebugOn(!(mark->mask & AuHfsnMask));
+		mark->mask &= ~AuHfsnMask;
+	}
+	spin_unlock(&mark->lock);
+	/* fsnotify_recalc_inode_mask(hinode->hi_inode); */
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* #define AuDbgHnotify */
+#ifdef AuDbgHnotify
+static char *au_hfsn_name(u32 mask)
+{
+#ifdef CONFIG_AUFS_DEBUG
+#define test_ret(flag)				\
+	do {					\
+		if (mask & flag)		\
+			return #flag;		\
+	} while (0)
+	test_ret(FS_ACCESS);
+	test_ret(FS_MODIFY);
+	test_ret(FS_ATTRIB);
+	test_ret(FS_CLOSE_WRITE);
+	test_ret(FS_CLOSE_NOWRITE);
+	test_ret(FS_OPEN);
+	test_ret(FS_MOVED_FROM);
+	test_ret(FS_MOVED_TO);
+	test_ret(FS_CREATE);
+	test_ret(FS_DELETE);
+	test_ret(FS_DELETE_SELF);
+	test_ret(FS_MOVE_SELF);
+	test_ret(FS_UNMOUNT);
+	test_ret(FS_Q_OVERFLOW);
+	test_ret(FS_IN_IGNORED);
+	test_ret(FS_ISDIR);
+	test_ret(FS_IN_ONESHOT);
+	test_ret(FS_EVENT_ON_CHILD);
+	return "";
+#undef test_ret
+#else
+	return "??";
+#endif
+}
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+static void au_hfsn_free_group(struct fsnotify_group *group)
+{
+	struct au_br_hfsnotify *hfsn = group->private;
+
+	/* AuDbg("here\n"); */
+	kfree(hfsn);
+}
+
+static int au_hfsn_handle_event(struct fsnotify_group *group,
+				struct inode *inode,
+				struct fsnotify_mark *inode_mark,
+				struct fsnotify_mark *vfsmount_mark,
+				u32 mask, const void *data, int data_type,
+				const unsigned char *file_name, u32 cookie,
+				struct fsnotify_iter_info *iter_info)
+{
+	int err;
+	struct au_hnotify *hnotify;
+	struct inode *h_dir, *h_inode;
+	struct qstr h_child_qstr = QSTR_INIT(file_name, strlen(file_name));
+
+	AuDebugOn(data_type != FSNOTIFY_EVENT_INODE);
+
+	err = 0;
+	/* if FS_UNMOUNT happens, there must be another bug */
+	AuDebugOn(mask & FS_UNMOUNT);
+	if (mask & (FS_IN_IGNORED | FS_UNMOUNT))
+		goto out;
+
+	h_dir = inode;
+	h_inode = NULL;
+#ifdef AuDbgHnotify
+	au_debug_on();
+	if (1 || h_child_qstr.len != sizeof(AUFS_XINO_FNAME) - 1
+	    || strncmp(h_child_qstr.name, AUFS_XINO_FNAME, h_child_qstr.len)) {
+		AuDbg("i%lu, mask 0x%x %s, hcname %.*s, hi%lu\n",
+		      h_dir->i_ino, mask, au_hfsn_name(mask),
+		      AuLNPair(&h_child_qstr), h_inode ? h_inode->i_ino : 0);
+		/* WARN_ON(1); */
+	}
+	au_debug_off();
+#endif
+
+	AuDebugOn(!inode_mark);
+	hnotify = container_of(inode_mark, struct au_hnotify, hn_mark);
+	err = au_hnotify(h_dir, hnotify, mask, &h_child_qstr, h_inode);
+
+out:
+	return err;
+}
+
+static struct fsnotify_ops au_hfsn_ops = {
+	.handle_event		= au_hfsn_handle_event,
+	.free_group_priv	= au_hfsn_free_group,
+	.free_mark		= au_hfsn_free_mark
+};
+
+/* ---------------------------------------------------------------------- */
+
+static void au_hfsn_fin_br(struct au_branch *br)
+{
+	struct au_br_hfsnotify *hfsn;
+
+	hfsn = br->br_hfsn;
+	if (hfsn) {
+		lockdep_off();
+		fsnotify_put_group(hfsn->hfsn_group);
+		lockdep_on();
+	}
+}
+
+static int au_hfsn_init_br(struct au_branch *br, int perm)
+{
+	int err;
+	struct fsnotify_group *group;
+	struct au_br_hfsnotify *hfsn;
+
+	err = 0;
+	br->br_hfsn = NULL;
+	if (!au_br_hnotifyable(perm))
+		goto out;
+
+	err = -ENOMEM;
+	hfsn = kmalloc(sizeof(*hfsn), GFP_NOFS);
+	if (unlikely(!hfsn))
+		goto out;
+
+	err = 0;
+	group = fsnotify_alloc_group(&au_hfsn_ops);
+	if (IS_ERR(group)) {
+		err = PTR_ERR(group);
+		pr_err("fsnotify_alloc_group() failed, %d\n", err);
+		goto out_hfsn;
+	}
+
+	group->private = hfsn;
+	hfsn->hfsn_group = group;
+	br->br_hfsn = hfsn;
+	goto out; /* success */
+
+out_hfsn:
+	kfree(hfsn);
+out:
+	return err;
+}
+
+static int au_hfsn_reset_br(unsigned int udba, struct au_branch *br, int perm)
+{
+	int err;
+
+	err = 0;
+	if (!br->br_hfsn)
+		err = au_hfsn_init_br(br, perm);
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static void au_hfsn_fin(void)
+{
+	AuDbg("au_hfsn_ifree %lld\n", (long long)atomic64_read(&au_hfsn_ifree));
+	wait_event(au_hfsn_wq, !atomic64_read(&au_hfsn_ifree));
+}
+
+const struct au_hnotify_op au_hnotify_op = {
+	.ctl		= au_hfsn_ctl,
+	.alloc		= au_hfsn_alloc,
+	.free		= au_hfsn_free,
+
+	.fin		= au_hfsn_fin,
+
+	.reset_br	= au_hfsn_reset_br,
+	.fin_br		= au_hfsn_fin_br,
+	.init_br	= au_hfsn_init_br
+};
diff --git a/include/fs/aufs/hfsplus.c b/include/fs/aufs/hfsplus.c
new file mode 100644
index 000000000..f4ed51678
--- /dev/null
+++ b/include/fs/aufs/hfsplus.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2010-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * special support for filesystems which aqucires an inode mutex
+ * at final closing a file, eg, hfsplus.
+ *
+ * This trick is very simple and stupid, just to open the file before really
+ * neceeary open to tell hfsplus that this is not the final closing.
+ * The caller should call au_h_open_pre() after acquiring the inode mutex,
+ * and au_h_open_post() after releasing it.
+ */
+
+#include "aufs.h"
+
+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex,
+			   int force_wr)
+{
+	struct file *h_file;
+	struct dentry *h_dentry;
+
+	h_dentry = au_h_dptr(dentry, bindex);
+	AuDebugOn(!h_dentry);
+	AuDebugOn(d_is_negative(h_dentry));
+
+	h_file = NULL;
+	if (au_test_hfsplus(h_dentry->d_sb)
+	    && d_is_reg(h_dentry))
+		h_file = au_h_open(dentry, bindex,
+				   O_RDONLY | O_NOATIME | O_LARGEFILE,
+				   /*file*/NULL, force_wr);
+	return h_file;
+}
+
+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
+		    struct file *h_file)
+{
+	if (h_file) {
+		fput(h_file);
+		au_sbr_put(dentry->d_sb, bindex);
+	}
+}
diff --git a/include/fs/aufs/hnotify.c b/include/fs/aufs/hnotify.c
new file mode 100644
index 000000000..7dd64918b
--- /dev/null
+++ b/include/fs/aufs/hnotify.c
@@ -0,0 +1,719 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * abstraction to notify the direct changes on lower directories
+ */
+
+#include "aufs.h"
+
+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode)
+{
+	int err;
+	struct au_hnotify *hn;
+
+	err = -ENOMEM;
+	hn = au_cache_alloc_hnotify();
+	if (hn) {
+		hn->hn_aufs_inode = inode;
+		hinode->hi_notify = hn;
+		err = au_hnotify_op.alloc(hinode);
+		AuTraceErr(err);
+		if (unlikely(err)) {
+			hinode->hi_notify = NULL;
+			au_cache_free_hnotify(hn);
+			/*
+			 * The upper dir was removed by udba, but the same named
+			 * dir left. In this case, aufs assignes a new inode
+			 * number and set the monitor again.
+			 * For the lower dir, the old monitnor is still left.
+			 */
+			if (err == -EEXIST)
+				err = 0;
+		}
+	}
+
+	AuTraceErr(err);
+	return err;
+}
+
+void au_hn_free(struct au_hinode *hinode)
+{
+	struct au_hnotify *hn;
+
+	hn = hinode->hi_notify;
+	if (hn) {
+		hinode->hi_notify = NULL;
+		if (au_hnotify_op.free(hinode, hn))
+			au_cache_free_hnotify(hn);
+	}
+}
+
+/* ---------------------------------------------------------------------- */
+
+void au_hn_ctl(struct au_hinode *hinode, int do_set)
+{
+	if (hinode->hi_notify)
+		au_hnotify_op.ctl(hinode, do_set);
+}
+
+void au_hn_reset(struct inode *inode, unsigned int flags)
+{
+	aufs_bindex_t bindex, bbot;
+	struct inode *hi;
+	struct dentry *iwhdentry;
+
+	bbot = au_ibbot(inode);
+	for (bindex = au_ibtop(inode); bindex <= bbot; bindex++) {
+		hi = au_h_iptr(inode, bindex);
+		if (!hi)
+			continue;
+
+		/* inode_lock_nested(hi, AuLsc_I_CHILD); */
+		iwhdentry = au_hi_wh(inode, bindex);
+		if (iwhdentry)
+			dget(iwhdentry);
+		au_igrab(hi);
+		au_set_h_iptr(inode, bindex, NULL, 0);
+		au_set_h_iptr(inode, bindex, au_igrab(hi),
+			      flags & ~AuHi_XINO);
+		iput(hi);
+		dput(iwhdentry);
+		/* inode_unlock(hi); */
+	}
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int hn_xino(struct inode *inode, struct inode *h_inode)
+{
+	int err;
+	aufs_bindex_t bindex, bbot, bfound, btop;
+	struct inode *h_i;
+
+	err = 0;
+	if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
+		pr_warn("branch root dir was changed\n");
+		goto out;
+	}
+
+	bfound = -1;
+	bbot = au_ibbot(inode);
+	btop = au_ibtop(inode);
+#if 0 /* reserved for future use */
+	if (bindex == bbot) {
+		/* keep this ino in rename case */
+		goto out;
+	}
+#endif
+	for (bindex = btop; bindex <= bbot; bindex++)
+		if (au_h_iptr(inode, bindex) == h_inode) {
+			bfound = bindex;
+			break;
+		}
+	if (bfound < 0)
+		goto out;
+
+	for (bindex = btop; bindex <= bbot; bindex++) {
+		h_i = au_h_iptr(inode, bindex);
+		if (!h_i)
+			continue;
+
+		err = au_xino_write(inode->i_sb, bindex, h_i->i_ino, /*ino*/0);
+		/* ignore this error */
+		/* bad action? */
+	}
+
+	/* children inode number will be broken */
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+static int hn_gen_tree(struct dentry *dentry)
+{
+	int err, i, j, ndentry;
+	struct au_dcsub_pages dpages;
+	struct au_dpage *dpage;
+	struct dentry **dentries;
+
+	err = au_dpages_init(&dpages, GFP_NOFS);
+	if (unlikely(err))
+		goto out;
+	err = au_dcsub_pages(&dpages, dentry, NULL, NULL);
+	if (unlikely(err))
+		goto out_dpages;
+
+	for (i = 0; i < dpages.ndpage; i++) {
+		dpage = dpages.dpages + i;
+		dentries = dpage->dentries;
+		ndentry = dpage->ndentry;
+		for (j = 0; j < ndentry; j++) {
+			struct dentry *d;
+
+			d = dentries[j];
+			if (IS_ROOT(d))
+				continue;
+
+			au_digen_dec(d);
+			if (d_really_is_positive(d))
+				/* todo: reset children xino?
+				   cached children only? */
+				au_iigen_dec(d_inode(d));
+		}
+	}
+
+out_dpages:
+	au_dpages_free(&dpages);
+
+#if 0
+	/* discard children */
+	dentry_unhash(dentry);
+	dput(dentry);
+#endif
+out:
+	return err;
+}
+
+/*
+ * return 0 if processed.
+ */
+static int hn_gen_by_inode(char *name, unsigned int nlen, struct inode *inode,
+			   const unsigned int isdir)
+{
+	int err;
+	struct dentry *d;
+	struct qstr *dname;
+
+	err = 1;
+	if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
+		pr_warn("branch root dir was changed\n");
+		err = 0;
+		goto out;
+	}
+
+	if (!isdir) {
+		AuDebugOn(!name);
+		au_iigen_dec(inode);
+		spin_lock(&inode->i_lock);
+		hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias) {
+			spin_lock(&d->d_lock);
+			dname = &d->d_name;
+			if (dname->len != nlen
+			    && memcmp(dname->name, name, nlen)) {
+				spin_unlock(&d->d_lock);
+				continue;
+			}
+			err = 0;
+			au_digen_dec(d);
+			spin_unlock(&d->d_lock);
+			break;
+		}
+		spin_unlock(&inode->i_lock);
+	} else {
+		au_fset_si(au_sbi(inode->i_sb), FAILED_REFRESH_DIR);
+		d = d_find_any_alias(inode);
+		if (!d) {
+			au_iigen_dec(inode);
+			goto out;
+		}
+
+		spin_lock(&d->d_lock);
+		dname = &d->d_name;
+		if (dname->len == nlen && !memcmp(dname->name, name, nlen)) {
+			spin_unlock(&d->d_lock);
+			err = hn_gen_tree(d);
+			spin_lock(&d->d_lock);
+		}
+		spin_unlock(&d->d_lock);
+		dput(d);
+	}
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+static int hn_gen_by_name(struct dentry *dentry, const unsigned int isdir)
+{
+	int err;
+
+	if (IS_ROOT(dentry)) {
+		pr_warn("branch root dir was changed\n");
+		return 0;
+	}
+
+	err = 0;
+	if (!isdir) {
+		au_digen_dec(dentry);
+		if (d_really_is_positive(dentry))
+			au_iigen_dec(d_inode(dentry));
+	} else {
+		au_fset_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR);
+		if (d_really_is_positive(dentry))
+			err = hn_gen_tree(dentry);
+	}
+
+	AuTraceErr(err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* hnotify job flags */
+#define AuHnJob_XINO0		1
+#define AuHnJob_GEN		(1 << 1)
+#define AuHnJob_DIRENT		(1 << 2)
+#define AuHnJob_ISDIR		(1 << 3)
+#define AuHnJob_TRYXINO0	(1 << 4)
+#define AuHnJob_MNTPNT		(1 << 5)
+#define au_ftest_hnjob(flags, name)	((flags) & AuHnJob_##name)
+#define au_fset_hnjob(flags, name) \
+	do { (flags) |= AuHnJob_##name; } while (0)
+#define au_fclr_hnjob(flags, name) \
+	do { (flags) &= ~AuHnJob_##name; } while (0)
+
+enum {
+	AuHn_CHILD,
+	AuHn_PARENT,
+	AuHnLast
+};
+
+struct au_hnotify_args {
+	struct inode *h_dir, *dir, *h_child_inode;
+	u32 mask;
+	unsigned int flags[AuHnLast];
+	unsigned int h_child_nlen;
+	char h_child_name[];
+};
+
+struct hn_job_args {
+	unsigned int flags;
+	struct inode *inode, *h_inode, *dir, *h_dir;
+	struct dentry *dentry;
+	char *h_name;
+	int h_nlen;
+};
+
+static int hn_job(struct hn_job_args *a)
+{
+	const unsigned int isdir = au_ftest_hnjob(a->flags, ISDIR);
+	int e;
+
+	/* reset xino */
+	if (au_ftest_hnjob(a->flags, XINO0) && a->inode)
+		hn_xino(a->inode, a->h_inode); /* ignore this error */
+
+	if (au_ftest_hnjob(a->flags, TRYXINO0)
+	    && a->inode
+	    && a->h_inode) {
+		vfsub_inode_lock_shared_nested(a->h_inode, AuLsc_I_CHILD);
+		if (!a->h_inode->i_nlink
+		    && !(a->h_inode->i_state & I_LINKABLE))
+			hn_xino(a->inode, a->h_inode); /* ignore this error */
+		inode_unlock_shared(a->h_inode);
+	}
+
+	/* make the generation obsolete */
+	if (au_ftest_hnjob(a->flags, GEN)) {
+		e = -1;
+		if (a->inode)
+			e = hn_gen_by_inode(a->h_name, a->h_nlen, a->inode,
+					      isdir);
+		if (e && a->dentry)
+			hn_gen_by_name(a->dentry, isdir);
+		/* ignore this error */
+	}
+
+	/* make dir entries obsolete */
+	if (au_ftest_hnjob(a->flags, DIRENT) && a->inode) {
+		struct au_vdir *vdir;
+
+		vdir = au_ivdir(a->inode);
+		if (vdir)
+			vdir->vd_jiffy = 0;
+		/* IMustLock(a->inode); */
+		/* a->inode->i_version++; */
+	}
+
+	/* can do nothing but warn */
+	if (au_ftest_hnjob(a->flags, MNTPNT)
+	    && a->dentry
+	    && d_mountpoint(a->dentry))
+		pr_warn("mount-point %pd is removed or renamed\n", a->dentry);
+
+	return 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static struct dentry *lookup_wlock_by_name(char *name, unsigned int nlen,
+					   struct inode *dir)
+{
+	struct dentry *dentry, *d, *parent;
+	struct qstr *dname;
+
+	parent = d_find_any_alias(dir);
+	if (!parent)
+		return NULL;
+
+	dentry = NULL;
+	spin_lock(&parent->d_lock);
+	list_for_each_entry(d, &parent->d_subdirs, d_child) {
+		/* AuDbg("%pd\n", d); */
+		spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
+		dname = &d->d_name;
+		if (dname->len != nlen || memcmp(dname->name, name, nlen))
+			goto cont_unlock;
+		if (au_di(d))
+			au_digen_dec(d);
+		else
+			goto cont_unlock;
+		if (au_dcount(d) > 0) {
+			dentry = dget_dlock(d);
+			spin_unlock(&d->d_lock);
+			break;
+		}
+
+cont_unlock:
+		spin_unlock(&d->d_lock);
+	}
+	spin_unlock(&parent->d_lock);
+	dput(parent);
+
+	if (dentry)
+		di_write_lock_child(dentry);
+
+	return dentry;
+}
+
+static struct inode *lookup_wlock_by_ino(struct super_block *sb,
+					 aufs_bindex_t bindex, ino_t h_ino)
+{
+	struct inode *inode;
+	ino_t ino;
+	int err;
+
+	inode = NULL;
+	err = au_xino_read(sb, bindex, h_ino, &ino);
+	if (!err && ino)
+		inode = ilookup(sb, ino);
+	if (!inode)
+		goto out;
+
+	if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
+		pr_warn("wrong root branch\n");
+		iput(inode);
+		inode = NULL;
+		goto out;
+	}
+
+	ii_write_lock_child(inode);
+
+out:
+	return inode;
+}
+
+static void au_hn_bh(void *_args)
+{
+	struct au_hnotify_args *a = _args;
+	struct super_block *sb;
+	aufs_bindex_t bindex, bbot, bfound;
+	unsigned char xino, try_iput;
+	int err;
+	struct inode *inode;
+	ino_t h_ino;
+	struct hn_job_args args;
+	struct dentry *dentry;
+	struct au_sbinfo *sbinfo;
+
+	AuDebugOn(!_args);
+	AuDebugOn(!a->h_dir);
+	AuDebugOn(!a->dir);
+	AuDebugOn(!a->mask);
+	AuDbg("mask 0x%x, i%lu, hi%lu, hci%lu\n",
+	      a->mask, a->dir->i_ino, a->h_dir->i_ino,
+	      a->h_child_inode ? a->h_child_inode->i_ino : 0);
+
+	inode = NULL;
+	dentry = NULL;
+	/*
+	 * do not lock a->dir->i_mutex here
+	 * because of d_revalidate() may cause a deadlock.
+	 */
+	sb = a->dir->i_sb;
+	AuDebugOn(!sb);
+	sbinfo = au_sbi(sb);
+	AuDebugOn(!sbinfo);
+	si_write_lock(sb, AuLock_NOPLMW);
+
+	if (au_opt_test(sbinfo->si_mntflags, DIRREN))
+		switch (a->mask & FS_EVENTS_POSS_ON_CHILD) {
+		case FS_MOVED_FROM:
+		case FS_MOVED_TO:
+			AuWarn1("DIRREN with UDBA may not work correctly "
+				"for the direct rename(2)\n");
+		}
+
+	ii_read_lock_parent(a->dir);
+	bfound = -1;
+	bbot = au_ibbot(a->dir);
+	for (bindex = au_ibtop(a->dir); bindex <= bbot; bindex++)
+		if (au_h_iptr(a->dir, bindex) == a->h_dir) {
+			bfound = bindex;
+			break;
+		}
+	ii_read_unlock(a->dir);
+	if (unlikely(bfound < 0))
+		goto out;
+
+	xino = !!au_opt_test(au_mntflags(sb), XINO);
+	h_ino = 0;
+	if (a->h_child_inode)
+		h_ino = a->h_child_inode->i_ino;
+
+	if (a->h_child_nlen
+	    && (au_ftest_hnjob(a->flags[AuHn_CHILD], GEN)
+		|| au_ftest_hnjob(a->flags[AuHn_CHILD], MNTPNT)))
+		dentry = lookup_wlock_by_name(a->h_child_name, a->h_child_nlen,
+					      a->dir);
+	try_iput = 0;
+	if (dentry && d_really_is_positive(dentry))
+		inode = d_inode(dentry);
+	if (xino && !inode && h_ino
+	    && (au_ftest_hnjob(a->flags[AuHn_CHILD], XINO0)
+		|| au_ftest_hnjob(a->flags[AuHn_CHILD], TRYXINO0)
+		|| au_ftest_hnjob(a->flags[AuHn_CHILD], GEN))) {
+		inode = lookup_wlock_by_ino(sb, bfound, h_ino);
+		try_iput = 1;
+	}
+
+	args.flags = a->flags[AuHn_CHILD];
+	args.dentry = dentry;
+	args.inode = inode;
+	args.h_inode = a->h_child_inode;
+	args.dir = a->dir;
+	args.h_dir = a->h_dir;
+	args.h_name = a->h_child_name;
+	args.h_nlen = a->h_child_nlen;
+	err = hn_job(&args);
+	if (dentry) {
+		if (au_di(dentry))
+			di_write_unlock(dentry);
+		dput(dentry);
+	}
+	if (inode && try_iput) {
+		ii_write_unlock(inode);
+		iput(inode);
+	}
+
+	ii_write_lock_parent(a->dir);
+	args.flags = a->flags[AuHn_PARENT];
+	args.dentry = NULL;
+	args.inode = a->dir;
+	args.h_inode = a->h_dir;
+	args.dir = NULL;
+	args.h_dir = NULL;
+	args.h_name = NULL;
+	args.h_nlen = 0;
+	err = hn_job(&args);
+	ii_write_unlock(a->dir);
+
+out:
+	iput(a->h_child_inode);
+	iput(a->h_dir);
+	iput(a->dir);
+	si_write_unlock(sb);
+	au_nwt_done(&sbinfo->si_nowait);
+	kfree(a);
+}
+
+/* ---------------------------------------------------------------------- */
+
+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
+	       struct qstr *h_child_qstr, struct inode *h_child_inode)
+{
+	int err, len;
+	unsigned int flags[AuHnLast], f;
+	unsigned char isdir, isroot, wh;
+	struct inode *dir;
+	struct au_hnotify_args *args;
+	char *p, *h_child_name;
+
+	err = 0;
+	AuDebugOn(!hnotify || !hnotify->hn_aufs_inode);
+	dir = igrab(hnotify->hn_aufs_inode);
+	if (!dir)
+		goto out;
+
+	isroot = (dir->i_ino == AUFS_ROOT_INO);
+	wh = 0;
+	h_child_name = (void *)h_child_qstr->name;
+	len = h_child_qstr->len;
+	if (h_child_name) {
+		if (len > AUFS_WH_PFX_LEN
+		    && !memcmp(h_child_name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
+			h_child_name += AUFS_WH_PFX_LEN;
+			len -= AUFS_WH_PFX_LEN;
+			wh = 1;
+		}
+	}
+
+	isdir = 0;
+	if (h_child_inode)
+		isdir = !!S_ISDIR(h_child_inode->i_mode);
+	flags[AuHn_PARENT] = AuHnJob_ISDIR;
+	flags[AuHn_CHILD] = 0;
+	if (isdir)
+		flags[AuHn_CHILD] = AuHnJob_ISDIR;
+	au_fset_hnjob(flags[AuHn_PARENT], DIRENT);
+	au_fset_hnjob(flags[AuHn_CHILD], GEN);
+	switch (mask & FS_EVENTS_POSS_ON_CHILD) {
+	case FS_MOVED_FROM:
+	case FS_MOVED_TO:
+		au_fset_hnjob(flags[AuHn_CHILD], XINO0);
+		au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
+		/*FALLTHROUGH*/
+	case FS_CREATE:
+		AuDebugOn(!h_child_name);
+		break;
+
+	case FS_DELETE:
+		/*
+		 * aufs never be able to get this child inode.
+		 * revalidation should be in d_revalidate()
+		 * by checking i_nlink, i_generation or d_unhashed().
+		 */
+		AuDebugOn(!h_child_name);
+		au_fset_hnjob(flags[AuHn_CHILD], TRYXINO0);
+		au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
+		break;
+
+	default:
+		AuDebugOn(1);
+	}
+
+	if (wh)
+		h_child_inode = NULL;
+
+	err = -ENOMEM;
+	/* iput() and kfree() will be called in au_hnotify() */
+	args = kmalloc(sizeof(*args) + len + 1, GFP_NOFS);
+	if (unlikely(!args)) {
+		AuErr1("no memory\n");
+		iput(dir);
+		goto out;
+	}
+	args->flags[AuHn_PARENT] = flags[AuHn_PARENT];
+	args->flags[AuHn_CHILD] = flags[AuHn_CHILD];
+	args->mask = mask;
+	args->dir = dir;
+	args->h_dir = igrab(h_dir);
+	if (h_child_inode)
+		h_child_inode = igrab(h_child_inode); /* can be NULL */
+	args->h_child_inode = h_child_inode;
+	args->h_child_nlen = len;
+	if (len) {
+		p = (void *)args;
+		p += sizeof(*args);
+		memcpy(p, h_child_name, len);
+		p[len] = 0;
+	}
+
+	/* NFS fires the event for silly-renamed one from kworker */
+	f = 0;
+	if (!dir->i_nlink
+	    || (au_test_nfs(h_dir->i_sb) && (mask & FS_DELETE)))
+		f = AuWkq_NEST;
+	err = au_wkq_nowait(au_hn_bh, args, dir->i_sb, f);
+	if (unlikely(err)) {
+		pr_err("wkq %d\n", err);
+		iput(args->h_child_inode);
+		iput(args->h_dir);
+		iput(args->dir);
+		kfree(args);
+	}
+
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm)
+{
+	int err;
+
+	AuDebugOn(!(udba & AuOptMask_UDBA));
+
+	err = 0;
+	if (au_hnotify_op.reset_br)
+		err = au_hnotify_op.reset_br(udba, br, perm);
+
+	return err;
+}
+
+int au_hnotify_init_br(struct au_branch *br, int perm)
+{
+	int err;
+
+	err = 0;
+	if (au_hnotify_op.init_br)
+		err = au_hnotify_op.init_br(br, perm);
+
+	return err;
+}
+
+void au_hnotify_fin_br(struct au_branch *br)
+{
+	if (au_hnotify_op.fin_br)
+		au_hnotify_op.fin_br(br);
+}
+
+static void au_hn_destroy_cache(void)
+{
+	kmem_cache_destroy(au_cache[AuCache_HNOTIFY]);
+	au_cache[AuCache_HNOTIFY] = NULL;
+}
+
+int __init au_hnotify_init(void)
+{
+	int err;
+
+	err = -ENOMEM;
+	au_cache[AuCache_HNOTIFY] = AuCache(au_hnotify);
+	if (au_cache[AuCache_HNOTIFY]) {
+		err = 0;
+		if (au_hnotify_op.init)
+			err = au_hnotify_op.init();
+		if (unlikely(err))
+			au_hn_destroy_cache();
+	}
+	AuTraceErr(err);
+	return err;
+}
+
+void au_hnotify_fin(void)
+{
+	if (au_hnotify_op.fin)
+		au_hnotify_op.fin();
+
+	/* cf. au_cache_fin() */
+	if (au_cache[AuCache_HNOTIFY])
+		au_hn_destroy_cache();
+}
diff --git a/include/fs/aufs/i_op.c b/include/fs/aufs/i_op.c
new file mode 100644
index 000000000..80bd9beba
--- /dev/null
+++ b/include/fs/aufs/i_op.c
@@ -0,0 +1,1459 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * inode operations (except add/del/rename)
+ */
+
+#include <linux/device_cgroup.h>
+#include <linux/fs_stack.h>
+#include <linux/namei.h>
+#include <linux/security.h>
+#include "aufs.h"
+
+static int h_permission(struct inode *h_inode, int mask,
+			struct path *h_path, int brperm)
+{
+	int err;
+	const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
+
+	err = -EPERM;
+	if (write_mask && IS_IMMUTABLE(h_inode))
+		goto out;
+
+	err = -EACCES;
+	if (((mask & MAY_EXEC)
+	     && S_ISREG(h_inode->i_mode)
+	     && (path_noexec(h_path)
+		 || !(h_inode->i_mode & S_IXUGO))))
+		goto out;
+
+	/*
+	 * - skip the lower fs test in the case of write to ro branch.
+	 * - nfs dir permission write check is optimized, but a policy for
+	 *   link/rename requires a real check.
+	 * - nfs always sets SB_POSIXACL regardless its mount option 'noacl.'
+	 *   in this case, generic_permission() returns -EOPNOTSUPP.
+	 */
+	if ((write_mask && !au_br_writable(brperm))
+	    || (au_test_nfs(h_inode->i_sb) && S_ISDIR(h_inode->i_mode)
+		&& write_mask && !(mask & MAY_READ))
+	    || !h_inode->i_op->permission) {
+		/* AuLabel(generic_permission); */
+		/* AuDbg("get_acl %pf\n", h_inode->i_op->get_acl); */
+		err = generic_permission(h_inode, mask);
+		if (err == -EOPNOTSUPP && au_test_nfs_noacl(h_inode))
+			err = h_inode->i_op->permission(h_inode, mask);
+		AuTraceErr(err);
+	} else {
+		/* AuLabel(h_inode->permission); */
+		err = h_inode->i_op->permission(h_inode, mask);
+		AuTraceErr(err);
+	}
+
+	if (!err)
+		err = devcgroup_inode_permission(h_inode, mask);
+	if (!err)
+		err = security_inode_permission(h_inode, mask);
+
+#if 0
+	if (!err) {
+		/* todo: do we need to call ima_path_check()? */
+		struct path h_path = {
+			.dentry	=
+			.mnt	= h_mnt
+		};
+		err = ima_path_check(&h_path,
+				     mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
+				     IMA_COUNT_LEAVE);
+	}
+#endif
+
+out:
+	return err;
+}
+
+static int aufs_permission(struct inode *inode, int mask)
+{
+	int err;
+	aufs_bindex_t bindex, bbot;
+	const unsigned char isdir = !!S_ISDIR(inode->i_mode),
+		write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
+	struct inode *h_inode;
+	struct super_block *sb;
+	struct au_branch *br;
+
+	/* todo: support rcu-walk? */
+	if (mask & MAY_NOT_BLOCK)
+		return -ECHILD;
+
+	sb = inode->i_sb;
+	si_read_lock(sb, AuLock_FLUSH);
+	ii_read_lock_child(inode);
+#if 0
+	err = au_iigen_test(inode, au_sigen(sb));
+	if (unlikely(err))
+		goto out;
+#endif
+
+	if (!isdir
+	    || write_mask
+	    || au_opt_test(au_mntflags(sb), DIRPERM1)) {
+		err = au_busy_or_stale();
+		h_inode = au_h_iptr(inode, au_ibtop(inode));
+		if (unlikely(!h_inode
+			     || (h_inode->i_mode & S_IFMT)
+			     != (inode->i_mode & S_IFMT)))
+			goto out;
+
+		err = 0;
+		bindex = au_ibtop(inode);
+		br = au_sbr(sb, bindex);
+		err = h_permission(h_inode, mask, &br->br_path, br->br_perm);
+		if (write_mask
+		    && !err
+		    && !special_file(h_inode->i_mode)) {
+			/* test whether the upper writable branch exists */
+			err = -EROFS;
+			for (; bindex >= 0; bindex--)
+				if (!au_br_rdonly(au_sbr(sb, bindex))) {
+					err = 0;
+					break;
+				}
+		}
+		goto out;
+	}
+
+	/* non-write to dir */
+	err = 0;
+	bbot = au_ibbot(inode);
+	for (bindex = au_ibtop(inode); !err && bindex <= bbot; bindex++) {
+		h_inode = au_h_iptr(inode, bindex);
+		if (h_inode) {
+			err = au_busy_or_stale();
+			if (unlikely(!S_ISDIR(h_inode->i_mode)))
+				break;
+
+			br = au_sbr(sb, bindex);
+			err = h_permission(h_inode, mask, &br->br_path,
+					   br->br_perm);
+		}
+	}
+
+out:
+	ii_read_unlock(inode);
+	si_read_unlock(sb);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry,
+				  unsigned int flags)
+{
+	struct dentry *ret, *parent;
+	struct inode *inode;
+	struct super_block *sb;
+	int err, npositive;
+
+	IMustLock(dir);
+
+	/* todo: support rcu-walk? */
+	ret = ERR_PTR(-ECHILD);
+	if (flags & LOOKUP_RCU)
+		goto out;
+
+	ret = ERR_PTR(-ENAMETOOLONG);
+	if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
+		goto out;
+
+	sb = dir->i_sb;
+	err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
+	ret = ERR_PTR(err);
+	if (unlikely(err))
+		goto out;
+
+	err = au_di_init(dentry);
+	ret = ERR_PTR(err);
+	if (unlikely(err))
+		goto out_si;
+
+	inode = NULL;
+	npositive = 0; /* suppress a warning */
+	parent = dentry->d_parent; /* dir inode is locked */
+	di_read_lock_parent(parent, AuLock_IR);
+	err = au_alive_dir(parent);
+	if (!err)
+		err = au_digen_test(parent, au_sigen(sb));
+	if (!err) {
+		/* regardless LOOKUP_CREATE, always ALLOW_NEG */
+		npositive = au_lkup_dentry(dentry, au_dbtop(parent),
+					   AuLkup_ALLOW_NEG);
+		err = npositive;
+	}
+	di_read_unlock(parent, AuLock_IR);
+	ret = ERR_PTR(err);
+	if (unlikely(err < 0))
+		goto out_unlock;
+
+	if (npositive) {
+		inode = au_new_inode(dentry, /*must_new*/0);
+		if (IS_ERR(inode)) {
+			ret = (void *)inode;
+			inode = NULL;
+			goto out_unlock;
+		}
+	}
+
+	if (inode)
+		atomic_inc(&inode->i_count);
+	ret = d_splice_alias(inode, dentry);
+#if 0
+	if (unlikely(d_need_lookup(dentry))) {
+		spin_lock(&dentry->d_lock);
+		dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
+		spin_unlock(&dentry->d_lock);
+	} else
+#endif
+	if (inode) {
+		if (!IS_ERR(ret)) {
+			iput(inode);
+			if (ret && ret != dentry)
+				ii_write_unlock(inode);
+		} else {
+			ii_write_unlock(inode);
+			iput(inode);
+			inode = NULL;
+		}
+	}
+
+out_unlock:
+	di_write_unlock(dentry);
+out_si:
+	si_read_unlock(sb);
+out:
+	return ret;
+}
+
+/* ---------------------------------------------------------------------- */
+
+struct aopen_node {
+	struct hlist_bl_node hblist;
+	struct file *file, *h_file;
+};
+
+static int au_do_aopen(struct inode *inode, struct file *file)
+{
+	struct hlist_bl_head *aopen;
+	struct hlist_bl_node *pos;
+	struct aopen_node *node;
+	struct au_do_open_args args = {
+		.aopen	= 1,
+		.open	= au_do_open_nondir
+	};
+
+	aopen = &au_sbi(inode->i_sb)->si_aopen;
+	hlist_bl_lock(aopen);
+	hlist_bl_for_each_entry(node, pos, aopen, hblist)
+		if (node->file == file) {
+			args.h_file = node->h_file;
+			break;
+		}
+	hlist_bl_unlock(aopen);
+	/* AuDebugOn(!args.h_file); */
+
+	return au_do_open(file, &args);
+}
+
+static int aufs_atomic_open(struct inode *dir, struct dentry *dentry,
+			    struct file *file, unsigned int open_flag,
+			    umode_t create_mode, int *opened)
+{
+	int err, unlocked, h_opened = *opened;
+	unsigned int lkup_flags;
+	struct dentry *parent, *d;
+	struct hlist_bl_head *aopen;
+	struct vfsub_aopen_args args = {
+		.open_flag	= open_flag,
+		.create_mode	= create_mode,
+		.opened		= &h_opened
+	};
+	struct aopen_node aopen_node = {
+		.file	= file
+	};
+
+	IMustLock(dir);
+	AuDbg("open_flag 0%o\n", open_flag);
+	AuDbgDentry(dentry);
+
+	err = 0;
+	if (!au_di(dentry)) {
+		lkup_flags = LOOKUP_OPEN;
+		if (open_flag & O_CREAT)
+			lkup_flags |= LOOKUP_CREATE;
+		d = aufs_lookup(dir, dentry, lkup_flags);
+		if (IS_ERR(d)) {
+			err = PTR_ERR(d);
+			AuTraceErr(err);
+			goto out;
+		} else if (d) {
+			/*
+			 * obsoleted dentry found.
+			 * another error will be returned later.
+			 */
+			d_drop(d);
+			AuDbgDentry(d);
+			dput(d);
+		}
+		AuDbgDentry(dentry);
+	}
+
+	if (d_is_positive(dentry)
+	    || d_unhashed(dentry)
+	    || d_unlinked(dentry)
+	    || !(open_flag & O_CREAT))
+		goto out_no_open;
+
+	unlocked = 0;
+	err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
+	if (unlikely(err))
+		goto out;
+
+	parent = dentry->d_parent;	/* dir is locked */
+	di_write_lock_parent(parent);
+	err = au_lkup_dentry(dentry, /*btop*/0, AuLkup_ALLOW_NEG);
+	if (unlikely(err))
+		goto out_unlock;
+
+	AuDbgDentry(dentry);
+	if (d_is_positive(dentry))
+		goto out_unlock;
+
+	args.file = get_empty_filp();
+	err = PTR_ERR(args.file);
+	if (IS_ERR(args.file))
+		goto out_unlock;
+
+	args.file->f_flags = file->f_flags;
+	err = au_aopen_or_create(dir, dentry, &args);
+	AuTraceErr(err);
+	AuDbgFile(args.file);
+	if (unlikely(err < 0)) {
+		if (h_opened & FILE_OPENED)
+			fput(args.file);
+		else
+			put_filp(args.file);
+		goto out_unlock;
+	}
+	di_write_unlock(parent);
+	di_write_unlock(dentry);
+	unlocked = 1;
+
+	/* some filesystems don't set FILE_CREATED while succeeded? */
+	*opened |= FILE_CREATED;
+	if (h_opened & FILE_OPENED)
+		aopen_node.h_file = args.file;
+	else {
+		put_filp(args.file);
+		args.file = NULL;
+	}
+	aopen = &au_sbi(dir->i_sb)->si_aopen;
+	au_hbl_add(&aopen_node.hblist, aopen);
+	err = finish_open(file, dentry, au_do_aopen, opened);
+	au_hbl_del(&aopen_node.hblist, aopen);
+	AuTraceErr(err);
+	AuDbgFile(file);
+	if (aopen_node.h_file)
+		fput(aopen_node.h_file);
+
+out_unlock:
+	if (unlocked)
+		si_read_unlock(dentry->d_sb);
+	else {
+		di_write_unlock(parent);
+		aufs_read_unlock(dentry, AuLock_DW);
+	}
+	AuDbgDentry(dentry);
+	if (unlikely(err < 0))
+		goto out;
+out_no_open:
+	if (err >= 0 && !(*opened & FILE_CREATED)) {
+		AuLabel(out_no_open);
+		dget(dentry);
+		err = finish_no_open(file, dentry);
+	}
+out:
+	AuDbg("%pd%s%s\n", dentry,
+	      (*opened & FILE_CREATED) ? " created" : "",
+	      (*opened & FILE_OPENED) ? " opened" : "");
+	AuTraceErr(err);
+	return err;
+}
+
+
+/* ---------------------------------------------------------------------- */
+
+static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent,
+			  const unsigned char add_entry, aufs_bindex_t bcpup,
+			  aufs_bindex_t btop)
+{
+	int err;
+	struct dentry *h_parent;
+	struct inode *h_dir;
+
+	if (add_entry)
+		IMustLock(d_inode(parent));
+	else
+		di_write_lock_parent(parent);
+
+	err = 0;
+	if (!au_h_dptr(parent, bcpup)) {
+		if (btop > bcpup)
+			err = au_cpup_dirs(dentry, bcpup);
+		else if (btop < bcpup)
+			err = au_cpdown_dirs(dentry, bcpup);
+		else
+			BUG();
+	}
+	if (!err && add_entry && !au_ftest_wrdir(add_entry, TMPFILE)) {
+		h_parent = au_h_dptr(parent, bcpup);
+		h_dir = d_inode(h_parent);
+		vfsub_inode_lock_shared_nested(h_dir, AuLsc_I_PARENT);
+		err = au_lkup_neg(dentry, bcpup, /*wh*/0);
+		/* todo: no unlock here */
+		inode_unlock_shared(h_dir);
+
+		AuDbg("bcpup %d\n", bcpup);
+		if (!err) {
+			if (d_really_is_negative(dentry))
+				au_set_h_dptr(dentry, btop, NULL);
+			au_update_dbrange(dentry, /*do_put_zero*/0);
+		}
+	}
+
+	if (!add_entry)
+		di_write_unlock(parent);
+	if (!err)
+		err = bcpup; /* success */
+
+	AuTraceErr(err);
+	return err;
+}
+
+/*
+ * decide the branch and the parent dir where we will create a new entry.
+ * returns new bindex or an error.
+ * copyup the parent dir if needed.
+ */
+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
+	      struct au_wr_dir_args *args)
+{
+	int err;
+	unsigned int flags;
+	aufs_bindex_t bcpup, btop, src_btop;
+	const unsigned char add_entry
+		= au_ftest_wrdir(args->flags, ADD_ENTRY)
+		| au_ftest_wrdir(args->flags, TMPFILE);
+	struct super_block *sb;
+	struct dentry *parent;
+	struct au_sbinfo *sbinfo;
+
+	sb = dentry->d_sb;
+	sbinfo = au_sbi(sb);
+	parent = dget_parent(dentry);
+	btop = au_dbtop(dentry);
+	bcpup = btop;
+	if (args->force_btgt < 0) {
+		if (src_dentry) {
+			src_btop = au_dbtop(src_dentry);
+			if (src_btop < btop)
+				bcpup = src_btop;
+		} else if (add_entry) {
+			flags = 0;
+			if (au_ftest_wrdir(args->flags, ISDIR))
+				au_fset_wbr(flags, DIR);
+			err = AuWbrCreate(sbinfo, dentry, flags);
+			bcpup = err;
+		}
+
+		if (bcpup < 0 || au_test_ro(sb, bcpup, d_inode(dentry))) {
+			if (add_entry)
+				err = AuWbrCopyup(sbinfo, dentry);
+			else {
+				if (!IS_ROOT(dentry)) {
+					di_read_lock_parent(parent, !AuLock_IR);
+					err = AuWbrCopyup(sbinfo, dentry);
+					di_read_unlock(parent, !AuLock_IR);
+				} else
+					err = AuWbrCopyup(sbinfo, dentry);
+			}
+			bcpup = err;
+			if (unlikely(err < 0))
+				goto out;
+		}
+	} else {
+		bcpup = args->force_btgt;
+		AuDebugOn(au_test_ro(sb, bcpup, d_inode(dentry)));
+	}
+
+	AuDbg("btop %d, bcpup %d\n", btop, bcpup);
+	err = bcpup;
+	if (bcpup == btop)
+		goto out; /* success */
+
+	/* copyup the new parent into the branch we process */
+	err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, btop);
+	if (err >= 0) {
+		if (d_really_is_negative(dentry)) {
+			au_set_h_dptr(dentry, btop, NULL);
+			au_set_dbtop(dentry, bcpup);
+			au_set_dbbot(dentry, bcpup);
+		}
+		AuDebugOn(add_entry
+			  && !au_ftest_wrdir(args->flags, TMPFILE)
+			  && !au_h_dptr(dentry, bcpup));
+	}
+
+out:
+	dput(parent);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void au_pin_hdir_unlock(struct au_pin *p)
+{
+	if (p->hdir)
+		au_hn_inode_unlock(p->hdir);
+}
+
+int au_pin_hdir_lock(struct au_pin *p)
+{
+	int err;
+
+	err = 0;
+	if (!p->hdir)
+		goto out;
+
+	/* even if an error happens later, keep this lock */
+	au_hn_inode_lock_nested(p->hdir, p->lsc_hi);
+
+	err = -EBUSY;
+	if (unlikely(p->hdir->hi_inode != d_inode(p->h_parent)))
+		goto out;
+
+	err = 0;
+	if (p->h_dentry)
+		err = au_h_verify(p->h_dentry, p->udba, p->hdir->hi_inode,
+				  p->h_parent, p->br);
+
+out:
+	return err;
+}
+
+int au_pin_hdir_relock(struct au_pin *p)
+{
+	int err, i;
+	struct inode *h_i;
+	struct dentry *h_d[] = {
+		p->h_dentry,
+		p->h_parent
+	};
+
+	err = au_pin_hdir_lock(p);
+	if (unlikely(err))
+		goto out;
+
+	for (i = 0; !err && i < sizeof(h_d)/sizeof(*h_d); i++) {
+		if (!h_d[i])
+			continue;
+		if (d_is_positive(h_d[i])) {
+			h_i = d_inode(h_d[i]);
+			err = !h_i->i_nlink;
+		}
+	}
+
+out:
+	return err;
+}
+
+static void au_pin_hdir_set_owner(struct au_pin *p, struct task_struct *task)
+{
+#if !defined(CONFIG_RWSEM_GENERIC_SPINLOCK) && defined(CONFIG_RWSEM_SPIN_ON_OWNER)
+	p->hdir->hi_inode->i_rwsem.owner = task;
+#endif
+}
+
+void au_pin_hdir_acquire_nest(struct au_pin *p)
+{
+	if (p->hdir) {
+		rwsem_acquire_nest(&p->hdir->hi_inode->i_rwsem.dep_map,
+				   p->lsc_hi, 0, NULL, _RET_IP_);
+		au_pin_hdir_set_owner(p, current);
+	}
+}
+
+void au_pin_hdir_release(struct au_pin *p)
+{
+	if (p->hdir) {
+		au_pin_hdir_set_owner(p, p->task);
+		rwsem_release(&p->hdir->hi_inode->i_rwsem.dep_map, 1, _RET_IP_);
+	}
+}
+
+struct dentry *au_pinned_h_parent(struct au_pin *pin)
+{
+	if (pin && pin->parent)
+		return au_h_dptr(pin->parent, pin->bindex);
+	return NULL;
+}
+
+void au_unpin(struct au_pin *p)
+{
+	if (p->hdir)
+		au_pin_hdir_unlock(p);
+	if (p->h_mnt && au_ftest_pin(p->flags, MNT_WRITE))
+		vfsub_mnt_drop_write(p->h_mnt);
+	if (!p->hdir)
+		return;
+
+	if (!au_ftest_pin(p->flags, DI_LOCKED))
+		di_read_unlock(p->parent, AuLock_IR);
+	iput(p->hdir->hi_inode);
+	dput(p->parent);
+	p->parent = NULL;
+	p->hdir = NULL;
+	p->h_mnt = NULL;
+	/* do not clear p->task */
+}
+
+int au_do_pin(struct au_pin *p)
+{
+	int err;
+	struct super_block *sb;
+	struct inode *h_dir;
+
+	err = 0;
+	sb = p->dentry->d_sb;
+	p->br = au_sbr(sb, p->bindex);
+	if (IS_ROOT(p->dentry)) {
+		if (au_ftest_pin(p->flags, MNT_WRITE)) {
+			p->h_mnt = au_br_mnt(p->br);
+			err = vfsub_mnt_want_write(p->h_mnt);
+			if (unlikely(err)) {
+				au_fclr_pin(p->flags, MNT_WRITE);
+				goto out_err;
+			}
+		}
+		goto out;
+	}
+
+	p->h_dentry = NULL;
+	if (p->bindex <= au_dbbot(p->dentry))
+		p->h_dentry = au_h_dptr(p->dentry, p->bindex);
+
+	p->parent = dget_parent(p->dentry);
+	if (!au_ftest_pin(p->flags, DI_LOCKED))
+		di_read_lock(p->parent, AuLock_IR, p->lsc_di);
+
+	h_dir = NULL;
+	p->h_parent = au_h_dptr(p->parent, p->bindex);
+	p->hdir = au_hi(d_inode(p->parent), p->bindex);
+	if (p->hdir)
+		h_dir = p->hdir->hi_inode;
+
+	/*
+	 * udba case, or
+	 * if DI_LOCKED is not set, then p->parent may be different
+	 * and h_parent can be NULL.
+	 */
+	if (unlikely(!p->hdir || !h_dir || !p->h_parent)) {
+		err = -EBUSY;
+		if (!au_ftest_pin(p->flags, DI_LOCKED))
+			di_read_unlock(p->parent, AuLock_IR);
+		dput(p->parent);
+		p->parent = NULL;
+		goto out_err;
+	}
+
+	if (au_ftest_pin(p->flags, MNT_WRITE)) {
+		p->h_mnt = au_br_mnt(p->br);
+		err = vfsub_mnt_want_write(p->h_mnt);
+		if (unlikely(err)) {
+			au_fclr_pin(p->flags, MNT_WRITE);
+			if (!au_ftest_pin(p->flags, DI_LOCKED))
+				di_read_unlock(p->parent, AuLock_IR);
+			dput(p->parent);
+			p->parent = NULL;
+			goto out_err;
+		}
+	}
+
+	au_igrab(h_dir);
+	err = au_pin_hdir_lock(p);
+	if (!err)
+		goto out; /* success */
+
+	au_unpin(p);
+
+out_err:
+	pr_err("err %d\n", err);
+	err = au_busy_or_stale();
+out:
+	return err;
+}
+
+void au_pin_init(struct au_pin *p, struct dentry *dentry,
+		 aufs_bindex_t bindex, int lsc_di, int lsc_hi,
+		 unsigned int udba, unsigned char flags)
+{
+	p->dentry = dentry;
+	p->udba = udba;
+	p->lsc_di = lsc_di;
+	p->lsc_hi = lsc_hi;
+	p->flags = flags;
+	p->bindex = bindex;
+
+	p->parent = NULL;
+	p->hdir = NULL;
+	p->h_mnt = NULL;
+
+	p->h_dentry = NULL;
+	p->h_parent = NULL;
+	p->br = NULL;
+	p->task = current;
+}
+
+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
+	   unsigned int udba, unsigned char flags)
+{
+	au_pin_init(pin, dentry, bindex, AuLsc_DI_PARENT, AuLsc_I_PARENT2,
+		    udba, flags);
+	return au_do_pin(pin);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * ->setattr() and ->getattr() are called in various cases.
+ * chmod, stat: dentry is revalidated.
+ * fchmod, fstat: file and dentry are not revalidated, additionally they may be
+ *		  unhashed.
+ * for ->setattr(), ia->ia_file is passed from ftruncate only.
+ */
+/* todo: consolidate with do_refresh() and simple_reval_dpath() */
+int au_reval_for_attr(struct dentry *dentry, unsigned int sigen)
+{
+	int err;
+	struct dentry *parent;
+
+	err = 0;
+	if (au_digen_test(dentry, sigen)) {
+		parent = dget_parent(dentry);
+		di_read_lock_parent(parent, AuLock_IR);
+		err = au_refresh_dentry(dentry, parent);
+		di_read_unlock(parent, AuLock_IR);
+		dput(parent);
+	}
+
+	AuTraceErr(err);
+	return err;
+}
+
+int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
+		     struct au_icpup_args *a)
+{
+	int err;
+	loff_t sz;
+	aufs_bindex_t btop, ibtop;
+	struct dentry *hi_wh, *parent;
+	struct inode *inode;
+	struct au_wr_dir_args wr_dir_args = {
+		.force_btgt	= -1,
+		.flags		= 0
+	};
+
+	if (d_is_dir(dentry))
+		au_fset_wrdir(wr_dir_args.flags, ISDIR);
+	/* plink or hi_wh() case */
+	btop = au_dbtop(dentry);
+	inode = d_inode(dentry);
+	ibtop = au_ibtop(inode);
+	if (btop != ibtop && !au_test_ro(inode->i_sb, ibtop, inode))
+		wr_dir_args.force_btgt = ibtop;
+	err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
+	if (unlikely(err < 0))
+		goto out;
+	a->btgt = err;
+	if (err != btop)
+		au_fset_icpup(a->flags, DID_CPUP);
+
+	err = 0;
+	a->pin_flags = AuPin_MNT_WRITE;
+	parent = NULL;
+	if (!IS_ROOT(dentry)) {
+		au_fset_pin(a->pin_flags, DI_LOCKED);
+		parent = dget_parent(dentry);
+		di_write_lock_parent(parent);
+	}
+
+	err = au_pin(&a->pin, dentry, a->btgt, a->udba, a->pin_flags);
+	if (unlikely(err))
+		goto out_parent;
+
+	sz = -1;
+	a->h_path.dentry = au_h_dptr(dentry, btop);
+	a->h_inode = d_inode(a->h_path.dentry);
+	if (ia && (ia->ia_valid & ATTR_SIZE)) {
+		vfsub_inode_lock_shared_nested(a->h_inode, AuLsc_I_CHILD);
+		if (ia->ia_size < i_size_read(a->h_inode))
+			sz = ia->ia_size;
+		inode_unlock_shared(a->h_inode);
+	}
+
+	hi_wh = NULL;
+	if (au_ftest_icpup(a->flags, DID_CPUP) && d_unlinked(dentry)) {
+		hi_wh = au_hi_wh(inode, a->btgt);
+		if (!hi_wh) {
+			struct au_cp_generic cpg = {
+				.dentry	= dentry,
+				.bdst	= a->btgt,
+				.bsrc	= -1,
+				.len	= sz,
+				.pin	= &a->pin
+			};
+			err = au_sio_cpup_wh(&cpg, /*file*/NULL);
+			if (unlikely(err))
+				goto out_unlock;
+			hi_wh = au_hi_wh(inode, a->btgt);
+			/* todo: revalidate hi_wh? */
+		}
+	}
+
+	if (parent) {
+		au_pin_set_parent_lflag(&a->pin, /*lflag*/0);
+		di_downgrade_lock(parent, AuLock_IR);
+		dput(parent);
+		parent = NULL;
+	}
+	if (!au_ftest_icpup(a->flags, DID_CPUP))
+		goto out; /* success */
+
+	if (!d_unhashed(dentry)) {
+		struct au_cp_generic cpg = {
+			.dentry	= dentry,
+			.bdst	= a->btgt,
+			.bsrc	= btop,
+			.len	= sz,
+			.pin	= &a->pin,
+			.flags	= AuCpup_DTIME | AuCpup_HOPEN
+		};
+		err = au_sio_cpup_simple(&cpg);
+		if (!err)
+			a->h_path.dentry = au_h_dptr(dentry, a->btgt);
+	} else if (!hi_wh)
+		a->h_path.dentry = au_h_dptr(dentry, a->btgt);
+	else
+		a->h_path.dentry = hi_wh; /* do not dget here */
+
+out_unlock:
+	a->h_inode = d_inode(a->h_path.dentry);
+	if (!err)
+		goto out; /* success */
+	au_unpin(&a->pin);
+out_parent:
+	if (parent) {
+		di_write_unlock(parent);
+		dput(parent);
+	}
+out:
+	if (!err)
+		inode_lock_nested(a->h_inode, AuLsc_I_CHILD);
+	return err;
+}
+
+static int aufs_setattr(struct dentry *dentry, struct iattr *ia)
+{
+	int err;
+	struct inode *inode, *delegated;
+	struct super_block *sb;
+	struct file *file;
+	struct au_icpup_args *a;
+
+	inode = d_inode(dentry);
+	IMustLock(inode);
+
+	err = setattr_prepare(dentry, ia);
+	if (unlikely(err))
+		goto out;
+
+	err = -ENOMEM;
+	a = kzalloc(sizeof(*a), GFP_NOFS);
+	if (unlikely(!a))
+		goto out;
+
+	if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
+		ia->ia_valid &= ~ATTR_MODE;
+
+	file = NULL;
+	sb = dentry->d_sb;
+	err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
+	if (unlikely(err))
+		goto out_kfree;
+
+	if (ia->ia_valid & ATTR_FILE) {
+		/* currently ftruncate(2) only */
+		AuDebugOn(!d_is_reg(dentry));
+		file = ia->ia_file;
+		err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1,
+					    /*fi_lsc*/0);
+		if (unlikely(err))
+			goto out_si;
+		ia->ia_file = au_hf_top(file);
+		a->udba = AuOpt_UDBA_NONE;
+	} else {
+		/* fchmod() doesn't pass ia_file */
+		a->udba = au_opt_udba(sb);
+		di_write_lock_child(dentry);
+		/* no d_unlinked(), to set UDBA_NONE for root */
+		if (d_unhashed(dentry))
+			a->udba = AuOpt_UDBA_NONE;
+		if (a->udba != AuOpt_UDBA_NONE) {
+			AuDebugOn(IS_ROOT(dentry));
+			err = au_reval_for_attr(dentry, au_sigen(sb));
+			if (unlikely(err))
+				goto out_dentry;
+		}
+	}
+
+	err = au_pin_and_icpup(dentry, ia, a);
+	if (unlikely(err < 0))
+		goto out_dentry;
+	if (au_ftest_icpup(a->flags, DID_CPUP)) {
+		ia->ia_file = NULL;
+		ia->ia_valid &= ~ATTR_FILE;
+	}
+
+	a->h_path.mnt = au_sbr_mnt(sb, a->btgt);
+	if ((ia->ia_valid & (ATTR_MODE | ATTR_CTIME))
+	    == (ATTR_MODE | ATTR_CTIME)) {
+		err = security_path_chmod(&a->h_path, ia->ia_mode);
+		if (unlikely(err))
+			goto out_unlock;
+	} else if ((ia->ia_valid & (ATTR_UID | ATTR_GID))
+		   && (ia->ia_valid & ATTR_CTIME)) {
+		err = security_path_chown(&a->h_path, ia->ia_uid, ia->ia_gid);
+		if (unlikely(err))
+			goto out_unlock;
+	}
+
+	if (ia->ia_valid & ATTR_SIZE) {
+		struct file *f;
+
+		if (ia->ia_size < i_size_read(inode))
+			/* unmap only */
+			truncate_setsize(inode, ia->ia_size);
+
+		f = NULL;
+		if (ia->ia_valid & ATTR_FILE)
+			f = ia->ia_file;
+		inode_unlock(a->h_inode);
+		err = vfsub_trunc(&a->h_path, ia->ia_size, ia->ia_valid, f);
+		inode_lock_nested(a->h_inode, AuLsc_I_CHILD);
+	} else {
+		delegated = NULL;
+		while (1) {
+			err = vfsub_notify_change(&a->h_path, ia, &delegated);
+			if (delegated) {
+				err = break_deleg_wait(&delegated);
+				if (!err)
+					continue;
+			}
+			break;
+		}
+	}
+	/*
+	 * regardless aufs 'acl' option setting.
+	 * why don't all acl-aware fs call this func from their ->setattr()?
+	 */
+	if (!err && (ia->ia_valid & ATTR_MODE))
+		err = vfsub_acl_chmod(a->h_inode, ia->ia_mode);
+	if (!err)
+		au_cpup_attr_changeable(inode);
+
+out_unlock:
+	inode_unlock(a->h_inode);
+	au_unpin(&a->pin);
+	if (unlikely(err))
+		au_update_dbtop(dentry);
+out_dentry:
+	di_write_unlock(dentry);
+	if (file) {
+		fi_write_unlock(file);
+		ia->ia_file = file;
+		ia->ia_valid |= ATTR_FILE;
+	}
+out_si:
+	si_read_unlock(sb);
+out_kfree:
+	kfree(a);
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+#if IS_ENABLED(CONFIG_AUFS_XATTR) || IS_ENABLED(CONFIG_FS_POSIX_ACL)
+static int au_h_path_to_set_attr(struct dentry *dentry,
+				 struct au_icpup_args *a, struct path *h_path)
+{
+	int err;
+	struct super_block *sb;
+
+	sb = dentry->d_sb;
+	a->udba = au_opt_udba(sb);
+	/* no d_unlinked(), to set UDBA_NONE for root */
+	if (d_unhashed(dentry))
+		a->udba = AuOpt_UDBA_NONE;
+	if (a->udba != AuOpt_UDBA_NONE) {
+		AuDebugOn(IS_ROOT(dentry));
+		err = au_reval_for_attr(dentry, au_sigen(sb));
+		if (unlikely(err))
+			goto out;
+	}
+	err = au_pin_and_icpup(dentry, /*ia*/NULL, a);
+	if (unlikely(err < 0))
+		goto out;
+
+	h_path->dentry = a->h_path.dentry;
+	h_path->mnt = au_sbr_mnt(sb, a->btgt);
+
+out:
+	return err;
+}
+
+ssize_t au_sxattr(struct dentry *dentry, struct inode *inode,
+		  struct au_sxattr *arg)
+{
+	int err;
+	struct path h_path;
+	struct super_block *sb;
+	struct au_icpup_args *a;
+	struct inode *h_inode;
+
+	IMustLock(inode);
+
+	err = -ENOMEM;
+	a = kzalloc(sizeof(*a), GFP_NOFS);
+	if (unlikely(!a))
+		goto out;
+
+	sb = dentry->d_sb;
+	err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
+	if (unlikely(err))
+		goto out_kfree;
+
+	h_path.dentry = NULL;	/* silence gcc */
+	di_write_lock_child(dentry);
+	err = au_h_path_to_set_attr(dentry, a, &h_path);
+	if (unlikely(err))
+		goto out_di;
+
+	inode_unlock(a->h_inode);
+	switch (arg->type) {
+	case AU_XATTR_SET:
+		AuDebugOn(d_is_negative(h_path.dentry));
+		err = vfsub_setxattr(h_path.dentry,
+				     arg->u.set.name, arg->u.set.value,
+				     arg->u.set.size, arg->u.set.flags);
+		break;
+	case AU_ACL_SET:
+		err = -EOPNOTSUPP;
+		h_inode = d_inode(h_path.dentry);
+		if (h_inode->i_op->set_acl)
+			/* this will call posix_acl_update_mode */
+			err = h_inode->i_op->set_acl(h_inode,
+						     arg->u.acl_set.acl,
+						     arg->u.acl_set.type);
+		break;
+	}
+	if (!err)
+		au_cpup_attr_timesizes(inode);
+
+	au_unpin(&a->pin);
+	if (unlikely(err))
+		au_update_dbtop(dentry);
+
+out_di:
+	di_write_unlock(dentry);
+	si_read_unlock(sb);
+out_kfree:
+	kfree(a);
+out:
+	AuTraceErr(err);
+	return err;
+}
+#endif
+
+static void au_refresh_iattr(struct inode *inode, struct kstat *st,
+			     unsigned int nlink)
+{
+	unsigned int n;
+
+	inode->i_mode = st->mode;
+	/* don't i_[ug]id_write() here */
+	inode->i_uid = st->uid;
+	inode->i_gid = st->gid;
+	inode->i_atime = st->atime;
+	inode->i_mtime = st->mtime;
+	inode->i_ctime = st->ctime;
+
+	au_cpup_attr_nlink(inode, /*force*/0);
+	if (S_ISDIR(inode->i_mode)) {
+		n = inode->i_nlink;
+		n -= nlink;
+		n += st->nlink;
+		smp_mb(); /* for i_nlink */
+		/* 0 can happen */
+		set_nlink(inode, n);
+	}
+
+	spin_lock(&inode->i_lock);
+	inode->i_blocks = st->blocks;
+	i_size_write(inode, st->size);
+	spin_unlock(&inode->i_lock);
+}
+
+/*
+ * common routine for aufs_getattr() and au_getxattr().
+ * returns zero or negative (an error).
+ * @dentry will be read-locked in success.
+ */
+int au_h_path_getattr(struct dentry *dentry, int force, struct path *h_path,
+		      int locked)
+{
+	int err;
+	unsigned int mnt_flags, sigen;
+	unsigned char udba_none;
+	aufs_bindex_t bindex;
+	struct super_block *sb, *h_sb;
+	struct inode *inode;
+
+	h_path->mnt = NULL;
+	h_path->dentry = NULL;
+
+	err = 0;
+	sb = dentry->d_sb;
+	mnt_flags = au_mntflags(sb);
+	udba_none = !!au_opt_test(mnt_flags, UDBA_NONE);
+
+	if (unlikely(locked))
+		goto body; /* skip locking dinfo */
+
+	/* support fstat(2) */
+	if (!d_unlinked(dentry) && !udba_none) {
+		sigen = au_sigen(sb);
+		err = au_digen_test(dentry, sigen);
+		if (!err) {
+			di_read_lock_child(dentry, AuLock_IR);
+			err = au_dbrange_test(dentry);
+			if (unlikely(err)) {
+				di_read_unlock(dentry, AuLock_IR);
+				goto out;
+			}
+		} else {
+			AuDebugOn(IS_ROOT(dentry));
+			di_write_lock_child(dentry);
+			err = au_dbrange_test(dentry);
+			if (!err)
+				err = au_reval_for_attr(dentry, sigen);
+			if (!err)
+				di_downgrade_lock(dentry, AuLock_IR);
+			else {
+				di_write_unlock(dentry);
+				goto out;
+			}
+		}
+	} else
+		di_read_lock_child(dentry, AuLock_IR);
+
+body:
+	inode = d_inode(dentry);
+	bindex = au_ibtop(inode);
+	h_path->mnt = au_sbr_mnt(sb, bindex);
+	h_sb = h_path->mnt->mnt_sb;
+	if (!force
+	    && !au_test_fs_bad_iattr(h_sb)
+	    && udba_none)
+		goto out; /* success */
+
+	if (au_dbtop(dentry) == bindex)
+		h_path->dentry = au_h_dptr(dentry, bindex);
+	else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) {
+		h_path->dentry = au_plink_lkup(inode, bindex);
+		if (IS_ERR(h_path->dentry))
+			/* pretending success */
+			h_path->dentry = NULL;
+		else
+			dput(h_path->dentry);
+	}
+
+out:
+	return err;
+}
+
+static int aufs_getattr(const struct path *path, struct kstat *st,
+			u32 request, unsigned int query)
+{
+	int err;
+	unsigned char positive;
+	struct path h_path;
+	struct dentry *dentry;
+	struct inode *inode;
+	struct super_block *sb;
+
+	dentry = path->dentry;
+	inode = d_inode(dentry);
+	sb = dentry->d_sb;
+	err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
+	if (unlikely(err))
+		goto out;
+	err = au_h_path_getattr(dentry, /*force*/0, &h_path, /*locked*/0);
+	if (unlikely(err))
+		goto out_si;
+	if (unlikely(!h_path.dentry))
+		/* illegally overlapped or something */
+		goto out_fill; /* pretending success */
+
+	positive = d_is_positive(h_path.dentry);
+	if (positive)
+		/* no vfsub version */
+		err = vfs_getattr(&h_path, st, request, query);
+	if (!err) {
+		if (positive)
+			au_refresh_iattr(inode, st,
+					 d_inode(h_path.dentry)->i_nlink);
+		goto out_fill; /* success */
+	}
+	AuTraceErr(err);
+	goto out_di;
+
+out_fill:
+	generic_fillattr(inode, st);
+out_di:
+	di_read_unlock(dentry, AuLock_IR);
+out_si:
+	si_read_unlock(sb);
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static const char *aufs_get_link(struct dentry *dentry, struct inode *inode,
+				 struct delayed_call *done)
+{
+	const char *ret;
+	struct dentry *h_dentry;
+	struct inode *h_inode;
+	int err;
+	aufs_bindex_t bindex;
+
+	ret = NULL; /* suppress a warning */
+	err = -ECHILD;
+	if (!dentry)
+		goto out;
+
+	err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN);
+	if (unlikely(err))
+		goto out;
+
+	err = au_d_hashed_positive(dentry);
+	if (unlikely(err))
+		goto out_unlock;
+
+	err = -EINVAL;
+	inode = d_inode(dentry);
+	bindex = au_ibtop(inode);
+	h_inode = au_h_iptr(inode, bindex);
+	if (unlikely(!h_inode->i_op->get_link))
+		goto out_unlock;
+
+	err = -EBUSY;
+	h_dentry = NULL;
+	if (au_dbtop(dentry) <= bindex) {
+		h_dentry = au_h_dptr(dentry, bindex);
+		if (h_dentry)
+			dget(h_dentry);
+	}
+	if (!h_dentry) {
+		h_dentry = d_find_any_alias(h_inode);
+		if (IS_ERR(h_dentry)) {
+			err = PTR_ERR(h_dentry);
+			goto out_unlock;
+		}
+	}
+	if (unlikely(!h_dentry))
+		goto out_unlock;
+
+	err = 0;
+	AuDbg("%pf\n", h_inode->i_op->get_link);
+	AuDbgDentry(h_dentry);
+	ret = vfs_get_link(h_dentry, done);
+	dput(h_dentry);
+	if (IS_ERR(ret))
+		err = PTR_ERR(ret);
+
+out_unlock:
+	aufs_read_unlock(dentry, AuLock_IR);
+out:
+	if (unlikely(err))
+		ret = ERR_PTR(err);
+	AuTraceErrPtr(ret);
+	return ret;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_is_special(struct inode *inode)
+{
+	return (inode->i_mode & (S_IFBLK | S_IFCHR | S_IFIFO | S_IFSOCK));
+}
+
+static int aufs_update_time(struct inode *inode, struct timespec *ts, int flags)
+{
+	int err;
+	aufs_bindex_t bindex;
+	struct super_block *sb;
+	struct inode *h_inode;
+	struct vfsmount *h_mnt;
+
+	sb = inode->i_sb;
+	WARN_ONCE((flags & S_ATIME) && !IS_NOATIME(inode),
+		  "unexpected s_flags 0x%lx", sb->s_flags);
+
+	/* mmap_sem might be acquired already, cf. aufs_mmap() */
+	lockdep_off();
+	si_read_lock(sb, AuLock_FLUSH);
+	ii_write_lock_child(inode);
+
+	err = 0;
+	bindex = au_ibtop(inode);
+	h_inode = au_h_iptr(inode, bindex);
+	if (!au_test_ro(sb, bindex, inode)) {
+		h_mnt = au_sbr_mnt(sb, bindex);
+		err = vfsub_mnt_want_write(h_mnt);
+		if (!err) {
+			err = vfsub_update_time(h_inode, ts, flags);
+			vfsub_mnt_drop_write(h_mnt);
+		}
+	} else if (au_is_special(h_inode)) {
+		/*
+		 * Never copy-up here.
+		 * These special files may already be opened and used for
+		 * communicating. If we copied it up, then the communication
+		 * would be corrupted.
+		 */
+		AuWarn1("timestamps for i%lu are ignored "
+			"since it is on readonly branch (hi%lu).\n",
+			inode->i_ino, h_inode->i_ino);
+	} else if (flags & ~S_ATIME) {
+		err = -EIO;
+		AuIOErr1("unexpected flags 0x%x\n", flags);
+		AuDebugOn(1);
+	}
+
+	if (!err)
+		au_cpup_attr_timesizes(inode);
+	ii_write_unlock(inode);
+	si_read_unlock(sb);
+	lockdep_on();
+
+	if (!err && (flags & S_VERSION))
+		inode_inc_iversion(inode);
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* no getattr version will be set by module.c:aufs_init() */
+struct inode_operations aufs_iop_nogetattr[AuIop_Last],
+	aufs_iop[] = {
+	[AuIop_SYMLINK] = {
+		.permission	= aufs_permission,
+#ifdef CONFIG_FS_POSIX_ACL
+		.get_acl	= aufs_get_acl,
+		.set_acl	= aufs_set_acl, /* unsupport for symlink? */
+#endif
+
+		.setattr	= aufs_setattr,
+		.getattr	= aufs_getattr,
+
+#ifdef CONFIG_AUFS_XATTR
+		.listxattr	= aufs_listxattr,
+#endif
+
+		.get_link	= aufs_get_link,
+
+		/* .update_time	= aufs_update_time */
+	},
+	[AuIop_DIR] = {
+		.create		= aufs_create,
+		.lookup		= aufs_lookup,
+		.link		= aufs_link,
+		.unlink		= aufs_unlink,
+		.symlink	= aufs_symlink,
+		.mkdir		= aufs_mkdir,
+		.rmdir		= aufs_rmdir,
+		.mknod		= aufs_mknod,
+		.rename		= aufs_rename,
+
+		.permission	= aufs_permission,
+#ifdef CONFIG_FS_POSIX_ACL
+		.get_acl	= aufs_get_acl,
+		.set_acl	= aufs_set_acl,
+#endif
+
+		.setattr	= aufs_setattr,
+		.getattr	= aufs_getattr,
+
+#ifdef CONFIG_AUFS_XATTR
+		.listxattr	= aufs_listxattr,
+#endif
+
+		.update_time	= aufs_update_time,
+		.atomic_open	= aufs_atomic_open,
+		.tmpfile	= aufs_tmpfile
+	},
+	[AuIop_OTHER] = {
+		.permission	= aufs_permission,
+#ifdef CONFIG_FS_POSIX_ACL
+		.get_acl	= aufs_get_acl,
+		.set_acl	= aufs_set_acl,
+#endif
+
+		.setattr	= aufs_setattr,
+		.getattr	= aufs_getattr,
+
+#ifdef CONFIG_AUFS_XATTR
+		.listxattr	= aufs_listxattr,
+#endif
+
+		.update_time	= aufs_update_time
+	}
+};
diff --git a/include/fs/aufs/i_op_add.c b/include/fs/aufs/i_op_add.c
new file mode 100644
index 000000000..361245ef7
--- /dev/null
+++ b/include/fs/aufs/i_op_add.c
@@ -0,0 +1,920 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * inode operations (add entry)
+ */
+
+#include "aufs.h"
+
+/*
+ * final procedure of adding a new entry, except link(2).
+ * remove whiteout, instantiate, copyup the parent dir's times and size
+ * and update version.
+ * if it failed, re-create the removed whiteout.
+ */
+static int epilog(struct inode *dir, aufs_bindex_t bindex,
+		  struct dentry *wh_dentry, struct dentry *dentry)
+{
+	int err, rerr;
+	aufs_bindex_t bwh;
+	struct path h_path;
+	struct super_block *sb;
+	struct inode *inode, *h_dir;
+	struct dentry *wh;
+
+	bwh = -1;
+	sb = dir->i_sb;
+	if (wh_dentry) {
+		h_dir = d_inode(wh_dentry->d_parent); /* dir inode is locked */
+		IMustLock(h_dir);
+		AuDebugOn(au_h_iptr(dir, bindex) != h_dir);
+		bwh = au_dbwh(dentry);
+		h_path.dentry = wh_dentry;
+		h_path.mnt = au_sbr_mnt(sb, bindex);
+		err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path,
+					  dentry);
+		if (unlikely(err))
+			goto out;
+	}
+
+	inode = au_new_inode(dentry, /*must_new*/1);
+	if (!IS_ERR(inode)) {
+		d_instantiate(dentry, inode);
+		dir = d_inode(dentry->d_parent); /* dir inode is locked */
+		IMustLock(dir);
+		au_dir_ts(dir, bindex);
+		dir->i_version++;
+		au_fhsm_wrote(sb, bindex, /*force*/0);
+		return 0; /* success */
+	}
+
+	err = PTR_ERR(inode);
+	if (!wh_dentry)
+		goto out;
+
+	/* revert */
+	/* dir inode is locked */
+	wh = au_wh_create(dentry, bwh, wh_dentry->d_parent);
+	rerr = PTR_ERR(wh);
+	if (IS_ERR(wh)) {
+		AuIOErr("%pd reverting whiteout failed(%d, %d)\n",
+			dentry, err, rerr);
+		err = -EIO;
+	} else
+		dput(wh);
+
+out:
+	return err;
+}
+
+static int au_d_may_add(struct dentry *dentry)
+{
+	int err;
+
+	err = 0;
+	if (unlikely(d_unhashed(dentry)))
+		err = -ENOENT;
+	if (unlikely(d_really_is_positive(dentry)))
+		err = -EEXIST;
+	return err;
+}
+
+/*
+ * simple tests for the adding inode operations.
+ * following the checks in vfs, plus the parent-child relationship.
+ */
+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
+	       struct dentry *h_parent, int isdir)
+{
+	int err;
+	umode_t h_mode;
+	struct dentry *h_dentry;
+	struct inode *h_inode;
+
+	err = -ENAMETOOLONG;
+	if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
+		goto out;
+
+	h_dentry = au_h_dptr(dentry, bindex);
+	if (d_really_is_negative(dentry)) {
+		err = -EEXIST;
+		if (unlikely(d_is_positive(h_dentry)))
+			goto out;
+	} else {
+		/* rename(2) case */
+		err = -EIO;
+		if (unlikely(d_is_negative(h_dentry)))
+			goto out;
+		h_inode = d_inode(h_dentry);
+		if (unlikely(!h_inode->i_nlink))
+			goto out;
+
+		h_mode = h_inode->i_mode;
+		if (!isdir) {
+			err = -EISDIR;
+			if (unlikely(S_ISDIR(h_mode)))
+				goto out;
+		} else if (unlikely(!S_ISDIR(h_mode))) {
+			err = -ENOTDIR;
+			goto out;
+		}
+	}
+
+	err = 0;
+	/* expected parent dir is locked */
+	if (unlikely(h_parent != h_dentry->d_parent))
+		err = -EIO;
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/*
+ * initial procedure of adding a new entry.
+ * prepare writable branch and the parent dir, lock it,
+ * and lookup whiteout for the new entry.
+ */
+static struct dentry*
+lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt,
+		  struct dentry *src_dentry, struct au_pin *pin,
+		  struct au_wr_dir_args *wr_dir_args)
+{
+	struct dentry *wh_dentry, *h_parent;
+	struct super_block *sb;
+	struct au_branch *br;
+	int err;
+	unsigned int udba;
+	aufs_bindex_t bcpup;
+
+	AuDbg("%pd\n", dentry);
+
+	err = au_wr_dir(dentry, src_dentry, wr_dir_args);
+	bcpup = err;
+	wh_dentry = ERR_PTR(err);
+	if (unlikely(err < 0))
+		goto out;
+
+	sb = dentry->d_sb;
+	udba = au_opt_udba(sb);
+	err = au_pin(pin, dentry, bcpup, udba,
+		     AuPin_DI_LOCKED | AuPin_MNT_WRITE);
+	wh_dentry = ERR_PTR(err);
+	if (unlikely(err))
+		goto out;
+
+	h_parent = au_pinned_h_parent(pin);
+	if (udba != AuOpt_UDBA_NONE
+	    && au_dbtop(dentry) == bcpup)
+		err = au_may_add(dentry, bcpup, h_parent,
+				 au_ftest_wrdir(wr_dir_args->flags, ISDIR));
+	else if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
+		err = -ENAMETOOLONG;
+	wh_dentry = ERR_PTR(err);
+	if (unlikely(err))
+		goto out_unpin;
+
+	br = au_sbr(sb, bcpup);
+	if (dt) {
+		struct path tmp = {
+			.dentry	= h_parent,
+			.mnt	= au_br_mnt(br)
+		};
+		au_dtime_store(dt, au_pinned_parent(pin), &tmp);
+	}
+
+	wh_dentry = NULL;
+	if (bcpup != au_dbwh(dentry))
+		goto out; /* success */
+
+	/*
+	 * ENAMETOOLONG here means that if we allowed create such name, then it
+	 * would not be able to removed in the future. So we don't allow such
+	 * name here and we don't handle ENAMETOOLONG differently here.
+	 */
+	wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
+
+out_unpin:
+	if (IS_ERR(wh_dentry))
+		au_unpin(pin);
+out:
+	return wh_dentry;
+}
+
+/* ---------------------------------------------------------------------- */
+
+enum { Mknod, Symlink, Creat };
+struct simple_arg {
+	int type;
+	union {
+		struct {
+			umode_t			mode;
+			bool			want_excl;
+			bool			try_aopen;
+			struct vfsub_aopen_args	*aopen;
+		} c;
+		struct {
+			const char *symname;
+		} s;
+		struct {
+			umode_t mode;
+			dev_t dev;
+		} m;
+	} u;
+};
+
+static int add_simple(struct inode *dir, struct dentry *dentry,
+		      struct simple_arg *arg)
+{
+	int err, rerr;
+	aufs_bindex_t btop;
+	unsigned char created;
+	const unsigned char try_aopen
+		= (arg->type == Creat && arg->u.c.try_aopen);
+	struct dentry *wh_dentry, *parent;
+	struct inode *h_dir;
+	struct super_block *sb;
+	struct au_branch *br;
+	/* to reuduce stack size */
+	struct {
+		struct au_dtime dt;
+		struct au_pin pin;
+		struct path h_path;
+		struct au_wr_dir_args wr_dir_args;
+	} *a;
+
+	AuDbg("%pd\n", dentry);
+	IMustLock(dir);
+
+	err = -ENOMEM;
+	a = kmalloc(sizeof(*a), GFP_NOFS);
+	if (unlikely(!a))
+		goto out;
+	a->wr_dir_args.force_btgt = -1;
+	a->wr_dir_args.flags = AuWrDir_ADD_ENTRY;
+
+	parent = dentry->d_parent; /* dir inode is locked */
+	if (!try_aopen) {
+		err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
+		if (unlikely(err))
+			goto out_free;
+	}
+	err = au_d_may_add(dentry);
+	if (unlikely(err))
+		goto out_unlock;
+	if (!try_aopen)
+		di_write_lock_parent(parent);
+	wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
+				      &a->pin, &a->wr_dir_args);
+	err = PTR_ERR(wh_dentry);
+	if (IS_ERR(wh_dentry))
+		goto out_parent;
+
+	btop = au_dbtop(dentry);
+	sb = dentry->d_sb;
+	br = au_sbr(sb, btop);
+	a->h_path.dentry = au_h_dptr(dentry, btop);
+	a->h_path.mnt = au_br_mnt(br);
+	h_dir = au_pinned_h_dir(&a->pin);
+	switch (arg->type) {
+	case Creat:
+		err = 0;
+		if (!try_aopen || !h_dir->i_op->atomic_open)
+			err = vfsub_create(h_dir, &a->h_path, arg->u.c.mode,
+					   arg->u.c.want_excl);
+		else
+			err = vfsub_atomic_open(h_dir, a->h_path.dentry,
+						arg->u.c.aopen, br);
+		break;
+	case Symlink:
+		err = vfsub_symlink(h_dir, &a->h_path, arg->u.s.symname);
+		break;
+	case Mknod:
+		err = vfsub_mknod(h_dir, &a->h_path, arg->u.m.mode,
+				  arg->u.m.dev);
+		break;
+	default:
+		BUG();
+	}
+	created = !err;
+	if (!err)
+		err = epilog(dir, btop, wh_dentry, dentry);
+
+	/* revert */
+	if (unlikely(created && err && d_is_positive(a->h_path.dentry))) {
+		/* no delegation since it is just created */
+		rerr = vfsub_unlink(h_dir, &a->h_path, /*delegated*/NULL,
+				    /*force*/0);
+		if (rerr) {
+			AuIOErr("%pd revert failure(%d, %d)\n",
+				dentry, err, rerr);
+			err = -EIO;
+		}
+		au_dtime_revert(&a->dt);
+	}
+
+	if (!err && try_aopen && !h_dir->i_op->atomic_open)
+		*arg->u.c.aopen->opened |= FILE_CREATED;
+
+	au_unpin(&a->pin);
+	dput(wh_dentry);
+
+out_parent:
+	if (!try_aopen)
+		di_write_unlock(parent);
+out_unlock:
+	if (unlikely(err)) {
+		au_update_dbtop(dentry);
+		d_drop(dentry);
+	}
+	if (!try_aopen)
+		aufs_read_unlock(dentry, AuLock_DW);
+out_free:
+	kfree(a);
+out:
+	return err;
+}
+
+int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
+	       dev_t dev)
+{
+	struct simple_arg arg = {
+		.type = Mknod,
+		.u.m = {
+			.mode	= mode,
+			.dev	= dev
+		}
+	};
+	return add_simple(dir, dentry, &arg);
+}
+
+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+{
+	struct simple_arg arg = {
+		.type = Symlink,
+		.u.s.symname = symname
+	};
+	return add_simple(dir, dentry, &arg);
+}
+
+int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
+		bool want_excl)
+{
+	struct simple_arg arg = {
+		.type = Creat,
+		.u.c = {
+			.mode		= mode,
+			.want_excl	= want_excl
+		}
+	};
+	return add_simple(dir, dentry, &arg);
+}
+
+int au_aopen_or_create(struct inode *dir, struct dentry *dentry,
+		       struct vfsub_aopen_args *aopen_args)
+{
+	struct simple_arg arg = {
+		.type = Creat,
+		.u.c = {
+			.mode		= aopen_args->create_mode,
+			.want_excl	= aopen_args->open_flag & O_EXCL,
+			.try_aopen	= true,
+			.aopen		= aopen_args
+		}
+	};
+	return add_simple(dir, dentry, &arg);
+}
+
+int aufs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+	int err;
+	aufs_bindex_t bindex;
+	struct super_block *sb;
+	struct dentry *parent, *h_parent, *h_dentry;
+	struct inode *h_dir, *inode;
+	struct vfsmount *h_mnt;
+	struct au_wr_dir_args wr_dir_args = {
+		.force_btgt	= -1,
+		.flags		= AuWrDir_TMPFILE
+	};
+
+	/* copy-up may happen */
+	inode_lock(dir);
+
+	sb = dir->i_sb;
+	err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
+	if (unlikely(err))
+		goto out;
+
+	err = au_di_init(dentry);
+	if (unlikely(err))
+		goto out_si;
+
+	err = -EBUSY;
+	parent = d_find_any_alias(dir);
+	AuDebugOn(!parent);
+	di_write_lock_parent(parent);
+	if (unlikely(d_inode(parent) != dir))
+		goto out_parent;
+
+	err = au_digen_test(parent, au_sigen(sb));
+	if (unlikely(err))
+		goto out_parent;
+
+	bindex = au_dbtop(parent);
+	au_set_dbtop(dentry, bindex);
+	au_set_dbbot(dentry, bindex);
+	err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
+	bindex = err;
+	if (unlikely(err < 0))
+		goto out_parent;
+
+	err = -EOPNOTSUPP;
+	h_dir = au_h_iptr(dir, bindex);
+	if (unlikely(!h_dir->i_op->tmpfile))
+		goto out_parent;
+
+	h_mnt = au_sbr_mnt(sb, bindex);
+	err = vfsub_mnt_want_write(h_mnt);
+	if (unlikely(err))
+		goto out_parent;
+
+	h_parent = au_h_dptr(parent, bindex);
+	h_dentry = vfs_tmpfile(h_parent, mode, /*open_flag*/0);
+	if (IS_ERR(h_dentry)) {
+		err = PTR_ERR(h_dentry);
+		goto out_mnt;
+	}
+
+	au_set_dbtop(dentry, bindex);
+	au_set_dbbot(dentry, bindex);
+	au_set_h_dptr(dentry, bindex, dget(h_dentry));
+	inode = au_new_inode(dentry, /*must_new*/1);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		au_set_h_dptr(dentry, bindex, NULL);
+		au_set_dbtop(dentry, -1);
+		au_set_dbbot(dentry, -1);
+	} else {
+		if (!inode->i_nlink)
+			set_nlink(inode, 1);
+		d_tmpfile(dentry, inode);
+		au_di(dentry)->di_tmpfile = 1;
+
+		/* update without i_mutex */
+		if (au_ibtop(dir) == au_dbtop(dentry))
+			au_cpup_attr_timesizes(dir);
+	}
+	dput(h_dentry);
+
+out_mnt:
+	vfsub_mnt_drop_write(h_mnt);
+out_parent:
+	di_write_unlock(parent);
+	dput(parent);
+	di_write_unlock(dentry);
+	if (unlikely(err)) {
+		au_di_fin(dentry);
+		dentry->d_fsdata = NULL;
+	}
+out_si:
+	si_read_unlock(sb);
+out:
+	inode_unlock(dir);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+struct au_link_args {
+	aufs_bindex_t bdst, bsrc;
+	struct au_pin pin;
+	struct path h_path;
+	struct dentry *src_parent, *parent;
+};
+
+static int au_cpup_before_link(struct dentry *src_dentry,
+			       struct au_link_args *a)
+{
+	int err;
+	struct dentry *h_src_dentry;
+	struct au_cp_generic cpg = {
+		.dentry	= src_dentry,
+		.bdst	= a->bdst,
+		.bsrc	= a->bsrc,
+		.len	= -1,
+		.pin	= &a->pin,
+		.flags	= AuCpup_DTIME | AuCpup_HOPEN /* | AuCpup_KEEPLINO */
+	};
+
+	di_read_lock_parent(a->src_parent, AuLock_IR);
+	err = au_test_and_cpup_dirs(src_dentry, a->bdst);
+	if (unlikely(err))
+		goto out;
+
+	h_src_dentry = au_h_dptr(src_dentry, a->bsrc);
+	err = au_pin(&a->pin, src_dentry, a->bdst,
+		     au_opt_udba(src_dentry->d_sb),
+		     AuPin_DI_LOCKED | AuPin_MNT_WRITE);
+	if (unlikely(err))
+		goto out;
+
+	err = au_sio_cpup_simple(&cpg);
+	au_unpin(&a->pin);
+
+out:
+	di_read_unlock(a->src_parent, AuLock_IR);
+	return err;
+}
+
+static int au_cpup_or_link(struct dentry *src_dentry, struct dentry *dentry,
+			   struct au_link_args *a)
+{
+	int err;
+	unsigned char plink;
+	aufs_bindex_t bbot;
+	struct dentry *h_src_dentry;
+	struct inode *h_inode, *inode, *delegated;
+	struct super_block *sb;
+	struct file *h_file;
+
+	plink = 0;
+	h_inode = NULL;
+	sb = src_dentry->d_sb;
+	inode = d_inode(src_dentry);
+	if (au_ibtop(inode) <= a->bdst)
+		h_inode = au_h_iptr(inode, a->bdst);
+	if (!h_inode || !h_inode->i_nlink) {
+		/* copyup src_dentry as the name of dentry. */
+		bbot = au_dbbot(dentry);
+		if (bbot < a->bsrc)
+			au_set_dbbot(dentry, a->bsrc);
+		au_set_h_dptr(dentry, a->bsrc,
+			      dget(au_h_dptr(src_dentry, a->bsrc)));
+		dget(a->h_path.dentry);
+		au_set_h_dptr(dentry, a->bdst, NULL);
+		AuDbg("temporary d_inode...\n");
+		spin_lock(&dentry->d_lock);
+		dentry->d_inode = d_inode(src_dentry); /* tmp */
+		spin_unlock(&dentry->d_lock);
+		h_file = au_h_open_pre(dentry, a->bsrc, /*force_wr*/0);
+		if (IS_ERR(h_file))
+			err = PTR_ERR(h_file);
+		else {
+			struct au_cp_generic cpg = {
+				.dentry	= dentry,
+				.bdst	= a->bdst,
+				.bsrc	= -1,
+				.len	= -1,
+				.pin	= &a->pin,
+				.flags	= AuCpup_KEEPLINO
+			};
+			err = au_sio_cpup_simple(&cpg);
+			au_h_open_post(dentry, a->bsrc, h_file);
+			if (!err) {
+				dput(a->h_path.dentry);
+				a->h_path.dentry = au_h_dptr(dentry, a->bdst);
+			} else
+				au_set_h_dptr(dentry, a->bdst,
+					      a->h_path.dentry);
+		}
+		spin_lock(&dentry->d_lock);
+		dentry->d_inode = NULL; /* restore */
+		spin_unlock(&dentry->d_lock);
+		AuDbg("temporary d_inode...done\n");
+		au_set_h_dptr(dentry, a->bsrc, NULL);
+		au_set_dbbot(dentry, bbot);
+	} else {
+		/* the inode of src_dentry already exists on a.bdst branch */
+		h_src_dentry = d_find_alias(h_inode);
+		if (!h_src_dentry && au_plink_test(inode)) {
+			plink = 1;
+			h_src_dentry = au_plink_lkup(inode, a->bdst);
+			err = PTR_ERR(h_src_dentry);
+			if (IS_ERR(h_src_dentry))
+				goto out;
+
+			if (unlikely(d_is_negative(h_src_dentry))) {
+				dput(h_src_dentry);
+				h_src_dentry = NULL;
+			}
+
+		}
+		if (h_src_dentry) {
+			delegated = NULL;
+			err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
+					 &a->h_path, &delegated);
+			if (unlikely(err == -EWOULDBLOCK)) {
+				pr_warn("cannot retry for NFSv4 delegation"
+					" for an internal link\n");
+				iput(delegated);
+			}
+			dput(h_src_dentry);
+		} else {
+			AuIOErr("no dentry found for hi%lu on b%d\n",
+				h_inode->i_ino, a->bdst);
+			err = -EIO;
+		}
+	}
+
+	if (!err && !plink)
+		au_plink_append(inode, a->bdst, a->h_path.dentry);
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+int aufs_link(struct dentry *src_dentry, struct inode *dir,
+	      struct dentry *dentry)
+{
+	int err, rerr;
+	struct au_dtime dt;
+	struct au_link_args *a;
+	struct dentry *wh_dentry, *h_src_dentry;
+	struct inode *inode, *delegated;
+	struct super_block *sb;
+	struct au_wr_dir_args wr_dir_args = {
+		/* .force_btgt	= -1, */
+		.flags		= AuWrDir_ADD_ENTRY
+	};
+
+	IMustLock(dir);
+	inode = d_inode(src_dentry);
+	IMustLock(inode);
+
+	err = -ENOMEM;
+	a = kzalloc(sizeof(*a), GFP_NOFS);
+	if (unlikely(!a))
+		goto out;
+
+	a->parent = dentry->d_parent; /* dir inode is locked */
+	err = aufs_read_and_write_lock2(dentry, src_dentry,
+					AuLock_NOPLM | AuLock_GEN);
+	if (unlikely(err))
+		goto out_kfree;
+	err = au_d_linkable(src_dentry);
+	if (unlikely(err))
+		goto out_unlock;
+	err = au_d_may_add(dentry);
+	if (unlikely(err))
+		goto out_unlock;
+
+	a->src_parent = dget_parent(src_dentry);
+	wr_dir_args.force_btgt = au_ibtop(inode);
+
+	di_write_lock_parent(a->parent);
+	wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt);
+	wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin,
+				      &wr_dir_args);
+	err = PTR_ERR(wh_dentry);
+	if (IS_ERR(wh_dentry))
+		goto out_parent;
+
+	err = 0;
+	sb = dentry->d_sb;
+	a->bdst = au_dbtop(dentry);
+	a->h_path.dentry = au_h_dptr(dentry, a->bdst);
+	a->h_path.mnt = au_sbr_mnt(sb, a->bdst);
+	a->bsrc = au_ibtop(inode);
+	h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
+	if (!h_src_dentry && au_di(src_dentry)->di_tmpfile)
+		h_src_dentry = dget(au_hi_wh(inode, a->bsrc));
+	if (!h_src_dentry) {
+		a->bsrc = au_dbtop(src_dentry);
+		h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
+		AuDebugOn(!h_src_dentry);
+	} else if (IS_ERR(h_src_dentry)) {
+		err = PTR_ERR(h_src_dentry);
+		goto out_parent;
+	}
+
+	/*
+	 * aufs doesn't touch the credential so
+	 * security_dentry_create_files_as() is unnecrssary.
+	 */
+	if (au_opt_test(au_mntflags(sb), PLINK)) {
+		if (a->bdst < a->bsrc
+		    /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */)
+			err = au_cpup_or_link(src_dentry, dentry, a);
+		else {
+			delegated = NULL;
+			err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
+					 &a->h_path, &delegated);
+			if (unlikely(err == -EWOULDBLOCK)) {
+				pr_warn("cannot retry for NFSv4 delegation"
+					" for an internal link\n");
+				iput(delegated);
+			}
+		}
+		dput(h_src_dentry);
+	} else {
+		/*
+		 * copyup src_dentry to the branch we process,
+		 * and then link(2) to it.
+		 */
+		dput(h_src_dentry);
+		if (a->bdst < a->bsrc
+		    /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) {
+			au_unpin(&a->pin);
+			di_write_unlock(a->parent);
+			err = au_cpup_before_link(src_dentry, a);
+			di_write_lock_parent(a->parent);
+			if (!err)
+				err = au_pin(&a->pin, dentry, a->bdst,
+					     au_opt_udba(sb),
+					     AuPin_DI_LOCKED | AuPin_MNT_WRITE);
+			if (unlikely(err))
+				goto out_wh;
+		}
+		if (!err) {
+			h_src_dentry = au_h_dptr(src_dentry, a->bdst);
+			err = -ENOENT;
+			if (h_src_dentry && d_is_positive(h_src_dentry)) {
+				delegated = NULL;
+				err = vfsub_link(h_src_dentry,
+						 au_pinned_h_dir(&a->pin),
+						 &a->h_path, &delegated);
+				if (unlikely(err == -EWOULDBLOCK)) {
+					pr_warn("cannot retry"
+						" for NFSv4 delegation"
+						" for an internal link\n");
+					iput(delegated);
+				}
+			}
+		}
+	}
+	if (unlikely(err))
+		goto out_unpin;
+
+	if (wh_dentry) {
+		a->h_path.dentry = wh_dentry;
+		err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path,
+					  dentry);
+		if (unlikely(err))
+			goto out_revert;
+	}
+
+	au_dir_ts(dir, a->bdst);
+	dir->i_version++;
+	inc_nlink(inode);
+	inode->i_ctime = dir->i_ctime;
+	d_instantiate(dentry, au_igrab(inode));
+	if (d_unhashed(a->h_path.dentry))
+		/* some filesystem calls d_drop() */
+		d_drop(dentry);
+	/* some filesystems consume an inode even hardlink */
+	au_fhsm_wrote(sb, a->bdst, /*force*/0);
+	goto out_unpin; /* success */
+
+out_revert:
+	/* no delegation since it is just created */
+	rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path,
+			    /*delegated*/NULL, /*force*/0);
+	if (unlikely(rerr)) {
+		AuIOErr("%pd reverting failed(%d, %d)\n", dentry, err, rerr);
+		err = -EIO;
+	}
+	au_dtime_revert(&dt);
+out_unpin:
+	au_unpin(&a->pin);
+out_wh:
+	dput(wh_dentry);
+out_parent:
+	di_write_unlock(a->parent);
+	dput(a->src_parent);
+out_unlock:
+	if (unlikely(err)) {
+		au_update_dbtop(dentry);
+		d_drop(dentry);
+	}
+	aufs_read_and_write_unlock2(dentry, src_dentry);
+out_kfree:
+	kfree(a);
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+	int err, rerr;
+	aufs_bindex_t bindex;
+	unsigned char diropq;
+	struct path h_path;
+	struct dentry *wh_dentry, *parent, *opq_dentry;
+	struct inode *h_inode;
+	struct super_block *sb;
+	struct {
+		struct au_pin pin;
+		struct au_dtime dt;
+	} *a; /* reduce the stack usage */
+	struct au_wr_dir_args wr_dir_args = {
+		.force_btgt	= -1,
+		.flags		= AuWrDir_ADD_ENTRY | AuWrDir_ISDIR
+	};
+
+	IMustLock(dir);
+
+	err = -ENOMEM;
+	a = kmalloc(sizeof(*a), GFP_NOFS);
+	if (unlikely(!a))
+		goto out;
+
+	err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
+	if (unlikely(err))
+		goto out_free;
+	err = au_d_may_add(dentry);
+	if (unlikely(err))
+		goto out_unlock;
+
+	parent = dentry->d_parent; /* dir inode is locked */
+	di_write_lock_parent(parent);
+	wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
+				      &a->pin, &wr_dir_args);
+	err = PTR_ERR(wh_dentry);
+	if (IS_ERR(wh_dentry))
+		goto out_parent;
+
+	sb = dentry->d_sb;
+	bindex = au_dbtop(dentry);
+	h_path.dentry = au_h_dptr(dentry, bindex);
+	h_path.mnt = au_sbr_mnt(sb, bindex);
+	err = vfsub_mkdir(au_pinned_h_dir(&a->pin), &h_path, mode);
+	if (unlikely(err))
+		goto out_unpin;
+
+	/* make the dir opaque */
+	diropq = 0;
+	h_inode = d_inode(h_path.dentry);
+	if (wh_dentry
+	    || au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) {
+		inode_lock_nested(h_inode, AuLsc_I_CHILD);
+		opq_dentry = au_diropq_create(dentry, bindex);
+		inode_unlock(h_inode);
+		err = PTR_ERR(opq_dentry);
+		if (IS_ERR(opq_dentry))
+			goto out_dir;
+		dput(opq_dentry);
+		diropq = 1;
+	}
+
+	err = epilog(dir, bindex, wh_dentry, dentry);
+	if (!err) {
+		inc_nlink(dir);
+		goto out_unpin; /* success */
+	}
+
+	/* revert */
+	if (diropq) {
+		AuLabel(revert opq);
+		inode_lock_nested(h_inode, AuLsc_I_CHILD);
+		rerr = au_diropq_remove(dentry, bindex);
+		inode_unlock(h_inode);
+		if (rerr) {
+			AuIOErr("%pd reverting diropq failed(%d, %d)\n",
+				dentry, err, rerr);
+			err = -EIO;
+		}
+	}
+
+out_dir:
+	AuLabel(revert dir);
+	rerr = vfsub_rmdir(au_pinned_h_dir(&a->pin), &h_path);
+	if (rerr) {
+		AuIOErr("%pd reverting dir failed(%d, %d)\n",
+			dentry, err, rerr);
+		err = -EIO;
+	}
+	au_dtime_revert(&a->dt);
+out_unpin:
+	au_unpin(&a->pin);
+	dput(wh_dentry);
+out_parent:
+	di_write_unlock(parent);
+out_unlock:
+	if (unlikely(err)) {
+		au_update_dbtop(dentry);
+		d_drop(dentry);
+	}
+	aufs_read_unlock(dentry, AuLock_DW);
+out_free:
+	kfree(a);
+out:
+	return err;
+}
diff --git a/include/fs/aufs/i_op_del.c b/include/fs/aufs/i_op_del.c
new file mode 100644
index 000000000..1e9162a24
--- /dev/null
+++ b/include/fs/aufs/i_op_del.c
@@ -0,0 +1,511 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * inode operations (del entry)
+ */
+
+#include "aufs.h"
+
+/*
+ * decide if a new whiteout for @dentry is necessary or not.
+ * when it is necessary, prepare the parent dir for the upper branch whose
+ * branch index is @bcpup for creation. the actual creation of the whiteout will
+ * be done by caller.
+ * return value:
+ * 0: wh is unnecessary
+ * plus: wh is necessary
+ * minus: error
+ */
+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup)
+{
+	int need_wh, err;
+	aufs_bindex_t btop;
+	struct super_block *sb;
+
+	sb = dentry->d_sb;
+	btop = au_dbtop(dentry);
+	if (*bcpup < 0) {
+		*bcpup = btop;
+		if (au_test_ro(sb, btop, d_inode(dentry))) {
+			err = AuWbrCopyup(au_sbi(sb), dentry);
+			*bcpup = err;
+			if (unlikely(err < 0))
+				goto out;
+		}
+	} else
+		AuDebugOn(btop < *bcpup
+			  || au_test_ro(sb, *bcpup, d_inode(dentry)));
+	AuDbg("bcpup %d, btop %d\n", *bcpup, btop);
+
+	if (*bcpup != btop) {
+		err = au_cpup_dirs(dentry, *bcpup);
+		if (unlikely(err))
+			goto out;
+		need_wh = 1;
+	} else {
+		struct au_dinfo *dinfo, *tmp;
+
+		need_wh = -ENOMEM;
+		dinfo = au_di(dentry);
+		tmp = au_di_alloc(sb, AuLsc_DI_TMP);
+		if (tmp) {
+			au_di_cp(tmp, dinfo);
+			au_di_swap(tmp, dinfo);
+			/* returns the number of positive dentries */
+			need_wh = au_lkup_dentry(dentry, btop + 1,
+						 /* AuLkup_IGNORE_PERM */ 0);
+			au_di_swap(tmp, dinfo);
+			au_rw_write_unlock(&tmp->di_rwsem);
+			au_di_free(tmp);
+		}
+	}
+	AuDbg("need_wh %d\n", need_wh);
+	err = need_wh;
+
+out:
+	return err;
+}
+
+/*
+ * simple tests for the del-entry operations.
+ * following the checks in vfs, plus the parent-child relationship.
+ */
+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
+	       struct dentry *h_parent, int isdir)
+{
+	int err;
+	umode_t h_mode;
+	struct dentry *h_dentry, *h_latest;
+	struct inode *h_inode;
+
+	h_dentry = au_h_dptr(dentry, bindex);
+	if (d_really_is_positive(dentry)) {
+		err = -ENOENT;
+		if (unlikely(d_is_negative(h_dentry)))
+			goto out;
+		h_inode = d_inode(h_dentry);
+		if (unlikely(!h_inode->i_nlink))
+			goto out;
+
+		h_mode = h_inode->i_mode;
+		if (!isdir) {
+			err = -EISDIR;
+			if (unlikely(S_ISDIR(h_mode)))
+				goto out;
+		} else if (unlikely(!S_ISDIR(h_mode))) {
+			err = -ENOTDIR;
+			goto out;
+		}
+	} else {
+		/* rename(2) case */
+		err = -EIO;
+		if (unlikely(d_is_positive(h_dentry)))
+			goto out;
+	}
+
+	err = -ENOENT;
+	/* expected parent dir is locked */
+	if (unlikely(h_parent != h_dentry->d_parent))
+		goto out;
+	err = 0;
+
+	/*
+	 * rmdir a dir may break the consistency on some filesystem.
+	 * let's try heavy test.
+	 */
+	err = -EACCES;
+	if (unlikely(!au_opt_test(au_mntflags(dentry->d_sb), DIRPERM1)
+		     && au_test_h_perm(d_inode(h_parent),
+				       MAY_EXEC | MAY_WRITE)))
+		goto out;
+
+	h_latest = au_sio_lkup_one(&dentry->d_name, h_parent);
+	err = -EIO;
+	if (IS_ERR(h_latest))
+		goto out;
+	if (h_latest == h_dentry)
+		err = 0;
+	dput(h_latest);
+
+out:
+	return err;
+}
+
+/*
+ * decide the branch where we operate for @dentry. the branch index will be set
+ * @rbcpup. after diciding it, 'pin' it and store the timestamps of the parent
+ * dir for reverting.
+ * when a new whiteout is necessary, create it.
+ */
+static struct dentry*
+lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *rbcpup,
+		    struct au_dtime *dt, struct au_pin *pin)
+{
+	struct dentry *wh_dentry;
+	struct super_block *sb;
+	struct path h_path;
+	int err, need_wh;
+	unsigned int udba;
+	aufs_bindex_t bcpup;
+
+	need_wh = au_wr_dir_need_wh(dentry, isdir, rbcpup);
+	wh_dentry = ERR_PTR(need_wh);
+	if (unlikely(need_wh < 0))
+		goto out;
+
+	sb = dentry->d_sb;
+	udba = au_opt_udba(sb);
+	bcpup = *rbcpup;
+	err = au_pin(pin, dentry, bcpup, udba,
+		     AuPin_DI_LOCKED | AuPin_MNT_WRITE);
+	wh_dentry = ERR_PTR(err);
+	if (unlikely(err))
+		goto out;
+
+	h_path.dentry = au_pinned_h_parent(pin);
+	if (udba != AuOpt_UDBA_NONE
+	    && au_dbtop(dentry) == bcpup) {
+		err = au_may_del(dentry, bcpup, h_path.dentry, isdir);
+		wh_dentry = ERR_PTR(err);
+		if (unlikely(err))
+			goto out_unpin;
+	}
+
+	h_path.mnt = au_sbr_mnt(sb, bcpup);
+	au_dtime_store(dt, au_pinned_parent(pin), &h_path);
+	wh_dentry = NULL;
+	if (!need_wh)
+		goto out; /* success, no need to create whiteout */
+
+	wh_dentry = au_wh_create(dentry, bcpup, h_path.dentry);
+	if (IS_ERR(wh_dentry))
+		goto out_unpin;
+
+	/* returns with the parent is locked and wh_dentry is dget-ed */
+	goto out; /* success */
+
+out_unpin:
+	au_unpin(pin);
+out:
+	return wh_dentry;
+}
+
+/*
+ * when removing a dir, rename it to a unique temporary whiteout-ed name first
+ * in order to be revertible and save time for removing many child whiteouts
+ * under the dir.
+ * returns 1 when there are too many child whiteout and caller should remove
+ * them asynchronously. returns 0 when the number of children is enough small to
+ * remove now or the branch fs is a remote fs.
+ * otherwise return an error.
+ */
+static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex,
+			   struct au_nhash *whlist, struct inode *dir)
+{
+	int rmdir_later, err, dirwh;
+	struct dentry *h_dentry;
+	struct super_block *sb;
+	struct inode *inode;
+
+	sb = dentry->d_sb;
+	SiMustAnyLock(sb);
+	h_dentry = au_h_dptr(dentry, bindex);
+	err = au_whtmp_ren(h_dentry, au_sbr(sb, bindex));
+	if (unlikely(err))
+		goto out;
+
+	/* stop monitoring */
+	inode = d_inode(dentry);
+	au_hn_free(au_hi(inode, bindex));
+
+	if (!au_test_fs_remote(h_dentry->d_sb)) {
+		dirwh = au_sbi(sb)->si_dirwh;
+		rmdir_later = (dirwh <= 1);
+		if (!rmdir_later)
+			rmdir_later = au_nhash_test_longer_wh(whlist, bindex,
+							      dirwh);
+		if (rmdir_later)
+			return rmdir_later;
+	}
+
+	err = au_whtmp_rmdir(dir, bindex, h_dentry, whlist);
+	if (unlikely(err)) {
+		AuIOErr("rmdir %pd, b%d failed, %d. ignored\n",
+			h_dentry, bindex, err);
+		err = 0;
+	}
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/*
+ * final procedure for deleting a entry.
+ * maintain dentry and iattr.
+ */
+static void epilog(struct inode *dir, struct dentry *dentry,
+		   aufs_bindex_t bindex)
+{
+	struct inode *inode;
+
+	inode = d_inode(dentry);
+	d_drop(dentry);
+	inode->i_ctime = dir->i_ctime;
+
+	au_dir_ts(dir, bindex);
+	dir->i_version++;
+}
+
+/*
+ * when an error happened, remove the created whiteout and revert everything.
+ */
+static int do_revert(int err, struct inode *dir, aufs_bindex_t bindex,
+		     aufs_bindex_t bwh, struct dentry *wh_dentry,
+		     struct dentry *dentry, struct au_dtime *dt)
+{
+	int rerr;
+	struct path h_path = {
+		.dentry	= wh_dentry,
+		.mnt	= au_sbr_mnt(dir->i_sb, bindex)
+	};
+
+	rerr = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, dentry);
+	if (!rerr) {
+		au_set_dbwh(dentry, bwh);
+		au_dtime_revert(dt);
+		return 0;
+	}
+
+	AuIOErr("%pd reverting whiteout failed(%d, %d)\n", dentry, err, rerr);
+	return -EIO;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int aufs_unlink(struct inode *dir, struct dentry *dentry)
+{
+	int err;
+	aufs_bindex_t bwh, bindex, btop;
+	struct inode *inode, *h_dir, *delegated;
+	struct dentry *parent, *wh_dentry;
+	/* to reuduce stack size */
+	struct {
+		struct au_dtime dt;
+		struct au_pin pin;
+		struct path h_path;
+	} *a;
+
+	IMustLock(dir);
+
+	err = -ENOMEM;
+	a = kmalloc(sizeof(*a), GFP_NOFS);
+	if (unlikely(!a))
+		goto out;
+
+	err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
+	if (unlikely(err))
+		goto out_free;
+	err = au_d_hashed_positive(dentry);
+	if (unlikely(err))
+		goto out_unlock;
+	inode = d_inode(dentry);
+	IMustLock(inode);
+	err = -EISDIR;
+	if (unlikely(d_is_dir(dentry)))
+		goto out_unlock; /* possible? */
+
+	btop = au_dbtop(dentry);
+	bwh = au_dbwh(dentry);
+	bindex = -1;
+	parent = dentry->d_parent; /* dir inode is locked */
+	di_write_lock_parent(parent);
+	wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/0, &bindex, &a->dt,
+					&a->pin);
+	err = PTR_ERR(wh_dentry);
+	if (IS_ERR(wh_dentry))
+		goto out_parent;
+
+	a->h_path.mnt = au_sbr_mnt(dentry->d_sb, btop);
+	a->h_path.dentry = au_h_dptr(dentry, btop);
+	dget(a->h_path.dentry);
+	if (bindex == btop) {
+		h_dir = au_pinned_h_dir(&a->pin);
+		delegated = NULL;
+		err = vfsub_unlink(h_dir, &a->h_path, &delegated, /*force*/0);
+		if (unlikely(err == -EWOULDBLOCK)) {
+			pr_warn("cannot retry for NFSv4 delegation"
+				" for an internal unlink\n");
+			iput(delegated);
+		}
+	} else {
+		/* dir inode is locked */
+		h_dir = d_inode(wh_dentry->d_parent);
+		IMustLock(h_dir);
+		err = 0;
+	}
+
+	if (!err) {
+		vfsub_drop_nlink(inode);
+		epilog(dir, dentry, bindex);
+
+		/* update target timestamps */
+		if (bindex == btop) {
+			vfsub_update_h_iattr(&a->h_path, /*did*/NULL);
+			/*ignore*/
+			inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
+		} else
+			/* todo: this timestamp may be reverted later */
+			inode->i_ctime = h_dir->i_ctime;
+		goto out_unpin; /* success */
+	}
+
+	/* revert */
+	if (wh_dentry) {
+		int rerr;
+
+		rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry,
+				 &a->dt);
+		if (rerr)
+			err = rerr;
+	}
+
+out_unpin:
+	au_unpin(&a->pin);
+	dput(wh_dentry);
+	dput(a->h_path.dentry);
+out_parent:
+	di_write_unlock(parent);
+out_unlock:
+	aufs_read_unlock(dentry, AuLock_DW);
+out_free:
+	kfree(a);
+out:
+	return err;
+}
+
+int aufs_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	int err, rmdir_later;
+	aufs_bindex_t bwh, bindex, btop;
+	struct inode *inode;
+	struct dentry *parent, *wh_dentry, *h_dentry;
+	struct au_whtmp_rmdir *args;
+	/* to reuduce stack size */
+	struct {
+		struct au_dtime dt;
+		struct au_pin pin;
+	} *a;
+
+	IMustLock(dir);
+
+	err = -ENOMEM;
+	a = kmalloc(sizeof(*a), GFP_NOFS);
+	if (unlikely(!a))
+		goto out;
+
+	err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
+	if (unlikely(err))
+		goto out_free;
+	err = au_alive_dir(dentry);
+	if (unlikely(err))
+		goto out_unlock;
+	inode = d_inode(dentry);
+	IMustLock(inode);
+	err = -ENOTDIR;
+	if (unlikely(!d_is_dir(dentry)))
+		goto out_unlock; /* possible? */
+
+	err = -ENOMEM;
+	args = au_whtmp_rmdir_alloc(dir->i_sb, GFP_NOFS);
+	if (unlikely(!args))
+		goto out_unlock;
+
+	parent = dentry->d_parent; /* dir inode is locked */
+	di_write_lock_parent(parent);
+	err = au_test_empty(dentry, &args->whlist);
+	if (unlikely(err))
+		goto out_parent;
+
+	btop = au_dbtop(dentry);
+	bwh = au_dbwh(dentry);
+	bindex = -1;
+	wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &a->dt,
+					&a->pin);
+	err = PTR_ERR(wh_dentry);
+	if (IS_ERR(wh_dentry))
+		goto out_parent;
+
+	h_dentry = au_h_dptr(dentry, btop);
+	dget(h_dentry);
+	rmdir_later = 0;
+	if (bindex == btop) {
+		err = renwh_and_rmdir(dentry, btop, &args->whlist, dir);
+		if (err > 0) {
+			rmdir_later = err;
+			err = 0;
+		}
+	} else {
+		/* stop monitoring */
+		au_hn_free(au_hi(inode, btop));
+
+		/* dir inode is locked */
+		IMustLock(d_inode(wh_dentry->d_parent));
+		err = 0;
+	}
+
+	if (!err) {
+		vfsub_dead_dir(inode);
+		au_set_dbdiropq(dentry, -1);
+		epilog(dir, dentry, bindex);
+
+		if (rmdir_later) {
+			au_whtmp_kick_rmdir(dir, btop, h_dentry, args);
+			args = NULL;
+		}
+
+		goto out_unpin; /* success */
+	}
+
+	/* revert */
+	AuLabel(revert);
+	if (wh_dentry) {
+		int rerr;
+
+		rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry,
+				 &a->dt);
+		if (rerr)
+			err = rerr;
+	}
+
+out_unpin:
+	au_unpin(&a->pin);
+	dput(wh_dentry);
+	dput(h_dentry);
+out_parent:
+	di_write_unlock(parent);
+	if (args)
+		au_whtmp_rmdir_free(args);
+out_unlock:
+	aufs_read_unlock(dentry, AuLock_DW);
+out_free:
+	kfree(a);
+out:
+	AuTraceErr(err);
+	return err;
+}
diff --git a/include/fs/aufs/i_op_ren.c b/include/fs/aufs/i_op_ren.c
new file mode 100644
index 000000000..9cf1d7e45
--- /dev/null
+++ b/include/fs/aufs/i_op_ren.c
@@ -0,0 +1,1246 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * inode operation (rename entry)
+ * todo: this is crazy monster
+ */
+
+#include "aufs.h"
+
+enum { AuSRC, AuDST, AuSrcDst };
+enum { AuPARENT, AuCHILD, AuParentChild };
+
+#define AuRen_ISDIR_SRC		1
+#define AuRen_ISDIR_DST		(1 << 1)
+#define AuRen_ISSAMEDIR		(1 << 2)
+#define AuRen_WHSRC		(1 << 3)
+#define AuRen_WHDST		(1 << 4)
+#define AuRen_MNT_WRITE		(1 << 5)
+#define AuRen_DT_DSTDIR		(1 << 6)
+#define AuRen_DIROPQ_SRC	(1 << 7)
+#define AuRen_DIROPQ_DST	(1 << 8)
+#define AuRen_DIRREN		(1 << 9)
+#define AuRen_DROPPED_SRC	(1 << 10)
+#define AuRen_DROPPED_DST	(1 << 11)
+#define au_ftest_ren(flags, name)	((flags) & AuRen_##name)
+#define au_fset_ren(flags, name) \
+	do { (flags) |= AuRen_##name; } while (0)
+#define au_fclr_ren(flags, name) \
+	do { (flags) &= ~AuRen_##name; } while (0)
+
+#ifndef CONFIG_AUFS_DIRREN
+#undef AuRen_DIRREN
+#define AuRen_DIRREN		0
+#endif
+
+struct au_ren_args {
+	struct {
+		struct dentry *dentry, *h_dentry, *parent, *h_parent,
+			*wh_dentry;
+		struct inode *dir, *inode;
+		struct au_hinode *hdir, *hinode;
+		struct au_dtime dt[AuParentChild];
+		aufs_bindex_t btop, bdiropq;
+	} sd[AuSrcDst];
+
+#define src_dentry	sd[AuSRC].dentry
+#define src_dir		sd[AuSRC].dir
+#define src_inode	sd[AuSRC].inode
+#define src_h_dentry	sd[AuSRC].h_dentry
+#define src_parent	sd[AuSRC].parent
+#define src_h_parent	sd[AuSRC].h_parent
+#define src_wh_dentry	sd[AuSRC].wh_dentry
+#define src_hdir	sd[AuSRC].hdir
+#define src_hinode	sd[AuSRC].hinode
+#define src_h_dir	sd[AuSRC].hdir->hi_inode
+#define src_dt		sd[AuSRC].dt
+#define src_btop	sd[AuSRC].btop
+#define src_bdiropq	sd[AuSRC].bdiropq
+
+#define dst_dentry	sd[AuDST].dentry
+#define dst_dir		sd[AuDST].dir
+#define dst_inode	sd[AuDST].inode
+#define dst_h_dentry	sd[AuDST].h_dentry
+#define dst_parent	sd[AuDST].parent
+#define dst_h_parent	sd[AuDST].h_parent
+#define dst_wh_dentry	sd[AuDST].wh_dentry
+#define dst_hdir	sd[AuDST].hdir
+#define dst_hinode	sd[AuDST].hinode
+#define dst_h_dir	sd[AuDST].hdir->hi_inode
+#define dst_dt		sd[AuDST].dt
+#define dst_btop	sd[AuDST].btop
+#define dst_bdiropq	sd[AuDST].bdiropq
+
+	struct dentry *h_trap;
+	struct au_branch *br;
+	struct path h_path;
+	struct au_nhash whlist;
+	aufs_bindex_t btgt, src_bwh;
+
+	struct {
+		unsigned short auren_flags;
+		unsigned char flags;	/* syscall parameter */
+		unsigned char exchange;
+	} __packed;
+
+	struct au_whtmp_rmdir *thargs;
+	struct dentry *h_dst;
+	struct au_hinode *h_root;
+};
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * functions for reverting.
+ * when an error happened in a single rename systemcall, we should revert
+ * everything as if nothing happened.
+ * we don't need to revert the copied-up/down the parent dir since they are
+ * harmless.
+ */
+
+#define RevertFailure(fmt, ...) do { \
+	AuIOErr("revert failure: " fmt " (%d, %d)\n", \
+		##__VA_ARGS__, err, rerr); \
+	err = -EIO; \
+} while (0)
+
+static void au_ren_do_rev_diropq(int err, struct au_ren_args *a, int idx)
+{
+	int rerr;
+	struct dentry *d;
+#define src_or_dst(member) a->sd[idx].member
+
+	d = src_or_dst(dentry); /* {src,dst}_dentry */
+	au_hn_inode_lock_nested(src_or_dst(hinode), AuLsc_I_CHILD);
+	rerr = au_diropq_remove(d, a->btgt);
+	au_hn_inode_unlock(src_or_dst(hinode));
+	au_set_dbdiropq(d, src_or_dst(bdiropq));
+	if (rerr)
+		RevertFailure("remove diropq %pd", d);
+
+#undef src_or_dst_
+}
+
+static void au_ren_rev_diropq(int err, struct au_ren_args *a)
+{
+	if (au_ftest_ren(a->auren_flags, DIROPQ_SRC))
+		au_ren_do_rev_diropq(err, a, AuSRC);
+	if (au_ftest_ren(a->auren_flags, DIROPQ_DST))
+		au_ren_do_rev_diropq(err, a, AuDST);
+}
+
+static void au_ren_rev_rename(int err, struct au_ren_args *a)
+{
+	int rerr;
+	struct inode *delegated;
+
+	a->h_path.dentry = vfsub_lkup_one(&a->src_dentry->d_name,
+					  a->src_h_parent);
+	rerr = PTR_ERR(a->h_path.dentry);
+	if (IS_ERR(a->h_path.dentry)) {
+		RevertFailure("lkup one %pd", a->src_dentry);
+		return;
+	}
+
+	delegated = NULL;
+	rerr = vfsub_rename(a->dst_h_dir,
+			    au_h_dptr(a->src_dentry, a->btgt),
+			    a->src_h_dir, &a->h_path, &delegated, a->flags);
+	if (unlikely(rerr == -EWOULDBLOCK)) {
+		pr_warn("cannot retry for NFSv4 delegation"
+			" for an internal rename\n");
+		iput(delegated);
+	}
+	d_drop(a->h_path.dentry);
+	dput(a->h_path.dentry);
+	/* au_set_h_dptr(a->src_dentry, a->btgt, NULL); */
+	if (rerr)
+		RevertFailure("rename %pd", a->src_dentry);
+}
+
+static void au_ren_rev_whtmp(int err, struct au_ren_args *a)
+{
+	int rerr;
+	struct inode *delegated;
+
+	a->h_path.dentry = vfsub_lkup_one(&a->dst_dentry->d_name,
+					  a->dst_h_parent);
+	rerr = PTR_ERR(a->h_path.dentry);
+	if (IS_ERR(a->h_path.dentry)) {
+		RevertFailure("lkup one %pd", a->dst_dentry);
+		return;
+	}
+	if (d_is_positive(a->h_path.dentry)) {
+		d_drop(a->h_path.dentry);
+		dput(a->h_path.dentry);
+		return;
+	}
+
+	delegated = NULL;
+	rerr = vfsub_rename(a->dst_h_dir, a->h_dst, a->dst_h_dir, &a->h_path,
+			    &delegated, a->flags);
+	if (unlikely(rerr == -EWOULDBLOCK)) {
+		pr_warn("cannot retry for NFSv4 delegation"
+			" for an internal rename\n");
+		iput(delegated);
+	}
+	d_drop(a->h_path.dentry);
+	dput(a->h_path.dentry);
+	if (!rerr)
+		au_set_h_dptr(a->dst_dentry, a->btgt, dget(a->h_dst));
+	else
+		RevertFailure("rename %pd", a->h_dst);
+}
+
+static void au_ren_rev_whsrc(int err, struct au_ren_args *a)
+{
+	int rerr;
+
+	a->h_path.dentry = a->src_wh_dentry;
+	rerr = au_wh_unlink_dentry(a->src_h_dir, &a->h_path, a->src_dentry);
+	au_set_dbwh(a->src_dentry, a->src_bwh);
+	if (rerr)
+		RevertFailure("unlink %pd", a->src_wh_dentry);
+}
+#undef RevertFailure
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * when we have to copyup the renaming entry, do it with the rename-target name
+ * in order to minimize the cost (the later actual rename is unnecessary).
+ * otherwise rename it on the target branch.
+ */
+static int au_ren_or_cpup(struct au_ren_args *a)
+{
+	int err;
+	struct dentry *d;
+	struct inode *delegated;
+
+	d = a->src_dentry;
+	if (au_dbtop(d) == a->btgt) {
+		a->h_path.dentry = a->dst_h_dentry;
+		AuDebugOn(au_dbtop(d) != a->btgt);
+		delegated = NULL;
+		err = vfsub_rename(a->src_h_dir, au_h_dptr(d, a->btgt),
+				   a->dst_h_dir, &a->h_path, &delegated,
+				   a->flags);
+		if (unlikely(err == -EWOULDBLOCK)) {
+			pr_warn("cannot retry for NFSv4 delegation"
+				" for an internal rename\n");
+			iput(delegated);
+		}
+	} else
+		BUG();
+
+	if (!err && a->h_dst)
+		/* it will be set to dinfo later */
+		dget(a->h_dst);
+
+	return err;
+}
+
+/* cf. aufs_rmdir() */
+static int au_ren_del_whtmp(struct au_ren_args *a)
+{
+	int err;
+	struct inode *dir;
+
+	dir = a->dst_dir;
+	SiMustAnyLock(dir->i_sb);
+	if (!au_nhash_test_longer_wh(&a->whlist, a->btgt,
+				     au_sbi(dir->i_sb)->si_dirwh)
+	    || au_test_fs_remote(a->h_dst->d_sb)) {
+		err = au_whtmp_rmdir(dir, a->btgt, a->h_dst, &a->whlist);
+		if (unlikely(err))
+			pr_warn("failed removing whtmp dir %pd (%d), "
+				"ignored.\n", a->h_dst, err);
+	} else {
+		au_nhash_wh_free(&a->thargs->whlist);
+		a->thargs->whlist = a->whlist;
+		a->whlist.nh_num = 0;
+		au_whtmp_kick_rmdir(dir, a->btgt, a->h_dst, a->thargs);
+		dput(a->h_dst);
+		a->thargs = NULL;
+	}
+
+	return 0;
+}
+
+/* make it 'opaque' dir. */
+static int au_ren_do_diropq(struct au_ren_args *a, int idx)
+{
+	int err;
+	struct dentry *d, *diropq;
+#define src_or_dst(member) a->sd[idx].member
+
+	err = 0;
+	d = src_or_dst(dentry); /* {src,dst}_dentry */
+	src_or_dst(bdiropq) = au_dbdiropq(d);
+	src_or_dst(hinode) = au_hi(src_or_dst(inode), a->btgt);
+	au_hn_inode_lock_nested(src_or_dst(hinode), AuLsc_I_CHILD);
+	diropq = au_diropq_create(d, a->btgt);
+	au_hn_inode_unlock(src_or_dst(hinode));
+	if (IS_ERR(diropq))
+		err = PTR_ERR(diropq);
+	else
+		dput(diropq);
+
+#undef src_or_dst_
+	return err;
+}
+
+static int au_ren_diropq(struct au_ren_args *a)
+{
+	int err;
+	unsigned char always;
+	struct dentry *d;
+
+	err = 0;
+	d = a->dst_dentry; /* already renamed on the branch */
+	always = !!au_opt_test(au_mntflags(d->d_sb), ALWAYS_DIROPQ);
+	if (au_ftest_ren(a->auren_flags, ISDIR_SRC)
+	    && !au_ftest_ren(a->auren_flags, DIRREN)
+	    && a->btgt != au_dbdiropq(a->src_dentry)
+	    && (a->dst_wh_dentry
+		|| a->btgt <= au_dbdiropq(d)
+		/* hide the lower to keep xino */
+		/* the lowers may not be a dir, but we hide them anyway */
+		|| a->btgt < au_dbbot(d)
+		|| always)) {
+		AuDbg("here\n");
+		err = au_ren_do_diropq(a, AuSRC);
+		if (unlikely(err))
+			goto out;
+		au_fset_ren(a->auren_flags, DIROPQ_SRC);
+	}
+	if (!a->exchange)
+		goto out; /* success */
+
+	d = a->src_dentry; /* already renamed on the branch */
+	if (au_ftest_ren(a->auren_flags, ISDIR_DST)
+	    && a->btgt != au_dbdiropq(a->dst_dentry)
+	    && (a->btgt < au_dbdiropq(d)
+		|| a->btgt < au_dbbot(d)
+		|| always)) {
+		AuDbgDentry(a->src_dentry);
+		AuDbgDentry(a->dst_dentry);
+		err = au_ren_do_diropq(a, AuDST);
+		if (unlikely(err))
+			goto out_rev_src;
+		au_fset_ren(a->auren_flags, DIROPQ_DST);
+	}
+	goto out; /* success */
+
+out_rev_src:
+	AuDbg("err %d, reverting src\n", err);
+	au_ren_rev_diropq(err, a);
+out:
+	return err;
+}
+
+static int do_rename(struct au_ren_args *a)
+{
+	int err;
+	struct dentry *d, *h_d;
+
+	if (!a->exchange) {
+		/* prepare workqueue args for asynchronous rmdir */
+		h_d = a->dst_h_dentry;
+		if (au_ftest_ren(a->auren_flags, ISDIR_DST)
+		    /* && !au_ftest_ren(a->auren_flags, DIRREN) */
+		    && d_is_positive(h_d)) {
+			err = -ENOMEM;
+			a->thargs = au_whtmp_rmdir_alloc(a->src_dentry->d_sb,
+							 GFP_NOFS);
+			if (unlikely(!a->thargs))
+				goto out;
+			a->h_dst = dget(h_d);
+		}
+
+		/* create whiteout for src_dentry */
+		if (au_ftest_ren(a->auren_flags, WHSRC)) {
+			a->src_bwh = au_dbwh(a->src_dentry);
+			AuDebugOn(a->src_bwh >= 0);
+			a->src_wh_dentry = au_wh_create(a->src_dentry, a->btgt,
+							a->src_h_parent);
+			err = PTR_ERR(a->src_wh_dentry);
+			if (IS_ERR(a->src_wh_dentry))
+				goto out_thargs;
+		}
+
+		/* lookup whiteout for dentry */
+		if (au_ftest_ren(a->auren_flags, WHDST)) {
+			h_d = au_wh_lkup(a->dst_h_parent,
+					 &a->dst_dentry->d_name, a->br);
+			err = PTR_ERR(h_d);
+			if (IS_ERR(h_d))
+				goto out_whsrc;
+			if (d_is_negative(h_d))
+				dput(h_d);
+			else
+				a->dst_wh_dentry = h_d;
+		}
+
+		/* rename dentry to tmpwh */
+		if (a->thargs) {
+			err = au_whtmp_ren(a->dst_h_dentry, a->br);
+			if (unlikely(err))
+				goto out_whdst;
+
+			d = a->dst_dentry;
+			au_set_h_dptr(d, a->btgt, NULL);
+			err = au_lkup_neg(d, a->btgt, /*wh*/0);
+			if (unlikely(err))
+				goto out_whtmp;
+			a->dst_h_dentry = au_h_dptr(d, a->btgt);
+		}
+	}
+
+	BUG_ON(d_is_positive(a->dst_h_dentry) && a->src_btop != a->btgt);
+#if 0
+	BUG_ON(!au_ftest_ren(a->auren_flags, DIRREN)
+	       && d_is_positive(a->dst_h_dentry)
+	       && a->src_btop != a->btgt);
+#endif
+
+	/* rename by vfs_rename or cpup */
+	err = au_ren_or_cpup(a);
+	if (unlikely(err))
+		/* leave the copied-up one */
+		goto out_whtmp;
+
+	/* make dir opaque */
+	err = au_ren_diropq(a);
+	if (unlikely(err))
+		goto out_rename;
+
+	/* update target timestamps */
+	if (a->exchange) {
+		AuDebugOn(au_dbtop(a->dst_dentry) != a->btgt);
+		a->h_path.dentry = au_h_dptr(a->dst_dentry, a->btgt);
+		vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/
+		a->dst_inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
+	}
+	AuDebugOn(au_dbtop(a->src_dentry) != a->btgt);
+	a->h_path.dentry = au_h_dptr(a->src_dentry, a->btgt);
+	vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/
+	a->src_inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
+
+	if (!a->exchange) {
+		/* remove whiteout for dentry */
+		if (a->dst_wh_dentry) {
+			a->h_path.dentry = a->dst_wh_dentry;
+			err = au_wh_unlink_dentry(a->dst_h_dir, &a->h_path,
+						  a->dst_dentry);
+			if (unlikely(err))
+				goto out_diropq;
+		}
+
+		/* remove whtmp */
+		if (a->thargs)
+			au_ren_del_whtmp(a); /* ignore this error */
+
+		au_fhsm_wrote(a->src_dentry->d_sb, a->btgt, /*force*/0);
+	}
+	err = 0;
+	goto out_success;
+
+out_diropq:
+	au_ren_rev_diropq(err, a);
+out_rename:
+	au_ren_rev_rename(err, a);
+	dput(a->h_dst);
+out_whtmp:
+	if (a->thargs)
+		au_ren_rev_whtmp(err, a);
+out_whdst:
+	dput(a->dst_wh_dentry);
+	a->dst_wh_dentry = NULL;
+out_whsrc:
+	if (a->src_wh_dentry)
+		au_ren_rev_whsrc(err, a);
+out_success:
+	dput(a->src_wh_dentry);
+	dput(a->dst_wh_dentry);
+out_thargs:
+	if (a->thargs) {
+		dput(a->h_dst);
+		au_whtmp_rmdir_free(a->thargs);
+		a->thargs = NULL;
+	}
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * test if @dentry dir can be rename destination or not.
+ * success means, it is a logically empty dir.
+ */
+static int may_rename_dstdir(struct dentry *dentry, struct au_nhash *whlist)
+{
+	return au_test_empty(dentry, whlist);
+}
+
+/*
+ * test if @a->src_dentry dir can be rename source or not.
+ * if it can, return 0.
+ * success means,
+ * - it is a logically empty dir.
+ * - or, it exists on writable branch and has no children including whiteouts
+ *   on the lower branch unless DIRREN is on.
+ */
+static int may_rename_srcdir(struct au_ren_args *a)
+{
+	int err;
+	unsigned int rdhash;
+	aufs_bindex_t btop, btgt;
+	struct dentry *dentry;
+	struct super_block *sb;
+	struct au_sbinfo *sbinfo;
+
+	dentry = a->src_dentry;
+	sb = dentry->d_sb;
+	sbinfo = au_sbi(sb);
+	if (au_opt_test(sbinfo->si_mntflags, DIRREN))
+		au_fset_ren(a->auren_flags, DIRREN);
+
+	btgt = a->btgt;
+	btop = au_dbtop(dentry);
+	if (btop != btgt) {
+		struct au_nhash whlist;
+
+		SiMustAnyLock(sb);
+		rdhash = sbinfo->si_rdhash;
+		if (!rdhash)
+			rdhash = au_rdhash_est(au_dir_size(/*file*/NULL,
+							   dentry));
+		err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
+		if (unlikely(err))
+			goto out;
+		err = au_test_empty(dentry, &whlist);
+		au_nhash_wh_free(&whlist);
+		goto out;
+	}
+
+	if (btop == au_dbtaildir(dentry))
+		return 0; /* success */
+
+	err = au_test_empty_lower(dentry);
+
+out:
+	if (err == -ENOTEMPTY) {
+		if (au_ftest_ren(a->auren_flags, DIRREN)) {
+			err = 0;
+		} else {
+			AuWarn1("renaming dir who has child(ren) on multiple "
+				"branches, is not supported\n");
+			err = -EXDEV;
+		}
+	}
+	return err;
+}
+
+/* side effect: sets whlist and h_dentry */
+static int au_ren_may_dir(struct au_ren_args *a)
+{
+	int err;
+	unsigned int rdhash;
+	struct dentry *d;
+
+	d = a->dst_dentry;
+	SiMustAnyLock(d->d_sb);
+
+	err = 0;
+	if (au_ftest_ren(a->auren_flags, ISDIR_DST) && a->dst_inode) {
+		rdhash = au_sbi(d->d_sb)->si_rdhash;
+		if (!rdhash)
+			rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, d));
+		err = au_nhash_alloc(&a->whlist, rdhash, GFP_NOFS);
+		if (unlikely(err))
+			goto out;
+
+		if (!a->exchange) {
+			au_set_dbtop(d, a->dst_btop);
+			err = may_rename_dstdir(d, &a->whlist);
+			au_set_dbtop(d, a->btgt);
+		} else
+			err = may_rename_srcdir(a);
+	}
+	a->dst_h_dentry = au_h_dptr(d, au_dbtop(d));
+	if (unlikely(err))
+		goto out;
+
+	d = a->src_dentry;
+	a->src_h_dentry = au_h_dptr(d, au_dbtop(d));
+	if (au_ftest_ren(a->auren_flags, ISDIR_SRC)) {
+		err = may_rename_srcdir(a);
+		if (unlikely(err)) {
+			au_nhash_wh_free(&a->whlist);
+			a->whlist.nh_num = 0;
+		}
+	}
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * simple tests for rename.
+ * following the checks in vfs, plus the parent-child relationship.
+ */
+static int au_may_ren(struct au_ren_args *a)
+{
+	int err, isdir;
+	struct inode *h_inode;
+
+	if (a->src_btop == a->btgt) {
+		err = au_may_del(a->src_dentry, a->btgt, a->src_h_parent,
+				 au_ftest_ren(a->auren_flags, ISDIR_SRC));
+		if (unlikely(err))
+			goto out;
+		err = -EINVAL;
+		if (unlikely(a->src_h_dentry == a->h_trap))
+			goto out;
+	}
+
+	err = 0;
+	if (a->dst_btop != a->btgt)
+		goto out;
+
+	err = -ENOTEMPTY;
+	if (unlikely(a->dst_h_dentry == a->h_trap))
+		goto out;
+
+	err = -EIO;
+	isdir = !!au_ftest_ren(a->auren_flags, ISDIR_DST);
+	if (d_really_is_negative(a->dst_dentry)) {
+		if (d_is_negative(a->dst_h_dentry))
+			err = au_may_add(a->dst_dentry, a->btgt,
+					 a->dst_h_parent, isdir);
+	} else {
+		if (unlikely(d_is_negative(a->dst_h_dentry)))
+			goto out;
+		h_inode = d_inode(a->dst_h_dentry);
+		if (h_inode->i_nlink)
+			err = au_may_del(a->dst_dentry, a->btgt,
+					 a->dst_h_parent, isdir);
+	}
+
+out:
+	if (unlikely(err == -ENOENT || err == -EEXIST))
+		err = -EIO;
+	AuTraceErr(err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * locking order
+ * (VFS)
+ * - src_dir and dir by lock_rename()
+ * - inode if exitsts
+ * (aufs)
+ * - lock all
+ *   + src_dentry and dentry by aufs_read_and_write_lock2() which calls,
+ *     + si_read_lock
+ *     + di_write_lock2_child()
+ *       + di_write_lock_child()
+ *	   + ii_write_lock_child()
+ *       + di_write_lock_child2()
+ *	   + ii_write_lock_child2()
+ *     + src_parent and parent
+ *       + di_write_lock_parent()
+ *	   + ii_write_lock_parent()
+ *       + di_write_lock_parent2()
+ *	   + ii_write_lock_parent2()
+ *   + lower src_dir and dir by vfsub_lock_rename()
+ *   + verify the every relationships between child and parent. if any
+ *     of them failed, unlock all and return -EBUSY.
+ */
+static void au_ren_unlock(struct au_ren_args *a)
+{
+	vfsub_unlock_rename(a->src_h_parent, a->src_hdir,
+			    a->dst_h_parent, a->dst_hdir);
+	if (au_ftest_ren(a->auren_flags, DIRREN)
+	    && a->h_root)
+		au_hn_inode_unlock(a->h_root);
+	if (au_ftest_ren(a->auren_flags, MNT_WRITE))
+		vfsub_mnt_drop_write(au_br_mnt(a->br));
+}
+
+static int au_ren_lock(struct au_ren_args *a)
+{
+	int err;
+	unsigned int udba;
+
+	err = 0;
+	a->src_h_parent = au_h_dptr(a->src_parent, a->btgt);
+	a->src_hdir = au_hi(a->src_dir, a->btgt);
+	a->dst_h_parent = au_h_dptr(a->dst_parent, a->btgt);
+	a->dst_hdir = au_hi(a->dst_dir, a->btgt);
+
+	err = vfsub_mnt_want_write(au_br_mnt(a->br));
+	if (unlikely(err))
+		goto out;
+	au_fset_ren(a->auren_flags, MNT_WRITE);
+	if (au_ftest_ren(a->auren_flags, DIRREN)) {
+		struct dentry *root;
+		struct inode *dir;
+
+		/*
+		 * sbinfo is already locked, so this ii_read_lock is
+		 * unnecessary. but our debugging feature checks it.
+		 */
+		root = a->src_inode->i_sb->s_root;
+		if (root != a->src_parent && root != a->dst_parent) {
+			dir = d_inode(root);
+			ii_read_lock_parent3(dir);
+			a->h_root = au_hi(dir, a->btgt);
+			ii_read_unlock(dir);
+			au_hn_inode_lock_nested(a->h_root, AuLsc_I_PARENT3);
+		}
+	}
+	a->h_trap = vfsub_lock_rename(a->src_h_parent, a->src_hdir,
+				      a->dst_h_parent, a->dst_hdir);
+	udba = au_opt_udba(a->src_dentry->d_sb);
+	if (unlikely(a->src_hdir->hi_inode != d_inode(a->src_h_parent)
+		     || a->dst_hdir->hi_inode != d_inode(a->dst_h_parent)))
+		err = au_busy_or_stale();
+	if (!err && au_dbtop(a->src_dentry) == a->btgt)
+		err = au_h_verify(a->src_h_dentry, udba,
+				  d_inode(a->src_h_parent), a->src_h_parent,
+				  a->br);
+	if (!err && au_dbtop(a->dst_dentry) == a->btgt)
+		err = au_h_verify(a->dst_h_dentry, udba,
+				  d_inode(a->dst_h_parent), a->dst_h_parent,
+				  a->br);
+	if (!err)
+		goto out; /* success */
+
+	err = au_busy_or_stale();
+	au_ren_unlock(a);
+
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static void au_ren_refresh_dir(struct au_ren_args *a)
+{
+	struct inode *dir;
+
+	dir = a->dst_dir;
+	dir->i_version++;
+	if (au_ftest_ren(a->auren_flags, ISDIR_SRC)) {
+		/* is this updating defined in POSIX? */
+		au_cpup_attr_timesizes(a->src_inode);
+		au_cpup_attr_nlink(dir, /*force*/1);
+	}
+	au_dir_ts(dir, a->btgt);
+
+	if (a->exchange) {
+		dir = a->src_dir;
+		dir->i_version++;
+		if (au_ftest_ren(a->auren_flags, ISDIR_DST)) {
+			/* is this updating defined in POSIX? */
+			au_cpup_attr_timesizes(a->dst_inode);
+			au_cpup_attr_nlink(dir, /*force*/1);
+		}
+		au_dir_ts(dir, a->btgt);
+	}
+
+	if (au_ftest_ren(a->auren_flags, ISSAMEDIR))
+		return;
+
+	dir = a->src_dir;
+	dir->i_version++;
+	if (au_ftest_ren(a->auren_flags, ISDIR_SRC))
+		au_cpup_attr_nlink(dir, /*force*/1);
+	au_dir_ts(dir, a->btgt);
+}
+
+static void au_ren_refresh(struct au_ren_args *a)
+{
+	aufs_bindex_t bbot, bindex;
+	struct dentry *d, *h_d;
+	struct inode *i, *h_i;
+	struct super_block *sb;
+
+	d = a->dst_dentry;
+	d_drop(d);
+	if (a->h_dst)
+		/* already dget-ed by au_ren_or_cpup() */
+		au_set_h_dptr(d, a->btgt, a->h_dst);
+
+	i = a->dst_inode;
+	if (i) {
+		if (!a->exchange) {
+			if (!au_ftest_ren(a->auren_flags, ISDIR_DST))
+				vfsub_drop_nlink(i);
+			else {
+				vfsub_dead_dir(i);
+				au_cpup_attr_timesizes(i);
+			}
+			au_update_dbrange(d, /*do_put_zero*/1);
+		} else
+			au_cpup_attr_nlink(i, /*force*/1);
+	} else {
+		bbot = a->btgt;
+		for (bindex = au_dbtop(d); bindex < bbot; bindex++)
+			au_set_h_dptr(d, bindex, NULL);
+		bbot = au_dbbot(d);
+		for (bindex = a->btgt + 1; bindex <= bbot; bindex++)
+			au_set_h_dptr(d, bindex, NULL);
+		au_update_dbrange(d, /*do_put_zero*/0);
+	}
+
+	if (a->exchange
+	    || au_ftest_ren(a->auren_flags, DIRREN)) {
+		d_drop(a->src_dentry);
+		if (au_ftest_ren(a->auren_flags, DIRREN))
+			au_set_dbwh(a->src_dentry, -1);
+		return;
+	}
+
+	d = a->src_dentry;
+	au_set_dbwh(d, -1);
+	bbot = au_dbbot(d);
+	for (bindex = a->btgt + 1; bindex <= bbot; bindex++) {
+		h_d = au_h_dptr(d, bindex);
+		if (h_d)
+			au_set_h_dptr(d, bindex, NULL);
+	}
+	au_set_dbbot(d, a->btgt);
+
+	sb = d->d_sb;
+	i = a->src_inode;
+	if (au_opt_test(au_mntflags(sb), PLINK) && au_plink_test(i))
+		return; /* success */
+
+	bbot = au_ibbot(i);
+	for (bindex = a->btgt + 1; bindex <= bbot; bindex++) {
+		h_i = au_h_iptr(i, bindex);
+		if (h_i) {
+			au_xino_write(sb, bindex, h_i->i_ino, /*ino*/0);
+			/* ignore this error */
+			au_set_h_iptr(i, bindex, NULL, 0);
+		}
+	}
+	au_set_ibbot(i, a->btgt);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* mainly for link(2) and rename(2) */
+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt)
+{
+	aufs_bindex_t bdiropq, bwh;
+	struct dentry *parent;
+	struct au_branch *br;
+
+	parent = dentry->d_parent;
+	IMustLock(d_inode(parent)); /* dir is locked */
+
+	bdiropq = au_dbdiropq(parent);
+	bwh = au_dbwh(dentry);
+	br = au_sbr(dentry->d_sb, btgt);
+	if (au_br_rdonly(br)
+	    || (0 <= bdiropq && bdiropq < btgt)
+	    || (0 <= bwh && bwh < btgt))
+		btgt = -1;
+
+	AuDbg("btgt %d\n", btgt);
+	return btgt;
+}
+
+/* sets src_btop, dst_btop and btgt */
+static int au_ren_wbr(struct au_ren_args *a)
+{
+	int err;
+	struct au_wr_dir_args wr_dir_args = {
+		/* .force_btgt	= -1, */
+		.flags		= AuWrDir_ADD_ENTRY
+	};
+
+	a->src_btop = au_dbtop(a->src_dentry);
+	a->dst_btop = au_dbtop(a->dst_dentry);
+	if (au_ftest_ren(a->auren_flags, ISDIR_SRC)
+	    || au_ftest_ren(a->auren_flags, ISDIR_DST))
+		au_fset_wrdir(wr_dir_args.flags, ISDIR);
+	wr_dir_args.force_btgt = a->src_btop;
+	if (a->dst_inode && a->dst_btop < a->src_btop)
+		wr_dir_args.force_btgt = a->dst_btop;
+	wr_dir_args.force_btgt = au_wbr(a->dst_dentry, wr_dir_args.force_btgt);
+	err = au_wr_dir(a->dst_dentry, a->src_dentry, &wr_dir_args);
+	a->btgt = err;
+	if (a->exchange)
+		au_update_dbtop(a->dst_dentry);
+
+	return err;
+}
+
+static void au_ren_dt(struct au_ren_args *a)
+{
+	a->h_path.dentry = a->src_h_parent;
+	au_dtime_store(a->src_dt + AuPARENT, a->src_parent, &a->h_path);
+	if (!au_ftest_ren(a->auren_flags, ISSAMEDIR)) {
+		a->h_path.dentry = a->dst_h_parent;
+		au_dtime_store(a->dst_dt + AuPARENT, a->dst_parent, &a->h_path);
+	}
+
+	au_fclr_ren(a->auren_flags, DT_DSTDIR);
+	if (!au_ftest_ren(a->auren_flags, ISDIR_SRC)
+	    && !a->exchange)
+		return;
+
+	a->h_path.dentry = a->src_h_dentry;
+	au_dtime_store(a->src_dt + AuCHILD, a->src_dentry, &a->h_path);
+	if (d_is_positive(a->dst_h_dentry)) {
+		au_fset_ren(a->auren_flags, DT_DSTDIR);
+		a->h_path.dentry = a->dst_h_dentry;
+		au_dtime_store(a->dst_dt + AuCHILD, a->dst_dentry, &a->h_path);
+	}
+}
+
+static void au_ren_rev_dt(int err, struct au_ren_args *a)
+{
+	struct dentry *h_d;
+	struct inode *h_inode;
+
+	au_dtime_revert(a->src_dt + AuPARENT);
+	if (!au_ftest_ren(a->auren_flags, ISSAMEDIR))
+		au_dtime_revert(a->dst_dt + AuPARENT);
+
+	if (au_ftest_ren(a->auren_flags, ISDIR_SRC) && err != -EIO) {
+		h_d = a->src_dt[AuCHILD].dt_h_path.dentry;
+		h_inode = d_inode(h_d);
+		inode_lock_nested(h_inode, AuLsc_I_CHILD);
+		au_dtime_revert(a->src_dt + AuCHILD);
+		inode_unlock(h_inode);
+
+		if (au_ftest_ren(a->auren_flags, DT_DSTDIR)) {
+			h_d = a->dst_dt[AuCHILD].dt_h_path.dentry;
+			h_inode = d_inode(h_d);
+			inode_lock_nested(h_inode, AuLsc_I_CHILD);
+			au_dtime_revert(a->dst_dt + AuCHILD);
+			inode_unlock(h_inode);
+		}
+	}
+}
+
+/* ---------------------------------------------------------------------- */
+
+int aufs_rename(struct inode *_src_dir, struct dentry *_src_dentry,
+		struct inode *_dst_dir, struct dentry *_dst_dentry,
+		unsigned int _flags)
+{
+	int err, lock_flags;
+	void *rev;
+	/* reduce stack space */
+	struct au_ren_args *a;
+	struct au_pin pin;
+
+	AuDbg("%pd, %pd, 0x%x\n", _src_dentry, _dst_dentry, _flags);
+	IMustLock(_src_dir);
+	IMustLock(_dst_dir);
+
+	err = -EINVAL;
+	if (unlikely(_flags & RENAME_WHITEOUT))
+		goto out;
+
+	err = -ENOMEM;
+	BUILD_BUG_ON(sizeof(*a) > PAGE_SIZE);
+	a = kzalloc(sizeof(*a), GFP_NOFS);
+	if (unlikely(!a))
+		goto out;
+
+	a->flags = _flags;
+	a->exchange = _flags & RENAME_EXCHANGE;
+	a->src_dir = _src_dir;
+	a->src_dentry = _src_dentry;
+	a->src_inode = NULL;
+	if (d_really_is_positive(a->src_dentry))
+		a->src_inode = d_inode(a->src_dentry);
+	a->src_parent = a->src_dentry->d_parent; /* dir inode is locked */
+	a->dst_dir = _dst_dir;
+	a->dst_dentry = _dst_dentry;
+	a->dst_inode = NULL;
+	if (d_really_is_positive(a->dst_dentry))
+		a->dst_inode = d_inode(a->dst_dentry);
+	a->dst_parent = a->dst_dentry->d_parent; /* dir inode is locked */
+	if (a->dst_inode) {
+		/*
+		 * if EXCHANGE && src is non-dir && dst is dir,
+		 * dst is not locked.
+		 */
+		/* IMustLock(a->dst_inode); */
+		au_igrab(a->dst_inode);
+	}
+
+	err = -ENOTDIR;
+	lock_flags = AuLock_FLUSH | AuLock_NOPLM | AuLock_GEN;
+	if (d_is_dir(a->src_dentry)) {
+		au_fset_ren(a->auren_flags, ISDIR_SRC);
+		if (unlikely(!a->exchange
+			     && d_really_is_positive(a->dst_dentry)
+			     && !d_is_dir(a->dst_dentry)))
+			goto out_free;
+		lock_flags |= AuLock_DIRS;
+	}
+	if (a->dst_inode && d_is_dir(a->dst_dentry)) {
+		au_fset_ren(a->auren_flags, ISDIR_DST);
+		if (unlikely(!a->exchange
+			     && d_really_is_positive(a->src_dentry)
+			     && !d_is_dir(a->src_dentry)))
+			goto out_free;
+		lock_flags |= AuLock_DIRS;
+	}
+	err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry,
+					lock_flags);
+	if (unlikely(err))
+		goto out_free;
+
+	err = au_d_hashed_positive(a->src_dentry);
+	if (unlikely(err))
+		goto out_unlock;
+	err = -ENOENT;
+	if (a->dst_inode) {
+		/*
+		 * If it is a dir, VFS unhash it before this
+		 * function. It means we cannot rely upon d_unhashed().
+		 */
+		if (unlikely(!a->dst_inode->i_nlink))
+			goto out_unlock;
+		if (!au_ftest_ren(a->auren_flags, ISDIR_DST)) {
+			err = au_d_hashed_positive(a->dst_dentry);
+			if (unlikely(err && !a->exchange))
+				goto out_unlock;
+		} else if (unlikely(IS_DEADDIR(a->dst_inode)))
+			goto out_unlock;
+	} else if (unlikely(d_unhashed(a->dst_dentry)))
+		goto out_unlock;
+
+	/*
+	 * is it possible?
+	 * yes, it happened (in linux-3.3-rcN) but I don't know why.
+	 * there may exist a problem somewhere else.
+	 */
+	err = -EINVAL;
+	if (unlikely(d_inode(a->dst_parent) == d_inode(a->src_dentry)))
+		goto out_unlock;
+
+	au_fset_ren(a->auren_flags, ISSAMEDIR); /* temporary */
+	di_write_lock_parent(a->dst_parent);
+
+	/* which branch we process */
+	err = au_ren_wbr(a);
+	if (unlikely(err < 0))
+		goto out_parent;
+	a->br = au_sbr(a->dst_dentry->d_sb, a->btgt);
+	a->h_path.mnt = au_br_mnt(a->br);
+
+	/* are they available to be renamed */
+	err = au_ren_may_dir(a);
+	if (unlikely(err))
+		goto out_children;
+
+	/* prepare the writable parent dir on the same branch */
+	if (a->dst_btop == a->btgt) {
+		au_fset_ren(a->auren_flags, WHDST);
+	} else {
+		err = au_cpup_dirs(a->dst_dentry, a->btgt);
+		if (unlikely(err))
+			goto out_children;
+	}
+
+	err = 0;
+	if (!a->exchange) {
+		if (a->src_dir != a->dst_dir) {
+			/*
+			 * this temporary unlock is safe,
+			 * because both dir->i_mutex are locked.
+			 */
+			di_write_unlock(a->dst_parent);
+			di_write_lock_parent(a->src_parent);
+			err = au_wr_dir_need_wh(a->src_dentry,
+						au_ftest_ren(a->auren_flags,
+							     ISDIR_SRC),
+						&a->btgt);
+			di_write_unlock(a->src_parent);
+			di_write_lock2_parent(a->src_parent, a->dst_parent,
+					      /*isdir*/1);
+			au_fclr_ren(a->auren_flags, ISSAMEDIR);
+		} else
+			err = au_wr_dir_need_wh(a->src_dentry,
+						au_ftest_ren(a->auren_flags,
+							     ISDIR_SRC),
+						&a->btgt);
+	}
+	if (unlikely(err < 0))
+		goto out_children;
+	if (err)
+		au_fset_ren(a->auren_flags, WHSRC);
+
+	/* cpup src */
+	if (a->src_btop != a->btgt) {
+		err = au_pin(&pin, a->src_dentry, a->btgt,
+			     au_opt_udba(a->src_dentry->d_sb),
+			     AuPin_DI_LOCKED | AuPin_MNT_WRITE);
+		if (!err) {
+			struct au_cp_generic cpg = {
+				.dentry	= a->src_dentry,
+				.bdst	= a->btgt,
+				.bsrc	= a->src_btop,
+				.len	= -1,
+				.pin	= &pin,
+				.flags	= AuCpup_DTIME | AuCpup_HOPEN
+			};
+			AuDebugOn(au_dbtop(a->src_dentry) != a->src_btop);
+			err = au_sio_cpup_simple(&cpg);
+			au_unpin(&pin);
+		}
+		if (unlikely(err))
+			goto out_children;
+		a->src_btop = a->btgt;
+		a->src_h_dentry = au_h_dptr(a->src_dentry, a->btgt);
+		if (!a->exchange)
+			au_fset_ren(a->auren_flags, WHSRC);
+	}
+
+	/* cpup dst */
+	if (a->exchange && a->dst_inode
+	    && a->dst_btop != a->btgt) {
+		err = au_pin(&pin, a->dst_dentry, a->btgt,
+			     au_opt_udba(a->dst_dentry->d_sb),
+			     AuPin_DI_LOCKED | AuPin_MNT_WRITE);
+		if (!err) {
+			struct au_cp_generic cpg = {
+				.dentry	= a->dst_dentry,
+				.bdst	= a->btgt,
+				.bsrc	= a->dst_btop,
+				.len	= -1,
+				.pin	= &pin,
+				.flags	= AuCpup_DTIME | AuCpup_HOPEN
+			};
+			err = au_sio_cpup_simple(&cpg);
+			au_unpin(&pin);
+		}
+		if (unlikely(err))
+			goto out_children;
+		a->dst_btop = a->btgt;
+		a->dst_h_dentry = au_h_dptr(a->dst_dentry, a->btgt);
+	}
+
+	/* lock them all */
+	err = au_ren_lock(a);
+	if (unlikely(err))
+		/* leave the copied-up one */
+		goto out_children;
+
+	if (!a->exchange) {
+		if (!au_opt_test(au_mntflags(a->dst_dir->i_sb), UDBA_NONE))
+			err = au_may_ren(a);
+		else if (unlikely(a->dst_dentry->d_name.len > AUFS_MAX_NAMELEN))
+			err = -ENAMETOOLONG;
+		if (unlikely(err))
+			goto out_hdir;
+	}
+
+	/* store timestamps to be revertible */
+	au_ren_dt(a);
+
+	/* store dirren info */
+	if (au_ftest_ren(a->auren_flags, DIRREN)) {
+		err = au_dr_rename(a->src_dentry, a->btgt,
+				   &a->dst_dentry->d_name, &rev);
+		AuTraceErr(err);
+		if (unlikely(err))
+			goto out_dt;
+	}
+
+	/* here we go */
+	err = do_rename(a);
+	if (unlikely(err))
+		goto out_dirren;
+
+	if (au_ftest_ren(a->auren_flags, DIRREN))
+		au_dr_rename_fin(a->src_dentry, a->btgt, rev);
+
+	/* update dir attributes */
+	au_ren_refresh_dir(a);
+
+	/* dput/iput all lower dentries */
+	au_ren_refresh(a);
+
+	goto out_hdir; /* success */
+
+out_dirren:
+	if (au_ftest_ren(a->auren_flags, DIRREN))
+		au_dr_rename_rev(a->src_dentry, a->btgt, rev);
+out_dt:
+	au_ren_rev_dt(err, a);
+out_hdir:
+	au_ren_unlock(a);
+out_children:
+	au_nhash_wh_free(&a->whlist);
+	if (err && a->dst_inode && a->dst_btop != a->btgt) {
+		AuDbg("btop %d, btgt %d\n", a->dst_btop, a->btgt);
+		au_set_h_dptr(a->dst_dentry, a->btgt, NULL);
+		au_set_dbtop(a->dst_dentry, a->dst_btop);
+	}
+out_parent:
+	if (!err) {
+		if (d_unhashed(a->src_dentry))
+			au_fset_ren(a->auren_flags, DROPPED_SRC);
+		if (d_unhashed(a->dst_dentry))
+			au_fset_ren(a->auren_flags, DROPPED_DST);
+		if (!a->exchange)
+			d_move(a->src_dentry, a->dst_dentry);
+		else {
+			d_exchange(a->src_dentry, a->dst_dentry);
+			if (au_ftest_ren(a->auren_flags, DROPPED_DST))
+				d_drop(a->dst_dentry);
+		}
+		if (au_ftest_ren(a->auren_flags, DROPPED_SRC))
+			d_drop(a->src_dentry);
+	} else {
+		au_update_dbtop(a->dst_dentry);
+		if (!a->dst_inode)
+			d_drop(a->dst_dentry);
+	}
+	if (au_ftest_ren(a->auren_flags, ISSAMEDIR))
+		di_write_unlock(a->dst_parent);
+	else
+		di_write_unlock2(a->src_parent, a->dst_parent);
+out_unlock:
+	aufs_read_and_write_unlock2(a->dst_dentry, a->src_dentry);
+out_free:
+	iput(a->dst_inode);
+	if (a->thargs)
+		au_whtmp_rmdir_free(a->thargs);
+	kfree(a);
+out:
+	AuTraceErr(err);
+	return err;
+}
diff --git a/include/fs/aufs/iinfo.c b/include/fs/aufs/iinfo.c
new file mode 100644
index 000000000..d5cba3b98
--- /dev/null
+++ b/include/fs/aufs/iinfo.c
@@ -0,0 +1,285 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * inode private data
+ */
+
+#include "aufs.h"
+
+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex)
+{
+	struct inode *h_inode;
+	struct au_hinode *hinode;
+
+	IiMustAnyLock(inode);
+
+	hinode = au_hinode(au_ii(inode), bindex);
+	h_inode = hinode->hi_inode;
+	AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
+	return h_inode;
+}
+
+/* todo: hard/soft set? */
+void au_hiput(struct au_hinode *hinode)
+{
+	au_hn_free(hinode);
+	dput(hinode->hi_whdentry);
+	iput(hinode->hi_inode);
+}
+
+unsigned int au_hi_flags(struct inode *inode, int isdir)
+{
+	unsigned int flags;
+	const unsigned int mnt_flags = au_mntflags(inode->i_sb);
+
+	flags = 0;
+	if (au_opt_test(mnt_flags, XINO))
+		au_fset_hi(flags, XINO);
+	if (isdir && au_opt_test(mnt_flags, UDBA_HNOTIFY))
+		au_fset_hi(flags, HNOTIFY);
+	return flags;
+}
+
+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
+		   struct inode *h_inode, unsigned int flags)
+{
+	struct au_hinode *hinode;
+	struct inode *hi;
+	struct au_iinfo *iinfo = au_ii(inode);
+
+	IiMustWriteLock(inode);
+
+	hinode = au_hinode(iinfo, bindex);
+	hi = hinode->hi_inode;
+	AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
+
+	if (hi)
+		au_hiput(hinode);
+	hinode->hi_inode = h_inode;
+	if (h_inode) {
+		int err;
+		struct super_block *sb = inode->i_sb;
+		struct au_branch *br;
+
+		AuDebugOn(inode->i_mode
+			  && (h_inode->i_mode & S_IFMT)
+			  != (inode->i_mode & S_IFMT));
+		if (bindex == iinfo->ii_btop)
+			au_cpup_igen(inode, h_inode);
+		br = au_sbr(sb, bindex);
+		hinode->hi_id = br->br_id;
+		if (au_ftest_hi(flags, XINO)) {
+			err = au_xino_write(sb, bindex, h_inode->i_ino,
+					    inode->i_ino);
+			if (unlikely(err))
+				AuIOErr1("failed au_xino_write() %d\n", err);
+		}
+
+		if (au_ftest_hi(flags, HNOTIFY)
+		    && au_br_hnotifyable(br->br_perm)) {
+			err = au_hn_alloc(hinode, inode);
+			if (unlikely(err))
+				AuIOErr1("au_hn_alloc() %d\n", err);
+		}
+	}
+}
+
+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
+		  struct dentry *h_wh)
+{
+	struct au_hinode *hinode;
+
+	IiMustWriteLock(inode);
+
+	hinode = au_hinode(au_ii(inode), bindex);
+	AuDebugOn(hinode->hi_whdentry);
+	hinode->hi_whdentry = h_wh;
+}
+
+void au_update_iigen(struct inode *inode, int half)
+{
+	struct au_iinfo *iinfo;
+	struct au_iigen *iigen;
+	unsigned int sigen;
+
+	sigen = au_sigen(inode->i_sb);
+	iinfo = au_ii(inode);
+	iigen = &iinfo->ii_generation;
+	spin_lock(&iigen->ig_spin);
+	iigen->ig_generation = sigen;
+	if (half)
+		au_ig_fset(iigen->ig_flags, HALF_REFRESHED);
+	else
+		au_ig_fclr(iigen->ig_flags, HALF_REFRESHED);
+	spin_unlock(&iigen->ig_spin);
+}
+
+/* it may be called at remount time, too */
+void au_update_ibrange(struct inode *inode, int do_put_zero)
+{
+	struct au_iinfo *iinfo;
+	aufs_bindex_t bindex, bbot;
+
+	AuDebugOn(au_is_bad_inode(inode));
+	IiMustWriteLock(inode);
+
+	iinfo = au_ii(inode);
+	if (do_put_zero && iinfo->ii_btop >= 0) {
+		for (bindex = iinfo->ii_btop; bindex <= iinfo->ii_bbot;
+		     bindex++) {
+			struct inode *h_i;
+
+			h_i = au_hinode(iinfo, bindex)->hi_inode;
+			if (h_i
+			    && !h_i->i_nlink
+			    && !(h_i->i_state & I_LINKABLE))
+				au_set_h_iptr(inode, bindex, NULL, 0);
+		}
+	}
+
+	iinfo->ii_btop = -1;
+	iinfo->ii_bbot = -1;
+	bbot = au_sbbot(inode->i_sb);
+	for (bindex = 0; bindex <= bbot; bindex++)
+		if (au_hinode(iinfo, bindex)->hi_inode) {
+			iinfo->ii_btop = bindex;
+			break;
+		}
+	if (iinfo->ii_btop >= 0)
+		for (bindex = bbot; bindex >= iinfo->ii_btop; bindex--)
+			if (au_hinode(iinfo, bindex)->hi_inode) {
+				iinfo->ii_bbot = bindex;
+				break;
+			}
+	AuDebugOn(iinfo->ii_btop > iinfo->ii_bbot);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void au_icntnr_init_once(void *_c)
+{
+	struct au_icntnr *c = _c;
+	struct au_iinfo *iinfo = &c->iinfo;
+
+	spin_lock_init(&iinfo->ii_generation.ig_spin);
+	au_rw_init(&iinfo->ii_rwsem);
+	inode_init_once(&c->vfs_inode);
+}
+
+void au_hinode_init(struct au_hinode *hinode)
+{
+	hinode->hi_inode = NULL;
+	hinode->hi_id = -1;
+	au_hn_init(hinode);
+	hinode->hi_whdentry = NULL;
+}
+
+int au_iinfo_init(struct inode *inode)
+{
+	struct au_iinfo *iinfo;
+	struct super_block *sb;
+	struct au_hinode *hi;
+	int nbr, i;
+
+	sb = inode->i_sb;
+	iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
+	nbr = au_sbbot(sb) + 1;
+	if (unlikely(nbr <= 0))
+		nbr = 1;
+	hi = kmalloc_array(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS);
+	if (hi) {
+		au_ninodes_inc(sb);
+
+		iinfo->ii_hinode = hi;
+		for (i = 0; i < nbr; i++, hi++)
+			au_hinode_init(hi);
+
+		iinfo->ii_generation.ig_generation = au_sigen(sb);
+		iinfo->ii_btop = -1;
+		iinfo->ii_bbot = -1;
+		iinfo->ii_vdir = NULL;
+		return 0;
+	}
+	return -ENOMEM;
+}
+
+int au_hinode_realloc(struct au_iinfo *iinfo, int nbr, int may_shrink)
+{
+	int err, i;
+	struct au_hinode *hip;
+
+	AuRwMustWriteLock(&iinfo->ii_rwsem);
+
+	err = -ENOMEM;
+	hip = au_krealloc(iinfo->ii_hinode, sizeof(*hip) * nbr, GFP_NOFS,
+			  may_shrink);
+	if (hip) {
+		iinfo->ii_hinode = hip;
+		i = iinfo->ii_bbot + 1;
+		hip += i;
+		for (; i < nbr; i++, hip++)
+			au_hinode_init(hip);
+		err = 0;
+	}
+
+	return err;
+}
+
+void au_iinfo_fin(struct inode *inode)
+{
+	struct au_iinfo *iinfo;
+	struct au_hinode *hi;
+	struct super_block *sb;
+	aufs_bindex_t bindex, bbot;
+	const unsigned char unlinked = !inode->i_nlink;
+
+	AuDebugOn(au_is_bad_inode(inode));
+
+	sb = inode->i_sb;
+	au_ninodes_dec(sb);
+	if (si_pid_test(sb))
+		au_xino_delete_inode(inode, unlinked);
+	else {
+		/*
+		 * it is safe to hide the dependency between sbinfo and
+		 * sb->s_umount.
+		 */
+		lockdep_off();
+		si_noflush_read_lock(sb);
+		au_xino_delete_inode(inode, unlinked);
+		si_read_unlock(sb);
+		lockdep_on();
+	}
+
+	iinfo = au_ii(inode);
+	if (iinfo->ii_vdir)
+		au_vdir_free(iinfo->ii_vdir);
+
+	bindex = iinfo->ii_btop;
+	if (bindex >= 0) {
+		hi = au_hinode(iinfo, bindex);
+		bbot = iinfo->ii_bbot;
+		while (bindex++ <= bbot) {
+			if (hi->hi_inode)
+				au_hiput(hi);
+			hi++;
+		}
+	}
+	kfree(iinfo->ii_hinode);
+	AuRwDestroy(&iinfo->ii_rwsem);
+}
diff --git a/include/fs/aufs/inode.c b/include/fs/aufs/inode.c
new file mode 100644
index 000000000..79803c4ef
--- /dev/null
+++ b/include/fs/aufs/inode.c
@@ -0,0 +1,527 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * inode functions
+ */
+
+#include "aufs.h"
+
+struct inode *au_igrab(struct inode *inode)
+{
+	if (inode) {
+		AuDebugOn(!atomic_read(&inode->i_count));
+		ihold(inode);
+	}
+	return inode;
+}
+
+static void au_refresh_hinode_attr(struct inode *inode, int do_version)
+{
+	au_cpup_attr_all(inode, /*force*/0);
+	au_update_iigen(inode, /*half*/1);
+	if (do_version)
+		inode->i_version++;
+}
+
+static int au_ii_refresh(struct inode *inode, int *update)
+{
+	int err, e, nbr;
+	umode_t type;
+	aufs_bindex_t bindex, new_bindex;
+	struct super_block *sb;
+	struct au_iinfo *iinfo;
+	struct au_hinode *p, *q, tmp;
+
+	AuDebugOn(au_is_bad_inode(inode));
+	IiMustWriteLock(inode);
+
+	*update = 0;
+	sb = inode->i_sb;
+	nbr = au_sbbot(sb) + 1;
+	type = inode->i_mode & S_IFMT;
+	iinfo = au_ii(inode);
+	err = au_hinode_realloc(iinfo, nbr, /*may_shrink*/0);
+	if (unlikely(err))
+		goto out;
+
+	AuDebugOn(iinfo->ii_btop < 0);
+	p = au_hinode(iinfo, iinfo->ii_btop);
+	for (bindex = iinfo->ii_btop; bindex <= iinfo->ii_bbot;
+	     bindex++, p++) {
+		if (!p->hi_inode)
+			continue;
+
+		AuDebugOn(type != (p->hi_inode->i_mode & S_IFMT));
+		new_bindex = au_br_index(sb, p->hi_id);
+		if (new_bindex == bindex)
+			continue;
+
+		if (new_bindex < 0) {
+			*update = 1;
+			au_hiput(p);
+			p->hi_inode = NULL;
+			continue;
+		}
+
+		if (new_bindex < iinfo->ii_btop)
+			iinfo->ii_btop = new_bindex;
+		if (iinfo->ii_bbot < new_bindex)
+			iinfo->ii_bbot = new_bindex;
+		/* swap two lower inode, and loop again */
+		q = au_hinode(iinfo, new_bindex);
+		tmp = *q;
+		*q = *p;
+		*p = tmp;
+		if (tmp.hi_inode) {
+			bindex--;
+			p--;
+		}
+	}
+	au_update_ibrange(inode, /*do_put_zero*/0);
+	au_hinode_realloc(iinfo, nbr, /*may_shrink*/1); /* harmless if err */
+	e = au_dy_irefresh(inode);
+	if (unlikely(e && !err))
+		err = e;
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+void au_refresh_iop(struct inode *inode, int force_getattr)
+{
+	int type;
+	struct au_sbinfo *sbi = au_sbi(inode->i_sb);
+	const struct inode_operations *iop
+		= force_getattr ? aufs_iop : sbi->si_iop_array;
+
+	if (inode->i_op == iop)
+		return;
+
+	switch (inode->i_mode & S_IFMT) {
+	case S_IFDIR:
+		type = AuIop_DIR;
+		break;
+	case S_IFLNK:
+		type = AuIop_SYMLINK;
+		break;
+	default:
+		type = AuIop_OTHER;
+		break;
+	}
+
+	inode->i_op = iop + type;
+	/* unnecessary smp_wmb() */
+}
+
+int au_refresh_hinode_self(struct inode *inode)
+{
+	int err, update;
+
+	err = au_ii_refresh(inode, &update);
+	if (!err)
+		au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode));
+
+	AuTraceErr(err);
+	return err;
+}
+
+int au_refresh_hinode(struct inode *inode, struct dentry *dentry)
+{
+	int err, e, update;
+	unsigned int flags;
+	umode_t mode;
+	aufs_bindex_t bindex, bbot;
+	unsigned char isdir;
+	struct au_hinode *p;
+	struct au_iinfo *iinfo;
+
+	err = au_ii_refresh(inode, &update);
+	if (unlikely(err))
+		goto out;
+
+	update = 0;
+	iinfo = au_ii(inode);
+	p = au_hinode(iinfo, iinfo->ii_btop);
+	mode = (inode->i_mode & S_IFMT);
+	isdir = S_ISDIR(mode);
+	flags = au_hi_flags(inode, isdir);
+	bbot = au_dbbot(dentry);
+	for (bindex = au_dbtop(dentry); bindex <= bbot; bindex++) {
+		struct inode *h_i, *h_inode;
+		struct dentry *h_d;
+
+		h_d = au_h_dptr(dentry, bindex);
+		if (!h_d || d_is_negative(h_d))
+			continue;
+
+		h_inode = d_inode(h_d);
+		AuDebugOn(mode != (h_inode->i_mode & S_IFMT));
+		if (iinfo->ii_btop <= bindex && bindex <= iinfo->ii_bbot) {
+			h_i = au_h_iptr(inode, bindex);
+			if (h_i) {
+				if (h_i == h_inode)
+					continue;
+				err = -EIO;
+				break;
+			}
+		}
+		if (bindex < iinfo->ii_btop)
+			iinfo->ii_btop = bindex;
+		if (iinfo->ii_bbot < bindex)
+			iinfo->ii_bbot = bindex;
+		au_set_h_iptr(inode, bindex, au_igrab(h_inode), flags);
+		update = 1;
+	}
+	au_update_ibrange(inode, /*do_put_zero*/0);
+	e = au_dy_irefresh(inode);
+	if (unlikely(e && !err))
+		err = e;
+	if (!err)
+		au_refresh_hinode_attr(inode, update && isdir);
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+static int set_inode(struct inode *inode, struct dentry *dentry)
+{
+	int err;
+	unsigned int flags;
+	umode_t mode;
+	aufs_bindex_t bindex, btop, btail;
+	unsigned char isdir;
+	struct dentry *h_dentry;
+	struct inode *h_inode;
+	struct au_iinfo *iinfo;
+	struct inode_operations *iop;
+
+	IiMustWriteLock(inode);
+
+	err = 0;
+	isdir = 0;
+	iop = au_sbi(inode->i_sb)->si_iop_array;
+	btop = au_dbtop(dentry);
+	h_dentry = au_h_dptr(dentry, btop);
+	h_inode = d_inode(h_dentry);
+	mode = h_inode->i_mode;
+	switch (mode & S_IFMT) {
+	case S_IFREG:
+		btail = au_dbtail(dentry);
+		inode->i_op = iop + AuIop_OTHER;
+		inode->i_fop = &aufs_file_fop;
+		err = au_dy_iaop(inode, btop, h_inode);
+		if (unlikely(err))
+			goto out;
+		break;
+	case S_IFDIR:
+		isdir = 1;
+		btail = au_dbtaildir(dentry);
+		inode->i_op = iop + AuIop_DIR;
+		inode->i_fop = &aufs_dir_fop;
+		break;
+	case S_IFLNK:
+		btail = au_dbtail(dentry);
+		inode->i_op = iop + AuIop_SYMLINK;
+		break;
+	case S_IFBLK:
+	case S_IFCHR:
+	case S_IFIFO:
+	case S_IFSOCK:
+		btail = au_dbtail(dentry);
+		inode->i_op = iop + AuIop_OTHER;
+		init_special_inode(inode, mode, h_inode->i_rdev);
+		break;
+	default:
+		AuIOErr("Unknown file type 0%o\n", mode);
+		err = -EIO;
+		goto out;
+	}
+
+	/* do not set hnotify for whiteouted dirs (SHWH mode) */
+	flags = au_hi_flags(inode, isdir);
+	if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)
+	    && au_ftest_hi(flags, HNOTIFY)
+	    && dentry->d_name.len > AUFS_WH_PFX_LEN
+	    && !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))
+		au_fclr_hi(flags, HNOTIFY);
+	iinfo = au_ii(inode);
+	iinfo->ii_btop = btop;
+	iinfo->ii_bbot = btail;
+	for (bindex = btop; bindex <= btail; bindex++) {
+		h_dentry = au_h_dptr(dentry, bindex);
+		if (h_dentry)
+			au_set_h_iptr(inode, bindex,
+				      au_igrab(d_inode(h_dentry)), flags);
+	}
+	au_cpup_attr_all(inode, /*force*/1);
+	/*
+	 * to force calling aufs_get_acl() every time,
+	 * do not call cache_no_acl() for aufs inode.
+	 */
+
+out:
+	return err;
+}
+
+/*
+ * successful returns with iinfo write_locked
+ * minus: errno
+ * zero: success, matched
+ * plus: no error, but unmatched
+ */
+static int reval_inode(struct inode *inode, struct dentry *dentry)
+{
+	int err;
+	unsigned int gen, igflags;
+	aufs_bindex_t bindex, bbot;
+	struct inode *h_inode, *h_dinode;
+	struct dentry *h_dentry;
+
+	/*
+	 * before this function, if aufs got any iinfo lock, it must be only
+	 * one, the parent dir.
+	 * it can happen by UDBA and the obsoleted inode number.
+	 */
+	err = -EIO;
+	if (unlikely(inode->i_ino == parent_ino(dentry)))
+		goto out;
+
+	err = 1;
+	ii_write_lock_new_child(inode);
+	h_dentry = au_h_dptr(dentry, au_dbtop(dentry));
+	h_dinode = d_inode(h_dentry);
+	bbot = au_ibbot(inode);
+	for (bindex = au_ibtop(inode); bindex <= bbot; bindex++) {
+		h_inode = au_h_iptr(inode, bindex);
+		if (!h_inode || h_inode != h_dinode)
+			continue;
+
+		err = 0;
+		gen = au_iigen(inode, &igflags);
+		if (gen == au_digen(dentry)
+		    && !au_ig_ftest(igflags, HALF_REFRESHED))
+			break;
+
+		/* fully refresh inode using dentry */
+		err = au_refresh_hinode(inode, dentry);
+		if (!err)
+			au_update_iigen(inode, /*half*/0);
+		break;
+	}
+
+	if (unlikely(err))
+		ii_write_unlock(inode);
+out:
+	return err;
+}
+
+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
+	   unsigned int d_type, ino_t *ino)
+{
+	int err, idx;
+	const int isnondir = d_type != DT_DIR;
+
+	/* prevent hardlinked inode number from race condition */
+	if (isnondir) {
+		err = au_xinondir_enter(sb, bindex, h_ino, &idx);
+		if (unlikely(err))
+			goto out;
+	}
+
+	err = au_xino_read(sb, bindex, h_ino, ino);
+	if (unlikely(err))
+		goto out_xinondir;
+
+	if (!*ino) {
+		err = -EIO;
+		*ino = au_xino_new_ino(sb);
+		if (unlikely(!*ino))
+			goto out_xinondir;
+		err = au_xino_write(sb, bindex, h_ino, *ino);
+		if (unlikely(err))
+			goto out_xinondir;
+	}
+
+out_xinondir:
+	if (isnondir && idx >= 0)
+		au_xinondir_leave(sb, bindex, h_ino, idx);
+out:
+	return err;
+}
+
+/* successful returns with iinfo write_locked */
+/* todo: return with unlocked? */
+struct inode *au_new_inode(struct dentry *dentry, int must_new)
+{
+	struct inode *inode, *h_inode;
+	struct dentry *h_dentry;
+	struct super_block *sb;
+	ino_t h_ino, ino;
+	int err, idx, hlinked;
+	aufs_bindex_t btop;
+
+	sb = dentry->d_sb;
+	btop = au_dbtop(dentry);
+	h_dentry = au_h_dptr(dentry, btop);
+	h_inode = d_inode(h_dentry);
+	h_ino = h_inode->i_ino;
+	hlinked = !d_is_dir(h_dentry) && h_inode->i_nlink > 1;
+
+new_ino:
+	/*
+	 * stop 'race'-ing between hardlinks under different
+	 * parents.
+	 */
+	if (hlinked) {
+		err = au_xinondir_enter(sb, btop, h_ino, &idx);
+		inode = ERR_PTR(err);
+		if (unlikely(err))
+			goto out;
+	}
+
+	err = au_xino_read(sb, btop, h_ino, &ino);
+	inode = ERR_PTR(err);
+	if (unlikely(err))
+		goto out_xinondir;
+
+	if (!ino) {
+		ino = au_xino_new_ino(sb);
+		if (unlikely(!ino)) {
+			inode = ERR_PTR(-EIO);
+			goto out_xinondir;
+		}
+	}
+
+	AuDbg("i%lu\n", (unsigned long)ino);
+	inode = au_iget_locked(sb, ino);
+	err = PTR_ERR(inode);
+	if (IS_ERR(inode))
+		goto out_xinondir;
+
+	AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW));
+	if (inode->i_state & I_NEW) {
+		ii_write_lock_new_child(inode);
+		err = set_inode(inode, dentry);
+		if (!err) {
+			unlock_new_inode(inode);
+			goto out_xinondir; /* success */
+		}
+
+		/*
+		 * iget_failed() calls iput(), but we need to call
+		 * ii_write_unlock() after iget_failed(). so dirty hack for
+		 * i_count.
+		 */
+		atomic_inc(&inode->i_count);
+		iget_failed(inode);
+		ii_write_unlock(inode);
+		au_xino_write(sb, btop, h_ino, /*ino*/0);
+		/* ignore this error */
+		goto out_iput;
+	} else if (!must_new && !IS_DEADDIR(inode) && inode->i_nlink) {
+		/*
+		 * horrible race condition between lookup, readdir and copyup
+		 * (or something).
+		 */
+		if (hlinked && idx >= 0)
+			au_xinondir_leave(sb, btop, h_ino, idx);
+		err = reval_inode(inode, dentry);
+		if (unlikely(err < 0)) {
+			hlinked = 0;
+			goto out_iput;
+		}
+		if (!err)
+			goto out; /* success */
+		else if (hlinked && idx >= 0) {
+			err = au_xinondir_enter(sb, btop, h_ino, &idx);
+			if (unlikely(err)) {
+				iput(inode);
+				inode = ERR_PTR(err);
+				goto out;
+			}
+		}
+	}
+
+	if (unlikely(au_test_fs_unique_ino(h_inode)))
+		AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir,"
+			" b%d, %s, %pd, hi%lu, i%lu.\n",
+			btop, au_sbtype(h_dentry->d_sb), dentry,
+			(unsigned long)h_ino, (unsigned long)ino);
+	ino = 0;
+	err = au_xino_write(sb, btop, h_ino, /*ino*/0);
+	if (!err) {
+		iput(inode);
+		if (hlinked && idx >= 0)
+			au_xinondir_leave(sb, btop, h_ino, idx);
+		goto new_ino;
+	}
+
+out_iput:
+	iput(inode);
+	inode = ERR_PTR(err);
+out_xinondir:
+	if (hlinked && idx >= 0)
+		au_xinondir_leave(sb, btop, h_ino, idx);
+out:
+	return inode;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
+	       struct inode *inode)
+{
+	int err;
+	struct inode *hi;
+
+	err = au_br_rdonly(au_sbr(sb, bindex));
+
+	/* pseudo-link after flushed may happen out of bounds */
+	if (!err
+	    && inode
+	    && au_ibtop(inode) <= bindex
+	    && bindex <= au_ibbot(inode)) {
+		/*
+		 * permission check is unnecessary since vfsub routine
+		 * will be called later
+		 */
+		hi = au_h_iptr(inode, bindex);
+		if (hi)
+			err = IS_IMMUTABLE(hi) ? -EROFS : 0;
+	}
+
+	return err;
+}
+
+int au_test_h_perm(struct inode *h_inode, int mask)
+{
+	if (uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
+		return 0;
+	return inode_permission(h_inode, mask);
+}
+
+int au_test_h_perm_sio(struct inode *h_inode, int mask)
+{
+	if (au_test_nfs(h_inode->i_sb)
+	    && (mask & MAY_WRITE)
+	    && S_ISDIR(h_inode->i_mode))
+		mask |= MAY_READ; /* force permission check */
+	return au_test_h_perm(h_inode, mask);
+}
diff --git a/include/fs/aufs/inode.h b/include/fs/aufs/inode.h
new file mode 100644
index 000000000..35e02ad9f
--- /dev/null
+++ b/include/fs/aufs/inode.h
@@ -0,0 +1,695 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * inode operations
+ */
+
+#ifndef __AUFS_INODE_H__
+#define __AUFS_INODE_H__
+
+#ifdef __KERNEL__
+
+#include <linux/fsnotify.h>
+#include "rwsem.h"
+
+struct vfsmount;
+
+struct au_hnotify {
+#ifdef CONFIG_AUFS_HNOTIFY
+#ifdef CONFIG_AUFS_HFSNOTIFY
+	/* never use fsnotify_add_vfsmount_mark() */
+	struct fsnotify_mark		hn_mark;
+#endif
+	struct inode		*hn_aufs_inode;	/* no get/put */
+#endif
+} ____cacheline_aligned_in_smp;
+
+struct au_hinode {
+	struct inode		*hi_inode;
+	aufs_bindex_t		hi_id;
+#ifdef CONFIG_AUFS_HNOTIFY
+	struct au_hnotify	*hi_notify;
+#endif
+
+	/* reference to the copied-up whiteout with get/put */
+	struct dentry		*hi_whdentry;
+};
+
+/* ig_flags */
+#define AuIG_HALF_REFRESHED		1
+#define au_ig_ftest(flags, name)	((flags) & AuIG_##name)
+#define au_ig_fset(flags, name) \
+	do { (flags) |= AuIG_##name; } while (0)
+#define au_ig_fclr(flags, name) \
+	do { (flags) &= ~AuIG_##name; } while (0)
+
+struct au_iigen {
+	spinlock_t	ig_spin;
+	__u32		ig_generation, ig_flags;
+};
+
+struct au_vdir;
+struct au_iinfo {
+	struct au_iigen		ii_generation;
+	struct super_block	*ii_hsb1;	/* no get/put */
+
+	struct au_rwsem		ii_rwsem;
+	aufs_bindex_t		ii_btop, ii_bbot;
+	__u32			ii_higen;
+	struct au_hinode	*ii_hinode;
+	struct au_vdir		*ii_vdir;
+};
+
+struct au_icntnr {
+	struct au_iinfo iinfo;
+	struct inode vfs_inode;
+	struct hlist_bl_node plink;
+} ____cacheline_aligned_in_smp;
+
+/* au_pin flags */
+#define AuPin_DI_LOCKED		1
+#define AuPin_MNT_WRITE		(1 << 1)
+#define au_ftest_pin(flags, name)	((flags) & AuPin_##name)
+#define au_fset_pin(flags, name) \
+	do { (flags) |= AuPin_##name; } while (0)
+#define au_fclr_pin(flags, name) \
+	do { (flags) &= ~AuPin_##name; } while (0)
+
+struct au_pin {
+	/* input */
+	struct dentry *dentry;
+	unsigned int udba;
+	unsigned char lsc_di, lsc_hi, flags;
+	aufs_bindex_t bindex;
+
+	/* output */
+	struct dentry *parent;
+	struct au_hinode *hdir;
+	struct vfsmount *h_mnt;
+
+	/* temporary unlock/relock for copyup */
+	struct dentry *h_dentry, *h_parent;
+	struct au_branch *br;
+	struct task_struct *task;
+};
+
+void au_pin_hdir_unlock(struct au_pin *p);
+int au_pin_hdir_lock(struct au_pin *p);
+int au_pin_hdir_relock(struct au_pin *p);
+void au_pin_hdir_acquire_nest(struct au_pin *p);
+void au_pin_hdir_release(struct au_pin *p);
+
+/* ---------------------------------------------------------------------- */
+
+static inline struct au_iinfo *au_ii(struct inode *inode)
+{
+	BUG_ON(is_bad_inode(inode));
+	return &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* inode.c */
+struct inode *au_igrab(struct inode *inode);
+void au_refresh_iop(struct inode *inode, int force_getattr);
+int au_refresh_hinode_self(struct inode *inode);
+int au_refresh_hinode(struct inode *inode, struct dentry *dentry);
+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
+	   unsigned int d_type, ino_t *ino);
+struct inode *au_new_inode(struct dentry *dentry, int must_new);
+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
+	       struct inode *inode);
+int au_test_h_perm(struct inode *h_inode, int mask);
+int au_test_h_perm_sio(struct inode *h_inode, int mask);
+
+static inline int au_wh_ino(struct super_block *sb, aufs_bindex_t bindex,
+			    ino_t h_ino, unsigned int d_type, ino_t *ino)
+{
+#ifdef CONFIG_AUFS_SHWH
+	return au_ino(sb, bindex, h_ino, d_type, ino);
+#else
+	return 0;
+#endif
+}
+
+/* i_op.c */
+enum {
+	AuIop_SYMLINK,
+	AuIop_DIR,
+	AuIop_OTHER,
+	AuIop_Last
+};
+extern struct inode_operations aufs_iop[AuIop_Last],
+	aufs_iop_nogetattr[AuIop_Last];
+
+/* au_wr_dir flags */
+#define AuWrDir_ADD_ENTRY	1
+#define AuWrDir_ISDIR		(1 << 1)
+#define AuWrDir_TMPFILE		(1 << 2)
+#define au_ftest_wrdir(flags, name)	((flags) & AuWrDir_##name)
+#define au_fset_wrdir(flags, name) \
+	do { (flags) |= AuWrDir_##name; } while (0)
+#define au_fclr_wrdir(flags, name) \
+	do { (flags) &= ~AuWrDir_##name; } while (0)
+
+struct au_wr_dir_args {
+	aufs_bindex_t force_btgt;
+	unsigned char flags;
+};
+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
+	      struct au_wr_dir_args *args);
+
+struct dentry *au_pinned_h_parent(struct au_pin *pin);
+void au_pin_init(struct au_pin *pin, struct dentry *dentry,
+		 aufs_bindex_t bindex, int lsc_di, int lsc_hi,
+		 unsigned int udba, unsigned char flags);
+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
+	   unsigned int udba, unsigned char flags) __must_check;
+int au_do_pin(struct au_pin *pin) __must_check;
+void au_unpin(struct au_pin *pin);
+int au_reval_for_attr(struct dentry *dentry, unsigned int sigen);
+
+#define AuIcpup_DID_CPUP	1
+#define au_ftest_icpup(flags, name)	((flags) & AuIcpup_##name)
+#define au_fset_icpup(flags, name) \
+	do { (flags) |= AuIcpup_##name; } while (0)
+#define au_fclr_icpup(flags, name) \
+	do { (flags) &= ~AuIcpup_##name; } while (0)
+
+struct au_icpup_args {
+	unsigned char flags;
+	unsigned char pin_flags;
+	aufs_bindex_t btgt;
+	unsigned int udba;
+	struct au_pin pin;
+	struct path h_path;
+	struct inode *h_inode;
+};
+
+int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
+		     struct au_icpup_args *a);
+
+int au_h_path_getattr(struct dentry *dentry, int force, struct path *h_path,
+		      int locked);
+
+/* i_op_add.c */
+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
+	       struct dentry *h_parent, int isdir);
+int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
+	       dev_t dev);
+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname);
+int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
+		bool want_excl);
+struct vfsub_aopen_args;
+int au_aopen_or_create(struct inode *dir, struct dentry *dentry,
+		       struct vfsub_aopen_args *args);
+int aufs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode);
+int aufs_link(struct dentry *src_dentry, struct inode *dir,
+	      struct dentry *dentry);
+int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
+
+/* i_op_del.c */
+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup);
+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
+	       struct dentry *h_parent, int isdir);
+int aufs_unlink(struct inode *dir, struct dentry *dentry);
+int aufs_rmdir(struct inode *dir, struct dentry *dentry);
+
+/* i_op_ren.c */
+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt);
+int aufs_rename(struct inode *src_dir, struct dentry *src_dentry,
+		struct inode *dir, struct dentry *dentry,
+		unsigned int flags);
+
+/* iinfo.c */
+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex);
+void au_hiput(struct au_hinode *hinode);
+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
+		  struct dentry *h_wh);
+unsigned int au_hi_flags(struct inode *inode, int isdir);
+
+/* hinode flags */
+#define AuHi_XINO	1
+#define AuHi_HNOTIFY	(1 << 1)
+#define au_ftest_hi(flags, name)	((flags) & AuHi_##name)
+#define au_fset_hi(flags, name) \
+	do { (flags) |= AuHi_##name; } while (0)
+#define au_fclr_hi(flags, name) \
+	do { (flags) &= ~AuHi_##name; } while (0)
+
+#ifndef CONFIG_AUFS_HNOTIFY
+#undef AuHi_HNOTIFY
+#define AuHi_HNOTIFY	0
+#endif
+
+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
+		   struct inode *h_inode, unsigned int flags);
+
+void au_update_iigen(struct inode *inode, int half);
+void au_update_ibrange(struct inode *inode, int do_put_zero);
+
+void au_icntnr_init_once(void *_c);
+void au_hinode_init(struct au_hinode *hinode);
+int au_iinfo_init(struct inode *inode);
+void au_iinfo_fin(struct inode *inode);
+int au_hinode_realloc(struct au_iinfo *iinfo, int nbr, int may_shrink);
+
+#ifdef CONFIG_PROC_FS
+/* plink.c */
+int au_plink_maint(struct super_block *sb, int flags);
+struct au_sbinfo;
+void au_plink_maint_leave(struct au_sbinfo *sbinfo);
+int au_plink_maint_enter(struct super_block *sb);
+#ifdef CONFIG_AUFS_DEBUG
+void au_plink_list(struct super_block *sb);
+#else
+AuStubVoid(au_plink_list, struct super_block *sb)
+#endif
+int au_plink_test(struct inode *inode);
+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex);
+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
+		     struct dentry *h_dentry);
+void au_plink_put(struct super_block *sb, int verbose);
+void au_plink_clean(struct super_block *sb, int verbose);
+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id);
+#else
+AuStubInt0(au_plink_maint, struct super_block *sb, int flags);
+AuStubVoid(au_plink_maint_leave, struct au_sbinfo *sbinfo);
+AuStubInt0(au_plink_maint_enter, struct super_block *sb);
+AuStubVoid(au_plink_list, struct super_block *sb);
+AuStubInt0(au_plink_test, struct inode *inode);
+AuStub(struct dentry *, au_plink_lkup, return NULL,
+       struct inode *inode, aufs_bindex_t bindex);
+AuStubVoid(au_plink_append, struct inode *inode, aufs_bindex_t bindex,
+	   struct dentry *h_dentry);
+AuStubVoid(au_plink_put, struct super_block *sb, int verbose);
+AuStubVoid(au_plink_clean, struct super_block *sb, int verbose);
+AuStubVoid(au_plink_half_refresh, struct super_block *sb, aufs_bindex_t br_id);
+#endif /* CONFIG_PROC_FS */
+
+#ifdef CONFIG_AUFS_XATTR
+/* xattr.c */
+int au_cpup_xattr(struct dentry *h_dst, struct dentry *h_src, int ignore_flags,
+		  unsigned int verbose);
+ssize_t aufs_listxattr(struct dentry *dentry, char *list, size_t size);
+void au_xattr_init(struct super_block *sb);
+#else
+AuStubInt0(au_cpup_xattr, struct dentry *h_dst, struct dentry *h_src,
+	   int ignore_flags, unsigned int verbose);
+AuStubVoid(au_xattr_init, struct super_block *sb);
+#endif
+
+#ifdef CONFIG_FS_POSIX_ACL
+struct posix_acl *aufs_get_acl(struct inode *inode, int type);
+int aufs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+#endif
+
+#if IS_ENABLED(CONFIG_AUFS_XATTR) || IS_ENABLED(CONFIG_FS_POSIX_ACL)
+enum {
+	AU_XATTR_SET,
+	AU_ACL_SET
+};
+
+struct au_sxattr {
+	int type;
+	union {
+		struct {
+			const char	*name;
+			const void	*value;
+			size_t		size;
+			int		flags;
+		} set;
+		struct {
+			struct posix_acl *acl;
+			int		type;
+		} acl_set;
+	} u;
+};
+ssize_t au_sxattr(struct dentry *dentry, struct inode *inode,
+		  struct au_sxattr *arg);
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+/* lock subclass for iinfo */
+enum {
+	AuLsc_II_CHILD,		/* child first */
+	AuLsc_II_CHILD2,	/* rename(2), link(2), and cpup at hnotify */
+	AuLsc_II_CHILD3,	/* copyup dirs */
+	AuLsc_II_PARENT,	/* see AuLsc_I_PARENT in vfsub.h */
+	AuLsc_II_PARENT2,
+	AuLsc_II_PARENT3,	/* copyup dirs */
+	AuLsc_II_NEW_CHILD
+};
+
+/*
+ * ii_read_lock_child, ii_write_lock_child,
+ * ii_read_lock_child2, ii_write_lock_child2,
+ * ii_read_lock_child3, ii_write_lock_child3,
+ * ii_read_lock_parent, ii_write_lock_parent,
+ * ii_read_lock_parent2, ii_write_lock_parent2,
+ * ii_read_lock_parent3, ii_write_lock_parent3,
+ * ii_read_lock_new_child, ii_write_lock_new_child,
+ */
+#define AuReadLockFunc(name, lsc) \
+static inline void ii_read_lock_##name(struct inode *i) \
+{ \
+	au_rw_read_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
+}
+
+#define AuWriteLockFunc(name, lsc) \
+static inline void ii_write_lock_##name(struct inode *i) \
+{ \
+	au_rw_write_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
+}
+
+#define AuRWLockFuncs(name, lsc) \
+	AuReadLockFunc(name, lsc) \
+	AuWriteLockFunc(name, lsc)
+
+AuRWLockFuncs(child, CHILD);
+AuRWLockFuncs(child2, CHILD2);
+AuRWLockFuncs(child3, CHILD3);
+AuRWLockFuncs(parent, PARENT);
+AuRWLockFuncs(parent2, PARENT2);
+AuRWLockFuncs(parent3, PARENT3);
+AuRWLockFuncs(new_child, NEW_CHILD);
+
+#undef AuReadLockFunc
+#undef AuWriteLockFunc
+#undef AuRWLockFuncs
+
+#define ii_read_unlock(i)	au_rw_read_unlock(&au_ii(i)->ii_rwsem)
+#define ii_write_unlock(i)	au_rw_write_unlock(&au_ii(i)->ii_rwsem)
+#define ii_downgrade_lock(i)	au_rw_dgrade_lock(&au_ii(i)->ii_rwsem)
+
+#define IiMustNoWaiters(i)	AuRwMustNoWaiters(&au_ii(i)->ii_rwsem)
+#define IiMustAnyLock(i)	AuRwMustAnyLock(&au_ii(i)->ii_rwsem)
+#define IiMustWriteLock(i)	AuRwMustWriteLock(&au_ii(i)->ii_rwsem)
+
+/* ---------------------------------------------------------------------- */
+
+static inline void au_icntnr_init(struct au_icntnr *c)
+{
+#ifdef CONFIG_AUFS_DEBUG
+	c->vfs_inode.i_mode = 0;
+#endif
+}
+
+static inline unsigned int au_iigen(struct inode *inode, unsigned int *igflags)
+{
+	unsigned int gen;
+	struct au_iinfo *iinfo;
+	struct au_iigen *iigen;
+
+	iinfo = au_ii(inode);
+	iigen = &iinfo->ii_generation;
+	spin_lock(&iigen->ig_spin);
+	if (igflags)
+		*igflags = iigen->ig_flags;
+	gen = iigen->ig_generation;
+	spin_unlock(&iigen->ig_spin);
+
+	return gen;
+}
+
+/* tiny test for inode number */
+/* tmpfs generation is too rough */
+static inline int au_test_higen(struct inode *inode, struct inode *h_inode)
+{
+	struct au_iinfo *iinfo;
+
+	iinfo = au_ii(inode);
+	AuRwMustAnyLock(&iinfo->ii_rwsem);
+	return !(iinfo->ii_hsb1 == h_inode->i_sb
+		 && iinfo->ii_higen == h_inode->i_generation);
+}
+
+static inline void au_iigen_dec(struct inode *inode)
+{
+	struct au_iinfo *iinfo;
+	struct au_iigen *iigen;
+
+	iinfo = au_ii(inode);
+	iigen = &iinfo->ii_generation;
+	spin_lock(&iigen->ig_spin);
+	iigen->ig_generation--;
+	spin_unlock(&iigen->ig_spin);
+}
+
+static inline int au_iigen_test(struct inode *inode, unsigned int sigen)
+{
+	int err;
+
+	err = 0;
+	if (unlikely(inode && au_iigen(inode, NULL) != sigen))
+		err = -EIO;
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static inline struct au_hinode *au_hinode(struct au_iinfo *iinfo,
+					  aufs_bindex_t bindex)
+{
+	return iinfo->ii_hinode + bindex;
+}
+
+static inline int au_is_bad_inode(struct inode *inode)
+{
+	return !!(is_bad_inode(inode) || !au_hinode(au_ii(inode), 0));
+}
+
+static inline aufs_bindex_t au_ii_br_id(struct inode *inode,
+					aufs_bindex_t bindex)
+{
+	IiMustAnyLock(inode);
+	return au_hinode(au_ii(inode), bindex)->hi_id;
+}
+
+static inline aufs_bindex_t au_ibtop(struct inode *inode)
+{
+	IiMustAnyLock(inode);
+	return au_ii(inode)->ii_btop;
+}
+
+static inline aufs_bindex_t au_ibbot(struct inode *inode)
+{
+	IiMustAnyLock(inode);
+	return au_ii(inode)->ii_bbot;
+}
+
+static inline struct au_vdir *au_ivdir(struct inode *inode)
+{
+	IiMustAnyLock(inode);
+	return au_ii(inode)->ii_vdir;
+}
+
+static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex)
+{
+	IiMustAnyLock(inode);
+	return au_hinode(au_ii(inode), bindex)->hi_whdentry;
+}
+
+static inline void au_set_ibtop(struct inode *inode, aufs_bindex_t bindex)
+{
+	IiMustWriteLock(inode);
+	au_ii(inode)->ii_btop = bindex;
+}
+
+static inline void au_set_ibbot(struct inode *inode, aufs_bindex_t bindex)
+{
+	IiMustWriteLock(inode);
+	au_ii(inode)->ii_bbot = bindex;
+}
+
+static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir)
+{
+	IiMustWriteLock(inode);
+	au_ii(inode)->ii_vdir = vdir;
+}
+
+static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex)
+{
+	IiMustAnyLock(inode);
+	return au_hinode(au_ii(inode), bindex);
+}
+
+/* ---------------------------------------------------------------------- */
+
+static inline struct dentry *au_pinned_parent(struct au_pin *pin)
+{
+	if (pin)
+		return pin->parent;
+	return NULL;
+}
+
+static inline struct inode *au_pinned_h_dir(struct au_pin *pin)
+{
+	if (pin && pin->hdir)
+		return pin->hdir->hi_inode;
+	return NULL;
+}
+
+static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin)
+{
+	if (pin)
+		return pin->hdir;
+	return NULL;
+}
+
+static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry)
+{
+	if (pin)
+		pin->dentry = dentry;
+}
+
+static inline void au_pin_set_parent_lflag(struct au_pin *pin,
+					   unsigned char lflag)
+{
+	if (pin) {
+		if (lflag)
+			au_fset_pin(pin->flags, DI_LOCKED);
+		else
+			au_fclr_pin(pin->flags, DI_LOCKED);
+	}
+}
+
+#if 0 /* reserved */
+static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent)
+{
+	if (pin) {
+		dput(pin->parent);
+		pin->parent = dget(parent);
+	}
+}
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+struct au_branch;
+#ifdef CONFIG_AUFS_HNOTIFY
+struct au_hnotify_op {
+	void (*ctl)(struct au_hinode *hinode, int do_set);
+	int (*alloc)(struct au_hinode *hinode);
+
+	/*
+	 * if it returns true, the the caller should free hinode->hi_notify,
+	 * otherwise ->free() frees it.
+	 */
+	int (*free)(struct au_hinode *hinode,
+		    struct au_hnotify *hn) __must_check;
+
+	void (*fin)(void);
+	int (*init)(void);
+
+	int (*reset_br)(unsigned int udba, struct au_branch *br, int perm);
+	void (*fin_br)(struct au_branch *br);
+	int (*init_br)(struct au_branch *br, int perm);
+};
+
+/* hnotify.c */
+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode);
+void au_hn_free(struct au_hinode *hinode);
+void au_hn_ctl(struct au_hinode *hinode, int do_set);
+void au_hn_reset(struct inode *inode, unsigned int flags);
+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
+	       struct qstr *h_child_qstr, struct inode *h_child_inode);
+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm);
+int au_hnotify_init_br(struct au_branch *br, int perm);
+void au_hnotify_fin_br(struct au_branch *br);
+int __init au_hnotify_init(void);
+void au_hnotify_fin(void);
+
+/* hfsnotify.c */
+extern const struct au_hnotify_op au_hnotify_op;
+
+static inline
+void au_hn_init(struct au_hinode *hinode)
+{
+	hinode->hi_notify = NULL;
+}
+
+static inline struct au_hnotify *au_hn(struct au_hinode *hinode)
+{
+	return hinode->hi_notify;
+}
+
+#else
+AuStub(int, au_hn_alloc, return -EOPNOTSUPP,
+       struct au_hinode *hinode __maybe_unused,
+       struct inode *inode __maybe_unused)
+AuStub(struct au_hnotify *, au_hn, return NULL, struct au_hinode *hinode)
+AuStubVoid(au_hn_free, struct au_hinode *hinode __maybe_unused)
+AuStubVoid(au_hn_ctl, struct au_hinode *hinode __maybe_unused,
+	   int do_set __maybe_unused)
+AuStubVoid(au_hn_reset, struct inode *inode __maybe_unused,
+	   unsigned int flags __maybe_unused)
+AuStubInt0(au_hnotify_reset_br, unsigned int udba __maybe_unused,
+	   struct au_branch *br __maybe_unused,
+	   int perm __maybe_unused)
+AuStubInt0(au_hnotify_init_br, struct au_branch *br __maybe_unused,
+	   int perm __maybe_unused)
+AuStubVoid(au_hnotify_fin_br, struct au_branch *br __maybe_unused)
+AuStubInt0(__init au_hnotify_init, void)
+AuStubVoid(au_hnotify_fin, void)
+AuStubVoid(au_hn_init, struct au_hinode *hinode __maybe_unused)
+#endif /* CONFIG_AUFS_HNOTIFY */
+
+static inline void au_hn_suspend(struct au_hinode *hdir)
+{
+	au_hn_ctl(hdir, /*do_set*/0);
+}
+
+static inline void au_hn_resume(struct au_hinode *hdir)
+{
+	au_hn_ctl(hdir, /*do_set*/1);
+}
+
+static inline void au_hn_inode_lock(struct au_hinode *hdir)
+{
+	inode_lock(hdir->hi_inode);
+	au_hn_suspend(hdir);
+}
+
+static inline void au_hn_inode_lock_nested(struct au_hinode *hdir,
+					  unsigned int sc __maybe_unused)
+{
+	inode_lock_nested(hdir->hi_inode, sc);
+	au_hn_suspend(hdir);
+}
+
+#if 0 /* unused */
+#include "vfsub.h"
+static inline void au_hn_inode_lock_shared_nested(struct au_hinode *hdir,
+						  unsigned int sc)
+{
+	vfsub_inode_lock_shared_nested(hdir->hi_inode, sc);
+	au_hn_suspend(hdir);
+}
+#endif
+
+static inline void au_hn_inode_unlock(struct au_hinode *hdir)
+{
+	au_hn_resume(hdir);
+	inode_unlock(hdir->hi_inode);
+}
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_INODE_H__ */
diff --git a/include/fs/aufs/ioctl.c b/include/fs/aufs/ioctl.c
new file mode 100644
index 000000000..de188adb3
--- /dev/null
+++ b/include/fs/aufs/ioctl.c
@@ -0,0 +1,219 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * ioctl
+ * plink-management and readdir in userspace.
+ * assist the pathconf(3) wrapper library.
+ * move-down
+ * File-based Hierarchical Storage Management.
+ */
+
+#include <linux/compat.h>
+#include <linux/file.h>
+#include "aufs.h"
+
+static int au_wbr_fd(struct path *path, struct aufs_wbr_fd __user *arg)
+{
+	int err, fd;
+	aufs_bindex_t wbi, bindex, bbot;
+	struct file *h_file;
+	struct super_block *sb;
+	struct dentry *root;
+	struct au_branch *br;
+	struct aufs_wbr_fd wbrfd = {
+		.oflags	= au_dir_roflags,
+		.brid	= -1
+	};
+	const int valid = O_RDONLY | O_NONBLOCK | O_LARGEFILE | O_DIRECTORY
+		| O_NOATIME | O_CLOEXEC;
+
+	AuDebugOn(wbrfd.oflags & ~valid);
+
+	if (arg) {
+		err = copy_from_user(&wbrfd, arg, sizeof(wbrfd));
+		if (unlikely(err)) {
+			err = -EFAULT;
+			goto out;
+		}
+
+		err = -EINVAL;
+		AuDbg("wbrfd{0%o, %d}\n", wbrfd.oflags, wbrfd.brid);
+		wbrfd.oflags |= au_dir_roflags;
+		AuDbg("0%o\n", wbrfd.oflags);
+		if (unlikely(wbrfd.oflags & ~valid))
+			goto out;
+	}
+
+	fd = get_unused_fd_flags(0);
+	err = fd;
+	if (unlikely(fd < 0))
+		goto out;
+
+	h_file = ERR_PTR(-EINVAL);
+	wbi = 0;
+	br = NULL;
+	sb = path->dentry->d_sb;
+	root = sb->s_root;
+	aufs_read_lock(root, AuLock_IR);
+	bbot = au_sbbot(sb);
+	if (wbrfd.brid >= 0) {
+		wbi = au_br_index(sb, wbrfd.brid);
+		if (unlikely(wbi < 0 || wbi > bbot))
+			goto out_unlock;
+	}
+
+	h_file = ERR_PTR(-ENOENT);
+	br = au_sbr(sb, wbi);
+	if (!au_br_writable(br->br_perm)) {
+		if (arg)
+			goto out_unlock;
+
+		bindex = wbi + 1;
+		wbi = -1;
+		for (; bindex <= bbot; bindex++) {
+			br = au_sbr(sb, bindex);
+			if (au_br_writable(br->br_perm)) {
+				wbi = bindex;
+				br = au_sbr(sb, wbi);
+				break;
+			}
+		}
+	}
+	AuDbg("wbi %d\n", wbi);
+	if (wbi >= 0)
+		h_file = au_h_open(root, wbi, wbrfd.oflags, NULL,
+				   /*force_wr*/0);
+
+out_unlock:
+	aufs_read_unlock(root, AuLock_IR);
+	err = PTR_ERR(h_file);
+	if (IS_ERR(h_file))
+		goto out_fd;
+
+	au_br_put(br); /* cf. au_h_open() */
+	fd_install(fd, h_file);
+	err = fd;
+	goto out; /* success */
+
+out_fd:
+	put_unused_fd(fd);
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	long err;
+	struct dentry *dentry;
+
+	switch (cmd) {
+	case AUFS_CTL_RDU:
+	case AUFS_CTL_RDU_INO:
+		err = au_rdu_ioctl(file, cmd, arg);
+		break;
+
+	case AUFS_CTL_WBR_FD:
+		err = au_wbr_fd(&file->f_path, (void __user *)arg);
+		break;
+
+	case AUFS_CTL_IBUSY:
+		err = au_ibusy_ioctl(file, arg);
+		break;
+
+	case AUFS_CTL_BRINFO:
+		err = au_brinfo_ioctl(file, arg);
+		break;
+
+	case AUFS_CTL_FHSM_FD:
+		dentry = file->f_path.dentry;
+		if (IS_ROOT(dentry))
+			err = au_fhsm_fd(dentry->d_sb, arg);
+		else
+			err = -ENOTTY;
+		break;
+
+	default:
+		/* do not call the lower */
+		AuDbg("0x%x\n", cmd);
+		err = -ENOTTY;
+	}
+
+	AuTraceErr(err);
+	return err;
+}
+
+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	long err;
+
+	switch (cmd) {
+	case AUFS_CTL_MVDOWN:
+		err = au_mvdown(file->f_path.dentry, (void __user *)arg);
+		break;
+
+	case AUFS_CTL_WBR_FD:
+		err = au_wbr_fd(&file->f_path, (void __user *)arg);
+		break;
+
+	default:
+		/* do not call the lower */
+		AuDbg("0x%x\n", cmd);
+		err = -ENOTTY;
+	}
+
+	AuTraceErr(err);
+	return err;
+}
+
+#ifdef CONFIG_COMPAT
+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
+			   unsigned long arg)
+{
+	long err;
+
+	switch (cmd) {
+	case AUFS_CTL_RDU:
+	case AUFS_CTL_RDU_INO:
+		err = au_rdu_compat_ioctl(file, cmd, arg);
+		break;
+
+	case AUFS_CTL_IBUSY:
+		err = au_ibusy_compat_ioctl(file, arg);
+		break;
+
+	case AUFS_CTL_BRINFO:
+		err = au_brinfo_compat_ioctl(file, arg);
+		break;
+
+	default:
+		err = aufs_ioctl_dir(file, cmd, arg);
+	}
+
+	AuTraceErr(err);
+	return err;
+}
+
+long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
+			      unsigned long arg)
+{
+	return aufs_ioctl_nondir(file, cmd, (unsigned long)compat_ptr(arg));
+}
+#endif
diff --git a/include/fs/aufs/loop.c b/include/fs/aufs/loop.c
new file mode 100644
index 000000000..983ef9876
--- /dev/null
+++ b/include/fs/aufs/loop.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * support for loopback block device as a branch
+ */
+
+#include "aufs.h"
+
+/* added into drivers/block/loop.c */
+static struct file *(*backing_file_func)(struct super_block *sb);
+
+/*
+ * test if two lower dentries have overlapping branches.
+ */
+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding)
+{
+	struct super_block *h_sb;
+	struct file *backing_file;
+
+	if (unlikely(!backing_file_func)) {
+		/* don't load "loop" module here */
+		backing_file_func = symbol_get(loop_backing_file);
+		if (unlikely(!backing_file_func))
+			/* "loop" module is not loaded */
+			return 0;
+	}
+
+	h_sb = h_adding->d_sb;
+	backing_file = backing_file_func(h_sb);
+	if (!backing_file)
+		return 0;
+
+	h_adding = backing_file->f_path.dentry;
+	/*
+	 * h_adding can be local NFS.
+	 * in this case aufs cannot detect the loop.
+	 */
+	if (unlikely(h_adding->d_sb == sb))
+		return 1;
+	return !!au_test_subdir(h_adding, sb->s_root);
+}
+
+/* true if a kernel thread named 'loop[0-9].*' accesses a file */
+int au_test_loopback_kthread(void)
+{
+	int ret;
+	struct task_struct *tsk = current;
+	char c, comm[sizeof(tsk->comm)];
+
+	ret = 0;
+	if (tsk->flags & PF_KTHREAD) {
+		get_task_comm(comm, tsk);
+		c = comm[4];
+		ret = ('0' <= c && c <= '9'
+		       && !strncmp(comm, "loop", 4));
+	}
+
+	return ret;
+}
+
+/* ---------------------------------------------------------------------- */
+
+#define au_warn_loopback_step	16
+static int au_warn_loopback_nelem = au_warn_loopback_step;
+static unsigned long *au_warn_loopback_array;
+
+void au_warn_loopback(struct super_block *h_sb)
+{
+	int i, new_nelem;
+	unsigned long *a, magic;
+	static DEFINE_SPINLOCK(spin);
+
+	magic = h_sb->s_magic;
+	spin_lock(&spin);
+	a = au_warn_loopback_array;
+	for (i = 0; i < au_warn_loopback_nelem && *a; i++)
+		if (a[i] == magic) {
+			spin_unlock(&spin);
+			return;
+		}
+
+	/* h_sb is new to us, print it */
+	if (i < au_warn_loopback_nelem) {
+		a[i] = magic;
+		goto pr;
+	}
+
+	/* expand the array */
+	new_nelem = au_warn_loopback_nelem + au_warn_loopback_step;
+	a = au_kzrealloc(au_warn_loopback_array,
+			 au_warn_loopback_nelem * sizeof(unsigned long),
+			 new_nelem * sizeof(unsigned long), GFP_ATOMIC,
+			 /*may_shrink*/0);
+	if (a) {
+		au_warn_loopback_nelem = new_nelem;
+		au_warn_loopback_array = a;
+		a[i] = magic;
+		goto pr;
+	}
+
+	spin_unlock(&spin);
+	AuWarn1("realloc failed, ignored\n");
+	return;
+
+pr:
+	spin_unlock(&spin);
+	pr_warn("you may want to try another patch for loopback file "
+		"on %s(0x%lx) branch\n", au_sbtype(h_sb), magic);
+}
+
+int au_loopback_init(void)
+{
+	int err;
+	struct super_block *sb __maybe_unused;
+
+	BUILD_BUG_ON(sizeof(sb->s_magic) != sizeof(unsigned long));
+
+	err = 0;
+	au_warn_loopback_array = kcalloc(au_warn_loopback_step,
+					 sizeof(unsigned long), GFP_NOFS);
+	if (unlikely(!au_warn_loopback_array))
+		err = -ENOMEM;
+
+	return err;
+}
+
+void au_loopback_fin(void)
+{
+	if (backing_file_func)
+		symbol_put(loop_backing_file);
+	kfree(au_warn_loopback_array);
+}
diff --git a/include/fs/aufs/loop.h b/include/fs/aufs/loop.h
new file mode 100644
index 000000000..e6b76609a
--- /dev/null
+++ b/include/fs/aufs/loop.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * support for loopback mount as a branch
+ */
+
+#ifndef __AUFS_LOOP_H__
+#define __AUFS_LOOP_H__
+
+#ifdef __KERNEL__
+
+struct dentry;
+struct super_block;
+
+#ifdef CONFIG_AUFS_BDEV_LOOP
+/* drivers/block/loop.c */
+struct file *loop_backing_file(struct super_block *sb);
+
+/* loop.c */
+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding);
+int au_test_loopback_kthread(void);
+void au_warn_loopback(struct super_block *h_sb);
+
+int au_loopback_init(void);
+void au_loopback_fin(void);
+#else
+AuStubInt0(au_test_loopback_overlap, struct super_block *sb,
+	   struct dentry *h_adding)
+AuStubInt0(au_test_loopback_kthread, void)
+AuStubVoid(au_warn_loopback, struct super_block *h_sb)
+
+AuStubInt0(au_loopback_init, void)
+AuStubVoid(au_loopback_fin, void)
+#endif /* BLK_DEV_LOOP */
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_LOOP_H__ */
diff --git a/include/fs/aufs/magic.mk b/include/fs/aufs/magic.mk
new file mode 100644
index 000000000..7bc9eef3f
--- /dev/null
+++ b/include/fs/aufs/magic.mk
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# defined in ${srctree}/fs/fuse/inode.c
+# tristate
+ifdef CONFIG_FUSE_FS
+ccflags-y += -DFUSE_SUPER_MAGIC=0x65735546
+endif
+
+# defined in ${srctree}/fs/xfs/xfs_sb.h
+# tristate
+ifdef CONFIG_XFS_FS
+ccflags-y += -DXFS_SB_MAGIC=0x58465342
+endif
+
+# defined in ${srctree}/fs/configfs/mount.c
+# tristate
+ifdef CONFIG_CONFIGFS_FS
+ccflags-y += -DCONFIGFS_MAGIC=0x62656570
+endif
+
+# defined in ${srctree}/fs/ubifs/ubifs.h
+# tristate
+ifdef CONFIG_UBIFS_FS
+ccflags-y += -DUBIFS_SUPER_MAGIC=0x24051905
+endif
+
+# defined in ${srctree}/fs/hfsplus/hfsplus_raw.h
+# tristate
+ifdef CONFIG_HFSPLUS_FS
+ccflags-y += -DHFSPLUS_SUPER_MAGIC=0x482b
+endif
diff --git a/include/fs/aufs/module.c b/include/fs/aufs/module.c
new file mode 100644
index 000000000..9eeec710a
--- /dev/null
+++ b/include/fs/aufs/module.c
@@ -0,0 +1,266 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * module global variables and operations
+ */
+
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include "aufs.h"
+
+/* shrinkable realloc */
+void *au_krealloc(void *p, unsigned int new_sz, gfp_t gfp, int may_shrink)
+{
+	size_t sz;
+	int diff;
+
+	sz = 0;
+	diff = -1;
+	if (p) {
+#if 0 /* unused */
+		if (!new_sz) {
+			kfree(p);
+			p = NULL;
+			goto out;
+		}
+#else
+		AuDebugOn(!new_sz);
+#endif
+		sz = ksize(p);
+		diff = au_kmidx_sub(sz, new_sz);
+	}
+	if (sz && !diff)
+		goto out;
+
+	if (sz < new_sz)
+		/* expand or SLOB */
+		p = krealloc(p, new_sz, gfp);
+	else if (new_sz < sz && may_shrink) {
+		/* shrink */
+		void *q;
+
+		q = kmalloc(new_sz, gfp);
+		if (q) {
+			if (p) {
+				memcpy(q, p, new_sz);
+				kfree(p);
+			}
+			p = q;
+		} else
+			p = NULL;
+	}
+
+out:
+	return p;
+}
+
+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp,
+		   int may_shrink)
+{
+	p = au_krealloc(p, new_sz, gfp, may_shrink);
+	if (p && new_sz > nused)
+		memset(p + nused, 0, new_sz - nused);
+	return p;
+}
+
+/* ---------------------------------------------------------------------- */
+/*
+ * aufs caches
+ */
+struct kmem_cache *au_cache[AuCache_Last];
+
+static void au_cache_fin(void)
+{
+	int i;
+
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
+
+	/* excluding AuCache_HNOTIFY */
+	BUILD_BUG_ON(AuCache_HNOTIFY + 1 != AuCache_Last);
+	for (i = 0; i < AuCache_HNOTIFY; i++) {
+		kmem_cache_destroy(au_cache[i]);
+		au_cache[i] = NULL;
+	}
+}
+
+static int __init au_cache_init(void)
+{
+	au_cache[AuCache_DINFO] = AuCacheCtor(au_dinfo, au_di_init_once);
+	if (au_cache[AuCache_DINFO])
+		/* SLAB_DESTROY_BY_RCU */
+		au_cache[AuCache_ICNTNR] = AuCacheCtor(au_icntnr,
+						       au_icntnr_init_once);
+	if (au_cache[AuCache_ICNTNR])
+		au_cache[AuCache_FINFO] = AuCacheCtor(au_finfo,
+						      au_fi_init_once);
+	if (au_cache[AuCache_FINFO])
+		au_cache[AuCache_VDIR] = AuCache(au_vdir);
+	if (au_cache[AuCache_VDIR])
+		au_cache[AuCache_DEHSTR] = AuCache(au_vdir_dehstr);
+	if (au_cache[AuCache_DEHSTR])
+		return 0;
+
+	au_cache_fin();
+	return -ENOMEM;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int au_dir_roflags;
+
+#ifdef CONFIG_AUFS_SBILIST
+/*
+ * iterate_supers_type() doesn't protect us from
+ * remounting (branch management)
+ */
+struct hlist_bl_head au_sbilist;
+#endif
+
+/*
+ * functions for module interface.
+ */
+MODULE_LICENSE("GPL");
+/* MODULE_LICENSE("GPL v2"); */
+MODULE_AUTHOR("Junjiro R. Okajima <aufs-users@lists.sourceforge.net>");
+MODULE_DESCRIPTION(AUFS_NAME
+	" -- Advanced multi layered unification filesystem");
+MODULE_VERSION(AUFS_VERSION);
+MODULE_ALIAS_FS(AUFS_NAME);
+
+/* this module parameter has no meaning when SYSFS is disabled */
+int sysaufs_brs = 1;
+MODULE_PARM_DESC(brs, "use <sysfs>/fs/aufs/si_*/brN");
+module_param_named(brs, sysaufs_brs, int, S_IRUGO);
+
+/* this module parameter has no meaning when USER_NS is disabled */
+bool au_userns;
+MODULE_PARM_DESC(allow_userns, "allow unprivileged to mount under userns");
+module_param_named(allow_userns, au_userns, bool, S_IRUGO);
+
+/* ---------------------------------------------------------------------- */
+
+static char au_esc_chars[0x20 + 3]; /* 0x01-0x20, backslash, del, and NULL */
+
+int au_seq_path(struct seq_file *seq, struct path *path)
+{
+	int err;
+
+	err = seq_path(seq, path, au_esc_chars);
+	if (err >= 0)
+		err = 0;
+	else
+		err = -ENOMEM;
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int __init aufs_init(void)
+{
+	int err, i;
+	char *p;
+
+	p = au_esc_chars;
+	for (i = 1; i <= ' '; i++)
+		*p++ = i;
+	*p++ = '\\';
+	*p++ = '\x7f';
+	*p = 0;
+
+	au_dir_roflags = au_file_roflags(O_DIRECTORY | O_LARGEFILE);
+
+	memcpy(aufs_iop_nogetattr, aufs_iop, sizeof(aufs_iop));
+	for (i = 0; i < AuIop_Last; i++)
+		aufs_iop_nogetattr[i].getattr = NULL;
+
+	memset(au_cache, 0, sizeof(au_cache));	/* including hnotify */
+
+	au_sbilist_init();
+	sysaufs_brs_init();
+	au_debug_init();
+	au_dy_init();
+	err = sysaufs_init();
+	if (unlikely(err))
+		goto out;
+	err = au_procfs_init();
+	if (unlikely(err))
+		goto out_sysaufs;
+	err = au_wkq_init();
+	if (unlikely(err))
+		goto out_procfs;
+	err = au_loopback_init();
+	if (unlikely(err))
+		goto out_wkq;
+	err = au_hnotify_init();
+	if (unlikely(err))
+		goto out_loopback;
+	err = au_sysrq_init();
+	if (unlikely(err))
+		goto out_hin;
+	err = au_cache_init();
+	if (unlikely(err))
+		goto out_sysrq;
+
+	aufs_fs_type.fs_flags |= au_userns ? FS_USERNS_MOUNT : 0;
+	err = register_filesystem(&aufs_fs_type);
+	if (unlikely(err))
+		goto out_cache;
+
+	/* since we define pr_fmt, call printk directly */
+	printk(KERN_INFO AUFS_NAME " " AUFS_VERSION "\n");
+	goto out; /* success */
+
+out_cache:
+	au_cache_fin();
+out_sysrq:
+	au_sysrq_fin();
+out_hin:
+	au_hnotify_fin();
+out_loopback:
+	au_loopback_fin();
+out_wkq:
+	au_wkq_fin();
+out_procfs:
+	au_procfs_fin();
+out_sysaufs:
+	sysaufs_fin();
+	au_dy_fin();
+out:
+	return err;
+}
+
+static void __exit aufs_exit(void)
+{
+	unregister_filesystem(&aufs_fs_type);
+	au_cache_fin();
+	au_sysrq_fin();
+	au_hnotify_fin();
+	au_loopback_fin();
+	au_wkq_fin();
+	au_procfs_fin();
+	sysaufs_fin();
+	au_dy_fin();
+}
+
+module_init(aufs_init);
+module_exit(aufs_exit);
diff --git a/include/fs/aufs/module.h b/include/fs/aufs/module.h
new file mode 100644
index 000000000..6d222d1c6
--- /dev/null
+++ b/include/fs/aufs/module.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * module initialization and module-global
+ */
+
+#ifndef __AUFS_MODULE_H__
+#define __AUFS_MODULE_H__
+
+#ifdef __KERNEL__
+
+#include <linux/slab.h>
+
+struct path;
+struct seq_file;
+
+/* module parameters */
+extern int sysaufs_brs;
+extern bool au_userns;
+
+/* ---------------------------------------------------------------------- */
+
+extern int au_dir_roflags;
+
+void *au_krealloc(void *p, unsigned int new_sz, gfp_t gfp, int may_shrink);
+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp,
+		   int may_shrink);
+
+static inline int au_kmidx_sub(size_t sz, size_t new_sz)
+{
+#ifndef CONFIG_SLOB
+	return kmalloc_index(sz) - kmalloc_index(new_sz);
+#else
+	return -1; /* SLOB is untested */
+#endif
+}
+
+int au_seq_path(struct seq_file *seq, struct path *path);
+
+#ifdef CONFIG_PROC_FS
+/* procfs.c */
+int __init au_procfs_init(void);
+void au_procfs_fin(void);
+#else
+AuStubInt0(au_procfs_init, void);
+AuStubVoid(au_procfs_fin, void);
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+/* kmem cache */
+enum {
+	AuCache_DINFO,
+	AuCache_ICNTNR,
+	AuCache_FINFO,
+	AuCache_VDIR,
+	AuCache_DEHSTR,
+	AuCache_HNOTIFY, /* must be last */
+	AuCache_Last
+};
+
+extern struct kmem_cache *au_cache[AuCache_Last];
+
+#define AuCacheFlags		(SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD)
+#define AuCache(type)		KMEM_CACHE(type, AuCacheFlags)
+#define AuCacheCtor(type, ctor)	\
+	kmem_cache_create(#type, sizeof(struct type), \
+			  __alignof__(struct type), AuCacheFlags, ctor)
+
+#define AuCacheFuncs(name, index) \
+static inline struct au_##name *au_cache_alloc_##name(void) \
+{ return kmem_cache_alloc(au_cache[AuCache_##index], GFP_NOFS); } \
+static inline void au_cache_free_##name(struct au_##name *p) \
+{ kmem_cache_free(au_cache[AuCache_##index], p); }
+
+AuCacheFuncs(dinfo, DINFO);
+AuCacheFuncs(icntnr, ICNTNR);
+AuCacheFuncs(finfo, FINFO);
+AuCacheFuncs(vdir, VDIR);
+AuCacheFuncs(vdir_dehstr, DEHSTR);
+#ifdef CONFIG_AUFS_HNOTIFY
+AuCacheFuncs(hnotify, HNOTIFY);
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_MODULE_H__ */
diff --git a/include/fs/aufs/mvdown.c b/include/fs/aufs/mvdown.c
new file mode 100644
index 000000000..b51219658
--- /dev/null
+++ b/include/fs/aufs/mvdown.c
@@ -0,0 +1,704 @@
+/*
+ * Copyright (C) 2011-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * move-down, opposite of copy-up
+ */
+
+#include "aufs.h"
+
+struct au_mvd_args {
+	struct {
+		struct super_block *h_sb;
+		struct dentry *h_parent;
+		struct au_hinode *hdir;
+		struct inode *h_dir, *h_inode;
+		struct au_pin pin;
+	} info[AUFS_MVDOWN_NARRAY];
+
+	struct aufs_mvdown mvdown;
+	struct dentry *dentry, *parent;
+	struct inode *inode, *dir;
+	struct super_block *sb;
+	aufs_bindex_t bopq, bwh, bfound;
+	unsigned char rename_lock;
+};
+
+#define mvd_errno		mvdown.au_errno
+#define mvd_bsrc		mvdown.stbr[AUFS_MVDOWN_UPPER].bindex
+#define mvd_src_brid		mvdown.stbr[AUFS_MVDOWN_UPPER].brid
+#define mvd_bdst		mvdown.stbr[AUFS_MVDOWN_LOWER].bindex
+#define mvd_dst_brid		mvdown.stbr[AUFS_MVDOWN_LOWER].brid
+
+#define mvd_h_src_sb		info[AUFS_MVDOWN_UPPER].h_sb
+#define mvd_h_src_parent	info[AUFS_MVDOWN_UPPER].h_parent
+#define mvd_hdir_src		info[AUFS_MVDOWN_UPPER].hdir
+#define mvd_h_src_dir		info[AUFS_MVDOWN_UPPER].h_dir
+#define mvd_h_src_inode		info[AUFS_MVDOWN_UPPER].h_inode
+#define mvd_pin_src		info[AUFS_MVDOWN_UPPER].pin
+
+#define mvd_h_dst_sb		info[AUFS_MVDOWN_LOWER].h_sb
+#define mvd_h_dst_parent	info[AUFS_MVDOWN_LOWER].h_parent
+#define mvd_hdir_dst		info[AUFS_MVDOWN_LOWER].hdir
+#define mvd_h_dst_dir		info[AUFS_MVDOWN_LOWER].h_dir
+#define mvd_h_dst_inode		info[AUFS_MVDOWN_LOWER].h_inode
+#define mvd_pin_dst		info[AUFS_MVDOWN_LOWER].pin
+
+#define AU_MVD_PR(flag, ...) do {			\
+		if (flag)				\
+			pr_err(__VA_ARGS__);		\
+	} while (0)
+
+static int find_lower_writable(struct au_mvd_args *a)
+{
+	struct super_block *sb;
+	aufs_bindex_t bindex, bbot;
+	struct au_branch *br;
+
+	sb = a->sb;
+	bindex = a->mvd_bsrc;
+	bbot = au_sbbot(sb);
+	if (a->mvdown.flags & AUFS_MVDOWN_FHSM_LOWER)
+		for (bindex++; bindex <= bbot; bindex++) {
+			br = au_sbr(sb, bindex);
+			if (au_br_fhsm(br->br_perm)
+			    && !sb_rdonly(au_br_sb(br)))
+				return bindex;
+		}
+	else if (!(a->mvdown.flags & AUFS_MVDOWN_ROLOWER))
+		for (bindex++; bindex <= bbot; bindex++) {
+			br = au_sbr(sb, bindex);
+			if (!au_br_rdonly(br))
+				return bindex;
+		}
+	else
+		for (bindex++; bindex <= bbot; bindex++) {
+			br = au_sbr(sb, bindex);
+			if (!sb_rdonly(au_br_sb(br))) {
+				if (au_br_rdonly(br))
+					a->mvdown.flags
+						|= AUFS_MVDOWN_ROLOWER_R;
+				return bindex;
+			}
+		}
+
+	return -1;
+}
+
+/* make the parent dir on bdst */
+static int au_do_mkdir(const unsigned char dmsg, struct au_mvd_args *a)
+{
+	int err;
+
+	err = 0;
+	a->mvd_hdir_src = au_hi(a->dir, a->mvd_bsrc);
+	a->mvd_hdir_dst = au_hi(a->dir, a->mvd_bdst);
+	a->mvd_h_src_parent = au_h_dptr(a->parent, a->mvd_bsrc);
+	a->mvd_h_dst_parent = NULL;
+	if (au_dbbot(a->parent) >= a->mvd_bdst)
+		a->mvd_h_dst_parent = au_h_dptr(a->parent, a->mvd_bdst);
+	if (!a->mvd_h_dst_parent) {
+		err = au_cpdown_dirs(a->dentry, a->mvd_bdst);
+		if (unlikely(err)) {
+			AU_MVD_PR(dmsg, "cpdown_dirs failed\n");
+			goto out;
+		}
+		a->mvd_h_dst_parent = au_h_dptr(a->parent, a->mvd_bdst);
+	}
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/* lock them all */
+static int au_do_lock(const unsigned char dmsg, struct au_mvd_args *a)
+{
+	int err;
+	struct dentry *h_trap;
+
+	a->mvd_h_src_sb = au_sbr_sb(a->sb, a->mvd_bsrc);
+	a->mvd_h_dst_sb = au_sbr_sb(a->sb, a->mvd_bdst);
+	err = au_pin(&a->mvd_pin_dst, a->dentry, a->mvd_bdst,
+		     au_opt_udba(a->sb),
+		     AuPin_MNT_WRITE | AuPin_DI_LOCKED);
+	AuTraceErr(err);
+	if (unlikely(err)) {
+		AU_MVD_PR(dmsg, "pin_dst failed\n");
+		goto out;
+	}
+
+	if (a->mvd_h_src_sb != a->mvd_h_dst_sb) {
+		a->rename_lock = 0;
+		au_pin_init(&a->mvd_pin_src, a->dentry, a->mvd_bsrc,
+			    AuLsc_DI_PARENT, AuLsc_I_PARENT3,
+			    au_opt_udba(a->sb),
+			    AuPin_MNT_WRITE | AuPin_DI_LOCKED);
+		err = au_do_pin(&a->mvd_pin_src);
+		AuTraceErr(err);
+		a->mvd_h_src_dir = d_inode(a->mvd_h_src_parent);
+		if (unlikely(err)) {
+			AU_MVD_PR(dmsg, "pin_src failed\n");
+			goto out_dst;
+		}
+		goto out; /* success */
+	}
+
+	a->rename_lock = 1;
+	au_pin_hdir_unlock(&a->mvd_pin_dst);
+	err = au_pin(&a->mvd_pin_src, a->dentry, a->mvd_bsrc,
+		     au_opt_udba(a->sb),
+		     AuPin_MNT_WRITE | AuPin_DI_LOCKED);
+	AuTraceErr(err);
+	a->mvd_h_src_dir = d_inode(a->mvd_h_src_parent);
+	if (unlikely(err)) {
+		AU_MVD_PR(dmsg, "pin_src failed\n");
+		au_pin_hdir_lock(&a->mvd_pin_dst);
+		goto out_dst;
+	}
+	au_pin_hdir_unlock(&a->mvd_pin_src);
+	h_trap = vfsub_lock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
+				   a->mvd_h_dst_parent, a->mvd_hdir_dst);
+	if (h_trap) {
+		err = (h_trap != a->mvd_h_src_parent);
+		if (err)
+			err = (h_trap != a->mvd_h_dst_parent);
+	}
+	BUG_ON(err); /* it should never happen */
+	if (unlikely(a->mvd_h_src_dir != au_pinned_h_dir(&a->mvd_pin_src))) {
+		err = -EBUSY;
+		AuTraceErr(err);
+		vfsub_unlock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
+				    a->mvd_h_dst_parent, a->mvd_hdir_dst);
+		au_pin_hdir_lock(&a->mvd_pin_src);
+		au_unpin(&a->mvd_pin_src);
+		au_pin_hdir_lock(&a->mvd_pin_dst);
+		goto out_dst;
+	}
+	goto out; /* success */
+
+out_dst:
+	au_unpin(&a->mvd_pin_dst);
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+static void au_do_unlock(const unsigned char dmsg, struct au_mvd_args *a)
+{
+	if (!a->rename_lock)
+		au_unpin(&a->mvd_pin_src);
+	else {
+		vfsub_unlock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
+				    a->mvd_h_dst_parent, a->mvd_hdir_dst);
+		au_pin_hdir_lock(&a->mvd_pin_src);
+		au_unpin(&a->mvd_pin_src);
+		au_pin_hdir_lock(&a->mvd_pin_dst);
+	}
+	au_unpin(&a->mvd_pin_dst);
+}
+
+/* copy-down the file */
+static int au_do_cpdown(const unsigned char dmsg, struct au_mvd_args *a)
+{
+	int err;
+	struct au_cp_generic cpg = {
+		.dentry	= a->dentry,
+		.bdst	= a->mvd_bdst,
+		.bsrc	= a->mvd_bsrc,
+		.len	= -1,
+		.pin	= &a->mvd_pin_dst,
+		.flags	= AuCpup_DTIME | AuCpup_HOPEN
+	};
+
+	AuDbg("b%d, b%d\n", cpg.bsrc, cpg.bdst);
+	if (a->mvdown.flags & AUFS_MVDOWN_OWLOWER)
+		au_fset_cpup(cpg.flags, OVERWRITE);
+	if (a->mvdown.flags & AUFS_MVDOWN_ROLOWER)
+		au_fset_cpup(cpg.flags, RWDST);
+	err = au_sio_cpdown_simple(&cpg);
+	if (unlikely(err))
+		AU_MVD_PR(dmsg, "cpdown failed\n");
+
+	AuTraceErr(err);
+	return err;
+}
+
+/*
+ * unlink the whiteout on bdst if exist which may be created by UDBA while we
+ * were sleeping
+ */
+static int au_do_unlink_wh(const unsigned char dmsg, struct au_mvd_args *a)
+{
+	int err;
+	struct path h_path;
+	struct au_branch *br;
+	struct inode *delegated;
+
+	br = au_sbr(a->sb, a->mvd_bdst);
+	h_path.dentry = au_wh_lkup(a->mvd_h_dst_parent, &a->dentry->d_name, br);
+	err = PTR_ERR(h_path.dentry);
+	if (IS_ERR(h_path.dentry)) {
+		AU_MVD_PR(dmsg, "wh_lkup failed\n");
+		goto out;
+	}
+
+	err = 0;
+	if (d_is_positive(h_path.dentry)) {
+		h_path.mnt = au_br_mnt(br);
+		delegated = NULL;
+		err = vfsub_unlink(d_inode(a->mvd_h_dst_parent), &h_path,
+				   &delegated, /*force*/0);
+		if (unlikely(err == -EWOULDBLOCK)) {
+			pr_warn("cannot retry for NFSv4 delegation"
+				" for an internal unlink\n");
+			iput(delegated);
+		}
+		if (unlikely(err))
+			AU_MVD_PR(dmsg, "wh_unlink failed\n");
+	}
+	dput(h_path.dentry);
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/*
+ * unlink the topmost h_dentry
+ */
+static int au_do_unlink(const unsigned char dmsg, struct au_mvd_args *a)
+{
+	int err;
+	struct path h_path;
+	struct inode *delegated;
+
+	h_path.mnt = au_sbr_mnt(a->sb, a->mvd_bsrc);
+	h_path.dentry = au_h_dptr(a->dentry, a->mvd_bsrc);
+	delegated = NULL;
+	err = vfsub_unlink(a->mvd_h_src_dir, &h_path, &delegated, /*force*/0);
+	if (unlikely(err == -EWOULDBLOCK)) {
+		pr_warn("cannot retry for NFSv4 delegation"
+			" for an internal unlink\n");
+		iput(delegated);
+	}
+	if (unlikely(err))
+		AU_MVD_PR(dmsg, "unlink failed\n");
+
+	AuTraceErr(err);
+	return err;
+}
+
+/* Since mvdown succeeded, we ignore an error of this function */
+static void au_do_stfs(const unsigned char dmsg, struct au_mvd_args *a)
+{
+	int err;
+	struct au_branch *br;
+
+	a->mvdown.flags |= AUFS_MVDOWN_STFS_FAILED;
+	br = au_sbr(a->sb, a->mvd_bsrc);
+	err = au_br_stfs(br, &a->mvdown.stbr[AUFS_MVDOWN_UPPER].stfs);
+	if (!err) {
+		br = au_sbr(a->sb, a->mvd_bdst);
+		a->mvdown.stbr[AUFS_MVDOWN_LOWER].brid = br->br_id;
+		err = au_br_stfs(br, &a->mvdown.stbr[AUFS_MVDOWN_LOWER].stfs);
+	}
+	if (!err)
+		a->mvdown.flags &= ~AUFS_MVDOWN_STFS_FAILED;
+	else
+		AU_MVD_PR(dmsg, "statfs failed (%d), ignored\n", err);
+}
+
+/*
+ * copy-down the file and unlink the bsrc file.
+ * - unlink the bdst whout if exist
+ * - copy-down the file (with whtmp name and rename)
+ * - unlink the bsrc file
+ */
+static int au_do_mvdown(const unsigned char dmsg, struct au_mvd_args *a)
+{
+	int err;
+
+	err = au_do_mkdir(dmsg, a);
+	if (!err)
+		err = au_do_lock(dmsg, a);
+	if (unlikely(err))
+		goto out;
+
+	/*
+	 * do not revert the activities we made on bdst since they should be
+	 * harmless in aufs.
+	 */
+
+	err = au_do_cpdown(dmsg, a);
+	if (!err)
+		err = au_do_unlink_wh(dmsg, a);
+	if (!err && !(a->mvdown.flags & AUFS_MVDOWN_KUPPER))
+		err = au_do_unlink(dmsg, a);
+	if (unlikely(err))
+		goto out_unlock;
+
+	AuDbg("%pd2, 0x%x, %d --> %d\n",
+	      a->dentry, a->mvdown.flags, a->mvd_bsrc, a->mvd_bdst);
+	if (find_lower_writable(a) < 0)
+		a->mvdown.flags |= AUFS_MVDOWN_BOTTOM;
+
+	if (a->mvdown.flags & AUFS_MVDOWN_STFS)
+		au_do_stfs(dmsg, a);
+
+	/* maintain internal array */
+	if (!(a->mvdown.flags & AUFS_MVDOWN_KUPPER)) {
+		au_set_h_dptr(a->dentry, a->mvd_bsrc, NULL);
+		au_set_dbtop(a->dentry, a->mvd_bdst);
+		au_set_h_iptr(a->inode, a->mvd_bsrc, NULL, /*flags*/0);
+		au_set_ibtop(a->inode, a->mvd_bdst);
+	} else {
+		/* hide the lower */
+		au_set_h_dptr(a->dentry, a->mvd_bdst, NULL);
+		au_set_dbbot(a->dentry, a->mvd_bsrc);
+		au_set_h_iptr(a->inode, a->mvd_bdst, NULL, /*flags*/0);
+		au_set_ibbot(a->inode, a->mvd_bsrc);
+	}
+	if (au_dbbot(a->dentry) < a->mvd_bdst)
+		au_set_dbbot(a->dentry, a->mvd_bdst);
+	if (au_ibbot(a->inode) < a->mvd_bdst)
+		au_set_ibbot(a->inode, a->mvd_bdst);
+
+out_unlock:
+	au_do_unlock(dmsg, a);
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* make sure the file is idle */
+static int au_mvd_args_busy(const unsigned char dmsg, struct au_mvd_args *a)
+{
+	int err, plinked;
+
+	err = 0;
+	plinked = !!au_opt_test(au_mntflags(a->sb), PLINK);
+	if (au_dbtop(a->dentry) == a->mvd_bsrc
+	    && au_dcount(a->dentry) == 1
+	    && atomic_read(&a->inode->i_count) == 1
+	    /* && a->mvd_h_src_inode->i_nlink == 1 */
+	    && (!plinked || !au_plink_test(a->inode))
+	    && a->inode->i_nlink == 1)
+		goto out;
+
+	err = -EBUSY;
+	AU_MVD_PR(dmsg,
+		  "b%d, d{b%d, c%d?}, i{c%d?, l%u}, hi{l%u}, p{%d, %d}\n",
+		  a->mvd_bsrc, au_dbtop(a->dentry), au_dcount(a->dentry),
+		  atomic_read(&a->inode->i_count), a->inode->i_nlink,
+		  a->mvd_h_src_inode->i_nlink,
+		  plinked, plinked ? au_plink_test(a->inode) : 0);
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/* make sure the parent dir is fine */
+static int au_mvd_args_parent(const unsigned char dmsg,
+			      struct au_mvd_args *a)
+{
+	int err;
+	aufs_bindex_t bindex;
+
+	err = 0;
+	if (unlikely(au_alive_dir(a->parent))) {
+		err = -ENOENT;
+		AU_MVD_PR(dmsg, "parent dir is dead\n");
+		goto out;
+	}
+
+	a->bopq = au_dbdiropq(a->parent);
+	bindex = au_wbr_nonopq(a->dentry, a->mvd_bdst);
+	AuDbg("b%d\n", bindex);
+	if (unlikely((bindex >= 0 && bindex < a->mvd_bdst)
+		     || (a->bopq != -1 && a->bopq < a->mvd_bdst))) {
+		err = -EINVAL;
+		a->mvd_errno = EAU_MVDOWN_OPAQUE;
+		AU_MVD_PR(dmsg, "ancestor is opaque b%d, b%d\n",
+			  a->bopq, a->mvd_bdst);
+	}
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+static int au_mvd_args_intermediate(const unsigned char dmsg,
+				    struct au_mvd_args *a)
+{
+	int err;
+	struct au_dinfo *dinfo, *tmp;
+
+	/* lookup the next lower positive entry */
+	err = -ENOMEM;
+	tmp = au_di_alloc(a->sb, AuLsc_DI_TMP);
+	if (unlikely(!tmp))
+		goto out;
+
+	a->bfound = -1;
+	a->bwh = -1;
+	dinfo = au_di(a->dentry);
+	au_di_cp(tmp, dinfo);
+	au_di_swap(tmp, dinfo);
+
+	/* returns the number of positive dentries */
+	err = au_lkup_dentry(a->dentry, a->mvd_bsrc + 1,
+			     /* AuLkup_IGNORE_PERM */ 0);
+	if (!err)
+		a->bwh = au_dbwh(a->dentry);
+	else if (err > 0)
+		a->bfound = au_dbtop(a->dentry);
+
+	au_di_swap(tmp, dinfo);
+	au_rw_write_unlock(&tmp->di_rwsem);
+	au_di_free(tmp);
+	if (unlikely(err < 0))
+		AU_MVD_PR(dmsg, "failed look-up lower\n");
+
+	/*
+	 * here, we have these cases.
+	 * bfound == -1
+	 *	no positive dentry under bsrc. there are more sub-cases.
+	 *	bwh < 0
+	 *		there no whiteout, we can safely move-down.
+	 *	bwh <= bsrc
+	 *		impossible
+	 *	bsrc < bwh && bwh < bdst
+	 *		there is a whiteout on RO branch. cannot proceed.
+	 *	bwh == bdst
+	 *		there is a whiteout on the RW target branch. it should
+	 *		be removed.
+	 *	bdst < bwh
+	 *		there is a whiteout somewhere unrelated branch.
+	 * -1 < bfound && bfound <= bsrc
+	 *	impossible.
+	 * bfound < bdst
+	 *	found, but it is on RO branch between bsrc and bdst. cannot
+	 *	proceed.
+	 * bfound == bdst
+	 *	found, replace it if AUFS_MVDOWN_FORCE is set. otherwise return
+	 *	error.
+	 * bdst < bfound
+	 *	found, after we create the file on bdst, it will be hidden.
+	 */
+
+	AuDebugOn(a->bfound == -1
+		  && a->bwh != -1
+		  && a->bwh <= a->mvd_bsrc);
+	AuDebugOn(-1 < a->bfound
+		  && a->bfound <= a->mvd_bsrc);
+
+	err = -EINVAL;
+	if (a->bfound == -1
+	    && a->mvd_bsrc < a->bwh
+	    && a->bwh != -1
+	    && a->bwh < a->mvd_bdst) {
+		a->mvd_errno = EAU_MVDOWN_WHITEOUT;
+		AU_MVD_PR(dmsg, "bsrc %d, bdst %d, bfound %d, bwh %d\n",
+			  a->mvd_bsrc, a->mvd_bdst, a->bfound, a->bwh);
+		goto out;
+	} else if (a->bfound != -1 && a->bfound < a->mvd_bdst) {
+		a->mvd_errno = EAU_MVDOWN_UPPER;
+		AU_MVD_PR(dmsg, "bdst %d, bfound %d\n",
+			  a->mvd_bdst, a->bfound);
+		goto out;
+	}
+
+	err = 0; /* success */
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+static int au_mvd_args_exist(const unsigned char dmsg, struct au_mvd_args *a)
+{
+	int err;
+
+	err = 0;
+	if (!(a->mvdown.flags & AUFS_MVDOWN_OWLOWER)
+	    && a->bfound == a->mvd_bdst)
+		err = -EEXIST;
+	AuTraceErr(err);
+	return err;
+}
+
+static int au_mvd_args(const unsigned char dmsg, struct au_mvd_args *a)
+{
+	int err;
+	struct au_branch *br;
+
+	err = -EISDIR;
+	if (unlikely(S_ISDIR(a->inode->i_mode)))
+		goto out;
+
+	err = -EINVAL;
+	if (!(a->mvdown.flags & AUFS_MVDOWN_BRID_UPPER))
+		a->mvd_bsrc = au_ibtop(a->inode);
+	else {
+		a->mvd_bsrc = au_br_index(a->sb, a->mvd_src_brid);
+		if (unlikely(a->mvd_bsrc < 0
+			     || (a->mvd_bsrc < au_dbtop(a->dentry)
+				 || au_dbbot(a->dentry) < a->mvd_bsrc
+				 || !au_h_dptr(a->dentry, a->mvd_bsrc))
+			     || (a->mvd_bsrc < au_ibtop(a->inode)
+				 || au_ibbot(a->inode) < a->mvd_bsrc
+				 || !au_h_iptr(a->inode, a->mvd_bsrc)))) {
+			a->mvd_errno = EAU_MVDOWN_NOUPPER;
+			AU_MVD_PR(dmsg, "no upper\n");
+			goto out;
+		}
+	}
+	if (unlikely(a->mvd_bsrc == au_sbbot(a->sb))) {
+		a->mvd_errno = EAU_MVDOWN_BOTTOM;
+		AU_MVD_PR(dmsg, "on the bottom\n");
+		goto out;
+	}
+	a->mvd_h_src_inode = au_h_iptr(a->inode, a->mvd_bsrc);
+	br = au_sbr(a->sb, a->mvd_bsrc);
+	err = au_br_rdonly(br);
+	if (!(a->mvdown.flags & AUFS_MVDOWN_ROUPPER)) {
+		if (unlikely(err))
+			goto out;
+	} else if (!(vfsub_native_ro(a->mvd_h_src_inode)
+		     || IS_APPEND(a->mvd_h_src_inode))) {
+		if (err)
+			a->mvdown.flags |= AUFS_MVDOWN_ROUPPER_R;
+		/* go on */
+	} else
+		goto out;
+
+	err = -EINVAL;
+	if (!(a->mvdown.flags & AUFS_MVDOWN_BRID_LOWER)) {
+		a->mvd_bdst = find_lower_writable(a);
+		if (unlikely(a->mvd_bdst < 0)) {
+			a->mvd_errno = EAU_MVDOWN_BOTTOM;
+			AU_MVD_PR(dmsg, "no writable lower branch\n");
+			goto out;
+		}
+	} else {
+		a->mvd_bdst = au_br_index(a->sb, a->mvd_dst_brid);
+		if (unlikely(a->mvd_bdst < 0
+			     || au_sbbot(a->sb) < a->mvd_bdst)) {
+			a->mvd_errno = EAU_MVDOWN_NOLOWERBR;
+			AU_MVD_PR(dmsg, "no lower brid\n");
+			goto out;
+		}
+	}
+
+	err = au_mvd_args_busy(dmsg, a);
+	if (!err)
+		err = au_mvd_args_parent(dmsg, a);
+	if (!err)
+		err = au_mvd_args_intermediate(dmsg, a);
+	if (!err)
+		err = au_mvd_args_exist(dmsg, a);
+	if (!err)
+		AuDbg("b%d, b%d\n", a->mvd_bsrc, a->mvd_bdst);
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+int au_mvdown(struct dentry *dentry, struct aufs_mvdown __user *uarg)
+{
+	int err, e;
+	unsigned char dmsg;
+	struct au_mvd_args *args;
+	struct inode *inode;
+
+	inode = d_inode(dentry);
+	err = -EPERM;
+	if (unlikely(!capable(CAP_SYS_ADMIN)))
+		goto out;
+
+	err = -ENOMEM;
+	args = kmalloc(sizeof(*args), GFP_NOFS);
+	if (unlikely(!args))
+		goto out;
+
+	err = copy_from_user(&args->mvdown, uarg, sizeof(args->mvdown));
+	if (!err)
+		err = !access_ok(VERIFY_WRITE, uarg, sizeof(*uarg));
+	if (unlikely(err)) {
+		err = -EFAULT;
+		AuTraceErr(err);
+		goto out_free;
+	}
+	AuDbg("flags 0x%x\n", args->mvdown.flags);
+	args->mvdown.flags &= ~(AUFS_MVDOWN_ROLOWER_R | AUFS_MVDOWN_ROUPPER_R);
+	args->mvdown.au_errno = 0;
+	args->dentry = dentry;
+	args->inode = inode;
+	args->sb = dentry->d_sb;
+
+	err = -ENOENT;
+	dmsg = !!(args->mvdown.flags & AUFS_MVDOWN_DMSG);
+	args->parent = dget_parent(dentry);
+	args->dir = d_inode(args->parent);
+	inode_lock_nested(args->dir, I_MUTEX_PARENT);
+	dput(args->parent);
+	if (unlikely(args->parent != dentry->d_parent)) {
+		AU_MVD_PR(dmsg, "parent dir is moved\n");
+		goto out_dir;
+	}
+
+	inode_lock_nested(inode, I_MUTEX_CHILD);
+	err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_NOPLMW);
+	if (unlikely(err))
+		goto out_inode;
+
+	di_write_lock_parent(args->parent);
+	err = au_mvd_args(dmsg, args);
+	if (unlikely(err))
+		goto out_parent;
+
+	err = au_do_mvdown(dmsg, args);
+	if (unlikely(err))
+		goto out_parent;
+
+	au_cpup_attr_timesizes(args->dir);
+	au_cpup_attr_timesizes(inode);
+	if (!(args->mvdown.flags & AUFS_MVDOWN_KUPPER))
+		au_cpup_igen(inode, au_h_iptr(inode, args->mvd_bdst));
+	/* au_digen_dec(dentry); */
+
+out_parent:
+	di_write_unlock(args->parent);
+	aufs_read_unlock(dentry, AuLock_DW);
+out_inode:
+	inode_unlock(inode);
+out_dir:
+	inode_unlock(args->dir);
+out_free:
+	e = copy_to_user(uarg, &args->mvdown, sizeof(args->mvdown));
+	if (unlikely(e))
+		err = -EFAULT;
+	kfree(args);
+out:
+	AuTraceErr(err);
+	return err;
+}
diff --git a/include/fs/aufs/opts.c b/include/fs/aufs/opts.c
new file mode 100644
index 000000000..eb3fe3fb4
--- /dev/null
+++ b/include/fs/aufs/opts.c
@@ -0,0 +1,1891 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * mount options/flags
+ */
+
+#include <linux/namei.h>
+#include <linux/types.h> /* a distribution requires */
+#include <linux/parser.h>
+#include "aufs.h"
+
+/* ---------------------------------------------------------------------- */
+
+enum {
+	Opt_br,
+	Opt_add, Opt_del, Opt_mod, Opt_append, Opt_prepend,
+	Opt_idel, Opt_imod,
+	Opt_dirwh, Opt_rdcache, Opt_rdblk, Opt_rdhash,
+	Opt_rdblk_def, Opt_rdhash_def,
+	Opt_xino, Opt_noxino,
+	Opt_trunc_xino, Opt_trunc_xino_v, Opt_notrunc_xino,
+	Opt_trunc_xino_path, Opt_itrunc_xino,
+	Opt_trunc_xib, Opt_notrunc_xib,
+	Opt_shwh, Opt_noshwh,
+	Opt_plink, Opt_noplink, Opt_list_plink,
+	Opt_udba,
+	Opt_dio, Opt_nodio,
+	Opt_diropq_a, Opt_diropq_w,
+	Opt_warn_perm, Opt_nowarn_perm,
+	Opt_wbr_copyup, Opt_wbr_create,
+	Opt_fhsm_sec,
+	Opt_verbose, Opt_noverbose,
+	Opt_sum, Opt_nosum, Opt_wsum,
+	Opt_dirperm1, Opt_nodirperm1,
+	Opt_dirren, Opt_nodirren,
+	Opt_acl, Opt_noacl,
+	Opt_tail, Opt_ignore, Opt_ignore_silent, Opt_err
+};
+
+static match_table_t options = {
+	{Opt_br, "br=%s"},
+	{Opt_br, "br:%s"},
+
+	{Opt_add, "add=%d:%s"},
+	{Opt_add, "add:%d:%s"},
+	{Opt_add, "ins=%d:%s"},
+	{Opt_add, "ins:%d:%s"},
+	{Opt_append, "append=%s"},
+	{Opt_append, "append:%s"},
+	{Opt_prepend, "prepend=%s"},
+	{Opt_prepend, "prepend:%s"},
+
+	{Opt_del, "del=%s"},
+	{Opt_del, "del:%s"},
+	/* {Opt_idel, "idel:%d"}, */
+	{Opt_mod, "mod=%s"},
+	{Opt_mod, "mod:%s"},
+	/* {Opt_imod, "imod:%d:%s"}, */
+
+	{Opt_dirwh, "dirwh=%d"},
+
+	{Opt_xino, "xino=%s"},
+	{Opt_noxino, "noxino"},
+	{Opt_trunc_xino, "trunc_xino"},
+	{Opt_trunc_xino_v, "trunc_xino_v=%d:%d"},
+	{Opt_notrunc_xino, "notrunc_xino"},
+	{Opt_trunc_xino_path, "trunc_xino=%s"},
+	{Opt_itrunc_xino, "itrunc_xino=%d"},
+	/* {Opt_zxino, "zxino=%s"}, */
+	{Opt_trunc_xib, "trunc_xib"},
+	{Opt_notrunc_xib, "notrunc_xib"},
+
+#ifdef CONFIG_PROC_FS
+	{Opt_plink, "plink"},
+#else
+	{Opt_ignore_silent, "plink"},
+#endif
+
+	{Opt_noplink, "noplink"},
+
+#ifdef CONFIG_AUFS_DEBUG
+	{Opt_list_plink, "list_plink"},
+#endif
+
+	{Opt_udba, "udba=%s"},
+
+	{Opt_dio, "dio"},
+	{Opt_nodio, "nodio"},
+
+#ifdef CONFIG_AUFS_DIRREN
+	{Opt_dirren, "dirren"},
+	{Opt_nodirren, "nodirren"},
+#else
+	{Opt_ignore, "dirren"},
+	{Opt_ignore_silent, "nodirren"},
+#endif
+
+#ifdef CONFIG_AUFS_FHSM
+	{Opt_fhsm_sec, "fhsm_sec=%d"},
+#else
+	{Opt_ignore, "fhsm_sec=%d"},
+#endif
+
+	{Opt_diropq_a, "diropq=always"},
+	{Opt_diropq_a, "diropq=a"},
+	{Opt_diropq_w, "diropq=whiteouted"},
+	{Opt_diropq_w, "diropq=w"},
+
+	{Opt_warn_perm, "warn_perm"},
+	{Opt_nowarn_perm, "nowarn_perm"},
+
+	/* keep them temporary */
+	{Opt_ignore_silent, "nodlgt"},
+	{Opt_ignore, "clean_plink"},
+
+#ifdef CONFIG_AUFS_SHWH
+	{Opt_shwh, "shwh"},
+#endif
+	{Opt_noshwh, "noshwh"},
+
+	{Opt_dirperm1, "dirperm1"},
+	{Opt_nodirperm1, "nodirperm1"},
+
+	{Opt_verbose, "verbose"},
+	{Opt_verbose, "v"},
+	{Opt_noverbose, "noverbose"},
+	{Opt_noverbose, "quiet"},
+	{Opt_noverbose, "q"},
+	{Opt_noverbose, "silent"},
+
+	{Opt_sum, "sum"},
+	{Opt_nosum, "nosum"},
+	{Opt_wsum, "wsum"},
+
+	{Opt_rdcache, "rdcache=%d"},
+	{Opt_rdblk, "rdblk=%d"},
+	{Opt_rdblk_def, "rdblk=def"},
+	{Opt_rdhash, "rdhash=%d"},
+	{Opt_rdhash_def, "rdhash=def"},
+
+	{Opt_wbr_create, "create=%s"},
+	{Opt_wbr_create, "create_policy=%s"},
+	{Opt_wbr_copyup, "cpup=%s"},
+	{Opt_wbr_copyup, "copyup=%s"},
+	{Opt_wbr_copyup, "copyup_policy=%s"},
+
+	/* generic VFS flag */
+#ifdef CONFIG_FS_POSIX_ACL
+	{Opt_acl, "acl"},
+	{Opt_noacl, "noacl"},
+#else
+	{Opt_ignore, "acl"},
+	{Opt_ignore_silent, "noacl"},
+#endif
+
+	/* internal use for the scripts */
+	{Opt_ignore_silent, "si=%s"},
+
+	{Opt_br, "dirs=%s"},
+	{Opt_ignore, "debug=%d"},
+	{Opt_ignore, "delete=whiteout"},
+	{Opt_ignore, "delete=all"},
+	{Opt_ignore, "imap=%s"},
+
+	/* temporary workaround, due to old mount(8)? */
+	{Opt_ignore_silent, "relatime"},
+
+	{Opt_err, NULL}
+};
+
+/* ---------------------------------------------------------------------- */
+
+static const char *au_parser_pattern(int val, match_table_t tbl)
+{
+	struct match_token *p;
+
+	p = tbl;
+	while (p->pattern) {
+		if (p->token == val)
+			return p->pattern;
+		p++;
+	}
+	BUG();
+	return "??";
+}
+
+static const char *au_optstr(int *val, match_table_t tbl)
+{
+	struct match_token *p;
+	int v;
+
+	v = *val;
+	if (!v)
+		goto out;
+	p = tbl;
+	while (p->pattern) {
+		if (p->token
+		    && (v & p->token) == p->token) {
+			*val &= ~p->token;
+			return p->pattern;
+		}
+		p++;
+	}
+
+out:
+	return NULL;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static match_table_t brperm = {
+	{AuBrPerm_RO, AUFS_BRPERM_RO},
+	{AuBrPerm_RR, AUFS_BRPERM_RR},
+	{AuBrPerm_RW, AUFS_BRPERM_RW},
+	{0, NULL}
+};
+
+static match_table_t brattr = {
+	/* general */
+	{AuBrAttr_COO_REG, AUFS_BRATTR_COO_REG},
+	{AuBrAttr_COO_ALL, AUFS_BRATTR_COO_ALL},
+	/* 'unpin' attrib is meaningless since linux-3.18-rc1 */
+	{AuBrAttr_UNPIN, AUFS_BRATTR_UNPIN},
+#ifdef CONFIG_AUFS_FHSM
+	{AuBrAttr_FHSM, AUFS_BRATTR_FHSM},
+#endif
+#ifdef CONFIG_AUFS_XATTR
+	{AuBrAttr_ICEX, AUFS_BRATTR_ICEX},
+	{AuBrAttr_ICEX_SEC, AUFS_BRATTR_ICEX_SEC},
+	{AuBrAttr_ICEX_SYS, AUFS_BRATTR_ICEX_SYS},
+	{AuBrAttr_ICEX_TR, AUFS_BRATTR_ICEX_TR},
+	{AuBrAttr_ICEX_USR, AUFS_BRATTR_ICEX_USR},
+	{AuBrAttr_ICEX_OTH, AUFS_BRATTR_ICEX_OTH},
+#endif
+
+	/* ro/rr branch */
+	{AuBrRAttr_WH, AUFS_BRRATTR_WH},
+
+	/* rw branch */
+	{AuBrWAttr_MOO, AUFS_BRWATTR_MOO},
+	{AuBrWAttr_NoLinkWH, AUFS_BRWATTR_NLWH},
+
+	{0, NULL}
+};
+
+static int br_attr_val(char *str, match_table_t table, substring_t args[])
+{
+	int attr, v;
+	char *p;
+
+	attr = 0;
+	do {
+		p = strchr(str, '+');
+		if (p)
+			*p = 0;
+		v = match_token(str, table, args);
+		if (v) {
+			if (v & AuBrAttr_CMOO_Mask)
+				attr &= ~AuBrAttr_CMOO_Mask;
+			attr |= v;
+		} else {
+			if (p)
+				*p = '+';
+			pr_warn("ignored branch attribute %s\n", str);
+			break;
+		}
+		if (p)
+			str = p + 1;
+	} while (p);
+
+	return attr;
+}
+
+static int au_do_optstr_br_attr(au_br_perm_str_t *str, int perm)
+{
+	int sz;
+	const char *p;
+	char *q;
+
+	q = str->a;
+	*q = 0;
+	p = au_optstr(&perm, brattr);
+	if (p) {
+		sz = strlen(p);
+		memcpy(q, p, sz + 1);
+		q += sz;
+	} else
+		goto out;
+
+	do {
+		p = au_optstr(&perm, brattr);
+		if (p) {
+			*q++ = '+';
+			sz = strlen(p);
+			memcpy(q, p, sz + 1);
+			q += sz;
+		}
+	} while (p);
+
+out:
+	return q - str->a;
+}
+
+static int noinline_for_stack br_perm_val(char *perm)
+{
+	int val, bad, sz;
+	char *p;
+	substring_t args[MAX_OPT_ARGS];
+	au_br_perm_str_t attr;
+
+	p = strchr(perm, '+');
+	if (p)
+		*p = 0;
+	val = match_token(perm, brperm, args);
+	if (!val) {
+		if (p)
+			*p = '+';
+		pr_warn("ignored branch permission %s\n", perm);
+		val = AuBrPerm_RO;
+		goto out;
+	}
+	if (!p)
+		goto out;
+
+	val |= br_attr_val(p + 1, brattr, args);
+
+	bad = 0;
+	switch (val & AuBrPerm_Mask) {
+	case AuBrPerm_RO:
+	case AuBrPerm_RR:
+		bad = val & AuBrWAttr_Mask;
+		val &= ~AuBrWAttr_Mask;
+		break;
+	case AuBrPerm_RW:
+		bad = val & AuBrRAttr_Mask;
+		val &= ~AuBrRAttr_Mask;
+		break;
+	}
+
+	/*
+	 * 'unpin' attrib becomes meaningless since linux-3.18-rc1, but aufs
+	 * does not treat it as an error, just warning.
+	 * this is a tiny guard for the user operation.
+	 */
+	if (val & AuBrAttr_UNPIN) {
+		bad |= AuBrAttr_UNPIN;
+		val &= ~AuBrAttr_UNPIN;
+	}
+
+	if (unlikely(bad)) {
+		sz = au_do_optstr_br_attr(&attr, bad);
+		AuDebugOn(!sz);
+		pr_warn("ignored branch attribute %s\n", attr.a);
+	}
+
+out:
+	return val;
+}
+
+void au_optstr_br_perm(au_br_perm_str_t *str, int perm)
+{
+	au_br_perm_str_t attr;
+	const char *p;
+	char *q;
+	int sz;
+
+	q = str->a;
+	p = au_optstr(&perm, brperm);
+	AuDebugOn(!p || !*p);
+	sz = strlen(p);
+	memcpy(q, p, sz + 1);
+	q += sz;
+
+	sz = au_do_optstr_br_attr(&attr, perm);
+	if (sz) {
+		*q++ = '+';
+		memcpy(q, attr.a, sz + 1);
+	}
+
+	AuDebugOn(strlen(str->a) >= sizeof(str->a));
+}
+
+/* ---------------------------------------------------------------------- */
+
+static match_table_t udbalevel = {
+	{AuOpt_UDBA_REVAL, "reval"},
+	{AuOpt_UDBA_NONE, "none"},
+#ifdef CONFIG_AUFS_HNOTIFY
+	{AuOpt_UDBA_HNOTIFY, "notify"}, /* abstraction */
+#ifdef CONFIG_AUFS_HFSNOTIFY
+	{AuOpt_UDBA_HNOTIFY, "fsnotify"},
+#endif
+#endif
+	{-1, NULL}
+};
+
+static int noinline_for_stack udba_val(char *str)
+{
+	substring_t args[MAX_OPT_ARGS];
+
+	return match_token(str, udbalevel, args);
+}
+
+const char *au_optstr_udba(int udba)
+{
+	return au_parser_pattern(udba, udbalevel);
+}
+
+/* ---------------------------------------------------------------------- */
+
+static match_table_t au_wbr_create_policy = {
+	{AuWbrCreate_TDP, "tdp"},
+	{AuWbrCreate_TDP, "top-down-parent"},
+	{AuWbrCreate_RR, "rr"},
+	{AuWbrCreate_RR, "round-robin"},
+	{AuWbrCreate_MFS, "mfs"},
+	{AuWbrCreate_MFS, "most-free-space"},
+	{AuWbrCreate_MFSV, "mfs:%d"},
+	{AuWbrCreate_MFSV, "most-free-space:%d"},
+
+	/* top-down regardless the parent, and then mfs */
+	{AuWbrCreate_TDMFS, "tdmfs:%d"},
+	{AuWbrCreate_TDMFSV, "tdmfs:%d:%d"},
+
+	{AuWbrCreate_MFSRR, "mfsrr:%d"},
+	{AuWbrCreate_MFSRRV, "mfsrr:%d:%d"},
+	{AuWbrCreate_PMFS, "pmfs"},
+	{AuWbrCreate_PMFSV, "pmfs:%d"},
+	{AuWbrCreate_PMFSRR, "pmfsrr:%d"},
+	{AuWbrCreate_PMFSRRV, "pmfsrr:%d:%d"},
+
+	{-1, NULL}
+};
+
+static int au_wbr_mfs_wmark(substring_t *arg, char *str,
+			    struct au_opt_wbr_create *create)
+{
+	int err;
+	unsigned long long ull;
+
+	err = 0;
+	if (!match_u64(arg, &ull))
+		create->mfsrr_watermark = ull;
+	else {
+		pr_err("bad integer in %s\n", str);
+		err = -EINVAL;
+	}
+
+	return err;
+}
+
+static int au_wbr_mfs_sec(substring_t *arg, char *str,
+			  struct au_opt_wbr_create *create)
+{
+	int n, err;
+
+	err = 0;
+	if (!match_int(arg, &n) && 0 <= n && n <= AUFS_MFS_MAX_SEC)
+		create->mfs_second = n;
+	else {
+		pr_err("bad integer in %s\n", str);
+		err = -EINVAL;
+	}
+
+	return err;
+}
+
+static int noinline_for_stack
+au_wbr_create_val(char *str, struct au_opt_wbr_create *create)
+{
+	int err, e;
+	substring_t args[MAX_OPT_ARGS];
+
+	err = match_token(str, au_wbr_create_policy, args);
+	create->wbr_create = err;
+	switch (err) {
+	case AuWbrCreate_MFSRRV:
+	case AuWbrCreate_TDMFSV:
+	case AuWbrCreate_PMFSRRV:
+		e = au_wbr_mfs_wmark(&args[0], str, create);
+		if (!e)
+			e = au_wbr_mfs_sec(&args[1], str, create);
+		if (unlikely(e))
+			err = e;
+		break;
+	case AuWbrCreate_MFSRR:
+	case AuWbrCreate_TDMFS:
+	case AuWbrCreate_PMFSRR:
+		e = au_wbr_mfs_wmark(&args[0], str, create);
+		if (unlikely(e)) {
+			err = e;
+			break;
+		}
+		/*FALLTHROUGH*/
+	case AuWbrCreate_MFS:
+	case AuWbrCreate_PMFS:
+		create->mfs_second = AUFS_MFS_DEF_SEC;
+		break;
+	case AuWbrCreate_MFSV:
+	case AuWbrCreate_PMFSV:
+		e = au_wbr_mfs_sec(&args[0], str, create);
+		if (unlikely(e))
+			err = e;
+		break;
+	}
+
+	return err;
+}
+
+const char *au_optstr_wbr_create(int wbr_create)
+{
+	return au_parser_pattern(wbr_create, au_wbr_create_policy);
+}
+
+static match_table_t au_wbr_copyup_policy = {
+	{AuWbrCopyup_TDP, "tdp"},
+	{AuWbrCopyup_TDP, "top-down-parent"},
+	{AuWbrCopyup_BUP, "bup"},
+	{AuWbrCopyup_BUP, "bottom-up-parent"},
+	{AuWbrCopyup_BU, "bu"},
+	{AuWbrCopyup_BU, "bottom-up"},
+	{-1, NULL}
+};
+
+static int noinline_for_stack au_wbr_copyup_val(char *str)
+{
+	substring_t args[MAX_OPT_ARGS];
+
+	return match_token(str, au_wbr_copyup_policy, args);
+}
+
+const char *au_optstr_wbr_copyup(int wbr_copyup)
+{
+	return au_parser_pattern(wbr_copyup, au_wbr_copyup_policy);
+}
+
+/* ---------------------------------------------------------------------- */
+
+static const int lkup_dirflags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
+
+static void dump_opts(struct au_opts *opts)
+{
+#ifdef CONFIG_AUFS_DEBUG
+	/* reduce stack space */
+	union {
+		struct au_opt_add *add;
+		struct au_opt_del *del;
+		struct au_opt_mod *mod;
+		struct au_opt_xino *xino;
+		struct au_opt_xino_itrunc *xino_itrunc;
+		struct au_opt_wbr_create *create;
+	} u;
+	struct au_opt *opt;
+
+	opt = opts->opt;
+	while (opt->type != Opt_tail) {
+		switch (opt->type) {
+		case Opt_add:
+			u.add = &opt->add;
+			AuDbg("add {b%d, %s, 0x%x, %p}\n",
+				  u.add->bindex, u.add->pathname, u.add->perm,
+				  u.add->path.dentry);
+			break;
+		case Opt_del:
+		case Opt_idel:
+			u.del = &opt->del;
+			AuDbg("del {%s, %p}\n",
+			      u.del->pathname, u.del->h_path.dentry);
+			break;
+		case Opt_mod:
+		case Opt_imod:
+			u.mod = &opt->mod;
+			AuDbg("mod {%s, 0x%x, %p}\n",
+				  u.mod->path, u.mod->perm, u.mod->h_root);
+			break;
+		case Opt_append:
+			u.add = &opt->add;
+			AuDbg("append {b%d, %s, 0x%x, %p}\n",
+				  u.add->bindex, u.add->pathname, u.add->perm,
+				  u.add->path.dentry);
+			break;
+		case Opt_prepend:
+			u.add = &opt->add;
+			AuDbg("prepend {b%d, %s, 0x%x, %p}\n",
+				  u.add->bindex, u.add->pathname, u.add->perm,
+				  u.add->path.dentry);
+			break;
+		case Opt_dirwh:
+			AuDbg("dirwh %d\n", opt->dirwh);
+			break;
+		case Opt_rdcache:
+			AuDbg("rdcache %d\n", opt->rdcache);
+			break;
+		case Opt_rdblk:
+			AuDbg("rdblk %u\n", opt->rdblk);
+			break;
+		case Opt_rdblk_def:
+			AuDbg("rdblk_def\n");
+			break;
+		case Opt_rdhash:
+			AuDbg("rdhash %u\n", opt->rdhash);
+			break;
+		case Opt_rdhash_def:
+			AuDbg("rdhash_def\n");
+			break;
+		case Opt_xino:
+			u.xino = &opt->xino;
+			AuDbg("xino {%s %pD}\n", u.xino->path, u.xino->file);
+			break;
+		case Opt_trunc_xino:
+			AuLabel(trunc_xino);
+			break;
+		case Opt_notrunc_xino:
+			AuLabel(notrunc_xino);
+			break;
+		case Opt_trunc_xino_path:
+		case Opt_itrunc_xino:
+			u.xino_itrunc = &opt->xino_itrunc;
+			AuDbg("trunc_xino %d\n", u.xino_itrunc->bindex);
+			break;
+		case Opt_noxino:
+			AuLabel(noxino);
+			break;
+		case Opt_trunc_xib:
+			AuLabel(trunc_xib);
+			break;
+		case Opt_notrunc_xib:
+			AuLabel(notrunc_xib);
+			break;
+		case Opt_shwh:
+			AuLabel(shwh);
+			break;
+		case Opt_noshwh:
+			AuLabel(noshwh);
+			break;
+		case Opt_dirperm1:
+			AuLabel(dirperm1);
+			break;
+		case Opt_nodirperm1:
+			AuLabel(nodirperm1);
+			break;
+		case Opt_plink:
+			AuLabel(plink);
+			break;
+		case Opt_noplink:
+			AuLabel(noplink);
+			break;
+		case Opt_list_plink:
+			AuLabel(list_plink);
+			break;
+		case Opt_udba:
+			AuDbg("udba %d, %s\n",
+				  opt->udba, au_optstr_udba(opt->udba));
+			break;
+		case Opt_dio:
+			AuLabel(dio);
+			break;
+		case Opt_nodio:
+			AuLabel(nodio);
+			break;
+		case Opt_diropq_a:
+			AuLabel(diropq_a);
+			break;
+		case Opt_diropq_w:
+			AuLabel(diropq_w);
+			break;
+		case Opt_warn_perm:
+			AuLabel(warn_perm);
+			break;
+		case Opt_nowarn_perm:
+			AuLabel(nowarn_perm);
+			break;
+		case Opt_verbose:
+			AuLabel(verbose);
+			break;
+		case Opt_noverbose:
+			AuLabel(noverbose);
+			break;
+		case Opt_sum:
+			AuLabel(sum);
+			break;
+		case Opt_nosum:
+			AuLabel(nosum);
+			break;
+		case Opt_wsum:
+			AuLabel(wsum);
+			break;
+		case Opt_wbr_create:
+			u.create = &opt->wbr_create;
+			AuDbg("create %d, %s\n", u.create->wbr_create,
+				  au_optstr_wbr_create(u.create->wbr_create));
+			switch (u.create->wbr_create) {
+			case AuWbrCreate_MFSV:
+			case AuWbrCreate_PMFSV:
+				AuDbg("%d sec\n", u.create->mfs_second);
+				break;
+			case AuWbrCreate_MFSRR:
+			case AuWbrCreate_TDMFS:
+				AuDbg("%llu watermark\n",
+					  u.create->mfsrr_watermark);
+				break;
+			case AuWbrCreate_MFSRRV:
+			case AuWbrCreate_TDMFSV:
+			case AuWbrCreate_PMFSRRV:
+				AuDbg("%llu watermark, %d sec\n",
+					  u.create->mfsrr_watermark,
+					  u.create->mfs_second);
+				break;
+			}
+			break;
+		case Opt_wbr_copyup:
+			AuDbg("copyup %d, %s\n", opt->wbr_copyup,
+				  au_optstr_wbr_copyup(opt->wbr_copyup));
+			break;
+		case Opt_fhsm_sec:
+			AuDbg("fhsm_sec %u\n", opt->fhsm_second);
+			break;
+		case Opt_dirren:
+			AuLabel(dirren);
+			break;
+		case Opt_nodirren:
+			AuLabel(nodirren);
+			break;
+		case Opt_acl:
+			AuLabel(acl);
+			break;
+		case Opt_noacl:
+			AuLabel(noacl);
+			break;
+		default:
+			BUG();
+		}
+		opt++;
+	}
+#endif
+}
+
+void au_opts_free(struct au_opts *opts)
+{
+	struct au_opt *opt;
+
+	opt = opts->opt;
+	while (opt->type != Opt_tail) {
+		switch (opt->type) {
+		case Opt_add:
+		case Opt_append:
+		case Opt_prepend:
+			path_put(&opt->add.path);
+			break;
+		case Opt_del:
+		case Opt_idel:
+			path_put(&opt->del.h_path);
+			break;
+		case Opt_mod:
+		case Opt_imod:
+			dput(opt->mod.h_root);
+			break;
+		case Opt_xino:
+			fput(opt->xino.file);
+			break;
+		}
+		opt++;
+	}
+}
+
+static int opt_add(struct au_opt *opt, char *opt_str, unsigned long sb_flags,
+		   aufs_bindex_t bindex)
+{
+	int err;
+	struct au_opt_add *add = &opt->add;
+	char *p;
+
+	add->bindex = bindex;
+	add->perm = AuBrPerm_RO;
+	add->pathname = opt_str;
+	p = strchr(opt_str, '=');
+	if (p) {
+		*p++ = 0;
+		if (*p)
+			add->perm = br_perm_val(p);
+	}
+
+	err = vfsub_kern_path(add->pathname, lkup_dirflags, &add->path);
+	if (!err) {
+		if (!p) {
+			add->perm = AuBrPerm_RO;
+			if (au_test_fs_rr(add->path.dentry->d_sb))
+				add->perm = AuBrPerm_RR;
+			else if (!bindex && !(sb_flags & SB_RDONLY))
+				add->perm = AuBrPerm_RW;
+		}
+		opt->type = Opt_add;
+		goto out;
+	}
+	pr_err("lookup failed %s (%d)\n", add->pathname, err);
+	err = -EINVAL;
+
+out:
+	return err;
+}
+
+static int au_opts_parse_del(struct au_opt_del *del, substring_t args[])
+{
+	int err;
+
+	del->pathname = args[0].from;
+	AuDbg("del path %s\n", del->pathname);
+
+	err = vfsub_kern_path(del->pathname, lkup_dirflags, &del->h_path);
+	if (unlikely(err))
+		pr_err("lookup failed %s (%d)\n", del->pathname, err);
+
+	return err;
+}
+
+#if 0 /* reserved for future use */
+static int au_opts_parse_idel(struct super_block *sb, aufs_bindex_t bindex,
+			      struct au_opt_del *del, substring_t args[])
+{
+	int err;
+	struct dentry *root;
+
+	err = -EINVAL;
+	root = sb->s_root;
+	aufs_read_lock(root, AuLock_FLUSH);
+	if (bindex < 0 || au_sbbot(sb) < bindex) {
+		pr_err("out of bounds, %d\n", bindex);
+		goto out;
+	}
+
+	err = 0;
+	del->h_path.dentry = dget(au_h_dptr(root, bindex));
+	del->h_path.mnt = mntget(au_sbr_mnt(sb, bindex));
+
+out:
+	aufs_read_unlock(root, !AuLock_IR);
+	return err;
+}
+#endif
+
+static int noinline_for_stack
+au_opts_parse_mod(struct au_opt_mod *mod, substring_t args[])
+{
+	int err;
+	struct path path;
+	char *p;
+
+	err = -EINVAL;
+	mod->path = args[0].from;
+	p = strchr(mod->path, '=');
+	if (unlikely(!p)) {
+		pr_err("no permssion %s\n", args[0].from);
+		goto out;
+	}
+
+	*p++ = 0;
+	err = vfsub_kern_path(mod->path, lkup_dirflags, &path);
+	if (unlikely(err)) {
+		pr_err("lookup failed %s (%d)\n", mod->path, err);
+		goto out;
+	}
+
+	mod->perm = br_perm_val(p);
+	AuDbg("mod path %s, perm 0x%x, %s\n", mod->path, mod->perm, p);
+	mod->h_root = dget(path.dentry);
+	path_put(&path);
+
+out:
+	return err;
+}
+
+#if 0 /* reserved for future use */
+static int au_opts_parse_imod(struct super_block *sb, aufs_bindex_t bindex,
+			      struct au_opt_mod *mod, substring_t args[])
+{
+	int err;
+	struct dentry *root;
+
+	err = -EINVAL;
+	root = sb->s_root;
+	aufs_read_lock(root, AuLock_FLUSH);
+	if (bindex < 0 || au_sbbot(sb) < bindex) {
+		pr_err("out of bounds, %d\n", bindex);
+		goto out;
+	}
+
+	err = 0;
+	mod->perm = br_perm_val(args[1].from);
+	AuDbg("mod path %s, perm 0x%x, %s\n",
+	      mod->path, mod->perm, args[1].from);
+	mod->h_root = dget(au_h_dptr(root, bindex));
+
+out:
+	aufs_read_unlock(root, !AuLock_IR);
+	return err;
+}
+#endif
+
+static int au_opts_parse_xino(struct super_block *sb, struct au_opt_xino *xino,
+			      substring_t args[])
+{
+	int err;
+	struct file *file;
+
+	file = au_xino_create(sb, args[0].from, /*silent*/0);
+	err = PTR_ERR(file);
+	if (IS_ERR(file))
+		goto out;
+
+	err = -EINVAL;
+	if (unlikely(file->f_path.dentry->d_sb == sb)) {
+		fput(file);
+		pr_err("%s must be outside\n", args[0].from);
+		goto out;
+	}
+
+	err = 0;
+	xino->file = file;
+	xino->path = args[0].from;
+
+out:
+	return err;
+}
+
+static int noinline_for_stack
+au_opts_parse_xino_itrunc_path(struct super_block *sb,
+			       struct au_opt_xino_itrunc *xino_itrunc,
+			       substring_t args[])
+{
+	int err;
+	aufs_bindex_t bbot, bindex;
+	struct path path;
+	struct dentry *root;
+
+	err = vfsub_kern_path(args[0].from, lkup_dirflags, &path);
+	if (unlikely(err)) {
+		pr_err("lookup failed %s (%d)\n", args[0].from, err);
+		goto out;
+	}
+
+	xino_itrunc->bindex = -1;
+	root = sb->s_root;
+	aufs_read_lock(root, AuLock_FLUSH);
+	bbot = au_sbbot(sb);
+	for (bindex = 0; bindex <= bbot; bindex++) {
+		if (au_h_dptr(root, bindex) == path.dentry) {
+			xino_itrunc->bindex = bindex;
+			break;
+		}
+	}
+	aufs_read_unlock(root, !AuLock_IR);
+	path_put(&path);
+
+	if (unlikely(xino_itrunc->bindex < 0)) {
+		pr_err("no such branch %s\n", args[0].from);
+		err = -EINVAL;
+	}
+
+out:
+	return err;
+}
+
+/* called without aufs lock */
+int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts)
+{
+	int err, n, token;
+	aufs_bindex_t bindex;
+	unsigned char skipped;
+	struct dentry *root;
+	struct au_opt *opt, *opt_tail;
+	char *opt_str;
+	/* reduce the stack space */
+	union {
+		struct au_opt_xino_itrunc *xino_itrunc;
+		struct au_opt_wbr_create *create;
+	} u;
+	struct {
+		substring_t args[MAX_OPT_ARGS];
+	} *a;
+
+	err = -ENOMEM;
+	a = kmalloc(sizeof(*a), GFP_NOFS);
+	if (unlikely(!a))
+		goto out;
+
+	root = sb->s_root;
+	err = 0;
+	bindex = 0;
+	opt = opts->opt;
+	opt_tail = opt + opts->max_opt - 1;
+	opt->type = Opt_tail;
+	while (!err && (opt_str = strsep(&str, ",")) && *opt_str) {
+		err = -EINVAL;
+		skipped = 0;
+		token = match_token(opt_str, options, a->args);
+		switch (token) {
+		case Opt_br:
+			err = 0;
+			while (!err && (opt_str = strsep(&a->args[0].from, ":"))
+			       && *opt_str) {
+				err = opt_add(opt, opt_str, opts->sb_flags,
+					      bindex++);
+				if (unlikely(!err && ++opt > opt_tail)) {
+					err = -E2BIG;
+					break;
+				}
+				opt->type = Opt_tail;
+				skipped = 1;
+			}
+			break;
+		case Opt_add:
+			if (unlikely(match_int(&a->args[0], &n))) {
+				pr_err("bad integer in %s\n", opt_str);
+				break;
+			}
+			bindex = n;
+			err = opt_add(opt, a->args[1].from, opts->sb_flags,
+				      bindex);
+			if (!err)
+				opt->type = token;
+			break;
+		case Opt_append:
+			err = opt_add(opt, a->args[0].from, opts->sb_flags,
+				      /*dummy bindex*/1);
+			if (!err)
+				opt->type = token;
+			break;
+		case Opt_prepend:
+			err = opt_add(opt, a->args[0].from, opts->sb_flags,
+				      /*bindex*/0);
+			if (!err)
+				opt->type = token;
+			break;
+		case Opt_del:
+			err = au_opts_parse_del(&opt->del, a->args);
+			if (!err)
+				opt->type = token;
+			break;
+#if 0 /* reserved for future use */
+		case Opt_idel:
+			del->pathname = "(indexed)";
+			if (unlikely(match_int(&args[0], &n))) {
+				pr_err("bad integer in %s\n", opt_str);
+				break;
+			}
+			err = au_opts_parse_idel(sb, n, &opt->del, a->args);
+			if (!err)
+				opt->type = token;
+			break;
+#endif
+		case Opt_mod:
+			err = au_opts_parse_mod(&opt->mod, a->args);
+			if (!err)
+				opt->type = token;
+			break;
+#ifdef IMOD /* reserved for future use */
+		case Opt_imod:
+			u.mod->path = "(indexed)";
+			if (unlikely(match_int(&a->args[0], &n))) {
+				pr_err("bad integer in %s\n", opt_str);
+				break;
+			}
+			err = au_opts_parse_imod(sb, n, &opt->mod, a->args);
+			if (!err)
+				opt->type = token;
+			break;
+#endif
+		case Opt_xino:
+			err = au_opts_parse_xino(sb, &opt->xino, a->args);
+			if (!err)
+				opt->type = token;
+			break;
+
+		case Opt_trunc_xino_path:
+			err = au_opts_parse_xino_itrunc_path
+				(sb, &opt->xino_itrunc, a->args);
+			if (!err)
+				opt->type = token;
+			break;
+
+		case Opt_itrunc_xino:
+			u.xino_itrunc = &opt->xino_itrunc;
+			if (unlikely(match_int(&a->args[0], &n))) {
+				pr_err("bad integer in %s\n", opt_str);
+				break;
+			}
+			u.xino_itrunc->bindex = n;
+			aufs_read_lock(root, AuLock_FLUSH);
+			if (n < 0 || au_sbbot(sb) < n) {
+				pr_err("out of bounds, %d\n", n);
+				aufs_read_unlock(root, !AuLock_IR);
+				break;
+			}
+			aufs_read_unlock(root, !AuLock_IR);
+			err = 0;
+			opt->type = token;
+			break;
+
+		case Opt_dirwh:
+			if (unlikely(match_int(&a->args[0], &opt->dirwh)))
+				break;
+			err = 0;
+			opt->type = token;
+			break;
+
+		case Opt_rdcache:
+			if (unlikely(match_int(&a->args[0], &n))) {
+				pr_err("bad integer in %s\n", opt_str);
+				break;
+			}
+			if (unlikely(n > AUFS_RDCACHE_MAX)) {
+				pr_err("rdcache must be smaller than %d\n",
+				       AUFS_RDCACHE_MAX);
+				break;
+			}
+			opt->rdcache = n;
+			err = 0;
+			opt->type = token;
+			break;
+		case Opt_rdblk:
+			if (unlikely(match_int(&a->args[0], &n)
+				     || n < 0
+				     || n > KMALLOC_MAX_SIZE)) {
+				pr_err("bad integer in %s\n", opt_str);
+				break;
+			}
+			if (unlikely(n && n < NAME_MAX)) {
+				pr_err("rdblk must be larger than %d\n",
+				       NAME_MAX);
+				break;
+			}
+			opt->rdblk = n;
+			err = 0;
+			opt->type = token;
+			break;
+		case Opt_rdhash:
+			if (unlikely(match_int(&a->args[0], &n)
+				     || n < 0
+				     || n * sizeof(struct hlist_head)
+				     > KMALLOC_MAX_SIZE)) {
+				pr_err("bad integer in %s\n", opt_str);
+				break;
+			}
+			opt->rdhash = n;
+			err = 0;
+			opt->type = token;
+			break;
+
+		case Opt_trunc_xino:
+		case Opt_notrunc_xino:
+		case Opt_noxino:
+		case Opt_trunc_xib:
+		case Opt_notrunc_xib:
+		case Opt_shwh:
+		case Opt_noshwh:
+		case Opt_dirperm1:
+		case Opt_nodirperm1:
+		case Opt_plink:
+		case Opt_noplink:
+		case Opt_list_plink:
+		case Opt_dio:
+		case Opt_nodio:
+		case Opt_diropq_a:
+		case Opt_diropq_w:
+		case Opt_warn_perm:
+		case Opt_nowarn_perm:
+		case Opt_verbose:
+		case Opt_noverbose:
+		case Opt_sum:
+		case Opt_nosum:
+		case Opt_wsum:
+		case Opt_rdblk_def:
+		case Opt_rdhash_def:
+		case Opt_dirren:
+		case Opt_nodirren:
+		case Opt_acl:
+		case Opt_noacl:
+			err = 0;
+			opt->type = token;
+			break;
+
+		case Opt_udba:
+			opt->udba = udba_val(a->args[0].from);
+			if (opt->udba >= 0) {
+				err = 0;
+				opt->type = token;
+			} else
+				pr_err("wrong value, %s\n", opt_str);
+			break;
+
+		case Opt_wbr_create:
+			u.create = &opt->wbr_create;
+			u.create->wbr_create
+				= au_wbr_create_val(a->args[0].from, u.create);
+			if (u.create->wbr_create >= 0) {
+				err = 0;
+				opt->type = token;
+			} else
+				pr_err("wrong value, %s\n", opt_str);
+			break;
+		case Opt_wbr_copyup:
+			opt->wbr_copyup = au_wbr_copyup_val(a->args[0].from);
+			if (opt->wbr_copyup >= 0) {
+				err = 0;
+				opt->type = token;
+			} else
+				pr_err("wrong value, %s\n", opt_str);
+			break;
+
+		case Opt_fhsm_sec:
+			if (unlikely(match_int(&a->args[0], &n)
+				     || n < 0)) {
+				pr_err("bad integer in %s\n", opt_str);
+				break;
+			}
+			if (sysaufs_brs) {
+				opt->fhsm_second = n;
+				opt->type = token;
+			} else
+				pr_warn("ignored %s\n", opt_str);
+			err = 0;
+			break;
+
+		case Opt_ignore:
+			pr_warn("ignored %s\n", opt_str);
+			/*FALLTHROUGH*/
+		case Opt_ignore_silent:
+			skipped = 1;
+			err = 0;
+			break;
+		case Opt_err:
+			pr_err("unknown option %s\n", opt_str);
+			break;
+		}
+
+		if (!err && !skipped) {
+			if (unlikely(++opt > opt_tail)) {
+				err = -E2BIG;
+				opt--;
+				opt->type = Opt_tail;
+				break;
+			}
+			opt->type = Opt_tail;
+		}
+	}
+
+	kfree(a);
+	dump_opts(opts);
+	if (unlikely(err))
+		au_opts_free(opts);
+
+out:
+	return err;
+}
+
+static int au_opt_wbr_create(struct super_block *sb,
+			     struct au_opt_wbr_create *create)
+{
+	int err;
+	struct au_sbinfo *sbinfo;
+
+	SiMustWriteLock(sb);
+
+	err = 1; /* handled */
+	sbinfo = au_sbi(sb);
+	if (sbinfo->si_wbr_create_ops->fin) {
+		err = sbinfo->si_wbr_create_ops->fin(sb);
+		if (!err)
+			err = 1;
+	}
+
+	sbinfo->si_wbr_create = create->wbr_create;
+	sbinfo->si_wbr_create_ops = au_wbr_create_ops + create->wbr_create;
+	switch (create->wbr_create) {
+	case AuWbrCreate_MFSRRV:
+	case AuWbrCreate_MFSRR:
+	case AuWbrCreate_TDMFS:
+	case AuWbrCreate_TDMFSV:
+	case AuWbrCreate_PMFSRR:
+	case AuWbrCreate_PMFSRRV:
+		sbinfo->si_wbr_mfs.mfsrr_watermark = create->mfsrr_watermark;
+		/*FALLTHROUGH*/
+	case AuWbrCreate_MFS:
+	case AuWbrCreate_MFSV:
+	case AuWbrCreate_PMFS:
+	case AuWbrCreate_PMFSV:
+		sbinfo->si_wbr_mfs.mfs_expire
+			= msecs_to_jiffies(create->mfs_second * MSEC_PER_SEC);
+		break;
+	}
+
+	if (sbinfo->si_wbr_create_ops->init)
+		sbinfo->si_wbr_create_ops->init(sb); /* ignore */
+
+	return err;
+}
+
+/*
+ * returns,
+ * plus: processed without an error
+ * zero: unprocessed
+ */
+static int au_opt_simple(struct super_block *sb, struct au_opt *opt,
+			 struct au_opts *opts)
+{
+	int err;
+	struct au_sbinfo *sbinfo;
+
+	SiMustWriteLock(sb);
+
+	err = 1; /* handled */
+	sbinfo = au_sbi(sb);
+	switch (opt->type) {
+	case Opt_udba:
+		sbinfo->si_mntflags &= ~AuOptMask_UDBA;
+		sbinfo->si_mntflags |= opt->udba;
+		opts->given_udba |= opt->udba;
+		break;
+
+	case Opt_plink:
+		au_opt_set(sbinfo->si_mntflags, PLINK);
+		break;
+	case Opt_noplink:
+		if (au_opt_test(sbinfo->si_mntflags, PLINK))
+			au_plink_put(sb, /*verbose*/1);
+		au_opt_clr(sbinfo->si_mntflags, PLINK);
+		break;
+	case Opt_list_plink:
+		if (au_opt_test(sbinfo->si_mntflags, PLINK))
+			au_plink_list(sb);
+		break;
+
+	case Opt_dio:
+		au_opt_set(sbinfo->si_mntflags, DIO);
+		au_fset_opts(opts->flags, REFRESH_DYAOP);
+		break;
+	case Opt_nodio:
+		au_opt_clr(sbinfo->si_mntflags, DIO);
+		au_fset_opts(opts->flags, REFRESH_DYAOP);
+		break;
+
+	case Opt_fhsm_sec:
+		au_fhsm_set(sbinfo, opt->fhsm_second);
+		break;
+
+	case Opt_diropq_a:
+		au_opt_set(sbinfo->si_mntflags, ALWAYS_DIROPQ);
+		break;
+	case Opt_diropq_w:
+		au_opt_clr(sbinfo->si_mntflags, ALWAYS_DIROPQ);
+		break;
+
+	case Opt_warn_perm:
+		au_opt_set(sbinfo->si_mntflags, WARN_PERM);
+		break;
+	case Opt_nowarn_perm:
+		au_opt_clr(sbinfo->si_mntflags, WARN_PERM);
+		break;
+
+	case Opt_verbose:
+		au_opt_set(sbinfo->si_mntflags, VERBOSE);
+		break;
+	case Opt_noverbose:
+		au_opt_clr(sbinfo->si_mntflags, VERBOSE);
+		break;
+
+	case Opt_sum:
+		au_opt_set(sbinfo->si_mntflags, SUM);
+		break;
+	case Opt_wsum:
+		au_opt_clr(sbinfo->si_mntflags, SUM);
+		au_opt_set(sbinfo->si_mntflags, SUM_W);
+	case Opt_nosum:
+		au_opt_clr(sbinfo->si_mntflags, SUM);
+		au_opt_clr(sbinfo->si_mntflags, SUM_W);
+		break;
+
+	case Opt_wbr_create:
+		err = au_opt_wbr_create(sb, &opt->wbr_create);
+		break;
+	case Opt_wbr_copyup:
+		sbinfo->si_wbr_copyup = opt->wbr_copyup;
+		sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + opt->wbr_copyup;
+		break;
+
+	case Opt_dirwh:
+		sbinfo->si_dirwh = opt->dirwh;
+		break;
+
+	case Opt_rdcache:
+		sbinfo->si_rdcache
+			= msecs_to_jiffies(opt->rdcache * MSEC_PER_SEC);
+		break;
+	case Opt_rdblk:
+		sbinfo->si_rdblk = opt->rdblk;
+		break;
+	case Opt_rdblk_def:
+		sbinfo->si_rdblk = AUFS_RDBLK_DEF;
+		break;
+	case Opt_rdhash:
+		sbinfo->si_rdhash = opt->rdhash;
+		break;
+	case Opt_rdhash_def:
+		sbinfo->si_rdhash = AUFS_RDHASH_DEF;
+		break;
+
+	case Opt_shwh:
+		au_opt_set(sbinfo->si_mntflags, SHWH);
+		break;
+	case Opt_noshwh:
+		au_opt_clr(sbinfo->si_mntflags, SHWH);
+		break;
+
+	case Opt_dirperm1:
+		au_opt_set(sbinfo->si_mntflags, DIRPERM1);
+		break;
+	case Opt_nodirperm1:
+		au_opt_clr(sbinfo->si_mntflags, DIRPERM1);
+		break;
+
+	case Opt_trunc_xino:
+		au_opt_set(sbinfo->si_mntflags, TRUNC_XINO);
+		break;
+	case Opt_notrunc_xino:
+		au_opt_clr(sbinfo->si_mntflags, TRUNC_XINO);
+		break;
+
+	case Opt_trunc_xino_path:
+	case Opt_itrunc_xino:
+		err = au_xino_trunc(sb, opt->xino_itrunc.bindex);
+		if (!err)
+			err = 1;
+		break;
+
+	case Opt_trunc_xib:
+		au_fset_opts(opts->flags, TRUNC_XIB);
+		break;
+	case Opt_notrunc_xib:
+		au_fclr_opts(opts->flags, TRUNC_XIB);
+		break;
+
+	case Opt_dirren:
+		err = 1;
+		if (!au_opt_test(sbinfo->si_mntflags, DIRREN)) {
+			err = au_dr_opt_set(sb);
+			if (!err)
+				err = 1;
+		}
+		if (err == 1)
+			au_opt_set(sbinfo->si_mntflags, DIRREN);
+		break;
+	case Opt_nodirren:
+		err = 1;
+		if (au_opt_test(sbinfo->si_mntflags, DIRREN)) {
+			err = au_dr_opt_clr(sb, au_ftest_opts(opts->flags,
+							      DR_FLUSHED));
+			if (!err)
+				err = 1;
+		}
+		if (err == 1)
+			au_opt_clr(sbinfo->si_mntflags, DIRREN);
+		break;
+
+	case Opt_acl:
+		sb->s_flags |= SB_POSIXACL;
+		break;
+	case Opt_noacl:
+		sb->s_flags &= ~SB_POSIXACL;
+		break;
+
+	default:
+		err = 0;
+		break;
+	}
+
+	return err;
+}
+
+/*
+ * returns tri-state.
+ * plus: processed without an error
+ * zero: unprocessed
+ * minus: error
+ */
+static int au_opt_br(struct super_block *sb, struct au_opt *opt,
+		     struct au_opts *opts)
+{
+	int err, do_refresh;
+
+	err = 0;
+	switch (opt->type) {
+	case Opt_append:
+		opt->add.bindex = au_sbbot(sb) + 1;
+		if (opt->add.bindex < 0)
+			opt->add.bindex = 0;
+		goto add;
+	case Opt_prepend:
+		opt->add.bindex = 0;
+	add: /* indented label */
+	case Opt_add:
+		err = au_br_add(sb, &opt->add,
+				au_ftest_opts(opts->flags, REMOUNT));
+		if (!err) {
+			err = 1;
+			au_fset_opts(opts->flags, REFRESH);
+		}
+		break;
+
+	case Opt_del:
+	case Opt_idel:
+		err = au_br_del(sb, &opt->del,
+				au_ftest_opts(opts->flags, REMOUNT));
+		if (!err) {
+			err = 1;
+			au_fset_opts(opts->flags, TRUNC_XIB);
+			au_fset_opts(opts->flags, REFRESH);
+		}
+		break;
+
+	case Opt_mod:
+	case Opt_imod:
+		err = au_br_mod(sb, &opt->mod,
+				au_ftest_opts(opts->flags, REMOUNT),
+				&do_refresh);
+		if (!err) {
+			err = 1;
+			if (do_refresh)
+				au_fset_opts(opts->flags, REFRESH);
+		}
+		break;
+	}
+	return err;
+}
+
+static int au_opt_xino(struct super_block *sb, struct au_opt *opt,
+		       struct au_opt_xino **opt_xino,
+		       struct au_opts *opts)
+{
+	int err;
+	aufs_bindex_t bbot, bindex;
+	struct dentry *root, *parent, *h_root;
+
+	err = 0;
+	switch (opt->type) {
+	case Opt_xino:
+		err = au_xino_set(sb, &opt->xino,
+				  !!au_ftest_opts(opts->flags, REMOUNT));
+		if (unlikely(err))
+			break;
+
+		*opt_xino = &opt->xino;
+		au_xino_brid_set(sb, -1);
+
+		/* safe d_parent access */
+		parent = opt->xino.file->f_path.dentry->d_parent;
+		root = sb->s_root;
+		bbot = au_sbbot(sb);
+		for (bindex = 0; bindex <= bbot; bindex++) {
+			h_root = au_h_dptr(root, bindex);
+			if (h_root == parent) {
+				au_xino_brid_set(sb, au_sbr_id(sb, bindex));
+				break;
+			}
+		}
+		break;
+
+	case Opt_noxino:
+		au_xino_clr(sb);
+		au_xino_brid_set(sb, -1);
+		*opt_xino = (void *)-1;
+		break;
+	}
+
+	return err;
+}
+
+int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
+		   unsigned int pending)
+{
+	int err, fhsm;
+	aufs_bindex_t bindex, bbot;
+	unsigned char do_plink, skip, do_free, can_no_dreval;
+	struct au_branch *br;
+	struct au_wbr *wbr;
+	struct dentry *root, *dentry;
+	struct inode *dir, *h_dir;
+	struct au_sbinfo *sbinfo;
+	struct au_hinode *hdir;
+
+	SiMustAnyLock(sb);
+
+	sbinfo = au_sbi(sb);
+	AuDebugOn(!(sbinfo->si_mntflags & AuOptMask_UDBA));
+
+	if (!(sb_flags & SB_RDONLY)) {
+		if (unlikely(!au_br_writable(au_sbr_perm(sb, 0))))
+			pr_warn("first branch should be rw\n");
+		if (unlikely(au_opt_test(sbinfo->si_mntflags, SHWH)))
+			pr_warn_once("shwh should be used with ro\n");
+	}
+
+	if (au_opt_test((sbinfo->si_mntflags | pending), UDBA_HNOTIFY)
+	    && !au_opt_test(sbinfo->si_mntflags, XINO))
+		pr_warn_once("udba=*notify requires xino\n");
+
+	if (au_opt_test(sbinfo->si_mntflags, DIRPERM1))
+		pr_warn_once("dirperm1 breaks the protection"
+			     " by the permission bits on the lower branch\n");
+
+	err = 0;
+	fhsm = 0;
+	root = sb->s_root;
+	dir = d_inode(root);
+	do_plink = !!au_opt_test(sbinfo->si_mntflags, PLINK);
+	can_no_dreval = !!au_opt_test((sbinfo->si_mntflags | pending),
+				      UDBA_NONE);
+	bbot = au_sbbot(sb);
+	for (bindex = 0; !err && bindex <= bbot; bindex++) {
+		skip = 0;
+		h_dir = au_h_iptr(dir, bindex);
+		br = au_sbr(sb, bindex);
+
+		if ((br->br_perm & AuBrAttr_ICEX)
+		    && !h_dir->i_op->listxattr)
+			br->br_perm &= ~AuBrAttr_ICEX;
+#if 0
+		if ((br->br_perm & AuBrAttr_ICEX_SEC)
+		    && (au_br_sb(br)->s_flags & SB_NOSEC))
+			br->br_perm &= ~AuBrAttr_ICEX_SEC;
+#endif
+
+		do_free = 0;
+		wbr = br->br_wbr;
+		if (wbr)
+			wbr_wh_read_lock(wbr);
+
+		if (!au_br_writable(br->br_perm)) {
+			do_free = !!wbr;
+			skip = (!wbr
+				|| (!wbr->wbr_whbase
+				    && !wbr->wbr_plink
+				    && !wbr->wbr_orph));
+		} else if (!au_br_wh_linkable(br->br_perm)) {
+			/* skip = (!br->br_whbase && !br->br_orph); */
+			skip = (!wbr || !wbr->wbr_whbase);
+			if (skip && wbr) {
+				if (do_plink)
+					skip = !!wbr->wbr_plink;
+				else
+					skip = !wbr->wbr_plink;
+			}
+		} else {
+			/* skip = (br->br_whbase && br->br_ohph); */
+			skip = (wbr && wbr->wbr_whbase);
+			if (skip) {
+				if (do_plink)
+					skip = !!wbr->wbr_plink;
+				else
+					skip = !wbr->wbr_plink;
+			}
+		}
+		if (wbr)
+			wbr_wh_read_unlock(wbr);
+
+		if (can_no_dreval) {
+			dentry = br->br_path.dentry;
+			spin_lock(&dentry->d_lock);
+			if (dentry->d_flags &
+			    (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE))
+				can_no_dreval = 0;
+			spin_unlock(&dentry->d_lock);
+		}
+
+		if (au_br_fhsm(br->br_perm)) {
+			fhsm++;
+			AuDebugOn(!br->br_fhsm);
+		}
+
+		if (skip)
+			continue;
+
+		hdir = au_hi(dir, bindex);
+		au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
+		if (wbr)
+			wbr_wh_write_lock(wbr);
+		err = au_wh_init(br, sb);
+		if (wbr)
+			wbr_wh_write_unlock(wbr);
+		au_hn_inode_unlock(hdir);
+
+		if (!err && do_free) {
+			kfree(wbr);
+			br->br_wbr = NULL;
+		}
+	}
+
+	if (can_no_dreval)
+		au_fset_si(sbinfo, NO_DREVAL);
+	else
+		au_fclr_si(sbinfo, NO_DREVAL);
+
+	if (fhsm >= 2) {
+		au_fset_si(sbinfo, FHSM);
+		for (bindex = bbot; bindex >= 0; bindex--) {
+			br = au_sbr(sb, bindex);
+			if (au_br_fhsm(br->br_perm)) {
+				au_fhsm_set_bottom(sb, bindex);
+				break;
+			}
+		}
+	} else {
+		au_fclr_si(sbinfo, FHSM);
+		au_fhsm_set_bottom(sb, -1);
+	}
+
+	return err;
+}
+
+int au_opts_mount(struct super_block *sb, struct au_opts *opts)
+{
+	int err;
+	unsigned int tmp;
+	aufs_bindex_t bindex, bbot;
+	struct au_opt *opt;
+	struct au_opt_xino *opt_xino, xino;
+	struct au_sbinfo *sbinfo;
+	struct au_branch *br;
+	struct inode *dir;
+
+	SiMustWriteLock(sb);
+
+	err = 0;
+	opt_xino = NULL;
+	opt = opts->opt;
+	while (err >= 0 && opt->type != Opt_tail)
+		err = au_opt_simple(sb, opt++, opts);
+	if (err > 0)
+		err = 0;
+	else if (unlikely(err < 0))
+		goto out;
+
+	/* disable xino and udba temporary */
+	sbinfo = au_sbi(sb);
+	tmp = sbinfo->si_mntflags;
+	au_opt_clr(sbinfo->si_mntflags, XINO);
+	au_opt_set_udba(sbinfo->si_mntflags, UDBA_REVAL);
+
+	opt = opts->opt;
+	while (err >= 0 && opt->type != Opt_tail)
+		err = au_opt_br(sb, opt++, opts);
+	if (err > 0)
+		err = 0;
+	else if (unlikely(err < 0))
+		goto out;
+
+	bbot = au_sbbot(sb);
+	if (unlikely(bbot < 0)) {
+		err = -EINVAL;
+		pr_err("no branches\n");
+		goto out;
+	}
+
+	if (au_opt_test(tmp, XINO))
+		au_opt_set(sbinfo->si_mntflags, XINO);
+	opt = opts->opt;
+	while (!err && opt->type != Opt_tail)
+		err = au_opt_xino(sb, opt++, &opt_xino, opts);
+	if (unlikely(err))
+		goto out;
+
+	err = au_opts_verify(sb, sb->s_flags, tmp);
+	if (unlikely(err))
+		goto out;
+
+	/* restore xino */
+	if (au_opt_test(tmp, XINO) && !opt_xino) {
+		xino.file = au_xino_def(sb);
+		err = PTR_ERR(xino.file);
+		if (IS_ERR(xino.file))
+			goto out;
+
+		err = au_xino_set(sb, &xino, /*remount*/0);
+		fput(xino.file);
+		if (unlikely(err))
+			goto out;
+	}
+
+	/* restore udba */
+	tmp &= AuOptMask_UDBA;
+	sbinfo->si_mntflags &= ~AuOptMask_UDBA;
+	sbinfo->si_mntflags |= tmp;
+	bbot = au_sbbot(sb);
+	for (bindex = 0; bindex <= bbot; bindex++) {
+		br = au_sbr(sb, bindex);
+		err = au_hnotify_reset_br(tmp, br, br->br_perm);
+		if (unlikely(err))
+			AuIOErr("hnotify failed on br %d, %d, ignored\n",
+				bindex, err);
+		/* go on even if err */
+	}
+	if (au_opt_test(tmp, UDBA_HNOTIFY)) {
+		dir = d_inode(sb->s_root);
+		au_hn_reset(dir, au_hi_flags(dir, /*isdir*/1) & ~AuHi_XINO);
+	}
+
+out:
+	return err;
+}
+
+int au_opts_remount(struct super_block *sb, struct au_opts *opts)
+{
+	int err, rerr;
+	unsigned char no_dreval;
+	struct inode *dir;
+	struct au_opt_xino *opt_xino;
+	struct au_opt *opt;
+	struct au_sbinfo *sbinfo;
+
+	SiMustWriteLock(sb);
+
+	err = au_dr_opt_flush(sb);
+	if (unlikely(err))
+		goto out;
+	au_fset_opts(opts->flags, DR_FLUSHED);
+
+	dir = d_inode(sb->s_root);
+	sbinfo = au_sbi(sb);
+	opt_xino = NULL;
+	opt = opts->opt;
+	while (err >= 0 && opt->type != Opt_tail) {
+		err = au_opt_simple(sb, opt, opts);
+		if (!err)
+			err = au_opt_br(sb, opt, opts);
+		if (!err)
+			err = au_opt_xino(sb, opt, &opt_xino, opts);
+		opt++;
+	}
+	if (err > 0)
+		err = 0;
+	AuTraceErr(err);
+	/* go on even err */
+
+	no_dreval = !!au_ftest_si(sbinfo, NO_DREVAL);
+	rerr = au_opts_verify(sb, opts->sb_flags, /*pending*/0);
+	if (unlikely(rerr && !err))
+		err = rerr;
+
+	if (no_dreval != !!au_ftest_si(sbinfo, NO_DREVAL))
+		au_fset_opts(opts->flags, REFRESH_IDOP);
+
+	if (au_ftest_opts(opts->flags, TRUNC_XIB)) {
+		rerr = au_xib_trunc(sb);
+		if (unlikely(rerr && !err))
+			err = rerr;
+	}
+
+	/* will be handled by the caller */
+	if (!au_ftest_opts(opts->flags, REFRESH)
+	    && (opts->given_udba
+		|| au_opt_test(sbinfo->si_mntflags, XINO)
+		|| au_ftest_opts(opts->flags, REFRESH_IDOP)
+		    ))
+		au_fset_opts(opts->flags, REFRESH);
+
+	AuDbg("status 0x%x\n", opts->flags);
+
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+unsigned int au_opt_udba(struct super_block *sb)
+{
+	return au_mntflags(sb) & AuOptMask_UDBA;
+}
diff --git a/include/fs/aufs/opts.h b/include/fs/aufs/opts.h
new file mode 100644
index 000000000..30bda60f7
--- /dev/null
+++ b/include/fs/aufs/opts.h
@@ -0,0 +1,224 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * mount options/flags
+ */
+
+#ifndef __AUFS_OPTS_H__
+#define __AUFS_OPTS_H__
+
+#ifdef __KERNEL__
+
+#include <linux/path.h>
+
+struct file;
+
+/* ---------------------------------------------------------------------- */
+
+/* mount flags */
+#define AuOpt_XINO		1		/* external inode number bitmap
+						   and translation table */
+#define AuOpt_TRUNC_XINO	(1 << 1)	/* truncate xino files */
+#define AuOpt_UDBA_NONE		(1 << 2)	/* users direct branch access */
+#define AuOpt_UDBA_REVAL	(1 << 3)
+#define AuOpt_UDBA_HNOTIFY	(1 << 4)
+#define AuOpt_SHWH		(1 << 5)	/* show whiteout */
+#define AuOpt_PLINK		(1 << 6)	/* pseudo-link */
+#define AuOpt_DIRPERM1		(1 << 7)	/* ignore the lower dir's perm
+						   bits */
+#define AuOpt_ALWAYS_DIROPQ	(1 << 9)	/* policy to creating diropq */
+#define AuOpt_SUM		(1 << 10)	/* summation for statfs(2) */
+#define AuOpt_SUM_W		(1 << 11)	/* unimplemented */
+#define AuOpt_WARN_PERM		(1 << 12)	/* warn when add-branch */
+#define AuOpt_VERBOSE		(1 << 13)	/* busy inode when del-branch */
+#define AuOpt_DIO		(1 << 14)	/* direct io */
+#define AuOpt_DIRREN		(1 << 15)	/* directory rename */
+
+#ifndef CONFIG_AUFS_HNOTIFY
+#undef AuOpt_UDBA_HNOTIFY
+#define AuOpt_UDBA_HNOTIFY	0
+#endif
+#ifndef CONFIG_AUFS_DIRREN
+#undef AuOpt_DIRREN
+#define AuOpt_DIRREN		0
+#endif
+#ifndef CONFIG_AUFS_SHWH
+#undef AuOpt_SHWH
+#define AuOpt_SHWH		0
+#endif
+
+#define AuOpt_Def	(AuOpt_XINO \
+			 | AuOpt_UDBA_REVAL \
+			 | AuOpt_PLINK \
+			 /* | AuOpt_DIRPERM1 */ \
+			 | AuOpt_WARN_PERM)
+#define AuOptMask_UDBA	(AuOpt_UDBA_NONE \
+			 | AuOpt_UDBA_REVAL \
+			 | AuOpt_UDBA_HNOTIFY)
+
+#define au_opt_test(flags, name)	(flags & AuOpt_##name)
+#define au_opt_set(flags, name) do { \
+	BUILD_BUG_ON(AuOpt_##name & AuOptMask_UDBA); \
+	((flags) |= AuOpt_##name); \
+} while (0)
+#define au_opt_set_udba(flags, name) do { \
+	(flags) &= ~AuOptMask_UDBA; \
+	((flags) |= AuOpt_##name); \
+} while (0)
+#define au_opt_clr(flags, name) do { \
+	((flags) &= ~AuOpt_##name); \
+} while (0)
+
+static inline unsigned int au_opts_plink(unsigned int mntflags)
+{
+#ifdef CONFIG_PROC_FS
+	return mntflags;
+#else
+	return mntflags & ~AuOpt_PLINK;
+#endif
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* policies to select one among multiple writable branches */
+enum {
+	AuWbrCreate_TDP,	/* top down parent */
+	AuWbrCreate_RR,		/* round robin */
+	AuWbrCreate_MFS,	/* most free space */
+	AuWbrCreate_MFSV,	/* mfs with seconds */
+	AuWbrCreate_MFSRR,	/* mfs then rr */
+	AuWbrCreate_MFSRRV,	/* mfs then rr with seconds */
+	AuWbrCreate_TDMFS,	/* top down regardless parent and mfs */
+	AuWbrCreate_TDMFSV,	/* top down regardless parent and mfs */
+	AuWbrCreate_PMFS,	/* parent and mfs */
+	AuWbrCreate_PMFSV,	/* parent and mfs with seconds */
+	AuWbrCreate_PMFSRR,	/* parent, mfs and round-robin */
+	AuWbrCreate_PMFSRRV,	/* plus seconds */
+
+	AuWbrCreate_Def = AuWbrCreate_TDP
+};
+
+enum {
+	AuWbrCopyup_TDP,	/* top down parent */
+	AuWbrCopyup_BUP,	/* bottom up parent */
+	AuWbrCopyup_BU,		/* bottom up */
+
+	AuWbrCopyup_Def = AuWbrCopyup_TDP
+};
+
+/* ---------------------------------------------------------------------- */
+
+struct au_opt_add {
+	aufs_bindex_t	bindex;
+	char		*pathname;
+	int		perm;
+	struct path	path;
+};
+
+struct au_opt_del {
+	char		*pathname;
+	struct path	h_path;
+};
+
+struct au_opt_mod {
+	char		*path;
+	int		perm;
+	struct dentry	*h_root;
+};
+
+struct au_opt_xino {
+	char		*path;
+	struct file	*file;
+};
+
+struct au_opt_xino_itrunc {
+	aufs_bindex_t	bindex;
+};
+
+struct au_opt_wbr_create {
+	int			wbr_create;
+	int			mfs_second;
+	unsigned long long	mfsrr_watermark;
+};
+
+struct au_opt {
+	int type;
+	union {
+		struct au_opt_xino	xino;
+		struct au_opt_xino_itrunc xino_itrunc;
+		struct au_opt_add	add;
+		struct au_opt_del	del;
+		struct au_opt_mod	mod;
+		int			dirwh;
+		int			rdcache;
+		unsigned int		rdblk;
+		unsigned int		rdhash;
+		int			udba;
+		struct au_opt_wbr_create wbr_create;
+		int			wbr_copyup;
+		unsigned int		fhsm_second;
+	};
+};
+
+/* opts flags */
+#define AuOpts_REMOUNT		1
+#define AuOpts_REFRESH		(1 << 1)
+#define AuOpts_TRUNC_XIB	(1 << 2)
+#define AuOpts_REFRESH_DYAOP	(1 << 3)
+#define AuOpts_REFRESH_IDOP	(1 << 4)
+#define AuOpts_DR_FLUSHED	(1 << 5)
+#define au_ftest_opts(flags, name)	((flags) & AuOpts_##name)
+#define au_fset_opts(flags, name) \
+	do { (flags) |= AuOpts_##name; } while (0)
+#define au_fclr_opts(flags, name) \
+	do { (flags) &= ~AuOpts_##name; } while (0)
+
+#ifndef CONFIG_AUFS_DIRREN
+#undef AuOpts_DR_FLUSHED
+#define AuOpts_DR_FLUSHED	0
+#endif
+
+struct au_opts {
+	struct au_opt	*opt;
+	int		max_opt;
+
+	unsigned int	given_udba;
+	unsigned int	flags;
+	unsigned long	sb_flags;
+};
+
+/* ---------------------------------------------------------------------- */
+
+/* opts.c */
+void au_optstr_br_perm(au_br_perm_str_t *str, int perm);
+const char *au_optstr_udba(int udba);
+const char *au_optstr_wbr_copyup(int wbr_copyup);
+const char *au_optstr_wbr_create(int wbr_create);
+
+void au_opts_free(struct au_opts *opts);
+struct super_block;
+int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts);
+int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
+		   unsigned int pending);
+int au_opts_mount(struct super_block *sb, struct au_opts *opts);
+int au_opts_remount(struct super_block *sb, struct au_opts *opts);
+
+unsigned int au_opt_udba(struct super_block *sb);
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_OPTS_H__ */
diff --git a/include/fs/aufs/plink.c b/include/fs/aufs/plink.c
new file mode 100644
index 000000000..9ad023004
--- /dev/null
+++ b/include/fs/aufs/plink.c
@@ -0,0 +1,515 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * pseudo-link
+ */
+
+#include "aufs.h"
+
+/*
+ * the pseudo-link maintenance mode.
+ * during a user process maintains the pseudo-links,
+ * prohibit adding a new plink and branch manipulation.
+ *
+ * Flags
+ * NOPLM:
+ *	For entry functions which will handle plink, and i_mutex is already held
+ *	in VFS.
+ *	They cannot wait and should return an error at once.
+ *	Callers has to check the error.
+ * NOPLMW:
+ *	For entry functions which will handle plink, but i_mutex is not held
+ *	in VFS.
+ *	They can wait the plink maintenance mode to finish.
+ *
+ * They behave like F_SETLK and F_SETLKW.
+ * If the caller never handle plink, then both flags are unnecessary.
+ */
+
+int au_plink_maint(struct super_block *sb, int flags)
+{
+	int err;
+	pid_t pid, ppid;
+	struct task_struct *parent, *prev;
+	struct au_sbinfo *sbi;
+
+	SiMustAnyLock(sb);
+
+	err = 0;
+	if (!au_opt_test(au_mntflags(sb), PLINK))
+		goto out;
+
+	sbi = au_sbi(sb);
+	pid = sbi->si_plink_maint_pid;
+	if (!pid || pid == current->pid)
+		goto out;
+
+	/* todo: it highly depends upon /sbin/mount.aufs */
+	prev = NULL;
+	parent = current;
+	ppid = 0;
+	rcu_read_lock();
+	while (1) {
+		parent = rcu_dereference(parent->real_parent);
+		if (parent == prev)
+			break;
+		ppid = task_pid_vnr(parent);
+		if (pid == ppid) {
+			rcu_read_unlock();
+			goto out;
+		}
+		prev = parent;
+	}
+	rcu_read_unlock();
+
+	if (au_ftest_lock(flags, NOPLMW)) {
+		/* if there is no i_mutex lock in VFS, we don't need to wait */
+		/* AuDebugOn(!lockdep_depth(current)); */
+		while (sbi->si_plink_maint_pid) {
+			si_read_unlock(sb);
+			/* gave up wake_up_bit() */
+			wait_event(sbi->si_plink_wq, !sbi->si_plink_maint_pid);
+
+			if (au_ftest_lock(flags, FLUSH))
+				au_nwt_flush(&sbi->si_nowait);
+			si_noflush_read_lock(sb);
+		}
+	} else if (au_ftest_lock(flags, NOPLM)) {
+		AuDbg("ppid %d, pid %d\n", ppid, pid);
+		err = -EAGAIN;
+	}
+
+out:
+	return err;
+}
+
+void au_plink_maint_leave(struct au_sbinfo *sbinfo)
+{
+	spin_lock(&sbinfo->si_plink_maint_lock);
+	sbinfo->si_plink_maint_pid = 0;
+	spin_unlock(&sbinfo->si_plink_maint_lock);
+	wake_up_all(&sbinfo->si_plink_wq);
+}
+
+int au_plink_maint_enter(struct super_block *sb)
+{
+	int err;
+	struct au_sbinfo *sbinfo;
+
+	err = 0;
+	sbinfo = au_sbi(sb);
+	/* make sure i am the only one in this fs */
+	si_write_lock(sb, AuLock_FLUSH);
+	if (au_opt_test(au_mntflags(sb), PLINK)) {
+		spin_lock(&sbinfo->si_plink_maint_lock);
+		if (!sbinfo->si_plink_maint_pid)
+			sbinfo->si_plink_maint_pid = current->pid;
+		else
+			err = -EBUSY;
+		spin_unlock(&sbinfo->si_plink_maint_lock);
+	}
+	si_write_unlock(sb);
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+#ifdef CONFIG_AUFS_DEBUG
+void au_plink_list(struct super_block *sb)
+{
+	int i;
+	struct au_sbinfo *sbinfo;
+	struct hlist_bl_head *hbl;
+	struct hlist_bl_node *pos;
+	struct au_icntnr *icntnr;
+
+	SiMustAnyLock(sb);
+
+	sbinfo = au_sbi(sb);
+	AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
+	AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
+
+	for (i = 0; i < AuPlink_NHASH; i++) {
+		hbl = sbinfo->si_plink + i;
+		hlist_bl_lock(hbl);
+		hlist_bl_for_each_entry(icntnr, pos, hbl, plink)
+			AuDbg("%lu\n", icntnr->vfs_inode.i_ino);
+		hlist_bl_unlock(hbl);
+	}
+}
+#endif
+
+/* is the inode pseudo-linked? */
+int au_plink_test(struct inode *inode)
+{
+	int found, i;
+	struct au_sbinfo *sbinfo;
+	struct hlist_bl_head *hbl;
+	struct hlist_bl_node *pos;
+	struct au_icntnr *icntnr;
+
+	sbinfo = au_sbi(inode->i_sb);
+	AuRwMustAnyLock(&sbinfo->si_rwsem);
+	AuDebugOn(!au_opt_test(au_mntflags(inode->i_sb), PLINK));
+	AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
+
+	found = 0;
+	i = au_plink_hash(inode->i_ino);
+	hbl =  sbinfo->si_plink + i;
+	hlist_bl_lock(hbl);
+	hlist_bl_for_each_entry(icntnr, pos, hbl, plink)
+		if (&icntnr->vfs_inode == inode) {
+			found = 1;
+			break;
+		}
+	hlist_bl_unlock(hbl);
+	return found;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * generate a name for plink.
+ * the file will be stored under AUFS_WH_PLINKDIR.
+ */
+/* 20 is max digits length of ulong 64 */
+#define PLINK_NAME_LEN	((20 + 1) * 2)
+
+static int plink_name(char *name, int len, struct inode *inode,
+		      aufs_bindex_t bindex)
+{
+	int rlen;
+	struct inode *h_inode;
+
+	h_inode = au_h_iptr(inode, bindex);
+	rlen = snprintf(name, len, "%lu.%lu", inode->i_ino, h_inode->i_ino);
+	return rlen;
+}
+
+struct au_do_plink_lkup_args {
+	struct dentry **errp;
+	struct qstr *tgtname;
+	struct dentry *h_parent;
+	struct au_branch *br;
+};
+
+static struct dentry *au_do_plink_lkup(struct qstr *tgtname,
+				       struct dentry *h_parent,
+				       struct au_branch *br)
+{
+	struct dentry *h_dentry;
+	struct inode *h_inode;
+
+	h_inode = d_inode(h_parent);
+	vfsub_inode_lock_shared_nested(h_inode, AuLsc_I_CHILD2);
+	h_dentry = vfsub_lkup_one(tgtname, h_parent);
+	inode_unlock_shared(h_inode);
+	return h_dentry;
+}
+
+static void au_call_do_plink_lkup(void *args)
+{
+	struct au_do_plink_lkup_args *a = args;
+	*a->errp = au_do_plink_lkup(a->tgtname, a->h_parent, a->br);
+}
+
+/* lookup the plink-ed @inode under the branch at @bindex */
+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex)
+{
+	struct dentry *h_dentry, *h_parent;
+	struct au_branch *br;
+	int wkq_err;
+	char a[PLINK_NAME_LEN];
+	struct qstr tgtname = QSTR_INIT(a, 0);
+
+	AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
+
+	br = au_sbr(inode->i_sb, bindex);
+	h_parent = br->br_wbr->wbr_plink;
+	tgtname.len = plink_name(a, sizeof(a), inode, bindex);
+
+	if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) {
+		struct au_do_plink_lkup_args args = {
+			.errp		= &h_dentry,
+			.tgtname	= &tgtname,
+			.h_parent	= h_parent,
+			.br		= br
+		};
+
+		wkq_err = au_wkq_wait(au_call_do_plink_lkup, &args);
+		if (unlikely(wkq_err))
+			h_dentry = ERR_PTR(wkq_err);
+	} else
+		h_dentry = au_do_plink_lkup(&tgtname, h_parent, br);
+
+	return h_dentry;
+}
+
+/* create a pseudo-link */
+static int do_whplink(struct qstr *tgt, struct dentry *h_parent,
+		      struct dentry *h_dentry, struct au_branch *br)
+{
+	int err;
+	struct path h_path = {
+		.mnt = au_br_mnt(br)
+	};
+	struct inode *h_dir, *delegated;
+
+	h_dir = d_inode(h_parent);
+	inode_lock_nested(h_dir, AuLsc_I_CHILD2);
+again:
+	h_path.dentry = vfsub_lkup_one(tgt, h_parent);
+	err = PTR_ERR(h_path.dentry);
+	if (IS_ERR(h_path.dentry))
+		goto out;
+
+	err = 0;
+	/* wh.plink dir is not monitored */
+	/* todo: is it really safe? */
+	if (d_is_positive(h_path.dentry)
+	    && d_inode(h_path.dentry) != d_inode(h_dentry)) {
+		delegated = NULL;
+		err = vfsub_unlink(h_dir, &h_path, &delegated, /*force*/0);
+		if (unlikely(err == -EWOULDBLOCK)) {
+			pr_warn("cannot retry for NFSv4 delegation"
+				" for an internal unlink\n");
+			iput(delegated);
+		}
+		dput(h_path.dentry);
+		h_path.dentry = NULL;
+		if (!err)
+			goto again;
+	}
+	if (!err && d_is_negative(h_path.dentry)) {
+		delegated = NULL;
+		err = vfsub_link(h_dentry, h_dir, &h_path, &delegated);
+		if (unlikely(err == -EWOULDBLOCK)) {
+			pr_warn("cannot retry for NFSv4 delegation"
+				" for an internal link\n");
+			iput(delegated);
+		}
+	}
+	dput(h_path.dentry);
+
+out:
+	inode_unlock(h_dir);
+	return err;
+}
+
+struct do_whplink_args {
+	int *errp;
+	struct qstr *tgt;
+	struct dentry *h_parent;
+	struct dentry *h_dentry;
+	struct au_branch *br;
+};
+
+static void call_do_whplink(void *args)
+{
+	struct do_whplink_args *a = args;
+	*a->errp = do_whplink(a->tgt, a->h_parent, a->h_dentry, a->br);
+}
+
+static int whplink(struct dentry *h_dentry, struct inode *inode,
+		   aufs_bindex_t bindex, struct au_branch *br)
+{
+	int err, wkq_err;
+	struct au_wbr *wbr;
+	struct dentry *h_parent;
+	char a[PLINK_NAME_LEN];
+	struct qstr tgtname = QSTR_INIT(a, 0);
+
+	wbr = au_sbr(inode->i_sb, bindex)->br_wbr;
+	h_parent = wbr->wbr_plink;
+	tgtname.len = plink_name(a, sizeof(a), inode, bindex);
+
+	/* always superio. */
+	if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) {
+		struct do_whplink_args args = {
+			.errp		= &err,
+			.tgt		= &tgtname,
+			.h_parent	= h_parent,
+			.h_dentry	= h_dentry,
+			.br		= br
+		};
+		wkq_err = au_wkq_wait(call_do_whplink, &args);
+		if (unlikely(wkq_err))
+			err = wkq_err;
+	} else
+		err = do_whplink(&tgtname, h_parent, h_dentry, br);
+
+	return err;
+}
+
+/*
+ * create a new pseudo-link for @h_dentry on @bindex.
+ * the linked inode is held in aufs @inode.
+ */
+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
+		     struct dentry *h_dentry)
+{
+	struct super_block *sb;
+	struct au_sbinfo *sbinfo;
+	struct hlist_bl_head *hbl;
+	struct hlist_bl_node *pos;
+	struct au_icntnr *icntnr;
+	int found, err, cnt, i;
+
+	sb = inode->i_sb;
+	sbinfo = au_sbi(sb);
+	AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
+	AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
+
+	found = au_plink_test(inode);
+	if (found)
+		return;
+
+	i = au_plink_hash(inode->i_ino);
+	hbl = sbinfo->si_plink + i;
+	au_igrab(inode);
+
+	hlist_bl_lock(hbl);
+	hlist_bl_for_each_entry(icntnr, pos, hbl, plink) {
+		if (&icntnr->vfs_inode == inode) {
+			found = 1;
+			break;
+		}
+	}
+	if (!found) {
+		icntnr = container_of(inode, struct au_icntnr, vfs_inode);
+		hlist_bl_add_head(&icntnr->plink, hbl);
+	}
+	hlist_bl_unlock(hbl);
+	if (!found) {
+		cnt = au_hbl_count(hbl);
+#define msg "unexpectedly unblanced or too many pseudo-links"
+		if (cnt > AUFS_PLINK_WARN)
+			AuWarn1(msg ", %d\n", cnt);
+#undef msg
+		err = whplink(h_dentry, inode, bindex, au_sbr(sb, bindex));
+		if (unlikely(err)) {
+			pr_warn("err %d, damaged pseudo link.\n", err);
+			au_hbl_del(&icntnr->plink, hbl);
+			iput(&icntnr->vfs_inode);
+		}
+	} else
+		iput(&icntnr->vfs_inode);
+}
+
+/* free all plinks */
+void au_plink_put(struct super_block *sb, int verbose)
+{
+	int i, warned;
+	struct au_sbinfo *sbinfo;
+	struct hlist_bl_head *hbl;
+	struct hlist_bl_node *pos, *tmp;
+	struct au_icntnr *icntnr;
+
+	SiMustWriteLock(sb);
+
+	sbinfo = au_sbi(sb);
+	AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
+	AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
+
+	/* no spin_lock since sbinfo is write-locked */
+	warned = 0;
+	for (i = 0; i < AuPlink_NHASH; i++) {
+		hbl = sbinfo->si_plink + i;
+		if (!warned && verbose && !hlist_bl_empty(hbl)) {
+			pr_warn("pseudo-link is not flushed");
+			warned = 1;
+		}
+		hlist_bl_for_each_entry_safe(icntnr, pos, tmp, hbl, plink)
+			iput(&icntnr->vfs_inode);
+		INIT_HLIST_BL_HEAD(hbl);
+	}
+}
+
+void au_plink_clean(struct super_block *sb, int verbose)
+{
+	struct dentry *root;
+
+	root = sb->s_root;
+	aufs_write_lock(root);
+	if (au_opt_test(au_mntflags(sb), PLINK))
+		au_plink_put(sb, verbose);
+	aufs_write_unlock(root);
+}
+
+static int au_plink_do_half_refresh(struct inode *inode, aufs_bindex_t br_id)
+{
+	int do_put;
+	aufs_bindex_t btop, bbot, bindex;
+
+	do_put = 0;
+	btop = au_ibtop(inode);
+	bbot = au_ibbot(inode);
+	if (btop >= 0) {
+		for (bindex = btop; bindex <= bbot; bindex++) {
+			if (!au_h_iptr(inode, bindex)
+			    || au_ii_br_id(inode, bindex) != br_id)
+				continue;
+			au_set_h_iptr(inode, bindex, NULL, 0);
+			do_put = 1;
+			break;
+		}
+		if (do_put)
+			for (bindex = btop; bindex <= bbot; bindex++)
+				if (au_h_iptr(inode, bindex)) {
+					do_put = 0;
+					break;
+				}
+	} else
+		do_put = 1;
+
+	return do_put;
+}
+
+/* free the plinks on a branch specified by @br_id */
+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id)
+{
+	struct au_sbinfo *sbinfo;
+	struct hlist_bl_head *hbl;
+	struct hlist_bl_node *pos, *tmp;
+	struct au_icntnr *icntnr;
+	struct inode *inode;
+	int i, do_put;
+
+	SiMustWriteLock(sb);
+
+	sbinfo = au_sbi(sb);
+	AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
+	AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
+
+	/* no bit_lock since sbinfo is write-locked */
+	for (i = 0; i < AuPlink_NHASH; i++) {
+		hbl = sbinfo->si_plink + i;
+		hlist_bl_for_each_entry_safe(icntnr, pos, tmp, hbl, plink) {
+			inode = au_igrab(&icntnr->vfs_inode);
+			ii_write_lock_child(inode);
+			do_put = au_plink_do_half_refresh(inode, br_id);
+			if (do_put) {
+				hlist_bl_del(&icntnr->plink);
+				iput(inode);
+			}
+			ii_write_unlock(inode);
+			iput(inode);
+		}
+	}
+}
diff --git a/include/fs/aufs/poll.c b/include/fs/aufs/poll.c
new file mode 100644
index 000000000..ee5aa0b05
--- /dev/null
+++ b/include/fs/aufs/poll.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * poll operation
+ * There is only one filesystem which implements ->poll operation, currently.
+ */
+
+#include "aufs.h"
+
+unsigned int aufs_poll(struct file *file, poll_table *wait)
+{
+	unsigned int mask;
+	int err;
+	struct file *h_file;
+	struct super_block *sb;
+
+	/* We should pretend an error happened. */
+	mask = POLLERR /* | POLLIN | POLLOUT */;
+	sb = file->f_path.dentry->d_sb;
+	si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
+
+	h_file = au_read_pre(file, /*keep_fi*/0, /*lsc*/0);
+	err = PTR_ERR(h_file);
+	if (IS_ERR(h_file))
+		goto out;
+
+	/* it is not an error if h_file has no operation */
+	mask = DEFAULT_POLLMASK;
+	if (h_file->f_op->poll)
+		mask = h_file->f_op->poll(h_file, wait);
+	fput(h_file); /* instead of au_read_post() */
+
+out:
+	si_read_unlock(sb);
+	if (mask & POLLERR)
+		AuDbg("mask 0x%x\n", mask);
+	return mask;
+}
diff --git a/include/fs/aufs/posix_acl.c b/include/fs/aufs/posix_acl.c
new file mode 100644
index 000000000..bbd7aa2ec
--- /dev/null
+++ b/include/fs/aufs/posix_acl.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2014-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * posix acl operations
+ */
+
+#include <linux/fs.h>
+#include "aufs.h"
+
+struct posix_acl *aufs_get_acl(struct inode *inode, int type)
+{
+	struct posix_acl *acl;
+	int err;
+	aufs_bindex_t bindex;
+	struct inode *h_inode;
+	struct super_block *sb;
+
+	acl = NULL;
+	sb = inode->i_sb;
+	si_read_lock(sb, AuLock_FLUSH);
+	ii_read_lock_child(inode);
+	if (!(sb->s_flags & SB_POSIXACL))
+		goto out;
+
+	bindex = au_ibtop(inode);
+	h_inode = au_h_iptr(inode, bindex);
+	if (unlikely(!h_inode
+		     || ((h_inode->i_mode & S_IFMT)
+			 != (inode->i_mode & S_IFMT)))) {
+		err = au_busy_or_stale();
+		acl = ERR_PTR(err);
+		goto out;
+	}
+
+	/* always topmost only */
+	acl = get_acl(h_inode, type);
+	if (!IS_ERR_OR_NULL(acl))
+		set_cached_acl(inode, type, acl);
+
+out:
+	ii_read_unlock(inode);
+	si_read_unlock(sb);
+
+	AuTraceErrPtr(acl);
+	return acl;
+}
+
+int aufs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+{
+	int err;
+	ssize_t ssz;
+	struct dentry *dentry;
+	struct au_sxattr arg = {
+		.type = AU_ACL_SET,
+		.u.acl_set = {
+			.acl	= acl,
+			.type	= type
+		},
+	};
+
+	IMustLock(inode);
+
+	if (inode->i_ino == AUFS_ROOT_INO)
+		dentry = dget(inode->i_sb->s_root);
+	else {
+		dentry = d_find_alias(inode);
+		if (!dentry)
+			dentry = d_find_any_alias(inode);
+		if (!dentry) {
+			pr_warn("cannot handle this inode, "
+				"please report to aufs-users ML\n");
+			err = -ENOENT;
+			goto out;
+		}
+	}
+
+	ssz = au_sxattr(dentry, inode, &arg);
+	dput(dentry);
+	err = ssz;
+	if (ssz >= 0) {
+		err = 0;
+		set_cached_acl(inode, type, acl);
+	}
+
+out:
+	return err;
+}
diff --git a/include/fs/aufs/procfs.c b/include/fs/aufs/procfs.c
new file mode 100644
index 000000000..6201739c6
--- /dev/null
+++ b/include/fs/aufs/procfs.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright (C) 2010-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * procfs interfaces
+ */
+
+#include <linux/proc_fs.h>
+#include "aufs.h"
+
+static int au_procfs_plm_release(struct inode *inode, struct file *file)
+{
+	struct au_sbinfo *sbinfo;
+
+	sbinfo = file->private_data;
+	if (sbinfo) {
+		au_plink_maint_leave(sbinfo);
+		kobject_put(&sbinfo->si_kobj);
+	}
+
+	return 0;
+}
+
+static void au_procfs_plm_write_clean(struct file *file)
+{
+	struct au_sbinfo *sbinfo;
+
+	sbinfo = file->private_data;
+	if (sbinfo)
+		au_plink_clean(sbinfo->si_sb, /*verbose*/0);
+}
+
+static int au_procfs_plm_write_si(struct file *file, unsigned long id)
+{
+	int err;
+	struct super_block *sb;
+	struct au_sbinfo *sbinfo;
+	struct hlist_bl_node *pos;
+
+	err = -EBUSY;
+	if (unlikely(file->private_data))
+		goto out;
+
+	sb = NULL;
+	/* don't use au_sbilist_lock() here */
+	hlist_bl_lock(&au_sbilist);
+	hlist_bl_for_each_entry(sbinfo, pos, &au_sbilist, si_list)
+		if (id == sysaufs_si_id(sbinfo)) {
+			kobject_get(&sbinfo->si_kobj);
+			sb = sbinfo->si_sb;
+			break;
+		}
+	hlist_bl_unlock(&au_sbilist);
+
+	err = -EINVAL;
+	if (unlikely(!sb))
+		goto out;
+
+	err = au_plink_maint_enter(sb);
+	if (!err)
+		/* keep kobject_get() */
+		file->private_data = sbinfo;
+	else
+		kobject_put(&sbinfo->si_kobj);
+out:
+	return err;
+}
+
+/*
+ * Accept a valid "si=xxxx" only.
+ * Once it is accepted successfully, accept "clean" too.
+ */
+static ssize_t au_procfs_plm_write(struct file *file, const char __user *ubuf,
+				   size_t count, loff_t *ppos)
+{
+	ssize_t err;
+	unsigned long id;
+	/* last newline is allowed */
+	char buf[3 + sizeof(unsigned long) * 2 + 1];
+
+	err = -EACCES;
+	if (unlikely(!capable(CAP_SYS_ADMIN)))
+		goto out;
+
+	err = -EINVAL;
+	if (unlikely(count > sizeof(buf)))
+		goto out;
+
+	err = copy_from_user(buf, ubuf, count);
+	if (unlikely(err)) {
+		err = -EFAULT;
+		goto out;
+	}
+	buf[count] = 0;
+
+	err = -EINVAL;
+	if (!strcmp("clean", buf)) {
+		au_procfs_plm_write_clean(file);
+		goto out_success;
+	} else if (unlikely(strncmp("si=", buf, 3)))
+		goto out;
+
+	err = kstrtoul(buf + 3, 16, &id);
+	if (unlikely(err))
+		goto out;
+
+	err = au_procfs_plm_write_si(file, id);
+	if (unlikely(err))
+		goto out;
+
+out_success:
+	err = count; /* success */
+out:
+	return err;
+}
+
+static const struct file_operations au_procfs_plm_fop = {
+	.write		= au_procfs_plm_write,
+	.release	= au_procfs_plm_release,
+	.owner		= THIS_MODULE
+};
+
+/* ---------------------------------------------------------------------- */
+
+static struct proc_dir_entry *au_procfs_dir;
+
+void au_procfs_fin(void)
+{
+	remove_proc_entry(AUFS_PLINK_MAINT_NAME, au_procfs_dir);
+	remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
+}
+
+int __init au_procfs_init(void)
+{
+	int err;
+	struct proc_dir_entry *entry;
+
+	err = -ENOMEM;
+	au_procfs_dir = proc_mkdir(AUFS_PLINK_MAINT_DIR, NULL);
+	if (unlikely(!au_procfs_dir))
+		goto out;
+
+	entry = proc_create(AUFS_PLINK_MAINT_NAME, S_IFREG | S_IWUSR,
+			    au_procfs_dir, &au_procfs_plm_fop);
+	if (unlikely(!entry))
+		goto out_dir;
+
+	err = 0;
+	goto out; /* success */
+
+
+out_dir:
+	remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
+out:
+	return err;
+}
diff --git a/include/fs/aufs/rdu.c b/include/fs/aufs/rdu.c
new file mode 100644
index 000000000..f6a10f553
--- /dev/null
+++ b/include/fs/aufs/rdu.c
@@ -0,0 +1,381 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * readdir in userspace.
+ */
+
+#include <linux/compat.h>
+#include <linux/fs_stack.h>
+#include <linux/security.h>
+#include "aufs.h"
+
+/* bits for struct aufs_rdu.flags */
+#define	AuRdu_CALLED	1
+#define	AuRdu_CONT	(1 << 1)
+#define	AuRdu_FULL	(1 << 2)
+#define au_ftest_rdu(flags, name)	((flags) & AuRdu_##name)
+#define au_fset_rdu(flags, name) \
+	do { (flags) |= AuRdu_##name; } while (0)
+#define au_fclr_rdu(flags, name) \
+	do { (flags) &= ~AuRdu_##name; } while (0)
+
+struct au_rdu_arg {
+	struct dir_context		ctx;
+	struct aufs_rdu			*rdu;
+	union au_rdu_ent_ul		ent;
+	unsigned long			end;
+
+	struct super_block		*sb;
+	int				err;
+};
+
+static int au_rdu_fill(struct dir_context *ctx, const char *name, int nlen,
+		       loff_t offset, u64 h_ino, unsigned int d_type)
+{
+	int err, len;
+	struct au_rdu_arg *arg = container_of(ctx, struct au_rdu_arg, ctx);
+	struct aufs_rdu *rdu = arg->rdu;
+	struct au_rdu_ent ent;
+
+	err = 0;
+	arg->err = 0;
+	au_fset_rdu(rdu->cookie.flags, CALLED);
+	len = au_rdu_len(nlen);
+	if (arg->ent.ul + len  < arg->end) {
+		ent.ino = h_ino;
+		ent.bindex = rdu->cookie.bindex;
+		ent.type = d_type;
+		ent.nlen = nlen;
+		if (unlikely(nlen > AUFS_MAX_NAMELEN))
+			ent.type = DT_UNKNOWN;
+
+		/* unnecessary to support mmap_sem since this is a dir */
+		err = -EFAULT;
+		if (copy_to_user(arg->ent.e, &ent, sizeof(ent)))
+			goto out;
+		if (copy_to_user(arg->ent.e->name, name, nlen))
+			goto out;
+		/* the terminating NULL */
+		if (__put_user(0, arg->ent.e->name + nlen))
+			goto out;
+		err = 0;
+		/* AuDbg("%p, %.*s\n", arg->ent.p, nlen, name); */
+		arg->ent.ul += len;
+		rdu->rent++;
+	} else {
+		err = -EFAULT;
+		au_fset_rdu(rdu->cookie.flags, FULL);
+		rdu->full = 1;
+		rdu->tail = arg->ent;
+	}
+
+out:
+	/* AuTraceErr(err); */
+	return err;
+}
+
+static int au_rdu_do(struct file *h_file, struct au_rdu_arg *arg)
+{
+	int err;
+	loff_t offset;
+	struct au_rdu_cookie *cookie = &arg->rdu->cookie;
+
+	/* we don't have to care (FMODE_32BITHASH | FMODE_64BITHASH) for ext4 */
+	offset = vfsub_llseek(h_file, cookie->h_pos, SEEK_SET);
+	err = offset;
+	if (unlikely(offset != cookie->h_pos))
+		goto out;
+
+	err = 0;
+	do {
+		arg->err = 0;
+		au_fclr_rdu(cookie->flags, CALLED);
+		/* smp_mb(); */
+		err = vfsub_iterate_dir(h_file, &arg->ctx);
+		if (err >= 0)
+			err = arg->err;
+	} while (!err
+		 && au_ftest_rdu(cookie->flags, CALLED)
+		 && !au_ftest_rdu(cookie->flags, FULL));
+	cookie->h_pos = h_file->f_pos;
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+static int au_rdu(struct file *file, struct aufs_rdu *rdu)
+{
+	int err;
+	aufs_bindex_t bbot;
+	struct au_rdu_arg arg = {
+		.ctx = {
+			.actor = au_rdu_fill
+		}
+	};
+	struct dentry *dentry;
+	struct inode *inode;
+	struct file *h_file;
+	struct au_rdu_cookie *cookie = &rdu->cookie;
+
+	err = !access_ok(VERIFY_WRITE, rdu->ent.e, rdu->sz);
+	if (unlikely(err)) {
+		err = -EFAULT;
+		AuTraceErr(err);
+		goto out;
+	}
+	rdu->rent = 0;
+	rdu->tail = rdu->ent;
+	rdu->full = 0;
+	arg.rdu = rdu;
+	arg.ent = rdu->ent;
+	arg.end = arg.ent.ul;
+	arg.end += rdu->sz;
+
+	err = -ENOTDIR;
+	if (unlikely(!file->f_op->iterate && !file->f_op->iterate_shared))
+		goto out;
+
+	err = security_file_permission(file, MAY_READ);
+	AuTraceErr(err);
+	if (unlikely(err))
+		goto out;
+
+	dentry = file->f_path.dentry;
+	inode = d_inode(dentry);
+	inode_lock_shared(inode);
+
+	arg.sb = inode->i_sb;
+	err = si_read_lock(arg.sb, AuLock_FLUSH | AuLock_NOPLM);
+	if (unlikely(err))
+		goto out_mtx;
+	err = au_alive_dir(dentry);
+	if (unlikely(err))
+		goto out_si;
+	/* todo: reval? */
+	fi_read_lock(file);
+
+	err = -EAGAIN;
+	if (unlikely(au_ftest_rdu(cookie->flags, CONT)
+		     && cookie->generation != au_figen(file)))
+		goto out_unlock;
+
+	err = 0;
+	if (!rdu->blk) {
+		rdu->blk = au_sbi(arg.sb)->si_rdblk;
+		if (!rdu->blk)
+			rdu->blk = au_dir_size(file, /*dentry*/NULL);
+	}
+	bbot = au_fbtop(file);
+	if (cookie->bindex < bbot)
+		cookie->bindex = bbot;
+	bbot = au_fbbot_dir(file);
+	/* AuDbg("b%d, b%d\n", cookie->bindex, bbot); */
+	for (; !err && cookie->bindex <= bbot;
+	     cookie->bindex++, cookie->h_pos = 0) {
+		h_file = au_hf_dir(file, cookie->bindex);
+		if (!h_file)
+			continue;
+
+		au_fclr_rdu(cookie->flags, FULL);
+		err = au_rdu_do(h_file, &arg);
+		AuTraceErr(err);
+		if (unlikely(au_ftest_rdu(cookie->flags, FULL) || err))
+			break;
+	}
+	AuDbg("rent %llu\n", rdu->rent);
+
+	if (!err && !au_ftest_rdu(cookie->flags, CONT)) {
+		rdu->shwh = !!au_opt_test(au_sbi(arg.sb)->si_mntflags, SHWH);
+		au_fset_rdu(cookie->flags, CONT);
+		cookie->generation = au_figen(file);
+	}
+
+	ii_read_lock_child(inode);
+	fsstack_copy_attr_atime(inode, au_h_iptr(inode, au_ibtop(inode)));
+	ii_read_unlock(inode);
+
+out_unlock:
+	fi_read_unlock(file);
+out_si:
+	si_read_unlock(arg.sb);
+out_mtx:
+	inode_unlock_shared(inode);
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+static int au_rdu_ino(struct file *file, struct aufs_rdu *rdu)
+{
+	int err;
+	ino_t ino;
+	unsigned long long nent;
+	union au_rdu_ent_ul *u;
+	struct au_rdu_ent ent;
+	struct super_block *sb;
+
+	err = 0;
+	nent = rdu->nent;
+	u = &rdu->ent;
+	sb = file->f_path.dentry->d_sb;
+	si_read_lock(sb, AuLock_FLUSH);
+	while (nent-- > 0) {
+		/* unnecessary to support mmap_sem since this is a dir */
+		err = copy_from_user(&ent, u->e, sizeof(ent));
+		if (!err)
+			err = !access_ok(VERIFY_WRITE, &u->e->ino, sizeof(ino));
+		if (unlikely(err)) {
+			err = -EFAULT;
+			AuTraceErr(err);
+			break;
+		}
+
+		/* AuDbg("b%d, i%llu\n", ent.bindex, ent.ino); */
+		if (!ent.wh)
+			err = au_ino(sb, ent.bindex, ent.ino, ent.type, &ino);
+		else
+			err = au_wh_ino(sb, ent.bindex, ent.ino, ent.type,
+					&ino);
+		if (unlikely(err)) {
+			AuTraceErr(err);
+			break;
+		}
+
+		err = __put_user(ino, &u->e->ino);
+		if (unlikely(err)) {
+			err = -EFAULT;
+			AuTraceErr(err);
+			break;
+		}
+		u->ul += au_rdu_len(ent.nlen);
+	}
+	si_read_unlock(sb);
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_rdu_verify(struct aufs_rdu *rdu)
+{
+	AuDbg("rdu{%llu, %p, %u | %u | %llu, %u, %u | "
+	      "%llu, b%d, 0x%x, g%u}\n",
+	      rdu->sz, rdu->ent.e, rdu->verify[AufsCtlRduV_SZ],
+	      rdu->blk,
+	      rdu->rent, rdu->shwh, rdu->full,
+	      rdu->cookie.h_pos, rdu->cookie.bindex, rdu->cookie.flags,
+	      rdu->cookie.generation);
+
+	if (rdu->verify[AufsCtlRduV_SZ] == sizeof(*rdu))
+		return 0;
+
+	AuDbg("%u:%u\n",
+	      rdu->verify[AufsCtlRduV_SZ], (unsigned int)sizeof(*rdu));
+	return -EINVAL;
+}
+
+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	long err, e;
+	struct aufs_rdu rdu;
+	void __user *p = (void __user *)arg;
+
+	err = copy_from_user(&rdu, p, sizeof(rdu));
+	if (unlikely(err)) {
+		err = -EFAULT;
+		AuTraceErr(err);
+		goto out;
+	}
+	err = au_rdu_verify(&rdu);
+	if (unlikely(err))
+		goto out;
+
+	switch (cmd) {
+	case AUFS_CTL_RDU:
+		err = au_rdu(file, &rdu);
+		if (unlikely(err))
+			break;
+
+		e = copy_to_user(p, &rdu, sizeof(rdu));
+		if (unlikely(e)) {
+			err = -EFAULT;
+			AuTraceErr(err);
+		}
+		break;
+	case AUFS_CTL_RDU_INO:
+		err = au_rdu_ino(file, &rdu);
+		break;
+
+	default:
+		/* err = -ENOTTY; */
+		err = -EINVAL;
+	}
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+#ifdef CONFIG_COMPAT
+long au_rdu_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	long err, e;
+	struct aufs_rdu rdu;
+	void __user *p = compat_ptr(arg);
+
+	/* todo: get_user()? */
+	err = copy_from_user(&rdu, p, sizeof(rdu));
+	if (unlikely(err)) {
+		err = -EFAULT;
+		AuTraceErr(err);
+		goto out;
+	}
+	rdu.ent.e = compat_ptr(rdu.ent.ul);
+	err = au_rdu_verify(&rdu);
+	if (unlikely(err))
+		goto out;
+
+	switch (cmd) {
+	case AUFS_CTL_RDU:
+		err = au_rdu(file, &rdu);
+		if (unlikely(err))
+			break;
+
+		rdu.ent.ul = ptr_to_compat(rdu.ent.e);
+		rdu.tail.ul = ptr_to_compat(rdu.tail.e);
+		e = copy_to_user(p, &rdu, sizeof(rdu));
+		if (unlikely(e)) {
+			err = -EFAULT;
+			AuTraceErr(err);
+		}
+		break;
+	case AUFS_CTL_RDU_INO:
+		err = au_rdu_ino(file, &rdu);
+		break;
+
+	default:
+		/* err = -ENOTTY; */
+		err = -EINVAL;
+	}
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+#endif
diff --git a/include/fs/aufs/rwsem.h b/include/fs/aufs/rwsem.h
new file mode 100644
index 000000000..e4711f6f7
--- /dev/null
+++ b/include/fs/aufs/rwsem.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * simple read-write semaphore wrappers
+ */
+
+#ifndef __AUFS_RWSEM_H__
+#define __AUFS_RWSEM_H__
+
+#ifdef __KERNEL__
+
+#include "debug.h"
+
+/* in the futre, the name 'au_rwsem' will be totally gone */
+#define au_rwsem	rw_semaphore
+
+/* to debug easier, do not make them inlined functions */
+#define AuRwMustNoWaiters(rw)	AuDebugOn(rwsem_is_contended(rw))
+/* rwsem_is_locked() is unusable */
+#define AuRwMustReadLock(rw)	AuDebugOn(!lockdep_recursing(current) \
+					  && debug_locks \
+					  && !lockdep_is_held_type(rw, 1))
+#define AuRwMustWriteLock(rw)	AuDebugOn(!lockdep_recursing(current) \
+					  && debug_locks \
+					  && !lockdep_is_held_type(rw, 0))
+#define AuRwMustAnyLock(rw)	AuDebugOn(!lockdep_recursing(current) \
+					  && debug_locks \
+					  && !lockdep_is_held(rw))
+#define AuRwDestroy(rw)		AuDebugOn(!lockdep_recursing(current) \
+					  && debug_locks \
+					  && lockdep_is_held(rw))
+
+#define au_rw_init(rw)	init_rwsem(rw)
+
+#define au_rw_init_wlock(rw) do {		\
+		au_rw_init(rw);			\
+		down_write(rw);			\
+	} while (0)
+
+#define au_rw_init_wlock_nested(rw, lsc) do {	\
+		au_rw_init(rw);			\
+		down_write_nested(rw, lsc);	\
+	} while (0)
+
+#define au_rw_read_lock(rw)		down_read(rw)
+#define au_rw_read_lock_nested(rw, lsc)	down_read_nested(rw, lsc)
+#define au_rw_read_unlock(rw)		up_read(rw)
+#define au_rw_dgrade_lock(rw)		downgrade_write(rw)
+#define au_rw_write_lock(rw)		down_write(rw)
+#define au_rw_write_lock_nested(rw, lsc) down_write_nested(rw, lsc)
+#define au_rw_write_unlock(rw)		up_write(rw)
+/* why is not _nested version defined? */
+#define au_rw_read_trylock(rw)		down_read_trylock(rw)
+#define au_rw_write_trylock(rw)		down_write_trylock(rw)
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_RWSEM_H__ */
diff --git a/include/fs/aufs/sbinfo.c b/include/fs/aufs/sbinfo.c
new file mode 100644
index 000000000..eaab8f5c3
--- /dev/null
+++ b/include/fs/aufs/sbinfo.c
@@ -0,0 +1,304 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * superblock private data
+ */
+
+#include "aufs.h"
+
+/*
+ * they are necessary regardless sysfs is disabled.
+ */
+void au_si_free(struct kobject *kobj)
+{
+	int i;
+	struct au_sbinfo *sbinfo;
+	char *locked __maybe_unused; /* debug only */
+
+	sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
+	for (i = 0; i < AuPlink_NHASH; i++)
+		AuDebugOn(!hlist_bl_empty(sbinfo->si_plink + i));
+	AuDebugOn(atomic_read(&sbinfo->si_nowait.nw_len));
+
+	AuDebugOn(percpu_counter_sum(&sbinfo->si_ninodes));
+	percpu_counter_destroy(&sbinfo->si_ninodes);
+	AuDebugOn(percpu_counter_sum(&sbinfo->si_nfiles));
+	percpu_counter_destroy(&sbinfo->si_nfiles);
+
+	au_rw_write_lock(&sbinfo->si_rwsem);
+	au_br_free(sbinfo);
+	au_rw_write_unlock(&sbinfo->si_rwsem);
+
+	kfree(sbinfo->si_branch);
+	mutex_destroy(&sbinfo->si_xib_mtx);
+	AuRwDestroy(&sbinfo->si_rwsem);
+
+	kfree(sbinfo);
+}
+
+int au_si_alloc(struct super_block *sb)
+{
+	int err, i;
+	struct au_sbinfo *sbinfo;
+
+	err = -ENOMEM;
+	sbinfo = kzalloc(sizeof(*sbinfo), GFP_NOFS);
+	if (unlikely(!sbinfo))
+		goto out;
+
+	/* will be reallocated separately */
+	sbinfo->si_branch = kzalloc(sizeof(*sbinfo->si_branch), GFP_NOFS);
+	if (unlikely(!sbinfo->si_branch))
+		goto out_sbinfo;
+
+	err = sysaufs_si_init(sbinfo);
+	if (unlikely(err))
+		goto out_br;
+
+	au_nwt_init(&sbinfo->si_nowait);
+	au_rw_init_wlock(&sbinfo->si_rwsem);
+
+	percpu_counter_init(&sbinfo->si_ninodes, 0, GFP_NOFS);
+	percpu_counter_init(&sbinfo->si_nfiles, 0, GFP_NOFS);
+
+	sbinfo->si_bbot = -1;
+	sbinfo->si_last_br_id = AUFS_BRANCH_MAX / 2;
+
+	sbinfo->si_wbr_copyup = AuWbrCopyup_Def;
+	sbinfo->si_wbr_create = AuWbrCreate_Def;
+	sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + sbinfo->si_wbr_copyup;
+	sbinfo->si_wbr_create_ops = au_wbr_create_ops + sbinfo->si_wbr_create;
+
+	au_fhsm_init(sbinfo);
+
+	sbinfo->si_mntflags = au_opts_plink(AuOpt_Def);
+
+	sbinfo->si_xino_jiffy = jiffies;
+	sbinfo->si_xino_expire
+		= msecs_to_jiffies(AUFS_XINO_DEF_SEC * MSEC_PER_SEC);
+	mutex_init(&sbinfo->si_xib_mtx);
+	sbinfo->si_xino_brid = -1;
+	/* leave si_xib_last_pindex and si_xib_next_bit */
+
+	INIT_HLIST_BL_HEAD(&sbinfo->si_aopen);
+
+	sbinfo->si_rdcache = msecs_to_jiffies(AUFS_RDCACHE_DEF * MSEC_PER_SEC);
+	sbinfo->si_rdblk = AUFS_RDBLK_DEF;
+	sbinfo->si_rdhash = AUFS_RDHASH_DEF;
+	sbinfo->si_dirwh = AUFS_DIRWH_DEF;
+
+	for (i = 0; i < AuPlink_NHASH; i++)
+		INIT_HLIST_BL_HEAD(sbinfo->si_plink + i);
+	init_waitqueue_head(&sbinfo->si_plink_wq);
+	spin_lock_init(&sbinfo->si_plink_maint_lock);
+
+	INIT_HLIST_BL_HEAD(&sbinfo->si_files);
+
+	/* with getattr by default */
+	sbinfo->si_iop_array = aufs_iop;
+
+	/* leave other members for sysaufs and si_mnt. */
+	sbinfo->si_sb = sb;
+	sb->s_fs_info = sbinfo;
+	si_pid_set(sb);
+	return 0; /* success */
+
+out_br:
+	kfree(sbinfo->si_branch);
+out_sbinfo:
+	kfree(sbinfo);
+out:
+	return err;
+}
+
+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr, int may_shrink)
+{
+	int err, sz;
+	struct au_branch **brp;
+
+	AuRwMustWriteLock(&sbinfo->si_rwsem);
+
+	err = -ENOMEM;
+	sz = sizeof(*brp) * (sbinfo->si_bbot + 1);
+	if (unlikely(!sz))
+		sz = sizeof(*brp);
+	brp = au_kzrealloc(sbinfo->si_branch, sz, sizeof(*brp) * nbr, GFP_NOFS,
+			   may_shrink);
+	if (brp) {
+		sbinfo->si_branch = brp;
+		err = 0;
+	}
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+unsigned int au_sigen_inc(struct super_block *sb)
+{
+	unsigned int gen;
+	struct inode *inode;
+
+	SiMustWriteLock(sb);
+
+	gen = ++au_sbi(sb)->si_generation;
+	au_update_digen(sb->s_root);
+	inode = d_inode(sb->s_root);
+	au_update_iigen(inode, /*half*/0);
+	inode->i_version++;
+	return gen;
+}
+
+aufs_bindex_t au_new_br_id(struct super_block *sb)
+{
+	aufs_bindex_t br_id;
+	int i;
+	struct au_sbinfo *sbinfo;
+
+	SiMustWriteLock(sb);
+
+	sbinfo = au_sbi(sb);
+	for (i = 0; i <= AUFS_BRANCH_MAX; i++) {
+		br_id = ++sbinfo->si_last_br_id;
+		AuDebugOn(br_id < 0);
+		if (br_id && au_br_index(sb, br_id) < 0)
+			return br_id;
+	}
+
+	return -1;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* it is ok that new 'nwt' tasks are appended while we are sleeping */
+int si_read_lock(struct super_block *sb, int flags)
+{
+	int err;
+
+	err = 0;
+	if (au_ftest_lock(flags, FLUSH))
+		au_nwt_flush(&au_sbi(sb)->si_nowait);
+
+	si_noflush_read_lock(sb);
+	err = au_plink_maint(sb, flags);
+	if (unlikely(err))
+		si_read_unlock(sb);
+
+	return err;
+}
+
+int si_write_lock(struct super_block *sb, int flags)
+{
+	int err;
+
+	if (au_ftest_lock(flags, FLUSH))
+		au_nwt_flush(&au_sbi(sb)->si_nowait);
+
+	si_noflush_write_lock(sb);
+	err = au_plink_maint(sb, flags);
+	if (unlikely(err))
+		si_write_unlock(sb);
+
+	return err;
+}
+
+/* dentry and super_block lock. call at entry point */
+int aufs_read_lock(struct dentry *dentry, int flags)
+{
+	int err;
+	struct super_block *sb;
+
+	sb = dentry->d_sb;
+	err = si_read_lock(sb, flags);
+	if (unlikely(err))
+		goto out;
+
+	if (au_ftest_lock(flags, DW))
+		di_write_lock_child(dentry);
+	else
+		di_read_lock_child(dentry, flags);
+
+	if (au_ftest_lock(flags, GEN)) {
+		err = au_digen_test(dentry, au_sigen(sb));
+		if (!au_opt_test(au_mntflags(sb), UDBA_NONE))
+			AuDebugOn(!err && au_dbrange_test(dentry));
+		else if (!err)
+			err = au_dbrange_test(dentry);
+		if (unlikely(err))
+			aufs_read_unlock(dentry, flags);
+	}
+
+out:
+	return err;
+}
+
+void aufs_read_unlock(struct dentry *dentry, int flags)
+{
+	if (au_ftest_lock(flags, DW))
+		di_write_unlock(dentry);
+	else
+		di_read_unlock(dentry, flags);
+	si_read_unlock(dentry->d_sb);
+}
+
+void aufs_write_lock(struct dentry *dentry)
+{
+	si_write_lock(dentry->d_sb, AuLock_FLUSH | AuLock_NOPLMW);
+	di_write_lock_child(dentry);
+}
+
+void aufs_write_unlock(struct dentry *dentry)
+{
+	di_write_unlock(dentry);
+	si_write_unlock(dentry->d_sb);
+}
+
+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags)
+{
+	int err;
+	unsigned int sigen;
+	struct super_block *sb;
+
+	sb = d1->d_sb;
+	err = si_read_lock(sb, flags);
+	if (unlikely(err))
+		goto out;
+
+	di_write_lock2_child(d1, d2, au_ftest_lock(flags, DIRS));
+
+	if (au_ftest_lock(flags, GEN)) {
+		sigen = au_sigen(sb);
+		err = au_digen_test(d1, sigen);
+		AuDebugOn(!err && au_dbrange_test(d1));
+		if (!err) {
+			err = au_digen_test(d2, sigen);
+			AuDebugOn(!err && au_dbrange_test(d2));
+		}
+		if (unlikely(err))
+			aufs_read_and_write_unlock2(d1, d2);
+	}
+
+out:
+	return err;
+}
+
+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2)
+{
+	di_write_unlock2(d1, d2);
+	si_read_unlock(d1->d_sb);
+}
diff --git a/include/fs/aufs/super.c b/include/fs/aufs/super.c
new file mode 100644
index 000000000..5652497d6
--- /dev/null
+++ b/include/fs/aufs/super.c
@@ -0,0 +1,1046 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * mount and super_block operations
+ */
+
+#include <linux/mm.h>
+#include <linux/seq_file.h>
+#include <linux/statfs.h>
+#include <linux/vmalloc.h>
+#include "aufs.h"
+
+/*
+ * super_operations
+ */
+static struct inode *aufs_alloc_inode(struct super_block *sb __maybe_unused)
+{
+	struct au_icntnr *c;
+
+	c = au_cache_alloc_icntnr();
+	if (c) {
+		au_icntnr_init(c);
+		c->vfs_inode.i_version = 1; /* sigen(sb); */
+		c->iinfo.ii_hinode = NULL;
+		return &c->vfs_inode;
+	}
+	return NULL;
+}
+
+static void aufs_destroy_inode_cb(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+
+	au_cache_free_icntnr(container_of(inode, struct au_icntnr, vfs_inode));
+}
+
+static void aufs_destroy_inode(struct inode *inode)
+{
+	if (!au_is_bad_inode(inode))
+		au_iinfo_fin(inode);
+	call_rcu(&inode->i_rcu, aufs_destroy_inode_cb);
+}
+
+struct inode *au_iget_locked(struct super_block *sb, ino_t ino)
+{
+	struct inode *inode;
+	int err;
+
+	inode = iget_locked(sb, ino);
+	if (unlikely(!inode)) {
+		inode = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+	if (!(inode->i_state & I_NEW))
+		goto out;
+
+	err = au_xigen_new(inode);
+	if (!err)
+		err = au_iinfo_init(inode);
+	if (!err)
+		inode->i_version++;
+	else {
+		iget_failed(inode);
+		inode = ERR_PTR(err);
+	}
+
+out:
+	/* never return NULL */
+	AuDebugOn(!inode);
+	AuTraceErrPtr(inode);
+	return inode;
+}
+
+/* lock free root dinfo */
+static int au_show_brs(struct seq_file *seq, struct super_block *sb)
+{
+	int err;
+	aufs_bindex_t bindex, bbot;
+	struct path path;
+	struct au_hdentry *hdp;
+	struct au_branch *br;
+	au_br_perm_str_t perm;
+
+	err = 0;
+	bbot = au_sbbot(sb);
+	bindex = 0;
+	hdp = au_hdentry(au_di(sb->s_root), bindex);
+	for (; !err && bindex <= bbot; bindex++, hdp++) {
+		br = au_sbr(sb, bindex);
+		path.mnt = au_br_mnt(br);
+		path.dentry = hdp->hd_dentry;
+		err = au_seq_path(seq, &path);
+		if (!err) {
+			au_optstr_br_perm(&perm, br->br_perm);
+			seq_printf(seq, "=%s", perm.a);
+			if (bindex != bbot)
+				seq_putc(seq, ':');
+		}
+	}
+	if (unlikely(err || seq_has_overflowed(seq)))
+		err = -E2BIG;
+
+	return err;
+}
+
+static void au_gen_fmt(char *fmt, int len __maybe_unused, const char *pat,
+		       const char *append)
+{
+	char *p;
+
+	p = fmt;
+	while (*pat != ':')
+		*p++ = *pat++;
+	*p++ = *pat++;
+	strcpy(p, append);
+	AuDebugOn(strlen(fmt) >= len);
+}
+
+static void au_show_wbr_create(struct seq_file *m, int v,
+			       struct au_sbinfo *sbinfo)
+{
+	const char *pat;
+	char fmt[32];
+	struct au_wbr_mfs *mfs;
+
+	AuRwMustAnyLock(&sbinfo->si_rwsem);
+
+	seq_puts(m, ",create=");
+	pat = au_optstr_wbr_create(v);
+	mfs = &sbinfo->si_wbr_mfs;
+	switch (v) {
+	case AuWbrCreate_TDP:
+	case AuWbrCreate_RR:
+	case AuWbrCreate_MFS:
+	case AuWbrCreate_PMFS:
+		seq_puts(m, pat);
+		break;
+	case AuWbrCreate_MFSRR:
+	case AuWbrCreate_TDMFS:
+	case AuWbrCreate_PMFSRR:
+		au_gen_fmt(fmt, sizeof(fmt), pat, "%llu");
+		seq_printf(m, fmt, mfs->mfsrr_watermark);
+		break;
+	case AuWbrCreate_MFSV:
+	case AuWbrCreate_PMFSV:
+		au_gen_fmt(fmt, sizeof(fmt), pat, "%lu");
+		seq_printf(m, fmt,
+			   jiffies_to_msecs(mfs->mfs_expire)
+			   / MSEC_PER_SEC);
+		break;
+	case AuWbrCreate_MFSRRV:
+	case AuWbrCreate_TDMFSV:
+	case AuWbrCreate_PMFSRRV:
+		au_gen_fmt(fmt, sizeof(fmt), pat, "%llu:%lu");
+		seq_printf(m, fmt, mfs->mfsrr_watermark,
+			   jiffies_to_msecs(mfs->mfs_expire) / MSEC_PER_SEC);
+		break;
+	default:
+		BUG();
+	}
+}
+
+static int au_show_xino(struct seq_file *seq, struct super_block *sb)
+{
+#ifdef CONFIG_SYSFS
+	return 0;
+#else
+	int err;
+	const int len = sizeof(AUFS_XINO_FNAME) - 1;
+	aufs_bindex_t bindex, brid;
+	struct qstr *name;
+	struct file *f;
+	struct dentry *d, *h_root;
+
+	AuRwMustAnyLock(&sbinfo->si_rwsem);
+
+	err = 0;
+	f = au_sbi(sb)->si_xib;
+	if (!f)
+		goto out;
+
+	/* stop printing the default xino path on the first writable branch */
+	h_root = NULL;
+	brid = au_xino_brid(sb);
+	if (brid >= 0) {
+		bindex = au_br_index(sb, brid);
+		h_root = au_hdentry(au_di(sb->s_root), bindex)->hd_dentry;
+	}
+	d = f->f_path.dentry;
+	name = &d->d_name;
+	/* safe ->d_parent because the file is unlinked */
+	if (d->d_parent == h_root
+	    && name->len == len
+	    && !memcmp(name->name, AUFS_XINO_FNAME, len))
+		goto out;
+
+	seq_puts(seq, ",xino=");
+	err = au_xino_path(seq, f);
+
+out:
+	return err;
+#endif
+}
+
+/* seq_file will re-call me in case of too long string */
+static int aufs_show_options(struct seq_file *m, struct dentry *dentry)
+{
+	int err;
+	unsigned int mnt_flags, v;
+	struct super_block *sb;
+	struct au_sbinfo *sbinfo;
+
+#define AuBool(name, str) do { \
+	v = au_opt_test(mnt_flags, name); \
+	if (v != au_opt_test(AuOpt_Def, name)) \
+		seq_printf(m, ",%s" #str, v ? "" : "no"); \
+} while (0)
+
+#define AuStr(name, str) do { \
+	v = mnt_flags & AuOptMask_##name; \
+	if (v != (AuOpt_Def & AuOptMask_##name)) \
+		seq_printf(m, "," #str "=%s", au_optstr_##str(v)); \
+} while (0)
+
+#define AuUInt(name, str, val) do { \
+	if (val != AUFS_##name##_DEF) \
+		seq_printf(m, "," #str "=%u", val); \
+} while (0)
+
+	sb = dentry->d_sb;
+	if (sb->s_flags & SB_POSIXACL)
+		seq_puts(m, ",acl");
+
+	/* lock free root dinfo */
+	si_noflush_read_lock(sb);
+	sbinfo = au_sbi(sb);
+	seq_printf(m, ",si=%lx", sysaufs_si_id(sbinfo));
+
+	mnt_flags = au_mntflags(sb);
+	if (au_opt_test(mnt_flags, XINO)) {
+		err = au_show_xino(m, sb);
+		if (unlikely(err))
+			goto out;
+	} else
+		seq_puts(m, ",noxino");
+
+	AuBool(TRUNC_XINO, trunc_xino);
+	AuStr(UDBA, udba);
+	AuBool(SHWH, shwh);
+	AuBool(PLINK, plink);
+	AuBool(DIO, dio);
+	AuBool(DIRPERM1, dirperm1);
+
+	v = sbinfo->si_wbr_create;
+	if (v != AuWbrCreate_Def)
+		au_show_wbr_create(m, v, sbinfo);
+
+	v = sbinfo->si_wbr_copyup;
+	if (v != AuWbrCopyup_Def)
+		seq_printf(m, ",cpup=%s", au_optstr_wbr_copyup(v));
+
+	v = au_opt_test(mnt_flags, ALWAYS_DIROPQ);
+	if (v != au_opt_test(AuOpt_Def, ALWAYS_DIROPQ))
+		seq_printf(m, ",diropq=%c", v ? 'a' : 'w');
+
+	AuUInt(DIRWH, dirwh, sbinfo->si_dirwh);
+
+	v = jiffies_to_msecs(sbinfo->si_rdcache) / MSEC_PER_SEC;
+	AuUInt(RDCACHE, rdcache, v);
+
+	AuUInt(RDBLK, rdblk, sbinfo->si_rdblk);
+	AuUInt(RDHASH, rdhash, sbinfo->si_rdhash);
+
+	au_fhsm_show(m, sbinfo);
+
+	AuBool(DIRREN, dirren);
+	AuBool(SUM, sum);
+	/* AuBool(SUM_W, wsum); */
+	AuBool(WARN_PERM, warn_perm);
+	AuBool(VERBOSE, verbose);
+
+out:
+	/* be sure to print "br:" last */
+	if (!sysaufs_brs) {
+		seq_puts(m, ",br:");
+		au_show_brs(m, sb);
+	}
+	si_read_unlock(sb);
+	return 0;
+
+#undef AuBool
+#undef AuStr
+#undef AuUInt
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* sum mode which returns the summation for statfs(2) */
+
+static u64 au_add_till_max(u64 a, u64 b)
+{
+	u64 old;
+
+	old = a;
+	a += b;
+	if (old <= a)
+		return a;
+	return ULLONG_MAX;
+}
+
+static u64 au_mul_till_max(u64 a, long mul)
+{
+	u64 old;
+
+	old = a;
+	a *= mul;
+	if (old <= a)
+		return a;
+	return ULLONG_MAX;
+}
+
+static int au_statfs_sum(struct super_block *sb, struct kstatfs *buf)
+{
+	int err;
+	long bsize, factor;
+	u64 blocks, bfree, bavail, files, ffree;
+	aufs_bindex_t bbot, bindex, i;
+	unsigned char shared;
+	struct path h_path;
+	struct super_block *h_sb;
+
+	err = 0;
+	bsize = LONG_MAX;
+	files = 0;
+	ffree = 0;
+	blocks = 0;
+	bfree = 0;
+	bavail = 0;
+	bbot = au_sbbot(sb);
+	for (bindex = 0; bindex <= bbot; bindex++) {
+		h_path.mnt = au_sbr_mnt(sb, bindex);
+		h_sb = h_path.mnt->mnt_sb;
+		shared = 0;
+		for (i = 0; !shared && i < bindex; i++)
+			shared = (au_sbr_sb(sb, i) == h_sb);
+		if (shared)
+			continue;
+
+		/* sb->s_root for NFS is unreliable */
+		h_path.dentry = h_path.mnt->mnt_root;
+		err = vfs_statfs(&h_path, buf);
+		if (unlikely(err))
+			goto out;
+
+		if (bsize > buf->f_bsize) {
+			/*
+			 * we will reduce bsize, so we have to expand blocks
+			 * etc. to match them again
+			 */
+			factor = (bsize / buf->f_bsize);
+			blocks = au_mul_till_max(blocks, factor);
+			bfree = au_mul_till_max(bfree, factor);
+			bavail = au_mul_till_max(bavail, factor);
+			bsize = buf->f_bsize;
+		}
+
+		factor = (buf->f_bsize / bsize);
+		blocks = au_add_till_max(blocks,
+				au_mul_till_max(buf->f_blocks, factor));
+		bfree = au_add_till_max(bfree,
+				au_mul_till_max(buf->f_bfree, factor));
+		bavail = au_add_till_max(bavail,
+				au_mul_till_max(buf->f_bavail, factor));
+		files = au_add_till_max(files, buf->f_files);
+		ffree = au_add_till_max(ffree, buf->f_ffree);
+	}
+
+	buf->f_bsize = bsize;
+	buf->f_blocks = blocks;
+	buf->f_bfree = bfree;
+	buf->f_bavail = bavail;
+	buf->f_files = files;
+	buf->f_ffree = ffree;
+	buf->f_frsize = 0;
+
+out:
+	return err;
+}
+
+static int aufs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	int err;
+	struct path h_path;
+	struct super_block *sb;
+
+	/* lock free root dinfo */
+	sb = dentry->d_sb;
+	si_noflush_read_lock(sb);
+	if (!au_opt_test(au_mntflags(sb), SUM)) {
+		/* sb->s_root for NFS is unreliable */
+		h_path.mnt = au_sbr_mnt(sb, 0);
+		h_path.dentry = h_path.mnt->mnt_root;
+		err = vfs_statfs(&h_path, buf);
+	} else
+		err = au_statfs_sum(sb, buf);
+	si_read_unlock(sb);
+
+	if (!err) {
+		buf->f_type = AUFS_SUPER_MAGIC;
+		buf->f_namelen = AUFS_MAX_NAMELEN;
+		memset(&buf->f_fsid, 0, sizeof(buf->f_fsid));
+	}
+	/* buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; */
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int aufs_sync_fs(struct super_block *sb, int wait)
+{
+	int err, e;
+	aufs_bindex_t bbot, bindex;
+	struct au_branch *br;
+	struct super_block *h_sb;
+
+	err = 0;
+	si_noflush_read_lock(sb);
+	bbot = au_sbbot(sb);
+	for (bindex = 0; bindex <= bbot; bindex++) {
+		br = au_sbr(sb, bindex);
+		if (!au_br_writable(br->br_perm))
+			continue;
+
+		h_sb = au_sbr_sb(sb, bindex);
+		e = vfsub_sync_filesystem(h_sb, wait);
+		if (unlikely(e && !err))
+			err = e;
+		/* go on even if an error happens */
+	}
+	si_read_unlock(sb);
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* final actions when unmounting a file system */
+static void aufs_put_super(struct super_block *sb)
+{
+	struct au_sbinfo *sbinfo;
+
+	sbinfo = au_sbi(sb);
+	if (!sbinfo)
+		return;
+
+	dbgaufs_si_fin(sbinfo);
+	kobject_put(&sbinfo->si_kobj);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb,
+		     struct super_block *sb, void *arg)
+{
+	void *array;
+	unsigned long long n, sz;
+
+	array = NULL;
+	n = 0;
+	if (!*hint)
+		goto out;
+
+	if (*hint > ULLONG_MAX / sizeof(array)) {
+		array = ERR_PTR(-EMFILE);
+		pr_err("hint %llu\n", *hint);
+		goto out;
+	}
+
+	sz = sizeof(array) * *hint;
+	array = kzalloc(sz, GFP_NOFS);
+	if (unlikely(!array))
+		array = vzalloc(sz);
+	if (unlikely(!array)) {
+		array = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	n = cb(sb, array, *hint, arg);
+	AuDebugOn(n > *hint);
+
+out:
+	*hint = n;
+	return array;
+}
+
+static unsigned long long au_iarray_cb(struct super_block *sb, void *a,
+				       unsigned long long max __maybe_unused,
+				       void *arg)
+{
+	unsigned long long n;
+	struct inode **p, *inode;
+	struct list_head *head;
+
+	n = 0;
+	p = a;
+	head = arg;
+	spin_lock(&sb->s_inode_list_lock);
+	list_for_each_entry(inode, head, i_sb_list) {
+		if (!au_is_bad_inode(inode)
+		    && au_ii(inode)->ii_btop >= 0) {
+			spin_lock(&inode->i_lock);
+			if (atomic_read(&inode->i_count)) {
+				au_igrab(inode);
+				*p++ = inode;
+				n++;
+				AuDebugOn(n > max);
+			}
+			spin_unlock(&inode->i_lock);
+		}
+	}
+	spin_unlock(&sb->s_inode_list_lock);
+
+	return n;
+}
+
+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max)
+{
+	*max = au_ninodes(sb);
+	return au_array_alloc(max, au_iarray_cb, sb, &sb->s_inodes);
+}
+
+void au_iarray_free(struct inode **a, unsigned long long max)
+{
+	unsigned long long ull;
+
+	for (ull = 0; ull < max; ull++)
+		iput(a[ull]);
+	kvfree(a);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * refresh dentry and inode at remount time.
+ */
+/* todo: consolidate with simple_reval_dpath() and au_reval_for_attr() */
+static int au_do_refresh(struct dentry *dentry, unsigned int dir_flags,
+		      struct dentry *parent)
+{
+	int err;
+
+	di_write_lock_child(dentry);
+	di_read_lock_parent(parent, AuLock_IR);
+	err = au_refresh_dentry(dentry, parent);
+	if (!err && dir_flags)
+		au_hn_reset(d_inode(dentry), dir_flags);
+	di_read_unlock(parent, AuLock_IR);
+	di_write_unlock(dentry);
+
+	return err;
+}
+
+static int au_do_refresh_d(struct dentry *dentry, unsigned int sigen,
+			   struct au_sbinfo *sbinfo,
+			   const unsigned int dir_flags, unsigned int do_idop)
+{
+	int err;
+	struct dentry *parent;
+
+	err = 0;
+	parent = dget_parent(dentry);
+	if (!au_digen_test(parent, sigen) && au_digen_test(dentry, sigen)) {
+		if (d_really_is_positive(dentry)) {
+			if (!d_is_dir(dentry))
+				err = au_do_refresh(dentry, /*dir_flags*/0,
+						 parent);
+			else {
+				err = au_do_refresh(dentry, dir_flags, parent);
+				if (unlikely(err))
+					au_fset_si(sbinfo, FAILED_REFRESH_DIR);
+			}
+		} else
+			err = au_do_refresh(dentry, /*dir_flags*/0, parent);
+		AuDbgDentry(dentry);
+	}
+	dput(parent);
+
+	if (!err) {
+		if (do_idop)
+			au_refresh_dop(dentry, /*force_reval*/0);
+	} else
+		au_refresh_dop(dentry, /*force_reval*/1);
+
+	AuTraceErr(err);
+	return err;
+}
+
+static int au_refresh_d(struct super_block *sb, unsigned int do_idop)
+{
+	int err, i, j, ndentry, e;
+	unsigned int sigen;
+	struct au_dcsub_pages dpages;
+	struct au_dpage *dpage;
+	struct dentry **dentries, *d;
+	struct au_sbinfo *sbinfo;
+	struct dentry *root = sb->s_root;
+	const unsigned int dir_flags = au_hi_flags(d_inode(root), /*isdir*/1);
+
+	if (do_idop)
+		au_refresh_dop(root, /*force_reval*/0);
+
+	err = au_dpages_init(&dpages, GFP_NOFS);
+	if (unlikely(err))
+		goto out;
+	err = au_dcsub_pages(&dpages, root, NULL, NULL);
+	if (unlikely(err))
+		goto out_dpages;
+
+	sigen = au_sigen(sb);
+	sbinfo = au_sbi(sb);
+	for (i = 0; i < dpages.ndpage; i++) {
+		dpage = dpages.dpages + i;
+		dentries = dpage->dentries;
+		ndentry = dpage->ndentry;
+		for (j = 0; j < ndentry; j++) {
+			d = dentries[j];
+			e = au_do_refresh_d(d, sigen, sbinfo, dir_flags,
+					    do_idop);
+			if (unlikely(e && !err))
+				err = e;
+			/* go on even err */
+		}
+	}
+
+out_dpages:
+	au_dpages_free(&dpages);
+out:
+	return err;
+}
+
+static int au_refresh_i(struct super_block *sb, unsigned int do_idop)
+{
+	int err, e;
+	unsigned int sigen;
+	unsigned long long max, ull;
+	struct inode *inode, **array;
+
+	array = au_iarray_alloc(sb, &max);
+	err = PTR_ERR(array);
+	if (IS_ERR(array))
+		goto out;
+
+	err = 0;
+	sigen = au_sigen(sb);
+	for (ull = 0; ull < max; ull++) {
+		inode = array[ull];
+		if (unlikely(!inode))
+			break;
+
+		e = 0;
+		ii_write_lock_child(inode);
+		if (au_iigen(inode, NULL) != sigen) {
+			e = au_refresh_hinode_self(inode);
+			if (unlikely(e)) {
+				au_refresh_iop(inode, /*force_getattr*/1);
+				pr_err("error %d, i%lu\n", e, inode->i_ino);
+				if (!err)
+					err = e;
+				/* go on even if err */
+			}
+		}
+		if (!e && do_idop)
+			au_refresh_iop(inode, /*force_getattr*/0);
+		ii_write_unlock(inode);
+	}
+
+	au_iarray_free(array, max);
+
+out:
+	return err;
+}
+
+static void au_remount_refresh(struct super_block *sb, unsigned int do_idop)
+{
+	int err, e;
+	unsigned int udba;
+	aufs_bindex_t bindex, bbot;
+	struct dentry *root;
+	struct inode *inode;
+	struct au_branch *br;
+	struct au_sbinfo *sbi;
+
+	au_sigen_inc(sb);
+	sbi = au_sbi(sb);
+	au_fclr_si(sbi, FAILED_REFRESH_DIR);
+
+	root = sb->s_root;
+	DiMustNoWaiters(root);
+	inode = d_inode(root);
+	IiMustNoWaiters(inode);
+
+	udba = au_opt_udba(sb);
+	bbot = au_sbbot(sb);
+	for (bindex = 0; bindex <= bbot; bindex++) {
+		br = au_sbr(sb, bindex);
+		err = au_hnotify_reset_br(udba, br, br->br_perm);
+		if (unlikely(err))
+			AuIOErr("hnotify failed on br %d, %d, ignored\n",
+				bindex, err);
+		/* go on even if err */
+	}
+	au_hn_reset(inode, au_hi_flags(inode, /*isdir*/1));
+
+	if (do_idop) {
+		if (au_ftest_si(sbi, NO_DREVAL)) {
+			AuDebugOn(sb->s_d_op == &aufs_dop_noreval);
+			sb->s_d_op = &aufs_dop_noreval;
+			AuDebugOn(sbi->si_iop_array == aufs_iop_nogetattr);
+			sbi->si_iop_array = aufs_iop_nogetattr;
+		} else {
+			AuDebugOn(sb->s_d_op == &aufs_dop);
+			sb->s_d_op = &aufs_dop;
+			AuDebugOn(sbi->si_iop_array == aufs_iop);
+			sbi->si_iop_array = aufs_iop;
+		}
+		pr_info("reset to %pf and %pf\n",
+			sb->s_d_op, sbi->si_iop_array);
+	}
+
+	di_write_unlock(root);
+	err = au_refresh_d(sb, do_idop);
+	e = au_refresh_i(sb, do_idop);
+	if (unlikely(e && !err))
+		err = e;
+	/* aufs_write_lock() calls ..._child() */
+	di_write_lock_child(root);
+
+	au_cpup_attr_all(inode, /*force*/1);
+
+	if (unlikely(err))
+		AuIOErr("refresh failed, ignored, %d\n", err);
+}
+
+/* stop extra interpretation of errno in mount(8), and strange error messages */
+static int cvt_err(int err)
+{
+	AuTraceErr(err);
+
+	switch (err) {
+	case -ENOENT:
+	case -ENOTDIR:
+	case -EEXIST:
+	case -EIO:
+		err = -EINVAL;
+	}
+	return err;
+}
+
+static int aufs_remount_fs(struct super_block *sb, int *flags, char *data)
+{
+	int err, do_dx;
+	unsigned int mntflags;
+	struct au_opts opts = {
+		.opt = NULL
+	};
+	struct dentry *root;
+	struct inode *inode;
+	struct au_sbinfo *sbinfo;
+
+	err = 0;
+	root = sb->s_root;
+	if (!data || !*data) {
+		err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
+		if (!err) {
+			di_write_lock_child(root);
+			err = au_opts_verify(sb, *flags, /*pending*/0);
+			aufs_write_unlock(root);
+		}
+		goto out;
+	}
+
+	err = -ENOMEM;
+	opts.opt = (void *)__get_free_page(GFP_NOFS);
+	if (unlikely(!opts.opt))
+		goto out;
+	opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
+	opts.flags = AuOpts_REMOUNT;
+	opts.sb_flags = *flags;
+
+	/* parse it before aufs lock */
+	err = au_opts_parse(sb, data, &opts);
+	if (unlikely(err))
+		goto out_opts;
+
+	sbinfo = au_sbi(sb);
+	inode = d_inode(root);
+	inode_lock(inode);
+	err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
+	if (unlikely(err))
+		goto out_mtx;
+	di_write_lock_child(root);
+
+	/* au_opts_remount() may return an error */
+	err = au_opts_remount(sb, &opts);
+	au_opts_free(&opts);
+
+	if (au_ftest_opts(opts.flags, REFRESH))
+		au_remount_refresh(sb, au_ftest_opts(opts.flags, REFRESH_IDOP));
+
+	if (au_ftest_opts(opts.flags, REFRESH_DYAOP)) {
+		mntflags = au_mntflags(sb);
+		do_dx = !!au_opt_test(mntflags, DIO);
+		au_dy_arefresh(do_dx);
+	}
+
+	au_fhsm_wrote_all(sb, /*force*/1); /* ?? */
+	aufs_write_unlock(root);
+
+out_mtx:
+	inode_unlock(inode);
+out_opts:
+	free_page((unsigned long)opts.opt);
+out:
+	err = cvt_err(err);
+	AuTraceErr(err);
+	return err;
+}
+
+static const struct super_operations aufs_sop = {
+	.alloc_inode	= aufs_alloc_inode,
+	.destroy_inode	= aufs_destroy_inode,
+	/* always deleting, no clearing */
+	.drop_inode	= generic_delete_inode,
+	.show_options	= aufs_show_options,
+	.statfs		= aufs_statfs,
+	.put_super	= aufs_put_super,
+	.sync_fs	= aufs_sync_fs,
+	.remount_fs	= aufs_remount_fs
+};
+
+/* ---------------------------------------------------------------------- */
+
+static int alloc_root(struct super_block *sb)
+{
+	int err;
+	struct inode *inode;
+	struct dentry *root;
+
+	err = -ENOMEM;
+	inode = au_iget_locked(sb, AUFS_ROOT_INO);
+	err = PTR_ERR(inode);
+	if (IS_ERR(inode))
+		goto out;
+
+	inode->i_op = aufs_iop + AuIop_DIR; /* with getattr by default */
+	inode->i_fop = &aufs_dir_fop;
+	inode->i_mode = S_IFDIR;
+	set_nlink(inode, 2);
+	unlock_new_inode(inode);
+
+	root = d_make_root(inode);
+	if (unlikely(!root))
+		goto out;
+	err = PTR_ERR(root);
+	if (IS_ERR(root))
+		goto out;
+
+	err = au_di_init(root);
+	if (!err) {
+		sb->s_root = root;
+		return 0; /* success */
+	}
+	dput(root);
+
+out:
+	return err;
+}
+
+static int aufs_fill_super(struct super_block *sb, void *raw_data,
+			   int silent __maybe_unused)
+{
+	int err;
+	struct au_opts opts = {
+		.opt = NULL
+	};
+	struct au_sbinfo *sbinfo;
+	struct dentry *root;
+	struct inode *inode;
+	char *arg = raw_data;
+
+	if (unlikely(!arg || !*arg)) {
+		err = -EINVAL;
+		pr_err("no arg\n");
+		goto out;
+	}
+
+	err = -ENOMEM;
+	opts.opt = (void *)__get_free_page(GFP_NOFS);
+	if (unlikely(!opts.opt))
+		goto out;
+	opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
+	opts.sb_flags = sb->s_flags;
+
+	err = au_si_alloc(sb);
+	if (unlikely(err))
+		goto out_opts;
+	sbinfo = au_sbi(sb);
+
+	/* all timestamps always follow the ones on the branch */
+	sb->s_flags |= SB_NOATIME | SB_NODIRATIME;
+	sb->s_op = &aufs_sop;
+	sb->s_d_op = &aufs_dop;
+	sb->s_magic = AUFS_SUPER_MAGIC;
+	sb->s_maxbytes = 0;
+	sb->s_stack_depth = 1;
+	au_export_init(sb);
+	au_xattr_init(sb);
+
+	err = alloc_root(sb);
+	if (unlikely(err)) {
+		si_write_unlock(sb);
+		goto out_info;
+	}
+	root = sb->s_root;
+	inode = d_inode(root);
+
+	/*
+	 * actually we can parse options regardless aufs lock here.
+	 * but at remount time, parsing must be done before aufs lock.
+	 * so we follow the same rule.
+	 */
+	ii_write_lock_parent(inode);
+	aufs_write_unlock(root);
+	err = au_opts_parse(sb, arg, &opts);
+	if (unlikely(err))
+		goto out_root;
+
+	/* lock vfs_inode first, then aufs. */
+	inode_lock(inode);
+	aufs_write_lock(root);
+	err = au_opts_mount(sb, &opts);
+	au_opts_free(&opts);
+	if (!err && au_ftest_si(sbinfo, NO_DREVAL)) {
+		sb->s_d_op = &aufs_dop_noreval;
+		pr_info("%pf\n", sb->s_d_op);
+		au_refresh_dop(root, /*force_reval*/0);
+		sbinfo->si_iop_array = aufs_iop_nogetattr;
+		au_refresh_iop(inode, /*force_getattr*/0);
+	}
+	aufs_write_unlock(root);
+	inode_unlock(inode);
+	if (!err)
+		goto out_opts; /* success */
+
+out_root:
+	dput(root);
+	sb->s_root = NULL;
+out_info:
+	dbgaufs_si_fin(sbinfo);
+	kobject_put(&sbinfo->si_kobj);
+	sb->s_fs_info = NULL;
+out_opts:
+	free_page((unsigned long)opts.opt);
+out:
+	AuTraceErr(err);
+	err = cvt_err(err);
+	AuTraceErr(err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static struct dentry *aufs_mount(struct file_system_type *fs_type, int flags,
+				 const char *dev_name __maybe_unused,
+				 void *raw_data)
+{
+	struct dentry *root;
+	struct super_block *sb;
+
+	/* all timestamps always follow the ones on the branch */
+	/* mnt->mnt_flags |= MNT_NOATIME | MNT_NODIRATIME; */
+	root = mount_nodev(fs_type, flags, raw_data, aufs_fill_super);
+	if (IS_ERR(root))
+		goto out;
+
+	sb = root->d_sb;
+	si_write_lock(sb, !AuLock_FLUSH);
+	sysaufs_brs_add(sb, 0);
+	si_write_unlock(sb);
+	au_sbilist_add(sb);
+
+out:
+	return root;
+}
+
+static void aufs_kill_sb(struct super_block *sb)
+{
+	struct au_sbinfo *sbinfo;
+
+	sbinfo = au_sbi(sb);
+	if (sbinfo) {
+		au_sbilist_del(sb);
+		aufs_write_lock(sb->s_root);
+		au_fhsm_fin(sb);
+		if (sbinfo->si_wbr_create_ops->fin)
+			sbinfo->si_wbr_create_ops->fin(sb);
+		if (au_opt_test(sbinfo->si_mntflags, UDBA_HNOTIFY)) {
+			au_opt_set_udba(sbinfo->si_mntflags, UDBA_NONE);
+			au_remount_refresh(sb, /*do_idop*/0);
+		}
+		if (au_opt_test(sbinfo->si_mntflags, PLINK))
+			au_plink_put(sb, /*verbose*/1);
+		au_xino_clr(sb);
+		au_dr_opt_flush(sb);
+		sbinfo->si_sb = NULL;
+		aufs_write_unlock(sb->s_root);
+		au_nwt_flush(&sbinfo->si_nowait);
+	}
+	kill_anon_super(sb);
+}
+
+struct file_system_type aufs_fs_type = {
+	.name		= AUFS_FSTYPE,
+	/* a race between rename and others */
+	.fs_flags	= FS_RENAME_DOES_D_MOVE,
+	.mount		= aufs_mount,
+	.kill_sb	= aufs_kill_sb,
+	/* no need to __module_get() and module_put(). */
+	.owner		= THIS_MODULE,
+};
diff --git a/include/fs/aufs/super.h b/include/fs/aufs/super.h
new file mode 100644
index 000000000..e6da60317
--- /dev/null
+++ b/include/fs/aufs/super.h
@@ -0,0 +1,626 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * super_block operations
+ */
+
+#ifndef __AUFS_SUPER_H__
+#define __AUFS_SUPER_H__
+
+#ifdef __KERNEL__
+
+#include <linux/fs.h>
+#include <linux/kobject.h>
+#include "hbl.h"
+#include "rwsem.h"
+#include "wkq.h"
+
+/* policies to select one among multiple writable branches */
+struct au_wbr_copyup_operations {
+	int (*copyup)(struct dentry *dentry);
+};
+
+#define AuWbr_DIR	1		/* target is a dir */
+#define AuWbr_PARENT	(1 << 1)	/* always require a parent */
+
+#define au_ftest_wbr(flags, name)	((flags) & AuWbr_##name)
+#define au_fset_wbr(flags, name)	{ (flags) |= AuWbr_##name; }
+#define au_fclr_wbr(flags, name)	{ (flags) &= ~AuWbr_##name; }
+
+struct au_wbr_create_operations {
+	int (*create)(struct dentry *dentry, unsigned int flags);
+	int (*init)(struct super_block *sb);
+	int (*fin)(struct super_block *sb);
+};
+
+struct au_wbr_mfs {
+	struct mutex	mfs_lock; /* protect this structure */
+	unsigned long	mfs_jiffy;
+	unsigned long	mfs_expire;
+	aufs_bindex_t	mfs_bindex;
+
+	unsigned long long	mfsrr_bytes;
+	unsigned long long	mfsrr_watermark;
+};
+
+#define AuPlink_NHASH 100
+static inline int au_plink_hash(ino_t ino)
+{
+	return ino % AuPlink_NHASH;
+}
+
+/* File-based Hierarchical Storage Management */
+struct au_fhsm {
+#ifdef CONFIG_AUFS_FHSM
+	/* allow only one process who can receive the notification */
+	spinlock_t		fhsm_spin;
+	pid_t			fhsm_pid;
+	wait_queue_head_t	fhsm_wqh;
+	atomic_t		fhsm_readable;
+
+	/* these are protected by si_rwsem */
+	unsigned long		fhsm_expire;
+	aufs_bindex_t		fhsm_bottom;
+#endif
+};
+
+struct au_branch;
+struct au_sbinfo {
+	/* nowait tasks in the system-wide workqueue */
+	struct au_nowait_tasks	si_nowait;
+
+	/*
+	 * tried sb->s_umount, but failed due to the dependecy between i_mutex.
+	 * rwsem for au_sbinfo is necessary.
+	 */
+	struct au_rwsem		si_rwsem;
+
+	/*
+	 * dirty approach to protect sb->sb_inodes and ->s_files (gone) from
+	 * remount.
+	 */
+	struct percpu_counter	si_ninodes, si_nfiles;
+
+	/* branch management */
+	unsigned int		si_generation;
+
+	/* see AuSi_ flags */
+	unsigned char		au_si_status;
+
+	aufs_bindex_t		si_bbot;
+
+	/* dirty trick to keep br_id plus */
+	unsigned int		si_last_br_id :
+				sizeof(aufs_bindex_t) * BITS_PER_BYTE - 1;
+	struct au_branch	**si_branch;
+
+	/* policy to select a writable branch */
+	unsigned char		si_wbr_copyup;
+	unsigned char		si_wbr_create;
+	struct au_wbr_copyup_operations *si_wbr_copyup_ops;
+	struct au_wbr_create_operations *si_wbr_create_ops;
+
+	/* round robin */
+	atomic_t		si_wbr_rr_next;
+
+	/* most free space */
+	struct au_wbr_mfs	si_wbr_mfs;
+
+	/* File-based Hierarchical Storage Management */
+	struct au_fhsm		si_fhsm;
+
+	/* mount flags */
+	/* include/asm-ia64/siginfo.h defines a macro named si_flags */
+	unsigned int		si_mntflags;
+
+	/* external inode number (bitmap and translation table) */
+	vfs_readf_t		si_xread;
+	vfs_writef_t		si_xwrite;
+	struct file		*si_xib;
+	struct mutex		si_xib_mtx; /* protect xib members */
+	unsigned long		*si_xib_buf;
+	unsigned long		si_xib_last_pindex;
+	int			si_xib_next_bit;
+	aufs_bindex_t		si_xino_brid;
+	unsigned long		si_xino_jiffy;
+	unsigned long		si_xino_expire;
+	/* reserved for future use */
+	/* unsigned long long	si_xib_limit; */	/* Max xib file size */
+
+#ifdef CONFIG_AUFS_EXPORT
+	/* i_generation */
+	struct file		*si_xigen;
+	atomic_t		si_xigen_next;
+#endif
+
+	/* dirty trick to suppoer atomic_open */
+	struct hlist_bl_head	si_aopen;
+
+	/* vdir parameters */
+	unsigned long		si_rdcache;	/* max cache time in jiffies */
+	unsigned int		si_rdblk;	/* deblk size */
+	unsigned int		si_rdhash;	/* hash size */
+
+	/*
+	 * If the number of whiteouts are larger than si_dirwh, leave all of
+	 * them after au_whtmp_ren to reduce the cost of rmdir(2).
+	 * future fsck.aufs or kernel thread will remove them later.
+	 * Otherwise, remove all whiteouts and the dir in rmdir(2).
+	 */
+	unsigned int		si_dirwh;
+
+	/* pseudo_link list */
+	struct hlist_bl_head	si_plink[AuPlink_NHASH];
+	wait_queue_head_t	si_plink_wq;
+	spinlock_t		si_plink_maint_lock;
+	pid_t			si_plink_maint_pid;
+
+	/* file list */
+	struct hlist_bl_head	si_files;
+
+	/* with/without getattr, brother of sb->s_d_op */
+	struct inode_operations *si_iop_array;
+
+	/*
+	 * sysfs and lifetime management.
+	 * this is not a small structure and it may be a waste of memory in case
+	 * of sysfs is disabled, particulary when many aufs-es are mounted.
+	 * but using sysfs is majority.
+	 */
+	struct kobject		si_kobj;
+#ifdef CONFIG_DEBUG_FS
+	struct dentry		 *si_dbgaufs;
+	struct dentry		 *si_dbgaufs_plink;
+	struct dentry		 *si_dbgaufs_xib;
+#ifdef CONFIG_AUFS_EXPORT
+	struct dentry		 *si_dbgaufs_xigen;
+#endif
+#endif
+
+#ifdef CONFIG_AUFS_SBILIST
+	struct hlist_bl_node	si_list;
+#endif
+
+	/* dirty, necessary for unmounting, sysfs and sysrq */
+	struct super_block	*si_sb;
+};
+
+/* sbinfo status flags */
+/*
+ * set true when refresh_dirs() failed at remount time.
+ * then try refreshing dirs at access time again.
+ * if it is false, refreshing dirs at access time is unnecesary
+ */
+#define AuSi_FAILED_REFRESH_DIR	1
+#define AuSi_FHSM		(1 << 1)	/* fhsm is active now */
+#define AuSi_NO_DREVAL		(1 << 2)	/* disable all d_revalidate */
+
+#ifndef CONFIG_AUFS_FHSM
+#undef AuSi_FHSM
+#define AuSi_FHSM		0
+#endif
+
+static inline unsigned char au_do_ftest_si(struct au_sbinfo *sbi,
+					   unsigned int flag)
+{
+	AuRwMustAnyLock(&sbi->si_rwsem);
+	return sbi->au_si_status & flag;
+}
+#define au_ftest_si(sbinfo, name)	au_do_ftest_si(sbinfo, AuSi_##name)
+#define au_fset_si(sbinfo, name) do { \
+	AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
+	(sbinfo)->au_si_status |= AuSi_##name; \
+} while (0)
+#define au_fclr_si(sbinfo, name) do { \
+	AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
+	(sbinfo)->au_si_status &= ~AuSi_##name; \
+} while (0)
+
+/* ---------------------------------------------------------------------- */
+
+/* policy to select one among writable branches */
+#define AuWbrCopyup(sbinfo, ...) \
+	((sbinfo)->si_wbr_copyup_ops->copyup(__VA_ARGS__))
+#define AuWbrCreate(sbinfo, ...) \
+	((sbinfo)->si_wbr_create_ops->create(__VA_ARGS__))
+
+/* flags for si_read_lock()/aufs_read_lock()/di_read_lock() */
+#define AuLock_DW		1		/* write-lock dentry */
+#define AuLock_IR		(1 << 1)	/* read-lock inode */
+#define AuLock_IW		(1 << 2)	/* write-lock inode */
+#define AuLock_FLUSH		(1 << 3)	/* wait for 'nowait' tasks */
+#define AuLock_DIRS		(1 << 4)	/* target is a pair of dirs */
+						/* except RENAME_EXCHANGE */
+#define AuLock_NOPLM		(1 << 5)	/* return err in plm mode */
+#define AuLock_NOPLMW		(1 << 6)	/* wait for plm mode ends */
+#define AuLock_GEN		(1 << 7)	/* test digen/iigen */
+#define au_ftest_lock(flags, name)	((flags) & AuLock_##name)
+#define au_fset_lock(flags, name) \
+	do { (flags) |= AuLock_##name; } while (0)
+#define au_fclr_lock(flags, name) \
+	do { (flags) &= ~AuLock_##name; } while (0)
+
+/* ---------------------------------------------------------------------- */
+
+/* super.c */
+extern struct file_system_type aufs_fs_type;
+struct inode *au_iget_locked(struct super_block *sb, ino_t ino);
+typedef unsigned long long (*au_arraycb_t)(struct super_block *sb, void *array,
+					   unsigned long long max, void *arg);
+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb,
+		     struct super_block *sb, void *arg);
+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max);
+void au_iarray_free(struct inode **a, unsigned long long max);
+
+/* sbinfo.c */
+void au_si_free(struct kobject *kobj);
+int au_si_alloc(struct super_block *sb);
+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr, int may_shrink);
+
+unsigned int au_sigen_inc(struct super_block *sb);
+aufs_bindex_t au_new_br_id(struct super_block *sb);
+
+int si_read_lock(struct super_block *sb, int flags);
+int si_write_lock(struct super_block *sb, int flags);
+int aufs_read_lock(struct dentry *dentry, int flags);
+void aufs_read_unlock(struct dentry *dentry, int flags);
+void aufs_write_lock(struct dentry *dentry);
+void aufs_write_unlock(struct dentry *dentry);
+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags);
+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2);
+
+/* wbr_policy.c */
+extern struct au_wbr_copyup_operations au_wbr_copyup_ops[];
+extern struct au_wbr_create_operations au_wbr_create_ops[];
+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst);
+int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex);
+int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t btop);
+
+/* mvdown.c */
+int au_mvdown(struct dentry *dentry, struct aufs_mvdown __user *arg);
+
+#ifdef CONFIG_AUFS_FHSM
+/* fhsm.c */
+
+static inline pid_t au_fhsm_pid(struct au_fhsm *fhsm)
+{
+	pid_t pid;
+
+	spin_lock(&fhsm->fhsm_spin);
+	pid = fhsm->fhsm_pid;
+	spin_unlock(&fhsm->fhsm_spin);
+
+	return pid;
+}
+
+void au_fhsm_wrote(struct super_block *sb, aufs_bindex_t bindex, int force);
+void au_fhsm_wrote_all(struct super_block *sb, int force);
+int au_fhsm_fd(struct super_block *sb, int oflags);
+int au_fhsm_br_alloc(struct au_branch *br);
+void au_fhsm_set_bottom(struct super_block *sb, aufs_bindex_t bindex);
+void au_fhsm_fin(struct super_block *sb);
+void au_fhsm_init(struct au_sbinfo *sbinfo);
+void au_fhsm_set(struct au_sbinfo *sbinfo, unsigned int sec);
+void au_fhsm_show(struct seq_file *seq, struct au_sbinfo *sbinfo);
+#else
+AuStubVoid(au_fhsm_wrote, struct super_block *sb, aufs_bindex_t bindex,
+	   int force)
+AuStubVoid(au_fhsm_wrote_all, struct super_block *sb, int force)
+AuStub(int, au_fhsm_fd, return -EOPNOTSUPP, struct super_block *sb, int oflags)
+AuStub(pid_t, au_fhsm_pid, return 0, struct au_fhsm *fhsm)
+AuStubInt0(au_fhsm_br_alloc, struct au_branch *br)
+AuStubVoid(au_fhsm_set_bottom, struct super_block *sb, aufs_bindex_t bindex)
+AuStubVoid(au_fhsm_fin, struct super_block *sb)
+AuStubVoid(au_fhsm_init, struct au_sbinfo *sbinfo)
+AuStubVoid(au_fhsm_set, struct au_sbinfo *sbinfo, unsigned int sec)
+AuStubVoid(au_fhsm_show, struct seq_file *seq, struct au_sbinfo *sbinfo)
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+static inline struct au_sbinfo *au_sbi(struct super_block *sb)
+{
+	return sb->s_fs_info;
+}
+
+/* ---------------------------------------------------------------------- */
+
+#ifdef CONFIG_AUFS_EXPORT
+int au_test_nfsd(void);
+void au_export_init(struct super_block *sb);
+void au_xigen_inc(struct inode *inode);
+int au_xigen_new(struct inode *inode);
+int au_xigen_set(struct super_block *sb, struct file *base);
+void au_xigen_clr(struct super_block *sb);
+
+static inline int au_busy_or_stale(void)
+{
+	if (!au_test_nfsd())
+		return -EBUSY;
+	return -ESTALE;
+}
+#else
+AuStubInt0(au_test_nfsd, void)
+AuStubVoid(au_export_init, struct super_block *sb)
+AuStubVoid(au_xigen_inc, struct inode *inode)
+AuStubInt0(au_xigen_new, struct inode *inode)
+AuStubInt0(au_xigen_set, struct super_block *sb, struct file *base)
+AuStubVoid(au_xigen_clr, struct super_block *sb)
+AuStub(int, au_busy_or_stale, return -EBUSY, void)
+#endif /* CONFIG_AUFS_EXPORT */
+
+/* ---------------------------------------------------------------------- */
+
+#ifdef CONFIG_AUFS_SBILIST
+/* module.c */
+extern struct hlist_bl_head au_sbilist;
+
+static inline void au_sbilist_init(void)
+{
+	INIT_HLIST_BL_HEAD(&au_sbilist);
+}
+
+static inline void au_sbilist_add(struct super_block *sb)
+{
+	au_hbl_add(&au_sbi(sb)->si_list, &au_sbilist);
+}
+
+static inline void au_sbilist_del(struct super_block *sb)
+{
+	au_hbl_del(&au_sbi(sb)->si_list, &au_sbilist);
+}
+
+#ifdef CONFIG_AUFS_MAGIC_SYSRQ
+static inline void au_sbilist_lock(void)
+{
+	hlist_bl_lock(&au_sbilist);
+}
+
+static inline void au_sbilist_unlock(void)
+{
+	hlist_bl_unlock(&au_sbilist);
+}
+#define AuGFP_SBILIST	GFP_ATOMIC
+#else
+AuStubVoid(au_sbilist_lock, void)
+AuStubVoid(au_sbilist_unlock, void)
+#define AuGFP_SBILIST	GFP_NOFS
+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
+#else
+AuStubVoid(au_sbilist_init, void)
+AuStubVoid(au_sbilist_add, struct super_block *sb)
+AuStubVoid(au_sbilist_del, struct super_block *sb)
+AuStubVoid(au_sbilist_lock, void)
+AuStubVoid(au_sbilist_unlock, void)
+#define AuGFP_SBILIST	GFP_NOFS
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+static inline void dbgaufs_si_null(struct au_sbinfo *sbinfo)
+{
+	/*
+	 * This function is a dynamic '__init' function actually,
+	 * so the tiny check for si_rwsem is unnecessary.
+	 */
+	/* AuRwMustWriteLock(&sbinfo->si_rwsem); */
+#ifdef CONFIG_DEBUG_FS
+	sbinfo->si_dbgaufs = NULL;
+	sbinfo->si_dbgaufs_plink = NULL;
+	sbinfo->si_dbgaufs_xib = NULL;
+#ifdef CONFIG_AUFS_EXPORT
+	sbinfo->si_dbgaufs_xigen = NULL;
+#endif
+#endif
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* current->atomic_flags */
+/* this value should never corrupt the ones defined in linux/sched.h */
+#define PFA_AUFS	7
+
+TASK_PFA_TEST(AUFS, test_aufs)	/* task_test_aufs */
+TASK_PFA_SET(AUFS, aufs)	/* task_set_aufs */
+TASK_PFA_CLEAR(AUFS, aufs)	/* task_clear_aufs */
+
+static inline int si_pid_test(struct super_block *sb)
+{
+	return !!task_test_aufs(current);
+}
+
+static inline void si_pid_clr(struct super_block *sb)
+{
+	AuDebugOn(!task_test_aufs(current));
+	task_clear_aufs(current);
+}
+
+static inline void si_pid_set(struct super_block *sb)
+{
+	AuDebugOn(task_test_aufs(current));
+	task_set_aufs(current);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* lock superblock. mainly for entry point functions */
+#define __si_read_lock(sb)	au_rw_read_lock(&au_sbi(sb)->si_rwsem)
+#define __si_write_lock(sb)	au_rw_write_lock(&au_sbi(sb)->si_rwsem)
+#define __si_read_trylock(sb)	au_rw_read_trylock(&au_sbi(sb)->si_rwsem)
+#define __si_write_trylock(sb)	au_rw_write_trylock(&au_sbi(sb)->si_rwsem)
+/*
+#define __si_read_trylock_nested(sb) \
+	au_rw_read_trylock_nested(&au_sbi(sb)->si_rwsem)
+#define __si_write_trylock_nested(sb) \
+	au_rw_write_trylock_nested(&au_sbi(sb)->si_rwsem)
+*/
+
+#define __si_read_unlock(sb)	au_rw_read_unlock(&au_sbi(sb)->si_rwsem)
+#define __si_write_unlock(sb)	au_rw_write_unlock(&au_sbi(sb)->si_rwsem)
+#define __si_downgrade_lock(sb)	au_rw_dgrade_lock(&au_sbi(sb)->si_rwsem)
+
+#define SiMustNoWaiters(sb)	AuRwMustNoWaiters(&au_sbi(sb)->si_rwsem)
+#define SiMustAnyLock(sb)	AuRwMustAnyLock(&au_sbi(sb)->si_rwsem)
+#define SiMustWriteLock(sb)	AuRwMustWriteLock(&au_sbi(sb)->si_rwsem)
+
+static inline void si_noflush_read_lock(struct super_block *sb)
+{
+	__si_read_lock(sb);
+	si_pid_set(sb);
+}
+
+static inline int si_noflush_read_trylock(struct super_block *sb)
+{
+	int locked;
+
+	locked = __si_read_trylock(sb);
+	if (locked)
+		si_pid_set(sb);
+	return locked;
+}
+
+static inline void si_noflush_write_lock(struct super_block *sb)
+{
+	__si_write_lock(sb);
+	si_pid_set(sb);
+}
+
+static inline int si_noflush_write_trylock(struct super_block *sb)
+{
+	int locked;
+
+	locked = __si_write_trylock(sb);
+	if (locked)
+		si_pid_set(sb);
+	return locked;
+}
+
+#if 0 /* reserved */
+static inline int si_read_trylock(struct super_block *sb, int flags)
+{
+	if (au_ftest_lock(flags, FLUSH))
+		au_nwt_flush(&au_sbi(sb)->si_nowait);
+	return si_noflush_read_trylock(sb);
+}
+#endif
+
+static inline void si_read_unlock(struct super_block *sb)
+{
+	si_pid_clr(sb);
+	__si_read_unlock(sb);
+}
+
+#if 0 /* reserved */
+static inline int si_write_trylock(struct super_block *sb, int flags)
+{
+	if (au_ftest_lock(flags, FLUSH))
+		au_nwt_flush(&au_sbi(sb)->si_nowait);
+	return si_noflush_write_trylock(sb);
+}
+#endif
+
+static inline void si_write_unlock(struct super_block *sb)
+{
+	si_pid_clr(sb);
+	__si_write_unlock(sb);
+}
+
+#if 0 /* reserved */
+static inline void si_downgrade_lock(struct super_block *sb)
+{
+	__si_downgrade_lock(sb);
+}
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+static inline aufs_bindex_t au_sbbot(struct super_block *sb)
+{
+	SiMustAnyLock(sb);
+	return au_sbi(sb)->si_bbot;
+}
+
+static inline unsigned int au_mntflags(struct super_block *sb)
+{
+	SiMustAnyLock(sb);
+	return au_sbi(sb)->si_mntflags;
+}
+
+static inline unsigned int au_sigen(struct super_block *sb)
+{
+	SiMustAnyLock(sb);
+	return au_sbi(sb)->si_generation;
+}
+
+static inline unsigned long long au_ninodes(struct super_block *sb)
+{
+	s64 n = percpu_counter_sum(&au_sbi(sb)->si_ninodes);
+
+	BUG_ON(n < 0);
+	return n;
+}
+
+static inline void au_ninodes_inc(struct super_block *sb)
+{
+	percpu_counter_inc(&au_sbi(sb)->si_ninodes);
+}
+
+static inline void au_ninodes_dec(struct super_block *sb)
+{
+	percpu_counter_dec(&au_sbi(sb)->si_ninodes);
+}
+
+static inline unsigned long long au_nfiles(struct super_block *sb)
+{
+	s64 n = percpu_counter_sum(&au_sbi(sb)->si_nfiles);
+
+	BUG_ON(n < 0);
+	return n;
+}
+
+static inline void au_nfiles_inc(struct super_block *sb)
+{
+	percpu_counter_inc(&au_sbi(sb)->si_nfiles);
+}
+
+static inline void au_nfiles_dec(struct super_block *sb)
+{
+	percpu_counter_dec(&au_sbi(sb)->si_nfiles);
+}
+
+static inline struct au_branch *au_sbr(struct super_block *sb,
+				       aufs_bindex_t bindex)
+{
+	SiMustAnyLock(sb);
+	return au_sbi(sb)->si_branch[0 + bindex];
+}
+
+static inline void au_xino_brid_set(struct super_block *sb, aufs_bindex_t brid)
+{
+	SiMustWriteLock(sb);
+	au_sbi(sb)->si_xino_brid = brid;
+}
+
+static inline aufs_bindex_t au_xino_brid(struct super_block *sb)
+{
+	SiMustAnyLock(sb);
+	return au_sbi(sb)->si_xino_brid;
+}
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_SUPER_H__ */
diff --git a/include/fs/aufs/sysaufs.c b/include/fs/aufs/sysaufs.c
new file mode 100644
index 000000000..d87a20d4d
--- /dev/null
+++ b/include/fs/aufs/sysaufs.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * sysfs interface and lifetime management
+ * they are necessary regardless sysfs is disabled.
+ */
+
+#include <linux/random.h>
+#include "aufs.h"
+
+unsigned long sysaufs_si_mask;
+struct kset *sysaufs_kset;
+
+#define AuSiAttr(_name) { \
+	.attr   = { .name = __stringify(_name), .mode = 0444 },	\
+	.show   = sysaufs_si_##_name,				\
+}
+
+static struct sysaufs_si_attr sysaufs_si_attr_xi_path = AuSiAttr(xi_path);
+struct attribute *sysaufs_si_attrs[] = {
+	&sysaufs_si_attr_xi_path.attr,
+	NULL,
+};
+
+static const struct sysfs_ops au_sbi_ops = {
+	.show   = sysaufs_si_show
+};
+
+static struct kobj_type au_sbi_ktype = {
+	.release	= au_si_free,
+	.sysfs_ops	= &au_sbi_ops,
+	.default_attrs	= sysaufs_si_attrs
+};
+
+/* ---------------------------------------------------------------------- */
+
+int sysaufs_si_init(struct au_sbinfo *sbinfo)
+{
+	int err;
+
+	sbinfo->si_kobj.kset = sysaufs_kset;
+	/* cf. sysaufs_name() */
+	err = kobject_init_and_add
+		(&sbinfo->si_kobj, &au_sbi_ktype, /*&sysaufs_kset->kobj*/NULL,
+		 SysaufsSiNamePrefix "%lx", sysaufs_si_id(sbinfo));
+
+	dbgaufs_si_null(sbinfo);
+	if (!err) {
+		err = dbgaufs_si_init(sbinfo);
+		if (unlikely(err))
+			kobject_put(&sbinfo->si_kobj);
+	}
+	return err;
+}
+
+void sysaufs_fin(void)
+{
+	dbgaufs_fin();
+	sysfs_remove_group(&sysaufs_kset->kobj, sysaufs_attr_group);
+	kset_unregister(sysaufs_kset);
+}
+
+int __init sysaufs_init(void)
+{
+	int err;
+
+	do {
+		get_random_bytes(&sysaufs_si_mask, sizeof(sysaufs_si_mask));
+	} while (!sysaufs_si_mask);
+
+	err = -EINVAL;
+	sysaufs_kset = kset_create_and_add(AUFS_NAME, NULL, fs_kobj);
+	if (unlikely(!sysaufs_kset))
+		goto out;
+	err = PTR_ERR(sysaufs_kset);
+	if (IS_ERR(sysaufs_kset))
+		goto out;
+	err = sysfs_create_group(&sysaufs_kset->kobj, sysaufs_attr_group);
+	if (unlikely(err)) {
+		kset_unregister(sysaufs_kset);
+		goto out;
+	}
+
+	err = dbgaufs_init();
+	if (unlikely(err))
+		sysaufs_fin();
+out:
+	return err;
+}
diff --git a/include/fs/aufs/sysaufs.h b/include/fs/aufs/sysaufs.h
new file mode 100644
index 000000000..50aca1dc8
--- /dev/null
+++ b/include/fs/aufs/sysaufs.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * sysfs interface and mount lifetime management
+ */
+
+#ifndef __SYSAUFS_H__
+#define __SYSAUFS_H__
+
+#ifdef __KERNEL__
+
+#include <linux/sysfs.h>
+#include "module.h"
+
+struct super_block;
+struct au_sbinfo;
+
+struct sysaufs_si_attr {
+	struct attribute attr;
+	int (*show)(struct seq_file *seq, struct super_block *sb);
+};
+
+/* ---------------------------------------------------------------------- */
+
+/* sysaufs.c */
+extern unsigned long sysaufs_si_mask;
+extern struct kset *sysaufs_kset;
+extern struct attribute *sysaufs_si_attrs[];
+int sysaufs_si_init(struct au_sbinfo *sbinfo);
+int __init sysaufs_init(void);
+void sysaufs_fin(void);
+
+/* ---------------------------------------------------------------------- */
+
+/* some people doesn't like to show a pointer in kernel */
+static inline unsigned long sysaufs_si_id(struct au_sbinfo *sbinfo)
+{
+	return sysaufs_si_mask ^ (unsigned long)sbinfo;
+}
+
+#define SysaufsSiNamePrefix	"si_"
+#define SysaufsSiNameLen	(sizeof(SysaufsSiNamePrefix) + 16)
+static inline void sysaufs_name(struct au_sbinfo *sbinfo, char *name)
+{
+	snprintf(name, SysaufsSiNameLen, SysaufsSiNamePrefix "%lx",
+		 sysaufs_si_id(sbinfo));
+}
+
+struct au_branch;
+#ifdef CONFIG_SYSFS
+/* sysfs.c */
+extern struct attribute_group *sysaufs_attr_group;
+
+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb);
+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
+			 char *buf);
+long au_brinfo_ioctl(struct file *file, unsigned long arg);
+#ifdef CONFIG_COMPAT
+long au_brinfo_compat_ioctl(struct file *file, unsigned long arg);
+#endif
+
+void sysaufs_br_init(struct au_branch *br);
+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
+
+#define sysaufs_brs_init()	do {} while (0)
+
+#else
+#define sysaufs_attr_group	NULL
+
+AuStubInt0(sysaufs_si_xi_path, struct seq_file *seq, struct super_block *sb)
+AuStub(ssize_t, sysaufs_si_show, return 0, struct kobject *kobj,
+       struct attribute *attr, char *buf)
+AuStubVoid(sysaufs_br_init, struct au_branch *br)
+AuStubVoid(sysaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
+AuStubVoid(sysaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
+
+static inline void sysaufs_brs_init(void)
+{
+	sysaufs_brs = 0;
+}
+
+#endif /* CONFIG_SYSFS */
+
+#endif /* __KERNEL__ */
+#endif /* __SYSAUFS_H__ */
diff --git a/include/fs/aufs/sysfs.c b/include/fs/aufs/sysfs.c
new file mode 100644
index 000000000..58e148602
--- /dev/null
+++ b/include/fs/aufs/sysfs.c
@@ -0,0 +1,376 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * sysfs interface
+ */
+
+#include <linux/compat.h>
+#include <linux/seq_file.h>
+#include "aufs.h"
+
+#ifdef CONFIG_AUFS_FS_MODULE
+/* this entry violates the "one line per file" policy of sysfs */
+static ssize_t config_show(struct kobject *kobj, struct kobj_attribute *attr,
+			   char *buf)
+{
+	ssize_t err;
+	static char *conf =
+/* this file is generated at compiling */
+#include "conf.str"
+		;
+
+	err = snprintf(buf, PAGE_SIZE, conf);
+	if (unlikely(err >= PAGE_SIZE))
+		err = -EFBIG;
+	return err;
+}
+
+static struct kobj_attribute au_config_attr = __ATTR_RO(config);
+#endif
+
+static struct attribute *au_attr[] = {
+#ifdef CONFIG_AUFS_FS_MODULE
+	&au_config_attr.attr,
+#endif
+	NULL,	/* need to NULL terminate the list of attributes */
+};
+
+static struct attribute_group sysaufs_attr_group_body = {
+	.attrs = au_attr
+};
+
+struct attribute_group *sysaufs_attr_group = &sysaufs_attr_group_body;
+
+/* ---------------------------------------------------------------------- */
+
+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb)
+{
+	int err;
+
+	SiMustAnyLock(sb);
+
+	err = 0;
+	if (au_opt_test(au_mntflags(sb), XINO)) {
+		err = au_xino_path(seq, au_sbi(sb)->si_xib);
+		seq_putc(seq, '\n');
+	}
+	return err;
+}
+
+/*
+ * the lifetime of branch is independent from the entry under sysfs.
+ * sysfs handles the lifetime of the entry, and never call ->show() after it is
+ * unlinked.
+ */
+static int sysaufs_si_br(struct seq_file *seq, struct super_block *sb,
+			 aufs_bindex_t bindex, int idx)
+{
+	int err;
+	struct path path;
+	struct dentry *root;
+	struct au_branch *br;
+	au_br_perm_str_t perm;
+
+	AuDbg("b%d\n", bindex);
+
+	err = 0;
+	root = sb->s_root;
+	di_read_lock_parent(root, !AuLock_IR);
+	br = au_sbr(sb, bindex);
+
+	switch (idx) {
+	case AuBrSysfs_BR:
+		path.mnt = au_br_mnt(br);
+		path.dentry = au_h_dptr(root, bindex);
+		err = au_seq_path(seq, &path);
+		if (!err) {
+			au_optstr_br_perm(&perm, br->br_perm);
+			seq_printf(seq, "=%s\n", perm.a);
+		}
+		break;
+	case AuBrSysfs_BRID:
+		seq_printf(seq, "%d\n", br->br_id);
+		break;
+	}
+	di_read_unlock(root, !AuLock_IR);
+	if (unlikely(err || seq_has_overflowed(seq)))
+		err = -E2BIG;
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static struct seq_file *au_seq(char *p, ssize_t len)
+{
+	struct seq_file *seq;
+
+	seq = kzalloc(sizeof(*seq), GFP_NOFS);
+	if (seq) {
+		/* mutex_init(&seq.lock); */
+		seq->buf = p;
+		seq->size = len;
+		return seq; /* success */
+	}
+
+	seq = ERR_PTR(-ENOMEM);
+	return seq;
+}
+
+#define SysaufsBr_PREFIX	"br"
+#define SysaufsBrid_PREFIX	"brid"
+
+/* todo: file size may exceed PAGE_SIZE */
+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
+			char *buf)
+{
+	ssize_t err;
+	int idx;
+	long l;
+	aufs_bindex_t bbot;
+	struct au_sbinfo *sbinfo;
+	struct super_block *sb;
+	struct seq_file *seq;
+	char *name;
+	struct attribute **cattr;
+
+	sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
+	sb = sbinfo->si_sb;
+
+	/*
+	 * prevent a race condition between sysfs and aufs.
+	 * for instance, sysfs_file_read() calls sysfs_get_active_two() which
+	 * prohibits maintaining the sysfs entries.
+	 * hew we acquire read lock after sysfs_get_active_two().
+	 * on the other hand, the remount process may maintain the sysfs/aufs
+	 * entries after acquiring write lock.
+	 * it can cause a deadlock.
+	 * simply we gave up processing read here.
+	 */
+	err = -EBUSY;
+	if (unlikely(!si_noflush_read_trylock(sb)))
+		goto out;
+
+	seq = au_seq(buf, PAGE_SIZE);
+	err = PTR_ERR(seq);
+	if (IS_ERR(seq))
+		goto out_unlock;
+
+	name = (void *)attr->name;
+	cattr = sysaufs_si_attrs;
+	while (*cattr) {
+		if (!strcmp(name, (*cattr)->name)) {
+			err = container_of(*cattr, struct sysaufs_si_attr, attr)
+				->show(seq, sb);
+			goto out_seq;
+		}
+		cattr++;
+	}
+
+	if (!strncmp(name, SysaufsBrid_PREFIX,
+		     sizeof(SysaufsBrid_PREFIX) - 1)) {
+		idx = AuBrSysfs_BRID;
+		name += sizeof(SysaufsBrid_PREFIX) - 1;
+	} else if (!strncmp(name, SysaufsBr_PREFIX,
+			    sizeof(SysaufsBr_PREFIX) - 1)) {
+		idx = AuBrSysfs_BR;
+		name += sizeof(SysaufsBr_PREFIX) - 1;
+	} else
+		  BUG();
+
+	err = kstrtol(name, 10, &l);
+	if (!err) {
+		bbot = au_sbbot(sb);
+		if (l <= bbot)
+			err = sysaufs_si_br(seq, sb, (aufs_bindex_t)l, idx);
+		else
+			err = -ENOENT;
+	}
+
+out_seq:
+	if (!err) {
+		err = seq->count;
+		/* sysfs limit */
+		if (unlikely(err == PAGE_SIZE))
+			err = -EFBIG;
+	}
+	kfree(seq);
+out_unlock:
+	si_read_unlock(sb);
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_brinfo(struct super_block *sb, union aufs_brinfo __user *arg)
+{
+	int err;
+	int16_t brid;
+	aufs_bindex_t bindex, bbot;
+	size_t sz;
+	char *buf;
+	struct seq_file *seq;
+	struct au_branch *br;
+
+	si_read_lock(sb, AuLock_FLUSH);
+	bbot = au_sbbot(sb);
+	err = bbot + 1;
+	if (!arg)
+		goto out;
+
+	err = -ENOMEM;
+	buf = (void *)__get_free_page(GFP_NOFS);
+	if (unlikely(!buf))
+		goto out;
+
+	seq = au_seq(buf, PAGE_SIZE);
+	err = PTR_ERR(seq);
+	if (IS_ERR(seq))
+		goto out_buf;
+
+	sz = sizeof(*arg) - offsetof(union aufs_brinfo, path);
+	for (bindex = 0; bindex <= bbot; bindex++, arg++) {
+		err = !access_ok(VERIFY_WRITE, arg, sizeof(*arg));
+		if (unlikely(err))
+			break;
+
+		br = au_sbr(sb, bindex);
+		brid = br->br_id;
+		BUILD_BUG_ON(sizeof(brid) != sizeof(arg->id));
+		err = __put_user(brid, &arg->id);
+		if (unlikely(err))
+			break;
+
+		BUILD_BUG_ON(sizeof(br->br_perm) != sizeof(arg->perm));
+		err = __put_user(br->br_perm, &arg->perm);
+		if (unlikely(err))
+			break;
+
+		err = au_seq_path(seq, &br->br_path);
+		if (unlikely(err))
+			break;
+		seq_putc(seq, '\0');
+		if (!seq_has_overflowed(seq)) {
+			err = copy_to_user(arg->path, seq->buf, seq->count);
+			seq->count = 0;
+			if (unlikely(err))
+				break;
+		} else {
+			err = -E2BIG;
+			goto out_seq;
+		}
+	}
+	if (unlikely(err))
+		err = -EFAULT;
+
+out_seq:
+	kfree(seq);
+out_buf:
+	free_page((unsigned long)buf);
+out:
+	si_read_unlock(sb);
+	return err;
+}
+
+long au_brinfo_ioctl(struct file *file, unsigned long arg)
+{
+	return au_brinfo(file->f_path.dentry->d_sb, (void __user *)arg);
+}
+
+#ifdef CONFIG_COMPAT
+long au_brinfo_compat_ioctl(struct file *file, unsigned long arg)
+{
+	return au_brinfo(file->f_path.dentry->d_sb, compat_ptr(arg));
+}
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+void sysaufs_br_init(struct au_branch *br)
+{
+	int i;
+	struct au_brsysfs *br_sysfs;
+	struct attribute *attr;
+
+	br_sysfs = br->br_sysfs;
+	for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
+		attr = &br_sysfs->attr;
+		sysfs_attr_init(attr);
+		attr->name = br_sysfs->name;
+		attr->mode = S_IRUGO;
+		br_sysfs++;
+	}
+}
+
+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
+{
+	struct au_branch *br;
+	struct kobject *kobj;
+	struct au_brsysfs *br_sysfs;
+	int i;
+	aufs_bindex_t bbot;
+
+	dbgaufs_brs_del(sb, bindex);
+
+	if (!sysaufs_brs)
+		return;
+
+	kobj = &au_sbi(sb)->si_kobj;
+	bbot = au_sbbot(sb);
+	for (; bindex <= bbot; bindex++) {
+		br = au_sbr(sb, bindex);
+		br_sysfs = br->br_sysfs;
+		for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
+			sysfs_remove_file(kobj, &br_sysfs->attr);
+			br_sysfs++;
+		}
+	}
+}
+
+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
+{
+	int err, i;
+	aufs_bindex_t bbot;
+	struct kobject *kobj;
+	struct au_branch *br;
+	struct au_brsysfs *br_sysfs;
+
+	dbgaufs_brs_add(sb, bindex);
+
+	if (!sysaufs_brs)
+		return;
+
+	kobj = &au_sbi(sb)->si_kobj;
+	bbot = au_sbbot(sb);
+	for (; bindex <= bbot; bindex++) {
+		br = au_sbr(sb, bindex);
+		br_sysfs = br->br_sysfs;
+		snprintf(br_sysfs[AuBrSysfs_BR].name, sizeof(br_sysfs->name),
+			 SysaufsBr_PREFIX "%d", bindex);
+		snprintf(br_sysfs[AuBrSysfs_BRID].name, sizeof(br_sysfs->name),
+			 SysaufsBrid_PREFIX "%d", bindex);
+		for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
+			err = sysfs_create_file(kobj, &br_sysfs->attr);
+			if (unlikely(err))
+				pr_warn("failed %s under sysfs(%d)\n",
+					br_sysfs->name, err);
+			br_sysfs++;
+		}
+	}
+}
diff --git a/include/fs/aufs/sysrq.c b/include/fs/aufs/sysrq.c
new file mode 100644
index 000000000..401c1e8ec
--- /dev/null
+++ b/include/fs/aufs/sysrq.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * magic sysrq hanlder
+ */
+
+/* #include <linux/sysrq.h> */
+#include <linux/writeback.h>
+#include "aufs.h"
+
+/* ---------------------------------------------------------------------- */
+
+static void sysrq_sb(struct super_block *sb)
+{
+	char *plevel;
+	struct au_sbinfo *sbinfo;
+	struct file *file;
+	struct hlist_bl_head *files;
+	struct hlist_bl_node *pos;
+	struct au_finfo *finfo;
+
+	plevel = au_plevel;
+	au_plevel = KERN_WARNING;
+
+	/* since we define pr_fmt, call printk directly */
+#define pr(str) printk(KERN_WARNING AUFS_NAME ": " str)
+
+	sbinfo = au_sbi(sb);
+	printk(KERN_WARNING "si=%lx\n", sysaufs_si_id(sbinfo));
+	pr("superblock\n");
+	au_dpri_sb(sb);
+
+#if 0
+	pr("root dentry\n");
+	au_dpri_dentry(sb->s_root);
+	pr("root inode\n");
+	au_dpri_inode(d_inode(sb->s_root));
+#endif
+
+#if 0
+	do {
+		int err, i, j, ndentry;
+		struct au_dcsub_pages dpages;
+		struct au_dpage *dpage;
+
+		err = au_dpages_init(&dpages, GFP_ATOMIC);
+		if (unlikely(err))
+			break;
+		err = au_dcsub_pages(&dpages, sb->s_root, NULL, NULL);
+		if (!err)
+			for (i = 0; i < dpages.ndpage; i++) {
+				dpage = dpages.dpages + i;
+				ndentry = dpage->ndentry;
+				for (j = 0; j < ndentry; j++)
+					au_dpri_dentry(dpage->dentries[j]);
+			}
+		au_dpages_free(&dpages);
+	} while (0);
+#endif
+
+#if 1
+	{
+		struct inode *i;
+
+		pr("isolated inode\n");
+		spin_lock(&sb->s_inode_list_lock);
+		list_for_each_entry(i, &sb->s_inodes, i_sb_list) {
+			spin_lock(&i->i_lock);
+			if (1 || hlist_empty(&i->i_dentry))
+				au_dpri_inode(i);
+			spin_unlock(&i->i_lock);
+		}
+		spin_unlock(&sb->s_inode_list_lock);
+	}
+#endif
+	pr("files\n");
+	files = &au_sbi(sb)->si_files;
+	hlist_bl_lock(files);
+	hlist_bl_for_each_entry(finfo, pos, files, fi_hlist) {
+		umode_t mode;
+
+		file = finfo->fi_file;
+		mode = file_inode(file)->i_mode;
+		if (!special_file(mode))
+			au_dpri_file(file);
+	}
+	hlist_bl_unlock(files);
+	pr("done\n");
+
+#undef pr
+	au_plevel = plevel;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* module parameter */
+static char *aufs_sysrq_key = "a";
+module_param_named(sysrq, aufs_sysrq_key, charp, S_IRUGO);
+MODULE_PARM_DESC(sysrq, "MagicSysRq key for " AUFS_NAME);
+
+static void au_sysrq(int key __maybe_unused)
+{
+	struct au_sbinfo *sbinfo;
+	struct hlist_bl_node *pos;
+
+	lockdep_off();
+	au_sbilist_lock();
+	hlist_bl_for_each_entry(sbinfo, pos, &au_sbilist, si_list)
+		sysrq_sb(sbinfo->si_sb);
+	au_sbilist_unlock();
+	lockdep_on();
+}
+
+static struct sysrq_key_op au_sysrq_op = {
+	.handler	= au_sysrq,
+	.help_msg	= "Aufs",
+	.action_msg	= "Aufs",
+	.enable_mask	= SYSRQ_ENABLE_DUMP
+};
+
+/* ---------------------------------------------------------------------- */
+
+int __init au_sysrq_init(void)
+{
+	int err;
+	char key;
+
+	err = -1;
+	key = *aufs_sysrq_key;
+	if ('a' <= key && key <= 'z')
+		err = register_sysrq_key(key, &au_sysrq_op);
+	if (unlikely(err))
+		pr_err("err %d, sysrq=%c\n", err, key);
+	return err;
+}
+
+void au_sysrq_fin(void)
+{
+	int err;
+
+	err = unregister_sysrq_key(*aufs_sysrq_key, &au_sysrq_op);
+	if (unlikely(err))
+		pr_err("err %d (ignored)\n", err);
+}
diff --git a/include/fs/aufs/vdir.c b/include/fs/aufs/vdir.c
new file mode 100644
index 000000000..6db517232
--- /dev/null
+++ b/include/fs/aufs/vdir.c
@@ -0,0 +1,893 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * virtual or vertical directory
+ */
+
+#include "aufs.h"
+
+static unsigned int calc_size(int nlen)
+{
+	return ALIGN(sizeof(struct au_vdir_de) + nlen, sizeof(ino_t));
+}
+
+static int set_deblk_end(union au_vdir_deblk_p *p,
+			 union au_vdir_deblk_p *deblk_end)
+{
+	if (calc_size(0) <= deblk_end->deblk - p->deblk) {
+		p->de->de_str.len = 0;
+		/* smp_mb(); */
+		return 0;
+	}
+	return -1; /* error */
+}
+
+/* returns true or false */
+static int is_deblk_end(union au_vdir_deblk_p *p,
+			union au_vdir_deblk_p *deblk_end)
+{
+	if (calc_size(0) <= deblk_end->deblk - p->deblk)
+		return !p->de->de_str.len;
+	return 1;
+}
+
+static unsigned char *last_deblk(struct au_vdir *vdir)
+{
+	return vdir->vd_deblk[vdir->vd_nblk - 1];
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* estimate the appropriate size for name hash table */
+unsigned int au_rdhash_est(loff_t sz)
+{
+	unsigned int n;
+
+	n = UINT_MAX;
+	sz >>= 10;
+	if (sz < n)
+		n = sz;
+	if (sz < AUFS_RDHASH_DEF)
+		n = AUFS_RDHASH_DEF;
+	/* pr_info("n %u\n", n); */
+	return n;
+}
+
+/*
+ * the allocated memory has to be freed by
+ * au_nhash_wh_free() or au_nhash_de_free().
+ */
+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp)
+{
+	struct hlist_head *head;
+	unsigned int u;
+	size_t sz;
+
+	sz = sizeof(*nhash->nh_head) * num_hash;
+	head = kmalloc(sz, gfp);
+	if (head) {
+		nhash->nh_num = num_hash;
+		nhash->nh_head = head;
+		for (u = 0; u < num_hash; u++)
+			INIT_HLIST_HEAD(head++);
+		return 0; /* success */
+	}
+
+	return -ENOMEM;
+}
+
+static void nhash_count(struct hlist_head *head)
+{
+#if 0
+	unsigned long n;
+	struct hlist_node *pos;
+
+	n = 0;
+	hlist_for_each(pos, head)
+		n++;
+	pr_info("%lu\n", n);
+#endif
+}
+
+static void au_nhash_wh_do_free(struct hlist_head *head)
+{
+	struct au_vdir_wh *pos;
+	struct hlist_node *node;
+
+	hlist_for_each_entry_safe(pos, node, head, wh_hash)
+		kfree(pos);
+}
+
+static void au_nhash_de_do_free(struct hlist_head *head)
+{
+	struct au_vdir_dehstr *pos;
+	struct hlist_node *node;
+
+	hlist_for_each_entry_safe(pos, node, head, hash)
+		au_cache_free_vdir_dehstr(pos);
+}
+
+static void au_nhash_do_free(struct au_nhash *nhash,
+			     void (*free)(struct hlist_head *head))
+{
+	unsigned int n;
+	struct hlist_head *head;
+
+	n = nhash->nh_num;
+	if (!n)
+		return;
+
+	head = nhash->nh_head;
+	while (n-- > 0) {
+		nhash_count(head);
+		free(head++);
+	}
+	kfree(nhash->nh_head);
+}
+
+void au_nhash_wh_free(struct au_nhash *whlist)
+{
+	au_nhash_do_free(whlist, au_nhash_wh_do_free);
+}
+
+static void au_nhash_de_free(struct au_nhash *delist)
+{
+	au_nhash_do_free(delist, au_nhash_de_do_free);
+}
+
+/* ---------------------------------------------------------------------- */
+
+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
+			    int limit)
+{
+	int num;
+	unsigned int u, n;
+	struct hlist_head *head;
+	struct au_vdir_wh *pos;
+
+	num = 0;
+	n = whlist->nh_num;
+	head = whlist->nh_head;
+	for (u = 0; u < n; u++, head++)
+		hlist_for_each_entry(pos, head, wh_hash)
+			if (pos->wh_bindex == btgt && ++num > limit)
+				return 1;
+	return 0;
+}
+
+static struct hlist_head *au_name_hash(struct au_nhash *nhash,
+				       unsigned char *name,
+				       unsigned int len)
+{
+	unsigned int v;
+	/* const unsigned int magic_bit = 12; */
+
+	AuDebugOn(!nhash->nh_num || !nhash->nh_head);
+
+	v = 0;
+	if (len > 8)
+		len = 8;
+	while (len--)
+		v += *name++;
+	/* v = hash_long(v, magic_bit); */
+	v %= nhash->nh_num;
+	return nhash->nh_head + v;
+}
+
+static int au_nhash_test_name(struct au_vdir_destr *str, const char *name,
+			      int nlen)
+{
+	return str->len == nlen && !memcmp(str->name, name, nlen);
+}
+
+/* returns found or not */
+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen)
+{
+	struct hlist_head *head;
+	struct au_vdir_wh *pos;
+	struct au_vdir_destr *str;
+
+	head = au_name_hash(whlist, name, nlen);
+	hlist_for_each_entry(pos, head, wh_hash) {
+		str = &pos->wh_str;
+		AuDbg("%.*s\n", str->len, str->name);
+		if (au_nhash_test_name(str, name, nlen))
+			return 1;
+	}
+	return 0;
+}
+
+/* returns found(true) or not */
+static int test_known(struct au_nhash *delist, char *name, int nlen)
+{
+	struct hlist_head *head;
+	struct au_vdir_dehstr *pos;
+	struct au_vdir_destr *str;
+
+	head = au_name_hash(delist, name, nlen);
+	hlist_for_each_entry(pos, head, hash) {
+		str = pos->str;
+		AuDbg("%.*s\n", str->len, str->name);
+		if (au_nhash_test_name(str, name, nlen))
+			return 1;
+	}
+	return 0;
+}
+
+static void au_shwh_init_wh(struct au_vdir_wh *wh, ino_t ino,
+			    unsigned char d_type)
+{
+#ifdef CONFIG_AUFS_SHWH
+	wh->wh_ino = ino;
+	wh->wh_type = d_type;
+#endif
+}
+
+/* ---------------------------------------------------------------------- */
+
+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
+		       unsigned int d_type, aufs_bindex_t bindex,
+		       unsigned char shwh)
+{
+	int err;
+	struct au_vdir_destr *str;
+	struct au_vdir_wh *wh;
+
+	AuDbg("%.*s\n", nlen, name);
+	AuDebugOn(!whlist->nh_num || !whlist->nh_head);
+
+	err = -ENOMEM;
+	wh = kmalloc(sizeof(*wh) + nlen, GFP_NOFS);
+	if (unlikely(!wh))
+		goto out;
+
+	err = 0;
+	wh->wh_bindex = bindex;
+	if (shwh)
+		au_shwh_init_wh(wh, ino, d_type);
+	str = &wh->wh_str;
+	str->len = nlen;
+	memcpy(str->name, name, nlen);
+	hlist_add_head(&wh->wh_hash, au_name_hash(whlist, name, nlen));
+	/* smp_mb(); */
+
+out:
+	return err;
+}
+
+static int append_deblk(struct au_vdir *vdir)
+{
+	int err;
+	unsigned long ul;
+	const unsigned int deblk_sz = vdir->vd_deblk_sz;
+	union au_vdir_deblk_p p, deblk_end;
+	unsigned char **o;
+
+	err = -ENOMEM;
+	o = au_krealloc(vdir->vd_deblk, sizeof(*o) * (vdir->vd_nblk + 1),
+			GFP_NOFS, /*may_shrink*/0);
+	if (unlikely(!o))
+		goto out;
+
+	vdir->vd_deblk = o;
+	p.deblk = kmalloc(deblk_sz, GFP_NOFS);
+	if (p.deblk) {
+		ul = vdir->vd_nblk++;
+		vdir->vd_deblk[ul] = p.deblk;
+		vdir->vd_last.ul = ul;
+		vdir->vd_last.p.deblk = p.deblk;
+		deblk_end.deblk = p.deblk + deblk_sz;
+		err = set_deblk_end(&p, &deblk_end);
+	}
+
+out:
+	return err;
+}
+
+static int append_de(struct au_vdir *vdir, char *name, int nlen, ino_t ino,
+		     unsigned int d_type, struct au_nhash *delist)
+{
+	int err;
+	unsigned int sz;
+	const unsigned int deblk_sz = vdir->vd_deblk_sz;
+	union au_vdir_deblk_p p, *room, deblk_end;
+	struct au_vdir_dehstr *dehstr;
+
+	p.deblk = last_deblk(vdir);
+	deblk_end.deblk = p.deblk + deblk_sz;
+	room = &vdir->vd_last.p;
+	AuDebugOn(room->deblk < p.deblk || deblk_end.deblk <= room->deblk
+		  || !is_deblk_end(room, &deblk_end));
+
+	sz = calc_size(nlen);
+	if (unlikely(sz > deblk_end.deblk - room->deblk)) {
+		err = append_deblk(vdir);
+		if (unlikely(err))
+			goto out;
+
+		p.deblk = last_deblk(vdir);
+		deblk_end.deblk = p.deblk + deblk_sz;
+		/* smp_mb(); */
+		AuDebugOn(room->deblk != p.deblk);
+	}
+
+	err = -ENOMEM;
+	dehstr = au_cache_alloc_vdir_dehstr();
+	if (unlikely(!dehstr))
+		goto out;
+
+	dehstr->str = &room->de->de_str;
+	hlist_add_head(&dehstr->hash, au_name_hash(delist, name, nlen));
+	room->de->de_ino = ino;
+	room->de->de_type = d_type;
+	room->de->de_str.len = nlen;
+	memcpy(room->de->de_str.name, name, nlen);
+
+	err = 0;
+	room->deblk += sz;
+	if (unlikely(set_deblk_end(room, &deblk_end)))
+		err = append_deblk(vdir);
+	/* smp_mb(); */
+
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void au_vdir_free(struct au_vdir *vdir)
+{
+	unsigned char **deblk;
+
+	deblk = vdir->vd_deblk;
+	while (vdir->vd_nblk--)
+		kfree(*deblk++);
+	kfree(vdir->vd_deblk);
+	au_cache_free_vdir(vdir);
+}
+
+static struct au_vdir *alloc_vdir(struct file *file)
+{
+	struct au_vdir *vdir;
+	struct super_block *sb;
+	int err;
+
+	sb = file->f_path.dentry->d_sb;
+	SiMustAnyLock(sb);
+
+	err = -ENOMEM;
+	vdir = au_cache_alloc_vdir();
+	if (unlikely(!vdir))
+		goto out;
+
+	vdir->vd_deblk = kzalloc(sizeof(*vdir->vd_deblk), GFP_NOFS);
+	if (unlikely(!vdir->vd_deblk))
+		goto out_free;
+
+	vdir->vd_deblk_sz = au_sbi(sb)->si_rdblk;
+	if (!vdir->vd_deblk_sz) {
+		/* estimate the appropriate size for deblk */
+		vdir->vd_deblk_sz = au_dir_size(file, /*dentry*/NULL);
+		/* pr_info("vd_deblk_sz %u\n", vdir->vd_deblk_sz); */
+	}
+	vdir->vd_nblk = 0;
+	vdir->vd_version = 0;
+	vdir->vd_jiffy = 0;
+	err = append_deblk(vdir);
+	if (!err)
+		return vdir; /* success */
+
+	kfree(vdir->vd_deblk);
+
+out_free:
+	au_cache_free_vdir(vdir);
+out:
+	vdir = ERR_PTR(err);
+	return vdir;
+}
+
+static int reinit_vdir(struct au_vdir *vdir)
+{
+	int err;
+	union au_vdir_deblk_p p, deblk_end;
+
+	while (vdir->vd_nblk > 1) {
+		kfree(vdir->vd_deblk[vdir->vd_nblk - 1]);
+		/* vdir->vd_deblk[vdir->vd_nblk - 1] = NULL; */
+		vdir->vd_nblk--;
+	}
+	p.deblk = vdir->vd_deblk[0];
+	deblk_end.deblk = p.deblk + vdir->vd_deblk_sz;
+	err = set_deblk_end(&p, &deblk_end);
+	/* keep vd_dblk_sz */
+	vdir->vd_last.ul = 0;
+	vdir->vd_last.p.deblk = vdir->vd_deblk[0];
+	vdir->vd_version = 0;
+	vdir->vd_jiffy = 0;
+	/* smp_mb(); */
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+#define AuFillVdir_CALLED	1
+#define AuFillVdir_WHABLE	(1 << 1)
+#define AuFillVdir_SHWH		(1 << 2)
+#define au_ftest_fillvdir(flags, name)	((flags) & AuFillVdir_##name)
+#define au_fset_fillvdir(flags, name) \
+	do { (flags) |= AuFillVdir_##name; } while (0)
+#define au_fclr_fillvdir(flags, name) \
+	do { (flags) &= ~AuFillVdir_##name; } while (0)
+
+#ifndef CONFIG_AUFS_SHWH
+#undef AuFillVdir_SHWH
+#define AuFillVdir_SHWH		0
+#endif
+
+struct fillvdir_arg {
+	struct dir_context	ctx;
+	struct file		*file;
+	struct au_vdir		*vdir;
+	struct au_nhash		delist;
+	struct au_nhash		whlist;
+	aufs_bindex_t		bindex;
+	unsigned int		flags;
+	int			err;
+};
+
+static int fillvdir(struct dir_context *ctx, const char *__name, int nlen,
+		    loff_t offset __maybe_unused, u64 h_ino,
+		    unsigned int d_type)
+{
+	struct fillvdir_arg *arg = container_of(ctx, struct fillvdir_arg, ctx);
+	char *name = (void *)__name;
+	struct super_block *sb;
+	ino_t ino;
+	const unsigned char shwh = !!au_ftest_fillvdir(arg->flags, SHWH);
+
+	arg->err = 0;
+	sb = arg->file->f_path.dentry->d_sb;
+	au_fset_fillvdir(arg->flags, CALLED);
+	/* smp_mb(); */
+	if (nlen <= AUFS_WH_PFX_LEN
+	    || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
+		if (test_known(&arg->delist, name, nlen)
+		    || au_nhash_test_known_wh(&arg->whlist, name, nlen))
+			goto out; /* already exists or whiteouted */
+
+		arg->err = au_ino(sb, arg->bindex, h_ino, d_type, &ino);
+		if (!arg->err) {
+			if (unlikely(nlen > AUFS_MAX_NAMELEN))
+				d_type = DT_UNKNOWN;
+			arg->err = append_de(arg->vdir, name, nlen, ino,
+					     d_type, &arg->delist);
+		}
+	} else if (au_ftest_fillvdir(arg->flags, WHABLE)) {
+		name += AUFS_WH_PFX_LEN;
+		nlen -= AUFS_WH_PFX_LEN;
+		if (au_nhash_test_known_wh(&arg->whlist, name, nlen))
+			goto out; /* already whiteouted */
+
+		if (shwh)
+			arg->err = au_wh_ino(sb, arg->bindex, h_ino, d_type,
+					     &ino);
+		if (!arg->err) {
+			if (nlen <= AUFS_MAX_NAMELEN + AUFS_WH_PFX_LEN)
+				d_type = DT_UNKNOWN;
+			arg->err = au_nhash_append_wh
+				(&arg->whlist, name, nlen, ino, d_type,
+				 arg->bindex, shwh);
+		}
+	}
+
+out:
+	if (!arg->err)
+		arg->vdir->vd_jiffy = jiffies;
+	/* smp_mb(); */
+	AuTraceErr(arg->err);
+	return arg->err;
+}
+
+static int au_handle_shwh(struct super_block *sb, struct au_vdir *vdir,
+			  struct au_nhash *whlist, struct au_nhash *delist)
+{
+#ifdef CONFIG_AUFS_SHWH
+	int err;
+	unsigned int nh, u;
+	struct hlist_head *head;
+	struct au_vdir_wh *pos;
+	struct hlist_node *n;
+	char *p, *o;
+	struct au_vdir_destr *destr;
+
+	AuDebugOn(!au_opt_test(au_mntflags(sb), SHWH));
+
+	err = -ENOMEM;
+	o = p = (void *)__get_free_page(GFP_NOFS);
+	if (unlikely(!p))
+		goto out;
+
+	err = 0;
+	nh = whlist->nh_num;
+	memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
+	p += AUFS_WH_PFX_LEN;
+	for (u = 0; u < nh; u++) {
+		head = whlist->nh_head + u;
+		hlist_for_each_entry_safe(pos, n, head, wh_hash) {
+			destr = &pos->wh_str;
+			memcpy(p, destr->name, destr->len);
+			err = append_de(vdir, o, destr->len + AUFS_WH_PFX_LEN,
+					pos->wh_ino, pos->wh_type, delist);
+			if (unlikely(err))
+				break;
+		}
+	}
+
+	free_page((unsigned long)o);
+
+out:
+	AuTraceErr(err);
+	return err;
+#else
+	return 0;
+#endif
+}
+
+static int au_do_read_vdir(struct fillvdir_arg *arg)
+{
+	int err;
+	unsigned int rdhash;
+	loff_t offset;
+	aufs_bindex_t bbot, bindex, btop;
+	unsigned char shwh;
+	struct file *hf, *file;
+	struct super_block *sb;
+
+	file = arg->file;
+	sb = file->f_path.dentry->d_sb;
+	SiMustAnyLock(sb);
+
+	rdhash = au_sbi(sb)->si_rdhash;
+	if (!rdhash)
+		rdhash = au_rdhash_est(au_dir_size(file, /*dentry*/NULL));
+	err = au_nhash_alloc(&arg->delist, rdhash, GFP_NOFS);
+	if (unlikely(err))
+		goto out;
+	err = au_nhash_alloc(&arg->whlist, rdhash, GFP_NOFS);
+	if (unlikely(err))
+		goto out_delist;
+
+	err = 0;
+	arg->flags = 0;
+	shwh = 0;
+	if (au_opt_test(au_mntflags(sb), SHWH)) {
+		shwh = 1;
+		au_fset_fillvdir(arg->flags, SHWH);
+	}
+	btop = au_fbtop(file);
+	bbot = au_fbbot_dir(file);
+	for (bindex = btop; !err && bindex <= bbot; bindex++) {
+		hf = au_hf_dir(file, bindex);
+		if (!hf)
+			continue;
+
+		offset = vfsub_llseek(hf, 0, SEEK_SET);
+		err = offset;
+		if (unlikely(offset))
+			break;
+
+		arg->bindex = bindex;
+		au_fclr_fillvdir(arg->flags, WHABLE);
+		if (shwh
+		    || (bindex != bbot
+			&& au_br_whable(au_sbr_perm(sb, bindex))))
+			au_fset_fillvdir(arg->flags, WHABLE);
+		do {
+			arg->err = 0;
+			au_fclr_fillvdir(arg->flags, CALLED);
+			/* smp_mb(); */
+			err = vfsub_iterate_dir(hf, &arg->ctx);
+			if (err >= 0)
+				err = arg->err;
+		} while (!err && au_ftest_fillvdir(arg->flags, CALLED));
+
+		/*
+		 * dir_relax() may be good for concurrency, but aufs should not
+		 * use it since it will cause a lockdep problem.
+		 */
+	}
+
+	if (!err && shwh)
+		err = au_handle_shwh(sb, arg->vdir, &arg->whlist, &arg->delist);
+
+	au_nhash_wh_free(&arg->whlist);
+
+out_delist:
+	au_nhash_de_free(&arg->delist);
+out:
+	return err;
+}
+
+static int read_vdir(struct file *file, int may_read)
+{
+	int err;
+	unsigned long expire;
+	unsigned char do_read;
+	struct fillvdir_arg arg = {
+		.ctx = {
+			.actor = fillvdir
+		}
+	};
+	struct inode *inode;
+	struct au_vdir *vdir, *allocated;
+
+	err = 0;
+	inode = file_inode(file);
+	IMustLock(inode);
+	IiMustWriteLock(inode);
+	SiMustAnyLock(inode->i_sb);
+
+	allocated = NULL;
+	do_read = 0;
+	expire = au_sbi(inode->i_sb)->si_rdcache;
+	vdir = au_ivdir(inode);
+	if (!vdir) {
+		do_read = 1;
+		vdir = alloc_vdir(file);
+		err = PTR_ERR(vdir);
+		if (IS_ERR(vdir))
+			goto out;
+		err = 0;
+		allocated = vdir;
+	} else if (may_read
+		   && (inode->i_version != vdir->vd_version
+		       || time_after(jiffies, vdir->vd_jiffy + expire))) {
+		do_read = 1;
+		err = reinit_vdir(vdir);
+		if (unlikely(err))
+			goto out;
+	}
+
+	if (!do_read)
+		return 0; /* success */
+
+	arg.file = file;
+	arg.vdir = vdir;
+	err = au_do_read_vdir(&arg);
+	if (!err) {
+		/* file->f_pos = 0; */ /* todo: ctx->pos? */
+		vdir->vd_version = inode->i_version;
+		vdir->vd_last.ul = 0;
+		vdir->vd_last.p.deblk = vdir->vd_deblk[0];
+		if (allocated)
+			au_set_ivdir(inode, allocated);
+	} else if (allocated)
+		au_vdir_free(allocated);
+
+out:
+	return err;
+}
+
+static int copy_vdir(struct au_vdir *tgt, struct au_vdir *src)
+{
+	int err, rerr;
+	unsigned long ul, n;
+	const unsigned int deblk_sz = src->vd_deblk_sz;
+
+	AuDebugOn(tgt->vd_nblk != 1);
+
+	err = -ENOMEM;
+	if (tgt->vd_nblk < src->vd_nblk) {
+		unsigned char **p;
+
+		p = au_krealloc(tgt->vd_deblk, sizeof(*p) * src->vd_nblk,
+				GFP_NOFS, /*may_shrink*/0);
+		if (unlikely(!p))
+			goto out;
+		tgt->vd_deblk = p;
+	}
+
+	if (tgt->vd_deblk_sz != deblk_sz) {
+		unsigned char *p;
+
+		tgt->vd_deblk_sz = deblk_sz;
+		p = au_krealloc(tgt->vd_deblk[0], deblk_sz, GFP_NOFS,
+				/*may_shrink*/1);
+		if (unlikely(!p))
+			goto out;
+		tgt->vd_deblk[0] = p;
+	}
+	memcpy(tgt->vd_deblk[0], src->vd_deblk[0], deblk_sz);
+	tgt->vd_version = src->vd_version;
+	tgt->vd_jiffy = src->vd_jiffy;
+
+	n = src->vd_nblk;
+	for (ul = 1; ul < n; ul++) {
+		tgt->vd_deblk[ul] = kmemdup(src->vd_deblk[ul], deblk_sz,
+					    GFP_NOFS);
+		if (unlikely(!tgt->vd_deblk[ul]))
+			goto out;
+		tgt->vd_nblk++;
+	}
+	tgt->vd_nblk = n;
+	tgt->vd_last.ul = tgt->vd_last.ul;
+	tgt->vd_last.p.deblk = tgt->vd_deblk[tgt->vd_last.ul];
+	tgt->vd_last.p.deblk += src->vd_last.p.deblk
+		- src->vd_deblk[src->vd_last.ul];
+	/* smp_mb(); */
+	return 0; /* success */
+
+out:
+	rerr = reinit_vdir(tgt);
+	BUG_ON(rerr);
+	return err;
+}
+
+int au_vdir_init(struct file *file)
+{
+	int err;
+	struct inode *inode;
+	struct au_vdir *vdir_cache, *allocated;
+
+	/* test file->f_pos here instead of ctx->pos */
+	err = read_vdir(file, !file->f_pos);
+	if (unlikely(err))
+		goto out;
+
+	allocated = NULL;
+	vdir_cache = au_fvdir_cache(file);
+	if (!vdir_cache) {
+		vdir_cache = alloc_vdir(file);
+		err = PTR_ERR(vdir_cache);
+		if (IS_ERR(vdir_cache))
+			goto out;
+		allocated = vdir_cache;
+	} else if (!file->f_pos && vdir_cache->vd_version != file->f_version) {
+		/* test file->f_pos here instead of ctx->pos */
+		err = reinit_vdir(vdir_cache);
+		if (unlikely(err))
+			goto out;
+	} else
+		return 0; /* success */
+
+	inode = file_inode(file);
+	err = copy_vdir(vdir_cache, au_ivdir(inode));
+	if (!err) {
+		file->f_version = inode->i_version;
+		if (allocated)
+			au_set_fvdir_cache(file, allocated);
+	} else if (allocated)
+		au_vdir_free(allocated);
+
+out:
+	return err;
+}
+
+static loff_t calc_offset(struct au_vdir *vdir)
+{
+	loff_t offset;
+	union au_vdir_deblk_p p;
+
+	p.deblk = vdir->vd_deblk[vdir->vd_last.ul];
+	offset = vdir->vd_last.p.deblk - p.deblk;
+	offset += vdir->vd_deblk_sz * vdir->vd_last.ul;
+	return offset;
+}
+
+/* returns true or false */
+static int seek_vdir(struct file *file, struct dir_context *ctx)
+{
+	int valid;
+	unsigned int deblk_sz;
+	unsigned long ul, n;
+	loff_t offset;
+	union au_vdir_deblk_p p, deblk_end;
+	struct au_vdir *vdir_cache;
+
+	valid = 1;
+	vdir_cache = au_fvdir_cache(file);
+	offset = calc_offset(vdir_cache);
+	AuDbg("offset %lld\n", offset);
+	if (ctx->pos == offset)
+		goto out;
+
+	vdir_cache->vd_last.ul = 0;
+	vdir_cache->vd_last.p.deblk = vdir_cache->vd_deblk[0];
+	if (!ctx->pos)
+		goto out;
+
+	valid = 0;
+	deblk_sz = vdir_cache->vd_deblk_sz;
+	ul = div64_u64(ctx->pos, deblk_sz);
+	AuDbg("ul %lu\n", ul);
+	if (ul >= vdir_cache->vd_nblk)
+		goto out;
+
+	n = vdir_cache->vd_nblk;
+	for (; ul < n; ul++) {
+		p.deblk = vdir_cache->vd_deblk[ul];
+		deblk_end.deblk = p.deblk + deblk_sz;
+		offset = ul;
+		offset *= deblk_sz;
+		while (!is_deblk_end(&p, &deblk_end) && offset < ctx->pos) {
+			unsigned int l;
+
+			l = calc_size(p.de->de_str.len);
+			offset += l;
+			p.deblk += l;
+		}
+		if (!is_deblk_end(&p, &deblk_end)) {
+			valid = 1;
+			vdir_cache->vd_last.ul = ul;
+			vdir_cache->vd_last.p = p;
+			break;
+		}
+	}
+
+out:
+	/* smp_mb(); */
+	if (!valid)
+		AuDbg("valid %d\n", !valid);
+	return valid;
+}
+
+int au_vdir_fill_de(struct file *file, struct dir_context *ctx)
+{
+	unsigned int l, deblk_sz;
+	union au_vdir_deblk_p deblk_end;
+	struct au_vdir *vdir_cache;
+	struct au_vdir_de *de;
+
+	vdir_cache = au_fvdir_cache(file);
+	if (!seek_vdir(file, ctx))
+		return 0;
+
+	deblk_sz = vdir_cache->vd_deblk_sz;
+	while (1) {
+		deblk_end.deblk = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
+		deblk_end.deblk += deblk_sz;
+		while (!is_deblk_end(&vdir_cache->vd_last.p, &deblk_end)) {
+			de = vdir_cache->vd_last.p.de;
+			AuDbg("%.*s, off%lld, i%lu, dt%d\n",
+			      de->de_str.len, de->de_str.name, ctx->pos,
+			      (unsigned long)de->de_ino, de->de_type);
+			if (unlikely(!dir_emit(ctx, de->de_str.name,
+					       de->de_str.len, de->de_ino,
+					       de->de_type))) {
+				/* todo: ignore the error caused by udba? */
+				/* return err; */
+				return 0;
+			}
+
+			l = calc_size(de->de_str.len);
+			vdir_cache->vd_last.p.deblk += l;
+			ctx->pos += l;
+		}
+		if (vdir_cache->vd_last.ul < vdir_cache->vd_nblk - 1) {
+			vdir_cache->vd_last.ul++;
+			vdir_cache->vd_last.p.deblk
+				= vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
+			ctx->pos = deblk_sz * vdir_cache->vd_last.ul;
+			continue;
+		}
+		break;
+	}
+
+	/* smp_mb(); */
+	return 0;
+}
diff --git a/include/fs/aufs/vfsub.c b/include/fs/aufs/vfsub.c
new file mode 100644
index 000000000..732d81cb4
--- /dev/null
+++ b/include/fs/aufs/vfsub.c
@@ -0,0 +1,894 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * sub-routines for VFS
+ */
+
+#include <linux/mnt_namespace.h>
+#include <linux/namei.h>
+#include <linux/nsproxy.h>
+#include <linux/security.h>
+#include <linux/splice.h>
+#include "aufs.h"
+
+#ifdef CONFIG_AUFS_BR_FUSE
+int vfsub_test_mntns(struct vfsmount *mnt, struct super_block *h_sb)
+{
+	if (!au_test_fuse(h_sb) || !au_userns)
+		return 0;
+
+	return is_current_mnt_ns(mnt) ? 0 : -EACCES;
+}
+#endif
+
+int vfsub_sync_filesystem(struct super_block *h_sb, int wait)
+{
+	int err;
+
+	lockdep_off();
+	down_read(&h_sb->s_umount);
+	err = __sync_filesystem(h_sb, wait);
+	up_read(&h_sb->s_umount);
+	lockdep_on();
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int vfsub_update_h_iattr(struct path *h_path, int *did)
+{
+	int err;
+	struct kstat st;
+	struct super_block *h_sb;
+
+	/* for remote fs, leave work for its getattr or d_revalidate */
+	/* for bad i_attr fs, handle them in aufs_getattr() */
+	/* still some fs may acquire i_mutex. we need to skip them */
+	err = 0;
+	if (!did)
+		did = &err;
+	h_sb = h_path->dentry->d_sb;
+	*did = (!au_test_fs_remote(h_sb) && au_test_fs_refresh_iattr(h_sb));
+	if (*did)
+		err = vfsub_getattr(h_path, &st);
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+struct file *vfsub_dentry_open(struct path *path, int flags)
+{
+	struct file *file;
+
+	file = dentry_open(path, flags /* | __FMODE_NONOTIFY */,
+			   current_cred());
+	if (!IS_ERR_OR_NULL(file)
+	    && (file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
+		i_readcount_inc(d_inode(path->dentry));
+
+	return file;
+}
+
+struct file *vfsub_filp_open(const char *path, int oflags, int mode)
+{
+	struct file *file;
+
+	lockdep_off();
+	file = filp_open(path,
+			 oflags /* | __FMODE_NONOTIFY */,
+			 mode);
+	lockdep_on();
+	if (IS_ERR(file))
+		goto out;
+	vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
+
+out:
+	return file;
+}
+
+/*
+ * Ideally this function should call VFS:do_last() in order to keep all its
+ * checkings. But it is very hard for aufs to regenerate several VFS internal
+ * structure such as nameidata. This is a second (or third) best approach.
+ * cf. linux/fs/namei.c:do_last(), lookup_open() and atomic_open().
+ */
+int vfsub_atomic_open(struct inode *dir, struct dentry *dentry,
+		      struct vfsub_aopen_args *args, struct au_branch *br)
+{
+	int err;
+	struct file *file = args->file;
+	/* copied from linux/fs/namei.c:atomic_open() */
+	struct dentry *const DENTRY_NOT_SET = (void *)-1UL;
+
+	IMustLock(dir);
+	AuDebugOn(!dir->i_op->atomic_open);
+
+	err = au_br_test_oflag(args->open_flag, br);
+	if (unlikely(err))
+		goto out;
+
+	args->file->f_path.dentry = DENTRY_NOT_SET;
+	args->file->f_path.mnt = au_br_mnt(br);
+	err = dir->i_op->atomic_open(dir, dentry, file, args->open_flag,
+				     args->create_mode, args->opened);
+	if (err >= 0) {
+		/* some filesystems don't set FILE_CREATED while succeeded? */
+		if (*args->opened & FILE_CREATED)
+			fsnotify_create(dir, dentry);
+	} else
+		goto out;
+
+
+	if (!err) {
+		/* todo: call VFS:may_open() here */
+		err = open_check_o_direct(file);
+		/* todo: ima_file_check() too? */
+		if (!err && (args->open_flag & __FMODE_EXEC))
+			err = deny_write_access(file);
+		if (unlikely(err))
+			/* note that the file is created and still opened */
+			goto out;
+	}
+
+	au_br_get(br);
+	fsnotify_open(file);
+
+out:
+	return err;
+}
+
+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path)
+{
+	int err;
+
+	err = kern_path(name, flags, path);
+	if (!err && d_is_positive(path->dentry))
+		vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
+	return err;
+}
+
+struct dentry *vfsub_lookup_one_len_unlocked(const char *name,
+					     struct dentry *parent, int len)
+{
+	struct path path = {
+		.mnt = NULL
+	};
+
+	path.dentry = lookup_one_len_unlocked(name, parent, len);
+	if (IS_ERR(path.dentry))
+		goto out;
+	if (d_is_positive(path.dentry))
+		vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
+
+out:
+	AuTraceErrPtr(path.dentry);
+	return path.dentry;
+}
+
+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
+				    int len)
+{
+	struct path path = {
+		.mnt = NULL
+	};
+
+	/* VFS checks it too, but by WARN_ON_ONCE() */
+	IMustLock(d_inode(parent));
+
+	path.dentry = lookup_one_len(name, parent, len);
+	if (IS_ERR(path.dentry))
+		goto out;
+	if (d_is_positive(path.dentry))
+		vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
+
+out:
+	AuTraceErrPtr(path.dentry);
+	return path.dentry;
+}
+
+void vfsub_call_lkup_one(void *args)
+{
+	struct vfsub_lkup_one_args *a = args;
+	*a->errp = vfsub_lkup_one(a->name, a->parent);
+}
+
+/* ---------------------------------------------------------------------- */
+
+struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
+				 struct dentry *d2, struct au_hinode *hdir2)
+{
+	struct dentry *d;
+
+	lockdep_off();
+	d = lock_rename(d1, d2);
+	lockdep_on();
+	au_hn_suspend(hdir1);
+	if (hdir1 != hdir2)
+		au_hn_suspend(hdir2);
+
+	return d;
+}
+
+void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
+			 struct dentry *d2, struct au_hinode *hdir2)
+{
+	au_hn_resume(hdir1);
+	if (hdir1 != hdir2)
+		au_hn_resume(hdir2);
+	lockdep_off();
+	unlock_rename(d1, d2);
+	lockdep_on();
+}
+
+/* ---------------------------------------------------------------------- */
+
+int vfsub_create(struct inode *dir, struct path *path, int mode, bool want_excl)
+{
+	int err;
+	struct dentry *d;
+
+	IMustLock(dir);
+
+	d = path->dentry;
+	path->dentry = d->d_parent;
+	err = security_path_mknod(path, d, mode, 0);
+	path->dentry = d;
+	if (unlikely(err))
+		goto out;
+
+	lockdep_off();
+	err = vfs_create(dir, path->dentry, mode, want_excl);
+	lockdep_on();
+	if (!err) {
+		struct path tmp = *path;
+		int did;
+
+		vfsub_update_h_iattr(&tmp, &did);
+		if (did) {
+			tmp.dentry = path->dentry->d_parent;
+			vfsub_update_h_iattr(&tmp, /*did*/NULL);
+		}
+		/*ignore*/
+	}
+
+out:
+	return err;
+}
+
+int vfsub_symlink(struct inode *dir, struct path *path, const char *symname)
+{
+	int err;
+	struct dentry *d;
+
+	IMustLock(dir);
+
+	d = path->dentry;
+	path->dentry = d->d_parent;
+	err = security_path_symlink(path, d, symname);
+	path->dentry = d;
+	if (unlikely(err))
+		goto out;
+
+	lockdep_off();
+	err = vfs_symlink(dir, path->dentry, symname);
+	lockdep_on();
+	if (!err) {
+		struct path tmp = *path;
+		int did;
+
+		vfsub_update_h_iattr(&tmp, &did);
+		if (did) {
+			tmp.dentry = path->dentry->d_parent;
+			vfsub_update_h_iattr(&tmp, /*did*/NULL);
+		}
+		/*ignore*/
+	}
+
+out:
+	return err;
+}
+
+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev)
+{
+	int err;
+	struct dentry *d;
+
+	IMustLock(dir);
+
+	d = path->dentry;
+	path->dentry = d->d_parent;
+	err = security_path_mknod(path, d, mode, new_encode_dev(dev));
+	path->dentry = d;
+	if (unlikely(err))
+		goto out;
+
+	lockdep_off();
+	err = vfs_mknod(dir, path->dentry, mode, dev);
+	lockdep_on();
+	if (!err) {
+		struct path tmp = *path;
+		int did;
+
+		vfsub_update_h_iattr(&tmp, &did);
+		if (did) {
+			tmp.dentry = path->dentry->d_parent;
+			vfsub_update_h_iattr(&tmp, /*did*/NULL);
+		}
+		/*ignore*/
+	}
+
+out:
+	return err;
+}
+
+static int au_test_nlink(struct inode *inode)
+{
+	const unsigned int link_max = UINT_MAX >> 1; /* rough margin */
+
+	if (!au_test_fs_no_limit_nlink(inode->i_sb)
+	    || inode->i_nlink < link_max)
+		return 0;
+	return -EMLINK;
+}
+
+int vfsub_link(struct dentry *src_dentry, struct inode *dir, struct path *path,
+	       struct inode **delegated_inode)
+{
+	int err;
+	struct dentry *d;
+
+	IMustLock(dir);
+
+	err = au_test_nlink(d_inode(src_dentry));
+	if (unlikely(err))
+		return err;
+
+	/* we don't call may_linkat() */
+	d = path->dentry;
+	path->dentry = d->d_parent;
+	err = security_path_link(src_dentry, path, d);
+	path->dentry = d;
+	if (unlikely(err))
+		goto out;
+
+	lockdep_off();
+	err = vfs_link(src_dentry, dir, path->dentry, delegated_inode);
+	lockdep_on();
+	if (!err) {
+		struct path tmp = *path;
+		int did;
+
+		/* fuse has different memory inode for the same inumber */
+		vfsub_update_h_iattr(&tmp, &did);
+		if (did) {
+			tmp.dentry = path->dentry->d_parent;
+			vfsub_update_h_iattr(&tmp, /*did*/NULL);
+			tmp.dentry = src_dentry;
+			vfsub_update_h_iattr(&tmp, /*did*/NULL);
+		}
+		/*ignore*/
+	}
+
+out:
+	return err;
+}
+
+int vfsub_rename(struct inode *src_dir, struct dentry *src_dentry,
+		 struct inode *dir, struct path *path,
+		 struct inode **delegated_inode, unsigned int flags)
+{
+	int err;
+	struct path tmp = {
+		.mnt	= path->mnt
+	};
+	struct dentry *d;
+
+	IMustLock(dir);
+	IMustLock(src_dir);
+
+	d = path->dentry;
+	path->dentry = d->d_parent;
+	tmp.dentry = src_dentry->d_parent;
+	err = security_path_rename(&tmp, src_dentry, path, d, /*flags*/0);
+	path->dentry = d;
+	if (unlikely(err))
+		goto out;
+
+	lockdep_off();
+	err = vfs_rename(src_dir, src_dentry, dir, path->dentry,
+			 delegated_inode, flags);
+	lockdep_on();
+	if (!err) {
+		int did;
+
+		tmp.dentry = d->d_parent;
+		vfsub_update_h_iattr(&tmp, &did);
+		if (did) {
+			tmp.dentry = src_dentry;
+			vfsub_update_h_iattr(&tmp, /*did*/NULL);
+			tmp.dentry = src_dentry->d_parent;
+			vfsub_update_h_iattr(&tmp, /*did*/NULL);
+		}
+		/*ignore*/
+	}
+
+out:
+	return err;
+}
+
+int vfsub_mkdir(struct inode *dir, struct path *path, int mode)
+{
+	int err;
+	struct dentry *d;
+
+	IMustLock(dir);
+
+	d = path->dentry;
+	path->dentry = d->d_parent;
+	err = security_path_mkdir(path, d, mode);
+	path->dentry = d;
+	if (unlikely(err))
+		goto out;
+
+	lockdep_off();
+	err = vfs_mkdir(dir, path->dentry, mode);
+	lockdep_on();
+	if (!err) {
+		struct path tmp = *path;
+		int did;
+
+		vfsub_update_h_iattr(&tmp, &did);
+		if (did) {
+			tmp.dentry = path->dentry->d_parent;
+			vfsub_update_h_iattr(&tmp, /*did*/NULL);
+		}
+		/*ignore*/
+	}
+
+out:
+	return err;
+}
+
+int vfsub_rmdir(struct inode *dir, struct path *path)
+{
+	int err;
+	struct dentry *d;
+
+	IMustLock(dir);
+
+	d = path->dentry;
+	path->dentry = d->d_parent;
+	err = security_path_rmdir(path, d);
+	path->dentry = d;
+	if (unlikely(err))
+		goto out;
+
+	lockdep_off();
+	err = vfs_rmdir(dir, path->dentry);
+	lockdep_on();
+	if (!err) {
+		struct path tmp = {
+			.dentry	= path->dentry->d_parent,
+			.mnt	= path->mnt
+		};
+
+		vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
+	}
+
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* todo: support mmap_sem? */
+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
+		     loff_t *ppos)
+{
+	ssize_t err;
+
+	lockdep_off();
+	err = vfs_read(file, ubuf, count, ppos);
+	lockdep_on();
+	if (err >= 0)
+		vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
+	return err;
+}
+
+/* todo: kernel_read()? */
+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
+		     loff_t *ppos)
+{
+	ssize_t err;
+	mm_segment_t oldfs;
+	union {
+		void *k;
+		char __user *u;
+	} buf;
+
+	buf.k = kbuf;
+	oldfs = get_fs();
+	set_fs(KERNEL_DS);
+	err = vfsub_read_u(file, buf.u, count, ppos);
+	set_fs(oldfs);
+	return err;
+}
+
+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
+		      loff_t *ppos)
+{
+	ssize_t err;
+
+	lockdep_off();
+	err = vfs_write(file, ubuf, count, ppos);
+	lockdep_on();
+	if (err >= 0)
+		vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
+	return err;
+}
+
+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count, loff_t *ppos)
+{
+	ssize_t err;
+	mm_segment_t oldfs;
+	union {
+		void *k;
+		const char __user *u;
+	} buf;
+
+	buf.k = kbuf;
+	oldfs = get_fs();
+	set_fs(KERNEL_DS);
+	err = vfsub_write_u(file, buf.u, count, ppos);
+	set_fs(oldfs);
+	return err;
+}
+
+int vfsub_flush(struct file *file, fl_owner_t id)
+{
+	int err;
+
+	err = 0;
+	if (file->f_op->flush) {
+		if (!au_test_nfs(file->f_path.dentry->d_sb))
+			err = file->f_op->flush(file, id);
+		else {
+			lockdep_off();
+			err = file->f_op->flush(file, id);
+			lockdep_on();
+		}
+		if (!err)
+			vfsub_update_h_iattr(&file->f_path, /*did*/NULL);
+		/*ignore*/
+	}
+	return err;
+}
+
+int vfsub_iterate_dir(struct file *file, struct dir_context *ctx)
+{
+	int err;
+
+	AuDbg("%pD, ctx{%pf, %llu}\n", file, ctx->actor, ctx->pos);
+
+	lockdep_off();
+	err = iterate_dir(file, ctx);
+	lockdep_on();
+	if (err >= 0)
+		vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
+
+	return err;
+}
+
+long vfsub_splice_to(struct file *in, loff_t *ppos,
+		     struct pipe_inode_info *pipe, size_t len,
+		     unsigned int flags)
+{
+	long err;
+
+	lockdep_off();
+	err = do_splice_to(in, ppos, pipe, len, flags);
+	lockdep_on();
+	file_accessed(in);
+	if (err >= 0)
+		vfsub_update_h_iattr(&in->f_path, /*did*/NULL); /*ignore*/
+	return err;
+}
+
+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
+		       loff_t *ppos, size_t len, unsigned int flags)
+{
+	long err;
+
+	lockdep_off();
+	err = do_splice_from(pipe, out, ppos, len, flags);
+	lockdep_on();
+	if (err >= 0)
+		vfsub_update_h_iattr(&out->f_path, /*did*/NULL); /*ignore*/
+	return err;
+}
+
+int vfsub_fsync(struct file *file, struct path *path, int datasync)
+{
+	int err;
+
+	/* file can be NULL */
+	lockdep_off();
+	err = vfs_fsync(file, datasync);
+	lockdep_on();
+	if (!err) {
+		if (!path) {
+			AuDebugOn(!file);
+			path = &file->f_path;
+		}
+		vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
+	}
+	return err;
+}
+
+/* cf. open.c:do_sys_truncate() and do_sys_ftruncate() */
+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
+		struct file *h_file)
+{
+	int err;
+	struct inode *h_inode;
+	struct super_block *h_sb;
+
+	if (!h_file) {
+		err = vfsub_truncate(h_path, length);
+		goto out;
+	}
+
+	h_inode = d_inode(h_path->dentry);
+	h_sb = h_inode->i_sb;
+	lockdep_off();
+	sb_start_write(h_sb);
+	lockdep_on();
+	err = locks_verify_truncate(h_inode, h_file, length);
+	if (!err)
+		err = security_path_truncate(h_path);
+	if (!err) {
+		lockdep_off();
+		err = do_truncate(h_path->dentry, length, attr, h_file);
+		lockdep_on();
+	}
+	lockdep_off();
+	sb_end_write(h_sb);
+	lockdep_on();
+
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+struct au_vfsub_mkdir_args {
+	int *errp;
+	struct inode *dir;
+	struct path *path;
+	int mode;
+};
+
+static void au_call_vfsub_mkdir(void *args)
+{
+	struct au_vfsub_mkdir_args *a = args;
+	*a->errp = vfsub_mkdir(a->dir, a->path, a->mode);
+}
+
+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode)
+{
+	int err, do_sio, wkq_err;
+
+	do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
+	if (!do_sio) {
+		lockdep_off();
+		err = vfsub_mkdir(dir, path, mode);
+		lockdep_on();
+	} else {
+		struct au_vfsub_mkdir_args args = {
+			.errp	= &err,
+			.dir	= dir,
+			.path	= path,
+			.mode	= mode
+		};
+		wkq_err = au_wkq_wait(au_call_vfsub_mkdir, &args);
+		if (unlikely(wkq_err))
+			err = wkq_err;
+	}
+
+	return err;
+}
+
+struct au_vfsub_rmdir_args {
+	int *errp;
+	struct inode *dir;
+	struct path *path;
+};
+
+static void au_call_vfsub_rmdir(void *args)
+{
+	struct au_vfsub_rmdir_args *a = args;
+	*a->errp = vfsub_rmdir(a->dir, a->path);
+}
+
+int vfsub_sio_rmdir(struct inode *dir, struct path *path)
+{
+	int err, do_sio, wkq_err;
+
+	do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
+	if (!do_sio) {
+		lockdep_off();
+		err = vfsub_rmdir(dir, path);
+		lockdep_on();
+	} else {
+		struct au_vfsub_rmdir_args args = {
+			.errp	= &err,
+			.dir	= dir,
+			.path	= path
+		};
+		wkq_err = au_wkq_wait(au_call_vfsub_rmdir, &args);
+		if (unlikely(wkq_err))
+			err = wkq_err;
+	}
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+struct notify_change_args {
+	int *errp;
+	struct path *path;
+	struct iattr *ia;
+	struct inode **delegated_inode;
+};
+
+static void call_notify_change(void *args)
+{
+	struct notify_change_args *a = args;
+	struct inode *h_inode;
+
+	h_inode = d_inode(a->path->dentry);
+	IMustLock(h_inode);
+
+	*a->errp = -EPERM;
+	if (!IS_IMMUTABLE(h_inode) && !IS_APPEND(h_inode)) {
+		lockdep_off();
+		*a->errp = notify_change(a->path->dentry, a->ia,
+					 a->delegated_inode);
+		lockdep_on();
+		if (!*a->errp)
+			vfsub_update_h_iattr(a->path, /*did*/NULL); /*ignore*/
+	}
+	AuTraceErr(*a->errp);
+}
+
+int vfsub_notify_change(struct path *path, struct iattr *ia,
+			struct inode **delegated_inode)
+{
+	int err;
+	struct notify_change_args args = {
+		.errp			= &err,
+		.path			= path,
+		.ia			= ia,
+		.delegated_inode	= delegated_inode
+	};
+
+	call_notify_change(&args);
+
+	return err;
+}
+
+int vfsub_sio_notify_change(struct path *path, struct iattr *ia,
+			    struct inode **delegated_inode)
+{
+	int err, wkq_err;
+	struct notify_change_args args = {
+		.errp			= &err,
+		.path			= path,
+		.ia			= ia,
+		.delegated_inode	= delegated_inode
+	};
+
+	wkq_err = au_wkq_wait(call_notify_change, &args);
+	if (unlikely(wkq_err))
+		err = wkq_err;
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+struct unlink_args {
+	int *errp;
+	struct inode *dir;
+	struct path *path;
+	struct inode **delegated_inode;
+};
+
+static void call_unlink(void *args)
+{
+	struct unlink_args *a = args;
+	struct dentry *d = a->path->dentry;
+	struct inode *h_inode;
+	const int stop_sillyrename = (au_test_nfs(d->d_sb)
+				      && au_dcount(d) == 1);
+
+	IMustLock(a->dir);
+
+	a->path->dentry = d->d_parent;
+	*a->errp = security_path_unlink(a->path, d);
+	a->path->dentry = d;
+	if (unlikely(*a->errp))
+		return;
+
+	if (!stop_sillyrename)
+		dget(d);
+	h_inode = NULL;
+	if (d_is_positive(d)) {
+		h_inode = d_inode(d);
+		ihold(h_inode);
+	}
+
+	lockdep_off();
+	*a->errp = vfs_unlink(a->dir, d, a->delegated_inode);
+	lockdep_on();
+	if (!*a->errp) {
+		struct path tmp = {
+			.dentry = d->d_parent,
+			.mnt	= a->path->mnt
+		};
+		vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
+	}
+
+	if (!stop_sillyrename)
+		dput(d);
+	if (h_inode)
+		iput(h_inode);
+
+	AuTraceErr(*a->errp);
+}
+
+/*
+ * @dir: must be locked.
+ * @dentry: target dentry.
+ */
+int vfsub_unlink(struct inode *dir, struct path *path,
+		 struct inode **delegated_inode, int force)
+{
+	int err;
+	struct unlink_args args = {
+		.errp			= &err,
+		.dir			= dir,
+		.path			= path,
+		.delegated_inode	= delegated_inode
+	};
+
+	if (!force)
+		call_unlink(&args);
+	else {
+		int wkq_err;
+
+		wkq_err = au_wkq_wait(call_unlink, &args);
+		if (unlikely(wkq_err))
+			err = wkq_err;
+	}
+
+	return err;
+}
diff --git a/include/fs/aufs/vfsub.h b/include/fs/aufs/vfsub.h
new file mode 100644
index 000000000..9dd6cb218
--- /dev/null
+++ b/include/fs/aufs/vfsub.h
@@ -0,0 +1,360 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * sub-routines for VFS
+ */
+
+#ifndef __AUFS_VFSUB_H__
+#define __AUFS_VFSUB_H__
+
+#ifdef __KERNEL__
+
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/posix_acl.h>
+#include <linux/xattr.h>
+#include "debug.h"
+
+/* copied from linux/fs/internal.h */
+/* todo: BAD approach!! */
+extern void __mnt_drop_write(struct vfsmount *);
+extern int open_check_o_direct(struct file *f);
+
+/* ---------------------------------------------------------------------- */
+
+/* lock subclass for lower inode */
+/* default MAX_LOCKDEP_SUBCLASSES(8) is not enough */
+/* reduce? gave up. */
+enum {
+	AuLsc_I_Begin = I_MUTEX_PARENT2, /* 5 */
+	AuLsc_I_PARENT,		/* lower inode, parent first */
+	AuLsc_I_PARENT2,	/* copyup dirs */
+	AuLsc_I_PARENT3,	/* copyup wh */
+	AuLsc_I_CHILD,
+	AuLsc_I_CHILD2,
+	AuLsc_I_End
+};
+
+/* to debug easier, do not make them inlined functions */
+#define MtxMustLock(mtx)	AuDebugOn(!mutex_is_locked(mtx))
+#define IMustLock(i)		AuDebugOn(!inode_is_locked(i))
+
+/* why VFS doesn't define it? */
+static inline
+void vfsub_inode_lock_shared_nested(struct inode *inode, unsigned int sc)
+{
+	down_read_nested(&inode->i_rwsem, sc);
+}
+
+/* ---------------------------------------------------------------------- */
+
+static inline void vfsub_drop_nlink(struct inode *inode)
+{
+	AuDebugOn(!inode->i_nlink);
+	drop_nlink(inode);
+}
+
+static inline void vfsub_dead_dir(struct inode *inode)
+{
+	AuDebugOn(!S_ISDIR(inode->i_mode));
+	inode->i_flags |= S_DEAD;
+	clear_nlink(inode);
+}
+
+static inline int vfsub_native_ro(struct inode *inode)
+{
+	return sb_rdonly(inode->i_sb)
+		|| IS_RDONLY(inode)
+		/* || IS_APPEND(inode) */
+		|| IS_IMMUTABLE(inode);
+}
+
+#ifdef CONFIG_AUFS_BR_FUSE
+int vfsub_test_mntns(struct vfsmount *mnt, struct super_block *h_sb);
+#else
+AuStubInt0(vfsub_test_mntns, struct vfsmount *mnt, struct super_block *h_sb);
+#endif
+
+int vfsub_sync_filesystem(struct super_block *h_sb, int wait);
+
+/* ---------------------------------------------------------------------- */
+
+int vfsub_update_h_iattr(struct path *h_path, int *did);
+struct file *vfsub_dentry_open(struct path *path, int flags);
+struct file *vfsub_filp_open(const char *path, int oflags, int mode);
+struct vfsub_aopen_args {
+	struct file	*file;
+	unsigned int	open_flag;
+	umode_t		create_mode;
+	int		*opened;
+};
+struct au_branch;
+int vfsub_atomic_open(struct inode *dir, struct dentry *dentry,
+		      struct vfsub_aopen_args *args, struct au_branch *br);
+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path);
+
+struct dentry *vfsub_lookup_one_len_unlocked(const char *name,
+					     struct dentry *parent, int len);
+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
+				    int len);
+
+struct vfsub_lkup_one_args {
+	struct dentry **errp;
+	struct qstr *name;
+	struct dentry *parent;
+};
+
+static inline struct dentry *vfsub_lkup_one(struct qstr *name,
+					    struct dentry *parent)
+{
+	return vfsub_lookup_one_len(name->name, parent, name->len);
+}
+
+void vfsub_call_lkup_one(void *args);
+
+/* ---------------------------------------------------------------------- */
+
+static inline int vfsub_mnt_want_write(struct vfsmount *mnt)
+{
+	int err;
+
+	lockdep_off();
+	err = mnt_want_write(mnt);
+	lockdep_on();
+	return err;
+}
+
+static inline void vfsub_mnt_drop_write(struct vfsmount *mnt)
+{
+	lockdep_off();
+	mnt_drop_write(mnt);
+	lockdep_on();
+}
+
+#if 0 /* reserved */
+static inline void vfsub_mnt_drop_write_file(struct file *file)
+{
+	lockdep_off();
+	mnt_drop_write_file(file);
+	lockdep_on();
+}
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+struct au_hinode;
+struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
+				 struct dentry *d2, struct au_hinode *hdir2);
+void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
+			 struct dentry *d2, struct au_hinode *hdir2);
+
+int vfsub_create(struct inode *dir, struct path *path, int mode,
+		 bool want_excl);
+int vfsub_symlink(struct inode *dir, struct path *path,
+		  const char *symname);
+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev);
+int vfsub_link(struct dentry *src_dentry, struct inode *dir,
+	       struct path *path, struct inode **delegated_inode);
+int vfsub_rename(struct inode *src_hdir, struct dentry *src_dentry,
+		 struct inode *hdir, struct path *path,
+		 struct inode **delegated_inode, unsigned int flags);
+int vfsub_mkdir(struct inode *dir, struct path *path, int mode);
+int vfsub_rmdir(struct inode *dir, struct path *path);
+
+/* ---------------------------------------------------------------------- */
+
+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
+		     loff_t *ppos);
+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
+			loff_t *ppos);
+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
+		      loff_t *ppos);
+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count,
+		      loff_t *ppos);
+int vfsub_flush(struct file *file, fl_owner_t id);
+int vfsub_iterate_dir(struct file *file, struct dir_context *ctx);
+
+static inline loff_t vfsub_f_size_read(struct file *file)
+{
+	return i_size_read(file_inode(file));
+}
+
+static inline unsigned int vfsub_file_flags(struct file *file)
+{
+	unsigned int flags;
+
+	spin_lock(&file->f_lock);
+	flags = file->f_flags;
+	spin_unlock(&file->f_lock);
+
+	return flags;
+}
+
+static inline int vfsub_file_execed(struct file *file)
+{
+	/* todo: direct access f_flags */
+	return !!(vfsub_file_flags(file) & __FMODE_EXEC);
+}
+
+#if 0 /* reserved */
+static inline void vfsub_file_accessed(struct file *h_file)
+{
+	file_accessed(h_file);
+	vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); /*ignore*/
+}
+#endif
+
+#if 0 /* reserved */
+static inline void vfsub_touch_atime(struct vfsmount *h_mnt,
+				     struct dentry *h_dentry)
+{
+	struct path h_path = {
+		.dentry	= h_dentry,
+		.mnt	= h_mnt
+	};
+	touch_atime(&h_path);
+	vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
+}
+#endif
+
+static inline int vfsub_update_time(struct inode *h_inode, struct timespec *ts,
+				    int flags)
+{
+	return update_time(h_inode, ts, flags);
+	/* no vfsub_update_h_iattr() since we don't have struct path */
+}
+
+#ifdef CONFIG_FS_POSIX_ACL
+static inline int vfsub_acl_chmod(struct inode *h_inode, umode_t h_mode)
+{
+	int err;
+
+	err = posix_acl_chmod(h_inode, h_mode);
+	if (err == -EOPNOTSUPP)
+		err = 0;
+	return err;
+}
+#else
+AuStubInt0(vfsub_acl_chmod, struct inode *h_inode, umode_t h_mode);
+#endif
+
+long vfsub_splice_to(struct file *in, loff_t *ppos,
+		     struct pipe_inode_info *pipe, size_t len,
+		     unsigned int flags);
+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
+		       loff_t *ppos, size_t len, unsigned int flags);
+
+static inline long vfsub_truncate(struct path *path, loff_t length)
+{
+	long err;
+
+	lockdep_off();
+	err = vfs_truncate(path, length);
+	lockdep_on();
+	return err;
+}
+
+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
+		struct file *h_file);
+int vfsub_fsync(struct file *file, struct path *path, int datasync);
+
+/*
+ * re-use branch fs's ioctl(FICLONE) while aufs itself doesn't support such
+ * ioctl.
+ */
+static inline int vfsub_clone_file_range(struct file *src, struct file *dst,
+					 u64 len)
+{
+	int err;
+
+	lockdep_off();
+	err = vfs_clone_file_range(src, 0, dst, 0, len);
+	lockdep_on();
+
+	return err;
+}
+
+/* copy_file_range(2) is a systemcall */
+static inline ssize_t vfsub_copy_file_range(struct file *src, loff_t src_pos,
+					    struct file *dst, loff_t dst_pos,
+					    size_t len, unsigned int flags)
+{
+	ssize_t ssz;
+
+	lockdep_off();
+	ssz = vfs_copy_file_range(src, src_pos, dst, dst_pos, len, flags);
+	lockdep_on();
+
+	return ssz;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static inline loff_t vfsub_llseek(struct file *file, loff_t offset, int origin)
+{
+	loff_t err;
+
+	lockdep_off();
+	err = vfs_llseek(file, offset, origin);
+	lockdep_on();
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode);
+int vfsub_sio_rmdir(struct inode *dir, struct path *path);
+int vfsub_sio_notify_change(struct path *path, struct iattr *ia,
+			    struct inode **delegated_inode);
+int vfsub_notify_change(struct path *path, struct iattr *ia,
+			struct inode **delegated_inode);
+int vfsub_unlink(struct inode *dir, struct path *path,
+		 struct inode **delegated_inode, int force);
+
+static inline int vfsub_getattr(const struct path *path, struct kstat *st)
+{
+	return vfs_getattr(path, st, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
+}
+
+/* ---------------------------------------------------------------------- */
+
+static inline int vfsub_setxattr(struct dentry *dentry, const char *name,
+				 const void *value, size_t size, int flags)
+{
+	int err;
+
+	lockdep_off();
+	err = vfs_setxattr(dentry, name, value, size, flags);
+	lockdep_on();
+
+	return err;
+}
+
+static inline int vfsub_removexattr(struct dentry *dentry, const char *name)
+{
+	int err;
+
+	lockdep_off();
+	err = vfs_removexattr(dentry, name);
+	lockdep_on();
+
+	return err;
+}
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_VFSUB_H__ */
diff --git a/include/fs/aufs/wbr_policy.c b/include/fs/aufs/wbr_policy.c
new file mode 100644
index 000000000..1d365a2e7
--- /dev/null
+++ b/include/fs/aufs/wbr_policy.c
@@ -0,0 +1,830 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * policies for selecting one among multiple writable branches
+ */
+
+#include <linux/statfs.h>
+#include "aufs.h"
+
+/* subset of cpup_attr() */
+static noinline_for_stack
+int au_cpdown_attr(struct path *h_path, struct dentry *h_src)
+{
+	int err, sbits;
+	struct iattr ia;
+	struct inode *h_isrc;
+
+	h_isrc = d_inode(h_src);
+	ia.ia_valid = ATTR_FORCE | ATTR_MODE | ATTR_UID | ATTR_GID;
+	ia.ia_mode = h_isrc->i_mode;
+	ia.ia_uid = h_isrc->i_uid;
+	ia.ia_gid = h_isrc->i_gid;
+	sbits = !!(ia.ia_mode & (S_ISUID | S_ISGID));
+	au_cpup_attr_flags(d_inode(h_path->dentry), h_isrc->i_flags);
+	/* no delegation since it is just created */
+	err = vfsub_sio_notify_change(h_path, &ia, /*delegated*/NULL);
+
+	/* is this nfs only? */
+	if (!err && sbits && au_test_nfs(h_path->dentry->d_sb)) {
+		ia.ia_valid = ATTR_FORCE | ATTR_MODE;
+		ia.ia_mode = h_isrc->i_mode;
+		err = vfsub_sio_notify_change(h_path, &ia, /*delegated*/NULL);
+	}
+
+	return err;
+}
+
+#define AuCpdown_PARENT_OPQ	1
+#define AuCpdown_WHED		(1 << 1)
+#define AuCpdown_MADE_DIR	(1 << 2)
+#define AuCpdown_DIROPQ		(1 << 3)
+#define au_ftest_cpdown(flags, name)	((flags) & AuCpdown_##name)
+#define au_fset_cpdown(flags, name) \
+	do { (flags) |= AuCpdown_##name; } while (0)
+#define au_fclr_cpdown(flags, name) \
+	do { (flags) &= ~AuCpdown_##name; } while (0)
+
+static int au_cpdown_dir_opq(struct dentry *dentry, aufs_bindex_t bdst,
+			     unsigned int *flags)
+{
+	int err;
+	struct dentry *opq_dentry;
+
+	opq_dentry = au_diropq_create(dentry, bdst);
+	err = PTR_ERR(opq_dentry);
+	if (IS_ERR(opq_dentry))
+		goto out;
+	dput(opq_dentry);
+	au_fset_cpdown(*flags, DIROPQ);
+
+out:
+	return err;
+}
+
+static int au_cpdown_dir_wh(struct dentry *dentry, struct dentry *h_parent,
+			    struct inode *dir, aufs_bindex_t bdst)
+{
+	int err;
+	struct path h_path;
+	struct au_branch *br;
+
+	br = au_sbr(dentry->d_sb, bdst);
+	h_path.dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
+	err = PTR_ERR(h_path.dentry);
+	if (IS_ERR(h_path.dentry))
+		goto out;
+
+	err = 0;
+	if (d_is_positive(h_path.dentry)) {
+		h_path.mnt = au_br_mnt(br);
+		err = au_wh_unlink_dentry(au_h_iptr(dir, bdst), &h_path,
+					  dentry);
+	}
+	dput(h_path.dentry);
+
+out:
+	return err;
+}
+
+static int au_cpdown_dir(struct dentry *dentry, aufs_bindex_t bdst,
+			 struct au_pin *pin,
+			 struct dentry *h_parent, void *arg)
+{
+	int err, rerr;
+	aufs_bindex_t bopq, btop;
+	struct path h_path;
+	struct dentry *parent;
+	struct inode *h_dir, *h_inode, *inode, *dir;
+	unsigned int *flags = arg;
+
+	btop = au_dbtop(dentry);
+	/* dentry is di-locked */
+	parent = dget_parent(dentry);
+	dir = d_inode(parent);
+	h_dir = d_inode(h_parent);
+	AuDebugOn(h_dir != au_h_iptr(dir, bdst));
+	IMustLock(h_dir);
+
+	err = au_lkup_neg(dentry, bdst, /*wh*/0);
+	if (unlikely(err < 0))
+		goto out;
+	h_path.dentry = au_h_dptr(dentry, bdst);
+	h_path.mnt = au_sbr_mnt(dentry->d_sb, bdst);
+	err = vfsub_sio_mkdir(au_h_iptr(dir, bdst), &h_path,
+			      S_IRWXU | S_IRUGO | S_IXUGO);
+	if (unlikely(err))
+		goto out_put;
+	au_fset_cpdown(*flags, MADE_DIR);
+
+	bopq = au_dbdiropq(dentry);
+	au_fclr_cpdown(*flags, WHED);
+	au_fclr_cpdown(*flags, DIROPQ);
+	if (au_dbwh(dentry) == bdst)
+		au_fset_cpdown(*flags, WHED);
+	if (!au_ftest_cpdown(*flags, PARENT_OPQ) && bopq <= bdst)
+		au_fset_cpdown(*flags, PARENT_OPQ);
+	h_inode = d_inode(h_path.dentry);
+	inode_lock_nested(h_inode, AuLsc_I_CHILD);
+	if (au_ftest_cpdown(*flags, WHED)) {
+		err = au_cpdown_dir_opq(dentry, bdst, flags);
+		if (unlikely(err)) {
+			inode_unlock(h_inode);
+			goto out_dir;
+		}
+	}
+
+	err = au_cpdown_attr(&h_path, au_h_dptr(dentry, btop));
+	inode_unlock(h_inode);
+	if (unlikely(err))
+		goto out_opq;
+
+	if (au_ftest_cpdown(*flags, WHED)) {
+		err = au_cpdown_dir_wh(dentry, h_parent, dir, bdst);
+		if (unlikely(err))
+			goto out_opq;
+	}
+
+	inode = d_inode(dentry);
+	if (au_ibbot(inode) < bdst)
+		au_set_ibbot(inode, bdst);
+	au_set_h_iptr(inode, bdst, au_igrab(h_inode),
+		      au_hi_flags(inode, /*isdir*/1));
+	au_fhsm_wrote(dentry->d_sb, bdst, /*force*/0);
+	goto out; /* success */
+
+	/* revert */
+out_opq:
+	if (au_ftest_cpdown(*flags, DIROPQ)) {
+		inode_lock_nested(h_inode, AuLsc_I_CHILD);
+		rerr = au_diropq_remove(dentry, bdst);
+		inode_unlock(h_inode);
+		if (unlikely(rerr)) {
+			AuIOErr("failed removing diropq for %pd b%d (%d)\n",
+				dentry, bdst, rerr);
+			err = -EIO;
+			goto out;
+		}
+	}
+out_dir:
+	if (au_ftest_cpdown(*flags, MADE_DIR)) {
+		rerr = vfsub_sio_rmdir(au_h_iptr(dir, bdst), &h_path);
+		if (unlikely(rerr)) {
+			AuIOErr("failed removing %pd b%d (%d)\n",
+				dentry, bdst, rerr);
+			err = -EIO;
+		}
+	}
+out_put:
+	au_set_h_dptr(dentry, bdst, NULL);
+	if (au_dbbot(dentry) == bdst)
+		au_update_dbbot(dentry);
+out:
+	dput(parent);
+	return err;
+}
+
+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst)
+{
+	int err;
+	unsigned int flags;
+
+	flags = 0;
+	err = au_cp_dirs(dentry, bdst, au_cpdown_dir, &flags);
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* policies for create */
+
+int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex)
+{
+	int err, i, j, ndentry;
+	aufs_bindex_t bopq;
+	struct au_dcsub_pages dpages;
+	struct au_dpage *dpage;
+	struct dentry **dentries, *parent, *d;
+
+	err = au_dpages_init(&dpages, GFP_NOFS);
+	if (unlikely(err))
+		goto out;
+	parent = dget_parent(dentry);
+	err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/0);
+	if (unlikely(err))
+		goto out_free;
+
+	err = bindex;
+	for (i = 0; i < dpages.ndpage; i++) {
+		dpage = dpages.dpages + i;
+		dentries = dpage->dentries;
+		ndentry = dpage->ndentry;
+		for (j = 0; j < ndentry; j++) {
+			d = dentries[j];
+			di_read_lock_parent2(d, !AuLock_IR);
+			bopq = au_dbdiropq(d);
+			di_read_unlock(d, !AuLock_IR);
+			if (bopq >= 0 && bopq < err)
+				err = bopq;
+		}
+	}
+
+out_free:
+	dput(parent);
+	au_dpages_free(&dpages);
+out:
+	return err;
+}
+
+static int au_wbr_bu(struct super_block *sb, aufs_bindex_t bindex)
+{
+	for (; bindex >= 0; bindex--)
+		if (!au_br_rdonly(au_sbr(sb, bindex)))
+			return bindex;
+	return -EROFS;
+}
+
+/* top down parent */
+static int au_wbr_create_tdp(struct dentry *dentry,
+			     unsigned int flags __maybe_unused)
+{
+	int err;
+	aufs_bindex_t btop, bindex;
+	struct super_block *sb;
+	struct dentry *parent, *h_parent;
+
+	sb = dentry->d_sb;
+	btop = au_dbtop(dentry);
+	err = btop;
+	if (!au_br_rdonly(au_sbr(sb, btop)))
+		goto out;
+
+	err = -EROFS;
+	parent = dget_parent(dentry);
+	for (bindex = au_dbtop(parent); bindex < btop; bindex++) {
+		h_parent = au_h_dptr(parent, bindex);
+		if (!h_parent || d_is_negative(h_parent))
+			continue;
+
+		if (!au_br_rdonly(au_sbr(sb, bindex))) {
+			err = bindex;
+			break;
+		}
+	}
+	dput(parent);
+
+	/* bottom up here */
+	if (unlikely(err < 0)) {
+		err = au_wbr_bu(sb, btop - 1);
+		if (err >= 0)
+			err = au_wbr_nonopq(dentry, err);
+	}
+
+out:
+	AuDbg("b%d\n", err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* an exception for the policy other than tdp */
+static int au_wbr_create_exp(struct dentry *dentry)
+{
+	int err;
+	aufs_bindex_t bwh, bdiropq;
+	struct dentry *parent;
+
+	err = -1;
+	bwh = au_dbwh(dentry);
+	parent = dget_parent(dentry);
+	bdiropq = au_dbdiropq(parent);
+	if (bwh >= 0) {
+		if (bdiropq >= 0)
+			err = min(bdiropq, bwh);
+		else
+			err = bwh;
+		AuDbg("%d\n", err);
+	} else if (bdiropq >= 0) {
+		err = bdiropq;
+		AuDbg("%d\n", err);
+	}
+	dput(parent);
+
+	if (err >= 0)
+		err = au_wbr_nonopq(dentry, err);
+
+	if (err >= 0 && au_br_rdonly(au_sbr(dentry->d_sb, err)))
+		err = -1;
+
+	AuDbg("%d\n", err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* round robin */
+static int au_wbr_create_init_rr(struct super_block *sb)
+{
+	int err;
+
+	err = au_wbr_bu(sb, au_sbbot(sb));
+	atomic_set(&au_sbi(sb)->si_wbr_rr_next, -err); /* less important */
+	/* smp_mb(); */
+
+	AuDbg("b%d\n", err);
+	return err;
+}
+
+static int au_wbr_create_rr(struct dentry *dentry, unsigned int flags)
+{
+	int err, nbr;
+	unsigned int u;
+	aufs_bindex_t bindex, bbot;
+	struct super_block *sb;
+	atomic_t *next;
+
+	err = au_wbr_create_exp(dentry);
+	if (err >= 0)
+		goto out;
+
+	sb = dentry->d_sb;
+	next = &au_sbi(sb)->si_wbr_rr_next;
+	bbot = au_sbbot(sb);
+	nbr = bbot + 1;
+	for (bindex = 0; bindex <= bbot; bindex++) {
+		if (!au_ftest_wbr(flags, DIR)) {
+			err = atomic_dec_return(next) + 1;
+			/* modulo for 0 is meaningless */
+			if (unlikely(!err))
+				err = atomic_dec_return(next) + 1;
+		} else
+			err = atomic_read(next);
+		AuDbg("%d\n", err);
+		u = err;
+		err = u % nbr;
+		AuDbg("%d\n", err);
+		if (!au_br_rdonly(au_sbr(sb, err)))
+			break;
+		err = -EROFS;
+	}
+
+	if (err >= 0)
+		err = au_wbr_nonopq(dentry, err);
+
+out:
+	AuDbg("%d\n", err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* most free space */
+static void au_mfs(struct dentry *dentry, struct dentry *parent)
+{
+	struct super_block *sb;
+	struct au_branch *br;
+	struct au_wbr_mfs *mfs;
+	struct dentry *h_parent;
+	aufs_bindex_t bindex, bbot;
+	int err;
+	unsigned long long b, bavail;
+	struct path h_path;
+	/* reduce the stack usage */
+	struct kstatfs *st;
+
+	st = kmalloc(sizeof(*st), GFP_NOFS);
+	if (unlikely(!st)) {
+		AuWarn1("failed updating mfs(%d), ignored\n", -ENOMEM);
+		return;
+	}
+
+	bavail = 0;
+	sb = dentry->d_sb;
+	mfs = &au_sbi(sb)->si_wbr_mfs;
+	MtxMustLock(&mfs->mfs_lock);
+	mfs->mfs_bindex = -EROFS;
+	mfs->mfsrr_bytes = 0;
+	if (!parent) {
+		bindex = 0;
+		bbot = au_sbbot(sb);
+	} else {
+		bindex = au_dbtop(parent);
+		bbot = au_dbtaildir(parent);
+	}
+
+	for (; bindex <= bbot; bindex++) {
+		if (parent) {
+			h_parent = au_h_dptr(parent, bindex);
+			if (!h_parent || d_is_negative(h_parent))
+				continue;
+		}
+		br = au_sbr(sb, bindex);
+		if (au_br_rdonly(br))
+			continue;
+
+		/* sb->s_root for NFS is unreliable */
+		h_path.mnt = au_br_mnt(br);
+		h_path.dentry = h_path.mnt->mnt_root;
+		err = vfs_statfs(&h_path, st);
+		if (unlikely(err)) {
+			AuWarn1("failed statfs, b%d, %d\n", bindex, err);
+			continue;
+		}
+
+		/* when the available size is equal, select the lower one */
+		BUILD_BUG_ON(sizeof(b) < sizeof(st->f_bavail)
+			     || sizeof(b) < sizeof(st->f_bsize));
+		b = st->f_bavail * st->f_bsize;
+		br->br_wbr->wbr_bytes = b;
+		if (b >= bavail) {
+			bavail = b;
+			mfs->mfs_bindex = bindex;
+			mfs->mfs_jiffy = jiffies;
+		}
+	}
+
+	mfs->mfsrr_bytes = bavail;
+	AuDbg("b%d\n", mfs->mfs_bindex);
+	kfree(st);
+}
+
+static int au_wbr_create_mfs(struct dentry *dentry, unsigned int flags)
+{
+	int err;
+	struct dentry *parent;
+	struct super_block *sb;
+	struct au_wbr_mfs *mfs;
+
+	err = au_wbr_create_exp(dentry);
+	if (err >= 0)
+		goto out;
+
+	sb = dentry->d_sb;
+	parent = NULL;
+	if (au_ftest_wbr(flags, PARENT))
+		parent = dget_parent(dentry);
+	mfs = &au_sbi(sb)->si_wbr_mfs;
+	mutex_lock(&mfs->mfs_lock);
+	if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire)
+	    || mfs->mfs_bindex < 0
+	    || au_br_rdonly(au_sbr(sb, mfs->mfs_bindex)))
+		au_mfs(dentry, parent);
+	mutex_unlock(&mfs->mfs_lock);
+	err = mfs->mfs_bindex;
+	dput(parent);
+
+	if (err >= 0)
+		err = au_wbr_nonopq(dentry, err);
+
+out:
+	AuDbg("b%d\n", err);
+	return err;
+}
+
+static int au_wbr_create_init_mfs(struct super_block *sb)
+{
+	struct au_wbr_mfs *mfs;
+
+	mfs = &au_sbi(sb)->si_wbr_mfs;
+	mutex_init(&mfs->mfs_lock);
+	mfs->mfs_jiffy = 0;
+	mfs->mfs_bindex = -EROFS;
+
+	return 0;
+}
+
+static int au_wbr_create_fin_mfs(struct super_block *sb __maybe_unused)
+{
+	mutex_destroy(&au_sbi(sb)->si_wbr_mfs.mfs_lock);
+	return 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* top down regardless parent, and then mfs */
+static int au_wbr_create_tdmfs(struct dentry *dentry,
+			       unsigned int flags __maybe_unused)
+{
+	int err;
+	aufs_bindex_t bwh, btail, bindex, bfound, bmfs;
+	unsigned long long watermark;
+	struct super_block *sb;
+	struct au_wbr_mfs *mfs;
+	struct au_branch *br;
+	struct dentry *parent;
+
+	sb = dentry->d_sb;
+	mfs = &au_sbi(sb)->si_wbr_mfs;
+	mutex_lock(&mfs->mfs_lock);
+	if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire)
+	    || mfs->mfs_bindex < 0)
+		au_mfs(dentry, /*parent*/NULL);
+	watermark = mfs->mfsrr_watermark;
+	bmfs = mfs->mfs_bindex;
+	mutex_unlock(&mfs->mfs_lock);
+
+	/* another style of au_wbr_create_exp() */
+	bwh = au_dbwh(dentry);
+	parent = dget_parent(dentry);
+	btail = au_dbtaildir(parent);
+	if (bwh >= 0 && bwh < btail)
+		btail = bwh;
+
+	err = au_wbr_nonopq(dentry, btail);
+	if (unlikely(err < 0))
+		goto out;
+	btail = err;
+	bfound = -1;
+	for (bindex = 0; bindex <= btail; bindex++) {
+		br = au_sbr(sb, bindex);
+		if (au_br_rdonly(br))
+			continue;
+		if (br->br_wbr->wbr_bytes > watermark) {
+			bfound = bindex;
+			break;
+		}
+	}
+	err = bfound;
+	if (err < 0)
+		err = bmfs;
+
+out:
+	dput(parent);
+	AuDbg("b%d\n", err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* most free space and then round robin */
+static int au_wbr_create_mfsrr(struct dentry *dentry, unsigned int flags)
+{
+	int err;
+	struct au_wbr_mfs *mfs;
+
+	err = au_wbr_create_mfs(dentry, flags);
+	if (err >= 0) {
+		mfs = &au_sbi(dentry->d_sb)->si_wbr_mfs;
+		mutex_lock(&mfs->mfs_lock);
+		if (mfs->mfsrr_bytes < mfs->mfsrr_watermark)
+			err = au_wbr_create_rr(dentry, flags);
+		mutex_unlock(&mfs->mfs_lock);
+	}
+
+	AuDbg("b%d\n", err);
+	return err;
+}
+
+static int au_wbr_create_init_mfsrr(struct super_block *sb)
+{
+	int err;
+
+	au_wbr_create_init_mfs(sb); /* ignore */
+	err = au_wbr_create_init_rr(sb);
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* top down parent and most free space */
+static int au_wbr_create_pmfs(struct dentry *dentry, unsigned int flags)
+{
+	int err, e2;
+	unsigned long long b;
+	aufs_bindex_t bindex, btop, bbot;
+	struct super_block *sb;
+	struct dentry *parent, *h_parent;
+	struct au_branch *br;
+
+	err = au_wbr_create_tdp(dentry, flags);
+	if (unlikely(err < 0))
+		goto out;
+	parent = dget_parent(dentry);
+	btop = au_dbtop(parent);
+	bbot = au_dbtaildir(parent);
+	if (btop == bbot)
+		goto out_parent; /* success */
+
+	e2 = au_wbr_create_mfs(dentry, flags);
+	if (e2 < 0)
+		goto out_parent; /* success */
+
+	/* when the available size is equal, select upper one */
+	sb = dentry->d_sb;
+	br = au_sbr(sb, err);
+	b = br->br_wbr->wbr_bytes;
+	AuDbg("b%d, %llu\n", err, b);
+
+	for (bindex = btop; bindex <= bbot; bindex++) {
+		h_parent = au_h_dptr(parent, bindex);
+		if (!h_parent || d_is_negative(h_parent))
+			continue;
+
+		br = au_sbr(sb, bindex);
+		if (!au_br_rdonly(br) && br->br_wbr->wbr_bytes > b) {
+			b = br->br_wbr->wbr_bytes;
+			err = bindex;
+			AuDbg("b%d, %llu\n", err, b);
+		}
+	}
+
+	if (err >= 0)
+		err = au_wbr_nonopq(dentry, err);
+
+out_parent:
+	dput(parent);
+out:
+	AuDbg("b%d\n", err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * - top down parent
+ * - most free space with parent
+ * - most free space round-robin regardless parent
+ */
+static int au_wbr_create_pmfsrr(struct dentry *dentry, unsigned int flags)
+{
+	int err;
+	unsigned long long watermark;
+	struct super_block *sb;
+	struct au_branch *br;
+	struct au_wbr_mfs *mfs;
+
+	err = au_wbr_create_pmfs(dentry, flags | AuWbr_PARENT);
+	if (unlikely(err < 0))
+		goto out;
+
+	sb = dentry->d_sb;
+	br = au_sbr(sb, err);
+	mfs = &au_sbi(sb)->si_wbr_mfs;
+	mutex_lock(&mfs->mfs_lock);
+	watermark = mfs->mfsrr_watermark;
+	mutex_unlock(&mfs->mfs_lock);
+	if (br->br_wbr->wbr_bytes < watermark)
+		/* regardless the parent dir */
+		err = au_wbr_create_mfsrr(dentry, flags);
+
+out:
+	AuDbg("b%d\n", err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* policies for copyup */
+
+/* top down parent */
+static int au_wbr_copyup_tdp(struct dentry *dentry)
+{
+	return au_wbr_create_tdp(dentry, /*flags, anything is ok*/0);
+}
+
+/* bottom up parent */
+static int au_wbr_copyup_bup(struct dentry *dentry)
+{
+	int err;
+	aufs_bindex_t bindex, btop;
+	struct dentry *parent, *h_parent;
+	struct super_block *sb;
+
+	err = -EROFS;
+	sb = dentry->d_sb;
+	parent = dget_parent(dentry);
+	btop = au_dbtop(parent);
+	for (bindex = au_dbtop(dentry); bindex >= btop; bindex--) {
+		h_parent = au_h_dptr(parent, bindex);
+		if (!h_parent || d_is_negative(h_parent))
+			continue;
+
+		if (!au_br_rdonly(au_sbr(sb, bindex))) {
+			err = bindex;
+			break;
+		}
+	}
+	dput(parent);
+
+	/* bottom up here */
+	if (unlikely(err < 0))
+		err = au_wbr_bu(sb, btop - 1);
+
+	AuDbg("b%d\n", err);
+	return err;
+}
+
+/* bottom up */
+int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t btop)
+{
+	int err;
+
+	err = au_wbr_bu(dentry->d_sb, btop);
+	AuDbg("b%d\n", err);
+	if (err > btop)
+		err = au_wbr_nonopq(dentry, err);
+
+	AuDbg("b%d\n", err);
+	return err;
+}
+
+static int au_wbr_copyup_bu(struct dentry *dentry)
+{
+	int err;
+	aufs_bindex_t btop;
+
+	btop = au_dbtop(dentry);
+	err = au_wbr_do_copyup_bu(dentry, btop);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+struct au_wbr_copyup_operations au_wbr_copyup_ops[] = {
+	[AuWbrCopyup_TDP] = {
+		.copyup	= au_wbr_copyup_tdp
+	},
+	[AuWbrCopyup_BUP] = {
+		.copyup	= au_wbr_copyup_bup
+	},
+	[AuWbrCopyup_BU] = {
+		.copyup	= au_wbr_copyup_bu
+	}
+};
+
+struct au_wbr_create_operations au_wbr_create_ops[] = {
+	[AuWbrCreate_TDP] = {
+		.create	= au_wbr_create_tdp
+	},
+	[AuWbrCreate_RR] = {
+		.create	= au_wbr_create_rr,
+		.init	= au_wbr_create_init_rr
+	},
+	[AuWbrCreate_MFS] = {
+		.create	= au_wbr_create_mfs,
+		.init	= au_wbr_create_init_mfs,
+		.fin	= au_wbr_create_fin_mfs
+	},
+	[AuWbrCreate_MFSV] = {
+		.create	= au_wbr_create_mfs,
+		.init	= au_wbr_create_init_mfs,
+		.fin	= au_wbr_create_fin_mfs
+	},
+	[AuWbrCreate_MFSRR] = {
+		.create	= au_wbr_create_mfsrr,
+		.init	= au_wbr_create_init_mfsrr,
+		.fin	= au_wbr_create_fin_mfs
+	},
+	[AuWbrCreate_MFSRRV] = {
+		.create	= au_wbr_create_mfsrr,
+		.init	= au_wbr_create_init_mfsrr,
+		.fin	= au_wbr_create_fin_mfs
+	},
+	[AuWbrCreate_TDMFS] = {
+		.create	= au_wbr_create_tdmfs,
+		.init	= au_wbr_create_init_mfs,
+		.fin	= au_wbr_create_fin_mfs
+	},
+	[AuWbrCreate_TDMFSV] = {
+		.create	= au_wbr_create_tdmfs,
+		.init	= au_wbr_create_init_mfs,
+		.fin	= au_wbr_create_fin_mfs
+	},
+	[AuWbrCreate_PMFS] = {
+		.create	= au_wbr_create_pmfs,
+		.init	= au_wbr_create_init_mfs,
+		.fin	= au_wbr_create_fin_mfs
+	},
+	[AuWbrCreate_PMFSV] = {
+		.create	= au_wbr_create_pmfs,
+		.init	= au_wbr_create_init_mfs,
+		.fin	= au_wbr_create_fin_mfs
+	},
+	[AuWbrCreate_PMFSRR] = {
+		.create	= au_wbr_create_pmfsrr,
+		.init	= au_wbr_create_init_mfsrr,
+		.fin	= au_wbr_create_fin_mfs
+	},
+	[AuWbrCreate_PMFSRRV] = {
+		.create	= au_wbr_create_pmfsrr,
+		.init	= au_wbr_create_init_mfsrr,
+		.fin	= au_wbr_create_fin_mfs
+	}
+};
diff --git a/include/fs/aufs/whout.c b/include/fs/aufs/whout.c
new file mode 100644
index 000000000..c5cf34e2b
--- /dev/null
+++ b/include/fs/aufs/whout.c
@@ -0,0 +1,1061 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * whiteout for logical deletion and opaque directory
+ */
+
+#include "aufs.h"
+
+#define WH_MASK			S_IRUGO
+
+/*
+ * If a directory contains this file, then it is opaque.  We start with the
+ * .wh. flag so that it is blocked by lookup.
+ */
+static struct qstr diropq_name = QSTR_INIT(AUFS_WH_DIROPQ,
+					   sizeof(AUFS_WH_DIROPQ) - 1);
+
+/*
+ * generate whiteout name, which is NOT terminated by NULL.
+ * @name: original d_name.name
+ * @len: original d_name.len
+ * @wh: whiteout qstr
+ * returns zero when succeeds, otherwise error.
+ * succeeded value as wh->name should be freed by kfree().
+ */
+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name)
+{
+	char *p;
+
+	if (unlikely(name->len > PATH_MAX - AUFS_WH_PFX_LEN))
+		return -ENAMETOOLONG;
+
+	wh->len = name->len + AUFS_WH_PFX_LEN;
+	p = kmalloc(wh->len, GFP_NOFS);
+	wh->name = p;
+	if (p) {
+		memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
+		memcpy(p + AUFS_WH_PFX_LEN, name->name, name->len);
+		/* smp_mb(); */
+		return 0;
+	}
+	return -ENOMEM;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * test if the @wh_name exists under @h_parent.
+ * @try_sio specifies the necessary of super-io.
+ */
+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name, int try_sio)
+{
+	int err;
+	struct dentry *wh_dentry;
+
+	if (!try_sio)
+		wh_dentry = vfsub_lkup_one(wh_name, h_parent);
+	else
+		wh_dentry = au_sio_lkup_one(wh_name, h_parent);
+	err = PTR_ERR(wh_dentry);
+	if (IS_ERR(wh_dentry)) {
+		if (err == -ENAMETOOLONG)
+			err = 0;
+		goto out;
+	}
+
+	err = 0;
+	if (d_is_negative(wh_dentry))
+		goto out_wh; /* success */
+
+	err = 1;
+	if (d_is_reg(wh_dentry))
+		goto out_wh; /* success */
+
+	err = -EIO;
+	AuIOErr("%pd Invalid whiteout entry type 0%o.\n",
+		wh_dentry, d_inode(wh_dentry)->i_mode);
+
+out_wh:
+	dput(wh_dentry);
+out:
+	return err;
+}
+
+/*
+ * test if the @h_dentry sets opaque or not.
+ */
+int au_diropq_test(struct dentry *h_dentry)
+{
+	int err;
+	struct inode *h_dir;
+
+	h_dir = d_inode(h_dentry);
+	err = au_wh_test(h_dentry, &diropq_name,
+			 au_test_h_perm_sio(h_dir, MAY_EXEC));
+	return err;
+}
+
+/*
+ * returns a negative dentry whose name is unique and temporary.
+ */
+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
+			     struct qstr *prefix)
+{
+	struct dentry *dentry;
+	int i;
+	char defname[NAME_MAX - AUFS_MAX_NAMELEN + DNAME_INLINE_LEN + 1],
+		*name, *p;
+	/* strict atomic_t is unnecessary here */
+	static unsigned short cnt;
+	struct qstr qs;
+
+	BUILD_BUG_ON(sizeof(cnt) * 2 > AUFS_WH_TMP_LEN);
+
+	name = defname;
+	qs.len = sizeof(defname) - DNAME_INLINE_LEN + prefix->len - 1;
+	if (unlikely(prefix->len > DNAME_INLINE_LEN)) {
+		dentry = ERR_PTR(-ENAMETOOLONG);
+		if (unlikely(qs.len > NAME_MAX))
+			goto out;
+		dentry = ERR_PTR(-ENOMEM);
+		name = kmalloc(qs.len + 1, GFP_NOFS);
+		if (unlikely(!name))
+			goto out;
+	}
+
+	/* doubly whiteout-ed */
+	memcpy(name, AUFS_WH_PFX AUFS_WH_PFX, AUFS_WH_PFX_LEN * 2);
+	p = name + AUFS_WH_PFX_LEN * 2;
+	memcpy(p, prefix->name, prefix->len);
+	p += prefix->len;
+	*p++ = '.';
+	AuDebugOn(name + qs.len + 1 - p <= AUFS_WH_TMP_LEN);
+
+	qs.name = name;
+	for (i = 0; i < 3; i++) {
+		sprintf(p, "%.*x", AUFS_WH_TMP_LEN, cnt++);
+		dentry = au_sio_lkup_one(&qs, h_parent);
+		if (IS_ERR(dentry) || d_is_negative(dentry))
+			goto out_name;
+		dput(dentry);
+	}
+	/* pr_warn("could not get random name\n"); */
+	dentry = ERR_PTR(-EEXIST);
+	AuDbg("%.*s\n", AuLNPair(&qs));
+	BUG();
+
+out_name:
+	if (name != defname)
+		kfree(name);
+out:
+	AuTraceErrPtr(dentry);
+	return dentry;
+}
+
+/*
+ * rename the @h_dentry on @br to the whiteouted temporary name.
+ */
+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br)
+{
+	int err;
+	struct path h_path = {
+		.mnt = au_br_mnt(br)
+	};
+	struct inode *h_dir, *delegated;
+	struct dentry *h_parent;
+
+	h_parent = h_dentry->d_parent; /* dir inode is locked */
+	h_dir = d_inode(h_parent);
+	IMustLock(h_dir);
+
+	h_path.dentry = au_whtmp_lkup(h_parent, br, &h_dentry->d_name);
+	err = PTR_ERR(h_path.dentry);
+	if (IS_ERR(h_path.dentry))
+		goto out;
+
+	/* under the same dir, no need to lock_rename() */
+	delegated = NULL;
+	err = vfsub_rename(h_dir, h_dentry, h_dir, &h_path, &delegated,
+			   /*flags*/0);
+	AuTraceErr(err);
+	if (unlikely(err == -EWOULDBLOCK)) {
+		pr_warn("cannot retry for NFSv4 delegation"
+			" for an internal rename\n");
+		iput(delegated);
+	}
+	dput(h_path.dentry);
+
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+/*
+ * functions for removing a whiteout
+ */
+
+static int do_unlink_wh(struct inode *h_dir, struct path *h_path)
+{
+	int err, force;
+	struct inode *delegated;
+
+	/*
+	 * forces superio when the dir has a sticky bit.
+	 * this may be a violation of unix fs semantics.
+	 */
+	force = (h_dir->i_mode & S_ISVTX)
+		&& !uid_eq(current_fsuid(), d_inode(h_path->dentry)->i_uid);
+	delegated = NULL;
+	err = vfsub_unlink(h_dir, h_path, &delegated, force);
+	if (unlikely(err == -EWOULDBLOCK)) {
+		pr_warn("cannot retry for NFSv4 delegation"
+			" for an internal unlink\n");
+		iput(delegated);
+	}
+	return err;
+}
+
+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
+			struct dentry *dentry)
+{
+	int err;
+
+	err = do_unlink_wh(h_dir, h_path);
+	if (!err && dentry)
+		au_set_dbwh(dentry, -1);
+
+	return err;
+}
+
+static int unlink_wh_name(struct dentry *h_parent, struct qstr *wh,
+			  struct au_branch *br)
+{
+	int err;
+	struct path h_path = {
+		.mnt = au_br_mnt(br)
+	};
+
+	err = 0;
+	h_path.dentry = vfsub_lkup_one(wh, h_parent);
+	if (IS_ERR(h_path.dentry))
+		err = PTR_ERR(h_path.dentry);
+	else {
+		if (d_is_reg(h_path.dentry))
+			err = do_unlink_wh(d_inode(h_parent), &h_path);
+		dput(h_path.dentry);
+	}
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+/*
+ * initialize/clean whiteout for a branch
+ */
+
+static void au_wh_clean(struct inode *h_dir, struct path *whpath,
+			const int isdir)
+{
+	int err;
+	struct inode *delegated;
+
+	if (d_is_negative(whpath->dentry))
+		return;
+
+	if (isdir)
+		err = vfsub_rmdir(h_dir, whpath);
+	else {
+		delegated = NULL;
+		err = vfsub_unlink(h_dir, whpath, &delegated, /*force*/0);
+		if (unlikely(err == -EWOULDBLOCK)) {
+			pr_warn("cannot retry for NFSv4 delegation"
+				" for an internal unlink\n");
+			iput(delegated);
+		}
+	}
+	if (unlikely(err))
+		pr_warn("failed removing %pd (%d), ignored.\n",
+			whpath->dentry, err);
+}
+
+static int test_linkable(struct dentry *h_root)
+{
+	struct inode *h_dir = d_inode(h_root);
+
+	if (h_dir->i_op->link)
+		return 0;
+
+	pr_err("%pd (%s) doesn't support link(2), use noplink and rw+nolwh\n",
+	       h_root, au_sbtype(h_root->d_sb));
+	return -ENOSYS;
+}
+
+/* todo: should this mkdir be done in /sbin/mount.aufs helper? */
+static int au_whdir(struct inode *h_dir, struct path *path)
+{
+	int err;
+
+	err = -EEXIST;
+	if (d_is_negative(path->dentry)) {
+		int mode = S_IRWXU;
+
+		if (au_test_nfs(path->dentry->d_sb))
+			mode |= S_IXUGO;
+		err = vfsub_mkdir(h_dir, path, mode);
+	} else if (d_is_dir(path->dentry))
+		err = 0;
+	else
+		pr_err("unknown %pd exists\n", path->dentry);
+
+	return err;
+}
+
+struct au_wh_base {
+	const struct qstr *name;
+	struct dentry *dentry;
+};
+
+static void au_wh_init_ro(struct inode *h_dir, struct au_wh_base base[],
+			  struct path *h_path)
+{
+	h_path->dentry = base[AuBrWh_BASE].dentry;
+	au_wh_clean(h_dir, h_path, /*isdir*/0);
+	h_path->dentry = base[AuBrWh_PLINK].dentry;
+	au_wh_clean(h_dir, h_path, /*isdir*/1);
+	h_path->dentry = base[AuBrWh_ORPH].dentry;
+	au_wh_clean(h_dir, h_path, /*isdir*/1);
+}
+
+/*
+ * returns tri-state,
+ * minus: error, caller should print the message
+ * zero: succuess
+ * plus: error, caller should NOT print the message
+ */
+static int au_wh_init_rw_nolink(struct dentry *h_root, struct au_wbr *wbr,
+				int do_plink, struct au_wh_base base[],
+				struct path *h_path)
+{
+	int err;
+	struct inode *h_dir;
+
+	h_dir = d_inode(h_root);
+	h_path->dentry = base[AuBrWh_BASE].dentry;
+	au_wh_clean(h_dir, h_path, /*isdir*/0);
+	h_path->dentry = base[AuBrWh_PLINK].dentry;
+	if (do_plink) {
+		err = test_linkable(h_root);
+		if (unlikely(err)) {
+			err = 1;
+			goto out;
+		}
+
+		err = au_whdir(h_dir, h_path);
+		if (unlikely(err))
+			goto out;
+		wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
+	} else
+		au_wh_clean(h_dir, h_path, /*isdir*/1);
+	h_path->dentry = base[AuBrWh_ORPH].dentry;
+	err = au_whdir(h_dir, h_path);
+	if (unlikely(err))
+		goto out;
+	wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
+
+out:
+	return err;
+}
+
+/*
+ * for the moment, aufs supports the branch filesystem which does not support
+ * link(2). testing on FAT which does not support i_op->setattr() fully either,
+ * copyup failed. finally, such filesystem will not be used as the writable
+ * branch.
+ *
+ * returns tri-state, see above.
+ */
+static int au_wh_init_rw(struct dentry *h_root, struct au_wbr *wbr,
+			 int do_plink, struct au_wh_base base[],
+			 struct path *h_path)
+{
+	int err;
+	struct inode *h_dir;
+
+	WbrWhMustWriteLock(wbr);
+
+	err = test_linkable(h_root);
+	if (unlikely(err)) {
+		err = 1;
+		goto out;
+	}
+
+	/*
+	 * todo: should this create be done in /sbin/mount.aufs helper?
+	 */
+	err = -EEXIST;
+	h_dir = d_inode(h_root);
+	if (d_is_negative(base[AuBrWh_BASE].dentry)) {
+		h_path->dentry = base[AuBrWh_BASE].dentry;
+		err = vfsub_create(h_dir, h_path, WH_MASK, /*want_excl*/true);
+	} else if (d_is_reg(base[AuBrWh_BASE].dentry))
+		err = 0;
+	else
+		pr_err("unknown %pd2 exists\n", base[AuBrWh_BASE].dentry);
+	if (unlikely(err))
+		goto out;
+
+	h_path->dentry = base[AuBrWh_PLINK].dentry;
+	if (do_plink) {
+		err = au_whdir(h_dir, h_path);
+		if (unlikely(err))
+			goto out;
+		wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
+	} else
+		au_wh_clean(h_dir, h_path, /*isdir*/1);
+	wbr->wbr_whbase = dget(base[AuBrWh_BASE].dentry);
+
+	h_path->dentry = base[AuBrWh_ORPH].dentry;
+	err = au_whdir(h_dir, h_path);
+	if (unlikely(err))
+		goto out;
+	wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
+
+out:
+	return err;
+}
+
+/*
+ * initialize the whiteout base file/dir for @br.
+ */
+int au_wh_init(struct au_branch *br, struct super_block *sb)
+{
+	int err, i;
+	const unsigned char do_plink
+		= !!au_opt_test(au_mntflags(sb), PLINK);
+	struct inode *h_dir;
+	struct path path = br->br_path;
+	struct dentry *h_root = path.dentry;
+	struct au_wbr *wbr = br->br_wbr;
+	static const struct qstr base_name[] = {
+		[AuBrWh_BASE] = QSTR_INIT(AUFS_BASE_NAME,
+					  sizeof(AUFS_BASE_NAME) - 1),
+		[AuBrWh_PLINK] = QSTR_INIT(AUFS_PLINKDIR_NAME,
+					   sizeof(AUFS_PLINKDIR_NAME) - 1),
+		[AuBrWh_ORPH] = QSTR_INIT(AUFS_ORPHDIR_NAME,
+					  sizeof(AUFS_ORPHDIR_NAME) - 1)
+	};
+	struct au_wh_base base[] = {
+		[AuBrWh_BASE] = {
+			.name	= base_name + AuBrWh_BASE,
+			.dentry	= NULL
+		},
+		[AuBrWh_PLINK] = {
+			.name	= base_name + AuBrWh_PLINK,
+			.dentry	= NULL
+		},
+		[AuBrWh_ORPH] = {
+			.name	= base_name + AuBrWh_ORPH,
+			.dentry	= NULL
+		}
+	};
+
+	if (wbr)
+		WbrWhMustWriteLock(wbr);
+
+	for (i = 0; i < AuBrWh_Last; i++) {
+		/* doubly whiteouted */
+		struct dentry *d;
+
+		d = au_wh_lkup(h_root, (void *)base[i].name, br);
+		err = PTR_ERR(d);
+		if (IS_ERR(d))
+			goto out;
+
+		base[i].dentry = d;
+		AuDebugOn(wbr
+			  && wbr->wbr_wh[i]
+			  && wbr->wbr_wh[i] != base[i].dentry);
+	}
+
+	if (wbr)
+		for (i = 0; i < AuBrWh_Last; i++) {
+			dput(wbr->wbr_wh[i]);
+			wbr->wbr_wh[i] = NULL;
+		}
+
+	err = 0;
+	if (!au_br_writable(br->br_perm)) {
+		h_dir = d_inode(h_root);
+		au_wh_init_ro(h_dir, base, &path);
+	} else if (!au_br_wh_linkable(br->br_perm)) {
+		err = au_wh_init_rw_nolink(h_root, wbr, do_plink, base, &path);
+		if (err > 0)
+			goto out;
+		else if (err)
+			goto out_err;
+	} else {
+		err = au_wh_init_rw(h_root, wbr, do_plink, base, &path);
+		if (err > 0)
+			goto out;
+		else if (err)
+			goto out_err;
+	}
+	goto out; /* success */
+
+out_err:
+	pr_err("an error(%d) on the writable branch %pd(%s)\n",
+	       err, h_root, au_sbtype(h_root->d_sb));
+out:
+	for (i = 0; i < AuBrWh_Last; i++)
+		dput(base[i].dentry);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+/*
+ * whiteouts are all hard-linked usually.
+ * when its link count reaches a ceiling, we create a new whiteout base
+ * asynchronously.
+ */
+
+struct reinit_br_wh {
+	struct super_block *sb;
+	struct au_branch *br;
+};
+
+static void reinit_br_wh(void *arg)
+{
+	int err;
+	aufs_bindex_t bindex;
+	struct path h_path;
+	struct reinit_br_wh *a = arg;
+	struct au_wbr *wbr;
+	struct inode *dir, *delegated;
+	struct dentry *h_root;
+	struct au_hinode *hdir;
+
+	err = 0;
+	wbr = a->br->br_wbr;
+	/* big aufs lock */
+	si_noflush_write_lock(a->sb);
+	if (!au_br_writable(a->br->br_perm))
+		goto out;
+	bindex = au_br_index(a->sb, a->br->br_id);
+	if (unlikely(bindex < 0))
+		goto out;
+
+	di_read_lock_parent(a->sb->s_root, AuLock_IR);
+	dir = d_inode(a->sb->s_root);
+	hdir = au_hi(dir, bindex);
+	h_root = au_h_dptr(a->sb->s_root, bindex);
+	AuDebugOn(h_root != au_br_dentry(a->br));
+
+	au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
+	wbr_wh_write_lock(wbr);
+	err = au_h_verify(wbr->wbr_whbase, au_opt_udba(a->sb), hdir->hi_inode,
+			  h_root, a->br);
+	if (!err) {
+		h_path.dentry = wbr->wbr_whbase;
+		h_path.mnt = au_br_mnt(a->br);
+		delegated = NULL;
+		err = vfsub_unlink(hdir->hi_inode, &h_path, &delegated,
+				   /*force*/0);
+		if (unlikely(err == -EWOULDBLOCK)) {
+			pr_warn("cannot retry for NFSv4 delegation"
+				" for an internal unlink\n");
+			iput(delegated);
+		}
+	} else {
+		pr_warn("%pd is moved, ignored\n", wbr->wbr_whbase);
+		err = 0;
+	}
+	dput(wbr->wbr_whbase);
+	wbr->wbr_whbase = NULL;
+	if (!err)
+		err = au_wh_init(a->br, a->sb);
+	wbr_wh_write_unlock(wbr);
+	au_hn_inode_unlock(hdir);
+	di_read_unlock(a->sb->s_root, AuLock_IR);
+	if (!err)
+		au_fhsm_wrote(a->sb, bindex, /*force*/0);
+
+out:
+	if (wbr)
+		atomic_dec(&wbr->wbr_wh_running);
+	au_br_put(a->br);
+	si_write_unlock(a->sb);
+	au_nwt_done(&au_sbi(a->sb)->si_nowait);
+	kfree(arg);
+	if (unlikely(err))
+		AuIOErr("err %d\n", err);
+}
+
+static void kick_reinit_br_wh(struct super_block *sb, struct au_branch *br)
+{
+	int do_dec, wkq_err;
+	struct reinit_br_wh *arg;
+
+	do_dec = 1;
+	if (atomic_inc_return(&br->br_wbr->wbr_wh_running) != 1)
+		goto out;
+
+	/* ignore ENOMEM */
+	arg = kmalloc(sizeof(*arg), GFP_NOFS);
+	if (arg) {
+		/*
+		 * dec(wh_running), kfree(arg) and dec(br_count)
+		 * in reinit function
+		 */
+		arg->sb = sb;
+		arg->br = br;
+		au_br_get(br);
+		wkq_err = au_wkq_nowait(reinit_br_wh, arg, sb, /*flags*/0);
+		if (unlikely(wkq_err)) {
+			atomic_dec(&br->br_wbr->wbr_wh_running);
+			au_br_put(br);
+			kfree(arg);
+		}
+		do_dec = 0;
+	}
+
+out:
+	if (do_dec)
+		atomic_dec(&br->br_wbr->wbr_wh_running);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * create the whiteout @wh.
+ */
+static int link_or_create_wh(struct super_block *sb, aufs_bindex_t bindex,
+			     struct dentry *wh)
+{
+	int err;
+	struct path h_path = {
+		.dentry = wh
+	};
+	struct au_branch *br;
+	struct au_wbr *wbr;
+	struct dentry *h_parent;
+	struct inode *h_dir, *delegated;
+
+	h_parent = wh->d_parent; /* dir inode is locked */
+	h_dir = d_inode(h_parent);
+	IMustLock(h_dir);
+
+	br = au_sbr(sb, bindex);
+	h_path.mnt = au_br_mnt(br);
+	wbr = br->br_wbr;
+	wbr_wh_read_lock(wbr);
+	if (wbr->wbr_whbase) {
+		delegated = NULL;
+		err = vfsub_link(wbr->wbr_whbase, h_dir, &h_path, &delegated);
+		if (unlikely(err == -EWOULDBLOCK)) {
+			pr_warn("cannot retry for NFSv4 delegation"
+				" for an internal link\n");
+			iput(delegated);
+		}
+		if (!err || err != -EMLINK)
+			goto out;
+
+		/* link count full. re-initialize br_whbase. */
+		kick_reinit_br_wh(sb, br);
+	}
+
+	/* return this error in this context */
+	err = vfsub_create(h_dir, &h_path, WH_MASK, /*want_excl*/true);
+	if (!err)
+		au_fhsm_wrote(sb, bindex, /*force*/0);
+
+out:
+	wbr_wh_read_unlock(wbr);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * create or remove the diropq.
+ */
+static struct dentry *do_diropq(struct dentry *dentry, aufs_bindex_t bindex,
+				unsigned int flags)
+{
+	struct dentry *opq_dentry, *h_dentry;
+	struct super_block *sb;
+	struct au_branch *br;
+	int err;
+
+	sb = dentry->d_sb;
+	br = au_sbr(sb, bindex);
+	h_dentry = au_h_dptr(dentry, bindex);
+	opq_dentry = vfsub_lkup_one(&diropq_name, h_dentry);
+	if (IS_ERR(opq_dentry))
+		goto out;
+
+	if (au_ftest_diropq(flags, CREATE)) {
+		err = link_or_create_wh(sb, bindex, opq_dentry);
+		if (!err) {
+			au_set_dbdiropq(dentry, bindex);
+			goto out; /* success */
+		}
+	} else {
+		struct path tmp = {
+			.dentry = opq_dentry,
+			.mnt	= au_br_mnt(br)
+		};
+		err = do_unlink_wh(au_h_iptr(d_inode(dentry), bindex), &tmp);
+		if (!err)
+			au_set_dbdiropq(dentry, -1);
+	}
+	dput(opq_dentry);
+	opq_dentry = ERR_PTR(err);
+
+out:
+	return opq_dentry;
+}
+
+struct do_diropq_args {
+	struct dentry **errp;
+	struct dentry *dentry;
+	aufs_bindex_t bindex;
+	unsigned int flags;
+};
+
+static void call_do_diropq(void *args)
+{
+	struct do_diropq_args *a = args;
+	*a->errp = do_diropq(a->dentry, a->bindex, a->flags);
+}
+
+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
+			     unsigned int flags)
+{
+	struct dentry *diropq, *h_dentry;
+
+	h_dentry = au_h_dptr(dentry, bindex);
+	if (!au_test_h_perm_sio(d_inode(h_dentry), MAY_EXEC | MAY_WRITE))
+		diropq = do_diropq(dentry, bindex, flags);
+	else {
+		int wkq_err;
+		struct do_diropq_args args = {
+			.errp		= &diropq,
+			.dentry		= dentry,
+			.bindex		= bindex,
+			.flags		= flags
+		};
+
+		wkq_err = au_wkq_wait(call_do_diropq, &args);
+		if (unlikely(wkq_err))
+			diropq = ERR_PTR(wkq_err);
+	}
+
+	return diropq;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * lookup whiteout dentry.
+ * @h_parent: lower parent dentry which must exist and be locked
+ * @base_name: name of dentry which will be whiteouted
+ * returns dentry for whiteout.
+ */
+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
+			  struct au_branch *br)
+{
+	int err;
+	struct qstr wh_name;
+	struct dentry *wh_dentry;
+
+	err = au_wh_name_alloc(&wh_name, base_name);
+	wh_dentry = ERR_PTR(err);
+	if (!err) {
+		wh_dentry = vfsub_lkup_one(&wh_name, h_parent);
+		kfree(wh_name.name);
+	}
+	return wh_dentry;
+}
+
+/*
+ * link/create a whiteout for @dentry on @bindex.
+ */
+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
+			    struct dentry *h_parent)
+{
+	struct dentry *wh_dentry;
+	struct super_block *sb;
+	int err;
+
+	sb = dentry->d_sb;
+	wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, au_sbr(sb, bindex));
+	if (!IS_ERR(wh_dentry) && d_is_negative(wh_dentry)) {
+		err = link_or_create_wh(sb, bindex, wh_dentry);
+		if (!err) {
+			au_set_dbwh(dentry, bindex);
+			au_fhsm_wrote(sb, bindex, /*force*/0);
+		} else {
+			dput(wh_dentry);
+			wh_dentry = ERR_PTR(err);
+		}
+	}
+
+	return wh_dentry;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* Delete all whiteouts in this directory on branch bindex. */
+static int del_wh_children(struct dentry *h_dentry, struct au_nhash *whlist,
+			   aufs_bindex_t bindex, struct au_branch *br)
+{
+	int err;
+	unsigned long ul, n;
+	struct qstr wh_name;
+	char *p;
+	struct hlist_head *head;
+	struct au_vdir_wh *pos;
+	struct au_vdir_destr *str;
+
+	err = -ENOMEM;
+	p = (void *)__get_free_page(GFP_NOFS);
+	wh_name.name = p;
+	if (unlikely(!wh_name.name))
+		goto out;
+
+	err = 0;
+	memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
+	p += AUFS_WH_PFX_LEN;
+	n = whlist->nh_num;
+	head = whlist->nh_head;
+	for (ul = 0; !err && ul < n; ul++, head++) {
+		hlist_for_each_entry(pos, head, wh_hash) {
+			if (pos->wh_bindex != bindex)
+				continue;
+
+			str = &pos->wh_str;
+			if (str->len + AUFS_WH_PFX_LEN <= PATH_MAX) {
+				memcpy(p, str->name, str->len);
+				wh_name.len = AUFS_WH_PFX_LEN + str->len;
+				err = unlink_wh_name(h_dentry, &wh_name, br);
+				if (!err)
+					continue;
+				break;
+			}
+			AuIOErr("whiteout name too long %.*s\n",
+				str->len, str->name);
+			err = -EIO;
+			break;
+		}
+	}
+	free_page((unsigned long)wh_name.name);
+
+out:
+	return err;
+}
+
+struct del_wh_children_args {
+	int *errp;
+	struct dentry *h_dentry;
+	struct au_nhash *whlist;
+	aufs_bindex_t bindex;
+	struct au_branch *br;
+};
+
+static void call_del_wh_children(void *args)
+{
+	struct del_wh_children_args *a = args;
+	*a->errp = del_wh_children(a->h_dentry, a->whlist, a->bindex, a->br);
+}
+
+/* ---------------------------------------------------------------------- */
+
+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp)
+{
+	struct au_whtmp_rmdir *whtmp;
+	int err;
+	unsigned int rdhash;
+
+	SiMustAnyLock(sb);
+
+	whtmp = kzalloc(sizeof(*whtmp), gfp);
+	if (unlikely(!whtmp)) {
+		whtmp = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	/* no estimation for dir size */
+	rdhash = au_sbi(sb)->si_rdhash;
+	if (!rdhash)
+		rdhash = AUFS_RDHASH_DEF;
+	err = au_nhash_alloc(&whtmp->whlist, rdhash, gfp);
+	if (unlikely(err)) {
+		kfree(whtmp);
+		whtmp = ERR_PTR(err);
+	}
+
+out:
+	return whtmp;
+}
+
+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp)
+{
+	if (whtmp->br)
+		au_br_put(whtmp->br);
+	dput(whtmp->wh_dentry);
+	iput(whtmp->dir);
+	au_nhash_wh_free(&whtmp->whlist);
+	kfree(whtmp);
+}
+
+/*
+ * rmdir the whiteouted temporary named dir @h_dentry.
+ * @whlist: whiteouted children.
+ */
+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
+		   struct dentry *wh_dentry, struct au_nhash *whlist)
+{
+	int err;
+	unsigned int h_nlink;
+	struct path h_tmp;
+	struct inode *wh_inode, *h_dir;
+	struct au_branch *br;
+
+	h_dir = d_inode(wh_dentry->d_parent); /* dir inode is locked */
+	IMustLock(h_dir);
+
+	br = au_sbr(dir->i_sb, bindex);
+	wh_inode = d_inode(wh_dentry);
+	inode_lock_nested(wh_inode, AuLsc_I_CHILD);
+
+	/*
+	 * someone else might change some whiteouts while we were sleeping.
+	 * it means this whlist may have an obsoleted entry.
+	 */
+	if (!au_test_h_perm_sio(wh_inode, MAY_EXEC | MAY_WRITE))
+		err = del_wh_children(wh_dentry, whlist, bindex, br);
+	else {
+		int wkq_err;
+		struct del_wh_children_args args = {
+			.errp		= &err,
+			.h_dentry	= wh_dentry,
+			.whlist		= whlist,
+			.bindex		= bindex,
+			.br		= br
+		};
+
+		wkq_err = au_wkq_wait(call_del_wh_children, &args);
+		if (unlikely(wkq_err))
+			err = wkq_err;
+	}
+	inode_unlock(wh_inode);
+
+	if (!err) {
+		h_tmp.dentry = wh_dentry;
+		h_tmp.mnt = au_br_mnt(br);
+		h_nlink = h_dir->i_nlink;
+		err = vfsub_rmdir(h_dir, &h_tmp);
+		/* some fs doesn't change the parent nlink in some cases */
+		h_nlink -= h_dir->i_nlink;
+	}
+
+	if (!err) {
+		if (au_ibtop(dir) == bindex) {
+			/* todo: dir->i_mutex is necessary */
+			au_cpup_attr_timesizes(dir);
+			if (h_nlink)
+				vfsub_drop_nlink(dir);
+		}
+		return 0; /* success */
+	}
+
+	pr_warn("failed removing %pd(%d), ignored\n", wh_dentry, err);
+	return err;
+}
+
+static void call_rmdir_whtmp(void *args)
+{
+	int err;
+	aufs_bindex_t bindex;
+	struct au_whtmp_rmdir *a = args;
+	struct super_block *sb;
+	struct dentry *h_parent;
+	struct inode *h_dir;
+	struct au_hinode *hdir;
+
+	/* rmdir by nfsd may cause deadlock with this i_mutex */
+	/* inode_lock(a->dir); */
+	err = -EROFS;
+	sb = a->dir->i_sb;
+	si_read_lock(sb, !AuLock_FLUSH);
+	if (!au_br_writable(a->br->br_perm))
+		goto out;
+	bindex = au_br_index(sb, a->br->br_id);
+	if (unlikely(bindex < 0))
+		goto out;
+
+	err = -EIO;
+	ii_write_lock_parent(a->dir);
+	h_parent = dget_parent(a->wh_dentry);
+	h_dir = d_inode(h_parent);
+	hdir = au_hi(a->dir, bindex);
+	err = vfsub_mnt_want_write(au_br_mnt(a->br));
+	if (unlikely(err))
+		goto out_mnt;
+	au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
+	err = au_h_verify(a->wh_dentry, au_opt_udba(sb), h_dir, h_parent,
+			  a->br);
+	if (!err)
+		err = au_whtmp_rmdir(a->dir, bindex, a->wh_dentry, &a->whlist);
+	au_hn_inode_unlock(hdir);
+	vfsub_mnt_drop_write(au_br_mnt(a->br));
+
+out_mnt:
+	dput(h_parent);
+	ii_write_unlock(a->dir);
+out:
+	/* inode_unlock(a->dir); */
+	au_whtmp_rmdir_free(a);
+	si_read_unlock(sb);
+	au_nwt_done(&au_sbi(sb)->si_nowait);
+	if (unlikely(err))
+		AuIOErr("err %d\n", err);
+}
+
+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
+			 struct dentry *wh_dentry, struct au_whtmp_rmdir *args)
+{
+	int wkq_err;
+	struct super_block *sb;
+
+	IMustLock(dir);
+
+	/* all post-process will be done in do_rmdir_whtmp(). */
+	sb = dir->i_sb;
+	args->dir = au_igrab(dir);
+	args->br = au_sbr(sb, bindex);
+	au_br_get(args->br);
+	args->wh_dentry = dget(wh_dentry);
+	wkq_err = au_wkq_nowait(call_rmdir_whtmp, args, sb, /*flags*/0);
+	if (unlikely(wkq_err)) {
+		pr_warn("rmdir error %pd (%d), ignored\n", wh_dentry, wkq_err);
+		au_whtmp_rmdir_free(args);
+	}
+}
diff --git a/include/fs/aufs/whout.h b/include/fs/aufs/whout.h
new file mode 100644
index 000000000..766a1d930
--- /dev/null
+++ b/include/fs/aufs/whout.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * whiteout for logical deletion and opaque directory
+ */
+
+#ifndef __AUFS_WHOUT_H__
+#define __AUFS_WHOUT_H__
+
+#ifdef __KERNEL__
+
+#include "dir.h"
+
+/* whout.c */
+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name);
+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name, int try_sio);
+int au_diropq_test(struct dentry *h_dentry);
+struct au_branch;
+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
+			     struct qstr *prefix);
+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br);
+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
+			struct dentry *dentry);
+int au_wh_init(struct au_branch *br, struct super_block *sb);
+
+/* diropq flags */
+#define AuDiropq_CREATE	1
+#define au_ftest_diropq(flags, name)	((flags) & AuDiropq_##name)
+#define au_fset_diropq(flags, name) \
+	do { (flags) |= AuDiropq_##name; } while (0)
+#define au_fclr_diropq(flags, name) \
+	do { (flags) &= ~AuDiropq_##name; } while (0)
+
+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
+			     unsigned int flags);
+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
+			  struct au_branch *br);
+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
+			    struct dentry *h_parent);
+
+/* real rmdir for the whiteout-ed dir */
+struct au_whtmp_rmdir {
+	struct inode *dir;
+	struct au_branch *br;
+	struct dentry *wh_dentry;
+	struct au_nhash whlist;
+};
+
+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp);
+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp);
+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
+		   struct dentry *wh_dentry, struct au_nhash *whlist);
+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
+			 struct dentry *wh_dentry, struct au_whtmp_rmdir *args);
+
+/* ---------------------------------------------------------------------- */
+
+static inline struct dentry *au_diropq_create(struct dentry *dentry,
+					      aufs_bindex_t bindex)
+{
+	return au_diropq_sio(dentry, bindex, AuDiropq_CREATE);
+}
+
+static inline int au_diropq_remove(struct dentry *dentry, aufs_bindex_t bindex)
+{
+	return PTR_ERR(au_diropq_sio(dentry, bindex, !AuDiropq_CREATE));
+}
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_WHOUT_H__ */
diff --git a/include/fs/aufs/wkq.c b/include/fs/aufs/wkq.c
new file mode 100644
index 000000000..a3316e7e5
--- /dev/null
+++ b/include/fs/aufs/wkq.c
@@ -0,0 +1,390 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * workqueue for asynchronous/super-io operations
+ * todo: try new dredential scheme
+ */
+
+#include <linux/module.h>
+#include "aufs.h"
+
+/* internal workqueue named AUFS_WKQ_NAME */
+
+static struct workqueue_struct *au_wkq;
+
+struct au_wkinfo {
+	struct work_struct wk;
+	struct kobject *kobj;
+
+	unsigned int flags; /* see wkq.h */
+
+	au_wkq_func_t func;
+	void *args;
+
+#ifdef CONFIG_LOCKDEP
+	int dont_check;
+	struct held_lock **hlock;
+#endif
+
+	struct completion *comp;
+};
+
+/* ---------------------------------------------------------------------- */
+/*
+ * Aufs passes some operations to the workqueue such as the internal copyup.
+ * This scheme looks rather unnatural for LOCKDEP debugging feature, since the
+ * job run by workqueue depends upon the locks acquired in the other task.
+ * Delegating a small operation to the workqueue, aufs passes its lockdep
+ * information too. And the job in the workqueue restores the info in order to
+ * pretend as if it acquired those locks. This is just to make LOCKDEP work
+ * correctly and expectedly.
+ */
+
+#ifndef CONFIG_LOCKDEP
+AuStubInt0(au_wkq_lockdep_alloc, struct au_wkinfo *wkinfo);
+AuStubVoid(au_wkq_lockdep_free, struct au_wkinfo *wkinfo);
+AuStubVoid(au_wkq_lockdep_pre, struct au_wkinfo *wkinfo);
+AuStubVoid(au_wkq_lockdep_post, struct au_wkinfo *wkinfo);
+AuStubVoid(au_wkq_lockdep_init, struct au_wkinfo *wkinfo);
+#else
+static void au_wkq_lockdep_init(struct au_wkinfo *wkinfo)
+{
+	wkinfo->hlock = NULL;
+	wkinfo->dont_check = 0;
+}
+
+/*
+ * 1: matched
+ * 0: unmatched
+ */
+static int au_wkq_lockdep_test(struct lock_class_key *key, const char *name)
+{
+	static DEFINE_SPINLOCK(spin);
+	static struct {
+		char *name;
+		struct lock_class_key *key;
+	} a[] = {
+		{ .name = "&sbinfo->si_rwsem" },
+		{ .name = "&finfo->fi_rwsem" },
+		{ .name = "&dinfo->di_rwsem" },
+		{ .name = "&iinfo->ii_rwsem" }
+	};
+	static int set;
+	int i;
+
+	/* lockless read from 'set.' see below */
+	if (set == ARRAY_SIZE(a)) {
+		for (i = 0; i < ARRAY_SIZE(a); i++)
+			if (a[i].key == key)
+				goto match;
+		goto unmatch;
+	}
+
+	spin_lock(&spin);
+	if (set)
+		for (i = 0; i < ARRAY_SIZE(a); i++)
+			if (a[i].key == key) {
+				spin_unlock(&spin);
+				goto match;
+			}
+	for (i = 0; i < ARRAY_SIZE(a); i++) {
+		if (a[i].key) {
+			if (unlikely(a[i].key == key)) { /* rare but possible */
+				spin_unlock(&spin);
+				goto match;
+			} else
+				continue;
+		}
+		if (strstr(a[i].name, name)) {
+			/*
+			 * the order of these three lines is important for the
+			 * lockless read above.
+			 */
+			a[i].key = key;
+			spin_unlock(&spin);
+			set++;
+			/* AuDbg("%d, %s\n", set, name); */
+			goto match;
+		}
+	}
+	spin_unlock(&spin);
+	goto unmatch;
+
+match:
+	return 1;
+unmatch:
+	return 0;
+}
+
+static int au_wkq_lockdep_alloc(struct au_wkinfo *wkinfo)
+{
+	int err, n;
+	struct task_struct *curr;
+	struct held_lock **hl, *held_locks, *p;
+
+	err = 0;
+	curr = current;
+	wkinfo->dont_check = lockdep_recursing(curr);
+	if (wkinfo->dont_check)
+		goto out;
+	n = curr->lockdep_depth;
+	if (!n)
+		goto out;
+
+	err = -ENOMEM;
+	wkinfo->hlock = kmalloc_array(n + 1, sizeof(*wkinfo->hlock), GFP_NOFS);
+	if (unlikely(!wkinfo->hlock))
+		goto out;
+
+	err = 0;
+#if 0
+	if (0 && au_debug_test()) /* left for debugging */
+		lockdep_print_held_locks(curr);
+#endif
+	held_locks = curr->held_locks;
+	hl = wkinfo->hlock;
+	while (n--) {
+		p = held_locks++;
+		if (au_wkq_lockdep_test(p->instance->key, p->instance->name))
+			*hl++ = p;
+	}
+	*hl = NULL;
+
+out:
+	return err;
+}
+
+static void au_wkq_lockdep_free(struct au_wkinfo *wkinfo)
+{
+	kfree(wkinfo->hlock);
+}
+
+static void au_wkq_lockdep_pre(struct au_wkinfo *wkinfo)
+{
+	struct held_lock *p, **hl = wkinfo->hlock;
+	int subclass;
+
+	if (wkinfo->dont_check)
+		lockdep_off();
+	if (!hl)
+		return;
+	while ((p = *hl++)) { /* assignment */
+		subclass = lockdep_hlock_class(p)->subclass;
+		/* AuDbg("%s, %d\n", p->instance->name, subclass); */
+		if (p->read)
+			rwsem_acquire_read(p->instance, subclass, 0,
+					   /*p->acquire_ip*/_RET_IP_);
+		else
+			rwsem_acquire(p->instance, subclass, 0,
+				      /*p->acquire_ip*/_RET_IP_);
+	}
+}
+
+static void au_wkq_lockdep_post(struct au_wkinfo *wkinfo)
+{
+	struct held_lock *p, **hl = wkinfo->hlock;
+
+	if (wkinfo->dont_check)
+		lockdep_on();
+	if (!hl)
+		return;
+	while ((p = *hl++)) /* assignment */
+		rwsem_release(p->instance, 0, /*p->acquire_ip*/_RET_IP_);
+}
+#endif
+
+static void wkq_func(struct work_struct *wk)
+{
+	struct au_wkinfo *wkinfo = container_of(wk, struct au_wkinfo, wk);
+
+	AuDebugOn(!uid_eq(current_fsuid(), GLOBAL_ROOT_UID));
+	AuDebugOn(rlimit(RLIMIT_FSIZE) != RLIM_INFINITY);
+
+	au_wkq_lockdep_pre(wkinfo);
+	wkinfo->func(wkinfo->args);
+	au_wkq_lockdep_post(wkinfo);
+	if (au_ftest_wkq(wkinfo->flags, WAIT))
+		complete(wkinfo->comp);
+	else {
+		kobject_put(wkinfo->kobj);
+		module_put(THIS_MODULE); /* todo: ?? */
+		kfree(wkinfo);
+	}
+}
+
+/*
+ * Since struct completion is large, try allocating it dynamically.
+ */
+#if 1 /* defined(CONFIG_4KSTACKS) || defined(AuTest4KSTACKS) */
+#define AuWkqCompDeclare(name)	struct completion *comp = NULL
+
+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
+{
+	*comp = kmalloc(sizeof(**comp), GFP_NOFS);
+	if (*comp) {
+		init_completion(*comp);
+		wkinfo->comp = *comp;
+		return 0;
+	}
+	return -ENOMEM;
+}
+
+static void au_wkq_comp_free(struct completion *comp)
+{
+	kfree(comp);
+}
+
+#else
+
+/* no braces */
+#define AuWkqCompDeclare(name) \
+	DECLARE_COMPLETION_ONSTACK(_ ## name); \
+	struct completion *comp = &_ ## name
+
+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
+{
+	wkinfo->comp = *comp;
+	return 0;
+}
+
+static void au_wkq_comp_free(struct completion *comp __maybe_unused)
+{
+	/* empty */
+}
+#endif /* 4KSTACKS */
+
+static void au_wkq_run(struct au_wkinfo *wkinfo)
+{
+	if (au_ftest_wkq(wkinfo->flags, NEST)) {
+		if (au_wkq_test()) {
+			AuWarn1("wkq from wkq, unless silly-rename on NFS,"
+				" due to a dead dir by UDBA?\n");
+			AuDebugOn(au_ftest_wkq(wkinfo->flags, WAIT));
+		}
+	} else
+		au_dbg_verify_kthread();
+
+	if (au_ftest_wkq(wkinfo->flags, WAIT)) {
+		INIT_WORK_ONSTACK(&wkinfo->wk, wkq_func);
+		queue_work(au_wkq, &wkinfo->wk);
+	} else {
+		INIT_WORK(&wkinfo->wk, wkq_func);
+		schedule_work(&wkinfo->wk);
+	}
+}
+
+/*
+ * Be careful. It is easy to make deadlock happen.
+ * processA: lock, wkq and wait
+ * processB: wkq and wait, lock in wkq
+ * --> deadlock
+ */
+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args)
+{
+	int err;
+	AuWkqCompDeclare(comp);
+	struct au_wkinfo wkinfo = {
+		.flags	= flags,
+		.func	= func,
+		.args	= args
+	};
+
+	err = au_wkq_comp_alloc(&wkinfo, &comp);
+	if (unlikely(err))
+		goto out;
+	err = au_wkq_lockdep_alloc(&wkinfo);
+	if (unlikely(err))
+		goto out_comp;
+	if (!err) {
+		au_wkq_run(&wkinfo);
+		/* no timeout, no interrupt */
+		wait_for_completion(wkinfo.comp);
+	}
+	au_wkq_lockdep_free(&wkinfo);
+
+out_comp:
+	au_wkq_comp_free(comp);
+out:
+	destroy_work_on_stack(&wkinfo.wk);
+	return err;
+}
+
+/*
+ * Note: dget/dput() in func for aufs dentries are not supported. It will be a
+ * problem in a concurrent umounting.
+ */
+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
+		  unsigned int flags)
+{
+	int err;
+	struct au_wkinfo *wkinfo;
+
+	atomic_inc(&au_sbi(sb)->si_nowait.nw_len);
+
+	/*
+	 * wkq_func() must free this wkinfo.
+	 * it highly depends upon the implementation of workqueue.
+	 */
+	err = 0;
+	wkinfo = kmalloc(sizeof(*wkinfo), GFP_NOFS);
+	if (wkinfo) {
+		wkinfo->kobj = &au_sbi(sb)->si_kobj;
+		wkinfo->flags = flags & ~AuWkq_WAIT;
+		wkinfo->func = func;
+		wkinfo->args = args;
+		wkinfo->comp = NULL;
+		au_wkq_lockdep_init(wkinfo);
+		kobject_get(wkinfo->kobj);
+		__module_get(THIS_MODULE); /* todo: ?? */
+
+		au_wkq_run(wkinfo);
+	} else {
+		err = -ENOMEM;
+		au_nwt_done(&au_sbi(sb)->si_nowait);
+	}
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void au_nwt_init(struct au_nowait_tasks *nwt)
+{
+	atomic_set(&nwt->nw_len, 0);
+	/* smp_mb(); */ /* atomic_set */
+	init_waitqueue_head(&nwt->nw_wq);
+}
+
+void au_wkq_fin(void)
+{
+	destroy_workqueue(au_wkq);
+}
+
+int __init au_wkq_init(void)
+{
+	int err;
+
+	err = 0;
+	au_wkq = alloc_workqueue(AUFS_WKQ_NAME, 0, WQ_DFL_ACTIVE);
+	if (IS_ERR(au_wkq))
+		err = PTR_ERR(au_wkq);
+	else if (!au_wkq)
+		err = -ENOMEM;
+
+	return err;
+}
diff --git a/include/fs/aufs/wkq.h b/include/fs/aufs/wkq.h
new file mode 100644
index 000000000..9fc86c11a
--- /dev/null
+++ b/include/fs/aufs/wkq.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * workqueue for asynchronous/super-io operations
+ * todo: try new credentials management scheme
+ */
+
+#ifndef __AUFS_WKQ_H__
+#define __AUFS_WKQ_H__
+
+#ifdef __KERNEL__
+
+#include <linux/wait.h>
+
+struct super_block;
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * in the next operation, wait for the 'nowait' tasks in system-wide workqueue
+ */
+struct au_nowait_tasks {
+	atomic_t		nw_len;
+	wait_queue_head_t	nw_wq;
+};
+
+/* ---------------------------------------------------------------------- */
+
+typedef void (*au_wkq_func_t)(void *args);
+
+/* wkq flags */
+#define AuWkq_WAIT	1
+#define AuWkq_NEST	(1 << 1)
+#define au_ftest_wkq(flags, name)	((flags) & AuWkq_##name)
+#define au_fset_wkq(flags, name) \
+	do { (flags) |= AuWkq_##name; } while (0)
+#define au_fclr_wkq(flags, name) \
+	do { (flags) &= ~AuWkq_##name; } while (0)
+
+#ifndef CONFIG_AUFS_HNOTIFY
+#undef AuWkq_NEST
+#define AuWkq_NEST	0
+#endif
+
+/* wkq.c */
+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args);
+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
+		  unsigned int flags);
+void au_nwt_init(struct au_nowait_tasks *nwt);
+int __init au_wkq_init(void);
+void au_wkq_fin(void);
+
+/* ---------------------------------------------------------------------- */
+
+static inline int au_wkq_test(void)
+{
+	return current->flags & PF_WQ_WORKER;
+}
+
+static inline int au_wkq_wait(au_wkq_func_t func, void *args)
+{
+	return au_wkq_do_wait(AuWkq_WAIT, func, args);
+}
+
+static inline void au_nwt_done(struct au_nowait_tasks *nwt)
+{
+	if (atomic_dec_and_test(&nwt->nw_len))
+		wake_up_all(&nwt->nw_wq);
+}
+
+static inline int au_nwt_flush(struct au_nowait_tasks *nwt)
+{
+	wait_event(nwt->nw_wq, !atomic_read(&nwt->nw_len));
+	return 0;
+}
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_WKQ_H__ */
diff --git a/include/fs/aufs/xattr.c b/include/fs/aufs/xattr.c
new file mode 100644
index 000000000..7c647bbbc
--- /dev/null
+++ b/include/fs/aufs/xattr.c
@@ -0,0 +1,355 @@
+/*
+ * Copyright (C) 2014-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * handling xattr functions
+ */
+
+#include <linux/fs.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/xattr.h>
+#include "aufs.h"
+
+static int au_xattr_ignore(int err, char *name, unsigned int ignore_flags)
+{
+	if (!ignore_flags)
+		goto out;
+	switch (err) {
+	case -ENOMEM:
+	case -EDQUOT:
+		goto out;
+	}
+
+	if ((ignore_flags & AuBrAttr_ICEX) == AuBrAttr_ICEX) {
+		err = 0;
+		goto out;
+	}
+
+#define cmp(brattr, prefix) do {					\
+		if (!strncmp(name, XATTR_##prefix##_PREFIX,		\
+			     XATTR_##prefix##_PREFIX_LEN)) {		\
+			if (ignore_flags & AuBrAttr_ICEX_##brattr)	\
+				err = 0;				\
+			goto out;					\
+		}							\
+	} while (0)
+
+	cmp(SEC, SECURITY);
+	cmp(SYS, SYSTEM);
+	cmp(TR, TRUSTED);
+	cmp(USR, USER);
+#undef cmp
+
+	if (ignore_flags & AuBrAttr_ICEX_OTH)
+		err = 0;
+
+out:
+	return err;
+}
+
+static const int au_xattr_out_of_list = AuBrAttr_ICEX_OTH << 1;
+
+static int au_do_cpup_xattr(struct dentry *h_dst, struct dentry *h_src,
+			    char *name, char **buf, unsigned int ignore_flags,
+			    unsigned int verbose)
+{
+	int err;
+	ssize_t ssz;
+	struct inode *h_idst;
+
+	ssz = vfs_getxattr_alloc(h_src, name, buf, 0, GFP_NOFS);
+	err = ssz;
+	if (unlikely(err <= 0)) {
+		if (err == -ENODATA
+		    || (err == -EOPNOTSUPP
+			&& ((ignore_flags & au_xattr_out_of_list)
+			    || (au_test_nfs_noacl(d_inode(h_src))
+				&& (!strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS)
+				    || !strcmp(name,
+					       XATTR_NAME_POSIX_ACL_DEFAULT))))
+			    ))
+			err = 0;
+		if (err && (verbose || au_debug_test()))
+			pr_err("%s, err %d\n", name, err);
+		goto out;
+	}
+
+	/* unlock it temporary */
+	h_idst = d_inode(h_dst);
+	inode_unlock(h_idst);
+	err = vfsub_setxattr(h_dst, name, *buf, ssz, /*flags*/0);
+	inode_lock_nested(h_idst, AuLsc_I_CHILD2);
+	if (unlikely(err)) {
+		if (verbose || au_debug_test())
+			pr_err("%s, err %d\n", name, err);
+		err = au_xattr_ignore(err, name, ignore_flags);
+	}
+
+out:
+	return err;
+}
+
+int au_cpup_xattr(struct dentry *h_dst, struct dentry *h_src, int ignore_flags,
+		  unsigned int verbose)
+{
+	int err, unlocked, acl_access, acl_default;
+	ssize_t ssz;
+	struct inode *h_isrc, *h_idst;
+	char *value, *p, *o, *e;
+
+	/* try stopping to update the source inode while we are referencing */
+	/* there should not be the parent-child relationship between them */
+	h_isrc = d_inode(h_src);
+	h_idst = d_inode(h_dst);
+	inode_unlock(h_idst);
+	vfsub_inode_lock_shared_nested(h_isrc, AuLsc_I_CHILD);
+	inode_lock_nested(h_idst, AuLsc_I_CHILD2);
+	unlocked = 0;
+
+	/* some filesystems don't list POSIX ACL, for example tmpfs */
+	ssz = vfs_listxattr(h_src, NULL, 0);
+	err = ssz;
+	if (unlikely(err < 0)) {
+		AuTraceErr(err);
+		if (err == -ENODATA
+		    || err == -EOPNOTSUPP)
+			err = 0;	/* ignore */
+		goto out;
+	}
+
+	err = 0;
+	p = NULL;
+	o = NULL;
+	if (ssz) {
+		err = -ENOMEM;
+		p = kmalloc(ssz, GFP_NOFS);
+		o = p;
+		if (unlikely(!p))
+			goto out;
+		err = vfs_listxattr(h_src, p, ssz);
+	}
+	inode_unlock_shared(h_isrc);
+	unlocked = 1;
+	AuDbg("err %d, ssz %zd\n", err, ssz);
+	if (unlikely(err < 0))
+		goto out_free;
+
+	err = 0;
+	e = p + ssz;
+	value = NULL;
+	acl_access = 0;
+	acl_default = 0;
+	while (!err && p < e) {
+		acl_access |= !strncmp(p, XATTR_NAME_POSIX_ACL_ACCESS,
+				       sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1);
+		acl_default |= !strncmp(p, XATTR_NAME_POSIX_ACL_DEFAULT,
+					sizeof(XATTR_NAME_POSIX_ACL_DEFAULT)
+					- 1);
+		err = au_do_cpup_xattr(h_dst, h_src, p, &value, ignore_flags,
+				       verbose);
+		p += strlen(p) + 1;
+	}
+	AuTraceErr(err);
+	ignore_flags |= au_xattr_out_of_list;
+	if (!err && !acl_access) {
+		err = au_do_cpup_xattr(h_dst, h_src,
+				       XATTR_NAME_POSIX_ACL_ACCESS, &value,
+				       ignore_flags, verbose);
+		AuTraceErr(err);
+	}
+	if (!err && !acl_default) {
+		err = au_do_cpup_xattr(h_dst, h_src,
+				       XATTR_NAME_POSIX_ACL_DEFAULT, &value,
+				       ignore_flags, verbose);
+		AuTraceErr(err);
+	}
+
+	kfree(value);
+
+out_free:
+	kfree(o);
+out:
+	if (!unlocked)
+		inode_unlock_shared(h_isrc);
+	AuTraceErr(err);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_smack_reentering(struct super_block *sb)
+{
+#if IS_ENABLED(CONFIG_SECURITY_SMACK)
+	/*
+	 * as a part of lookup, smack_d_instantiate() is called, and it calls
+	 * i_op->getxattr(). ouch.
+	 */
+	return si_pid_test(sb);
+#else
+	return 0;
+#endif
+}
+
+enum {
+	AU_XATTR_LIST,
+	AU_XATTR_GET
+};
+
+struct au_lgxattr {
+	int type;
+	union {
+		struct {
+			char	*list;
+			size_t	size;
+		} list;
+		struct {
+			const char	*name;
+			void		*value;
+			size_t		size;
+		} get;
+	} u;
+};
+
+static ssize_t au_lgxattr(struct dentry *dentry, struct au_lgxattr *arg)
+{
+	ssize_t err;
+	int reenter;
+	struct path h_path;
+	struct super_block *sb;
+
+	sb = dentry->d_sb;
+	reenter = au_smack_reentering(sb);
+	if (!reenter) {
+		err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
+		if (unlikely(err))
+			goto out;
+	}
+	err = au_h_path_getattr(dentry, /*force*/1, &h_path, reenter);
+	if (unlikely(err))
+		goto out_si;
+	if (unlikely(!h_path.dentry))
+		/* illegally overlapped or something */
+		goto out_di; /* pretending success */
+
+	/* always topmost entry only */
+	switch (arg->type) {
+	case AU_XATTR_LIST:
+		err = vfs_listxattr(h_path.dentry,
+				    arg->u.list.list, arg->u.list.size);
+		break;
+	case AU_XATTR_GET:
+		AuDebugOn(d_is_negative(h_path.dentry));
+		err = vfs_getxattr(h_path.dentry,
+				   arg->u.get.name, arg->u.get.value,
+				   arg->u.get.size);
+		break;
+	}
+
+out_di:
+	if (!reenter)
+		di_read_unlock(dentry, AuLock_IR);
+out_si:
+	if (!reenter)
+		si_read_unlock(sb);
+out:
+	AuTraceErr(err);
+	return err;
+}
+
+ssize_t aufs_listxattr(struct dentry *dentry, char *list, size_t size)
+{
+	struct au_lgxattr arg = {
+		.type = AU_XATTR_LIST,
+		.u.list = {
+			.list	= list,
+			.size	= size
+		},
+	};
+
+	return au_lgxattr(dentry, &arg);
+}
+
+static ssize_t au_getxattr(struct dentry *dentry,
+			   struct inode *inode __maybe_unused,
+			   const char *name, void *value, size_t size)
+{
+	struct au_lgxattr arg = {
+		.type = AU_XATTR_GET,
+		.u.get = {
+			.name	= name,
+			.value	= value,
+			.size	= size
+		},
+	};
+
+	return au_lgxattr(dentry, &arg);
+}
+
+static int au_setxattr(struct dentry *dentry, struct inode *inode,
+		       const char *name, const void *value, size_t size,
+		       int flags)
+{
+	struct au_sxattr arg = {
+		.type = AU_XATTR_SET,
+		.u.set = {
+			.name	= name,
+			.value	= value,
+			.size	= size,
+			.flags	= flags
+		},
+	};
+
+	return au_sxattr(dentry, inode, &arg);
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_xattr_get(const struct xattr_handler *handler,
+			struct dentry *dentry, struct inode *inode,
+			const char *name, void *buffer, size_t size)
+{
+	return au_getxattr(dentry, inode, name, buffer, size);
+}
+
+static int au_xattr_set(const struct xattr_handler *handler,
+			struct dentry *dentry, struct inode *inode,
+			const char *name, const void *value, size_t size,
+			int flags)
+{
+	return au_setxattr(dentry, inode, name, value, size, flags);
+}
+
+static const struct xattr_handler au_xattr_handler = {
+	.name	= "",
+	.prefix	= "",
+	.get	= au_xattr_get,
+	.set	= au_xattr_set
+};
+
+static const struct xattr_handler *au_xattr_handlers[] = {
+#ifdef CONFIG_FS_POSIX_ACL
+	&posix_acl_access_xattr_handler,
+	&posix_acl_default_xattr_handler,
+#endif
+	&au_xattr_handler, /* must be last */
+	NULL
+};
+
+void au_xattr_init(struct super_block *sb)
+{
+	sb->s_xattr = au_xattr_handlers;
+}
diff --git a/include/fs/aufs/xino.c b/include/fs/aufs/xino.c
new file mode 100644
index 000000000..29f53f9e5
--- /dev/null
+++ b/include/fs/aufs/xino.c
@@ -0,0 +1,1469 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * external inode number translation table and bitmap
+ */
+
+#include <linux/seq_file.h>
+#include <linux/statfs.h>
+#include "aufs.h"
+
+static ssize_t xino_fread_wkq(vfs_readf_t func, struct file *file, void *buf,
+			      size_t size, loff_t *pos);
+
+/* todo: unnecessary to support mmap_sem since kernel-space? */
+ssize_t xino_fread(vfs_readf_t func, struct file *file, void *kbuf, size_t size,
+		   loff_t *pos)
+{
+	ssize_t err;
+	mm_segment_t oldfs;
+	union {
+		void *k;
+		char __user *u;
+	} buf;
+	int i;
+	const int prevent_endless = 10;
+
+	i = 0;
+	buf.k = kbuf;
+	oldfs = get_fs();
+	set_fs(KERNEL_DS);
+	do {
+		err = func(file, buf.u, size, pos);
+		if (err == -EINTR
+		    && !au_wkq_test()
+		    && fatal_signal_pending(current)) {
+			set_fs(oldfs);
+			err = xino_fread_wkq(func, file, kbuf, size, pos);
+			BUG_ON(err == -EINTR);
+			oldfs = get_fs();
+			set_fs(KERNEL_DS);
+		}
+	} while (i++ < prevent_endless
+		 && (err == -EAGAIN || err == -EINTR));
+	set_fs(oldfs);
+
+#if 0 /* reserved for future use */
+	if (err > 0)
+		fsnotify_access(file->f_path.dentry);
+#endif
+
+	return err;
+}
+
+struct xino_fread_args {
+	ssize_t *errp;
+	vfs_readf_t func;
+	struct file *file;
+	void *buf;
+	size_t size;
+	loff_t *pos;
+};
+
+static void call_xino_fread(void *args)
+{
+	struct xino_fread_args *a = args;
+	*a->errp = xino_fread(a->func, a->file, a->buf, a->size, a->pos);
+}
+
+static ssize_t xino_fread_wkq(vfs_readf_t func, struct file *file, void *buf,
+			       size_t size, loff_t *pos)
+{
+	ssize_t err;
+	int wkq_err;
+	struct xino_fread_args args = {
+		.errp	= &err,
+		.func	= func,
+		.file	= file,
+		.buf	= buf,
+		.size	= size,
+		.pos	= pos
+	};
+
+	wkq_err = au_wkq_wait(call_xino_fread, &args);
+	if (unlikely(wkq_err))
+		err = wkq_err;
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static ssize_t xino_fwrite_wkq(vfs_writef_t func, struct file *file, void *buf,
+			       size_t size, loff_t *pos);
+
+static ssize_t do_xino_fwrite(vfs_writef_t func, struct file *file, void *kbuf,
+			      size_t size, loff_t *pos)
+{
+	ssize_t err;
+	mm_segment_t oldfs;
+	union {
+		void *k;
+		const char __user *u;
+	} buf;
+	int i;
+	const int prevent_endless = 10;
+
+	i = 0;
+	buf.k = kbuf;
+	oldfs = get_fs();
+	set_fs(KERNEL_DS);
+	do {
+		err = func(file, buf.u, size, pos);
+		if (err == -EINTR
+		    && !au_wkq_test()
+		    && fatal_signal_pending(current)) {
+			set_fs(oldfs);
+			err = xino_fwrite_wkq(func, file, kbuf, size, pos);
+			BUG_ON(err == -EINTR);
+			oldfs = get_fs();
+			set_fs(KERNEL_DS);
+		}
+	} while (i++ < prevent_endless
+		 && (err == -EAGAIN || err == -EINTR));
+	set_fs(oldfs);
+
+#if 0 /* reserved for future use */
+	if (err > 0)
+		fsnotify_modify(file->f_path.dentry);
+#endif
+
+	return err;
+}
+
+struct do_xino_fwrite_args {
+	ssize_t *errp;
+	vfs_writef_t func;
+	struct file *file;
+	void *buf;
+	size_t size;
+	loff_t *pos;
+};
+
+static void call_do_xino_fwrite(void *args)
+{
+	struct do_xino_fwrite_args *a = args;
+	*a->errp = do_xino_fwrite(a->func, a->file, a->buf, a->size, a->pos);
+}
+
+static ssize_t xino_fwrite_wkq(vfs_writef_t func, struct file *file, void *buf,
+			       size_t size, loff_t *pos)
+{
+	ssize_t err;
+	int wkq_err;
+	struct do_xino_fwrite_args args = {
+		.errp	= &err,
+		.func	= func,
+		.file	= file,
+		.buf	= buf,
+		.size	= size,
+		.pos	= pos
+	};
+
+	/*
+	 * it breaks RLIMIT_FSIZE and normal user's limit,
+	 * users should care about quota and real 'filesystem full.'
+	 */
+	wkq_err = au_wkq_wait(call_do_xino_fwrite, &args);
+	if (unlikely(wkq_err))
+		err = wkq_err;
+
+	return err;
+}
+
+ssize_t xino_fwrite(vfs_writef_t func, struct file *file, void *buf,
+		    size_t size, loff_t *pos)
+{
+	ssize_t err;
+
+	if (rlimit(RLIMIT_FSIZE) == RLIM_INFINITY) {
+		lockdep_off();
+		err = do_xino_fwrite(func, file, buf, size, pos);
+		lockdep_on();
+	} else {
+		lockdep_off();
+		err = xino_fwrite_wkq(func, file, buf, size, pos);
+		lockdep_on();
+	}
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * create a new xinofile at the same place/path as @base_file.
+ */
+struct file *au_xino_create2(struct file *base_file, struct file *copy_src)
+{
+	struct file *file;
+	struct dentry *base, *parent;
+	struct inode *dir, *delegated;
+	struct qstr *name;
+	struct path path;
+	int err;
+
+	base = base_file->f_path.dentry;
+	parent = base->d_parent; /* dir inode is locked */
+	dir = d_inode(parent);
+	IMustLock(dir);
+
+	file = ERR_PTR(-EINVAL);
+	name = &base->d_name;
+	path.dentry = vfsub_lookup_one_len(name->name, parent, name->len);
+	if (IS_ERR(path.dentry)) {
+		file = (void *)path.dentry;
+		pr_err("%pd lookup err %ld\n",
+		       base, PTR_ERR(path.dentry));
+		goto out;
+	}
+
+	/* no need to mnt_want_write() since we call dentry_open() later */
+	err = vfs_create(dir, path.dentry, S_IRUGO | S_IWUGO, NULL);
+	if (unlikely(err)) {
+		file = ERR_PTR(err);
+		pr_err("%pd create err %d\n", base, err);
+		goto out_dput;
+	}
+
+	path.mnt = base_file->f_path.mnt;
+	file = vfsub_dentry_open(&path,
+				 O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
+				 /* | __FMODE_NONOTIFY */);
+	if (IS_ERR(file)) {
+		pr_err("%pd open err %ld\n", base, PTR_ERR(file));
+		goto out_dput;
+	}
+
+	delegated = NULL;
+	err = vfsub_unlink(dir, &file->f_path, &delegated, /*force*/0);
+	if (unlikely(err == -EWOULDBLOCK)) {
+		pr_warn("cannot retry for NFSv4 delegation"
+			" for an internal unlink\n");
+		iput(delegated);
+	}
+	if (unlikely(err)) {
+		pr_err("%pd unlink err %d\n", base, err);
+		goto out_fput;
+	}
+
+	if (copy_src) {
+		/* no one can touch copy_src xino */
+		err = au_copy_file(file, copy_src, vfsub_f_size_read(copy_src));
+		if (unlikely(err)) {
+			pr_err("%pd copy err %d\n", base, err);
+			goto out_fput;
+		}
+	}
+	goto out_dput; /* success */
+
+out_fput:
+	fput(file);
+	file = ERR_PTR(err);
+out_dput:
+	dput(path.dentry);
+out:
+	return file;
+}
+
+struct au_xino_lock_dir {
+	struct au_hinode *hdir;
+	struct dentry *parent;
+	struct inode *dir;
+};
+
+static void au_xino_lock_dir(struct super_block *sb, struct file *xino,
+			     struct au_xino_lock_dir *ldir)
+{
+	aufs_bindex_t brid, bindex;
+
+	ldir->hdir = NULL;
+	bindex = -1;
+	brid = au_xino_brid(sb);
+	if (brid >= 0)
+		bindex = au_br_index(sb, brid);
+	if (bindex >= 0) {
+		ldir->hdir = au_hi(d_inode(sb->s_root), bindex);
+		au_hn_inode_lock_nested(ldir->hdir, AuLsc_I_PARENT);
+	} else {
+		ldir->parent = dget_parent(xino->f_path.dentry);
+		ldir->dir = d_inode(ldir->parent);
+		inode_lock_nested(ldir->dir, AuLsc_I_PARENT);
+	}
+}
+
+static void au_xino_unlock_dir(struct au_xino_lock_dir *ldir)
+{
+	if (ldir->hdir)
+		au_hn_inode_unlock(ldir->hdir);
+	else {
+		inode_unlock(ldir->dir);
+		dput(ldir->parent);
+	}
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* trucate xino files asynchronously */
+
+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex)
+{
+	int err;
+	unsigned long jiffy;
+	blkcnt_t blocks;
+	aufs_bindex_t bi, bbot;
+	struct kstatfs *st;
+	struct au_branch *br;
+	struct file *new_xino, *file;
+	struct super_block *h_sb;
+	struct au_xino_lock_dir ldir;
+
+	err = -ENOMEM;
+	st = kmalloc(sizeof(*st), GFP_NOFS);
+	if (unlikely(!st))
+		goto out;
+
+	err = -EINVAL;
+	bbot = au_sbbot(sb);
+	if (unlikely(bindex < 0 || bbot < bindex))
+		goto out_st;
+	br = au_sbr(sb, bindex);
+	file = br->br_xino.xi_file;
+	if (!file)
+		goto out_st;
+
+	err = vfs_statfs(&file->f_path, st);
+	if (unlikely(err))
+		AuErr1("statfs err %d, ignored\n", err);
+	jiffy = jiffies;
+	blocks = file_inode(file)->i_blocks;
+	pr_info("begin truncating xino(b%d), ib%llu, %llu/%llu free blks\n",
+		bindex, (u64)blocks, st->f_bfree, st->f_blocks);
+
+	au_xino_lock_dir(sb, file, &ldir);
+	/* mnt_want_write() is unnecessary here */
+	new_xino = au_xino_create2(file, file);
+	au_xino_unlock_dir(&ldir);
+	err = PTR_ERR(new_xino);
+	if (IS_ERR(new_xino)) {
+		pr_err("err %d, ignored\n", err);
+		goto out_st;
+	}
+	err = 0;
+	fput(file);
+	br->br_xino.xi_file = new_xino;
+
+	h_sb = au_br_sb(br);
+	for (bi = 0; bi <= bbot; bi++) {
+		if (unlikely(bi == bindex))
+			continue;
+		br = au_sbr(sb, bi);
+		if (au_br_sb(br) != h_sb)
+			continue;
+
+		fput(br->br_xino.xi_file);
+		br->br_xino.xi_file = new_xino;
+		get_file(new_xino);
+	}
+
+	err = vfs_statfs(&new_xino->f_path, st);
+	if (!err) {
+		pr_info("end truncating xino(b%d), ib%llu, %llu/%llu free blks\n",
+			bindex, (u64)file_inode(new_xino)->i_blocks,
+			st->f_bfree, st->f_blocks);
+		if (file_inode(new_xino)->i_blocks < blocks)
+			au_sbi(sb)->si_xino_jiffy = jiffy;
+	} else
+		AuErr1("statfs err %d, ignored\n", err);
+
+out_st:
+	kfree(st);
+out:
+	return err;
+}
+
+struct xino_do_trunc_args {
+	struct super_block *sb;
+	struct au_branch *br;
+};
+
+static void xino_do_trunc(void *_args)
+{
+	struct xino_do_trunc_args *args = _args;
+	struct super_block *sb;
+	struct au_branch *br;
+	struct inode *dir;
+	int err;
+	aufs_bindex_t bindex;
+
+	err = 0;
+	sb = args->sb;
+	dir = d_inode(sb->s_root);
+	br = args->br;
+
+	si_noflush_write_lock(sb);
+	ii_read_lock_parent(dir);
+	bindex = au_br_index(sb, br->br_id);
+	err = au_xino_trunc(sb, bindex);
+	ii_read_unlock(dir);
+	if (unlikely(err))
+		pr_warn("err b%d, (%d)\n", bindex, err);
+	atomic_dec(&br->br_xino_running);
+	au_br_put(br);
+	si_write_unlock(sb);
+	au_nwt_done(&au_sbi(sb)->si_nowait);
+	kfree(args);
+}
+
+static int xino_trunc_test(struct super_block *sb, struct au_branch *br)
+{
+	int err;
+	struct kstatfs st;
+	struct au_sbinfo *sbinfo;
+
+	/* todo: si_xino_expire and the ratio should be customizable */
+	sbinfo = au_sbi(sb);
+	if (time_before(jiffies,
+			sbinfo->si_xino_jiffy + sbinfo->si_xino_expire))
+		return 0;
+
+	/* truncation border */
+	err = vfs_statfs(&br->br_xino.xi_file->f_path, &st);
+	if (unlikely(err)) {
+		AuErr1("statfs err %d, ignored\n", err);
+		return 0;
+	}
+	if (div64_u64(st.f_bfree * 100, st.f_blocks) >= AUFS_XINO_DEF_TRUNC)
+		return 0;
+
+	return 1;
+}
+
+static void xino_try_trunc(struct super_block *sb, struct au_branch *br)
+{
+	struct xino_do_trunc_args *args;
+	int wkq_err;
+
+	if (!xino_trunc_test(sb, br))
+		return;
+
+	if (atomic_inc_return(&br->br_xino_running) > 1)
+		goto out;
+
+	/* lock and kfree() will be called in trunc_xino() */
+	args = kmalloc(sizeof(*args), GFP_NOFS);
+	if (unlikely(!args)) {
+		AuErr1("no memory\n");
+		goto out;
+	}
+
+	au_br_get(br);
+	args->sb = sb;
+	args->br = br;
+	wkq_err = au_wkq_nowait(xino_do_trunc, args, sb, /*flags*/0);
+	if (!wkq_err)
+		return; /* success */
+
+	pr_err("wkq %d\n", wkq_err);
+	au_br_put(br);
+	kfree(args);
+
+out:
+	atomic_dec(&br->br_xino_running);
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_xino_do_write(vfs_writef_t write, struct file *file,
+			    ino_t h_ino, ino_t ino)
+{
+	loff_t pos;
+	ssize_t sz;
+
+	pos = h_ino;
+	if (unlikely(au_loff_max / sizeof(ino) - 1 < pos)) {
+		AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
+		return -EFBIG;
+	}
+	pos *= sizeof(ino);
+	sz = xino_fwrite(write, file, &ino, sizeof(ino), &pos);
+	if (sz == sizeof(ino))
+		return 0; /* success */
+
+	AuIOErr("write failed (%zd)\n", sz);
+	return -EIO;
+}
+
+/*
+ * write @ino to the xinofile for the specified branch{@sb, @bindex}
+ * at the position of @h_ino.
+ * even if @ino is zero, it is written to the xinofile and means no entry.
+ * if the size of the xino file on a specific filesystem exceeds the watermark,
+ * try truncating it.
+ */
+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
+		  ino_t ino)
+{
+	int err;
+	unsigned int mnt_flags;
+	struct au_branch *br;
+
+	BUILD_BUG_ON(sizeof(long long) != sizeof(au_loff_max)
+		     || ((loff_t)-1) > 0);
+	SiMustAnyLock(sb);
+
+	mnt_flags = au_mntflags(sb);
+	if (!au_opt_test(mnt_flags, XINO))
+		return 0;
+
+	br = au_sbr(sb, bindex);
+	err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
+			       h_ino, ino);
+	if (!err) {
+		if (au_opt_test(mnt_flags, TRUNC_XINO)
+		    && au_test_fs_trunc_xino(au_br_sb(br)))
+			xino_try_trunc(sb, br);
+		return 0; /* success */
+	}
+
+	AuIOErr("write failed (%d)\n", err);
+	return -EIO;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* aufs inode number bitmap */
+
+static const int page_bits = (int)PAGE_SIZE * BITS_PER_BYTE;
+static ino_t xib_calc_ino(unsigned long pindex, int bit)
+{
+	ino_t ino;
+
+	AuDebugOn(bit < 0 || page_bits <= bit);
+	ino = AUFS_FIRST_INO + pindex * page_bits + bit;
+	return ino;
+}
+
+static void xib_calc_bit(ino_t ino, unsigned long *pindex, int *bit)
+{
+	AuDebugOn(ino < AUFS_FIRST_INO);
+	ino -= AUFS_FIRST_INO;
+	*pindex = ino / page_bits;
+	*bit = ino % page_bits;
+}
+
+static int xib_pindex(struct super_block *sb, unsigned long pindex)
+{
+	int err;
+	loff_t pos;
+	ssize_t sz;
+	struct au_sbinfo *sbinfo;
+	struct file *xib;
+	unsigned long *p;
+
+	sbinfo = au_sbi(sb);
+	MtxMustLock(&sbinfo->si_xib_mtx);
+	AuDebugOn(pindex > ULONG_MAX / PAGE_SIZE
+		  || !au_opt_test(sbinfo->si_mntflags, XINO));
+
+	if (pindex == sbinfo->si_xib_last_pindex)
+		return 0;
+
+	xib = sbinfo->si_xib;
+	p = sbinfo->si_xib_buf;
+	pos = sbinfo->si_xib_last_pindex;
+	pos *= PAGE_SIZE;
+	sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
+	if (unlikely(sz != PAGE_SIZE))
+		goto out;
+
+	pos = pindex;
+	pos *= PAGE_SIZE;
+	if (vfsub_f_size_read(xib) >= pos + PAGE_SIZE)
+		sz = xino_fread(sbinfo->si_xread, xib, p, PAGE_SIZE, &pos);
+	else {
+		memset(p, 0, PAGE_SIZE);
+		sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
+	}
+	if (sz == PAGE_SIZE) {
+		sbinfo->si_xib_last_pindex = pindex;
+		return 0; /* success */
+	}
+
+out:
+	AuIOErr1("write failed (%zd)\n", sz);
+	err = sz;
+	if (sz >= 0)
+		err = -EIO;
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static void au_xib_clear_bit(struct inode *inode)
+{
+	int err, bit;
+	unsigned long pindex;
+	struct super_block *sb;
+	struct au_sbinfo *sbinfo;
+
+	AuDebugOn(inode->i_nlink);
+
+	sb = inode->i_sb;
+	xib_calc_bit(inode->i_ino, &pindex, &bit);
+	AuDebugOn(page_bits <= bit);
+	sbinfo = au_sbi(sb);
+	mutex_lock(&sbinfo->si_xib_mtx);
+	err = xib_pindex(sb, pindex);
+	if (!err) {
+		clear_bit(bit, sbinfo->si_xib_buf);
+		sbinfo->si_xib_next_bit = bit;
+	}
+	mutex_unlock(&sbinfo->si_xib_mtx);
+}
+
+/* for s_op->delete_inode() */
+void au_xino_delete_inode(struct inode *inode, const int unlinked)
+{
+	int err;
+	unsigned int mnt_flags;
+	aufs_bindex_t bindex, bbot, bi;
+	unsigned char try_trunc;
+	struct au_iinfo *iinfo;
+	struct super_block *sb;
+	struct au_hinode *hi;
+	struct inode *h_inode;
+	struct au_branch *br;
+	vfs_writef_t xwrite;
+
+	AuDebugOn(au_is_bad_inode(inode));
+
+	sb = inode->i_sb;
+	mnt_flags = au_mntflags(sb);
+	if (!au_opt_test(mnt_flags, XINO)
+	    || inode->i_ino == AUFS_ROOT_INO)
+		return;
+
+	if (unlinked) {
+		au_xigen_inc(inode);
+		au_xib_clear_bit(inode);
+	}
+
+	iinfo = au_ii(inode);
+	bindex = iinfo->ii_btop;
+	if (bindex < 0)
+		return;
+
+	xwrite = au_sbi(sb)->si_xwrite;
+	try_trunc = !!au_opt_test(mnt_flags, TRUNC_XINO);
+	hi = au_hinode(iinfo, bindex);
+	bbot = iinfo->ii_bbot;
+	for (; bindex <= bbot; bindex++, hi++) {
+		h_inode = hi->hi_inode;
+		if (!h_inode
+		    || (!unlinked && h_inode->i_nlink))
+			continue;
+
+		/* inode may not be revalidated */
+		bi = au_br_index(sb, hi->hi_id);
+		if (bi < 0)
+			continue;
+
+		br = au_sbr(sb, bi);
+		err = au_xino_do_write(xwrite, br->br_xino.xi_file,
+				       h_inode->i_ino, /*ino*/0);
+		if (!err && try_trunc
+		    && au_test_fs_trunc_xino(au_br_sb(br)))
+			xino_try_trunc(sb, br);
+	}
+}
+
+/* get an unused inode number from bitmap */
+ino_t au_xino_new_ino(struct super_block *sb)
+{
+	ino_t ino;
+	unsigned long *p, pindex, ul, pend;
+	struct au_sbinfo *sbinfo;
+	struct file *file;
+	int free_bit, err;
+
+	if (!au_opt_test(au_mntflags(sb), XINO))
+		return iunique(sb, AUFS_FIRST_INO);
+
+	sbinfo = au_sbi(sb);
+	mutex_lock(&sbinfo->si_xib_mtx);
+	p = sbinfo->si_xib_buf;
+	free_bit = sbinfo->si_xib_next_bit;
+	if (free_bit < page_bits && !test_bit(free_bit, p))
+		goto out; /* success */
+	free_bit = find_first_zero_bit(p, page_bits);
+	if (free_bit < page_bits)
+		goto out; /* success */
+
+	pindex = sbinfo->si_xib_last_pindex;
+	for (ul = pindex - 1; ul < ULONG_MAX; ul--) {
+		err = xib_pindex(sb, ul);
+		if (unlikely(err))
+			goto out_err;
+		free_bit = find_first_zero_bit(p, page_bits);
+		if (free_bit < page_bits)
+			goto out; /* success */
+	}
+
+	file = sbinfo->si_xib;
+	pend = vfsub_f_size_read(file) / PAGE_SIZE;
+	for (ul = pindex + 1; ul <= pend; ul++) {
+		err = xib_pindex(sb, ul);
+		if (unlikely(err))
+			goto out_err;
+		free_bit = find_first_zero_bit(p, page_bits);
+		if (free_bit < page_bits)
+			goto out; /* success */
+	}
+	BUG();
+
+out:
+	set_bit(free_bit, p);
+	sbinfo->si_xib_next_bit = free_bit + 1;
+	pindex = sbinfo->si_xib_last_pindex;
+	mutex_unlock(&sbinfo->si_xib_mtx);
+	ino = xib_calc_ino(pindex, free_bit);
+	AuDbg("i%lu\n", (unsigned long)ino);
+	return ino;
+out_err:
+	mutex_unlock(&sbinfo->si_xib_mtx);
+	AuDbg("i0\n");
+	return 0;
+}
+
+/*
+ * read @ino from xinofile for the specified branch{@sb, @bindex}
+ * at the position of @h_ino.
+ * if @ino does not exist and @do_new is true, get new one.
+ */
+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
+		 ino_t *ino)
+{
+	int err;
+	ssize_t sz;
+	loff_t pos;
+	struct file *file;
+	struct au_sbinfo *sbinfo;
+
+	*ino = 0;
+	if (!au_opt_test(au_mntflags(sb), XINO))
+		return 0; /* no xino */
+
+	err = 0;
+	sbinfo = au_sbi(sb);
+	pos = h_ino;
+	if (unlikely(au_loff_max / sizeof(*ino) - 1 < pos)) {
+		AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
+		return -EFBIG;
+	}
+	pos *= sizeof(*ino);
+
+	file = au_sbr(sb, bindex)->br_xino.xi_file;
+	if (vfsub_f_size_read(file) < pos + sizeof(*ino))
+		return 0; /* no ino */
+
+	sz = xino_fread(sbinfo->si_xread, file, ino, sizeof(*ino), &pos);
+	if (sz == sizeof(*ino))
+		return 0; /* success */
+
+	err = sz;
+	if (unlikely(sz >= 0)) {
+		err = -EIO;
+		AuIOErr("xino read error (%zd)\n", sz);
+	}
+
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* create and set a new xino file */
+
+struct file *au_xino_create(struct super_block *sb, char *fname, int silent)
+{
+	struct file *file;
+	struct dentry *h_parent, *d;
+	struct inode *h_dir, *inode;
+	int err;
+
+	/*
+	 * at mount-time, and the xino file is the default path,
+	 * hnotify is disabled so we have no notify events to ignore.
+	 * when a user specified the xino, we cannot get au_hdir to be ignored.
+	 */
+	file = vfsub_filp_open(fname, O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
+			       /* | __FMODE_NONOTIFY */,
+			       S_IRUGO | S_IWUGO);
+	if (IS_ERR(file)) {
+		if (!silent)
+			pr_err("open %s(%ld)\n", fname, PTR_ERR(file));
+		return file;
+	}
+
+	/* keep file count */
+	err = 0;
+	inode = file_inode(file);
+	h_parent = dget_parent(file->f_path.dentry);
+	h_dir = d_inode(h_parent);
+	inode_lock_nested(h_dir, AuLsc_I_PARENT);
+	/* mnt_want_write() is unnecessary here */
+	/* no delegation since it is just created */
+	if (inode->i_nlink)
+		err = vfsub_unlink(h_dir, &file->f_path, /*delegated*/NULL,
+				   /*force*/0);
+	inode_unlock(h_dir);
+	dput(h_parent);
+	if (unlikely(err)) {
+		if (!silent)
+			pr_err("unlink %s(%d)\n", fname, err);
+		goto out;
+	}
+
+	err = -EINVAL;
+	d = file->f_path.dentry;
+	if (unlikely(sb == d->d_sb)) {
+		if (!silent)
+			pr_err("%s must be outside\n", fname);
+		goto out;
+	}
+	if (unlikely(au_test_fs_bad_xino(d->d_sb))) {
+		if (!silent)
+			pr_err("xino doesn't support %s(%s)\n",
+			       fname, au_sbtype(d->d_sb));
+		goto out;
+	}
+	return file; /* success */
+
+out:
+	fput(file);
+	file = ERR_PTR(err);
+	return file;
+}
+
+/*
+ * find another branch who is on the same filesystem of the specified
+ * branch{@btgt}. search until @bbot.
+ */
+static int is_sb_shared(struct super_block *sb, aufs_bindex_t btgt,
+			aufs_bindex_t bbot)
+{
+	aufs_bindex_t bindex;
+	struct super_block *tgt_sb = au_sbr_sb(sb, btgt);
+
+	for (bindex = 0; bindex < btgt; bindex++)
+		if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
+			return bindex;
+	for (bindex++; bindex <= bbot; bindex++)
+		if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
+			return bindex;
+	return -1;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * initialize the xinofile for the specified branch @br
+ * at the place/path where @base_file indicates.
+ * test whether another branch is on the same filesystem or not,
+ * if @do_test is true.
+ */
+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t h_ino,
+	       struct file *base_file, int do_test)
+{
+	int err;
+	ino_t ino;
+	aufs_bindex_t bbot, bindex;
+	struct au_branch *shared_br, *b;
+	struct file *file;
+	struct super_block *tgt_sb;
+
+	shared_br = NULL;
+	bbot = au_sbbot(sb);
+	if (do_test) {
+		tgt_sb = au_br_sb(br);
+		for (bindex = 0; bindex <= bbot; bindex++) {
+			b = au_sbr(sb, bindex);
+			if (tgt_sb == au_br_sb(b)) {
+				shared_br = b;
+				break;
+			}
+		}
+	}
+
+	if (!shared_br || !shared_br->br_xino.xi_file) {
+		struct au_xino_lock_dir ldir;
+
+		au_xino_lock_dir(sb, base_file, &ldir);
+		/* mnt_want_write() is unnecessary here */
+		file = au_xino_create2(base_file, NULL);
+		au_xino_unlock_dir(&ldir);
+		err = PTR_ERR(file);
+		if (IS_ERR(file))
+			goto out;
+		br->br_xino.xi_file = file;
+	} else {
+		br->br_xino.xi_file = shared_br->br_xino.xi_file;
+		get_file(br->br_xino.xi_file);
+	}
+
+	ino = AUFS_ROOT_INO;
+	err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
+			       h_ino, ino);
+	if (unlikely(err)) {
+		fput(br->br_xino.xi_file);
+		br->br_xino.xi_file = NULL;
+	}
+
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* trucate a xino bitmap file */
+
+/* todo: slow */
+static int do_xib_restore(struct super_block *sb, struct file *file, void *page)
+{
+	int err, bit;
+	ssize_t sz;
+	unsigned long pindex;
+	loff_t pos, pend;
+	struct au_sbinfo *sbinfo;
+	vfs_readf_t func;
+	ino_t *ino;
+	unsigned long *p;
+
+	err = 0;
+	sbinfo = au_sbi(sb);
+	MtxMustLock(&sbinfo->si_xib_mtx);
+	p = sbinfo->si_xib_buf;
+	func = sbinfo->si_xread;
+	pend = vfsub_f_size_read(file);
+	pos = 0;
+	while (pos < pend) {
+		sz = xino_fread(func, file, page, PAGE_SIZE, &pos);
+		err = sz;
+		if (unlikely(sz <= 0))
+			goto out;
+
+		err = 0;
+		for (ino = page; sz > 0; ino++, sz -= sizeof(ino)) {
+			if (unlikely(*ino < AUFS_FIRST_INO))
+				continue;
+
+			xib_calc_bit(*ino, &pindex, &bit);
+			AuDebugOn(page_bits <= bit);
+			err = xib_pindex(sb, pindex);
+			if (!err)
+				set_bit(bit, p);
+			else
+				goto out;
+		}
+	}
+
+out:
+	return err;
+}
+
+static int xib_restore(struct super_block *sb)
+{
+	int err;
+	aufs_bindex_t bindex, bbot;
+	void *page;
+
+	err = -ENOMEM;
+	page = (void *)__get_free_page(GFP_NOFS);
+	if (unlikely(!page))
+		goto out;
+
+	err = 0;
+	bbot = au_sbbot(sb);
+	for (bindex = 0; !err && bindex <= bbot; bindex++)
+		if (!bindex || is_sb_shared(sb, bindex, bindex - 1) < 0)
+			err = do_xib_restore
+				(sb, au_sbr(sb, bindex)->br_xino.xi_file, page);
+		else
+			AuDbg("b%d\n", bindex);
+	free_page((unsigned long)page);
+
+out:
+	return err;
+}
+
+int au_xib_trunc(struct super_block *sb)
+{
+	int err;
+	ssize_t sz;
+	loff_t pos;
+	struct au_xino_lock_dir ldir;
+	struct au_sbinfo *sbinfo;
+	unsigned long *p;
+	struct file *file;
+
+	SiMustWriteLock(sb);
+
+	err = 0;
+	sbinfo = au_sbi(sb);
+	if (!au_opt_test(sbinfo->si_mntflags, XINO))
+		goto out;
+
+	file = sbinfo->si_xib;
+	if (vfsub_f_size_read(file) <= PAGE_SIZE)
+		goto out;
+
+	au_xino_lock_dir(sb, file, &ldir);
+	/* mnt_want_write() is unnecessary here */
+	file = au_xino_create2(sbinfo->si_xib, NULL);
+	au_xino_unlock_dir(&ldir);
+	err = PTR_ERR(file);
+	if (IS_ERR(file))
+		goto out;
+	fput(sbinfo->si_xib);
+	sbinfo->si_xib = file;
+
+	p = sbinfo->si_xib_buf;
+	memset(p, 0, PAGE_SIZE);
+	pos = 0;
+	sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xib, p, PAGE_SIZE, &pos);
+	if (unlikely(sz != PAGE_SIZE)) {
+		err = sz;
+		AuIOErr("err %d\n", err);
+		if (sz >= 0)
+			err = -EIO;
+		goto out;
+	}
+
+	mutex_lock(&sbinfo->si_xib_mtx);
+	/* mnt_want_write() is unnecessary here */
+	err = xib_restore(sb);
+	mutex_unlock(&sbinfo->si_xib_mtx);
+
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * xino mount option handlers
+ */
+
+/* xino bitmap */
+static void xino_clear_xib(struct super_block *sb)
+{
+	struct au_sbinfo *sbinfo;
+
+	SiMustWriteLock(sb);
+
+	sbinfo = au_sbi(sb);
+	sbinfo->si_xread = NULL;
+	sbinfo->si_xwrite = NULL;
+	if (sbinfo->si_xib)
+		fput(sbinfo->si_xib);
+	sbinfo->si_xib = NULL;
+	if (sbinfo->si_xib_buf)
+		free_page((unsigned long)sbinfo->si_xib_buf);
+	sbinfo->si_xib_buf = NULL;
+}
+
+static int au_xino_set_xib(struct super_block *sb, struct file *base)
+{
+	int err;
+	loff_t pos;
+	struct au_sbinfo *sbinfo;
+	struct file *file;
+
+	SiMustWriteLock(sb);
+
+	sbinfo = au_sbi(sb);
+	file = au_xino_create2(base, sbinfo->si_xib);
+	err = PTR_ERR(file);
+	if (IS_ERR(file))
+		goto out;
+	if (sbinfo->si_xib)
+		fput(sbinfo->si_xib);
+	sbinfo->si_xib = file;
+	sbinfo->si_xread = vfs_readf(file);
+	sbinfo->si_xwrite = vfs_writef(file);
+
+	err = -ENOMEM;
+	if (!sbinfo->si_xib_buf)
+		sbinfo->si_xib_buf = (void *)get_zeroed_page(GFP_NOFS);
+	if (unlikely(!sbinfo->si_xib_buf))
+		goto out_unset;
+
+	sbinfo->si_xib_last_pindex = 0;
+	sbinfo->si_xib_next_bit = 0;
+	if (vfsub_f_size_read(file) < PAGE_SIZE) {
+		pos = 0;
+		err = xino_fwrite(sbinfo->si_xwrite, file, sbinfo->si_xib_buf,
+				  PAGE_SIZE, &pos);
+		if (unlikely(err != PAGE_SIZE))
+			goto out_free;
+	}
+	err = 0;
+	goto out; /* success */
+
+out_free:
+	if (sbinfo->si_xib_buf)
+		free_page((unsigned long)sbinfo->si_xib_buf);
+	sbinfo->si_xib_buf = NULL;
+	if (err >= 0)
+		err = -EIO;
+out_unset:
+	fput(sbinfo->si_xib);
+	sbinfo->si_xib = NULL;
+	sbinfo->si_xread = NULL;
+	sbinfo->si_xwrite = NULL;
+out:
+	return err;
+}
+
+/* xino for each branch */
+static void xino_clear_br(struct super_block *sb)
+{
+	aufs_bindex_t bindex, bbot;
+	struct au_branch *br;
+
+	bbot = au_sbbot(sb);
+	for (bindex = 0; bindex <= bbot; bindex++) {
+		br = au_sbr(sb, bindex);
+		if (!br || !br->br_xino.xi_file)
+			continue;
+
+		fput(br->br_xino.xi_file);
+		br->br_xino.xi_file = NULL;
+	}
+}
+
+static int au_xino_set_br(struct super_block *sb, struct file *base)
+{
+	int err;
+	ino_t ino;
+	aufs_bindex_t bindex, bbot, bshared;
+	struct {
+		struct file *old, *new;
+	} *fpair, *p;
+	struct au_branch *br;
+	struct inode *inode;
+	vfs_writef_t writef;
+
+	SiMustWriteLock(sb);
+
+	err = -ENOMEM;
+	bbot = au_sbbot(sb);
+	fpair = kcalloc(bbot + 1, sizeof(*fpair), GFP_NOFS);
+	if (unlikely(!fpair))
+		goto out;
+
+	inode = d_inode(sb->s_root);
+	ino = AUFS_ROOT_INO;
+	writef = au_sbi(sb)->si_xwrite;
+	for (bindex = 0, p = fpair; bindex <= bbot; bindex++, p++) {
+		bshared = is_sb_shared(sb, bindex, bindex - 1);
+		if (bshared >= 0) {
+			/* shared xino */
+			*p = fpair[bshared];
+			get_file(p->new);
+		}
+
+		if (!p->new) {
+			/* new xino */
+			br = au_sbr(sb, bindex);
+			p->old = br->br_xino.xi_file;
+			p->new = au_xino_create2(base, br->br_xino.xi_file);
+			err = PTR_ERR(p->new);
+			if (IS_ERR(p->new)) {
+				p->new = NULL;
+				goto out_pair;
+			}
+		}
+
+		err = au_xino_do_write(writef, p->new,
+				       au_h_iptr(inode, bindex)->i_ino, ino);
+		if (unlikely(err))
+			goto out_pair;
+	}
+
+	for (bindex = 0, p = fpair; bindex <= bbot; bindex++, p++) {
+		br = au_sbr(sb, bindex);
+		if (br->br_xino.xi_file)
+			fput(br->br_xino.xi_file);
+		get_file(p->new);
+		br->br_xino.xi_file = p->new;
+	}
+
+out_pair:
+	for (bindex = 0, p = fpair; bindex <= bbot; bindex++, p++)
+		if (p->new)
+			fput(p->new);
+		else
+			break;
+	kfree(fpair);
+out:
+	return err;
+}
+
+void au_xino_clr(struct super_block *sb)
+{
+	struct au_sbinfo *sbinfo;
+
+	au_xigen_clr(sb);
+	xino_clear_xib(sb);
+	xino_clear_br(sb);
+	sbinfo = au_sbi(sb);
+	/* lvalue, do not call au_mntflags() */
+	au_opt_clr(sbinfo->si_mntflags, XINO);
+}
+
+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount)
+{
+	int err, skip;
+	struct dentry *parent, *cur_parent;
+	struct qstr *dname, *cur_name;
+	struct file *cur_xino;
+	struct inode *dir;
+	struct au_sbinfo *sbinfo;
+
+	SiMustWriteLock(sb);
+
+	err = 0;
+	sbinfo = au_sbi(sb);
+	parent = dget_parent(xino->file->f_path.dentry);
+	if (remount) {
+		skip = 0;
+		dname = &xino->file->f_path.dentry->d_name;
+		cur_xino = sbinfo->si_xib;
+		if (cur_xino) {
+			cur_parent = dget_parent(cur_xino->f_path.dentry);
+			cur_name = &cur_xino->f_path.dentry->d_name;
+			skip = (cur_parent == parent
+				&& au_qstreq(dname, cur_name));
+			dput(cur_parent);
+		}
+		if (skip)
+			goto out;
+	}
+
+	au_opt_set(sbinfo->si_mntflags, XINO);
+	dir = d_inode(parent);
+	inode_lock_nested(dir, AuLsc_I_PARENT);
+	/* mnt_want_write() is unnecessary here */
+	err = au_xino_set_xib(sb, xino->file);
+	if (!err)
+		err = au_xigen_set(sb, xino->file);
+	if (!err)
+		err = au_xino_set_br(sb, xino->file);
+	inode_unlock(dir);
+	if (!err)
+		goto out; /* success */
+
+	/* reset all */
+	AuIOErr("failed creating xino(%d).\n", err);
+	au_xigen_clr(sb);
+	xino_clear_xib(sb);
+
+out:
+	dput(parent);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * create a xinofile at the default place/path.
+ */
+struct file *au_xino_def(struct super_block *sb)
+{
+	struct file *file;
+	char *page, *p;
+	struct au_branch *br;
+	struct super_block *h_sb;
+	struct path path;
+	aufs_bindex_t bbot, bindex, bwr;
+
+	br = NULL;
+	bbot = au_sbbot(sb);
+	bwr = -1;
+	for (bindex = 0; bindex <= bbot; bindex++) {
+		br = au_sbr(sb, bindex);
+		if (au_br_writable(br->br_perm)
+		    && !au_test_fs_bad_xino(au_br_sb(br))) {
+			bwr = bindex;
+			break;
+		}
+	}
+
+	if (bwr >= 0) {
+		file = ERR_PTR(-ENOMEM);
+		page = (void *)__get_free_page(GFP_NOFS);
+		if (unlikely(!page))
+			goto out;
+		path.mnt = au_br_mnt(br);
+		path.dentry = au_h_dptr(sb->s_root, bwr);
+		p = d_path(&path, page, PATH_MAX - sizeof(AUFS_XINO_FNAME));
+		file = (void *)p;
+		if (!IS_ERR(p)) {
+			strcat(p, "/" AUFS_XINO_FNAME);
+			AuDbg("%s\n", p);
+			file = au_xino_create(sb, p, /*silent*/0);
+			if (!IS_ERR(file))
+				au_xino_brid_set(sb, br->br_id);
+		}
+		free_page((unsigned long)page);
+	} else {
+		file = au_xino_create(sb, AUFS_XINO_DEFPATH, /*silent*/0);
+		if (IS_ERR(file))
+			goto out;
+		h_sb = file->f_path.dentry->d_sb;
+		if (unlikely(au_test_fs_bad_xino(h_sb))) {
+			pr_err("xino doesn't support %s(%s)\n",
+			       AUFS_XINO_DEFPATH, au_sbtype(h_sb));
+			fput(file);
+			file = ERR_PTR(-EINVAL);
+		}
+		if (!IS_ERR(file))
+			au_xino_brid_set(sb, -1);
+	}
+
+out:
+	return file;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int au_xino_path(struct seq_file *seq, struct file *file)
+{
+	int err;
+
+	err = au_seq_path(seq, &file->f_path);
+	if (unlikely(err))
+		goto out;
+
+#define Deleted "\\040(deleted)"
+	seq->count -= sizeof(Deleted) - 1;
+	AuDebugOn(memcmp(seq->buf + seq->count, Deleted,
+			 sizeof(Deleted) - 1));
+#undef Deleted
+
+out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void au_xinondir_leave(struct super_block *sb, aufs_bindex_t bindex,
+		       ino_t h_ino, int idx)
+{
+	struct au_xino_file *xino;
+
+	AuDebugOn(!au_opt_test(au_mntflags(sb), XINO));
+	xino = &au_sbr(sb, bindex)->br_xino;
+	AuDebugOn(idx < 0 || xino->xi_nondir.total <= idx);
+
+	spin_lock(&xino->xi_nondir.spin);
+	AuDebugOn(xino->xi_nondir.array[idx] != h_ino);
+	xino->xi_nondir.array[idx] = 0;
+	spin_unlock(&xino->xi_nondir.spin);
+	wake_up_all(&xino->xi_nondir.wqh);
+}
+
+static int au_xinondir_find(struct au_xino_file *xino, ino_t h_ino)
+{
+	int found, total, i;
+
+	found = -1;
+	total = xino->xi_nondir.total;
+	for (i = 0; i < total; i++) {
+		if (xino->xi_nondir.array[i] != h_ino)
+			continue;
+		found = i;
+		break;
+	}
+
+	return found;
+}
+
+static int au_xinondir_expand(struct au_xino_file *xino)
+{
+	int err, sz;
+	ino_t *p;
+
+	BUILD_BUG_ON(KMALLOC_MAX_SIZE > INT_MAX);
+
+	err = -ENOMEM;
+	sz = xino->xi_nondir.total * sizeof(ino_t);
+	if (unlikely(sz > KMALLOC_MAX_SIZE / 2))
+		goto out;
+	p = au_kzrealloc(xino->xi_nondir.array, sz, sz << 1, GFP_ATOMIC,
+			 /*may_shrink*/0);
+	if (p) {
+		xino->xi_nondir.array = p;
+		xino->xi_nondir.total <<= 1;
+		AuDbg("xi_nondir.total %d\n", xino->xi_nondir.total);
+		err = 0;
+	}
+
+out:
+	return err;
+}
+
+int au_xinondir_enter(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
+		      int *idx)
+{
+	int err, found, empty;
+	struct au_xino_file *xino;
+
+	err = 0;
+	*idx = -1;
+	if (!au_opt_test(au_mntflags(sb), XINO))
+		goto out; /* no xino */
+
+	xino = &au_sbr(sb, bindex)->br_xino;
+
+again:
+	spin_lock(&xino->xi_nondir.spin);
+	found = au_xinondir_find(xino, h_ino);
+	if (found == -1) {
+		empty = au_xinondir_find(xino, /*h_ino*/0);
+		if (empty == -1) {
+			empty = xino->xi_nondir.total;
+			err = au_xinondir_expand(xino);
+			if (unlikely(err))
+				goto out_unlock;
+		}
+		xino->xi_nondir.array[empty] = h_ino;
+		*idx = empty;
+	} else {
+		spin_unlock(&xino->xi_nondir.spin);
+		wait_event(xino->xi_nondir.wqh,
+			   xino->xi_nondir.array[found] != h_ino);
+		goto again;
+	}
+
+out_unlock:
+	spin_unlock(&xino->xi_nondir.spin);
+out:
+	return err;
+}
diff --git a/include/linux/bio.h b/include/linux/bio.h
index ce547a25e..c979c7472 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -30,6 +30,8 @@
 /* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */
 #include <linux/blk_types.h>
 
+extern int trap_non_toi_io;
+
 #define BIO_DEBUG
 
 #ifdef BIO_DEBUG
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 17b18b91e..1b305638e 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -184,7 +184,8 @@ struct bio {
 #define BIO_REFFED	8	/* bio has elevated ->bi_cnt */
 #define BIO_THROTTLED	9	/* This bio has already been subjected to
 				 * throttling rules. Don't do it again. */
-#define BIO_TRACE_COMPLETION 10	/* bio_endio() should trace the final completion
+#define BIO_TOI		10	/* bio is TuxOnIce submitted */
+#define BIO_TRACE_COMPLETION 11	/* bio_endio() should trace the final completion
 				 * of this bio. */
 /* See BVEC_POOL_OFFSET below before adding new flags */
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c9e601dce..7ae2a687a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -47,7 +47,11 @@ struct blk_queue_stats;
 struct blk_stat_callback;
 
 #define BLKDEV_MIN_RQ	4
+#ifdef CONFIG_PCK_INTERACTIVE
+#define BLKDEV_MAX_RQ	512
+#else
 #define BLKDEV_MAX_RQ	128	/* Default maximum */
+#endif
 
 /* Must be consistent with blk_mq_poll_stats_bkt() */
 #define BLK_MQ_POLL_STATS_BKTS 16
@@ -56,7 +60,7 @@ struct blk_stat_callback;
  * Maximum number of blkcg policies allowed to be registered concurrently.
  * Defined here to simplify include dependency.
  */
-#define BLKCG_MAX_POLS		3
+#define BLKCG_MAX_POLS		5
 
 typedef void (rq_end_io_fn)(struct request *, blk_status_t);
 
@@ -129,6 +133,10 @@ typedef __u32 __bitwise req_flags_t;
 #define RQF_MQ_TIMEOUT_EXPIRED	((__force req_flags_t)(1 << 20))
 /* already slept for hybrid poll */
 #define RQF_MQ_POLL_SLEPT	((__force req_flags_t)(1 << 21))
+/* DEBUG: rq in bfq-mq dispatch list */
+#define RQF_DISP_LIST	((__force req_flags_t)(1 << 22))
+/* DEBUG: rq had get_rq_private executed on it */
+#define RQF_GOT	((__force req_flags_t)(1 << 23))
 
 /* flags that prevent us from merging requests: */
 #define RQF_NOMERGE_FLAGS \
diff --git a/include/linux/file.h b/include/linux/file.h
index 279720db9..76e38eade 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -20,6 +20,7 @@ struct dentry;
 struct path;
 extern struct file *alloc_file(const struct path *, fmode_t mode,
 	const struct file_operations *fop);
+extern struct file *get_empty_filp(void);
 
 static inline void fput_light(struct file *file, int fput_needed)
 {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c6baf7676..beb2d01b2 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1270,6 +1270,7 @@ extern void fasync_free(struct fasync_struct *);
 /* can be called from interrupts */
 extern void kill_fasync(struct fasync_struct **, int, int);
 
+extern int setfl(int fd, struct file * filp, unsigned long arg);
 extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
 extern int f_setown(struct file *filp, unsigned long arg, int force);
 extern void f_delown(struct file *filp);
@@ -1313,6 +1314,8 @@ extern int send_sigurg(struct fown_struct *fown);
 #define UMOUNT_NOFOLLOW	0x00000008	/* Don't follow symlink on umount */
 #define UMOUNT_UNUSED	0x80000000	/* Flag guaranteed to be unused */
 
+extern struct list_head super_blocks;
+
 /* sb->s_iflags */
 #define SB_I_CGROUPWB	0x00000001	/* cgroup-aware writeback enabled */
 #define SB_I_NOEXEC	0x00000002	/* Ignore executables on this fs */
@@ -1722,6 +1725,7 @@ struct file_operations {
 	ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
 	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
 	int (*check_flags)(int);
+	int (*setfl)(struct file *, unsigned long);
 	int (*flock) (struct file *, int, struct file_lock *);
 	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
 	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
@@ -1792,6 +1796,12 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
 			      struct iovec *fast_pointer,
 			      struct iovec **ret_pointer);
 
+typedef ssize_t (*vfs_readf_t)(struct file *, char __user *, size_t, loff_t *);
+typedef ssize_t (*vfs_writef_t)(struct file *, const char __user *, size_t,
+				loff_t *);
+vfs_readf_t vfs_readf(struct file *file);
+vfs_writef_t vfs_writef(struct file *file);
+
 extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *);
 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
 extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
@@ -1866,6 +1876,8 @@ struct super_operations {
 #define S_DAX		0	/* Make all the DAX code disappear */
 #endif
 #define S_ENCRYPTED	16384	/* Encrypted file (using fs/crypto/) */
+#define S_ATOMIC_COPY	32768	/* Pages mapped with this inode need to be
+				   atomically copied (gem) */
 
 /*
  * Note that nosuid etc flags are inode-specific: setting some file-system
@@ -2196,6 +2208,7 @@ extern int current_umask(void);
 extern void ihold(struct inode * inode);
 extern void iput(struct inode *);
 extern int generic_update_time(struct inode *, struct timespec *, int);
+extern int update_time(struct inode *, struct timespec *, int);
 
 /* /sys/fs */
 extern struct kobject *fs_kobj;
@@ -2444,6 +2457,13 @@ extern struct super_block *freeze_bdev(struct block_device *);
 extern void emergency_thaw_all(void);
 extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
 extern int fsync_bdev(struct block_device *);
+extern int fsync_super(struct super_block *);
+extern int fsync_no_super(struct block_device *);
+#define FS_FREEZER_FUSE 1
+#define FS_FREEZER_NORMAL 2
+#define FS_FREEZER_ALL (FS_FREEZER_FUSE | FS_FREEZER_NORMAL)
+void freeze_filesystems(int which);
+void thaw_filesystems(int which);
 
 extern struct super_block *blockdev_superblock;
 
@@ -2476,6 +2496,7 @@ static inline bool sb_is_blkdev_sb(struct super_block *sb)
 	return false;
 }
 #endif
+extern int __sync_filesystem(struct super_block *, int);
 extern int sync_filesystem(struct super_block *);
 extern const struct file_operations def_blk_fops;
 extern const struct file_operations def_chr_fops;
diff --git a/include/linux/fs_uuid.h b/include/linux/fs_uuid.h
new file mode 100644
index 000000000..3234135b5
--- /dev/null
+++ b/include/linux/fs_uuid.h
@@ -0,0 +1,19 @@
+#include <linux/device.h>
+
+struct hd_struct;
+struct block_device;
+
+struct fs_info {
+	char uuid[16];
+	dev_t dev_t;
+	char *last_mount;
+	int last_mount_size;
+};
+
+int part_matches_fs_info(struct hd_struct *part, struct fs_info *seek);
+dev_t blk_lookup_fs_info(struct fs_info *seek);
+struct fs_info *fs_info_from_block_dev(struct block_device *bdev);
+void free_fs_info(struct fs_info *fs_info);
+int bdev_matches_key(struct block_device *bdev, const char *key);
+struct block_device *next_bdev_of_type(struct block_device *last,
+	const char *key);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 1a4582b44..0b373f7c7 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -39,8 +39,9 @@ struct vm_area_struct;
 #define ___GFP_DIRECT_RECLAIM	0x400000u
 #define ___GFP_WRITE		0x800000u
 #define ___GFP_KSWAPD_RECLAIM	0x1000000u
+#define ___GFP_TOI_NOTRACK    0x2000000u
 #ifdef CONFIG_LOCKDEP
-#define ___GFP_NOLOCKDEP	0x2000000u
+#define ___GFP_NOLOCKDEP	0x4000000u
 #else
 #define ___GFP_NOLOCKDEP	0
 #endif
@@ -187,6 +188,7 @@ struct vm_area_struct;
 #define __GFP_RETRY_MAYFAIL	((__force gfp_t)___GFP_RETRY_MAYFAIL)
 #define __GFP_NOFAIL	((__force gfp_t)___GFP_NOFAIL)
 #define __GFP_NORETRY	((__force gfp_t)___GFP_NORETRY)
+#define __GFP_TOI_NOTRACK	((__force gfp_t)___GFP_TOI_NOTRACK)	/* Allocator wants page untracked by TOI */
 
 /*
  * Action modifiers
@@ -205,7 +207,7 @@ struct vm_area_struct;
 #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP)
 
 /* Room for N __GFP_FOO bits */
-#define __GFP_BITS_SHIFT (25 + IS_ENABLED(CONFIG_LOCKDEP))
+#define __GFP_BITS_SHIFT (26 + IS_ENABLED(CONFIG_LOCKDEP))
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
 /*
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index a454b8aeb..f0a14e08e 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -55,7 +55,11 @@ extern struct cred init_cred;
 	.pid = &init_struct_pid,				\
 }
 
+#ifdef CONFIG_SCHED_MUQSS
+#define INIT_TASK_COMM "MuQSS"
+#else
 #define INIT_TASK_COMM "swapper"
+#endif
 
 /* Attach to the init_task data structure for proper alignment */
 #ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK
diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
index 627efac73..e25a094c2 100644
--- a/include/linux/ioprio.h
+++ b/include/linux/ioprio.h
@@ -53,6 +53,8 @@ enum {
  */
 static inline int task_nice_ioprio(struct task_struct *task)
 {
+	if (iso_task(task))
+		return 0;
 	return (task_nice(task) + 20) / 5;
 }
 
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 44368b19b..0ae04f09f 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -21,21 +21,6 @@ struct mem_cgroup;
 #ifdef CONFIG_KSM
 int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		unsigned long end, int advice, unsigned long *vm_flags);
-int __ksm_enter(struct mm_struct *mm);
-void __ksm_exit(struct mm_struct *mm);
-
-static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
-{
-	if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags))
-		return __ksm_enter(mm);
-	return 0;
-}
-
-static inline void ksm_exit(struct mm_struct *mm)
-{
-	if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
-		__ksm_exit(mm);
-}
 
 static inline struct stable_node *page_stable_node(struct page *page)
 {
@@ -65,6 +50,33 @@ struct page *ksm_might_need_to_copy(struct page *page,
 void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc);
 void ksm_migrate_page(struct page *newpage, struct page *oldpage);
 
+#ifdef CONFIG_KSM_LEGACY
+int __ksm_enter(struct mm_struct *mm);
+void __ksm_exit(struct mm_struct *mm);
+static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+	if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags))
+		return __ksm_enter(mm);
+	return 0;
+}
+
+static inline void ksm_exit(struct mm_struct *mm)
+{
+	if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
+		__ksm_exit(mm);
+}
+
+#elif defined(CONFIG_UKSM)
+static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+	return 0;
+}
+
+static inline void ksm_exit(struct mm_struct *mm)
+{
+}
+#endif /* !CONFIG_UKSM */
+
 #else  /* !CONFIG_KSM */
 
 static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
@@ -106,4 +118,6 @@ static inline void ksm_migrate_page(struct page *newpage, struct page *oldpage)
 #endif /* CONFIG_MMU */
 #endif /* !CONFIG_KSM */
 
+#include <linux/uksm.h>
+
 #endif /* __LINUX_KSM_H */
diff --git a/include/linux/linux_logo.h b/include/linux/linux_logo.h
index 5e3581d76..dbdf85ce0 100644
--- a/include/linux/linux_logo.h
+++ b/include/linux/linux_logo.h
@@ -38,6 +38,18 @@ extern const struct linux_logo logo_linux_vga16;
 extern const struct linux_logo logo_linux_clut224;
 extern const struct linux_logo logo_blackfin_vga16;
 extern const struct linux_logo logo_blackfin_clut224;
+extern const struct linux_logo logo_zen_clut224;
+extern const struct linux_logo logo_oldzen_clut224;
+extern const struct linux_logo logo_arch_clut224;
+extern const struct linux_logo logo_gentoo_clut224;
+extern const struct linux_logo logo_exherbo_clut224;
+extern const struct linux_logo logo_slackware_clut224;
+extern const struct linux_logo logo_debian_clut224;
+extern const struct linux_logo logo_fedorasimple_clut224;
+extern const struct linux_logo logo_fedoraglossy_clut224;
+extern const struct linux_logo logo_tits_clut224;
+extern const struct linux_logo logo_bsd_clut224;
+extern const struct linux_logo logo_fbsd_clut224;
 extern const struct linux_logo logo_dec_clut224;
 extern const struct linux_logo logo_mac_clut224;
 extern const struct linux_logo logo_parisc_clut224;
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 6fc77d4db..27e76f0c0 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -313,6 +313,8 @@ static inline int lockdep_match_key(struct lockdep_map *lock,
 	return lock->key == key;
 }
 
+struct lock_class *lockdep_hlock_class(struct held_lock *hlock);
+
 /*
  * Acquire a lock.
  *
@@ -439,6 +441,7 @@ struct lockdep_map { };
 
 #define lockdep_depth(tsk)	(0)
 
+#define lockdep_is_held(lock)			(1)
 #define lockdep_is_held_type(l, r)		(1)
 
 #define lockdep_assert_held(l)			do { (void)(l); } while (0)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 95a2d748e..798650c81 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1368,6 +1368,28 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
 	unmap_mapping_range(mapping, holebegin, holelen, 0);
 }
 
+extern void vma_do_file_update_time(struct vm_area_struct *, const char[], int);
+extern struct file *vma_do_pr_or_file(struct vm_area_struct *, const char[],
+				      int);
+extern void vma_do_get_file(struct vm_area_struct *, const char[], int);
+extern void vma_do_fput(struct vm_area_struct *, const char[], int);
+
+#define vma_file_update_time(vma)	vma_do_file_update_time(vma, __func__, \
+								__LINE__)
+#define vma_pr_or_file(vma)		vma_do_pr_or_file(vma, __func__, \
+							  __LINE__)
+#define vma_get_file(vma)		vma_do_get_file(vma, __func__, __LINE__)
+#define vma_fput(vma)			vma_do_fput(vma, __func__, __LINE__)
+
+#ifndef CONFIG_MMU
+extern struct file *vmr_do_pr_or_file(struct vm_region *, const char[], int);
+extern void vmr_do_fput(struct vm_region *, const char[], int);
+
+#define vmr_pr_or_file(region)		vmr_do_pr_or_file(region, __func__, \
+							  __LINE__)
+#define vmr_fput(region)		vmr_do_fput(region, __func__, __LINE__)
+#endif /* !CONFIG_MMU */
+
 extern int access_process_vm(struct task_struct *tsk, unsigned long addr,
 		void *buf, int len, unsigned int gup_flags);
 extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
@@ -2526,6 +2548,7 @@ int drop_caches_sysctl_handler(struct ctl_table *, int,
 					void __user *, size_t *, loff_t *);
 #endif
 
+void drop_pagecache(void);
 void drop_slab(void);
 void drop_slab_node(int nid);
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index fd1af6b95..da38fbd6e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -249,6 +249,7 @@ struct vm_region {
 	unsigned long	vm_top;		/* region allocated to here */
 	unsigned long	vm_pgoff;	/* the offset in vm_file corresponding to vm_start */
 	struct file	*vm_file;	/* the backing file or NULL */
+	struct file	*vm_prfile;	/* the virtual backing file or NULL */
 
 	int		vm_usage;	/* region usage count (access under nommu_region_sem) */
 	bool		vm_icache_flushed : 1; /* true if the icache has been flushed for
@@ -323,6 +324,7 @@ struct vm_area_struct {
 	unsigned long vm_pgoff;		/* Offset (within vm_file) in PAGE_SIZE
 					   units */
 	struct file * vm_file;		/* File we map to (can be NULL). */
+	struct file *vm_prfile;		/* shadow of vm_file */
 	void * vm_private_data;		/* was vm_pte (shared mem) */
 
 	atomic_long_t swap_readahead_info;
@@ -333,6 +335,9 @@ struct vm_area_struct {
 	struct mempolicy *vm_policy;	/* NUMA policy for the VMA */
 #endif
 	struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
+#ifdef CONFIG_UKSM
+	struct vma_slot *uksm_vma_slot;
+#endif
 } __randomize_layout;
 
 struct core_thread {
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 7522a6987..58a55ff22 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -148,6 +148,9 @@ enum zone_stat_item {
 	NR_ZSPAGES,		/* allocated in zsmalloc */
 #endif
 	NR_FREE_CMA_PAGES,
+#ifdef CONFIG_UKSM
+	NR_UKSM_ZERO_PAGES,
+#endif
 	NR_VM_ZONE_STAT_ITEMS };
 
 enum node_stat_item {
@@ -866,7 +869,7 @@ static inline int is_highmem_idx(enum zone_type idx)
 }
 
 /**
- * is_highmem - helper function to quickly check if a struct zone is a 
+ * is_highmem - helper function to quickly check if a struct zone is a
  *              highmem zone or not.  This is an attempt to keep references
  *              to ZONE_{DMA/NORMAL/HIGHMEM/etc} in general code to a minimum.
  * @zone - pointer to struct zone variable
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index 35942084c..24f5fd1a7 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -6,11 +6,14 @@
 struct mnt_namespace;
 struct fs_struct;
 struct user_namespace;
+struct vfsmount;
 
 extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
 		struct user_namespace *, struct fs_struct *);
 extern void put_mnt_ns(struct mnt_namespace *ns);
 
+extern int is_current_mnt_ns(struct vfsmount *mnt);
+
 extern const struct file_operations proc_mounts_operations;
 extern const struct file_operations proc_mountinfo_operations;
 extern const struct file_operations proc_mountstats_operations;
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 50c2b8786..f8459d750 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -97,6 +97,12 @@ enum pageflags {
 #ifdef CONFIG_MEMORY_FAILURE
 	PG_hwpoison,		/* hardware poisoned page. Don't touch */
 #endif
+#ifdef CONFIG_TOI_INCREMENTAL
+	PG_toi_untracked,	/* Don't track dirtiness of this page - assume always dirty */
+	PG_toi_ro,		/* Page was made RO by TOI */
+	PG_toi_cbw,		/* Copy the page before it is written to */
+	PG_toi_dirty,		/* Page has been modified */
+#endif
 #if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT)
 	PG_young,
 	PG_idle,
@@ -361,6 +367,17 @@ TESTSCFLAG(HWPoison, hwpoison, PF_ANY)
 PAGEFLAG_FALSE(HWPoison)
 #define __PG_HWPOISON 0
 #endif
+#ifdef CONFIG_TOI_INCREMENTAL
+PAGEFLAG(TOI_RO, toi_ro)
+PAGEFLAG(TOI_Dirty, toi_dirty)
+PAGEFLAG(TOI_Untracked, toi_untracked)
+PAGEFLAG(TOI_CBW, toi_cbw)
+#else
+PAGEFLAG_FALSE(TOI_RO)
+PAGEFLAG_FALSE(TOI_Dirty)
+PAGEFLAG_FALSE(TOI_Untracked)
+PAGEFLAG_FALSE(TOI_CBW)
+#endif
 
 #if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT)
 TESTPAGEFLAG(Young, young, PF_ANY)
@@ -731,8 +748,12 @@ static inline void ClearPageSlabPfmemalloc(struct page *page)
  * __PG_HWPOISON is exceptional because it needs to be kept beyond page's
  * alloc-free cycle to prevent from reusing the page.
  */
-#define PAGE_FLAGS_CHECK_AT_PREP	\
-	(((1UL << NR_PAGEFLAGS) - 1) & ~__PG_HWPOISON)
+#ifdef CONFIG_TOI_INCREMENTAL
+#define PAGE_FLAGS_CHECK_AT_PREP	(((1UL << NR_PAGEFLAGS) - 1) & \
+        ~((1UL << PG_toi_dirty) | (1UL << PG_toi_ro) | __PG_HWPOISON))
+#else
+#define PAGE_FLAGS_CHECK_AT_PREP	(((1UL << NR_PAGEFLAGS) - 1) & ~__PG_HWPOISON)
+#endif
 
 #define PAGE_FLAGS_PRIVATE				\
 	(1UL << PG_private | 1UL << PG_private_2)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b161ef8a9..eeb4c76e7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -27,6 +27,9 @@
 #include <linux/signal_types.h>
 #include <linux/mm_types_task.h>
 #include <linux/task_io_accounting.h>
+#ifdef CONFIG_SCHED_MUQSS
+#include <linux/skip_list.h>
+#endif
 
 /* task_struct member predeclarations (sorted alphabetically): */
 struct audit_context;
@@ -544,9 +547,11 @@ struct task_struct {
 	unsigned int			flags;
 	unsigned int			ptrace;
 
+#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_MUQSS)
+	int on_cpu;
+#endif
 #ifdef CONFIG_SMP
 	struct llist_node		wake_entry;
-	int				on_cpu;
 #ifdef CONFIG_THREAD_INFO_IN_TASK
 	/* Current CPU: */
 	unsigned int			cpu;
@@ -571,10 +576,25 @@ struct task_struct {
 	int				static_prio;
 	int				normal_prio;
 	unsigned int			rt_priority;
+#ifdef CONFIG_SCHED_MUQSS
+	int time_slice;
+	u64 deadline;
+	skiplist_node node; /* Skip list node */
+	u64 last_ran;
+	u64 sched_time; /* sched_clock time spent running */
+#ifdef CONFIG_SMT_NICE
+	int smt_bias; /* Policy/nice level bias across smt siblings */
+#endif
+#ifdef CONFIG_HOTPLUG_CPU
+	bool zerobound; /* Bound to CPU0 for hotplug */
+#endif
+	unsigned long rt_timeout;
+#else /* CONFIG_SCHED_MUQSS */
 
 	const struct sched_class	*sched_class;
 	struct sched_entity		se;
 	struct sched_rt_entity		rt;
+#endif
 #ifdef CONFIG_CGROUP_SCHED
 	struct task_group		*sched_task_group;
 #endif
@@ -723,6 +743,10 @@ struct task_struct {
 #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
 	u64				utimescaled;
 	u64				stimescaled;
+#endif
+#ifdef CONFIG_SCHED_MUQSS
+	/* Unbanked cpu time */
+	unsigned long utime_ns, stime_ns;
 #endif
 	u64				gtime;
 	struct prev_cputime		prev_cputime;
@@ -1117,6 +1141,40 @@ struct task_struct {
 	 */
 };
 
+#ifdef CONFIG_SCHED_MUQSS
+#define tsk_seruntime(t)		((t)->sched_time)
+#define tsk_rttimeout(t)		((t)->rt_timeout)
+
+static inline void tsk_cpus_current(struct task_struct *p)
+{
+}
+
+void print_scheduler_version(void);
+
+static inline bool iso_task(struct task_struct *p)
+{
+	return (p->policy == SCHED_ISO);
+}
+#else /* CFS */
+#define tsk_seruntime(t)	((t)->se.sum_exec_runtime)
+#define tsk_rttimeout(t)	((t)->rt.timeout)
+
+static inline void tsk_cpus_current(struct task_struct *p)
+{
+	p->nr_cpus_allowed = current->nr_cpus_allowed;
+}
+
+static inline void print_scheduler_version(void)
+{
+	printk(KERN_INFO "CFS CPU scheduler.\n");
+}
+
+static inline bool iso_task(struct task_struct *p)
+{
+	return false;
+}
+#endif /* CONFIG_SCHED_MUQSS */
+
 static inline struct pid *task_pid(struct task_struct *task)
 {
 	return task->pids[PIDTYPE_PID].pid;
diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h
index 3d3a97d93..b03197d35 100644
--- a/include/linux/sched/nohz.h
+++ b/include/linux/sched/nohz.h
@@ -6,7 +6,7 @@
  * This is the interface between the scheduler and nohz/dynticks:
  */
 
-#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
+#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) && !defined(CONFIG_SCHED_MUQSS)
 extern void cpu_load_update_nohz_start(void);
 extern void cpu_load_update_nohz_stop(void);
 #else
@@ -23,7 +23,7 @@ static inline void nohz_balance_enter_idle(int cpu) { }
 static inline void set_cpu_sd_state_idle(void) { }
 #endif
 
-#ifdef CONFIG_NO_HZ_COMMON
+#if defined(CONFIG_NO_HZ_COMMON) && !defined(CONFIG_SCHED_MUQSS)
 void calc_load_nohz_start(void);
 void calc_load_nohz_stop(void);
 #else
diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h
index 7d64feafc..43c9d9e50 100644
--- a/include/linux/sched/prio.h
+++ b/include/linux/sched/prio.h
@@ -20,8 +20,20 @@
  */
 
 #define MAX_USER_RT_PRIO	100
+
+#ifdef CONFIG_SCHED_MUQSS
+/* Note different MAX_RT_PRIO */
+#define MAX_RT_PRIO		(MAX_USER_RT_PRIO + 1)
+
+#define ISO_PRIO		(MAX_RT_PRIO)
+#define NORMAL_PRIO		(MAX_RT_PRIO + 1)
+#define IDLE_PRIO		(MAX_RT_PRIO + 2)
+#define PRIO_LIMIT		((IDLE_PRIO) + 1)
+#else /* CONFIG_SCHED_MUQSS */
 #define MAX_RT_PRIO		MAX_USER_RT_PRIO
 
+#endif /* CONFIG_SCHED_MUQSS */
+
 #define MAX_PRIO		(MAX_RT_PRIO + NICE_WIDTH)
 #define DEFAULT_PRIO		(MAX_RT_PRIO + NICE_WIDTH / 2)
 
diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
index e5af028c0..010b2244e 100644
--- a/include/linux/sched/rt.h
+++ b/include/linux/sched/rt.h
@@ -24,8 +24,10 @@ static inline bool task_is_realtime(struct task_struct *tsk)
 
 	if (policy == SCHED_FIFO || policy == SCHED_RR)
 		return true;
+#ifndef CONFIG_SCHED_MUQSS
 	if (policy == SCHED_DEADLINE)
 		return true;
+#endif
 	return false;
 }
 
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 5be31eb7b..2cfc0347d 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -80,7 +80,7 @@ extern long kernel_wait4(pid_t, int *, int, struct rusage *);
 extern void free_task(struct task_struct *tsk);
 
 /* sched_exec is called by processes performing an exec */
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_MUQSS)
 extern void sched_exec(void);
 #else
 #define sched_exec()   {}
diff --git a/include/linux/skip_list.h b/include/linux/skip_list.h
new file mode 100644
index 000000000..d4be84ba2
--- /dev/null
+++ b/include/linux/skip_list.h
@@ -0,0 +1,33 @@
+#ifndef _LINUX_SKIP_LISTS_H
+#define _LINUX_SKIP_LISTS_H
+typedef u64 keyType;
+typedef void *valueType;
+
+typedef struct nodeStructure skiplist_node;
+
+struct nodeStructure {
+	int level;	/* Levels in this structure */
+	keyType key;
+	valueType value;
+	skiplist_node *next[8];
+	skiplist_node *prev[8];
+};
+
+typedef struct listStructure {
+	int entries;
+	int level;	/* Maximum level of the list
+			(1 more than the number of levels in the list) */
+	skiplist_node *header; /* pointer to header */
+} skiplist;
+
+void skiplist_init(skiplist_node *slnode);
+skiplist *new_skiplist(skiplist_node *slnode);
+void free_skiplist(skiplist *l);
+void skiplist_node_init(skiplist_node *node);
+void skiplist_insert(skiplist *l, skiplist_node *node, keyType key, valueType value, unsigned int randseed);
+void skiplist_delete(skiplist *l, skiplist_node *node);
+
+static inline bool skiplist_node_empty(skiplist_node *node) {
+	return (!node->next[0]);
+}
+#endif /* _LINUX_SKIP_LISTS_H */
diff --git a/include/linux/splice.h b/include/linux/splice.h
index 74b4911ac..19789fbea 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -87,4 +87,10 @@ extern void splice_shrink_spd(struct splice_pipe_desc *);
 
 extern const struct pipe_buf_operations page_cache_pipe_buf_ops;
 extern const struct pipe_buf_operations default_pipe_buf_ops;
+
+extern long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
+			   loff_t *ppos, size_t len, unsigned int flags);
+extern long do_splice_to(struct file *in, loff_t *ppos,
+			 struct pipe_inode_info *pipe, size_t len,
+			 unsigned int flags);
 #endif
diff --git a/include/linux/sradix-tree.h b/include/linux/sradix-tree.h
new file mode 100644
index 000000000..d71edba6b
--- /dev/null
+++ b/include/linux/sradix-tree.h
@@ -0,0 +1,77 @@
+#ifndef _LINUX_SRADIX_TREE_H
+#define _LINUX_SRADIX_TREE_H
+
+
+#define INIT_SRADIX_TREE(root, mask)					\
+do {									\
+	(root)->height = 0;						\
+	(root)->gfp_mask = (mask);					\
+	(root)->rnode = NULL;						\
+} while (0)
+
+#define ULONG_BITS	(sizeof(unsigned long) * 8)
+#define SRADIX_TREE_INDEX_BITS  (8 /* CHAR_BIT */ * sizeof(unsigned long))
+//#define SRADIX_TREE_MAP_SHIFT	6
+//#define SRADIX_TREE_MAP_SIZE	(1UL << SRADIX_TREE_MAP_SHIFT)
+//#define SRADIX_TREE_MAP_MASK	(SRADIX_TREE_MAP_SIZE-1)
+
+struct sradix_tree_node {
+	unsigned int	height;		/* Height from the bottom */
+	unsigned int	count;
+	unsigned int	fulls;		/* Number of full sublevel trees */
+	struct sradix_tree_node *parent;
+	void *stores[0];
+};
+
+/* A simple radix tree implementation */
+struct sradix_tree_root {
+	unsigned int            height;
+	struct sradix_tree_node *rnode;
+
+	/* Where found to have available empty stores in its sublevels */
+	struct sradix_tree_node *enter_node;
+	unsigned int shift;
+	unsigned int stores_size;
+	unsigned int mask;
+	unsigned long min;	/* The first hole index */
+	unsigned long num;
+	//unsigned long *height_to_maxindex;
+
+	/* How the node is allocated and freed. */
+	struct sradix_tree_node *(*alloc)(void);
+	void (*free)(struct sradix_tree_node *node);
+
+	/* When a new node is added and removed */
+	void (*extend)(struct sradix_tree_node *parent, struct sradix_tree_node *child);
+	void (*assign)(struct sradix_tree_node *node, unsigned int index, void *item);
+	void (*rm)(struct sradix_tree_node *node, unsigned int offset);
+};
+
+struct sradix_tree_path {
+	struct sradix_tree_node *node;
+	int offset;
+};
+
+static inline
+void init_sradix_tree_root(struct sradix_tree_root *root, unsigned long shift)
+{
+	root->height = 0;
+	root->rnode = NULL;
+	root->shift = shift;
+	root->stores_size = 1UL << shift;
+	root->mask = root->stores_size - 1;
+}
+
+
+extern void *sradix_tree_next(struct sradix_tree_root *root,
+		       struct sradix_tree_node *node, unsigned long index,
+		       int (*iter)(void *, unsigned long));
+
+extern int sradix_tree_enter(struct sradix_tree_root *root, void **item, int num);
+
+extern void sradix_tree_delete_from_leaf(struct sradix_tree_root *root,
+			struct sradix_tree_node *node, unsigned long index);
+
+extern void *sradix_tree_lookup(struct sradix_tree_root *root, unsigned long index);
+
+#endif /* _LINUX_SRADIX_TREE_H */
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 440b62f75..f0bbae15b 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -486,6 +486,73 @@ extern __printf(2, 3) void __pm_pr_dbg(bool defer, const char *fmt, ...);
 	no_printk(KERN_DEBUG fmt, ##__VA_ARGS__)
 #endif
 
+enum {
+	TOI_CAN_HIBERNATE,
+	TOI_CAN_RESUME,
+	TOI_RESUME_DEVICE_OK,
+	TOI_NORESUME_SPECIFIED,
+	TOI_SANITY_CHECK_PROMPT,
+	TOI_CONTINUE_REQ,
+	TOI_RESUMED_BEFORE,
+	TOI_BOOT_TIME,
+	TOI_NOW_RESUMING,
+	TOI_IGNORE_LOGLEVEL,
+	TOI_TRYING_TO_RESUME,
+	TOI_LOADING_ALT_IMAGE,
+	TOI_STOP_RESUME,
+	TOI_IO_STOPPED,
+	TOI_NOTIFIERS_PREPARE,
+	TOI_CLUSTER_MODE,
+	TOI_BOOT_KERNEL,
+	TOI_DEVICE_HOTPLUG_LOCKED,
+};
+
+#ifdef CONFIG_TOI
+
+/* Used in init dir files */
+extern unsigned long toi_state;
+#define set_toi_state(bit) (set_bit(bit, &toi_state))
+#define clear_toi_state(bit) (clear_bit(bit, &toi_state))
+#define test_toi_state(bit) (test_bit(bit, &toi_state))
+extern int toi_running;
+
+#define test_action_state(bit) (test_bit(bit, &toi_bkd.toi_action))
+extern int try_tuxonice_hibernate(void);
+
+#else /* !CONFIG_TOI */
+
+#define toi_state		(0)
+#define set_toi_state(bit) do { } while (0)
+#define clear_toi_state(bit) do { } while (0)
+#define test_toi_state(bit) (0)
+#define toi_running (0)
+
+static inline int try_tuxonice_hibernate(void) { return 0; }
+#define test_action_state(bit) (0)
+
+#endif /* CONFIG_TOI */
+
+#ifdef CONFIG_HIBERNATION
+#ifdef CONFIG_TOI
+extern void try_tuxonice_resume(void);
+#else
+#define try_tuxonice_resume() do { } while (0)
+#endif
+
+extern int resume_attempted;
+extern int software_resume(void);
+
+static inline void check_resume_attempted(void)
+{
+	if (resume_attempted)
+		return;
+
+	software_resume();
+}
+#else
+#define check_resume_attempted() do { } while (0)
+#define resume_attempted (0)
+#endif
 #define pm_pr_dbg(fmt, ...) \
 	__pm_pr_dbg(false, fmt, ##__VA_ARGS__)
 
diff --git a/include/linux/swap.h b/include/linux/swap.h
index a1a3f4ed9..34a991609 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -315,6 +315,7 @@ void workingset_update_node(struct radix_tree_node *node);
 extern unsigned long totalram_pages;
 extern unsigned long totalreserve_pages;
 extern unsigned long nr_free_buffer_pages(void);
+extern unsigned long nr_unallocated_buffer_pages(void);
 extern unsigned long nr_free_pagecache_pages(void);
 
 /* Definition of global_zone_page_state not available yet */
@@ -354,6 +355,8 @@ extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
 						pg_data_t *pgdat,
 						unsigned long *nr_scanned);
 extern unsigned long shrink_all_memory(unsigned long nr_pages);
+extern unsigned long shrink_memory_mask(unsigned long nr_to_reclaim,
+		gfp_t mask);
 extern int vm_swappiness;
 extern int remove_mapping(struct address_space *mapping, struct page *page);
 extern unsigned long vm_total_pages;
@@ -467,8 +470,10 @@ extern void swapcache_free_entries(swp_entry_t *entries, int n);
 extern int free_swap_and_cache(swp_entry_t);
 extern int swap_type_of(dev_t, sector_t, struct block_device **);
 extern unsigned int count_swap_pages(int, int);
+extern sector_t map_swap_entry(swp_entry_t entry, struct block_device **);
 extern sector_t map_swap_page(struct page *, struct block_device **);
 extern sector_t swapdev_block(int, pgoff_t);
+extern struct swap_info_struct *get_swap_info_struct(unsigned);
 extern int page_swapcount(struct page *);
 extern int __swap_count(struct swap_info_struct *si, swp_entry_t entry);
 extern int __swp_swapcount(swp_entry_t entry);
@@ -478,6 +483,8 @@ extern struct swap_info_struct *swp_swap_info(swp_entry_t entry);
 extern bool reuse_swap_page(struct page *, int *);
 extern int try_to_free_swap(struct page *);
 struct backing_dev_info;
+extern void get_swap_range_of_type(int type, swp_entry_t *start,
+		swp_entry_t *end, unsigned int limit);
 extern int init_swap_address_space(unsigned int type, unsigned long nr_pages);
 extern void exit_swap_address_space(unsigned int type);
 
diff --git a/include/linux/thinkpad_ec.h b/include/linux/thinkpad_ec.h
new file mode 100644
index 000000000..1b80d7ee5
--- /dev/null
+++ b/include/linux/thinkpad_ec.h
@@ -0,0 +1,47 @@
+/*
+ *  thinkpad_ec.h - interface to ThinkPad embedded controller LPC3 functions
+ *
+ *  Copyright (C) 2005 Shem Multinymous <multinymous@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _THINKPAD_EC_H
+#define _THINKPAD_EC_H
+
+#ifdef __KERNEL__
+
+#define TP_CONTROLLER_ROW_LEN 16
+
+/* EC transactions input and output (possibly partial) vectors of 16 bytes. */
+struct thinkpad_ec_row {
+	u16 mask; /* bitmap of which entries of val[] are meaningful */
+	u8 val[TP_CONTROLLER_ROW_LEN];
+};
+
+extern int __must_check thinkpad_ec_lock(void);
+extern int __must_check thinkpad_ec_try_lock(void);
+extern void thinkpad_ec_unlock(void);
+
+extern int thinkpad_ec_read_row(const struct thinkpad_ec_row *args,
+				struct thinkpad_ec_row *data);
+extern int thinkpad_ec_try_read_row(const struct thinkpad_ec_row *args,
+				    struct thinkpad_ec_row *mask);
+extern int thinkpad_ec_prefetch_row(const struct thinkpad_ec_row *args);
+extern void thinkpad_ec_invalidate(void);
+
+
+#endif /* __KERNEL */
+#endif /* _THINKPAD_EC_H */
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 34f053a15..7f3d3ce56 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -44,9 +44,10 @@ enum {
 #endif
 
 #if IS_ENABLED(CONFIG_DEBUG_STACK_USAGE) || IS_ENABLED(CONFIG_DEBUG_KMEMLEAK)
-# define THREADINFO_GFP		(GFP_KERNEL_ACCOUNT | __GFP_ZERO)
+# define THREADINFO_GFP		(GFP_KERNEL_ACCOUNT | \
+				 ___GFP_TOI_NOTRACK | __GFP_ZERO)
 #else
-# define THREADINFO_GFP		(GFP_KERNEL_ACCOUNT)
+# define THREADINFO_GFP		(GFP_KERNEL_ACCOUNT | __GFP_TOI_NOTRACK)
 #endif
 
 /*
diff --git a/include/linux/tuxonice.h b/include/linux/tuxonice.h
new file mode 100644
index 000000000..67b05a750
--- /dev/null
+++ b/include/linux/tuxonice.h
@@ -0,0 +1,48 @@
+/*
+ * include/linux/tuxonice.h
+ *
+ * Copyright (C) 2015 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * This file is released under the GPLv2.
+ */
+
+#ifndef INCLUDE_LINUX_TUXONICE_H
+#define INCLUDE_LINUX_TUXONICE_H
+#ifdef CONFIG_TOI_INCREMENTAL
+extern void toi_set_logbuf_untracked(void);
+extern int toi_make_writable(pgd_t *pgd, unsigned long address);
+
+static inline int toi_incremental_support(void)
+{
+    return 1;
+}
+
+/* Copy Before Write */
+struct toi_cbw {
+    unsigned long pfn;
+    void *virt;
+    struct toi_cbw *next;
+};
+
+struct toi_cbw_state {
+    bool active;            /* Is a fault handler running? */
+    bool enabled;           /* Are we doing copy before write? */
+    int size;               /* The number of pages allocated */
+    struct toi_cbw *first, *next, *last;  /* Pointers to the data structure */
+};
+
+#define CBWS_PER_PAGE (PAGE_SIZE / sizeof(struct toi_cbw))
+DECLARE_PER_CPU(struct toi_cbw_state, toi_cbw_states);
+#else
+#define toi_set_logbuf_untracked() do { } while(0)
+static inline int toi_make_writable(pgd_t *pgd, unsigned long addr)
+{
+    return 0;
+}
+
+static inline int toi_incremental_support(void)
+{
+    return 0;
+}
+#endif
+#endif
diff --git a/include/linux/uksm.h b/include/linux/uksm.h
new file mode 100644
index 000000000..bb8651f53
--- /dev/null
+++ b/include/linux/uksm.h
@@ -0,0 +1,149 @@
+#ifndef __LINUX_UKSM_H
+#define __LINUX_UKSM_H
+/*
+ * Memory merging support.
+ *
+ * This code enables dynamic sharing of identical pages found in different
+ * memory areas, even if they are not shared by fork().
+ */
+
+/* if !CONFIG_UKSM this file should not be compiled at all. */
+#ifdef CONFIG_UKSM
+
+#include <linux/bitops.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/rmap.h>
+#include <linux/sched.h>
+
+extern unsigned long zero_pfn __read_mostly;
+extern unsigned long uksm_zero_pfn __read_mostly;
+extern struct page *empty_uksm_zero_page;
+
+/* must be done before linked to mm */
+extern void uksm_vma_add_new(struct vm_area_struct *vma);
+extern void uksm_remove_vma(struct vm_area_struct *vma);
+
+#define UKSM_SLOT_NEED_SORT	(1 << 0)
+#define UKSM_SLOT_NEED_RERAND	(1 << 1)
+#define UKSM_SLOT_SCANNED	(1 << 2) /* It's scanned in this round */
+#define UKSM_SLOT_FUL_SCANNED	(1 << 3)
+#define UKSM_SLOT_IN_UKSM	(1 << 4)
+
+struct vma_slot {
+	struct sradix_tree_node *snode;
+	unsigned long sindex;
+
+	struct list_head slot_list;
+	unsigned long fully_scanned_round;
+	unsigned long dedup_num;
+	unsigned long pages_scanned;
+	unsigned long this_sampled;
+	unsigned long last_scanned;
+	unsigned long pages_to_scan;
+	struct scan_rung *rung;
+	struct page **rmap_list_pool;
+	unsigned int *pool_counts;
+	unsigned long pool_size;
+	struct vm_area_struct *vma;
+	struct mm_struct *mm;
+	unsigned long ctime_j;
+	unsigned long pages;
+	unsigned long flags;
+	unsigned long pages_cowed; /* pages cowed this round */
+	unsigned long pages_merged; /* pages merged this round */
+	unsigned long pages_bemerged;
+
+	/* when it has page merged in this eval round */
+	struct list_head dedup_list;
+};
+
+static inline void uksm_unmap_zero_page(pte_t pte)
+{
+	if (pte_pfn(pte) == uksm_zero_pfn)
+		__dec_zone_page_state(empty_uksm_zero_page, NR_UKSM_ZERO_PAGES);
+}
+
+static inline void uksm_map_zero_page(pte_t pte)
+{
+	if (pte_pfn(pte) == uksm_zero_pfn)
+		__inc_zone_page_state(empty_uksm_zero_page, NR_UKSM_ZERO_PAGES);
+}
+
+static inline void uksm_cow_page(struct vm_area_struct *vma, struct page *page)
+{
+	if (vma->uksm_vma_slot && PageKsm(page))
+		vma->uksm_vma_slot->pages_cowed++;
+}
+
+static inline void uksm_cow_pte(struct vm_area_struct *vma, pte_t pte)
+{
+	if (vma->uksm_vma_slot && pte_pfn(pte) == uksm_zero_pfn)
+		vma->uksm_vma_slot->pages_cowed++;
+}
+
+static inline int uksm_flags_can_scan(unsigned long vm_flags)
+{
+#ifdef VM_SAO
+		if (vm_flags & VM_SAO)
+			return 0;
+#endif
+
+	return !(vm_flags & (VM_PFNMAP | VM_IO  | VM_DONTEXPAND |
+			     VM_HUGETLB | VM_MIXEDMAP | VM_SHARED
+			     | VM_MAYSHARE | VM_GROWSUP | VM_GROWSDOWN));
+}
+
+static inline void uksm_vm_flags_mod(unsigned long *vm_flags_p)
+{
+	if (uksm_flags_can_scan(*vm_flags_p))
+		*vm_flags_p |= VM_MERGEABLE;
+}
+
+/*
+ * Just a wrapper for BUG_ON for where ksm_zeropage must not be. TODO: it will
+ * be removed when uksm zero page patch is stable enough.
+ */
+static inline void uksm_bugon_zeropage(pte_t pte)
+{
+	BUG_ON(pte_pfn(pte) == uksm_zero_pfn);
+}
+#else
+static inline void uksm_vma_add_new(struct vm_area_struct *vma)
+{
+}
+
+static inline void uksm_remove_vma(struct vm_area_struct *vma)
+{
+}
+
+static inline void uksm_unmap_zero_page(pte_t pte)
+{
+}
+
+static inline void uksm_map_zero_page(pte_t pte)
+{
+}
+
+static inline void uksm_cow_page(struct vm_area_struct *vma, struct page *page)
+{
+}
+
+static inline void uksm_cow_pte(struct vm_area_struct *vma, pte_t pte)
+{
+}
+
+static inline int uksm_flags_can_scan(unsigned long vm_flags)
+{
+	return 0;
+}
+
+static inline void uksm_vm_flags_mod(unsigned long *vm_flags_p)
+{
+}
+
+static inline void uksm_bugon_zeropage(pte_t pte)
+{
+}
+#endif /* !CONFIG_UKSM */
+#endif /* __LINUX_UKSM_H */
diff --git a/include/trace/events/fs.h b/include/trace/events/fs.h
new file mode 100644
index 000000000..fb634b74a
--- /dev/null
+++ b/include/trace/events/fs.h
@@ -0,0 +1,53 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM fs
+
+#if !defined(_TRACE_FS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_FS_H
+
+#include <linux/fs.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(do_sys_open,
+
+	TP_PROTO(const char *filename, int flags, int mode),
+
+	TP_ARGS(filename, flags, mode),
+
+	TP_STRUCT__entry(
+		__string(	filename, filename		)
+		__field(	int, flags			)
+		__field(	int, mode			)
+	),
+
+	TP_fast_assign(
+		__assign_str(filename, filename);
+		__entry->flags = flags;
+		__entry->mode = mode;
+	),
+
+	TP_printk("\"%s\" %x %o",
+		  __get_str(filename), __entry->flags, __entry->mode)
+);
+
+TRACE_EVENT(open_exec,
+
+	TP_PROTO(const char *filename),
+
+	TP_ARGS(filename),
+
+	TP_STRUCT__entry(
+		__string(	filename, filename		)
+	),
+
+	TP_fast_assign(
+		__assign_str(filename, filename);
+	),
+
+	TP_printk("\"%s\"",
+		  __get_str(filename))
+);
+
+#endif /* _TRACE_FS_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index dbe1bb058..c4ab27b3b 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -73,6 +73,12 @@
 #define IF_HAVE_PG_HWPOISON(flag,string)
 #endif
 
+#ifdef CONFIG_TUXONICE_
+#define IF_HAVE_PG_TUXONICE(flag,string) ,{1UL << flag, string}
+#else
+#define IF_HAVE_PG_TUXONICE(flag,string)
+#endif
+
 #if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT)
 #define IF_HAVE_PG_IDLE(flag,string) ,{1UL << flag, string}
 #else
@@ -103,6 +109,10 @@
 IF_HAVE_PG_MLOCK(PG_mlocked,		"mlocked"	)		\
 IF_HAVE_PG_UNCACHED(PG_uncached,	"uncached"	)		\
 IF_HAVE_PG_HWPOISON(PG_hwpoison,	"hwpoison"	)		\
+IF_HAVE_PG_TUXONICE(PG_toi_untracked,   "toi_untracked" )               \
+IF_HAVE_PG_TUXONICE(PG_toi_ro,          "toi_ro"        )               \
+IF_HAVE_PG_TUXONICE(PG_toi_cbw,         "toi_cbw"       )               \
+IF_HAVE_PG_TUXONICE(PG_toi_dirty,       "toi_dirty"     )               \
 IF_HAVE_PG_IDLE(PG_young,		"young"		)		\
 IF_HAVE_PG_IDLE(PG_idle,		"idle"		)
 
diff --git a/include/uapi/linux/aufs_type.h b/include/uapi/linux/aufs_type.h
new file mode 100644
index 000000000..8c95f8669
--- /dev/null
+++ b/include/uapi/linux/aufs_type.h
@@ -0,0 +1,447 @@
+/*
+ * Copyright (C) 2005-2018 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __AUFS_TYPE_H__
+#define __AUFS_TYPE_H__
+
+#define AUFS_NAME	"aufs"
+
+#ifdef __KERNEL__
+/*
+ * define it before including all other headers.
+ * sched.h may use pr_* macros before defining "current", so define the
+ * no-current version first, and re-define later.
+ */
+#define pr_fmt(fmt)	AUFS_NAME " %s:%d: " fmt, __func__, __LINE__
+#include <linux/sched.h>
+#undef pr_fmt
+#define pr_fmt(fmt) \
+		AUFS_NAME " %s:%d:%.*s[%d]: " fmt, __func__, __LINE__, \
+		(int)sizeof(current->comm), current->comm, current->pid
+#else
+#include <stdint.h>
+#include <sys/types.h>
+#endif /* __KERNEL__ */
+
+#include <linux/limits.h>
+
+#define AUFS_VERSION	"4.16-20180409"
+
+/* todo? move this to linux-2.6.19/include/magic.h */
+#define AUFS_SUPER_MAGIC	('a' << 24 | 'u' << 16 | 'f' << 8 | 's')
+
+/* ---------------------------------------------------------------------- */
+
+#ifdef CONFIG_AUFS_BRANCH_MAX_127
+typedef int8_t aufs_bindex_t;
+#define AUFS_BRANCH_MAX 127
+#else
+typedef int16_t aufs_bindex_t;
+#ifdef CONFIG_AUFS_BRANCH_MAX_511
+#define AUFS_BRANCH_MAX 511
+#elif defined(CONFIG_AUFS_BRANCH_MAX_1023)
+#define AUFS_BRANCH_MAX 1023
+#elif defined(CONFIG_AUFS_BRANCH_MAX_32767)
+#define AUFS_BRANCH_MAX 32767
+#endif
+#endif
+
+#ifdef __KERNEL__
+#ifndef AUFS_BRANCH_MAX
+#error unknown CONFIG_AUFS_BRANCH_MAX value
+#endif
+#endif /* __KERNEL__ */
+
+/* ---------------------------------------------------------------------- */
+
+#define AUFS_FSTYPE		AUFS_NAME
+
+#define AUFS_ROOT_INO		2
+#define AUFS_FIRST_INO		11
+
+#define AUFS_WH_PFX		".wh."
+#define AUFS_WH_PFX_LEN		((int)sizeof(AUFS_WH_PFX) - 1)
+#define AUFS_WH_TMP_LEN		4
+/* a limit for rmdir/rename a dir and copyup */
+#define AUFS_MAX_NAMELEN	(NAME_MAX \
+				- AUFS_WH_PFX_LEN * 2	/* doubly whiteouted */\
+				- 1			/* dot */\
+				- AUFS_WH_TMP_LEN)	/* hex */
+#define AUFS_XINO_FNAME		"." AUFS_NAME ".xino"
+#define AUFS_XINO_DEFPATH	"/tmp/" AUFS_XINO_FNAME
+#define AUFS_XINO_DEF_SEC	30 /* seconds */
+#define AUFS_XINO_DEF_TRUNC	45 /* percentage */
+#define AUFS_DIRWH_DEF		3
+#define AUFS_RDCACHE_DEF	10 /* seconds */
+#define AUFS_RDCACHE_MAX	3600 /* seconds */
+#define AUFS_RDBLK_DEF		512 /* bytes */
+#define AUFS_RDHASH_DEF		32
+#define AUFS_WKQ_NAME		AUFS_NAME "d"
+#define AUFS_MFS_DEF_SEC	30 /* seconds */
+#define AUFS_MFS_MAX_SEC	3600 /* seconds */
+#define AUFS_FHSM_CACHE_DEF_SEC	30 /* seconds */
+#define AUFS_PLINK_WARN		50 /* number of plinks in a single bucket */
+
+/* pseudo-link maintenace under /proc */
+#define AUFS_PLINK_MAINT_NAME	"plink_maint"
+#define AUFS_PLINK_MAINT_DIR	"fs/" AUFS_NAME
+#define AUFS_PLINK_MAINT_PATH	AUFS_PLINK_MAINT_DIR "/" AUFS_PLINK_MAINT_NAME
+
+/* dirren, renamed dir */
+#define AUFS_DR_INFO_PFX	AUFS_WH_PFX ".dr."
+#define AUFS_DR_BRHINO_NAME	AUFS_WH_PFX "hino"
+/* whiteouted doubly */
+#define AUFS_WH_DR_INFO_PFX	AUFS_WH_PFX AUFS_DR_INFO_PFX
+#define AUFS_WH_DR_BRHINO	AUFS_WH_PFX AUFS_DR_BRHINO_NAME
+
+#define AUFS_DIROPQ_NAME	AUFS_WH_PFX ".opq" /* whiteouted doubly */
+#define AUFS_WH_DIROPQ		AUFS_WH_PFX AUFS_DIROPQ_NAME
+
+#define AUFS_BASE_NAME		AUFS_WH_PFX AUFS_NAME
+#define AUFS_PLINKDIR_NAME	AUFS_WH_PFX "plnk"
+#define AUFS_ORPHDIR_NAME	AUFS_WH_PFX "orph"
+
+/* doubly whiteouted */
+#define AUFS_WH_BASE		AUFS_WH_PFX AUFS_BASE_NAME
+#define AUFS_WH_PLINKDIR	AUFS_WH_PFX AUFS_PLINKDIR_NAME
+#define AUFS_WH_ORPHDIR		AUFS_WH_PFX AUFS_ORPHDIR_NAME
+
+/* branch permissions and attributes */
+#define AUFS_BRPERM_RW		"rw"
+#define AUFS_BRPERM_RO		"ro"
+#define AUFS_BRPERM_RR		"rr"
+#define AUFS_BRATTR_COO_REG	"coo_reg"
+#define AUFS_BRATTR_COO_ALL	"coo_all"
+#define AUFS_BRATTR_FHSM	"fhsm"
+#define AUFS_BRATTR_UNPIN	"unpin"
+#define AUFS_BRATTR_ICEX	"icex"
+#define AUFS_BRATTR_ICEX_SEC	"icexsec"
+#define AUFS_BRATTR_ICEX_SYS	"icexsys"
+#define AUFS_BRATTR_ICEX_TR	"icextr"
+#define AUFS_BRATTR_ICEX_USR	"icexusr"
+#define AUFS_BRATTR_ICEX_OTH	"icexoth"
+#define AUFS_BRRATTR_WH		"wh"
+#define AUFS_BRWATTR_NLWH	"nolwh"
+#define AUFS_BRWATTR_MOO	"moo"
+
+#define AuBrPerm_RW		1		/* writable, hardlinkable wh */
+#define AuBrPerm_RO		(1 << 1)	/* readonly */
+#define AuBrPerm_RR		(1 << 2)	/* natively readonly */
+#define AuBrPerm_Mask		(AuBrPerm_RW | AuBrPerm_RO | AuBrPerm_RR)
+
+#define AuBrAttr_COO_REG	(1 << 3)	/* copy-up on open */
+#define AuBrAttr_COO_ALL	(1 << 4)
+#define AuBrAttr_COO_Mask	(AuBrAttr_COO_REG | AuBrAttr_COO_ALL)
+
+#define AuBrAttr_FHSM		(1 << 5)	/* file-based hsm */
+#define AuBrAttr_UNPIN		(1 << 6)	/* rename-able top dir of
+						   branch. meaningless since
+						   linux-3.18-rc1 */
+
+/* ignore error in copying XATTR */
+#define AuBrAttr_ICEX_SEC	(1 << 7)
+#define AuBrAttr_ICEX_SYS	(1 << 8)
+#define AuBrAttr_ICEX_TR	(1 << 9)
+#define AuBrAttr_ICEX_USR	(1 << 10)
+#define AuBrAttr_ICEX_OTH	(1 << 11)
+#define AuBrAttr_ICEX		(AuBrAttr_ICEX_SEC	\
+				 | AuBrAttr_ICEX_SYS	\
+				 | AuBrAttr_ICEX_TR	\
+				 | AuBrAttr_ICEX_USR	\
+				 | AuBrAttr_ICEX_OTH)
+
+#define AuBrRAttr_WH		(1 << 12)	/* whiteout-able */
+#define AuBrRAttr_Mask		AuBrRAttr_WH
+
+#define AuBrWAttr_NoLinkWH	(1 << 13)	/* un-hardlinkable whiteouts */
+#define AuBrWAttr_MOO		(1 << 14)	/* move-up on open */
+#define AuBrWAttr_Mask		(AuBrWAttr_NoLinkWH | AuBrWAttr_MOO)
+
+#define AuBrAttr_CMOO_Mask	(AuBrAttr_COO_Mask | AuBrWAttr_MOO)
+
+/* #warning test userspace */
+#ifdef __KERNEL__
+#ifndef CONFIG_AUFS_FHSM
+#undef AuBrAttr_FHSM
+#define AuBrAttr_FHSM		0
+#endif
+#ifndef CONFIG_AUFS_XATTR
+#undef	AuBrAttr_ICEX
+#define AuBrAttr_ICEX		0
+#undef	AuBrAttr_ICEX_SEC
+#define AuBrAttr_ICEX_SEC	0
+#undef	AuBrAttr_ICEX_SYS
+#define AuBrAttr_ICEX_SYS	0
+#undef	AuBrAttr_ICEX_TR
+#define AuBrAttr_ICEX_TR	0
+#undef	AuBrAttr_ICEX_USR
+#define AuBrAttr_ICEX_USR	0
+#undef	AuBrAttr_ICEX_OTH
+#define AuBrAttr_ICEX_OTH	0
+#endif
+#endif
+
+/* the longest combination */
+/* AUFS_BRATTR_ICEX and AUFS_BRATTR_ICEX_TR don't affect here */
+#define AuBrPermStrSz	sizeof(AUFS_BRPERM_RW			\
+			       "+" AUFS_BRATTR_COO_REG		\
+			       "+" AUFS_BRATTR_FHSM		\
+			       "+" AUFS_BRATTR_UNPIN		\
+			       "+" AUFS_BRATTR_ICEX_SEC		\
+			       "+" AUFS_BRATTR_ICEX_SYS		\
+			       "+" AUFS_BRATTR_ICEX_USR		\
+			       "+" AUFS_BRATTR_ICEX_OTH		\
+			       "+" AUFS_BRWATTR_NLWH)
+
+typedef struct {
+	char a[AuBrPermStrSz];
+} au_br_perm_str_t;
+
+static inline int au_br_writable(int brperm)
+{
+	return brperm & AuBrPerm_RW;
+}
+
+static inline int au_br_whable(int brperm)
+{
+	return brperm & (AuBrPerm_RW | AuBrRAttr_WH);
+}
+
+static inline int au_br_wh_linkable(int brperm)
+{
+	return !(brperm & AuBrWAttr_NoLinkWH);
+}
+
+static inline int au_br_cmoo(int brperm)
+{
+	return brperm & AuBrAttr_CMOO_Mask;
+}
+
+static inline int au_br_fhsm(int brperm)
+{
+	return brperm & AuBrAttr_FHSM;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* ioctl */
+enum {
+	/* readdir in userspace */
+	AuCtl_RDU,
+	AuCtl_RDU_INO,
+
+	AuCtl_WBR_FD,	/* pathconf wrapper */
+	AuCtl_IBUSY,	/* busy inode */
+	AuCtl_MVDOWN,	/* move-down */
+	AuCtl_BR,	/* info about branches */
+	AuCtl_FHSM_FD	/* connection for fhsm */
+};
+
+/* borrowed from linux/include/linux/kernel.h */
+#ifndef ALIGN
+#define ALIGN(x, a)		__ALIGN_MASK(x, (typeof(x))(a)-1)
+#define __ALIGN_MASK(x, mask)	(((x)+(mask))&~(mask))
+#endif
+
+/* borrowed from linux/include/linux/compiler-gcc3.h */
+#ifndef __aligned
+#define __aligned(x)			__attribute__((aligned(x)))
+#endif
+
+#ifdef __KERNEL__
+#ifndef __packed
+#define __packed			__attribute__((packed))
+#endif
+#endif
+
+struct au_rdu_cookie {
+	uint64_t	h_pos;
+	int16_t		bindex;
+	uint8_t		flags;
+	uint8_t		pad;
+	uint32_t	generation;
+} __aligned(8);
+
+struct au_rdu_ent {
+	uint64_t	ino;
+	int16_t		bindex;
+	uint8_t		type;
+	uint8_t		nlen;
+	uint8_t		wh;
+	char		name[0];
+} __aligned(8);
+
+static inline int au_rdu_len(int nlen)
+{
+	/* include the terminating NULL */
+	return ALIGN(sizeof(struct au_rdu_ent) + nlen + 1,
+		     sizeof(uint64_t));
+}
+
+union au_rdu_ent_ul {
+	struct au_rdu_ent __user	*e;
+	uint64_t			ul;
+};
+
+enum {
+	AufsCtlRduV_SZ,
+	AufsCtlRduV_End
+};
+
+struct aufs_rdu {
+	/* input */
+	union {
+		uint64_t	sz;	/* AuCtl_RDU */
+		uint64_t	nent;	/* AuCtl_RDU_INO */
+	};
+	union au_rdu_ent_ul	ent;
+	uint16_t		verify[AufsCtlRduV_End];
+
+	/* input/output */
+	uint32_t		blk;
+
+	/* output */
+	union au_rdu_ent_ul	tail;
+	/* number of entries which were added in a single call */
+	uint64_t		rent;
+	uint8_t			full;
+	uint8_t			shwh;
+
+	struct au_rdu_cookie	cookie;
+} __aligned(8);
+
+/* ---------------------------------------------------------------------- */
+
+/* dirren. the branch is identified by the filename who contains this */
+struct au_drinfo {
+	uint64_t ino;
+	union {
+		uint8_t oldnamelen;
+		uint64_t _padding;
+	};
+	uint8_t oldname[0];
+} __aligned(8);
+
+struct au_drinfo_fdata {
+	uint32_t magic;
+	struct au_drinfo drinfo;
+} __aligned(8);
+
+#define AUFS_DRINFO_MAGIC_V1	('a' << 24 | 'd' << 16 | 'r' << 8 | 0x01)
+/* future */
+#define AUFS_DRINFO_MAGIC_V2	('a' << 24 | 'd' << 16 | 'r' << 8 | 0x02)
+
+/* ---------------------------------------------------------------------- */
+
+struct aufs_wbr_fd {
+	uint32_t	oflags;
+	int16_t		brid;
+} __aligned(8);
+
+/* ---------------------------------------------------------------------- */
+
+struct aufs_ibusy {
+	uint64_t	ino, h_ino;
+	int16_t		bindex;
+} __aligned(8);
+
+/* ---------------------------------------------------------------------- */
+
+/* error code for move-down */
+/* the actual message strings are implemented in aufs-util.git */
+enum {
+	EAU_MVDOWN_OPAQUE = 1,
+	EAU_MVDOWN_WHITEOUT,
+	EAU_MVDOWN_UPPER,
+	EAU_MVDOWN_BOTTOM,
+	EAU_MVDOWN_NOUPPER,
+	EAU_MVDOWN_NOLOWERBR,
+	EAU_Last
+};
+
+/* flags for move-down */
+#define AUFS_MVDOWN_DMSG	1
+#define AUFS_MVDOWN_OWLOWER	(1 << 1)	/* overwrite lower */
+#define AUFS_MVDOWN_KUPPER	(1 << 2)	/* keep upper */
+#define AUFS_MVDOWN_ROLOWER	(1 << 3)	/* do even if lower is RO */
+#define AUFS_MVDOWN_ROLOWER_R	(1 << 4)	/* did on lower RO */
+#define AUFS_MVDOWN_ROUPPER	(1 << 5)	/* do even if upper is RO */
+#define AUFS_MVDOWN_ROUPPER_R	(1 << 6)	/* did on upper RO */
+#define AUFS_MVDOWN_BRID_UPPER	(1 << 7)	/* upper brid */
+#define AUFS_MVDOWN_BRID_LOWER	(1 << 8)	/* lower brid */
+#define AUFS_MVDOWN_FHSM_LOWER	(1 << 9)	/* find fhsm attr for lower */
+#define AUFS_MVDOWN_STFS	(1 << 10)	/* req. stfs */
+#define AUFS_MVDOWN_STFS_FAILED	(1 << 11)	/* output: stfs is unusable */
+#define AUFS_MVDOWN_BOTTOM	(1 << 12)	/* output: no more lowers */
+
+/* index for move-down */
+enum {
+	AUFS_MVDOWN_UPPER,
+	AUFS_MVDOWN_LOWER,
+	AUFS_MVDOWN_NARRAY
+};
+
+/*
+ * additional info of move-down
+ * number of free blocks and inodes.
+ * subset of struct kstatfs, but smaller and always 64bit.
+ */
+struct aufs_stfs {
+	uint64_t	f_blocks;
+	uint64_t	f_bavail;
+	uint64_t	f_files;
+	uint64_t	f_ffree;
+};
+
+struct aufs_stbr {
+	int16_t			brid;	/* optional input */
+	int16_t			bindex;	/* output */
+	struct aufs_stfs	stfs;	/* output when AUFS_MVDOWN_STFS set */
+} __aligned(8);
+
+struct aufs_mvdown {
+	uint32_t		flags;			/* input/output */
+	struct aufs_stbr	stbr[AUFS_MVDOWN_NARRAY]; /* input/output */
+	int8_t			au_errno;		/* output */
+} __aligned(8);
+
+/* ---------------------------------------------------------------------- */
+
+union aufs_brinfo {
+	/* PATH_MAX may differ between kernel-space and user-space */
+	char	_spacer[4096];
+	struct {
+		int16_t	id;
+		int	perm;
+		char	path[0];
+	};
+} __aligned(8);
+
+/* ---------------------------------------------------------------------- */
+
+#define AuCtlType		'A'
+#define AUFS_CTL_RDU		_IOWR(AuCtlType, AuCtl_RDU, struct aufs_rdu)
+#define AUFS_CTL_RDU_INO	_IOWR(AuCtlType, AuCtl_RDU_INO, struct aufs_rdu)
+#define AUFS_CTL_WBR_FD		_IOW(AuCtlType, AuCtl_WBR_FD, \
+				     struct aufs_wbr_fd)
+#define AUFS_CTL_IBUSY		_IOWR(AuCtlType, AuCtl_IBUSY, struct aufs_ibusy)
+#define AUFS_CTL_MVDOWN		_IOWR(AuCtlType, AuCtl_MVDOWN, \
+				      struct aufs_mvdown)
+#define AUFS_CTL_BRINFO		_IOW(AuCtlType, AuCtl_BR, union aufs_brinfo)
+#define AUFS_CTL_FHSM_FD	_IOW(AuCtlType, AuCtl_FHSM_FD, int)
+
+#endif /* __AUFS_TYPE_H__ */
diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index 776bc92e9..f82ba4c24 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -28,7 +28,9 @@
 #define NETLINK_ECRYPTFS	19
 #define NETLINK_RDMA		20
 #define NETLINK_CRYPTO		21	/* Crypto layer */
-#define NETLINK_SMC		22	/* SMC monitoring */
+#define NETLINK_TOI_USERUI	22	/* TuxOnIce's userui */
+#define NETLINK_TOI_USM		23	/* Userspace storage manager */
+#define NETLINK_SMC		24	/* SMC monitoring */
 
 #define NETLINK_INET_DIAG	NETLINK_SOCK_DIAG
 
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index 22627f800..17077cd6f 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -37,9 +37,16 @@
 #define SCHED_FIFO		1
 #define SCHED_RR		2
 #define SCHED_BATCH		3
-/* SCHED_ISO: reserved but not implemented yet */
+/* SCHED_ISO: Implemented on MuQSS only */
 #define SCHED_IDLE		5
+#ifdef CONFIG_SCHED_MUQSS
+#define SCHED_ISO		4
+#define SCHED_IDLEPRIO		SCHED_IDLE
+#define SCHED_MAX		(SCHED_IDLEPRIO)
+#define SCHED_RANGE(policy)	((policy) <= SCHED_MAX)
+#else /* CONFIG_SCHED_MUQSS */
 #define SCHED_DEADLINE		6
+#endif /* CONFIG_SCHED_MUQSS */
 
 /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
 #define SCHED_RESET_ON_FORK     0x40000000
diff --git a/include/uapi/linux/vt.h b/include/uapi/linux/vt.h
index e9d39c485..3bceead8d 100644
--- a/include/uapi/linux/vt.h
+++ b/include/uapi/linux/vt.h
@@ -3,12 +3,25 @@
 #define _UAPI_LINUX_VT_H
 
 
+/*
+ * We will make this definition solely for the purpose of making packages
+ * such as splashutils build, because they can not understand that
+ * NR_TTY_DEVICES is defined in the kernel configuration.
+ */
+#ifndef CONFIG_NR_TTY_DEVICES
+#define CONFIG_NR_TTY_DEVICES 63
+#endif
+
 /*
  * These constants are also useful for user-level apps (e.g., VC
  * resizing).
  */
 #define MIN_NR_CONSOLES 1       /* must be at least 1 */
-#define MAX_NR_CONSOLES	63	/* serial lines start at 64 */
+/*
+ * NR_TTY_DEVICES:
+ * Value MUST be at least 12 and must never be higher then 63
+ */
+#define MAX_NR_CONSOLES CONFIG_NR_TTY_DEVICES	/* serial lines start above this */
 		/* Note: the ioctl VT_GETSTATE does not work for
 		   consoles 16 and higher (since it returns a short) */
 
diff --git a/init/Kconfig b/init/Kconfig
index e37f4b2a6..e2cf5fd15 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -38,6 +38,47 @@ config THREAD_INFO_IN_TASK
 
 menu "General setup"
 
+config PCK_INTERACTIVE
+	bool "Tune kernel for interactivity"
+	default y
+	help
+	  Tunes the kernel for responsiveness at the cost of throughput and power usage.
+
+	  --- Virtual Memory Subsystem ---------------------------
+
+	    Mem dirty before bg writeback..:  10 %  ->  20 %
+	    Mem dirty before sync writeback:  20 %  ->  50 %
+
+	  --- Block Layer ----------------------------------------
+
+	    Queue depth...............:      128    -> 512
+	    Default MQ scheduler......: mq-deadline -> bfq
+
+	  --- CFS CPU Scheduler ----------------------------------
+
+	    Scheduling latency.............:   6    ->   3    ms
+	    Minimal granularity............:   0.75 ->   0.3  ms
+	    Wakeup granularity.............:   1    ->   0.5  ms
+	    CPU migration cost.............:   0.5  ->   0.25 ms
+	    Bandwidth slice size...........:   5    ->   3    ms
+	    Ondemand fine upscaling limit..:  95 %  ->  85 %
+
+	  --- MuQSS CPU Scheduler --------------------------------
+
+	    Scheduling interval............:   6    ->   3    ms
+	    ISO task max realtime use......:  70 %  ->  25 %
+	    Ondemand coarse upscaling limit:  80 %  ->  45 %
+	    Ondemand fine upscaling limit..:  95 %  ->  45 %
+
+config SCHED_MUQSS
+	bool "MuQSS cpu scheduler"
+	select HIGH_RES_TIMERS
+	default n
+	---help---
+	  The Multiple Queue Skiplist Scheduler for excellent interactivity and
+	  responsiveness on the desktop and highly scalable deterministic
+	  low latency on any hardware.
+
 config BROKEN
 	bool
 
@@ -619,6 +660,7 @@ config NUMA_BALANCING
 	depends on ARCH_SUPPORTS_NUMA_BALANCING
 	depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY
 	depends on SMP && NUMA && MIGRATION
+	depends on !SCHED_MUQSS
 	help
 	  This option adds support for automatic NUMA aware memory/task placement.
 	  The mechanism is quite primitive and is based on migrating memory when
@@ -721,9 +763,13 @@ menuconfig CGROUP_SCHED
 	help
 	  This feature lets CPU scheduler recognize task groups and control CPU
 	  bandwidth allocation to such task groups. It uses cgroups to group
-	  tasks.
+	  tasks. In combination with MuQSS this is purely a STUB to create the
+	  files associated with the CPU controller cgroup but most of the
+	  controls do nothing. This is useful for working in environments and
+	  with applications that will only work if this control group is
+	  present.
 
-if CGROUP_SCHED
+if CGROUP_SCHED && !SCHED_MUQSS
 config FAIR_GROUP_SCHED
 	bool "Group scheduling for SCHED_OTHER"
 	depends on CGROUP_SCHED
@@ -830,6 +876,7 @@ config CGROUP_DEVICE
 
 config CGROUP_CPUACCT
 	bool "Simple CPU accounting controller"
+	depends on !SCHED_MUQSS
 	help
 	  Provides a simple controller for monitoring the
 	  total CPU consumed by the tasks in a cgroup.
@@ -916,6 +963,22 @@ config USER_NS
 
 	  If unsure, say N.
 
+config USER_NS_UNPRIVILEGED
+	bool "Allow unprivileged users to create namespaces"
+	default y
+	depends on USER_NS
+	help
+	  When disabled, unprivileged users will not be able to create
+	  new namespaces. Allowing users to create their own namespaces
+	  has been part of several recent local privilege escalation
+	  exploits, so if you need user namespaces but are
+	  paranoid^Wsecurity-conscious you want to disable this.
+
+	  This setting can be overridden at runtime via the
+	  kernel.unprivileged_userns_clone sysctl.
+
+	  If unsure, say Y.
+
 config PID_NS
 	bool "PID Namespaces"
 	default y
@@ -936,6 +999,7 @@ endif # NAMESPACES
 
 config SCHED_AUTOGROUP
 	bool "Automatic process group scheduling"
+	depends on !SCHED_MUQSS
 	select CGROUPS
 	select CGROUP_SCHED
 	select FAIR_GROUP_SCHED
@@ -1029,6 +1093,13 @@ config CC_OPTIMIZE_FOR_PERFORMANCE
 	  with the "-O2" compiler flag for best performance and most
 	  helpful compile-time warnings.
 
+config CC_OPTIMIZE_HARDER
+	bool "Optimize harder"
+	help
+	  This option will pass "-O3" to your compiler resulting in a
+	  larger and faster kernel. The more complex optimizations also
+	  increase compilation time and may affect stability.
+
 config CC_OPTIMIZE_FOR_SIZE
 	bool "Optimize for size"
 	help
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 7cf4f6daf..9ede0fe14 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -596,6 +596,8 @@ void __init prepare_namespace(void)
 	if (is_floppy && rd_doload && rd_load_disk(0))
 		ROOT_DEV = Root_RAM0;
 
+	check_resume_attempted();
+
 	mount_root();
 out:
 	devtmpfs_mount("dev");
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index 53d4f0f32..55daa4a1e 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -16,6 +16,7 @@
 #include <linux/romfs_fs.h>
 #include <linux/initrd.h>
 #include <linux/sched.h>
+#include <linux/suspend.h>
 #include <linux/freezer.h>
 #include <linux/kmod.h>
 
@@ -80,6 +81,11 @@ static void __init handle_initrd(void)
 
 	current->flags &= ~PF_FREEZER_SKIP;
 
+	if (!resume_attempted)
+		printk(KERN_ERR "TuxOnIce: No attempt was made to resume from "
+				"any image that might exist.\n");
+	clear_toi_state(TOI_BOOT_TIME);
+
 	/* move initrd to rootfs' /old */
 	sys_mount("..", ".", NULL, MS_MOVE, NULL);
 	/* switch root and cwd back to / of rootfs */
diff --git a/init/init_task.c b/init/init_task.c
index 3ac6e754c..a5da207d7 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -59,9 +59,17 @@ struct task_struct init_task
 	.stack		= init_stack,
 	.usage		= ATOMIC_INIT(2),
 	.flags		= PF_KTHREAD,
+#ifdef CONFIG_SCHED_MUQSS
+	.prio		= NORMAL_PRIO,
+	.static_prio	= MAX_PRIO-20,
+	.normal_prio	= NORMAL_PRIO,
+	.deadline	= 0,
+	.time_slice	= 1000000,
+#else
 	.prio		= MAX_PRIO - 20,
 	.static_prio	= MAX_PRIO - 20,
 	.normal_prio	= MAX_PRIO - 20,
+#endif
 	.policy		= SCHED_NORMAL,
 	.cpus_allowed	= CPU_MASK_ALL,
 	.nr_cpus_allowed= NR_CPUS,
@@ -70,6 +78,7 @@ struct task_struct init_task
 	.restart_block	= {
 		.fn = do_no_restart_syscall,
 	},
+#ifndef CONFIG_SCHED_MUQSS
 	.se		= {
 		.group_node 	= LIST_HEAD_INIT(init_task.se.group_node),
 	},
@@ -77,6 +86,7 @@ struct task_struct init_task
 		.run_list	= LIST_HEAD_INIT(init_task.rt.run_list),
 		.time_slice	= RR_TIMESLICE,
 	},
+#endif
 	.tasks		= LIST_HEAD_INIT(init_task.tasks),
 #ifdef CONFIG_SMP
 	.pushable_tasks	= PLIST_NODE_INIT(init_task.pushable_tasks, MAX_PRIO),
diff --git a/init/main.c b/init/main.c
index 21efbf6ac..9eeaa9804 100644
--- a/init/main.c
+++ b/init/main.c
@@ -848,7 +848,6 @@ int __init_or_module do_one_initcall(initcall_t fn)
 	return ret;
 }
 
-
 extern initcall_t __initcall_start[];
 extern initcall_t __initcall0_start[];
 extern initcall_t __initcall1_start[];
@@ -1009,6 +1008,8 @@ static int __ref kernel_init(void *unused)
 
 	rcu_end_inkernel_boot();
 
+	print_scheduler_version();
+
 	if (ramdisk_execute_command) {
 		ret = run_init_process(ramdisk_execute_command);
 		if (!ret)
diff --git a/kernel/Makefile b/kernel/Makefile
index f85ae5dfa..78c2c0cc7 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -10,7 +10,7 @@ obj-y     = fork.o exec_domain.o panic.o \
 	    extable.o params.o \
 	    kthread.o sys_ni.o nsproxy.o \
 	    notifier.o ksysfs.o cred.o reboot.o \
-	    async.o range.o smpboot.o ucount.o
+	    async.o range.o smpboot.o ucount.o skip_list.o
 
 obj-$(CONFIG_MODULES) += kmod.o
 obj-$(CONFIG_MULTIUSER) += groups.o
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index e2764d767..2f85428d2 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -114,7 +114,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
 	 */
 	t1 = tsk->sched_info.pcount;
 	t2 = tsk->sched_info.run_delay;
-	t3 = tsk->se.sum_exec_runtime;
+	t3 = tsk_seruntime(tsk);
 
 	d->cpu_count += t1;
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 995453d9f..6156f0cac 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -129,7 +129,7 @@ static void __exit_signal(struct task_struct *tsk)
 			sig->curr_target = next_thread(tsk);
 	}
 
-	add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
+	add_device_randomness((const void*) &tsk_seruntime(tsk),
 			      sizeof(unsigned long long));
 
 	/*
@@ -150,7 +150,7 @@ static void __exit_signal(struct task_struct *tsk)
 	sig->inblock += task_io_get_inblock(tsk);
 	sig->oublock += task_io_get_oublock(tsk);
 	task_io_accounting_add(&sig->ioac, &tsk->ioac);
-	sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
+	sig->sum_sched_runtime += tsk_seruntime(tsk);
 	sig->nr_threads--;
 	__unhash_process(tsk, group_dead);
 	write_sequnlock(&sig->stats_lock);
diff --git a/kernel/fork.c b/kernel/fork.c
index e5d9d405a..4ed335553 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -103,6 +103,11 @@
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/task.h>
+#ifdef CONFIG_USER_NS
+extern int unprivileged_userns_clone;
+#else
+#define unprivileged_userns_clone 0
+#endif
 
 /*
  * Minimum number of threads to boot the kernel
@@ -448,7 +453,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 				goto fail_nomem;
 			charge = len;
 		}
-		tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+		tmp = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
 		if (!tmp)
 			goto fail_nomem;
 		*tmp = *mpnt;
@@ -474,7 +479,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 			struct inode *inode = file_inode(file);
 			struct address_space *mapping = file->f_mapping;
 
-			get_file(file);
+			vma_get_file(tmp);
 			if (tmp->vm_flags & VM_DENYWRITE)
 				atomic_dec(&inode->i_writecount);
 			i_mmap_lock_write(mapping);
@@ -507,7 +512,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 		__vma_link_rb(mm, tmp, rb_link, rb_parent);
 		rb_link = &tmp->vm_rb.rb_right;
 		rb_parent = &tmp->vm_rb;
-
+		uksm_vma_add_new(tmp);
 		mm->map_count++;
 		if (!(tmp->vm_flags & VM_WIPEONFORK))
 			retval = copy_page_range(mm, oldmm, mpnt);
@@ -1591,6 +1596,10 @@ static __latent_entropy struct task_struct *copy_process(
 	if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
 		return ERR_PTR(-EINVAL);
 
+	if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone)
+		if (!capable(CAP_SYS_ADMIN))
+			return ERR_PTR(-EPERM);
+
 	/*
 	 * Thread groups must share signals as well, and detached threads
 	 * can only be started up within the thread group.
@@ -2385,6 +2394,12 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 	if (unshare_flags & CLONE_NEWNS)
 		unshare_flags |= CLONE_FS;
 
+	if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) {
+		err = -EPERM;
+		if (!capable(CAP_SYS_ADMIN))
+			goto bad_unshare_out;
+	}
+
 	err = check_unshare_flags(unshare_flags);
 	if (err)
 		goto bad_unshare_out;
diff --git a/kernel/kthread.c b/kernel/kthread.c
index cd50e9920..0a2deebdf 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -410,6 +410,34 @@ void kthread_bind(struct task_struct *p, unsigned int cpu)
 }
 EXPORT_SYMBOL(kthread_bind);
 
+#if defined(CONFIG_SCHED_MUQSS) && defined(CONFIG_SMP)
+extern void __do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask);
+
+/*
+ * new_kthread_bind is a special variant of __kthread_bind_mask.
+ * For new threads to work on muqss we want to call do_set_cpus_allowed
+ * without the task_cpu being set and the task rescheduled until they're
+ * rescheduled on their own so we call __do_set_cpus_allowed directly which
+ * only changes the cpumask. This is particularly important for smpboot threads
+ * to work.
+ */
+static void new_kthread_bind(struct task_struct *p, unsigned int cpu)
+{
+	unsigned long flags;
+
+	if (WARN_ON(!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)))
+		return;
+
+	/* It's safe because the task is inactive. */
+	raw_spin_lock_irqsave(&p->pi_lock, flags);
+	__do_set_cpus_allowed(p, cpumask_of(cpu));
+	p->flags |= PF_NO_SETAFFINITY;
+	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+}
+#else
+#define new_kthread_bind(p, cpu) kthread_bind(p, cpu)
+#endif
+
 /**
  * kthread_create_on_cpu - Create a cpu bound kthread
  * @threadfn: the function to run until signal_pending(current).
@@ -431,7 +459,7 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
 				   cpu);
 	if (IS_ERR(p))
 		return p;
-	kthread_bind(p, cpu);
+	new_kthread_bind(p, cpu);
 	/* CPU hotplug need to bind once again when unparking the thread. */
 	set_bit(KTHREAD_IS_PER_CPU, &to_kthread(p)->flags);
 	to_kthread(p)->cpu = cpu;
diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c
index 7c6631e69..83451e2d5 100644
--- a/kernel/livepatch/transition.c
+++ b/kernel/livepatch/transition.c
@@ -290,6 +290,12 @@ static int klp_check_stack(struct task_struct *task, char *err_buf)
 	return 0;
 }
 
+#ifdef CONFIG_SCHED_MUQSS
+typedef unsigned long rq_flags_t;
+#else
+typedef struct rq_flags rq_flags_t;
+#endif
+
 /*
  * Try to safely switch a task to the target patch state.  If it's currently
  * running, or it's sleeping on a to-be-patched or to-be-unpatched function, or
@@ -298,7 +304,7 @@ static int klp_check_stack(struct task_struct *task, char *err_buf)
 static bool klp_try_switch_task(struct task_struct *task)
 {
 	struct rq *rq;
-	struct rq_flags flags;
+	rq_flags_t flags;
 	int ret;
 	bool success = false;
 	char err_buf[STACK_ERR_BUF_SIZE];
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 89b5f83f1..7bb20c5f6 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -140,7 +140,7 @@ static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES];
 unsigned long nr_lock_classes;
 static struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
 
-static inline struct lock_class *hlock_class(struct held_lock *hlock)
+inline struct lock_class *lockdep_hlock_class(struct held_lock *hlock)
 {
 	if (!hlock->class_idx) {
 		/*
@@ -151,6 +151,8 @@ static inline struct lock_class *hlock_class(struct held_lock *hlock)
 	}
 	return lock_classes + hlock->class_idx - 1;
 }
+EXPORT_SYMBOL_GPL(lockdep_hlock_class);
+#define hlock_class(hlock) lockdep_hlock_class(hlock)
 
 #ifdef CONFIG_LOCK_STAT
 static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], cpu_lock_stats);
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index e880ca22c..0ebcae67f 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -101,6 +101,244 @@ config PM_STD_PARTITION
 	  suspended image to. It will simply pick the first available swap 
 	  device.
 
+menuconfig TOI_CORE
+	bool "Enhanced Hibernation (TuxOnIce)"
+	depends on HIBERNATION
+	default y
+	---help---
+	  TuxOnIce is the 'new and improved' suspend support.
+
+	  See the TuxOnIce home page (tuxonice.net)
+	  for FAQs, HOWTOs and other documentation.
+
+	comment "Image Storage (you need at least one allocator)"
+		depends on TOI_CORE
+
+	config TOI_FILE
+		bool "File Allocator"
+		depends on TOI_CORE
+		default y
+		---help---
+		  This option enables support for storing an image in a
+		  simple file. You might want this if your swap is
+		  sometimes full enough that you don't have enough spare
+		  space to store an image.
+
+	config TOI_SWAP
+		bool "Swap Allocator"
+		depends on TOI_CORE && SWAP
+		default y
+		---help---
+		  This option enables support for storing an image in your
+		  swap space.
+
+	comment "General Options"
+		depends on TOI_CORE
+
+	config TOI_CRYPTO
+		bool "Compression support"
+		depends on TOI_CORE && CRYPTO
+		default y
+		---help---
+		  This option adds support for using cryptoapi compression
+		  algorithms. Compression is particularly useful as it can
+		  more than double your suspend and resume speed (depending
+		  upon how well your image compresses).
+
+		  You probably want this, so say Y here.
+
+	comment "No compression support available without Cryptoapi support."
+		depends on TOI_CORE && !CRYPTO
+
+	config TOI_USERUI
+		bool "Userspace User Interface support"
+		depends on TOI_CORE && NET && (VT || SERIAL_CONSOLE)
+		default y
+		---help---
+		  This option enabled support for a userspace based user interface
+		  to TuxOnIce, which allows you to have a nice display while suspending
+		  and resuming, and also enables features such as pressing escape to
+		  cancel a cycle or interactive debugging.
+
+	config TOI_USERUI_DEFAULT_PATH
+		string "Default userui program location"
+		default "/usr/local/sbin/tuxoniceui_text"
+		depends on TOI_USERUI
+		---help---
+		  This entry allows you to specify a default path to the userui binary.
+
+	config TOI_DEFAULT_IMAGE_SIZE_LIMIT
+		int "Default image size limit"
+		range -2 65536 
+		default "-2"
+		depends on TOI_CORE
+		---help---
+		  This entry allows you to specify a default image size limit. It can
+		  be overridden at run-time using /sys/power/tuxonice/image_size_limit.
+
+	config TOI_KEEP_IMAGE
+		bool "Allow Keep Image Mode"
+		depends on TOI_CORE
+		---help---
+		  This option allows you to keep and image and reuse it. It is intended
+		  __ONLY__ for use with systems where all filesystems are mounted read-
+		  only (kiosks, for example). To use it, compile this option in and boot
+		  normally. Set the KEEP_IMAGE flag in /sys/power/tuxonice and suspend.
+		  When you resume, the image will not be removed. You will be unable to turn
+		  off swap partitions (assuming you are using the swap allocator), but future
+		  suspends simply do a power-down. The image can be updated using the
+		  kernel command line parameter suspend_act= to turn off the keep image
+		  bit. Keep image mode is a little less user friendly on purpose - it
+		  should not be used without thought!
+
+	config TOI_REPLACE_SWSUSP
+		bool "Replace swsusp by default"
+		default y
+		depends on TOI_CORE
+		---help---
+		  TuxOnIce can replace swsusp. This option makes that the default state,
+		  requiring you to echo 0 > /sys/power/tuxonice/replace_swsusp if you want
+		  to use the vanilla kernel functionality. Note that your initrd/ramfs will
+		  need to do this before trying to resume, too.
+		  With overriding swsusp enabled, echoing disk  to /sys/power/state will
+		  start a TuxOnIce cycle. If resume= doesn't specify an allocator and both
+		  the swap and file allocators are compiled in, the swap allocator will be
+		  used by default.
+
+	config TOI_IGNORE_LATE_INITCALL
+		bool "Wait for initrd/ramfs to run, by default"
+		default n
+		depends on TOI_CORE
+		---help---
+		  When booting, TuxOnIce can check for an image and start to resume prior
+		  to any initrd/ramfs running (via a late initcall).
+
+		  If you don't have an initrd/ramfs, this is what you want to happen -
+		  otherwise you won't be able to safely resume. You should set this option
+		  to 'No'.
+
+		  If, however, you want your initrd/ramfs to run anyway before resuming,
+		  you need to tell TuxOnIce to ignore that earlier opportunity to resume.
+		  This can be done either by using this compile time option, or by
+		  overriding this option with the boot-time parameter toi_initramfs_resume_only=1.
+
+		  Note that if TuxOnIce can't resume at the earlier opportunity, the
+		  value of this option won't matter - the initramfs/initrd (if any) will
+		  run anyway.
+
+	menuconfig TOI_CLUSTER
+		bool "Cluster support"
+		default n
+		depends on TOI_CORE && NET && BROKEN
+		---help---
+		  Support for linking multiple machines in a cluster so that they suspend
+		  and resume together.
+
+	config TOI_DEFAULT_CLUSTER_INTERFACE
+		string "Default cluster interface"
+		depends on TOI_CLUSTER
+		---help---
+		  The default interface on which to communicate with other nodes in
+		  the cluster.
+
+		  If no value is set here, cluster support will be disabled by default.
+
+	config TOI_DEFAULT_CLUSTER_KEY
+		string "Default cluster key"
+		default "Default"
+		depends on TOI_CLUSTER
+		---help---
+		  The default key used by this node. All nodes in the same cluster
+		  have the same key. Multiple clusters may coexist on the same lan
+		  by using different values for this key.
+
+	config TOI_CLUSTER_IMAGE_TIMEOUT
+		int "Timeout when checking for image"
+		default 15
+		depends on TOI_CLUSTER
+		---help---
+		  Timeout (seconds) before continuing to boot when waiting to see
+		  whether other nodes might have an image. Set to -1 to wait
+		  indefinitely. In WAIT_UNTIL_NODES is non zero, we might continue
+		  booting sooner than this timeout.
+
+	config TOI_CLUSTER_WAIT_UNTIL_NODES
+		int "Nodes without image before continuing"
+		default 0
+		depends on TOI_CLUSTER
+		---help---
+		  When booting and no image is found, we wait to see if other nodes
+		  have an image before continuing to boot. This value lets us
+		  continue after seeing a certain number of nodes without an image,
+		  instead of continuing to wait for the timeout. Set to 0 to only
+		  use the timeout.
+
+	config TOI_DEFAULT_CLUSTER_PRE_HIBERNATE
+		string "Default pre-hibernate script"
+		depends on TOI_CLUSTER
+		---help---
+		  The default script to be called when starting to hibernate.
+
+	config TOI_DEFAULT_CLUSTER_POST_HIBERNATE
+		string "Default post-hibernate script"
+		depends on TOI_CLUSTER
+		---help---
+		  The default script to be called after resuming from hibernation.
+
+	config TOI_DEFAULT_WAIT
+		int "Default waiting time for emergency boot messages"
+		default "25"
+		range -1 32768
+		depends on TOI_CORE
+		help
+		  TuxOnIce can display warnings very early in the process of resuming,
+		  if (for example) it appears that you have booted a kernel that doesn't
+		  match an image on disk. It can then give you the opportunity to either
+		  continue booting that kernel, or reboot the machine. This option can be
+		  used to control how long to wait in such circumstances. -1 means wait
+		  forever. 0 means don't wait at all (do the default action, which will
+		  generally be to continue booting and remove the image). Values of 1 or
+		  more indicate a number of seconds (up to 255) to wait before doing the
+		  default.
+
+	config  TOI_DEFAULT_EXTRA_PAGES_ALLOWANCE
+		int "Default extra pages allowance"
+		default "2000"
+		range 500 524288
+		depends on TOI_CORE
+		help
+		  This value controls the default for the allowance TuxOnIce makes for
+		  drivers to allocate extra memory during the atomic copy. The default
+		  value of 2000 will be okay in most cases. If you are using
+		  DRI, the easiest way to find what value to use is to try to hibernate
+		  and look at how many pages were actually needed in the sysfs entry
+		  /sys/power/tuxonice/debug_info (first number on the last line), adding
+		  a little extra because the value is not always the same.
+
+	config TOI_CHECKSUM
+		bool "Checksum pageset2"
+		default n
+		depends on TOI_CORE
+		select CRYPTO
+		select CRYPTO_ALGAPI
+		select CRYPTO_MD4
+		---help---
+		  Adds support for checksumming pageset2 pages, to ensure you really get an
+		  atomic copy. Since some filesystems (XFS especially) change metadata even
+		  when there's no other activity, we need this to check for pages that have
+		  been changed while we were saving the page cache. If your debugging output
+		  always says no pages were resaved, you may be able to safely disable this
+		  option.
+
+config TOI
+	bool
+	depends on TOI_CORE!=n
+	default y
+
+config TOI_ZRAM_SUPPORT
+	def_bool y
+	depends on TOI && ZRAM!=n
+
 config PM_SLEEP
 	def_bool y
 	depends on SUSPEND || HIBERNATE_CALLBACKS
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index a3f79f0ee..3902a11ec 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -2,6 +2,37 @@
 
 ccflags-$(CONFIG_PM_DEBUG)	:= -DDEBUG
 
+tuxonice_core-y := tuxonice_modules.o
+
+obj-$(CONFIG_TOI)		+= tuxonice_builtin.o
+obj-$(CONFIG_TOI_INCREMENTAL)   += tuxonice_incremental.o \
+    tuxonice_copy_before_write.o
+
+tuxonice_core-$(CONFIG_PM_DEBUG)	+= tuxonice_alloc.o
+
+# Compile these in after allocation debugging, if used.
+
+tuxonice_core-y += tuxonice_sysfs.o tuxonice_highlevel.o \
+		tuxonice_io.o tuxonice_pagedir.o tuxonice_prepare_image.o \
+		tuxonice_extent.o tuxonice_pageflags.o tuxonice_ui.o \
+		tuxonice_power_off.o tuxonice_atomic_copy.o
+
+tuxonice_core-$(CONFIG_TOI_CHECKSUM)	+= tuxonice_checksum.o
+
+tuxonice_core-$(CONFIG_NET)	+= tuxonice_storage.o tuxonice_netlink.o
+
+obj-$(CONFIG_TOI_CORE)		+= tuxonice_core.o
+obj-$(CONFIG_TOI_PRUNE)		+= tuxonice_prune.o
+obj-$(CONFIG_TOI_CRYPTO)	+= tuxonice_compress.o
+
+tuxonice_bio-y := tuxonice_bio_core.o tuxonice_bio_chains.o \
+		tuxonice_bio_signature.o
+
+obj-$(CONFIG_TOI_SWAP)		+= tuxonice_bio.o tuxonice_swap.o
+obj-$(CONFIG_TOI_FILE)		+= tuxonice_bio.o tuxonice_file.o
+obj-$(CONFIG_TOI_CLUSTER)	+= tuxonice_cluster.o
+
+obj-$(CONFIG_TOI_USERUI)	+= tuxonice_userui.o
 KASAN_SANITIZE_snapshot.o	:= n
 
 obj-y				+= qos.o
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index a5c36e9c5..cadfadef9 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -34,7 +34,7 @@
 #include <linux/ktime.h>
 #include <trace/events/power.h>
 
-#include "power.h"
+#include "tuxonice.h"
 
 
 static int nocompress;
@@ -42,7 +42,7 @@ static int noresume;
 static int nohibernate;
 static int resume_wait;
 static unsigned int resume_delay;
-static char resume_file[256] = CONFIG_PM_STD_PARTITION;
+char resume_file[256] = CONFIG_PM_STD_PARTITION;
 dev_t swsusp_resume_device;
 sector_t swsusp_resume_block;
 __visible int in_suspend __nosavedata;
@@ -127,7 +127,7 @@ static int hibernation_test(int level) { return 0; }
  * platform_begin - Call platform to start hibernation.
  * @platform_mode: Whether or not to use the platform driver.
  */
-static int platform_begin(int platform_mode)
+int platform_begin(int platform_mode)
 {
 	return (platform_mode && hibernation_ops) ?
 		hibernation_ops->begin() : 0;
@@ -137,7 +137,7 @@ static int platform_begin(int platform_mode)
  * platform_end - Call platform to finish transition to the working state.
  * @platform_mode: Whether or not to use the platform driver.
  */
-static void platform_end(int platform_mode)
+void platform_end(int platform_mode)
 {
 	if (platform_mode && hibernation_ops)
 		hibernation_ops->end();
@@ -151,7 +151,7 @@ static void platform_end(int platform_mode)
  * if so configured, and return an error code if that fails.
  */
 
-static int platform_pre_snapshot(int platform_mode)
+int platform_pre_snapshot(int platform_mode)
 {
 	return (platform_mode && hibernation_ops) ?
 		hibernation_ops->pre_snapshot() : 0;
@@ -166,7 +166,7 @@ static int platform_pre_snapshot(int platform_mode)
  *
  * This routine is called on one CPU with interrupts disabled.
  */
-static void platform_leave(int platform_mode)
+void platform_leave(int platform_mode)
 {
 	if (platform_mode && hibernation_ops)
 		hibernation_ops->leave();
@@ -181,7 +181,7 @@ static void platform_leave(int platform_mode)
  *
  * This routine must be called after platform_prepare().
  */
-static void platform_finish(int platform_mode)
+void platform_finish(int platform_mode)
 {
 	if (platform_mode && hibernation_ops)
 		hibernation_ops->finish();
@@ -197,7 +197,7 @@ static void platform_finish(int platform_mode)
  * If the restore fails after this function has been called,
  * platform_restore_cleanup() must be called.
  */
-static int platform_pre_restore(int platform_mode)
+int platform_pre_restore(int platform_mode)
 {
 	return (platform_mode && hibernation_ops) ?
 		hibernation_ops->pre_restore() : 0;
@@ -214,7 +214,7 @@ static int platform_pre_restore(int platform_mode)
  * function must be called too, regardless of the result of
  * platform_pre_restore().
  */
-static void platform_restore_cleanup(int platform_mode)
+void platform_restore_cleanup(int platform_mode)
 {
 	if (platform_mode && hibernation_ops)
 		hibernation_ops->restore_cleanup();
@@ -224,7 +224,7 @@ static void platform_restore_cleanup(int platform_mode)
  * platform_recover - Recover from a failure to suspend devices.
  * @platform_mode: Whether or not to use the platform driver.
  */
-static void platform_recover(int platform_mode)
+void platform_recover(int platform_mode)
 {
 	if (platform_mode && hibernation_ops && hibernation_ops->recover)
 		hibernation_ops->recover();
@@ -680,6 +680,9 @@ int hibernate(void)
 	int error, nr_calls = 0;
 	bool snapshot_test = false;
 
+	if (test_action_state(TOI_REPLACE_SWSUSP))
+		return try_tuxonice_hibernate();
+
 	if (!hibernation_available()) {
 		pm_pr_dbg("Hibernation not available.\n");
 		return -EPERM;
@@ -784,10 +787,18 @@ int hibernate(void)
  * attempts to recover gracefully and make the kernel return to the normal mode
  * of operation.
  */
-static int software_resume(void)
+int software_resume(void)
 {
 	int error, nr_calls = 0;
 
+	resume_attempted = 1;
+
+	/*
+	 * We can't know (until an image header - if any - is loaded), whether
+	 * we did override swsusp. We therefore ensure that both are tried.
+	 */
+	try_tuxonice_resume();
+
 	/*
 	 * If the user said "noresume".. bail out early.
 	 */
@@ -1167,6 +1178,7 @@ static int __init hibernate_setup(char *str)
 static int __init noresume_setup(char *str)
 {
 	noresume = 1;
+	set_toi_state(TOI_NORESUME_SPECIFIED);
 	return 1;
 }
 
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 9e58bdc8a..372279f25 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -37,8 +37,12 @@ static inline char *check_image_kernel(struct swsusp_info *info)
 	return arch_hibernation_header_restore(info) ?
 			"architecture specific data" : NULL;
 }
+#else
+extern char *check_image_kernel(struct swsusp_info *info);
 #endif /* CONFIG_ARCH_HIBERNATION_HEADER */
+extern int init_header(struct swsusp_info *info);
 
+extern char resume_file[256];
 extern int hibernate_resume_nonboot_cpu_disable(void);
 
 /*
@@ -87,6 +91,7 @@ static struct kobj_attribute _name##_attr = {	\
 	.store	= _name##_store,		\
 }
 
+extern struct pbe *restore_pblist;
 #define power_attr_ro(_name) \
 static struct kobj_attribute _name##_attr = {	\
 	.attr	= {				\
@@ -287,6 +292,31 @@ static inline void suspend_thaw_processes(void)
 }
 #endif
 
+extern struct page *saveable_page(struct zone *z, unsigned long p);
+#ifdef CONFIG_HIGHMEM
+struct page *saveable_highmem_page(struct zone *z, unsigned long p);
+#else
+static
+inline void *saveable_highmem_page(struct zone *z, unsigned long p)
+{
+	return NULL;
+}
+#endif
+
+#define PBES_PER_PAGE (PAGE_SIZE / sizeof(struct pbe))
+extern struct list_head nosave_regions;
+
+/**
+ *	This structure represents a range of page frames the contents of which
+ *	should not be saved during the suspend.
+ */
+
+struct nosave_region {
+	struct list_head list;
+	unsigned long start_pfn;
+	unsigned long end_pfn;
+};
+
 #ifdef CONFIG_PM_AUTOSLEEP
 
 /* kernel/power/autosleep.c */
@@ -313,3 +343,10 @@ extern int pm_wake_lock(const char *buf);
 extern int pm_wake_unlock(const char *buf);
 
 #endif /* !CONFIG_PM_WAKELOCKS */
+
+#ifdef CONFIG_TOI
+unsigned long toi_get_nonconflicting_page(void);
+#define BM_END_OF_MAP	(~0UL)
+#else
+#define toi_get_nonconflicting_page() (0)
+#endif
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 3d37c279c..a3e986bf5 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -40,6 +40,9 @@
 #include <asm/tlbflush.h>
 #include <asm/io.h>
 
+#include "tuxonice_modules.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_alloc.h"
 #include "power.h"
 
 #if defined(CONFIG_STRICT_KERNEL_RWX) && defined(CONFIG_ARCH_HAS_SET_MEMORY)
@@ -159,6 +162,9 @@ static void *get_image_page(gfp_t gfp_mask, int safe_needed)
 {
 	void *res;
 
+        if (toi_running)
+            return (void *) toi_get_nonconflicting_page();
+
 	res = (void *)get_zeroed_page(gfp_mask);
 	if (safe_needed)
 		while (res && swsusp_page_is_free(virt_to_page(res))) {
@@ -227,6 +233,11 @@ static inline void free_image_page(void *addr, int clear_nosave_free)
 
 	page = virt_to_page(addr);
 
+        if (toi_running) {
+            toi__free_page(29, page);
+            return;
+        }
+
 	swsusp_unset_page_forbidden(page);
 	if (clear_nosave_free)
 		swsusp_unset_page_free(page);
@@ -376,12 +387,15 @@ struct bm_position {
 	int node_bit;
 };
 
+#define BM_POSITION_SLOTS (NR_CPUS * 2)
+
 struct memory_bitmap {
 	struct list_head zones;
 	struct linked_page *p_list;	/* list of pages used to store zone
-					   bitmap objects and bitmap block
-					   objects */
-	struct bm_position cur;	/* most recently used bit position */
+					 * bitmap objects and bitmap block
+					 * objects
+					 */
+	struct bm_position cur[BM_POSITION_SLOTS];    /* most recently used bit position */
 };
 
 /* Functions that operate on memory bitmaps */
@@ -547,16 +561,39 @@ static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone,
 		free_image_page(node->data, clear_nosave_free);
 }
 
-static void memory_bm_position_reset(struct memory_bitmap *bm)
+void memory_bm_position_reset(struct memory_bitmap *bm)
 {
-	bm->cur.zone = list_entry(bm->zones.next, struct mem_zone_bm_rtree,
+    int index;
+
+    for (index = 0; index < BM_POSITION_SLOTS; index++) {
+	bm->cur[index].zone = list_entry(bm->zones.next, struct mem_zone_bm_rtree,
 				  list);
-	bm->cur.node = list_entry(bm->cur.zone->leaves.next,
+	bm->cur[index].node = list_entry(bm->cur[index].zone->leaves.next,
 				  struct rtree_node, list);
-	bm->cur.node_pfn = 0;
-	bm->cur.node_bit = 0;
+	bm->cur[index].node_pfn = 0;
+	bm->cur[index].node_bit = 0;
+    }
 }
 
+static void memory_bm_clear_current(struct memory_bitmap *bm, int index);
+unsigned long memory_bm_next_pfn(struct memory_bitmap *bm, int index);
+
+/**
+ *      memory_bm_clear
+ *      @param bm - The bitmap to clear
+ *
+ *      Only run while single threaded - locking not needed
+ */
+void memory_bm_clear(struct memory_bitmap *bm)
+{
+    memory_bm_position_reset(bm);
+
+    while (memory_bm_next_pfn(bm, 0) != BM_END_OF_MAP) {
+        memory_bm_clear_current(bm, 0);
+    }
+
+    memory_bm_position_reset(bm);
+}
 static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
 
 struct mem_extent {
@@ -670,7 +707,7 @@ static int memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask,
 	}
 
 	bm->p_list = ca.chain;
-	memory_bm_position_reset(bm);
+        memory_bm_position_reset(bm);
  Exit:
 	free_mem_extents(&mem_extents);
 	return error;
@@ -706,14 +743,24 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
  * Walk the radix tree to find the page containing the bit that represents @pfn
  * and return the position of the bit in @addr and @bit_nr.
  */
-static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
-			      void **addr, unsigned int *bit_nr)
+int memory_bm_find_bit(struct memory_bitmap *bm, int index,
+        unsigned long pfn, void **addr, unsigned int *bit_nr)
 {
 	struct mem_zone_bm_rtree *curr, *zone;
 	struct rtree_node *node;
 	int i, block_nr;
 
-	zone = bm->cur.zone;
+        if (!bm->cur[index].zone) {
+            // Reset
+            bm->cur[index].zone = list_entry(bm->zones.next, struct mem_zone_bm_rtree,
+                    list);
+            bm->cur[index].node = list_entry(bm->cur[index].zone->leaves.next,
+                    struct rtree_node, list);
+            bm->cur[index].node_pfn = 0;
+            bm->cur[index].node_bit = 0;
+        }
+
+	zone = bm->cur[index].zone;
 
 	if (pfn >= zone->start_pfn && pfn < zone->end_pfn)
 		goto zone_found;
@@ -736,8 +783,9 @@ zone_found:
 	 * We have found the zone. Now walk the radix tree to find the leaf node
 	 * for our PFN.
 	 */
-	node = bm->cur.node;
-	if (((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn)
+
+	node = bm->cur[index].node;
+	if (((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur[index].node_pfn)
 		goto node_found;
 
 	node      = zone->rtree;
@@ -754,9 +802,9 @@ zone_found:
 
 node_found:
 	/* Update last position */
-	bm->cur.zone = zone;
-	bm->cur.node = node;
-	bm->cur.node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK;
+	bm->cur[index].zone = zone;
+	bm->cur[index].node = node;
+	bm->cur[index].node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK;
 
 	/* Set return values */
 	*addr = node->data;
@@ -765,66 +813,66 @@ node_found:
 	return 0;
 }
 
-static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
+void memory_bm_set_bit(struct memory_bitmap *bm, int index, unsigned long pfn)
 {
 	void *addr;
 	unsigned int bit;
 	int error;
 
-	error = memory_bm_find_bit(bm, pfn, &addr, &bit);
+	error = memory_bm_find_bit(bm, index, pfn, &addr, &bit);
 	BUG_ON(error);
 	set_bit(bit, addr);
 }
 
-static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn)
+int mem_bm_set_bit_check(struct memory_bitmap *bm, int index, unsigned long pfn)
 {
 	void *addr;
 	unsigned int bit;
 	int error;
 
-	error = memory_bm_find_bit(bm, pfn, &addr, &bit);
+	error = memory_bm_find_bit(bm, index, pfn, &addr, &bit);
 	if (!error)
 		set_bit(bit, addr);
 
 	return error;
 }
 
-static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
+void memory_bm_clear_bit(struct memory_bitmap *bm, int index, unsigned long pfn)
 {
 	void *addr;
 	unsigned int bit;
 	int error;
 
-	error = memory_bm_find_bit(bm, pfn, &addr, &bit);
+	error = memory_bm_find_bit(bm, index, pfn, &addr, &bit);
 	BUG_ON(error);
 	clear_bit(bit, addr);
 }
 
-static void memory_bm_clear_current(struct memory_bitmap *bm)
+static void memory_bm_clear_current(struct memory_bitmap *bm, int index)
 {
 	int bit;
 
-	bit = max(bm->cur.node_bit - 1, 0);
-	clear_bit(bit, bm->cur.node->data);
+	bit = max(bm->cur[index].node_bit - 1, 0);
+	clear_bit(bit, bm->cur[index].node->data);
 }
 
-static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
+int memory_bm_test_bit(struct memory_bitmap *bm, int index, unsigned long pfn)
 {
 	void *addr;
 	unsigned int bit;
 	int error;
 
-	error = memory_bm_find_bit(bm, pfn, &addr, &bit);
+	error = memory_bm_find_bit(bm, index, pfn, &addr, &bit);
 	BUG_ON(error);
 	return test_bit(bit, addr);
 }
 
-static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
+static bool memory_bm_pfn_present(struct memory_bitmap *bm, int index, unsigned long pfn)
 {
 	void *addr;
 	unsigned int bit;
 
-	return !memory_bm_find_bit(bm, pfn, &addr, &bit);
+	return !memory_bm_find_bit(bm, index, pfn, &addr, &bit);
 }
 
 /*
@@ -837,25 +885,25 @@ static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
  *
  * Return true if there is a next node, false otherwise.
  */
-static bool rtree_next_node(struct memory_bitmap *bm)
+static bool rtree_next_node(struct memory_bitmap *bm, int index)
 {
-	if (!list_is_last(&bm->cur.node->list, &bm->cur.zone->leaves)) {
-		bm->cur.node = list_entry(bm->cur.node->list.next,
+	if (!list_is_last(&bm->cur[index].node->list, &bm->cur[index].zone->leaves)) {
+		bm->cur[index].node = list_entry(bm->cur[index].node->list.next,
 					  struct rtree_node, list);
-		bm->cur.node_pfn += BM_BITS_PER_BLOCK;
-		bm->cur.node_bit  = 0;
+		bm->cur[index].node_pfn += BM_BITS_PER_BLOCK;
+		bm->cur[index].node_bit  = 0;
 		touch_softlockup_watchdog();
 		return true;
 	}
 
 	/* No more nodes, goto next zone */
-	if (!list_is_last(&bm->cur.zone->list, &bm->zones)) {
-		bm->cur.zone = list_entry(bm->cur.zone->list.next,
+	if (!list_is_last(&bm->cur[index].zone->list, &bm->zones)) {
+		bm->cur[index].zone = list_entry(bm->cur[index].zone->list.next,
 				  struct mem_zone_bm_rtree, list);
-		bm->cur.node = list_entry(bm->cur.zone->leaves.next,
+		bm->cur[index].node = list_entry(bm->cur[index].zone->leaves.next,
 					  struct rtree_node, list);
-		bm->cur.node_pfn = 0;
-		bm->cur.node_bit = 0;
+		bm->cur[index].node_pfn = 0;
+		bm->cur[index].node_bit = 0;
 		return true;
 	}
 
@@ -874,37 +922,29 @@ static bool rtree_next_node(struct memory_bitmap *bm)
  * It is required to run memory_bm_position_reset() before the first call to
  * this function for the given memory bitmap.
  */
-static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
+unsigned long memory_bm_next_pfn(struct memory_bitmap *bm, int index)
 {
 	unsigned long bits, pfn, pages;
 	int bit;
 
+        index += NR_CPUS; /* Iteration state is separated from get/set/test */
+
 	do {
-		pages	  = bm->cur.zone->end_pfn - bm->cur.zone->start_pfn;
-		bits      = min(pages - bm->cur.node_pfn, BM_BITS_PER_BLOCK);
-		bit	  = find_next_bit(bm->cur.node->data, bits,
-					  bm->cur.node_bit);
+		pages	  = bm->cur[index].zone->end_pfn - bm->cur[index].zone->start_pfn;
+		bits      = min(pages - bm->cur[index].node_pfn, BM_BITS_PER_BLOCK);
+		bit	  = find_next_bit(bm->cur[index].node->data, bits,
+					  bm->cur[index].node_bit);
 		if (bit < bits) {
-			pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit;
-			bm->cur.node_bit = bit + 1;
+			pfn = bm->cur[index].zone->start_pfn + bm->cur[index].node_pfn + bit;
+			bm->cur[index].node_bit = bit + 1;
 			return pfn;
 		}
-	} while (rtree_next_node(bm));
+	} while (rtree_next_node(bm, index));
 
 	return BM_END_OF_MAP;
 }
 
-/*
- * This structure represents a range of page frames the contents of which
- * should not be saved during hibernation.
- */
-struct nosave_region {
-	struct list_head list;
-	unsigned long start_pfn;
-	unsigned long end_pfn;
-};
-
-static LIST_HEAD(nosave_regions);
+LIST_HEAD(nosave_regions);
 
 static void recycle_zone_bm_rtree(struct mem_zone_bm_rtree *zone)
 {
@@ -991,37 +1031,37 @@ static struct memory_bitmap *free_pages_map;
 void swsusp_set_page_free(struct page *page)
 {
 	if (free_pages_map)
-		memory_bm_set_bit(free_pages_map, page_to_pfn(page));
+		memory_bm_set_bit(free_pages_map, 0, page_to_pfn(page));
 }
 
 static int swsusp_page_is_free(struct page *page)
 {
 	return free_pages_map ?
-		memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0;
+		memory_bm_test_bit(free_pages_map, 0, page_to_pfn(page)) : 0;
 }
 
 void swsusp_unset_page_free(struct page *page)
 {
 	if (free_pages_map)
-		memory_bm_clear_bit(free_pages_map, page_to_pfn(page));
+		memory_bm_clear_bit(free_pages_map, 0, page_to_pfn(page));
 }
 
 static void swsusp_set_page_forbidden(struct page *page)
 {
 	if (forbidden_pages_map)
-		memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page));
+		memory_bm_set_bit(forbidden_pages_map, 0, page_to_pfn(page));
 }
 
 int swsusp_page_is_forbidden(struct page *page)
 {
 	return forbidden_pages_map ?
-		memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0;
+		memory_bm_test_bit(forbidden_pages_map, 0, page_to_pfn(page)) : 0;
 }
 
 static void swsusp_unset_page_forbidden(struct page *page)
 {
 	if (forbidden_pages_map)
-		memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page));
+		memory_bm_clear_bit(forbidden_pages_map, 0, page_to_pfn(page));
 }
 
 /**
@@ -1054,7 +1094,7 @@ static void mark_nosave_pages(struct memory_bitmap *bm)
 				 * touch the PFNs for which the error is
 				 * returned anyway.
 				 */
-				mem_bm_set_bit_check(bm, pfn);
+				mem_bm_set_bit_check(bm, 0, pfn);
 			}
 	}
 }
@@ -1207,7 +1247,7 @@ static unsigned int count_free_highmem_pages(void)
  * We should save the page if it isn't Nosave or NosaveFree, or Reserved,
  * and it isn't part of a free chunk of pages.
  */
-static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
+struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
 {
 	struct page *page;
 
@@ -1252,11 +1292,6 @@ static unsigned int count_highmem_pages(void)
 	}
 	return n;
 }
-#else
-static inline void *saveable_highmem_page(struct zone *z, unsigned long p)
-{
-	return NULL;
-}
 #endif /* CONFIG_HIGHMEM */
 
 /**
@@ -1269,7 +1304,7 @@ static inline void *saveable_highmem_page(struct zone *z, unsigned long p)
  * of pages statically defined as 'unsaveable', and it isn't part of
  * a free chunk of pages.
  */
-static struct page *saveable_page(struct zone *zone, unsigned long pfn)
+struct page *saveable_page(struct zone *zone, unsigned long pfn)
 {
 	struct page *page;
 
@@ -1405,15 +1440,15 @@ static void copy_data_pages(struct memory_bitmap *copy_bm,
 		max_zone_pfn = zone_end_pfn(zone);
 		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
 			if (page_is_saveable(zone, pfn))
-				memory_bm_set_bit(orig_bm, pfn);
+				memory_bm_set_bit(orig_bm, 0, pfn);
 	}
 	memory_bm_position_reset(orig_bm);
 	memory_bm_position_reset(copy_bm);
 	for(;;) {
-		pfn = memory_bm_next_pfn(orig_bm);
+		pfn = memory_bm_next_pfn(orig_bm, 0);
 		if (unlikely(pfn == BM_END_OF_MAP))
 			break;
-		copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
+		copy_data_page(memory_bm_next_pfn(copy_bm, 0), pfn);
 	}
 }
 
@@ -1458,8 +1493,8 @@ void swsusp_free(void)
 	memory_bm_position_reset(free_pages_map);
 
 loop:
-	fr_pfn = memory_bm_next_pfn(free_pages_map);
-	fb_pfn = memory_bm_next_pfn(forbidden_pages_map);
+	fr_pfn = memory_bm_next_pfn(free_pages_map, 0);
+	fb_pfn = memory_bm_next_pfn(forbidden_pages_map, 0);
 
 	/*
 	 * Find the next bit set in both bitmaps. This is guaranteed to
@@ -1467,16 +1502,16 @@ loop:
 	 */
 	do {
 		if (fb_pfn < fr_pfn)
-			fb_pfn = memory_bm_next_pfn(forbidden_pages_map);
+			fb_pfn = memory_bm_next_pfn(forbidden_pages_map, 0);
 		if (fr_pfn < fb_pfn)
-			fr_pfn = memory_bm_next_pfn(free_pages_map);
+			fr_pfn = memory_bm_next_pfn(free_pages_map, 0);
 	} while (fb_pfn != fr_pfn);
 
 	if (fr_pfn != BM_END_OF_MAP && pfn_valid(fr_pfn)) {
 		struct page *page = pfn_to_page(fr_pfn);
 
-		memory_bm_clear_current(forbidden_pages_map);
-		memory_bm_clear_current(free_pages_map);
+		memory_bm_clear_current(forbidden_pages_map, 0);
+		memory_bm_clear_current(free_pages_map, 0);
 		hibernate_restore_unprotect_page(page_address(page));
 		__free_page(page);
 		goto loop;
@@ -1513,7 +1548,7 @@ static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask)
 		page = alloc_image_page(mask);
 		if (!page)
 			break;
-		memory_bm_set_bit(&copy_bm, page_to_pfn(page));
+		memory_bm_set_bit(&copy_bm, 0, page_to_pfn(page));
 		if (PageHighMem(page))
 			alloc_highmem++;
 		else
@@ -1609,7 +1644,7 @@ static unsigned long free_unnecessary_pages(void)
 	memory_bm_position_reset(&copy_bm);
 
 	while (to_free_normal > 0 || to_free_highmem > 0) {
-		unsigned long pfn = memory_bm_next_pfn(&copy_bm);
+		unsigned long pfn = memory_bm_next_pfn(&copy_bm, 0);
 		struct page *page = pfn_to_page(pfn);
 
 		if (PageHighMem(page)) {
@@ -1623,7 +1658,7 @@ static unsigned long free_unnecessary_pages(void)
 			to_free_normal--;
 			alloc_normal--;
 		}
-		memory_bm_clear_bit(&copy_bm, pfn);
+		memory_bm_clear_bit(&copy_bm, 0, pfn);
 		swsusp_unset_page_forbidden(page);
 		swsusp_unset_page_free(page);
 		__free_page(page);
@@ -1905,7 +1940,7 @@ static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm,
 		struct page *page;
 
 		page = alloc_image_page(__GFP_HIGHMEM|__GFP_KSWAPD_RECLAIM);
-		memory_bm_set_bit(bm, page_to_pfn(page));
+		memory_bm_set_bit(bm, 0, page_to_pfn(page));
 	}
 	return nr_highmem;
 }
@@ -1946,7 +1981,7 @@ static int swsusp_alloc(struct memory_bitmap *copy_bm,
 			page = alloc_image_page(GFP_ATOMIC);
 			if (!page)
 				goto err_out;
-			memory_bm_set_bit(copy_bm, page_to_pfn(page));
+			memory_bm_set_bit(copy_bm, 0, page_to_pfn(page));
 		}
 	}
 
@@ -1961,6 +1996,9 @@ asmlinkage __visible int swsusp_save(void)
 {
 	unsigned int nr_pages, nr_highmem;
 
+        if (toi_running)
+            return toi_post_context_save();
+
 	pr_info("Creating hibernation image:\n");
 
 	drain_local_pages(NULL);
@@ -2008,7 +2046,7 @@ static int init_header_complete(struct swsusp_info *info)
 	return 0;
 }
 
-static char *check_image_kernel(struct swsusp_info *info)
+char *check_image_kernel(struct swsusp_info *info)
 {
 	if (info->version_code != LINUX_VERSION_CODE)
 		return "kernel version";
@@ -2029,7 +2067,7 @@ unsigned long snapshot_get_image_size(void)
 	return nr_copy_pages + nr_meta_pages + 1;
 }
 
-static int init_header(struct swsusp_info *info)
+int init_header(struct swsusp_info *info)
 {
 	memset(info, 0, sizeof(struct swsusp_info));
 	info->num_physpages = get_num_physpages();
@@ -2053,7 +2091,7 @@ static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
 	int j;
 
 	for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
-		buf[j] = memory_bm_next_pfn(bm);
+		buf[j] = memory_bm_next_pfn(bm, 0);
 		if (unlikely(buf[j] == BM_END_OF_MAP))
 			break;
 		/* Save page key for data page (s390 only). */
@@ -2103,7 +2141,7 @@ int snapshot_read_next(struct snapshot_handle *handle)
 	} else {
 		struct page *page;
 
-		page = pfn_to_page(memory_bm_next_pfn(&copy_bm));
+		page = pfn_to_page(memory_bm_next_pfn(&copy_bm, 0));
 		if (PageHighMem(page)) {
 			/*
 			 * Highmem pages are copied to the buffer,
@@ -2130,10 +2168,10 @@ static void duplicate_memory_bitmap(struct memory_bitmap *dst,
 	unsigned long pfn;
 
 	memory_bm_position_reset(src);
-	pfn = memory_bm_next_pfn(src);
+	pfn = memory_bm_next_pfn(src, 0);
 	while (pfn != BM_END_OF_MAP) {
-		memory_bm_set_bit(dst, pfn);
-		pfn = memory_bm_next_pfn(src);
+		memory_bm_set_bit(dst, 0, pfn);
+		pfn = memory_bm_next_pfn(src, 0);
 	}
 }
 
@@ -2149,10 +2187,10 @@ static void mark_unsafe_pages(struct memory_bitmap *bm)
 
 	/* Clear the "free"/"unsafe" bit for all PFNs */
 	memory_bm_position_reset(free_pages_map);
-	pfn = memory_bm_next_pfn(free_pages_map);
+	pfn = memory_bm_next_pfn(free_pages_map, 0);
 	while (pfn != BM_END_OF_MAP) {
-		memory_bm_clear_current(free_pages_map);
-		pfn = memory_bm_next_pfn(free_pages_map);
+		memory_bm_clear_current(free_pages_map, 0);
+		pfn = memory_bm_next_pfn(free_pages_map, 0);
 	}
 
 	/* Mark pages that correspond to the "original" PFNs as "unsafe" */
@@ -2210,8 +2248,8 @@ static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
 		/* Extract and buffer page key for data page (s390 only). */
 		page_key_memorize(buf + j);
 
-		if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j]))
-			memory_bm_set_bit(bm, buf[j]);
+		if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, 0, buf[j]))
+			memory_bm_set_bit(bm, 0, buf[j]);
 		else
 			return -EFAULT;
 	}
@@ -2251,12 +2289,12 @@ static unsigned int count_highmem_image_pages(struct memory_bitmap *bm)
 	unsigned int cnt = 0;
 
 	memory_bm_position_reset(bm);
-	pfn = memory_bm_next_pfn(bm);
+	pfn = memory_bm_next_pfn(bm, 0);
 	while (pfn != BM_END_OF_MAP) {
 		if (PageHighMem(pfn_to_page(pfn)))
 			cnt++;
 
-		pfn = memory_bm_next_pfn(bm);
+		pfn = memory_bm_next_pfn(bm, 0);
 	}
 	return cnt;
 }
@@ -2302,7 +2340,7 @@ static int prepare_highmem_image(struct memory_bitmap *bm,
 		page = alloc_page(__GFP_HIGHMEM);
 		if (!swsusp_page_is_free(page)) {
 			/* The page is "safe", set its bit the bitmap */
-			memory_bm_set_bit(bm, page_to_pfn(page));
+			memory_bm_set_bit(bm, 0, page_to_pfn(page));
 			safe_highmem_pages++;
 		}
 		/* Mark the page as allocated */
@@ -2363,7 +2401,7 @@ static void *get_highmem_page_buffer(struct page *page,
 
 		/* Copy of the page will be stored in high memory */
 		kaddr = buffer;
-		tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm));
+		tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm, 0));
 		safe_highmem_pages--;
 		last_highmem_page = tmp;
 		pbe->copy_page = tmp;
@@ -2524,7 +2562,7 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
 {
 	struct pbe *pbe;
 	struct page *page;
-	unsigned long pfn = memory_bm_next_pfn(bm);
+	unsigned long pfn = memory_bm_next_pfn(bm, 0);
 
 	if (pfn == BM_END_OF_MAP)
 		return ERR_PTR(-EFAULT);
@@ -2717,3 +2755,82 @@ int restore_highmem(void)
 	return 0;
 }
 #endif /* CONFIG_HIGHMEM */
+
+struct memory_bitmap *pageset1_map, *pageset2_map, *free_map, *nosave_map,
+  *pageset1_copy_map, *io_map, *page_resave_map, *compare_map;
+
+int resume_attempted;
+
+int memory_bm_write(struct memory_bitmap *bm, int (*rw_chunk)
+	(int rw, struct toi_module_ops *owner, char *buffer, int buffer_size))
+{
+    int result;
+
+    memory_bm_position_reset(bm);
+
+    do {
+        result = rw_chunk(WRITE, NULL, (char *) bm->cur[0].node->data, PAGE_SIZE);
+
+        if (result)
+            return result;
+    } while (rtree_next_node(bm, 0));
+    return 0;
+}
+
+int memory_bm_read(struct memory_bitmap *bm, int (*rw_chunk)
+	(int rw, struct toi_module_ops *owner, char *buffer, int buffer_size))
+{
+    int result;
+
+    memory_bm_position_reset(bm);
+
+    do {
+        result = rw_chunk(READ, NULL, (char *) bm->cur[0].node->data, PAGE_SIZE);
+
+        if (result)
+            return result;
+
+    } while (rtree_next_node(bm, 0));
+    return 0;
+}
+
+int memory_bm_space_needed(struct memory_bitmap *bm)
+{
+    unsigned long bytes = 0;
+
+    memory_bm_position_reset(bm);
+    do {
+        bytes += PAGE_SIZE;
+    } while (rtree_next_node(bm, 0));
+    return bytes;
+}
+
+int toi_alloc_bitmap(struct memory_bitmap **bm)
+{
+    int error;
+    struct memory_bitmap *bm1;
+
+    bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
+    if (!bm1)
+        return -ENOMEM;
+
+    error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY);
+    if (error) {
+        printk("Error returned - %d.\n", error);
+        kfree(bm1);
+        return -ENOMEM;
+    }
+
+    *bm = bm1;
+    return 0;
+}
+
+void toi_free_bitmap(struct memory_bitmap **bm)
+{
+    if (!*bm)
+        return;
+
+    memory_bm_free(*bm, 0);
+    kfree(*bm);
+    *bm = NULL;
+}
diff --git a/kernel/power/tuxonice.h b/kernel/power/tuxonice.h
new file mode 100644
index 000000000..3824c5455
--- /dev/null
+++ b/kernel/power/tuxonice.h
@@ -0,0 +1,260 @@
+/*
+ * kernel/power/tuxonice.h
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * It contains declarations used throughout swsusp.
+ *
+ */
+
+#ifndef KERNEL_POWER_TOI_H
+#define KERNEL_POWER_TOI_H
+
+#include <linux/delay.h>
+#include <linux/bootmem.h>
+#include <linux/suspend.h>
+#include <linux/fs.h>
+#include <asm/setup.h>
+#include "tuxonice_pageflags.h"
+#include "power.h"
+
+#define TOI_CORE_VERSION "3.3"
+#define        TOI_HEADER_VERSION 3
+#define MY_BOOT_KERNEL_DATA_VERSION 4
+
+struct toi_boot_kernel_data {
+  int version;
+  int size;
+  unsigned long toi_action;
+  unsigned long toi_debug_state;
+  u32 toi_default_console_level;
+  int toi_io_time[2][2];
+  char toi_nosave_commandline[COMMAND_LINE_SIZE];
+  unsigned long pages_used[33];
+  unsigned long incremental_bytes_in;
+  unsigned long incremental_bytes_out;
+  unsigned long compress_bytes_in;
+  unsigned long compress_bytes_out;
+  unsigned long pruned_pages;
+};
+
+extern struct toi_boot_kernel_data toi_bkd;
+
+/* Location of book kernel data struct in kernel being resumed */
+extern unsigned long boot_kernel_data_buffer;
+
+/*                 == Action states ==                 */
+
+enum {
+  TOI_REBOOT,
+  TOI_PAUSE,
+  TOI_LOGALL,
+  TOI_CAN_CANCEL,
+  TOI_KEEP_IMAGE,
+  TOI_FREEZER_TEST,
+  TOI_SINGLESTEP,
+  TOI_PAUSE_NEAR_PAGESET_END,
+  TOI_TEST_FILTER_SPEED,
+  TOI_TEST_BIO,
+  TOI_NO_PAGESET2,
+  TOI_IGNORE_ROOTFS,
+  TOI_REPLACE_SWSUSP,
+  TOI_PAGESET2_FULL,
+  TOI_ABORT_ON_RESAVE_NEEDED,
+  TOI_NO_MULTITHREADED_IO,
+  TOI_NO_DIRECT_LOAD, /* Obsolete */
+  TOI_LATE_CPU_HOTPLUG, /* Obsolete */
+  TOI_GET_MAX_MEM_ALLOCD,
+  TOI_NO_FLUSHER_THREAD,
+  TOI_NO_PS2_IF_UNNEEDED,
+  TOI_POST_RESUME_BREAKPOINT,
+  TOI_NO_READAHEAD,
+  TOI_TRACE_DEBUG_ON,
+  TOI_INCREMENTAL_IMAGE,
+};
+
+extern unsigned long toi_bootflags_mask;
+
+#define clear_action_state(bit) (test_and_clear_bit(bit, &toi_bkd.toi_action))
+
+/*                 == Result states ==                 */
+
+enum {
+  TOI_ABORTED,
+  TOI_ABORT_REQUESTED,
+  TOI_NOSTORAGE_AVAILABLE,
+  TOI_INSUFFICIENT_STORAGE,
+  TOI_FREEZING_FAILED,
+  TOI_KEPT_IMAGE,
+  TOI_WOULD_EAT_MEMORY,
+  TOI_UNABLE_TO_FREE_ENOUGH_MEMORY,
+  TOI_PM_SEM,
+  TOI_DEVICE_REFUSED,
+  TOI_SYSDEV_REFUSED,
+  TOI_EXTRA_PAGES_ALLOW_TOO_SMALL,
+  TOI_UNABLE_TO_PREPARE_IMAGE,
+  TOI_FAILED_MODULE_INIT,
+  TOI_FAILED_MODULE_CLEANUP,
+  TOI_FAILED_IO,
+  TOI_OUT_OF_MEMORY,
+  TOI_IMAGE_ERROR,
+  TOI_PLATFORM_PREP_FAILED,
+  TOI_CPU_HOTPLUG_FAILED,
+  TOI_ARCH_PREPARE_FAILED, /* Removed Linux-3.0 */
+  TOI_RESAVE_NEEDED,
+  TOI_CANT_SUSPEND,
+  TOI_NOTIFIERS_PREPARE_FAILED,
+  TOI_PRE_SNAPSHOT_FAILED,
+  TOI_PRE_RESTORE_FAILED,
+  TOI_USERMODE_HELPERS_ERR,
+  TOI_CANT_USE_ALT_RESUME,
+  TOI_HEADER_TOO_BIG,
+  TOI_WAKEUP_EVENT,
+  TOI_SYSCORE_REFUSED,
+  TOI_DPM_PREPARE_FAILED,
+  TOI_DPM_SUSPEND_FAILED,
+  TOI_NUM_RESULT_STATES        /* Used in printing debug info only */
+};
+
+extern unsigned long toi_result;
+
+#define set_result_state(bit) (test_and_set_bit(bit, &toi_result))
+#define set_abort_result(bit) (test_and_set_bit(TOI_ABORTED, &toi_result), \
+    test_and_set_bit(bit, &toi_result))
+#define clear_result_state(bit) (test_and_clear_bit(bit, &toi_result))
+#define test_result_state(bit) (test_bit(bit, &toi_result))
+
+/*         == Debug sections and levels ==         */
+
+/* debugging levels. */
+enum {
+  TOI_STATUS = 0,
+  TOI_ERROR = 2,
+  TOI_LOW,
+  TOI_MEDIUM,
+  TOI_HIGH,
+  TOI_VERBOSE,
+};
+
+enum {
+  TOI_ANY_SECTION,
+  TOI_EAT_MEMORY,
+  TOI_IO,
+  TOI_HEADER,
+  TOI_WRITER,
+  TOI_MEMORY,
+  TOI_PAGEDIR,
+  TOI_COMPRESS,
+  TOI_BIO,
+};
+
+#define set_debug_state(bit) (test_and_set_bit(bit, &toi_bkd.toi_debug_state))
+#define clear_debug_state(bit) \
+  (test_and_clear_bit(bit, &toi_bkd.toi_debug_state))
+#define test_debug_state(bit) (test_bit(bit, &toi_bkd.toi_debug_state))
+
+/*                == Steps in hibernating ==        */
+
+enum {
+  STEP_HIBERNATE_PREPARE_IMAGE,
+  STEP_HIBERNATE_SAVE_IMAGE,
+  STEP_HIBERNATE_POWERDOWN,
+  STEP_RESUME_CAN_RESUME,
+  STEP_RESUME_LOAD_PS1,
+  STEP_RESUME_DO_RESTORE,
+  STEP_RESUME_READ_PS2,
+  STEP_RESUME_GO,
+  STEP_RESUME_ALT_IMAGE,
+  STEP_CLEANUP,
+  STEP_QUIET_CLEANUP
+};
+
+/*                == TuxOnIce states ==
+                  (see also include/linux/suspend.h)        */
+
+#define get_toi_state()  (toi_state)
+#define restore_toi_state(saved_state) \
+  do { toi_state = saved_state; } while (0)
+
+/*                == Module support ==                */
+
+struct toi_core_fns {
+  int (*post_context_save)(void);
+  unsigned long (*get_nonconflicting_page)(void);
+  int (*try_hibernate)(void);
+  void (*try_resume)(void);
+};
+
+extern struct toi_core_fns *toi_core_fns;
+
+/*                == All else ==                        */
+#define KB(x) ((x) << (PAGE_SHIFT - 10))
+#define MB(x) ((x) >> (20 - PAGE_SHIFT))
+
+extern int toi_start_anything(int toi_or_resume);
+extern void toi_finish_anything(int toi_or_resume);
+
+extern int save_image_part1(void);
+extern int toi_atomic_restore(void);
+
+extern int toi_try_hibernate(void);
+extern void toi_try_resume(void);
+
+extern int __toi_post_context_save(void);
+
+extern unsigned int nr_hibernates;
+extern char alt_resume_param[256];
+
+extern void copyback_post(void);
+extern int toi_hibernate(void);
+extern unsigned long extra_pd1_pages_used;
+
+#define SECTOR_SIZE 512
+
+extern void toi_early_boot_message(int can_erase_image, int default_answer,
+    char *warning_reason, ...);
+
+extern int do_check_can_resume(void);
+extern int do_toi_step(int step);
+extern int toi_launch_userspace_program(char *command, int channel_no,
+    int wait, int debug);
+
+extern char tuxonice_signature[9];
+
+extern int toi_start_other_threads(void);
+extern void toi_stop_other_threads(void);
+
+extern int toi_trace_index;
+#define TOI_TRACE_DEBUG(PFN, DESC, ...) \
+  do { \
+    if (test_action_state(TOI_TRACE_DEBUG_ON)) { \
+      printk("*TOI* %ld %02d" DESC "\n", PFN, toi_trace_index, ##__VA_ARGS__); \
+    } \
+  } while(0)
+
+#ifdef CONFIG_TOI_KEEP_IMAGE
+#define toi_keeping_image (test_action_state(TOI_KEEP_IMAGE) || test_action_state(TOI_INCREMENTAL_IMAGE))
+#else
+#define toi_keeping_image (0)
+#endif
+
+#ifdef CONFIG_TOI_INCREMENTAL
+extern void toi_reset_dirtiness_one(unsigned long pfn, int verbose);
+extern int toi_reset_dirtiness(int verbose);
+extern void toi_cbw_write(void);
+extern void toi_cbw_restore(void);
+extern int toi_allocate_cbw_data(void);
+extern void toi_free_cbw_data(void);
+extern int toi_cbw_init(void);
+extern void toi_mark_tasks_cbw(void);
+#else
+static inline int toi_reset_dirtiness(int verbose) { return 0; }
+#define toi_cbw_write() do { } while(0)
+#define toi_cbw_restore() do { } while(0)
+#define toi_allocate_cbw_data() do { } while(0)
+#define toi_free_cbw_data() do { } while(0)
+static inline int toi_cbw_init(void) { return 0; }
+#endif
+#endif
diff --git a/kernel/power/tuxonice_alloc.c b/kernel/power/tuxonice_alloc.c
new file mode 100644
index 000000000..f644d015e
--- /dev/null
+++ b/kernel/power/tuxonice_alloc.c
@@ -0,0 +1,308 @@
+/*
+ * kernel/power/tuxonice_alloc.c
+ *
+ * Copyright (C) 2008-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/slab.h>
+#include "tuxonice_modules.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice.h"
+
+#define TOI_ALLOC_PATHS 41
+
+static DEFINE_MUTEX(toi_alloc_mutex);
+
+static struct toi_module_ops toi_alloc_ops;
+
+static int toi_fail_num;
+
+static atomic_t toi_alloc_count[TOI_ALLOC_PATHS],
+                toi_free_count[TOI_ALLOC_PATHS],
+                toi_test_count[TOI_ALLOC_PATHS],
+                toi_fail_count[TOI_ALLOC_PATHS];
+static int toi_cur_allocd[TOI_ALLOC_PATHS], toi_max_allocd[TOI_ALLOC_PATHS];
+static int cur_allocd, max_allocd;
+
+static char *toi_alloc_desc[TOI_ALLOC_PATHS] = {
+  "", /* 0 */
+  "get_io_info_struct",
+  "extent",
+  "extent (loading chain)",
+  "userui channel",
+  "userui arg", /* 5 */
+  "attention list metadata",
+  "extra pagedir memory metadata",
+  "bdev metadata",
+  "extra pagedir memory",
+  "header_locations_read", /* 10 */
+  "bio queue",
+  "prepare_readahead",
+  "i/o buffer",
+  "writer buffer in bio_init",
+  "checksum buffer", /* 15 */
+  "compression buffer",
+  "filewriter signature op",
+  "set resume param alloc1",
+  "set resume param alloc2",
+  "debugging info buffer", /* 20 */
+  "check can resume buffer",
+  "write module config buffer",
+  "read module config buffer",
+  "write image header buffer",
+  "read pageset1 buffer", /* 25 */
+  "get_have_image_data buffer",
+  "checksum page",
+  "worker rw loop",
+  "get nonconflicting page",
+  "ps1 load addresses", /* 30 */
+  "remove swap image",
+  "swap image exists",
+  "swap parse sig location",
+  "sysfs kobj",
+  "swap mark resume attempted buffer", /* 35 */
+  "cluster member",
+  "boot kernel data buffer",
+  "setting swap signature",
+  "block i/o bdev struct",
+  "copy before write", /* 40 */
+};
+
+#define MIGHT_FAIL(FAIL_NUM, FAIL_VAL) \
+  do { \
+    BUG_ON(FAIL_NUM >= TOI_ALLOC_PATHS); \
+    \
+    if (FAIL_NUM == toi_fail_num) { \
+      atomic_inc(&toi_test_count[FAIL_NUM]); \
+      toi_fail_num = 0; \
+      return FAIL_VAL; \
+    } \
+  } while (0)
+
+static void alloc_update_stats(int fail_num, void *result, int size)
+{
+  if (!result) {
+    atomic_inc(&toi_fail_count[fail_num]);
+    return;
+  }
+
+  atomic_inc(&toi_alloc_count[fail_num]);
+  if (unlikely(test_action_state(TOI_GET_MAX_MEM_ALLOCD))) {
+    mutex_lock(&toi_alloc_mutex);
+    toi_cur_allocd[fail_num]++;
+    cur_allocd += size;
+    if (unlikely(cur_allocd > max_allocd)) {
+      int i;
+
+      for (i = 0; i < TOI_ALLOC_PATHS; i++)
+        toi_max_allocd[i] = toi_cur_allocd[i];
+      max_allocd = cur_allocd;
+    }
+    mutex_unlock(&toi_alloc_mutex);
+  }
+}
+
+static void free_update_stats(int fail_num, int size)
+{
+  BUG_ON(fail_num >= TOI_ALLOC_PATHS);
+  atomic_inc(&toi_free_count[fail_num]);
+  if (unlikely(atomic_read(&toi_free_count[fail_num]) >
+        atomic_read(&toi_alloc_count[fail_num])))
+    dump_stack();
+  if (unlikely(test_action_state(TOI_GET_MAX_MEM_ALLOCD))) {
+    mutex_lock(&toi_alloc_mutex);
+    cur_allocd -= size;
+    toi_cur_allocd[fail_num]--;
+    mutex_unlock(&toi_alloc_mutex);
+  }
+}
+
+void *toi_kzalloc(int fail_num, size_t size, gfp_t flags)
+{
+  void *result;
+
+  if (toi_alloc_ops.enabled)
+    MIGHT_FAIL(fail_num, NULL);
+  result = kzalloc(size, flags);
+  if (toi_alloc_ops.enabled)
+    alloc_update_stats(fail_num, result, size);
+  if (fail_num == toi_trace_allocs)
+    dump_stack();
+  return result;
+}
+
+unsigned long toi_get_free_pages(int fail_num, gfp_t mask,
+    unsigned int order)
+{
+  unsigned long result;
+
+  mask |= ___GFP_TOI_NOTRACK;
+  if (toi_alloc_ops.enabled)
+    MIGHT_FAIL(fail_num, 0);
+  result = __get_free_pages(mask, order);
+  if (toi_alloc_ops.enabled)
+    alloc_update_stats(fail_num, (void *) result,
+        PAGE_SIZE << order);
+  if (fail_num == toi_trace_allocs)
+    dump_stack();
+  return result;
+}
+
+struct page *toi_alloc_page(int fail_num, gfp_t mask)
+{
+  struct page *result;
+
+  if (toi_alloc_ops.enabled)
+    MIGHT_FAIL(fail_num, NULL);
+  mask |= ___GFP_TOI_NOTRACK;
+  result = alloc_page(mask);
+  if (toi_alloc_ops.enabled)
+    alloc_update_stats(fail_num, (void *) result, PAGE_SIZE);
+  if (fail_num == toi_trace_allocs)
+    dump_stack();
+  return result;
+}
+
+unsigned long toi_get_zeroed_page(int fail_num, gfp_t mask)
+{
+  unsigned long result;
+
+  if (toi_alloc_ops.enabled)
+    MIGHT_FAIL(fail_num, 0);
+  mask |= ___GFP_TOI_NOTRACK;
+  result = get_zeroed_page(mask);
+  if (toi_alloc_ops.enabled)
+    alloc_update_stats(fail_num, (void *) result, PAGE_SIZE);
+  if (fail_num == toi_trace_allocs)
+    dump_stack();
+  return result;
+}
+
+void toi_kfree(int fail_num, const void *arg, int size)
+{
+  if (arg && toi_alloc_ops.enabled)
+    free_update_stats(fail_num, size);
+
+  if (fail_num == toi_trace_allocs)
+    dump_stack();
+  kfree(arg);
+}
+
+void toi_free_page(int fail_num, unsigned long virt)
+{
+  if (virt && toi_alloc_ops.enabled)
+    free_update_stats(fail_num, PAGE_SIZE);
+
+  if (fail_num == toi_trace_allocs)
+    dump_stack();
+  free_page(virt);
+}
+
+void toi__free_page(int fail_num, struct page *page)
+{
+  if (page && toi_alloc_ops.enabled)
+    free_update_stats(fail_num, PAGE_SIZE);
+
+  if (fail_num == toi_trace_allocs)
+    dump_stack();
+  __free_page(page);
+}
+
+void toi_free_pages(int fail_num, struct page *page, int order)
+{
+  if (page && toi_alloc_ops.enabled)
+    free_update_stats(fail_num, PAGE_SIZE << order);
+
+  if (fail_num == toi_trace_allocs)
+    dump_stack();
+  __free_pages(page, order);
+}
+
+void toi_alloc_print_debug_stats(void)
+{
+  int i, header_done = 0;
+
+  if (!toi_alloc_ops.enabled)
+    return;
+
+  for (i = 0; i < TOI_ALLOC_PATHS; i++)
+    if (atomic_read(&toi_alloc_count[i]) !=
+        atomic_read(&toi_free_count[i])) {
+      if (!header_done) {
+        printk(KERN_INFO "Idx  Allocs   Frees   Tests "
+            "  Fails     Max Description\n");
+        header_done = 1;
+      }
+
+      printk(KERN_INFO "%3d %7d %7d %7d %7d %7d %s\n", i,
+          atomic_read(&toi_alloc_count[i]),
+          atomic_read(&toi_free_count[i]),
+          atomic_read(&toi_test_count[i]),
+          atomic_read(&toi_fail_count[i]),
+          toi_max_allocd[i],
+          toi_alloc_desc[i]);
+    }
+}
+
+static int toi_alloc_initialise(int starting_cycle)
+{
+  int i;
+
+  if (!starting_cycle)
+    return 0;
+
+  if (toi_trace_allocs)
+    dump_stack();
+
+  for (i = 0; i < TOI_ALLOC_PATHS; i++) {
+    atomic_set(&toi_alloc_count[i], 0);
+    atomic_set(&toi_free_count[i], 0);
+    atomic_set(&toi_test_count[i], 0);
+    atomic_set(&toi_fail_count[i], 0);
+    toi_cur_allocd[i] = 0;
+    toi_max_allocd[i] = 0;
+  };
+
+  max_allocd = 0;
+  cur_allocd = 0;
+  return 0;
+}
+
+static struct toi_sysfs_data sysfs_params[] = {
+  SYSFS_INT("failure_test", SYSFS_RW, &toi_fail_num, 0, 99, 0, NULL),
+  SYSFS_INT("trace", SYSFS_RW, &toi_trace_allocs, 0, TOI_ALLOC_PATHS, 0,
+      NULL),
+  SYSFS_BIT("find_max_mem_allocated", SYSFS_RW, &toi_bkd.toi_action,
+      TOI_GET_MAX_MEM_ALLOCD, 0),
+  SYSFS_INT("enabled", SYSFS_RW, &toi_alloc_ops.enabled, 0, 1, 0,
+      NULL)
+};
+
+static struct toi_module_ops toi_alloc_ops = {
+  .type                                        = MISC_HIDDEN_MODULE,
+  .name                                        = "allocation debugging",
+  .directory                                = "alloc",
+  .module                                        = THIS_MODULE,
+  .early                                        = 1,
+  .initialise                                = toi_alloc_initialise,
+
+  .sysfs_data                = sysfs_params,
+  .num_sysfs_entries        = sizeof(sysfs_params) /
+    sizeof(struct toi_sysfs_data),
+};
+
+int toi_alloc_init(void)
+{
+  int result = toi_register_module(&toi_alloc_ops);
+  return result;
+}
+
+void toi_alloc_exit(void)
+{
+  toi_unregister_module(&toi_alloc_ops);
+}
diff --git a/kernel/power/tuxonice_alloc.h b/kernel/power/tuxonice_alloc.h
new file mode 100644
index 000000000..b4f4aa45f
--- /dev/null
+++ b/kernel/power/tuxonice_alloc.h
@@ -0,0 +1,54 @@
+/*
+ * kernel/power/tuxonice_alloc.h
+ *
+ * Copyright (C) 2008-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ */
+
+#include <linux/slab.h>
+#define TOI_WAIT_GFP (GFP_NOFS | __GFP_NOWARN)
+#define TOI_ATOMIC_GFP (GFP_ATOMIC | __GFP_NOWARN)
+
+#ifdef CONFIG_PM_DEBUG
+extern void *toi_kzalloc(int fail_num, size_t size, gfp_t flags);
+extern void toi_kfree(int fail_num, const void *arg, int size);
+
+extern unsigned long toi_get_free_pages(int fail_num, gfp_t mask,
+    unsigned int order);
+#define toi_get_free_page(FAIL_NUM, MASK) toi_get_free_pages(FAIL_NUM, MASK, 0)
+extern unsigned long toi_get_zeroed_page(int fail_num, gfp_t mask);
+extern void toi_free_page(int fail_num, unsigned long buf);
+extern void toi__free_page(int fail_num, struct page *page);
+extern void toi_free_pages(int fail_num, struct page *page, int order);
+extern struct page *toi_alloc_page(int fail_num, gfp_t mask);
+extern int toi_alloc_init(void);
+extern void toi_alloc_exit(void);
+
+extern void toi_alloc_print_debug_stats(void);
+
+#else /* CONFIG_PM_DEBUG */
+
+#define toi_kzalloc(FAIL, SIZE, FLAGS) (kzalloc(SIZE, FLAGS))
+#define toi_kfree(FAIL, ALLOCN, SIZE) (kfree(ALLOCN))
+
+#define toi_get_free_pages(FAIL, FLAGS, ORDER) __get_free_pages(FLAGS, ORDER)
+#define toi_get_free_page(FAIL, FLAGS) __get_free_page(FLAGS)
+#define toi_get_zeroed_page(FAIL, FLAGS) get_zeroed_page(FLAGS)
+#define toi_free_page(FAIL, ALLOCN) do { free_page(ALLOCN); } while (0)
+#define toi__free_page(FAIL, PAGE) __free_page(PAGE)
+#define toi_free_pages(FAIL, PAGE, ORDER) __free_pages(PAGE, ORDER)
+#define toi_alloc_page(FAIL, MASK) alloc_page(MASK)
+static inline int toi_alloc_init(void)
+{
+  return 0;
+}
+
+static inline void toi_alloc_exit(void) { }
+
+static inline void toi_alloc_print_debug_stats(void) { }
+
+#endif
+
+extern int toi_trace_allocs;
diff --git a/kernel/power/tuxonice_atomic_copy.c b/kernel/power/tuxonice_atomic_copy.c
new file mode 100644
index 000000000..00b9b281d
--- /dev/null
+++ b/kernel/power/tuxonice_atomic_copy.c
@@ -0,0 +1,471 @@
+/*
+ * kernel/power/tuxonice_atomic_copy.c
+ *
+ * Copyright 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * Distributed under GPLv2.
+ *
+ * Routines for doing the atomic save/restore.
+ */
+
+#include <linux/suspend.h>
+#include <linux/highmem.h>
+#include <linux/cpu.h>
+#include <linux/freezer.h>
+#include <linux/console.h>
+#include <linux/syscore_ops.h>
+#include <linux/ftrace.h>
+#include <asm/suspend.h>
+#include "tuxonice.h"
+#include "tuxonice_storage.h"
+#include "tuxonice_power_off.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_io.h"
+#include "tuxonice_prepare_image.h"
+#include "tuxonice_pageflags.h"
+#include "tuxonice_checksum.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_atomic_copy.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_modules.h"
+
+unsigned long extra_pd1_pages_used;
+
+/**
+ * free_pbe_list - free page backup entries used by the atomic copy code.
+ * @list:        List to free.
+ * @highmem:        Whether the list is in highmem.
+ *
+ * Normally, this function isn't used. If, however, we need to abort before
+ * doing the atomic copy, we use this to free the pbes previously allocated.
+ **/
+static void free_pbe_list(struct pbe **list, int highmem)
+{
+  while (*list) {
+    int i;
+    struct pbe *free_pbe, *next_page = NULL;
+    struct page *page;
+
+    if (highmem) {
+      page = (struct page *) *list;
+      free_pbe = (struct pbe *) kmap(page);
+    } else {
+      page = virt_to_page(*list);
+      free_pbe = *list;
+    }
+
+    for (i = 0; i < PBES_PER_PAGE; i++) {
+      if (!free_pbe)
+        break;
+      if (highmem)
+        toi__free_page(29, free_pbe->address);
+      else
+        toi_free_page(29,
+            (unsigned long) free_pbe->address);
+      free_pbe = free_pbe->next;
+    }
+
+    if (highmem) {
+      if (free_pbe)
+        next_page = free_pbe;
+      kunmap(page);
+    } else {
+      if (free_pbe)
+        next_page = free_pbe;
+    }
+
+    toi__free_page(29, page);
+    *list = (struct pbe *) next_page;
+  };
+}
+
+/**
+ * copyback_post - post atomic-restore actions
+ *
+ * After doing the atomic restore, we have a few more things to do:
+ *        1) We want to retain some values across the restore, so we now copy
+ *        these from the nosave variables to the normal ones.
+ *        2) Set the status flags.
+ *        3) Resume devices.
+ *        4) Tell userui so it can redraw & restore settings.
+ *        5) Reread the page cache.
+ **/
+void copyback_post(void)
+{
+  struct toi_boot_kernel_data *bkd =
+    (struct toi_boot_kernel_data *) boot_kernel_data_buffer;
+
+  if (toi_activate_storage(1))
+    panic("Failed to reactivate our storage.");
+
+  toi_post_atomic_restore_modules(bkd);
+
+  toi_cond_pause(1, "About to reload secondary pagedir.");
+
+  if (read_pageset2(0))
+    panic("Unable to successfully reread the page cache.");
+
+  /*
+   * If the user wants to sleep again after resuming from full-off,
+   * it's most likely to be in order to suspend to ram, so we'll
+   * do this check after loading pageset2, to give them the fastest
+   * wakeup when they are ready to use the computer again.
+   */
+  toi_check_resleep();
+
+  if (test_action_state(TOI_INCREMENTAL_IMAGE))
+    toi_reset_dirtiness(1);
+}
+
+/**
+ * toi_copy_pageset1 - do the atomic copy of pageset1
+ *
+ * Make the atomic copy of pageset1. We can't use copy_page (as we once did)
+ * because we can't be sure what side effects it has. On my old Duron, with
+ * 3DNOW, kernel_fpu_begin increments preempt count, making our preempt
+ * count at resume time 4 instead of 3.
+ *
+ * We don't want to call kmap_atomic unconditionally because it has the side
+ * effect of incrementing the preempt count, which will leave it one too high
+ * post resume (the page containing the preempt count will be copied after
+ * its incremented. This is essentially the same problem.
+ **/
+void toi_copy_pageset1(void)
+{
+  int i;
+  unsigned long source_index, dest_index;
+
+  memory_bm_position_reset(pageset1_map);
+  memory_bm_position_reset(pageset1_copy_map);
+
+  source_index = memory_bm_next_pfn(pageset1_map, 0);
+  dest_index = memory_bm_next_pfn(pageset1_copy_map, 0);
+
+  for (i = 0; i < pagedir1.size; i++) {
+    unsigned long *origvirt, *copyvirt;
+    struct page *origpage, *copypage;
+    int loop = (PAGE_SIZE / sizeof(unsigned long)) - 1,
+        was_present1, was_present2;
+
+    origpage = pfn_to_page(source_index);
+    copypage = pfn_to_page(dest_index);
+
+    origvirt = PageHighMem(origpage) ?
+      kmap_atomic(origpage) :
+      page_address(origpage);
+
+    copyvirt = PageHighMem(copypage) ?
+      kmap_atomic(copypage) :
+      page_address(copypage);
+
+    was_present1 = kernel_page_present(origpage);
+    if (!was_present1)
+      kernel_map_pages(origpage, 1, 1);
+
+    was_present2 = kernel_page_present(copypage);
+    if (!was_present2)
+      kernel_map_pages(copypage, 1, 1);
+
+    while (loop >= 0) {
+      *(copyvirt + loop) = *(origvirt + loop);
+      loop--;
+    }
+
+    if (!was_present1)
+      kernel_map_pages(origpage, 1, 0);
+
+    if (!was_present2)
+      kernel_map_pages(copypage, 1, 0);
+
+    if (PageHighMem(origpage))
+      kunmap_atomic(origvirt);
+
+    if (PageHighMem(copypage))
+      kunmap_atomic(copyvirt);
+
+    source_index = memory_bm_next_pfn(pageset1_map, 0);
+    dest_index = memory_bm_next_pfn(pageset1_copy_map, 0);
+  }
+}
+
+/**
+ * __toi_post_context_save - steps after saving the cpu context
+ *
+ * Steps taken after saving the CPU state to make the actual
+ * atomic copy.
+ *
+ * Called from swsusp_save in snapshot.c via toi_post_context_save.
+ **/
+int __toi_post_context_save(void)
+{
+  unsigned long old_ps1_size = pagedir1.size;
+
+  check_checksums();
+
+  free_checksum_pages();
+
+  toi_recalculate_image_contents(1);
+
+  extra_pd1_pages_used = pagedir1.size > old_ps1_size ?
+    pagedir1.size - old_ps1_size : 0;
+
+  if (extra_pd1_pages_used > extra_pd1_pages_allowance) {
+    printk(KERN_INFO "Pageset1 has grown by %lu pages. "
+        "extra_pages_allowance is currently only %lu.\n",
+        pagedir1.size - old_ps1_size,
+        extra_pd1_pages_allowance);
+
+    /*
+     * Highlevel code will see this, clear the state and
+     * retry if we haven't already done so twice.
+     */
+    if (any_to_free(1)) {
+      set_abort_result(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL);
+      return 1;
+    }
+    if (try_allocate_extra_memory()) {
+      printk(KERN_INFO "Failed to allocate the extra memory"
+          " needed. Restarting the process.");
+      set_abort_result(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL);
+      return 1;
+    }
+    printk(KERN_INFO "However it looks like there's enough"
+        " free ram and storage to handle this, so "
+        " continuing anyway.");
+    /* 
+     * What if try_allocate_extra_memory above calls
+     * toi_allocate_extra_pagedir_memory and it allocs a new
+     * slab page via toi_kzalloc which should be in ps1? So...
+     */
+    toi_recalculate_image_contents(1);
+  }
+
+  if (!test_action_state(TOI_TEST_FILTER_SPEED) &&
+      !test_action_state(TOI_TEST_BIO))
+    toi_copy_pageset1();
+
+  return 0;
+}
+
+/**
+ * toi_hibernate - high level code for doing the atomic copy
+ *
+ * High-level code which prepares to do the atomic copy. Loosely based
+ * on the swsusp version, but with the following twists:
+ *        - We set toi_running so the swsusp code uses our code paths.
+ *        - We give better feedback regarding what goes wrong if there is a
+ *          problem.
+ *        - We use an extra function to call the assembly, just in case this code
+ *          is in a module (return address).
+ **/
+int toi_hibernate(void)
+{
+  int error;
+
+  error = toi_lowlevel_builtin();
+
+  if (!error) {
+    struct toi_boot_kernel_data *bkd =
+      (struct toi_boot_kernel_data *) boot_kernel_data_buffer;
+
+    /*
+     * The boot kernel's data may be larger (newer version) or
+     * smaller (older version) than ours. Copy the minimum
+     * of the two sizes, so that we don't overwrite valid values
+     * from pre-atomic copy.
+     */
+
+    memcpy(&toi_bkd, (char *) boot_kernel_data_buffer,
+        min_t(int, sizeof(struct toi_boot_kernel_data),
+          bkd->size));
+  }
+
+  return error;
+}
+
+/**
+ * toi_atomic_restore - prepare to do the atomic restore
+ *
+ * Get ready to do the atomic restore. This part gets us into the same
+ * state we are in prior to do calling do_toi_lowlevel while
+ * hibernating: hot-unplugging secondary cpus and freeze processes,
+ * before starting the thread that will do the restore.
+ **/
+int toi_atomic_restore(void)
+{
+  int error;
+
+  toi_prepare_status(DONT_CLEAR_BAR,        "Atomic restore.");
+
+  memcpy(&toi_bkd.toi_nosave_commandline, saved_command_line,
+      strlen(saved_command_line));
+
+  toi_pre_atomic_restore_modules(&toi_bkd);
+
+  if (add_boot_kernel_data_pbe())
+    goto Failed;
+
+  toi_prepare_status(DONT_CLEAR_BAR, "Doing atomic copy/restore.");
+
+  if (toi_go_atomic(PMSG_QUIESCE, 0))
+    goto Failed;
+
+  /* We'll ignore saved state, but this gets preempt count (etc) right */
+  save_processor_state();
+
+  error = swsusp_arch_resume();
+  /*
+   * Code below is only ever reached in case of failure. Otherwise
+   * execution continues at place where swsusp_arch_suspend was called.
+   *
+   * We don't know whether it's safe to continue (this shouldn't happen),
+   * so lets err on the side of caution.
+   */
+  BUG();
+
+Failed:
+  free_pbe_list(&restore_pblist, 0);
+#ifdef CONFIG_HIGHMEM
+  free_pbe_list(&restore_highmem_pblist, 1);
+#endif
+  return 1;
+}
+
+/**
+ * toi_go_atomic - do the actual atomic copy/restore
+ * @state:           The state to use for dpm_suspend_start & power_down calls.
+ * @suspend_time:  Whether we're suspending or resuming.
+ **/
+int toi_go_atomic(pm_message_t state, int suspend_time)
+{
+  if (suspend_time) {
+    pm_suspend_clear_flags();
+    if (platform_begin(1)) {
+      set_abort_result(TOI_PLATFORM_PREP_FAILED);
+      toi_end_atomic(ATOMIC_STEP_PLATFORM_END, suspend_time, 3);
+      return 1;
+    }
+
+    if (dpm_prepare(PMSG_FREEZE)) {
+      set_abort_result(TOI_DPM_PREPARE_FAILED);
+      dpm_complete(PMSG_RECOVER);
+      toi_end_atomic(ATOMIC_STEP_PLATFORM_END, suspend_time, 3);
+      return 1;
+    }
+  }
+
+  suspend_console();
+  pm_restrict_gfp_mask();
+
+  if (suspend_time) {
+    if (dpm_suspend(state)) {
+      set_abort_result(TOI_DPM_SUSPEND_FAILED);
+      toi_end_atomic(ATOMIC_STEP_DEVICE_RESUME, suspend_time, 3);
+      return 1;
+    }
+  } else {
+    if (dpm_suspend_start(state)) {
+      set_abort_result(TOI_DPM_SUSPEND_FAILED);
+      toi_end_atomic(ATOMIC_STEP_DEVICE_RESUME, suspend_time, 3);
+      return 1;
+    }
+  }
+
+  /* At this point, dpm_suspend_start() has been called, but *not*
+   * dpm_suspend_noirq(). We *must* dpm_suspend_noirq() now.
+   * Otherwise, drivers for some devices (e.g. interrupt controllers)
+   * become desynchronized with the actual state of the hardware
+   * at resume time, and evil weirdness ensues.
+   */
+
+  if (dpm_suspend_end(state)) {
+    set_abort_result(TOI_DEVICE_REFUSED);
+    toi_end_atomic(ATOMIC_STEP_DEVICE_RESUME, suspend_time, 1);
+    return 1;
+  }
+
+  if (suspend_time) {
+    if (platform_pre_snapshot(1))
+      set_abort_result(TOI_PRE_SNAPSHOT_FAILED);
+  } else {
+    if (platform_pre_restore(1))
+      set_abort_result(TOI_PRE_RESTORE_FAILED);
+  }
+
+  if (test_result_state(TOI_ABORTED)) {
+    toi_end_atomic(ATOMIC_STEP_PLATFORM_FINISH, suspend_time, 1);
+    return 1;
+  }
+
+  if ((suspend_time && disable_nonboot_cpus()) ||
+      (!suspend_time && hibernate_resume_nonboot_cpu_disable())) {
+    set_abort_result(TOI_CPU_HOTPLUG_FAILED);
+    toi_end_atomic(ATOMIC_STEP_CPU_HOTPLUG,
+        suspend_time, 1);
+    return 1;
+  }
+
+  local_irq_disable();
+
+  if (syscore_suspend()) {
+    set_abort_result(TOI_SYSCORE_REFUSED);
+    toi_end_atomic(ATOMIC_STEP_IRQS, suspend_time, 1);
+    return 1;
+  }
+
+  if (suspend_time && pm_wakeup_pending()) {
+    set_abort_result(TOI_WAKEUP_EVENT);
+    toi_end_atomic(ATOMIC_STEP_SYSCORE_RESUME, suspend_time, 1);
+    return 1;
+  }
+  return 0;
+}
+
+/**
+ * toi_end_atomic - post atomic copy/restore routines
+ * @stage:                What step to start at.
+ * @suspend_time:        Whether we're suspending or resuming.
+ * @error:                Whether we're recovering from an error.
+ **/
+void toi_end_atomic(int stage, int suspend_time, int error)
+{
+  pm_message_t msg = suspend_time ? (error ? PMSG_RECOVER : PMSG_THAW) :
+    PMSG_RESTORE;
+
+  switch (stage) {
+    case ATOMIC_ALL_STEPS:
+      if (!suspend_time) {
+        events_check_enabled = false;
+      }
+      platform_leave(1);
+    case ATOMIC_STEP_SYSCORE_RESUME:
+      syscore_resume();
+    case ATOMIC_STEP_IRQS:
+      local_irq_enable();
+    case ATOMIC_STEP_CPU_HOTPLUG:
+      enable_nonboot_cpus();
+    case ATOMIC_STEP_PLATFORM_FINISH:
+      if (!suspend_time && error & 2)
+        platform_restore_cleanup(1);
+      else 
+        platform_finish(1);
+      dpm_resume_start(msg);
+    case ATOMIC_STEP_DEVICE_RESUME:
+      if (suspend_time && (error & 2))
+        platform_recover(1);
+      dpm_resume(msg);
+      if (!toi_in_suspend()) {
+        dpm_resume_end(PMSG_RECOVER);
+      }
+      if (error || !toi_in_suspend()) {
+        pm_restore_gfp_mask();
+      }
+      resume_console();
+    case ATOMIC_STEP_DPM_COMPLETE:
+      dpm_complete(msg);
+    case ATOMIC_STEP_PLATFORM_END:
+      platform_end(1);
+
+      toi_prepare_status(DONT_CLEAR_BAR, "Post atomic.");
+  }
+}
diff --git a/kernel/power/tuxonice_atomic_copy.h b/kernel/power/tuxonice_atomic_copy.h
new file mode 100644
index 000000000..d9461dfcc
--- /dev/null
+++ b/kernel/power/tuxonice_atomic_copy.h
@@ -0,0 +1,25 @@
+/*
+ * kernel/power/tuxonice_atomic_copy.h
+ *
+ * Copyright 2008-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * Distributed under GPLv2.
+ *
+ * Routines for doing the atomic save/restore.
+ */
+
+enum {
+  ATOMIC_ALL_STEPS,
+  ATOMIC_STEP_SYSCORE_RESUME,
+  ATOMIC_STEP_IRQS,
+  ATOMIC_STEP_CPU_HOTPLUG,
+  ATOMIC_STEP_PLATFORM_FINISH,
+  ATOMIC_STEP_DEVICE_RESUME,
+  ATOMIC_STEP_DPM_COMPLETE,
+  ATOMIC_STEP_PLATFORM_END,
+};
+
+int toi_go_atomic(pm_message_t state, int toi_time);
+void toi_end_atomic(int stage, int toi_time, int error);
+
+extern void platform_recover(int platform_mode);
diff --git a/kernel/power/tuxonice_bio.h b/kernel/power/tuxonice_bio.h
new file mode 100644
index 000000000..8d6a33bab
--- /dev/null
+++ b/kernel/power/tuxonice_bio.h
@@ -0,0 +1,80 @@
+/*
+ * kernel/power/tuxonice_bio.h
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * Distributed under GPLv2.
+ *
+ * This file contains declarations for functions exported from
+ * tuxonice_bio.c, which contains low level io functions.
+ */
+
+#include <linux/buffer_head.h>
+#include "tuxonice_extent.h"
+
+void toi_put_extent_chain(struct hibernate_extent_chain *chain);
+int toi_add_to_extent_chain(struct hibernate_extent_chain *chain,
+    unsigned long start, unsigned long end);
+
+struct hibernate_extent_saved_state {
+  int extent_num;
+  struct hibernate_extent *extent_ptr;
+  unsigned long offset;
+};
+
+struct toi_bdev_info {
+  struct toi_bdev_info *next;
+  struct hibernate_extent_chain blocks;
+  struct block_device *bdev;
+  struct toi_module_ops *allocator;
+  int allocator_index;
+  struct hibernate_extent_chain allocations;
+  char name[266]; /* "swap on " or "file " + up to 256 chars */
+
+  /* Saved in header */
+  char uuid[17];
+  dev_t dev_t;
+  int prio;
+  int bmap_shift;
+  int blocks_per_page;
+  unsigned long pages_used;
+  struct hibernate_extent_saved_state saved_state[4];
+};
+
+struct toi_extent_iterate_state {
+  struct toi_bdev_info *current_chain;
+  int num_chains;
+  int saved_chain_number[4];
+  struct toi_bdev_info *saved_chain_ptr[4];
+};
+
+/*
+ * Our exported interface so the swapwriter and filewriter don't
+ * need these functions duplicated.
+ */
+struct toi_bio_ops {
+  int (*bdev_page_io) (int rw, struct block_device *bdev, long pos,
+      struct page *page);
+  int (*register_storage)(struct toi_bdev_info *new);
+  void (*free_storage)(void);
+};
+
+struct toi_allocator_ops {
+  unsigned long (*toi_swap_storage_available) (void);
+};
+
+extern struct toi_bio_ops toi_bio_ops;
+
+extern char *toi_writer_buffer;
+extern int toi_writer_buffer_posn;
+
+struct toi_bio_allocator_ops {
+  int (*register_storage) (void);
+  unsigned long (*storage_available)(void);
+  int (*allocate_storage) (struct toi_bdev_info *, unsigned long);
+  int (*bmap) (struct toi_bdev_info *);
+  void (*free_storage) (struct toi_bdev_info *);
+  unsigned long (*free_unused_storage) (struct toi_bdev_info *, unsigned long used);
+};
+
+extern int toi_bio_register_storage(void);
diff --git a/kernel/power/tuxonice_bio_chains.c b/kernel/power/tuxonice_bio_chains.c
new file mode 100644
index 000000000..f6a5cfb4e
--- /dev/null
+++ b/kernel/power/tuxonice_bio_chains.c
@@ -0,0 +1,1121 @@
+/*
+ * kernel/power/tuxonice_bio_devinfo.c
+ *
+ * Copyright (C) 2009-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * Distributed under GPLv2.
+ *
+ */
+
+#include <linux/mm_types.h>
+#include "tuxonice_bio.h"
+#include "tuxonice_bio_internal.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_ui.h"
+#include "tuxonice.h"
+#include "tuxonice_io.h"
+
+static struct toi_bdev_info *prio_chain_head;
+static int num_chains;
+
+/* Pointer to current entry being loaded/saved. */
+struct toi_extent_iterate_state toi_writer_posn;
+
+#define metadata_size (sizeof(struct toi_bdev_info) - \
+    offsetof(struct toi_bdev_info, uuid))
+
+/*
+ * After section 0 (header) comes 2 => next_section[0] = 2
+ */
+static int next_section[3] = { 2, 3, 1 };
+
+/**
+ * dump_block_chains - print the contents of the bdev info array.
+ **/
+void dump_block_chains(void)
+{
+  int i = 0;
+  int j;
+  struct toi_bdev_info *cur_chain = prio_chain_head;
+
+  while (cur_chain) {
+    struct hibernate_extent *this = cur_chain->blocks.first;
+
+    printk(KERN_DEBUG "Chain %d (prio %d):", i, cur_chain->prio);
+
+    while (this) {
+      printk(KERN_CONT " [%lu-%lu]%s", this->start,
+          this->end, this->next ? "," : "");
+      this = this->next;
+    }
+
+    printk("\n");
+    cur_chain = cur_chain->next;
+    i++;
+  }
+
+  printk(KERN_DEBUG "Saved states:\n");
+  for (i = 0; i < 4; i++) {
+    printk(KERN_DEBUG "Slot %d: Chain %d.\n",
+        i, toi_writer_posn.saved_chain_number[i]);
+
+    cur_chain = prio_chain_head;
+    j = 0;
+    while (cur_chain) {
+      printk(KERN_DEBUG " Chain %d: Extent %d. Offset %lu.\n",
+          j, cur_chain->saved_state[i].extent_num,
+          cur_chain->saved_state[i].offset);
+      cur_chain = cur_chain->next;
+      j++;
+    }
+    printk(KERN_CONT "\n");
+  }
+}
+
+/**
+ *
+ **/
+static void toi_extent_chain_next(void)
+{
+  struct toi_bdev_info *this = toi_writer_posn.current_chain;
+
+  if (!this->blocks.current_extent)
+    return;
+
+  if (this->blocks.current_offset == this->blocks.current_extent->end) {
+    if (this->blocks.current_extent->next) {
+      this->blocks.current_extent =
+        this->blocks.current_extent->next;
+      this->blocks.current_offset =
+        this->blocks.current_extent->start;
+    } else {
+      this->blocks.current_extent = NULL;
+      this->blocks.current_offset = 0;
+    }
+  } else
+    this->blocks.current_offset++;
+}
+
+/**
+ *
+ */
+
+static struct toi_bdev_info *__find_next_chain_same_prio(void)
+{
+  struct toi_bdev_info *start_chain = toi_writer_posn.current_chain;
+  struct toi_bdev_info *this = start_chain;
+  int orig_prio = this->prio;
+
+  do {
+    this = this->next;
+
+    if (!this)
+      this = prio_chain_head;
+
+    /* Back on original chain? Use it again. */
+    if (this == start_chain)
+      return start_chain;
+
+  } while (!this->blocks.current_extent || this->prio != orig_prio);
+
+  return this;
+}
+
+static void find_next_chain(void)
+{
+  struct toi_bdev_info *this;
+
+  this = __find_next_chain_same_prio();
+
+  /*
+   * If we didn't get another chain of the same priority that we
+   * can use, look for the next priority.
+   */
+  while (this && !this->blocks.current_extent)
+    this = this->next;
+
+  toi_writer_posn.current_chain = this;
+}
+
+/**
+ * toi_extent_state_next - go to the next extent
+ * @blocks: The number of values to progress.
+ * @stripe_mode: Whether to spread usage across all chains.
+ *
+ * Given a state, progress to the next valid entry. We may begin in an
+ * invalid state, as we do when invoked after extent_state_goto_start below.
+ *
+ * When using compression and expected_compression > 0, we let the image size
+ * be larger than storage, so we can validly run out of data to return.
+ **/
+static unsigned long toi_extent_state_next(int blocks, int current_stream)
+{
+  int i;
+
+  if (!toi_writer_posn.current_chain)
+    return -ENOSPC;
+
+  /* Assume chains always have lengths that are multiples of @blocks */
+  for (i = 0; i < blocks; i++)
+    toi_extent_chain_next();
+
+  /* The header stream is not striped */
+  if (current_stream ||
+      !toi_writer_posn.current_chain->blocks.current_extent)
+    find_next_chain();
+
+  return  toi_writer_posn.current_chain ? 0 : -ENOSPC;
+}
+
+static void toi_insert_chain_in_prio_list(struct toi_bdev_info *this)
+{
+  struct toi_bdev_info **prev_ptr;
+  struct toi_bdev_info *cur;
+
+  /* Loop through the existing chain, finding where to insert it */
+  prev_ptr = &prio_chain_head;
+  cur = prio_chain_head;
+
+  while (cur && cur->prio >= this->prio) {
+    prev_ptr = &cur->next;
+    cur = cur->next;
+  }
+
+  this->next = *prev_ptr;
+  *prev_ptr = this;
+
+  this = prio_chain_head;
+  while (this)
+    this = this->next;
+  num_chains++;
+}
+
+/**
+ * toi_extent_state_goto_start - reinitialize an extent chain iterator
+ * @state:        Iterator to reinitialize
+ **/
+void toi_extent_state_goto_start(void)
+{
+  struct toi_bdev_info *this = prio_chain_head;
+
+  while (this) {
+    toi_message(TOI_BIO, TOI_VERBOSE, 0,
+        "Setting current extent to %p.", this->blocks.first);
+    this->blocks.current_extent = this->blocks.first;
+    if (this->blocks.current_extent) {
+      toi_message(TOI_BIO, TOI_VERBOSE, 0,
+          "Setting current offset to %lu.",
+          this->blocks.current_extent->start);
+      this->blocks.current_offset =
+        this->blocks.current_extent->start;
+    }
+
+    this = this->next;
+  }
+
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "Setting current chain to %p.",
+      prio_chain_head);
+  toi_writer_posn.current_chain = prio_chain_head;
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "Leaving extent state goto start.");
+}
+
+/**
+ * toi_extent_state_save - save state of the iterator
+ * @state:                Current state of the chain
+ * @saved_state:        Iterator to populate
+ *
+ * Given a state and a struct hibernate_extent_state_store, save the current
+ * position in a format that can be used with relocated chains (at
+ * resume time).
+ **/
+void toi_extent_state_save(int slot)
+{
+  struct toi_bdev_info *cur_chain = prio_chain_head;
+  struct hibernate_extent *extent;
+  struct hibernate_extent_saved_state *chain_state;
+  int i = 0;
+
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_extent_state_save, slot %d.",
+      slot);
+
+  if (!toi_writer_posn.current_chain) {
+    toi_message(TOI_BIO, TOI_VERBOSE, 0, "No current chain => "
+        "chain_num = -1.");
+    toi_writer_posn.saved_chain_number[slot] = -1;
+    return;
+  }
+
+  while (cur_chain) {
+    i++;
+    toi_message(TOI_BIO, TOI_VERBOSE, 0, "Saving chain %d (%p) "
+        "state, slot %d.", i, cur_chain, slot);
+
+    chain_state = &cur_chain->saved_state[slot];
+
+    chain_state->offset = cur_chain->blocks.current_offset;
+
+    if (toi_writer_posn.current_chain == cur_chain) {
+      toi_writer_posn.saved_chain_number[slot] = i;
+      toi_message(TOI_BIO, TOI_VERBOSE, 0, "This is the chain "
+          "we were on => chain_num is %d.", i);
+    }
+
+    if (!cur_chain->blocks.current_extent) {
+      chain_state->extent_num = 0;
+      toi_message(TOI_BIO, TOI_VERBOSE, 0, "No current extent "
+          "for this chain => extent_num %d is 0.",
+          i);
+      cur_chain = cur_chain->next;
+      continue;
+    }
+
+    extent = cur_chain->blocks.first;
+    chain_state->extent_num = 1;
+
+    while (extent != cur_chain->blocks.current_extent) {
+      chain_state->extent_num++;
+      extent = extent->next;
+    }
+
+    toi_message(TOI_BIO, TOI_VERBOSE, 0, "extent num %d is %d.", i,
+        chain_state->extent_num);
+
+    cur_chain = cur_chain->next;
+  }
+  toi_message(TOI_BIO, TOI_VERBOSE, 0,
+      "Completed saving extent state slot %d.", slot);
+}
+
+/**
+ * toi_extent_state_restore - restore the position saved by extent_state_save
+ * @state:                State to populate
+ * @saved_state:        Iterator saved to restore
+ **/
+void toi_extent_state_restore(int slot)
+{
+  int i = 0;
+  struct toi_bdev_info *cur_chain = prio_chain_head;
+  struct hibernate_extent_saved_state *chain_state;
+
+  toi_message(TOI_BIO, TOI_VERBOSE, 0,
+      "toi_extent_state_restore - slot %d.", slot);
+
+  if (toi_writer_posn.saved_chain_number[slot] == -1) {
+    toi_writer_posn.current_chain = NULL;
+    return;
+  }
+
+  while (cur_chain) {
+    int posn;
+    int j;
+    i++;
+    toi_message(TOI_BIO, TOI_VERBOSE, 0, "Restoring chain %d (%p) "
+        "state, slot %d.", i, cur_chain, slot);
+
+    chain_state = &cur_chain->saved_state[slot];
+
+    posn = chain_state->extent_num;
+
+    cur_chain->blocks.current_extent = cur_chain->blocks.first;
+    cur_chain->blocks.current_offset = chain_state->offset;
+
+    if (i == toi_writer_posn.saved_chain_number[slot]) {
+      toi_writer_posn.current_chain = cur_chain;
+      toi_message(TOI_BIO, TOI_VERBOSE, 0,
+          "Found current chain.");
+    }
+
+    for (j = 0; j < 4; j++)
+      if (i == toi_writer_posn.saved_chain_number[j]) {
+        toi_writer_posn.saved_chain_ptr[j] = cur_chain;
+        toi_message(TOI_BIO, TOI_VERBOSE, 0,
+            "Found saved chain ptr %d (%p) (offset"
+            " %d).", j, cur_chain,
+            cur_chain->saved_state[j].offset);
+      }
+
+    if (posn) {
+      while (--posn)
+        cur_chain->blocks.current_extent =
+          cur_chain->blocks.current_extent->next;
+    } else
+      cur_chain->blocks.current_extent = NULL;
+
+    cur_chain = cur_chain->next;
+  }
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "Done.");
+  if (test_action_state(TOI_LOGALL))
+    dump_block_chains();
+}
+
+/*
+ * Storage needed
+ *
+ * Returns amount of space in the image header required
+ * for the chain data. This ignores the links between
+ * pages, which we factor in when allocating the space.
+ */
+int toi_bio_devinfo_storage_needed(void)
+{
+  int result = sizeof(num_chains);
+  struct toi_bdev_info *chain = prio_chain_head;
+
+  while (chain) {
+    result += metadata_size;
+
+    /* Chain size */
+    result += sizeof(int);
+
+    /* Extents */
+    result += (2 * sizeof(unsigned long) *
+        chain->blocks.num_extents);
+
+    chain = chain->next;
+  }
+
+  result += 4 * sizeof(int);
+  return result;
+}
+
+static unsigned long chain_pages_used(struct toi_bdev_info *chain)
+{
+  struct hibernate_extent *this = chain->blocks.first;
+  struct hibernate_extent_saved_state *state = &chain->saved_state[3];
+  unsigned long size = 0;
+  int extent_idx = 1;
+
+  if (!state->extent_num) {
+    if (!this)
+      return 0;
+    else
+      return chain->blocks.size;
+  }
+
+  while (extent_idx < state->extent_num) {
+    size += (this->end - this->start + 1);
+    this = this->next;
+    extent_idx++;
+  }
+
+  /* We didn't use the one we're sitting on, so don't count it */
+  return size + state->offset - this->start;
+}
+
+void toi_bio_free_unused_storage_chain(struct toi_bdev_info *chain)
+{
+  unsigned long used = chain_pages_used(chain);
+
+  /* Free the storage */
+  unsigned long first_freed = 0;
+
+  if (chain->allocator->bio_allocator_ops->free_unused_storage)
+    first_freed = chain->allocator->bio_allocator_ops->free_unused_storage(chain, used);
+
+  printk(KERN_EMERG "Used %ld blocks in this chain. First extent freed is %lx.\n", used, first_freed);
+
+  /* Adjust / free the extents. */
+  toi_put_extent_chain_from(&chain->blocks, first_freed);
+
+  {
+    struct hibernate_extent *this = chain->blocks.first;
+    while (this) {
+      printk("Extent %lx-%lx.\n", this->start, this->end);
+      this = this->next;
+    }
+  }
+}
+
+/**
+ * toi_serialise_extent_chain - write a chain in the image
+ * @chain:        Chain to write.
+ **/
+static int toi_serialise_extent_chain(struct toi_bdev_info *chain)
+{
+  struct hibernate_extent *this;
+  int ret;
+  int i = 1;
+
+  chain->pages_used = chain_pages_used(chain);
+
+  if (test_action_state(TOI_LOGALL))
+    dump_block_chains();
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "Serialising chain (dev_t %lx).",
+      chain->dev_t);
+  /* Device info -  dev_t, prio, bmap_shift, blocks per page, positions */
+  ret = toiActiveAllocator->rw_header_chunk(WRITE, &toi_blockwriter_ops,
+      (char *) &chain->uuid, metadata_size);
+  if (ret)
+    return ret;
+
+  /* Num extents */
+  ret = toiActiveAllocator->rw_header_chunk(WRITE, &toi_blockwriter_ops,
+      (char *) &chain->blocks.num_extents, sizeof(int));
+  if (ret)
+    return ret;
+
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "%d extents.",
+      chain->blocks.num_extents);
+
+  this = chain->blocks.first;
+  while (this) {
+    toi_message(TOI_BIO, TOI_VERBOSE, 0, "Extent %d.", i);
+    ret = toiActiveAllocator->rw_header_chunk(WRITE,
+        &toi_blockwriter_ops,
+        (char *) this, 2 * sizeof(this->start));
+    if (ret)
+      return ret;
+    this = this->next;
+    i++;
+  }
+
+  return ret;
+}
+
+int toi_serialise_extent_chains(void)
+{
+  struct toi_bdev_info *this = prio_chain_head;
+  int result;
+
+  /* Write the number of chains */
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "Write number of chains (%d)",
+      num_chains);
+  result = toiActiveAllocator->rw_header_chunk(WRITE,
+      &toi_blockwriter_ops, (char *) &num_chains,
+      sizeof(int));
+  if (result)
+    return result;
+
+  /* Then the chains themselves */
+  while (this) {
+    result = toi_serialise_extent_chain(this);
+    if (result)
+      return result;
+    this = this->next;
+  }
+
+  /*
+   * Finally, the chain we should be on at the start of each
+   * section.
+   */
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "Saved chain numbers.");
+  result = toiActiveAllocator->rw_header_chunk(WRITE,
+      &toi_blockwriter_ops,
+      (char *) &toi_writer_posn.saved_chain_number[0],
+      4 * sizeof(int));
+
+  return result;
+}
+
+int toi_register_storage_chain(struct toi_bdev_info *new)
+{
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "Inserting chain %p into list.",
+      new);
+  toi_insert_chain_in_prio_list(new);
+  return 0;
+}
+
+static void free_bdev_info(struct toi_bdev_info *chain)
+{
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "Free chain %p.", chain);
+
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, " - Block extents.");
+  toi_put_extent_chain(&chain->blocks);
+
+  /*
+   * The allocator may need to do more than just free the chains
+   * (swap_free, for example). Don't call from boot kernel.
+   */
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, " - Allocator extents.");
+  if (chain->allocator)
+    chain->allocator->bio_allocator_ops->free_storage(chain);
+
+  /*
+   * Dropping out of reading atomic copy? Need to undo
+   * toi_open_by_devnum.
+   */
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, " - Bdev.");
+  if (chain->bdev && !IS_ERR(chain->bdev) &&
+      chain->bdev != resume_block_device &&
+      chain->bdev != header_block_device &&
+      test_toi_state(TOI_TRYING_TO_RESUME))
+    toi_close_bdev(chain->bdev);
+
+  /* Poison */
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, " - Struct.");
+  toi_kfree(39, chain, sizeof(*chain));
+
+  if (prio_chain_head == chain)
+    prio_chain_head = NULL;
+
+  num_chains--;
+}
+
+void free_all_bdev_info(void)
+{
+  struct toi_bdev_info *this = prio_chain_head;
+
+  while (this) {
+    struct toi_bdev_info *next = this->next;
+    free_bdev_info(this);
+    this = next;
+  }
+
+  memset((char *) &toi_writer_posn, 0, sizeof(toi_writer_posn));
+  prio_chain_head = NULL;
+}
+
+static void set_up_start_position(void)
+{
+  toi_writer_posn.current_chain = prio_chain_head;
+  go_next_page(0, 0);
+}
+
+/**
+ * toi_load_extent_chain - read back a chain saved in the image
+ * @chain:        Chain to load
+ *
+ * The linked list of extents is reconstructed from the disk. chain will point
+ * to the first entry.
+ **/
+int toi_load_extent_chain(int index, int *num_loaded)
+{
+  struct toi_bdev_info *chain = toi_kzalloc(39,
+      sizeof(struct toi_bdev_info), GFP_ATOMIC);
+  struct hibernate_extent *this, *last = NULL;
+  int i, ret;
+
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "Loading extent chain %d.", index);
+  /* Get dev_t, prio, bmap_shift, blocks per page, positions */
+  ret = toiActiveAllocator->rw_header_chunk_noreadahead(READ, NULL,
+      (char *) &chain->uuid, metadata_size);
+
+  if (ret) {
+    printk(KERN_ERR "Failed to read the size of extent chain.\n");
+    toi_kfree(39, chain, sizeof(*chain));
+    return 1;
+  }
+
+  toi_bkd.pages_used[index] = chain->pages_used;
+
+  ret = toiActiveAllocator->rw_header_chunk_noreadahead(READ, NULL,
+      (char *) &chain->blocks.num_extents, sizeof(int));
+  if (ret) {
+    printk(KERN_ERR "Failed to read the size of extent chain.\n");
+    toi_kfree(39, chain, sizeof(*chain));
+    return 1;
+  }
+
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "%d extents.",
+      chain->blocks.num_extents);
+
+  for (i = 0; i < chain->blocks.num_extents; i++) {
+    toi_message(TOI_BIO, TOI_VERBOSE, 0, "Extent %d.", i + 1);
+
+    this = toi_kzalloc(2, sizeof(struct hibernate_extent),
+        TOI_ATOMIC_GFP);
+    if (!this) {
+      printk(KERN_INFO "Failed to allocate a new extent.\n");
+      free_bdev_info(chain);
+      return -ENOMEM;
+    }
+    this->next = NULL;
+    /* Get the next page */
+    ret = toiActiveAllocator->rw_header_chunk_noreadahead(READ,
+        NULL, (char *) this, 2 * sizeof(this->start));
+    if (ret) {
+      printk(KERN_INFO "Failed to read an extent.\n");
+      toi_kfree(2, this, sizeof(struct hibernate_extent));
+      free_bdev_info(chain);
+      return 1;
+    }
+
+    if (last)
+      last->next = this;
+    else {
+      char b1[32], b2[32], b3[32];
+      /*
+       * Open the bdev
+       */
+      toi_message(TOI_BIO, TOI_VERBOSE, 0,
+          "Chain dev_t is %s. Resume dev t is %s. Header"
+          " bdev_t is %s.\n",
+          format_dev_t(b1, chain->dev_t),
+          format_dev_t(b2, resume_dev_t),
+          format_dev_t(b3, toi_sig_data->header_dev_t));
+
+      if (chain->dev_t == resume_dev_t)
+        chain->bdev = resume_block_device;
+      else if (chain->dev_t == toi_sig_data->header_dev_t)
+        chain->bdev = header_block_device;
+      else {
+        chain->bdev = toi_open_bdev(chain->uuid,
+            chain->dev_t, 1);
+        if (IS_ERR(chain->bdev)) {
+          free_bdev_info(chain);
+          return -ENODEV;
+        }
+      }
+
+      toi_message(TOI_BIO, TOI_VERBOSE, 0, "Chain bmap shift "
+          "is %d and blocks per page is %d.",
+          chain->bmap_shift,
+          chain->blocks_per_page);
+
+      chain->blocks.first = this;
+
+      /*
+       * Couldn't do this earlier, but can't do
+       * goto_start now - we may have already used blocks
+       * in the first chain.
+       */
+      chain->blocks.current_extent = this;
+      chain->blocks.current_offset = this->start;
+
+      /*
+       * Can't wait until we've read the whole chain
+       * before we insert it in the list. We might need
+       * this chain to read the next page in the header
+       */
+      toi_insert_chain_in_prio_list(chain);
+    }
+
+    /*
+     * We have to wait until 2 extents are loaded before setting up
+     * properly because if the first extent has only one page, we
+     * will need to put the position on the second extent. Sounds
+     * obvious, but it wasn't!
+     */
+    (*num_loaded)++;
+    if ((*num_loaded) == 2)
+      set_up_start_position();
+    last = this;
+  }
+
+  /*
+   * Shouldn't get empty chains, but it's not impossible. Link them in so
+   * they get freed properly later.
+   */
+  if (!chain->blocks.num_extents)
+    toi_insert_chain_in_prio_list(chain);
+
+  if (!chain->blocks.current_extent) {
+    chain->blocks.current_extent = chain->blocks.first;
+    if (chain->blocks.current_extent)
+      chain->blocks.current_offset =
+        chain->blocks.current_extent->start;
+  }
+  return 0;
+}
+
+int toi_load_extent_chains(void)
+{
+  int result;
+  int to_load;
+  int i;
+  int extents_loaded = 0;
+
+  result = toiActiveAllocator->rw_header_chunk_noreadahead(READ, NULL,
+      (char *) &to_load,
+      sizeof(int));
+  if (result)
+    return result;
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "%d chains to read.", to_load);
+
+  for (i = 0; i < to_load; i++) {
+    toi_message(TOI_BIO, TOI_VERBOSE, 0, " >> Loading chain %d/%d.",
+        i, to_load);
+    result = toi_load_extent_chain(i, &extents_loaded);
+    if (result)
+      return result;
+  }
+
+  /* If we never got to a second extent, we still need to do this. */
+  if (extents_loaded == 1)
+    set_up_start_position();
+
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "Save chain numbers.");
+  result = toiActiveAllocator->rw_header_chunk_noreadahead(READ,
+      &toi_blockwriter_ops,
+      (char *) &toi_writer_posn.saved_chain_number[0],
+      4 * sizeof(int));
+
+  return result;
+}
+
+static int toi_end_of_stream(int writing, int section_barrier)
+{
+  struct toi_bdev_info *cur_chain = toi_writer_posn.current_chain;
+  int compare_to = next_section[current_stream];
+  struct toi_bdev_info *compare_chain =
+    toi_writer_posn.saved_chain_ptr[compare_to];
+  int compare_offset = compare_chain ?
+    compare_chain->saved_state[compare_to].offset : 0;
+
+  if (!section_barrier)
+    return 0;
+
+  if (!cur_chain)
+    return 1;
+
+  if (cur_chain == compare_chain &&
+      cur_chain->blocks.current_offset == compare_offset) {
+    if (writing) {
+      if (!current_stream) {
+        debug_broken_header();
+        return 1;
+      }
+    } else {
+      more_readahead = 0;
+      toi_message(TOI_BIO, TOI_VERBOSE, 0,
+          "Reached the end of stream %d "
+          "(not an error).", current_stream);
+      return 1;
+    }
+  }
+
+  return 0;
+}
+
+/**
+ * go_next_page - skip blocks to the start of the next page
+ * @writing: Whether we're reading or writing the image.
+ *
+ * Go forward one page.
+ **/
+int go_next_page(int writing, int section_barrier)
+{
+  struct toi_bdev_info *cur_chain = toi_writer_posn.current_chain;
+  int max = cur_chain ? cur_chain->blocks_per_page : 1;
+
+  /* Nope. Go foward a page - or maybe two. Don't stripe the header,
+   * so that bad fragmentation doesn't put the extent data containing
+   * the location of the second page out of the first header page.
+   */
+  if (toi_extent_state_next(max, current_stream)) {
+    /* Don't complain if readahead falls off the end */
+    if (writing && section_barrier) {
+      toi_message(TOI_BIO, TOI_VERBOSE, 0, "Extent state eof. "
+          "Expected compression ratio too optimistic?");
+      if (test_action_state(TOI_LOGALL))
+        dump_block_chains();
+    }
+    toi_message(TOI_BIO, TOI_VERBOSE, 0, "Ran out of extents to "
+        "read/write. (Not necessarily a fatal error.");
+    return -ENOSPC;
+  }
+
+  return 0;
+}
+
+int devices_of_same_priority(struct toi_bdev_info *this)
+{
+  struct toi_bdev_info *check = prio_chain_head;
+  int i = 0;
+
+  while (check) {
+    if (check->prio == this->prio)
+      i++;
+    check = check->next;
+  }
+
+  return i;
+}
+
+/**
+ * toi_bio_rw_page - do i/o on the next disk page in the image
+ * @writing: Whether reading or writing.
+ * @page: Page to do i/o on.
+ * @is_readahead: Whether we're doing readahead
+ * @free_group: The group used in allocating the page
+ *
+ * Submit a page for reading or writing, possibly readahead.
+ * Pass the group used in allocating the page as well, as it should
+ * be freed on completion of the bio if we're writing the page.
+ **/
+int toi_bio_rw_page(int writing, struct page *page,
+    int is_readahead, int free_group)
+{
+  int result = toi_end_of_stream(writing, 1);
+  struct toi_bdev_info *dev_info = toi_writer_posn.current_chain;
+
+  if (result) {
+    if (writing)
+      abort_hibernate(TOI_INSUFFICIENT_STORAGE,
+          "Insufficient storage for your image.");
+    else
+      toi_message(TOI_BIO, TOI_VERBOSE, 0, "Seeking to "
+          "read/write another page when stream has "
+          "ended.");
+    return -ENOSPC;
+  }
+
+  toi_message(TOI_BIO, TOI_VERBOSE, 0,
+      "%s %lx:%ld",
+      writing ? "Write" : "Read",
+      dev_info->dev_t, dev_info->blocks.current_offset);
+
+  result = toi_do_io(writing, dev_info->bdev,
+      dev_info->blocks.current_offset << dev_info->bmap_shift,
+      page, is_readahead, 0, free_group);
+
+  /* Ignore the result here - will check end of stream if come in again */
+  go_next_page(writing, 1);
+
+  if (result)
+    printk(KERN_ERR "toi_do_io returned %d.\n", result);
+  return result;
+}
+
+dev_t get_header_dev_t(void)
+{
+  return prio_chain_head->dev_t;
+}
+
+struct block_device *get_header_bdev(void)
+{
+  return prio_chain_head->bdev;
+}
+
+unsigned long get_headerblock(void)
+{
+  return prio_chain_head->blocks.first->start <<
+    prio_chain_head->bmap_shift;
+}
+
+int get_main_pool_phys_params(void)
+{
+  struct toi_bdev_info *this = prio_chain_head;
+  int result;
+
+  while (this) {
+    result = this->allocator->bio_allocator_ops->bmap(this);
+    if (result)
+      return result;
+    this = this->next;
+  }
+
+  return 0;
+}
+
+static int apply_header_reservation(void)
+{
+  int i;
+
+  if (!header_pages_reserved) {
+    toi_message(TOI_BIO, TOI_VERBOSE, 0,
+        "No header pages reserved at the moment.");
+    return 0;
+  }
+
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "Applying header reservation.");
+
+  /* Apply header space reservation */
+  toi_extent_state_goto_start();
+
+  for (i = 0; i < header_pages_reserved; i++)
+    if (go_next_page(1, 0))
+      return -ENOSPC;
+
+  /* The end of header pages will be the start of pageset 2 */
+  toi_extent_state_save(2);
+
+  toi_message(TOI_BIO, TOI_VERBOSE, 0,
+      "Finished applying header reservation.");
+  return 0;
+}
+
+int toi_bio_register_storage(void)
+{
+  int result = 0;
+  struct toi_module_ops *this_module;
+
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_allocate_storage: "
+      "Registering storage.");
+
+  list_for_each_entry(this_module, &toi_modules, module_list) {
+    if (!this_module->enabled ||
+        this_module->type != BIO_ALLOCATOR_MODULE)
+      continue;
+    toi_message(TOI_BIO, TOI_VERBOSE, 0,
+        "Registering storage from %s.",
+        this_module->name);
+    result = this_module->bio_allocator_ops->register_storage();
+    if (result)
+      break;
+  }
+
+  return result;
+}
+
+void toi_bio_free_unused_storage(void)
+{
+  struct toi_bdev_info *this = prio_chain_head;
+
+  while (this) {
+    toi_bio_free_unused_storage_chain(this);
+    this = this->next;
+  }
+}
+
+int toi_bio_allocate_storage(unsigned long request)
+{
+  struct toi_bdev_info *chain = prio_chain_head;
+  unsigned long to_get = request;
+  unsigned long extra_pages, needed;
+  int no_free = 0;
+
+  if (!chain) {
+    printk("TuxOnIce: No storage was registered.\n");
+    return 0;
+  }
+
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_allocate_storage: "
+      "Request is %lu pages.", request);
+  extra_pages = DIV_ROUND_UP(request * (sizeof(unsigned long)
+        + sizeof(int)), PAGE_SIZE);
+  needed = request + extra_pages + header_pages_reserved;
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "Adding %lu extra pages and %lu "
+      "for header => %lu.",
+      extra_pages, header_pages_reserved, needed);
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "Already allocated %lu pages.",
+      raw_pages_allocd);
+
+  to_get = needed > raw_pages_allocd ? needed - raw_pages_allocd : 0;
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "Need to get %lu pages.", to_get);
+
+  if (!to_get)
+    return apply_header_reservation();
+
+  while (to_get && chain) {
+    int num_group = devices_of_same_priority(chain);
+    int divisor = num_group - no_free;
+    int i;
+    unsigned long portion = DIV_ROUND_UP(to_get, divisor);
+    unsigned long got = 0;
+    unsigned long got_this_round = 0;
+    struct toi_bdev_info *top = chain;
+
+    toi_message(TOI_BIO, TOI_VERBOSE, 0,
+        " Start of loop. To get is %lu. Divisor is %d.",
+        to_get, divisor);
+    no_free = 0;
+
+    /*
+     * We're aiming to spread the allocated storage as evenly
+     * as possible, but we also want to get all the storage we
+     * can off this priority.
+     */
+    for (i = 0; i < num_group; i++) {
+      struct toi_bio_allocator_ops *ops =
+        chain->allocator->bio_allocator_ops;
+      toi_message(TOI_BIO, TOI_VERBOSE, 0,
+          " Asking for %lu pages from chain %p.",
+          portion, chain);
+      got = ops->allocate_storage(chain, portion);
+      toi_message(TOI_BIO, TOI_VERBOSE, 0,
+          " Got %lu pages from allocator %p.",
+          got, chain);
+      if (!got)
+        no_free++;
+      got_this_round += got;
+      chain = chain->next;
+    }
+    toi_message(TOI_BIO, TOI_VERBOSE, 0, " Loop finished. Got a "
+        "total of %lu pages from %d allocators.",
+        got_this_round, divisor - no_free);
+
+    raw_pages_allocd += got_this_round;
+    to_get = needed > raw_pages_allocd ? needed - raw_pages_allocd :
+      0;
+
+    /*
+     * If we got anything from chains of this priority and we
+     * still have storage to allocate, go over this priority
+     * again.
+     */
+    if (got_this_round && to_get)
+      chain = top;
+    else
+      no_free = 0;
+  }
+
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "Finished allocating. Calling "
+      "get_main_pool_phys_params");
+  /* Now let swap allocator bmap the pages */
+  get_main_pool_phys_params();
+
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "Done. Reserving header.");
+  return apply_header_reservation();
+}
+
+void toi_bio_chains_post_atomic(struct toi_boot_kernel_data *bkd)
+{
+  int i = 0;
+  struct toi_bdev_info *cur_chain = prio_chain_head;
+
+  while (cur_chain) {
+    cur_chain->pages_used = bkd->pages_used[i];
+    cur_chain = cur_chain->next;
+    i++;
+  }
+}
+
+int toi_bio_chains_debug_info(char *buffer, int size)
+{
+  /* Show what we actually used */
+  struct toi_bdev_info *cur_chain = prio_chain_head;
+  int len = 0;
+
+  while (cur_chain) {
+    len += scnprintf(buffer + len, size - len, "  Used %lu pages "
+        "from %s.\n", cur_chain->pages_used,
+        cur_chain->name);
+    cur_chain = cur_chain->next;
+  }
+
+  return len;
+}
+
+void toi_bio_store_inc_image_ptr(struct toi_incremental_image_pointer *ptr)
+{
+  struct toi_bdev_info *this = toi_writer_posn.current_chain,
+                       *cmp = prio_chain_head;
+
+  ptr->save.chain = 1;
+  while (this != cmp) {
+    ptr->save.chain++;
+    cmp = cmp->next;
+  }
+  ptr->save.block = this->blocks.current_offset;
+
+  /* Save the raw info internally for quicker access when updating pointers */
+  ptr->bdev = this->bdev;
+  ptr->block = this->blocks.current_offset << this->bmap_shift;
+}
+
+void toi_bio_restore_inc_image_ptr(struct toi_incremental_image_pointer *ptr)
+{
+  int i = ptr->save.chain - 1;
+  struct toi_bdev_info *this;
+  struct hibernate_extent *hib;
+
+  /* Find chain by stored index */
+  this = prio_chain_head;
+  while (i) {
+    this = this->next;
+    i--;
+  }
+  toi_writer_posn.current_chain = this;
+
+  /* Restore block */
+  this->blocks.current_offset = ptr->save.block;
+
+  /* Find current offset from block number */
+  hib = this->blocks.first;
+
+  while (hib->start > ptr->save.block) {
+    hib = hib->next;
+  }
+
+  this->blocks.last_touched = this->blocks.current_extent = hib;
+}
diff --git a/kernel/power/tuxonice_bio_core.c b/kernel/power/tuxonice_bio_core.c
new file mode 100644
index 000000000..2dc6ed127
--- /dev/null
+++ b/kernel/power/tuxonice_bio_core.c
@@ -0,0 +1,1944 @@
+/*
+ * kernel/power/tuxonice_bio.c
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * Distributed under GPLv2.
+ *
+ * This file contains block io functions for TuxOnIce. These are
+ * used by the swapwriter and it is planned that they will also
+ * be used by the NFSwriter.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/syscalls.h>
+#include <linux/suspend.h>
+#include <linux/ctype.h>
+#include <linux/mount.h>
+#include <linux/fs_uuid.h>
+
+#include "tuxonice.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_prepare_image.h"
+#include "tuxonice_bio.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_io.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_bio_internal.h"
+
+#define MEMORY_ONLY 1
+#define THROTTLE_WAIT 2
+
+/* #define MEASURE_MUTEX_CONTENTION */
+#ifndef MEASURE_MUTEX_CONTENTION
+#define my_mutex_lock(index, the_lock) mutex_lock(the_lock)
+#define my_mutex_unlock(index, the_lock) mutex_unlock(the_lock)
+#else
+unsigned long mutex_times[2][2][NR_CPUS];
+#define my_mutex_lock(index, the_lock) do { \
+  int have_mutex; \
+  have_mutex = mutex_trylock(the_lock); \
+  if (!have_mutex) { \
+    mutex_lock(the_lock); \
+    mutex_times[index][0][smp_processor_id()]++; \
+  } else { \
+    mutex_times[index][1][smp_processor_id()]++; \
+  }
+
+#define my_mutex_unlock(index, the_lock) \
+  mutex_unlock(the_lock); \
+} while (0)
+#endif
+
+static int page_idx, reset_idx;
+
+static int target_outstanding_io = 1024;
+static int max_outstanding_writes, max_outstanding_reads;
+
+static struct page *bio_queue_head, *bio_queue_tail;
+static atomic_t toi_bio_queue_size;
+static DEFINE_SPINLOCK(bio_queue_lock);
+
+static int free_mem_throttle, throughput_throttle;
+int more_readahead = 1;
+static struct page *readahead_list_head, *readahead_list_tail;
+
+static struct page *waiting_on;
+
+static atomic_t toi_io_in_progress, toi_io_done;
+static DECLARE_WAIT_QUEUE_HEAD(num_in_progress_wait);
+
+int current_stream;
+/* Not static, so that the allocators can setup and complete
+ * writing the header */
+char *toi_writer_buffer;
+int toi_writer_buffer_posn;
+
+static DEFINE_MUTEX(toi_bio_mutex);
+static DEFINE_MUTEX(toi_bio_readahead_mutex);
+
+static struct task_struct *toi_queue_flusher;
+static int toi_bio_queue_flush_pages(int dedicated_thread);
+
+struct toi_module_ops toi_blockwriter_ops;
+
+struct toi_incremental_image_pointer toi_inc_ptr[2][2];
+
+#define TOTAL_OUTSTANDING_IO (atomic_read(&toi_io_in_progress) + \
+    atomic_read(&toi_bio_queue_size))
+
+unsigned long raw_pages_allocd, header_pages_reserved;
+
+static int toi_rw_buffer(int writing, char *buffer, int buffer_size,
+    int no_readahead);
+
+/**
+ * set_free_mem_throttle - set the point where we pause to avoid oom.
+ *
+ * Initially, this value is zero, but when we first fail to allocate memory,
+ * we set it (plus a buffer) and thereafter throttle i/o once that limit is
+ * reached.
+ **/
+static void set_free_mem_throttle(void)
+{
+  int new_throttle = nr_free_buffer_pages() + 256;
+
+  if (new_throttle > free_mem_throttle)
+    free_mem_throttle = new_throttle;
+}
+
+#define NUM_REASONS 7
+static atomic_t reasons[NUM_REASONS];
+static char *reason_name[NUM_REASONS] = {
+  "readahead not ready",
+  "bio allocation",
+  "synchronous I/O",
+  "toi_bio_get_new_page",
+  "memory low",
+  "readahead buffer allocation",
+  "throughput_throttle",
+};
+
+/* User Specified Parameters. */
+unsigned long resume_firstblock;
+dev_t resume_dev_t;
+struct block_device *resume_block_device;
+static atomic_t resume_bdev_open_count;
+
+struct block_device *header_block_device;
+
+/**
+ * toi_open_bdev: Open a bdev at resume time.
+ *
+ * index: The swap index. May be MAX_SWAPFILES for the resume_dev_t
+ * (the user can have resume= pointing at a swap partition/file that isn't
+ * swapon'd when they hibernate. MAX_SWAPFILES+1 for the first page of the
+ * header. It will be from a swap partition that was enabled when we hibernated,
+ * but we don't know it's real index until we read that first page.
+ * dev_t: The device major/minor.
+ * display_errs: Whether to try to do this quietly.
+ *
+ * We stored a dev_t in the image header. Open the matching device without
+ * requiring /dev/<whatever> in most cases and record the details needed
+ * to close it later and avoid duplicating work.
+ */
+struct block_device *toi_open_bdev(char *uuid, dev_t default_device,
+    int display_errs)
+{
+  struct block_device *bdev;
+  dev_t device = default_device;
+  char buf[32];
+  int retried = 0;
+
+retry:
+  if (uuid) {
+    struct fs_info seek;
+    strncpy((char *) &seek.uuid, uuid, 16);
+    seek.dev_t = 0;
+    seek.last_mount_size = 0;
+    device = blk_lookup_fs_info(&seek);
+    if (!device) {
+      device = default_device;
+      printk(KERN_DEBUG "Unable to resolve uuid. Falling back"
+          " to dev_t.\n");
+    } else
+      printk(KERN_DEBUG "Resolved uuid to device %s.\n",
+          format_dev_t(buf, device));
+  }
+
+  if (!device) {
+    printk(KERN_ERR "TuxOnIce attempting to open a "
+        "blank dev_t!\n");
+    dump_stack();
+    return NULL;
+  }
+  bdev = toi_open_by_devnum(device);
+
+  if (IS_ERR(bdev) || !bdev) {
+    if (!retried) {
+      retried = 1;
+      wait_for_device_probe();
+      goto retry;
+    }
+    if (display_errs)
+      toi_early_boot_message(1, TOI_CONTINUE_REQ,
+          "Failed to get access to block device "
+          "\"%x\" (error %d).\n Maybe you need "
+          "to run mknod and/or lvmsetup in an "
+          "initrd/ramfs?", device, bdev);
+    return ERR_PTR(-EINVAL);
+  }
+  toi_message(TOI_BIO, TOI_VERBOSE, 0,
+      "TuxOnIce got bdev %p for dev_t %x.",
+      bdev, device);
+
+  return bdev;
+}
+
+static void toi_bio_reserve_header_space(unsigned long request)
+{
+  header_pages_reserved = request;
+}
+
+/**
+ * do_bio_wait - wait for some TuxOnIce I/O to complete
+ * @reason: The array index of the reason we're waiting.
+ *
+ * Wait for a particular page of I/O if we're after a particular page.
+ * If we're not after a particular page, wait instead for all in flight
+ * I/O to be completed or for us to have enough free memory to be able
+ * to submit more I/O.
+ *
+ * If we wait, we also update our statistics regarding why we waited.
+ **/
+static void do_bio_wait(int reason)
+{
+  struct page *was_waiting_on = waiting_on;
+
+  /* On SMP, waiting_on can be reset, so we make a copy */
+  if (was_waiting_on) {
+    wait_on_page_locked(was_waiting_on);
+    atomic_inc(&reasons[reason]);
+  } else {
+    atomic_inc(&reasons[reason]);
+
+    wait_event(num_in_progress_wait,
+        !atomic_read(&toi_io_in_progress) ||
+        nr_free_buffer_pages() > free_mem_throttle);
+  }
+}
+
+/**
+ * throttle_if_needed - wait for I/O completion if throttle points are reached
+ * @flags: What to check and how to act.
+ *
+ * Check whether we need to wait for some I/O to complete. We always check
+ * whether we have enough memory available, but may also (depending upon
+ * @reason) check if the throughput throttle limit has been reached.
+ **/
+static int throttle_if_needed(int flags)
+{
+  int free_pages = nr_free_buffer_pages();
+
+  /* Getting low on memory and I/O is in progress? */
+  while (unlikely(free_pages < free_mem_throttle) &&
+      atomic_read(&toi_io_in_progress) &&
+      !test_result_state(TOI_ABORTED)) {
+    if (!(flags & THROTTLE_WAIT))
+      return -ENOMEM;
+    do_bio_wait(4);
+    free_pages = nr_free_buffer_pages();
+  }
+
+  while (!(flags & MEMORY_ONLY) && throughput_throttle &&
+      TOTAL_OUTSTANDING_IO >= throughput_throttle &&
+      !test_result_state(TOI_ABORTED)) {
+    int result = toi_bio_queue_flush_pages(0);
+    if (result)
+      return result;
+    atomic_inc(&reasons[6]);
+    wait_event(num_in_progress_wait,
+        !atomic_read(&toi_io_in_progress) ||
+        TOTAL_OUTSTANDING_IO < throughput_throttle);
+  }
+
+  return 0;
+}
+
+/**
+ * update_throughput_throttle - update the raw throughput throttle
+ * @jif_index: The number of times this function has been called.
+ *
+ * This function is called four times per second by the core, and used to limit
+ * the amount of I/O we submit at once, spreading out our waiting through the
+ * whole job and letting userui get an opportunity to do its work.
+ *
+ * We don't start limiting I/O until 1/4s has gone so that we get a
+ * decent sample for our initial limit, and keep updating it because
+ * throughput may vary (on rotating media, eg) with our block number.
+ *
+ * We throttle to 1/10s worth of I/O.
+ **/
+static void update_throughput_throttle(int jif_index)
+{
+  int done = atomic_read(&toi_io_done);
+  throughput_throttle = done * 2 / 5 / jif_index;
+}
+
+/**
+ * toi_finish_all_io - wait for all outstanding i/o to complete
+ *
+ * Flush any queued but unsubmitted I/O and wait for it all to complete.
+ **/
+static int toi_finish_all_io(void)
+{
+  int result = toi_bio_queue_flush_pages(0);
+  toi_bio_queue_flusher_should_finish = 1;
+  wake_up(&toi_io_queue_flusher);
+  wait_event(num_in_progress_wait, !TOTAL_OUTSTANDING_IO);
+  return result;
+}
+
+/**
+ * toi_end_bio - bio completion function.
+ * @bio: bio that has completed.
+ *
+ * Function called by the block driver from interrupt context when I/O is
+ * completed. If we were writing the page, we want to free it and will have
+ * set bio->bi_private to the parameter we should use in telling the page
+ * allocation accounting code what the page was allocated for. If we're
+ * reading the page, it will be in the singly linked list made from
+ * page->private pointers.
+ **/
+static void toi_end_bio(struct bio *bio)
+{
+  struct page *page = bio->bi_io_vec[0].bv_page;
+
+  BUG_ON(bio->bi_status == BLK_STS_IOERR);
+
+  unlock_page(page);
+  bio_put(bio);
+
+  if (waiting_on == page)
+    waiting_on = NULL;
+
+  put_page(page);
+
+  if (bio->bi_private)
+    toi__free_page((int) ((unsigned long) bio->bi_private) , page);
+
+  bio_put(bio);
+
+  atomic_dec(&toi_io_in_progress);
+  atomic_inc(&toi_io_done);
+
+  wake_up(&num_in_progress_wait);
+}
+
+/**
+ * submit - submit BIO request
+ * @writing: READ or WRITE.
+ * @dev: The block device we're using.
+ * @first_block: The first sector we're using.
+ * @page: The page being used for I/O.
+ * @free_group: If writing, the group that was used in allocating the page
+ *         and which will be used in freeing the page from the completion
+ *         routine.
+ *
+ * Based on Patrick Mochell's pmdisk code from long ago: "Straight from the
+ * textbook - allocate and initialize the bio. If we're writing, make sure
+ * the page is marked as dirty. Then submit it and carry on."
+ *
+ * If we're just testing the speed of our own code, we fake having done all
+ * the hard work and all toi_end_bio immediately.
+ **/
+static int submit(int writing, struct block_device *dev, sector_t first_block,
+    struct page *page, int free_group)
+{
+  struct bio *bio = NULL;
+  int cur_outstanding_io, result;
+  unsigned int op;
+
+  /*
+   * Shouldn't throttle if reading - can deadlock in the single
+   * threaded case as pages are only freed when we use the
+   * readahead.
+   */
+  if (writing) {
+    result = throttle_if_needed(MEMORY_ONLY | THROTTLE_WAIT);
+    if (result)
+      return result;
+  }
+
+  while (!bio) {
+    bio = bio_alloc(TOI_ATOMIC_GFP, 1);
+    if (!bio) {
+      set_free_mem_throttle();
+      do_bio_wait(1);
+    }
+  }
+
+  bio_set_dev(bio, dev);
+  bio->bi_iter.bi_sector = first_block;
+  bio->bi_private = (void *) ((unsigned long) free_group);
+  bio->bi_end_io = toi_end_bio;
+  bio_set_flag(bio, BIO_TOI);
+
+  op = writing ? REQ_OP_WRITE : REQ_OP_READ;
+  // op needs to be an unsigned int to avoid the warning triggering.
+  bio_set_op_attrs(bio, op, REQ_SYNC);
+
+  if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
+    printk(KERN_DEBUG "ERROR: adding page to bio at %lld\n",
+        (unsigned long long) first_block);
+    bio_put(bio);
+    return -EFAULT;
+  }
+
+  bio_get(bio);
+
+  cur_outstanding_io = atomic_add_return(1, &toi_io_in_progress);
+  if (writing) {
+    if (cur_outstanding_io > max_outstanding_writes)
+      max_outstanding_writes = cur_outstanding_io;
+  } else {
+    if (cur_outstanding_io > max_outstanding_reads)
+      max_outstanding_reads = cur_outstanding_io;
+  }
+
+  /* Still read the header! */
+  if (unlikely(test_action_state(TOI_TEST_BIO) && writing)) {
+    /* Fake having done the hard work */
+    bio->bi_status = BLK_STS_OK;
+    toi_end_bio(bio);
+  } else {
+    submit_bio(bio);
+  }
+
+  return 0;
+}
+
+/**
+ * toi_do_io: Prepare to do some i/o on a page and submit or batch it.
+ *
+ * @writing: Whether reading or writing.
+ * @bdev: The block device which we're using.
+ * @block0: The first sector we're reading or writing.
+ * @page: The page on which I/O is being done.
+ * @readahead_index: If doing readahead, the index (reset this flag when done).
+ * @syncio: Whether the i/o is being done synchronously.
+ *
+ * Prepare and start a read or write operation.
+ *
+ * Note that we always work with our own page. If writing, we might be given a
+ * compression buffer that will immediately be used to start compressing the
+ * next page. For reading, we do readahead and therefore don't know the final
+ * address where the data needs to go.
+ **/
+int toi_do_io(int writing, struct block_device *bdev, long block0,
+    struct page *page, int is_readahead, int syncio, int free_group)
+{
+  page->private = 0;
+
+  /* Do here so we don't race against toi_bio_get_next_page_read */
+  lock_page(page);
+
+  if (is_readahead) {
+    if (readahead_list_head)
+      readahead_list_tail->private = (unsigned long) page;
+    else
+      readahead_list_head = page;
+
+    readahead_list_tail = page;
+  }
+
+  /* Done before submitting to avoid races. */
+  if (syncio)
+    waiting_on = page;
+
+  /* Submit the page */
+  get_page(page);
+
+  if (submit(writing, bdev, block0, page, free_group))
+    return -EFAULT;
+
+  if (syncio)
+    do_bio_wait(2);
+
+  return 0;
+}
+
+/**
+ * toi_bdev_page_io - simpler interface to do directly i/o on a single page
+ * @writing: Whether reading or writing.
+ * @bdev: Block device on which we're operating.
+ * @pos: Sector at which page to read or write starts.
+ * @page: Page to be read/written.
+ *
+ * A simple interface to submit a page of I/O and wait for its completion.
+ * The caller must free the page used.
+ **/
+static int toi_bdev_page_io(int writing, struct block_device *bdev,
+    long pos, struct page *page)
+{
+  return toi_do_io(writing, bdev, pos, page, 0, 1, 0);
+}
+
+/**
+ * toi_bio_memory_needed - report the amount of memory needed for block i/o
+ *
+ * We want to have at least enough memory so as to have target_outstanding_io
+ * or more transactions on the fly at once. If we can do more, fine.
+ **/
+static int toi_bio_memory_needed(void)
+{
+  return target_outstanding_io * (PAGE_SIZE + sizeof(struct request) +
+      sizeof(struct bio));
+}
+
+/**
+ * toi_bio_print_debug_stats - put out debugging info in the buffer provided
+ * @buffer: A buffer of size @size into which text should be placed.
+ * @size: The size of @buffer.
+ *
+ * Fill a buffer with debugging info. This is used for both our debug_info sysfs
+ * entry and for recording the same info in dmesg.
+ **/
+static int toi_bio_print_debug_stats(char *buffer, int size)
+{
+  int len = 0;
+
+  if (toiActiveAllocator != &toi_blockwriter_ops) {
+    len = scnprintf(buffer, size,
+        "- Block I/O inactive.\n");
+    return len;
+  }
+
+  len = scnprintf(buffer, size, "- Block I/O active.\n");
+
+  len += toi_bio_chains_debug_info(buffer + len, size - len);
+
+  len += scnprintf(buffer + len, size - len,
+      "- Max outstanding reads %d. Max writes %d.\n",
+      max_outstanding_reads, max_outstanding_writes);
+
+  len += scnprintf(buffer + len, size - len,
+      "  Memory_needed: %d x (%lu + %u + %u) = %d bytes.\n",
+      target_outstanding_io,
+      PAGE_SIZE, (unsigned int) sizeof(struct request),
+      (unsigned int) sizeof(struct bio), toi_bio_memory_needed());
+
+#ifdef MEASURE_MUTEX_CONTENTION
+  {
+    int i;
+
+    len += scnprintf(buffer + len, size - len,
+        "  Mutex contention while reading:\n  Contended      Free\n");
+
+    for_each_online_cpu(i)
+      len += scnprintf(buffer + len, size - len,
+          "  %9lu %9lu\n",
+          mutex_times[0][0][i], mutex_times[0][1][i]);
+
+    len += scnprintf(buffer + len, size - len,
+        "  Mutex contention while writing:\n  Contended      Free\n");
+
+    for_each_online_cpu(i)
+      len += scnprintf(buffer + len, size - len,
+          "  %9lu %9lu\n",
+          mutex_times[1][0][i], mutex_times[1][1][i]);
+
+  }
+#endif
+
+  return len + scnprintf(buffer + len, size - len,
+      "  Free mem throttle point reached %d.\n", free_mem_throttle);
+}
+
+static int total_header_bytes;
+static int unowned;
+
+void debug_broken_header(void)
+{
+  printk(KERN_DEBUG "Image header too big for size allocated!\n");
+  print_toi_header_storage_for_modules();
+  printk(KERN_DEBUG "Page flags : %d.\n", toi_pageflags_space_needed());
+  printk(KERN_DEBUG "toi_header : %zu.\n", sizeof(struct toi_header));
+  printk(KERN_DEBUG "Total unowned : %d.\n", unowned);
+  printk(KERN_DEBUG "Total used : %d (%ld pages).\n", total_header_bytes,
+      DIV_ROUND_UP(total_header_bytes, PAGE_SIZE));
+  printk(KERN_DEBUG "Space needed now : %ld.\n",
+      get_header_storage_needed(0));
+  dump_block_chains();
+  abort_hibernate(TOI_HEADER_TOO_BIG, "Header reservation too small.");
+}
+
+static int toi_bio_update_previous_inc_img_ptr(int stream)
+{
+  int result;
+  char * buffer = (char *) toi_get_zeroed_page(12, TOI_ATOMIC_GFP);
+  struct page *page;
+  struct toi_incremental_image_pointer *prev, *this;
+
+  prev = &toi_inc_ptr[stream][0];
+  this = &toi_inc_ptr[stream][1];
+
+  if (!buffer) {
+    // We're at the start of writing a pageset. Memory should not be that scarce.
+    return -ENOMEM;
+  }
+
+  page = virt_to_page(buffer);
+  result = toi_do_io(READ, prev->bdev, prev->block, page, 0, 1, 0);
+
+  if (result)
+    goto out;
+
+  memcpy(buffer, (char *) this, sizeof(this->save));
+
+  result = toi_do_io(WRITE, prev->bdev, prev->block, page, 0, 0, 12);
+
+  // If the IO is successfully submitted (!result), the page will be freed
+  // asynchronously on completion.
+out:
+  if (result)
+    toi__free_page(12, virt_to_page(buffer));
+  return result;
+}
+
+/**
+ * toi_rw_init_incremental - incremental image part of setting up to write new section
+ */
+static int toi_write_init_incremental(int stream)
+{
+  int result = 0;
+
+  // Remember the location of this block so we can link to it.
+  toi_bio_store_inc_image_ptr(&toi_inc_ptr[stream][1]);
+
+  // Update the pointer at the start of the last pageset with the same stream number.
+  result = toi_bio_update_previous_inc_img_ptr(stream);
+  if (result)
+    return result;
+
+  // Move the current to the previous slot.
+  memcpy(&toi_inc_ptr[stream][0], &toi_inc_ptr[stream][1], sizeof(toi_inc_ptr[stream][1]));
+
+  // Store a blank pointer at the start of this incremental pageset
+  memset(&toi_inc_ptr[stream][1], 0, sizeof(toi_inc_ptr[stream][1]));
+  result = toi_rw_buffer(WRITE, (char *) &toi_inc_ptr[stream][1], sizeof(toi_inc_ptr[stream][1]), 0);
+  if (result)
+    return result;
+
+  // Serialise extent chains if this is an incremental pageset
+  return toi_serialise_extent_chains();
+}
+
+/**
+ * toi_read_init_incremental - incremental image part of setting up to read new section
+ */
+static int toi_read_init_incremental(int stream)
+{
+  int result;
+
+  // Set our position to the start of the next pageset
+  toi_bio_restore_inc_image_ptr(&toi_inc_ptr[stream][1]);
+
+  // Read the start of the next incremental pageset (if any)
+  result = toi_rw_buffer(READ, (char *) &toi_inc_ptr[stream][1], sizeof(toi_inc_ptr[stream][1]), 0);
+
+  if (!result)
+    result = toi_load_extent_chains();
+
+  return result;
+}
+
+/**
+ * toi_rw_init - prepare to read or write a stream in the image
+ * @writing: Whether reading or writing.
+ * @stream number: Section of the image being processed.
+ *
+ * Prepare to read or write a section ('stream') in the image.
+ **/
+static int toi_rw_init(int writing, int stream_number)
+{
+  if (stream_number)
+    toi_extent_state_restore(stream_number);
+  else
+    toi_extent_state_goto_start();
+
+  if (writing) {
+    reset_idx = 0;
+    if (!current_stream)
+      page_idx = 0;
+  } else {
+    reset_idx = 1;
+  }
+
+  atomic_set(&toi_io_done, 0);
+  if (!toi_writer_buffer)
+    toi_writer_buffer = (char *) toi_get_zeroed_page(11,
+        TOI_ATOMIC_GFP);
+  toi_writer_buffer_posn = writing ? 0 : PAGE_SIZE;
+
+  current_stream = stream_number;
+
+  more_readahead = 1;
+
+  if (test_result_state(TOI_KEPT_IMAGE)) {
+    int result;
+
+    if (writing) {
+      result = toi_write_init_incremental(stream_number);
+    } else {
+      result = toi_read_init_incremental(stream_number);
+    }
+
+    if (result)
+      return result;
+  }
+
+  return toi_writer_buffer ? 0 : -ENOMEM;
+}
+
+/**
+ * toi_bio_queue_write - queue a page for writing
+ * @full_buffer: Pointer to a page to be queued
+ *
+ * Add a page to the queue to be submitted. If we're the queue flusher,
+ * we'll do this once we've dropped toi_bio_mutex, so other threads can
+ * continue to submit I/O while we're on the slow path doing the actual
+ * submission.
+ **/
+static void toi_bio_queue_write(char **full_buffer)
+{
+  struct page *page = virt_to_page(*full_buffer);
+  unsigned long flags;
+
+  *full_buffer = NULL;
+  page->private = 0;
+
+  spin_lock_irqsave(&bio_queue_lock, flags);
+  if (!bio_queue_head)
+    bio_queue_head = page;
+  else
+    bio_queue_tail->private = (unsigned long) page;
+
+  bio_queue_tail = page;
+  atomic_inc(&toi_bio_queue_size);
+
+  spin_unlock_irqrestore(&bio_queue_lock, flags);
+  wake_up(&toi_io_queue_flusher);
+}
+
+/**
+ * toi_rw_cleanup - Cleanup after i/o.
+ * @writing: Whether we were reading or writing.
+ *
+ * Flush all I/O and clean everything up after reading or writing a
+ * section of the image.
+ **/
+static int toi_rw_cleanup(int writing)
+{
+  int i, result = 0;
+
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_rw_cleanup.");
+  if (writing) {
+    if (toi_writer_buffer_posn && !test_result_state(TOI_ABORTED))
+      toi_bio_queue_write(&toi_writer_buffer);
+
+    while (bio_queue_head && !result)
+      result = toi_bio_queue_flush_pages(0);
+
+    if (result)
+      return result;
+
+    if (current_stream == 2)
+      toi_extent_state_save(1);
+    else if (current_stream == 1)
+      toi_extent_state_save(3);
+  }
+
+  result = toi_finish_all_io();
+
+  while (readahead_list_head) {
+    void *next = (void *) readahead_list_head->private;
+    toi__free_page(12, readahead_list_head);
+    readahead_list_head = next;
+  }
+
+  readahead_list_tail = NULL;
+
+  if (!current_stream)
+    return result;
+
+  for (i = 0; i < NUM_REASONS; i++) {
+    if (!atomic_read(&reasons[i]))
+      continue;
+    printk(KERN_DEBUG "Waited for i/o due to %s %d times.\n",
+        reason_name[i], atomic_read(&reasons[i]));
+    atomic_set(&reasons[i], 0);
+  }
+
+  current_stream = 0;
+  return result;
+}
+
+/**
+ * toi_start_one_readahead - start one page of readahead
+ * @dedicated_thread: Is this a thread dedicated to doing readahead?
+ *
+ * Start one new page of readahead. If this is being called by a thread
+ * whose only just is to submit readahead, don't quit because we failed
+ * to allocate a page.
+ **/
+static int toi_start_one_readahead(int dedicated_thread)
+{
+  char *buffer = NULL;
+  int oom = 0, result;
+
+  result = throttle_if_needed(dedicated_thread ? THROTTLE_WAIT : 0);
+  if (result) {
+    printk("toi_start_one_readahead: throttle_if_needed returned %d.\n", result);
+    return result;
+  }
+
+  mutex_lock(&toi_bio_readahead_mutex);
+
+  while (!buffer) {
+    buffer = (char *) toi_get_zeroed_page(12,
+        TOI_ATOMIC_GFP);
+    if (!buffer) {
+      if (oom && !dedicated_thread) {
+        mutex_unlock(&toi_bio_readahead_mutex);
+        printk("toi_start_one_readahead: oom and !dedicated thread %d.\n", result);
+        return -ENOMEM;
+      }
+
+      oom = 1;
+      set_free_mem_throttle();
+      do_bio_wait(5);
+    }
+  }
+
+  result = toi_bio_rw_page(READ, virt_to_page(buffer), 1, 0);
+  if (result) {
+    printk("toi_start_one_readahead: toi_bio_rw_page returned %d.\n", result);
+  }
+  if (result == -ENOSPC)
+    toi__free_page(12, virt_to_page(buffer));
+  mutex_unlock(&toi_bio_readahead_mutex);
+  if (result) {
+    if (result == -ENOSPC)
+      toi_message(TOI_BIO, TOI_VERBOSE, 0,
+          "Last readahead page submitted.");
+    else
+      printk(KERN_DEBUG "toi_bio_rw_page returned %d.\n",
+          result);
+  }
+  return result;
+}
+
+/**
+ * toi_start_new_readahead - start new readahead
+ * @dedicated_thread: Are we dedicated to this task?
+ *
+ * Start readahead of image pages.
+ *
+ * We can be called as a thread dedicated to this task (may be helpful on
+ * systems with lots of CPUs), in which case we don't exit until there's no
+ * more readahead.
+ *
+ * If this is not called by a dedicated thread, we top up our queue until
+ * there's no more readahead to submit, we've submitted the number given
+ * in target_outstanding_io or the number in progress exceeds the target
+ * outstanding I/O value.
+ *
+ * No mutex needed because this is only ever called by the first cpu.
+ **/
+static int toi_start_new_readahead(int dedicated_thread)
+{
+  int last_result, num_submitted = 0;
+
+  /* Start a new readahead? */
+  if (!more_readahead)
+    return 0;
+
+  do {
+    last_result = toi_start_one_readahead(dedicated_thread);
+
+    if (last_result) {
+      if (last_result == -ENOMEM || last_result == -ENOSPC)
+        return 0;
+
+      printk(KERN_DEBUG
+          "Begin read chunk returned %d.\n",
+          last_result);
+    } else
+      num_submitted++;
+
+  } while (more_readahead && !last_result &&
+      (dedicated_thread ||
+       (num_submitted < target_outstanding_io &&
+        atomic_read(&toi_io_in_progress) < target_outstanding_io)));
+
+  return last_result;
+}
+
+/**
+ * bio_io_flusher - start the dedicated I/O flushing routine
+ * @writing: Whether we're writing the image.
+ **/
+static int bio_io_flusher(int writing)
+{
+
+  if (writing)
+    return toi_bio_queue_flush_pages(1);
+  else
+    return toi_start_new_readahead(1);
+}
+
+/**
+ * toi_bio_get_next_page_read - read a disk page, perhaps with readahead
+ * @no_readahead: Whether we can use readahead
+ *
+ * Read a page from disk, submitting readahead and cleaning up finished i/o
+ * while we wait for the page we're after.
+ **/
+static int toi_bio_get_next_page_read(int no_readahead)
+{
+  char *virt;
+  struct page *old_readahead_list_head;
+
+  /*
+   * When reading the second page of the header, we have to
+   * delay submitting the read until after we've gotten the
+   * extents out of the first page.
+   */
+  if (unlikely(no_readahead)) {
+    int result = toi_start_one_readahead(0);
+    if (result) {
+      printk(KERN_EMERG "No readahead and toi_start_one_readahead "
+          "returned non-zero.\n");
+      return -EIO;
+    }
+  }
+
+  if (unlikely(!readahead_list_head)) {
+    /*
+     * If the last page finishes exactly on the page
+     * boundary, we will be called one extra time and
+     * have no data to return. In this case, we should
+     * not BUG(), like we used to!
+     */
+    if (!more_readahead) {
+      printk(KERN_EMERG "No more readahead.\n");
+      return -ENOSPC;
+    }
+    if (unlikely(toi_start_one_readahead(0))) {
+      printk(KERN_EMERG "No readahead and "
+          "toi_start_one_readahead returned non-zero.\n");
+      return -EIO;
+    }
+  }
+
+  if (PageLocked(readahead_list_head)) {
+    waiting_on = readahead_list_head;
+    do_bio_wait(0);
+  }
+
+  virt = page_address(readahead_list_head);
+  memcpy(toi_writer_buffer, virt, PAGE_SIZE);
+
+  mutex_lock(&toi_bio_readahead_mutex);
+  old_readahead_list_head = readahead_list_head;
+  readahead_list_head = (struct page *) readahead_list_head->private;
+  mutex_unlock(&toi_bio_readahead_mutex);
+  toi__free_page(12, old_readahead_list_head);
+  return 0;
+}
+
+/**
+ * toi_bio_queue_flush_pages - flush the queue of pages queued for writing
+ * @dedicated_thread: Whether we're a dedicated thread
+ *
+ * Flush the queue of pages ready to be written to disk.
+ *
+ * If we're a dedicated thread, stay in here until told to leave,
+ * sleeping in wait_event.
+ *
+ * The first thread is normally the only one to come in here. Another
+ * thread can enter this routine too, though, via throttle_if_needed.
+ * Since that's the case, we must be careful to only have one thread
+ * doing this work at a time. Otherwise we have a race and could save
+ * pages out of order.
+ *
+ * If an error occurs, free all remaining pages without submitting them
+ * for I/O.
+ **/
+
+int toi_bio_queue_flush_pages(int dedicated_thread)
+{
+  unsigned long flags;
+  int result = 0;
+  static DEFINE_MUTEX(busy);
+
+  if (!mutex_trylock(&busy))
+    return 0;
+
+top:
+  spin_lock_irqsave(&bio_queue_lock, flags);
+  while (bio_queue_head) {
+    struct page *page = bio_queue_head;
+    bio_queue_head = (struct page *) page->private;
+    if (bio_queue_tail == page)
+      bio_queue_tail = NULL;
+    atomic_dec(&toi_bio_queue_size);
+    spin_unlock_irqrestore(&bio_queue_lock, flags);
+
+    /* Don't generate more error messages if already had one */
+    if (!result)
+      result = toi_bio_rw_page(WRITE, page, 0, 11);
+    /*
+     * If writing the page failed, don't drop out.
+     * Flush the rest of the queue too.
+     */
+    if (result)
+      toi__free_page(11 , page);
+    spin_lock_irqsave(&bio_queue_lock, flags);
+  }
+  spin_unlock_irqrestore(&bio_queue_lock, flags);
+
+  if (dedicated_thread) {
+    wait_event(toi_io_queue_flusher, bio_queue_head ||
+        toi_bio_queue_flusher_should_finish);
+    if (likely(!toi_bio_queue_flusher_should_finish))
+      goto top;
+    toi_bio_queue_flusher_should_finish = 0;
+  }
+
+  mutex_unlock(&busy);
+  return result;
+}
+
+/**
+ * toi_bio_get_new_page - get a new page for I/O
+ * @full_buffer: Pointer to a page to allocate.
+ **/
+static int toi_bio_get_new_page(char **full_buffer)
+{
+  int result = throttle_if_needed(THROTTLE_WAIT);
+  if (result)
+    return result;
+
+  while (!*full_buffer) {
+    *full_buffer = (char *) toi_get_zeroed_page(11, TOI_ATOMIC_GFP);
+    if (!*full_buffer) {
+      set_free_mem_throttle();
+      do_bio_wait(3);
+    }
+  }
+
+  return 0;
+}
+
+/**
+ * toi_rw_buffer - combine smaller buffers into PAGE_SIZE I/O
+ * @writing:                Bool - whether writing (or reading).
+ * @buffer:                The start of the buffer to write or fill.
+ * @buffer_size:        The size of the buffer to write or fill.
+ * @no_readahead:        Don't try to start readhead (when getting extents).
+ **/
+static int toi_rw_buffer(int writing, char *buffer, int buffer_size,
+    int no_readahead)
+{
+  int bytes_left = buffer_size, result = 0;
+
+  while (bytes_left) {
+    char *source_start = buffer + buffer_size - bytes_left;
+    char *dest_start = toi_writer_buffer + toi_writer_buffer_posn;
+    int capacity = PAGE_SIZE - toi_writer_buffer_posn;
+    char *to = writing ? dest_start : source_start;
+    char *from = writing ? source_start : dest_start;
+
+    if (bytes_left <= capacity) {
+      memcpy(to, from, bytes_left);
+      toi_writer_buffer_posn += bytes_left;
+      return 0;
+    }
+
+    /* Complete this page and start a new one */
+    memcpy(to, from, capacity);
+    bytes_left -= capacity;
+
+    if (!writing) {
+      /*
+       * Perform actual I/O:
+       * read readahead_list_head into toi_writer_buffer
+       */
+      int result = toi_bio_get_next_page_read(no_readahead);
+      if (result && bytes_left) {
+        printk("toi_bio_get_next_page_read "
+            "returned %d. Expecting to read %d bytes.\n", result, bytes_left);
+        return result;
+      }
+    } else {
+      toi_bio_queue_write(&toi_writer_buffer);
+      result = toi_bio_get_new_page(&toi_writer_buffer);
+      if (result) {
+        printk(KERN_ERR "toi_bio_get_new_page returned "
+            "%d.\n", result);
+        return result;
+      }
+    }
+
+    toi_writer_buffer_posn = 0;
+    toi_cond_pause(0, NULL);
+  }
+
+  return 0;
+}
+
+/**
+ * toi_bio_read_page - read a page of the image
+ * @pfn:                The pfn where the data belongs.
+ * @buffer_page:        The page containing the (possibly compressed) data.
+ * @buf_size:                The number of bytes on @buffer_page used (PAGE_SIZE).
+ *
+ * Read a (possibly compressed) page from the image, into buffer_page,
+ * returning its pfn and the buffer size.
+ **/
+static int toi_bio_read_page(unsigned long *pfn, int buf_type,
+    void *buffer_page, unsigned int *buf_size)
+{
+  int result = 0;
+  int this_idx;
+  char *buffer_virt = TOI_MAP(buf_type, buffer_page);
+
+  /*
+   * Only call start_new_readahead if we don't have a dedicated thread
+   * and we're the queue flusher.
+   */
+  if (current == toi_queue_flusher && more_readahead &&
+      !test_action_state(TOI_NO_READAHEAD)) {
+    int result2 = toi_start_new_readahead(0);
+    if (result2) {
+      printk(KERN_DEBUG "Queue flusher and "
+          "toi_start_one_readahead returned non-zero.\n");
+      result = -EIO;
+      goto out;
+    }
+  }
+
+  my_mutex_lock(0, &toi_bio_mutex);
+
+  /*
+   * Structure in the image:
+   *        [destination pfn|page size|page data]
+   * buf_size is PAGE_SIZE
+   * We can validly find there's nothing to read in a multithreaded
+   * situation.
+   */
+  if (toi_rw_buffer(READ, (char *) &this_idx, sizeof(int), 0) ||
+      toi_rw_buffer(READ, (char *) pfn, sizeof(unsigned long), 0) ||
+      toi_rw_buffer(READ, (char *) buf_size, sizeof(int), 0) ||
+      toi_rw_buffer(READ, buffer_virt, *buf_size, 0)) {
+    result = -ENODATA;
+    goto out_unlock;
+  }
+
+  if (reset_idx) {
+    page_idx = this_idx;
+    reset_idx = 0;
+  } else {
+    page_idx++;
+    if (!this_idx)
+      result = -ENODATA;
+    else if (page_idx != this_idx)
+      printk(KERN_ERR "Got page index %d, expected %d.\n",
+          this_idx, page_idx);
+  }
+
+out_unlock:
+  my_mutex_unlock(0, &toi_bio_mutex);
+out:
+  TOI_UNMAP(buf_type, buffer_page);
+  return result;
+}
+
+/**
+ * toi_bio_write_page - write a page of the image
+ * @pfn:                The pfn where the data belongs.
+ * @buffer_page:        The page containing the (possibly compressed) data.
+ * @buf_size:        The number of bytes on @buffer_page used.
+ *
+ * Write a (possibly compressed) page to the image from the buffer, together
+ * with it's index and buffer size.
+ **/
+static int toi_bio_write_page(unsigned long pfn, int buf_type,
+    void *buffer_page, unsigned int buf_size)
+{
+  char *buffer_virt;
+  int result = 0, result2 = 0;
+
+  if (unlikely(test_action_state(TOI_TEST_FILTER_SPEED)))
+    return 0;
+
+  my_mutex_lock(1, &toi_bio_mutex);
+
+  if (test_result_state(TOI_ABORTED)) {
+    my_mutex_unlock(1, &toi_bio_mutex);
+    return 0;
+  }
+
+  buffer_virt = TOI_MAP(buf_type, buffer_page);
+  page_idx++;
+
+  /*
+   * Structure in the image:
+   *        [destination pfn|page size|page data]
+   * buf_size is PAGE_SIZE
+   */
+  if (toi_rw_buffer(WRITE, (char *) &page_idx, sizeof(int), 0) ||
+      toi_rw_buffer(WRITE, (char *) &pfn, sizeof(unsigned long), 0) ||
+      toi_rw_buffer(WRITE, (char *) &buf_size, sizeof(int), 0) ||
+      toi_rw_buffer(WRITE, buffer_virt, buf_size, 0)) {
+    printk(KERN_DEBUG "toi_rw_buffer returned non-zero to "
+        "toi_bio_write_page.\n");
+    result = -EIO;
+  }
+
+  TOI_UNMAP(buf_type, buffer_page);
+  my_mutex_unlock(1, &toi_bio_mutex);
+
+  if (current == toi_queue_flusher)
+    result2 = toi_bio_queue_flush_pages(0);
+
+  return result ? result : result2;
+}
+
+/**
+ * _toi_rw_header_chunk - read or write a portion of the image header
+ * @writing:                Whether reading or writing.
+ * @owner:                The module for which we're writing.
+ *                        Used for confirming that modules
+ *                        don't use more header space than they asked for.
+ * @buffer:                Address of the data to write.
+ * @buffer_size:        Size of the data buffer.
+ * @no_readahead:        Don't try to start readhead (when getting extents).
+ *
+ * Perform PAGE_SIZE I/O. Start readahead if needed.
+ **/
+static int _toi_rw_header_chunk(int writing, struct toi_module_ops *owner,
+    char *buffer, int buffer_size, int no_readahead)
+{
+  int result = 0;
+
+  if (owner) {
+    owner->header_used += buffer_size;
+    toi_message(TOI_HEADER, TOI_LOW, 1,
+        "Header: %s : %d bytes (%d/%d) from offset %d.",
+        owner->name,
+        buffer_size, owner->header_used,
+        owner->header_requested,
+        toi_writer_buffer_posn);
+    if (owner->header_used > owner->header_requested && writing) {
+      printk(KERN_EMERG "TuxOnIce module %s is using more "
+          "header space (%u) than it requested (%u).\n",
+          owner->name,
+          owner->header_used,
+          owner->header_requested);
+      return buffer_size;
+    }
+  } else {
+    unowned += buffer_size;
+    toi_message(TOI_HEADER, TOI_LOW, 1,
+        "Header: (No owner): %d bytes (%d total so far) from "
+        "offset %d.", buffer_size, unowned,
+        toi_writer_buffer_posn);
+  }
+
+  if (!writing && !no_readahead && more_readahead) {
+    result = toi_start_new_readahead(0);
+    toi_message(TOI_BIO, TOI_VERBOSE, 0, "Start new readahead "
+        "returned %d.", result);
+  }
+
+  if (!result) {
+    result = toi_rw_buffer(writing, buffer, buffer_size,
+        no_readahead);
+    toi_message(TOI_BIO, TOI_VERBOSE, 0, "rw_buffer returned "
+        "%d.", result);
+  }
+
+  total_header_bytes += buffer_size;
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "_toi_rw_header_chunk returning "
+      "%d.", result);
+  return result;
+}
+
+static int toi_rw_header_chunk(int writing, struct toi_module_ops *owner,
+    char *buffer, int size)
+{
+  return _toi_rw_header_chunk(writing, owner, buffer, size, 1);
+}
+
+static int toi_rw_header_chunk_noreadahead(int writing,
+    struct toi_module_ops *owner, char *buffer, int size)
+{
+  return _toi_rw_header_chunk(writing, owner, buffer, size, 1);
+}
+
+/**
+ * toi_bio_storage_needed - get the amount of storage needed for my fns
+ **/
+static int toi_bio_storage_needed(void)
+{
+  return sizeof(int) + PAGE_SIZE + toi_bio_devinfo_storage_needed();
+}
+
+/**
+ * toi_bio_save_config_info - save block I/O config to image header
+ * @buf:        PAGE_SIZE'd buffer into which data should be saved.
+ **/
+static int toi_bio_save_config_info(char *buf)
+{
+  int *ints = (int *) buf;
+  ints[0] = target_outstanding_io;
+  return sizeof(int);
+}
+
+/**
+ * toi_bio_load_config_info - restore block I/O config
+ * @buf:        Data to be reloaded.
+ * @size:        Size of the buffer saved.
+ **/
+static void toi_bio_load_config_info(char *buf, int size)
+{
+  int *ints = (int *) buf;
+  target_outstanding_io  = ints[0];
+}
+
+void close_resume_dev_t(int force)
+{
+  if (!resume_block_device)
+    return;
+
+  if (force)
+    atomic_set(&resume_bdev_open_count, 0);
+  else
+    atomic_dec(&resume_bdev_open_count);
+
+  if (!atomic_read(&resume_bdev_open_count)) {
+    toi_close_bdev(resume_block_device);
+    resume_block_device = NULL;
+  }
+}
+
+int open_resume_dev_t(int force, int quiet)
+{
+  if (force) {
+    close_resume_dev_t(1);
+    atomic_set(&resume_bdev_open_count, 1);
+  } else
+    atomic_inc(&resume_bdev_open_count);
+
+  if (resume_block_device)
+    return 0;
+
+  resume_block_device = toi_open_bdev(NULL, resume_dev_t, 0);
+  if (IS_ERR(resume_block_device)) {
+    if (!quiet)
+      toi_early_boot_message(1, TOI_CONTINUE_REQ,
+          "Failed to open device %x, where"
+          " the header should be found.",
+          resume_dev_t);
+    resume_block_device = NULL;
+    atomic_set(&resume_bdev_open_count, 0);
+    return 1;
+  }
+
+  return 0;
+}
+
+/**
+ * toi_bio_initialise - initialise bio code at start of some action
+ * @starting_cycle:        Whether starting a hibernation cycle, or just reading or
+ *                        writing a sysfs value.
+ **/
+static int toi_bio_initialise(int starting_cycle)
+{
+  int result;
+
+  if (!starting_cycle || !resume_dev_t)
+    return 0;
+
+  max_outstanding_writes = 0;
+  max_outstanding_reads = 0;
+  current_stream = 0;
+  toi_queue_flusher = current;
+#ifdef MEASURE_MUTEX_CONTENTION
+  {
+    int i, j, k;
+
+    for (i = 0; i < 2; i++)
+      for (j = 0; j < 2; j++)
+        for_each_online_cpu(k)
+          mutex_times[i][j][k] = 0;
+  }
+#endif
+  result = open_resume_dev_t(0, 1);
+
+  if (result)
+    return result;
+
+  result = toi_bio_register_storage();
+
+  if (result)
+    return result;
+
+  return get_signature_page();
+}
+
+static unsigned long raw_to_real(unsigned long raw)
+{
+  unsigned long extra;
+
+  extra = (raw * (sizeof(unsigned long) + sizeof(int)) +
+      (PAGE_SIZE + sizeof(unsigned long) + sizeof(int) + 1)) /
+    (PAGE_SIZE + sizeof(unsigned long) + sizeof(int));
+
+  return raw > extra ? raw - extra : 0;
+}
+
+static unsigned long toi_bio_storage_available(void)
+{
+  unsigned long sum = 0;
+  struct toi_module_ops *this_module;
+
+  list_for_each_entry(this_module, &toi_modules, module_list) {
+    if (!this_module->enabled ||
+        this_module->type != BIO_ALLOCATOR_MODULE)
+      continue;
+    toi_message(TOI_BIO, TOI_VERBOSE, 0, "Seeking storage "
+        "available from %s.", this_module->name);
+    sum += this_module->bio_allocator_ops->storage_available();
+  }
+
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "Total storage available is %lu "
+      "pages (%d header pages).", sum, header_pages_reserved);
+
+  return sum > header_pages_reserved ?
+    raw_to_real(sum - header_pages_reserved) : 0;
+
+}
+
+static unsigned long toi_bio_storage_allocated(void)
+{
+  return raw_pages_allocd > header_pages_reserved ?
+    raw_to_real(raw_pages_allocd - header_pages_reserved) : 0;
+}
+
+/*
+ * If we have read part of the image, we might have filled  memory with
+ * data that should be zeroed out.
+ */
+static void toi_bio_noresume_reset(void)
+{
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_noresume_reset.");
+  toi_rw_cleanup(READ);
+  free_all_bdev_info();
+}
+
+/**
+ * toi_bio_cleanup - cleanup after some action
+ * @finishing_cycle:        Whether completing a cycle.
+ **/
+static void toi_bio_cleanup(int finishing_cycle)
+{
+  if (!finishing_cycle)
+    return;
+
+  if (toi_writer_buffer) {
+    toi_free_page(11, (unsigned long) toi_writer_buffer);
+    toi_writer_buffer = NULL;
+  }
+
+  forget_signature_page();
+
+  if (header_block_device && toi_sig_data &&
+      toi_sig_data->header_dev_t != resume_dev_t)
+    toi_close_bdev(header_block_device);
+
+  header_block_device = NULL;
+
+  close_resume_dev_t(0);
+}
+
+static int toi_bio_write_header_init(void)
+{
+  int result;
+
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_write_header_init");
+  toi_rw_init(WRITE, 0);
+  toi_writer_buffer_posn = 0;
+
+  /* Info needed to bootstrap goes at the start of the header.
+   * First we save the positions and devinfo, including the number
+   * of header pages. Then we save the structs containing data needed
+   * for reading the header pages back.
+   * Note that even if header pages take more than one page, when we
+   * read back the info, we will have restored the location of the
+   * next header page by the time we go to use it.
+   */
+
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "serialise extent chains.");
+  result = toi_serialise_extent_chains();
+
+  if (result)
+    return result;
+
+  /*
+   * Signature page hasn't been modified at this point. Write it in
+   * the header so we can restore it later.
+   */
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "serialise signature page.");
+  return toi_rw_header_chunk_noreadahead(WRITE, &toi_blockwriter_ops,
+      (char *) toi_cur_sig_page,
+      PAGE_SIZE);
+}
+
+static int toi_bio_write_header_cleanup(void)
+{
+  int result = 0;
+
+  if (toi_writer_buffer_posn)
+    toi_bio_queue_write(&toi_writer_buffer);
+
+  result = toi_finish_all_io();
+
+  unowned = 0;
+  total_header_bytes = 0;
+
+  /* Set signature to save we have an image */
+  if (!result)
+    result = toi_bio_mark_have_image();
+
+  return result;
+}
+
+/*
+ * toi_bio_read_header_init()
+ *
+ * Description:
+ * 1. Attempt to read the device specified with resume=.
+ * 2. Check the contents of the swap header for our signature.
+ * 3. Warn, ignore, reset and/or continue as appropriate.
+ * 4. If continuing, read the toi_swap configuration section
+ *    of the header and set up block device info so we can read
+ *    the rest of the header & image.
+ *
+ * Returns:
+ * May not return if user choose to reboot at a warning.
+ * -EINVAL if cannot resume at this time. Booting should continue
+ * normally.
+ */
+
+static int toi_bio_read_header_init(void)
+{
+  int result = 0;
+  char buf[32];
+
+  toi_writer_buffer_posn = 0;
+
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_read_header_init");
+
+  if (!toi_sig_data) {
+    printk(KERN_INFO "toi_bio_read_header_init called when we "
+        "haven't verified there is an image!\n");
+    return -EINVAL;
+  }
+
+  /*
+   * If the header is not on the resume_swap_dev_t, get the resume device
+   * first.
+   */
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "Header dev_t is %lx.",
+      toi_sig_data->header_dev_t);
+  if (toi_sig_data->have_uuid) {
+    struct fs_info seek;
+    dev_t device;
+
+    strncpy((char *) seek.uuid, toi_sig_data->header_uuid, 16);
+    seek.dev_t = toi_sig_data->header_dev_t;
+    seek.last_mount_size = 0;
+    device = blk_lookup_fs_info(&seek);
+    if (device) {
+      printk("Using dev_t %s, returned by blk_lookup_fs_info.\n",
+          format_dev_t(buf, device));
+      toi_sig_data->header_dev_t = device;
+    }
+  }
+  if (toi_sig_data->header_dev_t != resume_dev_t) {
+    header_block_device = toi_open_bdev(NULL,
+        toi_sig_data->header_dev_t, 1);
+
+    if (IS_ERR(header_block_device))
+      return PTR_ERR(header_block_device);
+  } else
+    header_block_device = resume_block_device;
+
+  if (!toi_writer_buffer)
+    toi_writer_buffer = (char *) toi_get_zeroed_page(11,
+        TOI_ATOMIC_GFP);
+  more_readahead = 1;
+
+  /*
+   * Read toi_swap configuration.
+   * Headerblock size taken into account already.
+   */
+  result = toi_bio_ops.bdev_page_io(READ, header_block_device,
+      toi_sig_data->first_header_block,
+      virt_to_page((unsigned long) toi_writer_buffer));
+  if (result)
+    return result;
+
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "load extent chains.");
+  result = toi_load_extent_chains();
+
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "load original signature page.");
+  toi_orig_sig_page = (char *) toi_get_zeroed_page(38, TOI_ATOMIC_GFP);
+  if (!toi_orig_sig_page) {
+    printk(KERN_ERR "Failed to allocate memory for the current"
+        " image signature.\n");
+    return -ENOMEM;
+  }
+
+  return toi_rw_header_chunk_noreadahead(READ, &toi_blockwriter_ops,
+      (char *) toi_orig_sig_page,
+      PAGE_SIZE);
+}
+
+static int toi_bio_read_header_cleanup(void)
+{
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_read_header_cleanup.");
+  return toi_rw_cleanup(READ);
+}
+
+/* Works only for digits and letters, but small and fast */
+#define TOLOWER(x) ((x) | 0x20)
+
+/*
+ * UUID must be 32 chars long. It may have dashes, but nothing
+ * else.
+ */
+char *uuid_from_commandline(char *commandline)
+{
+  int low = 0;
+  char *result = NULL, *output, *ptr;
+
+  if (strncmp(commandline, "UUID=", 5))
+    return NULL;
+
+  result = kzalloc(17, GFP_KERNEL);
+  if (!result) {
+    printk("Failed to kzalloc UUID text memory.\n");
+    return NULL;
+  }
+
+  ptr = commandline + 5;
+  output = result;
+
+  while (*ptr && (output - result) < 16) {
+    if (isxdigit(*ptr)) {
+      int value = isdigit(*ptr) ? *ptr - '0' :
+        TOLOWER(*ptr) - 'a' + 10;
+      if (low) {
+        *output += value;
+        output++;
+      } else {
+        *output = value << 4;
+      }
+      low = !low;
+    } else if (*ptr != '-')
+      break;
+    ptr++;
+  }
+
+  if ((output - result) < 16 || *ptr) {
+    printk(KERN_DEBUG "Found resume=UUID=, but the value looks "
+        "invalid.\n");
+    kfree(result);
+    result = NULL;
+  }
+
+  return result;
+}
+
+#define retry_if_fails(command) \
+  do { \
+    command; \
+    if (!resume_dev_t && !waited_for_device_probe) { \
+      wait_for_device_probe(); \
+      command; \
+      waited_for_device_probe = 1; \
+    } \
+  } while(0)
+
+/**
+ * try_to_open_resume_device: Try to parse and open resume=
+ *
+ * Any "swap:" has been stripped away and we just have the path to deal with.
+ * We attempt to do name_to_dev_t, open and stat the file. Having opened the
+ * file, get the struct block_device * to match.
+ */
+static int try_to_open_resume_device(char *commandline, int quiet)
+{
+  struct kstat stat;
+  int error = 0;
+  char *uuid = uuid_from_commandline(commandline);
+  int waited_for_device_probe = 0;
+
+  resume_dev_t = MKDEV(0, 0);
+
+  if (!strlen(commandline))
+    retry_if_fails(toi_bio_scan_for_image(quiet));
+
+  if (uuid) {
+    struct fs_info seek;
+    strncpy((char *) &seek.uuid, uuid, 16);
+    seek.dev_t = resume_dev_t;
+    seek.last_mount_size = 0;
+    retry_if_fails(resume_dev_t = blk_lookup_fs_info(&seek));
+    kfree(uuid);
+  }
+
+  if (!resume_dev_t)
+    retry_if_fails(resume_dev_t = name_to_dev_t(commandline));
+
+  if (!resume_dev_t) {
+    struct file *file = filp_open(commandline,
+        O_RDONLY|O_LARGEFILE, 0);
+
+    if (!IS_ERR(file) && file) {
+      vfs_getattr(&file->f_path, &stat,
+          STATX_INO, AT_STATX_SYNC_AS_STAT);
+      filp_close(file, NULL);
+    } else
+      error = vfs_stat(commandline, &stat);
+    if (!error)
+      resume_dev_t = stat.rdev;
+  }
+
+  if (!resume_dev_t) {
+    if (quiet)
+      return 1;
+
+    if (test_toi_state(TOI_TRYING_TO_RESUME))
+      toi_early_boot_message(1, toi_translate_err_default,
+          "Failed to translate \"%s\" into a device id.\n",
+          commandline);
+    else
+      printk("TuxOnIce: Can't translate \"%s\" into a device "
+          "id yet.\n", commandline);
+    return 1;
+  }
+
+  return open_resume_dev_t(1, quiet);
+}
+
+/*
+ * Parse Image Location
+ *
+ * Attempt to parse a resume= parameter.
+ * Swap Writer accepts:
+ * resume=[swap:|file:]DEVNAME[:FIRSTBLOCK][@BLOCKSIZE]
+ *
+ * Where:
+ * DEVNAME is convertable to a dev_t by name_to_dev_t
+ * FIRSTBLOCK is the location of the first block in the swap file
+ * (specifying for a swap partition is nonsensical but not prohibited).
+ * Data is validated by attempting to read a swap header from the
+ * location given. Failure will result in toi_swap refusing to
+ * save an image, and a reboot with correct parameters will be
+ * necessary.
+ */
+static int toi_bio_parse_sig_location(char *commandline,
+    int only_allocator, int quiet)
+{
+  char *thischar, *devstart, *colon = NULL;
+  int signature_found, result = -EINVAL, temp_result = 0;
+
+  if (strncmp(commandline, "swap:", 5) &&
+      strncmp(commandline, "file:", 5)) {
+    /*
+     * Failing swap:, we'll take a simple resume=/dev/hda2, or a
+     * blank value (scan) but fall through to other allocators
+     * if /dev/ or UUID= isn't matched.
+     */
+    if (strncmp(commandline, "/dev/", 5) &&
+        strncmp(commandline, "UUID=", 5) &&
+        strlen(commandline))
+      return 1;
+  } else
+    commandline += 5;
+
+  devstart = commandline;
+  thischar = commandline;
+  while ((*thischar != ':') && (*thischar != '@') &&
+      ((thischar - commandline) < 250) && (*thischar))
+    thischar++;
+
+  if (*thischar == ':') {
+    colon = thischar;
+    *colon = 0;
+    thischar++;
+  }
+
+  while ((thischar - commandline) < 250 && *thischar)
+    thischar++;
+
+  if (colon) {
+    unsigned long block;
+    temp_result = kstrtoul(colon + 1, 0, &block);
+    if (!temp_result)
+      resume_firstblock = (int) block;
+  } else
+    resume_firstblock = 0;
+
+  clear_toi_state(TOI_CAN_HIBERNATE);
+  clear_toi_state(TOI_CAN_RESUME);
+
+  if (!temp_result)
+    temp_result = try_to_open_resume_device(devstart, quiet);
+
+  if (colon)
+    *colon = ':';
+
+  /* No error if we only scanned */
+  if (temp_result)
+    return strlen(commandline) ? -EINVAL : 1;
+
+  signature_found = toi_bio_image_exists(quiet);
+
+  if (signature_found != -1) {
+    result = 0;
+    /*
+     * TODO: If only file storage, CAN_HIBERNATE should only be
+     * set if file allocator's target is valid.
+     */
+    set_toi_state(TOI_CAN_HIBERNATE);
+    set_toi_state(TOI_CAN_RESUME);
+  } else
+    if (!quiet)
+      printk(KERN_ERR "TuxOnIce: Block I/O: No "
+          "signature found at %s.\n", devstart);
+
+  return result;
+}
+
+static void toi_bio_release_storage(void)
+{
+  header_pages_reserved = 0;
+  raw_pages_allocd = 0;
+
+  free_all_bdev_info();
+}
+
+/* toi_swap_remove_image
+ *
+ */
+static int toi_bio_remove_image(void)
+{
+  int result;
+
+  toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_remove_image.");
+
+  result = toi_bio_restore_original_signature();
+
+  /*
+   * We don't do a sanity check here: we want to restore the swap
+   * whatever version of kernel made the hibernate image.
+   *
+   * We need to write swap, but swap may not be enabled so
+   * we write the device directly
+   *
+   * If we don't have an current_signature_page, we didn't
+   * read an image header, so don't change anything.
+   */
+
+  toi_bio_release_storage();
+
+  return result;
+}
+
+struct toi_bio_ops toi_bio_ops = {
+  .bdev_page_io = toi_bdev_page_io,
+  .register_storage = toi_register_storage_chain,
+  .free_storage = toi_bio_release_storage,
+};
+
+static struct toi_sysfs_data sysfs_params[] = {
+  SYSFS_INT("target_outstanding_io", SYSFS_RW, &target_outstanding_io,
+      0, 16384, 0, NULL),
+};
+
+struct toi_module_ops toi_blockwriter_ops = {
+  .type                           = WRITER_MODULE,
+  .name                           = "block i/o",
+  .directory                      = "block_io",
+  .module                         = THIS_MODULE,
+  .memory_needed                  = toi_bio_memory_needed,
+  .print_debug_info               = toi_bio_print_debug_stats,
+  .storage_needed                 = toi_bio_storage_needed,
+  .save_config_info               = toi_bio_save_config_info,
+  .load_config_info               = toi_bio_load_config_info,
+  .initialise                     = toi_bio_initialise,
+  .cleanup                        = toi_bio_cleanup,
+  .post_atomic_restore            = toi_bio_chains_post_atomic,
+
+  .rw_init                        = toi_rw_init,
+  .rw_cleanup                     = toi_rw_cleanup,
+  .read_page                      = toi_bio_read_page,
+  .write_page                     = toi_bio_write_page,
+  .rw_header_chunk                = toi_rw_header_chunk,
+  .rw_header_chunk_noreadahead    = toi_rw_header_chunk_noreadahead,
+  .io_flusher                     = bio_io_flusher,
+  .update_throughput_throttle     = update_throughput_throttle,
+  .finish_all_io                  = toi_finish_all_io,
+
+  .noresume_reset                 = toi_bio_noresume_reset,
+  .storage_available              = toi_bio_storage_available,
+  .storage_allocated              = toi_bio_storage_allocated,
+  .reserve_header_space           = toi_bio_reserve_header_space,
+  .allocate_storage               = toi_bio_allocate_storage,
+  .free_unused_storage            = toi_bio_free_unused_storage,
+  .image_exists                   = toi_bio_image_exists,
+  .mark_resume_attempted          = toi_bio_mark_resume_attempted,
+  .write_header_init              = toi_bio_write_header_init,
+  .write_header_cleanup           = toi_bio_write_header_cleanup,
+  .read_header_init               = toi_bio_read_header_init,
+  .read_header_cleanup            = toi_bio_read_header_cleanup,
+  .get_header_version             = toi_bio_get_header_version,
+  .remove_image                   = toi_bio_remove_image,
+  .parse_sig_location             = toi_bio_parse_sig_location,
+
+  .sysfs_data                     = sysfs_params,
+  .num_sysfs_entries              = sizeof(sysfs_params) /
+    sizeof(struct toi_sysfs_data),
+};
+
+/**
+ * toi_block_io_load - load time routine for block I/O module
+ *
+ * Register block i/o ops and sysfs entries.
+ **/
+static __init int toi_block_io_load(void)
+{
+  return toi_register_module(&toi_blockwriter_ops);
+}
+
+late_initcall(toi_block_io_load);
diff --git a/kernel/power/tuxonice_bio_internal.h b/kernel/power/tuxonice_bio_internal.h
new file mode 100644
index 000000000..7b2015160
--- /dev/null
+++ b/kernel/power/tuxonice_bio_internal.h
@@ -0,0 +1,101 @@
+/*
+ * kernel/power/tuxonice_bio_internal.h
+ *
+ * Copyright (C) 2009-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * Distributed under GPLv2.
+ *
+ * This file contains declarations for functions exported from
+ * tuxonice_bio.c, which contains low level io functions.
+ */
+
+/* Extent chains */
+void toi_extent_state_goto_start(void);
+void toi_extent_state_save(int slot);
+int go_next_page(int writing, int section_barrier);
+void toi_extent_state_restore(int slot);
+void free_all_bdev_info(void);
+int devices_of_same_priority(struct toi_bdev_info *this);
+int toi_register_storage_chain(struct toi_bdev_info *new);
+int toi_serialise_extent_chains(void);
+int toi_load_extent_chains(void);
+int toi_bio_rw_page(int writing, struct page *page, int is_readahead,
+    int free_group);
+int toi_bio_restore_original_signature(void);
+int toi_bio_devinfo_storage_needed(void);
+unsigned long get_headerblock(void);
+dev_t get_header_dev_t(void);
+struct block_device *get_header_bdev(void);
+int toi_bio_allocate_storage(unsigned long request);
+void toi_bio_free_unused_storage(void);
+
+/* Signature functions */
+#define HaveImage "HaveImage"
+#define NoImage "TuxOnIce"
+#define sig_size (sizeof(HaveImage))
+
+struct sig_data {
+  char sig[sig_size];
+  int have_image;
+  int resumed_before;
+
+  char have_uuid;
+  char header_uuid[17];
+  dev_t header_dev_t;
+  unsigned long first_header_block;
+
+  /* Repeat the signature to be sure we have a header version */
+  char sig2[sig_size];
+  int header_version;
+};
+
+void forget_signature_page(void);
+int toi_check_for_signature(void);
+int toi_bio_image_exists(int quiet);
+int get_signature_page(void);
+int toi_bio_mark_resume_attempted(int);
+extern char *toi_cur_sig_page;
+extern char *toi_orig_sig_page;
+int toi_bio_mark_have_image(void);
+extern struct sig_data *toi_sig_data;
+extern dev_t resume_dev_t;
+extern struct block_device *resume_block_device;
+extern struct block_device *header_block_device;
+extern unsigned long resume_firstblock;
+
+struct block_device *open_bdev(dev_t device, int display_errs);
+extern int current_stream;
+extern int more_readahead;
+int toi_do_io(int writing, struct block_device *bdev, long block0,
+    struct page *page, int is_readahead, int syncio, int free_group);
+int get_main_pool_phys_params(void);
+
+void toi_close_bdev(struct block_device *bdev);
+struct block_device *toi_open_bdev(char *uuid, dev_t default_device,
+    int display_errs);
+
+extern struct toi_module_ops toi_blockwriter_ops;
+void dump_block_chains(void);
+void debug_broken_header(void);
+extern unsigned long raw_pages_allocd, header_pages_reserved;
+int toi_bio_chains_debug_info(char *buffer, int size);
+void toi_bio_chains_post_atomic(struct toi_boot_kernel_data *bkd);
+int toi_bio_scan_for_image(int quiet);
+int toi_bio_get_header_version(void);
+
+void close_resume_dev_t(int force);
+int open_resume_dev_t(int force, int quiet);
+
+struct toi_incremental_image_pointer_saved_data {
+  unsigned long block;
+  int chain;
+};
+
+struct toi_incremental_image_pointer {
+  struct toi_incremental_image_pointer_saved_data save;
+  struct block_device *bdev;
+  unsigned long block;
+};
+
+void toi_bio_store_inc_image_ptr(struct toi_incremental_image_pointer *ptr);
+void toi_bio_restore_inc_image_ptr(struct toi_incremental_image_pointer *ptr);
diff --git a/kernel/power/tuxonice_bio_signature.c b/kernel/power/tuxonice_bio_signature.c
new file mode 100644
index 000000000..cfc886679
--- /dev/null
+++ b/kernel/power/tuxonice_bio_signature.c
@@ -0,0 +1,403 @@
+/*
+ * kernel/power/tuxonice_bio_signature.c
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * Distributed under GPLv2.
+ *
+ */
+
+#include <linux/fs_uuid.h>
+
+#include "tuxonice.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_prepare_image.h"
+#include "tuxonice_bio.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_io.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_bio_internal.h"
+
+struct sig_data *toi_sig_data;
+
+/* Struct of swap header pages */
+
+struct old_sig_data {
+  dev_t device;
+  unsigned long sector;
+  int resume_attempted;
+  int orig_sig_type;
+};
+
+union diskpage {
+  union swap_header swh;        /* swh.magic is the only member used */
+  struct sig_data sig_data;
+  struct old_sig_data old_sig_data;
+};
+
+union p_diskpage {
+  union diskpage *pointer;
+  char *ptr;
+  unsigned long address;
+};
+
+char *toi_cur_sig_page;
+char *toi_orig_sig_page;
+int have_image;
+int have_old_image;
+
+int get_signature_page(void)
+{
+  if (!toi_cur_sig_page) {
+    toi_message(TOI_IO, TOI_VERBOSE, 0,
+        "Allocating current signature page.");
+    toi_cur_sig_page = (char *) toi_get_zeroed_page(38,
+        TOI_ATOMIC_GFP);
+    if (!toi_cur_sig_page) {
+      printk(KERN_ERR "Failed to allocate memory for the "
+          "current image signature.\n");
+      return -ENOMEM;
+    }
+
+    toi_sig_data = (struct sig_data *) toi_cur_sig_page;
+  }
+
+  toi_message(TOI_IO, TOI_VERBOSE, 0, "Reading signature from dev %lx,"
+      " sector %d.",
+      resume_block_device->bd_dev, resume_firstblock);
+
+  return toi_bio_ops.bdev_page_io(READ, resume_block_device,
+      resume_firstblock, virt_to_page(toi_cur_sig_page));
+}
+
+void forget_signature_page(void)
+{
+  if (toi_cur_sig_page) {
+    toi_sig_data = NULL;
+    toi_message(TOI_IO, TOI_VERBOSE, 0, "Freeing toi_cur_sig_page"
+        " (%p).", toi_cur_sig_page);
+    toi_free_page(38, (unsigned long) toi_cur_sig_page);
+    toi_cur_sig_page = NULL;
+  }
+
+  if (toi_orig_sig_page) {
+    toi_message(TOI_IO, TOI_VERBOSE, 0, "Freeing toi_orig_sig_page"
+        " (%p).", toi_orig_sig_page);
+    toi_free_page(38, (unsigned long) toi_orig_sig_page);
+    toi_orig_sig_page = NULL;
+  }
+}
+
+/*
+ * We need to ensure we use the signature page that's currently on disk,
+ * so as to not remove the image header. Post-atomic-restore, the orig sig
+ * page will be empty, so we can use that as our method of knowing that we
+ * need to load the on-disk signature and not use the non-image sig in
+ * memory. (We're going to powerdown after writing the change, so it's safe.
+ */
+int toi_bio_mark_resume_attempted(int flag)
+{
+  toi_message(TOI_IO, TOI_VERBOSE, 0, "Make resume attempted = %d.",
+      flag);
+  if (!toi_orig_sig_page) {
+    forget_signature_page();
+    get_signature_page();
+  }
+  toi_sig_data->resumed_before = flag;
+  return toi_bio_ops.bdev_page_io(WRITE, resume_block_device,
+      resume_firstblock, virt_to_page(toi_cur_sig_page));
+}
+
+int toi_bio_mark_have_image(void)
+{
+  int result = 0;
+  char buf[32];
+  struct fs_info *fs_info;
+
+  toi_message(TOI_IO, TOI_VERBOSE, 0, "Recording that an image exists.");
+  memcpy(toi_sig_data->sig, tuxonice_signature,
+      sizeof(tuxonice_signature));
+  toi_sig_data->have_image = 1;
+  toi_sig_data->resumed_before = 0;
+  toi_sig_data->header_dev_t = get_header_dev_t();
+  toi_sig_data->have_uuid = 0;
+
+  fs_info = fs_info_from_block_dev(get_header_bdev());
+  if (fs_info && !IS_ERR(fs_info)) {
+    memcpy(toi_sig_data->header_uuid, &fs_info->uuid, 16);
+    free_fs_info(fs_info);
+  } else
+    result = (int) PTR_ERR(fs_info);
+
+  if (!result) {
+    toi_message(TOI_IO, TOI_VERBOSE, 0, "Got uuid for dev_t %s.",
+        format_dev_t(buf, get_header_dev_t()));
+    toi_sig_data->have_uuid = 1;
+  } else
+    toi_message(TOI_IO, TOI_VERBOSE, 0, "Could not get uuid for "
+        "dev_t %s.",
+        format_dev_t(buf, get_header_dev_t()));
+
+  toi_sig_data->first_header_block = get_headerblock();
+  have_image = 1;
+  toi_message(TOI_IO, TOI_VERBOSE, 0, "header dev_t is %x. First block "
+      "is %d.", toi_sig_data->header_dev_t,
+      toi_sig_data->first_header_block);
+
+  memcpy(toi_sig_data->sig2, tuxonice_signature,
+      sizeof(tuxonice_signature));
+  toi_sig_data->header_version = TOI_HEADER_VERSION;
+
+  return toi_bio_ops.bdev_page_io(WRITE, resume_block_device,
+      resume_firstblock, virt_to_page(toi_cur_sig_page));
+}
+
+int remove_old_signature(void)
+{
+  union p_diskpage swap_header_page = (union p_diskpage) toi_cur_sig_page;
+  char *orig_sig;
+  char *header_start = (char *) toi_get_zeroed_page(38, TOI_ATOMIC_GFP);
+  int result;
+  struct block_device *header_bdev;
+  struct old_sig_data *old_sig_data =
+    &swap_header_page.pointer->old_sig_data;
+
+  header_bdev = toi_open_bdev(NULL, old_sig_data->device, 1);
+  result = toi_bio_ops.bdev_page_io(READ, header_bdev,
+      old_sig_data->sector, virt_to_page(header_start));
+
+  if (result)
+    goto out;
+
+  /*
+   * TODO: Get the original contents of the first bytes of the swap
+   * header page.
+   */
+  if (!old_sig_data->orig_sig_type)
+    orig_sig = "SWAP-SPACE";
+  else
+    orig_sig = "SWAPSPACE2";
+
+  memcpy(swap_header_page.pointer->swh.magic.magic, orig_sig, 10);
+  memcpy(swap_header_page.ptr, header_start, 10);
+
+  result = toi_bio_ops.bdev_page_io(WRITE, resume_block_device,
+      resume_firstblock, virt_to_page(swap_header_page.ptr));
+
+out:
+  toi_close_bdev(header_bdev);
+  have_old_image = 0;
+  toi_free_page(38, (unsigned long) header_start);
+  return result;
+}
+
+/*
+ * toi_bio_restore_original_signature - restore the original signature
+ *
+ * At boot time (aborting pre atomic-restore), toi_orig_sig_page gets used.
+ * It will have the original signature page contents, stored in the image
+ * header. Post atomic-restore, we use :toi_cur_sig_page, which will contain
+ * the contents that were loaded when we started the cycle.
+ */
+int toi_bio_restore_original_signature(void)
+{
+  char *use = toi_orig_sig_page ? toi_orig_sig_page : toi_cur_sig_page;
+
+  if (have_old_image)
+    return remove_old_signature();
+
+  if (!use) {
+    printk("toi_bio_restore_original_signature: No signature "
+        "page loaded.\n");
+    return 0;
+  }
+
+  toi_message(TOI_IO, TOI_VERBOSE, 0, "Recording that no image exists.");
+  have_image = 0;
+  toi_sig_data->have_image = 0;
+  return toi_bio_ops.bdev_page_io(WRITE, resume_block_device,
+      resume_firstblock, virt_to_page(use));
+}
+
+/*
+ * check_for_signature - See whether we have an image.
+ *
+ * Returns 0 if no image, 1 if there is one, -1 if indeterminate.
+ */
+int toi_check_for_signature(void)
+{
+  union p_diskpage swap_header_page;
+  int type;
+  const char *normal_sigs[] = {"SWAP-SPACE", "SWAPSPACE2" };
+  const char *swsusp_sigs[] = {"S1SUSP", "S2SUSP", "S1SUSPEND" };
+  char *swap_header;
+
+  if (!toi_cur_sig_page) {
+    int result = get_signature_page();
+
+    if (result)
+      return result;
+  }
+
+  /*
+   * Start by looking for the binary header.
+   */
+  if (!memcmp(tuxonice_signature, toi_cur_sig_page,
+        sizeof(tuxonice_signature))) {
+    have_image = toi_sig_data->have_image;
+    toi_message(TOI_IO, TOI_VERBOSE, 0, "Have binary signature. "
+        "Have image is %d.", have_image);
+    if (have_image)
+      toi_message(TOI_IO, TOI_VERBOSE, 0, "header dev_t is "
+          "%x. First block is %d.",
+          toi_sig_data->header_dev_t,
+          toi_sig_data->first_header_block);
+    return toi_sig_data->have_image;
+  }
+
+  /*
+   * Failing that, try old file allocator headers.
+   */
+
+  if (!memcmp(HaveImage, toi_cur_sig_page, strlen(HaveImage))) {
+    have_image = 1;
+    return 1;
+  }
+
+  have_image = 0;
+
+  if (!memcmp(NoImage, toi_cur_sig_page, strlen(NoImage)))
+    return 0;
+
+  /*
+   * Nope? How about swap?
+   */
+  swap_header_page = (union p_diskpage) toi_cur_sig_page;
+  swap_header = swap_header_page.pointer->swh.magic.magic;
+
+  /* Normal swapspace? */
+  for (type = 0; type < 2; type++)
+    if (!memcmp(normal_sigs[type], swap_header,
+          strlen(normal_sigs[type])))
+      return 0;
+
+  /* Swsusp or uswsusp? */
+  for (type = 0; type < 3; type++)
+    if (!memcmp(swsusp_sigs[type], swap_header,
+          strlen(swsusp_sigs[type])))
+      return 2;
+
+  /* Old TuxOnIce version? */
+  if (!memcmp(tuxonice_signature, swap_header,
+        sizeof(tuxonice_signature) - 1)) {
+    toi_message(TOI_IO, TOI_VERBOSE, 0, "Found old TuxOnIce "
+        "signature.");
+    have_old_image = 1;
+    return 3;
+  }
+
+  return -1;
+}
+
+/*
+ * Image_exists
+ *
+ * Returns -1 if don't know, otherwise 0 (no) or 1 (yes).
+ */
+int toi_bio_image_exists(int quiet)
+{
+  int result;
+  char *msg = NULL;
+
+  toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_bio_image_exists.");
+
+  if (!resume_dev_t) {
+    if (!quiet)
+      printk(KERN_INFO "Not even trying to read header "
+          "because resume_dev_t is not set.\n");
+    return -1;
+  }
+
+  if (open_resume_dev_t(0, quiet))
+    return -1;
+
+  result = toi_check_for_signature();
+
+  clear_toi_state(TOI_RESUMED_BEFORE);
+  if (toi_sig_data->resumed_before)
+    set_toi_state(TOI_RESUMED_BEFORE);
+
+  if (quiet || result == -ENOMEM)
+    return result;
+
+  if (result == -1)
+    msg = "TuxOnIce: Unable to find a signature."
+      " Could you have moved a swap file?\n";
+  else if (!result)
+    msg = "TuxOnIce: No image found.\n";
+  else if (result == 1)
+    msg = "TuxOnIce: Image found.\n";
+  else if (result == 2)
+    msg = "TuxOnIce: uswsusp or swsusp image found.\n";
+  else if (result == 3)
+    msg = "TuxOnIce: Old implementation's signature found.\n";
+
+  printk(KERN_INFO "%s", msg);
+
+  return result;
+}
+
+int toi_bio_scan_for_image(int quiet)
+{
+  struct block_device *bdev;
+  char default_name[255] = "";
+
+  if (!quiet)
+    printk(KERN_DEBUG "Scanning swap devices for TuxOnIce "
+        "signature...\n");
+  for (bdev = next_bdev_of_type(NULL, "swap"); bdev;
+      bdev = next_bdev_of_type(bdev, "swap")) {
+    int result;
+    char name[255] = "";
+    sprintf(name, "%u:%u", MAJOR(bdev->bd_dev),
+        MINOR(bdev->bd_dev));
+    if (!quiet)
+      printk(KERN_DEBUG "- Trying %s.\n", name);
+    resume_block_device = bdev;
+    resume_dev_t = bdev->bd_dev;
+
+    result = toi_check_for_signature();
+
+    resume_block_device = NULL;
+    resume_dev_t = MKDEV(0, 0);
+
+    if (!default_name[0])
+      strcpy(default_name, name);
+
+    if (result == 1) {
+      /* Got one! */
+      strcpy(resume_file, name);
+      next_bdev_of_type(bdev, NULL);
+      if (!quiet)
+        printk(KERN_DEBUG " ==> Image found on %s.\n",
+            resume_file);
+      return 1;
+    }
+    forget_signature_page();
+  }
+
+  if (!quiet)
+    printk(KERN_DEBUG "TuxOnIce scan: No image found.\n");
+  strcpy(resume_file, default_name);
+  return 0;
+}
+
+int toi_bio_get_header_version(void)
+{
+  return (memcmp(toi_sig_data->sig2, tuxonice_signature,
+        sizeof(tuxonice_signature))) ?
+    0 : toi_sig_data->header_version;
+
+}
diff --git a/kernel/power/tuxonice_builtin.c b/kernel/power/tuxonice_builtin.c
new file mode 100644
index 000000000..324e7eaf0
--- /dev/null
+++ b/kernel/power/tuxonice_builtin.c
@@ -0,0 +1,502 @@
+/*
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ */
+#include <linux/kernel.h>
+#include <linux/swap.h>
+#include <linux/syscalls.h>
+#include <linux/bio.h>
+#include <linux/root_dev.h>
+#include <linux/freezer.h>
+#include <linux/reboot.h>
+#include <linux/writeback.h>
+#include <linux/tty.h>
+#include <linux/crypto.h>
+#include <linux/cpu.h>
+#include <linux/ctype.h>
+#include <linux/kthread.h>
+#include <trace/events/power.h>
+#include "tuxonice_io.h"
+#include "tuxonice.h"
+#include "tuxonice_extent.h"
+#include "tuxonice_netlink.h"
+#include "tuxonice_prepare_image.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_pagedir.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_power_off.h"
+#include "tuxonice_alloc.h"
+
+unsigned long toi_bootflags_mask;
+
+/*
+ * Highmem related functions (x86 only).
+ */
+
+#ifdef CONFIG_HIGHMEM
+
+/**
+ * copyback_high: Restore highmem pages.
+ *
+ * Highmem data and pbe lists are/can be stored in highmem.
+ * The format is slightly different to the lowmem pbe lists
+ * used for the assembly code: the last pbe in each page is
+ * a struct page * instead of struct pbe *, pointing to the
+ * next page where pbes are stored (or NULL if happens to be
+ * the end of the list). Since we don't want to generate
+ * unnecessary deltas against swsusp code, we use a cast
+ * instead of a union.
+ **/
+
+static void copyback_high(void)
+{
+  struct page *pbe_page = (struct page *) restore_highmem_pblist;
+  struct pbe *this_pbe, *first_pbe;
+  unsigned long *origpage, *copypage;
+  int pbe_index = 1;
+
+  if (!pbe_page)
+    return;
+
+  this_pbe = (struct pbe *) kmap_atomic(pbe_page);
+  first_pbe = this_pbe;
+
+  while (this_pbe) {
+    int loop = (PAGE_SIZE / sizeof(unsigned long)) - 1;
+
+    origpage = kmap_atomic(pfn_to_page((unsigned long) this_pbe->orig_address));
+    copypage = kmap_atomic((struct page *) this_pbe->address);
+
+    while (loop >= 0) {
+      *(origpage + loop) = *(copypage + loop);
+      loop--;
+    }
+
+    kunmap_atomic(origpage);
+    kunmap_atomic(copypage);
+
+    if (!this_pbe->next)
+      break;
+
+    if (pbe_index < PBES_PER_PAGE) {
+      this_pbe++;
+      pbe_index++;
+    } else {
+      pbe_page = (struct page *) this_pbe->next;
+      kunmap_atomic(first_pbe);
+      if (!pbe_page)
+        return;
+      this_pbe = (struct pbe *) kmap_atomic(pbe_page);
+      first_pbe = this_pbe;
+      pbe_index = 1;
+    }
+  }
+  kunmap_atomic(first_pbe);
+}
+
+#else /* CONFIG_HIGHMEM */
+static void copyback_high(void) { }
+#endif
+
+char toi_wait_for_keypress_dev_console(int timeout)
+{
+  int fd, this_timeout = 255, orig_kthread = 0;
+  char key = '\0';
+  struct termios t, t_backup;
+
+  /* We should be guaranteed /dev/console exists after populate_rootfs()
+   * in init/main.c.
+   */
+  fd = sys_open("/dev/console", O_RDONLY, 0);
+  if (fd < 0) {
+    printk(KERN_INFO "Couldn't open /dev/console.\n");
+    return key;
+  }
+
+  if (sys_ioctl(fd, TCGETS, (long)&t) < 0)
+    goto out_close;
+
+  memcpy(&t_backup, &t, sizeof(t));
+
+  t.c_lflag &= ~(ISIG|ICANON|ECHO);
+  t.c_cc[VMIN] = 0;
+
+new_timeout:
+  if (timeout > 0) {
+    this_timeout = timeout < 26 ? timeout : 25;
+    timeout -= this_timeout;
+    this_timeout *= 10;
+  }
+
+  t.c_cc[VTIME] = this_timeout;
+
+  if (sys_ioctl(fd, TCSETS, (long)&t) < 0)
+    goto out_restore;
+
+  if (current->flags & PF_KTHREAD) {
+    orig_kthread = (current->flags & PF_KTHREAD);
+    current->flags &= ~PF_KTHREAD;
+  }
+
+  while (1) {
+    if (sys_read(fd, &key, 1) <= 0) {
+      if (timeout)
+        goto new_timeout;
+      key = '\0';
+      break;
+    }
+    key = tolower(key);
+    if (test_toi_state(TOI_SANITY_CHECK_PROMPT)) {
+      if (key == 'c') {
+        set_toi_state(TOI_CONTINUE_REQ);
+        break;
+      } else if (key == ' ')
+        break;
+    } else
+      break;
+  }
+  if (orig_kthread) {
+    current->flags |= PF_KTHREAD;
+  }
+
+out_restore:
+  sys_ioctl(fd, TCSETS, (long)&t_backup);
+out_close:
+  sys_close(fd);
+
+  return key;
+}
+
+struct toi_boot_kernel_data toi_bkd __nosavedata
+__attribute__((aligned(PAGE_SIZE))) = {
+  MY_BOOT_KERNEL_DATA_VERSION,
+  0,
+#ifdef CONFIG_TOI_REPLACE_SWSUSP
+  (1 << TOI_REPLACE_SWSUSP) |
+#endif
+    (1 << TOI_NO_FLUSHER_THREAD) |
+    (1 << TOI_PAGESET2_FULL),
+};
+
+struct block_device *toi_open_by_devnum(dev_t dev)
+{
+  struct block_device *bdev = bdget(dev);
+  int err = -ENOMEM;
+  if (bdev)
+    err = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY, NULL);
+  return err ? ERR_PTR(err) : bdev;
+}
+
+/**
+ * toi_close_bdev: Close a swap bdev.
+ *
+ * int: The swap entry number to close.
+ */
+void toi_close_bdev(struct block_device *bdev)
+{
+  blkdev_put(bdev, FMODE_READ | FMODE_NDELAY);
+}
+
+int toi_wait = CONFIG_TOI_DEFAULT_WAIT;
+struct toi_core_fns *toi_core_fns;
+unsigned long toi_result;
+struct pagedir pagedir1 = {1};
+struct toi_cbw **toi_first_cbw;
+int toi_next_cbw;
+
+unsigned long toi_get_nonconflicting_page(void)
+{
+  return toi_core_fns->get_nonconflicting_page();
+}
+
+int toi_post_context_save(void)
+{
+  return toi_core_fns->post_context_save();
+}
+
+int try_tuxonice_hibernate(void)
+{
+  if (!toi_core_fns)
+    return -ENODEV;
+
+  return toi_core_fns->try_hibernate();
+}
+
+static int num_resume_calls;
+#ifdef CONFIG_TOI_IGNORE_LATE_INITCALL
+static int ignore_late_initcall = 1;
+#else
+static int ignore_late_initcall;
+#endif
+
+int toi_translate_err_default = TOI_CONTINUE_REQ;
+
+void try_tuxonice_resume(void)
+{
+  if (!hibernation_available())
+    return;
+
+  /* Don't let it wrap around eventually */
+  if (num_resume_calls < 2)
+    num_resume_calls++;
+
+  if (num_resume_calls == 1 && ignore_late_initcall) {
+    printk(KERN_INFO "TuxOnIce: Ignoring late initcall, as requested.\n");
+    return;
+  }
+
+  if (toi_core_fns)
+    toi_core_fns->try_resume();
+  else
+    printk(KERN_INFO "TuxOnIce core not loaded yet.\n");
+}
+
+int toi_lowlevel_builtin(void)
+{
+  int error = 0;
+
+  save_processor_state();
+  trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, true);
+  error = swsusp_arch_suspend();
+  restore_processor_state();
+  trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, false);
+
+  /* Restore control flow appears here */
+  if (!toi_in_hibernate) {
+    copyback_high();
+    set_toi_state(TOI_NOW_RESUMING);
+  } else {
+    if (error)
+      printk(KERN_ERR "Error %d hibernating\n", error);
+  }
+
+  return error;
+}
+
+unsigned long toi_compress_bytes_in;
+unsigned long toi_compress_bytes_out;
+
+int toi_in_suspend(void)
+{
+  return in_suspend;
+}
+
+unsigned long toi_state = ((1 << TOI_BOOT_TIME) |
+    (1 << TOI_IGNORE_LOGLEVEL) |
+    (1 << TOI_IO_STOPPED));
+
+/* The number of hibernates we have started (some may have been cancelled) */
+unsigned int nr_hibernates;
+int toi_running;
+__nosavedata int toi_in_hibernate;
+__nosavedata struct pbe *restore_highmem_pblist;
+
+int toi_trace_allocs;
+
+void toi_read_lock_tasklist(void)
+{
+  read_lock(&tasklist_lock);
+}
+
+void toi_read_unlock_tasklist(void)
+{
+  read_unlock(&tasklist_lock);
+}
+
+#ifdef CONFIG_TOI_ZRAM_SUPPORT
+int (*toi_flag_zram_disks) (void);
+
+int toi_do_flag_zram_disks(void)
+{
+  return toi_flag_zram_disks ? (*toi_flag_zram_disks)() : 0;
+}
+
+#endif
+
+/* toi_generate_free_page_map
+ *
+ * Description:        This routine generates a bitmap of free pages from the
+ *                 lists used by the memory manager. We then use the bitmap
+ *                 to quickly calculate which pages to save and in which
+ *                 pagesets.
+ */
+void toi_generate_free_page_map(void)
+{
+  int order, cpu, t;
+  unsigned long flags, i;
+  struct zone *zone;
+  struct list_head *curr;
+  unsigned long pfn;
+  struct page *page;
+
+  for_each_populated_zone(zone) {
+
+    if (!zone->spanned_pages)
+      continue;
+
+    spin_lock_irqsave(&zone->lock, flags);
+
+    for (i = 0; i < zone->spanned_pages; i++) {
+      pfn = zone->zone_start_pfn + i;
+
+      if (!pfn_valid(pfn))
+        continue;
+
+      page = pfn_to_page(pfn);
+
+      ClearPageNosaveFree(page);
+    }
+
+    for_each_migratetype_order(order, t) {
+      list_for_each(curr,
+          &zone->free_area[order].free_list[t]) {
+        unsigned long j;
+
+        pfn = page_to_pfn(list_entry(curr, struct page,
+              lru));
+        for (j = 0; j < (1UL << order); j++)
+          SetPageNosaveFree(pfn_to_page(pfn + j));
+      }
+    }
+
+    for_each_online_cpu(cpu) {
+      struct per_cpu_pageset *pset =
+        per_cpu_ptr(zone->pageset, cpu);
+      struct per_cpu_pages *pcp = &pset->pcp;
+      struct page *page;
+      int t;
+
+      for (t = 0; t < MIGRATE_PCPTYPES; t++)
+        list_for_each_entry(page, &pcp->lists[t], lru)
+          SetPageNosaveFree(page);
+    }
+
+    spin_unlock_irqrestore(&zone->lock, flags);
+  }
+}
+
+/* toi_size_of_free_region
+ *
+ * Description:        Return the number of pages that are free, beginning with and
+ *                 including this one.
+ */
+int toi_size_of_free_region(struct zone *zone, unsigned long start_pfn)
+{
+  unsigned long this_pfn = start_pfn,
+                end_pfn = zone_end_pfn(zone);
+
+  while (pfn_valid(this_pfn) && this_pfn < end_pfn && PageNosaveFree(pfn_to_page(this_pfn)))
+    this_pfn++;
+
+  return this_pfn - start_pfn;
+}
+
+static int __init toi_wait_setup(char *str)
+{
+  int value;
+
+  if (sscanf(str, "=%d", &value)) {
+    if (value < -1 || value > 255)
+      printk(KERN_INFO "TuxOnIce_wait outside range -1 to "
+          "255.\n");
+    else
+      toi_wait = value;
+  }
+
+  return 1;
+}
+__setup("toi_wait", toi_wait_setup);
+
+static int __init toi_translate_retry_setup(char *str)
+{
+  toi_translate_err_default = 0;
+  return 1;
+}
+__setup("toi_translate_retry", toi_translate_retry_setup);
+
+static int __init toi_debug_setup(char *str)
+{
+  toi_bkd.toi_action |= (1 << TOI_LOGALL);
+  toi_bootflags_mask |= (1 << TOI_LOGALL);
+  toi_bkd.toi_debug_state = 255;
+  toi_bkd.toi_default_console_level = 7;
+  return 1;
+}
+__setup("toi_debug_setup", toi_debug_setup);
+
+static int __init toi_pause_setup(char *str)
+{
+  toi_bkd.toi_action |= (1 << TOI_PAUSE);
+  toi_bootflags_mask |= (1 << TOI_PAUSE);
+  return 1;
+}
+__setup("toi_pause", toi_pause_setup);
+
+#ifdef CONFIG_PM_DEBUG
+static int __init toi_trace_allocs_setup(char *str)
+{
+  int value;
+
+  if (sscanf(str, "=%d", &value))
+    toi_trace_allocs = value;
+
+  return 1;
+}
+__setup("toi_trace_allocs", toi_trace_allocs_setup);
+#endif
+
+static int __init toi_ignore_late_initcall_setup(char *str)
+{
+  int value;
+
+  if (sscanf(str, "=%d", &value))
+    ignore_late_initcall = value;
+
+  return 1;
+}
+__setup("toi_initramfs_resume_only", toi_ignore_late_initcall_setup);
+
+static int __init toi_force_no_multithreaded_setup(char *str)
+{
+  int value;
+
+  toi_bkd.toi_action &= ~(1 << TOI_NO_MULTITHREADED_IO);
+  toi_bootflags_mask |= (1 << TOI_NO_MULTITHREADED_IO);
+
+  if (sscanf(str, "=%d", &value) && value)
+    toi_bkd.toi_action |= (1 << TOI_NO_MULTITHREADED_IO);
+
+  return 1;
+}
+__setup("toi_no_multithreaded", toi_force_no_multithreaded_setup);
+
+#ifdef CONFIG_KGDB
+static int __init toi_post_resume_breakpoint_setup(char *str)
+{
+  int value;
+
+  toi_bkd.toi_action &= ~(1 << TOI_POST_RESUME_BREAKPOINT);
+  toi_bootflags_mask |= (1 << TOI_POST_RESUME_BREAKPOINT);
+  if (sscanf(str, "=%d", &value) && value)
+    toi_bkd.toi_action |= (1 << TOI_POST_RESUME_BREAKPOINT);
+
+  return 1;
+}
+__setup("toi_post_resume_break", toi_post_resume_breakpoint_setup);
+#endif
+
+static int __init toi_disable_readahead_setup(char *str)
+{
+  int value;
+
+  toi_bkd.toi_action &= ~(1 << TOI_NO_READAHEAD);
+  toi_bootflags_mask |= (1 << TOI_NO_READAHEAD);
+  if (sscanf(str, "=%d", &value) && value)
+    toi_bkd.toi_action |= (1 << TOI_NO_READAHEAD);
+
+  return 1;
+}
+__setup("toi_no_readahead", toi_disable_readahead_setup);
diff --git a/kernel/power/tuxonice_builtin.h b/kernel/power/tuxonice_builtin.h
new file mode 100644
index 000000000..9539818e0
--- /dev/null
+++ b/kernel/power/tuxonice_builtin.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ */
+#include <asm/setup.h>
+
+extern struct toi_core_fns *toi_core_fns;
+extern unsigned long toi_compress_bytes_in, toi_compress_bytes_out;
+extern unsigned int nr_hibernates;
+extern int toi_in_hibernate;
+
+extern __nosavedata struct pbe *restore_highmem_pblist;
+
+int toi_lowlevel_builtin(void);
+
+#ifdef CONFIG_HIGHMEM
+extern __nosavedata struct zone_data *toi_nosave_zone_list;
+extern __nosavedata unsigned long toi_nosave_max_pfn;
+#endif
+
+extern unsigned long toi_get_nonconflicting_page(void);
+extern int toi_post_context_save(void);
+
+extern char toi_wait_for_keypress_dev_console(int timeout);
+extern struct block_device *toi_open_by_devnum(dev_t dev);
+extern void toi_close_bdev(struct block_device *bdev);
+extern int toi_wait;
+extern int toi_translate_err_default;
+extern int toi_force_no_multithreaded;
+extern void toi_read_lock_tasklist(void);
+extern void toi_read_unlock_tasklist(void);
+extern int toi_in_suspend(void);
+extern void toi_generate_free_page_map(void);
+extern int toi_size_of_free_region(struct zone *zone, unsigned long start_pfn);
+
+#ifdef CONFIG_TOI_ZRAM_SUPPORT
+extern int toi_do_flag_zram_disks(void);
+#else
+#define toi_do_flag_zram_disks() (0)
+#endif
diff --git a/kernel/power/tuxonice_checksum.c b/kernel/power/tuxonice_checksum.c
new file mode 100644
index 000000000..49645d0d4
--- /dev/null
+++ b/kernel/power/tuxonice_checksum.c
@@ -0,0 +1,401 @@
+/*
+ * kernel/power/tuxonice_checksum.c
+ *
+ * Copyright (C) 2006-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * This file contains data checksum routines for TuxOnIce,
+ * using cryptoapi. They are used to locate any modifications
+ * made to pageset 2 while we're saving it.
+ */
+
+#include <crypto/hash.h>
+#include <linux/suspend.h>
+#include <linux/highmem.h>
+#include <linux/vmalloc.h>
+#include <linux/scatterlist.h>
+
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_io.h"
+#include "tuxonice_pageflags.h"
+#include "tuxonice_checksum.h"
+#include "tuxonice_pagedir.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_ui.h"
+
+static struct toi_module_ops toi_checksum_ops;
+
+/* Constant at the mo, but I might allow tuning later */
+static char toi_checksum_name[32] = "md4";
+/* Bytes per checksum */
+#define CHECKSUM_SIZE (16)
+
+#define CHECKSUMS_PER_PAGE ((PAGE_SIZE - sizeof(void *)) / CHECKSUM_SIZE)
+
+struct cpu_context {
+  struct shash_desc *desc;
+  char *buf;
+};
+
+static DEFINE_PER_CPU(struct cpu_context, contexts);
+static int pages_allocated;
+static unsigned long page_list;
+
+static int toi_num_resaved;
+
+static unsigned long this_checksum, next_page;
+static int checksum_count;
+
+static inline int checksum_pages_needed(void)
+{
+  return DIV_ROUND_UP(pagedir2.size, CHECKSUMS_PER_PAGE);
+}
+
+/* ---- Local buffer management ---- */
+
+/*
+ * toi_checksum_cleanup
+ *
+ * Frees memory allocated for our labours.
+ */
+static void toi_checksum_cleanup(int ending_cycle)
+{
+  int cpu;
+
+  if (ending_cycle) {
+    for_each_online_cpu(cpu) {
+      struct cpu_context *this = &per_cpu(contexts, cpu);
+      if (this->desc) {
+        size_t size = sizeof(*this->desc) +
+          crypto_shash_descsize(this->desc->tfm);
+        crypto_free_shash(this->desc->tfm);
+        toi_kfree(26, this->desc, size);
+        this->desc = NULL;
+      }
+
+      if (this->buf) {
+        toi_free_page(27, (unsigned long) this->buf);
+        this->buf = NULL;
+      }
+    }
+  }
+}
+
+/*
+ * toi_crypto_initialise
+ *
+ * Prepare to do some work by allocating buffers and transforms.
+ * Returns: Int: Zero. Even if we can't set up checksum, we still
+ * seek to hibernate.
+ */
+static int toi_checksum_initialise(int starting_cycle)
+{
+  int cpu;
+
+  if (!(starting_cycle & SYSFS_HIBERNATE) || !toi_checksum_ops.enabled)
+    return 0;
+
+  if (!*toi_checksum_name) {
+    printk(KERN_INFO "TuxOnIce: No checksum algorithm name set.\n");
+    return 1;
+  }
+
+  for_each_online_cpu(cpu) {
+    struct cpu_context *this = &per_cpu(contexts, cpu);
+    struct page *page;
+    struct crypto_shash *tfm;
+    struct shash_desc *tdesc;
+
+
+    tfm = crypto_alloc_shash(toi_checksum_name, 0, 0);
+    if (IS_ERR(tfm)) {
+      printk(KERN_INFO "TuxOnIce: Failed to initialise the "
+          "%s checksum algorithm: %ld.\n",
+          toi_checksum_name, (long) tfm);
+      return 1;
+    }
+
+    tdesc = toi_kzalloc(26, sizeof(*this->desc) + crypto_shash_descsize(tfm),
+        GFP_KERNEL);
+
+    if (!tdesc) {
+      printk(KERN_INFO "TuxOnIce: Failed to allocate memory "
+          "in checksum initialisation.\n");
+      return 1;
+    }
+    tdesc->tfm = tfm;
+    this->desc = tdesc;
+
+    page = toi_alloc_page(27, GFP_KERNEL);
+    if (!page)
+      return 1;
+    this->buf = page_address(page);
+  }
+  return 0;
+}
+
+/*
+ * toi_checksum_print_debug_stats
+ * @buffer: Pointer to a buffer into which the debug info will be printed.
+ * @size: Size of the buffer.
+ *
+ * Print information to be recorded for debugging purposes into a buffer.
+ * Returns: Number of characters written to the buffer.
+ */
+
+static int toi_checksum_print_debug_stats(char *buffer, int size)
+{
+  int len;
+
+  if (!toi_checksum_ops.enabled)
+    return scnprintf(buffer, size,
+        "- Checksumming disabled.\n");
+
+  len = scnprintf(buffer, size, "- Checksum method is '%s'.\n",
+      toi_checksum_name);
+  len += scnprintf(buffer + len, size - len,
+      "  %d pages resaved in atomic copy.\n", toi_num_resaved);
+  return len;
+}
+
+static int toi_checksum_memory_needed(void)
+{
+  return toi_checksum_ops.enabled ?
+    checksum_pages_needed() << PAGE_SHIFT : 0;
+}
+
+static int toi_checksum_storage_needed(void)
+{
+  if (toi_checksum_ops.enabled)
+    return strlen(toi_checksum_name) + sizeof(int) + 1;
+  else
+    return 0;
+}
+
+/*
+ * toi_checksum_save_config_info
+ * @buffer: Pointer to a buffer of size PAGE_SIZE.
+ *
+ * Save informaton needed when reloading the image at resume time.
+ * Returns: Number of bytes used for saving our data.
+ */
+static int toi_checksum_save_config_info(char *buffer)
+{
+  int namelen = strlen(toi_checksum_name) + 1;
+  int total_len;
+
+  *((unsigned int *) buffer) = namelen;
+  strncpy(buffer + sizeof(unsigned int), toi_checksum_name, namelen);
+  total_len = sizeof(unsigned int) + namelen;
+  return total_len;
+}
+
+/* toi_checksum_load_config_info
+ * @buffer: Pointer to the start of the data.
+ * @size: Number of bytes that were saved.
+ *
+ * Description:        Reload information needed for dechecksuming the image at
+ * resume time.
+ */
+static void toi_checksum_load_config_info(char *buffer, int size)
+{
+  int namelen;
+
+  namelen = *((unsigned int *) (buffer));
+  strncpy(toi_checksum_name, buffer + sizeof(unsigned int),
+      namelen);
+  return;
+}
+
+/*
+ * Free Checksum Memory
+ */
+
+void free_checksum_pages(void)
+{
+  while (pages_allocated) {
+    unsigned long next = *((unsigned long *) page_list);
+    ClearPageNosave(virt_to_page(page_list));
+    toi_free_page(15, (unsigned long) page_list);
+    page_list = next;
+    pages_allocated--;
+  }
+}
+
+/*
+ * Allocate Checksum Memory
+ */
+
+int allocate_checksum_pages(void)
+{
+  int pages_needed = checksum_pages_needed();
+
+  if (!toi_checksum_ops.enabled)
+    return 0;
+
+  while (pages_allocated < pages_needed) {
+    unsigned long *new_page =
+      (unsigned long *) toi_get_zeroed_page(15, TOI_ATOMIC_GFP);
+    if (!new_page) {
+      printk(KERN_ERR "Unable to allocate checksum pages.\n");
+      return -ENOMEM;
+    }
+    SetPageNosave(virt_to_page(new_page));
+    (*new_page) = page_list;
+    page_list = (unsigned long) new_page;
+    pages_allocated++;
+  }
+
+  next_page = (unsigned long) page_list;
+  checksum_count = 0;
+
+  return 0;
+}
+
+char *tuxonice_get_next_checksum(void)
+{
+  if (!toi_checksum_ops.enabled)
+    return NULL;
+
+  if (checksum_count % CHECKSUMS_PER_PAGE)
+    this_checksum += CHECKSUM_SIZE;
+  else {
+    this_checksum = next_page + sizeof(void *);
+    next_page = *((unsigned long *) next_page);
+  }
+
+  checksum_count++;
+  return (char *) this_checksum;
+}
+
+int tuxonice_calc_checksum(struct page *page, char *checksum_locn)
+{
+  char *pa;
+  int result, cpu = smp_processor_id();
+  struct cpu_context *ctx = &per_cpu(contexts, cpu);
+
+  if (!toi_checksum_ops.enabled)
+    return 0;
+
+  pa = kmap(page);
+  memcpy(ctx->buf, pa, PAGE_SIZE);
+  kunmap(page);
+  result = crypto_shash_digest(ctx->desc, ctx->buf, PAGE_SIZE,
+      checksum_locn);
+  if (result)
+    printk(KERN_ERR "TuxOnIce checksumming: crypto_shash_digest "
+        "returned %d.\n", result);
+  return result;
+}
+/*
+ * Calculate checksums
+ */
+
+void check_checksums(void)
+{
+  int index = 0, cpu = smp_processor_id();
+  char current_checksum[CHECKSUM_SIZE];
+  struct cpu_context *ctx = &per_cpu(contexts, cpu);
+  unsigned long pfn;
+
+  if (!toi_checksum_ops.enabled) {
+    toi_message(TOI_IO, TOI_VERBOSE, 0, "Checksumming disabled.");
+    return;
+  }
+
+  next_page = (unsigned long) page_list;
+
+  toi_num_resaved = 0;
+  this_checksum = 0;
+
+  toi_trace_index++;
+
+  toi_message(TOI_IO, TOI_VERBOSE, 0, "Verifying checksums.");
+  memory_bm_position_reset(pageset2_map);
+  for (pfn = memory_bm_next_pfn(pageset2_map, 0); pfn != BM_END_OF_MAP;
+      pfn = memory_bm_next_pfn(pageset2_map, 0)) {
+    int ret, resave_needed = false;
+    char *pa;
+    struct page *page = pfn_to_page(pfn);
+
+    if (index < checksum_count) {
+      if (index % CHECKSUMS_PER_PAGE) {
+        this_checksum += CHECKSUM_SIZE;
+      } else {
+        this_checksum = next_page + sizeof(void *);
+        next_page = *((unsigned long *) next_page);
+      }
+
+      /* Done when IRQs disabled so must be atomic */
+      pa = kmap_atomic(page);
+      memcpy(ctx->buf, pa, PAGE_SIZE);
+      kunmap_atomic(pa);
+      ret = crypto_shash_digest(ctx->desc, ctx->buf, PAGE_SIZE,
+          current_checksum);
+
+      if (ret) {
+        printk(KERN_INFO "Digest failed. Returned %d.\n", ret);
+        return;
+      }
+
+      resave_needed = memcmp(current_checksum, (char *) this_checksum,
+          CHECKSUM_SIZE);
+    } else {
+      resave_needed = true;
+    }
+
+    if (resave_needed) {
+      TOI_TRACE_DEBUG(pfn, "_Resaving %d", resave_needed);
+      SetPageResave(pfn_to_page(pfn));
+      toi_num_resaved++;
+      if (test_action_state(TOI_ABORT_ON_RESAVE_NEEDED))
+        set_abort_result(TOI_RESAVE_NEEDED);
+    }
+
+    index++;
+  }
+  toi_message(TOI_IO, TOI_VERBOSE, 0, "Checksum verification complete.");
+}
+
+static struct toi_sysfs_data sysfs_params[] = {
+  SYSFS_INT("enabled", SYSFS_RW, &toi_checksum_ops.enabled, 0, 1, 0,
+      NULL),
+  SYSFS_BIT("abort_if_resave_needed", SYSFS_RW, &toi_bkd.toi_action,
+      TOI_ABORT_ON_RESAVE_NEEDED, 0)
+};
+
+/*
+ * Ops structure.
+ */
+static struct toi_module_ops toi_checksum_ops = {
+  .type                        = MISC_MODULE,
+  .name                        = "checksumming",
+  .directory                = "checksum",
+  .module                        = THIS_MODULE,
+  .initialise                = toi_checksum_initialise,
+  .cleanup                = toi_checksum_cleanup,
+  .print_debug_info        = toi_checksum_print_debug_stats,
+  .save_config_info        = toi_checksum_save_config_info,
+  .load_config_info        = toi_checksum_load_config_info,
+  .memory_needed                = toi_checksum_memory_needed,
+  .storage_needed                = toi_checksum_storage_needed,
+
+  .sysfs_data                = sysfs_params,
+  .num_sysfs_entries        = sizeof(sysfs_params) /
+    sizeof(struct toi_sysfs_data),
+};
+
+/* ---- Registration ---- */
+int toi_checksum_init(void)
+{
+  int result = toi_register_module(&toi_checksum_ops);
+  return result;
+}
+
+void toi_checksum_exit(void)
+{
+  toi_unregister_module(&toi_checksum_ops);
+}
diff --git a/kernel/power/tuxonice_checksum.h b/kernel/power/tuxonice_checksum.h
new file mode 100644
index 000000000..a26e24052
--- /dev/null
+++ b/kernel/power/tuxonice_checksum.h
@@ -0,0 +1,31 @@
+/*
+ * kernel/power/tuxonice_checksum.h
+ *
+ * Copyright (C) 2006-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * This file contains data checksum routines for TuxOnIce,
+ * using cryptoapi. They are used to locate any modifications
+ * made to pageset 2 while we're saving it.
+ */
+
+#if defined(CONFIG_TOI_CHECKSUM)
+extern int toi_checksum_init(void);
+extern void toi_checksum_exit(void);
+void check_checksums(void);
+int allocate_checksum_pages(void);
+void free_checksum_pages(void);
+char *tuxonice_get_next_checksum(void);
+int tuxonice_calc_checksum(struct page *page, char *checksum_locn);
+#else
+static inline int toi_checksum_init(void) { return 0; }
+static inline void toi_checksum_exit(void) { }
+static inline void check_checksums(void) { };
+static inline int allocate_checksum_pages(void) { return 0; };
+static inline void free_checksum_pages(void) { };
+static inline char *tuxonice_get_next_checksum(void) { return NULL; };
+static inline int tuxonice_calc_checksum(struct page *page, char *checksum_locn)
+{ return 0; }
+#endif
+
diff --git a/kernel/power/tuxonice_cluster.c b/kernel/power/tuxonice_cluster.c
new file mode 100644
index 000000000..a98f82088
--- /dev/null
+++ b/kernel/power/tuxonice_cluster.c
@@ -0,0 +1,1058 @@
+/*
+ * kernel/power/tuxonice_cluster.c
+ *
+ * Copyright (C) 2006-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * This file contains routines for cluster hibernation support.
+ *
+ * Based on ip autoconfiguration code in net/ipv4/ipconfig.c.
+ *
+ * How does it work?
+ *
+ * There is no 'master' node that tells everyone else what to do. All nodes
+ * send messages to the broadcast address/port, maintain a list of peers
+ * and figure out when to progress to the next step in hibernating or resuming.
+ * This makes us more fault tolerant when it comes to nodes coming and going
+ * (which may be more of an issue if we're hibernating when power supplies
+ * are being unreliable).
+ *
+ * At boot time, we start a ktuxonice thread that handles communication with
+ * other nodes. This node maintains a state machine that controls our progress
+ * through hibernating and resuming, keeping us in step with other nodes. Nodes
+ * are identified by their hw address.
+ *
+ * On startup, the node sends CLUSTER_PING on the configured interface's
+ * broadcast address, port $toi_cluster_port (see below) and begins to listen
+ * for other broadcast messages. CLUSTER_PING messages are repeated at
+ * intervals of 5 minutes, with a random offset to spread traffic out.
+ *
+ * A hibernation cycle is initiated from any node via
+ *
+ * echo > /sys/power/tuxonice/do_hibernate
+ *
+ * and (possibily) the hibernate script. At each step of the process, the node
+ * completes its work, and waits for all other nodes to signal completion of
+ * their work (or timeout) before progressing to the next step.
+ *
+ * Request/state  Action before reply        Possible reply        Next state
+ * HIBERNATE          capable, pre-script        HIBERNATE|ACK        NODE_PREP
+ *                                         HIBERNATE|NACK        INIT_0
+ *
+ * PREP                  prepare_image                PREP|ACK        IMAGE_WRITE
+ *                                         PREP|NACK        INIT_0
+ *                                         ABORT                RUNNING
+ *
+ * IO                  write image                IO|ACK                power off
+ *                                         ABORT                POST_RESUME
+ *
+ * (Boot time)          check for image        IMAGE|ACK        RESUME_PREP
+ *                                         (Note 1)
+ *                                         IMAGE|NACK        (Note 2)
+ *
+ * PREP                  prepare read image        PREP|ACK        IMAGE_READ
+ *                                         PREP|NACK        (As NACK_IMAGE)
+ *
+ * IO                  read image                IO|ACK                POST_RESUME
+ *
+ * POST_RESUME          thaw, post-script                        RUNNING
+ *
+ * INIT_0          init 0
+ *
+ * Other messages:
+ *
+ * - PING: Request for all other live nodes to send a PONG. Used at startup to
+ *   announce presence, when a node is suspected dead and periodically, in case
+ *   segments of the network are [un]plugged.
+ *
+ * - PONG: Response to a PING.
+ *
+ * - ABORT: Request to cancel writing an image.
+*
+* - BYE: Notification that this node is shutting down.
+*
+* Note 1: Repeated at 3s intervals until we continue to boot/resume, so that
+* nodes which are slower to start up can get state synchronised. If a node
+* starting up sees other nodes sending RESUME_PREP or IMAGE_READ, it may send
+* ACK_IMAGE and they will wait for it to catch up. If it sees ACK_READ, it
+* must invalidate its image (if any) and boot normally.
+*
+  * Note 2: May occur when one node lost power or powered off while others
+* hibernated. This node waits for others to complete resuming (ACK_READ)
+  * before completing its boot, so that it appears as a fail node restarting.
+  *
+  * If any node has an image, then it also has a list of nodes that hibernated
+  * in synchronisation with it. The node will wait for other nodes to appear
+  * or timeout before beginning its restoration.
+  *
+  * If a node has no image, it needs to wait, in case other nodes which do have
+  * an image are going to resume, but are taking longer to announce their
+  * presence. For this reason, the user can specify a timeout value and a number
+  * of nodes detected before we just continue. (We might want to assume in a
+      * cluster of, say, 15 nodes, if 8 others have booted without finding an image,
+      * the remaining nodes will too. This might help in situations where some nodes
+      * are much slower to boot, or more subject to hardware failures or such like).
+  */
+
+#include <linux/suspend.h>
+#include <linux/if.h>
+#include <linux/rtnetlink.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/in.h>
+#include <linux/if_arp.h>
+#include <linux/kthread.h>
+#include <linux/wait.h>
+#include <linux/netdevice.h>
+#include <net/ip.h>
+
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_io.h"
+
+#if 1
+#define PRINTK(a, b...) do { printk(a, ##b); } while (0)
+#else
+#define PRINTK(a, b...) do { } while (0)
+#endif
+
+  static int loopback_mode;
+  static int num_local_nodes = 1;
+#define MAX_LOCAL_NODES 8
+#define SADDR (loopback_mode ? b->sid : h->saddr)
+
+#define MYNAME "TuxOnIce Clustering"
+
+  enum cluster_message {
+    MSG_ACK = 1,
+    MSG_NACK = 2,
+    MSG_PING = 4,
+    MSG_ABORT = 8,
+    MSG_BYE = 16,
+    MSG_HIBERNATE = 32,
+    MSG_IMAGE = 64,
+    MSG_IO = 128,
+    MSG_RUNNING = 256
+  };
+
+static char *str_message(int message)
+{
+  switch (message) {
+    case 4:
+      return "Ping";
+    case 8:
+      return "Abort";
+    case 9:
+      return "Abort acked";
+    case 10:
+      return "Abort nacked";
+    case 16:
+      return "Bye";
+    case 17:
+      return "Bye acked";
+    case 18:
+      return "Bye nacked";
+    case 32:
+      return "Hibernate request";
+    case 33:
+      return "Hibernate ack";
+    case 34:
+      return "Hibernate nack";
+    case 64:
+      return "Image exists?";
+    case 65:
+      return "Image does exist";
+    case 66:
+      return "No image here";
+    case 128:
+      return "I/O";
+    case 129:
+      return "I/O okay";
+    case 130:
+      return "I/O failed";
+    case 256:
+      return "Running";
+    default:
+      printk(KERN_ERR "Unrecognised message %d.\n", message);
+      return "Unrecognised message (see dmesg)";
+  }
+}
+
+#define MSG_ACK_MASK (MSG_ACK | MSG_NACK)
+#define MSG_STATE_MASK (~MSG_ACK_MASK)
+
+struct node_info {
+  struct list_head member_list;
+  wait_queue_head_t member_events;
+  spinlock_t member_list_lock;
+  spinlock_t receive_lock;
+  int peer_count, ignored_peer_count;
+  struct toi_sysfs_data sysfs_data;
+  enum cluster_message current_message;
+};
+
+struct node_info node_array[MAX_LOCAL_NODES];
+
+struct cluster_member {
+  __be32 addr;
+  enum cluster_message message;
+  struct list_head list;
+  int ignore;
+};
+
+#define toi_cluster_port_send 3501
+#define toi_cluster_port_recv 3502
+
+static struct net_device *net_dev;
+static struct toi_module_ops toi_cluster_ops;
+
+static int toi_recv(struct sk_buff *skb, struct net_device *dev,
+    struct packet_type *pt, struct net_device *orig_dev);
+
+static struct packet_type toi_cluster_packet_type = {
+  .type =        __constant_htons(ETH_P_IP),
+  .func =        toi_recv,
+};
+
+struct toi_pkt {                /* BOOTP packet format */
+  struct iphdr iph;        /* IP header */
+  struct udphdr udph;        /* UDP header */
+  u8 htype;                /* HW address type */
+  u8 hlen;                /* HW address length */
+  __be32 xid;                /* Transaction ID */
+  __be16 secs;                /* Seconds since we started */
+  __be16 flags;                /* Just what it says */
+  u8 hw_addr[16];                /* Sender's HW address */
+  u16 message;                /* Message */
+  unsigned long sid;        /* Source ID for loopback testing */
+};
+
+static char toi_cluster_iface[IFNAMSIZ] = CONFIG_TOI_DEFAULT_CLUSTER_INTERFACE;
+
+static int added_pack;
+
+static int others_have_image;
+
+/* Key used to allow multiple clusters on the same lan */
+static char toi_cluster_key[32] = CONFIG_TOI_DEFAULT_CLUSTER_KEY;
+static char pre_hibernate_script[255] =
+CONFIG_TOI_DEFAULT_CLUSTER_PRE_HIBERNATE;
+static char post_hibernate_script[255] =
+CONFIG_TOI_DEFAULT_CLUSTER_POST_HIBERNATE;
+
+/*                        List of cluster members                        */
+static unsigned long continue_delay = 5 * HZ;
+static unsigned long cluster_message_timeout = 3 * HZ;
+
+/*                 === Membership list ===         */
+
+static void print_member_info(int index)
+{
+  struct cluster_member *this;
+
+  printk(KERN_INFO "==> Dumping node %d.\n", index);
+
+  list_for_each_entry(this, &node_array[index].member_list, list)
+    printk(KERN_INFO "%d.%d.%d.%d last message %s. %s\n",
+        NIPQUAD(this->addr),
+        str_message(this->message),
+        this->ignore ? "(Ignored)" : "");
+  printk(KERN_INFO "== Done ==\n");
+}
+
+static struct cluster_member *__find_member(int index, __be32 addr)
+{
+  struct cluster_member *this;
+
+  list_for_each_entry(this, &node_array[index].member_list, list) {
+    if (this->addr != addr)
+      continue;
+
+    return this;
+  }
+
+  return NULL;
+}
+
+static void set_ignore(int index, __be32 addr, struct cluster_member *this)
+{
+  if (this->ignore) {
+    PRINTK("Node %d already ignoring %d.%d.%d.%d.\n",
+        index, NIPQUAD(addr));
+    return;
+  }
+
+  PRINTK("Node %d sees node %d.%d.%d.%d now being ignored.\n",
+      index, NIPQUAD(addr));
+  this->ignore = 1;
+  node_array[index].ignored_peer_count++;
+}
+
+static int __add_update_member(int index, __be32 addr, int message)
+{
+  struct cluster_member *this;
+
+  this = __find_member(index, addr);
+  if (this) {
+    if (this->message != message) {
+      this->message = message;
+      if ((message & MSG_NACK) &&
+          (message & (MSG_HIBERNATE | MSG_IMAGE | MSG_IO)))
+        set_ignore(index, addr, this);
+      PRINTK("Node %d sees node %d.%d.%d.%d now sending "
+          "%s.\n", index, NIPQUAD(addr),
+          str_message(message));
+      wake_up(&node_array[index].member_events);
+    }
+    return 0;
+  }
+
+  this = (struct cluster_member *) toi_kzalloc(36,
+      sizeof(struct cluster_member), GFP_KERNEL);
+
+  if (!this)
+    return -1;
+
+  this->addr = addr;
+  this->message = message;
+  this->ignore = 0;
+  INIT_LIST_HEAD(&this->list);
+
+  node_array[index].peer_count++;
+
+  PRINTK("Node %d sees node %d.%d.%d.%d sending %s.\n", index,
+      NIPQUAD(addr), str_message(message));
+
+  if ((message & MSG_NACK) &&
+      (message & (MSG_HIBERNATE | MSG_IMAGE | MSG_IO)))
+    set_ignore(index, addr, this);
+  list_add_tail(&this->list, &node_array[index].member_list);
+  return 1;
+}
+
+static int add_update_member(int index, __be32 addr, int message)
+{
+  int result;
+  unsigned long flags;
+  spin_lock_irqsave(&node_array[index].member_list_lock, flags);
+  result = __add_update_member(index, addr, message);
+  spin_unlock_irqrestore(&node_array[index].member_list_lock, flags);
+
+  print_member_info(index);
+
+  wake_up(&node_array[index].member_events);
+
+  return result;
+}
+
+static void del_member(int index, __be32 addr)
+{
+  struct cluster_member *this;
+  unsigned long flags;
+
+  spin_lock_irqsave(&node_array[index].member_list_lock, flags);
+  this = __find_member(index, addr);
+
+  if (this) {
+    list_del_init(&this->list);
+    toi_kfree(36, this, sizeof(*this));
+    node_array[index].peer_count--;
+  }
+
+  spin_unlock_irqrestore(&node_array[index].member_list_lock, flags);
+}
+
+/*                 === Message transmission ===        */
+
+static void toi_send_if(int message, unsigned long my_id);
+
+/*
+ *  Process received TOI packet.
+ */
+static int toi_recv(struct sk_buff *skb, struct net_device *dev,
+    struct packet_type *pt, struct net_device *orig_dev)
+{
+  struct toi_pkt *b;
+  struct iphdr *h;
+  int len, result, index;
+  unsigned long addr, message, ack;
+
+  /* Perform verifications before taking the lock.  */
+  if (skb->pkt_type == PACKET_OTHERHOST)
+    goto drop;
+
+  if (dev != net_dev)
+    goto drop;
+
+  skb = skb_share_check(skb, GFP_ATOMIC);
+  if (!skb)
+    return NET_RX_DROP;
+
+  if (!pskb_may_pull(skb,
+        sizeof(struct iphdr) +
+        sizeof(struct udphdr)))
+    goto drop;
+
+  b = (struct toi_pkt *)skb_network_header(skb);
+  h = &b->iph;
+
+  if (h->ihl != 5 || h->version != 4 || h->protocol != IPPROTO_UDP)
+    goto drop;
+
+  /* Fragments are not supported */
+  if (h->frag_off & htons(IP_OFFSET | IP_MF)) {
+    if (net_ratelimit())
+      printk(KERN_ERR "TuxOnIce: Ignoring fragmented "
+          "cluster message.\n");
+    goto drop;
+  }
+
+  if (skb->len < ntohs(h->tot_len))
+    goto drop;
+
+  if (ip_fast_csum((char *) h, h->ihl))
+    goto drop;
+
+  if (b->udph.source != htons(toi_cluster_port_send) ||
+      b->udph.dest != htons(toi_cluster_port_recv))
+    goto drop;
+
+  if (ntohs(h->tot_len) < ntohs(b->udph.len) + sizeof(struct iphdr))
+    goto drop;
+
+  len = ntohs(b->udph.len) - sizeof(struct udphdr);
+
+  /* Ok the front looks good, make sure we can get at the rest.  */
+  if (!pskb_may_pull(skb, skb->len))
+    goto drop;
+
+  b = (struct toi_pkt *)skb_network_header(skb);
+  h = &b->iph;
+
+  addr = SADDR;
+  PRINTK(">>> Message %s received from " NIPQUAD_FMT ".\n",
+      str_message(b->message), NIPQUAD(addr));
+
+  message = b->message & MSG_STATE_MASK;
+  ack = b->message & MSG_ACK_MASK;
+
+  for (index = 0; index < num_local_nodes; index++) {
+    int new_message = node_array[index].current_message,
+        old_message = new_message;
+
+    if (index == SADDR || !old_message) {
+      PRINTK("Ignoring node %d (offline or self).\n", index);
+      continue;
+    }
+
+    /* One message at a time, please. */
+    spin_lock(&node_array[index].receive_lock);
+
+    result = add_update_member(index, SADDR, b->message);
+    if (result == -1) {
+      printk(KERN_INFO "Failed to add new cluster member "
+          NIPQUAD_FMT ".\n",
+          NIPQUAD(addr));
+      goto drop_unlock;
+    }
+
+    switch (b->message & MSG_STATE_MASK) {
+      case MSG_PING:
+        break;
+      case MSG_ABORT:
+        break;
+      case MSG_BYE:
+        break;
+      case MSG_HIBERNATE:
+        /* Can I hibernate? */
+        new_message = MSG_HIBERNATE |
+          ((index & 1) ? MSG_NACK : MSG_ACK);
+        break;
+      case MSG_IMAGE:
+        /* Can I resume? */
+        new_message = MSG_IMAGE |
+          ((index & 1) ? MSG_NACK : MSG_ACK);
+        if (new_message != old_message)
+          printk(KERN_ERR "Setting whether I can resume "
+              "to %d.\n", new_message);
+        break;
+      case MSG_IO:
+        new_message = MSG_IO | MSG_ACK;
+        break;
+      case MSG_RUNNING:
+        break;
+      default:
+        if (net_ratelimit())
+          printk(KERN_ERR "Unrecognised TuxOnIce cluster"
+              " message %d from " NIPQUAD_FMT ".\n",
+              b->message, NIPQUAD(addr));
+    };
+
+    if (old_message != new_message) {
+      node_array[index].current_message = new_message;
+      printk(KERN_INFO ">>> Sending new message for node "
+          "%d.\n", index);
+      toi_send_if(new_message, index);
+    } else if (!ack) {
+      printk(KERN_INFO ">>> Resending message for node %d.\n",
+          index);
+      toi_send_if(new_message, index);
+    }
+drop_unlock:
+    spin_unlock(&node_array[index].receive_lock);
+  };
+
+drop:
+  /* Throw the packet out. */
+  kfree_skb(skb);
+
+  return 0;
+}
+
+/*
+ *  Send cluster message to single interface.
+ */
+static void toi_send_if(int message, unsigned long my_id)
+{
+  struct sk_buff *skb;
+  struct toi_pkt *b;
+  int hh_len = LL_RESERVED_SPACE(net_dev);
+  struct iphdr *h;
+
+  /* Allocate packet */
+  skb = alloc_skb(sizeof(struct toi_pkt) + hh_len + 15, GFP_KERNEL);
+  if (!skb)
+    return;
+  skb_reserve(skb, hh_len);
+  b = (struct toi_pkt *) skb_put(skb, sizeof(struct toi_pkt));
+  memset(b, 0, sizeof(struct toi_pkt));
+
+  /* Construct IP header */
+  skb_reset_network_header(skb);
+  h = ip_hdr(skb);
+  h->version = 4;
+  h->ihl = 5;
+  h->tot_len = htons(sizeof(struct toi_pkt));
+  h->frag_off = htons(IP_DF);
+  h->ttl = 64;
+  h->protocol = IPPROTO_UDP;
+  h->daddr = htonl(INADDR_BROADCAST);
+  h->check = ip_fast_csum((unsigned char *) h, h->ihl);
+
+  /* Construct UDP header */
+  b->udph.source = htons(toi_cluster_port_send);
+  b->udph.dest = htons(toi_cluster_port_recv);
+  b->udph.len = htons(sizeof(struct toi_pkt) - sizeof(struct iphdr));
+  /* UDP checksum not calculated -- explicitly allowed in BOOTP RFC */
+
+  /* Construct message */
+  b->message = message;
+  b->sid = my_id;
+  b->htype = net_dev->type; /* can cause undefined behavior */
+  b->hlen = net_dev->addr_len;
+  memcpy(b->hw_addr, net_dev->dev_addr, net_dev->addr_len);
+  b->secs = htons(3); /* 3 seconds */
+
+  /* Chain packet down the line... */
+  skb->dev = net_dev;
+  skb->protocol = htons(ETH_P_IP);
+  if ((dev_hard_header(skb, net_dev, ntohs(skb->protocol),
+          net_dev->broadcast, net_dev->dev_addr, skb->len) < 0) ||
+      dev_queue_xmit(skb) < 0)
+    printk(KERN_INFO "E");
+}
+
+/*        =========================================                */
+
+/*                        kTOICluster                        */
+
+static atomic_t num_cluster_threads;
+static DECLARE_WAIT_QUEUE_HEAD(clusterd_events);
+
+static int kTOICluster(void *data)
+{
+  unsigned long my_id;
+
+  my_id = atomic_add_return(1, &num_cluster_threads) - 1;
+  node_array[my_id].current_message = (unsigned long) data;
+
+  PRINTK("kTOICluster daemon %lu starting.\n", my_id);
+
+  current->flags |= PF_NOFREEZE;
+
+  while (node_array[my_id].current_message) {
+    toi_send_if(node_array[my_id].current_message, my_id);
+    sleep_on_timeout(&clusterd_events,
+        cluster_message_timeout);
+    PRINTK("Link state %lu is %d.\n", my_id,
+        node_array[my_id].current_message);
+  }
+
+  toi_send_if(MSG_BYE, my_id);
+  atomic_dec(&num_cluster_threads);
+  wake_up(&clusterd_events);
+
+  PRINTK("kTOICluster daemon %lu exiting.\n", my_id);
+  __set_current_state(TASK_RUNNING);
+  return 0;
+}
+
+static void kill_clusterd(void)
+{
+  int i;
+
+  for (i = 0; i < num_local_nodes; i++) {
+    if (node_array[i].current_message) {
+      PRINTK("Seeking to kill clusterd %d.\n", i);
+      node_array[i].current_message = 0;
+    }
+  }
+  wait_event(clusterd_events,
+      !atomic_read(&num_cluster_threads));
+  PRINTK("All cluster daemons have exited.\n");
+}
+
+static int peers_not_in_message(int index, int message, int precise)
+{
+  struct cluster_member *this;
+  unsigned long flags;
+  int result = 0;
+
+  spin_lock_irqsave(&node_array[index].member_list_lock, flags);
+  list_for_each_entry(this, &node_array[index].member_list, list) {
+    if (this->ignore)
+      continue;
+
+    PRINTK("Peer %d.%d.%d.%d sending %s. "
+        "Seeking %s.\n",
+        NIPQUAD(this->addr),
+        str_message(this->message), str_message(message));
+    if ((precise ? this->message :
+          this->message & MSG_STATE_MASK) !=
+        message)
+      result++;
+  }
+  spin_unlock_irqrestore(&node_array[index].member_list_lock, flags);
+  PRINTK("%d peers in sought message.\n", result);
+  return result;
+}
+
+static void reset_ignored(int index)
+{
+  struct cluster_member *this;
+  unsigned long flags;
+
+  spin_lock_irqsave(&node_array[index].member_list_lock, flags);
+  list_for_each_entry(this, &node_array[index].member_list, list)
+    this->ignore = 0;
+  node_array[index].ignored_peer_count = 0;
+  spin_unlock_irqrestore(&node_array[index].member_list_lock, flags);
+}
+
+static int peers_in_message(int index, int message, int precise)
+{
+  return node_array[index].peer_count -
+    node_array[index].ignored_peer_count -
+    peers_not_in_message(index, message, precise);
+}
+
+static int time_to_continue(int index, unsigned long start, int message)
+{
+  int first = peers_not_in_message(index, message, 0);
+  int second = peers_in_message(index, message, 1);
+
+  PRINTK("First part returns %d, second returns %d.\n", first, second);
+
+  if (!first && !second) {
+    PRINTK("All peers answered message %d.\n",
+        message);
+    return 1;
+  }
+
+  if (time_after(jiffies, start + continue_delay)) {
+    PRINTK("Timeout reached.\n");
+    return 1;
+  }
+
+  PRINTK("Not time to continue yet (%lu < %lu).\n", jiffies,
+      start + continue_delay);
+  return 0;
+}
+
+void toi_initiate_cluster_hibernate(void)
+{
+  int result;
+  unsigned long start;
+
+  result = do_toi_step(STEP_HIBERNATE_PREPARE_IMAGE);
+  if (result)
+    return;
+
+  toi_send_if(MSG_HIBERNATE, 0);
+
+  start = jiffies;
+  wait_event(node_array[0].member_events,
+      time_to_continue(0, start, MSG_HIBERNATE));
+
+  if (test_action_state(TOI_FREEZER_TEST)) {
+    toi_send_if(MSG_ABORT, 0);
+
+    start = jiffies;
+    wait_event(node_array[0].member_events,
+        time_to_continue(0, start, MSG_RUNNING));
+
+    do_toi_step(STEP_QUIET_CLEANUP);
+    return;
+  }
+
+  toi_send_if(MSG_IO, 0);
+
+  result = do_toi_step(STEP_HIBERNATE_SAVE_IMAGE);
+  if (result)
+    return;
+
+  /* This code runs at resume time too! */
+  if (toi_in_hibernate)
+    result = do_toi_step(STEP_HIBERNATE_POWERDOWN);
+}
+
+/* toi_cluster_print_debug_stats
+ *
+ * Description:        Print information to be recorded for debugging purposes into a
+ *                 buffer.
+ * Arguments:        buffer: Pointer to a buffer into which the debug info will be
+ *                         printed.
+ *                 size:        Size of the buffer.
+ * Returns:        Number of characters written to the buffer.
+ */
+static int toi_cluster_print_debug_stats(char *buffer, int size)
+{
+  int len;
+
+  if (strlen(toi_cluster_iface))
+    len = scnprintf(buffer, size,
+        "- Cluster interface is '%s'.\n",
+        toi_cluster_iface);
+  else
+    len = scnprintf(buffer, size,
+        "- Cluster support is disabled.\n");
+  return len;
+}
+
+/* cluster_memory_needed
+ *
+ * Description:        Tell the caller how much memory we need to operate during
+ *                 hibernate/resume.
+ * Returns:        Unsigned long. Maximum number of bytes of memory required for
+ *                 operation.
+ */
+static int toi_cluster_memory_needed(void)
+{
+  return 0;
+}
+
+static int toi_cluster_storage_needed(void)
+{
+  return 1 + strlen(toi_cluster_iface);
+}
+
+/* toi_cluster_save_config_info
+ *
+ * Description:        Save informaton needed when reloading the image at resume time.
+ * Arguments:        Buffer:                Pointer to a buffer of size PAGE_SIZE.
+ * Returns:        Number of bytes used for saving our data.
+ */
+static int toi_cluster_save_config_info(char *buffer)
+{
+  strcpy(buffer, toi_cluster_iface);
+  return strlen(toi_cluster_iface + 1);
+}
+
+/* toi_cluster_load_config_info
+ *
+ * Description:        Reload information needed for declustering the image at
+ *                 resume time.
+ * Arguments:        Buffer:                Pointer to the start of the data.
+ *                Size:                Number of bytes that were saved.
+ */
+static void toi_cluster_load_config_info(char *buffer, int size)
+{
+  strncpy(toi_cluster_iface, buffer, size);
+  return;
+}
+
+static void cluster_startup(void)
+{
+  int have_image = do_check_can_resume(), i;
+  unsigned long start = jiffies, initial_message;
+  struct task_struct *p;
+
+  initial_message = MSG_IMAGE;
+
+  have_image = 1;
+
+  for (i = 0; i < num_local_nodes; i++) {
+    PRINTK("Starting ktoiclusterd %d.\n", i);
+    p = kthread_create(kTOICluster, (void *) initial_message,
+        "ktoiclusterd/%d", i);
+    if (IS_ERR(p)) {
+      printk(KERN_ERR "Failed to start ktoiclusterd.\n");
+      return;
+    }
+
+    wake_up_process(p);
+  }
+
+  /* Wait for delay or someone else sending first message */
+  wait_event(node_array[0].member_events, time_to_continue(0, start,
+        MSG_IMAGE));
+
+  others_have_image = peers_in_message(0, MSG_IMAGE | MSG_ACK, 1);
+
+  printk(KERN_INFO "Continuing. I %shave an image. Peers with image:"
+      " %d.\n", have_image ? "" : "don't ", others_have_image);
+
+  if (have_image) {
+    int result;
+
+    /* Start to resume */
+    printk(KERN_INFO "  === Starting to resume ===  \n");
+    node_array[0].current_message = MSG_IO;
+    toi_send_if(MSG_IO, 0);
+
+    /* result = do_toi_step(STEP_RESUME_LOAD_PS1); */
+    result = 0;
+
+    if (!result) {
+      /*
+       * Atomic restore - we'll come back in the hibernation
+       * path.
+       */
+
+      /* result = do_toi_step(STEP_RESUME_DO_RESTORE); */
+      result = 0;
+
+      /* do_toi_step(STEP_QUIET_CLEANUP); */
+    }
+
+    node_array[0].current_message |= MSG_NACK;
+
+    /* For debugging - disable for real life? */
+    wait_event(node_array[0].member_events,
+        time_to_continue(0, start, MSG_IO));
+  }
+
+  if (others_have_image) {
+    /* Wait for them to resume */
+    printk(KERN_INFO "Waiting for other nodes to resume.\n");
+    start = jiffies;
+    wait_event(node_array[0].member_events,
+        time_to_continue(0, start, MSG_RUNNING));
+    if (peers_not_in_message(0, MSG_RUNNING, 0))
+      printk(KERN_INFO "Timed out while waiting for other "
+          "nodes to resume.\n");
+  }
+
+  /* Find out whether an image exists here. Send ACK_IMAGE or NACK_IMAGE
+   * as appropriate.
+   *
+   * If we don't have an image:
+   * - Wait until someone else says they have one, or conditions are met
+   *   for continuing to boot (n machines or t seconds).
+   * - If anyone has an image, wait for them to resume before continuing
+   *   to boot.
+   *
+   * If we have an image:
+   * - Wait until conditions are met before continuing to resume (n
+   *   machines or t seconds). Send RESUME_PREP and freeze processes.
+   *   NACK_PREP if freezing fails (shouldn't) and follow logic for
+   *   us having no image above. On success, wait for [N]ACK_PREP from
+   *   other machines. Read image (including atomic restore) until done.
+   *   Wait for ACK_READ from others (should never fail). Thaw processes
+   *   and do post-resume. (The section after the atomic restore is done
+   *   via the code for hibernating).
+   */
+
+  node_array[0].current_message = MSG_RUNNING;
+}
+
+/* toi_cluster_open_iface
+ *
+ * Description:        Prepare to use an interface.
+ */
+
+static int toi_cluster_open_iface(void)
+{
+  struct net_device *dev;
+
+  rtnl_lock();
+
+  for_each_netdev(&init_net, dev) {
+    if (/* dev == &init_net.loopback_dev || */
+        strcmp(dev->name, toi_cluster_iface))
+      continue;
+
+    net_dev = dev;
+    break;
+  }
+
+  rtnl_unlock();
+
+  if (!net_dev) {
+    printk(KERN_ERR MYNAME ": Device %s not found.\n",
+        toi_cluster_iface);
+    return -ENODEV;
+  }
+
+  dev_add_pack(&toi_cluster_packet_type);
+  added_pack = 1;
+
+  loopback_mode = (net_dev == init_net.loopback_dev);
+  num_local_nodes = loopback_mode ? 8 : 1;
+
+  PRINTK("Loopback mode is %s. Number of local nodes is %d.\n",
+      loopback_mode ? "on" : "off", num_local_nodes);
+
+  cluster_startup();
+  return 0;
+}
+
+/* toi_cluster_close_iface
+ *
+ * Description: Stop using an interface.
+ */
+
+static int toi_cluster_close_iface(void)
+{
+  kill_clusterd();
+  if (added_pack) {
+    dev_remove_pack(&toi_cluster_packet_type);
+    added_pack = 0;
+  }
+  return 0;
+}
+
+static void write_side_effect(void)
+{
+  if (toi_cluster_ops.enabled) {
+    toi_cluster_open_iface();
+    set_toi_state(TOI_CLUSTER_MODE);
+  } else {
+    toi_cluster_close_iface();
+    clear_toi_state(TOI_CLUSTER_MODE);
+  }
+}
+
+static void node_write_side_effect(void)
+{
+}
+
+/*
+ * data for our sysfs entries.
+ */
+static struct toi_sysfs_data sysfs_params[] = {
+  SYSFS_STRING("interface", SYSFS_RW, toi_cluster_iface, IFNAMSIZ, 0,
+      NULL),
+  SYSFS_INT("enabled", SYSFS_RW, &toi_cluster_ops.enabled, 0, 1, 0,
+      write_side_effect),
+  SYSFS_STRING("cluster_name", SYSFS_RW, toi_cluster_key, 32, 0, NULL),
+  SYSFS_STRING("pre-hibernate-script", SYSFS_RW, pre_hibernate_script,
+      256, 0, NULL),
+  SYSFS_STRING("post-hibernate-script", SYSFS_RW, post_hibernate_script,
+      256, 0, STRING),
+  SYSFS_UL("continue_delay", SYSFS_RW, &continue_delay, HZ / 2, 60 * HZ,
+      0)
+};
+
+/*
+ * Ops structure.
+ */
+
+static struct toi_module_ops toi_cluster_ops = {
+  .type                        = FILTER_MODULE,
+  .name                        = "Cluster",
+  .directory                = "cluster",
+  .module                        = THIS_MODULE,
+  .memory_needed                 = toi_cluster_memory_needed,
+  .print_debug_info        = toi_cluster_print_debug_stats,
+  .save_config_info        = toi_cluster_save_config_info,
+  .load_config_info        = toi_cluster_load_config_info,
+  .storage_needed                = toi_cluster_storage_needed,
+
+  .sysfs_data                = sysfs_params,
+  .num_sysfs_entries        = sizeof(sysfs_params) /
+    sizeof(struct toi_sysfs_data),
+};
+
+/* ---- Registration ---- */
+
+#ifdef MODULE
+#define INIT static __init
+#define EXIT static __exit
+#else
+#define INIT
+#define EXIT
+#endif
+
+INIT int toi_cluster_init(void)
+{
+  int temp = toi_register_module(&toi_cluster_ops), i;
+  struct kobject *kobj = toi_cluster_ops.dir_kobj;
+
+  for (i = 0; i < MAX_LOCAL_NODES; i++) {
+    node_array[i].current_message = 0;
+    INIT_LIST_HEAD(&node_array[i].member_list);
+    init_waitqueue_head(&node_array[i].member_events);
+    spin_lock_init(&node_array[i].member_list_lock);
+    spin_lock_init(&node_array[i].receive_lock);
+
+    /* Set up sysfs entry */
+    node_array[i].sysfs_data.attr.name = toi_kzalloc(8,
+        sizeof(node_array[i].sysfs_data.attr.name),
+        GFP_KERNEL);
+    sprintf((char *) node_array[i].sysfs_data.attr.name, "node_%d",
+        i);
+    node_array[i].sysfs_data.attr.mode = SYSFS_RW;
+    node_array[i].sysfs_data.type = TOI_SYSFS_DATA_INTEGER;
+    node_array[i].sysfs_data.flags = 0;
+    node_array[i].sysfs_data.data.integer.variable =
+      (int *) &node_array[i].current_message;
+    node_array[i].sysfs_data.data.integer.minimum = 0;
+    node_array[i].sysfs_data.data.integer.maximum = INT_MAX;
+    node_array[i].sysfs_data.write_side_effect =
+      node_write_side_effect;
+    toi_register_sysfs_file(kobj, &node_array[i].sysfs_data);
+  }
+
+  toi_cluster_ops.enabled = (strlen(toi_cluster_iface) > 0);
+
+  if (toi_cluster_ops.enabled)
+    toi_cluster_open_iface();
+
+  return temp;
+}
+
+EXIT void toi_cluster_exit(void)
+{
+  int i;
+  toi_cluster_close_iface();
+
+  for (i = 0; i < MAX_LOCAL_NODES; i++)
+    toi_unregister_sysfs_file(toi_cluster_ops.dir_kobj,
+        &node_array[i].sysfs_data);
+  toi_unregister_module(&toi_cluster_ops);
+}
+
+static int __init toi_cluster_iface_setup(char *iface)
+{
+  toi_cluster_ops.enabled = (*iface &&
+      strcmp(iface, "off"));
+
+  if (toi_cluster_ops.enabled)
+    strncpy(toi_cluster_iface, iface, strlen(iface));
+}
+
+__setup("toi_cluster=", toi_cluster_iface_setup);
diff --git a/kernel/power/tuxonice_cluster.h b/kernel/power/tuxonice_cluster.h
new file mode 100644
index 000000000..84356b304
--- /dev/null
+++ b/kernel/power/tuxonice_cluster.h
@@ -0,0 +1,18 @@
+/*
+ * kernel/power/tuxonice_cluster.h
+ *
+ * Copyright (C) 2006-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ */
+
+#ifdef CONFIG_TOI_CLUSTER
+extern int toi_cluster_init(void);
+extern void toi_cluster_exit(void);
+extern void toi_initiate_cluster_hibernate(void);
+#else
+static inline int toi_cluster_init(void) { return 0; }
+static inline void toi_cluster_exit(void) { }
+static inline void toi_initiate_cluster_hibernate(void) { }
+#endif
+
diff --git a/kernel/power/tuxonice_compress.c b/kernel/power/tuxonice_compress.c
new file mode 100644
index 000000000..482559362
--- /dev/null
+++ b/kernel/power/tuxonice_compress.c
@@ -0,0 +1,452 @@
+/*
+ * kernel/power/compression.c
+ *
+ * Copyright (C) 2003-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * This file contains data compression routines for TuxOnIce,
+ * using cryptoapi.
+ */
+
+#include <linux/suspend.h>
+#include <linux/highmem.h>
+#include <linux/vmalloc.h>
+#include <linux/crypto.h>
+
+#include "tuxonice_builtin.h"
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_io.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_alloc.h"
+
+static int toi_expected_compression;
+
+static struct toi_module_ops toi_compression_ops;
+static struct toi_module_ops *next_driver;
+
+static char toi_compressor_name[32] = "lzo";
+
+static DEFINE_MUTEX(stats_lock);
+
+struct cpu_context {
+  u8 *page_buffer;
+  struct crypto_comp *transform;
+  unsigned int len;
+  u8 *buffer_start;
+  u8 *output_buffer;
+};
+
+#define OUT_BUF_SIZE (2 * PAGE_SIZE)
+
+static DEFINE_PER_CPU(struct cpu_context, contexts);
+
+/*
+ * toi_crypto_prepare
+ *
+ * Prepare to do some work by allocating buffers and transforms.
+ */
+static int toi_compress_crypto_prepare(void)
+{
+  int cpu;
+
+  if (!*toi_compressor_name) {
+    printk(KERN_INFO "TuxOnIce: Compression enabled but no "
+        "compressor name set.\n");
+    return 1;
+  }
+
+  for_each_online_cpu(cpu) {
+    struct cpu_context *this = &per_cpu(contexts, cpu);
+    this->transform = crypto_alloc_comp(toi_compressor_name, 0, 0);
+    if (IS_ERR(this->transform)) {
+      printk(KERN_INFO "TuxOnIce: Failed to initialise the "
+          "%s compression transform.\n",
+          toi_compressor_name);
+      this->transform = NULL;
+      return 1;
+    }
+
+    this->page_buffer =
+      (char *) toi_get_zeroed_page(16, TOI_ATOMIC_GFP);
+
+    if (!this->page_buffer) {
+      printk(KERN_ERR
+          "Failed to allocate a page buffer for TuxOnIce "
+          "compression driver.\n");
+      return -ENOMEM;
+    }
+
+    this->output_buffer =
+      (char *) vmalloc_32(OUT_BUF_SIZE);
+
+    if (!this->output_buffer) {
+      printk(KERN_ERR
+          "Failed to allocate a output buffer for TuxOnIce "
+          "compression driver.\n");
+      return -ENOMEM;
+    }
+  }
+
+  return 0;
+}
+
+static int toi_compress_rw_cleanup(int writing)
+{
+  int cpu;
+
+  for_each_online_cpu(cpu) {
+    struct cpu_context *this = &per_cpu(contexts, cpu);
+    if (this->transform) {
+      crypto_free_comp(this->transform);
+      this->transform = NULL;
+    }
+
+    if (this->page_buffer)
+      toi_free_page(16, (unsigned long) this->page_buffer);
+
+    this->page_buffer = NULL;
+
+    if (this->output_buffer)
+      vfree(this->output_buffer);
+
+    this->output_buffer = NULL;
+  }
+
+  return 0;
+}
+
+/*
+ * toi_compress_init
+ */
+
+static int toi_compress_init(int toi_or_resume)
+{
+  if (!toi_or_resume)
+    return 0;
+
+  toi_compress_bytes_in = 0;
+  toi_compress_bytes_out = 0;
+
+  next_driver = toi_get_next_filter(&toi_compression_ops);
+
+  return next_driver ? 0 : -ECHILD;
+}
+
+/*
+ * toi_compress_rw_init()
+ */
+
+static int toi_compress_rw_init(int rw, int stream_number)
+{
+  if (toi_compress_crypto_prepare()) {
+    printk(KERN_ERR "Failed to initialise compression "
+        "algorithm.\n");
+    if (rw == READ) {
+      printk(KERN_INFO "Unable to read the image.\n");
+      return -ENODEV;
+    } else {
+      printk(KERN_INFO "Continuing without "
+          "compressing the image.\n");
+      toi_compression_ops.enabled = 0;
+    }
+  }
+
+  return 0;
+}
+
+/*
+ * toi_compress_write_page()
+ *
+ * Compress a page of data, buffering output and passing on filled
+ * pages to the next module in the pipeline.
+ *
+ * Buffer_page:        Pointer to a buffer of size PAGE_SIZE, containing
+ * data to be compressed.
+ *
+ * Returns:        0 on success. Otherwise the error is that returned by later
+ *                 modules, -ECHILD if we have a broken pipeline or -EIO if
+ *                 zlib errs.
+ */
+static int toi_compress_write_page(unsigned long index, int buf_type,
+    void *buffer_page, unsigned int buf_size)
+{
+  int ret = 0, cpu = smp_processor_id();
+  struct cpu_context *ctx = &per_cpu(contexts, cpu);
+  u8* output_buffer = buffer_page;
+  int output_len = buf_size;
+  int out_buf_type = buf_type;
+
+  if (ctx->transform) {
+
+    ctx->buffer_start = TOI_MAP(buf_type, buffer_page);
+    ctx->len = OUT_BUF_SIZE;
+
+    ret = crypto_comp_compress(ctx->transform,
+        ctx->buffer_start, buf_size,
+        ctx->output_buffer, &ctx->len);
+
+    TOI_UNMAP(buf_type, buffer_page);
+
+    toi_message(TOI_COMPRESS, TOI_VERBOSE, 0,
+        "CPU %d, index %lu: %d bytes",
+        cpu, index, ctx->len);
+
+    if (!ret && ctx->len < buf_size) { /* some compression */
+      output_buffer = ctx->output_buffer;
+      output_len = ctx->len;
+      out_buf_type = TOI_VIRT;
+    }
+
+  }
+
+  mutex_lock(&stats_lock);
+
+  toi_compress_bytes_in += buf_size;
+  toi_compress_bytes_out += output_len;
+
+  mutex_unlock(&stats_lock);
+
+  if (!ret)
+    ret = next_driver->write_page(index, out_buf_type,
+        output_buffer, output_len);
+
+  return ret;
+}
+
+/*
+ * toi_compress_read_page()
+ * @buffer_page: struct page *. Pointer to a buffer of size PAGE_SIZE.
+ *
+ * Retrieve data from later modules and decompress it until the input buffer
+ * is filled.
+ * Zero if successful. Error condition from me or from downstream on failure.
+ */
+static int toi_compress_read_page(unsigned long *index, int buf_type,
+    void *buffer_page, unsigned int *buf_size)
+{
+  int ret, cpu = smp_processor_id();
+  unsigned int len;
+  unsigned int outlen = PAGE_SIZE;
+  char *buffer_start;
+  struct cpu_context *ctx = &per_cpu(contexts, cpu);
+
+  if (!ctx->transform)
+    return next_driver->read_page(index, TOI_PAGE, buffer_page,
+        buf_size);
+
+  /*
+   * All our reads must be synchronous - we can't decompress
+   * data that hasn't been read yet.
+   */
+
+  ret = next_driver->read_page(index, TOI_VIRT, ctx->page_buffer, &len);
+
+  buffer_start = kmap(buffer_page);
+
+  /* Error or uncompressed data */
+  if (ret || len == PAGE_SIZE) {
+    memcpy(buffer_start, ctx->page_buffer, len);
+    goto out;
+  }
+
+  ret = crypto_comp_decompress(
+      ctx->transform,
+      ctx->page_buffer,
+      len, buffer_start, &outlen);
+
+  toi_message(TOI_COMPRESS, TOI_VERBOSE, 0,
+      "CPU %d, index %lu: %d=>%d (%d).",
+      cpu, *index, len, outlen, ret);
+
+  if (ret)
+    abort_hibernate(TOI_FAILED_IO,
+        "Compress_read returned %d.\n", ret);
+  else if (outlen != PAGE_SIZE) {
+    abort_hibernate(TOI_FAILED_IO,
+        "Decompression yielded %d bytes instead of %ld.\n",
+        outlen, PAGE_SIZE);
+    printk(KERN_ERR "Decompression yielded %d bytes instead of "
+        "%ld.\n", outlen, PAGE_SIZE);
+    ret = -EIO;
+    *buf_size = outlen;
+  }
+out:
+  TOI_UNMAP(buf_type, buffer_page);
+  return ret;
+}
+
+/*
+ * toi_compress_print_debug_stats
+ * @buffer: Pointer to a buffer into which the debug info will be printed.
+ * @size: Size of the buffer.
+ *
+ * Print information to be recorded for debugging purposes into a buffer.
+ * Returns: Number of characters written to the buffer.
+ */
+
+static int toi_compress_print_debug_stats(char *buffer, int size)
+{
+  unsigned long pages_in = toi_compress_bytes_in >> PAGE_SHIFT,
+                pages_out = toi_compress_bytes_out >> PAGE_SHIFT;
+  int len;
+
+  /* Output the compression ratio achieved. */
+  if (*toi_compressor_name)
+    len = scnprintf(buffer, size, "- Compressor is '%s'.\n",
+        toi_compressor_name);
+  else
+    len = scnprintf(buffer, size, "- Compressor is not set.\n");
+
+  if (pages_in)
+    len += scnprintf(buffer+len, size - len, "  Compressed "
+        "%lu bytes into %lu (%ld percent compression).\n",
+        toi_compress_bytes_in,
+        toi_compress_bytes_out,
+        (pages_in - pages_out) * 100 / pages_in);
+  return len;
+}
+
+/*
+ * toi_compress_compression_memory_needed
+ *
+ * Tell the caller how much memory we need to operate during hibernate/resume.
+ * Returns: Unsigned long. Maximum number of bytes of memory required for
+ * operation.
+ */
+static int toi_compress_memory_needed(void)
+{
+  return 2 * PAGE_SIZE;
+}
+
+static int toi_compress_storage_needed(void)
+{
+  return 2 * sizeof(unsigned long) + 2 * sizeof(int) +
+    strlen(toi_compressor_name) + 1;
+}
+
+/*
+ * toi_compress_save_config_info
+ * @buffer: Pointer to a buffer of size PAGE_SIZE.
+ *
+ * Save informaton needed when reloading the image at resume time.
+ * Returns: Number of bytes used for saving our data.
+ */
+static int toi_compress_save_config_info(char *buffer)
+{
+  int len = strlen(toi_compressor_name) + 1, offset = 0;
+
+  *((unsigned long *) buffer) = toi_compress_bytes_in;
+  offset += sizeof(unsigned long);
+  *((unsigned long *) (buffer + offset)) = toi_compress_bytes_out;
+  offset += sizeof(unsigned long);
+  *((int *) (buffer + offset)) = toi_expected_compression;
+  offset += sizeof(int);
+  *((int *) (buffer + offset)) = len;
+  offset += sizeof(int);
+  strncpy(buffer + offset, toi_compressor_name, len);
+  return offset + len;
+}
+
+/* toi_compress_load_config_info
+ * @buffer: Pointer to the start of the data.
+ * @size: Number of bytes that were saved.
+ *
+ * Description:        Reload information needed for decompressing the image at
+ * resume time.
+ */
+static void toi_compress_load_config_info(char *buffer, int size)
+{
+  int len, offset = 0;
+
+  toi_compress_bytes_in = *((unsigned long *) buffer);
+  offset += sizeof(unsigned long);
+  toi_compress_bytes_out = *((unsigned long *) (buffer + offset));
+  offset += sizeof(unsigned long);
+  toi_expected_compression = *((int *) (buffer + offset));
+  offset += sizeof(int);
+  len = *((int *) (buffer + offset));
+  offset += sizeof(int);
+  strncpy(toi_compressor_name, buffer + offset, len);
+}
+
+static void toi_compress_pre_atomic_restore(struct toi_boot_kernel_data *bkd)
+{
+  bkd->compress_bytes_in = toi_compress_bytes_in;
+  bkd->compress_bytes_out = toi_compress_bytes_out;
+}
+
+static void toi_compress_post_atomic_restore(struct toi_boot_kernel_data *bkd)
+{
+  toi_compress_bytes_in = bkd->compress_bytes_in;
+  toi_compress_bytes_out = bkd->compress_bytes_out;
+}
+
+/*
+ * toi_expected_compression_ratio
+ *
+ * Description:        Returns the expected ratio between data passed into this module
+ *                 and the amount of data output when writing.
+ * Returns:        100 if the module is disabled. Otherwise the value set by the
+ *                 user via our sysfs entry.
+ */
+
+static int toi_compress_expected_ratio(void)
+{
+  if (!toi_compression_ops.enabled)
+    return 100;
+  else
+    return 100 - toi_expected_compression;
+}
+
+/*
+ * data for our sysfs entries.
+ */
+static struct toi_sysfs_data sysfs_params[] = {
+  SYSFS_INT("expected_compression", SYSFS_RW, &toi_expected_compression,
+      0, 99, 0, NULL),
+  SYSFS_INT("enabled", SYSFS_RW, &toi_compression_ops.enabled, 0, 1, 0,
+      NULL),
+  SYSFS_STRING("algorithm", SYSFS_RW, toi_compressor_name, 31, 0, NULL),
+};
+
+/*
+ * Ops structure.
+ */
+static struct toi_module_ops toi_compression_ops = {
+  .type                        = FILTER_MODULE,
+  .name                        = "compression",
+  .directory                = "compression",
+  .module                        = THIS_MODULE,
+  .initialise                = toi_compress_init,
+  .memory_needed                 = toi_compress_memory_needed,
+  .print_debug_info        = toi_compress_print_debug_stats,
+  .save_config_info        = toi_compress_save_config_info,
+  .load_config_info        = toi_compress_load_config_info,
+  .storage_needed                = toi_compress_storage_needed,
+  .expected_compression        = toi_compress_expected_ratio,
+
+  .pre_atomic_restore        = toi_compress_pre_atomic_restore,
+  .post_atomic_restore        = toi_compress_post_atomic_restore,
+
+  .rw_init                = toi_compress_rw_init,
+  .rw_cleanup                = toi_compress_rw_cleanup,
+
+  .write_page                = toi_compress_write_page,
+  .read_page                = toi_compress_read_page,
+
+  .sysfs_data                = sysfs_params,
+  .num_sysfs_entries        = sizeof(sysfs_params) /
+    sizeof(struct toi_sysfs_data),
+};
+
+/* ---- Registration ---- */
+
+static __init int toi_compress_load(void)
+{
+  return toi_register_module(&toi_compression_ops);
+}
+
+late_initcall(toi_compress_load);
diff --git a/kernel/power/tuxonice_copy_before_write.c b/kernel/power/tuxonice_copy_before_write.c
new file mode 100644
index 000000000..294e7c073
--- /dev/null
+++ b/kernel/power/tuxonice_copy_before_write.c
@@ -0,0 +1,240 @@
+/*
+ * kernel/power/tuxonice_copy_before_write.c
+ *
+ * Copyright (C) 2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Routines (apart from the fault handling code) to deal with allocating memory
+ * for copying pages before they are modified, restoring the contents and getting
+ * the contents written to disk.
+ */
+
+#include <linux/percpu-defs.h>
+#include <linux/sched.h>
+#include <linux/tuxonice.h>
+#include "tuxonice_alloc.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice.h"
+
+DEFINE_PER_CPU(struct toi_cbw_state, toi_cbw_states);
+#define CBWS_PER_PAGE (PAGE_SIZE / sizeof(struct toi_cbw))
+#define toi_cbw_pool_size 100
+
+static void _toi_free_cbw_data(struct toi_cbw_state *state)
+{
+  struct toi_cbw *page_ptr, *ptr, *next;
+
+  page_ptr = ptr = state->first;
+
+  while(ptr) {
+    next = ptr->next;
+
+    if (ptr->virt) {
+      toi__free_page(40, virt_to_page(ptr->virt));
+    }
+    if ((((unsigned long) ptr) & PAGE_MASK) != (unsigned long) page_ptr) {
+      /* Must be on a new page - free the previous one. */
+      toi__free_page(40, virt_to_page(page_ptr));
+      page_ptr = ptr;
+    }
+    ptr = next;
+  }
+
+  if (page_ptr) {
+    toi__free_page(40, virt_to_page(page_ptr));
+  }
+
+  state->first = state->next = state->last = NULL;
+  state->size = 0;
+}
+
+void toi_free_cbw_data(void)
+{
+  int i;
+
+  for_each_online_cpu(i) {
+    struct toi_cbw_state *state = &per_cpu(toi_cbw_states, i);
+
+    if (!state->first)
+      continue;
+
+    state->enabled = 0;
+
+    while (state->active) {
+      schedule();
+    }
+
+    _toi_free_cbw_data(state);
+  }
+}
+
+static int _toi_allocate_cbw_data(struct toi_cbw_state *state)
+{
+  while(state->size < toi_cbw_pool_size) {
+    int i;
+    struct toi_cbw *ptr;
+
+    ptr = (struct toi_cbw *) toi_get_zeroed_page(40, GFP_KERNEL);
+
+    if (!ptr) {
+      return -ENOMEM;
+    }
+
+    if (!state->first) {
+      state->first = state->next = state->last = ptr;
+    }
+
+    for (i = 0; i < CBWS_PER_PAGE; i++) {
+      struct toi_cbw *cbw = &ptr[i];
+
+      cbw->virt = (char *) toi_get_zeroed_page(40, GFP_KERNEL);
+      if (!cbw->virt) {
+        state->size += i;
+        printk("Out of memory allocating CBW pages.\n");
+        return -ENOMEM;
+      }
+
+      if (cbw == state->first)
+        continue;
+
+      state->last->next = cbw;
+      state->last = cbw;
+    }
+
+    state->size += CBWS_PER_PAGE;
+  }
+
+  state->enabled = 1;
+
+  return 0;
+}
+
+
+int toi_allocate_cbw_data(void)
+{
+  int i, result;
+
+  for_each_online_cpu(i) {
+    struct toi_cbw_state *state = &per_cpu(toi_cbw_states, i);
+
+    result = _toi_allocate_cbw_data(state);
+
+    if (result)
+      return result;
+  }
+
+  return 0;
+}
+
+void toi_cbw_restore(void)
+{
+  if (!toi_keeping_image)
+    return;
+
+}
+
+void toi_cbw_write(void)
+{
+  if (!toi_keeping_image)
+    return;
+
+}
+
+/**
+ * toi_cbw_test_read - Test copy before write on one page
+ *
+ * Allocate copy before write buffers, then make one page only copy-before-write
+ * and attempt to write to it. We should then be able to retrieve the original
+ * version from the cbw buffer and the modified version from the page itself.
+ */
+static int toi_cbw_test_read(const char *buffer, int count)
+{
+  unsigned long virt = toi_get_zeroed_page(40, GFP_KERNEL);
+  char *original = "Original contents";
+  char *modified = "Modified material";
+  struct page *page = virt_to_page(virt);
+  int i, len = 0, found = 0, pfn = page_to_pfn(page);
+
+  if (!page) {
+    printk("toi_cbw_test_read: Unable to allocate a page for testing.\n");
+    return -ENOMEM;
+  }
+
+  memcpy((char *) virt, original, strlen(original));
+
+  if (toi_allocate_cbw_data()) {
+    printk("toi_cbw_test_read: Unable to allocate cbw data.\n");
+    return -ENOMEM;
+  }
+
+  toi_reset_dirtiness_one(pfn, 0);
+
+  SetPageTOI_CBW(page);
+
+  memcpy((char *) virt, modified, strlen(modified));
+
+  if (strncmp((char *) virt, modified, strlen(modified))) {
+    len += sprintf((char *) buffer + len, "Failed to write to page after protecting it.\n");
+  }
+
+  for_each_online_cpu(i) {
+    struct toi_cbw_state *state = &per_cpu(toi_cbw_states, i);
+    struct toi_cbw *ptr = state->first, *last_ptr = ptr;
+
+    if (!found) {
+      while (ptr) {
+        if (ptr->pfn == pfn) {
+          found = 1;
+          if (strncmp(ptr->virt, original, strlen(original))) {
+            len += sprintf((char *) buffer + len, "Contents of original buffer are not original.\n");
+          } else {
+            len += sprintf((char *) buffer + len, "Test passed. Buffer changed and original contents preserved.\n");
+          }
+          break;
+        }
+
+        last_ptr = ptr;
+        ptr = ptr->next;
+      }
+    }
+
+    if (!last_ptr)
+      len += sprintf((char *) buffer + len, "All available CBW buffers on cpu %d used.\n", i);
+  }
+
+  if (!found)
+    len += sprintf((char *) buffer + len, "Copy before write buffer not found.\n");
+
+  toi_free_cbw_data();
+
+  return len;
+}
+
+/*
+ * This array contains entries that are automatically registered at
+ * boot. Modules and the console code register their own entries separately.
+ */
+static struct toi_sysfs_data sysfs_params[] = {
+  SYSFS_CUSTOM("test", SYSFS_RW, toi_cbw_test_read,
+      NULL, SYSFS_NEEDS_SM_FOR_READ, NULL),
+};
+
+static struct toi_module_ops toi_cbw_ops = {
+  .type                                        = MISC_HIDDEN_MODULE,
+  .name                                        = "copy_before_write debugging",
+  .directory                                = "cbw",
+  .module                                        = THIS_MODULE,
+  .early                                        = 1,
+
+  .sysfs_data                = sysfs_params,
+  .num_sysfs_entries        = sizeof(sysfs_params) /
+    sizeof(struct toi_sysfs_data),
+};
+
+int toi_cbw_init(void)
+{
+  int result = toi_register_module(&toi_cbw_ops);
+  return result;
+}
diff --git a/kernel/power/tuxonice_extent.c b/kernel/power/tuxonice_extent.c
new file mode 100644
index 000000000..fdb9e6c82
--- /dev/null
+++ b/kernel/power/tuxonice_extent.c
@@ -0,0 +1,144 @@
+/*
+ * kernel/power/tuxonice_extent.c
+ *
+ * Copyright (C) 2003-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * Distributed under GPLv2.
+ *
+ * These functions encapsulate the manipulation of storage metadata.
+ */
+
+#include <linux/suspend.h>
+#include "tuxonice_modules.h"
+#include "tuxonice_extent.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_ui.h"
+#include "tuxonice.h"
+
+/**
+ * toi_get_extent - return a free extent
+ *
+ * May fail, returning NULL instead.
+ **/
+static struct hibernate_extent *toi_get_extent(void)
+{
+  return (struct hibernate_extent *) toi_kzalloc(2,
+      sizeof(struct hibernate_extent), TOI_ATOMIC_GFP);
+}
+
+/**
+ * toi_put_extent_chain - free a chain of extents starting from value 'from'
+ * @chain:        Chain to free.
+ *
+ * Note that 'from' is an extent value, and may be part way through an extent.
+ * In this case, the extent should be truncated (if necessary) and following
+ * extents freed.
+ **/
+void toi_put_extent_chain_from(struct hibernate_extent_chain *chain, unsigned long from)
+{
+  struct hibernate_extent *this;
+
+  this = chain->first;
+
+  while (this) {
+    struct hibernate_extent *next = this->next;
+
+    // Delete the whole extent?
+    if (this->start >= from) {
+      chain->size -= (this->end - this->start + 1);
+      if (chain->first == this)
+        chain->first = next;
+      if (chain->last_touched == this)
+        chain->last_touched = NULL;
+      if (chain->current_extent == this)
+        chain->current_extent = NULL;
+      toi_kfree(2, this, sizeof(*this));
+      chain->num_extents--;
+    } else if (this->end >= from) {
+      // Delete part of the extent
+      chain->size -= (this->end - from + 1);
+      this->start = from;
+    }
+    this = next;
+  }
+}
+
+/**
+ * toi_put_extent_chain - free a whole chain of extents
+ * @chain:        Chain to free.
+ **/
+void toi_put_extent_chain(struct hibernate_extent_chain *chain)
+{
+  toi_put_extent_chain_from(chain, 0);
+}
+
+/**
+ * toi_add_to_extent_chain - add an extent to an existing chain
+ * @chain:        Chain to which the extend should be added
+ * @start:        Start of the extent (first physical block)
+ * @end:        End of the extent (last physical block)
+ *
+ * The chain information is updated if the insertion is successful.
+ **/
+int toi_add_to_extent_chain(struct hibernate_extent_chain *chain,
+    unsigned long start, unsigned long end)
+{
+  struct hibernate_extent *new_ext = NULL, *cur_ext = NULL;
+
+  toi_message(TOI_IO, TOI_VERBOSE, 0,
+      "Adding extent %lu-%lu to chain %p.\n", start, end, chain);
+
+  /* Find the right place in the chain */
+  if (chain->last_touched && chain->last_touched->start < start)
+    cur_ext = chain->last_touched;
+  else if (chain->first && chain->first->start < start)
+    cur_ext = chain->first;
+
+  if (cur_ext) {
+    while (cur_ext->next && cur_ext->next->start < start)
+      cur_ext = cur_ext->next;
+
+    if (cur_ext->end == (start - 1)) {
+      struct hibernate_extent *next_ext = cur_ext->next;
+      cur_ext->end = end;
+
+      /* Merge with the following one? */
+      if (next_ext && cur_ext->end + 1 == next_ext->start) {
+        cur_ext->end = next_ext->end;
+        cur_ext->next = next_ext->next;
+        toi_kfree(2, next_ext, sizeof(*next_ext));
+        chain->num_extents--;
+      }
+
+      chain->last_touched = cur_ext;
+      chain->size += (end - start + 1);
+
+      return 0;
+    }
+  }
+
+  new_ext = toi_get_extent();
+  if (!new_ext) {
+    printk(KERN_INFO "Error unable to append a new extent to the "
+        "chain.\n");
+    return -ENOMEM;
+  }
+
+  chain->num_extents++;
+  chain->size += (end - start + 1);
+  new_ext->start = start;
+  new_ext->end = end;
+
+  chain->last_touched = new_ext;
+
+  if (cur_ext) {
+    new_ext->next = cur_ext->next;
+    cur_ext->next = new_ext;
+  } else {
+    if (chain->first)
+      new_ext->next = chain->first;
+    chain->first = new_ext;
+  }
+
+  return 0;
+}
diff --git a/kernel/power/tuxonice_extent.h b/kernel/power/tuxonice_extent.h
new file mode 100644
index 000000000..f38a56193
--- /dev/null
+++ b/kernel/power/tuxonice_extent.h
@@ -0,0 +1,45 @@
+/*
+ * kernel/power/tuxonice_extent.h
+ *
+ * Copyright (C) 2003-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * It contains declarations related to extents. Extents are
+ * TuxOnIce's method of storing some of the metadata for the image.
+ * See tuxonice_extent.c for more info.
+ *
+ */
+
+#include "tuxonice_modules.h"
+
+#ifndef EXTENT_H
+#define EXTENT_H
+
+struct hibernate_extent {
+  unsigned long start, end;
+  struct hibernate_extent *next;
+};
+
+struct hibernate_extent_chain {
+  unsigned long size; /* size of the chain ie sum (max-min+1) */
+  int num_extents;
+  struct hibernate_extent *first, *last_touched;
+  struct hibernate_extent *current_extent;
+  unsigned long current_offset;
+};
+
+/* Simplify iterating through all the values in an extent chain */
+#define toi_extent_for_each(extent_chain, extentpointer, value) \
+  if ((extent_chain)->first) \
+for ((extentpointer) = (extent_chain)->first, (value) = \
+    (extentpointer)->start; \
+    ((extentpointer) && ((extentpointer)->next || (value) <= \
+      (extentpointer)->end)); \
+    (((value) == (extentpointer)->end) ? \
+     ((extentpointer) = (extentpointer)->next, (value) = \
+      ((extentpointer) ? (extentpointer)->start : 0)) : \
+     (value)++))
+
+extern void toi_put_extent_chain_from(struct hibernate_extent_chain *chain, unsigned long from);
+#endif
diff --git a/kernel/power/tuxonice_file.c b/kernel/power/tuxonice_file.c
new file mode 100644
index 000000000..36e5fce15
--- /dev/null
+++ b/kernel/power/tuxonice_file.c
@@ -0,0 +1,484 @@
+/*
+ * kernel/power/tuxonice_file.c
+ *
+ * Copyright (C) 2005-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * Distributed under GPLv2.
+ *
+ * This file encapsulates functions for usage of a simple file as a
+ * backing store. It is based upon the swapallocator, and shares the
+ * same basic working. Here, though, we have nothing to do with
+ * swapspace, and only one device to worry about.
+ *
+ * The user can just
+ *
+ * echo TuxOnIce > /path/to/my_file
+ *
+ * dd if=/dev/zero bs=1M count=<file_size_desired> >> /path/to/my_file
+ *
+ * and
+ *
+ * echo /path/to/my_file > /sys/power/tuxonice/file/target
+ *
+ * then put what they find in /sys/power/tuxonice/resume
+ * as their resume= parameter in lilo.conf (and rerun lilo if using it).
+ *
+ * Having done this, they're ready to hibernate and resume.
+ *
+ * TODO:
+ * - File resizing.
+ */
+
+#include <linux/blkdev.h>
+#include <linux/mount.h>
+#include <linux/fs.h>
+#include <linux/fs_uuid.h>
+
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_bio.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_io.h"
+
+#define target_is_normal_file() (S_ISREG(target_inode->i_mode))
+
+static struct toi_module_ops toi_fileops;
+
+static struct file *target_file;
+static struct block_device *toi_file_target_bdev;
+static unsigned long pages_available, pages_allocated;
+static char toi_file_target[256];
+static struct inode *target_inode;
+static int file_target_priority;
+static int used_devt;
+static int target_claim;
+static dev_t toi_file_dev_t;
+static int sig_page_index;
+
+/* For test_toi_file_target */
+static struct toi_bdev_info *file_chain;
+
+static int has_contiguous_blocks(struct toi_bdev_info *dev_info, int page_num)
+{
+  int j;
+  sector_t last = 0;
+
+  for (j = 0; j < dev_info->blocks_per_page; j++) {
+    sector_t this = bmap(target_inode,
+        page_num * dev_info->blocks_per_page + j);
+
+    if (!this || (last && (last + 1) != this))
+      break;
+
+    last = this;
+  }
+
+  return j == dev_info->blocks_per_page;
+}
+
+static unsigned long get_usable_pages(struct toi_bdev_info *dev_info)
+{
+  unsigned long result = 0;
+  struct block_device *bdev = dev_info->bdev;
+  int i;
+
+  switch (target_inode->i_mode & S_IFMT) {
+    case S_IFSOCK:
+    case S_IFCHR:
+    case S_IFIFO: /* Socket, Char, Fifo */
+      return -1;
+    case S_IFREG: /* Regular file: current size - holes + free
+                     space on part */
+      for (i = 0; i < (target_inode->i_size >> PAGE_SHIFT) ; i++) {
+        if (has_contiguous_blocks(dev_info, i))
+          result++;
+      }
+      break;
+    case S_IFBLK: /* Block device */
+      if (!bdev->bd_disk) {
+        toi_message(TOI_IO, TOI_VERBOSE, 0,
+            "bdev->bd_disk null.");
+        return 0;
+      }
+
+      result = (bdev->bd_part ?
+          bdev->bd_part->nr_sects :
+          get_capacity(bdev->bd_disk)) >> (PAGE_SHIFT - 9);
+  }
+
+
+  return result;
+}
+
+static int toi_file_register_storage(void)
+{
+  struct toi_bdev_info *devinfo;
+  int result = 0;
+  struct fs_info *fs_info;
+
+  toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_file_register_storage.");
+  if (!strlen(toi_file_target)) {
+    toi_message(TOI_IO, TOI_VERBOSE, 0, "Register file storage: "
+        "No target filename set.");
+    return 0;
+  }
+
+  target_file = filp_open(toi_file_target, O_RDONLY|O_LARGEFILE, 0);
+  toi_message(TOI_IO, TOI_VERBOSE, 0, "filp_open %s returned %p.",
+      toi_file_target, target_file);
+
+  if (IS_ERR(target_file) || !target_file) {
+    target_file = NULL;
+    toi_file_dev_t = name_to_dev_t(toi_file_target);
+    if (!toi_file_dev_t) {
+      struct kstat stat;
+      int error = vfs_stat(toi_file_target, &stat);
+      printk(KERN_INFO "Open file %s returned %p and "
+          "name_to_devt failed.\n",
+          toi_file_target, target_file);
+      if (error) {
+        printk(KERN_INFO "Stating the file also failed."
+            " Nothing more we can do.\n");
+        return 0;
+      } else
+        toi_file_dev_t = stat.rdev;
+    }
+
+    toi_file_target_bdev = toi_open_by_devnum(toi_file_dev_t);
+    if (IS_ERR(toi_file_target_bdev)) {
+      printk(KERN_INFO "Got a dev_num (%lx) but failed to "
+          "open it.\n",
+          (unsigned long) toi_file_dev_t);
+      toi_file_target_bdev = NULL;
+      return 0;
+    }
+    used_devt = 1;
+    target_inode = toi_file_target_bdev->bd_inode;
+  } else
+    target_inode = target_file->f_mapping->host;
+
+  toi_message(TOI_IO, TOI_VERBOSE, 0, "Succeeded in opening the target.");
+  if (S_ISLNK(target_inode->i_mode) || S_ISDIR(target_inode->i_mode) ||
+      S_ISSOCK(target_inode->i_mode) || S_ISFIFO(target_inode->i_mode)) {
+    printk(KERN_INFO "File support works with regular files,"
+        " character files and block devices.\n");
+    /* Cleanup routine will undo the above */
+    return 0;
+  }
+
+  if (!used_devt) {
+    if (S_ISBLK(target_inode->i_mode)) {
+      toi_file_target_bdev = I_BDEV(target_inode);
+      if (!blkdev_get(toi_file_target_bdev, FMODE_WRITE |
+            FMODE_READ, NULL))
+        target_claim = 1;
+    } else
+      toi_file_target_bdev = target_inode->i_sb->s_bdev;
+    if (!toi_file_target_bdev) {
+      printk(KERN_INFO "%s is not a valid file allocator "
+          "target.\n", toi_file_target);
+      return 0;
+    }
+    toi_file_dev_t = toi_file_target_bdev->bd_dev;
+  }
+
+  devinfo = toi_kzalloc(39, sizeof(struct toi_bdev_info), GFP_ATOMIC);
+  if (!devinfo) {
+    printk("Failed to allocate a toi_bdev_info struct for the file allocator.\n");
+    return -ENOMEM;
+  }
+
+  devinfo->bdev = toi_file_target_bdev;
+  devinfo->allocator = &toi_fileops;
+  devinfo->allocator_index = 0;
+
+  fs_info = fs_info_from_block_dev(toi_file_target_bdev);
+  if (fs_info && !IS_ERR(fs_info)) {
+    memcpy(devinfo->uuid, &fs_info->uuid, 16);
+    free_fs_info(fs_info);
+  } else
+    result = (int) PTR_ERR(fs_info);
+
+  /* Unlike swap code, only complain if fs_info_from_block_dev returned
+   * -ENOMEM. The 'file' might be a full partition, so might validly not
+   * have an identifiable type, UUID etc.
+   */
+  if (result)
+    printk(KERN_DEBUG "Failed to get fs_info for file device (%d).\n",
+        result);
+  devinfo->dev_t = toi_file_dev_t;
+  devinfo->prio = file_target_priority;
+  devinfo->bmap_shift = target_inode->i_blkbits - 9;
+  devinfo->blocks_per_page =
+    (1 << (PAGE_SHIFT - target_inode->i_blkbits));
+  sprintf(devinfo->name, "file %s", toi_file_target);
+  file_chain = devinfo;
+  toi_message(TOI_IO, TOI_VERBOSE, 0, "Dev_t is %lx. Prio is %d. Bmap "
+      "shift is %d. Blocks per page %d.",
+      devinfo->dev_t, devinfo->prio, devinfo->bmap_shift,
+      devinfo->blocks_per_page);
+
+  /* Keep one aside for the signature */
+  pages_available = get_usable_pages(devinfo) - 1;
+
+  toi_message(TOI_IO, TOI_VERBOSE, 0, "Registering file storage, %lu "
+      "pages.", pages_available);
+
+  toi_bio_ops.register_storage(devinfo);
+  return 0;
+}
+
+static unsigned long toi_file_storage_available(void)
+{
+  return pages_available;
+}
+
+static int toi_file_allocate_storage(struct toi_bdev_info *chain,
+    unsigned long request)
+{
+  unsigned long available = pages_available - pages_allocated;
+  unsigned long to_add = min(available, request);
+
+  toi_message(TOI_IO, TOI_VERBOSE, 0, "Pages available is %lu. Allocated "
+      "is %lu. Allocating %lu pages from file.",
+      pages_available, pages_allocated, to_add);
+  pages_allocated += to_add;
+
+  return to_add;
+}
+
+/**
+ * __populate_block_list - add an extent to the chain
+ * @min:        Start of the extent (first physical block = sector)
+ * @max:        End of the extent (last physical block = sector)
+ *
+ * If TOI_TEST_BIO is set, print a debug message, outputting the min and max
+ * fs block numbers.
+ **/
+static int __populate_block_list(struct toi_bdev_info *chain, int min, int max)
+{
+  if (test_action_state(TOI_TEST_BIO))
+    toi_message(TOI_IO, TOI_VERBOSE, 0, "Adding extent %d-%d.",
+        min << chain->bmap_shift,
+        ((max + 1) << chain->bmap_shift) - 1);
+
+  return toi_add_to_extent_chain(&chain->blocks, min, max);
+}
+
+static int get_main_pool_phys_params(struct toi_bdev_info *chain)
+{
+  int i, extent_min = -1, extent_max = -1, result = 0, have_sig_page = 0;
+  unsigned long pages_mapped = 0;
+
+  toi_message(TOI_IO, TOI_VERBOSE, 0, "Getting file allocator blocks.");
+
+  if (chain->blocks.first)
+    toi_put_extent_chain(&chain->blocks);
+
+  if (!target_is_normal_file()) {
+    result = (pages_available > 0) ?
+      __populate_block_list(chain, chain->blocks_per_page,
+          (pages_allocated + 1) *
+          chain->blocks_per_page - 1) : 0;
+    return result;
+  }
+
+  /*
+   * FIXME: We are assuming the first page is contiguous. Is that
+   * assumption always right?
+   */
+
+  for (i = 0; i < (target_inode->i_size >> PAGE_SHIFT); i++) {
+    sector_t new_sector;
+
+    if (!has_contiguous_blocks(chain, i))
+      continue;
+
+    if (!have_sig_page) {
+      have_sig_page = 1;
+      sig_page_index = i;
+      continue;
+    }
+
+    pages_mapped++;
+
+    /* Ignore first page - it has the header */
+    if (pages_mapped == 1)
+      continue;
+
+    new_sector = bmap(target_inode, (i * chain->blocks_per_page));
+
+    /*
+     * I'd love to be able to fill in holes and resize
+     * files, but not yet...
+     */
+
+    if (new_sector == extent_max + 1)
+      extent_max += chain->blocks_per_page;
+    else {
+      if (extent_min > -1) {
+        result = __populate_block_list(chain,
+            extent_min, extent_max);
+        if (result)
+          return result;
+      }
+
+      extent_min = new_sector;
+      extent_max = extent_min +
+        chain->blocks_per_page - 1;
+    }
+
+    if (pages_mapped == pages_allocated)
+      break;
+  }
+
+  if (extent_min > -1) {
+    result = __populate_block_list(chain, extent_min, extent_max);
+    if (result)
+      return result;
+  }
+
+  return 0;
+}
+
+static void toi_file_free_storage(struct toi_bdev_info *chain)
+{
+  pages_allocated = 0;
+  file_chain = NULL;
+}
+
+/**
+ * toi_file_print_debug_stats - print debug info
+ * @buffer:        Buffer to data to populate
+ * @size:        Size of the buffer
+ **/
+static int toi_file_print_debug_stats(char *buffer, int size)
+{
+  int len = scnprintf(buffer, size, "- File Allocator active.\n");
+
+  len += scnprintf(buffer+len, size-len, "  Storage available for "
+      "image: %lu pages.\n", pages_available);
+
+  return len;
+}
+
+static void toi_file_cleanup(int finishing_cycle)
+{
+  if (toi_file_target_bdev) {
+    if (target_claim) {
+      blkdev_put(toi_file_target_bdev, FMODE_WRITE | FMODE_READ);
+      target_claim = 0;
+    }
+
+    if (used_devt) {
+      blkdev_put(toi_file_target_bdev,
+          FMODE_READ | FMODE_NDELAY);
+      used_devt = 0;
+    }
+    toi_file_target_bdev = NULL;
+    target_inode = NULL;
+  }
+
+  if (target_file) {
+    filp_close(target_file, NULL);
+    target_file = NULL;
+  }
+
+  pages_available = 0;
+}
+
+/**
+ * test_toi_file_target - sysfs callback for /sys/power/tuxonince/file/target
+ *
+ * Test wheter the target file is valid for hibernating.
+ **/
+static void test_toi_file_target(void)
+{
+  int result = toi_file_register_storage();
+  sector_t sector;
+  char buf[50];
+  struct fs_info *fs_info;
+
+  if (result || !file_chain)
+    return;
+
+  /* This doesn't mean we're in business. Is any storage available? */
+  if (!pages_available)
+    goto out;
+
+  toi_file_allocate_storage(file_chain, 1);
+  result = get_main_pool_phys_params(file_chain);
+  if (result)
+    goto out;
+
+
+  sector = bmap(target_inode, sig_page_index *
+      file_chain->blocks_per_page) << file_chain->bmap_shift;
+
+  /* Use the uuid, or the dev_t if that fails */
+  fs_info = fs_info_from_block_dev(toi_file_target_bdev);
+  if (!fs_info || IS_ERR(fs_info)) {
+    bdevname(toi_file_target_bdev, buf);
+    sprintf(resume_file, "/dev/%s:%llu", buf,
+        (unsigned long long) sector);
+  } else {
+    int i;
+    hex_dump_to_buffer(fs_info->uuid, 16, 32, 1, buf, 50, 0);
+
+    /* Remove the spaces */
+    for (i = 1; i < 16; i++) {
+      buf[2 * i] = buf[3 * i];
+      buf[2 * i + 1] = buf[3 * i + 1];
+    }
+    buf[32] = 0;
+    sprintf(resume_file, "UUID=%s:0x%llx", buf,
+        (unsigned long long) sector);
+    free_fs_info(fs_info);
+  }
+
+  toi_attempt_to_parse_resume_device(0);
+out:
+  toi_file_free_storage(file_chain);
+  toi_bio_ops.free_storage();
+}
+
+static struct toi_sysfs_data sysfs_params[] = {
+  SYSFS_STRING("target", SYSFS_RW, toi_file_target, 256,
+      SYSFS_NEEDS_SM_FOR_WRITE, test_toi_file_target),
+  SYSFS_INT("enabled", SYSFS_RW, &toi_fileops.enabled, 0, 1, 0, NULL),
+  SYSFS_INT("priority", SYSFS_RW, &file_target_priority, -4095,
+      4096, 0, NULL),
+};
+
+static struct toi_bio_allocator_ops toi_bio_fileops = {
+  .register_storage                        = toi_file_register_storage,
+  .storage_available                        = toi_file_storage_available,
+  .allocate_storage                        = toi_file_allocate_storage,
+  .bmap                                        = get_main_pool_phys_params,
+  .free_storage                                = toi_file_free_storage,
+};
+
+static struct toi_module_ops toi_fileops = {
+  .type                                        = BIO_ALLOCATOR_MODULE,
+  .name                                        = "file storage",
+  .directory                                = "file",
+  .module                                        = THIS_MODULE,
+  .print_debug_info                        = toi_file_print_debug_stats,
+  .cleanup                                = toi_file_cleanup,
+  .bio_allocator_ops                        = &toi_bio_fileops,
+
+  .sysfs_data                = sysfs_params,
+  .num_sysfs_entries        = sizeof(sysfs_params) /
+    sizeof(struct toi_sysfs_data),
+};
+
+/* ---- Registration ---- */
+static __init int toi_file_load(void)
+{
+  return toi_register_module(&toi_fileops);
+}
+
+late_initcall(toi_file_load);
diff --git a/kernel/power/tuxonice_highlevel.c b/kernel/power/tuxonice_highlevel.c
new file mode 100644
index 000000000..71c811a9f
--- /dev/null
+++ b/kernel/power/tuxonice_highlevel.c
@@ -0,0 +1,1429 @@
+/*
+ * kernel/power/tuxonice_highlevel.c
+ */
+/** \mainpage TuxOnIce.
+ *
+ * TuxOnIce provides support for saving and restoring an image of
+ * system memory to an arbitrary storage device, either on the local computer,
+ * or across some network. The support is entirely OS based, so TuxOnIce
+ * works without requiring BIOS, APM or ACPI support. The vast majority of the
+ * code is also architecture independant, so it should be very easy to port
+ * the code to new architectures. TuxOnIce includes support for SMP, 4G HighMem
+ * and preemption. Initramfses and initrds are also supported.
+ *
+ * TuxOnIce uses a modular design, in which the method of storing the image is
+ * completely abstracted from the core code, as are transformations on the data
+ * such as compression and/or encryption (multiple 'modules' can be used to
+ * provide arbitrary combinations of functionality). The user interface is also
+ * modular, so that arbitrarily simple or complex interfaces can be used to
+ * provide anything from debugging information through to eye candy.
+ *
+ * \section Copyright
+ *
+ * TuxOnIce is released under the GPLv2.
+ *
+ * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu><BR>
+ * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz><BR>
+ * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr><BR>
+ * Copyright (C) 2002-2015 Nigel Cunningham (nigel at nigelcunningham com au)<BR>
+ *
+ * \section Credits
+ *
+ * Nigel would like to thank the following people for their work:
+ *
+ * Bernard Blackham <bernard@blackham.com.au><BR>
+ * Web page & Wiki administration, some coding. A person without whom
+ * TuxOnIce would not be where it is.
+ *
+ * Michael Frank <mhf@linuxmail.org><BR>
+ * Extensive testing and help with improving stability. I was constantly
+ * amazed by the quality and quantity of Michael's help.
+ *
+ * Pavel Machek <pavel@ucw.cz><BR>
+ * Modifications, defectiveness pointing, being with Gabor at the very
+ * beginning, suspend to swap space, stop all tasks. Port to 2.4.18-ac and
+ * 2.5.17. Even though Pavel and I disagree on the direction suspend to
+ * disk should take, I appreciate the valuable work he did in helping Gabor
+ * get the concept working.
+ *
+ * ..and of course the myriads of TuxOnIce users who have helped diagnose
+ * and fix bugs, made suggestions on how to improve the code, proofread
+ * documentation, and donated time and money.
+ *
+ * Thanks also to corporate sponsors:
+ *
+ * <B>Redhat.</B>Sometime employer from May 2006 (my fault, not Redhat's!).
+ *
+ * <B>Cyclades.com.</B> Nigel's employers from Dec 2004 until May 2006, who
+ * allowed him to work on TuxOnIce and PM related issues on company time.
+ *
+ * <B>LinuxFund.org.</B> Sponsored Nigel's work on TuxOnIce for four months Oct
+ * 2003 to Jan 2004.
+ *
+ * <B>LAC Linux.</B> Donated P4 hardware that enabled development and ongoing
+ * maintenance of SMP and Highmem support.
+ *
+ * <B>OSDL.</B> Provided access to various hardware configurations, make
+ * occasional small donations to the project.
+ */
+
+#include <linux/suspend.h>
+#include <linux/module.h>
+#include <linux/freezer.h>
+#include <generated/utsrelease.h>
+#include <linux/cpu.h>
+#include <linux/console.h>
+#include <linux/writeback.h>
+#include <linux/uaccess.h> /* for get/set_fs & KERNEL_DS on i386 */
+#include <linux/bio.h>
+#include <linux/kgdb.h>
+
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_prepare_image.h"
+#include "tuxonice_io.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_power_off.h"
+#include "tuxonice_storage.h"
+#include "tuxonice_checksum.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_atomic_copy.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_cluster.h"
+
+/*! Pageset metadata. */
+struct pagedir pagedir2 = {2};
+
+static mm_segment_t oldfs;
+static DEFINE_MUTEX(tuxonice_in_use);
+static int block_dump_save;
+static int tuxonice_nr_calls;
+
+int toi_trace_index;
+
+/* Binary signature if an image is present */
+char tuxonice_signature[9] = "\xed\xc3\x02\xe9\x98\x56\xe5\x0c";
+
+unsigned long boot_kernel_data_buffer;
+
+static char *result_strings[] = {
+  "Hibernation was aborted",
+  "The user requested that we cancel the hibernation",
+  "No storage was available",
+  "Insufficient storage was available",
+  "Freezing filesystems and/or tasks failed",
+  "A pre-existing image was used",
+  "We would free memory, but image size limit doesn't allow this",
+  "Unable to free enough memory to hibernate",
+  "Unable to obtain the Power Management Semaphore",
+  "A device suspend/resume returned an error",
+  "A system device suspend/resume returned an error",
+  "The extra pages allowance is too small",
+  "We were unable to successfully prepare an image",
+  "TuxOnIce module initialisation failed",
+  "TuxOnIce module cleanup failed",
+  "I/O errors were encountered",
+  "Ran out of memory",
+  "An error was encountered while reading the image",
+  "Platform preparation failed",
+  "CPU Hotplugging failed",
+  "Architecture specific preparation failed",
+  "Pages needed resaving, but we were told to abort if this happens",
+  "We can't hibernate at the moment (invalid resume= or filewriter "
+    "target?)",
+  "A hibernation preparation notifier chain member cancelled the "
+    "hibernation",
+  "Pre-snapshot preparation failed",
+  "Pre-restore preparation failed",
+  "Failed to disable usermode helpers",
+  "Can't resume from alternate image",
+  "Header reservation too small",
+  "Device Power Management Preparation failed",
+};
+
+/**
+ * toi_finish_anything - cleanup after doing anything
+ * @hibernate_or_resume:        Whether finishing a cycle or attempt at
+ *                                resuming.
+ *
+ * This is our basic clean-up routine, matching start_anything below. We
+ * call cleanup routines, drop module references and restore process fs and
+ * cpus allowed masks, together with the global block_dump variable's value.
+ **/
+void toi_finish_anything(int hibernate_or_resume)
+{
+  toi_running = 0;
+  toi_cleanup_modules(hibernate_or_resume);
+  toi_put_modules();
+  if (hibernate_or_resume) {
+    block_dump = block_dump_save;
+    set_cpus_allowed_ptr(current, cpu_all_mask);
+    __pm_notifier_call_chain(PM_POST_HIBERNATION, tuxonice_nr_calls, NULL);
+    pm_restore_console();
+    toi_alloc_print_debug_stats();
+    atomic_inc(&snapshot_device_available);
+    unlock_system_sleep();
+  }
+
+  set_fs(oldfs);
+  mutex_unlock(&tuxonice_in_use);
+}
+
+/**
+ * toi_start_anything - basic initialisation for TuxOnIce
+ * @toi_or_resume:        Whether starting a cycle or attempt at resuming.
+ *
+ * Our basic initialisation routine. Take references on modules, use the
+ * kernel segment, recheck resume= if no active allocator is set, initialise
+ * modules, save and reset block_dump and ensure we're running on CPU0.
+ **/
+int toi_start_anything(int hibernate_or_resume)
+{
+  int error;
+  mutex_lock(&tuxonice_in_use);
+
+  oldfs = get_fs();
+  set_fs(KERNEL_DS);
+
+  toi_trace_index = 0;
+
+  if (hibernate_or_resume) {
+    lock_system_sleep();
+
+    if (!atomic_add_unless(&snapshot_device_available, -1, 0))
+      goto snapshotdevice_unavailable;
+
+    pm_prepare_console();
+
+    error = __pm_notifier_call_chain(PM_HIBERNATION_PREPARE, -1, &tuxonice_nr_calls);
+    if (error) {
+      goto notifier_chain_error;
+    }
+  }
+
+  if (hibernate_or_resume == SYSFS_HIBERNATE)
+    toi_print_modules();
+
+  if (toi_get_modules()) {
+    printk(KERN_INFO "TuxOnIce: Get modules failed!\n");
+    goto get_modules_error;
+  }
+
+  if (hibernate_or_resume) {
+    block_dump_save = block_dump;
+    block_dump = 0;
+    set_cpus_allowed_ptr(current,
+        cpumask_of(cpumask_first(cpu_online_mask)));
+  }
+
+  if (toi_initialise_modules_early(hibernate_or_resume))
+    goto early_init_err;
+
+  if (!toiActiveAllocator)
+    toi_attempt_to_parse_resume_device(!hibernate_or_resume);
+
+  if (!toi_initialise_modules_late(hibernate_or_resume)) {
+    toi_running = 1; /* For the swsusp code we use :< */
+    return 0;
+  }
+
+  toi_cleanup_modules(hibernate_or_resume);
+early_init_err:
+  if (hibernate_or_resume) {
+    block_dump_save = block_dump;
+    set_cpus_allowed_ptr(current, cpu_all_mask);
+  }
+get_modules_error:
+  toi_put_modules();
+notifier_chain_error:
+  if (hibernate_or_resume) {
+    __pm_notifier_call_chain(PM_POST_HIBERNATION, tuxonice_nr_calls, NULL);
+    pm_restore_console();
+  }
+snapshotdevice_unavailable:
+  if (hibernate_or_resume) {
+    atomic_inc(&snapshot_device_available);
+    unlock_system_sleep();
+  }
+  set_fs(oldfs);
+  mutex_unlock(&tuxonice_in_use);
+  return -EBUSY;
+}
+
+/*
+ * Nosave page tracking.
+ *
+ * Here rather than in prepare_image because we want to do it once only at the
+ * start of a cycle.
+ */
+
+/**
+ * mark_nosave_pages - set up our Nosave bitmap
+ *
+ * Build a bitmap of Nosave pages from the list. The bitmap allows faster
+ * use when preparing the image.
+ **/
+static void mark_nosave_pages(void)
+{
+  struct nosave_region *region;
+
+  list_for_each_entry(region, &nosave_regions, list) {
+    unsigned long pfn;
+
+    for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
+      if (pfn_valid(pfn)) {
+        SetPageNosave(pfn_to_page(pfn));
+      }
+  }
+}
+
+/**
+ * allocate_bitmaps - allocate bitmaps used to record page states
+ *
+ * Allocate the bitmaps we use to record the various TuxOnIce related
+ * page states.
+ **/
+static int allocate_bitmaps(void)
+{
+  if (toi_alloc_bitmap(&pageset1_map) ||
+      toi_alloc_bitmap(&pageset1_copy_map) ||
+      toi_alloc_bitmap(&pageset2_map) ||
+      toi_alloc_bitmap(&io_map) ||
+      toi_alloc_bitmap(&nosave_map) ||
+      toi_alloc_bitmap(&free_map) ||
+      toi_alloc_bitmap(&compare_map) ||
+      toi_alloc_bitmap(&page_resave_map))
+    return 1;
+
+  return 0;
+}
+
+/**
+ * free_bitmaps - free the bitmaps used to record page states
+ *
+ * Free the bitmaps allocated above. It is not an error to call
+ * memory_bm_free on a bitmap that isn't currently allocated.
+ **/
+static void free_bitmaps(void)
+{
+  toi_free_bitmap(&pageset1_map);
+  toi_free_bitmap(&pageset1_copy_map);
+  toi_free_bitmap(&pageset2_map);
+  toi_free_bitmap(&io_map);
+  toi_free_bitmap(&nosave_map);
+  toi_free_bitmap(&free_map);
+  toi_free_bitmap(&compare_map);
+  toi_free_bitmap(&page_resave_map);
+}
+
+/**
+ * io_MB_per_second - return the number of MB/s read or written
+ * @write:        Whether to return the speed at which we wrote.
+ *
+ * Calculate the number of megabytes per second that were read or written.
+ **/
+static int io_MB_per_second(int write)
+{
+  return (toi_bkd.toi_io_time[write][1]) ?
+    MB((unsigned long) toi_bkd.toi_io_time[write][0]) * HZ /
+    toi_bkd.toi_io_time[write][1] : 0;
+}
+
+#define SNPRINTF(a...)         do { len += scnprintf(((char *) buffer) + len, \
+    count - len - 1, ## a); } while (0)
+
+/**
+ * get_debug_info - fill a buffer with debugging information
+ * @buffer:        The buffer to be filled.
+ * @count:        The size of the buffer, in bytes.
+ *
+ * Fill a (usually PAGE_SIZEd) buffer with the debugging info that we will
+ * either printk or return via sysfs.
+ **/
+static int get_toi_debug_info(const char *buffer, int count)
+{
+  int len = 0, i, first_result = 1;
+
+  SNPRINTF("TuxOnIce debugging info:\n");
+  SNPRINTF("- TuxOnIce core  : " TOI_CORE_VERSION "\n");
+  SNPRINTF("- Kernel Version : " UTS_RELEASE "\n");
+  SNPRINTF("- Compiler vers. : %d.%d\n", __GNUC__, __GNUC_MINOR__);
+  SNPRINTF("- Attempt number : %d\n", nr_hibernates);
+  SNPRINTF("- Parameters     : %ld %ld %ld %d %ld %ld\n",
+      toi_result,
+      toi_bkd.toi_action,
+      toi_bkd.toi_debug_state,
+      toi_bkd.toi_default_console_level,
+      image_size_limit,
+      toi_poweroff_method);
+  SNPRINTF("- Overall expected compression percentage: %d.\n",
+      100 - toi_expected_compression_ratio());
+  len += toi_print_module_debug_info(((char *) buffer) + len,
+      count - len - 1);
+  if (toi_bkd.toi_io_time[0][1]) {
+    if ((io_MB_per_second(0) < 5) || (io_MB_per_second(1) < 5)) {
+      SNPRINTF("- I/O speed: Write %ld KB/s",
+          (KB((unsigned long) toi_bkd.toi_io_time[0][0]) * HZ /
+           toi_bkd.toi_io_time[0][1]));
+      if (toi_bkd.toi_io_time[1][1])
+        SNPRINTF(", Read %ld KB/s",
+            (KB((unsigned long)
+                toi_bkd.toi_io_time[1][0]) * HZ /
+             toi_bkd.toi_io_time[1][1]));
+    } else {
+      SNPRINTF("- I/O speed: Write %ld MB/s",
+          (MB((unsigned long) toi_bkd.toi_io_time[0][0]) * HZ /
+           toi_bkd.toi_io_time[0][1]));
+      if (toi_bkd.toi_io_time[1][1])
+        SNPRINTF(", Read %ld MB/s",
+            (MB((unsigned long)
+                toi_bkd.toi_io_time[1][0]) * HZ /
+             toi_bkd.toi_io_time[1][1]));
+    }
+    SNPRINTF(".\n");
+  } else
+    SNPRINTF("- No I/O speed stats available.\n");
+  SNPRINTF("- Extra pages    : %lu used/%lu.\n",
+      extra_pd1_pages_used, extra_pd1_pages_allowance);
+
+  for (i = 0; i < TOI_NUM_RESULT_STATES; i++)
+    if (test_result_state(i)) {
+      SNPRINTF("%s: %s.\n", first_result ?
+          "- Result         " :
+          "                 ",
+          result_strings[i]);
+      first_result = 0;
+    }
+  if (first_result)
+    SNPRINTF("- Result         : %s.\n", nr_hibernates ?
+        "Succeeded" :
+        "No hibernation attempts so far");
+  return len;
+}
+
+#ifdef CONFIG_TOI_INCREMENTAL
+/**
+ * get_toi_page_state - fill a buffer with page state information
+ * @buffer:        The buffer to be filled.
+ * @count:        The size of the buffer, in bytes.
+ *
+ * Fill a (usually PAGE_SIZEd) buffer with the debugging info that we will
+ * either printk or return via sysfs.
+ **/
+static int get_toi_page_state(const char *buffer, int count)
+{
+  int free = 0, untracked = 0, dirty = 0, ro = 0, invalid = 0, other = 0, total = 0;
+  int len = 0;
+  struct zone *zone;
+  int allocated_bitmaps = 0;
+
+  set_cpus_allowed_ptr(current,
+      cpumask_of(cpumask_first(cpu_online_mask)));
+
+  if (!free_map) {
+    BUG_ON(toi_alloc_bitmap(&free_map));
+    allocated_bitmaps = 1;
+  }
+
+  toi_generate_free_page_map();
+
+  for_each_populated_zone(zone) {
+    unsigned long loop;
+
+    total += zone->spanned_pages;
+
+    for (loop = 0; loop < zone->spanned_pages; loop++) {
+      unsigned long pfn = zone->zone_start_pfn + loop;
+      struct page *page;
+      int chunk_size;
+
+      if (!pfn_valid(pfn)) {
+        continue;
+      }
+
+      chunk_size = toi_size_of_free_region(zone, pfn);
+      if (chunk_size) {
+        /*
+         * If the page gets allocated, it will be need
+         * saving in an image.
+         * Don't bother with explicitly removing any
+         * RO protection applied below.
+         * We'll SetPageTOI_Dirty(page) if/when it
+         * gets allocated.
+         */
+        free += chunk_size;
+        loop += chunk_size - 1;
+        continue;
+      }
+
+      page = pfn_to_page(pfn);
+
+      if (PageTOI_Untracked(page)) {
+        untracked++;
+      } else if (PageTOI_RO(page)) {
+        ro++;
+      } else if (PageTOI_Dirty(page)) {
+        dirty++;
+      } else {
+        printk("Page %ld state 'other'.\n", pfn);
+        other++;
+      }
+    }
+  }
+
+  if (allocated_bitmaps) {
+    toi_free_bitmap(&free_map);
+  }
+
+  set_cpus_allowed_ptr(current, cpu_all_mask);
+
+  SNPRINTF("TuxOnIce page breakdown:\n");
+  SNPRINTF("- Free           : %d\n", free);
+  SNPRINTF("- Untracked      : %d\n", untracked);
+  SNPRINTF("- Read only      : %d\n", ro);
+  SNPRINTF("- Dirty          : %d\n", dirty);
+  SNPRINTF("- Other          : %d\n", other);
+  SNPRINTF("- Invalid        : %d\n", invalid);
+  SNPRINTF("- Total          : %d\n", total);
+  return len;
+}
+#endif
+
+/**
+ * do_cleanup - cleanup after attempting to hibernate or resume
+ * @get_debug_info:        Whether to allocate and return debugging info.
+ *
+ * Cleanup after attempting to hibernate or resume, possibly getting
+ * debugging info as we do so.
+ **/
+static void do_cleanup(int get_debug_info, int restarting)
+{
+  int i = 0;
+  char *buffer = NULL;
+
+  trap_non_toi_io = 0;
+
+  if (get_debug_info)
+    toi_prepare_status(DONT_CLEAR_BAR, "Cleaning up...");
+
+  free_checksum_pages();
+
+  toi_cbw_restore();
+  toi_free_cbw_data();
+
+  if (get_debug_info)
+    buffer = (char *) toi_get_zeroed_page(20, TOI_ATOMIC_GFP);
+
+  if (buffer)
+    i = get_toi_debug_info(buffer, PAGE_SIZE);
+
+  toi_free_extra_pagedir_memory();
+
+  pagedir1.size = 0;
+  pagedir2.size = 0;
+  set_highmem_size(pagedir1, 0);
+  set_highmem_size(pagedir2, 0);
+
+  if (boot_kernel_data_buffer) {
+    if (!test_toi_state(TOI_BOOT_KERNEL))
+      toi_free_page(37, boot_kernel_data_buffer);
+    boot_kernel_data_buffer = 0;
+  }
+
+  if (test_toi_state(TOI_DEVICE_HOTPLUG_LOCKED)) {
+    unlock_device_hotplug();
+    clear_toi_state(TOI_DEVICE_HOTPLUG_LOCKED);
+  }
+
+  clear_toi_state(TOI_BOOT_KERNEL);
+  if (current->flags & PF_SUSPEND_TASK)
+    thaw_processes();
+
+  if (!restarting)
+    toi_stop_other_threads();
+
+  if (toi_keeping_image &&
+      !test_result_state(TOI_ABORTED)) {
+    toi_message(TOI_ANY_SECTION, TOI_LOW, 1,
+        "TuxOnIce: Not invalidating the image due "
+        "to Keep Image or Incremental Image being enabled.");
+    set_result_state(TOI_KEPT_IMAGE);
+
+    /*
+     * For an incremental image, free unused storage so
+     * swap (if any) can be used for normal system operation,
+     * if so desired.
+     */
+
+    toiActiveAllocator->free_unused_storage();
+  } else
+    if (toiActiveAllocator)
+      toiActiveAllocator->remove_image();
+
+  free_bitmaps();
+  usermodehelper_enable();
+
+  if (test_toi_state(TOI_NOTIFIERS_PREPARE)) {
+    __pm_notifier_call_chain(PM_POST_HIBERNATION, tuxonice_nr_calls, NULL);
+    clear_toi_state(TOI_NOTIFIERS_PREPARE);
+  }
+
+  if (buffer && i) {
+    /* Printk can only handle 1023 bytes, including
+     * its level mangling. */
+    for (i = 0; i < 3; i++)
+      printk(KERN_ERR "%s", buffer + (1023 * i));
+    toi_free_page(20, (unsigned long) buffer);
+  }
+
+  if (!restarting)
+    toi_cleanup_console();
+
+  free_attention_list();
+
+  if (!restarting)
+    toi_deactivate_storage(0);
+
+  clear_toi_state(TOI_IGNORE_LOGLEVEL);
+  clear_toi_state(TOI_TRYING_TO_RESUME);
+  clear_toi_state(TOI_NOW_RESUMING);
+}
+
+/**
+ * check_still_keeping_image - we kept an image; check whether to reuse it.
+ *
+ * We enter this routine when we have kept an image. If the user has said they
+ * want to still keep it, all we need to do is powerdown. If powering down
+ * means hibernating to ram and the power doesn't run out, we'll return 1.
+ * If we do power off properly or the battery runs out, we'll resume via the
+ * normal paths.
+ *
+ * If the user has said they want to remove the previously kept image, we
+ * remove it, and return 0. We'll then store a new image.
+ **/
+static int check_still_keeping_image(void)
+{
+  if (toi_keeping_image) {
+    if (!test_action_state(TOI_INCREMENTAL_IMAGE)) {
+      printk(KERN_INFO "Image already stored: powering down "
+          "immediately.");
+      do_toi_step(STEP_HIBERNATE_POWERDOWN);
+      return 1;
+    }
+    /**
+     * Incremental image - need to write new part.
+     * We detect that we're writing an incremental image by looking
+     * at test_result_state(TOI_KEPT_IMAGE)
+     **/
+    return 0;
+  }
+
+  printk(KERN_INFO "Invalidating previous image.\n");
+  toiActiveAllocator->remove_image();
+
+  return 0;
+}
+
+/**
+ * toi_init - prepare to hibernate to disk
+ *
+ * Initialise variables & data structures, in preparation for
+ * hibernating to disk.
+ **/
+static int toi_init(int restarting)
+{
+  int result, i, j;
+
+  toi_result = 0;
+
+  printk(KERN_INFO "Initiating a hibernation cycle.\n");
+
+  nr_hibernates++;
+
+  for (i = 0; i < 2; i++)
+    for (j = 0; j < 2; j++)
+      toi_bkd.toi_io_time[i][j] = 0;
+
+  if (!test_toi_state(TOI_CAN_HIBERNATE) ||
+      allocate_bitmaps())
+    return 1;
+
+  mark_nosave_pages();
+
+  if (!restarting)
+    toi_prepare_console();
+
+  result = __pm_notifier_call_chain(PM_HIBERNATION_PREPARE, -1, &tuxonice_nr_calls);
+  if (result) {
+    set_result_state(TOI_NOTIFIERS_PREPARE_FAILED);
+    return 1;
+  }
+  set_toi_state(TOI_NOTIFIERS_PREPARE);
+
+  if (!restarting) {
+    printk(KERN_ERR "Starting other threads.");
+    toi_start_other_threads();
+  }
+
+  result = usermodehelper_disable();
+  if (result) {
+    printk(KERN_ERR "TuxOnIce: Failed to disable usermode "
+        "helpers\n");
+    set_result_state(TOI_USERMODE_HELPERS_ERR);
+    return 1;
+  }
+
+  boot_kernel_data_buffer = toi_get_zeroed_page(37, TOI_ATOMIC_GFP);
+  if (!boot_kernel_data_buffer) {
+    printk(KERN_ERR "TuxOnIce: Failed to allocate "
+        "boot_kernel_data_buffer.\n");
+    set_result_state(TOI_OUT_OF_MEMORY);
+    return 1;
+  }
+
+  toi_allocate_cbw_data();
+
+  return 0;
+}
+
+/**
+ * can_hibernate - perform basic 'Can we hibernate?' tests
+ *
+ * Perform basic tests that must pass if we're going to be able to hibernate:
+ * Can we get the pm_mutex? Is resume= valid (we need to know where to write
+ * the image header).
+ **/
+static int can_hibernate(void)
+{
+  if (!test_toi_state(TOI_CAN_HIBERNATE))
+    toi_attempt_to_parse_resume_device(0);
+
+  if (!test_toi_state(TOI_CAN_HIBERNATE)) {
+    printk(KERN_INFO "TuxOnIce: Hibernation is disabled.\n"
+        "This may be because you haven't put something along "
+        "the lines of\n\nresume=swap:/dev/hda1\n\n"
+        "in lilo.conf or equivalent. (Where /dev/hda1 is your "
+        "swap partition).\n");
+    set_abort_result(TOI_CANT_SUSPEND);
+    return 0;
+  }
+
+  if (strlen(alt_resume_param)) {
+    attempt_to_parse_alt_resume_param();
+
+    if (!strlen(alt_resume_param)) {
+      printk(KERN_INFO "Alternate resume parameter now "
+          "invalid. Aborting.\n");
+      set_abort_result(TOI_CANT_USE_ALT_RESUME);
+      return 0;
+    }
+  }
+
+  return 1;
+}
+
+/**
+ * do_post_image_write - having written an image, figure out what to do next
+ *
+ * After writing an image, we might load an alternate image or power down.
+ * Powering down might involve hibernating to ram, in which case we also
+ * need to handle reloading pageset2.
+ **/
+static int do_post_image_write(void)
+{
+  /* If switching images fails, do normal powerdown */
+  if (alt_resume_param[0])
+    do_toi_step(STEP_RESUME_ALT_IMAGE);
+
+  toi_power_down();
+
+  barrier();
+  mb();
+  return 0;
+}
+
+/**
+ * __save_image - do the hard work of saving the image
+ *
+ * High level routine for getting the image saved. The key assumptions made
+ * are that processes have been frozen and sufficient memory is available.
+ *
+ * We also exit through here at resume time, coming back from toi_hibernate
+ * after the atomic restore. This is the reason for the toi_in_hibernate
+ * test.
+ **/
+static int __save_image(void)
+{
+  int temp_result, did_copy = 0;
+
+  toi_prepare_status(DONT_CLEAR_BAR, "Starting to save the image..");
+
+  toi_message(TOI_ANY_SECTION, TOI_LOW, 1,
+      " - Final values: %d and %d.",
+      pagedir1.size, pagedir2.size);
+
+  toi_cond_pause(1, "About to write pagedir2.");
+
+  temp_result = write_pageset(&pagedir2);
+
+  if (temp_result == -1 || test_result_state(TOI_ABORTED))
+    return 1;
+
+  toi_cond_pause(1, "About to copy pageset 1.");
+
+  if (test_result_state(TOI_ABORTED))
+    return 1;
+
+  toi_deactivate_storage(1);
+
+  toi_prepare_status(DONT_CLEAR_BAR, "Doing atomic copy/restore.");
+
+  toi_in_hibernate = 1;
+
+  if (toi_go_atomic(PMSG_FREEZE, 1))
+    goto Failed;
+
+  temp_result = toi_hibernate();
+
+#ifdef CONFIG_KGDB
+  if (test_action_state(TOI_POST_RESUME_BREAKPOINT))
+    kgdb_breakpoint();
+#endif
+
+  if (!temp_result)
+    did_copy = 1;
+
+  /* We return here at resume time too! */
+  toi_end_atomic(ATOMIC_ALL_STEPS, toi_in_hibernate, temp_result);
+
+Failed:
+  if (toi_activate_storage(1))
+    panic("Failed to reactivate our storage.");
+
+  /* Resume time? */
+  if (!toi_in_hibernate) {
+    copyback_post();
+    return 0;
+  }
+
+  /* Nope. Hibernating. So, see if we can save the image... */
+
+  if (temp_result || test_result_state(TOI_ABORTED)) {
+    if (did_copy)
+      goto abort_reloading_pagedir_two;
+    else
+      return 1;
+  }
+
+  toi_update_status(pagedir2.size, pagedir1.size + pagedir2.size,
+      NULL);
+
+  if (test_result_state(TOI_ABORTED))
+    goto abort_reloading_pagedir_two;
+
+  toi_cond_pause(1, "About to write pageset1.");
+
+  toi_message(TOI_ANY_SECTION, TOI_LOW, 1, "-- Writing pageset1");
+
+  temp_result = write_pageset(&pagedir1);
+
+  /* We didn't overwrite any memory, so no reread needs to be done. */
+  if (test_action_state(TOI_TEST_FILTER_SPEED) ||
+      test_action_state(TOI_TEST_BIO))
+    return 1;
+
+  if (temp_result == 1 || test_result_state(TOI_ABORTED))
+    goto abort_reloading_pagedir_two;
+
+  toi_cond_pause(1, "About to write header.");
+
+  if (test_result_state(TOI_ABORTED))
+    goto abort_reloading_pagedir_two;
+
+  temp_result = write_image_header();
+
+  if (!temp_result && !test_result_state(TOI_ABORTED))
+    return 0;
+
+abort_reloading_pagedir_two:
+  temp_result = read_pageset2(1);
+
+  /* If that failed, we're sunk. Panic! */
+  if (temp_result)
+    panic("Attempt to reload pagedir 2 while aborting "
+        "a hibernate failed.");
+
+  return 1;
+}
+
+static void map_ps2_pages(int enable)
+{
+  unsigned long pfn = 0;
+
+  memory_bm_position_reset(pageset2_map);
+  pfn = memory_bm_next_pfn(pageset2_map, 0);
+
+  while (pfn != BM_END_OF_MAP) {
+    struct page *page = pfn_to_page(pfn);
+    kernel_map_pages(page, 1, enable);
+    pfn = memory_bm_next_pfn(pageset2_map, 0);
+  }
+}
+
+/**
+ * do_save_image - save the image and handle the result
+ *
+ * Save the prepared image. If we fail or we're in the path returning
+ * from the atomic restore, cleanup.
+ **/
+static int do_save_image(void)
+{
+  int result;
+  map_ps2_pages(0);
+  result = __save_image();
+  map_ps2_pages(1);
+  return result;
+}
+
+/**
+ * do_prepare_image - try to prepare an image
+ *
+ * Seek to initialise and prepare an image to be saved. On failure,
+ * cleanup.
+ **/
+static int do_prepare_image(void)
+{
+  int restarting = test_result_state(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL);
+
+  if (!restarting && toi_activate_storage(0))
+    return 1;
+
+  /*
+   * If kept image and still keeping image and hibernating to RAM, (non
+   * incremental image case) we will return 1 after hibernating and
+   * resuming (provided the power doesn't run out. In that case, we skip
+   * directly to cleaning up and exiting.
+   */
+
+  if (!can_hibernate() ||
+      (test_result_state(TOI_KEPT_IMAGE) &&
+       check_still_keeping_image()))
+    return 1;
+
+  if (toi_init(restarting) || toi_prepare_image() ||
+      test_result_state(TOI_ABORTED))
+    return 1;
+
+  trap_non_toi_io = 1;
+
+  return 0;
+}
+
+/**
+ * do_check_can_resume - find out whether an image has been stored
+ *
+ * Read whether an image exists. We use the same routine as the
+ * image_exists sysfs entry, and just look to see whether the
+ * first character in the resulting buffer is a '1'.
+ **/
+int do_check_can_resume(void)
+{
+  int result = -1;
+
+  if (toi_activate_storage(0))
+    return -1;
+
+  if (!test_toi_state(TOI_RESUME_DEVICE_OK))
+    toi_attempt_to_parse_resume_device(1);
+
+  if (toiActiveAllocator)
+    result = toiActiveAllocator->image_exists(1);
+
+  toi_deactivate_storage(0);
+  return result;
+}
+
+/**
+ * do_load_atomic_copy - load the first part of an image, if it exists
+ *
+ * Check whether we have an image. If one exists, do sanity checking
+ * (possibly invalidating the image or even rebooting if the user
+ * requests that) before loading it into memory in preparation for the
+ * atomic restore.
+ *
+ * If and only if we have an image loaded and ready to restore, we return 1.
+ **/
+static int do_load_atomic_copy(void)
+{
+  int read_image_result = 0;
+
+  if (sizeof(swp_entry_t) != sizeof(long)) {
+    printk(KERN_WARNING "TuxOnIce: The size of swp_entry_t != size"
+        " of long. Please report this!\n");
+    return 1;
+  }
+
+  if (!resume_file[0])
+    printk(KERN_WARNING "TuxOnIce: "
+        "You need to use a resume= command line parameter to "
+        "tell TuxOnIce where to look for an image.\n");
+
+  toi_activate_storage(0);
+
+  if (!(test_toi_state(TOI_RESUME_DEVICE_OK)) &&
+      !toi_attempt_to_parse_resume_device(0)) {
+    /*
+     * Without a usable storage device we can do nothing -
+     * even if noresume is given
+     */
+
+    if (!toiNumAllocators)
+      printk(KERN_ALERT "TuxOnIce: "
+          "No storage allocators have been registered.\n");
+    else
+      printk(KERN_ALERT "TuxOnIce: "
+          "Missing or invalid storage location "
+          "(resume= parameter). Please correct and "
+          "rerun lilo (or equivalent) before "
+          "hibernating.\n");
+    toi_deactivate_storage(0);
+    return 1;
+  }
+
+  if (allocate_bitmaps())
+    return 1;
+
+  read_image_result = read_pageset1(); /* non fatal error ignored */
+
+  if (test_toi_state(TOI_NORESUME_SPECIFIED))
+    clear_toi_state(TOI_NORESUME_SPECIFIED);
+
+  toi_deactivate_storage(0);
+
+  if (read_image_result)
+    return 1;
+
+  return 0;
+}
+
+/**
+ * prepare_restore_load_alt_image - save & restore alt image variables
+ *
+ * Save and restore the pageset1 maps, when loading an alternate image.
+ **/
+static void prepare_restore_load_alt_image(int prepare)
+{
+  static struct memory_bitmap *pageset1_map_save, *pageset1_copy_map_save;
+
+  if (prepare) {
+    pageset1_map_save = pageset1_map;
+    pageset1_map = NULL;
+    pageset1_copy_map_save = pageset1_copy_map;
+    pageset1_copy_map = NULL;
+    set_toi_state(TOI_LOADING_ALT_IMAGE);
+    toi_reset_alt_image_pageset2_pfn();
+  } else {
+    toi_free_bitmap(&pageset1_map);
+    pageset1_map = pageset1_map_save;
+    toi_free_bitmap(&pageset1_copy_map);
+    pageset1_copy_map = pageset1_copy_map_save;
+    clear_toi_state(TOI_NOW_RESUMING);
+    clear_toi_state(TOI_LOADING_ALT_IMAGE);
+  }
+}
+
+/**
+ * do_toi_step - perform a step in hibernating or resuming
+ *
+ * Perform a step in hibernating or resuming an image. This abstraction
+ * is in preparation for implementing cluster support, and perhaps replacing
+ * uswsusp too (haven't looked whether that's possible yet).
+ **/
+int do_toi_step(int step)
+{
+  switch (step) {
+    case STEP_HIBERNATE_PREPARE_IMAGE:
+      return do_prepare_image();
+    case STEP_HIBERNATE_SAVE_IMAGE:
+      return do_save_image();
+    case STEP_HIBERNATE_POWERDOWN:
+      return do_post_image_write();
+    case STEP_RESUME_CAN_RESUME:
+      return do_check_can_resume();
+    case STEP_RESUME_LOAD_PS1:
+      return do_load_atomic_copy();
+    case STEP_RESUME_DO_RESTORE:
+      /*
+       * If we succeed, this doesn't return.
+       * Instead, we return from do_save_image() in the
+       * hibernated kernel.
+       */
+      return toi_atomic_restore();
+    case STEP_RESUME_ALT_IMAGE:
+      printk(KERN_INFO "Trying to resume alternate image.\n");
+      toi_in_hibernate = 0;
+      save_restore_alt_param(SAVE, NOQUIET);
+      prepare_restore_load_alt_image(1);
+      if (!do_check_can_resume()) {
+        printk(KERN_INFO "Nothing to resume from.\n");
+        goto out;
+      }
+      if (!do_load_atomic_copy())
+        toi_atomic_restore();
+
+      printk(KERN_INFO "Failed to load image.\n");
+out:
+      prepare_restore_load_alt_image(0);
+      save_restore_alt_param(RESTORE, NOQUIET);
+      break;
+    case STEP_CLEANUP:
+      do_cleanup(1, 0);
+      break;
+    case STEP_QUIET_CLEANUP:
+      do_cleanup(0, 0);
+      break;
+  }
+
+  return 0;
+}
+
+/* -- Functions for kickstarting a hibernate or resume --- */
+
+/**
+ * toi_try_resume - try to do the steps in resuming
+ *
+ * Check if we have an image and if so try to resume. Clear the status
+ * flags too.
+ **/
+void toi_try_resume(void)
+{
+  set_toi_state(TOI_TRYING_TO_RESUME);
+  resume_attempted = 1;
+
+  current->flags |= PF_MEMALLOC;
+  toi_start_other_threads();
+
+  if (do_toi_step(STEP_RESUME_CAN_RESUME) &&
+      !do_toi_step(STEP_RESUME_LOAD_PS1))
+    do_toi_step(STEP_RESUME_DO_RESTORE);
+
+  toi_stop_other_threads();
+  do_cleanup(0, 0);
+
+  current->flags &= ~PF_MEMALLOC;
+
+  clear_toi_state(TOI_IGNORE_LOGLEVEL);
+  clear_toi_state(TOI_TRYING_TO_RESUME);
+  clear_toi_state(TOI_NOW_RESUMING);
+}
+
+/**
+ * toi_sys_power_disk_try_resume - wrapper calling toi_try_resume
+ *
+ * Wrapper for when __toi_try_resume is called from swsusp resume path,
+ * rather than from echo > /sys/power/tuxonice/do_resume.
+ **/
+static void toi_sys_power_disk_try_resume(void)
+{
+  resume_attempted = 1;
+
+  /*
+   * There's a comment in kernel/power/disk.c that indicates
+   * we should be able to use mutex_lock_nested below. That
+   * doesn't seem to cut it, though, so let's just turn lockdep
+   * off for now.
+   */
+  lockdep_off();
+
+  if (toi_start_anything(SYSFS_RESUMING))
+    goto out;
+
+  toi_try_resume();
+
+  /*
+   * For initramfs, we have to clear the boot time
+   * flag after trying to resume
+   */
+  clear_toi_state(TOI_BOOT_TIME);
+
+  toi_finish_anything(SYSFS_RESUMING);
+out:
+  lockdep_on();
+}
+
+/**
+ * toi_try_hibernate - try to start a hibernation cycle
+ *
+ * Start a hibernation cycle, coming in from either
+ * echo > /sys/power/tuxonice/do_suspend
+ *
+ * or
+ *
+ * echo disk > /sys/power/state
+ *
+ * In the later case, we come in without pm_sem taken; in the
+ * former, it has been taken.
+ **/
+int toi_try_hibernate(void)
+{
+  int result = 0, sys_power_disk = 0, retries = 0;
+
+  if (!mutex_is_locked(&tuxonice_in_use)) {
+    /* Came in via /sys/power/disk */
+    if (toi_start_anything(SYSFS_HIBERNATING))
+      return -EBUSY;
+    sys_power_disk = 1;
+  }
+
+  current->flags |= PF_MEMALLOC;
+
+  if (test_toi_state(TOI_CLUSTER_MODE)) {
+    toi_initiate_cluster_hibernate();
+    goto out;
+  }
+
+prepare:
+  result = do_toi_step(STEP_HIBERNATE_PREPARE_IMAGE);
+
+  if (result)
+    goto out;
+
+  if (test_action_state(TOI_FREEZER_TEST))
+    goto out_restore_gfp_mask;
+
+  result = do_toi_step(STEP_HIBERNATE_SAVE_IMAGE);
+
+  if (test_result_state(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL)) {
+    if (retries < 2) {
+      do_cleanup(0, 1);
+      retries++;
+      clear_result_state(TOI_ABORTED);
+      extra_pd1_pages_allowance = extra_pd1_pages_used + 500;
+      printk(KERN_INFO "Automatically adjusting the extra"
+          " pages allowance to %ld and restarting.\n",
+          extra_pd1_pages_allowance);
+      pm_restore_gfp_mask();
+      goto prepare;
+    }
+
+    printk(KERN_INFO "Adjusted extra pages allowance twice and "
+        "still couldn't hibernate successfully. Giving up.");
+  }
+
+  /* This code runs at resume time too! */
+  if (!result && toi_in_hibernate)
+    result = do_toi_step(STEP_HIBERNATE_POWERDOWN);
+
+out_restore_gfp_mask:
+  pm_restore_gfp_mask();
+out:
+  do_cleanup(1, 0);
+  current->flags &= ~PF_MEMALLOC;
+
+  if (sys_power_disk)
+    toi_finish_anything(SYSFS_HIBERNATING);
+
+  return result;
+}
+
+/*
+ * channel_no: If !0, -c <channel_no> is added to args (userui).
+ */
+int toi_launch_userspace_program(char *command, int channel_no,
+    int wait, int debug)
+{
+  int retval;
+  static char *envp[] = {
+    "HOME=/",
+    "TERM=linux",
+    "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
+    NULL };
+  static char *argv[] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
+  };
+  char *channel = NULL;
+  int arg = 0, size;
+  char test_read[255];
+  char *orig_posn = command;
+
+  if (!strlen(orig_posn))
+    return 1;
+
+  if (channel_no) {
+    channel = toi_kzalloc(4, 6, GFP_KERNEL);
+    if (!channel) {
+      printk(KERN_INFO "Failed to allocate memory in "
+          "preparing to launch userspace program.\n");
+      return 1;
+    }
+  }
+
+  /* Up to 6 args supported */
+  while (arg < 6) {
+    sscanf(orig_posn, "%s", test_read);
+    size = strlen(test_read);
+    if (!(size))
+      break;
+    argv[arg] = toi_kzalloc(5, size + 1, TOI_ATOMIC_GFP);
+    strcpy(argv[arg], test_read);
+    orig_posn += size + 1;
+    *test_read = 0;
+    arg++;
+  }
+
+  if (channel_no) {
+    sprintf(channel, "-c%d", channel_no);
+    argv[arg] = channel;
+  } else
+    arg--;
+
+  if (debug) {
+    argv[++arg] = toi_kzalloc(5, 8, TOI_ATOMIC_GFP);
+    strcpy(argv[arg], "--debug");
+  }
+
+  retval = call_usermodehelper(argv[0], argv, envp, wait);
+
+  /*
+   * If the program reports an error, retval = 256. Don't complain
+   * about that here.
+   */
+  if (retval && retval != 256)
+    printk(KERN_ERR "Failed to launch userspace program '%s': "
+        "Error %d\n", command, retval);
+
+  {
+    int i;
+    for (i = 0; i < arg; i++)
+      if (argv[i] && argv[i] != channel)
+        toi_kfree(5, argv[i], sizeof(*argv[i]));
+  }
+
+  toi_kfree(4, channel, sizeof(*channel));
+
+  return retval;
+}
+
+/*
+ * This array contains entries that are automatically registered at
+ * boot. Modules and the console code register their own entries separately.
+ */
+static struct toi_sysfs_data sysfs_params[] = {
+  SYSFS_LONG("extra_pages_allowance", SYSFS_RW,
+      &extra_pd1_pages_allowance, 0, LONG_MAX, 0),
+  SYSFS_CUSTOM("image_exists", SYSFS_RW, image_exists_read,
+      image_exists_write, SYSFS_NEEDS_SM_FOR_BOTH, NULL),
+  SYSFS_STRING("resume", SYSFS_RW, resume_file, 255,
+      SYSFS_NEEDS_SM_FOR_WRITE,
+      attempt_to_parse_resume_device2),
+  SYSFS_STRING("alt_resume_param", SYSFS_RW, alt_resume_param, 255,
+      SYSFS_NEEDS_SM_FOR_WRITE,
+      attempt_to_parse_alt_resume_param),
+  SYSFS_CUSTOM("debug_info", SYSFS_READONLY, get_toi_debug_info, NULL, 0,
+      NULL),
+  SYSFS_BIT("ignore_rootfs", SYSFS_RW, &toi_bkd.toi_action,
+      TOI_IGNORE_ROOTFS, 0),
+  SYSFS_LONG("image_size_limit", SYSFS_RW, &image_size_limit, -2,
+      INT_MAX, 0),
+  SYSFS_UL("last_result", SYSFS_RW, &toi_result, 0, 0, 0),
+  SYSFS_BIT("no_multithreaded_io", SYSFS_RW, &toi_bkd.toi_action,
+      TOI_NO_MULTITHREADED_IO, 0),
+  SYSFS_BIT("no_flusher_thread", SYSFS_RW, &toi_bkd.toi_action,
+      TOI_NO_FLUSHER_THREAD, 0),
+  SYSFS_BIT("full_pageset2", SYSFS_RW, &toi_bkd.toi_action,
+      TOI_PAGESET2_FULL, 0),
+  SYSFS_BIT("reboot", SYSFS_RW, &toi_bkd.toi_action, TOI_REBOOT, 0),
+  SYSFS_BIT("replace_swsusp", SYSFS_RW, &toi_bkd.toi_action,
+      TOI_REPLACE_SWSUSP, 0),
+  SYSFS_STRING("resume_commandline", SYSFS_RW,
+      toi_bkd.toi_nosave_commandline, COMMAND_LINE_SIZE, 0,
+      NULL),
+  SYSFS_STRING("version", SYSFS_READONLY, TOI_CORE_VERSION, 0, 0, NULL),
+  SYSFS_BIT("freezer_test", SYSFS_RW, &toi_bkd.toi_action,
+      TOI_FREEZER_TEST, 0),
+  SYSFS_BIT("test_bio", SYSFS_RW, &toi_bkd.toi_action, TOI_TEST_BIO, 0),
+  SYSFS_BIT("test_filter_speed", SYSFS_RW, &toi_bkd.toi_action,
+      TOI_TEST_FILTER_SPEED, 0),
+  SYSFS_BIT("no_pageset2", SYSFS_RW, &toi_bkd.toi_action,
+      TOI_NO_PAGESET2, 0),
+  SYSFS_BIT("no_pageset2_if_unneeded", SYSFS_RW, &toi_bkd.toi_action,
+      TOI_NO_PS2_IF_UNNEEDED, 0),
+  SYSFS_STRING("binary_signature", SYSFS_READONLY,
+      tuxonice_signature, 9, 0, NULL),
+  SYSFS_INT("max_workers", SYSFS_RW, &toi_max_workers, 0, NR_CPUS, 0,
+      NULL),
+#ifdef CONFIG_KGDB
+  SYSFS_BIT("post_resume_breakpoint", SYSFS_RW, &toi_bkd.toi_action,
+      TOI_POST_RESUME_BREAKPOINT, 0),
+#endif
+  SYSFS_BIT("no_readahead", SYSFS_RW, &toi_bkd.toi_action,
+      TOI_NO_READAHEAD, 0),
+  SYSFS_BIT("trace_debug_on", SYSFS_RW, &toi_bkd.toi_action,
+      TOI_TRACE_DEBUG_ON, 0),
+#ifdef CONFIG_TOI_KEEP_IMAGE
+  SYSFS_BIT("keep_image", SYSFS_RW , &toi_bkd.toi_action, TOI_KEEP_IMAGE,
+      0),
+#endif
+#ifdef CONFIG_TOI_INCREMENTAL
+  SYSFS_CUSTOM("pagestate", SYSFS_READONLY, get_toi_page_state, NULL, 0,
+      NULL),
+  SYSFS_BIT("incremental", SYSFS_RW, &toi_bkd.toi_action,
+      TOI_INCREMENTAL_IMAGE, 1),
+#endif
+};
+
+static struct toi_core_fns my_fns = {
+  .get_nonconflicting_page = __toi_get_nonconflicting_page,
+  .post_context_save = __toi_post_context_save,
+  .try_hibernate = toi_try_hibernate,
+  .try_resume = toi_sys_power_disk_try_resume,
+};
+
+/**
+ * core_load - initialisation of TuxOnIce core
+ *
+ * Initialise the core, beginning with sysfs. Checksum and so on are part of
+ * the core, but have their own initialisation routines because they either
+ * aren't compiled in all the time or have their own subdirectories.
+ **/
+static __init int core_load(void)
+{
+  int i,
+      numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data);
+
+  printk(KERN_INFO "TuxOnIce " TOI_CORE_VERSION
+      " (http://tuxonice.net)\n");
+
+  if (!hibernation_available()) {
+    printk(KERN_INFO "TuxOnIce disabled due to request for hibernation"
+        " to be disabled in this kernel.\n");
+    return 1;
+  }
+
+  if (toi_sysfs_init())
+    return 1;
+
+  for (i = 0; i < numfiles; i++)
+    toi_register_sysfs_file(tuxonice_kobj, &sysfs_params[i]);
+
+  toi_core_fns = &my_fns;
+
+  if (toi_alloc_init())
+    return 1;
+  if (toi_checksum_init())
+    return 1;
+  if (toi_usm_init())
+    return 1;
+  if (toi_ui_init())
+    return 1;
+  if (toi_poweroff_init())
+    return 1;
+  if (toi_cluster_init())
+    return 1;
+  if (toi_cbw_init())
+    return 1;
+
+  return 0;
+}
+
+late_initcall(core_load);
diff --git a/kernel/power/tuxonice_incremental.c b/kernel/power/tuxonice_incremental.c
new file mode 100644
index 000000000..62c465e13
--- /dev/null
+++ b/kernel/power/tuxonice_incremental.c
@@ -0,0 +1,402 @@
+/*
+ * kernel/power/tuxonice_incremental.c
+ *
+ * Copyright (C) 2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * This file contains routines related to storing incremental images - that
+ * is, retaining an image after an initial cycle and then storing incremental
+ * changes on subsequent hibernations.
+ *
+ * Based in part on on...
+ *
+ * Debug helper to dump the current kernel pagetables of the system
+ * so that we can see what the various memory ranges are set to.
+ *
+ * (C) Copyright 2008 Intel Corporation
+ *
+ * Author: Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/mm.h>
+#include <linux/tuxonice.h>
+#include <linux/sched.h>
+#include <asm/pgtable.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/page.h>
+#include "tuxonice_pageflags.h"
+#include "tuxonice_builtin.h"
+#include "power.h"
+
+int toi_do_incremental_initcall;
+
+extern void kdb_init(int level);
+extern noinline void kgdb_breakpoint(void);
+
+#undef pr_debug
+#if 0
+#define pr_debug(a, b...) do { printk(a, ##b); } while(0)
+#else
+#define pr_debug(a, b...) do { } while(0)
+#endif
+
+/* Multipliers for offsets within the PTEs */
+#define PTE_LEVEL_MULT (PAGE_SIZE)
+#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
+#define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT)
+#define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT)
+
+/*
+ * This function gets called on a break in a continuous series
+ * of PTE entries; the next one is different so we need to
+ * print what we collected so far.
+ */
+static void note_page(void *addr)
+{
+  static struct page *lastpage;
+  struct page *page;
+
+  page = virt_to_page(addr);
+
+  if (page != lastpage) {
+    unsigned int level;
+    pte_t *pte = lookup_address((unsigned long) addr, &level);
+    struct page *pt_page2 = pte_page(*pte);
+    //debug("Note page %p (=> %p => %p|%ld).\n", addr, pte, pt_page2, page_to_pfn(pt_page2));
+    SetPageTOI_Untracked(pt_page2);
+    lastpage = page;
+  }
+}
+
+static void walk_pte_level(pmd_t addr)
+{
+  int i;
+  pte_t *start;
+
+  start = (pte_t *) pmd_page_vaddr(addr);
+  for (i = 0; i < PTRS_PER_PTE; i++) {
+    note_page(start);
+    start++;
+  }
+}
+
+#if PTRS_PER_PMD > 1
+
+static void walk_pmd_level(pud_t addr)
+{
+  int i;
+  pmd_t *start;
+
+  start = (pmd_t *) pud_page_vaddr(addr);
+  for (i = 0; i < PTRS_PER_PMD; i++) {
+    if (!pmd_none(*start)) {
+      if (pmd_large(*start) || !pmd_present(*start))
+        note_page(start);
+      else
+        walk_pte_level(*start);
+    } else
+      note_page(start);
+    start++;
+  }
+}
+
+#else
+#define walk_pmd_level(a) walk_pte_level(__pmd(pud_val(a)))
+#define pud_large(a) pmd_large(__pmd(pud_val(a)))
+#define pud_none(a)  pmd_none(__pmd(pud_val(a)))
+#endif
+
+#if PTRS_PER_PUD > 1
+
+static void walk_pud_level(pgd_t addr)
+{
+  int i;
+  pud_t *start;
+
+  start = (pud_t *) pgd_page_vaddr(addr);
+
+  for (i = 0; i < PTRS_PER_PUD; i++) {
+    if (!pud_none(*start)) {
+      if (pud_large(*start) || !pud_present(*start))
+        note_page(start);
+      else
+        walk_pmd_level(*start);
+    } else
+      note_page(start);
+
+    start++;
+  }
+}
+
+#else
+#define walk_pud_level(a) walk_pmd_level(__pud(pgd_val(a)))
+#define pgd_large(a) pud_large(__pud(pgd_val(a)))
+#define pgd_none(a)  pud_none(__pud(pgd_val(a)))
+#endif
+
+/*
+ * Not static in the original at the time of writing, so needs renaming here.
+ */
+static void toi_ptdump_walk_pgd_level(pgd_t *pgd)
+{
+#ifdef CONFIG_X86_64
+  pgd_t *start = (pgd_t *) &init_level4_pgt;
+#else
+  pgd_t *start = swapper_pg_dir;
+#endif
+  int i;
+  if (pgd) {
+    start = pgd;
+  }
+
+  for (i = 0; i < PTRS_PER_PGD; i++) {
+    if (!pgd_none(*start)) {
+      if (pgd_large(*start) || !pgd_present(*start))
+        note_page(start);
+      else
+        walk_pud_level(*start);
+    } else
+      note_page(start);
+
+    start++;
+  }
+
+  /* Flush out the last page */
+  note_page(start);
+}
+
+#ifdef CONFIG_PARAVIRT
+extern struct pv_info pv_info;
+
+static void toi_set_paravirt_ops_untracked(void) {
+  int i;
+
+  unsigned long pvpfn = page_to_pfn(virt_to_page(__parainstructions)),
+                pvpfn_end = page_to_pfn(virt_to_page(__parainstructions_end));
+  //debug(KERN_EMERG ".parainstructions goes from pfn %ld to %ld.\n", pvpfn, pvpfn_end);
+  for (i = pvpfn; i <= pvpfn_end; i++) {
+    SetPageTOI_Untracked(pfn_to_page(i));
+  }
+}
+#else
+#define toi_set_paravirt_ops_untracked() { do { } while(0) }
+#endif
+
+extern void toi_mark_per_cpus_pages_untracked(void);
+
+void toi_untrack_stack(unsigned long *stack)
+{
+  int i;
+  struct page *stack_page = virt_to_page(stack);
+
+  for (i = 0; i < (1 << THREAD_SIZE_ORDER); i++) {
+    pr_debug("Untrack stack page %p.\n", page_address(stack_page + i));
+    SetPageTOI_Untracked(stack_page + i);
+  }
+}
+void toi_untrack_process(struct task_struct *p)
+{
+  SetPageTOI_Untracked(virt_to_page(p));
+  pr_debug("Untrack process %d page %p.\n", p->pid, page_address(virt_to_page(p)));
+
+  toi_untrack_stack(p->stack);
+}
+
+void toi_generate_untracked_map(void)
+{
+  struct task_struct *p, *t;
+  struct page *page;
+  pte_t *pte;
+  int i;
+  unsigned int level;
+  static int been_here = 0;
+
+  if (been_here)
+    return;
+
+  been_here = 1;
+
+  /* Pagetable pages */
+  toi_ptdump_walk_pgd_level(NULL);
+
+  /* Printk buffer - not normally needed but can be helpful for debugging. */
+  //toi_set_logbuf_untracked();
+
+  /* Paravirt ops */
+  toi_set_paravirt_ops_untracked();
+
+  /* Task structs and stacks */
+  for_each_process_thread(p, t) {
+    toi_untrack_process(p);
+    //toi_untrack_stack((unsigned long *) t->thread.sp);
+  }
+
+  for (i = 0; i < NR_CPUS; i++) {
+    struct task_struct *idle = idle_task(i);
+
+    if (idle) {
+      pr_debug("Untrack idle process for CPU %d.\n", i);
+      toi_untrack_process(idle);
+    }
+
+    /* IRQ stack */
+    pr_debug("Untrack IRQ stack for CPU %d.\n", i);
+    toi_untrack_stack((unsigned long *)per_cpu(irq_stack_ptr, i));
+  }
+
+  /* Per CPU data */
+  //pr_debug("Untracking per CPU variable pages.\n");
+  toi_mark_per_cpus_pages_untracked();
+
+  /* Init stack - for bringing up secondary CPUs */
+  page = virt_to_page(init_stack);
+  for (i = 0; i < DIV_ROUND_UP(sizeof(init_stack), PAGE_SIZE); i++) {
+    SetPageTOI_Untracked(page + i);
+  }
+
+  pte = lookup_address((unsigned long) &mmu_cr4_features, &level);
+  SetPageTOI_Untracked(pte_page(*pte));
+  SetPageTOI_Untracked(virt_to_page(trampoline_cr4_features));
+}
+
+/**
+ * toi_reset_dirtiness_one
+ */
+
+void toi_reset_dirtiness_one(unsigned long pfn, int verbose)
+{
+  struct page *page = pfn_to_page(pfn);
+
+  /**
+   * Don't worry about whether the Dirty flag is
+   * already set. If this is our first call, it
+   * won't be.
+   */
+
+  preempt_disable();
+
+  ClearPageTOI_Dirty(page);
+  SetPageTOI_RO(page);
+  if (verbose)
+    printk(KERN_EMERG "Making page %ld (%p|%p) read only.\n", pfn, page, page_address(page));
+
+  set_memory_ro((unsigned long) page_address(page), 1);
+
+  preempt_enable();
+}
+
+/**
+ * TuxOnIce's incremental image support works by marking all memory apart from
+ * the page tables read-only, then in the page-faults that result enabling
+ * writing if appropriate and flagging the page as dirty. Free pages are also
+ * marked as dirty and not protected so that if allocated, they will be included
+ * in the image without further processing.
+ *
+ * toi_reset_dirtiness is called when and image exists and incremental images are
+ * enabled, and each time we resume thereafter. It is not invoked on a fresh boot.
+ *
+ * This routine should be called from a single-cpu-running context to avoid races in setting
+ * page dirty/read only flags.
+ *
+ * TODO: Make "it is not invoked on a fresh boot" true  when I've finished developing it!
+ *
+ * TODO: Consider Xen paravirt guest boot issues. See arch/x86/mm/pageattr.c.
+ **/
+
+int toi_reset_dirtiness(int verbose)
+{
+  struct zone *zone;
+  unsigned long loop;
+  int allocated_map = 0;
+
+  toi_generate_untracked_map();
+
+  if (!free_map) {
+    if (!toi_alloc_bitmap(&free_map))
+      return -ENOMEM;
+    allocated_map = 1;
+  }
+
+  toi_generate_free_page_map();
+
+  pr_debug(KERN_EMERG "Reset dirtiness.\n");
+  for_each_populated_zone(zone) {
+    // 64 bit only. No need to worry about highmem.
+    for (loop = 0; loop < zone->spanned_pages; loop++) {
+      unsigned long pfn = zone->zone_start_pfn + loop;
+      struct page *page;
+      int chunk_size;
+
+      if (!pfn_valid(pfn)) {
+        continue;
+      }
+
+      chunk_size = toi_size_of_free_region(zone, pfn);
+      if (chunk_size) {
+        loop += chunk_size - 1;
+        continue;
+      }
+
+      page = pfn_to_page(pfn);
+
+      if (PageNosave(page) || !saveable_page(zone, pfn)) {
+        continue;
+      }
+
+      if (PageTOI_Untracked(page)) {
+        continue;
+      }
+
+      /**
+       * Do we need to (re)protect the page?
+       * If it is already protected (PageTOI_RO), there is
+       * nothing to do - skip the following.
+       * If it is marked as dirty (PageTOI_Dirty), it was
+       * either free and has been allocated or has been
+       * written to and marked dirty. Reset the dirty flag
+       * and (re)apply the protection.
+       */
+      if (!PageTOI_RO(page)) {
+        toi_reset_dirtiness_one(pfn, verbose);
+      }
+    }
+  }
+
+  pr_debug(KERN_EMERG "Done resetting dirtiness.\n");
+
+  if (allocated_map) {
+    toi_free_bitmap(&free_map);
+  }
+  return 0;
+}
+
+static int toi_reset_dirtiness_initcall(void)
+{
+  if (toi_do_incremental_initcall) {
+    pr_info("TuxOnIce: Enabling dirty page tracking.\n");
+    toi_reset_dirtiness(0);
+  }
+  return 1;
+}
+extern void toi_generate_untracked_map(void);
+
+// Leave early_initcall for pages to register untracked sections.
+early_initcall(toi_reset_dirtiness_initcall);
+
+static int __init toi_incremental_initcall_setup(char *str)
+{
+  int value;
+
+  if (sscanf(str, "=%d", &value) && value)
+    toi_do_incremental_initcall = value;
+
+  return 1;
+}
+__setup("toi_incremental_initcall", toi_incremental_initcall_setup);
diff --git a/kernel/power/tuxonice_io.c b/kernel/power/tuxonice_io.c
new file mode 100644
index 000000000..678dbd402
--- /dev/null
+++ b/kernel/power/tuxonice_io.c
@@ -0,0 +1,1936 @@
+/*
+ * kernel/power/tuxonice_io.c
+ *
+ * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
+ * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz>
+ * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr>
+ * Copyright (C) 2002-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * It contains high level IO routines for hibernating.
+ *
+ */
+
+#include <linux/suspend.h>
+#include <linux/version.h>
+#include <linux/utsname.h>
+#include <linux/mount.h>
+#include <linux/highmem.h>
+#include <linux/kthread.h>
+#include <linux/cpu.h>
+#include <linux/fs_struct.h>
+#include <linux/bio.h>
+#include <linux/fs_uuid.h>
+#include <linux/kmod.h>
+#include <asm/tlbflush.h>
+
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_pageflags.h"
+#include "tuxonice_io.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_storage.h"
+#include "tuxonice_prepare_image.h"
+#include "tuxonice_extent.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_checksum.h"
+#include "tuxonice_alloc.h"
+char alt_resume_param[256];
+
+/* Version read from image header at resume */
+static int toi_image_header_version;
+
+#define read_if_version(VERS, VAR, DESC, ERR_ACT) do {                                        \
+  if (likely(toi_image_header_version >= VERS))                                \
+  if (toiActiveAllocator->rw_header_chunk(READ, NULL,                \
+        (char *) &VAR, sizeof(VAR))) {                \
+    abort_hibernate(TOI_FAILED_IO, "Failed to read DESC.");        \
+    ERR_ACT;                                        \
+  }                                                                \
+} while(0)                                                                        \
+
+/* Variables shared between threads and updated under the mutex */
+static int io_write, io_finish_at, io_base, io_barmax, io_pageset, io_result;
+static int io_index, io_nextupdate, io_pc, io_pc_step;
+static DEFINE_MUTEX(io_mutex);
+static DEFINE_PER_CPU(struct page *, last_sought);
+static DEFINE_PER_CPU(struct page *, last_high_page);
+static DEFINE_PER_CPU(char *, checksum_locn);
+static DEFINE_PER_CPU(struct pbe *, last_low_page);
+static atomic_t io_count;
+atomic_t toi_io_workers;
+
+static int using_flusher;
+
+DECLARE_WAIT_QUEUE_HEAD(toi_io_queue_flusher);
+
+int toi_bio_queue_flusher_should_finish;
+
+int toi_max_workers;
+
+static char *image_version_error = "The image header version is newer than " \
+                                    "this kernel supports.";
+
+struct toi_module_ops *first_filter;
+
+static atomic_t toi_num_other_threads;
+static DECLARE_WAIT_QUEUE_HEAD(toi_worker_wait_queue);
+enum toi_worker_commands {
+  TOI_IO_WORKER_STOP,
+  TOI_IO_WORKER_RUN,
+  TOI_IO_WORKER_EXIT
+};
+static enum toi_worker_commands toi_worker_command;
+
+/**
+ * toi_attempt_to_parse_resume_device - determine if we can hibernate
+ *
+ * Can we hibernate, using the current resume= parameter?
+ **/
+int toi_attempt_to_parse_resume_device(int quiet)
+{
+  struct list_head *Allocator;
+  struct toi_module_ops *thisAllocator;
+  int result, returning = 0;
+
+  if (toi_activate_storage(0))
+    return 0;
+
+  toiActiveAllocator = NULL;
+  clear_toi_state(TOI_RESUME_DEVICE_OK);
+  clear_toi_state(TOI_CAN_RESUME);
+  clear_result_state(TOI_ABORTED);
+
+  if (!toiNumAllocators) {
+    if (!quiet)
+      printk(KERN_INFO "TuxOnIce: No storage allocators have "
+          "been registered. Hibernating will be "
+          "disabled.\n");
+    goto cleanup;
+  }
+
+  list_for_each(Allocator, &toiAllocators) {
+    thisAllocator = list_entry(Allocator, struct toi_module_ops,
+        type_list);
+
+    /*
+     * Not sure why you'd want to disable an allocator, but
+     * we should honour the flag if we're providing it
+     */
+    if (!thisAllocator->enabled)
+      continue;
+
+    result = thisAllocator->parse_sig_location(
+        resume_file, (toiNumAllocators == 1),
+        quiet);
+
+    switch (result) {
+      case -EINVAL:
+        /* For this allocator, but not a valid
+         * configuration. Error already printed. */
+        goto cleanup;
+
+      case 0:
+        /* For this allocator and valid. */
+        toiActiveAllocator = thisAllocator;
+
+        set_toi_state(TOI_RESUME_DEVICE_OK);
+        set_toi_state(TOI_CAN_RESUME);
+        returning = 1;
+        goto cleanup;
+    }
+  }
+  if (!quiet)
+    printk(KERN_INFO "TuxOnIce: No matching enabled allocator "
+        "found. Resuming disabled.\n");
+cleanup:
+  toi_deactivate_storage(0);
+  return returning;
+}
+
+void attempt_to_parse_resume_device2(void)
+{
+  toi_prepare_usm();
+  toi_attempt_to_parse_resume_device(0);
+  toi_cleanup_usm();
+}
+
+void save_restore_alt_param(int replace, int quiet)
+{
+  static char resume_param_save[255];
+  static unsigned long toi_state_save;
+
+  if (replace) {
+    toi_state_save = toi_state;
+    strcpy(resume_param_save, resume_file);
+    strcpy(resume_file, alt_resume_param);
+  } else {
+    strcpy(resume_file, resume_param_save);
+    toi_state = toi_state_save;
+  }
+  toi_attempt_to_parse_resume_device(quiet);
+}
+
+void attempt_to_parse_alt_resume_param(void)
+{
+  int ok = 0;
+
+  /* Temporarily set resume_param to the poweroff value */
+  if (!strlen(alt_resume_param))
+    return;
+
+  printk(KERN_INFO "=== Trying Poweroff Resume2 ===\n");
+  save_restore_alt_param(SAVE, NOQUIET);
+  if (test_toi_state(TOI_CAN_RESUME))
+    ok = 1;
+
+  printk(KERN_INFO "=== Done ===\n");
+  save_restore_alt_param(RESTORE, QUIET);
+
+  /* If not ok, clear the string */
+  if (ok)
+    return;
+
+  printk(KERN_INFO "Can't resume from that location; clearing "
+      "alt_resume_param.\n");
+  alt_resume_param[0] = '\0';
+}
+
+/**
+ * noresume_reset_modules - reset data structures in case of non resuming
+ *
+ * When we read the start of an image, modules (and especially the
+ * active allocator) might need to reset data structures if we
+ * decide to remove the image rather than resuming from it.
+ **/
+static void noresume_reset_modules(void)
+{
+  struct toi_module_ops *this_filter;
+
+  list_for_each_entry(this_filter, &toi_filters, type_list)
+    if (this_filter->noresume_reset)
+      this_filter->noresume_reset();
+
+  if (toiActiveAllocator && toiActiveAllocator->noresume_reset)
+    toiActiveAllocator->noresume_reset();
+}
+
+/**
+ * fill_toi_header - fill the hibernate header structure
+ * @struct toi_header: Header data structure to be filled.
+ **/
+static int fill_toi_header(struct toi_header *sh)
+{
+  int i, error;
+
+  error = init_header((struct swsusp_info *) sh);
+  if (error)
+    return error;
+
+  sh->pagedir = pagedir1;
+  sh->pageset_2_size = pagedir2.size;
+  sh->param0 = toi_result;
+  sh->param1 = toi_bkd.toi_action;
+  sh->param2 = toi_bkd.toi_debug_state;
+  sh->param3 = toi_bkd.toi_default_console_level;
+  sh->root_fs = current->fs->root.mnt->mnt_sb->s_dev;
+  for (i = 0; i < 4; i++)
+    sh->io_time[i/2][i%2] = toi_bkd.toi_io_time[i/2][i%2];
+  sh->bkd = boot_kernel_data_buffer;
+  return 0;
+}
+
+/**
+ * rw_init_modules - initialize modules
+ * @rw:                Whether we are reading of writing an image.
+ * @which:        Section of the image being processed.
+ *
+ * Iterate over modules, preparing the ones that will be used to read or write
+ * data.
+ **/
+static int rw_init_modules(int rw, int which)
+{
+  struct toi_module_ops *this_module;
+  /* Initialise page transformers */
+  list_for_each_entry(this_module, &toi_filters, type_list) {
+    if (!this_module->enabled)
+      continue;
+    if (this_module->rw_init && this_module->rw_init(rw, which)) {
+      abort_hibernate(TOI_FAILED_MODULE_INIT,
+          "Failed to initialize the %s filter.",
+          this_module->name);
+      return 1;
+    }
+  }
+
+  /* Initialise allocator */
+  if (toiActiveAllocator->rw_init(rw, which)) {
+    abort_hibernate(TOI_FAILED_MODULE_INIT,
+        "Failed to initialise the allocator.");
+    return 1;
+  }
+
+  /* Initialise other modules */
+  list_for_each_entry(this_module, &toi_modules, module_list) {
+    if (!this_module->enabled ||
+        this_module->type == FILTER_MODULE ||
+        this_module->type == WRITER_MODULE)
+      continue;
+    if (this_module->rw_init && this_module->rw_init(rw, which)) {
+      set_abort_result(TOI_FAILED_MODULE_INIT);
+      printk(KERN_INFO "Setting aborted flag due to module "
+          "init failure.\n");
+      return 1;
+    }
+  }
+
+  return 0;
+}
+
+/**
+ * rw_cleanup_modules - cleanup modules
+ * @rw:        Whether we are reading of writing an image.
+ *
+ * Cleanup components after reading or writing a set of pages.
+ * Only the allocator may fail.
+ **/
+static int rw_cleanup_modules(int rw)
+{
+  struct toi_module_ops *this_module;
+  int result = 0;
+
+  /* Cleanup other modules */
+  list_for_each_entry(this_module, &toi_modules, module_list) {
+    if (!this_module->enabled ||
+        this_module->type == FILTER_MODULE ||
+        this_module->type == WRITER_MODULE)
+      continue;
+    if (this_module->rw_cleanup)
+      result |= this_module->rw_cleanup(rw);
+  }
+
+  /* Flush data and cleanup */
+  list_for_each_entry(this_module, &toi_filters, type_list) {
+    if (!this_module->enabled)
+      continue;
+    if (this_module->rw_cleanup)
+      result |= this_module->rw_cleanup(rw);
+  }
+
+  result |= toiActiveAllocator->rw_cleanup(rw);
+
+  return result;
+}
+
+static struct page *copy_page_from_orig_page(struct page *orig_page, int is_high)
+{
+  int index, min, max;
+  struct page *high_page = NULL,
+              **my_last_high_page = raw_cpu_ptr(&last_high_page),
+              **my_last_sought = raw_cpu_ptr(&last_sought);
+  struct pbe *this, **my_last_low_page = raw_cpu_ptr(&last_low_page);
+  void *compare;
+
+  if (is_high) {
+    if (*my_last_sought && *my_last_high_page &&
+        *my_last_sought < orig_page)
+      high_page = *my_last_high_page;
+    else
+      high_page = (struct page *) restore_highmem_pblist;
+    this = (struct pbe *) kmap(high_page);
+    compare = orig_page;
+  } else {
+    if (*my_last_sought && *my_last_low_page &&
+        *my_last_sought < orig_page)
+      this = *my_last_low_page;
+    else
+      this = restore_pblist;
+    compare = page_address(orig_page);
+  }
+
+  *my_last_sought = orig_page;
+
+  /* Locate page containing pbe */
+  while (this[PBES_PER_PAGE - 1].next &&
+      this[PBES_PER_PAGE - 1].orig_address < compare) {
+    if (is_high) {
+      struct page *next_high_page = (struct page *)
+        this[PBES_PER_PAGE - 1].next;
+      kunmap(high_page);
+      this = kmap(next_high_page);
+      high_page = next_high_page;
+    } else
+      this = this[PBES_PER_PAGE - 1].next;
+  }
+
+  /* Do a binary search within the page */
+  min = 0;
+  max = PBES_PER_PAGE;
+  index = PBES_PER_PAGE / 2;
+  while (max - min) {
+    if (!this[index].orig_address ||
+        this[index].orig_address > compare)
+      max = index;
+    else if (this[index].orig_address == compare) {
+      if (is_high) {
+        struct page *page = this[index].address;
+        *my_last_high_page = high_page;
+        kunmap(high_page);
+        return page;
+      }
+      *my_last_low_page = this;
+      return virt_to_page(this[index].address);
+    } else
+      min = index;
+    index = ((max + min) / 2);
+  };
+
+  if (is_high)
+    kunmap(high_page);
+
+  abort_hibernate(TOI_FAILED_IO, "Failed to get destination page for"
+      " orig page %p. This[min].orig_address=%p.\n", orig_page,
+      this[index].orig_address);
+  return NULL;
+}
+
+/**
+ * write_next_page - write the next page in a pageset
+ * @data_pfn: The pfn where the next data to write is located.
+ * @my_io_index: The index of the page in the pageset.
+ * @write_pfn: The pfn number to write in the image (where the data belongs).
+ *
+ * Get the pfn of the next page to write, map the page if necessary and do the
+ * write.
+ **/
+static int write_next_page(unsigned long *data_pfn, int *my_io_index,
+    unsigned long *write_pfn)
+{
+  struct page *page;
+  char **my_checksum_locn = raw_cpu_ptr(&checksum_locn);
+  int result = 0, was_present;
+
+  *data_pfn = memory_bm_next_pfn(io_map, 0);
+
+  /* Another thread could have beaten us to it. */
+  if (*data_pfn == BM_END_OF_MAP) {
+    if (atomic_read(&io_count)) {
+      printk(KERN_INFO "Ran out of pfns but io_count is "
+          "still %d.\n", atomic_read(&io_count));
+      BUG();
+    }
+    mutex_unlock(&io_mutex);
+    return -ENODATA;
+  }
+
+  *my_io_index = io_finish_at - atomic_sub_return(1, &io_count);
+
+  memory_bm_clear_bit(io_map, 0, *data_pfn);
+  page = pfn_to_page(*data_pfn);
+
+  was_present = kernel_page_present(page);
+  if (!was_present)
+    kernel_map_pages(page, 1, 1);
+
+  if (io_pageset == 1)
+    *write_pfn = memory_bm_next_pfn(pageset1_map, 0);
+  else {
+    *write_pfn = *data_pfn;
+    *my_checksum_locn = tuxonice_get_next_checksum();
+  }
+
+  TOI_TRACE_DEBUG(*data_pfn, "_PS%d_write %d", io_pageset, *my_io_index);
+
+  mutex_unlock(&io_mutex);
+
+  if (io_pageset == 2 && tuxonice_calc_checksum(page, *my_checksum_locn))
+    return 1;
+
+  result = first_filter->write_page(*write_pfn, TOI_PAGE, page,
+      PAGE_SIZE);
+
+  if (!was_present)
+    kernel_map_pages(page, 1, 0);
+
+  return result;
+}
+
+/**
+ * read_next_page - read the next page in a pageset
+ * @my_io_index: The index of the page in the pageset.
+ * @write_pfn: The pfn in which the data belongs.
+ *
+ * Read a page of the image into our buffer. It can happen (here and in the
+ * write routine) that threads don't get run until after other CPUs have done
+ * all the work. This was the cause of the long standing issue with
+ * occasionally getting -ENODATA errors at the end of reading the image. We
+ * therefore need to check there's actually a page to read before trying to
+ * retrieve one.
+ **/
+
+static int read_next_page(int *my_io_index, unsigned long *write_pfn,
+    struct page *buffer)
+{
+  unsigned int buf_size = PAGE_SIZE;
+  unsigned long left = atomic_read(&io_count);
+
+  if (!left)
+    return -ENODATA;
+
+  /* Start off assuming the page we read isn't resaved */
+  *my_io_index = io_finish_at - atomic_sub_return(1, &io_count);
+
+  mutex_unlock(&io_mutex);
+
+  /*
+   * Are we aborting? If so, don't submit any more I/O as
+   * resetting the resume_attempted flag (from ui.c) will
+   * clear the bdev flags, making this thread oops.
+   */
+  if (unlikely(test_toi_state(TOI_STOP_RESUME))) {
+    atomic_dec(&toi_io_workers);
+    if (!atomic_read(&toi_io_workers)) {
+      /*
+       * So we can be sure we'll have memory for
+       * marking that we haven't resumed.
+       */
+      rw_cleanup_modules(READ);
+      set_toi_state(TOI_IO_STOPPED);
+    }
+    while (1)
+      schedule();
+  }
+
+  /*
+   * See toi_bio_read_page in tuxonice_bio.c:
+   * read the next page in the image.
+   */
+  return first_filter->read_page(write_pfn, TOI_PAGE, buffer, &buf_size);
+}
+
+static void use_read_page(unsigned long write_pfn, struct page *buffer)
+{
+  struct page *final_page = pfn_to_page(write_pfn),
+              *copy_page = final_page;
+  char *virt, *buffer_virt;
+  int was_present, cpu = smp_processor_id();
+  unsigned long idx = 0;
+
+  if (io_pageset == 1 && (!pageset1_copy_map ||
+        !memory_bm_test_bit(pageset1_copy_map, cpu, write_pfn))) {
+    int is_high = PageHighMem(final_page);
+    copy_page = copy_page_from_orig_page(is_high ? (void *) write_pfn : final_page, is_high);
+  }
+
+  if (!memory_bm_test_bit(io_map, cpu, write_pfn)) {
+    int test = !memory_bm_test_bit(io_map, cpu, write_pfn);
+    toi_message(TOI_IO, TOI_VERBOSE, 0, "Discard %ld (%d).", write_pfn, test);
+    mutex_lock(&io_mutex);
+    idx = atomic_add_return(1, &io_count);
+    mutex_unlock(&io_mutex);
+    return;
+  }
+
+  virt = kmap(copy_page);
+  buffer_virt = kmap(buffer);
+  was_present = kernel_page_present(copy_page);
+  if (!was_present)
+    kernel_map_pages(copy_page, 1, 1);
+  memcpy(virt, buffer_virt, PAGE_SIZE);
+  flush_icache_range((unsigned long) virt,
+      (unsigned long) virt + PAGE_SIZE);
+  if (!was_present)
+    kernel_map_pages(copy_page, 1, 0);
+  kunmap(copy_page);
+  kunmap(buffer);
+  memory_bm_clear_bit(io_map, cpu, write_pfn);
+  TOI_TRACE_DEBUG(write_pfn, "_PS%d_read", io_pageset);
+}
+
+static unsigned long status_update(int writing, unsigned long done,
+    unsigned long ticks)
+{
+  int cs_index = writing ? 0 : 1;
+  unsigned long ticks_so_far = toi_bkd.toi_io_time[cs_index][1] + ticks;
+  unsigned long msec = jiffies_to_msecs(abs(ticks_so_far));
+  unsigned long pgs_per_s, estimate = 0, pages_left;
+
+  if (msec) {
+    pages_left = io_barmax - done;
+    pgs_per_s = 1000 * done / msec;
+    if (pgs_per_s)
+      estimate = DIV_ROUND_UP(pages_left, pgs_per_s);
+  }
+
+  if (estimate && ticks > HZ / 2)
+    return toi_update_status(done, io_barmax,
+        " %d/%d MB (%lu sec left)",
+        MB(done+1), MB(io_barmax), estimate);
+
+  return toi_update_status(done, io_barmax, " %d/%d MB",
+      MB(done+1), MB(io_barmax));
+}
+
+/**
+ * worker_rw_loop - main loop to read/write pages
+ *
+ * The main I/O loop for reading or writing pages. The io_map bitmap is used to
+ * track the pages to read/write.
+ * If we are reading, the pages are loaded to their final (mapped) pfn.
+ * Data is non zero iff this is a thread started via start_other_threads.
+ * In that case, we stay in here until told to quit.
+ **/
+static int worker_rw_loop(void *data)
+{
+  unsigned long data_pfn, write_pfn, next_jiffies = jiffies + HZ / 4,
+                jif_index = 1, start_time = jiffies, thread_num;
+  int result = 0, my_io_index = 0, last_worker;
+  struct page *buffer = toi_alloc_page(28, TOI_ATOMIC_GFP);
+  cpumask_var_t orig_mask;
+
+  if (!alloc_cpumask_var(&orig_mask, GFP_KERNEL)) {
+    printk(KERN_EMERG "Failed to allocate cpumask for TuxOnIce I/O thread %ld.\n", (unsigned long) data);
+    result = -ENOMEM;
+    goto out;
+  }
+
+  cpumask_copy(orig_mask, &current->cpus_allowed);
+
+  current->flags |= PF_NOFREEZE;
+
+top:
+  mutex_lock(&io_mutex);
+  thread_num = atomic_read(&toi_io_workers);
+
+  cpumask_copy(&current->cpus_allowed, orig_mask);
+  schedule();
+
+  atomic_inc(&toi_io_workers);
+
+  while (atomic_read(&io_count) >= atomic_read(&toi_io_workers) &&
+      !(io_write && test_result_state(TOI_ABORTED)) &&
+      toi_worker_command == TOI_IO_WORKER_RUN) {
+    if (!thread_num && jiffies > next_jiffies) {
+      next_jiffies += HZ / 4;
+      if (toiActiveAllocator->update_throughput_throttle)
+        toiActiveAllocator->update_throughput_throttle(
+            jif_index);
+      jif_index++;
+    }
+
+    /*
+     * What page to use? If reading, don't know yet which page's
+     * data will be read, so always use the buffer. If writing,
+     * use the copy (Pageset1) or original page (Pageset2), but
+     * always write the pfn of the original page.
+     */
+    if (io_write)
+      result = write_next_page(&data_pfn, &my_io_index,
+          &write_pfn);
+    else /* Reading */
+      result = read_next_page(&my_io_index, &write_pfn,
+          buffer);
+
+    if (result) {
+      mutex_lock(&io_mutex);
+      /* Nothing to do? */
+      if (result == -ENODATA) {
+        toi_message(TOI_IO, TOI_VERBOSE, 0,
+            "Thread %d has no more work.",
+            smp_processor_id());
+        break;
+      }
+
+      io_result = result;
+
+      if (io_write) {
+        printk(KERN_INFO "Write chunk returned %d.\n",
+            result);
+        abort_hibernate(TOI_FAILED_IO,
+            "Failed to write a chunk of the "
+            "image.");
+        break;
+      }
+
+      if (io_pageset == 1) {
+        printk(KERN_ERR "\nBreaking out of I/O loop "
+            "because of result code %d.\n", result);
+        break;
+      }
+      panic("Read chunk returned (%d)", result);
+    }
+
+    /*
+     * Discard reads of resaved pages while reading ps2
+     * and unwanted pages while rereading ps2 when aborting.
+     */
+    if (!io_write) {
+      if (!PageResave(pfn_to_page(write_pfn)))
+        use_read_page(write_pfn, buffer);
+      else {
+        toi_message(TOI_IO, TOI_VERBOSE, 0,
+            "Resaved %ld.", write_pfn);
+        memory_bm_clear_bit(io_map, smp_processor_id(), write_pfn);
+      }
+    }
+
+    if (!thread_num) {
+      if(my_io_index + io_base > io_nextupdate)
+        io_nextupdate = status_update(io_write,
+            my_io_index + io_base,
+            jiffies - start_time);
+
+      if (my_io_index > io_pc) {
+        printk(KERN_CONT "...%d%%", 20 * io_pc_step);
+        io_pc_step++;
+        io_pc = io_finish_at * io_pc_step / 5;
+      }
+    }
+
+    toi_cond_pause(0, NULL);
+
+    /*
+     * Subtle: If there's less I/O still to be done than threads
+     * running, quit. This stops us doing I/O beyond the end of
+     * the image when reading.
+     *
+     * Possible race condition. Two threads could do the test at
+     * the same time; one should exit and one should continue.
+     * Therefore we take the mutex before comparing and exiting.
+     */
+
+    mutex_lock(&io_mutex);
+  }
+
+  last_worker = atomic_dec_and_test(&toi_io_workers);
+  toi_message(TOI_IO, TOI_VERBOSE, 0, "%d workers left.", atomic_read(&toi_io_workers));
+  mutex_unlock(&io_mutex);
+
+  if ((unsigned long) data && toi_worker_command != TOI_IO_WORKER_EXIT) {
+    /* Were we the last thread and we're using a flusher thread? */
+    if (last_worker && using_flusher) {
+      toiActiveAllocator->finish_all_io();
+    }
+    /* First, if we're doing I/O, wait for it to finish */
+    wait_event(toi_worker_wait_queue, toi_worker_command != TOI_IO_WORKER_RUN);
+    /* Then wait to be told what to do next */
+    wait_event(toi_worker_wait_queue, toi_worker_command != TOI_IO_WORKER_STOP);
+    if (toi_worker_command == TOI_IO_WORKER_RUN)
+      goto top;
+  }
+
+  if (thread_num)
+    atomic_dec(&toi_num_other_threads);
+
+out:
+  toi_message(TOI_IO, TOI_LOW, 0, "Thread %d exiting.", thread_num);
+  toi__free_page(28, buffer);
+  free_cpumask_var(orig_mask);
+
+  return result;
+}
+
+int toi_start_other_threads(void)
+{
+  int cpu;
+  struct task_struct *p;
+  int to_start = (toi_max_workers ? toi_max_workers : num_online_cpus()) - 1;
+  unsigned long num_started = 0;
+
+  if (test_action_state(TOI_NO_MULTITHREADED_IO))
+    return 0;
+
+  toi_worker_command = TOI_IO_WORKER_STOP;
+
+  for_each_online_cpu(cpu) {
+    if (num_started == to_start)
+      break;
+
+    if (cpu == smp_processor_id())
+      continue;
+
+    p = kthread_create_on_node(worker_rw_loop, (void *) num_started + 1,
+        cpu_to_node(cpu), "ktoi_io/%d", cpu);
+    if (IS_ERR(p)) {
+      printk(KERN_ERR "ktoi_io for %i failed\n", cpu);
+      continue;
+    }
+    kthread_bind(p, cpu);
+    p->flags |= PF_MEMALLOC;
+    wake_up_process(p);
+    num_started++;
+    atomic_inc(&toi_num_other_threads);
+  }
+
+  toi_message(TOI_IO, TOI_LOW, 0, "Started %d threads.", num_started);
+  return num_started;
+}
+
+void toi_stop_other_threads(void)
+{
+  toi_message(TOI_IO, TOI_LOW, 0, "Stopping other threads.");
+  toi_worker_command = TOI_IO_WORKER_EXIT;
+  wake_up(&toi_worker_wait_queue);
+}
+
+/**
+ * do_rw_loop - main highlevel function for reading or writing pages
+ *
+ * Create the io_map bitmap and call worker_rw_loop to perform I/O operations.
+ **/
+static int do_rw_loop(int write, int finish_at, struct memory_bitmap *pageflags,
+    int base, int barmax, int pageset)
+{
+  int index = 0, cpu, result = 0, workers_started;
+  unsigned long pfn, next;
+
+  first_filter = toi_get_next_filter(NULL);
+
+  if (!finish_at)
+    return 0;
+
+  io_write = write;
+  io_finish_at = finish_at;
+  io_base = base;
+  io_barmax = barmax;
+  io_pageset = pageset;
+  io_index = 0;
+  io_pc = io_finish_at / 5;
+  io_pc_step = 1;
+  io_result = 0;
+  io_nextupdate = base + 1;
+  toi_bio_queue_flusher_should_finish = 0;
+
+  for_each_online_cpu(cpu) {
+    per_cpu(last_sought, cpu) = NULL;
+    per_cpu(last_low_page, cpu) = NULL;
+    per_cpu(last_high_page, cpu) = NULL;
+  }
+
+  /* Ensure all bits clear */
+  memory_bm_clear(io_map);
+
+  memory_bm_position_reset(io_map);
+  next = memory_bm_next_pfn(io_map, 0);
+
+  BUG_ON(next != BM_END_OF_MAP);
+
+  /* Set the bits for the pages to write */
+  memory_bm_position_reset(pageflags);
+
+  pfn = memory_bm_next_pfn(pageflags, 0);
+  toi_trace_index++;
+
+  while (pfn != BM_END_OF_MAP && index < finish_at) {
+    TOI_TRACE_DEBUG(pfn, "_io_pageset_%d (%d/%d)", pageset, index + 1, finish_at);
+    memory_bm_set_bit(io_map, 0, pfn);
+    pfn = memory_bm_next_pfn(pageflags, 0);
+    index++;
+  }
+
+  BUG_ON(next != BM_END_OF_MAP || index < finish_at);
+
+  memory_bm_position_reset(io_map);
+  toi_trace_index++;
+
+  atomic_set(&io_count, finish_at);
+
+  memory_bm_position_reset(pageset1_map);
+
+  mutex_lock(&io_mutex);
+
+  clear_toi_state(TOI_IO_STOPPED);
+
+  using_flusher = (atomic_read(&toi_num_other_threads) &&
+      toiActiveAllocator->io_flusher &&
+      !test_action_state(TOI_NO_FLUSHER_THREAD));
+
+  workers_started = atomic_read(&toi_num_other_threads);
+
+  memory_bm_position_reset(io_map);
+  memory_bm_position_reset(pageset1_copy_map);
+
+  toi_worker_command = TOI_IO_WORKER_RUN;
+  wake_up(&toi_worker_wait_queue);
+
+  mutex_unlock(&io_mutex);
+
+  if (using_flusher)
+    result = toiActiveAllocator->io_flusher(write);
+  else
+    worker_rw_loop(NULL);
+
+  while (atomic_read(&toi_io_workers))
+    schedule();
+
+  printk(KERN_CONT "\n");
+
+  toi_worker_command = TOI_IO_WORKER_STOP;
+  wake_up(&toi_worker_wait_queue);
+
+  if (unlikely(test_toi_state(TOI_STOP_RESUME))) {
+    if (!atomic_read(&toi_io_workers)) {
+      rw_cleanup_modules(READ);
+      set_toi_state(TOI_IO_STOPPED);
+    }
+    while (1)
+      schedule();
+  }
+  set_toi_state(TOI_IO_STOPPED);
+
+  if (!io_result && !result && !test_result_state(TOI_ABORTED)) {
+    unsigned long next;
+
+    toi_update_status(io_base + io_finish_at, io_barmax,
+        " %d/%d MB ",
+        MB(io_base + io_finish_at), MB(io_barmax));
+
+    memory_bm_position_reset(io_map);
+    next = memory_bm_next_pfn(io_map, 0);
+    if  (next != BM_END_OF_MAP) {
+      printk(KERN_INFO "Finished I/O loop but still work to "
+          "do?\nFinish at = %d. io_count = %d.\n",
+          finish_at, atomic_read(&io_count));
+      printk(KERN_INFO "I/O bitmap still records work to do."
+          "%ld.\n", next);
+      BUG();
+      do {
+        cpu_relax();
+      } while (0);
+    }
+  }
+
+  return io_result ? io_result : result;
+}
+
+/**
+ * write_pageset - write a pageset to disk.
+ * @pagedir:        Which pagedir to write.
+ *
+ * Returns:
+ *        Zero on success or -1 on failure.
+ **/
+int write_pageset(struct pagedir *pagedir)
+{
+  int finish_at, base = 0;
+  int barmax = pagedir1.size + pagedir2.size;
+  long error = 0;
+  struct memory_bitmap *pageflags;
+  unsigned long start_time, end_time;
+
+  /*
+   * Even if there is nothing to read or write, the allocator
+   * may need the init/cleanup for it's housekeeping.  (eg:
+   * Pageset1 may start where pageset2 ends when writing).
+   */
+  finish_at = pagedir->size;
+
+  if (pagedir->id == 1) {
+    toi_prepare_status(DONT_CLEAR_BAR,
+        "Writing kernel & process data...");
+    base = pagedir2.size;
+    if (test_action_state(TOI_TEST_FILTER_SPEED) ||
+        test_action_state(TOI_TEST_BIO))
+      pageflags = pageset1_map;
+    else
+      pageflags = pageset1_copy_map;
+  } else {
+    toi_prepare_status(DONT_CLEAR_BAR, "Writing caches...");
+    pageflags = pageset2_map;
+  }
+
+  start_time = jiffies;
+
+  if (rw_init_modules(WRITE, pagedir->id)) {
+    abort_hibernate(TOI_FAILED_MODULE_INIT,
+        "Failed to initialise modules for writing.");
+    error = 1;
+  }
+
+  if (!error)
+    error = do_rw_loop(WRITE, finish_at, pageflags, base, barmax,
+        pagedir->id);
+
+  if (rw_cleanup_modules(WRITE) && !error) {
+    abort_hibernate(TOI_FAILED_MODULE_CLEANUP,
+        "Failed to cleanup after writing.");
+    error = 1;
+  }
+
+  end_time = jiffies;
+
+  if ((end_time - start_time) && (!test_result_state(TOI_ABORTED))) {
+    toi_bkd.toi_io_time[0][0] += finish_at,
+      toi_bkd.toi_io_time[0][1] += (end_time - start_time);
+  }
+
+  return error;
+}
+
+/**
+ * read_pageset - highlevel function to read a pageset from disk
+ * @pagedir:                        pageset to read
+ * @overwrittenpagesonly:        Whether to read the whole pageset or
+ *                                only part of it.
+ *
+ * Returns:
+ *        Zero on success or -1 on failure.
+ **/
+static int read_pageset(struct pagedir *pagedir, int overwrittenpagesonly)
+{
+  int result = 0, base = 0;
+  int finish_at = pagedir->size;
+  int barmax = pagedir1.size + pagedir2.size;
+  struct memory_bitmap *pageflags;
+  unsigned long start_time, end_time;
+
+  if (pagedir->id == 1) {
+    toi_prepare_status(DONT_CLEAR_BAR,
+        "Reading kernel & process data...");
+    pageflags = pageset1_map;
+  } else {
+    toi_prepare_status(DONT_CLEAR_BAR, "Reading caches...");
+    if (overwrittenpagesonly) {
+      barmax = min(pagedir1.size, pagedir2.size);
+      finish_at = min(pagedir1.size, pagedir2.size);
+    } else
+      base = pagedir1.size;
+    pageflags = pageset2_map;
+  }
+
+  start_time = jiffies;
+
+  if (rw_init_modules(READ, pagedir->id)) {
+    toiActiveAllocator->remove_image();
+    result = 1;
+  } else
+    result = do_rw_loop(READ, finish_at, pageflags, base, barmax,
+        pagedir->id);
+
+  if (rw_cleanup_modules(READ) && !result) {
+    abort_hibernate(TOI_FAILED_MODULE_CLEANUP,
+        "Failed to cleanup after reading.");
+    result = 1;
+  }
+
+  /* Statistics */
+  end_time = jiffies;
+
+  if ((end_time - start_time) && (!test_result_state(TOI_ABORTED))) {
+    toi_bkd.toi_io_time[1][0] += finish_at,
+      toi_bkd.toi_io_time[1][1] += (end_time - start_time);
+  }
+
+  return result;
+}
+
+/**
+ * write_module_configs - store the modules configuration
+ *
+ * The configuration for each module is stored in the image header.
+ * Returns: Int
+ *        Zero on success, Error value otherwise.
+ **/
+static int write_module_configs(void)
+{
+  struct toi_module_ops *this_module;
+  char *buffer = (char *) toi_get_zeroed_page(22, TOI_ATOMIC_GFP);
+  int len, index = 1;
+  struct toi_module_header toi_module_header;
+
+  if (!buffer) {
+    printk(KERN_INFO "Failed to allocate a buffer for saving "
+        "module configuration info.\n");
+    return -ENOMEM;
+  }
+
+  /*
+   * We have to know which data goes with which module, so we at
+   * least write a length of zero for a module. Note that we are
+   * also assuming every module's config data takes <= PAGE_SIZE.
+   */
+
+  /* For each module (in registration order) */
+  list_for_each_entry(this_module, &toi_modules, module_list) {
+    if (!this_module->enabled || !this_module->storage_needed ||
+        (this_module->type == WRITER_MODULE &&
+         toiActiveAllocator != this_module))
+      continue;
+
+    /* Get the data from the module */
+    len = 0;
+    if (this_module->save_config_info)
+      len = this_module->save_config_info(buffer);
+
+    /* Save the details of the module */
+    toi_module_header.enabled = this_module->enabled;
+    toi_module_header.type = this_module->type;
+    toi_module_header.index = index++;
+    strncpy(toi_module_header.name, this_module->name,
+        sizeof(toi_module_header.name));
+    toiActiveAllocator->rw_header_chunk(WRITE,
+        this_module,
+        (char *) &toi_module_header,
+        sizeof(toi_module_header));
+
+    /* Save the size of the data and any data returned */
+    toiActiveAllocator->rw_header_chunk(WRITE,
+        this_module,
+        (char *) &len, sizeof(int));
+    if (len)
+      toiActiveAllocator->rw_header_chunk(
+          WRITE, this_module, buffer, len);
+  }
+
+  /* Write a blank header to terminate the list */
+  toi_module_header.name[0] = '\0';
+  toiActiveAllocator->rw_header_chunk(WRITE, NULL,
+      (char *) &toi_module_header, sizeof(toi_module_header));
+
+  toi_free_page(22, (unsigned long) buffer);
+  return 0;
+}
+
+/**
+ * read_one_module_config - read and configure one module
+ *
+ * Read the configuration for one module, and configure the module
+ * to match if it is loaded.
+ *
+ * Returns: Int
+ *        Zero on success, Error value otherwise.
+ **/
+static int read_one_module_config(struct toi_module_header *header)
+{
+  struct toi_module_ops *this_module;
+  int result, len;
+  char *buffer;
+
+  /* Find the module */
+  this_module = toi_find_module_given_name(header->name);
+
+  if (!this_module) {
+    if (header->enabled) {
+      toi_early_boot_message(1, TOI_CONTINUE_REQ,
+          "It looks like we need module %s for reading "
+          "the image but it hasn't been registered.\n",
+          header->name);
+      if (!(test_toi_state(TOI_CONTINUE_REQ)))
+        return -EINVAL;
+    } else
+      printk(KERN_INFO "Module %s configuration data found, "
+          "but the module hasn't registered. Looks like "
+          "it was disabled, so we're ignoring its data.",
+          header->name);
+  }
+
+  /* Get the length of the data (if any) */
+  result = toiActiveAllocator->rw_header_chunk(READ, NULL, (char *) &len,
+      sizeof(int));
+  if (result) {
+    printk(KERN_ERR "Failed to read the length of the module %s's"
+        " configuration data.\n",
+        header->name);
+    return -EINVAL;
+  }
+
+  /* Read any data and pass to the module (if we found one) */
+  if (!len)
+    return 0;
+
+  buffer = (char *) toi_get_zeroed_page(23, TOI_ATOMIC_GFP);
+
+  if (!buffer) {
+    printk(KERN_ERR "Failed to allocate a buffer for reloading "
+        "module configuration info.\n");
+    return -ENOMEM;
+  }
+
+  toiActiveAllocator->rw_header_chunk(READ, NULL, buffer, len);
+
+  if (!this_module)
+    goto out;
+
+  if (!this_module->save_config_info)
+    printk(KERN_ERR "Huh? Module %s appears to have a "
+        "save_config_info, but not a load_config_info "
+        "function!\n", this_module->name);
+  else
+    this_module->load_config_info(buffer, len);
+
+  /*
+   * Now move this module to the tail of its lists. This will put it in
+   * order. Any new modules will end up at the top of the lists. They
+   * should have been set to disabled when loaded (people will
+   * normally not edit an initrd to load a new module and then hibernate
+   * without using it!).
+   */
+
+  toi_move_module_tail(this_module);
+
+  this_module->enabled = header->enabled;
+
+out:
+  toi_free_page(23, (unsigned long) buffer);
+  return 0;
+}
+
+/**
+ * read_module_configs - reload module configurations from the image header.
+ *
+ * Returns: Int
+ *        Zero on success or an error code.
+ **/
+static int read_module_configs(void)
+{
+  int result = 0;
+  struct toi_module_header toi_module_header;
+  struct toi_module_ops *this_module;
+
+  /* All modules are initially disabled. That way, if we have a module
+   * loaded now that wasn't loaded when we hibernated, it won't be used
+   * in trying to read the data.
+   */
+  list_for_each_entry(this_module, &toi_modules, module_list)
+    this_module->enabled = 0;
+
+  /* Get the first module header */
+  result = toiActiveAllocator->rw_header_chunk(READ, NULL,
+      (char *) &toi_module_header,
+      sizeof(toi_module_header));
+  if (result) {
+    printk(KERN_ERR "Failed to read the next module header.\n");
+    return -EINVAL;
+  }
+
+  /* For each module (in registration order) */
+  while (toi_module_header.name[0]) {
+    result = read_one_module_config(&toi_module_header);
+
+    if (result)
+      return -EINVAL;
+
+    /* Get the next module header */
+    result = toiActiveAllocator->rw_header_chunk(READ, NULL,
+        (char *) &toi_module_header,
+        sizeof(toi_module_header));
+
+    if (result) {
+      printk(KERN_ERR "Failed to read the next module "
+          "header.\n");
+      return -EINVAL;
+    }
+  }
+
+  return 0;
+}
+
+static inline int save_fs_info(struct fs_info *fs, struct block_device *bdev)
+{
+  return (!fs || IS_ERR(fs) || !fs->last_mount_size) ? 0 : 1;
+}
+
+int fs_info_space_needed(int reset)
+{
+  static int last_result = 0;
+  const struct super_block *sb;
+  int result = sizeof(int);
+
+  if (!last_result || reset) {
+    list_for_each_entry(sb, &super_blocks, s_list) {
+      struct fs_info *fs;
+
+      if (!sb->s_bdev)
+        continue;
+
+      fs = fs_info_from_block_dev(sb->s_bdev);
+      if (save_fs_info(fs, sb->s_bdev))
+        result += 16 + sizeof(dev_t) + sizeof(int) +
+          fs->last_mount_size;
+      free_fs_info(fs);
+    }
+    last_result = result;
+  }
+  return result;
+}
+
+static int fs_info_num_to_save(void)
+{
+  const struct super_block *sb;
+  int to_save = 0;
+
+  list_for_each_entry(sb, &super_blocks, s_list) {
+    struct fs_info *fs;
+
+    if (!sb->s_bdev)
+      continue;
+
+    fs = fs_info_from_block_dev(sb->s_bdev);
+    if (save_fs_info(fs, sb->s_bdev))
+      to_save++;
+    free_fs_info(fs);
+  }
+
+  return to_save;
+}
+
+static int fs_info_save(void)
+{
+  const struct super_block *sb;
+  int to_save = fs_info_num_to_save();
+
+  if (toiActiveAllocator->rw_header_chunk(WRITE, NULL, (char *) &to_save,
+        sizeof(int))) {
+    abort_hibernate(TOI_FAILED_IO, "Failed to write num fs_info"
+        " to save.");
+    return -EIO;
+  }
+
+  list_for_each_entry(sb, &super_blocks, s_list) {
+    struct fs_info *fs;
+
+    if (!sb->s_bdev)
+      continue;
+
+    fs = fs_info_from_block_dev(sb->s_bdev);
+    if (save_fs_info(fs, sb->s_bdev)) {
+      if (toiActiveAllocator->rw_header_chunk(WRITE, NULL,
+            &fs->uuid[0], 16)) {
+        abort_hibernate(TOI_FAILED_IO, "Failed to "
+            "write uuid.");
+        return -EIO;
+      }
+      if (toiActiveAllocator->rw_header_chunk(WRITE, NULL,
+            (char *) &fs->dev_t, sizeof(dev_t))) {
+        abort_hibernate(TOI_FAILED_IO, "Failed to "
+            "write dev_t.");
+        return -EIO;
+      }
+      if (toiActiveAllocator->rw_header_chunk(WRITE, NULL,
+            (char *) &fs->last_mount_size, sizeof(int))) {
+        abort_hibernate(TOI_FAILED_IO, "Failed to "
+            "write last mount length.");
+        return -EIO;
+      }
+      if (toiActiveAllocator->rw_header_chunk(WRITE, NULL,
+            fs->last_mount, fs->last_mount_size)) {
+        abort_hibernate(TOI_FAILED_IO, "Failed to "
+            "write uuid.");
+        return -EIO;
+      }
+    }
+    free_fs_info(fs);
+  }
+  return 0;
+}
+
+static int fs_info_load_and_check_one(void)
+{
+  char uuid[16], *last_mount;
+  int result = 0, ln;
+  dev_t dev_t;
+  struct block_device *dev;
+  struct fs_info *fs_info, seek;
+
+  if (toiActiveAllocator->rw_header_chunk(READ, NULL, uuid, 16)) {
+    abort_hibernate(TOI_FAILED_IO, "Failed to read uuid.");
+    return -EIO;
+  }
+
+  read_if_version(3, dev_t, "uuid dev_t field", return -EIO);
+
+  if (toiActiveAllocator->rw_header_chunk(READ, NULL, (char *) &ln,
+        sizeof(int))) {
+    abort_hibernate(TOI_FAILED_IO,
+        "Failed to read last mount size.");
+    return -EIO;
+  }
+
+  last_mount = kzalloc(ln, GFP_KERNEL);
+
+  if (!last_mount)
+    return -ENOMEM;
+
+  if (toiActiveAllocator->rw_header_chunk(READ, NULL, last_mount,        ln)) {
+    abort_hibernate(TOI_FAILED_IO,
+        "Failed to read last mount timestamp.");
+    result = -EIO;
+    goto out_lmt;
+  }
+
+  strncpy((char *) &seek.uuid, uuid, 16);
+  seek.dev_t = dev_t;
+  seek.last_mount_size = ln;
+  seek.last_mount = last_mount;
+  dev_t = blk_lookup_fs_info(&seek);
+  if (!dev_t)
+    goto out_lmt;
+
+  dev = toi_open_by_devnum(dev_t);
+
+  fs_info = fs_info_from_block_dev(dev);
+  if (fs_info && !IS_ERR(fs_info)) {
+    if (ln != fs_info->last_mount_size) {
+      printk(KERN_EMERG "Found matching uuid but last mount "
+          "time lengths differ?! "
+          "(%d vs %d).\n", ln,
+          fs_info->last_mount_size);
+      result = -EINVAL;
+    } else {
+      char buf[BDEVNAME_SIZE];
+      result = !!memcmp(fs_info->last_mount, last_mount, ln);
+      if (result)
+        printk(KERN_EMERG "Last mount time for %s has "
+            "changed!\n", bdevname(dev, buf));
+    }
+  }
+  toi_close_bdev(dev);
+  free_fs_info(fs_info);
+out_lmt:
+  kfree(last_mount);
+  return result;
+}
+
+static int fs_info_load_and_check(void)
+{
+  int to_do, result = 0;
+
+  if (toiActiveAllocator->rw_header_chunk(READ, NULL, (char *) &to_do,
+        sizeof(int))) {
+    abort_hibernate(TOI_FAILED_IO, "Failed to read num fs_info "
+        "to load.");
+    return -EIO;
+  }
+
+  while(to_do--)
+    result |= fs_info_load_and_check_one();
+
+  return result;
+}
+
+/**
+ * write_image_header - write the image header after write the image proper
+ *
+ * Returns: Int
+ *        Zero on success, error value otherwise.
+ **/
+int write_image_header(void)
+{
+  int ret;
+  int total = pagedir1.size + pagedir2.size+2;
+  char *header_buffer = NULL;
+
+  /* Now prepare to write the header */
+  ret = toiActiveAllocator->write_header_init();
+  if (ret) {
+    abort_hibernate(TOI_FAILED_MODULE_INIT,
+        "Active allocator's write_header_init"
+        " function failed.");
+    goto write_image_header_abort;
+  }
+
+  /* Get a buffer */
+  header_buffer = (char *) toi_get_zeroed_page(24, TOI_ATOMIC_GFP);
+  if (!header_buffer) {
+    abort_hibernate(TOI_OUT_OF_MEMORY,
+        "Out of memory when trying to get page for header!");
+    goto write_image_header_abort;
+  }
+
+  /* Write hibernate header */
+  if (fill_toi_header((struct toi_header *) header_buffer)) {
+    abort_hibernate(TOI_OUT_OF_MEMORY,
+        "Failure to fill header information!");
+    goto write_image_header_abort;
+  }
+
+  if (toiActiveAllocator->rw_header_chunk(WRITE, NULL,
+        header_buffer, sizeof(struct toi_header))) {
+    abort_hibernate(TOI_OUT_OF_MEMORY,
+        "Failure to write header info.");
+    goto write_image_header_abort;
+  }
+
+  if (toiActiveAllocator->rw_header_chunk(WRITE, NULL,
+        (char *) &toi_max_workers, sizeof(toi_max_workers))) {
+    abort_hibernate(TOI_OUT_OF_MEMORY,
+        "Failure to number of workers to use.");
+    goto write_image_header_abort;
+  }
+
+  /* Write filesystem info */
+  if (fs_info_save())
+    goto write_image_header_abort;
+
+  /* Write module configurations */
+  ret = write_module_configs();
+  if (ret) {
+    abort_hibernate(TOI_FAILED_IO,
+        "Failed to write module configs.");
+    goto write_image_header_abort;
+  }
+
+  if (memory_bm_write(pageset1_map,
+        toiActiveAllocator->rw_header_chunk)) {
+    abort_hibernate(TOI_FAILED_IO,
+        "Failed to write bitmaps.");
+    goto write_image_header_abort;
+  }
+
+  /* Flush data and let allocator cleanup */
+  if (toiActiveAllocator->write_header_cleanup()) {
+    abort_hibernate(TOI_FAILED_IO,
+        "Failed to cleanup writing header.");
+    goto write_image_header_abort_no_cleanup;
+  }
+
+  if (test_result_state(TOI_ABORTED))
+    goto write_image_header_abort_no_cleanup;
+
+  toi_update_status(total, total, NULL);
+
+out:
+  if (header_buffer)
+    toi_free_page(24, (unsigned long) header_buffer);
+  return ret;
+
+write_image_header_abort:
+  toiActiveAllocator->write_header_cleanup();
+write_image_header_abort_no_cleanup:
+  ret = -1;
+  goto out;
+}
+
+/**
+ * sanity_check - check the header
+ * @sh:        the header which was saved at hibernate time.
+ *
+ * Perform a few checks, seeking to ensure that the kernel being
+ * booted matches the one hibernated. They need to match so we can
+ * be _sure_ things will work. It is not absolutely impossible for
+ * resuming from a different kernel to work, just not assured.
+ **/
+static char *sanity_check(struct toi_header *sh)
+{
+  char *reason = check_image_kernel((struct swsusp_info *) sh);
+
+  if (reason)
+    return reason;
+
+  if (!test_action_state(TOI_IGNORE_ROOTFS)) {
+    const struct super_block *sb;
+    list_for_each_entry(sb, &super_blocks, s_list) {
+      if ((!(sb->s_flags & MS_RDONLY)) &&
+          (sb->s_type->fs_flags & FS_REQUIRES_DEV))
+        return "Device backed fs has been mounted "
+          "rw prior to resume or initrd/ramfs "
+          "is mounted rw.";
+    }
+  }
+
+  return NULL;
+}
+
+static DECLARE_WAIT_QUEUE_HEAD(freeze_wait);
+
+#define FREEZE_IN_PROGRESS (~0)
+
+static int freeze_result;
+
+static void do_freeze(struct work_struct *dummy)
+{
+  freeze_result = freeze_processes();
+  wake_up(&freeze_wait);
+  trap_non_toi_io = 1;
+}
+
+static DECLARE_WORK(freeze_work, do_freeze);
+
+/**
+ * __read_pageset1 - test for the existence of an image and attempt to load it
+ *
+ * Returns:        Int
+ *        Zero if image found and pageset1 successfully loaded.
+ *        Error if no image found or loaded.
+ **/
+static int __read_pageset1(void)
+{
+  int i, result = 0;
+  char *header_buffer = (char *) toi_get_zeroed_page(25, TOI_ATOMIC_GFP),
+       *sanity_error = NULL;
+  struct toi_header *toi_header;
+
+  if (!header_buffer) {
+    printk(KERN_INFO "Unable to allocate a page for reading the "
+        "signature.\n");
+    return -ENOMEM;
+  }
+
+  /* Check for an image */
+  result = toiActiveAllocator->image_exists(1);
+  if (result == 3) {
+    result = -ENODATA;
+    toi_early_boot_message(1, 0, "The signature from an older "
+        "version of TuxOnIce has been detected.");
+    goto out_remove_image;
+  }
+
+  if (result != 1) {
+    result = -ENODATA;
+    noresume_reset_modules();
+    printk(KERN_INFO "TuxOnIce: No image found.\n");
+    goto out;
+  }
+
+  /*
+   * Prepare the active allocator for reading the image header. The
+   * activate allocator might read its own configuration.
+   *
+   * NB: This call may never return because there might be a signature
+   * for a different image such that we warn the user and they choose
+   * to reboot. (If the device ids look erroneous (2.4 vs 2.6) or the
+   * location of the image might be unavailable if it was stored on a
+   * network connection).
+   */
+
+  result = toiActiveAllocator->read_header_init();
+  if (result) {
+    printk(KERN_INFO "TuxOnIce: Failed to initialise, reading the "
+        "image header.\n");
+    goto out_remove_image;
+  }
+
+  /* Check for noresume command line option */
+  if (test_toi_state(TOI_NORESUME_SPECIFIED)) {
+    printk(KERN_INFO "TuxOnIce: Noresume on command line. Removed "
+        "image.\n");
+    goto out_remove_image;
+  }
+
+  /* Check whether we've resumed before */
+  if (test_toi_state(TOI_RESUMED_BEFORE)) {
+    toi_early_boot_message(1, 0, NULL);
+    if (!(test_toi_state(TOI_CONTINUE_REQ))) {
+      printk(KERN_INFO "TuxOnIce: Tried to resume before: "
+          "Invalidated image.\n");
+      goto out_remove_image;
+    }
+  }
+
+  clear_toi_state(TOI_CONTINUE_REQ);
+
+  toi_image_header_version = toiActiveAllocator->get_header_version();
+
+  if (unlikely(toi_image_header_version > TOI_HEADER_VERSION)) {
+    toi_early_boot_message(1, 0, image_version_error);
+    if (!(test_toi_state(TOI_CONTINUE_REQ))) {
+      printk(KERN_INFO "TuxOnIce: Header version too new: "
+          "Invalidated image.\n");
+      goto out_remove_image;
+    }
+  }
+
+  /* Read hibernate header */
+  result = toiActiveAllocator->rw_header_chunk(READ, NULL,
+      header_buffer, sizeof(struct toi_header));
+  if (result < 0) {
+    printk(KERN_ERR "TuxOnIce: Failed to read the image "
+        "signature.\n");
+    goto out_remove_image;
+  }
+
+  toi_header = (struct toi_header *) header_buffer;
+
+  /*
+   * NB: This call may also result in a reboot rather than returning.
+   */
+
+  sanity_error = sanity_check(toi_header);
+  if (sanity_error) {
+    toi_early_boot_message(1, TOI_CONTINUE_REQ,
+        sanity_error);
+    printk(KERN_INFO "TuxOnIce: Sanity check failed.\n");
+    goto out_remove_image;
+  }
+
+  /*
+   * We have an image and it looks like it will load okay.
+   *
+   * Get metadata from header. Don't override commandline parameters.
+   *
+   * We don't need to save the image size limit because it's not used
+   * during resume and will be restored with the image anyway.
+   */
+
+  memcpy((char *) &pagedir1,
+      (char *) &toi_header->pagedir, sizeof(pagedir1));
+  toi_result = toi_header->param0;
+  if (!toi_bkd.toi_debug_state) {
+    toi_bkd.toi_action =
+      (toi_header->param1 & ~toi_bootflags_mask) |
+      (toi_bkd.toi_action & toi_bootflags_mask);
+    toi_bkd.toi_debug_state = toi_header->param2;
+    toi_bkd.toi_default_console_level = toi_header->param3;
+  }
+  clear_toi_state(TOI_IGNORE_LOGLEVEL);
+  pagedir2.size = toi_header->pageset_2_size;
+  for (i = 0; i < 4; i++)
+    toi_bkd.toi_io_time[i/2][i%2] =
+      toi_header->io_time[i/2][i%2];
+
+  set_toi_state(TOI_BOOT_KERNEL);
+  boot_kernel_data_buffer = toi_header->bkd;
+
+  read_if_version(1, toi_max_workers, "TuxOnIce max workers",
+      goto out_remove_image);
+
+  /* Read filesystem info */
+  if (fs_info_load_and_check()) {
+    printk(KERN_EMERG "TuxOnIce: File system mount time checks "
+        "failed. Refusing to corrupt your filesystems!\n");
+    goto out_remove_image;
+  }
+
+  /* Read module configurations */
+  result = read_module_configs();
+  if (result) {
+    pagedir1.size = 0;
+    pagedir2.size = 0;
+    printk(KERN_INFO "TuxOnIce: Failed to read TuxOnIce module "
+        "configurations.\n");
+    clear_action_state(TOI_KEEP_IMAGE);
+    goto out_remove_image;
+  }
+
+  toi_prepare_console();
+
+  set_toi_state(TOI_NOW_RESUMING);
+
+  result = pm_notifier_call_chain(PM_RESTORE_PREPARE);
+  if (result)
+    goto out_notifier_call_chain;;
+
+  if (usermodehelper_disable())
+    goto out_enable_usermodehelper;
+
+  current->flags |= PF_NOFREEZE;
+  freeze_result = FREEZE_IN_PROGRESS;
+
+  schedule_work_on(cpumask_first(cpu_online_mask), &freeze_work);
+
+  toi_cond_pause(1, "About to read original pageset1 locations.");
+
+  /*
+   * See _toi_rw_header_chunk in tuxonice_bio.c:
+   * Initialize pageset1_map by reading the map from the image.
+   */
+  if (memory_bm_read(pageset1_map, toiActiveAllocator->rw_header_chunk))
+    goto out_thaw;
+
+  /*
+   * See toi_rw_cleanup in tuxonice_bio.c:
+   * Clean up after reading the header.
+   */
+  result = toiActiveAllocator->read_header_cleanup();
+  if (result) {
+    printk(KERN_ERR "TuxOnIce: Failed to cleanup after reading the "
+        "image header.\n");
+    goto out_thaw;
+  }
+
+  toi_cond_pause(1, "About to read pagedir.");
+
+  /*
+   * Get the addresses of pages into which we will load the kernel to
+   * be copied back and check if they conflict with the ones we are using.
+   */
+  if (toi_get_pageset1_load_addresses()) {
+    printk(KERN_INFO "TuxOnIce: Failed to get load addresses for "
+        "pageset1.\n");
+    goto out_thaw;
+  }
+
+  /* Read the original kernel back */
+  toi_cond_pause(1, "About to read pageset 1.");
+
+  /* Given the pagemap, read back the data from disk */
+  if (read_pageset(&pagedir1, 0)) {
+    toi_prepare_status(DONT_CLEAR_BAR, "Failed to read pageset 1.");
+    result = -EIO;
+    goto out_thaw;
+  }
+
+  toi_cond_pause(1, "About to restore original kernel.");
+  result = 0;
+
+  if (!toi_keeping_image &&
+      toiActiveAllocator->mark_resume_attempted)
+    toiActiveAllocator->mark_resume_attempted(1);
+
+  wait_event(freeze_wait, freeze_result != FREEZE_IN_PROGRESS);
+out:
+  current->flags &= ~PF_NOFREEZE;
+  toi_free_page(25, (unsigned long) header_buffer);
+  return result;
+
+out_thaw:
+  wait_event(freeze_wait, freeze_result != FREEZE_IN_PROGRESS);
+  trap_non_toi_io = 0;
+  thaw_processes();
+out_enable_usermodehelper:
+  usermodehelper_enable();
+out_notifier_call_chain:
+  pm_notifier_call_chain(PM_POST_RESTORE);
+  toi_cleanup_console();
+out_remove_image:
+  result = -EINVAL;
+  if (!toi_keeping_image)
+    toiActiveAllocator->remove_image();
+  toiActiveAllocator->read_header_cleanup();
+  noresume_reset_modules();
+  goto out;
+}
+
+/**
+ * read_pageset1 - highlevel function to read the saved pages
+ *
+ * Attempt to read the header and pageset1 of a hibernate image.
+ * Handle the outcome, complaining where appropriate.
+ **/
+int read_pageset1(void)
+{
+  int error;
+
+  error = __read_pageset1();
+
+  if (error && error != -ENODATA && error != -EINVAL &&
+      !test_result_state(TOI_ABORTED))
+    abort_hibernate(TOI_IMAGE_ERROR,
+        "TuxOnIce: Error %d resuming\n", error);
+
+  return error;
+}
+
+/**
+ * get_have_image_data - check the image header
+ **/
+static char *get_have_image_data(void)
+{
+  char *output_buffer = (char *) toi_get_zeroed_page(26, TOI_ATOMIC_GFP);
+  struct toi_header *toi_header;
+
+  if (!output_buffer) {
+    printk(KERN_INFO "Output buffer null.\n");
+    return NULL;
+  }
+
+  /* Check for an image */
+  if (!toiActiveAllocator->image_exists(1) ||
+      toiActiveAllocator->read_header_init() ||
+      toiActiveAllocator->rw_header_chunk(READ, NULL,
+        output_buffer, sizeof(struct toi_header))) {
+    sprintf(output_buffer, "0\n");
+    /*
+     * From an initrd/ramfs, catting have_image and
+     * getting a result of 0 is sufficient.
+     */
+    clear_toi_state(TOI_BOOT_TIME);
+    goto out;
+  }
+
+  toi_header = (struct toi_header *) output_buffer;
+
+  sprintf(output_buffer, "1\n%s\n%s\n",
+      toi_header->uts.machine,
+      toi_header->uts.version);
+
+  /* Check whether we've resumed before */
+  if (test_toi_state(TOI_RESUMED_BEFORE))
+    strcat(output_buffer, "Resumed before.\n");
+
+out:
+  noresume_reset_modules();
+  return output_buffer;
+}
+
+/**
+ * read_pageset2 - read second part of the image
+ * @overwrittenpagesonly:        Read only pages which would have been
+ *                                verwritten by pageset1?
+ *
+ * Read in part or all of pageset2 of an image, depending upon
+ * whether we are hibernating and have only overwritten a portion
+ * with pageset1 pages, or are resuming and need to read them
+ * all.
+ *
+ * Returns: Int
+ *        Zero if no error, otherwise the error value.
+ **/
+int read_pageset2(int overwrittenpagesonly)
+{
+  int result = 0;
+
+  if (!pagedir2.size)
+    return 0;
+
+  result = read_pageset(&pagedir2, overwrittenpagesonly);
+
+  toi_cond_pause(1, "Pagedir 2 read.");
+
+  return result;
+}
+
+/**
+ * image_exists_read - has an image been found?
+ * @page:        Output buffer
+ *
+ * Store 0 or 1 in page, depending on whether an image is found.
+ * Incoming buffer is PAGE_SIZE and result is guaranteed
+ * to be far less than that, so we don't worry about
+ * overflow.
+ **/
+int image_exists_read(const char *page, int count)
+{
+  int len = 0;
+  char *result;
+
+  if (toi_activate_storage(0))
+    return count;
+
+  if (!test_toi_state(TOI_RESUME_DEVICE_OK))
+    toi_attempt_to_parse_resume_device(0);
+
+  if (!toiActiveAllocator) {
+    len = sprintf((char *) page, "-1\n");
+  } else {
+    result = get_have_image_data();
+    if (result) {
+      len = sprintf((char *) page, "%s",  result);
+      toi_free_page(26, (unsigned long) result);
+    }
+  }
+
+  toi_deactivate_storage(0);
+
+  return len;
+}
+
+/**
+ * image_exists_write - invalidate an image if one exists
+ **/
+int image_exists_write(const char *buffer, int count)
+{
+  if (toi_activate_storage(0))
+    return count;
+
+  if (toiActiveAllocator && toiActiveAllocator->image_exists(1))
+    toiActiveAllocator->remove_image();
+
+  toi_deactivate_storage(0);
+
+  clear_result_state(TOI_KEPT_IMAGE);
+
+  return count;
+}
diff --git a/kernel/power/tuxonice_io.h b/kernel/power/tuxonice_io.h
new file mode 100644
index 000000000..6de3cb2a9
--- /dev/null
+++ b/kernel/power/tuxonice_io.h
@@ -0,0 +1,72 @@
+/*
+ * kernel/power/tuxonice_io.h
+ *
+ * Copyright (C) 2005-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * It contains high level IO routines for hibernating.
+ *
+ */
+
+#include <linux/utsname.h>
+#include "tuxonice_pagedir.h"
+
+/* Non-module data saved in our image header */
+struct toi_header {
+  /*
+   * Mirror struct swsusp_info, but without
+   * the page aligned attribute
+   */
+  struct new_utsname uts;
+  u32 version_code;
+  unsigned long num_physpages;
+  int cpus;
+  unsigned long image_pages;
+  unsigned long pages;
+  unsigned long size;
+
+  /* Our own data */
+  unsigned long orig_mem_free;
+  int page_size;
+  int pageset_2_size;
+  int param0;
+  int param1;
+  int param2;
+  int param3;
+  int progress0;
+  int progress1;
+  int progress2;
+  int progress3;
+  int io_time[2][2];
+  struct pagedir pagedir;
+  dev_t root_fs;
+  unsigned long bkd; /* Boot kernel data locn */
+};
+
+extern int write_pageset(struct pagedir *pagedir);
+extern int write_image_header(void);
+extern int read_pageset1(void);
+extern int read_pageset2(int overwrittenpagesonly);
+
+extern int toi_attempt_to_parse_resume_device(int quiet);
+extern void attempt_to_parse_resume_device2(void);
+extern void attempt_to_parse_alt_resume_param(void);
+int image_exists_read(const char *page, int count);
+int image_exists_write(const char *buffer, int count);
+extern void save_restore_alt_param(int replace, int quiet);
+extern atomic_t toi_io_workers;
+
+/* Args to save_restore_alt_param */
+#define RESTORE 0
+#define SAVE 1
+
+#define NOQUIET 0
+#define QUIET 1
+
+extern wait_queue_head_t toi_io_queue_flusher;
+extern int toi_bio_queue_flusher_should_finish;
+
+int fs_info_space_needed(int reset);
+
+extern int toi_max_workers;
diff --git a/kernel/power/tuxonice_modules.c b/kernel/power/tuxonice_modules.c
new file mode 100644
index 000000000..c91e13f4a
--- /dev/null
+++ b/kernel/power/tuxonice_modules.c
@@ -0,0 +1,520 @@
+/*
+ * kernel/power/tuxonice_modules.c
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ */
+
+#include <linux/suspend.h>
+#include <linux/module.h>
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_ui.h"
+
+LIST_HEAD(toi_filters);
+LIST_HEAD(toiAllocators);
+
+LIST_HEAD(toi_modules);
+
+struct toi_module_ops *toiActiveAllocator;
+
+static int toi_num_filters;
+int toiNumAllocators, toi_num_modules;
+
+/*
+ * toi_header_storage_for_modules
+ *
+ * Returns the amount of space needed to store configuration
+ * data needed by the modules prior to copying back the original
+ * kernel. We can exclude data for pageset2 because it will be
+ * available anyway once the kernel is copied back.
+ */
+long toi_header_storage_for_modules(void)
+{
+  struct toi_module_ops *this_module;
+  int bytes = 0;
+
+  list_for_each_entry(this_module, &toi_modules, module_list) {
+    if (!this_module->enabled ||
+        (this_module->type == WRITER_MODULE &&
+         toiActiveAllocator != this_module))
+      continue;
+    if (this_module->storage_needed) {
+      int this = this_module->storage_needed() +
+        sizeof(struct toi_module_header) +
+        sizeof(int);
+      this_module->header_requested = this;
+      bytes += this;
+    }
+  }
+
+  /* One more for the empty terminator */
+  return bytes + sizeof(struct toi_module_header);
+}
+
+void print_toi_header_storage_for_modules(void)
+{
+  struct toi_module_ops *this_module;
+  int bytes = 0;
+
+  printk(KERN_DEBUG "Header storage:\n");
+  list_for_each_entry(this_module, &toi_modules, module_list) {
+    if (!this_module->enabled ||
+        (this_module->type == WRITER_MODULE &&
+         toiActiveAllocator != this_module))
+      continue;
+    if (this_module->storage_needed) {
+      int this = this_module->storage_needed() +
+        sizeof(struct toi_module_header) +
+        sizeof(int);
+      this_module->header_requested = this;
+      bytes += this;
+      printk(KERN_DEBUG "+ %16s : %-4d/%d.\n",
+          this_module->name,
+          this_module->header_used, this);
+    }
+  }
+
+  printk(KERN_DEBUG "+ empty terminator : %zu.\n",
+      sizeof(struct toi_module_header));
+  printk(KERN_DEBUG "                     ====\n");
+  printk(KERN_DEBUG "                     %zu\n",
+      bytes + sizeof(struct toi_module_header));
+}
+
+/*
+ * toi_memory_for_modules
+ *
+ * Returns the amount of memory requested by modules for
+ * doing their work during the cycle.
+ */
+
+long toi_memory_for_modules(int print_parts)
+{
+  long bytes = 0, result;
+  struct toi_module_ops *this_module;
+
+  if (print_parts)
+    printk(KERN_INFO "Memory for modules:\n===================\n");
+  list_for_each_entry(this_module, &toi_modules, module_list) {
+    int this;
+    if (!this_module->enabled)
+      continue;
+    if (this_module->memory_needed) {
+      this = this_module->memory_needed();
+      if (print_parts)
+        printk(KERN_INFO "%10d bytes (%5ld pages) for "
+            "module '%s'.\n", this,
+            DIV_ROUND_UP(this, PAGE_SIZE),
+            this_module->name);
+      bytes += this;
+    }
+  }
+
+  result = DIV_ROUND_UP(bytes, PAGE_SIZE);
+  if (print_parts)
+    printk(KERN_INFO " => %ld bytes, %ld pages.\n", bytes, result);
+
+  return result;
+}
+
+/*
+ * toi_expected_compression_ratio
+ *
+ * Returns the compression ratio expected when saving the image.
+ */
+
+int toi_expected_compression_ratio(void)
+{
+  int ratio = 100;
+  struct toi_module_ops *this_module;
+
+  list_for_each_entry(this_module, &toi_modules, module_list) {
+    if (!this_module->enabled)
+      continue;
+    if (this_module->expected_compression)
+      ratio = ratio * this_module->expected_compression()
+        / 100;
+  }
+
+  return ratio;
+}
+
+/* toi_find_module_given_dir
+ * Functionality :        Return a module (if found), given a pointer
+ *                         to its directory name
+ */
+
+static struct toi_module_ops *toi_find_module_given_dir(char *name)
+{
+  struct toi_module_ops *this_module, *found_module = NULL;
+
+  list_for_each_entry(this_module, &toi_modules, module_list) {
+    if (!strcmp(name, this_module->directory)) {
+      found_module = this_module;
+      break;
+    }
+  }
+
+  return found_module;
+}
+
+/* toi_find_module_given_name
+ * Functionality :        Return a module (if found), given a pointer
+ *                         to its name
+ */
+
+struct toi_module_ops *toi_find_module_given_name(char *name)
+{
+  struct toi_module_ops *this_module, *found_module = NULL;
+
+  list_for_each_entry(this_module, &toi_modules, module_list) {
+    if (!strcmp(name, this_module->name)) {
+      found_module = this_module;
+      break;
+    }
+  }
+
+  return found_module;
+}
+
+/*
+ * toi_print_module_debug_info
+ * Functionality   : Get debugging info from modules into a buffer.
+ */
+int toi_print_module_debug_info(char *buffer, int buffer_size)
+{
+  struct toi_module_ops *this_module;
+  int len = 0;
+
+  list_for_each_entry(this_module, &toi_modules, module_list) {
+    if (!this_module->enabled)
+      continue;
+    if (this_module->print_debug_info) {
+      int result;
+      result = this_module->print_debug_info(buffer + len,
+          buffer_size - len);
+      len += result;
+    }
+  }
+
+  /* Ensure null terminated */
+  buffer[buffer_size] = 0;
+
+  return len;
+}
+
+/*
+ * toi_register_module
+ *
+ * Register a module.
+ */
+int toi_register_module(struct toi_module_ops *module)
+{
+  int i;
+  struct kobject *kobj;
+
+  if (!hibernation_available())
+    return -ENODEV;
+
+  module->enabled = 1;
+
+  if (toi_find_module_given_name(module->name)) {
+    printk(KERN_INFO "TuxOnIce: Trying to load module %s,"
+        " which is already registered.\n",
+        module->name);
+    return -EBUSY;
+  }
+
+  switch (module->type) {
+    case FILTER_MODULE:
+      list_add_tail(&module->type_list, &toi_filters);
+      toi_num_filters++;
+      break;
+    case WRITER_MODULE:
+      list_add_tail(&module->type_list, &toiAllocators);
+      toiNumAllocators++;
+      break;
+    case MISC_MODULE:
+    case MISC_HIDDEN_MODULE:
+    case BIO_ALLOCATOR_MODULE:
+      break;
+    default:
+      printk(KERN_ERR "Hmmm. Module '%s' has an invalid type."
+          " It has been ignored.\n", module->name);
+      return -EINVAL;
+  }
+  list_add_tail(&module->module_list, &toi_modules);
+  toi_num_modules++;
+
+  if ((!module->directory && !module->shared_directory) ||
+      !module->sysfs_data || !module->num_sysfs_entries)
+    return 0;
+
+  /*
+   * Modules may share a directory, but those with shared_dir
+   * set must be loaded (via symbol dependencies) after parents
+   * and unloaded beforehand.
+   */
+  if (module->shared_directory) {
+    struct toi_module_ops *shared =
+      toi_find_module_given_dir(module->shared_directory);
+    if (!shared) {
+      printk(KERN_ERR "TuxOnIce: Module %s wants to share "
+          "%s's directory but %s isn't loaded.\n",
+          module->name, module->shared_directory,
+          module->shared_directory);
+      toi_unregister_module(module);
+      return -ENODEV;
+    }
+    kobj = shared->dir_kobj;
+  } else {
+    if (!strncmp(module->directory, "[ROOT]", 6))
+      kobj = tuxonice_kobj;
+    else
+      kobj = make_toi_sysdir(module->directory);
+  }
+  module->dir_kobj = kobj;
+  for (i = 0; i < module->num_sysfs_entries; i++) {
+    int result = toi_register_sysfs_file(kobj,
+        &module->sysfs_data[i]);
+    if (result)
+      return result;
+  }
+  return 0;
+}
+
+/*
+ * toi_unregister_module
+ *
+ * Remove a module.
+ */
+void toi_unregister_module(struct toi_module_ops *module)
+{
+  int i;
+
+  if (module->dir_kobj)
+    for (i = 0; i < module->num_sysfs_entries; i++)
+      toi_unregister_sysfs_file(module->dir_kobj,
+          &module->sysfs_data[i]);
+
+  if (!module->shared_directory && module->directory &&
+      strncmp(module->directory, "[ROOT]", 6))
+    remove_toi_sysdir(module->dir_kobj);
+
+  switch (module->type) {
+    case FILTER_MODULE:
+      list_del(&module->type_list);
+      toi_num_filters--;
+      break;
+    case WRITER_MODULE:
+      list_del(&module->type_list);
+      toiNumAllocators--;
+      if (toiActiveAllocator == module) {
+        toiActiveAllocator = NULL;
+        clear_toi_state(TOI_CAN_RESUME);
+        clear_toi_state(TOI_CAN_HIBERNATE);
+      }
+      break;
+    case MISC_MODULE:
+    case MISC_HIDDEN_MODULE:
+    case BIO_ALLOCATOR_MODULE:
+      break;
+    default:
+      printk(KERN_ERR "Module '%s' has an invalid type."
+          " It has been ignored.\n", module->name);
+      return;
+  }
+  list_del(&module->module_list);
+  toi_num_modules--;
+}
+
+/*
+ * toi_move_module_tail
+ *
+ * Rearrange modules when reloading the config.
+ */
+void toi_move_module_tail(struct toi_module_ops *module)
+{
+  switch (module->type) {
+    case FILTER_MODULE:
+      if (toi_num_filters > 1)
+        list_move_tail(&module->type_list, &toi_filters);
+      break;
+    case WRITER_MODULE:
+      if (toiNumAllocators > 1)
+        list_move_tail(&module->type_list, &toiAllocators);
+      break;
+    case MISC_MODULE:
+    case MISC_HIDDEN_MODULE:
+    case BIO_ALLOCATOR_MODULE:
+      break;
+    default:
+      printk(KERN_ERR "Module '%s' has an invalid type."
+          " It has been ignored.\n", module->name);
+      return;
+  }
+  if ((toi_num_filters + toiNumAllocators) > 1)
+    list_move_tail(&module->module_list, &toi_modules);
+}
+
+/*
+ * toi_initialise_modules
+ *
+ * Get ready to do some work!
+ */
+int toi_initialise_modules(int starting_cycle, int early)
+{
+  struct toi_module_ops *this_module;
+  int result;
+
+  list_for_each_entry(this_module, &toi_modules, module_list) {
+    this_module->header_requested = 0;
+    this_module->header_used = 0;
+    if (!this_module->enabled)
+      continue;
+    if (this_module->early != early)
+      continue;
+    if (this_module->initialise) {
+      result = this_module->initialise(starting_cycle);
+      if (result) {
+        toi_cleanup_modules(starting_cycle);
+        return result;
+      }
+      this_module->initialised = 1;
+    }
+  }
+
+  return 0;
+}
+
+/*
+ * toi_cleanup_modules
+ *
+ * Tell modules the work is done.
+ */
+void toi_cleanup_modules(int finishing_cycle)
+{
+  struct toi_module_ops *this_module;
+
+  list_for_each_entry(this_module, &toi_modules, module_list) {
+    if (!this_module->enabled || !this_module->initialised)
+      continue;
+    if (this_module->cleanup)
+      this_module->cleanup(finishing_cycle);
+    this_module->initialised = 0;
+  }
+}
+
+/*
+ * toi_pre_atomic_restore_modules
+ *
+ * Get ready to do some work!
+ */
+void toi_pre_atomic_restore_modules(struct toi_boot_kernel_data *bkd)
+{
+  struct toi_module_ops *this_module;
+
+  list_for_each_entry(this_module, &toi_modules, module_list) {
+    if (this_module->enabled && this_module->pre_atomic_restore)
+      this_module->pre_atomic_restore(bkd);
+  }
+}
+
+/*
+ * toi_post_atomic_restore_modules
+ *
+ * Get ready to do some work!
+ */
+void toi_post_atomic_restore_modules(struct toi_boot_kernel_data *bkd)
+{
+  struct toi_module_ops *this_module;
+
+  list_for_each_entry(this_module, &toi_modules, module_list) {
+    if (this_module->enabled && this_module->post_atomic_restore)
+      this_module->post_atomic_restore(bkd);
+  }
+}
+
+/*
+ * toi_get_next_filter
+ *
+ * Get the next filter in the pipeline.
+ */
+struct toi_module_ops *toi_get_next_filter(struct toi_module_ops *filter_sought)
+{
+  struct toi_module_ops *last_filter = NULL, *this_filter = NULL;
+
+  list_for_each_entry(this_filter, &toi_filters, type_list) {
+    if (!this_filter->enabled)
+      continue;
+    if ((last_filter == filter_sought) || (!filter_sought))
+      return this_filter;
+    last_filter = this_filter;
+  }
+
+  return toiActiveAllocator;
+}
+
+/**
+ * toi_show_modules: Printk what support is loaded.
+ */
+void toi_print_modules(void)
+{
+  struct toi_module_ops *this_module;
+  int prev = 0;
+
+  printk(KERN_INFO "TuxOnIce " TOI_CORE_VERSION ", with support for");
+
+  list_for_each_entry(this_module, &toi_modules, module_list) {
+    if (this_module->type == MISC_HIDDEN_MODULE)
+      continue;
+    printk("%s %s%s%s", prev ? "," : "",
+        this_module->enabled ? "" : "[",
+        this_module->name,
+        this_module->enabled ? "" : "]");
+    prev = 1;
+  }
+
+  printk(".\n");
+}
+
+/* toi_get_modules
+ *
+ * Take a reference to modules so they can't go away under us.
+ */
+
+int toi_get_modules(void)
+{
+  struct toi_module_ops *this_module;
+
+  list_for_each_entry(this_module, &toi_modules, module_list) {
+    struct toi_module_ops *this_module2;
+
+    if (try_module_get(this_module->module))
+      continue;
+
+    /* Failed! Reverse gets and return error */
+    list_for_each_entry(this_module2, &toi_modules,
+        module_list) {
+      if (this_module == this_module2)
+        return -EINVAL;
+      module_put(this_module2->module);
+    }
+  }
+  return 0;
+}
+
+/* toi_put_modules
+ *
+ * Release our references to modules we used.
+ */
+
+void toi_put_modules(void)
+{
+  struct toi_module_ops *this_module;
+
+  list_for_each_entry(this_module, &toi_modules, module_list)
+    module_put(this_module->module);
+}
diff --git a/kernel/power/tuxonice_modules.h b/kernel/power/tuxonice_modules.h
new file mode 100644
index 000000000..18e28b431
--- /dev/null
+++ b/kernel/power/tuxonice_modules.h
@@ -0,0 +1,212 @@
+/*
+ * kernel/power/tuxonice_modules.h
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * It contains declarations for modules. Modules are additions to
+ * TuxOnIce that provide facilities such as image compression or
+ * encryption, backends for storage of the image and user interfaces.
+ *
+ */
+
+#ifndef TOI_MODULES_H
+#define TOI_MODULES_H
+
+/* This is the maximum size we store in the image header for a module name */
+#define TOI_MAX_MODULE_NAME_LENGTH 30
+
+struct toi_boot_kernel_data;
+
+/* Per-module metadata */
+struct toi_module_header {
+  char name[TOI_MAX_MODULE_NAME_LENGTH];
+  int enabled;
+  int type;
+  int index;
+  int data_length;
+  unsigned long signature;
+};
+
+enum {
+  FILTER_MODULE,
+  WRITER_MODULE,
+  BIO_ALLOCATOR_MODULE,
+  MISC_MODULE,
+  MISC_HIDDEN_MODULE,
+};
+
+enum {
+  TOI_ASYNC,
+  TOI_SYNC
+};
+
+enum {
+  TOI_VIRT,
+  TOI_PAGE,
+};
+
+#define TOI_MAP(type, addr) \
+  (type == TOI_PAGE ? kmap(addr) : addr)
+
+#define TOI_UNMAP(type, addr) \
+  do { \
+    if (type == TOI_PAGE) \
+    kunmap(addr); \
+  } while(0)
+
+struct toi_module_ops {
+  /* Functions common to all modules */
+  int type;
+  char *name;
+  char *directory;
+  char *shared_directory;
+  struct kobject *dir_kobj;
+  struct module *module;
+  int enabled, early, initialised;
+  struct list_head module_list;
+
+  /* List of filters or allocators */
+  struct list_head list, type_list;
+
+  /*
+   * Requirements for memory and storage in
+   * the image header..
+   */
+  int (*memory_needed) (void);
+  int (*storage_needed) (void);
+
+  int header_requested, header_used;
+
+  int (*expected_compression) (void);
+
+  /*
+   * Debug info
+   */
+  int (*print_debug_info) (char *buffer, int size);
+  int (*save_config_info) (char *buffer);
+  void (*load_config_info) (char *buffer, int len);
+
+  /*
+   * Initialise & cleanup - general routines called
+   * at the start and end of a cycle.
+   */
+  int (*initialise) (int starting_cycle);
+  void (*cleanup) (int finishing_cycle);
+
+  void (*pre_atomic_restore) (struct toi_boot_kernel_data *bkd);
+  void (*post_atomic_restore) (struct toi_boot_kernel_data *bkd);
+
+  /*
+   * Calls for allocating storage (allocators only).
+   *
+   * Header space is requested separately and cannot fail, but the
+   * reservation is only applied when main storage is allocated.
+   * The header space reservation is thus always set prior to
+   * requesting the allocation of storage - and prior to querying
+   * how much storage is available.
+   */
+
+  unsigned long (*storage_available) (void);
+  void (*reserve_header_space) (unsigned long space_requested);
+  int (*register_storage) (void);
+  int (*allocate_storage) (unsigned long space_requested);
+  unsigned long (*storage_allocated) (void);
+  void (*free_unused_storage) (void);
+
+  /*
+   * Routines used in image I/O.
+   */
+  int (*rw_init) (int rw, int stream_number);
+  int (*rw_cleanup) (int rw);
+  int (*write_page) (unsigned long index, int buf_type, void *buf,
+      unsigned int buf_size);
+  int (*read_page) (unsigned long *index, int buf_type, void *buf,
+      unsigned int *buf_size);
+  int (*io_flusher) (int rw);
+
+  /* Reset module if image exists but reading aborted */
+  void (*noresume_reset) (void);
+
+  /* Read and write the metadata */
+  int (*write_header_init) (void);
+  int (*write_header_cleanup) (void);
+
+  int (*read_header_init) (void);
+  int (*read_header_cleanup) (void);
+
+  /* To be called after read_header_init */
+  int (*get_header_version) (void);
+
+  int (*rw_header_chunk) (int rw, struct toi_module_ops *owner,
+      char *buffer_start, int buffer_size);
+
+  int (*rw_header_chunk_noreadahead) (int rw,
+      struct toi_module_ops *owner, char *buffer_start,
+      int buffer_size);
+
+  /* Attempt to parse an image location */
+  int (*parse_sig_location) (char *buffer, int only_writer, int quiet);
+
+  /* Throttle I/O according to throughput */
+  void (*update_throughput_throttle) (int jif_index);
+
+  /* Flush outstanding I/O */
+  int (*finish_all_io) (void);
+
+  /* Determine whether image exists that we can restore */
+  int (*image_exists) (int quiet);
+
+  /* Mark the image as having tried to resume */
+  int (*mark_resume_attempted) (int);
+
+  /* Destroy image if one exists */
+  int (*remove_image) (void);
+
+  /* Sysfs Data */
+  struct toi_sysfs_data *sysfs_data;
+  int num_sysfs_entries;
+
+  /* Block I/O allocator */
+  struct toi_bio_allocator_ops *bio_allocator_ops;
+};
+
+extern int toi_num_modules, toiNumAllocators;
+
+extern struct toi_module_ops *toiActiveAllocator;
+extern struct list_head toi_filters, toiAllocators, toi_modules;
+
+extern void toi_prepare_console_modules(void);
+extern void toi_cleanup_console_modules(void);
+
+extern struct toi_module_ops *toi_find_module_given_name(char *name);
+extern struct toi_module_ops *toi_get_next_filter(struct toi_module_ops *);
+
+extern int toi_register_module(struct toi_module_ops *module);
+extern void toi_move_module_tail(struct toi_module_ops *module);
+
+extern long toi_header_storage_for_modules(void);
+extern long toi_memory_for_modules(int print_parts);
+extern void print_toi_header_storage_for_modules(void);
+extern int toi_expected_compression_ratio(void);
+
+extern int toi_print_module_debug_info(char *buffer, int buffer_size);
+extern int toi_register_module(struct toi_module_ops *module);
+extern void toi_unregister_module(struct toi_module_ops *module);
+
+extern int toi_initialise_modules(int starting_cycle, int early);
+#define toi_initialise_modules_early(starting) \
+  toi_initialise_modules(starting, 1)
+#define toi_initialise_modules_late(starting) \
+  toi_initialise_modules(starting, 0)
+extern void toi_cleanup_modules(int finishing_cycle);
+
+extern void toi_post_atomic_restore_modules(struct toi_boot_kernel_data *bkd);
+extern void toi_pre_atomic_restore_modules(struct toi_boot_kernel_data *bkd);
+
+extern void toi_print_modules(void);
+
+int toi_get_modules(void);
+void toi_put_modules(void);
+#endif
diff --git a/kernel/power/tuxonice_netlink.c b/kernel/power/tuxonice_netlink.c
new file mode 100644
index 000000000..78d94d316
--- /dev/null
+++ b/kernel/power/tuxonice_netlink.c
@@ -0,0 +1,324 @@
+/*
+ * kernel/power/tuxonice_netlink.c
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Functions for communicating with a userspace helper via netlink.
+ */
+
+#include <linux/suspend.h>
+#include <linux/sched.h>
+#include <linux/kmod.h>
+#include "tuxonice_netlink.h"
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_builtin.h"
+
+static struct user_helper_data *uhd_list;
+
+/*
+ * Refill our pool of SKBs for use in emergencies (eg, when eating memory and
+ * none can be allocated).
+ */
+static void toi_fill_skb_pool(struct user_helper_data *uhd)
+{
+  while (uhd->pool_level < uhd->pool_limit) {
+    struct sk_buff *new_skb =
+      alloc_skb(NLMSG_SPACE(uhd->skb_size), TOI_ATOMIC_GFP);
+
+    if (!new_skb)
+      break;
+
+    new_skb->next = uhd->emerg_skbs;
+    uhd->emerg_skbs = new_skb;
+    uhd->pool_level++;
+  }
+}
+
+/*
+ * Try to allocate a single skb. If we can't get one, try to use one from
+ * our pool.
+ */
+static struct sk_buff *toi_get_skb(struct user_helper_data *uhd)
+{
+  struct sk_buff *skb =
+    alloc_skb(NLMSG_SPACE(uhd->skb_size), TOI_ATOMIC_GFP);
+
+  if (skb)
+    return skb;
+
+  skb = uhd->emerg_skbs;
+  if (skb) {
+    uhd->pool_level--;
+    uhd->emerg_skbs = skb->next;
+    skb->next = NULL;
+  }
+
+  return skb;
+}
+
+void toi_send_netlink_message(struct user_helper_data *uhd,
+    int type, void *params, size_t len)
+{
+  struct sk_buff *skb;
+  struct nlmsghdr *nlh;
+  void *dest;
+  struct task_struct *t;
+
+  if (uhd->pid == -1)
+    return;
+
+  if (uhd->debug)
+    printk(KERN_ERR "toi_send_netlink_message: Send "
+        "message type %d.\n", type);
+
+  skb = toi_get_skb(uhd);
+  if (!skb) {
+    printk(KERN_INFO "toi_netlink: Can't allocate skb!\n");
+    return;
+  }
+
+  nlh = nlmsg_put(skb, 0, uhd->sock_seq, type, len, 0);
+  uhd->sock_seq++;
+
+  dest = NLMSG_DATA(nlh);
+  if (params && len > 0)
+    memcpy(dest, params, len);
+
+  netlink_unicast(uhd->nl, skb, uhd->pid, 0);
+
+  toi_read_lock_tasklist();
+  t = find_task_by_pid_ns(uhd->pid, &init_pid_ns);
+  if (!t) {
+    toi_read_unlock_tasklist();
+    if (uhd->pid > -1)
+      printk(KERN_INFO "Hmm. Can't find the userspace task"
+          " %d.\n", uhd->pid);
+    return;
+  }
+  wake_up_process(t);
+  toi_read_unlock_tasklist();
+
+  yield();
+}
+
+static void send_whether_debugging(struct user_helper_data *uhd)
+{
+  static u8 is_debugging = 1;
+
+  toi_send_netlink_message(uhd, NETLINK_MSG_IS_DEBUGGING,
+      &is_debugging, sizeof(u8));
+}
+
+/*
+ * Set the PF_NOFREEZE flag on the given process to ensure it can run whilst we
+ * are hibernating.
+ */
+static int nl_set_nofreeze(struct user_helper_data *uhd, __u32 pid)
+{
+  struct task_struct *t;
+
+  if (uhd->debug)
+    printk(KERN_ERR "nl_set_nofreeze for pid %d.\n", pid);
+
+  toi_read_lock_tasklist();
+  t = find_task_by_pid_ns(pid, &init_pid_ns);
+  if (!t) {
+    toi_read_unlock_tasklist();
+    printk(KERN_INFO "Strange. Can't find the userspace task %d.\n",
+        pid);
+    return -EINVAL;
+  }
+
+  t->flags |= PF_NOFREEZE;
+
+  toi_read_unlock_tasklist();
+  uhd->pid = pid;
+
+  toi_send_netlink_message(uhd, NETLINK_MSG_NOFREEZE_ACK, NULL, 0);
+
+  return 0;
+}
+
+/*
+ * Called when the userspace process has informed us that it's ready to roll.
+ */
+static int nl_ready(struct user_helper_data *uhd, u32 version)
+{
+  if (version != uhd->interface_version) {
+    printk(KERN_INFO "%s userspace process using invalid interface"
+        " version (%d - kernel wants %d). Trying to "
+        "continue without it.\n",
+        uhd->name, version, uhd->interface_version);
+    if (uhd->not_ready)
+      uhd->not_ready();
+    return -EINVAL;
+  }
+
+  complete(&uhd->wait_for_process);
+
+  return 0;
+}
+
+void toi_netlink_close_complete(struct user_helper_data *uhd)
+{
+  if (uhd->nl) {
+    netlink_kernel_release(uhd->nl);
+    uhd->nl = NULL;
+  }
+
+  while (uhd->emerg_skbs) {
+    struct sk_buff *next = uhd->emerg_skbs->next;
+    kfree_skb(uhd->emerg_skbs);
+    uhd->emerg_skbs = next;
+  }
+
+  uhd->pid = -1;
+}
+
+static int toi_nl_gen_rcv_msg(struct user_helper_data *uhd,
+    struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+  int type = nlh->nlmsg_type;
+  int *data;
+  int err;
+
+  if (uhd->debug)
+    printk(KERN_ERR "toi_user_rcv_skb: Received message %d.\n",
+        type);
+
+  /* Let the more specific handler go first. It returns
+   * 1 for valid messages that it doesn't know. */
+  err = uhd->rcv_msg(skb, nlh);
+  if (err != 1)
+    return err;
+
+  /* Only allow one task to receive NOFREEZE privileges */
+  if (type == NETLINK_MSG_NOFREEZE_ME && uhd->pid != -1) {
+    printk(KERN_INFO "Received extra nofreeze me requests.\n");
+    return -EBUSY;
+  }
+
+  data = NLMSG_DATA(nlh);
+
+  switch (type) {
+    case NETLINK_MSG_NOFREEZE_ME:
+      return nl_set_nofreeze(uhd, nlh->nlmsg_pid);
+    case NETLINK_MSG_GET_DEBUGGING:
+      send_whether_debugging(uhd);
+      return 0;
+    case NETLINK_MSG_READY:
+      if (nlh->nlmsg_len != NLMSG_LENGTH(sizeof(u32))) {
+        printk(KERN_INFO "Invalid ready mesage.\n");
+        if (uhd->not_ready)
+          uhd->not_ready();
+        return -EINVAL;
+      }
+      return nl_ready(uhd, (u32) *data);
+    case NETLINK_MSG_CLEANUP:
+      toi_netlink_close_complete(uhd);
+      return 0;
+  }
+
+  return -EINVAL;
+}
+
+static void toi_user_rcv_skb(struct sk_buff *skb)
+{
+  int err;
+  struct nlmsghdr *nlh;
+  struct user_helper_data *uhd = uhd_list;
+
+  while (uhd && uhd->netlink_id != skb->sk->sk_protocol)
+    uhd = uhd->next;
+
+  if (!uhd)
+    return;
+
+  while (skb->len >= NLMSG_SPACE(0)) {
+    u32 rlen;
+
+    nlh = (struct nlmsghdr *) skb->data;
+    if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
+      return;
+
+    rlen = NLMSG_ALIGN(nlh->nlmsg_len);
+    if (rlen > skb->len)
+      rlen = skb->len;
+
+    err = toi_nl_gen_rcv_msg(uhd, skb, nlh);
+    if (err)
+      netlink_ack(skb, nlh, err, NULL);
+    else if (nlh->nlmsg_flags & NLM_F_ACK)
+      netlink_ack(skb, nlh, 0, NULL);
+    skb_pull(skb, rlen);
+  }
+}
+
+static int netlink_prepare(struct user_helper_data *uhd)
+{
+  struct netlink_kernel_cfg cfg = {
+    .groups = 0,
+    .input = toi_user_rcv_skb,
+  };
+
+  uhd->next = uhd_list;
+  uhd_list = uhd;
+
+  uhd->sock_seq = 0x42c0ffee;
+  uhd->nl = netlink_kernel_create(&init_net, uhd->netlink_id, &cfg);
+  if (!uhd->nl) {
+    printk(KERN_INFO "Failed to allocate netlink socket for %s.\n",
+        uhd->name);
+    return -ENOMEM;
+  }
+
+  toi_fill_skb_pool(uhd);
+
+  return 0;
+}
+
+void toi_netlink_close(struct user_helper_data *uhd)
+{
+  struct task_struct *t;
+
+  toi_read_lock_tasklist();
+  t = find_task_by_pid_ns(uhd->pid, &init_pid_ns);
+  if (t)
+    t->flags &= ~PF_NOFREEZE;
+  toi_read_unlock_tasklist();
+
+  toi_send_netlink_message(uhd, NETLINK_MSG_CLEANUP, NULL, 0);
+}
+int toi_netlink_setup(struct user_helper_data *uhd)
+{
+  /* In case userui didn't cleanup properly on us */
+  toi_netlink_close_complete(uhd);
+
+  if (netlink_prepare(uhd) < 0) {
+    printk(KERN_INFO "Netlink prepare failed.\n");
+    return 1;
+  }
+
+  if (toi_launch_userspace_program(uhd->program, uhd->netlink_id,
+        UMH_WAIT_EXEC, uhd->debug) < 0) {
+    printk(KERN_INFO "Launch userspace program failed.\n");
+    toi_netlink_close_complete(uhd);
+    return 1;
+  }
+
+  /* Wait 2 seconds for the userspace process to make contact */
+  wait_for_completion_timeout(&uhd->wait_for_process, 2*HZ);
+
+  if (uhd->pid == -1) {
+    printk(KERN_INFO "%s: Failed to contact userspace process.\n",
+        uhd->name);
+    toi_netlink_close_complete(uhd);
+    return 1;
+  }
+
+  return 0;
+}
diff --git a/kernel/power/tuxonice_netlink.h b/kernel/power/tuxonice_netlink.h
new file mode 100644
index 000000000..3dca566cb
--- /dev/null
+++ b/kernel/power/tuxonice_netlink.h
@@ -0,0 +1,62 @@
+/*
+ * kernel/power/tuxonice_netlink.h
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Declarations for functions for communicating with a userspace helper
+ * via netlink.
+ */
+
+#include <linux/netlink.h>
+#include <net/sock.h>
+
+#define NETLINK_MSG_BASE 0x10
+
+#define NETLINK_MSG_READY 0x10
+#define        NETLINK_MSG_NOFREEZE_ME 0x16
+#define NETLINK_MSG_GET_DEBUGGING 0x19
+#define NETLINK_MSG_CLEANUP 0x24
+#define NETLINK_MSG_NOFREEZE_ACK 0x27
+#define NETLINK_MSG_IS_DEBUGGING 0x28
+
+struct user_helper_data {
+  int (*rcv_msg) (struct sk_buff *skb, struct nlmsghdr *nlh);
+  void (*not_ready) (void);
+  struct sock *nl;
+  u32 sock_seq;
+  pid_t pid;
+  char *comm;
+  char program[256];
+  int pool_level;
+  int pool_limit;
+  struct sk_buff *emerg_skbs;
+  int skb_size;
+  int netlink_id;
+  char *name;
+  struct user_helper_data *next;
+  struct completion wait_for_process;
+  u32 interface_version;
+  int must_init;
+  int debug;
+};
+
+#ifdef CONFIG_NET
+int toi_netlink_setup(struct user_helper_data *uhd);
+void toi_netlink_close(struct user_helper_data *uhd);
+void toi_send_netlink_message(struct user_helper_data *uhd,
+    int type, void *params, size_t len);
+void toi_netlink_close_complete(struct user_helper_data *uhd);
+#else
+static inline int toi_netlink_setup(struct user_helper_data *uhd)
+{
+  return 0;
+}
+
+static inline void toi_netlink_close(struct user_helper_data *uhd) { };
+static inline void toi_send_netlink_message(struct user_helper_data *uhd,
+    int type, void *params, size_t len) { };
+static inline void toi_netlink_close_complete(struct user_helper_data *uhd)
+{ };
+#endif
diff --git a/kernel/power/tuxonice_pagedir.c b/kernel/power/tuxonice_pagedir.c
new file mode 100644
index 000000000..7854f5e21
--- /dev/null
+++ b/kernel/power/tuxonice_pagedir.c
@@ -0,0 +1,345 @@
+/*
+ * kernel/power/tuxonice_pagedir.c
+ *
+ * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
+ * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz>
+ * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr>
+ * Copyright (C) 2006-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Routines for handling pagesets.
+ * Note that pbes aren't actually stored as such. They're stored as
+ * bitmaps and extents.
+ */
+
+#include <linux/suspend.h>
+#include <linux/highmem.h>
+#include <linux/bootmem.h>
+#include <linux/hardirq.h>
+#include <linux/sched.h>
+#include <linux/cpu.h>
+#include <asm/tlbflush.h>
+
+#include "tuxonice_pageflags.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_pagedir.h"
+#include "tuxonice_prepare_image.h"
+#include "tuxonice.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_alloc.h"
+
+static int ptoi_pfn;
+static struct pbe *this_low_pbe;
+static struct pbe **last_low_pbe_ptr;
+
+void toi_reset_alt_image_pageset2_pfn(void)
+{
+  memory_bm_position_reset(pageset2_map);
+}
+
+static struct page *first_conflicting_page;
+
+/*
+ * free_conflicting_pages
+ */
+
+static void free_conflicting_pages(void)
+{
+  while (first_conflicting_page) {
+    struct page *next =
+      *((struct page **) kmap(first_conflicting_page));
+    kunmap(first_conflicting_page);
+    toi__free_page(29, first_conflicting_page);
+    first_conflicting_page = next;
+  }
+}
+
+/* __toi_get_nonconflicting_page
+ *
+ * Description: Gets order zero pages that won't be overwritten
+ *                while copying the original pages.
+ */
+
+struct page *___toi_get_nonconflicting_page(int can_be_highmem)
+{
+  struct page *page;
+  gfp_t flags = TOI_ATOMIC_GFP;
+  if (can_be_highmem)
+    flags |= __GFP_HIGHMEM;
+
+
+  if (test_toi_state(TOI_LOADING_ALT_IMAGE) &&
+      pageset2_map && ptoi_pfn) {
+    do {
+      ptoi_pfn = memory_bm_next_pfn(pageset2_map, 0);
+      if (ptoi_pfn != BM_END_OF_MAP) {
+        page = pfn_to_page(ptoi_pfn);
+        if (!PagePageset1(page) &&
+            (can_be_highmem || !PageHighMem(page)))
+          return page;
+      }
+    } while (ptoi_pfn);
+  }
+
+  do {
+    page = toi_alloc_page(29, flags | __GFP_ZERO);
+    if (!page) {
+      printk(KERN_INFO "Failed to get nonconflicting "
+          "page.\n");
+      return NULL;
+    }
+    if (PagePageset1(page)) {
+      struct page **next = (struct page **) kmap(page);
+      *next = first_conflicting_page;
+      first_conflicting_page = page;
+      kunmap(page);
+    }
+  } while (PagePageset1(page));
+
+  return page;
+}
+
+unsigned long __toi_get_nonconflicting_page(void)
+{
+  struct page *page = ___toi_get_nonconflicting_page(0);
+  return page ? (unsigned long) page_address(page) : 0;
+}
+
+static struct pbe *get_next_pbe(struct page **page_ptr, struct pbe *this_pbe,
+    int highmem)
+{
+  if (((((unsigned long) this_pbe) & (PAGE_SIZE - 1))
+        + 2 * sizeof(struct pbe)) > PAGE_SIZE) {
+    struct page *new_page =
+      ___toi_get_nonconflicting_page(highmem);
+    if (!new_page)
+      return ERR_PTR(-ENOMEM);
+    this_pbe = (struct pbe *) kmap(new_page);
+    memset(this_pbe, 0, PAGE_SIZE);
+    *page_ptr = new_page;
+  } else
+    this_pbe++;
+
+  return this_pbe;
+}
+
+/**
+ * get_pageset1_load_addresses - generate pbes for conflicting pages
+ *
+ * We check here that pagedir & pages it points to won't collide
+ * with pages where we're going to restore from the loaded pages
+ * later.
+ *
+ * Returns:
+ *        Zero on success, one if couldn't find enough pages (shouldn't
+ *        happen).
+ **/
+int toi_get_pageset1_load_addresses(void)
+{
+  int pfn, highallocd = 0, lowallocd = 0;
+  int low_needed = pagedir1.size - get_highmem_size(pagedir1);
+  int high_needed = get_highmem_size(pagedir1);
+  int low_pages_for_highmem = 0;
+  gfp_t flags = GFP_ATOMIC | __GFP_NOWARN | __GFP_HIGHMEM;
+  struct page *page, *high_pbe_page = NULL, *last_high_pbe_page = NULL,
+              *low_pbe_page, *last_low_pbe_page = NULL;
+  struct pbe **last_high_pbe_ptr = &restore_highmem_pblist,
+             *this_high_pbe = NULL;
+  unsigned long orig_low_pfn, orig_high_pfn;
+  int high_pbes_done = 0, low_pbes_done = 0;
+  int low_direct = 0, high_direct = 0, result = 0, i;
+  int high_page = 1, high_offset = 0, low_page = 1, low_offset = 0;
+
+  toi_trace_index++;
+
+  memory_bm_position_reset(pageset1_map);
+  memory_bm_position_reset(pageset1_copy_map);
+
+  last_low_pbe_ptr = &restore_pblist;
+
+  /* First, allocate pages for the start of our pbe lists. */
+  if (high_needed) {
+    high_pbe_page = ___toi_get_nonconflicting_page(1);
+    if (!high_pbe_page) {
+      result = -ENOMEM;
+      goto out;
+    }
+    this_high_pbe = (struct pbe *) kmap(high_pbe_page);
+    memset(this_high_pbe, 0, PAGE_SIZE);
+  }
+
+  low_pbe_page = ___toi_get_nonconflicting_page(0);
+  if (!low_pbe_page) {
+    result = -ENOMEM;
+    goto out;
+  }
+  this_low_pbe = (struct pbe *) page_address(low_pbe_page);
+
+  /*
+   * Next, allocate the number of pages we need.
+   */
+
+  i = low_needed + high_needed;
+
+  do {
+    int is_high;
+
+    if (i == low_needed)
+      flags &= ~__GFP_HIGHMEM;
+
+    page = toi_alloc_page(30, flags);
+    BUG_ON(!page);
+
+    SetPagePageset1Copy(page);
+    is_high = PageHighMem(page);
+
+    if (PagePageset1(page)) {
+      if (is_high)
+        high_direct++;
+      else
+        low_direct++;
+    } else {
+      if (is_high)
+        highallocd++;
+      else
+        lowallocd++;
+    }
+  } while (--i);
+
+  high_needed -= high_direct;
+  low_needed -= low_direct;
+
+  /*
+   * Do we need to use some lowmem pages for the copies of highmem
+   * pages?
+   */
+  if (high_needed > highallocd) {
+    low_pages_for_highmem = high_needed - highallocd;
+    high_needed -= low_pages_for_highmem;
+    low_needed += low_pages_for_highmem;
+  }
+
+  /*
+   * Now generate our pbes (which will be used for the atomic restore),
+   * and free unneeded pages.
+   */
+  memory_bm_position_reset(pageset1_copy_map);
+  for (pfn = memory_bm_next_pfn(pageset1_copy_map, 0); pfn != BM_END_OF_MAP;
+      pfn = memory_bm_next_pfn(pageset1_copy_map, 0)) {
+    int is_high;
+    page = pfn_to_page(pfn);
+    is_high = PageHighMem(page);
+
+    if (PagePageset1(page))
+      continue;
+
+    /* Nope. We're going to use this page. Add a pbe. */
+    if (is_high || low_pages_for_highmem) {
+      struct page *orig_page;
+      high_pbes_done++;
+      if (!is_high)
+        low_pages_for_highmem--;
+      do {
+        orig_high_pfn = memory_bm_next_pfn(pageset1_map, 0);
+        BUG_ON(orig_high_pfn == BM_END_OF_MAP);
+        orig_page = pfn_to_page(orig_high_pfn);
+      } while (!PageHighMem(orig_page) ||
+          PagePageset1Copy(orig_page));
+
+      this_high_pbe->orig_address = (void *) orig_high_pfn;
+      this_high_pbe->address = page;
+      this_high_pbe->next = NULL;
+      toi_message(TOI_PAGEDIR, TOI_VERBOSE, 0, "High pbe %d/%d: %p(%d)=>%p",
+          high_page, high_offset, page, orig_high_pfn, orig_page);
+      if (last_high_pbe_page != high_pbe_page) {
+        *last_high_pbe_ptr =
+          (struct pbe *) high_pbe_page;
+        if (last_high_pbe_page) {
+          kunmap(last_high_pbe_page);
+          high_page++;
+          high_offset = 0;
+        } else
+          high_offset++;
+        last_high_pbe_page = high_pbe_page;
+      } else {
+        *last_high_pbe_ptr = this_high_pbe;
+        high_offset++;
+      }
+      last_high_pbe_ptr = &this_high_pbe->next;
+      this_high_pbe = get_next_pbe(&high_pbe_page,
+          this_high_pbe, 1);
+      if (IS_ERR(this_high_pbe)) {
+        printk(KERN_INFO
+            "This high pbe is an error.\n");
+        return -ENOMEM;
+      }
+    } else {
+      struct page *orig_page;
+      low_pbes_done++;
+      do {
+        orig_low_pfn = memory_bm_next_pfn(pageset1_map, 0);
+        BUG_ON(orig_low_pfn == BM_END_OF_MAP);
+        orig_page = pfn_to_page(orig_low_pfn);
+      } while (PageHighMem(orig_page) ||
+          PagePageset1Copy(orig_page));
+
+      this_low_pbe->orig_address = page_address(orig_page);
+      this_low_pbe->address = page_address(page);
+      this_low_pbe->next = NULL;
+      toi_message(TOI_PAGEDIR, TOI_VERBOSE, 0, "Low pbe %d/%d: %p(%d)=>%p",
+          low_page, low_offset, this_low_pbe->orig_address,
+          orig_low_pfn, this_low_pbe->address);
+      TOI_TRACE_DEBUG(orig_low_pfn, "LoadAddresses (%d/%d): %p=>%p", low_page, low_offset, this_low_pbe->orig_address, this_low_pbe->address);
+      *last_low_pbe_ptr = this_low_pbe;
+      last_low_pbe_ptr = &this_low_pbe->next;
+      this_low_pbe = get_next_pbe(&low_pbe_page,
+          this_low_pbe, 0);
+      if (low_pbe_page != last_low_pbe_page) {
+        if (last_low_pbe_page) {
+          low_page++;
+          low_offset = 0;
+        } else {
+          low_offset++;
+        }
+        last_low_pbe_page = low_pbe_page;
+      } else
+        low_offset++;
+      if (IS_ERR(this_low_pbe)) {
+        printk(KERN_INFO "this_low_pbe is an error.\n");
+        return -ENOMEM;
+      }
+    }
+  }
+
+  if (high_pbe_page)
+    kunmap(high_pbe_page);
+
+  if (last_high_pbe_page != high_pbe_page) {
+    if (last_high_pbe_page)
+      kunmap(last_high_pbe_page);
+    toi__free_page(29, high_pbe_page);
+  }
+
+  free_conflicting_pages();
+
+out:
+  return result;
+}
+
+int add_boot_kernel_data_pbe(void)
+{
+  this_low_pbe->address = (char *) __toi_get_nonconflicting_page();
+  if (!this_low_pbe->address) {
+    printk(KERN_INFO "Failed to get bkd atomic restore buffer.");
+    return -ENOMEM;
+  }
+
+  toi_bkd.size = sizeof(toi_bkd);
+  memcpy(this_low_pbe->address, &toi_bkd, sizeof(toi_bkd));
+
+  *last_low_pbe_ptr = this_low_pbe;
+  this_low_pbe->orig_address = (char *) boot_kernel_data_buffer;
+  this_low_pbe->next = NULL;
+  return 0;
+}
diff --git a/kernel/power/tuxonice_pagedir.h b/kernel/power/tuxonice_pagedir.h
new file mode 100644
index 000000000..d322a1908
--- /dev/null
+++ b/kernel/power/tuxonice_pagedir.h
@@ -0,0 +1,50 @@
+/*
+ * kernel/power/tuxonice_pagedir.h
+ *
+ * Copyright (C) 2006-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Declarations for routines for handling pagesets.
+ */
+
+#ifndef KERNEL_POWER_PAGEDIR_H
+#define KERNEL_POWER_PAGEDIR_H
+
+/* Pagedir
+ *
+ * Contains the metadata for a set of pages saved in the image.
+ */
+
+struct pagedir {
+  int id;
+  unsigned long size;
+#ifdef CONFIG_HIGHMEM
+  unsigned long size_high;
+#endif
+};
+
+#ifdef CONFIG_HIGHMEM
+#define get_highmem_size(pagedir) (pagedir.size_high)
+#define set_highmem_size(pagedir, sz) do { pagedir.size_high = sz; } while (0)
+#define inc_highmem_size(pagedir) do { pagedir.size_high++; } while (0)
+#define get_lowmem_size(pagedir) (pagedir.size - pagedir.size_high)
+#else
+#define get_highmem_size(pagedir) (0)
+#define set_highmem_size(pagedir, sz) do { } while (0)
+#define inc_highmem_size(pagedir) do { } while (0)
+#define get_lowmem_size(pagedir) (pagedir.size)
+#endif
+
+extern struct pagedir pagedir1, pagedir2;
+
+extern void toi_copy_pageset1(void);
+
+extern int toi_get_pageset1_load_addresses(void);
+
+extern unsigned long __toi_get_nonconflicting_page(void);
+struct page *___toi_get_nonconflicting_page(int can_be_highmem);
+
+extern void toi_reset_alt_image_pageset2_pfn(void);
+extern int add_boot_kernel_data_pbe(void);
+#endif
diff --git a/kernel/power/tuxonice_pageflags.c b/kernel/power/tuxonice_pageflags.c
new file mode 100644
index 000000000..9d6ca0774
--- /dev/null
+++ b/kernel/power/tuxonice_pageflags.c
@@ -0,0 +1,18 @@
+/*
+ * kernel/power/tuxonice_pageflags.c
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Routines for serialising and relocating pageflags in which we
+ * store our image metadata.
+ */
+
+#include "tuxonice_pageflags.h"
+#include "power.h"
+
+int toi_pageflags_space_needed(void)
+{
+  return memory_bm_space_needed(pageset1_map);
+}
diff --git a/kernel/power/tuxonice_pageflags.h b/kernel/power/tuxonice_pageflags.h
new file mode 100644
index 000000000..f004e4697
--- /dev/null
+++ b/kernel/power/tuxonice_pageflags.h
@@ -0,0 +1,106 @@
+/*
+ * kernel/power/tuxonice_pageflags.h
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ */
+
+#ifndef KERNEL_POWER_TUXONICE_PAGEFLAGS_H
+#define KERNEL_POWER_TUXONICE_PAGEFLAGS_H
+
+struct  memory_bitmap;
+void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
+void memory_bm_clear(struct memory_bitmap *bm);
+
+int mem_bm_set_bit_check(struct memory_bitmap *bm, int index, unsigned long pfn);
+void memory_bm_set_bit(struct memory_bitmap *bm, int index, unsigned long pfn);
+unsigned long memory_bm_next_pfn(struct memory_bitmap *bm, int index);
+unsigned long memory_bm_next_pfn_index(struct memory_bitmap *bm, int index);
+void memory_bm_position_reset(struct memory_bitmap *bm);
+void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
+int toi_alloc_bitmap(struct memory_bitmap **bm);
+void toi_free_bitmap(struct memory_bitmap **bm);
+void memory_bm_clear(struct memory_bitmap *bm);
+void memory_bm_clear_bit(struct memory_bitmap *bm, int index, unsigned long pfn);
+void memory_bm_set_bit(struct memory_bitmap *bm, int index, unsigned long pfn);
+int memory_bm_test_bit(struct memory_bitmap *bm, int index, unsigned long pfn);
+int memory_bm_test_bit_index(struct memory_bitmap *bm, int index, unsigned long pfn);
+void memory_bm_clear_bit_index(struct memory_bitmap *bm, int index, unsigned long pfn);
+
+struct toi_module_ops;
+int memory_bm_write(struct memory_bitmap *bm, int (*rw_chunk)
+    (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size));
+int memory_bm_read(struct memory_bitmap *bm, int (*rw_chunk)
+    (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size));
+  int memory_bm_space_needed(struct memory_bitmap *bm);
+
+  extern struct memory_bitmap *pageset1_map;
+  extern struct memory_bitmap *pageset1_copy_map;
+  extern struct memory_bitmap *pageset2_map;
+  extern struct memory_bitmap *page_resave_map;
+  extern struct memory_bitmap *io_map;
+  extern struct memory_bitmap *nosave_map;
+  extern struct memory_bitmap *free_map;
+  extern struct memory_bitmap *compare_map;
+
+#define PagePageset1(page) \
+    (pageset1_map && memory_bm_test_bit(pageset1_map, smp_processor_id(), page_to_pfn(page)))
+#define SetPagePageset1(page) \
+    (memory_bm_set_bit(pageset1_map, smp_processor_id(), page_to_pfn(page)))
+#define ClearPagePageset1(page) \
+    (memory_bm_clear_bit(pageset1_map, smp_processor_id(), page_to_pfn(page)))
+
+#define PagePageset1Copy(page) \
+    (memory_bm_test_bit(pageset1_copy_map, smp_processor_id(), page_to_pfn(page)))
+#define SetPagePageset1Copy(page) \
+    (memory_bm_set_bit(pageset1_copy_map, smp_processor_id(), page_to_pfn(page)))
+#define ClearPagePageset1Copy(page) \
+    (memory_bm_clear_bit(pageset1_copy_map, smp_processor_id(), page_to_pfn(page)))
+
+#define PagePageset2(page) \
+    (memory_bm_test_bit(pageset2_map, smp_processor_id(), page_to_pfn(page)))
+#define SetPagePageset2(page) \
+    (memory_bm_set_bit(pageset2_map, smp_processor_id(), page_to_pfn(page)))
+#define ClearPagePageset2(page) \
+    (memory_bm_clear_bit(pageset2_map, smp_processor_id(), page_to_pfn(page)))
+
+#define PageWasRW(page) \
+    (memory_bm_test_bit(pageset2_map, smp_processor_id(), page_to_pfn(page)))
+#define SetPageWasRW(page) \
+    (memory_bm_set_bit(pageset2_map, smp_processor_id(), page_to_pfn(page)))
+#define ClearPageWasRW(page) \
+    (memory_bm_clear_bit(pageset2_map, smp_processor_id(), page_to_pfn(page)))
+
+#define PageResave(page) (page_resave_map ? \
+    memory_bm_test_bit(page_resave_map, smp_processor_id(), page_to_pfn(page)) : 0)
+#define SetPageResave(page) \
+                                                                                   (memory_bm_set_bit(page_resave_map, smp_processor_id(), page_to_pfn(page)))
+#define ClearPageResave(page) \
+                                                                                   (memory_bm_clear_bit(page_resave_map, smp_processor_id(), page_to_pfn(page)))
+
+#define PageNosave(page) (nosave_map ? \
+    memory_bm_test_bit(nosave_map, smp_processor_id(), page_to_pfn(page)) : 0)
+#define SetPageNosave(page) \
+                                                                              (mem_bm_set_bit_check(nosave_map, smp_processor_id(), page_to_pfn(page)))
+#define ClearPageNosave(page) \
+                                                                              (memory_bm_clear_bit(nosave_map, smp_processor_id(), page_to_pfn(page)))
+
+#define PageNosaveFree(page) (free_map ? \
+    memory_bm_test_bit(free_map, smp_processor_id(), page_to_pfn(page)) : 0)
+#define SetPageNosaveFree(page) \
+                                                                            (memory_bm_set_bit(free_map, smp_processor_id(), page_to_pfn(page)))
+#define ClearPageNosaveFree(page) \
+                                                                            (memory_bm_clear_bit(free_map, smp_processor_id(), page_to_pfn(page)))
+
+#define PageCompareChanged(page) (compare_map ? \
+    memory_bm_test_bit(compare_map, smp_processor_id(), page_to_pfn(page)) : 0)
+#define SetPageCompareChanged(page) \
+                                                                               (memory_bm_set_bit(compare_map, smp_processor_id(), page_to_pfn(page)))
+#define ClearPageCompareChanged(page) \
+                                                                               (memory_bm_clear_bit(compare_map, smp_processor_id(), page_to_pfn(page)))
+
+                                                                             extern void save_pageflags(struct memory_bitmap *pagemap);
+                                                                             extern int load_pageflags(struct memory_bitmap *pagemap);
+                                                                             extern int toi_pageflags_space_needed(void);
+#endif
diff --git a/kernel/power/tuxonice_power_off.c b/kernel/power/tuxonice_power_off.c
new file mode 100644
index 000000000..3ddf674fa
--- /dev/null
+++ b/kernel/power/tuxonice_power_off.c
@@ -0,0 +1,286 @@
+/*
+ * kernel/power/tuxonice_power_off.c
+ *
+ * Copyright (C) 2006-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Support for powering down.
+ */
+
+#include <linux/device.h>
+#include <linux/suspend.h>
+#include <linux/mm.h>
+#include <linux/pm.h>
+#include <linux/reboot.h>
+#include <linux/cpu.h>
+#include <linux/console.h>
+#include <linux/fs.h>
+#include "tuxonice.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_power_off.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_io.h"
+
+unsigned long toi_poweroff_method; /* 0 - Kernel power off */
+
+static int wake_delay;
+static char lid_state_file[256], wake_alarm_dir[256];
+static struct file *lid_file, *alarm_file, *epoch_file;
+static int post_wake_state = -1;
+
+static int did_suspend_to_both;
+
+/*
+ * __toi_power_down
+ * Functionality   : Powers down or reboots the computer once the image
+ *                   has been written to disk.
+ * Key Assumptions : Able to reboot/power down via code called or that
+ *                   the warning emitted if the calls fail will be visible
+ *                   to the user (ie printk resumes devices).
+ */
+
+static void __toi_power_down(int method)
+{
+  int error;
+
+  toi_cond_pause(1, test_action_state(TOI_REBOOT) ? "Ready to reboot." :
+      "Powering down.");
+
+  if (test_result_state(TOI_ABORTED))
+    goto out;
+
+  if (test_action_state(TOI_REBOOT))
+    kernel_restart(NULL);
+
+  switch (method) {
+    case 0:
+      break;
+    case 3:
+      /*
+       * Re-read the overwritten part of pageset2 to make post-resume
+       * faster.
+       */
+      if (read_pageset2(1))
+        panic("Attempt to reload pagedir 2 failed. "
+            "Try rebooting.");
+
+      pm_prepare_console();
+
+      error = pm_notifier_call_chain(PM_SUSPEND_PREPARE);
+      if (!error) {
+        pm_restore_gfp_mask();
+        error = suspend_devices_and_enter(PM_SUSPEND_MEM);
+        pm_restrict_gfp_mask();
+        if (!error)
+          did_suspend_to_both = 1;
+      }
+      pm_notifier_call_chain(PM_POST_SUSPEND);
+      pm_restore_console();
+
+      /* Success - we're now post-resume-from-ram */
+      if (did_suspend_to_both)
+        return;
+
+      /* Failed to suspend to ram - do normal power off */
+      break;
+    case 4:
+      /*
+       * If succeeds, doesn't return. If fails, do a simple
+       * powerdown.
+       */
+      hibernation_platform_enter();
+      break;
+    case 5:
+      /* Historic entry only now */
+      break;
+  }
+
+  if (method && method != 5)
+    toi_cond_pause(1,
+        "Falling back to alternate power off method.");
+
+  if (test_result_state(TOI_ABORTED))
+    goto out;
+
+  if (pm_power_off)
+    kernel_power_off();
+  kernel_halt();
+  toi_cond_pause(1, "Powerdown failed.");
+  while (1)
+    cpu_relax();
+
+out:
+  if (read_pageset2(1))
+    panic("Attempt to reload pagedir 2 failed. Try rebooting.");
+  return;
+}
+
+#define CLOSE_FILE(file) \
+  if (file) { \
+    filp_close(file, NULL); file = NULL; \
+  }
+
+static void powerdown_cleanup(int toi_or_resume)
+{
+  if (!toi_or_resume)
+    return;
+
+  CLOSE_FILE(lid_file);
+  CLOSE_FILE(alarm_file);
+  CLOSE_FILE(epoch_file);
+}
+
+static void open_file(char *format, char *arg, struct file **var, int mode,
+    char *desc)
+{
+  char buf[256];
+
+  if (strlen(arg)) {
+    sprintf(buf, format, arg);
+    *var = filp_open(buf, mode, 0);
+    if (IS_ERR(*var) || !*var) {
+      printk(KERN_INFO "Failed to open %s file '%s' (%p).\n",
+          desc, buf, *var);
+      *var = NULL;
+    }
+  }
+}
+
+static int powerdown_init(int toi_or_resume)
+{
+  if (!toi_or_resume)
+    return 0;
+
+  did_suspend_to_both = 0;
+
+  open_file("/proc/acpi/button/%s/state", lid_state_file, &lid_file,
+      O_RDONLY, "lid");
+
+  if (strlen(wake_alarm_dir)) {
+    open_file("/sys/class/rtc/%s/wakealarm", wake_alarm_dir,
+        &alarm_file, O_WRONLY, "alarm");
+
+    open_file("/sys/class/rtc/%s/since_epoch", wake_alarm_dir,
+        &epoch_file, O_RDONLY, "epoch");
+  }
+
+  return 0;
+}
+
+static int lid_closed(void)
+{
+  char array[25];
+  ssize_t size;
+  loff_t pos = 0;
+
+  if (!lid_file)
+    return 0;
+
+  size = vfs_read(lid_file, (char __user *) array, 25, &pos);
+  if ((int) size < 1) {
+    printk(KERN_INFO "Failed to read lid state file (%d).\n",
+        (int) size);
+    return 0;
+  }
+
+  if (!strcmp(array, "state:      closed\n"))
+    return 1;
+
+  return 0;
+}
+
+static void write_alarm_file(int value)
+{
+  ssize_t size;
+  char buf[40];
+  loff_t pos = 0;
+
+  if (!alarm_file)
+    return;
+
+  sprintf(buf, "%d\n", value);
+
+  size = vfs_write(alarm_file, (char __user *)buf, strlen(buf), &pos);
+
+  if (size < 0)
+    printk(KERN_INFO "Error %d writing alarm value %s.\n",
+        (int) size, buf);
+}
+
+/**
+ * toi_check_resleep: See whether to powerdown again after waking.
+ *
+ * After waking, check whether we should powerdown again in a (usually
+ * different) way. We only do this if the lid switch is still closed.
+ */
+void toi_check_resleep(void)
+{
+  /* We only return if we suspended to ram and woke. */
+  if (lid_closed() && post_wake_state >= 0)
+    __toi_power_down(post_wake_state);
+}
+
+void toi_power_down(void)
+{
+  if (alarm_file && wake_delay) {
+    char array[25];
+    loff_t pos = 0;
+    size_t size = vfs_read(epoch_file, (char __user *) array, 25,
+        &pos);
+
+    if (((int) size) < 1)
+      printk(KERN_INFO "Failed to read epoch file (%d).\n",
+          (int) size);
+    else {
+      unsigned long since_epoch;
+      if (!kstrtoul(array, 0, &since_epoch)) {
+        /* Clear any wakeup time. */
+        write_alarm_file(0);
+
+        /* Set new wakeup time. */
+        write_alarm_file(since_epoch + wake_delay);
+      }
+    }
+  }
+
+  __toi_power_down(toi_poweroff_method);
+
+  toi_check_resleep();
+}
+
+static struct toi_sysfs_data sysfs_params[] = {
+#if defined(CONFIG_ACPI)
+  SYSFS_STRING("lid_file", SYSFS_RW, lid_state_file, 256, 0, NULL),
+  SYSFS_INT("wake_delay", SYSFS_RW, &wake_delay, 0, INT_MAX, 0, NULL),
+  SYSFS_STRING("wake_alarm_dir", SYSFS_RW, wake_alarm_dir, 256, 0, NULL),
+  SYSFS_INT("post_wake_state", SYSFS_RW, &post_wake_state, -1, 5, 0,
+      NULL),
+  SYSFS_UL("powerdown_method", SYSFS_RW, &toi_poweroff_method, 0, 5, 0),
+  SYSFS_INT("did_suspend_to_both", SYSFS_READONLY, &did_suspend_to_both,
+      0, 0, 0, NULL)
+#endif
+};
+
+static struct toi_module_ops powerdown_ops = {
+  .type                                = MISC_HIDDEN_MODULE,
+  .name                                = "poweroff",
+  .initialise                        = powerdown_init,
+  .cleanup                        = powerdown_cleanup,
+  .directory                        = "[ROOT]",
+  .module                                = THIS_MODULE,
+  .sysfs_data                        = sysfs_params,
+  .num_sysfs_entries                = sizeof(sysfs_params) /
+    sizeof(struct toi_sysfs_data),
+};
+
+int toi_poweroff_init(void)
+{
+  return toi_register_module(&powerdown_ops);
+}
+
+void toi_poweroff_exit(void)
+{
+  toi_unregister_module(&powerdown_ops);
+}
diff --git a/kernel/power/tuxonice_power_off.h b/kernel/power/tuxonice_power_off.h
new file mode 100644
index 000000000..6e1d8bb39
--- /dev/null
+++ b/kernel/power/tuxonice_power_off.h
@@ -0,0 +1,24 @@
+/*
+ * kernel/power/tuxonice_power_off.h
+ *
+ * Copyright (C) 2006-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Support for the powering down.
+ */
+
+int toi_pm_state_finish(void);
+void toi_power_down(void);
+extern unsigned long toi_poweroff_method;
+int toi_poweroff_init(void);
+void toi_poweroff_exit(void);
+void toi_check_resleep(void);
+
+extern int platform_begin(int platform_mode);
+extern int platform_pre_snapshot(int platform_mode);
+extern void platform_leave(int platform_mode);
+extern void platform_end(int platform_mode);
+extern void platform_finish(int platform_mode);
+extern int platform_pre_restore(int platform_mode);
+extern void platform_restore_cleanup(int platform_mode);
diff --git a/kernel/power/tuxonice_prepare_image.c b/kernel/power/tuxonice_prepare_image.c
new file mode 100644
index 000000000..0db3e0871
--- /dev/null
+++ b/kernel/power/tuxonice_prepare_image.c
@@ -0,0 +1,1090 @@
+/*
+ * kernel/power/tuxonice_prepare_image.c
+ *
+ * Copyright (C) 2003-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * We need to eat memory until we can:
+ * 1. Perform the save without changing anything (RAM_NEEDED < #pages)
+ * 2. Fit it all in available space (toiActiveAllocator->available_space() >=
+ *    main_storage_needed())
+ * 3. Reload the pagedir and pageset1 to places that don't collide with their
+ *    final destinations, not knowing to what extent the resumed kernel will
+ *    overlap with the one loaded at boot time. I think the resumed kernel
+ *    should overlap completely, but I don't want to rely on this as it is
+ *    an unproven assumption. We therefore assume there will be no overlap at
+ *    all (worse case).
+ * 4. Meet the user's requested limit (if any) on the size of the image.
+ *    The limit is in MB, so pages/256 (assuming 4K pages).
+ *
+ */
+
+#include <linux/highmem.h>
+#include <linux/freezer.h>
+#include <linux/hardirq.h>
+#include <linux/mmzone.h>
+#include <linux/console.h>
+#include <linux/sched/signal.h>
+#include <linux/tuxonice.h>
+
+#include "tuxonice_pageflags.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_io.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_prepare_image.h"
+#include "tuxonice.h"
+#include "tuxonice_extent.h"
+#include "tuxonice_checksum.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_atomic_copy.h"
+#include "tuxonice_builtin.h"
+
+static unsigned long num_nosave, main_storage_allocated, storage_limit,
+                     header_storage_needed;
+unsigned long extra_pd1_pages_allowance =
+CONFIG_TOI_DEFAULT_EXTRA_PAGES_ALLOWANCE;
+long image_size_limit = CONFIG_TOI_DEFAULT_IMAGE_SIZE_LIMIT;
+static int no_ps2_needed;
+
+struct attention_list {
+  struct task_struct *task;
+  struct attention_list *next;
+};
+
+static struct attention_list *attention_list;
+
+#define PAGESET1 0
+#define PAGESET2 1
+
+void free_attention_list(void)
+{
+  struct attention_list *last = NULL;
+
+  while (attention_list) {
+    last = attention_list;
+    attention_list = attention_list->next;
+    toi_kfree(6, last, sizeof(*last));
+  }
+}
+
+static int build_attention_list(void)
+{
+  int i, task_count = 0;
+  struct task_struct *p;
+  struct attention_list *next;
+
+  /*
+   * Count all userspace process (with task->mm) marked PF_NOFREEZE.
+   */
+  toi_read_lock_tasklist();
+  for_each_process(p)
+    if ((p->flags & PF_NOFREEZE) || p == current)
+      task_count++;
+  toi_read_unlock_tasklist();
+
+  /*
+   * Allocate attention list structs.
+   */
+  for (i = 0; i < task_count; i++) {
+    struct attention_list *this =
+      toi_kzalloc(6, sizeof(struct attention_list),
+          TOI_WAIT_GFP);
+    if (!this) {
+      printk(KERN_INFO "Failed to allocate slab for "
+          "attention list.\n");
+      free_attention_list();
+      return 1;
+    }
+    this->next = NULL;
+    if (attention_list)
+      this->next = attention_list;
+    attention_list = this;
+  }
+
+  next = attention_list;
+  toi_read_lock_tasklist();
+  for_each_process(p)
+    if ((p->flags & PF_NOFREEZE) || p == current) {
+      next->task = p;
+      next = next->next;
+    }
+  toi_read_unlock_tasklist();
+  return 0;
+}
+
+static void pageset2_full(void)
+{
+  struct zone *zone;
+  struct page *page, *next;
+  unsigned long flags;
+  int i;
+
+  toi_trace_index++;
+
+  for_each_populated_zone(zone) {
+    spin_lock_irqsave(zone_lru_lock(zone), flags);
+    for_each_lru(i) {
+      if (!zone_page_state(zone, NR_LRU_BASE + i))
+        continue;
+
+      list_for_each_entry_safe(page, next, &zone->zone_pgdat->lruvec.lists[i], lru) {
+        struct address_space *mapping;
+
+        mapping = page_mapping(page);
+        if (!mapping || !mapping->host ||
+            !(mapping->host->i_flags & S_ATOMIC_COPY)) {
+          if (PageTOI_RO(page) && test_result_state(TOI_KEPT_IMAGE)) {
+            TOI_TRACE_DEBUG(page_to_pfn(page), "_Pageset2 unmodified.");
+          } else {
+            TOI_TRACE_DEBUG(page_to_pfn(page), "_Pageset2 pageset2_full.");
+            SetPagePageset2(page);
+          }
+        }
+      }
+    }
+    spin_unlock_irqrestore(zone_lru_lock(zone), flags);
+  }
+}
+
+/*
+ * toi_mark_task_as_pageset
+ * Functionality   : Marks all the saveable pages belonging to a given process
+ *                      as belonging to a particular pageset.
+ */
+
+static void toi_mark_task_as_pageset(struct task_struct *t, int pageset2)
+{
+  struct vm_area_struct *vma;
+  struct mm_struct *mm;
+
+  mm = t->active_mm;
+
+  if (!mm || !mm->mmap)
+    return;
+
+  toi_trace_index++;
+
+  if (!irqs_disabled())
+    down_read(&mm->mmap_sem);
+
+  for (vma = mm->mmap; vma; vma = vma->vm_next) {
+    unsigned long posn;
+
+    if (!vma->vm_start ||
+        vma->vm_flags & VM_PFNMAP)
+      continue;
+
+    for (posn = vma->vm_start; posn < vma->vm_end;
+        posn += PAGE_SIZE) {
+      struct page *page = follow_page(vma, posn, 0);
+      struct address_space *mapping;
+
+      if (!page || !pfn_valid(page_to_pfn(page)))
+        continue;
+
+      mapping = page_mapping(page);
+      if (mapping && mapping->host &&
+          mapping->host->i_flags & S_ATOMIC_COPY && pageset2)
+        continue;
+
+      if (PageTOI_RO(page) && test_result_state(TOI_KEPT_IMAGE)) {
+        TOI_TRACE_DEBUG(page_to_pfn(page), "_Unmodified %d", pageset2 ? 1 : 2);
+        continue;
+      }
+
+      if (pageset2) {
+        TOI_TRACE_DEBUG(page_to_pfn(page), "_MarkTaskAsPageset 1");
+        SetPagePageset2(page);
+      } else {
+        TOI_TRACE_DEBUG(page_to_pfn(page), "_MarkTaskAsPageset 2");
+        ClearPagePageset2(page);
+        SetPagePageset1(page);
+      }
+    }
+  }
+
+  if (!irqs_disabled())
+    up_read(&mm->mmap_sem);
+}
+
+static void mark_tasks(int pageset)
+{
+  struct task_struct *p;
+
+  toi_read_lock_tasklist();
+  for_each_process(p) {
+    if (!p->mm)
+      continue;
+
+    if (p->flags & PF_KTHREAD)
+      continue;
+
+    toi_mark_task_as_pageset(p, pageset);
+  }
+  toi_read_unlock_tasklist();
+
+}
+
+/* mark_pages_for_pageset2
+ *
+ * Description:        Mark unshared pages in processes not needed for hibernate as
+ *                 being able to be written out in a separate pagedir.
+ *                 HighMem pages are simply marked as pageset2. They won't be
+ *                 needed during hibernate.
+ */
+
+static void toi_mark_pages_for_pageset2(void)
+{
+  struct attention_list *this = attention_list;
+
+  memory_bm_clear(pageset2_map);
+
+  if (test_action_state(TOI_NO_PAGESET2) || no_ps2_needed)
+    return;
+
+  if (test_action_state(TOI_PAGESET2_FULL))
+    pageset2_full();
+  else
+    mark_tasks(PAGESET2);
+
+  /*
+   * Because the tasks in attention_list are ones related to hibernating,
+   * we know that they won't go away under us.
+   */
+
+  while (this) {
+    if (!test_result_state(TOI_ABORTED))
+      toi_mark_task_as_pageset(this->task, PAGESET1);
+    this = this->next;
+  }
+}
+
+/*
+ * The atomic copy of pageset1 is stored in pageset2 pages.
+ * But if pageset1 is larger (normally only just after boot),
+ * we need to allocate extra pages to store the atomic copy.
+ * The following data struct and functions are used to handle
+ * the allocation and freeing of that memory.
+ */
+
+static unsigned long extra_pages_allocated;
+
+struct extras {
+  struct page *page;
+  int order;
+  struct extras *next;
+};
+
+static struct extras *extras_list;
+
+/* toi_free_extra_pagedir_memory
+ *
+ * Description:        Free previously allocated extra pagedir memory.
+ */
+void toi_free_extra_pagedir_memory(void)
+{
+  /* Free allocated pages */
+  while (extras_list) {
+    struct extras *this = extras_list;
+    int i;
+
+    extras_list = this->next;
+
+    for (i = 0; i < (1 << this->order); i++)
+      ClearPageNosave(this->page + i);
+
+    toi_free_pages(9, this->page, this->order);
+    toi_kfree(7, this, sizeof(*this));
+  }
+
+  extra_pages_allocated = 0;
+}
+
+/* toi_allocate_extra_pagedir_memory
+ *
+ * Description:        Allocate memory for making the atomic copy of pagedir1 in the
+ *                 case where it is bigger than pagedir2.
+ * Arguments:        int        num_to_alloc: Number of extra pages needed.
+ * Result:        int.         Number of extra pages we now have allocated.
+ */
+static int toi_allocate_extra_pagedir_memory(int extra_pages_needed)
+{
+  int j, order, num_to_alloc = extra_pages_needed - extra_pages_allocated;
+  gfp_t flags = TOI_ATOMIC_GFP;
+
+  if (num_to_alloc < 1)
+    return 0;
+
+  order = fls(num_to_alloc);
+  if (order >= MAX_ORDER)
+    order = MAX_ORDER - 1;
+
+  while (num_to_alloc) {
+    struct page *newpage;
+    unsigned long virt;
+    struct extras *extras_entry;
+
+    while ((1 << order) > num_to_alloc)
+      order--;
+
+    extras_entry = (struct extras *) toi_kzalloc(7,
+        sizeof(struct extras), TOI_ATOMIC_GFP);
+
+    if (!extras_entry)
+      return extra_pages_allocated;
+
+    virt = toi_get_free_pages(9, flags, order);
+    while (!virt && order) {
+      order--;
+      virt = toi_get_free_pages(9, flags, order);
+    }
+
+    if (!virt) {
+      toi_kfree(7, extras_entry, sizeof(*extras_entry));
+      return extra_pages_allocated;
+    }
+
+    newpage = virt_to_page(virt);
+
+    extras_entry->page = newpage;
+    extras_entry->order = order;
+    extras_entry->next = extras_list;
+
+    extras_list = extras_entry;
+
+    for (j = 0; j < (1 << order); j++) {
+      SetPageNosave(newpage + j);
+      SetPagePageset1Copy(newpage + j);
+    }
+
+    extra_pages_allocated += (1 << order);
+    num_to_alloc -= (1 << order);
+  }
+
+  return extra_pages_allocated;
+}
+
+/*
+ * real_nr_free_pages: Count pcp pages for a zone type or all zones
+ * (-1 for all, otherwise zone_idx() result desired).
+ */
+unsigned long real_nr_free_pages(unsigned long zone_idx_mask)
+{
+  struct zone *zone;
+  int result = 0, cpu;
+
+  /* PCP lists */
+  for_each_populated_zone(zone) {
+    if (!(zone_idx_mask & (1 << zone_idx(zone))))
+      continue;
+
+    for_each_online_cpu(cpu) {
+      struct per_cpu_pageset *pset =
+        per_cpu_ptr(zone->pageset, cpu);
+      struct per_cpu_pages *pcp = &pset->pcp;
+      result += pcp->count;
+    }
+
+    result += zone_page_state(zone, NR_FREE_PAGES);
+  }
+  return result;
+}
+
+/*
+ * Discover how much extra memory will be required by the drivers
+ * when they're asked to hibernate. We can then ensure that amount
+ * of memory is available when we really want it.
+ */
+static void get_extra_pd1_allowance(void)
+{
+  unsigned long orig_num_free = real_nr_free_pages(all_zones_mask), final;
+
+  toi_prepare_status(CLEAR_BAR, "Finding allowance for drivers.");
+
+  if (toi_go_atomic(PMSG_FREEZE, 1))
+    return;
+
+  final = real_nr_free_pages(all_zones_mask);
+  toi_end_atomic(ATOMIC_ALL_STEPS, 1, 0);
+
+  extra_pd1_pages_allowance = (orig_num_free > final) ?
+    orig_num_free - final + MIN_EXTRA_PAGES_ALLOWANCE :
+    MIN_EXTRA_PAGES_ALLOWANCE;
+}
+
+/*
+ * Amount of storage needed, possibly taking into account the
+ * expected compression ratio and possibly also ignoring our
+ * allowance for extra pages.
+ */
+static unsigned long main_storage_needed(int use_ecr,
+    int ignore_extra_pd1_allow)
+{
+  return (pagedir1.size + pagedir2.size +
+      (ignore_extra_pd1_allow ? 0 : extra_pd1_pages_allowance)) *
+    (use_ecr ? toi_expected_compression_ratio() : 100) / 100;
+}
+
+/*
+ * Storage needed for the image header, in bytes until the return.
+ *
+ * fs_info_space_needed is saved in a static variable unless we
+ * explicitly want to reset the value (done at the start of a cycle)
+ * as it requires memory allocation that may result in a hang if we're
+ * also trying to free memory.
+ */
+unsigned long get_header_storage_needed(int reset)
+{
+  unsigned long bytes = sizeof(struct toi_header) +
+    toi_header_storage_for_modules() +
+    toi_pageflags_space_needed() +
+    fs_info_space_needed(0);
+
+  return DIV_ROUND_UP(bytes, PAGE_SIZE);
+}
+
+/*
+ * When freeing memory, pages from either pageset might be freed.
+ *
+ * When seeking to free memory to be able to hibernate, for every ps1 page
+ * freed, we need 2 less pages for the atomic copy because there is one less
+ * page to copy and one more page into which data can be copied.
+ *
+ * Freeing ps2 pages saves us nothing directly. No more memory is available
+ * for the atomic copy. Indirectly, a ps1 page might be freed (slab?), but
+ * that's too much work to figure out.
+ *
+ * => ps1_to_free functions
+ *
+ * Of course if we just want to reduce the image size, because of storage
+ * limitations or an image size limit either ps will do.
+ *
+ * => any_to_free function
+ */
+
+static unsigned long lowpages_usable_for_highmem_copy(void)
+{
+  unsigned long needed = get_lowmem_size(pagedir1) +
+    extra_pd1_pages_allowance + MIN_FREE_RAM +
+    toi_memory_for_modules(0),
+    available = get_lowmem_size(pagedir2) +
+      real_nr_free_low_pages() + extra_pages_allocated;
+
+  return available > needed ? available - needed : 0;
+}
+
+static unsigned long highpages_ps1_to_free(void)
+{
+  unsigned long need = get_highmem_size(pagedir1),
+                available = get_highmem_size(pagedir2) +
+                  real_nr_free_high_pages() +
+                  lowpages_usable_for_highmem_copy();
+
+  return need > available ? DIV_ROUND_UP(need - available, 2) : 0;
+}
+
+static unsigned long lowpages_ps1_to_free(void)
+{
+  unsigned long needed = get_lowmem_size(pagedir1) +
+    extra_pd1_pages_allowance + MIN_FREE_RAM +
+    toi_memory_for_modules(0),
+    available = get_lowmem_size(pagedir2) +
+      real_nr_free_low_pages() + extra_pages_allocated;
+
+  return needed > available ? DIV_ROUND_UP(needed - available, 2) : 0;
+}
+
+static unsigned long current_image_size(void)
+{
+  return pagedir1.size + pagedir2.size + header_storage_needed;
+}
+
+static unsigned long storage_still_required(void)
+{
+  unsigned long needed = main_storage_needed(1, 1);
+  return needed > storage_limit ? needed - storage_limit : 0;
+}
+
+static unsigned long ram_still_required(void)
+{
+  unsigned long needed = MIN_FREE_RAM + toi_memory_for_modules(0) +
+    2 * extra_pd1_pages_allowance,
+      available = real_nr_free_low_pages() + extra_pages_allocated;
+  return needed > available ? needed - available : 0;
+}
+
+unsigned long any_to_free(int use_image_size_limit)
+{
+  int use_soft_limit = use_image_size_limit && image_size_limit > 0;
+  unsigned long current_size = current_image_size(),
+                soft_limit = use_soft_limit ? (image_size_limit << 8) : 0,
+                to_free = use_soft_limit ? (current_size > soft_limit ?
+                    current_size - soft_limit : 0) : 0,
+                storage_limit = storage_still_required(),
+                ram_limit = ram_still_required(),
+                first_max = max(to_free, storage_limit);
+
+  return max(first_max, ram_limit);
+}
+
+static int need_pageset2(void)
+{
+  return (real_nr_free_low_pages() + extra_pages_allocated -
+      2 * extra_pd1_pages_allowance - MIN_FREE_RAM -
+      toi_memory_for_modules(0) - pagedir1.size) < pagedir2.size;
+}
+
+/* amount_needed
+ *
+ * Calculates the amount by which the image size needs to be reduced to meet
+ * our constraints.
+ */
+static unsigned long amount_needed(int use_image_size_limit)
+{
+  return max(highpages_ps1_to_free() + lowpages_ps1_to_free(),
+      any_to_free(use_image_size_limit));
+}
+
+static int image_not_ready(int use_image_size_limit)
+{
+  toi_message(TOI_EAT_MEMORY, TOI_LOW, 1,
+      "Amount still needed (%lu) > 0:%u,"
+      " Storage allocd: %lu < %lu: %u.\n",
+      amount_needed(use_image_size_limit),
+      (amount_needed(use_image_size_limit) > 0),
+      main_storage_allocated,
+      main_storage_needed(1, 1),
+      main_storage_allocated < main_storage_needed(1, 1));
+
+  toi_cond_pause(0, NULL);
+
+  return (amount_needed(use_image_size_limit) > 0) ||
+    main_storage_allocated < main_storage_needed(1, 1);
+}
+
+static void display_failure_reason(int tries_exceeded)
+{
+  unsigned long storage_required = storage_still_required(),
+                ram_required = ram_still_required(),
+                high_ps1 = highpages_ps1_to_free(),
+                low_ps1 = lowpages_ps1_to_free();
+
+  printk(KERN_INFO "Failed to prepare the image because...\n");
+
+  if (!storage_limit) {
+    printk(KERN_INFO "- You need some storage available to be "
+        "able to hibernate.\n");
+    return;
+  }
+
+  if (tries_exceeded)
+    printk(KERN_INFO "- The maximum number of iterations was "
+        "reached without successfully preparing the "
+        "image.\n");
+
+  if (storage_required) {
+    printk(KERN_INFO " - We need at least %lu pages of storage "
+        "(ignoring the header), but only have %lu.\n",
+        main_storage_needed(1, 1),
+        main_storage_allocated);
+    set_abort_result(TOI_INSUFFICIENT_STORAGE);
+  }
+
+  if (ram_required) {
+    printk(KERN_INFO " - We need %lu more free pages of low "
+        "memory.\n", ram_required);
+    printk(KERN_INFO "     Minimum free     : %8d\n", MIN_FREE_RAM);
+    printk(KERN_INFO "   + Reqd. by modules : %8lu\n",
+        toi_memory_for_modules(0));
+    printk(KERN_INFO "   + 2 * extra allow  : %8lu\n",
+        2 * extra_pd1_pages_allowance);
+    printk(KERN_INFO "   - Currently free   : %8lu\n",
+        real_nr_free_low_pages());
+    printk(KERN_INFO "   - Pages allocd     : %8lu\n",
+        extra_pages_allocated);
+    printk(KERN_INFO "                      : ========\n");
+    printk(KERN_INFO "     Still needed     : %8lu\n",
+        ram_required);
+
+    /* Print breakdown of memory needed for modules */
+    toi_memory_for_modules(1);
+    set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY);
+  }
+
+  if (high_ps1) {
+    printk(KERN_INFO "- We need to free %lu highmem pageset 1 "
+        "pages.\n", high_ps1);
+    set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY);
+  }
+
+  if (low_ps1) {
+    printk(KERN_INFO " - We need to free %ld lowmem pageset 1 "
+        "pages.\n", low_ps1);
+    set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY);
+  }
+}
+
+static void display_stats(int always, int sub_extra_pd1_allow)
+{
+  char buffer[255];
+  snprintf(buffer, 254,
+      "Free:%lu(%lu). Sets:%lu(%lu),%lu(%lu). "
+      "Nosave:%lu-%lu=%lu. Storage:%lu/%lu(%lu=>%lu). "
+      "Needed:%lu,%lu,%lu(%u,%lu,%lu,%ld) (PS2:%s)\n",
+
+      /* Free */
+      real_nr_free_pages(all_zones_mask),
+      real_nr_free_low_pages(),
+
+      /* Sets */
+      pagedir1.size, pagedir1.size - get_highmem_size(pagedir1),
+      pagedir2.size, pagedir2.size - get_highmem_size(pagedir2),
+
+      /* Nosave */
+      num_nosave, extra_pages_allocated,
+      num_nosave - extra_pages_allocated,
+
+      /* Storage */
+      main_storage_allocated,
+      storage_limit,
+      main_storage_needed(1, sub_extra_pd1_allow),
+      main_storage_needed(1, 1),
+
+      /* Needed */
+      lowpages_ps1_to_free(), highpages_ps1_to_free(),
+      any_to_free(1),
+      MIN_FREE_RAM, toi_memory_for_modules(0),
+      extra_pd1_pages_allowance,
+      image_size_limit,
+
+      need_pageset2() ? "yes" : "no");
+
+  if (always)
+    printk("%s", buffer);
+  else
+    toi_message(TOI_EAT_MEMORY, TOI_MEDIUM, 1, buffer);
+}
+
+/* flag_image_pages
+ *
+ * This routine generates our lists of pages to be stored in each
+ * pageset. Since we store the data using extents, and adding new
+ * extents might allocate a new extent page, this routine may well
+ * be called more than once.
+ */
+static void flag_image_pages(int atomic_copy)
+{
+  int num_free = 0, num_unmodified = 0;
+  unsigned long loop;
+  struct zone *zone;
+
+  pagedir1.size = 0;
+  pagedir2.size = 0;
+
+  set_highmem_size(pagedir1, 0);
+  set_highmem_size(pagedir2, 0);
+
+  num_nosave = 0;
+  toi_trace_index++;
+
+  memory_bm_clear(pageset1_map);
+
+  toi_generate_free_page_map();
+
+  /*
+   * Pages not to be saved are marked Nosave irrespective of being
+   * reserved.
+   */
+  for_each_populated_zone(zone) {
+    int highmem = is_highmem(zone);
+
+    for (loop = 0; loop < zone->spanned_pages; loop++) {
+      unsigned long pfn = zone->zone_start_pfn + loop;
+      struct page *page;
+      int chunk_size;
+
+      if (!pfn_valid(pfn)) {
+        TOI_TRACE_DEBUG(pfn, "_Flag Invalid");
+        continue;
+      }
+
+      chunk_size = toi_size_of_free_region(zone, pfn);
+      if (chunk_size) {
+        unsigned long y;
+        for (y = pfn; y < pfn + chunk_size; y++) {
+          page = pfn_to_page(y);
+          TOI_TRACE_DEBUG(y, "_Flag Free");
+          ClearPagePageset1(page);
+          ClearPagePageset2(page);
+        }
+        num_free += chunk_size;
+        loop += chunk_size - 1;
+        continue;
+      }
+
+      page = pfn_to_page(pfn);
+
+      if (PageNosave(page)) {
+        char *desc = PagePageset1Copy(page) ? "Pageset1Copy" : "NoSave";
+        TOI_TRACE_DEBUG(pfn, "_Flag %s", desc);
+        num_nosave++;
+        continue;
+      }
+
+      page = highmem ? saveable_highmem_page(zone, pfn) :
+        saveable_page(zone, pfn);
+
+      if (!page) {
+        TOI_TRACE_DEBUG(pfn, "_Flag Nosave2");
+        num_nosave++;
+        continue;
+      }
+
+      if (PageTOI_RO(page) && test_result_state(TOI_KEPT_IMAGE)) {
+        TOI_TRACE_DEBUG(pfn, "_Unmodified");
+        num_unmodified++;
+        continue;
+      }
+
+      if (PagePageset2(page)) {
+        pagedir2.size++;
+        TOI_TRACE_DEBUG(pfn, "_Flag PS2");
+        if (PageHighMem(page))
+          inc_highmem_size(pagedir2);
+        else
+          SetPagePageset1Copy(page);
+        if (PageResave(page)) {
+          SetPagePageset1(page);
+          ClearPagePageset1Copy(page);
+          pagedir1.size++;
+          if (PageHighMem(page))
+            inc_highmem_size(pagedir1);
+        }
+      } else {
+        pagedir1.size++;
+        TOI_TRACE_DEBUG(pfn, "_Flag PS1");
+        SetPagePageset1(page);
+        if (PageHighMem(page))
+          inc_highmem_size(pagedir1);
+      }
+    }
+  }
+
+  if (!atomic_copy)
+    toi_message(TOI_EAT_MEMORY, TOI_MEDIUM, 0,
+        "Count data pages: Set1 (%d) + Set2 (%d) + Nosave (%ld)"
+        " + Unmodified (%d) + NumFree (%d) = %d.\n",
+        pagedir1.size, pagedir2.size, num_nosave, num_unmodified,
+        num_free, pagedir1.size + pagedir2.size + num_nosave + num_free);
+}
+
+void toi_recalculate_image_contents(int atomic_copy)
+{
+  memory_bm_clear(pageset1_map);
+  if (!atomic_copy) {
+    unsigned long pfn;
+    memory_bm_position_reset(pageset2_map);
+    for (pfn = memory_bm_next_pfn(pageset2_map, 0);
+        pfn != BM_END_OF_MAP;
+        pfn = memory_bm_next_pfn(pageset2_map, 0))
+      ClearPagePageset1Copy(pfn_to_page(pfn));
+    /* Need to call this before getting pageset1_size! */
+    toi_mark_pages_for_pageset2();
+  }
+  memory_bm_position_reset(pageset2_map);
+  flag_image_pages(atomic_copy);
+
+  if (!atomic_copy) {
+    storage_limit = toiActiveAllocator->storage_available();
+    display_stats(0, 0);
+  }
+}
+
+int try_allocate_extra_memory(void)
+{
+  unsigned long wanted = pagedir1.size +  extra_pd1_pages_allowance -
+    get_lowmem_size(pagedir2);
+  if (wanted > extra_pages_allocated) {
+    unsigned long got = toi_allocate_extra_pagedir_memory(wanted);
+    if (wanted < got) {
+      toi_message(TOI_EAT_MEMORY, TOI_LOW, 1,
+          "Want %d extra pages for pageset1, got %d.\n",
+          wanted, got);
+      return 1;
+    }
+  }
+  return 0;
+}
+
+/* update_image
+ *
+ * Allocate [more] memory and storage for the image.
+ */
+static void update_image(int ps2_recalc)
+{
+  int old_header_req;
+  unsigned long seek;
+
+  if (try_allocate_extra_memory())
+    return;
+
+  if (ps2_recalc)
+    goto recalc;
+
+  thaw_kernel_threads();
+
+  /*
+   * Allocate remaining storage space, if possible, up to the
+   * maximum we know we'll need. It's okay to allocate the
+   * maximum if the writer is the swapwriter, but
+   * we don't want to grab all available space on an NFS share.
+   * We therefore ignore the expected compression ratio here,
+   * thereby trying to allocate the maximum image size we could
+   * need (assuming compression doesn't expand the image), but
+   * don't complain if we can't get the full amount we're after.
+   */
+
+  do {
+    int result;
+
+    old_header_req = header_storage_needed;
+    toiActiveAllocator->reserve_header_space(header_storage_needed);
+
+    /* How much storage is free with the reservation applied? */
+    storage_limit = toiActiveAllocator->storage_available();
+    seek = min(storage_limit, main_storage_needed(0, 0));
+
+    result = toiActiveAllocator->allocate_storage(seek);
+    if (result)
+      printk("Failed to allocate storage (%d).\n", result);
+
+    main_storage_allocated =
+      toiActiveAllocator->storage_allocated();
+
+    /* Need more header because more storage allocated? */
+    header_storage_needed = get_header_storage_needed(0);
+
+  } while (header_storage_needed > old_header_req);
+
+  if (freeze_kernel_threads())
+    set_abort_result(TOI_FREEZING_FAILED);
+
+recalc:
+  toi_recalculate_image_contents(0);
+}
+
+/* attempt_to_freeze
+ *
+ * Try to freeze processes.
+ */
+
+static int attempt_to_freeze(void)
+{
+  int result;
+
+  /* Stop processes before checking again */
+  toi_prepare_status(CLEAR_BAR, "Freezing processes & syncing "
+      "filesystems.");
+  result = freeze_processes();
+
+  if (result)
+    set_abort_result(TOI_FREEZING_FAILED);
+
+  result = freeze_kernel_threads();
+
+  if (result)
+    set_abort_result(TOI_FREEZING_FAILED);
+
+  return result;
+}
+
+/* eat_memory
+ *
+ * Try to free some memory, either to meet hard or soft constraints on the image
+ * characteristics.
+ *
+ * Hard constraints:
+ * - Pageset1 must be < half of memory;
+ * - We must have enough memory free at resume time to have pageset1
+ *   be able to be loaded in pages that don't conflict with where it has to
+ *   be restored.
+ * Soft constraints
+ * - User specificied image size limit.
+ */
+static void eat_memory(void)
+{
+  unsigned long amount_wanted = 0;
+  int did_eat_memory = 0;
+
+  /*
+   * Note that if we have enough storage space and enough free memory, we
+   * may exit without eating anything. We give up when the last 10
+   * iterations ate no extra pages because we're not going to get much
+   * more anyway, but the few pages we get will take a lot of time.
+   *
+   * We freeze processes before beginning, and then unfreeze them if we
+   * need to eat memory until we think we have enough. If our attempts
+   * to freeze fail, we give up and abort.
+   */
+
+  amount_wanted = amount_needed(1);
+
+  switch (image_size_limit) {
+    case -1: /* Don't eat any memory */
+      if (amount_wanted > 0) {
+        set_abort_result(TOI_WOULD_EAT_MEMORY);
+        return;
+      }
+      break;
+    case -2:  /* Free caches only */
+      drop_pagecache();
+      toi_recalculate_image_contents(0);
+      amount_wanted = amount_needed(1);
+      break;
+    default:
+      break;
+  }
+
+  if (amount_wanted > 0 && !test_result_state(TOI_ABORTED) &&
+      image_size_limit != -1) {
+    unsigned long request = amount_wanted;
+    unsigned long high_req = max(highpages_ps1_to_free(),
+        any_to_free(1));
+    unsigned long low_req = lowpages_ps1_to_free();
+    unsigned long got = 0;
+
+    toi_prepare_status(CLEAR_BAR,
+        "Seeking to free %ldMB of memory.",
+        MB(amount_wanted));
+
+    thaw_kernel_threads();
+
+    /*
+     * Ask for too many because shrink_memory_mask doesn't
+     * currently return enough most of the time.
+     */
+
+    if (low_req)
+      got = shrink_memory_mask(low_req, GFP_KERNEL);
+    if (high_req)
+      shrink_memory_mask(high_req - got, GFP_HIGHUSER);
+
+    did_eat_memory = 1;
+
+    toi_recalculate_image_contents(0);
+
+    amount_wanted = amount_needed(1);
+
+    printk(KERN_DEBUG "Asked shrink_memory_mask for %ld low pages &"
+        " %ld pages from anywhere, got %ld.\n",
+        high_req, low_req,
+        request - amount_wanted);
+
+    toi_cond_pause(0, NULL);
+
+    if (freeze_kernel_threads())
+      set_abort_result(TOI_FREEZING_FAILED);
+  }
+
+  if (did_eat_memory)
+    toi_recalculate_image_contents(0);
+}
+
+/* toi_prepare_image
+ *
+ * Entry point to the whole image preparation section.
+ *
+ * We do four things:
+ * - Freeze processes;
+ * - Ensure image size constraints are met;
+ * - Complete all the preparation for saving the image,
+ *   including allocation of storage. The only memory
+ *   that should be needed when we're finished is that
+ *   for actually storing the image (and we know how
+ *   much is needed for that because the modules tell
+ *   us).
+ * - Make sure that all dirty buffers are written out.
+ */
+#define MAX_TRIES 2
+int toi_prepare_image(void)
+{
+  int result = 1, tries = 1;
+
+  main_storage_allocated = 0;
+
+  // Force recalculation of the amount of header storage needed for fs info.
+  fs_info_space_needed(1);
+
+  no_ps2_needed = 0;
+
+  if (attempt_to_freeze())
+    return 1;
+
+  lock_device_hotplug();
+  set_toi_state(TOI_DEVICE_HOTPLUG_LOCKED);
+
+  if (!extra_pd1_pages_allowance)
+    get_extra_pd1_allowance();
+
+  storage_limit = toiActiveAllocator->storage_available();
+
+  if (!storage_limit) {
+    printk(KERN_INFO "No storage available. Didn't try to prepare "
+        "an image.\n");
+    display_failure_reason(0);
+    set_abort_result(TOI_NOSTORAGE_AVAILABLE);
+    return 1;
+  }
+
+  if (build_attention_list()) {
+    abort_hibernate(TOI_UNABLE_TO_PREPARE_IMAGE,
+        "Unable to successfully prepare the image.\n");
+    return 1;
+  }
+
+  toi_recalculate_image_contents(0);
+
+  do {
+    toi_prepare_status(CLEAR_BAR,
+        "Preparing Image. Try %d.", tries);
+
+    eat_memory();
+
+    if (test_result_state(TOI_ABORTED))
+      break;
+
+    update_image(0);
+
+    tries++;
+
+  } while (image_not_ready(1) && tries <= MAX_TRIES &&
+      !test_result_state(TOI_ABORTED));
+
+  result = image_not_ready(0);
+
+  /* TODO: Handle case where need to remove existing image and resave
+   * instead of adding to incremental image. */
+
+  if (!test_result_state(TOI_ABORTED)) {
+    if (result) {
+      display_stats(1, 0);
+      display_failure_reason(tries > MAX_TRIES);
+      abort_hibernate(TOI_UNABLE_TO_PREPARE_IMAGE,
+          "Unable to successfully prepare the image.\n");
+    } else {
+      /* Pageset 2 needed? */
+      if (!need_pageset2() &&
+          test_action_state(TOI_NO_PS2_IF_UNNEEDED)) {
+        no_ps2_needed = 1;
+        toi_recalculate_image_contents(0);
+        update_image(1);
+      }
+
+      toi_cond_pause(1, "Image preparation complete.");
+    }
+  }
+
+  return result ? result : allocate_checksum_pages();
+}
diff --git a/kernel/power/tuxonice_prepare_image.h b/kernel/power/tuxonice_prepare_image.h
new file mode 100644
index 000000000..8870de2d4
--- /dev/null
+++ b/kernel/power/tuxonice_prepare_image.h
@@ -0,0 +1,38 @@
+/*
+ * kernel/power/tuxonice_prepare_image.h
+ *
+ * Copyright (C) 2003-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ */
+
+#include <asm/sections.h>
+
+extern int toi_prepare_image(void);
+extern void toi_recalculate_image_contents(int storage_available);
+extern unsigned long real_nr_free_pages(unsigned long zone_idx_mask);
+extern long image_size_limit;
+extern void toi_free_extra_pagedir_memory(void);
+extern unsigned long extra_pd1_pages_allowance;
+extern void free_attention_list(void);
+
+#define MIN_FREE_RAM 100
+#define MIN_EXTRA_PAGES_ALLOWANCE 500
+
+#define all_zones_mask ((unsigned long) ((1 << MAX_NR_ZONES) - 1))
+#ifdef CONFIG_HIGHMEM
+#define real_nr_free_high_pages() (real_nr_free_pages(1 << ZONE_HIGHMEM))
+#define real_nr_free_low_pages() (real_nr_free_pages(all_zones_mask - \
+      (1 << ZONE_HIGHMEM)))
+#else
+#define real_nr_free_high_pages() (0)
+#define real_nr_free_low_pages() (real_nr_free_pages(all_zones_mask))
+
+/* For eat_memory function */
+#define ZONE_HIGHMEM (MAX_NR_ZONES + 1)
+#endif
+
+unsigned long get_header_storage_needed(int reset);
+unsigned long any_to_free(int use_image_size_limit);
+int try_allocate_extra_memory(void);
diff --git a/kernel/power/tuxonice_prune.c b/kernel/power/tuxonice_prune.c
new file mode 100644
index 000000000..f9b7f444f
--- /dev/null
+++ b/kernel/power/tuxonice_prune.c
@@ -0,0 +1,406 @@
+/*
+ * kernel/power/tuxonice_prune.c
+ *
+ * Copyright (C) 2012 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * This file implements a TuxOnIce module that seeks to prune the
+ * amount of data written to disk. It builds a table of hashes
+ * of the uncompressed data, and writes the pfn of the previous page
+ * with the same contents instead of repeating the data when a match
+ * is found.
+ */
+
+#include <linux/suspend.h>
+#include <linux/highmem.h>
+#include <linux/vmalloc.h>
+#include <linux/crypto.h>
+#include <linux/scatterlist.h>
+#include <crypto/hash.h>
+
+#include "tuxonice_builtin.h"
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_io.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_alloc.h"
+
+/*
+ * We never write a page bigger than PAGE_SIZE, so use a large number
+ * to indicate that data is a PFN.
+ */
+#define PRUNE_DATA_IS_PFN (PAGE_SIZE + 100)
+
+static unsigned long toi_pruned_pages;
+
+static struct toi_module_ops toi_prune_ops;
+static struct toi_module_ops *next_driver;
+
+static char toi_prune_hash_algo_name[32] = "sha1";
+
+static DEFINE_MUTEX(stats_lock);
+
+struct cpu_context {
+  struct shash_desc desc;
+  char *digest;
+};
+
+#define OUT_BUF_SIZE (2 * PAGE_SIZE)
+
+static DEFINE_PER_CPU(struct cpu_context, contexts);
+
+/*
+ * toi_crypto_prepare
+ *
+ * Prepare to do some work by allocating buffers and transforms.
+ */
+static int toi_prune_crypto_prepare(void)
+{
+  int cpu, ret, digestsize;
+
+  if (!*toi_prune_hash_algo_name) {
+    printk(KERN_INFO "TuxOnIce: Pruning enabled but no "
+        "hash algorithm set.\n");
+    return 1;
+  }
+
+  for_each_online_cpu(cpu) {
+    struct cpu_context *this = &per_cpu(contexts, cpu);
+    this->desc.tfm = crypto_alloc_shash(toi_prune_hash_algo_name, 0, 0);
+    if (IS_ERR(this->desc.tfm)) {
+      printk(KERN_INFO "TuxOnIce: Failed to allocate the "
+          "%s prune hash algorithm.\n",
+          toi_prune_hash_algo_name);
+      this->desc.tfm = NULL;
+      return 1;
+    }
+
+    if (!digestsize)
+      digestsize = crypto_shash_digestsize(this->desc.tfm);
+
+    this->digest = kmalloc(digestsize, GFP_KERNEL);
+    if (!this->digest) {
+      printk(KERN_INFO "TuxOnIce: Failed to allocate space "
+          "for digest output.\n");
+      crypto_free_shash(this->desc.tfm);
+      this->desc.tfm = NULL;
+    }
+
+    this->desc.flags = 0;
+
+    ret = crypto_shash_init(&this->desc);
+    if (ret < 0) {
+      printk(KERN_INFO "TuxOnIce: Failed to initialise the "
+          "%s prune hash algorithm.\n",
+          toi_prune_hash_algo_name);
+      kfree(this->digest);
+      this->digest = NULL;
+      crypto_free_shash(this->desc.tfm);
+      this->desc.tfm = NULL;
+      return 1;
+    }
+  }
+
+  return 0;
+}
+
+static int toi_prune_rw_cleanup(int writing)
+{
+  int cpu;
+
+  for_each_online_cpu(cpu) {
+    struct cpu_context *this = &per_cpu(contexts, cpu);
+    if (this->desc.tfm) {
+      crypto_free_shash(this->desc.tfm);
+      this->desc.tfm = NULL;
+    }
+
+    if (this->digest) {
+      kfree(this->digest);
+      this->digest = NULL;
+    }
+  }
+
+  return 0;
+}
+
+/*
+ * toi_prune_init
+ */
+
+static int toi_prune_init(int toi_or_resume)
+{
+  if (!toi_or_resume)
+    return 0;
+
+  toi_pruned_pages = 0;
+
+  next_driver = toi_get_next_filter(&toi_prune_ops);
+
+  return next_driver ? 0 : -ECHILD;
+}
+
+/*
+ * toi_prune_rw_init()
+ */
+
+static int toi_prune_rw_init(int rw, int stream_number)
+{
+  if (toi_prune_crypto_prepare()) {
+    printk(KERN_ERR "Failed to initialise prune "
+        "algorithm.\n");
+    if (rw == READ) {
+      printk(KERN_INFO "Unable to read the image.\n");
+      return -ENODEV;
+    } else {
+      printk(KERN_INFO "Continuing without "
+          "pruning the image.\n");
+      toi_prune_ops.enabled = 0;
+    }
+  }
+
+  return 0;
+}
+
+/*
+ * toi_prune_write_page()
+ *
+ * Compress a page of data, buffering output and passing on filled
+ * pages to the next module in the pipeline.
+ *
+ * Buffer_page:        Pointer to a buffer of size PAGE_SIZE, containing
+ * data to be checked.
+ *
+ * Returns:        0 on success. Otherwise the error is that returned by later
+ *                 modules, -ECHILD if we have a broken pipeline or -EIO if
+ *                 zlib errs.
+ */
+static int toi_prune_write_page(unsigned long index, int buf_type,
+    void *buffer_page, unsigned int buf_size)
+{
+  int ret = 0, cpu = smp_processor_id(), write_data = 1;
+  struct cpu_context *ctx = &per_cpu(contexts, cpu);
+  u8* output_buffer = buffer_page;
+  int output_len = buf_size;
+  int out_buf_type = buf_type;
+  void *buffer_start;
+  u32 buf[4];
+
+  if (ctx->desc.tfm) {
+
+    buffer_start = TOI_MAP(buf_type, buffer_page);
+    ctx->len = OUT_BUF_SIZE;
+
+    ret = crypto_shash_digest(&ctx->desc, buffer_start, buf_size, &ctx->digest);
+    if (ret) {
+      printk(KERN_INFO "TuxOnIce: Failed to calculate digest (%d).\n", ret);
+    } else {
+      mutex_lock(&stats_lock);
+
+      toi_pruned_pages++;
+
+      mutex_unlock(&stats_lock);
+
+    }
+
+    TOI_UNMAP(buf_type, buffer_page);
+  }
+
+  if (write_data)
+    ret = next_driver->write_page(index, out_buf_type,
+        output_buffer, output_len);
+  else
+    ret = next_driver->write_page(index, out_buf_type,
+        output_buffer, output_len);
+
+  return ret;
+}
+
+/*
+ * toi_prune_read_page()
+ * @buffer_page: struct page *. Pointer to a buffer of size PAGE_SIZE.
+ *
+ * Retrieve data from later modules or from a previously loaded page and
+ * fill the input buffer.
+ * Zero if successful. Error condition from me or from downstream on failure.
+ */
+static int toi_prune_read_page(unsigned long *index, int buf_type,
+    void *buffer_page, unsigned int *buf_size)
+{
+  int ret, cpu = smp_processor_id();
+  unsigned int len;
+  char *buffer_start;
+  struct cpu_context *ctx = &per_cpu(contexts, cpu);
+
+  if (!ctx->desc.tfm)
+    return next_driver->read_page(index, TOI_PAGE, buffer_page,
+        buf_size);
+
+  /*
+   * All our reads must be synchronous - we can't handle
+   * data that hasn't been read yet.
+   */
+
+  ret = next_driver->read_page(index, buf_type, buffer_page, &len);
+
+  if (len == PRUNE_DATA_IS_PFN) {
+    buffer_start = kmap(buffer_page);
+  }
+
+  return ret;
+}
+
+/*
+ * toi_prune_print_debug_stats
+ * @buffer: Pointer to a buffer into which the debug info will be printed.
+ * @size: Size of the buffer.
+ *
+ * Print information to be recorded for debugging purposes into a buffer.
+ * Returns: Number of characters written to the buffer.
+ */
+
+static int toi_prune_print_debug_stats(char *buffer, int size)
+{
+  int len;
+
+  /* Output the number of pages pruned. */
+  if (*toi_prune_hash_algo_name)
+    len = scnprintf(buffer, size, "- Compressor is '%s'.\n",
+        toi_prune_hash_algo_name);
+  else
+    len = scnprintf(buffer, size, "- Compressor is not set.\n");
+
+  if (toi_pruned_pages)
+    len += scnprintf(buffer+len, size - len, "  Pruned "
+        "%lu pages).\n",
+        toi_pruned_pages);
+  return len;
+}
+
+/*
+ * toi_prune_memory_needed
+ *
+ * Tell the caller how much memory we need to operate during hibernate/resume.
+ * Returns: Unsigned long. Maximum number of bytes of memory required for
+ * operation.
+ */
+static int toi_prune_memory_needed(void)
+{
+  return 2 * PAGE_SIZE;
+}
+
+static int toi_prune_storage_needed(void)
+{
+  return 2 * sizeof(unsigned long) + 2 * sizeof(int) +
+    strlen(toi_prune_hash_algo_name) + 1;
+}
+
+/*
+ * toi_prune_save_config_info
+ * @buffer: Pointer to a buffer of size PAGE_SIZE.
+ *
+ * Save informaton needed when reloading the image at resume time.
+ * Returns: Number of bytes used for saving our data.
+ */
+static int toi_prune_save_config_info(char *buffer)
+{
+  int len = strlen(toi_prune_hash_algo_name) + 1, offset = 0;
+
+  *((unsigned long *) buffer) = toi_pruned_pages;
+  offset += sizeof(unsigned long);
+  *((int *) (buffer + offset)) = len;
+  offset += sizeof(int);
+  strncpy(buffer + offset, toi_prune_hash_algo_name, len);
+  return offset + len;
+}
+
+/* toi_prune_load_config_info
+ * @buffer: Pointer to the start of the data.
+ * @size: Number of bytes that were saved.
+ *
+ * Description:        Reload information needed for passing back to the
+ * resumed kernel.
+ */
+static void toi_prune_load_config_info(char *buffer, int size)
+{
+  int len, offset = 0;
+
+  toi_pruned_pages = *((unsigned long *) buffer);
+  offset += sizeof(unsigned long);
+  len = *((int *) (buffer + offset));
+  offset += sizeof(int);
+  strncpy(toi_prune_hash_algo_name, buffer + offset, len);
+}
+
+static void toi_prune_pre_atomic_restore(struct toi_boot_kernel_data *bkd)
+{
+  bkd->pruned_pages = toi_pruned_pages;
+}
+
+static void toi_prune_post_atomic_restore(struct toi_boot_kernel_data *bkd)
+{
+  toi_pruned_pages = bkd->pruned_pages;
+}
+
+/*
+ * toi_expected_ratio
+ *
+ * Description:        Returns the expected ratio between data passed into this module
+ *                 and the amount of data output when writing.
+ * Returns:        100 - we have no idea how many pages will be pruned.
+ */
+
+static int toi_prune_expected_ratio(void)
+{
+  return 100;
+}
+
+/*
+ * data for our sysfs entries.
+ */
+static struct toi_sysfs_data sysfs_params[] = {
+  SYSFS_INT("enabled", SYSFS_RW, &toi_prune_ops.enabled, 0, 1, 0,
+      NULL),
+  SYSFS_STRING("algorithm", SYSFS_RW, toi_prune_hash_algo_name, 31, 0, NULL),
+};
+
+/*
+ * Ops structure.
+ */
+static struct toi_module_ops toi_prune_ops = {
+  .type                        = FILTER_MODULE,
+  .name                        = "prune",
+  .directory                = "prune",
+  .module                        = THIS_MODULE,
+  .initialise                = toi_prune_init,
+  .memory_needed                 = toi_prune_memory_needed,
+  .print_debug_info        = toi_prune_print_debug_stats,
+  .save_config_info        = toi_prune_save_config_info,
+  .load_config_info        = toi_prune_load_config_info,
+  .storage_needed                = toi_prune_storage_needed,
+  .expected_compression        = toi_prune_expected_ratio,
+
+  .pre_atomic_restore        = toi_prune_pre_atomic_restore,
+  .post_atomic_restore        = toi_prune_post_atomic_restore,
+
+  .rw_init                = toi_prune_rw_init,
+  .rw_cleanup                = toi_prune_rw_cleanup,
+
+  .write_page                = toi_prune_write_page,
+  .read_page                = toi_prune_read_page,
+
+  .sysfs_data                = sysfs_params,
+  .num_sysfs_entries        = sizeof(sysfs_params) /
+    sizeof(struct toi_sysfs_data),
+};
+
+/* ---- Registration ---- */
+
+static __init int toi_prune_load(void)
+{
+  return toi_register_module(&toi_prune_ops);
+}
+
+late_initcall(toi_prune_load);
diff --git a/kernel/power/tuxonice_storage.c b/kernel/power/tuxonice_storage.c
new file mode 100644
index 000000000..f8a3e164d
--- /dev/null
+++ b/kernel/power/tuxonice_storage.c
@@ -0,0 +1,282 @@
+/*
+ * kernel/power/tuxonice_storage.c
+ *
+ * Copyright (C) 2005-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Routines for talking to a userspace program that manages storage.
+ *
+ * The kernel side:
+ * - starts the userspace program;
+ * - sends messages telling it when to open and close the connection;
+ * - tells it when to quit;
+ *
+ * The user space side:
+ * - passes messages regarding status;
+ *
+ */
+
+#include <linux/suspend.h>
+#include <linux/freezer.h>
+
+#include "tuxonice_sysfs.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_netlink.h"
+#include "tuxonice_storage.h"
+#include "tuxonice_ui.h"
+
+static struct user_helper_data usm_helper_data;
+static struct toi_module_ops usm_ops;
+static int message_received, usm_prepare_count;
+static int storage_manager_last_action, storage_manager_action;
+
+static int usm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+  int type;
+  int *data;
+
+  type = nlh->nlmsg_type;
+
+  /* A control message: ignore them */
+  if (type < NETLINK_MSG_BASE)
+    return 0;
+
+  /* Unknown message: reply with EINVAL */
+  if (type >= USM_MSG_MAX)
+    return -EINVAL;
+
+  /* All operations require privileges, even GET */
+  if (!capable(CAP_NET_ADMIN))
+    return -EPERM;
+
+  /* Only allow one task to receive NOFREEZE privileges */
+  if (type == NETLINK_MSG_NOFREEZE_ME && usm_helper_data.pid != -1)
+    return -EBUSY;
+
+  data = (int *) NLMSG_DATA(nlh);
+
+  switch (type) {
+    case USM_MSG_SUCCESS:
+    case USM_MSG_FAILED:
+      message_received = type;
+      complete(&usm_helper_data.wait_for_process);
+      break;
+    default:
+      printk(KERN_INFO "Storage manager doesn't recognise "
+          "message %d.\n", type);
+  }
+
+  return 1;
+}
+
+#ifdef CONFIG_NET
+static int activations;
+
+int toi_activate_storage(int force)
+{
+  int tries = 1;
+
+  if (usm_helper_data.pid == -1 || !usm_ops.enabled)
+    return 0;
+
+  message_received = 0;
+  activations++;
+
+  if (activations > 1 && !force)
+    return 0;
+
+  while ((!message_received || message_received == USM_MSG_FAILED) &&
+      tries < 2) {
+    toi_prepare_status(DONT_CLEAR_BAR, "Activate storage attempt "
+        "%d.\n", tries);
+
+    init_completion(&usm_helper_data.wait_for_process);
+
+    toi_send_netlink_message(&usm_helper_data,
+        USM_MSG_CONNECT,
+        NULL, 0);
+
+    /* Wait 2 seconds for the userspace process to make contact */
+    wait_for_completion_timeout(&usm_helper_data.wait_for_process,
+        2*HZ);
+
+    tries++;
+  }
+
+  return 0;
+}
+
+int toi_deactivate_storage(int force)
+{
+  if (usm_helper_data.pid == -1 || !usm_ops.enabled)
+    return 0;
+
+  message_received = 0;
+  activations--;
+
+  if (activations && !force)
+    return 0;
+
+  init_completion(&usm_helper_data.wait_for_process);
+
+  toi_send_netlink_message(&usm_helper_data,
+      USM_MSG_DISCONNECT,
+      NULL, 0);
+
+  wait_for_completion_timeout(&usm_helper_data.wait_for_process, 2*HZ);
+
+  if (!message_received || message_received == USM_MSG_FAILED) {
+    printk(KERN_INFO "Returning failure disconnecting storage.\n");
+    return 1;
+  }
+
+  return 0;
+}
+#endif
+
+static void storage_manager_simulate(void)
+{
+  printk(KERN_INFO "--- Storage manager simulate ---\n");
+  toi_prepare_usm();
+  schedule();
+  printk(KERN_INFO "--- Activate storage 1 ---\n");
+  toi_activate_storage(1);
+  schedule();
+  printk(KERN_INFO "--- Deactivate storage 1 ---\n");
+  toi_deactivate_storage(1);
+  schedule();
+  printk(KERN_INFO "--- Cleanup usm ---\n");
+  toi_cleanup_usm();
+  schedule();
+  printk(KERN_INFO "--- Storage manager simulate ends ---\n");
+}
+
+static int usm_storage_needed(void)
+{
+  return sizeof(int) + strlen(usm_helper_data.program) + 1;
+}
+
+static int usm_save_config_info(char *buf)
+{
+  int len = strlen(usm_helper_data.program);
+  memcpy(buf, usm_helper_data.program, len + 1);
+  return sizeof(int) + len + 1;
+}
+
+static void usm_load_config_info(char *buf, int size)
+{
+  /* Don't load the saved path if one has already been set */
+  if (usm_helper_data.program[0])
+    return;
+
+  memcpy(usm_helper_data.program, buf + sizeof(int), *((int *) buf));
+}
+
+static int usm_memory_needed(void)
+{
+  /* ball park figure of 32 pages */
+  return 32 * PAGE_SIZE;
+}
+
+/* toi_prepare_usm
+*/
+int toi_prepare_usm(void)
+{
+  usm_prepare_count++;
+
+  if (usm_prepare_count > 1 || !usm_ops.enabled)
+    return 0;
+
+  usm_helper_data.pid = -1;
+
+  if (!*usm_helper_data.program)
+    return 0;
+
+  toi_netlink_setup(&usm_helper_data);
+
+  if (usm_helper_data.pid == -1)
+    printk(KERN_INFO "TuxOnIce Storage Manager wanted, but couldn't"
+        " start it.\n");
+
+  toi_activate_storage(0);
+
+  return usm_helper_data.pid != -1;
+}
+
+void toi_cleanup_usm(void)
+{
+  usm_prepare_count--;
+
+  if (usm_helper_data.pid > -1 && !usm_prepare_count) {
+    toi_deactivate_storage(0);
+    toi_netlink_close(&usm_helper_data);
+  }
+}
+
+static void storage_manager_activate(void)
+{
+  if (storage_manager_action == storage_manager_last_action)
+    return;
+
+  if (storage_manager_action)
+    toi_prepare_usm();
+  else
+    toi_cleanup_usm();
+
+  storage_manager_last_action = storage_manager_action;
+}
+
+/*
+ * User interface specific /sys/power/tuxonice entries.
+ */
+
+static struct toi_sysfs_data sysfs_params[] = {
+  SYSFS_NONE("simulate_atomic_copy", storage_manager_simulate),
+  SYSFS_INT("enabled", SYSFS_RW, &usm_ops.enabled, 0, 1, 0, NULL),
+  SYSFS_STRING("program", SYSFS_RW, usm_helper_data.program, 254, 0,
+      NULL),
+  SYSFS_INT("activate_storage", SYSFS_RW , &storage_manager_action, 0, 1,
+      0, storage_manager_activate)
+};
+
+static struct toi_module_ops usm_ops = {
+  .type                                = MISC_MODULE,
+  .name                                = "usm",
+  .directory                        = "storage_manager",
+  .module                                = THIS_MODULE,
+  .storage_needed                        = usm_storage_needed,
+  .save_config_info                = usm_save_config_info,
+  .load_config_info                = usm_load_config_info,
+  .memory_needed                        = usm_memory_needed,
+
+  .sysfs_data                        = sysfs_params,
+  .num_sysfs_entries                = sizeof(sysfs_params) /
+    sizeof(struct toi_sysfs_data),
+};
+
+/* toi_usm_sysfs_init
+ * Description: Boot time initialisation for user interface.
+ */
+int toi_usm_init(void)
+{
+  usm_helper_data.nl = NULL;
+  usm_helper_data.program[0] = '\0';
+  usm_helper_data.pid = -1;
+  usm_helper_data.skb_size = 0;
+  usm_helper_data.pool_limit = 6;
+  usm_helper_data.netlink_id = NETLINK_TOI_USM;
+  usm_helper_data.name = "userspace storage manager";
+  usm_helper_data.rcv_msg = usm_user_rcv_msg;
+  usm_helper_data.interface_version = 2;
+  usm_helper_data.must_init = 0;
+  init_completion(&usm_helper_data.wait_for_process);
+
+  return toi_register_module(&usm_ops);
+}
+
+void toi_usm_exit(void)
+{
+  toi_netlink_close_complete(&usm_helper_data);
+  toi_unregister_module(&usm_ops);
+}
diff --git a/kernel/power/tuxonice_storage.h b/kernel/power/tuxonice_storage.h
new file mode 100644
index 000000000..db22c8362
--- /dev/null
+++ b/kernel/power/tuxonice_storage.h
@@ -0,0 +1,45 @@
+/*
+ * kernel/power/tuxonice_storage.h
+ *
+ * Copyright (C) 2005-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ */
+
+#ifdef CONFIG_NET
+int toi_prepare_usm(void);
+void toi_cleanup_usm(void);
+
+int toi_activate_storage(int force);
+int toi_deactivate_storage(int force);
+extern int toi_usm_init(void);
+extern void toi_usm_exit(void);
+#else
+static inline int toi_usm_init(void) { return 0; }
+static inline void toi_usm_exit(void) { }
+
+static inline int toi_activate_storage(int force)
+{
+  return 0;
+}
+
+static inline int toi_deactivate_storage(int force)
+{
+  return 0;
+}
+
+static inline int toi_prepare_usm(void) { return 0; }
+static inline void toi_cleanup_usm(void) { }
+#endif
+
+enum {
+  USM_MSG_BASE = 0x10,
+
+  /* Kernel -> Userspace */
+  USM_MSG_CONNECT = 0x30,
+  USM_MSG_DISCONNECT = 0x31,
+  USM_MSG_SUCCESS = 0x40,
+  USM_MSG_FAILED = 0x41,
+
+  USM_MSG_MAX,
+};
diff --git a/kernel/power/tuxonice_swap.c b/kernel/power/tuxonice_swap.c
new file mode 100644
index 000000000..1ba562cbb
--- /dev/null
+++ b/kernel/power/tuxonice_swap.c
@@ -0,0 +1,474 @@
+/*
+ * kernel/power/tuxonice_swap.c
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * Distributed under GPLv2.
+ *
+ * This file encapsulates functions for usage of swap space as a
+ * backing store.
+ */
+
+#include <linux/suspend.h>
+#include <linux/blkdev.h>
+#include <linux/swapops.h>
+#include <linux/swap.h>
+#include <linux/syscalls.h>
+#include <linux/fs_uuid.h>
+
+#include "tuxonice.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_io.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_extent.h"
+#include "tuxonice_bio.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_builtin.h"
+
+static struct toi_module_ops toi_swapops;
+
+/* For swapfile automatically swapon/off'd. */
+static char swapfilename[255] = "";
+static int toi_swapon_status;
+
+/* Swap Pages */
+static unsigned long swap_allocated;
+
+static struct sysinfo swapinfo;
+
+static int is_ram_backed(struct swap_info_struct *si)
+{
+  if (!strncmp(si->bdev->bd_disk->disk_name, "ram", 3) ||
+      !strncmp(si->bdev->bd_disk->disk_name, "zram", 4))
+    return 1;
+
+  return 0;
+}
+
+/**
+ * enable_swapfile: Swapon the user specified swapfile prior to hibernating.
+ *
+ * Activate the given swapfile if it wasn't already enabled. Remember whether
+ * we really did swapon it for swapoffing later.
+ */
+static void enable_swapfile(void)
+{
+  int activateswapresult = -EINVAL;
+
+  if (swapfilename[0]) {
+    /* Attempt to swap on with maximum priority */
+    activateswapresult = sys_swapon(swapfilename, 0xFFFF);
+    if (activateswapresult && activateswapresult != -EBUSY)
+      printk(KERN_ERR "TuxOnIce: The swapfile/partition "
+          "specified by /sys/power/tuxonice/swap/swapfile"
+          " (%s) could not be turned on (error %d). "
+          "Attempting to continue.\n",
+          swapfilename, activateswapresult);
+    if (!activateswapresult)
+      toi_swapon_status = 1;
+  }
+}
+
+/**
+ * disable_swapfile: Swapoff any file swaponed at the start of the cycle.
+ *
+ * If we did successfully swapon a file at the start of the cycle, swapoff
+ * it now (finishing up).
+ */
+static void disable_swapfile(void)
+{
+  if (!toi_swapon_status)
+    return;
+
+  sys_swapoff(swapfilename);
+  toi_swapon_status = 0;
+}
+
+static int add_blocks_to_extent_chain(struct toi_bdev_info *chain,
+    unsigned long start, unsigned long end)
+{
+  if (test_action_state(TOI_TEST_BIO))
+    toi_message(TOI_IO, TOI_VERBOSE, 0, "Adding extent %lu-%lu to "
+        "chain %p.", start << chain->bmap_shift,
+        end << chain->bmap_shift, chain);
+
+  return toi_add_to_extent_chain(&chain->blocks, start, end);
+}
+
+
+static int get_main_pool_phys_params(struct toi_bdev_info *chain)
+{
+  struct hibernate_extent *extentpointer = NULL;
+  unsigned long address, extent_min = 0, extent_max = 0;
+  int empty = 1;
+
+  toi_message(TOI_IO, TOI_VERBOSE, 0, "get main pool phys params for "
+      "chain %d.", chain->allocator_index);
+
+  if (!chain->allocations.first)
+    return 0;
+
+  if (chain->blocks.first)
+    toi_put_extent_chain(&chain->blocks);
+
+  toi_extent_for_each(&chain->allocations, extentpointer, address) {
+    swp_entry_t swap_address = (swp_entry_t) { address };
+    struct block_device *bdev;
+    sector_t new_sector = map_swap_entry(swap_address, &bdev);
+
+    if (empty) {
+      empty = 0;
+      extent_min = extent_max = new_sector;
+      continue;
+    }
+
+    if (new_sector == extent_max + 1) {
+      extent_max++;
+      continue;
+    }
+
+    if (add_blocks_to_extent_chain(chain, extent_min, extent_max)) {
+      printk(KERN_ERR "Out of memory while making block "
+          "chains.\n");
+      return -ENOMEM;
+    }
+
+    extent_min = new_sector;
+    extent_max = new_sector;
+  }
+
+  if (!empty &&
+      add_blocks_to_extent_chain(chain, extent_min, extent_max)) {
+    printk(KERN_ERR "Out of memory while making block chains.\n");
+    return -ENOMEM;
+  }
+
+  return 0;
+}
+
+/*
+ * Like si_swapinfo, except that we don't include ram backed swap (compcache!)
+ * and don't need to use the spinlocks (userspace is stopped when this
+ * function is called).
+ */
+void si_swapinfo_no_compcache(void)
+{
+  unsigned int i;
+
+  si_swapinfo(&swapinfo);
+  swapinfo.freeswap = 0;
+  swapinfo.totalswap = 0;
+
+  for (i = 0; i < MAX_SWAPFILES; i++) {
+    struct swap_info_struct *si = get_swap_info_struct(i);
+    if (si && (si->flags & SWP_WRITEOK) && !is_ram_backed(si)) {
+      swapinfo.totalswap += si->inuse_pages;
+      swapinfo.freeswap += si->pages - si->inuse_pages;
+    }
+  }
+}
+/*
+ * We can't just remember the value from allocation time, because other
+ * processes might have allocated swap in the mean time.
+ */
+static unsigned long toi_swap_storage_available(void)
+{
+  toi_message(TOI_IO, TOI_VERBOSE, 0, "In toi_swap_storage_available.");
+  si_swapinfo_no_compcache();
+  return swapinfo.freeswap + swap_allocated;
+}
+
+static int toi_swap_initialise(int starting_cycle)
+{
+  if (!starting_cycle)
+    return 0;
+
+  enable_swapfile();
+  return 0;
+}
+
+static void toi_swap_cleanup(int ending_cycle)
+{
+  if (!ending_cycle)
+    return;
+
+  disable_swapfile();
+}
+
+static void toi_swap_free_storage(struct toi_bdev_info *chain)
+{
+  /* Free swap entries */
+  struct hibernate_extent *extentpointer;
+  unsigned long extentvalue;
+
+  toi_message(TOI_IO, TOI_VERBOSE, 0, "Freeing storage for chain %p.",
+      chain);
+
+  swap_allocated -= chain->allocations.size;
+  toi_extent_for_each(&chain->allocations, extentpointer, extentvalue)
+    swap_free((swp_entry_t) { extentvalue });
+
+  toi_put_extent_chain(&chain->allocations);
+}
+
+static void free_swap_range(unsigned long min, unsigned long max)
+{
+  int j;
+
+  for (j = min; j <= max; j++)
+    swap_free((swp_entry_t) { j });
+  swap_allocated -= (max - min + 1);
+}
+
+/*
+ * Allocation of a single swap type. Swap priorities are handled at the higher
+ * level.
+ */
+static int toi_swap_allocate_storage(struct toi_bdev_info *chain,
+    unsigned long request)
+{
+  unsigned long gotten = 0;
+
+  toi_message(TOI_IO, TOI_VERBOSE, 0, "  Swap allocate storage: Asked to"
+      " allocate %lu pages from device %d.", request,
+      chain->allocator_index);
+
+  while (gotten < request) {
+    swp_entry_t start, end;
+    if (0) {
+      /* Broken at the moment for SSDs */
+      get_swap_range_of_type(chain->allocator_index, &start, &end,
+          request - gotten + 1);
+    } else {
+      start = end = get_swap_page_of_type(chain->allocator_index);
+    }
+    if (start.val) {
+      int added = end.val - start.val + 1;
+      if (toi_add_to_extent_chain(&chain->allocations,
+            start.val, end.val)) {
+        printk(KERN_INFO "Failed to allocate extent for "
+            "%lu-%lu.\n", start.val, end.val);
+        free_swap_range(start.val, end.val);
+        break;
+      }
+      gotten += added;
+      swap_allocated += added;
+    } else
+      break;
+  }
+
+  toi_message(TOI_IO, TOI_VERBOSE, 0, "  Allocated %lu pages.", gotten);
+  return gotten;
+}
+
+static int toi_swap_register_storage(void)
+{
+  int i, result = 0;
+
+  toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_swap_register_storage.");
+  for (i = 0; i < MAX_SWAPFILES; i++) {
+    struct swap_info_struct *si = get_swap_info_struct(i);
+    struct toi_bdev_info *devinfo;
+    unsigned char *p;
+    unsigned char buf[256];
+    struct fs_info *fs_info;
+
+    if (!si || !(si->flags & SWP_WRITEOK) || is_ram_backed(si))
+      continue;
+
+    devinfo = toi_kzalloc(39, sizeof(struct toi_bdev_info),
+        GFP_ATOMIC);
+    if (!devinfo) {
+      printk("Failed to allocate devinfo struct for swap "
+          "device %d.\n", i);
+      return -ENOMEM;
+    }
+
+    devinfo->bdev = si->bdev;
+    devinfo->allocator = &toi_swapops;
+    devinfo->allocator_index = i;
+
+    fs_info = fs_info_from_block_dev(si->bdev);
+    if (fs_info && !IS_ERR(fs_info)) {
+      memcpy(devinfo->uuid, &fs_info->uuid, 16);
+      free_fs_info(fs_info);
+    } else
+      result = (int) PTR_ERR(fs_info);
+
+    if (!fs_info)
+      printk("fs_info from block dev returned %d.\n", result);
+    devinfo->dev_t = si->bdev->bd_dev;
+    devinfo->prio = si->prio;
+    devinfo->bmap_shift = 3;
+    devinfo->blocks_per_page = 1;
+
+    p = d_path(&si->swap_file->f_path, buf, sizeof(buf));
+    sprintf(devinfo->name, "swap on %s", p);
+
+    toi_message(TOI_IO, TOI_VERBOSE, 0, "Registering swap storage:"
+        " Device %d (%lx), prio %d.", i,
+        (unsigned long) devinfo->dev_t, devinfo->prio);
+    toi_bio_ops.register_storage(devinfo);
+  }
+
+  return 0;
+}
+
+static unsigned long toi_swap_free_unused_storage(struct toi_bdev_info *chain, unsigned long used)
+{
+  struct hibernate_extent *extentpointer = NULL;
+  unsigned long extentvalue;
+  unsigned long i = 0, first_freed = 0;
+
+  toi_extent_for_each(&chain->allocations, extentpointer, extentvalue) {
+    i++;
+    if (i > used) {
+      swap_free((swp_entry_t) { extentvalue });
+      if (!first_freed)
+        first_freed = extentvalue;
+    }
+  }
+
+  return first_freed;
+}
+
+/*
+ * workspace_size
+ *
+ * Description:
+ * Returns the number of bytes of RAM needed for this
+ * code to do its work. (Used when calculating whether
+ * we have enough memory to be able to hibernate & resume).
+ *
+ */
+static int toi_swap_memory_needed(void)
+{
+  return 1;
+}
+
+/*
+ * Print debug info
+ *
+ * Description:
+ */
+static int toi_swap_print_debug_stats(char *buffer, int size)
+{
+  int len = 0;
+
+  len = scnprintf(buffer, size, "- Swap Allocator enabled.\n");
+  if (swapfilename[0])
+    len += scnprintf(buffer+len, size-len,
+        "  Attempting to automatically swapon: %s.\n",
+        swapfilename);
+
+  si_swapinfo_no_compcache();
+
+  len += scnprintf(buffer+len, size-len,
+      "  Swap available for image: %lu pages.\n",
+      swapinfo.freeswap + swap_allocated);
+
+  return len;
+}
+
+static int header_locations_read_sysfs(const char *page, int count)
+{
+  int i, printedpartitionsmessage = 0, len = 0, haveswap = 0;
+  struct inode *swapf = NULL;
+  int zone;
+  char *path_page = (char *) toi_get_free_page(10, GFP_KERNEL);
+  char *path, *output = (char *) page;
+  int path_len;
+
+  if (!page)
+    return 0;
+
+  for (i = 0; i < MAX_SWAPFILES; i++) {
+    struct swap_info_struct *si =  get_swap_info_struct(i);
+
+    if (!si || !(si->flags & SWP_WRITEOK))
+      continue;
+
+    if (S_ISBLK(si->swap_file->f_mapping->host->i_mode)) {
+      haveswap = 1;
+      if (!printedpartitionsmessage) {
+        len += sprintf(output + len,
+            "For swap partitions, simply use the "
+            "format: resume=swap:/dev/hda1.\n");
+        printedpartitionsmessage = 1;
+      }
+    } else {
+      path_len = 0;
+
+      path = d_path(&si->swap_file->f_path, path_page,
+          PAGE_SIZE);
+      path_len = snprintf(path_page, PAGE_SIZE, "%s", path);
+
+      haveswap = 1;
+      swapf = si->swap_file->f_mapping->host;
+      zone = bmap(swapf, 0);
+      if (!zone) {
+        len += sprintf(output + len,
+            "Swapfile %s has been corrupted. Reuse"
+            " mkswap on it and try again.\n",
+            path_page);
+      } else {
+        char name_buffer[BDEVNAME_SIZE];
+        len += sprintf(output + len,
+            "For swapfile `%s`,"
+            " use resume=swap:/dev/%s:0x%x.\n",
+            path_page,
+            bdevname(si->bdev, name_buffer),
+            zone << (swapf->i_blkbits - 9));
+      }
+    }
+  }
+
+  if (!haveswap)
+    len = sprintf(output, "You need to turn on swap partitions "
+        "before examining this file.\n");
+
+  toi_free_page(10, (unsigned long) path_page);
+  return len;
+}
+
+static struct toi_sysfs_data sysfs_params[] = {
+  SYSFS_STRING("swapfilename", SYSFS_RW, swapfilename, 255, 0, NULL),
+  SYSFS_CUSTOM("headerlocations", SYSFS_READONLY,
+      header_locations_read_sysfs, NULL, 0, NULL),
+  SYSFS_INT("enabled", SYSFS_RW, &toi_swapops.enabled, 0, 1, 0,
+      attempt_to_parse_resume_device2),
+};
+
+static struct toi_bio_allocator_ops toi_bio_swapops = {
+  .register_storage                        = toi_swap_register_storage,
+  .storage_available                        = toi_swap_storage_available,
+  .allocate_storage                        = toi_swap_allocate_storage,
+  .bmap                                        = get_main_pool_phys_params,
+  .free_storage                                = toi_swap_free_storage,
+  .free_unused_storage                    = toi_swap_free_unused_storage,
+};
+
+static struct toi_module_ops toi_swapops = {
+  .type                                        = BIO_ALLOCATOR_MODULE,
+  .name                                        = "swap storage",
+  .directory                                = "swap",
+  .module                                        = THIS_MODULE,
+  .memory_needed                                = toi_swap_memory_needed,
+  .print_debug_info                        = toi_swap_print_debug_stats,
+  .initialise                                = toi_swap_initialise,
+  .cleanup                                = toi_swap_cleanup,
+  .bio_allocator_ops                        = &toi_bio_swapops,
+
+  .sysfs_data                = sysfs_params,
+  .num_sysfs_entries        = sizeof(sysfs_params) /
+    sizeof(struct toi_sysfs_data),
+};
+
+/* ---- Registration ---- */
+static __init int toi_swap_load(void)
+{
+  return toi_register_module(&toi_swapops);
+}
+
+late_initcall(toi_swap_load);
diff --git a/kernel/power/tuxonice_sysfs.c b/kernel/power/tuxonice_sysfs.c
new file mode 100644
index 000000000..99d270040
--- /dev/null
+++ b/kernel/power/tuxonice_sysfs.c
@@ -0,0 +1,333 @@
+/*
+ * kernel/power/tuxonice_sysfs.c
+ *
+ * Copyright (C) 2002-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * This file contains support for sysfs entries for tuning TuxOnIce.
+ *
+ * We have a generic handler that deals with the most common cases, and
+ * hooks for special handlers to use.
+ */
+
+#include <linux/suspend.h>
+
+#include "tuxonice_sysfs.h"
+#include "tuxonice.h"
+#include "tuxonice_storage.h"
+#include "tuxonice_alloc.h"
+
+static int toi_sysfs_initialised;
+
+static void toi_initialise_sysfs(void);
+
+static struct toi_sysfs_data sysfs_params[];
+
+#define to_sysfs_data(_attr) container_of(_attr, struct toi_sysfs_data, attr)
+
+static void toi_main_wrapper(void)
+{
+  toi_try_hibernate();
+}
+
+static ssize_t toi_attr_show(struct kobject *kobj, struct attribute *attr,
+    char *page)
+{
+  struct toi_sysfs_data *sysfs_data = to_sysfs_data(attr);
+  int len = 0;
+  int full_prep = sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ;
+
+  if (full_prep && toi_start_anything(0))
+    return -EBUSY;
+
+  if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ)
+    toi_prepare_usm();
+
+  switch (sysfs_data->type) {
+    case TOI_SYSFS_DATA_CUSTOM:
+      len = (sysfs_data->data.special.read_sysfs) ?
+        (sysfs_data->data.special.read_sysfs)(page, PAGE_SIZE)
+        : 0;
+      break;
+    case TOI_SYSFS_DATA_BIT:
+      len = sprintf(page, "%d\n",
+          -test_bit(sysfs_data->data.bit.bit,
+            sysfs_data->data.bit.bit_vector));
+      break;
+    case TOI_SYSFS_DATA_INTEGER:
+      len = sprintf(page, "%d\n",
+          *(sysfs_data->data.integer.variable));
+      break;
+    case TOI_SYSFS_DATA_LONG:
+      len = sprintf(page, "%ld\n",
+          *(sysfs_data->data.a_long.variable));
+      break;
+    case TOI_SYSFS_DATA_UL:
+      len = sprintf(page, "%lu\n",
+          *(sysfs_data->data.ul.variable));
+      break;
+    case TOI_SYSFS_DATA_STRING:
+      len = sprintf(page, "%s\n",
+          sysfs_data->data.string.variable);
+      break;
+  }
+
+  if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ)
+    toi_cleanup_usm();
+
+  if (full_prep)
+    toi_finish_anything(0);
+
+  return len;
+}
+
+#define BOUND(_variable, _type) do { \
+  if (*_variable < sysfs_data->data._type.minimum) \
+  *_variable = sysfs_data->data._type.minimum; \
+  else if (*_variable > sysfs_data->data._type.maximum) \
+  *_variable = sysfs_data->data._type.maximum; \
+} while (0)
+
+static ssize_t toi_attr_store(struct kobject *kobj, struct attribute *attr,
+    const char *my_buf, size_t count)
+{
+  int assigned_temp_buffer = 0, result = count;
+  struct toi_sysfs_data *sysfs_data = to_sysfs_data(attr);
+
+  if (toi_start_anything((sysfs_data->flags & SYSFS_HIBERNATE_OR_RESUME)))
+    return -EBUSY;
+
+  ((char *) my_buf)[count] = 0;
+
+  if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_WRITE)
+    toi_prepare_usm();
+
+  switch (sysfs_data->type) {
+    case TOI_SYSFS_DATA_CUSTOM:
+      if (sysfs_data->data.special.write_sysfs)
+        result = (sysfs_data->data.special.write_sysfs)(my_buf,
+            count);
+      break;
+    case TOI_SYSFS_DATA_BIT:
+      {
+        unsigned long value;
+        result = kstrtoul(my_buf, 0, &value);
+        if (result)
+          break;
+        if (value)
+          set_bit(sysfs_data->data.bit.bit,
+              (sysfs_data->data.bit.bit_vector));
+        else
+          clear_bit(sysfs_data->data.bit.bit,
+              (sysfs_data->data.bit.bit_vector));
+      }
+      break;
+    case TOI_SYSFS_DATA_INTEGER:
+      {
+        long temp;
+        result = kstrtol(my_buf, 0, &temp);
+        if (result)
+          break;
+        *(sysfs_data->data.integer.variable) = (int) temp;
+        BOUND(sysfs_data->data.integer.variable, integer);
+        break;
+      }
+    case TOI_SYSFS_DATA_LONG:
+      {
+        long *variable =
+          sysfs_data->data.a_long.variable;
+        result = kstrtol(my_buf, 0, variable);
+        if (result)
+          break;
+        BOUND(variable, a_long);
+        break;
+      }
+    case TOI_SYSFS_DATA_UL:
+      {
+        unsigned long *variable =
+          sysfs_data->data.ul.variable;
+        result = kstrtoul(my_buf, 0, variable);
+        if (result)
+          break;
+        BOUND(variable, ul);
+        break;
+      }
+      break;
+    case TOI_SYSFS_DATA_STRING:
+      {
+        int copy_len = count;
+        char *variable =
+          sysfs_data->data.string.variable;
+
+        if (sysfs_data->data.string.max_length &&
+            (copy_len > sysfs_data->data.string.max_length))
+          copy_len = sysfs_data->data.string.max_length;
+
+        if (!variable) {
+          variable = (char *) toi_get_zeroed_page(31,
+              TOI_ATOMIC_GFP);
+          sysfs_data->data.string.variable = variable;
+          assigned_temp_buffer = 1;
+        }
+        strncpy(variable, my_buf, copy_len);
+        if (copy_len && my_buf[copy_len - 1] == '\n')
+          variable[count - 1] = 0;
+        variable[count] = 0;
+      }
+      break;
+  }
+
+  if (!result)
+    result = count;
+
+  /* Side effect routine? */
+  if (result == count && sysfs_data->write_side_effect)
+    sysfs_data->write_side_effect();
+
+  /* Free temporary buffers */
+  if (assigned_temp_buffer) {
+    toi_free_page(31,
+        (unsigned long) sysfs_data->data.string.variable);
+    sysfs_data->data.string.variable = NULL;
+  }
+
+  if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_WRITE)
+    toi_cleanup_usm();
+
+  toi_finish_anything(sysfs_data->flags & SYSFS_HIBERNATE_OR_RESUME);
+
+  return result;
+}
+
+static struct sysfs_ops toi_sysfs_ops = {
+  .show        = &toi_attr_show,
+  .store        = &toi_attr_store,
+};
+
+static struct kobj_type toi_ktype = {
+  .sysfs_ops        = &toi_sysfs_ops,
+};
+
+struct kobject *tuxonice_kobj;
+
+/* Non-module sysfs entries.
+ *
+ * This array contains entries that are automatically registered at
+ * boot. Modules and the console code register their own entries separately.
+ */
+
+static struct toi_sysfs_data sysfs_params[] = {
+  SYSFS_CUSTOM("do_hibernate", SYSFS_WRITEONLY, NULL, NULL,
+      SYSFS_HIBERNATING, toi_main_wrapper),
+  SYSFS_CUSTOM("do_resume", SYSFS_WRITEONLY, NULL, NULL,
+      SYSFS_RESUMING, toi_try_resume)
+};
+
+void remove_toi_sysdir(struct kobject *kobj)
+{
+  if (!kobj)
+    return;
+
+  kobject_put(kobj);
+}
+
+struct kobject *make_toi_sysdir(char *name)
+{
+  struct kobject *kobj = kobject_create_and_add(name, tuxonice_kobj);
+
+  if (!kobj) {
+    printk(KERN_INFO "TuxOnIce: Can't allocate kobject for sysfs "
+        "dir!\n");
+    return NULL;
+  }
+
+  kobj->ktype = &toi_ktype;
+
+  return kobj;
+}
+
+/* toi_register_sysfs_file
+ *
+ * Helper for registering a new /sysfs/tuxonice entry.
+ */
+
+int toi_register_sysfs_file(
+    struct kobject *kobj,
+    struct toi_sysfs_data *toi_sysfs_data)
+{
+  int result;
+
+  if (!toi_sysfs_initialised)
+    toi_initialise_sysfs();
+
+  result = sysfs_create_file(kobj, &toi_sysfs_data->attr);
+  if (result)
+    printk(KERN_INFO "TuxOnIce: sysfs_create_file for %s "
+        "returned %d.\n",
+        toi_sysfs_data->attr.name, result);
+  kobj->ktype = &toi_ktype;
+
+  return result;
+}
+
+/* toi_unregister_sysfs_file
+ *
+ * Helper for removing unwanted /sys/power/tuxonice entries.
+ *
+ */
+void toi_unregister_sysfs_file(struct kobject *kobj,
+    struct toi_sysfs_data *toi_sysfs_data)
+{
+  sysfs_remove_file(kobj, &toi_sysfs_data->attr);
+}
+
+void toi_cleanup_sysfs(void)
+{
+  int i,
+      numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data);
+
+  if (!toi_sysfs_initialised)
+    return;
+
+  for (i = 0; i < numfiles; i++)
+    toi_unregister_sysfs_file(tuxonice_kobj, &sysfs_params[i]);
+
+  kobject_put(tuxonice_kobj);
+  toi_sysfs_initialised = 0;
+}
+
+/* toi_initialise_sysfs
+ *
+ * Initialise the /sysfs/tuxonice directory.
+ */
+
+static void toi_initialise_sysfs(void)
+{
+  int i;
+  int numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data);
+
+  if (toi_sysfs_initialised)
+    return;
+
+  /* Make our TuxOnIce directory a child of /sys/power */
+  tuxonice_kobj = kobject_create_and_add("tuxonice", power_kobj);
+  if (!tuxonice_kobj)
+    return;
+
+  toi_sysfs_initialised = 1;
+
+  for (i = 0; i < numfiles; i++)
+    toi_register_sysfs_file(tuxonice_kobj, &sysfs_params[i]);
+}
+
+int toi_sysfs_init(void)
+{
+  toi_initialise_sysfs();
+  return 0;
+}
+
+void toi_sysfs_exit(void)
+{
+  toi_cleanup_sysfs();
+}
diff --git a/kernel/power/tuxonice_sysfs.h b/kernel/power/tuxonice_sysfs.h
new file mode 100644
index 000000000..2a9267e9d
--- /dev/null
+++ b/kernel/power/tuxonice_sysfs.h
@@ -0,0 +1,137 @@
+/*
+ * kernel/power/tuxonice_sysfs.h
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/sysfs.h>
+
+struct toi_sysfs_data {
+  struct attribute attr;
+  int type;
+  int flags;
+  union {
+    struct {
+      unsigned long *bit_vector;
+      int bit;
+    } bit;
+    struct {
+      int *variable;
+      int minimum;
+      int maximum;
+    } integer;
+    struct {
+      long *variable;
+      long minimum;
+      long maximum;
+    } a_long;
+    struct {
+      unsigned long *variable;
+      unsigned long minimum;
+      unsigned long maximum;
+    } ul;
+    struct {
+      char *variable;
+      int max_length;
+    } string;
+    struct {
+      int (*read_sysfs) (const char *buffer, int count);
+      int (*write_sysfs) (const char *buffer, int count);
+      void *data;
+    } special;
+  } data;
+
+  /* Side effects routine. Used, eg, for reparsing the
+   * resume= entry when it changes */
+  void (*write_side_effect) (void);
+  struct list_head sysfs_data_list;
+};
+
+enum {
+  TOI_SYSFS_DATA_NONE = 1,
+  TOI_SYSFS_DATA_CUSTOM,
+  TOI_SYSFS_DATA_BIT,
+  TOI_SYSFS_DATA_INTEGER,
+  TOI_SYSFS_DATA_UL,
+  TOI_SYSFS_DATA_LONG,
+  TOI_SYSFS_DATA_STRING
+};
+
+#define SYSFS_WRITEONLY 0200
+#define SYSFS_READONLY 0444
+#define SYSFS_RW 0644
+
+#define SYSFS_BIT(_name, _mode, _ul, _bit, _flags) { \
+  .attr = {.name  = _name , .mode   = _mode }, \
+  .type = TOI_SYSFS_DATA_BIT, \
+  .flags = _flags, \
+  .data = { .bit = { .bit_vector = _ul, .bit = _bit } } }
+
+#define SYSFS_INT(_name, _mode, _int, _min, _max, _flags, _wse) { \
+  .attr = {.name  = _name , .mode   = _mode }, \
+  .type = TOI_SYSFS_DATA_INTEGER, \
+  .flags = _flags, \
+  .data = { .integer = { .variable = _int, .minimum = _min, \
+    .maximum = _max } }, \
+  .write_side_effect = _wse }
+
+#define SYSFS_UL(_name, _mode, _ul, _min, _max, _flags) { \
+  .attr = {.name  = _name , .mode   = _mode }, \
+  .type = TOI_SYSFS_DATA_UL, \
+  .flags = _flags, \
+  .data = { .ul = { .variable = _ul, .minimum = _min, \
+    .maximum = _max } } }
+
+#define SYSFS_LONG(_name, _mode, _long, _min, _max, _flags) { \
+  .attr = {.name  = _name , .mode   = _mode }, \
+  .type = TOI_SYSFS_DATA_LONG, \
+  .flags = _flags, \
+  .data = { .a_long = { .variable = _long, .minimum = _min, \
+    .maximum = _max } } }
+
+#define SYSFS_STRING(_name, _mode, _string, _max_len, _flags, _wse) { \
+  .attr = {.name  = _name , .mode   = _mode }, \
+  .type = TOI_SYSFS_DATA_STRING, \
+  .flags = _flags, \
+  .data = { .string = { .variable = _string, .max_length = _max_len } }, \
+  .write_side_effect = _wse }
+
+#define SYSFS_CUSTOM(_name, _mode, _read, _write, _flags, _wse) { \
+  .attr = {.name  = _name , .mode   = _mode }, \
+  .type = TOI_SYSFS_DATA_CUSTOM, \
+  .flags = _flags, \
+  .data = { .special = { .read_sysfs = _read, .write_sysfs = _write } }, \
+  .write_side_effect = _wse }
+
+#define SYSFS_NONE(_name, _wse) { \
+  .attr = {.name  = _name , .mode   = SYSFS_WRITEONLY }, \
+  .type = TOI_SYSFS_DATA_NONE, \
+  .write_side_effect = _wse, \
+}
+
+/* Flags */
+#define SYSFS_NEEDS_SM_FOR_READ 1
+#define SYSFS_NEEDS_SM_FOR_WRITE 2
+#define SYSFS_HIBERNATE 4
+#define SYSFS_RESUME 8
+#define SYSFS_HIBERNATE_OR_RESUME (SYSFS_HIBERNATE | SYSFS_RESUME)
+#define SYSFS_HIBERNATING (SYSFS_HIBERNATE | SYSFS_NEEDS_SM_FOR_WRITE)
+#define SYSFS_RESUMING (SYSFS_RESUME | SYSFS_NEEDS_SM_FOR_WRITE)
+#define SYSFS_NEEDS_SM_FOR_BOTH \
+  (SYSFS_NEEDS_SM_FOR_READ | SYSFS_NEEDS_SM_FOR_WRITE)
+
+int toi_register_sysfs_file(struct kobject *kobj,
+    struct toi_sysfs_data *toi_sysfs_data);
+void toi_unregister_sysfs_file(struct kobject *kobj,
+    struct toi_sysfs_data *toi_sysfs_data);
+
+extern struct kobject *tuxonice_kobj;
+
+struct kobject *make_toi_sysdir(char *name);
+void remove_toi_sysdir(struct kobject *obj);
+extern void toi_cleanup_sysfs(void);
+
+extern int toi_sysfs_init(void);
+extern void toi_sysfs_exit(void);
diff --git a/kernel/power/tuxonice_ui.c b/kernel/power/tuxonice_ui.c
new file mode 100644
index 000000000..8cb678d90
--- /dev/null
+++ b/kernel/power/tuxonice_ui.c
@@ -0,0 +1,247 @@
+/*
+ * kernel/power/tuxonice_ui.c
+ *
+ * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
+ * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz>
+ * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr>
+ * Copyright (C) 2002-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Routines for TuxOnIce's user interface.
+ *
+ * The user interface code talks to a userspace program via a
+ * netlink socket.
+ *
+ * The kernel side:
+ * - starts the userui program;
+ * - sends text messages and progress bar status;
+ *
+ * The user space side:
+ * - passes messages regarding user requests (abort, toggle reboot etc)
+ *
+ */
+
+#define __KERNEL_SYSCALLS__
+
+#include <linux/reboot.h>
+
+#include "tuxonice_sysfs.h"
+#include "tuxonice_modules.h"
+#include "tuxonice.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_netlink.h"
+#include "tuxonice_power_off.h"
+#include "tuxonice_builtin.h"
+
+static char local_printf_buf[1024];        /* Same as printk - should be safe */
+struct ui_ops *toi_current_ui;
+
+/**
+ * toi_wait_for_keypress - Wait for keypress via userui or /dev/console.
+ *
+ * @timeout: Maximum time to wait.
+ *
+ * Wait for a keypress, either from userui or /dev/console if userui isn't
+ * available. The non-userui path is particularly for at boot-time, prior
+ * to userui being started, when we have an important warning to give to
+ * the user.
+ */
+static char toi_wait_for_keypress(int timeout)
+{
+  if (toi_current_ui && toi_current_ui->wait_for_key(timeout))
+    return ' ';
+
+  return toi_wait_for_keypress_dev_console(timeout);
+}
+
+/* toi_early_boot_message()
+ * Description:        Handle errors early in the process of booting.
+ *                 The user may press C to continue booting, perhaps
+ *                 invalidating the image,  or space to reboot.
+ *                 This works from either the serial console or normally
+ *                 attached keyboard.
+ *
+ *                 Note that we come in here from init, while the kernel is
+ *                 locked. If we want to get events from the serial console,
+ *                 we need to temporarily unlock the kernel.
+ *
+ *                 toi_early_boot_message may also be called post-boot.
+ *                 In this case, it simply printks the message and returns.
+ *
+ * Arguments:        int        Whether we are able to erase the image.
+ *                 int        default_answer. What to do when we timeout. This
+ *                         will normally be continue, but the user might
+ *                         provide command line options (__setup) to override
+ *                         particular cases.
+ *                 Char *. Pointer to a string explaining why we're moaning.
+ */
+
+#define say(message, a...) printk(KERN_EMERG message, ##a)
+
+void toi_early_boot_message(int message_detail, int default_answer,
+    char *warning_reason, ...)
+{
+#if defined(CONFIG_VT) || defined(CONFIG_SERIAL_CONSOLE)
+  unsigned long orig_state = get_toi_state(), continue_req = 0;
+  unsigned long orig_loglevel = console_loglevel;
+  int can_ask = 1;
+#else
+  int can_ask = 0;
+#endif
+
+  va_list args;
+  int printed_len;
+
+  if (!toi_wait) {
+    set_toi_state(TOI_CONTINUE_REQ);
+    can_ask = 0;
+  }
+
+  if (warning_reason) {
+    va_start(args, warning_reason);
+    printed_len = vsnprintf(local_printf_buf,
+        sizeof(local_printf_buf),
+        warning_reason,
+        args);
+    va_end(args);
+  }
+
+  if (!test_toi_state(TOI_BOOT_TIME)) {
+    printk("TuxOnIce: %s\n", local_printf_buf);
+    return;
+  }
+
+  if (!can_ask) {
+    continue_req = !!default_answer;
+    goto post_ask;
+  }
+
+#if defined(CONFIG_VT) || defined(CONFIG_SERIAL_CONSOLE)
+  console_loglevel = 7;
+
+  say("=== TuxOnIce ===\n\n");
+  if (warning_reason) {
+    say("BIG FAT WARNING!! %s\n\n", local_printf_buf);
+    switch (message_detail) {
+      case 0:
+        say("If you continue booting, note that any image WILL"
+            "NOT BE REMOVED.\nTuxOnIce is unable to do so "
+            "because the appropriate modules aren't\n"
+            "loaded. You should manually remove the image "
+            "to avoid any\npossibility of corrupting your "
+            "filesystem(s) later.\n");
+        break;
+      case 1:
+        say("If you want to use the current TuxOnIce image, "
+            "reboot and try\nagain with the same kernel "
+            "that you hibernated from. If you want\n"
+            "to forget that image, continue and the image "
+            "will be erased.\n");
+        break;
+    }
+    say("Press SPACE to reboot or C to continue booting with "
+        "this kernel\n\n");
+    if (toi_wait > 0)
+      say("Default action if you don't select one in %d "
+          "seconds is: %s.\n",
+          toi_wait,
+          default_answer == TOI_CONTINUE_REQ ?
+          "continue booting" : "reboot");
+  } else {
+    say("BIG FAT WARNING!!\n\n"
+        "You have tried to resume from this image before.\n"
+        "If it failed once, it may well fail again.\n"
+        "Would you like to remove the image and boot "
+        "normally?\nThis will be equivalent to entering "
+        "noresume on the\nkernel command line.\n\n"
+        "Press SPACE to remove the image or C to continue "
+        "resuming.\n\n");
+    if (toi_wait > 0)
+      say("Default action if you don't select one in %d "
+          "seconds is: %s.\n", toi_wait,
+          !!default_answer ?
+          "continue resuming" : "remove the image");
+  }
+  console_loglevel = orig_loglevel;
+
+  set_toi_state(TOI_SANITY_CHECK_PROMPT);
+  clear_toi_state(TOI_CONTINUE_REQ);
+
+  if (toi_wait_for_keypress(toi_wait) == 0) /* We timed out */
+    continue_req = !!default_answer;
+  else
+    continue_req = test_toi_state(TOI_CONTINUE_REQ);
+
+#endif /* CONFIG_VT or CONFIG_SERIAL_CONSOLE */
+
+post_ask:
+  if ((warning_reason) && (!continue_req))
+    kernel_restart(NULL);
+
+  restore_toi_state(orig_state);
+  if (continue_req)
+    set_toi_state(TOI_CONTINUE_REQ);
+}
+
+#undef say
+
+/*
+ * User interface specific /sys/power/tuxonice entries.
+ */
+
+static struct toi_sysfs_data sysfs_params[] = {
+#if defined(CONFIG_NET) && defined(CONFIG_SYSFS)
+  SYSFS_INT("default_console_level", SYSFS_RW,
+      &toi_bkd.toi_default_console_level, 0, 7, 0, NULL),
+  SYSFS_UL("debug_sections", SYSFS_RW, &toi_bkd.toi_debug_state, 0,
+      1 << 30, 0),
+  SYSFS_BIT("log_everything", SYSFS_RW, &toi_bkd.toi_action, TOI_LOGALL,
+      0)
+#endif
+};
+
+static struct toi_module_ops userui_ops = {
+  .type                                = MISC_HIDDEN_MODULE,
+  .name                                = "printk ui",
+  .directory                        = "user_interface",
+  .module                                = THIS_MODULE,
+  .sysfs_data                        = sysfs_params,
+  .num_sysfs_entries                = sizeof(sysfs_params) /
+    sizeof(struct toi_sysfs_data),
+};
+
+int toi_register_ui_ops(struct ui_ops *this_ui)
+{
+  if (toi_current_ui) {
+    printk(KERN_INFO "Only one TuxOnIce user interface module can "
+        "be loaded at a time.");
+    return -EBUSY;
+  }
+
+  toi_current_ui = this_ui;
+
+  return 0;
+}
+
+void toi_remove_ui_ops(struct ui_ops *this_ui)
+{
+  if (toi_current_ui != this_ui)
+    return;
+
+  toi_current_ui = NULL;
+}
+
+/* toi_console_sysfs_init
+ * Description: Boot time initialisation for user interface.
+ */
+
+int toi_ui_init(void)
+{
+  return toi_register_module(&userui_ops);
+}
+
+void toi_ui_exit(void)
+{
+  toi_unregister_module(&userui_ops);
+}
diff --git a/kernel/power/tuxonice_ui.h b/kernel/power/tuxonice_ui.h
new file mode 100644
index 000000000..fb0668ed2
--- /dev/null
+++ b/kernel/power/tuxonice_ui.h
@@ -0,0 +1,97 @@
+/*
+ * kernel/power/tuxonice_ui.h
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ */
+
+enum {
+  DONT_CLEAR_BAR,
+  CLEAR_BAR
+};
+
+enum {
+  /* Userspace -> Kernel */
+  USERUI_MSG_ABORT = 0x11,
+  USERUI_MSG_SET_STATE = 0x12,
+  USERUI_MSG_GET_STATE = 0x13,
+  USERUI_MSG_GET_DEBUG_STATE = 0x14,
+  USERUI_MSG_SET_DEBUG_STATE = 0x15,
+  USERUI_MSG_SPACE = 0x18,
+  USERUI_MSG_GET_POWERDOWN_METHOD = 0x1A,
+  USERUI_MSG_SET_POWERDOWN_METHOD = 0x1B,
+  USERUI_MSG_GET_LOGLEVEL = 0x1C,
+  USERUI_MSG_SET_LOGLEVEL = 0x1D,
+  USERUI_MSG_PRINTK = 0x1E,
+
+  /* Kernel -> Userspace */
+  USERUI_MSG_MESSAGE = 0x21,
+  USERUI_MSG_PROGRESS = 0x22,
+  USERUI_MSG_POST_ATOMIC_RESTORE = 0x25,
+
+  USERUI_MSG_MAX,
+};
+
+struct userui_msg_params {
+  u32 a, b, c, d;
+  char text[255];
+};
+
+struct ui_ops {
+  char (*wait_for_key) (int timeout);
+  u32 (*update_status) (u32 value, u32 maximum, const char *fmt, ...);
+  void (*prepare_status) (int clearbar, const char *fmt, ...);
+  void (*cond_pause) (int pause, char *message);
+  void (*abort)(int result_code, const char *fmt, ...);
+  void (*prepare)(void);
+  void (*cleanup)(void);
+  void (*message)(u32 section, u32 level, u32 normally_logged,
+      const char *fmt, ...);
+};
+
+extern struct ui_ops *toi_current_ui;
+
+#define toi_update_status(val, max, fmt, args...) \
+  (toi_current_ui ? (toi_current_ui->update_status) (val, max, fmt, ##args) : \
+   max)
+
+#define toi_prepare_console(void) \
+  do { if (toi_current_ui) \
+    (toi_current_ui->prepare)(); \
+  } while (0)
+
+#define toi_cleanup_console(void) \
+  do { if (toi_current_ui) \
+    (toi_current_ui->cleanup)(); \
+  } while (0)
+
+#define abort_hibernate(result, fmt, args...) \
+  do { if (toi_current_ui) \
+    (toi_current_ui->abort)(result, fmt, ##args); \
+    else { \
+      set_abort_result(result); \
+    } \
+  } while (0)
+
+#define toi_cond_pause(pause, message) \
+  do { if (toi_current_ui) \
+    (toi_current_ui->cond_pause)(pause, message); \
+  } while (0)
+
+#define toi_prepare_status(clear, fmt, args...) \
+  do { if (toi_current_ui) \
+    (toi_current_ui->prepare_status)(clear, fmt, ##args); \
+    else \
+    printk(KERN_INFO fmt "%s", ##args, "\n"); \
+  } while (0)
+
+#define toi_message(sn, lev, log, fmt, a...) \
+  do { \
+    if (toi_current_ui && (!sn || test_debug_state(sn))) \
+    toi_current_ui->message(sn, lev, log, fmt, ##a); \
+  } while (0)
+
+__exit void toi_ui_cleanup(void);
+extern int toi_ui_init(void);
+extern void toi_ui_exit(void);
+extern int toi_register_ui_ops(struct ui_ops *this_ui);
+extern void toi_remove_ui_ops(struct ui_ops *this_ui);
diff --git a/kernel/power/tuxonice_userui.c b/kernel/power/tuxonice_userui.c
new file mode 100644
index 000000000..7f9d93edb
--- /dev/null
+++ b/kernel/power/tuxonice_userui.c
@@ -0,0 +1,658 @@
+/*
+ * kernel/power/user_ui.c
+ *
+ * Copyright (C) 2005-2007 Bernard Blackham
+ * Copyright (C) 2002-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Routines for TuxOnIce's user interface.
+ *
+ * The user interface code talks to a userspace program via a
+ * netlink socket.
+ *
+ * The kernel side:
+ * - starts the userui program;
+ * - sends text messages and progress bar status;
+ *
+ * The user space side:
+ * - passes messages regarding user requests (abort, toggle reboot etc)
+ *
+ */
+
+#define __KERNEL_SYSCALLS__
+
+#include <linux/suspend.h>
+#include <linux/freezer.h>
+#include <linux/console.h>
+#include <linux/ctype.h>
+#include <linux/tty.h>
+#include <linux/vt_kern.h>
+#include <linux/reboot.h>
+#include <linux/security.h>
+#include <linux/syscalls.h>
+#include <linux/vt.h>
+
+#include "tuxonice_sysfs.h"
+#include "tuxonice_modules.h"
+#include "tuxonice.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_netlink.h"
+#include "tuxonice_power_off.h"
+
+static char local_printf_buf[1024];        /* Same as printk - should be safe */
+
+static struct user_helper_data ui_helper_data;
+static struct toi_module_ops userui_ops;
+static int orig_kmsg;
+
+static char lastheader[512];
+static int lastheader_message_len;
+static int ui_helper_changed; /* Used at resume-time so don't overwrite value
+                                 set from initrd/ramfs. */
+
+/* Number of distinct progress amounts that userspace can display */
+static int progress_granularity = 30;
+
+static DECLARE_WAIT_QUEUE_HEAD(userui_wait_for_key);
+static int userui_wait_should_wake;
+
+#define toi_stop_waiting_for_userui_key() \
+{ \
+  userui_wait_should_wake = true; \
+  wake_up_interruptible(&userui_wait_for_key); \
+}
+
+/**
+ * ui_nl_set_state - Update toi_action based on a message from userui.
+ *
+ * @n: The bit (1 << bit) to set.
+ */
+static void ui_nl_set_state(int n)
+{
+  /* Only let them change certain settings */
+  static const u32 toi_action_mask =
+    (1 << TOI_REBOOT) | (1 << TOI_PAUSE) |
+    (1 << TOI_LOGALL) |
+    (1 << TOI_SINGLESTEP) |
+    (1 << TOI_PAUSE_NEAR_PAGESET_END);
+  static unsigned long new_action;
+
+  new_action = (toi_bkd.toi_action & (~toi_action_mask)) |
+    (n & toi_action_mask);
+
+  printk(KERN_DEBUG "n is %x. Action flags being changed from %lx "
+      "to %lx.", n, toi_bkd.toi_action, new_action);
+  toi_bkd.toi_action = new_action;
+
+  if (!test_action_state(TOI_PAUSE) &&
+      !test_action_state(TOI_SINGLESTEP))
+    toi_stop_waiting_for_userui_key();
+}
+
+/**
+ * userui_post_atomic_restore - Tell userui that atomic restore just happened.
+ *
+ * Tell userui that atomic restore just occured, so that it can do things like
+ * redrawing the screen, re-getting settings and so on.
+ */
+static void userui_post_atomic_restore(struct toi_boot_kernel_data *bkd)
+{
+  toi_send_netlink_message(&ui_helper_data,
+      USERUI_MSG_POST_ATOMIC_RESTORE, NULL, 0);
+}
+
+/**
+ * userui_storage_needed - Report how much memory in image header is needed.
+ */
+static int userui_storage_needed(void)
+{
+  return sizeof(ui_helper_data.program) + 1 + sizeof(int);
+}
+
+/**
+ * userui_save_config_info - Fill buffer with config info for image header.
+ *
+ * @buf: Buffer into which to put the config info we want to save.
+ */
+static int userui_save_config_info(char *buf)
+{
+  *((int *) buf) = progress_granularity;
+  memcpy(buf + sizeof(int), ui_helper_data.program,
+      sizeof(ui_helper_data.program));
+  return sizeof(ui_helper_data.program) + sizeof(int) + 1;
+}
+
+/**
+ * userui_load_config_info - Restore config info from buffer.
+ *
+ * @buf: Buffer containing header info loaded.
+ * @size: Size of data loaded for this module.
+ */
+static void userui_load_config_info(char *buf, int size)
+{
+  progress_granularity = *((int *) buf);
+  size -= sizeof(int);
+
+  /* Don't load the saved path if one has already been set */
+  if (ui_helper_changed)
+    return;
+
+  if (size > sizeof(ui_helper_data.program))
+    size = sizeof(ui_helper_data.program);
+
+  memcpy(ui_helper_data.program, buf + sizeof(int), size);
+  ui_helper_data.program[sizeof(ui_helper_data.program)-1] = '\0';
+}
+
+/**
+ * set_ui_program_set: Record that userui program was changed.
+ *
+ * Side effect routine for when the userui program is set. In an initrd or
+ * ramfs, the user may set a location for the userui program. If this happens,
+ * we don't want to reload the value that was saved in the image header. This
+ * routine allows us to flag that we shouldn't restore the program name from
+ * the image header.
+ */
+static void set_ui_program_set(void)
+{
+  ui_helper_changed = 1;
+}
+
+/**
+ * userui_memory_needed - Tell core how much memory to reserve for us.
+ */
+static int userui_memory_needed(void)
+{
+  /* ball park figure of 128 pages */
+  return 128 * PAGE_SIZE;
+}
+
+/**
+ * userui_update_status - Update the progress bar and (if on) in-bar message.
+ *
+ * @value: Current progress percentage numerator.
+ * @maximum: Current progress percentage denominator.
+ * @fmt: Message to be displayed in the middle of the progress bar.
+ *
+ * Note that a NULL message does not mean that any previous message is erased!
+ * For that, you need toi_prepare_status with clearbar on.
+ *
+ * Returns an unsigned long, being the next numerator (as determined by the
+ * maximum and progress granularity) where status needs to be updated.
+ * This is to reduce unnecessary calls to update_status.
+ */
+static u32 userui_update_status(u32 value, u32 maximum, const char *fmt, ...)
+{
+  static u32 last_step = 9999;
+  struct userui_msg_params msg;
+  u32 this_step, next_update;
+  int bitshift;
+
+  if (ui_helper_data.pid == -1)
+    return 0;
+
+  if ((!maximum) || (!progress_granularity))
+    return maximum;
+
+  if (value < 0)
+    value = 0;
+
+  if (value > maximum)
+    value = maximum;
+
+  /* Try to avoid math problems - we can't do 64 bit math here
+   * (and shouldn't need it - anyone got screen resolution
+   * of 65536 pixels or more?) */
+  bitshift = fls(maximum) - 16;
+  if (bitshift > 0) {
+    u32 temp_maximum = maximum >> bitshift;
+    u32 temp_value = value >> bitshift;
+    this_step = (u32)
+      (temp_value * progress_granularity / temp_maximum);
+    next_update = (((this_step + 1) * temp_maximum /
+          progress_granularity) + 1) << bitshift;
+  } else {
+    this_step = (u32) (value * progress_granularity / maximum);
+    next_update = ((this_step + 1) * maximum /
+        progress_granularity) + 1;
+  }
+
+  if (this_step == last_step)
+    return next_update;
+
+  memset(&msg, 0, sizeof(msg));
+
+  msg.a = this_step;
+  msg.b = progress_granularity;
+
+  if (fmt) {
+    va_list args;
+    va_start(args, fmt);
+    vsnprintf(msg.text, sizeof(msg.text), fmt, args);
+    va_end(args);
+    msg.text[sizeof(msg.text)-1] = '\0';
+  }
+
+  toi_send_netlink_message(&ui_helper_data, USERUI_MSG_PROGRESS,
+      &msg, sizeof(msg));
+  last_step = this_step;
+
+  return next_update;
+}
+
+/**
+ * userui_message - Display a message without necessarily logging it.
+ *
+ * @section: Type of message. Messages can be filtered by type.
+ * @level: Degree of importance of the message. Lower values = higher priority.
+ * @normally_logged: Whether logged even if log_everything is off.
+ * @fmt: Message (and parameters).
+ *
+ * This function is intended to do the same job as printk, but without normally
+ * logging what is printed. The point is to be able to get debugging info on
+ * screen without filling the logs with "1/534. ^M 2/534^M. 3/534^M"
+ *
+ * It may be called from an interrupt context - can't sleep!
+ */
+static void userui_message(u32 section, u32 level, u32 normally_logged,
+    const char *fmt, ...)
+{
+  struct userui_msg_params msg;
+
+  if ((level) && (level > console_loglevel))
+    return;
+
+  memset(&msg, 0, sizeof(msg));
+
+  msg.a = section;
+  msg.b = level;
+  msg.c = normally_logged;
+
+  if (fmt) {
+    va_list args;
+    va_start(args, fmt);
+    vsnprintf(msg.text, sizeof(msg.text), fmt, args);
+    va_end(args);
+    msg.text[sizeof(msg.text)-1] = '\0';
+  }
+
+  if (test_action_state(TOI_LOGALL))
+    printk(KERN_INFO "%s\n", msg.text);
+
+  toi_send_netlink_message(&ui_helper_data, USERUI_MSG_MESSAGE,
+      &msg, sizeof(msg));
+}
+
+/**
+ * wait_for_key_via_userui - Wait for userui to receive a keypress.
+ */
+static void wait_for_key_via_userui(void)
+{
+  DECLARE_WAITQUEUE(wait, current);
+
+  add_wait_queue(&userui_wait_for_key, &wait);
+  set_current_state(TASK_INTERRUPTIBLE);
+
+  wait_event_interruptible(userui_wait_for_key, userui_wait_should_wake);
+  userui_wait_should_wake = false;
+
+  set_current_state(TASK_RUNNING);
+  remove_wait_queue(&userui_wait_for_key, &wait);
+}
+
+/**
+ * userui_prepare_status - Display high level messages.
+ *
+ * @clearbar: Whether to clear the progress bar.
+ * @fmt...: New message for the title.
+ *
+ * Prepare the 'nice display', drawing the header and version, along with the
+ * current action and perhaps also resetting the progress bar.
+ */
+static void userui_prepare_status(int clearbar, const char *fmt, ...)
+{
+  va_list args;
+
+  if (fmt) {
+    va_start(args, fmt);
+    lastheader_message_len = vsnprintf(lastheader, 512, fmt, args);
+    va_end(args);
+  }
+
+  if (clearbar)
+    toi_update_status(0, 1, NULL);
+
+  if (ui_helper_data.pid == -1)
+    printk(KERN_EMERG "%s\n", lastheader);
+  else
+    toi_message(0, TOI_STATUS, 1, lastheader, NULL);
+}
+
+/**
+ * toi_wait_for_keypress - Wait for keypress via userui.
+ *
+ * @timeout: Maximum time to wait.
+ *
+ * Wait for a keypress from userui.
+ *
+ * FIXME: Implement timeout?
+ */
+static char userui_wait_for_keypress(int timeout)
+{
+  char key = '\0';
+
+  if (ui_helper_data.pid != -1) {
+    wait_for_key_via_userui();
+    key = ' ';
+  }
+
+  return key;
+}
+
+/**
+ * userui_abort_hibernate - Abort a cycle & tell user if they didn't request it.
+ *
+ * @result_code: Reason why we're aborting (1 << bit).
+ * @fmt: Message to display if telling the user what's going on.
+ *
+ * Abort a cycle. If this wasn't at the user's request (and we're displaying
+ * output), tell the user why and wait for them to acknowledge the message.
+ */
+static void userui_abort_hibernate(int result_code, const char *fmt, ...)
+{
+  va_list args;
+  int printed_len = 0;
+
+  set_result_state(result_code);
+
+  if (test_result_state(TOI_ABORTED))
+    return;
+
+  set_result_state(TOI_ABORTED);
+
+  if (test_result_state(TOI_ABORT_REQUESTED))
+    return;
+
+  va_start(args, fmt);
+  printed_len = vsnprintf(local_printf_buf,  sizeof(local_printf_buf),
+      fmt, args);
+  va_end(args);
+  if (ui_helper_data.pid != -1)
+    printed_len = sprintf(local_printf_buf + printed_len,
+        " (Press SPACE to continue)");
+
+  toi_prepare_status(CLEAR_BAR, "%s", local_printf_buf);
+
+  if (ui_helper_data.pid != -1)
+    userui_wait_for_keypress(0);
+}
+
+/**
+ * request_abort_hibernate - Abort hibernating or resuming at user request.
+ *
+ * Handle the user requesting the cancellation of a hibernation or resume by
+ * pressing escape.
+ */
+static void request_abort_hibernate(void)
+{
+  if (test_result_state(TOI_ABORT_REQUESTED) ||
+      !test_action_state(TOI_CAN_CANCEL))
+    return;
+
+  if (test_toi_state(TOI_NOW_RESUMING)) {
+    toi_prepare_status(CLEAR_BAR, "Escape pressed. "
+        "Powering down again.");
+    set_toi_state(TOI_STOP_RESUME);
+    while (!test_toi_state(TOI_IO_STOPPED))
+      schedule();
+    if (toiActiveAllocator->mark_resume_attempted)
+      toiActiveAllocator->mark_resume_attempted(0);
+    toi_power_down();
+  }
+
+  toi_prepare_status(CLEAR_BAR, "--- ESCAPE PRESSED :"
+      " ABORTING HIBERNATION ---");
+  set_abort_result(TOI_ABORT_REQUESTED);
+  toi_stop_waiting_for_userui_key();
+}
+
+/**
+ * userui_user_rcv_msg - Receive a netlink message from userui.
+ *
+ * @skb: skb received.
+ * @nlh: Netlink header received.
+ */
+static int userui_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+  int type;
+  int *data;
+
+  type = nlh->nlmsg_type;
+
+  /* A control message: ignore them */
+  if (type < NETLINK_MSG_BASE)
+    return 0;
+
+  /* Unknown message: reply with EINVAL */
+  if (type >= USERUI_MSG_MAX)
+    return -EINVAL;
+
+  /* All operations require privileges, even GET */
+  if (!capable(CAP_NET_ADMIN))
+    return -EPERM;
+
+  /* Only allow one task to receive NOFREEZE privileges */
+  if (type == NETLINK_MSG_NOFREEZE_ME && ui_helper_data.pid != -1) {
+    printk(KERN_INFO "Got NOFREEZE_ME request when "
+        "ui_helper_data.pid is %d.\n", ui_helper_data.pid);
+    return -EBUSY;
+  }
+
+  data = (int *) NLMSG_DATA(nlh);
+
+  switch (type) {
+    case USERUI_MSG_ABORT:
+      request_abort_hibernate();
+      return 0;
+    case USERUI_MSG_GET_STATE:
+      toi_send_netlink_message(&ui_helper_data,
+          USERUI_MSG_GET_STATE, &toi_bkd.toi_action,
+          sizeof(toi_bkd.toi_action));
+      return 0;
+    case USERUI_MSG_GET_DEBUG_STATE:
+      toi_send_netlink_message(&ui_helper_data,
+          USERUI_MSG_GET_DEBUG_STATE,
+          &toi_bkd.toi_debug_state,
+          sizeof(toi_bkd.toi_debug_state));
+      return 0;
+    case USERUI_MSG_SET_STATE:
+      if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int)))
+        return -EINVAL;
+      ui_nl_set_state(*data);
+      return 0;
+    case USERUI_MSG_SET_DEBUG_STATE:
+      if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int)))
+        return -EINVAL;
+      toi_bkd.toi_debug_state = (*data);
+      return 0;
+    case USERUI_MSG_SPACE:
+      toi_stop_waiting_for_userui_key();
+      return 0;
+    case USERUI_MSG_GET_POWERDOWN_METHOD:
+      toi_send_netlink_message(&ui_helper_data,
+          USERUI_MSG_GET_POWERDOWN_METHOD,
+          &toi_poweroff_method,
+          sizeof(toi_poweroff_method));
+      return 0;
+    case USERUI_MSG_SET_POWERDOWN_METHOD:
+      if (nlh->nlmsg_len != NLMSG_LENGTH(sizeof(char)))
+        return -EINVAL;
+      toi_poweroff_method = (unsigned long)(*data);
+      return 0;
+    case USERUI_MSG_GET_LOGLEVEL:
+      toi_send_netlink_message(&ui_helper_data,
+          USERUI_MSG_GET_LOGLEVEL,
+          &toi_bkd.toi_default_console_level,
+          sizeof(toi_bkd.toi_default_console_level));
+      return 0;
+    case USERUI_MSG_SET_LOGLEVEL:
+      if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int)))
+        return -EINVAL;
+      toi_bkd.toi_default_console_level = (*data);
+      return 0;
+    case USERUI_MSG_PRINTK:
+      printk(KERN_INFO "%s", (char *) data);
+      return 0;
+  }
+
+  /* Unhandled here */
+  return 1;
+}
+
+/**
+ * userui_cond_pause - Possibly pause at user request.
+ *
+ * @pause: Whether to pause or just display the message.
+ * @message: Message to display at the start of pausing.
+ *
+ * Potentially pause and wait for the user to tell us to continue. We normally
+ * only pause when @pause is set. While paused, the user can do things like
+ * changing the loglevel, toggling the display of debugging sections and such
+ * like.
+ */
+static void userui_cond_pause(int pause, char *message)
+{
+  int displayed_message = 0, last_key = 0;
+
+  while (last_key != 32 &&
+      ui_helper_data.pid != -1 &&
+      ((test_action_state(TOI_PAUSE) && pause) ||
+       (test_action_state(TOI_SINGLESTEP)))) {
+    if (!displayed_message) {
+      toi_prepare_status(DONT_CLEAR_BAR,
+          "%s Press SPACE to continue.%s",
+          message ? message : "",
+          (test_action_state(TOI_SINGLESTEP)) ?
+          " Single step on." : "");
+      displayed_message = 1;
+    }
+    last_key = userui_wait_for_keypress(0);
+  }
+  schedule();
+}
+
+/**
+ * userui_prepare_console - Prepare the console for use.
+ *
+ * Prepare a console for use, saving current kmsg settings and attempting to
+ * start userui. Console loglevel changes are handled by userui.
+ */
+static void userui_prepare_console(void)
+{
+  orig_kmsg = vt_kmsg_redirect(fg_console + 1);
+
+  ui_helper_data.pid = -1;
+
+  if (!userui_ops.enabled) {
+    printk(KERN_INFO "TuxOnIce: Userui disabled.\n");
+    return;
+  }
+
+  if (*ui_helper_data.program)
+    toi_netlink_setup(&ui_helper_data);
+  else
+    printk(KERN_INFO "TuxOnIce: Userui program not configured.\n");
+}
+
+/**
+ * userui_cleanup_console - Cleanup after a cycle.
+ *
+ * Tell userui to cleanup, and restore kmsg_redirect to its original value.
+ */
+
+static void userui_cleanup_console(void)
+{
+  if (ui_helper_data.pid > -1)
+    toi_netlink_close(&ui_helper_data);
+
+  vt_kmsg_redirect(orig_kmsg);
+}
+
+/*
+ * User interface specific /sys/power/tuxonice entries.
+ */
+
+static struct toi_sysfs_data sysfs_params[] = {
+#if defined(CONFIG_NET) && defined(CONFIG_SYSFS)
+  SYSFS_BIT("enable_escape", SYSFS_RW, &toi_bkd.toi_action,
+      TOI_CAN_CANCEL, 0),
+  SYSFS_BIT("pause_between_steps", SYSFS_RW, &toi_bkd.toi_action,
+      TOI_PAUSE, 0),
+  SYSFS_INT("enabled", SYSFS_RW, &userui_ops.enabled, 0, 1, 0, NULL),
+  SYSFS_INT("progress_granularity", SYSFS_RW, &progress_granularity, 1,
+      2048, 0, NULL),
+  SYSFS_STRING("program", SYSFS_RW, ui_helper_data.program, 255, 0,
+      set_ui_program_set),
+  SYSFS_INT("debug", SYSFS_RW, &ui_helper_data.debug, 0, 1, 0, NULL)
+#endif
+};
+
+static struct toi_module_ops userui_ops = {
+  .type                                = MISC_MODULE,
+  .name                                = "userui",
+  .shared_directory                = "user_interface",
+  .module                                = THIS_MODULE,
+  .storage_needed                        = userui_storage_needed,
+  .save_config_info                = userui_save_config_info,
+  .load_config_info                = userui_load_config_info,
+  .memory_needed                        = userui_memory_needed,
+  .post_atomic_restore                = userui_post_atomic_restore,
+  .sysfs_data                        = sysfs_params,
+  .num_sysfs_entries                = sizeof(sysfs_params) /
+    sizeof(struct toi_sysfs_data),
+};
+
+static struct ui_ops my_ui_ops = {
+  .update_status                        = userui_update_status,
+  .message                        = userui_message,
+  .prepare_status                        = userui_prepare_status,
+  .abort                                = userui_abort_hibernate,
+  .cond_pause                        = userui_cond_pause,
+  .prepare                        = userui_prepare_console,
+  .cleanup                        = userui_cleanup_console,
+  .wait_for_key                        = userui_wait_for_keypress,
+};
+
+/**
+ * toi_user_ui_init - Boot time initialisation for user interface.
+ *
+ * Invoked from the core init routine.
+ */
+static __init int toi_user_ui_init(void)
+{
+  int result;
+
+  ui_helper_data.nl = NULL;
+  strncpy(ui_helper_data.program, CONFIG_TOI_USERUI_DEFAULT_PATH, 255);
+  ui_helper_data.pid = -1;
+  ui_helper_data.skb_size = sizeof(struct userui_msg_params);
+  ui_helper_data.pool_limit = 6;
+  ui_helper_data.netlink_id = NETLINK_TOI_USERUI;
+  ui_helper_data.name = "userspace ui";
+  ui_helper_data.rcv_msg = userui_user_rcv_msg;
+  ui_helper_data.interface_version = 8;
+  ui_helper_data.must_init = 0;
+  ui_helper_data.not_ready = userui_cleanup_console;
+  init_completion(&ui_helper_data.wait_for_process);
+  result = toi_register_module(&userui_ops);
+  if (!result) {
+    result = toi_register_ui_ops(&my_ui_ops);
+    if (result)
+      toi_unregister_module(&userui_ops);
+  }
+
+  return result;
+}
+
+late_initcall(toi_user_ui_init);
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index 9210379c0..2dd2f0384 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -93,7 +93,7 @@ config CONTEXT_TRACKING
 config CONTEXT_TRACKING_FORCE
 	bool "Force context tracking"
 	depends on CONTEXT_TRACKING
-	default y if !NO_HZ_FULL
+	default y if !NO_HZ_FULL && !SCHED_MUQSS
 	help
 	  The major pre-requirement for full dynticks to work is to
 	  support the context tracking subsystem. But there are also
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index e2f9d4fef..c8197dc6b 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -16,14 +16,20 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
 CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
 endif
 
-obj-y += core.o loadavg.o clock.o cputime.o
+ifdef CONFIG_SCHED_MUQSS
+obj-y += MuQSS.o clock.o
+else
+obj-y += core.o loadavg.o clock.o
 obj-y += idle_task.o fair.o rt.o deadline.o
-obj-y += wait.o wait_bit.o swait.o completion.o idle.o
-obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o
+obj-$(CONFIG_SMP) += cpudeadline.o stop_task.o
 obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
-obj-$(CONFIG_SCHEDSTATS) += stats.o
 obj-$(CONFIG_SCHED_DEBUG) += debug.o
 obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
+endif
+obj-y += cputime.o
+obj-y += wait.o wait_bit.o swait.o completion.o idle.o
+obj-$(CONFIG_SMP) += cpupri.o topology.o
+obj-$(CONFIG_SCHEDSTATS) += stats.o
 obj-$(CONFIG_CPU_FREQ) += cpufreq.o
 obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o
 obj-$(CONFIG_MEMBARRIER) += membarrier.o
diff --git a/kernel/sched/MuQSS.c b/kernel/sched/MuQSS.c
new file mode 100644
index 000000000..9be7d9ef4
--- /dev/null
+++ b/kernel/sched/MuQSS.c
@@ -0,0 +1,7231 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  kernel/sched/MuQSS.c, was kernel/sched.c
+ *
+ *  Kernel scheduler and related syscalls
+ *
+ *  Copyright (C) 1991-2002  Linus Torvalds
+ *
+ *  1996-12-23  Modified by Dave Grothe to fix bugs in semaphores and
+ *		make semaphores SMP safe
+ *  1998-11-19	Implemented schedule_timeout() and related stuff
+ *		by Andrea Arcangeli
+ *  2002-01-04	New ultra-scalable O(1) scheduler by Ingo Molnar:
+ *		hybrid priority-list and round-robin design with
+ *		an array-switch method of distributing timeslices
+ *		and per-CPU runqueues.  Cleanups and useful suggestions
+ *		by Davide Libenzi, preemptible kernel bits by Robert Love.
+ *  2003-09-03	Interactivity tuning by Con Kolivas.
+ *  2004-04-02	Scheduler domains code by Nick Piggin
+ *  2007-04-15  Work begun on replacing all interactivity tuning with a
+ *              fair scheduling design by Con Kolivas.
+ *  2007-05-05  Load balancing (smp-nice) and other improvements
+ *              by Peter Williams
+ *  2007-05-06  Interactivity improvements to CFS by Mike Galbraith
+ *  2007-07-01  Group scheduling enhancements by Srivatsa Vaddagiri
+ *  2007-11-29  RT balancing improvements by Steven Rostedt, Gregory Haskins,
+ *              Thomas Gleixner, Mike Kravetz
+ *  2009-08-13	Brainfuck deadline scheduling policy by Con Kolivas deletes
+ *              a whole lot of those previous things.
+ *  2016-10-01  Multiple Queue Skiplist Scheduler scalable evolution of BFS
+ * 		scheduler by Con Kolivas.
+ */
+
+#include <linux/sched.h>
+#include <linux/sched/clock.h>
+#include <uapi/linux/sched/types.h>
+#include <linux/sched/loadavg.h>
+#include <linux/sched/hotplug.h>
+#include <linux/wait_bit.h>
+#include <linux/cpuset.h>
+#include <linux/delayacct.h>
+#include <linux/init_task.h>
+#include <linux/binfmts.h>
+#include <linux/context_tracking.h>
+#include <linux/rcupdate_wait.h>
+#include <linux/compat.h>
+#include <linux/skip_list.h>
+
+#include <linux/blkdev.h>
+#include <linux/kprobes.h>
+#include <linux/mmu_context.h>
+#include <linux/module.h>
+#include <linux/nmi.h>
+#include <linux/prefetch.h>
+#include <linux/profile.h>
+#include <linux/security.h>
+#include <linux/syscalls.h>
+#include <linux/sched/isolation.h>
+#include <linux/tick.h>
+
+#include <asm/switch_to.h>
+#include <asm/tlb.h>
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#endif
+
+#include "../workqueue_internal.h"
+#include "../smpboot.h"
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/sched.h>
+
+#include "MuQSS.h"
+
+#define rt_prio(prio)		unlikely((prio) < MAX_RT_PRIO)
+#define rt_task(p)		rt_prio((p)->prio)
+#define batch_task(p)		(unlikely((p)->policy == SCHED_BATCH))
+#define is_rt_policy(policy)	((policy) == SCHED_FIFO || \
+					(policy) == SCHED_RR)
+#define has_rt_policy(p)	unlikely(is_rt_policy((p)->policy))
+
+#define is_idle_policy(policy)	((policy) == SCHED_IDLEPRIO)
+#define idleprio_task(p)	unlikely(is_idle_policy((p)->policy))
+#define task_running_idle(p)	unlikely((p)->prio == IDLE_PRIO)
+
+#define is_iso_policy(policy)	((policy) == SCHED_ISO)
+#define iso_task(p)		unlikely(is_iso_policy((p)->policy))
+#define task_running_iso(p)	unlikely((p)->prio == ISO_PRIO)
+
+#define rq_idle(rq)		((rq)->rq_prio == PRIO_LIMIT)
+
+#define ISO_PERIOD		(5 * HZ)
+
+#define STOP_PRIO		(MAX_RT_PRIO - 1)
+
+/*
+ * Some helpers for converting to/from various scales. Use shifts to get
+ * approximate multiples of ten for less overhead.
+ */
+#define JIFFIES_TO_NS(TIME)	((TIME) * (1073741824 / HZ))
+#define JIFFY_NS		(1073741824 / HZ)
+#define JIFFY_US		(1048576 / HZ)
+#define NS_TO_JIFFIES(TIME)	((TIME) / JIFFY_NS)
+#define HALF_JIFFY_NS		(1073741824 / HZ / 2)
+#define HALF_JIFFY_US		(1048576 / HZ / 2)
+#define MS_TO_NS(TIME)		((TIME) << 20)
+#define MS_TO_US(TIME)		((TIME) << 10)
+#define NS_TO_MS(TIME)		((TIME) >> 20)
+#define NS_TO_US(TIME)		((TIME) >> 10)
+#define US_TO_NS(TIME)		((TIME) << 10)
+
+#define RESCHED_US	(100) /* Reschedule if less than this many μs left */
+
+void print_scheduler_version(void)
+{
+	printk(KERN_INFO "MuQSS CPU scheduler v0.171 by Con Kolivas.\n");
+}
+
+#define RQSHARE_NONE 0
+#define RQSHARE_SMT 1
+#define RQSHARE_MC 2
+#define RQSHARE_SMP 3
+
+/*
+ * This determines what level of runqueue sharing will be done and is
+ * configurable at boot time with the bootparam rqshare =
+ */
+static int rqshare __read_mostly = CONFIG_SHARERQ; /* Default RQSHARE_MC */
+
+static int __init set_rqshare(char *str)
+{
+	if (!strncmp(str, "none", 4)) {
+		rqshare = RQSHARE_NONE;
+		return 0;
+	}
+	if (!strncmp(str, "smt", 3)) {
+		rqshare = RQSHARE_SMT;
+		return 0;
+	}
+	if (!strncmp(str, "mc", 2)) {
+		rqshare = RQSHARE_MC;
+		return 0;
+	}
+	if (!strncmp(str, "smp", 2)) {
+		rqshare = RQSHARE_SMP;
+		return 0;
+	}
+	return 1;
+}
+__setup("rqshare=", set_rqshare);
+
+/*
+ * This is the time all tasks within the same priority round robin.
+ * Value is in ms and set to a minimum of 6ms.
+ * Tunable via /proc interface.
+ */
+#ifdef CONFIG_PCK_INTERACTIVE
+int rr_interval __read_mostly = 3;
+#else
+int rr_interval __read_mostly = 6;
+#endif
+
+/*
+ * Tunable to choose whether to prioritise latency or throughput, simple
+ * binary yes or no
+ */
+int sched_interactive __read_mostly = 1;
+
+/*
+ * sched_iso_cpu - sysctl which determines the cpu percentage SCHED_ISO tasks
+ * are allowed to run five seconds as real time tasks. This is the total over
+ * all online cpus.
+ */
+#ifdef CONFIG_PCK_INTERACTIVE
+int sched_iso_cpu __read_mostly = 25;
+#else
+int sched_iso_cpu __read_mostly = 70;
+#endif
+
+/*
+ * sched_yield_type - Choose what sort of yield sched_yield will perform.
+ * 0: No yield.
+ * 1: Yield only to better priority/deadline tasks. (default)
+ * 2: Expire timeslice and recalculate deadline.
+ */
+int sched_yield_type __read_mostly = 1;
+
+/*
+ * The relative length of deadline for each priority(nice) level.
+ */
+static int prio_ratios[NICE_WIDTH] __read_mostly;
+
+
+/*
+ * The quota handed out to tasks of all priority levels when refilling their
+ * time_slice.
+ */
+static inline int timeslice(void)
+{
+	return MS_TO_US(rr_interval);
+}
+
+DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
+
+#ifdef CONFIG_SMP
+/*
+ * Total number of runqueues. Equals number of CPUs when there is no runqueue
+ * sharing but is usually less with SMT/MC sharing of runqueues.
+ */
+static int total_runqueues __read_mostly = 1;
+
+static cpumask_t cpu_idle_map ____cacheline_aligned_in_smp;
+
+struct rq *cpu_rq(int cpu)
+{
+	return &per_cpu(runqueues, (cpu));
+}
+#define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
+
+/*
+ * For asym packing, by default the lower numbered cpu has higher priority.
+ */
+int __weak arch_asym_cpu_priority(int cpu)
+{
+	return -cpu;
+}
+
+int __weak arch_sd_sibling_asym_packing(void)
+{
+       return 0*SD_ASYM_PACKING;
+}
+#else
+struct rq *uprq;
+#endif /* CONFIG_SMP */
+
+#include "stats.h"
+
+#ifndef prepare_arch_switch
+# define prepare_arch_switch(next)	do { } while (0)
+#endif
+#ifndef finish_arch_switch
+# define finish_arch_switch(prev)	do { } while (0)
+#endif
+#ifndef finish_arch_post_lock_switch
+# define finish_arch_post_lock_switch()	do { } while (0)
+#endif
+
+/*
+ * All common locking functions performed on rq->lock. rq->clock is local to
+ * the CPU accessing it so it can be modified just with interrupts disabled
+ * when we're not updating niffies.
+ * Looking up task_rq must be done under rq->lock to be safe.
+ */
+
+/*
+ * RQ-clock updating methods:
+ */
+
+static void update_rq_clock_task(struct rq *rq, s64 delta)
+{
+/*
+ * In theory, the compile should just see 0 here, and optimize out the call
+ * to sched_rt_avg_update. But I don't trust it...
+ */
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+	s64 irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;
+
+	/*
+	 * Since irq_time is only updated on {soft,}irq_exit, we might run into
+	 * this case when a previous update_rq_clock() happened inside a
+	 * {soft,}irq region.
+	 *
+	 * When this happens, we stop ->clock_task and only update the
+	 * prev_irq_time stamp to account for the part that fit, so that a next
+	 * update will consume the rest. This ensures ->clock_task is
+	 * monotonic.
+	 *
+	 * It does however cause some slight miss-attribution of {soft,}irq
+	 * time, a more accurate solution would be to update the irq_time using
+	 * the current rq->clock timestamp, except that would require using
+	 * atomic ops.
+	 */
+	if (irq_delta > delta)
+		irq_delta = delta;
+
+	rq->prev_irq_time += irq_delta;
+	delta -= irq_delta;
+#endif
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+	if (static_key_false((&paravirt_steal_rq_enabled))) {
+		s64 steal = paravirt_steal_clock(cpu_of(rq));
+
+		steal -= rq->prev_steal_time_rq;
+
+		if (unlikely(steal > delta))
+			steal = delta;
+
+		rq->prev_steal_time_rq += steal;
+
+		delta -= steal;
+	}
+#endif
+	rq->clock_task += delta;
+}
+
+static inline void update_rq_clock(struct rq *rq)
+{
+	s64 delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
+
+	if (unlikely(delta < 0))
+		return;
+	rq->clock += delta;
+	update_rq_clock_task(rq, delta);
+}
+
+/*
+ * Niffies are a globally increasing nanosecond counter. They're only used by
+ * update_load_avg and time_slice_expired, however deadlines are based on them
+ * across CPUs. Update them whenever we will call one of those functions, and
+ * synchronise them across CPUs whenever we hold both runqueue locks.
+ */
+static inline void update_clocks(struct rq *rq)
+{
+	s64 ndiff, minndiff;
+	long jdiff;
+
+	update_rq_clock(rq);
+	ndiff = rq->clock - rq->old_clock;
+	rq->old_clock = rq->clock;
+	jdiff = jiffies - rq->last_jiffy;
+
+	/* Subtract any niffies added by balancing with other rqs */
+	ndiff -= rq->niffies - rq->last_niffy;
+	minndiff = JIFFIES_TO_NS(jdiff) - rq->niffies + rq->last_jiffy_niffies;
+	if (minndiff < 0)
+		minndiff = 0;
+	ndiff = max(ndiff, minndiff);
+	rq->niffies += ndiff;
+	rq->last_niffy = rq->niffies;
+	if (jdiff) {
+		rq->last_jiffy += jdiff;
+		rq->last_jiffy_niffies = rq->niffies;
+	}
+}
+
+static inline int task_on_rq_queued(struct task_struct *p)
+{
+	return p->on_rq == TASK_ON_RQ_QUEUED;
+}
+
+static inline int task_on_rq_migrating(struct task_struct *p)
+{
+	return p->on_rq == TASK_ON_RQ_MIGRATING;
+}
+
+/*
+ * Any time we have two runqueues locked we use that as an opportunity to
+ * synchronise niffies to the highest value as idle ticks may have artificially
+ * kept niffies low on one CPU and the truth can only be later.
+ */
+static inline void synchronise_niffies(struct rq *rq1, struct rq *rq2)
+{
+	if (rq1->niffies > rq2->niffies)
+		rq2->niffies = rq1->niffies;
+	else
+		rq1->niffies = rq2->niffies;
+}
+
+/*
+ * double_rq_lock - safely lock two runqueues
+ *
+ * Note this does not disable interrupts like task_rq_lock,
+ * you need to do so manually before calling.
+ */
+
+/* For when we know rq1 != rq2 */
+static inline void __double_rq_lock(struct rq *rq1, struct rq *rq2)
+	__acquires(rq1->lock)
+	__acquires(rq2->lock)
+{
+	if (rq1 < rq2) {
+		raw_spin_lock(rq1->lock);
+		raw_spin_lock_nested(rq2->lock, SINGLE_DEPTH_NESTING);
+	} else {
+		raw_spin_lock(rq2->lock);
+		raw_spin_lock_nested(rq1->lock, SINGLE_DEPTH_NESTING);
+	}
+}
+
+static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
+	__acquires(rq1->lock)
+	__acquires(rq2->lock)
+{
+	BUG_ON(!irqs_disabled());
+	if (rq1->lock == rq2->lock) {
+		raw_spin_lock(rq1->lock);
+		__acquire(rq2->lock);	/* Fake it out ;) */
+	} else
+		__double_rq_lock(rq1, rq2);
+	synchronise_niffies(rq1, rq2);
+}
+
+/*
+ * double_rq_unlock - safely unlock two runqueues
+ *
+ * Note this does not restore interrupts like task_rq_unlock,
+ * you need to do so manually after calling.
+ */
+static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
+	__releases(rq1->lock)
+	__releases(rq2->lock)
+{
+	raw_spin_unlock(rq1->lock);
+	if (rq1->lock != rq2->lock)
+		raw_spin_unlock(rq2->lock);
+	else
+		__release(rq2->lock);
+}
+
+static inline void lock_all_rqs(void)
+{
+	int cpu;
+
+	preempt_disable();
+	for_each_possible_cpu(cpu) {
+		struct rq *rq = cpu_rq(cpu);
+
+		do_raw_spin_lock(rq->lock);
+	}
+}
+
+static inline void unlock_all_rqs(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct rq *rq = cpu_rq(cpu);
+
+		do_raw_spin_unlock(rq->lock);
+	}
+	preempt_enable();
+}
+
+/* Specially nest trylock an rq */
+static inline bool trylock_rq(struct rq *this_rq, struct rq *rq)
+{
+	if (unlikely(!do_raw_spin_trylock(rq->lock)))
+		return false;
+	spin_acquire(rq->lock.dep_map, SINGLE_DEPTH_NESTING, 1, _RET_IP_);
+	synchronise_niffies(this_rq, rq);
+	return true;
+}
+
+/* Unlock a specially nested trylocked rq */
+static inline void unlock_rq(struct rq *rq)
+{
+	spin_release(rq->lock.dep_map, 1, _RET_IP_);
+	do_raw_spin_unlock(rq->lock);
+}
+
+/*
+ * cmpxchg based fetch_or, macro so it works for different integer types
+ */
+#define fetch_or(ptr, mask)						\
+	({								\
+		typeof(ptr) _ptr = (ptr);				\
+		typeof(mask) _mask = (mask);				\
+		typeof(*_ptr) _old, _val = *_ptr;			\
+									\
+		for (;;) {						\
+			_old = cmpxchg(_ptr, _val, _val | _mask);	\
+			if (_old == _val)				\
+				break;					\
+			_val = _old;					\
+		}							\
+	_old;								\
+})
+
+#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG)
+/*
+ * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG,
+ * this avoids any races wrt polling state changes and thereby avoids
+ * spurious IPIs.
+ */
+static bool set_nr_and_not_polling(struct task_struct *p)
+{
+	struct thread_info *ti = task_thread_info(p);
+	return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG);
+}
+
+/*
+ * Atomically set TIF_NEED_RESCHED if TIF_POLLING_NRFLAG is set.
+ *
+ * If this returns true, then the idle task promises to call
+ * sched_ttwu_pending() and reschedule soon.
+ */
+static bool set_nr_if_polling(struct task_struct *p)
+{
+	struct thread_info *ti = task_thread_info(p);
+	typeof(ti->flags) old, val = READ_ONCE(ti->flags);
+
+	for (;;) {
+		if (!(val & _TIF_POLLING_NRFLAG))
+			return false;
+		if (val & _TIF_NEED_RESCHED)
+			return true;
+		old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED);
+		if (old == val)
+			break;
+		val = old;
+	}
+	return true;
+}
+
+#else
+static bool set_nr_and_not_polling(struct task_struct *p)
+{
+	set_tsk_need_resched(p);
+	return true;
+}
+
+#ifdef CONFIG_SMP
+static bool set_nr_if_polling(struct task_struct *p)
+{
+	return false;
+}
+#endif
+#endif
+
+void wake_q_add(struct wake_q_head *head, struct task_struct *task)
+{
+	struct wake_q_node *node = &task->wake_q;
+
+	/*
+	 * Atomically grab the task, if ->wake_q is !nil already it means
+	 * its already queued (either by us or someone else) and will get the
+	 * wakeup due to that.
+	 *
+	 * This cmpxchg() implies a full barrier, which pairs with the write
+	 * barrier implied by the wakeup in wake_up_q().
+	 */
+	if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL))
+		return;
+
+	get_task_struct(task);
+
+	/*
+	 * The head is context local, there can be no concurrency.
+	 */
+	*head->lastp = node;
+	head->lastp = &node->next;
+}
+
+void wake_up_q(struct wake_q_head *head)
+{
+	struct wake_q_node *node = head->first;
+
+	while (node != WAKE_Q_TAIL) {
+		struct task_struct *task;
+
+		task = container_of(node, struct task_struct, wake_q);
+		BUG_ON(!task);
+		/* Task can safely be re-inserted now */
+		node = node->next;
+		task->wake_q.next = NULL;
+
+		/*
+		 * wake_up_process() implies a wmb() to pair with the queueing
+		 * in wake_q_add() so as not to miss wakeups.
+		 */
+		wake_up_process(task);
+		put_task_struct(task);
+	}
+}
+
+static inline void smp_sched_reschedule(int cpu)
+{
+	if (likely(cpu_online(cpu)))
+		smp_send_reschedule(cpu);
+}
+
+/*
+ * resched_task - mark a task 'to be rescheduled now'.
+ *
+ * On UP this means the setting of the need_resched flag, on SMP it
+ * might also involve a cross-CPU call to trigger the scheduler on
+ * the target CPU.
+ */
+void resched_task(struct task_struct *p)
+{
+	int cpu;
+#ifdef CONFIG_LOCKDEP
+	/* Kernel threads call this when creating workqueues while still
+	 * inactive from __kthread_bind_mask, holding only the pi_lock */
+	if (!(p->flags & PF_KTHREAD)) {
+		struct rq *rq = task_rq(p);
+
+		lockdep_assert_held(rq->lock);
+	}
+#endif
+	if (test_tsk_need_resched(p))
+		return;
+
+	cpu = task_cpu(p);
+	if (cpu == smp_processor_id()) {
+		set_tsk_need_resched(p);
+		set_preempt_need_resched();
+		return;
+	}
+
+	if (set_nr_and_not_polling(p))
+		smp_sched_reschedule(cpu);
+	else
+		trace_sched_wake_idle_without_ipi(cpu);
+}
+
+/*
+ * A task that is not running or queued will not have a node set.
+ * A task that is queued but not running will have a node set.
+ * A task that is currently running will have ->on_cpu set but no node set.
+ */
+static inline bool task_queued(struct task_struct *p)
+{
+	return !skiplist_node_empty(&p->node);
+}
+
+static void enqueue_task(struct rq *rq, struct task_struct *p, int flags);
+static inline void resched_if_idle(struct rq *rq);
+
+/* Dodgy workaround till we figure out where the softirqs are going */
+static inline void do_pending_softirq(struct rq *rq, struct task_struct *next)
+{
+	if (unlikely(next == rq->idle && local_softirq_pending() && !in_interrupt()))
+		do_softirq_own_stack();
+}
+
+static inline bool deadline_before(u64 deadline, u64 time)
+{
+	return (deadline < time);
+}
+
+/*
+ * Deadline is "now" in niffies + (offset by priority). Setting the deadline
+ * is the key to everything. It distributes cpu fairly amongst tasks of the
+ * same nice value, it proportions cpu according to nice level, it means the
+ * task that last woke up the longest ago has the earliest deadline, thus
+ * ensuring that interactive tasks get low latency on wake up. The CPU
+ * proportion works out to the square of the virtual deadline difference, so
+ * this equation will give nice 19 3% CPU compared to nice 0.
+ */
+static inline u64 prio_deadline_diff(int user_prio)
+{
+	return (prio_ratios[user_prio] * rr_interval * (MS_TO_NS(1) / 128));
+}
+
+static inline u64 task_deadline_diff(struct task_struct *p)
+{
+	return prio_deadline_diff(TASK_USER_PRIO(p));
+}
+
+static inline u64 static_deadline_diff(int static_prio)
+{
+	return prio_deadline_diff(USER_PRIO(static_prio));
+}
+
+static inline int longest_deadline_diff(void)
+{
+	return prio_deadline_diff(39);
+}
+
+static inline int ms_longest_deadline_diff(void)
+{
+	return NS_TO_MS(longest_deadline_diff());
+}
+
+static inline bool rq_local(struct rq *rq);
+
+#ifndef SCHED_CAPACITY_SCALE
+#define SCHED_CAPACITY_SCALE 1024
+#endif
+
+static inline int rq_load(struct rq *rq)
+{
+	return rq->nr_running;
+}
+
+/*
+ * Update the load average for feeding into cpu frequency governors. Use a
+ * rough estimate of a rolling average with ~ time constant of 32ms.
+ * 80/128 ~ 0.63. * 80 / 32768 / 128 == * 5 / 262144
+ * Make sure a call to update_clocks has been made before calling this to get
+ * an updated rq->niffies.
+ */
+static void update_load_avg(struct rq *rq, unsigned int flags)
+{
+	unsigned long us_interval, curload;
+	long load;
+
+	if (unlikely(rq->niffies <= rq->load_update))
+		return;
+
+	us_interval = NS_TO_US(rq->niffies - rq->load_update);
+	curload = rq_load(rq);
+	load = rq->load_avg - (rq->load_avg * us_interval * 5 / 262144);
+	if (unlikely(load < 0))
+		load = 0;
+	load += curload * curload * SCHED_CAPACITY_SCALE * us_interval * 5 / 262144;
+	rq->load_avg = load;
+
+	rq->load_update = rq->niffies;
+	if (likely(rq_local(rq)))
+		cpufreq_trigger(rq, flags);
+}
+
+/*
+ * Removing from the runqueue. Enter with rq locked. Deleting a task
+ * from the skip list is done via the stored node reference in the task struct
+ * and does not require a full look up. Thus it occurs in O(k) time where k
+ * is the "level" of the list the task was stored at - usually < 4, max 8.
+ */
+static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
+{
+	skiplist_delete(rq->sl, &p->node);
+	rq->best_key = rq->node->next[0]->key;
+	update_clocks(rq);
+
+	if (!(flags & DEQUEUE_SAVE))
+		sched_info_dequeued(task_rq(p), p);
+	rq->nr_running--;
+	update_load_avg(rq, flags);
+}
+
+#ifdef CONFIG_PREEMPT_RCU
+static bool rcu_read_critical(struct task_struct *p)
+{
+	return p->rcu_read_unlock_special.b.blocked;
+}
+#else /* CONFIG_PREEMPT_RCU */
+#define rcu_read_critical(p) (false)
+#endif /* CONFIG_PREEMPT_RCU */
+
+/*
+ * To determine if it's safe for a task of SCHED_IDLEPRIO to actually run as
+ * an idle task, we ensure none of the following conditions are met.
+ */
+static bool idleprio_suitable(struct task_struct *p)
+{
+	return (!(task_contributes_to_load(p)) && !(p->flags & (PF_EXITING)) &&
+		!signal_pending(p) && !rcu_read_critical(p) && !freezing(p));
+}
+
+/*
+ * To determine if a task of SCHED_ISO can run in pseudo-realtime, we check
+ * that the iso_refractory flag is not set.
+ */
+static inline bool isoprio_suitable(struct rq *rq)
+{
+	return !rq->iso_refractory;
+}
+
+/*
+ * Adding to the runqueue. Enter with rq locked.
+ */
+static void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
+{
+	unsigned int randseed, cflags = 0;
+	u64 sl_id;
+
+	if (!rt_task(p)) {
+		/* Check it hasn't gotten rt from PI */
+		if ((idleprio_task(p) && idleprio_suitable(p)) ||
+		   (iso_task(p) && isoprio_suitable(rq)))
+			p->prio = p->normal_prio;
+		else
+			p->prio = NORMAL_PRIO;
+	}
+	/*
+	 * The sl_id key passed to the skiplist generates a sorted list.
+	 * Realtime and sched iso tasks run FIFO so they only need be sorted
+	 * according to priority. The skiplist will put tasks of the same
+	 * key inserted later in FIFO order. Tasks of sched normal, batch
+	 * and idleprio are sorted according to their deadlines. Idleprio
+	 * tasks are offset by an impossibly large deadline value ensuring
+	 * they get sorted into last positions, but still according to their
+	 * own deadlines. This creates a "landscape" of skiplists running
+	 * from priority 0 realtime in first place to the lowest priority
+	 * idleprio tasks last. Skiplist insertion is an O(log n) process.
+	 */
+	if (p->prio <= ISO_PRIO) {
+		sl_id = p->prio;
+		cflags = SCHED_CPUFREQ_RT;
+	} else {
+		sl_id = p->deadline;
+		if (idleprio_task(p)) {
+			if (p->prio == IDLE_PRIO)
+				sl_id |= 0xF000000000000000;
+			else
+				sl_id += longest_deadline_diff();
+		}
+	}
+	/*
+	 * Some architectures don't have better than microsecond resolution
+	 * so mask out ~microseconds as the random seed for skiplist insertion.
+	 */
+	update_clocks(rq);
+	if (!(flags & ENQUEUE_RESTORE))
+		sched_info_queued(rq, p);
+	randseed = (rq->niffies >> 10) & 0xFFFFFFFF;
+	skiplist_insert(rq->sl, &p->node, sl_id, p, randseed);
+	rq->best_key = rq->node->next[0]->key;
+	if (p->in_iowait)
+		cflags |= SCHED_CPUFREQ_IOWAIT;
+	rq->nr_running++;
+	update_load_avg(rq, cflags);
+}
+
+/*
+ * Returns the relative length of deadline all compared to the shortest
+ * deadline which is that of nice -20.
+ */
+static inline int task_prio_ratio(struct task_struct *p)
+{
+	return prio_ratios[TASK_USER_PRIO(p)];
+}
+
+/*
+ * task_timeslice - all tasks of all priorities get the exact same timeslice
+ * length. CPU distribution is handled by giving different deadlines to
+ * tasks of different priorities. Use 128 as the base value for fast shifts.
+ */
+static inline int task_timeslice(struct task_struct *p)
+{
+	return (rr_interval * task_prio_ratio(p) / 128);
+}
+
+#ifdef CONFIG_SMP
+/* Entered with rq locked */
+static inline void resched_if_idle(struct rq *rq)
+{
+	if (rq_idle(rq))
+		resched_task(rq->curr);
+}
+
+static inline bool rq_local(struct rq *rq)
+{
+	return (rq->cpu == smp_processor_id());
+}
+#ifdef CONFIG_SMT_NICE
+static const cpumask_t *thread_cpumask(int cpu);
+
+/* Find the best real time priority running on any SMT siblings of cpu and if
+ * none are running, the static priority of the best deadline task running.
+ * The lookups to the other runqueues is done lockless as the occasional wrong
+ * value would be harmless. */
+static int best_smt_bias(struct rq *this_rq)
+{
+	int other_cpu, best_bias = 0;
+
+	for_each_cpu(other_cpu, &this_rq->thread_mask) {
+		struct rq *rq = cpu_rq(other_cpu);
+
+		if (rq_idle(rq))
+			continue;
+		if (unlikely(!rq->online))
+			continue;
+		if (!rq->rq_mm)
+			continue;
+		if (likely(rq->rq_smt_bias > best_bias))
+			best_bias = rq->rq_smt_bias;
+	}
+	return best_bias;
+}
+
+static int task_prio_bias(struct task_struct *p)
+{
+	if (rt_task(p))
+		return 1 << 30;
+	else if (task_running_iso(p))
+		return 1 << 29;
+	else if (task_running_idle(p))
+		return 0;
+	return MAX_PRIO - p->static_prio;
+}
+
+static bool smt_always_schedule(struct task_struct __maybe_unused *p, struct rq __maybe_unused *this_rq)
+{
+	return true;
+}
+
+static bool (*smt_schedule)(struct task_struct *p, struct rq *this_rq) = &smt_always_schedule;
+
+/* We've already decided p can run on CPU, now test if it shouldn't for SMT
+ * nice reasons. */
+static bool smt_should_schedule(struct task_struct *p, struct rq *this_rq)
+{
+	int best_bias, task_bias;
+
+	/* Kernel threads always run */
+	if (unlikely(!p->mm))
+		return true;
+	if (rt_task(p))
+		return true;
+	if (!idleprio_suitable(p))
+		return true;
+	best_bias = best_smt_bias(this_rq);
+	/* The smt siblings are all idle or running IDLEPRIO */
+	if (best_bias < 1)
+		return true;
+	task_bias = task_prio_bias(p);
+	if (task_bias < 1)
+		return false;
+	if (task_bias >= best_bias)
+		return true;
+	/* Dither 25% cpu of normal tasks regardless of nice difference */
+	if (best_bias % 4 == 1)
+		return true;
+	/* Sorry, you lose */
+	return false;
+}
+#else /* CONFIG_SMT_NICE */
+#define smt_schedule(p, this_rq) (true)
+#endif /* CONFIG_SMT_NICE */
+
+static inline void atomic_set_cpu(int cpu, cpumask_t *cpumask)
+{
+	set_bit(cpu, (volatile unsigned long *)cpumask);
+}
+
+/*
+ * The cpu_idle_map stores a bitmap of all the CPUs currently idle to
+ * allow easy lookup of whether any suitable idle CPUs are available.
+ * It's cheaper to maintain a binary yes/no if there are any idle CPUs on the
+ * idle_cpus variable than to do a full bitmask check when we are busy. The
+ * bits are set atomically but read locklessly as occasional false positive /
+ * negative is harmless.
+ */
+static inline void set_cpuidle_map(int cpu)
+{
+	if (likely(cpu_online(cpu)))
+		atomic_set_cpu(cpu, &cpu_idle_map);
+}
+
+static inline void atomic_clear_cpu(int cpu, cpumask_t *cpumask)
+{
+	clear_bit(cpu, (volatile unsigned long *)cpumask);
+}
+
+static inline void clear_cpuidle_map(int cpu)
+{
+	atomic_clear_cpu(cpu, &cpu_idle_map);
+}
+
+static bool suitable_idle_cpus(struct task_struct *p)
+{
+	return (cpumask_intersects(&p->cpus_allowed, &cpu_idle_map));
+}
+
+/*
+ * Resched current on rq. We don't know if rq is local to this CPU nor if it
+ * is locked so we do not use an intermediate variable for the task to avoid
+ * having it dereferenced.
+ */
+static void resched_curr(struct rq *rq)
+{
+	int cpu;
+
+	if (test_tsk_need_resched(rq->curr))
+		return;
+
+	rq->preempt = rq->curr;
+	cpu = rq->cpu;
+
+	/* We're doing this without holding the rq lock if it's not task_rq */
+
+	if (cpu == smp_processor_id()) {
+		set_tsk_need_resched(rq->curr);
+		set_preempt_need_resched();
+		return;
+	}
+
+	if (set_nr_and_not_polling(rq->curr))
+		smp_sched_reschedule(cpu);
+	else
+		trace_sched_wake_idle_without_ipi(cpu);
+}
+
+#define CPUIDLE_DIFF_THREAD	(1)
+#define CPUIDLE_DIFF_CORE	(2)
+#define CPUIDLE_CACHE_BUSY	(4)
+#define CPUIDLE_DIFF_CPU	(8)
+#define CPUIDLE_THREAD_BUSY	(16)
+#define CPUIDLE_DIFF_NODE	(32)
+
+/*
+ * The best idle CPU is chosen according to the CPUIDLE ranking above where the
+ * lowest value would give the most suitable CPU to schedule p onto next. The
+ * order works out to be the following:
+ *
+ * Same thread, idle or busy cache, idle or busy threads
+ * Other core, same cache, idle or busy cache, idle threads.
+ * Same node, other CPU, idle cache, idle threads.
+ * Same node, other CPU, busy cache, idle threads.
+ * Other core, same cache, busy threads.
+ * Same node, other CPU, busy threads.
+ * Other node, other CPU, idle cache, idle threads.
+ * Other node, other CPU, busy cache, idle threads.
+ * Other node, other CPU, busy threads.
+ */
+static int best_mask_cpu(int best_cpu, struct rq *rq, cpumask_t *tmpmask)
+{
+	int best_ranking = CPUIDLE_DIFF_NODE | CPUIDLE_THREAD_BUSY |
+		CPUIDLE_DIFF_CPU | CPUIDLE_CACHE_BUSY | CPUIDLE_DIFF_CORE |
+		CPUIDLE_DIFF_THREAD;
+	int cpu_tmp;
+
+	if (cpumask_test_cpu(best_cpu, tmpmask))
+		goto out;
+
+	for_each_cpu(cpu_tmp, tmpmask) {
+		int ranking, locality;
+		struct rq *tmp_rq;
+
+		ranking = 0;
+		tmp_rq = cpu_rq(cpu_tmp);
+
+		locality = rq->cpu_locality[cpu_tmp];
+#ifdef CONFIG_NUMA
+		if (locality > 3)
+			ranking |= CPUIDLE_DIFF_NODE;
+		else
+#endif
+		if (locality > 2)
+			ranking |= CPUIDLE_DIFF_CPU;
+#ifdef CONFIG_SCHED_MC
+		else if (locality == 2)
+			ranking |= CPUIDLE_DIFF_CORE;
+		else if (!(tmp_rq->cache_idle(tmp_rq)))
+			ranking |= CPUIDLE_CACHE_BUSY;
+#endif
+#ifdef CONFIG_SCHED_SMT
+		if (locality == 1)
+			ranking |= CPUIDLE_DIFF_THREAD;
+		if (!(tmp_rq->siblings_idle(tmp_rq)))
+			ranking |= CPUIDLE_THREAD_BUSY;
+#endif
+		if (ranking < best_ranking) {
+			best_cpu = cpu_tmp;
+			best_ranking = ranking;
+		}
+	}
+out:
+	return best_cpu;
+}
+
+bool cpus_share_cache(int this_cpu, int that_cpu)
+{
+	struct rq *this_rq = cpu_rq(this_cpu);
+
+	return (this_rq->cpu_locality[that_cpu] < 3);
+}
+
+/* As per resched_curr but only will resched idle task */
+static inline void resched_idle(struct rq *rq)
+{
+	if (test_tsk_need_resched(rq->idle))
+		return;
+
+	rq->preempt = rq->idle;
+
+	set_tsk_need_resched(rq->idle);
+
+	if (rq_local(rq)) {
+		set_preempt_need_resched();
+		return;
+	}
+
+	smp_sched_reschedule(rq->cpu);
+}
+
+static struct rq *resched_best_idle(struct task_struct *p, int cpu)
+{
+	cpumask_t tmpmask;
+	struct rq *rq;
+	int best_cpu;
+
+	cpumask_and(&tmpmask, &p->cpus_allowed, &cpu_idle_map);
+	best_cpu = best_mask_cpu(cpu, task_rq(p), &tmpmask);
+	rq = cpu_rq(best_cpu);
+	if (!smt_schedule(p, rq))
+		return NULL;
+	rq->preempt = p;
+	resched_idle(rq);
+	return rq;
+}
+
+static inline void resched_suitable_idle(struct task_struct *p)
+{
+	if (suitable_idle_cpus(p))
+		resched_best_idle(p, task_cpu(p));
+}
+
+static inline struct rq *rq_order(struct rq *rq, int cpu)
+{
+	return rq->rq_order[cpu];
+}
+#else /* CONFIG_SMP */
+static inline void set_cpuidle_map(int cpu)
+{
+}
+
+static inline void clear_cpuidle_map(int cpu)
+{
+}
+
+static inline bool suitable_idle_cpus(struct task_struct *p)
+{
+	return uprq->curr == uprq->idle;
+}
+
+static inline void resched_suitable_idle(struct task_struct *p)
+{
+}
+
+static inline void resched_curr(struct rq *rq)
+{
+	resched_task(rq->curr);
+}
+
+static inline void resched_if_idle(struct rq *rq)
+{
+}
+
+static inline bool rq_local(struct rq *rq)
+{
+	return true;
+}
+
+static inline struct rq *rq_order(struct rq *rq, int cpu)
+{
+	return rq;
+}
+
+static inline bool smt_schedule(struct task_struct *p, struct rq *rq)
+{
+	return true;
+}
+#endif /* CONFIG_SMP */
+
+static inline int normal_prio(struct task_struct *p)
+{
+	if (has_rt_policy(p))
+		return MAX_RT_PRIO - 1 - p->rt_priority;
+	if (idleprio_task(p))
+		return IDLE_PRIO;
+	if (iso_task(p))
+		return ISO_PRIO;
+	return NORMAL_PRIO;
+}
+
+/*
+ * Calculate the current priority, i.e. the priority
+ * taken into account by the scheduler. This value might
+ * be boosted by RT tasks as it will be RT if the task got
+ * RT-boosted. If not then it returns p->normal_prio.
+ */
+static int effective_prio(struct task_struct *p)
+{
+	p->normal_prio = normal_prio(p);
+	/*
+	 * If we are RT tasks or we were boosted to RT priority,
+	 * keep the priority unchanged. Otherwise, update priority
+	 * to the normal priority:
+	 */
+	if (!rt_prio(p->prio))
+		return p->normal_prio;
+	return p->prio;
+}
+
+/*
+ * activate_task - move a task to the runqueue. Enter with rq locked.
+ */
+static void activate_task(struct task_struct *p, struct rq *rq)
+{
+	resched_if_idle(rq);
+
+	/*
+	 * Sleep time is in units of nanosecs, so shift by 20 to get a
+	 * milliseconds-range estimation of the amount of time that the task
+	 * spent sleeping:
+	 */
+	if (unlikely(prof_on == SLEEP_PROFILING)) {
+		if (p->state == TASK_UNINTERRUPTIBLE)
+			profile_hits(SLEEP_PROFILING, (void *)get_wchan(p),
+				     (rq->niffies - p->last_ran) >> 20);
+	}
+
+	p->prio = effective_prio(p);
+	if (task_contributes_to_load(p))
+		rq->nr_uninterruptible--;
+
+	enqueue_task(rq, p, 0);
+	p->on_rq = TASK_ON_RQ_QUEUED;
+}
+
+/*
+ * deactivate_task - If it's running, it's not on the runqueue and we can just
+ * decrement the nr_running. Enter with rq locked.
+ */
+static inline void deactivate_task(struct task_struct *p, struct rq *rq)
+{
+	if (task_contributes_to_load(p))
+		rq->nr_uninterruptible++;
+
+	p->on_rq = 0;
+	sched_info_dequeued(rq, p);
+}
+
+#ifdef CONFIG_SMP
+void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
+{
+	struct rq *rq;
+
+	if (task_cpu(p) == new_cpu)
+		return;
+
+	/* Do NOT call set_task_cpu on a currently queued task as we will not
+	 * be reliably holding the rq lock after changing CPU. */
+	BUG_ON(task_queued(p));
+	rq = task_rq(p);
+
+#ifdef CONFIG_LOCKDEP
+	/*
+	 * The caller should hold either p->pi_lock or rq->lock, when changing
+	 * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks.
+	 *
+	 * Furthermore, all task_rq users should acquire both locks, see
+	 * task_rq_lock().
+	 */
+	WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) ||
+				      lockdep_is_held(rq->lock)));
+#endif
+
+	trace_sched_migrate_task(p, new_cpu);
+	perf_event_task_migrate(p);
+
+	/*
+	 * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
+	 * successfully executed on another CPU. We must ensure that updates of
+	 * per-task data have been completed by this moment.
+	 */
+	smp_wmb();
+
+	p->wake_cpu = new_cpu;
+
+	if (task_running(rq, p)) {
+		/*
+		 * We should only be calling this on a running task if we're
+		 * holding rq lock.
+		 */
+		lockdep_assert_held(rq->lock);
+
+		/*
+		 * We can't change the task_thread_info CPU on a running task
+		 * as p will still be protected by the rq lock of the CPU it
+		 * is still running on so we only set the wake_cpu for it to be
+		 * lazily updated once off the CPU.
+		 */
+		return;
+	}
+
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+	p->cpu = new_cpu;
+#else
+	task_thread_info(p)->cpu = new_cpu;
+#endif
+	/* We're no longer protecting p after this point since we're holding
+	 * the wrong runqueue lock. */
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * Move a task off the runqueue and take it to a cpu for it will
+ * become the running task.
+ */
+static inline void take_task(struct rq *rq, int cpu, struct task_struct *p)
+{
+	struct rq *p_rq = task_rq(p);
+
+	dequeue_task(p_rq, p, DEQUEUE_SAVE);
+	if (p_rq != rq) {
+		sched_info_dequeued(p_rq, p);
+		sched_info_queued(rq, p);
+	}
+	set_task_cpu(p, cpu);
+}
+
+/*
+ * Returns a descheduling task to the runqueue unless it is being
+ * deactivated.
+ */
+static inline void return_task(struct task_struct *p, struct rq *rq,
+			       int cpu, bool deactivate)
+{
+	if (deactivate)
+		deactivate_task(p, rq);
+	else {
+#ifdef CONFIG_SMP
+		/*
+		 * set_task_cpu was called on the running task that doesn't
+		 * want to deactivate so it has to be enqueued to a different
+		 * CPU and we need its lock. Tag it to be moved with as the
+		 * lock is dropped in finish_lock_switch.
+		 */
+		if (unlikely(p->wake_cpu != cpu))
+			p->on_rq = TASK_ON_RQ_MIGRATING;
+		else
+#endif
+			enqueue_task(rq, p, ENQUEUE_RESTORE);
+	}
+}
+
+/* Enter with rq lock held. We know p is on the local cpu */
+static inline void __set_tsk_resched(struct task_struct *p)
+{
+	set_tsk_need_resched(p);
+	set_preempt_need_resched();
+}
+
+/**
+ * task_curr - is this task currently executing on a CPU?
+ * @p: the task in question.
+ *
+ * Return: 1 if the task is currently executing. 0 otherwise.
+ */
+inline int task_curr(const struct task_struct *p)
+{
+	return cpu_curr(task_cpu(p)) == p;
+}
+
+#ifdef CONFIG_SMP
+/*
+ * wait_task_inactive - wait for a thread to unschedule.
+ *
+ * If @match_state is nonzero, it's the @p->state value just checked and
+ * not expected to change.  If it changes, i.e. @p might have woken up,
+ * then return zero.  When we succeed in waiting for @p to be off its CPU,
+ * we return a positive number (its total switch count).  If a second call
+ * a short while later returns the same number, the caller can be sure that
+ * @p has remained unscheduled the whole time.
+ *
+ * The caller must ensure that the task *will* unschedule sometime soon,
+ * else this function might spin for a *long* time. This function can't
+ * be called with interrupts off, or it may introduce deadlock with
+ * smp_call_function() if an IPI is sent by the same process we are
+ * waiting to become inactive.
+ */
+unsigned long wait_task_inactive(struct task_struct *p, long match_state)
+{
+	int running, queued;
+	unsigned long flags;
+	unsigned long ncsw;
+	struct rq *rq;
+
+	for (;;) {
+		rq = task_rq(p);
+
+		/*
+		 * If the task is actively running on another CPU
+		 * still, just relax and busy-wait without holding
+		 * any locks.
+		 *
+		 * NOTE! Since we don't hold any locks, it's not
+		 * even sure that "rq" stays as the right runqueue!
+		 * But we don't care, since this will return false
+		 * if the runqueue has changed and p is actually now
+		 * running somewhere else!
+		 */
+		while (task_running(rq, p)) {
+			if (match_state && unlikely(p->state != match_state))
+				return 0;
+			cpu_relax();
+		}
+
+		/*
+		 * Ok, time to look more closely! We need the rq
+		 * lock now, to be *sure*. If we're wrong, we'll
+		 * just go back and repeat.
+		 */
+		rq = task_rq_lock(p, &flags);
+		trace_sched_wait_task(p);
+		running = task_running(rq, p);
+		queued = task_on_rq_queued(p);
+		ncsw = 0;
+		if (!match_state || p->state == match_state)
+			ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
+		task_rq_unlock(rq, p, &flags);
+
+		/*
+		 * If it changed from the expected state, bail out now.
+		 */
+		if (unlikely(!ncsw))
+			break;
+
+		/*
+		 * Was it really running after all now that we
+		 * checked with the proper locks actually held?
+		 *
+		 * Oops. Go back and try again..
+		 */
+		if (unlikely(running)) {
+			cpu_relax();
+			continue;
+		}
+
+		/*
+		 * It's not enough that it's not actively running,
+		 * it must be off the runqueue _entirely_, and not
+		 * preempted!
+		 *
+		 * So if it was still runnable (but just not actively
+		 * running right now), it's preempted, and we should
+		 * yield - it could be a while.
+		 */
+		if (unlikely(queued)) {
+			ktime_t to = NSEC_PER_SEC / HZ;
+
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			schedule_hrtimeout(&to, HRTIMER_MODE_REL);
+			continue;
+		}
+
+		/*
+		 * Ahh, all good. It wasn't running, and it wasn't
+		 * runnable, which means that it will never become
+		 * running in the future either. We're all done!
+		 */
+		break;
+	}
+
+	return ncsw;
+}
+
+/***
+ * kick_process - kick a running thread to enter/exit the kernel
+ * @p: the to-be-kicked thread
+ *
+ * Cause a process which is running on another CPU to enter
+ * kernel-mode, without any delay. (to get signals handled.)
+ *
+ * NOTE: this function doesn't have to take the runqueue lock,
+ * because all it wants to ensure is that the remote task enters
+ * the kernel. If the IPI races and the task has been migrated
+ * to another CPU then no harm is done and the purpose has been
+ * achieved as well.
+ */
+void kick_process(struct task_struct *p)
+{
+	int cpu;
+
+	preempt_disable();
+	cpu = task_cpu(p);
+	if ((cpu != smp_processor_id()) && task_curr(p))
+		smp_sched_reschedule(cpu);
+	preempt_enable();
+}
+EXPORT_SYMBOL_GPL(kick_process);
+#endif
+
+/*
+ * RT tasks preempt purely on priority. SCHED_NORMAL tasks preempt on the
+ * basis of earlier deadlines. SCHED_IDLEPRIO don't preempt anything else or
+ * between themselves, they cooperatively multitask. An idle rq scores as
+ * prio PRIO_LIMIT so it is always preempted.
+ */
+static inline bool
+can_preempt(struct task_struct *p, int prio, u64 deadline)
+{
+	/* Better static priority RT task or better policy preemption */
+	if (p->prio < prio)
+		return true;
+	if (p->prio > prio)
+		return false;
+	if (p->policy == SCHED_BATCH)
+		return false;
+	/* SCHED_NORMAL and ISO will preempt based on deadline */
+	if (!deadline_before(p->deadline, deadline))
+		return false;
+	return true;
+}
+
+#ifdef CONFIG_SMP
+/*
+ * Check to see if p can run on cpu, and if not, whether there are any online
+ * CPUs it can run on instead. This only happens with the hotplug threads that
+ * bring up the CPUs.
+ */
+static inline bool sched_other_cpu(struct task_struct *p, int cpu)
+{
+	if (likely(cpumask_test_cpu(cpu, &p->cpus_allowed)))
+		return false;
+	if (p->nr_cpus_allowed == 1) {
+		cpumask_t valid_mask;
+
+		cpumask_and(&valid_mask, &p->cpus_allowed, cpu_online_mask);
+		if (unlikely(cpumask_empty(&valid_mask)))
+			return false;
+	}
+	return true;
+}
+
+static inline bool needs_other_cpu(struct task_struct *p, int cpu)
+{
+	if (cpumask_test_cpu(cpu, &p->cpus_allowed))
+		return false;
+	return true;
+}
+
+#define cpu_online_map		(*(cpumask_t *)cpu_online_mask)
+
+static void try_preempt(struct task_struct *p, struct rq *this_rq)
+{
+	int i, this_entries = rq_load(this_rq);
+	cpumask_t tmp;
+
+	if (suitable_idle_cpus(p) && resched_best_idle(p, task_cpu(p)))
+		return;
+
+	/* IDLEPRIO tasks never preempt anything but idle */
+	if (p->policy == SCHED_IDLEPRIO)
+		return;
+
+	cpumask_and(&tmp, &cpu_online_map, &p->cpus_allowed);
+
+	for (i = 0; i < num_possible_cpus(); i++) {
+		struct rq *rq = this_rq->cpu_order[i];
+
+		if (!cpumask_test_cpu(rq->cpu, &tmp))
+			continue;
+
+		if (!sched_interactive && rq != this_rq && rq_load(rq) <= this_entries)
+			continue;
+		if (smt_schedule(p, rq) && can_preempt(p, rq->rq_prio, rq->rq_deadline)) {
+			/* We set rq->preempting lockless, it's a hint only */
+			rq->preempting = p;
+			resched_curr(rq);
+			return;
+		}
+	}
+}
+
+static int __set_cpus_allowed_ptr(struct task_struct *p,
+				  const struct cpumask *new_mask, bool check);
+#else /* CONFIG_SMP */
+static inline bool needs_other_cpu(struct task_struct *p, int cpu)
+{
+	return false;
+}
+
+static void try_preempt(struct task_struct *p, struct rq *this_rq)
+{
+	if (p->policy == SCHED_IDLEPRIO)
+		return;
+	if (can_preempt(p, uprq->rq_prio, uprq->rq_deadline))
+		resched_curr(uprq);
+}
+
+static inline int __set_cpus_allowed_ptr(struct task_struct *p,
+					 const struct cpumask *new_mask, bool check)
+{
+	return set_cpus_allowed_ptr(p, new_mask);
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * wake flags
+ */
+#define WF_SYNC		0x01		/* waker goes to sleep after wakeup */
+#define WF_FORK		0x02		/* child wakeup after fork */
+#define WF_MIGRATED	0x04		/* internal use, task got migrated */
+
+static void
+ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
+{
+	struct rq *rq;
+
+	if (!schedstat_enabled())
+		return;
+
+	rq = this_rq();
+
+#ifdef CONFIG_SMP
+	if (cpu == rq->cpu) {
+		__schedstat_inc(rq->ttwu_local);
+	} else {
+		struct sched_domain *sd;
+
+		rcu_read_lock();
+		for_each_domain(rq->cpu, sd) {
+			if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
+				__schedstat_inc(sd->ttwu_wake_remote);
+				break;
+			}
+		}
+		rcu_read_unlock();
+	}
+
+#endif /* CONFIG_SMP */
+
+	__schedstat_inc(rq->ttwu_count);
+}
+
+static inline void ttwu_activate(struct rq *rq, struct task_struct *p)
+{
+	activate_task(p, rq);
+
+	/* if a worker is waking up, notify the workqueue */
+	if (p->flags & PF_WQ_WORKER)
+		wq_worker_waking_up(p, cpu_of(rq));
+}
+
+/*
+ * Mark the task runnable and perform wakeup-preemption.
+ */
+static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
+{
+	/*
+	 * Sync wakeups (i.e. those types of wakeups where the waker
+	 * has indicated that it will leave the CPU in short order)
+	 * don't trigger a preemption if there are no idle cpus,
+	 * instead waiting for current to deschedule.
+	 */
+	if (wake_flags & WF_SYNC)
+		resched_suitable_idle(p);
+	else
+		try_preempt(p, rq);
+	p->state = TASK_RUNNING;
+	trace_sched_wakeup(p);
+}
+
+static void
+ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags)
+{
+	lockdep_assert_held(rq->lock);
+
+#ifdef CONFIG_SMP
+	if (p->sched_contributes_to_load)
+		rq->nr_uninterruptible--;
+#endif
+
+	ttwu_activate(rq, p);
+	ttwu_do_wakeup(rq, p, wake_flags);
+}
+
+/*
+ * Called in case the task @p isn't fully descheduled from its runqueue,
+ * in this case we must do a remote wakeup. Its a 'light' wakeup though,
+ * since all we need to do is flip p->state to TASK_RUNNING, since
+ * the task is still ->on_rq.
+ */
+static int ttwu_remote(struct task_struct *p, int wake_flags)
+{
+	struct rq *rq;
+	int ret = 0;
+
+	rq = __task_rq_lock(p);
+	if (likely(task_on_rq_queued(p))) {
+		ttwu_do_wakeup(rq, p, wake_flags);
+		ret = 1;
+	}
+	__task_rq_unlock(rq);
+
+	return ret;
+}
+
+#ifdef CONFIG_SMP
+void sched_ttwu_pending(void)
+{
+	struct rq *rq = this_rq();
+	struct llist_node *llist = llist_del_all(&rq->wake_list);
+	struct task_struct *p, *t;
+	unsigned long flags;
+
+	if (!llist)
+		return;
+
+	rq_lock_irqsave(rq, &flags);
+
+	llist_for_each_entry_safe(p, t, llist, wake_entry)
+		ttwu_do_activate(rq, p, 0);
+
+	rq_unlock_irqrestore(rq, &flags);
+}
+
+void scheduler_ipi(void)
+{
+	/*
+	 * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting
+	 * TIF_NEED_RESCHED remotely (for the first time) will also send
+	 * this IPI.
+	 */
+	preempt_fold_need_resched();
+
+	if (llist_empty(&this_rq()->wake_list) && (!idle_cpu(smp_processor_id()) || need_resched()))
+		return;
+
+	/*
+	 * Not all reschedule IPI handlers call irq_enter/irq_exit, since
+	 * traditionally all their work was done from the interrupt return
+	 * path. Now that we actually do some work, we need to make sure
+	 * we do call them.
+	 *
+	 * Some archs already do call them, luckily irq_enter/exit nest
+	 * properly.
+	 *
+	 * Arguably we should visit all archs and update all handlers,
+	 * however a fair share of IPIs are still resched only so this would
+	 * somewhat pessimize the simple resched case.
+	 */
+	irq_enter();
+	sched_ttwu_pending();
+	irq_exit();
+}
+
+static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list)) {
+		if (!set_nr_if_polling(rq->idle))
+			smp_sched_reschedule(cpu);
+		else
+			trace_sched_wake_idle_without_ipi(cpu);
+	}
+}
+
+void wake_up_if_idle(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long flags;
+
+	rcu_read_lock();
+
+	if (!is_idle_task(rcu_dereference(rq->curr)))
+		goto out;
+
+	if (set_nr_if_polling(rq->idle)) {
+		trace_sched_wake_idle_without_ipi(cpu);
+	} else {
+		rq_lock_irqsave(rq, &flags);
+		if (likely(is_idle_task(rq->curr)))
+			smp_sched_reschedule(cpu);
+		/* Else cpu is not in idle, do nothing here */
+		rq_unlock_irqrestore(rq, &flags);
+	}
+
+out:
+	rcu_read_unlock();
+}
+
+static int valid_task_cpu(struct task_struct *p)
+{
+	cpumask_t valid_mask;
+
+	if (p->flags & PF_KTHREAD)
+		cpumask_and(&valid_mask, &p->cpus_allowed, cpu_all_mask);
+	else
+		cpumask_and(&valid_mask, &p->cpus_allowed, cpu_active_mask);
+
+	if (unlikely(!cpumask_weight(&valid_mask))) {
+		/* We shouldn't be hitting this any more */
+		printk(KERN_WARNING "SCHED: No cpumask for %s/%d weight %d\n", p->comm,
+		       p->pid, cpumask_weight(&p->cpus_allowed));
+		return cpumask_any(&p->cpus_allowed);
+	}
+	return cpumask_any(&valid_mask);
+}
+
+/*
+ * For a task that's just being woken up we have a valuable balancing
+ * opportunity so choose the nearest cache most lightly loaded runqueue.
+ * Entered with rq locked and returns with the chosen runqueue locked.
+ */
+static inline int select_best_cpu(struct task_struct *p)
+{
+	unsigned int idlest = ~0U;
+	struct rq *rq = NULL;
+	int i;
+
+	if (suitable_idle_cpus(p)) {
+		int cpu = task_cpu(p);
+
+		if (unlikely(needs_other_cpu(p, cpu)))
+			cpu = valid_task_cpu(p);
+		rq = resched_best_idle(p, cpu);
+		if (likely(rq))
+			return rq->cpu;
+	}
+
+	for (i = 0; i < num_possible_cpus(); i++) {
+		struct rq *other_rq = task_rq(p)->cpu_order[i];
+		int entries;
+
+		if (!other_rq->online)
+			continue;
+		if (needs_other_cpu(p, other_rq->cpu))
+			continue;
+		entries = rq_load(other_rq);
+		if (entries >= idlest)
+			continue;
+		idlest = entries;
+		rq = other_rq;
+	}
+	if (unlikely(!rq))
+		return task_cpu(p);
+	return rq->cpu;
+}
+#else /* CONFIG_SMP */
+static int valid_task_cpu(struct task_struct *p)
+{
+	return 0;
+}
+
+static inline int select_best_cpu(struct task_struct *p)
+{
+	return 0;
+}
+
+static struct rq *resched_best_idle(struct task_struct *p, int cpu)
+{
+	return NULL;
+}
+#endif /* CONFIG_SMP */
+
+static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+#if defined(CONFIG_SMP)
+	if (!cpus_share_cache(smp_processor_id(), cpu)) {
+		sched_clock_cpu(cpu); /* Sync clocks across CPUs */
+		ttwu_queue_remote(p, cpu, wake_flags);
+		return;
+	}
+#endif
+	rq_lock(rq);
+	ttwu_do_activate(rq, p, wake_flags);
+	rq_unlock(rq);
+}
+
+/***
+ * try_to_wake_up - wake up a thread
+ * @p: the thread to be awakened
+ * @state: the mask of task states that can be woken
+ * @wake_flags: wake modifier flags (WF_*)
+ *
+ * Put it on the run-queue if it's not already there. The "current"
+ * thread is always on the run-queue (except when the actual
+ * re-schedule is in progress), and as such you're allowed to do
+ * the simpler "current->state = TASK_RUNNING" to mark yourself
+ * runnable without the overhead of this.
+ *
+ * Return: %true if @p was woken up, %false if it was already running.
+ * or @state didn't match @p's state.
+ */
+static int
+try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
+{
+	unsigned long flags;
+	int cpu, success = 0;
+
+	/*
+	 * If we are going to wake up a thread waiting for CONDITION we
+	 * need to ensure that CONDITION=1 done by the caller can not be
+	 * reordered with p->state check below. This pairs with mb() in
+	 * set_current_state() the waiting thread does.
+	 */
+	raw_spin_lock_irqsave(&p->pi_lock, flags);
+	smp_mb__after_spinlock();
+	/* state is a volatile long, どうして、分からない */
+	if (!((unsigned int)p->state & state))
+		goto out;
+
+	trace_sched_waking(p);
+
+	/* We're going to change ->state: */
+	success = 1;
+	cpu = task_cpu(p);
+
+	/*
+	 * Ensure we load p->on_rq _after_ p->state, otherwise it would
+	 * be possible to, falsely, observe p->on_rq == 0 and get stuck
+	 * in smp_cond_load_acquire() below.
+	 *
+	 * sched_ttwu_pending()                 try_to_wake_up()
+	 *   [S] p->on_rq = 1;                  [L] P->state
+	 *       UNLOCK rq->lock  -----.
+	 *                              \
+	 *				 +---   RMB
+	 * schedule()                   /
+	 *       LOCK rq->lock    -----'
+	 *       UNLOCK rq->lock
+	 *
+	 * [task p]
+	 *   [S] p->state = UNINTERRUPTIBLE     [L] p->on_rq
+	 *
+	 * Pairs with the UNLOCK+LOCK on rq->lock from the
+	 * last wakeup of our task and the schedule that got our task
+	 * current.
+	 */
+	smp_rmb();
+	if (p->on_rq && ttwu_remote(p, wake_flags))
+		goto stat;
+
+#ifdef CONFIG_SMP
+	/*
+	 * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
+	 * possible to, falsely, observe p->on_cpu == 0.
+	 *
+	 * One must be running (->on_cpu == 1) in order to remove oneself
+	 * from the runqueue.
+	 *
+	 *  [S] ->on_cpu = 1;	[L] ->on_rq
+	 *      UNLOCK rq->lock
+	 *			RMB
+	 *      LOCK   rq->lock
+	 *  [S] ->on_rq = 0;    [L] ->on_cpu
+	 *
+	 * Pairs with the full barrier implied in the UNLOCK+LOCK on rq->lock
+	 * from the consecutive calls to schedule(); the first switching to our
+	 * task, the second putting it to sleep.
+	 */
+	smp_rmb();
+
+	/*
+	 * If the owning (remote) CPU is still in the middle of schedule() with
+	 * this task as prev, wait until its done referencing the task.
+	 *
+	 * Pairs with the smp_store_release() in finish_task().
+	 *
+	 * This ensures that tasks getting woken will be fully ordered against
+	 * their previous state and preserve Program Order.
+	 */
+	smp_cond_load_acquire(&p->on_cpu, !VAL);
+
+	p->sched_contributes_to_load = !!task_contributes_to_load(p);
+	p->state = TASK_WAKING;
+
+	if (p->in_iowait) {
+		delayacct_blkio_end(p);
+		atomic_dec(&task_rq(p)->nr_iowait);
+	}
+
+	cpu = select_best_cpu(p);
+	if (task_cpu(p) != cpu)
+		set_task_cpu(p, cpu);
+
+#else /* CONFIG_SMP */
+
+	if (p->in_iowait) {
+		delayacct_blkio_end(p);
+		atomic_dec(&task_rq(p)->nr_iowait);
+	}
+
+#endif /* CONFIG_SMP */
+
+	ttwu_queue(p, cpu, wake_flags);
+stat:
+	ttwu_stat(p, cpu, wake_flags);
+out:
+	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+
+	return success;
+}
+
+/**
+ * try_to_wake_up_local - try to wake up a local task with rq lock held
+ * @p: the thread to be awakened
+ *
+ * Put @p on the run-queue if it's not already there. The caller must
+ * ensure that rq is locked and, @p is not the current task.
+ * rq stays locked over invocation.
+ */
+static void try_to_wake_up_local(struct task_struct *p)
+{
+	struct rq *rq = task_rq(p);
+
+	if (WARN_ON_ONCE(rq != this_rq()) ||
+	    WARN_ON_ONCE(p == current))
+		return;
+
+	lockdep_assert_held(rq->lock);
+
+	if (!raw_spin_trylock(&p->pi_lock)) {
+		/*
+		 * This is OK, because current is on_cpu, which avoids it being
+		 * picked for load-balance and preemption/IRQs are still
+		 * disabled avoiding further scheduler activity on it and we've
+		 * not yet picked a replacement task.
+		 */
+		rq_unlock(rq);
+		raw_spin_lock(&p->pi_lock);
+		rq_lock(rq);
+	}
+
+	if (!(p->state & TASK_NORMAL))
+		goto out;
+
+	trace_sched_waking(p);
+
+	if (!task_on_rq_queued(p)) {
+		if (p->in_iowait) {
+			delayacct_blkio_end(p);
+			atomic_dec(&rq->nr_iowait);
+		}
+		ttwu_activate(rq, p);
+	}
+
+	ttwu_do_wakeup(rq, p, 0);
+	ttwu_stat(p, smp_processor_id(), 0);
+out:
+	raw_spin_unlock(&p->pi_lock);
+}
+
+/**
+ * wake_up_process - Wake up a specific process
+ * @p: The process to be woken up.
+ *
+ * Attempt to wake up the nominated process and move it to the set of runnable
+ * processes.
+ *
+ * Return: 1 if the process was woken up, 0 if it was already running.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
+ */
+int wake_up_process(struct task_struct *p)
+{
+	return try_to_wake_up(p, TASK_NORMAL, 0);
+}
+EXPORT_SYMBOL(wake_up_process);
+
+int wake_up_state(struct task_struct *p, unsigned int state)
+{
+	return try_to_wake_up(p, state, 0);
+}
+
+static void time_slice_expired(struct task_struct *p, struct rq *rq);
+
+/*
+ * Perform scheduler related setup for a newly forked process p.
+ * p is forked by current.
+ */
+int sched_fork(unsigned long __maybe_unused clone_flags, struct task_struct *p)
+{
+	unsigned long flags;
+	int cpu = get_cpu();
+
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+	INIT_HLIST_HEAD(&p->preempt_notifiers);
+#endif
+	/*
+	 * We mark the process as NEW here. This guarantees that
+	 * nobody will actually run it, and a signal or other external
+	 * event cannot wake it up and insert it on the runqueue either.
+	 */
+	p->state = TASK_NEW;
+
+	/*
+	 * The process state is set to the same value of the process executing
+	 * do_fork() code. That is running. This guarantees that nobody will
+	 * actually run it, and a signal or other external event cannot wake
+	 * it up and insert it on the runqueue either.
+	 */
+
+	/* Should be reset in fork.c but done here for ease of MuQSS patching */
+	p->on_cpu =
+	p->on_rq =
+	p->utime =
+	p->stime =
+	p->sched_time =
+	p->stime_ns =
+	p->utime_ns = 0;
+	skiplist_node_init(&p->node);
+
+	/*
+	 * Revert to default priority/policy on fork if requested.
+	 */
+	if (unlikely(p->sched_reset_on_fork)) {
+		if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) {
+			p->policy = SCHED_NORMAL;
+			p->normal_prio = normal_prio(p);
+		}
+
+		if (PRIO_TO_NICE(p->static_prio) < 0) {
+			p->static_prio = NICE_TO_PRIO(0);
+			p->normal_prio = p->static_prio;
+		}
+
+		/*
+		 * We don't need the reset flag anymore after the fork. It has
+		 * fulfilled its duty:
+		 */
+		p->sched_reset_on_fork = 0;
+	}
+
+	/*
+	 * Silence PROVE_RCU.
+	 */
+	raw_spin_lock_irqsave(&p->pi_lock, flags);
+	set_task_cpu(p, cpu);
+	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+
+#ifdef CONFIG_SCHED_INFO
+	if (unlikely(sched_info_on()))
+		memset(&p->sched_info, 0, sizeof(p->sched_info));
+#endif
+	init_task_preempt_count(p);
+
+	put_cpu();
+	return 0;
+}
+
+#ifdef CONFIG_SCHEDSTATS
+
+DEFINE_STATIC_KEY_FALSE(sched_schedstats);
+static bool __initdata __sched_schedstats = false;
+
+static void set_schedstats(bool enabled)
+{
+	if (enabled)
+		static_branch_enable(&sched_schedstats);
+	else
+		static_branch_disable(&sched_schedstats);
+}
+
+void force_schedstat_enabled(void)
+{
+	if (!schedstat_enabled()) {
+		pr_info("kernel profiling enabled schedstats, disable via kernel.sched_schedstats.\n");
+		static_branch_enable(&sched_schedstats);
+	}
+}
+
+static int __init setup_schedstats(char *str)
+{
+	int ret = 0;
+	if (!str)
+		goto out;
+
+	/*
+	 * This code is called before jump labels have been set up, so we can't
+	 * change the static branch directly just yet.  Instead set a temporary
+	 * variable so init_schedstats() can do it later.
+	 */
+	if (!strcmp(str, "enable")) {
+		__sched_schedstats = true;
+		ret = 1;
+	} else if (!strcmp(str, "disable")) {
+		__sched_schedstats = false;
+		ret = 1;
+	}
+out:
+	if (!ret)
+		pr_warn("Unable to parse schedstats=\n");
+
+	return ret;
+}
+__setup("schedstats=", setup_schedstats);
+
+static void __init init_schedstats(void)
+{
+	set_schedstats(__sched_schedstats);
+}
+
+#ifdef CONFIG_PROC_SYSCTL
+int sysctl_schedstats(struct ctl_table *table, int write,
+			 void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table t;
+	int err;
+	int state = static_branch_likely(&sched_schedstats);
+
+	if (write && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	t = *table;
+	t.data = &state;
+	err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
+	if (err < 0)
+		return err;
+	if (write)
+		set_schedstats(state);
+	return err;
+}
+#endif /* CONFIG_PROC_SYSCTL */
+#else  /* !CONFIG_SCHEDSTATS */
+static inline void init_schedstats(void) {}
+#endif /* CONFIG_SCHEDSTATS */
+
+static void update_cpu_clock_switch(struct rq *rq, struct task_struct *p);
+
+static void account_task_cpu(struct rq *rq, struct task_struct *p)
+{
+	update_clocks(rq);
+	/* This isn't really a context switch but accounting is the same */
+	update_cpu_clock_switch(rq, p);
+	p->last_ran = rq->niffies;
+}
+
+bool sched_smp_initialized __read_mostly;
+
+static inline int hrexpiry_enabled(struct rq *rq)
+{
+	if (unlikely(!cpu_active(cpu_of(rq)) || !sched_smp_initialized))
+		return 0;
+	return hrtimer_is_hres_active(&rq->hrexpiry_timer);
+}
+
+/*
+ * Use HR-timers to deliver accurate preemption points.
+ */
+static inline void hrexpiry_clear(struct rq *rq)
+{
+	if (!hrexpiry_enabled(rq))
+		return;
+	if (hrtimer_active(&rq->hrexpiry_timer))
+		hrtimer_cancel(&rq->hrexpiry_timer);
+}
+
+/*
+ * High-resolution time_slice expiry.
+ * Runs from hardirq context with interrupts disabled.
+ */
+static enum hrtimer_restart hrexpiry(struct hrtimer *timer)
+{
+	struct rq *rq = container_of(timer, struct rq, hrexpiry_timer);
+	struct task_struct *p;
+
+	/* This can happen during CPU hotplug / resume */
+	if (unlikely(cpu_of(rq) != smp_processor_id()))
+		goto out;
+
+	/*
+	 * We're doing this without the runqueue lock but this should always
+	 * be run on the local CPU. Time slice should run out in __schedule
+	 * but we set it to zero here in case niffies is slightly less.
+	 */
+	p = rq->curr;
+	p->time_slice = 0;
+	__set_tsk_resched(p);
+out:
+	return HRTIMER_NORESTART;
+}
+
+/*
+ * Called to set the hrexpiry timer state.
+ *
+ * called with irqs disabled from the local CPU only
+ */
+static void hrexpiry_start(struct rq *rq, u64 delay)
+{
+	if (!hrexpiry_enabled(rq))
+		return;
+
+	hrtimer_start(&rq->hrexpiry_timer, ns_to_ktime(delay),
+		      HRTIMER_MODE_REL_PINNED);
+}
+
+static void init_rq_hrexpiry(struct rq *rq)
+{
+	hrtimer_init(&rq->hrexpiry_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	rq->hrexpiry_timer.function = hrexpiry;
+}
+
+static inline int rq_dither(struct rq *rq)
+{
+	if (!hrexpiry_enabled(rq))
+		return HALF_JIFFY_US;
+	return 0;
+}
+
+/*
+ * wake_up_new_task - wake up a newly created task for the first time.
+ *
+ * This function will do some initial scheduler statistics housekeeping
+ * that must be done for every newly created context, then puts the task
+ * on the runqueue and wakes it.
+ */
+void wake_up_new_task(struct task_struct *p)
+{
+	struct task_struct *parent, *rq_curr;
+	struct rq *rq, *new_rq;
+	unsigned long flags;
+
+	parent = p->parent;
+
+	raw_spin_lock_irqsave(&p->pi_lock, flags);
+	p->state = TASK_RUNNING;
+	/* Task_rq can't change yet on a new task */
+	new_rq = rq = task_rq(p);
+	if (unlikely(needs_other_cpu(p, task_cpu(p)))) {
+		set_task_cpu(p, valid_task_cpu(p));
+		new_rq = task_rq(p);
+	}
+
+	double_rq_lock(rq, new_rq);
+	rq_curr = rq->curr;
+
+	/*
+	 * Make sure we do not leak PI boosting priority to the child.
+	 */
+	p->prio = rq_curr->normal_prio;
+
+	trace_sched_wakeup_new(p);
+
+	/*
+	 * Share the timeslice between parent and child, thus the
+	 * total amount of pending timeslices in the system doesn't change,
+	 * resulting in more scheduling fairness. If it's negative, it won't
+	 * matter since that's the same as being 0. rq->rq_deadline is only
+	 * modified within schedule() so it is always equal to
+	 * current->deadline.
+	 */
+	account_task_cpu(rq, rq_curr);
+	p->last_ran = rq_curr->last_ran;
+	if (likely(rq_curr->policy != SCHED_FIFO)) {
+		rq_curr->time_slice /= 2;
+		if (rq_curr->time_slice < RESCHED_US) {
+			/*
+			 * Forking task has run out of timeslice. Reschedule it and
+			 * start its child with a new time slice and deadline. The
+			 * child will end up running first because its deadline will
+			 * be slightly earlier.
+			 */
+			__set_tsk_resched(rq_curr);
+			time_slice_expired(p, new_rq);
+			if (suitable_idle_cpus(p))
+				resched_best_idle(p, task_cpu(p));
+			else if (unlikely(rq != new_rq))
+				try_preempt(p, new_rq);
+		} else {
+			p->time_slice = rq_curr->time_slice;
+			if (rq_curr == parent && rq == new_rq && !suitable_idle_cpus(p)) {
+				/*
+				 * The VM isn't cloned, so we're in a good position to
+				 * do child-runs-first in anticipation of an exec. This
+				 * usually avoids a lot of COW overhead.
+				 */
+				__set_tsk_resched(rq_curr);
+			} else {
+				/*
+				 * Adjust the hrexpiry since rq_curr will keep
+				 * running and its timeslice has been shortened.
+				 */
+				hrexpiry_start(rq, US_TO_NS(rq_curr->time_slice));
+				try_preempt(p, new_rq);
+			}
+		}
+	} else {
+		time_slice_expired(p, new_rq);
+		try_preempt(p, new_rq);
+	}
+	activate_task(p, new_rq);
+	double_rq_unlock(rq, new_rq);
+	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+}
+
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+
+static struct static_key preempt_notifier_key = STATIC_KEY_INIT_FALSE;
+
+void preempt_notifier_inc(void)
+{
+	static_key_slow_inc(&preempt_notifier_key);
+}
+EXPORT_SYMBOL_GPL(preempt_notifier_inc);
+
+void preempt_notifier_dec(void)
+{
+	static_key_slow_dec(&preempt_notifier_key);
+}
+EXPORT_SYMBOL_GPL(preempt_notifier_dec);
+
+/**
+ * preempt_notifier_register - tell me when current is being preempted & rescheduled
+ * @notifier: notifier struct to register
+ */
+void preempt_notifier_register(struct preempt_notifier *notifier)
+{
+	if (!static_key_false(&preempt_notifier_key))
+		WARN(1, "registering preempt_notifier while notifiers disabled\n");
+
+	hlist_add_head(&notifier->link, &current->preempt_notifiers);
+}
+EXPORT_SYMBOL_GPL(preempt_notifier_register);
+
+/**
+ * preempt_notifier_unregister - no longer interested in preemption notifications
+ * @notifier: notifier struct to unregister
+ *
+ * This is *not* safe to call from within a preemption notifier.
+ */
+void preempt_notifier_unregister(struct preempt_notifier *notifier)
+{
+	hlist_del(&notifier->link);
+}
+EXPORT_SYMBOL_GPL(preempt_notifier_unregister);
+
+static void __fire_sched_in_preempt_notifiers(struct task_struct *curr)
+{
+	struct preempt_notifier *notifier;
+
+	hlist_for_each_entry(notifier, &curr->preempt_notifiers, link)
+		notifier->ops->sched_in(notifier, raw_smp_processor_id());
+}
+
+static __always_inline void fire_sched_in_preempt_notifiers(struct task_struct *curr)
+{
+	if (static_key_false(&preempt_notifier_key))
+		__fire_sched_in_preempt_notifiers(curr);
+}
+
+static void
+__fire_sched_out_preempt_notifiers(struct task_struct *curr,
+				 struct task_struct *next)
+{
+	struct preempt_notifier *notifier;
+
+	hlist_for_each_entry(notifier, &curr->preempt_notifiers, link)
+		notifier->ops->sched_out(notifier, next);
+}
+
+static __always_inline void
+fire_sched_out_preempt_notifiers(struct task_struct *curr,
+				 struct task_struct *next)
+{
+	if (static_key_false(&preempt_notifier_key))
+		__fire_sched_out_preempt_notifiers(curr, next);
+}
+
+#else /* !CONFIG_PREEMPT_NOTIFIERS */
+
+static inline void fire_sched_in_preempt_notifiers(struct task_struct *curr)
+{
+}
+
+static inline void
+fire_sched_out_preempt_notifiers(struct task_struct *curr,
+				 struct task_struct *next)
+{
+}
+
+#endif /* CONFIG_PREEMPT_NOTIFIERS */
+
+static inline void prepare_task(struct task_struct *next)
+{
+	/*
+	 * Claim the task as running, we do this before switching to it
+	 * such that any running task will have this set.
+	 */
+	next->on_cpu = 1;
+}
+
+static inline void finish_task(struct task_struct *prev)
+{
+#ifdef CONFIG_SMP
+	/*
+	 * After ->on_cpu is cleared, the task can be moved to a different CPU.
+	 * We must ensure this doesn't happen until the switch is completely
+	 * finished.
+	 *
+	 * In particular, the load of prev->state in finish_task_switch() must
+	 * happen before this.
+	 *
+	 * Pairs with the smp_cond_load_acquire() in try_to_wake_up().
+	 */
+	smp_store_release(&prev->on_cpu, 0);
+#endif
+}
+
+static inline void
+prepare_lock_switch(struct rq *rq, struct task_struct *next)
+{
+	/*
+	 * Since the runqueue lock will be released by the next
+	 * task (which is an invalid locking op but in the case
+	 * of the scheduler it's an obvious special-case), so we
+	 * do an early lockdep release here:
+	 */
+	spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
+#ifdef CONFIG_DEBUG_SPINLOCK
+	/* this is a valid case when another task releases the spinlock */
+	rq->lock.owner = next;
+#endif
+}
+
+static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
+{
+	/*
+	 * If we are tracking spinlock dependencies then we have to
+	 * fix up the runqueue lock - which gets 'carried over' from
+	 * prev into current:
+	 */
+	spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
+
+#ifdef CONFIG_SMP
+	/*
+	 * If prev was marked as migrating to another CPU in return_task, drop
+	 * the local runqueue lock but leave interrupts disabled and grab the
+	 * remote lock we're migrating it to before enabling them.
+	 */
+	if (unlikely(task_on_rq_migrating(prev))) {
+		sched_info_dequeued(rq, prev);
+		/*
+		 * We move the ownership of prev to the new cpu now. ttwu can't
+		 * activate prev to the wrong cpu since it has to grab this
+		 * runqueue in ttwu_remote.
+		 */
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+		prev->cpu = prev->wake_cpu;
+#else
+		task_thread_info(prev)->cpu = prev->wake_cpu;
+#endif
+		raw_spin_unlock(rq->lock);
+
+		raw_spin_lock(&prev->pi_lock);
+		rq = __task_rq_lock(prev);
+		/* Check that someone else hasn't already queued prev */
+		if (likely(!task_queued(prev))) {
+			enqueue_task(rq, prev, 0);
+			prev->on_rq = TASK_ON_RQ_QUEUED;
+			/* Wake up the CPU if it's not already running */
+			resched_if_idle(rq);
+		}
+		raw_spin_unlock(&prev->pi_lock);
+	}
+#endif
+	rq_unlock(rq);
+
+	do_pending_softirq(rq, current);
+
+	local_irq_enable();
+}
+
+/**
+ * prepare_task_switch - prepare to switch tasks
+ * @rq: the runqueue preparing to switch
+ * @next: the task we are going to switch to.
+ *
+ * This is called with the rq lock held and interrupts off. It must
+ * be paired with a subsequent finish_task_switch after the context
+ * switch.
+ *
+ * prepare_task_switch sets up locking and calls architecture specific
+ * hooks.
+ */
+static inline void
+prepare_task_switch(struct rq *rq, struct task_struct *prev,
+		    struct task_struct *next)
+{
+	sched_info_switch(rq, prev, next);
+	perf_event_task_sched_out(prev, next);
+	fire_sched_out_preempt_notifiers(prev, next);
+	prepare_task(next);
+	prepare_arch_switch(next);
+}
+
+/**
+ * finish_task_switch - clean up after a task-switch
+ * @rq: runqueue associated with task-switch
+ * @prev: the thread we just switched away from.
+ *
+ * finish_task_switch must be called after the context switch, paired
+ * with a prepare_task_switch call before the context switch.
+ * finish_task_switch will reconcile locking set up by prepare_task_switch,
+ * and do any other architecture-specific cleanup actions.
+ *
+ * Note that we may have delayed dropping an mm in context_switch(). If
+ * so, we finish that here outside of the runqueue lock.  (Doing it
+ * with the lock held can cause deadlocks; see schedule() for
+ * details.)
+ *
+ * The context switch have flipped the stack from under us and restored the
+ * local variables which were saved when this task called schedule() in the
+ * past. prev == current is still correct but we need to recalculate this_rq
+ * because prev may have moved to another CPU.
+ */
+static void finish_task_switch(struct task_struct *prev)
+	__releases(rq->lock)
+{
+	struct rq *rq = this_rq();
+	struct mm_struct *mm = rq->prev_mm;
+	long prev_state;
+
+	/*
+	 * The previous task will have left us with a preempt_count of 2
+	 * because it left us after:
+	 *
+	 *	schedule()
+	 *	  preempt_disable();			// 1
+	 *	  __schedule()
+	 *	    raw_spin_lock_irq(rq->lock)	// 2
+	 *
+	 * Also, see FORK_PREEMPT_COUNT.
+	 */
+	if (WARN_ONCE(preempt_count() != 2*PREEMPT_DISABLE_OFFSET,
+		      "corrupted preempt_count: %s/%d/0x%x\n",
+		      current->comm, current->pid, preempt_count()))
+		preempt_count_set(FORK_PREEMPT_COUNT);
+
+	rq->prev_mm = NULL;
+
+	/*
+	 * A task struct has one reference for the use as "current".
+	 * If a task dies, then it sets TASK_DEAD in tsk->state and calls
+	 * schedule one last time. The schedule call will never return, and
+	 * the scheduled task must drop that reference.
+	 *
+	 * We must observe prev->state before clearing prev->on_cpu (in
+	 * finish_task), otherwise a concurrent wakeup can get prev
+	 * running on another CPU and we could rave with its RUNNING -> DEAD
+	 * transition, resulting in a double drop.
+	 */
+	prev_state = prev->state;
+	vtime_task_switch(prev);
+	perf_event_task_sched_in(prev, current);
+	finish_task(prev);
+	finish_lock_switch(rq, prev);
+	finish_arch_post_lock_switch();
+
+	fire_sched_in_preempt_notifiers(current);
+	/*
+	 * When switching through a kernel thread, the loop in
+	 * membarrier_{private,global}_expedited() may have observed that
+	 * kernel thread and not issued an IPI. It is therefore possible to
+	 * schedule between user->kernel->user threads without passing though
+	 * switch_mm(). Membarrier requires a barrier after storing to
+	 * rq->curr, before returning to userspace, so provide them here:
+	 *
+	 * - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly
+	 *   provided by mmdrop(),
+	 * - a sync_core for SYNC_CORE.
+	 */
+	if (mm) {
+		membarrier_mm_sync_core_before_usermode(mm);
+		mmdrop(mm);
+	}
+	if (unlikely(prev_state == TASK_DEAD)) {
+		/*
+		 * Remove function-return probe instances associated with this
+		 * task and put them back on the free list.
+		 */
+		kprobe_flush_task(prev);
+
+		/* Task is done with its stack. */
+		put_task_stack(prev);
+
+		put_task_struct(prev);
+	}
+}
+
+/**
+ * schedule_tail - first thing a freshly forked thread must call.
+ * @prev: the thread we just switched away from.
+ */
+asmlinkage __visible void schedule_tail(struct task_struct *prev)
+{
+	/*
+	 * New tasks start with FORK_PREEMPT_COUNT, see there and
+	 * finish_task_switch() for details.
+	 *
+	 * finish_task_switch() will drop rq->lock() and lower preempt_count
+	 * and the preempt_enable() will end up enabling preemption (on
+	 * PREEMPT_COUNT kernels).
+	 */
+
+	finish_task_switch(prev);
+	preempt_enable();
+
+	if (current->set_child_tid)
+		put_user(task_pid_vnr(current), current->set_child_tid);
+}
+
+/*
+ * context_switch - switch to the new MM and the new thread's register state.
+ */
+static __always_inline void
+context_switch(struct rq *rq, struct task_struct *prev,
+	       struct task_struct *next)
+{
+	struct mm_struct *mm, *oldmm;
+
+	prepare_task_switch(rq, prev, next);
+
+	mm = next->mm;
+	oldmm = prev->active_mm;
+	/*
+	 * For paravirt, this is coupled with an exit in switch_to to
+	 * combine the page table reload and the switch backend into
+	 * one hypercall.
+	 */
+	arch_start_context_switch(prev);
+
+	/*
+	 * If mm is non-NULL, we pass through switch_mm(). If mm is
+	 * NULL, we will pass through mmdrop() in finish_task_switch().
+	 * Both of these contain the full memory barrier required by
+	 * membarrier after storing to rq->curr, before returning to
+	 * user-space.
+	 */
+	if (!mm) {
+		next->active_mm = oldmm;
+		mmgrab(oldmm);
+		enter_lazy_tlb(oldmm, next);
+	} else
+		switch_mm_irqs_off(oldmm, mm, next);
+
+	if (!prev->mm) {
+		prev->active_mm = NULL;
+		rq->prev_mm = oldmm;
+	}
+	prepare_lock_switch(rq, next);
+
+	/* Here we just switch the register state and the stack. */
+	switch_to(prev, next, prev);
+	barrier();
+
+	finish_task_switch(prev);
+}
+
+/*
+ * nr_running, nr_uninterruptible and nr_context_switches:
+ *
+ * externally visible scheduler statistics: current number of runnable
+ * threads, total number of context switches performed since bootup.
+ */
+unsigned long nr_running(void)
+{
+	unsigned long i, sum = 0;
+
+	for_each_online_cpu(i)
+		sum += cpu_rq(i)->nr_running;
+
+	return sum;
+}
+
+static unsigned long nr_uninterruptible(void)
+{
+	unsigned long i, sum = 0;
+
+	for_each_online_cpu(i)
+		sum += cpu_rq(i)->nr_uninterruptible;
+
+	return sum;
+}
+
+/*
+ * Check if only the current task is running on the CPU.
+ *
+ * Caution: this function does not check that the caller has disabled
+ * preemption, thus the result might have a time-of-check-to-time-of-use
+ * race.  The caller is responsible to use it correctly, for example:
+ *
+ * - from a non-preemptable section (of course)
+ *
+ * - from a thread that is bound to a single CPU
+ *
+ * - in a loop with very short iterations (e.g. a polling loop)
+ */
+bool single_task_running(void)
+{
+	struct rq *rq = cpu_rq(smp_processor_id());
+
+	if (rq_load(rq) == 1)
+		return true;
+	else
+		return false;
+}
+EXPORT_SYMBOL(single_task_running);
+
+unsigned long long nr_context_switches(void)
+{
+	int i;
+	unsigned long long sum = 0;
+
+	for_each_possible_cpu(i)
+		sum += cpu_rq(i)->nr_switches;
+
+	return sum;
+}
+
+/*
+ * IO-wait accounting, and how its mostly bollocks (on SMP).
+ *
+ * The idea behind IO-wait account is to account the idle time that we could
+ * have spend running if it were not for IO. That is, if we were to improve the
+ * storage performance, we'd have a proportional reduction in IO-wait time.
+ *
+ * This all works nicely on UP, where, when a task blocks on IO, we account
+ * idle time as IO-wait, because if the storage were faster, it could've been
+ * running and we'd not be idle.
+ *
+ * This has been extended to SMP, by doing the same for each CPU. This however
+ * is broken.
+ *
+ * Imagine for instance the case where two tasks block on one CPU, only the one
+ * CPU will have IO-wait accounted, while the other has regular idle. Even
+ * though, if the storage were faster, both could've ran at the same time,
+ * utilising both CPUs.
+ *
+ * This means, that when looking globally, the current IO-wait accounting on
+ * SMP is a lower bound, by reason of under accounting.
+ *
+ * Worse, since the numbers are provided per CPU, they are sometimes
+ * interpreted per CPU, and that is nonsensical. A blocked task isn't strictly
+ * associated with any one particular CPU, it can wake to another CPU than it
+ * blocked on. This means the per CPU IO-wait number is meaningless.
+ *
+ * Task CPU affinities can make all that even more 'interesting'.
+ */
+
+unsigned long nr_iowait(void)
+{
+	unsigned long i, sum = 0;
+
+	for_each_possible_cpu(i)
+		sum += atomic_read(&cpu_rq(i)->nr_iowait);
+
+	return sum;
+}
+
+/*
+ * Consumers of these two interfaces, like for example the cpufreq menu
+ * governor are using nonsensical data. Boosting frequency for a CPU that has
+ * IO-wait which might not even end up running the task when it does become
+ * runnable.
+ */
+
+unsigned long nr_iowait_cpu(int cpu)
+{
+	struct rq *this = cpu_rq(cpu);
+	return atomic_read(&this->nr_iowait);
+}
+
+unsigned long nr_active(void)
+{
+	return nr_running() + nr_uninterruptible();
+}
+
+/*
+ * I/O wait is the number of running or queued tasks with their ->rq pointer
+ * set to this cpu as being the CPU they're more likely to run on.
+ */
+void get_iowait_load(unsigned long *nr_waiters, unsigned long *load)
+{
+	struct rq *rq = this_rq();
+
+	*nr_waiters = atomic_read(&rq->nr_iowait);
+	*load = rq_load(rq);
+}
+
+/* Variables and functions for calc_load */
+static unsigned long calc_load_update;
+unsigned long avenrun[3];
+EXPORT_SYMBOL(avenrun);
+
+/**
+ * get_avenrun - get the load average array
+ * @loads:	pointer to dest load array
+ * @offset:	offset to add
+ * @shift:	shift count to shift the result left
+ *
+ * These values are estimates at best, so no need for locking.
+ */
+void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
+{
+	loads[0] = (avenrun[0] + offset) << shift;
+	loads[1] = (avenrun[1] + offset) << shift;
+	loads[2] = (avenrun[2] + offset) << shift;
+}
+
+static unsigned long
+calc_load(unsigned long load, unsigned long exp, unsigned long active)
+{
+	unsigned long newload;
+
+	newload = load * exp + active * (FIXED_1 - exp);
+	if (active >= load)
+		newload += FIXED_1-1;
+
+	return newload / FIXED_1;
+}
+
+/*
+ * calc_load - update the avenrun load estimates every LOAD_FREQ seconds.
+ */
+void calc_global_load(unsigned long ticks)
+{
+	long active;
+
+	if (time_before(jiffies, READ_ONCE(calc_load_update)))
+		return;
+	active = nr_active() * FIXED_1;
+
+	avenrun[0] = calc_load(avenrun[0], EXP_1, active);
+	avenrun[1] = calc_load(avenrun[1], EXP_5, active);
+	avenrun[2] = calc_load(avenrun[2], EXP_15, active);
+
+	calc_load_update = jiffies + LOAD_FREQ;
+}
+
+DEFINE_PER_CPU(struct kernel_stat, kstat);
+DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat);
+
+EXPORT_PER_CPU_SYMBOL(kstat);
+EXPORT_PER_CPU_SYMBOL(kernel_cpustat);
+
+#ifdef CONFIG_PARAVIRT
+static inline u64 steal_ticks(u64 steal)
+{
+	if (unlikely(steal > NSEC_PER_SEC))
+		return div_u64(steal, TICK_NSEC);
+
+	return __iter_div_u64_rem(steal, TICK_NSEC, &steal);
+}
+#endif
+
+#ifndef nsecs_to_cputime
+# define nsecs_to_cputime(__nsecs)	nsecs_to_jiffies(__nsecs)
+#endif
+
+/*
+ * On each tick, add the number of nanoseconds to the unbanked variables and
+ * once one tick's worth has accumulated, account it allowing for accurate
+ * sub-tick accounting and totals.
+ */
+static void pc_idle_time(struct rq *rq, struct task_struct *idle, unsigned long ns)
+{
+	u64 *cpustat = kcpustat_this_cpu->cpustat;
+	unsigned long ticks;
+
+	if (atomic_read(&rq->nr_iowait) > 0) {
+		rq->iowait_ns += ns;
+		if (rq->iowait_ns >= JIFFY_NS) {
+			ticks = NS_TO_JIFFIES(rq->iowait_ns);
+			cpustat[CPUTIME_IOWAIT] += (__force u64)TICK_NSEC * ticks;
+			rq->iowait_ns %= JIFFY_NS;
+		}
+	} else {
+		rq->idle_ns += ns;
+		if (rq->idle_ns >= JIFFY_NS) {
+			ticks = NS_TO_JIFFIES(rq->idle_ns);
+			cpustat[CPUTIME_IDLE] += (__force u64)TICK_NSEC * ticks;
+			rq->idle_ns %= JIFFY_NS;
+		}
+	}
+	acct_update_integrals(idle);
+}
+
+static void pc_system_time(struct rq *rq, struct task_struct *p,
+			   int hardirq_offset, unsigned long ns)
+{
+	u64 *cpustat = kcpustat_this_cpu->cpustat;
+	unsigned long ticks;
+
+	p->stime_ns += ns;
+	if (p->stime_ns >= JIFFY_NS) {
+		ticks = NS_TO_JIFFIES(p->stime_ns);
+		p->stime_ns %= JIFFY_NS;
+		p->stime += (__force u64)TICK_NSEC * ticks;
+		account_group_system_time(p, TICK_NSEC * ticks);
+	}
+	p->sched_time += ns;
+	account_group_exec_runtime(p, ns);
+
+	if (hardirq_count() - hardirq_offset) {
+		rq->irq_ns += ns;
+		if (rq->irq_ns >= JIFFY_NS) {
+			ticks = NS_TO_JIFFIES(rq->irq_ns);
+			cpustat[CPUTIME_IRQ] += (__force u64)TICK_NSEC * ticks;
+			rq->irq_ns %= JIFFY_NS;
+		}
+	} else if (in_serving_softirq()) {
+		rq->softirq_ns += ns;
+		if (rq->softirq_ns >= JIFFY_NS) {
+			ticks = NS_TO_JIFFIES(rq->softirq_ns);
+			cpustat[CPUTIME_SOFTIRQ] += (__force u64)TICK_NSEC * ticks;
+			rq->softirq_ns %= JIFFY_NS;
+		}
+	} else {
+		rq->system_ns += ns;
+		if (rq->system_ns >= JIFFY_NS) {
+			ticks = NS_TO_JIFFIES(rq->system_ns);
+			cpustat[CPUTIME_SYSTEM] += (__force u64)TICK_NSEC * ticks;
+			rq->system_ns %= JIFFY_NS;
+		}
+	}
+	acct_update_integrals(p);
+}
+
+static void pc_user_time(struct rq *rq, struct task_struct *p, unsigned long ns)
+{
+	u64 *cpustat = kcpustat_this_cpu->cpustat;
+	unsigned long ticks;
+
+	p->utime_ns += ns;
+	if (p->utime_ns >= JIFFY_NS) {
+		ticks = NS_TO_JIFFIES(p->utime_ns);
+		p->utime_ns %= JIFFY_NS;
+		p->utime += (__force u64)TICK_NSEC * ticks;
+		account_group_user_time(p, TICK_NSEC * ticks);
+	}
+	p->sched_time += ns;
+	account_group_exec_runtime(p, ns);
+
+	if (this_cpu_ksoftirqd() == p) {
+		/*
+		 * ksoftirqd time do not get accounted in cpu_softirq_time.
+		 * So, we have to handle it separately here.
+		 */
+		rq->softirq_ns += ns;
+		if (rq->softirq_ns >= JIFFY_NS) {
+			ticks = NS_TO_JIFFIES(rq->softirq_ns);
+			cpustat[CPUTIME_SOFTIRQ] += (__force u64)TICK_NSEC * ticks;
+			rq->softirq_ns %= JIFFY_NS;
+		}
+	}
+
+	if (task_nice(p) > 0 || idleprio_task(p)) {
+		rq->nice_ns += ns;
+		if (rq->nice_ns >= JIFFY_NS) {
+			ticks = NS_TO_JIFFIES(rq->nice_ns);
+			cpustat[CPUTIME_NICE] += (__force u64)TICK_NSEC * ticks;
+			rq->nice_ns %= JIFFY_NS;
+		}
+	} else {
+		rq->user_ns += ns;
+		if (rq->user_ns >= JIFFY_NS) {
+			ticks = NS_TO_JIFFIES(rq->user_ns);
+			cpustat[CPUTIME_USER] += (__force u64)TICK_NSEC * ticks;
+			rq->user_ns %= JIFFY_NS;
+		}
+	}
+	acct_update_integrals(p);
+}
+
+/*
+ * This is called on clock ticks.
+ * Bank in p->sched_time the ns elapsed since the last tick or switch.
+ * CPU scheduler quota accounting is also performed here in microseconds.
+ */
+static void update_cpu_clock_tick(struct rq *rq, struct task_struct *p)
+{
+	s64 account_ns = rq->niffies - p->last_ran;
+	struct task_struct *idle = rq->idle;
+
+	/* Accurate tick timekeeping */
+	if (user_mode(get_irq_regs()))
+		pc_user_time(rq, p, account_ns);
+	else if (p != idle || (irq_count() != HARDIRQ_OFFSET)) {
+		pc_system_time(rq, p, HARDIRQ_OFFSET, account_ns);
+	} else
+		pc_idle_time(rq, idle, account_ns);
+
+	/* time_slice accounting is done in usecs to avoid overflow on 32bit */
+	if (p->policy != SCHED_FIFO && p != idle)
+		p->time_slice -= NS_TO_US(account_ns);
+
+	p->last_ran = rq->niffies;
+}
+
+/*
+ * This is called on context switches.
+ * Bank in p->sched_time the ns elapsed since the last tick or switch.
+ * CPU scheduler quota accounting is also performed here in microseconds.
+ */
+static void update_cpu_clock_switch(struct rq *rq, struct task_struct *p)
+{
+	s64 account_ns = rq->niffies - p->last_ran;
+	struct task_struct *idle = rq->idle;
+
+	/* Accurate subtick timekeeping */
+	if (p != idle)
+		pc_user_time(rq, p, account_ns);
+	else
+		pc_idle_time(rq, idle, account_ns);
+
+	/* time_slice accounting is done in usecs to avoid overflow on 32bit */
+	if (p->policy != SCHED_FIFO && p != idle)
+		p->time_slice -= NS_TO_US(account_ns);
+}
+
+/*
+ * Return any ns on the sched_clock that have not yet been accounted in
+ * @p in case that task is currently running.
+ *
+ * Called with task_rq_lock(p) held.
+ */
+static inline u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
+{
+	u64 ns = 0;
+
+	/*
+	 * Must be ->curr _and_ ->on_rq.  If dequeued, we would
+	 * project cycles that may never be accounted to this
+	 * thread, breaking clock_gettime().
+	 */
+	if (p == rq->curr && task_on_rq_queued(p)) {
+		update_clocks(rq);
+		ns = rq->niffies - p->last_ran;
+	}
+
+	return ns;
+}
+
+/*
+ * Return accounted runtime for the task.
+ * Return separately the current's pending runtime that have not been
+ * accounted yet.
+ *
+ */
+unsigned long long task_sched_runtime(struct task_struct *p)
+{
+	unsigned long flags;
+	struct rq *rq;
+	u64 ns;
+
+#if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
+	/*
+	 * 64-bit doesn't need locks to atomically read a 64bit value.
+	 * So we have a optimization chance when the task's delta_exec is 0.
+	 * Reading ->on_cpu is racy, but this is ok.
+	 *
+	 * If we race with it leaving CPU, we'll take a lock. So we're correct.
+	 * If we race with it entering CPU, unaccounted time is 0. This is
+	 * indistinguishable from the read occurring a few cycles earlier.
+	 * If we see ->on_cpu without ->on_rq, the task is leaving, and has
+	 * been accounted, so we're correct here as well.
+	 */
+	if (!p->on_cpu || !task_on_rq_queued(p))
+		return tsk_seruntime(p);
+#endif
+
+	rq = task_rq_lock(p, &flags);
+	ns = p->sched_time + do_task_delta_exec(p, rq);
+	task_rq_unlock(rq, p, &flags);
+
+	return ns;
+}
+
+/*
+ * Functions to test for when SCHED_ISO tasks have used their allocated
+ * quota as real time scheduling and convert them back to SCHED_NORMAL. All
+ * data is modified only by the local runqueue during scheduler_tick with
+ * interrupts disabled.
+ */
+
+/*
+ * Test if SCHED_ISO tasks have run longer than their alloted period as RT
+ * tasks and set the refractory flag if necessary. There is 10% hysteresis
+ * for unsetting the flag. 115/128 is ~90/100 as a fast shift instead of a
+ * slow division.
+ */
+static inline void iso_tick(struct rq *rq)
+{
+	rq->iso_ticks = rq->iso_ticks * (ISO_PERIOD - 1) / ISO_PERIOD;
+	rq->iso_ticks += 100;
+	if (rq->iso_ticks > ISO_PERIOD * sched_iso_cpu) {
+		rq->iso_refractory = true;
+		if (unlikely(rq->iso_ticks > ISO_PERIOD * 100))
+			rq->iso_ticks = ISO_PERIOD * 100;
+	}
+}
+
+/* No SCHED_ISO task was running so decrease rq->iso_ticks */
+static inline void no_iso_tick(struct rq *rq, int ticks)
+{
+	if (rq->iso_ticks > 0 || rq->iso_refractory) {
+		rq->iso_ticks = rq->iso_ticks * (ISO_PERIOD - ticks) / ISO_PERIOD;
+		if (rq->iso_ticks < ISO_PERIOD * (sched_iso_cpu * 115 / 128)) {
+			rq->iso_refractory = false;
+			if (unlikely(rq->iso_ticks < 0))
+				rq->iso_ticks = 0;
+		}
+	}
+}
+
+/* This manages tasks that have run out of timeslice during a scheduler_tick */
+static void task_running_tick(struct rq *rq)
+{
+	struct task_struct *p = rq->curr;
+
+	/*
+	 * If a SCHED_ISO task is running we increment the iso_ticks. In
+	 * order to prevent SCHED_ISO tasks from causing starvation in the
+	 * presence of true RT tasks we account those as iso_ticks as well.
+	 */
+	if (rt_task(p) || task_running_iso(p))
+		iso_tick(rq);
+	else
+		no_iso_tick(rq, 1);
+
+	/* SCHED_FIFO tasks never run out of timeslice. */
+	if (p->policy == SCHED_FIFO)
+		return;
+
+	if (iso_task(p)) {
+		if (task_running_iso(p)) {
+			if (rq->iso_refractory) {
+				/*
+				 * SCHED_ISO task is running as RT and limit
+				 * has been hit. Force it to reschedule as
+				 * SCHED_NORMAL by zeroing its time_slice
+				 */
+				p->time_slice = 0;
+			}
+		} else if (!rq->iso_refractory) {
+			/* Can now run again ISO. Reschedule to pick up prio */
+			goto out_resched;
+		}
+	}
+
+	/*
+	 * Tasks that were scheduled in the first half of a tick are not
+	 * allowed to run into the 2nd half of the next tick if they will
+	 * run out of time slice in the interim. Otherwise, if they have
+	 * less than RESCHED_US μs of time slice left they will be rescheduled.
+	 * Dither is used as a backup for when hrexpiry is disabled or high res
+	 * timers not configured in.
+	 */
+	if (p->time_slice - rq->dither >= RESCHED_US)
+		return;
+out_resched:
+	rq_lock(rq);
+	__set_tsk_resched(p);
+	rq_unlock(rq);
+}
+
+#ifdef CONFIG_NO_HZ_FULL
+/*
+ * We can stop the timer tick any time highres timers are active since
+ * we rely entirely on highres timeouts for task expiry rescheduling.
+ */
+static void sched_stop_tick(struct rq *rq, int cpu)
+{
+	if (!hrexpiry_enabled(rq))
+		return;
+	if (!tick_nohz_full_enabled())
+		return;
+	if (!tick_nohz_full_cpu(cpu))
+		return;
+	tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED);
+}
+
+static inline void sched_start_tick(struct rq *rq, int cpu)
+{
+	tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
+}
+
+/**
+ * scheduler_tick_max_deferment
+ *
+ * Keep at least one tick per second when a single
+ * active task is running.
+ *
+ * This makes sure that uptime continues to move forward, even
+ * with a very low granularity.
+ *
+ * Return: Maximum deferment in nanoseconds.
+ */
+u64 scheduler_tick_max_deferment(void)
+{
+	struct rq *rq = this_rq();
+	unsigned long next, now = READ_ONCE(jiffies);
+
+	next = rq->last_jiffy + HZ;
+
+	if (time_before_eq(next, now))
+		return 0;
+
+	return jiffies_to_nsecs(next - now);
+}
+#else
+static inline void sched_stop_tick(struct rq *rq, int cpu)
+{
+}
+
+static inline void sched_start_tick(struct rq *rq, int cpu)
+{
+}
+#endif
+
+/*
+ * This function gets called by the timer code, with HZ frequency.
+ * We call it with interrupts disabled.
+ */
+void scheduler_tick(void)
+{
+	int cpu __maybe_unused = smp_processor_id();
+	struct rq *rq = cpu_rq(cpu);
+
+	sched_clock_tick();
+	update_clocks(rq);
+	update_load_avg(rq, 0);
+	update_cpu_clock_tick(rq, rq->curr);
+	if (!rq_idle(rq))
+		task_running_tick(rq);
+	else if (rq->last_jiffy > rq->last_scheduler_tick)
+		no_iso_tick(rq, rq->last_jiffy - rq->last_scheduler_tick);
+	rq->last_scheduler_tick = rq->last_jiffy;
+	rq->last_tick = rq->clock;
+	perf_event_task_tick();
+	sched_stop_tick(rq, cpu);
+}
+
+#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
+				defined(CONFIG_PREEMPT_TRACER))
+/*
+ * If the value passed in is equal to the current preempt count
+ * then we just disabled preemption. Start timing the latency.
+ */
+static inline void preempt_latency_start(int val)
+{
+	if (preempt_count() == val) {
+		unsigned long ip = get_lock_parent_ip();
+#ifdef CONFIG_DEBUG_PREEMPT
+		current->preempt_disable_ip = ip;
+#endif
+		trace_preempt_off(CALLER_ADDR0, ip);
+	}
+}
+
+void preempt_count_add(int val)
+{
+#ifdef CONFIG_DEBUG_PREEMPT
+	/*
+	 * Underflow?
+	 */
+	if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
+		return;
+#endif
+	__preempt_count_add(val);
+#ifdef CONFIG_DEBUG_PREEMPT
+	/*
+	 * Spinlock count overflowing soon?
+	 */
+	DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
+				PREEMPT_MASK - 10);
+#endif
+	preempt_latency_start(val);
+}
+EXPORT_SYMBOL(preempt_count_add);
+NOKPROBE_SYMBOL(preempt_count_add);
+
+/*
+ * If the value passed in equals to the current preempt count
+ * then we just enabled preemption. Stop timing the latency.
+ */
+static inline void preempt_latency_stop(int val)
+{
+	if (preempt_count() == val)
+		trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());
+}
+
+void preempt_count_sub(int val)
+{
+#ifdef CONFIG_DEBUG_PREEMPT
+	/*
+	 * Underflow?
+	 */
+	if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
+		return;
+	/*
+	 * Is the spinlock portion underflowing?
+	 */
+	if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) &&
+			!(preempt_count() & PREEMPT_MASK)))
+		return;
+#endif
+
+	preempt_latency_stop(val);
+	__preempt_count_sub(val);
+}
+EXPORT_SYMBOL(preempt_count_sub);
+NOKPROBE_SYMBOL(preempt_count_sub);
+
+#else
+static inline void preempt_latency_start(int val) { }
+static inline void preempt_latency_stop(int val) { }
+#endif
+
+static inline unsigned long get_preempt_disable_ip(struct task_struct *p)
+{
+#ifdef CONFIG_DEBUG_PREEMPT
+	return p->preempt_disable_ip;
+#else
+	return 0;
+#endif
+}
+
+/*
+ * The time_slice is only refilled when it is empty and that is when we set a
+ * new deadline. Make sure update_clocks has been called recently to update
+ * rq->niffies.
+ */
+static void time_slice_expired(struct task_struct *p, struct rq *rq)
+{
+	p->time_slice = timeslice();
+	p->deadline = rq->niffies + task_deadline_diff(p);
+#ifdef CONFIG_SMT_NICE
+	if (!p->mm)
+		p->smt_bias = 0;
+	else if (rt_task(p))
+		p->smt_bias = 1 << 30;
+	else if (task_running_iso(p))
+		p->smt_bias = 1 << 29;
+	else if (idleprio_task(p)) {
+		if (task_running_idle(p))
+			p->smt_bias = 0;
+		else
+			p->smt_bias = 1;
+	} else if (--p->smt_bias < 1)
+		p->smt_bias = MAX_PRIO - p->static_prio;
+#endif
+}
+
+/*
+ * Timeslices below RESCHED_US are considered as good as expired as there's no
+ * point rescheduling when there's so little time left. SCHED_BATCH tasks
+ * have been flagged be not latency sensitive and likely to be fully CPU
+ * bound so every time they're rescheduled they have their time_slice
+ * refilled, but get a new later deadline to have little effect on
+ * SCHED_NORMAL tasks.
+
+ */
+static inline void check_deadline(struct task_struct *p, struct rq *rq)
+{
+	if (p->time_slice < RESCHED_US || batch_task(p))
+		time_slice_expired(p, rq);
+}
+
+/*
+ * Task selection with skiplists is a simple matter of picking off the first
+ * task in the sorted list, an O(1) operation. The lookup is amortised O(1)
+ * being bound to the number of processors.
+ *
+ * Runqueues are selectively locked based on their unlocked data and then
+ * unlocked if not needed. At most 3 locks will be held at any time and are
+ * released as soon as they're no longer needed. All balancing between CPUs
+ * is thus done here in an extremely simple first come best fit manner.
+ *
+ * This iterates over runqueues in cache locality order. In interactive mode
+ * it iterates over all CPUs and finds the task with the best key/deadline.
+ * In non-interactive mode it will only take a task if it's from the current
+ * runqueue or a runqueue with more tasks than the current one with a better
+ * key/deadline.
+ */
+#ifdef CONFIG_SMP
+static inline struct task_struct
+*earliest_deadline_task(struct rq *rq, int cpu, struct task_struct *idle)
+{
+	struct rq *locked = NULL, *chosen = NULL;
+	struct task_struct *edt = idle;
+	int i, best_entries = 0;
+	u64 best_key = ~0ULL;
+
+	for (i = 0; i < total_runqueues; i++) {
+		struct rq *other_rq = rq_order(rq, i);
+		skiplist_node *next;
+		int entries;
+
+		entries = other_rq->sl->entries;
+		/*
+		 * Check for queued entres lockless first. The local runqueue
+		 * is locked so entries will always be accurate.
+		 */
+		if (!sched_interactive) {
+			/*
+			 * Don't reschedule balance across nodes unless the CPU
+			 * is idle.
+			 */
+			if (edt != idle && rq->cpu_locality[other_rq->cpu] > 3)
+				break;
+			if (entries <= best_entries)
+				continue;
+		} else if (!entries)
+			continue;
+
+		/* if (i) implies other_rq != rq */
+		if (i) {
+			/* Check for best id queued lockless first */
+			if (other_rq->best_key >= best_key)
+				continue;
+
+			if (unlikely(!trylock_rq(rq, other_rq)))
+				continue;
+
+			/* Need to reevaluate entries after locking */
+			entries = other_rq->sl->entries;
+			if (unlikely(!entries)) {
+				unlock_rq(other_rq);
+				continue;
+			}
+		}
+
+		next = other_rq->node;
+		/*
+		 * In interactive mode we check beyond the best entry on other
+		 * runqueues if we can't get the best for smt or affinity
+		 * reasons.
+		 */
+		while ((next = next->next[0]) != other_rq->node) {
+			struct task_struct *p;
+			u64 key = next->key;
+
+			/* Reevaluate key after locking */
+			if (key >= best_key)
+				break;
+
+			p = next->value;
+			if (!smt_schedule(p, rq)) {
+				if (i && !sched_interactive)
+					break;
+				continue;
+			}
+
+			if (sched_other_cpu(p, cpu)) {
+				if (sched_interactive || !i)
+					continue;
+				break;
+			}
+			/* Make sure affinity is ok */
+			if (i) {
+				/* From this point on p is the best so far */
+				if (locked)
+					unlock_rq(locked);
+				chosen = locked = other_rq;
+			}
+			best_entries = entries;
+			best_key = key;
+			edt = p;
+			break;
+		}
+		/* rq->preempting is a hint only as the state may have changed
+		 * since it was set with the resched call but if we have met
+		 * the condition we can break out here. */
+		if (edt == rq->preempting)
+			break;
+		if (i && other_rq != chosen)
+			unlock_rq(other_rq);
+	}
+
+	if (likely(edt != idle))
+		take_task(rq, cpu, edt);
+
+	if (locked)
+		unlock_rq(locked);
+
+	rq->preempting = NULL;
+
+	return edt;
+}
+#else /* CONFIG_SMP */
+static inline struct task_struct
+*earliest_deadline_task(struct rq *rq, int cpu, struct task_struct *idle)
+{
+	struct task_struct *edt;
+
+	if (unlikely(!rq->sl->entries))
+		return idle;
+	edt = rq->node->next[0]->value;
+	take_task(rq, cpu, edt);
+	return edt;
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * Print scheduling while atomic bug:
+ */
+static noinline void __schedule_bug(struct task_struct *prev)
+{
+	/* Save this before calling printk(), since that will clobber it */
+	unsigned long preempt_disable_ip = get_preempt_disable_ip(current);
+
+	if (oops_in_progress)
+		return;
+
+	printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n",
+		prev->comm, prev->pid, preempt_count());
+
+	debug_show_held_locks(prev);
+	print_modules();
+	if (irqs_disabled())
+		print_irqtrace_events(prev);
+	if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)
+	    && in_atomic_preempt_off()) {
+		pr_err("Preemption disabled at:");
+		print_ip_sym(preempt_disable_ip);
+		pr_cont("\n");
+	}
+	dump_stack();
+	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
+}
+
+/*
+ * Various schedule()-time debugging checks and statistics:
+ */
+static inline void schedule_debug(struct task_struct *prev)
+{
+#ifdef CONFIG_SCHED_STACK_END_CHECK
+	if (task_stack_end_corrupted(prev))
+		panic("corrupted stack end detected inside scheduler\n");
+#endif
+
+	if (unlikely(in_atomic_preempt_off())) {
+		__schedule_bug(prev);
+		preempt_count_set(PREEMPT_DISABLED);
+	}
+	rcu_sleep_check();
+
+	profile_hit(SCHED_PROFILING, __builtin_return_address(0));
+
+	schedstat_inc(this_rq()->sched_count);
+}
+
+/*
+ * The currently running task's information is all stored in rq local data
+ * which is only modified by the local CPU.
+ */
+static inline void set_rq_task(struct rq *rq, struct task_struct *p)
+{
+	if (p == rq->idle || p->policy == SCHED_FIFO)
+		hrexpiry_clear(rq);
+	else
+		hrexpiry_start(rq, US_TO_NS(p->time_slice));
+	if (rq->clock - rq->last_tick > HALF_JIFFY_NS)
+		rq->dither = 0;
+	else
+		rq->dither = rq_dither(rq);
+
+	rq->rq_deadline = p->deadline;
+	rq->rq_prio = p->prio;
+#ifdef CONFIG_SMT_NICE
+	rq->rq_mm = p->mm;
+	rq->rq_smt_bias = p->smt_bias;
+#endif
+}
+
+#ifdef CONFIG_SMT_NICE
+static void check_no_siblings(struct rq __maybe_unused *this_rq) {}
+static void wake_no_siblings(struct rq __maybe_unused *this_rq) {}
+static void (*check_siblings)(struct rq *this_rq) = &check_no_siblings;
+static void (*wake_siblings)(struct rq *this_rq) = &wake_no_siblings;
+
+/* Iterate over smt siblings when we've scheduled a process on cpu and decide
+ * whether they should continue running or be descheduled. */
+static void check_smt_siblings(struct rq *this_rq)
+{
+	int other_cpu;
+
+	for_each_cpu(other_cpu, &this_rq->thread_mask) {
+		struct task_struct *p;
+		struct rq *rq;
+
+		rq = cpu_rq(other_cpu);
+		if (rq_idle(rq))
+			continue;
+		p = rq->curr;
+		if (!smt_schedule(p, this_rq))
+			resched_curr(rq);
+	}
+}
+
+static void wake_smt_siblings(struct rq *this_rq)
+{
+	int other_cpu;
+
+	for_each_cpu(other_cpu, &this_rq->thread_mask) {
+		struct rq *rq;
+
+		rq = cpu_rq(other_cpu);
+		if (rq_idle(rq))
+			resched_idle(rq);
+	}
+}
+#else
+static void check_siblings(struct rq __maybe_unused *this_rq) {}
+static void wake_siblings(struct rq __maybe_unused *this_rq) {}
+#endif
+
+/*
+ * schedule() is the main scheduler function.
+ *
+ * The main means of driving the scheduler and thus entering this function are:
+ *
+ *   1. Explicit blocking: mutex, semaphore, waitqueue, etc.
+ *
+ *   2. TIF_NEED_RESCHED flag is checked on interrupt and userspace return
+ *      paths. For example, see arch/x86/entry_64.S.
+ *
+ *      To drive preemption between tasks, the scheduler sets the flag in timer
+ *      interrupt handler scheduler_tick().
+ *
+ *   3. Wakeups don't really cause entry into schedule(). They add a
+ *      task to the run-queue and that's it.
+ *
+ *      Now, if the new task added to the run-queue preempts the current
+ *      task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets
+ *      called on the nearest possible occasion:
+ *
+ *       - If the kernel is preemptible (CONFIG_PREEMPT=y):
+ *
+ *         - in syscall or exception context, at the next outmost
+ *           preempt_enable(). (this might be as soon as the wake_up()'s
+ *           spin_unlock()!)
+ *
+ *         - in IRQ context, return from interrupt-handler to
+ *           preemptible context
+ *
+ *       - If the kernel is not preemptible (CONFIG_PREEMPT is not set)
+ *         then at the next:
+ *
+ *          - cond_resched() call
+ *          - explicit schedule() call
+ *          - return from syscall or exception to user-space
+ *          - return from interrupt-handler to user-space
+ *
+ * WARNING: must be called with preemption disabled!
+ */
+static void __sched notrace __schedule(bool preempt)
+{
+	struct task_struct *prev, *next, *idle;
+	unsigned long *switch_count;
+	bool deactivate = false;
+	struct rq *rq;
+	u64 niffies;
+	int cpu;
+
+	cpu = smp_processor_id();
+	rq = cpu_rq(cpu);
+	prev = rq->curr;
+	idle = rq->idle;
+
+	schedule_debug(prev);
+
+	local_irq_disable();
+	rcu_note_context_switch(preempt);
+
+	/*
+	 * Make sure that signal_pending_state()->signal_pending() below
+	 * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
+	 * done by the caller to avoid the race with signal_wake_up().
+	 *
+	 * The membarrier system call requires a full memory barrier
+	 * after coming from user-space, before storing to rq->curr.
+	 */
+	rq_lock(rq);
+	smp_mb__after_spinlock();
+#ifdef CONFIG_SMP
+	if (rq->preempt) {
+		/*
+		 * Make sure resched_curr hasn't triggered a preemption
+		 * locklessly on a task that has since scheduled away. Spurious
+		 * wakeup of idle is okay though.
+		 */
+		if (unlikely(preempt && prev != idle && !test_tsk_need_resched(prev))) {
+			rq->preempt = NULL;
+			clear_preempt_need_resched();
+			rq_unlock_irq(rq);
+			return;
+		}
+		rq->preempt = NULL;
+	}
+#endif
+
+	switch_count = &prev->nivcsw;
+	if (!preempt && prev->state) {
+		if (unlikely(signal_pending_state(prev->state, prev))) {
+			prev->state = TASK_RUNNING;
+		} else {
+			deactivate = true;
+			prev->on_rq = 0;
+
+			if (prev->in_iowait) {
+				atomic_inc(&rq->nr_iowait);
+				delayacct_blkio_start();
+			}
+
+			/*
+			 * If a worker is going to sleep, notify and
+			 * ask workqueue whether it wants to wake up a
+			 * task to maintain concurrency.  If so, wake
+			 * up the task.
+			 */
+			if (prev->flags & PF_WQ_WORKER) {
+				struct task_struct *to_wakeup;
+
+				to_wakeup = wq_worker_sleeping(prev);
+				if (to_wakeup)
+					try_to_wake_up_local(to_wakeup);
+			}
+		}
+		switch_count = &prev->nvcsw;
+	}
+
+	/*
+	 * Store the niffy value here for use by the next task's last_ran
+	 * below to avoid losing niffies due to update_clocks being called
+	 * again after this point.
+	 */
+	update_clocks(rq);
+	niffies = rq->niffies;
+	update_cpu_clock_switch(rq, prev);
+
+	clear_tsk_need_resched(prev);
+	clear_preempt_need_resched();
+
+	if (idle != prev) {
+		check_deadline(prev, rq);
+		return_task(prev, rq, cpu, deactivate);
+	}
+
+	next = earliest_deadline_task(rq, cpu, idle);
+	if (likely(next->prio != PRIO_LIMIT))
+		clear_cpuidle_map(cpu);
+	else {
+		set_cpuidle_map(cpu);
+		update_load_avg(rq, 0);
+	}
+
+	set_rq_task(rq, next);
+	next->last_ran = niffies;
+
+	if (likely(prev != next)) {
+		/*
+		 * Don't reschedule an idle task or deactivated tasks
+		 */
+		if (prev == idle)
+			rq->nr_running++;
+		else if (!deactivate)
+			resched_suitable_idle(prev);
+		if (unlikely(next == idle)) {
+			rq->nr_running--;
+			wake_siblings(rq);
+		} else
+			check_siblings(rq);
+		rq->nr_switches++;
+		rq->curr = next;
+		/*
+		 * The membarrier system call requires each architecture
+		 * to have a full memory barrier after updating
+		 * rq->curr, before returning to user-space.
+		 *
+		 * Here are the schemes providing that barrier on the
+		 * various architectures:
+		 * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC.
+		 *   switch_mm() rely on membarrier_arch_switch_mm() on PowerPC.
+		 * - finish_lock_switch() for weakly-ordered
+		 *   architectures where spin_unlock is a full barrier,
+		 * - switch_to() for arm64 (weakly-ordered, spin_unlock
+		 *   is a RELEASE barrier),
+		 */
+		++*switch_count;
+
+		trace_sched_switch(preempt, prev, next);
+		context_switch(rq, prev, next); /* unlocks the rq */
+	} else {
+		check_siblings(rq);
+		rq_unlock(rq);
+		do_pending_softirq(rq, next);
+		local_irq_enable();
+	}
+}
+
+void __noreturn do_task_dead(void)
+{
+	/*
+	 * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
+	 * when the following two conditions become true.
+	 *   - There is race condition of mmap_sem (It is acquired by
+	 *     exit_mm()), and
+	 *   - SMI occurs before setting TASK_RUNINNG.
+	 *     (or hypervisor of virtual machine switches to other guest)
+	 *  As a result, we may become TASK_RUNNING after becoming TASK_DEAD
+	 *
+	 * To avoid it, we have to wait for releasing tsk->pi_lock which
+	 * is held by try_to_wake_up()
+	 */
+	raw_spin_lock_irq(&current->pi_lock);
+	raw_spin_unlock_irq(&current->pi_lock);
+
+	/* Causes final put_task_struct in finish_task_switch(). */
+	__set_current_state(TASK_DEAD);
+
+	/* Tell freezer to ignore us: */
+	current->flags |= PF_NOFREEZE;
+	__schedule(false);
+	BUG();
+
+	/* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */
+	for (;;)
+		cpu_relax();
+}
+
+static inline void sched_submit_work(struct task_struct *tsk)
+{
+	if (!tsk->state || tsk_is_pi_blocked(tsk) ||
+	    preempt_count() ||
+	    signal_pending_state(tsk->state, tsk))
+		return;
+
+	/*
+	 * If we are going to sleep and we have plugged IO queued,
+	 * make sure to submit it to avoid deadlocks.
+	 */
+	if (blk_needs_flush_plug(tsk))
+		blk_schedule_flush_plug(tsk);
+}
+
+asmlinkage __visible void __sched schedule(void)
+{
+	struct task_struct *tsk = current;
+
+	sched_submit_work(tsk);
+	do {
+		preempt_disable();
+		__schedule(false);
+		sched_preempt_enable_no_resched();
+	} while (need_resched());
+}
+
+EXPORT_SYMBOL(schedule);
+
+/*
+ * synchronize_rcu_tasks() makes sure that no task is stuck in preempted
+ * state (have scheduled out non-voluntarily) by making sure that all
+ * tasks have either left the run queue or have gone into user space.
+ * As idle tasks do not do either, they must not ever be preempted
+ * (schedule out non-voluntarily).
+ *
+ * schedule_idle() is similar to schedule_preempt_disable() except that it
+ * never enables preemption because it does not call sched_submit_work().
+ */
+void __sched schedule_idle(void)
+{
+	/*
+	 * As this skips calling sched_submit_work(), which the idle task does
+	 * regardless because that function is a nop when the task is in a
+	 * TASK_RUNNING state, make sure this isn't used someplace that the
+	 * current task can be in any other state. Note, idle is always in the
+	 * TASK_RUNNING state.
+	 */
+	WARN_ON_ONCE(current->state);
+	do {
+		__schedule(false);
+	} while (need_resched());
+}
+
+#ifdef CONFIG_CONTEXT_TRACKING
+asmlinkage __visible void __sched schedule_user(void)
+{
+	/*
+	 * If we come here after a random call to set_need_resched(),
+	 * or we have been woken up remotely but the IPI has not yet arrived,
+	 * we haven't yet exited the RCU idle mode. Do it here manually until
+	 * we find a better solution.
+	 *
+	 * NB: There are buggy callers of this function.  Ideally we
+	 * should warn if prev_state != IN_USER, but that will trigger
+	 * too frequently to make sense yet.
+	 */
+	enum ctx_state prev_state = exception_enter();
+	schedule();
+	exception_exit(prev_state);
+}
+#endif
+
+/**
+ * schedule_preempt_disabled - called with preemption disabled
+ *
+ * Returns with preemption disabled. Note: preempt_count must be 1
+ */
+void __sched schedule_preempt_disabled(void)
+{
+	sched_preempt_enable_no_resched();
+	schedule();
+	preempt_disable();
+}
+
+static void __sched notrace preempt_schedule_common(void)
+{
+	do {
+		/*
+		 * Because the function tracer can trace preempt_count_sub()
+		 * and it also uses preempt_enable/disable_notrace(), if
+		 * NEED_RESCHED is set, the preempt_enable_notrace() called
+		 * by the function tracer will call this function again and
+		 * cause infinite recursion.
+		 *
+		 * Preemption must be disabled here before the function
+		 * tracer can trace. Break up preempt_disable() into two
+		 * calls. One to disable preemption without fear of being
+		 * traced. The other to still record the preemption latency,
+		 * which can also be traced by the function tracer.
+		 */
+		preempt_disable_notrace();
+		preempt_latency_start(1);
+		__schedule(true);
+		preempt_latency_stop(1);
+		preempt_enable_no_resched_notrace();
+
+		/*
+		 * Check again in case we missed a preemption opportunity
+		 * between schedule and now.
+		 */
+	} while (need_resched());
+}
+
+#ifdef CONFIG_PREEMPT
+/*
+ * this is the entry point to schedule() from in-kernel preemption
+ * off of preempt_enable. Kernel preemptions off return from interrupt
+ * occur there and call schedule directly.
+ */
+asmlinkage __visible void __sched notrace preempt_schedule(void)
+{
+	/*
+	 * If there is a non-zero preempt_count or interrupts are disabled,
+	 * we do not want to preempt the current task. Just return..
+	 */
+	if (likely(!preemptible()))
+		return;
+
+	preempt_schedule_common();
+}
+NOKPROBE_SYMBOL(preempt_schedule);
+EXPORT_SYMBOL(preempt_schedule);
+
+/**
+ * preempt_schedule_notrace - preempt_schedule called by tracing
+ *
+ * The tracing infrastructure uses preempt_enable_notrace to prevent
+ * recursion and tracing preempt enabling caused by the tracing
+ * infrastructure itself. But as tracing can happen in areas coming
+ * from userspace or just about to enter userspace, a preempt enable
+ * can occur before user_exit() is called. This will cause the scheduler
+ * to be called when the system is still in usermode.
+ *
+ * To prevent this, the preempt_enable_notrace will use this function
+ * instead of preempt_schedule() to exit user context if needed before
+ * calling the scheduler.
+ */
+asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
+{
+	enum ctx_state prev_ctx;
+
+	if (likely(!preemptible()))
+		return;
+
+	do {
+		/*
+		 * Because the function tracer can trace preempt_count_sub()
+		 * and it also uses preempt_enable/disable_notrace(), if
+		 * NEED_RESCHED is set, the preempt_enable_notrace() called
+		 * by the function tracer will call this function again and
+		 * cause infinite recursion.
+		 *
+		 * Preemption must be disabled here before the function
+		 * tracer can trace. Break up preempt_disable() into two
+		 * calls. One to disable preemption without fear of being
+		 * traced. The other to still record the preemption latency,
+		 * which can also be traced by the function tracer.
+		 */
+		preempt_disable_notrace();
+		preempt_latency_start(1);
+		/*
+		 * Needs preempt disabled in case user_exit() is traced
+		 * and the tracer calls preempt_enable_notrace() causing
+		 * an infinite recursion.
+		 */
+		prev_ctx = exception_enter();
+		__schedule(true);
+		exception_exit(prev_ctx);
+
+		preempt_latency_stop(1);
+		preempt_enable_no_resched_notrace();
+	} while (need_resched());
+}
+EXPORT_SYMBOL_GPL(preempt_schedule_notrace);
+
+#endif /* CONFIG_PREEMPT */
+
+/*
+ * this is the entry point to schedule() from kernel preemption
+ * off of irq context.
+ * Note, that this is called and return with irqs disabled. This will
+ * protect us against recursive calling from irq.
+ */
+asmlinkage __visible void __sched preempt_schedule_irq(void)
+{
+	enum ctx_state prev_state;
+
+	/* Catch callers which need to be fixed */
+	BUG_ON(preempt_count() || !irqs_disabled());
+
+	prev_state = exception_enter();
+
+	do {
+		preempt_disable();
+		local_irq_enable();
+		__schedule(true);
+		local_irq_disable();
+		sched_preempt_enable_no_resched();
+	} while (need_resched());
+
+	exception_exit(prev_state);
+}
+
+int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags,
+			  void *key)
+{
+	return try_to_wake_up(curr->private, mode, wake_flags);
+}
+EXPORT_SYMBOL(default_wake_function);
+
+#ifdef CONFIG_RT_MUTEXES
+
+static inline int __rt_effective_prio(struct task_struct *pi_task, int prio)
+{
+	if (pi_task)
+		prio = min(prio, pi_task->prio);
+
+	return prio;
+}
+
+static inline int rt_effective_prio(struct task_struct *p, int prio)
+{
+	struct task_struct *pi_task = rt_mutex_get_top_task(p);
+
+	return __rt_effective_prio(pi_task, prio);
+}
+
+/*
+ * rt_mutex_setprio - set the current priority of a task
+ * @p: task to boost
+ * @pi_task: donor task
+ *
+ * This function changes the 'effective' priority of a task. It does
+ * not touch ->normal_prio like __setscheduler().
+ *
+ * Used by the rt_mutex code to implement priority inheritance
+ * logic. Call site only calls if the priority of the task changed.
+ */
+void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
+{
+	int prio, oldprio;
+	struct rq *rq;
+
+	/* XXX used to be waiter->prio, not waiter->task->prio */
+	prio = __rt_effective_prio(pi_task, p->normal_prio);
+
+	/*
+	 * If nothing changed; bail early.
+	 */
+	if (p->pi_top_task == pi_task && prio == p->prio)
+		return;
+
+	rq = __task_rq_lock(p);
+	update_rq_clock(rq);
+	/*
+	 * Set under pi_lock && rq->lock, such that the value can be used under
+	 * either lock.
+	 *
+	 * Note that there is loads of tricky to make this pointer cache work
+	 * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to
+	 * ensure a task is de-boosted (pi_task is set to NULL) before the
+	 * task is allowed to run again (and can exit). This ensures the pointer
+	 * points to a blocked task -- which guaratees the task is present.
+	 */
+	p->pi_top_task = pi_task;
+
+	/*
+	 * For FIFO/RR we only need to set prio, if that matches we're done.
+	 */
+	if (prio == p->prio)
+		goto out_unlock;
+
+	/*
+	 * Idle task boosting is a nono in general. There is one
+	 * exception, when PREEMPT_RT and NOHZ is active:
+	 *
+	 * The idle task calls get_next_timer_interrupt() and holds
+	 * the timer wheel base->lock on the CPU and another CPU wants
+	 * to access the timer (probably to cancel it). We can safely
+	 * ignore the boosting request, as the idle CPU runs this code
+	 * with interrupts disabled and will complete the lock
+	 * protected section without being interrupted. So there is no
+	 * real need to boost.
+	 */
+	if (unlikely(p == rq->idle)) {
+		WARN_ON(p != rq->curr);
+		WARN_ON(p->pi_blocked_on);
+		goto out_unlock;
+	}
+
+	trace_sched_pi_setprio(p, pi_task);
+	oldprio = p->prio;
+	p->prio = prio;
+	if (task_running(rq, p)){
+		if (prio > oldprio)
+			resched_task(p);
+	} else if (task_queued(p)) {
+		dequeue_task(rq, p, DEQUEUE_SAVE);
+		enqueue_task(rq, p, ENQUEUE_RESTORE);
+		if (prio < oldprio)
+			try_preempt(p, rq);
+	}
+out_unlock:
+	__task_rq_unlock(rq);
+}
+#else
+static inline int rt_effective_prio(struct task_struct *p, int prio)
+{
+	return prio;
+}
+#endif
+
+/*
+ * Adjust the deadline for when the priority is to change, before it's
+ * changed.
+ */
+static inline void adjust_deadline(struct task_struct *p, int new_prio)
+{
+	p->deadline += static_deadline_diff(new_prio) - task_deadline_diff(p);
+}
+
+void set_user_nice(struct task_struct *p, long nice)
+{
+	int new_static, old_static;
+	unsigned long flags;
+	struct rq *rq;
+
+	if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE)
+		return;
+	new_static = NICE_TO_PRIO(nice);
+	/*
+	 * We have to be careful, if called from sys_setpriority(),
+	 * the task might be in the middle of scheduling on another CPU.
+	 */
+	rq = task_rq_lock(p, &flags);
+	update_rq_clock(rq);
+
+	/*
+	 * The RT priorities are set via sched_setscheduler(), but we still
+	 * allow the 'normal' nice value to be set - but as expected
+	 * it wont have any effect on scheduling until the task is
+	 * not SCHED_NORMAL/SCHED_BATCH:
+	 */
+	if (has_rt_policy(p)) {
+		p->static_prio = new_static;
+		goto out_unlock;
+	}
+
+	adjust_deadline(p, new_static);
+	old_static = p->static_prio;
+	p->static_prio = new_static;
+	p->prio = effective_prio(p);
+
+	if (task_queued(p)) {
+		dequeue_task(rq, p, DEQUEUE_SAVE);
+		enqueue_task(rq, p, ENQUEUE_RESTORE);
+		if (new_static < old_static)
+			try_preempt(p, rq);
+	} else if (task_running(rq, p)) {
+		set_rq_task(rq, p);
+		if (old_static < new_static)
+			resched_task(p);
+	}
+out_unlock:
+	task_rq_unlock(rq, p, &flags);
+}
+EXPORT_SYMBOL(set_user_nice);
+
+/*
+ * can_nice - check if a task can reduce its nice value
+ * @p: task
+ * @nice: nice value
+ */
+int can_nice(const struct task_struct *p, const int nice)
+{
+	/* Convert nice value [19,-20] to rlimit style value [1,40] */
+	int nice_rlim = nice_to_rlimit(nice);
+
+	return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) ||
+		capable(CAP_SYS_NICE));
+}
+
+#ifdef __ARCH_WANT_SYS_NICE
+
+/*
+ * sys_nice - change the priority of the current process.
+ * @increment: priority increment
+ *
+ * sys_setpriority is a more generic, but much slower function that
+ * does similar things.
+ */
+SYSCALL_DEFINE1(nice, int, increment)
+{
+	long nice, retval;
+
+	/*
+	 * Setpriority might change our priority at the same moment.
+	 * We don't have to worry. Conceptually one call occurs first
+	 * and we have a single winner.
+	 */
+
+	increment = clamp(increment, -NICE_WIDTH, NICE_WIDTH);
+	nice = task_nice(current) + increment;
+
+	nice = clamp_val(nice, MIN_NICE, MAX_NICE);
+	if (increment < 0 && !can_nice(current, nice))
+		return -EPERM;
+
+	retval = security_task_setnice(current, nice);
+	if (retval)
+		return retval;
+
+	set_user_nice(current, nice);
+	return 0;
+}
+
+#endif
+
+/**
+ * task_prio - return the priority value of a given task.
+ * @p: the task in question.
+ *
+ * Return: The priority value as seen by users in /proc.
+ * RT tasks are offset by -100. Normal tasks are centered around 1, value goes
+ * from 0 (SCHED_ISO) up to 82 (nice +19 SCHED_IDLEPRIO).
+ */
+int task_prio(const struct task_struct *p)
+{
+	int delta, prio = p->prio - MAX_RT_PRIO;
+
+	/* rt tasks and iso tasks */
+	if (prio <= 0)
+		goto out;
+
+	/* Convert to ms to avoid overflows */
+	delta = NS_TO_MS(p->deadline - task_rq(p)->niffies);
+	if (unlikely(delta < 0))
+		delta = 0;
+	delta = delta * 40 / ms_longest_deadline_diff();
+	if (delta <= 80)
+		prio += delta;
+	if (idleprio_task(p))
+		prio += 40;
+out:
+	return prio;
+}
+
+/**
+ * idle_cpu - is a given CPU idle currently?
+ * @cpu: the processor in question.
+ *
+ * Return: 1 if the CPU is currently idle. 0 otherwise.
+ */
+int idle_cpu(int cpu)
+{
+	return cpu_curr(cpu) == cpu_rq(cpu)->idle;
+}
+
+/**
+ * idle_task - return the idle task for a given CPU.
+ * @cpu: the processor in question.
+ *
+ * Return: The idle task for the CPU @cpu.
+ */
+struct task_struct *idle_task(int cpu)
+{
+	return cpu_rq(cpu)->idle;
+}
+
+/**
+ * find_process_by_pid - find a process with a matching PID value.
+ * @pid: the pid in question.
+ *
+ * The task of @pid, if found. %NULL otherwise.
+ */
+static inline struct task_struct *find_process_by_pid(pid_t pid)
+{
+	return pid ? find_task_by_vpid(pid) : current;
+}
+
+/* Actually do priority change: must hold rq lock. */
+static void __setscheduler(struct task_struct *p, struct rq *rq, int policy,
+			   int prio, bool keep_boost)
+{
+	int oldrtprio, oldprio;
+
+	p->policy = policy;
+	oldrtprio = p->rt_priority;
+	p->rt_priority = prio;
+	p->normal_prio = normal_prio(p);
+	oldprio = p->prio;
+	/*
+	 * Keep a potential priority boosting if called from
+	 * sched_setscheduler().
+	 */
+	p->prio = normal_prio(p);
+	if (keep_boost)
+		p->prio = rt_effective_prio(p, p->prio);
+
+	if (task_running(rq, p)) {
+		set_rq_task(rq, p);
+		resched_task(p);
+	} else if (task_queued(p)) {
+		dequeue_task(rq, p, DEQUEUE_SAVE);
+		enqueue_task(rq, p, ENQUEUE_RESTORE);
+		if (p->prio < oldprio || p->rt_priority > oldrtprio)
+			try_preempt(p, rq);
+	}
+}
+
+/*
+ * Check the target process has a UID that matches the current process's
+ */
+static bool check_same_owner(struct task_struct *p)
+{
+	const struct cred *cred = current_cred(), *pcred;
+	bool match;
+
+	rcu_read_lock();
+	pcred = __task_cred(p);
+	match = (uid_eq(cred->euid, pcred->euid) ||
+		 uid_eq(cred->euid, pcred->uid));
+	rcu_read_unlock();
+	return match;
+}
+
+static int __sched_setscheduler(struct task_struct *p,
+				const struct sched_attr *attr,
+				bool user, bool pi)
+{
+	int retval, policy = attr->sched_policy, oldpolicy = -1, priority = attr->sched_priority;
+	unsigned long flags, rlim_rtprio = 0;
+	int reset_on_fork;
+	struct rq *rq;
+
+	/* The pi code expects interrupts enabled */
+	BUG_ON(pi && in_interrupt());
+
+	if (is_rt_policy(policy) && !capable(CAP_SYS_NICE)) {
+		unsigned long lflags;
+
+		if (!lock_task_sighand(p, &lflags))
+			return -ESRCH;
+		rlim_rtprio = task_rlimit(p, RLIMIT_RTPRIO);
+		unlock_task_sighand(p, &lflags);
+		if (rlim_rtprio)
+			goto recheck;
+		/*
+		 * If the caller requested an RT policy without having the
+		 * necessary rights, we downgrade the policy to SCHED_ISO.
+		 * We also set the parameter to zero to pass the checks.
+		 */
+		policy = SCHED_ISO;
+		priority = 0;
+	}
+recheck:
+	/* Double check policy once rq lock held */
+	if (policy < 0) {
+		reset_on_fork = p->sched_reset_on_fork;
+		policy = oldpolicy = p->policy;
+	} else {
+		reset_on_fork = !!(policy & SCHED_RESET_ON_FORK);
+		policy &= ~SCHED_RESET_ON_FORK;
+
+		if (!SCHED_RANGE(policy))
+			return -EINVAL;
+	}
+
+	if (attr->sched_flags & ~(SCHED_FLAG_ALL | SCHED_FLAG_SUGOV))
+		return -EINVAL;
+
+	/*
+	 * Valid priorities for SCHED_FIFO and SCHED_RR are
+	 * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and
+	 * SCHED_BATCH is 0.
+	 */
+	if (priority < 0 ||
+	    (p->mm && priority > MAX_USER_RT_PRIO - 1) ||
+	    (!p->mm && priority > MAX_RT_PRIO - 1))
+		return -EINVAL;
+	if (is_rt_policy(policy) != (priority != 0))
+		return -EINVAL;
+
+	/*
+	 * Allow unprivileged RT tasks to decrease priority:
+	 */
+	if (user && !capable(CAP_SYS_NICE)) {
+		if (is_rt_policy(policy)) {
+			unsigned long rlim_rtprio =
+					task_rlimit(p, RLIMIT_RTPRIO);
+
+			/* Can't set/change the rt policy */
+			if (policy != p->policy && !rlim_rtprio)
+				return -EPERM;
+
+			/* Can't increase priority */
+			if (priority > p->rt_priority &&
+			    priority > rlim_rtprio)
+				return -EPERM;
+		} else {
+			switch (p->policy) {
+				/*
+				 * Can only downgrade policies but not back to
+				 * SCHED_NORMAL
+				 */
+				case SCHED_ISO:
+					if (policy == SCHED_ISO)
+						goto out;
+					if (policy != SCHED_NORMAL)
+						return -EPERM;
+					break;
+				case SCHED_BATCH:
+					if (policy == SCHED_BATCH)
+						goto out;
+					if (policy != SCHED_IDLEPRIO)
+						return -EPERM;
+					break;
+				case SCHED_IDLEPRIO:
+					if (policy == SCHED_IDLEPRIO)
+						goto out;
+					return -EPERM;
+				default:
+					break;
+			}
+		}
+
+		/* Can't change other user's priorities */
+		if (!check_same_owner(p))
+			return -EPERM;
+
+		/* Normal users shall not reset the sched_reset_on_fork flag: */
+		if (p->sched_reset_on_fork && !reset_on_fork)
+			return -EPERM;
+	}
+
+	if (user) {
+		retval = security_task_setscheduler(p);
+		if (retval)
+			return retval;
+	}
+
+	/*
+	 * Make sure no PI-waiters arrive (or leave) while we are
+	 * changing the priority of the task:
+	 *
+	 * To be able to change p->policy safely, the runqueue lock must be
+	 * held.
+	 */
+	rq = task_rq_lock(p, &flags);
+	update_rq_clock(rq);
+
+	/*
+	 * Changing the policy of the stop threads its a very bad idea:
+	 */
+	if (p == rq->stop) {
+		task_rq_unlock(rq, p, &flags);
+		return -EINVAL;
+	}
+
+	/*
+	 * If not changing anything there's no need to proceed further:
+	 */
+	if (unlikely(policy == p->policy && (!is_rt_policy(policy) ||
+	    priority == p->rt_priority))) {
+		task_rq_unlock(rq, p, &flags);
+		return 0;
+	}
+
+	/* Re-check policy now with rq lock held */
+	if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
+		policy = oldpolicy = -1;
+		task_rq_unlock(rq, p, &flags);
+		goto recheck;
+	}
+	p->sched_reset_on_fork = reset_on_fork;
+
+	__setscheduler(p, rq, policy, priority, pi);
+	task_rq_unlock(rq, p, &flags);
+
+	if (pi)
+		rt_mutex_adjust_pi(p);
+out:
+	return 0;
+}
+
+static int _sched_setscheduler(struct task_struct *p, int policy,
+			       const struct sched_param *param, bool check)
+{
+	struct sched_attr attr = {
+		.sched_policy   = policy,
+		.sched_priority = param->sched_priority,
+		.sched_nice	= PRIO_TO_NICE(p->static_prio),
+	};
+
+	return __sched_setscheduler(p, &attr, check, true);
+}
+/**
+ * sched_setscheduler - change the scheduling policy and/or RT priority of a thread.
+ * @p: the task in question.
+ * @policy: new policy.
+ * @param: structure containing the new RT priority.
+ *
+ * Return: 0 on success. An error code otherwise.
+ *
+ * NOTE that the task may be already dead.
+ */
+int sched_setscheduler(struct task_struct *p, int policy,
+		       const struct sched_param *param)
+{
+	return _sched_setscheduler(p, policy, param, true);
+}
+
+EXPORT_SYMBOL_GPL(sched_setscheduler);
+
+int sched_setattr(struct task_struct *p, const struct sched_attr *attr)
+{
+	return __sched_setscheduler(p, attr, true, true);
+}
+EXPORT_SYMBOL_GPL(sched_setattr);
+
+int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr)
+{
+	return __sched_setscheduler(p, attr, false, true);
+}
+
+/**
+ * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace.
+ * @p: the task in question.
+ * @policy: new policy.
+ * @param: structure containing the new RT priority.
+ *
+ * Just like sched_setscheduler, only don't bother checking if the
+ * current context has permission.  For example, this is needed in
+ * stop_machine(): we create temporary high priority worker threads,
+ * but our caller might not have that capability.
+ *
+ * Return: 0 on success. An error code otherwise.
+ */
+int sched_setscheduler_nocheck(struct task_struct *p, int policy,
+			       const struct sched_param *param)
+{
+	return _sched_setscheduler(p, policy, param, false);
+}
+EXPORT_SYMBOL_GPL(sched_setscheduler_nocheck);
+
+static int
+do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
+{
+	struct sched_param lparam;
+	struct task_struct *p;
+	int retval;
+
+	if (!param || pid < 0)
+		return -EINVAL;
+	if (copy_from_user(&lparam, param, sizeof(struct sched_param)))
+		return -EFAULT;
+
+	rcu_read_lock();
+	retval = -ESRCH;
+	p = find_process_by_pid(pid);
+	if (p != NULL)
+		retval = sched_setscheduler(p, policy, &lparam);
+	rcu_read_unlock();
+
+	return retval;
+}
+
+/*
+ * Mimics kernel/events/core.c perf_copy_attr().
+ */
+static int sched_copy_attr(struct sched_attr __user *uattr,
+			   struct sched_attr *attr)
+{
+	u32 size;
+	int ret;
+
+	if (!access_ok(VERIFY_WRITE, uattr, SCHED_ATTR_SIZE_VER0))
+		return -EFAULT;
+
+	/* Zero the full structure, so that a short copy will be nice: */
+	memset(attr, 0, sizeof(*attr));
+
+	ret = get_user(size, &uattr->size);
+	if (ret)
+		return ret;
+
+	/* Bail out on silly large: */
+	if (size > PAGE_SIZE)
+		goto err_size;
+
+	/* ABI compatibility quirk: */
+	if (!size)
+		size = SCHED_ATTR_SIZE_VER0;
+
+	if (size < SCHED_ATTR_SIZE_VER0)
+		goto err_size;
+
+	/*
+	 * If we're handed a bigger struct than we know of,
+	 * ensure all the unknown bits are 0 - i.e. new
+	 * user-space does not rely on any kernel feature
+	 * extensions we dont know about yet.
+	 */
+	if (size > sizeof(*attr)) {
+		unsigned char __user *addr;
+		unsigned char __user *end;
+		unsigned char val;
+
+		addr = (void __user *)uattr + sizeof(*attr);
+		end  = (void __user *)uattr + size;
+
+		for (; addr < end; addr++) {
+			ret = get_user(val, addr);
+			if (ret)
+				return ret;
+			if (val)
+				goto err_size;
+		}
+		size = sizeof(*attr);
+	}
+
+	ret = copy_from_user(attr, uattr, size);
+	if (ret)
+		return -EFAULT;
+
+	/*
+	 * XXX: Do we want to be lenient like existing syscalls; or do we want
+	 * to be strict and return an error on out-of-bounds values?
+	 */
+	attr->sched_nice = clamp(attr->sched_nice, -20, 19);
+
+	/* sched/core.c uses zero here but we already know ret is zero */
+	return 0;
+
+err_size:
+	put_user(sizeof(*attr), &uattr->size);
+	return -E2BIG;
+}
+
+/*
+ * sched_setparam() passes in -1 for its policy, to let the functions
+ * it calls know not to change it.
+ */
+#define SETPARAM_POLICY	-1
+
+/**
+ * sys_sched_setscheduler - set/change the scheduler policy and RT priority
+ * @pid: the pid in question.
+ * @policy: new policy.
+ * @param: structure containing the new RT priority.
+ *
+ * Return: 0 on success. An error code otherwise.
+ */
+SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param)
+{
+	if (policy < 0)
+		return -EINVAL;
+
+	return do_sched_setscheduler(pid, policy, param);
+}
+
+/**
+ * sys_sched_setparam - set/change the RT priority of a thread
+ * @pid: the pid in question.
+ * @param: structure containing the new RT priority.
+ *
+ * Return: 0 on success. An error code otherwise.
+ */
+SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
+{
+	return do_sched_setscheduler(pid, SETPARAM_POLICY, param);
+}
+
+/**
+ * sys_sched_setattr - same as above, but with extended sched_attr
+ * @pid: the pid in question.
+ * @uattr: structure containing the extended parameters.
+ */
+SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr,
+			       unsigned int, flags)
+{
+	struct sched_attr attr;
+	struct task_struct *p;
+	int retval;
+
+	if (!uattr || pid < 0 || flags)
+		return -EINVAL;
+
+	retval = sched_copy_attr(uattr, &attr);
+	if (retval)
+		return retval;
+
+	if ((int)attr.sched_policy < 0)
+		return -EINVAL;
+
+	rcu_read_lock();
+	retval = -ESRCH;
+	p = find_process_by_pid(pid);
+	if (p != NULL)
+		retval = sched_setattr(p, &attr);
+	rcu_read_unlock();
+
+	return retval;
+}
+
+/**
+ * sys_sched_getscheduler - get the policy (scheduling class) of a thread
+ * @pid: the pid in question.
+ *
+ * Return: On success, the policy of the thread. Otherwise, a negative error
+ * code.
+ */
+SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
+{
+	struct task_struct *p;
+	int retval = -EINVAL;
+
+	if (pid < 0)
+		goto out_nounlock;
+
+	retval = -ESRCH;
+	rcu_read_lock();
+	p = find_process_by_pid(pid);
+	if (p) {
+		retval = security_task_getscheduler(p);
+		if (!retval)
+			retval = p->policy;
+	}
+	rcu_read_unlock();
+
+out_nounlock:
+	return retval;
+}
+
+/**
+ * sys_sched_getscheduler - get the RT priority of a thread
+ * @pid: the pid in question.
+ * @param: structure containing the RT priority.
+ *
+ * Return: On success, 0 and the RT priority is in @param. Otherwise, an error
+ * code.
+ */
+SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
+{
+	struct sched_param lp = { .sched_priority = 0 };
+	struct task_struct *p;
+	int retval = -EINVAL;
+
+	if (!param || pid < 0)
+		goto out_nounlock;
+
+	rcu_read_lock();
+	p = find_process_by_pid(pid);
+	retval = -ESRCH;
+	if (!p)
+		goto out_unlock;
+
+	retval = security_task_getscheduler(p);
+	if (retval)
+		goto out_unlock;
+
+	if (has_rt_policy(p))
+		lp.sched_priority = p->rt_priority;
+	rcu_read_unlock();
+
+	/*
+	 * This one might sleep, we cannot do it with a spinlock held ...
+	 */
+	retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0;
+
+out_nounlock:
+	return retval;
+
+out_unlock:
+	rcu_read_unlock();
+	return retval;
+}
+
+static int sched_read_attr(struct sched_attr __user *uattr,
+			   struct sched_attr *attr,
+			   unsigned int usize)
+{
+	int ret;
+
+	if (!access_ok(VERIFY_WRITE, uattr, usize))
+		return -EFAULT;
+
+	/*
+	 * If we're handed a smaller struct than we know of,
+	 * ensure all the unknown bits are 0 - i.e. old
+	 * user-space does not get uncomplete information.
+	 */
+	if (usize < sizeof(*attr)) {
+		unsigned char *addr;
+		unsigned char *end;
+
+		addr = (void *)attr + usize;
+		end  = (void *)attr + sizeof(*attr);
+
+		for (; addr < end; addr++) {
+			if (*addr)
+				return -EFBIG;
+		}
+
+		attr->size = usize;
+	}
+
+	ret = copy_to_user(uattr, attr, attr->size);
+	if (ret)
+		return -EFAULT;
+
+	/* sched/core.c uses zero here but we already know ret is zero */
+	return ret;
+}
+
+/**
+ * sys_sched_getattr - similar to sched_getparam, but with sched_attr
+ * @pid: the pid in question.
+ * @uattr: structure containing the extended parameters.
+ * @size: sizeof(attr) for fwd/bwd comp.
+ * @flags: for future extension.
+ */
+SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
+		unsigned int, size, unsigned int, flags)
+{
+	struct sched_attr attr = {
+		.size = sizeof(struct sched_attr),
+	};
+	struct task_struct *p;
+	int retval;
+
+	if (!uattr || pid < 0 || size > PAGE_SIZE ||
+	    size < SCHED_ATTR_SIZE_VER0 || flags)
+		return -EINVAL;
+
+	rcu_read_lock();
+	p = find_process_by_pid(pid);
+	retval = -ESRCH;
+	if (!p)
+		goto out_unlock;
+
+	retval = security_task_getscheduler(p);
+	if (retval)
+		goto out_unlock;
+
+	attr.sched_policy = p->policy;
+	if (rt_task(p))
+		attr.sched_priority = p->rt_priority;
+	else
+		attr.sched_nice = task_nice(p);
+
+	rcu_read_unlock();
+
+	retval = sched_read_attr(uattr, &attr, size);
+	return retval;
+
+out_unlock:
+	rcu_read_unlock();
+	return retval;
+}
+
+long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
+{
+	cpumask_var_t cpus_allowed, new_mask;
+	struct task_struct *p;
+	int retval;
+
+	rcu_read_lock();
+
+	p = find_process_by_pid(pid);
+	if (!p) {
+		rcu_read_unlock();
+		return -ESRCH;
+	}
+
+	/* Prevent p going away */
+	get_task_struct(p);
+	rcu_read_unlock();
+
+	if (p->flags & PF_NO_SETAFFINITY) {
+		retval = -EINVAL;
+		goto out_put_task;
+	}
+	if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
+		retval = -ENOMEM;
+		goto out_put_task;
+	}
+	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) {
+		retval = -ENOMEM;
+		goto out_free_cpus_allowed;
+	}
+	retval = -EPERM;
+	if (!check_same_owner(p)) {
+		rcu_read_lock();
+		if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
+			rcu_read_unlock();
+			goto out_unlock;
+		}
+		rcu_read_unlock();
+	}
+
+	retval = security_task_setscheduler(p);
+	if (retval)
+		goto out_unlock;
+
+	cpuset_cpus_allowed(p, cpus_allowed);
+	cpumask_and(new_mask, in_mask, cpus_allowed);
+again:
+	retval = __set_cpus_allowed_ptr(p, new_mask, true);
+
+	if (!retval) {
+		cpuset_cpus_allowed(p, cpus_allowed);
+		if (!cpumask_subset(new_mask, cpus_allowed)) {
+			/*
+			 * We must have raced with a concurrent cpuset
+			 * update. Just reset the cpus_allowed to the
+			 * cpuset's cpus_allowed
+			 */
+			cpumask_copy(new_mask, cpus_allowed);
+			goto again;
+		}
+	}
+out_unlock:
+	free_cpumask_var(new_mask);
+out_free_cpus_allowed:
+	free_cpumask_var(cpus_allowed);
+out_put_task:
+	put_task_struct(p);
+	return retval;
+}
+
+static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
+			     cpumask_t *new_mask)
+{
+	if (len < cpumask_size())
+		cpumask_clear(new_mask);
+	else if (len > cpumask_size())
+		len = cpumask_size();
+
+	return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0;
+}
+
+
+/**
+ * sys_sched_setaffinity - set the CPU affinity of a process
+ * @pid: pid of the process
+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr
+ * @user_mask_ptr: user-space pointer to the new CPU mask
+ *
+ * Return: 0 on success. An error code otherwise.
+ */
+SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
+		unsigned long __user *, user_mask_ptr)
+{
+	cpumask_var_t new_mask;
+	int retval;
+
+	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	retval = get_user_cpu_mask(user_mask_ptr, len, new_mask);
+	if (retval == 0)
+		retval = sched_setaffinity(pid, new_mask);
+	free_cpumask_var(new_mask);
+	return retval;
+}
+
+long sched_getaffinity(pid_t pid, cpumask_t *mask)
+{
+	struct task_struct *p;
+	unsigned long flags;
+	int retval;
+
+	get_online_cpus();
+	rcu_read_lock();
+
+	retval = -ESRCH;
+	p = find_process_by_pid(pid);
+	if (!p)
+		goto out_unlock;
+
+	retval = security_task_getscheduler(p);
+	if (retval)
+		goto out_unlock;
+
+	raw_spin_lock_irqsave(&p->pi_lock, flags);
+	cpumask_and(mask, &p->cpus_allowed, cpu_active_mask);
+	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+
+out_unlock:
+	rcu_read_unlock();
+	put_online_cpus();
+
+	return retval;
+}
+
+/**
+ * sys_sched_getaffinity - get the CPU affinity of a process
+ * @pid: pid of the process
+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr
+ * @user_mask_ptr: user-space pointer to hold the current CPU mask
+ *
+ * Return: 0 on success. An error code otherwise.
+ */
+SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
+		unsigned long __user *, user_mask_ptr)
+{
+	int ret;
+	cpumask_var_t mask;
+
+	if ((len * BITS_PER_BYTE) < nr_cpu_ids)
+		return -EINVAL;
+	if (len & (sizeof(unsigned long)-1))
+		return -EINVAL;
+
+	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	ret = sched_getaffinity(pid, mask);
+	if (ret == 0) {
+		unsigned int retlen = min(len, cpumask_size());
+
+		if (copy_to_user(user_mask_ptr, mask, retlen))
+			ret = -EFAULT;
+		else
+			ret = retlen;
+	}
+	free_cpumask_var(mask);
+
+	return ret;
+}
+
+/**
+ * sys_sched_yield - yield the current processor to other threads.
+ *
+ * This function yields the current CPU to other tasks. It does this by
+ * scheduling away the current task. If it still has the earliest deadline
+ * it will be scheduled again as the next task.
+ *
+ * Return: 0.
+ */
+SYSCALL_DEFINE0(sched_yield)
+{
+	struct rq *rq;
+
+	if (!sched_yield_type)
+		goto out;
+
+	local_irq_disable();
+	rq = this_rq();
+	rq_lock(rq);
+
+	if (sched_yield_type > 1)
+		time_slice_expired(current, rq);
+	schedstat_inc(rq->yld_count);
+
+	/*
+	 * Since we are going to call schedule() anyway, there's
+	 * no need to preempt or enable interrupts:
+	 */
+	preempt_disable();
+	rq_unlock(rq);
+	sched_preempt_enable_no_resched();
+
+	schedule();
+out:
+	return 0;
+}
+
+#ifndef CONFIG_PREEMPT
+int __sched _cond_resched(void)
+{
+	if (should_resched(0)) {
+		preempt_schedule_common();
+		return 1;
+	}
+	rcu_all_qs();
+	return 0;
+}
+EXPORT_SYMBOL(_cond_resched);
+#endif
+
+/*
+ * __cond_resched_lock() - if a reschedule is pending, drop the given lock,
+ * call schedule, and on return reacquire the lock.
+ *
+ * This works OK both with and without CONFIG_PREEMPT.  We do strange low-level
+ * operations here to prevent schedule() from being called twice (once via
+ * spin_unlock(), once by hand).
+ */
+int __cond_resched_lock(spinlock_t *lock)
+{
+	int resched = should_resched(PREEMPT_LOCK_OFFSET);
+	int ret = 0;
+
+	lockdep_assert_held(lock);
+
+	if (spin_needbreak(lock) || resched) {
+		spin_unlock(lock);
+		if (resched)
+			preempt_schedule_common();
+		else
+			cpu_relax();
+		ret = 1;
+		spin_lock(lock);
+	}
+	return ret;
+}
+EXPORT_SYMBOL(__cond_resched_lock);
+
+int __sched __cond_resched_softirq(void)
+{
+	BUG_ON(!in_softirq());
+
+	if (should_resched(SOFTIRQ_DISABLE_OFFSET)) {
+		local_bh_enable();
+		preempt_schedule_common();
+		local_bh_disable();
+		return 1;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(__cond_resched_softirq);
+
+/**
+ * yield - yield the current processor to other threads.
+ *
+ * Do not ever use this function, there's a 99% chance you're doing it wrong.
+ *
+ * The scheduler is at all times free to pick the calling task as the most
+ * eligible task to run, if removing the yield() call from your code breaks
+ * it, its already broken.
+ *
+ * Typical broken usage is:
+ *
+ * while (!event)
+ *	yield();
+ *
+ * where one assumes that yield() will let 'the other' process run that will
+ * make event true. If the current task is a SCHED_FIFO task that will never
+ * happen. Never use yield() as a progress guarantee!!
+ *
+ * If you want to use yield() to wait for something, use wait_event().
+ * If you want to use yield() to be 'nice' for others, use cond_resched().
+ * If you still want to use yield(), do not!
+ */
+void __sched yield(void)
+{
+	set_current_state(TASK_RUNNING);
+	sys_sched_yield();
+}
+EXPORT_SYMBOL(yield);
+
+/**
+ * yield_to - yield the current processor to another thread in
+ * your thread group, or accelerate that thread toward the
+ * processor it's on.
+ * @p: target task
+ * @preempt: whether task preemption is allowed or not
+ *
+ * It's the caller's job to ensure that the target task struct
+ * can't go away on us before we can do any checks.
+ *
+ * Return:
+ *	true (>0) if we indeed boosted the target task.
+ *	false (0) if we failed to boost the target.
+ *	-ESRCH if there's no task to yield to.
+ */
+int __sched yield_to(struct task_struct *p, bool preempt)
+{
+	struct task_struct *rq_p;
+	struct rq *rq, *p_rq;
+	unsigned long flags;
+	int yielded = 0;
+
+	local_irq_save(flags);
+	rq = this_rq();
+
+again:
+	p_rq = task_rq(p);
+	/*
+	 * If we're the only runnable task on the rq and target rq also
+	 * has only one task, there's absolutely no point in yielding.
+	 */
+	if (task_running(p_rq, p) || p->state) {
+		yielded = -ESRCH;
+		goto out_irq;
+	}
+
+	double_rq_lock(rq, p_rq);
+	if (unlikely(task_rq(p) != p_rq)) {
+		double_rq_unlock(rq, p_rq);
+		goto again;
+	}
+
+	yielded = 1;
+	schedstat_inc(rq->yld_count);
+	rq_p = rq->curr;
+	if (p->deadline > rq_p->deadline)
+		p->deadline = rq_p->deadline;
+	p->time_slice += rq_p->time_slice;
+	if (p->time_slice > timeslice())
+		p->time_slice = timeslice();
+	time_slice_expired(rq_p, rq);
+	if (preempt && rq != p_rq)
+		resched_task(p_rq->curr);
+	double_rq_unlock(rq, p_rq);
+out_irq:
+	local_irq_restore(flags);
+
+	if (yielded > 0)
+		schedule();
+	return yielded;
+}
+EXPORT_SYMBOL_GPL(yield_to);
+
+int io_schedule_prepare(void)
+{
+	int old_iowait = current->in_iowait;
+
+	current->in_iowait = 1;
+	blk_schedule_flush_plug(current);
+
+	return old_iowait;
+}
+
+void io_schedule_finish(int token)
+{
+	current->in_iowait = token;
+}
+
+/*
+ * This task is about to go to sleep on IO.  Increment rq->nr_iowait so
+ * that process accounting knows that this is a task in IO wait state.
+ *
+ * But don't do that if it is a deliberate, throttling IO wait (this task
+ * has set its backing_dev_info: the queue against which it should throttle)
+ */
+
+long __sched io_schedule_timeout(long timeout)
+{
+	int token;
+	long ret;
+
+	token = io_schedule_prepare();
+	ret = schedule_timeout(timeout);
+	io_schedule_finish(token);
+
+	return ret;
+}
+EXPORT_SYMBOL(io_schedule_timeout);
+
+void io_schedule(void)
+{
+	int token;
+
+	token = io_schedule_prepare();
+	schedule();
+	io_schedule_finish(token);
+}
+EXPORT_SYMBOL(io_schedule);
+
+/**
+ * sys_sched_get_priority_max - return maximum RT priority.
+ * @policy: scheduling class.
+ *
+ * Return: On success, this syscall returns the maximum
+ * rt_priority that can be used by a given scheduling class.
+ * On failure, a negative error code is returned.
+ */
+SYSCALL_DEFINE1(sched_get_priority_max, int, policy)
+{
+	int ret = -EINVAL;
+
+	switch (policy) {
+	case SCHED_FIFO:
+	case SCHED_RR:
+		ret = MAX_USER_RT_PRIO-1;
+		break;
+	case SCHED_NORMAL:
+	case SCHED_BATCH:
+	case SCHED_ISO:
+	case SCHED_IDLEPRIO:
+		ret = 0;
+		break;
+	}
+	return ret;
+}
+
+/**
+ * sys_sched_get_priority_min - return minimum RT priority.
+ * @policy: scheduling class.
+ *
+ * Return: On success, this syscall returns the minimum
+ * rt_priority that can be used by a given scheduling class.
+ * On failure, a negative error code is returned.
+ */
+SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
+{
+	int ret = -EINVAL;
+
+	switch (policy) {
+	case SCHED_FIFO:
+	case SCHED_RR:
+		ret = 1;
+		break;
+	case SCHED_NORMAL:
+	case SCHED_BATCH:
+	case SCHED_ISO:
+	case SCHED_IDLEPRIO:
+		ret = 0;
+		break;
+	}
+	return ret;
+}
+
+static int sched_rr_get_interval(pid_t pid, struct timespec64 *t)
+{
+	struct task_struct *p;
+	unsigned int time_slice;
+	unsigned long flags;
+	struct rq *rq;
+	int retval;
+
+	if (pid < 0)
+		return -EINVAL;
+
+	retval = -ESRCH;
+	rcu_read_lock();
+	p = find_process_by_pid(pid);
+	if (!p)
+		goto out_unlock;
+
+	retval = security_task_getscheduler(p);
+	if (retval)
+		goto out_unlock;
+
+	rq = task_rq_lock(p, &flags);
+	time_slice = p->policy == SCHED_FIFO ? 0 : MS_TO_NS(task_timeslice(p));
+	task_rq_unlock(rq, p, &flags);
+
+	rcu_read_unlock();
+	*t = ns_to_timespec64(time_slice);
+	return 0;
+
+out_unlock:
+	rcu_read_unlock();
+	return retval;
+}
+
+/**
+ * sys_sched_rr_get_interval - return the default timeslice of a process.
+ * @pid: pid of the process.
+ * @interval: userspace pointer to the timeslice value.
+ *
+ * this syscall writes the default timeslice value of a given process
+ * into the user-space timespec buffer. A value of '0' means infinity.
+ *
+ * Return: On success, 0 and the timeslice is in @interval. Otherwise,
+ * an error code.
+ */
+SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
+		struct timespec __user *, interval)
+{
+	struct timespec64 t;
+	int retval = sched_rr_get_interval(pid, &t);
+
+	if (retval == 0)
+		retval = put_timespec64(&t, interval);
+
+	return retval;
+}
+
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE2(sched_rr_get_interval,
+		       compat_pid_t, pid,
+		       struct compat_timespec __user *, interval)
+{
+	struct timespec64 t;
+	int retval = sched_rr_get_interval(pid, &t);
+
+	if (retval == 0)
+		retval = compat_put_timespec64(&t, interval);
+	return retval;
+}
+#endif
+
+void sched_show_task(struct task_struct *p)
+{
+	unsigned long free = 0;
+	int ppid;
+
+	if (!try_get_task_stack(p))
+		return;
+
+	printk(KERN_INFO "%-15.15s %c", p->comm, task_state_to_char(p));
+
+	if (p->state == TASK_RUNNING)
+		printk(KERN_CONT "  running task    ");
+#ifdef CONFIG_DEBUG_STACK_USAGE
+	free = stack_not_used(p);
+#endif
+	ppid = 0;
+	rcu_read_lock();
+	if (pid_alive(p))
+		ppid = task_pid_nr(rcu_dereference(p->real_parent));
+	rcu_read_unlock();
+	printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free,
+		task_pid_nr(p), ppid,
+		(unsigned long)task_thread_info(p)->flags);
+
+	print_worker_info(KERN_INFO, p);
+	show_stack(p, NULL);
+	put_task_stack(p);
+}
+EXPORT_SYMBOL_GPL(sched_show_task);
+
+static inline bool
+state_filter_match(unsigned long state_filter, struct task_struct *p)
+{
+	/* no filter, everything matches */
+	if (!state_filter)
+		return true;
+
+	/* filter, but doesn't match */
+	if (!(p->state & state_filter))
+		return false;
+
+	/*
+	 * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows
+	 * TASK_KILLABLE).
+	 */
+	if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE)
+		return false;
+
+	return true;
+}
+
+void show_state_filter(unsigned long state_filter)
+{
+	struct task_struct *g, *p;
+
+#if BITS_PER_LONG == 32
+	printk(KERN_INFO
+		"  task                PC stack   pid father\n");
+#else
+	printk(KERN_INFO
+		"  task                        PC stack   pid father\n");
+#endif
+	rcu_read_lock();
+	for_each_process_thread(g, p) {
+		/*
+		 * reset the NMI-timeout, listing all files on a slow
+		 * console might take a lot of time:
+		 * Also, reset softlockup watchdogs on all CPUs, because
+		 * another CPU might be blocked waiting for us to process
+		 * an IPI.
+		 */
+		touch_nmi_watchdog();
+		touch_all_softlockup_watchdogs();
+		if (state_filter_match(state_filter, p))
+			sched_show_task(p);
+	}
+
+	rcu_read_unlock();
+	/*
+	 * Only show locks if all tasks are dumped:
+	 */
+	if (!state_filter)
+		debug_show_all_locks();
+}
+
+void dump_cpu_task(int cpu)
+{
+	pr_info("Task dump for CPU %d:\n", cpu);
+	sched_show_task(cpu_curr(cpu));
+}
+
+#ifdef CONFIG_SMP
+void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask)
+{
+	cpumask_copy(&p->cpus_allowed, new_mask);
+	p->nr_cpus_allowed = cpumask_weight(new_mask);
+}
+
+void __do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+{
+	struct rq *rq = task_rq(p);
+
+	lockdep_assert_held(&p->pi_lock);
+
+	cpumask_copy(&p->cpus_allowed, new_mask);
+
+	if (task_queued(p)) {
+		/*
+		 * Because __kthread_bind() calls this on blocked tasks without
+		 * holding rq->lock.
+		 */
+		lockdep_assert_held(rq->lock);
+	}
+}
+
+/*
+ * Calling do_set_cpus_allowed from outside the scheduler code should not be
+ * called on a running or queued task. We should be holding pi_lock.
+ */
+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+{
+	__do_set_cpus_allowed(p, new_mask);
+	if (needs_other_cpu(p, task_cpu(p))) {
+		struct rq *rq;
+
+		rq = __task_rq_lock(p);
+		set_task_cpu(p, valid_task_cpu(p));
+		resched_task(p);
+		__task_rq_unlock(rq);
+	}
+}
+#endif
+
+/**
+ * init_idle - set up an idle thread for a given CPU
+ * @idle: task in question
+ * @cpu: cpu the idle task belongs to
+ *
+ * NOTE: this function does not set the idle thread's NEED_RESCHED
+ * flag, to make booting more robust.
+ */
+void init_idle(struct task_struct *idle, int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&idle->pi_lock, flags);
+	raw_spin_lock(rq->lock);
+	idle->last_ran = rq->niffies;
+	time_slice_expired(idle, rq);
+	idle->state = TASK_RUNNING;
+	/* Setting prio to illegal value shouldn't matter when never queued */
+	idle->prio = PRIO_LIMIT;
+
+	kasan_unpoison_task_stack(idle);
+
+#ifdef CONFIG_SMP
+	/*
+	 * It's possible that init_idle() gets called multiple times on a task,
+	 * in that case do_set_cpus_allowed() will not do the right thing.
+	 *
+	 * And since this is boot we can forgo the serialisation.
+	 */
+	set_cpus_allowed_common(idle, cpumask_of(cpu));
+#ifdef CONFIG_SMT_NICE
+	idle->smt_bias = 0;
+#endif
+#endif
+	set_rq_task(rq, idle);
+
+	/* Silence PROVE_RCU */
+	rcu_read_lock();
+	set_task_cpu(idle, cpu);
+	rcu_read_unlock();
+
+	rq->curr = rq->idle = idle;
+	idle->on_rq = TASK_ON_RQ_QUEUED;
+	raw_spin_unlock(rq->lock);
+	raw_spin_unlock_irqrestore(&idle->pi_lock, flags);
+
+	/* Set the preempt count _outside_ the spinlocks! */
+	init_idle_preempt_count(idle, cpu);
+
+	ftrace_graph_init_idle_task(idle, cpu);
+	vtime_init_idle(idle, cpu);
+#ifdef CONFIG_SMP
+	sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
+#endif
+}
+
+int cpuset_cpumask_can_shrink(const struct cpumask __maybe_unused *cur,
+			      const struct cpumask __maybe_unused *trial)
+{
+	return 1;
+}
+
+int task_can_attach(struct task_struct *p,
+		    const struct cpumask *cs_cpus_allowed)
+{
+	int ret = 0;
+
+	/*
+	 * Kthreads which disallow setaffinity shouldn't be moved
+	 * to a new cpuset; we don't want to change their CPU
+	 * affinity and isolating such threads by their set of
+	 * allowed nodes is unnecessary.  Thus, cpusets are not
+	 * applicable for such threads.  This prevents checking for
+	 * success of set_cpus_allowed_ptr() on all attached tasks
+	 * before cpus_allowed may be changed.
+	 */
+	if (p->flags & PF_NO_SETAFFINITY)
+		ret = -EINVAL;
+
+	return ret;
+}
+
+void resched_cpu(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long flags;
+
+	rq_lock_irqsave(rq, &flags);
+	if (cpu_online(cpu) || cpu == smp_processor_id())
+		resched_curr(rq);
+	rq_unlock_irqrestore(rq, &flags);
+}
+
+#ifdef CONFIG_SMP
+#ifdef CONFIG_NO_HZ_COMMON
+void nohz_balance_enter_idle(int cpu)
+{
+}
+
+void select_nohz_load_balancer(int stop_tick)
+{
+}
+
+void set_cpu_sd_state_idle(void) {}
+
+/*
+ * In the semi idle case, use the nearest busy CPU for migrating timers
+ * from an idle CPU.  This is good for power-savings.
+ *
+ * We don't do similar optimization for completely idle system, as
+ * selecting an idle CPU will add more delays to the timers than intended
+ * (as that CPU's timer base may not be uptodate wrt jiffies etc).
+ */
+int get_nohz_timer_target(void)
+{
+	int i, cpu = smp_processor_id();
+	struct sched_domain *sd;
+
+	if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER))
+		return cpu;
+
+	rcu_read_lock();
+	for_each_domain(cpu, sd) {
+		for_each_cpu(i, sched_domain_span(sd)) {
+			if (cpu == i)
+				continue;
+
+			if (!idle_cpu(i) && housekeeping_cpu(i, HK_FLAG_TIMER)) {
+ 				cpu = i;
+				cpu = i;
+				goto unlock;
+			}
+		}
+	}
+
+	if (!housekeeping_cpu(cpu, HK_FLAG_TIMER))
+		cpu = housekeeping_any_cpu(HK_FLAG_TIMER);
+unlock:
+	rcu_read_unlock();
+	return cpu;
+}
+
+/*
+ * When add_timer_on() enqueues a timer into the timer wheel of an
+ * idle CPU then this timer might expire before the next timer event
+ * which is scheduled to wake up that CPU. In case of a completely
+ * idle system the next event might even be infinite time into the
+ * future. wake_up_idle_cpu() ensures that the CPU is woken up and
+ * leaves the inner idle loop so the newly added timer is taken into
+ * account when the CPU goes back to idle and evaluates the timer
+ * wheel for the next timer event.
+ */
+void wake_up_idle_cpu(int cpu)
+{
+	if (cpu == smp_processor_id())
+		return;
+
+	if (set_nr_and_not_polling(cpu_rq(cpu)->idle))
+		smp_sched_reschedule(cpu);
+	else
+		trace_sched_wake_idle_without_ipi(cpu);
+}
+
+static bool wake_up_full_nohz_cpu(int cpu)
+{
+	/*
+	 * We just need the target to call irq_exit() and re-evaluate
+	 * the next tick. The nohz full kick at least implies that.
+	 * If needed we can still optimize that later with an
+	 * empty IRQ.
+	 */
+	if (cpu_is_offline(cpu))
+		return true;  /* Don't try to wake offline CPUs. */
+	if (tick_nohz_full_cpu(cpu)) {
+		if (cpu != smp_processor_id() ||
+		    tick_nohz_tick_stopped())
+			tick_nohz_full_kick_cpu(cpu);
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Wake up the specified CPU.  If the CPU is going offline, it is the
+ * caller's responsibility to deal with the lost wakeup, for example,
+ * by hooking into the CPU_DEAD notifier like timers and hrtimers do.
+ */
+void wake_up_nohz_cpu(int cpu)
+{
+	if (!wake_up_full_nohz_cpu(cpu))
+		wake_up_idle_cpu(cpu);
+}
+#endif /* CONFIG_NO_HZ_COMMON */
+
+/*
+ * Change a given task's CPU affinity. Migrate the thread to a
+ * proper CPU and schedule it away if the CPU it's executing on
+ * is removed from the allowed bitmask.
+ *
+ * NOTE: the caller must have a valid reference to the task, the
+ * task must not exit() & deallocate itself prematurely. The
+ * call is not atomic; no spinlocks may be held.
+ */
+static int __set_cpus_allowed_ptr(struct task_struct *p,
+				  const struct cpumask *new_mask, bool check)
+{
+	const struct cpumask *cpu_valid_mask = cpu_active_mask;
+	bool queued = false, running_wrong = false, kthread;
+	struct cpumask old_mask;
+	unsigned long flags;
+	struct rq *rq;
+	int ret = 0;
+
+	rq = task_rq_lock(p, &flags);
+	update_rq_clock(rq);
+
+	kthread = !!(p->flags & PF_KTHREAD);
+	if (kthread) {
+		/*
+		 * Kernel threads are allowed on online && !active CPUs
+		 */
+		cpu_valid_mask = cpu_online_mask;
+	}
+
+	/*
+	 * Must re-check here, to close a race against __kthread_bind(),
+	 * sched_setaffinity() is not guaranteed to observe the flag.
+	 */
+	if (check && (p->flags & PF_NO_SETAFFINITY)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	cpumask_copy(&old_mask, &p->cpus_allowed);
+	if (cpumask_equal(&old_mask, new_mask))
+		goto out;
+
+	if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	queued = task_queued(p);
+	__do_set_cpus_allowed(p, new_mask);
+
+	if (kthread) {
+		/*
+		 * For kernel threads that do indeed end up on online &&
+		 * !active we want to ensure they are strict per-CPU threads.
+		 */
+		WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) &&
+			!cpumask_intersects(new_mask, cpu_active_mask) &&
+			p->nr_cpus_allowed != 1);
+	}
+
+	/* Can the task run on the task's current CPU? If so, we're done */
+	if (cpumask_test_cpu(task_cpu(p), new_mask))
+		goto out;
+
+	if (task_running(rq, p)) {
+		/* Task is running on the wrong cpu now, reschedule it. */
+		if (rq == this_rq()) {
+			set_tsk_need_resched(p);
+			running_wrong = true;
+		} else
+			resched_task(p);
+	} else {
+		int cpu = cpumask_any_and(cpu_valid_mask, new_mask);
+
+		if (queued) {
+			/*
+			 * Switch runqueue locks after dequeueing the task
+			 * here while still holding the pi_lock to be holding
+			 * the correct lock for enqueueing.
+			 */
+			dequeue_task(rq, p, 0);
+			rq_unlock(rq);
+
+			rq = cpu_rq(cpu);
+			rq_lock(rq);
+		}
+		set_task_cpu(p, cpu);
+		if (queued)
+			enqueue_task(rq, p, 0);
+	}
+	if (queued)
+		try_preempt(p, rq);
+	if (running_wrong)
+		preempt_disable();
+out:
+	task_rq_unlock(rq, p, &flags);
+
+	if (running_wrong) {
+		__schedule(true);
+		preempt_enable();
+	}
+
+	return ret;
+}
+
+int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
+{
+	return __set_cpus_allowed_ptr(p, new_mask, false);
+}
+EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
+
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * Run through task list and find tasks affined to the dead cpu, then remove
+ * that cpu from the list, enable cpu0 and set the zerobound flag. Must hold
+ * cpu 0 and src_cpu's runqueue locks.
+ */
+static void bind_zero(int src_cpu)
+{
+	struct task_struct *p, *t;
+	struct rq *rq0;
+	int bound = 0;
+
+	if (src_cpu == 0)
+		return;
+
+	rq0 = cpu_rq(0);
+
+	do_each_thread(t, p) {
+		if (cpumask_test_cpu(src_cpu, &p->cpus_allowed)) {
+			bool local = (task_cpu(p) == src_cpu);
+			struct rq *rq = task_rq(p);
+
+			/* task_running is the cpu stopper thread */
+			if (local && task_running(rq, p))
+				continue;
+			atomic_clear_cpu(src_cpu, &p->cpus_allowed);
+			atomic_set_cpu(0, &p->cpus_allowed);
+			p->zerobound = true;
+			bound++;
+			if (local) {
+				bool queued = task_queued(p);
+
+				if (queued)
+					dequeue_task(rq, p, 0);
+				set_task_cpu(p, 0);
+				if (queued)
+					enqueue_task(rq0, p, 0);
+			}
+		}
+	} while_each_thread(t, p);
+
+	if (bound) {
+		printk(KERN_INFO "Removed affinity for %d processes to cpu %d\n",
+		       bound, src_cpu);
+	}
+}
+
+/* Find processes with the zerobound flag and reenable their affinity for the
+ * CPU coming alive. */
+static void unbind_zero(int src_cpu)
+{
+	int unbound = 0, zerobound = 0;
+	struct task_struct *p, *t;
+
+	if (src_cpu == 0)
+		return;
+
+	do_each_thread(t, p) {
+		if (!p->mm)
+			p->zerobound = false;
+		if (p->zerobound) {
+			unbound++;
+			cpumask_set_cpu(src_cpu, &p->cpus_allowed);
+			/* Once every CPU affinity has been re-enabled, remove
+			 * the zerobound flag */
+			if (cpumask_subset(cpu_possible_mask, &p->cpus_allowed)) {
+				p->zerobound = false;
+				zerobound++;
+			}
+		}
+	} while_each_thread(t, p);
+
+	if (unbound) {
+		printk(KERN_INFO "Added affinity for %d processes to cpu %d\n",
+		       unbound, src_cpu);
+	}
+	if (zerobound) {
+		printk(KERN_INFO "Released forced binding to cpu0 for %d processes\n",
+		       zerobound);
+	}
+}
+
+/*
+ * Ensure that the idle task is using init_mm right before its cpu goes
+ * offline.
+ */
+void idle_task_exit(void)
+{
+	struct mm_struct *mm = current->active_mm;
+
+	BUG_ON(cpu_online(smp_processor_id()));
+
+	if (mm != &init_mm) {
+		switch_mm(mm, &init_mm, current);
+		finish_arch_post_lock_switch();
+	}
+	mmdrop(mm);
+}
+#else /* CONFIG_HOTPLUG_CPU */
+static void unbind_zero(int src_cpu) {}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+void sched_set_stop_task(int cpu, struct task_struct *stop)
+{
+	struct sched_param stop_param = { .sched_priority = STOP_PRIO };
+	struct sched_param start_param = { .sched_priority = 0 };
+	struct task_struct *old_stop = cpu_rq(cpu)->stop;
+
+	if (stop) {
+		/*
+		 * Make it appear like a SCHED_FIFO task, its something
+		 * userspace knows about and won't get confused about.
+		 *
+		 * Also, it will make PI more or less work without too
+		 * much confusion -- but then, stop work should not
+		 * rely on PI working anyway.
+		 */
+		sched_setscheduler_nocheck(stop, SCHED_FIFO, &stop_param);
+	}
+
+	cpu_rq(cpu)->stop = stop;
+
+	if (old_stop) {
+		/*
+		 * Reset it back to a normal scheduling policy so that
+		 * it can die in pieces.
+		 */
+		sched_setscheduler_nocheck(old_stop, SCHED_NORMAL, &start_param);
+	}
+}
+
+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
+
+static struct ctl_table sd_ctl_dir[] = {
+	{
+		.procname	= "sched_domain",
+		.mode		= 0555,
+	},
+	{}
+};
+
+static struct ctl_table sd_ctl_root[] = {
+	{
+		.procname	= "kernel",
+		.mode		= 0555,
+		.child		= sd_ctl_dir,
+	},
+	{}
+};
+
+static struct ctl_table *sd_alloc_ctl_entry(int n)
+{
+	struct ctl_table *entry =
+		kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL);
+
+	return entry;
+}
+
+static void sd_free_ctl_entry(struct ctl_table **tablep)
+{
+	struct ctl_table *entry;
+
+	/*
+	 * In the intermediate directories, both the child directory and
+	 * procname are dynamically allocated and could fail but the mode
+	 * will always be set. In the lowest directory the names are
+	 * static strings and all have proc handlers.
+	 */
+	for (entry = *tablep; entry->mode; entry++) {
+		if (entry->child)
+			sd_free_ctl_entry(&entry->child);
+		if (entry->proc_handler == NULL)
+			kfree(entry->procname);
+	}
+
+	kfree(*tablep);
+	*tablep = NULL;
+}
+
+#define CPU_LOAD_IDX_MAX 5
+static int min_load_idx = 0;
+static int max_load_idx = CPU_LOAD_IDX_MAX-1;
+
+static void
+set_table_entry(struct ctl_table *entry,
+		const char *procname, void *data, int maxlen,
+		umode_t mode, proc_handler *proc_handler,
+		bool load_idx)
+{
+	entry->procname = procname;
+	entry->data = data;
+	entry->maxlen = maxlen;
+	entry->mode = mode;
+	entry->proc_handler = proc_handler;
+
+	if (load_idx) {
+		entry->extra1 = &min_load_idx;
+		entry->extra2 = &max_load_idx;
+	}
+}
+
+static struct ctl_table *
+sd_alloc_ctl_domain_table(struct sched_domain *sd)
+{
+	struct ctl_table *table = sd_alloc_ctl_entry(14);
+
+	if (table == NULL)
+		return NULL;
+
+	set_table_entry(&table[0], "min_interval", &sd->min_interval,
+		sizeof(long), 0644, proc_doulongvec_minmax, false);
+	set_table_entry(&table[1], "max_interval", &sd->max_interval,
+		sizeof(long), 0644, proc_doulongvec_minmax, false);
+	set_table_entry(&table[2], "busy_idx", &sd->busy_idx,
+		sizeof(int), 0644, proc_dointvec_minmax, true);
+	set_table_entry(&table[3], "idle_idx", &sd->idle_idx,
+		sizeof(int), 0644, proc_dointvec_minmax, true);
+	set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx,
+		sizeof(int), 0644, proc_dointvec_minmax, true);
+	set_table_entry(&table[5], "wake_idx", &sd->wake_idx,
+		sizeof(int), 0644, proc_dointvec_minmax, true);
+	set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx,
+		sizeof(int), 0644, proc_dointvec_minmax, true);
+	set_table_entry(&table[7], "busy_factor", &sd->busy_factor,
+		sizeof(int), 0644, proc_dointvec_minmax, false);
+	set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct,
+		sizeof(int), 0644, proc_dointvec_minmax, false);
+	set_table_entry(&table[9], "cache_nice_tries",
+		&sd->cache_nice_tries,
+		sizeof(int), 0644, proc_dointvec_minmax, false);
+	set_table_entry(&table[10], "flags", &sd->flags,
+		sizeof(int), 0644, proc_dointvec_minmax, false);
+	set_table_entry(&table[11], "max_newidle_lb_cost",
+		&sd->max_newidle_lb_cost,
+		sizeof(long), 0644, proc_doulongvec_minmax, false);
+	set_table_entry(&table[12], "name", sd->name,
+		CORENAME_MAX_SIZE, 0444, proc_dostring, false);
+	/* &table[13] is terminator */
+
+	return table;
+}
+
+static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu)
+{
+	struct ctl_table *entry, *table;
+	struct sched_domain *sd;
+	int domain_num = 0, i;
+	char buf[32];
+
+	for_each_domain(cpu, sd)
+		domain_num++;
+	entry = table = sd_alloc_ctl_entry(domain_num + 1);
+	if (table == NULL)
+		return NULL;
+
+	i = 0;
+	for_each_domain(cpu, sd) {
+		snprintf(buf, 32, "domain%d", i);
+		entry->procname = kstrdup(buf, GFP_KERNEL);
+		entry->mode = 0555;
+		entry->child = sd_alloc_ctl_domain_table(sd);
+		entry++;
+		i++;
+	}
+	return table;
+}
+
+static cpumask_var_t sd_sysctl_cpus;
+static struct ctl_table_header *sd_sysctl_header;
+
+void register_sched_domain_sysctl(void)
+{
+	static struct ctl_table *cpu_entries;
+	static struct ctl_table **cpu_idx;
+	char buf[32];
+	int i;
+
+	if (!cpu_entries) {
+		cpu_entries = sd_alloc_ctl_entry(num_possible_cpus() + 1);
+		if (!cpu_entries)
+			return;
+
+		WARN_ON(sd_ctl_dir[0].child);
+		sd_ctl_dir[0].child = cpu_entries;
+	}
+
+	if (!cpu_idx) {
+		struct ctl_table *e = cpu_entries;
+
+		cpu_idx = kcalloc(nr_cpu_ids, sizeof(struct ctl_table*), GFP_KERNEL);
+		if (!cpu_idx)
+			return;
+
+		/* deal with sparse possible map */
+		for_each_possible_cpu(i) {
+			cpu_idx[i] = e;
+			e++;
+		}
+	}
+
+	if (!cpumask_available(sd_sysctl_cpus)) {
+		if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL))
+			return;
+
+		/* init to possible to not have holes in @cpu_entries */
+		cpumask_copy(sd_sysctl_cpus, cpu_possible_mask);
+	}
+
+	for_each_cpu(i, sd_sysctl_cpus) {
+		struct ctl_table *e = cpu_idx[i];
+
+		if (e->child)
+			sd_free_ctl_entry(&e->child);
+
+		if (!e->procname) {
+			snprintf(buf, 32, "cpu%d", i);
+			e->procname = kstrdup(buf, GFP_KERNEL);
+		}
+		e->mode = 0555;
+		e->child = sd_alloc_ctl_cpu_table(i);
+
+		__cpumask_clear_cpu(i, sd_sysctl_cpus);
+	}
+
+	WARN_ON(sd_sysctl_header);
+	sd_sysctl_header = register_sysctl_table(sd_ctl_root);
+}
+
+void dirty_sched_domain_sysctl(int cpu)
+{
+	if (cpumask_available(sd_sysctl_cpus))
+		__cpumask_set_cpu(cpu, sd_sysctl_cpus);
+}
+
+/* may be called multiple times per register */
+void unregister_sched_domain_sysctl(void)
+{
+	unregister_sysctl_table(sd_sysctl_header);
+	sd_sysctl_header = NULL;
+}
+#endif /* CONFIG_SYSCTL */
+
+void set_rq_online(struct rq *rq)
+{
+	if (!rq->online) {
+		cpumask_set_cpu(cpu_of(rq), rq->rd->online);
+		rq->online = true;
+	}
+}
+
+void set_rq_offline(struct rq *rq)
+{
+	if (rq->online) {
+		int cpu = cpu_of(rq);
+
+		cpumask_clear_cpu(cpu, rq->rd->online);
+		rq->online = false;
+		clear_cpuidle_map(cpu);
+	}
+}
+
+/*
+ * used to mark begin/end of suspend/resume:
+ */
+static int num_cpus_frozen;
+
+/*
+ * Update cpusets according to cpu_active mask.  If cpusets are
+ * disabled, cpuset_update_active_cpus() becomes a simple wrapper
+ * around partition_sched_domains().
+ *
+ * If we come here as part of a suspend/resume, don't touch cpusets because we
+ * want to restore it back to its original state upon resume anyway.
+ */
+static void cpuset_cpu_active(void)
+{
+	if (cpuhp_tasks_frozen) {
+		/*
+		 * num_cpus_frozen tracks how many CPUs are involved in suspend
+		 * resume sequence. As long as this is not the last online
+		 * operation in the resume sequence, just build a single sched
+		 * domain, ignoring cpusets.
+		 */
+		partition_sched_domains(1, NULL, NULL);
+		if (--num_cpus_frozen)
+			return;
+		/*
+		 * This is the last CPU online operation. So fall through and
+		 * restore the original sched domains by considering the
+		 * cpuset configurations.
+		 */
+		cpuset_force_rebuild();
+	}
+
+	cpuset_update_active_cpus();
+}
+
+static int cpuset_cpu_inactive(unsigned int cpu)
+{
+	if (!cpuhp_tasks_frozen) {
+		cpuset_update_active_cpus();
+	} else {
+		num_cpus_frozen++;
+		partition_sched_domains(1, NULL, NULL);
+	}
+	return 0;
+}
+
+int sched_cpu_activate(unsigned int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long flags;
+
+	set_cpu_active(cpu, true);
+
+	if (sched_smp_initialized) {
+		sched_domains_numa_masks_set(cpu);
+		cpuset_cpu_active();
+	}
+
+	/*
+	 * Put the rq online, if not already. This happens:
+	 *
+	 * 1) In the early boot process, because we build the real domains
+	 *    after all CPUs have been brought up.
+	 *
+	 * 2) At runtime, if cpuset_cpu_active() fails to rebuild the
+	 *    domains.
+	 */
+	rq_lock_irqsave(rq, &flags);
+	if (rq->rd) {
+		BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
+		set_rq_online(rq);
+	}
+	unbind_zero(cpu);
+	rq_unlock_irqrestore(rq, &flags);
+
+	return 0;
+}
+
+int sched_cpu_deactivate(unsigned int cpu)
+{
+	int ret;
+
+	set_cpu_active(cpu, false);
+	/*
+	 * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU
+	 * users of this state to go away such that all new such users will
+	 * observe it.
+	 *
+	 * Do sync before park smpboot threads to take care the rcu boost case.
+	 */
+	synchronize_rcu_mult(call_rcu, call_rcu_sched);
+
+	if (!sched_smp_initialized)
+		return 0;
+
+	ret = cpuset_cpu_inactive(cpu);
+	if (ret) {
+		set_cpu_active(cpu, true);
+		return ret;
+	}
+	sched_domains_numa_masks_clear(cpu);
+	return 0;
+}
+
+int sched_cpu_starting(unsigned int __maybe_unused cpu)
+{
+	return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+int sched_cpu_dying(unsigned int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long flags;
+
+	local_irq_save(flags);
+	double_rq_lock(rq, cpu_rq(0));
+	if (rq->rd) {
+		BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
+		set_rq_offline(rq);
+	}
+	bind_zero(cpu);
+	double_rq_unlock(rq, cpu_rq(0));
+	sched_start_tick(rq, cpu);
+	hrexpiry_clear(rq);
+	local_irq_restore(flags);
+
+	return 0;
+}
+#endif
+
+#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC)
+/*
+ * Cheaper version of the below functions in case support for SMT and MC is
+ * compiled in but CPUs have no siblings.
+ */
+static bool sole_cpu_idle(struct rq *rq)
+{
+	return rq_idle(rq);
+}
+#endif
+#ifdef CONFIG_SCHED_SMT
+static const cpumask_t *thread_cpumask(int cpu)
+{
+	return topology_sibling_cpumask(cpu);
+}
+/* All this CPU's SMT siblings are idle */
+static bool siblings_cpu_idle(struct rq *rq)
+{
+	return cpumask_subset(&rq->thread_mask, &cpu_idle_map);
+}
+#endif
+#ifdef CONFIG_SCHED_MC
+static const cpumask_t *core_cpumask(int cpu)
+{
+	return topology_core_cpumask(cpu);
+}
+/* All this CPU's shared cache siblings are idle */
+static bool cache_cpu_idle(struct rq *rq)
+{
+	return cpumask_subset(&rq->core_mask, &cpu_idle_map);
+}
+#endif
+
+enum sched_domain_level {
+	SD_LV_NONE = 0,
+	SD_LV_SIBLING,
+	SD_LV_MC,
+	SD_LV_BOOK,
+	SD_LV_CPU,
+	SD_LV_NODE,
+	SD_LV_ALLNODES,
+	SD_LV_MAX
+};
+
+void __init sched_init_smp(void)
+{
+	struct rq *rq, *other_rq, *leader;
+	struct sched_domain *sd;
+	int cpu, other_cpu, i;
+#ifdef CONFIG_SCHED_SMT
+	bool smt_threads = false;
+#endif
+	sched_init_numa();
+
+	/*
+	 * There's no userspace yet to cause hotplug operations; hence all the
+	 * cpu masks are stable and all blatant races in the below code cannot
+	 * happen.
+	 */
+	mutex_lock(&sched_domains_mutex);
+	sched_init_domains(cpu_active_mask);
+	mutex_unlock(&sched_domains_mutex);
+
+	/* Move init over to a non-isolated CPU */
+	if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0)
+		BUG();
+
+	mutex_lock(&sched_domains_mutex);
+	local_irq_disable();
+	lock_all_rqs();
+	/*
+	 * Set up the relative cache distance of each online cpu from each
+	 * other in a simple array for quick lookup. Locality is determined
+	 * by the closest sched_domain that CPUs are separated by. CPUs with
+	 * shared cache in SMT and MC are treated as local. Separate CPUs
+	 * (within the same package or physically) within the same node are
+	 * treated as not local. CPUs not even in the same domain (different
+	 * nodes) are treated as very distant.
+	 */
+	for_each_online_cpu(cpu) {
+		rq = cpu_rq(cpu);
+
+		/* First check if this cpu is in the same node */
+		for_each_domain(cpu, sd) {
+			if (sd->level > SD_LV_MC)
+				continue;
+			leader = NULL;
+			/* Set locality to local node if not already found lower */
+			for_each_cpu(other_cpu, sched_domain_span(sd)) {
+				if (rqshare == RQSHARE_SMP) {
+					other_rq = cpu_rq(other_cpu);
+
+					/* Set the smp_leader to the first CPU */
+					if (!leader)
+						leader = rq;
+					other_rq->smp_leader = leader;
+				}
+
+				if (rq->cpu_locality[other_cpu] > 3)
+					rq->cpu_locality[other_cpu] = 3;
+			}
+		}
+
+		/*
+		 * Each runqueue has its own function in case it doesn't have
+		 * siblings of its own allowing mixed topologies.
+		 */
+#ifdef CONFIG_SCHED_MC
+		leader = NULL;
+		if (cpumask_weight(core_cpumask(cpu)) > 1) {
+			cpumask_copy(&rq->core_mask, core_cpumask(cpu));
+			cpumask_clear_cpu(cpu, &rq->core_mask);
+			for_each_cpu(other_cpu, core_cpumask(cpu)) {
+				if (rqshare == RQSHARE_MC) {
+					other_rq = cpu_rq(other_cpu);
+
+					/* Set the mc_leader to the first CPU */
+					if (!leader)
+						leader = rq;
+					other_rq->mc_leader = leader;
+				}
+				if (rq->cpu_locality[other_cpu] > 2)
+					rq->cpu_locality[other_cpu] = 2;
+			}
+			rq->cache_idle = cache_cpu_idle;
+		}
+#endif
+#ifdef CONFIG_SCHED_SMT
+		leader = NULL;
+		if (cpumask_weight(thread_cpumask(cpu)) > 1) {
+			cpumask_copy(&rq->thread_mask, thread_cpumask(cpu));
+			cpumask_clear_cpu(cpu, &rq->thread_mask);
+			for_each_cpu(other_cpu, thread_cpumask(cpu)) {
+				if (rqshare == RQSHARE_SMT) {
+					other_rq = cpu_rq(other_cpu);
+
+					/* Set the smt_leader to the first CPU */
+					if (!leader)
+						leader = rq;
+					other_rq->smt_leader = leader;
+				}
+				if (rq->cpu_locality[other_cpu] > 1)
+					rq->cpu_locality[other_cpu] = 1;
+			}
+			rq->siblings_idle = siblings_cpu_idle;
+			smt_threads = true;
+		}
+#endif
+	}
+
+#ifdef CONFIG_SMT_NICE
+	if (smt_threads) {
+		check_siblings = &check_smt_siblings;
+		wake_siblings = &wake_smt_siblings;
+		smt_schedule = &smt_should_schedule;
+	}
+#endif
+	unlock_all_rqs();
+	local_irq_enable();
+	mutex_unlock(&sched_domains_mutex);
+
+	for_each_online_cpu(cpu) {
+		rq = cpu_rq(cpu);
+
+		for_each_online_cpu(other_cpu) {
+			if (other_cpu <= cpu)
+				continue;
+			printk(KERN_DEBUG "MuQSS locality CPU %d to %d: %d\n", cpu, other_cpu, rq->cpu_locality[other_cpu]);
+		}
+	}
+
+	for_each_online_cpu(cpu) {
+		rq = cpu_rq(cpu);
+		leader = rq->smp_leader;
+
+		rq_lock(rq);
+		if (leader && rq != leader) {
+			printk(KERN_INFO "Sharing SMP runqueue from CPU %d to CPU %d\n",
+			       leader->cpu, rq->cpu);
+			kfree(rq->node);
+			kfree(rq->sl);
+			kfree(rq->lock);
+			rq->node = leader->node;
+			rq->sl = leader->sl;
+			rq->lock = leader->lock;
+			barrier();
+			/* To make up for not unlocking the freed runlock */
+			preempt_enable();
+		} else
+			rq_unlock(rq);
+	}
+
+#ifdef CONFIG_SCHED_MC
+	for_each_online_cpu(cpu) {
+		rq = cpu_rq(cpu);
+		leader = rq->mc_leader;
+
+		rq_lock(rq);
+		if (leader && rq != leader) {
+			printk(KERN_INFO "Sharing MC runqueue from CPU %d to CPU %d\n",
+			       leader->cpu, rq->cpu);
+			kfree(rq->node);
+			kfree(rq->sl);
+			kfree(rq->lock);
+			rq->node = leader->node;
+			rq->sl = leader->sl;
+			rq->lock = leader->lock;
+			barrier();
+			/* To make up for not unlocking the freed runlock */
+			preempt_enable();
+		} else
+			rq_unlock(rq);
+	}
+#endif /* CONFIG_SCHED_MC */
+
+#ifdef CONFIG_SCHED_SMT
+	for_each_online_cpu(cpu) {
+		rq = cpu_rq(cpu);
+
+		leader = rq->smt_leader;
+
+		rq_lock(rq);
+		if (leader && rq != leader) {
+			printk(KERN_INFO "Sharing SMT runqueue from CPU %d to CPU %d\n",
+			       leader->cpu, rq->cpu);
+			kfree(rq->node);
+			kfree(rq->sl);
+			kfree(rq->lock);
+			rq->node = leader->node;
+			rq->sl = leader->sl;
+			rq->lock = leader->lock;
+			barrier();
+			/* To make up for not unlocking the freed runlock */
+			preempt_enable();
+		} else
+			rq_unlock(rq);
+	}
+#endif /* CONFIG_SCHED_SMT */
+
+	total_runqueues = 0;
+	for_each_possible_cpu(cpu) {
+		int locality, total_rqs = 0, total_cpus = 0;
+
+		rq = cpu_rq(cpu);
+		if (
+#ifdef CONFIG_SCHED_MC
+		    (rq->mc_leader == rq) &&
+#endif
+#ifdef CONFIG_SCHED_SMT
+		    (rq->smt_leader == rq) &&
+#endif
+	            (rq->smp_leader == rq))
+			total_runqueues++;
+
+		for (locality = 0; locality <= 4; locality++) {
+			int test_cpu;
+
+			for_each_possible_cpu(test_cpu) {
+				/* Work from each CPU up instead of every rq
+				 * starting at CPU 0 */
+				other_cpu = test_cpu + cpu;
+				other_cpu %= num_possible_cpus();
+				other_rq = cpu_rq(other_cpu);
+
+				if (rq->cpu_locality[other_cpu] == locality) {
+					rq->cpu_order[total_cpus++] = other_rq;
+					if (
+
+#ifdef CONFIG_SCHED_MC
+					    (other_rq->mc_leader == other_rq) &&
+#endif
+#ifdef CONFIG_SCHED_SMT
+					    (other_rq->smt_leader == other_rq) &&
+#endif
+					    (other_rq->smp_leader == other_rq))
+						rq->rq_order[total_rqs++] = other_rq;
+				}
+			}
+		}
+	}
+
+	for_each_possible_cpu(cpu) {
+		rq = cpu_rq(cpu);
+		for (i = 0; i < total_runqueues; i++) {
+			printk(KERN_DEBUG "CPU %d RQ order %d RQ %d\n", cpu, i,
+			       rq->rq_order[i]->cpu);
+		}
+	}
+	for_each_possible_cpu(cpu) {
+		rq = cpu_rq(cpu);
+		for (i = 0; i < num_possible_cpus(); i++) {
+			printk(KERN_DEBUG "CPU %d CPU order %d RQ %d\n", cpu, i,
+			       rq->cpu_order[i]->cpu);
+		}
+	}
+	switch (rqshare) {
+		case RQSHARE_SMP:
+			printk(KERN_INFO "MuQSS runqueue share type SMP total runqueues: %d\n",
+				total_runqueues);
+			break;
+		case RQSHARE_MC:
+			printk(KERN_INFO "MuQSS runqueue share type MC total runqueues: %d\n",
+				total_runqueues);
+			break;
+		case RQSHARE_SMT:
+			printk(KERN_INFO "MuQSS runqueue share type SMT total runqueues: %d\n",
+				total_runqueues);
+			break;
+		case RQSHARE_NONE:
+			printk(KERN_INFO "MuQSS runqueue share type none total runqueues: %d\n",
+				total_runqueues);
+			break;
+	}
+
+	sched_smp_initialized = true;
+}
+#else
+void __init sched_init_smp(void)
+{
+	sched_smp_initialized = true;
+}
+#endif /* CONFIG_SMP */
+
+int in_sched_functions(unsigned long addr)
+{
+	return in_lock_functions(addr) ||
+		(addr >= (unsigned long)__sched_text_start
+		&& addr < (unsigned long)__sched_text_end);
+}
+
+#ifdef CONFIG_CGROUP_SCHED
+/* task group related information */
+struct task_group {
+	struct cgroup_subsys_state css;
+
+	struct rcu_head rcu;
+	struct list_head list;
+
+	struct task_group *parent;
+	struct list_head siblings;
+	struct list_head children;
+};
+
+/*
+ * Default task group.
+ * Every task in system belongs to this group at bootup.
+ */
+struct task_group root_task_group;
+LIST_HEAD(task_groups);
+
+/* Cacheline aligned slab cache for task_group */
+static struct kmem_cache *task_group_cache __read_mostly;
+#endif /* CONFIG_CGROUP_SCHED */
+
+void __init sched_init(void)
+{
+#ifdef CONFIG_SMP
+	int cpu_ids;
+#endif
+	int i;
+	struct rq *rq;
+
+	sched_clock_init();
+
+	wait_bit_init();
+
+	prio_ratios[0] = 128;
+	for (i = 1 ; i < NICE_WIDTH ; i++)
+		prio_ratios[i] = prio_ratios[i - 1] * 11 / 10;
+
+	skiplist_node_init(&init_task.node);
+
+#ifdef CONFIG_SMP
+	init_defrootdomain();
+	cpumask_clear(&cpu_idle_map);
+#else
+	uprq = &per_cpu(runqueues, 0);
+#endif
+
+#ifdef CONFIG_CGROUP_SCHED
+	task_group_cache = KMEM_CACHE(task_group, 0);
+
+	list_add(&root_task_group.list, &task_groups);
+	INIT_LIST_HEAD(&root_task_group.children);
+	INIT_LIST_HEAD(&root_task_group.siblings);
+#endif /* CONFIG_CGROUP_SCHED */
+	for_each_possible_cpu(i) {
+		rq = cpu_rq(i);
+		rq->node = kmalloc(sizeof(skiplist_node), GFP_ATOMIC);
+		skiplist_init(rq->node);
+		rq->sl = new_skiplist(rq->node);
+		rq->lock = kmalloc(sizeof(raw_spinlock_t), GFP_ATOMIC);
+		raw_spin_lock_init(rq->lock);
+		rq->nr_running = 0;
+		rq->nr_uninterruptible = 0;
+		rq->nr_switches = 0;
+		rq->clock = rq->old_clock = rq->last_niffy = rq->niffies = 0;
+		rq->last_jiffy = jiffies;
+		rq->user_ns = rq->nice_ns = rq->softirq_ns = rq->system_ns =
+			      rq->iowait_ns = rq->idle_ns = 0;
+		rq->dither = 0;
+		set_rq_task(rq, &init_task);
+		rq->iso_ticks = 0;
+		rq->iso_refractory = false;
+#ifdef CONFIG_SMP
+		rq->smp_leader = rq;
+#ifdef CONFIG_SCHED_MC
+		rq->mc_leader = rq;
+#endif
+#ifdef CONFIG_SCHED_SMT
+		rq->smt_leader = rq;
+#endif
+		rq->sd = NULL;
+		rq->rd = NULL;
+		rq->online = false;
+		rq->cpu = i;
+		rq_attach_root(rq, &def_root_domain);
+#endif
+		init_rq_hrexpiry(rq);
+		atomic_set(&rq->nr_iowait, 0);
+	}
+
+#ifdef CONFIG_SMP
+	cpu_ids = i;
+	/*
+	 * Set the base locality for cpu cache distance calculation to
+	 * "distant" (3). Make sure the distance from a CPU to itself is 0.
+	 */
+	for_each_possible_cpu(i) {
+		int j;
+
+		rq = cpu_rq(i);
+#ifdef CONFIG_SCHED_SMT
+		rq->siblings_idle = sole_cpu_idle;
+#endif
+#ifdef CONFIG_SCHED_MC
+		rq->cache_idle = sole_cpu_idle;
+#endif
+		rq->cpu_locality = kmalloc(cpu_ids * sizeof(int *), GFP_ATOMIC);
+		for_each_possible_cpu(j) {
+			if (i == j)
+				rq->cpu_locality[j] = 0;
+			else
+				rq->cpu_locality[j] = 4;
+		}
+		rq->rq_order = kmalloc(cpu_ids * sizeof(struct rq *), GFP_ATOMIC);
+		rq->cpu_order = kmalloc(cpu_ids * sizeof(struct rq *), GFP_ATOMIC);
+		rq->rq_order[0] = rq->cpu_order[0] = rq;
+		for (j = 1; j < cpu_ids; j++)
+			rq->rq_order[j] = rq->cpu_order[j] = cpu_rq(j);
+	}
+#endif
+
+	/*
+	 * The boot idle thread does lazy MMU switching as well:
+	 */
+	mmgrab(&init_mm);
+	enter_lazy_tlb(&init_mm, current);
+
+	/*
+	 * Make us the idle thread. Technically, schedule() should not be
+	 * called from this thread, however somewhere below it might be,
+	 * but because we are the idle thread, we just pick up running again
+	 * when this runqueue becomes "idle".
+	 */
+	init_idle(current, smp_processor_id());
+
+#ifdef CONFIG_SMP
+	idle_thread_set_boot_cpu();
+#endif /* SMP */
+
+	init_schedstats();
+}
+
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+static inline int preempt_count_equals(int preempt_offset)
+{
+	int nested = preempt_count() + rcu_preempt_depth();
+
+	return (nested == preempt_offset);
+}
+
+void __might_sleep(const char *file, int line, int preempt_offset)
+{
+	/*
+	 * Blocking primitives will set (and therefore destroy) current->state,
+	 * since we will exit with TASK_RUNNING make sure we enter with it,
+	 * otherwise we will destroy state.
+	 */
+	WARN_ONCE(current->state != TASK_RUNNING && current->task_state_change,
+			"do not call blocking ops when !TASK_RUNNING; "
+			"state=%lx set at [<%p>] %pS\n",
+			current->state,
+			(void *)current->task_state_change,
+			(void *)current->task_state_change);
+
+	___might_sleep(file, line, preempt_offset);
+}
+EXPORT_SYMBOL(__might_sleep);
+
+void ___might_sleep(const char *file, int line, int preempt_offset)
+{
+	/* Ratelimiting timestamp: */
+	static unsigned long prev_jiffy;
+
+	unsigned long preempt_disable_ip;
+
+	/* WARN_ON_ONCE() by default, no rate limit required: */
+	rcu_sleep_check();
+
+	if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
+	     !is_idle_task(current)) ||
+	    system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING ||
+	    oops_in_progress)
+		return;
+
+	if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
+		return;
+	prev_jiffy = jiffies;
+
+	/* Save this before calling printk(), since that will clobber it: */
+	preempt_disable_ip = get_preempt_disable_ip(current);
+
+	printk(KERN_ERR
+		"BUG: sleeping function called from invalid context at %s:%d\n",
+			file, line);
+	printk(KERN_ERR
+		"in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n",
+			in_atomic(), irqs_disabled(),
+			current->pid, current->comm);
+
+	if (task_stack_end_corrupted(current))
+		printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");
+
+	debug_show_held_locks(current);
+	if (irqs_disabled())
+		print_irqtrace_events(current);
+	if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)
+	    && !preempt_count_equals(preempt_offset)) {
+		pr_err("Preemption disabled at:");
+		print_ip_sym(preempt_disable_ip);
+		pr_cont("\n");
+	}
+	dump_stack();
+	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
+}
+EXPORT_SYMBOL(___might_sleep);
+#endif
+
+#ifdef CONFIG_MAGIC_SYSRQ
+static inline void normalise_rt_tasks(void)
+{
+	struct task_struct *g, *p;
+	unsigned long flags;
+	struct rq *rq;
+
+	read_lock(&tasklist_lock);
+	for_each_process_thread(g, p) {
+		/*
+		 * Only normalize user tasks:
+		 */
+		if (p->flags & PF_KTHREAD)
+			continue;
+
+		if (!rt_task(p) && !iso_task(p))
+			continue;
+
+		rq = task_rq_lock(p, &flags);
+		__setscheduler(p, rq, SCHED_NORMAL, 0, false);
+		task_rq_unlock(rq, p, &flags);
+	}
+	read_unlock(&tasklist_lock);
+}
+
+void normalize_rt_tasks(void)
+{
+	normalise_rt_tasks();
+}
+#endif /* CONFIG_MAGIC_SYSRQ */
+
+#if defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB)
+/*
+ * These functions are only useful for the IA64 MCA handling, or kdb.
+ *
+ * They can only be called when the whole system has been
+ * stopped - every CPU needs to be quiescent, and no scheduling
+ * activity can take place. Using them for anything else would
+ * be a serious bug, and as a result, they aren't even visible
+ * under any other configuration.
+ */
+
+/**
+ * curr_task - return the current task for a given CPU.
+ * @cpu: the processor in question.
+ *
+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!
+ *
+ * Return: The current task for @cpu.
+ */
+struct task_struct *curr_task(int cpu)
+{
+	return cpu_curr(cpu);
+}
+
+#endif /* defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) */
+
+#ifdef CONFIG_IA64
+/**
+ * set_curr_task - set the current task for a given CPU.
+ * @cpu: the processor in question.
+ * @p: the task pointer to set.
+ *
+ * Description: This function must only be used when non-maskable interrupts
+ * are serviced on a separate stack.  It allows the architecture to switch the
+ * notion of the current task on a CPU in a non-blocking manner.  This function
+ * must be called with all CPU's synchronised, and interrupts disabled, the
+ * and caller must save the original value of the current task (see
+ * curr_task() above) and restore that value before reenabling interrupts and
+ * re-starting the system.
+ *
+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!
+ */
+void ia64_set_curr_task(int cpu, struct task_struct *p)
+{
+	cpu_curr(cpu) = p;
+}
+
+#endif
+
+void init_idle_bootup_task(struct task_struct *idle)
+{}
+
+#ifdef CONFIG_SCHED_DEBUG
+__read_mostly bool sched_debug_enabled;
+
+void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
+			  struct seq_file *m)
+{}
+
+void proc_sched_set_task(struct task_struct *p)
+{}
+#endif
+
+#ifdef CONFIG_SMP
+#define SCHED_LOAD_SHIFT	(10)
+#define SCHED_LOAD_SCALE	(1L << SCHED_LOAD_SHIFT)
+
+unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu)
+{
+	return SCHED_LOAD_SCALE;
+}
+
+unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu)
+{
+	unsigned long weight = cpumask_weight(sched_domain_span(sd));
+	unsigned long smt_gain = sd->smt_gain;
+
+	smt_gain /= weight;
+
+	return smt_gain;
+}
+#endif
+
+#ifdef CONFIG_CGROUP_SCHED
+static void sched_free_group(struct task_group *tg)
+{
+	kmem_cache_free(task_group_cache, tg);
+}
+
+/* allocate runqueue etc for a new task group */
+struct task_group *sched_create_group(struct task_group *parent)
+{
+	struct task_group *tg;
+
+	tg = kmem_cache_alloc(task_group_cache, GFP_KERNEL | __GFP_ZERO);
+	if (!tg)
+		return ERR_PTR(-ENOMEM);
+
+	return tg;
+}
+
+void sched_online_group(struct task_group *tg, struct task_group *parent)
+{
+}
+
+/* rcu callback to free various structures associated with a task group */
+static void sched_free_group_rcu(struct rcu_head *rhp)
+{
+	/* Now it should be safe to free those cfs_rqs */
+	sched_free_group(container_of(rhp, struct task_group, rcu));
+}
+
+void sched_destroy_group(struct task_group *tg)
+{
+	/* Wait for possible concurrent references to cfs_rqs complete */
+	call_rcu(&tg->rcu, sched_free_group_rcu);
+}
+
+void sched_offline_group(struct task_group *tg)
+{
+}
+
+static inline struct task_group *css_tg(struct cgroup_subsys_state *css)
+{
+	return css ? container_of(css, struct task_group, css) : NULL;
+}
+
+static struct cgroup_subsys_state *
+cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
+{
+	struct task_group *parent = css_tg(parent_css);
+	struct task_group *tg;
+
+	if (!parent) {
+		/* This is early initialization for the top cgroup */
+		return &root_task_group.css;
+	}
+
+	tg = sched_create_group(parent);
+	if (IS_ERR(tg))
+		return ERR_PTR(-ENOMEM);
+	return &tg->css;
+}
+
+/* Expose task group only after completing cgroup initialization */
+static int cpu_cgroup_css_online(struct cgroup_subsys_state *css)
+{
+	struct task_group *tg = css_tg(css);
+	struct task_group *parent = css_tg(css->parent);
+
+	if (parent)
+		sched_online_group(tg, parent);
+	return 0;
+}
+
+static void cpu_cgroup_css_released(struct cgroup_subsys_state *css)
+{
+	struct task_group *tg = css_tg(css);
+
+	sched_offline_group(tg);
+}
+
+static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
+{
+	struct task_group *tg = css_tg(css);
+
+	/*
+	 * Relies on the RCU grace period between css_released() and this.
+	 */
+	sched_free_group(tg);
+}
+
+static void cpu_cgroup_fork(struct task_struct *task)
+{
+}
+
+static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
+{
+	return 0;
+}
+
+static void cpu_cgroup_attach(struct cgroup_taskset *tset)
+{
+}
+
+static struct cftype cpu_legacy_files[] = {
+	{ }	/* Terminate */
+};
+
+static struct cftype cpu_files[] = {
+	{ }	/* terminate */
+};
+
+static int cpu_extra_stat_show(struct seq_file *sf,
+			       struct cgroup_subsys_state *css)
+{
+	return 0;
+}
+
+struct cgroup_subsys cpu_cgrp_subsys = {
+	.css_alloc	= cpu_cgroup_css_alloc,
+	.css_online	= cpu_cgroup_css_online,
+	.css_released	= cpu_cgroup_css_released,
+	.css_free	= cpu_cgroup_css_free,
+	.css_extra_stat_show = cpu_extra_stat_show,
+	.fork		= cpu_cgroup_fork,
+	.can_attach	= cpu_cgroup_can_attach,
+	.attach		= cpu_cgroup_attach,
+	.legacy_cftypes	= cpu_files,
+	.legacy_cftypes	= cpu_legacy_files,
+	.dfl_cftypes	= cpu_files,
+	.early_init	= true,
+	.threaded	= true,
+};
+#endif	/* CONFIG_CGROUP_SCHED */
diff --git a/kernel/sched/MuQSS.h b/kernel/sched/MuQSS.h
new file mode 100644
index 000000000..4784a9486
--- /dev/null
+++ b/kernel/sched/MuQSS.h
@@ -0,0 +1,768 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef MUQSS_SCHED_H
+#define MUQSS_SCHED_H
+
+#include <linux/sched.h>
+#include <linux/cpuidle.h>
+#include <linux/freezer.h>
+#include <linux/interrupt.h>
+#include <linux/skip_list.h>
+#include <linux/stop_machine.h>
+#include <linux/sched/topology.h>
+#include <linux/u64_stats_sync.h>
+#include <linux/tsacct_kern.h>
+#include <linux/sched/clock.h>
+#include <linux/sched/wake_q.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/cpufreq.h>
+#include <linux/sched/stat.h>
+#include <linux/sched/nohz.h>
+#include <linux/sched/debug.h>
+#include <linux/sched/hotplug.h>
+#include <linux/sched/task.h>
+#include <linux/sched/task_stack.h>
+#include <linux/sched/cputime.h>
+#include <linux/sched/init.h>
+
+#include <linux/u64_stats_sync.h>
+#include <linux/kernel_stat.h>
+#include <linux/tick.h>
+#include <linux/slab.h>
+#include <linux/cgroup.h>
+
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#endif
+
+#ifdef CONFIG_SCHED_DEBUG
+# define SCHED_WARN_ON(x)	WARN_ONCE(x, #x)
+#else
+# define SCHED_WARN_ON(x)	((void)(x))
+#endif
+
+/* task_struct::on_rq states: */
+#define TASK_ON_RQ_QUEUED	1
+#define TASK_ON_RQ_MIGRATING	2
+
+struct rq;
+
+#ifdef CONFIG_SMP
+
+static inline bool sched_asym_prefer(int a, int b)
+{
+	return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b);
+}
+
+/*
+ * We add the notion of a root-domain which will be used to define per-domain
+ * variables. Each exclusive cpuset essentially defines an island domain by
+ * fully partitioning the member cpus from any other cpuset. Whenever a new
+ * exclusive cpuset is created, we also create and attach a new root-domain
+ * object.
+ *
+ */
+struct root_domain {
+	atomic_t refcount;
+	atomic_t rto_count;
+	struct rcu_head rcu;
+	cpumask_var_t span;
+	cpumask_var_t online;
+
+	/* Indicate more than one runnable task for any CPU */
+	bool overload;
+
+	/*
+	 * The bit corresponding to a CPU gets set here if such CPU has more
+	 * than one runnable -deadline task (as it is below for RT tasks).
+	 */
+	cpumask_var_t dlo_mask;
+	atomic_t dlo_count;
+	/* Replace unused CFS structures with void */
+	//struct dl_bw dl_bw;
+	//struct cpudl cpudl;
+	void *dl_bw;
+	void *cpudl;
+
+	/*
+	 * The "RT overload" flag: it gets set if a CPU has more than
+	 * one runnable RT task.
+	 */
+	cpumask_var_t rto_mask;
+	//struct cpupri cpupri;
+	void *cpupri;
+
+	unsigned long max_cpu_capacity;
+};
+
+extern struct root_domain def_root_domain;
+extern struct mutex sched_domains_mutex;
+
+extern void init_defrootdomain(void);
+extern int sched_init_domains(const struct cpumask *cpu_map);
+extern void rq_attach_root(struct rq *rq, struct root_domain *rd);
+
+static inline void cpupri_cleanup(void __maybe_unused *cpupri)
+{
+}
+
+static inline void cpudl_cleanup(void __maybe_unused *cpudl)
+{
+}
+
+static inline void init_dl_bw(void __maybe_unused *dl_bw)
+{
+}
+
+static inline int cpudl_init(void __maybe_unused *dl_bw)
+{
+	return 0;
+}
+
+static inline int cpupri_init(void __maybe_unused *cpupri)
+{
+	return 0;
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * This is the main, per-CPU runqueue data structure.
+ * This data should only be modified by the local cpu.
+ */
+struct rq {
+	raw_spinlock_t *lock;
+	raw_spinlock_t *orig_lock;
+
+	struct task_struct *curr, *idle, *stop;
+	struct mm_struct *prev_mm;
+
+	unsigned int nr_running;
+	/*
+	 * This is part of a global counter where only the total sum
+	 * over all CPUs matters. A task can increase this counter on
+	 * one CPU and if it got migrated afterwards it may decrease
+	 * it on another CPU. Always updated under the runqueue lock:
+	 */
+	unsigned long nr_uninterruptible;
+	u64 nr_switches;
+
+	/* Stored data about rq->curr to work outside rq lock */
+	u64 rq_deadline;
+	int rq_prio;
+
+	/* Best queued id for use outside lock */
+	u64 best_key;
+
+	unsigned long last_scheduler_tick; /* Last jiffy this RQ ticked */
+	unsigned long last_jiffy; /* Last jiffy this RQ updated rq clock */
+	u64 niffies; /* Last time this RQ updated rq clock */
+	u64 last_niffy; /* Last niffies as updated by local clock */
+	u64 last_jiffy_niffies; /* Niffies @ last_jiffy */
+
+	u64 load_update; /* When we last updated load */
+	unsigned long load_avg; /* Rolling load average */
+#ifdef CONFIG_SMT_NICE
+	struct mm_struct *rq_mm;
+	int rq_smt_bias; /* Policy/nice level bias across smt siblings */
+#endif
+	/* Accurate timekeeping data */
+	unsigned long user_ns, nice_ns, irq_ns, softirq_ns, system_ns,
+		iowait_ns, idle_ns;
+	atomic_t nr_iowait;
+
+	skiplist_node *node;
+	skiplist *sl;
+#ifdef CONFIG_SMP
+	struct task_struct *preempt; /* Preempt triggered on this task */
+	struct task_struct *preempting; /* Hint only, what task is preempting */
+
+	int cpu;		/* cpu of this runqueue */
+	bool online;
+
+	struct root_domain *rd;
+	struct sched_domain *sd;
+
+	unsigned long cpu_capacity_orig;
+
+	int *cpu_locality; /* CPU relative cache distance */
+	struct rq **rq_order; /* Shared RQs ordered by relative cache distance */
+	struct rq **cpu_order; /* RQs of discrete CPUs ordered by distance */
+
+	struct rq *smp_leader; /* First physical CPU per node */
+#ifdef CONFIG_SCHED_SMT
+	struct rq *smt_leader; /* First logical CPU in SMT siblings */
+	cpumask_t thread_mask;
+	bool (*siblings_idle)(struct rq *rq);
+	/* See if all smt siblings are idle */
+#endif /* CONFIG_SCHED_SMT */
+#ifdef CONFIG_SCHED_MC
+	struct rq *mc_leader; /* First logical CPU in MC siblings */
+	cpumask_t core_mask;
+	bool (*cache_idle)(struct rq *rq);
+	/* See if all cache siblings are idle */
+#endif /* CONFIG_SCHED_MC */
+#endif /* CONFIG_SMP */
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+	u64 prev_irq_time;
+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
+#ifdef CONFIG_PARAVIRT
+	u64 prev_steal_time;
+#endif /* CONFIG_PARAVIRT */
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+	u64 prev_steal_time_rq;
+#endif /* CONFIG_PARAVIRT_TIME_ACCOUNTING */
+
+	u64 clock, old_clock, last_tick;
+	u64 clock_task;
+	int dither;
+
+	int iso_ticks;
+	bool iso_refractory;
+
+#ifdef CONFIG_HIGH_RES_TIMERS
+	struct hrtimer hrexpiry_timer;
+#endif
+
+#ifdef CONFIG_SCHEDSTATS
+
+	/* latency stats */
+	struct sched_info rq_sched_info;
+	unsigned long long rq_cpu_time;
+	/* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
+
+	/* sys_sched_yield() stats */
+	unsigned int yld_count;
+
+	/* schedule() stats */
+	unsigned int sched_switch;
+	unsigned int sched_count;
+	unsigned int sched_goidle;
+
+	/* try_to_wake_up() stats */
+	unsigned int ttwu_count;
+	unsigned int ttwu_local;
+#endif /* CONFIG_SCHEDSTATS */
+
+#ifdef CONFIG_SMP
+	struct llist_head wake_list;
+#endif
+
+#ifdef CONFIG_CPU_IDLE
+	/* Must be inspected within a rcu lock section */
+	struct cpuidle_state *idle_state;
+#endif
+};
+
+#ifdef CONFIG_SMP
+struct rq *cpu_rq(int cpu);
+#endif
+
+#ifndef CONFIG_SMP
+extern struct rq *uprq;
+#define cpu_rq(cpu)	(uprq)
+#define this_rq()	(uprq)
+#define raw_rq()	(uprq)
+#define task_rq(p)	(uprq)
+#define cpu_curr(cpu)	((uprq)->curr)
+#else /* CONFIG_SMP */
+DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
+#define this_rq()		this_cpu_ptr(&runqueues)
+#define raw_rq()		raw_cpu_ptr(&runqueues)
+#define task_rq(p)		cpu_rq(task_cpu(p))
+#endif /* CONFIG_SMP */
+
+static inline int task_current(struct rq *rq, struct task_struct *p)
+{
+	return rq->curr == p;
+}
+
+static inline int task_running(struct rq *rq, struct task_struct *p)
+{
+#ifdef CONFIG_SMP
+	return p->on_cpu;
+#else
+	return task_current(rq, p);
+#endif
+}
+
+static inline void rq_lock(struct rq *rq)
+	__acquires(rq->lock)
+{
+	raw_spin_lock(rq->lock);
+}
+
+static inline void rq_unlock(struct rq *rq)
+	__releases(rq->lock)
+{
+	raw_spin_unlock(rq->lock);
+}
+
+static inline void rq_lock_irq(struct rq *rq)
+	__acquires(rq->lock)
+{
+	raw_spin_lock_irq(rq->lock);
+}
+
+static inline void rq_unlock_irq(struct rq *rq)
+	__releases(rq->lock)
+{
+	raw_spin_unlock_irq(rq->lock);
+}
+
+static inline void rq_lock_irqsave(struct rq *rq, unsigned long *flags)
+	__acquires(rq->lock)
+{
+	raw_spin_lock_irqsave(rq->lock, *flags);
+}
+
+static inline void rq_unlock_irqrestore(struct rq *rq, unsigned long *flags)
+	__releases(rq->lock)
+{
+	raw_spin_unlock_irqrestore(rq->lock, *flags);
+}
+
+static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
+	__acquires(p->pi_lock)
+	__acquires(rq->lock)
+{
+	struct rq *rq;
+
+	while (42) {
+		raw_spin_lock_irqsave(&p->pi_lock, *flags);
+		rq = task_rq(p);
+		raw_spin_lock(rq->lock);
+		if (likely(rq == task_rq(p)))
+			break;
+		raw_spin_unlock(rq->lock);
+		raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
+	}
+	return rq;
+}
+
+static inline void task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
+	__releases(rq->lock)
+	__releases(p->pi_lock)
+{
+	rq_unlock(rq);
+	raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
+}
+
+static inline struct rq *__task_rq_lock(struct task_struct *p)
+	__acquires(rq->lock)
+{
+	struct rq *rq;
+
+	lockdep_assert_held(&p->pi_lock);
+
+	while (42) {
+		rq = task_rq(p);
+		raw_spin_lock(rq->lock);
+		if (likely(rq == task_rq(p)))
+			break;
+		raw_spin_unlock(rq->lock);
+	}
+	return rq;
+}
+
+static inline void __task_rq_unlock(struct rq *rq)
+{
+	rq_unlock(rq);
+}
+
+/*
+ * {de,en}queue flags: Most not used on MuQSS.
+ *
+ * DEQUEUE_SLEEP  - task is no longer runnable
+ * ENQUEUE_WAKEUP - task just became runnable
+ *
+ * SAVE/RESTORE - an otherwise spurious dequeue/enqueue, done to ensure tasks
+ *                are in a known state which allows modification. Such pairs
+ *                should preserve as much state as possible.
+ *
+ * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location
+ *        in the runqueue.
+ *
+ * ENQUEUE_HEAD      - place at front of runqueue (tail if not specified)
+ * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
+ * ENQUEUE_MIGRATED  - the task was migrated during wakeup
+ *
+ */
+
+#define DEQUEUE_SAVE		0x02 /* matches ENQUEUE_RESTORE */
+
+#define ENQUEUE_RESTORE		0x02
+
+static inline u64 __rq_clock_broken(struct rq *rq)
+{
+	return READ_ONCE(rq->clock);
+}
+
+static inline u64 rq_clock(struct rq *rq)
+{
+	lockdep_assert_held(rq->lock);
+
+	return rq->clock;
+}
+
+static inline u64 rq_clock_task(struct rq *rq)
+{
+	lockdep_assert_held(rq->lock);
+
+	return rq->clock_task;
+}
+
+#ifdef CONFIG_NUMA
+enum numa_topology_type {
+	NUMA_DIRECT,
+	NUMA_GLUELESS_MESH,
+	NUMA_BACKPLANE,
+};
+extern enum numa_topology_type sched_numa_topology_type;
+extern int sched_max_numa_distance;
+extern bool find_numa_distance(int distance);
+
+extern void sched_init_numa(void);
+extern void sched_domains_numa_masks_set(unsigned int cpu);
+extern void sched_domains_numa_masks_clear(unsigned int cpu);
+#else
+static inline void sched_init_numa(void) { }
+static inline void sched_domains_numa_masks_set(unsigned int cpu) { }
+static inline void sched_domains_numa_masks_clear(unsigned int cpu) { }
+#endif
+
+extern struct mutex sched_domains_mutex;
+extern struct static_key_false sched_schedstats;
+
+#define rcu_dereference_check_sched_domain(p) \
+	rcu_dereference_check((p), \
+			      lockdep_is_held(&sched_domains_mutex))
+
+#ifdef CONFIG_SMP
+
+/*
+ * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
+ * See detach_destroy_domains: synchronize_sched for details.
+ *
+ * The domain tree of any CPU may only be accessed from within
+ * preempt-disabled sections.
+ */
+#define for_each_domain(cpu, __sd) \
+	for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \
+			__sd; __sd = __sd->parent)
+
+#define for_each_lower_domain(sd) for (; sd; sd = sd->child)
+
+/**
+ * highest_flag_domain - Return highest sched_domain containing flag.
+ * @cpu:	The cpu whose highest level of sched domain is to
+ *		be returned.
+ * @flag:	The flag to check for the highest sched_domain
+ *		for the given cpu.
+ *
+ * Returns the highest sched_domain of a cpu which contains the given flag.
+ */
+static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
+{
+	struct sched_domain *sd, *hsd = NULL;
+
+	for_each_domain(cpu, sd) {
+		if (!(sd->flags & flag))
+			break;
+		hsd = sd;
+	}
+
+	return hsd;
+}
+
+static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
+{
+	struct sched_domain *sd;
+
+	for_each_domain(cpu, sd) {
+		if (sd->flags & flag)
+			break;
+	}
+
+	return sd;
+}
+
+DECLARE_PER_CPU(struct sched_domain *, sd_llc);
+DECLARE_PER_CPU(int, sd_llc_size);
+DECLARE_PER_CPU(int, sd_llc_id);
+DECLARE_PER_CPU(struct sched_domain_shared *, sd_llc_shared);
+DECLARE_PER_CPU(struct sched_domain *, sd_numa);
+DECLARE_PER_CPU(struct sched_domain *, sd_asym);
+
+struct sched_group_capacity {
+	atomic_t ref;
+	/*
+	 * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity
+	 * for a single CPU.
+	 */
+	unsigned long capacity;
+	unsigned long min_capacity; /* Min per-CPU capacity in group */
+	unsigned long next_update;
+	int imbalance; /* XXX unrelated to capacity but shared group state */
+
+#ifdef CONFIG_SCHED_DEBUG
+	int id;
+#endif
+
+	unsigned long cpumask[0]; /* balance mask */
+};
+
+struct sched_group {
+	struct sched_group *next;	/* Must be a circular list */
+	atomic_t ref;
+
+	unsigned int group_weight;
+	struct sched_group_capacity *sgc;
+	int asym_prefer_cpu;		/* cpu of highest priority in group */
+
+	/*
+	 * The CPUs this group covers.
+	 *
+	 * NOTE: this field is variable length. (Allocated dynamically
+	 * by attaching extra space to the end of the structure,
+	 * depending on how many CPUs the kernel has booted up with)
+	 */
+	unsigned long cpumask[0];
+};
+
+static inline struct cpumask *sched_group_span(struct sched_group *sg)
+{
+	return to_cpumask(sg->cpumask);
+}
+
+/*
+ * See build_balance_mask().
+ */
+static inline struct cpumask *group_balance_mask(struct sched_group *sg)
+{
+	return to_cpumask(sg->sgc->cpumask);
+}
+
+/**
+ * group_first_cpu - Returns the first cpu in the cpumask of a sched_group.
+ * @group: The group whose first cpu is to be returned.
+ */
+static inline unsigned int group_first_cpu(struct sched_group *group)
+{
+	return cpumask_first(sched_group_span(group));
+}
+
+
+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
+void register_sched_domain_sysctl(void);
+void dirty_sched_domain_sysctl(int cpu);
+void unregister_sched_domain_sysctl(void);
+#else
+static inline void register_sched_domain_sysctl(void)
+{
+}
+static inline void dirty_sched_domain_sysctl(int cpu)
+{
+}
+static inline void unregister_sched_domain_sysctl(void)
+{
+}
+#endif
+
+extern void sched_ttwu_pending(void);
+extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask);
+extern void set_rq_online (struct rq *rq);
+extern void set_rq_offline(struct rq *rq);
+extern bool sched_smp_initialized;
+
+static inline void update_group_capacity(struct sched_domain *sd, int cpu)
+{
+}
+
+static inline void trigger_load_balance(struct rq *rq)
+{
+}
+
+#define sched_feat(x) 0
+
+#else /* CONFIG_SMP */
+
+static inline void sched_ttwu_pending(void) { }
+
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_CPU_IDLE
+static inline void idle_set_state(struct rq *rq,
+				  struct cpuidle_state *idle_state)
+{
+	rq->idle_state = idle_state;
+}
+
+static inline struct cpuidle_state *idle_get_state(struct rq *rq)
+{
+	SCHED_WARN_ON(!rcu_read_lock_held());
+	return rq->idle_state;
+}
+#else
+static inline void idle_set_state(struct rq *rq,
+				  struct cpuidle_state *idle_state)
+{
+}
+
+static inline struct cpuidle_state *idle_get_state(struct rq *rq)
+{
+	return NULL;
+}
+#endif
+
+#ifdef CONFIG_SCHED_DEBUG
+extern bool sched_debug_enabled;
+#endif
+
+extern void schedule_idle(void);
+
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+struct irqtime {
+	u64			total;
+	u64			tick_delta;
+	u64			irq_start_time;
+	struct u64_stats_sync	sync;
+};
+
+DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
+
+/*
+ * Returns the irqtime minus the softirq time computed by ksoftirqd.
+ * Otherwise ksoftirqd's sum_exec_runtime is substracted its own runtime
+ * and never move forward.
+ */
+static inline u64 irq_time_read(int cpu)
+{
+	struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
+	unsigned int seq;
+	u64 total;
+
+	do {
+		seq = __u64_stats_fetch_begin(&irqtime->sync);
+		total = irqtime->total;
+	} while (__u64_stats_fetch_retry(&irqtime->sync, seq));
+
+	return total;
+}
+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
+
+#ifdef CONFIG_SMP
+static inline int cpu_of(struct rq *rq)
+{
+	return rq->cpu;
+}
+#else /* CONFIG_SMP */
+static inline int cpu_of(struct rq *rq)
+{
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_CPU_FREQ
+DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
+
+static inline void cpufreq_trigger(struct rq *rq, unsigned int flags)
+{
+	struct update_util_data *data;
+
+	data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data,
+						  cpu_of(rq)));
+
+	if (data)
+		data->func(data, rq->niffies, flags);
+}
+#else
+static inline void cpufreq_trigger(struct rq *rq, unsigned int flag)
+{
+}
+#endif /* CONFIG_CPU_FREQ */
+
+#ifdef arch_scale_freq_capacity
+#ifndef arch_scale_freq_invariant
+#define arch_scale_freq_invariant()	(true)
+#endif
+#else /* arch_scale_freq_capacity */
+#define arch_scale_freq_invariant()	(false)
+#endif
+
+/*
+ * This should only be called when current == rq->idle. Dodgy workaround for
+ * when softirqs are pending and we are in the idle loop. Setting current to
+ * resched will kick us out of the idle loop and the softirqs will be serviced
+ * on our next pass through schedule().
+ */
+static inline bool softirq_pending(int cpu)
+{
+	if (likely(!local_softirq_pending()))
+		return false;
+	set_tsk_need_resched(current);
+	return true;
+}
+
+#ifdef CONFIG_64BIT
+static inline u64 read_sum_exec_runtime(struct task_struct *t)
+{
+	return tsk_seruntime(t);
+}
+#else
+struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags);
+void task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags);
+
+static inline u64 read_sum_exec_runtime(struct task_struct *t)
+{
+	unsigned long flags;
+	u64 ns;
+	struct rq *rq;
+
+	rq = task_rq_lock(t, &flags);
+	ns = tsk_seruntime(t);
+	task_rq_unlock(rq, t, &flags);
+
+	return ns;
+}
+#endif
+
+#ifndef arch_scale_freq_capacity
+static __always_inline
+unsigned long arch_scale_freq_capacity(int cpu)
+{
+	return SCHED_CAPACITY_SCALE;
+}
+#endif
+
+#ifndef arch_scale_cpu_capacity
+static __always_inline
+unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
+{
+	if (sd && (sd->flags & SD_SHARE_CPUCAPACITY) && (sd->span_weight > 1))
+		return sd->smt_gain / sd->span_weight;
+
+	return SCHED_CAPACITY_SCALE;
+}
+#endif
+
+#define SCHED_FLAG_SUGOV	0x10000000
+
+#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
+
+static inline unsigned long cpu_util_dl(struct rq *rq)
+{
+	return 0;
+}
+
+static inline unsigned long cpu_util_cfs(struct rq *rq)
+{
+	unsigned long ret = rq->load_avg;
+
+	if (ret > SCHED_CAPACITY_SCALE)
+		ret = SCHED_CAPACITY_SCALE;
+	return ret;
+}
+
+#endif
+
+#endif /* MUQSS_SCHED_H */
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 6a64d45a4..f36a6ab1f 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -481,7 +481,11 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy)
 	struct task_struct *thread;
 	struct sched_attr attr = {
 		.size = sizeof(struct sched_attr),
+#ifdef CONFIG_SCHED_MUQSS
+		.sched_policy = SCHED_ISO,
+#else
 		.sched_policy = SCHED_DEADLINE,
+#endif
 		.sched_flags = SCHED_FLAG_SUGOV,
 		.sched_nice = 0,
 		.sched_priority = 0,
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index bac6ac9a4..e3153ba66 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -269,26 +269,6 @@ static inline u64 account_other_time(u64 max)
 	return accounted;
 }
 
-#ifdef CONFIG_64BIT
-static inline u64 read_sum_exec_runtime(struct task_struct *t)
-{
-	return t->se.sum_exec_runtime;
-}
-#else
-static u64 read_sum_exec_runtime(struct task_struct *t)
-{
-	u64 ns;
-	struct rq_flags rf;
-	struct rq *rq;
-
-	rq = task_rq_lock(t, &rf);
-	ns = t->se.sum_exec_runtime;
-	task_rq_unlock(rq, t, &rf);
-
-	return ns;
-}
-#endif
-
 /*
  * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
  * tasks (sum on group iteration) belonging to @tsk's group.
@@ -666,7 +646,7 @@ out:
 void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
 {
 	struct task_cputime cputime = {
-		.sum_exec_runtime = p->se.sum_exec_runtime,
+		.sum_exec_runtime = tsk_seruntime(p),
 	};
 
 	task_cputime(p, &cputime.utime, &cputime.stime);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5eb3ffc9b..3c6b9b660 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -52,8 +52,13 @@
  *
  * (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds)
  */
+#ifdef CONFIG_PCK_INTERACTIVE
+unsigned int sysctl_sched_latency			= 3000000ULL;
+unsigned int normalized_sysctl_sched_latency		= 3000000ULL;
+#else
 unsigned int sysctl_sched_latency			= 6000000ULL;
 unsigned int normalized_sysctl_sched_latency		= 6000000ULL;
+#endif
 
 /*
  * The initial- and re-scaling of tunables is configurable
@@ -73,13 +78,22 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_L
  *
  * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
  */
+#ifdef CONFIG_PCK_INTERACTIVE
+unsigned int sysctl_sched_min_granularity		= 300000ULL;
+unsigned int normalized_sysctl_sched_min_granularity	= 300000ULL;
+#else
 unsigned int sysctl_sched_min_granularity		= 750000ULL;
 unsigned int normalized_sysctl_sched_min_granularity	= 750000ULL;
+#endif
 
 /*
  * This value is kept at sysctl_sched_latency/sysctl_sched_min_granularity
  */
+#ifdef CONFIG_PCK_INTERACTIVE
+static unsigned int sched_nr_latency = 10;
+#else
 static unsigned int sched_nr_latency = 8;
+#endif
 
 /*
  * After fork, child runs first. If set to 0 (default) then
@@ -96,10 +110,17 @@ unsigned int sysctl_sched_child_runs_first __read_mostly;
  *
  * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
  */
+#ifdef CONFIG_PCK_INTERACTIVE
+unsigned int sysctl_sched_wakeup_granularity		= 500000UL;
+unsigned int normalized_sysctl_sched_wakeup_granularity	= 500000UL;
+
+const_debug unsigned int sysctl_sched_migration_cost	= 250000UL;
+#else
 unsigned int sysctl_sched_wakeup_granularity		= 1000000UL;
 unsigned int normalized_sysctl_sched_wakeup_granularity	= 1000000UL;
 
 const_debug unsigned int sysctl_sched_migration_cost	= 500000UL;
+#endif
 
 #ifdef CONFIG_SMP
 /*
@@ -122,8 +143,12 @@ int __weak arch_asym_cpu_priority(int cpu)
  *
  * (default: 5 msec, units: microseconds)
  */
+#ifdef CONFIG_PCK_INTERACTIVE
+unsigned int sysctl_sched_cfs_bandwidth_slice		= 3000UL;
+#else
 unsigned int sysctl_sched_cfs_bandwidth_slice		= 5000UL;
 #endif
+#endif
 
 /*
  * The margin used when comparing utilization with CPU capacity:
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 7dae9eb8c..0a374d84c 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -210,6 +210,8 @@ exit_idle:
 static void do_idle(void)
 {
 	int cpu = smp_processor_id();
+	bool pending = false;
+
 	/*
 	 * If the arch has a polling bit, we maintain an invariant:
 	 *
@@ -220,7 +222,10 @@ static void do_idle(void)
 	 */
 
 	__current_set_polling();
-	tick_nohz_idle_enter();
+	if (unlikely(softirq_pending(cpu)))
+		pending = true;
+	else
+		tick_nohz_idle_enter();
 
 	while (!need_resched()) {
 		check_pgt_cache();
@@ -255,7 +260,8 @@ static void do_idle(void)
 	 * an IPI to fold the state for us.
 	 */
 	preempt_set_need_resched();
-	tick_nohz_idle_exit();
+	if (!pending)
+		tick_nohz_idle_exit();
 	__current_clr_polling();
 
 	/*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index fb5fc4585..ba3d008c1 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1,5 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 
+#ifdef CONFIG_SCHED_MUQSS
+#include "MuQSS.h"
+#else /* CONFIG_SCHED_MUQSS */
 #include <linux/sched.h>
 #include <linux/sched/autogroup.h>
 #include <linux/sched/sysctl.h>
@@ -2133,3 +2136,30 @@ static inline unsigned long cpu_util_cfs(struct rq *rq)
 }
 
 #endif
+
+/* MuQSS compatibility functions */
+static inline bool softirq_pending(int cpu)
+{
+	return false;
+}
+
+#ifdef CONFIG_64BIT
+static inline u64 read_sum_exec_runtime(struct task_struct *t)
+{
+	return t->se.sum_exec_runtime;
+}
+#else
+static inline u64 read_sum_exec_runtime(struct task_struct *t)
+{
+	u64 ns;
+	struct rq_flags rf;
+	struct rq *rq;
+
+	rq = task_rq_lock(t, &rf);
+	ns = t->se.sum_exec_runtime;
+	task_rq_unlock(rq, t, &rf);
+
+	return ns;
+}
+#endif
+#endif /* CONFIG_SCHED_MUQSS */
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 519b024f4..de9852460 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -227,7 +227,11 @@ void rq_attach_root(struct rq *rq, struct root_domain *rd)
 	struct root_domain *old_rd = NULL;
 	unsigned long flags;
 
+#ifdef CONFIG_SCHED_MUQSS
+	raw_spin_lock_irqsave(rq->lock, flags);
+#else
 	raw_spin_lock_irqsave(&rq->lock, flags);
+#endif
 
 	if (rq->rd) {
 		old_rd = rq->rd;
@@ -253,7 +257,11 @@ void rq_attach_root(struct rq *rq, struct root_domain *rd)
 	if (cpumask_test_cpu(rq->cpu, cpu_active_mask))
 		set_rq_online(rq);
 
+#ifdef CONFIG_SCHED_MUQSS
+	raw_spin_unlock_irqrestore(rq->lock, flags);
+#else
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
+#endif
 
 	if (old_rd)
 		call_rcu_sched(&old_rd->rcu, free_rootdomain);
diff --git a/kernel/skip_list.c b/kernel/skip_list.c
new file mode 100644
index 000000000..bf5c6e97e
--- /dev/null
+++ b/kernel/skip_list.c
@@ -0,0 +1,148 @@
+/*
+  Copyright (C) 2011,2016 Con Kolivas.
+
+  Code based on example originally by William Pugh.
+
+Skip Lists are a probabilistic alternative to balanced trees, as
+described in the June 1990 issue of CACM and were invented by
+William Pugh in 1987.
+
+A couple of comments about this implementation:
+The routine randomLevel has been hard-coded to generate random
+levels using p=0.25. It can be easily changed.
+
+The insertion routine has been implemented so as to use the
+dirty hack described in the CACM paper: if a random level is
+generated that is more than the current maximum level, the
+current maximum level plus one is used instead.
+
+Levels start at zero and go up to MaxLevel (which is equal to
+MaxNumberOfLevels-1).
+
+The routines defined in this file are:
+
+init: defines slnode
+
+new_skiplist: returns a new, empty list
+
+randomLevel: Returns a random level based on a u64 random seed passed to it.
+In MuQSS, the "niffy" time is used for this purpose.
+
+insert(l,key, value): inserts the binding (key, value) into l. This operation
+occurs in O(log n) time.
+
+delnode(slnode, l, node): deletes any binding of key from the l based on the
+actual node value. This operation occurs in O(k) time where k is the
+number of levels of the node in question (max 8). The original delete
+function occurred in O(log n) time and involved a search.
+
+MuQSS Notes: In this implementation of skiplists, there are bidirectional
+next/prev pointers and the insert function returns a pointer to the actual
+node the value is stored. The key here is chosen by the scheduler so as to
+sort tasks according to the priority list requirements and is no longer used
+by the scheduler after insertion. The scheduler lookup, however, occurs in
+O(1) time because it is always the first item in the level 0 linked list.
+Since the task struct stores a copy of the node pointer upon skiplist_insert,
+it can also remove it much faster than the original implementation with the
+aid of prev<->next pointer manipulation and no searching.
+
+*/
+
+#include <linux/slab.h>
+#include <linux/skip_list.h>
+
+#define MaxNumberOfLevels 8
+#define MaxLevel (MaxNumberOfLevels - 1)
+
+void skiplist_init(skiplist_node *slnode)
+{
+	int i;
+
+	slnode->key = 0xFFFFFFFFFFFFFFFF;
+	slnode->level = 0;
+	slnode->value = NULL;
+	for (i = 0; i < MaxNumberOfLevels; i++)
+		slnode->next[i] = slnode->prev[i] = slnode;
+}
+
+skiplist *new_skiplist(skiplist_node *slnode)
+{
+	skiplist *l = kzalloc(sizeof(skiplist), GFP_ATOMIC);
+
+	BUG_ON(!l);
+	l->header = slnode;
+	return l;
+}
+
+void free_skiplist(skiplist *l)
+{
+	skiplist_node *p, *q;
+
+	p = l->header;
+	do {
+		q = p->next[0];
+		p->next[0]->prev[0] = q->prev[0];
+		skiplist_node_init(p);
+		p = q;
+	} while (p != l->header);
+	kfree(l);
+}
+
+void skiplist_node_init(skiplist_node *node)
+{
+	memset(node, 0, sizeof(skiplist_node));
+}
+
+static inline unsigned int randomLevel(const long unsigned int randseed)
+{
+	return find_first_bit(&randseed, MaxLevel) / 2;
+}
+
+void skiplist_insert(skiplist *l, skiplist_node *node, keyType key, valueType value, unsigned int randseed)
+{
+	skiplist_node *update[MaxNumberOfLevels];
+	skiplist_node *p, *q;
+	int k = l->level;
+
+	p = l->header;
+	do {
+		while (q = p->next[k], q->key <= key)
+			p = q;
+		update[k] = p;
+	} while (--k >= 0);
+
+	++l->entries;
+	k = randomLevel(randseed);
+	if (k > l->level) {
+		k = ++l->level;
+		update[k] = l->header;
+	}
+
+	node->level = k;
+	node->key = key;
+	node->value = value;
+	do {
+		p = update[k];
+		node->next[k] = p->next[k];
+		p->next[k] = node;
+		node->prev[k] = p;
+		node->next[k]->prev[k] = node;
+	} while (--k >= 0);
+}
+
+void skiplist_delete(skiplist *l, skiplist_node *node)
+{
+	int k, m = node->level;
+
+	for (k = 0; k <= m; k++) {
+		node->prev[k]->next[k] = node->next[k];
+		node->next[k]->prev[k] = node->prev[k];
+	}
+	skiplist_node_init(node);
+	if (m == l->level) {
+		while (l->header->next[m] == l->header && l->header->prev[m] == l->header && m > 0)
+			m--;
+		l->level = m;
+	}
+	l->entries--;
+}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index f98f28c12..8bc9f2e9f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -105,6 +105,9 @@ extern int core_uses_pid;
 extern char core_pattern[];
 extern unsigned int core_pipe_limit;
 #endif
+#ifdef CONFIG_USER_NS
+extern int unprivileged_userns_clone;
+#endif
 extern int pid_max;
 extern int pid_max_min, pid_max_max;
 extern int percpu_pagelist_fraction;
@@ -126,8 +129,14 @@ static int __maybe_unused one = 1;
 static int __maybe_unused two = 2;
 static int __maybe_unused four = 4;
 static unsigned long one_ul = 1;
-static int one_hundred = 100;
-static int one_thousand = 1000;
+static int __read_mostly one_hundred = 100;
+static int __read_mostly one_thousand = 1000;
+#ifdef CONFIG_SCHED_MUQSS
+extern int rr_interval;
+extern int sched_interactive;
+extern int sched_iso_cpu;
+extern int sched_yield_type;
+#endif
 #ifdef CONFIG_PRINTK
 static int ten_thousand = 10000;
 #endif
@@ -288,7 +297,7 @@ static struct ctl_table sysctl_base_table[] = {
 	{ }
 };
 
-#ifdef CONFIG_SCHED_DEBUG
+#if defined(CONFIG_SCHED_DEBUG) && !defined(CONFIG_SCHED_MUQSS)
 static int min_sched_granularity_ns = 100000;		/* 100 usecs */
 static int max_sched_granularity_ns = NSEC_PER_SEC;	/* 1 second */
 static int min_wakeup_granularity_ns;			/* 0 usecs */
@@ -305,6 +314,7 @@ static int max_extfrag_threshold = 1000;
 #endif
 
 static struct ctl_table kern_table[] = {
+#ifndef CONFIG_SCHED_MUQSS
 	{
 		.procname	= "sched_child_runs_first",
 		.data		= &sysctl_sched_child_runs_first,
@@ -467,6 +477,7 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &one,
 	},
 #endif
+#endif /* !CONFIG_SCHED_MUQSS */
 #ifdef CONFIG_PROVE_LOCKING
 	{
 		.procname	= "prove_locking",
@@ -515,6 +526,15 @@ static struct ctl_table kern_table[] = {
 		.proc_handler	= proc_dointvec,
 	},
 #endif
+#ifdef CONFIG_USER_NS
+	{
+		.procname	= "unprivileged_userns_clone",
+		.data		= &unprivileged_userns_clone,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
 #ifdef CONFIG_PROC_SYSCTL
 	{
 		.procname	= "tainted",
@@ -1025,6 +1045,44 @@ static struct ctl_table kern_table[] = {
 		.proc_handler	= proc_dointvec,
 	},
 #endif
+#ifdef CONFIG_SCHED_MUQSS
+	{
+		.procname	= "rr_interval",
+		.data		= &rr_interval,
+		.maxlen		= sizeof (int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.extra1		= &one,
+		.extra2		= &one_thousand,
+	},
+	{
+		.procname	= "interactive",
+		.data		= &sched_interactive,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
+	{
+		.procname	= "iso_cpu",
+		.data		= &sched_iso_cpu,
+		.maxlen		= sizeof (int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &one_hundred,
+	},
+	{
+		.procname	= "yield_type",
+		.data		= &sched_yield_type,
+		.maxlen		= sizeof (int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &two,
+	},
+#endif
 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
 	{
 		.procname	= "spin_retry",
diff --git a/kernel/task_work.c b/kernel/task_work.c
index 0fef39566..83fb1ecfc 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -116,3 +116,4 @@ void task_work_run(void)
 		} while (work);
 	}
 }
+EXPORT_SYMBOL_GPL(task_work_run);
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 16c027e9c..fcc3fe0a1 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -198,8 +198,13 @@ int clockevents_tick_resume(struct clock_event_device *dev)
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST
 
+#ifdef CONFIG_SCHED_MUQSS
+/* Limit min_delta to 100us */
+#define MIN_DELTA_LIMIT		(NSEC_PER_SEC / 10000)
+#else
 /* Limit min_delta to a jiffie */
 #define MIN_DELTA_LIMIT		(NSEC_PER_SEC / HZ)
+#endif
 
 /**
  * clockevents_increase_min_delta - raise minimum delta of a clock event device
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 5a6251ac6..5d8386746 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -830,7 +830,7 @@ static void check_thread_timers(struct task_struct *tsk,
 	tsk_expires->virt_exp = expires;
 
 	tsk_expires->sched_exp = check_timers_list(++timers, firing,
-						   tsk->se.sum_exec_runtime);
+						   tsk_seruntime(tsk));
 
 	/*
 	 * Check for the special case thread timers.
@@ -840,7 +840,7 @@ static void check_thread_timers(struct task_struct *tsk,
 		unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME);
 
 		if (hard != RLIM_INFINITY &&
-		    tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
+		    tsk_rttimeout(tsk) > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
 			/*
 			 * At the hard limit, we just die.
 			 * No need to calculate anything else now.
@@ -852,7 +852,7 @@ static void check_thread_timers(struct task_struct *tsk,
 			__group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
 			return;
 		}
-		if (tsk->rt.timeout > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) {
+		if (tsk_rttimeout(tsk) > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) {
 			/*
 			 * At the soft limit, send a SIGXCPU every second.
 			 */
@@ -1096,7 +1096,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
 		struct task_cputime task_sample;
 
 		task_cputime(tsk, &task_sample.utime, &task_sample.stime);
-		task_sample.sum_exec_runtime = tsk->se.sum_exec_runtime;
+		task_sample.sum_exec_runtime = tsk_seruntime(tsk);
 		if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
 			return 1;
 	}
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 4a4fd567f..b03e2ca20 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1479,7 +1479,7 @@ static unsigned long __next_timer_interrupt(struct timer_base *base)
  * Check, if the next hrtimer event is before the next timer wheel
  * event:
  */
-static u64 cmp_next_hrtimer_event(u64 basem, u64 expires)
+static u64 cmp_next_hrtimer_event(struct timer_base *base, u64 basem, u64 expires)
 {
 	u64 nextevt = hrtimer_get_next_event();
 
@@ -1497,6 +1497,9 @@ static u64 cmp_next_hrtimer_event(u64 basem, u64 expires)
 	if (nextevt <= basem)
 		return basem;
 
+	if (nextevt < expires && nextevt - basem <= TICK_NSEC)
+		base->is_idle = false;
+
 	/*
 	 * Round up to the next jiffie. High resolution timers are
 	 * off, so the hrtimers are expired in the tick and we need to
@@ -1566,7 +1569,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
 	}
 	raw_spin_unlock(&base->lock);
 
-	return cmp_next_hrtimer_event(basem, expires);
+	return cmp_next_hrtimer_event(base, basem, expires);
 }
 
 /**
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 11e9daa4a..4c4e1d5bd 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -1041,10 +1041,15 @@ static int trace_wakeup_test_thread(void *data)
 {
 	/* Make this a -deadline thread */
 	static const struct sched_attr attr = {
+#ifdef CONFIG_SCHED_MUQSS
+		/* No deadline on MuQSS, use RR */
+		.sched_policy = SCHED_RR,
+#else
 		.sched_policy = SCHED_DEADLINE,
 		.sched_runtime = 100000ULL,
 		.sched_deadline = 10000000ULL,
 		.sched_period = 10000000ULL
+#endif
 	};
 	struct wakeup_test_data *x = data;
 
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 246d4d4ce..4ceada38d 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -26,6 +26,13 @@
 #include <linux/bsearch.h>
 #include <linux/sort.h>
 
+/* sysctl */
+#ifdef CONFIG_USER_NS_UNPRIVILEGED
+int unprivileged_userns_clone = 1;
+#else
+int unprivileged_userns_clone;
+#endif
+
 static struct kmem_cache *user_ns_cachep __read_mostly;
 static DEFINE_MUTEX(userns_state_mutex);
 
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 64155e310..92a2f4ecd 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -986,7 +986,7 @@ config SCHED_INFO
 
 config SCHEDSTATS
 	bool "Collect scheduler statistics"
-	depends on DEBUG_KERNEL && PROC_FS
+	depends on DEBUG_KERNEL && PROC_FS && !SCHED_MUQSS
 	select SCHED_INFO
 	help
 	  If you say Y here, additional code will be inserted into the
@@ -1580,6 +1580,7 @@ config LATENCYTOP
 	depends on DEBUG_KERNEL
 	depends on STACKTRACE_SUPPORT
 	depends on PROC_FS
+	depends on !SCHED_MUQSS
 	select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC && !X86
 	select KALLSYMS
 	select KALLSYMS_ALL
diff --git a/lib/Makefile b/lib/Makefile
index a90d4fcd7..3a3be2029 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -18,7 +18,7 @@ KCOV_INSTRUMENT_debugobjects.o := n
 KCOV_INSTRUMENT_dynamic_debug.o := n
 
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
-	 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
+	 rbtree.o radix-tree.o sradix-tree.o dump_stack.o timerqueue.o\
 	 idr.o int_sqrt.o extable.o \
 	 sha1.o chacha20.o irq_regs.o argv_split.o \
 	 flex_proportions.o ratelimit.o show_mem.o \
diff --git a/lib/sradix-tree.c b/lib/sradix-tree.c
new file mode 100644
index 000000000..ab21e6309
--- /dev/null
+++ b/lib/sradix-tree.c
@@ -0,0 +1,476 @@
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/gcd.h>
+#include <linux/sradix-tree.h>
+
+static inline int sradix_node_full(struct sradix_tree_root *root, struct sradix_tree_node *node)
+{
+	return node->fulls == root->stores_size ||
+		(node->height == 1 && node->count == root->stores_size);
+}
+
+/*
+ *	Extend a sradix tree so it can store key @index.
+ */
+static int sradix_tree_extend(struct sradix_tree_root *root, unsigned long index)
+{
+	struct sradix_tree_node *node;
+	unsigned int height;
+
+	if (unlikely(root->rnode == NULL)) {
+		if (!(node = root->alloc()))
+			return -ENOMEM;
+
+		node->height = 1;
+		root->rnode = node;
+		root->height = 1;
+	}
+
+	/* Figure out what the height should be.  */
+	height = root->height;
+	index >>= root->shift * height;
+
+	while (index) {
+		index >>= root->shift;
+		height++;
+	}
+
+	while (height > root->height) {
+		unsigned int newheight;
+
+		if (!(node = root->alloc()))
+			return -ENOMEM;
+
+		/* Increase the height.  */
+		node->stores[0] = root->rnode;
+		root->rnode->parent = node;
+		if (root->extend)
+			root->extend(node, root->rnode);
+
+		newheight = root->height + 1;
+		node->height = newheight;
+		node->count = 1;
+		if (sradix_node_full(root, root->rnode))
+			node->fulls = 1;
+
+		root->rnode = node;
+		root->height = newheight;
+	}
+
+	return 0;
+}
+
+/*
+ * Search the next item from the current node, that is not NULL
+ * and can satify root->iter().
+ */
+void *sradix_tree_next(struct sradix_tree_root *root,
+		       struct sradix_tree_node *node, unsigned long index,
+		       int (*iter)(void *item, unsigned long height))
+{
+	unsigned long offset;
+	void *item;
+
+	if (unlikely(node == NULL)) {
+		node = root->rnode;
+		for (offset = 0; offset < root->stores_size; offset++) {
+			item = node->stores[offset];
+			if (item && (!iter || iter(item, node->height)))
+				break;
+		}
+
+		if (unlikely(offset >= root->stores_size))
+			return NULL;
+
+		if (node->height == 1)
+			return item;
+		else
+			goto go_down;
+	}
+
+	while (node) {
+		offset = (index & root->mask) + 1;
+		for (; offset < root->stores_size; offset++) {
+			item = node->stores[offset];
+			if (item && (!iter || iter(item, node->height)))
+				break;
+		}
+
+		if (offset < root->stores_size)
+			break;
+
+		node = node->parent;
+		index >>= root->shift;
+	}
+
+	if (!node)
+		return NULL;
+
+	while (node->height > 1) {
+go_down:
+		node = item;
+		for (offset = 0; offset < root->stores_size; offset++) {
+			item = node->stores[offset];
+			if (item && (!iter || iter(item, node->height)))
+				break;
+		}
+
+		if (unlikely(offset >= root->stores_size))
+			return NULL;
+	}
+
+	BUG_ON(offset > root->stores_size);
+
+	return item;
+}
+
+/*
+ * Blindly insert the item to the tree. Typically, we reuse the
+ * first empty store item.
+ */
+int sradix_tree_enter(struct sradix_tree_root *root, void **item, int num)
+{
+	unsigned long index;
+	unsigned int height;
+	struct sradix_tree_node *node, *tmp = NULL;
+	int offset, offset_saved;
+	void **store = NULL;
+	int error, i, j, shift;
+
+go_on:
+	index = root->min;
+
+	if (root->enter_node && !sradix_node_full(root, root->enter_node)) {
+		node = root->enter_node;
+		BUG_ON((index >> (root->shift * root->height)));
+	} else {
+		node = root->rnode;
+		if (node == NULL || (index >> (root->shift * root->height))
+		    || sradix_node_full(root, node)) {
+			error = sradix_tree_extend(root, index);
+			if (error)
+				return error;
+
+			node = root->rnode;
+		}
+	}
+
+
+	height = node->height;
+	shift = (height - 1) * root->shift;
+	offset = (index >> shift) & root->mask;
+	while (shift > 0) {
+		offset_saved = offset;
+		for (; offset < root->stores_size; offset++) {
+			store = &node->stores[offset];
+			tmp = *store;
+
+			if (!tmp || !sradix_node_full(root, tmp))
+				break;
+		}
+		BUG_ON(offset >= root->stores_size);
+
+		if (offset != offset_saved) {
+			index += (offset - offset_saved) << shift;
+			index &= ~((1UL << shift) - 1);
+		}
+
+		if (!tmp) {
+			if (!(tmp = root->alloc()))
+				return -ENOMEM;
+
+			tmp->height = shift / root->shift;
+			*store = tmp;
+			tmp->parent = node;
+			node->count++;
+//			if (root->extend)
+//				root->extend(node, tmp);
+		}
+
+		node = tmp;
+		shift -= root->shift;
+		offset = (index >> shift) & root->mask;
+	}
+
+	BUG_ON(node->height != 1);
+
+
+	store = &node->stores[offset];
+	for (i = 0, j = 0;
+	      j < root->stores_size - node->count &&
+	      i < root->stores_size - offset && j < num; i++) {
+		if (!store[i]) {
+			store[i] = item[j];
+			if (root->assign)
+				root->assign(node, index + i, item[j]);
+			j++;
+		}
+	}
+
+	node->count += j;
+	root->num += j;
+	num -= j;
+
+	while (sradix_node_full(root, node)) {
+		node = node->parent;
+		if (!node)
+			break;
+
+		node->fulls++;
+	}
+
+	if (unlikely(!node)) {
+		/* All nodes are full */
+		root->min = 1 << (root->height * root->shift);
+		root->enter_node = NULL;
+	} else {
+		root->min = index + i - 1;
+		root->min |= (1UL << (node->height - 1)) - 1;
+		root->min++;
+		root->enter_node = node;
+	}
+
+	if (num) {
+		item += j;
+		goto go_on;
+	}
+
+	return 0;
+}
+
+
+/**
+ *	sradix_tree_shrink    -    shrink height of a sradix tree to minimal
+ *      @root		sradix tree root
+ *
+ */
+static inline void sradix_tree_shrink(struct sradix_tree_root *root)
+{
+	/* try to shrink tree height */
+	while (root->height > 1) {
+		struct sradix_tree_node *to_free = root->rnode;
+
+		/*
+		 * The candidate node has more than one child, or its child
+		 * is not at the leftmost store, we cannot shrink.
+		 */
+		if (to_free->count != 1 || !to_free->stores[0])
+			break;
+
+		root->rnode = to_free->stores[0];
+		root->rnode->parent = NULL;
+		root->height--;
+		if (unlikely(root->enter_node == to_free))
+			root->enter_node = NULL;
+		root->free(to_free);
+	}
+}
+
+/*
+ * Del the item on the known leaf node and index
+ */
+void sradix_tree_delete_from_leaf(struct sradix_tree_root *root,
+				  struct sradix_tree_node *node, unsigned long index)
+{
+	unsigned int offset;
+	struct sradix_tree_node *start, *end;
+
+	BUG_ON(node->height != 1);
+
+	start = node;
+	while (node && !(--node->count))
+		node = node->parent;
+
+	end = node;
+	if (!node) {
+		root->rnode = NULL;
+		root->height = 0;
+		root->min = 0;
+		root->num = 0;
+		root->enter_node = NULL;
+	} else {
+		offset = (index >> (root->shift * (node->height - 1))) & root->mask;
+		if (root->rm)
+			root->rm(node, offset);
+		node->stores[offset] = NULL;
+		root->num--;
+		if (root->min > index) {
+			root->min = index;
+			root->enter_node = node;
+		}
+	}
+
+	if (start != end) {
+		do {
+			node = start;
+			start = start->parent;
+			if (unlikely(root->enter_node == node))
+				root->enter_node = end;
+			root->free(node);
+		} while (start != end);
+
+		/*
+		 * Note that shrink may free "end", so enter_node still need to
+		 * be checked inside.
+		 */
+		sradix_tree_shrink(root);
+	} else if (node->count == root->stores_size - 1) {
+		/* It WAS a full leaf node. Update the ancestors */
+		node = node->parent;
+		while (node) {
+			node->fulls--;
+			if (node->fulls != root->stores_size - 1)
+				break;
+
+			node = node->parent;
+		}
+	}
+}
+
+void *sradix_tree_lookup(struct sradix_tree_root *root, unsigned long index)
+{
+	unsigned int height, offset;
+	struct sradix_tree_node *node;
+	int shift;
+
+	node = root->rnode;
+	if (node == NULL || (index >> (root->shift * root->height)))
+		return NULL;
+
+	height = root->height;
+	shift = (height - 1) * root->shift;
+
+	do {
+		offset = (index >> shift) & root->mask;
+		node = node->stores[offset];
+		if (!node)
+			return NULL;
+
+		shift -= root->shift;
+	} while (shift >= 0);
+
+	return node;
+}
+
+/*
+ * Return the item if it exists, otherwise create it in place
+ * and return the created item.
+ */
+void *sradix_tree_lookup_create(struct sradix_tree_root *root,
+			unsigned long index, void *(*item_alloc)(void))
+{
+	unsigned int height, offset;
+	struct sradix_tree_node *node, *tmp;
+	void *item;
+	int shift, error;
+
+	if (root->rnode == NULL || (index >> (root->shift * root->height))) {
+		if (item_alloc) {
+			error = sradix_tree_extend(root, index);
+			if (error)
+				return NULL;
+		} else {
+			return NULL;
+		}
+	}
+
+	node = root->rnode;
+	height = root->height;
+	shift = (height - 1) * root->shift;
+
+	do {
+		offset = (index >> shift) & root->mask;
+		if (!node->stores[offset]) {
+			if (!(tmp = root->alloc()))
+				return NULL;
+
+			tmp->height = shift / root->shift;
+			node->stores[offset] = tmp;
+			tmp->parent = node;
+			node->count++;
+			node = tmp;
+		} else {
+			node = node->stores[offset];
+		}
+
+		shift -= root->shift;
+	} while (shift > 0);
+
+	BUG_ON(node->height != 1);
+	offset = index & root->mask;
+	if (node->stores[offset]) {
+		return node->stores[offset];
+	} else if (item_alloc) {
+		if (!(item = item_alloc()))
+			return NULL;
+
+		node->stores[offset] = item;
+
+		/*
+		 * NOTE: we do NOT call root->assign here, since this item is
+		 * newly created by us having no meaning. Caller can call this
+		 * if it's necessary to do so.
+		 */
+
+		node->count++;
+		root->num++;
+
+		while (sradix_node_full(root, node)) {
+			node = node->parent;
+			if (!node)
+				break;
+
+			node->fulls++;
+		}
+
+		if (unlikely(!node)) {
+			/* All nodes are full */
+			root->min = 1 << (root->height * root->shift);
+		} else {
+			if (root->min == index) {
+				root->min |= (1UL << (node->height - 1)) - 1;
+				root->min++;
+				root->enter_node = node;
+			}
+		}
+
+		return item;
+	} else {
+		return NULL;
+	}
+
+}
+
+int sradix_tree_delete(struct sradix_tree_root *root, unsigned long index)
+{
+	unsigned int height, offset;
+	struct sradix_tree_node *node;
+	int shift;
+
+	node = root->rnode;
+	if (node == NULL || (index >> (root->shift * root->height)))
+		return -ENOENT;
+
+	height = root->height;
+	shift = (height - 1) * root->shift;
+
+	do {
+		offset = (index >> shift) & root->mask;
+		node = node->stores[offset];
+		if (!node)
+			return -ENOENT;
+
+		shift -= root->shift;
+	} while (shift > 0);
+
+	offset = index & root->mask;
+	if (!node->stores[offset])
+		return -ENOENT;
+
+	sradix_tree_delete_from_leaf(root, node, index);
+
+	return 0;
+}
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index dc81f16b9..1c0a8699f 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -1016,6 +1016,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
 	}
 	return nelems;
 }
+EXPORT_SYMBOL(swiotlb_map_sg_attrs);
 
 /*
  * Unmap a set of streaming mode DMA translations.  Again, cpu read rules
diff --git a/mm/Kconfig b/mm/Kconfig
index c782e8fb7..396522f2a 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -315,6 +315,32 @@ config KSM
 	  See Documentation/vm/ksm.txt for more information: KSM is inactive
 	  until a program has madvised that an area is MADV_MERGEABLE, and
 	  root has set /sys/kernel/mm/ksm/run to 1 (if CONFIG_SYSFS is set).
+choice
+	prompt "Choose UKSM/KSM strategy"
+	default UKSM
+	depends on KSM
+	help
+	  This option allows to select a UKSM/KSM stragety.
+
+config UKSM
+	bool "Ultra-KSM for page merging"
+	depends on KSM
+	help
+	UKSM is inspired by the Linux kernel project \u2014 KSM(Kernel Same
+	page Merging), but with a fundamentally rewritten core algorithm. With
+	an advanced algorithm, UKSM now can transparently scans all anonymously
+	mapped user space applications with an significantly improved scan speed
+	and CPU efficiency. Since KVM is friendly to KSM, KVM can also benefit from
+	UKSM. Now UKSM has its first stable release and first real world enterprise user.
+	For more information, please goto its project page.
+	(www.kerneldedup.org)
+
+config KSM_LEGACY
+	bool "Legacy KSM implementation"
+	depends on KSM
+	help
+	The legacy KSM implementation from Red Hat.
+endchoice
 
 config DEFAULT_MMAP_MIN_ADDR
         int "Low address space to protect from user allocation"
diff --git a/mm/Makefile b/mm/Makefile
index e669f02c5..7a8672c3a 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -39,7 +39,7 @@ obj-y			:= filemap.o mempool.o oom_kill.o \
 			   mm_init.o mmu_context.o percpu.o slab_common.o \
 			   compaction.o vmacache.o swap_slots.o \
 			   interval_tree.o list_lru.o workingset.o \
-			   debug.o $(mmu-y)
+			   prfile.o debug.o $(mmu-y)
 
 obj-y += init-mm.o
 
@@ -65,7 +65,8 @@ obj-$(CONFIG_SPARSEMEM)	+= sparse.o
 obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
 obj-$(CONFIG_SLOB) += slob.o
 obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
-obj-$(CONFIG_KSM) += ksm.o
+obj-$(CONFIG_KSM_LEGACY) += ksm.o
+obj-$(CONFIG_UKSM) += uksm.o
 obj-$(CONFIG_PAGE_POISONING) += page_poison.o
 obj-$(CONFIG_SLAB) += slab.o
 obj-$(CONFIG_SLUB) += slub.o
diff --git a/mm/filemap.c b/mm/filemap.c
index 787ff1866..15bcdb64e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2702,7 +2702,7 @@ int filemap_page_mkwrite(struct vm_fault *vmf)
 	int ret = VM_FAULT_LOCKED;
 
 	sb_start_pagefault(inode->i_sb);
-	file_update_time(vmf->vma->vm_file);
+	vma_file_update_time(vmf->vma);
 	lock_page(page);
 	if (page->mapping != inode->i_mapping) {
 		unlock_page(page);
diff --git a/mm/memory.c b/mm/memory.c
index 5fcfc2490..211a4ce99 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -128,6 +128,25 @@ EXPORT_SYMBOL(zero_pfn);
 
 unsigned long highest_memmap_pfn __read_mostly;
 
+#ifdef CONFIG_UKSM
+unsigned long uksm_zero_pfn __read_mostly;
+EXPORT_SYMBOL_GPL(uksm_zero_pfn);
+struct page *empty_uksm_zero_page;
+
+static int __init setup_uksm_zero_page(void)
+{
+	empty_uksm_zero_page = alloc_pages(__GFP_ZERO & ~__GFP_MOVABLE, 0);
+	if (!empty_uksm_zero_page)
+		panic("Oh boy, that early out of memory?");
+
+	SetPageReserved(empty_uksm_zero_page);
+	uksm_zero_pfn = page_to_pfn(empty_uksm_zero_page);
+
+	return 0;
+}
+core_initcall(setup_uksm_zero_page);
+#endif
+
 /*
  * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
  */
@@ -139,6 +158,7 @@ static int __init init_zero_pfn(void)
 core_initcall(init_zero_pfn);
 
 
+
 #if defined(SPLIT_RSS_COUNTING)
 
 void sync_mm_rss(struct mm_struct *mm)
@@ -1039,6 +1059,9 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		get_page(page);
 		page_dup_rmap(page, false);
 		rss[mm_counter(page)]++;
+
+		/* Should return NULL in vm_normal_page() */
+		uksm_bugon_zeropage(pte);
 	} else if (pte_devmap(pte)) {
 		page = pte_page(pte);
 
@@ -1052,6 +1075,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 			page_dup_rmap(page, false);
 			rss[mm_counter(page)]++;
 		}
+	} else {
+		uksm_map_zero_page(pte);
 	}
 
 out_set_pte:
@@ -1321,8 +1346,10 @@ again:
 			ptent = ptep_get_and_clear_full(mm, addr, pte,
 							tlb->fullmm);
 			tlb_remove_tlb_entry(tlb, pte, addr);
-			if (unlikely(!page))
+			if (unlikely(!page)) {
+				uksm_unmap_zero_page(ptent);
 				continue;
+			}
 
 			if (!PageAnon(page)) {
 				if (pte_dirty(ptent)) {
@@ -2336,8 +2363,10 @@ static inline void cow_user_page(struct page *dst, struct page *src, unsigned lo
 			clear_page(kaddr);
 		kunmap_atomic(kaddr);
 		flush_dcache_page(dst);
-	} else
+	} else {
 		copy_user_highpage(dst, src, va, vma);
+		uksm_cow_page(vma, src);
+	}
 }
 
 static gfp_t __get_fault_gfp_mask(struct vm_area_struct *vma)
@@ -2486,6 +2515,7 @@ static int wp_page_copy(struct vm_fault *vmf)
 							      vmf->address);
 		if (!new_page)
 			goto oom;
+		uksm_cow_pte(vma, vmf->orig_pte);
 	} else {
 		new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
 				vmf->address);
@@ -2512,7 +2542,9 @@ static int wp_page_copy(struct vm_fault *vmf)
 						mm_counter_file(old_page));
 				inc_mm_counter_fast(mm, MM_ANONPAGES);
 			}
+			uksm_bugon_zeropage(vmf->orig_pte);
 		} else {
+			uksm_unmap_zero_page(vmf->orig_pte);
 			inc_mm_counter_fast(mm, MM_ANONPAGES);
 		}
 		flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
diff --git a/mm/mmap.c b/mm/mmap.c
index 03ca089cc..013715138 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -45,6 +45,7 @@
 #include <linux/moduleparam.h>
 #include <linux/pkeys.h>
 #include <linux/oom.h>
+#include <linux/ksm.h>
 
 #include <linux/uaccess.h>
 #include <asm/cacheflush.h>
@@ -171,8 +172,9 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
 	if (vma->vm_ops && vma->vm_ops->close)
 		vma->vm_ops->close(vma);
 	if (vma->vm_file)
-		fput(vma->vm_file);
+		vma_fput(vma);
 	mpol_put(vma_policy(vma));
+	uksm_remove_vma(vma);
 	kmem_cache_free(vm_area_cachep, vma);
 	return next;
 }
@@ -692,9 +694,16 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	long adjust_next = 0;
 	int remove_next = 0;
 
+/*
+ * to avoid deadlock, ksm_remove_vma must be done before any spin_lock is
+ * acquired
+ */
+	uksm_remove_vma(vma);
+
 	if (next && !insert) {
 		struct vm_area_struct *exporter = NULL, *importer = NULL;
 
+		uksm_remove_vma(next);
 		if (end >= next->vm_end) {
 			/*
 			 * vma expands, overlapping all the next, and
@@ -827,6 +836,7 @@ again:
 		end_changed = true;
 	}
 	vma->vm_pgoff = pgoff;
+
 	if (adjust_next) {
 		next->vm_start += adjust_next << PAGE_SHIFT;
 		next->vm_pgoff += adjust_next;
@@ -896,7 +906,7 @@ again:
 	if (remove_next) {
 		if (file) {
 			uprobe_munmap(next, next->vm_start, next->vm_end);
-			fput(file);
+			vma_fput(vma);
 		}
 		if (next->anon_vma)
 			anon_vma_merge(vma, next);
@@ -932,6 +942,7 @@ again:
 		if (remove_next == 2) {
 			remove_next = 1;
 			end = next->vm_end;
+			uksm_remove_vma(next);
 			goto again;
 		}
 		else if (next)
@@ -958,10 +969,14 @@ again:
 			 */
 			VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
 		}
+	} else {
+		if (next && !insert)
+			uksm_vma_add_new(next);
 	}
 	if (insert && file)
 		uprobe_mmap(insert);
 
+	uksm_vma_add_new(vma);
 	validate_mm(mm);
 
 	return 0;
@@ -1378,6 +1393,9 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 	vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
 			mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
 
+	/* If uksm is enabled, we add VM_MERGEABLE to new VMAs. */
+	uksm_vm_flags_mod(&vm_flags);
+
 	if (flags & MAP_LOCKED)
 		if (!can_do_mlock())
 			return -EPERM;
@@ -1732,6 +1750,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 			allow_write_access(file);
 	}
 	file = vma->vm_file;
+	uksm_vma_add_new(vma);
 out:
 	perf_event_mmap(vma);
 
@@ -1761,8 +1780,8 @@ out:
 	return addr;
 
 unmap_and_free_vma:
+	vma_fput(vma);
 	vma->vm_file = NULL;
-	fput(file);
 
 	/* Undo any partial mapping done by a device driver. */
 	unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
@@ -1773,6 +1792,7 @@ allow_write_and_free_vma:
 	if (vm_flags & VM_DENYWRITE)
 		allow_write_access(file);
 free_vma:
+	uksm_remove_vma(vma);
 	kmem_cache_free(vm_area_cachep, vma);
 unacct_error:
 	if (charged)
@@ -2586,7 +2606,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 		goto out_free_mpol;
 
 	if (new->vm_file)
-		get_file(new->vm_file);
+		vma_get_file(new);
 
 	if (new->vm_ops && new->vm_ops->open)
 		new->vm_ops->open(new);
@@ -2597,6 +2617,8 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 	else
 		err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
 
+	uksm_vma_add_new(new);
+
 	/* Success. */
 	if (!err)
 		return 0;
@@ -2605,7 +2627,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (new->vm_ops && new->vm_ops->close)
 		new->vm_ops->close(new);
 	if (new->vm_file)
-		fput(new->vm_file);
+		vma_fput(new);
 	unlink_anon_vmas(new);
  out_free_mpol:
 	mpol_put(vma_policy(new));
@@ -2767,7 +2789,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
 	struct vm_area_struct *vma;
 	unsigned long populate = 0;
 	unsigned long ret = -EINVAL;
-	struct file *file;
+	struct file *file, *prfile;
 
 	pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.txt.\n",
 		     current->comm, current->pid);
@@ -2842,10 +2864,27 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
 		}
 	}
 
-	file = get_file(vma->vm_file);
+	vma_get_file(vma);
+	file = vma->vm_file;
+	prfile = vma->vm_prfile;
 	ret = do_mmap_pgoff(vma->vm_file, start, size,
 			prot, flags, pgoff, &populate, NULL);
+	if (!IS_ERR_VALUE(ret) && file && prfile) {
+		struct vm_area_struct *new_vma;
+
+		new_vma = find_vma(mm, ret);
+		if (!new_vma->vm_prfile)
+			new_vma->vm_prfile = prfile;
+		if (new_vma != vma)
+			get_file(prfile);
+	}
+	/*
+	 * two fput()s instead of vma_fput(vma),
+	 * coz vma may not be available anymore.
+	 */
 	fput(file);
+	if (prfile)
+		fput(prfile);
 out:
 	up_write(&mm->mmap_sem);
 	if (populate)
@@ -2889,6 +2928,7 @@ static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long
 	if ((flags & (~VM_EXEC)) != 0)
 		return -EINVAL;
 	flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
+	uksm_vm_flags_mod(&flags);
 
 	error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
 	if (offset_in_page(error))
@@ -2946,6 +2986,7 @@ static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long
 	vma->vm_flags = flags;
 	vma->vm_page_prot = vm_get_page_prot(flags);
 	vma_link(mm, vma, prev, rb_link, rb_parent);
+	uksm_vma_add_new(vma);
 out:
 	perf_event_mmap(vma);
 	mm->total_vm += len >> PAGE_SHIFT;
@@ -2997,6 +3038,12 @@ void exit_mmap(struct mm_struct *mm)
 	/* mm's last user has gone, and its about to be pulled down */
 	mmu_notifier_release(mm);
 
+	/*
+	 * Taking write lock on mmap_sem does not harm others,
+	 * but it's crucial for uksm to avoid races.
+	 */
+	down_write(&mm->mmap_sem);
+
 	if (unlikely(mm_is_oom_victim(mm))) {
 		/*
 		 * Manually reap the mm to free as much memory as possible.
@@ -3057,6 +3104,11 @@ void exit_mmap(struct mm_struct *mm)
 		vma = remove_vma(vma);
 	}
 	vm_unacct_memory(nr_accounted);
+
+	mm->mmap = NULL;
+	mm->mm_rb = RB_ROOT;
+	vmacache_invalidate(mm);
+	up_write(&mm->mmap_sem);
 }
 
 /* Insert vm structure into process list sorted by address
@@ -3161,11 +3213,12 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 		if (anon_vma_clone(new_vma, vma))
 			goto out_free_mempol;
 		if (new_vma->vm_file)
-			get_file(new_vma->vm_file);
+			vma_get_file(new_vma);
 		if (new_vma->vm_ops && new_vma->vm_ops->open)
 			new_vma->vm_ops->open(new_vma);
 		vma_link(mm, new_vma, prev, rb_link, rb_parent);
 		*need_rmap_locks = false;
+		uksm_vma_add_new(new_vma);
 	}
 	return new_vma;
 
@@ -3316,6 +3369,7 @@ static struct vm_area_struct *__install_special_mapping(
 	vm_stat_account(mm, vma->vm_flags, len >> PAGE_SHIFT);
 
 	perf_event_mmap(vma);
+	uksm_vma_add_new(vma);
 
 	return vma;
 
diff --git a/mm/nommu.c b/mm/nommu.c
index ebb6e618d..8cf24288a 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -641,7 +641,7 @@ static void __put_nommu_region(struct vm_region *region)
 		up_write(&nommu_region_sem);
 
 		if (region->vm_file)
-			fput(region->vm_file);
+			vmr_fput(region);
 
 		/* IO memory and memory shared directly out of the pagecache
 		 * from ramfs/tmpfs mustn't be released here */
@@ -799,7 +799,7 @@ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
 	if (vma->vm_ops && vma->vm_ops->close)
 		vma->vm_ops->close(vma);
 	if (vma->vm_file)
-		fput(vma->vm_file);
+		vma_fput(vma);
 	put_nommu_region(vma->vm_region);
 	kmem_cache_free(vm_area_cachep, vma);
 }
@@ -1321,7 +1321,7 @@ unsigned long do_mmap(struct file *file,
 					goto error_just_free;
 				}
 			}
-			fput(region->vm_file);
+			vmr_fput(region);
 			kmem_cache_free(vm_region_jar, region);
 			region = pregion;
 			result = start;
@@ -1396,10 +1396,10 @@ error_just_free:
 	up_write(&nommu_region_sem);
 error:
 	if (region->vm_file)
-		fput(region->vm_file);
+		vmr_fput(region);
 	kmem_cache_free(vm_region_jar, region);
 	if (vma->vm_file)
-		fput(vma->vm_file);
+		vma_fput(vma);
 	kmem_cache_free(vm_area_cachep, vma);
 	return ret;
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 8369572e1..2cb971573 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -71,7 +71,11 @@ static long ratelimit_pages = 32;
 /*
  * Start background writeback (via writeback threads) at this percentage
  */
+#ifdef CONFIG_PCK_INTERACTIVE
+int dirty_background_ratio = 20;
+#else
 int dirty_background_ratio = 10;
+#endif
 
 /*
  * dirty_background_bytes starts at 0 (disabled) so that it is a function of
@@ -88,7 +92,11 @@ int vm_highmem_is_dirtyable;
 /*
  * The generator of dirty data starts writeback at this percentage
  */
+#ifdef CONFIG_PCK_INTERACTIVE
+int vm_dirty_ratio = 50;
+#else
 int vm_dirty_ratio = 20;
+#endif
 
 /*
  * vm_dirty_bytes starts at 0 (disabled) so that it is a function of
diff --git a/mm/prfile.c b/mm/prfile.c
new file mode 100644
index 000000000..14efc4f64
--- /dev/null
+++ b/mm/prfile.c
@@ -0,0 +1,86 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0
+ * Mainly for aufs which mmap(2) different file and wants to print different
+ * path in /proc/PID/maps.
+ * Call these functions via macros defined in linux/mm.h.
+ *
+ * See Documentation/filesystems/aufs/design/06mmap.txt
+ *
+ * Copyright (c) 2014-2018 Junjro R. Okajima
+ * Copyright (c) 2014 Ian Campbell
+ */
+
+#include <linux/mm.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+
+/* #define PRFILE_TRACE */
+static inline void prfile_trace(struct file *f, struct file *pr,
+			      const char func[], int line, const char func2[])
+{
+#ifdef PRFILE_TRACE
+	if (pr)
+		pr_info("%s:%d: %s, %pD2\n", func, line, func2, f);
+#endif
+}
+
+void vma_do_file_update_time(struct vm_area_struct *vma, const char func[],
+			     int line)
+{
+	struct file *f = vma->vm_file, *pr = vma->vm_prfile;
+
+	prfile_trace(f, pr, func, line, __func__);
+	file_update_time(f);
+	if (f && pr)
+		file_update_time(pr);
+}
+
+struct file *vma_do_pr_or_file(struct vm_area_struct *vma, const char func[],
+			       int line)
+{
+	struct file *f = vma->vm_file, *pr = vma->vm_prfile;
+
+	prfile_trace(f, pr, func, line, __func__);
+	return (f && pr) ? pr : f;
+}
+
+void vma_do_get_file(struct vm_area_struct *vma, const char func[], int line)
+{
+	struct file *f = vma->vm_file, *pr = vma->vm_prfile;
+
+	prfile_trace(f, pr, func, line, __func__);
+	get_file(f);
+	if (f && pr)
+		get_file(pr);
+}
+
+void vma_do_fput(struct vm_area_struct *vma, const char func[], int line)
+{
+	struct file *f = vma->vm_file, *pr = vma->vm_prfile;
+
+	prfile_trace(f, pr, func, line, __func__);
+	fput(f);
+	if (f && pr)
+		fput(pr);
+}
+
+#ifndef CONFIG_MMU
+struct file *vmr_do_pr_or_file(struct vm_region *region, const char func[],
+			       int line)
+{
+	struct file *f = region->vm_file, *pr = region->vm_prfile;
+
+	prfile_trace(f, pr, func, line, __func__);
+	return (f && pr) ? pr : f;
+}
+
+void vmr_do_fput(struct vm_region *region, const char func[], int line)
+{
+	struct file *f = region->vm_file, *pr = region->vm_prfile;
+
+	prfile_trace(f, pr, func, line, __func__);
+	fput(f);
+	if (f && pr)
+		fput(pr);
+}
+#endif /* !CONFIG_MMU */
diff --git a/mm/rmap.c b/mm/rmap.c
index 47db27f80..fdd0fd742 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1016,9 +1016,9 @@ void page_move_anon_rmap(struct page *page, struct vm_area_struct *vma)
 
 /**
  * __page_set_anon_rmap - set up new anonymous rmap
- * @page:	Page to add to rmap	
+ * @page:	Page to add to rmap
  * @vma:	VM area to add page to.
- * @address:	User virtual address of the mapping	
+ * @address:	User virtual address of the mapping
  * @exclusive:	the page is exclusively owned by the current process
  */
 static void __page_set_anon_rmap(struct page *page,
diff --git a/mm/swapfile.c b/mm/swapfile.c
index c7a33717d..df25aec50 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -11,6 +11,7 @@
 #include <linux/hugetlb.h>
 #include <linux/mman.h>
 #include <linux/slab.h>
+#include <linux/export.h>
 #include <linux/kernel_stat.h>
 #include <linux/swap.h>
 #include <linux/vmalloc.h>
@@ -47,7 +48,6 @@
 static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
 				 unsigned char);
 static void free_swap_count_continuations(struct swap_info_struct *);
-static sector_t map_swap_entry(swp_entry_t, struct block_device**);
 
 DEFINE_SPINLOCK(swap_lock);
 static unsigned int nr_swapfiles;
@@ -1033,6 +1033,60 @@ swp_entry_t get_swap_page_of_type(int type)
 	return (swp_entry_t) {0};
 }
 
+static unsigned int find_next_to_unuse(struct swap_info_struct *si,
+					unsigned int prev, bool frontswap);
+
+void get_swap_range_of_type(int type, swp_entry_t *start, swp_entry_t *end,
+		unsigned int limit)
+{
+	struct swap_info_struct *si;
+	pgoff_t start_at;
+	unsigned int i;
+
+	*start = swp_entry(0, 0);
+	*end = swp_entry(0, 0);
+	si = swap_info[type];
+	spin_lock(&si->lock);
+	if (si && (si->flags & SWP_WRITEOK)) {
+		atomic_long_dec(&nr_swap_pages);
+		/* This is called for allocating swap entry, not cache */
+		start_at = scan_swap_map(si, 1);
+		if (start_at) {
+			unsigned long stop_at = find_next_to_unuse(si, start_at, 0);
+			if (stop_at > start_at)
+				stop_at--;
+			else
+				stop_at = si->max - 1;
+			if (stop_at - start_at + 1 > limit)
+				stop_at = min_t(unsigned int,
+						start_at + limit - 1,
+						si->max - 1);
+			/* Mark them used */
+			for (i = start_at; i <= stop_at; i++)
+				si->swap_map[i] = 1;
+			/* first page already done above */
+			si->inuse_pages += stop_at - start_at;
+
+			atomic_long_sub(stop_at - start_at, &nr_swap_pages);
+			if (start_at == si->lowest_bit)
+				si->lowest_bit = stop_at + 1;
+			if (stop_at == si->highest_bit)
+				si->highest_bit = start_at - 1;
+			if (si->inuse_pages == si->pages) {
+				si->lowest_bit = si->max;
+				si->highest_bit = 0;
+			}
+			for (i = start_at + 1; i <= stop_at; i++)
+				inc_cluster_info_page(si, si->cluster_info, i);
+			si->cluster_next = stop_at + 1;
+			*start = swp_entry(type, start_at);
+			*end = swp_entry(type, stop_at);
+		} else
+			atomic_long_inc(&nr_swap_pages);
+	}
+	spin_unlock(&si->lock);
+}
+
 static struct swap_info_struct *__swap_info_get(swp_entry_t entry)
 {
 	struct swap_info_struct *p;
@@ -2278,7 +2332,7 @@ static void drain_mmlist(void)
  * Note that the type of this function is sector_t, but it returns page offset
  * into the bdev, not sector offset.
  */
-static sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev)
+sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev)
 {
 	struct swap_info_struct *sis;
 	struct swap_extent *start_se;
@@ -3487,8 +3541,14 @@ pgoff_t __page_file_index(struct page *page)
 	swp_entry_t swap = { .val = page_private(page) };
 	return swp_offset(swap);
 }
+
 EXPORT_SYMBOL_GPL(__page_file_index);
 
+struct swap_info_struct *get_swap_info_struct(unsigned type)
+{
+	return swap_info[type];
+}
+
 /*
  * add_swap_count_continuation - called when a swap count is duplicated
  * beyond SWAP_MAP_MAX, it allocates a new page and links that to the entry's
diff --git a/mm/uksm.c b/mm/uksm.c
new file mode 100644
index 000000000..48ddb7460
--- /dev/null
+++ b/mm/uksm.c
@@ -0,0 +1,5584 @@
+/*
+ * Ultra KSM. Copyright (C) 2011-2012 Nai Xia
+ *
+ * This is an improvement upon KSM. Some basic data structures and routines
+ * are borrowed from ksm.c .
+ *
+ * Its new features:
+ * 1. Full system scan:
+ *      It automatically scans all user processes' anonymous VMAs. Kernel-user
+ *      interaction to submit a memory area to KSM is no longer needed.
+ *
+ * 2. Rich area detection:
+ *      It automatically detects rich areas containing abundant duplicated
+ *      pages based. Rich areas are given a full scan speed. Poor areas are
+ *      sampled at a reasonable speed with very low CPU consumption.
+ *
+ * 3. Ultra Per-page scan speed improvement:
+ *      A new hash algorithm is proposed. As a result, on a machine with
+ *      Core(TM)2 Quad Q9300 CPU in 32-bit mode and 800MHZ DDR2 main memory, it
+ *      can scan memory areas that does not contain duplicated pages at speed of
+ *      627MB/sec ~ 2445MB/sec and can merge duplicated areas at speed of
+ *      477MB/sec ~ 923MB/sec.
+ *
+ * 4. Thrashing area avoidance:
+ *      Thrashing area(an VMA that has frequent Ksm page break-out) can be
+ *      filtered out. My benchmark shows it's more efficient than KSM's per-page
+ *      hash value based volatile page detection.
+ *
+ *
+ * 5. Misc changes upon KSM:
+ *      * It has a fully x86-opitmized memcmp dedicated for 4-byte-aligned page
+ *        comparison. It's much faster than default C version on x86.
+ *      * rmap_item now has an struct *page member to loosely cache a
+ *        address-->page mapping, which reduces too much time-costly
+ *        follow_page().
+ *      * The VMA creation/exit procedures are hooked to let the Ultra KSM know.
+ *      * try_to_merge_two_pages() now can revert a pte if it fails. No break_
+ *        ksm is needed for this case.
+ *
+ * 6. Full Zero Page consideration(contributed by Figo Zhang)
+ *    Now uksmd consider full zero pages as special pages and merge them to an
+ *    special unswappable uksm zero page.
+ */
+
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/mman.h>
+#include <linux/sched.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/coredump.h>
+#include <linux/sched/cputime.h>
+#include <linux/rwsem.h>
+#include <linux/pagemap.h>
+#include <linux/rmap.h>
+#include <linux/spinlock.h>
+#include <linux/jhash.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/wait.h>
+#include <linux/slab.h>
+#include <linux/rbtree.h>
+#include <linux/memory.h>
+#include <linux/mmu_notifier.h>
+#include <linux/swap.h>
+#include <linux/ksm.h>
+#include <linux/crypto.h>
+#include <linux/scatterlist.h>
+#include <crypto/hash.h>
+#include <linux/random.h>
+#include <linux/math64.h>
+#include <linux/gcd.h>
+#include <linux/freezer.h>
+#include <linux/oom.h>
+#include <linux/numa.h>
+#include <linux/sradix-tree.h>
+
+#include <asm/tlbflush.h>
+#include "internal.h"
+
+#ifdef CONFIG_X86
+#undef memcmp
+
+#ifdef CONFIG_X86_32
+#define memcmp memcmpx86_32
+/*
+ * Compare 4-byte-aligned address s1 and s2, with length n
+ */
+int memcmpx86_32(void *s1, void *s2, size_t n)
+{
+	size_t num = n / 4;
+	register int res;
+
+	__asm__ __volatile__
+	(
+	 "testl %3,%3\n\t"
+	 "repe; cmpsd\n\t"
+	 "je        1f\n\t"
+	 "sbbl      %0,%0\n\t"
+	 "orl       $1,%0\n"
+	 "1:"
+	 : "=&a" (res), "+&S" (s1), "+&D" (s2), "+&c" (num)
+	 : "0" (0)
+	 : "cc");
+
+	return res;
+}
+
+/*
+ * Check the page is all zero ?
+ */
+static int is_full_zero(const void *s1, size_t len)
+{
+	unsigned char same;
+
+	len /= 4;
+
+	__asm__ __volatile__
+	("repe; scasl;"
+	 "sete %0"
+	 : "=qm" (same), "+D" (s1), "+c" (len)
+	 : "a" (0)
+	 : "cc");
+
+	return same;
+}
+
+
+#elif defined(CONFIG_X86_64)
+#define memcmp memcmpx86_64
+/*
+ * Compare 8-byte-aligned address s1 and s2, with length n
+ */
+int memcmpx86_64(void *s1, void *s2, size_t n)
+{
+	size_t num = n / 8;
+	register int res;
+
+	__asm__ __volatile__
+	(
+	 "testq %q3,%q3\n\t"
+	 "repe; cmpsq\n\t"
+	 "je        1f\n\t"
+	 "sbbq      %q0,%q0\n\t"
+	 "orq       $1,%q0\n"
+	 "1:"
+	 : "=&a" (res), "+&S" (s1), "+&D" (s2), "+&c" (num)
+	 : "0" (0)
+	 : "cc");
+
+	return res;
+}
+
+static int is_full_zero(const void *s1, size_t len)
+{
+	unsigned char same;
+
+	len /= 8;
+
+	__asm__ __volatile__
+	("repe; scasq;"
+	 "sete %0"
+	 : "=qm" (same), "+D" (s1), "+c" (len)
+	 : "a" (0)
+	 : "cc");
+
+	return same;
+}
+
+#endif
+#else
+static int is_full_zero(const void *s1, size_t len)
+{
+	unsigned long *src = s1;
+	int i;
+
+	len /= sizeof(*src);
+
+	for (i = 0; i < len; i++) {
+		if (src[i])
+			return 0;
+	}
+
+	return 1;
+}
+#endif
+
+#define UKSM_RUNG_ROUND_FINISHED  (1 << 0)
+#define TIME_RATIO_SCALE	10000
+
+#define SLOT_TREE_NODE_SHIFT	8
+#define SLOT_TREE_NODE_STORE_SIZE	(1UL << SLOT_TREE_NODE_SHIFT)
+struct slot_tree_node {
+	unsigned long size;
+	struct sradix_tree_node snode;
+	void *stores[SLOT_TREE_NODE_STORE_SIZE];
+};
+
+static struct kmem_cache *slot_tree_node_cachep;
+
+static struct sradix_tree_node *slot_tree_node_alloc(void)
+{
+	struct slot_tree_node *p;
+
+	p = kmem_cache_zalloc(slot_tree_node_cachep, GFP_KERNEL |
+			      __GFP_NORETRY | __GFP_NOWARN);
+	if (!p)
+		return NULL;
+
+	return &p->snode;
+}
+
+static void slot_tree_node_free(struct sradix_tree_node *node)
+{
+	struct slot_tree_node *p;
+
+	p = container_of(node, struct slot_tree_node, snode);
+	kmem_cache_free(slot_tree_node_cachep, p);
+}
+
+static void slot_tree_node_extend(struct sradix_tree_node *parent,
+				  struct sradix_tree_node *child)
+{
+	struct slot_tree_node *p, *c;
+
+	p = container_of(parent, struct slot_tree_node, snode);
+	c = container_of(child, struct slot_tree_node, snode);
+
+	p->size += c->size;
+}
+
+void slot_tree_node_assign(struct sradix_tree_node *node,
+			   unsigned int index, void *item)
+{
+	struct vma_slot *slot = item;
+	struct slot_tree_node *cur;
+
+	slot->snode = node;
+	slot->sindex = index;
+
+	while (node) {
+		cur = container_of(node, struct slot_tree_node, snode);
+		cur->size += slot->pages;
+		node = node->parent;
+	}
+}
+
+void slot_tree_node_rm(struct sradix_tree_node *node, unsigned int offset)
+{
+	struct vma_slot *slot;
+	struct slot_tree_node *cur;
+	unsigned long pages;
+
+	if (node->height == 1) {
+		slot = node->stores[offset];
+		pages = slot->pages;
+	} else {
+		cur = container_of(node->stores[offset],
+				   struct slot_tree_node, snode);
+		pages = cur->size;
+	}
+
+	while (node) {
+		cur = container_of(node, struct slot_tree_node, snode);
+		cur->size -= pages;
+		node = node->parent;
+	}
+}
+
+unsigned long slot_iter_index;
+int slot_iter(void *item,  unsigned long height)
+{
+	struct slot_tree_node *node;
+	struct vma_slot *slot;
+
+	if (height == 1) {
+		slot = item;
+		if (slot_iter_index < slot->pages) {
+			/*in this one*/
+			return 1;
+		} else {
+			slot_iter_index -= slot->pages;
+			return 0;
+		}
+
+	} else {
+		node = container_of(item, struct slot_tree_node, snode);
+		if (slot_iter_index < node->size) {
+			/*in this one*/
+			return 1;
+		} else {
+			slot_iter_index -= node->size;
+			return 0;
+		}
+	}
+}
+
+
+static inline void slot_tree_init_root(struct sradix_tree_root *root)
+{
+	init_sradix_tree_root(root, SLOT_TREE_NODE_SHIFT);
+	root->alloc = slot_tree_node_alloc;
+	root->free = slot_tree_node_free;
+	root->extend = slot_tree_node_extend;
+	root->assign = slot_tree_node_assign;
+	root->rm = slot_tree_node_rm;
+}
+
+void slot_tree_init(void)
+{
+	slot_tree_node_cachep = kmem_cache_create("slot_tree_node",
+				sizeof(struct slot_tree_node), 0,
+				SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
+				NULL);
+}
+
+
+/* Each rung of this ladder is a list of VMAs having a same scan ratio */
+struct scan_rung {
+	//struct list_head scanned_list;
+	struct sradix_tree_root vma_root;
+	struct sradix_tree_root vma_root2;
+
+	struct vma_slot *current_scan;
+	unsigned long current_offset;
+
+	/*
+	 * The initial value for current_offset, it should loop over
+	 * [0~ step - 1] to let all slot have its chance to be scanned.
+	 */
+	unsigned long offset_init;
+	unsigned long step; /* dynamic step for current_offset */
+	unsigned int flags;
+	unsigned long pages_to_scan;
+	//unsigned long fully_scanned_slots;
+	/*
+	 * a little bit tricky - if cpu_time_ratio > 0, then the value is the
+	 * the cpu time ratio it can spend in rung_i for every scan
+	 * period. if < 0, then it is the cpu time ratio relative to the
+	 * max cpu percentage user specified. Both in unit of
+	 * 1/TIME_RATIO_SCALE
+	 */
+	int cpu_ratio;
+
+	/*
+	 * How long it will take for all slots in this rung to be fully
+	 * scanned? If it's zero, we don't care about the cover time:
+	 * it's fully scanned.
+	 */
+	unsigned int cover_msecs;
+	//unsigned long vma_num;
+	//unsigned long pages; /* Sum of all slot's pages in rung */
+};
+
+/**
+ * node of either the stable or unstale rbtree
+ *
+ */
+struct tree_node {
+	struct rb_node node; /* link in the main (un)stable rbtree */
+	struct rb_root sub_root; /* rb_root for sublevel collision rbtree */
+	u32 hash;
+	unsigned long count; /* TODO: merged with sub_root */
+	struct list_head all_list; /* all tree nodes in stable/unstable tree */
+};
+
+/**
+ * struct stable_node - node of the stable rbtree
+ * @node: rb node of this ksm page in the stable tree
+ * @hlist: hlist head of rmap_items using this ksm page
+ * @kpfn: page frame number of this ksm page
+ */
+struct stable_node {
+	struct rb_node node; /* link in sub-rbtree */
+	struct tree_node *tree_node; /* it's tree node root in stable tree, NULL if it's in hell list */
+	struct hlist_head hlist;
+	unsigned long kpfn;
+	u32 hash_max; /* if ==0 then it's not been calculated yet */
+	struct list_head all_list; /* in a list for all stable nodes */
+};
+
+/**
+ * struct node_vma - group rmap_items linked in a same stable
+ * node together.
+ */
+struct node_vma {
+	union {
+		struct vma_slot *slot;
+		unsigned long key;  /* slot is used as key sorted on hlist */
+	};
+	struct hlist_node hlist;
+	struct hlist_head rmap_hlist;
+	struct stable_node *head;
+};
+
+/**
+ * struct rmap_item - reverse mapping item for virtual addresses
+ * @rmap_list: next rmap_item in mm_slot's singly-linked rmap_list
+ * @anon_vma: pointer to anon_vma for this mm,address, when in stable tree
+ * @mm: the memory structure this rmap_item is pointing into
+ * @address: the virtual address this rmap_item tracks (+ flags in low bits)
+ * @node: rb node of this rmap_item in the unstable tree
+ * @head: pointer to stable_node heading this list in the stable tree
+ * @hlist: link into hlist of rmap_items hanging off that stable_node
+ */
+struct rmap_item {
+	struct vma_slot *slot;
+	struct page *page;
+	unsigned long address;	/* + low bits used for flags below */
+	unsigned long hash_round;
+	unsigned long entry_index;
+	union {
+		struct {/* when in unstable tree */
+			struct rb_node node;
+			struct tree_node *tree_node;
+			u32 hash_max;
+		};
+		struct { /* when in stable tree */
+			struct node_vma *head;
+			struct hlist_node hlist;
+			struct anon_vma *anon_vma;
+		};
+	};
+} __aligned(4);
+
+struct rmap_list_entry {
+	union {
+		struct rmap_item *item;
+		unsigned long addr;
+	};
+	/* lowest bit is used for is_addr tag */
+} __aligned(4); /* 4 aligned to fit in to pages*/
+
+
+/* Basic data structure definition ends */
+
+
+/*
+ * Flags for rmap_item to judge if it's listed in the stable/unstable tree.
+ * The flags use the low bits of rmap_item.address
+ */
+#define UNSTABLE_FLAG	0x1
+#define STABLE_FLAG	0x2
+#define get_rmap_addr(x)	((x)->address & PAGE_MASK)
+
+/*
+ * rmap_list_entry helpers
+ */
+#define IS_ADDR_FLAG	1
+#define is_addr(ptr)		((unsigned long)(ptr) & IS_ADDR_FLAG)
+#define set_is_addr(ptr)	((ptr) |= IS_ADDR_FLAG)
+#define get_clean_addr(ptr)	(((ptr) & ~(__typeof__(ptr))IS_ADDR_FLAG))
+
+
+/*
+ * High speed caches for frequently allocated and freed structs
+ */
+static struct kmem_cache *rmap_item_cache;
+static struct kmem_cache *stable_node_cache;
+static struct kmem_cache *node_vma_cache;
+static struct kmem_cache *vma_slot_cache;
+static struct kmem_cache *tree_node_cache;
+#define UKSM_KMEM_CACHE(__struct, __flags) kmem_cache_create("uksm_"#__struct,\
+		sizeof(struct __struct), __alignof__(struct __struct),\
+		(__flags), NULL)
+
+/* Array of all scan_rung, uksm_scan_ladder[0] having the minimum scan ratio */
+#define SCAN_LADDER_SIZE 4
+static struct scan_rung uksm_scan_ladder[SCAN_LADDER_SIZE];
+
+/* The evaluation rounds uksmd has finished */
+static unsigned long long uksm_eval_round = 1;
+
+/*
+ * we add 1 to this var when we consider we should rebuild the whole
+ * unstable tree.
+ */
+static unsigned long uksm_hash_round = 1;
+
+/*
+ * How many times the whole memory is scanned.
+ */
+static unsigned long long fully_scanned_round = 1;
+
+/* The total number of virtual pages of all vma slots */
+static u64 uksm_pages_total;
+
+/* The number of pages has been scanned since the start up */
+static u64 uksm_pages_scanned;
+
+static u64 scanned_virtual_pages;
+
+/* The number of pages has been scanned since last encode_benefit call */
+static u64 uksm_pages_scanned_last;
+
+/* If the scanned number is tooo large, we encode it here */
+static u64 pages_scanned_stored;
+
+static unsigned long pages_scanned_base;
+
+/* The number of nodes in the stable tree */
+static unsigned long uksm_pages_shared;
+
+/* The number of page slots additionally sharing those nodes */
+static unsigned long uksm_pages_sharing;
+
+/* The number of nodes in the unstable tree */
+static unsigned long uksm_pages_unshared;
+
+/*
+ * Milliseconds ksmd should sleep between scans,
+ * >= 100ms to be consistent with
+ * scan_time_to_sleep_msec()
+ */
+static unsigned int uksm_sleep_jiffies;
+
+/* The real value for the uksmd next sleep */
+static unsigned int uksm_sleep_real;
+
+/* Saved value for user input uksm_sleep_jiffies when it's enlarged */
+static unsigned int uksm_sleep_saved;
+
+/* Max percentage of cpu utilization ksmd can take to scan in one batch */
+static unsigned int uksm_max_cpu_percentage;
+
+static int uksm_cpu_governor;
+
+static char *uksm_cpu_governor_str[4] = { "full", "medium", "low", "quiet" };
+
+struct uksm_cpu_preset_s {
+	int cpu_ratio[SCAN_LADDER_SIZE];
+	unsigned int cover_msecs[SCAN_LADDER_SIZE];
+	unsigned int max_cpu; /* percentage */
+};
+
+struct uksm_cpu_preset_s uksm_cpu_preset[4] = {
+	{ {20, 40, -2500, -10000}, {1000, 500, 200, 50}, 95},
+	{ {20, 30, -2500, -10000}, {1000, 500, 400, 100}, 50},
+	{ {10, 20, -5000, -10000}, {1500, 1000, 1000, 250}, 20},
+	{ {10, 20, 40, 75}, {2000, 1000, 1000, 1000}, 1},
+};
+
+/* The default value for uksm_ema_page_time if it's not initialized */
+#define UKSM_PAGE_TIME_DEFAULT	500
+
+/*cost to scan one page by expotional moving average in nsecs */
+static unsigned long uksm_ema_page_time = UKSM_PAGE_TIME_DEFAULT;
+
+/* The expotional moving average alpha weight, in percentage. */
+#define EMA_ALPHA	20
+
+/*
+ * The threshold used to filter out thrashing areas,
+ * If it == 0, filtering is disabled, otherwise it's the percentage up-bound
+ * of the thrashing ratio of all areas. Any area with a bigger thrashing ratio
+ * will be considered as having a zero duplication ratio.
+ */
+static unsigned int uksm_thrash_threshold = 50;
+
+/* How much dedup ratio is considered to be abundant*/
+static unsigned int uksm_abundant_threshold = 10;
+
+/* All slots having merged pages in this eval round. */
+struct list_head vma_slot_dedup = LIST_HEAD_INIT(vma_slot_dedup);
+
+/* How many times the ksmd has slept since startup */
+static unsigned long long uksm_sleep_times;
+
+#define UKSM_RUN_STOP	0
+#define UKSM_RUN_MERGE	1
+static unsigned int uksm_run = 1;
+
+static DECLARE_WAIT_QUEUE_HEAD(uksm_thread_wait);
+static DEFINE_MUTEX(uksm_thread_mutex);
+
+/*
+ * List vma_slot_new is for newly created vma_slot waiting to be added by
+ * ksmd. If one cannot be added(e.g. due to it's too small), it's moved to
+ * vma_slot_noadd. vma_slot_del is the list for vma_slot whose corresponding
+ * VMA has been removed/freed.
+ */
+struct list_head vma_slot_new = LIST_HEAD_INIT(vma_slot_new);
+struct list_head vma_slot_noadd = LIST_HEAD_INIT(vma_slot_noadd);
+struct list_head vma_slot_del = LIST_HEAD_INIT(vma_slot_del);
+static DEFINE_SPINLOCK(vma_slot_list_lock);
+
+/* The unstable tree heads */
+static struct rb_root root_unstable_tree = RB_ROOT;
+
+/*
+ * All tree_nodes are in a list to be freed at once when unstable tree is
+ * freed after each scan round.
+ */
+static struct list_head unstable_tree_node_list =
+				LIST_HEAD_INIT(unstable_tree_node_list);
+
+/* List contains all stable nodes */
+static struct list_head stable_node_list = LIST_HEAD_INIT(stable_node_list);
+
+/*
+ * When the hash strength is changed, the stable tree must be delta_hashed and
+ * re-structured. We use two set of below structs to speed up the
+ * re-structuring of stable tree.
+ */
+static struct list_head
+stable_tree_node_list[2] = {LIST_HEAD_INIT(stable_tree_node_list[0]),
+			    LIST_HEAD_INIT(stable_tree_node_list[1])};
+
+static struct list_head *stable_tree_node_listp = &stable_tree_node_list[0];
+static struct rb_root root_stable_tree[2] = {RB_ROOT, RB_ROOT};
+static struct rb_root *root_stable_treep = &root_stable_tree[0];
+static unsigned long stable_tree_index;
+
+/* The hash strength needed to hash a full page */
+#define HASH_STRENGTH_FULL		(PAGE_SIZE / sizeof(u32))
+
+/* The hash strength needed for loop-back hashing */
+#define HASH_STRENGTH_MAX		(HASH_STRENGTH_FULL + 10)
+
+/* The random offsets in a page */
+static u32 *random_nums;
+
+/* The hash strength */
+static unsigned long hash_strength = HASH_STRENGTH_FULL >> 4;
+
+/* The delta value each time the hash strength increases or decreases */
+static unsigned long hash_strength_delta;
+#define HASH_STRENGTH_DELTA_MAX	5
+
+/* The time we have saved due to random_sample_hash */
+static u64 rshash_pos;
+
+/* The time we have wasted due to hash collision */
+static u64 rshash_neg;
+
+struct uksm_benefit {
+	u64 pos;
+	u64 neg;
+	u64 scanned;
+	unsigned long base;
+} benefit;
+
+/*
+ * The relative cost of memcmp, compared to 1 time unit of random sample
+ * hash, this value is tested when ksm module is initialized
+ */
+static unsigned long memcmp_cost;
+
+static unsigned long  rshash_neg_cont_zero;
+static unsigned long  rshash_cont_obscure;
+
+/* The possible states of hash strength adjustment heuristic */
+enum rshash_states {
+		RSHASH_STILL,
+		RSHASH_TRYUP,
+		RSHASH_TRYDOWN,
+		RSHASH_NEW,
+		RSHASH_PRE_STILL,
+};
+
+/* The possible direction we are about to adjust hash strength */
+enum rshash_direct {
+	GO_UP,
+	GO_DOWN,
+	OBSCURE,
+	STILL,
+};
+
+/* random sampling hash state machine */
+static struct {
+	enum rshash_states state;
+	enum rshash_direct pre_direct;
+	u8 below_count;
+	/* Keep a lookup window of size 5, iff above_count/below_count > 3
+	 * in this window we stop trying.
+	 */
+	u8 lookup_window_index;
+	u64 stable_benefit;
+	unsigned long turn_point_down;
+	unsigned long turn_benefit_down;
+	unsigned long turn_point_up;
+	unsigned long turn_benefit_up;
+	unsigned long stable_point;
+} rshash_state;
+
+/*zero page hash table, hash_strength [0 ~ HASH_STRENGTH_MAX]*/
+static u32 *zero_hash_table;
+
+static inline struct node_vma *alloc_node_vma(void)
+{
+	struct node_vma *node_vma;
+
+	node_vma = kmem_cache_zalloc(node_vma_cache, GFP_KERNEL |
+				     __GFP_NORETRY | __GFP_NOWARN);
+	if (node_vma) {
+		INIT_HLIST_HEAD(&node_vma->rmap_hlist);
+		INIT_HLIST_NODE(&node_vma->hlist);
+	}
+	return node_vma;
+}
+
+static inline void free_node_vma(struct node_vma *node_vma)
+{
+	kmem_cache_free(node_vma_cache, node_vma);
+}
+
+
+static inline struct vma_slot *alloc_vma_slot(void)
+{
+	struct vma_slot *slot;
+
+	/*
+	 * In case ksm is not initialized by now.
+	 * Oops, we need to consider the call site of uksm_init() in the future.
+	 */
+	if (!vma_slot_cache)
+		return NULL;
+
+	slot = kmem_cache_zalloc(vma_slot_cache, GFP_KERNEL |
+				 __GFP_NORETRY | __GFP_NOWARN);
+	if (slot) {
+		INIT_LIST_HEAD(&slot->slot_list);
+		INIT_LIST_HEAD(&slot->dedup_list);
+		slot->flags |= UKSM_SLOT_NEED_RERAND;
+	}
+	return slot;
+}
+
+static inline void free_vma_slot(struct vma_slot *vma_slot)
+{
+	kmem_cache_free(vma_slot_cache, vma_slot);
+}
+
+
+
+static inline struct rmap_item *alloc_rmap_item(void)
+{
+	struct rmap_item *rmap_item;
+
+	rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL |
+				      __GFP_NORETRY | __GFP_NOWARN);
+	if (rmap_item) {
+		/* bug on lowest bit is not clear for flag use */
+		BUG_ON(is_addr(rmap_item));
+	}
+	return rmap_item;
+}
+
+static inline void free_rmap_item(struct rmap_item *rmap_item)
+{
+	rmap_item->slot = NULL;	/* debug safety */
+	kmem_cache_free(rmap_item_cache, rmap_item);
+}
+
+static inline struct stable_node *alloc_stable_node(void)
+{
+	struct stable_node *node;
+
+	node = kmem_cache_alloc(stable_node_cache, GFP_KERNEL |
+				__GFP_NORETRY | __GFP_NOWARN);
+	if (!node)
+		return NULL;
+
+	INIT_HLIST_HEAD(&node->hlist);
+	list_add(&node->all_list, &stable_node_list);
+	return node;
+}
+
+static inline void free_stable_node(struct stable_node *stable_node)
+{
+	list_del(&stable_node->all_list);
+	kmem_cache_free(stable_node_cache, stable_node);
+}
+
+static inline struct tree_node *alloc_tree_node(struct list_head *list)
+{
+	struct tree_node *node;
+
+	node = kmem_cache_zalloc(tree_node_cache, GFP_KERNEL |
+				 __GFP_NORETRY | __GFP_NOWARN);
+	if (!node)
+		return NULL;
+
+	list_add(&node->all_list, list);
+	return node;
+}
+
+static inline void free_tree_node(struct tree_node *node)
+{
+	list_del(&node->all_list);
+	kmem_cache_free(tree_node_cache, node);
+}
+
+static void uksm_drop_anon_vma(struct rmap_item *rmap_item)
+{
+	struct anon_vma *anon_vma = rmap_item->anon_vma;
+
+	put_anon_vma(anon_vma);
+}
+
+
+/**
+ * Remove a stable node from stable_tree, may unlink from its tree_node and
+ * may remove its parent tree_node if no other stable node is pending.
+ *
+ * @stable_node	    The node need to be removed
+ * @unlink_rb	    Will this node be unlinked from the rbtree?
+ * @remove_tree_    node Will its tree_node be removed if empty?
+ */
+static void remove_node_from_stable_tree(struct stable_node *stable_node,
+					 int unlink_rb,  int remove_tree_node)
+{
+	struct node_vma *node_vma;
+	struct rmap_item *rmap_item;
+	struct hlist_node *n;
+
+	if (!hlist_empty(&stable_node->hlist)) {
+		hlist_for_each_entry_safe(node_vma, n,
+					  &stable_node->hlist, hlist) {
+			hlist_for_each_entry(rmap_item, &node_vma->rmap_hlist, hlist) {
+				uksm_pages_sharing--;
+
+				uksm_drop_anon_vma(rmap_item);
+				rmap_item->address &= PAGE_MASK;
+			}
+			free_node_vma(node_vma);
+			cond_resched();
+		}
+
+		/* the last one is counted as shared */
+		uksm_pages_shared--;
+		uksm_pages_sharing++;
+	}
+
+	if (stable_node->tree_node && unlink_rb) {
+		rb_erase(&stable_node->node,
+			 &stable_node->tree_node->sub_root);
+
+		if (RB_EMPTY_ROOT(&stable_node->tree_node->sub_root) &&
+		    remove_tree_node) {
+			rb_erase(&stable_node->tree_node->node,
+				 root_stable_treep);
+			free_tree_node(stable_node->tree_node);
+		} else {
+			stable_node->tree_node->count--;
+		}
+	}
+
+	free_stable_node(stable_node);
+}
+
+
+/*
+ * get_uksm_page: checks if the page indicated by the stable node
+ * is still its ksm page, despite having held no reference to it.
+ * In which case we can trust the content of the page, and it
+ * returns the gotten page; but if the page has now been zapped,
+ * remove the stale node from the stable tree and return NULL.
+ *
+ * You would expect the stable_node to hold a reference to the ksm page.
+ * But if it increments the page's count, swapping out has to wait for
+ * ksmd to come around again before it can free the page, which may take
+ * seconds or even minutes: much too unresponsive.  So instead we use a
+ * "keyhole reference": access to the ksm page from the stable node peeps
+ * out through its keyhole to see if that page still holds the right key,
+ * pointing back to this stable node.  This relies on freeing a PageAnon
+ * page to reset its page->mapping to NULL, and relies on no other use of
+ * a page to put something that might look like our key in page->mapping.
+ *
+ * include/linux/pagemap.h page_cache_get_speculative() is a good reference,
+ * but this is different - made simpler by uksm_thread_mutex being held, but
+ * interesting for assuming that no other use of the struct page could ever
+ * put our expected_mapping into page->mapping (or a field of the union which
+ * coincides with page->mapping).  The RCU calls are not for KSM at all, but
+ * to keep the page_count protocol described with page_cache_get_speculative.
+ *
+ * Note: it is possible that get_uksm_page() will return NULL one moment,
+ * then page the next, if the page is in between page_freeze_refs() and
+ * page_unfreeze_refs(): this shouldn't be a problem anywhere, the page
+ * is on its way to being freed; but it is an anomaly to bear in mind.
+ *
+ * @unlink_rb:			if the removal of this node will firstly unlink from
+ * its rbtree. stable_node_reinsert will prevent this when restructuring the
+ * node from its old tree.
+ *
+ * @remove_tree_node:	if this is the last one of its tree_node, will the
+ * tree_node be freed ? If we are inserting stable node, this tree_node may
+ * be reused, so don't free it.
+ */
+static struct page *get_uksm_page(struct stable_node *stable_node,
+				 int unlink_rb, int remove_tree_node)
+{
+	struct page *page;
+	void *expected_mapping;
+	unsigned long kpfn;
+
+	expected_mapping = (void *)((unsigned long)stable_node |
+				    PAGE_MAPPING_KSM);
+again:
+	kpfn = READ_ONCE(stable_node->kpfn);
+	page = pfn_to_page(kpfn);
+
+	/*
+	 * page is computed from kpfn, so on most architectures reading
+	 * page->mapping is naturally ordered after reading node->kpfn,
+	 * but on Alpha we need to be more careful.
+	 */
+	smp_read_barrier_depends();
+
+	if (READ_ONCE(page->mapping) != expected_mapping)
+		goto stale;
+
+	/*
+	 * We cannot do anything with the page while its refcount is 0.
+	 * Usually 0 means free, or tail of a higher-order page: in which
+	 * case this node is no longer referenced, and should be freed;
+	 * however, it might mean that the page is under page_freeze_refs().
+	 * The __remove_mapping() case is easy, again the node is now stale;
+	 * but if page is swapcache in migrate_page_move_mapping(), it might
+	 * still be our page, in which case it's essential to keep the node.
+	 */
+	while (!get_page_unless_zero(page)) {
+		/*
+		 * Another check for page->mapping != expected_mapping would
+		 * work here too.  We have chosen the !PageSwapCache test to
+		 * optimize the common case, when the page is or is about to
+		 * be freed: PageSwapCache is cleared (under spin_lock_irq)
+		 * in the freeze_refs section of __remove_mapping(); but Anon
+		 * page->mapping reset to NULL later, in free_pages_prepare().
+		 */
+		if (!PageSwapCache(page))
+			goto stale;
+		cpu_relax();
+	}
+
+	if (READ_ONCE(page->mapping) != expected_mapping) {
+		put_page(page);
+		goto stale;
+	}
+
+	lock_page(page);
+	if (READ_ONCE(page->mapping) != expected_mapping) {
+		unlock_page(page);
+		put_page(page);
+		goto stale;
+	}
+	unlock_page(page);
+	return page;
+stale:
+	/*
+	 * We come here from above when page->mapping or !PageSwapCache
+	 * suggests that the node is stale; but it might be under migration.
+	 * We need smp_rmb(), matching the smp_wmb() in ksm_migrate_page(),
+	 * before checking whether node->kpfn has been changed.
+	 */
+	smp_rmb();
+	if (stable_node->kpfn != kpfn)
+		goto again;
+
+	remove_node_from_stable_tree(stable_node, unlink_rb, remove_tree_node);
+
+	return NULL;
+}
+
+/*
+ * Removing rmap_item from stable or unstable tree.
+ * This function will clean the information from the stable/unstable tree.
+ */
+static inline void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
+{
+	if (rmap_item->address & STABLE_FLAG) {
+		struct stable_node *stable_node;
+		struct node_vma *node_vma;
+		struct page *page;
+
+		node_vma = rmap_item->head;
+		stable_node = node_vma->head;
+		page = get_uksm_page(stable_node, 1, 1);
+		if (!page)
+			goto out;
+
+		/*
+		 * page lock is needed because it's racing with
+		 * try_to_unmap_ksm(), etc.
+		 */
+		lock_page(page);
+		hlist_del(&rmap_item->hlist);
+
+		if (hlist_empty(&node_vma->rmap_hlist)) {
+			hlist_del(&node_vma->hlist);
+			free_node_vma(node_vma);
+		}
+		unlock_page(page);
+
+		put_page(page);
+		if (hlist_empty(&stable_node->hlist)) {
+			/* do NOT call remove_node_from_stable_tree() here,
+			 * it's possible for a forked rmap_item not in
+			 * stable tree while the in-tree rmap_items were
+			 * deleted.
+			 */
+			uksm_pages_shared--;
+		} else
+			uksm_pages_sharing--;
+
+
+		uksm_drop_anon_vma(rmap_item);
+	} else if (rmap_item->address & UNSTABLE_FLAG) {
+		if (rmap_item->hash_round == uksm_hash_round) {
+
+			rb_erase(&rmap_item->node,
+				 &rmap_item->tree_node->sub_root);
+			if (RB_EMPTY_ROOT(&rmap_item->tree_node->sub_root)) {
+				rb_erase(&rmap_item->tree_node->node,
+					 &root_unstable_tree);
+
+				free_tree_node(rmap_item->tree_node);
+			} else
+				rmap_item->tree_node->count--;
+		}
+		uksm_pages_unshared--;
+	}
+
+	rmap_item->address &= PAGE_MASK;
+	rmap_item->hash_max = 0;
+
+out:
+	cond_resched();		/* we're called from many long loops */
+}
+
+static inline int slot_in_uksm(struct vma_slot *slot)
+{
+	return list_empty(&slot->slot_list);
+}
+
+/*
+ * Test if the mm is exiting
+ */
+static inline bool uksm_test_exit(struct mm_struct *mm)
+{
+	return atomic_read(&mm->mm_users) == 0;
+}
+
+static inline unsigned long vma_pool_size(struct vma_slot *slot)
+{
+	return round_up(sizeof(struct rmap_list_entry) * slot->pages,
+			PAGE_SIZE) >> PAGE_SHIFT;
+}
+
+#define CAN_OVERFLOW_U64(x, delta) (U64_MAX - (x) < (delta))
+
+/* must be done with sem locked */
+static int slot_pool_alloc(struct vma_slot *slot)
+{
+	unsigned long pool_size;
+
+	if (slot->rmap_list_pool)
+		return 0;
+
+	pool_size = vma_pool_size(slot);
+	slot->rmap_list_pool = kcalloc(pool_size, sizeof(struct page *),
+				       GFP_KERNEL);
+	if (!slot->rmap_list_pool)
+		return -ENOMEM;
+
+	slot->pool_counts = kcalloc(pool_size, sizeof(unsigned int),
+				    GFP_KERNEL);
+	if (!slot->pool_counts) {
+		kfree(slot->rmap_list_pool);
+		return -ENOMEM;
+	}
+
+	slot->pool_size = pool_size;
+	BUG_ON(CAN_OVERFLOW_U64(uksm_pages_total, slot->pages));
+	slot->flags |= UKSM_SLOT_IN_UKSM;
+	uksm_pages_total += slot->pages;
+
+	return 0;
+}
+
+/*
+ * Called after vma is unlinked from its mm
+ */
+void uksm_remove_vma(struct vm_area_struct *vma)
+{
+	struct vma_slot *slot;
+
+	if (!vma->uksm_vma_slot)
+		return;
+
+	spin_lock(&vma_slot_list_lock);
+	slot = vma->uksm_vma_slot;
+	if (!slot)
+		goto out;
+
+	if (slot_in_uksm(slot)) {
+		/**
+		 * This slot has been added by ksmd, so move to the del list
+		 * waiting ksmd to free it.
+		 */
+		list_add_tail(&slot->slot_list, &vma_slot_del);
+	} else {
+		/**
+		 * It's still on new list. It's ok to free slot directly.
+		 */
+		list_del(&slot->slot_list);
+		free_vma_slot(slot);
+	}
+out:
+	vma->uksm_vma_slot = NULL;
+	spin_unlock(&vma_slot_list_lock);
+}
+
+/**
+ * Need to do two things:
+ * 1. check if slot was moved to del list
+ * 2. make sure the mmap_sem is manipulated under valid vma.
+ *
+ * My concern here is that in some cases, this may make
+ * vma_slot_list_lock() waiters to serialized further by some
+ * sem->wait_lock, can this really be expensive?
+ *
+ *
+ * @return
+ * 0: if successfully locked mmap_sem
+ * -ENOENT: this slot was moved to del list
+ * -EBUSY: vma lock failed
+ */
+static int try_down_read_slot_mmap_sem(struct vma_slot *slot)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm;
+	struct rw_semaphore *sem;
+
+	spin_lock(&vma_slot_list_lock);
+
+	/* the slot_list was removed and inited from new list, when it enters
+	 * uksm_list. If now it's not empty, then it must be moved to del list
+	 */
+	if (!slot_in_uksm(slot)) {
+		spin_unlock(&vma_slot_list_lock);
+		return -ENOENT;
+	}
+
+	BUG_ON(slot->pages != vma_pages(slot->vma));
+	/* Ok, vma still valid */
+	vma = slot->vma;
+	mm = vma->vm_mm;
+	sem = &mm->mmap_sem;
+
+	if (uksm_test_exit(mm)) {
+		spin_unlock(&vma_slot_list_lock);
+		return -ENOENT;
+	}
+
+	if (down_read_trylock(sem)) {
+		spin_unlock(&vma_slot_list_lock);
+		if (slot_pool_alloc(slot)) {
+			uksm_remove_vma(vma);
+			up_read(sem);
+			return -ENOENT;
+		}
+		return 0;
+	}
+
+	spin_unlock(&vma_slot_list_lock);
+	return -EBUSY;
+}
+
+static inline unsigned long
+vma_page_address(struct page *page, struct vm_area_struct *vma)
+{
+	pgoff_t pgoff = page->index;
+	unsigned long address;
+
+	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+	if (unlikely(address < vma->vm_start || address >= vma->vm_end)) {
+		/* page should be within @vma mapping range */
+		return -EFAULT;
+	}
+	return address;
+}
+
+
+/* return 0 on success with the item's mmap_sem locked */
+static inline int get_mergeable_page_lock_mmap(struct rmap_item *item)
+{
+	struct mm_struct *mm;
+	struct vma_slot *slot = item->slot;
+	int err = -EINVAL;
+
+	struct page *page;
+
+	/*
+	 * try_down_read_slot_mmap_sem() returns non-zero if the slot
+	 * has been removed by uksm_remove_vma().
+	 */
+	if (try_down_read_slot_mmap_sem(slot))
+		return -EBUSY;
+
+	mm = slot->vma->vm_mm;
+
+	if (uksm_test_exit(mm))
+		goto failout_up;
+
+	page = item->page;
+	rcu_read_lock();
+	if (!get_page_unless_zero(page)) {
+		rcu_read_unlock();
+		goto failout_up;
+	}
+
+	/* No need to consider huge page here. */
+	if (item->slot->vma->anon_vma != page_anon_vma(page) ||
+	    vma_page_address(page, item->slot->vma) != get_rmap_addr(item)) {
+		/*
+		 * TODO:
+		 * should we release this item becase of its stale page
+		 * mapping?
+		 */
+		put_page(page);
+		rcu_read_unlock();
+		goto failout_up;
+	}
+	rcu_read_unlock();
+	return 0;
+
+failout_up:
+	up_read(&mm->mmap_sem);
+	return err;
+}
+
+/*
+ * What kind of VMA is considered ?
+ */
+static inline int vma_can_enter(struct vm_area_struct *vma)
+{
+	return uksm_flags_can_scan(vma->vm_flags);
+}
+
+/*
+ * Called whenever a fresh new vma is created A new vma_slot.
+ * is created and inserted into a global list Must be called.
+ * after vma is inserted to its mm.
+ */
+void uksm_vma_add_new(struct vm_area_struct *vma)
+{
+	struct vma_slot *slot;
+
+	if (!vma_can_enter(vma)) {
+		vma->uksm_vma_slot = NULL;
+		return;
+	}
+
+	slot = alloc_vma_slot();
+	if (!slot) {
+		vma->uksm_vma_slot = NULL;
+		return;
+	}
+
+	vma->uksm_vma_slot = slot;
+	vma->vm_flags |= VM_MERGEABLE;
+	slot->vma = vma;
+	slot->mm = vma->vm_mm;
+	slot->ctime_j = jiffies;
+	slot->pages = vma_pages(vma);
+	spin_lock(&vma_slot_list_lock);
+	list_add_tail(&slot->slot_list, &vma_slot_new);
+	spin_unlock(&vma_slot_list_lock);
+}
+
+/*   32/3 < they < 32/2 */
+#define shiftl	8
+#define shiftr	12
+
+#define HASH_FROM_TO(from, to)			\
+for (index = from; index < to; index++) {	\
+	pos = random_nums[index];		\
+	hash += key[pos];			\
+	hash += (hash << shiftl);		\
+	hash ^= (hash >> shiftr);		\
+}
+
+
+#define HASH_FROM_DOWN_TO(from, to)		\
+for (index = from - 1; index >= to; index--) {	\
+	hash ^= (hash >> shiftr);		\
+	hash ^= (hash >> (shiftr*2));		\
+	hash -= (hash << shiftl);		\
+	hash += (hash << (shiftl*2));		\
+	pos = random_nums[index];		\
+	hash -= key[pos];			\
+}
+
+/*
+ * The main random sample hash function.
+ */
+static u32 random_sample_hash(void *addr, u32 hash_strength)
+{
+	u32 hash = 0xdeadbeef;
+	int index, pos, loop = hash_strength;
+	u32 *key = (u32 *)addr;
+
+	if (loop > HASH_STRENGTH_FULL)
+		loop = HASH_STRENGTH_FULL;
+
+	HASH_FROM_TO(0, loop);
+
+	if (hash_strength > HASH_STRENGTH_FULL) {
+		loop = hash_strength - HASH_STRENGTH_FULL;
+		HASH_FROM_TO(0, loop);
+	}
+
+	return hash;
+}
+
+
+/**
+ * It's used when hash strength is adjusted
+ *
+ * @addr The page's virtual address
+ * @from The original hash strength
+ * @to   The hash strength changed to
+ * @hash The hash value generated with "from" hash value
+ *
+ * return the hash value
+ */
+static u32 delta_hash(void *addr, int from, int to, u32 hash)
+{
+	u32 *key = (u32 *)addr;
+	int index, pos; /* make sure they are int type */
+
+	if (to > from) {
+		if (from >= HASH_STRENGTH_FULL) {
+			from -= HASH_STRENGTH_FULL;
+			to -= HASH_STRENGTH_FULL;
+			HASH_FROM_TO(from, to);
+		} else if (to <= HASH_STRENGTH_FULL) {
+			HASH_FROM_TO(from, to);
+		} else {
+			HASH_FROM_TO(from, HASH_STRENGTH_FULL);
+			HASH_FROM_TO(0, to - HASH_STRENGTH_FULL);
+		}
+	} else {
+		if (from <= HASH_STRENGTH_FULL) {
+			HASH_FROM_DOWN_TO(from, to);
+		} else if (to >= HASH_STRENGTH_FULL) {
+			from -= HASH_STRENGTH_FULL;
+			to -= HASH_STRENGTH_FULL;
+			HASH_FROM_DOWN_TO(from, to);
+		} else {
+			HASH_FROM_DOWN_TO(from - HASH_STRENGTH_FULL, 0);
+			HASH_FROM_DOWN_TO(HASH_STRENGTH_FULL, to);
+		}
+	}
+
+	return hash;
+}
+
+/**
+ *
+ * Called when: rshash_pos or rshash_neg is about to overflow or a scan round
+ * has finished.
+ *
+ * return 0 if no page has been scanned since last call, 1 otherwise.
+ */
+static inline int encode_benefit(void)
+{
+	u64 scanned_delta, pos_delta, neg_delta;
+	unsigned long base = benefit.base;
+
+	scanned_delta = uksm_pages_scanned - uksm_pages_scanned_last;
+
+	if (!scanned_delta)
+		return 0;
+
+	scanned_delta >>= base;
+	pos_delta = rshash_pos >> base;
+	neg_delta = rshash_neg >> base;
+
+	if (CAN_OVERFLOW_U64(benefit.pos, pos_delta) ||
+	    CAN_OVERFLOW_U64(benefit.neg, neg_delta) ||
+	    CAN_OVERFLOW_U64(benefit.scanned, scanned_delta)) {
+		benefit.scanned >>= 1;
+		benefit.neg >>= 1;
+		benefit.pos >>= 1;
+		benefit.base++;
+		scanned_delta >>= 1;
+		pos_delta >>= 1;
+		neg_delta >>= 1;
+	}
+
+	benefit.pos += pos_delta;
+	benefit.neg += neg_delta;
+	benefit.scanned += scanned_delta;
+
+	BUG_ON(!benefit.scanned);
+
+	rshash_pos = rshash_neg = 0;
+	uksm_pages_scanned_last = uksm_pages_scanned;
+
+	return 1;
+}
+
+static inline void reset_benefit(void)
+{
+	benefit.pos = 0;
+	benefit.neg = 0;
+	benefit.base = 0;
+	benefit.scanned = 0;
+}
+
+static inline void inc_rshash_pos(unsigned long delta)
+{
+	if (CAN_OVERFLOW_U64(rshash_pos, delta))
+		encode_benefit();
+
+	rshash_pos += delta;
+}
+
+static inline void inc_rshash_neg(unsigned long delta)
+{
+	if (CAN_OVERFLOW_U64(rshash_neg, delta))
+		encode_benefit();
+
+	rshash_neg += delta;
+}
+
+
+static inline u32 page_hash(struct page *page, unsigned long hash_strength,
+			    int cost_accounting)
+{
+	u32 val;
+	unsigned long delta;
+
+	void *addr = kmap_atomic(page);
+
+	val = random_sample_hash(addr, hash_strength);
+	kunmap_atomic(addr);
+
+	if (cost_accounting) {
+		if (hash_strength < HASH_STRENGTH_FULL)
+			delta = HASH_STRENGTH_FULL - hash_strength;
+		else
+			delta = 0;
+
+		inc_rshash_pos(delta);
+	}
+
+	return val;
+}
+
+static int memcmp_pages(struct page *page1, struct page *page2,
+			int cost_accounting)
+{
+	char *addr1, *addr2;
+	int ret;
+
+	addr1 = kmap_atomic(page1);
+	addr2 = kmap_atomic(page2);
+	ret = memcmp(addr1, addr2, PAGE_SIZE);
+	kunmap_atomic(addr2);
+	kunmap_atomic(addr1);
+
+	if (cost_accounting)
+		inc_rshash_neg(memcmp_cost);
+
+	return ret;
+}
+
+static inline int pages_identical(struct page *page1, struct page *page2)
+{
+	return !memcmp_pages(page1, page2, 0);
+}
+
+static inline int is_page_full_zero(struct page *page)
+{
+	char *addr;
+	int ret;
+
+	addr = kmap_atomic(page);
+	ret = is_full_zero(addr, PAGE_SIZE);
+	kunmap_atomic(addr);
+
+	return ret;
+}
+
+static int write_protect_page(struct vm_area_struct *vma, struct page *page,
+			      pte_t *orig_pte, pte_t *old_pte)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	struct page_vma_mapped_walk pvmw = {
+		.page = page,
+		.vma = vma,
+	};
+	int swapped;
+	int err = -EFAULT;
+	unsigned long mmun_start;	/* For mmu_notifiers */
+	unsigned long mmun_end;		/* For mmu_notifiers */
+
+	pvmw.address = page_address_in_vma(page, vma);
+	if (pvmw.address == -EFAULT)
+		goto out;
+
+	BUG_ON(PageTransCompound(page));
+
+	mmun_start = pvmw.address;
+	mmun_end   = pvmw.address + PAGE_SIZE;
+	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
+	if (!page_vma_mapped_walk(&pvmw))
+		goto out_mn;
+	if (WARN_ONCE(!pvmw.pte, "Unexpected PMD mapping?"))
+		goto out_unlock;
+
+	if (old_pte)
+		*old_pte = *pvmw.pte;
+
+	if (pte_write(*pvmw.pte) || pte_dirty(*pvmw.pte) ||
+	    (pte_protnone(*pvmw.pte) && pte_savedwrite(*pvmw.pte)) || mm_tlb_flush_pending(mm)) {
+		pte_t entry;
+
+		swapped = PageSwapCache(page);
+		flush_cache_page(vma, pvmw.address, page_to_pfn(page));
+		/*
+		 * Ok this is tricky, when get_user_pages_fast() run it doesn't
+		 * take any lock, therefore the check that we are going to make
+		 * with the pagecount against the mapcount is racey and
+		 * O_DIRECT can happen right after the check.
+		 * So we clear the pte and flush the tlb before the check
+		 * this assure us that no O_DIRECT can happen after the check
+		 * or in the middle of the check.
+		 */
+		entry = ptep_clear_flush_notify(vma, pvmw.address, pvmw.pte);
+		/*
+		 * Check that no O_DIRECT or similar I/O is in progress on the
+		 * page
+		 */
+		if (page_mapcount(page) + 1 + swapped != page_count(page)) {
+			set_pte_at(mm, pvmw.address, pvmw.pte, entry);
+			goto out_unlock;
+		}
+		if (pte_dirty(entry))
+			set_page_dirty(page);
+
+		if (pte_protnone(entry))
+			entry = pte_mkclean(pte_clear_savedwrite(entry));
+		else
+			entry = pte_mkclean(pte_wrprotect(entry));
+
+		set_pte_at_notify(mm, pvmw.address, pvmw.pte, entry);
+	}
+	*orig_pte = *pvmw.pte;
+	err = 0;
+
+out_unlock:
+	page_vma_mapped_walk_done(&pvmw);
+out_mn:
+	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+out:
+	return err;
+}
+
+#define MERGE_ERR_PGERR		1 /* the page is invalid cannot continue */
+#define MERGE_ERR_COLLI		2 /* there is a collision */
+#define MERGE_ERR_COLLI_MAX	3 /* collision at the max hash strength */
+#define MERGE_ERR_CHANGED	4 /* the page has changed since last hash */
+
+
+/**
+ * replace_page - replace page in vma by new ksm page
+ * @vma:      vma that holds the pte pointing to page
+ * @page:     the page we are replacing by kpage
+ * @kpage:    the ksm page we replace page by
+ * @orig_pte: the original value of the pte
+ *
+ * Returns 0 on success, MERGE_ERR_PGERR on failure.
+ */
+static int replace_page(struct vm_area_struct *vma, struct page *page,
+			struct page *kpage, pte_t orig_pte)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *ptep;
+	spinlock_t *ptl;
+	pte_t entry;
+
+	unsigned long addr;
+	int err = MERGE_ERR_PGERR;
+	unsigned long mmun_start;	/* For mmu_notifiers */
+	unsigned long mmun_end;		/* For mmu_notifiers */
+
+	addr = page_address_in_vma(page, vma);
+	if (addr == -EFAULT)
+		goto out;
+
+	pgd = pgd_offset(mm, addr);
+	if (!pgd_present(*pgd))
+		goto out;
+
+	p4d = p4d_offset(pgd, addr);
+	pud = pud_offset(p4d, addr);
+	if (!pud_present(*pud))
+		goto out;
+
+	pmd = pmd_offset(pud, addr);
+	BUG_ON(pmd_trans_huge(*pmd));
+	if (!pmd_present(*pmd))
+		goto out;
+
+	mmun_start = addr;
+	mmun_end   = addr + PAGE_SIZE;
+	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
+	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	if (!pte_same(*ptep, orig_pte)) {
+		pte_unmap_unlock(ptep, ptl);
+		goto out_mn;
+	}
+
+	flush_cache_page(vma, addr, pte_pfn(*ptep));
+	ptep_clear_flush_notify(vma, addr, ptep);
+	entry = mk_pte(kpage, vma->vm_page_prot);
+
+	/* special treatment is needed for zero_page */
+	if ((page_to_pfn(kpage) == uksm_zero_pfn) ||
+				(page_to_pfn(kpage) == zero_pfn)) {
+		entry = pte_mkspecial(entry);
+		dec_mm_counter(mm, MM_ANONPAGES);
+		inc_zone_page_state(page, NR_UKSM_ZERO_PAGES);
+	} else {
+		get_page(kpage);
+		page_add_anon_rmap(kpage, vma, addr, false);
+	}
+
+	set_pte_at_notify(mm, addr, ptep, entry);
+
+	page_remove_rmap(page, false);
+	if (!page_mapped(page))
+		try_to_free_swap(page);
+	put_page(page);
+
+	pte_unmap_unlock(ptep, ptl);
+	err = 0;
+out_mn:
+	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+out:
+	return err;
+}
+
+
+/**
+ *  Fully hash a page with HASH_STRENGTH_MAX return a non-zero hash value. The
+ *  zero hash value at HASH_STRENGTH_MAX is used to indicated that its
+ *  hash_max member has not been calculated.
+ *
+ * @page The page needs to be hashed
+ * @hash_old The hash value calculated with current hash strength
+ *
+ * return the new hash value calculated at HASH_STRENGTH_MAX
+ */
+static inline u32 page_hash_max(struct page *page, u32 hash_old)
+{
+	u32 hash_max = 0;
+	void *addr;
+
+	addr = kmap_atomic(page);
+	hash_max = delta_hash(addr, hash_strength,
+			      HASH_STRENGTH_MAX, hash_old);
+
+	kunmap_atomic(addr);
+
+	if (!hash_max)
+		hash_max = 1;
+
+	inc_rshash_neg(HASH_STRENGTH_MAX - hash_strength);
+	return hash_max;
+}
+
+/*
+ * We compare the hash again, to ensure that it is really a hash collision
+ * instead of being caused by page write.
+ */
+static inline int check_collision(struct rmap_item *rmap_item,
+				  u32 hash)
+{
+	int err;
+	struct page *page = rmap_item->page;
+
+	/* if this rmap_item has already been hash_maxed, then the collision
+	 * must appears in the second-level rbtree search. In this case we check
+	 * if its hash_max value has been changed. Otherwise, the collision
+	 * happens in the first-level rbtree search, so we check against it's
+	 * current hash value.
+	 */
+	if (rmap_item->hash_max) {
+		inc_rshash_neg(memcmp_cost);
+		inc_rshash_neg(HASH_STRENGTH_MAX - hash_strength);
+
+		if (rmap_item->hash_max == page_hash_max(page, hash))
+			err = MERGE_ERR_COLLI;
+		else
+			err = MERGE_ERR_CHANGED;
+	} else {
+		inc_rshash_neg(memcmp_cost + hash_strength);
+
+		if (page_hash(page, hash_strength, 0) == hash)
+			err = MERGE_ERR_COLLI;
+		else
+			err = MERGE_ERR_CHANGED;
+	}
+
+	return err;
+}
+
+/**
+ * Try to merge a rmap_item.page with a kpage in stable node. kpage must
+ * already be a ksm page.
+ *
+ * @return 0 if the pages were merged, -EFAULT otherwise.
+ */
+static int try_to_merge_with_uksm_page(struct rmap_item *rmap_item,
+				      struct page *kpage, u32 hash)
+{
+	struct vm_area_struct *vma = rmap_item->slot->vma;
+	struct mm_struct *mm = vma->vm_mm;
+	pte_t orig_pte = __pte(0);
+	int err = MERGE_ERR_PGERR;
+	struct page *page;
+
+	if (uksm_test_exit(mm))
+		goto out;
+
+	page = rmap_item->page;
+
+	if (page == kpage) { /* ksm page forked */
+		err = 0;
+		goto out;
+	}
+
+	/*
+	 * We need the page lock to read a stable PageSwapCache in
+	 * write_protect_page().  We use trylock_page() instead of
+	 * lock_page() because we don't want to wait here - we
+	 * prefer to continue scanning and merging different pages,
+	 * then come back to this page when it is unlocked.
+	 */
+	if (!trylock_page(page))
+		goto out;
+
+	if (!PageAnon(page) || !PageKsm(kpage))
+		goto out_unlock;
+
+	if (PageTransCompound(page)) {
+		err = split_huge_page(page);
+		if (err)
+			goto out_unlock;
+	}
+
+	/*
+	 * If this anonymous page is mapped only here, its pte may need
+	 * to be write-protected.  If it's mapped elsewhere, all of its
+	 * ptes are necessarily already write-protected.  But in either
+	 * case, we need to lock and check page_count is not raised.
+	 */
+	if (write_protect_page(vma, page, &orig_pte, NULL) == 0) {
+		if (pages_identical(page, kpage))
+			err = replace_page(vma, page, kpage, orig_pte);
+		else
+			err = check_collision(rmap_item, hash);
+	}
+
+	if ((vma->vm_flags & VM_LOCKED) && kpage && !err) {
+		munlock_vma_page(page);
+		if (!PageMlocked(kpage)) {
+			unlock_page(page);
+			lock_page(kpage);
+			mlock_vma_page(kpage);
+			page = kpage;		/* for final unlock */
+		}
+	}
+
+out_unlock:
+	unlock_page(page);
+out:
+	return err;
+}
+
+
+
+/**
+ * If two pages fail to merge in try_to_merge_two_pages, then we have a chance
+ * to restore a page mapping that has been changed in try_to_merge_two_pages.
+ *
+ * @return 0 on success.
+ */
+static int restore_uksm_page_pte(struct vm_area_struct *vma, unsigned long addr,
+			     pte_t orig_pte, pte_t wprt_pte)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *ptep;
+	spinlock_t *ptl;
+
+	int err = -EFAULT;
+
+	pgd = pgd_offset(mm, addr);
+	if (!pgd_present(*pgd))
+		goto out;
+
+	p4d = p4d_offset(pgd, addr);
+	pud = pud_offset(p4d, addr);
+	if (!pud_present(*pud))
+		goto out;
+
+	pmd = pmd_offset(pud, addr);
+	if (!pmd_present(*pmd))
+		goto out;
+
+	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	if (!pte_same(*ptep, wprt_pte)) {
+		/* already copied, let it be */
+		pte_unmap_unlock(ptep, ptl);
+		goto out;
+	}
+
+	/*
+	 * Good boy, still here. When we still get the ksm page, it does not
+	 * return to the free page pool, there is no way that a pte was changed
+	 * to other page and gets back to this page. And remind that ksm page
+	 * do not reuse in do_wp_page(). So it's safe to restore the original
+	 * pte.
+	 */
+	flush_cache_page(vma, addr, pte_pfn(*ptep));
+	ptep_clear_flush_notify(vma, addr, ptep);
+	set_pte_at_notify(mm, addr, ptep, orig_pte);
+
+	pte_unmap_unlock(ptep, ptl);
+	err = 0;
+out:
+	return err;
+}
+
+/**
+ * try_to_merge_two_pages() - take two identical pages and prepare
+ * them to be merged into one page(rmap_item->page)
+ *
+ * @return 0 if we successfully merged two identical pages into
+ *         one ksm page. MERGE_ERR_COLLI if it's only a hash collision
+ *         search in rbtree. MERGE_ERR_CHANGED if rmap_item has been
+ *         changed since it's hashed. MERGE_ERR_PGERR otherwise.
+ *
+ */
+static int try_to_merge_two_pages(struct rmap_item *rmap_item,
+				  struct rmap_item *tree_rmap_item,
+				  u32 hash)
+{
+	pte_t orig_pte1 = __pte(0), orig_pte2 = __pte(0);
+	pte_t wprt_pte1 = __pte(0), wprt_pte2 = __pte(0);
+	struct vm_area_struct *vma1 = rmap_item->slot->vma;
+	struct vm_area_struct *vma2 = tree_rmap_item->slot->vma;
+	struct page *page = rmap_item->page;
+	struct page *tree_page = tree_rmap_item->page;
+	int err = MERGE_ERR_PGERR;
+	struct address_space *saved_mapping;
+
+
+	if (rmap_item->page == tree_rmap_item->page)
+		goto out;
+
+	if (!trylock_page(page))
+		goto out;
+
+	if (!PageAnon(page))
+		goto out_unlock;
+
+	if (PageTransCompound(page)) {
+		err = split_huge_page(page);
+		if (err)
+			goto out_unlock;
+	}
+
+	if (write_protect_page(vma1, page, &wprt_pte1, &orig_pte1) != 0) {
+		unlock_page(page);
+		goto out;
+	}
+
+	/*
+	 * While we hold page lock, upgrade page from
+	 * PageAnon+anon_vma to PageKsm+NULL stable_node:
+	 * stable_tree_insert() will update stable_node.
+	 */
+	saved_mapping = page->mapping;
+	set_page_stable_node(page, NULL);
+	mark_page_accessed(page);
+	if (!PageDirty(page))
+		SetPageDirty(page);
+
+	unlock_page(page);
+
+	if (!trylock_page(tree_page))
+		goto restore_out;
+
+	if (!PageAnon(tree_page)) {
+		unlock_page(tree_page);
+		goto restore_out;
+	}
+
+	if (PageTransCompound(tree_page)) {
+		err = split_huge_page(tree_page);
+		if (err) {
+			unlock_page(tree_page);
+			goto restore_out;
+		}
+	}
+
+	if (write_protect_page(vma2, tree_page, &wprt_pte2, &orig_pte2) != 0) {
+		unlock_page(tree_page);
+		goto restore_out;
+	}
+
+	if (pages_identical(page, tree_page)) {
+		err = replace_page(vma2, tree_page, page, wprt_pte2);
+		if (err) {
+			unlock_page(tree_page);
+			goto restore_out;
+		}
+
+		if ((vma2->vm_flags & VM_LOCKED)) {
+			munlock_vma_page(tree_page);
+			if (!PageMlocked(page)) {
+				unlock_page(tree_page);
+				lock_page(page);
+				mlock_vma_page(page);
+				tree_page = page; /* for final unlock */
+			}
+		}
+
+		unlock_page(tree_page);
+
+		goto out; /* success */
+
+	} else {
+		if (tree_rmap_item->hash_max &&
+		    tree_rmap_item->hash_max == rmap_item->hash_max) {
+			err = MERGE_ERR_COLLI_MAX;
+		} else if (page_hash(page, hash_strength, 0) ==
+		    page_hash(tree_page, hash_strength, 0)) {
+			inc_rshash_neg(memcmp_cost + hash_strength * 2);
+			err = MERGE_ERR_COLLI;
+		} else {
+			err = MERGE_ERR_CHANGED;
+		}
+
+		unlock_page(tree_page);
+	}
+
+restore_out:
+	lock_page(page);
+	if (!restore_uksm_page_pte(vma1, get_rmap_addr(rmap_item),
+				  orig_pte1, wprt_pte1))
+		page->mapping = saved_mapping;
+
+out_unlock:
+	unlock_page(page);
+out:
+	return err;
+}
+
+static inline int hash_cmp(u32 new_val, u32 node_val)
+{
+	if (new_val > node_val)
+		return 1;
+	else if (new_val < node_val)
+		return -1;
+	else
+		return 0;
+}
+
+static inline u32 rmap_item_hash_max(struct rmap_item *item, u32 hash)
+{
+	u32 hash_max = item->hash_max;
+
+	if (!hash_max) {
+		hash_max = page_hash_max(item->page, hash);
+
+		item->hash_max = hash_max;
+	}
+
+	return hash_max;
+}
+
+
+
+/**
+ * stable_tree_search() - search the stable tree for a page
+ *
+ * @item:	the rmap_item we are comparing with
+ * @hash:	the hash value of this item->page already calculated
+ *
+ * @return	the page we have found, NULL otherwise. The page returned has
+ *			been gotten.
+ */
+static struct page *stable_tree_search(struct rmap_item *item, u32 hash)
+{
+	struct rb_node *node = root_stable_treep->rb_node;
+	struct tree_node *tree_node;
+	unsigned long hash_max;
+	struct page *page = item->page;
+	struct stable_node *stable_node;
+
+	stable_node = page_stable_node(page);
+	if (stable_node) {
+		/* ksm page forked, that is
+		 * if (PageKsm(page) && !in_stable_tree(rmap_item))
+		 * it's actually gotten once outside.
+		 */
+		get_page(page);
+		return page;
+	}
+
+	while (node) {
+		int cmp;
+
+		tree_node = rb_entry(node, struct tree_node, node);
+
+		cmp = hash_cmp(hash, tree_node->hash);
+
+		if (cmp < 0)
+			node = node->rb_left;
+		else if (cmp > 0)
+			node = node->rb_right;
+		else
+			break;
+	}
+
+	if (!node)
+		return NULL;
+
+	if (tree_node->count == 1) {
+		stable_node = rb_entry(tree_node->sub_root.rb_node,
+				       struct stable_node, node);
+		BUG_ON(!stable_node);
+
+		goto get_page_out;
+	}
+
+	/*
+	 * ok, we have to search the second
+	 * level subtree, hash the page to a
+	 * full strength.
+	 */
+	node = tree_node->sub_root.rb_node;
+	BUG_ON(!node);
+	hash_max = rmap_item_hash_max(item, hash);
+
+	while (node) {
+		int cmp;
+
+		stable_node = rb_entry(node, struct stable_node, node);
+
+		cmp = hash_cmp(hash_max, stable_node->hash_max);
+
+		if (cmp < 0)
+			node = node->rb_left;
+		else if (cmp > 0)
+			node = node->rb_right;
+		else
+			goto get_page_out;
+	}
+
+	return NULL;
+
+get_page_out:
+	page = get_uksm_page(stable_node, 1, 1);
+	return page;
+}
+
+static int try_merge_rmap_item(struct rmap_item *item,
+			       struct page *kpage,
+			       struct page *tree_page)
+{
+	struct vm_area_struct *vma = item->slot->vma;
+	struct page_vma_mapped_walk pvmw = {
+		.page = kpage,
+		.vma = vma,
+	};
+
+	pvmw.address = get_rmap_addr(item);
+	if (!page_vma_mapped_walk(&pvmw))
+		return 0;
+
+	if (pte_write(*pvmw.pte)) {
+		/* has changed, abort! */
+		page_vma_mapped_walk_done(&pvmw);
+		return 0;
+	}
+
+	get_page(tree_page);
+	page_add_anon_rmap(tree_page, vma, pvmw.address, false);
+
+	flush_cache_page(vma, pvmw.address, page_to_pfn(kpage));
+	ptep_clear_flush_notify(vma, pvmw.address, pvmw.pte);
+	set_pte_at_notify(vma->vm_mm, pvmw.address, pvmw.pte,
+			  mk_pte(tree_page, vma->vm_page_prot));
+
+	page_remove_rmap(kpage, false);
+	put_page(kpage);
+
+	page_vma_mapped_walk_done(&pvmw);
+
+	return 1;
+}
+
+/**
+ * try_to_merge_with_stable_page() - when two rmap_items need to be inserted
+ * into stable tree, the page was found to be identical to a stable ksm page,
+ * this is the last chance we can merge them into one.
+ *
+ * @item1:	the rmap_item holding the page which we wanted to insert
+ *		into stable tree.
+ * @item2:	the other rmap_item we found when unstable tree search
+ * @oldpage:	the page currently mapped by the two rmap_items
+ * @tree_page:	the page we found identical in stable tree node
+ * @success1:	return if item1 is successfully merged
+ * @success2:	return if item2 is successfully merged
+ */
+static void try_merge_with_stable(struct rmap_item *item1,
+				  struct rmap_item *item2,
+				  struct page **kpage,
+				  struct page *tree_page,
+				  int *success1, int *success2)
+{
+	struct vm_area_struct *vma1 = item1->slot->vma;
+	struct vm_area_struct *vma2 = item2->slot->vma;
+	*success1 = 0;
+	*success2 = 0;
+
+	if (unlikely(*kpage == tree_page)) {
+		/* I don't think this can really happen */
+		pr_warn("UKSM: unexpected condition detected in "
+			"%s -- *kpage == tree_page !\n", __func__);
+		*success1 = 1;
+		*success2 = 1;
+		return;
+	}
+
+	if (!PageAnon(*kpage) || !PageKsm(*kpage))
+		goto failed;
+
+	if (!trylock_page(tree_page))
+		goto failed;
+
+	/* If the oldpage is still ksm and still pointed
+	 * to in the right place, and still write protected,
+	 * we are confident it's not changed, no need to
+	 * memcmp anymore.
+	 * be ware, we cannot take nested pte locks,
+	 * deadlock risk.
+	 */
+	if (!try_merge_rmap_item(item1, *kpage, tree_page))
+		goto unlock_failed;
+
+	/* ok, then vma2, remind that pte1 already set */
+	if (!try_merge_rmap_item(item2, *kpage, tree_page))
+		goto success_1;
+
+	*success2 = 1;
+success_1:
+	*success1 = 1;
+
+
+	if ((*success1 && vma1->vm_flags & VM_LOCKED) ||
+	    (*success2 && vma2->vm_flags & VM_LOCKED)) {
+		munlock_vma_page(*kpage);
+		if (!PageMlocked(tree_page))
+			mlock_vma_page(tree_page);
+	}
+
+	/*
+	 * We do not need oldpage any more in the caller, so can break the lock
+	 * now.
+	 */
+	unlock_page(*kpage);
+	*kpage = tree_page; /* Get unlocked outside. */
+	return;
+
+unlock_failed:
+	unlock_page(tree_page);
+failed:
+	return;
+}
+
+static inline void stable_node_hash_max(struct stable_node *node,
+					 struct page *page, u32 hash)
+{
+	u32 hash_max = node->hash_max;
+
+	if (!hash_max) {
+		hash_max = page_hash_max(page, hash);
+		node->hash_max = hash_max;
+	}
+}
+
+static inline
+struct stable_node *new_stable_node(struct tree_node *tree_node,
+				    struct page *kpage, u32 hash_max)
+{
+	struct stable_node *new_stable_node;
+
+	new_stable_node = alloc_stable_node();
+	if (!new_stable_node)
+		return NULL;
+
+	new_stable_node->kpfn = page_to_pfn(kpage);
+	new_stable_node->hash_max = hash_max;
+	new_stable_node->tree_node = tree_node;
+	set_page_stable_node(kpage, new_stable_node);
+
+	return new_stable_node;
+}
+
+static inline
+struct stable_node *first_level_insert(struct tree_node *tree_node,
+				       struct rmap_item *rmap_item,
+				       struct rmap_item *tree_rmap_item,
+				       struct page **kpage, u32 hash,
+				       int *success1, int *success2)
+{
+	int cmp;
+	struct page *tree_page;
+	u32 hash_max = 0;
+	struct stable_node *stable_node, *new_snode;
+	struct rb_node *parent = NULL, **new;
+
+	/* this tree node contains no sub-tree yet */
+	stable_node = rb_entry(tree_node->sub_root.rb_node,
+			       struct stable_node, node);
+
+	tree_page = get_uksm_page(stable_node, 1, 0);
+	if (tree_page) {
+		cmp = memcmp_pages(*kpage, tree_page, 1);
+		if (!cmp) {
+			try_merge_with_stable(rmap_item, tree_rmap_item, kpage,
+					      tree_page, success1, success2);
+			put_page(tree_page);
+			if (!*success1 && !*success2)
+				goto failed;
+
+			return stable_node;
+
+		} else {
+			/*
+			 * collision in first level try to create a subtree.
+			 * A new node need to be created.
+			 */
+			put_page(tree_page);
+
+			stable_node_hash_max(stable_node, tree_page,
+					     tree_node->hash);
+			hash_max = rmap_item_hash_max(rmap_item, hash);
+			cmp = hash_cmp(hash_max, stable_node->hash_max);
+
+			parent = &stable_node->node;
+			if (cmp < 0)
+				new = &parent->rb_left;
+			else if (cmp > 0)
+				new = &parent->rb_right;
+			else
+				goto failed;
+		}
+
+	} else {
+		/* the only stable_node deleted, we reuse its tree_node.
+		 */
+		parent = NULL;
+		new = &tree_node->sub_root.rb_node;
+	}
+
+	new_snode = new_stable_node(tree_node, *kpage, hash_max);
+	if (!new_snode)
+		goto failed;
+
+	rb_link_node(&new_snode->node, parent, new);
+	rb_insert_color(&new_snode->node, &tree_node->sub_root);
+	tree_node->count++;
+	*success1 = *success2 = 1;
+
+	return new_snode;
+
+failed:
+	return NULL;
+}
+
+static inline
+struct stable_node *stable_subtree_insert(struct tree_node *tree_node,
+					  struct rmap_item *rmap_item,
+					  struct rmap_item *tree_rmap_item,
+					  struct page **kpage, u32 hash,
+					  int *success1, int *success2)
+{
+	struct page *tree_page;
+	u32 hash_max;
+	struct stable_node *stable_node, *new_snode;
+	struct rb_node *parent, **new;
+
+research:
+	parent = NULL;
+	new = &tree_node->sub_root.rb_node;
+	BUG_ON(!*new);
+	hash_max = rmap_item_hash_max(rmap_item, hash);
+	while (*new) {
+		int cmp;
+
+		stable_node = rb_entry(*new, struct stable_node, node);
+
+		cmp = hash_cmp(hash_max, stable_node->hash_max);
+
+		if (cmp < 0) {
+			parent = *new;
+			new = &parent->rb_left;
+		} else if (cmp > 0) {
+			parent = *new;
+			new = &parent->rb_right;
+		} else {
+			tree_page = get_uksm_page(stable_node, 1, 0);
+			if (tree_page) {
+				cmp = memcmp_pages(*kpage, tree_page, 1);
+				if (!cmp) {
+					try_merge_with_stable(rmap_item,
+						tree_rmap_item, kpage,
+						tree_page, success1, success2);
+
+					put_page(tree_page);
+					if (!*success1 && !*success2)
+						goto failed;
+					/*
+					 * successfully merged with a stable
+					 * node
+					 */
+					return stable_node;
+				} else {
+					put_page(tree_page);
+					goto failed;
+				}
+			} else {
+				/*
+				 * stable node may be deleted,
+				 * and subtree maybe
+				 * restructed, cannot
+				 * continue, research it.
+				 */
+				if (tree_node->count) {
+					goto research;
+				} else {
+					/* reuse the tree node*/
+					parent = NULL;
+					new = &tree_node->sub_root.rb_node;
+				}
+			}
+		}
+	}
+
+	new_snode = new_stable_node(tree_node, *kpage, hash_max);
+	if (!new_snode)
+		goto failed;
+
+	rb_link_node(&new_snode->node, parent, new);
+	rb_insert_color(&new_snode->node, &tree_node->sub_root);
+	tree_node->count++;
+	*success1 = *success2 = 1;
+
+	return new_snode;
+
+failed:
+	return NULL;
+}
+
+
+/**
+ * stable_tree_insert() - try to insert a merged page in unstable tree to
+ * the stable tree
+ *
+ * @kpage:		the page need to be inserted
+ * @hash:		the current hash of this page
+ * @rmap_item:		the rmap_item being scanned
+ * @tree_rmap_item:	the rmap_item found on unstable tree
+ * @success1:		return if rmap_item is merged
+ * @success2:		return if tree_rmap_item is merged
+ *
+ * @return		the stable_node on stable tree if at least one
+ *			rmap_item is inserted into stable tree, NULL
+ *			otherwise.
+ */
+static struct stable_node *
+stable_tree_insert(struct page **kpage, u32 hash,
+		   struct rmap_item *rmap_item,
+		   struct rmap_item *tree_rmap_item,
+		   int *success1, int *success2)
+{
+	struct rb_node **new = &root_stable_treep->rb_node;
+	struct rb_node *parent = NULL;
+	struct stable_node *stable_node;
+	struct tree_node *tree_node;
+	u32 hash_max = 0;
+
+	*success1 = *success2 = 0;
+
+	while (*new) {
+		int cmp;
+
+		tree_node = rb_entry(*new, struct tree_node, node);
+
+		cmp = hash_cmp(hash, tree_node->hash);
+
+		if (cmp < 0) {
+			parent = *new;
+			new = &parent->rb_left;
+		} else if (cmp > 0) {
+			parent = *new;
+			new = &parent->rb_right;
+		} else
+			break;
+	}
+
+	if (*new) {
+		if (tree_node->count == 1) {
+			stable_node = first_level_insert(tree_node, rmap_item,
+						tree_rmap_item, kpage,
+						hash, success1, success2);
+		} else {
+			stable_node = stable_subtree_insert(tree_node,
+					rmap_item, tree_rmap_item, kpage,
+					hash, success1, success2);
+		}
+	} else {
+
+		/* no tree node found */
+		tree_node = alloc_tree_node(stable_tree_node_listp);
+		if (!tree_node) {
+			stable_node = NULL;
+			goto out;
+		}
+
+		stable_node = new_stable_node(tree_node, *kpage, hash_max);
+		if (!stable_node) {
+			free_tree_node(tree_node);
+			goto out;
+		}
+
+		tree_node->hash = hash;
+		rb_link_node(&tree_node->node, parent, new);
+		rb_insert_color(&tree_node->node, root_stable_treep);
+		parent = NULL;
+		new = &tree_node->sub_root.rb_node;
+
+		rb_link_node(&stable_node->node, parent, new);
+		rb_insert_color(&stable_node->node, &tree_node->sub_root);
+		tree_node->count++;
+		*success1 = *success2 = 1;
+	}
+
+out:
+	return stable_node;
+}
+
+
+/**
+ * get_tree_rmap_item_page() - try to get the page and lock the mmap_sem
+ *
+ * @return	0 on success, -EBUSY if unable to lock the mmap_sem,
+ *		-EINVAL if the page mapping has been changed.
+ */
+static inline int get_tree_rmap_item_page(struct rmap_item *tree_rmap_item)
+{
+	int err;
+
+	err = get_mergeable_page_lock_mmap(tree_rmap_item);
+
+	if (err == -EINVAL) {
+		/* its page map has been changed, remove it */
+		remove_rmap_item_from_tree(tree_rmap_item);
+	}
+
+	/* The page is gotten and mmap_sem is locked now. */
+	return err;
+}
+
+
+/**
+ * unstable_tree_search_insert() - search an unstable tree rmap_item with the
+ * same hash value. Get its page and trylock the mmap_sem
+ */
+static inline
+struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item,
+					      u32 hash)
+
+{
+	struct rb_node **new = &root_unstable_tree.rb_node;
+	struct rb_node *parent = NULL;
+	struct tree_node *tree_node;
+	u32 hash_max;
+	struct rmap_item *tree_rmap_item;
+
+	while (*new) {
+		int cmp;
+
+		tree_node = rb_entry(*new, struct tree_node, node);
+
+		cmp = hash_cmp(hash, tree_node->hash);
+
+		if (cmp < 0) {
+			parent = *new;
+			new = &parent->rb_left;
+		} else if (cmp > 0) {
+			parent = *new;
+			new = &parent->rb_right;
+		} else
+			break;
+	}
+
+	if (*new) {
+		/* got the tree_node */
+		if (tree_node->count == 1) {
+			tree_rmap_item = rb_entry(tree_node->sub_root.rb_node,
+						  struct rmap_item, node);
+			BUG_ON(!tree_rmap_item);
+
+			goto get_page_out;
+		}
+
+		/* well, search the collision subtree */
+		new = &tree_node->sub_root.rb_node;
+		BUG_ON(!*new);
+		hash_max = rmap_item_hash_max(rmap_item, hash);
+
+		while (*new) {
+			int cmp;
+
+			tree_rmap_item = rb_entry(*new, struct rmap_item,
+						  node);
+
+			cmp = hash_cmp(hash_max, tree_rmap_item->hash_max);
+			parent = *new;
+			if (cmp < 0)
+				new = &parent->rb_left;
+			else if (cmp > 0)
+				new = &parent->rb_right;
+			else
+				goto get_page_out;
+		}
+	} else {
+		/* alloc a new tree_node */
+		tree_node = alloc_tree_node(&unstable_tree_node_list);
+		if (!tree_node)
+			return NULL;
+
+		tree_node->hash = hash;
+		rb_link_node(&tree_node->node, parent, new);
+		rb_insert_color(&tree_node->node, &root_unstable_tree);
+		parent = NULL;
+		new = &tree_node->sub_root.rb_node;
+	}
+
+	/* did not found even in sub-tree */
+	rmap_item->tree_node = tree_node;
+	rmap_item->address |= UNSTABLE_FLAG;
+	rmap_item->hash_round = uksm_hash_round;
+	rb_link_node(&rmap_item->node, parent, new);
+	rb_insert_color(&rmap_item->node, &tree_node->sub_root);
+
+	uksm_pages_unshared++;
+	return NULL;
+
+get_page_out:
+	if (tree_rmap_item->page == rmap_item->page)
+		return NULL;
+
+	if (get_tree_rmap_item_page(tree_rmap_item))
+		return NULL;
+
+	return tree_rmap_item;
+}
+
+static void hold_anon_vma(struct rmap_item *rmap_item,
+			  struct anon_vma *anon_vma)
+{
+	rmap_item->anon_vma = anon_vma;
+	get_anon_vma(anon_vma);
+}
+
+
+/**
+ * stable_tree_append() - append a rmap_item to a stable node. Deduplication
+ * ratio statistics is done in this function.
+ *
+ */
+static void stable_tree_append(struct rmap_item *rmap_item,
+			       struct stable_node *stable_node, int logdedup)
+{
+	struct node_vma *node_vma = NULL, *new_node_vma, *node_vma_cont = NULL;
+	unsigned long key = (unsigned long)rmap_item->slot;
+	unsigned long factor = rmap_item->slot->rung->step;
+
+	BUG_ON(!stable_node);
+	rmap_item->address |= STABLE_FLAG;
+
+	if (hlist_empty(&stable_node->hlist)) {
+		uksm_pages_shared++;
+		goto node_vma_new;
+	} else {
+		uksm_pages_sharing++;
+	}
+
+	hlist_for_each_entry(node_vma, &stable_node->hlist, hlist) {
+		if (node_vma->key >= key)
+			break;
+
+		if (logdedup) {
+			node_vma->slot->pages_bemerged += factor;
+			if (list_empty(&node_vma->slot->dedup_list))
+				list_add(&node_vma->slot->dedup_list,
+					 &vma_slot_dedup);
+		}
+	}
+
+	if (node_vma) {
+		if (node_vma->key == key) {
+			node_vma_cont = hlist_entry_safe(node_vma->hlist.next, struct node_vma, hlist);
+			goto node_vma_ok;
+		} else if (node_vma->key > key) {
+			node_vma_cont = node_vma;
+		}
+	}
+
+node_vma_new:
+	/* no same vma already in node, alloc a new node_vma */
+	new_node_vma = alloc_node_vma();
+	BUG_ON(!new_node_vma);
+	new_node_vma->head = stable_node;
+	new_node_vma->slot = rmap_item->slot;
+
+	if (!node_vma) {
+		hlist_add_head(&new_node_vma->hlist, &stable_node->hlist);
+	} else if (node_vma->key != key) {
+		if (node_vma->key < key)
+			hlist_add_behind(&new_node_vma->hlist, &node_vma->hlist);
+		else {
+			hlist_add_before(&new_node_vma->hlist,
+					 &node_vma->hlist);
+		}
+
+	}
+	node_vma = new_node_vma;
+
+node_vma_ok: /* ok, ready to add to the list */
+	rmap_item->head = node_vma;
+	hlist_add_head(&rmap_item->hlist, &node_vma->rmap_hlist);
+	hold_anon_vma(rmap_item, rmap_item->slot->vma->anon_vma);
+	if (logdedup) {
+		rmap_item->slot->pages_merged++;
+		if (node_vma_cont) {
+			node_vma = node_vma_cont;
+			hlist_for_each_entry_continue(node_vma, hlist) {
+				node_vma->slot->pages_bemerged += factor;
+				if (list_empty(&node_vma->slot->dedup_list))
+					list_add(&node_vma->slot->dedup_list,
+						 &vma_slot_dedup);
+			}
+		}
+	}
+}
+
+/*
+ * We use break_ksm to break COW on a ksm page: it's a stripped down
+ *
+ *	if (get_user_pages(addr, 1, 1, 1, &page, NULL) == 1)
+ *		put_page(page);
+ *
+ * but taking great care only to touch a ksm page, in a VM_MERGEABLE vma,
+ * in case the application has unmapped and remapped mm,addr meanwhile.
+ * Could a ksm page appear anywhere else?  Actually yes, in a VM_PFNMAP
+ * mmap of /dev/mem or /dev/kmem, where we would not want to touch it.
+ */
+static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
+{
+	struct page *page;
+	int ret = 0;
+
+	do {
+		cond_resched();
+		page = follow_page(vma, addr, FOLL_GET | FOLL_MIGRATION | FOLL_REMOTE);
+		if (IS_ERR_OR_NULL(page))
+			break;
+		if (PageKsm(page)) {
+			ret = handle_mm_fault(vma, addr,
+					      FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE);
+		} else
+			ret = VM_FAULT_WRITE;
+		put_page(page);
+	} while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | VM_FAULT_OOM)));
+	/*
+	 * We must loop because handle_mm_fault() may back out if there's
+	 * any difficulty e.g. if pte accessed bit gets updated concurrently.
+	 *
+	 * VM_FAULT_WRITE is what we have been hoping for: it indicates that
+	 * COW has been broken, even if the vma does not permit VM_WRITE;
+	 * but note that a concurrent fault might break PageKsm for us.
+	 *
+	 * VM_FAULT_SIGBUS could occur if we race with truncation of the
+	 * backing file, which also invalidates anonymous pages: that's
+	 * okay, that truncation will have unmapped the PageKsm for us.
+	 *
+	 * VM_FAULT_OOM: at the time of writing (late July 2009), setting
+	 * aside mem_cgroup limits, VM_FAULT_OOM would only be set if the
+	 * current task has TIF_MEMDIE set, and will be OOM killed on return
+	 * to user; and ksmd, having no mm, would never be chosen for that.
+	 *
+	 * But if the mm is in a limited mem_cgroup, then the fault may fail
+	 * with VM_FAULT_OOM even if the current task is not TIF_MEMDIE; and
+	 * even ksmd can fail in this way - though it's usually breaking ksm
+	 * just to undo a merge it made a moment before, so unlikely to oom.
+	 *
+	 * That's a pity: we might therefore have more kernel pages allocated
+	 * than we're counting as nodes in the stable tree; but uksm_do_scan
+	 * will retry to break_cow on each pass, so should recover the page
+	 * in due course.  The important thing is to not let VM_MERGEABLE
+	 * be cleared while any such pages might remain in the area.
+	 */
+	return (ret & VM_FAULT_OOM) ? -ENOMEM : 0;
+}
+
+static void break_cow(struct rmap_item *rmap_item)
+{
+	struct vm_area_struct *vma = rmap_item->slot->vma;
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long addr = get_rmap_addr(rmap_item);
+
+	if (uksm_test_exit(mm))
+		goto out;
+
+	break_ksm(vma, addr);
+out:
+	return;
+}
+
+/*
+ * Though it's very tempting to unmerge in_stable_tree(rmap_item)s rather
+ * than check every pte of a given vma, the locking doesn't quite work for
+ * that - an rmap_item is assigned to the stable tree after inserting ksm
+ * page and upping mmap_sem.  Nor does it fit with the way we skip dup'ing
+ * rmap_items from parent to child at fork time (so as not to waste time
+ * if exit comes before the next scan reaches it).
+ *
+ * Similarly, although we'd like to remove rmap_items (so updating counts
+ * and freeing memory) when unmerging an area, it's easier to leave that
+ * to the next pass of ksmd - consider, for example, how ksmd might be
+ * in cmp_and_merge_page on one of the rmap_items we would be removing.
+ */
+inline int unmerge_uksm_pages(struct vm_area_struct *vma,
+		      unsigned long start, unsigned long end)
+{
+	unsigned long addr;
+	int err = 0;
+
+	for (addr = start; addr < end && !err; addr += PAGE_SIZE) {
+		if (uksm_test_exit(vma->vm_mm))
+			break;
+		if (signal_pending(current))
+			err = -ERESTARTSYS;
+		else
+			err = break_ksm(vma, addr);
+	}
+	return err;
+}
+
+static inline void inc_uksm_pages_scanned(void)
+{
+	u64 delta;
+
+
+	if (uksm_pages_scanned == U64_MAX) {
+		encode_benefit();
+
+		delta = uksm_pages_scanned >> pages_scanned_base;
+
+		if (CAN_OVERFLOW_U64(pages_scanned_stored, delta)) {
+			pages_scanned_stored >>= 1;
+			delta >>= 1;
+			pages_scanned_base++;
+		}
+
+		pages_scanned_stored += delta;
+
+		uksm_pages_scanned = uksm_pages_scanned_last = 0;
+	}
+
+	uksm_pages_scanned++;
+}
+
+static inline int find_zero_page_hash(int strength, u32 hash)
+{
+	return (zero_hash_table[strength] == hash);
+}
+
+static
+int cmp_and_merge_zero_page(struct vm_area_struct *vma, struct page *page)
+{
+	struct page *zero_page = empty_uksm_zero_page;
+	struct mm_struct *mm = vma->vm_mm;
+	pte_t orig_pte = __pte(0);
+	int err = -EFAULT;
+
+	if (uksm_test_exit(mm))
+		goto out;
+
+	if (!trylock_page(page))
+		goto out;
+
+	if (!PageAnon(page))
+		goto out_unlock;
+
+	if (PageTransCompound(page)) {
+		err = split_huge_page(page);
+		if (err)
+			goto out_unlock;
+	}
+
+	if (write_protect_page(vma, page, &orig_pte, 0) == 0) {
+		if (is_page_full_zero(page))
+			err = replace_page(vma, page, zero_page, orig_pte);
+	}
+
+out_unlock:
+	unlock_page(page);
+out:
+	return err;
+}
+
+/*
+ * cmp_and_merge_page() - first see if page can be merged into the stable
+ * tree; if not, compare hash to previous and if it's the same, see if page
+ * can be inserted into the unstable tree, or merged with a page already there
+ * and both transferred to the stable tree.
+ *
+ * @page: the page that we are searching identical page to.
+ * @rmap_item: the reverse mapping into the virtual address of this page
+ */
+static void cmp_and_merge_page(struct rmap_item *rmap_item, u32 hash)
+{
+	struct rmap_item *tree_rmap_item;
+	struct page *page;
+	struct page *kpage = NULL;
+	u32 hash_max;
+	int err;
+	unsigned int success1, success2;
+	struct stable_node *snode;
+	int cmp;
+	struct rb_node *parent = NULL, **new;
+
+	remove_rmap_item_from_tree(rmap_item);
+	page = rmap_item->page;
+
+	/* We first start with searching the page inside the stable tree */
+	kpage = stable_tree_search(rmap_item, hash);
+	if (kpage) {
+		err = try_to_merge_with_uksm_page(rmap_item, kpage,
+						 hash);
+		if (!err) {
+			/*
+			 * The page was successfully merged, add
+			 * its rmap_item to the stable tree.
+			 * page lock is needed because it's
+			 * racing with try_to_unmap_ksm(), etc.
+			 */
+			lock_page(kpage);
+			snode = page_stable_node(kpage);
+			stable_tree_append(rmap_item, snode, 1);
+			unlock_page(kpage);
+			put_page(kpage);
+			return; /* success */
+		}
+		put_page(kpage);
+
+		/*
+		 * if it's a collision and it has been search in sub-rbtree
+		 * (hash_max != 0), we want to abort, because if it is
+		 * successfully merged in unstable tree, the collision trends to
+		 * happen again.
+		 */
+		if (err == MERGE_ERR_COLLI && rmap_item->hash_max)
+			return;
+	}
+
+	tree_rmap_item =
+		unstable_tree_search_insert(rmap_item, hash);
+	if (tree_rmap_item) {
+		err = try_to_merge_two_pages(rmap_item, tree_rmap_item, hash);
+		/*
+		 * As soon as we merge this page, we want to remove the
+		 * rmap_item of the page we have merged with from the unstable
+		 * tree, and insert it instead as new node in the stable tree.
+		 */
+		if (!err) {
+			kpage = page;
+			remove_rmap_item_from_tree(tree_rmap_item);
+			lock_page(kpage);
+			snode = stable_tree_insert(&kpage, hash,
+						   rmap_item, tree_rmap_item,
+						   &success1, &success2);
+
+			/*
+			 * Do not log dedup for tree item, it's not counted as
+			 * scanned in this round.
+			 */
+			if (success2)
+				stable_tree_append(tree_rmap_item, snode, 0);
+
+			/*
+			 * The order of these two stable append is important:
+			 * we are scanning rmap_item.
+			 */
+			if (success1)
+				stable_tree_append(rmap_item, snode, 1);
+
+			/*
+			 * The original kpage may be unlocked inside
+			 * stable_tree_insert() already. This page
+			 * should be unlocked before doing
+			 * break_cow().
+			 */
+			unlock_page(kpage);
+
+			if (!success1)
+				break_cow(rmap_item);
+
+			if (!success2)
+				break_cow(tree_rmap_item);
+
+		} else if (err == MERGE_ERR_COLLI) {
+			BUG_ON(tree_rmap_item->tree_node->count > 1);
+
+			rmap_item_hash_max(tree_rmap_item,
+					   tree_rmap_item->tree_node->hash);
+
+			hash_max = rmap_item_hash_max(rmap_item, hash);
+			cmp = hash_cmp(hash_max, tree_rmap_item->hash_max);
+			parent = &tree_rmap_item->node;
+			if (cmp < 0)
+				new = &parent->rb_left;
+			else if (cmp > 0)
+				new = &parent->rb_right;
+			else
+				goto put_up_out;
+
+			rmap_item->tree_node = tree_rmap_item->tree_node;
+			rmap_item->address |= UNSTABLE_FLAG;
+			rmap_item->hash_round = uksm_hash_round;
+			rb_link_node(&rmap_item->node, parent, new);
+			rb_insert_color(&rmap_item->node,
+					&tree_rmap_item->tree_node->sub_root);
+			rmap_item->tree_node->count++;
+		} else {
+			/*
+			 * either one of the page has changed or they collide
+			 * at the max hash, we consider them as ill items.
+			 */
+			remove_rmap_item_from_tree(tree_rmap_item);
+		}
+put_up_out:
+		put_page(tree_rmap_item->page);
+		up_read(&tree_rmap_item->slot->vma->vm_mm->mmap_sem);
+	}
+}
+
+
+
+
+static inline unsigned long get_pool_index(struct vma_slot *slot,
+					   unsigned long index)
+{
+	unsigned long pool_index;
+
+	pool_index = (sizeof(struct rmap_list_entry *) * index) >> PAGE_SHIFT;
+	if (pool_index >= slot->pool_size)
+		BUG();
+	return pool_index;
+}
+
+static inline unsigned long index_page_offset(unsigned long index)
+{
+	return offset_in_page(sizeof(struct rmap_list_entry *) * index);
+}
+
+static inline
+struct rmap_list_entry *get_rmap_list_entry(struct vma_slot *slot,
+					    unsigned long index, int need_alloc)
+{
+	unsigned long pool_index;
+	struct page *page;
+	void *addr;
+
+
+	pool_index = get_pool_index(slot, index);
+	if (!slot->rmap_list_pool[pool_index]) {
+		if (!need_alloc)
+			return NULL;
+
+		page = alloc_page(GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN);
+		if (!page)
+			return NULL;
+
+		slot->rmap_list_pool[pool_index] = page;
+	}
+
+	addr = kmap(slot->rmap_list_pool[pool_index]);
+	addr += index_page_offset(index);
+
+	return addr;
+}
+
+static inline void put_rmap_list_entry(struct vma_slot *slot,
+				       unsigned long index)
+{
+	unsigned long pool_index;
+
+	pool_index = get_pool_index(slot, index);
+	BUG_ON(!slot->rmap_list_pool[pool_index]);
+	kunmap(slot->rmap_list_pool[pool_index]);
+}
+
+static inline int entry_is_new(struct rmap_list_entry *entry)
+{
+	return !entry->item;
+}
+
+static inline unsigned long get_index_orig_addr(struct vma_slot *slot,
+						unsigned long index)
+{
+	return slot->vma->vm_start + (index << PAGE_SHIFT);
+}
+
+static inline unsigned long get_entry_address(struct rmap_list_entry *entry)
+{
+	unsigned long addr;
+
+	if (is_addr(entry->addr))
+		addr = get_clean_addr(entry->addr);
+	else if (entry->item)
+		addr = get_rmap_addr(entry->item);
+	else
+		BUG();
+
+	return addr;
+}
+
+static inline struct rmap_item *get_entry_item(struct rmap_list_entry *entry)
+{
+	if (is_addr(entry->addr))
+		return NULL;
+
+	return entry->item;
+}
+
+static inline void inc_rmap_list_pool_count(struct vma_slot *slot,
+					    unsigned long index)
+{
+	unsigned long pool_index;
+
+	pool_index = get_pool_index(slot, index);
+	BUG_ON(!slot->rmap_list_pool[pool_index]);
+	slot->pool_counts[pool_index]++;
+}
+
+static inline void dec_rmap_list_pool_count(struct vma_slot *slot,
+					    unsigned long index)
+{
+	unsigned long pool_index;
+
+	pool_index = get_pool_index(slot, index);
+	BUG_ON(!slot->rmap_list_pool[pool_index]);
+	BUG_ON(!slot->pool_counts[pool_index]);
+	slot->pool_counts[pool_index]--;
+}
+
+static inline int entry_has_rmap(struct rmap_list_entry *entry)
+{
+	return !is_addr(entry->addr) && entry->item;
+}
+
+static inline void swap_entries(struct rmap_list_entry *entry1,
+				unsigned long index1,
+				struct rmap_list_entry *entry2,
+				unsigned long index2)
+{
+	struct rmap_list_entry tmp;
+
+	/* swapping two new entries is meaningless */
+	BUG_ON(entry_is_new(entry1) && entry_is_new(entry2));
+
+	tmp = *entry1;
+	*entry1 = *entry2;
+	*entry2 = tmp;
+
+	if (entry_has_rmap(entry1))
+		entry1->item->entry_index = index1;
+
+	if (entry_has_rmap(entry2))
+		entry2->item->entry_index = index2;
+
+	if (entry_has_rmap(entry1) && !entry_has_rmap(entry2)) {
+		inc_rmap_list_pool_count(entry1->item->slot, index1);
+		dec_rmap_list_pool_count(entry1->item->slot, index2);
+	} else if (!entry_has_rmap(entry1) && entry_has_rmap(entry2)) {
+		inc_rmap_list_pool_count(entry2->item->slot, index2);
+		dec_rmap_list_pool_count(entry2->item->slot, index1);
+	}
+}
+
+static inline void free_entry_item(struct rmap_list_entry *entry)
+{
+	unsigned long index;
+	struct rmap_item *item;
+
+	if (!is_addr(entry->addr)) {
+		BUG_ON(!entry->item);
+		item = entry->item;
+		entry->addr = get_rmap_addr(item);
+		set_is_addr(entry->addr);
+		index = item->entry_index;
+		remove_rmap_item_from_tree(item);
+		dec_rmap_list_pool_count(item->slot, index);
+		free_rmap_item(item);
+	}
+}
+
+static inline int pool_entry_boundary(unsigned long index)
+{
+	unsigned long linear_addr;
+
+	linear_addr = sizeof(struct rmap_list_entry *) * index;
+	return index && !offset_in_page(linear_addr);
+}
+
+static inline void try_free_last_pool(struct vma_slot *slot,
+				      unsigned long index)
+{
+	unsigned long pool_index;
+
+	pool_index = get_pool_index(slot, index);
+	if (slot->rmap_list_pool[pool_index] &&
+	    !slot->pool_counts[pool_index]) {
+		__free_page(slot->rmap_list_pool[pool_index]);
+		slot->rmap_list_pool[pool_index] = NULL;
+		slot->flags |= UKSM_SLOT_NEED_SORT;
+	}
+
+}
+
+static inline unsigned long vma_item_index(struct vm_area_struct *vma,
+					   struct rmap_item *item)
+{
+	return (get_rmap_addr(item) - vma->vm_start) >> PAGE_SHIFT;
+}
+
+static int within_same_pool(struct vma_slot *slot,
+			    unsigned long i, unsigned long j)
+{
+	unsigned long pool_i, pool_j;
+
+	pool_i = get_pool_index(slot, i);
+	pool_j = get_pool_index(slot, j);
+
+	return (pool_i == pool_j);
+}
+
+static void sort_rmap_entry_list(struct vma_slot *slot)
+{
+	unsigned long i, j;
+	struct rmap_list_entry *entry, *swap_entry;
+
+	entry = get_rmap_list_entry(slot, 0, 0);
+	for (i = 0; i < slot->pages; ) {
+
+		if (!entry)
+			goto skip_whole_pool;
+
+		if (entry_is_new(entry))
+			goto next_entry;
+
+		if (is_addr(entry->addr)) {
+			entry->addr = 0;
+			goto next_entry;
+		}
+
+		j = vma_item_index(slot->vma, entry->item);
+		if (j == i)
+			goto next_entry;
+
+		if (within_same_pool(slot, i, j))
+			swap_entry = entry + j - i;
+		else
+			swap_entry = get_rmap_list_entry(slot, j, 1);
+
+		swap_entries(entry, i, swap_entry, j);
+		if (!within_same_pool(slot, i, j))
+			put_rmap_list_entry(slot, j);
+		continue;
+
+skip_whole_pool:
+		i += PAGE_SIZE / sizeof(*entry);
+		if (i < slot->pages)
+			entry = get_rmap_list_entry(slot, i, 0);
+		continue;
+
+next_entry:
+		if (i >= slot->pages - 1 ||
+		    !within_same_pool(slot, i, i + 1)) {
+			put_rmap_list_entry(slot, i);
+			if (i + 1 < slot->pages)
+				entry = get_rmap_list_entry(slot, i + 1, 0);
+		} else
+			entry++;
+		i++;
+		continue;
+	}
+
+	/* free empty pool entries which contain no rmap_item */
+	/* CAN be simplied to based on only pool_counts when bug freed !!!!! */
+	for (i = 0; i < slot->pool_size; i++) {
+		unsigned char has_rmap;
+		void *addr;
+
+		if (!slot->rmap_list_pool[i])
+			continue;
+
+		has_rmap = 0;
+		addr = kmap(slot->rmap_list_pool[i]);
+		BUG_ON(!addr);
+		for (j = 0; j < PAGE_SIZE / sizeof(*entry); j++) {
+			entry = (struct rmap_list_entry *)addr + j;
+			if (is_addr(entry->addr))
+				continue;
+			if (!entry->item)
+				continue;
+			has_rmap = 1;
+		}
+		kunmap(slot->rmap_list_pool[i]);
+		if (!has_rmap) {
+			BUG_ON(slot->pool_counts[i]);
+			__free_page(slot->rmap_list_pool[i]);
+			slot->rmap_list_pool[i] = NULL;
+		}
+	}
+
+	slot->flags &= ~UKSM_SLOT_NEED_SORT;
+}
+
+/*
+ * vma_fully_scanned() - if all the pages in this slot have been scanned.
+ */
+static inline int vma_fully_scanned(struct vma_slot *slot)
+{
+	return slot->pages_scanned == slot->pages;
+}
+
+/**
+ * get_next_rmap_item() - Get the next rmap_item in a vma_slot according to
+ * its random permutation. This function is embedded with the random
+ * permutation index management code.
+ */
+static struct rmap_item *get_next_rmap_item(struct vma_slot *slot, u32 *hash)
+{
+	unsigned long rand_range, addr, swap_index, scan_index;
+	struct rmap_item *item = NULL;
+	struct rmap_list_entry *scan_entry, *swap_entry = NULL;
+	struct page *page;
+
+	scan_index = swap_index = slot->pages_scanned % slot->pages;
+
+	if (pool_entry_boundary(scan_index))
+		try_free_last_pool(slot, scan_index - 1);
+
+	if (vma_fully_scanned(slot)) {
+		if (slot->flags & UKSM_SLOT_NEED_SORT)
+			slot->flags |= UKSM_SLOT_NEED_RERAND;
+		else
+			slot->flags &= ~UKSM_SLOT_NEED_RERAND;
+		if (slot->flags & UKSM_SLOT_NEED_SORT)
+			sort_rmap_entry_list(slot);
+	}
+
+	scan_entry = get_rmap_list_entry(slot, scan_index, 1);
+	if (!scan_entry)
+		return NULL;
+
+	if (entry_is_new(scan_entry)) {
+		scan_entry->addr = get_index_orig_addr(slot, scan_index);
+		set_is_addr(scan_entry->addr);
+	}
+
+	if (slot->flags & UKSM_SLOT_NEED_RERAND) {
+		rand_range = slot->pages - scan_index;
+		BUG_ON(!rand_range);
+		swap_index = scan_index + (prandom_u32() % rand_range);
+	}
+
+	if (swap_index != scan_index) {
+		swap_entry = get_rmap_list_entry(slot, swap_index, 1);
+		if (entry_is_new(swap_entry)) {
+			swap_entry->addr = get_index_orig_addr(slot,
+							       swap_index);
+			set_is_addr(swap_entry->addr);
+		}
+		swap_entries(scan_entry, scan_index, swap_entry, swap_index);
+	}
+
+	addr = get_entry_address(scan_entry);
+	item = get_entry_item(scan_entry);
+	BUG_ON(addr > slot->vma->vm_end || addr < slot->vma->vm_start);
+
+	page = follow_page(slot->vma, addr, FOLL_GET);
+	if (IS_ERR_OR_NULL(page))
+		goto nopage;
+
+	if (!PageAnon(page))
+		goto putpage;
+
+	/*check is zero_page pfn or uksm_zero_page*/
+	if ((page_to_pfn(page) == zero_pfn)
+			|| (page_to_pfn(page) == uksm_zero_pfn))
+		goto putpage;
+
+	flush_anon_page(slot->vma, page, addr);
+	flush_dcache_page(page);
+
+
+	*hash = page_hash(page, hash_strength, 1);
+	inc_uksm_pages_scanned();
+	/*if the page content all zero, re-map to zero-page*/
+	if (find_zero_page_hash(hash_strength, *hash)) {
+		if (!cmp_and_merge_zero_page(slot->vma, page)) {
+			slot->pages_merged++;
+
+			/* For full-zero pages, no need to create rmap item */
+			goto putpage;
+		} else {
+			inc_rshash_neg(memcmp_cost / 2);
+		}
+	}
+
+	if (!item) {
+		item = alloc_rmap_item();
+		if (item) {
+			/* It has already been zeroed */
+			item->slot = slot;
+			item->address = addr;
+			item->entry_index = scan_index;
+			scan_entry->item = item;
+			inc_rmap_list_pool_count(slot, scan_index);
+		} else
+			goto putpage;
+	}
+
+	BUG_ON(item->slot != slot);
+	/* the page may have changed */
+	item->page = page;
+	put_rmap_list_entry(slot, scan_index);
+	if (swap_entry)
+		put_rmap_list_entry(slot, swap_index);
+	return item;
+
+putpage:
+	put_page(page);
+	page = NULL;
+nopage:
+	/* no page, store addr back and free rmap_item if possible */
+	free_entry_item(scan_entry);
+	put_rmap_list_entry(slot, scan_index);
+	if (swap_entry)
+		put_rmap_list_entry(slot, swap_index);
+	return NULL;
+}
+
+static inline int in_stable_tree(struct rmap_item *rmap_item)
+{
+	return rmap_item->address & STABLE_FLAG;
+}
+
+/**
+ * scan_vma_one_page() - scan the next page in a vma_slot. Called with
+ * mmap_sem locked.
+ */
+static noinline void scan_vma_one_page(struct vma_slot *slot)
+{
+	u32 hash;
+	struct mm_struct *mm;
+	struct rmap_item *rmap_item = NULL;
+	struct vm_area_struct *vma = slot->vma;
+
+	mm = vma->vm_mm;
+	BUG_ON(!mm);
+	BUG_ON(!slot);
+
+	rmap_item = get_next_rmap_item(slot, &hash);
+	if (!rmap_item)
+		goto out1;
+
+	if (PageKsm(rmap_item->page) && in_stable_tree(rmap_item))
+		goto out2;
+
+	cmp_and_merge_page(rmap_item, hash);
+out2:
+	put_page(rmap_item->page);
+out1:
+	slot->pages_scanned++;
+	slot->this_sampled++;
+	if (slot->fully_scanned_round != fully_scanned_round)
+		scanned_virtual_pages++;
+
+	if (vma_fully_scanned(slot))
+		slot->fully_scanned_round = fully_scanned_round;
+}
+
+static inline unsigned long rung_get_pages(struct scan_rung *rung)
+{
+	struct slot_tree_node *node;
+
+	if (!rung->vma_root.rnode)
+		return 0;
+
+	node = container_of(rung->vma_root.rnode, struct slot_tree_node, snode);
+
+	return node->size;
+}
+
+#define RUNG_SAMPLED_MIN	3
+
+static inline
+void uksm_calc_rung_step(struct scan_rung *rung,
+			 unsigned long page_time, unsigned long ratio)
+{
+	unsigned long sampled, pages;
+
+	/* will be fully scanned ? */
+	if (!rung->cover_msecs) {
+		rung->step = 1;
+		return;
+	}
+
+	sampled = rung->cover_msecs * (NSEC_PER_MSEC / TIME_RATIO_SCALE)
+		  * ratio / page_time;
+
+	/*
+	 *  Before we finsish a scan round and expensive per-round jobs,
+	 *  we need to have a chance to estimate the per page time. So
+	 *  the sampled number can not be too small.
+	 */
+	if (sampled < RUNG_SAMPLED_MIN)
+		sampled = RUNG_SAMPLED_MIN;
+
+	pages = rung_get_pages(rung);
+	if (likely(pages > sampled))
+		rung->step = pages / sampled;
+	else
+		rung->step = 1;
+}
+
+static inline int step_need_recalc(struct scan_rung *rung)
+{
+	unsigned long pages, stepmax;
+
+	pages = rung_get_pages(rung);
+	stepmax = pages / RUNG_SAMPLED_MIN;
+
+	return pages && (rung->step > pages ||
+			 (stepmax && rung->step > stepmax));
+}
+
+static inline
+void reset_current_scan(struct scan_rung *rung, int finished, int step_recalc)
+{
+	struct vma_slot *slot;
+
+	if (finished)
+		rung->flags |= UKSM_RUNG_ROUND_FINISHED;
+
+	if (step_recalc || step_need_recalc(rung)) {
+		uksm_calc_rung_step(rung, uksm_ema_page_time, rung->cpu_ratio);
+		BUG_ON(step_need_recalc(rung));
+	}
+
+	slot_iter_index = prandom_u32() % rung->step;
+	BUG_ON(!rung->vma_root.rnode);
+	slot = sradix_tree_next(&rung->vma_root, NULL, 0, slot_iter);
+	BUG_ON(!slot);
+
+	rung->current_scan = slot;
+	rung->current_offset = slot_iter_index;
+}
+
+static inline struct sradix_tree_root *slot_get_root(struct vma_slot *slot)
+{
+	return &slot->rung->vma_root;
+}
+
+/*
+ * return if resetted.
+ */
+static int advance_current_scan(struct scan_rung *rung)
+{
+	unsigned short n;
+	struct vma_slot *slot, *next = NULL;
+
+	BUG_ON(!rung->vma_root.num);
+
+	slot = rung->current_scan;
+	n = (slot->pages - rung->current_offset) % rung->step;
+	slot_iter_index = rung->step - n;
+	next = sradix_tree_next(&rung->vma_root, slot->snode,
+				slot->sindex, slot_iter);
+
+	if (next) {
+		rung->current_offset = slot_iter_index;
+		rung->current_scan = next;
+		return 0;
+	} else {
+		reset_current_scan(rung, 1, 0);
+		return 1;
+	}
+}
+
+static inline void rung_rm_slot(struct vma_slot *slot)
+{
+	struct scan_rung *rung = slot->rung;
+	struct sradix_tree_root *root;
+
+	if (rung->current_scan == slot)
+		advance_current_scan(rung);
+
+	root = slot_get_root(slot);
+	sradix_tree_delete_from_leaf(root, slot->snode, slot->sindex);
+	slot->snode = NULL;
+	if (step_need_recalc(rung)) {
+		uksm_calc_rung_step(rung, uksm_ema_page_time, rung->cpu_ratio);
+		BUG_ON(step_need_recalc(rung));
+	}
+
+	/* In case advance_current_scan loop back to this slot again */
+	if (rung->vma_root.num && rung->current_scan == slot)
+		reset_current_scan(slot->rung, 1, 0);
+}
+
+static inline void rung_add_new_slots(struct scan_rung *rung,
+			struct vma_slot **slots, unsigned long num)
+{
+	int err;
+	struct vma_slot *slot;
+	unsigned long i;
+	struct sradix_tree_root *root = &rung->vma_root;
+
+	err = sradix_tree_enter(root, (void **)slots, num);
+	BUG_ON(err);
+
+	for (i = 0; i < num; i++) {
+		slot = slots[i];
+		slot->rung = rung;
+		BUG_ON(vma_fully_scanned(slot));
+	}
+
+	if (rung->vma_root.num == num)
+		reset_current_scan(rung, 0, 1);
+}
+
+static inline int rung_add_one_slot(struct scan_rung *rung,
+				     struct vma_slot *slot)
+{
+	int err;
+
+	err = sradix_tree_enter(&rung->vma_root, (void **)&slot, 1);
+	if (err)
+		return err;
+
+	slot->rung = rung;
+	if (rung->vma_root.num == 1)
+		reset_current_scan(rung, 0, 1);
+
+	return 0;
+}
+
+/*
+ * Return true if the slot is deleted from its rung.
+ */
+static inline int vma_rung_enter(struct vma_slot *slot, struct scan_rung *rung)
+{
+	struct scan_rung *old_rung = slot->rung;
+	int err;
+
+	if (old_rung == rung)
+		return 0;
+
+	rung_rm_slot(slot);
+	err = rung_add_one_slot(rung, slot);
+	if (err) {
+		err = rung_add_one_slot(old_rung, slot);
+		WARN_ON(err); /* OOPS, badly OOM, we lost this slot */
+	}
+
+	return 1;
+}
+
+static inline int vma_rung_up(struct vma_slot *slot)
+{
+	struct scan_rung *rung;
+
+	rung = slot->rung;
+	if (slot->rung != &uksm_scan_ladder[SCAN_LADDER_SIZE-1])
+		rung++;
+
+	return vma_rung_enter(slot, rung);
+}
+
+static inline int vma_rung_down(struct vma_slot *slot)
+{
+	struct scan_rung *rung;
+
+	rung = slot->rung;
+	if (slot->rung != &uksm_scan_ladder[0])
+		rung--;
+
+	return vma_rung_enter(slot, rung);
+}
+
+/**
+ * cal_dedup_ratio() - Calculate the deduplication ratio for this slot.
+ */
+static unsigned long cal_dedup_ratio(struct vma_slot *slot)
+{
+	unsigned long ret;
+	unsigned long pages;
+
+	pages = slot->this_sampled;
+	if (!pages)
+		return 0;
+
+	BUG_ON(slot->pages_scanned == slot->last_scanned);
+
+	ret = slot->pages_merged;
+
+	/* Thrashing area filtering */
+	if (ret && uksm_thrash_threshold) {
+		if (slot->pages_cowed * 100 / slot->pages_merged
+		    > uksm_thrash_threshold) {
+			ret = 0;
+		} else {
+			ret = slot->pages_merged - slot->pages_cowed;
+		}
+	}
+
+	return ret * 100 / pages;
+}
+
+/**
+ * cal_dedup_ratio() - Calculate the deduplication ratio for this slot.
+ */
+static unsigned long cal_dedup_ratio_old(struct vma_slot *slot)
+{
+	unsigned long ret;
+	unsigned long pages;
+
+	pages = slot->pages;
+	if (!pages)
+		return 0;
+
+	ret = slot->pages_bemerged;
+
+	/* Thrashing area filtering */
+	if (ret && uksm_thrash_threshold) {
+		if (slot->pages_cowed * 100 / slot->pages_bemerged
+		    > uksm_thrash_threshold) {
+			ret = 0;
+		} else {
+			ret = slot->pages_bemerged - slot->pages_cowed;
+		}
+	}
+
+	return ret * 100 / pages;
+}
+
+/**
+ * stable_node_reinsert() - When the hash_strength has been adjusted, the
+ * stable tree need to be restructured, this is the function re-inserting the
+ * stable node.
+ */
+static inline void stable_node_reinsert(struct stable_node *new_node,
+					struct page *page,
+					struct rb_root *root_treep,
+					struct list_head *tree_node_listp,
+					u32 hash)
+{
+	struct rb_node **new = &root_treep->rb_node;
+	struct rb_node *parent = NULL;
+	struct stable_node *stable_node;
+	struct tree_node *tree_node;
+	struct page *tree_page;
+	int cmp;
+
+	while (*new) {
+		int cmp;
+
+		tree_node = rb_entry(*new, struct tree_node, node);
+
+		cmp = hash_cmp(hash, tree_node->hash);
+
+		if (cmp < 0) {
+			parent = *new;
+			new = &parent->rb_left;
+		} else if (cmp > 0) {
+			parent = *new;
+			new = &parent->rb_right;
+		} else
+			break;
+	}
+
+	if (*new) {
+		/* find a stable tree node with same first level hash value */
+		stable_node_hash_max(new_node, page, hash);
+		if (tree_node->count == 1) {
+			stable_node = rb_entry(tree_node->sub_root.rb_node,
+					       struct stable_node, node);
+			tree_page = get_uksm_page(stable_node, 1, 0);
+			if (tree_page) {
+				stable_node_hash_max(stable_node,
+						      tree_page, hash);
+				put_page(tree_page);
+
+				/* prepare for stable node insertion */
+
+				cmp = hash_cmp(new_node->hash_max,
+						   stable_node->hash_max);
+				parent = &stable_node->node;
+				if (cmp < 0)
+					new = &parent->rb_left;
+				else if (cmp > 0)
+					new = &parent->rb_right;
+				else
+					goto failed;
+
+				goto add_node;
+			} else {
+				/* the only stable_node deleted, the tree node
+				 * was not deleted.
+				 */
+				goto tree_node_reuse;
+			}
+		}
+
+		/* well, search the collision subtree */
+		new = &tree_node->sub_root.rb_node;
+		parent = NULL;
+		BUG_ON(!*new);
+		while (*new) {
+			int cmp;
+
+			stable_node = rb_entry(*new, struct stable_node, node);
+
+			cmp = hash_cmp(new_node->hash_max,
+					   stable_node->hash_max);
+
+			if (cmp < 0) {
+				parent = *new;
+				new = &parent->rb_left;
+			} else if (cmp > 0) {
+				parent = *new;
+				new = &parent->rb_right;
+			} else {
+				/* oh, no, still a collision */
+				goto failed;
+			}
+		}
+
+		goto add_node;
+	}
+
+	/* no tree node found */
+	tree_node = alloc_tree_node(tree_node_listp);
+	if (!tree_node) {
+		pr_err("UKSM: memory allocation error!\n");
+		goto failed;
+	} else {
+		tree_node->hash = hash;
+		rb_link_node(&tree_node->node, parent, new);
+		rb_insert_color(&tree_node->node, root_treep);
+
+tree_node_reuse:
+		/* prepare for stable node insertion */
+		parent = NULL;
+		new = &tree_node->sub_root.rb_node;
+	}
+
+add_node:
+	rb_link_node(&new_node->node, parent, new);
+	rb_insert_color(&new_node->node, &tree_node->sub_root);
+	new_node->tree_node = tree_node;
+	tree_node->count++;
+	return;
+
+failed:
+	/* This can only happen when two nodes have collided
+	 * in two levels.
+	 */
+	new_node->tree_node = NULL;
+	return;
+}
+
+static inline void free_all_tree_nodes(struct list_head *list)
+{
+	struct tree_node *node, *tmp;
+
+	list_for_each_entry_safe(node, tmp, list, all_list) {
+		free_tree_node(node);
+	}
+}
+
+/**
+ * stable_tree_delta_hash() - Delta hash the stable tree from previous hash
+ * strength to the current hash_strength. It re-structures the hole tree.
+ */
+static inline void stable_tree_delta_hash(u32 prev_hash_strength)
+{
+	struct stable_node *node, *tmp;
+	struct rb_root *root_new_treep;
+	struct list_head *new_tree_node_listp;
+
+	stable_tree_index = (stable_tree_index + 1) % 2;
+	root_new_treep = &root_stable_tree[stable_tree_index];
+	new_tree_node_listp = &stable_tree_node_list[stable_tree_index];
+	*root_new_treep = RB_ROOT;
+	BUG_ON(!list_empty(new_tree_node_listp));
+
+	/*
+	 * we need to be safe, the node could be removed by get_uksm_page()
+	 */
+	list_for_each_entry_safe(node, tmp, &stable_node_list, all_list) {
+		void *addr;
+		struct page *node_page;
+		u32 hash;
+
+		/*
+		 * We are completely re-structuring the stable nodes to a new
+		 * stable tree. We don't want to touch the old tree unlinks and
+		 * old tree_nodes. The old tree_nodes will be freed at once.
+		 */
+		node_page = get_uksm_page(node, 0, 0);
+		if (!node_page)
+			continue;
+
+		if (node->tree_node) {
+			hash = node->tree_node->hash;
+
+			addr = kmap_atomic(node_page);
+
+			hash = delta_hash(addr, prev_hash_strength,
+					  hash_strength, hash);
+			kunmap_atomic(addr);
+		} else {
+			/*
+			 *it was not inserted to rbtree due to collision in last
+			 *round scan.
+			 */
+			hash = page_hash(node_page, hash_strength, 0);
+		}
+
+		stable_node_reinsert(node, node_page, root_new_treep,
+				     new_tree_node_listp, hash);
+		put_page(node_page);
+	}
+
+	root_stable_treep = root_new_treep;
+	free_all_tree_nodes(stable_tree_node_listp);
+	BUG_ON(!list_empty(stable_tree_node_listp));
+	stable_tree_node_listp = new_tree_node_listp;
+}
+
+static inline void inc_hash_strength(unsigned long delta)
+{
+	hash_strength += 1 << delta;
+	if (hash_strength > HASH_STRENGTH_MAX)
+		hash_strength = HASH_STRENGTH_MAX;
+}
+
+static inline void dec_hash_strength(unsigned long delta)
+{
+	unsigned long change = 1 << delta;
+
+	if (hash_strength <= change + 1)
+		hash_strength = 1;
+	else
+		hash_strength -= change;
+}
+
+static inline void inc_hash_strength_delta(void)
+{
+	hash_strength_delta++;
+	if (hash_strength_delta > HASH_STRENGTH_DELTA_MAX)
+		hash_strength_delta = HASH_STRENGTH_DELTA_MAX;
+}
+
+static inline unsigned long get_current_neg_ratio(void)
+{
+	u64 pos = benefit.pos;
+	u64 neg = benefit.neg;
+
+	if (!neg)
+		return 0;
+
+	if (!pos || neg > pos)
+		return 100;
+
+	if (neg > div64_u64(U64_MAX, 100))
+		pos = div64_u64(pos, 100);
+	else
+		neg *= 100;
+
+	return div64_u64(neg, pos);
+}
+
+static inline unsigned long get_current_benefit(void)
+{
+	u64 pos = benefit.pos;
+	u64 neg = benefit.neg;
+	u64 scanned = benefit.scanned;
+
+	if (neg > pos)
+		return 0;
+
+	return div64_u64((pos - neg), scanned);
+}
+
+static inline int judge_rshash_direction(void)
+{
+	u64 current_neg_ratio, stable_benefit;
+	u64 current_benefit, delta = 0;
+	int ret = STILL;
+
+	/*
+	 * Try to probe a value after the boot, and in case the system
+	 * are still for a long time.
+	 */
+	if ((fully_scanned_round & 0xFFULL) == 10) {
+		ret = OBSCURE;
+		goto out;
+	}
+
+	current_neg_ratio = get_current_neg_ratio();
+
+	if (current_neg_ratio == 0) {
+		rshash_neg_cont_zero++;
+		if (rshash_neg_cont_zero > 2)
+			return GO_DOWN;
+		else
+			return STILL;
+	}
+	rshash_neg_cont_zero = 0;
+
+	if (current_neg_ratio > 90) {
+		ret = GO_UP;
+		goto out;
+	}
+
+	current_benefit = get_current_benefit();
+	stable_benefit = rshash_state.stable_benefit;
+
+	if (!stable_benefit) {
+		ret = OBSCURE;
+		goto out;
+	}
+
+	if (current_benefit > stable_benefit)
+		delta = current_benefit - stable_benefit;
+	else if (current_benefit < stable_benefit)
+		delta = stable_benefit - current_benefit;
+
+	delta = div64_u64(100 * delta, stable_benefit);
+
+	if (delta > 50) {
+		rshash_cont_obscure++;
+		if (rshash_cont_obscure > 2)
+			return OBSCURE;
+		else
+			return STILL;
+	}
+
+out:
+	rshash_cont_obscure = 0;
+	return ret;
+}
+
+/**
+ * rshash_adjust() - The main function to control the random sampling state
+ * machine for hash strength adapting.
+ *
+ * return true if hash_strength has changed.
+ */
+static inline int rshash_adjust(void)
+{
+	unsigned long prev_hash_strength = hash_strength;
+
+	if (!encode_benefit())
+		return 0;
+
+	switch (rshash_state.state) {
+	case RSHASH_STILL:
+		switch (judge_rshash_direction()) {
+		case GO_UP:
+			if (rshash_state.pre_direct == GO_DOWN)
+				hash_strength_delta = 0;
+
+			inc_hash_strength(hash_strength_delta);
+			inc_hash_strength_delta();
+			rshash_state.stable_benefit = get_current_benefit();
+			rshash_state.pre_direct = GO_UP;
+			break;
+
+		case GO_DOWN:
+			if (rshash_state.pre_direct == GO_UP)
+				hash_strength_delta = 0;
+
+			dec_hash_strength(hash_strength_delta);
+			inc_hash_strength_delta();
+			rshash_state.stable_benefit = get_current_benefit();
+			rshash_state.pre_direct = GO_DOWN;
+			break;
+
+		case OBSCURE:
+			rshash_state.stable_point = hash_strength;
+			rshash_state.turn_point_down = hash_strength;
+			rshash_state.turn_point_up = hash_strength;
+			rshash_state.turn_benefit_down = get_current_benefit();
+			rshash_state.turn_benefit_up = get_current_benefit();
+			rshash_state.lookup_window_index = 0;
+			rshash_state.state = RSHASH_TRYDOWN;
+			dec_hash_strength(hash_strength_delta);
+			inc_hash_strength_delta();
+			break;
+
+		case STILL:
+			break;
+		default:
+			BUG();
+		}
+		break;
+
+	case RSHASH_TRYDOWN:
+		if (rshash_state.lookup_window_index++ % 5 == 0)
+			rshash_state.below_count = 0;
+
+		if (get_current_benefit() < rshash_state.stable_benefit)
+			rshash_state.below_count++;
+		else if (get_current_benefit() >
+			 rshash_state.turn_benefit_down) {
+			rshash_state.turn_point_down = hash_strength;
+			rshash_state.turn_benefit_down = get_current_benefit();
+		}
+
+		if (rshash_state.below_count >= 3 ||
+		    judge_rshash_direction() == GO_UP ||
+		    hash_strength == 1) {
+			hash_strength = rshash_state.stable_point;
+			hash_strength_delta = 0;
+			inc_hash_strength(hash_strength_delta);
+			inc_hash_strength_delta();
+			rshash_state.lookup_window_index = 0;
+			rshash_state.state = RSHASH_TRYUP;
+			hash_strength_delta = 0;
+		} else {
+			dec_hash_strength(hash_strength_delta);
+			inc_hash_strength_delta();
+		}
+		break;
+
+	case RSHASH_TRYUP:
+		if (rshash_state.lookup_window_index++ % 5 == 0)
+			rshash_state.below_count = 0;
+
+		if (get_current_benefit() < rshash_state.turn_benefit_down)
+			rshash_state.below_count++;
+		else if (get_current_benefit() > rshash_state.turn_benefit_up) {
+			rshash_state.turn_point_up = hash_strength;
+			rshash_state.turn_benefit_up = get_current_benefit();
+		}
+
+		if (rshash_state.below_count >= 3 ||
+		    judge_rshash_direction() == GO_DOWN ||
+		    hash_strength == HASH_STRENGTH_MAX) {
+			hash_strength = rshash_state.turn_benefit_up >
+				rshash_state.turn_benefit_down ?
+				rshash_state.turn_point_up :
+				rshash_state.turn_point_down;
+
+			rshash_state.state = RSHASH_PRE_STILL;
+		} else {
+			inc_hash_strength(hash_strength_delta);
+			inc_hash_strength_delta();
+		}
+
+		break;
+
+	case RSHASH_NEW:
+	case RSHASH_PRE_STILL:
+		rshash_state.stable_benefit = get_current_benefit();
+		rshash_state.state = RSHASH_STILL;
+		hash_strength_delta = 0;
+		break;
+	default:
+		BUG();
+	}
+
+	/* rshash_neg = rshash_pos = 0; */
+	reset_benefit();
+
+	if (prev_hash_strength != hash_strength)
+		stable_tree_delta_hash(prev_hash_strength);
+
+	return prev_hash_strength != hash_strength;
+}
+
+/**
+ * round_update_ladder() - The main function to do update of all the
+ * adjustments whenever a scan round is finished.
+ */
+static noinline void round_update_ladder(void)
+{
+	int i;
+	unsigned long dedup;
+	struct vma_slot *slot, *tmp_slot;
+
+	for (i = 0; i < SCAN_LADDER_SIZE; i++)
+		uksm_scan_ladder[i].flags &= ~UKSM_RUNG_ROUND_FINISHED;
+
+	list_for_each_entry_safe(slot, tmp_slot, &vma_slot_dedup, dedup_list) {
+
+		/* slot may be rung_rm_slot() when mm exits */
+		if (slot->snode) {
+			dedup = cal_dedup_ratio_old(slot);
+			if (dedup && dedup >= uksm_abundant_threshold)
+				vma_rung_up(slot);
+		}
+
+		slot->pages_bemerged = 0;
+		slot->pages_cowed = 0;
+
+		list_del_init(&slot->dedup_list);
+	}
+}
+
+static void uksm_del_vma_slot(struct vma_slot *slot)
+{
+	int i, j;
+	struct rmap_list_entry *entry;
+
+	if (slot->snode) {
+		/*
+		 * In case it just failed when entering the rung, it's not
+		 * necessary.
+		 */
+		rung_rm_slot(slot);
+	}
+
+	if (!list_empty(&slot->dedup_list))
+		list_del(&slot->dedup_list);
+
+	if (!slot->rmap_list_pool || !slot->pool_counts) {
+		/* In case it OOMed in uksm_vma_enter() */
+		goto out;
+	}
+
+	for (i = 0; i < slot->pool_size; i++) {
+		void *addr;
+
+		if (!slot->rmap_list_pool[i])
+			continue;
+
+		addr = kmap(slot->rmap_list_pool[i]);
+		for (j = 0; j < PAGE_SIZE / sizeof(*entry); j++) {
+			entry = (struct rmap_list_entry *)addr + j;
+			if (is_addr(entry->addr))
+				continue;
+			if (!entry->item)
+				continue;
+
+			remove_rmap_item_from_tree(entry->item);
+			free_rmap_item(entry->item);
+			slot->pool_counts[i]--;
+		}
+		BUG_ON(slot->pool_counts[i]);
+		kunmap(slot->rmap_list_pool[i]);
+		__free_page(slot->rmap_list_pool[i]);
+	}
+	kfree(slot->rmap_list_pool);
+	kfree(slot->pool_counts);
+
+out:
+	slot->rung = NULL;
+	if (slot->flags & UKSM_SLOT_IN_UKSM) {
+		BUG_ON(uksm_pages_total < slot->pages);
+		uksm_pages_total -= slot->pages;
+	}
+
+	if (slot->fully_scanned_round == fully_scanned_round)
+		scanned_virtual_pages -= slot->pages;
+	else
+		scanned_virtual_pages -= slot->pages_scanned;
+	free_vma_slot(slot);
+}
+
+
+#define SPIN_LOCK_PERIOD	32
+static struct vma_slot *cleanup_slots[SPIN_LOCK_PERIOD];
+static inline void cleanup_vma_slots(void)
+{
+	struct vma_slot *slot;
+	int i;
+
+	i = 0;
+	spin_lock(&vma_slot_list_lock);
+	while (!list_empty(&vma_slot_del)) {
+		slot = list_entry(vma_slot_del.next,
+				  struct vma_slot, slot_list);
+		list_del(&slot->slot_list);
+		cleanup_slots[i++] = slot;
+		if (i == SPIN_LOCK_PERIOD) {
+			spin_unlock(&vma_slot_list_lock);
+			while (--i >= 0)
+				uksm_del_vma_slot(cleanup_slots[i]);
+			i = 0;
+			spin_lock(&vma_slot_list_lock);
+		}
+	}
+	spin_unlock(&vma_slot_list_lock);
+
+	while (--i >= 0)
+		uksm_del_vma_slot(cleanup_slots[i]);
+}
+
+/*
+ * Expotional moving average formula
+ */
+static inline unsigned long ema(unsigned long curr, unsigned long last_ema)
+{
+	/*
+	 * For a very high burst, even the ema cannot work well, a false very
+	 * high per-page time estimation can result in feedback in very high
+	 * overhead of context switch and rung update -- this will then lead
+	 * to higher per-paper time, this may not converge.
+	 *
+	 * Instead, we try to approach this value in a binary manner.
+	 */
+	if (curr > last_ema * 10)
+		return last_ema * 2;
+
+	return (EMA_ALPHA * curr + (100 - EMA_ALPHA) * last_ema) / 100;
+}
+
+/*
+ * convert cpu ratio in 1/TIME_RATIO_SCALE configured by user to
+ * nanoseconds based on current uksm_sleep_jiffies.
+ */
+static inline unsigned long cpu_ratio_to_nsec(unsigned int ratio)
+{
+	return NSEC_PER_USEC * jiffies_to_usecs(uksm_sleep_jiffies) /
+		(TIME_RATIO_SCALE - ratio) * ratio;
+}
+
+
+static inline unsigned long rung_real_ratio(int cpu_time_ratio)
+{
+	unsigned long ret;
+
+	BUG_ON(!cpu_time_ratio);
+
+	if (cpu_time_ratio > 0)
+		ret = cpu_time_ratio;
+	else
+		ret = (unsigned long)(-cpu_time_ratio) *
+			uksm_max_cpu_percentage / 100UL;
+
+	return ret ? ret : 1;
+}
+
+static noinline void uksm_calc_scan_pages(void)
+{
+	struct scan_rung *ladder = uksm_scan_ladder;
+	unsigned long sleep_usecs, nsecs;
+	unsigned long ratio;
+	int i;
+	unsigned long per_page;
+
+	if (uksm_ema_page_time > 100000 ||
+	    (((unsigned long) uksm_eval_round & (256UL - 1)) == 0UL))
+		uksm_ema_page_time = UKSM_PAGE_TIME_DEFAULT;
+
+	per_page = uksm_ema_page_time;
+	BUG_ON(!per_page);
+
+	/*
+	 * For every 8 eval round, we try to probe a uksm_sleep_jiffies value
+	 * based on saved user input.
+	 */
+	if (((unsigned long) uksm_eval_round & (8UL - 1)) == 0UL)
+		uksm_sleep_jiffies = uksm_sleep_saved;
+
+	/* We require a rung scan at least 1 page in a period. */
+	nsecs = per_page;
+	ratio = rung_real_ratio(ladder[0].cpu_ratio);
+	if (cpu_ratio_to_nsec(ratio) < nsecs) {
+		sleep_usecs = nsecs * (TIME_RATIO_SCALE - ratio) / ratio
+				/ NSEC_PER_USEC;
+		uksm_sleep_jiffies = usecs_to_jiffies(sleep_usecs) + 1;
+	}
+
+	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
+		ratio = rung_real_ratio(ladder[i].cpu_ratio);
+		ladder[i].pages_to_scan = cpu_ratio_to_nsec(ratio) /
+					per_page;
+		BUG_ON(!ladder[i].pages_to_scan);
+		uksm_calc_rung_step(&ladder[i], per_page, ratio);
+	}
+}
+
+/*
+ * From the scan time of this round (ns) to next expected min sleep time
+ * (ms), be careful of the possible overflows. ratio is taken from
+ * rung_real_ratio()
+ */
+static inline
+unsigned int scan_time_to_sleep(unsigned long long scan_time, unsigned long ratio)
+{
+	scan_time >>= 20; /* to msec level now */
+	BUG_ON(scan_time > (ULONG_MAX / TIME_RATIO_SCALE));
+
+	return (unsigned int) ((unsigned long) scan_time *
+			       (TIME_RATIO_SCALE - ratio) / ratio);
+}
+
+#define __round_mask(x, y) ((__typeof__(x))((y)-1))
+#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
+
+static void uksm_vma_enter(struct vma_slot **slots, unsigned long num)
+{
+	struct scan_rung *rung;
+
+	rung = &uksm_scan_ladder[0];
+	rung_add_new_slots(rung, slots, num);
+}
+
+static struct vma_slot *batch_slots[SLOT_TREE_NODE_STORE_SIZE];
+
+static void uksm_enter_all_slots(void)
+{
+	struct vma_slot *slot;
+	unsigned long index;
+	struct list_head empty_vma_list;
+	int i;
+
+	i = 0;
+	index = 0;
+	INIT_LIST_HEAD(&empty_vma_list);
+
+	spin_lock(&vma_slot_list_lock);
+	while (!list_empty(&vma_slot_new)) {
+		slot = list_entry(vma_slot_new.next,
+				  struct vma_slot, slot_list);
+
+		if (!slot->vma->anon_vma) {
+			list_move(&slot->slot_list, &empty_vma_list);
+		} else if (vma_can_enter(slot->vma)) {
+			batch_slots[index++] = slot;
+			list_del_init(&slot->slot_list);
+		} else {
+			list_move(&slot->slot_list, &vma_slot_noadd);
+		}
+
+		if (++i == SPIN_LOCK_PERIOD ||
+		    (index && !(index % SLOT_TREE_NODE_STORE_SIZE))) {
+			spin_unlock(&vma_slot_list_lock);
+
+			if (index && !(index % SLOT_TREE_NODE_STORE_SIZE)) {
+				uksm_vma_enter(batch_slots, index);
+				index = 0;
+			}
+			i = 0;
+			cond_resched();
+			spin_lock(&vma_slot_list_lock);
+		}
+	}
+
+	list_splice(&empty_vma_list, &vma_slot_new);
+
+	spin_unlock(&vma_slot_list_lock);
+
+	if (index)
+		uksm_vma_enter(batch_slots, index);
+
+}
+
+static inline int rung_round_finished(struct scan_rung *rung)
+{
+	return rung->flags & UKSM_RUNG_ROUND_FINISHED;
+}
+
+static inline void judge_slot(struct vma_slot *slot)
+{
+	struct scan_rung *rung = slot->rung;
+	unsigned long dedup;
+	int deleted;
+
+	dedup = cal_dedup_ratio(slot);
+	if (vma_fully_scanned(slot) && uksm_thrash_threshold)
+		deleted = vma_rung_enter(slot, &uksm_scan_ladder[0]);
+	else if (dedup && dedup >= uksm_abundant_threshold)
+		deleted = vma_rung_up(slot);
+	else
+		deleted = vma_rung_down(slot);
+
+	slot->pages_merged = 0;
+	slot->pages_cowed = 0;
+	slot->this_sampled = 0;
+
+	if (vma_fully_scanned(slot))
+		slot->pages_scanned = 0;
+
+	slot->last_scanned = slot->pages_scanned;
+
+	/* If its deleted in above, then rung was already advanced. */
+	if (!deleted)
+		advance_current_scan(rung);
+}
+
+
+static inline int hash_round_finished(void)
+{
+	if (scanned_virtual_pages > (uksm_pages_total >> 2)) {
+		scanned_virtual_pages = 0;
+		if (uksm_pages_scanned)
+			fully_scanned_round++;
+
+		return 1;
+	} else {
+		return 0;
+	}
+}
+
+#define UKSM_MMSEM_BATCH	5
+#define BUSY_RETRY		100
+
+/**
+ * uksm_do_scan()  - the main worker function.
+ */
+static noinline void uksm_do_scan(void)
+{
+	struct vma_slot *slot, *iter;
+	struct mm_struct *busy_mm;
+	unsigned char round_finished, all_rungs_emtpy;
+	int i, err, mmsem_batch;
+	unsigned long pcost;
+	long long delta_exec;
+	unsigned long vpages, max_cpu_ratio;
+	unsigned long long start_time, end_time, scan_time;
+	unsigned int expected_jiffies;
+
+	might_sleep();
+
+	vpages = 0;
+
+	start_time = task_sched_runtime(current);
+	max_cpu_ratio = 0;
+	mmsem_batch = 0;
+
+	for (i = 0; i < SCAN_LADDER_SIZE;) {
+		struct scan_rung *rung = &uksm_scan_ladder[i];
+		unsigned long ratio;
+		int busy_retry;
+
+		if (!rung->pages_to_scan) {
+			i++;
+			continue;
+		}
+
+		if (!rung->vma_root.num) {
+			rung->pages_to_scan = 0;
+			i++;
+			continue;
+		}
+
+		ratio = rung_real_ratio(rung->cpu_ratio);
+		if (ratio > max_cpu_ratio)
+			max_cpu_ratio = ratio;
+
+		busy_retry = BUSY_RETRY;
+		/*
+		 * Do not consider rung_round_finished() here, just used up the
+		 * rung->pages_to_scan quota.
+		 */
+		while (rung->pages_to_scan && rung->vma_root.num &&
+		       likely(!freezing(current))) {
+			int reset = 0;
+
+			slot = rung->current_scan;
+
+			BUG_ON(vma_fully_scanned(slot));
+
+			if (mmsem_batch)
+				err = 0;
+			else
+				err = try_down_read_slot_mmap_sem(slot);
+
+			if (err == -ENOENT) {
+rm_slot:
+				rung_rm_slot(slot);
+				continue;
+			}
+
+			busy_mm = slot->mm;
+
+			if (err == -EBUSY) {
+				/* skip other vmas on the same mm */
+				do {
+					reset = advance_current_scan(rung);
+					iter = rung->current_scan;
+					busy_retry--;
+					if (iter->vma->vm_mm != busy_mm ||
+					    !busy_retry || reset)
+						break;
+				} while (1);
+
+				if (iter->vma->vm_mm != busy_mm) {
+					continue;
+				} else {
+					/* scan round finsished */
+					break;
+				}
+			}
+
+			BUG_ON(!vma_can_enter(slot->vma));
+			if (uksm_test_exit(slot->vma->vm_mm)) {
+				mmsem_batch = 0;
+				up_read(&slot->vma->vm_mm->mmap_sem);
+				goto rm_slot;
+			}
+
+			if (mmsem_batch)
+				mmsem_batch--;
+			else
+				mmsem_batch = UKSM_MMSEM_BATCH;
+
+			/* Ok, we have take the mmap_sem, ready to scan */
+			scan_vma_one_page(slot);
+			rung->pages_to_scan--;
+			vpages++;
+
+			if (rung->current_offset + rung->step > slot->pages - 1
+			    || vma_fully_scanned(slot)) {
+				up_read(&slot->vma->vm_mm->mmap_sem);
+				judge_slot(slot);
+				mmsem_batch = 0;
+			} else {
+				rung->current_offset += rung->step;
+				if (!mmsem_batch)
+					up_read(&slot->vma->vm_mm->mmap_sem);
+			}
+
+			busy_retry = BUSY_RETRY;
+			cond_resched();
+		}
+
+		if (mmsem_batch) {
+			up_read(&slot->vma->vm_mm->mmap_sem);
+			mmsem_batch = 0;
+		}
+
+		if (freezing(current))
+			break;
+
+		cond_resched();
+	}
+	end_time = task_sched_runtime(current);
+	delta_exec = end_time - start_time;
+
+	if (freezing(current))
+		return;
+
+	cleanup_vma_slots();
+	uksm_enter_all_slots();
+
+	round_finished = 1;
+	all_rungs_emtpy = 1;
+	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
+		struct scan_rung *rung = &uksm_scan_ladder[i];
+
+		if (rung->vma_root.num) {
+			all_rungs_emtpy = 0;
+			if (!rung_round_finished(rung))
+				round_finished = 0;
+		}
+	}
+
+	if (all_rungs_emtpy)
+		round_finished = 0;
+
+	if (round_finished) {
+		round_update_ladder();
+		uksm_eval_round++;
+
+		if (hash_round_finished() && rshash_adjust()) {
+			/* Reset the unstable root iff hash strength changed */
+			uksm_hash_round++;
+			root_unstable_tree = RB_ROOT;
+			free_all_tree_nodes(&unstable_tree_node_list);
+		}
+
+		/*
+		 * A number of pages can hang around indefinitely on per-cpu
+		 * pagevecs, raised page count preventing write_protect_page
+		 * from merging them.  Though it doesn't really matter much,
+		 * it is puzzling to see some stuck in pages_volatile until
+		 * other activity jostles them out, and they also prevented
+		 * LTP's KSM test from succeeding deterministically; so drain
+		 * them here (here rather than on entry to uksm_do_scan(),
+		 * so we don't IPI too often when pages_to_scan is set low).
+		 */
+		lru_add_drain_all();
+	}
+
+
+	if (vpages && delta_exec > 0) {
+		pcost = (unsigned long) delta_exec / vpages;
+		if (likely(uksm_ema_page_time))
+			uksm_ema_page_time = ema(pcost, uksm_ema_page_time);
+		else
+			uksm_ema_page_time = pcost;
+	}
+
+	uksm_calc_scan_pages();
+	uksm_sleep_real = uksm_sleep_jiffies;
+	/* in case of radical cpu bursts, apply the upper bound */
+	end_time = task_sched_runtime(current);
+	if (max_cpu_ratio && end_time > start_time) {
+		scan_time = end_time - start_time;
+		expected_jiffies = msecs_to_jiffies(
+			scan_time_to_sleep(scan_time, max_cpu_ratio));
+
+		if (expected_jiffies > uksm_sleep_real)
+			uksm_sleep_real = expected_jiffies;
+
+		/* We have a 1 second up bound for responsiveness. */
+		if (jiffies_to_msecs(uksm_sleep_real) > MSEC_PER_SEC)
+			uksm_sleep_real = msecs_to_jiffies(1000);
+	}
+
+	return;
+}
+
+static int ksmd_should_run(void)
+{
+	return uksm_run & UKSM_RUN_MERGE;
+}
+
+static int uksm_scan_thread(void *nothing)
+{
+	set_freezable();
+	set_user_nice(current, 5);
+
+	while (!kthread_should_stop()) {
+		mutex_lock(&uksm_thread_mutex);
+		if (ksmd_should_run())
+			uksm_do_scan();
+		mutex_unlock(&uksm_thread_mutex);
+
+		try_to_freeze();
+
+		if (ksmd_should_run()) {
+			schedule_timeout_interruptible(uksm_sleep_real);
+			uksm_sleep_times++;
+		} else {
+			wait_event_freezable(uksm_thread_wait,
+				ksmd_should_run() || kthread_should_stop());
+		}
+	}
+	return 0;
+}
+
+void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc)
+{
+	struct stable_node *stable_node;
+	struct node_vma *node_vma;
+	struct rmap_item *rmap_item;
+	int search_new_forks = 0;
+	unsigned long address;
+
+	VM_BUG_ON_PAGE(!PageKsm(page), page);
+	VM_BUG_ON_PAGE(!PageLocked(page), page);
+
+	stable_node = page_stable_node(page);
+	if (!stable_node)
+		return;
+again:
+	hlist_for_each_entry(node_vma, &stable_node->hlist, hlist) {
+		hlist_for_each_entry(rmap_item, &node_vma->rmap_hlist, hlist) {
+			struct anon_vma *anon_vma = rmap_item->anon_vma;
+			struct anon_vma_chain *vmac;
+			struct vm_area_struct *vma;
+
+			cond_resched();
+			anon_vma_lock_read(anon_vma);
+			anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
+						       0, ULONG_MAX) {
+				cond_resched();
+				vma = vmac->vma;
+				address = get_rmap_addr(rmap_item);
+
+				if (address < vma->vm_start ||
+				    address >= vma->vm_end)
+					continue;
+
+				if ((rmap_item->slot->vma == vma) ==
+				    search_new_forks)
+					continue;
+
+				if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
+					continue;
+
+				if (!rwc->rmap_one(page, vma, address, rwc->arg)) {
+					anon_vma_unlock_read(anon_vma);
+					return;
+				}
+
+				if (rwc->done && rwc->done(page)) {
+					anon_vma_unlock_read(anon_vma);
+					return;
+				}
+			}
+			anon_vma_unlock_read(anon_vma);
+		}
+	}
+	if (!search_new_forks++)
+		goto again;
+}
+
+#ifdef CONFIG_MIGRATION
+/* Common ksm interface but may be specific to uksm */
+void ksm_migrate_page(struct page *newpage, struct page *oldpage)
+{
+	struct stable_node *stable_node;
+
+	VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage);
+	VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
+	VM_BUG_ON(newpage->mapping != oldpage->mapping);
+
+	stable_node = page_stable_node(newpage);
+	if (stable_node) {
+		VM_BUG_ON(stable_node->kpfn != page_to_pfn(oldpage));
+		stable_node->kpfn = page_to_pfn(newpage);
+		/*
+		 * newpage->mapping was set in advance; now we need smp_wmb()
+		 * to make sure that the new stable_node->kpfn is visible
+		 * to get_ksm_page() before it can see that oldpage->mapping
+		 * has gone stale (or that PageSwapCache has been cleared).
+		 */
+		smp_wmb();
+		set_page_stable_node(oldpage, NULL);
+	}
+}
+#endif /* CONFIG_MIGRATION */
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+static struct stable_node *uksm_check_stable_tree(unsigned long start_pfn,
+						 unsigned long end_pfn)
+{
+	struct rb_node *node;
+
+	for (node = rb_first(root_stable_treep); node; node = rb_next(node)) {
+		struct stable_node *stable_node;
+
+		stable_node = rb_entry(node, struct stable_node, node);
+		if (stable_node->kpfn >= start_pfn &&
+		    stable_node->kpfn < end_pfn)
+			return stable_node;
+	}
+	return NULL;
+}
+
+static int uksm_memory_callback(struct notifier_block *self,
+			       unsigned long action, void *arg)
+{
+	struct memory_notify *mn = arg;
+	struct stable_node *stable_node;
+
+	switch (action) {
+	case MEM_GOING_OFFLINE:
+		/*
+		 * Keep it very simple for now: just lock out ksmd and
+		 * MADV_UNMERGEABLE while any memory is going offline.
+		 * mutex_lock_nested() is necessary because lockdep was alarmed
+		 * that here we take uksm_thread_mutex inside notifier chain
+		 * mutex, and later take notifier chain mutex inside
+		 * uksm_thread_mutex to unlock it.   But that's safe because both
+		 * are inside mem_hotplug_mutex.
+		 */
+		mutex_lock_nested(&uksm_thread_mutex, SINGLE_DEPTH_NESTING);
+		break;
+
+	case MEM_OFFLINE:
+		/*
+		 * Most of the work is done by page migration; but there might
+		 * be a few stable_nodes left over, still pointing to struct
+		 * pages which have been offlined: prune those from the tree.
+		 */
+		while ((stable_node = uksm_check_stable_tree(mn->start_pfn,
+					mn->start_pfn + mn->nr_pages)) != NULL)
+			remove_node_from_stable_tree(stable_node, 1, 1);
+		/* fallthrough */
+
+	case MEM_CANCEL_OFFLINE:
+		mutex_unlock(&uksm_thread_mutex);
+		break;
+	}
+	return NOTIFY_OK;
+}
+#endif /* CONFIG_MEMORY_HOTREMOVE */
+
+#ifdef CONFIG_SYSFS
+/*
+ * This all compiles without CONFIG_SYSFS, but is a waste of space.
+ */
+
+#define UKSM_ATTR_RO(_name) \
+	static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
+#define UKSM_ATTR(_name) \
+	static struct kobj_attribute _name##_attr = \
+		__ATTR(_name, 0644, _name##_show, _name##_store)
+
+static ssize_t max_cpu_percentage_show(struct kobject *kobj,
+				    struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%u\n", uksm_max_cpu_percentage);
+}
+
+static ssize_t max_cpu_percentage_store(struct kobject *kobj,
+				     struct kobj_attribute *attr,
+				     const char *buf, size_t count)
+{
+	unsigned long max_cpu_percentage;
+	int err;
+
+	err = kstrtoul(buf, 10, &max_cpu_percentage);
+	if (err || max_cpu_percentage > 100)
+		return -EINVAL;
+
+	if (max_cpu_percentage == 100)
+		max_cpu_percentage = 99;
+	else if (max_cpu_percentage < 10)
+		max_cpu_percentage = 10;
+
+	uksm_max_cpu_percentage = max_cpu_percentage;
+
+	return count;
+}
+UKSM_ATTR(max_cpu_percentage);
+
+static ssize_t sleep_millisecs_show(struct kobject *kobj,
+				    struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%u\n", jiffies_to_msecs(uksm_sleep_jiffies));
+}
+
+static ssize_t sleep_millisecs_store(struct kobject *kobj,
+				     struct kobj_attribute *attr,
+				     const char *buf, size_t count)
+{
+	unsigned long msecs;
+	int err;
+
+	err = kstrtoul(buf, 10, &msecs);
+	if (err || msecs > MSEC_PER_SEC)
+		return -EINVAL;
+
+	uksm_sleep_jiffies = msecs_to_jiffies(msecs);
+	uksm_sleep_saved = uksm_sleep_jiffies;
+
+	return count;
+}
+UKSM_ATTR(sleep_millisecs);
+
+
+static ssize_t cpu_governor_show(struct kobject *kobj,
+				  struct kobj_attribute *attr, char *buf)
+{
+	int n = sizeof(uksm_cpu_governor_str) / sizeof(char *);
+	int i;
+
+	buf[0] = '\0';
+	for (i = 0; i < n ; i++) {
+		if (uksm_cpu_governor == i)
+			strcat(buf, "[");
+
+		strcat(buf, uksm_cpu_governor_str[i]);
+
+		if (uksm_cpu_governor == i)
+			strcat(buf, "]");
+
+		strcat(buf, " ");
+	}
+	strcat(buf, "\n");
+
+	return strlen(buf);
+}
+
+static inline void init_performance_values(void)
+{
+	int i;
+	struct scan_rung *rung;
+	struct uksm_cpu_preset_s *preset = uksm_cpu_preset + uksm_cpu_governor;
+
+
+	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
+		rung = uksm_scan_ladder + i;
+		rung->cpu_ratio = preset->cpu_ratio[i];
+		rung->cover_msecs = preset->cover_msecs[i];
+	}
+
+	uksm_max_cpu_percentage = preset->max_cpu;
+}
+
+static ssize_t cpu_governor_store(struct kobject *kobj,
+				   struct kobj_attribute *attr,
+				   const char *buf, size_t count)
+{
+	int n = sizeof(uksm_cpu_governor_str) / sizeof(char *);
+
+	for (n--; n >= 0 ; n--) {
+		if (!strncmp(buf, uksm_cpu_governor_str[n],
+			     strlen(uksm_cpu_governor_str[n])))
+			break;
+	}
+
+	if (n < 0)
+		return -EINVAL;
+	else
+		uksm_cpu_governor = n;
+
+	init_performance_values();
+
+	return count;
+}
+UKSM_ATTR(cpu_governor);
+
+static ssize_t run_show(struct kobject *kobj, struct kobj_attribute *attr,
+			char *buf)
+{
+	return sprintf(buf, "%u\n", uksm_run);
+}
+
+static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
+			 const char *buf, size_t count)
+{
+	int err;
+	unsigned long flags;
+
+	err = kstrtoul(buf, 10, &flags);
+	if (err || flags > UINT_MAX)
+		return -EINVAL;
+	if (flags > UKSM_RUN_MERGE)
+		return -EINVAL;
+
+	mutex_lock(&uksm_thread_mutex);
+	if (uksm_run != flags)
+		uksm_run = flags;
+	mutex_unlock(&uksm_thread_mutex);
+
+	if (flags & UKSM_RUN_MERGE)
+		wake_up_interruptible(&uksm_thread_wait);
+
+	return count;
+}
+UKSM_ATTR(run);
+
+static ssize_t abundant_threshold_show(struct kobject *kobj,
+				     struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%u\n", uksm_abundant_threshold);
+}
+
+static ssize_t abundant_threshold_store(struct kobject *kobj,
+				      struct kobj_attribute *attr,
+				      const char *buf, size_t count)
+{
+	int err;
+	unsigned long flags;
+
+	err = kstrtoul(buf, 10, &flags);
+	if (err || flags > 99)
+		return -EINVAL;
+
+	uksm_abundant_threshold = flags;
+
+	return count;
+}
+UKSM_ATTR(abundant_threshold);
+
+static ssize_t thrash_threshold_show(struct kobject *kobj,
+				     struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%u\n", uksm_thrash_threshold);
+}
+
+static ssize_t thrash_threshold_store(struct kobject *kobj,
+				      struct kobj_attribute *attr,
+				      const char *buf, size_t count)
+{
+	int err;
+	unsigned long flags;
+
+	err = kstrtoul(buf, 10, &flags);
+	if (err || flags > 99)
+		return -EINVAL;
+
+	uksm_thrash_threshold = flags;
+
+	return count;
+}
+UKSM_ATTR(thrash_threshold);
+
+static ssize_t cpu_ratios_show(struct kobject *kobj,
+			       struct kobj_attribute *attr, char *buf)
+{
+	int i, size;
+	struct scan_rung *rung;
+	char *p = buf;
+
+	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
+		rung = &uksm_scan_ladder[i];
+
+		if (rung->cpu_ratio > 0)
+			size = sprintf(p, "%d ", rung->cpu_ratio);
+		else
+			size = sprintf(p, "MAX/%d ",
+					TIME_RATIO_SCALE / -rung->cpu_ratio);
+
+		p += size;
+	}
+
+	*p++ = '\n';
+	*p = '\0';
+
+	return p - buf;
+}
+
+static ssize_t cpu_ratios_store(struct kobject *kobj,
+				      struct kobj_attribute *attr,
+				      const char *buf, size_t count)
+{
+	int i, cpuratios[SCAN_LADDER_SIZE], err;
+	unsigned long value;
+	struct scan_rung *rung;
+	char *p, *end = NULL;
+
+	p = kzalloc(count, GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	memcpy(p, buf, count);
+
+	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
+		if (i != SCAN_LADDER_SIZE - 1) {
+			end = strchr(p, ' ');
+			if (!end)
+				return -EINVAL;
+
+			*end = '\0';
+		}
+
+		if (strstr(p, "MAX/")) {
+			p = strchr(p, '/') + 1;
+			err = kstrtoul(p, 10, &value);
+			if (err || value > TIME_RATIO_SCALE || !value)
+				return -EINVAL;
+
+			cpuratios[i] = -(int) (TIME_RATIO_SCALE / value);
+		} else {
+			err = kstrtoul(p, 10, &value);
+			if (err || value > TIME_RATIO_SCALE || !value)
+				return -EINVAL;
+
+			cpuratios[i] = value;
+		}
+
+		p = end + 1;
+	}
+
+	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
+		rung = &uksm_scan_ladder[i];
+
+		rung->cpu_ratio = cpuratios[i];
+	}
+
+	return count;
+}
+UKSM_ATTR(cpu_ratios);
+
+static ssize_t eval_intervals_show(struct kobject *kobj,
+			       struct kobj_attribute *attr, char *buf)
+{
+	int i, size;
+	struct scan_rung *rung;
+	char *p = buf;
+
+	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
+		rung = &uksm_scan_ladder[i];
+		size = sprintf(p, "%u ", rung->cover_msecs);
+		p += size;
+	}
+
+	*p++ = '\n';
+	*p = '\0';
+
+	return p - buf;
+}
+
+static ssize_t eval_intervals_store(struct kobject *kobj,
+				      struct kobj_attribute *attr,
+				      const char *buf, size_t count)
+{
+	int i, err;
+	unsigned long values[SCAN_LADDER_SIZE];
+	struct scan_rung *rung;
+	char *p, *end = NULL;
+	ssize_t ret = count;
+
+	p = kzalloc(count + 2, GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	memcpy(p, buf, count);
+
+	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
+		if (i != SCAN_LADDER_SIZE - 1) {
+			end = strchr(p, ' ');
+			if (!end) {
+				ret = -EINVAL;
+				goto out;
+			}
+
+			*end = '\0';
+		}
+
+		err = kstrtoul(p, 10, &values[i]);
+		if (err) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		p = end + 1;
+	}
+
+	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
+		rung = &uksm_scan_ladder[i];
+
+		rung->cover_msecs = values[i];
+	}
+
+out:
+	kfree(p);
+	return ret;
+}
+UKSM_ATTR(eval_intervals);
+
+static ssize_t ema_per_page_time_show(struct kobject *kobj,
+				 struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", uksm_ema_page_time);
+}
+UKSM_ATTR_RO(ema_per_page_time);
+
+static ssize_t pages_shared_show(struct kobject *kobj,
+				 struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", uksm_pages_shared);
+}
+UKSM_ATTR_RO(pages_shared);
+
+static ssize_t pages_sharing_show(struct kobject *kobj,
+				  struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", uksm_pages_sharing);
+}
+UKSM_ATTR_RO(pages_sharing);
+
+static ssize_t pages_unshared_show(struct kobject *kobj,
+				   struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", uksm_pages_unshared);
+}
+UKSM_ATTR_RO(pages_unshared);
+
+static ssize_t full_scans_show(struct kobject *kobj,
+			       struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%llu\n", fully_scanned_round);
+}
+UKSM_ATTR_RO(full_scans);
+
+static ssize_t pages_scanned_show(struct kobject *kobj,
+				  struct kobj_attribute *attr, char *buf)
+{
+	unsigned long base = 0;
+	u64 delta, ret;
+
+	if (pages_scanned_stored) {
+		base = pages_scanned_base;
+		ret = pages_scanned_stored;
+		delta = uksm_pages_scanned >> base;
+		if (CAN_OVERFLOW_U64(ret, delta)) {
+			ret >>= 1;
+			delta >>= 1;
+			base++;
+			ret += delta;
+		}
+	} else {
+		ret = uksm_pages_scanned;
+	}
+
+	while (ret > ULONG_MAX) {
+		ret >>= 1;
+		base++;
+	}
+
+	if (base)
+		return sprintf(buf, "%lu * 2^%lu\n", (unsigned long)ret, base);
+	else
+		return sprintf(buf, "%lu\n", (unsigned long)ret);
+}
+UKSM_ATTR_RO(pages_scanned);
+
+static ssize_t hash_strength_show(struct kobject *kobj,
+				  struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", hash_strength);
+}
+UKSM_ATTR_RO(hash_strength);
+
+static ssize_t sleep_times_show(struct kobject *kobj,
+				  struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%llu\n", uksm_sleep_times);
+}
+UKSM_ATTR_RO(sleep_times);
+
+
+static struct attribute *uksm_attrs[] = {
+	&max_cpu_percentage_attr.attr,
+	&sleep_millisecs_attr.attr,
+	&cpu_governor_attr.attr,
+	&run_attr.attr,
+	&ema_per_page_time_attr.attr,
+	&pages_shared_attr.attr,
+	&pages_sharing_attr.attr,
+	&pages_unshared_attr.attr,
+	&full_scans_attr.attr,
+	&pages_scanned_attr.attr,
+	&hash_strength_attr.attr,
+	&sleep_times_attr.attr,
+	&thrash_threshold_attr.attr,
+	&abundant_threshold_attr.attr,
+	&cpu_ratios_attr.attr,
+	&eval_intervals_attr.attr,
+	NULL,
+};
+
+static struct attribute_group uksm_attr_group = {
+	.attrs = uksm_attrs,
+	.name = "uksm",
+};
+#endif /* CONFIG_SYSFS */
+
+static inline void init_scan_ladder(void)
+{
+	int i;
+	struct scan_rung *rung;
+
+	for (i = 0; i < SCAN_LADDER_SIZE; i++) {
+		rung = uksm_scan_ladder + i;
+		slot_tree_init_root(&rung->vma_root);
+	}
+
+	init_performance_values();
+	uksm_calc_scan_pages();
+}
+
+static inline int cal_positive_negative_costs(void)
+{
+	struct page *p1, *p2;
+	unsigned char *addr1, *addr2;
+	unsigned long i, time_start, hash_cost;
+	unsigned long loopnum = 0;
+
+	/*IMPORTANT: volatile is needed to prevent over-optimization by gcc. */
+	volatile u32 hash;
+	volatile int ret;
+
+	p1 = alloc_page(GFP_KERNEL);
+	if (!p1)
+		return -ENOMEM;
+
+	p2 = alloc_page(GFP_KERNEL);
+	if (!p2)
+		return -ENOMEM;
+
+	addr1 = kmap_atomic(p1);
+	addr2 = kmap_atomic(p2);
+	memset(addr1, prandom_u32(), PAGE_SIZE);
+	memcpy(addr2, addr1, PAGE_SIZE);
+
+	/* make sure that the two pages differ in last byte */
+	addr2[PAGE_SIZE-1] = ~addr2[PAGE_SIZE-1];
+	kunmap_atomic(addr2);
+	kunmap_atomic(addr1);
+
+	time_start = jiffies;
+	while (jiffies - time_start < 100) {
+		for (i = 0; i < 100; i++)
+			hash = page_hash(p1, HASH_STRENGTH_FULL, 0);
+		loopnum += 100;
+	}
+	hash_cost = (jiffies - time_start);
+
+	time_start = jiffies;
+	for (i = 0; i < loopnum; i++)
+		ret = pages_identical(p1, p2);
+	memcmp_cost = HASH_STRENGTH_FULL * (jiffies - time_start);
+	memcmp_cost /= hash_cost;
+	pr_info("UKSM: relative memcmp_cost = %lu "
+		"hash=%u cmp_ret=%d.\n",
+		memcmp_cost, hash, ret);
+
+	__free_page(p1);
+	__free_page(p2);
+	return 0;
+}
+
+static int init_zeropage_hash_table(void)
+{
+	struct page *page;
+	char *addr;
+	int i;
+
+	page = alloc_page(GFP_KERNEL);
+	if (!page)
+		return -ENOMEM;
+
+	addr = kmap_atomic(page);
+	memset(addr, 0, PAGE_SIZE);
+	kunmap_atomic(addr);
+
+	zero_hash_table = kmalloc_array(HASH_STRENGTH_MAX, sizeof(u32),
+		GFP_KERNEL);
+	if (!zero_hash_table)
+		return -ENOMEM;
+
+	for (i = 0; i < HASH_STRENGTH_MAX; i++)
+		zero_hash_table[i] = page_hash(page, i, 0);
+
+	__free_page(page);
+
+	return 0;
+}
+
+static inline int init_random_sampling(void)
+{
+	unsigned long i;
+
+	random_nums = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!random_nums)
+		return -ENOMEM;
+
+	for (i = 0; i < HASH_STRENGTH_FULL; i++)
+		random_nums[i] = i;
+
+	for (i = 0; i < HASH_STRENGTH_FULL; i++) {
+		unsigned long rand_range, swap_index, tmp;
+
+		rand_range = HASH_STRENGTH_FULL - i;
+		swap_index = i + prandom_u32() % rand_range;
+		tmp = random_nums[i];
+		random_nums[i] =  random_nums[swap_index];
+		random_nums[swap_index] = tmp;
+	}
+
+	rshash_state.state = RSHASH_NEW;
+	rshash_state.below_count = 0;
+	rshash_state.lookup_window_index = 0;
+
+	return cal_positive_negative_costs();
+}
+
+static int __init uksm_slab_init(void)
+{
+	rmap_item_cache = UKSM_KMEM_CACHE(rmap_item, 0);
+	if (!rmap_item_cache)
+		goto out;
+
+	stable_node_cache = UKSM_KMEM_CACHE(stable_node, 0);
+	if (!stable_node_cache)
+		goto out_free1;
+
+	node_vma_cache = UKSM_KMEM_CACHE(node_vma, 0);
+	if (!node_vma_cache)
+		goto out_free2;
+
+	vma_slot_cache = UKSM_KMEM_CACHE(vma_slot, 0);
+	if (!vma_slot_cache)
+		goto out_free3;
+
+	tree_node_cache = UKSM_KMEM_CACHE(tree_node, 0);
+	if (!tree_node_cache)
+		goto out_free4;
+
+	return 0;
+
+out_free4:
+	kmem_cache_destroy(vma_slot_cache);
+out_free3:
+	kmem_cache_destroy(node_vma_cache);
+out_free2:
+	kmem_cache_destroy(stable_node_cache);
+out_free1:
+	kmem_cache_destroy(rmap_item_cache);
+out:
+	return -ENOMEM;
+}
+
+static void __init uksm_slab_free(void)
+{
+	kmem_cache_destroy(stable_node_cache);
+	kmem_cache_destroy(rmap_item_cache);
+	kmem_cache_destroy(node_vma_cache);
+	kmem_cache_destroy(vma_slot_cache);
+	kmem_cache_destroy(tree_node_cache);
+}
+
+/* Common interface to ksm, different to it. */
+int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
+		unsigned long end, int advice, unsigned long *vm_flags)
+{
+	int err;
+
+	switch (advice) {
+	case MADV_MERGEABLE:
+		return 0;		/* just ignore the advice */
+
+	case MADV_UNMERGEABLE:
+		if (!(*vm_flags & VM_MERGEABLE) || !uksm_flags_can_scan(*vm_flags))
+			return 0;		/* just ignore the advice */
+
+		if (vma->anon_vma) {
+			err = unmerge_uksm_pages(vma, start, end);
+			if (err)
+				return err;
+		}
+
+		uksm_remove_vma(vma);
+		*vm_flags &= ~VM_MERGEABLE;
+		break;
+	}
+
+	return 0;
+}
+
+/* Common interface to ksm, actually the same. */
+struct page *ksm_might_need_to_copy(struct page *page,
+			struct vm_area_struct *vma, unsigned long address)
+{
+	struct anon_vma *anon_vma = page_anon_vma(page);
+	struct page *new_page;
+
+	if (PageKsm(page)) {
+		if (page_stable_node(page))
+			return page;	/* no need to copy it */
+	} else if (!anon_vma) {
+		return page;		/* no need to copy it */
+	} else if (anon_vma->root == vma->anon_vma->root &&
+		 page->index == linear_page_index(vma, address)) {
+		return page;		/* still no need to copy it */
+	}
+	if (!PageUptodate(page))
+		return page;		/* let do_swap_page report the error */
+
+	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
+	if (new_page) {
+		copy_user_highpage(new_page, page, address, vma);
+
+		SetPageDirty(new_page);
+		__SetPageUptodate(new_page);
+		__SetPageLocked(new_page);
+	}
+
+	return new_page;
+}
+
+static int __init uksm_init(void)
+{
+	struct task_struct *uksm_thread;
+	int err;
+
+	uksm_sleep_jiffies = msecs_to_jiffies(100);
+	uksm_sleep_saved = uksm_sleep_jiffies;
+
+	slot_tree_init();
+	init_scan_ladder();
+
+
+	err = init_random_sampling();
+	if (err)
+		goto out_free2;
+
+	err = uksm_slab_init();
+	if (err)
+		goto out_free1;
+
+	err = init_zeropage_hash_table();
+	if (err)
+		goto out_free0;
+
+	uksm_thread = kthread_run(uksm_scan_thread, NULL, "uksmd");
+	if (IS_ERR(uksm_thread)) {
+		pr_err("uksm: creating kthread failed\n");
+		err = PTR_ERR(uksm_thread);
+		goto out_free;
+	}
+
+#ifdef CONFIG_SYSFS
+	err = sysfs_create_group(mm_kobj, &uksm_attr_group);
+	if (err) {
+		pr_err("uksm: register sysfs failed\n");
+		kthread_stop(uksm_thread);
+		goto out_free;
+	}
+#else
+	uksm_run = UKSM_RUN_MERGE;	/* no way for user to start it */
+
+#endif /* CONFIG_SYSFS */
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+	/*
+	 * Choose a high priority since the callback takes uksm_thread_mutex:
+	 * later callbacks could only be taking locks which nest within that.
+	 */
+	hotplug_memory_notifier(uksm_memory_callback, 100);
+#endif
+	return 0;
+
+out_free:
+	kfree(zero_hash_table);
+out_free0:
+	uksm_slab_free();
+out_free1:
+	kfree(random_nums);
+out_free2:
+	kfree(uksm_scan_ladder);
+	return err;
+}
+
+#ifdef MODULE
+subsys_initcall(ksm_init);
+#else
+late_initcall(uksm_init);
+#endif
+
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f6a1587f9..47db06d87 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1601,7 +1601,7 @@ static int too_many_isolated(struct pglist_data *pgdat, int file,
 {
 	unsigned long inactive, isolated;
 
-	if (current_is_kswapd())
+	if (current_is_kswapd() || sc->hibernation_mode)
 		return 0;
 
 	if (!sane_reclaim(sc))
@@ -2477,6 +2477,9 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
 	unsigned long inactive_lru_pages;
 	int z;
 
+	if (nr_reclaimed && nr_scanned && sc->nr_to_reclaim >= sc->nr_reclaimed)
+		return true;
+
 	/* If not in reclaim/compaction mode, stop */
 	if (!in_reclaim_compaction(sc))
 		return false;
@@ -3575,6 +3578,11 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
 	if (!managed_zone(zone))
 		return;
 
+#ifdef CONFIG_FREEZER
+	if (pm_freezing)
+		return;
+#endif
+
 	if (!cpuset_zone_allowed(zone, GFP_KERNEL | __GFP_HARDWALL))
 		return;
 	pgdat = zone->zone_pgdat;
@@ -3604,7 +3612,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
  * LRU order by reclaiming preferentially
  * inactive > active > active referenced > active mapped
  */
-unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
+unsigned long shrink_memory_mask(unsigned long nr_to_reclaim, gfp_t mask)
 {
 	struct reclaim_state reclaim_state;
 	struct scan_control sc = {
@@ -3635,6 +3643,11 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
 
 	return nr_reclaimed;
 }
+
+unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
+{
+	return shrink_memory_mask(nr_to_reclaim, GFP_HIGHUSER_MOVABLE);
+}
 #endif /* CONFIG_HIBERNATION */
 
 /* It's optimal to keep kswapds on the same CPUs as their memory, but
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 33581be70..f2eb7b480 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1162,6 +1162,9 @@ const char * const vmstat_text[] = {
 	"nr_dirtied",
 	"nr_written",
 
+#ifdef CONFIG_UKSM
+	"nr_uksm_zero_pages",
+#endif
 	/* enum writeback_stat_item counters */
 	"nr_dirty_threshold",
 	"nr_dirty_background_threshold",
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index f48fe6fc7..3c05cb430 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -697,6 +697,9 @@ choice
 	config DEFAULT_VEGAS
 		bool "Vegas" if TCP_CONG_VEGAS=y
 
+	config DEFAULT_YEAH
+		bool "YeAH" if TCP_CONG_YEAH=y
+
 	config DEFAULT_VENO
 		bool "Veno" if TCP_CONG_VENO=y
 
@@ -730,6 +733,7 @@ config DEFAULT_TCP_CONG
 	default "htcp" if DEFAULT_HTCP
 	default "hybla" if DEFAULT_HYBLA
 	default "vegas" if DEFAULT_VEGAS
+	default "yeah" if DEFAULT_YEAH
 	default "westwood" if DEFAULT_WESTWOOD
 	default "veno" if DEFAULT_VENO
 	default "reno" if DEFAULT_RENO
diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h
index 87f1fc980..52d203a77 100755
--- a/scripts/mkcompile_h
+++ b/scripts/mkcompile_h
@@ -50,8 +50,8 @@ else
 fi
 
 UTS_VERSION="#$VERSION"
-CONFIG_FLAGS=""
-if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi
+CONFIG_FLAGS="PCK"
+if [ -n "$SMP" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS SMP"; fi
 if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi
 UTS_VERSION="$UTS_VERSION $CONFIG_FLAGS $TIMESTAMP"
 
diff --git a/scripts/tuxonice_output_to_csv.sh b/scripts/tuxonice_output_to_csv.sh
new file mode 100755
index 000000000..b96e6803b
--- /dev/null
+++ b/scripts/tuxonice_output_to_csv.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+cat $1 | grep "\*TOI\*" | cut -b 22- | sed "s/ /,/g" | sed "s/\.//" | sort -n > $1.tmp
+COLUMNS=$(cat $1.tmp | awk -F ',' ' { print $2 } ' | sort | uniq)
+echo -n "pfn," > $1.tmp2
+for NAME in $COLUMNS; do
+    echo -n "$NAME," >> $1.tmp2
+done
+echo >> $1.tmp2
+FIRST=1
+declare -A data
+while IFS=, read -r pfn column value; do
+    if [ $FIRST -eq 1 ]; then
+        FIRST=0
+        LAST_PFN=$pfn
+    fi
+    if [ $pfn -ne $LAST_PFN ]; then
+        echo -n "$LAST_PFN," >> $1.tmp2;
+        for NAME in $COLUMNS; do
+            echo -n "${data[$NAME]}," >> $1.tmp2
+        done
+        data=( )
+        echo >> $1.tmp2
+        LAST_PFN=$pfn
+    fi
+    if [ -z "$value" ]; then
+        data[$column]=X
+    else
+        data[$column]=$value
+    fi
+done < $1.tmp
+mv $1.tmp2 $1.csv
+rm $1.tmp
+LIBREOFFICE=$(which libreoffice)
+[ -n "$LIBREOFFICE" ] && libreoffice $1.csv &
diff --git a/security/commoncap.c b/security/commoncap.c
index 1ce701fcb..a0d106e6b 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -1332,12 +1332,14 @@ int cap_mmap_addr(unsigned long addr)
 	}
 	return ret;
 }
+EXPORT_SYMBOL_GPL(cap_mmap_addr);
 
 int cap_mmap_file(struct file *file, unsigned long reqprot,
 		  unsigned long prot, unsigned long flags)
 {
 	return 0;
 }
+EXPORT_SYMBOL_GPL(cap_mmap_file);
 
 #ifdef CONFIG_SECURITY
 
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index c65b39baf..e363d2205 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -8,6 +8,7 @@
 #include <linux/device_cgroup.h>
 #include <linux/cgroup.h>
 #include <linux/ctype.h>
+#include <linux/export.h>
 #include <linux/list.h>
 #include <linux/uaccess.h>
 #include <linux/seq_file.h>
@@ -824,3 +825,4 @@ int __devcgroup_check_permission(short type, u32 major, u32 minor,
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(__devcgroup_check_permission);
diff --git a/security/security.c b/security/security.c
index 1cd8526cb..f2e473650 100644
--- a/security/security.c
+++ b/security/security.c
@@ -531,6 +531,7 @@ int security_path_rmdir(const struct path *dir, struct dentry *dentry)
 		return 0;
 	return call_int_hook(path_rmdir, 0, dir, dentry);
 }
+EXPORT_SYMBOL_GPL(security_path_rmdir);
 
 int security_path_unlink(const struct path *dir, struct dentry *dentry)
 {
@@ -547,6 +548,7 @@ int security_path_symlink(const struct path *dir, struct dentry *dentry,
 		return 0;
 	return call_int_hook(path_symlink, 0, dir, dentry, old_name);
 }
+EXPORT_SYMBOL_GPL(security_path_symlink);
 
 int security_path_link(struct dentry *old_dentry, const struct path *new_dir,
 		       struct dentry *new_dentry)
@@ -555,6 +557,7 @@ int security_path_link(struct dentry *old_dentry, const struct path *new_dir,
 		return 0;
 	return call_int_hook(path_link, 0, old_dentry, new_dir, new_dentry);
 }
+EXPORT_SYMBOL_GPL(security_path_link);
 
 int security_path_rename(const struct path *old_dir, struct dentry *old_dentry,
 			 const struct path *new_dir, struct dentry *new_dentry,
@@ -582,6 +585,7 @@ int security_path_truncate(const struct path *path)
 		return 0;
 	return call_int_hook(path_truncate, 0, path);
 }
+EXPORT_SYMBOL_GPL(security_path_truncate);
 
 int security_path_chmod(const struct path *path, umode_t mode)
 {
@@ -589,6 +593,7 @@ int security_path_chmod(const struct path *path, umode_t mode)
 		return 0;
 	return call_int_hook(path_chmod, 0, path, mode);
 }
+EXPORT_SYMBOL_GPL(security_path_chmod);
 
 int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid)
 {
@@ -596,6 +601,7 @@ int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid)
 		return 0;
 	return call_int_hook(path_chown, 0, path, uid, gid);
 }
+EXPORT_SYMBOL_GPL(security_path_chown);
 
 int security_path_chroot(const struct path *path)
 {
@@ -681,6 +687,7 @@ int security_inode_readlink(struct dentry *dentry)
 		return 0;
 	return call_int_hook(inode_readlink, 0, dentry);
 }
+EXPORT_SYMBOL_GPL(security_inode_readlink);
 
 int security_inode_follow_link(struct dentry *dentry, struct inode *inode,
 			       bool rcu)
@@ -696,6 +703,7 @@ int security_inode_permission(struct inode *inode, int mask)
 		return 0;
 	return call_int_hook(inode_permission, 0, inode, mask);
 }
+EXPORT_SYMBOL_GPL(security_inode_permission);
 
 int security_inode_setattr(struct dentry *dentry, struct iattr *attr)
 {
@@ -867,6 +875,7 @@ int security_file_permission(struct file *file, int mask)
 
 	return fsnotify_perm(file, mask);
 }
+EXPORT_SYMBOL_GPL(security_file_permission);
 
 int security_file_alloc(struct file *file)
 {
@@ -926,6 +935,7 @@ int security_mmap_file(struct file *file, unsigned long prot,
 		return ret;
 	return ima_file_mmap(file, prot);
 }
+EXPORT_SYMBOL_GPL(security_mmap_file);
 
 int security_mmap_addr(unsigned long addr)
 {